In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils import class_weight
from imblearn.over_sampling import SMOTE

# --- Load Data ---
url = 'https://raw.githubusercontent.com/nandarishik/Ferry-Internship/main/realistic_medication_adherence_data.csv'
df = pd.read_csv(url)

# --- Handle missing values ---
for col in df.columns:
    if df[col].isnull().any():
        if df[col].dtype == 'object':
            df[col].fillna(df[col].mode()[0], inplace=True)
        else:
            df[col].fillna(df[col].median(), inplace=True)

# --- Feature Engineering ---
# Patient Readiness Score
readiness_features = df[['health_literacy_score', 'social_support_index', 'belief_in_medication']]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(readiness_features)
df['patient_readiness_score'] = (
    scaled_features[:, 0] + scaled_features[:, 1] + scaled_features[:, 2] + df['provider_consistency'].astype(int)
)

# Literacy x Income
income_map = {'Low':1, 'Medium':2, 'High':3}
df['income_numeric'] = df['income_bracket'].map(income_map)
df['literacy_x_income'] = df['health_literacy_score'] * df['income_numeric']

# --- Prepare X and y ---
y = df['medication_adherence']
X = df.drop([
    'medication_adherence', 'health_literacy_score', 'social_support_index',
    'belief_in_medication', 'provider_consistency', 'income_bracket', 'income_numeric'
], axis=1)
X = pd.get_dummies(X, drop_first=True)

# --- Train-test split (stratified) ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# --- Optional: Oversample minority class with SMOTE ---
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

# --- Compute class weights ---
cw = class_weight.compute_class_weight('balanced', classes=np.unique(y_train_res), y=y_train_res)
class_weights = dict(enumerate(cw))
print("Class Weights:", class_weights)

# --- Neural Network ---
model = Sequential([
    Input(shape=(X_train_res.shape[1],)),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Early stopping to avoid overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# --- Train model ---
history = model.fit(
    X_train_res, y_train_res,
    validation_split=0.2,
    epochs=100,
    batch_size=16,
    class_weight=class_weights,
    callbacks=[early_stop],
    verbose=1
)

# --- Evaluate ---
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob >= 0.5).astype(int)

print("\nNeural Network Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].median(), inplace=True)


Class Weights: {0: np.float64(1.0), 1: np.float64(1.0)}
Epoch 1/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.4187 - loss: 33.0647 - val_accuracy: 0.6556 - val_loss: 8.2641
Epoch 2/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5041 - loss: 10.5496 - val_accuracy: 0.4000 - val_loss: 4.2673
Epoch 3/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5571 - loss: 3.4984 - val_accuracy: 0.4889 - val_loss: 3.2623
Epoch 4/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5757 - loss: 2.2830 - val_accuracy: 0.4778 - val_loss: 2.9988
Epoch 5/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5508 - loss: 2.4488 - val_accuracy: 0.4889 - val_loss: 2.5205
Epoch 6/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5026 - loss: 2.4300 - val_accuracy: