In [None]:
# 03-hyperparameter-tuning.ipynb

# 1. Imports
import pandas as pd
import numpy as np
import json
import itertools
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.utils.class_weight import compute_class_weight
from tensorflow import keras
from tensorflow.keras import layers
import os

# 2. Load processed data
# Each split is read from its own CSV; the "G3_binary" column is the label and
# every remaining column is used as a model feature.
train = pd.read_csv("processed_data/train.csv")
val = pd.read_csv("processed_data/val.csv")
test = pd.read_csv("processed_data/test.csv")

# Features: everything except the label column.
X_train = train.drop(columns="G3_binary")
X_val = val.drop(columns="G3_binary")
X_test = test.drop(columns="G3_binary")

# Labels: the binary target column of each split.
y_train = train["G3_binary"]
y_val = val["G3_binary"]
y_test = test["G3_binary"]

# Class weights
# "balanced" weighting is computed from the training labels only, so the
# validation/test splits do not influence training.
_train_labels = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=_train_labels, y=y_train)
# Pair every weight with the label it was computed for instead of assuming the
# labels are exactly 0 and 1: this avoids an IndexError when a class is missing
# from the training split and keeps the mapping correct for any label values.
# Keys/values are cast to plain int/float, which is what Keras' `class_weight`
# argument expects (class index -> weight).
class_weights_dict = {
    int(label): float(weight)
    for label, weight in zip(_train_labels, class_weights)
}


# 3. Sequential Model Hyperparameter Tuning
# Search space: every combination of the three lists below is trained once.
hidden_layers_options = [
    [10],        # single hidden layer
    [16, 8],     # two hidden layers
    [32, 16],
    [64, 32],
]
learning_rates = [0.001, 0.01]   # Adam learning rates to try
dropout_rates = [0.0, 0.3]       # 0.0 means "no Dropout layers"

# One dict of hyperparameters + metrics is appended here per trained model.
seq_results = []

def build_seq(layers_config, lr, dropout):
    """Build and compile a feed-forward binary classifier.

    Args:
        layers_config: Iterable of hidden-layer widths; one ReLU Dense layer
            is added per entry, in order.
        lr: Learning rate passed to the Adam optimizer.
        dropout: Dropout rate. When greater than 0, a Dropout layer follows
            every hidden Dense layer; otherwise no Dropout layers are added.

    Returns:
        A compiled ``keras.Sequential`` model with a single sigmoid output,
        binary cross-entropy loss, and "accuracy" / "auc" metrics.

    Note:
        The input width is read from the module-level ``X_train``.
    """
    n_features = X_train.shape[1]
    use_dropout = dropout > 0

    net = keras.Sequential()
    net.add(layers.Input(shape=(n_features,)))
    for width in layers_config:
        net.add(layers.Dense(width, activation='relu'))
        if use_dropout:
            net.add(layers.Dropout(dropout))
    # Single sigmoid unit -> probability of the positive class.
    net.add(layers.Dense(1, activation='sigmoid'))

    optimizer = keras.optimizers.Adam(learning_rate=lr)
    auc_metric = keras.metrics.AUC(name="auc")
    net.compile(
        optimizer=optimizer,
        loss="binary_crossentropy",
        metrics=["accuracy", auc_metric],
    )
    return net

# Grid search over the Sequential search space.
#
# For every (layers, learning rate, dropout) combination a fresh model is
# trained on the training split. Metrics on the test split are recorded for
# reporting, but the winning configuration is chosen on the *validation* split
# so that the test split stays an unbiased estimate of final performance.
print("🔎 Running Sequential model tuning...")
seq_grid = itertools.product(hidden_layers_options, learning_rates, dropout_rates)
for run_num, (layers_config, lr, dr) in enumerate(seq_grid, start=1):
    print(f"Sequential Model {run_num}: Layers={layers_config}, LR={lr}, Dropout={dr}")

    # Release the global Keras state left behind by the previous candidate;
    # otherwise memory use grows with every model created in this loop.
    keras.backend.clear_session()

    model = build_seq(layers_config, lr, dr)
    model.fit(X_train, y_train,
              validation_data=(X_val, y_val),
              epochs=30, batch_size=32, verbose=0,
              class_weight=class_weights_dict)

    # Validation predictions: used only to rank candidates.
    val_probs = model.predict(X_val, verbose=0).ravel()
    val_preds = (val_probs > 0.5).astype(int)

    # Test predictions: reported, never used for selection.
    probs = model.predict(X_test, verbose=0).ravel()
    preds = (probs > 0.5).astype(int)

    seq_results.append({
        "Model No.": run_num,
        "Layers": str(layers_config),
        "Learning Rate": lr,
        "Dropout": dr,
        "Accuracy": accuracy_score(y_test, preds),
        "Precision": precision_score(y_test, preds, zero_division=0),
        "Recall": recall_score(y_test, preds, zero_division=0),
        "F1-Score": f1_score(y_test, preds, zero_division=0),
        "ROC-AUC": roc_auc_score(y_test, probs),
        "Val F1-Score": f1_score(y_val, val_preds, zero_division=0),
        "Val ROC-AUC": roc_auc_score(y_val, val_probs),
    })

os.makedirs("results", exist_ok=True)

seq_df = pd.DataFrame(seq_results)
seq_df.to_csv("results/sequential_hyperparameter_results.csv", index=False)

# Select best Sequential model (prioritise validation F1, then validation ROC-AUC)
best_seq = seq_df.sort_values(by=["Val F1-Score", "Val ROC-AUC"], ascending=False).iloc[0]
with open("results/best_params_sequential.json", "w") as f:
    json.dump(best_seq.to_dict(), f)

print("✅ Sequential tuning complete. Best model saved.")


# 4. Wide & Deep Model Hyperparameter Tuning
# A column goes to the "wide" input when its name contains any of these
# markers (substring match, not only a leading prefix); all other columns go
# to the "deep" input.
# NOTE(review): these look like one-hot encoded categorical features — confirm
# against the preprocessing notebook.
_wide_markers = (
    'school_', 'sex_', 'address_', 'famsize_', 'Pstatus_', 'Mjob_', 'Fjob_',
    'reason_', 'guardian_', 'schoolsup_', 'famsup_', 'paid_', 'activities_',
    'nursery_', 'higher_', 'internet_', 'romantic_',
)
cat_cols = [
    name for name in X_train.columns
    if any(marker in name for marker in _wide_markers)
]
num_cols = [name for name in X_train.columns if name not in cat_cols]

# The same column partition is applied to every split.
X_train_wide, X_train_deep = X_train[cat_cols], X_train[num_cols]
X_val_wide, X_val_deep = X_val[cat_cols], X_val[num_cols]
X_test_wide, X_test_deep = X_test[cat_cols], X_test[num_cols]

# Search space for the Wide & Deep model: one grid entry per deep-tower
# architecture, each crossed with the same learning rates and dropout rates.
param_grid = [
    {"layers": [architecture], "lr": [0.001, 0.01], "dropout": [0.0, 0.3]}
    for architecture in ([10], [16, 8])
]

# One dict of hyperparameters + metrics is appended here per trained model.
wd_results = []

def build_wd(hidden_layers, lr, dropout):
    """Build and compile a two-input Wide & Deep binary classifier.

    The "deep" input is passed through a stack of ReLU Dense layers; the
    "wide" input bypasses that stack and is concatenated with its output
    right before the final sigmoid unit.

    Args:
        hidden_layers: Iterable of Dense-layer widths for the deep branch.
        lr: Learning rate passed to the Adam optimizer.
        dropout: Dropout rate. When greater than 0, a Dropout layer follows
            every Dense layer of the deep branch.

    Returns:
        A compiled ``keras.Model`` taking ``[wide_input, deep_input]`` and
        producing one sigmoid output, with binary cross-entropy loss and
        "accuracy" / "auc" metrics.

    Note:
        Input widths are read from the module-level ``X_train_wide`` and
        ``X_train_deep``.
    """
    wide_in = layers.Input(shape=(X_train_wide.shape[1],), name="wide_input")
    deep_in = layers.Input(shape=(X_train_deep.shape[1],), name="deep_input")

    # Deep branch: Dense (+ optional Dropout) blocks applied in sequence.
    apply_dropout = dropout > 0
    tower = deep_in
    for width in hidden_layers:
        tower = layers.Dense(width, activation='relu')(tower)
        if apply_dropout:
            tower = layers.Dropout(dropout)(tower)

    # Wide features join the deep representation just before the output unit.
    merged = layers.concatenate([wide_in, tower])
    prediction = layers.Dense(1, activation='sigmoid')(merged)

    wd_model = keras.Model(inputs=[wide_in, deep_in], outputs=prediction)
    wd_model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss='binary_crossentropy',
        metrics=['accuracy', keras.metrics.AUC(name='auc')],
    )
    return wd_model

# Grid search over the Wide & Deep search space.
#
# Every entry of `param_grid` is expanded into its (layers, learning rate,
# dropout) combinations and a fresh model is trained for each one. Metrics on
# the test split are recorded for reporting, but the winning configuration is
# chosen on the *validation* split so that the test split stays an unbiased
# estimate of final performance.
print("🔎 Running Wide & Deep model tuning...")

# Fail early (before any training) if the output directory cannot be created.
os.makedirs("results", exist_ok=True)

wd_grid = itertools.chain.from_iterable(
    itertools.product(grid["layers"], grid["lr"], grid["dropout"])
    for grid in param_grid
)
for run_num, (layers_config, lr, dr) in enumerate(wd_grid, start=1):
    print(f"Wide&Deep Model {run_num}: Layers={layers_config}, LR={lr}, Dropout={dr}")

    # Release the global Keras state left behind by the previous candidate;
    # otherwise memory use grows with every model created in this loop.
    keras.backend.clear_session()

    model = build_wd(layers_config, lr, dr)
    model.fit([X_train_wide, X_train_deep], y_train,
              validation_data=([X_val_wide, X_val_deep], y_val),
              epochs=30, batch_size=32, verbose=0,
              class_weight=class_weights_dict)

    # Validation predictions: used only to rank candidates.
    val_probs = model.predict([X_val_wide, X_val_deep], verbose=0).ravel()
    val_preds = (val_probs > 0.5).astype(int)

    # Test predictions: reported, never used for selection.
    probs = model.predict([X_test_wide, X_test_deep], verbose=0).ravel()
    preds = (probs > 0.5).astype(int)

    wd_results.append({
        "Model No.": run_num,
        "Layers": str(layers_config),
        "Learning Rate": lr,
        "Dropout": dr,
        "Accuracy": accuracy_score(y_test, preds),
        "Precision": precision_score(y_test, preds, zero_division=0),
        "Recall": recall_score(y_test, preds, zero_division=0),
        "F1-Score": f1_score(y_test, preds, zero_division=0),
        "ROC-AUC": roc_auc_score(y_test, probs),
        "Val F1-Score": f1_score(y_val, val_preds, zero_division=0),
        "Val ROC-AUC": roc_auc_score(y_val, val_probs),
    })

wd_df = pd.DataFrame(wd_results)
wd_df.to_csv("results/widendeep_hyperparameter_results.csv", index=False)

# Select best Wide & Deep model (prioritise validation F1, then validation ROC-AUC)
best_wd = wd_df.sort_values(by=["Val F1-Score", "Val ROC-AUC"], ascending=False).iloc[0]
with open("results/best_params_widendeep.json", "w") as f:
    json.dump(best_wd.to_dict(), f)

print("✅ Wide&Deep tuning complete. Best model saved.")


🔎 Running Sequential model tuning...
Sequential Model 1: Layers=[10], LR=0.001, Dropout=0.0
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Sequential Model 2: Layers=[10], LR=0.001, Dropout=0.3
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Sequential Model 3: Layers=[10], LR=0.01, Dropout=0.0
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Sequential Model 4: Layers=[10], LR=0.01, Dropout=0.3
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Sequential Model 5: Layers=[16, 8], LR=0.001, Dropout=0.0
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Sequential Model 6: Layers=[16, 8], LR=0.001, Dropout=0.3
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Sequential Model 7: Layers=[16, 8], LR=0.01, Dropout=0.0
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Sequential Model 8: Layers=[16, 8], LR=0.01, Dropout=0.3
[1m2/2[0m