<a href="https://colab.research.google.com/github/l-isaro/PredictED-model-training-and-evaluation/blob/main/notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# PredictED-Rwanda: Starter Notebook for ML Evaluation (with G1 & G2)

# 🧱 1. Setup and Load Data
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import os

# Create directory for saved models
os.makedirs("saved_models", exist_ok=True)

# Load UCI student performance data (Math).
# NOTE: `url` is not read below; the data is loaded from a local copy of
# student-mat.csv in the current working directory.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00320/student-mat.csv"
df = pd.read_csv("student-mat.csv", sep=';')

# 🧹 2. Preprocess: Binary label (At Risk if G3 < 10)
df['target'] = (df['G3'] < 10).astype(int)
y = df['target']
X = df.drop(['G3', 'target'], axis=1)  # Keep G1 and G2 for now
X = pd.get_dummies(X, drop_first=True)  # One-hot encoding

# Split data BEFORE scaling so that the held-out rows never influence the
# scaler's mean/variance (fitting on the full dataset leaks test statistics).
# The same random_state selects the same rows as splitting the scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Normalize features: fit on the training rows only, then apply the same
# transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled matrix (using the train-fitted scaler), kept so that any code
# referring to `X_scaled` continues to work.
X_scaled = scaler.transform(X)

# 📊 Utility: Evaluate model
def evaluate_model(y_true, y_pred, y_prob):
    """Compute the classification metrics reported for every model.

    Args:
        y_true: Ground-truth binary labels.
        y_pred: Predicted class labels, scored against ``y_true`` for
            accuracy, precision, recall and F1.
        y_prob: Predicted probabilities, scored against ``y_true`` with
            log loss.

    Returns:
        A dict with the keys "Accuracy", "Precision", "Recall", "F1 Score"
        and "Loss", in that order.
    """
    # Metrics computed from hard label predictions, in reporting order.
    label_scorers = (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    )
    report = {title: scorer(y_true, y_pred) for title, scorer in label_scorers}

    # Log loss is the only metric that needs probabilities instead of labels.
    report["Loss"] = log_loss(y_true, y_prob)
    return report

# 🧠 3. Instance 1 - Basic Neural Network (Default Adam Optimizer, No Early Stopping)
# The input shape is declared with an explicit Input layer instead of passing
# `input_shape=` to the first Dense layer; this matches build_and_train_nn
# and avoids the Keras warning about `input_shape` on layers in Sequential
# models.
model_1 = models.Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Baseline configuration: Adam with its default settings, no validation split,
# no regularization, fixed 20 epochs.
model_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_1.fit(X_train, y_train, epochs=20, verbose=0)

# Evaluate Instance 1: threshold the sigmoid output at 0.5 to get class labels.
y_prob_1 = model_1.predict(X_test)
y_pred_1 = (y_prob_1 > 0.5).astype(int)
metrics_1 = evaluate_model(y_test, y_pred_1, y_prob_1)
print("Instance 1 Metrics:", metrics_1)
model_1.save("saved_models/nn_instance1.keras")

# 🔄 4. Instances 2–5 Templates (Modify and Run Multiple Times)
def build_and_train_nn(instance_id, optimizer='adam', regularizer=None, dropout_rate=None,
                       early_stopping=False, learning_rate=None, epochs=20, layers_config=(16, 8)):
    """Build, train, evaluate and save one feed-forward binary classifier.

    The network is a stack of ReLU Dense layers (one per entry of
    ``layers_config``), each optionally followed by Dropout, ending in a
    single sigmoid unit. It is trained on the module-level ``X_train`` /
    ``y_train`` with 20% of the training data held out for validation, and
    evaluated on the module-level ``X_test`` / ``y_test``.

    Args:
        instance_id: Identifier used in the printed message and in the saved
            file name ``saved_models/nn_instance{instance_id}.keras``.
        optimizer: Anything accepted by ``tf.keras.optimizers.get``.
        regularizer: Kernel regularizer applied to every hidden Dense layer,
            or None for no regularization.
        dropout_rate: Dropout rate added after every hidden layer; a falsy
            value (None or 0) adds no Dropout layers.
        early_stopping: If True, stop when ``val_loss`` has not improved for
            3 epochs and restore the weights of the best epoch.
        learning_rate: Learning rate to set on the optimizer; None keeps the
            optimizer's own default.
        epochs: Maximum number of training epochs.
        layers_config: Iterable with the number of units of each hidden layer.

    Returns:
        The metrics dict produced by ``evaluate_model`` for the test set.
    """
    callbacks = []
    if early_stopping:
        # restore_best_weights=True: without it the model that gets evaluated
        # and saved is the one from `patience` epochs AFTER the best val_loss.
        callbacks.append(tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=3, restore_best_weights=True))

    opt = tf.keras.optimizers.get(optimizer)
    # Explicit None check so that only an omitted learning rate is ignored.
    if learning_rate is not None:
        opt.learning_rate = learning_rate

    model = models.Sequential()
    model.add(layers.Input(shape=(X_train.shape[1],)))

    for units in layers_config:
        model.add(layers.Dense(units, activation='relu',
                               kernel_regularizer=regularizer))
        if dropout_rate:
            model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    # validation_split provides the `val_loss` that EarlyStopping monitors.
    model.fit(X_train, y_train, epochs=epochs, validation_split=0.2, verbose=0, callbacks=callbacks)

    # Threshold the sigmoid output at 0.5 to obtain class labels.
    y_prob = model.predict(X_test)
    y_pred = (y_prob > 0.5).astype(int)
    results = evaluate_model(y_test, y_pred, y_prob)

    model.save(f"saved_models/nn_instance{instance_id}.keras")
    print(f"Instance {instance_id} Metrics:", results)
    return results

# Hyperparameter settings for Instances 2-5. Each entry is passed as keyword
# arguments to build_and_train_nn, and the entries are trained in list order.
instance_settings = [
    # Instance 2
    dict(
        instance_id=2,
        optimizer='rmsprop',
        regularizer=regularizers.l2(0.01),
        dropout_rate=0.3,
        early_stopping=True,
        learning_rate=0.001,
        epochs=25,
        layers_config=[32, 16],
    ),
    # Instance 3
    dict(
        instance_id=3,
        optimizer='adam',
        regularizer=regularizers.l1(0.001),
        dropout_rate=0.4,
        early_stopping=False,
        learning_rate=0.0005,
        epochs=30,
        layers_config=[64, 32],
    ),
    # Instance 4
    dict(
        instance_id=4,
        optimizer='adamax',
        regularizer=regularizers.l2(0.005),
        dropout_rate=0.2,
        early_stopping=True,
        learning_rate=0.002,
        epochs=20,
        layers_config=[64, 32, 16],
    ),
    # Instance 5 (Optional)
    dict(
        instance_id=5,
        optimizer='nadam',
        regularizer=regularizers.l1_l2(l1=0.001, l2=0.001),
        dropout_rate=0.5,
        early_stopping=True,
        learning_rate=0.0008,
        epochs=30,
        layers_config=[128, 64, 32],
    ),
]

for settings in instance_settings:
    build_and_train_nn(**settings)


# Example usage:
# from tensorflow.keras import regularizers
# build_and_train_nn(2, optimizer='rmsprop', regularizer=regularizers.l2(0.01),
#                    dropout_rate=0.3, early_stopping=True, learning_rate=0.001)

# 🤖 5. Classical ML Model (SVM or Logistic Regression)
def classical_model_evaluation():
    """Train, evaluate and save a linear-kernel SVM baseline.

    Fits the classifier on the module-level ``X_train`` / ``y_train``,
    evaluates it on ``X_test`` / ``y_test`` with ``evaluate_model``, prints
    the metrics and writes the fitted model to
    ``saved_models/svm_model.pkl``.

    Returns:
        The metrics dict produced by ``evaluate_model``.
    """
    # Imported up front so a missing joblib fails before any training is done.
    import joblib

    # random_state makes the probability estimates reproducible between runs
    # (probability=True enables them; see the SVC documentation); 42 matches
    # the seed used for train_test_split.
    clf = SVC(kernel='linear', C=1.0, probability=True, random_state=42)
    clf.fit(X_train, y_train)
    y_prob = clf.predict_proba(X_test)[:, 1]  # probability of class 1
    y_pred = clf.predict(X_test)
    results = evaluate_model(y_test, y_pred, y_prob)
    print("SVM Metrics:", results)
    joblib.dump(clf, "saved_models/svm_model.pkl")
    return results

# Run it using:
classical_model_evaluation()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Instance 1 Metrics: {'Accuracy': 0.7974683544303798, 'Precision': 0.8235294117647058, 'Recall': 0.5185185185185185, 'F1 Score': 0.6363636363636364, 'Loss': 0.45622851288578703}
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Instance 2 Metrics: {'Accuracy': 0.810126582278481, 'Precision': 0.8, 'Recall': 0.5925925925925926, 'F1 Score': 0.6808510638297872, 'Loss': 0.3243249068962388}
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Instance 3 Metrics: {'Accuracy': 0.8481012658227848, 'Precision': 0.8571428571428571, 'Recall': 0.6666666666666666, 'F1 Score': 0.75, 'Loss': 0.3718102318392076}
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Instance 4 Metrics: {'Accuracy': 0.810126582278481, 'Precision': 0.8333333333333334, 'Recall': 0.5555555555555556, 'F1 Score': 0.6666666666666666, 'Loss': 0.4409757938761647}
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━

{'Accuracy': 0.8987341772151899,
 'Precision': 0.8275862068965517,
 'Recall': 0.8888888888888888,
 'F1 Score': 0.8571428571428571,
 'Loss': 0.18176725915112482}