In [1]:
#---(Regression Version) ---
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), '..'))
DATA_PATH = os.path.join(BASE_DIR, 'Results', 'Output', 'cleaned_student_data.csv')
OUT_DIR = os.path.join(BASE_DIR, 'Results', 'Output', 'ModelResults')
os.makedirs(OUT_DIR, exist_ok=True)

# Load data
data = pd.read_csv("/content/cleaned_student_data.csv")

# Split features and target
TARGET = 'G3'
X = data.drop(columns=[TARGET])
y = data[TARGET]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale for models that need normalization
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [4]:
from sklearn.neural_network import MLPRegressor

# ------------------ PREP ------------------
# Make sure OUT_DIR exists
OUT_DIR = 'Results_Output_ModelResults'
os.makedirs(OUT_DIR, exist_ok=True)

# Scale input features
X_scaled = StandardScaler().fit_transform(X)

# ------------------ MODEL TRAINING ------------------
# Define parameter grid
param_grid = {
    'hidden_layer_sizes': [(64,), (128, 64)],
    'activation': ['relu', 'tanh'],
    'learning_rate_init': [0.001, 0.01]
}

# Set up the MLPRegressor with improved settings
mlp = MLPRegressor(max_iter=1000, early_stopping=True, random_state=42)

# GridSearch to find best parameters
grid = GridSearchCV(mlp, param_grid, cv=5, scoring='r2')
grid.fit(X_scaled, y)

# Best model
best_model = grid.best_estimator_

# Predict on the same scaled data (or use test split if needed)
y_pred = best_model.predict(X_scaled)

# ------------------ METRICS ------------------
mae = mean_absolute_error(y, y_pred)
mse = mean_squared_error(y, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y, y_pred)

# Print results
print("Best Params:", grid.best_params_)
print(f"MAE: {mae:.3f}, RMSE: {rmse:.3f}, R²: {r2:.3f}")

# ------------------ SAVE RESULTS ------------------
# Save evaluation metrics to CSV
results_df = pd.DataFrame([{
    'Model': 'MLP',
    'MAE': mae,
    'MSE': mse,
    'RMSE': rmse,
    'R2': r2
}])
results_csv_path = os.path.join(OUT_DIR, 'mlp_results.csv')
results_df.to_csv(results_csv_path, index=False)

# Save the model
model_file_path = os.path.join(OUT_DIR, 'mlp_best.joblib')
joblib.dump(best_model, model_file_path)

Best Params: {'activation': 'relu', 'hidden_layer_sizes': (128, 64), 'learning_rate_init': 0.01}
MAE: 3.259, RMSE: 4.384, R²: 0.094


['Results_Output_ModelResults/mlp_best.joblib']