In [1]:
#---(Regression Version) ---
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# --- Paths -------------------------------------------------------------------
# BASE_DIR is the parent of the current working directory; the cleaned data and
# the model outputs are resolved beneath it under Results/Output.
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), '..'))
DATA_PATH = os.path.join(BASE_DIR, 'Results', 'Output', 'cleaned_student_data.csv')
OUT_DIR = os.path.join(BASE_DIR, 'Results', 'Output', 'ModelResults')
os.makedirs(OUT_DIR, exist_ok=True)

# --- Load data ---------------------------------------------------------------
# Prefer the project-relative DATA_PATH configured above. Fall back to the
# Colab upload location only when the project file is absent, so the notebook
# runs both locally and on Colab instead of silently ignoring DATA_PATH.
COLAB_DATA_PATH = "/content/cleaned_student_data.csv"
data_path = DATA_PATH if os.path.exists(DATA_PATH) else COLAB_DATA_PATH
data = pd.read_csv(data_path)

# --- Split features and target -----------------------------------------------
# TARGET is the column to predict; every other column is used as a feature.
TARGET = 'G3'
X = data.drop(columns=[TARGET])
y = data[TARGET]

# --- Train/test split --------------------------------------------------------
# 80/20 split with a fixed seed so the partition is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Scale for models that need normalization --------------------------------
# The scaler is fitted on the training split only and then applied to the test
# split, so test-set statistics never influence the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [5]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from google.colab import files

# NOTE: this rebinds the notebook-level OUT_DIR (first set in the setup cell)
# to a directory relative to the current working directory. Anything saved via
# OUT_DIR from this point on is written here.
OUT_DIR = 'Results_Output_ModelResults'
os.makedirs(OUT_DIR, exist_ok=True)

# Standardise the full feature matrix X (not the train/test splits) for
# clustering.
X_scaled = StandardScaler().fit_transform(X)

results = []
# k -> (fitted KMeans, cluster labels). Kept so the winning model can be reused
# below instead of being fitted a second time with identical settings.
fitted_by_k = {}

# Sweep k = 2..6 and record the silhouette score and inertia for each fit.
for k in range(2, 7):
    km = KMeans(n_clusters=k, random_state=42, n_init=10)
    labels = km.fit_predict(X_scaled)
    sil = silhouette_score(X_scaled, labels)
    results.append({'k': k, 'Silhouette': sil, 'Inertia': km.inertia_})
    fitted_by_k[k] = (km, labels)

# Save the results CSV
results_df = pd.DataFrame(results)
results_csv_path = os.path.join(OUT_DIR, 'kmeans_results.csv')
results_df.to_csv(results_csv_path, index=False)

print(results_df)

# Pick the k with the highest silhouette score and reuse the model and labels
# already fitted for it in the sweep (same hyperparameters, seed and data, so
# a refit would only repeat the same work).
best_k = results_df.loc[results_df['Silhouette'].idxmax(), 'k']
best_km, best_labels = fitted_by_k[int(best_k)]

# Persist the best model; the file name encodes the selected k.
model_file_path = os.path.join(OUT_DIR, f'kmeans_best_k{int(best_k)}.joblib')
joblib.dump(best_km, model_file_path)



   k  Silhouette      Inertia
0  2    0.117390  6755.838497
1  3    0.103399  6374.575126
2  4    0.125891  5996.559790
3  5    0.069805  5848.530216
4  6    0.067448  5616.576001


['Results_Output_ModelResults/kmeans_best_k4.joblib']