In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import joblib
import pickle
import os

# Load the preprocessed student dataset.
df = pd.read_csv(r'C:\Users\lenovo\Desktop\students_academic_project\data\processed_data.csv')

# Features: drop both targets so neither model can see the value it is
# asked to predict; every remaining column is used as an input feature.
X = df.drop(columns=['ExamScore', 'FinalGrade'])
y_grade = df['FinalGrade']        # classifier target
y_score = df['ExamScore']         # regressor target

# Split the features and BOTH targets in one call. train_test_split applies a
# single partition to every array it receives, so the feature rows and the two
# target vectors stay aligned by construction instead of relying on two
# separate calls that must repeat the same random_state/stratify arguments.
# Stratification is on the grade label so class proportions are preserved.
(
    X_train, X_test,
    y_grade_train, y_grade_test,
    y_score_train, y_score_test,
) = train_test_split(
    X, y_grade, y_score, test_size=0.2, random_state=42, stratify=y_grade
)

# Standardize the numeric feature columns. The scaler learns its statistics
# from the training split only and is applied to a copy, leaving X_train
# itself untouched.
numeric_cols = ["StudyHours", "Attendance", "AssignmentCompletion", "OnlineCourses", "Age"]
scaler = StandardScaler()
X_train_scaled = X_train.copy()
X_train_scaled[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])

# Both forests share the same hyper-parameters.
forest_params = {"n_estimators": 200, "max_depth": 7, "random_state": 42}

# 1) Classifier predicting FinalGrade
clf = RandomForestClassifier(**forest_params).fit(X_train_scaled, y_grade_train)

# 2) Regressor predicting ExamScore
reg = RandomForestRegressor(**forest_params).fit(X_train_scaled, y_score_train)

# Persist the fitted estimators and the scaler into the app folder,
# creating the folder first if it does not exist yet.
app_dir = r'C:\Users\lenovo\Desktop\students_academic_project\app'
os.makedirs(app_dir, exist_ok=True)

# The two forests are serialized with joblib.
for filename, model in (
    ('trained_classifier.pkl', clf),
    ('trained_regressor.pkl', reg),
):
    joblib.dump(model, os.path.join(app_dir, filename))

# The scaler is serialized with the standard pickle module.
scaler_path = os.path.join(app_dir, 'scaler.pkl')
with open(scaler_path, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

print("✅ SUCCESS: Classifier, Regressor, and Scaler saved to app folder.")

✅ SUCCESS: Classifier, Regressor, and Scaler saved to app folder.
