In [2]:
# 📦 Import Required Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import joblib

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 📥 Read the student-performance CSV (expected to already sit in Colab's /content directory)
df = pd.read_csv('/content/StudentPerformanceFactors.csv')

# 🔠 Integer-encode every object-dtype column in place.
# One LabelEncoder is created per column up front and then fitted, so the
# column → encoder mapping can be reused later to apply the same encoding.
# NOTE(review): missing values in these columns are presumably encoded as a
# class of their own rather than rejected — confirm against the CSV.
label_encoders = {col: LabelEncoder() for col in df.select_dtypes(include='object').columns}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

# 🎯 The exam score is the regression target; every other column is a feature
target_column = 'Exam_Score'
y = df[target_column]
X = df.drop(columns=[target_column])

# ✂️ Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 🌲 Compact random forest: fewer, depth-limited trees with larger leaves
forest_params = {
    'n_estimators': 50,       # ⬇ halved from 100
    'max_depth': 10,          # ⬇ caps how deep each tree may grow
    'min_samples_leaf': 4,    # ⬆ every leaf must hold at least 4 training samples
    'random_state': 42,       # fixed seed for reproducible training
}
# fit() hands back the estimator itself, so construction and training can be chained
model = RandomForestRegressor(**forest_params).fit(X_train, y_train)

# 🔍 Score the held-out rows
y_pred = model.predict(X_test)

# 📏 Error metrics on the test split (RMSE is the square root of the MSE)
mae, rmse, r2 = (
    mean_absolute_error(y_test, y_pred),
    np.sqrt(mean_squared_error(y_test, y_pred)),
    r2_score(y_test, y_pred),
)

# One metric per line, rounded to two decimals
print(
    f"MAE: {mae:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)

# 💾 Persist the trained forest and the fitted encoders to the working directory
artifacts = (
    (model, 'model.pkl', 3),                     # 📦 model is written with compression level 3
    (label_encoders, 'label_encoders.pkl', 0),   # encoders are written uncompressed (joblib's default)
)
for artifact, filename, level in artifacts:
    joblib.dump(artifact, filename, compress=level)

print("✅ Smaller model saved as 'model.pkl'")


MAE: 1.15
RMSE: 2.18
R² Score: 0.66
✅ Smaller model saved as 'model.pkl'


In [15]:
# 🔁 Reinstall compatible versions
!pip install -q --upgrade pip
!pip uninstall -y numpy scikit-learn joblib xgboost
!pip install numpy==1.23.5 scikit-learn==1.2.2 joblib==1.2.0 xgboost==1.7.6

   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 MB 31.5 MB/s eta 0:00:00
Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Successfully uninstalled numpy-2.0.2
Found existing installation: scikit-learn 1.6.1
Uninstalling scikit-learn-1.6.1:
  Successfully uninstalled scikit-learn-1.6.1
Found existing installation: joblib 1.5.1
Uninstalling joblib-1.5.1:
  Successfully uninstalled joblib-1.5.1
Found existing installation: xgboost 3.0.2
Uninstalling xgboost-3.0.2:
  Successfully uninstalled xgboost-3.0.2
Collecting numpy==1.23.5
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Collecting scikit-learn==1.2.2
  Downloading sci