# EDA & Model — Sundar Edition

This notebook demonstrates training a regression model. It also notes how to use the app for classification (Pass/At-Risk) and for SHAP explainability, if SHAP is installed.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Read the sample dataset; the path is relative to the directory the notebook runs from.
df = pd.read_csv(
    '../data/student_performance_sample.csv'
)

# Show the first rows as this cell's output.
df.head()

In [None]:
# Display pandas' default summary statistics for the loaded frame.
df.describe()

In [None]:
# Input columns and regression target selected from `df`.
FEATURES = ["Hours_Studied","Attendance","Past_Score","Sleep_Hours","Social_Media_Hours"]
TARGET = "Final_Score"

X = df[FEATURES]
y = df[TARGET]
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fixed random_state so repeated runs build the same forest.
model = RandomForestRegressor(n_estimators=200, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate on the held-out rows only.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
# RMSE computed as sqrt(MSE). The `squared=False` keyword of mean_squared_error
# was deprecated in scikit-learn 1.4 and removed in 1.6, so passing it raises a
# TypeError on current releases; taking the root explicitly works on every version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
r2, mae, rmse

In [None]:
# Pair each feature name with the importance reported by the fitted model,
# then list them from most to least important.
imp = model.feature_importances_
ranked = sorted(zip(FEATURES, imp), key=lambda pair: -pair[1])
for f, v in ranked:
    print(f"{f:>20s}: {v:.3f}")