In [None]:

import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Load the raw table and apply basic cleaning in a single pipeline:
#   1. strip leading/trailing whitespace from every column header, so later
#      lookups by name do not trip over stray spaces in the CSV header row;
#   2. drop every row that contains at least one missing value.
df = (
    pd.read_csv("student.csv")
    .rename(columns=str.strip)
    .dropna()
)


# Columns pandas reports with object dtype are treated as categorical.
categorical_cols = list(df.select_dtypes(include=['object']).columns)

# One independent LabelEncoder per categorical column, keyed by column name.
# The fitted encoders are kept so the same value -> integer mapping can be
# reapplied later.
label_encoders = {name: LabelEncoder() for name in categorical_cols}

# Replace each categorical column in place with its integer codes. Values are
# cast to str first so every entry in a column is encoded as text.
# NOTE(review): the encoders are fitted on the full frame, before the
# train/test split further down.
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col].astype(str))


# Regression target; every other column of the frame is used as a feature.
target_col = "exam_score"
feature_names = [name for name in df.columns if name != target_col]

# Feature matrix and target vector.
X, y = df[feature_names], df[target_col]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Model 1: ordinary least-squares linear regression.
# (scikit-learn's fit() returns the estimator itself, so it can be chained.)
lr_model = LinearRegression().fit(X_train, y_train)
lr_preds = lr_model.predict(X_test)

# Model 2: random forest with 200 trees capped at depth 10; seeded so the
# fitted forest is reproducible.
rf_model = RandomForestRegressor(
    n_estimators=200,
    max_depth=10,
    random_state=42,
).fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)

# Report the coefficient of determination of each model on the held-out rows.
print("LinearRegression R2:", r2_score(y_test, lr_preds))
print("RandomForest R2:", r2_score(y_test, rf_preds))


# The random forest is the model that gets persisted.
# NOTE(review): this choice is hard-coded and does not depend on the R2
# comparison printed above - confirm that is intended.
reg_model = rf_model

# Save artifacts: the fitted model, the ordered list of feature columns it was
# trained on, and the per-column label encoders, each pickled to its own file.
for filename, artifact in (
    ("reg_model.pkl", reg_model),
    ("feature_names.pkl", feature_names),
    ("label_encoders.pkl", label_encoders),
):
    with open(filename, "wb") as f:
        pickle.dump(artifact, f)

print("Saved reg_model.pkl, feature_names.pkl, label_encoders.pkl")


LinearRegression R2: 0.8881336475406082
RandomForest R2: 0.8737930769410011
Saved reg_model.pkl, feature_names.pkl, label_encoders.pkl
