In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
import pickle

In [9]:
# Step 1: Load the data
# Read the student performance CSV (path is relative to the notebook's working
# directory) and end the cell with the frame so it renders as the cell output.
csv_path = 'StudentsPerformance.csv'
data = pd.read_csv(csv_path)
data

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75
...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95
996,male,group C,high school,free/reduced,none,62,55,55
997,female,group C,high school,free/reduced,completed,59,71,65
998,female,group D,some college,standard,completed,68,78,77


In [10]:
# Step 2: Preprocess the data
# One LabelEncoder instance for the whole notebook. The encoding cell that
# follows refits it on each categorical column in turn, so once that loop has
# run this object only holds the classes of the last column it was fitted on.
label_encoder = LabelEncoder()

In [11]:
# Encode categorical columns
# Each categorical column is replaced in place by integer codes. The single
# shared encoder is refit on every column, which overwrites its fitted classes
# each iteration, so the label -> code mapping of every column is recorded in
# `category_codes` as we go. Without it, raw category strings in new data could
# not be encoded the same way the training data was.
# NOTE: re-running this cell on already-encoded data records the integer codes
# as labels instead of the original strings; reload `data` first in that case.
category_codes = {}
for col in ["gender", "race/ethnicity", "parental level of education", "lunch", "test preparation course"]:
    data[col] = label_encoder.fit_transform(data[col])
    # classes_ is ordered so that a label's position is the code it was assigned.
    category_codes[col] = {label: code for code, label in enumerate(label_encoder.classes_)}

In [12]:

# Calculate the average score
# Regression target: the row-wise mean of the three exam scores per student.
score_columns = ["math score", "reading score", "writing score"]
data["average_score"] = data[score_columns].mean(axis="columns")

In [13]:
# Step 3: Split the data into training and testing sets
# Pick the predictor columns by name rather than dropping the score columns, so
# that only the five encoded categorical attributes reach the model. Dropping
# would keep any other column present in the frame; the displayed data shows an
# "Unnamed: 0" column, which (if it really is in the CSV - TODO confirm) is just
# a saved row index and must not be used as a feature.
# The column order matches the original frame, so results are unchanged when no
# extra column exists.
feature_columns = [
    "gender",
    "race/ethnicity",
    "parental level of education",
    "lunch",
    "test preparation course",
]
X = data[feature_columns]
# Target: the per-student average of the three exam scores.
y = data["average_score"]

In [14]:
# Hold out part of the rows for evaluation; the rest is used for training.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,    # 20% of the rows go to the test set
    random_state=42,  # fixed seed so the split is the same on every run
)

In [15]:
# Step 4: Train the model
# Random forest with 100 trees and a fixed seed, fitted on the training split.
forest_settings = {"n_estimators": 100, "random_state": 42}
model = RandomForestRegressor(**forest_settings)
model.fit(X_train, y_train)

In [16]:
# Step 5: Evaluate the model
# Predict the average score for the held-out rows and report the mean squared
# error between those predictions and the true averages.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 220.49899016576083


In [17]:

# Step 6: Save the model using Pickle
# Serialize the fitted regressor to a file in the working directory. Only the
# model object is written; nothing else from the notebook is stored with it.
model_path = "student_performance_model.pkl"
with open(model_path, mode="wb") as model_file:
    pickle.dump(model, model_file)