In [147]:
!unzip -q "/content/archive (1).zip" -d "/content/"

replace /content/StudentsPerformance.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n


In [148]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import torch
from torch.utils.data import Dataset, DataLoader

In [149]:
# Load the student performance table and preview its first five rows.
df = pd.read_csv("StudentsPerformance.csv")
df.head(n=5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [150]:
# Display the column labels of the loaded table.
df.columns

Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',
       'test preparation course', 'math score', 'reading score',
       'writing score'],
      dtype='object')

In [151]:
# Target: the "math score" column as an array.
y = df["math score"].values

In [152]:
# Input features: every column except the three exam scores.
X = df.drop(columns=["math score", "reading score", "writing score"])


In [153]:
# Preprocessing: one-hot encode every object-dtype column, dropping the first
# level of each; any other column is passed through unchanged.
categorical = list(X.select_dtypes(include="object").columns)
numeric = []
preprocessor = ColumnTransformer(
    transformers=[("cat", OneHotEncoder(drop="first"), categorical)],
    remainder="passthrough",
)


In [154]:
# Encode the categorical inputs. The encoder is fed a fresh slice of `df`
# (the same columns `X` held before encoding) instead of `X` itself, so
# re-running this cell never pushes already-encoded data back through it.
X = preprocessor.fit_transform(
    df.drop(columns=["math score", "reading score", "writing score"])
)

# Target: raw math scores. No scaling is applied, so the model is trained and
# evaluated in the original score units.
y = df["math score"].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

Creating a PyTorch Dataset

In [155]:
class StudentDataset(Dataset):
    """In-memory dataset pairing encoded feature rows with regression targets.

    Args:
        X: 2-D feature matrix, either a SciPy sparse matrix (anything exposing
            ``toarray()``) or a dense array-like.
        y: 1-D sequence of target values, one per row of ``X``.

    Attributes:
        X: ``float32`` tensor holding the dense features.
        y: ``float32`` tensor of targets with a trailing unit dimension, so a
            batch of targets has the same shape as the model's one-column
            output.
    """

    def __init__(self, X, y):
        # The encoder output is sparse or dense depending on its density, so
        # densify only when the input actually offers `toarray()`.
        features = X.toarray() if hasattr(X, "toarray") else X
        self.X = torch.tensor(features, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)

    def __len__(self):
        """Return the number of samples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

# Training split: wrapped in a Dataset and served in shuffled batches of 32.
train_ds = StudentDataset(X=X_train, y=y_train)
train_loader = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)

# Test split: same batch size, kept in its original order.
test_ds = StudentDataset(X=X_test, y=y_test)
test_loader = DataLoader(dataset=test_ds, batch_size=32)


Build Model with ModuleList

In [156]:
import torch.nn as nn
class DynamicRegressionNet(nn.Module):
    """Fully connected regressor with a configurable number of hidden layers.

    Layout: ``input_dim -> hidden_dim`` input layer, then ``num_layers``
    square ``hidden_dim -> hidden_dim`` layers, then a ``hidden_dim -> 1``
    output layer. ReLU follows every layer except the output layer.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the input layer's output and of each hidden layer.
        num_layers: Number of ``hidden_dim -> hidden_dim`` layers.
    """

    def __init__(self, input_dim, hidden_dim, num_layers):
        super().__init__()
        self.input_layer = nn.Linear(input_dim, hidden_dim)
        # ModuleList registers every hidden layer as a submodule, so their
        # parameters are included in `parameters()`.
        self.hidden_layers = nn.ModuleList(
            nn.Linear(hidden_dim, hidden_dim) for _ in range(num_layers)
        )
        self.output_layer = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Map a batch of feature rows to one output value per row."""
        activations = torch.relu(self.input_layer(x))
        for hidden in self.hidden_layers:
            activations = torch.relu(hidden(activations))
        # No activation on the output: this is an unbounded regression value.
        return self.output_layer(activations)

# Size the network to the encoded feature width, then move it to the GPU when
# one is available (CPU otherwise).
model = DynamicRegressionNet(X.shape[1], hidden_dim=64, num_layers=3)
model = model.to("cuda" if torch.cuda.is_available() else "cpu")


Train the model

In [157]:
# Optimisation setup: mean-squared-error loss minimised with Adam.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

for epoch in range(10):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        # Move the batch to the same device as the model.
        xb = xb.to(device)
        yb = yb.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Reported value is the mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}, Train Loss: {total_loss/len(train_loader):.4f}")


Epoch 1, Train Loss: 4586.5792
Epoch 2, Train Loss: 3703.3498
Epoch 3, Train Loss: 715.2897
Epoch 4, Train Loss: 310.5508
Epoch 5, Train Loss: 250.8283
Epoch 6, Train Loss: 231.4684
Epoch 7, Train Loss: 214.3015
Epoch 8, Train Loss: 204.1502
Epoch 9, Train Loss: 195.8182
Epoch 10, Train Loss: 193.5124


Evaluate the model

In [158]:
from sklearn.metrics import mean_squared_error

# Collect predictions and true targets over the whole test loader, with
# gradient tracking switched off.
model.eval()
preds = []
actuals = []
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        pred = model(xb).cpu().numpy()
        preds.extend(pred.flatten())
        # Targets never left the CPU, so they convert to NumPy directly.
        actuals.extend(yb.numpy().flatten())

mse = mean_squared_error(y_true=actuals, y_pred=preds)
print(f"Test MSE: {mse:.4f}")


Test MSE: 212.0762


In [159]:
#Gradio App

In [160]:
pip install gradio




In [161]:
import joblib

# The target is never standardised in this notebook (the model is trained on
# raw math scores), and no cell defines `scaler_y`. Build it here as an
# identity scaler (no centring, no scaling) so that this cell runs on a fresh
# kernel and `scaler_y.inverse_transform` leaves predictions in score units.
scaler_y = StandardScaler(with_mean=False, with_std=False).fit(y.reshape(-1, 1))

# Persist the fitted encoder and the target scaler for the prediction app.
joblib.dump(preprocessor, "preprocessor.pkl")
joblib.dump(scaler_y, "scaler_y.pkl")


['scaler_y.pkl']

In [162]:
# Reload the saved artifacts from disk. These are pickle-based files, so only
# load ones this notebook wrote itself.
preprocessor = joblib.load(filename="preprocessor.pkl")
scaler_y = joblib.load(filename="scaler_y.pkl")


In [163]:
def predict_math_score(gender, race, parent_edu, lunch, test_prep):
    """Predict a math score from a student's five categorical attributes.

    The inputs are assembled into a one-row DataFrame under the training
    column names, encoded with the fitted ``preprocessor`` and passed through
    ``model``.

    Args:
        gender: Value for the "gender" column.
        race: Value for the "race/ethnicity" column.
        parent_edu: Value for the "parental level of education" column.
        lunch: Value for the "lunch" column.
        test_prep: Value for the "test preparation course" column.

    Returns:
        float: The model's prediction rounded to two decimals.
    """
    df_input = pd.DataFrame({
        "gender": [gender],
        "race/ethnicity": [race],
        "parental level of education": [parent_edu],
        "lunch": [lunch],
        "test preparation course": [test_prep],
    })

    x = preprocessor.transform(df_input)
    # The encoder output is sparse or dense depending on how it was fitted;
    # densify only when needed.
    if hasattr(x, "toarray"):
        x = x.toarray()
    x_tensor = torch.tensor(x, dtype=torch.float32).to(device)

    model.eval()
    with torch.no_grad():
        # The network is trained on raw (unscaled) math scores, so its output
        # is already in score units and needs no inverse scaling.
        prediction = model(x_tensor).item()

    return round(prediction, 2)

# `gr` is used below but is not imported anywhere else in the notebook.
import gradio as gr

# Web form around `predict_math_score`. The five inputs are listed in the
# same order as the function's parameters, and every choice is a category
# value the encoder was fitted on.
interface = gr.Interface(
    fn=predict_math_score,
    inputs=[
        gr.Radio(["female", "male"], label="Gender"),
        gr.Dropdown(["group A", "group B", "group C", "group D", "group E"], label="Race/Ethnicity"),
        gr.Dropdown([
            "some high school", "high school", "some college",
            "associate's degree", "bachelor's degree", "master's degree"
        ], label="Parental Level of Education"),
        gr.Radio(["standard", "free/reduced"], label="Lunch Type"),
        gr.Radio(["none", "completed"], label="Test Preparation Course")
    ],
    outputs=gr.Number(label="Predicted Math Score"),
    title="📚 Student Math Score Predictor",
    description="Enter student information to predict math score using a trained PyTorch model."
)

interface.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://bdbf07c153cd04858d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [164]:
# Recompute and print the MSE from the `actuals` and `preds` collected by the
# evaluation loop.
print(f"Test MSE: {mean_squared_error(actuals, preds):.4f}")


Test MSE: 212.0762


In [165]:
# Show how many students fall into each level of the three grouping columns.
for column in ("gender", "race/ethnicity", "lunch"):
    print(df[column].value_counts())


gender
female    518
male      482
Name: count, dtype: int64
race/ethnicity
group C    319
group D    262
group B    190
group E    140
group A     89
Name: count, dtype: int64
lunch
standard        645
free/reduced    355
Name: count, dtype: int64


In [166]:
# Work on a copy so the prediction columns never leak into `df`.
df_eval = df.copy()

# Prepare inputs: encode every row of the table with the fitted encoder.
X_all = preprocessor.transform(df.drop(columns=['math score', 'reading score', 'writing score']))
# The encoder output is sparse or dense depending on how it was fitted;
# densify only when needed.
X_tensor = torch.tensor(
    X_all.toarray() if hasattr(X_all, "toarray") else X_all,
    dtype=torch.float32,
).to(device)

# Predict all math scores.
# NOTE: this covers every row of `df`, including rows the model was trained
# on, so the errors below are not held-out errors.
# NOTE: this rebinds `preds` (previously the test-set predictions); re-running
# the earlier "Test MSE" cell after this one will pair it with `actuals` of a
# different length.
model.eval()
with torch.no_grad():
    preds = model(X_tensor).cpu().numpy().flatten()

df_eval['predicted_math'] = preds
df_eval['true_math'] = df['math score']
# Signed error: negative values mean the model under-predicts.
df_eval['error'] = df_eval['predicted_math'] - df_eval['true_math']




In [167]:
# Group-wise analysis
print("\n Average Predicted Math Score by Gender:")
print(df_eval.groupby('gender')['predicted_math'].mean())

print("\n Average Error by Gender:")
print(df_eval.groupby('gender')['error'].mean())

print("\n Average Predicted Math Score by Race:")
print(df_eval.groupby('race/ethnicity')['predicted_math'].mean())

print("\n Average Error by Race:")
print(df_eval.groupby('race/ethnicity')['error'].mean())

print("\n Average Predicted Math Score by Lunch:")
print(df_eval.groupby('lunch')['predicted_math'].mean())

print("\n Average Error by Lunch:")
print(df_eval.groupby('lunch')['error'].mean())


 Average Predicted Math Score by Gender:
gender
female    61.289268
male      67.829765
Name: predicted_math, dtype: float32

 Average Error by Gender:
gender
female   -2.343935
male     -0.898454
Name: error, dtype: float64

 Average Predicted Math Score by Race:
race/ethnicity
group A    53.561264
group B    62.591885
group C    63.256123
group D    65.826202
group E    73.980064
Name: predicted_math, dtype: float32

 Average Error by Race:
race/ethnicity
group A   -8.067948
group B   -0.860748
group C   -1.207826
group D   -1.536396
group E    0.158633
Name: error, dtype: float64

 Average Predicted Math Score by Lunch:
lunch
free/reduced    56.313095
standard        68.915718
Name: predicted_math, dtype: float32

 Average Error by Lunch:
lunch
free/reduced   -2.608031
standard       -1.118391
Name: error, dtype: float64
