In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
df = pd.read_csv("https://raw.githubusercontent.com/krishnaik06/mlproject/main/notebook/data/stud.csv")
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

cat_features = [feature for feature in X.columns if X[feature].dtype == 'O']
num_features = [feature for feature in X.columns if feature not in cat_features]

In [3]:
cat_features,num_features

(['gender',
  'race_ethnicity',
  'parental_level_of_education',
  'lunch',
  'test_preparation_course'],
 ['math_score', 'reading_score'])

In [4]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

num_encoder = StandardScaler()
one_hot_encoder = OneHotEncoder()

preprocessor = ColumnTransformer(
    [
        ('OneHotEncoder', one_hot_encoder, cat_features),
        ('StandardScaler', num_encoder, num_features)
    ]
)

In [5]:
X = preprocessor.fit_transform(X)

In [6]:
X

array([[ 1.        ,  0.        ,  0.        , ...,  1.        ,
         0.39002351,  0.19399858],
       [ 1.        ,  0.        ,  0.        , ...,  0.        ,
         0.19207553,  1.42747598],
       [ 1.        ,  0.        ,  0.        , ...,  1.        ,
         1.57771141,  1.77010859],
       ...,
       [ 1.        ,  0.        ,  0.        , ...,  0.        ,
        -0.46775108,  0.12547206],
       [ 1.        ,  0.        ,  0.        , ...,  0.        ,
         0.12609287,  0.60515772],
       [ 1.        ,  0.        ,  0.        , ...,  1.        ,
         0.71993682,  1.15336989]])

In [7]:
num_columns = X.shape[1]

In [8]:
X.shape

(1000, 19)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
class StudentDataset(Dataset):
    def __init__(self, X, y):
        self.data = X
        self.target_feature = y
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        X = self.data[idx].astype(np.float32)
        y = self.target_feature.iloc[idx].astype(np.float32)
        return torch.tensor(X), torch.tensor(y)
    
train_dataset = StudentDataset(X_train, y_train)
test_dataset = StudentDataset(X_test, y_test)

train_dataloader = DataLoader(train_dataset, batch_size=32)
test_dataloader = DataLoader(test_dataset, batch_size=32)

In [37]:
class SingleLayerNN(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        
        self.fc1 = nn.Sequential(nn.Linear(input_size, 128),
                                 nn.ReLU(),
                                 nn.Linear(128, 1))
        
    
    def forward(self, x: torch.tensor) -> torch.tensor:
        return self.fc1(x)

In [38]:
device = 'mps'
model = SingleLayerNN(input_size=num_columns).to(device)

In [39]:
loss_function = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4)

In [40]:
def evaluate(predictions, targets):
    rmse = np.sqrt(mean_squared_error(targets, predictions))
    r2 = r2_score(targets, predictions)
    return rmse, r2

In [41]:
epochs = 1000

for epoch in range(epochs):
    train_loss = 0.0
    all_targets_train = []
    all_predictions_train = []
    model.train()
    for _, (X, y) in enumerate(train_dataloader):
        X, y = X.to(device), y.to(device)
        
        y_pred = model(X).reshape(-1)
        
        all_targets_train.extend(y.detach().cpu().numpy())
        all_predictions_train.extend(y_pred.detach().cpu().numpy())
        
        loss = loss_function(y_pred, y)
        train_loss += loss.item()
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if (epoch + 1) % 20 == 0:
        train_loss /= len(train_dataloader)
        rmse, r2 = evaluate(all_predictions_train, all_targets_train)
        print(f"After epoch {epoch+1}, train loss is {train_loss}, RMSE is {rmse}, and R2 is {r2}")        
    
    test_loss = 0.0
    all_targets_test = []
    all_predictions_test = []
    model.eval()
    with torch.inference_mode():
        for _, (X, y) in enumerate(test_dataloader):
            X, y = X.to(device), y.to(device)
            y_pred = model(X).reshape(-1)
            
            all_targets_test.extend(y.cpu().numpy())
            all_predictions_test.extend(y_pred.cpu().numpy())
            
            loss = loss_function(y_pred, y)
            test_loss += loss.item()
    
    if (epoch + 1) % 20 == 0:
        test_loss /= len(test_dataloader)
        rmse, r2 = evaluate(all_predictions_test, all_targets_test)
        print(f"After epoch {epoch+1}, test loss is {test_loss}, RMSE is {rmse}, and R2 is {r2}")
        print("------------------------------------")

After epoch 20, train loss is 3633.9680859375, RMSE is 60.28240203857422, and R2 is -14.988730838359128
After epoch 20, test loss is 3366.7598702566966, RMSE is 58.19843292236328, and R2 is -13.053189984404723
------------------------------------
After epoch 40, train loss is 942.3897680664063, RMSE is 30.698368072509766, and R2 is -3.1463259050342938
After epoch 40, test loss is 850.4998343331473, RMSE is 29.346771240234375, and R2 is -2.5733293895326086
------------------------------------
After epoch 60, train loss is 57.30001525878906, RMSE is 7.569677352905273, and R2 is 0.74789142854219
After epoch 60, test loss is 56.03723308018276, RMSE is 7.746601581573486, and R2 is 0.7510140942795168
------------------------------------
After epoch 80, train loss is 19.042268142700195, RMSE is 4.363744735717773, and R2 is 0.9162178381448822
After epoch 80, test loss is 23.298622948782786, RMSE is 5.015213966369629, and R2 is 0.8956406652171527
------------------------------------
After epoch