# __*MULTILAYER PERCEPTRON*__

## __*IMPORT LIBRARIES*__

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score
from sklearn.model_selection import KFold
from skorch import NeuralNetRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

In [2]:
data = pd.read_csv('cleaned_data.csv')
data = data.dropna(subset=['marathon_time_in_minutes']) # NOTE: dropping NaN in marathon times 
X = data[['marathon_time_in_minutes', 'height', 'age', 'HRmax', 'days', 'weight']]
y = data[['vo2max', 'weeklyKM']]
y.head()

Unnamed: 0,vo2max,weeklyKM
0,59.7,65.0
1,46.7,110.0
2,62.0,90.0
3,61.9,65.0
4,50.2,12.5


In [3]:
# NOTE split is 70-10-20
def splitting(X,y,seed):

    X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.125, random_state=seed)

    x_scaler = StandardScaler()
    y_scaler = StandardScaler()

    X_train_scaled = x_scaler.fit_transform(X_train)
    X_val_scaled = x_scaler.transform(X_val)
    X_test_scaled = x_scaler.transform(X_test)

    y_train_scaled = y_scaler.fit_transform(y_train)
    y_val_scaled = y_scaler.transform(y_val)
    y_test_scaled = y_scaler.transform(y_test)

    X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train_scaled, dtype=torch.float32)

    X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val_scaled, dtype=torch.float32)

    X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test_scaled, dtype=torch.float32)

    return  X_train,y_train,X_test,y_test,X_train_scaled,X_val_scaled,X_test_scaled, y_train_scaled, y_val_scaled, y_test_scaled, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, X_test_tensor, y_test_tensor, y_scaler,x_scaler

## __*DEFINE MODEL*__

In [None]:
class MLP(nn.Module):
    def __init__(self,  num_units=64, dropout=0.3):
        super().__init__()
        self.model = nn.Sequential(
            # NOTE : This is the input layer  
            nn.Linear(6, num_units),
            nn.ReLU(),
            nn.Dropout(dropout),
            # NOTE : this is the hidden layer
            nn.Linear(num_units, num_units // 2),
            nn.ReLU(),
            # NOTE: this is the output layer
            nn.Linear(num_units // 2, 2)
        )

    def forward(self, x):
        return self.model(x)

## __*FINE TUNING WITH GRID SEARCH*__

Used skorch to exploit gridSearch from scikit learn

In [12]:
def tune_parameters(regressor, X_train, y_train):
    param_grid = {
        'lr': [0.001, 0.01, 0.1],
        'max_epochs': [100, 200],
        'module__num_units': [32, 64, 128, 256],
        'module__dropout': [0.0, 0.1, 0.2, 0.3],  
    }

    X_train = X_train.astype('float32')
    y_train = y_train.astype('float32')
    
    gs = GridSearchCV(regressor, param_grid, cv=5, scoring='neg_mean_squared_error')
    gs.fit(X_train, y_train)

    return gs

In [13]:
# NOTE : Wrap model into this wrapper to adapt to skorch
regressor = NeuralNetRegressor(
    module=MLP,
    max_epochs=100,
    lr=0.01,
    optimizer=torch.optim.Adam,
    criterion=nn.MSELoss,
    iterator_train__shuffle=True,
    verbose=0  
)

X_train,y_train,X_test,y_test,X_train_scaled,X_val_scaled,X_test_scaled, y_train_scaled, y_val_scaled, y_test_scaled, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, X_test_tensor, y_test_tensor, y_scaler,x_scaler= splitting(X,y,42)
gs_42 = tune_parameters(regressor,X_train_scaled,y_train_scaled)
print("Best MSE score is :", gs_42.best_score_)
print("Best params:", gs_42.best_params_)

Best MSE score is : -0.5628836154937744
Best params: {'lr': 0.001, 'max_epochs': 100, 'module__dropout': 0.0, 'module__num_units': 32}


In [14]:
# NOTE : Wrap model into this wrapper to adapt to skorch
regressor = NeuralNetRegressor(
    module=MLP,
    max_epochs=200,
    lr=0.01,
    optimizer=torch.optim.Adam,
    criterion=nn.MSELoss,
    iterator_train__shuffle=True,
    verbose=0  
)

X_train,y_train,X_test,y_test,X_train_scaled,X_val_scaled,X_test_scaled, y_train_scaled, y_val_scaled, y_test_scaled, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, X_test_tensor, y_test_tensor, y_scaler,x_scaler= splitting(X,y,20)
gs_20 = tune_parameters(regressor,X_train_scaled,y_train_scaled)
print("Best MSE score is :", gs_20.best_score_)
print("Best params:", gs_20.best_params_)

Best MSE score is : -0.5508951663970947
Best params: {'lr': 0.001, 'max_epochs': 100, 'module__dropout': 0.0, 'module__num_units': 32}


In [15]:
# NOTE : Wrap model into this wrapper to adapt to skorch
regressor = NeuralNetRegressor(
    module=MLP,
    max_epochs=100,
    lr=0.01,
    optimizer=torch.optim.Adam,
    criterion=nn.MSELoss,
    iterator_train__shuffle=True,
    verbose=0  
)

X_train,y_train,X_test,y_test,X_train_scaled,X_val_scaled,X_test_scaled, y_train_scaled, y_val_scaled, y_test_scaled, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, X_test_tensor, y_test_tensor, y_scaler,x_scaler = splitting(X,y,35)
gs_35 = tune_parameters(regressor,X_train_scaled,y_train_scaled)
print("Best MSE score is :", gs_35.best_score_)
print("Best params:", gs_35.best_params_)

Best MSE score is : -0.5295068681240082
Best params: {'lr': 0.001, 'max_epochs': 200, 'module__dropout': 0.3, 'module__num_units': 32}


## __*TRAIN MODEL*__

In [17]:
def train_MLP(X,y,params,seed):

    X_train,y_train,X_test,y_test,X_train_scaled,X_val_scaled,X_test_scaled, y_train_scaled, y_val_scaled, y_test_scaled, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, X_test_tensor, y_test_tensor, y_scaler,x_scaler= splitting(X,y,seed)

    model = MLP(num_units=params['module__num_units'], dropout=params['module__dropout'])

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=params['lr'])

    epochs = params['max_epochs']
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val_tensor)
            val_loss = criterion(val_outputs, y_val_tensor)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {loss.item():.4f} | Val Loss: {val_loss.item():.4f}")

    # Final Evaluation on Test Set
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        test_outputs_unscaled = y_scaler.inverse_transform(test_outputs.numpy())
        print("\nSample predictions on test set:\n", test_outputs_unscaled)
    
    return test_outputs, y_test_tensor, y_scaler


In [22]:
seed = 42
test_outputs, y_test_tensor, y_scaler = train_MLP(X,y,gs_42.best_params_,seed)

predictions = y_scaler.inverse_transform(test_outputs.numpy())
ground_truth = y_scaler.inverse_transform(y_test_tensor.numpy())

# Compute R² score for each output and overall
r2 = r2_score(ground_truth, predictions, multioutput='raw_values')  # individual R²
r2_mean = r2_score(ground_truth, predictions)  # mean R²

print("\nR² scores per target [vo2max, weight, weeklyKM]:", r2)
print("Mean R² score across all targets:", r2_mean)

Epoch [10/100] | Train Loss: 0.9257 | Val Loss: 0.9050
Epoch [20/100] | Train Loss: 0.8457 | Val Loss: 0.7908
Epoch [30/100] | Train Loss: 0.7699 | Val Loss: 0.6784
Epoch [40/100] | Train Loss: 0.6968 | Val Loss: 0.5738
Epoch [50/100] | Train Loss: 0.6306 | Val Loss: 0.4872
Epoch [60/100] | Train Loss: 0.5720 | Val Loss: 0.4272
Epoch [70/100] | Train Loss: 0.5226 | Val Loss: 0.3928
Epoch [80/100] | Train Loss: 0.4818 | Val Loss: 0.3731
Epoch [90/100] | Train Loss: 0.4503 | Val Loss: 0.3618
Epoch [100/100] | Train Loss: 0.4277 | Val Loss: 0.3563

Sample predictions on test set:
 [[39.414448 26.539988]
 [51.396435 64.40856 ]
 [48.088844 54.28353 ]
 [48.5962   56.643963]
 [51.653545 42.154095]
 [46.34853  51.963234]
 [47.197342 46.63934 ]
 [38.100697 47.77415 ]
 [36.10136  30.192787]
 [53.722473 66.662636]
 [44.562843 47.273937]
 [52.96723  61.541256]
 [45.937767 30.30504 ]
 [45.302246 67.31389 ]
 [48.655457 59.788124]
 [50.00933  44.04806 ]
 [51.0911   48.087215]
 [53.650024 64.02008 ]
 

In [23]:
seed = 20
test_outputs, y_test_tensor, y_scaler = train_MLP(X,y,gs_35.best_params_,seed)

# Unscale predictions and ground truth
predictions = y_scaler.inverse_transform(test_outputs.numpy())
ground_truth = y_scaler.inverse_transform(y_test_tensor.numpy())

# Compute R² score for each output and overall
r2 = r2_score(ground_truth, predictions, multioutput='raw_values')  # individual R²
r2_mean = r2_score(ground_truth, predictions)  # mean R²

print("\nR² scores per target [vo2max, weight, weeklyKM]:", r2)
print("Mean R² score across all targets:", r2_mean)

Epoch [10/200] | Train Loss: 0.9665 | Val Loss: 1.0945
Epoch [20/200] | Train Loss: 0.9322 | Val Loss: 1.0443
Epoch [30/200] | Train Loss: 0.8762 | Val Loss: 0.9843
Epoch [40/200] | Train Loss: 0.8467 | Val Loss: 0.9168
Epoch [50/200] | Train Loss: 0.7578 | Val Loss: 0.8450
Epoch [60/200] | Train Loss: 0.6669 | Val Loss: 0.7686
Epoch [70/200] | Train Loss: 0.6543 | Val Loss: 0.7001
Epoch [80/200] | Train Loss: 0.6300 | Val Loss: 0.6551
Epoch [90/200] | Train Loss: 0.5216 | Val Loss: 0.6347
Epoch [100/200] | Train Loss: 0.5141 | Val Loss: 0.6323
Epoch [110/200] | Train Loss: 0.4870 | Val Loss: 0.6351
Epoch [120/200] | Train Loss: 0.4665 | Val Loss: 0.6411
Epoch [130/200] | Train Loss: 0.4007 | Val Loss: 0.6468
Epoch [140/200] | Train Loss: 0.4708 | Val Loss: 0.6450
Epoch [150/200] | Train Loss: 0.4539 | Val Loss: 0.6418
Epoch [160/200] | Train Loss: 0.4245 | Val Loss: 0.6370
Epoch [170/200] | Train Loss: 0.4349 | Val Loss: 0.6393
Epoch [180/200] | Train Loss: 0.3831 | Val Loss: 0.6463
E

In [24]:
seed = 35
test_outputs, y_test_tensor, y_scaler = train_MLP(X,y,gs_20.best_params_,seed)

# Unscale predictions and ground truth
predictions = y_scaler.inverse_transform(test_outputs.numpy())
ground_truth = y_scaler.inverse_transform(y_test_tensor.numpy())

# Compute R² score for each output and overall
r2 = r2_score(ground_truth, predictions, multioutput='raw_values')  # individual R²
r2_mean = r2_score(ground_truth, predictions)  # mean R²

print("\nR² scores per target [vo2max, weight, weeklyKM]:", r2)
print("Mean R² score across all targets:", r2_mean)

Epoch [10/100] | Train Loss: 0.9807 | Val Loss: 1.0880
Epoch [20/100] | Train Loss: 0.8971 | Val Loss: 1.0074
Epoch [30/100] | Train Loss: 0.8205 | Val Loss: 0.9256
Epoch [40/100] | Train Loss: 0.7462 | Val Loss: 0.8356
Epoch [50/100] | Train Loss: 0.6740 | Val Loss: 0.7473
Epoch [60/100] | Train Loss: 0.6060 | Val Loss: 0.6729
Epoch [70/100] | Train Loss: 0.5457 | Val Loss: 0.6169
Epoch [80/100] | Train Loss: 0.4984 | Val Loss: 0.5916
Epoch [90/100] | Train Loss: 0.4662 | Val Loss: 0.5910
Epoch [100/100] | Train Loss: 0.4444 | Val Loss: 0.5928

Sample predictions on test set:
 [[58.671757 91.529755]
 [48.564846 62.55994 ]
 [56.67211  67.24394 ]
 [54.08416  69.03769 ]
 [47.874016 56.495155]
 [47.74811  63.83114 ]
 [49.403767 53.042156]
 [45.12547  42.8971  ]
 [41.479218 47.345024]
 [52.30327  64.498405]
 [45.974026 31.854631]
 [54.366806 82.77523 ]
 [54.17583  61.47211 ]
 [50.86432  51.550247]
 [45.175945 43.425304]
 [47.066383 48.599823]
 [48.076614 55.29405 ]
 [48.532032 49.164413]
 