# __*MULTILAYER PERCEPTRON*__

## __*IMPORT LIBRARIES*__

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score

In [9]:
# Column groups used below: model inputs (features) and the quantities to predict (targets).
feature_columns = ['marathon_time_in_minutes', 'height', 'age', 'HRmax', 'days', 'FFM']
target_columns = ['vo2max', 'weight', 'weeklyKM']

# Read 'cleaned_data.csv' and discard rows with no recorded marathon time.
# NOTE: only NaNs in that one column are dropped; other columns are not checked here.
data = pd.read_csv('cleaned_data.csv').dropna(subset=['marathon_time_in_minutes'])

X = data[feature_columns]
y = data[target_columns]

# Preview the first rows of the targets.
y.head()

Unnamed: 0,vo2max,weight,weeklyKM
0,59.7,70.7,65.0
1,46.7,71.7,110.0
2,62.0,66.2,90.0
3,61.9,67.8,65.0
4,50.2,68.3,12.5


In [10]:
# NOTE: two-stage split giving roughly 70% train / 10% validation / 20% test.
# Stage 1 holds out 20% of all rows for testing; stage 2 takes 12.5% of the
# remaining 80% (i.e. 10% of all rows) for validation. Both stages share one seed.
split_seed = 42
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.2, random_state=split_seed
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.125, random_state=split_seed
)

In [None]:
# Standardise features and targets. Each scaler is fitted on the training split
# only and then applied unchanged to the validation and test splits.
x_scaler = StandardScaler()
y_scaler = StandardScaler()

X_train_scaled = x_scaler.fit_transform(X_train)
y_train_scaled = y_scaler.fit_transform(y_train)

X_val_scaled, X_test_scaled = x_scaler.transform(X_val), x_scaler.transform(X_test)
y_val_scaled, y_test_scaled = y_scaler.transform(y_val), y_scaler.transform(y_test)


def _as_float32_tensor(array):
    """Copy a scaled array into a float32 torch tensor."""
    return torch.tensor(array, dtype=torch.float32)


# Convert every scaled split to a float32 tensor for the PyTorch model.
X_train_tensor, y_train_tensor = map(_as_float32_tensor, (X_train_scaled, y_train_scaled))
X_val_tensor, y_val_tensor = map(_as_float32_tensor, (X_val_scaled, y_val_scaled))
X_test_tensor, y_test_tensor = map(_as_float32_tensor, (X_test_scaled, y_test_scaled))

# Define model
class MultiOutputRegressor(nn.Module):
    """Fully connected feed-forward network for multi-target regression.

    With the default arguments the network is the same stack as before:
    Linear(6, 64) -> ReLU -> Linear(64, 32) -> ReLU -> Linear(32, 3),
    with the same ``state_dict`` keys (``model.0``, ``model.2``, ``model.4``).

    Args:
        in_features: Number of input features per sample.
        hidden_sizes: Width of each hidden layer, in order. Every hidden
            layer is followed by a ReLU. An empty sequence yields a single
            linear layer from ``in_features`` to ``out_features``.
        out_features: Number of regression targets predicted per sample.
    """

    def __init__(self, in_features=6, hidden_sizes=(64, 32), out_features=3):
        super().__init__()
        layers = []
        width = in_features
        for hidden in hidden_sizes:
            layers.append(nn.Linear(width, hidden))
            layers.append(nn.ReLU())
            width = hidden
        # Final projection has no activation: outputs are unbounded regression values.
        layers.append(nn.Linear(width, out_features))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for ``x``, whose last dimension must be ``in_features``."""
        return self.model(x)

# Seed PyTorch's global RNG before building the model so the randomly
# initialised layer weights (and therefore the training curve and the
# reported metrics) are repeatable across Restart & Run All. The value
# matches the random_state used for the data splits.
torch.manual_seed(42)
model = MultiOutputRegressor()

# Loss and optimizer: mean squared error over all (standardised) targets,
# minimised with Adam at a learning rate of 1e-3.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training with validation: every epoch performs one full-batch optimiser step
# on the training tensors, then a gradient-free pass over the validation tensors.
epochs = 100
log_interval = 10  # print the losses every 10th epoch
for epoch in range(epochs):
    # Optimisation step on the whole training set.
    model.train()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    optimizer.zero_grad()  # clear stale gradients before backpropagating
    loss.backward()
    optimizer.step()

    # Validation loss
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_loss = criterion(val_outputs, y_val_tensor)

    if (epoch + 1) % log_interval != 0:
        continue
    print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {loss.item():.4f} | Val Loss: {val_loss.item():.4f}")

# Final Evaluation on Test Set: run the trained model on the held-out test
# tensors without tracking gradients.
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)

# Undo the target standardisation so predictions are in the original units
# of the target columns, then show the first five rows.
test_outputs_unscaled = y_scaler.inverse_transform(test_outputs.numpy())
print("\nSample predictions on test set:\n", test_outputs_unscaled[:5])


Epoch [10/100] | Train Loss: 0.8491 | Val Loss: 0.9256
Epoch [20/100] | Train Loss: 0.7028 | Val Loss: 0.7702
Epoch [30/100] | Train Loss: 0.5562 | Val Loss: 0.5885
Epoch [40/100] | Train Loss: 0.4442 | Val Loss: 0.4400
Epoch [50/100] | Train Loss: 0.3832 | Val Loss: 0.3621
Epoch [60/100] | Train Loss: 0.3479 | Val Loss: 0.3335
Epoch [70/100] | Train Loss: 0.3227 | Val Loss: 0.3275
Epoch [80/100] | Train Loss: 0.3042 | Val Loss: 0.3201
Epoch [90/100] | Train Loss: 0.2906 | Val Loss: 0.3112
Epoch [100/100] | Train Loss: 0.2800 | Val Loss: 0.3056

Sample predictions on test set:
 [[42.51277  89.75764  28.502151]
 [50.07931  79.348625 66.083885]
 [47.99734  67.24286  63.118916]
 [46.98724  63.938183 57.438084]
 [52.227726 73.03701  30.640934]]


In [15]:

def _to_original_units(scaled_tensor):
    """Invert the target standardisation for a tensor of scaled target values."""
    return y_scaler.inverse_transform(scaled_tensor.numpy())


# Bring both model outputs and reference targets back to the original units.
predictions = _to_original_units(test_outputs)
ground_truth = _to_original_units(y_test_tensor)

# R² per target column, plus the unweighted mean over the targets.
r2 = r2_score(ground_truth, predictions, multioutput='raw_values')
r2_mean = r2_score(ground_truth, predictions, multioutput='uniform_average')

print("\nR² scores per target [vo2max, weight, weeklyKM]:", r2)
print("Mean R² score across all targets:", r2_mean)


R² scores per target [vo2max, weight, weeklyKM]: [0.46953768 0.8191459  0.46256977]
Mean R² score across all targets: 0.5837511420249939
