In [1]:
import torch
from sklearn.model_selection import train_test_split
import pandas as pd

from model import MLP
from training_defs import create_dataloader, get_loss_optimizer, SpectraDataset

In [2]:
# Read the CSV into a DataFrame and partition its rows into
# train / validation / test subsets.
csv_path = "../../data/simpler_data_rwc.csv"
df = pd.read_csv(csv_path)

# Two-stage split with a fixed random_state: first hold out 30% of the rows,
# then cut that hold-out in half, which yields roughly 70/15/15.
train_df, temp_df = train_test_split(
    df,
    test_size=0.3,
    random_state=42,
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=42,
)

# Sanity check: print each subset's share of the full dataset in percent.
for split_name, split_df in (
    ("Train", train_df),
    ("Validation", val_df),
    ("Test", test_df),
):
    print(f"{split_name}: {len(split_df)/len(df)*100:.2f}%")

Train: 69.99%
Validation: 14.97%
Test: 15.03%


In [3]:
# One loader per split, built by the project helper `create_dataloader`.
# The training loader keeps the helper's default `shuffle` setting
# (presumably shuffled — confirm in training_defs); the validation and test
# loaders explicitly disable shuffling.
train_loader = create_dataloader(train_df)
val_loader, test_loader = (
    create_dataloader(eval_df, shuffle=False) for eval_df in (val_df, test_df)
)

In [4]:
# Seed PyTorch's global RNG before the model is constructed so that a fresh
# "Restart Kernel -> Run All" reproduces the same run. The data split is
# already seeded (random_state=42); without this call the weight
# initialisation, and any shuffling that draws from the global RNG, would
# differ on every execution.
# NOTE(review): assumes MLP and create_dataloader rely on torch's default
# generator — confirm in model.py / training_defs.py.
torch.manual_seed(42)

# Feature columns are those whose header consists solely of digits
# (presumably wavelength/band labels — verify against the CSV schema).
input_dim = len([c for c in train_df.columns if c.isdigit()])
# Fail early with a clear message instead of building a zero-input network.
assert input_dim > 0, "no digit-named feature columns found in train_df"

# Network with one input per feature column and three outputs.
mlp = MLP(input_dim=input_dim, output_dim=3)

# Loss function and optimizer are supplied by the project helper; `lr` is
# passed through as given.
loss_fn, optimizer = get_loss_optimizer(mlp, lr=1e-3)

In [5]:
# Fixed-length training run: every epoch performs one optimisation pass over
# `train_loader`, then measures the loss on `val_loader` with gradient
# tracking disabled. Reported epoch losses are the mean of the per-batch
# loss values (each batch counts equally).
epochs = 50
print("Training started...")
for epoch in range(epochs):
    epoch_no = epoch + 1  # 1-based counter, used only in progress messages
    print(f"Starting epoch {epoch_no}/{epochs}...")

    # ---- training pass ----
    mlp.train()
    total_loss = 0.0
    for batch_no, (features, targets) in enumerate(train_loader, start=1):
        optimizer.zero_grad()  # clear gradients left over from the previous step
        batch_loss = loss_fn(mlp(features), targets)
        batch_loss.backward()
        optimizer.step()

        batch_loss_value = batch_loss.item()
        total_loss += batch_loss_value

        # Progress line on every 10th batch.
        if batch_no % 10 == 0:
            print(f"Loss after batch {batch_no}: {batch_loss_value:.4f}")

    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch_no} finished - Avg Training Loss: {avg_loss:.4f}")

    # ---- validation pass ----
    mlp.eval()
    total_val_loss = 0.0
    with torch.no_grad():
        for val_features, val_targets in val_loader:
            total_val_loss += loss_fn(mlp(val_features), val_targets).item()
    avg_val_loss = total_val_loss / len(val_loader)
    print(f"📊 Epoch [{epoch_no}/{epochs}] - Validation Loss: {avg_val_loss:.4f}\n")

print("Training finished!")

Training started...
Starting epoch 1/50...
Loss after batch 10: 0.0588
Loss after batch 20: 0.0771
Loss after batch 30: 0.0456
Epoch 1 finished - Avg Training Loss: 0.0881
📊 Epoch [1/50] - Validation Loss: 0.0524

Starting epoch 2/50...
Loss after batch 10: 0.0486
Loss after batch 20: 0.0361
Loss after batch 30: 0.0346
Epoch 2 finished - Avg Training Loss: 0.0344
📊 Epoch [2/50] - Validation Loss: 0.0381

Starting epoch 3/50...
Loss after batch 10: 0.0241
Loss after batch 20: 0.0222
Loss after batch 30: 0.0301
Epoch 3 finished - Avg Training Loss: 0.0253
📊 Epoch [3/50] - Validation Loss: 0.0310

Starting epoch 4/50...
Loss after batch 10: 0.0270
Loss after batch 20: 0.0232
Loss after batch 30: 0.0174
Epoch 4 finished - Avg Training Loss: 0.0217
📊 Epoch [4/50] - Validation Loss: 0.0279

Starting epoch 5/50...
Loss after batch 10: 0.0215
Loss after batch 20: 0.0144
Loss after batch 30: 0.0174
Epoch 5 finished - Avg Training Loss: 0.0183
📊 Epoch [5/50] - Validation Loss: 0.0239

Starting e

In [6]:
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

In [7]:
# Evaluate the trained model on the held-out test split.
mlp.eval()

# Gather per-batch predictions and targets as NumPy arrays.
pred_batches = []
target_batches = []
with torch.no_grad():
    for test_features, test_targets in test_loader:
        pred_batches.append(mlp(test_features).numpy())
        target_batches.append(test_targets.numpy())

# Stack the batches row-wise into one array each.
all_preds = np.vstack(pred_batches)
all_targets = np.vstack(target_batches)

# R² is averaged uniformly over the output columns; RMSE is the square root
# of the mean squared error returned by scikit-learn for the full arrays.
r2 = r2_score(all_targets, all_preds, multioutput="uniform_average")
mse = mean_squared_error(all_targets, all_preds)
rmse = np.sqrt(mse)

print(f"Test R²: {r2:.4f}")
print(f"Test RMSE: {rmse:.4f}")

Test R²: 0.9016
Test RMSE: 0.0960
