In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

In [20]:
def determine_device():
    """Choose the torch device to run on, announce it, and return its name.

    CUDA is preferred when available, then Apple's MPS backend, and the CPU
    is used as the fallback.

    Returns:
        str: one of "cuda", "mps" or "cpu".
    """
    if torch.cuda.is_available():
        chosen = "cuda"
    elif torch.backends.mps.is_available():
        chosen = "mps"  # Apple M1
    else:
        chosen = "cpu"

    print(f"We are using device: {chosen}")
    return chosen

In [21]:
# Device string used as the default for model/tensor placement in the helpers below.
DEVICE = determine_device()
# Single seed reused for torch, numpy and the train/validation split.
RANDOM_SEED = 24

We are using device: mps


In [22]:
# Seed the global torch and numpy random number generators.
torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

In [23]:
def get_data_item(path: str) -> pd.DataFrame:
    """Read the CSV file at ``path`` and return its contents as a DataFrame."""
    return pd.read_csv(path)

In [24]:
# Load the training CSV once, only to preview it and to derive the sampling weights in the next cell.
train_data = get_data_item('../data/data_meta_holds_train_norm.csv')
train_data.head()

Unnamed: 0,uuid,angle,quality_average,difficulty_average,ascensionist_count,hold_0,hold_1,hold_2,hold_3,hold_4,...,hold_1692,hold_1693,hold_1694,hold_1695,hold_1696,hold_1697,hold_1698,hold_1699,hold_1700,hold_1701
0,3a6b75ff3c4d4a4e83730daab1776861,0.571429,0.5,23.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,9F2A5E33DD2449028B0487F4C9549DD7,0.857143,1.0,21.5,4.2e-05,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,31e90392f7a14b03a503bc910578958a,0.714286,0.939455,17.0056,0.010664,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,e37a79ea21704ffdba8a4add2968e878,0.642857,0.894735,25.9737,0.001547,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,B6A37D51FE384CFC96BBE6C2C26A6518,0.571429,1.0,18.0,8.4e-05,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
# Per-route ascensionist counts drive how often each route is sampled during training.
ascentions_per_route = train_data['ascensionist_count'].values
# The DataFrame is no longer needed once the counts are extracted.
del train_data

# General form, kept for reference:
# sample_weights = (ascentions_per_route / sum(ascentions_per_route)) * len(ascentions_per_route)
# The division is skipped here because we already normalized the data.
# NOTE(review): rows whose normalized count is 0 get weight 0 and are presumably never drawn
# by the WeightedRandomSampler built later - confirm this is intended.
sample_weights = ascentions_per_route * len(ascentions_per_route) # Because we already normalized the data
sample_weights_tensor = torch.Tensor(sample_weights)

# Quick sanity check: number of weights and the first weight.
sample_weights_tensor.shape, sample_weights_tensor[0]

(torch.Size([93363]), tensor(0.))

In [26]:
class BoulderingDataset(Dataset):
    """In-memory dataset built from the training CSV.

    Every column except ``difficulty_average`` (the regression target) and
    ``uuid`` (the row identifier) becomes an input feature. Items are returned
    as ``(features, target, uuid)`` triples.
    """

    def __init__(self, *, train_path='../data/data_meta_holds_train_norm.csv', X_transform=None, Y_transform=None):
        """Load ``train_path`` and split it into features, targets and ids.

        Args:
            train_path: CSV file to read via ``get_data_item``.
            X_transform: optional callable applied to each feature row on access.
            Y_transform: optional callable applied to each target on access.
        """
        super().__init__()
        target_column = 'difficulty_average'
        id_column = 'uuid'

        frame = get_data_item(train_path)
        feature_values = frame.drop([target_column, id_column], axis=1).values

        self.X = torch.Tensor(feature_values)
        # Targets are stored as a column vector, one row per sample.
        self.Y = torch.Tensor(frame[target_column].values).unsqueeze(1)
        self.IDs = frame[id_column].values
        self.X_transform = X_transform
        self.Y_transform = Y_transform

    def __getitem__(self, index):
        """Return ``(features, target, id)`` for ``index``, applying any transforms."""
        features = self.X[index]
        target = self.Y[index]

        features = self.X_transform(features) if self.X_transform else features
        target = self.Y_transform(target) if self.Y_transform else target

        return features, target, self.IDs[index]

    def __len__(self):
        """Number of samples, i.e. rows of the feature tensor."""
        return len(self.X)

In [27]:
class ANN(nn.Module):
    """Fully connected feed-forward network.

    The network is a stack of ``Linear -> activation`` pairs, one per entry of
    ``hidden_dims``, followed by a final ``Linear`` output layer without an
    activation.

    Args:
        input_dim: number of input features.
        hidden_dims: widths of the hidden layers, in order. May be empty, in
            which case the model is a single linear layer from ``input_dim``
            to ``output_dim``.
        output_dim: number of outputs.
        activation_fn: zero-argument callable (typically an ``nn.Module``
            class) that builds a fresh activation module for each hidden layer.
    """

    def __init__(self, input_dim: int, hidden_dims: list, output_dim: int, activation_fn=nn.LeakyReLU):
        super().__init__()
        # Consecutive pairs of widths describe each hidden Linear layer. Building
        # from this list (instead of indexing hidden_dims[0]) also supports an
        # empty ``hidden_dims``.
        widths = [input_dim, *hidden_dims]

        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(activation_fn())

        # Attribute names and module order are kept stable so that previously
        # saved state_dicts (layers.N.*, fcOut.*) still load.
        self.layers = nn.ModuleList(layers)
        self.fcOut = nn.Linear(widths[-1], output_dim)

    def forward(self, x) -> torch.Tensor:
        """Apply every hidden layer in order, then the output layer."""
        for layer in self.layers:
            x = layer(x)
        return self.fcOut(x)

In [28]:
def compute_regression_metrics(y_pred, y_true, *, metrics=('MSE', 'MAE', 'R2'), mse=None):
    """Compute the requested regression metrics for a set of predictions.

    Args:
        y_pred: predicted values.
        y_true: ground-truth values, same shape as ``y_pred``.
        metrics: names of the metrics to compute; any of 'MSE', 'RMSE',
            'MAE', 'R2'. Requesting 'RMSE' also adds 'MSE' to the result.
        mse: optional precomputed MSE. When given (including ``0.0``) it is
            used as-is instead of being recomputed from the arrays.

    Returns:
        dict mapping each computed metric name to its value.
    """
    metrics_dict = {}

    # scikit-learn metrics take (y_true, y_pred). The order is irrelevant for
    # MSE/MAE but matters for R2, which normalises by the variance of y_true.
    if 'MSE' in metrics or 'RMSE' in metrics:
        # ``is not None`` so that a legitimate precomputed value of 0.0 is honoured.
        if mse is not None:
            metrics_dict['MSE'] = mse
        else:
            metrics_dict['MSE'] = mean_squared_error(y_true, y_pred)

    if 'RMSE' in metrics:
        metrics_dict['RMSE'] = np.sqrt(metrics_dict['MSE'])

    if 'MAE' in metrics:
        metrics_dict['MAE'] = mean_absolute_error(y_true, y_pred)

    if 'R2' in metrics:
        metrics_dict['R2'] = r2_score(y_true, y_pred)

    return metrics_dict

In [29]:
def train_one_epoch(model, train_loader, optimizer, loss_fn, device=DEVICE):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: network to train; switched to training mode.
        train_loader: iterable yielding ``(features, targets, ids)`` batches.
        optimizer: optimizer stepped once per batch.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        device: device the batch tensors are moved to.

    Returns:
        float: sum of the per-batch losses divided by the number of batches.
    """
    model.train()

    running_loss = 0.0
    for batch_features, batch_targets, _ in train_loader:
        batch_features = batch_features.to(device)
        batch_targets = batch_targets.to(device)

        # Clear gradients left over from the previous batch before the new backward pass.
        optimizer.zero_grad()
        batch_loss = loss_fn(model(batch_features), batch_targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss / len(train_loader)

In [30]:
def evaluate(model, valid_loader, *, device=DEVICE, mse=None):
    """Run the model over a loader without gradients and compute regression metrics.

    Args:
        model: network to evaluate; switched to evaluation mode.
        valid_loader: iterable yielding ``(features, targets, ids)`` batches.
        device: device the feature tensors are moved to.
        mse: optional precomputed MSE forwarded to ``compute_regression_metrics``.

    Returns:
        dict of metrics as returned by ``compute_regression_metrics``.
    """
    model.eval()

    predictions, ground_truth = [], []
    with torch.no_grad():
        for batch_features, batch_targets, _ in valid_loader:
            outputs = model(batch_features.to(device))
            # Bring results back to host memory, one row per sample.
            predictions.extend(outputs.cpu().numpy())
            ground_truth.extend(batch_targets.cpu().numpy())

    return compute_regression_metrics(np.array(predictions), np.array(ground_truth), mse=mse)

In [31]:
# Build the dataset from the default training CSV path.
dataset = BoulderingDataset()

# Number of samples loaded.
len(dataset)

93363

In [32]:
# Shapes of the target and feature tensors.
dataset.Y.shape, dataset.X.shape

(torch.Size([93363, 1]), torch.Size([93363, 1705]))

In [33]:
# Peek at the first two feature rows and their targets.
dataset.X[:2], dataset.Y[:2]

(tensor([[5.7143e-01, 5.0000e-01, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
          0.0000e+00],
         [8.5714e-01, 1.0000e+00, 4.1818e-05,  ..., 0.0000e+00, 0.0000e+00,
          0.0000e+00]]),
 tensor([[23.0000],
         [21.5000]]))

In [34]:
# Source https://stackoverflow.com/questions/50544730/how-do-i-split-a-custom-dataset-into-training-and-test-datasets

# Imports used only by this split cell.
from torch.utils.data import Subset
from sklearn.model_selection import train_test_split

# Fraction of the samples held out for validation.
TEST_SIZE = 0.3
# Split row indices rather than the data itself; the two split target arrays are discarded.
train_indices, test_indices, _, _ = train_test_split(
    range(len(dataset.Y)),
    dataset.Y,
    test_size=TEST_SIZE,
    random_state=RANDOM_SEED,
    shuffle=True
)

# Optional fast-iteration mode: keep only the first PARTIAL_PCT of each index list.
PARTIAL_DATASET = False
PARTIAL_PCT = 0.05
if PARTIAL_DATASET:
    train_indices = train_indices[:int(len(train_indices) * PARTIAL_PCT)]
    test_indices = test_indices[:int(len(test_indices) * PARTIAL_PCT)]

# Subsets are index views over the same underlying dataset object.
train_dataset = Subset(dataset, train_indices)
valid_dataset = Subset(dataset, test_indices)
# Only removes the notebook-level name; both Subsets still hold a reference to the
# dataset (it is accessed later as train_dataset.dataset), so no memory is released here.
del dataset
if DEVICE == "cuda":
    torch.cuda.empty_cache()

# Restrict the per-route sampling weights to the training rows and draw, per epoch,
# as many samples as there are training rows.
train_sample_weights_tensor = sample_weights_tensor[train_indices]
sampler = WeightedRandomSampler(train_sample_weights_tensor, len(train_sample_weights_tensor))

In [35]:
# Number of feature columns of the underlying dataset:
# columns of the flattened bitmap + metadata (angle, quality, ascents)
input_dim = train_dataset.dataset.X.shape[1] # columns of the flattened bitmap + metadata (angle, quality, ascents)
# Single output for regression. The name is spelled "ouput_dim" and is referenced
# under that spelling by the training cell.
ouput_dim = 1 # regression

input_dim, ouput_dim

(1705, 1)

In [36]:
import optuna

# When True, objective() prints model details and per-epoch metrics.
DEBUG = False
if PARTIAL_DATASET:
    print(f'ATTENTION: Using only {PARTIAL_PCT * 100}% of the dataset!')

print(f'Random seed: {RANDOM_SEED}, train size: {len(train_dataset)}, valid size: {len(valid_dataset)}. Device: {DEVICE}')

# objective() appends one summary dict per finished trial; the list is written to results.csv after the study.
results = []

def objective(trial):
    """Optuna objective: train one ANN with sampled hyperparameters and score it.

    Relies on notebook-level names: ``input_dim``, ``ouput_dim``,
    ``train_dataset``, ``valid_dataset``, ``sampler``, ``DEVICE``, ``DEBUG``,
    ``results``, ``ANN``, ``train_one_epoch`` and ``evaluate``.

    Side effects:
        * appends a summary dict (hyperparameters + metric means) to ``results``;
        * stores ``best_valid_mse``, ``gradient_exploded`` and the per-epoch
          ``metrics`` history as user attributes on the trial;
        * writes the best weights of this trial to disk when its best
          validation MSE beats every earlier trial of the same study.

    Args:
        trial: the ``optuna`` trial supplying the hyperparameters.

    Returns:
        The mean validation MAE over all epochs that were fully evaluated.

    Raises:
        optuna.TrialPruned: if training diverged before a single epoch could
            be evaluated, so there is nothing to score.
    """
    best_valid_mse = float('inf')
    best_model_state = None
    gradient_exploded = False
    metrics = {
        'MSE': { 'train': [], 'valid': [] },
        'MAE': { 'train': [], 'valid': [] },
        'R2': { 'train': [], 'valid': [] }
    }
    MAX_EPOCHS = 200

    BATCH_SIZE = trial.suggest_categorical('BATCH_SIZE', [4, 8, 16])
    LEARNING_RATE = trial.suggest_categorical('LEARNING_RATE', [0.0001, 0.0005, 0.001, 0.005])
    SCHEDULER_FACTOR = trial.suggest_categorical('SCHEDULER_FACTOR', [0.1, 0.3])
    SCHEDULER_PATIENCE = trial.suggest_categorical('SCHEDULER_PATIENCE', [5])
    # Layer widths are suggested as strings because categorical choices must be simple values.
    HIDDEN_DIMS = trial.suggest_categorical('HIDDEN_DIMS', [
        '1250, 1000, 750, 500, 250',
        '1500, 1100, 700, 300',
    ])
    HIDDEN_DIMS = tuple(map(int, HIDDEN_DIMS.split(', ')))
    ACTIVATION_FN = trial.suggest_categorical('ACTIVATION_FN', ['nn.PReLU', 'nn.ReLU'])
    # Explicit lookup instead of eval(): same classes, no arbitrary code execution.
    ACTIVATION_FN = {'nn.PReLU': nn.PReLU, 'nn.ReLU': nn.ReLU}[ACTIVATION_FN]

    model = ANN(input_dim, HIDDEN_DIMS, ouput_dim, activation_fn=ACTIVATION_FN).to(DEVICE)

    DEBUG and print(f"Model layers: {[input_dim, *HIDDEN_DIMS, ouput_dim]}")
    DEBUG and print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters')
    DEBUG and print(f'Trial {trial.number} params: {trial.params}')

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=SCHEDULER_PATIENCE, factor=SCHEDULER_FACTOR, verbose=DEBUG)
    loss_fn = nn.MSELoss()
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler)
    valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE)

    for epoch in range(MAX_EPOCHS):
        train_loss = train_one_epoch(model, train_loader, optimizer, loss_fn, DEVICE)

        # Check for divergence before the two evaluation passes: a non-finite loss
        # would make the metric computation fail, and an exploded epoch is not
        # recorded anyway.
        diverged = not np.isfinite(train_loss)
        if diverged or (epoch > 0 and train_loss > 1000 * metrics['MSE']['train'][-1]):
            print(f'Gradient explosion at epoch {epoch + 1}. Exiting epoch. train_loss: {train_loss}')
            gradient_exploded = True
            break

        train_metrics = evaluate(model, train_loader, mse=train_loss)
        valid_metrics = evaluate(model, valid_loader)

        if valid_metrics['MSE'] < best_valid_mse:
            DEBUG and print(f'New best weights at epoch {epoch + 1}.')
            best_valid_mse = valid_metrics['MSE']
            # state_dict() returns references to the live parameters, which later
            # epochs keep updating; take an independent CPU copy of this epoch's weights.
            best_model_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }

        scheduler.step(valid_metrics['MSE'])
        
        DEBUG and print(f'Epoch {epoch + 1}/{MAX_EPOCHS}, lr: {optimizer.param_groups[0]["lr"]:.0e}; train MAE {train_metrics["MAE"]:.2f}, R2 {train_metrics["R2"]:.2f}, MSE {train_metrics["MSE"]:.2f}; valid MAE {valid_metrics["MAE"]:.2f}, R2 {valid_metrics["R2"]:.2f}, MSE {valid_metrics["MSE"]:.2f}')
        for metric in train_metrics.keys():
            metrics[metric]['train'].append(train_metrics[metric])
            metrics[metric]['valid'].append(valid_metrics[metric])

        # Early stopping if MSE of the last 3 epochs is higher than the mean of the 15 previous epochs
        if epoch > 17 and np.mean(metrics['MSE']['valid'][-3:]) >= np.mean(metrics['MSE']['valid'][-18:-3]):
            print(f'Early stopping at epoch {epoch + 1}')
            break

        if epoch == MAX_EPOCHS - 1:
            print(f'Wow, max epochs reached ({MAX_EPOCHS}), stopping.')

    trial.set_user_attr('gradient_exploded', gradient_exploded)

    if not metrics['MAE']['valid']:
        # Diverged during the very first epoch: there is no metric to report.
        raise optuna.TrialPruned(f'Trial {trial.number} diverged before any epoch was evaluated.')

    # Compare like with like: this trial's best validation MSE against the best
    # validation MSE of earlier trials (study.best_value holds the objective, a mean MAE).
    best_valid_mse = float(best_valid_mse)
    previous_best_mse = min(
        (
            other.user_attrs['best_valid_mse']
            for other in trial.study.get_trials(deepcopy=False)
            if other.number != trial.number and 'best_valid_mse' in other.user_attrs
        ),
        default=float('inf'),
    )
    trial.set_user_attr('best_valid_mse', best_valid_mse)
    # Keep the per-epoch history so learning curves can be plotted after the study.
    trial.set_user_attr('metrics', {
        name: {split: [float(value) for value in values] for split, values in splits.items()}
        for name, splits in metrics.items()
    })

    if best_model_state is not None and best_valid_mse < previous_best_mse:
        if trial.number == 0:
            print(f'Saving first weights with MSE {best_valid_mse:.2f}')
            torch.save(best_model_state, 'best_model_state.pth')
        else:
            print(f'New overall best MSE in trial {trial.number}: {best_valid_mse:.2f}, saving weights.')
            torch.save(best_model_state, f'best_model_state-trial-{trial.number}.pth')

    # The study minimises the validation MAE averaged over all recorded epochs.
    valid_mae_mean = np.mean(metrics['MAE']['valid'])

    results.append({
        'BATCH_SIZE': BATCH_SIZE,
        'LEARNING_RATE': LEARNING_RATE,
        'SCHEDULER_FACTOR': SCHEDULER_FACTOR,
        'SCHEDULER_PATIENCE': SCHEDULER_PATIENCE,
        'HIDDEN_DIMS': HIDDEN_DIMS,
        'ACTIVATION_FN': ACTIVATION_FN.__name__,
        'MSE_train': np.mean(metrics['MSE']['train']),
        'MSE_valid': np.mean(metrics['MSE']['valid']),
        'MAE_train': np.mean(metrics['MAE']['train']),
        'MAE_valid': valid_mae_mean,
        'R2_train': np.mean(metrics['R2']['train']),
        'R2_valid': np.mean(metrics['R2']['valid']),
    })

    return valid_mae_mean

# Seed the sampler as well, so the sequence of suggested hyperparameters is
# reproducible and not only the model initialisation and data split.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=RANDOM_SEED),
)
study.optimize(objective, n_trials=20)

print(f'Finished study with {len(study.trials)} trials. Best MAE valid: {study.best_value:.2f}, best params: {study.best_params}')

# Persist the per-trial summaries collected by objective().
df = pd.DataFrame(results)
df.to_csv('results.csv', index=False)
print('Saved results to results.csv')

[I 2023-10-25 00:02:36,996] A new study created in memory with name: no-name-ba2e379a-5fcf-40db-b8ad-807be825b83f


Random seed: 24, train size: 65354, valid size: 28009. Device: mps


[I 2023-10-25 01:09:41,987] Trial 0 finished with value: 1.5725665092468262 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.0001, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1250, 1000, 750, 500, 250', 'ACTIVATION_FN': 'nn.PReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 76
Saving first weights with MSE 4.09


[I 2023-10-25 04:31:45,811] Trial 1 finished with value: 1.6831867694854736 and parameters: {'BATCH_SIZE': 4, 'LEARNING_RATE': 0.0001, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1500, 1100, 700, 300', 'ACTIVATION_FN': 'nn.ReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 25


[I 2023-10-25 08:08:32,055] Trial 2 finished with value: 1.6446332931518555 and parameters: {'BATCH_SIZE': 8, 'LEARNING_RATE': 0.0005, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1250, 1000, 750, 500, 250', 'ACTIVATION_FN': 'nn.PReLU'}. Best is trial 0 with value: 1.5725665092468262.


Gradient explosion at epoch 44. Stopping training. train_loss: 2.7444523677833903


[I 2023-10-25 10:10:18,022] Trial 3 finished with value: 1.7151546478271484 and parameters: {'BATCH_SIZE': 4, 'LEARNING_RATE': 0.005, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1250, 1000, 750, 500, 250', 'ACTIVATION_FN': 'nn.ReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 30


[I 2023-10-25 10:59:56,447] Trial 4 finished with value: 1.6054675579071045 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.0001, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1250, 1000, 750, 500, 250', 'ACTIVATION_FN': 'nn.ReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 60


[I 2023-10-25 11:21:29,798] Trial 5 finished with value: 1.8552310466766357 and parameters: {'BATCH_SIZE': 8, 'LEARNING_RATE': 0.0005, 'SCHEDULER_FACTOR': 0.3, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1250, 1000, 750, 500, 250', 'ACTIVATION_FN': 'nn.PReLU'}. Best is trial 0 with value: 1.5725665092468262.


Gradient explosion at epoch 11. Stopping training. train_loss: 30.507381767917177


[I 2023-10-25 14:09:42,593] Trial 6 finished with value: 1.5989214181900024 and parameters: {'BATCH_SIZE': 4, 'LEARNING_RATE': 0.0001, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1500, 1100, 700, 300', 'ACTIVATION_FN': 'nn.ReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 62


[I 2023-10-25 15:48:00,582] Trial 7 finished with value: 1.585307240486145 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.0005, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1500, 1100, 700, 300', 'ACTIVATION_FN': 'nn.ReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 123


[I 2023-10-25 17:25:15,911] Trial 8 finished with value: 1.6431796550750732 and parameters: {'BATCH_SIZE': 4, 'LEARNING_RATE': 0.0005, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1250, 1000, 750, 500, 250', 'ACTIVATION_FN': 'nn.ReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 31


[I 2023-10-25 18:13:53,103] Trial 9 finished with value: 1.6066139936447144 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.001, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1250, 1000, 750, 500, 250', 'ACTIVATION_FN': 'nn.ReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 56


[I 2023-10-25 18:20:38,659] Trial 10 finished with value: 1.8747860193252563 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.001, 'SCHEDULER_FACTOR': 0.3, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1500, 1100, 700, 300', 'ACTIVATION_FN': 'nn.PReLU'}. Best is trial 0 with value: 1.5725665092468262.


Gradient explosion at epoch 8. Stopping training. train_loss: 9.369982703974884


[I 2023-10-25 18:23:14,420] Trial 11 finished with value: 2.117952823638916 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.005, 'SCHEDULER_FACTOR': 0.3, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1500, 1100, 700, 300', 'ACTIVATION_FN': 'nn.PReLU'}. Best is trial 0 with value: 1.5725665092468262.


Gradient explosion at epoch 3. Stopping training. train_loss: 3905644127.771279


[I 2023-10-25 18:57:43,372] Trial 12 finished with value: 1.6490471363067627 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.0005, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1500, 1100, 700, 300', 'ACTIVATION_FN': 'nn.PReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 40


[I 2023-10-25 19:46:18,275] Trial 13 finished with value: 1.6049894094467163 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.0001, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1500, 1100, 700, 300', 'ACTIVATION_FN': 'nn.PReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 56


[I 2023-10-25 20:21:24,328] Trial 14 finished with value: 1.634039282798767 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.0005, 'SCHEDULER_FACTOR': 0.3, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1250, 1000, 750, 500, 250', 'ACTIVATION_FN': 'nn.ReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 46


[I 2023-10-25 21:07:19,560] Trial 15 finished with value: 1.5965079069137573 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.0001, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1500, 1100, 700, 300', 'ACTIVATION_FN': 'nn.PReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 57


[I 2023-10-25 21:12:10,836] Trial 16 finished with value: 3.61995792388916 and parameters: {'BATCH_SIZE': 8, 'LEARNING_RATE': 0.005, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1500, 1100, 700, 300', 'ACTIVATION_FN': 'nn.PReLU'}. Best is trial 0 with value: 1.5725665092468262.


Gradient explosion at epoch 3. Stopping training. train_loss: 3338.094598407718


[I 2023-10-25 21:49:49,968] Trial 17 finished with value: 1.6297515630722046 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.001, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1250, 1000, 750, 500, 250', 'ACTIVATION_FN': 'nn.ReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 48


[I 2023-10-25 22:04:21,934] Trial 18 finished with value: 1.6938062906265259 and parameters: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.0005, 'SCHEDULER_FACTOR': 0.3, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1250, 1000, 750, 500, 250', 'ACTIVATION_FN': 'nn.PReLU'}. Best is trial 0 with value: 1.5725665092468262.


Gradient explosion at epoch 15. Stopping training. train_loss: 17.925700521311807


[I 2023-10-25 23:08:25,241] Trial 19 finished with value: 1.6315791606903076 and parameters: {'BATCH_SIZE': 8, 'LEARNING_RATE': 0.0001, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1500, 1100, 700, 300', 'ACTIVATION_FN': 'nn.ReLU'}. Best is trial 0 with value: 1.5725665092468262.


Early stopping at epoch 44
Finished study with 20 trials. Best MAE valid: 1.57, best params: {'BATCH_SIZE': 16, 'LEARNING_RATE': 0.0001, 'SCHEDULER_FACTOR': 0.1, 'SCHEDULER_PATIENCE': 5, 'HIDDEN_DIMS': '1250, 1000, 750, 500, 250', 'ACTIVATION_FN': 'nn.PReLU'}
Saved results to results.csv


In [37]:
def plot_metric_per_epoch(metric: str, *, train_metrics: list, valid_metrics: list):
    """Draw the training and validation curves of one metric against the epoch index.

    Args:
        metric: metric name, used for the y-axis label and the legend entries.
        train_metrics: per-epoch values measured on the training data.
        valid_metrics: per-epoch values measured on the validation data.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    for split_name, series in (('train', train_metrics), ('valid', valid_metrics)):
        ax.plot(series, label=f'{split_name} {metric}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(metric)
    ax.legend()
    plt.show()

In [None]:
# study.best_params only holds the sampled hyperparameters, so it has no metric keys.
# Per-epoch histories are read from the best trial's user attributes (key 'metrics',
# shaped {metric: {'train': [...], 'valid': [...]}}); if the trial did not record
# them, nothing is plotted instead of failing with a KeyError.
best_trial_history = study.best_trial.user_attrs.get('metrics')
if best_trial_history is None:
    print('No per-epoch metric history recorded for the best trial; nothing to plot.')
else:
    for metric_name in ('MSE', 'MAE', 'R2'):
        plot_metric_per_epoch(
            metric_name,
            train_metrics=best_trial_history[metric_name]['train'],
            valid_metrics=best_trial_history[metric_name]['valid'],
        )