In [1]:
import os
import numpy as np
import pandas as pd
import xarray as xr

from sklearn.metrics import *
from copy import deepcopy
import matplotlib.pyplot as plt
import math


from tqdm.auto import tqdm
import seaborn as sns

from src.helpers import *
from src.visualize import *

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.utils.data as data
from torch.utils.data import Dataset
import pickle

import optuna

from hypnettorch.hnets import HMLP
from hypnettorch.mnets import SimpleRNN


device = torch.device('cpu') #suposed to be cuda
dtype = torch.float32

import sys
sys.path.append("c:\\Users\\nerea\\OneDrive\\Documentos\\EPFL_MASTER\\PDM\\Project\\PyalData")
# to change for the actual path where PyalData has been cloned

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data_path = './Data/Processed_Data/Tidy_Sansa_13_04.pkl'

with open(data_path, 'rb') as file:
    df = pickle.load(file)

In [3]:
baseline_df = df.loc[df.type == 'BASELINE'].reset_index()
stim_df = df.loc[df.type == 'TONIC'].reset_index()

In [4]:
X_train_b, y_train_b, X_val_b, y_val_b, X_test_b, y_test_b, info_train_b, info_val_b, info_test_b = train_test_split(baseline_df, train_variable = 'both_rates', 
                                                                                                   target_variable = 'target_pos', num_folds = 5)

Test trials  4
Val trials 3


In [5]:
X_train_s, y_train_s, X_val_s, y_val_s, X_test_s, y_test_s, info_train_s, info_val_s, info_test_s = train_test_split(stim_df, train_variable = 'both_rates', 
                                                                                                   target_variable = 'target_pos', num_folds = 5)

Test trials  8
Val trials 7


In [6]:
# Test one of the folds first
fold_num = 'fold0'
fold = 0

print('We are testing the optimization method on fold ', fold)

def input_mats(x,y, seq_length = 75):
    x = x[fold_num]
    y = y[fold_num]
    x = x.reshape(x.shape[0] // seq_length, seq_length, x.shape[1])  
    y = y.reshape(y.shape[0] // seq_length, seq_length, y.shape[1])  
    return x,y

x_train_base, y_train_base = input_mats(X_train_b, y_train_b)
x_train_stim, y_train_stim = input_mats(X_train_s, y_train_s)

x_val_base, y_val_base = input_mats(X_val_b, y_val_b)
x_val_stim, y_val_stim = input_mats(X_val_s, y_val_s)

x_test_base, y_test_base = input_mats(X_test_b, y_test_b)
x_test_stim, y_test_stim = input_mats(X_test_s, y_test_s)

seed = 42
torch.manual_seed(seed)

We are testing the optimization method on fold  0


<torch._C.Generator at 0x7f82ee1cf0f0>

In [7]:
# Specify that we want our tensors on the GPU and in float32
device = torch.device('cuda:0') #suposed to be cuda
dtype = torch.float32
path_to_models = './Models'

num_dim_output = y_train_stim.shape[2]
num_features = x_train_stim.shape[2]

seed = 42
torch.manual_seed(seed)

<torch._C.Generator at 0x7f82ee1cf0f0>

In [8]:
# Define parameters for training
num_epochs = 50
batch_size_train = 25
batch_size_val = 25

In [9]:
class SequenceDataset(Dataset):

    def __init__(self, y, X, seq_length):
        """
        Initializes the SequenceDataset.
        
        Args:
            y (torch.Tensor): The target labels for each sequence.
            X (torch.Tensor): The input sequences.
            sequence_length (int): The desired length of each sequence.
        """
        self.sequence_length = seq_length
        self.y = torch.tensor(y)
        self.X = torch.tensor(X)

    def __len__(self):
        """
        Returns the total number of samples in the dataset.
        """
        return self.X.shape[0] * self.X.shape[1]

    def __getitem__(self, i): 
        """
        Gets the i-th sample from the dataset.
        
        Args:
            i (int): Index of the desired sample.
        
        Returns:
            xx (torch.Tensor): Input sequence of length sequence_length.
            yy (torch.Tensor): Corresponding target sequence.
        """
        trial_index = i // self.X.shape[1]
        point_index = i % self.X.shape[1]
        
        if point_index > self.sequence_length - 1:
            point_start = point_index - self.sequence_length
            xx = self.X[trial_index, point_start:point_index, :]
            yy = self.y[trial_index, point_start+1:point_index+1, :]
        else:
            padding_x = self.X[trial_index, 0:1, :].repeat(self.sequence_length - point_index, 1)
            padding_y = self.y[trial_index, 0:1, :].repeat(self.sequence_length - point_index - 1, 1)
            xx = self.X[trial_index, 0:point_index, :]
            xx = torch.cat((padding_x, xx), dim=0)
            yy = self.y[trial_index, 0:point_index + 1, :]
            yy = torch.cat((padding_y, yy), dim=0)
            
        return xx, yy

In [10]:
def huber_loss(X, y, delta):
    residual = torch.abs(X - y)
    condition = residual < delta
    loss = torch.where(condition, 0.5 * residual**2, delta * residual - 0.5 * delta**2)
    return loss.mean()

In [11]:
def reg_hnet_noweights(weights, alpha, l1_ratio):
    
    """
    Implement an L1-L2 penalty on the norm of the model weights.

    model: MLP
    alpha: scaling parameter for the regularization.
    l1_ratio: mixing parameter between L1 and L2 loss.

    Returns:
    reg: regularization term
    """
    l1_loss = 0
    l2_loss = 0

    # Accumulate L1 and L2 losses for weight matrices in the model
    for weight_tensor in weights[1:2]:
        l1_loss += torch.sum(torch.abs(weight_tensor))
        l2_loss += torch.sum(weight_tensor.pow(2))

    reg = l1_ratio * l1_loss + (1 - l1_ratio) * l2_loss

    reg = alpha * reg

    return reg

In [12]:
# Fit the LSTM model
def train_model_optuna(trial):
    
    ###########################################################
    ### Definition of inputs
    ###########################################################
    X_base = x_train_base
    Y_base = y_train_base
    X_val_base = x_val_base
    Y_val_base = y_val_base

    X_stim = x_train_stim
    Y_stim = y_train_stim
    X_val_stim = x_val_stim
    Y_val_stim = y_val_stim


    ###########################################################
    ### Definition of hyperparameters and model for main network
    ###########################################################
    hidden_units = trial.suggest_int('hidden_units', 10, 50)
    input_rec = trial.suggest_int('input_rec', 30, 60)
    seq_length_LSTM = trial.suggest_int('seq_length_LSTM', 10, 20)


    model = SimpleRNN(n_in=num_features, rnn_layers=(hidden_units,), 
                    fc_layers_pre=(input_rec,), fc_layers=(num_dim_output,),
                    use_lstm = True, no_weights = True).to(device)
    

    ###########################################################
    ### Definition of hyperparameters and model for hypernetwork
    ###########################################################
    num_conditions = 2 # fixed, as we only have baseline and stimulation
    size_task_embedding = trial.suggest_int('size_task_embedding', 4, 50)
    num_units_hnet = trial.suggest_int('num_units_hnet', 10,50)
    num_layers_hnet = trial.suggest_int('num_layers_hnet', 1,4)
    layers = [num_units_hnet for i in range(num_layers_hnet)]
    # Model
    hnet = HMLP(model.param_shapes, uncond_in_size=0,
             cond_in_size=size_task_embedding,
            layers=layers, 
            num_cond_embs=num_conditions).to(device)
    
    hnet.apply_hyperfan_init(mnet=model)

    ###########################################################
    ### Fixed training parameters
    ###########################################################
    num_epochs= 1000
    early_stop = 5
    

    ###########################################################
    ### Defining training hyperparameters to optimize
    ###########################################################

    # Set up the optimizer with the specified learning rate
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log = True)
    # Set up the optimizer with the specified learning rate
    optimizer = torch.optim.Adam(hnet.internal_params, lr=lr)

    # Set up a learning rate scheduler
    lr_step_size = 10 # trial.suggest_int('lr_step_size', 5, 15)
    lr_gamma = 0.9 # trial.suggest_float('lr_gamma', 1, 1.5)
    scheduler = lr_scheduler.StepLR(optimizer, 
                                    step_size=lr_step_size, 
                                    gamma=lr_gamma)
    # hyperparameter for huber loss
    delta = trial.suggest_int('huber_delta', 5, 10) #was 8 before

    # hyperparameter for regularizer
    alpha = trial.suggest_float('alpha_reg', 1e-7, 1e-3, log = True) # was 1e-5 before
    l1_ratio =trial.suggest_float('l1_ratio_reg', 0.2, 0.8) #was 0.5 before

    # Reshaping data and creating batchs
    batch_size_train = trial.suggest_int('batch_size_train', 20, 50)
    batch_size_val = trial.suggest_int('batch_size_val',20, 50)

    train_dataset_baseline = SequenceDataset(
    y_train_base,    x_train_base,    seq_length=seq_length_LSTM)
    train_dataset_stim = SequenceDataset(
    y_train_stim,    x_train_stim,    seq_length=seq_length_LSTM)
    val_dataset_baseline = SequenceDataset(
    y_val_base,    x_val_base,    seq_length=seq_length_LSTM)
    val_dataset_stim = SequenceDataset(
    y_val_stim,    x_val_stim,    seq_length=seq_length_LSTM)

    loader_train_b = data.DataLoader(train_dataset_baseline, batch_size=batch_size_train, shuffle=True)
    loader_train_s = data.DataLoader(train_dataset_stim, batch_size=batch_size_train, shuffle=True)

    loader_val_b = data.DataLoader(val_dataset_baseline, batch_size=batch_size_val, shuffle=True)
    loader_val_s = data.DataLoader(val_dataset_stim, batch_size=batch_size_val, shuffle=True)


    #####################################################
    ########## START TRAINING 
    #####################################################

    # Keep track of the best model's parameters and loss
    best_model_wts = deepcopy(model.state_dict())
    best_loss = 1e8

    # Enable anomaly detection for debugging
    torch.autograd.set_detect_anomaly(True)

    # Track the train and validation loss
    train_losses = []
    val_losses = []
    # Counters for early stopping
    not_increased = 0
    end_train = 0
    
    regularizer = True

    # Loop through epochs
    for epoch in np.arange(num_epochs):
        for phase in ['train', 'val']:
            # set model to train/validation as appropriate
            if phase == 'train':
                model.train()
                loaders = zip(loader_train_b, loader_train_s)
            else:
                model.eval()
                loaders = zip(loader_val_b, loader_val_s)

            # Initialize variables to track loss and batch size
            running_loss = 0
            running_size = 0        

            # Iterate over batches in the loader
            for data_b, data_s in loaders:

                # Define data for this batch
                x_b = data_b[0].to('cuda')
                y_b = data_b[1].to('cuda')
                x_s = data_s[0].to('cuda')
                y_s = data_s[1].to('cuda')

                
                if phase == "train":
                    with torch.set_grad_enabled(True):
                        optimizer.zero_grad()

                        # Compute BASELINE loss.
                        W_base = hnet(cond_id=0)
                        base_P = model.forward(x_b, weights=W_base)
                        base_P = torch.squeeze(base_P) # torch.sigmoid(base_P))
                        loss_base = huber_loss(base_P, y_b, delta = delta)
                        
                        
                        # Compute STIMULATION loss.
                        W_stim = hnet(cond_id=1)
                        stim_P = model.forward(x_s, weights=W_stim)
                        stim_P = torch.squeeze(stim_P) #torch.sigmoid(stim_P))
                        loss_stim = huber_loss(stim_P, y_s, delta = delta)

                        # Combine loss for 2 tasks
                        loss_t = loss_base + loss_stim    #only for printing

                        # Add regularization to the loss in the training phase
                        if regularizer is not None:
                            loss_stim_reg = loss_stim + reg_hnet_noweights(W_stim, l1_ratio, alpha)
                            loss_base_reg = loss_base + reg_hnet_noweights(W_base, l1_ratio, alpha)
                            # Combine loss for 2 tasks
                            loss_t_r = loss_base_reg + loss_stim_reg

                        else:               
                            loss_t_r = loss_t 

                        # Compute gradients and perform an optimization step
                        loss_t_r.backward()
                        optimizer.step()
                else:
                    # just compute the loss in validation phase
                    W_base = hnet(cond_id=0)
                    base_P = model.forward(x_b, weights=W_base)
                    base_P = torch.squeeze(base_P) #torch.sigmoid(base_P))
                    loss_base = huber_loss(base_P, y_b, delta = delta)

                    W_stim = hnet(cond_id=1)
                    stim_P = model.forward(x_s, weights=W_stim)
                    stim_P = torch.squeeze(stim_P) #torch.sigmoid(stim_P))
                    loss_stim = huber_loss(stim_P, y_s, delta = delta)

                    loss_t = loss_base + loss_stim

                # Ensure the loss is finite
                assert torch.isfinite(loss_t)
                assert torch.isfinite(loss_t_r)
                running_loss += loss_t.item()
                running_size += 1

            # compute the train/validation loss and update the best
            # model parameters if this is the lowest validation loss yet
            running_loss /= running_size
            if phase == "train":
                train_losses.append(running_loss)
            else:
                val_losses.append(running_loss)
                
                # Update best model parameters if validation loss improves
                if running_loss < best_loss:
                    best_loss = running_loss
                    best_model_wts = deepcopy(model.state_dict())
                    not_increased = 0
                else:
                    # Perform early stopping if validation loss doesn't improve
                    if epoch > 10:
                        not_increased += 1
                        # print('Not increased : {}/5'.format(not_increased))
                        if not_increased == early_stop:
                            print('Decrease LR')
                            for g in optimizer.param_groups:
                                g['lr'] = g['lr'] / 2
                            not_increased = 0
                            end_train += 1
                        
                        if end_train == 2:
                            model.load_state_dict(best_model_wts)
                            return best_loss
                        

        # Update learning rate with the scheduler
        scheduler.step()
        #print("Epoch {:03} Train {:.4f} Val {:.4f}".format(epoch, train_losses[-1], val_losses[-1]))


        trial.report(val_losses[-1], epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
        
    # load best model weights
    model.load_state_dict(best_model_wts)
    
    

    return best_loss


In [13]:
study = optuna.create_study(direction="minimize")

# Set pruning options
study.pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=5)

study.optimize(train_model_optuna, n_trials=30)

importance_scores = optuna.importance.get_param_importances(study)

# Print importance scores
for param, score in importance_scores.items():
    print(f"{param}: {score}")

pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
complete_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

print('Study statistics: ')
print("Number of finished trials: ", len(study.trials))
print("Number of pruned trials: ", len(pruned_trials))
print("Number of complete trials: ", len(complete_trials))

print('Best trial: ')
trial = study.best_trial

print("Loss: {}".format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[I 2024-04-22 16:49:57,077] A new study created in memory with name: no-name-e83fc5d1-77d2-4094-bc5a-4993a98882e8


Creating a simple RNN with 10428 weights.
Created MLP Hypernet.
Hypernetwork with 357529 weights and 10428 outputs (compression ratio: 34.29).
The network consists of 357489 unconditional weights (357489 internally maintained) and 40 conditional weights (40 internally maintained).
Epoch 000 Train 4.7249 Val 3.7838
Epoch 001 Train 3.1366 Val 2.4792
Epoch 002 Train 1.8752 Val 1.4785
Epoch 003 Train 1.0148 Val 0.8099
Epoch 004 Train 0.5244 Val 0.4099
Epoch 005 Train 0.2646 Val 0.2443
Epoch 006 Train 0.1701 Val 0.1808
Epoch 007 Train 0.1355 Val 0.1501
Epoch 008 Train 0.1131 Val 0.1369
Epoch 009 Train 0.1040 Val 0.1244
Epoch 010 Train 0.0988 Val 0.1249
Epoch 011 Train 0.0921 Val 0.1155
Epoch 012 Train 0.0884 Val 0.1111
Epoch 013 Train 0.0832 Val 0.1082
Epoch 014 Train 0.0789 Val 0.1050
Epoch 015 Train 0.0771 Val 0.1033
Epoch 016 Train 0.0730 Val 0.0978
Epoch 017 Train 0.0732 Val 0.1011
Epoch 018 Train 0.0692 Val 0.1015
Epoch 019 Train 0.0670 Val 0.0959
Epoch 020 Train 0.0656 Val 0.0880
Epoc

[I 2024-04-22 17:15:29,646] Trial 0 finished with value: 0.05891955678222606 and parameters: {'hidden_units': 19, 'input_rec': 43, 'seq_length_LSTM': 11, 'size_task_embedding': 20, 'num_units_hnet': 33, 'num_layers_hnet': 3, 'lr': 2.0059438162661392e-05, 'huber_delta': 9, 'alpha_reg': 0.00011252632910055825, 'l1_ratio_reg': 0.3651107054885052, 'batch_size_train': 32, 'batch_size_val': 36}. Best is trial 0 with value: 0.05891955678222606.


Decrease LR
Creating a simple RNN with 8669 weights.
Created MLP Hypernet.
Hypernetwork with 418560 weights and 8669 outputs (compression ratio: 48.28).
The network consists of 418462 unconditional weights (418462 internally maintained) and 98 conditional weights (98 internally maintained).
Epoch 000 Train 1.5703 Val 1.4964
Epoch 001 Train 1.1810 Val 1.1705
Epoch 002 Train 0.8924 Val 0.9223
Epoch 003 Train 0.6403 Val 0.6737
Epoch 004 Train 0.4489 Val 0.5003
Epoch 005 Train 0.3220 Val 0.3741
Epoch 006 Train 0.2440 Val 0.2894
Epoch 007 Train 0.1925 Val 0.2560
Epoch 008 Train 0.1551 Val 0.2171
Epoch 009 Train 0.1329 Val 0.2003
Epoch 010 Train 0.1153 Val 0.1839
Epoch 011 Train 0.1028 Val 0.1708
Epoch 012 Train 0.0914 Val 0.1611
Epoch 013 Train 0.0830 Val 0.1510
Epoch 014 Train 0.0764 Val 0.1391
Epoch 015 Train 0.0711 Val 0.1302
Epoch 016 Train 0.0649 Val 0.1194
Epoch 017 Train 0.0614 Val 0.1257
Epoch 018 Train 0.0583 Val 0.1151
Epoch 019 Train 0.0542 Val 0.1199
Epoch 020 Train 0.0514 Val 0

[I 2024-04-22 17:31:01,764] Trial 1 finished with value: 0.07427431724078508 and parameters: {'hidden_units': 14, 'input_rec': 42, 'seq_length_LSTM': 14, 'size_task_embedding': 49, 'num_units_hnet': 47, 'num_layers_hnet': 1, 'lr': 1.3070208278080213e-05, 'huber_delta': 9, 'alpha_reg': 0.0002070864599485419, 'l1_ratio_reg': 0.3388216255543003, 'batch_size_train': 38, 'batch_size_val': 23}. Best is trial 0 with value: 0.05891955678222606.


Decrease LR
Creating a simple RNN with 19299 weights.
Created MLP Hypernet.
Hypernetwork with 815462 weights and 19299 outputs (compression ratio: 42.25).
The network consists of 815396 unconditional weights (815396 internally maintained) and 66 conditional weights (66 internally maintained).
Epoch 000 Train 0.1082 Val 0.0679
Epoch 001 Train 0.0516 Val 0.0661
Epoch 002 Train 0.0485 Val 0.0696
Epoch 003 Train 0.0457 Val 0.0608
Epoch 004 Train 0.0375 Val 0.0593
Epoch 005 Train 0.0300 Val 0.0593
Epoch 006 Train 0.0253 Val 0.0579
Epoch 007 Train 0.0214 Val 0.0542
Epoch 008 Train 0.0198 Val 0.0502
Epoch 009 Train 0.0181 Val 0.0523
Epoch 010 Train 0.0169 Val 0.0503
Epoch 011 Train 0.0158 Val 0.0484
Epoch 012 Train 0.0150 Val 0.0474
Epoch 013 Train 0.0143 Val 0.0498
Epoch 014 Train 0.0134 Val 0.0479
Epoch 015 Train 0.0127 Val 0.0505
Epoch 016 Train 0.0123 Val 0.0470
Epoch 017 Train 0.0122 Val 0.0470
Epoch 018 Train 0.0115 Val 0.0482
Epoch 019 Train 0.0102 Val 0.0467
Epoch 020 Train 0.0102 Val

[I 2024-04-22 17:40:46,464] Trial 2 finished with value: 0.04602542646080345 and parameters: {'hidden_units': 32, 'input_rec': 58, 'seq_length_LSTM': 13, 'size_task_embedding': 33, 'num_units_hnet': 41, 'num_layers_hnet': 3, 'lr': 0.00044443365152317953, 'huber_delta': 8, 'alpha_reg': 1.512076567610703e-07, 'l1_ratio_reg': 0.7524167352917817, 'batch_size_train': 34, 'batch_size_val': 27}. Best is trial 2 with value: 0.04602542646080345.


Decrease LR
Creating a simple RNN with 17876 weights.
Created MLP Hypernet.
Hypernetwork with 521588 weights and 17876 outputs (compression ratio: 29.18).
The network consists of 521540 unconditional weights (521540 internally maintained) and 48 conditional weights (48 internally maintained).
Epoch 000 Train 0.1564 Val 0.0943
Epoch 001 Train 0.0653 Val 0.0779
Epoch 002 Train 0.0396 Val 0.0571
Epoch 003 Train 0.0305 Val 0.0491
Epoch 004 Train 0.0257 Val 0.0479
Epoch 005 Train 0.0226 Val 0.0455
Epoch 006 Train 0.0211 Val 0.0451
Epoch 007 Train 0.0183 Val 0.0432
Epoch 008 Train 0.0166 Val 0.0414
Epoch 009 Train 0.0141 Val 0.0425
Epoch 010 Train 0.0123 Val 0.0436
Epoch 011 Train 0.0109 Val 0.0447
Epoch 012 Train 0.0095 Val 0.0473
Epoch 013 Train 0.0078 Val 0.0467
Epoch 014 Train 0.0067 Val 0.0454
Decrease LR
Epoch 015 Train 0.0057 Val 0.0438
Epoch 016 Train 0.0051 Val 0.0462
Epoch 017 Train 0.0046 Val 0.0449
Epoch 018 Train 0.0044 Val 0.0464
Epoch 019 Train 0.0038 Val 0.0447


[I 2024-04-22 17:46:15,647] Trial 3 finished with value: 0.04140114768215861 and parameters: {'hidden_units': 39, 'input_rec': 40, 'seq_length_LSTM': 15, 'size_task_embedding': 24, 'num_units_hnet': 28, 'num_layers_hnet': 4, 'lr': 0.0016783133238250521, 'huber_delta': 6, 'alpha_reg': 0.0002577339781036089, 'l1_ratio_reg': 0.672162300160937, 'batch_size_train': 43, 'batch_size_val': 45}. Best is trial 3 with value: 0.04140114768215861.


Decrease LR
Creating a simple RNN with 20416 weights.
Created MLP Hypernet.
Hypernetwork with 822302 weights and 20416 outputs (compression ratio: 40.28).
The network consists of 822256 unconditional weights (822256 internally maintained) and 46 conditional weights (46 internally maintained).
Epoch 000 Train 0.2725 Val 0.0684
Epoch 001 Train 0.0407 Val 0.0593
Epoch 002 Train 0.0308 Val 0.0621
Epoch 003 Train 0.0281 Val 0.0540
Epoch 004 Train 0.0264 Val 0.0476
Epoch 005 Train 0.0254 Val 0.0472
Epoch 006 Train 0.0246 Val 0.0475
Epoch 007 Train 0.0233 Val 0.0479
Epoch 008 Train 0.0230 Val 0.0471
Epoch 009 Train 0.0227 Val 0.0453
Epoch 010 Train 0.0218 Val 0.0446
Epoch 011 Train 0.0214 Val 0.0444
Epoch 012 Train 0.0205 Val 0.0465
Epoch 013 Train 0.0199 Val 0.0448
Epoch 014 Train 0.0186 Val 0.0421
Epoch 015 Train 0.0188 Val 0.0438
Epoch 016 Train 0.0179 Val 0.0432
Epoch 017 Train 0.0164 Val 0.0434
Epoch 018 Train 0.0162 Val 0.0432
Decrease LR
Epoch 019 Train 0.0160 Val 0.0452
Epoch 020 Trai

[I 2024-04-22 17:52:50,085] Trial 4 finished with value: 0.0421086738023414 and parameters: {'hidden_units': 47, 'input_rec': 35, 'seq_length_LSTM': 14, 'size_task_embedding': 23, 'num_units_hnet': 39, 'num_layers_hnet': 4, 'lr': 0.00019204824128562557, 'huber_delta': 7, 'alpha_reg': 0.00015148875775767496, 'l1_ratio_reg': 0.4685122952188539, 'batch_size_train': 47, 'batch_size_val': 44}. Best is trial 3 with value: 0.04140114768215861.


Decrease LR
Creating a simple RNN with 17398 weights.
Created MLP Hypernet.
Hypernetwork with 453212 weights and 17398 outputs (compression ratio: 26.05).
The network consists of 453198 unconditional weights (453198 internally maintained) and 14 conditional weights (14 internally maintained).
Epoch 000 Train 0.4158 Val 0.1497
Epoch 001 Train 0.0851 Val 0.0752
Epoch 002 Train 0.0487 Val 0.0636
Epoch 003 Train 0.0401 Val 0.0574
Epoch 004 Train 0.0347 Val 0.0532
Epoch 005 Train 0.0299 Val 0.0483
Epoch 006 Train 0.0261 Val 0.0482
Epoch 007 Train 0.0232 Val 0.0496
Epoch 008 Train 0.0210 Val 0.0489
Epoch 009 Train 0.0189 Val 0.0494
Epoch 010 Train 0.0171 Val 0.0462
Epoch 011 Train 0.0160 Val 0.0475
Epoch 012 Train 0.0145 Val 0.0470
Epoch 013 Train 0.0133 Val 0.0481
Epoch 014 Train 0.0129 Val 0.0460
Epoch 015 Train 0.0116 Val 0.0480
Epoch 016 Train 0.0109 Val 0.0472
Epoch 017 Train 0.0104 Val 0.0485
Epoch 018 Train 0.0101 Val 0.0500
Decrease LR
Epoch 019 Train 0.0091 Val 0.0476
Epoch 020 Trai

[I 2024-04-22 18:00:32,884] Trial 5 finished with value: 0.04475161809498376 and parameters: {'hidden_units': 41, 'input_rec': 35, 'seq_length_LSTM': 10, 'size_task_embedding': 7, 'num_units_hnet': 25, 'num_layers_hnet': 2, 'lr': 7.968432978600385e-05, 'huber_delta': 10, 'alpha_reg': 1.6392655864135365e-06, 'l1_ratio_reg': 0.5641664026718061, 'batch_size_train': 30, 'batch_size_val': 41}. Best is trial 3 with value: 0.04140114768215861.


Decrease LR
Creating a simple RNN with 13995 weights.
Created MLP Hypernet.
Hypernetwork with 660213 weights and 13995 outputs (compression ratio: 47.17).
The network consists of 660203 unconditional weights (660203 internally maintained) and 10 conditional weights (10 internally maintained).
Epoch 000 Train 0.5031 Val 0.4603
Epoch 001 Train 0.3186 Val 0.2983
Epoch 002 Train 0.1888 Val 0.1954
Epoch 003 Train 0.1178 Val 0.1445
Epoch 004 Train 0.0832 Val 0.1120


[I 2024-04-22 18:02:05,871] Trial 6 pruned. 


Epoch 005 Train 0.0664 Val 0.0997
Creating a simple RNN with 21852 weights.
Created MLP Hypernet.
Hypernetwork with 858312 weights and 21852 outputs (compression ratio: 39.28).
The network consists of 858232 unconditional weights (858232 internally maintained) and 80 conditional weights (80 internally maintained).
Epoch 000 Train 0.6765 Val 0.4705
Epoch 001 Train 0.2547 Val 0.2262
Epoch 002 Train 0.1280 Val 0.1368
Epoch 003 Train 0.0859 Val 0.1042
Epoch 004 Train 0.0672 Val 0.0882


[I 2024-04-22 18:03:47,136] Trial 7 pruned. 


Epoch 005 Train 0.0571 Val 0.0776
Creating a simple RNN with 10325 weights.
Created MLP Hypernet.
Hypernetwork with 527977 weights and 10325 outputs (compression ratio: 51.14).
The network consists of 527925 unconditional weights (527925 internally maintained) and 52 conditional weights (52 internally maintained).
Epoch 000 Train 0.2581 Val 0.0700
Epoch 001 Train 0.0370 Val 0.0544
Epoch 002 Train 0.0212 Val 0.0489
Epoch 003 Train 0.0155 Val 0.0467
Epoch 004 Train 0.0125 Val 0.0482
Epoch 005 Train 0.0098 Val 0.0499
Epoch 006 Train 0.0077 Val 0.0541
Epoch 007 Train 0.0062 Val 0.0531
Epoch 008 Train 0.0051 Val 0.0581
Epoch 009 Train 0.0039 Val 0.0528
Epoch 010 Train 0.0032 Val 0.0520
Epoch 011 Train 0.0026 Val 0.0508
Epoch 012 Train 0.0022 Val 0.0535
Epoch 013 Train 0.0018 Val 0.0532
Epoch 014 Train 0.0015 Val 0.0480
Decrease LR
Epoch 015 Train 0.0014 Val 0.0560
Epoch 016 Train 0.0012 Val 0.0527
Epoch 017 Train 0.0011 Val 0.0518
Epoch 018 Train 0.0010 Val 0.0526
Epoch 019 Train 0.0009 Val

[I 2024-04-22 18:09:19,232] Trial 8 finished with value: 0.04668999018347644 and parameters: {'hidden_units': 14, 'input_rec': 51, 'seq_length_LSTM': 16, 'size_task_embedding': 26, 'num_units_hnet': 50, 'num_layers_hnet': 1, 'lr': 0.006587622056505793, 'huber_delta': 10, 'alpha_reg': 0.0001304253219122565, 'l1_ratio_reg': 0.7850528070730365, 'batch_size_train': 30, 'batch_size_val': 32}. Best is trial 3 with value: 0.04140114768215861.


Decrease LR
Creating a simple RNN with 23260 weights.
Created MLP Hypernet.
Hypernetwork with 513031 weights and 23260 outputs (compression ratio: 22.06).
The network consists of 512959 unconditional weights (512959 internally maintained) and 72 conditional weights (72 internally maintained).
Epoch 000 Train 0.8527 Val 0.6735
Epoch 001 Train 0.4987 Val 0.4050
Epoch 002 Train 0.2615 Val 0.2267
Epoch 003 Train 0.1261 Val 0.1345
Epoch 004 Train 0.0830 Val 0.1085


[I 2024-04-22 18:10:53,741] Trial 9 pruned. 


Epoch 005 Train 0.0680 Val 0.0970
Creating a simple RNN with 12252 weights.
Created MLP Hypernet.
Hypernetwork with 135292 weights and 12252 outputs (compression ratio: 11.04).
The network consists of 135262 unconditional weights (135262 internally maintained) and 30 conditional weights (30 internally maintained).
Epoch 000 Train 0.2510 Val 0.1163
Epoch 001 Train 0.0687 Val 0.0759
Epoch 002 Train 0.0437 Val 0.0527
Epoch 003 Train 0.0304 Val 0.0460
Epoch 004 Train 0.0252 Val 0.0428
Epoch 005 Train 0.0224 Val 0.0390
Epoch 006 Train 0.0210 Val 0.0390
Epoch 007 Train 0.0186 Val 0.0384
Epoch 008 Train 0.0169 Val 0.0373
Epoch 009 Train 0.0144 Val 0.0387
Epoch 010 Train 0.0124 Val 0.0370
Epoch 011 Train 0.0107 Val 0.0373
Epoch 012 Train 0.0096 Val 0.0399
Epoch 013 Train 0.0084 Val 0.0391
Epoch 014 Train 0.0077 Val 0.0397
Decrease LR
Epoch 015 Train 0.0068 Val 0.0377
Epoch 016 Train 0.0064 Val 0.0406
Epoch 017 Train 0.0060 Val 0.0393
Epoch 018 Train 0.0057 Val 0.0405
Epoch 019 Train 0.0055 Val

[I 2024-04-22 18:16:25,500] Trial 10 finished with value: 0.036975778469882564 and parameters: {'hidden_units': 31, 'input_rec': 32, 'seq_length_LSTM': 19, 'size_task_embedding': 15, 'num_units_hnet': 10, 'num_layers_hnet': 4, 'lr': 0.002719136472254405, 'huber_delta': 5, 'alpha_reg': 9.367466469591627e-06, 'l1_ratio_reg': 0.6380706893491844, 'batch_size_train': 46, 'batch_size_val': 50}. Best is trial 10 with value: 0.036975778469882564.


Decrease LR
Creating a simple RNN with 12131 weights.
Created MLP Hypernet.
Hypernetwork with 133949 weights and 12131 outputs (compression ratio: 11.04).
The network consists of 133921 unconditional weights (133921 internally maintained) and 28 conditional weights (28 internally maintained).
Epoch 000 Train 0.2910 Val 0.1099
Epoch 001 Train 0.0769 Val 0.0841
Epoch 002 Train 0.0444 Val 0.0513
Epoch 003 Train 0.0278 Val 0.0428
Epoch 004 Train 0.0242 Val 0.0436
Epoch 005 Train 0.0214 Val 0.0427
Epoch 006 Train 0.0188 Val 0.0390
Epoch 007 Train 0.0162 Val 0.0394
Epoch 008 Train 0.0150 Val 0.0383
Epoch 009 Train 0.0129 Val 0.0381
Epoch 010 Train 0.0111 Val 0.0367
Epoch 011 Train 0.0098 Val 0.0385
Epoch 012 Train 0.0083 Val 0.0383
Epoch 013 Train 0.0076 Val 0.0408
Epoch 014 Train 0.0069 Val 0.0396
Decrease LR
Epoch 015 Train 0.0063 Val 0.0405
Epoch 016 Train 0.0054 Val 0.0408
Epoch 017 Train 0.0052 Val 0.0405
Epoch 018 Train 0.0048 Val 0.0414
Epoch 019 Train 0.0047 Val 0.0426


[I 2024-04-22 18:21:50,122] Trial 11 finished with value: 0.03666015840803216 and parameters: {'hidden_units': 32, 'input_rec': 30, 'seq_length_LSTM': 20, 'size_task_embedding': 14, 'num_units_hnet': 10, 'num_layers_hnet': 4, 'lr': 0.0034028840097651237, 'huber_delta': 5, 'alpha_reg': 8.38820392002778e-06, 'l1_ratio_reg': 0.6400974750754305, 'batch_size_train': 49, 'batch_size_val': 49}. Best is trial 11 with value: 0.03666015840803216.


Decrease LR
Creating a simple RNN with 11621 weights.
Created MLP Hypernet.
Hypernetwork with 128217 weights and 11621 outputs (compression ratio: 11.03).
The network consists of 128191 unconditional weights (128191 internally maintained) and 26 conditional weights (26 internally maintained).
Epoch 000 Train 0.2038 Val 0.0693
Epoch 001 Train 0.0365 Val 0.0485
Epoch 002 Train 0.0252 Val 0.0399
Epoch 003 Train 0.0198 Val 0.0380
Epoch 004 Train 0.0173 Val 0.0372
Epoch 005 Train 0.0145 Val 0.0381
Epoch 006 Train 0.0120 Val 0.0412
Epoch 007 Train 0.0096 Val 0.0381
Epoch 008 Train 0.0077 Val 0.0391
Epoch 009 Train 0.0063 Val 0.0386
Epoch 010 Train 0.0054 Val 0.0378
Epoch 011 Train 0.0047 Val 0.0423
Epoch 012 Train 0.0041 Val 0.0389
Epoch 013 Train 0.0035 Val 0.0367
Epoch 014 Train 0.0031 Val 0.0398
Epoch 015 Train 0.0028 Val 0.0411
Epoch 016 Train 0.0023 Val 0.0410
Epoch 017 Train 0.0020 Val 0.0400
Decrease LR
Epoch 018 Train 0.0018 Val 0.0412
Epoch 019 Train 0.0016 Val 0.0415
Epoch 020 Trai

[I 2024-04-22 18:28:02,541] Trial 12 finished with value: 0.03667137352906793 and parameters: {'hidden_units': 30, 'input_rec': 31, 'seq_length_LSTM': 20, 'size_task_embedding': 13, 'num_units_hnet': 10, 'num_layers_hnet': 3, 'lr': 0.009818191294744729, 'huber_delta': 5, 'alpha_reg': 5.5237116916880655e-06, 'l1_ratio_reg': 0.6154040867846227, 'batch_size_train': 49, 'batch_size_val': 50}. Best is trial 11 with value: 0.03666015840803216.


Decrease LR
Creating a simple RNN with 9953 weights.
Created MLP Hypernet.
Hypernetwork with 119880 weights and 9953 outputs (compression ratio: 12.04).
The network consists of 119854 unconditional weights (119854 internally maintained) and 26 conditional weights (26 internally maintained).
Epoch 000 Train 0.2767 Val 0.0789
Epoch 001 Train 0.0357 Val 0.0450
Epoch 002 Train 0.0232 Val 0.0367
Epoch 003 Train 0.0190 Val 0.0390
Epoch 004 Train 0.0155 Val 0.0371
Epoch 005 Train 0.0120 Val 0.0341
Epoch 006 Train 0.0091 Val 0.0367
Epoch 007 Train 0.0073 Val 0.0388
Epoch 008 Train 0.0061 Val 0.0363
Epoch 009 Train 0.0053 Val 0.0399
Epoch 010 Train 0.0045 Val 0.0376
Epoch 011 Train 0.0038 Val 0.0361
Epoch 012 Train 0.0034 Val 0.0375
Epoch 013 Train 0.0030 Val 0.0372
Epoch 014 Train 0.0026 Val 0.0374
Decrease LR
Epoch 015 Train 0.0024 Val 0.0391
Epoch 016 Train 0.0021 Val 0.0379
Epoch 017 Train 0.0019 Val 0.0393
Epoch 018 Train 0.0018 Val 0.0397
Epoch 019 Train 0.0018 Val 0.0420


[I 2024-04-22 18:33:31,391] Trial 13 finished with value: 0.034108375129166556 and parameters: {'hidden_units': 26, 'input_rec': 30, 'seq_length_LSTM': 20, 'size_task_embedding': 13, 'num_units_hnet': 11, 'num_layers_hnet': 3, 'lr': 0.008724683475679063, 'huber_delta': 6, 'alpha_reg': 3.3324906702023635e-06, 'l1_ratio_reg': 0.2132256325697216, 'batch_size_train': 50, 'batch_size_val': 50}. Best is trial 13 with value: 0.034108375129166556.


Decrease LR
Creating a simple RNN with 10859 weights.
Created MLP Hypernet.
Hypernetwork with 196338 weights and 10859 outputs (compression ratio: 18.08).
The network consists of 196312 unconditional weights (196312 internally maintained) and 26 conditional weights (26 internally maintained).
Epoch 000 Train 0.2274 Val 0.0781
Epoch 001 Train 0.0551 Val 0.0642
Epoch 002 Train 0.0399 Val 0.0565
Epoch 003 Train 0.0303 Val 0.0551
Epoch 004 Train 0.0265 Val 0.0478
Epoch 005 Train 0.0224 Val 0.0469
Epoch 006 Train 0.0208 Val 0.0486
Epoch 007 Train 0.0192 Val 0.0433
Epoch 008 Train 0.0181 Val 0.0434
Epoch 009 Train 0.0160 Val 0.0448
Epoch 010 Train 0.0146 Val 0.0450
Epoch 011 Train 0.0138 Val 0.0480
Epoch 012 Train 0.0129 Val 0.0452
Epoch 013 Train 0.0119 Val 0.0447
Epoch 014 Train 0.0111 Val 0.0453
Decrease LR
Epoch 015 Train 0.0102 Val 0.0497
Epoch 016 Train 0.0091 Val 0.0470
Epoch 017 Train 0.0087 Val 0.0461
Epoch 018 Train 0.0083 Val 0.0459
Epoch 019 Train 0.0082 Val 0.0479


[I 2024-04-22 18:38:59,245] Trial 14 finished with value: 0.04325725001848413 and parameters: {'hidden_units': 24, 'input_rec': 37, 'seq_length_LSTM': 18, 'size_task_embedding': 13, 'num_units_hnet': 17, 'num_layers_hnet': 3, 'lr': 0.0016853815481690482, 'huber_delta': 6, 'alpha_reg': 1.0576826856557215e-06, 'l1_ratio_reg': 0.23229372046381225, 'batch_size_train': 50, 'batch_size_val': 46}. Best is trial 13 with value: 0.034108375129166556.


Decrease LR
Creating a simple RNN with 8972 weights.
Created MLP Hypernet.
Hypernetwork with 153662 weights and 8972 outputs (compression ratio: 17.13).
The network consists of 153628 unconditional weights (153628 internally maintained) and 34 conditional weights (34 internally maintained).
Epoch 000 Train 0.3896 Val 0.0934
Epoch 001 Train 0.0731 Val 0.0797
Epoch 002 Train 0.0514 Val 0.0652
Epoch 003 Train 0.0345 Val 0.0487
Epoch 004 Train 0.0265 Val 0.0454
Epoch 005 Train 0.0224 Val 0.0431
Epoch 006 Train 0.0201 Val 0.0427
Epoch 007 Train 0.0182 Val 0.0413
Epoch 008 Train 0.0163 Val 0.0417
Epoch 009 Train 0.0137 Val 0.0396
Epoch 010 Train 0.0127 Val 0.0442
Epoch 011 Train 0.0108 Val 0.0432
Epoch 012 Train 0.0094 Val 0.0432
Epoch 013 Train 0.0083 Val 0.0456
Epoch 014 Train 0.0074 Val 0.0450
Decrease LR
Epoch 015 Train 0.0067 Val 0.0428
Epoch 016 Train 0.0062 Val 0.0462
Epoch 017 Train 0.0058 Val 0.0447
Epoch 018 Train 0.0056 Val 0.0410
Epoch 019 Train 0.0053 Val 0.0462


[I 2024-04-22 18:45:14,501] Trial 15 finished with value: 0.039649954645896386 and parameters: {'hidden_units': 23, 'input_rec': 30, 'seq_length_LSTM': 17, 'size_task_embedding': 17, 'num_units_hnet': 16, 'num_layers_hnet': 4, 'lr': 0.004385989310185562, 'huber_delta': 6, 'alpha_reg': 3.260626923075163e-05, 'l1_ratio_reg': 0.2487731050598352, 'batch_size_train': 43, 'batch_size_val': 40}. Best is trial 13 with value: 0.034108375129166556.


Decrease LR
Creating a simple RNN with 15033 weights.
Created MLP Hypernet.
Hypernetwork with 241176 weights and 15033 outputs (compression ratio: 16.04).
The network consists of 241158 unconditional weights (241158 internally maintained) and 18 conditional weights (18 internally maintained).
Epoch 000 Train 0.3781 Val 0.1677
Epoch 001 Train 0.1707 Val 0.1190
Epoch 002 Train 0.0805 Val 0.0728
Epoch 003 Train 0.0436 Val 0.0563
Epoch 004 Train 0.0359 Val 0.0550
Epoch 005 Train 0.0253 Val 0.0461
Epoch 006 Train 0.0184 Val 0.0437


[I 2024-04-22 18:47:38,162] Trial 16 pruned. 


Epoch 007 Train 0.0155 Val 0.0456
Creating a simple RNN with 14727 weights.
Created MLP Hypernet.
Hypernetwork with 355877 weights and 14727 outputs (compression ratio: 24.16).
The network consists of 355817 unconditional weights (355817 internally maintained) and 60 conditional weights (60 internally maintained).
Epoch 000 Train 0.2057 Val 0.1352
Epoch 001 Train 0.0912 Val 0.0652
Epoch 002 Train 0.0395 Val 0.0516
Epoch 003 Train 0.0305 Val 0.0491
Epoch 004 Train 0.0273 Val 0.0500


[I 2024-04-22 18:49:29,146] Trial 17 pruned. 


Epoch 005 Train 0.0234 Val 0.0498
Creating a simple RNN with 6057 weights.
Created MLP Hypernet.
Hypernetwork with 85325 weights and 6057 outputs (compression ratio: 14.09).
The network consists of 85305 unconditional weights (85305 internally maintained) and 20 conditional weights (20 internally maintained).
Epoch 000 Train 0.2550 Val 0.1026
Epoch 001 Train 0.0757 Val 0.0898
Epoch 002 Train 0.0503 Val 0.0612
Epoch 003 Train 0.0297 Val 0.0462
Epoch 004 Train 0.0210 Val 0.0432
Epoch 005 Train 0.0182 Val 0.0412
Epoch 006 Train 0.0166 Val 0.0427
Epoch 007 Train 0.0142 Val 0.0423
Epoch 008 Train 0.0125 Val 0.0433
Epoch 009 Train 0.0107 Val 0.0430
Epoch 010 Train 0.0099 Val 0.0429
Epoch 011 Train 0.0084 Val 0.0406
Epoch 012 Train 0.0077 Val 0.0405
Epoch 013 Train 0.0072 Val 0.0421
Epoch 014 Train 0.0061 Val 0.0420
Epoch 015 Train 0.0058 Val 0.0414
Epoch 016 Train 0.0054 Val 0.0425
Decrease LR
Epoch 017 Train 0.0045 Val 0.0426
Epoch 018 Train 0.0044 Val 0.0448
Epoch 019 Train 0.0041 Val 0.04

[I 2024-04-22 18:56:17,809] Trial 18 finished with value: 0.04049005195319517 and parameters: {'hidden_units': 10, 'input_rec': 33, 'seq_length_LSTM': 18, 'size_task_embedding': 10, 'num_units_hnet': 13, 'num_layers_hnet': 3, 'lr': 0.003883978241006223, 'huber_delta': 5, 'alpha_reg': 3.6720893304546627e-06, 'l1_ratio_reg': 0.31789612859573635, 'batch_size_train': 45, 'batch_size_val': 38}. Best is trial 13 with value: 0.034108375129166556.


Decrease LR
Creating a simple RNN with 13743 weights.
Created MLP Hypernet.
Hypernetwork with 289131 weights and 13743 outputs (compression ratio: 21.04).
The network consists of 289123 unconditional weights (289123 internally maintained) and 8 conditional weights (8 internally maintained).
Epoch 000 Train 0.2599 Val 0.0755
Epoch 001 Train 0.0329 Val 0.0607
Epoch 002 Train 0.0223 Val 0.0495
Epoch 003 Train 0.0183 Val 0.0450
Epoch 004 Train 0.0149 Val 0.0470
Epoch 005 Train 0.0114 Val 0.0476
Epoch 006 Train 0.0092 Val 0.0477


[I 2024-04-22 18:58:24,241] Trial 19 pruned. 


Epoch 007 Train 0.0073 Val 0.0481
Creating a simple RNN with 14308 weights.
Created MLP Hypernet.
Hypernetwork with 461424 weights and 14308 outputs (compression ratio: 32.25).
The network consists of 461390 unconditional weights (461390 internally maintained) and 34 conditional weights (34 internally maintained).
Epoch 000 Train 0.1118 Val 0.0681
Epoch 001 Train 0.0446 Val 0.0622
Epoch 002 Train 0.0402 Val 0.0610
Epoch 003 Train 0.0380 Val 0.0536
Epoch 004 Train 0.0365 Val 0.0572


[I 2024-04-22 19:00:00,124] Trial 20 pruned. 


Epoch 005 Train 0.0371 Val 0.0557
Creating a simple RNN with 11367 weights.
Created MLP Hypernet.
Hypernetwork with 136835 weights and 11367 outputs (compression ratio: 12.04).
The network consists of 136811 unconditional weights (136811 internally maintained) and 24 conditional weights (24 internally maintained).
Epoch 000 Train 0.0735 Val 0.0716
Epoch 001 Train 0.0245 Val 0.0425
Epoch 002 Train 0.0150 Val 0.0440
Epoch 003 Train 0.0100 Val 0.0454
Epoch 004 Train 0.0074 Val 0.0459
Epoch 005 Train 0.0053 Val 0.0470
Epoch 006 Train 0.0038 Val 0.0466
Epoch 007 Train 0.0029 Val 0.0459
Epoch 008 Train 0.0022 Val 0.0468
Epoch 009 Train 0.0018 Val 0.0469
Epoch 010 Train 0.0014 Val 0.0469
Epoch 011 Train 0.0012 Val 0.0465
Epoch 012 Train 0.0010 Val 0.0462
Epoch 013 Train 0.0008 Val 0.0467
Epoch 014 Train 0.0007 Val 0.0458
Decrease LR
Epoch 015 Train 0.0006 Val 0.0477
Epoch 016 Train 0.0005 Val 0.0459
Epoch 017 Train 0.0005 Val 0.0463
Epoch 018 Train 0.0005 Val 0.0475
Epoch 019 Train 0.0004 Val

[I 2024-04-22 19:05:34,105] Trial 21 finished with value: 0.042454063622191326 and parameters: {'hidden_units': 28, 'input_rec': 33, 'seq_length_LSTM': 20, 'size_task_embedding': 12, 'num_units_hnet': 11, 'num_layers_hnet': 3, 'lr': 0.008683080337045464, 'huber_delta': 5, 'alpha_reg': 5.759214744836728e-06, 'l1_ratio_reg': 0.6110069779040423, 'batch_size_train': 48, 'batch_size_val': 50}. Best is trial 13 with value: 0.034108375129166556.


Decrease LR
Creating a simple RNN with 7776 weights.
Created MLP Hypernet.
Hypernetwork with 148826 weights and 7776 outputs (compression ratio: 19.14).
The network consists of 148788 unconditional weights (148788 internally maintained) and 38 conditional weights (38 internally maintained).
Epoch 000 Train 0.2247 Val 0.1512
Epoch 001 Train 0.1075 Val 0.0955
Epoch 002 Train 0.0657 Val 0.0810
Epoch 003 Train 0.0458 Val 0.0587
Epoch 004 Train 0.0282 Val 0.0447
Epoch 005 Train 0.0224 Val 0.0435
Epoch 006 Train 0.0192 Val 0.0424
Epoch 007 Train 0.0173 Val 0.0428
Epoch 008 Train 0.0150 Val 0.0448
Epoch 009 Train 0.0127 Val 0.0429
Epoch 010 Train 0.0110 Val 0.0442
Epoch 011 Train 0.0098 Val 0.0446
Epoch 012 Train 0.0085 Val 0.0423
Epoch 013 Train 0.0080 Val 0.0444
Epoch 014 Train 0.0074 Val 0.0444
Epoch 015 Train 0.0064 Val 0.0432
Epoch 016 Train 0.0059 Val 0.0442
Decrease LR
Epoch 017 Train 0.0055 Val 0.0439
Epoch 018 Train 0.0052 Val 0.0439
Epoch 019 Train 0.0048 Val 0.0437
Epoch 020 Train 

[I 2024-04-22 19:11:36,774] Trial 22 finished with value: 0.04230191191993042 and parameters: {'hidden_units': 19, 'input_rec': 30, 'seq_length_LSTM': 19, 'size_task_embedding': 19, 'num_units_hnet': 18, 'num_layers_hnet': 3, 'lr': 0.0027463124412829966, 'huber_delta': 5, 'alpha_reg': 3.2508301610897744e-06, 'l1_ratio_reg': 0.7158810459493113, 'batch_size_train': 50, 'batch_size_val': 48}. Best is trial 13 with value: 0.034108375129166556.


Decrease LR
Creating a simple RNN with 14505 weights.
Created MLP Hypernet.
Hypernetwork with 159941 weights and 14505 outputs (compression ratio: 11.03).
The network consists of 159915 unconditional weights (159915 internally maintained) and 26 conditional weights (26 internally maintained).
Epoch 000 Train 0.1381 Val 0.0922
Epoch 001 Train 0.0397 Val 0.0487
Epoch 002 Train 0.0237 Val 0.0392
Epoch 003 Train 0.0185 Val 0.0434
Epoch 004 Train 0.0157 Val 0.0442
Epoch 005 Train 0.0132 Val 0.0408
Epoch 006 Train 0.0103 Val 0.0409
Epoch 007 Train 0.0083 Val 0.0451
Epoch 008 Train 0.0069 Val 0.0430
Epoch 009 Train 0.0058 Val 0.0457
Epoch 010 Train 0.0050 Val 0.0444
Epoch 011 Train 0.0041 Val 0.0452
Epoch 012 Train 0.0035 Val 0.0455
Epoch 013 Train 0.0028 Val 0.0487
Epoch 014 Train 0.0022 Val 0.0489
Decrease LR
Epoch 015 Train 0.0019 Val 0.0506
Epoch 016 Train 0.0017 Val 0.0479
Epoch 017 Train 0.0015 Val 0.0496
Epoch 018 Train 0.0014 Val 0.0461
Epoch 019 Train 0.0013 Val 0.0491


[I 2024-04-22 19:17:13,221] Trial 23 finished with value: 0.03915894933652075 and parameters: {'hidden_units': 34, 'input_rec': 36, 'seq_length_LSTM': 18, 'size_task_embedding': 13, 'num_units_hnet': 10, 'num_layers_hnet': 3, 'lr': 0.005367265275910398, 'huber_delta': 5, 'alpha_reg': 1.413231533156293e-05, 'l1_ratio_reg': 0.6300206446272003, 'batch_size_train': 47, 'batch_size_val': 50}. Best is trial 13 with value: 0.034108375129166556.


Decrease LR
Creating a simple RNN with 19572 weights.
Created MLP Hypernet.
Hypernetwork with 294140 weights and 19572 outputs (compression ratio: 15.03).
The network consists of 294098 unconditional weights (294098 internally maintained) and 42 conditional weights (42 internally maintained).
Epoch 000 Train 0.1617 Val 0.0875
Epoch 001 Train 0.0439 Val 0.0638
Epoch 002 Train 0.0326 Val 0.0500
Epoch 003 Train 0.0261 Val 0.0476
Epoch 004 Train 0.0218 Val 0.0460
Epoch 005 Train 0.0185 Val 0.0431
Epoch 006 Train 0.0146 Val 0.0423
Epoch 007 Train 0.0122 Val 0.0417
Epoch 008 Train 0.0106 Val 0.0384
Epoch 009 Train 0.0092 Val 0.0403
Epoch 010 Train 0.0081 Val 0.0408
Epoch 011 Train 0.0072 Val 0.0431
Epoch 012 Train 0.0064 Val 0.0431
Epoch 013 Train 0.0056 Val 0.0436
Epoch 014 Train 0.0052 Val 0.0432
Decrease LR
Epoch 015 Train 0.0046 Val 0.0448
Epoch 016 Train 0.0041 Val 0.0441
Epoch 017 Train 0.0039 Val 0.0451
Epoch 018 Train 0.0038 Val 0.0417
Epoch 019 Train 0.0036 Val 0.0423


[I 2024-04-22 19:23:41,584] Trial 24 finished with value: 0.03840133901558546 and parameters: {'hidden_units': 43, 'input_rec': 39, 'seq_length_LSTM': 20, 'size_task_embedding': 21, 'num_units_hnet': 14, 'num_layers_hnet': 2, 'lr': 0.0020802033413381583, 'huber_delta': 6, 'alpha_reg': 6.108678213676071e-06, 'l1_ratio_reg': 0.5311994873480823, 'batch_size_train': 44, 'batch_size_val': 43}. Best is trial 13 with value: 0.034108375129166556.


Decrease LR
Creating a simple RNN with 9206 weights.
Created MLP Hypernet.
Hypernetwork with 120128 weights and 9206 outputs (compression ratio: 13.05).
The network consists of 120110 unconditional weights (120110 internally maintained) and 18 conditional weights (18 internally maintained).
Epoch 000 Train 0.2188 Val 0.0691
Epoch 001 Train 0.0339 Val 0.0470
Epoch 002 Train 0.0249 Val 0.0400
Epoch 003 Train 0.0183 Val 0.0397
Epoch 004 Train 0.0151 Val 0.0428
Epoch 005 Train 0.0119 Val 0.0451
Epoch 006 Train 0.0092 Val 0.0447
Epoch 007 Train 0.0074 Val 0.0452
Epoch 008 Train 0.0059 Val 0.0439
Epoch 009 Train 0.0049 Val 0.0409
Epoch 010 Train 0.0041 Val 0.0402
Epoch 011 Train 0.0035 Val 0.0410
Epoch 012 Train 0.0031 Val 0.0411
Epoch 013 Train 0.0026 Val 0.0408
Epoch 014 Train 0.0022 Val 0.0405
Decrease LR
Epoch 015 Train 0.0020 Val 0.0444
Epoch 016 Train 0.0017 Val 0.0438
Epoch 017 Train 0.0016 Val 0.0413
Epoch 018 Train 0.0015 Val 0.0465
Epoch 019 Train 0.0014 Val 0.0440


[I 2024-04-22 19:30:01,486] Trial 25 finished with value: 0.039733627646335604 and parameters: {'hidden_units': 21, 'input_rec': 34, 'seq_length_LSTM': 19, 'size_task_embedding': 9, 'num_units_hnet': 12, 'num_layers_hnet': 3, 'lr': 0.009799203426985943, 'huber_delta': 5, 'alpha_reg': 5.969195846445162e-07, 'l1_ratio_reg': 0.59553062947283, 'batch_size_train': 48, 'batch_size_val': 48}. Best is trial 13 with value: 0.034108375129166556.


Decrease LR
Creating a simple RNN with 10768 weights.
Created MLP Hypernet.
Hypernetwork with 216813 weights and 10768 outputs (compression ratio: 20.13).
The network consists of 216785 unconditional weights (216785 internally maintained) and 28 conditional weights (28 internally maintained).
Epoch 000 Train 0.3080 Val 0.0709
Epoch 001 Train 0.0377 Val 0.0568
Epoch 002 Train 0.0316 Val 0.0591
Epoch 003 Train 0.0264 Val 0.0560
Epoch 004 Train 0.0206 Val 0.0518


[I 2024-04-22 19:31:49,159] Trial 26 pruned. 


Epoch 005 Train 0.0186 Val 0.0479
Creating a simple RNN with 14015 weights.
Created MLP Hypernet.
Hypernetwork with 380619 weights and 14015 outputs (compression ratio: 27.16).
The network consists of 380563 unconditional weights (380563 internally maintained) and 56 conditional weights (56 internally maintained).
Epoch 000 Train 0.1711 Val 0.1044
Epoch 001 Train 0.0658 Val 0.0742
Epoch 002 Train 0.0361 Val 0.0484
Epoch 003 Train 0.0245 Val 0.0405
Epoch 004 Train 0.0216 Val 0.0390
Epoch 005 Train 0.0190 Val 0.0399
Epoch 006 Train 0.0167 Val 0.0398
Epoch 007 Train 0.0148 Val 0.0397
Epoch 008 Train 0.0135 Val 0.0398
Epoch 009 Train 0.0119 Val 0.0373
Epoch 010 Train 0.0108 Val 0.0391
Epoch 011 Train 0.0100 Val 0.0379
Epoch 012 Train 0.0088 Val 0.0382
Epoch 013 Train 0.0079 Val 0.0378
Epoch 014 Train 0.0071 Val 0.0387
Decrease LR
Epoch 015 Train 0.0064 Val 0.0379
Epoch 016 Train 0.0059 Val 0.0396
Epoch 017 Train 0.0054 Val 0.0385
Epoch 018 Train 0.0051 Val 0.0389
Epoch 019 Train 0.0048 Val

[I 2024-04-22 19:38:06,683] Trial 27 finished with value: 0.037298525279522356 and parameters: {'hidden_units': 36, 'input_rec': 31, 'seq_length_LSTM': 20, 'size_task_embedding': 28, 'num_units_hnet': 26, 'num_layers_hnet': 3, 'lr': 0.0038407773987667528, 'huber_delta': 7, 'alpha_reg': 2.562054749667704e-06, 'l1_ratio_reg': 0.7293587415623108, 'batch_size_train': 50, 'batch_size_val': 50}. Best is trial 13 with value: 0.034108375129166556.


Decrease LR
Creating a simple RNN with 13260 weights.
Created MLP Hypernet.
Hypernetwork with 212704 weights and 13260 outputs (compression ratio: 16.04).
The network consists of 212670 unconditional weights (212670 internally maintained) and 34 conditional weights (34 internally maintained).
Epoch 000 Train 0.1582 Val 0.0839
Epoch 001 Train 0.0383 Val 0.0497
Epoch 002 Train 0.0240 Val 0.0449
Epoch 003 Train 0.0179 Val 0.0445
Epoch 004 Train 0.0143 Val 0.0428
Epoch 005 Train 0.0108 Val 0.0476
Epoch 006 Train 0.0083 Val 0.0441
Epoch 007 Train 0.0063 Val 0.0429
Epoch 008 Train 0.0051 Val 0.0436
Epoch 009 Train 0.0042 Val 0.0404
Epoch 010 Train 0.0035 Val 0.0421
Epoch 011 Train 0.0030 Val 0.0431
Epoch 012 Train 0.0026 Val 0.0387
Epoch 013 Train 0.0022 Val 0.0416
Epoch 014 Train 0.0019 Val 0.0412
Epoch 015 Train 0.0016 Val 0.0425
Epoch 016 Train 0.0015 Val 0.0420
Decrease LR
Epoch 017 Train 0.0013 Val 0.0430
Epoch 018 Train 0.0012 Val 0.0388
Epoch 019 Train 0.0011 Val 0.0415
Epoch 020 Trai

[I 2024-04-22 19:45:03,282] Trial 28 finished with value: 0.03867565437342744 and parameters: {'hidden_units': 31, 'input_rec': 36, 'seq_length_LSTM': 18, 'size_task_embedding': 17, 'num_units_hnet': 15, 'num_layers_hnet': 2, 'lr': 0.005297718408174701, 'huber_delta': 5, 'alpha_reg': 2.176486136462753e-05, 'l1_ratio_reg': 0.20571143159122676, 'batch_size_train': 41, 'batch_size_val': 43}. Best is trial 13 with value: 0.034108375129166556.


Decrease LR
Creating a simple RNN with 9578 weights.
Created MLP Hypernet.
Hypernetwork with 221808 weights and 9578 outputs (compression ratio: 23.16).
The network consists of 221768 unconditional weights (221768 internally maintained) and 40 conditional weights (40 internally maintained).
Epoch 000 Train 0.0903 Val 0.0756
Epoch 001 Train 0.0337 Val 0.0550
Epoch 002 Train 0.0216 Val 0.0481
Epoch 003 Train 0.0179 Val 0.0454
Epoch 004 Train 0.0157 Val 0.0464
Epoch 005 Train 0.0140 Val 0.0448
Epoch 006 Train 0.0131 Val 0.0436


[I 2024-04-22 19:47:31,841] Trial 29 pruned. 


Epoch 007 Train 0.0121 Val 0.0439
lr: 0.21014374889128268
huber_delta: 0.13013276935793192
batch_size_val: 0.12130770741551816
num_units_hnet: 0.10569991913357972
size_task_embedding: 0.10488262950363018
batch_size_train: 0.07627180230148475
seq_length_LSTM: 0.0706055119627895
input_rec: 0.043272813606277515
l1_ratio_reg: 0.042129665231524185
alpha_reg: 0.03388434194171077
num_layers_hnet: 0.031113185708871262
hidden_units: 0.03055590494539925
Study statistics: 
Number of finished trials:  30
Number of pruned trials:  9
Number of complete trials:  21
Best trial: 
Loss: 0.034108375129166556
Best hyperparameters: {'hidden_units': 26, 'input_rec': 30, 'seq_length_LSTM': 20, 'size_task_embedding': 13, 'num_units_hnet': 11, 'num_layers_hnet': 3, 'lr': 0.008724683475679063, 'huber_delta': 6, 'alpha_reg': 3.3324906702023635e-06, 'l1_ratio_reg': 0.2132256325697216, 'batch_size_train': 50, 'batch_size_val': 50}


In [14]:
import joblib
import json
# Retrieve the best trial
best_trial = study.best_trial

# Get the best hyperparameters
best_params = best_trial.params

# Save the best parameters
with open('stim_0411_best_params.json', 'w') as f:
    json.dump(best_params, f)

### Plotting the study

In [18]:
import plotly

In [19]:
optuna.visualization.plot_optimization_history(study)

ImportError: Tried to import 'plotly' but failed. Please make sure that the package is installed correctly to use this feature. Actual error: No module named 'plotly'.

In [20]:
#Plotting the accuracies for each hyperparameter for each trial.
    
optuna.visualization.plot_slice(study)

ImportError: Tried to import 'plotly' but failed. Please make sure that the package is installed correctly to use this feature. Actual error: No module named 'plotly'.

In [21]:
# Plotting the optimization history of the study.

optuna.visualization.plot_optimization_history(study)

#Plotting the accuracies for each hyperparameter for each trial.
    
optuna.visualization.plot_slice(study)

# Plotting the accuracy surface for the hyperparameters involved in the random forest model.

optuna.visualization.plot_contour(study, params=["seq_length_LSTM", "n_hidden_units"]) 

ImportError: Tried to import 'plotly' but failed. Please make sure that the package is installed correctly to use this feature. Actual error: No module named 'plotly'.