<a href="https://colab.research.google.com/github/mavillan/jane-street-market-prediction/blob/main/tuning/snn-optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
!pip install -Iv scikit-learn==0.23.2 > /dev/null 2>&1
!pip install optuna > /dev/null 2>&1

In [3]:
import copy
import os
import numpy as np 
import pandas as pd
from pathlib import Path
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import optuna

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.linear_model import BayesianRidge

import torch
import torch.nn as nn
from torch.utils.data import Dataset,TensorDataset,DataLoader

# custom modules
import sys
sys.path.append("/content/drive/MyDrive/kaggle/janestreet")
from torch_utils import Monitor, train_step, valid_step

def set_seed(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    
set_seed(2)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

***
### utility functions

In [4]:
def utility_score(date, weight, resp, action):
    """
    Fast computation of utility score
    """
    date = date.astype(int)
    count_i = len(np.unique(date))
    Pi = np.bincount(date, weight * resp * action)
    t = np.sum(Pi) / np.sqrt(np.sum(Pi ** 2)) * np.sqrt(250 / count_i)
    u = np.clip(t, 0, 6) * np.sum(Pi)
    return -u

In [5]:
def cat_encoder(X):
    """
    Fast one-hot encoding of feature_0
    """
    X["feature_00"] = 0
    idx00 = X.query("feature_0 == -1").index
    X.loc[idx00,"feature_00"] = 1
    
    X["feature_01"] = 0
    idx01 = X.query("feature_0 == 1").index
    X.loc[idx01,"feature_01"] = 1
    
    return X.iloc[:,1:]

In [6]:
def show_metrics(monitor):
    x = np.arange(len(monitor.train_loss))
    
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(21, 7))
    
    ax1 = axes[0]
    ax2 = ax1.twinx()
    ax1.plot(x, monitor.train_loss, 'go-', label="train_loss")
    ax2.plot(x, monitor.train_metric, 'ro-', label="train_metric")
    plt.legend(loc="best")
    ax1.set_xlabel('epochs')
    ax1.set_ylabel('loss')
    ax1.set_title("Training")
    plt.grid()
    
    ax1 = axes[1]
    ax2 = ax1.twinx()
    ax1.plot(x, monitor.valid_loss, 'go-', label="valid_loss")
    ax2.plot(x, monitor.valid_metric, 'ro-', label="valid_metric")
    plt.legend(loc="best")
    ax1.set_xlabel('epochs')
    ax2.set_ylabel('metric')
    ax1.set_title("Validation")
    plt.grid()
    
    plt.show()

***
### preparing the data

In [7]:
root = Path("/content/drive/MyDrive/kaggle/janestreet/preprocessing/")

train = pd.read_parquet(root/"train.parquet")
features = pd.read_parquet(root/"features.parquet")

train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2390491 entries, 0 to 2390490
Columns: 143 entries, date to w4
dtypes: float32(140), int16(1), int32(1), int8(1)
memory usage: 1.3 GB


In [8]:
train = train.query("date > 85").query("weight > 0").reset_index(drop=True)

input_features = [col for col in train.columns if "feature" in col]
resp_cols = ['resp', 'resp_1', 'resp_2', 'resp_3', 'resp_4']
w_cols = ["w", "w1", "w2", "w3", "w4"]

X_dset = train.loc[:,input_features].copy()
y_dset = (train.loc[:,resp_cols] > 0).astype(int).copy()
w_dset = train.loc[:, w_cols].copy()
dwr_dset = train.loc[:, ["date","weight","resp"]].copy()

In [9]:
%%time 

with open("/content/drive/MyDrive/kaggle/janestreet/imputer/imputer_f0m1.pickle", "rb") as file:
    imputer_f0m1 = pickle.load(file)
    file.close()
    
with open("/content/drive/MyDrive/kaggle/janestreet/imputer/imputer_f0p1.pickle", "rb") as file:
    imputer_f0p1 = pickle.load(file)
    file.close()

idx_f0m1 = X_dset.query("feature_0 == -1").index
X_dset.loc[idx_f0m1, input_features[1:]] = imputer_f0m1.transform(X_dset.loc[idx_f0m1, input_features[1:]])

idx_f0p1 = X_dset.query("feature_0 ==  1").index
X_dset.loc[idx_f0p1, input_features[1:]] = imputer_f0p1.transform(X_dset.loc[idx_f0p1, input_features[1:]])

CPU times: user 41.8 s, sys: 6.56 s, total: 48.4 s
Wall time: 43.9 s


In [10]:
X_dset = cat_encoder(X_dset)
input_features = X_dset.columns.tolist()

***
### model definition

In [11]:
class GBN(nn.Module):
    """
    Ghost Batch Normalization
    https://arxiv.org/abs/1705.08741
    """

    def __init__(self, input_dim, virtual_batch_size=128, momentum=0.01):
        super(GBN, self).__init__()

        self.input_dim = input_dim
        self.virtual_batch_size = virtual_batch_size
        self.bn = nn.BatchNorm1d(self.input_dim, momentum=momentum)

    def forward(self, x):
        chunks = x.chunk(int(np.ceil(x.shape[0] / self.virtual_batch_size)), 0)
        res = [self.bn(x_) for x_ in chunks]

        return torch.cat(res, dim=0)
    
class NormalLinear(nn.Module):
    """ 
    Linear layer with normalized weights
    """
    def __init__(self, size_in, size_out, bias=True):
        super(NormalLinear, self).__init__()
        self.size_in, self.size_out = size_in, size_out
        # weights vector
        weights_v = torch.Tensor(size_out, size_in)
        nn.init.kaiming_uniform_(weights_v, a=np.sqrt(5)) 
        self.weights_v = nn.Parameter(weights_v)
        # weights magnitude
        weights_m = torch.norm(weights_v, dim=1, keepdim=True)
        self.weights_m = nn.Parameter(weights_m.clone().detach())
        
        if bias:
            bias_v = torch.Tensor(size_out)    
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(weights_v)
            bound = 1 / np.sqrt(fan_in)
            nn.init.uniform_(bias_v, -bound, bound)
            self.bias = nn.Parameter(bias_v)
        else:
            self.register_parameter('bias', None)
            
    def _compute_weights(self):
        norm_per_output = torch.norm(self.weights_v, dim=1, keepdim=True)
        return self.weights_m * torch.div(self.weights_v, norm_per_output)
            
    def forward(self, x):
        weights = self._compute_weights()
        return nn.functional.linear(x, weights, self.bias)

In [12]:
class SNN(nn.Module):
    """
    SNN for pretraining
    """

    def __init__(self, input_dim, output_dim, nn_depth, nn_width, dropout, momentum=0.02, virtual_batch_size=128):
        super().__init__()
        
        self.bn_in = GBN(input_dim, virtual_batch_size=virtual_batch_size, momentum=momentum)
        self.dp_in = nn.Dropout(dropout)
        self.ln_in = NormalLinear(input_dim, nn_width, bias=False)
        
        self.bnorms = nn.ModuleList(
            [GBN(nn_width, virtual_batch_size=virtual_batch_size, momentum=momentum) 
             for i in range(nn_depth-1)])
        self.dropouts = nn.ModuleList(
            [nn.Dropout(dropout) 
             for i in range(nn_depth-1)])
        self.linears = nn.ModuleList(
            [NormalLinear(nn_width, nn_width, bias=False) 
             for i in range(nn_depth-1)])
        
        self.bn_out = GBN(nn_width, virtual_batch_size=virtual_batch_size, momentum=momentum)
        self.dp_out = nn.Dropout(dropout/2)
        self.ln_out = NormalLinear(nn_width, output_dim, bias=False)

    def forward(self, x):
        x = self.bn_in(x)
        x = self.dp_in(x)
        x = self.ln_in(x)
        x = nn.functional.relu(x)

        for bn_layer,dp_layer,ln_layer in zip(self.bnorms,self.dropouts,self.linears):
            x = bn_layer(x)
            x = dp_layer(x)
            x = ln_layer(x)
            x = nn.functional.relu(x)
            
        x = self.bn_out(x)
        x = self.dp_out(x)
        x = self.ln_out(x)
        return x

In [13]:
class BCELabelSmoothing(nn.Module):
    def __init__(self, label_smoothing=0.0):
        super(BCELabelSmoothing, self).__init__()
        self.label_smoothing = label_smoothing
        self.bce_loss = torch.nn.functional.binary_cross_entropy_with_logits
        
    def forward(self, prediction, target, weight=None):
        target_smooth = target*(1.0 - self.label_smoothing) + 0.5*self.label_smoothing
        if weight is None:
            loss = self.bce_loss(prediction, target_smooth, reduction="mean")
        else:
            loss = self.bce_loss(prediction, target_smooth, weight, reduction="sum") / torch.sum(weight)
        return loss

bce_loss = BCELabelSmoothing(label_smoothing=1e-2)

***
### Hyperparameter tuning

In [14]:
# 80% rows for train & 20% for valid
train_idx = train.query("date < 430").index
valid_idx = train.query("date >= 430").index

train_dset = TensorDataset(torch.tensor(X_dset.loc[train_idx].values, dtype=torch.float), 
                           torch.tensor(y_dset.loc[train_idx].values, dtype=torch.float),
                           torch.tensor(w_dset.loc[train_idx].values, dtype=torch.float),
                           torch.tensor(dwr_dset.loc[train_idx].values, dtype=torch.float),
                          )

valid_dset = TensorDataset(torch.tensor(X_dset.loc[valid_idx].values, dtype=torch.float), 
                           torch.tensor(y_dset.loc[valid_idx].values, dtype=torch.float),
                           torch.tensor(w_dset.loc[valid_idx].values, dtype=torch.float),
                           torch.tensor(dwr_dset.loc[valid_idx].values, dtype=torch.float),
                          )

dataset_sizes = {'train': len(train_dset), 'valid': len(valid_dset)}
train_dataloader = DataLoader(train_dset, batch_size=2048, shuffle=True, num_workers=2)
valid_dataloader = DataLoader(valid_dset, batch_size=len(valid_dset), shuffle=False, num_workers=2)

print("Number of step per epoch:", len(train_dset)//2048)

Number of step per epoch: 612


In [15]:
if os.path.exists(f"/content/drive/MyDrive/kaggle/janestreet/logs/snn_tuning.csv"):
    logger = open(f"/content/drive/MyDrive/kaggle/janestreet/logs/snn_tuning.csv", "a")
else:
    logger = open(f"/content/drive/MyDrive/kaggle/janestreet/logs/snn_tuning.csv", "w")
    logger.write("trial;params;loss;metric;loss_hist;metric_hist\n")

In [17]:
default_nn_kwargs = {
    "input_dim":len(input_features),
    "output_dim":len(resp_cols),
    "nn_depth":3,
    }

def objective(trial):
    sampled_nn_kwargs = {
        #"nn_depth": trial.suggest_int("nn_depth", 3, 5)
        "nn_width": int(trial.suggest_discrete_uniform("nn_width", 64, 144, 16)),
        "dropout": trial.suggest_discrete_uniform("nn_dropout", 0.1, 0.5, 0.05),
        "momentum": trial.suggest_discrete_uniform("momentum", 0.01, 0.1, 0.01),
        "virtual_batch_size": 2 ** trial.suggest_int("virtual_batch_size", 7, 10),
        }
    nn_kwargs = {**sampled_nn_kwargs, **default_nn_kwargs}

    # other hyperparams
    weight_decay = 10 ** trial.suggest_int("weight_decay", -6, -2)
    pct_start = trial.suggest_discrete_uniform("pct_start", 0.1, 0.5, 0.1)

    sampled_params = {
        **sampled_nn_kwargs,
        "weight_decay":weight_decay,
        "pct_start":pct_start,
        }
    print("-"*80)
    print("sampled_params:", sampled_params)

    model = SNN(**nn_kwargs)
    model = model.to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=5e-2, momentum=0.9, weight_decay=weight_decay)

    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, 
        max_lr=1e-1,
        epochs=50,
        pct_start=pct_start, 
        anneal_strategy='cos', 
        cycle_momentum=True, 
        base_momentum=0.8, 
        max_momentum=0.9, 
        div_factor=1e1,
        final_div_factor=1e0,
        steps_per_epoch=len(train_dataloader),
        verbose=False)
    
    monitor = Monitor(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        patience=10,
        metric_fn=utility_score,
        experiment_name=f'SNN',
        num_epochs=50,
        dataset_sizes=dataset_sizes,
        early_stop_on_metric=False,
        lower_is_better=True)
    
    for epoch in monitor.iter_epochs:
        train_step(model, train_dataloader, optimizer, monitor, bce_loss, scheduler=scheduler, clip_value=None)    
        early_stop = valid_step(model, valid_dataloader, optimizer, monitor, bce_loss)
        if early_stop: break

    logger.write(f"{trial.number};{sampled_params};{monitor.best_loss};{monitor.best_metric};{monitor.valid_loss[-10:]};{monitor.valid_metric[-10:]}\n")
    logger.flush()

    print(f"best_valid_loss: {monitor.best_loss} - best_valid_metric: {monitor.best_metric}")
    return monitor.best_loss

In [18]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=200, timeout=43200, show_progress_bar=False)

[32m[I 2021-02-18 05:09:39,557][0m A new study created in memory with name: no-name-a81a37d6-cbb1-4814-8a81-3dae3121cb05[0m


--------------------------------------------------------------------------------
sampled_params: {'nn_width': 144, 'dropout': 0.1, 'momentum': 0.06999999999999999, 'virtual_batch_size': 256, 'weight_decay': 1e-06, 'pct_start': 0.4}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 05:22:18,749][0m Trial 0 finished with value: 0.6860414743423462 and parameters: {'nn_width': 144.0, 'nn_dropout': 0.1, 'momentum': 0.06999999999999999, 'virtual_batch_size': 8, 'weight_decay': -6, 'pct_start': 0.4}. Best is trial 0 with value: 0.6860414743423462.[0m



best_valid_loss: 0.6860414743423462 - best_valid_metric: -2280.263682876855
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 128, 'dropout': 0.1, 'momentum': 0.01, 'virtual_batch_size': 128, 'weight_decay': 1e-05, 'pct_start': 0.4}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 05:37:08,433][0m Trial 1 finished with value: 0.6865214705467224 and parameters: {'nn_width': 128.0, 'nn_dropout': 0.1, 'momentum': 0.01, 'virtual_batch_size': 7, 'weight_decay': -5, 'pct_start': 0.4}. Best is trial 0 with value: 0.6860414743423462.[0m



best_valid_loss: 0.6865214705467224 - best_valid_metric: -2401.3878974318177
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 144, 'dropout': 0.5, 'momentum': 0.09, 'virtual_batch_size': 128, 'weight_decay': 1e-05, 'pct_start': 0.4}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 06:12:14,041][0m Trial 2 finished with value: 0.6868324875831604 and parameters: {'nn_width': 144.0, 'nn_dropout': 0.5, 'momentum': 0.09, 'virtual_batch_size': 7, 'weight_decay': -5, 'pct_start': 0.4}. Best is trial 0 with value: 0.6860414743423462.[0m



best_valid_loss: 0.6868324875831604 - best_valid_metric: -2428.3672622771246
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 96, 'dropout': 0.2, 'momentum': 0.06999999999999999, 'virtual_batch_size': 1024, 'weight_decay': 0.001, 'pct_start': 0.1}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 06:41:37,618][0m Trial 3 finished with value: 0.6858563423156738 and parameters: {'nn_width': 96.0, 'nn_dropout': 0.2, 'momentum': 0.06999999999999999, 'virtual_batch_size': 10, 'weight_decay': -3, 'pct_start': 0.1}. Best is trial 3 with value: 0.6858563423156738.[0m



best_valid_loss: 0.6858563423156738 - best_valid_metric: -2809.530850085786
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 128, 'dropout': 0.35, 'momentum': 0.02, 'virtual_batch_size': 512, 'weight_decay': 0.01, 'pct_start': 0.4}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…


invalid value encountered in double_scalars

[32m[I 2021-02-18 06:49:29,373][0m Trial 4 finished with value: 0.6895866394042969 and parameters: {'nn_width': 128.0, 'nn_dropout': 0.35, 'momentum': 0.02, 'virtual_batch_size': 9, 'weight_decay': -2, 'pct_start': 0.4}. Best is trial 3 with value: 0.6858563423156738.[0m



best_valid_loss: 0.6895866394042969 - best_valid_metric: -2122.075263868897
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 80, 'dropout': 0.35, 'momentum': 0.06999999999999999, 'virtual_batch_size': 1024, 'weight_decay': 0.01, 'pct_start': 0.1}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 06:57:05,187][0m Trial 5 finished with value: 0.6908299922943115 and parameters: {'nn_width': 80.0, 'nn_dropout': 0.35, 'momentum': 0.06999999999999999, 'virtual_batch_size': 10, 'weight_decay': -2, 'pct_start': 0.1}. Best is trial 3 with value: 0.6858563423156738.[0m



best_valid_loss: 0.6908299922943115 - best_valid_metric: -1616.8620550049493
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 64, 'dropout': 0.2, 'momentum': 0.08, 'virtual_batch_size': 256, 'weight_decay': 1e-06, 'pct_start': 0.4}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 07:18:04,538][0m Trial 6 finished with value: 0.6853498816490173 and parameters: {'nn_width': 64.0, 'nn_dropout': 0.2, 'momentum': 0.08, 'virtual_batch_size': 8, 'weight_decay': -6, 'pct_start': 0.4}. Best is trial 6 with value: 0.6853498816490173.[0m



best_valid_loss: 0.6853498816490173 - best_valid_metric: -2756.24957641707
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 112, 'dropout': 0.25, 'momentum': 0.09999999999999999, 'virtual_batch_size': 512, 'weight_decay': 1e-06, 'pct_start': 0.5}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 07:38:12,174][0m Trial 7 finished with value: 0.6850849390029907 and parameters: {'nn_width': 112.0, 'nn_dropout': 0.25, 'momentum': 0.09999999999999999, 'virtual_batch_size': 9, 'weight_decay': -6, 'pct_start': 0.5}. Best is trial 7 with value: 0.6850849390029907.[0m



best_valid_loss: 0.6850849390029907 - best_valid_metric: -2729.422141896749
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 128, 'dropout': 0.35, 'momentum': 0.01, 'virtual_batch_size': 512, 'weight_decay': 0.001, 'pct_start': 0.30000000000000004}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 07:53:16,552][0m Trial 8 finished with value: 0.6871764063835144 and parameters: {'nn_width': 128.0, 'nn_dropout': 0.35, 'momentum': 0.01, 'virtual_batch_size': 9, 'weight_decay': -3, 'pct_start': 0.30000000000000004}. Best is trial 7 with value: 0.6850849390029907.[0m



best_valid_loss: 0.6871764063835144 - best_valid_metric: -2420.6441026884245
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 128, 'dropout': 0.45000000000000007, 'momentum': 0.09, 'virtual_batch_size': 128, 'weight_decay': 0.01, 'pct_start': 0.30000000000000004}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 08:01:29,007][0m Trial 9 finished with value: 0.6905988454818726 and parameters: {'nn_width': 128.0, 'nn_dropout': 0.45000000000000007, 'momentum': 0.09, 'virtual_batch_size': 7, 'weight_decay': -2, 'pct_start': 0.30000000000000004}. Best is trial 7 with value: 0.6850849390029907.[0m



best_valid_loss: 0.6905988454818726 - best_valid_metric: -1226.2486409547173
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 96, 'dropout': 0.25, 'momentum': 0.05, 'virtual_batch_size': 512, 'weight_decay': 1e-05, 'pct_start': 0.5}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 08:21:32,352][0m Trial 10 finished with value: 0.6850166320800781 and parameters: {'nn_width': 96.0, 'nn_dropout': 0.25, 'momentum': 0.05, 'virtual_batch_size': 9, 'weight_decay': -5, 'pct_start': 0.5}. Best is trial 10 with value: 0.6850166320800781.[0m



best_valid_loss: 0.6850166320800781 - best_valid_metric: -2657.440513338197
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 96, 'dropout': 0.25, 'momentum': 0.04, 'virtual_batch_size': 512, 'weight_decay': 1e-05, 'pct_start': 0.5}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 08:41:31,315][0m Trial 11 finished with value: 0.6851619482040405 and parameters: {'nn_width': 96.0, 'nn_dropout': 0.25, 'momentum': 0.04, 'virtual_batch_size': 9, 'weight_decay': -5, 'pct_start': 0.5}. Best is trial 10 with value: 0.6850166320800781.[0m



best_valid_loss: 0.6851619482040405 - best_valid_metric: -2625.1292111903267
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 112, 'dropout': 0.25, 'momentum': 0.04, 'virtual_batch_size': 512, 'weight_decay': 1e-06, 'pct_start': 0.5}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 09:05:28,753][0m Trial 12 finished with value: 0.6854343414306641 and parameters: {'nn_width': 112.0, 'nn_dropout': 0.25, 'momentum': 0.04, 'virtual_batch_size': 9, 'weight_decay': -6, 'pct_start': 0.5}. Best is trial 10 with value: 0.6850166320800781.[0m



best_valid_loss: 0.6854343414306641 - best_valid_metric: -2547.958137993524
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 80, 'dropout': 0.15000000000000002, 'momentum': 0.05, 'virtual_batch_size': 1024, 'weight_decay': 0.0001, 'pct_start': 0.5}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 09:19:15,667][0m Trial 13 finished with value: 0.6856173276901245 and parameters: {'nn_width': 80.0, 'nn_dropout': 0.15000000000000002, 'momentum': 0.05, 'virtual_batch_size': 10, 'weight_decay': -4, 'pct_start': 0.5}. Best is trial 10 with value: 0.6850166320800781.[0m



best_valid_loss: 0.6856173276901245 - best_valid_metric: -2589.855179839603
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 112, 'dropout': 0.30000000000000004, 'momentum': 0.09999999999999999, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.2}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 09:46:59,631][0m Trial 14 finished with value: 0.6851698160171509 and parameters: {'nn_width': 112.0, 'nn_dropout': 0.30000000000000004, 'momentum': 0.09999999999999999, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.2}. Best is trial 10 with value: 0.6850166320800781.[0m



best_valid_loss: 0.6851698160171509 - best_valid_metric: -2703.4021515616614
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 80, 'dropout': 0.25, 'momentum': 0.03, 'virtual_batch_size': 512, 'weight_decay': 0.0001, 'pct_start': 0.5}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 10:04:22,118][0m Trial 15 finished with value: 0.6850519776344299 and parameters: {'nn_width': 80.0, 'nn_dropout': 0.25, 'momentum': 0.03, 'virtual_batch_size': 9, 'weight_decay': -4, 'pct_start': 0.5}. Best is trial 10 with value: 0.6850166320800781.[0m



best_valid_loss: 0.6850519776344299 - best_valid_metric: -2953.8820250293593
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 64, 'dropout': 0.4, 'momentum': 0.03, 'virtual_batch_size': 256, 'weight_decay': 0.0001, 'pct_start': 0.5}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 10:36:54,462][0m Trial 16 finished with value: 0.6856908202171326 and parameters: {'nn_width': 64.0, 'nn_dropout': 0.4, 'momentum': 0.03, 'virtual_batch_size': 8, 'weight_decay': -4, 'pct_start': 0.5}. Best is trial 10 with value: 0.6850166320800781.[0m



best_valid_loss: 0.6856908202171326 - best_valid_metric: -2886.985912936599
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 80, 'dropout': 0.2, 'momentum': 0.05, 'virtual_batch_size': 1024, 'weight_decay': 0.001, 'pct_start': 0.2}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 10:51:00,764][0m Trial 17 finished with value: 0.6864088177680969 and parameters: {'nn_width': 80.0, 'nn_dropout': 0.2, 'momentum': 0.05, 'virtual_batch_size': 10, 'weight_decay': -3, 'pct_start': 0.2}. Best is trial 10 with value: 0.6850166320800781.[0m



best_valid_loss: 0.6864088177680969 - best_valid_metric: -2458.5929034984674
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 96, 'dropout': 0.30000000000000004, 'momentum': 0.03, 'virtual_batch_size': 512, 'weight_decay': 0.0001, 'pct_start': 0.5}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 11:14:07,473][0m Trial 18 finished with value: 0.6851761341094971 and parameters: {'nn_width': 96.0, 'nn_dropout': 0.30000000000000004, 'momentum': 0.03, 'virtual_batch_size': 9, 'weight_decay': -4, 'pct_start': 0.5}. Best is trial 10 with value: 0.6850166320800781.[0m



best_valid_loss: 0.6851761341094971 - best_valid_metric: -2744.310362535595
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 80, 'dropout': 0.15000000000000002, 'momentum': 0.060000000000000005, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.30000000000000004}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 11:29:10,065][0m Trial 19 finished with value: 0.6845667362213135 and parameters: {'nn_width': 80.0, 'nn_dropout': 0.15000000000000002, 'momentum': 0.060000000000000005, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.30000000000000004}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6845667362213135 - best_valid_metric: -2929.520128211251
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 64, 'dropout': 0.15000000000000002, 'momentum': 0.060000000000000005, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.2}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 11:44:55,140][0m Trial 20 finished with value: 0.6856577396392822 and parameters: {'nn_width': 64.0, 'nn_dropout': 0.15000000000000002, 'momentum': 0.060000000000000005, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.2}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6856577396392822 - best_valid_metric: -2502.3352977467507
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 80, 'dropout': 0.15000000000000002, 'momentum': 0.060000000000000005, 'virtual_batch_size': 512, 'weight_decay': 0.0001, 'pct_start': 0.30000000000000004}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 12:02:56,629][0m Trial 21 finished with value: 0.6857389807701111 and parameters: {'nn_width': 80.0, 'nn_dropout': 0.15000000000000002, 'momentum': 0.060000000000000005, 'virtual_batch_size': 9, 'weight_decay': -4, 'pct_start': 0.30000000000000004}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6857389807701111 - best_valid_metric: -2586.2047850135145
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 80, 'dropout': 0.25, 'momentum': 0.04, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.4}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 12:20:51,181][0m Trial 22 finished with value: 0.6849544644355774 and parameters: {'nn_width': 80.0, 'nn_dropout': 0.25, 'momentum': 0.04, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.4}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6849544644355774 - best_valid_metric: -2660.9067965130484
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 96, 'dropout': 0.2, 'momentum': 0.04, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.30000000000000004}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 12:48:24,507][0m Trial 23 finished with value: 0.684851884841919 and parameters: {'nn_width': 96.0, 'nn_dropout': 0.2, 'momentum': 0.04, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.30000000000000004}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.684851884841919 - best_valid_metric: -2597.258292977153
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 80, 'dropout': 0.15000000000000002, 'momentum': 0.04, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.30000000000000004}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 13:04:26,211][0m Trial 24 finished with value: 0.6858370304107666 and parameters: {'nn_width': 80.0, 'nn_dropout': 0.15000000000000002, 'momentum': 0.04, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.30000000000000004}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6858370304107666 - best_valid_metric: -2421.6436982188516
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 64, 'dropout': 0.2, 'momentum': 0.02, 'virtual_batch_size': 128, 'weight_decay': 1e-06, 'pct_start': 0.2}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 13:24:24,250][0m Trial 25 finished with value: 0.6860268115997314 and parameters: {'nn_width': 64.0, 'nn_dropout': 0.2, 'momentum': 0.02, 'virtual_batch_size': 7, 'weight_decay': -6, 'pct_start': 0.2}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6860268115997314 - best_valid_metric: -2634.338234133801
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 96, 'dropout': 0.1, 'momentum': 0.060000000000000005, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.30000000000000004}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 13:37:42,814][0m Trial 26 finished with value: 0.6852511167526245 and parameters: {'nn_width': 96.0, 'nn_dropout': 0.1, 'momentum': 0.060000000000000005, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.30000000000000004}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6852511167526245 - best_valid_metric: -2305.7595364305694
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 80, 'dropout': 0.2, 'momentum': 0.04, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.4}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 13:55:14,323][0m Trial 27 finished with value: 0.685075581073761 and parameters: {'nn_width': 80.0, 'nn_dropout': 0.2, 'momentum': 0.04, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.4}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.685075581073761 - best_valid_metric: -2552.441501044014
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 96, 'dropout': 0.15000000000000002, 'momentum': 0.05, 'virtual_batch_size': 128, 'weight_decay': 1e-06, 'pct_start': 0.30000000000000004}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 14:06:22,388][0m Trial 28 finished with value: 0.6868628263473511 and parameters: {'nn_width': 96.0, 'nn_dropout': 0.15000000000000002, 'momentum': 0.05, 'virtual_batch_size': 7, 'weight_decay': -6, 'pct_start': 0.30000000000000004}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6868628263473511 - best_valid_metric: -2346.9084179232714
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 64, 'dropout': 0.1, 'momentum': 0.02, 'virtual_batch_size': 256, 'weight_decay': 1e-06, 'pct_start': 0.4}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 14:18:15,882][0m Trial 29 finished with value: 0.6860052943229675 and parameters: {'nn_width': 64.0, 'nn_dropout': 0.1, 'momentum': 0.02, 'virtual_batch_size': 8, 'weight_decay': -6, 'pct_start': 0.4}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6860052943229675 - best_valid_metric: -2633.8869871474717
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 112, 'dropout': 0.30000000000000004, 'momentum': 0.08, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.2}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 14:52:21,444][0m Trial 30 finished with value: 0.6849079132080078 and parameters: {'nn_width': 112.0, 'nn_dropout': 0.30000000000000004, 'momentum': 0.08, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.2}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6849079132080078 - best_valid_metric: -2693.986557765319
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 112, 'dropout': 0.30000000000000004, 'momentum': 0.06999999999999999, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.2}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 15:11:37,553][0m Trial 31 finished with value: 0.6856822371482849 and parameters: {'nn_width': 112.0, 'nn_dropout': 0.30000000000000004, 'momentum': 0.06999999999999999, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.2}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6856822371482849 - best_valid_metric: -2623.446173830799
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 112, 'dropout': 0.30000000000000004, 'momentum': 0.08, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.2}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 15:33:30,548][0m Trial 32 finished with value: 0.6851816773414612 and parameters: {'nn_width': 112.0, 'nn_dropout': 0.30000000000000004, 'momentum': 0.08, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.2}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6851816773414612 - best_valid_metric: -2701.2934767253973
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 96, 'dropout': 0.25, 'momentum': 0.08, 'virtual_batch_size': 128, 'weight_decay': 1e-05, 'pct_start': 0.4}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 15:52:24,752][0m Trial 33 finished with value: 0.6859628558158875 and parameters: {'nn_width': 96.0, 'nn_dropout': 0.25, 'momentum': 0.08, 'virtual_batch_size': 7, 'weight_decay': -5, 'pct_start': 0.4}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6859628558158875 - best_valid_metric: -2595.924289393096
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 80, 'dropout': 0.1, 'momentum': 0.060000000000000005, 'virtual_batch_size': 256, 'weight_decay': 0.0001, 'pct_start': 0.1}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 16:03:47,170][0m Trial 34 finished with value: 0.6859133839607239 and parameters: {'nn_width': 80.0, 'nn_dropout': 0.1, 'momentum': 0.060000000000000005, 'virtual_batch_size': 8, 'weight_decay': -4, 'pct_start': 0.1}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6859133839607239 - best_valid_metric: -2629.498974992336
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 96, 'dropout': 0.2, 'momentum': 0.09, 'virtual_batch_size': 128, 'weight_decay': 1e-05, 'pct_start': 0.30000000000000004}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 16:25:32,201][0m Trial 35 finished with value: 0.685157060623169 and parameters: {'nn_width': 96.0, 'nn_dropout': 0.2, 'momentum': 0.09, 'virtual_batch_size': 7, 'weight_decay': -5, 'pct_start': 0.30000000000000004}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.685157060623169 - best_valid_metric: -2715.573421956674
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 144, 'dropout': 0.4, 'momentum': 0.03, 'virtual_batch_size': 256, 'weight_decay': 1e-06, 'pct_start': 0.4}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 17:00:52,274][0m Trial 36 finished with value: 0.6857970952987671 and parameters: {'nn_width': 144.0, 'nn_dropout': 0.4, 'momentum': 0.03, 'virtual_batch_size': 8, 'weight_decay': -6, 'pct_start': 0.4}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6857970952987671 - best_valid_metric: -2692.5518982674075
--------------------------------------------------------------------------------
sampled_params: {'nn_width': 112, 'dropout': 0.35, 'momentum': 0.06999999999999999, 'virtual_batch_size': 256, 'weight_decay': 1e-05, 'pct_start': 0.30000000000000004}


HBox(children=(FloatProgress(value=0.0, description='SNN', max=50.0, style=ProgressStyle(description_width='in…

[32m[I 2021-02-18 17:34:25,110][0m Trial 37 finished with value: 0.6855679750442505 and parameters: {'nn_width': 112.0, 'nn_dropout': 0.35, 'momentum': 0.06999999999999999, 'virtual_batch_size': 8, 'weight_decay': -5, 'pct_start': 0.30000000000000004}. Best is trial 19 with value: 0.6845667362213135.[0m



best_valid_loss: 0.6855679750442505 - best_valid_metric: -2554.002845624295


***