<a href="https://colab.research.google.com/github/mavillan/jane-street-market-prediction/blob/main/tuning/cnn1d-optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
!pip install -Iv scikit-learn==0.23.2 > /dev/null 2>&1
!pip install optuna > /dev/null 2>&1

In [3]:
import copy
import os
import numpy as np 
import pandas as pd
from pathlib import Path
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import optuna

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.linear_model import BayesianRidge

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,TensorDataset,DataLoader

# custom modules
import sys
sys.path.append("/content/drive/MyDrive/kaggle/janestreet")
from torch_utils import Monitor, train_step, valid_step

def set_seed(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    
set_seed(2)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

***
### utility functions

In [4]:
def utility_score(date, weight, resp, action):
    """
    Fast computation of utility score
    """
    date = date.astype(int)
    count_i = len(np.unique(date))
    Pi = np.bincount(date, weight * resp * action)
    t = np.sum(Pi) / np.sqrt(np.sum(Pi ** 2)) * np.sqrt(250 / count_i)
    u = np.clip(t, 0, 6) * np.sum(Pi)
    return -u

In [5]:
def cat_encoder(X):
    """
    Fast one-hot encoding of feature_0
    """
    X["feature_00"] = 0
    idx00 = X.query("feature_0 == -1").index
    X.loc[idx00,"feature_00"] = 1
    
    X["feature_01"] = 0
    idx01 = X.query("feature_0 == 1").index
    X.loc[idx01,"feature_01"] = 1
    
    return X.iloc[:,1:]

In [6]:
def show_metrics(monitor):
    x = np.arange(len(monitor.train_loss))
    
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(21, 7))
    
    ax1 = axes[0]
    ax2 = ax1.twinx()
    ax1.plot(x, monitor.train_loss, 'go-', label="train_loss")
    ax2.plot(x, monitor.train_metric, 'ro-', label="train_metric")
    plt.legend(loc="best")
    ax1.set_xlabel('epochs')
    ax1.set_ylabel('loss')
    ax1.set_title("Training")
    plt.grid()
    
    ax1 = axes[1]
    ax2 = ax1.twinx()
    ax1.plot(x, monitor.valid_loss, 'go-', label="valid_loss")
    ax2.plot(x, monitor.valid_metric, 'ro-', label="valid_metric")
    plt.legend(loc="best")
    ax1.set_xlabel('epochs')
    ax2.set_ylabel('metric')
    ax1.set_title("Validation")
    plt.grid()
    
    plt.show()

***
### preparing the data

In [7]:
root = Path("/content/drive/MyDrive/kaggle/janestreet/preprocessing/")

train = pd.read_parquet(root/"train.parquet")
features = pd.read_parquet(root/"features.parquet")

train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2390491 entries, 0 to 2390490
Columns: 143 entries, date to w4
dtypes: float32(140), int16(1), int32(1), int8(1)
memory usage: 1.3 GB


In [8]:
train = train.query("date > 85").query("weight > 0").reset_index(drop=True)

input_features = [col for col in train.columns if "feature" in col]
resp_cols = ['resp', 'resp_1', 'resp_2', 'resp_3', 'resp_4']
w_cols = ["w", "w1", "w2", "w3", "w4"]

X_dset = train.loc[:,input_features].copy()
y_dset = (train.loc[:,resp_cols] > 0).astype(int).copy()
w_dset = train.loc[:, w_cols].copy()
dwr_dset = train.loc[:, ["date","weight","resp"]].copy()

In [9]:
%%time 

with open("/content/drive/MyDrive/kaggle/janestreet/imputer/imputer_f0m1.pickle", "rb") as file:
    imputer_f0m1 = pickle.load(file)
    file.close()
    
with open("/content/drive/MyDrive/kaggle/janestreet/imputer/imputer_f0p1.pickle", "rb") as file:
    imputer_f0p1 = pickle.load(file)
    file.close()

idx_f0m1 = X_dset.query("feature_0 == -1").index
X_dset.loc[idx_f0m1, input_features[1:]] = imputer_f0m1.transform(X_dset.loc[idx_f0m1, input_features[1:]])

idx_f0p1 = X_dset.query("feature_0 ==  1").index
X_dset.loc[idx_f0p1, input_features[1:]] = imputer_f0p1.transform(X_dset.loc[idx_f0p1, input_features[1:]])

CPU times: user 45.7 s, sys: 6.71 s, total: 52.4 s
Wall time: 49.1 s


In [10]:
X_dset = cat_encoder(X_dset)
input_features = X_dset.columns.tolist()

***
### model definition

In [11]:
class NormalLinear(nn.Module):
    """ 
    Linear layer with normalized weights
    """
    def __init__(self, size_in, size_out, bias=True):
        super(NormalLinear, self).__init__()
        self.size_in, self.size_out = size_in, size_out
        # weights vector
        weights_v = torch.Tensor(size_out, size_in)
        nn.init.kaiming_uniform_(weights_v, a=np.sqrt(5)) 
        self.weights_v = nn.Parameter(weights_v)
        # weights magnitude
        weights_m = torch.norm(weights_v, dim=1, keepdim=True)
        self.weights_m = nn.Parameter(weights_m.clone().detach())
        
        if bias:
            bias_v = torch.Tensor(size_out)    
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(weights_v)
            bound = 1 / np.sqrt(fan_in)
            nn.init.uniform_(bias_v, -bound, bound)
            self.bias = nn.Parameter(bias_v)
        else:
            self.register_parameter('bias', None)
            
    def _compute_weights(self):
        norm_per_output = torch.norm(self.weights_v, dim=1, keepdim=True)
        return self.weights_m * torch.div(self.weights_v, norm_per_output)
            
    def forward(self, x):
        weights = self._compute_weights()
        return nn.functional.linear(x, weights, self.bias)


class CNN1D(nn.Module):
    def __init__(self, input_dim, output_dim, sign_size=16, cha_input=32, cha_hidden=32, K=2,
                 dropout_input=0.2, dropout_hidden=0.2, dropout_output=0.2):
        super(CNN1D, self).__init__()

        hidden_size = sign_size*cha_input
        sign_size1 = sign_size
        sign_size2 = sign_size//2
        output_size = (sign_size//4) * cha_hidden

        self.hidden_size = hidden_size
        self.cha_input = cha_input
        self.cha_hidden = cha_hidden
        self.K = K
        self.sign_size1 = sign_size1
        self.sign_size2 = sign_size2
        self.output_size = output_size
        self.dropout_input = dropout_input
        self.dropout_hidden = dropout_hidden
        self.dropout_output = dropout_output

        self.batch_norm1 = nn.BatchNorm1d(input_dim)
        self.dropout1 = nn.Dropout(dropout_input)
        self.dense1 = NormalLinear(input_dim, hidden_size, bias=False)

        # 1st conv layer
        self.batch_norm_c1 = nn.BatchNorm1d(cha_input)
        self.conv1 = nn.Conv1d(cha_input, cha_input*K, kernel_size = 5, stride = 1, padding=2,  groups=cha_input, bias=False)

        self.ave_po_c1 = nn.AdaptiveAvgPool1d(output_size = sign_size2)

        # 2nd conv layer
        self.batch_norm_c2 = nn.BatchNorm1d(cha_input*K)
        self.dropout_c2 = nn.Dropout(dropout_hidden)
        self.conv2 = nn.Conv1d(cha_input*K, cha_hidden, kernel_size = 3, stride = 1, padding=1, bias=False)

        # 3rd conv layer
        self.batch_norm_c3 = nn.BatchNorm1d(cha_hidden)
        self.dropout_c3 = nn.Dropout(dropout_hidden)
        self.conv3 = nn.Conv1d(cha_hidden, cha_hidden, kernel_size = 3, stride = 1, padding=1, bias=False)

        # 4th conv layer
        self.batch_norm_c4 = nn.BatchNorm1d(cha_hidden)
        self.conv4 = nn.Conv1d(cha_hidden, cha_hidden, kernel_size = 5, stride = 1, padding=2, groups=cha_hidden, bias=False)

        self.avg_po_c4 = nn.AvgPool1d(kernel_size=4, stride=2, padding=1)

        self.flt = nn.Flatten()

        self.batch_norm2 = nn.BatchNorm1d(output_size)
        self.dropout2 = nn.Dropout(dropout_output)
        self.dense2 = NormalLinear(output_size, output_dim, bias=False)

    def forward(self, x):

        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = F.celu(self.dense1(x))

        x = x.reshape(x.shape[0], self.cha_input, self.sign_size1)

        x = self.batch_norm_c1(x)
        x = F.relu(self.conv1(x))

        x = self.ave_po_c1(x)

        x = self.batch_norm_c2(x)
        x = self.dropout_c2(x)
        x = F.relu(self.conv2(x))
        x_s = x

        x = self.batch_norm_c3(x)
        x = self.dropout_c3(x)
        x = F.relu(self.conv3(x))

        x = self.batch_norm_c4(x)
        x = self.conv4(x)
        x =  x + x_s
        x = F.relu(x)

        x = self.avg_po_c4(x)

        x = self.flt(x)

        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.dense2(x)

        return x

In [12]:
class BCELabelSmoothing(nn.Module):
    def __init__(self, label_smoothing=0.0):
        super(BCELabelSmoothing, self).__init__()
        self.label_smoothing = label_smoothing
        self.bce_loss = torch.nn.functional.binary_cross_entropy_with_logits
        
    def forward(self, prediction, target, weight=None):
        target_smooth = target*(1.0 - self.label_smoothing) + 0.5*self.label_smoothing
        if weight is None:
            loss = self.bce_loss(prediction, target_smooth, reduction="mean")
        else:
            loss = self.bce_loss(prediction, target_smooth, weight, reduction="sum") / torch.sum(weight)
        return loss

bce_loss = BCELabelSmoothing(label_smoothing=1e-2)

***
### Hyperparameter tuning

In [13]:
# 80% rows for train & 20% for valid
train_idx = train.query("date < 430").index
valid_idx = train.query("date >= 430").index

train_dset = TensorDataset(torch.tensor(X_dset.loc[train_idx].values, dtype=torch.float), 
                           torch.tensor(y_dset.loc[train_idx].values, dtype=torch.float),
                           torch.tensor(w_dset.loc[train_idx].values, dtype=torch.float),
                           torch.tensor(dwr_dset.loc[train_idx].values, dtype=torch.float),
                          )

valid_dset = TensorDataset(torch.tensor(X_dset.loc[valid_idx].values, dtype=torch.float), 
                           torch.tensor(y_dset.loc[valid_idx].values, dtype=torch.float),
                           torch.tensor(w_dset.loc[valid_idx].values, dtype=torch.float),
                           torch.tensor(dwr_dset.loc[valid_idx].values, dtype=torch.float),
                          )

dataset_sizes = {'train': len(train_dset), 'valid': len(valid_dset)}
train_dataloader = DataLoader(train_dset, batch_size=2048, shuffle=True, num_workers=2)
valid_dataloader = DataLoader(valid_dset, batch_size=len(valid_dset), shuffle=False, num_workers=2)

print("Number of step per epoch:", len(train_dset)//2048)

Number of step per epoch: 612


In [14]:
if os.path.exists(f"/content/drive/MyDrive/kaggle/janestreet/logs/cnn1d_tuning.csv"):
    logger = open(f"/content/drive/MyDrive/kaggle/janestreet/logs/cnn1d_tuning.csv", "a")
else:
    logger = open(f"/content/drive/MyDrive/kaggle/janestreet/logs/cnn1d_tuning.csv", "w")
    logger.write("trial;params;loss;metric;loss_hist;metric_hist\n")

In [15]:
default_params = dict(
    input_dim=len(input_features),
    output_dim=len(resp_cols),
)

def objective(trial):
    sign_size = int(trial.suggest_discrete_uniform("sign_size", 8, 16, 8))
    cha_input = int(trial.suggest_discrete_uniform("cha_input", 16, 64, 16))
    cha_hidden = int(trial.suggest_discrete_uniform("cha_hidden", 32, 64, 16))
    K = trial.suggest_int("K", 1, 2)
    dropout_input = trial.suggest_discrete_uniform("dropout_input", 0.1, 0.3, 0.05)
    dropout_hidden = trial.suggest_discrete_uniform("dropout_hidden", 0.2, 0.4, 0.05) 
    dropout_output = trial.suggest_discrete_uniform("dropout_output", 0.1, 0.3, 0.05)

    nn_kwargs = {
        **default_params,
        "sign_size":sign_size,
        "cha_input":cha_input,
        "cha_hidden":cha_hidden,
        "K":K,
        "dropout_input":dropout_input,
        "dropout_hidden":dropout_hidden,
        "dropout_output":dropout_output,
    }

    # other hyperparams
    weight_decay = 10 ** trial.suggest_int("weight_decay", -4, -3)
    pct_start = trial.suggest_discrete_uniform("pct_start", 0.1, 0.5, 0.1)
    max_lr = 1e-2

    sampled_params = {
        **nn_kwargs,
        "weight_decay":weight_decay,
        "pct_start":pct_start,
        "max_lr":max_lr,
    }
    print("-"*80)
    print("sampled_params:", sampled_params)

    model = CNN1D(**nn_kwargs)
    model = model.to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=max_lr, momentum=0.9, weight_decay=weight_decay)

    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, 
        max_lr=max_lr,
        epochs=50,
        pct_start=pct_start, 
        anneal_strategy='cos', 
        cycle_momentum=True, 
        base_momentum=0.8, 
        max_momentum=0.9, 
        div_factor=1e1,
        final_div_factor=1e0,
        steps_per_epoch=len(train_dataloader),
        verbose=False)
    
    monitor = Monitor(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        patience=10,
        metric_fn=utility_score,
        experiment_name=f'CNN1D',
        num_epochs=50,
        dataset_sizes=dataset_sizes,
        early_stop_on_metric=False,
        lower_is_better=True)
    
    for epoch in monitor.iter_epochs:
        train_step(model, train_dataloader, optimizer, monitor, bce_loss, scheduler=scheduler, clip_value=None)    
        early_stop = valid_step(model, valid_dataloader, optimizer, monitor, bce_loss)
        if early_stop: break

    logger.write(f"{trial.number};{sampled_params};{monitor.best_loss};{monitor.best_metric};{monitor.valid_loss[-10:]};{monitor.valid_metric[-10:]}\n")
    logger.flush()

    print(f"best_valid_loss: {monitor.best_loss} - best_valid_metric: {monitor.best_metric}")
    return monitor.best_loss

In [16]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=200, timeout=43200, show_progress_bar=False)

[32m[I 2021-02-20 07:48:10,273][0m A new study created in memory with name: no-name-ff9b6785-0709-4d09-beac-bb90389c530d[0m


--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 8, 'cha_input': 16, 'cha_hidden': 32, 'K': 2, 'dropout_input': 0.3, 'dropout_hidden': 0.2, 'dropout_output': 0.15000000000000002, 'weight_decay': 0.0001, 'pct_start': 0.2, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 08:13:29,929][0m Trial 0 finished with value: 0.6862826347351074 and parameters: {'sign_size': 8.0, 'cha_input': 16.0, 'cha_hidden': 32.0, 'K': 2, 'dropout_input': 0.3, 'dropout_hidden': 0.2, 'dropout_output': 0.15000000000000002, 'weight_decay': -4, 'pct_start': 0.2}. Best is trial 0 with value: 0.6862826347351074.[0m



best_valid_loss: 0.6862826347351074 - best_valid_metric: -2379.248269593212
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 8, 'cha_input': 48, 'cha_hidden': 32, 'K': 2, 'dropout_input': 0.1, 'dropout_hidden': 0.2, 'dropout_output': 0.15000000000000002, 'weight_decay': 0.0001, 'pct_start': 0.30000000000000004, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 08:35:58,013][0m Trial 1 finished with value: 0.685924768447876 and parameters: {'sign_size': 8.0, 'cha_input': 48.0, 'cha_hidden': 32.0, 'K': 2, 'dropout_input': 0.1, 'dropout_hidden': 0.2, 'dropout_output': 0.15000000000000002, 'weight_decay': -4, 'pct_start': 0.30000000000000004}. Best is trial 1 with value: 0.685924768447876.[0m



best_valid_loss: 0.685924768447876 - best_valid_metric: -2623.068328984319
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 16, 'cha_hidden': 48, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.25, 'dropout_output': 0.2, 'weight_decay': 0.0001, 'pct_start': 0.2, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 09:09:07,975][0m Trial 2 finished with value: 0.6852376461029053 and parameters: {'sign_size': 16.0, 'cha_input': 16.0, 'cha_hidden': 48.0, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.25, 'dropout_output': 0.2, 'weight_decay': -4, 'pct_start': 0.2}. Best is trial 2 with value: 0.6852376461029053.[0m



best_valid_loss: 0.6852376461029053 - best_valid_metric: -2505.2557703555913
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 8, 'cha_input': 48, 'cha_hidden': 32, 'K': 1, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.30000000000000004, 'dropout_output': 0.15000000000000002, 'weight_decay': 0.001, 'pct_start': 0.1, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 09:34:26,037][0m Trial 3 finished with value: 0.6854497194290161 and parameters: {'sign_size': 8.0, 'cha_input': 48.0, 'cha_hidden': 32.0, 'K': 1, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.30000000000000004, 'dropout_output': 0.15000000000000002, 'weight_decay': -3, 'pct_start': 0.1}. Best is trial 2 with value: 0.6852376461029053.[0m



best_valid_loss: 0.6854497194290161 - best_valid_metric: -2617.4733056973714
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 8, 'cha_input': 64, 'cha_hidden': 32, 'K': 1, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.35000000000000003, 'dropout_output': 0.15000000000000002, 'weight_decay': 0.001, 'pct_start': 0.4, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 10:01:14,661][0m Trial 4 finished with value: 0.6856414675712585 and parameters: {'sign_size': 8.0, 'cha_input': 64.0, 'cha_hidden': 32.0, 'K': 1, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.35000000000000003, 'dropout_output': 0.15000000000000002, 'weight_decay': -3, 'pct_start': 0.4}. Best is trial 2 with value: 0.6852376461029053.[0m



best_valid_loss: 0.6856414675712585 - best_valid_metric: -2631.032968851208
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 8, 'cha_input': 64, 'cha_hidden': 48, 'K': 1, 'dropout_input': 0.3, 'dropout_hidden': 0.35000000000000003, 'dropout_output': 0.2, 'weight_decay': 0.001, 'pct_start': 0.4, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 10:34:19,010][0m Trial 5 finished with value: 0.6858645677566528 and parameters: {'sign_size': 8.0, 'cha_input': 64.0, 'cha_hidden': 48.0, 'K': 1, 'dropout_input': 0.3, 'dropout_hidden': 0.35000000000000003, 'dropout_output': 0.2, 'weight_decay': -3, 'pct_start': 0.4}. Best is trial 2 with value: 0.6852376461029053.[0m



best_valid_loss: 0.6858645677566528 - best_valid_metric: -2560.2206829085244
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 8, 'cha_input': 48, 'cha_hidden': 64, 'K': 1, 'dropout_input': 0.1, 'dropout_hidden': 0.35000000000000003, 'dropout_output': 0.3, 'weight_decay': 0.0001, 'pct_start': 0.5, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 11:02:26,499][0m Trial 6 finished with value: 0.6854771971702576 and parameters: {'sign_size': 8.0, 'cha_input': 48.0, 'cha_hidden': 64.0, 'K': 1, 'dropout_input': 0.1, 'dropout_hidden': 0.35000000000000003, 'dropout_output': 0.3, 'weight_decay': -4, 'pct_start': 0.5}. Best is trial 2 with value: 0.6852376461029053.[0m



best_valid_loss: 0.6854771971702576 - best_valid_metric: -2668.7072863299595
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 8, 'cha_input': 16, 'cha_hidden': 64, 'K': 1, 'dropout_input': 0.25, 'dropout_hidden': 0.4, 'dropout_output': 0.3, 'weight_decay': 0.001, 'pct_start': 0.4, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 11:36:45,138][0m Trial 7 finished with value: 0.6861257553100586 and parameters: {'sign_size': 8.0, 'cha_input': 16.0, 'cha_hidden': 64.0, 'K': 1, 'dropout_input': 0.25, 'dropout_hidden': 0.4, 'dropout_output': 0.3, 'weight_decay': -3, 'pct_start': 0.4}. Best is trial 2 with value: 0.6852376461029053.[0m



best_valid_loss: 0.6861257553100586 - best_valid_metric: -2628.2535765825423
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 8, 'cha_input': 48, 'cha_hidden': 48, 'K': 1, 'dropout_input': 0.3, 'dropout_hidden': 0.25, 'dropout_output': 0.15000000000000002, 'weight_decay': 0.001, 'pct_start': 0.30000000000000004, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 12:02:12,625][0m Trial 8 finished with value: 0.6860132217407227 and parameters: {'sign_size': 8.0, 'cha_input': 48.0, 'cha_hidden': 48.0, 'K': 1, 'dropout_input': 0.3, 'dropout_hidden': 0.25, 'dropout_output': 0.15000000000000002, 'weight_decay': -3, 'pct_start': 0.30000000000000004}. Best is trial 2 with value: 0.6852376461029053.[0m



best_valid_loss: 0.6860132217407227 - best_valid_metric: -2493.386793832761
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 8, 'cha_input': 16, 'cha_hidden': 32, 'K': 1, 'dropout_input': 0.25, 'dropout_hidden': 0.30000000000000004, 'dropout_output': 0.15000000000000002, 'weight_decay': 0.0001, 'pct_start': 0.5, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 12:36:30,723][0m Trial 9 finished with value: 0.6867477893829346 and parameters: {'sign_size': 8.0, 'cha_input': 16.0, 'cha_hidden': 32.0, 'K': 1, 'dropout_input': 0.25, 'dropout_hidden': 0.30000000000000004, 'dropout_output': 0.15000000000000002, 'weight_decay': -4, 'pct_start': 0.5}. Best is trial 2 with value: 0.6852376461029053.[0m



best_valid_loss: 0.6867477893829346 - best_valid_metric: -2594.373549625988
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 32, 'cha_hidden': 48, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': 0.0001, 'pct_start': 0.1, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 13:05:11,484][0m Trial 10 finished with value: 0.6853508949279785 and parameters: {'sign_size': 16.0, 'cha_input': 32.0, 'cha_hidden': 48.0, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': -4, 'pct_start': 0.1}. Best is trial 2 with value: 0.6852376461029053.[0m



best_valid_loss: 0.6853508949279785 - best_valid_metric: -2804.013063634279
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 32, 'cha_hidden': 48, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': 0.0001, 'pct_start': 0.1, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 13:40:10,329][0m Trial 11 finished with value: 0.6851301789283752 and parameters: {'sign_size': 16.0, 'cha_input': 32.0, 'cha_hidden': 48.0, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': -4, 'pct_start': 0.1}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6851301789283752 - best_valid_metric: -2649.2201549267947
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 32, 'cha_hidden': 48, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': 0.0001, 'pct_start': 0.2, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 14:11:01,844][0m Trial 12 finished with value: 0.6858416795730591 and parameters: {'sign_size': 16.0, 'cha_input': 32.0, 'cha_hidden': 48.0, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': -4, 'pct_start': 0.2}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6858416795730591 - best_valid_metric: -2365.644997709886
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 32, 'cha_hidden': 64, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': 0.0001, 'pct_start': 0.2, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 14:37:02,206][0m Trial 13 finished with value: 0.685330331325531 and parameters: {'sign_size': 16.0, 'cha_input': 32.0, 'cha_hidden': 64.0, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': -4, 'pct_start': 0.2}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.685330331325531 - best_valid_metric: -2489.8650512902946
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 16, 'cha_hidden': 48, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.2, 'dropout_output': 0.2, 'weight_decay': 0.0001, 'pct_start': 0.1, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 15:04:24,596][0m Trial 14 finished with value: 0.6857402920722961 and parameters: {'sign_size': 16.0, 'cha_input': 16.0, 'cha_hidden': 48.0, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.2, 'dropout_output': 0.2, 'weight_decay': -4, 'pct_start': 0.1}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6857402920722961 - best_valid_metric: -2563.1082384829424
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 32, 'cha_hidden': 48, 'K': 2, 'dropout_input': 0.25, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': 0.0001, 'pct_start': 0.2, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 15:39:30,709][0m Trial 15 finished with value: 0.6856235861778259 and parameters: {'sign_size': 16.0, 'cha_input': 32.0, 'cha_hidden': 48.0, 'K': 2, 'dropout_input': 0.25, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': -4, 'pct_start': 0.2}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6856235861778259 - best_valid_metric: -2638.76847242649
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 16, 'cha_hidden': 64, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.30000000000000004, 'dropout_output': 0.1, 'weight_decay': 0.0001, 'pct_start': 0.1, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 16:14:25,718][0m Trial 16 finished with value: 0.6858413219451904 and parameters: {'sign_size': 16.0, 'cha_input': 16.0, 'cha_hidden': 64.0, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.30000000000000004, 'dropout_output': 0.1, 'weight_decay': -4, 'pct_start': 0.1}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6858413219451904 - best_valid_metric: -2596.2894567888125
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 32, 'cha_hidden': 48, 'K': 2, 'dropout_input': 0.1, 'dropout_hidden': 0.2, 'dropout_output': 0.2, 'weight_decay': 0.0001, 'pct_start': 0.2, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 16:33:10,321][0m Trial 17 finished with value: 0.6862168908119202 and parameters: {'sign_size': 16.0, 'cha_input': 32.0, 'cha_hidden': 48.0, 'K': 2, 'dropout_input': 0.1, 'dropout_hidden': 0.2, 'dropout_output': 0.2, 'weight_decay': -4, 'pct_start': 0.2}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6862168908119202 - best_valid_metric: -2635.9777812881375
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 32, 'cha_hidden': 48, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.25, 'dropout_output': 0.3, 'weight_decay': 0.0001, 'pct_start': 0.1, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 16:58:56,067][0m Trial 18 finished with value: 0.6853377223014832 and parameters: {'sign_size': 16.0, 'cha_input': 32.0, 'cha_hidden': 48.0, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.25, 'dropout_output': 0.3, 'weight_decay': -4, 'pct_start': 0.1}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6853377223014832 - best_valid_metric: -2490.869056077916
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 16, 'cha_hidden': 64, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.30000000000000004, 'dropout_output': 0.25, 'weight_decay': 0.0001, 'pct_start': 0.30000000000000004, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 17:33:42,701][0m Trial 19 finished with value: 0.6854088306427002 and parameters: {'sign_size': 16.0, 'cha_input': 16.0, 'cha_hidden': 64.0, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.30000000000000004, 'dropout_output': 0.25, 'weight_decay': -4, 'pct_start': 0.30000000000000004}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6854088306427002 - best_valid_metric: -2646.3700040813023
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 32, 'cha_hidden': 48, 'K': 2, 'dropout_input': 0.25, 'dropout_hidden': 0.2, 'dropout_output': 0.2, 'weight_decay': 0.0001, 'pct_start': 0.2, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 18:03:01,037][0m Trial 20 finished with value: 0.6857221722602844 and parameters: {'sign_size': 16.0, 'cha_input': 32.0, 'cha_hidden': 48.0, 'K': 2, 'dropout_input': 0.25, 'dropout_hidden': 0.2, 'dropout_output': 0.2, 'weight_decay': -4, 'pct_start': 0.2}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6857221722602844 - best_valid_metric: -2446.732944217695
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 32, 'cha_hidden': 64, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': 0.0001, 'pct_start': 0.2, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 18:32:12,276][0m Trial 21 finished with value: 0.6856446266174316 and parameters: {'sign_size': 16.0, 'cha_input': 32.0, 'cha_hidden': 64.0, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': -4, 'pct_start': 0.2}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6856446266174316 - best_valid_metric: -2684.6209251450396
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 32, 'cha_hidden': 64, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': 0.0001, 'pct_start': 0.2, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 19:01:25,451][0m Trial 22 finished with value: 0.6854614615440369 and parameters: {'sign_size': 16.0, 'cha_input': 32.0, 'cha_hidden': 64.0, 'K': 2, 'dropout_input': 0.15000000000000002, 'dropout_hidden': 0.25, 'dropout_output': 0.25, 'weight_decay': -4, 'pct_start': 0.2}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6854614615440369 - best_valid_metric: -2508.642687409782
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 48, 'cha_hidden': 64, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.25, 'dropout_output': 0.2, 'weight_decay': 0.0001, 'pct_start': 0.1, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 19:31:38,535][0m Trial 23 finished with value: 0.6852570176124573 and parameters: {'sign_size': 16.0, 'cha_input': 48.0, 'cha_hidden': 64.0, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.25, 'dropout_output': 0.2, 'weight_decay': -4, 'pct_start': 0.1}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6852570176124573 - best_valid_metric: -2717.5285113774926
--------------------------------------------------------------------------------
sampled_params: {'input_dim': 131, 'output_dim': 5, 'sign_size': 16, 'cha_input': 48, 'cha_hidden': 48, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.30000000000000004, 'dropout_output': 0.2, 'weight_decay': 0.0001, 'pct_start': 0.1, 'max_lr': 0.01}


HBox(children=(FloatProgress(value=0.0, description='CNN1D', max=50.0, style=ProgressStyle(description_width='…

[32m[I 2021-02-20 20:01:11,643][0m Trial 24 finished with value: 0.6857308149337769 and parameters: {'sign_size': 16.0, 'cha_input': 48.0, 'cha_hidden': 48.0, 'K': 2, 'dropout_input': 0.2, 'dropout_hidden': 0.30000000000000004, 'dropout_output': 0.2, 'weight_decay': -4, 'pct_start': 0.1}. Best is trial 11 with value: 0.6851301789283752.[0m



best_valid_loss: 0.6857308149337769 - best_valid_metric: -2569.9084942630943


***