# emb_1

* validを2017/01/01~2017/09/30にする
* NNで回帰してみた

In [1]:
import optuna
import warnings
import numpy as np
import pandas as pd
from tqdm import tqdm
from datetime import timedelta, datetime
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error

import torch
import torch.nn as nn
import pytorch_lightning as pl
from test_tube import Experiment
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from pytorch_lightning import Trainer
from pytorch_lightning.logging import TestTubeLogger
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

use_cuda = torch.cuda.is_available() and True
device = torch.device("cuda" if use_cuda else "cpu")

pd.set_option('display.max_columns', 500)
warnings.simplefilter("ignore")



### load dataset

In [2]:
DATA_DIR = '../../dataset/input/'
train_raw = pd.read_csv(DATA_DIR + 'train.csv')
test_raw = pd.read_csv(DATA_DIR + 'test.csv')

In [3]:
train_raw.head()

Unnamed: 0,holiday,temperature,rain_in_hour,snow_in_hour,clouds_cover,weather,weather_detail,timestamp,traffic_volume
0,New Years Day,263.49,0.0,0.0,58,Clouds,broken clouds,2013-01-01 00:00:00,1439
1,,263.78,0.0,0.0,40,Clouds,scattered clouds,2013-01-01 01:00:00,1502
2,,264.16,0.0,0.0,75,Snow,heavy snow,2013-01-01 02:00:00,933
3,,263.95,0.0,0.0,90,Clouds,overcast clouds,2013-01-01 03:00:00,576
4,,263.65,0.0,0.0,90,Clouds,overcast clouds,2013-01-01 04:00:00,372


In [4]:
test_raw.head()

Unnamed: 0,holiday,temperature,rain_in_hour,snow_in_hour,clouds_cover,weather,weather_detail,timestamp
0,New Years Day,249.36,0.0,0.0,1,Clear,sky is clear,2018-01-01 00:00:00
1,,249.08,0.0,0.0,1,Clear,sky is clear,2018-01-01 01:00:00
2,,248.86,0.0,0.0,1,Clear,sky is clear,2018-01-01 02:00:00
3,,248.72,0.0,0.0,1,Clear,sky is clear,2018-01-01 03:00:00
4,,248.43,0.0,0.0,1,Clear,sky is clear,2018-01-01 04:00:00


In [5]:
print("shape of train df : ", train_raw.shape)
print("shape of test df : ", test_raw.shape)

shape of train df :  (37696, 9)
shape of test df :  (7949, 8)


### preprocess

In [6]:
# remove irregular data
train_raw = train_raw[train_raw['temperature'] != 0]
train_raw = train_raw[train_raw['rain_in_hour']  != 9831.30]

In [7]:
train_raw.shape

(37685, 9)

In [8]:
n_train = len(train_raw)
tmp_df = pd.concat([train_raw, test_raw], sort=True)

In [9]:
# feature enginnering from timestamp
def preprocessor_for_date(df):
    # basic
    df['date'] = pd.to_datetime(df['timestamp'])
    df['Year'] = df['date'].apply(lambda x: x.year)
    df['Month'] = df['date'].apply(lambda x: x.month)
    df['Day'] = df['date'].apply(lambda x: x.day)
    df['Hour'] = df['date'].apply(lambda x: x.hour)
    df['DayOfWeek'] = df['date'].apply(lambda x: x.dayofweek)
    df['DayOfYear'] = df['date'].apply(lambda x: x.dayofyear)
    df['WeekOfYear'] = df['date'].apply(lambda x: x.weekofyear)
    df['WeekOfMonth'] = df['date'].apply(lambda x: x.day // 7 + 1)
    return df

tmp_df = preprocessor_for_date(tmp_df)

In [10]:
# fill correct value for holidays column
def preprocessor_for_holidays(df):
    df['date'] = pd.to_datetime(df['timestamp'])
    holidays_col = df[df['holiday'] != 'None']
    for holiday, date in zip(holidays_col['holiday'], holidays_col['date']):
        df['holiday'].mask((df['date'] > date) & (df['date'] < date +  timedelta(days=1)), holiday, inplace=True)
    return df

tmp_df = preprocessor_for_holidays(tmp_df)

In [11]:
# label encoding
def label_encoding(df, categorical_features):
    label_encoders = {}
    for cat_col in categorical_features:
        label_encoders[cat_col] = LabelEncoder()
        df[cat_col] = label_encoders[cat_col].fit_transform(df[cat_col])
    return df

categorical_features = ['holiday', 'weather', 'weather_detail', 'Month', 'Day', 'Hour', 
                        'DayOfWeek', 'DayOfYear', 'WeekOfYear', 'WeekOfMonth']
tmp_df = label_encoding(tmp_df, categorical_features)

# emb_dimsが必要
cat_dims = [int(tmp_df[col].nunique()) for col in categorical_features]
emb_dims = [(x, min(50, (x + 1) // 2)) for x in cat_dims]

In [12]:
# scaling 
def scaling_continuous_feature(df, numerical_features):
    scaler = StandardScaler()
    df[numerical_features] = scaler.fit_transform(df[numerical_features])
    return df

continuous_feature = ['temperature', 'rain_in_hour', 'snow_in_hour', 'clouds_cover']
tmp_df = scaling_continuous_feature(tmp_df, continuous_feature)

In [13]:
tmp_df.head()

Unnamed: 0,clouds_cover,holiday,rain_in_hour,snow_in_hour,temperature,timestamp,traffic_volume,weather,weather_detail,date,Year,Month,Day,Hour,DayOfWeek,DayOfYear,WeekOfYear,WeekOfMonth
0,0.243744,6,-0.133546,-0.027985,-1.412115,2013-01-01 00:00:00,1439.0,1,2,2013-01-01 00:00:00,2013,0,0,0,1,0,0,0
1,-0.217067,6,-0.133546,-0.027985,-1.389521,2013-01-01 01:00:00,1502.0,1,24,2013-01-01 01:00:00,2013,0,0,1,1,0,0,0
2,0.678953,6,-0.133546,-0.027985,-1.359916,2013-01-01 02:00:00,933.0,8,10,2013-01-01 02:00:00,2013,0,0,2,1,0,0,0
3,1.062962,6,-0.133546,-0.027985,-1.376277,2013-01-01 03:00:00,576.0,1,19,2013-01-01 03:00:00,2013,0,0,3,1,0,0,0
4,1.062962,6,-0.133546,-0.027985,-1.399649,2013-01-01 04:00:00,372.0,1,19,2013-01-01 04:00:00,2013,0,0,4,1,0,0,0


In [14]:
tmp_df.tail()

Unnamed: 0,clouds_cover,holiday,rain_in_hour,snow_in_hour,temperature,timestamp,traffic_volume,weather,weather_detail,date,Year,Month,Day,Hour,DayOfWeek,DayOfYear,WeekOfYear,WeekOfMonth
7944,0.678953,7,-0.133546,-0.027985,0.142922,2018-09-30 19:00:00,,1,2,2018-09-30 19:00:00,2018,8,29,19,6,272,38,4
7945,1.062962,7,-0.133546,-0.027985,0.089165,2018-09-30 20:00:00,,1,19,2018-09-30 20:00:00,2018,8,29,20,6,272,38,4
7946,1.062962,7,-0.133546,-0.027985,0.086828,2018-09-30 21:00:00,,10,21,2018-09-30 21:00:00,2018,8,29,21,6,272,38,4
7947,1.062962,7,-0.133546,-0.027985,0.036967,2018-09-30 22:00:00,,1,19,2018-09-30 22:00:00,2018,8,29,22,6,272,38,4
7948,1.062962,7,-0.133546,-0.027985,0.039304,2018-09-30 23:00:00,,1,19,2018-09-30 23:00:00,2018,8,29,23,6,272,38,4


In [15]:
train_tmp = tmp_df[:n_train]
valid = train_tmp[(train_tmp['date'] >= "2017-01-01") & (train_tmp['date'] < "2017-10-01")]
train = train_tmp[(train_tmp['date'] < "2017-01-01") | (train_tmp['date'] >= "2017-10-01")]
test = tmp_df[n_train:]
del test['traffic_volume']

print(train.shape, valid.shape, test.shape)

(29733, 18) (7952, 18) (7949, 17)


### Train

In [16]:
# define the custom dataset
class TrafficVolumePredictionDataset(Dataset):
    def __init__(self, data, cont_cols=None, output_col=None, unused_cols=None):
        self.num_data = data.shape[0]
        if output_col:
            raw_y = data[output_col].astype(np.float32).values.reshape(-1, 1)
            self.y = np.log(raw_y + 1)
        else:
            # for test inference
            self.y =  np.zeros((self.num_data, 1))
        self.cont_cols = cont_cols
        self.cat_cols = [col for col in data.columns 
                         if col not in self.cont_cols + [output_col] + unused_cols]
        self.cont_X = data[self.cont_cols].astype(np.float32).values
        self.cat_X = data[self.cat_cols].astype(np.int64).values
        
    def __len__(self):
        return self.num_data

    def __getitem__(self, idx):
        return [self.y[idx], self.cont_X[idx], self.cat_X[idx]]
    
# define custom loss
class RMSELoss(nn.Module):
    def __init__(self, eps=1e-6):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps
        
    def forward(self, yhat, y):
        loss = torch.sqrt(self.mse(yhat, y) + self.eps)
        return loss

# define my model
class MyModel(nn.Module):
    def __init__(self, emb_dims, num_of_cont, lin_layer_sizes):
        super(MyModel, self).__init__()
        
        # embeeding layers
        self.emb_layers = nn.ModuleList([nn.Embedding(x, y) for x, y in emb_dims])

        # Linear Layers
        self.num_of_embs = sum([y for x, y in emb_dims])
        self.num_of_cont = num_of_cont
        first_lin_layer = nn.Linear(self.num_of_embs + self.num_of_cont, lin_layer_sizes[0])
        self.lin_layers = \
            nn.ModuleList([first_lin_layer] + \
            [nn.Linear(lin_layer_sizes[i], lin_layer_sizes[i + 1])
            for i in range(len(lin_layer_sizes) - 1)])
        
        # initialize weight
        for lin_layer in self.lin_layers:
            nn.init.kaiming_normal_(lin_layer.weight.data)

        # Output Layer
        self.output_layer = nn.Linear(lin_layer_sizes[-1], 1)
        nn.init.kaiming_normal_(self.output_layer.weight.data)

        # Batch Norm Layers
        self.bn_layers = nn.ModuleList([nn.BatchNorm1d(size) for size in lin_layer_sizes])
        
    def forward(self, cont_data, cat_data):
        # embedding
        x = [emb_layer(cat_data[:, i]) for i, emb_layer in enumerate(self.emb_layers)]
        x = torch.cat(x, 1)

        # concat
        x = torch.cat([x, cont_data], 1) 
        
        # linear
        for lin_layer, bn_layer in zip(self.lin_layers, self.bn_layers):
            x = F.relu(lin_layer(x))
            x = bn_layer(x)

        x = self.output_layer(x)

        return x

In [17]:
## define pytorch lightnig class

class TrafficVolumePrediction(pl.LightningModule):
    def __init__(self, emb_dims, num_of_cont, lin_layer_sizes):
        super(TrafficVolumePrediction, self).__init__()
        # embbeding layer
        self.emb_layers = nn.ModuleList([nn.Embedding(x, y) for x, y in emb_dims])

        # Linear Layers
        self.num_of_embs = sum([y for x, y in emb_dims])
        self.num_of_cont = num_of_cont
        first_lin_layer = nn.Linear(self.num_of_embs + self.num_of_cont, lin_layer_sizes[0])
        self.lin_layers = \
            nn.ModuleList([first_lin_layer] + \
            [nn.Linear(lin_layer_sizes[i], lin_layer_sizes[i + 1])
            for i in range(len(lin_layer_sizes) - 1)])
        
        # initialize weight
        for lin_layer in self.lin_layers:
            nn.init.kaiming_normal_(lin_layer.weight.data)

        # Output Layer
        self.output_layer = nn.Linear(lin_layer_sizes[-1], 1)
        nn.init.kaiming_normal_(self.output_layer.weight.data)

        # Batch Norm Layers
        self.bn_layers = nn.ModuleList([nn.BatchNorm1d(size) for size in lin_layer_sizes])
        # loss function
        self.criterion = RMSELoss()
    
    def forward(self, cont_data, cat_data):
        # embedding
        x = [emb_layer(cat_data[:, i]) for i, emb_layer in enumerate(self.emb_layers)]
        x = torch.cat(x, 1)

        # concat
        x = torch.cat([x, cont_data], 1) 
        
        # linear
        for lin_layer, bn_layer in zip(self.lin_layers, self.bn_layers):
            x = F.relu(lin_layer(x))
            x = bn_layer(x)

        x = self.output_layer(x)
        return x
    
    def training_step(self, batch, batch_nb):
        # REQUIRED
        y, cont_x, cat_x = batch
        y_hat = self.forward(cont_x, cat_x)
        return {'loss': self.criterion(y_hat, y)}

    def validation_step(self, batch, batch_nb):
        # OPTIONAL
        y, cont_x, cat_x = batch
        y_hat = self.forward(cont_x, cat_x)
        return {'val_loss': self.criterion(y_hat, y)}

    def validation_end(self, outputs):
        # OPTIONAL
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        print('val_loss ', avg_loss)
        return {'avg_val_loss': avg_loss}
    
    def test_step(self, batch, batch_nb):
        # OPTIONAL
        y, cont_x, cat_x = batch
        y_hat = self.forward(cont_x, cat_x)
        return {'val_loss': self.criterion(y_hat, y)}

    def test_end(self, outputs):
        # OPTIONAL
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        return {'avg_val_loss': avg_loss}

    def configure_optimizers(self):
        # REQUIRED
        # can return multiple optimizers and learning_rate schedulers
        return torch.optim.Adam(self.parameters(), lr=0.001)

    @pl.data_loader
    def tng_dataloader(self):
        # REQUIRED
        dataset = TrafficVolumePredictionDataset(
            data=train,
            cont_cols=['temperature', 'rain_in_hour', 'clouds_cover'],
            output_col='traffic_volume',
            unused_cols=['timestamp', 'Year', 'snow_in_hour', 'date']
        )
        return DataLoader(dataset, batch_size=256, shuffle=True)
    
    @pl.data_loader
    def val_dataloader(self):
        # OPTIONAL
        dataset = TrafficVolumePredictionDataset(
            data=valid,
            cont_cols=['temperature', 'rain_in_hour', 'clouds_cover'],
            output_col='traffic_volume',
            unused_cols=['timestamp', 'Year', 'snow_in_hour', 'date']
        )
        return DataLoader(dataset, batch_size=256, shuffle=True)

In [18]:
# define variables
emb_dims = emb_dims
cont_col = ['temperature', 'rain_in_hour', 'clouds_cover']
num_of_cont = len(cont_col)
lin_layer_sizes = [128, 64, 32]

In [19]:
# train
# pytorch lightningは、validデータを与えるとデフォルトでearly stoppingする

# custom checkpoint and early stop
checkpoint_callback = ModelCheckpoint(
    filepath='result/best_model/',
    save_best_only=True,
    verbose=True,
    monitor='avg_val_loss',
    mode='min'
)

model = TrafficVolumePrediction(emb_dims, num_of_cont, lin_layer_sizes)
trainer = Trainer(default_save_path='result', max_nb_epochs=100, 
                  checkpoint_callback=checkpoint_callback)
trainer.fit(model)

Validation sanity check:   0%|          | 0/5 [00:00<?, ?batch/s]

val_loss  tensor(0.3091)


Epoch 12:  79%|███████▊  | 117/149 [00:02<00:00, 37.65batch/s, batch_nb=116, loss=0.234, v_nb=2]
Epoch 12:  87%|████████▋ | 130/149 [00:03<00:00, 48.22batch/s, batch_nb=116, loss=0.234, v_nb=2]
Epoch 12:  98%|█████████▊| 146/149 [00:03<00:00, 60.72batch/s, batch_nb=116, loss=0.234, v_nb=2]

val_loss  tensor(0.2912)


Epoch 12: 100%|██████████| 149/149 [00:03<00:00, 60.72batch/s, batch_nb=116, loss=0.234, v_nb=2]
Epoch 13:  79%|███████▊  | 117/149 [00:02<00:00, 44.56batch/s, batch_nb=116, loss=0.226, v_nb=2]
Epoch 13:  85%|████████▌ | 127/149 [00:02<00:00, 54.71batch/s, batch_nb=116, loss=0.226, v_nb=2]
Epoch 13:  96%|█████████▌| 143/149 [00:02<00:00, 67.91batch/s, batch_nb=116, loss=0.226, v_nb=2]

val_loss  tensor(0.2978)


Epoch 13: 100%|██████████| 149/149 [00:02<00:00, 67.91batch/s, batch_nb=116, loss=0.226, v_nb=2]
Epoch 14:  79%|███████▊  | 117/149 [00:02<00:00, 36.56batch/s, batch_nb=116, loss=0.216, v_nb=2]
Epoch 14:  80%|███████▉  | 119/149 [00:02<00:00, 35.90batch/s, batch_nb=116, loss=0.216, v_nb=2]
Epoch 14:  86%|████████▌ | 128/149 [00:03<00:00, 43.48batch/s, batch_nb=116, loss=0.216, v_nb=2]
Epoch 14:  96%|█████████▌| 143/149 [00:03<00:00, 55.02batch/s, batch_nb=116, loss=0.216, v_nb=2]

val_loss  tensor(0.2788)


Epoch 14: 100%|██████████| 149/149 [00:03<00:00, 55.02batch/s, batch_nb=116, loss=0.216, v_nb=2]
Epoch 15:  79%|███████▊  | 117/149 [00:02<00:00, 40.55batch/s, batch_nb=116, loss=0.210, v_nb=2]
Epoch 15:  79%|███████▉  | 118/149 [00:02<00:00, 43.29batch/s, batch_nb=116, loss=0.210, v_nb=2]
Epoch 15:  89%|████████▊ | 132/149 [00:02<00:00, 54.48batch/s, batch_nb=116, loss=0.210, v_nb=2]
Epoch 15:  98%|█████████▊| 146/149 [00:03<00:00, 66.33batch/s, batch_nb=116, loss=0.210, v_nb=2]

val_loss  tensor(0.2980)


Epoch 15: 100%|██████████| 149/149 [00:03<00:00, 66.33batch/s, batch_nb=116, loss=0.210, v_nb=2]
Epoch 16:  79%|███████▊  | 117/149 [00:03<00:00, 42.69batch/s, batch_nb=116, loss=0.212, v_nb=2]
Epoch 16:  79%|███████▉  | 118/149 [00:03<00:00, 44.47batch/s, batch_nb=116, loss=0.212, v_nb=2]
Epoch 16:  90%|████████▉ | 134/149 [00:03<00:00, 56.27batch/s, batch_nb=116, loss=0.212, v_nb=2]
Epoch 16:  99%|█████████▉| 148/149 [00:03<00:00, 67.50batch/s, batch_nb=116, loss=0.212, v_nb=2]

val_loss  tensor(0.2843)


Epoch 16: 100%|██████████| 149/149 [00:03<00:00, 67.50batch/s, batch_nb=116, loss=0.212, v_nb=2]
Epoch 17:  79%|███████▊  | 117/149 [00:03<00:01, 30.93batch/s, batch_nb=116, loss=0.207, v_nb=2]
Epoch 17:  85%|████████▍ | 126/149 [00:03<00:00, 38.42batch/s, batch_nb=116, loss=0.207, v_nb=2]
Epoch 17:  91%|█████████▏| 136/149 [00:03<00:00, 47.01batch/s, batch_nb=116, loss=0.207, v_nb=2]
Epoch 17:  98%|█████████▊| 146/149 [00:04<00:00, 55.58batch/s, batch_nb=116, loss=0.207, v_nb=2]
Validating:  97%|█████████▋| 31/32 [00:00<00:00, 94.40batch/s] 

val_loss  tensor(0.2775)


Epoch 17: 100%|██████████| 149/149 [00:04<00:00, 55.58batch/s, batch_nb=116, loss=0.207, v_nb=2]
Epoch 18:  79%|███████▊  | 117/149 [00:03<00:00, 41.61batch/s, batch_nb=116, loss=0.204, v_nb=2]
Epoch 18:  85%|████████▍ | 126/149 [00:03<00:00, 50.83batch/s, batch_nb=116, loss=0.204, v_nb=2]
Epoch 18:  93%|█████████▎| 139/149 [00:03<00:00, 62.11batch/s, batch_nb=116, loss=0.204, v_nb=2]
Validating:  84%|████████▍ | 27/32 [00:00<00:00, 135.28batch/s]

val_loss  tensor(0.2912)


Epoch 18: 100%|██████████| 149/149 [00:03<00:00, 62.11batch/s, batch_nb=116, loss=0.204, v_nb=2]
Epoch 19:  79%|███████▊  | 117/149 [00:02<00:00, 40.19batch/s, batch_nb=116, loss=0.202, v_nb=2]
Epoch 19:  87%|████████▋ | 130/149 [00:03<00:00, 50.41batch/s, batch_nb=116, loss=0.202, v_nb=2]
Validating:  44%|████▍     | 14/32 [00:00<00:00, 136.85batch/s]
Epoch 19:  92%|█████████▏| 137/149 [00:03<00:00, 53.47batch/s, batch_nb=116, loss=0.202, v_nb=2]

val_loss  tensor(0.2678)


Epoch 19: 100%|██████████| 149/149 [00:03<00:00, 53.47batch/s, batch_nb=116, loss=0.202, v_nb=2]
Epoch 20:  79%|███████▊  | 117/149 [00:03<00:00, 43.46batch/s, batch_nb=116, loss=0.192, v_nb=2]
Epoch 20:  81%|████████  | 121/149 [00:03<00:00, 49.78batch/s, batch_nb=116, loss=0.192, v_nb=2]
Epoch 20:  91%|█████████▏| 136/149 [00:03<00:00, 61.98batch/s, batch_nb=116, loss=0.192, v_nb=2]
Validating:  97%|█████████▋| 31/32 [00:00<00:00, 148.52batch/s]

val_loss  tensor(0.2647)


Epoch 20: 100%|██████████| 149/149 [00:03<00:00, 61.98batch/s, batch_nb=116, loss=0.192, v_nb=2]
Epoch 21:  79%|███████▊  | 117/149 [00:02<00:00, 38.59batch/s, batch_nb=116, loss=0.197, v_nb=2]
Epoch 21:  87%|████████▋ | 129/149 [00:02<00:00, 48.10batch/s, batch_nb=116, loss=0.197, v_nb=2]
Epoch 21:  95%|█████████▌| 142/149 [00:02<00:00, 59.28batch/s, batch_nb=116, loss=0.197, v_nb=2]
Validating:  81%|████████▏ | 26/32 [00:00<00:00, 125.09batch/s]

val_loss  tensor(0.2711)


Epoch 21: 100%|██████████| 149/149 [00:02<00:00, 59.28batch/s, batch_nb=116, loss=0.197, v_nb=2]
Epoch 22:  79%|███████▊  | 117/149 [00:02<00:00, 44.70batch/s, batch_nb=116, loss=0.199, v_nb=2]
Epoch 22:  79%|███████▉  | 118/149 [00:02<00:00, 47.43batch/s, batch_nb=116, loss=0.199, v_nb=2]
Epoch 22:  89%|████████▉ | 133/149 [00:03<00:00, 59.61batch/s, batch_nb=116, loss=0.199, v_nb=2]
Epoch 22:  99%|█████████▉| 148/149 [00:03<00:00, 72.72batch/s, batch_nb=116, loss=0.199, v_nb=2]

val_loss  tensor(0.2593)


Epoch 22: 100%|██████████| 149/149 [00:03<00:00, 72.72batch/s, batch_nb=116, loss=0.199, v_nb=2]
Epoch 23:  79%|███████▊  | 117/149 [00:02<00:00, 38.53batch/s, batch_nb=116, loss=0.189, v_nb=2]
Epoch 23:  83%|████████▎ | 123/149 [00:02<00:00, 44.66batch/s, batch_nb=116, loss=0.189, v_nb=2]
Epoch 23:  91%|█████████ | 135/149 [00:03<00:00, 54.76batch/s, batch_nb=116, loss=0.189, v_nb=2]
Epoch 23:  97%|█████████▋| 145/149 [00:03<00:00, 63.32batch/s, batch_nb=116, loss=0.189, v_nb=2]

val_loss  tensor(0.2749)


Epoch 23: 100%|██████████| 149/149 [00:03<00:00, 63.32batch/s, batch_nb=116, loss=0.189, v_nb=2]
Epoch 24:  79%|███████▊  | 117/149 [00:03<00:00, 38.77batch/s, batch_nb=116, loss=0.185, v_nb=2]
Epoch 24:  82%|████████▏ | 122/149 [00:03<00:00, 45.53batch/s, batch_nb=116, loss=0.185, v_nb=2]
Epoch 24:  91%|█████████▏| 136/149 [00:03<00:00, 57.01batch/s, batch_nb=116, loss=0.185, v_nb=2]
Validating:  88%|████████▊ | 28/32 [00:00<00:00, 137.04batch/s]

val_loss  tensor(0.2500)


Epoch 24: 100%|██████████| 149/149 [00:03<00:00, 57.01batch/s, batch_nb=116, loss=0.185, v_nb=2]
Epoch 25:  79%|███████▊  | 117/149 [00:03<00:00, 38.41batch/s, batch_nb=116, loss=0.185, v_nb=2]
Epoch 25:  86%|████████▌ | 128/149 [00:03<00:00, 47.58batch/s, batch_nb=116, loss=0.185, v_nb=2]
Epoch 25:  95%|█████████▌| 142/149 [00:03<00:00, 59.16batch/s, batch_nb=116, loss=0.185, v_nb=2]
Validating:  81%|████████▏ | 26/32 [00:00<00:00, 123.09batch/s]

val_loss  tensor(0.2535)


Epoch 25: 100%|██████████| 149/149 [00:03<00:00, 59.16batch/s, batch_nb=116, loss=0.185, v_nb=2]
Epoch 26:  79%|███████▊  | 117/149 [00:02<00:00, 39.97batch/s, batch_nb=116, loss=0.186, v_nb=2]
Epoch 26:  83%|████████▎ | 124/149 [00:02<00:00, 47.70batch/s, batch_nb=116, loss=0.186, v_nb=2]
Epoch 26:  92%|█████████▏| 137/149 [00:03<00:00, 58.00batch/s, batch_nb=116, loss=0.186, v_nb=2]
Validating:  78%|███████▊  | 25/32 [00:00<00:00, 123.24batch/s]

val_loss  tensor(0.2565)


Epoch 26: 100%|██████████| 149/149 [00:03<00:00, 58.00batch/s, batch_nb=116, loss=0.186, v_nb=2]
Epoch 27:  79%|███████▊  | 117/149 [00:03<00:00, 41.25batch/s, batch_nb=116, loss=0.190, v_nb=2]
Epoch 27:  87%|████████▋ | 129/149 [00:03<00:00, 51.30batch/s, batch_nb=116, loss=0.190, v_nb=2]
Epoch 27:  96%|█████████▌| 143/149 [00:03<00:00, 63.17batch/s, batch_nb=116, loss=0.190, v_nb=2]
Validating:  88%|████████▊ | 28/32 [00:00<00:00, 134.72batch/s]

val_loss  tensor(0.2684)


Epoch 27: 100%|██████████| 149/149 [00:03<00:00, 63.17batch/s, batch_nb=116, loss=0.190, v_nb=2]
Epoch 28:  79%|███████▊  | 117/149 [00:03<00:00, 37.62batch/s, batch_nb=116, loss=0.181, v_nb=2]
Epoch 28:  86%|████████▌ | 128/149 [00:03<00:00, 47.27batch/s, batch_nb=116, loss=0.181, v_nb=2]
Epoch 28:  95%|█████████▌| 142/149 [00:03<00:00, 58.67batch/s, batch_nb=116, loss=0.181, v_nb=2]
Validating:  88%|████████▊ | 28/32 [00:00<00:00, 137.92batch/s]

val_loss  tensor(0.2491)


Epoch 28: 100%|██████████| 149/149 [00:03<00:00, 58.67batch/s, batch_nb=116, loss=0.181, v_nb=2]
Epoch 29:  79%|███████▊  | 117/149 [00:03<00:00, 41.60batch/s, batch_nb=116, loss=0.174, v_nb=2]
Epoch 29:  87%|████████▋ | 130/149 [00:03<00:00, 52.19batch/s, batch_nb=116, loss=0.174, v_nb=2]
Epoch 29:  97%|█████████▋| 144/149 [00:03<00:00, 64.17batch/s, batch_nb=116, loss=0.174, v_nb=2]
Validating:  91%|█████████ | 29/32 [00:00<00:00, 139.85batch/s]

val_loss  tensor(0.2634)


Epoch 29: 100%|██████████| 149/149 [00:03<00:00, 64.17batch/s, batch_nb=116, loss=0.174, v_nb=2]
Epoch 30:  79%|███████▊  | 117/149 [00:02<00:00, 41.18batch/s, batch_nb=116, loss=0.168, v_nb=2]
Epoch 30:  86%|████████▌ | 128/149 [00:02<00:00, 51.09batch/s, batch_nb=116, loss=0.168, v_nb=2]
Epoch 30:  95%|█████████▍| 141/149 [00:02<00:00, 62.45batch/s, batch_nb=116, loss=0.168, v_nb=2]
Validating:  84%|████████▍ | 27/32 [00:00<00:00, 134.18batch/s]

val_loss  tensor(0.2445)


Epoch 30: 100%|██████████| 149/149 [00:03<00:00, 62.45batch/s, batch_nb=116, loss=0.168, v_nb=2]
Epoch 31:  79%|███████▊  | 117/149 [00:02<00:00, 45.03batch/s, batch_nb=116, loss=0.176, v_nb=2]
Epoch 31:  88%|████████▊ | 131/149 [00:02<00:00, 56.35batch/s, batch_nb=116, loss=0.176, v_nb=2]
Epoch 31:  98%|█████████▊| 146/149 [00:02<00:00, 68.90batch/s, batch_nb=116, loss=0.176, v_nb=2]
Validating:  94%|█████████▍| 30/32 [00:00<00:00, 147.90batch/s]

val_loss  tensor(0.2453)


Epoch 31: 100%|██████████| 149/149 [00:02<00:00, 68.90batch/s, batch_nb=116, loss=0.176, v_nb=2]
Epoch 32:  79%|███████▊  | 117/149 [00:02<00:00, 45.44batch/s, batch_nb=116, loss=0.164, v_nb=2]
Epoch 32:  85%|████████▍ | 126/149 [00:02<00:00, 54.82batch/s, batch_nb=116, loss=0.164, v_nb=2]
Epoch 32:  95%|█████████▍| 141/149 [00:02<00:00, 67.14batch/s, batch_nb=116, loss=0.164, v_nb=2]
Validating:  91%|█████████ | 29/32 [00:00<00:00, 144.55batch/s]

val_loss  tensor(0.2510)


Epoch 32: 100%|██████████| 149/149 [00:02<00:00, 67.14batch/s, batch_nb=116, loss=0.164, v_nb=2]
Epoch 33:  79%|███████▊  | 117/149 [00:02<00:00, 45.54batch/s, batch_nb=116, loss=0.171, v_nb=2]
Epoch 33:  79%|███████▉  | 118/149 [00:02<00:00, 48.37batch/s, batch_nb=116, loss=0.171, v_nb=2]
Epoch 33:  90%|████████▉ | 134/149 [00:02<00:00, 57.08batch/s, batch_nb=116, loss=0.171, v_nb=2]
Validating:  72%|███████▏  | 23/32 [00:00<00:00, 110.90batch/s]

val_loss  tensor(0.2344)


Epoch 33: 100%|██████████| 149/149 [00:02<00:00, 57.08batch/s, batch_nb=116, loss=0.171, v_nb=2]
Epoch 34:  79%|███████▊  | 117/149 [00:02<00:00, 45.34batch/s, batch_nb=116, loss=0.172, v_nb=2]
Epoch 34:  85%|████████▍ | 126/149 [00:02<00:00, 54.75batch/s, batch_nb=116, loss=0.172, v_nb=2]
Epoch 34:  95%|█████████▍| 141/149 [00:02<00:00, 67.14batch/s, batch_nb=116, loss=0.172, v_nb=2]
Validating:  94%|█████████▍| 30/32 [00:00<00:00, 145.98batch/s]

val_loss  tensor(0.2495)


Epoch 34: 100%|██████████| 149/149 [00:02<00:00, 67.14batch/s, batch_nb=116, loss=0.172, v_nb=2]
Epoch 35:  79%|███████▊  | 117/149 [00:02<00:00, 45.04batch/s, batch_nb=116, loss=0.167, v_nb=2]
Epoch 35:  79%|███████▉  | 118/149 [00:02<00:00, 47.58batch/s, batch_nb=116, loss=0.167, v_nb=2]
Epoch 35:  89%|████████▉ | 133/149 [00:02<00:00, 59.59batch/s, batch_nb=116, loss=0.167, v_nb=2]
Epoch 35:  99%|█████████▉| 148/149 [00:02<00:00, 72.37batch/s, batch_nb=116, loss=0.167, v_nb=2]

val_loss  tensor(0.2439)


Epoch 35: 100%|██████████| 149/149 [00:02<00:00, 72.37batch/s, batch_nb=116, loss=0.167, v_nb=2]
Epoch 36:  79%|███████▊  | 117/149 [00:02<00:00, 44.30batch/s, batch_nb=116, loss=0.168, v_nb=2]
Epoch 36:  87%|████████▋ | 130/149 [00:02<00:00, 55.46batch/s, batch_nb=116, loss=0.168, v_nb=2]
Epoch 36:  97%|█████████▋| 144/149 [00:02<00:00, 67.19batch/s, batch_nb=116, loss=0.168, v_nb=2]
Validating:  91%|█████████ | 29/32 [00:00<00:00, 140.39batch/s]

val_loss  tensor(0.2397)


Epoch 36: 100%|██████████| 149/149 [00:02<00:00, 67.19batch/s, batch_nb=116, loss=0.168, v_nb=2]
Epoch 37:  79%|███████▊  | 117/149 [00:02<00:00, 46.39batch/s, batch_nb=116, loss=0.168, v_nb=2]
Epoch 37:  88%|████████▊ | 131/149 [00:02<00:00, 57.92batch/s, batch_nb=116, loss=0.168, v_nb=2]
Epoch 37:  97%|█████████▋| 144/149 [00:02<00:00, 69.46batch/s, batch_nb=116, loss=0.168, v_nb=2]
Validating:  91%|█████████ | 29/32 [00:00<00:00, 139.70batch/s]

val_loss  tensor(0.2572)


Epoch 37: 100%|██████████| 149/149 [00:02<00:00, 69.46batch/s, batch_nb=116, loss=0.168, v_nb=2]
Epoch 38:  79%|███████▊  | 117/149 [00:02<00:00, 43.95batch/s, batch_nb=116, loss=0.166, v_nb=2]
Epoch 38:  85%|████████▍ | 126/149 [00:02<00:00, 53.45batch/s, batch_nb=116, loss=0.166, v_nb=2]
Epoch 38:  93%|█████████▎| 139/149 [00:02<00:00, 64.59batch/s, batch_nb=116, loss=0.166, v_nb=2]
Validating:  81%|████████▏ | 26/32 [00:00<00:00, 130.12batch/s]

val_loss  tensor(0.2594)


Epoch 38: 100%|██████████| 149/149 [00:02<00:00, 64.59batch/s, batch_nb=116, loss=0.166, v_nb=2]
Epoch 39:  79%|███████▊  | 117/149 [00:03<00:00, 35.71batch/s, batch_nb=116, loss=0.165, v_nb=2]
Epoch 39:  82%|████████▏ | 122/149 [00:03<00:00, 42.51batch/s, batch_nb=116, loss=0.165, v_nb=2]
Epoch 39:  92%|█████████▏| 137/149 [00:03<00:00, 53.75batch/s, batch_nb=116, loss=0.165, v_nb=2]
Validating:  88%|████████▊ | 28/32 [00:00<00:00, 135.82batch/s]

val_loss  tensor(0.2498)


Epoch 39: 100%|██████████| 149/149 [00:03<00:00, 53.75batch/s, batch_nb=116, loss=0.165, v_nb=2]
Epoch 40:  79%|███████▊  | 117/149 [00:03<00:00, 33.42batch/s, batch_nb=116, loss=0.163, v_nb=2]
Epoch 40:  83%|████████▎ | 124/149 [00:03<00:00, 40.95batch/s, batch_nb=116, loss=0.163, v_nb=2]
Epoch 40:  91%|█████████ | 135/149 [00:03<00:00, 49.81batch/s, batch_nb=116, loss=0.163, v_nb=2]
Epoch 40:  97%|█████████▋| 145/149 [00:03<00:00, 57.94batch/s, batch_nb=116, loss=0.163, v_nb=2]
Validating:  97%|█████████▋| 31/32 [00:00<00:00, 100.26batch/s]

val_loss  tensor(0.2381)


Epoch 40: 100%|██████████| 149/149 [00:03<00:00, 57.94batch/s, batch_nb=116, loss=0.163, v_nb=2]
Epoch 41:  79%|███████▊  | 117/149 [00:03<00:00, 38.88batch/s, batch_nb=116, loss=0.163, v_nb=2]
Epoch 41:  84%|████████▍ | 125/149 [00:03<00:00, 47.21batch/s, batch_nb=116, loss=0.163, v_nb=2]
Epoch 41:  93%|█████████▎| 139/149 [00:03<00:00, 58.81batch/s, batch_nb=116, loss=0.163, v_nb=2]
Validating:  84%|████████▍ | 27/32 [00:00<00:00, 130.59batch/s]

val_loss  tensor(0.2392)


Epoch 41: 100%|██████████| 149/149 [00:03<00:00, 58.81batch/s, batch_nb=116, loss=0.163, v_nb=2]
Epoch 42:  79%|███████▊  | 117/149 [00:03<00:00, 33.85batch/s, batch_nb=116, loss=0.166, v_nb=2]
Epoch 42:  82%|████████▏ | 122/149 [00:03<00:00, 39.88batch/s, batch_nb=116, loss=0.166, v_nb=2]
Epoch 42:  89%|████████▉ | 133/149 [00:03<00:00, 49.04batch/s, batch_nb=116, loss=0.166, v_nb=2]
Epoch 42:  98%|█████████▊| 146/149 [00:03<00:00, 60.18batch/s, batch_nb=116, loss=0.166, v_nb=2]

val_loss  tensor(0.2372)


Epoch 42: 100%|██████████| 149/149 [00:03<00:00, 60.18batch/s, batch_nb=116, loss=0.166, v_nb=2]
Epoch 43:  79%|███████▊  | 117/149 [00:03<00:00, 36.00batch/s, batch_nb=116, loss=0.157, v_nb=2]
Epoch 43:  79%|███████▉  | 118/149 [00:03<00:00, 38.72batch/s, batch_nb=116, loss=0.157, v_nb=2]
Epoch 43:  89%|████████▊ | 132/149 [00:03<00:00, 49.31batch/s, batch_nb=116, loss=0.157, v_nb=2]
Epoch 43:  97%|█████████▋| 145/149 [00:03<00:00, 60.16batch/s, batch_nb=116, loss=0.157, v_nb=2]

val_loss  tensor(0.2462)


Epoch 43: 100%|██████████| 149/149 [00:03<00:00, 60.16batch/s, batch_nb=116, loss=0.157, v_nb=2]
Epoch 44:  79%|███████▊  | 117/149 [00:03<00:00, 38.64batch/s, batch_nb=116, loss=0.157, v_nb=2]
Epoch 44:  80%|███████▉  | 119/149 [00:03<00:00, 43.19batch/s, batch_nb=116, loss=0.157, v_nb=2]
Epoch 44:  89%|████████▉ | 133/149 [00:03<00:00, 53.24batch/s, batch_nb=116, loss=0.157, v_nb=2]
Epoch 44:  99%|█████████▊| 147/149 [00:03<00:00, 65.04batch/s, batch_nb=116, loss=0.157, v_nb=2]

val_loss  tensor(0.2395)


Epoch 44: 100%|██████████| 149/149 [00:03<00:00, 65.04batch/s, batch_nb=116, loss=0.157, v_nb=2]
Epoch 45:  79%|███████▊  | 117/149 [00:03<00:00, 33.02batch/s, batch_nb=116, loss=0.162, v_nb=2]
Epoch 45:  79%|███████▉  | 118/149 [00:03<00:00, 35.84batch/s, batch_nb=116, loss=0.162, v_nb=2]
Epoch 45:  87%|████████▋ | 129/149 [00:03<00:00, 44.88batch/s, batch_nb=116, loss=0.162, v_nb=2]
Epoch 45:  94%|█████████▍| 140/149 [00:03<00:00, 54.32batch/s, batch_nb=116, loss=0.162, v_nb=2]
Validating:  97%|█████████▋| 31/32 [00:00<00:00, 101.25batch/s]

val_loss  tensor(0.2448)


Epoch 45: 100%|██████████| 149/149 [00:03<00:00, 54.32batch/s, batch_nb=116, loss=0.162, v_nb=2]
Epoch 46:  79%|███████▊  | 117/149 [00:03<00:01, 30.74batch/s, batch_nb=116, loss=0.155, v_nb=2]
Epoch 46:  79%|███████▉  | 118/149 [00:03<00:00, 34.63batch/s, batch_nb=116, loss=0.155, v_nb=2]
Epoch 46:  89%|████████▊ | 132/149 [00:03<00:00, 44.72batch/s, batch_nb=116, loss=0.155, v_nb=2]
Epoch 46:  99%|█████████▊| 147/149 [00:03<00:00, 56.49batch/s, batch_nb=116, loss=0.155, v_nb=2]

val_loss  tensor(0.2413)


Epoch 46: 100%|██████████| 149/149 [00:03<00:00, 56.49batch/s, batch_nb=116, loss=0.155, v_nb=2]
Epoch 47:  79%|███████▊  | 117/149 [00:02<00:00, 40.03batch/s, batch_nb=116, loss=0.163, v_nb=2]
Epoch 47:  83%|████████▎ | 123/149 [00:03<00:00, 47.90batch/s, batch_nb=116, loss=0.163, v_nb=2]
Epoch 47:  88%|████████▊ | 131/149 [00:03<00:00, 53.80batch/s, batch_nb=116, loss=0.163, v_nb=2]
Epoch 47:  97%|█████████▋| 145/149 [00:03<00:00, 65.53batch/s, batch_nb=116, loss=0.163, v_nb=2]

val_loss  tensor(0.2400)


Epoch 47: 100%|██████████| 149/149 [00:03<00:00, 65.53batch/s, batch_nb=116, loss=0.163, v_nb=2]
Epoch 48:  79%|███████▊  | 117/149 [00:02<00:00, 42.19batch/s, batch_nb=116, loss=0.153, v_nb=2]
Epoch 48:  85%|████████▍ | 126/149 [00:02<00:00, 51.37batch/s, batch_nb=116, loss=0.153, v_nb=2]
Epoch 48:  94%|█████████▍| 140/149 [00:03<00:00, 63.29batch/s, batch_nb=116, loss=0.153, v_nb=2]
Validating:  91%|█████████ | 29/32 [00:00<00:00, 141.25batch/s]

val_loss  tensor(0.2413)


Epoch 48: 100%|██████████| 149/149 [00:03<00:00, 63.29batch/s, batch_nb=116, loss=0.153, v_nb=2]
Epoch 49:  79%|███████▊  | 117/149 [00:03<00:01, 31.43batch/s, batch_nb=116, loss=0.156, v_nb=2]
Epoch 49:  79%|███████▉  | 118/149 [00:03<00:00, 34.30batch/s, batch_nb=116, loss=0.156, v_nb=2]
Epoch 49:  86%|████████▌ | 128/149 [00:03<00:00, 42.57batch/s, batch_nb=116, loss=0.156, v_nb=2]
Epoch 49:  93%|█████████▎| 138/149 [00:03<00:00, 51.14batch/s, batch_nb=116, loss=0.156, v_nb=2]
Epoch 49:  99%|█████████▉| 148/149 [00:03<00:00, 59.17batch/s, batch_nb=116, loss=0.156, v_nb=2]

val_loss  tensor(0.2405)


Epoch 49: 100%|██████████| 149/149 [00:03<00:00, 59.17batch/s, batch_nb=116, loss=0.156, v_nb=2]
Epoch 50:  79%|███████▊  | 117/149 [00:03<00:00, 40.94batch/s, batch_nb=116, loss=0.165, v_nb=2]
Epoch 50:  79%|███████▉  | 118/149 [00:03<00:00, 43.85batch/s, batch_nb=116, loss=0.165, v_nb=2]
Epoch 50:  89%|████████▊ | 132/149 [00:03<00:00, 55.07batch/s, batch_nb=116, loss=0.165, v_nb=2]
Epoch 50:  99%|█████████▊| 147/149 [00:03<00:00, 67.62batch/s, batch_nb=116, loss=0.165, v_nb=2]

val_loss  tensor(0.2341)


Epoch 50: 100%|██████████| 149/149 [00:03<00:00, 67.62batch/s, batch_nb=116, loss=0.165, v_nb=2]
Epoch 51:  79%|███████▊  | 117/149 [00:02<00:00, 44.16batch/s, batch_nb=116, loss=0.154, v_nb=2]
Epoch 51:  85%|████████▍ | 126/149 [00:02<00:00, 53.52batch/s, batch_nb=116, loss=0.154, v_nb=2]
Epoch 51:  95%|█████████▍| 141/149 [00:02<00:00, 66.12batch/s, batch_nb=116, loss=0.154, v_nb=2]
Validating:  94%|█████████▍| 30/32 [00:00<00:00, 146.22batch/s]

val_loss  tensor(0.2410)


Epoch 51: 100%|██████████| 149/149 [00:02<00:00, 66.12batch/s, batch_nb=116, loss=0.154, v_nb=2]
Epoch 52:  79%|███████▊  | 117/149 [00:02<00:00, 42.20batch/s, batch_nb=116, loss=0.148, v_nb=2]
Epoch 52:  80%|███████▉  | 119/149 [00:02<00:00, 46.09batch/s, batch_nb=116, loss=0.148, v_nb=2]
Epoch 52:  90%|████████▉ | 134/149 [00:02<00:00, 57.75batch/s, batch_nb=116, loss=0.148, v_nb=2]
Epoch 52:  99%|█████████▉| 148/149 [00:02<00:00, 69.83batch/s, batch_nb=116, loss=0.148, v_nb=2]

val_loss  tensor(0.2332)


Epoch 52: 100%|██████████| 149/149 [00:02<00:00, 69.83batch/s, batch_nb=116, loss=0.148, v_nb=2]
Epoch 53:  79%|███████▊  | 117/149 [00:02<00:00, 34.16batch/s, batch_nb=116, loss=0.150, v_nb=2]
Epoch 53:  85%|████████▍ | 126/149 [00:03<00:00, 41.46batch/s, batch_nb=116, loss=0.150, v_nb=2]
Epoch 53:  91%|█████████▏| 136/149 [00:03<00:00, 49.70batch/s, batch_nb=116, loss=0.150, v_nb=2]
Epoch 53:  97%|█████████▋| 145/149 [00:03<00:00, 56.47batch/s, batch_nb=116, loss=0.150, v_nb=2]
Validating:  91%|█████████ | 29/32 [00:00<00:00, 88.27batch/s]

val_loss  tensor(0.2416)


Epoch 53: 100%|██████████| 149/149 [00:03<00:00, 56.47batch/s, batch_nb=116, loss=0.150, v_nb=2]
Epoch 54:  79%|███████▊  | 117/149 [00:03<00:00, 32.67batch/s, batch_nb=116, loss=0.153, v_nb=2]
Epoch 54:  85%|████████▍ | 126/149 [00:03<00:00, 40.33batch/s, batch_nb=116, loss=0.153, v_nb=2]
Epoch 54:  91%|█████████▏| 136/149 [00:03<00:00, 48.67batch/s, batch_nb=116, loss=0.153, v_nb=2]
Epoch 54:  99%|█████████▊| 147/149 [00:03<00:00, 57.89batch/s, batch_nb=116, loss=0.153, v_nb=2]
Validating:  97%|█████████▋| 31/32 [00:00<00:00, 101.67batch/s]

val_loss  tensor(0.2367)


Epoch 54: 100%|██████████| 149/149 [00:03<00:00, 57.89batch/s, batch_nb=116, loss=0.153, v_nb=2]
Epoch 55:  79%|███████▊  | 117/149 [00:03<00:00, 37.76batch/s, batch_nb=116, loss=0.153, v_nb=2]
Epoch 55:  86%|████████▌ | 128/149 [00:03<00:00, 47.22batch/s, batch_nb=116, loss=0.153, v_nb=2]
Epoch 55:  95%|█████████▌| 142/149 [00:03<00:00, 58.52batch/s, batch_nb=116, loss=0.153, v_nb=2]
Validating:  88%|████████▊ | 28/32 [00:00<00:00, 134.13batch/s]

val_loss  tensor(0.2443)


Epoch 55: 100%|██████████| 149/149 [00:03<00:00, 58.52batch/s, batch_nb=116, loss=0.153, v_nb=2]
Epoch 56:  79%|███████▊  | 117/149 [00:03<00:00, 37.92batch/s, batch_nb=116, loss=0.151, v_nb=2]
Epoch 56:  87%|████████▋ | 129/149 [00:03<00:00, 47.91batch/s, batch_nb=116, loss=0.151, v_nb=2]
Epoch 56:  94%|█████████▍| 140/149 [00:03<00:00, 57.34batch/s, batch_nb=116, loss=0.151, v_nb=2]
Validating:  81%|████████▏ | 26/32 [00:00<00:00, 127.42batch/s]

val_loss  tensor(0.2360)


Epoch 56: 100%|██████████| 149/149 [00:03<00:00, 57.34batch/s, batch_nb=116, loss=0.151, v_nb=2]
Epoch 57:  79%|███████▊  | 117/149 [00:03<00:00, 34.58batch/s, batch_nb=116, loss=0.148, v_nb=2]
Epoch 57:  79%|███████▉  | 118/149 [00:03<00:00, 35.54batch/s, batch_nb=116, loss=0.148, v_nb=2]
Epoch 57:  89%|████████▊ | 132/149 [00:03<00:00, 45.50batch/s, batch_nb=116, loss=0.148, v_nb=2]
Epoch 57:  96%|█████████▌| 143/149 [00:03<00:00, 55.15batch/s, batch_nb=116, loss=0.148, v_nb=2]

val_loss  tensor(0.2406)


Epoch 57: 100%|██████████| 149/149 [00:03<00:00, 55.15batch/s, batch_nb=116, loss=0.148, v_nb=2]
Epoch 58:  79%|███████▊  | 117/149 [00:03<00:00, 38.76batch/s, batch_nb=116, loss=0.151, v_nb=2]
Epoch 58:  86%|████████▌ | 128/149 [00:03<00:00, 48.62batch/s, batch_nb=116, loss=0.151, v_nb=2]
Epoch 58:  95%|█████████▌| 142/149 [00:03<00:00, 60.34batch/s, batch_nb=116, loss=0.151, v_nb=2]
Validating:  88%|████████▊ | 28/32 [00:00<00:00, 133.34batch/s]

val_loss  tensor(0.2343)


Epoch 58: 100%|██████████| 149/149 [00:03<00:00, 60.34batch/s, batch_nb=116, loss=0.151, v_nb=2]
Epoch 59:  79%|███████▊  | 117/149 [00:03<00:00, 38.67batch/s, batch_nb=116, loss=0.141, v_nb=2]
Epoch 59:  81%|████████  | 121/149 [00:03<00:00, 44.38batch/s, batch_nb=116, loss=0.141, v_nb=2]
Epoch 59:  91%|█████████ | 135/149 [00:03<00:00, 55.76batch/s, batch_nb=116, loss=0.141, v_nb=2]
Validating:  84%|████████▍ | 27/32 [00:00<00:00, 132.63batch/s]

val_loss  tensor(0.2354)


Epoch 59: 100%|██████████| 149/149 [00:03<00:00, 55.76batch/s, batch_nb=116, loss=0.141, v_nb=2]
Epoch 60:  79%|███████▊  | 117/149 [00:03<00:00, 35.88batch/s, batch_nb=116, loss=0.146, v_nb=2]
Epoch 60:  79%|███████▉  | 118/149 [00:03<00:00, 37.76batch/s, batch_nb=116, loss=0.146, v_nb=2]
Epoch 60:  86%|████████▌ | 128/149 [00:03<00:00, 46.32batch/s, batch_nb=116, loss=0.146, v_nb=2]
Epoch 60:  93%|█████████▎| 138/149 [00:03<00:00, 54.25batch/s, batch_nb=116, loss=0.146, v_nb=2]
Epoch 60:  99%|█████████▉| 148/149 [00:03<00:00, 62.43batch/s, batch_nb=116, loss=0.146, v_nb=2]

val_loss  tensor(0.2301)


Epoch 60: 100%|██████████| 149/149 [00:03<00:00, 62.43batch/s, batch_nb=116, loss=0.146, v_nb=2]
Epoch 61:  79%|███████▊  | 117/149 [00:03<00:00, 36.17batch/s, batch_nb=116, loss=0.136, v_nb=2]
Epoch 61:  81%|████████  | 121/149 [00:03<00:00, 42.04batch/s, batch_nb=116, loss=0.136, v_nb=2]
Epoch 61:  90%|████████▉ | 134/149 [00:03<00:00, 52.38batch/s, batch_nb=116, loss=0.136, v_nb=2]
Epoch 61:  95%|█████████▍| 141/149 [00:03<00:00, 54.44batch/s, batch_nb=116, loss=0.136, v_nb=2]
Validating: 100%|██████████| 32/32 [00:00<00:00, 100.34batch/s]

val_loss  tensor(0.2295)


Epoch 61: 100%|██████████| 149/149 [00:03<00:00, 54.44batch/s, batch_nb=116, loss=0.136, v_nb=2]
Epoch 62:  79%|███████▊  | 117/149 [00:02<00:00, 39.70batch/s, batch_nb=116, loss=0.151, v_nb=2]
Epoch 62:  82%|████████▏ | 122/149 [00:02<00:00, 46.00batch/s, batch_nb=116, loss=0.151, v_nb=2]
Epoch 62:  91%|█████████▏| 136/149 [00:03<00:00, 57.52batch/s, batch_nb=116, loss=0.151, v_nb=2]
Validating:  84%|████████▍ | 27/32 [00:00<00:00, 132.10batch/s]

val_loss  tensor(0.2313)


Epoch 62: 100%|██████████| 149/149 [00:03<00:00, 57.52batch/s, batch_nb=116, loss=0.151, v_nb=2]
Epoch 63:  79%|███████▊  | 117/149 [00:02<00:00, 39.94batch/s, batch_nb=116, loss=0.145, v_nb=2]
Epoch 63:  80%|███████▉  | 119/149 [00:02<00:00, 44.15batch/s, batch_nb=116, loss=0.145, v_nb=2]
Epoch 63:  89%|████████▊ | 132/149 [00:03<00:00, 54.92batch/s, batch_nb=116, loss=0.145, v_nb=2]
Epoch 63:  97%|█████████▋| 144/149 [00:03<00:00, 65.28batch/s, batch_nb=116, loss=0.145, v_nb=2]

val_loss  tensor(0.2440)


Epoch 63: 100%|██████████| 149/149 [00:03<00:00, 65.28batch/s, batch_nb=116, loss=0.145, v_nb=2]
Epoch 64:  79%|███████▊  | 117/149 [00:02<00:00, 38.29batch/s, batch_nb=116, loss=0.140, v_nb=2]
Epoch 64:  80%|███████▉  | 119/149 [00:02<00:00, 42.53batch/s, batch_nb=116, loss=0.140, v_nb=2]
Epoch 64:  89%|████████▉ | 133/149 [00:03<00:00, 53.45batch/s, batch_nb=116, loss=0.140, v_nb=2]
Epoch 64:  98%|█████████▊| 146/149 [00:03<00:00, 64.37batch/s, batch_nb=116, loss=0.140, v_nb=2]

val_loss  tensor(0.2353)


Epoch 64: 100%|██████████| 149/149 [00:03<00:00, 64.37batch/s, batch_nb=116, loss=0.140, v_nb=2]
Epoch 65:  79%|███████▊  | 117/149 [00:02<00:00, 43.71batch/s, batch_nb=116, loss=0.139, v_nb=2]
Epoch 65:  79%|███████▉  | 118/149 [00:02<00:00, 46.68batch/s, batch_nb=116, loss=0.139, v_nb=2]
Epoch 65:  89%|████████▉ | 133/149 [00:02<00:00, 58.60batch/s, batch_nb=116, loss=0.139, v_nb=2]
Epoch 65:  98%|█████████▊| 146/149 [00:02<00:00, 69.99batch/s, batch_nb=116, loss=0.139, v_nb=2]

val_loss  tensor(0.2286)


Epoch 65: 100%|██████████| 149/149 [00:02<00:00, 69.99batch/s, batch_nb=116, loss=0.139, v_nb=2]
Epoch 66:  79%|███████▊  | 117/149 [00:03<00:00, 35.99batch/s, batch_nb=116, loss=0.137, v_nb=2]
Epoch 66:  81%|████████  | 120/149 [00:03<00:00, 40.88batch/s, batch_nb=116, loss=0.137, v_nb=2]
Epoch 66:  89%|████████▉ | 133/149 [00:03<00:00, 51.39batch/s, batch_nb=116, loss=0.137, v_nb=2]
Epoch 66:  98%|█████████▊| 146/149 [00:03<00:00, 62.52batch/s, batch_nb=116, loss=0.137, v_nb=2]

val_loss  tensor(0.2351)


Epoch 66: 100%|██████████| 149/149 [00:03<00:00, 62.52batch/s, batch_nb=116, loss=0.137, v_nb=2]
Epoch 67:  79%|███████▊  | 117/149 [00:03<00:00, 39.99batch/s, batch_nb=116, loss=0.143, v_nb=2]
Epoch 67:  81%|████████  | 121/149 [00:03<00:00, 45.42batch/s, batch_nb=116, loss=0.143, v_nb=2]
Epoch 67:  91%|█████████ | 135/149 [00:03<00:00, 56.76batch/s, batch_nb=116, loss=0.143, v_nb=2]
Validating:  88%|████████▊ | 28/32 [00:00<00:00, 133.83batch/s]

val_loss  tensor(0.2236)


Epoch 67: 100%|██████████| 149/149 [00:03<00:00, 56.76batch/s, batch_nb=116, loss=0.143, v_nb=2]
Epoch 68:  79%|███████▊  | 117/149 [00:03<00:00, 33.45batch/s, batch_nb=116, loss=0.138, v_nb=2]
Epoch 68:  83%|████████▎ | 123/149 [00:03<00:00, 40.25batch/s, batch_nb=116, loss=0.138, v_nb=2]
Epoch 68:  90%|████████▉ | 134/149 [00:03<00:00, 49.62batch/s, batch_nb=116, loss=0.138, v_nb=2]
Epoch 68:  97%|█████████▋| 145/149 [00:03<00:00, 59.27batch/s, batch_nb=116, loss=0.138, v_nb=2]

val_loss  tensor(0.2317)


Epoch 68: 100%|██████████| 149/149 [00:03<00:00, 59.27batch/s, batch_nb=116, loss=0.138, v_nb=2]
Epoch 69:  79%|███████▊  | 117/149 [00:02<00:00, 40.97batch/s, batch_nb=116, loss=0.139, v_nb=2]
Epoch 69:  82%|████████▏ | 122/149 [00:03<00:00, 47.59batch/s, batch_nb=116, loss=0.139, v_nb=2]
Epoch 69:  91%|█████████▏| 136/149 [00:03<00:00, 59.21batch/s, batch_nb=116, loss=0.139, v_nb=2]
Validating:  88%|████████▊ | 28/32 [00:00<00:00, 137.15batch/s]

val_loss  tensor(0.2326)


Epoch 69: 100%|██████████| 149/149 [00:03<00:00, 59.21batch/s, batch_nb=116, loss=0.139, v_nb=2]
Epoch 70:  79%|███████▊  | 117/149 [00:03<00:00, 35.80batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 70:  81%|████████  | 121/149 [00:03<00:00, 41.91batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 70:  91%|█████████ | 135/149 [00:03<00:00, 52.77batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 70:  97%|█████████▋| 145/149 [00:03<00:00, 60.72batch/s, batch_nb=116, loss=0.133, v_nb=2]

val_loss  tensor(0.2312)


Epoch 70: 100%|██████████| 149/149 [00:03<00:00, 60.72batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 71:  79%|███████▊  | 117/149 [00:03<00:00, 39.09batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 71:  85%|████████▌ | 127/149 [00:03<00:00, 48.14batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 71:  95%|█████████▍| 141/149 [00:03<00:00, 59.53batch/s, batch_nb=116, loss=0.133, v_nb=2]
Validating:  81%|████████▏ | 26/32 [00:00<00:00, 123.72batch/s]

val_loss  tensor(0.2317)


Epoch 71: 100%|██████████| 149/149 [00:03<00:00, 59.53batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 72:  79%|███████▊  | 117/149 [00:03<00:00, 33.59batch/s, batch_nb=116, loss=0.134, v_nb=2]
Epoch 72:  82%|████████▏ | 122/149 [00:03<00:00, 40.39batch/s, batch_nb=116, loss=0.134, v_nb=2]
Epoch 72:  92%|█████████▏| 137/149 [00:03<00:00, 51.40batch/s, batch_nb=116, loss=0.134, v_nb=2]
Validating:  88%|████████▊ | 28/32 [00:00<00:00, 137.61batch/s]

val_loss  tensor(0.2249)


Epoch 72: 100%|██████████| 149/149 [00:03<00:00, 51.40batch/s, batch_nb=116, loss=0.134, v_nb=2]
Epoch 73:  79%|███████▊  | 117/149 [00:03<00:00, 37.65batch/s, batch_nb=116, loss=0.139, v_nb=2]
Epoch 73:  79%|███████▉  | 118/149 [00:03<00:00, 40.01batch/s, batch_nb=116, loss=0.139, v_nb=2]
Epoch 73:  88%|████████▊ | 131/149 [00:03<00:00, 50.32batch/s, batch_nb=116, loss=0.139, v_nb=2]
Epoch 73:  96%|█████████▌| 143/149 [00:03<00:00, 60.48batch/s, batch_nb=116, loss=0.139, v_nb=2]

val_loss  tensor(0.2255)


Epoch 73: 100%|██████████| 149/149 [00:03<00:00, 60.48batch/s, batch_nb=116, loss=0.139, v_nb=2]
Epoch 74:  79%|███████▊  | 117/149 [00:03<00:00, 34.82batch/s, batch_nb=116, loss=0.137, v_nb=2]
Epoch 74:  86%|████████▌ | 128/149 [00:03<00:00, 44.07batch/s, batch_nb=116, loss=0.137, v_nb=2]
Epoch 74:  95%|█████████▍| 141/149 [00:03<00:00, 54.65batch/s, batch_nb=116, loss=0.137, v_nb=2]
Validating:  81%|████████▏ | 26/32 [00:00<00:00, 127.55batch/s]

val_loss  tensor(0.2328)


Epoch 74: 100%|██████████| 149/149 [00:03<00:00, 54.65batch/s, batch_nb=116, loss=0.137, v_nb=2]
Epoch 75:  79%|███████▊  | 117/149 [00:03<00:00, 37.65batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 75:  86%|████████▌ | 128/149 [00:03<00:00, 47.25batch/s, batch_nb=116, loss=0.129, v_nb=2]
Validating:  44%|████▍     | 14/32 [00:00<00:00, 134.40batch/s]
Epoch 75:  92%|█████████▏| 137/149 [00:03<00:00, 52.05batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 75:  99%|█████████▉| 148/149 [00:03<00:00, 60.82batch/s, batch_nb=116, loss=0.129, v_nb=2]

val_loss  tensor(0.2295)


Epoch 75: 100%|██████████| 149/149 [00:03<00:00, 60.82batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 76:  79%|███████▊  | 117/149 [00:03<00:00, 40.58batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 76:  87%|████████▋ | 130/149 [00:03<00:00, 51.03batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 76:  96%|█████████▌| 143/149 [00:03<00:00, 62.08batch/s, batch_nb=116, loss=0.133, v_nb=2]
Validating:  84%|████████▍ | 27/32 [00:00<00:00, 133.42batch/s]

val_loss  tensor(0.2317)


Epoch 76: 100%|██████████| 149/149 [00:03<00:00, 62.08batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 77:  79%|███████▊  | 117/149 [00:03<00:00, 38.81batch/s, batch_nb=116, loss=0.134, v_nb=2]
Epoch 77:  80%|███████▉  | 119/149 [00:03<00:00, 43.05batch/s, batch_nb=116, loss=0.134, v_nb=2]
Epoch 77:  89%|████████▉ | 133/149 [00:03<00:00, 53.92batch/s, batch_nb=116, loss=0.134, v_nb=2]
Epoch 77:  98%|█████████▊| 146/149 [00:03<00:00, 65.07batch/s, batch_nb=116, loss=0.134, v_nb=2]

val_loss  tensor(0.2353)


Epoch 77: 100%|██████████| 149/149 [00:03<00:00, 65.07batch/s, batch_nb=116, loss=0.134, v_nb=2]
Epoch 78:  79%|███████▊  | 117/149 [00:03<00:00, 33.04batch/s, batch_nb=116, loss=0.138, v_nb=2]
Epoch 78:  83%|████████▎ | 123/149 [00:03<00:00, 39.72batch/s, batch_nb=116, loss=0.138, v_nb=2]
Epoch 78:  90%|████████▉ | 134/149 [00:03<00:00, 48.54batch/s, batch_nb=116, loss=0.138, v_nb=2]
Epoch 78:  97%|█████████▋| 145/149 [00:03<00:00, 57.83batch/s, batch_nb=116, loss=0.138, v_nb=2]
Validating:  97%|█████████▋| 31/32 [00:00<00:00, 103.61batch/s]

val_loss  tensor(0.2308)


Epoch 78: 100%|██████████| 149/149 [00:03<00:00, 57.83batch/s, batch_nb=116, loss=0.138, v_nb=2]
Epoch 79:  79%|███████▊  | 117/149 [00:03<00:01, 31.51batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 79:  80%|███████▉  | 119/149 [00:03<00:00, 34.97batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 79:  85%|████████▌ | 127/149 [00:03<00:00, 41.97batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 79:  92%|█████████▏| 137/149 [00:03<00:00, 50.77batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 79:  99%|█████████▊| 147/149 [00:03<00:00, 59.48batch/s, batch_nb=116, loss=0.133, v_nb=2]

val_loss  tensor(0.2326)


Epoch 79: 100%|██████████| 149/149 [00:03<00:00, 59.48batch/s, batch_nb=116, loss=0.133, v_nb=2]
Epoch 80:  79%|███████▊  | 117/149 [00:03<00:00, 34.67batch/s, batch_nb=116, loss=0.136, v_nb=2]
Epoch 80:  82%|████████▏ | 122/149 [00:03<00:00, 41.38batch/s, batch_nb=116, loss=0.136, v_nb=2]
Epoch 80:  91%|█████████▏| 136/149 [00:03<00:00, 51.97batch/s, batch_nb=116, loss=0.136, v_nb=2]
Validating:  84%|████████▍ | 27/32 [00:00<00:00, 130.28batch/s]

val_loss  tensor(0.2417)


Epoch 80: 100%|██████████| 149/149 [00:03<00:00, 51.97batch/s, batch_nb=116, loss=0.136, v_nb=2]
Epoch 81:  79%|███████▊  | 117/149 [00:02<00:00, 38.83batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 81:  82%|████████▏ | 122/149 [00:03<00:00, 45.68batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 81:  91%|█████████ | 135/149 [00:03<00:00, 56.31batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 81:  99%|█████████▊| 147/149 [00:03<00:00, 66.61batch/s, batch_nb=116, loss=0.129, v_nb=2]

val_loss  tensor(0.2301)


Epoch 81: 100%|██████████| 149/149 [00:03<00:00, 66.61batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 82:  79%|███████▊  | 117/149 [00:03<00:00, 36.46batch/s, batch_nb=116, loss=0.135, v_nb=2]
Epoch 82:  81%|████████  | 120/149 [00:03<00:00, 41.01batch/s, batch_nb=116, loss=0.135, v_nb=2]
Epoch 82:  89%|████████▉ | 133/149 [00:03<00:00, 51.46batch/s, batch_nb=116, loss=0.135, v_nb=2]
Epoch 82:  99%|█████████▊| 147/149 [00:03<00:00, 62.77batch/s, batch_nb=116, loss=0.135, v_nb=2]

val_loss  tensor(0.2353)


Epoch 82: 100%|██████████| 149/149 [00:03<00:00, 62.77batch/s, batch_nb=116, loss=0.135, v_nb=2]
Epoch 83:  79%|███████▊  | 117/149 [00:03<00:00, 38.63batch/s, batch_nb=116, loss=0.126, v_nb=2]
Epoch 83:  85%|████████▌ | 127/149 [00:03<00:00, 47.93batch/s, batch_nb=116, loss=0.126, v_nb=2]
Epoch 83:  94%|█████████▍| 140/149 [00:03<00:00, 59.08batch/s, batch_nb=116, loss=0.126, v_nb=2]
Validating:  81%|████████▏ | 26/32 [00:00<00:00, 125.51batch/s]

val_loss  tensor(0.2306)


Epoch 83: 100%|██████████| 149/149 [00:03<00:00, 59.08batch/s, batch_nb=116, loss=0.126, v_nb=2]
Epoch 84:  79%|███████▊  | 117/149 [00:03<00:00, 34.23batch/s, batch_nb=116, loss=0.125, v_nb=2]
Epoch 84:  83%|████████▎ | 123/149 [00:03<00:00, 41.16batch/s, batch_nb=116, loss=0.125, v_nb=2]
Epoch 84:  90%|████████▉ | 134/149 [00:03<00:00, 50.37batch/s, batch_nb=116, loss=0.125, v_nb=2]
Epoch 84:  96%|█████████▌| 143/149 [00:03<00:00, 57.44batch/s, batch_nb=116, loss=0.125, v_nb=2]
Validating:  94%|█████████▍| 30/32 [00:00<00:00, 94.29batch/s] 

val_loss  tensor(0.2282)


Epoch 84: 100%|██████████| 149/149 [00:03<00:00, 57.44batch/s, batch_nb=116, loss=0.125, v_nb=2]
Epoch 85:  79%|███████▊  | 117/149 [00:03<00:00, 35.18batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 85:  82%|████████▏ | 122/149 [00:03<00:00, 40.55batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 85:  89%|████████▉ | 133/149 [00:03<00:00, 50.01batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 85:  98%|█████████▊| 146/149 [00:04<00:00, 60.67batch/s, batch_nb=116, loss=0.121, v_nb=2]

val_loss  tensor(0.2248)


Epoch 85: 100%|██████████| 149/149 [00:04<00:00, 60.67batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 86:  79%|███████▊  | 117/149 [00:03<00:00, 38.13batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 86:  81%|████████  | 121/149 [00:03<00:00, 43.92batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 86:  89%|████████▉ | 133/149 [00:03<00:00, 54.22batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 86:  98%|█████████▊| 146/149 [00:03<00:00, 65.41batch/s, batch_nb=116, loss=0.129, v_nb=2]

val_loss  tensor(0.2285)


Epoch 86: 100%|██████████| 149/149 [00:03<00:00, 65.41batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 87:  79%|███████▊  | 117/149 [00:03<00:00, 37.33batch/s, batch_nb=116, loss=0.128, v_nb=2]
Epoch 87:  79%|███████▉  | 118/149 [00:03<00:00, 39.87batch/s, batch_nb=116, loss=0.128, v_nb=2]
Epoch 87:  88%|████████▊ | 131/149 [00:03<00:00, 50.09batch/s, batch_nb=116, loss=0.128, v_nb=2]
Epoch 87:  96%|█████████▌| 143/149 [00:03<00:00, 60.14batch/s, batch_nb=116, loss=0.128, v_nb=2]

val_loss  tensor(0.2312)


Epoch 87: 100%|██████████| 149/149 [00:03<00:00, 60.14batch/s, batch_nb=116, loss=0.128, v_nb=2]
Epoch 88:  79%|███████▊  | 117/149 [00:03<00:01, 31.88batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 88:  80%|███████▉  | 119/149 [00:03<00:00, 35.52batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 88:  86%|████████▌ | 128/149 [00:03<00:00, 42.09batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 88:  91%|█████████▏| 136/149 [00:03<00:00, 49.02batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 88:  98%|█████████▊| 146/149 [00:03<00:00, 57.11batch/s, batch_nb=116, loss=0.129, v_nb=2]

val_loss  tensor(0.2342)


Epoch 88: 100%|██████████| 149/149 [00:03<00:00, 57.11batch/s, batch_nb=116, loss=0.129, v_nb=2]
Epoch 89:  79%|███████▊  | 117/149 [00:02<00:00, 42.74batch/s, batch_nb=116, loss=0.128, v_nb=2]
Epoch 89:  79%|███████▉  | 118/149 [00:02<00:00, 45.51batch/s, batch_nb=116, loss=0.128, v_nb=2]
Epoch 89:  89%|████████▊ | 132/149 [00:03<00:00, 56.81batch/s, batch_nb=116, loss=0.128, v_nb=2]
Epoch 89:  94%|█████████▍| 140/149 [00:03<00:00, 58.74batch/s, batch_nb=116, loss=0.128, v_nb=2]

val_loss  tensor(0.2346)


Epoch 89: 100%|██████████| 149/149 [00:03<00:00, 58.74batch/s, batch_nb=116, loss=0.128, v_nb=2]
Epoch 90:  79%|███████▊  | 117/149 [00:03<00:00, 34.31batch/s, batch_nb=116, loss=0.130, v_nb=2]
Epoch 90:  81%|████████  | 121/149 [00:03<00:00, 39.26batch/s, batch_nb=116, loss=0.130, v_nb=2]
Epoch 90:  90%|████████▉ | 134/149 [00:03<00:00, 49.40batch/s, batch_nb=116, loss=0.130, v_nb=2]
Epoch 90:  99%|█████████▊| 147/149 [00:03<00:00, 59.90batch/s, batch_nb=116, loss=0.130, v_nb=2]

val_loss  tensor(0.2412)


Epoch 90: 100%|██████████| 149/149 [00:03<00:00, 59.90batch/s, batch_nb=116, loss=0.130, v_nb=2]
Epoch 91:  79%|███████▊  | 117/149 [00:03<00:00, 36.26batch/s, batch_nb=116, loss=0.131, v_nb=2]
Epoch 91:  80%|███████▉  | 119/149 [00:03<00:00, 37.06batch/s, batch_nb=116, loss=0.131, v_nb=2]
Epoch 91:  89%|████████▊ | 132/149 [00:03<00:00, 46.82batch/s, batch_nb=116, loss=0.131, v_nb=2]
Epoch 91:  97%|█████████▋| 145/149 [00:03<00:00, 57.79batch/s, batch_nb=116, loss=0.131, v_nb=2]

val_loss  tensor(0.2342)


Epoch 91: 100%|██████████| 149/149 [00:03<00:00, 57.79batch/s, batch_nb=116, loss=0.131, v_nb=2]
Epoch 92:  79%|███████▊  | 117/149 [00:03<00:00, 33.34batch/s, batch_nb=116, loss=0.134, v_nb=2]
Epoch 92:  85%|████████▍ | 126/149 [00:03<00:00, 41.35batch/s, batch_nb=116, loss=0.134, v_nb=2]
Epoch 92:  91%|█████████▏| 136/149 [00:03<00:00, 49.21batch/s, batch_nb=116, loss=0.134, v_nb=2]
Epoch 92:  97%|█████████▋| 145/149 [00:03<00:00, 56.30batch/s, batch_nb=116, loss=0.134, v_nb=2]
Validating:  91%|█████████ | 29/32 [00:00<00:00, 95.25batch/s]

val_loss  tensor(0.2387)


Epoch 92: 100%|██████████| 149/149 [00:03<00:00, 56.30batch/s, batch_nb=116, loss=0.134, v_nb=2]
Epoch 93:  79%|███████▊  | 117/149 [00:03<00:00, 37.33batch/s, batch_nb=116, loss=0.123, v_nb=2]
Epoch 93:  79%|███████▉  | 118/149 [00:03<00:00, 40.34batch/s, batch_nb=116, loss=0.123, v_nb=2]
Epoch 93:  88%|████████▊ | 131/149 [00:03<00:00, 50.56batch/s, batch_nb=116, loss=0.123, v_nb=2]
Epoch 93:  96%|█████████▌| 143/149 [00:03<00:00, 60.97batch/s, batch_nb=116, loss=0.123, v_nb=2]

val_loss  tensor(0.2513)


Epoch 93: 100%|██████████| 149/149 [00:03<00:00, 60.97batch/s, batch_nb=116, loss=0.123, v_nb=2]
Epoch 94:  79%|███████▊  | 117/149 [00:03<00:00, 37.27batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 94:  80%|███████▉  | 119/149 [00:03<00:00, 41.23batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 94:  89%|████████▉ | 133/149 [00:03<00:00, 51.81batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 94:  97%|█████████▋| 144/149 [00:03<00:00, 61.11batch/s, batch_nb=116, loss=0.121, v_nb=2]

val_loss  tensor(0.2265)


Epoch 94: 100%|██████████| 149/149 [00:03<00:00, 61.11batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 95:  79%|███████▊  | 117/149 [00:03<00:00, 38.34batch/s, batch_nb=116, loss=0.123, v_nb=2]
Epoch 95:  83%|████████▎ | 124/149 [00:03<00:00, 45.88batch/s, batch_nb=116, loss=0.123, v_nb=2]
Epoch 95:  92%|█████████▏| 137/149 [00:03<00:00, 56.70batch/s, batch_nb=116, loss=0.123, v_nb=2]
Validating:  81%|████████▏ | 26/32 [00:00<00:00, 122.87batch/s]

val_loss  tensor(0.2331)


Epoch 95: 100%|██████████| 149/149 [00:03<00:00, 56.70batch/s, batch_nb=116, loss=0.123, v_nb=2]
Epoch 96:  79%|███████▊  | 117/149 [00:03<00:00, 34.63batch/s, batch_nb=116, loss=0.125, v_nb=2]
Epoch 96:  85%|████████▌ | 127/149 [00:03<00:00, 42.86batch/s, batch_nb=116, loss=0.125, v_nb=2]
Epoch 96:  93%|█████████▎| 138/149 [00:03<00:00, 52.28batch/s, batch_nb=116, loss=0.125, v_nb=2]
Validating:  69%|██████▉   | 22/32 [00:00<00:00, 104.81batch/s]

val_loss  tensor(0.2414)


Epoch 96: 100%|██████████| 149/149 [00:03<00:00, 52.28batch/s, batch_nb=116, loss=0.125, v_nb=2]
Epoch 97:  79%|███████▊  | 117/149 [00:03<00:00, 35.36batch/s, batch_nb=116, loss=0.124, v_nb=2]
Epoch 97:  84%|████████▍ | 125/149 [00:03<00:00, 43.65batch/s, batch_nb=116, loss=0.124, v_nb=2]
Epoch 97:  93%|█████████▎| 139/149 [00:03<00:00, 54.73batch/s, batch_nb=116, loss=0.124, v_nb=2]
Validating:  88%|████████▊ | 28/32 [00:00<00:00, 137.07batch/s]

val_loss  tensor(0.2345)


Epoch 97: 100%|██████████| 149/149 [00:03<00:00, 54.73batch/s, batch_nb=116, loss=0.124, v_nb=2]
Epoch 98:  79%|███████▊  | 117/149 [00:03<00:00, 37.92batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 98:  83%|████████▎ | 123/149 [00:03<00:00, 44.82batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 98:  91%|█████████▏| 136/149 [00:03<00:00, 55.37batch/s, batch_nb=116, loss=0.121, v_nb=2]
Validating:  81%|████████▏ | 26/32 [00:00<00:00, 120.74batch/s]

val_loss  tensor(0.2388)


Epoch 98: 100%|██████████| 149/149 [00:03<00:00, 55.37batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 99:  79%|███████▊  | 117/149 [00:03<00:00, 36.19batch/s, batch_nb=116, loss=0.124, v_nb=2]
Epoch 99:  83%|████████▎ | 124/149 [00:03<00:00, 43.88batch/s, batch_nb=116, loss=0.124, v_nb=2]
Epoch 99:  93%|█████████▎| 138/149 [00:03<00:00, 55.16batch/s, batch_nb=116, loss=0.124, v_nb=2]
Validating:  84%|████████▍ | 27/32 [00:00<00:00, 127.32batch/s]

val_loss  tensor(0.2214)


Epoch 99: 100%|██████████| 149/149 [00:03<00:00, 55.16batch/s, batch_nb=116, loss=0.124, v_nb=2]
Epoch 100:  79%|███████▊  | 117/149 [00:03<00:00, 37.55batch/s, batch_nb=116, loss=0.125, v_nb=2]
Epoch 100:  85%|████████▍ | 126/149 [00:03<00:00, 46.54batch/s, batch_nb=116, loss=0.125, v_nb=2]
Epoch 100:  95%|█████████▍| 141/149 [00:03<00:00, 58.21batch/s, batch_nb=116, loss=0.125, v_nb=2]
Validating:  91%|█████████ | 29/32 [00:00<00:00, 141.31batch/s]

val_loss  tensor(0.2347)


Epoch 100: 100%|██████████| 149/149 [00:03<00:00, 58.21batch/s, batch_nb=116, loss=0.125, v_nb=2]
Epoch 101:  79%|███████▊  | 117/149 [00:03<00:00, 38.17batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 101:  87%|████████▋ | 130/149 [00:03<00:00, 48.12batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 101:  94%|█████████▍| 140/149 [00:03<00:00, 56.51batch/s, batch_nb=116, loss=0.121, v_nb=2]
Validating:  75%|███████▌  | 24/32 [00:00<00:00, 117.50batch/s]

val_loss  tensor(0.2430)


Epoch 101: 100%|██████████| 149/149 [00:03<00:00, 56.51batch/s, batch_nb=116, loss=0.121, v_nb=2]
Epoch 102:  79%|███████▊  | 117/149 [00:03<00:01, 31.39batch/s, batch_nb=116, loss=0.124, v_nb=2]
Epoch 102:  83%|████████▎ | 123/149 [00:03<00:00, 38.05batch/s, batch_nb=116, loss=0.124, v_nb=2]
Epoch 102:  91%|█████████▏| 136/149 [00:03<00:00, 48.08batch/s, batch_nb=116, loss=0.124, v_nb=2]
Validating:  78%|███████▊  | 25/32 [00:00<00:00, 121.92batch/s]

val_loss  tensor(0.2431)


Epoch 102: 100%|██████████| 149/149 [00:03<00:00, 48.08batch/s, batch_nb=116, loss=0.124, v_nb=2]
Epoch 102: 100%|██████████| 149/149 [00:03<00:00, 41.17batch/s, batch_nb=116, loss=0.124, v_nb=2]


1

## Predict

In [21]:
def predict(model, test_data_loader, checkpoint):
    model.load_state_dict(checkpoint['state_dict'])

    for param in model.parameters():
        param.requires_grad = False
        
    model.eval()
    test_preds = []
    for i, (y, cont_x, cat_x) in enumerate(tqdm(test_data_loader)):
        with torch.no_grad():
            cat_x = cat_x.to(device)
            cont_x = cont_x.to(device)
            preds = model.forward(cont_x, cat_x)
            test_preds.extend(preds.data.cpu().numpy())
    return test_preds


batch_size = 256
all_test_preds = []
checkpoint = torch.load('result/best_model/_ckpt_epoch_99.ckpt')
test_dataset = TrafficVolumePredictionDataset(
    data=test,
    cont_cols=['temperature', 'rain_in_hour', 'clouds_cover'],
    output_col=None,
    unused_cols=['timestamp', 'Year', 'snow_in_hour', 'date']
)

test_data_loader = DataLoader(
    test_dataset, 
    batch_size=batch_size,
    shuffle=False
)

y_test_log_pred = predict(model, test_data_loader, checkpoint)

100%|██████████| 32/32 [00:00<00:00, 123.72it/s]


In [24]:
y_test_pred = np.exp(y_test_log_pred) - 1

#save predictions in rows
save_path = './submission/embv1lgb{}.txt'.format(9263289631)
np.savetxt(save_path, y_test_pred)