In [1]:
import os
import pickle
from datetime import datetime

import dateutil.easter as easter
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import LabelEncoder

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [3]:
from torch.utils.data import Dataset, DataLoader

In [4]:
from accelerate import Accelerator
import torch.optim as optim

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
import time

In [7]:
from tqdm.notebook import tqdm

In [36]:
import gc

## Global Variables ###

In [8]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [9]:
device

device(type='cuda', index=0)

In [10]:
from colorama import Fore, Back, Style
# Short aliases for colorama's foreground colours; g_, m_ and sr_ are the ones
# the training loop interpolates into its log lines.
r_, b_, c_ = Fore.RED, Fore.BLUE, Fore.CYAN
g_, y_, m_ = Fore.GREEN, Fore.YELLOW, Fore.MAGENTA
# Appended after a coloured message to reset the style.
sr_ = Style.RESET_ALL

In [11]:
# Root of the project tree. Defaults to the original shared-disk location, but can be
# redirected with the UBIQUANT_BASE_DIR environment variable so the notebook runs on
# another machine without editing this cell.
# os.path.join(<root>, '') guarantees the trailing separator that the plain string
# concatenations below (and in later cells) rely on.
BASE_DIR = os.path.join(
    os.environ.get('UBIQUANT_BASE_DIR') or '/sharedHDD/rohit/timeseries_learning/ubiquant/',
    '',
)
DATA_DIR = BASE_DIR+'data/parquet/'      # parquet version of the training table
INPUT_DIR = BASE_DIR+'input/'            # fold definitions and pre-built window arrays
WEIGHTS_DIR = BASE_DIR + 'weights/'      # model checkpoints written during training

In [12]:
# input_chunk_length becomes config['seq_length']; output_chunk_length is not
# referenced by the cells visible in this notebook.
input_chunk_length, output_chunk_length = 30, 1
# Width of the investment-id embedding created inside the model.
embedding_dim = 50

In [13]:
# Column names 'f_0' ... 'f_299' (300 entries).
features = ['f_' + str(idx) for idx in range(300)]

## Load data ##

In [14]:
%%time
# Read the training table from DATA_DIR into a DataFrame; the %%time cell magic
# above reports how long the read takes.
train_df = pd.read_parquet(DATA_DIR+'train_low_mem.parquet')

CPU times: user 8.97 s, sys: 15.5 s, total: 24.5 s
Wall time: 4.2 s


In [15]:
# Distinct investment ids present in the training table, as a plain Python list.
investment_ids = train_df['investment_id'].unique().tolist()

In [16]:
max(investment_ids)

3773

#### make test and train df #####

In [17]:
# Fold this run works on: it selects the entry of `folds`, the
# `fold_train_<fold>.npz` archive and the prefix of the saved model name.
fold = 1

In [18]:
# Fold definitions; indexed below as folds[fold]['train'] / folds[fold]['test'].
# The path goes through INPUT_DIR (== BASE_DIR+'input/') so every input file is
# resolved from the same constant.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file —
# only load a folds.pickle that comes from a trusted source.
with open(INPUT_DIR+'folds.pickle', 'rb') as f:
    folds = pickle.load(f)

In [19]:
# Index labels assigned to each side of the selected fold.
train_indxs, test_indxs = folds[fold]['train'], folds[fold]['test']
# Keep the rows whose index label is on the given side, then renumber them from 0.
test_f_df = train_df.loc[train_df.index.isin(test_indxs)].reset_index(drop=True)
train_f_df = train_df.loc[train_df.index.isin(train_indxs)].reset_index(drop=True)

#### Load Training data #####

In [20]:
# Open the array archive for this fold: INPUT_DIR/fold_train_<fold>.npz.
# np.load on an .npz file returns a lazy NpzFile; each array is read from disk
# when it is indexed by key in the next cell.
loaded = np.load(INPUT_DIR+'fold_train_'+str(fold)+'.npz')

In [21]:
# Pull the two arrays stored under 'X_train' and 'Y_train' out of the archive.
X_train, Y_train = loaded['X_train'], loaded['Y_train']

In [22]:
# Make the targets a single column (N, 1).
Y_train = Y_train.reshape((-1, 1))

In [23]:
print(X_train.shape,Y_train.shape)

(942773, 30, 302) (942773, 1)


#### Make train and val split ####

In [24]:
# Hold out 20% of the samples for validation, drawn at random with a fixed seed.
# X_train / Y_train are overwritten with the remaining 80%, so re-running this cell
# splits the already-reduced training set again — reload the arrays first.
# NOTE(review): if consecutive windows of one investment overlap in time, a random
# split places near-duplicate samples on both sides and the validation loss will look
# optimistic; a time- or group-based split (GroupKFold is imported above) may be
# safer. TODO confirm how the windows in the .npz were built.
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)

In [25]:
print(X_train.shape,Y_train.shape,X_val.shape,Y_val.shape)

(754218, 30, 302) (754218, 1) (188555, 30, 302) (188555, 1)


In [37]:
gc.collect()

234

## Modeling ##

In [38]:
# Hyper-parameters and run settings read by the dataset, model and training cells.
config = {
    'seq_length' : input_chunk_length,
    'num_epochs' : 200,
    'lr' : 0.00001,
    # LSTM input width = investment-id embedding + every column of X except column 0
    # (the id column, which the model replaces with its embedding). For the arrays
    # loaded above this is 50 + (302 - 1) = 351, the value previously hard-coded here.
    'input_size' : embedding_dim + X_train.shape[2] - 1,
    'hidden_size' : 351,
    'num_layers' : 1,
    'num_classes' :1, ## This is  output dimension
    'train_shuffle': True,
    'val_shuffle': True,
    'batch_size' : 4096,
    # File name (under WEIGHTS_DIR) of the checkpoint written whenever validation improves.
    'best_model_name' : str(fold)+'_lstm',
    'bidirectional' : False,
    # True: the FC head sees only the last time step of the LSTM output;
    # False: it sees all time steps flattened.
    'only_last_hidden': True
}

In [46]:
class TSDataset(Dataset):
    """Map-style dataset yielding ``[x, y]`` float tensors, one sample per index.

    With no arrays supplied it behaves exactly as before: samples are read from the
    notebook-level ``X_train``/``Y_train`` (or ``X_val``/``Y_val`` when ``val=True``),
    looked up on every access. Passing ``x`` and ``y`` binds the dataset to those
    arrays instead, so it can be reused for other splits without touching globals.

    Args:
        val: when no arrays are given, selects the validation globals instead of
            the training globals.
        x: optional indexable collection of inputs (first axis = sample).
        y: optional indexable collection of targets, same length as ``x``.

    Raises:
        ValueError: if only one of ``x``/``y`` is given, or their lengths differ.
    """

    def __init__(self, val=False, x=None, y=None):
        if (x is None) != (y is None):
            raise ValueError("x and y must be provided together")
        if x is not None and len(x) != len(y):
            raise ValueError(f"x and y lengths differ: {len(x)} != {len(y)}")
        self.val = val
        self.x = x
        self.y = y

    def _arrays(self):
        """Return the ``(inputs, targets)`` pair this dataset currently reads from."""
        if self.x is not None:
            return self.x, self.y
        # Fallback to the notebook globals, resolved at call time as in the original.
        if self.val:
            return X_val, Y_val
        return X_train, Y_train

    def __len__(self):
        return len(self._arrays()[0])

    def __getitem__(self, idx):
        inputs, targets = self._arrays()
        # torch.Tensor(...) builds a float32 tensor from the indexed array.
        return [torch.Tensor(inputs[idx]), torch.Tensor(targets[idx])]

#### Model ####

In [47]:
# Expose the settings that the model and training cells read as plain
# notebook-level names.
(num_epochs, lr, input_size, hidden_size, num_layers,
 num_classes, seq_length, bidirectional, only_last_hidden) = (
    config[key] for key in (
        'num_epochs', 'lr', 'input_size', 'hidden_size', 'num_layers',
        'num_classes', 'seq_length', 'bidirectional', 'only_last_hidden',
    )
)

#### Loss function ######

In [48]:
def pearson_loss(x, y, eps=1e-8):
    """Return ``1 - r``, where ``r`` is the Pearson correlation of ``x`` and ``y``.

    Both tensors are flattened first, so a ``(B, 1)`` prediction and a ``(B,)`` target
    are compared element by element instead of being broadcast to ``(B, B)``.

    Args:
        x: predictions (any shape).
        y: targets with the same number of elements as ``x``.
        eps: lower bound for the denominator. It only takes effect when one of the
            inputs has (near-)zero variance, where the unguarded formula yields NaN;
            in that case the loss is 1 (zero correlation) and gradients stay finite.

    Returns:
        0-d tensor in ``[0, 2]``: 0 for perfect correlation, 2 for perfect
        anti-correlation.

    Raises:
        ValueError: if ``x`` and ``y`` hold a different number of elements.
    """
    x = x.reshape(-1)
    y = y.reshape(-1)
    if x.numel() != y.numel():
        raise ValueError(
            f"pearson_loss expects equally sized inputs, got {x.numel()} and {y.numel()} elements"
        )
    xd = x - x.mean()
    yd = y - y.mean()
    nom = (xd * yd).sum()
    # Clamp *before* the square root: sqrt has an infinite slope at 0, so clamping
    # afterwards would still back-propagate NaN for constant inputs.
    denom = ((xd ** 2).sum() * (yd ** 2).sum()).clamp_min(eps * eps).sqrt()
    return 1 - nom / denom

In [49]:
class LstmTsModel(nn.Module):
    """LSTM regressor over windows whose first column is an investment id.

    The id column is replaced by a learned embedding, the result is run through an
    LSTM, and a small fully connected head maps the LSTM output to ``num_classes``
    values per sample.

    Args:
        num_classes: output dimension of the head.
        input_size: LSTM input width, i.e. ``embedding_dim`` plus the number of
            non-id columns in each time step.
        hidden_size: LSTM hidden width.
        num_layers: number of stacked LSTM layers.
        seq_length: number of time steps per window (sizes the head when all time
            steps are used).
        bidirectional: run the LSTM in both directions. ``None`` (default) reads the
            notebook-level ``bidirectional`` at construction time, as before.
        only_last_hidden: feed only the last time step of the LSTM output to the
            head. ``None`` (default) reads the notebook-level ``only_last_hidden``.
        embedding_dim: width of the id embedding. ``None`` (default) reads the
            notebook-level ``embedding_dim``.
        cat_input_dim: number of rows in the embedding table; ids must lie in
            ``[0, cat_input_dim)``. The default 3774 is one more than the largest
            investment id in the training table (3773).

    The three flags are captured on the instance at construction, so ``forward``
    always matches the layer sizes built in ``__init__`` even if the notebook
    globals are changed afterwards.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers,seq_length,
                 bidirectional=None, only_last_hidden=None, embedding_dim=None,
                 cat_input_dim=3774):
        super().__init__()

        # Backward compatibility: unspecified options fall back to the notebook-level
        # variables of the same name, which is where the original code read them.
        notebook_globals = globals()
        if bidirectional is None:
            bidirectional = notebook_globals['bidirectional']
        if only_last_hidden is None:
            only_last_hidden = notebook_globals['only_last_hidden']
        if embedding_dim is None:
            embedding_dim = notebook_globals['embedding_dim']

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length
        self.bidirectional = bool(bidirectional)
        self.only_last_hidden = bool(only_last_hidden)

        self.embedding = nn.Embedding(cat_input_dim, embedding_dim)

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True,
                            bidirectional=self.bidirectional)

        # A bidirectional LSTM emits forward and backward states side by side.
        directions = 2 if self.bidirectional else 1
        if self.only_last_hidden:
            input_fc_dim = hidden_size * directions
        else:
            input_fc_dim = self.seq_length * hidden_size * directions

        print(input_fc_dim)

        # Layer order is kept as-is so the state-dict keys (fc.0, fc.1, fc.4) stay
        # compatible with checkpoints saved by earlier runs.
        self.fc = nn.Sequential(nn.Linear(input_fc_dim, input_fc_dim//2),
                                nn.BatchNorm1d(num_features=input_fc_dim//2),
                                nn.Dropout(0.2),
                                nn.ReLU(),
                                nn.Linear(input_fc_dim//2, self.num_classes)
                                )

    def forward(self, x):
        """Map a ``(batch, seq, 1 + n_features)`` window batch to ``(batch, num_classes)``.

        Column 0 of the last axis must hold the investment id (stored as a float);
        the remaining columns are passed to the LSTM unchanged.
        """
        # .long() converts in place on x's own device; the previous
        # .type(torch.LongTensor).to(device) copied the ids to the CPU and back on
        # every call and depended on a global `device`.
        investment_ids = x[:, :, 0].long()

        embeddings = self.embedding(investment_ids)

        x = torch.cat([embeddings, x[:, :, 1:]], dim=2)

        h_out, _ = self.lstm(x)
        if self.only_last_hidden:
            # NOTE(review): for a bidirectional LSTM the backward half of the last
            # time step has only seen one input; behaviour is left unchanged here.
            h_out = h_out[:, -1:, :]

        h_out = h_out.flatten(start_dim=1)

        return self.fc(h_out)

In [50]:
def run(model,train_dl,val_dl):
    """Train ``model`` with Adam and ``pearson_loss``, checkpointing on validation loss.

    Settings are read from the notebook-level ``config`` (``'lr'``, ``'num_epochs'``,
    ``'best_model_name'``); checkpoints are written under ``WEIGHTS_DIR``. Model,
    loaders and optimizer are handed to a Hugging Face ``Accelerator``, which places
    them on its device.

    Args:
        model: network to train; called as ``model(batch_x)``.
        train_dl: loader yielding ``(x, y)`` batches used for optimisation.
        val_dl: loader yielding ``(x, y)`` batches evaluated after every epoch.

    Returns:
        ``(best_loss, model)``: the lowest mean per-batch validation loss seen, and
        the model as it stands after the final epoch. The best-scoring weights are
        the ones saved to ``WEIGHTS_DIR + config['best_model_name']``; they are not
        reloaded into the returned model.
    """

    def evaluate(model, valid_loader, criterion):
        """Return the mean per-batch ``criterion`` over ``valid_loader`` (no gradients)."""
        model.eval()
        valid_loss = 0
        with torch.no_grad():
            for inputs in tqdm(valid_loader):
                dataX, dataY = inputs[0], inputs[1]
                outputs = model(dataX)
                valid_loss += criterion(outputs, dataY).item()

        return valid_loss / len(valid_loader)

    def train_and_evaluate_loop(train_loader,model,optimizer,criterion,epoch,lr_scheduler=None,valid_loader=None, best_loss=99999):
        """Run one training epoch, validate, checkpoint on improvement; return best loss."""
        # evaluate() leaves the model in eval mode, so switch back once per epoch.
        model.train()
        train_loss = 0
        for inputs in tqdm(train_loader):
            optimizer.zero_grad()

            dataX, dataY = inputs[0], inputs[1]
            outputs = model(dataX)
            loss = criterion(outputs, dataY)

            # Let the accelerator drive the backward pass so gradient scaling /
            # distributed synchronisation are applied when it is configured for them.
            accelerator.backward(loss)
            optimizer.step()

            train_loss += loss.item()

            if lr_scheduler is not None:
                lr_scheduler.step()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        train_loss /= max(len(train_loader), 1)
        if valid_loader is not None and len(valid_loader) > 0:
            valid_loss = evaluate(model, valid_loader, criterion)
            print(f"Epoch:{epoch} |Train Loss:{train_loss}|Valid Loss:{valid_loss}")
            if valid_loss <= best_loss:
                print(f"{g_}Loss Decreased from {best_loss} to {valid_loss}{sr_}")

                best_loss = valid_loss
                # Save the underlying module so the checkpoint keys carry no wrapper
                # prefix and load straight into a plain LstmTsModel.
                torch.save(accelerator.unwrap_model(model).state_dict(),
                           WEIGHTS_DIR+config['best_model_name'])
        else:
            print(f"Epoch:{epoch} |Train Loss:{train_loss}")

        return best_loss

    accelerator = Accelerator()
    print(f"{accelerator.device} is used")

    # Fail early (not after the first epoch) if checkpoints cannot be written.
    os.makedirs(WEIGHTS_DIR, exist_ok=True)

    optimizer = optim.Adam(model.parameters(),lr=config['lr'],amsgrad=False)
    criterion = pearson_loss

    # No learning-rate schedule is configured; assign a scheduler here to enable the
    # per-batch lr_scheduler.step() in the training loop.
    lr_scheduler = None

    # Only objects the accelerator actually manages are prepared; the loss function
    # and the (absent) scheduler are used as they are.
    model, train_dl, val_dl, optimizer = accelerator.prepare(model, train_dl, val_dl, optimizer)

    best_loss = 9999999
    start_time = time.time()
    for epoch in tqdm(range(config["num_epochs"])):
        print(f"Epoch Started:{epoch}")
        best_loss = train_and_evaluate_loop(train_dl,model,optimizer,criterion,epoch,lr_scheduler,valid_loader=val_dl, best_loss=best_loss)

        end_time = time.time()
        print(f"{m_}Time taken by epoch {epoch} is {end_time-start_time:.2f}s{sr_}")
        start_time = end_time

    return best_loss, model

In [51]:
# Build the network from the unpacked config values; the constructor prints the
# input width of its fully connected head.
model = LstmTsModel(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    seq_length=seq_length,
)

351


In [52]:
# Training batches come from the train split, validation batches from the held-out split.
train_dl = DataLoader(TSDataset(), batch_size=config['batch_size'], shuffle=config['train_shuffle'], num_workers=0)
# The validation loader is driven by its own 'val_shuffle' switch, not by 'train_shuffle'.
# NOTE(review): pearson_loss is averaged per batch, so shuffling changes which samples share
# a batch and makes the validation loss vary between epochs; consider 'val_shuffle': False.
val_dl = DataLoader(TSDataset(val=True), batch_size=config['batch_size'], shuffle=config['val_shuffle'], num_workers=0)

In [53]:
gc.collect()

147

In [54]:
# Train for config['num_epochs'] epochs. `best_loss` is the lowest validation loss seen;
# `model` is the network after the last epoch — the best-scoring weights are the ones
# saved to WEIGHTS_DIR + config['best_model_name'] during training.
best_loss, model = run(model,train_dl,val_dl)

cuda is used


  0%|          | 0/200 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.9574286360998412|Valid Loss:0.9334095424794137
[32mLoss Decreased from 9999999 to 0.9334095424794137[0m
[35mTime taken by epoch 0 is 61.85s[0m
Epoch Started:1


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.9303017023447397|Valid Loss:0.9228735845139686
[32mLoss Decreased from 0.9334095424794137 to 0.9228735845139686[0m
[35mTime taken by epoch 1 is 62.23s[0m
Epoch Started:2


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.9200448670902768|Valid Loss:0.9180552122440744
[32mLoss Decreased from 0.9228735845139686 to 0.9180552122440744[0m
[35mTime taken by epoch 2 is 62.35s[0m
Epoch Started:3


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.9108076997705408|Valid Loss:0.9082347991618704
[32mLoss Decreased from 0.9180552122440744 to 0.9082347991618704[0m
[35mTime taken by epoch 3 is 62.56s[0m
Epoch Started:4


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.9025904081963204|Valid Loss:0.902476485739363
[32mLoss Decreased from 0.9082347991618704 to 0.902476485739363[0m
[35mTime taken by epoch 4 is 62.20s[0m
Epoch Started:5


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.8954873355659279|Valid Loss:0.9010849797979315
[32mLoss Decreased from 0.902476485739363 to 0.9010849797979315[0m
[35mTime taken by epoch 5 is 61.83s[0m
Epoch Started:6


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.8906683863820256|Valid Loss:0.8921696657830096
[32mLoss Decreased from 0.9010849797979315 to 0.8921696657830096[0m
[35mTime taken by epoch 6 is 61.51s[0m
Epoch Started:7


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.8859544960228173|Valid Loss:0.8947233481610075
[35mTime taken by epoch 7 is 61.36s[0m
Epoch Started:8


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.881099348454862|Valid Loss:0.8922749719721206
[35mTime taken by epoch 8 is 64.64s[0m
Epoch Started:9


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.8776114872983984|Valid Loss:0.8891478104794279
[32mLoss Decreased from 0.8921696657830096 to 0.8891478104794279[0m
[35mTime taken by epoch 9 is 69.51s[0m
Epoch Started:10


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.8724239201159091|Valid Loss:0.8903895946259194
[35mTime taken by epoch 10 is 60.97s[0m
Epoch Started:11


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.8690633786691202|Valid Loss:0.887872373804133
[32mLoss Decreased from 0.8891478104794279 to 0.887872373804133[0m
[35mTime taken by epoch 11 is 61.70s[0m
Epoch Started:12


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.8651356229910979|Valid Loss:0.8886894355428979
[35mTime taken by epoch 12 is 60.75s[0m
Epoch Started:13


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.8625522826168989|Valid Loss:0.8809173639784468
[32mLoss Decreased from 0.887872373804133 to 0.8809173639784468[0m
[35mTime taken by epoch 13 is 61.75s[0m
Epoch Started:14


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.8584796621992782|Valid Loss:0.8814342224851568
[35mTime taken by epoch 14 is 61.83s[0m
Epoch Started:15


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.8547865071812192|Valid Loss:0.8838056592231102
[35mTime taken by epoch 15 is 62.13s[0m
Epoch Started:16


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.8519170078071387|Valid Loss:0.8798020520108811
[32mLoss Decreased from 0.8809173639784468 to 0.8798020520108811[0m
[35mTime taken by epoch 16 is 61.97s[0m
Epoch Started:17


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.8493811169186154|Valid Loss:0.8781700768369309
[32mLoss Decreased from 0.8798020520108811 to 0.8781700768369309[0m
[35mTime taken by epoch 17 is 61.61s[0m
Epoch Started:18


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.8459630418468166|Valid Loss:0.877301331530226
[32mLoss Decreased from 0.8781700768369309 to 0.877301331530226[0m
[35mTime taken by epoch 18 is 62.01s[0m
Epoch Started:19


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.8434766495550001|Valid Loss:0.8739244595487067
[32mLoss Decreased from 0.877301331530226 to 0.8739244595487067[0m
[35mTime taken by epoch 19 is 61.52s[0m
Epoch Started:20


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.8398139682975976|Valid Loss:0.8797105132265294
[35mTime taken by epoch 20 is 61.43s[0m
Epoch Started:21


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:21 |Train Loss:0.8357835167163127|Valid Loss:0.8755691330483619
[35mTime taken by epoch 21 is 61.48s[0m
Epoch Started:22


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:22 |Train Loss:0.8331081258284079|Valid Loss:0.8743538120959667
[35mTime taken by epoch 22 is 62.85s[0m
Epoch Started:23


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:23 |Train Loss:0.8303227163649894|Valid Loss:0.873374159031726
[32mLoss Decreased from 0.8739244595487067 to 0.873374159031726[0m
[35mTime taken by epoch 23 is 61.73s[0m
Epoch Started:24


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:24 |Train Loss:0.8267778267731538|Valid Loss:0.872936393352265
[32mLoss Decreased from 0.873374159031726 to 0.872936393352265[0m
[35mTime taken by epoch 24 is 61.81s[0m
Epoch Started:25


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:25 |Train Loss:0.8234655103167972|Valid Loss:0.8702267002552113
[32mLoss Decreased from 0.872936393352265 to 0.8702267002552113[0m
[35mTime taken by epoch 25 is 62.34s[0m
Epoch Started:26


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:26 |Train Loss:0.8213803213995856|Valid Loss:0.8741940526252098
[35mTime taken by epoch 26 is 61.80s[0m
Epoch Started:27


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:27 |Train Loss:0.8174816572988356|Valid Loss:0.875884196859725
[35mTime taken by epoch 27 is 62.19s[0m
Epoch Started:28


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:28 |Train Loss:0.814707013078638|Valid Loss:0.8762105969672508
[35mTime taken by epoch 28 is 61.71s[0m
Epoch Started:29


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:29 |Train Loss:0.81171891044926|Valid Loss:0.8725099119734256
[35mTime taken by epoch 29 is 61.41s[0m
Epoch Started:30


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:30 |Train Loss:0.8103780595031944|Valid Loss:0.8706260526433904
[35mTime taken by epoch 30 is 62.45s[0m
Epoch Started:31


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:31 |Train Loss:0.8072732206937429|Valid Loss:0.8745738229853042
[35mTime taken by epoch 31 is 62.29s[0m
Epoch Started:32


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:32 |Train Loss:0.8025457546517656|Valid Loss:0.8717747127756159
[35mTime taken by epoch 32 is 61.86s[0m
Epoch Started:33


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:33 |Train Loss:0.7992638610504769|Valid Loss:0.8731485706694583
[35mTime taken by epoch 33 is 61.56s[0m
Epoch Started:34


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:34 |Train Loss:0.797411598063804|Valid Loss:0.8760812789835828
[35mTime taken by epoch 34 is 61.16s[0m
Epoch Started:35


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:35 |Train Loss:0.7933367458549706|Valid Loss:0.869857223744088
[32mLoss Decreased from 0.8702267002552113 to 0.869857223744088[0m
[35mTime taken by epoch 35 is 61.61s[0m
Epoch Started:36


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:36 |Train Loss:0.79088318219056|Valid Loss:0.8690008001124605
[32mLoss Decreased from 0.869857223744088 to 0.8690008001124605[0m
[35mTime taken by epoch 36 is 61.50s[0m
Epoch Started:37


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:37 |Train Loss:0.7877375953906292|Valid Loss:0.8729767533058815
[35mTime taken by epoch 37 is 61.77s[0m
Epoch Started:38


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:38 |Train Loss:0.7855926594218692|Valid Loss:0.874053622813935
[35mTime taken by epoch 38 is 61.50s[0m
Epoch Started:39


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:39 |Train Loss:0.782694310755343|Valid Loss:0.8680460833488627
[32mLoss Decreased from 0.8690008001124605 to 0.8680460833488627[0m
[35mTime taken by epoch 39 is 61.32s[0m
Epoch Started:40


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:40 |Train Loss:0.779426916869911|Valid Loss:0.8647924636272674
[32mLoss Decreased from 0.8680460833488627 to 0.8647924636272674[0m
[35mTime taken by epoch 40 is 61.78s[0m
Epoch Started:41


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:41 |Train Loss:0.7762927164902558|Valid Loss:0.8677194930137472
[35mTime taken by epoch 41 is 61.43s[0m
Epoch Started:42


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:42 |Train Loss:0.7739481929186228|Valid Loss:0.8676172647070377
[35mTime taken by epoch 42 is 61.52s[0m
Epoch Started:43


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:43 |Train Loss:0.7713516164470363|Valid Loss:0.8699568662237613
[35mTime taken by epoch 43 is 61.81s[0m
Epoch Started:44


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:44 |Train Loss:0.7673149376302152|Valid Loss:0.8656316006437261
[35mTime taken by epoch 44 is 62.24s[0m
Epoch Started:45


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:45 |Train Loss:0.7667141031574558|Valid Loss:0.8727433656124358
[35mTime taken by epoch 45 is 61.68s[0m
Epoch Started:46


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:46 |Train Loss:0.763776390294771|Valid Loss:0.869705652936976
[35mTime taken by epoch 46 is 61.60s[0m
Epoch Started:47


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:47 |Train Loss:0.7601082950024991|Valid Loss:0.870220848854552
[35mTime taken by epoch 47 is 61.58s[0m
Epoch Started:48


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:48 |Train Loss:0.7572581922685778|Valid Loss:0.8652194522796793
[35mTime taken by epoch 48 is 61.98s[0m
Epoch Started:49


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:49 |Train Loss:0.7556397315618154|Valid Loss:0.8705292572366431
[35mTime taken by epoch 49 is 62.01s[0m
Epoch Started:50


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:50 |Train Loss:0.7523342058465288|Valid Loss:0.8678541056653286
[35mTime taken by epoch 50 is 61.86s[0m
Epoch Started:51


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:51 |Train Loss:0.7491921408756359|Valid Loss:0.8698477808465349
[35mTime taken by epoch 51 is 62.05s[0m
Epoch Started:52


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:52 |Train Loss:0.7479331677024429|Valid Loss:0.8681047508057128
[35mTime taken by epoch 52 is 61.88s[0m
Epoch Started:53


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:53 |Train Loss:0.7444478901656898|Valid Loss:0.8636859921698875
[32mLoss Decreased from 0.8647924636272674 to 0.8636859921698875[0m
[35mTime taken by epoch 53 is 62.20s[0m
Epoch Started:54


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:54 |Train Loss:0.742480562184308|Valid Loss:0.8674601354497544
[35mTime taken by epoch 54 is 61.76s[0m
Epoch Started:55


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:55 |Train Loss:0.739585198905017|Valid Loss:0.8695642910105117
[35mTime taken by epoch 55 is 62.28s[0m
Epoch Started:56


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:56 |Train Loss:0.7364211556073782|Valid Loss:0.8697050188450103
[35mTime taken by epoch 56 is 62.34s[0m
Epoch Started:57


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:57 |Train Loss:0.7344896116772214|Valid Loss:0.8623620842365508
[32mLoss Decreased from 0.8636859921698875 to 0.8623620842365508[0m
[35mTime taken by epoch 57 is 62.57s[0m
Epoch Started:58


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:58 |Train Loss:0.7318805897558057|Valid Loss:0.8702282664623666
[35mTime taken by epoch 58 is 61.98s[0m
Epoch Started:59


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:59 |Train Loss:0.7293137949866217|Valid Loss:0.8673447421256532
[35mTime taken by epoch 59 is 62.33s[0m
Epoch Started:60


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:60 |Train Loss:0.7271173625378995|Valid Loss:0.8669141036398867
[35mTime taken by epoch 60 is 62.00s[0m
Epoch Started:61


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:61 |Train Loss:0.723479653371347|Valid Loss:0.8702860614086719
[35mTime taken by epoch 61 is 62.40s[0m
Epoch Started:62


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:62 |Train Loss:0.7219298910450291|Valid Loss:0.8669099249738328
[35mTime taken by epoch 62 is 62.69s[0m
Epoch Started:63


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:63 |Train Loss:0.7194934886855048|Valid Loss:0.8653587407254159
[35mTime taken by epoch 63 is 61.86s[0m
Epoch Started:64


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:64 |Train Loss:0.7183764947427286|Valid Loss:0.8666667646550118
[35mTime taken by epoch 64 is 62.22s[0m
Epoch Started:65


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:65 |Train Loss:0.71493023505082|Valid Loss:0.8643761023562005
[35mTime taken by epoch 65 is 62.32s[0m
Epoch Started:66


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:66 |Train Loss:0.7123807787895202|Valid Loss:0.8675159063745053
[35mTime taken by epoch 66 is 62.74s[0m
Epoch Started:67


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:67 |Train Loss:0.7108645529360385|Valid Loss:0.8649714107209063
[35mTime taken by epoch 67 is 62.28s[0m
Epoch Started:68


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:68 |Train Loss:0.7080100123946731|Valid Loss:0.8668781556981675
[35mTime taken by epoch 68 is 62.32s[0m
Epoch Started:69


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:69 |Train Loss:0.7058376173715334|Valid Loss:0.8686955647265657
[35mTime taken by epoch 69 is 61.77s[0m
Epoch Started:70


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:70 |Train Loss:0.7042786582096203|Valid Loss:0.8672617179282168
[35mTime taken by epoch 70 is 62.10s[0m
Epoch Started:71


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:71 |Train Loss:0.7005676897796425|Valid Loss:0.8659083196457397
[35mTime taken by epoch 71 is 62.08s[0m
Epoch Started:72


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:72 |Train Loss:0.6998449802398682|Valid Loss:0.8648780546289809
[35mTime taken by epoch 72 is 62.77s[0m
Epoch Started:73


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:73 |Train Loss:0.6973771043725916|Valid Loss:0.8668281080874991
[35mTime taken by epoch 73 is 62.44s[0m
Epoch Started:74


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:74 |Train Loss:0.6963135368115193|Valid Loss:0.8689907243911256
[35mTime taken by epoch 74 is 62.51s[0m
Epoch Started:75


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:75 |Train Loss:0.693429631800265|Valid Loss:0.866924468507158
[35mTime taken by epoch 75 is 62.36s[0m
Epoch Started:76


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:76 |Train Loss:0.6908975871833595|Valid Loss:0.8646442205347913
[35mTime taken by epoch 76 is 62.25s[0m
Epoch Started:77


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:77 |Train Loss:0.6882983890739647|Valid Loss:0.865651364022113
[35mTime taken by epoch 77 is 62.30s[0m
Epoch Started:78


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:78 |Train Loss:0.6858709747726853|Valid Loss:0.8662745635560218
[35mTime taken by epoch 78 is 62.18s[0m
Epoch Started:79


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:79 |Train Loss:0.684717857837677|Valid Loss:0.8679612190165418
[35mTime taken by epoch 79 is 62.17s[0m
Epoch Started:80


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:80 |Train Loss:0.6830829675133164|Valid Loss:0.8666366186547787
[35mTime taken by epoch 80 is 62.57s[0m
Epoch Started:81


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:81 |Train Loss:0.6819313838675215|Valid Loss:0.8650374577400533
[35mTime taken by epoch 81 is 62.74s[0m
Epoch Started:82


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:82 |Train Loss:0.6780164370665679|Valid Loss:0.8648200440914073
[35mTime taken by epoch 82 is 62.83s[0m
Epoch Started:83


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:83 |Train Loss:0.6762500920811215|Valid Loss:0.8697810337898579
[35mTime taken by epoch 83 is 62.16s[0m
Epoch Started:84


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:84 |Train Loss:0.6736070855243786|Valid Loss:0.8626253757070987
[35mTime taken by epoch 84 is 62.31s[0m
Epoch Started:85


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:85 |Train Loss:0.6707346722886369|Valid Loss:0.8627624752673697
[35mTime taken by epoch 85 is 62.65s[0m
Epoch Started:86


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:86 |Train Loss:0.670919775318455|Valid Loss:0.8675628925891633
[35mTime taken by epoch 86 is 62.42s[0m
Epoch Started:87


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:87 |Train Loss:0.6683096553828265|Valid Loss:0.8657629844990182
[35mTime taken by epoch 87 is 62.56s[0m
Epoch Started:88


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:88 |Train Loss:0.6658995908659857|Valid Loss:0.8661450680266035
[35mTime taken by epoch 88 is 62.25s[0m
Epoch Started:89


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:89 |Train Loss:0.6634602392042005|Valid Loss:0.8653235841304698
[35mTime taken by epoch 89 is 62.37s[0m
Epoch Started:90


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:90 |Train Loss:0.6629324507068943|Valid Loss:0.8641281584475903
[35mTime taken by epoch 90 is 62.99s[0m
Epoch Started:91


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:91 |Train Loss:0.6603445977778049|Valid Loss:0.869072881150753
[35mTime taken by epoch 91 is 62.80s[0m
Epoch Started:92


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:92 |Train Loss:0.6576094765920897|Valid Loss:0.8679794542332913
[35mTime taken by epoch 92 is 62.20s[0m
Epoch Started:93


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:93 |Train Loss:0.6560214790138038|Valid Loss:0.8618532203613444
[32mLoss Decreased from 0.8623620842365508 to 0.8618532203613444[0m
[35mTime taken by epoch 93 is 66.44s[0m
Epoch Started:94


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:94 |Train Loss:0.6543733529142431|Valid Loss:0.8631553066537735
[35mTime taken by epoch 94 is 70.39s[0m
Epoch Started:95


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:95 |Train Loss:0.6532356281538267|Valid Loss:0.8621938811971787
[35mTime taken by epoch 95 is 70.46s[0m
Epoch Started:96


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:96 |Train Loss:0.6501243484986795|Valid Loss:0.8606830074432048
[32mLoss Decreased from 0.8618532203613444 to 0.8606830074432048[0m
[35mTime taken by epoch 96 is 71.56s[0m
Epoch Started:97


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:97 |Train Loss:0.6481762547750731|Valid Loss:0.8651354794806623
[35mTime taken by epoch 97 is 70.11s[0m
Epoch Started:98


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:98 |Train Loss:0.6467855179632033|Valid Loss:0.8627630624365299
[35mTime taken by epoch 98 is 69.47s[0m
Epoch Started:99


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:99 |Train Loss:0.6442769292238596|Valid Loss:0.8646882303217625
[35mTime taken by epoch 99 is 69.25s[0m
Epoch Started:100


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:100 |Train Loss:0.6441758171932117|Valid Loss:0.865015483917074
[35mTime taken by epoch 100 is 68.73s[0m
Epoch Started:101


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:101 |Train Loss:0.6416838375297753|Valid Loss:0.8626342656764578
[35mTime taken by epoch 101 is 67.25s[0m
Epoch Started:102


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:102 |Train Loss:0.6392380401894853|Valid Loss:0.8665203972065703
[35mTime taken by epoch 102 is 66.80s[0m
Epoch Started:103


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:103 |Train Loss:0.638372726053805|Valid Loss:0.8623023654552217
[35mTime taken by epoch 103 is 68.18s[0m
Epoch Started:104


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:104 |Train Loss:0.6367309022594143|Valid Loss:0.8600959004239833
[32mLoss Decreased from 0.8606830074432048 to 0.8600959004239833[0m
[35mTime taken by epoch 104 is 67.87s[0m
Epoch Started:105


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:105 |Train Loss:0.6344714847770897|Valid Loss:0.8638257929619323
[35mTime taken by epoch 105 is 67.13s[0m
Epoch Started:106


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:106 |Train Loss:0.631491818621352|Valid Loss:0.8617717864665579
[35mTime taken by epoch 106 is 66.55s[0m
Epoch Started:107


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:107 |Train Loss:0.6307813422099964|Valid Loss:0.8644092209795688
[35mTime taken by epoch 107 is 66.85s[0m
Epoch Started:108


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:108 |Train Loss:0.62860354700604|Valid Loss:0.8625979588386861
[35mTime taken by epoch 108 is 67.76s[0m
Epoch Started:109


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:109 |Train Loss:0.6271186377551105|Valid Loss:0.8640639464905921
[35mTime taken by epoch 109 is 66.73s[0m
Epoch Started:110


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:110 |Train Loss:0.6255770415873141|Valid Loss:0.8658277925024641
[35mTime taken by epoch 110 is 63.89s[0m
Epoch Started:111


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:111 |Train Loss:0.6240442691622554|Valid Loss:0.8637566224057623
[35mTime taken by epoch 111 is 64.62s[0m
Epoch Started:112


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:112 |Train Loss:0.6213237114854762|Valid Loss:0.8600302427373034
[32mLoss Decreased from 0.8600959004239833 to 0.8600302427373034[0m
[35mTime taken by epoch 112 is 65.50s[0m
Epoch Started:113


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:113 |Train Loss:0.620588000078459|Valid Loss:0.8609058438463414
[35mTime taken by epoch 113 is 65.03s[0m
Epoch Started:114


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:114 |Train Loss:0.6193860459972073|Valid Loss:0.8608364716489264
[35mTime taken by epoch 114 is 69.09s[0m
Epoch Started:115


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:115 |Train Loss:0.6174977412094941|Valid Loss:0.8611640575084281
[35mTime taken by epoch 115 is 64.77s[0m
Epoch Started:116


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:116 |Train Loss:0.6152051864443598|Valid Loss:0.8631988193126435
[35mTime taken by epoch 116 is 64.67s[0m
Epoch Started:117


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:117 |Train Loss:0.6135912234718736|Valid Loss:0.8643994927406311
[35mTime taken by epoch 117 is 64.90s[0m
Epoch Started:118


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:118 |Train Loss:0.6119909031971081|Valid Loss:0.8628352259067779
[35mTime taken by epoch 118 is 64.87s[0m
Epoch Started:119


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:119 |Train Loss:0.610824631033717|Valid Loss:0.8616227279318139
[35mTime taken by epoch 119 is 64.86s[0m
Epoch Started:120


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:120 |Train Loss:0.6099677884900893|Valid Loss:0.8578357138532273
[32mLoss Decreased from 0.8600302427373034 to 0.8578357138532273[0m
[35mTime taken by epoch 120 is 64.59s[0m
Epoch Started:121


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:121 |Train Loss:0.6064041933497867|Valid Loss:0.8599161594472033
[35mTime taken by epoch 121 is 64.18s[0m
Epoch Started:122


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:122 |Train Loss:0.6061213035841246|Valid Loss:0.8643470710896431
[35mTime taken by epoch 122 is 68.99s[0m
Epoch Started:123


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:123 |Train Loss:0.6039794702787658|Valid Loss:0.8603114168694679
[35mTime taken by epoch 123 is 68.40s[0m
Epoch Started:124


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:124 |Train Loss:0.602369106460262|Valid Loss:0.8600997113166972
[35mTime taken by epoch 124 is 68.26s[0m
Epoch Started:125


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:125 |Train Loss:0.6003949519750235|Valid Loss:0.8607562044833569
[35mTime taken by epoch 125 is 67.39s[0m
Epoch Started:126


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:126 |Train Loss:0.5993209059173996|Valid Loss:0.8593718992902878
[35mTime taken by epoch 126 is 67.82s[0m
Epoch Started:127


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:127 |Train Loss:0.5962516011418523|Valid Loss:0.8613568721933568
[35mTime taken by epoch 127 is 66.87s[0m
Epoch Started:128


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:128 |Train Loss:0.5961416901768865|Valid Loss:0.8584289525417571
[35mTime taken by epoch 128 is 66.58s[0m
Epoch Started:129


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:129 |Train Loss:0.5952509316238197|Valid Loss:0.8595940711650443
[35mTime taken by epoch 129 is 68.04s[0m
Epoch Started:130


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:130 |Train Loss:0.5931419253349304|Valid Loss:0.8582621051910075
[35mTime taken by epoch 130 is 67.18s[0m
Epoch Started:131


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:131 |Train Loss:0.5917296599697422|Valid Loss:0.8665781553755415
[35mTime taken by epoch 131 is 67.97s[0m
Epoch Started:132


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:132 |Train Loss:0.5898124404855677|Valid Loss:0.8568732776540391
[32mLoss Decreased from 0.8578357138532273 to 0.8568732776540391[0m
[35mTime taken by epoch 132 is 67.09s[0m
Epoch Started:133


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:133 |Train Loss:0.5888737588315397|Valid Loss:0.8592967771469279
[35mTime taken by epoch 133 is 66.88s[0m
Epoch Started:134


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:134 |Train Loss:0.5874540187217093|Valid Loss:0.859260192576875
[35mTime taken by epoch 134 is 68.43s[0m
Epoch Started:135


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:135 |Train Loss:0.5850912999462436|Valid Loss:0.8558531961542495
[32mLoss Decreased from 0.8568732776540391 to 0.8558531961542495[0m
[35mTime taken by epoch 135 is 69.11s[0m
Epoch Started:136


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:136 |Train Loss:0.5843074099437611|Valid Loss:0.8563359887041944
[35mTime taken by epoch 136 is 66.63s[0m
Epoch Started:137


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:137 |Train Loss:0.582323951656754|Valid Loss:0.8580388033643682
[35mTime taken by epoch 137 is 63.23s[0m
Epoch Started:138


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:138 |Train Loss:0.5808913185789778|Valid Loss:0.8545733728307359
[32mLoss Decreased from 0.8558531961542495 to 0.8545733728307359[0m
[35mTime taken by epoch 138 is 64.34s[0m
Epoch Started:139


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:139 |Train Loss:0.5798558676565015|Valid Loss:0.8557735237669437
[35mTime taken by epoch 139 is 61.92s[0m
Epoch Started:140


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:140 |Train Loss:0.5775474119830776|Valid Loss:0.8576497955525175
[35mTime taken by epoch 140 is 62.17s[0m
Epoch Started:141


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:141 |Train Loss:0.5764780717927056|Valid Loss:0.8576657822791566
[35mTime taken by epoch 141 is 62.07s[0m
Epoch Started:142


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:142 |Train Loss:0.5753311205554653|Valid Loss:0.8553645319127022
[35mTime taken by epoch 142 is 63.41s[0m
Epoch Started:143


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:143 |Train Loss:0.575170669684539|Valid Loss:0.85381296213637
[32mLoss Decreased from 0.8545733728307359 to 0.85381296213637[0m
[35mTime taken by epoch 143 is 63.24s[0m
Epoch Started:144


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:144 |Train Loss:0.5720845660647831|Valid Loss:0.8550529391207593
[35mTime taken by epoch 144 is 63.44s[0m
Epoch Started:145


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:145 |Train Loss:0.5712870037233507|Valid Loss:0.8588227147751666
[35mTime taken by epoch 145 is 63.01s[0m
Epoch Started:146


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:146 |Train Loss:0.5708201353614395|Valid Loss:0.8563042105512416
[35mTime taken by epoch 146 is 63.62s[0m
Epoch Started:147


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:147 |Train Loss:0.5687715965348321|Valid Loss:0.8538304656109912
[35mTime taken by epoch 147 is 64.70s[0m
Epoch Started:148


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:148 |Train Loss:0.5670070934940029|Valid Loss:0.8530876598459609
[32mLoss Decreased from 0.85381296213637 to 0.8530876598459609[0m
[35mTime taken by epoch 148 is 62.74s[0m
Epoch Started:149


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:149 |Train Loss:0.5650518378695926|Valid Loss:0.8556355073096904
[35mTime taken by epoch 149 is 61.56s[0m
Epoch Started:150


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:150 |Train Loss:0.5647090016184626|Valid Loss:0.8566311179323399
[35mTime taken by epoch 150 is 62.84s[0m
Epoch Started:151


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:151 |Train Loss:0.563870759590252|Valid Loss:0.8534502881638547
[35mTime taken by epoch 151 is 63.07s[0m
Epoch Started:152


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:152 |Train Loss:0.5618007476265366|Valid Loss:0.851302887531037
[32mLoss Decreased from 0.8530876598459609 to 0.851302887531037[0m
[35mTime taken by epoch 152 is 62.72s[0m
Epoch Started:153


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:153 |Train Loss:0.5603298464336911|Valid Loss:0.857758548665554
[35mTime taken by epoch 153 is 63.79s[0m
Epoch Started:154


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:154 |Train Loss:0.5588144730877231|Valid Loss:0.851304013678368
[35mTime taken by epoch 154 is 63.37s[0m
Epoch Started:155


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:155 |Train Loss:0.5580353740099314|Valid Loss:0.8566710568488912
[35mTime taken by epoch 155 is 63.68s[0m
Epoch Started:156


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:156 |Train Loss:0.5561868490399541|Valid Loss:0.8537126246919023
[35mTime taken by epoch 156 is 67.59s[0m
Epoch Started:157


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:157 |Train Loss:0.5553201933164854|Valid Loss:0.8508743212578145
[32mLoss Decreased from 0.851302887531037 to 0.8508743212578145[0m
[35mTime taken by epoch 157 is 67.44s[0m
Epoch Started:158


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:158 |Train Loss:0.553557567660873|Valid Loss:0.8550697248032753
[35mTime taken by epoch 158 is 68.61s[0m
Epoch Started:159


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:159 |Train Loss:0.5522343207050014|Valid Loss:0.8552809433734163
[35mTime taken by epoch 159 is 68.58s[0m
Epoch Started:160


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:160 |Train Loss:0.5513575779425132|Valid Loss:0.8586123864701454
[35mTime taken by epoch 160 is 70.34s[0m
Epoch Started:161


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:161 |Train Loss:0.5502225643879658|Valid Loss:0.8514265671689459
[35mTime taken by epoch 161 is 71.28s[0m
Epoch Started:162


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:162 |Train Loss:0.5481572402490152|Valid Loss:0.8505599029520725
[32mLoss Decreased from 0.8508743212578145 to 0.8505599029520725[0m
[35mTime taken by epoch 162 is 70.46s[0m
Epoch Started:163


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:163 |Train Loss:0.5471715988339605|Valid Loss:0.84742575122955
[32mLoss Decreased from 0.8505599029520725 to 0.84742575122955[0m
[35mTime taken by epoch 163 is 68.89s[0m
Epoch Started:164


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:164 |Train Loss:0.5457045529339765|Valid Loss:0.8534497973766733
[35mTime taken by epoch 164 is 69.35s[0m
Epoch Started:165


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:165 |Train Loss:0.5445367307276339|Valid Loss:0.847173236786051
[32mLoss Decreased from 0.84742575122955 to 0.847173236786051[0m
[35mTime taken by epoch 165 is 69.39s[0m
Epoch Started:166


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:166 |Train Loss:0.5436729859661411|Valid Loss:0.8484075652792099
[35mTime taken by epoch 166 is 69.33s[0m
Epoch Started:167


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:167 |Train Loss:0.5425265647269584|Valid Loss:0.8506814507727928
[35mTime taken by epoch 167 is 66.00s[0m
Epoch Started:168


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:168 |Train Loss:0.5416770976942938|Valid Loss:0.8515669437164956
[35mTime taken by epoch 168 is 66.60s[0m
Epoch Started:169


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:169 |Train Loss:0.5396275072484403|Valid Loss:0.8499582130858239
[35mTime taken by epoch 169 is 65.96s[0m
Epoch Started:170


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:170 |Train Loss:0.5370311575966913|Valid Loss:0.8472977508889868
[35mTime taken by epoch 170 is 67.47s[0m
Epoch Started:171


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:171 |Train Loss:0.5371077750180219|Valid Loss:0.8475657485901041
[35mTime taken by epoch 171 is 70.52s[0m
Epoch Started:172


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:172 |Train Loss:0.5360413712424201|Valid Loss:0.8500164785283677
[35mTime taken by epoch 172 is 71.42s[0m
Epoch Started:173


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:173 |Train Loss:0.5346304706625036|Valid Loss:0.8434749588053277
[32mLoss Decreased from 0.847173236786051 to 0.8434749588053277[0m
[35mTime taken by epoch 173 is 72.03s[0m
Epoch Started:174


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:174 |Train Loss:0.5347240174138869|Valid Loss:0.845765712413382
[35mTime taken by epoch 174 is 71.78s[0m
Epoch Started:175


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:175 |Train Loss:0.5323909856177665|Valid Loss:0.849669431118255
[35mTime taken by epoch 175 is 68.87s[0m
Epoch Started:176


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:176 |Train Loss:0.5306915879249573|Valid Loss:0.8479153856318048
[35mTime taken by epoch 176 is 68.69s[0m
Epoch Started:177


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:177 |Train Loss:0.5299669671703029|Valid Loss:0.8457885866469526
[35mTime taken by epoch 177 is 69.11s[0m
Epoch Started:178


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:178 |Train Loss:0.5286217016142768|Valid Loss:0.8456054697645471
[35mTime taken by epoch 178 is 68.28s[0m
Epoch Started:179


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:179 |Train Loss:0.5273162748362566|Valid Loss:0.8469390678913036
[35mTime taken by epoch 179 is 69.10s[0m
Epoch Started:180


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:180 |Train Loss:0.5267089395909696|Valid Loss:0.8496766623030317
[35mTime taken by epoch 180 is 69.80s[0m
Epoch Started:181


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:181 |Train Loss:0.5253752724544422|Valid Loss:0.8445854821103684
[35mTime taken by epoch 181 is 69.04s[0m
Epoch Started:182


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:182 |Train Loss:0.5247772783846468|Valid Loss:0.8453096151351929
[35mTime taken by epoch 182 is 69.57s[0m
Epoch Started:183


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:183 |Train Loss:0.5231319807671212|Valid Loss:0.8435962517210778
[35mTime taken by epoch 183 is 69.29s[0m
Epoch Started:184


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:184 |Train Loss:0.522424891188338|Valid Loss:0.8456559548986718
[35mTime taken by epoch 184 is 69.06s[0m
Epoch Started:185


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:185 |Train Loss:0.5209540547551336|Valid Loss:0.8473672308820359
[35mTime taken by epoch 185 is 68.02s[0m
Epoch Started:186


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:186 |Train Loss:0.5198003343633704|Valid Loss:0.842380016408068
[32mLoss Decreased from 0.8434749588053277 to 0.842380016408068[0m
[35mTime taken by epoch 186 is 63.54s[0m
Epoch Started:187


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:187 |Train Loss:0.5184169076584481|Valid Loss:0.8422280372457301
[32mLoss Decreased from 0.842380016408068 to 0.8422280372457301[0m
[35mTime taken by epoch 187 is 64.27s[0m
Epoch Started:188


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:188 |Train Loss:0.517969135658161|Valid Loss:0.8405938173862214
[32mLoss Decreased from 0.8422280372457301 to 0.8405938173862214[0m
[35mTime taken by epoch 188 is 63.24s[0m
Epoch Started:189


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:189 |Train Loss:0.5160807200380274|Valid Loss:0.839028757937411
[32mLoss Decreased from 0.8405938173862214 to 0.839028757937411[0m
[35mTime taken by epoch 189 is 62.12s[0m
Epoch Started:190


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:190 |Train Loss:0.5153653698998528|Valid Loss:0.8436380180906742
[35mTime taken by epoch 190 is 62.23s[0m
Epoch Started:191


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:191 |Train Loss:0.5142053836100811|Valid Loss:0.8418731473861857
[35mTime taken by epoch 191 is 62.00s[0m
Epoch Started:192


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:192 |Train Loss:0.5130335524275497|Valid Loss:0.8395272188998283
[35mTime taken by epoch 192 is 65.49s[0m
Epoch Started:193


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:193 |Train Loss:0.5120400966824712|Valid Loss:0.8444464523741539
[35mTime taken by epoch 193 is 69.53s[0m
Epoch Started:194


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:194 |Train Loss:0.5109546506727064|Valid Loss:0.8368912605529136
[32mLoss Decreased from 0.839028757937411 to 0.8368912605529136[0m
[35mTime taken by epoch 194 is 69.68s[0m
Epoch Started:195


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:195 |Train Loss:0.509596472495311|Valid Loss:0.8419847564494356
[35mTime taken by epoch 195 is 69.11s[0m
Epoch Started:196


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:196 |Train Loss:0.5087450214334437|Valid Loss:0.8392266364807778
[35mTime taken by epoch 196 is 68.88s[0m
Epoch Started:197


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:197 |Train Loss:0.5069893836975098|Valid Loss:0.8411283581814868
[35mTime taken by epoch 197 is 68.41s[0m
Epoch Started:198


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:198 |Train Loss:0.5067439098615905|Valid Loss:0.8411135825705021
[35mTime taken by epoch 198 is 67.70s[0m
Epoch Started:199


  0%|          | 0/185 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch:199 |Train Loss:0.5054275599685876|Valid Loss:0.8428976865524941
[35mTime taken by epoch 199 is 67.30s[0m
