In [1]:
import numpy as np
import pandas as pd
import pickle
from datetime import datetime
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GroupKFold
import dateutil.easter as easter

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [3]:
from torch.utils.data import Dataset, DataLoader

In [4]:
from accelerate import Accelerator
import torch.optim as optim

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
import time

In [7]:
from tqdm.notebook import tqdm

In [8]:
import gc

## Global Variables ###

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [10]:
device

device(type='cuda', index=0)

In [11]:
from colorama import Fore, Back, Style
r_ = Fore.RED
b_ = Fore.BLUE
c_ = Fore.CYAN
g_ = Fore.GREEN
y_ = Fore.YELLOW
m_ = Fore.MAGENTA
sr_ = Style.RESET_ALL

In [12]:
BASE_DIR = '/sharedHDD/rohit/timeseries_learning/ubiquant/'
DATA_DIR = BASE_DIR+'data/parquet/'
INPUT_DIR = BASE_DIR+'input/'
WEIGHTS_DIR = BASE_DIR + 'weights/'

In [13]:
input_chunk_length = 5
output_chunk_length = 1
# embedding_dim = 56 ## Investment id embedding dim

In [14]:
features = [f'f_{i}' for i in range(300)]

## Load data ##

In [15]:
%%time
train_df = pd.read_parquet(DATA_DIR+'train_low_mem.parquet')

CPU times: user 8.62 s, sys: 14.4 s, total: 23 s
Wall time: 4.19 s


In [16]:
investment_ids = train_df.investment_id.unique().tolist()

In [17]:
max(investment_ids)

3773

In [18]:
with open(BASE_DIR+'input/folds.pickle', 'rb') as f:
    folds = pickle.load(f)

#### Utility functions ######

In [19]:
all_features_columns = ['target'] #+ features

In [20]:
def get_feature_array_train(rows):
    target_val = rows.target.values
    
    inp_vec = rows[all_features_columns].values
    
#     future_covariates = rows[['investment_id']+features].values
    
    future_covariates = rows[features].values
    
    return inp_vec,target_val,future_covariates

In [21]:
def sliding_windows(X,Y,FCVs, seq_length,prediction_length):
    x = []
    y = []
    future_covariates = []
    for i in range(len(X)-(prediction_length+seq_length)):
        _x = X[i:(i+seq_length),:].reshape(seq_length,-1)
#         _y = Y[(i+seq_length):(i+prediction_length+seq_length),:]
        _y = Y[i+seq_length]
        _future_covariate = FCVs[i+seq_length]
        x.append(_x)
        y.append(_y)
        future_covariates.append(_future_covariate)

    return x,y,future_covariates

In [22]:
def make_sequences(df,to_exclude_ids):
    x = []
    y = []
    future_covariates = []
    investment_ids = df.investment_id.unique()
    for iid in investment_ids:
        if iid not in to_exclude_ids:
            df_tmp = df[df.investment_id == iid].sort_values('time_id')
            tmp_X,tmp_Y,tmp_fcvs = get_feature_array_train(df_tmp)
            _x,_y,_fcv = sliding_windows(tmp_X,tmp_Y,tmp_fcvs,input_chunk_length,output_chunk_length)
            x.extend(_x)
            y.extend(_y)
            future_covariates.extend(_fcv)
            
#             gc.collect()
        # else:
        #     print('Not including iid:', iid)
    
    return np.float32(np.array(x)),np.float32(np.array(y)),np.float32(np.array(future_covariates))

In [23]:
gc.collect()

40

## Modeling ##

In [24]:
config = {
    'seq_length' : input_chunk_length,
    'num_epochs' : 100,
    'lr' : 0.00026,
    'input_size' : 1,
    'hidden_size' : 3,
    'num_layers' : 1,
    'num_classes' :1, ## This is  output dimension
    'train_shuffle': True,
    'val_shuffle': True,
    'batch_size' : 4096*2,
    'best_model_name' : 'lstm5_c5',
    'bidirectional' : False,
    'only_last_hidden': True,
    'early_stopping_patience':10,
}

In [25]:
class TSDataset(Dataset):
    
    def __init__(self,x,y,future_covariates):
        """
        Args:
        """
        self.x=x
        self.y=y
        self.future_covariates = future_covariates

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        
        sample = [torch.Tensor(self.x[idx]),torch.Tensor(self.y[idx]),torch.Tensor(self.future_covariates[idx])]
        return sample

#### Model ####

In [26]:
num_epochs = config['num_epochs']
lr = config['lr']
input_size = config['input_size']
hidden_size = config['hidden_size']
num_layers = config['num_layers']
num_classes = config['num_classes']
seq_length = config['seq_length']
bidirectional = config['bidirectional']
only_last_hidden = config['only_last_hidden']
early_stopping_patience = config['early_stopping_patience']

#### Loss function ######

In [27]:
def pearson_loss(x, y):
    xd = x - x.mean()
    yd = y - y.mean()
    nom = (xd * yd).sum()
    denom = ((xd ** 2).sum() * (yd ** 2).sum()).sqrt()
    return 1 - nom / denom

In [28]:
class LstmTsModel(nn.Module):

    def __init__(self, num_classes, input_size, hidden_size, num_layers,seq_length):
        super(LstmTsModel, self).__init__()
        
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length
        
        cat_input_dim: int = 3774
        
#         self.embedding = nn.Embedding(cat_input_dim, embedding_dim)
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True,bidirectional=bidirectional)
        
        if bidirectional:
            m=2
        else:
            m=1
        
        if only_last_hidden:
            input_fc_dim = (hidden_size*m)+(len(features))
        else:
            input_fc_dim = (self.seq_length*hidden_size*m)+(len(features))
        
        print(input_fc_dim)
        
        self.fc = nn.Sequential(nn.Linear(input_fc_dim, 508),
#                                 nn.BatchNorm1d(num_features=508),
                                nn.ReLU(),
                                nn.Dropout(0.42),
                                
                                nn.Linear(508, 405),
#                                 nn.BatchNorm1d(num_features=405),
                                nn.Dropout(0.42),
                                nn.ReLU(),
                                
#                                 nn.Linear(input_fc_dim//16, input_fc_dim//32),
#                                 nn.BatchNorm1d(num_features=input_fc_dim//32),
#                                 # nn.Dropout(0.2),
#                                 nn.ReLU(),
                                
                                
                                nn.Linear(405, self.num_classes)
                                )
    
    def forward(self, x, fcv):
                                # Propagate input through LSTM
        h_out, (h_n, _) = self.lstm(x)
        if only_last_hidden:
            h_out = h_out[:,-1:,:]
        
        h_out = h_out.flatten(start_dim=1)
        
        
#         investment_ids = fcv[:,0].type(torch.LongTensor).to(device)
#         embeddings = self.embedding(investment_ids)
        
        h_out = torch.cat([fcv,h_out], dim=1)
        
        out = self.fc(h_out)
        
        return out

In [29]:
def run(model,train_dl,val_dl,fold):
    def evaluate(model,valid_loader):
        model.eval()
        valid_loss = 0
        rec_loss = 0
        with torch.no_grad():
            for i, inputs in enumerate(tqdm(valid_loader)):
                dataX = inputs[0]
                dataY = inputs[1]
                dataFCV = inputs[2]
                outputs = model(dataX,dataFCV)
                loss = criterion(outputs, dataY)
                valid_loss += loss.item()

        valid_loss /= len(valid_loader)
        return valid_loss
    
    def train_and_evaluate_loop(train_loader,model,optimizer,criterion,epoch,lr_scheduler=None,valid_loader=None, best_loss=99999):
        train_loss = 0
        improvement = False
        for i, inputs in enumerate(tqdm(train_loader)):
            optimizer.zero_grad()
            model.train()
            
            dataX = inputs[0]
            dataY = inputs[1]
            dataFCV = inputs[2]
            outputs = model(dataX,dataFCV)
            loss = criterion(outputs, dataY)
            
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
            if lr_scheduler:
                lr_scheduler.step()
        
        train_loss /= len(train_loader)
        if valid_loader:
            valid_loss = evaluate(model,valid_loader) 
            print(f"Epoch:{epoch} |Train Loss:{train_loss}|Valid Loss:{valid_loss}")
            if valid_loss <= best_loss:
                print(f"{g_}Loss Decreased from {best_loss} to {valid_loss}{sr_}")

                best_loss = valid_loss
                torch.save(model.state_dict(), WEIGHTS_DIR+str(fold)+'_'+config['best_model_name'])
                improvement = True
        else:
            print(f"Epoch:{epoch} |Train Loss:{train_loss}")
            
                    
        return best_loss,improvement
    
    accelerator = Accelerator()
    print(f"{accelerator.device} is used")

    
    
    optimizer = optim.Adam(model.parameters(),lr=config['lr'],amsgrad=False)
    criterion = pearson_loss
    
    # lr_scheduler = CosineAnnealingWarmupRestarts(optimizer, **config_lr)
    # lr_scheduler =  torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, **config_lr)
    lr_scheduler = None

    model,train_dl,val_dl,optimizer,lr_scheduler,criterion = accelerator.prepare(model,train_dl,val_dl,optimizer,lr_scheduler,criterion)

    best_loss = 9999999
    start_time = time.time()
    current_patience = 0
    for epoch in tqdm(range(config["num_epochs"])):
        print(f"Epoch Started:{epoch}")
        best_loss,improvement = train_and_evaluate_loop(train_dl,model,optimizer,criterion,epoch,lr_scheduler,valid_loader=val_dl, best_loss=best_loss)
        
        if not improvement:
            current_patience+=1
        else:
            current_patience = 0
            
        
        if current_patience == early_stopping_patience:
            print(f'{g_}EARLY STOPPING')
            break
            
            
        
        end_time = time.time()
        print(f"{m_}Time taken by epoch {epoch} is {end_time-start_time:.2f}s{sr_}")
        start_time = end_time
        
    return best_loss, model

In [30]:
gc.collect()

20

In [31]:
# for fold in folds.keys():
for fold in [0,1,2,3,4]:
    print(f'Starting for fold: {fold}{r_}')
    print(f'Preparing training data for fold: {fold}{m_}')
    train_indxs = folds[fold]['train']
    test_indxs = folds[fold]['test']
    test_f_df = train_df[train_df.index.isin(test_indxs)].reset_index(drop=True)
    train_f_df = train_df[train_df.index.isin(train_indxs)].reset_index(drop=True)
    
    print(train_f_df.shape)
    count_df = train_f_df.groupby('investment_id',as_index=False).agg(c = ('time_id', len))
    to_exclude_ids = count_df[count_df.c<input_chunk_length+1].investment_id.unique().tolist()
    
    X_train, Y_train, FCV_train = make_sequences(train_f_df,to_exclude_ids)
    Y_train = Y_train.reshape(-1,1)
    print(X_train.shape,Y_train.shape,FCV_train.shape)
    
    X_val, Y_val, FCV_val = make_sequences(test_f_df,to_exclude_ids)
    Y_val = Y_val.reshape(-1,1)
    print(X_val.shape,Y_val.shape,FCV_train.shape)
    
    model = LstmTsModel(num_classes, input_size, hidden_size, num_layers,seq_length)
    train_dl = DataLoader(TSDataset(X_train, Y_train, FCV_train), batch_size=config['batch_size'], shuffle=config['train_shuffle'], num_workers=0)
    val_dl = DataLoader(TSDataset(X_val, Y_val, FCV_val), batch_size=config['batch_size'], shuffle=config['train_shuffle'], num_workers=0)
    
    best_loss, model = run(model,train_dl,val_dl,fold)
    
    gc.collect()
    

Starting for fold: 0[31m
Preparing training data for fold: 0[35m
(531075, 304)
(509634, 5, 1) (509634, 1) (509634, 300)
(500649, 5, 1) (500649, 1) (509634, 300)
303
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.9044780646051679|Valid Loss:0.8569000901714448
[32mLoss Decreased from 9999999 to 0.8569000901714448[0m
[35mTime taken by epoch 0 is 28.32s[0m
Epoch Started:1


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8630288271676927|Valid Loss:0.8441750810992333
[32mLoss Decreased from 0.8569000901714448 to 0.8441750810992333[0m
[35mTime taken by epoch 1 is 27.20s[0m
Epoch Started:2


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8468307909511384|Valid Loss:0.8393283582502796
[32mLoss Decreased from 0.8441750810992333 to 0.8393283582502796[0m
[35mTime taken by epoch 2 is 27.09s[0m
Epoch Started:3


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.8331731453774467|Valid Loss:0.8284602194063125
[32mLoss Decreased from 0.8393283582502796 to 0.8284602194063125[0m
[35mTime taken by epoch 3 is 27.43s[0m
Epoch Started:4


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.8218626294817243|Valid Loss:0.8206066421924099
[32mLoss Decreased from 0.8284602194063125 to 0.8206066421924099[0m
[35mTime taken by epoch 4 is 27.33s[0m
Epoch Started:5


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.8094456347208174|Valid Loss:0.81444665020512
[32mLoss Decreased from 0.8206066421924099 to 0.81444665020512[0m
[35mTime taken by epoch 5 is 27.19s[0m
Epoch Started:6


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.79681624685015|Valid Loss:0.8107086823832604
[32mLoss Decreased from 0.81444665020512 to 0.8107086823832604[0m
[35mTime taken by epoch 6 is 27.19s[0m
Epoch Started:7


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.7879509831231738|Valid Loss:0.806405363544341
[32mLoss Decreased from 0.8107086823832604 to 0.806405363544341[0m
[35mTime taken by epoch 7 is 26.85s[0m
Epoch Started:8


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.7805596892795865|Valid Loss:0.8057594203179882
[32mLoss Decreased from 0.806405363544341 to 0.8057594203179882[0m
[35mTime taken by epoch 8 is 27.02s[0m
Epoch Started:9


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.7735098808530777|Valid Loss:0.8052125015566426
[32mLoss Decreased from 0.8057594203179882 to 0.8052125015566426[0m
[35mTime taken by epoch 9 is 26.68s[0m
Epoch Started:10


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.7689474234505306|Valid Loss:0.8045094301623683
[32mLoss Decreased from 0.8052125015566426 to 0.8045094301623683[0m
[35mTime taken by epoch 10 is 27.11s[0m
Epoch Started:11


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.7621548242039151|Valid Loss:0.8056033186374172
[35mTime taken by epoch 11 is 26.93s[0m
Epoch Started:12


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.7580000029669868|Valid Loss:0.806698759717326
[35mTime taken by epoch 12 is 27.10s[0m
Epoch Started:13


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.7507822078371805|Valid Loss:0.8057743137882601
[35mTime taken by epoch 13 is 26.96s[0m
Epoch Started:14


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.7461745918743194|Valid Loss:0.808042767547792
[35mTime taken by epoch 14 is 27.00s[0m
Epoch Started:15


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.7391179771650405|Valid Loss:0.8109457935056379
[35mTime taken by epoch 15 is 27.22s[0m
Epoch Started:16


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.7350791976565406|Valid Loss:0.8064220124675382
[35mTime taken by epoch 16 is 27.24s[0m
Epoch Started:17


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.7319919977869306|Valid Loss:0.8093090220805137
[35mTime taken by epoch 17 is 27.09s[0m
Epoch Started:18


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.7252210624634273|Valid Loss:0.8094172862268263
[35mTime taken by epoch 18 is 27.11s[0m
Epoch Started:19


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.7213148919362871|Valid Loss:0.8103576437119515
[35mTime taken by epoch 19 is 27.13s[0m
Epoch Started:20


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.7146887211572557|Valid Loss:0.8131873194248446
[32mEARLY STOPPING
Starting for fold: 1[31m
Preparing training data for fold: 1[35m
(1053141, 304)
(1031688, 5, 1) (1031688, 1) (1031688, 300)
(500649, 5, 1) (500649, 1) (1031688, 300)
303
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.8762092978235275|Valid Loss:0.8457221148475524
[32mLoss Decreased from 9999999 to 0.8457221148475524[0m
[35mTime taken by epoch 0 is 42.28s[0m
Epoch Started:1


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8398793756015717|Valid Loss:0.8323416161921716
[32mLoss Decreased from 0.8457221148475524 to 0.8323416161921716[0m
[35mTime taken by epoch 1 is 42.55s[0m
Epoch Started:2


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8191312201439388|Valid Loss:0.8176945419080796
[32mLoss Decreased from 0.8323416161921716 to 0.8176945419080796[0m
[35mTime taken by epoch 2 is 42.68s[0m
Epoch Started:3


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.8026476895052289|Valid Loss:0.8141514178245298
[32mLoss Decreased from 0.8176945419080796 to 0.8141514178245298[0m
[35mTime taken by epoch 3 is 42.93s[0m
Epoch Started:4


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.7892108025058867|Valid Loss:0.8058529726920589
[32mLoss Decreased from 0.8141514178245298 to 0.8058529726920589[0m
[35mTime taken by epoch 4 is 42.60s[0m
Epoch Started:5


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.781628953559058|Valid Loss:0.8057173817388473
[32mLoss Decreased from 0.8058529726920589 to 0.8057173817388473[0m
[35mTime taken by epoch 5 is 42.81s[0m
Epoch Started:6


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.7757044619037992|Valid Loss:0.8051389561545464
[32mLoss Decreased from 0.8057173817388473 to 0.8051389561545464[0m
[35mTime taken by epoch 6 is 42.69s[0m
Epoch Started:7


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.7696105714828249|Valid Loss:0.8063773801249843
[35mTime taken by epoch 7 is 42.29s[0m
Epoch Started:8


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.7649593107284062|Valid Loss:0.8041353773686194
[32mLoss Decreased from 0.8051389561545464 to 0.8041353773686194[0m
[35mTime taken by epoch 8 is 42.87s[0m
Epoch Started:9


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.7602333743420858|Valid Loss:0.8039810638273915
[32mLoss Decreased from 0.8041353773686194 to 0.8039810638273915[0m
[35mTime taken by epoch 9 is 42.63s[0m
Epoch Started:10


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.7554138037893507|Valid Loss:0.8067803709737716
[35mTime taken by epoch 10 is 43.05s[0m
Epoch Started:11


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.7501972180510325|Valid Loss:0.8042570419849888
[35mTime taken by epoch 11 is 42.42s[0m
Epoch Started:12


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.7460313666434515|Valid Loss:0.8045333825772808
[35mTime taken by epoch 12 is 42.72s[0m
Epoch Started:13


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.7400064071019491|Valid Loss:0.8043993836449038
[35mTime taken by epoch 13 is 42.97s[0m
Epoch Started:14


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.7357404865915813|Valid Loss:0.8041429087038963
[35mTime taken by epoch 14 is 42.41s[0m
Epoch Started:15


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.7322277530791268|Valid Loss:0.807183837698352
[35mTime taken by epoch 15 is 42.51s[0m
Epoch Started:16


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.7263296993951949|Valid Loss:0.8030804213016264
[32mLoss Decreased from 0.8039810638273915 to 0.8030804213016264[0m
[35mTime taken by epoch 16 is 42.17s[0m
Epoch Started:17


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.7221618900223384|Valid Loss:0.8031915291663139
[35mTime taken by epoch 17 is 42.49s[0m
Epoch Started:18


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.7181986494669839|Valid Loss:0.8052222305728544
[35mTime taken by epoch 18 is 42.51s[0m
Epoch Started:19


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.7124925676792387|Valid Loss:0.8038049282566193
[35mTime taken by epoch 19 is 42.53s[0m
Epoch Started:20


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.7080238915625072|Valid Loss:0.8050171732902527
[35mTime taken by epoch 20 is 42.50s[0m
Epoch Started:21


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:21 |Train Loss:0.7040682416113596|Valid Loss:0.8060601488236459
[35mTime taken by epoch 21 is 42.11s[0m
Epoch Started:22


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:22 |Train Loss:0.699234390069568|Valid Loss:0.8069481878511368
[35mTime taken by epoch 22 is 43.07s[0m
Epoch Started:23


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:23 |Train Loss:0.6965243915716807|Valid Loss:0.8043214669150691
[35mTime taken by epoch 23 is 42.46s[0m
Epoch Started:24


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:24 |Train Loss:0.6920420503805554|Valid Loss:0.8067574231855331
[35mTime taken by epoch 24 is 42.64s[0m
Epoch Started:25


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:25 |Train Loss:0.688704691709034|Valid Loss:0.8070637083822682
[35mTime taken by epoch 25 is 42.32s[0m
Epoch Started:26


  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:26 |Train Loss:0.6831490127813249|Valid Loss:0.8074803054332733
[32mEARLY STOPPING
Starting for fold: 2[31m
Preparing training data for fold: 2[35m
(1575207, 304)
(1553750, 5, 1) (1553750, 1) (1553750, 300)
(500649, 5, 1) (500649, 1) (1553750, 300)
303
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.8667545152337928|Valid Loss:0.8660920406541517
[32mLoss Decreased from 9999999 to 0.8660920406541517[0m
[35mTime taken by epoch 0 is 57.77s[0m
Epoch Started:1


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8295331650658657|Valid Loss:0.8417557333746264
[32mLoss Decreased from 0.8660920406541517 to 0.8417557333746264[0m
[35mTime taken by epoch 1 is 58.04s[0m
Epoch Started:2


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8077956315718199|Valid Loss:0.8242979213114707
[32mLoss Decreased from 0.8417557333746264 to 0.8242979213114707[0m
[35mTime taken by epoch 2 is 58.51s[0m
Epoch Started:3


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.7962930246403342|Valid Loss:0.8191345151393644
[32mLoss Decreased from 0.8242979213114707 to 0.8191345151393644[0m
[35mTime taken by epoch 3 is 58.05s[0m
Epoch Started:4


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.7877790099696109|Valid Loss:0.8149259715310989
[32mLoss Decreased from 0.8191345151393644 to 0.8149259715310989[0m
[35mTime taken by epoch 4 is 58.54s[0m
Epoch Started:5


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.7819415610087546|Valid Loss:0.8140260692565672
[32mLoss Decreased from 0.8149259715310989 to 0.8140260692565672[0m
[35mTime taken by epoch 5 is 58.99s[0m
Epoch Started:6


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.7770701452305443|Valid Loss:0.8149713633521911
[35mTime taken by epoch 6 is 58.58s[0m
Epoch Started:7


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.7720174246712734|Valid Loss:0.8153672314459278
[35mTime taken by epoch 7 is 58.50s[0m
Epoch Started:8


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.7676139188440223|Valid Loss:0.8133254435754591
[32mLoss Decreased from 0.8140260692565672 to 0.8133254435754591[0m
[35mTime taken by epoch 8 is 58.71s[0m
Epoch Started:9


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.7633644285954927|Valid Loss:0.8139461711529763
[35mTime taken by epoch 9 is 58.50s[0m
Epoch Started:10


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.758709612645601|Valid Loss:0.8149158175914518
[35mTime taken by epoch 10 is 58.35s[0m
Epoch Started:11


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.7542100216213026|Valid Loss:0.8154555761045025
[35mTime taken by epoch 11 is 58.50s[0m
Epoch Started:12


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.7507521648156016|Valid Loss:0.8146136403083801
[35mTime taken by epoch 12 is 58.34s[0m
Epoch Started:13


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.7456268630529704|Valid Loss:0.8168782351478454
[35mTime taken by epoch 13 is 58.38s[0m
Epoch Started:14


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.7413266291743831|Valid Loss:0.8163474634770425
[35mTime taken by epoch 14 is 58.27s[0m
Epoch Started:15


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.7377452028425117|Valid Loss:0.8136202260371177
[35mTime taken by epoch 15 is 58.08s[0m
Epoch Started:16


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.7339286603425679|Valid Loss:0.8133487826393496
[35mTime taken by epoch 16 is 58.69s[0m
Epoch Started:17


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.729079741866965|Valid Loss:0.816499843712776
[35mTime taken by epoch 17 is 58.21s[0m
Epoch Started:18


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.7260911938391233|Valid Loss:0.8154940220617479
[32mEARLY STOPPING
Starting for fold: 3[31m
Preparing training data for fold: 3[35m
(2097273, 304)
(2075812, 5, 1) (2075812, 1) (2075812, 300)
(500649, 5, 1) (500649, 1) (2075812, 300)
303
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.867911510580168|Valid Loss:0.8682321733044039
[32mLoss Decreased from 9999999 to 0.8682321733044039[0m
[35mTime taken by epoch 0 is 75.00s[0m
Epoch Started:1


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8309033651520886|Valid Loss:0.8340596325935856
[32mLoss Decreased from 0.8682321733044039 to 0.8340596325935856[0m
[35mTime taken by epoch 1 is 74.53s[0m
Epoch Started:2


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8081065430415897|Valid Loss:0.8158050294845335
[32mLoss Decreased from 0.8340596325935856 to 0.8158050294845335[0m
[35mTime taken by epoch 2 is 74.52s[0m
Epoch Started:3


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.7967160900277416|Valid Loss:0.8087735108790859
[32mLoss Decreased from 0.8158050294845335 to 0.8087735108790859[0m
[35mTime taken by epoch 3 is 74.66s[0m
Epoch Started:4


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.7906456052318332|Valid Loss:0.8068180757184182
[32mLoss Decreased from 0.8087735108790859 to 0.8068180757184182[0m
[35mTime taken by epoch 4 is 74.73s[0m
Epoch Started:5


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.7856430514590946|Valid Loss:0.8061838774911819
[32mLoss Decreased from 0.8068180757184182 to 0.8061838774911819[0m
[35mTime taken by epoch 5 is 74.92s[0m
Epoch Started:6


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.7815291940227268|Valid Loss:0.8047670895053495
[32mLoss Decreased from 0.8061838774911819 to 0.8047670895053495[0m
[35mTime taken by epoch 6 is 74.77s[0m
Epoch Started:7


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.7767539465521264|Valid Loss:0.8042545087875859
[32mLoss Decreased from 0.8047670895053495 to 0.8042545087875859[0m
[35mTime taken by epoch 7 is 75.01s[0m
Epoch Started:8


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.7737742265847725|Valid Loss:0.805083490187122
[35mTime taken by epoch 8 is 75.18s[0m
Epoch Started:9


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.7693843214999972|Valid Loss:0.8044711495599439
[35mTime taken by epoch 9 is 75.46s[0m
Epoch Started:10


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.7651875455548444|Valid Loss:0.8048135090258813
[35mTime taken by epoch 10 is 74.79s[0m
Epoch Started:11


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.7612206034772978|Valid Loss:0.8025540296108492
[32mLoss Decreased from 0.8042545087875859 to 0.8025540296108492[0m
[35mTime taken by epoch 11 is 75.16s[0m
Epoch Started:12


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.758048563960969|Valid Loss:0.8054048332475847
[35mTime taken by epoch 12 is 75.06s[0m
Epoch Started:13


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.7541670346353936|Valid Loss:0.8064378894144489
[35mTime taken by epoch 13 is 75.07s[0m
Epoch Started:14


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.7508814881636402|Valid Loss:0.8016183587812609
[32mLoss Decreased from 0.8025540296108492 to 0.8016183587812609[0m
[35mTime taken by epoch 14 is 75.01s[0m
Epoch Started:15


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.7470377655479852|Valid Loss:0.8032674914406192
[35mTime taken by epoch 15 is 75.43s[0m
Epoch Started:16


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.7436767832970056|Valid Loss:0.8042276395905402
[35mTime taken by epoch 16 is 74.89s[0m
Epoch Started:17


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.7401530780191496|Valid Loss:0.8032253490340325
[35mTime taken by epoch 17 is 75.12s[0m
Epoch Started:18


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.7366026433434073|Valid Loss:0.8057261724625865
[35mTime taken by epoch 18 is 74.71s[0m
Epoch Started:19


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.7335860161330756|Valid Loss:0.8068615303885552
[35mTime taken by epoch 19 is 74.37s[0m
Epoch Started:20


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.7302297555555509|Valid Loss:0.8069209206488824
[35mTime taken by epoch 20 is 74.97s[0m
Epoch Started:21


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:21 |Train Loss:0.726511660523302|Valid Loss:0.8075354926047786
[35mTime taken by epoch 21 is 74.65s[0m
Epoch Started:22


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:22 |Train Loss:0.7244765347852482|Valid Loss:0.8054917695060853
[35mTime taken by epoch 22 is 74.95s[0m
Epoch Started:23


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:23 |Train Loss:0.7212457506675419|Valid Loss:0.8083203233057453
[35mTime taken by epoch 23 is 74.69s[0m
Epoch Started:24


  0%|          | 0/254 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:24 |Train Loss:0.7175910036394916|Valid Loss:0.8079257376732365
[32mEARLY STOPPING
Starting for fold: 4[31m
Preparing training data for fold: 4[35m
(2619339, 304)
(2597877, 5, 1) (2597877, 1) (2597877, 300)
(500649, 5, 1) (500649, 1) (2597877, 300)
303
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.8675714410700888|Valid Loss:0.8743459092032525
[32mLoss Decreased from 9999999 to 0.8743459092032525[0m
[35mTime taken by epoch 0 is 91.54s[0m
Epoch Started:1


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8250535593467688|Valid Loss:0.841960088860604
[32mLoss Decreased from 0.8743459092032525 to 0.841960088860604[0m
[35mTime taken by epoch 1 is 92.87s[0m
Epoch Started:2


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8053526921467211|Valid Loss:0.827850591751837
[32mLoss Decreased from 0.841960088860604 to 0.827850591751837[0m
[35mTime taken by epoch 2 is 93.26s[0m
Epoch Started:3


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.7969198703016125|Valid Loss:0.8276733881042849
[32mLoss Decreased from 0.827850591751837 to 0.8276733881042849[0m
[35mTime taken by epoch 3 is 92.56s[0m
Epoch Started:4


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.791458565109181|Valid Loss:0.8228130215598691
[32mLoss Decreased from 0.8276733881042849 to 0.8228130215598691[0m
[35mTime taken by epoch 4 is 92.94s[0m
Epoch Started:5


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.7865731013271043|Valid Loss:0.8238671652732357
[35mTime taken by epoch 5 is 92.67s[0m
Epoch Started:6


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.7823688534070861|Valid Loss:0.8172267752308999
[32mLoss Decreased from 0.8228130215598691 to 0.8172267752308999[0m
[35mTime taken by epoch 6 is 93.52s[0m
Epoch Started:7


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.7781898353084828|Valid Loss:0.8212334850142079
[35mTime taken by epoch 7 is 93.14s[0m
Epoch Started:8


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.774609608095397|Valid Loss:0.8201114016194497
[35mTime taken by epoch 8 is 94.10s[0m
Epoch Started:9


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.7703744602278344|Valid Loss:0.8149713739272086
[32mLoss Decreased from 0.8172267752308999 to 0.8149713739272086[0m
[35mTime taken by epoch 9 is 94.75s[0m
Epoch Started:10


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.7671701611962708|Valid Loss:0.814798754069113
[32mLoss Decreased from 0.8149713739272086 to 0.814798754069113[0m
[35mTime taken by epoch 10 is 94.85s[0m
Epoch Started:11


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.7634840065953117|Valid Loss:0.8154830557684745
[35mTime taken by epoch 11 is 93.16s[0m
Epoch Started:12


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.7598928777301837|Valid Loss:0.8183086841337143
[35mTime taken by epoch 12 is 93.29s[0m
Epoch Started:13


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.7563335363220118|Valid Loss:0.8138742100807929
[32mLoss Decreased from 0.814798754069113 to 0.8138742100807929[0m
[35mTime taken by epoch 13 is 92.93s[0m
Epoch Started:14


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.7534123244900374|Valid Loss:0.8134637234672424
[32mLoss Decreased from 0.8138742100807929 to 0.8134637234672424[0m
[35mTime taken by epoch 14 is 93.84s[0m
Epoch Started:15


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.7496130831961362|Valid Loss:0.8115113915935639
[32mLoss Decreased from 0.8134637234672424 to 0.8115113915935639[0m
[35mTime taken by epoch 15 is 94.66s[0m
Epoch Started:16


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.7467621203113652|Valid Loss:0.8142717990183062
[35mTime taken by epoch 16 is 93.49s[0m
Epoch Started:17


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.7432409429325247|Valid Loss:0.8144431171878692
[35mTime taken by epoch 17 is 92.50s[0m
Epoch Started:18


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.7407029837182483|Valid Loss:0.8138528408542756
[35mTime taken by epoch 18 is 93.40s[0m
Epoch Started:19


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.7377536304341922|Valid Loss:0.8177803389487728
[35mTime taken by epoch 19 is 92.46s[0m
Epoch Started:20


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.7351255004510939|Valid Loss:0.8186864035744821
[35mTime taken by epoch 20 is 92.29s[0m
Epoch Started:21


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:21 |Train Loss:0.7321634007699834|Valid Loss:0.8144014314297707
[35mTime taken by epoch 21 is 92.40s[0m
Epoch Started:22


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:22 |Train Loss:0.7293182782032205|Valid Loss:0.8162804140198615
[35mTime taken by epoch 22 is 93.38s[0m
Epoch Started:23


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:23 |Train Loss:0.7265219778384803|Valid Loss:0.8158347952750421
[35mTime taken by epoch 23 is 92.81s[0m
Epoch Started:24


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:24 |Train Loss:0.7248704500918118|Valid Loss:0.8189899286916179
[35mTime taken by epoch 24 is 93.71s[0m
Epoch Started:25


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

Epoch:25 |Train Loss:0.7223768749701902|Valid Loss:0.818321761585051
[32mEARLY STOPPING
