In [1]:
import numpy as np
import pandas as pd
import pickle
from datetime import datetime
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GroupKFold
import dateutil.easter as easter

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [3]:
from torch.utils.data import Dataset, DataLoader

In [4]:
from accelerate import Accelerator
import torch.optim as optim

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
import time

In [7]:
from tqdm.notebook import tqdm

In [8]:
import gc

## Global Variables ###

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [10]:
device

device(type='cuda', index=0)

In [11]:
from colorama import Fore, Back, Style
r_ = Fore.RED
b_ = Fore.BLUE
c_ = Fore.CYAN
g_ = Fore.GREEN
y_ = Fore.YELLOW
m_ = Fore.MAGENTA
sr_ = Style.RESET_ALL

In [12]:
BASE_DIR = '/sharedHDD/rohit/timeseries_learning/ubiquant/'
DATA_DIR = BASE_DIR+'data/parquet/'
INPUT_DIR = BASE_DIR+'input/'
WEIGHTS_DIR = BASE_DIR + 'weights/'

In [13]:
input_chunk_length = 30
output_chunk_length = 1
# embedding_dim = 56 ## Investment id embedding dim

In [14]:
features = [f'f_{i}' for i in range(300)]

## Load data ##

In [15]:
%%time
train_df = pd.read_parquet(DATA_DIR+'train_low_mem.parquet')

CPU times: user 8.75 s, sys: 13.7 s, total: 22.5 s
Wall time: 4.24 s


In [16]:
investment_ids = train_df.investment_id.unique().tolist()

In [17]:
max(investment_ids)

3773

In [18]:
with open(BASE_DIR+'input/folds.pickle', 'rb') as f:
    folds = pickle.load(f)

#### Utility functions ######

In [19]:
all_features_columns = ['target'] #+ features

In [20]:
def get_feature_array_train(rows):
    target_val = rows.target.values
    
    inp_vec = rows[all_features_columns].values
    
#     future_covariates = rows[['investment_id']+features].values
    
    future_covariates = rows[features].values
    
    return inp_vec,target_val,future_covariates

In [21]:
def sliding_windows(X,Y,FCVs, seq_length,prediction_length):
    x = []
    y = []
    future_covariates = []
    for i in range(len(X)-(prediction_length+seq_length)):
        _x = X[i:(i+seq_length),:].reshape(seq_length,-1)
#         _y = Y[(i+seq_length):(i+prediction_length+seq_length),:]
        _y = Y[i+seq_length]
        _future_covariate = FCVs[i+seq_length]
        x.append(_x)
        y.append(_y)
        future_covariates.append(_future_covariate)

    return x,y,future_covariates

In [22]:
def make_sequences(df,to_exclude_ids):
    x = []
    y = []
    future_covariates = []
    investment_ids = df.investment_id.unique()
    for iid in investment_ids:
        if iid not in to_exclude_ids:
            df_tmp = df[df.investment_id == iid].sort_values('time_id')
            tmp_X,tmp_Y,tmp_fcvs = get_feature_array_train(df_tmp)
            _x,_y,_fcv = sliding_windows(tmp_X,tmp_Y,tmp_fcvs,input_chunk_length,output_chunk_length)
            x.extend(_x)
            y.extend(_y)
            future_covariates.extend(_fcv)
            
#             gc.collect()
        # else:
        #     print('Not including iid:', iid)
    
    return np.float32(np.array(x)),np.float32(np.array(y)),np.float32(np.array(future_covariates))

In [23]:
gc.collect()

40

## Modeling ##

In [24]:
config = {
    'seq_length' : input_chunk_length,
    'num_epochs' : 100,
    'lr' : 0.00026,
    'input_size' : 1,
    'hidden_size' : 3,
    'num_layers' : 1,
    'num_classes' :1, ## This is  output dimension
    'train_shuffle': True,
    'val_shuffle': True,
    'batch_size' : 4096*2,
    'best_model_name' : 'lstm4_hs3',
    'bidirectional' : False,
    'only_last_hidden': True,
    'early_stopping_patience':10,
}

In [25]:
class TSDataset(Dataset):
    
    def __init__(self,x,y,future_covariates):
        """
        Args:
        """
        self.x=x
        self.y=y
        self.future_covariates = future_covariates

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        
        sample = [torch.Tensor(self.x[idx]),torch.Tensor(self.y[idx]),torch.Tensor(self.future_covariates[idx])]
        return sample

#### Model ####

In [26]:
num_epochs = config['num_epochs']
lr = config['lr']
input_size = config['input_size']
hidden_size = config['hidden_size']
num_layers = config['num_layers']
num_classes = config['num_classes']
seq_length = config['seq_length']
bidirectional = config['bidirectional']
only_last_hidden = config['only_last_hidden']
early_stopping_patience = config['early_stopping_patience']

#### Loss function ######

In [27]:
def pearson_loss(x, y):
    xd = x - x.mean()
    yd = y - y.mean()
    nom = (xd * yd).sum()
    denom = ((xd ** 2).sum() * (yd ** 2).sum()).sqrt()
    return 1 - nom / denom

In [28]:
class LstmTsModel(nn.Module):

    def __init__(self, num_classes, input_size, hidden_size, num_layers,seq_length):
        super(LstmTsModel, self).__init__()
        
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length
        
        cat_input_dim: int = 3774
        
#         self.embedding = nn.Embedding(cat_input_dim, embedding_dim)
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True,bidirectional=bidirectional)
        
        if bidirectional:
            m=2
        else:
            m=1
        
        if only_last_hidden:
            input_fc_dim = (hidden_size*m)+(len(features))
        else:
            input_fc_dim = (self.seq_length*hidden_size*m)+(len(features))
        
        print(input_fc_dim)
        
        self.fc = nn.Sequential(nn.Linear(input_fc_dim, 508),
#                                 nn.BatchNorm1d(num_features=508),
                                nn.ReLU(),
                                nn.Dropout(0.42),
                                
                                nn.Linear(508, 405),
#                                 nn.BatchNorm1d(num_features=405),
                                nn.Dropout(0.42),
                                nn.ReLU(),
                                
#                                 nn.Linear(input_fc_dim//16, input_fc_dim//32),
#                                 nn.BatchNorm1d(num_features=input_fc_dim//32),
#                                 # nn.Dropout(0.2),
#                                 nn.ReLU(),
                                
                                
                                nn.Linear(405, self.num_classes)
                                )
    
    def forward(self, x, fcv):
                                # Propagate input through LSTM
        h_out, (h_n, _) = self.lstm(x)
        if only_last_hidden:
            h_out = h_out[:,-1:,:]
        
        h_out = h_out.flatten(start_dim=1)
        
        
#         investment_ids = fcv[:,0].type(torch.LongTensor).to(device)
#         embeddings = self.embedding(investment_ids)
        
        h_out = torch.cat([fcv,h_out], dim=1)
        
        out = self.fc(h_out)
        
        return out

In [29]:
def run(model,train_dl,val_dl,fold):
    def evaluate(model,valid_loader):
        model.eval()
        valid_loss = 0
        rec_loss = 0
        with torch.no_grad():
            for i, inputs in enumerate(tqdm(valid_loader)):
                dataX = inputs[0]
                dataY = inputs[1]
                dataFCV = inputs[2]
                outputs = model(dataX,dataFCV)
                loss = criterion(outputs, dataY)
                valid_loss += loss.item()

        valid_loss /= len(valid_loader)
        return valid_loss
    
    def train_and_evaluate_loop(train_loader,model,optimizer,criterion,epoch,lr_scheduler=None,valid_loader=None, best_loss=99999):
        train_loss = 0
        improvement = False
        for i, inputs in enumerate(tqdm(train_loader)):
            optimizer.zero_grad()
            model.train()
            
            dataX = inputs[0]
            dataY = inputs[1]
            dataFCV = inputs[2]
            outputs = model(dataX,dataFCV)
            loss = criterion(outputs, dataY)
            
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
            if lr_scheduler:
                lr_scheduler.step()
        
        train_loss /= len(train_loader)
        if valid_loader:
            valid_loss = evaluate(model,valid_loader) 
            print(f"Epoch:{epoch} |Train Loss:{train_loss}|Valid Loss:{valid_loss}")
            if valid_loss <= best_loss:
                print(f"{g_}Loss Decreased from {best_loss} to {valid_loss}{sr_}")

                best_loss = valid_loss
                torch.save(model.state_dict(), WEIGHTS_DIR+str(fold)+'_'+config['best_model_name'])
                improvement = True
        else:
            print(f"Epoch:{epoch} |Train Loss:{train_loss}")
            
                    
        return best_loss,improvement
    
    accelerator = Accelerator()
    print(f"{accelerator.device} is used")

    
    
    optimizer = optim.Adam(model.parameters(),lr=config['lr'],amsgrad=False)
    criterion = pearson_loss
    
    # lr_scheduler = CosineAnnealingWarmupRestarts(optimizer, **config_lr)
    # lr_scheduler =  torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, **config_lr)
    lr_scheduler = None

    model,train_dl,val_dl,optimizer,lr_scheduler,criterion = accelerator.prepare(model,train_dl,val_dl,optimizer,lr_scheduler,criterion)

    best_loss = 9999999
    start_time = time.time()
    current_patience = 0
    for epoch in tqdm(range(config["num_epochs"])):
        print(f"Epoch Started:{epoch}")
        best_loss,improvement = train_and_evaluate_loop(train_dl,model,optimizer,criterion,epoch,lr_scheduler,valid_loader=val_dl, best_loss=best_loss)
        
        if not improvement:
            current_patience+=1
        else:
            current_patience = 0
            
        
        if current_patience == early_stopping_patience:
            print(f'{g_}EARLY STOPPING')
            break
            
            
        
        end_time = time.time()
        print(f"{m_}Time taken by epoch {epoch} is {end_time-start_time:.2f}s{sr_}")
        start_time = end_time
        
    return best_loss, model

In [30]:
gc.collect()

20

In [31]:
# for fold in folds.keys():
for fold in [0,1,2,3,4]:
    print(f'Starting for fold: {fold}{r_}')
    print(f'Preparing training data for fold: {fold}{m_}')
    train_indxs = folds[fold]['train']
    test_indxs = folds[fold]['test']
    test_f_df = train_df[train_df.index.isin(test_indxs)].reset_index(drop=True)
    train_f_df = train_df[train_df.index.isin(train_indxs)].reset_index(drop=True)
    
    print(train_f_df.shape)
    count_df = train_f_df.groupby('investment_id',as_index=False).agg(c = ('time_id', len))
    to_exclude_ids = count_df[count_df.c<input_chunk_length+1].investment_id.unique().tolist()
    
    X_train, Y_train, FCV_train = make_sequences(train_f_df,to_exclude_ids)
    Y_train = Y_train.reshape(-1,1)
    print(X_train.shape,Y_train.shape,FCV_train.shape)
    
    X_val, Y_val, FCV_val = make_sequences(test_f_df,to_exclude_ids)
    Y_val = Y_val.reshape(-1,1)
    print(X_val.shape,Y_val.shape,FCV_train.shape)
    
    model = LstmTsModel(num_classes, input_size, hidden_size, num_layers,seq_length)
    train_dl = DataLoader(TSDataset(X_train, Y_train, FCV_train), batch_size=config['batch_size'], shuffle=config['train_shuffle'], num_workers=0)
    val_dl = DataLoader(TSDataset(X_val, Y_val, FCV_val), batch_size=config['batch_size'], shuffle=config['train_shuffle'], num_workers=0)
    
    best_loss, model = run(model,train_dl,val_dl,fold)
    
    gc.collect()
    

Starting for fold: 0[31m
Preparing training data for fold: 0[35m
(531075, 304)
(421258, 30, 1) (421258, 1) (421258, 300)
(412467, 30, 1) (412467, 1) (421258, 300)
303
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.8983992475729722|Valid Loss:0.8553852099998325
[32mLoss Decreased from 9999999 to 0.8553852099998325[0m
[35mTime taken by epoch 0 is 22.44s[0m
Epoch Started:1


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8651913003279612|Valid Loss:0.8454979050393198
[32mLoss Decreased from 0.8553852099998325 to 0.8454979050393198[0m
[35mTime taken by epoch 1 is 22.11s[0m
Epoch Started:2


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8505086509081033|Valid Loss:0.8386671706741932
[32mLoss Decreased from 0.8454979050393198 to 0.8386671706741932[0m
[35mTime taken by epoch 2 is 22.15s[0m
Epoch Started:3


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.8418720353108186|Valid Loss:0.8323523238593457
[32mLoss Decreased from 0.8386671706741932 to 0.8323523238593457[0m
[35mTime taken by epoch 3 is 22.05s[0m
Epoch Started:4


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.8299570702589475|Valid Loss:0.8276672491840288
[32mLoss Decreased from 0.8323523238593457 to 0.8276672491840288[0m
[35mTime taken by epoch 4 is 22.06s[0m
Epoch Started:5


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.8186358018563344|Valid Loss:0.8248783396739586
[32mLoss Decreased from 0.8276672491840288 to 0.8248783396739586[0m
[35mTime taken by epoch 5 is 21.93s[0m
Epoch Started:6


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.806529761506961|Valid Loss:0.8179211487957075
[32mLoss Decreased from 0.8248783396739586 to 0.8179211487957075[0m
[35mTime taken by epoch 6 is 21.81s[0m
Epoch Started:7


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.7985265518610294|Valid Loss:0.8140302136832592
[32mLoss Decreased from 0.8179211487957075 to 0.8140302136832592[0m
[35mTime taken by epoch 7 is 22.01s[0m
Epoch Started:8


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.7885814870779331|Valid Loss:0.8100909041423424
[32mLoss Decreased from 0.8140302136832592 to 0.8100909041423424[0m
[35mTime taken by epoch 8 is 21.74s[0m
Epoch Started:9


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.7790955305099487|Valid Loss:0.8092805415976281
[32mLoss Decreased from 0.8100909041423424 to 0.8092805415976281[0m
[35mTime taken by epoch 9 is 21.76s[0m
Epoch Started:10


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.7730253040790558|Valid Loss:0.8080050349235535
[32mLoss Decreased from 0.8092805415976281 to 0.8080050349235535[0m
[35mTime taken by epoch 10 is 22.07s[0m
Epoch Started:11


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.766767966059538|Valid Loss:0.8074214633773354
[32mLoss Decreased from 0.8080050349235535 to 0.8074214633773354[0m
[35mTime taken by epoch 11 is 21.77s[0m
Epoch Started:12


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.7616747927207214|Valid Loss:0.8056810124247682
[32mLoss Decreased from 0.8074214633773354 to 0.8056810124247682[0m
[35mTime taken by epoch 12 is 21.74s[0m
Epoch Started:13


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.7550415866650068|Valid Loss:0.8082429556285634
[35mTime taken by epoch 13 is 21.94s[0m
Epoch Started:14


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.7514005223145852|Valid Loss:0.8084317831432118
[35mTime taken by epoch 14 is 21.93s[0m
Epoch Started:15


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.7443835139274597|Valid Loss:0.809968630472819
[35mTime taken by epoch 15 is 22.06s[0m
Epoch Started:16


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.739173826116782|Valid Loss:0.8113470895617616
[35mTime taken by epoch 16 is 21.85s[0m
Epoch Started:17


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.7334509434608313|Valid Loss:0.8131933749890795
[35mTime taken by epoch 17 is 21.90s[0m
Epoch Started:18


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.7294075328570145|Valid Loss:0.8145434178558051
[35mTime taken by epoch 18 is 21.96s[0m
Epoch Started:19


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.7246920363261149|Valid Loss:0.8128599664744209
[35mTime taken by epoch 19 is 21.93s[0m
Epoch Started:20


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.7179762033315805|Valid Loss:0.81396829848196
[35mTime taken by epoch 20 is 21.71s[0m
Epoch Started:21


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:21 |Train Loss:0.7132331614310925|Valid Loss:0.815154702055688
[35mTime taken by epoch 21 is 21.97s[0m
Epoch Started:22


  0%|          | 0/52 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:22 |Train Loss:0.7083786439437133|Valid Loss:0.8179817959374073
[32mEARLY STOPPING
Starting for fold: 1[31m
Preparing training data for fold: 1[35m
(1053141, 304)
(942773, 30, 1) (942773, 1) (942773, 300)
(412467, 30, 1) (412467, 1) (942773, 300)
303
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.8752590392170281|Valid Loss:0.853827018363803
[32mLoss Decreased from 9999999 to 0.853827018363803[0m
[35mTime taken by epoch 0 is 37.00s[0m
Epoch Started:1


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8444832378420336|Valid Loss:0.8438333798857296
[32mLoss Decreased from 0.853827018363803 to 0.8438333798857296[0m
[35mTime taken by epoch 1 is 36.90s[0m
Epoch Started:2


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8285703088702827|Valid Loss:0.8330538623473224
[32mLoss Decreased from 0.8438333798857296 to 0.8330538623473224[0m
[35mTime taken by epoch 2 is 37.00s[0m
Epoch Started:3


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.8125245293666576|Valid Loss:0.8219172709128436
[32mLoss Decreased from 0.8330538623473224 to 0.8219172709128436[0m
[35mTime taken by epoch 3 is 37.12s[0m
Epoch Started:4


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.7955429209717388|Valid Loss:0.8126342436846565
[32mLoss Decreased from 0.8219172709128436 to 0.8126342436846565[0m
[35mTime taken by epoch 4 is 36.96s[0m
Epoch Started:5


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.7848147734485823|Valid Loss:0.8092269967584049
[32mLoss Decreased from 0.8126342436846565 to 0.8092269967584049[0m
[35mTime taken by epoch 5 is 36.72s[0m
Epoch Started:6


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.7760766667538676|Valid Loss:0.808481706123726
[32mLoss Decreased from 0.8092269967584049 to 0.808481706123726[0m
[35mTime taken by epoch 6 is 37.08s[0m
Epoch Started:7


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.7713933386679354|Valid Loss:0.8066160047755522
[32mLoss Decreased from 0.808481706123726 to 0.8066160047755522[0m
[35mTime taken by epoch 7 is 36.92s[0m
Epoch Started:8


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.7655148495887888|Valid Loss:0.8058474215806699
[32mLoss Decreased from 0.8066160047755522 to 0.8058474215806699[0m
[35mTime taken by epoch 8 is 37.04s[0m
Epoch Started:9


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.7605035823994669|Valid Loss:0.8063260749274609
[35mTime taken by epoch 9 is 37.01s[0m
Epoch Started:10


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.7562360475803244|Valid Loss:0.8058710974805495
[35mTime taken by epoch 10 is 37.09s[0m
Epoch Started:11


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.7520960949618241|Valid Loss:0.8050298188246933
[32mLoss Decreased from 0.8058474215806699 to 0.8050298188246933[0m
[35mTime taken by epoch 11 is 36.91s[0m
Epoch Started:12


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.7458380070225946|Valid Loss:0.8057143653140348
[35mTime taken by epoch 12 is 36.81s[0m
Epoch Started:13


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.7432597125398701|Valid Loss:0.8042144600082847
[32mLoss Decreased from 0.8050298188246933 to 0.8042144600082847[0m
[35mTime taken by epoch 13 is 36.98s[0m
Epoch Started:14


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.7384006201193251|Valid Loss:0.8055563241827721
[35mTime taken by epoch 14 is 36.79s[0m
Epoch Started:15


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.7335752762597183|Valid Loss:0.8078007242258858
[35mTime taken by epoch 15 is 36.86s[0m
Epoch Started:16


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.7298625640828034|Valid Loss:0.8079912323577731
[35mTime taken by epoch 16 is 36.51s[0m
Epoch Started:17


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.72689144570252|Valid Loss:0.8048072620934131
[35mTime taken by epoch 17 is 36.84s[0m
Epoch Started:18


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.7220967889859758|Valid Loss:0.8045147914512485
[35mTime taken by epoch 18 is 36.70s[0m
Epoch Started:19


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.7175397199803385|Valid Loss:0.8081363053882823
[35mTime taken by epoch 19 is 36.59s[0m
Epoch Started:20


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.7119318904547856|Valid Loss:0.8054556542751836
[35mTime taken by epoch 20 is 36.77s[0m
Epoch Started:21


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:21 |Train Loss:0.7072725861236967|Valid Loss:0.8071745096468458
[35mTime taken by epoch 21 is 37.04s[0m
Epoch Started:22


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:22 |Train Loss:0.7049439950235958|Valid Loss:0.8077345770948073
[35mTime taken by epoch 22 is 36.78s[0m
Epoch Started:23


  0%|          | 0/116 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:23 |Train Loss:0.7017257737702337|Valid Loss:0.8079780012953515
[32mEARLY STOPPING
Starting for fold: 2[31m
Preparing training data for fold: 2[35m
(1575207, 304)
(1464649, 30, 1) (1464649, 1) (1464649, 300)
(412467, 30, 1) (412467, 1) (1464649, 300)
303
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.8627517602963155|Valid Loss:0.8615264880890939
[32mLoss Decreased from 9999999 to 0.8615264880890939[0m
[35mTime taken by epoch 0 is 52.16s[0m
Epoch Started:1


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8207955213898387|Valid Loss:0.8320486755932078
[32mLoss Decreased from 0.8615264880890939 to 0.8320486755932078[0m
[35mTime taken by epoch 1 is 52.23s[0m
Epoch Started:2


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.801622262547136|Valid Loss:0.8223411265541526
[32mLoss Decreased from 0.8320486755932078 to 0.8223411265541526[0m
[35mTime taken by epoch 2 is 52.39s[0m
Epoch Started:3


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.792887937756224|Valid Loss:0.817145379150615
[32mLoss Decreased from 0.8223411265541526 to 0.817145379150615[0m
[35mTime taken by epoch 3 is 52.06s[0m
Epoch Started:4


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.7859962452723327|Valid Loss:0.8152390613275415
[32mLoss Decreased from 0.817145379150615 to 0.8152390613275415[0m
[35mTime taken by epoch 4 is 52.38s[0m
Epoch Started:5


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.780625197474517|Valid Loss:0.814043353585636
[32mLoss Decreased from 0.8152390613275415 to 0.814043353585636[0m
[35mTime taken by epoch 5 is 52.40s[0m
Epoch Started:6


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.7759738714335351|Valid Loss:0.8147776115174387
[35mTime taken by epoch 6 is 52.39s[0m
Epoch Started:7


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.7710050514290453|Valid Loss:0.8118034110349768
[32mLoss Decreased from 0.814043353585636 to 0.8118034110349768[0m
[35mTime taken by epoch 7 is 52.19s[0m
Epoch Started:8


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.7667805982035631|Valid Loss:0.8122377220322105
[35mTime taken by epoch 8 is 52.70s[0m
Epoch Started:9


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.7624056815435101|Valid Loss:0.8128079257759393
[35mTime taken by epoch 9 is 52.62s[0m
Epoch Started:10


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.7578870134646666|Valid Loss:0.8140165291580499
[35mTime taken by epoch 10 is 52.56s[0m
Epoch Started:11


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.7527045417764333|Valid Loss:0.8130856822518742
[35mTime taken by epoch 11 is 52.63s[0m
Epoch Started:12


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.749658045156042|Valid Loss:0.8157249747538099
[35mTime taken by epoch 12 is 52.54s[0m
Epoch Started:13


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.7453346648695749|Valid Loss:0.8153094088329988
[35mTime taken by epoch 13 is 52.48s[0m
Epoch Started:14


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.740533591315733|Valid Loss:0.8146259293836706
[35mTime taken by epoch 14 is 52.43s[0m
Epoch Started:15


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.736288974405001|Valid Loss:0.8180324306675032
[35mTime taken by epoch 15 is 52.50s[0m
Epoch Started:16


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.7334034935722138|Valid Loss:0.8157855482662425
[35mTime taken by epoch 16 is 52.48s[0m
Epoch Started:17


  0%|          | 0/179 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.7278907635358459|Valid Loss:0.8178824630438113
[32mEARLY STOPPING
Starting for fold: 3[31m
Preparing training data for fold: 3[35m
(2097273, 304)
(1986639, 30, 1) (1986639, 1) (1986639, 300)
(412467, 30, 1) (412467, 1) (1986639, 300)
303
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.8667199621475282|Valid Loss:0.8709801680901471
[32mLoss Decreased from 9999999 to 0.8709801680901471[0m
[35mTime taken by epoch 0 is 67.95s[0m
Epoch Started:1


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8278144826123743|Valid Loss:0.8332980018036038
[32mLoss Decreased from 0.8709801680901471 to 0.8332980018036038[0m
[35mTime taken by epoch 1 is 68.19s[0m
Epoch Started:2


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8072190115481247|Valid Loss:0.8162734613699072
[32mLoss Decreased from 0.8332980018036038 to 0.8162734613699072[0m
[35mTime taken by epoch 2 is 68.37s[0m
Epoch Started:3


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.7971029649546117|Valid Loss:0.8129206580274245
[32mLoss Decreased from 0.8162734613699072 to 0.8129206580274245[0m
[35mTime taken by epoch 3 is 68.12s[0m
Epoch Started:4


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.7901819951740312|Valid Loss:0.808131311454025
[32mLoss Decreased from 0.8129206580274245 to 0.808131311454025[0m
[35mTime taken by epoch 4 is 68.16s[0m
Epoch Started:5


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.7860289942089913|Valid Loss:0.8061641267701691
[32mLoss Decreased from 0.808131311454025 to 0.8061641267701691[0m
[35mTime taken by epoch 5 is 68.43s[0m
Epoch Started:6


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.7811097365347937|Valid Loss:0.8059844421405419
[32mLoss Decreased from 0.8061641267701691 to 0.8059844421405419[0m
[35mTime taken by epoch 6 is 68.27s[0m
Epoch Started:7


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.7764487232200403|Valid Loss:0.8042598097932105
[32mLoss Decreased from 0.8059844421405419 to 0.8042598097932105[0m
[35mTime taken by epoch 7 is 68.33s[0m
Epoch Started:8


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.7723257114857803|Valid Loss:0.8034945177096947
[32mLoss Decreased from 0.8042598097932105 to 0.8034945177096947[0m
[35mTime taken by epoch 8 is 68.25s[0m
Epoch Started:9


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.7685386697941847|Valid Loss:0.8059269762506672
[35mTime taken by epoch 9 is 68.65s[0m
Epoch Started:10


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.76466541550287|Valid Loss:0.8050698719772638
[35mTime taken by epoch 10 is 68.28s[0m
Epoch Started:11


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.7609338630374076|Valid Loss:0.8040018946516747
[35mTime taken by epoch 11 is 68.40s[0m
Epoch Started:12


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.7575175462926856|Valid Loss:0.8036174119687548
[35mTime taken by epoch 12 is 67.56s[0m
Epoch Started:13


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.7536070756460904|Valid Loss:0.8038423400299222
[35mTime taken by epoch 13 is 67.95s[0m
Epoch Started:14


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.7499551895714591|Valid Loss:0.8059200457498139
[35mTime taken by epoch 14 is 68.42s[0m
Epoch Started:15


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.7462569043469527|Valid Loss:0.8061276534024406
[35mTime taken by epoch 15 is 68.37s[0m
Epoch Started:16


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.7420415289608049|Valid Loss:0.8059400331740286
[35mTime taken by epoch 16 is 68.03s[0m
Epoch Started:17


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.7390244168999754|Valid Loss:0.8053269140860614
[35mTime taken by epoch 17 is 68.63s[0m
Epoch Started:18


  0%|          | 0/243 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.7355219868966091|Valid Loss:0.8070204760514054
[32mEARLY STOPPING
Starting for fold: 4[31m
Preparing training data for fold: 4[35m
(2619339, 304)
(2508656, 30, 1) (2508656, 1) (2508656, 300)
(412467, 30, 1) (412467, 1) (2508656, 300)
303
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.8703743912498026|Valid Loss:0.8831626013213513
[32mLoss Decreased from 9999999 to 0.8831626013213513[0m
[35mTime taken by epoch 0 is 84.07s[0m
Epoch Started:1


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8306314168613198|Valid Loss:0.8533123100505156
[32mLoss Decreased from 0.8831626013213513 to 0.8533123100505156[0m
[35mTime taken by epoch 1 is 84.50s[0m
Epoch Started:2


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8082264617133995|Valid Loss:0.8369352022806803
[32mLoss Decreased from 0.8533123100505156 to 0.8369352022806803[0m
[35mTime taken by epoch 2 is 84.74s[0m
Epoch Started:3


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.7979661449547311|Valid Loss:0.8304918665511936
[32mLoss Decreased from 0.8369352022806803 to 0.8304918665511936[0m
[35mTime taken by epoch 3 is 84.12s[0m
Epoch Started:4


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.7919368844855492|Valid Loss:0.8248240421800053
[32mLoss Decreased from 0.8304918665511936 to 0.8248240421800053[0m
[35mTime taken by epoch 4 is 84.07s[0m
Epoch Started:5


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.787488275902279|Valid Loss:0.8261440396308899
[35mTime taken by epoch 5 is 84.47s[0m
Epoch Started:6


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.7831351485236848|Valid Loss:0.8172563139130088
[32mLoss Decreased from 0.8248240421800053 to 0.8172563139130088[0m
[35mTime taken by epoch 6 is 83.94s[0m
Epoch Started:7


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.7789031754487501|Valid Loss:0.8195197734178281
[35mTime taken by epoch 7 is 84.23s[0m
Epoch Started:8


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.7749609232725461|Valid Loss:0.8222519065819535
[35mTime taken by epoch 8 is 84.64s[0m
Epoch Started:9


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.7711677916275174|Valid Loss:0.8155858224513484
[32mLoss Decreased from 0.8172563139130088 to 0.8155858224513484[0m
[35mTime taken by epoch 9 is 83.65s[0m
Epoch Started:10


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.7678794200723257|Valid Loss:0.8165876573207331
[35mTime taken by epoch 10 is 84.70s[0m
Epoch Started:11


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.7636410489144465|Valid Loss:0.8209729837436303
[35mTime taken by epoch 11 is 84.43s[0m
Epoch Started:12


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.7600884369607858|Valid Loss:0.8161739438187843
[35mTime taken by epoch 12 is 84.35s[0m
Epoch Started:13


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.7573353044373204|Valid Loss:0.8137404521306356
[32mLoss Decreased from 0.8155858224513484 to 0.8137404521306356[0m
[35mTime taken by epoch 13 is 84.42s[0m
Epoch Started:14


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.7541708190976991|Valid Loss:0.8171653654061112
[35mTime taken by epoch 14 is 84.56s[0m
Epoch Started:15


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.7502303694280817|Valid Loss:0.8186377041480121
[35mTime taken by epoch 15 is 84.47s[0m
Epoch Started:16


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.7474286160173944|Valid Loss:0.8107156356175741
[32mLoss Decreased from 0.8137404521306356 to 0.8107156356175741[0m
[35mTime taken by epoch 16 is 84.25s[0m
Epoch Started:17


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.7444139864623353|Valid Loss:0.8156665014285668
[35mTime taken by epoch 17 is 84.80s[0m
Epoch Started:18


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.7410172519155744|Valid Loss:0.818084273852554
[35mTime taken by epoch 18 is 84.38s[0m
Epoch Started:19


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.73805207829522|Valid Loss:0.8142529819525924
[35mTime taken by epoch 19 is 84.70s[0m
Epoch Started:20


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.7356087833351732|Valid Loss:0.8131591619229784
[35mTime taken by epoch 20 is 84.88s[0m
Epoch Started:21


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:21 |Train Loss:0.7326360238103214|Valid Loss:0.8130323968681634
[35mTime taken by epoch 21 is 85.08s[0m
Epoch Started:22


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:22 |Train Loss:0.7294523110218856|Valid Loss:0.8154033889957503
[35mTime taken by epoch 22 is 84.69s[0m
Epoch Started:23


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:23 |Train Loss:0.7275068039614525|Valid Loss:0.8179318004963445
[35mTime taken by epoch 23 is 84.61s[0m
Epoch Started:24


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:24 |Train Loss:0.725101113319397|Valid Loss:0.8204180084022821
[35mTime taken by epoch 24 is 84.31s[0m
Epoch Started:25


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:25 |Train Loss:0.7223777204072437|Valid Loss:0.8128732758409837
[35mTime taken by epoch 25 is 84.52s[0m
Epoch Started:26


  0%|          | 0/307 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

Epoch:26 |Train Loss:0.7197921870197458|Valid Loss:0.815745475245457
[32mEARLY STOPPING
