In [1]:
import numpy as np
import pandas as pd
import pickle
from datetime import datetime
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GroupKFold
import dateutil.easter as easter

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [3]:
from torch.utils.data import Dataset, DataLoader

In [4]:
from accelerate import Accelerator
import torch.optim as optim

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
import time

In [7]:
from tqdm.notebook import tqdm

In [8]:
import gc

## Global Variables ###

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [10]:
device

device(type='cuda', index=0)

In [11]:
from colorama import Fore, Back, Style
r_ = Fore.RED
b_ = Fore.BLUE
c_ = Fore.CYAN
g_ = Fore.GREEN
y_ = Fore.YELLOW
m_ = Fore.MAGENTA
sr_ = Style.RESET_ALL

In [12]:
BASE_DIR = '/sharedHDD/rohit/timeseries_learning/ubiquant/'
DATA_DIR = BASE_DIR+'data/parquet/'
INPUT_DIR = BASE_DIR+'input/'
WEIGHTS_DIR = BASE_DIR + 'weights/'

In [13]:
features = [f'f_{i}' for i in range(300)]

## Load data ##

In [14]:
%%time
train_df = pd.read_parquet(DATA_DIR+'train_low_mem.parquet')

CPU times: user 8.71 s, sys: 14.5 s, total: 23.2 s
Wall time: 4.13 s


In [15]:
investment_ids = train_df.investment_id.unique().tolist()

In [16]:
max(investment_ids)

3773

In [17]:
with open(BASE_DIR+'input/folds.pickle', 'rb') as f:
    folds = pickle.load(f)

#### Utility functions ######

In [18]:
gc.collect()

80

## Modeling ##

In [19]:
config = {
    'num_epochs' : 100,
    'lr' : 0.00026,
    'input_size' : 300,
    'num_classes' :1, ## This is  output dimension
    'train_shuffle': True,
    'val_shuffle': True,
    'batch_size' : 4096*2,
    'best_model_name' : 'baseline_mse',
    'early_stopping_patience':10,
}

In [20]:
class TSDataset(Dataset):
    
    def __init__(self,x,y):
        """
        Args:
        """
        self.x=x
        self.y=y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        
        sample = [torch.Tensor(self.x[idx]),torch.Tensor(self.y[idx])]
        return sample

#### Model ####

In [21]:
num_epochs = config['num_epochs']
lr = config['lr']
input_size = config['input_size']
num_classes = config['num_classes']
early_stopping_patience = config['early_stopping_patience']

#### Loss function ######

In [22]:
def pearson_loss(x, y):
    xd = x - x.mean()
    yd = y - y.mean()
    nom = (xd * yd).sum()
    denom = ((xd ** 2).sum() * (yd ** 2).sum()).sqrt()
    return 1 - nom / denom

In [23]:
class BaselineModel(nn.Module):

    def __init__(self, num_classes, input_size):
        super(BaselineModel, self).__init__()
        
        self.num_classes = num_classes
        self.input_size = input_size
        
        
        print(self.input_size)
        
        self.fc = nn.Sequential(nn.Linear(self.input_size, 508),
#                                 nn.BatchNorm1d(num_features=508),
                                nn.ReLU(),
                                nn.Dropout(0.42),
                                
                                nn.Linear(508, 405),
#                                 nn.BatchNorm1d(num_features=405),
                                nn.Dropout(0.42),
                                nn.ReLU(),
                                
#                                 nn.Linear(input_fc_dim//16, input_fc_dim//32),
#                                 nn.BatchNorm1d(num_features=input_fc_dim//32),
#                                 # nn.Dropout(0.2),
#                                 nn.ReLU(),
                                
                                
                                nn.Linear(405, self.num_classes)
                                )
    
    def forward(self, x):
        out = self.fc(x)
        
        return out

In [24]:
def run(model,train_dl,val_dl,fold):
    def evaluate(model,valid_loader):
        model.eval()
        valid_loss = 0
        rec_loss = 0
        with torch.no_grad():
            for i, inputs in enumerate(tqdm(valid_loader)):
                dataX = inputs[0]
                dataY = inputs[1]
                
                outputs = model(dataX)
                loss = criterion(outputs, dataY)
                valid_loss += loss.item()

        valid_loss /= len(valid_loader)
        return valid_loss
    
    def train_and_evaluate_loop(train_loader,model,optimizer,criterion,epoch,lr_scheduler=None,valid_loader=None, best_loss=99999):
        train_loss = 0
        improvement = False
        for i, inputs in enumerate(tqdm(train_loader)):
            optimizer.zero_grad()
            model.train()
            
            dataX = inputs[0]
            dataY = inputs[1]
    
            outputs = model(dataX)
            loss = criterion(outputs, dataY)
            
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
            if lr_scheduler:
                lr_scheduler.step()
        
        train_loss /= len(train_loader)
        if valid_loader:
            valid_loss = evaluate(model,valid_loader) 
            print(f"Epoch:{epoch} |Train Loss:{train_loss}|Valid Loss:{valid_loss}")
            if valid_loss <= best_loss:
                print(f"{g_}Loss Decreased from {best_loss} to {valid_loss}{sr_}")

                best_loss = valid_loss
                torch.save(model.state_dict(), WEIGHTS_DIR+str(fold)+'_'+config['best_model_name'])
                improvement = True
        else:
            print(f"Epoch:{epoch} |Train Loss:{train_loss}")
            
                    
        return best_loss,improvement
    
    accelerator = Accelerator()
    print(f"{accelerator.device} is used")

    
    
    optimizer = optim.Adam(model.parameters(),lr=config['lr'],amsgrad=False)
#     criterion = pearson_loss
    criterion = torch.nn.MSELoss()
    
    # lr_scheduler = CosineAnnealingWarmupRestarts(optimizer, **config_lr)
    # lr_scheduler =  torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, **config_lr)
    lr_scheduler = None

    model,train_dl,val_dl,optimizer,lr_scheduler,criterion = accelerator.prepare(model,train_dl,val_dl,optimizer,lr_scheduler,criterion)

    best_loss = 9999999
    start_time = time.time()
    current_patience = 0
    for epoch in tqdm(range(config["num_epochs"])):
        print(f"Epoch Started:{epoch}")
        best_loss,improvement = train_and_evaluate_loop(train_dl,model,optimizer,criterion,epoch,lr_scheduler,valid_loader=val_dl, best_loss=best_loss)
        
        if not improvement:
            current_patience+=1
        else:
            current_patience = 0
            
        
        if current_patience == early_stopping_patience:
            print(f'{g_}EARLY STOPPING')
            break
            
            
        
        end_time = time.time()
        print(f"{m_}Time taken by epoch {epoch} is {end_time-start_time:.2f}s{sr_}")
        start_time = end_time
        
    return best_loss, model

In [25]:
gc.collect()

20

In [26]:
# for fold in folds.keys():
for fold in [0,1,2,3,4]:
    print(f'Starting for fold: {fold}{r_}')
    print(f'Preparing training data for fold: {fold}{m_}')
    train_indxs = folds[fold]['train']
    test_indxs = folds[fold]['test']
    test_f_df = train_df[train_df.index.isin(test_indxs)].reset_index(drop=True)
    train_f_df = train_df[train_df.index.isin(train_indxs)].reset_index(drop=True)
    
    
    X_train = train_f_df[features].values
    Y_train = train_f_df['target'].values
    Y_train = Y_train.reshape(-1,1)
    print(X_train.shape,Y_train.shape)
    
    X_val = test_f_df[features].values
    Y_val = test_f_df['target'].values
    Y_val = Y_val.reshape(-1,1)
    print(X_val.shape,Y_val.shape)
    
    model = BaselineModel(num_classes, input_size)
    train_dl = DataLoader(TSDataset(X_train, Y_train), batch_size=config['batch_size'], shuffle=config['train_shuffle'], num_workers=0)
    val_dl = DataLoader(TSDataset(X_val, Y_val), batch_size=config['batch_size'], shuffle=config['train_shuffle'], num_workers=0)
    
    best_loss, model = run(model,train_dl,val_dl,fold)
    
    gc.collect()
    

Starting for fold: 0[31m
Preparing training data for fold: 0[35m
(531075, 300) (531075, 1)
(522066, 300) (522066, 1)
300
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:0 |Train Loss:1.0079244824556204|Valid Loss:0.874565415084362
[32mLoss Decreased from 9999999 to 0.874565415084362[0m
[35mTime taken by epoch 0 is 26.23s[0m
Epoch Started:1


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.991258822954618|Valid Loss:0.8722070502117276
[32mLoss Decreased from 0.874565415084362 to 0.8722070502117276[0m
[35mTime taken by epoch 1 is 23.69s[0m
Epoch Started:2


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.9863750347724327|Valid Loss:0.8709472082555294
[32mLoss Decreased from 0.8722070502117276 to 0.8709472082555294[0m
[35mTime taken by epoch 2 is 23.12s[0m
Epoch Started:3


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.9826850496805631|Valid Loss:0.8707038145512342
[32mLoss Decreased from 0.8709472082555294 to 0.8707038145512342[0m
[35mTime taken by epoch 3 is 22.70s[0m
Epoch Started:4


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.9803886514443617|Valid Loss:0.8698624279350042
[32mLoss Decreased from 0.8707038145512342 to 0.8698624279350042[0m
[35mTime taken by epoch 4 is 23.03s[0m
Epoch Started:5


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.9777832443897541|Valid Loss:0.8689901577308774
[32mLoss Decreased from 0.8698624279350042 to 0.8689901577308774[0m
[35mTime taken by epoch 5 is 22.71s[0m
Epoch Started:6


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.9758769503006568|Valid Loss:0.8689123056828976
[32mLoss Decreased from 0.8689901577308774 to 0.8689123056828976[0m
[35mTime taken by epoch 6 is 23.02s[0m
Epoch Started:7


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.9733275156754714|Valid Loss:0.8691558474674821
[35mTime taken by epoch 7 is 22.83s[0m
Epoch Started:8


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.9714911525066082|Valid Loss:0.8685469646006823
[32mLoss Decreased from 0.8689123056828976 to 0.8685469646006823[0m
[35mTime taken by epoch 8 is 23.89s[0m
Epoch Started:9


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.9699088481756357|Valid Loss:0.8685146057978272
[32mLoss Decreased from 0.8685469646006823 to 0.8685146057978272[0m
[35mTime taken by epoch 9 is 23.16s[0m
Epoch Started:10


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.968173218690432|Valid Loss:0.8687207363545895
[35mTime taken by epoch 10 is 23.02s[0m
Epoch Started:11


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.9651152060582088|Valid Loss:0.8680248921737075
[32mLoss Decreased from 0.8685146057978272 to 0.8680248921737075[0m
[35mTime taken by epoch 11 is 22.96s[0m
Epoch Started:12


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.9627097313220684|Valid Loss:0.8685759147629142
[35mTime taken by epoch 12 is 22.88s[0m
Epoch Started:13


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.9614447667048528|Valid Loss:0.8681978285312653
[35mTime taken by epoch 13 is 22.93s[0m
Epoch Started:14


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.9576825838822585|Valid Loss:0.8689300054684281
[35mTime taken by epoch 14 is 22.96s[0m
Epoch Started:15


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.9550988683333763|Valid Loss:0.8686919109895825
[35mTime taken by epoch 15 is 22.85s[0m
Epoch Started:16


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.9521300627635075|Valid Loss:0.8688586773350835
[35mTime taken by epoch 16 is 22.88s[0m
Epoch Started:17


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.9508260607719421|Valid Loss:0.869053834117949
[35mTime taken by epoch 17 is 22.72s[0m
Epoch Started:18


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.9461208297656133|Valid Loss:0.8697106456384063
[35mTime taken by epoch 18 is 23.03s[0m
Epoch Started:19


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.9450735156352703|Valid Loss:0.8691323138773441
[35mTime taken by epoch 19 is 22.60s[0m
Epoch Started:20


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.9402818487240718|Valid Loss:0.8700705962255597
[35mTime taken by epoch 20 is 22.99s[0m
Epoch Started:21


  0%|          | 0/65 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:21 |Train Loss:0.9384353380936843|Valid Loss:0.8705218648537993
[32mEARLY STOPPING
Starting for fold: 1[31m
Preparing training data for fold: 1[35m
(1053141, 300) (1053141, 1)
(522066, 300) (522066, 1)
300
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.9385381798411525|Valid Loss:0.8232184899970889
[32mLoss Decreased from 9999999 to 0.8232184899970889[0m
[35mTime taken by epoch 0 is 36.64s[0m
Epoch Started:1


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.9275980896727983|Valid Loss:0.820677749812603
[32mLoss Decreased from 0.8232184899970889 to 0.820677749812603[0m
[35mTime taken by epoch 1 is 34.86s[0m
Epoch Started:2


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.9241836782573729|Valid Loss:0.8200421929359436
[32mLoss Decreased from 0.820677749812603 to 0.8200421929359436[0m
[35mTime taken by epoch 2 is 34.75s[0m
Epoch Started:3


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.9213021490000939|Valid Loss:0.8201464544981718
[35mTime taken by epoch 3 is 34.72s[0m
Epoch Started:4


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.9195482291916545|Valid Loss:0.8188873957842588
[32mLoss Decreased from 0.8200421929359436 to 0.8188873957842588[0m
[35mTime taken by epoch 4 is 34.67s[0m
Epoch Started:5


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.9179467900778896|Valid Loss:0.8195139281451702
[35mTime taken by epoch 5 is 34.95s[0m
Epoch Started:6


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.9153953365577284|Valid Loss:0.8183388542383909
[32mLoss Decreased from 0.8188873957842588 to 0.8183388542383909[0m
[35mTime taken by epoch 6 is 34.44s[0m
Epoch Started:7


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.9134080562480661|Valid Loss:0.8184070708230138
[35mTime taken by epoch 7 is 34.55s[0m
Epoch Started:8


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.9117816885312399|Valid Loss:0.8176823630928993
[32mLoss Decreased from 0.8183388542383909 to 0.8176823630928993[0m
[35mTime taken by epoch 8 is 34.59s[0m
Epoch Started:9


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.9096879663393479|Valid Loss:0.8182086255401373
[35mTime taken by epoch 9 is 34.47s[0m
Epoch Started:10


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.9079353684602782|Valid Loss:0.8178701801225543
[35mTime taken by epoch 10 is 34.65s[0m
Epoch Started:11


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.9053548130878183|Valid Loss:0.8177014794200659
[35mTime taken by epoch 11 is 34.90s[0m
Epoch Started:12


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.9030965488086375|Valid Loss:0.8179897870868444
[35mTime taken by epoch 12 is 34.83s[0m
Epoch Started:13


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.901079651459243|Valid Loss:0.8176660565659404
[32mLoss Decreased from 0.8176823630928993 to 0.8176660565659404[0m
[35mTime taken by epoch 13 is 35.00s[0m
Epoch Started:14


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.898843436278114|Valid Loss:0.8179805595427752
[35mTime taken by epoch 14 is 34.71s[0m
Epoch Started:15


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.8964848874151244|Valid Loss:0.8177634878084064
[35mTime taken by epoch 15 is 34.93s[0m
Epoch Started:16


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.8939756760301516|Valid Loss:0.8175855092704296
[32mLoss Decreased from 0.8176660565659404 to 0.8175855092704296[0m
[35mTime taken by epoch 16 is 34.51s[0m
Epoch Started:17


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.8918936049291329|Valid Loss:0.81857939530164
[35mTime taken by epoch 17 is 34.84s[0m
Epoch Started:18


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.8895436067913853|Valid Loss:0.818469705991447
[35mTime taken by epoch 18 is 34.72s[0m
Epoch Started:19


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.8872160934662634|Valid Loss:0.8179399007931352
[35mTime taken by epoch 19 is 34.85s[0m
Epoch Started:20


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.8849526255629784|Valid Loss:0.8177442327141762
[35mTime taken by epoch 20 is 34.64s[0m
Epoch Started:21


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:21 |Train Loss:0.8825625134992969|Valid Loss:0.8179855970665812
[35mTime taken by epoch 21 is 34.80s[0m
Epoch Started:22


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:22 |Train Loss:0.8811107300972754|Valid Loss:0.8177331751212478
[35mTime taken by epoch 22 is 34.88s[0m
Epoch Started:23


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:23 |Train Loss:0.877614116945932|Valid Loss:0.8185101198032498
[35mTime taken by epoch 23 is 34.97s[0m
Epoch Started:24


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:24 |Train Loss:0.875570743120918|Valid Loss:0.8185318019241095
[35mTime taken by epoch 24 is 34.70s[0m
Epoch Started:25


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:25 |Train Loss:0.8736645537753438|Valid Loss:0.8184840343892574
[35mTime taken by epoch 25 is 34.72s[0m
Epoch Started:26


  0%|          | 0/129 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:26 |Train Loss:0.8712046631546908|Valid Loss:0.8188815768808126
[32mEARLY STOPPING
Starting for fold: 2[31m
Preparing training data for fold: 2[35m
(1575207, 300) (1575207, 1)
(522066, 300) (522066, 1)
300
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.9002485281445202|Valid Loss:0.775859591551125
[32mLoss Decreased from 9999999 to 0.775859591551125[0m
[35mTime taken by epoch 0 is 47.77s[0m
Epoch Started:1


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8913368125653638|Valid Loss:0.7743714153766632
[32mLoss Decreased from 0.775859591551125 to 0.7743714153766632[0m
[35mTime taken by epoch 1 is 46.99s[0m
Epoch Started:2


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8881751292727772|Valid Loss:0.7737717125564814
[32mLoss Decreased from 0.7743714153766632 to 0.7737717125564814[0m
[35mTime taken by epoch 2 is 47.03s[0m
Epoch Started:3


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.8864370176829205|Valid Loss:0.7728551710024476
[32mLoss Decreased from 0.7737717125564814 to 0.7728551710024476[0m
[35mTime taken by epoch 3 is 47.14s[0m
Epoch Started:4


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.88426673412323|Valid Loss:0.7730848900973797
[35mTime taken by epoch 4 is 47.05s[0m
Epoch Started:5


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.8823957538975336|Valid Loss:0.7731882305815816
[35mTime taken by epoch 5 is 46.82s[0m
Epoch Started:6


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.8812623286494319|Valid Loss:0.77294029481709
[35mTime taken by epoch 6 is 47.00s[0m
Epoch Started:7


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.8795265700532983|Valid Loss:0.7726935157552361
[32mLoss Decreased from 0.7728551710024476 to 0.7726935157552361[0m
[35mTime taken by epoch 7 is 47.18s[0m
Epoch Started:8


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.8774565668303732|Valid Loss:0.772845801897347
[35mTime taken by epoch 8 is 47.04s[0m
Epoch Started:9


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.8758092689390627|Valid Loss:0.7728850087150931
[35mTime taken by epoch 9 is 47.05s[0m
Epoch Started:10


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.8743100734572337|Valid Loss:0.7729745479300618
[35mTime taken by epoch 10 is 47.10s[0m
Epoch Started:11


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.8719377761677757|Valid Loss:0.7733638789504766
[35mTime taken by epoch 11 is 48.45s[0m
Epoch Started:12


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.8704498941416567|Valid Loss:0.7731935465708375
[35mTime taken by epoch 12 is 47.57s[0m
Epoch Started:13


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.8688532407419669|Valid Loss:0.7730230325832963
[35mTime taken by epoch 13 is 46.95s[0m
Epoch Started:14


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.866691400658899|Valid Loss:0.7739612199366093
[35mTime taken by epoch 14 is 47.27s[0m
Epoch Started:15


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.8649180889747303|Valid Loss:0.7737653059884906
[35mTime taken by epoch 15 is 47.20s[0m
Epoch Started:16


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.8631530055110319|Valid Loss:0.7735900841653347
[35mTime taken by epoch 16 is 46.84s[0m
Epoch Started:17


  0%|          | 0/193 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.8614087645253987|Valid Loss:0.773765436373651
[32mEARLY STOPPING
Starting for fold: 3[31m
Preparing training data for fold: 3[35m
(2097273, 300) (2097273, 1)
(522066, 300) (522066, 1)
300
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.8688059581392934|Valid Loss:0.7813065927475691
[32mLoss Decreased from 9999999 to 0.7813065927475691[0m
[35mTime taken by epoch 0 is 58.78s[0m
Epoch Started:1


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.862024425068718|Valid Loss:0.7790640955790877
[32mLoss Decreased from 0.7813065927475691 to 0.7790640955790877[0m
[35mTime taken by epoch 1 is 59.76s[0m
Epoch Started:2


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8599283595493331|Valid Loss:0.778777601197362
[32mLoss Decreased from 0.7790640955790877 to 0.778777601197362[0m
[35mTime taken by epoch 2 is 59.18s[0m
Epoch Started:3


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.8597631806993299|Valid Loss:0.7783637186512351
[32mLoss Decreased from 0.778777601197362 to 0.7783637186512351[0m
[35mTime taken by epoch 3 is 59.12s[0m
Epoch Started:4


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.8570248164555442|Valid Loss:0.777712794020772
[32mLoss Decreased from 0.7783637186512351 to 0.777712794020772[0m
[35mTime taken by epoch 4 is 59.15s[0m
Epoch Started:5


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.855012490823575|Valid Loss:0.7777528781443834
[35mTime taken by epoch 5 is 60.18s[0m
Epoch Started:6


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.852397946068285|Valid Loss:0.7770572677254677
[32mLoss Decreased from 0.777712794020772 to 0.7770572677254677[0m
[35mTime taken by epoch 6 is 59.26s[0m
Epoch Started:7


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.852580599051969|Valid Loss:0.7769709723070264
[32mLoss Decreased from 0.7770572677254677 to 0.7769709723070264[0m
[35mTime taken by epoch 7 is 59.78s[0m
Epoch Started:8


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.8501677763601222|Valid Loss:0.7769100405275822
[32mLoss Decreased from 0.7769709723070264 to 0.7769100405275822[0m
[35mTime taken by epoch 8 is 59.37s[0m
Epoch Started:9


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.8498469148164594|Valid Loss:0.776806503534317
[32mLoss Decreased from 0.7769100405275822 to 0.776806503534317[0m
[35mTime taken by epoch 9 is 59.27s[0m
Epoch Started:10


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.84843762259539|Valid Loss:0.7769444473087788
[35mTime taken by epoch 10 is 59.54s[0m
Epoch Started:11


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.8461840090120812|Valid Loss:0.7769948402419686
[35mTime taken by epoch 11 is 59.88s[0m
Epoch Started:12


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.8462123836060906|Valid Loss:0.7768788151443005
[35mTime taken by epoch 12 is 59.99s[0m
Epoch Started:13


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.8445168831005171|Valid Loss:0.7764854850247502
[32mLoss Decreased from 0.776806503534317 to 0.7764854850247502[0m
[35mTime taken by epoch 13 is 59.64s[0m
Epoch Started:14


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.842397945400342|Valid Loss:0.7778035141527653
[35mTime taken by epoch 14 is 59.36s[0m
Epoch Started:15


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.841468575631598|Valid Loss:0.7770670289173722
[35mTime taken by epoch 15 is 59.78s[0m
Epoch Started:16


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.8404083581286181|Valid Loss:0.7770787375047803
[35mTime taken by epoch 16 is 59.46s[0m
Epoch Started:17


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.8385481843688609|Valid Loss:0.7772361543029547
[35mTime taken by epoch 17 is 58.79s[0m
Epoch Started:18


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.8372367103276086|Valid Loss:0.7775376373901963
[35mTime taken by epoch 18 is 59.43s[0m
Epoch Started:19


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.8357283092658343|Valid Loss:0.7772352769970894
[35mTime taken by epoch 19 is 59.18s[0m
Epoch Started:20


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.833918440666644|Valid Loss:0.777478139847517
[35mTime taken by epoch 20 is 59.47s[0m
Epoch Started:21


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:21 |Train Loss:0.8322582527821166|Valid Loss:0.7774374065920711
[35mTime taken by epoch 21 is 59.24s[0m
Epoch Started:22


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:22 |Train Loss:0.8321945495642576|Valid Loss:0.7775372043251991
[35mTime taken by epoch 22 is 60.02s[0m
Epoch Started:23


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:23 |Train Loss:0.8314113609985618|Valid Loss:0.7777545070275664
[32mEARLY STOPPING
Starting for fold: 4[31m
Preparing training data for fold: 4[35m
(2619339, 300) (2619339, 1)
(522066, 300) (522066, 1)
300
cuda is used


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch Started:0


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:0 |Train Loss:0.8514232901856303|Valid Loss:0.7465552799403667
[32mLoss Decreased from 9999999 to 0.7465552799403667[0m
[35mTime taken by epoch 0 is 71.95s[0m
Epoch Started:1


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:1 |Train Loss:0.8450772849842906|Valid Loss:0.7461648564785719
[32mLoss Decreased from 0.7465552799403667 to 0.7461648564785719[0m
[35mTime taken by epoch 1 is 71.74s[0m
Epoch Started:2


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:2 |Train Loss:0.8428304804489016|Valid Loss:0.7453551050275564
[32mLoss Decreased from 0.7461648564785719 to 0.7453551050275564[0m
[35mTime taken by epoch 2 is 72.32s[0m
Epoch Started:3


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:3 |Train Loss:0.8411175530403853|Valid Loss:0.7440263256430626
[32mLoss Decreased from 0.7453551050275564 to 0.7440263256430626[0m
[35mTime taken by epoch 3 is 72.44s[0m
Epoch Started:4


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:4 |Train Loss:0.839593799225986|Valid Loss:0.7436940893530846
[32mLoss Decreased from 0.7440263256430626 to 0.7436940893530846[0m
[35mTime taken by epoch 4 is 72.16s[0m
Epoch Started:5


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:5 |Train Loss:0.8382291520014405|Valid Loss:0.7443230096250772
[35mTime taken by epoch 5 is 72.36s[0m
Epoch Started:6


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:6 |Train Loss:0.8366924285888672|Valid Loss:0.7446444919332862
[35mTime taken by epoch 6 is 72.72s[0m
Epoch Started:7


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:7 |Train Loss:0.8355666976422071|Valid Loss:0.7436953522264957
[35mTime taken by epoch 7 is 72.44s[0m
Epoch Started:8


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:8 |Train Loss:0.8342083787545562|Valid Loss:0.7438311502337456
[35mTime taken by epoch 8 is 72.05s[0m
Epoch Started:9


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:9 |Train Loss:0.8329295163974166|Valid Loss:0.7437097728252411
[35mTime taken by epoch 9 is 72.59s[0m
Epoch Started:10


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:10 |Train Loss:0.831564350053668|Valid Loss:0.7437046058475971
[35mTime taken by epoch 10 is 72.87s[0m
Epoch Started:11


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:11 |Train Loss:0.830262741819024|Valid Loss:0.7447722218930721
[35mTime taken by epoch 11 is 71.98s[0m
Epoch Started:12


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:12 |Train Loss:0.8286152919754386|Valid Loss:0.7434247629716992
[32mLoss Decreased from 0.7436940893530846 to 0.7434247629716992[0m
[35mTime taken by epoch 12 is 72.39s[0m
Epoch Started:13


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:13 |Train Loss:0.8272911433130503|Valid Loss:0.7441759649664164
[35mTime taken by epoch 13 is 72.10s[0m
Epoch Started:14


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:14 |Train Loss:0.8261323409155011|Valid Loss:0.742694016546011
[32mLoss Decreased from 0.7434247629716992 to 0.742694016546011[0m
[35mTime taken by epoch 14 is 72.71s[0m
Epoch Started:15


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:15 |Train Loss:0.8246165804564953|Valid Loss:0.7438111519441009
[35mTime taken by epoch 15 is 72.66s[0m
Epoch Started:16


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:16 |Train Loss:0.8233902726322413|Valid Loss:0.7440024660900235
[35mTime taken by epoch 16 is 72.06s[0m
Epoch Started:17


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:17 |Train Loss:0.8219346221536398|Valid Loss:0.7441850267350674
[35mTime taken by epoch 17 is 72.30s[0m
Epoch Started:18


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:18 |Train Loss:0.8208711767569185|Valid Loss:0.7435454353690147
[35mTime taken by epoch 18 is 72.47s[0m
Epoch Started:19


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:19 |Train Loss:0.8196485821157694|Valid Loss:0.7433538725599647
[35mTime taken by epoch 19 is 72.03s[0m
Epoch Started:20


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:20 |Train Loss:0.8183447806164622|Valid Loss:0.7434520926326513
[35mTime taken by epoch 20 is 72.28s[0m
Epoch Started:21


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:21 |Train Loss:0.8166715627536177|Valid Loss:0.7444599308073521
[35mTime taken by epoch 21 is 72.08s[0m
Epoch Started:22


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:22 |Train Loss:0.8159893348813057|Valid Loss:0.7434470374137163
[35mTime taken by epoch 22 is 72.20s[0m
Epoch Started:23


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:23 |Train Loss:0.814481027983129|Valid Loss:0.7452517878264189
[35mTime taken by epoch 23 is 72.26s[0m
Epoch Started:24


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

Epoch:24 |Train Loss:0.813671555556357|Valid Loss:0.7448712540790439
[32mEARLY STOPPING
