In [None]:
# List the Kaggle input files/folders this notebook refers to, so a missing dataset is visible up front.
!ls ../input/shopee-generate-data-for-triplet-loss/train_triplets_imgs.csv
!ls ../input/shopee-product-matching/
!ls ../input/shopee-generate-data-for-triplet-loss/train_triplets_titles.csv
!ls ../input/shopee-pytorch-siamese-triplet-loss-xlmrobe-cd1846/xlmroberta_256_fold0.pth

In [None]:
# import sys
# sys.path.append ('/kaggle/input/pytorch-images-seresnet')
import os
import gc
import time
import math
import random
import datetime
import pandas as pd
import numpy as np
import seaborn as sns

import torch
import torch.nn as nn
from   torch.nn import init
from   torch.nn import CrossEntropyLoss, MSELoss
from   torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F
from   torch.nn import Parameter
from   torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from   transformers import AdamW, get_cosine_schedule_with_warmup
from   torch.cuda.amp import autocast, GradScaler

from   transformers import BertForSequenceClassification, BertConfig, AutoTokenizer, AutoModelForSequenceClassification
from   transformers import RobertaTokenizer, RobertaForSequenceClassification, XLMRobertaModel

from   sklearn.model_selection import StratifiedKFold, GroupKFold
# import timm

import albumentations as A
from   albumentations import *
from   albumentations.pytorch import ToTensorV2
from   albumentations.core.transforms_interface import DualTransform
from   albumentations.augmentations import functional as AF
import cv2

from   tqdm import tqdm
from   pprint import pprint
from   functools import partial
import matplotlib.pyplot as plt
# from GPUtil import showUtilization as gpu_usage
# from   numba import cuda
import warnings
warnings.filterwarnings ("ignore")

In [None]:
class CFG:
    # Central, mutable configuration shared by all cells of this notebook.
    # Attributes are read as CFG.<name>; later cells overwrite some of them in place
    # (e.g. MODEL, isFreeze, epochs, batch sizes) before starting another training run.

    # ---- runtime ----
    device       = torch.device ('cuda' if torch.cuda.is_available () else 'cpu')
    # number of worker processes given to both DataLoaders
    num_workers  = 8

    # ---- model / tokenizer ----
    # model_name only appears in checkpoint file names; bert_model_name is the path handed to from_pretrained()
    model_name   = 'xlm-roberta-large'
    bert_model_name = '/kaggle/input/xlm-roberta-large'
    # tokenizer max_length (pad/truncate length); also part of the checkpoint file names
    size         = 128

    # ---- training switches ----
    # isTrain gates train_main(); isFreeze makes MyTrainer call model.freeze() instead of model.unfreeze()
    isTrain      = True
    isFreeze     = True
    lr           = 5e-5
    epochs       = 1
    warmup_steps = 0                     # if float: fraction of one epoch's batches used for LR warm-up, if int = actual warm-up steps
    eval_steps   = 0.5                   # if float: fraction of one epoch's batches after which a mid-epoch evaluation runs, if int = that batch index; 0 = no mid-epoch evaluation
    # num_cycles argument of the cosine LR schedule
    lr_num_cycles= 0.5
    # NOTE(review): epochsNx is not referenced anywhere in the visible notebook code
    epochsNx     = 1
    weight_decay = 1e-6
    # NOTE(review): only referenced by the commented-out gradient clipping in MyTrainer.train
    max_grad_norm= 1000.0
    seed         = 42
    # train_main() loops over range(n_fold) and trains only the folds listed in train_fold
    n_fold       = 10
    train_fold   = [0]                      # [0, 1, 2, 3, 4]
    # NOTE(review): only referenced by commented-out progress printing
    print_every  = 100
    adam_epsilon = 1e-8
    train_batch_size = 64
    eval_batch_size  = 64
    # number of outputs of the classification head (1 logit, used with BCEWithLogitsLoss)
    target_size      = 1

    # ---- paths ----
    # NOTE(review): model_infer_path_prefix is not referenced in the visible notebook code
    model_infer_path_prefix = "."
    # directory prefix of every checkpoint file written by MyTrainer
    model_train_path_prefix = "."
    # CSV with the title triplets; the dataset reads its 'anchor', 'positive' and 'negative' columns
    text_triplets_csv= "../input/shopee-generate-data-for-triplet-loss/train_triplets_titles.csv"
    # NOTE(review): train_path (image folder) is not referenced in the visible notebook code
    train_path       = '../input/shopee-product-matching/train_images'
    output_dir       = './results'        # output directory; used to build init_logger's default log-file path
    # if > 0, MyTrainer.train stops early once its global step counter exceeds this value
    max_steps        = 0
    # the model instance currently in use; set by the model cell and again by MyTrainer.__init__
    MODEL            = None

In [None]:
def init_logger (log_file=os.path.join (CFG.output_dir, 'train.log')):
    """
    Build the notebook logger that writes every message to the console and to a file.
    
    Parameters:
        log_file: path of the log file. Defaults to `train.log` inside `CFG.output_dir`
                  (the default is evaluated once, when this function is defined).
    Returns:
        the `logging.Logger` named after this module, set to INFO level, with exactly
        one console handler and one file handler attached.
    """
    
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger (__name__)
    logger.setLevel (INFO)
    
    # getLogger returns the same object on every call, so re-running this cell would
    # otherwise stack handlers and print each message several times
    for stale_handler in list (logger.handlers):
        logger.removeHandler (stale_handler)
        stale_handler.close ()
    
    # FileHandler does not create missing directories
    log_dir = os.path.dirname (log_file)
    if log_dir:
        os.makedirs (log_dir, exist_ok=True)
    
    handler1 = StreamHandler ()
    handler1.setFormatter (Formatter ("%(message)s"))
    handler2 = FileHandler (filename=log_file)
    handler2.setFormatter (Formatter ("%(message)s"))
    logger.addHandler (handler1)
    logger.addHandler (handler2)
    return logger

In [None]:
def seed_everything (seed):
    """
    Seed every random number generator this notebook relies on.
    
    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices), exports
    PYTHONHASHSEED, and puts cuDNN in deterministic mode.
    
    Parameters:
        seed: integer seed applied to all generators.
    """
    
    random.seed (seed)
    os.environ['PYTHONHASHSEED'] = str (seed)
    np.random.seed (seed)
    torch.manual_seed (seed)
    # seed every visible GPU, not only the current one (no-op without CUDA)
    torch.cuda.manual_seed_all (seed)
    torch.backends.cudnn.deterministic = True
    # benchmark mode auto-tunes the convolution algorithm per run, which can pick
    # different algorithms between runs and so defeats the deterministic flag above
    torch.backends.cudnn.benchmark = False
    return

# Dataset

In [None]:
# Load the title (text) triplets; the dataset class reads the 'anchor', 'positive' and 'negative' columns.
# The rows are split by position without shuffling: the first 9/10 become TRAIN_DF, the remaining rows TEST_DF
# (TEST_DF is what the training loop uses as its validation set).
df = pd.read_csv (CFG.text_triplets_csv)
TRAIN_DF = df.iloc[:(df.shape[0]*9//10)]
TEST_DF  = df.iloc[(df.shape[0]*9//10):]
# drop the extra reference to the full frame
del df
gc.collect ()
TEST_DF.head ()

# XLM-RoBERTa

In [None]:
# Module-level globals read by encode(): the fixed pad/truncate length and the tokenizer
# loaded from the local pretrained-model directory.
max_len         = CFG.size
tokenizer       = AutoTokenizer.from_pretrained (CFG.bert_model_name)

In [None]:
def encode (premise, hypothesis):
    """
    Tokenize one sentence pair into fixed-length model inputs.
    
    Uses the module-level `tokenizer` and `max_len`: the pair is truncated and padded
    to exactly `max_len` tokens, with special tokens and an attention mask.
    
    Parameters:
        premise:    first sentence of the pair.
        hypothesis: second sentence of the pair.
    Returns:
        the tokenizer's output mapping, where every tensor has been squeezed so a single
        sample no longer carries the leading batch axis added by return_tensors='pt'.
    """
    
    pair_encoding = tokenizer (
        premise,
        hypothesis,
        add_special_tokens    = True,
        truncation            = True,          # max_length alone does not truncate
        max_length            = max_len,
        padding               = 'max_length',
        return_attention_mask = True,
        return_tensors        = 'pt',
    )
    # squeeze() removes every size-1 axis, i.e. the batch axis of a single encoded pair
    for field in list (pair_encoding.keys ()):
        pair_encoding[field] = pair_encoding[field].squeeze ()
    return pair_encoding

In [None]:
class DoubleTextDataset (Dataset):
    """
    Dataset turning each triplet row into two encoded sentence pairs.
    
    Every item is a tuple `(anchor+positive, anchor+negative)`, each element being
    the output of `encode` for that pair. The frame must provide the columns
    'anchor', 'positive' and 'negative'.
    """
    
    def __init__(self, df=TRAIN_DF):
        # df: triplet frame; defaults to the TRAIN_DF that exists when this class is defined
        self.df = df
        return
    
    def __len__(self):
        # one item per triplet row
        return self.df.shape[0]
    
    def __getitem__(self, index):
        
        row = self.df.iloc[index]
        anchor_title, positive_title, negative_title = (
            row[column] for column in ('anchor', 'positive', 'negative')
        )
        # the matching pair comes first, the non-matching pair second
        return (encode (anchor_title, positive_title), encode (anchor_title, negative_title))

In [None]:
def plot_txt (dataset_show):
    """
    Print two randomly chosen items of a pair dataset for a quick visual check.
    
    Parameters:
        dataset_show: indexable object with a length whose items unpack into
                      (positive pair, negative pair).
    """
    
    samples_to_show = 2
    for _ in range (samples_to_show):
        pick = np.random.randint (0, len (dataset_show))
        positive_pair, negative_pair = dataset_show[pick]
        print ('+ve = ', positive_pair)
        print ('-ve = ', negative_pair)
    return

# Smoke test: build the dataset on its default frame, print two random encoded samples,
# then release the temporary dataset object again.
TR_DATASET = DoubleTextDataset ()
plot_txt (TR_DATASET)
del TR_DATASET
gc.collect ()

# Loss Functions

In [None]:
def get_criterion ():
    """Return the training loss: binary cross-entropy computed directly on raw logits."""
    
    return nn.BCEWithLogitsLoss ()

# Model

In [None]:
class MyGAPModelForSeqClf (nn.Module):
    """
    Pretrained transformer encoder with a mean-pooling classification head.
    
    The encoder's last hidden states are averaged over axis 1 and passed through
    Linear -> BatchNorm1d -> non-linearity -> Dropout -> Linear to produce
    `outputCount` raw logits per sample.
    
    Parameters:
        bert_model_name: name or path given to `from_pretrained`.
        outputCount:     number of output logits.
        drop_prob:       dropout probability in the head.
        nonlin:          activation module used in the head.
    """
    
    def __init__(self, bert_model_name=CFG.bert_model_name, outputCount=CFG.target_size, 
                 drop_prob=0.2, nonlin=nn.SiLU ()):
        
        super ().__init__()
        # Load the sequence-classification wrapper but keep only its encoder (.base_model)
        pretrained       = AutoModelForSequenceClassification.from_pretrained (bert_model_name)
        self.model       = pretrained.base_model
        self.drop_prob   = drop_prob
        self.nonlin      = nonlin
        self.outputCount = outputCount
        width            = self.model.config.hidden_size
        self.dense       = nn.Linear (width, width)
        self.batchnorm   = nn.BatchNorm1d (width)
        self.outDense    = nn.Linear (width, outputCount)
        self.dropout     = nn.Dropout (drop_prob)
        return
    
    def _set_encoder_trainable (self, trainable):
        # Switch gradient tracking for every encoder parameter; the head is never touched
        for weight in self.model.base_model.parameters ():
            weight.requires_grad = trainable
        return
    
    def freeze (self):
        """Stop gradient updates of the encoder so only the head is trained."""
        
        self._set_encoder_trainable (False)
        return
    
    def unfreeze (self):
        """Re-enable gradient updates of the encoder."""
        
        self._set_encoder_trainable (True)
        return
    
    def forward (self, input_ids, attention_mask, token_type_ids=None, labels=None, **kwargs):
        """
        Compute raw logits for a batch of encoded sentence pairs.
        
        `labels` and any extra keyword arguments are accepted but ignored; the loss is
        computed by the caller. `token_type_ids` is forwarded to the encoder only when given.
        Returns the output of the final linear layer (one row per sample).
        """
        
        encoder_inputs = {'input_ids': input_ids, 'attention_mask': attention_mask}
        if token_type_ids is not None:
            encoder_inputs['token_type_ids'] = token_type_ids
        token_states = self.model (**encoder_inputs)[0]
        
        # Global average pooling over axis 1 (the sequence axis, per the original comment).
        # NOTE(review): attention_mask is not applied here, so padded positions are averaged in too.
        pooled = token_states.mean (dim=1)
        
        hidden = self.dense (pooled)
        hidden = self.batchnorm (hidden)
        hidden = self.nonlin (hidden)
        hidden = self.dropout (hidden)
        return self.outDense (hidden)

In [None]:
# Confirm that the checkpoint loaded in the next cell is present.
!ls ../input/shopee-pytorch-xlmroberta-doubles-relativebinclf/xlm-roberta-large_128_fold0_min_val_loss.pth

In [None]:
model = MyGAPModelForSeqClf ()

# Warm-start from the checkpoint written by an earlier run of this same notebook.
# (Comment out the load below to start from the plain pretrained encoder instead.)
# map_location='cpu' makes the same load work with or without a GPU, so no try/except
# fallback is needed and a genuine error (e.g. a missing file) is not hidden;
# the model is moved to CFG.device right afterwards.
pretrained_ckpt_path = "../input/shopee-pytorch-xlmroberta-doubles-relativebinclf/xlm-roberta-large_128_fold0_min_val_loss.pth"
pretrained_ckpt      = torch.load (pretrained_ckpt_path, map_location='cpu')
model.load_state_dict (pretrained_ckpt['model_state_dict'])
del pretrained_ckpt      # the checkpoint also carries optimizer state; free it
# model.outDense = nn.Linear (model.model.config.hidden_size, CFG.target_size)

warnings.filterwarnings ("ignore")
model.to (CFG.device)
CFG.MODEL = model

## Trainer Helpers

In [None]:
def format_time (elapsed):
    '''
    Convert a duration in seconds into an h:mm:ss string.
    
    The duration is rounded to the nearest whole second first; the text is the
    standard string form of a `datetime.timedelta`.
    '''
    
    whole_seconds = int (round (elapsed))
    as_delta      = datetime.timedelta (seconds=whole_seconds)
    return str (as_delta)

# Trainer

In [None]:
class MyTrainer:
    """
    Training / evaluation loop for the pair classifier.
    
    Each batch holds two groups of encoded sentence pairs: (anchor, positive) pairs, trained
    towards label 1, and (anchor, negative) pairs, trained towards label 0. The trainer owns
    the optimizer, the cosine LR schedule, the data loaders, checkpointing and the statistics
    collected during training. All hyper-parameters are read from `CFG`.
    """
    
    def __init__(self, fold, model, train_dataset, eval_dataset, criterion, 
                 checkpoint_path=None, isResume=False):
        """
        Parameters:
            fold:            fold id; only used in checkpoint file names.
            model:           module offering freeze()/unfreeze(), called as model(**encoded_batch).
            train_dataset:   dataset yielding (positive_encoding, negative_encoding) items.
            eval_dataset:    validation dataset of the same form, or None to train without validation.
            criterion:       loss taking (logits, labels).
            checkpoint_path: optional checkpoint to load the model weights from.
            isResume:        if True, also restore optimizer, scheduler, statistics and best metrics
                             from the checkpoint and continue after the stored epoch.
        """
        
        self.fold             = fold
        self.start_epoch      = 0
        self.device           = CFG.device
        self.model            = model.to (CFG.device)
        if CFG.isFreeze:
            self.model.freeze ()
        else:
            self.model.unfreeze ()
        CFG.MODEL             = self.model
        self.train_dataset    = train_dataset
        self.eval_dataset     = eval_dataset
        self.criterion        = criterion
        self.isTrained        = False
        self.optimizer        = AdamW (self.model.parameters (), lr=CFG.lr, eps=CFG.adam_epsilon, weight_decay=CFG.weight_decay)
        self.epochs           = CFG.epochs
        self.set_dataLoaders ()
        self.training_stats   = []
        
        name_stem             = f"{CFG.model_train_path_prefix}/{CFG.model_name}_{CFG.size}_fold{self.fold}"
        self.modelFile        = f"{name_stem}.pth"
        # best-model files are tagged with the split their metric was measured on
        split_tag             = 'val' if eval_dataset is not None else 'tr'
        self.minLossModelFile = f"{name_stem}_min_{split_tag}_loss.pth"
        self.maxAccModelFile  = f"{name_stem}_max_{split_tag}_acc.pth"
        
        self.min_val_loss         = 9999
        self.min_train_loss       = 9999
        self.max_val_acc          = -1
        
        # The checkpoint is loaded last: resuming restores the optimizer, the LR scheduler and
        # the best-metric trackers, so all of them must already exist and must not be reset afterwards.
        if checkpoint_path is not None:
            if isResume:
                self.start_epoch = self.load_checkpoint (checkpoint_path, isResume=True) + 1
            else:
                self.load_checkpoint (checkpoint_path, isResume=False)
        return
    
    @staticmethod
    def _resolve_steps (value, steps_per_epoch):
        # A float is a fraction of one epoch's batches; anything else is already a step count
        if type (value) is float:
            return int (value * steps_per_epoch)
        return value
    
    def set_dataLoaders (self):
        """
        Create the train/validation DataLoaders and the cosine LR scheduler.
        
        Also resolves `CFG.warmup_steps` and `CFG.eval_steps` into step counts stored on the
        trainer (`self.warmup_steps`, `self.eval_steps`). `CFG` itself is left untouched, so a
        fractional setting is re-resolved for every new trainer (e.g. after a batch-size change).
        """
        
        if isinstance (self.train_dataset, torch.utils.data.IterableDataset):
            train_sampler = None
        else:
            train_sampler = RandomSampler (self.train_dataset)
        train_dataloader  = DataLoader (
                    self.train_dataset,
                    sampler     = train_sampler,
                    batch_size  = CFG.train_batch_size,
                    num_workers = CFG.num_workers,
                    pin_memory  = True
        )
        validation_dataloader = None
        if self.eval_dataset:
            validation_dataloader = DataLoader (
                        self.eval_dataset, 
                        sampler     = SequentialSampler (self.eval_dataset),
                        batch_size  = CFG.eval_batch_size,
                        num_workers = CFG.num_workers,
                        pin_memory  = False
            )
        
        steps_per_epoch   = len (train_dataloader)
        self.warmup_steps = self._resolve_steps (CFG.warmup_steps, steps_per_epoch)
        self.eval_steps   = self._resolve_steps (CFG.eval_steps, steps_per_epoch)
        
        # The scheduler advances once per batch: [number of batches] x [number of epochs]
        num_training_steps = steps_per_epoch * self.epochs
        lr_scheduler = get_cosine_schedule_with_warmup (self.optimizer, num_cycles=CFG.lr_num_cycles,
                        num_warmup_steps=self.warmup_steps, num_training_steps=num_training_steps)
        
        self.train_dataloader      = train_dataloader
        self.validation_dataloader = validation_dataloader
        self.lr_scheduler          = lr_scheduler
        self.num_training_steps    = num_training_steps
        return
            
    def test_iterate_dataloader (self):
        """Debug helper: print the index and content of the first training batch."""
        
        for step, batch in enumerate (self.train_dataloader):
            print (step)
            print (batch)
            break
        return
    
    def save_checkpoint (self, epoch, path):
        """Write model, optimizer, scheduler, statistics and best metrics to `path`."""
        
        checkpoint = {
            'epoch'               : epoch,
            'model_state_dict'    : self.model.state_dict (),
            'optimizer_state_dict': self.optimizer.state_dict (),
            'lr_sched_state_dict' : self.lr_scheduler.state_dict (),
            'training_stats'      : self.training_stats,
            'max_val_acc'         : self.max_val_acc,
            'min_train_loss'      : self.min_train_loss,
            'min_val_loss'        : self.min_val_loss,
        }
        torch.save (checkpoint, path)
        gc.collect (); torch.cuda.empty_cache ()
        print ("saved checkpoint", path)
        return
    
    def load_checkpoint (self, path, isResume=False):
        """
        Load model weights from `path`; with isResume=True also restore the training state.
        
        Returns the epoch stored in the checkpoint when resuming, otherwise 0.
        The model is left in train mode.
        """
        
        epoch      = 0
        checkpoint = torch.load (path, map_location=torch.device ('cpu'))
        self.model.load_state_dict (checkpoint['model_state_dict'])
        if isResume:
            
            self.optimizer.load_state_dict (checkpoint['optimizer_state_dict'])
            self.lr_scheduler.load_state_dict (checkpoint['lr_sched_state_dict'])
            epoch = checkpoint['epoch']
            self.training_stats  = checkpoint['training_stats']
            self.min_val_loss    = checkpoint['min_val_loss']
            self.min_train_loss  = checkpoint['min_train_loss']
            self.max_val_acc     = checkpoint['max_val_acc']
            print ("Loaded model, optimizer, and lr_scheduler from -", path)
        else:
            print ("Loaded model from -", path)
            
        self.model.train ()
        return epoch
    
    def _to_device (self, encoded):
        # Move every tensor of one encoded batch onto the training device
        return {field: tensor.to (self.device) for field, tensor in encoded.items ()}
    
    def _optimize_on (self, scaler, inputs, target):
        # One mixed-precision optimizer step on one half of a batch; returns the loss as a float.
        batch_size = inputs['input_ids'].size (0)
        labels     = torch.full ((batch_size, 1), float (target), device=self.device)
        # Start from clean gradients: scaler.step() leaves unscaled gradients behind, and a
        # following scaled backward pass must not be accumulated on top of them.
        self.optimizer.zero_grad ()
        with autocast ():
            logits = self.model (**inputs)
            loss   = self.criterion (logits, labels)
        # backward and the optimizer step run outside the autocast region
        scaler.scale (loss).backward ()
        # torch.nn.utils.clip_grad_norm_ (self.model.parameters (), CFG.max_grad_norm)
        scaler.step (self.optimizer)
        scaler.update ()
        return loss.item ()
    
    def _evaluate_and_track_best (self, epoch_i, checkpoint_epoch, avg_train_loss, training_time):
        # Run validation and save a checkpoint whenever the loss or the accuracy sets a new best
        avg_val_loss, avg_val_accuracy, _ = self.evaluate (epoch_i, avg_train_loss, training_time)
        if avg_val_loss < self.min_val_loss:
            self.min_val_loss = avg_val_loss
            self.save_checkpoint (checkpoint_epoch, self.minLossModelFile)
        if avg_val_accuracy > self.max_val_acc:
            self.max_val_acc = avg_val_accuracy
            self.save_checkpoint (checkpoint_epoch, self.maxAccModelFile)
        return
    
    def _wrap_up (self, total_t0, final_checkpoint_epoch=None):
        # Shared end-of-training steps: report the duration, move the model to CPU in eval mode,
        # optionally save a last checkpoint, plot the statistics and return them as a DataFrame.
        print ("Total training took {:} (h:mm:ss)".format (format_time (time.time ()-total_t0)))
        self.isTrained = True
        self.model.cpu ()
        self.model.eval ()
        if final_checkpoint_epoch is not None:
            self.save_checkpoint (final_checkpoint_epoch, self.modelFile)
        try:
            torch.cuda.empty_cache ()
            self.plot_train_stats (self.training_stats)
        except Exception as err:
            # plotting is best effort and must not discard a finished training run
            print ("Could not plot the training stats:", err)
        return pd.DataFrame (self.training_stats)
    
    def train (self):
        """
        Run the training loop and return the collected statistics as a DataFrame.
        
        Per batch, one optimizer step is taken on the positive pairs (label 1) and one on the
        negative pairs (label 0); the LR scheduler advances once per batch. The reported
        training loss is the mean of both losses. Validation (if a validation set exists) runs
        before the very first step, at batch `eval_steps` of every epoch (if > 0) and at the end
        of every epoch; the best-loss and best-accuracy models are checkpointed. If
        `CFG.max_steps` > 0, training stops once the global step counter exceeds it.
        """
        
        seed_everything (seed=CFG.seed)
        step             = 0
        total_t0         = time.time ()
        scaler           = GradScaler()
        # Defaults so the end-of-training summary still works when the epoch loop runs zero
        # times (e.g. resuming from a checkpoint that already covers all epochs)
        epoch_i              = self.start_epoch - 1
        avg_epoch_train_loss = 0
        training_time        = format_time (0)
        for epoch_i in range (self.start_epoch, self.epochs):
            
            avg_epoch_train_loss   = 0
            total_epoch_train_loss = 0
            print('======== Epoch {:} / {:} ========'.format (epoch_i + 1, self.epochs))
            t0 = time.time ()
            self.model.train ()
            for stp, batch in tqdm (enumerate (self.train_dataloader), total=len(self.train_dataloader)):
                
                hit_max_steps = CFG.max_steps > 0 and CFG.max_steps < step
                hit_eval_step = self.eval_steps > 0 and stp == self.eval_steps
                if hit_max_steps or hit_eval_step or step == 0:
                    
                    self.save_checkpoint (epoch_i, self.modelFile)
                    training_time = format_time (time.time () - t0)
                    if self.validation_dataloader:
                        
                        checkpoint_epoch = epoch_i
                        if stp == self.eval_steps:
                            # this epoch has not completed, so best-model checkpoints
                            # taken here are recorded against the previous epoch
                            checkpoint_epoch = epoch_i-1
                        self._evaluate_and_track_best (epoch_i, checkpoint_epoch, avg_epoch_train_loss, training_time)
                    if hit_max_steps:
                        
                        print ("")
                        print ("Training complete!")
                        return self._wrap_up (total_t0, final_checkpoint_epoch=epoch_i)
                
                ########################################################
                # Train
                ########################################################
                pos_txt  = self._to_device (batch[0])
                neg_txt  = self._to_device (batch[1])
                pos_loss = self._optimize_on (scaler, pos_txt, target=1.0)
                neg_loss = self._optimize_on (scaler, neg_txt, target=0.0)
                self.lr_scheduler.step ()
                
                total_epoch_train_loss += (pos_loss + neg_loss) / 2.0
                avg_epoch_train_loss    = total_epoch_train_loss / (stp+1)
                step += 1
            # all steps of an epoch end
            
            # Measure how long this epoch took.
            training_time = format_time (time.time () - t0)            
            print ("  Average training loss: {0:.4f}".format (avg_epoch_train_loss))
            print ("  Training epcoh took: {:}".format (training_time))            
            if self.validation_dataloader:    
                self._evaluate_and_track_best (epoch_i, epoch_i, avg_epoch_train_loss, training_time)
            else:
                # no validation set: record the training statistics and track the best training loss
                self.training_stats.append ({
                    'epoch'         : epoch_i + 1,
                    'training_loss' : avg_epoch_train_loss,
                    'training_time' : training_time,
                })
                if avg_epoch_train_loss < self.min_train_loss:
                    self.min_train_loss = avg_epoch_train_loss
                    self.save_checkpoint (epoch_i, self.minLossModelFile)
            self.save_checkpoint (epoch_i, self.modelFile)
            # 1 epoch end
        # all epochs end
        
        # one final evaluation; it is appended as the last row of the statistics
        if self.validation_dataloader:
            print ('At training end, threshold Adjustment (last row of the train summary DF)')
            print (self.evaluate (epoch_i, avg_epoch_train_loss, training_time, isThreshAdjust=True))
            print ('<: avg_val_loss, avg_val_accuracy, validation_time')
        
        print ("***** Training complete! *****")
        return self._wrap_up (total_t0)
    
    def evaluate (self, epoch_i, avg_train_loss=999, training_time=999, isThreshAdjust=False):
        """
        Evaluate on the validation loader and append one row to `self.training_stats`.
        
        A positive pair counts as correct when sigmoid(logit) >= 0.5, a negative pair when
        sigmoid(logit) < 0.5. The loss of a batch is the mean of its positive and negative loss.
        
        Parameters:
            epoch_i:        zero-based epoch index (stored as epoch_i + 1).
            avg_train_loss: training loss to store alongside the validation metrics.
            training_time:  training time to store alongside the validation metrics.
            isThreshAdjust: accepted for interface compatibility; currently has no effect.
        Returns:
            (avg_val_loss, avg_val_accuracy, validation_time). The model is put back in train mode.
        """
        
        t0 = time.time ()
        # evaluation mode: dropout and batch-norm behave differently than in training
        self.model.eval ()
        
        total_eval_loss      = 0
        correct_pred_count   = 0
        total_pred_count     = 0
        for batch in self.validation_dataloader:
            with torch.no_grad ():
                
                pos_txt = self._to_device (batch[0])
                neg_txt = self._to_device (batch[1])
                bs      = pos_txt['input_ids'].size (0)
                
                logits1 = self.model (**pos_txt)
                loss1   = self.criterion (logits1, torch.ones ((bs, 1), device=self.device))
                
                logits0 = self.model (**neg_txt)
                loss0   = self.criterion (logits0, torch.zeros ((bs, 1), device=self.device))
                
            correct_pred_count += (torch.sigmoid (logits1) >= 0.5).sum ().item ()
            correct_pred_count += (torch.sigmoid (logits0) <  0.5).sum ().item ()
            total_pred_count   += logits0.shape[0] * 2
            total_eval_loss    += (loss1.item () + loss0.item ()) / 2.0
        
        avg_val_loss     = total_eval_loss / len (self.validation_dataloader)
        avg_val_accuracy = correct_pred_count / total_pred_count
        print ("Val Loss: {0:.4f}".format (avg_val_loss))
        print ("Val Accuracy: {0:.4f}".format (avg_val_accuracy))
        validation_time = format_time (time.time () - t0)
        self.training_stats.append ({
                'epoch'         : epoch_i + 1,
                'training_loss' : avg_train_loss,
                'eval_loss'     : avg_val_loss,
                'eval_accuracy' : avg_val_accuracy,
                'training_time' : training_time,
                'eval_time'     : validation_time                   
        })
        self.model.train ()
        print ("Validation took {:} (h:mm:ss)".format (format_time (time.time () - t0)))
        return avg_val_loss, avg_val_accuracy, validation_time
        
        
    def plot_train_stats (self, training_stats):
        """
        Plot validation accuracy and training loss per recorded row, then print the raw series.
        
        Series missing from the statistics are printed as -1. Nothing is plotted when no
        statistics were recorded.
        """
        
        if not training_stats:
            print ("No training stats to plot")
            return
        
        accuracies = eval_losses = tr_losses = -1
        row_count  = len (training_stats)
        x_axis     = np.arange (1, row_count + 1)
        if 'eval_accuracy' in training_stats[0]:
            accuracies = [e['eval_accuracy'] for e in training_stats]
            sns.lineplot (x=x_axis, y=accuracies, label='val_accuracy')
        if 'eval_loss' in training_stats[0]:
            eval_losses= [e['eval_loss'] for e in training_stats]
        if 'training_loss'  in training_stats[0]:
            tr_losses  = [e['training_loss'] for e in training_stats]
            sns.lineplot (x=x_axis, y=tr_losses,  label='tr_losses')
            
        plt.show ()
        print ('accuracies :', accuracies)        
        print ('eval_losses:', eval_losses)
        print ('tr_losses  :', tr_losses)
        return
    
    def get_trained_model (self):
        """Return the model in eval mode once training has finished, otherwise None."""
        
        if self.isTrained:
            return self.model.eval ()
        return None

In [None]:
def free_gpu_cache ():
    """Ask PyTorch to release its cached GPU memory by calling torch.cuda.empty_cache()."""
    
    torch.cuda.empty_cache ()
    return

# free_gpu_cache()           

## Train

In [None]:
def train_fold_loop (checkpoint_path=None, isResume=False):
    """
    Train one model on TRAIN_DF, validating on TEST_DF, and return the training statistics.
    
    The model stored in `CFG.MODEL` is reused; if none is set, a fresh `MyGAPModelForSeqClf`
    is built from the pretrained encoder.
    
    Parameters:
        checkpoint_path: optional checkpoint handed to `MyTrainer`, which loads the model
                         weights from it.
        isResume:        if True, `MyTrainer` also restores optimizer/scheduler state and
                         continues after the checkpoint's epoch.
    Returns:
        the DataFrame returned by `MyTrainer.train`.
    """

    print ("========== training ==========")
    fold      = 0   # single split; the value only ends up in the checkpoint file names
    criterion = get_criterion ()
    model     = CFG.MODEL
    if model is None:
        # NOTE(review): the original called getModel(), which is not defined in this notebook;
        # the model class defined above is used instead
        print ("CFG.MODEL is None")
        model = MyGAPModelForSeqClf ().float ()
        
    train_dataset  = DoubleTextDataset ()
    valid_dataset  = DoubleTextDataset (TEST_DF)
    trainer        = MyTrainer (
        fold            = fold,
        model           = model,
        train_dataset   = train_dataset,
        eval_dataset    = valid_dataset,
        criterion       = criterion,
        checkpoint_path = checkpoint_path,
        isResume        = isResume
    )
    metrics = trainer.train ()
    return metrics
    
    # To plot lr uncomment this
    # lrs = []
    # for i in range (CFG.epochs*len (trainer.train_dataloader)):
    #     trainer.lr_scheduler.step ()
    #     lrs.append (trainer.optimizer.param_groups[0]["lr"])
    # print (lrs)
    # plt.plot (lrs)
    # plt.show ()

In [None]:
def train_main (checkpoint_path=None, isResume=False):
    """
    Train every fold listed in `CFG.train_fold` and collect the statistics.
    
    Parameters:
        checkpoint_path: optional checkpoint passed on to `train_fold_loop`.
        isResume:        passed on to `train_fold_loop`.
    Returns:
        one DataFrame with the statistics rows of all trained folds stacked in order;
        an empty DataFrame when `CFG.isTrain` is False or no fold was trained.
    """
    
    if not CFG.isTrain:
        return pd.DataFrame ()
    
    fold_frames = []
    for fold in range (CFG.n_fold):
        if fold in CFG.train_fold:
            fold_frames.append (train_fold_loop (checkpoint_path, isResume))
    
    # DataFrame.append no longer exists in pandas 2.x; stack the per-fold frames once instead
    valid_scores_df = pd.concat (fold_frames) if fold_frames else pd.DataFrame ()
    
    print ("========== CV ==========")
    if valid_scores_df.empty:
        print ("CV Scores :- none (no fold was trained)")
    else:
        # the last row holds the final evaluation of the last trained fold
        valid_scores = valid_scores_df.iloc[-1, :]
        print ("CV Scores :-");  print (valid_scores)
    return valid_scores_df

# Single config training

!mkdir -p /kaggle/working/Output/
!touch /kaggle/working/Output/train.log
gc.collect ()
model_names = timm.list_models (pretrained=True)
model_names = timm.list_models ('*resnet*', pretrained=True)
pprint (model_names)
LOGGER = init_logger ()
seed_everything (seed=CFG.seed)

valid_scores_df = train_main ()

valid_scores_df

# To train, uncomment these

In [None]:
# Stage 1: train with the current CFG values (CFG.isFreeze decides whether the encoder is frozen).
!ls ../input/shopee-pytorch-xlmroberta-doubles-relativebinclf/xlm-roberta-large_128_fold0_min_val_loss.pth

gc.collect (); torch.cuda.empty_cache ()
# Optional overrides for this run; uncomment the ones to change.
# CFG.warmup_steps = 0.5
# CFG.eval_steps   = 0
# CFG.num_workers  = 8
# CFG.train_batch_size = 32
# CFG.eval_batch_size  = 32
# CFG.isFreeze = True
# CFG.epochs = 1
# CFG.lr     = 5e-5
print (f"***** Training, freeze={CFG.isFreeze} *****")
valid_scores_df = train_main () 
valid_scores_df

In [None]:
# Stage 2: train with the encoder unfrozen (CFG.isFreeze = False), smaller batches and 3 epochs.
gc.collect (); torch.cuda.empty_cache ()
# no LR warm-up; evaluate once in the middle of every epoch (0.5 = fraction of an epoch's batches)
CFG.warmup_steps = 0
CFG.eval_steps   = 0.5
CFG.train_batch_size = 16
CFG.eval_batch_size  = 16
CFG.isFreeze = False
CFG.epochs = 3
print (f"***** Training, freeze={CFG.isFreeze} *****")
# isResume=False: only the model weights are taken from the checkpoint, training starts at epoch 0
valid_scores_df = train_main ("../input/shopee-pytorch-xlmroberta-doubles-relativebinclf/xlm-roberta-large_128_fold0_min_val_loss.pth", False)
valid_scores_df

In [None]:
print ('Done !')