In [None]:
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

In [None]:
from tqdm import tqdm
import gc
import math
import random
import os
import pandas as pd
import numpy as np

# Visuals and CV2
import matplotlib.pyplot as plt
#import cudf, cuml, cupy
import cv2

# albumentations for augs
import albumentations
import torchvision.transforms as transforms
from albumentations.pytorch.transforms import ToTensorV2

#torch
import torch
import timm
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset,DataLoader
import torch.nn.functional as F 
from torch import nn 
from torch.optim.optimizer import Optimizer
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

import warnings 
warnings.filterwarnings('ignore')

### Configuration

In [None]:
#DIM = (512,512)

# DataLoader settings: worker processes and samples per batch.
NUM_WORKERS = 4
BATCH_SIZE = 8

# Training schedule, global RNG seed (passed to seed_everything) and Adam learning rate.
EPOCHS = 20
SEED = 2020
LR = 3e-4

#TRAIN_IMG = '../input/signature-verification-dataset/sign_data/train'
#shop_csv = '../input/shopee-product-matching/train.csv'
#shop_train = '../input/shopee-product-matching/train.csv'

# CSV of image pairs plus label, and the directory the CSV's relative image paths are joined onto.
training_csv = '../input/signature-verification-dataset/sign_data/train_data.csv'
training_dir = '../input/signature-verification-dataset/sign_data/train/'

# NOTE(review): run() builds its own torch.device instead of reading this constant.
DEVICE = "cuda"

# Per-channel statistics handed to albumentations.Normalize in the transform builders
# (these are the values commonly used with ImageNet-pretrained models).
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]


################################################# MODEL ####################################################################

# NOTE(review): MODEL_NAME is only referenced from commented-out code in the visible notebook;
# the active model is the hand-written SiameseNetwork class.
MODEL_NAME = 'resnet50d' #efficientnet_b3 #efficientnetb5 #efficientnetb7

# NOTE(review): SCHEDULER and min_lr are not read by the visible code; run() always constructs
# CosineAnnealingWarmRestarts with T_0 and the library-default eta_min.
SCHEDULER = 'CosineAnnealingWarmRestarts' #'CosineAnnealingLR'
T_0=3 # CosineAnnealingWarmRestarts: number of scheduler steps until the first restart
min_lr=1e-6

In [None]:
# Peek at the shape of one image from the Shopee dataset.
# NOTE(review): every other path in this notebook points at the signature dataset; confirm this input is attached.
img = cv2.imread('../input/shopee-product-matching/train_images/0000a68812bc7e98c42888dfb1c07da0.jpg')
# cv2.imread returns None instead of raising when the file cannot be read,
# so guard the attribute access to keep Run All from crashing on a missing input.
img.shape if img is not None else None

In [None]:
# Peek at the shape of one signature image (height, width, channels as loaded by OpenCV).
img = cv2.imread('../input/signature-verification-dataset/sign_data/train/001/001_01.PNG')
# cv2.imread returns None instead of raising when the file cannot be read,
# so guard the attribute access to keep Run All from crashing on a missing input.
img.shape if img is not None else None

### train test split

In [None]:
# Read the training-pairs CSV into a DataFrame for inspection.
# NOTE(review): train_all is not referenced again in the visible notebook;
# SiameseNetworkDataset re-reads the CSV itself from the path it is given.
train_all = pd.read_csv(training_csv)

In [None]:
# Names used by the dataset and run() cells. They alias the configuration constants
# so the dataset location is defined in exactly one place (the Configuration cell).
train_csv = training_csv
train_dir = training_dir

### Utils

In [None]:
def seed_everything(seed):
    """Seed every RNG used by the notebook and put cuDNN in deterministic mode.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and PyTorch
            (CPU and all CUDA devices).
    """
    random.seed(seed)
    # Exported for child processes; it does not change hashing in the
    # already-running interpreter.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one. Safe to call when
    # CUDA is unavailable (the call is then ignored).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick algorithms per run, which
    # defeats the determinism requested on the line above, so disable it.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

In [None]:
class AverageMeter(object):
    """Keeps the most recent value of a metric and its sample-weighted running mean.

    Attributes are ``val`` (last value), ``sum`` (weighted total),
    ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        # All state is created by reset() so construction and reset stay in sync.
        self.reset()

    def reset(self):
        """Set the last value, mean, weighted sum and count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed over ``n`` samples and refresh the mean."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count

### Augs

In [None]:
def get_train_transforms():
    """Build the albumentations pipeline applied to training images.

    Only normalisation with the module-level MEAN/STD (pixel values scaled
    by 255) followed by ToTensorV2 is active.

    Returns:
        An ``albumentations.Compose`` object; call it as ``pipeline(image=img)``
        and read the result from the ``'image'`` key.
    """
    # Disabled augmentations, kept for reference:
    #   albumentations.HorizontalFlip(p=0.5)
    #   albumentations.VerticalFlip(p=0.5)
    #   albumentations.Rotate(limit=120, p=0.8)
    #   albumentations.RandomBrightness(limit=(0.09, 0.6), p=0.5)
    #   albumentations.Cutout(num_holes=8, max_h_size=8, max_w_size=8, fill_value=0, always_apply=False, p=0.5)
    #   albumentations.ShiftScaleRotate(shift_limit=0.25, scale_limit=0.1, rotate_limit=0)
    normalize = albumentations.Normalize(
        mean=MEAN, std=STD, max_pixel_value=255.0, always_apply=True
    )
    to_tensor = ToTensorV2(p=1.0)
    return albumentations.Compose([normalize, to_tensor])

def get_valid_transforms():
    """Build the albumentations pipeline for validation images.

    Applies normalisation with the module-level MEAN/STD (pixel values
    scaled by 255) and then ToTensorV2; no augmentation is involved.

    Returns:
        An ``albumentations.Compose`` object; call it as ``pipeline(image=img)``.
    """
    pipeline = [
        albumentations.Normalize(
            mean=MEAN, std=STD, max_pixel_value=255.0, always_apply=True
        ),
        ToTensorV2(p=1.0),
    ]
    return albumentations.Compose(pipeline)

### Dataset

In [None]:
class SiameseNetworkDataset(Dataset):
    """Dataset of image pairs with a label, driven by a CSV of relative paths.

    The CSV's three columns are treated as ``(image1, image2, label)``; both
    paths are joined onto ``training_dir``.

    Args:
        dim: Target size passed to ``cv2.resize``; a falsy value skips resizing.
        training_csv: Path of the CSV that lists the pairs.
        training_dir: Directory prefix for the image paths in the CSV.
        augmentation: Optional callable invoked as ``augmentation(image=img)``
            that returns a mapping with an ``'image'`` entry (e.g. an
            albumentations ``Compose``).
    """

    def __init__(self,dim=(256,256),training_csv=None,training_dir=None,augmentation=None):
        # NOTE(review): read_csv consumes the first CSV row as a header before the
        # columns are renamed below. If the file has no header row, the first
        # pair is silently dropped -- confirm against the data file.
        self.train_df = pd.read_csv(training_csv)
        self.train_df.columns = ["image1", "image2", "label"]
        self.train_dir = training_dir
        self.dim = dim
        self.augmentation = augmentation

    def __len__(self):
        """Number of pairs (rows) in the CSV."""
        return len(self.train_df)

    def _load_rgb(self, relative_path):
        """Read one image from ``train_dir``, convert BGR->RGB and optionally resize."""
        path = os.path.join(self.train_dir, relative_path)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None; fail with the path
            # instead of an opaque cvtColor error further down.
            raise FileNotFoundError(f"Could not read image: {path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.dim:
            img = cv2.resize(img, self.dim)
        return img

    def __getitem__(self,index):
        """Return ``(img_0, img_1, label)`` for row ``index``.

        The label is returned as a ``torch.long`` scalar tensor. The images
        are whatever the augmentation produces, or RGB numpy arrays when no
        augmentation is configured.
        """
        # Column 0 is the first image and column 1 the second. The original code
        # loaded column 0 twice, so every pair compared an image with itself.
        img_0 = self._load_rgb(self.train_df.iat[index, 0])
        img_1 = self._load_rgb(self.train_df.iat[index, 1])
        label = self.train_df.iat[index, 2]

        if self.augmentation:
            # Two separate calls: any random transform is sampled independently per image.
            img_0 = self.augmentation(image=img_0)['image']
            img_1 = self.augmentation(image=img_1)['image']

        return img_0, img_1, torch.tensor(label, dtype=torch.long)

In [None]:
# Module-level dataset and loader, handy for inspecting samples interactively.
# run() builds its own dataset/loader pair and does not use these.
train_dataset = SiameseNetworkDataset(
    training_csv=train_csv,
    training_dir=train_dir,
    augmentation=get_train_transforms(),
)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=True,
    drop_last=False,
)

In [None]:
# Sanity check: the third element of a sample is the label, which
# SiameseNetworkDataset.__getitem__ builds with torch.tensor(..., dtype=torch.long).
type(train_dataset[0][2])

In [None]:
#img = cv2.imread('../input/signature-verification-dataset/sign_data/train/001/001_01.PNG')
#img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#print(img.shape)
#img = torch.from_numpy(img)
#print(img.shape)
#
#img = torch.permute(img, (2, 0, 1)) 
#print(img.shape)

In [None]:
## Defining DataSet
#train_dataset = SiameseNetworkDataset(
#training_csv=train_csv,
#training_dir=train_dir,
#augmentation=get_train_transforms()
#
#)
#    
#train_loader = torch.utils.data.DataLoader(
#    train_dataset,
#    batch_size=BATCH_SIZE,
#    pin_memory=True,
#    drop_last=False,
#    num_workers=NUM_WORKERS
#)

In [None]:
#train_dataset[0][0].shape

In [None]:
#path = '../input/signature-verification-dataset/sign_data/train/001/001_01.PNG'
#img = cv2.imread(path)
#img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#img.shape

In [None]:
#path = '../input/shopee-product-matching/train_images/0000a68812bc7e98c42888dfb1c07da0.jpg'
#img = cv2.imread(path)
#img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#img.shape

### Ranger Optimizer

In [None]:
#credit : https://github.com/Yonghongwei/Gradient-Centralization

def centralized_gradient(x, use_gc=True, gc_conv_only=False):
    """Centralize a gradient tensor in place and return it.

    The mean over every dimension except the first is subtracted from ``x``.

    Args:
        x: Tensor to centralize; modified in place.
        use_gc: When False the tensor is returned untouched.
        gc_conv_only: When True only tensors with more than 3 dimensions are
            centralized; otherwise any tensor with more than 1 dimension is.

    Returns:
        The same tensor object ``x``.
    """
    if not use_gc:
        return x

    rank = x.dim()
    # Tensors at or below this rank are left alone.
    min_rank = 3 if gc_conv_only else 1
    if rank > min_rank:
        reduce_dims = tuple(range(1, rank))
        x.add_(-x.mean(dim=reduce_dims, keepdim=True))
    return x


class Ranger(Optimizer):
    """Optimizer combining a rectified-Adam style update, gradient centralization and lookahead.

    Per parameter it keeps exponential moving averages of the gradient and
    squared gradient plus a "slow" copy of the weights. Every ``k`` steps the
    slow copy is moved towards the current weights by a factor ``alpha`` and
    copied back into the parameter.
    """

    def __init__(self, params, lr=1e-3,                       # lr
                 alpha=0.5, k=5, N_sma_threshhold=5,           # Ranger options
                 betas=(.95, 0.999), eps=1e-5, weight_decay=0,  # Adam options
                 # Gradient centralization on or off, applied to conv layers only or conv + fc layers
                 use_gc=True, gc_conv_only=False, gc_loc=True
                 ):
        """Validate hyper-parameters and initialise optimizer state.

        Args:
            params: Parameters or parameter groups, forwarded to ``Optimizer``.
            lr: Learning rate; must be > 0.
            alpha: Lookahead interpolation factor; must lie in [0, 1].
            k: Lookahead period in steps; must be >= 1.
            N_sma_threshhold: The adaptive (variance-normalised) update is used
                only while the computed ``N_sma`` exceeds this value.
            betas: Decay rates for the gradient and squared-gradient averages.
            eps: Added to the denominator of the adaptive update; must be > 0.
            weight_decay: Coefficient of the parameter term added to the update.
            use_gc: Enable gradient centralization.
            gc_conv_only: Restrict centralization to tensors with more than 3 dims.
            gc_loc: If True centralize the raw gradient; if False centralize the
                final update direction instead.

        Raises:
            ValueError: If ``alpha``, ``k``, ``lr`` or ``eps`` is out of range.
        """

        # parameter checks
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f'Invalid slow update rate: {alpha}')
        if not 1 <= k:
            raise ValueError(f'Invalid lookahead steps: {k}')
        if not lr > 0:
            raise ValueError(f'Invalid Learning Rate: {lr}')
        if not eps > 0:
            raise ValueError(f'Invalid eps: {eps}')

        # parameter comments:
        # beta1 (momentum) of .95 seems to work better than .90...
        # N_sma_threshold of 5 seems better in testing than 4.
        # In both cases, worth testing on your dataset (.90 vs .95, 4 vs 5) to make sure which works best for you.

        # prep defaults and init torch.optim base
        # NOTE(review): 'step_counter' is stored in the defaults but never read in this class.
        defaults = dict(lr=lr, alpha=alpha, k=k, step_counter=0, betas=betas,
                        N_sma_threshhold=N_sma_threshhold, eps=eps, weight_decay=weight_decay)
        super().__init__(params, defaults)

        # adjustable threshold
        self.N_sma_threshhold = N_sma_threshhold

        # look ahead params
        # NOTE(review): step() reads the period from group['k'] but the interpolation
        # factor from self.alpha, so a per-group 'alpha' override has no effect.

        self.alpha = alpha
        self.k = k

        # radam buffer for state
        # Ten [step, N_sma, step_size] slots indexed by step % 10; caches the
        # rectification terms so they are computed once per step value.
        self.radam_buffer = [[None, None, None] for ind in range(10)]

        # gc on or off
        self.gc_loc = gc_loc
        self.use_gc = use_gc
        self.gc_conv_only = gc_conv_only
        # level of gradient centralization
        #self.gc_gradient_threshold = 3 if gc_conv_only else 1

        print(
            f"Ranger optimizer loaded. \nGradient Centralization usage = {self.use_gc}")
        if (self.use_gc and self.gc_conv_only == False):
            print(f"GC applied to both conv and fc layers")
        elif (self.use_gc and self.gc_conv_only == True):
            print(f"GC applied to conv layers only")

    def __setstate__(self, state):
        """Restore optimizer state via the base class, logging that it happened."""
        print("set state called")
        super(Ranger, self).__setstate__(state)

    def step(self, closure=None):
        """Apply one optimization step to every parameter that has a gradient.

        Args:
            closure: Accepted for API compatibility but never called.

        Returns:
            Always ``None`` (``loss`` is never reassigned).

        Raises:
            RuntimeError: If a parameter has a sparse gradient.
        """
        loss = None
        # note - below is commented out b/c I have other work that passes back the loss as a float, and thus not a callable closure.
        # Uncomment if you need to use the actual closure...

        # if closure is not None:
        #loss = closure()

        # Evaluate averages and grad, update param tensors
        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                # Work in float32 regardless of the parameter's own dtype.
                grad = p.grad.data.float()

                if grad.is_sparse:
                    raise RuntimeError(
                        'Ranger optimizer does not support sparse gradients')

                # float32 working copy of the weights; written back to p.data below.
                p_data_fp32 = p.data.float()

                state = self.state[p]  # get state dict for this param

                if len(state) == 0:  # if first time to run...init dictionary with our desired entries
                    # if self.first_run_check==0:
                    # self.first_run_check=1
                    #print("Initializing slow buffer...should not see this at load from saved model!")
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)

                    # look ahead weight storage now in state dict
                    state['slow_buffer'] = torch.empty_like(p.data)
                    state['slow_buffer'].copy_(p.data)

                else:
                    # Keep the moving averages in the same dtype/device type as the working copy.
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(
                        p_data_fp32)

                # begin computations
                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                # GC operation for Conv layers and FC layers
                # if grad.dim() > self.gc_gradient_threshold:
                #    grad.add_(-grad.mean(dim=tuple(range(1, grad.dim())), keepdim=True))
                if self.gc_loc:
                    grad = centralized_gradient(grad, use_gc=self.use_gc, gc_conv_only=self.gc_conv_only)

                state['step'] += 1

                # compute variance mov avg
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)

                # compute mean moving avg
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                # NOTE(review): the cache is keyed on the step count only and is shared by all
                # parameter groups, so groups with different betas would reuse each other's
                # N_sma / step_size for the same step -- confirm single-betas usage.
                buffered = self.radam_buffer[int(state['step'] % 10)]

                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * \
                        state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma
                    if N_sma > self.N_sma_threshhold:
                        # Rectified step size, including the beta1 bias correction.
                        step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (
                            N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        # Below the threshold only the beta1 bias correction is applied.
                        step_size = 1.0 / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                # if group['weight_decay'] != 0:
                #    p_data_fp32.add_(-group['weight_decay']
                #                     * group['lr'], p_data_fp32)

                # apply lr
                if N_sma > self.N_sma_threshhold:
                    # Adaptive direction: momentum divided by sqrt(second moment) + eps.
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    G_grad = exp_avg / denom
                else:
                    # Plain momentum direction. G_grad aliases exp_avg here, so the in-place
                    # add_ calls below also modify the stored moving average.
                    G_grad = exp_avg

                if group['weight_decay'] != 0:
                    # Weight decay is folded into the update direction, so it is scaled by
                    # step_size * lr when applied.
                    G_grad.add_(p_data_fp32, alpha=group['weight_decay'])
                # GC operation
                if self.gc_loc == False:
                    G_grad = centralized_gradient(G_grad, use_gc=self.use_gc, gc_conv_only=self.gc_conv_only)

                p_data_fp32.add_(G_grad, alpha=-step_size * group['lr'])
                p.data.copy_(p_data_fp32)

                # integrated look ahead...
                # we do it at the param level instead of group level
                if state['step'] % group['k'] == 0:
                    # get access to slow param tensor
                    slow_p = state['slow_buffer']
                    # (fast weights - slow weights) * alpha
                    slow_p.add_(p.data - slow_p, alpha=self.alpha)
                    # copy interpolated weights to RAdam param tensor
                    p.data.copy_(slow_p)

        return loss

### Model

In [None]:
#create a siamese network
class SiameseNetwork(nn.Module):
    """Siamese CNN that embeds each image of a pair with shared weights.

    Both inputs go through the same convolutional stack (``cnn1``) and the
    same fully connected head (``fc1``), producing one 2-dimensional
    embedding per image.

    The head expects 1008 flattened features. For ``3 x 256 x 256`` inputs
    the convolutional stack outputs ``28 x 6 x 6 = 1008``; other input sizes
    do not match ``fc1``.
    """

    def __init__(self):
        super(SiameseNetwork, self).__init__()
        # Convolutional feature extractor. Dropout2d is appropriate here
        # because the activations are 4-D (N, C, H, W).
        self.cnn1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11,stride=1),
            nn.Mish(),
            nn.LocalResponseNorm(5,alpha=0.0001,beta=0.75,k=2),
            nn.MaxPool2d(3, stride=2),

            nn.Conv2d(96, 256, kernel_size=5,stride=1,padding=2),
            nn.Mish(),
            nn.LocalResponseNorm(5,alpha=0.0001,beta=0.75,k=2),
            nn.MaxPool2d(3, stride=2),
            nn.Dropout2d(p=0.3),

            nn.Conv2d(256,384 , kernel_size=3,stride=1,padding=1),
            nn.Mish(),

            nn.Conv2d(384,256 , kernel_size=3,stride=1,padding=1),
            nn.Mish(),
            nn.MaxPool2d(3, stride=2),
            nn.Dropout2d(p=0.3),

            nn.Conv2d(256,126 , kernel_size=3,stride=1,padding=1),
            nn.Mish(),
            nn.MaxPool2d(3, stride=2),
            nn.Dropout2d(p=0.3),

            nn.Conv2d(126,28 , kernel_size=3,stride=1,padding=1),
            nn.Mish(),
            nn.MaxPool2d(3, stride=2),
            nn.Dropout2d(p=0.3),
        )
        # Fully connected head operating on flattened (N, 1008) features.
        self.fc1 = nn.Sequential(
            nn.Linear(1008, 1024),
            nn.Mish(),
            # Element-wise dropout: the input is 2-D (N, C). Dropout2d is meant
            # for inputs with spatial dimensions and is deprecated for 2-D
            # input. The layer has no parameters and keeps its position, so
            # state_dict keys are unchanged.
            nn.Dropout(p=0.5),

            nn.Linear(1024, 128),
            nn.Mish(),

            nn.Linear(128,2))

    def forward_once(self, x):
        """Embed one batch of images: conv stack, flatten per sample, FC head."""
        output = self.cnn1(x)
        # Collapse (C, H, W) into a single feature axis for the linear layers.
        output = output.view(output.size()[0], -1)
        output = self.fc1(output)
        return output

    def forward(self, input1, input2):
        """Return the embeddings of both inputs, computed with shared weights."""
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        return output1, output2
    
# Smoke test: push one dummy 3x256x256 pair through a throw-away model to
# confirm the flattened feature size matches the first Linear layer.
d = SiameseNetwork()
t1 = torch.ones((1,3,256,256))
t2 = torch.ones((1,3,256,256))
# No gradients are needed for a shape check; skipping the autograd graph keeps
# the outputs from holding references to the model.
with torch.no_grad():
    x1,x2 = d(t1,t2)
assert x1.shape == x2.shape == (1, 2)

# Delete every temporary, including x2 (previously left alive), so the
# collection below can actually reclaim the throw-away model.
del x1,x2,t1,t2,d
_ = gc.collect()

In [None]:
# Scratch arithmetic (evaluates to 952576); the result is only displayed, never stored.
# NOTE(review): presumably an earlier flatten-size estimate -- the active SiameseNetwork
# head takes 1008 input features, so this cell looks stale; confirm before removing.
256*61*61

### Loss

In [None]:
class ContrastiveLoss(torch.nn.Module):
    """
    Contrastive loss over pairs of embeddings.
    Based on: http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    Pairs with label 0 are penalised by their squared distance; pairs with
    label 1 are penalised by the squared shortfall of their distance from
    ``margin`` (zero once they are at least ``margin`` apart).
    """

    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        """Return the mean contrastive loss for the batch."""
        distance = F.pairwise_distance(output1, output2)

        # Label-0 term: pull the two embeddings together.
        pull_term = (1 - label) * distance.pow(2)

        # Label-1 term: push apart until the margin is reached.
        shortfall = torch.clamp(self.margin - distance, min=0.0)
        push_term = label * shortfall.pow(2)

        return torch.mean(pull_term + push_term)

### Train-Loop

In [None]:
def train_fn(dataloader,model,criterion,optimizer,device,epoch,scheduler=None):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: Yields ``(img_0, img_1, label)`` batches.
        model: Called as ``model(img_0, img_1)``; returns two outputs.
        criterion: Called as ``criterion(out_0, out_1, label)``; returns the loss.
        optimizer: Stepped once per batch.
        device: Device the batch tensors are moved to.
        epoch: Only shown in the progress bar.
        scheduler: Optional LR scheduler, stepped once after the whole pass.

    Returns:
        The ``AverageMeter`` holding the batch-size-weighted mean loss.
    """
    model.train()
    meter = AverageMeter()
    progress = tqdm(dataloader, total=len(dataloader))

    for first_imgs, second_imgs, targets in progress:
        first_imgs, second_imgs = first_imgs.to(device), second_imgs.to(device)
        targets = targets.to(device)
        n_samples = first_imgs.shape[0]

        optimizer.zero_grad()
        emb_first, emb_second = model(first_imgs, second_imgs)
        batch_loss = criterion(emb_first, emb_second, targets)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        meter.update(batch_loss.detach().item(), n_samples)
        progress.set_postfix(
            Train_Loss=meter.avg,
            Epoch=epoch,
            LR=optimizer.param_groups[0]['lr'],
        )

    # The scheduler advances once per call, i.e. once per epoch.
    if scheduler is not None:
        scheduler.step()

    return meter

### Engine

In [None]:
## Defining DataSet
#train_dataset = SiameseNetworkDataset(
#training_csv=training_csv,
#training_dir=training_dir,
#augmentation=get_train_transforms(),
#)
#    
#train_loader = torch.utils.data.DataLoader(
#    train_dataset,
#    batch_size=BATCH_SIZE,
#    pin_memory=True,
#    drop_last=False,
#    num_workers=NUM_WORKERS
#)
#

In [None]:
#device = torch.device("cpu")
#    
## Defining Model for specific fold
#model = SiameseModel(model_name= MODEL_NAME,out_features=64,pretrained=True)
#model.to(device)
#
##DEfining criterion
#criterion = ContrastiveLoss()
#criterion.to(device)
#
#optimizer = torch.optim.Adam(model.parameters(), lr=LR)
##Defining LR SCheduler
#scheduler = CosineAnnealingWarmRestarts(optimizer,T_0=T_0)
#train_loss = train_fn(train_loader, model,criterion, optimizer, device,scheduler=scheduler,epoch=1)

In [None]:
#train_dataset[0][0].shape

In [None]:
def run():
    """Train a SiameseNetwork on the signature pairs and return the trained model.

    Builds the dataset and loader, model, contrastive loss, Adam optimizer and
    a CosineAnnealingWarmRestarts scheduler, then trains for ``EPOCHS`` epochs.
    Whenever the epoch-average training loss improves, the weights are written
    to ``model_best_loss.pt``.

    Returns:
        The model after the final epoch (not necessarily the best checkpoint).
    """
    # Use the GPU when present and fall back to CPU so the notebook still
    # runs end to end on a machine without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_dataset = SiameseNetworkDataset(
        training_csv=train_csv,
        training_dir=train_dir,
        augmentation=get_train_transforms(),
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        # Shuffle each epoch: feeding pairs in CSV order makes batches
        # correlated and biases the gradient estimates.
        shuffle=True,
        # Pinned host memory only helps host-to-GPU copies.
        pin_memory=(device.type == "cuda"),
        drop_last=False,
        num_workers=NUM_WORKERS,
    )

    model = SiameseNetwork()
    model.to(device)

    criterion = ContrastiveLoss()
    criterion.to(device)

    # Alternative optimizer defined in this notebook: Ranger(model.parameters(), lr=LR)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    # train_fn steps the scheduler once per epoch, so T_0 is measured in epochs.
    scheduler = CosineAnnealingWarmRestarts(optimizer,T_0=T_0)

    # Start from +inf so the first epoch always produces a checkpoint,
    # whatever the scale of the loss.
    best_loss = float("inf")
    for epoch in range(EPOCHS):
        train_loss = train_fn(train_loader, model,criterion, optimizer, device,epoch=epoch,scheduler=scheduler)
        if train_loss.avg < best_loss:
            best_loss = train_loss.avg
            torch.save(model.state_dict(), 'model_best_loss.pt')
    return model

In [None]:
# Train with the settings above, then persist the weights of the final epoch.
# run() separately writes 'model_best_loss.pt' whenever the epoch-average training loss improves.
model = run()
torch.save(model.state_dict(), "model.pt")
print("Model Saved Successfully")