# <span><h1 style = "font-family: garamond; font-size: 40px; font-style: normal; letter-spacing: 3px; background-color: #f6f5f5; color :#346efe; border-radius: 100px 100px; text-align:center">Torch inference notebook</h1></span>

<br>
<h2 style = "font-size:16px"></h2>

This is the inference notebook for models trained with https://www.kaggle.com/vladvdv/pytorch-train-notebook-arcface-gem-pooling/notebook  
    
Modifications for version 30:
* replaced the supervised KNeighborsClassifier with the unsupervised NearestNeighbors   
* corrected the grid search that determines the optimal "new_individual" threshold (NearestNeighbors is now fitted on the same training data the model was trained on, and then evaluated on the same validation data the model was validated on).  
   
To do:
* Implement all folds model blending

In [None]:
import pickle
import os
import gc
import cv2
import math
import copy
import time
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import joblib
from tqdm import tqdm
from collections import defaultdict
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
import albumentations as A
from albumentations.pytorch import ToTensorV2
import warnings
warnings.filterwarnings("ignore")
import sys
sys.path.append("../input/timm-pytorch-image-models")
import timm
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors

In [None]:
# Global settings read by the cells below.
CONFIG = {
    "seed": 21,  # seed handed to set_seed()
    "img_size": 768,  # side length every image is resized to before inference
    "model_name": "tf_efficientnet_b4",  # timm architecture name of the trained model
    "num_classes": 15587,  # total individuals in training data (size of the ArcFace head)
    "test_batch_size": 16,  # choose according to the architecture and image size
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),  # GPU if present
    "test_mode": False,  # keep only the first 2000 rows of the train/test tables, useful for debugging
    "percentage_new_from_test": 10,  # estimated share of test data that is "new_individual"
    "threshold": 0.1,  # it will be overwritten after prediction histogram
    "neigh": 100,  # number of nearest neighbours requested
    "n_fold": 5,  # number of folds the model has been trained with
    # ArcFace hyperparameters
    "s": 30.0,
    "m": 0.30,
    "ls_eps": 0.0,
    "easy_margin": False,
}

In [None]:
def set_seed(seed=42):
    '''Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices) and switches cuDNN to deterministic mode so repeated runs give
    the same results. This is for REPRODUCIBILITY.

    Parameters
    ----------
    seed : int
        Value used for all generators.
    '''
    # The stdlib generator is imported by this notebook, so seed it as well.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover every visible GPU, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
# Seed once, before any data loading or model creation, with the seed from CONFIG.
set_seed(CONFIG['seed'])

In [None]:
def get_test_file_path(id):
    """Return the path of test image ``id`` inside the module-level ``TEST_DIR``."""
    return "{}/{}".format(TEST_DIR, id)

def get_train_file_path(id):
    """Return the path of training image ``id`` inside the module-level ``TRAIN_DIR``."""
    return "{}/{}".format(TRAIN_DIR, id)

# Alternative image folders inside the competition dataset, kept for reference:
#ROOT_DIR = '../input/happy-whale-and-dolphin'
#TEST_DIR = '../input/happy-whale-and-dolphin/test_images'
#TRAIN_DIR = '../input/happy-whale-and-dolphin/train_images'

# The CSV tables are read from ROOT_DIR; the images are read from TEST_DIR / TRAIN_DIR
# (judging by the folder names these hold 384x384 JPEG copies - verify against the dataset).
ROOT_DIR = '../input/happy-whale-and-dolphin'
TEST_DIR = '../input/jpeg-happywhale-384x384/test_images-384-384/test_images-384-384'
TRAIN_DIR = '../input/jpeg-happywhale-384x384/train_images-384-384/train_images-384-384'

# Checkpoint that is loaded into the model further down.
weights_path = "../input/loss137-bin/Loss13.7124_epoch11.bin"

# Read both tables once; test mode keeps only the first 2000 rows of each.
df_test = pd.read_csv(f"{ROOT_DIR}/sample_submission.csv")
df_train = pd.read_csv(f"{ROOT_DIR}/train.csv")
if CONFIG["test_mode"]==True:
    df_test = df_test[:2000]
    df_train = df_train[:2000]

# Resolve every image name to its location on disk.
df_train['file_path'] = df_train['image'].map(get_train_file_path)
df_test['file_path'] = df_test['image'].map(get_test_file_path)
train_labels = np.array(df_train['individual_id'].values)

# Split into train and valid like in the training notebook, so the
# NearestNeighbors approach can be validated on the held-out fold.
trainFold = 0 # this model was trained on fold 0
skf = StratifiedKFold(n_splits=CONFIG['n_fold'])
for fold, ( _, val_) in enumerate(skf.split(X=df_train, y=train_labels)):
    df_train.loc[val_ , "kfold"] = fold
is_valid_row = df_train.kfold == trainFold
df_train_cnn = df_train[~is_valid_row].reset_index(drop=True)
df_valid_cnn = df_train[is_valid_row].reset_index(drop=True)

# Hardcode a dummy label: the ArcMargin forward function requires a label input.
for frame in (df_test, df_train_cnn, df_valid_cnn):
    frame['dummy_labels'] = 0



In [None]:
class HappyWhaleDataset(Dataset):
    """Dataset that loads images from disk and pairs them with a label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``file_path`` column (image location) and a
        ``dummy_labels`` column (label returned with each image).
    transforms : callable, optional
        Called as ``transforms(image=img)``; its ``"image"`` entry replaces the
        raw image. When falsy the raw RGB array is returned.
    """
    def __init__(self, df, transforms=None):
        self.df = df
        self.file_names = df['file_path'].values
        self.labels = df['dummy_labels'].values
        self.transforms = transforms
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        """Return ``{'image': ..., 'label': LongTensor}`` for row ``index``.

        Raises
        ------
        FileNotFoundError
            If the image file cannot be read.
        """
        img_path = self.file_names[index]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV loads images as BGR; convert to RGB channel order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = self.labels[index]
        
        if self.transforms:
            img = self.transforms(image=img)["image"]
        return {
            'image': img,
            'label': torch.tensor(label, dtype=torch.long)
        }
        

In [None]:
class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions.

    The exponent ``p`` is stored as a learnable parameter; ``eps`` is the lower
    clamp applied to the input before it is raised to ``p``.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(torch.ones(1) * p)

    def forward(self, x):
        """Pool ``x`` with the module's own ``p`` and ``eps``."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Clamp to ``eps``, raise to ``p``, average over the full spatial window, take the ``p``-th root."""
        window = (x.size(-2), x.size(-1))
        powered = x.clamp(min=eps).pow(p)
        averaged = F.avg_pool2d(powered, window)
        return averaged.pow(1. / p)

    def __repr__(self):
        exponent = '{:.4f}'.format(self.p.data.tolist()[0])
        return f"{self.__class__.__name__}(p={exponent}, eps={str(self.eps)})"

In [None]:
class ArcMarginProduct(nn.Module):
    r"""Additive angular margin (ArcFace-style) head: ``s * cos(theta + m)``.

    Args:
        in_features: size of each input sample
        out_features: size of each output sample (one weight row per class)
        s: factor the final cosine logits are multiplied by
        m: angular margin added to the angle of the labelled class
        easy_margin: if truthy, the margin is applied only where ``cos(theta) > 0``
        ls_eps: label-smoothing amount applied to the one-hot labels (0 disables it)
    """
    def __init__(self, in_features, out_features, s=30.0, 
                 m=0.50, easy_margin=False, ls_eps=0.0):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.ls_eps = ls_eps  # label smoothing
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        # Constants for cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m).
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Used when theta + m would exceed pi: below `th` the logit falls back
        # to `cosine - mm`.
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        """Return the scaled logits of shape ``(batch, out_features)``.

        ``label`` holds one class index per sample; the margin is applied only
        to that class' logit.
        """
        # --------------------------- cos(theta) & phi(theta) ---------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Clamp before the square root: rounding can push cosine**2 marginally
        # above 1, which would otherwise yield NaN.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(min=0.0))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------
        # Allocate on the same device as the logits so the layer works wherever
        # the model lives, independent of any global configuration.
        one_hot = torch.zeros(cosine.size(), device=cosine.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features
        # -------------torch.where(out_i = {x_i if condition_i else y_i) ------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return output

In [None]:
class HappyWhaleModel(nn.Module):
    """timm backbone followed by GeM pooling, dropout, a 512-d embedding layer and an ArcFace head.

    Parameters
    ----------
    model_name : str
        Architecture name passed to ``timm.create_model``.
    pretrained : bool
        Forwarded to ``timm.create_model``.
    """
    def __init__(self, model_name, pretrained=True):
        super(HappyWhaleModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        in_features = self.model.classifier.in_features
        # Replace the backbone's classifier and pooling with pass-throughs; the
        # feature map is pooled by GeM below instead.
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.drop = nn.Dropout(p=0.2, inplace=False)
        self.fc = nn.Linear(in_features,512)
        # easy_margin is read from its own CONFIG entry (it was previously
        # wired to CONFIG["ls_eps"] by mistake).
        self.arc = ArcMarginProduct(512, 
                           CONFIG["num_classes"],
                           s=CONFIG["s"], 
                           m=CONFIG["m"], 
                           easy_margin=CONFIG["easy_margin"], 
                           ls_eps=CONFIG["ls_eps"])
    def forward(self, images, labels):
        """Return ``(output, emb)``: the ArcFace logits and the 512-d embedding."""
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        pooled_drop = self.drop(pooled_features)
        emb = self.fc(pooled_drop)
        output = self.arc(emb,labels)
        return output,emb

In [None]:
# Preprocessing used for every image at inference time: resize to the
# configured square size, normalise with the mean/std below, convert to a tensor.
data_transforms = {
    "test": A.Compose(
        [
            A.Resize(CONFIG['img_size'], CONFIG['img_size']),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0,
                p=1.0,
            ),
            ToTensorV2(),
        ],
        p=1.,
    ),
}

In [None]:
@torch.inference_mode()
def inference(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and collect the embeddings.

    The model is switched to eval mode. Each batch must be a dict with
    ``'image'`` and ``'label'`` entries; the second element returned by the
    model is moved to the CPU and stored row by row.

    Returns
    -------
    list
        One NumPy array per sample, in dataloader order.
    """
    model.eval()
    embeddings = []
    progress = tqdm(enumerate(dataloader), total=len(dataloader))
    for _, batch in progress:
        batch_images = batch['image'].to(device, dtype=torch.float)
        batch_labels = batch['label'].to(device, dtype=torch.long)
        batch_embeddings = model(batch_images, batch_labels)[1]
        for row in batch_embeddings.cpu().detach().numpy():
            embeddings.append(row)
    return embeddings

In [None]:
def _make_eval_loader(df):
    """Build the (dataset, loader) pair for ``df`` using the test transforms, in fixed order."""
    dataset = HappyWhaleDataset(df, transforms=data_transforms["test"])
    loader = DataLoader(dataset, batch_size=CONFIG['test_batch_size'],
                        num_workers=4, shuffle=False, pin_memory=True)
    return dataset, loader


# pretrained=False: load_state_dict (strict by default) replaces every weight
# with the checkpoint's, so fetching pretrained weights first would be wasted work.
model = HappyWhaleModel(CONFIG['model_name'], pretrained=False)
model.to(CONFIG['device']);
# map_location lets the checkpoint load on the configured device, including
# the CPU fallback that CONFIG['device'] allows.
model.load_state_dict(torch.load(weights_path, map_location=CONFIG['device']))

# Loaders for the three image sets whose embeddings are extracted below.
train_dataset, train_loader = _make_eval_loader(df_train_cnn)
valid_dataset, valid_loader = _make_eval_loader(df_valid_cnn)
test_dataset, test_loader = _make_eval_loader(df_test)

In [None]:
def map_per_image(label, predictions):
    """Precision score of a single image.

    Parameters
    ----------
    label : string
            The true label of the image
    predictions : list
            Ranked predictions (order matters; only the first 5 are considered)

    Returns
    -------
    score : double
            ``1 / rank`` of the true label within the first 5 predictions,
            or ``0.0`` when it is not among them.
    """
    top_five = predictions[:5]
    try:
        position = top_five.index(label)
    except ValueError:
        return 0.0
    return 1 / (position + 1)

def map_per_set(labels, predictions):
    """Mean of the per-image precision scores.

    Parameters
    ----------
    labels : list
             The true labels, one per image
    predictions : list of list
             Ranked predictions for each image (order matters, 5 predictions allowed per image)

    Returns
    -------
    score : double
    """
    per_image_scores = []
    for true_label, ranked in zip(labels, predictions):
        per_image_scores.append(map_per_image(true_label, ranked))
    return np.mean(per_image_scores)

In [None]:
def PredictGrid(train_cnn_predictions,valid_cnn_predictions, train_labels, valid_labels,new_individual_thres):
    """Score one 'new_individual' threshold on held-out embeddings.

    A cosine NearestNeighbors index is fitted on the training embeddings and
    queried with the validation embeddings. For each validation image the labels
    of its 5 nearest neighbours form the ranked prediction; 'new_individual' is
    inserted in front of the first neighbour whose confidence
    (``1 - cosine distance``) is below ``new_individual_thres``.

    Parameters
    ----------
    train_cnn_predictions : array-like
        Embeddings the neighbour index is fitted on.
    valid_cnn_predictions : array-like
        Embeddings to predict for.
    train_labels : numpy.ndarray
        Label of each training embedding (indexed with the neighbour indices).
    valid_labels : sequence
        True label of each validation embedding.
    new_individual_thres : float
        Confidence below which a neighbour is considered less likely than
        'new_individual'.

    Returns
    -------
    score : double
        Result of ``map_per_set`` over the validation set.
    """
    # Never request more neighbours than there are fitted samples (kneighbors
    # raises otherwise, e.g. on small debugging subsets).
    n_neighbors = min(CONFIG["neigh"], len(train_cnn_predictions))
    neigh = NearestNeighbors(n_neighbors=n_neighbors,metric="cosine")
    neigh.fit(train_cnn_predictions)

    distances,idxs = neigh.kneighbors(valid_cnn_predictions, return_distance=True)
    conf = 1-distances

    # Set lookup replaces a full scan of the label array for every validation row.
    known_train_labels = set(train_labels)

    allTop5Preds=[]
    valid_labels_list=[]
    for i in range(len(idxs)):
        true_label = valid_labels[i]
        valid_labels_list.append(true_label)

        predictTop = list(train_labels[idxs[i][:5]])
        Top5Conf = conf[i][:5]

        # Neighbours arrive closest-first, so the first confidence below the
        # threshold marks where 'new_individual' enters the ranking.
        insert_at = next(
            (k for k in range(len(Top5Conf)) if Top5Conf[k] < new_individual_thres),
            None,
        )
        if insert_at is None:
            ranked = predictTop
        else:
            # Keep the list at 5 entries: the last neighbour label drops out.
            ranked = predictTop[:insert_at] + ['new_individual'] + predictTop[insert_at:4]

        # A validation label that never occurs in the training labels counts as
        # a hit when 'new_individual' was predicted: substitute the true label
        # so map_per_set scores it as such.
        if 'new_individual' in ranked and true_label not in known_train_labels:
            ranked = [true_label if x=='new_individual' else x for x in ranked]

        allTop5Preds.append(ranked)

    score = map_per_set(valid_labels_list,allTop5Preds)

    return score

**We fit a NearestNeighbors algorithm on the data that was used as the training set in the training kernel and test the results on the validation data from the original kernel** (https://www.kaggle.com/vladvdv/pytorch-train-notebook-arcface-gem-pooling/notebook  )

In [None]:
# Embed the training rows first, then the validation rows, and keep the
# matching individual ids next to them.
df_train_cnn_predictions, df_valid_cnn_predictions = (
    np.array(inference(model, loader, CONFIG['device']))
    for loader in (train_loader, valid_loader)
)
train_cnn_labels, valid_cnn_labels = (
    np.array(frame['individual_id'].values)
    for frame in (df_train_cnn, df_valid_cnn)
)


In [None]:
# Grid search over the 'new_individual' confidence threshold: keep the value
# with the best validation score.
# The candidate list is built once so the progress message reports the true
# number of iterations of this loop.
thresholds = np.arange(0.1,0.9,0.1)
iteration=0
best_score = 0
best_thres = 0
for thres in thresholds:
    print("iteration ",iteration," of ",len(thresholds))        
    iteration+=1
    score = PredictGrid(df_train_cnn_predictions,df_valid_cnn_predictions, train_cnn_labels, valid_cnn_labels,new_individual_thres = thres)
    if (score > best_score):
        best_score = score
        best_thres = thres
    print("thres: ",thres,",score: ", score)
print("Best score is: ", best_score)
print("Best thres is: ", best_thres)

In [None]:
def GetSubmission(train_data,valid_data, train_labels,neighbors=100,metric='cosine', new_individual_thres=0.6):
    """Predict the top-5 labels for ``valid_data`` and write ``submission.csv``.

    A NearestNeighbors index is fitted on ``train_data``. For every query row the
    labels of its 5 nearest neighbours form the ranked prediction;
    'new_individual' is inserted in front of the first neighbour whose
    confidence (``1 - distance``) is below ``new_individual_thres``. Image names
    are taken, row by row, from the competition's sample_submission.csv.

    Parameters
    ----------
    train_data : array-like
        Embeddings the neighbour index is fitted on.
    valid_data : array-like
        Embeddings to predict for, in sample-submission row order.
    train_labels : numpy.ndarray
        Label of each row of ``train_data``.
    neighbors : int
        Number of neighbours requested (capped at ``len(train_data)``).
    metric : str
        Distance metric passed to NearestNeighbors.
    new_individual_thres : float
        Confidence below which 'new_individual' outranks a neighbour.

    Returns
    -------
    pandas.DataFrame
        The ``image`` / ``predictions`` table that was written to
        ``submission.csv``.
    """
    # Never request more neighbours than there are fitted samples.
    neigh = NearestNeighbors(n_neighbors=min(neighbors, len(train_data)),metric=metric)
    neigh.fit(train_data)
    distances,idxs = neigh.kneighbors(valid_data, return_distance=True)
    conf = 1-distances

    # Pull the image names out once instead of one DataFrame row lookup per prediction.
    df = pd.read_csv("../input/happy-whale-and-dolphin/sample_submission.csv")
    image_names = df['image'].values

    # Created before the loop so it exists even when there is nothing to predict.
    predictTopDecoded={}
    for i in range(len(distances)):
        predictTop = list(train_labels[idxs[i][:5]])
        topValues = conf[i][:5]

        # Neighbours arrive closest-first, so the first confidence below the
        # threshold marks where 'new_individual' enters the ranking.
        insert_at = next(
            (k for k in range(len(topValues)) if topValues[k] < new_individual_thres),
            None,
        )
        if insert_at is None:
            ranked = predictTop
        else:
            # Keep the list at 5 entries: the last neighbour label drops out.
            ranked = predictTop[:insert_at] + ['new_individual'] + predictTop[insert_at:4]

        predictTopDecoded[image_names[i]] = ' '.join(ranked)

    predictions = pd.Series(predictTopDecoded).reset_index()
    predictions.columns = ['image','predictions']
    predictions.to_csv('submission.csv',index=False)
    # Returned so the caller can inspect it; the former bare `predictions.head()`
    # inside the function had no effect.
    return predictions

In [None]:
# Embed the test images, then use every labelled embedding (training rows
# followed by validation rows) as the reference set for the final prediction.
test_cnn_predictions = np.array(inference(model, test_loader, CONFIG['device']))
allTrainData = np.concatenate([df_train_cnn_predictions, df_valid_cnn_predictions])
allTrainingLabels = np.concatenate([train_cnn_labels, valid_cnn_labels])
GetSubmission(
    allTrainData,
    test_cnn_predictions,
    allTrainingLabels,
    neighbors=CONFIG["neigh"],
    metric='cosine',
    new_individual_thres=best_thres,
)