In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

import timm
from tqdm import tqdm
import math
import random
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import NearestNeighbors

# Visuals and CV2
import cv2

# albumentations for augs
import albumentations
from albumentations.pytorch.transforms import ToTensorV2

import torch
import timm
import torch.nn as nn
from torch.nn import Parameter
from torch.nn import functional as F
from torch.utils.data import Dataset,DataLoader

from torchvision import datasets, transforms

# 1. Config

In [None]:
# --- Input locations ---------------------------------------------------------
# Folder the test image file names are resolved against, and the checkpoint
# whose weights are loaded into ShopeeNet.
IM_FOLDER = '../input/shopee-product-matching/test_images'
MODEL_PATH = '../input/shopeekfoldevaluation/kfolds_strategies_evaluation/kfolds_strategies_evaluation/train_n_lbgr_88_3fold/Fold02_Valid0.725_Train0.724_Ep003.pth'

# NOTE(review): FOIS and SAMPLE are not referenced by any visible cell;
# presumably left over from the training notebook - confirm before removing.
FOIS = [0, 1, 2]
SAMPLE = None

# --- Image geometry ----------------------------------------------------------
DIM = (512,512)   # passed as (DIM[0], DIM[1]) to albumentations.Resize
EMB_SIZE = 1536   # NOTE(review): not used in the visible cells; presumably the backbone feature width - confirm

# --- Loader / training hyper-parameters --------------------------------------
# Only NUM_WORKERS and VALID_BATCH_SIZE are used by the visible inference cells.
NUM_WORKERS = 4
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 16
SEED = 2020
LR = 3e-4

################################################# MODEL ####################################################################
model_name = 'efficientnet_b3' #efficientnet_b0-b7

################################################ Metric Loss and its params #######################################################
loss_module = 'arcface' #'cosface' #'adacos'
s = 30.0
m = 0.5 
ls_eps = 0.0
easy_margin = False

############################################## Model Params ###############################################################
# Keyword arguments forwarded verbatim to ShopeeNet(**model_params).
# Values that already exist as module-level settings are referenced instead of
# being repeated as literals, so the two can never silently drift apart.
model_params = {
    'model_name':model_name,
    'use_fc':False,
    'fc_dim':512,
    'dropout':0.0,
    'loss_module':loss_module,
    's':s,
    'margin':28.6, # degree (0.5 radian)
    'ls_eps':ls_eps,
    'theta_zero':0.785,
    'pretrained':None
}

########### Device ###########
# Fall back to the CPU when no GPU is visible so that the notebook still runs
# end-to-end (slowly) instead of crashing at the first `.to(DEVICE)` call.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def get_valid_transforms():
    """Build the deterministic preprocessing pipeline used at inference time.

    The steps are applied in order:
      1. resize to ``DIM[0]`` x ``DIM[1]``,
      2. ``albumentations.Normalize()`` with the library's default settings,
      3. ``ToTensorV2`` to convert the image to a tensor.

    Returns:
        An ``albumentations.Compose`` object wrapping the three steps.
    """
    target_height, target_width = DIM[0], DIM[1]
    steps = [
        albumentations.Resize(target_height, target_width, always_apply=True),
        albumentations.Normalize(),
        ToTensorV2(p=1.0),
    ]
    return albumentations.Compose(steps)

In [None]:
class ShopeeDataset(Dataset):
    """Image dataset backed by a DataFrame that has a ``filepath`` column.

    If the frame also has a ``label_group`` column the dataset yields
    ``(image, label)`` pairs; otherwise it is treated as a test set and
    yields the image only.

    Args:
        csv: DataFrame with one row per image. ``filepath`` is required,
            ``label_group`` is optional.
        transforms: optional callable invoked as ``transforms(image=img)`` and
            expected to return a mapping with an ``'image'`` entry (the
            albumentations calling convention).
    """

    def __init__(self, csv, transforms=None):
        # reset_index() makes positional lookups in __getitem__ independent of
        # whatever index the caller's frame carried (e.g. after filtering);
        # the previous index is kept as an extra column.
        self.csv = csv.reset_index()
        self.augmentations = transforms

        # No label column -> test mode (images only).
        self.is_test = 'label_group' not in self.csv.columns

    def __len__(self):
        """Number of rows (images) in the underlying frame."""
        return self.csv.shape[0]

    def __getitem__(self, index):
        """Load, colour-convert and (optionally) transform the image at ``index``.

        Returns:
            ``image`` in test mode, otherwise ``(image, label_tensor)``.

        Raises:
            OSError: if ``cv2.imread`` cannot read the file at ``row.filepath``.
        """
        row = self.csv.iloc[index]

        image = cv2.imread(row.filepath)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise OSError('cv2.imread could not read image: {!r}'.format(row.filepath))
        # OpenCV decodes to BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations:
            image = self.augmentations(image=image)['image']

        if self.is_test:
            return image
        return image, torch.tensor(row.label_group)

In [None]:
class ShopeeNet(nn.Module):
    """Embedding network: timm backbone -> global average pool -> optional FC head.

    The backbone's own classifier and global pooling layers are replaced with
    ``nn.Identity`` so that it emits a feature map, which is then pooled by
    ``nn.AdaptiveAvgPool2d(1)`` and flattened to one vector per image.
    """

    def __init__(self,
                 model_name='efficientnet_b0',
                 use_fc=False,
                 fc_dim=512,
                 dropout=0.0,
                 loss_module='softmax',
                 s=30.0,
                 margin=0.50,
                 ls_eps=0.0,
                 theta_zero=0.785,
                 pretrained=None):
        """
        :param model_name: architecture name handed to ``timm.create_model``
        :param use_fc: when true, append Dropout -> Linear -> BatchNorm1d
        :param fc_dim: output width of the optional Linear layer
        :param dropout: drop probability of the optional Dropout layer
        :param loss_module: accepted but not used inside this class
        :param s: accepted but not used inside this class
        :param margin: accepted but not used inside this class
        :param ls_eps: accepted but not used inside this class
        :param theta_zero: accepted but not used inside this class
        :param pretrained: optional path to a backbone state dict, loaded
            with ``torch.load`` before the classifier is stripped
        """
        super().__init__()
        print('Building Model Backbone for {} model'.format(model_name))

        self.backbone = timm.create_model(model_name, pretrained=False)
        if pretrained:
            print('Loading pretrained model from:', pretrained)
            backbone_weights = torch.load(pretrained, map_location='cpu')
            self.backbone.load_state_dict(backbone_weights)

        # Read the feature width before the classifier is swapped out.
        backbone_width = self.backbone.classifier.in_features

        self.backbone.classifier = nn.Identity()
        self.backbone.global_pool = nn.Identity()

        self.pooling = nn.AdaptiveAvgPool2d(1)

        self.use_fc = use_fc
        if self.use_fc:
            print('use_fc')
            self.dropout = nn.Dropout(p=dropout)
            self.fc = nn.Linear(backbone_width, fc_dim)
            self.bn = nn.BatchNorm1d(fc_dim)
            self._init_params()

    def _init_params(self):
        """Xavier-normal FC weights, zero biases, unit BatchNorm scale."""
        nn.init.xavier_normal_(self.fc.weight)
        for zeroed in (self.fc.bias, self.bn.bias):
            nn.init.constant_(zeroed, 0)
        nn.init.constant_(self.bn.weight, 1)

    def forward(self, x):
        """Return one flat embedding per input sample."""
        n_samples = x.shape[0]
        pooled = self.pooling(self.backbone(x)).view(n_samples, -1)

        if not self.use_fc:
            return pooled

        return self.bn(self.fc(self.dropout(pooled)))

# 2. Load model

In [None]:
# Build the network and restore the fine-tuned weights from MODEL_PATH.
model = ShopeeNet(**model_params)
model.to(DEVICE)
# Switch to inference mode: without this, BatchNorm layers normalise with the
# statistics of each incoming batch and Dropout stays active, so embeddings
# depend on batch composition instead of the trained running statistics.
model.eval()
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
# Kept as the last expression so the cell shows the key-matching report.
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))

# 3. Predict

In [None]:
def getMetric(col):
    """Create a row-wise F1 scorer comparing ``row.target`` with ``row[col]``.

    Args:
        col: name of the field holding the predicted ids.

    Returns:
        A function ``f1score(row)`` computing
        ``2 * |target ∩ pred| / (len(target) + len(pred))``, where the
        intersection is taken over unique values (``np.intersect1d``).
    """
    def f1score(row):
        truth = row.target
        predicted = row[col]
        shared = np.intersect1d(truth, predicted).size
        return 2 * shared / (len(truth) + len(predicted))

    return f1score

def compute_f1(df, pred_col='preds'):
    """Mean row-wise F1 of predicted matches against label-group membership.

    Each row's target is the array of unique ``posting_id`` values that share
    its ``label_group``; the prediction is read from ``pred_col``.

    Args:
        df: frame with ``label_group``, ``posting_id`` and ``pred_col``
            columns. It is copied, not modified.
        pred_col: column holding the predicted ids for each row.

    Returns:
        The mean of the per-row F1 scores.
    """
    scored = df.copy()
    members_by_group = scored.groupby('label_group')['posting_id'].unique().to_dict()
    scored['target'] = scored['label_group'].map(members_by_group)
    row_scorer = getMetric(pred_col)
    scored['f1'] = scored.apply(row_scorer, axis=1)
    return scored['f1'].mean()

In [None]:
def predict_fn(dataloader, df, model, k=50, metric='cosine', threshold=0.4):
    """Embed every image, then match each row to its near neighbours.

    Args:
        dataloader: iterable of batches. A batch is either an image tensor or
            a tuple/list whose first element is the image tensor.
        df: frame with a ``posting_id`` column, in the same row order as the
            samples produced by ``dataloader``.
        model: callable mapping an image batch to a 2-D embedding tensor.
            When it is an ``nn.Module`` it is run in eval mode and its
            previous train/eval state is restored afterwards.
        k: maximum number of neighbours retrieved per row (clamped to the
            number of embedded samples).
        metric: distance metric handed to ``NearestNeighbors``.
        threshold: neighbours with distance strictly below this are kept.

    Returns:
        A copy of ``df`` with an extra ``preds`` column; each entry is the
        array of ``posting_id`` values of the retained neighbours.
    """
    # Inference must not use batch statistics (BatchNorm) or random masking
    # (Dropout); remember the caller's mode so it can be restored.
    is_module = isinstance(model, nn.Module)
    was_training = model.training if is_module else False
    if is_module:
        model.eval()

    emb_list = []
    try:
        # no_grad avoids building the autograd graph for every batch.
        with torch.no_grad():
            for images in tqdm(dataloader):
                if isinstance(images, (tuple, list)):
                    images = images[0]
                images = images.to(DEVICE)
                embeddings = model(images).detach().cpu().numpy()

                # L2-normalise each row; the floor keeps an all-zero embedding
                # from turning into NaNs, which NearestNeighbors rejects.
                norms = np.linalg.norm(embeddings, 2, axis=1, keepdims=True)
                embeddings = embeddings / np.maximum(norms, 1e-12)

                emb_list.append(embeddings)
    finally:
        if is_module:
            model.train(was_training)

    emb_vectors = np.vstack(emb_list)
    print(emb_vectors.shape)

    # kneighbors raises when asked for more neighbours than fitted samples.
    n_neighbors = min(k, len(emb_vectors))
    knn = NearestNeighbors(n_neighbors=n_neighbors, metric=metric)
    knn.fit(emb_vectors)
    distances, indices = knn.kneighbors(emb_vectors)

    res_df = df.copy()
    # Hoisted out of the loop: one positional array lookup per row instead of
    # building a temporary DataFrame slice each time.
    posting_ids = df.posting_id.values
    preds = []
    for dist, inds in tqdm(zip(distances, indices), total=len(distances)):
        keep = np.where(dist < threshold)[0]
        preds.append(posting_ids[inds[keep]])

    res_df['preds'] = preds

    return res_df

In [None]:
# Read the test metadata and resolve every image file name against IM_FOLDER.
test_df = pd.read_csv('../input/shopee-product-matching/test.csv')
test_df['filepath'] = test_df['image'].map(lambda image_name: os.path.join(IM_FOLDER, image_name))

# shuffle=False and drop_last=False keep the batches in dataframe row order
# and cover every row, which predict_fn relies on when it maps neighbour
# indices back to posting ids.
test_dataset = ShopeeDataset(csv=test_df, transforms=get_valid_transforms())
test_loader = DataLoader(
    test_dataset,
    batch_size=VALID_BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False,
    pin_memory=True,
    drop_last=False,
)

In [None]:
# Never request more neighbours than there are rows in the test set.
K = min(50, len(test_df))

test_res_df = predict_fn(test_loader, test_df, model, k=K)

In [None]:
# Flatten each row's array of matched posting ids into one space-separated string.
test_res_df['matches'] = test_res_df['preds'].apply(lambda matched_ids: ' '.join(matched_ids.tolist()))

In [None]:
# Keep only the two columns written to the submission file.
sub = test_res_df.loc[:, ['posting_id', 'matches']]

In [None]:
# Write the submission without the dataframe index column.
sub.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# valid_df = pd.read_csv('../input/shopeekfoldevaluation/train_vanila_88_3fold.csv').sort_values('label_group')
# valid_df = valid_df[valid_df.fold==0]
# valid_df['filepath'] = valid_df['image'].apply(lambda x: os.path.join('../input/shopee-product-matching/train_images', x))

# le = LabelEncoder()
# valid_df['label_group'] = le.fit_transform(valid_df.label_group)

# valid_dataset = ShopeeDataset(csv=valid_df, transforms=get_valid_transforms(),)
# valid_loader = torch.utils.data.DataLoader(valid_dataset,batch_size=16,num_workers=NUM_WORKERS,
#                                                shuffle=False,pin_memory=True,drop_last=False)

# valid_res_df = predict_fn(valid_loader, valid_df, model)

# compute_f1(valid_res_df)

In [None]:
# all_df = pd.read_csv('../input/shopeekfoldevaluation/train_vanila_88_3fold.csv').sort_values('label_group')
# all_df['filepath'] = all_df['image'].apply(lambda x: os.path.join('../input/shopee-product-matching/train_images', x))

# le = LabelEncoder()
# all_df['label_group'] = le.fit_transform(all_df.label_group)

# all_dataset = ShopeeDataset(csv=all_df, transforms=get_valid_transforms(),)
# all_loader = torch.utils.data.DataLoader(all_dataset,batch_size=16,num_workers=NUM_WORKERS,
#                                                shuffle=False,pin_memory=True,drop_last=False)

# all_res_df = predict_fn(all_loader, all_df, model)

# compute_f1(all_res_df)

In [None]:
# a = next(iter(all_loader))