In [None]:
import sys
sys.path.append('../input/saved-weights/ConvNeXt/ConvNeXt')
sys.path.append("../input/saved-weights/pytorch-image-models/pytorch-image-models")

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from torchvision import transforms
from torch import nn
from PIL import Image
from albumentations.pytorch import ToTensorV2
import albumentations.pytorch
import torchvision
import timm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from fastai.vision.all import *
from fastai.data.core import *
import gc
import random
import cuml, pickle
from cuml.svm import SVR
import models.convnext as cn_next

# Settings and parameters

In [None]:
def petfinder_rmse(input,target):
    """Root-mean-squared error between sigmoid-squashed logits and targets, times 100.

    Args:
        input: Raw model outputs (logits). Flattened to 1-D before the loss.
        target: Ground-truth tensor, handed to ``F.mse_loss`` as-is.
            # presumably Pawpularity rescaled to [0, 1] - TODO confirm against training code

    Returns:
        A 0-dim tensor holding ``100 * sqrt(mse(sigmoid(input), target))``.
    """
    # torch.sigmoid is the same op as the legacy F.sigmoid alias (which older
    # PyTorch releases flag as deprecated) and matches what `tta` uses.
    probabilities = torch.sigmoid(input.flatten())
    return 100*torch.sqrt(F.mse_loss(probabilities, target))

In [None]:
# Root directory under which the input datasets are mounted.
base_dir = '/kaggle/input'

# Directory holding the saved fold checkpoints and pickled SVR heads.
# `model_weights` is rebound to a dict of ensemble weights in the settings cell,
# so the path is also published as `weights_dir`, a name that is not shadowed.
weights_dir = os.path.join(base_dir, 'saved-weights')
model_weights = weights_dir  # kept for backward compatibility; overwritten later

# Competition test images and their metadata table.
test_folder = os.path.join(base_dir, 'petfinder-pawpularity-score', 'test')
test_file = os.path.join(base_dir, 'petfinder-pawpularity-score', 'test.csv')

In [None]:
# ---- Runtime ---------------------------------------------------------------
device = 'cuda' if torch.cuda.is_available() else 'cpu'
save_name = '/kaggle/working/'  # passed to the fastai Learner as `model_dir`

# ---- Cross-validation and regression head ----------------------------------
N_FOLDS = 5
batch_size = 64
embed_dim = 128                 # width of the replacement backbone head
hidden_dimension = [256, 64]    # widths of the hidden layers in the head
num_of_hidden = 2               # must equal len(hidden_dimension)

# ---- Image preprocessing ---------------------------------------------------
# Per-channel statistics handed to A.Normalize.
mean = [0.5023, 0.4615, 0.4226]
std_dev = [0.2640, 0.2593, 0.2575]
# (crop shape, longest-side resize) for the two supported resolutions.
input_shape_224, max_size_224 = (224, 224, 3), 384
input_shape_384, max_size_384 = (384, 384, 3), 480

# ---- Ensemble --------------------------------------------------------------
# Model name -> batch size used when building its loaders.
models_list = {
    'convnext_base': 128,
    'conv_next': 64,
    # Disabled members of the ensemble:
    #'beit_base_patch16_224_in22k': 128,
    #'swin_large_patch4_window12_384_in22k': 64,
    #'swin_base_patch4_window7_224_in22k': 128,
    #'swin_large_patch4_window7_224_in22k': 64
}

# Model name -> [SVR weight, network weight] used to blend the two predictions.
weights_svr = {
    'beit_base_patch16_224_in22k': [0.5,0.5],
    'swin_large_patch4_window12_384_in22k': [0.1, 0.9],
    'swin_base_patch4_window7_224_in22k': [0.5,0.5],
    'swin_large_patch4_window7_224_in22k': [0.4, 0.6],
    'conv_next': [0.2, 0.8],
    'convnext_base': [0.2, 0.8],
}

# Model name -> weight of that model in the final ensemble sum.
model_weights = {
    'beit_base_patch16_224_in22k': 0.0,
    'swin_large_patch4_window12_384_in22k': 0.0,
    'swin_base_patch4_window7_224_in22k': 0.0,
    'swin_large_patch4_window7_224_in22k': 0.0,
    'convnext_base': 0.3,
    'conv_next': 0.7,
}

# Dataset

In [None]:
# Load the test metadata table from `test_file` and preview the first rows.
test_csv = pd.read_csv(test_file)
test_csv.head()

In [None]:
# Full path of each test image, built from its Id.
test_csv['path_img'] = [os.path.join(test_folder, image_id + '.jpg') for image_id in test_csv['Id']]
# Placeholder label so the dataset can return an (image, tabular, label) triple.
test_csv['Pawpularity'] = 1

In [None]:
class PetsDataset(Dataset):
    """Dataset yielding ``(image, tabular_flags, label)`` for each row of a frame.

    Args:
        df: DataFrame with a ``path_img`` column, a ``Pawpularity`` column and
            the twelve flag columns listed in ``self.cat``.
        transform: Optional callable invoked as ``transform(image=ndarray)`` and
            returning a mapping with an ``'image'`` entry. When omitted, the
            decoded PIL image is returned untransformed.
    """

    def __init__(self, df, transform = None):
        self.transform = transform
        self.df = df

        # Tabular metadata columns fed to the network next to the image embedding.
        self.cat = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action',
       'Accessory', 'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, flags, label)`` for the row at position ``idx``."""
        img_path = self.df['path_img'].iloc[idx]
        label_1 = self.df['Pawpularity'].iloc[idx]

        # Decode inside a context manager so the file handle is released at once,
        # and force three channels: normalisation uses a 3-channel mean/std, so a
        # grayscale or RGBA file would otherwise fail downstream.
        with Image.open(img_path) as handle:
            img = handle.convert('RGB')

        if self.transform:
            album = self.transform(image = np.array(img))
            img = album['image']

        # float32 so the flags concatenate with the image embedding without
        # depending on implicit integer -> float promotion.
        df_data = self.df[self.cat].iloc[idx].values.astype(np.float32)

        return (img, df_data, label_1)

In [None]:
def get_data(batch_size, max_size, input_shape):
    """Build the test-time loaders over the module-level ``test_csv`` frame.

    The pipeline keeps its random augmentations (shift/scale/rotate, colour
    jitter, horizontal flip), so each pass over the loader sees a different
    view of every image.

    Args:
        batch_size: Batch size for both returned loaders.
        max_size: Target length of the longest image side after resizing.
        input_shape: ``(height, width, ...)``; only the first two entries are read.

    Returns:
        ``(dls, testloader)``: a fastai ``DataLoaders`` built from the dataset and
        a plain, non-shuffling PyTorch ``DataLoader`` over the same dataset.
    """
    crop_h, crop_w = input_shape[0], input_shape[1]

    pipeline = [
        # interpolation=1 / border_mode=0 are OpenCV's linear / constant codes.
        A.LongestMaxSize(max_size=max_size, interpolation=1),
        A.PadIfNeeded(min_height=crop_h, min_width=crop_w, border_mode=0,
                      value=(0,0,0)),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.transforms.ColorJitter(brightness = 0.1, contrast = 0.1, saturation = 0.1,
                                 hue = 0.1, p =0.6),
        A.CenterCrop(height = crop_h, width = crop_w),
        A.HorizontalFlip(p=0.6),
        A.Normalize(mean, std_dev),
        ToTensorV2(),
    ]
    test_transform = A.Compose(pipeline)

    test_dataset = PetsDataset(test_csv, test_transform)
    testloader = DataLoader(test_dataset, batch_size = batch_size, num_workers = 4, shuffle=False)
    dls = DataLoaders.from_dsets(test_dataset, bs = batch_size)

    return dls, testloader


# Model 

In [None]:
class Identity(nn.Module):
    """Pass-through module: ``forward`` hands back its argument untouched."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # No computation at all; the very same object is returned.
        return x

class Network(nn.Module):
    """Image backbone whose embedding, joined with tabular flags, feeds an MLP head.

    The backbone's ``head`` is replaced by a fresh linear layer of width
    ``embed_dim`` (module-level setting). Its output is concatenated with the
    tabular features and passed through the regression MLP.

    Args:
        base: Backbone module exposing a ``head`` attribute with ``in_features``.
        number_of_hidden: Number of hidden layers in the head; must equal
            ``len(hidden)``.
        hidden: Widths of the hidden layers. Not modified.
        regression_out: Output width of the final linear layer.
        output_categories: Unused; kept so existing call sites keep working.
        freeze_layer: Number of leading children of ``base`` whose parameters
            are frozen (``0`` freezes nothing).

    Raises:
        ValueError: If ``number_of_hidden`` differs from ``len(hidden)``.
    """

    def __init__(self, base, number_of_hidden, hidden, regression_out, output_categories, freeze_layer):
        super(Network, self).__init__()
        if number_of_hidden != len(hidden):
            # Raising a bare string is itself a TypeError in Python 3; raise a
            # real exception so the message reaches the caller.
            raise ValueError("Number of Hidden layer and length of hidden dim must be same")

        # Head input = image embedding + the 12 tabular flag columns.
        hidden_dim = [embed_dim + 12] + list(hidden)

        # Swap the backbone's head for a projection to `embed_dim`.
        base.head = nn.Linear(base.head.in_features, embed_dim)
        nn.init.kaiming_normal_(base.head.weight)
        nn.init.constant_(base.head.bias, 0)

        self.p = 0.5  # dropout probability used throughout the head

        self.regression = self.__fully_connected(number_of_hidden, hidden_dim, regression_out)

        self.network = self.__freeze_layer(base, freeze_layer)

        self.__initialise_weights()

    def __initialise_weights(self):
        """Kaiming-initialise linear layers and reset batch-norm layers in the head."""
        for m in self.regression:
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)

                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def __freeze_layer(self, base, freeze_layer):
        """Disable gradients for the first ``freeze_layer`` children of ``base``."""
        for position, child in enumerate(base.children()):
            if position >= freeze_layer:
                break

            for param in child.parameters():
                param.requires_grad = False
        return base

    def __fully_connected(self, number_of_hidden, hidden_dim, output_categories):
        """Build the head: Dropout, then Linear/GELU/BatchNorm blocks, then a Linear.

        ``hidden_dim`` holds ``number_of_hidden + 1`` widths (input width first).
        Dropout sits between hidden blocks but not after the last one.
        """
        layers = [nn.Dropout(self.p)]
        for i in range(number_of_hidden):
            layers.append(nn.Linear(hidden_dim[i], hidden_dim[i+1]))
            layers.append(nn.GELU())
            layers.append(nn.BatchNorm1d(hidden_dim[i+1]))
            if i != (number_of_hidden-1):
                layers.append(nn.Dropout(self.p))

        layers.append(nn.Linear(hidden_dim[-1], output_categories))

        return nn.Sequential(*layers)

    def forward(self,x, tab):
        """Return ``(regression_output, features)``.

        ``features`` is the backbone embedding concatenated with ``tab`` along
        dim 1, i.e. the exact input of the regression head.
        """
        x1 = self.network(x)
        x = torch.cat([x1, tab], dim=1)
        reg = self.regression(x)
        return reg,x

In [None]:
def get_learner(model_name, batch_size, loss, metric, max_size, input_size, conv_next, save_path):
    """Create an untrained fastai ``Learner`` plus the matching test loader.

    Args:
        model_name: Backbone identifier. With ``conv_next`` set,
            ``'convnext_base'`` selects the base ConvNeXt and any other name
            the large one; otherwise the name is passed to ``timm.create_model``.
        batch_size, max_size, input_size: Forwarded to ``get_data``.
        loss: Loss function for the learner.
        metric: Metric(s) for the learner.
        conv_next: Whether to build the backbone from the bundled ConvNeXt module.
        save_path: Used as the learner's ``model_dir``.

    Returns:
        ``(learn, testloader)``; ``learn`` has ``to_fp16()`` applied.
    """
    dls, testloader = get_data(batch_size,max_size, input_size)
    dls = dls.to(device)

    # Only the backbone differs between branches; the wrapper is always the same.
    if not conv_next:
        network = timm.create_model(model_name, pretrained = False)
    elif model_name == 'convnext_base':
        network = cn_next.convnext_base(pretrained = False)
    else:
        network = cn_next.convnext_large(pretrained = False)

    model = Network(network, num_of_hidden, hidden_dimension, 1, 11, 0)
    model = model.to(device)

    learn = Learner(dls, model, loss_func=loss, metrics=metric,
                    model_dir = save_path)
    learn = learn.to_fp16()
    return learn, testloader

In [None]:
def tta(testloader, learn, svr, svr_weight, tta_steps = 4):
    """Average blended network/SVR predictions over several passes of the loader.

    Each pass runs the whole loader through ``learn.model``, scales the sigmoid
    of the regression output to 0-100, predicts a second score with ``svr``
    from the returned feature vectors, and blends the two. The passes are then
    averaged element-wise.

    Args:
        testloader: Iterable of ``(images, tabular, label)`` batches; the label
            is ignored.
        learn: Object whose ``model(images, tabular)`` returns
            ``(regression_logits, features)``.
        svr: Fitted regressor exposing ``predict(features)``.
        svr_weight: ``[svr_weight, network_weight]`` blend coefficients.
        tta_steps: Number of passes over the loader (at least 1).

    Returns:
        1-D numpy array with one blended prediction per sample.

    Raises:
        ValueError: If ``tta_steps`` is smaller than 1.
    """
    if tta_steps < 1:
        raise ValueError("tta_steps must be at least 1")

    learn.model.eval()
    pass_outputs = []
    with torch.no_grad():
        for _ in range(tta_steps):
            batch_scores, batch_features = [], []
            for images, tabular, _label in testloader:
                images = images.to(device)
                # Cast explicitly so concatenation with the float embedding does
                # not rely on implicit dtype promotion.
                tabular = tabular.to(device).float()
                reg_output, embed = learn.model(images, tabular)
                scores = 100*torch.sigmoid(reg_output)
                batch_scores.append(scores.reshape(-1).cpu().numpy())
                batch_features.append(embed.cpu().numpy())

            # float64 keeps the blend numerically equal to a Python-float list.
            network_preds = np.concatenate(batch_scores).astype(np.float64)
            features = np.concatenate(batch_features)
            # Use the regressor passed in, not whatever `clf` exists globally.
            svr_preds = svr.predict(features)
            pass_outputs.append(svr_weight[0] * svr_preds + svr_weight[1] * network_preds)

    return np.mean(np.array(pass_outputs), axis = 0)

In [None]:
# Directory holding the fold checkpoints and pickled SVR heads.
weights_dir = os.path.join(base_dir, 'saved-weights')

# One entry per model: its fold-averaged prediction scaled by its ensemble weight.
final_predictions = []
for model_name, bs in models_list.items():
    fold_tta = []
    svr_weights = weights_svr[model_name]
    w = model_weights[model_name]

    # File-name prefix and input resolution are derived from the model name:
    #   * ConvNeXt models use the full name and the 384 pipeline,
    #   * other models use the name up to the first '_' at or after index 5,
    #     with '_384' appended when the name mentions 384.
    conv_next = model_name.find('conv') != -1
    m_384 = conv_next or model_name.find('384') != -1
    if conv_next:
        m_name = model_name
    else:
        m_name = model_name[:model_name.find('_', 5)]
        if m_384:
            m_name = m_name+'_384'

    if m_384:
        max_size, input_shape = max_size_384, input_shape_384
    else:
        max_size, input_shape = max_size_224, input_shape_224

    print("#################################")
    print("Testing Model: {}".format(m_name))
    print("#################################")
    for fold in range(N_FOLDS):
        print("Fold: {}".format(fold))
        saved_name = m_name+"_fold_{}_full.pth".format(fold)
        svr_name = m_name+"_svr_fold_{}_full.pkl".format(fold)

        learn, testloader = get_learner(model_name, bs, BCEWithLogitsLossFlat(),
                                        petfinder_rmse, max_size, input_shape, conv_next, save_name)

        # NOTE(review): fastai's Learner.load appears to append '.pth' to the
        # given name - confirm the checkpoint file names on disk match.
        learn.load(os.path.join(weights_dir, saved_name))

        # SECURITY: pickle.load executes arbitrary code; only load SVR files
        # from a trusted dataset. The `with` block closes the handle promptly.
        with open(os.path.join(weights_dir, svr_name), "rb") as svr_file:
            clf = pickle.load(svr_file)

        fold_tta.append(tta(testloader, learn, clf,svr_weights))

        # Drop the model, collect garbage first so its tensors are actually
        # unreferenced, then hand the cached GPU memory back.
        del learn
        gc.collect()
        torch.cuda.empty_cache()

    # Average across folds, then scale by this model's ensemble weight.
    fold_tta = np.mean(np.array(fold_tta), axis = 0)
    final_predictions.append(w*fold_tta)

In [None]:
final_predictions = np.array(final_predictions)

In [None]:
final_predictions = np.sum(final_predictions, axis = 0)

In [None]:
test_csv['Pawpularity'] = final_predictions

In [None]:
test_csv = test_csv[["Id", "Pawpularity"]]

In [None]:
test_csv.head()

In [None]:
test_csv.to_csv("submission.csv", index=False)