# Pretrained SWIN Transformer (Inference)

**Description:** Millions of stray animals suffer on the streets or are euthanized in shelters every day around the world. A good picture of a homeless animal might increase its chance of getting adopted. But what makes a good picture? Our mission is to build an ML model that can accurately determine a pet photo's appeal and even suggest improvements, giving these rescue animals a higher chance of finding loving homes.

This competition is organized by PetFinder.my, Malaysia's leading animal welfare platform, featuring 180,000 animals with 54,000 happily adopted. If we can develop a model that is able to provide accurate recommendations, it will be adapted into AI tools that guide shelters and rescuers around the world in improving the photo quality of their shelter pets, which in the end will increase the chances of stray animals getting adopted.

**Data:** 9912 images of pet animals labeled with "Pawpularity". Photo Metadata = (Focus, Eyes, Face, Near, Action, Accessory, Group, Collage, Human, Occlusion, Info, Blur)

![](https://pbs.twimg.com/media/CvhLlXxXgAA5TDJ.jpg)


# Introduction

This notebook runs inference with models that I trained in another notebook. Please refer to the baseline notebook link for details.

Baseline notebook: 

Referenced notebook: https://www.kaggle.com/manabendrarout/transformers-classifier-method-starter-infer

In [None]:
import sys
# Add the timm source tree shipped as a Kaggle input dataset to the module search path.
# NOTE(review): presumably this is what lets the later `import timm` work offline;
# because the entry is appended (not inserted first), an already-installed timm
# would take precedence over this copy — confirm that is intended.
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import os
import path
import random
import cv2
import timm
import gc
import albumentations
from albumentations.pytorch.transforms import ToTensorV2
from tqdm import tqdm
from collections import defaultdict

# Import PyTorch Libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Locations of the competition data: the CSV files live in `csv_dir`,
# the test images in `test_dir`.
csv_dir = '../input/petfinder-pawpularity-score'
test_dir = '../input/petfinder-pawpularity-score/test'

# Both CSV paths are built the same way, so derive them in one pass.
test_file_path, sample_sub_file_path = (
    os.path.join(csv_dir, csv_name)
    for csv_name in ('test.csv', 'sample_submission.csv')
)
print(f'Test file: {test_file_path}')

In [None]:
# Read the test metadata and the sample submission into DataFrames.
test_df, sample_df = (
    pd.read_csv(csv_path)
    for csv_path in (test_file_path, sample_sub_file_path)
)

In [None]:
def return_filpath(name, folder):
    """Return the path of the file ``<name>.jpg`` located inside ``folder``.

    ``name`` is formatted into the file name, so it does not have to be a string.
    """
    filename = f'{name}.jpg'
    return os.path.join(folder, filename)

In [None]:
# Resolve every image Id to the path of its JPEG inside the test folder.
test_df['image_path'] = test_df['Id'].map(
    lambda image_id: return_filpath(image_id, folder=test_dir)
)

In [None]:
# Preview the first five rows (5 is the pandas default for head()).
test_df.head(n=5)

In [None]:
# Column bookkeeping: everything in the test frame that is not an identifier,
# fold marker, image path or the target itself is treated as a metadata feature.
target = ['Pawpularity']
not_features = ['Id', 'kfold', 'image_path', 'Pawpularity']
cols = test_df.columns.tolist()
features = list(filter(lambda col: col not in not_features, cols))
print(features)

# Params

In [None]:
# Central inference configuration, read by the transform, model and loader code.
params = dict(
    model='swin_large_patch4_window12_384',  # architecture name handed to timm.create_model
    features=features,                       # metadata columns fed to the model alongside the image
    pretrained=False,                        # weights are loaded from saved checkpoints instead
    inp_channels=3,
    im_size=384,                             # side length images are resized to
    device=device,
    batch_size=8,
    num_workers=2,
    out_features=1,
    debug=False,                             # True -> run on a 10% sample of the test set
)

In [None]:
# In debug mode keep only a random 10% of the test rows for a quick run.
test_df = test_df.sample(frac=0.1) if params['debug'] else test_df

# Augmentations

In [None]:
def get_test_transforms(DIM = params['im_size']):
    """Build the deterministic inference pipeline for a single image.

    Steps: resize to ``DIM`` x ``DIM``, normalize each channel, convert to a
    torch tensor. No random augmentation is applied.
    """
    resize = albumentations.Resize(DIM, DIM)
    # Per-channel mean/std (the standard ImageNet channel statistics).
    normalize = albumentations.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    to_tensor = ToTensorV2(p=1.0)
    return albumentations.Compose([resize, normalize, to_tensor])

# Dataset

In [None]:
class CuteDataset(Dataset):
    """Dataset yielding ``(image, dense_features, label)`` triples.

    Args:
        images_filepaths: indexable collection of image file paths.
        dense_features: 2-D array of metadata features, indexed as
            ``dense_features[idx, :]`` (one row per image).
        targets: indexable collection of labels, one per image.
        transform: optional callable invoked as ``transform(image=...)``;
            its ``'image'`` entry replaces the raw image (albumentations style).
    """

    def __init__(self, images_filepaths, dense_features, targets, transform=None):
        self.images_filepaths = images_filepaths
        self.dense_features = dense_features
        self.targets = targets
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images_filepaths)

    def __getitem__(self, idx):
        """Load, colour-convert and transform the image at position ``idx``.

        Returns:
            Tuple ``(image, dense, label)`` where ``label`` is a float tensor.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        image_filepath = self.images_filepaths[idx]
        image = cv2.imread(image_filepath)
        # cv2.imread signals failure by returning None instead of raising, which
        # would otherwise surface as an opaque error inside cvtColor.
        if image is None:
            raise FileNotFoundError(
                f'Could not read image (missing or unreadable file): {image_filepath}'
            )
        # OpenCV loads images in BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']

        dense = self.dense_features[idx, :]
        label = torch.tensor(self.targets[idx]).float()
        return image, dense, label

# Model (Swin Transformer)

In [None]:
class PetNet(nn.Module):
    """timm backbone fused with tabular metadata.

    The backbone's head is replaced by a 128-unit projection; that projection
    is concatenated with the metadata features and passed through a small MLP
    producing one output value per sample.
    """

    def __init__(self, model_name=params['model'], pretrained=params['pretrained'], features=len(params['features']) ):
        super().__init__()
        backbone = timm.create_model(model_name=model_name, pretrained=pretrained, in_chans=3)
        # Swap the stock head for our own 128-unit linear projection.
        backbone.head = nn.Linear(backbone.head.in_features, 128)
        self.model = backbone

        # MLP over [image projection | metadata features] -> single output.
        mlp_layers = [
            nn.Linear(128 + features, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        ]
        self.fully_connect = nn.Sequential(*mlp_layers)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, image, features):
        # Dropout regularizes the image projection (it is only active in training mode).
        embedding = self.dropout(self.model(image))
        # Append the metadata features along the feature dimension.
        fused = torch.cat((embedding, features), dim=1)
        return self.fully_connect(fused)

In [None]:
# class PetNet2(nn.Module):
#     def __init__(self, model_name="tf_efficientnet_b0_ns", pretrained=False, features=len(params['features']) ):
#         super().__init__()
#         self.model = timm.create_model(model_name=model_name, pretrained=pretrained, in_chans=3)
#          # Replace the classifier layers in model with our own Linear layer
#         num_features = self.model.classifier.in_features
#         self.model.classifier = nn.Linear(num_features, 128)
#         self.fully_connect = nn.Sequential(nn.Linear(128 + features, 64),
#                                            nn.ReLU(),
#                                            nn.Linear(64, 1)
#                                           )
#         self.dropout = nn.Dropout(p=0.4)
    
#     def forward(self, image, features):
#         x = self.model(image)
#         x = self.dropout(x)
#         x = torch.cat([x, features], dim=1)
#         output = self.fully_connect(x)
#         return output


# Prediction

In [None]:
# Ensemble 1: average the predictions of the fold checkpoints stored in
# ../input/swin-transformer-3rd-model.

# The dataset and loader do not depend on the checkpoint, so build them once
# rather than once per fold.
test_dataset = CuteDataset(
    images_filepaths = test_df['image_path'].values,
    dense_features = test_df[params['features']].values,
    # The labels are never used below; the sample-submission column only fills
    # the `targets` slot that CuteDataset requires.
    targets = sample_df['Pawpularity'].values,
    transform = get_test_transforms()
)
test_loader = DataLoader(
    test_dataset, batch_size=params['batch_size'],
    shuffle=False, num_workers=params['num_workers'],
    pin_memory=True
)

n_folds = 4  # number of checkpoints (..._f0.pth to ..._f3.pth) in this ensemble
predictions_nn = None
for fold in range(n_folds):
    model = PetNet()
    # map_location keeps loading working on a CPU-only machine in case the
    # checkpoint tensors were saved from a GPU.
    model.load_state_dict(torch.load(
        f"../input/swin-transformer-3rd-model/swin_large_patch4_window12_384_epoch_f{fold}.pth",
        map_location=device
    ))
    model.eval()
    model.to(device)

    # Collect per-batch outputs and stack once at the end instead of
    # re-copying the growing array on every batch.
    batch_preds = []
    with torch.no_grad():
        for (images, dense, _labels) in tqdm(test_loader, desc=f'Predicting. '):
            images = images.to(device)
            dense = dense.to(device)
            # Sigmoid maps the raw output to (0, 1); x100 rescales it to 0-100.
            predictions = torch.sigmoid(model(images, dense)).to('cpu').numpy()*100
            batch_preds.append(predictions)
    temp_preds = np.vstack(batch_preds)

    # Accumulate the per-fold predictions; they are averaged after the loop.
    if predictions_nn is None:
        predictions_nn = temp_preds
    else:
        predictions_nn += temp_preds

predictions_nn /= n_folds
print(predictions_nn)

In [None]:
# Ensemble 2: average the predictions of the fold checkpoints stored in
# ../input/swin-transform-2nd-model.

# The dataset and loader do not depend on the checkpoint, so build them once
# rather than once per fold.
test_dataset = CuteDataset(
    images_filepaths = test_df['image_path'].values,
    dense_features = test_df[params['features']].values,
    # The labels are never used below; the sample-submission column only fills
    # the `targets` slot that CuteDataset requires.
    targets = sample_df['Pawpularity'].values,
    transform = get_test_transforms()
)
test_loader = DataLoader(
    test_dataset, batch_size=params['batch_size'],
    shuffle=False, num_workers=params['num_workers'],
    pin_memory=True
)

n_folds_2 = 8  # number of checkpoints (..._f0.pth to ..._f7.pth) in this ensemble
predictions_nn_2 = None
for fold in range(n_folds_2):
    model = PetNet()
    # map_location keeps loading working on a CPU-only machine in case the
    # checkpoint tensors were saved from a GPU.
    model.load_state_dict(torch.load(
        f"../input/swin-transform-2nd-model/swin_large_patch4_window12_384_epoch_f{fold}.pth",
        map_location=device
    ))
    model.eval()
    model.to(device)

    # Collect per-batch outputs and stack once at the end instead of
    # re-copying the growing array on every batch.
    batch_preds = []
    with torch.no_grad():
        for (images, dense, _labels) in tqdm(test_loader, desc=f'Predicting. '):
            images = images.to(device)
            dense = dense.to(device)
            # Sigmoid maps the raw output to (0, 1); x100 rescales it to 0-100.
            predictions = torch.sigmoid(model(images, dense)).to('cpu').numpy()*100
            batch_preds.append(predictions)
    temp_preds = np.vstack(batch_preds)

    # Accumulate the per-fold predictions; they are averaged after the loop.
    if predictions_nn_2 is None:
        predictions_nn_2 = temp_preds
    else:
        predictions_nn_2 += temp_preds

predictions_nn_2 /= n_folds_2
print(predictions_nn_2)

# Submission

In [None]:
# Build the submission frame: one row per test image with the blended score.
sub_df = pd.DataFrame()
sub_df['Id'] = test_df['Id']
# Equal-weight blend of the two ensembles. Each prediction array is a stack of
# (batch, 1) model outputs, i.e. a column vector, so flatten it to 1-D before
# storing it as a single DataFrame column.
sub_df['Pawpularity'] = np.ravel((predictions_nn + predictions_nn_2) / 2)
print(sub_df['Pawpularity'])

In [None]:
# Preview the first five submission rows (5 is the pandas default for head()).
sub_df.head(n=5)

In [None]:
# Write the submission file without the DataFrame index column.
sub_df.to_csv(path_or_buf='submission.csv', index=False)