In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import metrics

import os
import path

In [None]:
import math

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a scalar ``x``.

    The computation is split by sign so the exponential is only ever taken
    of a non-positive number. The naive form ``1 / (1 + math.exp(-x))``
    raises ``OverflowError`` once ``-x`` exceeds roughly 709.

    Args:
        x: A real scalar (anything ``math.exp`` accepts).

    Returns:
        A float in the closed interval [0.0, 1.0].
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x use the algebraically equivalent e**x / (1 + e**x);
    # math.exp(x) underflows harmlessly to 0.0 instead of overflowing.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

In [None]:
# Load the out-of-fold predictions file into a DataFrame.
# Later cells read its 'Pawpularity' and 'preds' columns.
oof_values = pd.read_csv('../input/swim-classify/oof_df.csv') 
#oof_values

In [None]:
# Pass the stored 'preds' values through the sigmoid and rescale the result
# by 100, overwriting the column in place.
# NOTE: because the column is overwritten, running this cell twice applies
# the transformation twice.
oof_values['preds'] = oof_values['preds'].apply(sigmoid) * 100

In [None]:
# Overall RMSE between the true 'Pawpularity' values and the rescaled
# predictions. The square root is taken explicitly because the `squared`
# keyword of `mean_squared_error` was deprecated in scikit-learn 1.4 and
# removed in 1.6, so `squared=False` fails on current releases.
cv_score = np.sqrt(
    metrics.mean_squared_error(oof_values['Pawpularity'], oof_values['preds'])
)
cv_score

In [None]:
# Per-row error between target and prediction.
# The RMSE of a single (target, prediction) pair is simply |target - prediction|,
# so compute it as one vectorized expression instead of calling
# `metrics.mean_squared_error` once per row (slow, and reliant on the
# `squared=False` keyword that recent scikit-learn releases removed).
oof_values['error'] = (oof_values['Pawpularity'] - oof_values['preds']).abs()

display(oof_values.head())

In [None]:
def make_ranges(value):
    """Map a numeric value to a bucket number from 1 to 10.

    Bucket ``k`` (for k = 1..9) holds values up to and including ``10 * k``
    that did not fit an earlier bucket; bucket 10 holds everything above 90.
    A value that compares False against every bound (e.g. NaN) yields
    ``None``.
    """
    # Walk the inclusive upper bounds 10, 20, ..., 90 in ascending order;
    # the first bound the value does not exceed decides the bucket.
    for bucket, upper_bound in enumerate(range(10, 100, 10), start=1):
        if value <= upper_bound:
            return bucket
    if value > 90:
        return 10

# Bucket every true 'Pawpularity' value with make_ranges and store the
# bucket number in a new 'range' column.
oof_values['range'] = oof_values['Pawpularity'].apply(make_ranges)

In [None]:
# Categorical plot of the per-row 'error' values grouped by 'range' bucket.
sns.catplot(x = 'range', y = 'error', data = oof_values)

In [None]:
# Print the median of the 'error' column for each 'range' bucket, 1 through 10,
# one value per line.
for bucket in range(1, 11):
    in_bucket = oof_values['range'] == bucket
    print(oof_values.loc[in_bucket, 'error'].median())
# Observation: the model seems to predict Pawpularity values of 20 - 50 well.

In [None]:
# Root directory that the following cells scan with os.listdir.
MAIN_PATH = '../input/stanford-dogs-dataset/images/Images'
# Names of the entries directly under MAIN_PATH, in whatever order
# os.listdir returns them.
main_categories = list(os.listdir(MAIN_PATH))

In [None]:
# Count the entries inside every sub-directory of MAIN_PATH.
# The loop variable is called `category_dir` rather than `dir` so the
# builtin `dir()` is not shadowed in the notebook's global namespace.
total_images = sum(
    len(os.listdir(os.path.join(MAIN_PATH, category_dir)))
    for category_dir in os.listdir(MAIN_PATH)
)
print(total_images)

# Inference on external data

In [None]:
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import warnings
import sklearn.exceptions
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)

from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import os
import glob
import random
import cv2
pd.set_option('display.max_columns', None)

import albumentations
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision
import timm
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

RANDOM_SEED = 42

def seed_everything(seed=RANDOM_SEED):
    """Seed every random number generator this notebook uses.

    Sets ``PYTHONHASHSEED`` and seeds NumPy, the stdlib ``random`` module and
    PyTorch (CPU and all CUDA devices), then configures cuDNN for
    reproducible behaviour.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also seed any additional GPUs; this is a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune its algorithm choice per run, which
    # works against the determinism requested on the line above.
    torch.backends.cudnn.benchmark = False
    
seed_everything()

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f'Using device: {device}')

In [None]:
# ====================================================
# CFG
# ====================================================
class CFG:
    """Constants shared by the inference cells of this notebook."""
    # Worker-process count passed to the DataLoader.
    num_workers = 4
    # Side length used as the default resize dimension in get_test_transforms.
    size = 224
    # Batch size passed to the DataLoader.
    batch_size = 10
    # Architecture name that PetNet hands to timm.create_model by default.
    model_name = 'swin_base_patch4_window7_224'
    # NOTE(review): the cells visible here never read `seed`; seeding uses RANDOM_SEED.
    seed = 42
    # NOTE(review): `target_size` and `target_col` are not read by any cell visible here.
    target_size = 1
    target_col = 'Pawpularity'

In [None]:
def get_test_transforms(DIM = CFG.size):
    """Build the albumentations pipeline applied to images at inference time.

    The pipeline resizes to ``DIM x DIM``, normalizes each channel with fixed
    mean/std constants and converts the result to a tensor.

    Args:
        DIM: Target height and width passed to ``albumentations.Resize``.

    Returns:
        An ``albumentations.Compose`` holding the three steps in that order.
    """
    resize_step = albumentations.Resize(DIM, DIM)
    # Presumably the standard ImageNet channel statistics - TODO confirm
    # against the training pipeline.
    normalize_step = albumentations.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    to_tensor_step = ToTensorV2(p=1.0)
    return albumentations.Compose([resize_step, normalize_step, to_tensor_step])

In [None]:
class CuteDataset(Dataset):
    """Dataset that reads images from disk with OpenCV and pairs them with targets.

    Args:
        images_filepaths: Indexable collection of image file paths.
        targets: Indexable collection of target values, aligned with
            ``images_filepaths``.
        transform: Optional callable invoked as ``transform(image=...)`` whose
            result is indexed with ``['image']`` (the albumentations convention).
    """

    def __init__(self, images_filepaths, targets, transform=None):
        self.images_filepaths = images_filepaths
        self.targets = targets
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.images_filepaths)

    def __getitem__(self, idx):
        """Load, colour-convert and transform the image at ``idx``.

        Returns:
            A ``(image, label)`` tuple; ``label`` is a float tensor built from
            ``targets[idx]``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file at that path.
        """
        image_filepath = self.images_filepaths[idx]
        image = cv2.imread(image_filepath)
        # cv2.imread signals a missing or unreadable file by returning None
        # rather than raising; fail here with the offending path instead of
        # letting cvtColor raise an opaque error.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_filepath}')
        # OpenCV decodes to BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']
        
        label = torch.tensor(self.targets[idx]).float()
        return image, label

In [None]:
class PetNet(nn.Module):
    """timm backbone followed by a small MLP regression head.

    The backbone's own ``head`` is replaced by a linear projection to 128
    features; dropout is applied to that embedding before a
    128 -> 64 -> ``out_features`` MLP.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        out_features: Width of the final linear layer. The default of 1
            matches the previously hard-coded value, so existing checkpoints
            still load.
        input_channels: Accepted for interface compatibility; not currently
            forwarded to the backbone.
        pretrained: Passed to ``timm.create_model``.
    """

    def __init__(self, model_name = CFG.model_name, out_features = 1, input_channels = 3, pretrained = False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained = pretrained)
        n_features = self.model.head.in_features
        # Swap the backbone's head for a 128-dimensional embedding layer.
        self.model.head = nn.Linear(n_features, 128)
        self.fc = nn.Sequential(
                  nn.Linear(128, 64),
                  nn.SiLU(),
                  # Honour `out_features` instead of hard-coding 1.
                  nn.Linear(64, out_features)
        )
        self.dropout = nn.Dropout(0.2)
    
    def forward(self, image):
        """Return the raw (pre-sigmoid) head output for a batch of images."""
        image_embeds = self.model(image)
        x = self.dropout(image_embeds)
        z = self.fc(x)
        return z

In [None]:
#full set of images

# MAIN_PATH = '../input/stanford-dogs-dataset/images/Images'
# test_images = []
# for dir in os.listdir(MAIN_PATH):
#     test_images += os.listdir(os.path.join(MAIN_PATH,dir))

In [None]:
# Take whichever sub-directory os.listdir reports first and collect the bare
# file names inside it (no directory prefix).
all_dirs = os.listdir(MAIN_PATH)
first_dir = all_dirs[0]
test_images = os.listdir(os.path.join(MAIN_PATH, first_dir))

In [None]:
# Full path of every file in every sub-directory of MAIN_PATH.
# Both listing levels are sorted because os.listdir returns entries in
# arbitrary order; sorting makes the row order of the resulting predictions
# reproducible across runs and file systems. The loop variables avoid the
# name `dir` so the builtin is not shadowed, and os.path.join already
# returns a str, so no extra conversion is needed.
all_test_images = [
    os.path.join(MAIN_PATH, category_dir, image_name)
    for category_dir in sorted(os.listdir(MAIN_PATH))
    for image_name in sorted(os.listdir(os.path.join(MAIN_PATH, category_dir)))
]
#all_test_images

In [None]:
# def add_paths(input):
#     return os.path.join(MAIN_PATH, first_dir, input)

In [None]:
# One-column frame holding the path of every image to be scored.
test_df = pd.DataFrame({'image_path': all_test_images})
display(test_df.head(5))

In [None]:
models_dir = '../input/swim-classify'

# Resolve the checkpoint list once so the loop and the averaging divisor are
# guaranteed to agree, and fail early with a clear message when nothing is
# found (previously this surfaced as a TypeError on `None /= 0`).
model_paths = sorted(glob.glob(models_dir + '/*.pth'))
if not model_paths:
    raise FileNotFoundError(f'No .pth checkpoints found in {models_dir}')

# The dataset and loader do not depend on the model, so build them once
# instead of once per checkpoint. Targets are dummy zeros: only the images
# are used at inference time.
test_dataset = CuteDataset(
    images_filepaths = test_df['image_path'].values,
    targets = [0] * len(test_df['image_path'].values),
    transform = get_test_transforms()
)
test_loader = DataLoader(
    test_dataset, batch_size = CFG.batch_size,
    shuffle=False, num_workers = CFG.num_workers,
    pin_memory=True
)

predicted_labels = None
for model_name in model_paths:
    model = PetNet()
    # Checkpoints store the weights under the 'model' key; load on CPU first
    # and move to the target device afterwards.
    state = torch.load(model_name, 
                       map_location=torch.device('cpu'))['model']
    model.load_state_dict(state)
    model = model.to(device)
    model.eval()
    
    # Collect per-batch outputs and concatenate once; stacking inside the
    # loop re-copies the whole array on every batch.
    batch_preds = []
    
    with torch.no_grad():
        for (images, target) in tqdm(test_loader, desc = 'Predicting.'):
            images = images.to(device)
            # Sigmoid of the raw head output, rescaled by 100.
            predictions = torch.sigmoid(model(images)).to('cpu').numpy()*100
            batch_preds.append(predictions)

    temp_preds = np.concatenate(batch_preds, axis=0)

    # Accumulate the running sum over checkpoints.
    if predicted_labels is None:
        predicted_labels = temp_preds
    else:
        predicted_labels += temp_preds
        
# Turn the sum into the mean prediction across all checkpoints.
predicted_labels /= len(model_paths)

In [None]:
# Attach the checkpoint-averaged predictions (already scaled by 100 in the
# inference loop) as a new column and preview the result.
test_df['ext_predictions'] = predicted_labels
test_df.head()

In [None]:
# Histogram of the averaged predictions on the external images.
plt.hist(test_df['ext_predictions'])

In [None]:
# Write the image paths and their predictions to CSV, without the index column.
test_df.to_csv('external_stanforddata.csv', index = False)