In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
from timm import create_model

In [None]:
import numpy as np
import pandas as pd
import math
import pickle
import gc

import fastai
from fastai.vision.all import *

import torch
from torch import nn
from torch.nn import functional as F

import torchvision
from torchvision import transforms as T
from torchvision.io import read_image

import timm
from timm import create_model

import sklearn
from sklearn.model_selection import StratifiedKFold

In [None]:
class args:
    """Notebook-wide settings, read as plain class attributes (e.g. ``args.seed``)."""

    # Where the competition files live, and the seed used for every RNG
    folder_name = Path('../input/petfinder-pawpularity-score/')
    seed = 1212

    # Number of cross-validation folds (one saved learner is loaded per fold)
    num_splits = 5

    # DataLoader settings
    batch_size = 32
    num_workers = 8
    imagesize = 224

    # Model identifier string (not referenced by the cells visible here)
    model_name = 'swin_large_patch4_window7_224'

In [None]:
if not os.path.exists('/root/.cache/torch/hub/checkpoints/'):
    os.makedirs('/root/.cache/torch/hub/checkpoints/')
!cp '../input/swin-transformer/swin_large_patch4_window7_224_22kto1k.pth' '/root/.cache/torch/hub/checkpoints/swin_large_patch4_window7_224_22kto1k.pth'

In [None]:
# Test metadata: read the CSV and derive each image's .jpg path from its Id
test_dir = args.folder_name/'test'
tstdf = pd.read_csv(args.folder_name/'test.csv')
tstdf['filename'] = tstdf['Id'].map(lambda image_id: str(test_dir/image_id)+'.jpg')

# Training metadata: same treatment, pointing at the train image folder
train_dir = args.folder_name/'train'
df = pd.read_csv(args.folder_name/'train.csv')
df['filename'] = df['Id'].map(lambda image_id: str(train_dir/image_id)+'.jpg')

In [None]:
# Single seed reused by the fold split further down
seed = args.seed

device = torch.device("cuda:0")

# Seed through the fastai helper first, then repeat the torch-level seeding
# explicitly so the intent is visible in the notebook.
set_seed(seed, reproducible=True)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

# `torch.use_deterministic_algorithms` is a function: assigning True to it
# replaces the function object and enables nothing, so it must be called.
# warn_only=True reports ops that lack a deterministic implementation as
# warnings instead of aborting inference.
try:
    torch.use_deterministic_algorithms(True, warn_only=True)
except (AttributeError, TypeError):
    # Older PyTorch: the function or its `warn_only` flag is missing. Strict
    # mode could raise in the middle of inference, so skip it here.
    pass

In [None]:
# Placeholder target for the test rows (constant 1).
tstdf['Pawpularity'] = 1
# errors='ignore' keeps the cell re-runnable once 'Id' has already been dropped.
tstdf = tstdf.drop(columns=['Id'], errors='ignore')

df = df.drop(columns=['Id'], errors='ignore')
# Shuffle the rows.
# NOTE(review): no random_state is passed, so the order depends on the global
# NumPy RNG state -- confirm it has been seeded before this cell runs.
df = df.sample(frac=1).reset_index(drop=True)
# Regression target: Pawpularity scaled by 1/100
df['norm_score'] = df['Pawpularity']/100

# Number of bins = ceil(2 * n^(1/3)) (the Rice rule); the binned target is
# what the stratified split balances across folds.
num_bins = int(np.ceil(2*((len(df))**(1./3))))

df['bins'] = pd.cut(df['norm_score'], bins=num_bins, labels=False)

# -1 marks "no fold yet"; every row is overwritten by the loop below.
df['fold'] = -1
# Look the column up by name so the assignment does not depend on 'fold'
# happening to be the last column.
fold_col = df.columns.get_loc('fold')

strat_kfold = StratifiedKFold(n_splits=args.num_splits, random_state=seed, shuffle=True)
# The second element yielded by split() is the held-out part of split i;
# those rows get fold id i.
for i, (_, valid_index) in enumerate(strat_kfold.split(df.index, df['bins'])):
    df.iloc[valid_index, fold_col] = i

In [None]:
def rmse(input,target):
    """Root-mean-squared error between ``sigmoid(input)`` and ``target``, times 100.

    ``input`` is flattened and passed through a sigmoid before the comparison;
    ``target`` is used exactly as given.
    """
    # NOTE(review): presumably kept at module level so learners exported with
    # this metric can be unpickled -- confirm before renaming or moving it.
    squashed = input.flatten().sigmoid()
    mean_sq_err = F.mse_loss(squashed, target)
    return mean_sq_err.sqrt() * 100

In [None]:
def get_data(fold):
    """Return fastai image DataLoaders in which rows of fold ``fold`` are validation.

    Works on a copy of the global ``df``; rows whose ``fold`` column equals
    ``fold`` are flagged through an ``is_valid`` column, which is passed to
    fastai as ``valid_col``.
    """
    # Copy so the helper column never leaks into the shared frame
    fold_df = df.copy()
    fold_df['is_valid'] = fold_df['fold'].eq(fold)

    # Batch-level augmentations: lighting jitter plus random erasing
    augmentations = setup_aug_tfms([
        Brightness(), Contrast(), Hue(), Saturation(),
        RandomErasing(p=.3, sh =.1, max_count = 2),
    ])

    # NOTE(review): valid_pct / seed look redundant once valid_col is given --
    # confirm against the fastai documentation before removing them.
    return ImageDataLoaders.from_df(
        fold_df,
        valid_pct=0.2,
        valid_col='is_valid',             # column flagging the validation rows
        seed=args.seed,
        fn_col='filename',                # column holding the image path
        label_col='norm_score',           # column holding the regression target
        y_block=RegressionBlock,
        bs=args.batch_size,
        num_workers=args.num_workers,
        item_tfms=Resize(args.imagesize),
        batch_tfms=augmentations,
    )

In [None]:
def get_learner(fold_num):
    """Load the exported fastai learner of fold ``fold_num`` and switch it to fp16.

    Args:
        fold_num: fold index; selects ``../input/fastai-swinl/{fold_num}best_weights.pkl``.

    Returns:
        The learner returned by ``load_learner(..., cpu=False)`` after ``to_fp16()``.

    No DataLoaders are built here: nothing in this function used them, and the
    caller creates its own test DataLoader.
    """
    # NOTE: the export is a .pkl file; unpickling can execute arbitrary code,
    # so only load exports from a trusted source.
    learn = load_learner(f'../input/fastai-swinl/{fold_num}best_weights.pkl', cpu = False).to_fp16()

    return learn

In [None]:
# One tensor of test predictions per fold, in fold order
all_preds = []

# These DataLoaders only act as the template from which the test DataLoader is
# derived. They do not depend on the fold, so build them once rather than once
# per loop iteration.
dls = ImageDataLoaders.from_df(df,
                               valid_pct=0.2,           # random 80/20 split
                               seed=args.seed,
                               fn_col='filename',       # column holding the image path
                               label_col='norm_score',  # column holding the regression target
                               y_block=RegressionBlock,
                               bs=args.batch_size,
                               num_workers=args.num_workers,
                               item_tfms=Resize(args.imagesize),
                               batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(),
                                                          Saturation(),
                                                          RandomErasing(p=.3, sh =.1, max_count = 2)]))

for i in range(args.num_splits):

    learn = get_learner(fold_num=i)

    # Re-seed torch before every fold so each fold's test-time augmentation
    # starts from the same RNG state, whatever ran before it.
    torch.manual_seed(args.seed)

    test_dl = dls.test_dl(tstdf)

    # Test-time augmentation: n augmented passes combined with the plain pass
    # using weight beta.
    preds, _ = learn.tta(dl=test_dl, n = 3, beta = .33333)

    all_preds.append(preds)

    # Drop the references first, collect garbage, and only then release the
    # cached CUDA memory, so the next fold's model has room.
    del learn, test_dl

    gc.collect()

    torch.cuda.empty_cache()

In [None]:
sample_df = pd.read_csv(args.folder_name/'sample_submission.csv')

# Average the per-fold predictions and flatten to one score per test row, so
# a 1-D array (never an (N, 1) one) is assigned to the column.
preds = np.mean(np.stack(all_preds), axis=0).reshape(-1)
assert len(preds) == len(sample_df), (
    f'got {len(preds)} predictions for {len(sample_df)} submission rows'
)

# NOTE(review): assumes sample_submission.csv lists the Ids in the same order
# as test.csv, which the predictions follow -- confirm.
# Scale by 100 to undo the /100 applied when norm_score was built.
sample_df['Pawpularity'] = preds*100
sample_df.to_csv('submission.csv',index=False)

In [None]:
# Sanity check: re-read the file that was just written and show its first rows
pd.read_csv('submission.csv').head(n=5)