In [None]:
# Put the bundled copy of timm under ../input on the import path before
# importing from it.
import sys
sys.path.append('../input/timm-3monthsold/pytorch-image-models-master 2')
from timm import create_model
from timm.data.mixup import Mixup

In [None]:
from fastai.vision.all import *
import matplotlib.pyplot as plt

In [None]:
# Mini-batch size shared by every DataLoaders built in this notebook.
BATCH_SIZE = 64
# Seed the random number generators once, before any data is shuffled or split.
set_seed(365, reproducible=True)

In [None]:
# Root directory of the competition data; train.csv, test.csv,
# sample_submission.csv and the train/ and test/ image folders are read from here.
dataset_path = Path('../input/petfinder-pawpularity-score/')

In [None]:
# Training table: later cells read its 'Id' and 'Pawpularity' columns.
train_df = pd.read_csv(dataset_path/'train.csv')

In [None]:
# Image path for every training row: <dataset_path>/train/<Id>.jpg
train_df['path'] = [str(dataset_path/'train'/pet_id)+'.jpg' for pet_id in train_df['Id']]
# Drop the raw Id, shuffle all rows and renumber the index from 0.
train_df = (
    train_df
    .drop(columns=['Id'])
    .sample(frac=1)
    .reset_index(drop=True)
)

In [None]:
# Scale the target by 1/100 so a sigmoid output can be compared against it.
train_df['norm_score'] = train_df['Pawpularity'].div(100)

In [None]:
if not os.path.exists('/root/.cache/torch/hub/checkpoints/'):
    os.makedirs('/root/.cache/torch/hub/checkpoints/')
!cp '../input/swin-transformer/swin_large_patch4_window7_224_22kto1k.pth' '/root/.cache/torch/hub/checkpoints/swin_large_patch4_window7_224_22kto1k.pth'

In [None]:
# Global seed, also used for the stratified fold split below.
seed=365
set_seed(seed, reproducible=True)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
# Call the switch instead of assigning to it: an assignment would replace the
# torch function with a bool and enable nothing.
try:
    # warn_only keeps ops that have no deterministic implementation usable
    # (they warn instead of raising).
    torch.use_deterministic_algorithms(True, warn_only=True)
except (AttributeError, TypeError):
    # Older torch builds lack the function or its warn_only flag. Strict mode
    # could make inference raise, so rely on the settings above instead.
    pass

In [None]:
import math
# Rice rule: number of histogram bins = ceil(2 * n^(1/3)) for n samples.
num_bins = math.ceil(2 * len(train_df) ** (1. / 3))
num_bins

In [None]:
# Discretise the normalised score into num_bins integer-coded bins; the bin id
# is the stratification label for the fold split.
train_df['bins'] = pd.cut(train_df['norm_score'], bins=num_bins, labels=False)

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

N_FOLDS = 10

# Every row starts unassigned (-1); each row is then labelled with the index of
# the split in which it is held out.
train_df['fold'] = -1

strat_kfold = StratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)
# split() yields (train, held-out) positions; only the held-out half is needed.
for i, (_, holdout_index) in enumerate(strat_kfold.split(train_df.index, train_df['bins'])):
    train_df.iloc[holdout_index, train_df.columns.get_loc('fold')] = i

train_df['fold'] = train_df['fold'].astype('int')

In [None]:
def petfinder_rmse(input,target):
    """RMSE on the 0-100 score scale between sigmoid(logits) and normalised targets.

    Args:
        input: raw model outputs (logits); any shape, flattened before use.
        target: targets on the 0-1 scale with the same number of elements.

    Returns:
        Scalar tensor: 100 * sqrt(mean squared error).
    """
    preds = torch.sigmoid(input.flatten())
    # Flatten the target too, so an (n, 1) target cannot broadcast against the
    # (n,) predictions into an (n, n) error matrix.
    return 100*torch.sqrt(F.mse_loss(preds, target.flatten()))

In [None]:
def get_data(fold, seed=365):
    """Build the DataLoaders whose validation set is stratified fold `fold`.

    Works on a copy of the global `train_df`: rows are re-assigned to N_FOLDS
    folds stratified on the 'bins' column, and the rows of fold `fold` are
    flagged as validation data.

    Args:
        fold: index of the fold to hold out for validation.
        seed: random state of the stratified split; also passed to the
            DataLoaders factory.

    Returns:
        The ImageDataLoaders built from the 'path' and 'norm_score' columns.
    """
    train_df_f = train_df.copy()
    train_df_f['fold'] = -1
    # Address the column by name rather than assuming 'fold' is the last column.
    fold_col = train_df_f.columns.get_loc('fold')

    strat_kfold = StratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)
    # split() yields (train, held-out) positions; a row's fold is the split in
    # which it is held out.
    for i, (_, holdout_index) in enumerate(strat_kfold.split(train_df_f.index, train_df_f['bins'])):
        train_df_f.iloc[holdout_index, fold_col] = i
    train_df_f['fold'] = train_df_f['fold'].astype('int')

    # add is_valid for validation fold
    train_df_f['is_valid'] = (train_df_f['fold'] == fold)

    dls = ImageDataLoaders.from_df(train_df_f, #pass in train DataFrame
                               valid_col='is_valid', #boolean column marking validation rows
                               seed=seed, #seed
                               fn_col='path', #column holding the image file path
                               label_col='norm_score', #column holding the 0-1 target
                               y_block=RegressionBlock, #The type of target
                               bs=BATCH_SIZE, #pass in batch size
                               num_workers=8,
                               item_tfms=Resize(224), #pass in item_tfms
                               batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()])) #pass in batch_tfms

    return dls

In [None]:
def get_learner(fold_num, seed=365, mixup=True):
    """Create a mixed-precision Swin-L learner for one validation fold.

    Args:
        fold_num: fold held out for validation (forwarded to get_data).
        seed: random state of the fold split (forwarded to get_data).
        mixup: attach the MixUp(0.2) callback when True.

    Returns:
        A Learner using BCEWithLogitsLossFlat as the loss and petfinder_rmse as
        the metric, converted with to_fp16().
    """
    data = get_data(fold_num, seed=seed)

    model = create_model('swin_large_patch4_window7_224', pretrained=True, num_classes=data.c)

    # One construction path; the callback list is the only thing that differs.
    cbs = [MixUp(0.2)] if mixup else None
    learn = Learner(data, model, loss_func=BCEWithLogitsLossFlat(), metrics=petfinder_rmse, cbs=cbs).to_fp16()

    return learn

In [None]:
# Test table: its 'Id' column is used below to build the image paths.
test_df = pd.read_csv(dataset_path/'test.csv')

In [None]:
# Give the test frame the same columns as the training frame: a dummy target,
# the image path, and the normalised score derived from the dummy target.
test_df['Pawpularity'] = [1]*len(test_df)
test_df['path'] = test_df['Id'].map(lambda x:str(dataset_path/'test'/x)+'.jpg')
test_df = test_df.drop(columns=['Id'])
# Derive the normalised score on test_df; train_df already has this column.
test_df['norm_score'] = test_df['Pawpularity']/100

In [None]:
import gc

In [None]:
# Seeds of the exported learners; one model file exists per (fold, seed) pair.
# 365, 999 - no mixup
# 666, 970, 337 - mixup

seeds = [365, 999, 666, 970, 337]
all_preds = list()

# These DataLoaders are only a template for building the test DataLoader and do
# not depend on the fold or the model seed, so they are built once.
dls = ImageDataLoaders.from_df(train_df, #pass in train DataFrame
                               valid_pct=0.2, #80-20 train-validation random split
                               seed=365, #seed
                               fn_col='path', #column holding the image file path
                               label_col='norm_score', #column holding the 0-1 target
                               y_block=RegressionBlock, #The type of target
                               bs=BATCH_SIZE, #pass in batch size
                               num_workers=8,
                               item_tfms=Resize(224), #pass in item_tfms
                               batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()]))

for i in range(N_FOLDS):
    # A distinct loop name keeps the global `seed` from being overwritten.
    for model_seed in seeds:
        # The exported learner is self-contained, so no fresh learner is built first.
        learn = load_learner(f'../input/pawpularity-regmods/model_fold_{i}_seed_{model_seed}.pkl', cpu=False)

        test_dl = dls.test_dl(test_df)

        # Test-time augmentation with n=3 passes; beta=0 presumably keeps only
        # the augmented predictions - confirm against the fastai docs.
        preds, _ = learn.tta(dl=test_dl, n=3, beta=0)

        all_preds.append(preds)

        # Release the model before loading the next one.
        del learn
        gc.collect()
        torch.cuda.empty_cache()

In [None]:
# When enabled, predictions whose fractional part is within 0.2 of a whole
# number are snapped to that whole number.
ROUNDING = True

sample_df = pd.read_csv(dataset_path/'sample_submission.csv')
# Average over every (fold, seed) model and scale back to the 0-100 range.
preds = np.mean(np.stack(all_preds), axis=0) * 100
# One value per test row, independent of whether the models emit (n,) or (n, 1).
preds = preds.reshape(-1)
assert len(preds) == len(sample_df), "prediction count does not match the submission file"

if ROUNDING:
    dec = preds % 1
    to_round = (dec<=0.2)|(dec>=0.8)
    preds[to_round] = np.round(preds[to_round])

sample_df['Pawpularity'] = preds
sample_df.to_csv('submission.csv',index=False)

In [None]:
# Preview the first rows of the frame that was written to submission.csv.
sample_df.head()