In [None]:
import gc
import sys
sys.path.append('/kaggle/input/timm-pytorch-image-models/pytorch-image-models-master')
from timm import create_model
from fastai.vision.all import *
from sklearn.model_selection import StratifiedKFold

# --- Run configuration -------------------------------------------------------
VALID_SPLIT = 0.2  # not referenced by the code visible in this notebook section
SEED = 999
IMAGE_SIZE = 224
BATCH_SIZE = 4
NUM_WORKERS = 8
BACKBONE_NAME = 'swin_large_patch4_window7_224'

set_seed(SEED, reproducible=True)

# --- Input locations ---------------------------------------------------------
ROOT_DIR = '../input/petfinder-pawpularity-score/'
# One directory per trained ensemble member; each is expected to hold the
# per-fold weights that the inference loop loads as `model_{fold}`.
MODEL_DIRS = [
    '/kaggle/input/fastai-tuneaugs-large-nowarp',
    '/kaggle/input/fastai-tuneaugs-large-nowarp-139403',
    '/kaggle/input/fastai-tuneaugs-large-nowarp-656463'
]

# --- Training metadata -------------------------------------------------------
dataset_path = Path(ROOT_DIR)
train_df = pd.read_csv(dataset_path/'train.csv')
train_df['path'] = train_df['Id'].map(lambda x: str(dataset_path/'train'/x) + '.jpg')
# Scale the 0-100 Pawpularity score to 0-1 so it can be used as a BCE target.
train_df['norm_score'] = train_df['Pawpularity'] / 100

# Bin the continuous score so it can be stratified on; the bin count is
# floor(1 + log2(n_rows)).
num_bins = int(np.floor(1+np.log2(len(train_df))))
train_df['bins'] = pd.cut(train_df['norm_score'], bins=num_bins, labels=False)
train_df['fold'] = -1

# --- Fold assignment ---------------------------------------------------------
N_FOLDS = 5
strat_kfold = StratifiedKFold(n_splits=N_FOLDS, random_state=SEED, shuffle=True)
# Resolve the column position by name instead of assuming 'fold' is the last
# column, so adding another column above cannot silently corrupt the folds.
fold_col = train_df.columns.get_loc('fold')
# split() yields (train_indices, held_out_indices); each row is labelled with
# the fold in which it is held out.
for i, (_, holdout_index) in enumerate(strat_kfold.split(train_df.index, train_df['bins'])):
    train_df.iloc[holdout_index, fold_col] = i

train_df['fold'] = train_df['fold'].astype('int')

def get_data(fold, image_size):
    """Build the fastai DataLoaders for one cross-validation fold.

    Rows of the module-level ``train_df`` whose ``fold`` column equals
    ``fold`` are flagged as the validation set; all other rows are used for
    training. ``train_df`` itself is not modified.

    Args:
        fold: Fold id to hold out for validation.
        image_size: Size passed to the ``Resize`` item transform.

    Returns:
        The ``ImageDataLoaders`` built from the flagged frame, reading image
        paths from ``path`` and regression targets from ``norm_score``.
    """
    # assign() returns a new frame, leaving the shared train_df untouched.
    fold_df = train_df.assign(is_valid=(train_df['fold'] == fold))

    per_item = Resize(image_size)
    # Default fastai augmentations with warping disabled, plus colour jitter.
    per_batch = aug_transforms(
        max_warp=0,
        xtra_tfms=[Contrast(), Hue(), Saturation()],
    )

    return ImageDataLoaders.from_df(
        fold_df,
        valid_col='is_valid',
        seed=SEED,
        fn_col='path',
        label_col='norm_score',
        y_block=RegressionBlock,
        bs=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        item_tfms=per_item,
        batch_tfms=per_batch,
    )

# Create the torch hub checkpoint cache directory (no error if it already
# exists) and copy the Swin checkpoint file from the Kaggle input dataset
# into it via an IPython shell escape.
# NOTE(review): presumably this lets create_model(..., pretrained=True) find
# the weights locally instead of downloading them — confirm against the timm
# version in use.
os.makedirs('/root/.cache/torch/hub/checkpoints', exist_ok=True)
!cp '/kaggle/input/swin-large-patch4-window7-224/swin_large_patch4_window7_224_22kto1k.pth' '/root/.cache/torch/hub/checkpoints/'

def petfinder_rmse(input, target):
    """Root-mean-squared error on the 0-100 score scale.

    Args:
        input: Raw model outputs (logits); a sigmoid is applied here.
        target: Scores on the 0-1 scale, with the same number of elements
            as ``input``.

    Returns:
        A scalar tensor: ``100 * sqrt(mean((sigmoid(input) - target) ** 2))``.
    """
    # Flatten BOTH tensors: if only the prediction were flattened, a target
    # shaped (N, 1) would broadcast against the (N,) prediction into an
    # (N, N) difference matrix and yield a wrong error value.
    preds = torch.sigmoid(input.flatten())
    return 100 * torch.sqrt(F.mse_loss(preds, target.flatten()))

def get_learner(fold, image_size, backbone_name, model_name):
    """Create a mixed-precision fastai Learner for one fold.

    Args:
        fold: Fold id forwarded to ``get_data`` as the validation fold.
        image_size: Image size forwarded to ``get_data``.
        backbone_name: timm architecture name given to ``create_model``.
        model_name: Passed to ``Learner`` as ``model_dir``.

    Returns:
        The ``Learner`` after ``to_fp16()`` has been applied.
    """
    dls = get_data(fold, image_size)
    # The output width follows the DataLoaders' `c` attribute.
    backbone = create_model(backbone_name, pretrained=True, num_classes=dls.c)
    learner = Learner(
        dls,
        backbone,
        loss_func=BCEWithLogitsLossFlat(),
        metrics=AccumMetric(func=petfinder_rmse),
        model_dir=model_name,
    )
    return learner.to_fp16()

In [None]:
# Load the test metadata and derive each image's file path.
test_df = pd.read_csv(dataset_path/'test.csv')
test_df['path'] = test_df['Id'].map(lambda x:str(dataset_path/'test'/x)+'.jpg')

# One entry per model directory: the fold-averaged prediction on the 0-100 scale.
all_results = []
for model_dir in MODEL_DIRS:
    all_preds = []
    for fold in range(N_FOLDS):
        learn = get_learner(fold, IMAGE_SIZE, BACKBONE_NAME, model_dir)
        learn.load(f'model_{fold}', with_opt=False)
        test_dl = learn.dls.test_dl(test_df)
        # Test-time augmentation; only the predictions are kept.
        preds, _ = learn.tta(dl=test_dl, n=4, beta=0.25)
        all_preds.append(preds.flatten().numpy())
        # Release the model before building the next one. Garbage-collect
        # FIRST so tensors kept alive by reference cycles are actually freed,
        # THEN hand the now-unused cached blocks back to the GPU.
        del learn
        gc.collect()
        torch.cuda.empty_cache()
    # Average over folds and rescale from 0-1 to the 0-100 score range.
    all_results.append(np.mean(np.stack(all_preds), axis=0) * 100)

# Average over the ensemble members.
result = np.mean(np.stack(all_results), axis=0)
print(result)

test_df['Pawpularity'] = result
test_df[['Id', 'Pawpularity']].to_csv('submission.csv', index=False)
print(test_df[['Id', 'Pawpularity']])