The training notebook is available here:

https://www.kaggle.com/ytakayama/train-pytorch-swin-5fold-some-tips

## setup

In [None]:
import os
import sys
# for timm(kaggle notebook only: internet disabled)
# from pytorch-image-models dataset
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

In [None]:
# aesthetics
import warnings
import sklearn.exceptions
warnings.filterwarnings('ignore')

#general
import pickle
from tqdm.auto import tqdm
from collections import defaultdict
import os
import numpy as np
import pandas as pd
import random
import gc
import cv2
import imageio
from itertools import product
gc.enable()
import glob
pd.set_option('display.max_columns', None) 
from PIL import Image

# visualization
import matplotlib.pyplot as plt
%matplotlib inline

# augmentation
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

# deep learning
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, OneCycleLR, CosineAnnealingLR
import torch
import torchvision
import timm
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# metrics
from sklearn.metrics import mean_squared_error

# cv
from sklearn.model_selection import KFold

import glob, re

## config

In [None]:
class Config:
    """Static settings shared by every cell of the inference notebook."""

    # --- reproducibility ---
    random_seed = 555

    # --- locations ---
    base_dir = "/content/drive/MyDrive/petfinder"
    data_dir = "../input/petfinder-pawpularity-score/"
    img_train_dir = os.path.join(data_dir, "train")
    img_test_dir = os.path.join(data_dir, "test")
    # Directories that are searched for trained checkpoint files.
    model_dirs = ["../input/train-pytorch-swin-5fold-some-tips"]
    output_dir = "."

    # --- model ---
    model_name = "swint_large224"
    # Architecture identifier handed to timm when the network is built.
    model_path = "swin_large_patch4_window7_224"
    pretrained = False
    inp_channels = 3
    out_features = 1
    dropout = 0
    scheduler_name = "OneCycleLR"

    # --- data loading ---
    im_size = 224
    batch_size = 32
    # Values above 0 raised an OS error for the original author.
    num_workers = 0

    # --- test-time augmentation ---
    # Number of TTA passes; 1 means no TTA.
    tta_times = 1
    # Weight given to the un-augmented prediction when TTA passes are blended.
    tta_beta = 1.0 / tta_times

## device/seed setting

In [None]:
def seed_everything(seed=Config.random_seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and every CUDA
    device) and switches cuDNN to deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    # The variable Python reads is PYTHONHASHSEED ("PYTHONSEED" is ignored).
    # It only influences interpreters started after this point, e.g. worker
    # processes, not the hashing of the already running one.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU; silently does nothing when CUDA is absent.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes convolution algorithms and may pick different
    # ones from run to run, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False

seed_everything()

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

## prepare data

In [None]:
# CSV file that is read to obtain the test set rows.
test_file_path = os.path.join(Config.data_dir, 'test.csv')

def return_imgfilepath(name, folder=Config.img_train_dir):
    """Return the path of the JPEG named ``name`` inside ``folder``."""
    return os.path.join(folder, '{}.jpg'.format(name))

## mount data

In [None]:

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    paths = [os.path.join(folder, name) for name in names]
    if paths:
        print('\n'.join(paths))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


## prepare data

In [None]:
# Name of the column that holds the prediction target.
target = 'Pawpularity'

# Load the test table and attach the image path that belongs to each Id.
test_df = pd.read_csv(test_file_path)
test_df['file_path'] = test_df['Id'].map(
    lambda image_id: return_imgfilepath(image_id, folder=Config.img_test_dir)
)
filepaths = test_df['file_path'].to_numpy()

## params about models

## augmentations

In [None]:
# Per-channel statistics in RGB order; no pipeline below currently applies them.
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # RGB
IMAGENET_STD = [0.229, 0.224, 0.225]  # RGB

def get_train_transforms(epoch, dim = Config.im_size):
    """Build the random augmentation pipeline used for training images.

    The shorter image side is resized to ``dim`` (like ``Resize`` in fastai),
    a random ``dim`` x ``dim`` patch is cropped, and vertical and horizontal
    flips are each applied with probability 0.5. No normalisation is applied.

    Args:
        epoch: Accepted for interface compatibility; not used.
        dim: Side length of the square output image.

    Returns:
        The composed albumentations pipeline.
    """
    resize = A.SmallestMaxSize(max_size=dim, p=1.0)
    crop = A.RandomCrop(height=dim, width=dim, p=1.0)
    flips = [A.VerticalFlip(p=0.5), A.HorizontalFlip(p=0.5)]
    return A.Compose([resize, crop, *flips])

def get_inference_fixed_transforms(mode=0, dim = Config.im_size):
    """Build the deterministic inference pipeline for one TTA mode.

    Every mode resizes the shorter image side to ``dim`` and center-crops a
    ``dim`` x ``dim`` square. Modes 1-3 then apply one fixed transform, so the
    same mode always yields the same view of an image. No normalisation is
    applied.

    Args:
        mode: 0 = no extra transform, 1 = vertical flip, 2 = horizontal flip,
            3 = transpose.
        dim: Side length of the square output image.

    Returns:
        The composed albumentations pipeline.

    Raises:
        ValueError: If ``mode`` is not one of 0, 1, 2, 3. Returning ``None``
            here would silently disable resizing in the dataset.
    """
    extra_step_by_mode = {
        1: A.VerticalFlip,
        2: A.HorizontalFlip,
        3: A.Transpose,
    }
    if mode != 0 and mode not in extra_step_by_mode:
        raise ValueError(
            f'unsupported TTA mode {mode!r}; expected one of 0, 1, 2, 3'
        )

    steps = [
        A.SmallestMaxSize(max_size=dim, p=1.0),
        A.CenterCrop(height=dim, width=dim, p=1.0),
    ]
    if mode != 0:
        # Always applied (p=1.0) so the augmentation is reproducible.
        steps.append(extra_step_by_mode[mode](p=1.0))
    return A.Compose(steps, p=1.0)
        
def get_inference_random_transforms(mode=0, dim = Config.im_size):
    """Build an inference pipeline with optional random flips.

    Args:
        mode: 0 keeps the image as it is after resizing and center-cropping;
            any other value additionally applies a vertical and a horizontal
            flip, each with probability 0.5.
        dim: Side length of the square output image.

    Returns:
        The composed albumentations pipeline.
    """
    steps = [
        A.SmallestMaxSize(max_size=dim, p=1.0),
        A.CenterCrop(height=dim, width=dim, p=1.0),
    ]
    if mode == 0:
        return A.Compose(steps, p=1.0)

    steps += [A.VerticalFlip(p = 0.5), A.HorizontalFlip(p = 0.5)]
    return A.Compose(steps)

## Dataset

In [None]:
class PetDataset(Dataset):
    """Dataset that reads images from disk and returns float tensors.

    Each item is an ``(image, target)`` pair. The image is loaded as RGB,
    optionally passed through ``transform``, divided by 255 and rearranged
    from height-width-channel to channel-height-width order.
    """

    def __init__(self, image_filepaths, targets, transform=None):
        self.image_filepaths = image_filepaths
        self.targets = targets
        self.transform = transform

    def __len__(self):
        return len(self.image_filepaths)

    def __getitem__(self, idx):
        with open(self.image_filepaths[idx], 'rb') as handle:
            pixels = np.array(Image.open(handle).convert('RGB'))

        if self.transform is not None:
            # The transform is called with the ``image`` keyword and returns
            # a mapping whose "image" entry is the result.
            pixels = self.transform(image = pixels)["image"]

        # Divide by 255 first, then move the channel axis to the front.
        chw = np.transpose(pixels / 255, (2, 0, 1)).astype(np.float32)

        return (
            torch.tensor(chw, dtype = torch.float),
            torch.tensor(self.targets[idx], dtype = torch.float),
        )

## utils

In [None]:
class MetricMonitor:
    """Track a running sum, update count and mean for each named metric."""

    def __init__(self, float_precision=3):
        # Number of decimals used when the monitor is rendered as text.
        self.float_precision = float_precision
        self.reset()

    def reset(self):
        """Discard everything recorded so far."""
        self.metrics = defaultdict(lambda: dict(val=0, count=0, avg=0))

    def update(self, metric_name, val):
        """Add ``val`` to ``metric_name`` and refresh its running mean."""
        entry = self.metrics[metric_name]
        total = entry["val"] + val
        seen = entry["count"] + 1
        entry.update(val=total, count=seen, avg=total / seen)

    def __str__(self):
        digits = self.float_precision
        rendered = (
            f"{name}: {entry['avg']:.{digits}f}"
            for name, entry in self.metrics.items()
        )
        return " | ".join(rendered)
    
def usr_rmse_score(output, target):
    """Return the RMSE between predictions and targets after scaling by 100.

    The computation is done with NumPy instead of
    ``mean_squared_error(..., squared=False)``, because the ``squared``
    argument is deprecated and removed in recent scikit-learn releases.

    Args:
        output: Tensor of raw model outputs; a sigmoid is applied here.
        target: Tensor of targets; it is multiplied by 100 just like the
            sigmoid output (presumably it is stored on a 0-1 scale; verify
            against the caller).

    Returns:
        The root mean squared error as a float. With several output columns
        the per-column RMSE values are averaged, which matches what
        scikit-learn returned for ``squared=False``.

    Raises:
        ValueError: If the inputs are empty or their shapes do not match.
    """
    y_pred = torch.sigmoid(output).detach().cpu().numpy() * 100
    y_true = target.detach().cpu().numpy() * 100

    y_pred = np.atleast_1d(np.asarray(y_pred, dtype=np.float64))
    y_true = np.atleast_1d(np.asarray(y_true, dtype=np.float64))
    if y_pred.size == 0 or y_true.size == 0:
        raise ValueError('usr_rmse_score needs at least one sample')

    # View 1-D inputs as a single column so that (n,) and (n, 1) compare
    # element-wise instead of broadcasting to an (n, n) matrix.
    y_pred = y_pred.reshape(y_pred.shape[0], -1)
    y_true = y_true.reshape(y_true.shape[0], -1)
    if y_pred.shape != y_true.shape:
        raise ValueError(
            f'shape mismatch: predictions {y_pred.shape} vs targets {y_true.shape}'
        )

    per_column_rmse = np.sqrt(np.mean((y_true - y_pred) ** 2, axis=0))
    return float(np.mean(per_column_rmse))

## model

In [None]:
class PetNet(nn.Module):
    """Thin wrapper around a network created with ``timm.create_model``.

    Args:
        model_name: Architecture identifier passed to timm.
        out_features: Passed to timm as ``num_classes``.
        inp_channels: Passed to timm as ``in_chans``.
        pretrained: Passed to timm as ``pretrained``.
    """

    def __init__(
        self,
        model_name = Config.model_path,
        out_features = Config.out_features,
        inp_channels=Config.inp_channels,
        pretrained=Config.pretrained
    ):
        super().__init__()
        backbone_kwargs = dict(
            pretrained=pretrained,
            in_chans=inp_channels,
            num_classes=out_features,
        )
        self.model = timm.create_model(model_name, **backbone_kwargs)

    def forward(self, image):
        """Return the wrapped network's output for ``image``."""
        return self.model(image)

## test_predict

In [None]:
def notta_fn(filepaths, model):
    """Predict scores for ``filepaths`` without test-time augmentation.

    Args:
        filepaths: Sequence of image file paths.
        model: Network called on batches of images; its output is passed
            through a sigmoid. Presumably it already lives on the
            module-level ``device`` -- verify against the caller.

    Returns:
        1-D NumPy array with one prediction per path, scaled by 100, in the
        order of ``filepaths``.
    """
    print('no tta')
    model.eval()
    test_dataset = PetDataset(
        image_filepaths=filepaths,
        # The dataset requires targets; zeros are placeholders that are
        # never used for inference.
        targets=np.zeros(len(filepaths)),
        transform=get_inference_fixed_transforms(0),
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=Config.batch_size,
        shuffle=False,  # keep predictions aligned with ``filepaths``
        num_workers=Config.num_workers,
        pin_memory=True,
    )

    fold_preds = []
    for images, _ in tqdm(test_loader):
        images = images.to(device, non_blocking=True).float()
        with torch.no_grad():
            output = model(images)
            scores = torch.sigmoid(output) * 100
        fold_preds.extend(scores.cpu().numpy().ravel().tolist())
    fold_preds = np.array(fold_preds)

    # Release loader/dataset and cached GPU memory before the next model.
    del test_loader, test_dataset
    gc.collect()
    torch.cuda.empty_cache()
    return fold_preds

In [None]:
def tta_fn(filepaths, model):
    """Predict scores for ``filepaths`` with test-time augmentation.

    One pass over the data is made for every mode in
    ``range(Config.tta_times)``. Mode 0 is weighted with ``Config.tta_beta``
    and the mean of the remaining modes with ``1 - Config.tta_beta``.

    Args:
        filepaths: Sequence of image file paths.
        model: Network called on batches of images; its output is passed
            through a sigmoid. Presumably it already lives on the
            module-level ``device`` -- verify against the caller.

    Returns:
        1-D NumPy array with one blended prediction per path, scaled by 100,
        in the order of ``filepaths``.

    Raises:
        ValueError: If ``Config.tta_times`` is below 1, or if no inference
            transform exists for one of the requested modes.
    """
    if Config.tta_times < 1:
        raise ValueError(
            f'Config.tta_times must be >= 1, got {Config.tta_times}'
        )

    model.eval()
    tta_preds = []
    for tta_mode in range(Config.tta_times):
        print(f'tta mode:{tta_mode}')
        transform = get_inference_fixed_transforms(tta_mode)
        if transform is None:
            # Without a transform the images would not even be resized.
            raise ValueError(
                f'no inference transform is defined for tta mode {tta_mode}'
            )
        test_dataset = PetDataset(
            image_filepaths=filepaths,
            # Placeholder targets; never used for inference.
            targets=np.zeros(len(filepaths)),
            transform=transform,
        )
        test_loader = DataLoader(
            test_dataset,
            batch_size=Config.batch_size,
            shuffle=False,  # keep every pass aligned with ``filepaths``
            num_workers=Config.num_workers,
            pin_memory=True,
        )

        tta_pred = []
        for images, _ in tqdm(test_loader):
            images = images.to(device, non_blocking=True).float()
            with torch.no_grad():
                output = model(images)
                scores = torch.sigmoid(output) * 100
            tta_pred.extend(scores.cpu().numpy().ravel().tolist())
        tta_preds.append(tta_pred)
        del test_loader, test_dataset

    tta_preds = np.array(tta_preds)
    if len(tta_preds) == 1:
        # A single pass has no augmented predictions to blend; averaging the
        # empty remainder would produce NaN and poison the result.
        fold_preds = tta_preds[0]
    else:
        # default preds * tta_beta + mean of augmented preds * (1 - tta_beta)
        fold_preds = (
            Config.tta_beta * tta_preds[0]
            + (1 - Config.tta_beta) * np.mean(tta_preds[1:], axis=0)
        )

    gc.collect()
    torch.cuda.empty_cache()
    return fold_preds

In [None]:
oof_df = pd.DataFrame()

# Test prediction loop: every checkpoint found in a model directory is treated
# as one fold. The fold predictions are averaged per directory and the
# per-directory means are collected in ``test_preds``.
best_rmse = np.inf
test_preds = []
for model_dir in Config.model_dirs:
    # glob returns paths in arbitrary order; sort for a reproducible log.
    checkpoint_paths = sorted(glob.glob(f'{model_dir}/*pth'))
    if not checkpoint_paths:
        # Averaging an empty list would silently produce NaN predictions.
        raise FileNotFoundError(f'no *pth checkpoints found in {model_dir}')

    test_preds_model = []
    for model_path in checkpoint_paths:
        print(f'inference: {model_path}')
        model = PetNet()
        # map_location lets checkpoints saved on a GPU load on CPU-only hosts.
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        model.load_state_dict(torch.load(model_path, map_location=device))
        model = model.to(device)
        model.eval()
        if Config.tta_times > 1:
            test_preds_fold = tta_fn(filepaths, model)
        else:
            test_preds_fold = notta_fn(filepaths, model)
        test_preds_model.append(test_preds_fold)
    test_preds_model = np.array(test_preds_model)
    test_preds.append(np.mean(test_preds_model, axis=0))

## submission 
ensemble mean of each fold

In [None]:
# Average the collected predictions and pair them with the test ids.
preds = np.asarray(test_preds)
final_predictions = preds.mean(axis=0)
submission = pd.DataFrame(
    {'Id': test_df['Id'], 'Pawpularity': final_predictions}
)

In [None]:
# Display the first rows of the submission table as a quick sanity check.
submission.head()

In [None]:
# Write the submission to the working directory without the index column.
submission.to_csv("submission.csv", index = False)