## Libraries

In [None]:
import warnings
warnings.filterwarnings("ignore")

#general
from sklearn.model_selection import StratifiedKFold
from tqdm.auto import tqdm
from collections import defaultdict
import os
import numpy as np
import pandas as pd
import random
import gc
gc.enable()
pd.set_option('display.max_columns', None) 

# augmentation
import albumentations as A

# deep learning
import timm
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, OneCycleLR, CosineAnnealingLR, ReduceLROnPlateau
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from tqdm.notebook import tqdm
tqdm.pandas()

# metrics
from sklearn.metrics import mean_squared_error

## config

In [None]:
class Config:
    """Static settings shared by every cell of this inference notebook."""

    # --- identifiers and input locations ---
    model_name = "deit_base5fold"
    data_dir = "../input/petfinder-pawpularity-score/"
    img_train_dir = os.path.join(data_dir, "train")
    img_test_dir = os.path.join(data_dir, "test")

    # --- ensemble members ---
    # Keyed by the architecture name handed to the model constructor.
    #   model_path: directory searched for the fold checkpoints
    #   im_size:    image side length used for that model's inference transform
    #   pred:       list that collects one prediction array per fold checkpoint
    models = {
        'beit_large_patch16_512': {
            'model_path': '/kaggle/input/beit_large__512_5fold/transformers/default/1',
            'im_size': 512,
            'pred': [],
        },
        'vit_large_r50_s32_384': {
            'model_path': '/kaggle/input/vit-large-5-folds/pytorch/default/1',
            'im_size': 384,
            'pred': [],
        },
        'swin_large_patch4_window7_224': {
            'model_path': '/kaggle/input/swin_5folds/transformers/default/1',
            'im_size': 224,
            'pred': [],
        },
    }

    # --- reproducibility / test-time augmentation ---
    random_seed = 555
    tta_times = 1  # 1: no TTA
    tta_beta = 1 / tta_times

    # --- model construction ---
    pretrained = False
    inp_channels = 3
    out_features = 1
    dropout = 0

    # --- data loading ---
    batch_size = 4
    num_workers = 0  # >0: OS Error


In [4]:
def seed_everything(seed=Config.random_seed):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied to Python's ``random`` module, NumPy and
            PyTorch (CPU and all CUDA devices).
    """
    # PYTHONHASHSEED is the variable the interpreter reads for hash
    # randomisation. It is only consulted at interpreter start-up, so setting
    # it here affects child processes rather than the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes the cuDNN algorithm choice, which may differ
    # between runs; it has to be off for reproducible results.
    torch.backends.cudnn.benchmark = False

seed_everything()

# device optimization: use the GPU when CUDA is usable, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


## prepare data

In [5]:
# Path of test.csv inside the configured data directory.
test_file_path = os.path.join(Config.data_dir, 'test.csv')

def return_imgfilepath(name, folder=Config.img_train_dir):
    """Build the path of the JPEG image called ``name`` inside ``folder``.

    Args:
        name: Image identifier, without the file extension.
        folder: Directory containing the images; defaults to the training
            image directory from ``Config``.

    Returns:
        ``folder`` joined with ``<name>.jpg``.
    """
    return os.path.join(folder, f'{name}.jpg')

In [6]:
# Name of the label column.
target = 'Pawpularity'

# Read the test metadata and attach the on-disk image path of every row.
test_df = pd.read_csv(test_file_path)
test_df['file_path'] = test_df['Id'].apply(
    return_imgfilepath, folder=Config.img_test_dir
)

# Plain array of image paths, in the same row order as test_df.
filepaths = test_df['file_path'].values

## augmentations

In [None]:
def get_train_transforms(epoch, dim = 512):
    """Build the augmentation pipeline used for training images.

    Args:
        epoch: Accepted for interface compatibility; not used by the pipeline.
        dim: Target size handed to the resize and crop steps.

    Returns:
        An ``A.Compose`` applying, in order: ``SmallestMaxSize``, a random
        ``dim`` x ``dim`` crop, and vertical / horizontal flips that each fire
        with probability 0.5.
    """
    steps = [
        A.SmallestMaxSize(max_size=dim, p=1.0),
        A.RandomCrop(height=dim, width=dim, p=1.0),
        A.VerticalFlip(p=0.5),
        A.HorizontalFlip(p=0.5),
    ]
    return A.Compose(steps)

def get_inference_fixed_transforms(dim=512):
    """Build the deterministic preprocessing pipeline used at inference time.

    Args:
        dim: Target size handed to the resize and centre-crop steps. It is the
            first positional parameter because callers pass the image size
            positionally.

    Returns:
        An ``A.Compose`` applying ``SmallestMaxSize`` followed by a
        ``dim`` x ``dim`` centre crop, both always applied (``p=1.0``).
    """
    return A.Compose([
            A.SmallestMaxSize(max_size=dim, p=1.0),
            A.CenterCrop(height=dim, width=dim, p=1.0),
        ], p=1.0)

## Dataset

In [8]:
class PetDataset(Dataset):
    """Dataset pairing image files on disk with their float targets."""

    def __init__(self, image_filepaths, targets, transform=None):
        """Store the inputs.

        Args:
            image_filepaths: Indexable collection of image file paths.
            targets: Indexable collection of targets, aligned with the paths.
            transform: Optional callable invoked as ``transform(image=...)``
                whose result exposes the processed array under ``"image"``.
        """
        self.transform = transform
        self.targets = targets
        self.image_filepaths = image_filepaths

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_filepaths)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, target)`` as float tensors. The image is converted to
            RGB, optionally transformed, divided by 255 and transposed from
            (H, W, C) to (C, H, W).
        """
        path = self.image_filepaths[idx]
        with open(path, 'rb') as handle:
            # convert() forces the pixel data to be read while the file is open
            rgb = Image.open(handle).convert('RGB')
        pixels = np.array(rgb)

        if self.transform is not None:
            augmented = self.transform(image=pixels)
            pixels = augmented["image"]

        scaled = pixels / 255  # convert to 0-1
        channels_first = np.transpose(scaled, (2, 0, 1)).astype(np.float32)
        label = self.targets[idx]

        return (
            torch.tensor(channels_first, dtype=torch.float),
            torch.tensor(label, dtype=torch.float),
        )

## model

In [10]:
class PetNet(nn.Module):
    """timm backbone followed by a small two-layer regression head."""

    def __init__(
        self,
        model_name,
        out_features = Config.out_features,
        inp_channels=Config.inp_channels,
        pretrained=Config.pretrained
    ):
        """Create the backbone and the head.

        Args:
            model_name: Architecture name passed to ``timm.create_model``.
            out_features: Width of the final linear layer.
            inp_channels: Number of input image channels for the backbone.
            pretrained: Whether timm should load pretrained backbone weights.
        """
        super().__init__()
        # num_classes=0 asks timm for a backbone without its own classifier,
        # so the head below works on ``num_features``-wide embeddings.
        self.model = timm.create_model(
            model_name,
            pretrained=pretrained,
            in_chans=inp_channels,
            num_classes=0,
        )
        self.fc1 = nn.Linear(self.model.num_features, 128)
        self.dropout = nn.Dropout(0.1)
        self.fc2 = nn.Linear(128, out_features)

    def get_image_embedding(self, image):
        """Return the backbone output for ``image`` without applying the head."""
        return self.model(image)

    def forward(self, image):
        """Return the raw head output (no activation applied) for ``image``."""
        embedding = self.model(image)
        x = self.fc1(embedding)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

## test_predict

In [None]:
def notta_fn(filepaths, model, im_size):
    """Predict scores for ``filepaths`` with one model, without TTA.

    Args:
        filepaths: Sequence of image file paths to score.
        model: Network to evaluate; it is switched to eval mode here and must
            already live on the module-level ``device``.
        im_size: Image size handed to the fixed inference transform.

    Returns:
        1-D NumPy array holding ``sigmoid(model output) * 100`` for every
        file path, in input order.
    """
    print(f'no tta')
    model.eval()
    test_dataset = PetDataset(
        image_filepaths=filepaths,
        # No labels exist at inference time; zeros satisfy the dataset API.
        targets=np.zeros(len(filepaths)),
        transform=get_inference_fixed_transforms(im_size),
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=Config.batch_size,
        shuffle=False,  # keep predictions aligned with ``filepaths``
        num_workers=Config.num_workers,
        pin_memory=True,
    )

    fold_preds = []
    # The whole pass is gradient-free; the dummy targets are never needed, so
    # they are not moved to the device.
    with torch.no_grad():
        for images, _ in tqdm(test_loader):
            images = images.to(device, non_blocking=True).float()
            output = model(images)
            batch_pred = torch.sigmoid(output).detach().cpu().numpy() * 100
            fold_preds.extend(batch_pred.ravel().tolist())
    fold_preds = np.array(fold_preds)

    # Release loader/dataset references and cached GPU memory before the next
    # checkpoint is loaded.
    del test_loader, test_dataset
    gc.collect()
    torch.cuda.empty_cache()
    return fold_preds

In [None]:
import glob  # used below; not imported by the notebook's import cell

oof_df = pd.DataFrame()

# test predict loop: run every fold checkpoint of every ensemble member over
# the test images and collect the per-fold predictions in Config.models.
best_rmse = np.inf
for model_name, model_info in Config.models.items():
    model_dir = model_info['model_path']
    im_size = model_info['im_size']
    # glob returns matches in arbitrary order; sorting makes the fold order
    # (and the printed log) reproducible.
    for model_path in sorted(glob.glob(f'{model_dir}/*pth')):
        print(f'inference: {model_path}')
        model = PetNet(model_name=model_name)
        # Load onto the CPU first so checkpoints saved from a GPU also load on
        # CPU-only machines; the model is moved to ``device`` right after.
        model.load_state_dict(torch.load(model_path, map_location='cpu'))
        model = model.to(device)
        model.eval()
        if Config.tta_times > 1:
            # NOTE(review): tta_fn is not defined in the visible part of this
            # notebook -- confirm it exists before setting tta_times > 1.
            test_preds_fold = tta_fn(filepaths, model)
        else:
            test_preds_fold = notta_fn(filepaths, model, im_size)
        model_info['pred'].append(np.array(test_preds_fold))

inference: /kaggle/input/beit_large__512_5fold/transformers/default/1/beit_large_patch16_512_fold2.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/beit_large__512_5fold/transformers/default/1/beit_large_patch16_512_fold0.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/beit_large__512_5fold/transformers/default/1/beit_large_patch16_512_fold1.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/beit_large__512_5fold/transformers/default/1/beit_large_patch16_512_fold4.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/beit_large__512_5fold/transformers/default/1/beit_large_patch16_512_fold3.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/vit-large-5-folds/pytorch/default/1/vit_large_r50_s32_384_fold2.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/vit-large-5-folds/pytorch/default/1/vit_large_r50_s32_384_fold4.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/vit-large-5-folds/pytorch/default/1/vit_large_r50_s32_384_fold1.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/vit-large-5-folds/pytorch/default/1/vit_large_r50_s32_384_fold3.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/vit-large-5-folds/pytorch/default/1/vit_large_r50_s32_384_fold0.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/swin_5folds/transformers/default/1/swin_large_patch4_window7_224_fold3.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/swin_5folds/transformers/default/1/swin_large_patch4_window7_224_fold0.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/swin_5folds/transformers/default/1/swin_large_patch4_window7_224_fold1.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/swin_5folds/transformers/default/1/swin_large_patch4_window7_224_fold2.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

inference: /kaggle/input/swin_5folds/transformers/default/1/swin_large_patch4_window7_224_fold4.pth
no tta


  0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
# Average the fold predictions of each architecture (axis 0 runs over folds).
beit_pred = np.mean(Config.models["beit_large_patch16_512"]['pred'], axis=0)
vit_pred = np.mean(Config.models["vit_large_r50_s32_384"]['pred'], axis=0)
swin_pred = np.mean(Config.models["swin_large_patch4_window7_224"]['pred'], axis=0)

# Fixed-weight blend of the three per-model averages (weights sum to 1).
test_pred = 0.50 * beit_pred + 0.3 * vit_pred + 0.2 * swin_pred

## submission 
Weighted blend of the three models' fold-averaged predictions.

In [14]:
# Two-column submission table: image id and its blended prediction.
submission = pd.DataFrame({
    'Id': test_df['Id'],
    'Pawpularity': test_pred,
})

In [15]:
# Preview the first rows of the submission table (notebook display only).
submission.head()

Unnamed: 0,Id,Pawpularity
0,4128bae22183829d2b5fea10effdb0c3,36.479649
1,43a2262d7738e3d420d453815151079e,36.433002
2,4e429cead1848a298432a0acad014c9d,36.579326
3,80bc3ccafcc51b66303c2c263aa38486,36.744476
4,8f49844c382931444e68dffbe20228f4,36.418576


In [16]:
# Write the submission to the working directory, without the index column.
submission.to_csv("submission.csv", index = False)