In [1]:
import os
import gc
import cv2
import copy
import time
import random
from PIL import Image

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, KFold

import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
c_ = Fore.CYAN
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


# Training Configuration

In [5]:
root_dir = '/home/minyoungxi/MINYOUNGXI/Kaggle/PetFinder - Swin Transformer'
train_dir = '/home/minyoungxi/MINYOUNGXI/Kaggle/PetFinder - Swin Transformer/train'
test_dir = '/home/minyoungxi/MINYOUNGXI/Kaggle/PetFinder - Swin Transformer/test'

In [2]:
CONFIG = dict(
    seed = 42,
    backbone = 'swin_base_patch4_window7_224',
    embedder = 'tf_efficientnet_b4_ns',
    train_batch_size = 16,
    valid_batch_size = 32,
    img_size = 448,
    epochs = 5,
    learning_rate = 1e-4,
    scheduler = 'CosineAnnealingLR',
    min_lr = 1e-6,
    T_max = 100,
#     T_0 = 25,
#     warmup_epochs = 0,
    weight_decay = 1e-6,
    n_accumulate = 1,
    n_fold = 5,
    num_classes = 1,
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    competition = 'PetFinder',
    _wandb_kernel = 'deb'
)

# Set Seed for Reproductibility

In [3]:
def set_seed(seed = 42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed(CONFIG['seed'])

In [7]:
def get_train_file_path(id):
    return f"{train_dir}/{id}.jpg"

# Read Data

In [8]:
df = pd.read_csv(f"{root_dir}/train.csv")
df['file_path'] = df['Id'].apply(get_train_file_path)

In [9]:
feature_cols = [col for col in df.columns if col not in ['Id', 'Pawpularity', 'file_path']]

# Create Folds

In [11]:
def create_folds(df, n_s=5, n_grp=None):
    df['kfold'] = -1
    
    if n_grp is None:
        skf = KFold(n_splits=n_s, random_state=CONFIG['seed'])
        target = df['Pawpularity']
    else:
        skf = StratifiedKFold(n_splits=n_s, shuffle=True, random_state=CONFIG['seed'])
        df['grp'] = pd.cut(df['Pawpularity'], n_grp, labels=False)
        target = df.grp
    
    for fold_no, (t, v) in enumerate(skf.split(target, target)):
        df.loc[v, 'kfold'] = fold_no

    df = df.drop('grp', axis=1)
    
    return df

In [12]:
df = create_folds(df, n_s=CONFIG['n_fold'], n_grp=14)
df.head()

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity,file_path,kfold
0,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,63,/home/minyoungxi/MINYOUNGXI/Kaggle/PetFinder -...,0
1,0009c66b9439883ba2750fb825e1d7db,0,1,1,0,0,0,0,0,0,0,0,0,42,/home/minyoungxi/MINYOUNGXI/Kaggle/PetFinder -...,2
2,0013fd999caf9a3efe1352ca1b0d937e,0,1,1,1,0,0,0,0,1,1,0,0,28,/home/minyoungxi/MINYOUNGXI/Kaggle/PetFinder -...,0
3,0018df346ac9c1d8413cfcc888ca8246,0,1,1,1,0,0,0,0,0,0,0,0,15,/home/minyoungxi/MINYOUNGXI/Kaggle/PetFinder -...,3
4,001dc955e10590d3ca4673f034feeef2,0,0,0,1,0,0,1,0,0,0,0,0,72,/home/minyoungxi/MINYOUNGXI/Kaggle/PetFinder -...,4


# Dataset Class

In [28]:
class PawpularityDataset(Dataset):
    def __init__(self, root_dir, df, transforms=None):
        self.root_dir = root_dir
        self.df = df
        self.file_names = df['file_path'].values
        self.targets = df['Pawpularity'].values
        self.transforms = transforms
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        img_path = self.file_names[index]
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        target = self.targets[index]
        
        if self.transforms:
            img = self.transforms(image=img)["image"]
            
        return img, target

# Augmentations

In [29]:
data_transforms = {
    "train": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.HorizontalFlip(p=0.5),
        A.Normalize(
                mean=[0.485, 0.456, 0.406], 
                std=[0.229, 0.224, 0.225], 
                max_pixel_value=255.0, 
                p=1.0
            ),
        ToTensorV2()], p=1.),
    
    "valid": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(
                mean=[0.485, 0.456, 0.406], 
                std=[0.229, 0.224, 0.225], 
                max_pixel_value=255.0, 
                p=1.0
            ),
        ToTensorV2()], p=1.)
}

# Create Model

In [30]:
class HybridEmbed(nn.Module):
    """ CNN Feature Map Embedding
    Extract feature map from CNN, flatten, project to embedding dim.
    """
    def __init__(self, backbone, img_size=224, patch_size=1, feature_size=None, in_chans=3, embed_dim=768):
        super().__init__()
        assert isinstance(backbone, nn.Module)
        img_size = (img_size, img_size)
        patch_size = (patch_size, patch_size)
        self.img_size = img_size
        self.patch_size = patch_size
        self.backbone = backbone
        if feature_size is None:
            with torch.no_grad():
                # NOTE Most reliable way of determining output dims is to run forward pass
                training = backbone.training
                if training:
                    backbone.eval()
                o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))
                if isinstance(o, (list, tuple)):
                    o = o[-1]  # last feature if backbone outputs list/tuple of features
                feature_size = o.shape[-2:]
                feature_dim = o.shape[1]
                backbone.train(training)
        else:
            feature_size = (feature_size, feature_size)
            if hasattr(self.backbone, 'feature_info'):
                feature_dim = self.backbone.feature_info.channels()[-1]
            else:
                feature_dim = self.backbone.num_features
        assert feature_size[0] % patch_size[0] == 0 and feature_size[1] % patch_size[1] == 0
        self.grid_size = (feature_size[0] // patch_size[0], feature_size[1] // patch_size[1])
        self.num_patches = self.grid_size[0] * self.grid_size[1]
        self.proj = nn.Conv2d(feature_dim, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        x = self.backbone(x)
        if isinstance(x, (list, tuple)):
            x = x[-1]  # last feature if backbone outputs list/tuple of features
        x = self.proj(x).flatten(2).transpose(1, 2)
        return x

In [31]:
class PawpularityModel(nn.Module):
    def __init__(self, backbone, embedder, pretrained=True):
        super(PawpularityModel, self).__init__()
        self.backbone = timm.create_model(backbone, pretrained=pretrained)
        self.embedder = timm.create_model(embedder, features_only=True, out_indices=[2], pretrained=pretrained)
        self.backbone.patch_embed = HybridEmbed(self.embedder, img_size=CONFIG['img_size'], embed_dim=128)
        self.n_features = self.backbone.head.in_features
        self.backbone.reset_classifier(0)
        self.fc = nn.Linear(self.n_features, CONFIG['num_classes'])

    def forward(self, images):
        features = self.backbone(images)              # features = (bs, embedding_size)
        output = self.fc(features)                    # outputs  = (bs, num_classes)
        return output
    
model = PawpularityModel(CONFIG['backbone'], CONFIG['embedder'])
model.to(CONFIG['device']);

In [32]:
# test
img = torch.randn(1, 3, CONFIG['img_size'], CONFIG['img_size']).to(CONFIG['device'])
print(img)
model(img)

tensor([[[[-5.3800e-01, -1.9478e-01, -4.7874e-01,  ...,  1.9646e-01,
            3.3350e-01,  7.8466e-01],
          [-7.0150e-01, -2.0788e-01, -1.4052e-01,  ...,  7.7995e-01,
            2.6136e-01, -1.5904e-01],
          [-7.2315e-01,  3.3166e-01, -3.8337e-01,  ..., -2.5925e-01,
            1.8453e+00,  3.6167e-01],
          ...,
          [-5.4218e-01, -6.7248e-01, -1.6515e+00,  ..., -4.9294e-01,
           -2.4324e-01, -7.4262e-01],
          [ 8.4537e-01,  1.0428e+00,  3.0389e-01,  ...,  1.8329e+00,
           -2.7432e-02,  5.4890e-01],
          [-7.0773e-01, -2.5307e-01, -1.5902e-01,  ..., -6.2589e-01,
            1.5137e+00, -1.9423e+00]],

         [[-3.1430e-01,  1.3632e+00,  1.3705e+00,  ...,  7.0590e-01,
           -8.0494e-01, -1.1797e+00],
          [-2.6537e-01,  1.4801e-01, -8.5976e-02,  ..., -9.6305e-01,
            9.8238e-01,  2.1581e+00],
          [-2.6002e-02,  1.3511e-01, -1.1795e+00,  ...,  1.9341e+00,
           -3.9789e-01, -9.7180e-01],
          ...,
     

ValueError: not enough values to unpack (expected 4, got 3)