In [10]:
import numpy as np
import pandas as pd
import os

import albumentations
import cv2
import timm
import torch.nn as nn
from sklearn import metrics
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

import pytorch_lightning as pl
from pytorch_lightning import callbacks
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import LightningDataModule

import glob
import matplotlib.pyplot as plt

In [2]:
SEED = 2023

DATA_DIR = 'data'

TRAIN_IMAGES_DIR = os.path.join(DATA_DIR, 'train')
TEST_IMAGES_DIR = os.path.join(DATA_DIR, 'test')

OUTPUT_DIR = "output"

In [3]:
def create_folds(data: pd.DataFrame, num_splits = 5):
    data["fold"] = -1
    num_bins = int(np.floor(1 + np.log2(len(data)))) # sturge's rule

    data.loc[:, "bins"] = pd.cut(data["Pawpularity"], bins=num_bins, labels=False)
    skf = StratifiedKFold(n_splits=num_splits, shuffle=True, random_state=SEED)

    for fold_index, (train_idx, val_idx) in enumerate(skf.split(X=data, y=data.bins.values)):
        data.loc[val_idx, 'fold'] = fold_index

    data = data.drop('bins', axis=1)

    return data

In [4]:
dense_features = [
    'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
    'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur'
]

class PetFinderDataset(Dataset):
    def __init__(self, df: pd.DataFrame, dir: str, augmentations: albumentations.Compose):
        self.ids = df["Id"].values
        if "Pawpularity" in df.keys():
            self.targets = df["Pawpularity"].values
        else:
            self.targets = [-1] * len(df)
        self.dense_features = df[dense_features].values

        image_paths = [os.path.join(dir, f"{x}.jpg") for x in df["Id"].values]
        self.image_paths = image_paths

        self.augmentations = augmentations
        
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, item):
        image_id = self.ids[item]

        image = cv2.imread(self.image_paths[item])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]
            
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        
        features = self.dense_features[item, :]
        targets = self.targets[item]
        
        return image_id, torch.tensor(features, dtype=torch.float), torch.tensor(image, dtype=torch.float), torch.tensor(targets, dtype=torch.float)
    
class PetFinderDataModule(LightningDataModule):
    def __init__(self, 
                 df_train=None, df_val=None, df_test=None, 
                 train_images_dir=None, val_images_dir=None, test_images_dir=None, 
                 train_augmentations=None, val_augmentations=None, test_augmentations=None, 
                 batch_size=64
                ):
        super().__init__()
        self.df_train = df_train
        self.df_val = df_val
        self.df_test = df_test

        self.train_images_dir = train_images_dir
        self.val_images_dir = val_images_dir
        self.test_images_dir = test_images_dir

        self.train_augmentations = train_augmentations
        self.val_augmentations = val_augmentations
        self.test_augmentations = test_augmentations

        self.batch_size = batch_size

    def train_dataloader(self):
        return DataLoader(PetFinderDataset(self.df_train, self.train_images_dir, self.train_augmentations), batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(PetFinderDataset(self.df_val, self.val_images_dir, self.val_augmentations), batch_size=self.batch_size, shuffle=False)
    
    def test_dataloader(self):
        return DataLoader(PetFinderDataset(self.df_test, self.test_images_dir, self.test_augmentations), batch_size=self.batch_size, shuffle=False) 


In [5]:
class EfficientNetPawpularity(pl.LightningModule):
    def __init__(self, model_name="tf_efficientnet_b0_ns", pretrained=True):
        super().__init__()

        self.backbone = timm.create_model(model_name=model_name, pretrained=pretrained, in_chans=3)
        self.backbone.classifier = nn.Linear(self.backbone.classifier.in_features, 128)
        self.dropout = nn.Dropout(0.1)
        self.out = nn.Linear(128 + 12, 1)

        self.criterion = nn.BCEWithLogitsLoss()

        self.validation_step_outputs = []
        self.training_step_outputs = []

    def forward(self, input, features):
        x = self.backbone(input)
        x = self.dropout(x)

        x = torch.cat([x, features], dim=1)
        x = self.out(x)

        return x

    def training_step(self, batch, batch_indexes):
        loss, predictions, labels, rmse = self.step(batch, 'train')
        self.training_step_outputs.append({ "rmse": rmse, "loss": loss })

        return { 'loss': loss, 'predictions': predictions, 'labels': labels }

    def validation_step(self, batch, batch_indexes):
        loss, predictions, labels, rmse = self.step(batch, 'val')
        self.validation_step_outputs.append({ "rmse": rmse, "loss": loss })
        
        return { 'loss': loss, 'predictions': predictions, 'labels': labels }

    def step(self, batch, mode):
        image_ids, features, images, labels = batch
        labels = labels.float() / 100.0

        logits = self.forward(images, features).squeeze(1)
        loss = self.criterion(logits, labels) # using BCELoss to optimize models

        predictions = logits.sigmoid().detach().cpu() * 100
        labels = labels.detach().cpu() * 100
        
        rmse = mean_squared_error(predictions, labels, squared=False) # keeping track of RMSE as it is the competition metric
        rmse = torch.tensor(rmse, dtype=torch.float32)

        self.log(f'{mode}_loss', loss)
        
        return loss, predictions, labels, rmse
    
    def on_train_epoch_end(self):
        rsmes = [x["rmse"] for x in self.training_step_outputs]
        rsme = torch.stack(rsmes).mean()

        self.log(f'train_rmse', rsme, prog_bar=True)

        self.training_step_outputs.clear()

    def on_validation_epoch_end(self):
        rsmes = [x["rmse"] for x in self.validation_step_outputs]
        rsme = torch.stack(rsmes).mean()

        self.log(f'val_rmse', rsme, prog_bar=True)
        
        self.validation_step_outputs.clear()

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=1e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0 = 20, eta_min=1e-4)

        return [optimizer], [scheduler]

In [6]:
class SwinTinyPawpularity(pl.LightningModule):
    def __init__(self, model_name="swin_tiny_patch4_window7_224", pretrained=True):
        super().__init__()

        self.backbone = timm.create_model(model_name=model_name, pretrained=True, num_classes=128, in_chans=3)
        self.dropout = nn.Dropout(0.1)
        self.out = nn.Linear(128 + 12, 1)

        self.criterion = nn.BCEWithLogitsLoss()

        self.validation_step_outputs = []
        self.training_step_outputs = []

    def forward(self, input, features):
        x = self.backbone(input)
        x = self.dropout(x)

        x = torch.cat([x, features], dim=1)
        x = self.out(x)

        return x

    def training_step(self, batch, batch_indexes):
        loss, predictions, labels, rmse = self.step(batch, 'train')
        self.training_step_outputs.append({ "rmse": rmse, "loss": loss })

        return { 'loss': loss, 'predictions': predictions, 'labels': labels }

    def validation_step(self, batch, batch_indexes):
        loss, predictions, labels, rmse = self.step(batch, 'val')
        self.validation_step_outputs.append({ "rmse": rmse, "loss": loss })
        
        return { 'loss': loss, 'predictions': predictions, 'labels': labels }

    def step(self, batch, mode):
        image_ids, features, images, labels = batch
        labels = labels.float() / 100.0

        logits = self.forward(images, features).squeeze(1)
        loss = self.criterion(logits, labels) # using BCELoss to optimize models

        predictions = logits.sigmoid().detach().cpu() * 100
        labels = labels.detach().cpu() * 100
        
        rmse = mean_squared_error(predictions, labels, squared=False) # keeping track of RMSE as it is the competition metric
        rmse = torch.tensor(rmse, dtype=torch.float32)

        self.log(f'{mode}_loss', loss)
        
        return loss, predictions, labels, rmse
    
    def on_train_epoch_end(self):
        rsmes = [x["rmse"] for x in self.training_step_outputs]
        rsme = torch.stack(rsmes).mean()

        self.log(f'train_rmse', rsme, prog_bar=True)

        self.training_step_outputs.clear()

    def on_validation_epoch_end(self):
        rsmes = [x["rmse"] for x in self.validation_step_outputs]
        rsme = torch.stack(rsmes).mean()

        self.log(f'val_rmse', rsme, prog_bar=True)
        
        self.validation_step_outputs.clear()

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=1e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0 = 20, eta_min=1e-4)

        return [optimizer], [scheduler]

In [13]:
def create_augmentations(image_size: int):
    train_aug = albumentations.Compose(
        [
            albumentations.SmallestMaxSize(max_size=image_size, p=1),
            albumentations.RandomCrop(image_size, image_size, p=1),
            albumentations.HueSaturationValue(
                hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5
            ),
            albumentations.RandomBrightnessContrast(
                brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5
            ),
            albumentations.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0,
                p=1.0,
            ),
        ],
        p=1.0,
    )

    valid_aug = albumentations.Compose(
        [
            albumentations.SmallestMaxSize(max_size=image_size, p=1),
            albumentations.CenterCrop(image_size, image_size, p=1),
            albumentations.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0,
                p=1.0,
            ),
        ],
        p=1.0,
    )

    return train_aug, valid_aug

In [50]:
def create_model_and_datamodule(model_name: str, fold_index: int, df_train: pd.DataFrame, df_val: pd.DataFrame):
    if model_name == "swin_tiny_patch4_window7_224":
        image_size = 224
        model = SwinTinyPawpularity(pretrained=False)
    elif model_name == "tf_efficientnet_b0_ns_randomcrop":
        image_size = 256
        model = EfficientNetPawpularity(pretrained=False)

    model.to("cuda")
    model_checkpoint_path = os.path.join(OUTPUT_DIR, "model_checkpoints", model_name, f"fold_{fold_index}", "best_loss.ckpt")
    checkpoint = torch.load(model_checkpoint_path)
    model.load_state_dict(checkpoint['state_dict'])
    
    train_aug, valid_aug = create_augmentations(image_size)
    data_module = PetFinderDataModule(
        df_train=df_train, 
        df_val=df_val, 
        train_images_dir=TRAIN_IMAGES_DIR, 
        val_images_dir=TRAIN_IMAGES_DIR, 
        train_augmentations=train_aug,
        val_augmentations=valid_aug,
        batch_size=8
    )

    return model, data_module

def run_predictions(model, data_loader):
    final_image_ids = []
    final_predictions = []
    final_targets = []

    for batch, (image_ids, features, images, labels) in enumerate(data_loader):
        with torch.no_grad():
            predictions =  model(torch.as_tensor(images, dtype=torch.float32).cuda(), features.cuda())
            predictions = predictions.sigmoid() * 100
            predictions = predictions.cpu().data.numpy().reshape(-1)
        
        final_image_ids += list(image_ids)
        final_predictions += list(predictions)
        final_targets += list(labels)

    return pd.DataFrame({ "Id": final_image_ids, "Predicted": final_predictions })


In [51]:
df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
num_splits = 5
df_5 = create_folds(df, num_splits=num_splits)

df_ensemble = pd.DataFrame({ "Id": [], "swin_prediction": [], "efficientnet_prediction": [] })

for fold_index in range(num_splits):
    df_train = df_5[df_5['fold'] != fold_index].reset_index(drop=True)
    df_val = df_5[df_5['fold'] == fold_index].reset_index(drop=True)

    swin_model, swin_datamodule = create_model_and_datamodule("swin_tiny_patch4_window7_224", fold_index, df_train, df_val)
    efficientnet_model, efficientnet_datamodule = create_model_and_datamodule("tf_efficientnet_b0_ns_randomcrop", fold_index, df_train, df_val)

    # run only on out-of-fold predictions
    df_val_swin = run_predictions(swin_model, swin_datamodule.val_dataloader())
    df_val_swin = df_val_swin.rename(columns={ "Predicted": "swin_prediction" })
    df_val_swin['fold'] = fold_index

    df_val_efficientnet = run_predictions(efficientnet_model, efficientnet_datamodule.val_dataloader())
    df_val_efficientnet = df_val_efficientnet.rename(columns={ "Predicted": "efficientnet_prediction" })
    df_val_efficientnet['fold'] = fold_index

    df_val_merged = pd.merge(df_val_swin, df_val_efficientnet, on="Id")
    df_val_merged = pd.merge(df_val_merged, df_val[["Id", "Pawpularity", "fold"]].rename({ "Pawpularity": "target" }), on="Id")

    df_ensemble = pd.concat([df_ensemble, df_val_merged], ignore_index=True)

  model = create_fn(
  df_ensemble = pd.concat([df_ensemble, df_val_merged], ignore_index=True)
  model = create_fn(
  model = create_fn(
  model = create_fn(
  model = create_fn(


In [49]:
best_clf = None
best_improvement = float('-inf')
best_index = -1

for i in range(5):
    df_train = df_ensemble[df_ensemble['fold'] != i]
    df_val = df_ensemble[df_ensemble['fold'] == i]

    X_train = df_train[["swin_prediction", "efficientnet_prediction"]].values
    y_train = df_train["Pawpularity"].values

    X_val = df_val[["swin_prediction", "efficientnet_prediction"]].values
    y_val = df_val["Pawpularity"].values
    
    clf = SVR(C=1.0, epsilon=0.2, max_iter=10000)
    clf.fit(X_train, y_train)

    predictions = clf.predict(X_val)

    rmse = mean_squared_error(predictions, y_val, squared=False)
    swin_rmse = mean_squared_error(df_val["swin_prediction"], df_val["Pawpularity"], squared=False)
    efficientnet_rmse = mean_squared_error(df_val["efficientnet_prediction"], df_val["Pawpularity"], squared=False)
    print(f"RMSE for fold {i}: Ensemble: {rmse}, SWIN: {swin_rmse}, EfficientNet: {efficientnet_rmse}")

    improvement = (swin_rmse + efficientnet_rmse) / 2 - rmse
    if improvement > best_improvement:
        best_clf = clf
        best_improvement = improvement
        best_index = i

print(f"Best improvement from using SVM is fold {i}: {best_improvement}")

RMSE for fold 0: Ensemble: 18.507204988597593, SWIN: 18.753201528243817, EfficientNet: 18.5337780901583
RMSE for fold 1: Ensemble: 19.02512879113913, SWIN: 20.093251601368603, EfficientNet: 18.79414931744975
RMSE for fold 2: Ensemble: 18.957887315347456, SWIN: 19.5824620417417, EfficientNet: 19.096824362859447
RMSE for fold 3: Ensemble: 18.74677806948894, SWIN: 19.225013023193693, EfficientNet: 18.942841446046266
RMSE for fold 4: Ensemble: 19.158653100080016, SWIN: 19.230935940263734, EfficientNet: 19.145982931978512
Best improvement from using SVM is fold 4: 0.4185716682700473


In [None]:
import pickle

