## Changes
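- Classification: the Pawpularity target is divided by 100 and trained as a soft label
- Custom loss: weighted sum of ArcFace and BCEWithLogits
- Custom architecture: one backbone feeding a BCE head and an ArcFace neck
- Backbone: `swin_large_patch4_window12_384_in22k`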

**I hope you find it helpful :)**

In [None]:
import sys
sys.path.append("../input/tez-lib/")
sys.path.append("../input/timmmaster/")

In [None]:
import os
import random
import tez
import albumentations
import pandas as pd
import cv2
import numpy as np
import timm
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torchvision.io import read_image
from sklearn import metrics
import torch
from tez.callbacks import EarlyStopping
from tqdm import tqdm
from PIL import Image
from sklearn.preprocessing import StandardScaler
import math

In [None]:
def seed_everything(seed=2021):
    """Seed the random number generators used by this notebook.

    Args:
        seed: Integer handed to ``random``, NumPy, ``torch.manual_seed`` and
            ``torch.cuda.manual_seed``; its string form is also stored in the
            ``PYTHONHASHSEED`` environment variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The seeders are independent of each other, so apply them in one pass.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Request deterministic cuDNN kernels.
    torch.backends.cudnn.deterministic = True


seed_everything()

In [None]:
class args:
    """Run settings read by the other cells of this notebook."""

    # --- cross-validation ---
    fold = 0  # rows whose `kfold` equals this value form the validation split

    # --- data ---
    image_size = 384  # height and width passed to the resize / crop transforms
    batch_size = 8  # training batch size; validation uses twice this value

    # --- optimisation ---
    epochs = 20
    learning_rate = 1e-4  # learning rate given to Adam
    coeff = 0.2  # weight of the ArcFace term in the blended training loss

In [None]:
class PawpularDataset:
    """Map-style dataset pairing each image with its tabular features and target.

    Args:
        image_paths: Sequence of image file paths, one per sample.
        dense_features: Array indexed as ``dense_features[item, :]`` to obtain
            the feature row of a sample.
        targets: Per-sample scores; each one is divided by 100 when returned.
        augmentations: Callable invoked as ``augmentations(image=image)`` that
            returns a mapping with an ``"image"`` entry, or ``None`` to use the
            decoded image unchanged.
    """

    def __init__(self, image_paths, dense_features, targets, augmentations):
        self.image_paths = image_paths
        self.dense_features = dense_features
        self.targets = targets
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load, augment and tensorise the sample at index ``item``.

        Returns:
            Dict with float tensors under ``"image"`` (channel-first),
            ``"features"`` and ``"targets"`` (score divided by 100).

        Raises:
            FileNotFoundError: If the image file is missing or cannot be
                decoded by OpenCV.
        """
        path = self.image_paths[item]
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None;
            # fail here with the path instead of an opaque cvtColor error.
            raise FileNotFoundError(f"Could not read image file: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]

        # HWC -> CHW, the layout consumed by the model.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        features = self.dense_features[item, :]
        targets = self.targets[item] / 100.

        return {
            "image": torch.tensor(image, dtype=torch.float),
            "features": torch.tensor(features, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.float),
        }

In [None]:
class ArcFaceLoss(nn.modules.Module):
    """Angular-margin loss evaluated with binary cross-entropy on logits.

    The incoming values are treated as cosines. A margin-shifted cosine
    ``cos(theta + m)`` is blended with the plain cosine according to the
    label, scaled by ``s`` and passed to ``BCEWithLogitsLoss``.

    Args:
        s: Scale applied to the blended cosine before the BCE criterion.
        m: Angular margin in radians.
    """

    def __init__(self, s=30.0, m=0.5):
        super().__init__()
        self.crit = nn.BCEWithLogitsLoss()
        self.s = s
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # Threshold and linear fallback used where cosine <= cos(pi - m).
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, logits, labels):
        """Return half of the BCE loss on the margin-adjusted, scaled cosines.

        Args:
            logits: Cosine similarities; cast to float32 before use.
            labels: Targets with the same shape as ``logits``; used both as
                blend weights and as BCE targets.

        Returns:
            Scalar tensor equal to ``BCEWithLogits(output, labels) / 2``.
        """
        cosine = logits.float()
        # Rounding (notably under fp16) can push |cosine| slightly above 1,
        # making the radicand negative; clamp so sqrt never produces NaN.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(min=0.0))
        # cos(theta + m) = cos(theta) * cos(m) - sin(theta) * sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        output = (labels * phi) + ((1.0 - labels) * cosine)
        output *= self.s
        loss = self.crit(output, labels)
        return loss / 2
    
class ArcMarginProduct(nn.Module):
    """Linear layer without bias that outputs cosine similarities.

    Args:
        in_features: Width of the incoming feature vectors.
        out_features: Number of weight rows, i.e. cosines produced per sample.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill the weight uniformly in ``[-1/sqrt(in_features), 1/sqrt(in_features)]``."""
        fan_in = self.weight.size(1)
        bound = 1. / math.sqrt(fan_in)
        with torch.no_grad():
            self.weight.uniform_(-bound, bound)

    def forward(self, features):
        """Return the cosine between each feature row and each weight row."""
        unit_features = F.normalize(features)
        unit_weight = F.normalize(self.weight)
        return F.linear(unit_features, unit_weight)

In [None]:
class PawpularModel(tez.Model):
    """Swin backbone with a BCE prediction head and an auxiliary ArcFace branch.

    The backbone features pass through dropout and then feed two branches:
    ``out`` produces the single logit that is returned, while ``neck`` followed
    by ``arc_margin_product`` produces a cosine used only in the training loss.
    """

    def __init__(self):
        super().__init__()

        self.model = timm.create_model("swin_large_patch4_window12_384_in22k", pretrained=True, in_chans=3)
        in_features = self.model.head.in_features
        # Replace the classifier with a pass-through so `self.model(image)`
        # yields features of width `in_features` for the layers below.
        self.model.head = nn.Identity()
        # Embedding branch feeding the ArcFace cosine layer.
        self.neck = nn.Sequential(
            nn.BatchNorm1d(in_features),
            nn.Linear(in_features, 512, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(512),
            nn.Linear(512, 512, bias=False),
            nn.BatchNorm1d(512)
        )
        self.dropout = nn.Dropout(0.1)
        # Prediction branch producing one logit per sample.
        self.out = nn.Sequential(
            nn.Linear(in_features, 512, bias=False),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 1)
        )
        self.arc_margin_product = ArcMarginProduct(512, 1)

        self.step_scheduler_after = "epoch"

    def monitor_metrics(self, outputs, targets):
        """Return ``{"rmse": ...}`` computed between ``outputs`` and ``targets``.

        Both tensors are detached and moved to the CPU before the metric is
        evaluated.
        """
        outputs = outputs.cpu().detach().numpy()
        targets = targets.cpu().detach().numpy()
        # Take the root explicitly rather than passing `squared=False`, which
        # recent scikit-learn releases no longer accept.
        mse = metrics.mean_squared_error(targets, outputs)
        return {"rmse": np.sqrt(mse)}

    def fetch_scheduler(self):
        """Return a cosine-annealing-with-warm-restarts scheduler (period 10)."""
        sch = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=10, T_mult=1, eta_min=1e-6, last_epoch=-1
        )
        return sch

    def fetch_optimizer(self):
        """Return Adam over all parameters with ``args.learning_rate``."""
        opt = torch.optim.Adam(self.parameters(), lr=args.learning_rate)
        return opt

    def forward(self, image, features, targets=None):
        """Run the network and, when targets are given, compute loss and RMSE.

        Args:
            image: Batch of images passed to the backbone.
            features: Accepted for interface compatibility; not used here.
            targets: Optional targets scaled by 1/100 (see the ``* 100`` when
                the metric is computed). ``None`` skips loss and metrics.

        Returns:
            Tuple ``(logits, loss, metrics_dict)``; without targets the loss
            is ``0`` and the metrics dict is empty.
        """
        x = self.model(image)
        x = self.dropout(x)
        x_ = self.neck(x)
        x_ = self.arc_margin_product(x_)
        x = self.out(x)

        if targets is not None:
            loss_classification = nn.BCEWithLogitsLoss()(x, targets.view(-1, 1))
            loss_metric = ArcFaceLoss()(x_, targets.view(-1, 1))
            coeff = args.coeff
            # Blend: (1 - coeff) * BCE + coeff * ArcFace.
            loss = loss_classification * (1 - coeff) + loss_metric * coeff

            # Report RMSE on the original 0-100 scale. The local name avoids
            # shadowing the imported `metrics` module.
            batch_metrics = self.monitor_metrics(torch.sigmoid(x) * 100, targets * 100)
            return x, loss, batch_metrics
        return x, 0, {}

In [None]:
def _resize_step():
    """Create a fresh always-applied resize to ``args.image_size`` squared."""
    return albumentations.Resize(args.image_size, args.image_size, p=1)


def _normalize_step():
    """Create a fresh always-applied per-channel mean/std normalisation."""
    return albumentations.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0,
    )


# Training pipeline: resize, optional random-resized crop and flips, normalise.
_train_steps = [
    _resize_step(),
    albumentations.RandomResizedCrop(args.image_size, args.image_size, p=0.5),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.VerticalFlip(p=0.5),
    _normalize_step(),
]
train_aug = albumentations.Compose(_train_steps, p=1.0)

# Validation pipeline: resize and normalise only.
_valid_steps = [
    _resize_step(),
    _normalize_step(),
]
valid_aug = albumentations.Compose(_valid_steps, p=1.0)

In [None]:
# Load the training table. Later cells read its `kfold`, `Id` and `Pawpularity`
# columns plus the metadata columns listed in `dense_features`.
df = pd.read_csv("../input/same-old-creating-folds/train_10folds.csv")

In [None]:
# Metadata columns selected from the dataframe as each sample's feature row.
dense_features = [
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
]

In [None]:
# Rows of fold `args.fold` are held out for validation; all others are trained on.
_in_valid_fold = df.kfold == args.fold
df_train = df[~_in_valid_fold].reset_index(drop=True)
df_valid = df[_in_valid_fold].reset_index(drop=True)

In [None]:
def _train_image_path(x):
    """Return the JPEG path inside the competition's train folder for id ``x``."""
    return f"../input/petfinder-pawpularity-score/train/{x}.jpg"


# Both splits come from the training table, so both read from the train folder.
train_img_paths = list(map(_train_image_path, df_train["Id"].values))
valid_img_paths = list(map(_train_image_path, df_valid["Id"].values))

In [None]:
def _build_dataset(frame, img_paths, augmentations):
    """Wrap one split's dataframe, image paths and transform in a PawpularDataset."""
    return PawpularDataset(
        image_paths=img_paths,
        dense_features=frame[dense_features].values,
        targets=frame.Pawpularity.values,
        augmentations=augmentations,
    )


train_dataset = _build_dataset(df_train, train_img_paths, train_aug)
valid_dataset = _build_dataset(df_valid, valid_img_paths, valid_aug)


In [None]:
# Build the network; PawpularModel.__init__ creates the timm backbone with
# pretrained=True.
model = PawpularModel()

# Early stopping keyed on "valid_rmse" with mode="min" (lower is better) and a
# patience of 3. Weights go to model_f<fold>.bin.
# NOTE(review): the exact patience/checkpoint semantics are defined by tez's
# EarlyStopping — confirm against the library.
es = EarlyStopping(
    monitor="valid_rmse",
    model_path=f"model_f{args.fold}.bin",
    patience=3,
    mode="min",
    save_weights_only=True,
)

# Train on the training split and evaluate on the held-out fold, using a
# validation batch twice the size of the training batch.
# NOTE(review): device="cuda" presumably requires a GPU, and fp16=True
# presumably enables mixed precision — confirm against tez's Model.fit.
model.fit(
    train_dataset,
    valid_dataset=valid_dataset,
    train_bs=args.batch_size,
    valid_bs=2*args.batch_size,
    device="cuda",
    epochs=args.epochs,
    callbacks=[es],
    fp16=True
)