## Notebook Score: CV ~18.2 and LB 18.33

This notebook implements mixup in a very hacky way. If you don't use tez, consider implementing it in the training step.

Summary:
* Model: Swin-Tiny 224 (`swin_tiny_patch4_window7_224` from timm)
* Trains on images & meta features + breed
* Treats regression as classification (BCEWithLogitsLoss on target / 100 > Sigmoid * 100 > RMSE)
* Mixup applied on images & meta features


I'm very new to image competitions, so any advice would be much appreciated!

## Libraries

In [None]:
import sys
sys.path.append("../input/tez-lib")
sys.path.append("../input/timmmaster")

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import tez
import albumentations
import timm
import torch.nn as nn
from sklearn import metrics
import torch
from tez.callbacks import EarlyStopping
from tqdm import tqdm

In [None]:
class args:
    """Hyperparameters shared across the notebook, read as ``args.<name>``."""

    # Backbone passed to timm.create_model and the square input size
    # used by the augmentation pipelines.
    model_name = "swin_tiny_patch4_window7_224"
    image_size = 224

    # Training schedule.
    batch_size = 32
    epochs = 10

    # Beta(alpha, alpha) parameter for mixup; keep it in [0.1, 0.4].
    mixup_alpha = 0.2

In [None]:
# x is images, z is meta features, y is target
def mixup_data(x, z, y, alpha=None):
    """Blend every sample of a batch with a randomly chosen sample of the same batch.

    Args:
        x: Batch of images; the first dimension is the batch.
        z: Batch of meta features with the same batch size as ``x``.
        y: Batch of targets with the same batch size as ``x``.
        alpha: Parameter of the Beta(alpha, alpha) distribution the mixing
            weight is drawn from. ``None`` (default) uses ``args.mixup_alpha``.
            A value <= 0 disables mixing (the weight is 1).

    Returns:
        Tuple ``(mixed_x, mixed_z, y_a, y_b, lam)``: the blended images and
        meta features, the original targets, the targets of the samples that
        were blended in, and the mixing weight as a Python float.
    """
    if alpha is None:
        alpha = args.mixup_alpha

    # float() keeps lam a plain Python scalar instead of a numpy float64.
    lam = float(np.random.beta(alpha, alpha)) if alpha > 0 else 1.0

    batch_size = x.size(0)
    # Random partner for each sample; created on the batch's own device so the
    # function works on CPU as well as on any GPU.
    index = torch.randperm(batch_size, device=x.device)

    # Weight lam on the sample itself, (1 - lam) on its shuffled partner.
    mixed_x = lam * x + (1 - lam) * x[index]
    mixed_z = lam * z + (1 - lam) * z[index]

    # Targets of the sample itself and of the partner it was blended with.
    y_a, y_b = y, y[index]

    return mixed_x, mixed_z, y_a, y_b, lam

# where pred is the output from the forward - predictions basically
def mixup_loss(loss_fn, pred, y_a, y_b, lam):
    """Combine the losses against both mixup targets, weighted by ``lam``.

    Args:
        loss_fn: Callable invoked as ``loss_fn(pred, target)``.
        pred: Model output for the mixed batch.
        y_a: Targets of the original samples.
        y_b: Targets of the samples that were blended in.
        lam: Weight given to the ``y_a`` term; ``1 - lam`` goes to ``y_b``.

    Returns:
        ``lam * loss_fn(pred, y_a) + (1 - lam) * loss_fn(pred, y_b)``.
    """
    loss_a = loss_fn(pred, y_a)
    loss_b = loss_fn(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

## Dataset & Model Classes

In [None]:
class PawpularDataset:
    """Indexable dataset yielding an image tensor, its meta features and a scaled target.

    Args:
        image_paths: Sequence of image file paths, one per sample.
        meta_features: 2-D array indexed as ``meta_features[index, :]``.
        targets: Sequence of scores; each one is divided by 100 on access.
        augmentations: Callable invoked as ``augmentations(image=...)`` that
            returns a mapping with an ``'image'`` entry, or ``None`` to skip
            augmentation.
    """

    def __init__(self, image_paths, meta_features, targets, augmentations):
        self.image_paths = image_paths
        self.meta_features = meta_features
        self.targets = targets
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Load sample ``index`` and return it as a dict of float tensors.

        Returns:
            Dict with keys ``'image'`` (channels-first), ``'features'`` and
            ``'targets'`` (score divided by 100).

        Raises:
            OSError: If the image file cannot be loaded.
        """
        path = self.image_paths[index]
        # OpenCV loads images in BGR channel order.
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the offending path instead of inside cvtColor.
            raise OSError(f"cv2.imread could not load image: {path!r}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations is not None:
            # The pipeline returns a dict; the transformed image is under 'image'.
            image = self.augmentations(image=image)['image']

        # HxWxC -> CxHxW
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        features = self.meta_features[index, :]
        # Scale the score by 1/100 so it can be used as a BCE target.
        targets = self.targets[index] / 100.

        return {
            'image': torch.tensor(image, dtype=torch.float),
            'features': torch.tensor(features, dtype=torch.float),
            'targets': torch.tensor(targets, dtype=torch.float),
        }

In [None]:
class PawpularModel(tez.Model):
    """timm backbone plus a linear layer over image embedding and meta features.

    The backbone's head is replaced by a 128-unit linear layer. After dropout
    its output is concatenated with the 13 meta features and projected to a
    single logit. ``forward`` returns that raw logit; apply
    ``torch.sigmoid(...) * 100`` to obtain a score on the 0-100 scale.
    """

    def __init__(self):
        super().__init__()

        self.model = timm.create_model(args.model_name, pretrained=True, in_chans=3)
        self.model.head = nn.Linear(self.model.head.in_features, 128)
        self.dropout = nn.Dropout(p=0.5)
        # 128 image features + 13 meta features -> 1 logit
        self.dense1 = nn.Linear(128 + 13, 1)

        self.step_scheduler_after = 'epoch'

    def monitor_metrics(self, outputs, targets):
        """Return ``{'rmse': value}`` for the given predictions and targets.

        Both tensors are flattened first so a ``(batch, 1)`` prediction and a
        ``(batch,)`` target are compared element-wise rather than broadcast.
        """
        # Computed with numpy rather than
        # sklearn.metrics.mean_squared_error(squared=False): the `squared`
        # argument is deprecated and removed in newer scikit-learn releases.
        outputs = outputs.cpu().detach().numpy().astype(np.float64).reshape(-1)
        targets = targets.cpu().detach().numpy().astype(np.float64).reshape(-1)
        rmse = float(np.sqrt(np.mean((targets - outputs) ** 2)))
        return {'rmse': rmse}

    def fetch_scheduler(self):
        """Return a cosine-annealing-with-warm-restarts scheduler for ``self.optimizer``."""
        sch = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=10, T_mult=1, eta_min=1e-6, last_epoch=-1
        )
        return sch

    def fetch_optimizer(self):
        """Return an Adam optimizer over all parameters with lr=1e-4."""
        opt = torch.optim.Adam(self.parameters(), lr=1e-4)
        return opt

    def forward(self, image, features, targets=None):
        """Run the model and, when targets are given, compute loss and RMSE.

        Args:
            image: Batch of images.
            features: Batch of meta features, concatenated to the image
                embedding along dim 1.
            targets: Optional batch of targets already scaled by 1/100.

        Returns:
            ``(logits, loss, metrics)``. Without targets the loss is ``0`` and
            the metrics dict is empty. With mixup active, the reported RMSE
            still compares against the original (unmixed) targets.
        """
        # Mixup only when targets are available and the model is training.
        # NOTE(review): this relies on tez setting `_train_state` to True
        # during training steps - confirm against the installed tez version.
        use_mixup = (targets is not None) and (self._train_state == True)  # noqa: E712
        if use_mixup:
            image, features, target_a, target_b, lam = mixup_data(
                image, features, targets.view(-1, 1)
            )
            # Cast to float32 only; tensors stay on the device they arrived
            # on instead of being forced onto 'cuda'.
            image = image.float()
            features = features.float()
            target_a = target_a.float()
            target_b = target_b.float()

        x = self.model(image)
        x = self.dropout(x)
        # Combine with meta features and reduce to one output (the score logit).
        x = torch.cat([x, features], dim=1)
        x = self.dense1(x)

        if targets is None:
            return x, 0, {}

        loss_fn = nn.BCEWithLogitsLoss()
        if use_mixup:
            loss = mixup_loss(loss_fn, x, target_a, target_b, lam)
        else:
            loss = loss_fn(x, targets.view(-1, 1))

        # sigmoid maps the logit to [0, 1]; * 100 restores the 0-100 score range.
        batch_metrics = self.monitor_metrics(torch.sigmoid(x) * 100, targets * 100)
        return x, loss, batch_metrics

## Image Augmentations

In [None]:
def _make_normalize():
    """Build the Normalize step used at the end of both pipelines."""
    return albumentations.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0,
    )


# Training pipeline: random crop to the model input size, colour jitter,
# flip/rotation/shift, then normalisation.
_train_transforms = [
    albumentations.RandomResizedCrop(
        height=args.image_size,
        width=args.image_size,
        scale=(0.08, 1),
        ratio=(0.75, 1),
        p=1.0,
    ),
    # NOTE(review): albumentations' default limits here are much larger than
    # 0.2, so these look like very small colour shifts - confirm intended.
    albumentations.HueSaturationValue(
        hue_shift_limit=0.2,
        sat_shift_limit=0.2,
        val_shift_limit=0.2,
        p=0.5,
    ),
    albumentations.RandomBrightnessContrast(
        brightness_limit=(-0.1, 0.1),
        contrast_limit=(-0.1, 0.1),
        p=0.5,
    ),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.Rotate(limit=180, p=0.7),
    albumentations.ShiftScaleRotate(
        shift_limit=0.1,
        scale_limit=0.1,
        rotate_limit=45,
        p=0.5,
    ),
    _make_normalize(),
]
train_aug = albumentations.Compose(_train_transforms, p=1.0)

# Validation pipeline: resize to the model input size and normalise.
_valid_transforms = [
    albumentations.Resize(args.image_size, args.image_size, p=1.0),
    _make_normalize(),
]
valid_aug = albumentations.Compose(_valid_transforms, p=1.0)

In [None]:
# Training table with fold assignments; the training loop below reads its
# `kfold`, `Id` and `Pawpularity` columns plus the meta-feature columns.
df = pd.read_csv('../input/no-dupes-pawpularity/train_5folds.csv')
# Breed: 0 for dog, 1 for cat, 2 for neither
# Bare expression so the notebook displays the frame as the cell output.
df

In [None]:
# Columns of `df` fed to the model as meta features (13 in total, matching
# the 128 + 13 input width of the model's final linear layer).
meta_features = [
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
    'Breed',
]

## Training

In [None]:
# Cross-validation: one freshly initialised model is trained per fold.
for fold_ in range(5):
    print(f'--- Fold {fold_} ---')

    # Rows of the current fold are held out for validation; all remaining
    # rows are used for training. reset_index(drop=True) renumbers the rows
    # from 0 (only `.values` are read below).
    in_valid_fold = df.kfold == fold_
    df_train = df[~in_valid_fold].reset_index(drop=True)
    df_valid = df[in_valid_fold].reset_index(drop=True)

    train_img_paths = [
        f"../input/petfinder-pawpularity-score/train/{img_id}.jpg"
        for img_id in df_train['Id'].values
    ]
    valid_img_paths = [
        f"../input/petfinder-pawpularity-score/train/{img_id}.jpg"
        for img_id in df_valid['Id'].values
    ]

    train_dataset = PawpularDataset(
        image_paths=train_img_paths,
        meta_features=df_train[meta_features].values,
        targets=df_train.Pawpularity.values,
        augmentations=train_aug,
    )
    valid_dataset = PawpularDataset(
        image_paths=valid_img_paths,
        meta_features=df_valid[meta_features].values,
        targets=df_valid.Pawpularity.values,
        augmentations=valid_aug,
    )

    model = PawpularModel()

    # Watches validation RMSE (lower is better) with a patience of 3 and
    # writes the weights to a per-fold file.
    es = EarlyStopping(
        monitor='valid_rmse',
        model_path=f'{args.model_name}_f{fold_}.bin',
        patience=3,
        mode='min',
        save_weights_only=True,
    )

    # Validation uses twice the training batch size.
    model.fit(
        train_dataset=train_dataset,
        valid_dataset=valid_dataset,
        train_bs=args.batch_size,
        valid_bs=2 * args.batch_size,
        device='cuda',
        epochs=args.epochs,
        callbacks=[es],
        fp16=True,
        n_jobs=2,
    )