# PetFinder Competition PyTorch Lightning Training
## Adapted from
* [Train Baseline Torch Lightning + GPU&TPU + W&B](https://www.kaggle.com/heyytanay/train-baseline-torch-lightning-gpu-tpu-w-b)

## Changelog

* V3 
  - Add Efficientnet_b0 - b2
  - Early stopping callback. 
  - Increased EPOCHS from 5 to 10
  - Add augmentation:
```python
        A.ColorJitter(p=.2),
        A.RandomGamma(p=.1),
        A.Sharpen(p=.1),
        A.Cutout(p=0.2),
```

* V2
    - Save a traced model with `torch.jit.save`

## Install and import packages

In [None]:
# ! pip install -q torchtext
! pip install -q torchtext==0.8.0 torch==1.7.1 pytorch-lightning==1.2.2
# ! pip install -q pytorch-lightning==1.1.8
! pip install -q timm
! pip install -q albumentations
! pip install -q --upgrade wandb

In [None]:
from datetime import datetime
from pathlib import Path

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import timm
import torch
import transformers
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import os
import cv2
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import r2_score, mean_squared_error

import wandb
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint

import albumentations as A
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize
)
from albumentations.pytorch import ToTensorV2

import warnings
warnings.simplefilter('ignore')

In [None]:
# Locations of the competition data and the device used for inference/tracing.
DATA_DIR = Path("../input/petfinder-pawpularity-score/")
TRAIN_DIR = DATA_DIR.joinpath("train")
TEST_DIR = DATA_DIR.joinpath("test")
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyper-parameters and run settings read by the cells below.
config = {
    "SEED": 42,
    "NFOLDS": 5,
    "EPOCHS": 10,
    "LR": 2e-4,
    "IMG_SIZE": (224, 224),
    # A list of architecture names: the first two pretrained timm models
    # matching the pattern (a single name such as 'tf_efficientnet_b6_ns'
    # was used previously).
    "MODEL_NAME": timm.list_models("tf_efficientnet_b[0-9]", pretrained=True)[:2],
    "DR_RATE": 0.35,
    "NUM_LABELS": 1,
    "TRAIN_BS": 32,
    "VALID_BS": 16,
    "min_lr": 1e-6,
    "T_max": 20,
    "T_0": 25,
    "NUM_WORKERS": 4,
    "patience": 5,
    "infra": "Kaggle",
    "competition": 'petfinder',
    "_wandb_kernel": 'tanaym',
    "wandb": False,
}

In [None]:
def set_seed(seed):
    """Seed every random number generator used by the notebook.

    Exports ``PYTHONHASHSEED`` and seeds Python's ``random``, NumPy and
    PyTorch. When CUDA is available, all CUDA devices are seeded as well and
    cuDNN is switched to deterministic mode.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

## Dataset class

In [None]:
class PetfinderData(Dataset):
    """Dataset yielding ``(image, meta_features[, target])`` per DataFrame row.

    Args:
        df: DataFrame with an ``Id`` column, the meta-feature columns and,
            unless ``is_test`` is set, a ``Pawpularity`` column. The frame is
            only read; it is never modified.
        is_test: When True, images are resolved under ``TEST_DIR`` and items
            carry no target. Otherwise they are resolved under ``TRAIN_DIR``.
        augments: Optional callable invoked as ``augments(image=img)``; the
            ``'image'`` entry of its result replaces the image.
    """

    def __init__(self, df, is_test=False, augments=None):
        self.df = df
        self.is_test = is_test
        self.augments = augments

        self.images, self.meta_features, self.targets = self._process_df(self.df)

    def __getitem__(self, index):
        """Load, colour-convert, resize and (optionally) augment one sample.

        Raises:
            OSError: If the image file cannot be read by OpenCV.
        """
        img_path = self.images[index]
        meta_feats = torch.tensor(self.meta_features[index], dtype=torch.float32)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising,
            # which would otherwise surface as an opaque TypeError below.
            raise OSError(f"Could not read image: {img_path}")
        # OpenCV loads images as BGR; convert to RGB channel order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, config['IMG_SIZE'])

        if self.augments:
            img = self.augments(image=img)['image']

        if self.is_test:
            return img, meta_feats

        target = torch.tensor(self.targets[index], dtype=torch.float32)
        return img, meta_feats, target

    def __len__(self):
        return len(self.df)

    def _process_df(self, df):
        """Split ``df`` into image paths, a meta-feature array and targets.

        Returns:
            ``(paths, meta_features, targets)`` where ``paths`` is a list of
            ``.jpg`` path strings, ``meta_features`` is ``df`` without the
            ``Id`` (and ``Pawpularity``) columns as an array, and ``targets``
            is the ``Pawpularity`` list, or ``None`` for test data.
        """
        image_dir = TEST_DIR if self.is_test else TRAIN_DIR
        # Build the paths in a new list so the caller's 'Id' column keeps the
        # raw ids instead of being overwritten with file paths.
        image_paths = [str(image_dir / f"{x}.jpg") for x in df['Id']]

        if self.is_test:
            meta_features = df.drop(['Id'], axis=1).values
            return image_paths, meta_features, None

        meta_features = df.drop(['Id', 'Pawpularity'], axis=1).values
        return image_paths, meta_features, df['Pawpularity'].tolist()

## Augmentation

In [None]:
class Augments:
    """Albumentations pipelines for training and validation/test images."""

    # Training: resize, random flips and photometric/cutout augmentations,
    # then normalise and convert to a tensor.
    train_augments = A.Compose(
        [
            A.Resize(*config['IMG_SIZE'], p=1.0),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.ColorJitter(p=.2),
            A.RandomGamma(p=.1),
            A.Sharpen(p=.1),
            A.Cutout(p=0.2),
            # mean/std are presumably the ImageNet channel statistics
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0,
                p=1.0,
            ),
            ToTensorV2(p=1.0),
        ],
        p=1.,
    )

    # Validation: deterministic resize, normalise and tensor conversion only.
    valid_augments = A.Compose(
        [
            A.Resize(*config['IMG_SIZE'], p=1.0),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                max_pixel_value=255.0,
                p=1.0,
            ),
            ToTensorV2(p=1.0),
        ],
        p=1.,
    )

In [None]:
# timm.list_models("*swin*", pretrained=True)

## PyTorch Lightning Model Class

In [None]:
class PetFinderModel(pl.LightningModule):
    """Frozen timm backbone plus a linear head over image and meta features.

    Args:
        model_name: Architecture name passed to ``timm.create_model``.
        pretrained: Whether to load pretrained backbone weights.
        num_meta_features: Number of tabular meta features concatenated to
            the backbone features before the linear head (default 12).
    """

    def __init__(self, model_name='tf_efficientnet_b6_ns', pretrained=True,
                 num_meta_features=12):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        # Freeze the backbone: only the new head `self.fc` receives gradients.
        for param in self.model.parameters():
            param.requires_grad = False

        self.n_features = self.model.classifier.in_features
        # Drop the original classifier so the backbone outputs features.
        self.model.reset_classifier(0)
        self.fc = nn.Linear(self.n_features + num_meta_features, config['NUM_LABELS'])

        self.train_loss = nn.MSELoss()
        self.valid_loss = nn.MSELoss()

    def forward(self, images, meta):
        """Return head output for concatenated backbone and meta features."""
        features = self.model(images)
        features = torch.cat([features, meta], dim=1)
        return self.fc(features)

    @staticmethod
    def _rmse(criterion, out, target):
        """Return sqrt(criterion(out, target)) with ``out`` shaped like ``target``.

        The head emits ``(batch, NUM_LABELS)`` while scalar targets collate to
        ``(batch,)``. Without the reshape, MSELoss would broadcast the pair to
        ``(batch, batch)`` and average over every prediction/target
        combination. ``reshape`` raises if the element counts differ.
        """
        return torch.sqrt(criterion(out.reshape(target.shape), target))

    def training_step(self, batch, batch_idx):
        imgs, meta, target = batch[0], batch[1], batch[2]

        out = self(imgs, meta)
        train_loss = self._rmse(self.train_loss, out, target)

        # Lightning 1.x records metrics through self.log.
        self.log('train_loss', train_loss)
        return {'loss': train_loss}

    def test_step(self, batch, batch_idx):
        imgs, meta = batch[0], batch[1]

        # NOTE: despite the key name, this holds raw predictions, not a loss.
        return {'test_loss': self(imgs, meta)}

    def test_epoch_end(self, outputs):
        """Concatenate the per-batch predictions into one flat tensor."""
        # reshape(-1) keeps a 1-D tensor even for a batch of one, where
        # squeeze() would give a 0-d tensor that torch.cat rejects.
        cat = torch.cat([x['test_loss'].reshape(-1) for x in outputs], dim=0)
        return {'all': cat}

    def validation_step(self, batch, batch_idx):
        imgs, meta, target = batch[0], batch[1], batch[2]

        out = self(imgs, meta)
        valid_loss = self._rmse(self.valid_loss, out, target)

        # Log the epoch-level 'val_loss' so callbacks monitoring it (early
        # stopping, checkpointing) can find the metric.
        self.log('val_loss', valid_loss, on_step=False, on_epoch=True, prog_bar=True)
        return {'val_loss': valid_loss}

    def validation_epoch_end(self, outputs):
        """Print the mean of the per-batch validation RMSE values."""
        # Epoch-end hooks are expected to return nothing in Lightning 1.x.
        self.validation_end(outputs)

    def validation_end(self, outputs):
        """Return (and print) the mean validation loss.

        This is the pre-1.0 hook name; it is kept so existing direct callers
        keep working and is invoked from ``validation_epoch_end``.
        """
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        logs = {'val_loss': avg_loss}

        print(f"val_loss: {avg_loss}")
        return {'avg_val_loss': avg_loss, 'log': logs}

    def configure_optimizers(self):
        """Adam with cosine annealing from ``LR`` down to ``min_lr``."""
        opt = torch.optim.Adam(self.parameters(), lr=config['LR'])
        sch = torch.optim.lr_scheduler.CosineAnnealingLR(
            opt,
            T_max=config['T_max'],
            eta_min=config['min_lr']
        )

        return [opt], [sch]


## Model Training

In [None]:
def _predict(model, loader):
    """Return a flat list of predictions for every sample in ``loader``.

    Only the first two entries of each batch (images, meta features) are
    used, so the helper serves both labelled and unlabelled loaders. Batches
    are moved to ``DEVICE``; the model is expected to be there already.
    """
    model.eval()
    preds = []
    with torch.no_grad():
        for batch in loader:
            imgs, meta = batch[0].to(DEVICE), batch[1].to(DEVICE)
            # reshape(-1) keeps a 1-D array even for a batch of one, where
            # squeeze() would give a 0-d array that cannot be iterated.
            preds.extend(model(imgs, meta).cpu().numpy().reshape(-1))
    return preds


for model_name in config["MODEL_NAME"]:
    # Run the K-fold training loop for this architecture.
    kf = StratifiedKFold(n_splits=config['NFOLDS'], shuffle=True, random_state=config['SEED'])
    train_file = pd.read_csv(DATA_DIR / "train.csv")
    test_df = pd.read_csv(DATA_DIR / "test.csv")

    test_set = PetfinderData(
        test_df,
        is_test=True,
        augments=Augments.valid_augments
    )

    test = DataLoader(
        test_set,
        batch_size=config['VALID_BS'],
        shuffle=False,
        num_workers=config['NUM_WORKERS']
    )

    set_seed(config['SEED'])

    y_pred = []
    final_test_predictions = []
    final_valid_predictions = {}
    scores = []

    t_1 = datetime.now()
    for fold_, (train_idx, valid_idx) in enumerate(kf.split(X=train_file, y=train_file['Pawpularity'])):
        t_2 = datetime.now()
        print(f"{'='*20} {model_name} Fold: {fold_} {'='*20}")

        # kf.split yields positional indices, hence iloc. The copies keep the
        # fold frames independent of train_file.
        train_df = train_file.iloc[train_idx].copy()
        valid_df = train_file.iloc[valid_idx].copy()

        # Capture the raw ids and targets before the datasets are built.
        valid_ids = valid_df.Id.values.tolist()
        y_valid = valid_df.Pawpularity

        train_set = PetfinderData(
            train_df,
            augments=Augments.train_augments
        )

        valid_set = PetfinderData(
            valid_df,
            augments=Augments.valid_augments
        )

        train = DataLoader(
            train_set,
            batch_size=config['TRAIN_BS'],
            shuffle=True,
            num_workers=config['NUM_WORKERS'],
            pin_memory=True
        )
        # shuffle=False keeps predictions aligned with valid_ids / y_valid.
        valid = DataLoader(
            valid_set,
            batch_size=config['VALID_BS'],
            shuffle=False,
            num_workers=config['NUM_WORKERS']
        )

        checkpoint_callback = ModelCheckpoint(
            monitor="val_loss",
            dirpath="./",
            filename=f"fold_{fold_}_{model_name}",
            save_top_k=1,
            mode="min",
        )
        es_callback = pl.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=config["patience"],
            mode='min',
        )
        # Build the architecture of the current iteration; without the
        # argument every iteration would train the class default.
        model = PetFinderModel(model_name=model_name)
        trainer = pl.Trainer(
            max_epochs=config['EPOCHS'],
            # Fall back to CPU when no GPU is present, matching DEVICE.
            gpus=1 if torch.cuda.is_available() else 0,
            callbacks=[es_callback, checkpoint_callback],
        )
        trainer.fit(model, train, valid)

        # Use the best checkpoint (lowest monitored val_loss) for inference
        # and tracing; fall back to the final weights if none was written.
        best_path = checkpoint_callback.best_model_path
        if best_path:
            checkpoint = torch.load(best_path, map_location="cpu")
            model.load_state_dict(checkpoint["state_dict"])
        # Put the model on the device the batches are moved to in _predict.
        model.to(DEVICE)

        # Validation predictions and fold score.
        valid_preds = _predict(model, valid)
        # sqrt(MSE) works across scikit-learn versions, unlike squared=False.
        rmse = np.sqrt(mean_squared_error(y_valid, valid_preds))

        # Test predictions.
        test_preds = _predict(model, test)
        y_pred.append(test_preds)

        final_test_predictions.append(test_preds)
        final_valid_predictions.update(dict(zip(valid_ids, valid_preds)))
        print(f"fold rmse -> fold: {fold_}, rmse: {rmse}")
        scores.append(rmse)

        # Save a traced version of the model using random example inputs.
        model.eval()
        imgs = torch.randn(2, 3, *config['IMG_SIZE'], dtype=torch.float32)
        meta = torch.randn(2, 12, dtype=torch.float32)
        imgs, meta = imgs.to(device=DEVICE), meta.to(device=DEVICE)
        with torch.no_grad():
            traced_cell = torch.jit.trace(model.to(DEVICE).forward, (imgs, meta))
        torch.jit.save(traced_cell, f"fold_{fold_}_{model_name}_jit")
        elapsed_time_2 = datetime.now() - t_2
        print(f"Fold {fold_} took {elapsed_time_2} hh:mm:ss")

    elapsed_time_1 = datetime.now() - t_1
    print(f"Model {model_name} took (hh:mm:ss.ms) {elapsed_time_1} hh:mm:ss")

    print(f"scores {model_name} -> mean: {np.mean(scores)}, std: {np.std(scores)}")
    # Out-of-fold predictions, one row per training id.
    final_valid_predictions = pd.DataFrame.from_dict(final_valid_predictions, orient="index").reset_index()
    final_valid_predictions.columns = ["Id", "Pawpularity"]
    final_valid_predictions.to_csv(f"train_pred_{model_name}.csv", index=False)

    # Fold-averaged test predictions. Assign by column key: attribute
    # assignment (`df.target = ...`) would not create or update a column.
    sample_submission = pd.read_csv(f"{DATA_DIR}/sample_submission.csv")
    sample_submission["Pawpularity"] = np.mean(np.column_stack(final_test_predictions), axis=1)
    sample_submission.columns = ["Id", "Pawpularity"]
    sample_submission.to_csv(f"test_pred_{model_name}.csv", index=False)

## Submission


In [None]:
# Load the sample submission as the template for the final output.
submission = pd.read_csv(DATA_DIR / "sample_submission.csv")

In [None]:
# One column per entry of y_pred (one per fold), plus their row-wise mean.
# NOTE(review): y_pred is re-initialised for every model in the training
# cell, so only the last model's folds appear here — confirm this is intended.
y_pred_df = pd.DataFrame(np.column_stack(y_pred))
y_pred_df = y_pred_df.assign(mean=y_pred_df.mean(axis=1))

In [None]:
# Persist the per-fold test predictions and their mean for later inspection.
y_pred_df.to_csv("y_pred.csv", index=False)

In [None]:
# Use the fold-averaged prediction as the submitted Pawpularity score.
submission["Pawpularity"] = y_pred_df["mean"]
submission.to_csv("submission.csv", index=False)
# Bare expression: displays the final frame as the notebook cell output.
submission