# PetFinder Competition PyTorch Lightning Submission

## Based on the PetFinder Competition PyTorch Lightning Training Notebook.
This notebook adds the training notebook as a data source, then imports and uses its trained models for inference and submission.

In [None]:
from pathlib import Path

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import os
import cv2
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import r2_score, mean_squared_error

from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize
)
from albumentations.pytorch import ToTensorV2

import warnings
warnings.simplefilter('ignore')

In [None]:
# Input locations: the competition data, and the directory that is later
# searched for the exported "*_jit*" model files.
DATA_DIR = Path("../input/petfinder-pawpularity-score/")
NB_DIR = Path("../input/petfinder-pretrained-efficientnet")
TRAIN_DIR, TEST_DIR = DATA_DIR / "train", DATA_DIR / "test"

# Use the GPU when one is visible to torch, otherwise run on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Run settings. The cells shown in this notebook read only IMG_SIZE, VALID_BS
# and NUM_WORKERS; the remaining keys are not referenced in the visible code.
config = {
    "SEED": 42,
    "NFOLDS": 5,
    "EPOCHS": 5,
    "LR": 2e-4,
    "IMG_SIZE": (224, 224),
    "MODEL_NAME": 'tf_efficientnet_b6_ns',
    "DR_RATE": 0.35,
    "NUM_LABELS": 1,
    "TRAIN_BS": 32,
    "VALID_BS": 4,
    "min_lr": 1e-6,
    "T_max": 20,
    "T_0": 25,
    "NUM_WORKERS": 0,
    "infra": "Kaggle",
    "competition": 'petfinder',
    "_wandb_kernel": 'tanaym',
    "wandb": False,
}

## Dataset class

In [None]:
class PetfinderData(Dataset):
    """Dataset yielding an image, its metadata features and (when training) its target.

    Args:
        df: Frame with an ``Id`` column of image ids (used as the file stem of
            ``<Id>.jpg``), a ``Pawpularity`` column when ``is_test`` is False,
            and any number of further columns, all of which are used as
            metadata features. The frame is read but never modified.
        is_test: When True no target is read or returned.
        augments: Optional callable invoked as ``augments(image=img)``; the
            ``'image'`` entry of its result replaces the image.
        image_dir: Optional directory containing the ``<Id>.jpg`` files.
            Defaults to ``TEST_DIR`` when ``is_test`` is True, else ``TRAIN_DIR``.
    """

    def __init__(self, df, is_test=False, augments=None, image_dir=None):
        self.df = df
        self.is_test = is_test
        self.augments = augments
        if image_dir is None:
            image_dir = TEST_DIR if is_test else TRAIN_DIR
        self.image_dir = Path(image_dir)

        self.images, self.meta_features, self.targets = self._process_df(self.df)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(img, meta_feats, target)`` when ``is_test`` is False, otherwise
            ``(img, meta_feats)``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_path = self.images[index]
        meta_feats = torch.tensor(self.meta_features[index], dtype=torch.float32)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising, which would otherwise fail on the slice below.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = img[:, :, ::-1]  # reverse the channel axis (OpenCV loads BGR)
        # NOTE: cv2.resize expects dsize as (width, height); IMG_SIZE is square
        # in this notebook's config, so the order makes no difference here.
        img = cv2.resize(img, config['IMG_SIZE'])

        if self.augments:
            img = self.augments(image=img)['image']

        if self.is_test:
            return img, meta_feats
        target = torch.tensor(self.targets[index], dtype=torch.float32)
        return img, meta_feats, target

    def __len__(self):
        return len(self.df)

    def _process_df(self, df):
        """Split ``df`` into image paths, a metadata array and targets.

        The paths are built in a separate list so the caller's ``Id`` column
        keeps the raw ids; writing them back into ``df`` would corrupt the
        frame for any later use (including building a second dataset from it).

        Returns:
            ``(image_paths, meta_features, targets)`` where ``targets`` is
            ``None`` when ``is_test`` is True.
        """
        image_paths = [str(self.image_dir / f"{image_id}.jpg") for image_id in df['Id']]

        if self.is_test:
            return image_paths, df.drop(['Id'], axis=1).values, None

        meta_features = df.drop(['Id', 'Pawpularity'], axis=1).values
        return image_paths, meta_features, df['Pawpularity'].tolist()

## Augmentation

In [None]:
class Augments:
    """
    Albumentations pipelines exposed as class attributes.

    ``train_augments`` resizes to ``config['IMG_SIZE']``, applies a horizontal
    and a vertical flip (each with probability 0.5), normalises the channels
    and converts the image to a tensor.
    ``valid_augments`` performs the same steps without the flips.
    The mean/std values are the commonly used ImageNet channel statistics.
    """
    train_augments = Compose(
        transforms=[
            Resize(height=config['IMG_SIZE'][0], width=config['IMG_SIZE'][1], p=1.0),
            HorizontalFlip(p=0.5),
            VerticalFlip(p=0.5),
            Normalize(
                p=1.0,
                max_pixel_value=255.0,
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
    )

    valid_augments = Compose(
        transforms=[
            Resize(height=config['IMG_SIZE'][0], width=config['IMG_SIZE'][1], p=1.0),
            Normalize(
                p=1.0,
                max_pixel_value=255.0,
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
    )

## Model inference

In [None]:
# Build the test-time input pipeline from test.csv.
test_df = pd.read_csv(DATA_DIR / "test.csv")

test_set = PetfinderData(
    test_df,
    is_test=True,
    augments=Augments.valid_augments,
)

test = DataLoader(
    test_set,
    batch_size=config['VALID_BS'],
    shuffle=False,  # keep predictions in the row order of test.csv
    num_workers=config['NUM_WORKERS'],
)

y_pred = []  # one list of per-image predictions for each loaded model
final_test_predictions = []
final_valid_predictions = {}
scores = []

# Path.glob yields matches in arbitrary order; sorting keeps the per-model
# prediction columns in the same order on every run.
model_paths = sorted(NB_DIR.glob("**/*_jit*"))
if not model_paths:
    # Fail here with a clear message rather than later when stacking an empty list.
    raise FileNotFoundError(f"No '*_jit*' model files found under {NB_DIR}")

# test predictions
for path in model_paths:
    # map_location remaps the stored tensors onto DEVICE, so a model exported
    # on a GPU can still be loaded when only the CPU is available.
    model = torch.jit.load(path, map_location=DEVICE).to(DEVICE)
    model.eval()  # once per model; no need to repeat it for every batch
    test_preds = []
    with torch.no_grad():
        for batch in test:
            imgs, meta = batch[0].to(DEVICE), batch[1].to(DEVICE)

            # squeeze() collapses a single-sample batch to a 0-d array, which
            # list.extend cannot iterate; atleast_1d restores the batch axis
            # and leaves every other shape exactly as squeeze() produced it.
            tmp_pred = np.atleast_1d(model(imgs, meta).cpu().numpy().squeeze())
            test_preds.extend(tmp_pred)
    y_pred.append(test_preds)

#     final_test_predictions.append(test_preds)
#     final_valid_predictions.update(dict(zip(valid_ids, valid_preds)))
#     print(f"fold rmse -> fold: {fold_}, rmse: {rmse}")
#     scores.append(rmse)

# print(f"scores -> mean: {np.mean(scores)}, std: {np.std(scores)}")
# final_valid_predictions = pd.DataFrame.from_dict(final_valid_predictions, orient="index").reset_index()
# final_valid_predictions.columns = ["id", "pred_1"]
# final_valid_predictions.to_csv("train_pred_1.csv", index=False)

## Submission

In [None]:
# Load the submission template, and re-read test.csv so that the Id column
# holds the raw image ids that go into the submission file.
# Paths are joined with pathlib, like every other path in this notebook.
submission = pd.read_csv(DATA_DIR / "sample_submission.csv")
test_df = pd.read_csv(DATA_DIR / "test.csv")

In [None]:
# One column per model (rows are test images), plus a "mean" column holding
# the row-wise average of the model columns.
y_pred_df = pd.DataFrame(np.stack(y_pred, axis=1)).assign(
    mean=lambda per_model: per_model.mean(axis=1)
)

In [None]:
# Keep the per-model predictions and their mean on disk, without the index column.
y_pred_df.to_csv(path_or_buf="y_pred.csv", index=False)

In [None]:
# Fill the template's two columns with the test ids and the averaged
# predictions, write the file, and leave the frame as the cell's output.
submission = submission.assign(
    Id=test_df["Id"],
    Pawpularity=y_pred_df["mean"],
)
submission.to_csv("submission.csv", index=False)
submission