## **Find me the best Image**
Whether on social networking sites, dating apps, or a pet popularity page, profile pictures play a major role in shaping people's opinions. Hence, a lot of effort goes into making a photo look its best in order to leave a good first impression.

### Pawpularity Contest

* Analyze raw photos and metadata to predict the popularity of pet photos.
* Predict engagement with a pet's profile.


In [None]:
import os
import sys
import cv2
import torch
import numpy as np
import torchmetrics
import pandas as pd
import seaborn as sns
from glob import glob
import torch.nn as nn
from torch import Tensor
import albumentations as A
import pytorch_lightning as pl
from torchvision import models
import matplotlib.pyplot as plt
from torchvision import transforms
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.model_selection import StratifiedKFold
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import mean_squared_error
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks import Callback
from pytorch_lightning import LightningDataModule
from sklearn.model_selection import train_test_split
from pytorch_lightning.core.lightning import LightningModule
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning import LightningModule, Trainer, seed_everything

# Add the local timm source directory to the import search path before
# importing timm.
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm

# Locations of the competition image folders and metadata tables.
train_path = "/kaggle/input/petfinder-pawpularity-score/train"
train_meta_path = "/kaggle/input/petfinder-pawpularity-score/train.csv"
test_path = "/kaggle/input/petfinder-pawpularity-score/test"
test_meta_path = "/kaggle/input/petfinder-pawpularity-score/test.csv"
sample_meta_path = "/kaggle/input/petfinder-pawpularity-score/sample_submission.csv"

# Load the train/test metadata and the sample submission as DataFrames.
train_meta_data = pd.read_csv(train_meta_path)
test_meta_data = pd.read_csv(test_meta_path)
sample_data = pd.read_csv(sample_meta_path)

# Suppress all Python warnings emitted from this point on.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Preview the first rows of the training metadata table.
train_meta_data.head()

In [None]:
# Plot the distribution of the regression target.
# `sns.distplot` is deprecated (and removed in recent seaborn releases);
# `histplot` with a KDE overlay on a density scale is its documented replacement.
plt.figure()
sns.histplot(train_meta_data["Pawpularity"], kde=True, stat="density")
plt.show()

In [None]:
# Report how many files are present in the train and test image folders.
print(f"Total Train Images: {len(os.listdir(train_path))}")
print(f"Total Test Images: {len(os.listdir(test_path))}")

In [None]:
class CFG:
    """Central place for the notebook's hyper-parameters (plain class attributes)."""

    # --- reproducibility ---
    seed = 42                        # passed to seed_everything

    # --- model ---
    model = 'tf_efficientnet_b4_ns'  # timm architecture name
    pretrained = True
    out_size = 1                     # width of the final linear layer
    img_size = 224                   # square side length produced by the transforms

    # --- optimisation ---
    batch_size = 32                  # train / validation loader batch size
    num_epochs = 20
    lr = 5e-4                        # initial AdamW learning rate
    min_lr = 1e-6                    # eta_min of the cosine schedule
    t_max = 20                       # T_max of the cosine schedule
# Local checkpoint file that PawNet loads into the timm backbone.
pretrained_path = '../input/timmefficientnet/tf_efficientnet_b4_ns-d6313a46.pth'
# Seed the random number generators with the configured seed.
seed_everything(CFG.seed)

In [None]:
# Hold out 20% of the labelled rows for validation.
# The split is seeded explicitly so it does not depend on the global RNG state
# (and therefore on which cells were executed before this one).
tlabels_df, vlabels_df = train_test_split(
    train_meta_data, test_size=0.2, random_state=CFG.seed
)
# Give both frames a fresh 0..n-1 index; reset_index returns new frames instead
# of mutating the objects returned by the split.
tlabels_df = tlabels_df.reset_index(drop=True)
vlabels_df = vlabels_df.reset_index(drop=True)

In [None]:
# Report the number of rows in the training and validation splits.
print(f"Total Train Data: {len(tlabels_df)}")
print(f"Total Valid Data: {len(vlabels_df)}")

In [None]:
def get_transform(phase: str):
    """Build the albumentations pipeline for the given phase.

    ``phase == 'train'`` yields the augmenting pipeline; any other value yields
    the deterministic resize-only pipeline. Both end with ImageNet
    normalisation followed by conversion to a tensor.
    """
    side = CFG.img_size
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]

    if phase != 'train':
        # Evaluation / inference: plain resize, no randomness.
        steps = [A.Resize(height=side, width=side)]
    else:
        # Training: random crop, flips, rotation, brightness jitter, cutout, shift/scale.
        steps = [
            A.RandomResizedCrop(height=side, width=side),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Rotate(limit=180, p=0.7),
            A.RandomBrightness(limit=0.6, p=0.5),
            A.Cutout(
                num_holes=10,
                max_h_size=12,
                max_w_size=12,
                fill_value=0,
                always_apply=False,
                p=0.5,
            ),
            A.ShiftScaleRotate(
                shift_limit=0.25,
                scale_limit=0.1,
                rotate_limit=0,
                p=0.5,
            ),
        ]

    # Shared tail of both pipelines.
    steps.append(A.Normalize(imagenet_mean, imagenet_std))
    steps.append(ToTensorV2(p=1.0))
    return A.Compose(steps)

In [None]:
class myPetDataset(Dataset):
    """Dataset that reads ``<Id>.jpg`` images listed in a metadata DataFrame.

    Args:
        path_to_data: Directory containing the ``.jpg`` files.
        label_df: DataFrame with an ``Id`` column and, unless ``is_testing``
            is true, a ``Pawpularity`` column.
        transform: Callable invoked as ``transform(image=array)`` that returns
            a mapping with an ``'image'`` entry. When ``None`` the RGB array
            is returned untouched.
        is_testing: When true, items carry the id instead of a label.

    Items:
        * training/validation: ``(image, label)`` where ``label`` is
          ``Pawpularity / 100`` as ``float32``;
        * testing: ``{'Id': id, 'Image': image}``.
    """

    def __init__(self, path_to_data, label_df, transform=None, is_testing=False):
        super().__init__()

        self.train_img = path_to_data
        self.labels = label_df
        self.transform = transform
        self.testing = is_testing

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        # Positional lookup: works for any DataFrame index and raises
        # IndexError (not KeyError) when ``index`` is out of range.
        ids = self.labels["Id"].iloc[index]
        file_name = os.path.join(self.train_img, ids + ".jpg")

        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image file: {file_name}")
        # OpenCV decodes to BGR, while the ImageNet mean/std used by the
        # transforms (and the pretrained weights) assume RGB channel order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']

        if self.testing:
            return {'Id': ids, 'Image': image}

        # float32 so the collated target matches the dtype of the model output.
        label = np.float32(self.labels["Pawpularity"].iloc[index] / 100.0)
        return image, label

In [None]:
class myPetDatasetModule(LightningDataModule):
    """Data module that builds the train, validation and test loaders.

    The datasets are created from the module-level ``train_path`` /
    ``test_path`` folders and the ``tlabels_df`` / ``vlabels_df`` /
    ``test_meta_data`` frames.
    """

    def __init__(self):
        super().__init__()

    def train_dataloader(self):
        """Return the shuffled, augmented training loader."""
        self.train = myPetDataset(train_path, tlabels_df, transform=get_transform("train"))
        return DataLoader(self.train, batch_size=CFG.batch_size, shuffle=True)

    def val_dataloader(self):
        """Return the validation loader.

        Lightning looks for the hook named ``val_dataloader``; under the
        previous name (``valid_dataloader``) the validation loop never ran.
        """
        self.valid = myPetDataset(train_path, vlabels_df, transform=get_transform("test"))
        return DataLoader(self.valid, batch_size=CFG.batch_size, shuffle=False)

    def valid_dataloader(self):
        """Backward-compatible alias for :meth:`val_dataloader`."""
        return self.val_dataloader()

    def test_dataloader(self):
        """Return the test loader (batch size 1, items are ``{'Id', 'Image'}`` dicts)."""
        self.test = myPetDataset(test_path, test_meta_data, transform=get_transform("test"), is_testing=True)
        return DataLoader(self.test, batch_size=1, shuffle=False)

In [None]:
class PawNet(LightningModule):
    """Frozen timm backbone with a small trainable regression head.

    The head ends in a sigmoid, so the network outputs values in ``[0, 1]``;
    targets are expected on the same scale (``Pawpularity / 100``) and
    :meth:`predict` rescales the output by 100.
    """

    def __init__(self):
        super().__init__()
        # Build the architecture only, then load the weights from the local file.
        self.network = timm.create_model(CFG.model, pretrained=False, in_chans=3)
        # map_location="cpu" keeps loading independent of the device the
        # checkpoint was saved from.
        self.network.load_state_dict(torch.load(pretrained_path, map_location="cpu"))
        in_features = self.network.classifier.in_features

        # Freeze every existing weight; only the replacement head below trains.
        for param in self.network.parameters():
            param.requires_grad = False

        self.network.classifier = nn.Sequential(
            nn.Linear(in_features, out_features=512),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=CFG.out_size),
            nn.Sigmoid())

    def forward(self, image):
        """Run the network on a batch of images."""
        out = self.network(image)
        return out

    def compute_loss(self, y_hat, y):
        """Return the RMSE between predictions and targets.

        ``y`` is cast to the prediction dtype and reshaped to the prediction
        shape first. Without this, a ``(B, 1)`` prediction and a ``(B,)``
        target broadcast to a ``(B, B)`` matrix and the loss is wrong.
        """
        y = y.to(dtype=y_hat.dtype).reshape(y_hat.shape)
        return torch.sqrt(nn.MSELoss()(y_hat, y))

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss for one batch.

        The loss must be returned: when ``training_step`` returns ``None``
        Lightning skips the batch, so no optimisation would take place.
        """
        x, y = batch
        y_hat = self(x)
        loss = self.compute_loss(y_hat, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    @torch.no_grad()
    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss for one batch."""
        x, y = batch
        y_hat = self(x)
        loss = self.compute_loss(y_hat, y)
        self.log("valid_loss", loss, prog_bar=True)
        return loss

    @torch.no_grad()
    def predict(self, x):
        """Predict the score (0-100 scale) for a single un-batched image tensor.

        The image is moved to the model's device and the result is returned
        on the CPU, so callers can use ``.numpy()`` / ``.item()`` directly.
        """
        batch = x.unsqueeze(0).to(self.device)
        y_hat = self(batch) * 100.0
        return y_hat.cpu()

    def configure_optimizers(self):
        """Create AdamW over the trainable parameters with cosine annealing."""
        # Frozen backbone weights never receive gradients; leave them out.
        trainable = [p for p in self.network.parameters() if p.requires_grad]
        optimizer = torch.optim.AdamW(trainable, lr=CFG.lr)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CFG.t_max, eta_min=CFG.min_lr)
        return [optimizer], [scheduler]
        

In [None]:
# Create the data module and the model used by the training and inference cells.
data_module = myPetDatasetModule()
model = PawNet()

In [None]:
# Fit the model for at most 3 epochs using the loaders from the data module.
# NOTE(review): CFG.num_epochs (20) is not used here, and the cosine schedule
# is configured with CFG.t_max = 20 — confirm that a 3-epoch run is intended.
trainer = Trainer(max_epochs=3)
trainer.fit(model, data_module)

In [None]:
# Score every test image and write the submission file.
model.eval()
testset = data_module.test_dataloader().dataset

records = []
for i in range(len(testset)):
    sample = testset[i]  # read and transform each image only once
    score = model.predict(sample['Image'])
    # `.item()` extracts the single predicted value as a Python float, so the
    # CSV holds a plain number rather than the text of a nested array.
    records.append({"Id": sample['Id'], "Pawpularity": score.item()})

# Build the frame in one go instead of chained `df[col][i] = ...` assignment,
# which pandas does not guarantee to write through to the frame.
pred_df = pd.DataFrame(records, columns=["Id", "Pawpularity"])
pred_df.to_csv("submission.csv", index=False)