In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and echo the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install --upgrade pip
!pip install torchsummary
!pip install timm
!pip install pytorch-lightning


In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torchvision
from torch import nn
import torch.optim as optim
from torchvision.transforms import transforms
from torchvision.io import read_image
from torch.utils import data
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn.functional as F
from torchvision import models
from torchsummary import summary

from PIL import Image
import argparse
import matplotlib.image as mpimg
import glob

import timm
from timm import create_model

import pytorch_lightning as pl
from  pytorch_lightning import trainer

# Properties

In [None]:
# Data-loading settings.
BATCH_SIZE = 64
NUM_WORKERS = 0

# Optimisation settings.
NUM_EPOCHS = 50
learning_rate = 1e-5

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(DEVICE)

In [None]:
# Image folders for the two splits (trailing slash kept: file names are appended directly).
train_dir, test_dir = (
    '../input/petfinder-pawpularity-score/train/',
    '../input/petfinder-pawpularity-score/test/',
)

# Metadata tables for the two splits.
train_csv, test_csv = (
    "../input/petfinder-pawpularity-score/train.csv",
    "../input/petfinder-pawpularity-score/test.csv",
)

In [None]:
# Metadata columns, in the order they are stacked into the feature matrix.
feature_columns = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
                   'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']

# Creating a dataframe for train data
train_dataframe = pd.read_csv(train_csv)
# Full path of each image: <train_dir><Id>.jpg
train_dataframe['Image Path'] = train_dir + train_dataframe['Id'].astype(str) + '.jpg'

cat_features = train_dataframe.drop(['Pawpularity'], axis=1)
stacking_order = [train_dataframe[column] for column in feature_columns]


# Creating a dataframe for test data
test_dataframe = pd.read_csv(test_csv)
# Full path of each image: <test_dir><Id>.jpg
test_dataframe['Image Path'] = test_dir + test_dataframe['Id'].astype(str) + '.jpg'
# Kept under its own name: reusing `stacking_order` here would overwrite the
# training columns, and the training feature tensor built from `stacking_order`
# afterwards would silently contain test rows.
test_stacking_order = [test_dataframe[column] for column in feature_columns]


In [None]:
# Converting the categorical features into a tensor.
# `stacking_order` is consulted only for the column names and their order; the
# values are always read from `train_dataframe`, so `train_cat` lines up
# row-for-row with `target` no matter which split `stacking_order` was last
# built from.
feature_names = [column.name for column in stacking_order]
cat_features = train_dataframe[feature_names].to_numpy()
train_cat = torch.tensor(cat_features, dtype=torch.int64)


# Converting the target values
num_classes = np.unique(train_dataframe['Pawpularity'].values).shape[0]
# Hand torch a NumPy array rather than a pandas Series so the conversion does
# not depend on the Series index.
target = torch.tensor(train_dataframe['Pawpularity'].to_numpy(), dtype=torch.float)
num_classes, target.size(0)

In [None]:
# Per-channel mean / standard deviation used by `Normalize` below
# (the ImageNet statistics published for torchvision's pretrained models).
mean = [0.485, 0.456, 0.406]
standard_dev = [0.229, 0.224, 0.225]

# Augmentation pipeline for image tensors such as those returned by `read_image`.
# Order matters: the geometric and colour augmentations run on the raw image,
# then the image is converted to float and finally normalised. The trailing
# `ToTensor()` was removed: it only accepts PIL images / ndarrays, so placing it
# after the tensor-only steps made the whole pipeline fail.
transformer = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomAffine(15, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ConvertImageDtype(torch.float),
    transforms.Normalize(mean=mean, std=standard_dev),
])
            

In [None]:
class CustomDataset(Dataset):
    """Training dataset yielding ``(image, label)`` pairs described by a dataframe.

    Args:
        df: dataframe with an ``'Image Path'`` column (path of each image file)
            and a ``'Pawpularity'`` column (the target value).
    """

    def __init__(self, df):
        self.df = df
        # Every image is resized to 180x180 so samples can be batched.
        self.transform = transforms.Resize([180, 180])
        self.y = df['Pawpularity']
        self.image_path = df['Image Path']

    def __len__(self):
        """Return the number of samples (rows of the dataframe)."""
        return self.y.shape[0]

    def __getitem__(self, index):
        """Return the resized float image tensor and the label at position ``index``."""
        # Positional lookup (`.iloc`): the sampler hands out positions 0..len-1,
        # which only coincide with the dataframe's index labels when the
        # dataframe has never been filtered, shuffled or split.
        img = read_image(self.image_path.iloc[index])
        tensor_image = self.transform(img).float()

        labels = self.y.iloc[index]

        return tensor_image, labels

class CustomTestDataset(Dataset):
    """Inference dataset yielding ``(image, 0)`` pairs described by a dataframe.

    Args:
        df: dataframe with an ``'Image Path'`` column (path of each image file).
    """

    def __init__(self, df):
        self.df = df
        # Same 180x180 resize as the training dataset.
        self.transform = transforms.Resize([180, 180])
        self.image_path = df['Image Path']

    def __len__(self):
        """Return the number of samples (rows of the dataframe)."""
        return self.image_path.shape[0]

    def __getitem__(self, index):
        """Return the resized float image tensor at position ``index`` and a dummy label."""
        # Load as a tensor, exactly like the training dataset: a PIL image
        # cannot be batched by the DataLoader's default collate function.
        # Positional lookup (`.iloc`) keeps this valid for dataframes whose
        # index is not 0..len-1.
        img = read_image(self.image_path.iloc[index])
        tensor_image = self.transform(img).float()

        # No ground truth is available at inference time; 0 is a placeholder so
        # the item has the same (image, label) structure as the training data.
        labels = 0

        return tensor_image, labels

In [None]:
class LightningDataLoader(pl.LightningDataModule):
    """Data module that wraps one dataframe and builds DataLoaders from it.

    Args:
        df: dataframe passed to ``CustomDataset`` (training) or
            ``CustomTestDataset`` (inference).
    """

    def __init__(self, df):
        super().__init__()
        self.df = df

    def create_dataloader(self, df, train):
        """Build a DataLoader over ``df``.

        Args:
            df: dataframe describing the samples.
            train: truthy for a training loader (labelled, shuffled), falsy for
                an inference loader (dummy labels, original order).
        """
        if train:
            images = CustomDataset(df)
        else:
            images = CustomTestDataset(df)
        # Only the training loader is shuffled, so every epoch sees differently
        # composed batches; inference keeps the dataframe order so predictions
        # can be matched back to their rows.
        return DataLoader(
            images,
            BATCH_SIZE,
            shuffle=bool(train),
            num_workers=NUM_WORKERS,
            pin_memory=True,
        )

    def train_dataset(self):
        """Return the training DataLoader for the wrapped dataframe."""
        return self.create_dataloader(self.df, train=True)

    def test_dataset(self):
        """Return the inference DataLoader for the wrapped dataframe."""
        return self.create_dataloader(self.df, train=False)

    # Standard LightningDataModule hooks, so the module can also be handed to a
    # Trainer directly.
    def train_dataloader(self):
        """LightningDataModule hook; same loader as ``train_dataset``."""
        return self.train_dataset()

    def test_dataloader(self):
        """LightningDataModule hook; same loader as ``test_dataset``."""
        return self.test_dataset()
    
    
        

In [None]:
train_dataloader = LightningDataLoader(train_dataframe).train_dataset()
# Pull one batch as a sanity check. The builtin `next()` is used because
# DataLoader iterators only guarantee the iterator protocol (`__next__`), not a
# `.next()` method.
image, labels = next(iter(train_dataloader))
print("Shape of the input:",image.shape)
print("Shape of the output labels:",labels.shape)
# Loss shared with the model defined below; it takes raw logits and targets.
losses = nn.BCEWithLogitsLoss()


In [None]:
def mixup(x: torch.Tensor, y: torch.Tensor, alpha: float = 1.0):
    """Blend each sample of a batch with a randomly chosen partner from the same batch.

    A single mixing weight is drawn from Beta(alpha, alpha) and applied to the
    whole batch; partners are chosen by a random permutation of the batch.

    Args:
        x: batch of inputs, first dimension is the batch.
        y: targets, indexed along the same first dimension as ``x``.
        alpha: Beta distribution parameter, must be positive.

    Returns:
        ``(mixed_x, target_a, target_b, lam)``: the blended inputs, the original
        targets, the partners' targets and the weight given to the originals.

    Raises:
        AssertionError: if ``alpha`` is not positive or the batch has a single sample.
    """
    assert alpha > 0, "alpha should be larger than 0"
    assert x.size(0) > 1, "Mixup cannot be applied to a single instance."

    batch_size = x.shape[0]
    weight = np.random.beta(alpha, alpha)
    partner = torch.randperm(batch_size)

    shuffled_x = x[partner, :]
    blended = weight * x + (1 - weight) * shuffled_x
    return blended, y, y[partner], weight

class Pawpularity_Model(pl.LightningModule):
    """Image regressor: a pretrained ResNet-34 backbone followed by a one-logit head.

    Labels are divided by 100 before the loss is computed and predictions are
    the sigmoid of the logit multiplied by 100, so both are reported on the
    original label scale. Training batches are augmented with mixup.
    """

    def __init__(self):
        super().__init__()
        self.build_model()
        # Module-level loss object (BCE with logits) defined earlier in the notebook.
        self.losses = losses

    def build_model(self):
        """Create the backbone and the regression head."""
        # init a pretrained resnet; `num_classes=0` asks timm for the backbone
        # without its classification layer, so it outputs feature vectors.
        self.backbone = create_model('resnet34', pretrained=True, num_classes=0, in_chans=3)
        num_features = self.backbone.num_features
        self.fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(num_features, 1))

    def forward(self, x):
        """Return one raw logit per input image, shape ``(batch, 1)``."""
        f = self.backbone(x)
        out = self.fc(f)
        return out

    def training_step(self, dataloader, batch_idx):
        """Lightning hook: ``dataloader`` is one ``(images, labels)`` batch."""
        loss, pred, labels = self.step(dataloader, mode='train')
        return {'loss': loss, 'pred': pred, 'labels': labels}

    def configure_optimizers(self):
        """Return the AdamW optimizer and its cosine warm-restart scheduler."""
        # Use the notebook-level `learning_rate` setting instead of a second,
        # hard-coded copy of the value.
        optimizer = optim.AdamW(self.parameters(), lr=learning_rate)
        # NOTE(review): eta_min (1e-4) is larger than the base LR when
        # `learning_rate` is 1e-5, so the schedule moves the LR upward within a
        # cycle instead of decaying it - confirm this is intended.
        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 20, eta_min=1e-4)
        return [optimizer], [scheduler]

    def step(self, dataloader, mode):
        """Compute loss and predictions for one batch.

        Args:
            dataloader: one ``(images, labels)`` batch.
            mode: ``'train'`` enables mixup; any other value evaluates the
                batch as-is.

        Returns:
            ``(loss, pred, labels)`` where ``pred`` and ``labels`` are detached
            CPU tensors on the original label scale.
        """
        images, labels = dataloader
        labels = labels.float() / 100.0

        # Mixup needs at least two samples to pair up (it asserts otherwise), so
        # a single-sample batch falls back to the plain loss instead of crashing.
        if mode == 'train' and images.size(0) > 1:
            mix_images, target_a, target_b, lam = mixup(images, labels, alpha=0.5)
            logits = self.forward(mix_images).squeeze(1)
            # Loss against both mixed targets, weighted like the inputs were.
            loss = self.losses(logits, target_a) * lam + \
                (1 - lam) * self.losses(logits, target_b)
        else:
            logits = self.forward(images).squeeze(1)
            loss = self.losses(logits, labels)

        pred = logits.sigmoid().detach().cpu() * 100
        labels = labels.detach().cpu() * 100
        return loss, pred, labels
    

In [None]:
# Place the model on the device selected in the configuration cell, so the
# notebook also runs on a CPU-only machine (an unconditional `.cuda()` raises there).
lightning_model = Pawpularity_Model().to(DEVICE)
# torchsummary builds its dummy input on "cuda" by default; pass the device
# type explicitly so it matches where the model lives.
summary(lightning_model, input_size=(3, 180, 180), batch_size=BATCH_SIZE, device=DEVICE.type)

In [None]:
# `accelerator` / `devices` replace the `gpus` argument, which current
# PyTorch Lightning releases no longer accept; the accelerator falls back to
# the CPU when no GPU is visible.
trainer = pl.Trainer(
    logger=True,
    max_epochs=NUM_EPOCHS,
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    devices=1,
)

trainer.fit(lightning_model, train_dataloaders=train_dataloader)