If this notebook is useful to you in any way, please give an upvote or comment your gratitude on the notebook in the reference section.

In [None]:
import os
import pandas as pd
import numpy as np
from glob import  glob
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import torch
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.dataset import Subset
from torchvision import transforms as T
from torch import nn
from torchvision.models import resnet50
from sklearn.model_selection import KFold,StratifiedKFold


# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Training on device {device}.')

# Fixed seed for torch's RNG; the same value is reused for the fold split.
seed = 42
torch.manual_seed(seed)

# Competition data locations.
dataset_dir = '../input/petfinder-pawpularity-score/'
photo_dir = '../input/petfinder-pawpularity-score/train/'

# Paths of the training images matched by the glob pattern.
jpg_pattern = os.path.join(photo_dir, '**.jpg')
photo_pathes = glob(jpg_pattern)

# Base name used for the checkpoint files.
model_name = 'resnet50_pretrained'
model_path = model_name

In [None]:
# Load the training metadata table from the dataset directory.
train_csv_path = os.path.join(dataset_dir, 'train.csv')
train_df = pd.read_csv(train_csv_path)

In [None]:
# Per-channel mean / standard deviation handed to T.Normalize.
# NOTE(review): presumably measured on the training images with the snippet
# below -- confirm before reusing these numbers on other data:
#   imgs = torch.stack([img for img, _ in dataset], dim=3)
#   imgs.view(3, -1).mean(dim=1)
#   imgs.view(3, -1).std(dim=1)
IMG_MEAN = [
    0.5188,
    0.4840,
    0.4462,
]
IMG_STD = [
    0.2640,
    0.2596,
    0.2617,
]

def to_img_path(object_id):
    """Return the path of the ``.jpg`` file for ``object_id``.

    The directory is the module-level ``photo_dir`` as it is bound at call
    time, so rebinding ``photo_dir`` changes where later calls look.
    """
    filename = '{}.jpg'.format(object_id)
    return os.path.join(photo_dir, filename)

class Petdataset(Dataset):
    """Dataset yielding one normalised image tensor per row of ``df``.

    Args:
        df: DataFrame with an ``Id`` column (resolved to a file through
            ``to_img_path``) and, when ``is_train`` is true, a
            ``Pawpularity`` column holding the regression target.
        is_train: When true, random vertical/horizontal flips are applied
            and ``__getitem__`` returns ``(image, label)``; otherwise only
            the image tensor is returned.
    """

    # Spatial size every image is resized to before conversion to a tensor.
    IMG_SIZE = (224, 224)

    def __init__(self,df,is_train):
        self.df = df
        self.is_train = is_train

        # Build the pipeline once here instead of on every __getitem__ call.
        augmentations = (
            [T.RandomVerticalFlip(p=0.5), T.RandomHorizontalFlip(p=0.5)]
            if is_train
            else []
        )
        self.converter = T.Compose([
            *augmentations,
            T.Resize(self.IMG_SIZE),
            T.ToTensor(),
            T.Normalize(mean=IMG_MEAN, std=IMG_STD),
        ])

    def __getitem__(self,index):
        """Load, transform and return the sample at position ``index``."""
        img_path = to_img_path(self.df['Id'].iat[index])
        # Image.open is lazy and keeps the file open; the context manager
        # closes it once convert() has loaded the pixels.  Forcing RGB keeps
        # the channel count in line with the 3-value normalisation stats.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')
        img = self.converter(img)

        if not self.is_train:
            return img

        # Shape (1,) float target, matching the model's single output unit.
        label = torch.tensor([self.df['Pawpularity'].iat[index]], dtype=torch.float)
        return img, label

    def __len__(self):
        """Return the number of rows in ``df``."""
        return len(self.df)

In [None]:
class EarlyStopping:
    """Track the validation loss, checkpoint on improvement, flag when to stop.

    Args:
        patience: Number of consecutive non-improving calls after which
            ``early_stop`` is set to ``True``.
        verbose: When true, print a line on every checkpoint and on every
            non-improving call.
        path: File the model's ``state_dict`` is saved to on improvement.

    Attributes:
        counter: Consecutive calls without improvement since the last
            checkpoint.
        best_score: Lowest validation loss seen so far (``None`` before the
            first call).
        early_stop: ``True`` once ``counter`` has reached ``patience``.
    """

    def __init__(self,patience,verbose,path):
        self.patience = patience
        self.verbose = verbose
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self,val_loss,model):
        """Record ``val_loss`` for one epoch and return the checkpoint path.

        The first call, and any call with a strictly lower loss than the best
        so far, saves the model and resets the counter.  Every other call
        increments the counter and may set ``early_stop``.
        """
        improved = self.best_score is None or val_loss < self.best_score

        if improved:
            # The first observation counts as an improvement, so it can never
            # trigger early stopping on its own.
            self.checkpoint(val_loss,model)
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True

        return self.path

    def checkpoint(self,val_loss,model):
        """Save ``model``'s ``state_dict`` to ``path`` and record ``val_loss`` as best."""
        if self.verbose:
            print(f'Validation loss improved ({self.best_score} --> {val_loss}). Saving model ...')
        torch.save(model.state_dict(),self.path)
        self.best_score = val_loss

In [None]:
# Bin the continuous target so the folds can be stratified on it.
# The bin count follows 1 + log2(n), truncated to an integer.
n_samples = len(train_df)
num_bins = int(np.log2(n_samples) + 1)
train_df['bins'] = pd.cut(
    train_df['Pawpularity'],
    bins=num_bins,
    labels=False,
)

# Three shuffled, stratified folds with a fixed seed.
KF = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)

# One labelled dataset over the whole table; folds are taken as Subsets of it.
dataset = Petdataset(df=train_df, is_train=True)

In [None]:
import datetime

def training_loop(n_epochs,optimizer,model,loss_fn,train_loader,val_loader,model_path,fold):
    """Train ``model`` for up to ``n_epochs`` epochs with early stopping.

    Each epoch runs one optimisation pass over ``train_loader`` and one
    evaluation pass over ``val_loader``, prints the mean batch losses and
    hands the validation loss to an ``EarlyStopping`` instance (patience 10),
    which saves the best ``state_dict`` to ``model_path``.

    Args:
        n_epochs: Maximum number of epochs.
        optimizer: Optimizer updating ``model``'s parameters.
        model: Network to train; expected to already be on ``device``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        train_loader: Iterable of ``(imgs, labels)`` training batches.
        val_loader: Iterable of ``(imgs, labels)`` validation batches.
        model_path: File the best checkpoint is written to.
        fold: Fold identifier, used only in the printed log lines.

    Note:
        On return ``model`` holds the weights of the last epoch that ran;
        the best weights are the ones stored at ``model_path``.
    """
    earlystopping = EarlyStopping(patience=10,verbose=False,path=model_path)

    # Guard against empty loaders so the mean below cannot divide by zero.
    n_train_batches = max(len(train_loader), 1)
    n_val_batches = max(len(val_loader), 1)

    for epoch in range(1,n_epochs+1):
        # Re-enable training behaviour every epoch, because the validation
        # pass below switches the model to eval mode.
        model.train()
        loss_train = 0.0

        for imgs,labels in train_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)
            outputs = model(imgs)
            loss = loss_fn(outputs,labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()

        # Eval mode makes BatchNorm use its running statistics and stops it
        # from updating them on validation data.
        model.eval()
        loss_val = 0.0
        with torch.no_grad():
            for imgs,labels in val_loader:
                imgs = imgs.to(device)
                labels = labels.to(device)
                outputs = model(imgs)
                loss_val += loss_fn(outputs,labels).item()

        mean_train_loss = loss_train/n_train_batches
        mean_val_loss = loss_val/n_val_batches

        print('{},{} Fold,{} Epoch, Training loss {:.2f}, Val loss {:.2f}'.format(
            datetime.datetime.now(),fold,epoch,
            mean_train_loss,
            mean_val_loss
        ))

        earlystopping(mean_val_loss,model)
        if earlystopping.early_stop:
            # epoch - patience is the epoch of the last saved checkpoint.
            print('{} Fold,{} Epoch ,Early Stopping'.format(fold,epoch-earlystopping.patience))
            break

In [None]:
# Train one model per stratified fold and remember each checkpoint path.
model_list = []
fold_splits = KF.split(train_df, train_df['bins'])

for fold, (train_index, val_index) in enumerate(fold_splits):
    # Fold-specific views of the shared dataset.
    train_dataset = Subset(dataset, train_index)
    val_dataset = Subset(dataset, val_index)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=128, shuffle=True)

    model_path = f'{model_name}{fold}.pt'

    # ResNet-50 initialised from the local weight file, with the final layer
    # replaced by a single-output linear layer for regression.
    model = resnet50()
    pretrained_state = torch.load('../input/resnet50/resnet50.pth')
    model.load_state_dict(pretrained_state)
    model.fc = nn.Linear(in_features=2048, out_features=1, bias=True)
    model.to(device)

    optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)
    criterion = nn.MSELoss()

    training_loop(
        n_epochs=200,
        optimizer=optimizer,
        model=model,
        loss_fn=criterion,
        train_loader=train_loader,
        val_loader=val_loader,
        model_path=model_path,
        fold=fold,
    )
    model_list.append(model_path)

In [None]:
# Point to_img_path at the test images from here on.
photo_dir = '../input/petfinder-pawpularity-score/test'

# Unlabelled test set, iterated in file order so predictions line up with rows.
test_csv_path = os.path.join(dataset_dir, 'test.csv')
test_df = pd.read_csv(test_csv_path)
test_dataset = Petdataset(df=test_df, is_train=False)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

In [None]:
# Ensemble: average the test predictions of every fold checkpoint, then clip
# the result to the range [0, 100] for the submission table.
prediction_sum = np.zeros(len(test_df), dtype=np.float64)

for checkpoint_path in model_list:
    model.load_state_dict(torch.load(checkpoint_path))
    # Inference must run in eval mode so BatchNorm uses its running
    # statistics instead of the statistics of each test batch.
    model.eval()

    batch_predictions = []
    with torch.no_grad():
        for imgs in test_loader:
            outputs = model(imgs.to(device))
            batch_predictions.append(outputs.cpu().numpy().ravel())

    if batch_predictions:
        prediction_sum += np.concatenate(batch_predictions)

# Divide by the actual number of checkpoints rather than a hard-coded 3;
# with no checkpoints the predictions stay at zero.
test_df['Pawpularity'] = prediction_sum / max(len(model_list), 1)

# Work on an explicit copy so the clipping does not write into a slice of
# test_df (which raises SettingWithCopyWarning).
sub = test_df[['Id','Pawpularity']].copy()
sub['Pawpularity'] = sub['Pawpularity'].clip(lower=0, upper=100)

In [None]:
# Write the submission table without the DataFrame index column.
submission_file = 'submission.csv'
sub.to_csv(submission_file, index=False)