# Imports

In [None]:
import numpy as np 
import pandas as pd 

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms

import xgboost as xgb
import lightgbm as lgb

import os
import pickle
import random
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline

from tqdm.notebook import tqdm_notebook

# Set random seed

In [None]:
SEED = 1234


def seed_everything(seed=SEED):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU plus every CUDA device), and configures cuDNN for
    deterministic kernel selection.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Exported for any subprocesses started later; the hash seed of the
    # already-running interpreter is fixed at startup and is not changed here.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not only the current one (silently ignored without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may select different (non-deterministic) algorithms per run.
    torch.backends.cudnn.benchmark = False


seed_everything(SEED)

# Data

In [None]:
class ImagePandasDataset(Dataset):
    """Dataset that serves images whose file names are listed in a DataFrame.

    Args:
        pd_dataframe: DataFrame with one row per image.
        img_name_column: Name of the column holding the image file names.
        img_dir: Directory the file names are joined onto.
        target_column: Optional name of the column holding the target. When
            falsy (the default) items are returned without a target.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, pd_dataframe, img_name_column, img_dir, target_column=None, transform=None):
        self.pd_dataframe = pd_dataframe
        self.img_name_column = img_name_column
        self.img_dir = img_dir
        self.target_column = target_column
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the DataFrame."""
        return len(self.pd_dataframe)

    def __getitem__(self, idx):
        """Load the image at positional index ``idx``.

        Returns:
            The (optionally transformed) image, or an ``(image, target)``
            tuple when ``target_column`` is set. The target is the column
            value divided by 100.
        """
        img_name = self.pd_dataframe[self.img_name_column].iloc[idx]
        img_path = os.path.join(self.img_dir, img_name)
        # Always hand a 3-channel image to the transform: grayscale, palette
        # or RGBA files would otherwise produce a tensor with the wrong number
        # of channels. The context manager closes the underlying file once
        # the pixel data has been copied by ``convert``.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')
        if self.transform:
            image = self.transform(image)

        if not self.target_column:
            return image

        target = self.pd_dataframe[self.target_column].iloc[idx] / 100.
        return image, target

In [None]:
# Locations of the competition images.
train_images_path = '../input/petfinder-pawpularity-score/train/'
test_images_path = '../input/petfinder-pawpularity-score/test/'

train_pd = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
test_pd = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')

# Append the extension so that `Id` can be joined directly onto the image
# directory to form a file path.
train_pd['Id'] = train_pd['Id'].map(lambda image_id: image_id + '.jpg')
test_pd['Id'] = test_pd['Id'].map(lambda image_id: image_id + '.jpg')


# Inference-time preprocessing: tensor conversion, fixed 300x300 resize,
# then per-channel normalisation.
resize_step = transforms.Resize((300, 300))
normalize_step = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
img_transforms_test = transforms.Compose([
    transforms.ToTensor(),
    resize_step,
    normalize_step,
])
test_ds = ImagePandasDataset(
    test_pd,
    'Id',
    img_dir=test_images_path,
    transform=img_transforms_test,
)
test_loader = DataLoader(test_ds, batch_size=128, shuffle=False)

# Train data overview

In [None]:
# Preview the first ten rows of the training table.
train_pd.head(n=10)

In [None]:
# Print column names, non-null counts and dtypes of the training table.
train_pd.info()

The numerical (metadata) features did not help during training, so only the images are used from here on.

In [None]:
# Correlate numeric columns only: `Id` holds strings, and since pandas 2.0
# `DataFrame.corr()` no longer drops non-numeric columns silently.
numeric_corr = train_pd.select_dtypes(include='number').corr()
# Take the tick labels from the correlation matrix itself so they always
# match the plotted rows/columns.
sns.heatmap(numeric_corr,
            xticklabels=numeric_corr.columns,
            yticklabels=numeric_corr.index)

In [None]:
# Distribution of the target score.
sns.histplot(data=train_pd, x='Pawpularity')

In [None]:
# File names of the images with the maximum score (100) ...
is_top_score = train_pd['Pawpularity'] == 100
most_pawpular = train_pd.loc[is_top_score, 'Id'].tolist()
# ... and of the images scoring below 10.
is_low_score = train_pd['Pawpularity'] < 10
less_pawpular = train_pd.loc[is_low_score, 'Id'].tolist()

In [None]:
# Show the first images of `most_pawpular` on a 2x9 grid without tick marks.
fig, axes = plt.subplots(2, 9, figsize=(20, 10))
fig.suptitle('Most pawpular', fontsize=20)
for ax in axes.flat:
    ax.set_yticks([])
    ax.set_xticks([])
# `axes.flat` walks the grid row by row; zip stops at the shorter sequence,
# so a list with fewer images than grid cells leaves the remaining cells
# empty instead of raising IndexError.
for ax, image_name in zip(axes.flat, most_pawpular):
    ax.imshow(plt.imread(train_images_path + image_name))

In [None]:
# Show the first images of `less_pawpular` on a 2x9 grid without tick marks.
fig, axes = plt.subplots(2, 9, figsize=(20, 10))
fig.suptitle('Less pawpular', fontsize=20)
for ax in axes.flat:
    ax.set_yticks([])
    ax.set_xticks([])
# `axes.flat` walks the grid row by row; zip stops at the shorter sequence,
# so a list with fewer images than grid cells leaves the remaining cells
# empty instead of raising IndexError.
for ax, image_name in zip(axes.flat, less_pawpular):
    ax.imshow(plt.imread(train_images_path + image_name))

# Models

In [None]:
# Directory holding the downloaded torchvision checkpoint files.
WEIGHTS_PATH = '../input/pretrained-model-weights-pytorch/'

# Each architecture is described by a constructor and the checkpoint to load
# into the model it builds.
VGG_19_bn = torchvision.models.vgg19_bn
VGG_19_bn_PATH = f'{WEIGHTS_PATH}vgg19_bn-c79401a0.pth'

Inception_v3 = torchvision.models.Inception3
Inception_v3_PATH = f'{WEIGHTS_PATH}inception_v3_google-1a9a5a14.pth'

Resnet152 = torchvision.models.resnet152
Resnet152_PATH = f'{WEIGHTS_PATH}resnet152-b121ed2d.pth'

In [None]:
def init_pretrained_model(path, model):
    """Build a model, load saved weights into it and freeze it for inference.

    Args:
        path: Path to a file containing a state dict saved with ``torch.save``.
        model: Zero-argument callable that returns a freshly constructed
            ``nn.Module`` matching the state dict.

    Returns:
        The model in eval mode, on the CPU, with ``requires_grad`` disabled
        on every parameter.
    """
    pretrained_model = model()
    # These attributes are set unconditionally on whatever model was built.
    # NOTE(review): presumably meant to switch off auxiliary classifier heads
    # on architectures that have them; on others they are just extra
    # attributes - confirm.
    pretrained_model.aux_logits = False
    pretrained_model.aux1 = None
    pretrained_model.aux2 = None
    # Load onto the CPU regardless of the device the tensors were saved from;
    # the caller decides where to move the finished model.
    state_dict = torch.load(path, map_location='cpu')
    pretrained_model.load_state_dict(state_dict)
    pretrained_model.eval()

    for param in pretrained_model.parameters():
        param.requires_grad = False
    return pretrained_model

In [None]:
# Directory holding the gradient-boosting models fitted on the CNN outputs.
PRETRAINED_MODELS_PATH = '../input/pawpularity-preatrained-models/'

# Frozen CNN backbones used as feature extractors; moved to the GPU.
inception_model = init_pretrained_model(Inception_v3_PATH, Inception_v3).cuda()
resnet_model = init_pretrained_model(Resnet152_PATH, Resnet152).cuda()

# NOTE(review): unpickling executes arbitrary code - only load pickle files
# from a trusted source.
# The context managers close the file handles once each model is loaded.
with open(PRETRAINED_MODELS_PATH + 'lightgbm_inceptionv3.pickle', 'rb') as model_file:
    lightgbm_inceptionv3 = pickle.load(model_file)
with open(PRETRAINED_MODELS_PATH + 'lightgbm_resnet152.pickle', 'rb') as model_file:
    lightgbm_resnet152 = pickle.load(model_file)

xgboost_inceptionv3 = xgb.Booster()
xgboost_inceptionv3.load_model(PRETRAINED_MODELS_PATH + 'xgboost_inceptionv3.json')
xgboost_resnet152 = xgb.Booster()
xgboost_resnet152.load_model(PRETRAINED_MODELS_PATH + 'xgboost_resnet152.json')

# Test

In [None]:
# Number of images pushed through the networks per step.
BATCH_SIZE = 128

# Test images only (no target column); order is preserved so that the
# predictions line up with the rows of `test_pd`.
test_ds = ImagePandasDataset(
    pd_dataframe=test_pd,
    img_name_column='Id',
    img_dir=test_images_path,
    transform=img_transforms_test,
)
test_loader = DataLoader(dataset=test_ds, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# One row per test image; each row receives the 1000-wide output vector of
# the corresponding network.
n_test_images = len(test_ds)
data_after_resnet = np.zeros((n_test_images, 1000))
data_after_inception = np.zeros((n_test_images, 1000))

n = 0  # write offset into the output arrays
# Pure inference: skip autograd bookkeeping entirely.
with torch.no_grad():
    for image in tqdm_notebook(test_loader):
        # Copy the batch to the GPU once and reuse it for both networks.
        image = image.cuda()
        # The last batch may be smaller than the nominal batch size, so
        # advance by the actual number of images.
        batch_len = image.shape[0]
        data_after_resnet[n: n + batch_len] = resnet_model(image).cpu().numpy()
        data_after_inception[n: n + batch_len] = inception_model(image).cpu().numpy()
        n += batch_len

In [None]:
# XGBoost boosters consume DMatrix objects; LightGBM takes the arrays as-is.
inception_dmatrix = xgb.DMatrix(data_after_inception)
resnet_dmatrix = xgb.DMatrix(data_after_resnet)

# One prediction vector per (booster library, backbone) pair.
lgb_incp_pred = lightgbm_inceptionv3.predict(data_after_inception)
lgb_rsnt_pred = lightgbm_resnet152.predict(data_after_resnet)
xgb_incp_pred = xgboost_inceptionv3.predict(inception_dmatrix)
xgb_rsnt_pred = xgboost_resnet152.predict(resnet_dmatrix)

In [None]:
# Unweighted average of the four prediction vectors.
ensemble_total = lgb_incp_pred + lgb_rsnt_pred + xgb_incp_pred + xgb_rsnt_pred
mean_preds = ensemble_total / 4

In [None]:
# The datasets divide the target by 100, so multiply by 100 to get back to
# the original scale before writing the submission file.
submission_scores = mean_preds * 100
sample_df = pd.read_csv(
    '../input/petfinder-pawpularity-score/sample_submission.csv'
).assign(Pawpularity=submission_scores)
sample_df.to_csv('submission.csv', index=False)