In [None]:
import numpy as np 
import pandas as pd 

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms

import os
import random
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline

from tqdm.notebook import tqdm_notebook

In [None]:
# Hold-out switch: when True, later cells carve a validation subset out of
# train_pd and report a validation loss after every epoch; when False, every
# training row is used for fitting and no validation is run.
VAL_MODE = False

In [None]:
SEED = 1234


def seed_everything(seed=1234):
    """Seed every random number generator the notebook relies on.

    Args:
        seed: integer seed applied to Python's ``random`` module, the
            ``PYTHONHASHSEED`` environment variable, NumPy and PyTorch
            (CPU and every CUDA device).
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # processes); the hash seed of the running kernel is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not just the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, which
    # defeats `deterministic = True`, so it has to be switched off as well.
    torch.backends.cudnn.benchmark = False


seed_everything(SEED)

# Dataset

In [None]:
train_images_path = '../input/petfinder-pawpularity-score/train/'
test_images_path = '../input/petfinder-pawpularity-score/test/'


def _read_with_image_names(csv_path):
    """Read a competition CSV and turn each ``Id`` into an image file name."""
    frame = pd.read_csv(csv_path)
    # The CSV stores bare ids; the image folders store '<id>.jpg'.
    frame['Id'] = frame['Id'].map(lambda image_name: image_name + '.jpg')
    return frame


train_pd = _read_with_image_names('../input/petfinder-pawpularity-score/train.csv')
test_pd = _read_with_image_names('../input/petfinder-pawpularity-score/test.csv')

In [None]:
class ImagePandasDataset(Dataset):
    """Dataset that pairs image files with rows of a pandas DataFrame.

    Args:
        pd_dataframe: frame with one row per sample.
        img_name_column: column holding the image file name of each row.
        img_dir: directory the file names are relative to.
        target_column: optional column with the regression target; the value
            is divided by 100 before being returned.
        features_columns: optional list of numeric columns returned as an
            extra float32 tensor.
        transform: optional callable applied to the loaded RGB image.

    Each item is ``(image, features, target)`` when ``features_columns`` is
    given and ``(image, target)`` otherwise. ``target`` is ``-1`` when no
    ``target_column`` is configured.
    """

    def __init__(self, pd_dataframe, img_name_column, img_dir, target_column=None, features_columns=None, transform=None):
        self.pd_dataframe = pd_dataframe
        self.img_name_column = img_name_column
        self.img_dir = img_dir
        self.target_column = target_column
        self.features_columns = features_columns
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the underlying frame."""
        return len(self.pd_dataframe)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``."""
        img_path = os.path.join(self.img_dir, self.pd_dataframe[self.img_name_column].iloc[idx])
        # Image.open is lazy; convert() forces the pixel data to be read while
        # the file is still open and guarantees three channels, so grayscale
        # or RGBA files do not break a 3-channel Normalize further down.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        target = -1
        if self.target_column:
            target = self.pd_dataframe[self.target_column].iloc[idx] / 100.
        if self.transform:
            image = self.transform(image)
        if self.features_columns:
            feature_row = self.pd_dataframe[list(self.features_columns)].iloc[idx]
            # Explicit float32 so the features can be concatenated with network
            # activations without relying on implicit dtype promotion.
            features = torch.tensor(feature_row.to_numpy(), dtype=torch.float32)
            return image, features, target
        return image, target

In [None]:
FEATURES = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
       'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']

# Row position at which the frame is cut into a fitting part and a held-out part.
VAL_SPLIT_INDEX = 8500
BATCH_SIZE = 128
IMAGE_SIZE = (256, 256)

# The splits get their own names instead of overwriting train_pd: re-running
# this cell would otherwise slice an already truncated frame and leave the
# validation part empty.
if VAL_MODE:
    train_split_pd = train_pd.iloc[:VAL_SPLIT_INDEX]
    val_pd = train_pd.iloc[VAL_SPLIT_INDEX:]
else:
    train_split_pd = train_pd

# Training pipeline: resize, random brightness/contrast jitter, then
# normalisation with the ImageNet channel statistics.
img_transforms_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(IMAGE_SIZE),
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0, hue=0),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Evaluation pipeline: same as above without the random augmentation.
img_transforms_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(IMAGE_SIZE),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_ds = ImagePandasDataset(train_split_pd, 'Id', img_dir=train_images_path,
                              target_column='Pawpularity', features_columns=FEATURES, transform=img_transforms_train)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)

if VAL_MODE:
    val_ds = ImagePandasDataset(val_pd, 'Id', img_dir=train_images_path,
                                target_column='Pawpularity', features_columns=FEATURES, transform=img_transforms_test)
    # Evaluation order does not matter, so keep it fixed and leave the global
    # RNG state untouched.
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)

# Model

In [None]:
GOOGLENET_PATH = '../input/googlenet-weights/googlenet-1378be20.pth'


def init_googlenet(path):
    """Build a frozen GoogLeNet feature extractor from a local checkpoint.

    Args:
        path: location of a GoogLeNet ``state_dict`` file.

    Returns:
        A GoogLeNet without auxiliary heads, in eval mode, with every
        parameter excluded from gradient computation.
    """
    # The auxiliary classifiers are not used, so do not build them at all.
    googlenet = torchvision.models.GoogLeNet(aux_logits=False, transform_input=True)

    # map_location keeps loading independent of the device the checkpoint was
    # saved from; the caller moves the finished model to its target device.
    state_dict = torch.load(path, map_location='cpu')
    # A checkpoint may still carry weights for the auxiliary heads. With the
    # heads gone, a strict load would reject those keys, so drop them first.
    # Any other mismatch still raises.
    backbone_state = {
        key: value for key, value in state_dict.items()
        if not key.startswith(('aux1.', 'aux2.'))
    }
    googlenet.load_state_dict(backbone_state)

    googlenet.eval()
    for param in googlenet.parameters():
        param.requires_grad = False
    return googlenet

In [None]:
class Model(nn.Module):
    """Frozen GoogLeNet backbone followed by a trainable regression tail.

    The backbone output is concatenated with the per-image metadata features
    and passed through a stack of fully connected layers that ends in a
    single non-negative value.

    Args:
        n_meta_features: number of metadata features per sample (12 by
            default, giving the 1012-wide input of the tail).
    """

    # Width of the GoogLeNet output (its default number of classes).
    N_IMAGE_FEATURES = 1000

    def __init__(self, n_meta_features=12):
        super().__init__()
        self.googlenet = init_googlenet(GOOGLENET_PATH)

        tail_inputs = n_meta_features + self.N_IMAGE_FEATURES
        self.linear_tail = nn.Sequential(
            nn.BatchNorm1d(tail_inputs),
            nn.Dropout(0.2),
            nn.Linear(in_features=tail_inputs, out_features=512),
            nn.ReLU(),

            nn.BatchNorm1d(512),
            nn.Dropout(0.2),
            nn.Linear(in_features=512, out_features=256),
            nn.ReLU(),

            nn.BatchNorm1d(256),
            nn.Linear(in_features=256, out_features=128),
            nn.ReLU(),

            nn.Linear(in_features=128, out_features=64),
            nn.ReLU(),

            nn.Linear(in_features=64, out_features=32),
            nn.ReLU(),

            nn.Linear(in_features=32, out_features=16),
            nn.ReLU(),

            nn.Linear(in_features=16, out_features=1),
            # NOTE(review): a ReLU on the regression output yields zero
            # gradient whenever the pre-activation is negative; consider a
            # Sigmoid for the [0, 1] target. Left as is to keep the
            # architecture unchanged.
            nn.ReLU()
        )

    def train(self, mode=True):
        """Switch the tail between train/eval while the backbone stays in eval.

        ``nn.Module.train`` is recursive, so without this override a plain
        ``model.train()`` would re-enable dropout and BatchNorm statistic
        updates inside the frozen backbone.
        """
        super().train(mode)
        self.googlenet.eval()
        return self

    def forward(self, image, features):
        """Return one prediction per sample, shape ``(batch, 1)``.

        Args:
            image: batch of images accepted by the GoogLeNet backbone.
            features: batch of metadata features, one row per image.
        """
        googlenet_results = self.googlenet(image)
        # Match dtypes explicitly so integer metadata can be concatenated.
        features = features.to(googlenet_results.dtype)
        return self.linear_tail(torch.cat((features, googlenet_results), -1))

# Training

In [None]:
def train(model, optimizer, scheduler, loss_fn, loader=None, scheduler_step_every=10):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: module called as ``model(image, features)``.
        optimizer: optimizer stepped after every batch.
        scheduler: learning-rate scheduler stepped on every
            ``scheduler_step_every``-th batch, starting with the first one.
        loss_fn: callable ``loss_fn(pred, target)`` returning a scalar tensor.
        loader: iterable of ``(image, features, target)`` batches; defaults
            to the notebook-level ``train_loader``.
        scheduler_step_every: number of batches between scheduler steps.

    Returns:
        Mean of the per-batch loss values.
    """
    if loader is None:
        loader = train_loader
    # Run on whatever device the model lives on rather than assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    losses = []
    for batch_idx, (x, features, target) in enumerate(tqdm_notebook(loader)):
        x, features = x.to(device), features.to(device)
        # Targets arrive as a flat vector; predictions have shape (batch, 1).
        target = target.to(device).unsqueeze(-1)

        optimizer.zero_grad()
        pred = model(x, features)
        loss = loss_fn(pred.float(), target.float())
        loss.backward()
        optimizer.step()

        if batch_idx % scheduler_step_every == 0:
            scheduler.step()
        losses.append(loss.item())
    return np.mean(losses)

def validation(model, loss_fn, loader=None):
    """Evaluate the model and return the mean batch loss.

    Args:
        model: module called as ``model(image, features)``.
        loss_fn: callable ``loss_fn(pred, target)`` returning a scalar tensor.
        loader: iterable of ``(image, features, target)`` batches; defaults
            to the notebook-level ``val_loader``.

    Returns:
        Mean of the per-batch loss values.
    """
    if loader is None:
        loader = val_loader
    # Run on whatever device the model lives on rather than assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    losses = []
    # No backward pass happens here, so skip building autograd graphs.
    with torch.no_grad():
        for x, features, target in tqdm_notebook(loader):
            x, features = x.to(device), features.to(device)
            # Targets arrive as a flat vector; predictions have shape (batch, 1).
            target = target.to(device).unsqueeze(-1)
            pred = model(x, features)
            loss = loss_fn(pred.float(), target.float())
            losses.append(loss.item())
    return np.mean(losses)

In [None]:
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-3
# StepLR multiplies the learning rate by `gamma` every `step_size` scheduler
# steps. The second positional argument of StepLR is `step_size`, so the
# previous value of 1e-5 was used as a (fractional) step size rather than as
# a rate; with a float step size the decay condition `last_epoch % step_size
# == 0` effectively never holds, i.e. the rate appears to have stayed
# constant. gamma = 1.0 keeps that behaviour with valid arguments.
# TODO(review): lower LR_GAMMA (e.g. 0.9) to actually enable the decay.
LR_STEP_SIZE = 1
LR_GAMMA = 1.0

loss_fn = torch.nn.MSELoss()
model = Model().cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=LR_STEP_SIZE, gamma=LR_GAMMA)

In [None]:
N_EPOCHS = 3

# Per-epoch loss history, kept for later inspection.
train_losses = []
if VAL_MODE:
    val_losses = []

for epoch in range(N_EPOCHS):
    train_loss = train(model, optimizer, scheduler, loss_fn)
    train_losses.append(train_loss)
    print(f'{epoch} epoch, train loss: {train_loss}')
    if VAL_MODE:
        val_loss = validation(model, loss_fn)
        val_losses.append(val_loss)
        print(f'{epoch} epoch, val loss: {val_loss}')

# Test

In [None]:
# The test frame has no target column, so the dataset yields -1 as target.
test_ds = ImagePandasDataset(
    test_pd,
    'Id',
    img_dir=test_images_path,
    features_columns=FEATURES,
    transform=img_transforms_test,
)
# Order must be preserved: predictions are matched to rows by position.
test_loader = DataLoader(dataset=test_ds, batch_size=128, shuffle=False)

In [None]:
model.eval()
preds = []
# Pure inference: disable autograd so no graph is built for any batch.
with torch.no_grad():
    for image, features, _ in test_loader:
        batch_pred = model(image.cuda(), features.cuda())
        # The model was trained on targets divided by 100; scale back.
        preds.extend(100 * value for value in batch_pred.reshape(-1).tolist())

In [None]:
# Fill the submission template with the predictions (matched by row position)
# and write it without the pandas index column.
sample_df = pd.read_csv(
    '../input/petfinder-pawpularity-score/sample_submission.csv'
).assign(Pawpularity=preds)
sample_df.to_csv('submission.csv', index=False)