## Pawpularity CNN from Scratch in Pytorch

This notebook implements a CNN with 9 convolutional layers and 2 fully connected layers in plain PyTorch, with no pretrained models or weights used.  Since Pawpularity is bounded between 1 and 100, the final layer's activation is the sigmoid function scaled by 100, and MSE is used as the loss function for optimization.


The final competition evaluation metric is the root mean squared error, i.e. the square root of MSE:
$ \textrm{RMSE} = \sqrt{\frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2} $

### Load Dependencies

In [None]:
import pandas as pd
import numpy as np
import math
import time
import os
from skimage import io, transform
import PIL

In [None]:
# Config
# Root of the competition data. The trailing slash matters: paths below are
# built by plain string concatenation (e.g. f'{data_dir}train').
data_dir = '../input/petfinder-pawpularity-score/'
global_batch_size = 32  # batch size of the training/validation DataLoaders
workers = 2  # passed as num_workers to the DataLoaders
np.random.seed(10)  # seed NumPy's global RNG (it is re-seeded before the train/validation split)
# Sanity check: show the contents of the data directory and a few training files
print(os.listdir(data_dir))
print(os.listdir(f'{data_dir}train')[0:4])

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt

import torch
from torchvision import datasets, transforms

from torch import nn, optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import ExponentialLR
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler

### Load and Explore data

**Look at the annotations**

In [None]:
# Read the training table (image ids, annotations and scores) for exploration
train_df = pd.read_csv(f'{data_dir}train.csv')

In [None]:
# Peek at the first rows of the training table
train_df.head()

In [None]:
# Column dtypes and non-null counts of the training table
train_df.info()

In [None]:
# Annotations: columns 1 to 12 of the third row (positional indexing)
np.array(train_df.iloc[2, 1:13])

In [None]:
# Score: column 13 of the same (third) row
train_df.iloc[2, 13]

In [None]:
# Distribution of the scores (column 13) over the whole training table, in 50 bins.
# density=True normalizes the histogram, so the y-axis is a density, not a raw count.
n, bins, patches = plt.hist(train_df.iloc[:, 13], 50, density=True, facecolor='g', alpha=0.75)

plt.xlabel('Pawpularity')
plt.ylabel('Frequency')
plt.title('Pawpularity Histogram')
plt.xlim(0, 100)
# plt.ylim(0, 0.03)
plt.grid(True)
plt.show()

**Custom dataset class to attach annotations and scores to the images**

In [None]:
class PawpularityDataset(Dataset):
    """Training dataset pairing each pet image with its annotations and score.

    Rows are read positionally from the CSV: column 0 is the image id (file
    name without the ``.jpg`` extension), columns 1 to 12 are the annotations
    and column 13 is the score.
    """

    def __init__(self, csv_file, img_dir, transform=transforms.ToTensor()):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            img_dir (string): Directory with all the images.
            transform (callable, optional): Transform applied to the PIL image
                of every sample. Passing ``None`` falls back to
                ``transforms.ToTensor()`` instead of failing on the first sample.
        """

        self.annotations_csv = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform if transform is not None else transforms.ToTensor()

    def __len__(self):
        """Return the number of rows (samples) in the annotations table."""
        return len(self.annotations_csv)

    def __getitem__(self, idx):
        """Return ``[image, annotations, score]`` for the row at position ``idx``.

        Returns:
            list: the transformed image, a float NumPy array with the 12
            annotation values, and a float64 tensor of shape ``(1,)`` holding
            the score.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.img_dir,
                                self.annotations_csv.iloc[idx, 0])

        # PIL opens files lazily; the context manager guarantees the file
        # handle is released, and convert('RGB') forces the pixel data to be
        # read while the file is still open. It also guarantees 3 channels,
        # so grayscale/CMYK files cannot break a 3-channel Normalize.
        with PIL.Image.open(img_name + '.jpg') as img_file:
            image = img_file.convert('RGB')

        # Columns 1 to 12 contain the annotations
        annotations = np.array(self.annotations_csv.iloc[idx, 1:13]).astype('float')

        # Column 13 has the score; keep it as a float64 tensor of shape (1,)
        score = torch.tensor([float(self.annotations_csv.iloc[idx, 13])],
                             dtype=torch.float64)

        # Apply the transforms
        image = self.transform(image)

        return [image, annotations, score]

**Define global image transforms**

In [None]:
## Define transforms with image augmentation on the training set
# Training pipeline: resize, center-crop to 224x224, then random flip/rotation
# for augmentation, and finally tensor conversion + per-channel normalization.
img_transforms = transforms.Compose([transforms.Resize(255),
                                     transforms.CenterCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.RandomRotation(20),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                          std=[0.229, 0.224, 0.225])])

# Validation/test pipeline: same resize, crop and normalization, but without
# the random augmentations so evaluation is deterministic.
img_transforms_valid = transforms.Compose([transforms.Resize(255),
                                           transforms.CenterCrop(224),
                                           transforms.ToTensor(),
                                           transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                std=[0.229, 0.224, 0.225])])

**Load and check out the dataset**

In [None]:
# Load the dataset (training CSV + training image folder, with augmentation)
train_dataset = PawpularityDataset(f'{data_dir}train.csv', f'{data_dir}train', transform=img_transforms)
# Display the image directory the dataset reads from
train_dataset.img_dir

In [None]:
# Small shuffled loader used only for the exploration cells below
dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=8, shuffle=True)

In [None]:
# Pull one batch (batch size of 8) and inspect the shapes of its three parts
images, annotations, scores = next(iter(dataloader))
print(images.shape)
print(scores.shape)
print(annotations.shape)

**Look at some images**

In [None]:
# Helper function to de-normalize and plot images
def im_convert(tensor):
    """Un-normalize a single image tensor and draw it with ``plt.imshow``.

    Args:
        tensor: one normalized image in channel-first layout with 3 channels
            (size-1 dimensions are squeezed away first).

    Returns:
        The object returned by ``plt.imshow``.
    """
    image = tensor.detach().cpu().numpy().squeeze()

    # Undo the per-channel Normalize(mean, std) applied by the transforms
    mean = np.array((0.485, 0.456, 0.406)).reshape(3, 1, 1)
    std = np.array((0.229, 0.224, 0.225)).reshape(3, 1, 1)
    image = image * std + mean

    # Clip before the cast: values outside [0, 1] would otherwise wrap
    # around when converted to uint8 and show up as speckle artifacts.
    image = np.clip(image, 0.0, 1.0)
    img = (image * 255).astype(np.uint8)

    # imshow expects channel-last (H, W, C)
    return plt.imshow(np.transpose(img, (1, 2, 0)))

In [None]:
im_numpy = images.numpy() # convert images to numpy for display

# plot the images in the batch, along with the corresponding labels
fig = plt.figure(figsize=(20, 10))
# display the 8 images of the batch in a 2x4 grid, titled with their scores
for idx in np.arange(8):
    ax = fig.add_subplot(2, 4, idx+1, xticks=[], yticks=[])
    im_convert(images[idx])
    ax.set_title(scores[idx].item())

### Set up the cnn structure

In [None]:
import torch.nn as nn
import torch.nn.functional as func
import torch.optim as optim

In [None]:
# Calculate the dense layer input size
# The 3x3 convolutions use a padding of 1, so only the pooling layers change the image dimensions

sdim = 224/2/2/2/2/2 # each of the five max-pooling layers halves the x/y dimensions
print(sdim)
print(sdim*sdim*256+12) # 256 channels after the last conv layer, plus the 12 annotations

In [None]:
class Regression(nn.Module):
    """CNN regressor that predicts a score in the 0-100 range.

    Nine 3x3 convolutions (padding 1) interleaved with five 2x2 max-poolings
    extract image features; these are flattened, concatenated with the
    annotation vector and passed through two fully connected layers. The
    output is ``sigmoid(x) * 100``.

    Args:
        image_size (int): side length of the square input images. The default
            of 224 reproduces the original 12556-wide dense input.
        n_annotations (int): number of annotation features concatenated to
            the flattened image features. Defaults to 12.
    """

    def __init__(self, image_size=224, n_annotations=12):
        super().__init__()

        # Layers are created in a fixed order (conv1..conv9, fc1, fc4) so
        # that seeding the RNG before construction gives the same weights.

        # convolutional layers
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)

        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 32, 3, padding=1)

        self.conv4 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv5 = nn.Conv2d(64, 64, 3, padding=1)

        self.conv6 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv7 = nn.Conv2d(128, 128, 3, padding=1)

        self.conv8 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv9 = nn.Conv2d(256, 256, 3, padding=1)

        # maxpooling layer (shared, it has no parameters)
        self.maxpool = nn.MaxPool2d(2, 2)

        # Dense layers. The padded convolutions keep the spatial size, so only
        # the five poolings shrink it: side -> side // 2**5 (224 -> 7).
        pooled_side = image_size // 2 ** 5
        self.fc1 = nn.Linear(256 * pooled_side * pooled_side + n_annotations, 512)

        # regression output
        self.fc4 = nn.Linear(512, 1)

        # dropout
        self.dropout = nn.Dropout(0.3)

    def limit_range(self, x):
        """Clamp ``x`` element-wise to the closed interval [0, 100]."""
        return torch.clamp(x, min=0.0, max=100.0)

    def forward(self, data):
        """Run the network on one batch.

        Args:
            data (dict): ``data['images']`` is the image batch and
                ``data['annotations']`` the matching annotation batch (cast
                to float32 before concatenation).

        Returns:
            Tensor: float64 predictions of shape ``(batch, 1)`` in (0, 100).
        """
        img = data['images']
        ann = data['annotations']

        # Conv layers: a single conv + pool, then four (conv, conv + pool) stages
        x = self.maxpool(func.relu(self.conv1(img)))
        stages = ((self.conv2, self.conv3),
                  (self.conv4, self.conv5),
                  (self.conv6, self.conv7),
                  (self.conv8, self.conv9))
        for first_conv, second_conv in stages:
            x = func.relu(first_conv(x))
            x = self.maxpool(func.relu(second_conv(x)))

        # flatten and combine with annotations
        x = x.view(x.shape[0], -1)
        x = torch.cat((x, ann.float()), 1)
        x = self.dropout(x)

        # Dense layers; the output is cast to double so it matches the
        # float64 targets when the loss is computed.
        x = self.dropout(func.relu(self.fc1(x)))
        x = self.fc4(x).double()

        # Limit output to the 0 to 100 range
        return torch.sigmoid(x) * 100

In [None]:
# Define a custom weight initialization

def init_weights(m):
    """Re-initialize a fully connected layer; meant for ``model.apply``.

    ``nn.Linear`` modules get weights drawn from N(0, 1/sqrt(in_features))
    and a zero bias. Every other module type is left untouched.

    Args:
        m (nn.Module): module visited by ``model.apply``.
    """
    # isinstance instead of matching 'Linear' in the class name: the name
    # match also hit modules such as nn.Bilinear, which has no `in_features`.
    if not isinstance(m, nn.Linear):
        return

    std = float(1. / np.sqrt(m.in_features))
    m.weight.data.normal_(0.0, std)
    # Layers built with bias=False have no bias tensor to reset
    if m.bias is not None:
        m.bias.data.fill_(0)

In [None]:
# Create the model and initialize the loss function and optimizer

model = None
# Seed torch before building the model so the initial weights are reproducible
torch.manual_seed(13)
model = Regression()

# Custom initialization, applied to every submodule of the model
model.apply(init_weights)

# Summed (not averaged) squared error; the sum is divided by the sample count where RMSE is reported
criterion = nn.MSELoss(reduction='sum')

# AdamW: Adam with decoupled weight decay
optimizer = optim.AdamW(model.parameters(), lr=0.00007, weight_decay=0.2)

# Multiply the learning rate by 0.4 each time one of the milestone epochs is reached
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones = [1, 3, 6, 10], gamma=0.4)

In [None]:
# Load a small batch to test out the forward pass
# (rebuilds the training dataset and a shuffled loader with 64 samples per batch)

train_dataset = PawpularityDataset(f'{data_dir}train.csv', f'{data_dir}train', transform=img_transforms)
dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
images, annotations, scores = next(iter(dataloader))

In [None]:
# Test out the forward pass on a single batch
# RMSE before any training (with random parameters).
# The model has not been switched to eval mode yet, so dropout is active here.
with torch.no_grad():
    output = model({'images': images, 'annotations': annotations})
    loss = criterion(output, scores)

# The criterion sums the squared errors, so divide by the actual batch size
# (not a hard-coded 64) before taking the root. The value is printed
# explicitly because an expression inside a `with` block is never displayed.
print(math.sqrt(loss.item() / len(scores)))

In [None]:
# Mean and spread of the untrained model's predictions on the test batch
print(torch.mean(output))
print(torch.std(output))

In [None]:
# Show the layer structure of the model
print(model)

### Train the model

In [None]:
# check if CUDA is available
train_on_gpu = torch.cuda.is_available()
# Only query the device name when a GPU exists: torch.cuda.get_device_name()
# raises on CPU-only machines, which used to abort this cell before the
# "Training on CPU" message could be shown.
device = torch.cuda.get_device_name() if train_on_gpu else 'cpu'

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print(f'CUDA is available!  Training on GPU {device}...')

**Dataset setup**

In [None]:
## Load and set up the final training and validation dataset (use different transforms)

# Both datasets read the same CSV and image folder; they differ only in the
# transform (augmentation for training, deterministic preprocessing for validation).
train_data = PawpularityDataset(f'{data_dir}train.csv', f'{data_dir}train', transform=img_transforms)
valid_data = PawpularityDataset(f'{data_dir}train.csv', f'{data_dir}train', transform=img_transforms_valid)

# Fix the seed so the shuffled split below is reproducible
np.random.seed(13)

# obtain random indices that will be used for training/validation split
valid_size = 0.1
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
# the first `split` shuffled indices form the validation set, the rest the training set
train_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=global_batch_size,
                                           sampler=train_sampler, num_workers=workers,
                                           pin_memory=True) 
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=global_batch_size,
                                           sampler=valid_sampler, num_workers=workers,
                                           pin_memory=True) 

# number of batches times batch size: an upper bound on the sample counts,
# since the last batch of each loader may be smaller
print(len(train_loader)*global_batch_size)
print(len(valid_loader)*global_batch_size)

**Model training loop**

In [None]:
# number of epochs to train the model
# Use 40 epochs

if train_on_gpu:
    model.cuda()

n_epochs = 40

# Lowest validation RMSE seen so far. np.inf is used instead of the np.Inf
# alias, which was removed in NumPy 2.0.
valid_loss_min = np.inf

# per-epoch RMSE history, used for the loss plot further down
train_losses, valid_losses = [], []

for epoch in range(1, n_epochs+1):

    start = time.time()
    # learning rate in effect during this epoch (read before scheduler.step())
    current_lr = scheduler.get_last_lr()[0]

    # keep track of training and validation loss
    # (the criterion sums squared errors, so these accumulate sums over the epoch)
    train_loss = 0.0
    valid_loss = 0.0

    ###################
    # train the model #
    ###################
    # put in training mode (enable dropout)
    model.train()
    for images, annotations, scores in train_loader:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            images, annotations, scores = images.cuda(), annotations.cuda(), scores.cuda()
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        # the annotations get added in the dense layers
        output = model({'images': images, 'annotations': annotations})
        # calculate the batch loss
        loss = criterion(output, scores)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss
        train_loss += loss.item()

    ######################
    # validate the model #
    ######################
    # eval mode (no dropout)
    model.eval()
    with torch.no_grad():
        for images, annotations, scores in valid_loader:
            # move tensors to GPU if CUDA is available
            if train_on_gpu:
                images, annotations, scores = images.cuda(), annotations.cuda(), scores.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model({'images': images, 'annotations': annotations})
            # calculate the batch loss
            loss = criterion(output, scores)
            # update the summed validation loss
            valid_loss += loss.item()

    # calculate RMSE: summed squared error / number of samples, then the root
    train_loss = math.sqrt(train_loss/len(train_loader.sampler))
    valid_loss = math.sqrt(valid_loss/len(valid_loader.sampler))

    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    # increment learning rate decay
    scheduler.step()

    # print training/validation statistics
    print('Epoch: {}, time: {:.3f}s, lr: {:.6f} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, float(time.time() - start), current_lr, train_loss, valid_loss))

    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(model.state_dict(), 'pawpularity_best_model.pt')
        valid_loss_min = valid_loss

### Diagnostics and performance

In [None]:
# Load the best performing model on the validation set
# (the checkpoint written by the training loop whenever the validation RMSE improved)
model.load_state_dict(torch.load('pawpularity_best_model.pt'))

In [None]:
# Run the model over the validation set and gather predictions next to the
# true scores, then summarize the distribution of the predictions.

predictions, score_list = [], []

model.eval()
with torch.no_grad():
    for images, annotations, scores in valid_loader:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            images = images.cuda()
            annotations = annotations.cuda()
            scores = scores.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model({'images': images, 'annotations': annotations})
        # flatten each (batch, 1) tensor into a 1-D array before collecting
        batch_preds = output.detach().cpu().numpy().reshape(len(output))
        batch_scores = scores.detach().cpu().numpy().reshape(len(scores))
        predictions.extend(batch_preds)
        score_list.extend(batch_scores)

preds_df = pd.DataFrame({'preds': predictions})
preds_df.describe()

In [None]:
# Manually check RMSE: root of the summed squared differences divided by the validation sample count
diffs = np.array(score_list) - np.array(predictions)
print(math.sqrt((diffs @ diffs)/len(valid_loader.sampler)))


In [None]:
# Experiment: widen the spread of the predictions and re-check the validation RMSE.
# Each prediction is pushed away from the mean by 1.5 times its z-score.
# Note from the original run: slightly increasing the std dev this way
# currently does reduce the validation error.

pred_arr = np.array(predictions)
mean = np.mean(pred_arr)
stddev = np.std(pred_arr)
print(mean, stddev)

updated_normalized = 1.5 * (pred_arr - mean) / stddev
new_predictions = updated_normalized + pred_arr

diffs = np.array(score_list) - np.array(new_predictions)
rmse_widened = math.sqrt((diffs @ diffs) / len(valid_loader.sampler))
print(rmse_widened)

In [None]:
# Histogram of validation predictions 
# (50 bins; density=True, so the y-axis is a density rather than a raw count)

n, bins, patches = plt.hist(predictions, 50, density=True, facecolor='g', alpha=0.75)

plt.xlabel('Pawpularity')
plt.ylabel('Frequency')
plt.title('Predicted Pawpularity Histogram')
plt.xlim(0, 100)
plt.ylim(0, .2)
plt.grid(True)
plt.show()

The range could still be a lot greater, and the model is failing completely at predicting the highest ranked images that get a score of 100.  

In [None]:
# Histogram of validation set actual scores
# (rows selected with the validation indices; same bins and axis limits as the
# prediction histogram so the two plots can be compared directly)

n, bins, patches = plt.hist(train_df.iloc[valid_idx, 13], 50, density=True, facecolor='g', alpha=0.75)

plt.xlabel('Pawpularity')
plt.ylabel('Frequency')
plt.title('Actual Pawpularity Histogram')
plt.xlim(0, 100)
plt.ylim(0, .2)
plt.grid(True)
plt.show()

In [None]:
# Plot the losses
# The first epoch is left out of the plot. The x values start at 2 so that
# every point sits at its real (1-indexed) epoch number; previously the
# epoch-2 loss was drawn at x=1.
fig = plt.figure()
ax = plt.axes()
plotted_epochs = list(range(2, len(train_losses) + 1))
ax.plot(plotted_epochs, train_losses[1:], label='Training RMSE')
ax.plot(plotted_epochs, valid_losses[1:], label='Validation RMSE')
ax.set_xlabel('Epoch')
ax.set_ylabel('RMSE')
ax.legend()
print(f'best score: {valid_loss_min}')

### Show examples of images and predicted vs. actual scores

In [None]:
# Take one validation batch for the example plot
images, annotations, scores = next(iter(valid_loader))
# Move it to the GPU only when one is in use, so the cell also runs on CPU
if train_on_gpu:
    images, annotations, scores = images.cuda(), annotations.cuda(), scores.cuda()

In [None]:
# Inference only: eval mode disables dropout and no_grad skips building the
# autograd graph for this forward pass.
model.eval()
with torch.no_grad():
    output_plot = model({'images': images, 'annotations': annotations}).cpu()
# bring the batch back to the CPU for plotting
images, annotations, scores = images.cpu(), annotations.cpu(), scores.cpu()

In [None]:
# plot the images in the batch, along with the corresponding labels and predictions

fig = plt.figure(figsize=(20, 10))
# display the first 12 images of the batch in a 3x4 grid,
# titled with the rounded actual and predicted scores
for idx in np.arange(12):
    ax = fig.add_subplot(3, 4, idx+1, xticks=[], yticks=[])
    im_convert(images[idx])
    ax.set_title(f'Act: {round(scores[idx].item())} Pred: {round(output_plot[idx].item())}')

### Use the model to predict the test dataset

In [None]:
# Read the test table and show its first rows
test_df = pd.read_csv(f'{data_dir}test.csv')
test_df.head(10)

In [None]:
# Load the best performing model on the validation set
# (reloaded here so the prediction section does not depend on the diagnostics cells)
model.load_state_dict(torch.load('pawpularity_best_model.pt'))

In [None]:
class PawpularityTestDataset(Dataset):
    """Test-time dataset pairing each image with its annotations (no score).

    Rows are read positionally from the CSV: column 0 is the image id (file
    name without the ``.jpg`` extension) and columns 1 to 12 are the
    annotations.
    """

    def __init__(self, csv_file, img_dir, transform=transforms.ToTensor()):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            img_dir (string): Directory with all the images.
            transform (callable, optional): Transform applied to the PIL image
                of every sample. Passing ``None`` falls back to
                ``transforms.ToTensor()`` instead of failing on the first sample.
        """

        self.annotations_csv = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform if transform is not None else transforms.ToTensor()

    def __len__(self):
        """Return the number of rows (samples) in the annotations table."""
        return len(self.annotations_csv)

    def __getitem__(self, idx):
        """Return ``[image, annotations]`` for the row at position ``idx``.

        Returns:
            list: the transformed image and a float NumPy array with the 12
            annotation values.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.img_dir,
                                self.annotations_csv.iloc[idx, 0])

        # PIL opens files lazily; the context manager guarantees the file
        # handle is released, and convert('RGB') forces the pixel data to be
        # read while the file is still open. It also guarantees 3 channels,
        # so grayscale/CMYK files cannot break a 3-channel Normalize.
        with PIL.Image.open(img_name + '.jpg') as img_file:
            image = img_file.convert('RGB')

        # Columns 1 to 12 contain the annotations
        annotations = np.array(self.annotations_csv.iloc[idx, 1:13]).astype('float')

        # Apply the transforms
        image = self.transform(image)

        return [image, annotations]

In [None]:
## Load the test dataset
# (uses the deterministic validation transforms: no augmentation at prediction time)
test_data = PawpularityTestDataset(f'{data_dir}test.csv', f'{data_dir}test', transform=img_transforms_valid)

# never ask for a batch larger than the dataset itself
batch_size = min(len(test_data), 32)

# no sampler and no shuffling, so batches are read in CSV row order
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, num_workers=workers) 

In [None]:
# Step through with a reasonable batch size and build up the output dataset

model.eval()
outputs = []
# no_grad: this is pure inference, so skip building the autograd graph
with torch.no_grad():
    for images, annotations in test_loader:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            images, annotations = images.cuda(), annotations.cuda()
        test_output = model({'images': images, 'annotations': annotations})
        outputs.extend(list(test_output.cpu().numpy().reshape(len(test_output),)))

img_names = list(test_df.iloc[:, 0].values)
outputs = [round(x, 2) for x in outputs]

# The ids come from test_df while the predictions come from test_loader;
# make sure the two line up before pairing them in the submission frame.
assert len(img_names) == len(outputs), 'number of predictions does not match number of test ids'

output_df = pd.DataFrame({'Id': img_names, 'Pawpularity': outputs})
output_df.head(10)

In [None]:
# Write the output in the required format
# (two columns, Id and Pawpularity, without the DataFrame index)
output_df.to_csv('submission.csv', index=False)