For the image data, make sure the notebook is running on a GPU.

In [None]:
# repeat import from metadata
import sklearn
import sklearn.linear_model as linear_model
from sklearn import model_selection
import pandas as pd
import numpy as np

# pytorch
import torch
import torch.nn as nn

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD

# preprocessing
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm

#### Create a custom dataset

In [None]:
# Side length, in pixels, that every image is resized to (images are square).
img_size = 128


def getImagePath(split, img_id):
    """Build the path of the JPEG for one image id.

    Args:
        split: Name of the sub-directory under the competition input
            folder (the notebook passes "train" or "test").
        img_id: Image identifier, used as the file name without extension.

    Returns:
        The path string ``<input dir>/<split>/<img_id>.jpg``.
    """
    pieces = ('../input/petfinder-pawpularity-score/', split, '/', img_id, '.jpg')
    return ''.join(pieces)

def getRawImages(split, ids):
    """Load, colour-convert and resize every image of a split into memory.

    Args:
        split: Sub-directory of the input folder to read from
            (the notebook passes "train" or "test").
        ids: Sequence of image ids; the notebook passes
            ``train_data['Id'].values`` / ``test_data['Id'].values``.

    Returns:
        A list with one ``float32`` RGB array of size
        ``img_size x img_size`` per id, in the same order as ``ids``.

    Raises:
        FileNotFoundError: If an image cannot be read from disk.
    """
    images = []
    for img_id in tqdm(ids):
        img_path = getImagePath(split, img_id)
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR channel order; convert to RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (img_size, img_size), interpolation=cv2.INTER_AREA)
        images.append(img.astype(np.float32))
    return images

class PetImageDataset(Dataset):
    """Dataset over images that are already loaded in memory.

    Args:
        images: Indexable collection of images.
        labels: Indexable collection of scores aligned with ``images``.
            Required when ``split`` is ``'train'``, unused otherwise.
        transforms: Optional callable invoked as ``transforms(image=img)``
            whose result is indexed with ``"image"`` (the Albumentations
            calling convention).
        split: ``'train'`` makes items ``(image, score)`` pairs; any other
            value makes items bare images.
    """

    def __init__(self, images, labels=None, transforms=None, split="train"):
        self.images = images
        self.labels = labels
        self.transforms = transforms
        self.split = split
        # The dataset itself never moves data between devices, so no device
        # lookup is done here; callers move batches to the GPU themselves.

    def __len__(self):
        """Return the number of images."""
        return len(self.images)

    def __getitem__(self, index):
        """Return the (optionally transformed) image at ``index``.

        For ``split == 'train'`` the matching label is returned alongside
        the image as ``(img, score)``.
        """
        img = self.images[index]

        if self.transforms:
            img = self.transforms(image=img)["image"]

        if self.split == 'train':
            score = self.labels[index]
            return img, score
        return img

#### Image Preprocessing

Because our image dataset is very small, with 9912 images, we want to augment the dataset by doing some image transformations.

In [None]:
# Training data has the possibility of being augmented by horizontal flip, etc. Pawpularity label remains the same
def getTrainTransforms():
    """Return the Albumentations pipeline applied to training images.

    Steps, in order: random horizontal flip (probability 0.5), per-channel
    normalisation with fixed mean/std, conversion to a tensor.
    """
    random_flip = A.HorizontalFlip(p=0.5)
    # A small random rotation is left disabled:
    #   A.augmentations.geometric.rotate.Rotate(limit=5, p=0.5)

    # Fixed per-channel statistics shared by every image (not computed per
    # image); the values match the commonly used ImageNet ones.
    normalize = A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    to_tensor = ToTensorV2()
    return A.Compose([random_flip, normalize, to_tensor])

# Do NOT augment the validation and test sets; only normalize them (resizing already happens in getRawImages).
def getValidationTestTransforms():
    """Return the Albumentations pipeline for validation and test images.

    Contains no random augmentation: only per-channel normalisation with
    the same fixed mean/std as the training pipeline, then conversion to
    a tensor.
    """
    normalize = A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    to_tensor = ToTensorV2()
    return A.Compose([normalize, to_tensor])

#### Load Training, Validation, and Test Data

In [None]:
# Decode all images once, up front, so the DataLoader does not have to read
# files on every access (which made training very slow).

# --- Repeated from the metadata notebook; could be removed ---
random_state = 131

input_path = '../input/petfinder-pawpularity-score/'
train_data = pd.read_csv(input_path + 'train.csv')
test_data = pd.read_csv(input_path + 'test.csv')
# --- end of repeated code ---

# Image lists for train, validation and test.
# 80/20 train/validation split of the labelled images; labels are float32.
train_val_raw_images = getRawImages("train", train_data['Id'].values)
train_raw_images, val_raw_images, y_train, y_val = model_selection.train_test_split(
    train_val_raw_images,
    train_data["Pawpularity"].values.astype(np.float32),
    test_size=0.2,
    random_state=random_state,
)
test_raw_images = getRawImages("test", test_data['Id'].values)

# Wrap the in-memory images in datasets. The validation set also uses
# split='train' so that it yields (image, label) pairs, but it gets the
# augmentation-free transforms.
train_img_data = PetImageDataset(
    images=train_raw_images,
    labels=y_train,
    transforms=getTrainTransforms(),
    split='train',
)
val_img_data = PetImageDataset(
    images=val_raw_images,
    labels=y_val,
    transforms=getValidationTestTransforms(),
    split='train',
)

#### CNN Models

##### Baseline CNN model
A very simple model, used to see how a CNN behaves on this task and to make sure the data is loaded correctly.

In [None]:
class CNN(torch.nn.Module):
    """Baseline regressor: one conv layer, one 2x2 max-pool, two linear layers.

    Args:
        image_size: Side length of the square input images. Defaults to the
            notebook-level ``img_size`` when omitted.

    The forward pass maps a batch of 3-channel images to one value per
    image, returned with shape ``(batch, 1)``.
    """

    def __init__(self, image_size=None):
        super(CNN, self).__init__()
        if image_size is None:
            image_size = img_size
        # The 2x2 pool floors each spatial side; the conv layer keeps the
        # size (3x3 kernel, stride 1, padding 1). Compute the pooled side
        # once: `a // 2 * a // 2` evaluates left to right and is only
        # correct for even sizes.
        pooled_side = image_size // 2

        self.conv_1 = torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.max_pool2d = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.linear_1 = torch.nn.Linear(pooled_side * pooled_side * 32, 128)
        self.linear_2 = torch.nn.Linear(128, 1)
        self.dropout = torch.nn.Dropout(p=0.5)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return predictions of shape ``(batch, 1)`` for image batch ``x``."""
        x = self.relu(self.conv_1(x))
        x = self.max_pool2d(x)
        # Flatten everything except the batch dimension.
        x = x.reshape(x.size(0), -1)
        x = self.relu(self.linear_1(x))
        x = self.dropout(x)
        x = self.linear_2(x)
        return x

##### More Complex CNN Model
We tried adding more layers to see if the error would improve. After the ideal number of epochs, the RMSE usually stayed around 21-25, which is either the same as or worse than always predicting the median. This is the final result after some tuning, and it is only marginally better than the simple CNN model.

The architecture of the model was inspired by AlexNet.

In [None]:
class ComplexCNN(torch.nn.Module):
    """Deeper regressor: three conv stages with pooling, then three linear layers.

    Args:
        image_size: Side length of the square input images. Defaults to the
            notebook-level ``img_size`` when omitted.

    The forward pass maps a batch of 3-channel images to one value per
    image, returned with shape ``(batch, 1)``. Shape comments in
    ``forward`` are written as channels x height x width for a 128 x 128
    input.
    """

    def __init__(self, image_size=None):
        super(ComplexCNN, self).__init__()
        if image_size is None:
            image_size = img_size
        # Spatial side after the 4x4 pool and the two 2x2 pools (each pool
        # floors; the convs keep the size). `a // 16 * a // 16` evaluates
        # left to right and is only correct for multiples of 16.
        pooled_side = image_size // 4 // 2 // 2

        self.conv_1 = torch.nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=1)
        self.conv_2 = torch.nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.max_pool2d_big = torch.nn.MaxPool2d(kernel_size=4, stride=4)
        self.max_pool2d = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv_3 = torch.nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.linear_1 = torch.nn.Linear(pooled_side * pooled_side * 16, 128)
        self.linear_2 = torch.nn.Linear(128, 128)
        self.linear_3 = torch.nn.Linear(128, 1)
        self.dropout = torch.nn.Dropout(p=0.5)
        self.relu = torch.nn.ReLU()
        # Not used by forward(); kept so existing references to the
        # attribute keep working.
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return predictions of shape ``(batch, 1)`` for image batch ``x``."""
        x = self.relu(self.conv_1(x))  # 8 x 128 x 128
        x = self.max_pool2d_big(x)  # 8 x 32 x 32
        x = self.relu(self.conv_2(x))  # 16 x 32 x 32
        x = self.max_pool2d(x)  # 16 x 16 x 16
        # NOTE(review): the same conv_3 layer (shared weights) is applied
        # three times in a row -- confirm this is intended rather than
        # three separate layers.
        x = self.relu(self.conv_3(x))  # 16 x 16 x 16
        x = self.relu(self.conv_3(x))
        x = self.relu(self.conv_3(x))
        x = self.max_pool2d(x)  # 16 x 8 x 8
        # Flatten everything except the batch dimension.
        x = x.reshape(x.size(0), -1)
        x = self.relu(self.linear_1(x))
        x = self.dropout(x)
        # NOTE(review): no activation follows linear_2, so linear_2 and
        # linear_3 are separated only by dropout -- confirm this is intended.
        x = self.linear_2(x)
        x = self.dropout(x)
        x = self.linear_3(x)
        return x

#### Train the model

In [None]:
# Hyperparameters.
epochs = 100
batch_size = 32

# Network to train; swap in CNN() to run the baseline model instead.
model = ComplexCNN()

# Two mean-squared-error loss objects.
criterion = nn.MSELoss()
mse_loss = nn.MSELoss()

train_loader = DataLoader(
    train_img_data,
    batch_size=batch_size,
    shuffle=True,
    pin_memory=True,
    num_workers=2,
)
# Validation needs no shuffling: the whole set is served as one batch.
val_loader = DataLoader(
    val_img_data,
    batch_size=len(val_img_data),
    shuffle=False,
    pin_memory=True,
    num_workers=2,
)

# Loss histories, appended to every 5 epochs by the training loop.
train_loss, val_loss = [], []

# Move the model and the loss to the GPU when one is present.
if torch.cuda.is_available():
    model = model.cuda()
    criterion = criterion.cuda()

optimizer = Adam(model.parameters(), lr=1e-3)

def getError(loader, model, loss_fn=None):
    """Return the sample-weighted mean loss of ``model`` over ``loader``.

    Args:
        loader: Iterable of ``(inputs, targets)`` batches.
        model: Module to evaluate. It is switched to eval mode for the
            duration of the call and restored to its previous mode after.
        loss_fn: Loss returning the batch mean. Defaults to the
            notebook-level ``criterion``.

    Returns:
        A 0-dim CPU tensor holding the mean loss over all samples.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if loss_fn is None:
        loss_fn = criterion
    use_cuda = torch.cuda.is_available()

    was_training = model.training
    model.eval()  # disable dropout so the reported error is deterministic
    total_loss = 0.0
    n_samples = 0
    try:
        with torch.no_grad():
            for x, y in loader:
                if use_cuda:
                    x = x.cuda()
                    y = y.cuda()
                # The model emits (N, 1) while targets are (N,); without
                # this reshape the loss broadcasts to an (N, N) comparison.
                y_pred = model(x).reshape(y.shape)
                batch_n = len(x)
                total_loss += loss_fn(y_pred, y).item() * batch_n
                n_samples += batch_n
    finally:
        model.train(was_training)

    if n_samples == 0:
        raise ValueError("loader yielded no samples")
    return torch.tensor(total_loss / n_samples)

# Train for `epochs` passes over train_loader; every 5 epochs record and
# print the training and validation errors.
for epoch in tqdm(range(epochs)):
    model.train()  # make sure dropout is active while training
    running_loss = 0.0
    for batch_x_train, batch_y_train in train_loader:
        if torch.cuda.is_available():
            batch_x_train = batch_x_train.cuda()
            batch_y_train = batch_y_train.cuda()
        optimizer.zero_grad()

        y_predicted = model(batch_x_train).to(torch.float32)
        # The model emits (N, 1) while the targets are (N,). Without this
        # reshape MSELoss broadcasts both to (N, N) and compares every
        # prediction with every target.
        y_predicted = y_predicted.reshape(batch_y_train.shape)

        loss = criterion(y_predicted, batch_y_train)
        # The criterion returns the batch mean; weight it by the batch
        # size so the epoch figure is a true per-sample mean.
        running_loss += loss.item() * len(batch_x_train)
        loss.backward()
        optimizer.step()
    running_loss /= len(train_img_data)

    if (epoch + 1) % 5 == 0:
        # Store plain floats so the histories can be plotted directly.
        loss_test = float(getError(val_loader, model))
        train_loss.append(running_loss)
        val_loss.append(loss_test)
        # The printed values are square roots of the mean squared errors.
        print(f'epoch: {epoch+1}, loss: {np.sqrt(running_loss ):.4f}, test_loss: {np.sqrt(loss_test):.4f}' )

In [None]:
import matplotlib.pyplot as plt

# Plot the recorded loss histories (one point per 5 epochs).
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title("Training and Validation Loss")
ax.plot(val_loss, label="val")
ax.plot(train_loss, label="train")
ax.set_xlabel("iterations (every 5 epochs)")
ax.set_ylabel("Loss")
ax.legend()
plt.show()

In [None]:
# !pip install torchsummary
# from torchsummary import summary

# summary(model, input_size=(3, img_size, img_size))

In [None]:
# Predict on the test images and write the submission file.
test_img_data = PetImageDataset(images=test_raw_images, split='test', transforms=getValidationTestTransforms())
test_loader = torch.utils.data.DataLoader(test_img_data, shuffle=False, batch_size=len(test_img_data))

# Inference must run in eval mode: in training mode dropout stays active
# and randomly perturbs every prediction.
model.eval()

batch_predictions = []
with torch.no_grad():  # no gradients are needed for prediction
    for x in test_loader:
        if torch.cuda.is_available():
            x = x.cuda()
        # Flatten the (N, 1) model output to a 1-D array of N scores.
        batch_predictions.append(model(x).cpu().numpy().reshape(-1))

# Collect every batch, so the result stays complete if the loader's batch
# size is ever made smaller than the test set.
final_result = np.concatenate(batch_predictions)

# submit
submission = pd.DataFrame({
        "Id":  test_data['Id'],
        "Pawpularity": final_result
    })
submission.to_csv('../working/submission.csv', index=False)

final_result