In [None]:
import glob
import copy
import random
import numpy as np
from PIL import Image
import cv2
import pandas as pd

#modeling
from sklearn.model_selection import train_test_split
import torch
import torchvision

#visualizations
from tqdm import tqdm
import matplotlib.pyplot as plt

In [None]:
# Root folder of the competition data (Kaggle input mount).
path = '/kaggle/input/petfinder-pawpularity-score/'

# Tabular metadata for the train and test splits, read in that order.
train_df, test_df = (
    pd.read_csv(path + csv_name) for csv_name in ('train.csv', 'test.csv')
)

# File names of every .jpg found in each split's image folder.
train_jpg, test_jpg = (
    glob.glob(path + pattern) for pattern in ("train/*.jpg", "test/*.jpg")
)

In [None]:
# Sanity check: the metadata row count should line up with the number of image files found.
(train_df.shape, len(train_jpg))

In [None]:
# Load the metadata again into the working frames used by the rest of the notebook.
train, test = (
    pd.read_csv(path + csv_name) for csv_name in ('train.csv', 'test.csv')
)

In [None]:
# Turn each image id into the full path of its .jpg file inside the split's folder.
train["img_path"] = [path + 'train/' + image_id + ".jpg" for image_id in train["Id"]]
test["img_path"] = [path + 'test/' + image_id + ".jpg" for image_id in test["Id"]]

In [None]:
# Features are the image paths; the target is Pawpularity divided by 100,
# stored as a column vector of shape (n_samples, 1).
X = train["img_path"].values
y = (train['Pawpularity'].values / 100).reshape(-1, 1)

# Hold out 10% of the rows for evaluation; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=7,
)

# Show the shape of each of the four resulting arrays.
print(*(part.shape for part in (x_train, x_test, y_train, y_test)))

In [None]:
class PetfinderDataset(torch.utils.data.Dataset):
    """Map-style dataset that reads images from disk on demand.

    Each item is decoded with ``torchvision.io.read_image``, resized to a
    square of ``image_size`` pixels and, when ``scale`` is true, converted to
    float and divided by 255.

    Args:
        img_path: Indexable collection of image file paths.
        y: Optional indexable collection of labels aligned with ``img_path``.
            When ``None`` (e.g. for the submission set) items are images only.
        image_size: Side length, in pixels, of the resized square image.
        scale: If true, return ``image.float() / 255``; otherwise return the
            resized image with the dtype produced by the decoder.
    """

    def __init__(self, img_path, y=None, image_size=128, scale=True):
        self._X = img_path
        self._y = y
        self._transform = torchvision.transforms.Resize([image_size, image_size])
        self.scale = scale

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self._X)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, label)`` when labels were supplied, otherwise ``image``.
        """
        image_path = self._X[idx]
        # Decode explicitly as 3-channel RGB. Without a mode the channel count
        # follows the file (1 for grayscale, 4 with alpha), which would break
        # batching and the 3-channel first convolution of the model.
        image = torchvision.io.read_image(
            str(image_path), mode=torchvision.io.ImageReadMode.RGB
        )
        image = self._transform(image)
        if self.scale:
            image = image.float() / 255
        if self._y is not None:
            label = self._y[idx]
            return image, label
        return image
    
# Wrap each split's image paths and labels (as float tensors) in a dataset.
train_dataset = PetfinderDataset(img_path=x_train, y=torch.FloatTensor(y_train))
test_dataset = PetfinderDataset(img_path=x_test, y=torch.FloatTensor(y_test))

In [None]:
# Number of samples per batch for both the training and the held-out loader.
batch_size = 1024

# Only the training loader shuffles; the held-out loader keeps dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
)

In [None]:
class Net(torch.nn.Module):
    """Convolutional regressor mapping an image batch to one score per image.

    Seven convolutions (each followed by ReLU, and all but the first by batch
    normalisation) feed a two-layer fully connected head whose output passes
    through a hard sigmoid. ``linear1`` expects 8192 flattened features.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        # Stem: strided 7x7 convolution, then a 3x3 convolution.
        self.conv0 = nn.Conv2d(3, 16, kernel_size=7, stride=2, padding=0)
        self.conv1 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)

        # Strided stage with channel dropout.
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.drop1 = nn.Dropout2d(p=0.25)

        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)

        # Second strided stage with channel dropout.
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.drop2 = nn.Dropout2d(p=0.25)

        # Widen to 128 channels, then 2x2 max pooling.
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(128)
        self.mp1 = nn.MaxPool2d((2, 2))

        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.bn6 = nn.BatchNorm2d(128)
        self.drop3 = nn.Dropout2d(p=0.25)

        # Fully connected head producing a single value.
        self.linear1 = nn.Linear(8192, 512)
        self.drop4 = nn.Dropout(p=0.5)
        self.linear2 = nn.Linear(512, 1)

    def forward(self, x):
        """Run the network on ``x`` and return one hard-sigmoid output per sample."""
        relu = torch.nn.functional.relu

        features = relu(self.conv0(x))
        features = self.bn1(relu(self.conv1(features)))
        features = self.drop1(self.bn2(relu(self.conv2(features))))
        features = self.bn3(relu(self.conv3(features)))
        features = self.drop2(self.bn4(relu(self.conv4(features))))
        features = self.mp1(self.bn5(relu(self.conv5(features))))
        features = self.drop3(self.bn6(relu(self.conv6(features))))

        # Keep the batch dimension and flatten everything after it.
        flat = torch.flatten(features, start_dim=1)
        hidden = self.drop4(relu(self.linear1(flat)))
        return torch.nn.functional.hardsigmoid(self.linear2(hidden))

In [None]:
# Seed torch's global RNG before the network is built so that weight
# initialisation, DataLoader shuffling and dropout masks repeat from run to run
# (same value as the random_state used for the train/validation split).
seed = 7
torch.manual_seed(seed)

model = Net()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()
epoch_num = 100
min_epoch_num = 20      # early stopping is never triggered before this epoch index
best_model = copy.deepcopy(model)
best_eval_loss = 1e9
no_update_cnt = 0       # consecutive epochs without a new best evaluation loss
no_update_thresh = 5    # stop once no_update_cnt reaches this (after min_epoch_num)

for epoch in range(epoch_num):
    model.train()
    # Epoch 0 deliberately skips training so the untrained baseline is evaluated
    # first; report NaN for it instead of a misleading training loss of 0.
    train_loss = float('nan')
    if epoch != 0:
        train_loss = 0
        train_step = 0
        # Loop variables are named images/targets so they do not overwrite the
        # notebook-level label array `y`.
        for images, targets in tqdm(train_loader):
            output = model(images)
            loss = criterion(output, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_step += 1
        # Mean of the per-batch losses.
        train_loss /= train_step

    eval_loss = 0
    eval_step = 0
    model.eval()
    with torch.no_grad():
        for images, targets in test_loader:
            output = model(images)
            loss = criterion(output, targets)

            eval_loss += loss.item()
            eval_step += 1
        eval_loss /= eval_step

    # Keep a snapshot of the weights with the lowest evaluation loss seen so far.
    if eval_loss < best_eval_loss:
        best_model = copy.deepcopy(model)
        best_eval_loss = eval_loss
        no_update_cnt = 0
    else:
        no_update_cnt += 1

    print("epoch: {} train_loss: {} eval_loss:{} no_update_cnt:{}".format(epoch, train_loss, eval_loss, no_update_cnt))

    # Early stopping: only after the minimum number of epochs, and only when the
    # evaluation loss has not improved for no_update_thresh epochs in a row.
    if min_epoch_num <= epoch and no_update_thresh <= no_update_cnt:
        break

In [None]:
# Unlabelled dataset over the competition test images; each item is an image only.
submit_dataset = PetfinderDataset(test["img_path"].values)
submit_dataloader = torch.utils.data.DataLoader(submit_dataset, batch_size=64)

# Predict with the snapshot that achieved the lowest evaluation loss, not with
# whatever weights the last training epoch left in `model`.
best_model.eval()   # dropout off, batch norm uses its running statistics
y_pred = []

with torch.no_grad():   # inference only: no autograd graph is needed
    for x in tqdm(submit_dataloader):
        output = best_model(x)
        y_pred.extend(output.squeeze(1).to('cpu').tolist())

# The training target was Pawpularity / 100, so scale predictions back up by 100.
submit = pd.DataFrame()
submit['Id'] = test['Id']
submit['Pawpularity'] = np.array(y_pred) * 100
submit.to_csv('submission.csv',index=False)
    