In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import glob
import numpy
import random
import matplotlib.pyplot as plt
from skimage import io
from PIL import Image
import torch
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torchvision
import matplotlib.pyplot as plt
import time
import os
import copy

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Kaggle input locations: competition image folders and the pretrained ResNet-18 checkpoint.
_COMPETITION_DIR = "../input/petfinder-pawpularity-score"
TRAIN_PATH = _COMPETITION_DIR + "/train"
TEST_PATH = _COMPETITION_DIR + "/test"
MODEL_PATH = "../input/pretrained-pytorch-models/resnet18-5c106cde.pth"

In [None]:
# Read the competition metadata tables (one row per image) for both splits.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/petfinder-pawpularity-score/train.csv",
        "../input/petfinder-pawpularity-score/test.csv",
    )
)

In [None]:
# Derive the image file location for every row from its Id.
train['path'] = [f"{TRAIN_PATH}/{image_id}.jpg" for image_id in train['Id']]
test['path'] = [f"{TEST_PATH}/{image_id}.jpg" for image_id in test['Id']]

In [None]:
# Pull the columns the datasets need out of the DataFrames as plain arrays.
train_image_paths, test_image_paths = train['path'].values, test['path'].values
train_pawpularity = train['Pawpularity'].values

In [None]:
# Preview the first rows of the training table, including the derived 'path' column.
train.head()

In [None]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

In [None]:
# Preprocessing applied to every image before it reaches the network.
# The backbone is loaded from ImageNet-pretrained ResNet-18 weights, so inputs are
# normalized with the ImageNet channel statistics those weights were trained with.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

TRANSFORMERS = transforms.Compose(
    [
        transforms.Resize((224, 224)),  # fixed input size; aspect ratio is not preserved
        transforms.ToTensor(),          # PIL image -> float tensor in [0, 1], channels first
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ]
)

In [None]:
class PetDataset(Dataset):
    """Image dataset for the Pawpularity regression task.

    Args:
        image_paths: Indexable sequence of image file paths.
        pawpularity: Indexable sequence of targets aligned with ``image_paths``.
            It is only read when ``is_train`` is true, so it may be ``None``
            for an inference dataset.
        is_train: When true, items are ``(image, target)`` pairs; otherwise
            only the image is returned.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, image_paths, pawpularity, is_train, transform=None):
        self.image_paths = image_paths
        self.pawpularity = pawpularity
        self.is_train = is_train
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, target)`` when ``is_train`` is true, where ``target`` is a
            float32 CPU tensor of shape ``(1,)``; otherwise just ``image``.
        """
        # Force three channels so grayscale / RGBA / palette files all yield the
        # same layout, and close the file handle as soon as the pixels are read.
        with Image.open(self.image_paths[idx]) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform:
            image = self.transform(image)

        if not self.is_train:
            return image

        # Keep the target on the CPU: the training loop moves batches to the
        # device, and CUDA tensors must not be created inside DataLoader workers.
        target = torch.tensor([self.pawpularity[idx]], dtype=torch.float)
        return image, target
        

In [None]:
# Full labelled dataset; it is divided into train/validation subsets afterwards.
dataset = PetDataset(
    image_paths=train_image_paths,
    pawpularity=train_pawpularity,
    is_train=True,
    transform=TRANSFORMERS,
)

In [None]:
# Hold out `valid_split_ratio` of the labelled data for validation and train on the rest.
# The ratio applies to the *validation* subset; the training size is the remainder, so the
# two sizes always add up to the full dataset size.
valid_split_ratio = 0.25
dataset_size = len(train_image_paths)
VALID_DATA_SIZE = int(valid_split_ratio * dataset_size)
TRAIN_DATA_SIZE = dataset_size - VALID_DATA_SIZE

In [None]:
# Seed the split so the same samples land in the validation subset on every run;
# otherwise validation losses are not comparable between kernel restarts.
split_generator = torch.Generator().manual_seed(42)
train_set, valid_set = torch.utils.data.random_split(
    dataset,
    [TRAIN_DATA_SIZE, VALID_DATA_SIZE],
    generator=split_generator,
)

In [None]:
# Number of samples per mini-batch used by the DataLoaders.
batch_size = 64

In [None]:
# Training batches are reshuffled every epoch; the ragged final batch is dropped so every
# optimisation step sees a full batch.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, drop_last=True)
# Validation must score every held-out sample exactly once, so nothing is dropped and the
# order is left untouched.
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, drop_last=False)

In [None]:
def create_model(dict_path=None):
    """Build a ResNet-18 and optionally load weights from a state-dict file.

    Args:
        dict_path: Path to a saved ``state_dict``. When ``None`` the network
            keeps its random initialisation.

    Returns:
        The ResNet-18 module (on the CPU).
    """
    model = models.resnet18(pretrained=False)
    if dict_path is not None:
        # Load onto the CPU regardless of where the checkpoint was saved, so this
        # also works on machines without a GPU; the caller moves the model later.
        state_dict = torch.load(dict_path, map_location="cpu")
        model.load_state_dict(state_dict)
    return model

# Instantiate the backbone from the local checkpoint and show its architecture.
model = create_model(dict_path=MODEL_PATH)
model

In [None]:
# Freeze every existing weight so that only layers added afterwards receive gradients.
for frozen_param in model.parameters():
    frozen_param.requires_grad = False

In [None]:
# Swap the classification head for a single-output regression head. The input width is read
# from the layer being replaced rather than hard-coded, so it stays correct if the backbone
# changes. A freshly created layer has requires_grad=True, so it is trainable.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=1, bias=True)
model

In [None]:
# Regression objective and optimiser; the model is moved to the selected device first so the
# optimiser is built over the parameters that will actually be updated.
criterion = nn.MSELoss()
model = model.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
def train_model(model, train_loader, valid_loader, criterion, optimizer, epochs=5):
    """Train ``model`` and report the mean train/validation loss once per epoch.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        model: Network to optimise (updated in place).
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        valid_loader: Iterable of ``(inputs, targets)`` validation batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimiser already bound to the model's parameters.
        epochs: Number of passes over ``train_loader``.
    """
    for epoch in range(1, epochs + 1):
        # ---- training pass ----
        model.train()
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()

        # ---- validation pass: once per epoch, after all training batches ----
        # eval() keeps batch-norm statistics from being updated with validation data.
        model.eval()
        loss_val = 0.0
        with torch.no_grad():
            for imgs, labels in valid_loader:
                imgs = imgs.to(device)
                labels = labels.to(device)
                loss_val += criterion(model(imgs), labels).item()

        # max(..., 1) avoids a ZeroDivisionError when a loader yields no batches.
        print('Epoch {}, Training loss {:.4f}, Val loss {:.4f}'.format(
            epoch,
            loss_train / max(len(train_loader), 1),
            loss_val / max(len(valid_loader), 1),
        ))

In [None]:
# Single-epoch training run.
train_model(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=1,
)

In [None]:
# The test split has no labels. A zero-filled placeholder keeps the dataset constructor
# happy; it is never returned because is_train=False.
test_pawpularity = np.zeros(len(test_image_paths))
test_dataset = PetDataset(test_image_paths, test_pawpularity, is_train=False, transform=TRANSFORMERS)
# Iterate the *test* images in their original order so predictions line up row-for-row with
# the Ids in `test`.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Run inference over the test loader and collect one prediction per image.
model.eval()
batch_predictions = []
with torch.no_grad():
    for imgs in test_loader:
        outputs = model(imgs.to(device))
        # Move to host memory before converting: NumPy cannot read CUDA tensors.
        batch_predictions.append(outputs.cpu().numpy().ravel())

# Concatenate once at the end instead of re-allocating the array on every batch.
predicts = np.concatenate(batch_predictions) if batch_predictions else np.empty(0)
        


In [None]:
# Assemble the submission table (one prediction per test Id) and write it to disk.
submission = pd.DataFrame(
    {
        'Id': test['Id'].values,
        'Pawpularity': predicts,
    }
)
submission.to_csv('submission.csv', index=False)

In [None]:
# NOTE(review): this repeats the write performed at the end of the previous cell; the file
# is simply overwritten with the same contents.
submission.to_csv('submission.csv', index=False)