In [None]:
import pandas as pd
import albumentations
import torchvision
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import cv2
import numpy as np
import os
from torchvision.models import densenet121

In [None]:
# Root directory of the competition data (relative path; note the trailing
# slash, which later cells rely on when concatenating sub-paths).
path = '../input/petfinder-pawpularity-score/'

# Load the train and test tables that sit directly under `path`.
train, test = (pd.read_csv(path + csv_name) for csv_name in ('train.csv', 'test.csv'))


In [None]:
# Image pipeline shared by every dataset built in this notebook:
#   1. resize to 224x224,
#   2. flip horizontally with probability 0.5,
#   3. normalise each channel with mean 0.5 / std 0.5 relative to a max pixel
#      value of 255 (presumably mapping pixels to roughly [-1, 1] -- confirm
#      against the albumentations.Normalize documentation).
transform = albumentations.Compose(
    [
        albumentations.Resize(height=224, width=224),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.Normalize(
            mean=[0.5] * 3,
            std=[0.5] * 3,
            max_pixel_value=255,
        ),
    ]
)

In [None]:
# Image folders live next to the CSV files; `path` already ends with '/'.
train_img_dir, test_img_dir = path + 'train', path + 'test'

In [None]:
# torchvision ToPILImage transform instance; presumably kept around for
# inspecting samples -- it is not used in any of the cells shown here.
showimg = torchvision.transforms.ToPILImage()

In [None]:
class PetDataset(torch.utils.data.Dataset):
    """Dataset pairing each pet image with its row of tabular metadata.

    Args:
        image_paths: sequence of image file paths, one per sample.
        meta_data: row-indexable table (the CSV with the ``Id`` column dropped,
            converted to numpy). When ``train_data`` is true the LAST element
            of each row is the target and the rest are features; otherwise the
            whole row is features.
        augmentations: callable invoked as ``augmentations(image=img)`` that
            returns a mapping with an ``"image"`` entry, or ``None`` to skip.
        train_data: whether rows carry a trailing target value.

    ``__getitem__`` returns ``(data, target)`` in training mode and ``data``
    alone otherwise, where ``data`` is a dict with float32 tensors ``"image"``
    (channels-first) and ``"other_data"``.
    """

    def __init__(self, image_paths, meta_data, augmentations, train_data=True):
        self.image_paths = image_paths
        self.meta_data = meta_data
        self.augmentations = augmentations
        self.train_data = train_data

    def __len__(self):
        return len(self.image_paths)

    def _load_image(self, image_path):
        """Read one image as a channels-first float32 array (RGB order)."""
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(
                'Could not read image: {}'.format(image_path)
            )
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.augmentations is not None:
            image = self.augmentations(image=image)["image"]
        # HWC -> CHW, the layout the convolutional model consumes.
        return np.transpose(image, (2, 0, 1)).astype(np.float32)

    def __getitem__(self, item):
        image = self._load_image(self.image_paths[item])
        row = self.meta_data[item]
        # In training mode the final column is the label, not a feature.
        features = row[:-1] if self.train_data else row
        data = {
            "image": torch.tensor(image, dtype=torch.float32),
            "other_data": torch.tensor(features, dtype=torch.float32),
        }
        if self.train_data:
            return data, row[-1]
        return data

In [None]:
def process_data(csv_df, img_dir, batch_size, train_data=True):
    """Build a DataLoader of ``PetDataset`` samples from a competition table.

    Args:
        csv_df: DataFrame with an ``Id`` column (image file stem) plus the
            metadata columns; in training mode it must also contain
            ``Pawpularity``, which ``PetDataset`` reads as the last column.
        img_dir: directory holding ``<Id>.jpg`` files.
        batch_size: batch size handed to the DataLoader.
        train_data: when true, rows with ``Pawpularity > 100`` are dropped, the
            target is rescaled by 1/100 and batches are shuffled. When false,
            rows are served in their original order so predictions can be
            matched back to ``csv_df.Id`` positionally.

    Returns:
        torch.utils.data.DataLoader over the resulting dataset.

    The input frame is not modified.
    """
    records = csv_df
    if train_data:
        # Filter BEFORE deriving image paths so that paths and metadata rows
        # stay aligned one-to-one even when some rows are removed.
        records = csv_df.loc[csv_df.Pawpularity <= 100]  # remove Pawpularity > 100

    img_paths = [os.path.join(img_dir, x + '.jpg') for x in records.Id]
    meta_datas = records.drop('Id', axis=1)
    if train_data:
        # assign() returns a new frame (no SettingWithCopy on the filtered
        # slice) and keeps the column in its existing position.
        meta_datas = meta_datas.assign(Pawpularity=meta_datas.Pawpularity / 100)
    meta_datas = meta_datas.to_numpy()

    dataset = PetDataset(img_paths, meta_datas, transform, train_data=train_data)
    # Only shuffle for training; inference order must follow the input rows.
    return DataLoader(dataset, batch_size=batch_size, shuffle=train_data)

In [None]:
# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

In [None]:
# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Sequential(
#             nn.Conv2d(3, 6, 5),
#             nn.ReLU(),
#             nn.MaxPool2d(2)
#         )
#         self.conv2 = nn.Sequential(
#             nn.Conv2d(6, 16, 5),
#             nn.ReLU(),
#             nn.MaxPool2d(2)
#         )
#         self.conv3 = nn.Sequential(
#             nn.Conv2d(16, 32, 7),
#             nn.ReLU(),
#             nn.MaxPool2d(2)
#         )
#         self.conv4 = nn.Sequential(
#             nn.Conv2d(32, 32, 5),
#             nn.ReLU(),
#             nn.MaxPool2d(2)
#         )
#         self.fc1 = nn.Linear(32 * 11 * 11 + 12, 120)
#         self.fc2 = nn.Linear(120, 84)
#         self.fc3 = nn.Linear(84, 10)
#         self.fc4 = nn.Linear(10, 1)
#         self.soft = nn.Softmax(dim=0)
#         # self.double()
#     def forward(self, x):
#         img = x['image']
#         img = img.to(device)
#         # img = img.float()
#         other_data = x['other_data']
#         other_data = other_data.to(device)
#         # other_data = other_data.float()
#         # print(img.type())
#         x = self.conv1(img)
#         x = self.conv2(x)
#         x = self.conv3(x)
#         x = self.conv4(x)
#         x = x.view(x.size()[0], -1)
#         x = torch.cat([x, other_data], dim=1)
#         # x = x.float()
#         x = self.fc1(x)
#         x = self.fc2(x)
#         x = self.fc3(x)
#         x = self.fc4(x)
#         x = self.soft(x)
#         return x

In [None]:
class Net(nn.Module):
    """DenseNet-121 image branch fused with tabular metadata.

    The backbone's classifier output is concatenated with the per-sample
    metadata vector, passed through one linear layer and squashed with a
    sigmoid, so the prediction lies in (0, 1).

    Args:
        num_meta_features: length of the ``other_data`` vector per sample.
            The default of 12, added to the backbone's classifier width,
            reproduces the previously hard-coded input size of 1012 when the
            backbone has torchvision's default 1000 outputs.
    """

    def __init__(self, num_meta_features=12):
        super(Net, self).__init__()
        # Randomly initialised backbone (the notebook previously had a
        # commented-out ``pretrained=True`` variant).
        self.dnet = densenet121()
        # Size the head from the backbone instead of a magic number.
        backbone_out = self.dnet.classifier.out_features
        self.fc = nn.Linear(backbone_out + num_meta_features, 1)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        """Predict from a dict with ``'image'`` and ``'other_data'`` tensors.

        Both tensors are moved to the device the model's parameters live on.
        Returns a tensor with one sigmoid-activated value per sample
        (shape ``(batch, 1)``).
        """
        # Follow the model's own placement rather than a module-level global,
        # so the network works wherever it has been moved.
        target_device = self.fc.weight.device
        img = x['image'].to(target_device)
        other_data = x['other_data'].to(target_device)

        features = self.dnet(img)
        features = features.view(features.size()[0], -1)
        combined = torch.cat([features, other_data], dim=1)
        return self.sig(self.fc(combined))

In [None]:
def train_model(model, train_dataloader, criterion, optimizer, num_epochs=25):
    """Run a plain supervised training loop and return the trained model.

    Args:
        model: module called as ``model(inputs)`` where ``inputs`` is the dict
            yielded by the loader; its output is reshaped to ``(batch,)``.
        train_dataloader: iterable of ``(inputs, labels)`` batches, where
            ``inputs['image']`` is a tensor whose first dimension is the batch.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer over ``model``'s parameters.
        num_epochs: number of passes over ``train_dataloader``.

    Returns:
        The same ``model`` object, left in training mode.

    Prints the loss of every batch and, per epoch, the mean batch loss of
    THAT epoch only (both multiplied by 100).
    """
    model.train()
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Reset per epoch: otherwise the reported epoch loss silently
        # accumulates every earlier epoch's losses.
        running_loss = 0.0
        num_batches = 0
        train_data = 0  # number of samples seen so far in this epoch
        for inputs, labels in train_dataloader:
            # Labels arrive as float64 from numpy; the model outputs float32.
            labels = labels.to(device).float()
            optimizer.zero_grad()
            outputs = model(inputs)
            outputs = outputs.view(outputs.size()[0])
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_data += inputs['image'].size()[0]
            running_loss += loss.item()
            num_batches += 1
            print('loss: {:.4f}, train_data: {}'.format(loss.item()*100, train_data))
        # Counting batches avoids a ZeroDivisionError on an empty loader.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        print('Epoch Loss: {:.4f}'.format(epoch_loss*100))
    return model

In [None]:
# Build the network on the selected device, then fit it for three epochs
# with MSE loss and Adam on batches of 32 training samples.
model = Net().to(device)
criterion = nn.MSELoss()
optim = torch.optim.Adam(model.parameters(), lr=0.001)
train_loader = process_data(train, train_img_dir, 32, train_data=True)
model = train_model(
    model,
    train_loader,
    criterion,
    optim,
    num_epochs=3,
)

In [None]:
# File name (relative to the working directory) that the trained weights are
# written to and later reloaded from.
PATH = 'trained_model'

In [None]:
# Persist only the learned parameters (state_dict), not the whole module.
torch.save(obj=model.state_dict(), f=PATH)

In [None]:
# Rebuild the architecture and restore the saved weights.
trained_model = Net().to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU still loads on a CPU-only machine.
trained_model.load_state_dict(torch.load(PATH, map_location=device))
# Inference mode: BatchNorm/Dropout layers must use their trained statistics.
trained_model.eval()
test_data = process_data(test, test_img_dir, 2, train_data=False)

In [None]:
def test_model(model, test_dataloader):
    pred_label = torch.tensor([[0]]).to(device)
    for inputs in test_dataloader:
        outputs = model(inputs)
        pred_label = torch.cat([pred_label, outputs], dim=0)
    return pred_label

In [None]:
# Predict on the test loader, then build the submission table.
pred = test_model(trained_model, test_data)

# Row 0 of `pred` is test_model's placeholder, hence the [1:] slice; the
# model outputs are multiplied by 100 to return to the Pawpularity scale.
scores = pred.detach().cpu().numpy()[1:] * 100
score_frame = pd.DataFrame(scores, columns=['Pawpularity'])
submission = pd.concat([test.Id, score_frame], axis=1)

In [None]:
# index=False keeps the file to exactly the Id and Pawpularity columns; by
# default to_csv would also write the DataFrame index as an unnamed column.
submission.to_csv('submission.csv', index=False)