In [None]:
import torch
import torch.nn as nn
import pandas as pd

# Load the competition's train and test metadata tables in one pass.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/petfinder-pawpularity-score/train.csv',
        '/kaggle/input/petfinder-pawpularity-score/test.csv',
    )
)

In [None]:
import torch.nn.functional as F
import torchvision.models as models
class Pawpularity_Net(nn.Module):
    """Two-branch regressor combining an image and a 12-value tabular vector.

    The image goes through the convolutional trunk of MobileNetV2, the tabular
    vector through a two-layer MLP; both are concatenated and reduced to a
    single output unit. ``forward`` therefore returns a (batch, 1) tensor.
    """

    def __init__(self):
        super().__init__()
        # mobilenet_v2() is called without weights, so the trunk starts from
        # random initialisation; only its `features` part is kept.
        backbone = models.mobilenet_v2()
        self.features = backbone.features
        # Tabular branch: 12 inputs -> 100 -> 100.
        self.layer_1 = nn.Linear(12, 100)
        self.layer_2 = nn.Linear(100, 100)
        # 62820 = flattened trunk output + the 100 tabular features.
        # NOTE(review): this presumes 224x224 inputs (1280*7*7 = 62720 trunk
        # values per torchvision's MobileNetV2) - confirm if the input size changes.
        self.layer_3 = nn.Linear(62820, 500)
        self.last_layer = nn.Linear(500, 1)

    def forward(self, x1, x2):
        """Return the (batch, 1) prediction for images ``x1`` and tabular rows ``x2``."""
        feature_map = self.features(x1)
        image_vec = feature_map.view(feature_map.size(0), -1)
        table_vec = F.relu(self.layer_2(F.relu(self.layer_1(x2))))
        fused = torch.cat((image_vec, table_vec), dim=1)
        hidden = F.relu(self.layer_3(fused))
        return self.last_layer(hidden)

In [None]:
from torch.utils.data import DataLoader, Dataset
from PIL import Image 
from torchvision import transforms

class CustomDataSet(Dataset):
    """Dataset yielding a preprocessed image and its tabular features (plus label when training).

    Args:
        df: DataFrame with an ``Id`` column; columns 1..12 (by position) are
            used as the tabular features and the last column as the label.
        is_test: When truthy, items are ``(img, tbl)`` and images are read
            from the test directory; otherwise items are ``(img, tbl, label)``
            and images are read from the train directory.
        img_dir: Optional directory containing ``<Id>.jpg`` files. Defaults to
            the Kaggle train/test directory selected by ``is_test``.
    """

    def __init__(self,df,is_test=False,img_dir=None):
        self.img_list = df['Id'].to_list()
        self.table_list = df[df.columns[1:13]]
        # Kept for interface compatibility; only read in training mode.
        # NOTE(review): for a test frame the last column is presumably a
        # feature rather than a target - it is never used as a label below.
        self.label_list = df[df.columns[-1]].to_list()
        self.preprocess = transforms.Compose([
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
            ])
        # Normalise to a real bool so any truthy flag selects test mode
        # (the previous `is not True` identity check treated e.g. 1 as "train").
        self.is_test = bool(is_test)
        if img_dir is None:
            if self.is_test:
                img_dir = "/kaggle/input/petfinder-pawpularity-score/test/"
            else:
                img_dir = "/kaggle/input/petfinder-pawpularity-score/train/"
        elif not img_dir.endswith('/'):
            # Paths are built by plain concatenation, so guarantee a separator.
            img_dir = img_dir + '/'
        self.img_dir = img_dir

    def __getitem__(self,idx):
        """Return ``(img, tbl)`` in test mode, else ``(img, tbl, label)``; all tensors are float32."""
        path = self.img_dir + self.img_list[idx] + '.jpg'
        # Context manager closes the file handle as soon as the pixels are copied.
        with Image.open(path) as raw:
            img = raw.convert('RGB').resize((224,224))
        img = self.preprocess(img)
        tbl = torch.tensor(list(self.table_list.iloc[idx])).to(torch.float32)

        if self.is_test:
            # No label tensor is built at inference time.
            return img,tbl
        label = torch.tensor(self.label_list[idx]).to(torch.float32)
        return img,tbl,label

    def __len__(self):
        """Number of samples, i.e. number of image ids."""
        return len(self.img_list)

In [None]:
# Sanity check: total column count, then how many columns remain once the
# first and the last column are dropped.
df = train_df
table_list = df[df.columns[1:-1]]
print(len(df.columns))
len(list(table_list.iloc[0]))

In [None]:
# Number of cross-validation folds; one checkpoint is saved per fold and the
# same count is used to load and average the models at inference time.
FOLD_NUM = 5

In [None]:
from sklearn.model_selection import KFold
import torch.optim as opt
import numpy as np


# K-fold cross-validation training.
# For every fold a fresh model is trained for EPOCH epochs and the weights
# with the lowest validation loss are saved to '<fold>-fold.pth'.
kf = KFold(n_splits=FOLD_NUM, random_state=None, shuffle=False)

BATCH_SIZE = 64
EPOCH = 5
# Use the GPU when present, otherwise fall back to CPU instead of crashing.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

criterion = nn.MSELoss()

for k,(train_index, test_index) in enumerate(kf.split(train_df['Id'])):
    # KFold yields positional indices, so rows are selected with .iloc
    # (.loc only worked while the frame had a default RangeIndex).
    train_dataset = CustomDataSet(train_df.iloc[train_index])
    val_dataset = CustomDataSet(train_df.iloc[test_index])
    train_loader = DataLoader(train_dataset,batch_size = BATCH_SIZE,shuffle = True)
    val_loader = DataLoader(val_dataset,batch_size = BATCH_SIZE,shuffle = False)
    model = Pawpularity_Net().to(DEVICE)
    optimizer = opt.Adam(params = model.parameters(),lr = 0.0001)
    best_loss = 99999
    for e in range(EPOCH):
        # Re-enable training behaviour (BatchNorm batch statistics) after the
        # previous epoch's validation pass switched the model to eval mode.
        model.train()
        train_total_loss = 0
        for n,(img,tbl,label) in enumerate(train_loader):
            img = img.to(DEVICE)
            tbl = tbl.to(DEVICE)
            label = label.to(DEVICE)

            optimizer.zero_grad()
            # The model returns (batch, 1) while label is (batch,). Drop the
            # trailing dim, otherwise MSELoss broadcasts to (batch, batch)
            # and optimises the wrong quantity.
            output = model(img,tbl)[:,0]

            loss = criterion(output , label)
            loss.backward()
            optimizer.step()
            # Running mean of the per-batch losses seen so far.
            train_total_loss = (train_total_loss * n + loss.item())/(n+1)

            print('\rTRAIN EPOCH[{:03}/{:03}] ITR[{:04}/{:04}] LOSS:{:.5} SCORE:{:.5}'.format(e+1,EPOCH,n+1,len(train_loader),train_total_loss,np.sqrt(train_total_loss)),end = "")
        print()
        val_total_loss = 0
        # Eval mode: BatchNorm uses its running statistics and does not
        # update them with validation data.
        model.eval()
        with torch.no_grad():
            for n,(img,tbl,label) in enumerate(val_loader):
                img = img.to(DEVICE)
                tbl = tbl.to(DEVICE)
                label = label.to(DEVICE)

                output = model(img,tbl)[:,0]

                loss = criterion(output , label)

                val_total_loss = (val_total_loss * n + loss.item())/(n+1)

                print('\rVAL   EPOCH[{:03}/{:03}] ITR[{:04}/{:04}] LOSS:{:.5} SCORE:{:.5}'.format(e+1,EPOCH,n+1,len(val_loader),val_total_loss,np.sqrt(val_total_loss)),end = "")
        print()
        # Checkpoint only when this epoch improves on the fold's best validation loss.
        if(best_loss > val_total_loss):
            best_loss = val_total_loss
            model_path = f'{k+1}-fold.pth'
            torch.save(model.state_dict(), model_path)

In [None]:
# Inference: run every fold's best checkpoint over the test set and collect
# the per-batch predictions in fold_output[k] (a list of 1-D CPU tensors).
test_dataset = CustomDataSet(test_df,is_test = True)
test_loader = DataLoader(test_dataset,batch_size = 2,shuffle = False)
with torch.no_grad():
    fold_output = [[] for x in range(FOLD_NUM)]
    for k in range(FOLD_NUM):
        model = Pawpularity_Net().to(DEVICE)
        model_path = f'{k+1}-fold.pth'
        # map_location lets checkpoints saved on one device load on another.
        model.load_state_dict(torch.load(model_path, map_location=DEVICE))
        # Eval mode is essential here: in training mode BatchNorm would
        # normalise each 2-image batch with its own statistics, making the
        # predictions depend on which images share a batch.
        model.eval()
        for n,(img,tbl) in enumerate(test_loader):
            img = img.to(DEVICE)
            tbl = tbl.to(DEVICE)
            # (batch, 1) -> (batch,)
            output = model(img,tbl).cpu()[:,0]
            fold_output[k].append(output)
            print('\r TEST FOLD[{:02}/{:02}]  ITR[{:03}/{:03}]'.format(k+1,FOLD_NUM,n+1,len(test_loader)),end = "")

In [None]:
# Collapse each fold's list of per-batch prediction tensors into one 1-D numpy array.
# The isinstance guard makes the cell safe to re-run: folds that were already
# converted are left untouched instead of being passed to torch.cat again.
for k in range(len(fold_output)):
    if isinstance(fold_output[k], list):
        fold_output[k] = torch.cat(fold_output[k],dim = 0).numpy()

In [None]:
# Preview the ensemble prediction: element-wise mean across the per-fold arrays.
list(np.array(fold_output).mean(axis = 0))

In [None]:
# Single-column frame holding the fold-averaged prediction for every test row.
df_result = pd.DataFrame(
    data={'Pawpularity': list(np.array(fold_output).mean(axis = 0))}
)

In [None]:
# Display the averaged predictions table as the cell output.
df_result

In [None]:
# Submission frame: test ids side by side with their predicted score
# (the two columns are aligned on the row index).
df_sub = pd.concat(
    objs=[test_df['Id'], df_result['Pawpularity']],
    axis='columns',
)

In [None]:
# Write the submission file without the row index column.
df_sub.to_csv(path_or_buf='submission.csv', index=False)