In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
from torchvision import transforms, io, models
from tqdm.notebook import tqdm

In [None]:
# Prefix prepended to every dataset path built in this notebook; the empty
# string leaves the absolute '/kaggle/input/...' paths unchanged.
DIR = ''

In [None]:
# Load the train/test CSV tables. Both frames carry an 'Id' column that the
# later cells use as the join key for the extracted image features.
df_train = pd.read_csv(DIR+'/kaggle/input/petfinder-pawpularity-score/train.csv')
df_test = pd.read_csv(DIR+'/kaggle/input/petfinder-pawpularity-score/test.csv')

In [None]:
# Shared resize transform: both dataset classes below apply it to every image
# as it is loaded, so all cached tensors are 128x128.
rsiz = transforms.Resize([128,128])

class train_dataset(torch.utils.data.Dataset):
    """In-memory dataset of resized training images paired with their Ids.

    Every ``<Id>.jpg`` in the competition ``train/`` folder is read, divided
    by 255 and passed through the module-level ``rsiz`` transform while the
    object is constructed, so indexing afterwards never touches the disk.
    (An earlier variant read pre-converted ``.pt`` tensors from a
    ``train_pt/`` folder instead of decoding the JPEGs.)

    Args:
        Ids: sequence of image Id strings (file names without extension).
        t_train: target values; kept on the instance divided by 100, but
            not returned by ``__getitem__``.
    """

    def __init__(self, Ids, t_train):
        image_dir = DIR+'/kaggle/input/petfinder-pawpularity-score/train/'
        self.x_train = [
            rsiz(io.read_image(image_dir + image_id + '.jpg')/255.)
            for image_id in Ids
        ]
        self.Ids = Ids
        self.t_train = t_train/100.
        self.transform = rsiz

    def __len__(self):
        """Number of cached images."""
        return len(self.x_train)

    def __getitem__(self, idx):
        """Return ``(image_tensor, Id)`` for position ``idx``."""
        return self.x_train[idx], self.Ids[idx]
    
class test_dataset(torch.utils.data.Dataset):
    """In-memory dataset of resized test images paired with their Ids.

    Every ``<Id>.jpg`` in the competition ``test/`` folder is read, divided
    by 255 and passed through the module-level ``rsiz`` transform while the
    object is constructed.

    Args:
        Ids: sequence of image Id strings (file names without extension).
    """

    def __init__(self, Ids):
        image_dir = DIR+'/kaggle/input/petfinder-pawpularity-score/test/'
        self.x_test = [
            rsiz(io.read_image(image_dir + image_id + '.jpg')/255.)
            for image_id in Ids
        ]
        self.Ids = Ids
        self.transform = rsiz

    def __len__(self):
        """Number of cached images."""
        return len(self.x_test)

    def __getitem__(self, idx):
        """Return ``(image_tensor, Id)`` for position ``idx``."""
        return self.x_test[idx], self.Ids[idx]
    


In [None]:
# Build the two image datasets from the Id columns of the CSV tables.
# Constructing them loads and resizes every image up front (see the dataset
# classes), so this cell does all of the image I/O.
Ids_train = df_train['Id'].values
t_train = df_train['Pawpularity'].values
train_data = train_dataset(Ids_train, t_train)
Ids_test = df_test['Id'].values
test_data = test_dataset(Ids_test)

In [None]:
# Number of images per forward pass through the feature extractors.
BATCH_SIZE = 16

# shuffle=False on both loaders: batches are yielded in dataset order. Each
# batch is an (images, Ids) pair as returned by the datasets' __getitem__.
dataloader_train = torch.utils.data.DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=False
)

dataloader_test = torch.utils.data.DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False
)

In [None]:
# Online alternative to the offline copy loaded below:
#res= models.resnet152(pretrained=True)
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code - only load files from a trusted source.
res = torch.load('/kaggle/input/pretrained/resnet152')

# Feature extractor assembled from the loaded model's own sub-modules, in the
# order listed, ending at its average-pooling layer. Anything `res` applies
# after `avgpool` (presumably the classification head - confirm) is left out.
res_features = nn.Sequential(
    res.conv1,
    res.bn1,
    res.relu,
    res.maxpool,
    res.layer1,
    res.layer2,
    res.layer3,
    res.layer4,
    res.avgpool
)

In [None]:
# Online alternative to the offline copy loaded below:
#densnet = models.densenet201(pretrained=True)
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code - only load files from a trusted source.
densnet = torch.load('/kaggle/input/pretrained/densenet201')
# Feature extractor: the model's `features` trunk followed by a 4x4 average
# pool. NOTE(review): the fixed kernel_size=4 assumes the trunk emits a 4x4
# map for the 128x128 inputs produced by `rsiz`, so that the result is 1x1 -
# TODO confirm; a different input size would need a different kernel.
dens_features = nn.Sequential(
    densnet.features,
    nn.AvgPool2d(kernel_size=4),
)

In [None]:
# Online alternative to the offline copy loaded below:
#gnet = models.googlenet(pretrained=True)
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code - only load files from a trusted source.
gnet = torch.load('/kaggle/input/pretrained/googlenet')

# Feature extractor assembled from the loaded model's own sub-modules, in the
# order listed, ending at its average-pooling layer.
# NOTE(review): chaining sub-modules bypasses whatever else the model's own
# forward() does (e.g. any input transform or auxiliary heads) - confirm that
# nothing needed is skipped.
gnet_features = nn.Sequential(
    gnet.conv1,
    gnet.maxpool1,
    gnet.conv2,
    gnet.conv3,
    gnet.maxpool2,
    gnet.inception3a,
    gnet.inception3b,
    gnet.maxpool3,
    gnet.inception4a,
    gnet.inception4b,
    gnet.inception4c,
    gnet.inception4d,
    gnet.inception4e,
    gnet.maxpool4,
    gnet.inception5a,
    gnet.inception5b,
    gnet.avgpool
)

In [None]:
# DenseNet feature extraction: push every train/test image through
# `dens_features` and join the resulting columns onto the CSV frames by 'Id'.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Move the extractor to the selected device (it was previously left on the CPU
# even when a GPU was available) and switch it to inference mode.
dens_features.to(device).eval()

dens_frames = {'train': [], 'test': []}
# No backward pass is ever run here, so skip building the autograd graph.
with torch.no_grad():
    for split, loader in (('train', tqdm(dataloader_train)), ('test', dataloader_test)):
        for x, Ids in loader:
            y = dens_features(x.to(device))
            # The pooled map is indexed at its single spatial position,
            # leaving a (batch, channels) array.
            features = y[:,:,0,0].cpu().numpy()
            # Model-specific column names keep the later merges with the
            # other extractors' features free of name collisions.
            df = pd.DataFrame(
                features,
                columns=[f'dens_{i}' for i in range(features.shape[1])],
            )
            df.insert(0, 'Id', list(Ids))
            dens_frames[split].append(df)

# One concat per split instead of re-copying the accumulated frame each batch.
df_train_f = pd.concat(dens_frames['train'], ignore_index=True)
df_test_f = pd.concat(dens_frames['test'], ignore_index=True)

df_train = pd.merge(df_train, df_train_f, on='Id')
df_test = pd.merge(df_test, df_test_f, on='Id')

In [None]:
# GoogLeNet feature extraction: push every train/test image through
# `gnet_features` and join the resulting columns onto the frames by 'Id'.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Move the extractor to the selected device (it was previously left on the CPU
# even when a GPU was available) and switch it to inference mode.
gnet_features.to(device).eval()

gnet_frames = {'train': [], 'test': []}
# No backward pass is ever run here, so skip building the autograd graph.
with torch.no_grad():
    for split, loader in (('train', tqdm(dataloader_train)), ('test', dataloader_test)):
        for x, Ids in loader:
            y = gnet_features(x.to(device))
            # The pooled map is indexed at its single spatial position,
            # leaving a (batch, channels) array.
            features = y[:,:,0,0].cpu().numpy()
            # Model-specific column names avoid colliding with feature
            # columns already merged in by other extractors (integer names
            # would overlap and be renamed with _x/_y suffixes).
            df = pd.DataFrame(
                features,
                columns=[f'gnet_{i}' for i in range(features.shape[1])],
            )
            df.insert(0, 'Id', list(Ids))
            gnet_frames[split].append(df)

# One concat per split instead of re-copying the accumulated frame each batch.
df_train_f = pd.concat(gnet_frames['train'], ignore_index=True)
df_test_f = pd.concat(gnet_frames['test'], ignore_index=True)

df_train = pd.merge(df_train, df_train_f, on='Id')
df_test = pd.merge(df_test, df_test_f, on='Id')

In [None]:
# ResNet feature extraction: push every train/test image through
# `res_features` and join the resulting columns onto the frames by 'Id'.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Move the extractor to the selected device (it was previously left on the CPU
# even when a GPU was available) and switch it to inference mode.
res_features.to(device).eval()

res_frames = {'train': [], 'test': []}
# No backward pass is ever run here, so skip building the autograd graph.
with torch.no_grad():
    for split, loader in (('train', tqdm(dataloader_train)), ('test', dataloader_test)):
        for x, Ids in loader:
            y = res_features(x.to(device))
            # The pooled map is indexed at its single spatial position,
            # leaving a (batch, channels) array.
            features = y[:,:,0,0].cpu().numpy()
            # Model-specific column names avoid colliding with feature
            # columns already merged in by other extractors (integer names
            # would overlap and be renamed with _x/_y suffixes).
            df = pd.DataFrame(
                features,
                columns=[f'res_{i}' for i in range(features.shape[1])],
            )
            df.insert(0, 'Id', list(Ids))
            res_frames[split].append(df)

# One concat per split instead of re-copying the accumulated frame each batch.
df_train_f = pd.concat(res_frames['train'], ignore_index=True)
df_test_f = pd.concat(res_frames['test'], ignore_index=True)

df_train = pd.merge(df_train, df_train_f, on='Id')
df_test = pd.merge(df_test, df_test_f, on='Id')

In [None]:
# Tag each frame so the rows can be separated again after joint preprocessing.
df_train['TrainFlag'] = True
df_test['TrainFlag'] = False

# Stack train and test rows and index the result by 'Id'.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# set_index both installs 'Id' as the index and drops it from the columns.
df_all = pd.concat([df_train, df_test]).set_index('Id')

In [None]:
# One-hot encode categorical columns of the combined frame so train and test
# share the same encoded columns; drop_first=True omits the first level of
# each encoded column.
df_all = pd.get_dummies(df_all, drop_first=True)

In [None]:
# Separate the combined frame back into its train and test rows using the
# boolean marker column, which is discarded afterwards. The test rows also
# lose the 'Pawpularity' target column.
is_train_row = df_all['TrainFlag']
df_train = df_all[is_train_row].drop(columns=['TrainFlag'])
df_test = df_all[~is_train_row].drop(columns=['TrainFlag']).drop(columns=['Pawpularity'])

In [None]:
# Target vector and feature matrix as NumPy arrays (every remaining column
# except the target is a feature). Hold out 20% of the rows for validation;
# random_state fixes the split for reproducibility.
y = df_train['Pawpularity'].values
X = df_train.drop(['Pawpularity'], axis=1).values
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=1234)

In [None]:
# Wrap the arrays in XGBoost's DMatrix container. The test matrix has no
# label because it is only used for prediction.
dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_valid, label=y_valid)
dtest = xgb.DMatrix(df_test.values)

In [None]:
# Booster configuration passed to xgb.train: squared-error regression
# objective evaluated with RMSE. Per the XGBoost parameter reference, 'eta' is
# the learning rate and 'lambda' / 'alpha' are the L2 / L1 regularisation
# weights.
params = {
    'objective': 'reg:squarederror', 
    'random_state':1234,
    'eta': 0.1,
    'lambda': 2.5,
    'alpha': 0.0,
    'eval_metric': 'rmse',
}
# Upper bound on boosting rounds; training may stop sooner via early stopping.
num_round = 500
# Datasets evaluated after each round, with the names used in the log output.
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]

In [None]:
# Train for at most num_round rounds, evaluating on the watchlist each round.
# early_stopping_rounds=5 stops training once the monitored metric has not
# improved for 5 consecutive rounds (per the XGBoost docs the last watchlist
# entry, here 'eval', is the one monitored).
model = xgb.train(params, dtrain, num_round,
                  early_stopping_rounds=5, evals=watchlist)

In [None]:
# xgb.train returns the booster as of the LAST round, which includes the
# non-improving rounds that triggered early stopping. The native
# Booster.predict uses every tree unless told otherwise, so restrict it to the
# best iteration when one was recorded.
best_iteration = getattr(model, 'best_iteration', None)
if best_iteration is None:
    prediction = model.predict(dtest)
else:
    prediction = model.predict(dtest, iteration_range=(0, best_iteration + 1))

In [None]:
# Submission table: one row per test image, pairing the Id (the index of
# df_test) with its predicted score.
sub = pd.DataFrame({'Id': df_test.index, 'Pawpularity': prediction})

In [None]:
# Write the submission file to the working directory.
# NOTE(review): index=None is presumably treated like index=False (no row
# index column written) - confirm; index=False would state the intent.
sub.to_csv('submission.csv', index=None)