In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import sys

# Extra import location added to sys.path (presumably so that `import timm`
# in a later cell resolves to these sources -- confirm against the dataset).
TIMM_SRC_DIR = '../input/timm-pytorch-image-models/pytorch-image-models-master'

# Guarded so that re-running this cell does not keep appending duplicates.
if TIMM_SRC_DIR not in sys.path:
    sys.path.append(TIMM_SRC_DIR)

In [None]:
import os
import random
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models,Sequential
import cv2, numpy as np
import os
import torch
import torchvision
import timm
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [None]:
# Metadata CSVs for the train and test splits.
train_data_csv, test_data_csv = (
    '../input/petfinder-pawpularity-score/train.csv',
    '../input/petfinder-pawpularity-score/test.csv',
)

# Folders containing the corresponding image files.
train_folder, test_folder = (
    '../input/petfinder-pawpularity-score/train',
    '../input/petfinder-pawpularity-score/test',
)

In [None]:
# Names of the metadata columns that are fed to the model as dense features.
features = [
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
]

In [None]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f'Using device: {device}')

In [None]:
# Shared configuration mapping. Within this notebook the model, the test
# transforms, the data loader and the prediction loop read from it; the
# remaining entries are not read by any code visible here (presumably
# carried over from the training setup -- confirm before removing).
params = dict(
    # Model definition and inputs.
    model='vit_large_patch32_384',
    dense_features=features,
    pretrained=False,
    inp_channels=3,
    im_size=384,
    device=device,
    # Optimisation / loading settings.
    lr=1e-5,
    weight_decay=1e-6,
    batch_size=32,
    num_workers=0,
    epochs=10,
    # Head settings.
    out_features=1,
    dropout=0.2,
    # Mixup settings.
    mixup=False,
    mixup_alpha=1.0,
    # T_* and lr bounds (presumably learning-rate scheduler settings -- confirm).
    T_0=5,
    T_max=5,
    T_mult=1,
    min_lr=1e-7,
    max_lr=1e-4,
)

In [None]:
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import timm
#Metrics
from sklearn.metrics import mean_squared_error

class PetNet(nn.Module):
    """Image backbone combined with dense metadata features.

    A timm model produces a 128-dimensional embedding per image (its ``head``
    is replaced by ``nn.Linear(n_features, 128)``). After dropout, the
    embedding is concatenated with the dense features and passed through a
    two-layer MLP that emits ``out_features`` values per sample.

    Args:
        model_name: Name passed to ``timm.create_model``.
        out_features: Size of the final output layer.
        inp_channels: Forwarded to timm as ``in_chans``.
        pretrained: Forwarded to ``timm.create_model``.
        num_dense: Number of dense features concatenated to the embedding.
        dropout: Dropout probability applied to the embedding. ``None``
            (the default) reads ``params['dropout']`` at construction time,
            which is what the class did before this argument existed.
    """

    def __init__(self, model_name=params['model'], out_features=params['out_features'], inp_channels=params['inp_channels'],
                 pretrained=params['pretrained'], num_dense=len(params['dense_features']), dropout=None):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels)
        # Swap the backbone's classification head for a 128-d embedding layer.
        n_features = self.model.head.in_features
        self.model.head = nn.Linear(n_features, 128)
        self.fc = nn.Sequential(
            nn.Linear(128 + num_dense, 64),
            nn.ReLU(),
            nn.Linear(64, out_features)
        )
        if dropout is None:
            dropout = params['dropout']
        self.dropout = nn.Dropout(dropout)

    def forward(self, image, dense):
        """Return the raw (pre-sigmoid) outputs for a batch.

        Args:
            image: Batch of images accepted by the timm backbone.
            dense: 2-D tensor whose second dimension is ``num_dense``; it is
                concatenated with the embedding along ``dim=1``.

        Returns:
            Tensor produced by the final linear layer, one row per sample.
        """
        embeddings = self.model(image)
        x = self.dropout(embeddings)
        # Dense metadata may arrive as an integer tensor; cast it explicitly
        # so the concatenation does not depend on implicit dtype promotion.
        dense = dense.to(dtype=x.dtype)
        x = torch.cat([x, dense], dim=1)
        output = self.fc(x)
        return output

In [None]:
from torch.utils.data import Dataset, DataLoader
class CuteDataset(Dataset):
    """Dataset yielding ``(image, dense, label)`` triples.

    Args:
        images_filepaths: Indexable collection of image file paths.
        dense_features: 2-D array indexed as ``dense_features[idx, :]``.
        targets: Indexable collection of per-sample target values.
        transform: Optional callable invoked as ``transform(image=image)``
            and expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, images_filepaths, dense_features, targets, transform=None):
        self.images_filepaths = images_filepaths
        self.dense_features = dense_features
        self.targets = targets
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.images_filepaths)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            Tuple ``(image, dense, label)``: the RGB image (transformed when a
            transform was given), the dense feature row as a float32 tensor,
            and the target as a float tensor.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_filepath = self.images_filepaths[idx]
        image = cv2.imread(image_filepath)
        # cv2.imread signals an unreadable file by returning None instead of
        # raising; fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_filepath}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']

        # Float32 so the features can be concatenated with float embeddings.
        dense = torch.as_tensor(self.dense_features[idx, :], dtype=torch.float32)
        label = torch.tensor(self.targets[idx]).float()
        return image, dense, label

In [None]:
def id_to_path(img_id,dir):
    """Return the path of the file ``<img_id>.jpg`` inside directory ``dir``."""
    filename = f'{img_id}.jpg'
    return os.path.join(dir, filename)

In [None]:
def get_image(path, img_size=params['im_size']):
    """Read a JPEG with TensorFlow and preprocess it for InceptionV3.

    Args:
        path: Path of the JPEG file to read.
        img_size: Side length of the square output; the image is resized with
            padding to ``img_size x img_size``. Defaults to
            ``params['im_size']`` (previously an undefined global was read,
            so any call raised ``NameError``).

    Returns:
        The float32 image after
        ``tf.keras.applications.inception_v3.preprocess_input``.
    """
    image = tf.image.decode_jpeg(tf.io.read_file(path), channels=3)
    image = tf.cast(tf.image.resize_with_pad(image, img_size, img_size), dtype=tf.float32)
    return tf.keras.applications.inception_v3.preprocess_input(image)

In [None]:
def process_dataset(path, label):
    """Load the image at ``path`` via ``get_image`` and pair it with ``label``."""
    image = get_image(path)
    return image, label

In [None]:
def get_dataset(x, y=None, batch_size=params['batch_size'], autotune=tf.data.AUTOTUNE):
    """Build a batched, prefetching ``tf.data`` pipeline of decoded images.

    Args:
        x: Image paths, sliced element-wise into the dataset.
        y: Optional labels. When given, elements are ``(image, label)`` pairs;
            otherwise elements are images only.
        batch_size: Batch size. Defaults to ``params['batch_size']``
            (previously an undefined global was read).
        autotune: Value used for ``num_parallel_calls`` and the prefetch
            ``buffer_size``. Defaults to ``tf.data.AUTOTUNE`` (previously an
            undefined global was read).

    Returns:
        The mapped, batched and prefetched ``tf.data.Dataset``.
    """
    if y is not None:
        ds = tf.data.Dataset.from_tensor_slices((x, y))
        loader = process_dataset
    else:
        ds = tf.data.Dataset.from_tensor_slices(x)
        loader = get_image
    # Both branches share the same map -> batch -> prefetch pipeline.
    return ds.map(loader, num_parallel_calls=autotune) \
        .batch(batch_size).prefetch(buffer_size=autotune)

In [None]:
import albumentations
from albumentations.pytorch.transforms import ToTensorV2

def get_test_transforms(DIM = params['im_size']):
    """Return the albumentations pipeline applied to images at inference time.

    The pipeline resizes to ``DIM x DIM``, normalises each channel with the
    fixed mean/std below (presumably the ImageNet statistics -- confirm), and
    converts the result with ``ToTensorV2``.
    """
    steps = [
        albumentations.Resize(DIM, DIM),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(p=1.0),
    ]
    return albumentations.Compose(steps)

In [None]:
# Train metadata, with the image path of every row attached.
data_train = pd.read_csv(train_data_csv)
data_train['path'] = data_train['Id'].map(lambda img_id: id_to_path(img_id, train_folder))

# Test metadata, with the image path of every row attached.
data_test = pd.read_csv(test_data_csv)
data_test['path'] = data_test['Id'].map(lambda img_id: id_to_path(img_id, test_folder))

# Shuffled 75/25 split of the training rows with a fixed seed.
train_subset, valid_subset = train_test_split(
    data_train, test_size=0.25, random_state=5, shuffle=True
)

In [None]:
# Image paths of the test rows.
X_test = data_test.loc[:, 'path']
# Metadata columns of the test rows, in the order listed in params['dense_features'].
X_test_dense = data_test.loc[:, params['dense_features']]

In [None]:
# Directory that holds the competition CSV files.
csv_dir = '../input/petfinder-pawpularity-score'
# Path of the sample submission inside that directory.
sample_sub_file_path = os.path.join(csv_dir, 'sample_submission.csv')
# Sample submission table; its 'Pawpularity' column is passed to the test
# dataset as the targets argument in a later cell.
sample_df = pd.read_csv(sample_sub_file_path)

In [None]:
# Dataset over the test images. The sample-submission scores fill the
# required targets argument; the prediction loop does not use them.
test_dataset = CuteDataset(
    X_test.values,
    X_test_dense.values,
    sample_df['Pawpularity'].values,
    transform=get_test_transforms(),
)

# Unshuffled loader, so predictions stay in the row order of the test table.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=params['batch_size'],
    num_workers=params['num_workers'],
    shuffle=False,
    pin_memory=True,
)

In [None]:
# model = PetNet().eval().cuda()
# model.load_state_dict(torch.load(r"../input/best-epoch/vit_large_patch32_384_7_epoch_f18.711_rmse.pth"))

In [None]:
from tqdm.auto import tqdm
import glob

# Average the test predictions of every checkpoint found in models_dir.
models_dir = '../input/best-epoch'

# List the checkpoints once (sorted for a reproducible order) and fail early
# with a clear message when there are none, instead of dividing None below.
model_paths = sorted(glob.glob(models_dir + '/*.pth'))
if not model_paths:
    raise FileNotFoundError(f'No .pth checkpoints found in {models_dir}')

predictions_nn = None
for model_name in model_paths:
    model = PetNet()
    # map_location lets checkpoints saved on a GPU load on a CPU-only session.
    model.load_state_dict(torch.load(model_name, map_location=params['device']))
    model = model.to(params['device'])
    model.eval()

    # Collect per-batch outputs and join them once after the loop.
    batch_preds = []
    with torch.no_grad():
        for (images, dense, _) in tqdm(test_loader, desc='Predicting. '):
            images = images.to(params['device'], non_blocking=True)
            # Cast so integer metadata can be concatenated with float embeddings.
            dense = dense.to(params['device'], dtype=torch.float32, non_blocking=True)
            # Sigmoid output is scaled from [0, 1] to [0, 100].
            predictions = torch.sigmoid(model(images, dense)).to('cpu').numpy()*100
            batch_preds.append(predictions)
    temp_preds = np.concatenate(batch_preds, axis=0)

    # Keep this model's predictions in a column named after the checkpoint
    # file stem, so each checkpoint gets its own column.
    model_column = os.path.splitext(os.path.basename(model_name))[0]
    data_test[model_column] = temp_preds.ravel()

    # Out-of-place sum: the running total never aliases a per-model array.
    if predictions_nn is None:
        predictions_nn = temp_preds
    else:
        predictions_nn = predictions_nn + temp_preds

predictions_nn = predictions_nn / len(model_paths)

In [None]:
# Print the averaged predictions for a visual sanity check.
print(predictions_nn)

In [None]:
# Submission table: the test Ids plus the averaged predictions.
sub_df = data_test[['Id']].copy()
sub_df['Pawpularity'] = predictions_nn

In [None]:
# Write the submission to the working directory, without the index column.
sub_df.to_csv('submission.csv', index=False)

In [None]:
# y = 'Pawpularity'
# final_outputs = []
# for i,(images, dense) in enumerate(test_loader):
#     images = images.to(params['device'], non_blocking=True)
#     dense = dense.to(params['device'], non_blocking=True)
#     output = model(images, dense)
#     outputs = torch.sigmoid(output).detach().cpu().numpy()*100
#     data_test[y]=outputs
#     print(outputs)

In [None]:
# data_test[['Id', y]].to_csv('submission.csv', index=False)