In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Install timm 0.4.12 from a wheel file stored under ../input rather than
# by package name (the by-name install is kept below, commented out).
# !pip install timm
!pip install '../input/timm-package/timm-0.4.12-py3-none-any.whl'
# !pip install -qq albumentations==1.0.3
# !pip install -qq grad-cam
# !pip install -qq ttach

In [None]:
# Aesthetics
import warnings
import sklearn.exceptions
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)

# General
from tqdm.auto import tqdm
from collections import defaultdict
import pandas as pd
import numpy as np
import os
import random
import gc
import cv2
import sys
gc.enable()
pd.set_option('display.max_columns', None)


# General
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import os
import glob
import random
import cv2
pd.set_option('display.max_columns', None)


from sklearn.model_selection import KFold,StratifiedKFold


# Visualisation
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(style="whitegrid")

# Image Aug
import albumentations
from albumentations.pytorch.transforms import ToTensorV2

# Deep Learning
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, OneCycleLR, CosineAnnealingLR
import torch
import torchvision
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import timm


import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

#Metrics
from sklearn.metrics import mean_squared_error

# Random Seed Initialize
RANDOM_SEED = 2021

def seed_everything(seed=RANDOM_SEED):
    """Seed every random number generator the notebook relies on.

    Sets ``PYTHONHASHSEED`` in the environment, seeds Python's ``random``
    module, NumPy and PyTorch (CPU and every CUDA device), and configures
    cuDNN for deterministic execution.

    Args:
        seed: Integer seed applied to all generators. Defaults to
            ``RANDOM_SEED``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all CUDA devices, not only the current one; this call is
    # silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner benchmarks several kernels and may select a
    # different one on each run, which defeats the deterministic setting
    # above, so it has to be switched off.
    torch.backends.cudnn.benchmark = False

seed_everything()

# Device Optimization
# Use the GPU whenever CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f'Using device: {device}')

In [None]:
# Locations of the competition data: root folder, test images, test metadata.
csv_dir = '../input/petfinder-pawpularity-score'
test_dir = '../input/petfinder-pawpularity-score/test'
test_file = '../input/petfinder-pawpularity-score/test.csv'
sample_sub_file_path = os.path.join(csv_dir, 'sample_submission.csv')

# Read the test metadata first, then the sample submission.
test_df, sample_df = (
    pd.read_csv(csv_path) for csv_path in (test_file, sample_sub_file_path)
)

In [None]:
def return_filpath(name, folder=None):
    """Build the path of the JPEG file that belongs to ``name``.

    Args:
        name: Image identifier; the file name is ``<name>.jpg``.
        folder: Directory that contains the image. When ``None`` (the
            default) the bare file name is returned, i.e. a path relative
            to the current working directory.

    Returns:
        str: ``<name>.jpg``, prefixed with ``folder`` when one is given.
    """
    filename = f'{name}.jpg'
    # os.path.join(None, ...) raises TypeError, so the declared default
    # has to be handled before joining.
    if folder is None:
        return filename
    return os.path.join(folder, filename)
# Resolve every test Id to the path of its image inside test_dir.
test_df['image_path'] = test_df['Id'].apply(return_filpath, folder=test_dir)

In [None]:
# Inference settings shared by the transforms, the dataset loader and the model.
# NOTE(review): presumably these must match the values used when the
# checkpoints were trained - confirm against the training notebook.
params_test = {
    # timm model name handed to PawPetNet
    'model': 'tf_efficientnet_b0_ns',
    # metadata columns of test.csv that are fed to the model next to the image
    'dense_features': [
        'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
        'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur',
    ],
    'pretrained': False,
    'inp_channels': 3,
    # side length used by the Resize transform
    'im_size': 256,
    'device': device,
    # DataLoader settings
    'batch_size': 32,
    'num_workers' : 2,
    'out_features': 1,
    'debug': False,
}

In [None]:
def get_test_transforms(DIM = params_test['im_size']):
    """Build the preprocessing pipeline applied to every test image.

    Args:
        DIM: Height and width passed to the resize step. The default is
            the value of ``params_test['im_size']`` at the time this
            function was defined.

    Returns:
        An ``albumentations.Compose`` that resizes, normalizes and then
        converts the image with ``ToTensorV2``.
    """
    resize = albumentations.Resize(DIM, DIM)
    # NOTE(review): these look like the usual ImageNet channel statistics - confirm.
    normalize = albumentations.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    to_tensor = ToTensorV2(p=1.0)
    return albumentations.Compose([resize, normalize, to_tensor])


class PawDataset(Dataset):
    """Dataset yielding ``(image, dense, label)`` triples.

    Args:
        images_filepaths: Indexable sequence of image file paths.
        dense_features: Array indexed as ``dense_features[idx, :]``; row
            ``idx`` holds the dense features of image ``idx``.
        targets: Indexable sequence of numeric targets, one per image.
        transform: Optional callable invoked as ``transform(image=image)``;
            the ``'image'`` entry of its result replaces the loaded image.
    """

    def __init__(self, images_filepaths, dense_features, targets, transform=None):
        self.images_filepaths = images_filepaths
        self.dense_features = dense_features
        self.targets = targets
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images_filepaths)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            tuple: ``(image, dense, label)`` where ``image`` is the RGB
            image (after ``transform`` when one is set), ``dense`` is a
            float32 tensor of the dense features and ``label`` is a float
            tensor holding the target.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_filepath = self.images_filepaths[idx]
        image = cv2.imread(image_filepath)
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_filepath}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']

        # Cast explicitly so the features can be concatenated with float
        # embeddings without relying on implicit dtype promotion.
        dense = torch.as_tensor(self.dense_features[idx, :], dtype=torch.float32)
        label = torch.tensor(self.targets[idx]).float()
        return image, dense, label
    
    
class PawPetNet(nn.Module):
    """timm backbone whose embeddings are joined with dense features.

    The backbone's classification layer is replaced by ``nn.Identity`` so
    the backbone returns embeddings; these pass through dropout, are
    concatenated with the dense features and fed to one linear layer.

    Args:
        model_name: Name passed to ``timm.create_model``.
        out_features: Output size of the final linear layer.
        inp_channels: Number of input image channels.
        pretrained: Forwarded to ``timm.create_model``.
        num_dense: Number of dense features concatenated to the embeddings.

    All defaults are read from ``params_test`` when the class is defined.
    """

    def __init__(self, model_name=params_test['model'], out_features=params_test['out_features'], inp_channels=params_test['inp_channels'],
                 pretrained=params_test['pretrained'], num_dense=len(params_test['dense_features'])):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels)

        # Only the text before the first underscore selects the branch, so a
        # name such as 'tf_efficientnet_b0_ns' (prefix 'tf') ends up in the
        # final fallback branch, not in the 'efficientnet' one.
        family = model_name.split('_')[0]
        if family == 'efficientnet':
            stem = self.model.conv_stem
            # nn.Conv2d expects a boolean bias flag. Passing the stem's bias
            # (None or a Parameter) happens to work for None, but a real bias
            # tensor would raise on its ambiguous truth value.
            self.model.conv_stem = nn.Conv2d(inp_channels, stem.out_channels,
                                             kernel_size=stem.kernel_size, stride=stem.stride,
                                             padding=stem.padding, bias=stem.bias is not None)
            n_features = self.model.classifier.in_features
            self.model.classifier = nn.Identity()
        elif family == 'nfnet':
            n_features = self.model.head.fc.in_features
            self.model.head.fc = nn.Identity()
        elif model_name in ['resnet18d', 'resnet50d', 'resnet152d',
                            'seresnet50', 'seresnext26d_32x4d', 'seresnext50_32x4d',
                            'resnetblur18', 'resnetblur50']:
            n_features = self.model.fc.in_features
            self.model.fc = nn.Identity()
        else:
            n_features = self.model.classifier.in_features
            self.model.classifier = nn.Identity()

        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(n_features + num_dense, out_features)

    def forward(self, image, dense):
        """Predict from an image batch and its dense features.

        Args:
            image: Batch of images passed to the backbone.
            dense: Dense features concatenated to the embeddings along
                dimension 1 (assumes shape ``(batch, num_dense)`` - TODO confirm).

        Returns:
            Output of the final linear layer.
        """
        embeddings = self.model(image)
        x = self.dropout(embeddings)
        x = torch.cat([x, dense], dim=1)
        output = self.fc(x)
        return output

In [None]:
# Directory that the inference loop searches for '*.pth' checkpoint files.
models_dir = '../input/pawpularity-score-starter'
print(f'Models path: {models_dir}')

In [None]:

        
# Ensemble inference: every '*.pth' checkpoint in models_dir predicts the
# whole test set and the per-checkpoint predictions are averaged.

# Resolve the checkpoint list once; sorting makes the processing order
# independent of the file-system listing order.
checkpoint_paths = sorted(glob.glob(models_dir + '/*.pth'))
if not checkpoint_paths:
    raise FileNotFoundError(f'No .pth checkpoints found in {models_dir}')

# The data does not depend on the checkpoint, so the dataset and the loader
# are built once instead of once per model.
test_dataset = PawDataset(
    images_filepaths = test_df['image_path'].values,
    dense_features = test_df[params_test['dense_features']].values,
    # The dataset requires targets; the sample submission column is passed
    # only to satisfy that and is never used by the loop below.
    targets = sample_df['Pawpularity'].values,
    transform = get_test_transforms()
)
test_loader = DataLoader(
    test_dataset, batch_size=params_test['batch_size'],
    shuffle=False, num_workers=params_test['num_workers'],
    pin_memory=True
)

predicted_labels = None
for model_name in checkpoint_paths:
    print(model_name)
    model = PawPetNet()
    # map_location lets checkpoints saved on a GPU load on a CPU-only session.
    model.load_state_dict(torch.load(model_name, map_location=params_test['device']))
    model = model.to(params_test['device'])
    # Switch dropout and normalization layers to inference behaviour;
    # without this call the model would predict in training mode.
    model.eval()

    batch_preds = []
    with torch.no_grad():
        for (images, dense, target) in tqdm(test_loader, desc=f'Predicting. '):
            images = images.to(params_test['device'], non_blocking=True)
            # Cast so the features share the floating dtype of the embeddings.
            dense = dense.to(params_test['device'], non_blocking=True).float()
            batch_preds.append(model(images, dense).to('cpu').numpy())
    # Stack once per model instead of re-copying the array after every batch.
    temp_preds = np.vstack(batch_preds)

    if predicted_labels is None:
        predicted_labels = temp_preds
    else:
        predicted_labels += temp_preds

# Turn the accumulated sum into the mean over all checkpoints.
predicted_labels /= len(checkpoint_paths)

In [None]:
# Start from the Id column of the test set and attach the averaged predictions.
sub_df = test_df[['Id']].copy()
sub_df['Pawpularity'] = predicted_labels

In [None]:
# Summary statistics of the predicted scores, shown as the cell output.
sub_df['Pawpularity'].describe()

In [None]:
# Write the predictions to submission.csv without the index column.
sub_df.to_csv('submission.csv', index=False)