In [None]:
# Put the offline package directory on the import path before the third-party
# imports below run (presumably this is where `timm` is loaded from -- confirm
# against the attached dataset).
import sys

package_path = '../input/timmlatest'
sys.path.append(package_path)

In [None]:
import os
import gc
import cv2
import copy
import time
import random

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Utils
from tqdm import tqdm

import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Set CUDA_LAUNCH_BLOCKING=1 so CUDA failures produce more descriptive error
# messages (per the CUDA docs this makes kernel launches synchronous).
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
# Competition data locations: the train and test image folders sit directly
# under the competition root.
ROOT_DIR = "../input/petfinder-pawpularity-score"
TRAIN_DIR = f"{ROOT_DIR}/train"
TEST_DIR = f"{ROOT_DIR}/test"

In [None]:
# Inference settings read by the cells below.
CONFIG = {
    'seed': 42,
    'model_name': 'tf_efficientnet_b4_ns',
    'test_batch_size': 32,
    'img_size': 512,
    'num_classes': 1,
    # First GPU when CUDA is available, otherwise the CPU.
    'device': torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}

In [None]:
# Checkpoints whose predictions are averaged by `inference`. The file names
# look like "<validation RMSE>_<epoch>" -- presumably one per fold; confirm
# against the training notebook.
MODEL_PATHS = [
    f"../input/petfinder-public/{checkpoint_name}"
    for checkpoint_name in (
        'RMSE18.7226_epoch4.bin',
        'RMSE18.9180_epoch3.bin',
        'RMSE18.3704_epoch5.bin',
        'RMSE18.7945_epoch3.bin',
        'RMSE18.8625_epoch4.bin',
    )
]

In [None]:
def set_seed(seed = 42):
    '''Seed every random number generator this notebook relies on.

    Seeds Python's `random`, NumPy and PyTorch (CPU and CUDA), puts cuDNN in
    deterministic mode with benchmarking turned off, and writes the seed to
    the PYTHONHASHSEED environment variable.

    Args:
        seed: Value applied to all of the generators above (default 42).
    '''
    # Record the seed as the hash seed in the process environment.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Python, NumPy and PyTorch generators.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # The cuDNN backend needs these two switches as well: no autotuned
    # algorithm selection, deterministic kernels only.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    
# Apply the configured seed once, before any data or model is created.
set_seed(seed=CONFIG['seed'])

In [None]:
def get_test_file_path(id):
    '''Return the path of the test image `<id>.jpg` inside `TEST_DIR`.'''
    return "{}/{}.jpg".format(TEST_DIR, id)

In [None]:
# Read the test table and attach, for every row, the path of its image file.
df = (
    pd.read_csv(f"{ROOT_DIR}/test.csv")
    .assign(file_path=lambda frame: frame['Id'].apply(get_test_file_path))
)

In [None]:
# Every column except the identifier, the target and the image path is fed to
# the model as a metadata feature.
feature_cols = [
    name
    for name in df.columns
    if name not in ('Id', 'Pawpularity', 'file_path')
]

In [None]:
class PawpularityTestDataset(Dataset):
    '''Test-time dataset that yields an image together with its metadata row.

    Args:
        root_dir: Directory of the test images. It is only stored; images are
            read from the paths in `df['file_path']`.
        df: DataFrame holding a `file_path` column plus the columns named in
            the notebook-level `feature_cols` list.
        transforms: Optional callable invoked as `transforms(image=img)`; the
            `"image"` entry of its result replaces the raw image.
    '''
    def __init__(self, root_dir, df, transforms=None):
        self.root_dir = root_dir
        self.df = df
        self.file_names = df['file_path'].values
        self.meta = df[feature_cols].values
        self.transforms = transforms

    def __len__(self):
        '''Number of rows (images) in the dataset.'''
        return len(self.df)

    def __getitem__(self, index):
        '''Load one sample.

        Returns:
            `(img, meta)`: the RGB image (after `transforms`, when given) and
            the row of metadata features at `index`.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        '''
        img_path = self.file_names[index]
        img = cv2.imread(img_path)
        # cv2.imread signals a missing/unreadable file by returning None rather
        # than raising; fail here with the path instead of inside cvtColor.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR channel order; convert to RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        meta = self.meta[index, :]

        if self.transforms:
            img = self.transforms(image=img)["image"]

        return img, meta

In [None]:
# Test-time preprocessing: resize to the configured square size, normalise
# with the mean/std given below, then convert the image to a tensor.
test_transforms = A.Compose(
    [
        A.Resize(height=CONFIG['img_size'], width=CONFIG['img_size']),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
        ToTensorV2(),
    ],
    p=1.,
)

In [None]:
class PawpularityModel(nn.Module):
    '''timm image backbone whose features are concatenated with metadata.

    The backbone's own classifier is removed; its output features go through
    dropout, are concatenated with the metadata vector and fed to a single
    linear layer with `CONFIG['num_classes']` outputs.

    Args:
        model_name: Name passed to `timm.create_model`. The backbone must
            expose a `classifier` attribute with `in_features`.
        pretrained: Forwarded to `timm.create_model`.
        num_meta_features: Width of the metadata vector appended to the image
            features. Defaults to 12, the value previously hard-coded here;
            it must match the checkpoints being loaded.
    '''
    def __init__(self, model_name, pretrained=True, num_meta_features=12):
        super(PawpularityModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        # Remember the feature width before the classifier head is dropped.
        self.n_features = self.model.classifier.in_features
        self.model.reset_classifier(0)
        self.num_meta_features = num_meta_features
        self.fc = nn.Linear(self.n_features + num_meta_features, CONFIG['num_classes'])
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, images, meta):
        '''Return predictions of shape (bs, num_classes) for a batch.'''
        features = self.model(images)                 # features = (bs, embedding_size)
        features = self.dropout(features)
        features = torch.cat([features, meta], dim=1) # features = (bs, embedding_size + num_meta_features)
        output = self.fc(features)                    # outputs  = (bs, num_classes)
        return output

In [None]:
@torch.no_grad()
def valid_fn(model, dataloader, device):
    '''Run `model` over `dataloader` in eval mode and collect its predictions.

    Args:
        model: Callable module invoked as `model(images, meta)`.
        dataloader: Sized iterable yielding `(images, meta)` tensor pairs.
        device: Device both tensors are moved to (as float) before the call.

    Returns:
        1-D NumPy array holding every batch's flattened outputs, concatenated
        in iteration order. An empty float32 array is returned when the
        dataloader yields no batches.
    '''
    model.eval()

    batch_preds = []
    for images, meta in tqdm(dataloader, total=len(dataloader)):
        images = images.to(device, dtype=torch.float)
        meta = meta.to(device, dtype=torch.float)

        outputs = model(images, meta)
        batch_preds.append(outputs.view(-1).cpu().detach().numpy())

    gc.collect()

    # np.concatenate rejects an empty list, so handle "no batches" explicitly.
    if not batch_preds:
        return np.empty(0, dtype=np.float32)
    return np.concatenate(batch_preds)

In [None]:
def inference(model_paths, dataloader, device):
    '''Average the predictions of several checkpoints over one dataloader.

    Args:
        model_paths: Iterable of checkpoint files, each holding a state dict
            for `PawpularityModel`.
        dataloader: Passed unchanged to `valid_fn` for every checkpoint.
        device: Device each model is moved to and evaluated on.

    Returns:
        NumPy array with the element-wise mean of the per-checkpoint
        predictions.

    Raises:
        ValueError: if `model_paths` is empty.
    '''
    model_paths = list(model_paths)
    if not model_paths:
        raise ValueError("model_paths must contain at least one checkpoint path")

    final_preds = []
    for i, path in enumerate(model_paths):
        model = PawpularityModel(CONFIG['model_name'], pretrained=False)
        # Load onto the CPU first so checkpoints saved from a GPU also work on
        # a CPU-only host, then move the model to the requested device.
        # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
        state_dict = torch.load(path, map_location="cpu")
        model.load_state_dict(state_dict)
        model.to(device)

        print(f"Getting predictions for model {i+1}")
        preds = valid_fn(model, dataloader, device)
        final_preds.append(preds)

        # Drop this model before building the next one so ensemble members do
        # not pile up in (GPU) memory.
        del model, state_dict
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return np.mean(np.array(final_preds), axis=0)

In [None]:
# Wrap the test table in a dataset and iterate it in its original row order
# (no shuffling), in batches of the configured size.
test_dataset = PawpularityTestDataset(
    root_dir=TEST_DIR,
    df=df,
    transforms=test_transforms,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=CONFIG['test_batch_size'],
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

In [None]:
# Ensemble prediction: one pass over the test loader per checkpoint, averaged.
preds = inference(
    model_paths=MODEL_PATHS,
    dataloader=test_loader,
    device=CONFIG['device'],
)

In [None]:
# Fill the submission template with the ensemble predictions.
# `preds` follows the row order of `df` (the loader does not shuffle), so the
# values are matched to the template on `Id` rather than by position; this
# gives the same result when both files share row order and stays correct
# when they do not.
sample = pd.read_csv(f"{ROOT_DIR}/sample_submission.csv")
pred_by_id = pd.Series(preds, index=df['Id'].values)
sample['Pawpularity'] = pred_by_id.reindex(sample['Id'].values).values

In [None]:
# Preview the first rows of the submission as a quick sanity check.
sample.head()

In [None]:
# Write the submission file without the DataFrame index column.
sample.to_csv(path_or_buf='submission.csv', index=False)