In [1]:
import sys
import os

sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

import pandas as pd
import numpy as np
import glob
import cv2
import matplotlib.pyplot as plt
import joblib
import gc
from glob import glob

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

import timm
from PIL import Image
import PIL

from tqdm import tqdm
import joblib
import time
from tqdm.notebook import tqdm
import joblib
from sklearn.model_selection import StratifiedKFold

import cuml

# Record the versions of the core libraries so the run can be reproduced.
for _lib in (np, pd, torch, timm):
    print(_lib.__version__)

# Install OpenAI CLIP

In [1]:
# Install ftfy and CLIP from local dataset folders (paths under ../input).
!pip install ../input/openaiclipweights/python-ftfy-master/python-ftfy-master
!pip install ../input/openaiclipweights/clip/CLIP
# Copy the BPE vocabulary into the installed clip package and gzip it, keeping
# the uncompressed copy (-k).
# NOTE(review): the site-packages path is hard-coded to Python 3.7 — confirm it
# matches the kernel image in use.
!cp ../input/openaiclipweights/CLIP-main/CLIP-main/clip/bpe_simple_vocab_16e6.txt /opt/conda/lib/python3.7/site-packages/clip/.
!gzip -k /opt/conda/lib/python3.7/site-packages/clip/bpe_simple_vocab_16e6.txt

import clip

# Load Train and Test

In [1]:
# Competition tables: training rows, test rows and the sample submission.
train = pd.read_csv('../input/petfinderdata/train-folds-1.csv')
test = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')
sub = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')

# Full image path for every Id.
train['path'] = ['../input/petfinder-pawpularity-score/train/' + image_id + '.jpg' for image_id in train['Id']]
test['path'] = ['../input/petfinder-pawpularity-score/test/' + image_id + '.jpg' for image_id in test['Id']]

# On a public-LB run the test set is tiny, so replicate it 5x to check the
# memory consumption of the chosen batch sizes.
if len(test) < 10:
    test = pd.concat([test] * 5, ignore_index=True)

print(train.shape, test.shape, sub.shape)

# Just create K folds and check target consistency across folds

In [1]:
# Bucket the target into bins of width 5; the bins are the stratification label.
train['bins'] = (train['Pawpularity']//5).round()

# Assign every row to one of 20 stratified folds.
train['fold0'] = -1
# Resolve the column by name rather than by position (-1). If the loaded CSV
# already ships a 'fold0' column, the assignment above overwrites it in place,
# so it is NOT the last column and a positional write would corrupt 'bins'.
fold_col = train.columns.get_loc('fold0')
skf = StratifiedKFold(n_splits = 20, shuffle=True, random_state = 1)
for i, (_, test_index) in enumerate(skf.split(train.index, train['bins'])):
    train.iloc[test_index, fold_col] = i

train['fold0'] = train['fold0'].astype('int')
gc.collect()

# Per-fold target statistics: the means/stds should be close across folds.
train.groupby(['fold0'])['Pawpularity'].agg(['mean','std','count'])

In [1]:
# Preview the training table, including the path / bins / fold0 columns added above.
train.head()

In [1]:
# Preview the test table, including the added path column.
test.head()

# Let's check all the models available in the timm library

In [1]:
# Names of every timm architecture that has pretrained weights.
avail_pretrained_models = timm.list_models(pretrained=True)
# Display the count together with the full list of names.
len(avail_pretrained_models), avail_pretrained_models

# As we can see, there are 575 pretrained model architectures available in the timm library.
# The first part of the solution is basically to extract the features from the last layer of those models and run an SVR on the extracted features.
# Most of the models in timm are trained on the 1000 ImageNet classes, so the output shape is 1000 for each model.
# Extracting features from all 575 models is crazy and unthinkable, especially taking into account the submission time limit of 9h. So the idea is to find a subset of models (from those 575) that performs well in terms of RMSE.
# To do that, a forward model selection algorithm was used, following an RMSE hill-climbing logic: start with one model, then keep adding models until the RMSE stops improving.


# Now lets start to extract imagenet pretrained models features

# The pretrained models found by the forward model selection algorithm used in this solution are listed below.

In [1]:
# timm architectures whose features are extracted with each model's default
# preprocessing. Every entry is used both as the timm model name and as the key
# into `modelpath`.
names = [
    'deit_base_distilled_patch16_384',
    'fbnetc_100',
    'ig_resnext101_32x8d',
    'ig_resnext101_32x48d',
    'repvgg_b0',
    'resnetv2_152x4_bitm',
    'rexnet_200',
    'resnest269e',
    'swsl_resnext101_32x8d',
    'tf_efficientnet_b6_ns',
    'tf_efficientnet_b7_ns',
    'tf_efficientnet_b8_ap',
    'tf_efficientnet_l2_ns_475',
    'vit_base_patch16_384',
    'vit_large_patch16_384',
    'vit_large_r50_s32_384',
]

# Feature sets extracted with a custom input size and optional horizontal flip.
# The name encodes the recipe: '<arch>[_hflip]_<size>'; it is parsed by the
# extraction loop that iterates over this list.
names_hflip_crop = [
    'tf_efficientnet_l2_ns_hflip_384',
    'deit_base_distilled_patch16_384_hflip_384',
    'ig_resnext101_32x48d_hflip_384',
    'tf_efficientnet_l2_ns_512',
]

# NOTE(review): this list is not referenced anywhere else in the visible
# notebook — confirm whether it is still needed.
names_orig = [
    'ig_resnext101_32x48d',
    'vit_large_r50_s32_384',
    'clip_RN50x4',
    'clip_ViT-B-16',
    'clip_RN50x16',
    'clip_ViT-B-32',
]


# Create a dictionary with the path of all pretrained weights available in Kaggle datasets

In [1]:
# Map "<checkpoint stem>" -> "<checkpoint path>" for every .pt file in the four
# pretrained-weights datasets. The stem is the file name up to its first dot;
# a stem found in a later dataset overrides the earlier one.
modelpath = {}
for pattern in (
    '../input/pytorch-pretrained-0/*.pt',
    '../input/pytorch-pretrained-1/*.pt',
    '../input/pytorch-pretrained-2/*.pt',
    '../input/pytorch-pretrained-3/*.pt',
):
    for ckpt in glob(pattern):
        filename = ckpt.split('/')[-1]
        modelpath[filename.split('.')[0]] = ckpt
modelpath

# Now iteratively extract the TESTSET features from each ImageNet pretrained model and store them in a dictionary

In [1]:
class PawpularDataset:
    """Map-style dataset that yields one transformed RGB image per image id.

    Parameters
    ----------
    images : array-like of str
        Image ids; the file read is ``base_path + id + '.jpg'``.
    base_path : str
        Directory prefix (concatenated as-is, so it must end with a slash).
    modelcfg : dict
        Keyword arguments forwarded to ``create_transform`` to build the
        preprocessing pipeline.
    aug : int
        Stored on the instance; not used by ``__getitem__``.
    """

    def __init__(self, images, base_path='../input/petfinder-pawpularity-score/train/', modelcfg=None, aug=0 ):
        self.images = images.copy()
        self.base_path = base_path
        self.aug = aug
        self.transform = create_transform(**modelcfg)

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.images)

    def __getitem__(self, item):
        """Load image number ``item`` as RGB and return it transformed."""
        filename = self.base_path + self.images[item] + '.jpg'
        rgb = Image.open(filename).convert('RGB')
        return self.transform(rgb)


# Test-set features, keyed by feature-set name.
EMB_TEST = {}
for arch in names:
    starttime = time.time()

    # Build the architecture and load its weights from the local checkpoint.
    model = timm.create_model(arch, pretrained=False).to('cuda')
    model.load_state_dict(torch.load(modelpath[arch]))
    model.eval()

    # The images come from the TEST folder, preprocessed with the model's
    # default data config.
    test_dataset = PawpularDataset(
        images = test.Id.values,
        base_path='../input/petfinder-pawpularity-score/test/',
        modelcfg = resolve_data_config({}, model=model),
        aug = 0,
    )
    # Smaller batches for the huge EfficientNet-L2. `names` only contains the
    # '_475' variant, so an exact match on 'tf_efficientnet_l2_ns' never fired;
    # match on the prefix instead.
    BS = 10 if arch.startswith('tf_efficientnet_l2_ns') else 16
    test_dataloader = DataLoader(test_dataset, batch_size=BS, num_workers= 2, shuffle=False)

    with torch.no_grad():
        res = [model(img.to('cuda')).cpu().numpy() for img in test_dataloader]
    res = np.concatenate(res, 0)
    EMB_TEST[arch] = res

    print( arch, ', Done in:', int(time.time() - starttime), 's' )

    # Drop the model and release cached GPU memory before the next architecture.
    del model, res
    torch.cuda.empty_cache()
    gc.collect()

print(time.time() )
len(EMB_TEST), EMB_TEST.keys()

# Extract features using Horizontal Flip and small crop

In [1]:
class PawpularDataset_HFLIP:
    """Map-style dataset with an optional horizontal flip plus a small crop.

    Parameters
    ----------
    images : array-like of str
        Image ids; the file read is ``base_path + id + '.jpg'``.
    base_path : str
        Directory prefix (concatenated as-is, so it must end with a slash).
    modelcfg : callable
        An already-built transform applied to the PIL image. Unlike
        ``PawpularDataset``, this is NOT a config dict.
    doflip : bool
        When truthy, mirror the image left-right and crop away 2% at the top,
        bottom and right edges before applying the transform.
    """

    def __init__(self, images, base_path='../input/petfinder-pawpularity-score/train/', modelcfg=None, doflip=False ):
        self.images = images.copy()
        self.base_path = base_path
        self.transform = modelcfg
        self.doflip = doflip

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.images)

    def __getitem__(self, item):
        """Load image number ``item`` as RGB, optionally flip/crop, then transform."""
        img = Image.open(self.base_path + self.images[item] + '.jpg').convert('RGB')

        if self.doflip:
            img = img.transpose(PIL.Image.FLIP_LEFT_RIGHT)
            width, height = img.size
            # Crop box is (left, upper, right, lower).
            img = img.crop((0.0*width, 0.02*height, 0.98*width, 0.98 * height))

        return self.transform(img)


for arch in names_hflip_crop:
    starttime = time.time()

    # The feature-set name encodes the recipe: '<arch>[_hflip]_<size>'.
    name_parts = arch.split('_')
    if arch == 'tf_efficientnet_l2_ns_512':
        archname = 'tf_efficientnet_l2_ns'
    else:
        archname = arch.split('_hflip_')[0]

    model = timm.create_model(archname, pretrained=False).to('cuda')
    model.load_state_dict(torch.load(modelpath[archname]))
    model.eval()

    # Start from the model's default data config, then force the requested
    # square input size and disable the centre-crop margin.
    sz = int(name_parts[-1])
    data_cfg = resolve_data_config({}, model=model)
    data_cfg.update(input_size=(3, sz, sz), crop_pct=1.0)
    transf = create_transform(**data_cfg)

    doflip = name_parts[-2] == 'hflip'
    train_dataset = PawpularDataset_HFLIP(
        images = test.Id.values,
        base_path='../input/petfinder-pawpularity-score/test/',
        modelcfg = transf,
        doflip = doflip,
    )

    # EfficientNet-L2 gets a smaller batch than the other architectures.
    BS = 10 if archname == 'tf_efficientnet_l2_ns' else 16
    train_dataloader = DataLoader(train_dataset, batch_size=BS, num_workers= 2, shuffle=False)

    with torch.no_grad():
        batch_outputs = [model(img.to('cuda')).cpu().numpy() for img in train_dataloader]
    res = np.concatenate(batch_outputs, 0)
    EMB_TEST[arch] = res

    print( arch, 'imge size:', sz, 'Hflip:', doflip, ',Done in:', int(time.time() - starttime), 's' )

    # Release the model and cached GPU memory before the next feature set.
    del model, res, batch_outputs
    torch.cuda.empty_cache()
    gc.collect()

print(time.time() )
len(EMB_TEST), EMB_TEST.keys()

# Now extract TESTSET features from CLIP architecture

In [1]:
class CustomDataset:
    """Map-style dataset over a DataFrame with an ``Id`` column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``Id`` column; the file read is
        ``base_path + Id + '.jpg'``. The caller's frame is never modified.
    base_path : str
        Directory prefix (concatenated as-is, so it must end with a slash).
    preprocess : callable
        Transform applied to the RGB PIL image.
    """

    def __init__(self, data, base_path='../input/petfinder-pawpularity-score/test/', preprocess=None):
        # reset_index returns a new frame, so the caller's data stays untouched,
        # and it guarantees a 0..n-1 index. The DataLoader asks for positions
        # 0..len-1, which a filtered or shuffled frame's labels would not match.
        self.data = data.reset_index(drop=True)
        self.base_path = base_path
        if 'Pawpularity' not in self.data.columns:
            self.data['Pawpularity'] = 0
        self.preprocess = preprocess

    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.data)

    def __getitem__(self, item):
        """Load the image at position ``item`` as RGB and return it preprocessed."""
        # Positional lookup: independent of the DataFrame's index labels.
        image_id = self.data['Id'].iloc[item]
        img = Image.open(self.base_path + image_id + '.jpg').convert("RGB")
        return self.preprocess(img)


for m in ['RN50', 'RN101', 'RN50x4', 'RN50x16', 'ViT-B-16', 'ViT-B-32']:
    starttime = time.time()
    model, preprocess = clip.load("../input/openaiclipweights/clip/CLIP/models/"+m+".pt")
    model.cuda().eval()

    test_dataset = CustomDataset(data = test, base_path='../input/petfinder-pawpularity-score/test/', preprocess=preprocess)
    test_data_loader = DataLoader(test_dataset, batch_size=64, num_workers=2, shuffle=False, pin_memory=True)

    # Image embeddings are stored as produced by encode_image (no L2 normalisation).
    with torch.no_grad():
        EMB = [model.encode_image(batch.to('cuda')).cpu().numpy() for batch in test_data_loader]
    EMB = np.concatenate(EMB, 0).astype('float32')
    gc.collect()

    EMB_TEST['clip_'+m] = EMB
    print( m, ', Done in:', int(time.time() - starttime), 's' )

    # Release the model and cached GPU memory before loading the next one.
    del model
    torch.cuda.empty_cache()
    gc.collect()

gc.collect()
print(EMB_TEST.keys())

# Load TRAINSET extracted features (made offline)

In [1]:
# Train-set features of the timm models (computed offline).
EMB_TRAIN = joblib.load('../input/petfinderdata/train-embeddings-direct-1.joblib')
gc.collect()

# Add the CLIP train-set features under their own keys.
resclip = joblib.load('../input/openai-clip/train-embeddings-openai-clip-1.joblib')
EMB_TRAIN.update(resclip)
del resclip
gc.collect()

# Add only the flip/crop feature sets that are used in this notebook.
hflipmodels = joblib.load('../input/petfinder-extracted-pretrained-1/extracted-pretrained-1.joblib')
EMB_TRAIN.update((col, hflipmodels[col]) for col in names_hflip_crop)
del hflipmodels
gc.collect()

print( len(EMB_TRAIN) )
print(EMB_TRAIN.keys())

# Check the shape of the features

In [1]:
# Train and test feature matrices of a feature set must share the same width.
for m, test_features in EMB_TEST.items():
    print(EMB_TRAIN[m].shape, test_features.shape, m)

In [1]:
# Feature sets concatenated for SVR A.
names0 = [
    'clip_RN50x16',
    'clip_ViT-B-32',
    'clip_ViT-B-16',
    'clip_RN50x4',
    'deit_base_distilled_patch16_384',
    'ig_resnext101_32x48d',
    'repvgg_b0',
    'resnetv2_152x4_bitm',
    'swsl_resnext101_32x8d',
    'tf_efficientnet_l2_ns_475',
    'vit_base_patch16_384',
    'vit_large_r50_s32_384',
]

# Feature sets concatenated for SVR B.
names1 = [
    'clip_RN50x16',
    'clip_RN101', 
    'clip_RN50',
    'fbnetc_100',
    'ig_resnext101_32x8d',
    'rexnet_200',
    'resnest269e',
    'tf_efficientnet_b6_ns',
    'tf_efficientnet_b8_ap',
    'tf_efficientnet_b7_ns',
    'vit_large_patch16_384',
]

# Feature sets concatenated for SVR C.
names2 = [
    'tf_efficientnet_l2_ns_hflip_384',
    'deit_base_distilled_patch16_384_hflip_384',
    'ig_resnext101_32x48d_hflip_384',
    'tf_efficientnet_l2_ns_512',
    'ig_resnext101_32x48d',
    'vit_large_r50_s32_384',
    'clip_RN50x4',
    'clip_ViT-B-16',
    'clip_RN50x16',
    'clip_ViT-B-32',
]

# Union of every feature set used by the three SVRs. This rebinds `names`
# (previously the list of timm architectures) to a sorted NumPy array of
# unique names.
names = np.unique(names0 + names1 + names2)
len(names), names

# Just clean memory of offline trainset features not going to be used here

In [1]:
# Snapshot the keys first: the dictionary is modified while we walk over them.
feats = list(EMB_TRAIN.keys())
for n in (key for key in feats if key not in names):
    # Not used by any SVR: drop it and reclaim the memory right away.
    del EMB_TRAIN[n]
    gc.collect()

# Now its time to fit a GPU accelerated SVR using cuml 

In [1]:
from cuml.svm import SVR
from sklearn.preprocessing import StandardScaler

def fit_gpu_svr(TRAIN, TEST, kfoldcol='fold0'):
    """Fit one cuML SVR per fold and return OOF and fold-averaged test predictions.

    Reads the module-level ``train`` and ``test`` DataFrames: ``train`` supplies
    the fold column and the ``Pawpularity`` target, ``test`` only its row count.

    Parameters
    ----------
    TRAIN : array
        Feature matrix with one row per row of ``train``.
    TEST : array
        Feature matrix with one row per row of ``test``.
    kfoldcol : str
        Column of ``train`` holding fold ids ``0..max``.

    Returns
    -------
    (ypredtrain_, ypredtest_) : tuple of numpy arrays
        Out-of-fold predictions for ``train`` and the mean over folds of the
        predictions for ``test``. Every prediction is clipped to [1, 100].
    """
    n_folds = train[kfoldcol].max() + 1
    ypredtrain_ = np.zeros(train.shape[0])
    ypredtest_ = np.zeros(test.shape[0])

    for fold in range(n_folds):
        ind_valid = train[kfoldcol] == fold
        ind_train = ~ind_valid

        # The training target is clipped to [1, 85].
        fold_target = train.Pawpularity[ind_train].clip(1, 85)
        model = SVR(C=16.0, kernel='rbf', degree=3, max_iter=4000, output_type='numpy')
        model.fit(TRAIN[ind_train], fold_target)

        ypredtrain_[ind_valid] = np.clip(model.predict(TRAIN[ind_valid]), 1 , 100)
        ypredtest_ += np.clip(model.predict(TEST), 1, 100)

        del model
        gc.collect()

    # Turn the running sum over folds into a mean.
    ypredtest_ /= n_folds

    return ypredtrain_, ypredtest_

def rmse(ytrue, ypred):
    """Return the root-mean-squared error between ``ytrue`` and ``ypred``."""
    residual = ytrue - ypred
    return np.sqrt(np.mean(np.square(residual)))

# First, lets fit one SVR for each architecture independently

In [1]:
for col in names:
    # Standardise using statistics computed on train and test stacked together.
    scaler = StandardScaler().fit(np.vstack((EMB_TRAIN[col], EMB_TEST[col])))
    TRAIN = scaler.transform(EMB_TRAIN[col])
    TEST = scaler.transform(EMB_TEST[col])

    # Out-of-fold RMSE of an SVR trained on this single feature set.
    ypredtrain, ypredtest = fit_gpu_svr(TRAIN, TEST, 'fold0')
    print(rmse(train.Pawpularity,ypredtrain), col)

# As you can see above, features extracted from individual architectures have SVR RMSE ranging from 17.56 to 18.52.
# But what happens if we stack some architecture features side by side before fitting the SVR?

# Concatenate some features and standardize

# Fit the SVR A using all K Folds.
# I noticed that clipping the target at 85 slightly improves (lowers) the RMSE.

In [1]:
print('Concatenating:', names0)

# Stack the selected feature sets side by side (one block of columns each).
TRAIN = np.concatenate([EMB_TRAIN[k] for k in names0], axis=1)
TEST = np.concatenate([EMB_TEST[k] for k in names0], axis=1)

# Standardise with statistics computed on train and test together.
scaler = StandardScaler().fit(np.vstack((TRAIN, TEST)))
gc.collect()

TRAIN, TEST = scaler.transform(TRAIN), scaler.transform(TEST)
gc.collect()

# Check the output shape
print(TRAIN.shape, TEST.shape)

# SVR A: out-of-fold and test predictions, plus the OOF RMSE.
ypredtrainA, ypredtestA = fit_gpu_svr(TRAIN, TEST, 'fold0')
print(rmse(train.Pawpularity, ypredtrainA))

In [1]:
# Drop the concatenated feature matrices, then release host and cached GPU memory.
del TRAIN
del TEST
gc.collect()
torch.cuda.empty_cache()

# I also noticed that multiplying the predictions by 1.032 improves both CV and LB. This may be because the SVR does not optimize RMSE directly (it minimizes an epsilon-insensitive loss) and is trained on a target clipped at 85.

In [1]:
# OOF RMSE of SVR A after scaling its predictions by 1.032.
print('RMSE:', rmse(train.Pawpularity, 1.032*ypredtrainA))

# Now fit SVR B using a second subset of features. Fitting several subsets adds diversity to the later model ensemble and avoids the curse of dimensionality that comes from growing the number of features too much.

In [1]:
print('Concatenating:', names1)

# Stack the selected feature sets side by side (one block of columns each).
TRAIN = np.concatenate([EMB_TRAIN[k] for k in names1], axis=1)
TEST = np.concatenate([EMB_TEST[k] for k in names1], axis=1)

# Standardise with statistics computed on train and test together.
scaler = StandardScaler().fit(np.vstack((TRAIN, TEST)))
gc.collect()

TRAIN, TEST = scaler.transform(TRAIN), scaler.transform(TEST)
gc.collect()

print( TRAIN.shape, TEST.shape )

# SVR B: out-of-fold and test predictions, plus the OOF RMSE.
ypredtrainB, ypredtestB = fit_gpu_svr(TRAIN, TEST, 'fold0')
print('RMSE:', rmse(train.Pawpularity, ypredtrainB))

In [1]:
# Drop the concatenated feature matrices, then release host and cached GPU memory.
del TRAIN
del TEST
gc.collect()
torch.cuda.empty_cache()

# SVR C

In [1]:
print('Concatenating:', names2)

# Stack the selected feature sets side by side (one block of columns each).
TRAIN = np.concatenate([EMB_TRAIN[k] for k in names2], axis=1)
TEST = np.concatenate([EMB_TEST[k] for k in names2], axis=1)

# Standardise with statistics computed on train and test together.
scaler = StandardScaler().fit(np.vstack((TRAIN, TEST)))
gc.collect()

TRAIN, TEST = scaler.transform(TRAIN), scaler.transform(TEST)
gc.collect()

print( TRAIN.shape, TEST.shape )

# SVR C: out-of-fold and test predictions, plus the OOF RMSE.
ypredtrainC, ypredtestC = fit_gpu_svr(TRAIN, TEST, 'fold0')
print('RMSE:', rmse(train.Pawpularity, ypredtrainC))

# Free RAM and GPU memory

In [1]:
# The SVR stage is finished: drop every feature matrix and embedding cache.
del TRAIN, TEST, EMB_TRAIN, EMB_TEST
gc.collect()

# Hand PyTorch's cached GPU memory back before the fine-tuned models are loaded.
torch.cuda.empty_cache()
gc.collect()

# Now run inference using Deep Learning finetuned image models.

In [1]:
from torch.utils.data import Dataset, DataLoader
import albumentations as A

device = torch.device('cuda')


class Config:
    """Default inference settings; later cells redefine this class per experiment."""

    # Model
    model_name = model_path = "swin_large_patch4_window7_224"
    im_size = 384
    batch_size = 16

    # Locations
    base_dir = data_dir = "../input/petfinder-pawpularity-score"
    img_test_dir = os.path.join(base_dir, "test")
    model_dir = output_dir = "exp"


class PetDataset(Dataset):
    """Torch dataset returning ``(image, target)`` pairs as float tensors.

    Parameters
    ----------
    image_filepaths : sequence of str
        Path of every image file.
    targets : sequence of numbers
        One target per image, indexed like ``image_filepaths``.
    transform : callable, optional
        Called as ``transform(image=array)``; its ``"image"`` entry is used.
    """

    def __init__(self, image_filepaths, targets, transform=None):
        self.image_filepaths = image_filepaths
        self.targets = targets
        self.transform = transform

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_filepaths)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a channel-first float image in [0, 1] and its target."""
        with open(self.image_filepaths[idx], 'rb') as fh:
            pixels = np.array(Image.open(fh).convert('RGB'))

        if self.transform is not None:
            pixels = self.transform(image = pixels)["image"]

        # Scale to [0, 1] and move the channel axis first (HWC -> CHW).
        chw = np.transpose(pixels / 255, (2, 0, 1)).astype(np.float32)

        image = torch.tensor(chw, dtype = torch.float)
        target = torch.tensor(self.targets[idx], dtype = torch.float)
        return image, target


def get_inference_fixed_transforms(mode=0, dim = 224):
    """Build the deterministic albumentations pipeline for one TTA mode.

    Parameters
    ----------
    mode : int
        0: resize the shortest side to ``dim``, then centre-crop ``dim x dim``.
        1: resize the shortest side to ``dim + 16``, centre-crop ``dim x dim``,
           then always flip horizontally.
    dim : int
        Side length of the square output image.

    Returns
    -------
    albumentations.Compose

    Raises
    ------
    ValueError
        If ``mode`` is neither 0 nor 1. Previously this returned ``None``
        silently, which disabled resizing altogether in the dataset.
    """
    if mode not in (0, 1):
        raise ValueError(f'Unknown TTA mode {mode!r}; expected 0 or 1')

    # Mode 1 resizes slightly larger so the centre crop acts as a mild zoom.
    shortest_side = dim if mode == 0 else dim + 16
    steps = [
        A.SmallestMaxSize(max_size=shortest_side, p=1.0),
        A.CenterCrop(height=dim, width=dim, p=1.0),
    ]
    if mode == 1:
        steps.append(A.HorizontalFlip(p = 1.0))
    return A.Compose(steps, p=1.0)


class PetNet(nn.Module):
    """timm backbone whose classifier outputs the regression logit(s) directly.

    Parameters
    ----------
    model_name : str
        timm architecture name. The default is read from ``Config.model_path``
        once, when this class is defined.
    out_features : int
        Number of outputs of the backbone's classifier.
    inp_channels : int
        Number of input image channels.
    pretrained : bool
        Whether timm should load its own pretrained weights.
    """

    def __init__(
        self,
        model_name = Config.model_path,
        out_features = 1,
        inp_channels = 3,
        pretrained = False,
    ):
        super().__init__()
        # The constructor arguments used to be ignored in favour of hard-coded
        # values; the defaults reproduce those values exactly.
        self.model = timm.create_model(
            model_name,
            pretrained=pretrained,
            in_chans=inp_channels,
            num_classes=out_features,
        )

    def forward(self, image):
        """Return the raw (pre-sigmoid) model output for a batch of images."""
        return self.model(image)


def tta_fn(filepaths, model, ttas=(0, 1)):
    """Predict every image under each TTA mode and average the results.

    Reads the module-level ``Config`` (``im_size``, ``batch_size``) and
    ``device``.

    Parameters
    ----------
    filepaths : sequence of str
        Image files to predict.
    model : torch.nn.Module
        Model returning one logit per image; it is switched to eval mode.
    ttas : iterable of int
        Modes passed to ``get_inference_fixed_transforms``.

    Returns
    -------
    numpy.ndarray
        One prediction per file: ``sigmoid(logit) * 100`` averaged over modes.

    Raises
    ------
    ValueError
        If ``ttas`` is empty.
    """
    ttas = list(ttas)
    if not ttas:
        raise ValueError('ttas must contain at least one TTA mode')

    print('Image Size:', Config.im_size)
    model.eval()
    tta_preds = []
    for tta_mode in ttas:
        print(f'tta mode:{tta_mode}')
        test_dataset = PetDataset(
          image_filepaths = filepaths,
          targets = np.zeros(len(filepaths)),
          transform = get_inference_fixed_transforms(tta_mode, dim = Config.im_size )
        )
        test_loader = DataLoader(
          test_dataset,
          batch_size = Config.batch_size,
          shuffle = False,
          num_workers = 2,
          pin_memory = True
        )
        tta_pred = []
        with torch.no_grad():
            # The dummy targets are not needed for inference, so they stay on the CPU.
            for images, _ in test_loader:
                images = images.to(device, non_blocking = True).float()
                output = model(images)
                pred = (torch.sigmoid(output).cpu().numpy() * 100).ravel().tolist()
                tta_pred.extend(pred)
        tta_preds.append(np.array(tta_pred))
        del test_loader, test_dataset

    # Averaging into a fresh array leaves the per-mode predictions untouched.
    fold_preds = np.mean(tta_preds, axis=0)

    gc.collect()
    torch.cuda.empty_cache()
    return fold_preds

In [1]:
# Paths of every test image, copied out of the 'path' column of `test`;
# this array is passed to tta_fn by each inference cell below.
filepaths = test['path'].values.copy()
len(filepaths)

In [1]:
%%time

class Config:
    """Inference settings for experiment exp53."""

    # Model
    model_name = model_path = "swin_large_patch4_window7_224"
    im_size = 224
    batch_size = 16

    # Locations
    model_dir = output_dir = "exp53"
    base_dir = data_dir = "../input/petfinder-pawpularity-score"
    img_test_dir = os.path.join(base_dir, "test")


test_preds = []  # unused in this cell; kept in case other code reads it
test_preds_model = []
# Sorted so that the fold order is the same on every run.
modelfiles = sorted(glob('../input/petfinder-'+Config.model_dir+'/*.pth'))
if not modelfiles:
    # Without this guard np.mean over an empty list would silently yield NaN.
    raise FileNotFoundError('No .pth checkpoints found in ../input/petfinder-'+Config.model_dir)
for model_path in modelfiles:
    print(f'inference: {model_path}')
    model = PetNet(
        model_name = Config.model_path,
        out_features = 1,
        inp_channels = 3,
        pretrained=False
    )
    model.load_state_dict(torch.load(model_path))
    model = model.to(device)
    model = model.float()
    model.eval()
    # This experiment is predicted with TTA mode 1 only.
    test_preds_fold = tta_fn(filepaths, model, [1] )
    test_preds_model.append(test_preds_fold)

# Out-of-fold predictions of this experiment, and the mean test prediction over folds.
oof53 = pd.read_csv('../input/petfinder-'+Config.model_dir+'/oof_tta.csv')
final_predictions53 = np.mean(np.array(test_preds_model), axis=0)
final_predictions53

In [1]:
%%time
class Config:
    """Inference settings for experiment exp55."""

    # Model
    model_name = model_path = "beit_large_patch16_224"
    im_size = 224
    batch_size = 16

    # Locations
    model_dir = output_dir = "exp55"
    base_dir = data_dir = "../input/petfinder-pawpularity-score"
    img_test_dir = os.path.join(base_dir, "test")


test_preds = []  # unused in this cell; kept in case other code reads it
test_preds_model = []
# Sorted so that the fold order is the same on every run.
modelfiles = sorted(glob('../input/petfinder-'+Config.model_dir+'/*.pth'))
if not modelfiles:
    # Without this guard np.mean over an empty list would silently yield NaN.
    raise FileNotFoundError('No .pth checkpoints found in ../input/petfinder-'+Config.model_dir)
for model_path in modelfiles:
    print(f'inference: {model_path}')
    model = PetNet(
        model_name = Config.model_path,
        out_features = 1,
        inp_channels = 3,
        pretrained = False
    )
    model.load_state_dict(torch.load(model_path))
    model = model.to(device)
    model = model.float()
    model.eval()
    # This experiment is predicted with TTA mode 0 only.
    test_preds_fold = tta_fn(filepaths, model, [0] )
    test_preds_model.append(test_preds_fold)


# Out-of-fold predictions of this experiment, and the mean test prediction over folds.
oof55 = pd.read_csv('../input/petfinder-'+Config.model_dir+'/oof_tta.csv')
final_predictions55 = np.mean(np.array(test_preds_model), axis=0)
final_predictions55

In [1]:
%%time
class Config:
    """Inference settings for experiment exp66."""

    # Model
    model_name = model_path = "swin_large_patch4_window12_384_in22k"
    im_size = 384
    batch_size = 16

    # Locations
    model_dir = output_dir = "exp66"
    base_dir = data_dir = "../input/petfinder-pawpularity-score"
    img_test_dir = os.path.join(base_dir, "test")

    
test_preds = []  # unused in this cell; kept in case other code reads it
test_preds_model = []
# Sorted so that the fold order is the same on every run.
modelfiles = sorted(glob('../input/petfinder-'+Config.model_dir+'/*.pth'))
if not modelfiles:
    # Without this guard np.mean over an empty list would silently yield NaN.
    raise FileNotFoundError('No .pth checkpoints found in ../input/petfinder-'+Config.model_dir)
for model_path in modelfiles:
    print(f'inference: {model_path}')
    model = PetNet(
        model_name = Config.model_path,
        out_features = 1,
        inp_channels = 3,
        pretrained = False
    )
    model.load_state_dict(torch.load(model_path))
    model = model.to(device)
    model = model.float()
    model.eval()
    # This experiment is predicted with TTA mode 0 only.
    test_preds_fold = tta_fn(filepaths, model, [0] )
    test_preds_model.append(test_preds_fold)

# Out-of-fold predictions of this experiment, and the mean test prediction over folds.
oof66 = pd.read_csv('../input/petfinder-'+Config.model_dir+'/oof_tta.csv')
final_predictions66 = np.mean(np.array(test_preds_model), axis=0)
final_predictions66

In [1]:
%%time
class Config:
    """Inference settings for experiment exp77."""

    # Model
    model_name = model_path = "beit_large_patch16_224"
    im_size = 224
    batch_size = 16

    # Locations
    model_dir = output_dir = "exp77"
    base_dir = data_dir = "../input/petfinder-pawpularity-score"
    img_test_dir = os.path.join(base_dir, "test")

    
class PetNet(nn.Module):
    """timm backbone followed by dropout (p=0.05) and a linear regression head.

    This redefinition replaces the earlier ``PetNet``: the backbone keeps its
    own classifier and a separate ``head`` maps its output to the prediction.

    Parameters
    ----------
    model_name : str
        timm architecture name. The default is read from ``Config.model_path``
        once, when this class is defined.
    out_features : int
        Number of outputs of the linear head.
    inp_channels : int
        Number of input image channels.
    pretrained : bool
        Whether timm should load its own pretrained weights.
    """

    def __init__(
        self,
        model_name = Config.model_path,
        out_features = 1,
        inp_channels = 3,
        pretrained = False
    ):
        super().__init__()
        # Width of the backbone output fed to the head.
        # NOTE(review): assumes the backbone's default classifier has 1000 outputs — confirm.
        NC = 1000
        # The constructor arguments used to be ignored; the defaults reproduce
        # the previously hard-coded values.
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels)
        self.dropout = nn.Dropout(0.05)
        self.head = nn.Linear(NC, out_features)

    def forward(self, image):
        """Return the raw (pre-sigmoid) prediction for a batch of images."""
        output = self.model(image)
        output = self.dropout(output)
        return self.head(output)
    
test_preds = []  # unused in this cell; kept in case other code reads it
test_preds_model = []
# Sorted so that the fold order is the same on every run.
modelfiles = sorted(glob('../input/petfinder-'+Config.model_dir+'/*.pth'))
if not modelfiles:
    # Without this guard np.mean over an empty list would silently yield NaN.
    raise FileNotFoundError('No .pth checkpoints found in ../input/petfinder-'+Config.model_dir)
for model_path in modelfiles:
    print(f'inference: {model_path}')
    model = PetNet(
        model_name = Config.model_path,
        out_features = 1,
        inp_channels = 3,
        pretrained = False
    )
    model.load_state_dict(torch.load(model_path))
    model = model.to(device)
    model = model.float()
    model.eval()
    # This experiment is predicted with TTA mode 0 only.
    test_preds_fold = tta_fn(filepaths, model, [0] )
    test_preds_model.append(test_preds_fold)

# Out-of-fold predictions of this experiment, and the mean test prediction over folds.
oof77 = pd.read_csv('../input/petfinder-'+Config.model_dir+'/oof_tta.csv')
final_predictions77 = np.mean(np.array(test_preds_model), axis=0)
final_predictions77

In [1]:
%%time
class Config:
    """Inference settings for experiment exp82."""

    # Model
    model_name = model_path = "tf_efficientnet_b6_ns"
    im_size = 528
    batch_size = 16

    # Locations
    model_dir = output_dir = "exp82"
    base_dir = data_dir = "../input/petfinder-pawpularity-score"
    img_test_dir = os.path.join(base_dir, "test")


class PetNet(nn.Module):
    """timm backbone followed by dropout (p=0.15) and a linear regression head.

    This redefinition replaces the earlier ``PetNet`` definitions.

    Parameters
    ----------
    model_name : str
        timm architecture name. The default is read from ``Config.model_path``
        once, when this class is defined.
    out_features : int
        Number of outputs of the linear head.
    inp_channels : int
        Number of input image channels.
    pretrained : bool
        Whether timm should load its own pretrained weights.
    """

    def __init__(
        self,
        model_name = Config.model_path,
        out_features = 1,
        inp_channels = 3,
        pretrained = False
    ):
        super().__init__()
        # Width of the backbone output fed to the head.
        # NOTE(review): assumes the backbone's default classifier has 1000 outputs — confirm.
        NC = 1000
        # `pretrained` and `inp_channels` used to be ignored; the defaults
        # reproduce the previously hard-coded values.
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels)
        self.dropout = nn.Dropout(0.15)
        self.head = nn.Linear(NC, out_features)

    def forward(self, image):
        """Return the raw (pre-sigmoid) prediction for a batch of images."""
        output = self.model(image)
        output = self.dropout(output)
        return self.head(output)
    
test_preds = []  # unused in this cell; kept in case other code reads it
test_preds_model = []
# Sorted so that the fold order is the same on every run.
modelfiles = sorted(glob('../input/petfinder-'+Config.model_dir+'/*.pth'))
if not modelfiles:
    # Without this guard np.mean over an empty list would silently yield NaN.
    raise FileNotFoundError('No .pth checkpoints found in ../input/petfinder-'+Config.model_dir)
for model_path in modelfiles:
    print(f'inference: {model_path}')
    model = PetNet(
        model_name = Config.model_path,
        out_features = 1,
        inp_channels = 3,
        pretrained = False
    )
    model.load_state_dict(torch.load(model_path))
    model = model.to(device)
    model = model.float()
    model.eval()
    # This experiment is predicted with TTA mode 0 only.
    test_preds_fold = tta_fn(filepaths, model, [0] )
    test_preds_model.append(test_preds_fold)

# Out-of-fold predictions of this experiment, and the mean test prediction over folds.
oof82 = pd.read_csv('../input/petfinder-'+Config.model_dir+'/oof_tta.csv')
final_predictions82 = np.mean(np.array(test_preds_model), axis=0)
final_predictions82

# Weighted average image models

In [1]:
# (OOF frame, blend weight) for every fine-tuned image model.
# NOTE(review): the 'pred' columns are combined by index, which assumes all OOF
# files list their rows in the same order — confirm.
oof_blend = [(oof53, 3), (oof55, 4), (oof66, 3), (oof77, 4), (oof82, 2)]

oof = oof53.copy()
oof['pred'] = sum(weight * frame['pred'] for frame, weight in oof_blend) / sum(weight for _, weight in oof_blend)

# Reorder the blended OOF predictions to follow the rows of `train`.
final_train_predictions = train.merge(oof, on='Id', how='left')['pred'].values.copy()

rmse(train.Pawpularity.values, final_train_predictions)

In [1]:
# (test predictions, blend weight) — the same weights as the OOF blend.
test_blend = [
    (final_predictions53, 3),
    (final_predictions55, 4),
    (final_predictions66, 3),
    (final_predictions77, 4),
    (final_predictions82, 2),
]
final_test_predictions = sum(weight * preds for preds, weight in test_blend) / sum(weight for _, weight in test_blend)

# Optimize the overall RMSE using OOF prediction of cuML SVR A, B, C, and Image models ensemble.

In [1]:
from scipy.optimize import minimize

def min_func(K):
    """Objective for the optimizer: OOF RMSE of the blend weighted by ``K``.

    ``K[0..2]`` weight the OOF predictions of SVR A, B and C; ``K[3]`` weights
    the blended image-model OOF predictions. All four are module-level arrays.
    """
    w_a, w_b, w_c, w_img = K[0], K[1], K[2], K[3]
    blended = w_a*ypredtrainA + w_b*ypredtrainB + w_c*ypredtrainC + w_img*final_train_predictions
    return rmse(train.Pawpularity, blended)
   
# Start from equal weights and search for the blend with the lowest OOF RMSE.
res = minimize(min_func, [0.25, 0.25, 0.25, 0.25], method='Nelder-Mead', tol=1e-6)
K = res.x
res

In [1]:
# Blend the OOF predictions with the optimized weights to report the final CV score.
ypredtrain = K[0]*ypredtrainA + K[1]*ypredtrainB + K[2]*ypredtrainC + K[3]*final_train_predictions

# Apply the same weights to the test predictions of the four components.
test['Pawpularity'] = K[0]*ypredtestA + K[1]*ypredtestB + K[2]*ypredtestC + K[3]*final_test_predictions

print('Ensemble weights:', K )
# The comma after the label was missing, which made this cell a SyntaxError.
print('Final RMSE:', rmse(train.Pawpularity, ypredtrain) )

In [1]:
# Preview the first 8 test rows together with their blended Pawpularity prediction.
test.head(8)

In [1]:
# On a small (public) test set the rows were replicated 5x to probe memory use;
# keep a single row per Id so the submission never contains duplicate Ids.
submission = test[['Id','Pawpularity']].drop_duplicates(subset='Id', keep='first')
submission.to_csv('submission.csv', index=False)