In [None]:
import sys
import os
import time
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import glob
import cv2
import matplotlib.pyplot as plt
import joblib
import gc
from glob import glob
from PIL import Image
import PIL

sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import cuml

# Log the versions of the key libraries (one per line) so a run can be traced
# back to the environment that produced it.
print(*(lib.__version__ for lib in (np, pd, torch, timm)), sep='\n')

# Install OpenAI CLIP

In [None]:
!pip install ftfy regex
!pip install git+https://github.com/openai/CLIP.git

In [None]:
import clip

# Image paths

In [None]:
# glob() returns matches in arbitrary, filesystem-dependent order; sorting makes
# positional picks such as images_path[1] reproducible from run to run.
images_path = sorted(glob('../input/petfinder-pawpularity-score/train/*.jpg'))

# Cell output: how many training images were found, plus a small sample of paths.
len(images_path), images_path[:4]

In [None]:
# Map each checkpoint's base name (file name up to its first '.') to its full
# path, scanning the four weight directories in order. If the same base name
# occurs more than once, the last path found wins.
modelpath = {
    weight_file.split('/')[-1].split('.')[0]: weight_file
    for pattern in (
        '../input/pytorch-pretrained-0/*.pt',
        '../input/pytorch-pretrained-1/*.pt',
        '../input/pytorch-pretrained-2/*.pt',
        '../input/pytorch-pretrained-3/*.pt',
    )
    for weight_file in glob(pattern)
}
modelpath

# The pretrained models found by the forward model selection algorithm used in this solution are listed above.

In [None]:
%%time
starttime = time.time()

# Registries used by the rest of the notebook:
#   MODELS[name]  -> network in eval mode
#   PREPROC[name] -> preprocessing transform matching that network
MODELS = {}
PREPROC = {}

# These backbones are registered under '<arch>_hflip_384'.
for arch in (
    'tf_efficientnet_l2_ns',
    'deit_base_distilled_patch16_384',
    'ig_resnext101_32x48d',
):
    print('Loading:', arch)
    hflip_name = arch + '_hflip_384'

    # Build the bare architecture on the GPU, then restore the local checkpoint.
    MODELS[hflip_name] = timm.create_model(arch, pretrained=False).to('cuda')
    MODELS[hflip_name].load_state_dict(torch.load(modelpath[arch]))

    # Switch to half precision and inference mode.
    MODELS[hflip_name] = MODELS[hflip_name].half()
    MODELS[hflip_name].eval()

    # Preprocessing is derived from the model's own data configuration.
    data_config = resolve_data_config({}, model=MODELS[hflip_name])
    PREPROC[hflip_name] = create_transform(**data_config)

    # Seconds elapsed since the start of this cell.
    print(time.time() - starttime, 's')
    print()
    _ = gc.collect()

print('Done')

In [None]:
%%time
starttime = time.time()

# These backbones are registered under their bare architecture name.
for arch in [
    'tf_efficientnet_l2_ns',
    'ig_resnext101_32x48d',
    'vit_large_r50_s32_384',
    ]:
    print('Loading:', arch)

    hflip_name = arch + '_hflip_384'
    if hflip_name in MODELS:
        # The '<arch>_hflip_384' entry was built from the same architecture and
        # the same checkpoint (modelpath[arch]) and is already a half-precision
        # eval-mode network on the GPU. Sharing that instance avoids building
        # and holding a second copy of a very large model.
        # Both names now refer to ONE module: do not mutate either in place.
        MODELS[arch] = MODELS[hflip_name]
    else:
        net = timm.create_model(arch, pretrained=False).to('cuda')
        net.load_state_dict(torch.load(modelpath[arch]))
        MODELS[arch] = net.half().eval()

    # Preprocessing is derived from the model's own data configuration.
    PREPROC[arch] = create_transform(**resolve_data_config({}, model=MODELS[arch]))

    # Seconds elapsed since the start of this cell.
    print( time.time() - starttime, 's' )
    print( )
    _ = gc.collect()

print('Done')

In [None]:
# Start a timer for this cell so the elapsed times printed below describe the
# CLIP loading itself rather than a timestamp left over from an earlier cell.
starttime = time.time()

# CLIP image encoders, registered under 'clip_<variant>'.
for arch in [
    'RN50x4',
    'ViT-B-16',
    'RN50x16',
    'ViT-B-32',
    ]:
    print('Loading:', arch)
    clip_name = 'clip_' + arch

    # clip.load returns the model together with its preprocessing transform.
    MODELS[clip_name], PREPROC[clip_name] = clip.load("../input/openaiclipweights/clip/CLIP/models/"+arch+".pt")

    # CLIP models are kept in float32 (the timm models above use half precision).
    MODELS[clip_name] = MODELS[clip_name].float()
    MODELS[clip_name].cuda().eval()

    print( time.time() - starttime, 's' )
    print( )
    _ = gc.collect()

In [None]:
def load_image_and_transform(path='', transform = None, flip = False, resize = 0):
    """Read an image file and turn it into a batched, half-precision model input.

    Args:
        path: Location of the image file; opened with PIL and converted to RGB.
        transform: Callable applied to the PIL image. Its result must support
            ``.unsqueeze(0)`` and ``.half()``. Despite the ``None`` default, a
            callable is required.
        flip: When equal to ``True`` the image is mirrored left-to-right and
            then cropped (see the crop box below).
        resize: When greater than 0 the image is first resized to
            ``resize x resize`` (aspect ratio is not preserved).

    Returns:
        ``transform(image)`` with a leading axis of size 1 added, cast with
        ``.half()``.
    """
    picture = Image.open(path).convert('RGB')

    if resize > 0:
        picture = picture.resize((resize, resize))

    if flip == True:
        mirrored = picture.transpose(PIL.Image.FLIP_LEFT_RIGHT)
        w, h = mirrored.size
        # Box is (left, upper, right, lower): trims 2% off the top, the bottom
        # and the right edge of the mirrored image; the left edge is kept.
        crop_box = (0.0 * w, 0.02 * h, 0.98 * w, 0.98 * h)
        picture = mirrored.crop(crop_box)

    batch = transform(picture).unsqueeze(0)
    return batch.half()

# Restore the regressor and the feature scaler used by predict_single_image.
# joblib.load unpickles its input, so only point it at artefacts you trust.
SVRMODEL, SVRSCALE = (
    joblib.load(artifact)
    for artifact in (
        '../input/petfinder-svr-weight/svr-model-full.joblib',
        '../input/petfinder-svr-weight/svr-scaler.joblib',
    )
)
gc.collect()

In [None]:
# Ordered list of feature extractors. The order fixes the column layout of the
# concatenated feature vector, so it must not be changed independently of the
# fitted scaler/regressor.
names = (
    # timm backbones registered under '<arch>_hflip_384'
    [backbone + '_hflip_384' for backbone in (
        'tf_efficientnet_l2_ns',
        'deit_base_distilled_patch16_384',
        'ig_resnext101_32x48d',
    )]
    # timm backbones registered under their bare architecture name
    + [
        'tf_efficientnet_l2_ns',
        'ig_resnext101_32x48d',
        'vit_large_r50_s32_384',
    ]
    # CLIP image encoders registered under 'clip_<variant>'
    + ['clip_' + variant for variant in (
        'RN50x4',
        'ViT-B-16',
        'RN50x16',
        'ViT-B-32',
    )]
)

In [None]:
%%time

def predict_single_image(file):
    """Predict the score of one image with the feature-extractor + SVR pipeline.

    Every network listed in ``names`` extracts features from the image; the
    features are concatenated along axis 1, scaled with ``SVRSCALE`` and passed
    to ``SVRMODEL``.

    Args:
        file: Path of the image file.

    Returns:
        Whatever ``SVRMODEL.predict`` returns for the single feature row
        (presumably a length-1 array -- confirm against the fitted model).
    """
    features = []
    with torch.no_grad():
        for m in names:
            # The decision must depend on the model currently being processed
            # (`m`), not on a variable left over from an earlier cell. Only the
            # plain EfficientNet-L2 entry gets its input pre-resized to 512x512.
            # NOTE(review): confirm this matches the preprocessing used when the
            # SVR features were originally extracted.
            resize = 512 if m == 'tf_efficientnet_l2_ns' else 0

            # Entries whose name contains 'hflip' see a mirrored, cropped image.
            image = load_image_and_transform(file, PREPROC[m], 'hflip' in m, resize)
            image = image.to('cuda')

            # CLIP models expose their image tower through encode_image();
            # the other models are called directly.
            if 'clip' in m:
                res = MODELS[m].encode_image(image)
            else:
                res = MODELS[m](image)

            features.append(res.cpu().numpy())

    features = np.concatenate(features, 1)
    features = SVRSCALE.transform(features)
    pawpularity = SVRMODEL.predict(features)

    return pawpularity

predict_single_image(images_path[1])

In [None]:
class PetNet(nn.Module):
    """A timm backbone used directly as the prediction network.

    The backbone is stored as ``self.model``, so checkpoint keys carry the
    ``model.`` prefix.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        out_features: Number of outputs of the backbone's classifier
            (forwarded as ``num_classes``).
        inp_channels: Number of input image channels (forwarded as ``in_chans``).
        pretrained: Forwarded to ``timm.create_model``. Keep ``False`` when the
            weights are restored from a local checkpoint afterwards.
    """

    def __init__(
        self,
        model_name = 'none',
        out_features = 1,
        inp_channels = 3,
        pretrained = False,
    ):
        super().__init__()
        # Honour the constructor arguments instead of hard-coded values; with
        # the defaults this builds the same network as
        # create_model(model_name, pretrained=False, in_chans=3, num_classes=1).
        self.model = timm.create_model(
            model_name,
            pretrained=pretrained,
            in_chans=inp_channels,
            num_classes=out_features,
        )

    def forward(self, image):
        """Return the backbone's output for a batch of images."""
        return self.model(image)


# Load up to five Exp66 checkpoints as half-precision, eval-mode GPU models.
EXP66 = []

# glob() order is arbitrary; sorting makes the choice of "the first five"
# checkpoints deterministic.
modelfiles = sorted(glob('../input/petfinder-exp66/*.pth'))
for model_path in modelfiles[:5]:
    model = PetNet(
        model_name = 'swin_large_patch4_window12_384_in22k',
        out_features = 1,
        inp_channels = 3,
        pretrained = False
    )
    # Deserialize onto the CPU (the model is still on the CPU here) and move
    # everything to the GPU once, instead of restoring the checkpoint to
    # whatever device it was saved from.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model = model.to('cuda')
    model = model.half()
    model.eval()
    EXP66.append(model)

print(len(EXP66))

In [None]:
class PetNet2(nn.Module):
    """A timm backbone followed by dropout and a linear head.

    Sub-modules are registered as ``model``, ``dropout`` and ``head``; the
    checkpoints loaded into this class rely on those names.

    Args:
        model_name: timm architecture name passed to ``timm.create_model``.
        out_features: Output width of the linear head.
        inp_channels: Number of input image channels (forwarded as ``in_chans``).
        pretrained: Forwarded to ``timm.create_model``. Keep ``False`` when the
            weights are restored from a local checkpoint afterwards.
    """

    def __init__(
        self,
        model_name = 'beit_large_patch16_224',
        out_features = 1,
        inp_channels = 3,
        pretrained = False
    ):
        super().__init__()
        # Input width of the head. Assumes the backbone emits 1000 values per
        # image -- TODO confirm before using a different model_name.
        NC = 1000
        # Honour the constructor arguments instead of ignoring them; with the
        # defaults this is the same network as create_model(model_name,
        # pretrained=False) followed by Linear(NC, 1).
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels)
        self.dropout = nn.Dropout(0.05)
        self.head = nn.Linear(NC, out_features)

    def forward(self, image):
        """Run backbone -> dropout -> linear head and return the head's output."""
        output = self.model(image)
        output = self.dropout(output)
        output = self.head(output)
        return output
    
    
# Load up to five Exp77 checkpoints as half-precision, eval-mode GPU models.
EXP77 = []

# glob() order is arbitrary; sorting makes the choice of "the first five"
# checkpoints deterministic.
modelfiles = sorted(glob('../input/petfinder-exp77/*.pth'))
for model_path in modelfiles[:5]:
    model = PetNet2(
        model_name = 'beit_large_patch16_224',
        out_features = 1,
        inp_channels = 3,
        pretrained = False
    )
    # Deserialize onto the CPU (the model is still on the CPU here) and move
    # everything to the GPU once, instead of restoring the checkpoint to
    # whatever device it was saved from.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model = model.to('cuda')
    model = model.half()
    model.eval()
    EXP77.append(model)

print(len(EXP77))

In [None]:
import albumentations as A

def get_inference_fixed_transforms(dim = 224):
    """Build the albumentations pipeline applied to images at inference time.

    The pipeline applies ``SmallestMaxSize(max_size=dim)`` followed by
    ``CenterCrop(height=dim, width=dim)``; every step and the pipeline itself
    use ``p=1.0``.

    Args:
        dim: Target size forwarded to both steps.

    Returns:
        The composed ``A.Compose`` pipeline.
    """
    steps = [
        A.SmallestMaxSize(max_size=dim, p=1.0),
        A.CenterCrop(height=dim, width=dim, p=1.0),
    ]
    return A.Compose(steps, p=1.0)


def load_image(path='', transform = None):
    """Load an image file as a batched, channel-first, half-precision tensor.

    Args:
        path: Location of the image file; opened with PIL and converted to RGB.
        transform: Optional albumentations-style callable, invoked as
            ``transform(image=array)["image"]`` on the H x W x C array.

    Returns:
        Tensor with a leading axis of size 1 and channels moved in front of the
        spatial axes, cast with ``.half()``.
    """
    pixels = np.array(Image.open(path).convert('RGB'))

    if transform is not None:
        pixels = transform(image = pixels)["image"]

    # First scaling by 1/255, done on the numpy array.
    pixels = pixels / 255

    # H x W x C -> C x H x W
    channel_first = np.transpose(pixels, (2, 0, 1)).astype(np.float32)

    # NOTE(review): the values are divided by 255 a second time here, so the
    # tensor ends up scaled by 1/255**2. This is kept as-is because the loaded
    # checkpoints may have been trained with the same scaling -- confirm
    # against the training pipeline before changing it.
    batch = torch.tensor(channel_first, dtype = torch.float).unsqueeze(0) / 255.

    return batch.half()

In [None]:
def dl_inference(path, models, size=224):
    """Average the predictions of several networks for one image.

    Args:
        path: Path of the image file.
        models: Iterable of networks; each is called on the same input batch.
        size: Size forwarded to ``get_inference_fixed_transforms``.

    Returns:
        Mean over all values of ``sigmoid(model(image)) * 100`` across
        ``models``, as computed by ``np.mean``.
    """
    batch = load_image(path, get_inference_fixed_transforms(size)).to('cuda')

    with torch.no_grad():
        scores = [net(batch).sigmoid().cpu().numpy() * 100 for net in models]

    return np.mean(scores)



In [None]:
# Show the blended prediction for the first (up to) 20 training images.
# Slicing instead of indexing with range(20) keeps the cell from raising
# IndexError when fewer than 20 images are available.
for imagepath in tqdm(images_path[:20]):
    # cv2.imread returns BGR; matplotlib expects RGB.
    img = cv2.cvtColor(cv2.imread(imagepath), cv2.COLOR_BGR2RGB)
    plt.imshow(img)

    p0 = predict_single_image(imagepath) # SVR
    p1 = dl_inference(imagepath, EXP66, 384) # Exp66
    p2 = dl_inference(imagepath, EXP77, 224) # Exp77

    # Weighted blend of the three predictors.
    # NOTE(review): the weights add up to ~1.025 rather than 1 -- presumably
    # fitted coefficients; confirm this is intended.
    pawpularity = 0.63994705 * p0 + 0.15912166 * p1 + 0.22570434 * p2
    plt.title(str(pawpularity))
    plt.show()