In [1]:
import os

import pandas as pd
import numpy as np
import glob
#import cv2
import matplotlib.pyplot as plt
import joblib
import gc
from glob import glob

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as T

from PIL import Image
import PIL

import time
import tqdm
import pickle
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error

import cuml
from cuml.svm import SVR as cuml_SVR
from sklearn.preprocessing import StandardScaler

In [2]:
# Seed every random number generator this notebook relies on

SEED = 42

import os
import random

import numpy as np
import torch

# os.environ['TF_CUDNN_DETERMINISTIC'] = '1'


def seed_everything(seed):
    """Seed Python's `random`, NumPy's global RNG and PyTorch (CPU and CUDA) with `seed`."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)


seed_everything(SEED)

# Install timm, yolov4 and CLIP

In [3]:
# Install timm offline: --no-index keeps pip away from the package index and
# --find-links points it at the wheel directory of the attached dataset.
!pip install timm --no-index --find-links=file:///kaggle/input/timm-packages/timm/

Looking in links: file:///kaggle/input/timm-packages/timm/
Processing /kaggle/input/timm-packages/timm/timm-0.6.11-py3-none-any.whl
Installing collected packages: timm
Successfully installed timm-0.6.11
[0m

In [4]:
# Move the yolov4 source code to the conda folder
# ! cp -r /kaggle/input/yolov4/yolov4_extracted/yolov4 /opt/conda/lib/python3.7/site-packages

In [5]:
# Install ftfy from the wheel shipped in the attached dataset
!pip install ../input/clip-full-package/CLIP_full_package/ftfy-6.1.1-py3-none-any.whl
# Copy the CLIP source tree into the working directory before installing it
# (it cannot be installed from the input directory, which is a read-only file system)
!cp -r ../input/clip-full-package/CLIP_full_package/CLIP-main ./
# Install CLIP from the copied source tree
!pip install /kaggle/working/CLIP-main

Processing /kaggle/input/clip-full-package/CLIP_full_package/ftfy-6.1.1-py3-none-any.whl
Installing collected packages: ftfy
Successfully installed ftfy-6.1.1
[0mProcessing ./CLIP-main
  Preparing metadata (setup.py) ... [?25l- done
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l- \ done
[?25h  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=11872 sha256=3689cd91bf48f606da1a8c12fef2d930842c565568decbec7d0b007bb70b9804
  Stored in directory: /root/.cache/pip/wheels/d5/9e/a7/ece3970b50674283ec0313940110361256ae1708618c0c5ec9
Successfully built clip
Installing collected packages: clip
Successfully installed clip-1.0
[0m

In [6]:
# Copy "bpe_simple_vocab_16e6.txt.xyz" into the installed clip package directory
# NOTE(review): the destination hard-codes the python3.7 site-packages path — confirm it matches the kernel's interpreter
!cp ../input/clip-full-package/CLIP_full_package/CLIP-main/clip/bpe_simple_vocab_16e6.txt.xyz /opt/conda/lib/python3.7/site-packages/clip/.

# Rename "bpe_simple_vocab_16e6.txt.xyz" to "bpe_simple_vocab_16e6.txt.gz"
!mv /opt/conda/lib/python3.7/site-packages/clip/bpe_simple_vocab_16e6.txt.xyz /opt/conda/lib/python3.7/site-packages/clip/bpe_simple_vocab_16e6.txt.gz
# List the package directory to check that the renamed vocabulary file is in place
!ls /opt/conda/lib/python3.7/site-packages/clip/.

__init__.py  bpe_simple_vocab_16e6.txt.gz  model.py
__pycache__  clip.py			   simple_tokenizer.py


In [7]:
import timm
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

import clip

# Set up yolov4

In [8]:
# import cv2
# from yolov4.tf import YOLOv4

In [9]:
# yolo = YOLOv4()

# yolo.config.parse_names("../input/yolov4/yolov4/yolov4/coco.names")
# yolo.config.parse_cfg("../input/yolov4/yolov4/yolov4/yolov4.cfg")

# yolo.make_model()
# yolo.load_weights("../input/yolov4/yolov4/yolov4/yolov4.weights", weights_type="yolo")

# Image processing

In [10]:
class PetfinderDataSet(Dataset):
    """Image dataset that loads '<base_path><id>.jpg' and applies a caller-supplied transform.

    Args:
        img_id_lst: list of image ids (file names without the '.jpg' suffix); a copy is stored.
        transform: callable applied to the RGB PIL image; its result is what `__getitem__` returns.
        base_path: prefix joined to the id by plain string concatenation, so it must
            already end with a path separator.
        new_size: stored on the instance; not used by any method of this class.
    """

    def __init__(self, img_id_lst, transform, base_path='./petfinder-pawpularity-score/test/', new_size = 0):
        self.transform = transform
        self.new_size = new_size
        self.base_path = base_path
        # Keep a private copy so later changes to the caller's list do not affect the dataset.
        self.img_id_lst = img_id_lst.copy()

    def __len__(self):
        """Return the number of image ids."""
        return len(self.img_id_lst)

    def __getitem__(self, idx):
        """Open image number `idx`, convert it to RGB and return the transformed image."""
        img_file = self.base_path + self.img_id_lst[idx] + '.jpg'
        rgb_img = Image.open(img_file).convert('RGB')
        return self.transform(rgb_img)

In [11]:
# To be modified 
class PetCropDataset_CenterCrop(Dataset):
    """Image dataset that returns a square crop placed on the detected pet.

    Each image is cropped to a square whose side is the shorter image edge. When a
    pet was detected the square is slid along the longer edge so that it is centred
    on the pet where possible; otherwise a plain centre crop is taken. The crop is
    then resized to `new_size` and passed through `end_trans`.

    Args:
        df: DataFrame with an "Id" column. Three columns are read by position:
            -4 and -3 are used as the pet centre (x, y) expressed as a ratio of the
            image width / height, and -2 is the detection flag (0 means no cat or
            dog was detected).
            NOTE(review): positional access breaks if columns are appended to `df`
            after these three — confirm the column layout at the call site.
        end_trans: callable applied to the resized crop; its result is returned.
        base_path: prefix joined to the id by plain string concatenation.
        new_size: size handed to `T.Resize`.
            NOTE(review): the default 0 is presumably a placeholder — callers should pass a real size.
    """

    def __init__(self, df, end_trans, base_path='./petfinder-pawpularity-score/test/', new_size = 0):
        self.img_id_lst = df["Id"].tolist().copy()
        self.df = df
        self.base_path = base_path
        self.new_size = new_size
        self.end_trans = end_trans

    def __len__(self):
        """Return the number of image ids."""
        return len(self.img_id_lst)

    @staticmethod
    def _square_crop_origin(width, height, center_x, center_y):
        """Return (top, left) of the min(width, height)-sided square crop centred on the pet.

        `center_x` / `center_y` are the pet centre as a ratio of the image width /
        height. The square only moves along the longer image edge and is pushed
        back inside the image when the pet is close to a border.
        """
        if width > height:
            # Landscape: slide horizontally. `margin` is half the crop side as a ratio of the width.
            margin = (height / 2) / width
            if center_x < margin:  # pet near the left border
                left = 0
            elif center_x > (1 - margin):  # pet near the right border
                left = width - height
            else:
                left = int(width * (center_x - margin))
            return 0, left

        # Portrait (or square): slide vertically, driven by the y ratio.
        margin = (width / 2) / height
        if center_y < margin:  # pet near the top border
            top = 0
        elif center_y > (1 - margin):  # pet near the bottom border
            top = height - width
        else:
            top = int(height * (center_y - margin))
        return top, 0

    def __getitem__(self, idx):
        """Load image number `idx` and return its cropped, resized and transformed version."""
        img = Image.open(self.base_path + self.img_id_lst[idx] + '.jpg').convert('RGB')
        width, height = img.size

        # the pet's center ratio coordinate
        center_x, center_y = self.df.iloc[idx, -4], self.df.iloc[idx, -3]

        if self.df.iloc[idx, -2] == 0:  # failed to detect cat or dog
            cropped_img = T.CenterCrop(size=min(width, height))(img)
        else:
            side = min(width, height)
            top, left = self._square_crop_origin(width, height, center_x, center_y)
            # Fix: the portrait branch used to derive `top` from the x ratio and then
            # crop with top=0, so tall images were always cropped at the top edge.
            cropped_img = T.functional.crop(img=img, top=top, left=left, height=side, width=side)

        resized_img = T.Resize(size=self.new_size)(cropped_img)
        resized_img = self.end_trans(resized_img)

        return resized_img

In [12]:
class CLIPDataset(Dataset):
    """Image dataset that loads '<base_path><id>.jpg' and runs it through `preprocess`.

    Args:
        img_id_lst: list of image ids (file names without the '.jpg' suffix); a copy is stored.
        base_path: prefix joined to the id by plain string concatenation.
        preprocess: callable applied to the RGB PIL image; `__getitem__` calls it
            unconditionally, so it must be supplied before items are fetched.
    """

    def __init__(self, img_id_lst, base_path='../input/petfinder-pawpularity-score/test/', preprocess=None):
        self.preprocess = preprocess
        self.base_path = base_path
        # Keep a private copy so later changes to the caller's list do not affect the dataset.
        self.img_id_lst = img_id_lst.copy()

    def __len__(self):
        """Return the number of image ids."""
        return len(self.img_id_lst)

    def __getitem__(self, idx):
        """Open image number `idx`, convert it to RGB and return the preprocessed image."""
        img_file = self.base_path + self.img_id_lst[idx] + '.jpg'
        rgb_img = Image.open(img_file).convert('RGB')
        return self.preprocess(rgb_img)

# Get pet coordinates from test data (YOLOv4 detection is disabled above, so the columns are filled with placeholder values)

In [13]:
# Load the test metadata and append the bookkeeping columns, each filled with a constant placeholder
data_base_path = "../input/petfinder-pawpularity-score"

placeholder_cols = {
    'class': -1,
    'center_x': -1.5,
    'center_y': -1.5,
    'pet_type': 0,
    'fold_idx': -1,
}
# `assign` appends the columns in dict order, so the resulting column layout is unchanged.
df_test = pd.read_csv(data_base_path+'/test.csv').assign(**placeholder_cols)

df_test.head()

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,class,center_x,center_y,pet_type,fold_idx
0,4128bae22183829d2b5fea10effdb0c3,1,0,1,0,0,1,1,0,0,1,0,1,-1,-1.5,-1.5,0,-1
1,43a2262d7738e3d420d453815151079e,0,1,0,0,0,0,1,1,0,0,0,0,-1,-1.5,-1.5,0,-1
2,4e429cead1848a298432a0acad014c9d,0,0,0,1,0,1,1,1,0,1,1,1,-1,-1.5,-1.5,0,-1
3,80bc3ccafcc51b66303c2c263aa38486,1,0,1,0,0,0,0,0,0,0,1,0,-1,-1.5,-1.5,0,-1
4,8f49844c382931444e68dffbe20228f4,1,1,1,0,1,1,0,1,0,1,1,0,-1,-1.5,-1.5,0,-1


In [14]:
# Preview the first rows of df_test again (nothing has modified it since the previous cell)
df_test.head()

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,class,center_x,center_y,pet_type,fold_idx
0,4128bae22183829d2b5fea10effdb0c3,1,0,1,0,0,1,1,0,0,1,0,1,-1,-1.5,-1.5,0,-1
1,43a2262d7738e3d420d453815151079e,0,1,0,0,0,0,1,1,0,0,0,0,-1,-1.5,-1.5,0,-1
2,4e429cead1848a298432a0acad014c9d,0,0,0,1,0,1,1,1,0,1,1,1,-1,-1.5,-1.5,0,-1
3,80bc3ccafcc51b66303c2c263aa38486,1,0,1,0,0,0,0,0,0,0,1,0,-1,-1.5,-1.5,0,-1
4,8f49844c382931444e68dffbe20228f4,1,1,1,0,1,1,0,1,0,1,1,0,-1,-1.5,-1.5,0,-1


# Get test data features

In [15]:
# Inference settings: image directory, DataLoader batch size and the ids of the images to score
test_data_base_path = '../input/petfinder-pawpularity-score/test/'
batch_size = 8
test_id_lst = df_test.loc[:, "Id"].tolist()

In [16]:
# Directory that receives one pickled feature array per backbone.
path = "./testset_features"
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create race.
os.makedirs(path, exist_ok=True)

## Feature extraction using Timm models

In [17]:
# timm_model_lst = ["../input/timm-pretrained-models/beitv2_large_patch16_224.pkl"]
timm_model_lst = ["../input/timm-pretrained-models/tf_efficientnet_l2_ns_475.pkl",
                 "../input/timm-pretrained-models/beitv2_large_patch16_224.pkl"]
#"../input/timm-pretrained-models/swin_base_patch4_window7_224.pkl"
                 # "../input/timm-pretrained-models/vit_large_r50_s32_384.pkl",
                 # "../input/timm-pretrained-models/vit_large_patch16_384.pkl"]

# For every pickled timm model: embed all test images and store the stacked
# embeddings as "./testset_features/<model name>_features.pkl".
for m in timm_model_lst:
    print("Extracing features using", m)
    # SECURITY: pickle.load can execute arbitrary code — only load model files from a trusted source.
    with open(m, "rb") as model_file:
        pretrained_model = pickle.load(model_file).to('cuda')
    # NOTE(review): the model runs in whatever train/eval mode it was pickled in
    # (no .eval() call here) — confirm this matches how the training features were extracted.

    # Build the preprocessing pipeline from the model's own data config.
    trans_config = resolve_data_config({}, model=pretrained_model)
    default_trans = create_transform(**trans_config)

    test_dataset = PetfinderDataSet(img_id_lst=test_id_lst, transform=default_trans, base_path=test_data_base_path)
    # shuffle=False keeps the embedding rows in the same order as test_id_lst.
    test_data_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=2, shuffle=False)

    with torch.no_grad():
        emb = [pretrained_model(data.to('cuda')).cpu().numpy() for data in test_data_loader]
    EMB_full = np.concatenate(emb, 0)

    # File name without directory and extension, e.g. ".../foo.pkl" -> "foo".
    model_name = m.split("/")[-1].split(".")[0]
    # Context manager guarantees the file is flushed and closed before a later cell reads it back.
    with open("./testset_features/"+model_name+"_features.pkl", "wb") as feature_file:
        pickle.dump(EMB_full, feature_file)

    # Drop every reference to the model and its outputs before loading the next one.
    del pretrained_model, EMB_full, emb
    torch.cuda.empty_cache()
    gc.collect()
    print("Done!\n")

Extracing features using ../input/timm-pretrained-models/tf_efficientnet_l2_ns_475.pkl
Done!

Extracing features using ../input/timm-pretrained-models/beitv2_large_patch16_224.pkl
Done!



## Feature extraction using CLIP models

In [18]:
# CLIP_model_lst = ["../input/clip-full-package/CLIP_full_package/CLIP_models/ViT-B-16.pkl"]
CLIP_model_lst = ["../input/clip-full-package/CLIP_full_package/CLIP_models/RN50x16.pkl",
                 "../input/clip-full-package/CLIP_full_package/CLIP_models/RN50x4.pkl",
                 "../input/clip-full-package/CLIP_full_package/CLIP_models/ViT-B-16.pkl",
                 "../input/clip-full-package/CLIP_full_package/CLIP_models/ViT-B-32.pkl"]

# For every pickled (model, preprocess) pair: encode all test images and store the
# stacked image embeddings as "./testset_features/<model name>_features.pkl".
for m in CLIP_model_lst:
    print("Extracing features using", m)
    # SECURITY: pickle.load can execute arbitrary code — only load model files from a trusted source.
    with open(m, "rb") as model_file:
        clip_model, preprocess = pickle.load(model_file)
    clip_model.to('cuda')

    full_dataset = CLIPDataset(img_id_lst=test_id_lst, base_path=test_data_base_path, preprocess=preprocess)
    # shuffle=False keeps the embedding rows in the same order as test_id_lst.
    full_data_loader = DataLoader(full_dataset, batch_size=batch_size, num_workers=2, shuffle=False)

    with torch.no_grad():
        emb = [clip_model.encode_image(data.to('cuda')).cpu().numpy() for data in full_data_loader]
    EMB_full = np.concatenate(emb, 0)

    # File name without directory and extension, e.g. ".../ViT-B-16.pkl" -> "ViT-B-16".
    model_name = m.split("/")[-1].split(".")[0]
    # Context manager guarantees the file is flushed and closed before a later cell reads it back.
    with open("./testset_features/"+model_name+"_features.pkl", "wb") as feature_file:
        pickle.dump(EMB_full, feature_file)

    # Drop every reference to the model and its outputs before loading the next one.
    del clip_model, EMB_full, emb
    torch.cuda.empty_cache()
    gc.collect()
    print("Done!\n")

Extracing features using ../input/clip-full-package/CLIP_full_package/CLIP_models/RN50x16.pkl
Done!

Extracing features using ../input/clip-full-package/CLIP_full_package/CLIP_models/RN50x4.pkl
Done!

Extracing features using ../input/clip-full-package/CLIP_full_package/CLIP_models/ViT-B-16.pkl
Done!

Extracing features using ../input/clip-full-package/CLIP_full_package/CLIP_models/ViT-B-32.pkl
Done!



## Combine features

In [19]:
# Fixed setting
# Candidate ensembles; each list names the backbones whose features are concatenated,
# in that order. (The former initial `models = [...]` list was removed: it was
# overwritten by `models = models_man_2` below before ever being read.)
model_old_hill_1 = ["RN50x16",
                   "beitv2_large_patch16_224",
                   "beitv2_large_patch16_224"] # * 1.032
model_old_hill_2 = ["ViT-B-16",
                   "beitv2_large_patch16_224"] # * 1.032
models_man_2 = ["RN50x16",
                "RN50x4",
                "ViT-B-16",
                "ViT-B-32",
                "tf_efficientnet_l2_ns_475",
                "beitv2_large_patch16_224"] # loss: 17.296  * 1.030
models_man_5 = ["RN50x16",
          "RN50x4",
          "ViT-B-16",
          "ViT-B-32",
          "beitv2_large_patch16_224",
          "swin_base_patch4_window7_224",
          "tf_efficientnet_l2_ns"] # loss: 17.257  * 1.029
# "beitv2_large_patch16_224"
#           "vit_large_patch16_384",
#           "vit_large_r50_s32_384"
# models = ["beitv2_large_patch16_224",
#           "RN50x16"]
models = models_man_2

# Load the stored feature array of every selected backbone, in list order.
feature_blocks = []
for model_name in models:
    with open("./testset_features/"+model_name+"_features.pkl", "rb") as feature_file:
        feature_blocks.append(pickle.load(feature_file))

# Every backbone must have embedded the same images, i.e. produced the same number of rows.
row_counts = {block.shape[0] for block in feature_blocks}
assert len(row_counts) == 1, f"feature files disagree on the number of rows: {sorted(row_counts)}"

# One concatenation along the feature axis instead of re-copying the growing array per model.
EMB_test = np.concatenate(feature_blocks, axis=1)
del feature_blocks

# Get prediction

In [20]:
# Post-processing applied to the raw SVR output: multiply by PRED_SCALE, then clip to [PRED_MIN, PRED_MAX].
# NOTE(review): the experiment notes in the last cell mention "clip 80" while the upper bound here is 70 — confirm which is intended.
PRED_SCALE = 1.03
PRED_MIN, PRED_MAX = 1, 70

# load svr model
# SECURITY: pickle.load can execute arbitrary code — only load model files from a trusted source.
with open("../input/cumlsvr-full/cumlSVR_man_2.pkl", 'rb') as svr_file:
    svr_model, scaler = pickle.load(svr_file)
# Keep the scaled features under their own name so re-running this cell does not
# scale already-scaled features a second time.
EMB_test_scaled = scaler.transform(EMB_test)
# get prediction
test_pred = np.clip(svr_model.predict(EMB_test_scaled)*PRED_SCALE, PRED_MIN, PRED_MAX)
# test_pred = svr_model.predict(EMB_test)*1.03

In [21]:
# Attach the predictions as the 'Pawpularity' column and preview the result
df_test = df_test.assign(Pawpularity=test_pred)
df_test.head()

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,class,center_x,center_y,pet_type,fold_idx,Pawpularity
0,4128bae22183829d2b5fea10effdb0c3,1,0,1,0,0,1,1,0,0,1,0,1,-1,-1.5,-1.5,0,-1,41.114029
1,43a2262d7738e3d420d453815151079e,0,1,0,0,0,0,1,1,0,0,0,0,-1,-1.5,-1.5,0,-1,40.735275
2,4e429cead1848a298432a0acad014c9d,0,0,0,1,0,1,1,1,0,1,1,1,-1,-1.5,-1.5,0,-1,41.092106
3,80bc3ccafcc51b66303c2c263aa38486,1,0,1,0,0,0,0,0,0,0,1,0,-1,-1.5,-1.5,0,-1,40.716255
4,8f49844c382931444e68dffbe20228f4,1,1,1,0,1,1,0,1,0,1,1,0,-1,-1.5,-1.5,0,-1,40.699627


In [22]:
# Write the submission file with just the id and prediction columns, without the index
submission_cols = ['Id','Pawpularity']
df_test.loc[:, submission_cols].to_csv('submission.csv', index=False)

In [23]:
# top10% Public: 17.82371
# new hill 1 *1.032 clip 80
# old fix 1 *1.034 clip 80
# man 2 *1.030 clip 80
# man 5 *1.029 clip 80
# old hill 1 *1.032 clip 80
# old hill 2 *1.032 clip 80