In [26]:
import numpy as np
import pandas as pd
import os

import albumentations
import cv2
import timm
import torch.nn as nn
from sklearn import metrics
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

import pytorch_lightning as pl
from pytorch_lightning import callbacks
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import LightningDataModule

import glob
import matplotlib.pyplot as plt

import clip
from PIL import Image
from joblib import dump, load

In [5]:
# Seed reused by the randomised steps below (fold creation, train/validation split).
SEED = 2023

# Root data folder; the train and test image folders sit directly beneath it.
DATA_DIR = "data"
TRAIN_IMAGES_DIR, TEST_IMAGES_DIR = (
    os.path.join(DATA_DIR, split_name) for split_name in ("train", "test")
)

# Destination folder for the fitted regressor and the saved CLIP model.
OUTPUT_DIR = 'output'

In [6]:
def create_folds(data: pd.DataFrame, num_splits = 5):
    """Return a copy of ``data`` with a stratified ``fold`` column added.

    The continuous ``Pawpularity`` target is cut into equal-width bins (bin
    count from Sturges' rule) and those bins are used as the stratification
    label for ``StratifiedKFold``, seeded with ``SEED``.

    Args:
        data: Frame containing a ``Pawpularity`` column. It is not modified.
        num_splits: Number of folds to create.

    Returns:
        A new DataFrame with the same rows, index and columns as ``data`` plus
        an integer ``fold`` column holding each row's validation-fold number.
    """
    # Work on a copy so neither the fold column nor any temporary data leaks
    # into the caller's frame.
    data = data.copy()

    num_bins = int(np.floor(1 + np.log2(len(data)))) # sturge's rule
    bins = pd.cut(data["Pawpularity"], bins=num_bins, labels=False).to_numpy()

    skf = StratifiedKFold(n_splits=num_splits, shuffle=True, random_state=SEED)

    # skf.split yields *positional* indices, so they are written into a plain
    # array instead of through label-based `.loc`, which is only equivalent
    # when the frame has a default 0..n-1 index.
    folds = np.full(len(data), -1, dtype=np.int64)
    for fold_index, (_train_idx, val_idx) in enumerate(skf.split(X=data, y=bins)):
        folds[val_idx] = fold_index

    data["fold"] = folds

    return data

In [17]:
# Metadata columns read from the dataframe as the per-image dense feature vector.
dense_features = [
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
]

class PetFinderClipDataset(Dataset):
    """Dataset yielding ``(image_id, dense_features, image, target)`` per row.

    Args:
        df: Frame with an ``Id`` column and every column listed in
            ``dense_features``. ``Pawpularity`` is optional; when it is absent
            every target is ``-1``.
        dir: Folder containing one ``<Id>.jpg`` file per row.
        augmentations: Callable applied positionally to the opened PIL image
            (``augmentations(image)``), or ``None`` to return the PIL image
            unchanged.
    """

    def __init__(self, df: pd.DataFrame, dir: str, augmentations: albumentations.Compose):
        ids = df["Id"].values
        self.ids = ids

        has_targets = "Pawpularity" in df.keys()
        self.targets = df["Pawpularity"].values if has_targets else [-1] * len(df)

        self.dense_features = df[dense_features].values
        self.image_paths = [os.path.join(dir, f"{image_id}.jpg") for image_id in ids]
        self.augmentations = augmentations

    def __len__(self):
        """Number of samples, i.e. one per image path."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load sample ``item`` and return it as ``(id, features, image, target)``."""
        image_id = self.ids[item]

        image = Image.open(self.image_paths[item])
        if self.augmentations is not None:
            image = self.augmentations(image)

        feature_row = torch.tensor(self.dense_features[item, :], dtype=torch.float)
        target = torch.tensor(self.targets[item], dtype=torch.float)

        return image_id, feature_row, image, target
    
class PetFinderClipDataModule(LightningDataModule):
    """Bundles the train / val / test frames, image folders and transforms.

    Each ``*_dataloader`` method builds a fresh ``PetFinderClipDataset`` from
    the matching frame, folder and transform. Only the training loader
    shuffles.
    """

    def __init__(self, 
                 df_train=None, df_val=None, df_test=None, 
                 train_images_dir=None, val_images_dir=None, test_images_dir=None, 
                 train_augmentations=None, val_augmentations=None, test_augmentations=None, 
                 batch_size=64
                ):
        super().__init__()
        self.df_train, self.df_val, self.df_test = df_train, df_val, df_test

        self.train_images_dir = train_images_dir
        self.val_images_dir = val_images_dir
        self.test_images_dir = test_images_dir

        self.train_augmentations = train_augmentations
        self.val_augmentations = val_augmentations
        self.test_augmentations = test_augmentations

        self.batch_size = batch_size

    def _build_loader(self, df, images_dir, augmentations, shuffle):
        """Wrap one split in a dataset and a DataLoader using ``self.batch_size``."""
        dataset = PetFinderClipDataset(df, images_dir, augmentations)
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle)

    def train_dataloader(self):
        """Shuffled loader over the training split."""
        return self._build_loader(self.df_train, self.train_images_dir, self.train_augmentations, True)

    def val_dataloader(self):
        """Ordered loader over the validation split."""
        return self._build_loader(self.df_val, self.val_images_dir, self.val_augmentations, False)

    def test_dataloader(self):
        """Ordered loader over the test split."""
        return self._build_loader(self.df_test, self.test_images_dir, self.test_augmentations, False)

In [4]:
# Run CLIP on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# `preprocess` is the image transform returned alongside the model; it is used
# below as the dataset transform.
model, preprocess = clip.load("ViT-B/32", device=device)

100%|███████████████████████████████████████| 338M/338M [00:37<00:00, 9.43MiB/s]


In [13]:
# Read the labelled data, hold out 20% for validation (stratified on the raw
# Pawpularity value) and give both parts a fresh 0..n-1 index.
df_labelled = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))

df_train, df_val = (
    part.reset_index(drop=True)
    for part in train_test_split(
        df_labelled,
        test_size=0.2,
        random_state=SEED,
        shuffle=True,
        stratify=df_labelled['Pawpularity'],
    )
)

In [19]:
def get_features(dataloader):
    """Encode every image in ``dataloader`` with the global CLIP ``model``.

    Args:
        dataloader: Iterable yielding ``(image_ids, dense_features, images,
            labels)`` batches. Only ``images`` and ``labels`` are used.

    Returns:
        ``(features, labels)`` as NumPy arrays, concatenated over all batches
        in the order the dataloader produced them.
    """
    feature_batches = []
    label_batches = []

    with torch.no_grad():
        for _image_ids, _dense_features, images, labels in tqdm(dataloader):
            embeddings = model.encode_image(images.to(device))

            # Move each batch off the accelerator straight away so the
            # embeddings of the whole dataset never accumulate in GPU memory.
            feature_batches.append(embeddings.cpu())
            label_batches.append(labels.cpu())

    return torch.cat(feature_batches).numpy(), torch.cat(label_batches).numpy()

In [20]:
# Both splits read from the training image folder and share CLIP's own
# preprocessing transform.
datamodule = PetFinderClipDataModule(
    df_train=df_train,
    df_val=df_val,
    train_images_dir=TRAIN_IMAGES_DIR,
    val_images_dir=TRAIN_IMAGES_DIR,
    train_augmentations=preprocess,
    val_augmentations=preprocess,
    batch_size=64,
)

In [21]:
# The training loader shuffles, so the rows of train_features follow the
# shuffled batch order rather than the row order of df_train; the labels are
# collected from the same batches and stay aligned with the features.
train_loader = datamodule.train_dataloader()
train_features, train_labels = get_features(train_loader)

  0%|          | 0/124 [00:00<?, ?it/s]

100%|██████████| 124/124 [01:36<00:00,  1.29it/s]


In [24]:
# Sanity check: one embedding row per label.
(train_features.shape, train_labels.shape)

((7929, 512), (7929,))

In [25]:
# Standardise the CLIP embeddings, then regress Pawpularity with an RBF-kernel SVR.
clf = make_pipeline(
    StandardScaler(),
    SVR(kernel='rbf', C=1, epsilon=0.2),
)
clf.fit(train_features, train_labels)

In [27]:
# Create the output folder first so the dump does not depend on it already existing.
os.makedirs(OUTPUT_DIR, exist_ok=True)
svm_path = os.path.join(OUTPUT_DIR, 'svm_clip.joblib')

dump(clf, svm_path)
# Reload straight away to confirm the saved pipeline round-trips.
# NOTE: joblib files are pickles - only load files you produced yourself.
clf = load(svm_path)

In [28]:
# The validation loader does not shuffle, so the rows of val_features line up
# with the rows of df_val.
val_loader = datamodule.val_dataloader()
val_features, val_labels = get_features(val_loader)

100%|██████████| 31/31 [00:20<00:00,  1.48it/s]


In [29]:
def get_predictions_with_clip_from_dataframe(df: pd.DataFrame, clf, image_dir: str = TRAIN_IMAGES_DIR, val_features=None, val_labels=None):
    """Add a ``Pawpularity_prediction`` column to ``df`` using ``clf``.

    Args:
        df: Frame describing the images to score. It is modified in place and
            also returned.
        clf: Fitted regressor exposing ``predict``.
        image_dir: Folder holding the ``<Id>.jpg`` files; only used when the
            features have to be computed here.
        val_features: Optional precomputed CLIP features, one row per row of
            ``df`` and in the same order. Computed from the images when omitted.
        val_labels: Unused; kept so existing callers that pass it keep working.

    Returns:
        ``df`` with the new ``Pawpularity_prediction`` column.
    """
    if val_features is None:
        # The data module's keyword is `val_images_dir`; the previous spelling
        # `val_image_dir` made this fallback path raise a TypeError.
        datamodule = PetFinderClipDataModule(df_val=df, val_images_dir=image_dir, val_augmentations=preprocess)
        # The validation loader does not shuffle, so feature rows follow df's row order.
        val_features, _ = get_features(datamodule.val_dataloader())

    df["Pawpularity_prediction"] = clf.predict(val_features)

    return df

In [30]:
# Reuse the validation features extracted above instead of re-encoding the images.
df_val_with_predictions = get_predictions_with_clip_from_dataframe(
    df_val,
    clf,
    val_features=val_features,
    val_labels=val_labels,
)

In [31]:
# Show the validation frame with the new Pawpularity_prediction column.
df_val_with_predictions

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity,Pawpularity_prediction
0,763c0fbfd75593c2911a33603c28dc45,0,0,0,0,0,0,1,0,0,0,0,0,18,41.059297
1,ba6faa385160bedd384aed93329779fe,0,1,1,0,0,0,1,0,0,0,1,0,52,41.438748
2,dcae448678ce2779659104d1f5970e92,0,1,1,1,0,0,0,0,0,0,0,0,32,24.552653
3,eab6c63493b68e1ab3817a094622be6f,0,0,1,0,0,0,0,1,0,0,1,1,35,36.910476
4,291320168a3fb0d981b4e5ef5c26a307,0,1,1,1,0,0,0,0,0,0,0,0,23,48.283062
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1978,5130a71b585694927eda397d93a41e4d,0,1,1,1,0,0,0,0,0,0,0,0,34,36.100432
1979,d77cbb32318b41770954ed1a73e35dcb,0,1,1,1,0,1,0,0,0,0,0,0,48,32.914243
1980,feefa7d5530149a0538313c9815a90ad,0,1,1,1,0,0,0,0,0,0,0,0,20,29.234506
1981,60e31d807e3743cdd09130e2cf7c0d50,0,0,1,1,0,0,0,0,0,0,0,1,30,38.147585


In [33]:
# Validation RMSE. The `squared=False` keyword was deprecated in scikit-learn 1.4
# and removed in later releases; taking the square root of the MSE gives the
# same value on every version.
np.sqrt(
    mean_squared_error(
        df_val_with_predictions['Pawpularity'],
        df_val_with_predictions['Pawpularity_prediction'],
    )
)

18.474123624936215

In [38]:
# torch.save does not create missing folders, so make the checkpoint folder first.
checkpoint_dir = os.path.join(OUTPUT_DIR, 'model_checkpoints')
os.makedirs(checkpoint_dir, exist_ok=True)

# The whole model object is pickled (not just a state_dict), so loading this
# file later requires the `clip` package to be importable.
torch.save(model, os.path.join(checkpoint_dir, 'clip.pth'))

# Download CLIP and save it to a zip file

In [35]:
!pip download git+https://github.com/openai/CLIP.git -d ./clip/

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to c:\users\q\appdata\local\temp\pip-req-build-to9c22xk
  Resolved https://github.com/openai/CLIP.git to commit a1d071733d7111c9c014f024669f959182114e33
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting ftfy (from clip==1.0)
  Using cached ftfy-6.1.1-py3-none-any.whl (53 kB)
Collecting regex (from clip==1.0)
  Using cached regex-2023.10.3-cp311-cp311-win_amd64.whl (269 kB)
Collecting tqdm (from clip==1.0)
  Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)
Collecting torch (from clip==1.0)
  Using cached torch-2.1.0-cp311-cp311-win_amd64.whl (192.3 MB)
Collecting torchvision (from clip==1.0)
  Using cached 

  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git 'C:\Users\q\AppData\Local\Temp\pip-req-build-to9c22xk'

[notice] A new release of pip is available: 23.1.2 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [36]:
import os
from zipfile import ZipFile

dirName = "./clip"
zipName = "packages.zip"

# Pack every file found under dirName into zipName, stored under the path it
# was walked at.
with ZipFile(zipName, 'w') as zipObj:
    for folderName, _subfolders, filenames in os.walk(dirName):
        for filename in filenames:
            # Skip any file that has the same name as the archive itself.
            if filename == zipName:
                continue
            zipObj.write(os.path.join(folderName, filename))

In [37]:
# List the names exposed by the installed clip package.
dir(clip)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'available_models',
 'clip',
 'load',
 'model',
 'simple_tokenizer',
 'tokenize']

In [39]:
# Show the image transform pipeline returned by clip.load.
preprocess

Compose(
    Resize(size=224, interpolation=bicubic, max_size=None, antialias=warn)
    CenterCrop(size=(224, 224))
    <function _convert_image_to_rgb at 0x000001D660264B80>
    ToTensor()
    Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
)