In [5]:
import os 
import torch 
import numpy as np 
import pandas as pd

from math import ceil
from PIL import Image

from mmpfn.models.dino_v2.models.vision_transformer import vit_small

In [6]:
# Columns of the PetFinder training table selected as tabular features.
col_features = [
    "Age",
    "Breed1",
    "Breed2",
    "Color1",
    "Color2",
    "Color3",
    "Dewormed",
    "Fee",
    "FurLength",
    "Gender",
    "Health",
    "MaturitySize",
    "PhotoAmt",
    "State",
    "Sterilized",
    "Type",
    "Vaccinated",
    "VideoAmt",
    "Quantity",
]
# Columns left out of the feature set (none of them appears in col_features).
# The PetFinder table spells the rescuer column "RescuerID"; the previous
# entry "RescureID" was a typo that matched no column.
col_exclude = ["PetID", "RescuerID", "Description", "Name"]
# Column designated as the prediction target.
col_target = "AdoptionSpeed"

# Dataset root; defined first so every path below is derived from it instead
# of repeating the literal.
datasets_dir = "datasets/petfinder-adoption-prediction"
train = pd.read_csv(os.path.join(datasets_dir, "train", "train.csv"))

train["PetID"] = train["PetID"].astype(str)

# A set gives O(1) membership tests; checking against the raw column values
# would rescan every PetID once per image file.
known_pet_ids = set(train["PetID"])

# Image files are named "<PetID>-<ImageNumber>.jpg"; keep only the ones whose
# PetID prefix belongs to a row of the training table.
train_images = [
    f
    for f in os.listdir(os.path.join(datasets_dir, "train_images"))
    if f.endswith(".jpg") and f.split("-")[0] in known_pet_ids
]
train_images_df = pd.DataFrame(
    {
        "PetID": [f.split("-")[0] for f in train_images],
        "ImageNumber": [f.split("-")[1].split(".")[0] for f in train_images],
    }
)
# Only the first photo of each pet is used.
train_images_df = train_images_df[train_images_df["ImageNumber"] == "1"]

# Left merge + notna filter drops pets without a first photo while keeping
# the row labels produced by the merge. The explicit .copy() makes the
# filtered frame independent, so adding a column below does not raise
# SettingWithCopyWarning.
train = train.merge(train_images_df, on="PetID", how="left")
train = train[train["ImageNumber"].notna()].copy()
train["ImagePath"] = train["PetID"] + "-1.jpg"

# First-photo filename for every remaining row of `train`, in row order.
x_images = train["ImagePath"]

# One H x W x 3 array per successfully decoded image, in x_images order.
image_arrays = []
# Filenames that could not be decoded. They are skipped (best effort), which
# means image_arrays is then shorter than x_images and no longer lines up
# with the rows of `train` one-to-one.
failed_images = []
for filename in x_images:
    if not filename.lower().endswith(".jpg"):
        continue
    try:
        with Image.open(
            os.path.join(datasets_dir, "train_images", filename)
        ) as img:
            width, height = img.size
            # Upscale only (factor >= 1) so that the shorter side is at
            # least 224 px; larger images keep their resolution.
            scale_factor = max(1.0, 224 / min(width, height))
            new_width = ceil(width * scale_factor)
            new_height = ceil(height * scale_factor)
            # Round both sides up to a multiple of 14, the patch size the
            # ViT is built with, so the image tiles into whole patches.
            new_width = ceil(new_width / 14) * 14
            new_height = ceil(new_height / 14) * 14
            # Force 3-channel RGB: grayscale, palette or CMYK JPEGs would
            # otherwise yield arrays that are not H x W x 3 and break the
            # channel-first transpose / 3-channel model input downstream.
            img = img.convert("RGB").resize((new_width, new_height), Image.BILINEAR)
            image_arrays.append(np.array(img))
    except Exception as e:
        failed_images.append(filename)
        # Include the filename so the offending file can be identified.
        print(f"{filename}: {e}")

if failed_images:
    print(
        f"WARNING: {len(failed_images)} image(s) failed to load; "
        "image_arrays is not aligned row-for-row with x_images."
    )

# Sanity check: report any array whose height or width is not a multiple of 14.
for i, arr in enumerate(image_arrays):
    if (arr.shape[0] % 14 != 0) or (arr.shape[1] % 14 != 0):
        print(i, arr.shape)

# Small vision transformer from the project's dino_v2 package, built with
# 14-pixel patches to match the patch-aligned image sizes prepared above.
model = vit_small(
    patch_size=14, img_size=518, init_values=1.0, num_register_tokens=0, block_chunks=0
)

model_path = "parameters/dinov2_vits14_pretrain.pth"
# map_location="cpu" deserializes the checkpoint on the CPU regardless of the
# device it was saved from; the model is moved to the GPU explicitly below.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model = model.cuda()
# The model is only used for feature extraction: switch to inference mode so
# any train-only layers (dropout, stochastic depth) are disabled.
model.eval()

# Send inputs to whatever device the model's weights live on.
device = next(model.parameters()).device

# One feature tensor per entry of image_arrays, in the same order.
image_features = []
with torch.no_grad():
    for image_array in image_arrays:
        if image_array.ndim == 2:
            # Single-channel (grayscale) array: replicate it to 3 channels so
            # the channel-first transpose below is valid.
            image_array = np.repeat(image_array[:, :, None], 3, axis=2)
        # H x W x C -> 1 x C x H x W. This must use the current loop item;
        # the previous code transposed `img`, a stale variable left over from
        # the loading loop, so every iteration encoded the same last image.
        # NOTE(review): pixels are fed as raw 0-255 floats with no mean/std
        # normalization - confirm this matches what the checkpoint expects.
        input_image = (
            torch.tensor(np.expand_dims(np.transpose(image_array, (2, 0, 1)), axis=0))
            .float()
            .to(device)
        )
        feat = model(input_image)
        # Keep results on the CPU and release GPU tensors right away; images
        # have different sizes, so they are processed one at a time.
        image_features.append(feat.cpu())
        del input_image, feat
torch.cuda.empty_cache()

# Saved as one stacked array; presumably (num_images, 1, embed_dim) - verify
# against the model's output shape.
np.save("train_image_features_small.npy", [t.numpy() for t in image_features])