In [1]:
import os
from glob import glob

import cv2
import numpy as np
import polars as pl
import timm
import torch
from tqdm import tqdm

In [2]:
pathes = glob(os.path.join("/kaggle/input/atmaCup#18_dataset", "images", "*", "image_t.png"))
len(pathes)

45098

In [3]:
model = timm.create_model("resnet18d", pretrained=True, num_classes=-1).eval()
model = model.to("cuda")

NVIDIA GeForce RTX 4090 with CUDA capability sm_89 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_60 sm_70 sm_75 compute_70 compute_75.
If you want to use the NVIDIA GeForce RTX 4090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



In [4]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, pathes):
        self.pathes = pathes

    def __len__(self):
        return len(self.pathes)

    def __getitem__(self, idx):
        path = self.pathes[idx]
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (224, 224))
        img = img / 255.0
        img = img.transpose(2, 0, 1)
        img = torch.tensor(img, dtype=torch.float32)
        return img

In [5]:
dataloder = torch.utils.data.DataLoader(
    CustomDataset(pathes),
    batch_size=64,
    shuffle=False,
)

features = []
with torch.no_grad():
    for imgs in tqdm(dataloder):
        imgs = imgs.to("cuda")
        features.append(model(imgs).detach().cpu().numpy())


100%|██████████| 705/705 [00:43<00:00, 16.09it/s]


In [6]:
features = np.concatenate(features)

In [7]:
from sklearn.decomposition import TruncatedSVD

svd = TruncatedSVD(n_components=32, random_state=0)
features = svd.fit_transform(features)
features.shape

(45098, 32)

In [8]:
df = pl.DataFrame(features, schema=[f"img_emb_{i}" for i in range(32)])
ids = [path.split("/")[-2] for path in pathes]
df = df.with_columns(pl.Series("ID", ids))
df.head()

img_emb_0,img_emb_1,img_emb_2,img_emb_3,img_emb_4,img_emb_5,img_emb_6,img_emb_7,img_emb_8,img_emb_9,img_emb_10,img_emb_11,img_emb_12,img_emb_13,img_emb_14,img_emb_15,img_emb_16,img_emb_17,img_emb_18,img_emb_19,img_emb_20,img_emb_21,img_emb_22,img_emb_23,img_emb_24,img_emb_25,img_emb_26,img_emb_27,img_emb_28,img_emb_29,img_emb_30,img_emb_31,ID
f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,str
6.421825,0.945972,-0.026925,1.059455,1.082559,-1.088256,-1.550975,-0.100807,-0.132713,-0.383449,0.102457,1.369587,-0.371239,-0.137833,-0.191333,-0.107892,-0.217118,0.671512,0.218856,-0.088977,0.061951,0.365553,0.336992,0.311474,0.405945,-0.091794,-0.050762,0.325328,-0.115107,0.140964,0.032567,0.25402,"""df13d930da7d18b027295cb7acca9f…"
7.042717,-1.296742,1.216432,2.543397,-0.2359,-0.42828,0.017613,1.593839,-0.877097,0.352018,-1.033763,-0.769155,-0.487338,-1.313721,-0.641589,-0.33523,0.034287,-0.053777,0.05448,-0.59476,0.106501,0.493268,-0.093137,-0.561625,0.175695,-0.401975,0.113649,0.074851,0.242077,-0.154483,0.079947,-0.107651,"""4f82b8c1252927a89a89f862d61471…"
7.550829,1.405917,-0.725527,0.817095,0.378958,0.642951,-1.211553,-0.050206,1.51725,0.47686,0.299116,-0.783766,0.589219,-0.533846,0.42364,-0.574574,-0.327105,0.014507,0.36235,0.459348,0.655586,-0.697103,0.083823,-0.548894,-0.123053,-0.412186,0.974848,0.067295,0.254798,-0.508267,0.032507,-0.155088,"""db2711eb0a03d53978b4e788f64c78…"
8.582917,-1.604584,-0.966825,0.785558,1.564791,-0.616406,-0.51183,-0.927535,-0.31958,0.676131,-0.349427,-0.725266,1.338446,-0.01426,0.32007,0.344096,-0.4406,-0.308975,0.590363,1.060933,-0.240136,-0.139033,0.342027,-0.001298,0.376357,-0.8234,-0.021138,-0.994519,-0.225066,0.523157,0.234012,-0.368881,"""aaee0f37200b7b87496fe7b86b05ed…"
7.533806,1.712595,-0.549584,0.089194,-0.175475,-0.189765,0.151349,-0.436834,0.531711,-0.855186,0.019512,-0.834142,0.136501,0.008242,-0.174233,-0.430688,-0.309291,0.495387,-0.078007,-0.431799,-0.043789,-0.136899,-0.681444,-0.171332,0.161765,0.216772,-0.42054,0.287003,-0.32044,-0.177328,-0.434106,0.479419,"""f71c2b3852d0f381b4122e3d5ece59…"


In [9]:
df.write_csv("image_emb_svd32.csv")