In [4]:
import os
import torch
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
from sklearn.neighbors import NearestNeighbors
import clip 
# Prefer the GPU when one is available, but fall back to the CPU so the
# notebook still runs (more slowly) on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Directory that is scanned for the input images.
IMAGE_DIR = './dataset'
# Load the CLIP ViT-B/32 checkpoint onto `device`. `preprocess` is the callable
# applied to each PIL image before it is handed to `model.encode_image`.
model, preprocess = clip.load(
    'ViT-B/32',
    device=device,
)

In [None]:
# Collect the image files in a deterministic (sorted) order; row i of
# `features` corresponds to filenames[i]. The extension check is
# case-insensitive so files such as "IMG_001.JPG" are not silently skipped.
filenames = sorted(
    f for f in os.listdir(IMAGE_DIR)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)
if not filenames:
    # Fail early with a clear message instead of the opaque np.stack error.
    raise ValueError(f"No .png/.jpg/.jpeg images found in {IMAGE_DIR!r}")

features = []
# Inference only: disable autograd once for the whole loop.
with torch.no_grad():
    for fname in tqdm(filenames):
        # Close the file handle as soon as the pixels have been converted.
        with Image.open(os.path.join(IMAGE_DIR, fname)) as img:
            image = preprocess(img.convert("RGB")).unsqueeze(0).to(device)
        # The CLIP reference implementation runs in half precision on CUDA;
        # cast to float32 so the later normalisation is not done in fp16.
        feat = model.encode_image(image).float().cpu().numpy()
        # encode_image returns a batch of one; keep the single feature vector.
        features.append(feat[0])
# Shape: (number of images, embedding dimension).
features = np.stack(features)

# L2-normalise every feature row. The norm is clamped from below so that an
# all-zero row stays zero instead of turning into NaN/inf through 0/0.
norms = np.linalg.norm(features, axis=1, keepdims=True)
features = features / np.maximum(norms, np.finfo(features.dtype).eps)

# Ask for 7 neighbours: up to 6 results per image plus the image itself, which
# is normally returned as its own nearest neighbour. The count is clamped to
# the dataset size because kneighbors rejects n_neighbors > number of samples.
n_neighbors = min(7, len(features))
nn = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine').fit(features)
# Row i of `indices` / `distances` describes the neighbours of features[i].
distances, indices = nn.kneighbors(features)

# Build one record per image: its own filename plus a space-separated list of
# at most 6 neighbour filenames, with the image itself filtered out.
rows = [
    {
        "filename": query_name,
        "ranking": " ".join(
            [
                filenames[neighbor_idx]
                for neighbor_idx in indices[query_idx]
                if neighbor_idx != query_idx
            ][:6]
        ),
    }
    for query_idx, query_name in enumerate(filenames)
]

# Persist the rankings; the DataFrame index is not written to the file.
df = pd.DataFrame(rows)
df.to_csv("submission.csv", index=False)

100%|██████████| 9605/9605 [02:13<00:00, 71.79it/s]
