In [None]:
import os

import pandas as pd
from transformers import pipeline

# Directory (relative to the working directory) that holds the images to classify.
folder_path = "public_test"

# Candidate text prompts for zero-shot classification.
# Order matters: a later cell turns a label into a class number via its
# position in this list (+1), so do not reorder or deduplicate casually.
class_labels = [
    "Hotel exterior, outdoor area, or building facade",
    "Hotel room, living space, or bedroom with furniture",
    "Swimming pool or hotel pool area",
    "Billiard table, pool table, or game room",
    "Bathroom with toilet, shower, sink, or bath amenities",
    "Hotel restaurant, dining room, or eating area",
    "Hotel lobby, reception area, or entrance hall",
    "Beachfront, shoreline, or sandy beach area",
    "Corridors, hallways, or staircases in the hotel",
    "Food dishes, meals on plates, or table settings",
    "Conference room, meeting room, or seminar space",
    "Gym, fitness center, or exercise equipment area",
    "Balcony view, outdoor balcony, or terrace",
    "Terrace, patio, or outdoor courtyard",
    "Spa, sauna, wellness center, or relaxation area",
]

# Zero-shot image classification pipeline; requests the CUDA device and
# feeds the model 20 inputs per batch.
image_classifier = pipeline(
    task="zero-shot-image-classification",
    model="google/siglip-so400m-patch14-384",
    device="cuda",
    batch_size=20,
)

# File extensions treated as images. The leading dot prevents names that merely
# end in "jpg"/"png" (e.g. "notes_png") from being picked up, and the check
# below is case-insensitive so "IMG.JPG" is included as well.
image_extensions = (".jpg", ".jpeg", ".png")

# Build the paths from `folder_path` (previously the directory name was
# duplicated as a literal). A plain "/" separator is kept on purpose because a
# later cell recovers the file name with `split("/")`. `sorted` makes the
# listing deterministic; `os.listdir` returns entries in arbitrary order.
image_files = sorted(
    f"{folder_path}/{name}"
    for name in os.listdir(folder_path)
    if name.lower().endswith(image_extensions)
)

# One entry per image; later cells read `["label"]` and `["score"]` from the
# first element of each entry.
outputs = image_classifier(image_files, candidate_labels=class_labels)

In [None]:
# Score cut-off used by the optional "zero class" fallback below, and embedded
# in the exported file name.
# NOTE(review): 228 is far above every score visible in the rendered output of
# this cell, so enabling the fallback with this value would send every image to
# the zero class - confirm the intended value before switching it on.
threshold_for_zero_class = 228

# The fallback used to live in a commented-out line; it is now an explicit
# switch. It stays off so the cell keeps producing the same CSV content.
apply_zero_class_threshold = False
# Class number assigned when the fallback triggers (value taken from the
# previously commented-out line).
zero_class_number = 16

# Keep only the first candidate of every per-image result.
# Assumes the pipeline returns candidates ordered by descending score - TODO confirm.
new_outputs = [candidates[0] for candidates in outputs]
scores = [top["score"] for top in new_outputs]
labels = [top["label"] for top in new_outputs]
df = pd.DataFrame({"images": image_files, "labels": labels, "scores": scores})

# 1-based class number = position of the label in `class_labels`.
# A dict lookup replaces the per-row linear `list.index` search; `astype`
# fails loudly if a label is ever missing from the mapping.
label_to_class = {label: number for number, label in enumerate(class_labels, start=1)}
df["class_number"] = df["labels"].map(label_to_class).astype("int64")

if apply_zero_class_threshold:
    # Keep the predicted class only when its score exceeds the threshold.
    df["class_number"] = df["class_number"].where(
        df["scores"] > threshold_for_zero_class, zero_class_number
    )

# `os.path.basename` keeps working when `folder_path` contains nested
# directories, unlike taking element 1 of a "/"-split.
df["image_name"] = df["images"].map(os.path.basename)

# Order rows by the number between the last "_" and the first "." of the file
# name (e.g. "public_test_12.jpg" -> 12) so 2 sorts before 10.
df = df.sort_values(
    by="image_name",
    key=lambda names: names.str.rsplit("_", n=1)
    .str[-1]
    .str.split(".")
    .str[0]
    .astype(int),
)

# Export only the two submission columns; the threshold value is part of the
# file name even while the fallback is disabled.
df[["image_name", "class_number"]].to_csv(
    f"siglip-so400m-patch14-384_{threshold_for_zero_class}_gpt_prompts.csv", index=False
)
df

Unnamed: 0,images,labels,scores,class_number,image_name
199,public_test/public_test_1.jpg,"Spa, sauna, wellness center, or relaxation area",0.238302,15,public_test_1.jpg
281,public_test/public_test_2.jpg,"Hotel room, living space, or bedroom with furn...",0.116635,2,public_test_2.jpg
496,public_test/public_test_3.jpg,"Hotel room, living space, or bedroom with furn...",0.251428,2,public_test_3.jpg
379,public_test/public_test_4.jpg,"Gym, fitness center, or exercise equipment area",0.109092,12,public_test_4.jpg
458,public_test/public_test_5.jpg,"Hotel exterior, outdoor area, or building facade",0.055614,1,public_test_5.jpg
...,...,...,...,...,...
334,public_test/public_test_1120.jpg,"Conference room, meeting room, or seminar space",0.019155,11,public_test_1120.jpg
326,public_test/public_test_1121.jpg,"Gym, fitness center, or exercise equipment area",0.015733,12,public_test_1121.jpg
472,public_test/public_test_1122.jpg,"Hotel room, living space, or bedroom with furn...",0.059990,2,public_test_1122.jpg
394,public_test/public_test_1123.jpg,"Corridors, hallways, or staircases in the hotel",0.000044,9,public_test_1123.jpg


In [None]:
# Prediction files to compare.
# NOTE(review): the export cell above currently writes a file whose name
# contains threshold 228, not 0.0005 - this read presumably relies on a CSV
# left over from an earlier run; confirm it exists before "Run All".
siglip_predictions_csv = "siglip-so400m-patch14-384_0.0005_gpt_prompts.csv"
# Presumably produced by a separate CLIP run that is not part of the visible cells.
clip_predictions_csv = "clip_openai_large_0.1_gpt_prompts.csv"

df_1 = pd.read_csv(siglip_predictions_csv)
df_2 = pd.read_csv(clip_predictions_csv)

In [None]:
# Relabel the second frame's columns positionally (this requires exactly two
# columns). Presumably done so its class column does not collide with
# `class_number` from the first frame when the two are merged.
clip_column_names = ["image_name", "clip_class"]
df_2.columns = clip_column_names

In [None]:
# Join the two prediction tables on the image file name. The join is an inner
# one (the pandas default, spelled out here), so images missing from either
# file are dropped from the comparison.
final_df = df_1.merge(df_2, on="image_name", how="inner")

In [None]:
# Show only the images on which the two prediction columns differ.
disagreement_mask = final_df["class_number"].ne(final_df["clip_class"])
final_df.loc[disagreement_mask]

Unnamed: 0,image_name,class_number,clip_class
0,public_test_1.jpg,15,3
4,public_test_5.jpg,1,3
6,public_test_7.jpg,16,2
9,public_test_10.jpg,13,1
16,public_test_17.jpg,13,14
...,...,...,...
1109,public_test_1110.jpg,16,10
1111,public_test_1112.jpg,16,6
1113,public_test_1114.jpg,13,2
1116,public_test_1117.jpg,16,2
