In [None]:
import os
import shutil

import numpy as np
import pandas as pd
import torch
from transformers import pipeline

# Ask PyTorch to release any cached, currently unused CUDA memory before the
# model is loaded (useful when this cell is re-run in the same kernel).
torch.cuda.empty_cache()

# Directory whose image files are listed and classified further down.
folder_path = "public_test"

# Text prompts passed to the zero-shot classifier as candidate labels.
# ORDER MATTERS: downstream code derives the numeric class as
# `class_labels.index(label) + 1`, so the trailing number on each line is the
# class id that label maps to.
# The remaining remarks are the author's informal quality notes. Standalone
# comment lines presumably refer to the label directly below them - TODO confirm.
class_labels = [
    "Close-up view of the hotel's front exterior taken from ground level",  # 1: very good
    "Hotel room, living space or bedroom with furniture, double bed or sofa",  # 2
    "Swimming pool or hotel pool area",  # 3
    # seems okay after removing ping pong
    "Room with a billiard table, pool table, billiard balls or pool cue",  # 4
    "Bathroom with toilet, shower, tap or sink",  # 5: okay
    "Hotel restaurant, dining room, or eating area",  # 6
    # very difficult, might also be a bar
    "Hotel lobby, lounge, entrance hall or reception area with a front desk",  # 7
    "Beachfront, shoreline, sandy beach area with sun loungers and beach umbrellas",  # 8
    "Corridors, doors, hallways, or staircases in the hotel",  # 9: has some errors, but okay
    "Food dishes or meals on plates, up-close food",  # 10
    "Conference room, meeting room, or seminar space",  # 11
    # seems okay, just need to filter out other stuff
    "Gym, fitness center, exercise equipment area or people exercising",  # 12
    "Table and chairs on an outdoor balcony",  # 13: bad
    "Terrace, patio, or outdoor courtyard",  # 14
    "Spa, sauna, wellness center, or relaxation area",  # 15
]

# Guard the numbering scheme: class ids 1-15 come from this list, and the
# post-processing cell assigns the hard-coded id 16 to low-confidence images.
assert len(class_labels) == 15

# class_labels += [
#     "Ping pong table, tennis court, golf area, basketball court, athletic track", # good
#     "City panorama view, skyscapers or towers", # good
#     "Bicycles", # good
#     "Retail store, clothing store, or fashion boutique with racks, mannequins, or clothing displays", # good
#     "Small logo, emblem, information box on the wall or address sign, certificate", # good
#     "Slot machines room"
#     # "Scenic landscape or panoramic view from a high vantage point, aerial shots",
#     # "Bathroom tubes or hair dryer",
#     # "Vending machine, coffee machine, hotel bar, or mini bar with drinks or refreshments",
#     "Children's playroom",
#     # "Water and mountains, boats, sunsets", # Good
#     # "Kitchen or cooking area",
#     # "Small hotel co-working or seating area with television, sofas or chairs, piano",
#     # "Computer or laptop with a keyboard or mouse on a desk",
#     # "Desk phone on the table with an office chair",
#     # "Close-up pen, pencil, notepad or office supplies on the desk",
#     # "Space with elevators or a single lift",
#     # "Hotel room interior with a front door and a wardrobe",
#     # "A painting or drawing on the wall",
#     # "Office desk with a computers, office chairs",
#     # "Table setting with spoons, cups, plates, napkins and placemat",
# ]
#
# coffee machine


# Hugging Face model id of the CLIP checkpoint used for zero-shot classification.
# The same string is reused later in the notebook to name the output CSV files.
model = "apple/DFN5B-CLIP-ViT-H-14-378"
# model = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"

# Collect the input images BEFORE loading the model so that a missing or empty
# folder fails fast instead of after an expensive checkpoint load.
#  * paths are built from `folder_path` (previously the folder name was
#    duplicated as a hard-coded "public_test/" prefix);
#  * "/" is kept as the separator on purpose: later cells split paths on "/";
#  * the extension test is case-insensitive and includes the dot, so "IMG.JPG"
#    is picked up and a file merely ending in "jpg" (no extension) is not;
#  * the list is sorted because os.listdir() returns entries in arbitrary
#    order, which would make positional look-ups such as `outputs[i]`
#    irreproducible between runs/machines.
image_extensions = (".jpg", ".jpeg", ".png")
image_files = sorted(
    f"{folder_path}/{f}"
    for f in os.listdir(folder_path)
    if f.lower().endswith(image_extensions)
)
if not image_files:
    raise FileNotFoundError(
        f"No .jpg/.jpeg/.png images found in {folder_path!r}"
    )

# Prefer the GPU, but fall back to CPU so the notebook still runs (slowly)
# on a machine without CUDA instead of crashing at pipeline creation.
device = "cuda" if torch.cuda.is_available() else "cpu"

image_classifier = pipeline(
    task="zero-shot-image-classification",
    # model="openai/clip-vit-large-patch14",
    model=model,
    device=device,
    batch_size=10,
)

# One entry per image, aligned with `image_files`; each entry is a list of
# {"label": ..., "score": ...} dicts covering every candidate label.
outputs = image_classifier(image_files, candidate_labels=class_labels)

# Dropping the reference alone leaves the freed blocks in PyTorch's caching
# allocator; empty_cache() hands them back to the driver. It is a no-op when
# CUDA was never initialised, so it is safe on the CPU fallback path too.
del image_classifier
torch.cuda.empty_cache()



In [None]:
# Minimum top-1 score an image needs in order to keep its predicted class;
# images scoring at or below it are assigned the fallback class id below.
# Original author's note (translated from Russian): "not used; the score is
# higher when there is no 16th class".
# NOTE(review): despite that note, the threshold IS applied in this cell and
# is embedded in the output file name - confirm which behaviour is intended.
threshold_for_zero_class = 0.2
fallback_class = 16

# Best-scoring prediction per image. Taking the max (rather than element 0)
# does not rely on the classifier returning its predictions pre-sorted.
new_outputs = [max(preds, key=lambda pred: pred["score"]) for preds in outputs]

# Per-image score distribution statistics over all candidate labels.
per_image_scores = [[pred["score"] for pred in preds] for preds in outputs]
mean = [np.mean(image_scores) for image_scores in per_image_scores]
median = [np.median(image_scores) for image_scores in per_image_scores]

scores = [pred["score"] for pred in new_outputs]
labels = [pred["label"] for pred in new_outputs]
df = pd.DataFrame(
    {
        "images": image_files,
        "labels": labels,
        "scores": scores,
        "means": mean,
        "medians": median,
    }
)

# Class id = 1-based position of the label in `class_labels`. A dict lookup
# replaces the per-row list.index() scan.
label_to_class = {label: number for number, label in enumerate(class_labels, start=1)}
df["class_number"] = df["labels"].map(label_to_class)

# Low-confidence predictions (score <= threshold) go to the fallback class.
# Series.where keeps values where the condition holds and substitutes
# elsewhere; vectorised equivalent of the former row-wise apply, and it also
# works on an empty frame.
df["class_number"] = df["class_number"].where(
    df["scores"] > threshold_for_zero_class, fallback_class
)

# df["class_number"] = df["class_number"].apply(lambda x: x if x <=15 else 16)

# File name without its directory. basename() is correct for nested folders,
# unlike the previous `split("/")[1]`, which only worked for a single-level
# folder and disagreed with the `[-1]` used by the copy cell.
df["image_name"] = df["images"].map(os.path.basename)


def _image_number(names: pd.Series) -> pd.Series:
    """Return the integer that precedes the file extension in each name.

    "public_test_12.jpg" -> 12. Names without a trailing "_<digits>.<ext>"
    yield NaN (and therefore sort last) instead of raising.
    """
    return pd.to_numeric(names.str.extract(r"_(\d+)\.[^.]+$", expand=False))


# Numeric (not lexicographic) order: ..._2.jpg comes before ..._10.jpg.
df = df.sort_values(by="image_name", key=_image_number, kind="stable")

# Submission file: one row per image with its final class id.
df[["image_name", "class_number"]].to_csv(
    f"{model.replace('/', '-')}_{threshold_for_zero_class}_a_lot_of_classes_latest.csv",
    index=False,
)
df

Unnamed: 0,images,labels,scores,means,medians,class_number,image_name
199,public_test/public_test_1.jpg,Swimming pool or hotel pool area,0.307947,0.066667,0.039753,3,public_test_1.jpg
281,public_test/public_test_2.jpg,"Hotel room, living space or bedroom with furni...",0.347617,0.066667,0.038855,2,public_test_2.jpg
496,public_test/public_test_3.jpg,"Hotel room, living space or bedroom with furni...",0.287645,0.066667,0.039548,2,public_test_3.jpg
379,public_test/public_test_4.jpg,"Gym, fitness center, exercise equipment area o...",0.402974,0.066667,0.039854,12,public_test_4.jpg
458,public_test/public_test_5.jpg,Swimming pool or hotel pool area,0.268025,0.066667,0.045751,3,public_test_5.jpg
...,...,...,...,...,...,...,...
334,public_test/public_test_1120.jpg,"Conference room, meeting room, or seminar space",0.367251,0.066667,0.047561,11,public_test_1120.jpg
326,public_test/public_test_1121.jpg,"Gym, fitness center, exercise equipment area o...",0.283822,0.066667,0.053483,12,public_test_1121.jpg
472,public_test/public_test_1122.jpg,"Hotel room, living space or bedroom with furni...",0.224617,0.066667,0.052842,2,public_test_1122.jpg
394,public_test/public_test_1123.jpg,"Corridors, doors, hallways, or staircases in t...",0.165135,0.066667,0.062786,16,public_test_1123.jpg


In [None]:
os.makedirs("eyes_check", exist_ok=True)

# Copy every classified image into eyes_check/ under a name that starts with
# its predicted label (presumably so predictions can be inspected by eye in a
# file browser - the folder name suggests as much).
# Iterating over the two needed columns replaces the former
# `enumerate(df.iterrows())`, whose counter and index were both discarded.
for src_path, label in zip(df["images"], df["labels"]):
    # Labels are free text: neutralise path separators so a label can never
    # send the copy into a different (possibly non-existent) directory.
    # Current labels contain none, so resulting file names are unchanged.
    safe_label = label.replace("/", "_").replace(os.sep, "_")
    new_name = safe_label + os.path.basename(src_path)
    dst_path = os.path.join("eyes_check", new_name)

    # Copy the file to the new location with the new name
    shutil.copy(src_path, dst_path)

In [None]:
# Re-display the predictions table (one row per image: path, top label, top
# score, mean/median score, class number, file name).
df

In [None]:
# Spot check: raw classifier result at position 1008 of `outputs`, i.e. the
# list of {"label", "score"} entries for one image. The position refers to the
# order in which images were passed to the classifier, not to the sorted
# DataFrame - presumably it lines up with image_files[1008]; verify before
# relying on it.
outputs[1008]

In [None]:
# Spot check: the prediction row for one specific image.
df.loc[df["image_name"].eq("public_test_501.jpg")]

In [None]:
# First 40 images whose class number is 16 or higher, i.e. those that were
# moved to the fallback class because their top score did not exceed the threshold.
df.loc[df["class_number"].ge(16)].head(40)

In [None]:
# Images whose best label scored below 0.15.
df.loc[df["scores"].lt(0.15)]

In [None]:
# Images whose top label is the spa/wellness prompt but with a score below 0.2.
is_spa = df["labels"].eq("Spa, sauna, wellness center, or relaxation area")
is_low_score = df["scores"].lt(0.2)
df.loc[is_spa & is_low_score]

In [None]:
# Export file name, top label and top score per image for manual review.
# The file name encodes the model id (with "/" replaced by "-") and the threshold.
eyes_check_csv = f"{model.replace('/', '-')}_{threshold_for_zero_class}_eyes_check.csv"
df.loc[:, ["image_name", "labels", "scores"]].to_csv(eyes_check_csv, index=False)