In [1]:
from PIL import Image
import requests
from transformers import AutoProcessor, AutoModel
import torch

# Smoke test: load SigLIP and score one COCO image against two captions.
# The checkpoint id is named once so the model and its processor cannot drift apart.
MODEL_ID = "google/siglip-so400m-patch14-384"
model = AutoModel.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# The timeout stops a stalled server from hanging the kernel forever, and
# raise_for_status() surfaces HTTP errors instead of handing an error page to PIL.
with requests.get(url, stream=True, timeout=30) as response:
    response.raise_for_status()
    # convert() forces a full decode while the connection is still open, so the
    # image stays usable after the response is closed.
    image = Image.open(response.raw).convert("RGB")

texts = ["a photo of 2 cats", "a photo of 2 dogs"]
# NOTE: the Hugging Face SigLIP docs recommend padding="max_length" because the
# model was trained with fixed-length text inputs.
inputs = processor(text=texts, images=image, padding="max_length", return_tensors="pt")

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(**inputs)

logits_per_image = outputs.logits_per_image
# Sigmoid is applied per image/text pair, so the scores are independent
# probabilities and do not have to sum to 1 across the captions.
probs = torch.sigmoid(logits_per_image) # these are the probabilities
print(f"{probs[0][0]:.1%} that image 0 is '{texts[0]}'")

40.5% that image 0 is 'a photo of 2 cats'


In [2]:
# Scene categories, in the order used to map a prompt index back to a class id.
class_names = ['Buildings', 'Forests', 'Glacier', 'Mountains', 'Sea', 'Street']

# Prompt templates applied to every class. `{class_name}` is filled in below.
# The order matters: prompts for one class occupy one contiguous run of
# `labels_list`, so a prompt index divided by the template count gives the class.
prompt_templates = (
    # generic photo descriptions
    "A photo of a {class_name}",
    "An image depicting a {class_name}",
    "A scenic view of {class_name}",
    "A {class_name} landscape",
    "A snapshot of a {class_name}",
    "A beautiful {class_name} scene",
    "An artistic representation of {class_name}",
    "A {class_name} captured in nature",
    "A {class_name} during sunset",
    "A serene {class_name} environment",
    # descriptive adjectives
    "A breathtaking view of {class_name}",
    "A majestic {class_name} landscape",
    "A vibrant {class_name} scene",
    "A {class_name} in the early morning light",
    "A dramatic depiction of {class_name}",
    "A {class_name} immersed in fog",
    "A serene and peaceful {class_name}",
    "A rugged {class_name} terrain",
    "A {class_name} teeming with life",
    "A sun-drenched {class_name}",
    # full sentences
    "What a beautiful {class_name}!",
    "This is a {class_name} scene.",
    "Imagine a {class_name} like this.",
    "Picture a serene {class_name}.",
    "The essence of {class_name} is captured here.",
    "Nature's {class_name} at its best.",
    "A true depiction of {class_name}.",
    "The tranquility of {class_name}.",
    "The rugged beauty of {class_name}.",
    "The timelessness of {class_name}.",
    # weather and time of day
    "A {class_name} at sunrise",
    "A rainy day in the {class_name}",
    "A snowy {class_name} landscape",
    "A bright sunny day over the {class_name}",
    "A {class_name} covered in mist",
    "An overcast {class_name} scene",
    "A {class_name} during golden hour",
    "A {class_name} under a clear blue sky",
    "A windy day in the {class_name}",
    "A {class_name} under a starry night",
    # activity in the scene
    "People hiking in the {class_name}",
    "Birds flying over the {class_name}",
    "Children playing near the {class_name}",
    "A festival happening in the {class_name}",
    "Boats sailing on the {class_name}",
    "A picnic by the {class_name}",
    "Wildlife roaming in the {class_name}",
    "A campsite in the {class_name}",
    "A storm approaching the {class_name}",
    "A serene sunset at the {class_name}",
    # camera viewpoints
    "A bird's-eye view of the {class_name}",
    "A close-up shot of {class_name}",
    "A panoramic view of {class_name}",
    "A view from the top of a {class_name}",
    "A distant shot of the {class_name}",
    "A view of {class_name} through the trees",
    "A side view of {class_name}",
    "A low-angle shot of the {class_name}",
    "A view from within the {class_name}",
    "A wide-angle shot capturing the {class_name}",
    # mood and emotion
    "The solitude of the {class_name}",
    "The awe-inspiring {class_name}",
    "A melancholic {class_name} scene",
    "A {class_name} filled with hope",
    "The beauty of simplicity in the {class_name}",
    "The grandeur of {class_name}",
    "A nostalgic view of {class_name}",
    "The peaceful calm of {class_name}",
    "A {class_name} that inspires wonder",
    "A scene that stirs the soul: {class_name}",
    # light, colour and texture
    "A {class_name} bathed in golden sunlight",
    "The intricate details of a {class_name}",
    "Shadow patterns on a {class_name}",
    "The vibrant colors of a {class_name}",
    "A {class_name} contrasted against a bright sky",
    "A {class_name} in soft morning light",
    "Textures and layers of a {class_name}",
    "A {class_name} with reflections in the water",
    "Light filtering through a {class_name}",
    "Rich shadows falling over the {class_name}",
    # seasons
    "A {class_name} covered in fresh snow",
    "A {class_name} during a spring bloom",
    "Golden autumn leaves around a {class_name}",
    "A {class_name} at dusk, under a purple sky",
    "A {class_name} in the crisp air of winter",
    "A {class_name} at dawn with dewdrops",
    "A {class_name} under the summer sun",
    "A {class_name} in the cool shade of evening",
    "A {class_name} blanketed in fog on a winter morning",
    "A {class_name} glowing under the full moon",
    # poetic phrasing
    "The quiet majesty of a {class_name}",
    "A {class_name} that whispers tales of old",
    "The eternal beauty of the {class_name}",
    "A {class_name} that sings the songs of the earth",
    "A {class_name} veiled in the mist of time",
    "The {class_name} where dreams meet reality",
    "A {class_name} that holds the secrets of nature",
    "A {class_name} painted by the hands of the universe",
    "The {class_name}, a canvas of the soul",
    "A {class_name} where the earth meets the sky",
)

# Expand class by class: all prompts for class 0 first, then class 1, and so on.
labels_list = []
for class_name in class_names:
    labels_list += [template.format(class_name=class_name) for template in prompt_templates]

In [None]:
import pandas as pd
import os

# Zero-shot classification of the scene images, written out as a submission CSV.
# Images are processed in fixed-size batches and the CSV is rewritten after every
# batch, so partial results survive an interrupted run.
NUM_BATCHES = 81      # batches to process
BATCH_SIZE = 100      # images per batch; files are named <global index>.jpg
IMAGE_DIR = "./Scene/0"
SUBMISSION_DIR = "./submissions/"
file_name = 'submission_SIGLIP-P100.csv'

# `labels_list` holds the prompts of each class as one contiguous run, so the
# class id is the prompt index divided by the run length. Derive that length
# rather than hard-coding it, and fail loudly if the prompt bank is ragged.
prompts_per_class, leftover = divmod(len(labels_list), len(class_names))
if leftover or prompts_per_class == 0:
    raise ValueError(
        f"labels_list has {len(labels_list)} prompts, which is not a positive "
        f"multiple of the {len(class_names)} classes"
    )

# to_csv does not create missing directories.
os.makedirs(SUBMISSION_DIR, exist_ok=True)
submission_path = os.path.join(SUBMISSION_DIR, file_name)

submission = {'id_idx': [], 'label': []}

for idx in range(NUM_BATCHES):
    print(idx+1, end=" ")
    images = []
    for i in range(BATCH_SIZE):
        image_path = os.path.join(IMAGE_DIR, f"{idx*BATCH_SIZE + i}.jpg")
        # The context manager releases the file handle; convert() decodes the
        # pixels now and gives the processor a consistent 3-channel image.
        with Image.open(image_path) as img:
            images.append(img.convert("RGB"))

    inputs = processor(text=labels_list, images=images, padding="max_length", return_tensors="pt")

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    logits_per_image = outputs.logits_per_image
    # Sigmoid is monotonic, so the best prompt can be picked from the raw logits;
    # this also avoids ties when the sigmoid would saturate in float precision.
    best_prompt = logits_per_image.argmax(dim=1)
    # .cpu() keeps this working if the model is later moved to an accelerator.
    rst = best_prompt.cpu().numpy() // prompts_per_class

    submission['label'] += rst.tolist()

    # Row ids are the running image index, regenerated to match the labels so far.
    submission['id_idx'] = list(range(len(submission['label'])))
    pd.DataFrame(submission).to_csv(submission_path, index=False)

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 

In [None]:
# Echo the file name of the submission CSV written by the inference cell above.
print(file_name)