In [1]:
from PIL import Image
import requests
from transformers import AutoProcessor, AutoModel
import torch

# Smoke test: load the SigLIP checkpoint and score one sample image against two
# candidate captions. `model` and `processor` are reused by later cells.
MODEL_ID = "google/siglip-so400m-patch14-384"
REQUEST_TIMEOUT_S = 30  # fail fast instead of hanging the kernel on a stalled download

model = AutoModel.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT_S) as response:
    # Surface HTTP errors here rather than as an opaque PIL decode failure.
    response.raise_for_status()
    image = Image.open(response.raw)
    # PIL decodes lazily; force the pixel data in before the connection is released.
    image.load()

texts = ["a photo of 2 cats", "a photo of 2 dogs"]
# NOTE(review): padding="max_length" follows the checkpoint's published usage example;
# confirm against the model card before changing it.
inputs = processor(text=texts, images=image, padding="max_length", return_tensors="pt")

# Inference only, so skip gradient tracking.
with torch.no_grad():
    outputs = model(**inputs)

logits_per_image = outputs.logits_per_image
# Element-wise sigmoid: every (image, text) pair gets its own independent score,
# so the values in a row are not normalised to sum to 1.
probs = torch.sigmoid(logits_per_image) # these are the probabilities
print(f"{probs[0][0]:.1%} that image 0 is '{texts[0]}'")

40.5% that image 0 is 'a photo of 2 cats'


In [20]:
from datetime import datetime
import pandas as pd
import os

In [21]:
# Zero-shot classification of the scene images with the SigLIP `model`/`processor`
# loaded earlier, written out as a submission CSV with columns `id_idx` and `label`.
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")

# Images are read as <IMAGE_DIR>/<n>.jpg for n in [0, NUM_BATCHES * BATCH_SIZE).
IMAGE_DIR = "./Scene/0"
NUM_BATCHES = 81
BATCH_SIZE = 100
SUBMISSION_DIR = "./submissions/"

# Loop invariants: candidate class texts and the output location.
# The predicted label is the index of the best-scoring entry in `texts`.
texts = ['Buildings', 'Forests', 'Glacier', 'Mountains', 'Sea', 'Street']
file_name = f'submission_{current_time}.csv'
# Create the output directory up front so the first write cannot fail after a
# full batch of inference has already been spent.
os.makedirs(SUBMISSION_DIR, exist_ok=True)
submission_path = os.path.join(SUBMISSION_DIR, file_name)

submission = {'id_idx': [], 'label': []}

for idx in range(NUM_BATCHES):
    print(idx+1, end=" ")
    images = []
    for i in range(BATCH_SIZE):
        # The context manager closes the file handle; convert() returns a fully
        # loaded RGB copy, so the image stays usable after the file is closed.
        with Image.open(f"{IMAGE_DIR}/{idx*BATCH_SIZE + i}.jpg") as img:
            images.append(img.convert("RGB"))

    inputs = processor(text=texts, images=images, padding="max_length", return_tensors="pt")

    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)

    logits_per_image = outputs.logits_per_image
    # Sigmoid is strictly increasing, so the argmax over raw logits picks the same
    # class while avoiding ties if probabilities were to saturate.
    rst = logits_per_image.argmax(dim=1).numpy()

    # tolist() yields plain Python ints rather than numpy scalars.
    submission['label'] += rst.tolist()

    # The CSV is rewritten after every batch, so the file on disk always contains
    # every prediction made so far; ids are simply the running row index.
    submission['id_idx'] = list(range(len(submission['label'])))
    pd.DataFrame(submission).to_csv(submission_path, index=False)

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 

In [22]:
# Show the name of the submission CSV written by the classification cell
# (the file itself lives under ./submissions/).
print(file_name)

submission_20240828_130720.csv
