In [2]:
#!pip install transformers einops timm pillow
from transformers import AutoModel

# Initialize the model
model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)

# New meaningful sentences
sentences = ['A blue cat', 'A red cat']

# Public image URLs
image_urls = [
    'https://i.pinimg.com/600x315/21/48/7e/21487e8e0970dd366dafaed6ab25d8d8.jpg',
    'https://i.pinimg.com/736x/c9/f2/3e/c9f23e212529f13f19bad5602d84b78b.jpg'
]

# Encode text and images
text_embeddings = model.encode_text(sentences)
image_embeddings = model.encode_image(image_urls)  # also accepts PIL.image, local filenames, dataURI

# Compute similarities
print(text_embeddings[0] @ text_embeddings[1].T) # text embedding similarity
print(text_embeddings[0] @ image_embeddings[0].T) # text-image cross-modal similarity
print(text_embeddings[0] @ image_embeddings[1].T) # text-image cross-modal similarity
print(text_embeddings[1] @ image_embeddings[0].T) # text-image cross-modal similarity
print(text_embeddings[1] @ image_embeddings[1].T)# text-image cross-modal similarity

0.5635972
0.2905935
0.05685927
0.12773362
0.2915658


In [9]:
import torch
import torch.nn.functional as F

# 레이블 리스트
labels_list = ['Buildings', 'Forests', 'Glacier', 'Mountains', 'Sea', 'Street']

# 이미지 리스트
images = ["./Scene/0/0.jpg", "./Scene/0/1.jpg"]

# 텍스트 및 이미지 임베딩 계산
text_embeddings = model.encode_text(labels_list)
image_embeddings = model.encode_image(images)

# 결과 계산 (점수 매트릭스)
rst = image_embeddings @ text_embeddings.T

# numpy.ndarray를 torch.Tensor로 변환
rst = torch.tensor(rst)

# 소프트맥스 함수 적용 (차원=1, 각 행에 대해 확률 계산)
softmax_probs = F.softmax(rst, dim=1)

# 출력: softmax 결과 확인
print(softmax_probs)

tensor([[0.1715, 0.1601, 0.1593, 0.1654, 0.1626, 0.1811],
        [0.1863, 0.1515, 0.1596, 0.1696, 0.1654, 0.1676]])


In [23]:
from datetime import datetime
import pandas as pd
import os

In [88]:
model.logit_scale.exp()

tensor(98.5356, grad_fn=<ExpBackward0>)

In [None]:
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")

submission = dict({'id_idx': [], 'label': []})

for idx in range(81):
    print(idx+1, end=" ")
    images = []
    for i in range(100):
        images.append(f"./Scene/0/{idx*100 + i}.jpg")
        
    image_embeddings = model.encode_image(images)
    rst = int(model.logit_scale.exp()) * image_embeddings @ text_embeddings.T
    rst = torch.tensor(rst)
    softmax_probs = F.softmax(rst, dim=1)
    rst = softmax_probs.tolist()
    submission['label'] += rst

In [92]:
file_name = f'JINA_soft_{current_time}.csv'
submission['id_idx'] = list(range(len(submission['label'])))
pd.DataFrame(submission).to_csv(os.path.join("./submissions/", file_name), index=False)