In [5]:
from PIL import Image
import requests
from transformers import CLIPProcessor, CLIPModel, CLIPVisionModel
from tqdm.auto import tqdm 
import os
import numpy as np
import torch

In [2]:
# One checkpoint id shared by the encoder and its preprocessor so they cannot drift apart.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"

# Prefer the GPU, but fall back to CPU so the notebook still runs on hosts without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Only the vision tower is loaded (CLIPVisionModel rather than the full CLIPModel):
# the cells in this notebook feed it pixel_values and read back pooler_output only.
model = CLIPVisionModel.from_pretrained(CLIP_CHECKPOINT).to(device)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)

In [5]:
# Fetch a single COCO validation image to smoke-test the preprocessing / model pipeline.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, stream=True, timeout=30)
# Surface HTTP failures (404, 5xx, ...) here instead of as an opaque PIL decode error.
response.raise_for_status()
image = Image.open(response.raw)

In [6]:
# Turn the PIL image into PyTorch tensors for the vision model.
# NOTE(review): `padding` looks irrelevant for image-only input — confirm before removing.
inputs = processor(
    images=image,
    return_tensors="pt",
    padding=True,
)

In [8]:
# Sanity check: shape of the preprocessed image batch.
inputs["pixel_values"].shape

torch.Size([1, 3, 224, 224])

In [9]:
# The model may live on the GPU while the processor returns CPU tensors, so move the
# inputs onto the model's device first; otherwise the forward pass raises a device mismatch.
inputs = {k: v.to(model.device) for k, v in inputs.items()}
# Inference only: skip building the autograd graph.
with torch.no_grad():
    outputs = model(**inputs)

In [13]:
# Sanity check: shape of the pooled image embedding returned by the vision model.
outputs.pooler_output.shape

torch.Size([1, 768])

In [5]:
# Entry names under the ball-render directory, in lexicographic order.
# The extraction loop treats each entry as a sub-directory containing image.png.
files = os.listdir("datasets/scale-1_1/unsplash2000_ball")
files.sort()

In [6]:
# Compute a CLIP pooled image embedding for every entry in `files` and store it under a
# mirrored directory in datasets/scale-1_1/unsplash2000_clip/<entry>/.
with torch.no_grad():  # inference only; no autograd graph needed
    for filename in tqdm(files):
        # The context manager releases the file handle as soon as preprocessing has
        # turned the image into tensors.
        with Image.open(f"datasets/scale-1_1/unsplash2000_ball/{filename}/image.png") as image:
            inputs = processor(images=image, return_tensors="pt", padding=True)
        # Follow whatever device the model is on instead of hard-coding "cuda".
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        outputs = model(**inputs)
        # pooler_output is the pooled embedding (one vector per image).
        pooled_features = outputs.pooler_output
        output_dir = f"datasets/scale-1_1/unsplash2000_clip/{filename}"
        os.makedirs(output_dir, exist_ok=True)
        # The on-disk name "last_hidden_states.pt" is kept unchanged even though the
        # tensor is the pooled output, so existing readers of these files keep working.
        torch.save(pooled_features.cpu(), f"{output_dir}/last_hidden_states.pt")

  0%|          | 0/2000 [00:00<?, ?it/s]

# Process the small studio set (`datasets/rotate_studio_ball`)

In [3]:
# Entry names under the ball-render directory, in lexicographic order.
# NOTE(review): the studio loop in the next cell iterates over np.linspace and does not
# read `files` — confirm whether this listing is still needed here.
files = os.listdir("datasets/scale-1_1/unsplash2000_ball")
files.sort()

In [7]:
# Compute a CLIP pooled image embedding for each of the 360 studio frames.
# Frame `idx` is read from datasets/rotate_studio_ball/<idx:03d>.png and its embedding is
# written to a directory named after the matching np.linspace(-1, 1, 360) value
# formatted with 8 decimals.
with torch.no_grad():  # inference only; no autograd graph needed
    for idx, value in enumerate(tqdm(np.linspace(-1,1,360))):
        # Output directory name derived from the linspace value (e.g. "-1.00000000").
        filename = f"{value:.8f}"
        # The context manager releases the file handle as soon as preprocessing has
        # turned the image into tensors.
        with Image.open(f"datasets/rotate_studio_ball/{idx:03d}.png") as image:
            inputs = processor(images=image, return_tensors="pt", padding=True)
        # Follow whatever device the model is on instead of hard-coding "cuda".
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        outputs = model(**inputs)
        # pooler_output is the pooled embedding (one vector per image).
        pooled_features = outputs.pooler_output
        output_dir = f"datasets/scale-1_1/rotate_studio_clip/{filename}"
        os.makedirs(output_dir, exist_ok=True)
        # The on-disk name "last_hidden_states.pt" is kept unchanged even though the
        # tensor is the pooled output, so existing readers of these files keep working.
        torch.save(pooled_features.cpu(), f"{output_dir}/last_hidden_states.pt")

  0%|          | 0/360 [00:00<?, ?it/s]