# English Clip

https://github.com/mlfoundations/open_clip

In [2]:
!pip install open_clip_torch

Collecting open_clip_torch
  Downloading open_clip_torch-2.23.0-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
Collecting ftfy (from open_clip_torch)
  Downloading ftfy-6.1.1-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub (from open_clip_torch)
  Downloading huggingface_hub-0.19.1-py3-none-any.whl (311 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.1/311.1 kB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentencepiece (from open_clip_torch)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m67.8 MB/s[0m eta [36m0:00:00[0m
Collecting timm (from open_clip_torch)
  Downloading timm-0.9.10-py3-n

In [6]:
import torch
from PIL import Image
import open_clip

model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')
tokenizer = open_clip.get_tokenizer('ViT-B-32')

image = preprocess(Image.open("/content/unknown.png")).unsqueeze(0)
text = tokenizer(["a owl"])

with torch.no_grad(), torch.cuda.amp.autocast():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

print("Label probs:", text_probs)



Label probs: tensor([[1.]])


In [5]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.35.1-py3-none-any.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m52.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m91.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m33.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: huggingface-hub, tokenizers, transformers
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface-hub 0.19.1
    Uninstalling huggingface-hub-0.19.1:
      Successfully uninstalled huggingface-hub-0.19.1
Successfully i

# Russian Clip

https://github.com/ai-forever/ru-clip

In [6]:
!git clone https://github.com/ai-forever/ru-clip.git

Cloning into 'ru-clip'...
remote: Enumerating objects: 229, done.[K
remote: Counting objects: 100% (102/102), done.[K
remote: Compressing objects: 100% (64/64), done.[K
remote: Total 229 (delta 61), reused 44 (delta 38), pack-reused 127[K
Receiving objects: 100% (229/229), 6.50 MiB | 10.25 MiB/s, done.
Resolving deltas: 100% (99/99), done.


In [7]:
!pip install ru-clip/

Processing ./ru-clip
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ruclip
  Building wheel for ruclip (setup.py) ... [?25l[?25hdone
  Created wheel for ruclip: filename=ruclip-0.0.2-py3-none-any.whl size=14707 sha256=a459b2c8b70ae6026377bf0c9d576aa4e3659567e4def9979c157541bbcf03ff
  Stored in directory: /root/.cache/pip/wheels/f5/97/eb/2eea9a1a41cb632275745fb9bd1032f9d35b706fb68ddf27de
Successfully built ruclip
Installing collected packages: ruclip
Successfully installed ruclip-0.0.2


In [3]:
import ruclip
from PIL import Image

In [2]:
clip, processor = ruclip.load("ruclip-vit-base-patch16-224", device="cpu")

Downloading:   0%|          | 0.00/748k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/346 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/599M [00:00<?, ?B/s]

In [9]:
import torch
import base64
import requests
import matplotlib.pyplot as plt
from PIL import Image
from io import BytesIO

bs4_urls = requests.get('https://raw.githubusercontent.com/ai-forever/ru-dolph/master/pics/pipelines/cats_vs_dogs_bs4.json').json()
images = [Image.open(BytesIO(base64.b64decode(bs4_url))) for bs4_url in bs4_urls]
classes = ['кошка', 'собака']
templates = ['{}', 'это {}', 'на картинке {}', 'это {}, домашнее животное']
predictor = ruclip.Predictor(clip, processor, 'cpu', bs=8, templates=templates)


image_embedding = predictor.get_image_latents(images)
text_embedding = predictor.get_text_latents(classes)


0it [00:00, ?it/s][A
8it [00:06,  1.18it/s]
