Elasticsearch Script


In [3]:
!pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
!pip install pillow numpy elasticsearch tqdm
!pip install git+https://github.com/openai/CLIP.git

Looking in indexes: https://download.pytorch.org/whl/cpu
Collecting elasticsearch
  Downloading elasticsearch-9.2.0-py3-none-any.whl.metadata (8.9 kB)
Collecting elastic-transport<10,>=9.2.0 (from elasticsearch)
  Downloading elastic_transport-9.2.0-py3-none-any.whl.metadata (3.9 kB)
Downloading elasticsearch-9.2.0-py3-none-any.whl (960 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m960.5/960.5 kB[0m [31m45.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading elastic_transport-9.2.0-py3-none-any.whl (65 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.3/65.3 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: elastic-transport, elasticsearch
Successfully installed elastic-transport-9.2.0 elasticsearch-9.2.0
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-6woa9yko
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.gi

In [5]:
import clip
import torch
from PIL import Image
import numpy as np

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# Load the CLIP ViT-L/14 model together with its image preprocessing transform.
MODEL, PREPROCESS = clip.load("ViT-L/14", device=DEVICE)  # 768-dim

def embed_text(text: str) -> np.ndarray:
    """Embed a text string with CLIP and return an L2-normalised vector.

    Args:
        text: The string to embed. Text that tokenises to more than CLIP's
            context length is truncated rather than raising ``RuntimeError``,
            so long queries still produce an embedding.

    Returns:
        A 1-D ``float32`` NumPy array containing the normalised text features
        for ``text``.
    """
    with torch.no_grad():
        # truncate=True: clip.tokenize otherwise raises on over-long input.
        tokens = clip.tokenize([text], truncate=True).to(DEVICE)
        # Cast to float32 *before* normalising. The model may run in half
        # precision on CUDA; dividing by the norm in float32 keeps the result
        # unit-length to float32 accuracy instead of fp16 accuracy.
        feats = MODEL.encode_text(tokens).float()
        feats = feats / feats.norm(dim=-1, keepdim=True)
    return feats[0].cpu().numpy().astype(np.float32)

def embed_image(path_or_pil) -> np.ndarray:
    """Embed an image with CLIP and return an L2-normalised vector.

    Args:
        path_or_pil: Either an already-open ``PIL.Image.Image`` (used as-is)
            or anything ``PIL.Image.open`` accepts, such as a file path. Files
            opened here are closed again before the function returns.

    Returns:
        A 1-D ``float32`` NumPy array containing the normalised image features.
    """
    if isinstance(path_or_pil, Image.Image):
        pil_image = path_or_pil
    else:
        # The context manager releases the file handle; convert() loads the
        # pixel data and returns a separate in-memory image, so it stays
        # usable after the file is closed.
        with Image.open(path_or_pil) as opened:
            pil_image = opened.convert("RGB")

    # Add a leading batch dimension of size 1 for the model.
    batch = PREPROCESS(pil_image).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        # Cast to float32 *before* normalising so the unit-length result is
        # not limited to half precision when the model runs in fp16 on CUDA.
        feats = MODEL.encode_image(batch).float()
        feats = feats / feats.norm(dim=-1, keepdim=True)
    return feats[0].cpu().numpy().astype(np.float32)

# TEMPORARY TEST
# Smoke check: report the dimensionality of a sample text embedding.
if __name__ == "__main__":
    print(embed_text("hello world").shape[0])  # should print 768


100%|███████████████████████████████████████| 890M/890M [00:12<00:00, 73.5MiB/s]


768


In [6]:
if __name__ == "__main__":
    # 1. Test text embedding
    print("Text embedding length:", len(embed_text("hello world")))

    # 2. Test image embedding
    # google.colab is only importable on a Colab runtime, so it is imported
    # here rather than with the notebook's other imports.
    from google.colab import files
    uploaded = files.upload()  # choose a JPG/PNG file

    if not uploaded:
        # Nothing was uploaded (presumably the dialog was cancelled); skip the
        # image check instead of failing with an IndexError on an empty result.
        print("No file uploaded; skipping image embedding test.")
    else:
        # Use the first uploaded file name.
        filename = next(iter(uploaded))
        # Close the file handle once the embedding has been computed.
        with Image.open(filename) as img:
            vec = embed_image(img)
        print("Image embedding length:", len(vec))
        print(vec[:10])  # print first 10 values


Text embedding length: 768


Saving aio.jpg to aio.jpg
Image embedding length: 768
[ 0.05508782 -0.00832368  0.0452379  -0.00499035 -0.02895667  0.0109887
 -0.02423028 -0.00231402  0.00723218  0.03213012]
