In [None]:
!pip install transformers==4.37.2 torch==2.2.0

## Load config

In [None]:
from config import BASE_PATH, CLIP_MODEL, ID_MAPPING, INDEX_NAME

# Echo the settings pulled from the project config so the reader can see
# which storage location, CLIP checkpoint, index file and id mapping this
# run is going to use. One print call, one setting per line.
print(
    f"Base path: {BASE_PATH}",
    f"CLIP model: {CLIP_MODEL}",
    f"Index name: {INDEX_NAME}",
    f"ID mapping: {ID_MAPPING}",
    sep="\n",
)

## Download index

In [None]:
import shutil

import fsspec
from config import BASE_PATH, ID_MAPPING, INDEX_NAME

# Copy the index file from {BASE_PATH}/{INDEX_NAME} to a local file named
# INDEX_NAME in the current working directory.
index_url = f"{BASE_PATH}/{INDEX_NAME}"

# Stream the copy in chunks instead of reading the whole file into memory
# first: the index can be large and there is no need to hold it in RAM
# just to write it straight back to disk.
with fsspec.open(index_url, "rb") as remote, open(INDEX_NAME, "wb") as local:
    shutil.copyfileobj(remote, local)

## Load index

In [None]:
import faiss
import numpy as np
# Load the index from the local file named INDEX_NAME (the path the download
# cell writes to). Nothing is built here: the index is read as-is from disk.
ind = faiss.read_index(INDEX_NAME)

## Load image URLs

In [None]:
import dask.dataframe as dd

In [None]:
# Open the id-mapping parquet dataset stored next to the index under
# BASE_PATH. This only creates a dask dataframe; the data is pulled in when
# .compute() is called on it.
id_mapping_url = f"{BASE_PATH}/{ID_MAPPING}"
ddf = dd.read_parquet(id_mapping_url)

In [None]:
# Materialise the "url" column as a plain Python list so search results can
# be looked up by integer position.
# NOTE(review): this assumes row order in the mapping matches the ids stored
# in the index - confirm against the code that produced both.
url_column = ddf["url"]
image_urls = url_column.compute().to_list()

## Load model

In [None]:
from transformers import AutoTokenizer, CLIPTextModelWithProjection

# Load the text tower (with its projection head) and the matching tokenizer
# for the checkpoint named by CLIP_MODEL. Only the text side is loaded; the
# query cells below use it to embed a prompt.
model = CLIPTextModelWithProjection.from_pretrained(CLIP_MODEL)
tokenizer = AutoTokenizer.from_pretrained(CLIP_MODEL)

## Query

In [None]:
# Free-text description used for the text query below.
prompt = "A kitchen in country style"
# Number of nearest neighbours requested from the index.
k = 5

In [None]:
import numpy as np

# Image-to-image variant: take the vector stored in the index at position
# `image_number` and use it as the query. A leading axis is added so that
# `query` is a batch holding a single vector.
# Note: the text-query cell that follows assigns `query` again, so when the
# notebook is run top to bottom this value is replaced before the search.
image_number = 2
query = ind.reconstruct(image_number)[np.newaxis, :]

In [None]:
import io
import requests
import torch
from IPython.display import Image

# Text-to-image query: embed `prompt` with the CLIP text model and use the
# projected embedding as the search vector. This replaces any `query` set by
# the image-based cell above.
inputs = tokenizer([prompt], padding=True, return_tensors="pt")

# Inference only: skip autograd bookkeeping so no graph is built or kept.
with torch.no_grad():
    outputs = model(**inputs)

# FAISS operates on float32 vectors (the same dtype ind.reconstruct returns),
# so hand it float32 rather than float64.
# NOTE(review): if the index was built from L2-normalised embeddings the
# query should be normalised the same way - confirm against the indexing code.
query = outputs.text_embeds.cpu().numpy().astype("float32")

In [None]:
# Look up the k nearest neighbours of `query` and show each hit: its score,
# its position in the index, its URL and the image itself.
# NOTE(review): the value is printed as "similarity"; whether larger means
# closer depends on the metric the index was built with - confirm.
REQUEST_TIMEOUT_S = 10  # do not let one unresponsive host hang the cell

distances, indices = ind.search(query, k)
print("results :")
for d, i in zip(distances[0], indices[0]):
    if i < 0:
        # FAISS pads the result with -1 when fewer than k neighbours are
        # found; indexing image_urls with -1 would silently return the last
        # URL, so skip these slots.
        continue
    print("similarity=", d)
    print(i)
    url = image_urls[i]
    print(url)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Crawled image links go stale; report the failure and carry on with
        # the remaining hits instead of aborting the whole loop.
        print("could not fetch image:", exc)
        continue
    display(Image(response.content))