In [20]:
import sys
sys.path.append("..")

from aips import get_engine
from IPython.display import display, HTML
import ipywidgets as widgets
from PIL import Image
import pickle
import requests
import numpy
import torch
import clip
from io import BytesIO

# Run CLIP on the GPU when PyTorch reports CUDA as available, else on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# clip.load returns a pair: the model and the preprocessing callable that is
# later applied to images before they are encoded.
model, preprocess = clip.load("ViT-B/32", device=device)

# Search-engine handle through which collections are created and queried.
engine = get_engine()

In [21]:
# NOTE: as the recorded output of this cell shows, the tar step here fails:
# tar rejects the archive member because its name contains '..'.
# The following cell repeats the same steps with `tar -P`, which succeeds.
![ ! -d 'tmdb' ] && git clone --depth 1 https://github.com/ai-powered-search/tmdb.git
! cd tmdb && git pull
! cd tmdb && mkdir -p '../../data/tmdb/' && tar -xvf movie_image_embeddings.tgz -C '../../data/tmdb/'

Already up to date.
tar: Removing leading `../' from member names
tar: ../data/tmdb/movie_image_embeddings.pickle: Member name contains '..'
tar: Exiting with failure status due to previous errors


In [35]:
# Clone the tmdb data repository on first run (skipped when a 'tmdb' directory
# already exists), pull any updates, then unpack the movie image embeddings.
# -P is passed to tar because the archive member name contains '..'; without it
# the extraction fails. Per the recorded output the member is extracted as
# '../data/tmdb/movie_image_embeddings.pickle', presumably relative to the
# directory given with -C.
![ ! -d 'tmdb' ] && git clone --depth 1 https://github.com/ai-powered-search/tmdb.git
! cd tmdb && git pull
! cd tmdb && mkdir -p '../../data/tmdb/' && tar -Pxvf movie_image_embeddings.tgz -C '../../data/tmdb/'

Already up to date.
../data/tmdb/movie_image_embeddings.pickle


## Listing 15.14

In [23]:
def read(cache_name):
    """Load and return the object pickled under ``cache_name``.

    The file is looked up at ``../data/data/tmdb/<cache_name>.pickle``,
    relative to the current working directory.

    NOTE(review): the doubled ``data/`` segment looks like a consequence of
    how the archive is extracted with ``tar -P`` -- confirm before changing it.
    Unpickling can execute arbitrary code, so only load trusted files.
    """
    # TODO: correct this path once the un-tar issue is fixed.
    pickle_path = f"../data/data/tmdb/{cache_name}.pickle"
    with open(pickle_path, "rb") as cache_file:
        cached_object = pickle.load(cache_file)
    return cached_object

def update_movie_embedding(movie_embedding):
    """Normalize a movie's ``image_embeddings`` entry and return the movie.

    The passed-in mapping is modified in place: its ``"image_embeddings"``
    value is replaced by the result of ``normalize_embedding``. The same
    object is returned for convenience.
    """
    raw_vector = movie_embedding["image_embeddings"]
    movie_embedding["image_embeddings"] = normalize_embedding(raw_vector)
    return movie_embedding

def generate_tmdb_with_embeddings_index():
    """Create the ``tmdb_with_embeddings`` collection and index the movies.

    Reads the cached ``movie_image_embeddings`` mapping, normalizes the image
    embedding of every movie in it, and adds the resulting documents to the
    newly created collection. The mapping's keys are not used.
    """
    cached_movies = read("movie_image_embeddings")
    collection = engine.create_collection("tmdb_with_embeddings")
    movies = []
    for movie in cached_movies.values():
        movies.append(update_movie_embedding(movie))
    collection.add_documents(movies)
    
def normalize_embedding(embedding):
    """Divide ``embedding`` by its norm taken along axis 0; return a list.

    For a one-dimensional vector this yields a unit-length vector as a plain
    Python list of floats.
    """
    magnitude = numpy.linalg.norm(embedding, axis=0)
    scaled = numpy.divide(embedding, magnitude)
    return scaled.tolist()

In [24]:
# Build the "tmdb_with_embeddings" collection from the cached movie image embeddings.
generate_tmdb_with_embeddings_index()

Wiping "tmdb_with_embeddings" collection
Creating "tmdb_with_embeddings" collection
Status: Success

Adding Documents to 'tmdb_with_embeddings' collection


## Listing 15.15

In [25]:
def load_image(full_path, log=False, timeout=30):
    """Open an image from a URL or from a local file path.

    Args:
        full_path: Location of the image. Values starting with ``"http"`` are
            downloaded with ``requests``; anything else is opened as a file.
        log: When true, print whether the image could be loaded.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled server cannot hang the notebook indefinitely.

    Returns:
        The opened PIL image, or an empty list when the image could not be
        loaded (the empty list is the failure sentinel callers already get).
    """
    try:
        if full_path.startswith("http"):
            response = requests.get(full_path, timeout=timeout)
            # Treat HTTP error responses as a failed load instead of trying to
            # decode an error page as image data.
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
        else:
            image = Image.open(full_path)
    except Exception:
        # Deliberately best-effort, but only for ordinary errors:
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        if log: print(f"No Image Available {full_path}")
        return []
    if log: print("File Found")
    return image

def movie_search(query_embedding, limit=8, log=False):
    """Run a vector search over the ``tmdb_with_embeddings`` collection.

    Args:
        query_embedding: Query vector matched against ``image_embeddings``.
        limit: Value passed to the search as ``limit``.
        log: When true, ask the engine to log the request and print the
            number of documents in the response.

    Returns:
        The response returned by the collection's ``vector_search``.
    """
    search_args = dict(
        query_vector=query_embedding,
        query_field="image_embeddings",
        limit=limit,
        quantization_size="FLOAT32",
    )
    if log:
        search_args["log"] = True

    collection = engine.get_collection("tmdb_with_embeddings")
    results = collection.vector_search(**search_args)
    if log:
        print(f"Vector search results {len(results['docs'])}")
    return results

def compute_text_embedding(text):
    """Encode ``text`` with the CLIP model and return a normalized embedding.

    Args:
        text: The query string to embed.

    Returns:
        The embedding of the single tokenized text as a list, after
        ``normalize_embedding`` has been applied.
    """
    tokens = clip.tokenize([text]).to(device)
    # Inference only: skip autograd bookkeeping so no gradient graph is built
    # and kept alive for every query.
    with torch.no_grad():
        text_features = model.encode_text(tokens)
    # A single text was tokenized, so the first row is its embedding.
    embedding = text_features.tolist()[0]
    return normalize_embedding(embedding)

def compute_image_embedding(image_file):
    """Encode an image with the CLIP model and return a normalized embedding.

    Args:
        image_file: URL or path of the image, as accepted by ``load_image``.

    Returns:
        The image embedding as a list, after ``normalize_embedding`` has been
        applied.

    Raises:
        ValueError: If the image could not be loaded.
    """
    image = load_image(image_file)
    if isinstance(image, list):
        # load_image reports failure by returning an empty list; fail here with
        # a clear message rather than deep inside the preprocessing pipeline.
        raise ValueError(f"Could not load image: {image_file}")
    # unsqueeze(0) adds a leading dimension so the single image forms a batch.
    inputs = preprocess(image).unsqueeze(0).to(device)
    # Inference only: skip autograd bookkeeping so no gradient graph is built.
    with torch.no_grad():
        embeddings = model.encode_image(inputs).tolist()[0]
    return normalize_embedding(embeddings)

def compute_text_and_image_embedding(text_query, image_file):
    """Blend a text embedding and an image embedding into one query vector.

    Both inputs are embedded (each result is already normalized by its
    helper) and averaged element-wise. The average is returned as a list and
    is not normalized again.
    """
    text_vector = compute_text_embedding(text_query)
    image_vector = compute_image_embedding(image_file)
    blended = numpy.average([text_vector, image_vector], axis=0)
    return blended.tolist()

## Listing 15.16

In [None]:
def get_html(movies_documents):
    """Render movie documents as an HTML gallery of linked poster images.

    Args:
        movies_documents: Iterable of mappings providing ``image_id``,
            ``movie_id`` and ``title``.

    Returns:
        A string with a ``<style>`` block followed by a ``<div
        class='results'>`` holding one linked ``<img>`` per movie.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    css = """
      <style type="text/css">
        .results { 
          margin-top: 15px; 
          display: flex; 
          flex-wrap: wrap; 
          justify-content: space-evenly; }
        .results .result { height: 250px; margin-bottom: 5px; }
      </style>"""

    result_links = []
    for movie in movies_documents:
        # Values land inside single-quoted attributes, so quotes and markup
        # characters must be escaped (e.g. the apostrophe in "Singin' in the
        # Rain" would otherwise terminate the title attribute early).
        image_id = escape(str(movie["image_id"]), quote=True)
        movie_id = escape(str(movie["movie_id"]), quote=True)
        title = escape(str(movie["title"]), quote=True)

        image_file = f"http://image.tmdb.org/t/p/w780/{image_id}.jpg"
        movie_link = f"https://www.themoviedb.org/movie/{movie_id}"
        img_html = f"<img title='{title}' class='result' src='{image_file}'>"
        result_links.append(f"<a href='{movie_link}' target='_blank'>{img_html}</a>")

    results_html = "".join(result_links)
    return f"{css}<div class='results'>{results_html}</div>"
   
def display_results(search_results):
    """Show the documents of a search response as an HTML gallery.

    The gallery markup for ``search_results["docs"]`` is rendered into an
    ``Output`` widget, which is then displayed after a centered ``HBox``.

    NOTE(review): the ``HBox`` is created without children, so ``output`` is
    displayed next to it rather than inside it -- confirm this is intended.
    """
    output = widgets.Output()
    with output:
        gallery = HTML(get_html(search_results["docs"]))
        display(gallery)

    centered_layout = widgets.Layout(justify_content="center")
    container = widgets.HBox(layout=centered_layout)
    display(container, output)

def search_and_display(text_query="", image_query=None):
    """Search movies by text, by image, or by both, and display the results.

    Args:
        text_query: Text to embed; used alone when no image is given.
        image_query: Optional image location; when given together with a
            non-empty ``text_query`` the combined embedding is used,
            otherwise the image embedding alone.
    """
    if image_query and text_query:
        query_embedding = compute_text_and_image_embedding(text_query, image_query)
    elif image_query:
        query_embedding = compute_image_embedding(image_query)
    else:
        query_embedding = compute_text_embedding(text_query)

    search_results = movie_search(query_embedding)
    display_results(search_results)

In [26]:
#Figure 15.5
# Text-only query: the text is embedded and searched against the movie image embeddings.
search_and_display(text_query="singing in the rain")

HBox(layout=Layout(justify_content='center'))

Output()

In [27]:
# Text-only query using the singular "superhero".
search_and_display(text_query="superhero flying")

HBox(layout=Layout(justify_content='center'))

Output()

In [28]:
#Figure 15.6
# Same text-only query, but with the plural "superheroes".
search_and_display(text_query="superheroes flying")

HBox(layout=Layout(justify_content='center'))

Output()

In [29]:
#Figure 15.7
# Image-only query: the embedding of a local image file is used as the query vector.
search_and_display(image_query="../data/tmdb/delorean-query.jpg")

HBox(layout=Layout(justify_content='center'))

Output()

In [34]:
#Figure 15.8
# Combined query: the text and image embeddings are averaged into one query vector.
search_and_display(text_query="superhero", image_query="../data/tmdb/delorean-query.jpg")

HBox(layout=Layout(justify_content='center'))

Output()

## Listing 15.15


## Listing 15.16
