# Video Search with Azure Computer Vision 4 (Florence)
## 5 Gradio App for video search

![image](logo.jpg)

In [1]:
import datetime
import glob
import gradio as gr
import json
import os
import pandas as pd
import sys
import time

from azure import (
    get_cosine_similarity,
    image_embedding,
    remove_background,
    text_embedding,
)

from dotenv import load_dotenv

In [2]:
# Read the Azure Computer Vision settings (endpoint and key) from the
# local azure.env file, so that no credential is hardcoded in the notebook
load_dotenv("azure.env")

endpoint = os.environ.get("azure_cv_endpoint")
key = os.environ.get("azure_cv_key")

## 1. Information

In [3]:
# Display the Python interpreter version used by the running kernel
sys.version

'3.8.5 (default, Sep  4 2020, 07:30:14) \n[GCC 7.3.0]'

In [4]:
# Record when this notebook was executed
print(f"Today is {datetime.datetime.today()}")

Today is 2023-05-15 11:40:31.283360


## 2. Frames

In [5]:
# Folder containing the frame images that the visual search runs against
IMAGES_DIR = "frames"

In [6]:
# Collect the path of every entry found directly under the frames folder.
# The search functions pair this list position-by-position with the loaded
# embeddings, so its order matters.
image_files = glob.glob(f"{IMAGES_DIR}/*")

print(f"Directory of images: {IMAGES_DIR}")
print(f"Total number of catalog images = {len(image_files):,}")

Directory of images: frames
Total number of catalog images = 1,448


## 3. Loading vector embeddings

In [7]:
# Folder where the vector embeddings files are stored
JSON_DIR = "json"

# Show the available embeddings files (rendered as the cell output)
glob.glob(f"{JSON_DIR}/*.json")

['json/img_embed_15May2023_113615.json']

In [8]:
print("Importing vectors embeddings...")

# Only regular .json files are candidates: any other file living in the folder
# (notebook checkpoints, OS metadata files, ...) must never be selected as the
# "most recent" embeddings file, otherwise json.load() below would fail.
jsonfiles = [
    entry.name
    for entry in os.scandir(JSON_DIR)
    if entry.is_file() and entry.name.lower().endswith(".json")
]

# Fail early with an explicit message instead of an IndexError further down
if not jsonfiles:
    raise FileNotFoundError(
        f"No .json vector embeddings file found in the '{JSON_DIR}' directory"
    )

# Get the most recent file (sorted by modification time, newest first)
modification_times = [
    (f, os.path.getmtime(os.path.join(JSON_DIR, f))) for f in jsonfiles
]
modification_times.sort(key=lambda x: x[1], reverse=True)
most_recent_file = JSON_DIR + "/" + modification_times[0][0]

# Loading the most recent file
print(f"Loading the most recent file of the vector embeddings: {most_recent_file}")

# Explicit encoding so that reading does not depend on the platform default
with open(most_recent_file, encoding="utf-8") as json_file:
    list_emb = json.load(json_file)

print(f"\nDone: number of imported vector embeddings = {len(list_emb):,}")

Importing vectors embeddings...
Loading the most recent file of the vector embeddings: json/img_embed_15May2023_113615.json

Done: number of imported vector embeddings = 1,448


## 4. Gradio webapp for visual search using an image

### Generic gradio elements

In [54]:
# Footer text passed as the `article` of each Gradio interface
footnote = "Powered by Azure Computer Vision 4 (Florence)"

# Number of results returned by a search; also the number of result image
# components created for each interface
top_n = 5

### Visual Search using an image

In [63]:
def visual_search_from_image_app(image, list_emb=list_emb, topn=top_n):
    """
    Function for visual search using an image for the gradio app.

    Args:
        image: reference image as received from the gradio Image input
            (None when the user submits without providing an image).
        list_emb: vector embeddings of the catalog images. They are paired
            position-by-position with the notebook-level `image_files` list,
            so both must be in the same order.
        topn: number of results to return.

    Returns:
        A list of `topn` entries: the file paths of the most similar images,
        best match first, padded with None when fewer than `topn` results are
        available or when no reference image was provided.
    """
    # Nothing to search for: return one empty slot per expected result
    if image is None:
        return [None] * topn

    # Reference image embedding (computed on the image without its background)
    nobackground_image = remove_background(image)
    image_emb = image_embedding(nobackground_image)

    # Comparing with all the images embeddings
    results_list = [
        get_cosine_similarity(image_emb, emb_image) for emb_image in list_emb
    ]

    # Topn results: nlargest already returns the rows ranked by decreasing
    # similarity, so it is computed once and reused for both lists
    df = pd.DataFrame(
        list(zip(image_files, results_list)), columns=["image_file", "similarity"]
    )
    top_df = df.nlargest(topn, "similarity")
    topn_list = top_df["image_file"].tolist()
    similarity_list = top_df["similarity"].tolist()

    print(topn_list, similarity_list)

    # The gradio interface expects one value per output component, so the
    # result is padded with None when fewer than topn images are available
    return topn_list + [None] * (topn - len(topn_list))

In [64]:
header_image = "Visual Search with Azure Computer Vision (Florence) using an image"

# Sample images offered as one-click examples in the interface
images_examples = [
    "images/paris1.jpg",
    "images/paris3.jpg",
    "images/paris4.jpg",
    "images/paris5.jpg",
    "images/paris6.jpg",
    "images/paris8.jpg",
    "images/paris9.jpg",
    "images/paris10.jpg",
]

# One (empty) label suffix per result slot. The notebook-level constant is
# `top_n`; `topn` is only a parameter name of the search functions and is not
# defined here, so using it raised a NameError on a fresh kernel.
topn_list_images = [""] * top_n
refimage = gr.components.Image(label="Your image:", type="filepath", shape=(200, 200))

# One output image component per returned result
list_img_results_prompt = [
    gr.components.Image(
        label=f"Top {i+1} {topn_list_images[i]}", type="filepath", shape=(200, 200)
    )
    for i in range(top_n)
]

webapp_image = gr.Interface(
    visual_search_from_image_app,
    refimage,
    list_img_results_prompt,
    title=header_image,
    examples=images_examples,
    theme="gstaff/sketch",
    article=footnote,
)

webapp_image.queue()

Gradio Interface for: visual_search_from_image_app
--------------------------------------------------
inputs:
|-image
outputs:
|-image
|-image
|-image
|-image
|-image

### We can run this app

In [65]:
# webapp_image.launch(share=True)

## 5. Gradio webapp for visual search using a prompt

In [66]:
def visual_search_from_prompt_app(query, list_emb=list_emb, topn=top_n):
    """
    Function for visual search using a prompt for the gradio app.

    Args:
        query: text prompt describing what to search for.
        list_emb: vector embeddings of the catalog images. They are paired
            position-by-position with the notebook-level `image_files` list,
            so both must be in the same order.
        topn: number of results to return.

    Returns:
        A list of `topn` entries: the file paths of the images most similar
        to the prompt, best match first, padded with None when fewer than
        `topn` results are available or when the prompt is blank.
    """
    # A blank prompt carries nothing to embed: return one empty slot per
    # expected result instead of calling the embedding service
    if query is None or not str(query).strip():
        return [None] * topn

    # Text Embedding of the prompt
    text_emb = text_embedding(query)

    # Comparing the Text embedding with all the images embeddings
    results_list = [
        get_cosine_similarity(text_emb, emb_image) for emb_image in list_emb
    ]

    # Topn results: nlargest already returns the rows ranked by decreasing
    # similarity, so no separate sort is needed
    df = pd.DataFrame(
        list(zip(image_files, results_list)), columns=["image_file", "similarity"]
    )
    topn_list = df.nlargest(topn, "similarity")["image_file"].tolist()

    print(topn_list)

    # The gradio interface expects one value per output component, so the
    # result is padded with None when fewer than topn images are available
    return topn_list + [None] * (topn - len(topn_list))

In [67]:
# Title displayed at the top of the prompt-based search interface
header_prompt = "Visual Search with Azure Computer Vision (Florence) using a prompt"

# Sample prompts offered as one-click examples in the interface
prompt_examples = [
    "Dior",
    "Eiffel Tower",
    "Empty street",
    "Love brings love exhibition",
    "Métro",
    "Mercedes",
    "Monument with a flame",
    "Padlocks",
    "Palais de Tokyo",
    "Palais Galliera",
    "Paris Pont d'Iéna",
    "Person wearing a mask",
    "Person wearing an orange dress",
    "Person with a bagpack",
    "Pink clothes",
    "Pink lines on the street",
]

# One (empty) label suffix per result slot
topn_list_prompt = [""] * top_n

# Single-line text input receiving the search prompt
prompt = gr.components.Textbox(
    label="What do you want to search?",
    placeholder="Enter your prompt for the visual search and press the Submit button",
    lines=1,
)

# One output image component per returned result, labelled by its rank
labelfile = topn_list_prompt
list_img_results_image = [
    gr.components.Image(
        type="filepath",
        label=f"Top {rank} {str(labelfile[rank - 1])} {topn_list_prompt[rank - 1]}",
    )
    for rank in range(1, top_n + 1)
]

webapp_prompt = gr.Interface(
    fn=visual_search_from_prompt_app,
    inputs=prompt,
    outputs=list_img_results_image,
    title=header_prompt,
    article=footnote,
    examples=prompt_examples,
    theme="gstaff/sketch",
)

webapp_prompt.queue()

Gradio Interface for: visual_search_from_prompt_app
---------------------------------------------------
inputs:
|-textbox
outputs:
|-image
|-image
|-image
|-image
|-image

### We can run this app

In [68]:
# webapp_prompt.launch(share=True)

## 6. Gradio webapp for visual search using an image or a prompt
### We can combine the webapps into a single one

In [69]:
# Group the two search apps as tabs of a single web application.
# Themes: https://huggingface.co/spaces/gradio/theme-gallery
visualsearch_webapp = gr.TabbedInterface(
    interface_list=[webapp_prompt, webapp_image],
    tab_names=["1 Visual search from a prompt", "2 Visual search from an image"],
    theme="rottenlittlecreature/Moon_Goblin",
    css="body {background-color: black}",
)

visualsearch_webapp.queue()

# share=True also exposes the app through a temporary public URL
visualsearch_webapp.launch(share=True)



Running on local URL:  http://127.0.0.1:7863
Running on public URL: https://b2fec49e3da46fcb83.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces




['frames/frame_00_11_18.jpg', 'frames/frame_00_11_35.jpg', 'frames/frame_00_11_20.jpg', 'frames/frame_00_11_36.jpg', 'frames/frame_00_00_18.jpg']
Removing background from the image using Azure Computer Vision 4.0...
Done
['frames/frame_00_01_40.jpg', 'frames/frame_00_01_43.jpg', 'frames/frame_00_11_04.jpg', 'frames/frame_00_01_45.jpg', 'frames/frame_00_12_17.jpg'] [0.5863442492080095, 0.5830845232487765, 0.5778213000702307, 0.5659349037147696, 0.5649978912214925]


> End