# Video Search with Azure Content Understanding
## Objective
This document is meant to present a guideline on how to leverage the Azure Video Content Understanding API for AI Search.
The sample will demonstrate the following steps:
1. Process a video file from Azure Blob storage with the Azure Video Content Understanding service to generate a video description grounding document.
2. Process the video description grounding document with Azure Search client to generate an Azure Search index.
3. Utilize OpenAI completion and embedding models to search through content in the video search index.


## Pre-requisites
1. Follow the [README](../README.md#configure-azure-ai-service-resource) to create the essential resources that will be used in this sample.
1. Install required packages

In [None]:
%pip install -r ../requirements.txt

## Load environment variables

In [None]:
import os
import warnings

from dotenv import load_dotenv

# Read variables from a local .env file (if any) into the process environment.
load_dotenv()

# Azure AI Services (Content Understanding) settings
AZURE_AI_SERVICE_ENDPOINT = os.getenv("AZURE_AI_SERVICE_ENDPOINT")
AZURE_AI_SERVICE_API_VERSION = os.getenv("AZURE_AI_SERVICE_API_VERSION", "2024-12-01-preview")

# Azure OpenAI settings (chat + embedding deployments)
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")
AZURE_OPENAI_CHAT_API_VERSION = os.getenv("AZURE_OPENAI_CHAT_API_VERSION", "2024-08-01-preview")
AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
AZURE_OPENAI_EMBEDDING_API_VERSION = os.getenv("AZURE_OPENAI_EMBEDDING_API_VERSION", "2023-05-15")

# Azure AI Search settings
AZURE_SEARCH_ENDPOINT = os.getenv("AZURE_SEARCH_ENDPOINT")
AZURE_SEARCH_INDEX_NAME = os.getenv("AZURE_SEARCH_INDEX_NAME", "sample-index-video")

# Validate the settings that have no default: os.getenv returns None for them
# when unset, which otherwise only surfaces later as a less obvious client error.
# A warning (rather than an exception) keeps the remaining cells runnable.
_required_settings = {
    "AZURE_AI_SERVICE_ENDPOINT": AZURE_AI_SERVICE_ENDPOINT,
    "AZURE_OPENAI_ENDPOINT": AZURE_OPENAI_ENDPOINT,
    "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME": AZURE_OPENAI_CHAT_DEPLOYMENT_NAME,
    "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME": AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME,
    "AZURE_SEARCH_ENDPOINT": AZURE_SEARCH_ENDPOINT,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
    warnings.warn(
        "Missing required environment variables: "
        + ", ".join(_missing_settings)
        + ". Set them in your environment or .env file before running the cells below."
    )


## File to Analyze

In [None]:
from pathlib import Path

# Sample video to analyze, addressed relative to the notebook's working directory.
VIDEO_LOCATION = Path("..") / "data" / "FlightSimulator.mp4"

In [None]:
from langchain.schema import Document
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.vectorstores.azuresearch import AzureSearch
from langchain_core.prompts import ChatPromptTemplate
import requests
import json
from pathlib import Path
import sys
import uuid


# Make the parent of the current working directory importable so that the
# shared modules living there can be used from this notebook.
parent_dir = Path.cwd().parent
sys.path.append(str(parent_dir))


## Generate Video Segment Description
Create a custom analyzer with a pre-defined schema. The custom analyzer schema is defined in [../analyzer_templates/video_content_understanding.json](../analyzer_templates/video_content_understanding.json).

In [None]:
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from python.content_understanding_client import AzureContentUnderstandingClient

# Analyzer schema template and a unique identifier for the analyzer
# (fixed prefix + random UUID).
ANALYZER_TEMPLATE_PATH = "../analyzer_templates/video_content_understanding.json"
ANALYZER_ID = f"video_analyzer_{uuid.uuid4()}"

# Build a bearer-token provider for the Cognitive Services scope from the
# default Azure credential.
credential = DefaultAzureCredential()
token_provider = get_bearer_token_provider(
    credential, "https://cognitiveservices.azure.com/.default"
)

# Create the Content Understanding (CU) client
cu_client = AzureContentUnderstandingClient(
    endpoint=AZURE_AI_SERVICE_ENDPOINT,
    api_version=AZURE_AI_SERVICE_API_VERSION,
    token_provider=token_provider,
    # This header is used for sample usage telemetry; comment out the next line to opt out.
    x_ms_useragent="azure-ai-content-understanding-python/search_with_video",
)

# Start analyzer creation from the template, then wait for the operation result
response = cu_client.begin_create_analyzer(
    ANALYZER_ID,
    analyzer_template_path=ANALYZER_TEMPLATE_PATH,
)
result = cu_client.poll_result(response)

# Show the creation result as indented JSON
print(json.dumps(result, indent=2))


### Use the created analyzer to extract video segment description

In [None]:
# Kick off content analysis of the video file with the analyzer
response = cu_client.begin_analyze(ANALYZER_ID, file_location=VIDEO_LOCATION)

# Poll until the analysis result is available (timeout: 3600 seconds = 1 hour)
video_cu_result = cu_client.poll_result(response, timeout_seconds=3600)

# Show the raw content analysis result
print("Video Content Understanding result: ", video_cu_result)

# Clean up: delete the analyzer now that the result has been retrieved
cu_client.delete_analyzer(ANALYZER_ID)

### Pre-process the video segment descriptions

In [None]:
def convert_values_to_strings(json_obj):
    """Return a list with one string per item of ``json_obj``.

    Dicts and lists are serialised as real JSON text (double quotes,
    ``null``/``true``/``false``) rather than Python ``repr`` output, because
    the resulting strings are presented downstream as JSON. Any other value,
    or a container that cannot be JSON-encoded, falls back to ``str(value)``.

    Args:
        json_obj: Iterable of values (typically the segment dicts of an
            analysis result).

    Returns:
        list[str]: One string per input item, in the same order.
    """
    def _to_text(value):
        if isinstance(value, (dict, list)):
            try:
                # ensure_ascii=False keeps non-ASCII transcript text readable.
                return json.dumps(value, ensure_ascii=False)
            except (TypeError, ValueError):
                # Not JSON-serialisable: keep the previous str() behaviour.
                return str(value)
        return str(value)

    return [_to_text(value) for value in json_obj]


def remove_markdown(json_obj):
    """Return the segments of ``json_obj`` without their ``'markdown'`` entry.

    The input is left untouched: each dict segment is shallow-copied without
    the ``'markdown'`` key, so the caller's original analysis result keeps its
    markdown. Segments that are not dicts are passed through unchanged.

    Args:
        json_obj: Iterable of segments (normally dicts).

    Returns:
        list: New list of segments, in the same order, with ``'markdown'``
        removed from every dict segment.
    """
    return [
        {key: value for key, value in segment.items() if key != 'markdown'}
        if isinstance(segment, dict)
        else segment
        for segment in json_obj
    ]


def process_cu_scene_description(scene_description):
    """Turn a Content Understanding result into one ``Document`` per segment.

    Reads the segments from ``scene_description["result"]["contents"]``, drops
    their markdown entry, converts each segment to a string, and wraps that
    string in a short explanatory sentence plus a triple-backtick fence.

    Args:
        scene_description: Analysis result containing ``["result"]["contents"]``.

    Returns:
        list[Document]: One document per segment, in the original order.
    """
    segments = remove_markdown(scene_description["result"]["contents"])

    intro = "The following is a json string representing a video segment with scene description and transcript ```"
    fence_close = "```"

    documents = []
    for segment_text in convert_values_to_strings(segments):
        documents.append(Document(page_content=intro + segment_text + fence_close))
    return documents


# Build the documents from the video analysis result and report how many there are
docs = process_cu_scene_description(video_cu_result)
print(f"There are {len(docs)} documents.")

## Embed and index the chunks
Add the scene description segments as documents to Azure Search.

In [None]:
def embed_and_index_chunks(docs):
    """Embed the given documents and add them to the Azure Search index.

    Args:
        docs: Documents to index; passed to ``AzureSearch.add_documents``.

    Returns:
        The ``AzureSearch`` vector store the documents were added to.
    """
    embedder = AzureOpenAIEmbeddings(
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
        azure_deployment=AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME,
        openai_api_version=AZURE_OPENAI_EMBEDDING_API_VERSION,  # e.g., "2023-12-01-preview"
        azure_ad_token_provider=token_provider,
    )

    # No search API key is supplied (azure_search_key=None).
    search_store: AzureSearch = AzureSearch(
        azure_search_endpoint=AZURE_SEARCH_ENDPOINT,
        azure_search_key=None,
        index_name=AZURE_SEARCH_INDEX_NAME,
        embedding_function=embedder.embed_query,
    )

    search_store.add_documents(documents=docs)
    return search_store


# Embed the segment documents and add them to the Azure Search index;
# keep the returned vector store for the search cells that follow.
vector_store = embed_and_index_chunks(docs)

## Retrieve relevant content
#### Execute a pure vector similarity search

In [None]:
# Set your query: the text used by the similarity and hybrid search cells below
query = "japan"

In [None]:
# Run a pure vector similarity search for the query and keep the top 3 matches
docs = vector_store.similarity_search(query=query, k=3, search_type="similarity")

# Print the text of each matching document
for match in docs:
    print(match.page_content)

#### Execute hybrid search. Vector and nonvector text fields are queried in parallel, results are merged, and top matches of the unified result set are returned.

In [None]:
# Run a hybrid search via the vector store's hybrid_search method; top 3 matches
docs = vector_store.hybrid_search(query=query, k=3)

# Print the text of each matching document
for match in docs:
    print(match.page_content)

## Video Q&A
We can utilize OpenAI GPT completion models + Azure Search to conversationally search for and chat about the results. (If you are using GitHub Codespaces, there will be an input prompt near the top of the screen)

In [None]:
# Setup rag chain
# Prompt template for the RAG chain: {question} receives the user's query and
# {context} receives the text of the retrieved documents.
prompt_str = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:"""


def setup_rag_chain(vector_store):
    """Build a retrieval-augmented question-answering chain over ``vector_store``.

    The chain retrieves documents with a similarity retriever (``k=3``), joins
    their text into the prompt's context, sends the prompt to the Azure OpenAI
    chat deployment, and parses the reply into a plain string.

    Args:
        vector_store: Vector store exposing ``as_retriever``.

    Returns:
        A runnable chain that is invoked with the question.
    """
    def format_docs(docs):
        # Join the retrieved documents' text, separated by blank lines.
        return "\n\n".join(doc.page_content for doc in docs)

    retriever = vector_store.as_retriever(search_type="similarity", k=3)

    chat_model = AzureChatOpenAI(
        azure_deployment=AZURE_OPENAI_CHAT_DEPLOYMENT_NAME,
        openai_api_version=AZURE_OPENAI_CHAT_API_VERSION,
        azure_ad_token_provider=token_provider,
        temperature=0.7,
    )

    # The incoming question is used to retrieve context and is also passed through as-is.
    chain_inputs = {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    answer_prompt = ChatPromptTemplate.from_template(prompt_str)

    return chain_inputs | answer_prompt | chat_model | StrOutputParser()


# Conversational search helper
def conversational_search(rag_chain, query):
    """Invoke ``rag_chain`` with ``query`` and print the answer it returns.

    Args:
        rag_chain: Object exposing ``invoke(query)``.
        query: The question to ask.
    """
    answer = rag_chain.invoke(query)
    print(answer)


# Build the chain once, then answer queries until an empty line is entered
rag_chain = setup_rag_chain(vector_store)
while True:
    query = input("Enter your query: ")
    if not query:
        # Empty input ends the session
        break
    conversational_search(rag_chain, query)