## Notebook to extract insights from video and using it to pass as context in prompt and chat with openai.

### will be further worked upon to vectorize the transcript derived from the insights for large videos.  

In [1]:
'''
Constants and env variables.
'''
import os
from dotenv import load_dotenv
load_dotenv()

OpenAI_KEY=os.getenv('OpenAI_KEY')
ASSISTANT_MODEL=os.getenv('ASSISTANT_MODEL')

### Below are the helper functions

In [None]:
import requests
import json

def get_api_bearer_token():
    '''
    Function to get bearer token that can be used for getting access token for video indexing
    '''

    # Define the API endpoint
    tenant_id =  os.getenv('AZURE_VIDEO_INDEXER_TENANT_ID')
    url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"

    # Prepare the data and files
    client_id = os.getenv('AZURE_VIDEO_INDEXER_CLIENT_ID')
    client_secret = os.getenv('AZURE_VIDEO_INDEXER_CLIENT_SECRET')
    grant_type = "client_credentials"
    scope = "https://management.azure.com/.default"

    data = {
        "client_id": client_id,
        "client_secret": client_secret,
        "grant_type": grant_type,
        "scope": scope
    }

    # Send the request
    response = requests.post(url, data=data)

    # Handle the response
    if response.status_code == 200:
        return response.json()['access_token']
    else:
        print("Failed to get bearer token")
        print("Status code:", response.status_code)
        print("Response:", response.text)

def get_access_token():
    '''
    Function to get bearer token that can be used for getting access token for video indexing
    '''
    subscription_id = os.getenv('AZURE_VIDEO_INDEXER_SUBSCRIPTION_ID')
    resource_group = os.getenv('AZURE_VIDEO_INDEXER_RESOURCE_GROUP')
    account_name = os.getenv('AZURE_VIDEO_INDEXER_ACCOUNT_NAME')
    version = '2022-08-01'

    url = f"https://management.azure.com/subscriptions/{subscription_id}/resourcegroups/{resource_group}/providers/Microsoft.VideoIndexer/accounts/{account_name}/generateAccessToken?api-version={version}"
    
    data = {
        "permissionType": "Contributor",
        "scope": "Account"
    }

    bearer_token = get_api_bearer_token()

    headers = {
        "Content-Type": 'application/json',
        "Authorization": f"Bearer {bearer_token}"
    }

    response = requests.post(url, headers=headers, json=data)
    
    if response.status_code == 200:
        return response.json()['accessToken']
    else:
        print("Failed to get access token")
        print("Status code:", response.status_code)
        print("Response:", response.text)

def get_video_insights(video_id):
    account_id = os.getenv('AZURE_VIDEO_INDEXER_ACCOUNT_ID')
    location = os.getenv('AZURE_VIDEO_INDEXER_LOCATION', 'eastus')
    access_token = get_access_token()

    url = f"https://api.videoindexer.ai/eastus/Accounts/{account_id}/Videos/{video_id}/Index?accessToken={access_token}"
    
    
    params = {
        'accessToken': access_token
    }

    print(f'Fetching insights for video {video_id}...')
    response = requests.get(url, params=params)
    response.raise_for_status()

    video_insights_data = response.json().get('videos', [])[0]
    return video_insights_data

def parse_insights(video_insights):
    
    insights = video_insights.get('insights')
    transcript = " ".join([t["text"] for t in insights.get("transcript", [])])
    faces = [f["name"] for f in insights.get("faces", []) if f.get("name") != "Unknown"]
    keywords = [k["text"] for k in insights.get("keywords", [])]
    topics = [t["name"] for t in insights.get("topics", [])]
    emotions = [e["type"] for e in insights.get("emotions", [])]

    return {
        "videoId": video_insights.get("id"),
        "transcript": transcript,
        "faces": faces,
        "keywords": keywords,
        "topics": topics,
        "emotions": emotions
    }

In [7]:
raw = get_video_insights('2wur8zc1t2')
doc = parse_insights(raw)
with open('output.json', 'w', encoding='utf-8') as f:
  json.dump(doc, f, indent=4)

context_template = (
    "This video (ID: {video_id})"
    "Key themes in this video include: {topics}. "
    "Notable keywords extracted are: {keywords}. "
    "Detected individuals or faces in the video: {faces}. "
    "Below is the full transcript of the video:\n\n{transcript}"
)

context = context_template.format(
  video_id=doc["videoId"],
  topics=', '.join(doc["topics"]) if doc["topics"] else "None",
  keywords=', '.join(doc["keywords"][:10]) + (", and more" if len(doc["keywords"]) > 10 else ""),
  faces=', '.join(doc["faces"]) if doc["faces"] else "None",
  transcript=doc["transcript"]
)

print(context)

Fetching insights for video 2wur8zc1t2...
This video (ID: 2wur8zc1t2)Key themes in this video include: Technology, Computer S, Application Programming Interfaces Api, Database Systems. Notable keywords extracted are: building agent, mcp server, data sources, code assistant, chat app, mcp client, mcp host, mcp protocol, server, servers, and more. Detected individuals or faces in the video: Unknown #1, Unknown #2. Below is the full transcript of the video:

If you're building AI agents, you've probably heard about MCP, or Model Context Protocol. MCP is a new open source standard to connect your agents to data sources such as databases or APIs. MCP consists of multiple components. The most important ones are the host, the client, and the server. So let's break it down. At the very top, you would have your MCP host. Your MCP host will include an MCP client, and it could also include multiple clients. The MCP host could be an application such as a chat app, it could also be a code assistant

In [13]:
from openai import OpenAI
# Your context variable (assumed to be a long string prepared earlier)
# context = build_context(data)

def ask_question_with_context(user_question, context):

    client = OpenAI(api_key=OpenAI_KEY)

    response = client.chat.completions.create(
        model="gpt-4",  # or "gpt-3.5-turbo" if you're using that
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant. Use ONLY the provided context to answer the user's question. "
                    "Do not rely on any outside knowledge or make assumptions beyond the context. "
                    "If the answer cannot be found in the context, reply with: 'The context does not contain that information.'"
                )
            },
            {
                "role": "user",
                "content": f"Context:\n{context}"
            },
            {
                "role": "user",
                "content": f"Question: {user_question}"
            }
        ],
        temperature=0.3,
        max_tokens=1024
    )
    
    return response.choices[0].message.content

In [14]:
ask_question_with_context('Tell me about MCP server', context)

"The MCP server is a component of the Model Context Protocol (MCP), which is a new open source standard used to connect agents to data sources such as databases or APIs. The MCP server connects to the MCP host or client when they need a tool. It can connect to a database, whether it's a relational database or a no SQL database, APIs, or data sources like a local file type or code. The MCP server is responsible for executing something that goes to a database, to an API or a local piece of code when it is called by the MCP host or client to get a tool result. It can also be connected to multiple MCP hosts or clients."

In [15]:
ask_question_with_context('Tell me India', context)

'The context does not contain that information.'