<a href="https://colab.research.google.com/github/prisar/ai_notebooks/blob/main/nb_106.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Authentication and service account setup
from google.colab import auth
from google.auth import default
import os

# Authenticate the Colab user with Google Cloud
auth.authenticate_user()

# Record the project ID in this session's environment
os.environ["GOOGLE_CLOUD_PROJECT"] = "mrc-quant-ml"


In [None]:

# Install required packages
!pip install -q google-genai google-cloud-aiplatform

In [None]:
# Import and initialize
from google.genai import Client
from google.genai.types import Part, VideoMetadata, FileData
from google.cloud import storage
import asyncio
from concurrent.futures import ThreadPoolExecutor
import nest_asyncio
import time # Import time module for delays
import moviepy.editor as mp # Import moviepy for video duration


def summarize_video_chunk(video_uri: str, start_offset: str, end_offset: str, prompt: str = "Analyze this video and provide a summary."):
    """Ask Gemini (through Vertex AI) to describe one time window of a video.

    Args:
        video_uri: URI of the video file; it is sent with mime type "video/mp4".
        start_offset: Start of the window, passed through to VideoMetadata
            unchanged (callers in this file pass strings such as "1800s").
        end_offset: End of the window, in the same format as start_offset.
        prompt: Instruction text placed after the video part in the request.

    Returns:
        The ``text`` attribute of the model response.
    """
    gemini = Client(vertexai=True, project="mrc-quant-ml", location="us-central1")

    # Describe which slice of the file the request refers to.
    clip_window = VideoMetadata(
        fps=1,
        start_offset=start_offset,
        end_offset=end_offset,
    )
    video_source = FileData(file_uri=video_uri, mime_type="video/mp4")
    video_part = Part(video_metadata=clip_window, file_data=video_source)

    # The video part goes first, followed by the text prompt.
    result = gemini.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents=[video_part, prompt],
    )
    return result.text

# Function to get video duration
async def get_video_duration(video_uri: str) -> int:
    """Return the duration, in whole seconds, of a video stored in GCS.

    The object is downloaded into a private temporary directory, opened with
    moviepy to read its duration, and the directory (with the downloaded
    file) is removed again whether or not the probe succeeds.

    NOTE(review): every call made here is blocking even though the function
    is declared ``async``; it stays ``async`` so existing ``await`` callers
    keep working.

    Args:
        video_uri: URI of the form ``gs://<bucket>/<object path>``.

    Returns:
        The clip duration truncated to an int, or 0 when the URI cannot be
        split into bucket and object path or the download/probe fails (the
        error is printed rather than raised).
    """
    # Local import: tempfile is only needed here and is not imported at file level.
    import tempfile

    try:
        # Parse the URI before touching the network so a malformed URI fails fast.
        # removeprefix strips only the leading scheme, not later "gs://" substrings.
        bucket_name, blob_name = video_uri.removeprefix("gs://").split("/", 1)

        client = storage.Client()
        bucket = client.get_bucket(bucket_name)
        blob = bucket.blob(blob_name)

        # Download the video temporarily to get duration (consider optimizing this).
        # A fresh temporary directory avoids name clashes and is always cleaned up.
        with tempfile.TemporaryDirectory() as tmp_dir:
            temp_file = os.path.join(tmp_dir, os.path.basename(blob_name))
            blob.download_to_filename(temp_file)
            clip = mp.VideoFileClip(temp_file)
            try:
                return int(clip.duration)
            finally:
                # Close the clip before the directory holding its file is deleted.
                clip.close()
    except Exception as e:
        print(f"Error getting video duration: {e}")
        return 0  # Best-effort contract: callers treat 0 as "duration unknown"

# Batch processing optimization
async def process_video_chunks_parallel(video_uri: str, chunk_duration_minutes: int = 30, max_workers: int = 4, delay_seconds: int = 1, total_duration: int = 7302):
    """Summarize a video in fixed-length chunks using a pool of worker threads.

    The range 0..total_duration is cut into consecutive windows of
    ``chunk_duration_minutes`` (the last one may be shorter). Each window is
    summarized with ``summarize_video_chunk`` on a worker thread; after every
    call the worker pauses for ``delay_seconds`` before taking the next window.

    Args:
        video_uri: URI of the video, forwarded to ``summarize_video_chunk``.
        chunk_duration_minutes: Length of each window in minutes; must be > 0.
        max_workers: Maximum number of windows summarized concurrently.
        delay_seconds: Pause a worker takes after each API call.
        total_duration: Length of the video in seconds. Defaults to 7302, the
            value that used to be hard-coded here; pass the result of
            ``await get_video_duration(video_uri)`` to measure it instead.

    Returns:
        A list of ``{"start", "end", "summary"}`` dictionaries in chunk order,
        or an empty list when ``total_duration`` is not positive.

    Raises:
        ValueError: If ``chunk_duration_minutes`` is not positive.
    """
    if chunk_duration_minutes <= 0:
        raise ValueError("chunk_duration_minutes must be a positive number of minutes")

    chunk_duration = chunk_duration_minutes * 60  # Convert minutes to seconds

    print(f"Total video duration: {total_duration} seconds")
    if total_duration <= 0:
        print("Could not get video duration. Aborting processing.")
        return []

    chunks = [(i, min(i + chunk_duration, total_duration))
              for i in range(0, total_duration, chunk_duration)]

    def summarize_then_pause(start, end):
        """Summarize one window, then hold the worker thread for the delay."""
        summary = summarize_video_chunk(video_uri, f"{start}s", f"{end}s")
        # Blocking sleep is intended: this runs on a pool thread, not on the event loop.
        time.sleep(delay_seconds)
        return {"start": start, "end": end, "summary": summary}

    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit the plain function directly; no nested event loop per thread is needed.
        tasks = [
            loop.run_in_executor(executor, summarize_then_pause, start, end)
            for start, end in chunks
        ]

        # gather keeps results in submission order, i.e. chronological chunk order.
        chunk_results = await asyncio.gather(*tasks)

    return chunk_results  # List of dictionaries with start, end, and summary

# Example usage with error handling
video_uri = "gs://mrc-quant-ml-video-analysis/videoplayback.mp4"

# Let asyncio.run() work inside an already-running event loop (apply this if running in Colab)
nest_asyncio.apply()

try:
    # 30-minute chunks; delay_seconds controls the pause after each API call
    video_chunks_with_summaries = asyncio.run(
        process_video_chunks_parallel(
            video_uri,
            chunk_duration_minutes=30,
            delay_seconds=5,
        )
    )
    # Print each chunk on its own, then the raw list
    for i, chunk_info in enumerate(video_chunks_with_summaries):
        print(f"Chunk {i+1} (Start: {chunk_info['start']}s, End: {chunk_info['end']}s):\n{chunk_info['summary']}\n")
    print(f"Video chunks with summaries: {video_chunks_with_summaries}")
except Exception as e:
    print(f"Error during parallel processing: {e}")

  if event.key is 'enter':



Total video duration: 7302 seconds
Chunk 1 (Start: 0s, End: 1800s):
Sure thing! Here is a summary of the video:

The video is a live stream by “hu-po” where he goes over the topic of “Diffusion Beats Autoregressive.” The video begins with testing live streaming, followed by hu-po giving his regular disclaimer. 

Hu-po briefly talks about the thumbnail, and then moves into discussing a paper from the title, which explores model architectures in OpenAI’s ChatGPT. The content revolves around large language models (LLM) and how AI compares two dominant models. Hu-po is critical about the fact that LLM has been used to train internet data over the past couple of years.

Chunk 2 (Start: 1800s, End: 3600s):
Here is a summary of the video:
The speaker is going over papers about scaling laws for AI models. He points out that a paper he is looking at has extrapolated data to model sizes for which they have no data.
Next, the speaker finds an Apple paper called “Distillation Scaling Laws” that co

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
# Add Vertex AI imports
from google.cloud.aiplatform import init
from vertexai.language_models import TextEmbeddingModel


def score_relevancy_gemini(user_query: str, summaries: list[str], model_name: str = "text-embedding-004") -> list[float]:
    """
    Calculates relevancy scores for a list of summaries based on a user query
    using a Vertex AI text embedding model and cosine similarity.

    Args:
        user_query: The user's query string.
        summaries: A list of summary strings. May be empty.
        model_name: The name of the embedding model to load (default is text-embedding-004).

    Returns:
        A list of cosine similarity scores, one for each summary, in the same
        order as ``summaries``. An empty ``summaries`` list yields ``[]``
        without contacting Vertex AI.
    """
    # Nothing to score: return early instead of sending an empty batch to the
    # embedding API and an empty matrix to cosine_similarity.
    if not summaries:
        return []

    # Initialize Vertex AI (if not already initialized)
    init(project="mrc-quant-ml", location="us-central1")

    # Get the embedding model
    embedding_model = TextEmbeddingModel.from_pretrained(model_name)

    # Embed the user query and each summary using Vertex AI
    query_embedding = embedding_model.get_embeddings([user_query])[0].values
    summary_embeddings = [embedding.values for embedding in embedding_model.get_embeddings(summaries)]

    # cosine_similarity expects 2D inputs, so the single query vector is wrapped in a list
    similarity_scores = cosine_similarity([query_embedding], summary_embeddings)

    # The result has one row (the query); return that row as a plain list of floats
    return similarity_scores[0].tolist()

# Example usage
user_query = "diffusion models"
# Pull the summary text out of each chunk dictionary, keeping chunk order
summaries = [chunk_info['summary'] for chunk_info in video_chunks_with_summaries]
relevancy_scores_gemini = score_relevancy_gemini(user_query, summaries)
print(f"Relevancy scores using Gemini embedding for query '{user_query}': {relevancy_scores_gemini}")




Relevancy scores using Gemini embedding for query 'diffusion models': [0.5578611017348896, 0.5802670089981152, 0.5742812468244225, 0.5929148443269238, 0.28378637212218516]


In [None]:
# Display the list of chunk dictionaries (start, end, summary) as the cell's output
video_chunks_with_summaries

[{'start': 0,
  'end': 1800,
  'summary': 'Sure thing! Here is a summary of the video:\n\nThe video is a live stream by “hu-po” where he goes over the topic of “Diffusion Beats Autoregressive.” The video begins with testing live streaming, followed by hu-po giving his regular disclaimer. \n\nHu-po briefly talks about the thumbnail, and then moves into discussing a paper from the title, which explores model architectures in OpenAI’s ChatGPT. The content revolves around large language models (LLM) and how AI compares two dominant models. Hu-po is critical about the fact that LLM has been used to train internet data over the past couple of years.'},
 {'start': 1800,
  'end': 3600,
  'summary': 'Here is a summary of the video:\nThe speaker is going over papers about scaling laws for AI models. He points out that a paper he is looking at has extrapolated data to model sizes for which they have no data.\nNext, the speaker finds an Apple paper called “Distillation Scaling Laws” that correctly

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
# Add Vertex AI imports
from google.cloud.aiplatform import init
from vertexai.language_models import TextEmbeddingModel
from google.genai.types import Part, VideoMetadata, FileData


def generate_answer_from_top_chunks(user_query: str, video_chunks: list[dict], relevancy_scores: list[float], k: int = 2, video_uri: str = "gs://mrc-quant-ml-video-analysis/videoplayback.mp4"):
    """
    Generates an answer to the user query based on the top k most relevant video chunks.

    The k highest-scoring chunks are attached to the request as video parts
    (limited to each chunk's start/end window) after the query text, and the
    model's text reply is returned.

    Args:
        user_query: The user's query string.
        video_chunks: A list of dictionaries, each containing 'start', 'end', and 'summary' of a video chunk.
        relevancy_scores: A list of relevancy scores corresponding to the video chunks
            (same length and order as ``video_chunks``).
        k: The number of top chunks to use (default is 2); must be at least 1.
        video_uri: The GCS URI of the video.

    Returns:
        A generated answer based on the top k chunks.

    Raises:
        ValueError: If ``video_chunks`` and ``relevancy_scores`` differ in
            length, or if ``k`` is less than 1.
    """
    # zip() would silently drop the unmatched tail, ranking only part of the data.
    if len(video_chunks) != len(relevancy_scores):
        raise ValueError(
            f"video_chunks ({len(video_chunks)}) and relevancy_scores "
            f"({len(relevancy_scores)}) must have the same length"
        )
    # A negative k would slice chunks off the end instead of taking the top ones.
    if k < 1:
        raise ValueError("k must be at least 1")

    # Pair chunks with their scores and sort by score in descending order
    scored_chunks = sorted(zip(video_chunks, relevancy_scores), key=lambda item: item[1], reverse=True)

    # Select the top k chunks (fewer if fewer chunks were supplied)
    top_k_chunks = [chunk for chunk, _score in scored_chunks[:k]]

    # Prepare the content for the Gemini API call: the query first, then one video part per chunk
    contents = [user_query]
    for chunk in top_k_chunks:
        contents.append(
            Part(
                video_metadata=VideoMetadata(
                    fps=0.1,
                    start_offset=f"{chunk['start']}s",
                    end_offset=f"{chunk['end']}s"
                ),
                file_data=FileData(
                    file_uri=video_uri,
                    mime_type="video/mp4",
                ),
            )
        )

    # Use Gemini API to generate an answer based on the top k chunks
    client = Client(
        vertexai=True,
        project="mrc-quant-ml",
        location="us-central1",
    )

    response = client.models.generate_content(
        model="gemini-2.0-flash-exp",
        contents=contents,
    )
    return response.text

# Example usage: answer from the single best-scoring chunk (change k_value as needed)
k_value = 1
user_query = "why diffusion models are better and at what time it was discussed"
# NOTE(review): relevancy_scores_gemini is not recomputed in this cell, so it may
# have been scored against a different query than user_query above — confirm.
# video_chunks_with_summaries is passed to the function as-is
generated_answer = generate_answer_from_top_chunks(
    user_query,
    video_chunks_with_summaries,
    relevancy_scores_gemini,
    k=k_value,
    video_uri=video_uri,
)
print(f"\nGenerated answer based on top {k_value} chunks:\n{generated_answer}")


Generated answer based on top 1 chunks:
At [00:10:06], the video discussed why diffusion models are superior because they offer increased data augmentation due to the implicit noise addition, leading to more efficient training.

