<a href="https://colab.research.google.com/github/prisar/ai_notebooks/blob/main/nb_104.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Authentication and service account setup
from google.colab import auth
from google.auth import default
import os

# Authenticate with Google Cloud via the Colab auth helper imported above.
auth.authenticate_user()

# Set project ID in the process environment.
# NOTE(review): presumably read by Google client libraries that are created
# without an explicit project (e.g. a bare storage.Client()) — confirm.
os.environ['GOOGLE_CLOUD_PROJECT'] = 'mrc-quant-ml'


In [2]:

# Install required packages
!pip install -q google-genai google-cloud-aiplatform

In [3]:
# Import and initialize
from google.genai import Client
from google.genai.types import Part, VideoMetadata, FileData
from google.cloud import storage
import asyncio
from concurrent.futures import ThreadPoolExecutor
import nest_asyncio
import time # Import time module for delays
import moviepy.editor as mp # Import moviepy for video duration


def summarize_video_chunk(
    video_uri: str,
    start_offset: str,
    end_offset: str,
    prompt: str = "Analyze this video and provide a summary.",
    *,
    model: str = "gemini-2.0-flash-exp",
    mime_type: str = "video/mp4",
    project: str = "mrc-quant-ml",
    location: str = "us-central1",
):
    """Summarize one time window of a video with a Gemini model on Vertex AI.

    Args:
        video_uri: URI of the video, forwarded as ``FileData.file_uri``
            (the example in this notebook uses a ``gs://`` URI).
        start_offset: Start of the window, forwarded to
            ``VideoMetadata.start_offset``. Callers in this notebook pass
            strings of the form ``"<seconds>s"``.
        end_offset: End of the window, forwarded to
            ``VideoMetadata.end_offset`` (same format as ``start_offset``).
        prompt: Text instruction sent alongside the video part.
        model: Model name passed to ``generate_content``.
        mime_type: MIME type declared for the video file.
        project: Google Cloud project used to build the Vertex AI client.
        location: Vertex AI region used to build the client.

    Returns:
        The ``text`` attribute of the ``generate_content`` response.
        NOTE(review): this may be ``None`` when the model returns no text
        part — confirm against the SDK before relying on a ``str``.
    """
    # A new client is built on every call, as in the original implementation.
    client = Client(vertexai=True, project=project, location=location)

    video_part = Part(
        video_metadata=VideoMetadata(
            # fps=1 is kept from the original; presumably one sampled frame
            # per second of video — TODO confirm against the SDK docs.
            fps=1,
            start_offset=start_offset,
            end_offset=end_offset,
        ),
        file_data=FileData(
            file_uri=video_uri,
            mime_type=mime_type,
        ),
    )

    response = client.models.generate_content(
        model=model,
        contents=[video_part, prompt],
    )
    return response.text

# Function to get video duration
async def get_video_duration(video_uri: str) -> int:
    """Return the duration of a GCS-hosted video in whole seconds.

    The object is downloaded to a uniquely named temporary file, probed with
    moviepy, and the temporary file is always removed afterwards.

    NOTE: nothing in this body awaits; every call is blocking. The function
    stays ``async`` only so that existing ``await get_video_duration(...)``
    call sites keep working.

    Args:
        video_uri: URI of the form ``gs://<bucket>/<object path>``.

    Returns:
        The duration truncated to an ``int``, or ``0`` if anything fails
        (malformed URI, download error, unreadable video). The error is
        printed rather than raised.
    """
    import tempfile

    temp_file = None
    clip = None
    try:
        # Split "gs://bucket/path/to/object" into bucket and object name.
        bucket_name, blob_name = video_uri.removeprefix("gs://").split("/", 1)

        client = storage.Client()
        bucket = client.get_bucket(bucket_name)
        blob = bucket.blob(blob_name)

        # Use a unique temp file (keeping the extension) instead of a fixed
        # /tmp/<basename> path, so concurrent or repeated runs cannot collide.
        fd, temp_file = tempfile.mkstemp(suffix=os.path.splitext(blob_name)[1])
        os.close(fd)

        # Download the video temporarily to get duration (consider optimizing this)
        blob.download_to_filename(temp_file)
        clip = mp.VideoFileClip(temp_file)
        return int(clip.duration)
    except Exception as e:
        print(f"Error getting video duration: {e}")
        return 0  # Best-effort: callers treat 0 as "duration unknown"
    finally:
        # Release the reader held by the clip and delete the download even
        # when probing failed part-way through.
        if clip is not None:
            clip.close()
        if temp_file is not None and os.path.exists(temp_file):
            os.remove(temp_file)

# Batch processing optimization
async def process_video_chunks_parallel(
    video_uri: str,
    chunk_duration_minutes: int = 30,
    max_workers: int = 4,
    delay_seconds: int = 1,
    *,
    total_duration: int = 7302,
):
    """Summarize a video in fixed-length chunks using a thread pool.

    The video is split into consecutive ``[start, end)`` windows of
    ``chunk_duration_minutes`` (the last window is clipped to the total
    duration). Each window is summarized by ``summarize_video_chunk`` in a
    worker thread.

    Args:
        video_uri: URI of the video, forwarded to ``summarize_video_chunk``.
        chunk_duration_minutes: Length of each chunk in minutes; must be > 0.
        max_workers: Maximum number of worker threads.
        delay_seconds: Pause taken by a worker *after* each API call, before
            the worker is released for the next chunk. It does not stagger
            the first ``max_workers`` calls, which start together.
        total_duration: Total video length in seconds. Defaults to 7302, the
            value that was previously hard-coded in the body in place of a
            ``get_video_duration`` call.

    Returns:
        A list of summaries in chunk order, or ``[]`` when
        ``total_duration`` is not positive.

    Raises:
        ValueError: If ``chunk_duration_minutes`` is not positive.
    """
    chunk_duration = chunk_duration_minutes * 60  # Convert minutes to seconds
    if chunk_duration <= 0:
        raise ValueError("chunk_duration_minutes must be positive")

    print(f"Total video duration: {total_duration} seconds")
    if total_duration <= 0:
        print("Could not get video duration. Aborting processing.")
        return []

    chunks = [
        (start, min(start + chunk_duration, total_duration))
        for start in range(0, total_duration, chunk_duration)
    ]

    def summarize_then_pause(start, end):
        """Summarize one chunk, then hold the worker for ``delay_seconds``."""
        summary = summarize_video_chunk(video_uri, f"{start}s", f"{end}s")
        # A plain blocking sleep is enough here: this runs in a worker
        # thread, so no nested event loop is needed just to wait.
        time.sleep(delay_seconds)
        return summary

    # Inside a coroutine the running loop is the one to schedule on.
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        tasks = [
            loop.run_in_executor(executor, summarize_then_pause, start, end)
            for start, end in chunks
        ]
        # gather preserves the order of `tasks`, i.e. chunk order.
        summaries = await asyncio.gather(*tasks)

    return summaries

# Example usage with error handling
video_uri = "gs://mrc-quant-ml-video-analysis/videoplayback.mp4"

# Example of how to use the parallel processing function

# Apply this if running in Colab: the notebook already has an event loop
# running, and nest_asyncio allows asyncio.run() to be used from within it.
nest_asyncio.apply()

try:
    # Add delay_seconds parameter to control delay
    all_summaries = asyncio.run(
        process_video_chunks_parallel(video_uri, chunk_duration_minutes=30, delay_seconds=5)
    )
    for chunk_number, summary in enumerate(all_summaries, start=1):
        print(f"Summary for chunk {chunk_number}:\n{summary}\n")
except Exception as e:
    # Top-level notebook boundary: report the failure instead of a traceback.
    print(f"Error during parallel processing: {e}")



Total video duration: 7302 seconds
Summary for chunk 1:
Alright, here's a summary of the video:

The speaker, Hu-Po, starts by testing live streaming on Youtube and X and then introduces the topic for the live stream: a discussion on the research paper "Diffusion Beats Autoregressive in Data-Constrained Settings."
In the live stream, he first explains the origin of the thumbnail and that the word Autoregressive was misspelled. Then he briefly describes the study of masking, the purpose of the said research paper, what makes diffusion models so special, and explains the importance of each item as well as what their purposes are when they are limited by things such as low processing power and low data availability.

Summary for chunk 2:
The speaker describes how Scaling Laws are sometimes extrapolated beyond the data available. 


He then provides a brief overview of the concept of attention masks in the context of autoaggressive models. 


He finally goes on to describe how diffusion mo

In [10]:
from sklearn.metrics.pairwise import cosine_similarity
# Add Vertex AI imports
from google.cloud.aiplatform import init
from vertexai.language_models import TextEmbeddingModel


def score_relevancy_gemini(
    user_query: str,
    summaries: list[str],
    model_name: str = "text-embedding-004",
    *,
    project: str = "mrc-quant-ml",
    location: str = "us-central1",
) -> list[float]:
    """
    Calculates relevancy scores for a list of summaries based on a user query
    using a Vertex AI text-embedding model and cosine similarity.

    Args:
        user_query: The user's query string.
        summaries: A list of summary strings.
        model_name: The name of the embedding model to load with
            ``TextEmbeddingModel.from_pretrained`` (default is text-embedding-004).
        project: Google Cloud project passed to the Vertex AI ``init`` call.
        location: Vertex AI region passed to the ``init`` call.

    Returns:
        A list of cosine similarity scores, one for each summary, in the same
        order as ``summaries``. An empty ``summaries`` list yields ``[]``
        without contacting the embedding service.
    """
    # Nothing to score: avoid sending an empty batch to the embedding
    # service and an empty matrix to cosine_similarity.
    if not summaries:
        return []

    # Initialize Vertex AI (if not already initialized)
    init(project=project, location=location)

    # Get the embedding model
    embedding_model = TextEmbeddingModel.from_pretrained(model_name)

    # Embed the user query and each summary using Vertex AI
    query_embedding = embedding_model.get_embeddings([user_query])[0].values
    summary_embeddings = [
        embedding.values for embedding in embedding_model.get_embeddings(summaries)
    ]

    # cosine_similarity expects 2D inputs, so the single query vector is
    # wrapped in a list; the result has one row (the query) and one column
    # per summary.
    similarity_scores = cosine_similarity([query_embedding], summary_embeddings)

    # Flatten the single result row into a plain list of floats.
    return similarity_scores[0].tolist()

# Example usage
user_query = "diffusion models"
# Reuse the chunk summaries produced by the earlier cell; all_summaries must
# already exist in the notebook session.
summaries = all_summaries
relevancy_scores_gemini = score_relevancy_gemini(
    user_query=user_query,
    summaries=summaries,
)
# One score per summary, printed as a single list.
print(
    f"Relevancy scores using Gemini embedding for query '{user_query}': "
    f"{relevancy_scores_gemini}"
)




Relevancy scores using Gemini embedding for query 'diffusion models': [0.565487016765596, 0.5798447897620905, 0.5898328088135344, 0.5833395964088023, 0.3505766592289541]
