<a href="https://colab.research.google.com/github/prisar/ai_notebooks/blob/main/nb_102.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Authentication and service account setup
from google.colab import auth
from google.auth import default
import os

# Sign the current Colab user in to Google Cloud
auth.authenticate_user()

# Record the project ID in the process environment
os.environ.update({'GOOGLE_CLOUD_PROJECT': 'mrc-quant-ml'})


In [2]:

# Install required packages
!pip install -q google-genai google-cloud-aiplatform

In [5]:
# Import and initialize
from google.genai import Client
from google.genai.types import Part, VideoMetadata, FileData
from google.cloud import storage
import asyncio
from concurrent.futures import ThreadPoolExecutor
import nest_asyncio
import time # Import time module for delays
import moviepy.editor as mp # Import moviepy for video duration


def summarize_video_chunk(
    video_uri: str,
    start_offset: str,
    end_offset: str,
    prompt: str = "Analyze this video and provide a summary.",
    model: str = "gemini-2.0-flash-exp",
    project: str = "mrc-quant-ml",
    location: str = "us-central1",
    mime_type: str = "video/mp4",
):
    """Summarizes a video chunk using the Gemini API.

    A new Vertex AI-backed ``Client`` is created on every call, so the
    function keeps no state between invocations.

    Args:
        video_uri: URI of the video file, passed through as ``file_uri``.
        start_offset: Start of the segment, passed to ``VideoMetadata``
            (callers in this notebook use strings such as ``"0s"``).
        end_offset: End of the segment, in the same format as ``start_offset``.
        prompt: Text instruction sent alongside the video part.
        model: Model name handed to ``generate_content``.
        project: Google Cloud project used to construct the client.
        location: Region used to construct the client.
        mime_type: MIME type declared for the video file.

    Returns:
        The ``text`` attribute of the model response.
    """
    client = Client(
        vertexai=True,
        project=project,
        location=location,
    )

    # The video part carries both the file reference and the segment bounds;
    # fps=1 is forwarded unchanged as the frame-rate setting for the segment.
    video_part = Part(
        video_metadata=VideoMetadata(
            fps=1,
            start_offset=start_offset,
            end_offset=end_offset,
        ),
        file_data=FileData(
            file_uri=video_uri,
            mime_type=mime_type,
        ),
    )

    response = client.models.generate_content(
        model=model,
        contents=[video_part, prompt],
    )
    return response.text

# Function to get video duration
async def get_video_duration(video_uri: str) -> int:
    """Gets the duration of a video from a GCS URI.

    The object is downloaded to a temporary file under ``/tmp`` and opened
    with moviepy to read its duration. The clip is closed and the temporary
    file is removed whether or not the probe succeeds.

    Args:
        video_uri: ``gs://bucket/path/to/video`` URI of the video.

    Returns:
        The duration in whole seconds (fraction truncated), or 0 if anything
        goes wrong; the error is printed rather than raised.
    """
    try:
        # Assuming the video is in a GCS bucket
        client = storage.Client()
        bucket_name, blob_name = video_uri.replace("gs://", "").split("/", 1)
        bucket = client.get_bucket(bucket_name)
        blob = bucket.blob(blob_name)
        # Download the video temporarily to get duration (consider optimizing this)
        temp_file = f"/tmp/{blob_name.split('/')[-1]}"
        try:
            blob.download_to_filename(temp_file)
            clip = mp.VideoFileClip(temp_file)
            try:
                return int(clip.duration)
            finally:
                # Release the reader held by the clip before deleting the file.
                clip.close()
        finally:
            # Clean up even when the download or the probe failed part-way.
            if os.path.isfile(temp_file):
                os.remove(temp_file)
    except Exception as e:
        print(f"Error getting video duration: {e}")
        return 0 # Return 0 or raise an error based on desired behavior

# Batch processing optimization
async def process_video_chunks_parallel(
    video_uri: str,
    chunk_duration_minutes: int = 30,
    max_workers: int = 4,
    delay_seconds: int = 1,
    total_duration_seconds: "int | None" = 7302,
):
    """Process video chunks in parallel for better throughput.

    The video is split into consecutive ``chunk_duration_minutes`` windows
    (the last one is shortened to end at the video's total duration). Each
    window is summarized with ``summarize_video_chunk`` on a thread pool.

    Args:
        video_uri: URI of the video, forwarded to ``summarize_video_chunk``.
        chunk_duration_minutes: Length of each chunk in minutes.
        max_workers: Size of the thread pool, i.e. the maximum number of
            chunks being summarized at the same time.
        delay_seconds: Pause a worker thread takes after each API call before
            it returns its result. Calls running on different workers are not
            spaced apart by this delay.
        total_duration_seconds: Length of the video in seconds. The default
            of 7302 is the value this function previously hard-coded. Pass
            the real length for any other video, or ``None`` to measure it
            with ``get_video_duration`` (which downloads the file).

    Returns:
        A list of summaries in chunk order, or an empty list when the
        duration is not positive.
    """
    chunk_duration = chunk_duration_minutes * 60 # Convert minutes to seconds

    if total_duration_seconds is None:
        total_duration = await get_video_duration(video_uri)
    else:
        total_duration = int(total_duration_seconds)
    print(f"Total video duration: {total_duration} seconds")
    if total_duration <= 0:
        print("Could not get video duration. Aborting processing.")
        return []

    chunks = [(i, min(i + chunk_duration, total_duration))
              for i in range(0, total_duration, chunk_duration)]

    def summarize_then_pause(start, end):
        """Summarize one chunk in a worker thread, then hold the worker for the delay."""
        summary = summarize_video_chunk(video_uri, f"{start}s", f"{end}s")
        # A plain blocking sleep is enough here: this runs in a pool thread,
        # so no per-thread event loop is needed just to wait.
        if delay_seconds > 0:
            time.sleep(delay_seconds)
        return summary

    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        tasks = [
            loop.run_in_executor(executor, summarize_then_pause, start, end)
            for start, end in chunks
        ]

        # gather preserves submission order, so summaries line up with chunks.
        summaries = await asyncio.gather(*tasks)

    return summaries

# Example usage with error handling
video_uri = "gs://mrc-quant-ml-video-analysis/videoplayback.mp4"

# Run the parallel chunk summarizer on the example video.

# Patch asyncio for nested use; apply this when running in Colab.
nest_asyncio.apply()

try:
    # delay_seconds is passed explicitly to control the delay
    all_summaries = asyncio.run(
        process_video_chunks_parallel(
            video_uri,
            chunk_duration_minutes=30,
            delay_seconds=5,
        )
    )
    for chunk_number, summary in enumerate(all_summaries, start=1):
        print(f"Summary for chunk {chunk_number}:\n{summary}\n")
except Exception as e:
    print(f"Error during parallel processing: {e}")

Total video duration: 7302 seconds
Summary for chunk 1:
Alright, I can provide a video summary:
In this video, the host, "hu-po," is doing a live coding and analysis stream, which he titles "Diffusion vs. Autoregressive." His focus is a paper from Carnegie Mellon University, about Diffusion models beating Autoregressive ones in data-constrained settings. He plans to review the abstract of the paper, which came out on July 21, 2025.

He notes that the thumbnail for the video, featuring illustrated cats, was created in OpenAI's GPT, though GPT made a misspelling. He says these sorts of errors are interesting. 

He says he'd like to talk about Scaling Laws a bit. Hu-po says you can always generate more data but that comes at a cost of time.

Hu-po shows a diagram that he likes, then he makes an offhand comment about "British accents making people sound smarter," before explaining that the issue is "overfitting."

Hu-po cites a study that involved training on synthetic textbooks leading to

In [6]:
# Display the list of per-chunk summaries as this cell's output
all_summaries

['Alright, I can provide a video summary:\nIn this video, the host, "hu-po," is doing a live coding and analysis stream, which he titles "Diffusion vs. Autoregressive." His focus is a paper from Carnegie Mellon University, about Diffusion models beating Autoregressive ones in data-constrained settings. He plans to review the abstract of the paper, which came out on July 21, 2025.\n\nHe notes that the thumbnail for the video, featuring illustrated cats, was created in OpenAI\'s GPT, though GPT made a misspelling. He says these sorts of errors are interesting. \n\nHe says he\'d like to talk about Scaling Laws a bit. Hu-po says you can always generate more data but that comes at a cost of time.\n\nHu-po shows a diagram that he likes, then he makes an offhand comment about "British accents making people sound smarter," before explaining that the issue is "overfitting."\n\nHu-po cites a study that involved training on synthetic textbooks leading to better results. The core point is that the