## Download YouTube Video


In [None]:
import yt_dlp
import os

output_path="../downloads"

# Create output path if it doesn't exist
if not os.path.exists(output_path):
    os.makedirs(output_path)

ydl_opts = {
    'format': 'bestvideo[height<=1080][vcodec^=avc1]+bestaudio/best[height<=1080][vcodec^=avc1]',
    'merge_output_format': 'mp4',
    'outtmpl': f"{output_path}/%(title)s.%(ext)s",
}
def download_video(url, ydl_opts=ydl_opts):
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

video_url = "https://www.youtube.com/watch?v=VoJVcu52a3Q"
download_video(video_url)

[youtube] Extracting URL: https://www.youtube.com/watch?v=VoJVcu52a3Q
[youtube] VoJVcu52a3Q: Downloading webpage
[youtube] VoJVcu52a3Q: Downloading tv client config
[youtube] VoJVcu52a3Q: Downloading player f6e09c70
[youtube] VoJVcu52a3Q: Downloading tv player API JSON
[youtube] VoJVcu52a3Q: Downloading ios player API JSON
[youtube] VoJVcu52a3Q: Downloading m3u8 information
[info] VoJVcu52a3Q: Downloading 1 format(s): 136+251
[download] Destination: ../downloads/Thomas the tank Engine travels Japan[MMD].f136.mp4
[download] 100% of   15.76MiB in 00:00:01 at 13.95MiB/s    
[download] Destination: ../downloads/Thomas the tank Engine travels Japan[MMD].f251.webm
[download] 100% of    2.03MiB in 00:00:00 at 9.80MiB/s     
[Merger] Merging formats into "../downloads/Thomas the tank Engine travels Japan[MMD].mp4"
Deleting original file ../downloads/Thomas the tank Engine travels Japan[MMD].f136.mp4 (pass -k to keep)
Deleting original file ../downloads/Thomas the tank Engine travels Japan[MMD]

## Extract a Batch of Frames from Video


In [2]:
from decord import VideoReader, cpu
import numpy as np
from datetime import datetime, timedelta

def extract_video_frames_in_batches(video_path, frame_skip_interval=5, batch_size=5):
    """
    Generator function that extracts frames from a video at a specified frame skip interval and yields them in batches.

    Args:
        video_path (str): Path to the video file.
        frame_skip_interval (int): Number of frames to skip between extractions.
        batch_size (int): Number of frames per batch.
        video_start_time (datetime, optional): The start time of the video for timestamp calculation.
            If None, the timestamps will be relative (from 00:00:00).

    Yields:
        tuple: A tuple containing:
            - np.ndarray: A batch of extracted video frames.
            - list: The corresponding frame indexes.
            - list: The corresponding timestamps in MongoDB ISODate format.
    """
    video_reader = VideoReader(video_path, ctx=cpu(0))
    video_fps = video_reader.get_avg_fps()  # Get actual FPS of the video
    print("Video FPS:", video_fps)
    print("Frame Skip Interval:", frame_skip_interval)

    frame_batch = []
    frame_indexes = []
    seek_times = []
    
    # Generate the list of frame indices to extract based on frame skipping interval
    selected_frame_indexes = list(range(0, len(video_reader), frame_skip_interval + 1))

    for frame_index in selected_frame_indexes:
        frame = video_reader[frame_index].asnumpy()  # Convert to NumPy array
        frame_batch.append(frame)
        frame_indexes.append(frame_index)  # Store actual frame index
        
        # Calculate seek time based on frame index and video FPS
        seek_time = round(frame_index / video_fps, 2)
        seek_times.append(seek_time)
        
        # Yield batch when it reaches the specified size
        if len(frame_batch) == batch_size:
            yield np.array(frame_batch), frame_indexes, seek_times
            frame_batch = []  # Reset batch
            frame_indexes = []
            seek_times = []
    
    # Yield any remaining frames in the last batch
    if frame_batch:
        yield np.array(frame_batch), frame_indexes, seek_times

# Example Usage
video_file_path = "../downloads/Thomas the tank Engine travels Japan[MMD].mp4"
frame_skip_interval = 2  # Skip 10 frames between each extraction
batch_size = 8  # Extract frames in batches of 3

frame_generator = extract_video_frames_in_batches(video_file_path, frame_skip_interval, batch_size)

frame_batch, frame_indexes, seek_times = next(frame_generator)  # Retrieve first batch of frames
print("Frame Batch Shape:", frame_batch.shape)  # (batch_size, height, width, 3)
print("Frame Indexes:", frame_indexes)
print("Seek Times:", seek_times)


Video FPS: 29.97002997002997
Frame Skip Interval: 2
Frame Batch Shape: (8, 720, 1280, 3)
Frame Indexes: [0, 3, 6, 9, 12, 15, 18, 21]
Seek Times: [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]


# Create Embeddings for Extracted Frames


In [14]:
import torch
from PIL import Image
import requests
from transformers import AutoProcessor, AutoModel

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load model and processor
model = AutoModel.from_pretrained("google/siglip2-base-patch16-naflex").to(device)
processor = AutoProcessor.from_pretrained("google/siglip2-base-patch16-naflex")

# Prepare inputs (assuming frame_batch is a list of PIL images)
inputs = processor(images=frame_batch, return_tensors="pt").to(device)

# Perform inference on GPU
with torch.no_grad():
    frame_batch_embeddings = model.get_image_features(**inputs)

print(frame_batch_embeddings.shape) 

torch.Size([8, 768])


In [16]:
float32_embeddings = frame_batch_embeddings.cpu().numpy().astype(np.float32)

In [17]:
from bson.binary import Binary
from bson.binary import BinaryVectorDtype

# Generate BSON vector using `BinaryVectorDtype`
def generate_bson_vector(vector, vector_dtype):
    return Binary.from_vector(vector, vector_dtype)

bson_float32_embeddings = []
for f32_emb in float32_embeddings:
    bson_float32_embeddings.append(generate_bson_vector(f32_emb, BinaryVectorDtype.FLOAT32))

## MongoDB Connection


In [19]:
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Get MongoDB URI
MONGODB_URI = os.getenv("MONGODB_URI")

client = MongoClient(MONGODB_URI)
print("Connected to MongoDB successfully!")

# # List all databases
databases = client.list_database_names()
print("Databases:")
for db in databases:
    print(db)

# Access the video_analysis database
db = client['video_analysis']

# List all collections in the video_analysis database
print()
collections = db.list_collection_names()
print("Collections in video_analysis database:")
for collection in collections:
    print(collection)

Connected to MongoDB successfully!
Databases:
admin
config
local
video_analysis

Collections in video_analysis database:
frames


# Store Frame Data in DB


In [21]:
# Define the collection
frames_collection = db['frames']

# Insert the first batch of frames into the frames collection
docs = []
for frame_index, seek_time, embedding in zip(frame_indexes, seek_times, bson_float32_embeddings):
    doc = {
        "frame_index": frame_index,
        "seek_time": seek_time,
        "embedding": embedding,
    }
    docs.append(doc)

frames_collection.insert_many(docs)

# Count the number of documents in the frames collection
num_frames = frames_collection.count_documents({})
print("Number of Frames in Collection:", num_frames)


Number of Frames in Collection: 16
