In [None]:
pip install opencv-python gitpython

import os
import cv2
import numpy as np
from git import Repo

def clone_github_repo(repo_url, clone_dir='./github_videos'):
    """Clone ``repo_url`` into ``clone_dir`` unless that path already exists.

    Only the existence of ``clone_dir`` is checked: an existing path is
    reported as already cloned and left untouched, whatever it contains.
    """
    if os.path.exists(clone_dir):
        print(f"Repo already cloned at {clone_dir}")
        return

    print(f"Cloning repo from {repo_url} into {clone_dir}")
    Repo.clone_from(repo_url, clone_dir)

def extract_frames_from_video(video_path, num_frames=20, resize_dim=(224, 224)):
    """
    Extract a fixed number of uniformly spaced frames from a video.

    Args:
        video_path: Path of the video file passed to ``cv2.VideoCapture``.
        num_frames: Number of frames to return.
        resize_dim: Target size handed to ``cv2.resize`` for each sampled
            frame (OpenCV interprets it as ``(width, height)``).

    Returns:
        float32 numpy array of RGB frames scaled to [0, 1], with
        ``num_frames`` entries along the first axis. When fewer distinct
        frames could be sampled (short video, or decoding stopped early),
        the last sampled frame is repeated to fill the remainder.

    Raises:
        ValueError: If frames were requested but none could be decoded
            (for example a missing, unreadable or empty video file).
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # The reported frame count can be zero or negative; clamp so that the
        # sampling range never goes below frame 0.
        last_index = max(total_frames - 1, 0)
        frame_indices = np.linspace(0, last_index, num_frames, dtype=int)
        wanted = {int(i) for i in frame_indices}

        if wanted:
            final_wanted = max(wanted)
            current_frame_idx = 0
            # Stop decoding as soon as the last wanted frame has been passed.
            while current_frame_idx <= final_wanted:
                ret, frame = cap.read()
                if not ret:
                    break
                if current_frame_idx in wanted:
                    frame = cv2.resize(frame, resize_dim)
                    # OpenCV decodes to BGR; convert to RGB.
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frames.append(frame)
                current_frame_idx += 1
    finally:
        # Release the capture handle even if resizing/conversion fails.
        cap.release()

    if num_frames > 0 and not frames:
        raise ValueError(f"No frames could be read from video: {video_path}")

    missing = num_frames - len(frames)
    if missing > 0:
        # Pad with the last sampled frame when fewer than num_frames were read.
        frames.extend([frames[-1]] * missing)

    return np.array(frames, dtype=np.float32) / 255.0  # Normalize to [0, 1]

def load_videos_from_repo(clone_dir, num_frames=20):
    """
    Collect every video file under ``clone_dir`` and extract its frames.

    Files are matched case-insensitively on the extensions .mp4, .avi, .mov
    and .mkv. Directories and files are visited in sorted order so that the
    returned list (and therefore the row order of ``X``) is reproducible
    across runs and file systems.

    Args:
        clone_dir: Root directory to scan recursively.
        num_frames: Number of frames to extract from each video.

    Returns:
        Tuple ``(X, video_files)``: ``X`` is a numpy array built from the
        per-video frame arrays (empty when no videos are found) and
        ``video_files`` is the list of matching paths in the same order,
        usable for mapping labels to rows of ``X``.
    """
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv')
    video_files = []
    for root, dirs, files in os.walk(clone_dir):
        # Editing ``dirs`` in place steers os.walk: skip git metadata and
        # descend into the remaining sub-directories in sorted order.
        dirs[:] = sorted(d for d in dirs if d != '.git')
        video_files.extend(
            os.path.join(root, name)
            for name in sorted(files)
            if name.lower().endswith(video_extensions)
        )

    print(f"Found {len(video_files)} videos in {clone_dir}")

    X = np.array(
        [extract_frames_from_video(path, num_frames) for path in video_files]
    )
    return X, video_files  # file list is returned for label mapping/reference


if __name__ == "__main__":
    # End-to-end demo: clone a repo of videos, turn each video into a fixed
    # number of frames, then train a classifier on dummy labels.
    # NOTE(review): `tf`, `build_BiMT_model` and `CategoricalFocalLoss` are not
    # defined in this section; they are presumably provided elsewhere (e.g. an
    # earlier notebook cell) - confirm before running this as a script.
    repo_url = "https://github.com/yourusername/yourvideorepo.git"
    clone_dir = "./github_videos"
    num_frames = 20  # frames sampled per video; also the model's time dimension
    num_classes = 5

    # Clone repo with videos
    clone_github_repo(repo_url, clone_dir)

    # Extract frames from all videos in repo
    X_videos, video_files = load_videos_from_repo(clone_dir, num_frames=num_frames)
    if len(video_files) == 0:
        # Fail early with a clear message instead of an obscure error in fit().
        raise SystemExit(f"No video files found under {clone_dir}; nothing to train on.")
    print("Extracted frames from videos:", X_videos.shape)  # (num_videos, num_frames, 224, 224, 3)

    # TODO: load labels for videos based on your dataset organization
    # For demonstration, we generate dummy labels
    y_dummy = np.random.randint(0, num_classes, size=len(X_videos))
    y_one_hot = tf.keras.utils.to_categorical(y_dummy, num_classes)

    # Build and train your BiMT model with these video frames and labels.
    # The per-video shape is taken from the data so it always matches what
    # the loader actually produced.
    model = build_BiMT_model(input_shape=X_videos.shape[1:], num_classes=num_classes)
    model.compile(optimizer='adam',
                  loss=CategoricalFocalLoss(gamma=2.0, alpha=0.25),
                  metrics=['accuracy'])

    model.fit(X_videos, y_one_hot, epochs=3, batch_size=4)

    # Evaluate & predict with your test data here...

