In [None]:
!pip install opencv-python # Only run once

In [1]:
# SETUP
import cv2
import os
import numpy as np
import csv
import pandas as pd

# Sanity check: confirm that the core modules were imported and are usable.
if all((cv2, os)):
    print('success')

success


In [22]:
# CREATE CLIPS
# Inputs (read by the clip-extraction cell below)

# Source video that the clips are cut from.
video_path = 'kai_video_2.mp4'
# Directory the clip files are written to (created if missing).
output_dir = 'raw_clips'
# Number used to name the first new clip (clip_NNNN.mp4). Set it to continue
# the clip count from the last batch; it determines the clip file names.
current_clip = 1400
# Number of new clips we want to make.
desired_additional_clips = 1000
# Position in the video (in seconds) at which to start creating clips.
start_time = 60
# Exclusive upper bound on the clip number: extraction stops once it is reached.
target_clip = current_clip + desired_additional_clips

In [None]:
# Cut the source video into consecutive fixed-length clips.
#
# Reads `video_path`, `output_dir`, `start_time`, `current_clip` and
# `target_clip` from the inputs cell and writes clip_NNNN.mp4 files into
# `output_dir`.
#
# NOTE: `current_clip` is advanced in place as clips are written, so re-run the
# inputs cell before re-running this cell.
os.makedirs(output_dir, exist_ok=True)

clip_length = 1              # clip duration, in seconds
first_clip = current_clip    # remembered so the summary reports the real count

cap = cv2.VideoCapture(video_path)
try:
    # Fail loudly instead of silently producing nothing for a bad path/codec.
    if not cap.isOpened():
        raise IOError(f"Could not open video: {video_path}")

    fps = cap.get(cv2.CAP_PROP_FPS)
    frames_per_clip = int(fps * clip_length)
    # A zero/invalid frame rate would otherwise yield empty clips forever.
    if frames_per_clip < 1:
        raise ValueError(f"Invalid frame rate ({fps}) reported for {video_path}")

    # Seek to the requested start position (the property is in milliseconds).
    cap.set(cv2.CAP_PROP_POS_MSEC, start_time * 1000)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')

    # Loop to extract clips
    while cap.isOpened() and current_clip < target_clip:
        frames = []
        # Extract frames for each clip
        for _ in range(frames_per_clip):
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)

        # Create clip file from captured frames. A shorter final clip is still
        # written when the video ends part-way through a clip.
        if frames:
            clip_path = os.path.join(output_dir, f"clip_{current_clip:04d}.mp4")
            height, width, _ = frames[0].shape  # Get frame dimensions
            out = cv2.VideoWriter(clip_path, fourcc, fps, (width, height))
            try:
                for frame in frames:
                    out.write(frame)
            finally:
                out.release()  # Always finalize the file, even on error
            current_clip += 1

        # End of video (or read failure): nothing more to extract.
        if not ret:
            break
finally:
    cap.release()

# Report how many clips were actually written, which may be fewer than
# requested if the video ended early.
print(f"Extracted {current_clip - first_clip} new clips from {video_path}")

In [2]:
# LABEL CLIPS
# Inputs (read by the labeling cell below)

# Directory whose entries are played back and labeled.
clip_dir = "raw_clips"
# CSV file that labels are appended to; also read by the analysis cell.
output_csv = "labels.csv"

In [None]:
# Interactively label every clip in `clip_dir` that is not yet recorded in
# `output_csv`. Each clip is played in an OpenCV window (press 'q' to stop
# playback early), then a label is read from stdin and appended to the CSV.

# Load existing labels into a set to avoid re-labeling
labeled_clips = set()
if os.path.exists(output_csv):
    with open(output_csv, mode='r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip header row (no-op if the CSV is empty)
        for row in reader:
            if row:  # Blank lines are returned as empty rows; ignore them
                labeled_clips.add(row[0])  # Add file_name to the set

# A brand-new or empty CSV needs a header row: the loader above skips the first
# row, and the analysis cell looks the 'label' column up by name.
needs_header = not os.path.exists(output_csv) or os.path.getsize(output_csv) == 0

# Open the CSV file in append mode to add new labels
with open(output_csv, mode='a', newline='') as file:
    writer = csv.writer(file)
    if needs_header:
        writer.writerow(["file_name", "label"])
        file.flush()

    # Sort the clips in alphabetical/numerical order for consistency
    clip_names = sorted(os.listdir(clip_dir))

    # Loop through each clip, skipping already labeled ones
    for clip_name in clip_names:
        if clip_name in labeled_clips:
            continue  # Skip already labeled clips

        clip_path = os.path.join(clip_dir, clip_name)

        # Open and play each clip for labeling
        cap = cv2.VideoCapture(clip_path)
        try:
            # Directory entries that are not playable videos (sub-directories,
            # hidden files, ...) must not be given a label row.
            if not cap.isOpened():
                print(f"Skipping {clip_name}: could not be opened as a video")
                continue

            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                cv2.imshow("Clip", frame)
                if cv2.waitKey(30) & 0xFF == ord('q'):  # Press 'q' to move to the next clip
                    break
        finally:
            # Release the capture and close the window even if interrupted.
            cap.release()
            cv2.destroyAllWindows()

        # Ask for label input. The value is stored as typed (trimmed and
        # lower-cased); it is not validated against the prompt's options.
        label = input(f"Enter label for {clip_name} (pass/dribble): ").strip().lower()

        # Write the file name and label to the CSV, flushing right away so an
        # interrupted session keeps every label entered so far.
        writer.writerow([clip_name, label])
        file.flush()

print("Labeling complete. New labels added to ", output_csv)

In [21]:
# ANALYZE CLIP LABELS
# Read the label column as text. With type inference, a single blank label
# turns the column into floats, so '2' becomes '2.0' after astype(str) and the
# prune below would silently remove nothing.
df = pd.read_csv(output_csv, dtype={'label': str})

print(df['label'].unique())                                   # print label values
print(df.shape)                                               # print raw shape
df['label'] = df['label'].astype(str).str.strip().str.lower() # normalize data, convert to string, lowercase etc
df = df.drop(df[df['label'] == '2'].index)                    # prune invalid/void pass/dribble scenarios
print(df.shape)                                               # print pruned df shape

[2 0 1]
(2155, 2)
(999, 2)
