In [13]:
import os
import cv2
import shutil
import random
import numpy as np
from tqdm import tqdm
from pathlib import Path


EVENT_CLIPS_DIR = "../Dataset/Event_clips"
OUTPUT_DIR = "../Dataset/split_data"  
AUGMENTED_DIR = "../Dataset/augmented_event_clips"

In [6]:
event_counts = {}

for event in os.listdir(EVENT_CLIPS_DIR):
    event_folder = os.path.join(EVENT_CLIPS_DIR, event)
    if os.path.isdir(event_folder):
        num_clips = len([f for f in os.listdir(event_folder) if f.endswith(".mp4")])
        event_counts[event] = num_clips

# Print counts
for event, count in event_counts.items():
    print(f"{event}: {count} clips")


Ball out of play: 275 clips
Clearance: 247 clips
Corner: 231 clips
Direct free-kick: 210 clips
Foul: 264 clips
Goal: 148 clips
Indirect free-kick: 259 clips
Kick-off: 157 clips
Offside: 178 clips
Penalty: 25 clips
Red card: 8 clips
Shots off target: 232 clips
Shots on target: 238 clips
Substitution: 150 clips
Throw-in: 267 clips
Yellow card: 162 clips
Yellow-_red card: 8 clips


In [14]:
os.makedirs(AUGMENTED_DIR, exist_ok=True)

# Events for augmentation
high_aug_events = {"Penalty", "Red card", "Yellow-_red card"}  # Critically low clips
mild_aug_events = {"Goal", "Kick-off", "Offside", "Substitution", "Yellow card"}  # Moderately low clips

# OpenCV-based augmentations
def flip_video(video_path, output_path):
    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        flipped = cv2.flip(frame, 1)  # Flip horizontally
        out.write(flipped)

    cap.release()
    out.release()


def add_gaussian_noise(video_path, output_path):
    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        noise = np.random.normal(0, 25, frame.shape).astype(np.uint8)
        noisy_frame = cv2.add(frame, noise)
        out.write(noisy_frame)

    cap.release()
    out.release()


def change_brightness(video_path, output_path, factor=1.2):
    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        brightened = np.clip(frame * factor, 0, 255).astype(np.uint8)
        out.write(brightened)

    cap.release()
    out.release()


# Apply augmentation
def augment_videos(event_folder, event_name, intensity):
    event_path = os.path.join(EVENT_CLIPS_DIR, event_folder)
    output_path = os.path.join(AUGMENTED_DIR, event_folder)
    os.makedirs(output_path, exist_ok=True)

    video_files = [f for f in os.listdir(event_path) if f.endswith('.mp4')]
    num_original = len(video_files)
    num_needed = 2 * num_original if intensity == "high" else int(1.5 * num_original)

    for _ in tqdm(range(num_needed - num_original), desc=f"Augmenting {event_name}"):
        video_file = random.choice(video_files)
        input_path = os.path.join(event_path, video_file)
        output_file = f"aug_{random.randint(1000, 9999)}_{video_file}"
        output_path_file = os.path.join(output_path, output_file)

        aug_type = random.choice(["flip", "noise", "brightness"])
        if aug_type == "flip":
            flip_video(input_path, output_path_file)
        elif aug_type == "noise":
            add_gaussian_noise(input_path, output_path_file)
        elif aug_type == "brightness":
            change_brightness(input_path, output_path_file, factor=random.uniform(0.8, 1.2))


# Perform augmentation
for event in os.listdir(EVENT_CLIPS_DIR):
    if event in high_aug_events:
        augment_videos(event, event, "high")
    elif event in mild_aug_events:
        augment_videos(event, event, "mild")

print("✅ Data augmentation complete.")


Augmenting Goal: 100%|██████████| 74/74 [07:16<00:00,  5.90s/it]
Augmenting Kick-off: 100%|██████████| 78/78 [08:16<00:00,  6.37s/it]
Augmenting Offside: 100%|██████████| 89/89 [09:39<00:00,  6.51s/it]
Augmenting Penalty: 100%|██████████| 25/25 [11:37<00:00, 27.91s/it] 
Augmenting Red card: 100%|██████████| 8/8 [00:53<00:00,  6.71s/it]
Augmenting Substitution: 100%|██████████| 75/75 [07:26<00:00,  5.95s/it]
Augmenting Yellow card: 100%|██████████| 81/81 [06:35<00:00,  4.88s/it]
Augmenting Yellow-_red card: 100%|██████████| 8/8 [00:31<00:00,  3.98s/it]

✅ Data augmentation complete.





In [15]:
# Merge augmented clips into original event clips directory
for event in os.listdir(AUGMENTED_DIR):
    event_aug_path = os.path.join(AUGMENTED_DIR, event)
    event_orig_path = os.path.join(EVENT_CLIPS_DIR, event)

    if not os.path.exists(event_orig_path):
        os.makedirs(event_orig_path)

    for file in os.listdir(event_aug_path):
        src = os.path.join(event_aug_path, file)
        dest = os.path.join(event_orig_path, file)
        shutil.move(src, dest)  # Moves files instead of copying

print("✅ Augmented clips successfully merged into Event_clips.")


✅ Augmented clips successfully merged into Event_clips.


In [16]:
# Create Train, Validation, and Test Folders
for split in ["train", "val", "test"]:
    os.makedirs(os.path.join(OUTPUT_DIR, split), exist_ok=True)

# Define Splitting Ratios
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# Iterate over each event folder
for event in os.listdir(EVENT_CLIPS_DIR):
    event_path = os.path.join(EVENT_CLIPS_DIR, event)
    if not os.path.isdir(event_path):
        continue  # Skip non-folder files

    clips = list(Path(event_path).glob("*.mp4"))  # Get all video clips
    random.shuffle(clips)  # Shuffle clips randomly

    # Compute split indices
    total_clips = len(clips)
    train_count = int(total_clips * train_ratio)
    val_count = int(total_clips * val_ratio)

    train_clips = clips[:train_count]
    val_clips = clips[train_count:train_count + val_count]
    test_clips = clips[train_count + val_count:]

    # Move files to respective folders
    for split, clips_set in zip(["train", "val", "test"], [train_clips, val_clips, test_clips]):
        event_split_dir = os.path.join(OUTPUT_DIR, split, event)
        os.makedirs(event_split_dir, exist_ok=True)  # Create event subfolder
        
        for clip in clips_set:
            shutil.copy(clip, event_split_dir)  # Copy clip to split folder

    print(f"📂 {event}: Train={len(train_clips)}, Val={len(val_clips)}, Test={len(test_clips)}")

print("\n✅ Data successfully split into train/val/test!")


📂 Ball out of play: Train=192, Val=41, Test=42
📂 Clearance: Train=172, Val=37, Test=38
📂 Corner: Train=161, Val=34, Test=36
📂 Direct free-kick: Train=147, Val=31, Test=32
📂 Foul: Train=184, Val=39, Test=41
📂 Goal: Train=155, Val=33, Test=34
📂 Indirect free-kick: Train=181, Val=38, Test=40
📂 Kick-off: Train=164, Val=35, Test=36
📂 Offside: Train=186, Val=40, Test=41
📂 Penalty: Train=35, Val=7, Test=8
📂 Red card: Train=11, Val=2, Test=3
📂 Shots off target: Train=162, Val=34, Test=36
📂 Shots on target: Train=166, Val=35, Test=37
📂 Substitution: Train=157, Val=33, Test=35
📂 Throw-in: Train=186, Val=40, Test=41
📂 Yellow card: Train=170, Val=36, Test=37
📂 Yellow-_red card: Train=11, Val=2, Test=3

✅ Data successfully split into train/val/test!
