In [1]:
import os
import cv2
import shutil
import random
import numpy as np
from tqdm import tqdm
from pathlib import Path


# Root folder that holds one sub-folder of .mp4 clips per event class.
EVENT_CLIPS_DIR = "../Dataset/dataset_2/Event_clips"
# Root folder under which the train/val/test split folders are created.
OUTPUT_DIR = "../Dataset/dataset_2/split_data_2"  
# Staging folder where augmented clips are written before they are merged.
AUGMENTED_DIR = "../Dataset/dataset_2/augmented_event_clips"

In [2]:
# Tally the .mp4 clips found in each event sub-folder of EVENT_CLIPS_DIR.
event_counts = {}
for event in os.listdir(EVENT_CLIPS_DIR):
    event_folder = os.path.join(EVENT_CLIPS_DIR, event)
    if not os.path.isdir(event_folder):
        continue  # only directories are treated as event classes
    event_counts[event] = sum(
        1 for clip_name in os.listdir(event_folder) if clip_name.endswith(".mp4")
    )

# Report one line per event, in the order the folders were listed.
for event, count in event_counts.items():
    print(f"{event}: {count} clips")


Ball out of play: 275 clips
Clearance: 247 clips
Corner: 231 clips
Direct free-kick: 315 clips
Foul: 264 clips
Goal: 333 clips
Indirect free-kick: 259 clips
Kick-off: 352 clips
Offside: 267 clips
Shots off target: 348 clips
Shots on target: 357 clips
Substitution: 337 clips
Throw-in: 267 clips
Yellow card: 364 clips


In [5]:
# Make sure the staging folder for augmented clips exists.
os.makedirs(AUGMENTED_DIR, exist_ok=True)

# Event classes that receive the "mild" augmentation pass.
mild_aug_events = {
    "Direct free-kick",
    "Goal",
    "Kick-off",
    "Shots off target",
    "Shots on target",
    "Substitution",
    "Yellow card",
}

def flip_video(video_path, output_path):
    """Write a horizontally mirrored copy of a video.

    Args:
        video_path: Path of the clip to read.
        output_path: Path of the mirrored clip to create (``mp4v`` codec).

    The output reuses the frame rate and frame size reported by the
    capture object for the source clip.
    """
    reader = cv2.VideoCapture(video_path)
    frame_rate = reader.get(cv2.CAP_PROP_FPS)
    frame_size = (
        int(reader.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(reader.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    writer = cv2.VideoWriter(
        output_path, cv2.VideoWriter_fourcc(*'mp4v'), frame_rate, frame_size
    )

    while reader.isOpened():
        grabbed, frame = reader.read()
        if not grabbed:
            break  # no more frames could be read
        # Flip code 1 mirrors the frame around its vertical axis.
        writer.write(cv2.flip(frame, 1))

    reader.release()
    writer.release()


def add_gaussian_noise(video_path, output_path, sigma=25):
    """Write a copy of a video with zero-mean Gaussian noise on every frame.

    Args:
        video_path: Path of the clip to read.
        output_path: Path of the noisy clip to create (``mp4v`` codec).
        sigma: Standard deviation of the noise, in 8-bit intensity levels.

    The output reuses the frame rate and frame size reported by the
    capture object for the source clip.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                # Keep the noise signed and add it in floating point. Casting
                # the samples to uint8 first cannot represent the negative
                # half of the distribution, so the result would no longer be
                # zero-mean noise around the original pixel values.
                noise = np.random.normal(0, sigma, frame.shape)
                noisy_frame = np.clip(frame.astype(np.float32) + noise, 0, 255)
                out.write(np.rint(noisy_frame).astype(np.uint8))
        finally:
            out.release()
    finally:
        # Release the capture even if reading or writing a frame fails.
        cap.release()


def change_brightness(video_path, output_path, factor=1.2):
    """Write a copy of a video with every pixel value scaled by ``factor``.

    Args:
        video_path: Path of the clip to read.
        output_path: Path of the rescaled clip to create (``mp4v`` codec).
        factor: Multiplier applied to each pixel; results are clipped to
            the 0-255 range before being converted back to ``uint8``.

    The output reuses the frame rate and frame size reported by the
    capture object for the source clip.
    """
    reader = cv2.VideoCapture(video_path)
    frame_rate = reader.get(cv2.CAP_PROP_FPS)
    frame_size = (
        int(reader.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(reader.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    writer = cv2.VideoWriter(
        output_path, cv2.VideoWriter_fourcc(*'mp4v'), frame_rate, frame_size
    )

    while reader.isOpened():
        grabbed, frame = reader.read()
        if not grabbed:
            break  # no more frames could be read
        scaled = frame * factor
        writer.write(np.clip(scaled, 0, 255).astype(np.uint8))

    reader.release()
    writer.release()


# Apply augmentation
def augment_videos(event_folder, event_name, intensity):
    """Create augmented copies of one event's clips under ``AUGMENTED_DIR``.

    Args:
        event_folder: Name of the event sub-folder, read from
            ``EVENT_CLIPS_DIR`` and mirrored under ``AUGMENTED_DIR``.
        event_name: Label shown in the progress bar.
        intensity: ``"high"`` doubles the clip count; any other value grows
            it to 1.5x the number of source clips.

    Each new clip is a randomly chosen source clip passed through one
    randomly chosen transform (flip, Gaussian noise or brightness change)
    and saved as ``aug_<4 digits>_<source file name>``.
    """
    event_path = os.path.join(EVENT_CLIPS_DIR, event_folder)
    output_path = os.path.join(AUGMENTED_DIR, event_folder)
    os.makedirs(output_path, exist_ok=True)

    # Only untouched source clips are eligible. After augmented clips have
    # been merged into EVENT_CLIPS_DIR, counting or re-augmenting the
    # "aug_" files would inflate the target on every re-run and stack
    # augmentations on top of each other.
    video_files = sorted(
        f for f in os.listdir(event_path)
        if f.endswith('.mp4') and not f.startswith('aug_')
    )
    num_original = len(video_files)
    num_needed = 2 * num_original if intensity == "high" else int(1.5 * num_original)

    for _ in tqdm(range(num_needed - num_original), desc=f"Augmenting {event_name}"):
        video_file = random.choice(video_files)
        input_path = os.path.join(event_path, video_file)

        # Re-draw the random tag until the name is unused, both in the
        # staging folder and in the source folder it may later be merged
        # into, so an existing augmented clip is never overwritten.
        while True:
            output_file = f"aug_{random.randint(1000, 9999)}_{video_file}"
            output_path_file = os.path.join(output_path, output_file)
            name_taken = os.path.exists(output_path_file) or os.path.exists(
                os.path.join(event_path, output_file)
            )
            if not name_taken:
                break

        aug_type = random.choice(["flip", "noise", "brightness"])
        if aug_type == "flip":
            flip_video(input_path, output_path_file)
        elif aug_type == "noise":
            add_gaussian_noise(input_path, output_path_file)
        elif aug_type == "brightness":
            change_brightness(input_path, output_path_file, factor=random.uniform(0.8, 1.2))


# Run the "mild" augmentation pass for every listed entry of EVENT_CLIPS_DIR
# whose name appears in mild_aug_events; all other entries are skipped.
for event in os.listdir(EVENT_CLIPS_DIR):
    if event not in mild_aug_events:
        continue
    augment_videos(event, event, "mild")

print("✅ Data augmentation complete.")


Augmenting Direct free-kick:   0%|          | 0/105 [00:00<?, ?it/s]

Augmenting Direct free-kick: 100%|██████████| 105/105 [14:06<00:00,  8.06s/it]
Augmenting Goal: 100%|██████████| 111/111 [15:33<00:00,  8.41s/it]
Augmenting Kick-off: 100%|██████████| 117/117 [16:01<00:00,  8.22s/it]
Augmenting Shots off target: 100%|██████████| 116/116 [16:12<00:00,  8.39s/it]
Augmenting Shots on target: 100%|██████████| 119/119 [13:55<00:00,  7.02s/it]
Augmenting Substitution: 100%|██████████| 112/112 [13:46<00:00,  7.38s/it]
Augmenting Yellow card: 100%|██████████| 121/121 [15:29<00:00,  7.68s/it]

✅ Data augmentation complete.





In [2]:
# Merge augmented clips into original event clips directory
for event in os.listdir(AUGMENTED_DIR):
    event_aug_path = os.path.join(AUGMENTED_DIR, event)
    if not os.path.isdir(event_aug_path):
        # Stray files in the staging root (e.g. .DS_Store) are not event
        # folders; listing them below would raise NotADirectoryError.
        continue

    event_orig_path = os.path.join(EVENT_CLIPS_DIR, event)
    os.makedirs(event_orig_path, exist_ok=True)

    for file in os.listdir(event_aug_path):
        src = os.path.join(event_aug_path, file)
        dest = os.path.join(event_orig_path, file)
        shutil.move(src, dest)  # Moves files instead of copying

print("✅ Augmented clips successfully merged into Event_clips.")


✅ Augmented clips successfully merged into Event_clips.


In [3]:
# Create Train, Validation, and Test Folders
for split in ["train", "val", "test"]:
    os.makedirs(os.path.join(OUTPUT_DIR, split), exist_ok=True)

# Define Splitting Ratios
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15  # informational: the test split receives whatever train and val leave over

# A fixed seed plus sorted inputs makes the assignment reproducible, so
# re-running this cell copies every clip into the same split again instead
# of scattering duplicates of one clip across train/val/test.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)


def _source_clip_name(clip):
    """Return the file name of the original clip that ``clip`` derives from.

    Augmented clips are named ``aug_<digits>_<source file name>``; such
    prefixes are stripped (repeatedly, in case they are stacked). Any other
    file name is returned unchanged.
    """
    name = clip.name
    while True:
        prefix, _, rest = name.partition("_")
        tag, _, remainder = rest.partition("_")
        if prefix != "aug" or not tag.isdigit() or not remainder:
            return name
        name = remainder


# Iterate over each event folder
for event in sorted(os.listdir(EVENT_CLIPS_DIR)):
    if event == 'Ball out of play' or event == 'Clearance':
        continue
    event_path = os.path.join(EVENT_CLIPS_DIR, event)
    if not os.path.isdir(event_path):
        continue  # Skip non-folder files

    # Group each source clip with its augmented variants. A group is always
    # assigned to a single split; otherwise near-duplicates of training
    # clips would leak into val/test and inflate the evaluation metrics.
    clip_groups = {}
    for clip in sorted(Path(event_path).glob("*.mp4")):
        clip_groups.setdefault(_source_clip_name(clip), []).append(clip)

    group_names = sorted(clip_groups)
    split_rng.shuffle(group_names)

    # Compute target sizes (in clips) for train and val
    total_clips = sum(len(group) for group in clip_groups.values())
    train_count = int(total_clips * train_ratio)
    val_count = int(total_clips * val_ratio)

    # Fill train first, then val; everything left over goes to test. A split
    # may exceed its target by at most one group's worth of clips.
    train_clips, val_clips, test_clips = [], [], []
    for group_name in group_names:
        group = clip_groups[group_name]
        if len(train_clips) < train_count:
            train_clips.extend(group)
        elif len(val_clips) < val_count:
            val_clips.extend(group)
        else:
            test_clips.extend(group)

    # Copy files to respective folders (the originals stay in place)
    for split, clips_set in zip(["train", "val", "test"], [train_clips, val_clips, test_clips]):
        event_split_dir = os.path.join(OUTPUT_DIR, split, event)
        os.makedirs(event_split_dir, exist_ok=True)

        for clip in clips_set:
            shutil.copy(clip, event_split_dir)

    print(f" {event}: Train={len(train_clips)}, Val={len(val_clips)}, Test={len(test_clips)}")

print("\n Data successfully split into train/val/test!")


 Corner: Train=161, Val=34, Test=36
 Direct free-kick: Train=220, Val=47, Test=48
 Foul: Train=184, Val=39, Test=41
 Goal: Train=233, Val=49, Test=51
 Indirect free-kick: Train=181, Val=38, Test=40
 Kick-off: Train=246, Val=52, Test=54
 Offside: Train=186, Val=40, Test=41
 Shots off target: Train=243, Val=52, Test=53
 Shots on target: Train=249, Val=53, Test=55
 Substitution: Train=235, Val=50, Test=52
 Throw-in: Train=186, Val=40, Test=41
 Yellow card: Train=254, Val=54, Test=56

 Data successfully split into train/val/test!
