This notebook converts point-based labels into the YOLO bounding box format to facilitate object detection training. 
The process involves:

1. **Input Files:**
   - Videos (`.mp4`) containing frames of the Hexbug objects.
   - Corresponding CSV files (`.csv`) containing labeled points (head positions) for each frame.

2. **Transformation:**
   - A fixed-size square bounding box (25 px in each direction, i.e. 50×50 px) is centered on each labeled point.
   - Points too close to the frame border are shifted inward so that the full bounding box stays within the frame boundaries.
   - Each bounding box is converted to the YOLO format (`<class-id> <x-center> <y-center> <width> <height>`).

3. **Output:**
   - Extracted frames are saved as images in the `train` and `val` directories for training and validation datasets.
   - Generated YOLO label files for each image.

4. **Split:**
   - The videos are split into training (80%) and validation (20%) sets, so all frames of a video end up in the same set.

This approach is a best-effort conversion of point-based labels into bounding boxes. The resulting boxes are only approximate, but they make the data usable for object detection tasks.

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split

### Create Lists with mp4 and csv filepaths

In [None]:
# Collect the training videos and their label files as two parallel lists:
# mp4_files[i] is labeled by csv_files[i].
folder_path = "../Training Data/"

mp4_files = []
csv_files = []

# os.listdir() returns entries in arbitrary order; sorting keeps the seeded
# train/validation split identical across machines and runs.
for filename in sorted(os.listdir(folder_path)):
    # Only files named "training*.mp4" are treated as training videos.
    if not (filename.startswith("training") and filename.endswith(".mp4")):
        continue

    mp4_file = os.path.join(folder_path, filename)

    # The label file is expected next to the video: same name, ".csv" extension.
    csv_file = os.path.splitext(mp4_file)[0] + ".csv"

    # A video without labels cannot be converted; skip it here instead of
    # failing later while reading the CSV.
    if not os.path.isfile(csv_file):
        print("Skipping video without CSV file:", mp4_file)
        continue

    mp4_files.append(mp4_file)
    csv_files.append(csv_file)

    print("Processing video:", mp4_file)
    print("Corresponding CSV file:", csv_file)

### Create Labels and structure for yolo


Training

In [None]:
# Export the training split: every frame of every training video is saved as
# a JPEG, together with a YOLO label file
# (<class-index> <x-center> <y-center> <width> <height>, all normalized).

# Split the videos (not the individual frames) into training and validation sets.
video_train, video_val, csv_train, csv_val = train_test_split(mp4_files, csv_files, test_size=0.2, random_state=42)

# Half the side length (in pixels) of the square box placed around each labeled point.
BOUNDING_BOX_SIZE = 25

# Make sure the output folders exist before any frame is written.
train_img_dir = "../dataset/images/train/"
train_label_dir = "../dataset/labels/train/"
os.makedirs(train_img_dir, exist_ok=True)
os.makedirs(train_label_dir, exist_ok=True)

for mp4_file, csv_file in zip(video_train, csv_train):
    print(mp4_file)
    # Load the CSV file; the code below reads its 't' (frame index), 'x' and 'y' columns.
    df = pd.read_csv(csv_file)

    # File name without directory and extension, used as prefix for all outputs.
    video_name = os.path.splitext(os.path.basename(mp4_file))[0]

    # Load the video file
    cap = cv2.VideoCapture(mp4_file)
    try:
        frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fc = 0
        # Loop through each frame
        while fc < frameCount:
            objects_list = []
            ret, buf = cap.read()

            # Stop early if the video yields fewer frames than reported.
            if not ret:
                break

            frameHeight, frameWidth = buf.shape[:2]
            try:
                # All labeled points that belong to the current frame.
                filtered_df = df[df['t'] == fc]
                for index, row in filtered_df.iterrows():
                    x = int(row['x'])
                    y = int(row['y'])

                    # The labeled point may lie outside the frame or too close to
                    # its border: shift the center so the full box fits inside.
                    x = min(max(x, 0 + BOUNDING_BOX_SIZE), frameWidth - BOUNDING_BOX_SIZE)
                    y = min(max(y, 0 + BOUNDING_BOX_SIZE), frameHeight - BOUNDING_BOX_SIZE)

                    # Box corners in pixels, clipped to the frame.
                    y1 = max(y - BOUNDING_BOX_SIZE, 0)
                    y2 = min(y + BOUNDING_BOX_SIZE, frameHeight)
                    x1 = max(x - BOUNDING_BOX_SIZE, 0)
                    x2 = min(x + BOUNDING_BOX_SIZE, frameWidth)

                    # Normalize center and size by the frame dimensions (YOLO format).
                    x_ratio = round(x / frameWidth, 8)
                    y_ratio = round(y / frameHeight, 8)
                    width_ratio = round((x2 - x1) / frameWidth, 8)
                    height_ratio = round((y2 - y1) / frameHeight, 8)

                    # Class index is always 0 (single class).
                    objects_list.append([0, x_ratio, y_ratio, width_ratio, height_ratio])
            except (KeyError, ValueError, TypeError, OverflowError) as err:
                # Missing column or non-numeric coordinate (e.g. NaN): keep the
                # boxes collected so far for this frame and report the problem.
                print("No bounding box found in frame", fc, "-", repr(err))

            # Save frame in train folder.
            # NOTE(review): OpenCV delivers frames in BGR order and the RGB
            # conversion is disabled, so plt.imsave stores the image with red and
            # blue swapped. Kept unchanged so all exported image sets stay
            # consistent with each other - confirm this is intended.
            img_file = f"{train_img_dir}{video_name}_{fc}.jpg"
            plt.imsave(img_file, buf)

            # Save labels in txt file, one line per object (empty file if none).
            txt_file = f"{train_label_dir}{video_name}_{fc}.txt"
            with open(txt_file, 'w') as f:
                for obj in objects_list:
                    f.write(" ".join(str(e) for e in obj) + "\n")

            # Increase frame count
            fc += 1
    finally:
        # Always free the decoder handle, even if exporting a frame fails.
        cap.release()

Val

In [None]:
# Export the validation split: every frame of every validation video is saved
# as a JPEG, together with a YOLO label file
# (<class-index> <x-center> <y-center> <width> <height>, all normalized).

# Split the videos (not the individual frames) into training and validation sets.
# Same arguments and seed as in the training cell, so the partition matches
# as long as mp4_files / csv_files are unchanged.
video_train, video_val, csv_train, csv_val = train_test_split(mp4_files, csv_files, test_size=0.2, random_state=42)

# Half the side length (in pixels) of the square box placed around each labeled point.
BOUNDING_BOX_SIZE = 25

# Make sure the output folders exist before any frame is written.
val_img_dir = "../dataset/images/val/"
val_label_dir = "../dataset/labels/val/"
os.makedirs(val_img_dir, exist_ok=True)
os.makedirs(val_label_dir, exist_ok=True)

for mp4_file, csv_file in zip(video_val, csv_val):
    print(mp4_file)
    # Load the CSV file; the code below reads its 't' (frame index), 'x' and 'y' columns.
    df = pd.read_csv(csv_file)

    # File name without directory and extension, used as prefix for all outputs.
    video_name = os.path.splitext(os.path.basename(mp4_file))[0]

    # Load the video file
    cap = cv2.VideoCapture(mp4_file)
    try:
        frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fc = 0
        # Loop through each frame
        while fc < frameCount:
            objects_list = []
            ret, buf = cap.read()

            # Stop early if the video yields fewer frames than reported.
            if not ret:
                break

            frameHeight, frameWidth = buf.shape[:2]
            try:
                # All labeled points that belong to the current frame.
                filtered_df = df[df['t'] == fc]
                for index, row in filtered_df.iterrows():
                    x = int(row['x'])
                    y = int(row['y'])

                    # The labeled point may lie outside the frame or too close to
                    # its border: shift the center so the full box fits inside.
                    x = min(max(x, 0 + BOUNDING_BOX_SIZE), frameWidth - BOUNDING_BOX_SIZE)
                    y = min(max(y, 0 + BOUNDING_BOX_SIZE), frameHeight - BOUNDING_BOX_SIZE)

                    # Box corners in pixels, clipped to the frame.
                    y1 = max(y - BOUNDING_BOX_SIZE, 0)
                    y2 = min(y + BOUNDING_BOX_SIZE, frameHeight)
                    x1 = max(x - BOUNDING_BOX_SIZE, 0)
                    x2 = min(x + BOUNDING_BOX_SIZE, frameWidth)

                    # Normalize center and size by the frame dimensions (YOLO format).
                    x_ratio = round(x / frameWidth, 8)
                    y_ratio = round(y / frameHeight, 8)
                    width_ratio = round((x2 - x1) / frameWidth, 8)
                    height_ratio = round((y2 - y1) / frameHeight, 8)

                    # Class index is always 0 (single class).
                    objects_list.append([0, x_ratio, y_ratio, width_ratio, height_ratio])
            except (KeyError, ValueError, TypeError, OverflowError) as err:
                # Missing column or non-numeric coordinate (e.g. NaN): keep the
                # boxes collected so far for this frame and report the problem.
                print("No bounding box found in frame", fc, "-", repr(err))

            # Save frame in val folder.
            # NOTE(review): OpenCV delivers frames in BGR order and the RGB
            # conversion is disabled, so plt.imsave stores the image with red and
            # blue swapped. Kept unchanged so all exported image sets stay
            # consistent with each other - confirm this is intended.
            img_file = f"{val_img_dir}{video_name}_{fc}.jpg"
            plt.imsave(img_file, buf)

            # Save labels in txt file, one line per object (empty file if none).
            txt_file = f"{val_label_dir}{video_name}_{fc}.txt"
            with open(txt_file, 'w') as f:
                for obj in objects_list:
                    f.write(" ".join(str(e) for e in obj) + "\n")

            # Increase frame count
            fc += 1
    finally:
        # Always free the decoder handle, even if exporting a frame fails.
        cap.release()


Leaderboard

In [None]:
folder_path = "../Leaderboarddata/"

# Gather every ".mp4" file in the leaderboard folder. Unlike the training
# data, no filename prefix is required and no CSV files are looked up.
mp4_files = [
    os.path.join(folder_path, entry)
    for entry in os.listdir(folder_path)
    if entry.endswith(".mp4")
]

# Report the videos that were found, in listing order.
for mp4_file in mp4_files:
    print("Processing video:", mp4_file)

In [None]:
# Export every frame of each leaderboard video as a JPEG image.
# No label files are written in this cell.

# Make sure the output folder exists before any frame is written.
leaderboard_img_dir = "../Leaderboarddata/img/"
os.makedirs(leaderboard_img_dir, exist_ok=True)

for mp4_file in mp4_files:
    print(mp4_file)

    # File name without directory and extension, used as prefix for all outputs.
    video_name = os.path.splitext(os.path.basename(mp4_file))[0]

    # Load the video file
    cap = cv2.VideoCapture(mp4_file)
    try:
        frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fc = 0
        # Loop through each frame
        while fc < frameCount:
            ret, buf = cap.read()

            # Stop early if the video yields fewer frames than reported.
            if not ret:
                break

            # Save frame in the leaderboard image folder.
            # NOTE(review): OpenCV delivers frames in BGR order and the RGB
            # conversion is disabled, so plt.imsave stores the image with red and
            # blue swapped. Kept unchanged so all exported image sets stay
            # consistent with each other - confirm this is intended.
            img_file = f"{leaderboard_img_dir}{video_name}_{fc}.jpg"
            plt.imsave(img_file, buf)

            # Increase frame count
            fc += 1
    finally:
        # Always free the decoder handle, even if exporting a frame fails.
        cap.release()
