In [1]:
import pandas as pd
from pathlib import Path
import os
mids_dir = Path("D:\\MIDS-W207")
data = mids_dir/"datasets/soccertrack"
project = mids_dir/"MIDS-W207-Spring24-Soccer-Detection"
analysis = project/"analysis"

# Author: Timothy Majidzadeh
# Date Created: March 7, 2024
# Date Updated: March 12, 2024
# Description: Develop the baseline YOLOv5 model data.
# Notes: [v1] Created program.
# Inputs: Frame-by-frame image data & labels.
# Outputs: The data prepared for use in YOLOv5 using Ultralytics.

In [2]:
top_view_labels = pd.read_pickle(data/"labels/top_view_labels_stacked/top_view_labels.pkl")
wide_view_labels = pd.read_pickle(data/"labels/wide_view_labels_stacked/wide_view_labels.pkl")

In [3]:

def format_labels_to_ultralytics(
    input_df, output_dir=data/"labels", input_type='top_view', 
    input_height=2160, input_width=3840, class_dict={'ball': 0, 'team_0': 1, 'team_1': 2}
):
    """
    Output the bounding boxes as text files in YOLOv5 Ultralytics Format.
    Inputs:
        input_df: A DataFrame of bounding boxes with corresponding images.
        output_dir: A PathLib Path which points to the directory where labels should output.
        input_type: 'top_view' or 'wide_view'
        input_height: The height of the images, as an integer in pixels.
        input_width: The width of the images, as an integer in pixels.
        class dict: A dictionary of classes to include and associated ids. Classes are strings appearing in input_df
                    and ids are an integer index starting from 0.
    Outputs:
        Saves a .txt file for every image with the path bounding boxes in YOLOv5 desired format.
    """
    if ((input_type != 'top_view') & (input_type != 'wide_view')):
        raise Exception("Please set input_type to 'top_view' or 'wide_view'.")
    
    input_df = input_df.copy().reset_index()
    # For each vidname, ensure the directory exists.
    for i in input_df.index:
        vidname, frame = input_df.loc[i, 'vidname'][0].strip(), str(input_df.loc[i, 'frame'][0]).strip()
        output_path = output_dir/"{}/{}/{}_{}.txt".format(input_type, vidname, vidname, frame)
        if not os.path.exists(output_dir/"{}/{}".format(input_type, vidname)):
            os.makedirs(output_dir/"{}/{}".format(input_type, vidname))
        # for each frame, save the relevant classes & bounding boxes to a text file.
        with open(output_path, 'w') as f:
            i = 0
            for class_name in class_dict.keys():
                class_index = str(class_dict[class_name])
                for instance in input_df[class_name].columns.get_level_values(0).unique():
                    left = input_df[class_name][instance]['bb_left'][0]
                    top = input_df[class_name][instance]['bb_top'][0]
                    width = input_df[class_name][instance]['bb_width'][0]
                    height = input_df[class_name][instance]['bb_height'][0]

                    left, width = left / input_width, width / input_width
                    top, height = top / input_height, height / input_height

                    xcenter, ycenter = left + width / 2, top + height / 2
                    
                    if i == 0:
                        to_write = " ".join((str(class_index), str(xcenter), str(ycenter), str(width), str(height)))
                    else:
                        to_write += "\n" + " ".join((str(class_index), str(xcenter), str(ycenter), str(width), str(height)))
                    i += 1
            f.write(to_write)
            f.close()


In [None]:
format_labels_to_ultralytics(top_view_labels)
format_labels_to_ultralytics(wide_view_labels, input_type='wide_view', input_height=1000, input_width=6500)

  vidname, frame = input_df.loc[i, 'vidname'][0].strip(), str(input_df.loc[i, 'frame'][0]).strip()


In [None]:

def train_val_test_paths(image_paths, set_type, vrsn_num, output_dir=data):
    """
    Create text files which give Ultralytics the paths for the train, val, and test images.
    Inputs:
        train, val, test: Numpy arrays or lists which are the absolute filepaths.
        set_type: 'train', 'val', or 'test', based on the input type.
        output_dir: A PathLib Path object.
    Outputs:
        Saves .txt files with paths to the selected train, val, and test sets.
    """
    output_dir_str = str(output_dir).replace("\\", "/")
    image_paths = list(image_paths.copy().str.replace("\\", "/").str.replace(str(data).replace("\\","/"), "."))
    with open(output_dir/"{}_v{}.txt".format(set_type, str(vrsn_num)), 'w') as f:
        i = 0
        for image_path in image_paths:
            if i == 0:
                f.write(image_path)
            else:
                f.write("\n"+image_path)
            i+=1
        f.close()

In [None]:
stacked_labels = pd.concat(top_view_labels, wide_view_labels)
stacked_labels

In [6]:
train_paths, val_paths, test_paths = top_view_labels[:70]['frame_imgpath'], top_view_labels[70:85]['frame_imgpath'], top_view_labels[85:100]['frame_imgpath']

In [7]:
train_val_test_paths(train_paths, set_type='train', vrsn_num=1)
train_val_test_paths(val_paths, set_type='val', vrsn_num=1)
train_val_test_paths(test_paths, set_type='test', vrsn_num=1)