In [1]:
from pathlib import Path
import os
import numpy as np
import pandas as pd

# Root folder that holds both the datasets and the project checkout.
# It defaults to the original author's local drive. Set the MIDS_W207_DIR
# environment variable to point the notebook at another location without
# editing this cell.
mids_dir = Path(os.environ.get("MIDS_W207_DIR", "D:\\MIDS-W207"))

# soccertrack_square dataset folder; later cells read its images/ and labels/
# subfolders and write their exports here.
data = mids_dir/"datasets/soccertrack_square"

# Project repository and its analysis subfolder.
project = mids_dir/"MIDS-W207-Spring24-Soccer-Detection"
analysis = project/"analysis"

# Author: Timothy Majidzadeh
# Date Created: April 4, 2024
# Date Updated: April 4, 2024
# Description: Import and append the labels created when making the soccertrack_square dataset.
# Notes: [v1] Created program.
# Inputs: Frame-by-frame labels saved as separate text files.
# Outputs: An appended Pandas DataFrame with all of the labels and associated filepaths.

In [2]:
# Walk the images folder recursively and collect the bare filename (no
# directory component) of every file found.
all_saved_images = [
    filename
    for _dirpath, _dirnames, filenames in os.walk(data/"images")
    for filename in filenames
]

In [3]:
def read_image_labels(image_filename, data_dir=data):
    '''
    Reads the label text file that belongs to one image as a Pandas DataFrame.
    Ultralytics requires a blank text file for images that have no objects in them.
    Keep a row of null values for those images when creating a table of stacked labels.
    Inputs:
        image_filename: A string with the filename of an image. The label file is
            expected to have the same name with the extension changed to ".txt".
        data_dir: A pathlib Path (or path string) pointing to the soccertrack_square
            directory; labels are read from its "labels" subfolder. The default is
            the module-level `data` path as it was when this function was defined.
    Returns:
        output_df: A Pandas DataFrame with the columns image_name, class, bb_xcenter,
        bb_ycenter, bb_width and bb_height, one row per bounding box.
        If there are no bounding boxes, returns a single row whose class is "Empty"
        and whose bounding box values are null.
    Raises:
        Whatever pd.read_csv raises when the label file cannot be opened or parsed
        (for example, when the file does not exist).
    '''
    label_columns = ["class", "bb_xcenter", "bb_ycenter", "bb_width", "bb_height"]

    # Swap only the final extension, so a ".png" elsewhere in the name is left
    # alone and other image extensions still map to a ".txt" label file.
    label_filename = Path(image_filename).with_suffix(".txt")
    # Honour the data_dir argument instead of always reading from the global path.
    label_filepath = Path(data_dir)/"labels"/label_filename

    output_df = pd.read_csv(label_filepath, sep=' ', header=None, names=label_columns)
    if output_df.empty:
        # No bounding boxes: emit a placeholder row so the image still appears
        # in the stacked table.
        output_df = pd.DataFrame(
            [["Empty", np.nan, np.nan, np.nan, np.nan]],
            columns=label_columns
        )
    output_df['image_name'] = image_filename
    return output_df[['image_name'] + label_columns]

In [4]:
# Read every image's labels and stack them into one table with a fresh
# 0..n-1 row index.
stacked_labels = pd.concat(
    (read_image_labels(image_name) for image_name in all_saved_images),
    ignore_index=True,
)

In [5]:
# For each (image, class) pair, count the non-null bounding-box centres.
# The placeholder row of an object-free image holds NaN, so it counts as 0.
pivot_table = (
    stacked_labels
    .groupby(['image_name', 'class'])['bb_xcenter']
    .count()
    .to_frame(name='count')
)

In [6]:
# Reshape from long (one row per image/class pair) to wide
# (one row per image, one column per class).
pivot_table = pd.pivot(
    pivot_table.reset_index(),
    index='image_name',
    columns='class',
    values='count',
)

In [7]:
# Class id -> name of the per-image count column.
class_to_column = {0: 'img_ball_count', 1: 'img_team_0_count', 2: 'img_team_1_count'}
count_columns = list(class_to_column.values())

# Keep any unexpected class ids (after the known ones) rather than silently
# dropping them; only the 'Empty' placeholder column is removed.
other_classes = [
    col for col in pivot_table.columns
    if col not in class_to_column and col != 'Empty'
]

# reindex (instead of drop + rename alone) guarantees that all three count
# columns exist even when a class never occurs in the dataset, and does not
# fail when no image is object-free (no 'Empty' column to drop).
pivot_table = (
    pivot_table
    .reindex(columns=[*class_to_column, *other_classes])
    .fillna(0)  # image/class pairs absent from the pivot mean "zero objects"
    .rename(columns=class_to_column)
    .reset_index()
)
pivot_table[count_columns] = pivot_table[count_columns].astype(int)

In [8]:
# Attach the per-image object counts to every label row of that image.
stacked_labels = pd.merge(
    stacked_labels,
    pivot_table,
    on='image_name',
    how='left',
)

In [9]:
# Save both tables into the dataset folder, each as a CSV (without the row
# index) and as a pickle.
for export_frame, csv_name, pkl_name in (
    (pivot_table, 'count_objects_per_image.csv', 'count_objects_per_image.pkl'),
    (stacked_labels, 'stacked_labels.csv', 'stacked_labels.pkl'),
):
    export_frame.to_csv(data/csv_name, index=False)
    export_frame.to_pickle(data/pkl_name)

In [10]:
# Display the final stacked label table, including the per-image count columns.
stacked_labels

Unnamed: 0,image_name,class,bb_xcenter,bb_ycenter,bb_width,bb_height,img_ball_count,img_team_0_count,img_team_1_count
0,top_view_0.png,2,0.006078,0.075065,0.012157,0.04875,0,0,2
1,top_view_0.png,2,0.625282,0.371315,0.048750,0.04875,0,0,2
2,top_view_1.png,Empty,,,,,0,0,0
3,top_view_10.png,1,0.072482,0.216496,0.048750,0.04875,0,3,7
4,top_view_10.png,1,0.068197,0.546912,0.048750,0.04875,0,3,7
...,...,...,...,...,...,...,...,...,...
362253,wide_view_9998.png,1,0.437647,0.609750,0.046250,0.07875,0,5,2
362254,wide_view_9998.png,1,0.403272,0.655375,0.043750,0.08000,0,5,2
362255,wide_view_9998.png,2,0.323897,0.533500,0.038750,0.05875,0,5,2
362256,wide_view_9998.png,2,0.344522,0.564125,0.042500,0.07000,0,5,2
