In [1]:
from pathlib import Path
import os
import numpy as np
import pandas as pd

import tensorflow.compat.v1 as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from PIL import Image
import keras
import matplotlib
import tensorflow_datasets as tfds

import shutil
from tqdm import tqdm

import warnings
import random
warnings.filterwarnings("ignore", category=UserWarning) #used to supress the tf version warning. 

mids_dir = Path("D:\\MIDS-W207")
data = mids_dir/"datasets"
project = mids_dir/"MIDS-W207-Spring24-Soccer-Detection"
analysis = project/"analysis"

# Author: Timothy Majidzadeh
# Date Created: March 31, 2024
# Date Updated: April 3, 2024
# Description: Create a square dataset from the top-view and wide-view datasets.
# Notes: [v1] Created program.
# [v2] Switched crop method to manual programming (from Keras_CV package). Added fixed random seed.
# Inputs: Frame-by-frame image data & labels.
# Outputs: 800x800 square images & associated bounding boxes.

In [2]:
top_view_height, top_view_width = 2160, 3840
wide_view_height, wide_view_width = 1000, 6500
target_height, target_width = 800, 800

In [3]:
labels_order = [
    ('vidname', '', ''),
    ('frame', '', ''),
    ('frame_imgname', '', ''),
    ('frame_imgpath', '', ''),
    ('frame_saved', '', ''),
    ('ball', 'ball', 'bb_left'),
    ('ball', 'ball', 'bb_top'),
    ('ball', 'ball', 'bb_width'),
    ('ball', 'ball', 'bb_height'),
    ('team_0', 'player_00', 'bb_left'),
    ('team_0', 'player_00', 'bb_top'),
    ('team_0', 'player_00', 'bb_width'),
    ('team_0', 'player_00', 'bb_height'),
    ('team_0', 'player_01', 'bb_left'),
    ('team_0', 'player_01', 'bb_top'),
    ('team_0', 'player_01', 'bb_width'),
    ('team_0', 'player_01', 'bb_height'),
    ('team_0', 'player_02', 'bb_left'),
    ('team_0', 'player_02', 'bb_top'),
    ('team_0', 'player_02', 'bb_width'),
    ('team_0', 'player_02', 'bb_height'),
    ('team_0', 'player_03', 'bb_left'),
    ('team_0', 'player_03', 'bb_top'),
    ('team_0', 'player_03', 'bb_width'),
    ('team_0', 'player_03', 'bb_height'),
    ('team_0', 'player_04', 'bb_left'),
    ('team_0', 'player_04', 'bb_top'),
    ('team_0', 'player_04', 'bb_width'),
    ('team_0', 'player_04', 'bb_height'),
    ('team_0', 'player_05', 'bb_left'),
    ('team_0', 'player_05', 'bb_top'),
    ('team_0', 'player_05', 'bb_width'),
    ('team_0', 'player_05', 'bb_height'),
    ('team_0', 'player_06', 'bb_left'),
    ('team_0', 'player_06', 'bb_top'),
    ('team_0', 'player_06', 'bb_width'),
    ('team_0', 'player_06', 'bb_height'),
    ('team_0', 'player_07', 'bb_left'),
    ('team_0', 'player_07', 'bb_top'),
    ('team_0', 'player_07', 'bb_width'),
    ('team_0', 'player_07', 'bb_height'),
    ('team_0', 'player_08', 'bb_left'),
    ('team_0', 'player_08', 'bb_top'),
    ('team_0', 'player_08', 'bb_width'),
    ('team_0', 'player_08', 'bb_height'),
    ('team_0', 'player_09', 'bb_left'),
    ('team_0', 'player_09', 'bb_top'),
    ('team_0', 'player_09', 'bb_width'),
    ('team_0', 'player_09', 'bb_height'),
    ('team_0', 'player_10', 'bb_left'),
    ('team_0', 'player_10', 'bb_top'),
    ('team_0', 'player_10', 'bb_width'),
    ('team_0', 'player_10', 'bb_height'),
    ('team_1', 'player_00', 'bb_left'),
    ('team_1', 'player_00', 'bb_top'),
    ('team_1', 'player_00', 'bb_width'),
    ('team_1', 'player_00', 'bb_height'),
    ('team_1', 'player_01', 'bb_left'),
    ('team_1', 'player_01', 'bb_top'),
    ('team_1', 'player_01', 'bb_width'),
    ('team_1', 'player_01', 'bb_height'),
    ('team_1', 'player_02', 'bb_left'),
    ('team_1', 'player_02', 'bb_top'),
    ('team_1', 'player_02', 'bb_width'),
    ('team_1', 'player_02', 'bb_height'),
    ('team_1', 'player_03', 'bb_left'),
    ('team_1', 'player_03', 'bb_top'),
    ('team_1', 'player_03', 'bb_width'),
    ('team_1', 'player_03', 'bb_height'),
    ('team_1', 'player_04', 'bb_left'),
    ('team_1', 'player_04', 'bb_top'),
    ('team_1', 'player_04', 'bb_width'),
    ('team_1', 'player_04', 'bb_height'),
    ('team_1', 'player_05', 'bb_left'),
    ('team_1', 'player_05', 'bb_top'),
    ('team_1', 'player_05', 'bb_width'),
    ('team_1', 'player_05', 'bb_height'),
    ('team_1', 'player_06', 'bb_left'),
    ('team_1', 'player_06', 'bb_top'),
    ('team_1', 'player_06', 'bb_width'),
    ('team_1', 'player_06', 'bb_height'),
    ('team_1', 'player_07', 'bb_left'),
    ('team_1', 'player_07', 'bb_top'),
    ('team_1', 'player_07', 'bb_width'),
    ('team_1', 'player_07', 'bb_height'),
    ('team_1', 'player_08', 'bb_left'),
    ('team_1', 'player_08', 'bb_top'),
    ('team_1', 'player_08', 'bb_width'),
    ('team_1', 'player_08', 'bb_height'),
    ('team_1', 'player_09', 'bb_left'),
    ('team_1', 'player_09', 'bb_top'),
    ('team_1', 'player_09', 'bb_width'),
    ('team_1', 'player_09', 'bb_height'),
    ('team_1', 'player_10', 'bb_left'),
    ('team_1', 'player_10', 'bb_top'),
    ('team_1', 'player_10', 'bb_width'),
    ('team_1', 'player_10', 'bb_height')
]

class_dict = {
    'ball':0,
    'team_0':1,
    'team_1':2
}

In [4]:
def preprocess_labels(input_df):
    '''
    Input dataframe is in wide format with multilevel columns.
    Reshape to long format and generate the class value.
    Inputs:
        input_df: The stacked labels from the soccertrack dataset, in a wide Pandas DataFrame.
    Outputs:
        output_df: The labels reshaped to wide and with necessary values generated.
    '''
    output_df = input_df[labels_order]
    output_df = output_df.reorder_levels([2, 0, 1], axis=1)
    output_df.columns = ['-'.join(col).strip() for col in output_df.columns.values]
    output_df.rename(columns={"-vidname-":"vidname", "-frame-":"frame", "-frame_imgname-":"frame_imgname", "-frame_imgpath-":"frame_imgpath", "-frame_saved-":"frame_saved"}
                       ,inplace=True)
    i = ['vidname', 'frame', 'frame_imgname', 'frame_imgpath', 'frame_saved']
    output_df = pd.wide_to_long(
        output_df,
        stubnames=['bb_left', 'bb_top', 'bb_width', 'bb_height'], 
        i=i, 
        j='player', 
        suffix=r'.*').reset_index()
    output_df['player'] = output_df['player'].str.replace('^-', '', regex=True)
    output_df[['team', 'player']] = output_df['player'].str.split('-', n=1, expand=True)
    output_df['class'] = output_df['team'].map(class_dict)
    output_df['bb_right'] = output_df['bb_left'] + output_df['bb_width']
    output_df['bb_bot'] = output_df['bb_top'] + output_df['bb_height']
    output_df['bb_xcenter'] = output_df['bb_left'] + output_df['bb_width'] / 2
    output_df['bb_ycenter'] = output_df['bb_top'] + output_df['bb_height'] / 2
    
    output_df = output_df[['vidname', 'frame', 'frame_imgname', 'frame_imgpath', 'frame_saved', 'player', 'team', 'class', 'bb_left', 'bb_top', 'bb_right', 'bb_bot', 'bb_xcenter', 'bb_ycenter', 'bb_width', 'bb_height']]
    return(output_df)

In [5]:

def crop_and_rescale(image_path, labels_df, output_folder, output_name, input_width, input_height, target_width=800, target_height=800):
    '''
    Randomly crop an image (800x800 by default), and relabel the bounding boxes based on the new values.
    Save the image to storage and drop boxes which are cropped to outside the image.
    Image coordinates are in pixel units starting from the top-left origin.
    Inputs:
        image_name: The original image to load.
        image_df: A Pandas DataFrame in long format with the images, classes and labels.
        output_folder: A destination folder (PathLib Path) to save the cropped image (to subfolder images) and rescaled labels (to subfolder labels).
        output_name: A filename (without extension) for the output.
        input_width: The image's original width in pixels.
        input_height: The image's original height in pixels.
        target_width: The output image width. 800 by default.
        target_height: The output image height. 800 by default.
        global_suffix: A global environment value which is the suffix of the output image filename. Increments by 1
                       for each image.
    Outputs:
        Saves the cropped image and new labels to storage.
    '''
    global global_suffix
    # Randomly select crop boundaries.
    crop_top, crop_left = random.uniform(0, input_height-target_height), random.uniform(0, input_width-target_width)
    crop_bot, crop_right = crop_top + target_height, crop_left + target_width

    # Load, crop and save the image.
    im = Image.open(image_path)
    im_crop = im.crop((crop_left, crop_top, crop_right, crop_bot))
    image_filename = output_name + str(global_suffix) + ".png"
    im_crop.save(output_folder/"images"/image_filename)
    labels_filename = output_name + str(global_suffix) + ".txt"

    # Drop objects which are outside the new bounds.
    image_df = labels_df[labels_df['frame_imgpath'] == image_path]
    cropped_df = image_df.query(
        "(bb_right > @crop_left) & (bb_left < @crop_right) & (bb_bot > @crop_top) & (bb_top < @crop_bot)"
    )

    # Scale the remaining objects' bounding boxes to fit within the new image.
    cropped_df['bb_top'] = cropped_df['bb_top'].map(lambda x: np.max([x, crop_top]) - crop_top)
    cropped_df['bb_bot'] = cropped_df['bb_bot'].map(lambda x: np.min([x, crop_bot]) - crop_top)
    cropped_df['bb_left'] = cropped_df['bb_left'].map(lambda x: np.max([x, crop_left]) - crop_left)
    cropped_df['bb_right'] = cropped_df['bb_right'].map(lambda x: np.min([x, crop_right]) - crop_left)

    cropped_df['bb_width'] = cropped_df['bb_right'] - cropped_df['bb_left']
    cropped_df['bb_height'] = cropped_df['bb_bot'] - cropped_df['bb_top']

    cropped_df['bb_xcenter'] = cropped_df['bb_left'] + cropped_df['bb_width'] / 2
    cropped_df['bb_ycenter'] = cropped_df['bb_top'] + cropped_df['bb_height'] / 2

    cropped_df[['bb_top', 'bb_bot', 'bb_height', 'bb_ycenter']] = cropped_df[['bb_top', 'bb_bot', 'bb_height', 'bb_ycenter']] / target_height
    cropped_df[['bb_left', 'bb_right', 'bb_width', 'bb_xcenter']] = cropped_df[['bb_left', 'bb_right', 'bb_width', 'bb_xcenter']] / target_width
    
    cropped_df['class'] = cropped_df['class'].astype(int)
    output_df = cropped_df[['class', 'bb_xcenter', 'bb_ycenter', 'bb_width', 'bb_height']].astype(str)

    np.savetxt(output_folder/"labels"/labels_filename, output_df.values, fmt='%s')
    global_suffix += 1

In [None]:
random.seed(42119)
top_view_labels = pd.read_pickle(data/"soccertrack/labels/top_view_labels_stacked/top_view_labels.pkl")
top_view_labels_long = preprocess_labels(top_view_labels[top_view_labels['frame_saved'] == True])
img_paths = top_view_labels_long['frame_imgpath'].unique()
global_suffix = 0
for path in img_paths:
    crop_and_rescale(path, top_view_labels_long, data/"soccertrack_square", 'top_view_', top_view_width, top_view_height)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cropped_df['bb_top'] = cropped_df['bb_top'].map(lambda x: np.max([x, crop_top]) - crop_top)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cropped_df['bb_bot'] = cropped_df['bb_bot'].map(lambda x: np.min([x, crop_bot]) - crop_top)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cropped_df['bb_left']

In [None]:
random.seed(16040)
wide_view_labels = pd.read_pickle(data/"soccertrack/labels/wide_view_labels_stacked/wide_view_labels.pkl")
wide_view_labels_long = preprocess_labels(wide_view_labels[wide_view_labels['frame_saved'] == True])
img_paths = wide_view_labels_long['frame_imgpath'].unique()
global_suffix = 0
for path in img_paths:
    crop_and_rescale(path, wide_view_labels_long, data/"soccertrack_square", 'wide_view_', wide_view_width, wide_view_height)