# Organise annotations into input features for model training

Three types of input features: domain features, sequential RGB crops, and crops organised into label folders.

Input:
1. GT features: court, ball
2. Annotated features: pose gt, pose annotated, shuttle tracknet

Output:
1. Pro, am-singles, am-doubles
2. GT, annotated
3. domain feats, rgb seq, crops organised by labels

In [37]:
import numpy as np
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import random
import shutil

In [28]:
# Root directories of the ground-truth (GT) inputs; each trailing comment shows one example file under that root.
gt_pose_rootdir = 'object_level/player/gt_pose_bbox/' # e.g. 'object_level/player/gt_pose_bbox/pro_gtpose/match1/1_01_00_gtpose.csv'
gt_shuttle_rootdir = '../datasets/' # e.g. '../datasets/pro/match1/ball_trajectory/1_01_00_ball.csv'
gt_crt_rootdir = '../datasets/' # e.g. '../datasets/court_corners_pro.csv'
label_rootdir = '../datasets/' # e.g. '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'

# Root directories of the annotated (filtered) inputs.
annot_pose_rootdir = 'object_level/player/filtered2_pose_bbox/' # e.g. 'object_level/player/filtered2_pose_bbox/pro_filtered2pose/match1/1_01_00_filtered2pose.csv'
annot_shuttle_rootdir = 'object_level/shuttle/' # not available for pro; e.g. 'object_level/shuttle/am_singles/match_china2/singles0_shuttle.csv'

## Domain features

domain_pro/

    - match1
        - 1_01_00_x.csv
        - 1_01_00_y.csv
        - 1_02_00_x.csv
        - 1_02_00_y.csv
    
    
 each 1_01_00_x.csv:
 
     frame, court (4x2), shuttle (1x2), vis (2x1), bbox (2 x 4), poses (2 x 34)
     Frame, tl_x, tl_y, tr_x, tr_y, br_x, br_y, bl_x, bl_y, ball_x, ball_y, vis_near, vis_far, near_x1, near_y1, near_x2, near_y2, far_x1, far_y1, far_x2, far_y2, near_pose0_x, ..., near_pose16_y, far_pose0_x, ..., far_pose16_y
 
 each 1_01_00_y.csv:
 
     frame, player_hit, rally_bound, backhand, inplay
     
    "player_hit": { 0: "no_hit", 1: "near_player", 2: "far_player"},
    "rally_bound": { 0: "not_bound", 1: "rally_start", 2: "rally_end"},
    "backhand": { 0: "NA", 1: "backhand", 2: "forehand"},
    "inplay": { 0: "break", 1: "in-play"}

### GT

In [29]:
# Example inputs for one rally (pro / match1 / 1_01_00), loaded only to inspect the column layout.
gt_pose_file = 'object_level/player/gt_pose_bbox/pro_gtpose/match1/1_01_00_gtpose.csv'
gt_shuttle_file = '../datasets/pro/match1/ball_trajectory/1_01_00_ball.csv'
gt_crt_file = '../datasets/court_corners_pro.csv'
label_file = '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'

# the y-file path is the x-file path with its trailing 'x.csv' swapped for 'y.csv'
out_file_x = 'input_features/gt/domain/domain_pro/match1/1_01_00_x.csv'
out_file_y = out_file_x[:-5] + 'y.csv'

df_pose, df_shuttle, df_crt, df_label = (
    pd.read_csv(csv_path)
    for csv_path in (gt_pose_file, gt_shuttle_file, gt_crt_file, label_file)
)

# show the schema of each table, in the same order as they were loaded
for loaded_table in (df_pose, df_shuttle, df_crt, df_label):
    print(loaded_table.columns)

Index(['Frame', 'id', 'x1', 'y1', 'x2', 'y2', '0_x', '0_y', '1_x', '1_y',
       '2_x', '2_y', '3_x', '3_y', '4_x', '4_y', '5_x', '5_y', '6_x', '6_y',
       '7_x', '7_y', '8_x', '8_y', '9_x', '9_y', '10_x', '10_y', '11_x',
       '11_y', '12_x', '12_y', '13_x', '13_y', '14_x', '14_y', '15_x', '15_y',
       '16_x', '16_y'],
      dtype='object')
Index(['Frame', 'Visibility', 'X', 'Y'], dtype='object')
Index(['Unnamed: 0', 'path', 'tl_x', 'tl_y', 'tr_x', 'tr_y', 'br_x', 'br_y',
       'bl_x', 'bl_y', 'conf', 'width', 'height', '_tl_x', '_tl_y', '_tr_x',
       '_tr_y', '_br_x', '_br_y', '_bl_x', '_bl_y'],
      dtype='object')
Index(['frame', 'player_hit', 'rally_bound', 'backhand'], dtype='object')


#### Functions

In [31]:
def _first_rows_by_key(df, key_cols, value_cols):
    """Map each distinct key (tuple of *key_cols* values) to the *value_cols* array of its first row."""
    firsts = df.drop_duplicates(subset=key_cols, keep='first')
    keys = zip(*(firsts[col].tolist() for col in key_cols))
    return dict(zip(keys, firsts[value_cols].to_numpy()))


def combine_domain_into_x(df_crt, crt_orig_path, df_pose, df_shuttle, id1=1, id2=2):
    """Assemble the per-frame domain feature table for one rally.

    Each output row holds, in this column order: ``Frame``, the eight court
    corner coordinates, the shuttle position, the near/far visibility flags,
    the near/far bounding boxes, and 17 (x, y) keypoints for each player.

    Args:
        df_crt: court table with a ``path`` column, the ``_tl_x`` ... ``_bl_y``
            corner columns and ``width`` / ``height``.
        crt_orig_path: value of ``path`` selecting this rally's court row.
        df_pose: pose table with ``Frame``, ``id``, ``x1``, ``y1``, ``x2``,
            ``y2`` followed by the keypoint columns.
        df_shuttle: shuttle table with ``Frame``, ``X`` and ``Y``.
        id1: ``id`` value of the player written to the ``near_*`` columns.
        id2: ``id`` value of the player written to the ``far_*`` columns.

    Returns:
        A DataFrame with one row per frame from 0 to the largest ``Frame``
        value found in ``df_pose``.

    Raises:
        IndexError: if ``crt_orig_path`` has no row in ``df_crt`` or
            ``df_pose`` is empty.
    """
    court_cols = ['_tl_x', '_tl_y', '_tr_x', '_tr_y', '_br_x', '_br_y', '_bl_x', '_bl_y', 'width', 'height']
    court_rows = df_crt.loc[df_crt['path'] == crt_orig_path, court_cols]
    if court_rows.empty:
        raise IndexError('no court annotation found for video path {!r}'.format(crt_orig_path))
    court_array = court_rows.to_numpy()[0]
    w = court_array[8]
    h = court_array[9]

    if df_pose.empty:
        raise IndexError('df_pose has no rows; cannot determine the number of frames')
    # use the largest frame number so the result does not depend on row order
    total_frame = int(df_pose['Frame'].max()) + 1

    bbox_cols = ['x1', 'y1', 'x2', 'y2']
    non_pose_cols = ['Frame', 'id'] + bbox_cols
    pose_cols = [col for col in df_pose.columns if col not in non_pose_cols]

    # index the inputs once instead of re-filtering the tables for every frame
    bbox_by_key = _first_rows_by_key(df_pose, ['Frame', 'id'], bbox_cols)
    pose_by_key = _first_rows_by_key(df_pose, ['Frame', 'id'], pose_cols)
    shuttle_by_key = _first_rows_by_key(df_shuttle, ['Frame'], ['X', 'Y'])

    # a player with no row in a frame is treated like an undetected one: all zeros
    no_bbox = np.zeros(4)
    no_pose = np.zeros(34)

    court_names = ['tl_x', 'tl_y', 'tr_x', 'tr_y', 'br_x', 'br_y', 'bl_x', 'bl_y']
    near_bbox_names = ['near_x1', 'near_y1', 'near_x2', 'near_y2']
    far_bbox_names = ['far_x1', 'far_y1', 'far_x2', 'far_y2']

    outrows_list = []
    for fr in range(total_frame):
        row_dict = {'Frame': fr}

        # fill court coordinates (identical for every frame of the rally)
        row_dict.update(zip(court_names, court_array[:8]))

        # shuttle coordinates are divided by the frame width/height; (0, 0) when missing
        shuttle = shuttle_by_key.get((fr,))
        if shuttle is None:
            row_dict['ball_x'], row_dict['ball_y'] = 0, 0
        else:
            row_dict['ball_x'], row_dict['ball_y'] = shuttle[0] / w, shuttle[1] / h

        # bbox and visibility: a player counts as visible if the bbox is not all zeroes
        # NOTE(review): bbox and pose values are copied without rescaling —
        # presumably they are already normalised; confirm against the pose files.
        bbox_near = bbox_by_key.get((fr, id1), no_bbox)
        bbox_far = bbox_by_key.get((fr, id2), no_bbox)
        row_dict['vis_near'] = 1 if np.any(bbox_near) else 0
        row_dict['vis_far'] = 1 if np.any(bbox_far) else 0
        row_dict.update(zip(near_bbox_names, bbox_near))
        row_dict.update(zip(far_bbox_names, bbox_far))

        # pose coordinates: 17 keypoints, stored as alternating x / y values
        for rootname, key in (('near_pose', (fr, id1)), ('far_pose', (fr, id2))):
            coords_pose = pose_by_key.get(key, no_pose)
            for p in range(17):
                row_dict[rootname + str(p) + '_x'] = coords_pose[2 * p]
                row_dict[rootname + str(p) + '_y'] = coords_pose[2 * p + 1]

        outrows_list.append(row_dict)

    return pd.DataFrame(outrows_list)

In [32]:
def convert_label_into_y(df_label):
    """Return the label table with an added ``inplay`` column, cast to int.

    ``inplay`` is 1 from the first frame whose ``rally_bound`` is 1 (rally
    start) through the first frame whose ``rally_bound`` is 2 (rally end),
    inclusive, and 0 elsewhere. When no start is labelled the rally is taken
    to start at frame 0; when no end is labelled it runs to the last row.

    The ``frame`` values are used directly as positions into the table, so the
    table is expected to have one row per frame, starting at frame 0.

    Args:
        df_label: label table with at least ``frame`` and ``rally_bound``
            columns. It is not modified.

    Returns:
        A new DataFrame with every column of ``df_label`` plus ``inplay``,
        all converted to ``int``.
    """
    num_frame = len(df_label)
    rally_bound = df_label['rally_bound']

    # test for "no match" with .size: a boundary at frame 0 is a valid match
    serve_frames = df_label.loc[rally_bound == 1, 'frame'].to_numpy()
    serve_frame = serve_frames[0] if serve_frames.size else 0

    end_frames = df_label.loc[rally_bound == 2, 'frame'].to_numpy()
    end_frame = end_frames[0] if end_frames.size else num_frame - 1

    inplay = np.zeros(num_frame)
    inplay[serve_frame:end_frame + 1] = 1

    # work on a copy so the caller's DataFrame is left untouched
    df_out = df_label.copy()
    df_out['inplay'] = inplay

    return df_out.astype('int')

#### run on pro

In [33]:
# Root directories of the ground-truth (GT) inputs; each trailing comment shows one example file under that root.
gt_pose_rootdir = 'object_level/player/gt_pose_bbox/' # e.g. 'object_level/player/gt_pose_bbox/pro_gtpose/match1/1_01_00_gtpose.csv'
gt_shuttle_rootdir = '../datasets/' # e.g. '../datasets/pro/match1/ball_trajectory/1_01_00_ball.csv'
gt_crt_rootdir = '../datasets/' # e.g. '../datasets/court_corners_pro.csv'
label_rootdir = '../datasets/' # e.g. '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'

# Destination root for the domain features built from GT inputs.
out_rootdir = 'input_features/gt/domain/'

# Root directories of the annotated (filtered) inputs.
annot_pose_rootdir = 'object_level/player/filtered2_pose_bbox/' # e.g. 'object_level/player/filtered2_pose_bbox/pro_filtered2pose/match1/1_01_00_filtered2pose.csv'
annot_shuttle_rootdir = 'object_level/shuttle/' # not available for pro; e.g. 'object_level/shuttle/am_singles/match_china2/singles0_shuttle.csv'

In [34]:
# Build the GT domain features (x) and labels (y) for every professional rally.
prefix = 'pro'
gt_pose_rootdir_pro = os.path.join(gt_pose_rootdir, prefix + '_gtpose')
gt_shuttle_rootdir_pro = os.path.join('../datasets/', prefix) # e.g. '../datasets/pro/match1/ball_trajectory/1_01_00_ball.csv'
# fixed the 'court_cortners_' typo so this path matches the real file, e.g. '../datasets/court_corners_pro.csv'
gt_crt_file_pro = os.path.join(gt_crt_rootdir, 'court_corners_' + prefix + '.csv')
label_rootdir_pro = os.path.join(label_rootdir, prefix) # e.g. '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'

out_rootdir_pro = os.path.join(out_rootdir, 'domain_' + prefix)

# Read the court table from the path built in this cell (not from a variable
# left over from an earlier cell), and only once: it is the same for every rally.
df_crt = pd.read_csv(gt_crt_file_pro)

for matchdir in sorted(os.listdir(gt_pose_rootdir_pro)):
    for posecsv in os.listdir(os.path.join(gt_pose_rootdir_pro, matchdir)):
        basename = posecsv.split('_gtpose.csv')[0]
        gt_pose_file = os.path.join(gt_pose_rootdir_pro, matchdir, posecsv)
        gt_shuttle_file = os.path.join(gt_shuttle_rootdir_pro, matchdir, 'ball_trajectory', basename + '_ball.csv')
        label_file = os.path.join(label_rootdir_pro, matchdir, 'player_hit', basename + '.mp4_player_hit.csv')
        # value of the 'path' column that identifies this rally in the court table
        crt_orig_path = os.path.join('../profession_dataset', matchdir, 'rally_video', basename + '.mp4')

        df_pose = pd.read_csv(gt_pose_file)
        df_shuttle = pd.read_csv(gt_shuttle_file)
        df_label = pd.read_csv(label_file)

        outdir = os.path.join(out_rootdir_pro, matchdir)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        out_file_x = os.path.join(outdir, basename + '_x.csv')
        # same path with the trailing 'x.csv' replaced by 'y.csv'
        out_file_y = out_file_x[:-5] + 'y.csv'
        print(out_file_x)

        df_out_x = combine_domain_into_x(df_crt, crt_orig_path, df_pose, df_shuttle)
        df_out_y = convert_label_into_y(df_label)

        df_out_x.to_csv(out_file_x, index=False)
        df_out_y.to_csv(out_file_y, index=False)

input_features/gt/domain/domain_pro/match1/1_06_08_x.csv
input_features/gt/domain/domain_pro/match1/1_03_05_x.csv
input_features/gt/domain/domain_pro/match1/1_03_06_x.csv
input_features/gt/domain/domain_pro/match1/1_01_00_x.csv
input_features/gt/domain/domain_pro/match1/1_02_01_x.csv
input_features/gt/domain/domain_pro/match1/1_03_04_x.csv
input_features/gt/domain/domain_pro/match1/1_02_04_x.csv
input_features/gt/domain/domain_pro/match1/1_06_06_x.csv
input_features/gt/domain/domain_pro/match1/1_02_02_x.csv
input_features/gt/domain/domain_pro/match1/1_02_00_x.csv
input_features/gt/domain/domain_pro/match1/1_02_03_x.csv
input_features/gt/domain/domain_pro/match1/1_06_09_x.csv
input_features/gt/domain/domain_pro/match10/1_03_01_x.csv
input_features/gt/domain/domain_pro/match10/2_14_08_x.csv
input_features/gt/domain/domain_pro/match10/1_12_16_x.csv
input_features/gt/domain/domain_pro/match10/1_03_03_x.csv
input_features/gt/domain/domain_pro/match10/2_04_02_x.csv
input_features/gt/domain/d

#### run on am_singles

In [35]:
# Build the GT domain features (x) and labels (y) for every amateur-singles rally.
prefix = 'am_singles'
gt_pose_rootdir_am_singles = os.path.join(gt_pose_rootdir, prefix + '_gtpose')
gt_shuttle_rootdir_am_singles = os.path.join('../datasets/', prefix)
gt_crt_file_am_singles = os.path.join(gt_crt_rootdir, 'court_corners_am.csv')
label_rootdir_am_singles = os.path.join(label_rootdir, prefix)

out_rootdir_am_singles = os.path.join(out_rootdir, 'domain_' + prefix)

# the court table is loaded once and reused for every rally in this cell
df_crt = pd.read_csv(gt_crt_file_am_singles)

for matchdir in sorted(os.listdir(gt_pose_rootdir_am_singles)):
    match_pose_dir = os.path.join(gt_pose_rootdir_am_singles, matchdir)
    for posecsv in os.listdir(match_pose_dir):
        # rally name = pose file name without its '_gtpose.csv' suffix
        basename = posecsv.split('_gtpose.csv')[0]

        gt_pose_file = os.path.join(match_pose_dir, posecsv)
        gt_shuttle_file = os.path.join(
            gt_shuttle_rootdir_am_singles, matchdir, 'ball_trajectory', basename + '_ball.csv'
        )
        label_file = os.path.join(
            label_rootdir_am_singles, matchdir, 'player_hit', basename + '.mp4_player_hit.csv'
        )
        # value of the 'path' column that identifies this rally in the court table
        crt_orig_path = os.path.join(
            '../vids/difficult_dataset', matchdir, 'rally_video', basename + '.mp4'
        )

        df_pose = pd.read_csv(gt_pose_file)
        df_shuttle = pd.read_csv(gt_shuttle_file)
        df_label = pd.read_csv(label_file)

        # one output folder per match, created on first use
        outdir = os.path.join(out_rootdir_am_singles, matchdir)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        out_file_x = os.path.join(outdir, basename + '_x.csv')
        out_file_y = out_file_x[:-5] + 'y.csv'
        print(out_file_x)

        df_out_x = combine_domain_into_x(df_crt, crt_orig_path, df_pose, df_shuttle)
        df_out_x.to_csv(out_file_x, index=False)

        df_out_y = convert_label_into_y(df_label)
        df_out_y.to_csv(out_file_y, index=False)

input_features/gt/domain/domain_am_singles/match24/1_03_05_x.csv
input_features/gt/domain/domain_am_singles/match24/1_01_04_x.csv
input_features/gt/domain/domain_am_singles/match24/1_01_02_x.csv
input_features/gt/domain/domain_am_singles/match24/1_03_04_x.csv
input_features/gt/domain/domain_am_singles/match24/1_02_04_x.csv
input_features/gt/domain/domain_am_singles/match24/1_00_01_x.csv
input_features/gt/domain/domain_am_singles/match24/1_01_03_x.csv
input_features/gt/domain/domain_am_singles/match24/1_01_01_x.csv
input_features/gt/domain/domain_am_singles/match24/1_05_05_x.csv
input_features/gt/domain/domain_am_singles/match24/1_04_05_x.csv
input_features/gt/domain/domain_am_singles/match25/1_05_03_x.csv
input_features/gt/domain/domain_am_singles/match25/1_01_00_x.csv
input_features/gt/domain/domain_am_singles/match25/1_05_00_x.csv
input_features/gt/domain/domain_am_singles/match25/1_03_00_x.csv
input_features/gt/domain/domain_am_singles/match25/1_04_00_x.csv
input_features/gt/domain/

#### am_doubles

In [36]:
# Build the GT domain features (x) and labels (y) for every amateur-doubles rally.
# Each rally is written twice: 'pair1' uses player ids (1, 3), 'pair2' uses (2, 4).
prefix = 'am_doubles'
gt_pose_rootdir_am_doubles = os.path.join(gt_pose_rootdir, prefix + '_gtpose')
gt_shuttle_rootdir_am_doubles = os.path.join('../datasets/', prefix)
gt_crt_file_am_doubles = os.path.join(gt_crt_rootdir, 'court_corners_am.csv')
label_rootdir_am_doubles = os.path.join(label_rootdir, prefix)

out_rootdir_am_doubles = os.path.join(out_rootdir, 'domain_' + prefix)

# the court table is loaded once and reused for every rally in this cell
df_crt = pd.read_csv(gt_crt_file_am_doubles)

for matchdir in sorted(os.listdir(gt_pose_rootdir_am_doubles)):
    match_pose_dir = os.path.join(gt_pose_rootdir_am_doubles, matchdir)
    for posecsv in os.listdir(match_pose_dir):
        # rally name = pose file name without its '_gtpose.csv' suffix
        basename = posecsv.split('_gtpose.csv')[0]

        gt_pose_file = os.path.join(match_pose_dir, posecsv)
        gt_shuttle_file = os.path.join(
            gt_shuttle_rootdir_am_doubles, matchdir, 'ball_trajectory', basename + '_ball.csv'
        )
        label_file = os.path.join(
            label_rootdir_am_doubles, matchdir, 'player_hit', basename + '.mp4_player_hit.csv'
        )
        # value of the 'path' column that identifies this rally in the court table
        crt_orig_path = os.path.join(
            '../vids/difficult_dataset', matchdir, 'rally_video', basename + '.mp4'
        )

        df_pose = pd.read_csv(gt_pose_file)
        df_shuttle = pd.read_csv(gt_shuttle_file)
        df_label = pd.read_csv(label_file)
        print(matchdir, basename)

        # same processing for both pairs; only the output folder and the player ids differ
        for pair_dirname, pair_id1, pair_id2 in (('pair1', 1, 3), ('pair2', 2, 4)):
            outdir = os.path.join(out_rootdir_am_doubles, matchdir, pair_dirname)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            out_file_x = os.path.join(outdir, basename + '_x.csv')
            out_file_y = out_file_x[:-5] + 'y.csv'
            df_out_x = combine_domain_into_x(
                df_crt, crt_orig_path, df_pose, df_shuttle, id1=pair_id1, id2=pair_id2
            )
            df_out_y = convert_label_into_y(df_label)
            df_out_x.to_csv(out_file_x, index=False)
            df_out_y.to_csv(out_file_y, index=False)

match_china doubles0
match_china doubles1
match_china doubles3
match_china doubles2
match_clementi doubles0
match_clementi doubles1
match_clementi doubles6
match_clementi doubles5
match_clementi doubles4
match_clementi doubles3
match_clementi doubles2
match_msia doubles0
match_msia doubles1
match_msia doubles6
match_msia doubles5
match_msia doubles4
match_msia doubles3
match_msia doubles2
match_yewtee doubles1
match_yewtee doubles2


### Filtered

In [10]:
# Root directories of the ground-truth (GT) inputs; each trailing comment shows one example file under that root.
gt_pose_rootdir = 'object_level/player/gt_pose_bbox/' # e.g. 'object_level/player/gt_pose_bbox/pro_gtpose/match1/1_01_00_gtpose.csv'
gt_shuttle_rootdir = '../datasets/' # e.g. '../datasets/pro/match1/ball_trajectory/1_01_00_ball.csv'
gt_crt_rootdir = '../datasets/' # e.g. '../datasets/court_corners_pro.csv'
label_rootdir = '../datasets/' # e.g. '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'

# Destination root for the domain features built from the filtered annotations.
out_rootdir = 'input_features/filtered2/domain/'

# Root directories of the annotated (filtered) inputs.
annot_pose_rootdir = 'object_level/player/filtered2_pose_bbox/' # e.g. 'object_level/player/filtered2_pose_bbox/pro_filtered2pose/match1/1_01_00_filtered2pose.csv'
annot_shuttle_rootdir = 'object_level/shuttle/' # not available for pro; e.g. 'object_level/shuttle/am_singles/match_china2/singles0_shuttle.csv'

#### pro

In [11]:
# Build domain features (x) and labels (y) for every professional rally from the filtered poses.
prefix = 'pro'
annot_pose_rootdir_pro = os.path.join(annot_pose_rootdir, prefix + '_filtered2pose')
# the shuttle files are taken from the GT root here, not from annot_shuttle_rootdir
annot_shuttle_rootdir_pro = os.path.join(gt_shuttle_rootdir, prefix)
gt_crt_file_pro = os.path.join(gt_crt_rootdir, 'court_corners_' + prefix + '.csv') # e.g. '../datasets/court_corners_pro.csv'
label_rootdir_pro = os.path.join(label_rootdir, prefix) # e.g. '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'

out_rootdir_pro = os.path.join(out_rootdir, 'domain_' + prefix)

# the court table does not change between rallies, so it is read once before the loop
df_crt = pd.read_csv(gt_crt_file_pro)

for matchdir in sorted(os.listdir(annot_pose_rootdir_pro)):
    for posecsv in os.listdir(os.path.join(annot_pose_rootdir_pro, matchdir)):
        basename = posecsv.split('_filtered2pose.csv')[0]
        annot_pose_file = os.path.join(annot_pose_rootdir_pro, matchdir, posecsv)
        annot_shuttle_file = os.path.join(annot_shuttle_rootdir_pro, matchdir, 'ball_trajectory', basename + '_ball.csv')
        label_file = os.path.join(label_rootdir_pro, matchdir, 'player_hit', basename + '.mp4_player_hit.csv')
        # value of the 'path' column that identifies this rally in the court table
        crt_orig_path = os.path.join('../profession_dataset', matchdir, 'rally_video', basename + '.mp4')

        df_pose = pd.read_csv(annot_pose_file)
        df_shuttle = pd.read_csv(annot_shuttle_file)
        df_label = pd.read_csv(label_file)

        outdir = os.path.join(out_rootdir_pro, matchdir)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        out_file_x = os.path.join(outdir, basename + '_x.csv')
        # same path with the trailing 'x.csv' replaced by 'y.csv'
        out_file_y = out_file_x[:-5] + 'y.csv'
        print(out_file_x)

        df_out_x = combine_domain_into_x(df_crt, crt_orig_path, df_pose, df_shuttle)
        df_out_y = convert_label_into_y(df_label)

        df_out_x.to_csv(out_file_x, index=False)
        df_out_y.to_csv(out_file_y, index=False)

input_features/filtered2/domain/domain_pro/match1/1_02_02_x.csv
input_features/filtered2/domain/domain_pro/match1/1_02_04_x.csv
input_features/filtered2/domain/domain_pro/match1/1_03_06_x.csv
input_features/filtered2/domain/domain_pro/match1/1_02_00_x.csv
input_features/filtered2/domain/domain_pro/match1/1_03_05_x.csv
input_features/filtered2/domain/domain_pro/match1/1_02_03_x.csv
input_features/filtered2/domain/domain_pro/match1/1_01_00_x.csv
input_features/filtered2/domain/domain_pro/match1/1_06_06_x.csv
input_features/filtered2/domain/domain_pro/match1/1_02_01_x.csv
input_features/filtered2/domain/domain_pro/match1/1_03_04_x.csv
input_features/filtered2/domain/domain_pro/match1/1_06_08_x.csv
input_features/filtered2/domain/domain_pro/match1/1_06_09_x.csv
input_features/filtered2/domain/domain_pro/match10/2_04_02_x.csv
input_features/filtered2/domain/domain_pro/match10/2_14_08_x.csv
input_features/filtered2/domain/domain_pro/match10/1_03_01_x.csv
input_features/filtered2/domain/domai

#### run on am-singles

In [12]:
# Build domain features (x) and labels (y) for every amateur-singles rally
# from the filtered poses and the annotated shuttle tracks.
prefix = 'am_singles'
annot_pose_rootdir_am_singles = os.path.join(annot_pose_rootdir, prefix + '_filtered2pose')
annot_shuttle_rootdir_am_singles = os.path.join(annot_shuttle_rootdir, prefix)
gt_crt_file_am_singles = os.path.join(gt_crt_rootdir, 'court_corners_am.csv')
label_rootdir_am_singles = os.path.join(label_rootdir, prefix)

out_rootdir_am_singles = os.path.join(out_rootdir, 'domain_' + prefix)

# the court table is loaded once and reused for every rally in this cell
df_crt = pd.read_csv(gt_crt_file_am_singles)

for matchdir in sorted(os.listdir(annot_pose_rootdir_am_singles)):
    match_pose_dir = os.path.join(annot_pose_rootdir_am_singles, matchdir)
    for posecsv in os.listdir(match_pose_dir):
        # rally name = pose file name without its '_filtered2pose.csv' suffix
        basename = posecsv.split('_filtered2pose.csv')[0]

        annot_pose_file = os.path.join(match_pose_dir, posecsv)
        # annotated shuttle files sit directly in the match folder
        annot_shuttle_file = os.path.join(
            annot_shuttle_rootdir_am_singles, matchdir, basename + '_shuttle.csv'
        )
        label_file = os.path.join(
            label_rootdir_am_singles, matchdir, 'player_hit', basename + '.mp4_player_hit.csv'
        )
        # value of the 'path' column that identifies this rally in the court table
        crt_orig_path = os.path.join(
            '../vids/difficult_dataset', matchdir, 'rally_video', basename + '.mp4'
        )

        df_pose = pd.read_csv(annot_pose_file)
        df_shuttle = pd.read_csv(annot_shuttle_file)
        df_label = pd.read_csv(label_file)

        # one output folder per match, created on first use
        outdir = os.path.join(out_rootdir_am_singles, matchdir)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        out_file_x = os.path.join(outdir, basename + '_x.csv')
        out_file_y = out_file_x[:-5] + 'y.csv'
        print(out_file_x)

        df_out_x = combine_domain_into_x(df_crt, crt_orig_path, df_pose, df_shuttle)
        df_out_x.to_csv(out_file_x, index=False)

        df_out_y = convert_label_into_y(df_label)
        df_out_y.to_csv(out_file_y, index=False)

input_features/filtered2/domain/domain_am_singles/match24/1_02_04_x.csv
input_features/filtered2/domain/domain_am_singles/match24/1_03_05_x.csv
input_features/filtered2/domain/domain_am_singles/match24/1_01_04_x.csv
input_features/filtered2/domain/domain_am_singles/match24/1_01_03_x.csv
input_features/filtered2/domain/domain_am_singles/match24/1_03_04_x.csv
input_features/filtered2/domain/domain_am_singles/match24/1_04_05_x.csv
input_features/filtered2/domain/domain_am_singles/match24/1_01_02_x.csv
input_features/filtered2/domain/domain_am_singles/match24/1_05_05_x.csv
input_features/filtered2/domain/domain_am_singles/match24/1_01_01_x.csv
input_features/filtered2/domain/domain_am_singles/match24/1_00_01_x.csv
input_features/filtered2/domain/domain_am_singles/match25/1_02_00_x.csv
input_features/filtered2/domain/domain_am_singles/match25/1_06_03_x.csv
input_features/filtered2/domain/domain_am_singles/match25/1_05_01_x.csv
input_features/filtered2/domain/domain_am_singles/match25/1_01_0

## RGB crops

- rgb sequential for each video
  - RGB_pro/match1/1_00_00/near/0.jpg, RGB_pro/match1/1_00_00/far/0.jpg
- crops organised by labels
  - data_crops/train/0, data_crops/train/1, data_crops/train/2
  - test
  
For rgb sequential, if no crop present, fill with zeros.

For crops by labels, only include legit crops.

Use only GT to generate crops.

In [3]:
# Input roots for crop extraction; each trailing comment shows one example file under that root.
gt_pose_rootdir = 'object_level/player/gt_pose_bbox/' # e.g. 'object_level/player/gt_pose_bbox/pro_gtpose/match1/1_01_00_gtpose.csv'
label_rootdir = '../datasets/' # e.g. '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'
# side length of the saved crops; passed as both des_width and des_height to save_crops_from_vid in the cells below
crop_dim = 128

### Functions for crop extraction

In [4]:
# Function to pad a player bounding box and re-crop it to the desired aspect ratio.
# Very important: inference must apply exactly the same steps (YOLOv5s detection, then this processing).

def post_crop_processing(height, width, des_height, des_width, x1, y1, x2, y2, padding_scale=1.3):
    """Pad a bounding box and stretch it to the ``des_width:des_height`` aspect ratio.

    Args:
        height, width: frame size used to clip the box.
        des_height, des_width: target crop size; only their ratio is used.
        x1, y1, x2, y2: input box corners.
        padding_scale: factor by which the box is enlarged before re-cropping.

    Returns:
        ``(x1, y1, x2, y2)`` truncated to ``int``.

    Notes:
        * The bottom/right edge is padded using the already-padded top/left
          edge, so the padding is not symmetric around the original centre.
          This is kept exactly as-is so that results stay unchanged.
        * When the box is clamped at the bottom/right border, the opposite
          edge is recomputed from the border and can become negative.
        * A box with zero width or height raises ``ZeroDivisionError``.
    """
    # padding (second edge of each axis intentionally uses the updated first edge)
    top = max((y1 + y2) * 0.5 - (y2 - y1) * 0.5 * padding_scale, 0)
    bottom = min((top + y2) * 0.5 + (y2 - top) * 0.5 * padding_scale, height)
    left = max((x1 + x2) * 0.5 - (x2 - x1) * 0.5 * padding_scale, 0)
    right = min((left + x2) * 0.5 + (x2 - left) * 0.5 * padding_scale, width)

    box_w = right - left
    box_h = bottom - top
    box_aspect = box_w / box_h
    target_aspect = des_width / des_height

    if box_aspect > target_aspect:
        # box too wide: grow it vertically around its centre
        stretch = box_aspect / target_aspect
        centre = (top + bottom) * 0.5
        half_extent = box_h * 0.5 * stretch
        new_top = max(centre - half_extent, 0)
        new_bottom = min(centre + half_extent, height)
        if new_top == 0:
            # clamped at the top border: measure the full height from there
            new_bottom = (des_height / des_width) * box_w
        elif new_bottom == height:
            # clamped at the bottom border: measure the full height upwards
            new_top = height - (des_height / des_width) * box_w
        top, bottom = new_top, new_bottom
    elif box_aspect < target_aspect:
        # box too tall: grow it horizontally around its centre
        stretch = (box_h / box_w) * target_aspect
        centre = (left + right) * 0.5
        half_extent = box_w * 0.5 * stretch
        new_left = max(centre - half_extent, 0)
        new_right = min(centre + half_extent, width)
        if new_left == 0:
            # clamped at the left border: measure the full width from there
            new_right = target_aspect * box_h
        elif new_right == width:
            # clamped at the right border: measure the full width leftwards
            new_left = width - target_aspect * box_h
        left, right = new_left, new_right

    return int(left), int(top), int(right), int(bottom)

In [10]:
# function to save player crops from video

def _extract_player_crop(frame, bbox, width_scale, height_scale, height, width, des_height, des_width, blank_crop):
    """Cut one player's crop out of *frame*, or return *blank_crop* when there is no usable box."""
    if bbox is None:
        # no row for this player in this frame
        return blank_crop
    x1, y1, x2, y2 = (
        int(bbox[0] * width_scale),
        int(bbox[1] * height_scale),
        int(bbox[2] * width_scale),
        int(bbox[3] * height_scale),
    )
    # an all-zero box marks an undetected player; a zero-area box cannot be cropped
    if not np.any((x1, y1, x2, y2)) or x2 <= x1 or y2 <= y1:
        return blank_crop
    # adjust bbox coords to get desired aspect ratio
    x1, y1, x2, y2 = post_crop_processing(height, width, des_height, des_width, x1, y1, x2, y2, padding_scale=1.3)
    crop = frame[y1:y2, x1:x2, :]
    if crop.size == 0:
        # cv2.resize rejects empty images
        return blank_crop
    return crop


def save_crops_from_vid(video_path, bbox_path, outdir, des_width=64, des_height=128, concat=True, id1=1, id2=2, bbox_inpixels=False):
    """Save resized crops of two players for every frame of a video.

    For each frame that can be read from ``video_path``, the boxes of players
    ``id1`` (near) and ``id2`` (far) are looked up in the CSV at ``bbox_path``,
    processed with ``post_crop_processing``, cropped and resized to
    ``(des_width, des_height)``. A player with no row, an all-zero box, a
    zero-area box or an empty crop is written as an all-black image.

    Args:
        video_path: path of the video to read.
        bbox_path: CSV with ``Frame``, ``id``, ``x1``, ``y1``, ``x2``, ``y2`` columns.
        outdir: output directory (created if missing).
        des_width, des_height: size of each saved crop.
        concat: if True, write ``<outdir>/<frame>.jpg`` with the near and far
            crops side by side; otherwise write ``<outdir>/near/<frame>.jpg``
            and ``<outdir>/far/<frame>.jpg``.
        id1, id2: ``id`` values of the near and far player.
        bbox_inpixels: if False, box coordinates are multiplied by the frame
            width/height before use; if True they are used as they are.
    """
    os.makedirs(outdir, exist_ok=True)
    if not concat:
        for side in ('near', 'far'):
            os.makedirs(os.path.join(outdir, side), exist_ok=True)

    # index the first box of each (frame, id) once instead of filtering the table per frame
    df_bbox = pd.read_csv(bbox_path)
    wanted = df_bbox[df_bbox['id'].isin([id1, id2])].drop_duplicates(subset=['Frame', 'id'], keep='first')
    bbox_by_key = dict(zip(
        zip(wanted['Frame'].tolist(), wanted['id'].tolist()),
        wanted[['x1', 'y1', 'x2', 'y2']].to_numpy(),
    ))

    # zeros for undetected boxes; uint8 to match the dtype of decoded frames
    crop_zeros = np.zeros((des_height, des_width, 3), dtype=np.uint8)

    # Reading the Video File Using the VideoCapture
    video_reader = cv2.VideoCapture(video_path)
    try:
        width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))

        width_scale = 1 if bbox_inpixels else width
        height_scale = 1 if bbox_inpixels else height

        frnum = 0
        # Iterating through Video Frames
        while True:
            success, frame = video_reader.read()

            # If Video frame was not successfully read then break the loop
            if not success:
                break

            crop_near = _extract_player_crop(
                frame, bbox_by_key.get((frnum, id1)), width_scale, height_scale,
                height, width, des_height, des_width, crop_zeros)
            crop_far = _extract_player_crop(
                frame, bbox_by_key.get((frnum, id2)), width_scale, height_scale,
                height, width, des_height, des_width, crop_zeros)

            # Resize the crops to fixed dimensions
            resized_near = cv2.resize(crop_near, (des_width, des_height))
            resized_far = cv2.resize(crop_far, (des_width, des_height))

            if concat:
                resized = np.concatenate((resized_near, resized_far), axis=1)
                cv2.imwrite(os.path.join(outdir, str(frnum) + '.jpg'), resized)
            else:
                cv2.imwrite(os.path.join(outdir, 'near', str(frnum) + '.jpg'), resized_near)
                cv2.imwrite(os.path.join(outdir, 'far', str(frnum) + '.jpg'), resized_far)

            frnum += 1
    finally:
        # release the capture even if reading, cropping or writing fails
        video_reader.release()

### sequential (indv)

In [6]:
# Roots for the per-player sequential RGB crops; each trailing comment shows one example file under that root.
gt_bbox_rootdir = '../datasets/' # e.g. '../datasets/pro/match1/bbox_id/1_01_00_gtbboxid.csv'
label_rootdir = '../datasets/' # e.g. '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'
# crops are written under this directory
dest_rootdir = 'input_features/gt/indv_RGB/'

In [11]:
# Save separate near/far crops for every professional rally video.
prefix = 'pro'
# get vid paths
vidpaths = []
dataset_rootdir = os.path.join(gt_bbox_rootdir, prefix)

for matchdir in sorted(os.listdir(dataset_rootdir)):
    rally_video_dir = os.path.join(dataset_rootdir, matchdir, 'rally_video')
    for vid in sorted(os.listdir(rally_video_dir)):
        # rally name = video file name without its 4-character extension
        basename = vid[:-4]
        vid_path = os.path.join(rally_video_dir, vid)
        bbox_path = os.path.join(dataset_rootdir, matchdir, 'bbox_id', basename + '_gtbboxid.csv')
        outdir = os.path.join(dest_rootdir, prefix, matchdir, basename)

        print(vid_path)

        # square crops, one file per player per frame
        save_crops_from_vid(
            vid_path,
            bbox_path,
            outdir,
            des_width=crop_dim,
            des_height=crop_dim,
            concat=False,
            id1=1,
            id2=2,
            bbox_inpixels=False,
        )

../datasets/pro/match1/rally_video/1_01_00.mp4
../datasets/pro/match1/rally_video/1_02_00.mp4
../datasets/pro/match1/rally_video/1_02_01.mp4
../datasets/pro/match1/rally_video/1_02_02.mp4
../datasets/pro/match1/rally_video/1_02_03.mp4
../datasets/pro/match1/rally_video/1_02_04.mp4
../datasets/pro/match1/rally_video/1_03_04.mp4
../datasets/pro/match1/rally_video/1_03_05.mp4
../datasets/pro/match1/rally_video/1_03_06.mp4
../datasets/pro/match1/rally_video/1_06_06.mp4
../datasets/pro/match1/rally_video/1_06_08.mp4
../datasets/pro/match1/rally_video/1_06_09.mp4
../datasets/pro/match10/rally_video/1_03_01.mp4
../datasets/pro/match10/rally_video/1_03_03.mp4
../datasets/pro/match10/rally_video/1_12_16.mp4
../datasets/pro/match10/rally_video/2_04_02.mp4
../datasets/pro/match10/rally_video/2_14_08.mp4
../datasets/pro/match11/rally_video/1_03_01.mp4
../datasets/pro/match11/rally_video/1_07_06.mp4
../datasets/pro/match11/rally_video/1_13_13.mp4
../datasets/pro/match11/rally_video/2_05_00.mp4
../d

### am_singles

In [12]:
# Save separate near/far crops for every amateur-singles rally video.
prefix = 'am_singles'
# get vid paths
vidpaths = []
dataset_rootdir = os.path.join(gt_bbox_rootdir, prefix)

for matchdir in sorted(os.listdir(dataset_rootdir)):
    rally_video_dir = os.path.join(dataset_rootdir, matchdir, 'rally_video')
    for vid in sorted(os.listdir(rally_video_dir)):
        # rally name = video file name without its 4-character extension
        basename = vid[:-4]
        vid_path = os.path.join(rally_video_dir, vid)
        bbox_path = os.path.join(dataset_rootdir, matchdir, 'bbox_id', basename + '_gtbboxid.csv')
        outdir = os.path.join(dest_rootdir, prefix, matchdir, basename)

        print(vid_path)

        # square crops, one file per player per frame
        save_crops_from_vid(
            vid_path,
            bbox_path,
            outdir,
            des_width=crop_dim,
            des_height=crop_dim,
            concat=False,
            id1=1,
            id2=2,
            bbox_inpixels=False,
        )

../datasets/am_singles/match24/rally_video/1_00_01.mp4
../datasets/am_singles/match24/rally_video/1_01_01.mp4
../datasets/am_singles/match24/rally_video/1_01_02.mp4
../datasets/am_singles/match24/rally_video/1_01_03.mp4
../datasets/am_singles/match24/rally_video/1_01_04.mp4
../datasets/am_singles/match24/rally_video/1_02_04.mp4
../datasets/am_singles/match24/rally_video/1_03_04.mp4
../datasets/am_singles/match24/rally_video/1_03_05.mp4
../datasets/am_singles/match24/rally_video/1_04_05.mp4
../datasets/am_singles/match24/rally_video/1_05_05.mp4
../datasets/am_singles/match25/rally_video/1_01_00.mp4
../datasets/am_singles/match25/rally_video/1_02_00.mp4
../datasets/am_singles/match25/rally_video/1_03_00.mp4
../datasets/am_singles/match25/rally_video/1_04_00.mp4
../datasets/am_singles/match25/rally_video/1_05_00.mp4
../datasets/am_singles/match25/rally_video/1_05_01.mp4
../datasets/am_singles/match25/rally_video/1_05_02.mp4
../datasets/am_singles/match25/rally_video/1_05_03.mp4
../dataset

### am_doubles

In [13]:
# Extract per-player crops (concat=False) for every am_doubles rally video.
# Each video is processed twice: ids (1, 3) go to 'pair1', ids (2, 4) to 'pair2'.
prefix = 'am_doubles'
vidpaths = []  # never filled in this cell
dataset_rootdir = os.path.join(gt_bbox_rootdir, prefix)

for matchdir in sorted(os.listdir(dataset_rootdir)):
    match_root = os.path.join(dataset_rootdir, matchdir)
    video_dir = os.path.join(match_root, 'rally_video')

    for vid in sorted(os.listdir(video_dir)):
        basename = vid[:-4]  # strip the 4-character extension (e.g. '.mp4')
        vid_path = os.path.join(video_dir, vid)
        bbox_path = os.path.join(match_root, 'bbox_id', basename + '_gtbboxid.csv')
        video_outdir = os.path.join(dest_rootdir, prefix, matchdir, basename)
        outdir_pair1 = os.path.join(video_outdir, 'pair1')
        outdir_pair2 = os.path.join(video_outdir, 'pair2')

        print(vid_path)  # progress log

        # (output folder, id1, id2) -- same order of calls as before: pair1 then pair2
        for pair_outdir, first_id, second_id in ((outdir_pair1, 1, 3), (outdir_pair2, 2, 4)):
            save_crops_from_vid(vid_path, bbox_path, pair_outdir,
                                des_width=crop_dim, des_height=crop_dim, concat=False,
                                id1=first_id, id2=second_id, bbox_inpixels=False)

../datasets/am_doubles/match_china/rally_video/doubles0.mp4
../datasets/am_doubles/match_china/rally_video/doubles1.mp4
../datasets/am_doubles/match_china/rally_video/doubles2.mp4
../datasets/am_doubles/match_china/rally_video/doubles3.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles0.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles1.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles2.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles3.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles4.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles5.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles6.mp4
../datasets/am_doubles/match_msia/rally_video/doubles0.mp4
../datasets/am_doubles/match_msia/rally_video/doubles1.mp4
../datasets/am_doubles/match_msia/rally_video/doubles2.mp4
../datasets/am_doubles/match_msia/rally_video/doubles3.mp4
../datasets/am_doubles/match_msia/rally_video/doubles4.mp4
../datasets/am_doubles/m

### sequential (concat)

### pro

In [18]:
# Root folders used by the sequential (concat) crop-extraction cells below.
# The trailing comments show an example of a full path under each root.

# Dataset root: the cells below read both '<prefix>/<match>/rally_video/' and
# '<prefix>/<match>/bbox_id/' from under this folder.
gt_bbox_rootdir = '../datasets/' # '../datasets/pro/match1/bbox_id/1_01_00_gtbboxid.csv'
# Root of the player_hit label CSVs (not read by the extraction cells that follow).
label_rootdir = '../datasets/' # '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'
# Crops are written to '<dest_rootdir>/<prefix>/<match>/<video stem>/'.
dest_rootdir = 'input_features/gt/RGB/'

In [27]:
# Extract crops with concat=True for every pro rally video.
# The crop width is half of crop_dim -- presumably because the two player crops
# are joined into one crop_dim-wide image; confirm against save_crops_from_vid.
prefix = 'pro'
vidpaths = []  # never filled in this cell
dataset_rootdir = os.path.join(gt_bbox_rootdir, prefix)

half_width = int(crop_dim/2)

for matchdir in sorted(os.listdir(dataset_rootdir)):
    match_root = os.path.join(dataset_rootdir, matchdir)
    video_dir = os.path.join(match_root, 'rally_video')

    for vid in sorted(os.listdir(video_dir)):
        basename = vid[:-4]  # strip the 4-character extension (e.g. '.mp4')
        vid_path = os.path.join(video_dir, vid)
        bbox_path = os.path.join(match_root, 'bbox_id', basename + '_gtbboxid.csv')
        outdir = os.path.join(dest_rootdir, prefix, matchdir, basename)

        print(vid_path)  # progress log

        save_crops_from_vid(
            vid_path, bbox_path, outdir,
            des_width=half_width, des_height=crop_dim,
            concat=True, id1=1, id2=2, bbox_inpixels=False,
        )

../datasets/pro/match1/rally_video/1_01_00.mp4
../datasets/pro/match1/rally_video/1_02_00.mp4
../datasets/pro/match1/rally_video/1_02_01.mp4
../datasets/pro/match1/rally_video/1_02_02.mp4
../datasets/pro/match1/rally_video/1_02_03.mp4
../datasets/pro/match1/rally_video/1_02_04.mp4
../datasets/pro/match1/rally_video/1_03_04.mp4
../datasets/pro/match1/rally_video/1_03_05.mp4
../datasets/pro/match1/rally_video/1_03_06.mp4
../datasets/pro/match1/rally_video/1_06_06.mp4
../datasets/pro/match1/rally_video/1_06_08.mp4
../datasets/pro/match1/rally_video/1_06_09.mp4
../datasets/pro/match10/rally_video/1_03_01.mp4
../datasets/pro/match10/rally_video/1_03_03.mp4
../datasets/pro/match10/rally_video/1_12_16.mp4
../datasets/pro/match10/rally_video/2_04_02.mp4
../datasets/pro/match10/rally_video/2_14_08.mp4
../datasets/pro/match11/rally_video/1_03_01.mp4
../datasets/pro/match11/rally_video/1_07_06.mp4
../datasets/pro/match11/rally_video/1_13_13.mp4
../datasets/pro/match11/rally_video/2_05_00.mp4
../d

#### am_singles sequential

In [28]:
# Extract crops with concat=True for every am_singles rally video.
# Same procedure as the 'pro' sequential cell, only the dataset prefix differs.
prefix = 'am_singles'
vidpaths = []  # never filled in this cell
dataset_rootdir = os.path.join(gt_bbox_rootdir, prefix)

# Half-width crops; id1/id2 are presumably the player ids in the bbox CSV --
# confirm against save_crops_from_vid.
seq_kwargs = dict(des_width=int(crop_dim/2), des_height=crop_dim, concat=True,
                  id1=1, id2=2, bbox_inpixels=False)

for matchdir in sorted(os.listdir(dataset_rootdir)):
    match_root = os.path.join(dataset_rootdir, matchdir)
    video_dir = os.path.join(match_root, 'rally_video')

    for vid in sorted(os.listdir(video_dir)):
        basename = vid[:-4]  # strip the 4-character extension (e.g. '.mp4')
        vid_path = os.path.join(video_dir, vid)
        bbox_path = os.path.join(match_root, 'bbox_id', basename + '_gtbboxid.csv')
        outdir = os.path.join(dest_rootdir, prefix, matchdir, basename)

        print(vid_path)  # progress log

        save_crops_from_vid(vid_path, bbox_path, outdir, **seq_kwargs)

../datasets/am_singles/match24/rally_video/1_00_01.mp4
../datasets/am_singles/match24/rally_video/1_01_01.mp4
../datasets/am_singles/match24/rally_video/1_01_02.mp4
../datasets/am_singles/match24/rally_video/1_01_03.mp4
../datasets/am_singles/match24/rally_video/1_01_04.mp4
../datasets/am_singles/match24/rally_video/1_02_04.mp4
../datasets/am_singles/match24/rally_video/1_03_04.mp4
../datasets/am_singles/match24/rally_video/1_03_05.mp4
../datasets/am_singles/match24/rally_video/1_04_05.mp4
../datasets/am_singles/match24/rally_video/1_05_05.mp4
../datasets/am_singles/match25/rally_video/1_01_00.mp4
../datasets/am_singles/match25/rally_video/1_02_00.mp4
../datasets/am_singles/match25/rally_video/1_03_00.mp4
../datasets/am_singles/match25/rally_video/1_04_00.mp4
../datasets/am_singles/match25/rally_video/1_05_00.mp4
../datasets/am_singles/match25/rally_video/1_05_01.mp4
../datasets/am_singles/match25/rally_video/1_05_02.mp4
../datasets/am_singles/match25/rally_video/1_05_03.mp4
../dataset

#### am_doubles sequential

In [41]:
# Extract crops with concat=True for every am_doubles rally video.
# Each video is processed twice: ids (1, 3) go to 'pair1', ids (2, 4) to 'pair2'.
prefix = 'am_doubles'
vidpaths = []  # never filled in this cell
dataset_rootdir = os.path.join(gt_bbox_rootdir, prefix)

half_width = int(crop_dim/2)

for matchdir in sorted(os.listdir(dataset_rootdir)):
    match_root = os.path.join(dataset_rootdir, matchdir)
    video_dir = os.path.join(match_root, 'rally_video')

    for vid in sorted(os.listdir(video_dir)):
        basename = vid[:-4]  # strip the 4-character extension (e.g. '.mp4')
        vid_path = os.path.join(video_dir, vid)
        bbox_path = os.path.join(match_root, 'bbox_id', basename + '_gtbboxid.csv')
        video_outdir = os.path.join(dest_rootdir, prefix, matchdir, basename)
        outdir_pair1 = os.path.join(video_outdir, 'pair1')
        outdir_pair2 = os.path.join(video_outdir, 'pair2')

        print(vid_path)  # progress log

        # (output folder, id1, id2) -- same order of calls as before: pair1 then pair2
        for pair_outdir, first_id, second_id in ((outdir_pair1, 1, 3), (outdir_pair2, 2, 4)):
            save_crops_from_vid(vid_path, bbox_path, pair_outdir,
                                des_width=half_width, des_height=crop_dim, concat=True,
                                id1=first_id, id2=second_id, bbox_inpixels=False)

../datasets/am_doubles/match_china/rally_video/doubles0.mp4
../datasets/am_doubles/match_china/rally_video/doubles1.mp4
../datasets/am_doubles/match_china/rally_video/doubles2.mp4
../datasets/am_doubles/match_china/rally_video/doubles3.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles0.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles1.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles2.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles3.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles4.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles5.mp4
../datasets/am_doubles/match_clementi/rally_video/doubles6.mp4
../datasets/am_doubles/match_msia/rally_video/doubles0.mp4
../datasets/am_doubles/match_msia/rally_video/doubles1.mp4
../datasets/am_doubles/match_msia/rally_video/doubles2.mp4
../datasets/am_doubles/match_msia/rally_video/doubles3.mp4
../datasets/am_doubles/match_msia/rally_video/doubles4.mp4
../datasets/am_doubles/m

### data crops

Copy and paste crops from RGB sequential

Input: extracted seq crops, gt labels

Output: crops organised into 0/, 1/, 2/.   
  - data_crops/train/0, data_crops/train/1, data_crops/train/2
  - test

Method

1. For every video directory of RGB crops, copy the crops into the correct label directory, renaming them on the way.
  - A frame within ±3 frames (`relax_hit`) of a hit is considered a hit.

### indv crops + clearnohit

Under label 0, we only put player crops that are more than `dist_nohit` (15) frames away from the nearest hit. The rationale is that this confuses the classifier less during training, since poses in the frames around a hit look quite similar to the hit itself.

In [40]:
# Settings for the 'individual crops + clear no-hit' dataset built in the next cell.
# The trailing comment on label_rootdir shows an example of a full label path.

# Root of the per-video player_hit label CSVs.
label_rootdir = '../datasets/' # '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'
# Source crops, read from '<rgb_rootdir>/<prefix>/<match>/<video>/{near,far}/'.
rgb_rootdir = 'input_features/gt/indv_RGB/'
# Destination root; crops are copied into '<dst_rootdir>/{train,test}/{0,1,2}/'.
dst_rootdir = 'input_features/gt/data_indvcrops_clearnohit/'
# A frame counts as a hit when a hit label lies within +-relax_hit frames of it.
relax_hit = 3
dist_nohit = 15  # a frame is kept as label 0 only if no hit lies within +-dist_nohit frames

In [41]:
# Sort the individual 'pro' player crops into label folders (clear-no-hit variant).
#
# For every frame of every video:
#   * label 1 -> copy the 'near' crop if a near-player hit (1) lies within +-relax_hit frames
#   * label 2 -> else copy the 'far' crop if a far-player hit (2) lies within +-relax_hit frames
#   * label 0 -> else copy both crops, but only if no hit at all lies within
#                +-dist_nohit frames; frames in between are skipped entirely
# Matches whose directory name contains 'test' go to the test split, the rest to train.
# Frame numbers are parsed from the image file names and used directly as row
# indices into the 'player_hit' column (assumes one CSV row per frame -- confirm).
prefix = 'pro'

rgbpaths = []  # unused in this cell
dataset_rootdir = os.path.join(rgb_rootdir, prefix)

# Create every split/label directory. exist_ok=True also repairs a partially
# created tree, which the previous "only if dst_rootdir is missing" guard did not.
for split_name in ('train', 'test'):
    for label_name in ('0', '1', '2'):
        os.makedirs(os.path.join(dst_rootdir, split_name, label_name), exist_ok=True)

for matchdir in sorted(os.listdir(dataset_rootdir)):
    dst_dir = os.path.join(dst_rootdir, 'test' if 'test' in matchdir else 'train')

    for viddir in sorted(os.listdir(os.path.join(dataset_rootdir, matchdir))):
        label_path = os.path.join(label_rootdir, prefix, matchdir, 'player_hit', viddir + '.mp4_player_hit.csv')
        array_hit = pd.read_csv(label_path)['player_hit'].values
        crop_dir = os.path.join(dataset_rootdir, matchdir, viddir)

        # Only 'near' is listed; 'far' is expected to hold the same file names.
        for imgfile in sorted(os.listdir(os.path.join(crop_dir, 'near'))):
            frnum = int(imgfile[:-4])

            # Clamp the window start at 0: a negative start would be interpreted
            # relative to the END of the array and yield an empty window, so hits
            # near the beginning of a rally would be missed.
            hit_window = array_hit[max(frnum - relax_hit, 0):frnum + relax_hit + 1]

            if 1 in hit_window:
                label, subdirs = 1, ['near']
            elif 2 in hit_window:
                label, subdirs = 2, ['far']
            else:
                clear_window = array_hit[max(frnum - dist_nohit, 0):frnum + dist_nohit + 1]
                if 1 in clear_window or 2 in clear_window:
                    continue  # too close to a hit to be a clean negative
                label, subdirs = 0, ['near', 'far']

            for subdir in subdirs:
                srcfile = os.path.join(crop_dir, subdir, imgfile)
                dstfile = os.path.join(dst_dir, str(label), matchdir + '_' + viddir + '_' + subdir + '_' + imgfile)
                shutil.copyfile(srcfile, dstfile)

#### balance crop data (i.e. remove surplus label 0 "no hit" crops)

In [42]:
# Balance the train split: randomly delete label-0 crops until only
# max(#label-1, #label-2) of them remain. Files are removed from disk for good,
# and the selection differs between runs unless `random` was seeded beforehand.
dirpath0, dirpath1, dirpath2 = (
    os.path.join(dst_rootdir, 'train', label_name) for label_name in ('0', '1', '2')
)
list0, list1, list2 = (os.listdir(label_dir) for label_dir in (dirpath0, dirpath1, dirpath2))

num2retain = max(len(list1), len(list2))

# Shuffle in place, keep the first num2retain names, delete the rest.
random.shuffle(list0)
toremove = list0[num2retain:]

for fname in toremove:
    os.remove(os.path.join(dirpath0, fname))

In [43]:
# Balance the test split: randomly delete label-0 crops until only
# max(#label-1, #label-2) of them remain. Files are removed from disk for good,
# and the selection differs between runs unless `random` was seeded beforehand.
dirpath0, dirpath1, dirpath2 = (
    os.path.join(dst_rootdir, 'test', label_name) for label_name in ('0', '1', '2')
)
list0, list1, list2 = (os.listdir(label_dir) for label_dir in (dirpath0, dirpath1, dirpath2))

num2retain = max(len(list1), len(list2))

# Shuffle in place, keep the first num2retain names, delete the rest.
random.shuffle(list0)
toremove = list0[num2retain:]

for fname in toremove:
    os.remove(os.path.join(dirpath0, fname))

### indv crops

In [14]:
# Settings for the plain 'individual crops' dataset built in the next cell.
# The trailing comment on label_rootdir shows an example of a full label path.

# Root of the per-video player_hit label CSVs.
label_rootdir = '../datasets/' # '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'
# Source crops, read from '<rgb_rootdir>/<prefix>/<match>/<video>/{near,far}/'.
rgb_rootdir = 'input_features/gt/indv_RGB/'
# Destination root; crops are copied into '<dst_rootdir>/{train,test}/{0,1,2}/'.
dst_rootdir = 'input_features/gt/data_indvcrops/'
# A frame counts as a hit when a hit label lies within +-relax_hit frames of it.
relax_hit = 3

In [19]:
# Sort the individual 'pro' player crops into label folders.
#
# For every frame of every video:
#   * label 1 -> copy the 'near' crop if a near-player hit (1) lies within +-relax_hit frames
#   * label 2 -> else copy the 'far' crop if a far-player hit (2) lies within +-relax_hit frames
#   * label 0 -> otherwise copy both the 'near' and the 'far' crop
# Matches whose directory name contains 'test' go to the test split, the rest to train.
# Frame numbers are parsed from the image file names and used directly as row
# indices into the 'player_hit' column (assumes one CSV row per frame -- confirm).
prefix = 'pro'

rgbpaths = []  # unused in this cell
dataset_rootdir = os.path.join(rgb_rootdir, prefix)

# Create every split/label directory. exist_ok=True also repairs a partially
# created tree, which the previous "only if dst_rootdir is missing" guard did not.
for split_name in ('train', 'test'):
    for label_name in ('0', '1', '2'):
        os.makedirs(os.path.join(dst_rootdir, split_name, label_name), exist_ok=True)

for matchdir in sorted(os.listdir(dataset_rootdir)):
    dst_dir = os.path.join(dst_rootdir, 'test' if 'test' in matchdir else 'train')

    for viddir in sorted(os.listdir(os.path.join(dataset_rootdir, matchdir))):
        label_path = os.path.join(label_rootdir, prefix, matchdir, 'player_hit', viddir + '.mp4_player_hit.csv')
        array_hit = pd.read_csv(label_path)['player_hit'].values
        crop_dir = os.path.join(dataset_rootdir, matchdir, viddir)

        # Only 'near' is listed; 'far' is expected to hold the same file names.
        for imgfile in sorted(os.listdir(os.path.join(crop_dir, 'near'))):
            frnum = int(imgfile[:-4])

            # Clamp the window start at 0: a negative start would be interpreted
            # relative to the END of the array and yield an empty window, so hits
            # near the beginning of a rally would be labelled 0.
            hit_window = array_hit[max(frnum - relax_hit, 0):frnum + relax_hit + 1]

            if 1 in hit_window:
                label, subdirs = 1, ['near']
            elif 2 in hit_window:
                label, subdirs = 2, ['far']
            else:
                label, subdirs = 0, ['near', 'far']

            for subdir in subdirs:
                srcfile = os.path.join(crop_dir, subdir, imgfile)
                dstfile = os.path.join(dst_dir, str(label), matchdir + '_' + viddir + '_' + subdir + '_' + imgfile)
                shutil.copyfile(srcfile, dstfile)

#### balance crop data (i.e. remove surplus label 0 "no hit" crops)

In [20]:
# Balance the train split: randomly delete label-0 crops until only
# max(#label-1, #label-2) of them remain. Files are removed from disk for good,
# and the selection differs between runs unless `random` was seeded beforehand.
split_root = os.path.join(dst_rootdir, 'train')
dirpath0 = os.path.join(split_root, '0')
dirpath1 = os.path.join(split_root, '1')
dirpath2 = os.path.join(split_root, '2')

list0, list1, list2 = os.listdir(dirpath0), os.listdir(dirpath1), os.listdir(dirpath2)

# Size of the larger hit class = number of no-hit crops to keep.
num2retain = max(len(list1), len(list2))

random.shuffle(list0)
toremove = list0[num2retain:]

for surplus_name in toremove:
    os.remove(os.path.join(dirpath0, surplus_name))

In [23]:
# Balance the test split: randomly delete label-0 crops until only
# max(#label-1, #label-2) of them remain. Files are removed from disk for good,
# and the selection differs between runs unless `random` was seeded beforehand.
split_root = os.path.join(dst_rootdir, 'test')
dirpath0 = os.path.join(split_root, '0')
dirpath1 = os.path.join(split_root, '1')
dirpath2 = os.path.join(split_root, '2')

list0, list1, list2 = os.listdir(dirpath0), os.listdir(dirpath1), os.listdir(dirpath2)

# Size of the larger hit class = number of no-hit crops to keep.
num2retain = max(len(list1), len(list2))

random.shuffle(list0)
toremove = list0[num2retain:]

for surplus_name in toremove:
    os.remove(os.path.join(dirpath0, surplus_name))

### concat crops

In [25]:
# Settings for the 'concat crops' dataset built in the next cell.
# The trailing comment on label_rootdir shows an example of a full label path.

# Root of the per-video player_hit label CSVs.
label_rootdir = '../datasets/' # '../datasets/pro/match1/player_hit/1_01_00.mp4_player_hit.csv'
# Source crops, read from '<rgb_rootdir>/<prefix>/<match>/<video>/'.
rgb_rootdir = 'input_features/gt/RGB/'
# Destination root; crops are copied into '<dst_rootdir>/{train,test}/{0,1,2}/'.
dst_rootdir = 'input_features/gt/data_crops/'
# A frame counts as a hit when a hit label lies within +-relax_hit frames of it.
relax_hit = 3

In [45]:
# Sort the 'pro' crops under rgb_rootdir (one image per frame) into label folders.
#
# A frame gets label 1 if a near-player hit (1) lies within +-relax_hit frames,
# else label 2 if a far-player hit (2) does, else label 0. Every frame is copied.
# Matches whose directory name contains 'test' go to the test split, the rest to train.
# Frame numbers are parsed from the image file names and used directly as row
# indices into the 'player_hit' column (assumes one CSV row per frame -- confirm).
prefix = 'pro'

rgbpaths = []  # unused in this cell
dataset_rootdir = os.path.join(rgb_rootdir, prefix)

# Create every split/label directory. exist_ok=True also repairs a partially
# created tree, which the previous "only if dst_rootdir is missing" guard did not.
for split_name in ('train', 'test'):
    for label_name in ('0', '1', '2'):
        os.makedirs(os.path.join(dst_rootdir, split_name, label_name), exist_ok=True)

for matchdir in sorted(os.listdir(dataset_rootdir)):
    dst_dir = os.path.join(dst_rootdir, 'test' if 'test' in matchdir else 'train')

    for viddir in sorted(os.listdir(os.path.join(dataset_rootdir, matchdir))):
        label_path = os.path.join(label_rootdir, prefix, matchdir, 'player_hit', viddir + '.mp4_player_hit.csv')
        array_hit = pd.read_csv(label_path)['player_hit'].values
        crop_dir = os.path.join(dataset_rootdir, matchdir, viddir)

        for imgfile in sorted(os.listdir(crop_dir)):
            frnum = int(imgfile[:-4])

            # Clamp the window start at 0: a negative start would be interpreted
            # relative to the END of the array and yield an empty window, so hits
            # near the beginning of a rally would be labelled 0.
            hit_window = array_hit[max(frnum - relax_hit, 0):frnum + relax_hit + 1]

            if 1 in hit_window:
                label = 1
            elif 2 in hit_window:
                label = 2
            else:
                label = 0

            srcfile = os.path.join(crop_dir, imgfile)
            dstfile = os.path.join(dst_dir, str(label), matchdir + '_' + viddir + '_' + imgfile)
            shutil.copyfile(srcfile, dstfile)

#### balance crop data (i.e. remove surplus label 0 "no hit" crops)

In [None]:
# Balance the train split: randomly delete label-0 crops until only
# max(#label-1, #label-2) of them remain. Files are removed from disk for good,
# and the selection differs between runs unless `random` was seeded beforehand.
dirpath0, dirpath1, dirpath2 = [
    os.path.join(dst_rootdir, 'train', str(label_id)) for label_id in range(3)
]

list0 = os.listdir(dirpath0)
list1 = os.listdir(dirpath1)
list2 = os.listdir(dirpath2)

num2retain = max(len(list1), len(list2))

# After the in-place shuffle, everything past the first num2retain names is surplus.
random.shuffle(list0)
toremove = list0[num2retain:]

for extra in toremove:
    os.remove(os.path.join(dirpath0, extra))

In [26]:
# Balance the test split: randomly delete label-0 crops until only
# max(#label-1, #label-2) of them remain. Files are removed from disk for good,
# and the selection differs between runs unless `random` was seeded beforehand.
dirpath0, dirpath1, dirpath2 = [
    os.path.join(dst_rootdir, 'test', str(label_id)) for label_id in range(3)
]

list0 = os.listdir(dirpath0)
list1 = os.listdir(dirpath1)
list2 = os.listdir(dirpath2)

num2retain = max(len(list1), len(list2))

# After the in-place shuffle, everything past the first num2retain names is surplus.
random.shuffle(list0)
toremove = list0[num2retain:]

for extra in toremove:
    os.remove(os.path.join(dirpath0, extra))