In [1]:
import os
import json
import glob
import shutil
import numpy as np
from sklearn.cluster import KMeans
import cv2
from collections import Counter
from IPython.display import Video
from tqdm.notebook import tqdm

### Helper Functions

In [2]:
def get_color(lbl):

    if lbl==0:
        return (0,0,255)
    elif lbl==1:
        return (0,255,0)
    else:
        return None
    
def post_process_cls(all_cls, all_tracks):

    ### First, we need to get the set of all the tracks
    ### After which, to find its corrsponding classes
    ### And transform/interpolate the classes list
    from collections import Counter
    id_to_cls_list = {}
    for en, (cls, track_id) in enumerate(zip(all_cls, all_tracks)):

        if track_id in id_to_cls_list:
            id_to_cls_list[track_id].append(cls)
        else:
            id_to_cls_list[track_id] = [cls]
            

    id_to_cls_val = {}
    for track_id, cls_lst in id_to_cls_list.items():
        cls_lst = np.array(cls_lst).flatten().tolist()
        cnt = Counter(cls_lst)
        mst_cmn = cnt.most_common()[0][0]
        id_to_cls_val[track_id] = int(mst_cmn)

    output = []
    for en, track_id in enumerate(all_tracks):
        
        output.append(id_to_cls_val[track_id])

    return output, id_to_cls_val

def chunks(l, n):
    n = max(1, n)
    return [l[i:i+n] for i in range(0, len(l), n)] 

def get_all_team_classes(id_dict):
    print("Clustering all teams in progress...")
    anno_dirs = glob.glob('../data/raw_data/*')
    
    ### Create global dict which maps global player track to its new global team class
    global_id_to_cls_val = {}
    all_cls = list(range(0, 2 * len(anno_dirs)))

    def chunks(l, n):
        n = max(1, n)
        return [l[i:i+n] for i in range(0, len(l), n)]   
    cls_chunks = chunks(all_cls, 2)  


    for anno_en, anno_dir in enumerate(tqdm(anno_dirs)):
        
        ### Process a new game
        all_jsons = sorted(glob.glob(anno_dir + '/*.json'))
        orig_dir = os.path.join('../../data/playerTrackingFrames', os.path.basename(anno_dir))
        
        ### Create the corresponding history of labels and histograms
        all_hists = []
        all_labels = []
        
        anno_error = 0
        box_cnt = 0
        for en, single_json in enumerate(all_jsons):
            data = json.load(open(single_json))
            
            
            img_path = os.path.join(orig_dir, os.path.basename(single_json).replace('.json', '.jpg')) 
            img0 = cv2.imread(img_path)
            h,w,_ = img0.shape

            for i in range(len(data['shapes'])):
                box_cnt += 1
            
                label = data['shapes'][i]['label']
                pts = np.array(data['shapes'][i]['points']).astype(int)
                if pts[0][1] > pts[1][1] or pts[0][0] > pts[1][0]: 
                    anno_error += 1
                    continue

                player_label = id_dict[os.path.basename(anno_dir)][label]
                
                center_y = int((pts[1][1] + pts[0][1]) / 2)
                center_x = int((pts[1][0] + pts[0][0]) / 2)

                img_box = img0[max(0,center_y - 30): min(h, center_y + 30), 
                               max(0, center_x - 10): min(w, center_x + 10), :]

                img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2HSV)
                hist = cv2.calcHist([img_box], [0], None, [24],
                                [0, 300])
                hist = cv2.normalize(hist, hist).flatten()

                all_hists.append(hist)
                all_labels.append(player_label)
                
                

        concat_hists = np.concatenate(all_hists)
        km = KMeans(n_clusters=2, init="k-means++", max_iter=10000).fit(all_hists)
        proc_cls, id_to_cls_val = post_process_cls(km.labels_, all_labels)
        
        print(anno_en, anno_dir, Counter(proc_cls), 100 * (anno_error/box_cnt))
        
        for player_id, color_cls in id_to_cls_val.items():
            curr_cls_subset = cls_chunks[anno_en]
            global_id_to_cls_val[player_id] = curr_cls_subset[color_cls]
    
    print('Clustering is finished!')
    return proc_cls, global_id_to_cls_val                    

### Create a dict which maps player ID from a game to its unique ID in the dataset.

In [4]:
anno_dirs = glob.glob('../data/raw_data/*')

id_dict = {}
k_class = 1
for anno_dir in anno_dirs:
    id_dict[os.path.basename(anno_dir)] = {}
    
    curr_set = set()
    all_jsons = sorted(glob.glob(anno_dir + '/*.json'))
    for single_json in all_jsons:
        data = json.load(open(single_json))

        for i in range(len(data['shapes'])):
            curr_set.add(data['shapes'][i]['label'])
            
    num_classes = len(curr_set)
    curr_classes = sorted(list(curr_set))
    
    en = 0
    while en<num_classes:
        
        id_dict[os.path.basename(anno_dir)][curr_classes[en]] = k_class
        en += 1
        k_class += 1
        
print("The number of class is ", k_class)
print("The number of dirs is ", len(anno_dirs))

The number of class is  410
The number of dirs is  41


### Create gt.txt for each video. IMPORTANT! Frames should start from 1.

### 1.1 Create gt.txt with team label having only {0,1}

In [4]:
# gt_list = []
# anno_dirs = glob.glob('../data/raw_data/*')


# for anno_dir in tqdm(anno_dirs):
#     all_jsons = sorted(glob.glob(anno_dir + '/*.json'))
    
    
#     team_cls, id_to_cls_val = get_team_color_labels(anno_dir, all_jsons)
    
#     gt_list = []
#     cls_en = 0
#     for en, single_json in enumerate(all_jsons):
#         data = json.load(open(single_json))

#         for i in range(len(data['shapes'])):
#             bbox = data['shapes'][i]['points']  
#             label = data['shapes'][i]['label']
            
#             player_lbl = id_to_cls_val[label]
            
#             if bbox[0][0] > bbox[1][0] or bbox[0][1] > bbox[1][1]: 
#                 print("BBOX ERROR")
#                 continue
            
            
#             anno_line = [en+1, id_dict[os.path.basename(anno_dir)][label], 
#                          int(bbox[0][0]), int(bbox[0][1]), 
#                          int(bbox[1][0] - bbox[0][0]), int(bbox[1][1] - bbox[0][1]),
#                          1, 1, player_lbl]

#             anno_str = ','.join([str(x) for x in anno_line])     

#             gt_list.append(anno_str)
           
        
#     ### Create the output GT dir
#     output_dir = os.path.join('../data/mot_data/images/train/', os.path.basename(anno_dir))
#     if not os.path.exists(output_dir):
#         os.makedirs(output_dir)
#     output_dir = os.path.join(output_dir, 'gt')
#     if not os.path.exists(output_dir):
#         os.makedirs(output_dir)


#     ### Write the detection to the file gt.txt
#     with open(os.path.join(output_dir, 'gt.txt'), 'w') as f:
#         for x in gt_list:
#             f.writelines(x + '\n')       

### 1.2 Create gt.txt with all teams having its own label

In [7]:
gt_list = []
anno_dirs = glob.glob('../data/raw_data/*')

_, id_to_cls_val = get_all_team_classes(id_dict)

for anno_dir in tqdm(anno_dirs):
    all_jsons = sorted(glob.glob(anno_dir + '/*.json'))

    
    gt_list = []
    cls_en = 0
    for en, single_json in enumerate(all_jsons):
        data = json.load(open(single_json))

        for i in range(len(data['shapes'])):
            bbox = data['shapes'][i]['points']  
            label = data['shapes'][i]['label']
            
            if bbox[0][0] > bbox[1][0] or bbox[0][1] > bbox[1][1]: 
                continue
                
            track_label = id_dict[os.path.basename(anno_dir)][label]
            player_lbl = id_to_cls_val[track_label]
            
            
            anno_line = [en+1, track_label, 
                         int(bbox[0][0]), int(bbox[0][1]), 
                         int(bbox[1][0] - bbox[0][0]), int(bbox[1][1] - bbox[0][1]),
                         1, 1, player_lbl]

            anno_str = ','.join([str(x) for x in anno_line])     

            gt_list.append(anno_str)
           
        
    ### Create the output GT dir
    output_dir = os.path.join('../data/mot_data/images/train/', os.path.basename(anno_dir))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_dir = os.path.join(output_dir, 'gt')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)


    ### Write the detection to the file gt.txt
    with open(os.path.join(output_dir, 'gt.txt'), 'w') as f:
        for x in gt_list:
            f.writelines(x + '\n')       

Clustering all teams in progress...


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=41.0), HTML(value='')))

0 ../data/raw_data/2020.02.13-Colorado_at_Oregon Counter({1: 1000, 0: 982}) 0.15113350125944583
1 ../data/raw_data/2018.11.27-Indiana_at_Duke Counter({1: 970, 0: 924}) 0.0
2 ../data/raw_data/2019-02-11_Virginia_at_North_Carolina Counter({1: 1232, 0: 527}) 0.0
3 ../data/raw_data/2020.02.22-Michigan_at_Purdue Counter({1: 1275, 0: 542}) 0.0
4 ../data/raw_data/2019.01.22-Duke_at_Pittsburgh Counter({0: 1000, 1: 954}) 0.051150895140664954
5 ../data/raw_data/2020.02.25-NorthCarolinaState_at_NorthCarolina Counter({1: 988, 0: 952}) 0.0
6 ../data/raw_data/2019.02.26-Duke_at_VirginiaTech Counter({1: 1112, 0: 761}) 0.0
7 ../data/raw_data/2019.03.14-ACC-Syracuse_at_Duke Counter({1: 882, 0: 842}) 0.0
8 ../data/raw_data/2020.02.15-NotreDame_at_Duke Counter({1: 1005, 0: 730}) 0.0576036866359447
9 ../data/raw_data/2020.02.04-Duke_at_BostonCollege Counter({0: 939, 1: 919}) 0.0
10 ../data/raw_data/2020.02.10-FloridaState_at_Duke Counter({1: 976, 0: 711}) 0.0
11 ../data/raw_data/2020-02-15-Virginia_at_Nor

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=41.0), HTML(value='')))




### 1.3 Create gt.txt with ball pocession information and jersey number 

In [6]:
gt_list = []
anno_dirs = glob.glob('../data/raw_data/*')
jersey_dir = '../data/second_task/'


_, id_to_cls_val = get_all_team_classes(id_dict)

for anno_dir in tqdm(anno_dirs):
    
    jersey_anno = os.path.join(jersey_dir, os.path.basename(anno_dir))
    
    all_jsons = sorted(glob.glob(anno_dir + '/*.json'))

    
    ### Iterate through all frames of current directory
    cls_en = 0
    gt_list = []
    curr_labels = set()
    for en, single_json in enumerate(all_jsons):
        data = json.load(open(single_json))
        
        jersey_file = os.path.join(jersey_anno, os.path.basename(single_json).replace('frame_', ''))
        
        if os.path.exists(jersey_file):
            jersey_data = json.load(open(jersey_file))   
        
            ### Map each track for current frame to its existing information, such as Ball Pocession, Jersey Number, Position on Court
            jersey_dict = {}
            for i in range(len(jersey_data['shapes'])):
                bbox = jersey_data['shapes'][i]['points']  
                label = jersey_data['shapes'][i]['label']

                lbl_split = label.split('_')
                if 'j' in label:

                    if len(lbl_split)==4:
                        jersey_num = lbl_split[-1]
                        track_id  = '_'.join([lbl_split[1], lbl_split[2]])
                    else:
                        _, track_id, jersey_num = lbl_split


                    if not track_id in jersey_dict:
                        jersey_dict[str(track_id)] = [jersey_num, 0]
                    else:
                        jersey_dict[str(track_id)][0] = jersey_num

                elif 'b' in label:
                    
                    if len(lbl_split)==3:
                        track_id  = '_'.join([lbl_split[1], lbl_split[2]])
                    else:
                        _, track_id = lbl_split
                        
                    if not track_id in jersey_dict:
                        jersey_dict[track_id] = [None, 1]
                    else:
                        jersey_dict[track_id][1] = 1

                        
        for i in range(len(data['shapes'])):
            bbox = data['shapes'][i]['points']  
            label = data['shapes'][i]['label']
            curr_labels.add(label)
            
            if bbox[0][0] > bbox[1][0] or bbox[0][1] > bbox[1][1]: 
                continue
                
            track_label = id_dict[os.path.basename(anno_dir)][label]
            team_lbl = id_to_cls_val[track_label]
            
            if os.path.exists(jersey_file):
                jersey_num, ball_poc = jersey_dict.get(label, [None, 0])
            
            anno_line = [en+1, track_label, 
                         int(bbox[0][0]), int(bbox[0][1]), 
                         int(bbox[1][0] - bbox[0][0]), int(bbox[1][1] - bbox[0][1]),
                         1, 1, team_lbl, ball_poc]

            anno_str = ','.join([str(x) for x in anno_line])     

            gt_list.append(anno_str)


    
    ### Create the output GT dir
    output_dir = os.path.join('../data/mot_data/images/train/', os.path.basename(anno_dir))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_dir = os.path.join(output_dir, 'gt')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)


    ### Write the detection to the file gt.txt
    with open(os.path.join(output_dir, 'gt.txt'), 'w') as f:
        for x in gt_list:
            f.writelines(x + '\n')       

Clustering all teams in progress...


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=41.0), HTML(value='')))

0 ../data/raw_data/2020.02.13-Colorado_at_Oregon Counter({0: 1000, 1: 982}) 0.15113350125944583
1 ../data/raw_data/2018.11.27-Indiana_at_Duke Counter({0: 970, 1: 924}) 0.0
2 ../data/raw_data/2019-02-11_Virginia_at_North_Carolina Counter({0: 1232, 1: 527}) 0.0
3 ../data/raw_data/2020.02.22-Michigan_at_Purdue Counter({1: 1275, 0: 542}) 0.0
4 ../data/raw_data/2019.01.22-Duke_at_Pittsburgh Counter({1: 1000, 0: 954}) 0.051150895140664954
5 ../data/raw_data/2020.02.25-NorthCarolinaState_at_NorthCarolina Counter({1: 988, 0: 952}) 0.0
6 ../data/raw_data/2019.02.26-Duke_at_VirginiaTech Counter({1: 1112, 0: 761}) 0.0
7 ../data/raw_data/2019.03.14-ACC-Syracuse_at_Duke Counter({0: 882, 1: 842}) 0.0
8 ../data/raw_data/2020.02.15-NotreDame_at_Duke Counter({1: 1005, 0: 730}) 0.0576036866359447
9 ../data/raw_data/2020.02.04-Duke_at_BostonCollege Counter({0: 939, 1: 919}) 0.0
10 ../data/raw_data/2020.02.10-FloridaState_at_Duke Counter({0: 976, 1: 711}) 0.0
11 ../data/raw_data/2020-02-15-Virginia_at_Nor

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=41.0), HTML(value='')))




In [13]:
### ANNO FORMAT
### frame_number obj_ID x y width height 1 (conf score) 1 (obj type) visibility_ratio

### Copy frames to mot_data

In [76]:
### To understand which frame to copy, we need to create the set of all available frames

In [27]:
anno_dirs = glob.glob('../data/raw_data/*')

set_of_all = set()
for anno_dir in anno_dirs:
    all_jsons = sorted(glob.glob(anno_dir + '/*.json'))
    for js in all_jsons:
        x = '/'.join(js.split('/')[-2:])
        set_of_all.add(x)

In [28]:
all_dirs = glob.glob('../data/mot_data/images/train/*')
orig_frames = os.listdir('../../data/playerTrackingFrames')


for dr in all_dirs:
    
    if os.path.basename(dr) in orig_frames:
        orig_dir = os.path.join('../../data/playerTrackingFrames', os.path.basename(dr))

        dest_dir = os.path.join(dr, 'img1')
        
        
        if os.path.exists(dest_dir):
            if not os.path.isdir(dest_dir):
                os.remove(dest_dir)

In [29]:
all_dirs = glob.glob('../data/mot_data/images/train/*')
orig_frames = os.listdir('../../data/playerTrackingFrames')


for dr in all_dirs:
    
    if os.path.basename(dr) in orig_frames:
        orig_dir = os.path.join('../../data/playerTrackingFrames', os.path.basename(dr))

        dest_dir = os.path.join(dr, 'img1')

        if os.path.exists(dest_dir):
            shutil.rmtree(dest_dir)
            os.makedirs(dest_dir)
        else:
            os.makedirs(dest_dir)
        
        curr_imgs = glob.glob(orig_dir + '/*.jpg')
        for img in curr_imgs:
            x = '/'.join(img.split('/')[-2:]).replace('.jpg', '.json')
            if x in set_of_all:
                shutil.copy2(img, dest_dir)


### Rename image files. IMPORTANT! Frames should start from 1.

In [30]:
all_dirs = glob.glob('../data/mot_data/images/train/*')

for dr in all_dirs:
    img_dr = os.path.join(dr, 'img1')
    curr_imgs = sorted(glob.glob(img_dr + '/*.jpg'))
    
    for en, img_path in enumerate(curr_imgs):
        base = os.path.basename(img_path)
        new_base = f"{en+1:06d}.jpg"
        os.rename(img_path, img_path.replace(base, new_base))

### Create a custom.train file, don't know why :)

In [31]:
all_dirs = glob.glob('../data/mot_data/images/train/*')
all_dirs = sorted(all_dirs)


train_dirs = all_dirs[:int(0.9*len(all_dirs))]
val_dirs = all_dirs[int(0.9*len(all_dirs)):]
print(len(train_dirs), len(val_dirs))

output = []
for dr in train_dirs:
    curr_files = sorted(glob.glob(dr + '/img1/*.jpg'))
    for f in curr_files:
        output.append(f.replace('../data/', ''))
             
with open('./src/data/custom.train', 'w') as f:
    for l in output:
        f.writelines(l + '\n')
        
print(len(output))
        

output = []
for dr in val_dirs:
    curr_files = sorted(glob.glob(dr + '/img1/*.jpg'))
    for f in curr_files:
        output.append(f.replace('../data/', ''))
             
with open('./src/data/custom.val', 'w') as f:
    for l in output:
        f.writelines(l + '\n')
        
print(len(output))

36 5
6505
1000


In [36]:
global_dirs = glob.glob('/media/data_disc/data/videos1/videos/*')
global_dirs = sorted(global_dirs)
global_dirs = [os.path.basename(x).replace('.mp4', '') for x in global_dirs]
all_ = [os.path.basename(x) for x in all_dirs]
(set(global_dirs) - set(all_))

{'2019-11-24_Virginia_at_ArizonaState',
 '2019.01.14-Syracuse_at_Duke',
 '2019.03.02-NorthCarolina_at_Clemson',
 '2019.11.27-NorthCarolina_at_Alabama',
 '2019.12.04-OhioState_at_NorthCarolina',
 '2019.12.14-GeorgiaTech_at_Kentucky',
 '2020-02-08_Virginia_at_Louisville',
 '2020.01.14-Duke_at_Clemson',
 '2020.02.17-NorthCarolina_at_NotreDame',
 '2020.02.22-NorthCarolina_at_Louisville',
 '2020.02.22-Oregon_at_Arizona',
 '2020_01-20_NCState_at_Virginia'}

### Create cfg file

In [38]:
cfg = {}

cfg['root'] = '/home/ubuntu/oljike/PlayerTracking/data'
cfg['train'] = {}
cfg['train']['custom'] = './data/custom.train'
cfg['test'] = {}
cfg['test']['custom'] = './data/custom.val'
cfg['test_emb'] = './data/custom.val'


with open('src/lib/cfg/custom.json','w') as f:
    json.dump(cfg, f)

### Create team color labels and check them visually

In [4]:
def get_team_color_labels(anno_dir, all_jsons):
    all_labels = []
    all_hists = []

    
    orig_dir = os.path.join('../../data/player_tracking_frames', os.path.basename(anno_dir))
    for single_json in all_jsons:
        data = json.load(open(single_json))

        ### Read the image
        img_path = os.path.join(orig_dir, os.path.basename(single_json).replace('.json', '.jpg')) 
        img0 = cv2.imread(img_path)
        h,w,_ = img0.shape

        for i in range(len(data['shapes'])):
            pts = np.array(data['shapes'][i]['points']).astype(int)

            if pts[0][1] > pts[1][1] or pts[0][0] > pts[1][0]: continue
            center_y = int((pts[1][1] + pts[0][1]) / 2)
            center_x = int((pts[1][0] + pts[0][0]) / 2)

            img_box = img0[max(0,center_y - 30): min(h, center_y + 30), 
                           max(0, center_x - 10): min(w, center_x + 10), :]
            cv2.imwrite('small.jpg', img_box)

            img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2HSV)

            hist = cv2.calcHist([img_box], [0], None, [24],
                                [0, 300])
            hist = cv2.normalize(hist, hist).flatten()

            all_hists.append(hist)
            all_labels.append(data['shapes'][i]['label'])

    concat_hists = np.concatenate(all_hists)
    print(hist.shape)
    km = KMeans(n_clusters=2, init="k-means++", max_iter=10000).fit(all_hists)
    print(Counter(km.labels_))
    proc_cls, id_to_cls_val = post_process_cls(km.labels_, all_labels)
    print(Counter(proc_cls))
    return proc_cls, id_to_cls_val

# anno_dir = glob.glob('../data/raw_data/*')[24]
# all_jsons = sorted(glob.glob(anno_dir + '/*.json'))
# proc_cls, id_to_cls_val = get_team_color_labels(anno_dir, all_jsons)

In [188]:
h, w, _ = 720, 1280, 0
out = cv2.VideoWriter('team_label_output.mp4',cv2.VideoWriter_fourcc(*'MP4V'), 3, (w,h))

orig_dir = os.path.join('../../data/player_tracking_frames', os.path.basename(anno_dir))

en = 0
for single_json in all_jsons:
    data = json.load(open(single_json))
    
    ### Read the image
    img_path = os.path.join(orig_dir, os.path.basename(single_json).replace('.json', '.jpg')) 
    img = cv2.imread(img_path)
    
    for shape in data['shapes']:
        bbox = np.array([[int(x) for x in y] for y in shape['points']])
        
        bbox = bbox.flatten()
   
        color = get_color(id_to_cls_val[shape['label']])
        
        cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, thickness=1)
        cv2.putText(img, shape['label'], (bbox[0], max(0, bbox[1] - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), thickness=1)
       
        en += 1
    out.write(img)
out.release()

In [None]:
Video('team_label_output.mp4')

### Visual Check

In [211]:
### Visual check
h, w, _ = 720, 1280, 0
out = cv2.VideoWriter('team_label_output.mp4',cv2.VideoWriter_fourcc(*'MP4V'), 3, (w,h))


anno_dir = glob.glob('../data/raw_data/*')[1]
all_jsons = sorted(glob.glob(anno_dir + '/*.json'))
orig_dir = os.path.join('../../data/player_tracking_frames', os.path.basename(anno_dir))
en = 0

for single_json in all_jsons:
    data = json.load(open(single_json))
    
    ### Read the image
    img_path = os.path.join(orig_dir, os.path.basename(single_json).replace('.json', '.jpg')) 
    img = cv2.imread(img_path)
    
    for shape in data['shapes']:
        bbox = np.array([[int(x) for x in y] for y in shape['points']])
        label = shape['label']
        bbox = bbox.flatten()
        track_label = id_dict[os.path.basename(anno_dir)][label]
        player_lbl = id_to_cls_val[track_label]
#         color = get_color(player_lbl)
        
        cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255,0,0), thickness=1)
        cv2.putText(img, str(player_lbl), (bbox[0], max(0, bbox[1] - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), thickness=5)
       
        en += 1
    out.write(img)
out.release()

In [212]:
Video('team_label_output.mp4')