In [1]:
import json
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
# Input: annotation export that is read line by line, one JSON document per line.
in_file = "Data/preprocess/VideoAnnotations.ndjson"
# Output: flat CSV of bounding-box rows written by the final cell.
# Both paths are relative, so they resolve against the kernel's working directory.
out_file = "Data/preprocess/annotations.csv"

In [3]:
# Parse the NDJSON export into a list with one decoded JSON object per line.
# - encoding is pinned to UTF-8 so the result does not depend on the platform's
#   locale default.
# - blank / whitespace-only lines are skipped instead of crashing json.loads.
with open(in_file, 'r', encoding='utf-8') as f:
    all_video_data = [json.loads(line) for line in f if line.strip()]

In [4]:
def rename_gaze(direction):
    """Translate a raw gaze-target value into the label naming used for boxes.

    "person_N" and "face_N" both become "headN", and "object_N" becomes
    "objectN" (N in 1..2). Any other value is returned unchanged.
    """
    # (raw spellings, canonical name) pairs, checked in order.
    alias_table = (
        (("person_1", "face_1"), 'head1'),
        (("person_2", "face_2"), 'head2'),
        (("object_1",), 'object1'),
        (("object_2",), 'object2'),
    )
    for raw_spellings, canonical in alias_table:
        if direction in raw_spellings:
            return canonical
    # Not a known alias: pass the value through as-is.
    return direction

In [5]:
# Flatten the annotation export into one record per (video, frame, bounding box).
#
# For every video that has labels, walk its frames, track the gaze direction
# reported for face 1 / face 2, and emit a row for each annotated object whose
# label is not 'face1' / 'face2'. Rows for 'head1' / 'head2' additionally carry
# a "<head>, <gaze target>" string in 'gaze_direction'; all other rows get None.
#
# Raises ValueError if a head box is reached while no gaze direction is known
# for that head.

# Key of the annotation project inside each record's 'projects' mapping.
PROJECT_ID = 'clit3zloh00k4071d6x1lc5ej'
# There are 90 frames per video; frame keys are the strings '1' .. '90'.
FRAMES_PER_VIDEO = 90

bounding_box_data = []

for each_video in tqdm(all_video_data):
    video_name = each_video['data_row']['external_id']
    all_labels = each_video['projects'][PROJECT_ID]['labels']
    # Videos with an empty label list contribute no rows.
    if not all_labels:
        continue

    all_frames = all_labels[0]['annotations']['frames']
    # Reset once per video (not per frame): a frame without a gaze
    # classification keeps the value from the most recent frame that had one.
    face1_gaze_direction = None
    face2_gaze_direction = None

    for frame_id in (str(i) for i in range(1, FRAMES_PER_VIDEO + 1)):
        frame = all_frames[frame_id]

        # Update the running gaze directions from this frame's classifications.
        for gaze in frame['classifications']:
            if gaze['name'] == 'face1_gaze_direction':
                face1_gaze_direction = rename_gaze(gaze['radio_answer']['value'])
            if gaze['name'] == 'face2_gaze_direction':
                face2_gaze_direction = rename_gaze(gaze['radio_answer']['value'])

        ## loop through each label in a single frame
        for annotation_id, annotation in frame['objects'].items():
            label_name = annotation['name']
            if label_name in ('face1', 'face2'):
                continue

            bounding_box = annotation['bounding_box']
            top = int(bounding_box['top'])
            left = int(bounding_box['left'])
            height = int(bounding_box['height'])
            width = int(bounding_box['width'])
            bottom = top + height
            right = left + width

            # Pick the gaze target belonging to this head (None for non-heads).
            is_head = label_name in ('head1', 'head2')
            if label_name == 'head1':
                gaze_target = face1_gaze_direction
            elif label_name == 'head2':
                gaze_target = face2_gaze_direction
            else:
                gaze_target = None

            # Only format the string when a target is actually known. Formatting
            # unconditionally would yield the truthy text "head1, None", which
            # slips past the sanity check below and ends up in the CSV.
            gaze_direction = None
            if is_head and gaze_target is not None:
                gaze_direction = f"{label_name}, {gaze_target}"

            frame_data = {'video_name': video_name,
                          'frame': frame_id,
                          'label_name': label_name,
                          'left': left,
                          'top': top,
                          'right': right,
                          'bottom': bottom,
                          'height': height, 'width': width,
                          'x_center': left + (width / 2),
                          'y_center': top + (height / 2),
                          'gaze_direction': gaze_direction
                          }
            if is_head and gaze_direction is None:
                raise ValueError(f'gaze direction is None, {frame_data}')

            bounding_box_data.append(frame_data)

100%|██████████| 250/250 [00:00<00:00, 884.70it/s] 


In [6]:
# Spot-check: display the last record that was appended to bounding_box_data.
bounding_box_data[-1]

{'video_name': 'yt-k6Ildfvd4wA_76.mp4',
 'frame': '90',
 'label_name': 'head2',
 'left': 363,
 'top': 5,
 'right': 500,
 'bottom': 262,
 'height': 257,
 'width': 137,
 'x_center': 431.5,
 'y_center': 133.5,
 'gaze_direction': 'head2, head1'}

In [7]:
# Build a table from the list of record dicts: one row per record, one column per key.
df = pd.DataFrame(bounding_box_data)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,video_name,frame,label_name,left,top,right,bottom,height,width,x_center,y_center,gaze_direction
0,-YwZOeyAQC8_15.mp4,1,head1,0,101,94,228,127,94,47.0,164.5,"head1, neither"
1,-YwZOeyAQC8_15.mp4,1,head2,86,211,155,286,75,69,120.5,248.5,"head2, object1"
2,-YwZOeyAQC8_15.mp4,1,object1,189,279,243,327,48,54,216.0,303.0,
3,-YwZOeyAQC8_15.mp4,2,head1,0,101,93,225,124,93,46.5,163.0,"head1, neither"
4,-YwZOeyAQC8_15.mp4,2,head2,86,211,154,285,74,68,120.0,248.0,"head2, object1"
...,...,...,...,...,...,...,...,...,...,...,...,...
59879,yt-k6Ildfvd4wA_76.mp4,88,head2,363,5,500,262,257,137,431.5,133.5,"head2, head1"
59880,yt-k6Ildfvd4wA_76.mp4,89,head1,0,68,68,290,222,68,34.0,179.0,"head1, head2"
59881,yt-k6Ildfvd4wA_76.mp4,89,head2,363,5,500,262,257,137,431.5,133.5,"head2, head1"
59882,yt-k6Ildfvd4wA_76.mp4,90,head1,0,68,68,290,222,68,34.0,179.0,"head1, head2"


In [8]:
# Persist the flattened annotations; index=False leaves the row index out of the CSV.
df.to_csv(out_file, index=False)