## Packages

In [None]:
import shutil
import json
import csv
import os
import re
from natsort import natsorted
from tqdm import tqdm

## Label data analysis

In [None]:
# Load a single label file as a sample so its structure can be explored below.
sample_label_path = 'L_SIT/20201030_dog-sit-000273.mp4.json'
with open(sample_label_path, encoding='utf-8') as sample_file:
    label_sample = json.load(sample_file)

In [None]:
# Show which top-level keys the sample label file contains.
print(label_sample.keys())

# The sample must hold exactly three values; they are unpacked in stored
# order as the video name, the metadata dict and the per-frame annotations.
name, meta, anno = tuple(label_sample.values())

# Human-readable overview of the sample's metadata.
sample_summary = f'''
video name: {name}
seq #     : {meta['seq']}
action    : {meta['action']}
emotion   : {meta['inspect']['emotion']}
height    : {meta['height']}
width     : {meta['width']}
frames    : {len(anno)}
          '''
print(sample_summary)

In [None]:
# Display the bounding box stored for the first annotated frame of the sample.
# NOTE(review): the preprocessing cell unpacks this mapping as four values
# (x, y, width, height) in stored order -- confirm against the label schema.
anno[0]['bounding_box']

## Data preprocessing

In [None]:
# Action classes to export. The position in this list is the action class
# number written to label.csv, and the name selects the O_<action> (images)
# and L_<action> (labels) folders.
action_list = [
    'BODYLOWER',
    'SIT',
]

# Emotion classes exactly as spelled in the label files. The position in this
# list is the emotion class number written to label.csv.
emo_list = [
    '공격성',       # aggression
    '공포',         # fear
    '불안/슬픔',    # anxiety / sadness
    '편안/안정',    # comfort / calm
    '행복/즐거움',  # happiness / joy
    '화남/불쾌',    # anger / displeasure
]

In [None]:
##########################################################################
# label.csv
#-------------------------------------------------------------------------
# file_name: '{seq}_{frame}.jpg'
# seq      : sequence ID number; representing same video source
# frame_idx: frame ID number; 0, 1, 2, ...
# action   : action class number
# emotion  : emotion class number
# bbox     : bounding box; (x1, y1, x2, y2); starting/ending point
# keypoints: keypoints; [(x, y, v), ... ], v for visibility
##########################################################################

# directory for training images
# Created first so that a failure here (e.g. missing permissions) surfaces
# immediately and does not leave an open file handle behind. An already
# existing directory is fine; any other error is raised instead of being
# silently swallowed.
os.makedirs('train', exist_ok=True)

# label.csv: one row per exported frame.
# The handle stays open on purpose: the conversion cell writes the rows and
# closes it when it is done.
l = open('label.csv', 'w', newline='', encoding='utf-8')
lr = csv.writer(l)
lr.writerow(['file_name', 'seq', 'frame_idx', 'action', 'emotion', 'bbox', 'keypoints'])

# error log
# Written as UTF-8 because logged exception messages can contain the Korean
# emotion names found in the label files. Closed by the conversion cell.
err = open('error.csv', 'w', newline='', encoding='utf-8')
er = csv.writer(err)

In [None]:
# Build the training set: every frame image found under O_<action>/<video>/
# is copied to train/ as '{seq}_{frame}.jpg' and described by one row in
# label.csv.
#
# Uses names created by earlier cells: action_list, emo_list, the CSV writers
# lr (label.csv) and er (error.csv) and their file handles l and err.
#
# Problems are logged to error.csv and the offending video/frame is skipped
# instead of aborting the whole run:
#   label file problems -> [exception, action, video]
#   frame problems      -> [message or exception, action, video, frame file]

# Frame file names are split on '_' and '.'; fields 1 and 3 are read as the
# frame number and the timestamp.
frame_name_sep = re.compile(r'[_.]')

try:
    # for each Action folder; the list position `i` is the action class number
    for i, act in enumerate(action_list):

        # for each video
        for video in tqdm(os.listdir(f'O_{act}'), desc=f'{act:10}'):
            if video == '.ipynb_checkpoints':
                continue

            # extract informations from label file
            try:
                with open(f'L_{act}/{video}.json', 'r', encoding='utf-8') as f:
                    label = json.load(f)
                _, meta, anno = label.values()
                seq = int(meta['seq'])
                emotion = emo_list.index(meta['inspect']['emotion'])

                # Index the annotations once per video instead of scanning the
                # whole list for every frame. setdefault keeps the first
                # entry for a duplicated key, like the former first-match scan.
                anno_by_frame = {}
                for frame in anno:
                    frame_key = (frame['frame_number'], frame['timestamp'])
                    anno_by_frame.setdefault(frame_key, frame)
            except Exception as e:
                er.writerow([e, act, video])
                continue

            # for each frame; `j` is the position in the naturally sorted listing
            for j, frame_name in enumerate(natsorted(os.listdir(f'O_{act}/{video}'))):
                # find annotation index
                parts = frame_name_sep.split(frame_name)
                try:
                    N_fr = int(parts[1])
                    N_ts = int(parts[3])
                except (IndexError, ValueError):
                    er.writerow(['unexpected frame file name', act, video, frame_name])
                    continue
                img_name = f'{seq}_{N_fr}.jpg'

                # get annotation data
                frame = anno_by_frame.get((N_fr, N_ts))
                if frame is None:
                    # Previously this left an empty bbox and crashed on unpacking.
                    er.writerow(['no annotation for frame', act, video, frame_name])
                    continue

                try:
                    # bounding box: stored as four values, converted to corners
                    x1, y1, w, h = frame['bounding_box'].values()
                    x2 = x1 + w
                    y2 = y1 + h

                    # keypoints: entries that are not dicts are written as
                    # [0, 0, 0], i.e. visibility 0
                    keylist = [
                        [key['x'], key['y'], 1] if isinstance(key, dict) else [0, 0, 0]
                        for key in frame['keypoints'].values()
                    ]
                except (KeyError, ValueError, AttributeError, TypeError) as e:
                    er.writerow([e, act, video, frame_name])
                    continue

                # copy images to new directory and rename
                shutil.copyfile(f'O_{act}/{video}/{frame_name}', f'train/{img_name}')

                # label.csv logging
                lr.writerow([img_name, seq, j, i, emotion, [x1, y1, x2, y2], keylist])
finally:
    # Always flush and release both CSV files, even if the run is interrupted.
    l.close()
    err.close()