## 1. Packages

In [1]:
import shutil
import json
import csv
import os
import re
import pickle
import pandas as pd
import torch
import torch.nn.functional as F
from natsort import natsorted
from tqdm import tqdm
from utils.data import to_tensor, norm_keys

## 2. Label Data Analysis

In [2]:
with open('Training/L_SIT/20201030_dog-sit-000273.mp4.json', encoding='utf-8') as f:
    label_sample = json.load(f)

In [3]:
print(label_sample.keys())

name, meta, anno = label_sample.values()
print(f'''
video name: {name}
seq #     : {meta['seq']}
action    : {meta['action']}
emotion   : {meta['inspect']['emotion']}
height    : {meta['height']}
width     : {meta['width']}
frames    : {len(anno)}
          ''')

dict_keys(['file_video', 'metadata', 'annotations'])

video name: dog-sit-000273.mp4
seq #     : 273
action    : 앉기
emotion   : 행복/즐거움
height    : 1280
width     : 720
frames    : 86
          


In [4]:
anno[0]['bounding_box']

{'x': 6, 'y': 311, 'width': 592, 'height': 766}

## 3. Data preprocessing
    - Train: 39537
    - Val  : 2474
    - Test : 2475

In [6]:
action_list = [ 'BODYLOWER', 'BODYSCRATCH', 'BODYSHAKE', 'FEETUP', 'FOOTUP', 'HEADING',
                'LYING', 'MOUNTING', 'SIT', 'TAILING', 'TAILLOW', 'TURN', 'WALKRUN' ]
emo_list = [ '공격성', '공포', '불안/슬픔', '편안/안정', '행복/즐거움', '화남/불쾌' ]

In [7]:
##########################################################################
# label.csv
#-------------------------------------------------------------------------
# filename : '{seq}_{frame}.jpg'
# seq      : sequence ID number; representing same video source
# frame_idx: frame ID number; 0, 1, 2, ...
# action   : action class number
# emotion  : emotion class number
# bbox     : bounding box; (x1, y1, x2, y2); starting/ending point
# keypoints: keypoints; [(x, y, v), ... ], v for visibility
##########################################################################

In [8]:
for i in range(2):
    source = ['Training', 'Validation'][i]
    target = ['train', 'val'][i]
    
    label_log = open(os.path.join(target, 'label.csv'), 'w', newline='')
    lr = csv.writer(label_log)
    lr.writerow(['file_name', 'seq', 'frame_idx', 'action', 'emotion', 'bbox', 'keypoints'])
    
    error_log = open(os.path.join(target, 'error.csv'), 'w', newline='')
    er = csv.writer(error_log)
    
    # for each Action folder
    for i, act in enumerate(action_list):
        path_label = os.path.join(source, f'L_{act}')
        path_origin= os.path.join(source, f'O_{act}')

        # for each video
        for video in tqdm(os.listdir(path_origin), desc=f'{act:10}'):
            if video == '.ipynb_checkpoints':
                continue

            # extract informations from label file
            try:
                with open(f'{path_label}/{video}.json', 'r', encoding='utf-8') as f:
                    label = json.load(f)
                    _, meta, anno = label.values()
                    seq = int(meta['seq'])
                    emotion = emo_list.index(meta['inspect']['emotion'])
            except Exception as e:
                er.writerow([e])
                continue

            # for each frame
            for j, frame_name in enumerate(natsorted(os.listdir(f'{path_origin}/{video}'))):
                # find annotation index
                temp = re.split('[_.]', frame_name)
                N_fr = int(temp[1])
                N_ts = int(temp[3])
                img_name = f'{seq}_{N_fr}.jpg'

                # get annotation data
                keys = dict()
                bbox = dict()
                for frame in anno:
                    if frame['frame_number'] == N_fr and frame['timestamp'] == N_ts:
                        keys = frame['keypoints']
                        bbox = frame['bounding_box']
                        break

                # bounding box
                x1, y1, w, h = bbox.values()
                x2 = x1 + w
                y2 = y1 + h

                # keypoints
                keylist = []
                for key in keys.values():
                    keylist.append([key['x'], key['y'], 1] if type(key) == dict else [0, 0, 0])

                # copy images to new directory and rename
                shutil.move(f'{path_origin}/{video}/{frame_name}', f'{target}/{img_name}')

                # label.csv logging
                lr.writerow([img_name, seq, j, i, emotion, [x1, y1, x2, y2], keylist])
    label_log.close()
    error_log.close()

BODYLOWER  : 100%|█████████████████████████████████████████████████████████████████| 6392/6392 [00:59<00:00, 107.25it/s]
BODYSCRATCH: 100%|█████████████████████████████████████████████████████████████████| 1228/1228 [00:18<00:00, 67.40it/s]
BODYSHAKE  : 100%|█████████████████████████████████████████████████████████████████| 1327/1327 [00:18<00:00, 70.16it/s]
FEETUP     : 100%|█████████████████████████████████████████████████████████████████| 2748/2748 [00:38<00:00, 70.85it/s]
FOOTUP     : 100%|█████████████████████████████████████████████████████████████████| 4154/4154 [00:44<00:00, 67.09it/s]


## 4. Label Processing

In [9]:
label_train = pd.read_csv('data/train/label.csv')
label_train.head()

Unnamed: 0,file_name,seq,frame_idx,action,emotion,bbox,keypoints
0,28_0.jpg,28,0,0,3,"[145, 377, 540, 852]","[[326, 583, 1], [337, 498, 1], [356, 614, 1], ..."
1,28_12.jpg,28,1,0,3,"[154, 383, 540, 848]","[[325, 590, 1], [337, 498, 1], [356, 614, 1], ..."
2,28_102.jpg,28,2,0,3,"[137, 394, 524, 855]","[[317, 589, 1], [326, 492, 1], [344, 620, 1], ..."
3,28_108.jpg,28,3,0,3,"[137, 394, 524, 861]","[[317, 589, 1], [326, 492, 1], [344, 620, 1], ..."
4,28_114.jpg,28,4,0,3,"[132, 394, 524, 861]","[[313, 588, 1], [323, 492, 1], [344, 620, 1], ..."


In [11]:
new_label_train = pd.DataFrame(columns=['action', 'emotion', 'keys'])

for i, s in enumerate(tqdm(label_train['seq'].unique())):
    df = label_train[label_train['seq']==s]
    
    bbox = to_tensor(df['bbox'])
    keys = to_tensor(df['keypoints'])
    keys = norm_keys(bbox, keys)
    
    action, emotion = df[['action', 'emotion']].iloc[0]
    action = F.one_hot(torch.tensor([action]), num_action).float()
    emotion = F.one_hot(torch.tensor([emotion]), num_emotion).float()
    
    new_label_train.loc[i] = [keys, action, emotion]
    
new_label_train.head(3)

100%|███████████████████████████████████████████████████████████████████████████| 39532/39532 [04:30<00:00, 145.94it/s]


Unnamed: 0,action,emotion,keys
0,"[[[tensor(0.4582, dtype=torch.float64), tensor...","[[tensor(1.), tensor(0.), tensor(0.), tensor(0...","[[tensor(0.), tensor(0.), tensor(0.), tensor(1..."
1,"[[[tensor(0.4794, dtype=torch.float64), tensor...","[[tensor(1.), tensor(0.), tensor(0.), tensor(0...","[[tensor(0.), tensor(0.), tensor(0.), tensor(1..."
2,"[[[tensor(0.4735, dtype=torch.float64), tensor...","[[tensor(1.), tensor(0.), tensor(0.), tensor(0...","[[tensor(0.), tensor(0.), tensor(0.), tensor(1..."


In [13]:
label_val = pd.read_csv('data/val/label.csv')
new_label_val = pd.DataFrame(columns=['action', 'emotion', 'keys'])

for i, s in enumerate(tqdm(label_val['seq'].unique())):
    df = label_val[label_val['seq']==s]
    
    bbox = to_tensor(df['bbox'])
    keys = to_tensor(df['keypoints'])
    keys = norm_keys(bbox, keys)
    
    action, emotion = df[['action', 'emotion']].iloc[0]
    action = F.one_hot(torch.tensor([action]), num_action).float()
    emotion = F.one_hot(torch.tensor([emotion]), num_emotion).float()
    
    new_label_val.loc[i] = [keys, action, emotion]

100%|██████████████████████████████████████████████████████████████████████████████| 4949/4949 [02:52<00:00, 28.64it/s]


In [14]:
new_label_train.to_pickle('data/key_label_train')
new_label_val.to_pickle('data/key_label_val')