### 수행순서

#### 1. 얼굴 위치 정보, 나이, 감정정보가 있는 사물(얼굴)만 어노테이션으로 파싱
#### 2. 파싱한 어노테이션을 읽어와서 일정 크기 이상의 얼굴만 가져옴 (현재 코드는 48x48 기준이며, 224x224 기준은 주석 처리되어 있음)

### 필요 라이브러리: tqdm, ray, numpy, matplotlib, opencv-python(cv2), Pillow(PIL), dlib, ipywidgets

### 준비사항
* 사전에 AiHub의 [멀티모달영상데이터](http://aihub.or.kr/aidata/137)를 다운로드하여 압축해제
* 압축 해제한 폴더의 경로를 `CLIP_FOLDER` 변수에 지정
* 현재는 zip파일 하나 당 처리하도록 구성되어 있음

In [50]:
import os
# An empty CUDA_VISIBLE_DEVICES hides every GPU from CUDA-based libraries.
os.environ.update({'CUDA_VISIBLE_DEVICES': ''})
# Record the launch directory both as a Python global and as an environment
# variable of the same name.
BASE_FOLDER = os.getcwd()
os.environ['BASE_FOLDER'] = BASE_FOLDER
print("BASE_FOLDER", BASE_FOLDER)

import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
import dlib

import copy
import csv
import json
import pickle
import ipywidgets
from glob import glob
from tqdm import tqdm

import ray

# Start the parallel-processing library (Ray).
# Shut down first so that re-running this cell does not call ray.init() on an
# already initialised Ray instance.
ray.shutdown()
# ray.init(num_cpus=8, num_gpus=0)
# NOTE(review): `webui_host` looks like the keyword used by older Ray releases
# (newer ones appear to call it `dashboard_host`) — confirm against the
# installed Ray version. '0.0.0.0' asks the web UI to listen on all interfaces.
ray.init(webui_host='0.0.0.0')
# Previously tried memory limits, kept for reference:
#     memory=2000 * 1024 * 1024,
#     object_store_memory=200 * 1024 * 1024,
#     driver_object_store_memory=100 * 1024 * 1024)

## 1. 얼굴 위치 정보, 나이, 감정정보가 있는 사물(얼굴을 포함한)만 어노테이션으로 파싱

In [2]:
# Folder that receives the extracted video frames and the annotation files.
OUTPUT_FOLDER = './result'

# Folder that holds one unpacked zip archive of the dataset.
CLIP_FOLDER = '0001-0400'
# CLIP_FOLDER = '0801-1200'

# Per-clip sub-folders of the unpacked archive. Sorted so that the work
# partitioning is reproducible between runs (glob order is arbitrary), and
# limited to directories because each entry is later treated as a clip folder.
CLIP_FOLDER_PATHS = sorted(
    path for path in glob(os.path.join(CLIP_FOLDER, '*')) if os.path.isdir(path)
)

# Paths of every annotation (.json) and video (.mp4) file, sorted.
# Sorting at construction avoids the former `a.sort(), b.sort()` tuple
# expression, which made the cell display a meaningless `(None, None)`.
clip_json_paths = sorted(glob(os.path.join(CLIP_FOLDER, "*/*.json")))
clip_mp4_paths = sorted(glob(os.path.join(CLIP_FOLDER, "*/*.mp4")))

(None, None)

In [49]:
BASE_FOLDER, CLIP_FOLDER

### 1.1 MP4파일들로 부터 비디오 프레임 생성 및 어노테이션 생성

In [4]:
def video_to_frames(clip_path:str):
    """ Dump every frame of a clip's video to numbered JPEG files.

    The video is read from ``<clip_path>/<clip name>.mp4`` and the frames are
    written to ``OUTPUT_FOLDER/<parent folder>/<clip name>/<n>.jpg`` with ``n``
    starting at 1.

    inputs:
        clip_path: path of the clip folder, e.g. ".../0001-0400/clip_98"
    outputs:
        output_paths: list of the JPEG paths that were written, in frame order
    """
    # Normalising drops a trailing separator, so the last component is always
    # the clip name, e.g. [..., '0001-0400', 'clip_98'].
    clip_dir = os.path.normpath(clip_path)
    path_parts = clip_dir.split(os.sep)

    # <clip_path>/clip_98.mp4 — joined onto the normalised path itself rather
    # than rebuilt from its parts with a hard-coded leading '/', so relative
    # paths keep working.
    video_path = os.path.join(clip_dir, path_parts[-1] + '.mp4')

    # result/0001-0400/clip_98
    output_folder_path = os.path.join(OUTPUT_FOLDER, *path_parts[-2:])

    output_paths = []
    vidcap = cv2.VideoCapture(video_path)
    try:
        success, image = vidcap.read()
        if success:
            # Only create the folder when there is at least one frame to save.
            os.makedirs(output_folder_path, exist_ok=True)

        # Frame files are numbered from 1.
        count = 1
        while success:
            output_path = os.path.join(output_folder_path, "%d.jpg" % count)
            output_paths.append(output_path)
            cv2.imwrite(output_path, image)     # save frame as JPEG file
            success, image = vidcap.read()
            count += 1
    finally:
        # Release the capture handle even if writing a frame fails.
        vidcap.release()

    return output_paths


def json_to_info(clip_full_path:str):
    """ Build per-frame person annotations from a clip's JSON label file.

    Reads ``<clip_full_path>/<clip name>.json`` and keeps, per frame, an object
    whose ``person_id`` is present in the ``actor`` table and whose
    ``emotion.image.emotion`` label is not empty. Objects that lack any of the
    required keys are skipped.

    inputs:
        clip_full_path: full path of the clip folder (a trailing path
            separator is tolerated)
    outputs:
        results: dictionary keyed by frame index; each value holds person_id,
            age, gender, emotion and the box corners xtl/xbr/ytl/ybr rounded
            to integers

    NOTE: ``results`` holds a single entry per frame, so when several objects
    of one frame qualify, the one iterated last wins.
    """
    results = {}

    # Take the clip name from the normalised path: with a trailing separator a
    # plain os.path.split() would yield an empty name and look for '.json'.
    clip_name = os.path.basename(os.path.normpath(clip_full_path))
    clip_json_path = os.path.join(clip_full_path, clip_name + '.json')

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(clip_json_path, 'r', encoding='utf-8') as f:
        o_json = json.load(f)

    # Person (actor) table, looked up by person_id.
    dict_actors = o_json['actor']

    for frame_idx, frame_objects in o_json['data'].items():
        for obj in frame_objects.values():
            try:
                person_id = obj['person_id']
                actor = dict_actors[person_id]
                age = actor['age']
                gender = actor['gender']

                emotion = obj['emotion']
                if emotion['image']['emotion'] == "":
                    # No image-level emotion label for this object.
                    continue

                box = {corner: int(round(float(obj[corner])))
                       for corner in ('xtl', 'xbr', 'ytl', 'ybr')}
            except KeyError:
                # Incomplete object (missing person, actor, emotion or box key).
                continue

            results[frame_idx] = {'person_id':person_id, 'age':age, 'gender':gender,
                                  'emotion':emotion, **box}

    return results

@ray.remote
def parallel_write(clips:list):
    """ Extract frames and write one annotation pickle per clip (Ray task).

    For every clip the video is split into JPEG frames with video_to_frames,
    the label file is parsed with json_to_info, and the resulting
    ``{clip: {frame index: info}}`` dictionary is pickled to
    ``OUTPUT_FOLDER/<clip>.jsonpickle``. Each info entry carries the fields
    returned by json_to_info plus ``frame_image_path``.

    inputs:
        clips: clip folders relative to BASE_FOLDER, e.g. ["0001-0400/clip_324", ...]
    outputs:
        None; the results are written to disk
    """
    info_fields = ('person_id', 'age', 'gender', 'emotion', 'xtl', 'xbr', 'ytl', 'ybr')

    print(clips)

    for clip in clips:
        print('clip_id: ', clip)

        # Extract the video frames, e.g. BASE_FOLDER + "0001-0400/clip_324".
        clip_full_path = os.path.join(BASE_FOLDER, clip)
        video_to_frames(clip_full_path)
        print('video_to_frames done!!')

        # Parse the label file.
        person_infos = json_to_info(clip_full_path)
        print('json_to_info done!!')

        frames = {}
        for frame_idx, d_p_info in tqdm(person_infos.items()):
            frame_info = {field: d_p_info[field] for field in info_fields}
            frame_info['frame_image_path'] = os.path.join(OUTPUT_FOLDER, clip, str(frame_idx)+'.jpg')
            frames[str(frame_idx)] = frame_info
        face_info_with_imgs = {clip: frames}

        mid_result_output_path = os.path.join(OUTPUT_FOLDER, clip+'.jsonpickle' )
        print(mid_result_output_path)

        # The folder is normally created while frames are saved, but it is
        # missing when the video produced no frame at all.
        output_dir = os.path.dirname(mid_result_output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(mid_result_output_path, 'wb') as f:
            pickle.dump(face_info_with_imgs, f)

In [7]:
%%time

# clip
## frame
parallelism = 100
# Number of clips given to each of the `parallelism` evenly sized tasks.
sub_tak_idxes = len(CLIP_FOLDER_PATHS) // parallelism
# Clips left over after the even split.
remainings = len(CLIP_FOLDER_PATHS) - (sub_tak_idxes * parallelism)

clip_batches = []
if sub_tak_idxes:
    # Skipped when there are fewer clips than tasks: every slice would be empty.
    clip_batches = [CLIP_FOLDER_PATHS[i*sub_tak_idxes:(i+1)*sub_tak_idxes] for i in range(parallelism)]
if remainings != 0:
    clip_batches.append(CLIP_FOLDER_PATHS[-remainings:])

# A @ray.remote function cannot be called directly, so the leftover batch is
# submitted with .remote() like every other batch.
ray.get([parallel_write.remote(batch) for batch in clip_batches])

[2m[36m(pid=69931)[0m ['0001-0400/clip_333', '0001-0400/clip_334', '0001-0400/clip_335', '0001-0400/clip_336']
[2m[36m(pid=69931)[0m clip_id:  0001-0400/clip_333
[2m[36m(pid=69942)[0m ['0001-0400/clip_351', '0001-0400/clip_352', '0001-0400/clip_353', '0001-0400/clip_354']
[2m[36m(pid=69942)[0m clip_id:  0001-0400/clip_351
[2m[36m(pid=69952)[0m ['0001-0400/clip_315', '0001-0400/clip_316', '0001-0400/clip_317', '0001-0400/clip_318']
[2m[36m(pid=69952)[0m clip_id:  0001-0400/clip_315
[2m[36m(pid=69958)[0m ['0001-0400/clip_232', '0001-0400/clip_233', '0001-0400/clip_234', '0001-0400/clip_235']
[2m[36m(pid=69958)[0m clip_id:  0001-0400/clip_232
[2m[36m(pid=69935)[0m ['0001-0400/clip_283', '0001-0400/clip_284', '0001-0400/clip_285', '0001-0400/clip_286']
[2m[36m(pid=69935)[0m clip_id:  0001-0400/clip_283
[2m[36m(pid=69959)[0m ['0001-0400/clip_24', '0001-0400/clip_240', '0001-0400/clip_241', '0001-0400/clip_242']
[2m[36m(pid=69959)[0m clip_id:  0001-0400/cl

[2m[36m(pid=70004)[0m video_to_frames done!!
[2m[36m(pid=70004)[0m json_to_info done!!
[2m[36m(pid=70004)[0m ./result/0001-0400/clip_142.jsonpickle
100%|██████████| 941/941 [00:00<00:00, 28884.53it/s]
[2m[36m(pid=70004)[0m clip_id:  0001-0400/clip_143
[2m[36m(pid=69968)[0m video_to_frames done!!
[2m[36m(pid=69968)[0m json_to_info done!!
  0%|          | 0/905 [00:00<?, ?it/s]
[2m[36m(pid=69968)[0m ./result/0001-0400/clip_1.jsonpickle
[2m[36m(pid=69968)[0m clip_id:  0001-0400/clip_10
100%|██████████| 905/905 [00:00<00:00, 27592.10it/s]
[2m[36m(pid=69955)[0m video_to_frames done!!
[2m[36m(pid=69955)[0m json_to_info done!!
  0%|          | 0/882 [00:00<?, ?it/s]
[2m[36m(pid=69955)[0m ./result/0001-0400/clip_11.jsonpickle
[2m[36m(pid=69955)[0m clip_id:  0001-0400/clip_110
100%|██████████| 882/882 [00:00<00:00, 29871.02it/s]
[2m[36m(pid=69930)[0m video_to_frames done!!
[2m[36m(pid=69930)[0m json_to_info done!!
  0%|          | 0/1082 [00:00<?, ?it/s

[2m[36m(pid=69938)[0m ./result/0001-0400/clip_207.jsonpickle
[2m[36m(pid=69938)[0m clip_id:  0001-0400/clip_208
100%|██████████| 1390/1390 [00:00<00:00, 50568.41it/s]
[2m[36m(pid=69933)[0m video_to_frames done!!
[2m[36m(pid=69933)[0m json_to_info done!!
  0%|          | 0/1294 [00:00<?, ?it/s]
[2m[36m(pid=69933)[0m ./result/0001-0400/clip_243.jsonpickle
100%|██████████| 1294/1294 [00:00<00:00, 45634.73it/s]
[2m[36m(pid=69933)[0m clip_id:  0001-0400/clip_244
[2m[36m(pid=69935)[0m video_to_frames done!!
[2m[36m(pid=69935)[0m json_to_info done!!
  0%|          | 0/1295 [00:00<?, ?it/s]
[2m[36m(pid=69935)[0m ./result/0001-0400/clip_283.jsonpickle
100%|██████████| 1295/1295 [00:00<00:00, 25194.70it/s]
[2m[36m(pid=69935)[0m clip_id:  0001-0400/clip_284
[2m[36m(pid=69970)[0m video_to_frames done!!
[2m[36m(pid=69970)[0m json_to_info done!!
  0%|          | 0/1401 [00:00<?, ?it/s]
[2m[36m(pid=69970)[0m ./result/0001-0400/clip_377.jsonpickle
[2m[36m(pid=6

[2m[36m(pid=69983)[0m video_to_frames done!!
[2m[36m(pid=69983)[0m json_to_info done!!
[2m[36m(pid=70011)[0m video_to_frames done!!
  0%|          | 0/1609 [00:00<?, ?it/s]
[2m[36m(pid=69943)[0m video_to_frames done!!
[2m[36m(pid=69983)[0m ./result/0001-0400/clip_326.jsonpickle
[2m[36m(pid=69983)[0m clip_id:  0001-0400/clip_327
100%|██████████| 1609/1609 [00:00<00:00, 29099.36it/s]
[2m[36m(pid=69943)[0m json_to_info done!!
  0%|          | 0/1679 [00:00<?, ?it/s]
[2m[36m(pid=70011)[0m json_to_info done!!
[2m[36m(pid=69943)[0m ./result/0001-0400/clip_269.jsonpickle
100%|██████████| 1679/1679 [00:00<00:00, 29904.48it/s]
  0%|          | 0/1698 [00:00<?, ?it/s]
[2m[36m(pid=69943)[0m clip_id:  0001-0400/clip_27
[2m[36m(pid=70011)[0m ./result/0001-0400/clip_236.jsonpickle
100%|██████████| 1698/1698 [00:00<00:00, 24331.59it/s]
[2m[36m(pid=70011)[0m clip_id:  0001-0400/clip_237
[2m[36m(pid=70004)[0m video_to_frames done!!
[2m[36m(pid=70004)[0m json_to

[2m[36m(pid=69973)[0m video_to_frames done!!
[2m[36m(pid=69973)[0m json_to_info done!!
[2m[36m(pid=69973)[0m ./result/0001-0400/clip_194.jsonpickle
[2m[36m(pid=69973)[0m clip_id:  0001-0400/clip_195
100%|██████████| 741/741 [00:00<00:00, 37274.43it/s]
[2m[36m(pid=70014)[0m video_to_frames done!!
[2m[36m(pid=70014)[0m json_to_info done!!
  0%|          | 0/2367 [00:00<?, ?it/s]
[2m[36m(pid=70014)[0m ./result/0001-0400/clip_135.jsonpickle
[2m[36m(pid=70014)[0m clip_id:  0001-0400/clip_136
100%|██████████| 2367/2367 [00:00<00:00, 28243.47it/s]
[2m[36m(pid=69955)[0m video_to_frames done!!
[2m[36m(pid=69955)[0m json_to_info done!!
  0%|          | 0/1156 [00:00<?, ?it/s]
[2m[36m(pid=69955)[0m ./result/0001-0400/clip_110.jsonpickle
[2m[36m(pid=69955)[0m clip_id:  0001-0400/clip_111
100%|██████████| 1156/1156 [00:00<00:00, 46912.70it/s]
[2m[36m(pid=69965)[0m video_to_frames done!!
[2m[36m(pid=69965)[0m json_to_info done!!
[2m[36m(pid=69965)[0m ./re

[2m[36m(pid=70005)[0m json_to_info done!!
[2m[36m(pid=70005)[0m ./result/0001-0400/clip_14.jsonpickle
[2m[36m(pid=70005)[0m clip_id:  0001-0400/clip_140
100%|██████████| 616/616 [00:00<00:00, 35626.31it/s]
[2m[36m(pid=70001)[0m video_to_frames done!!
[2m[36m(pid=69961)[0m video_to_frames done!!
[2m[36m(pid=69961)[0m json_to_info done!!
  0%|          | 0/1168 [00:00<?, ?it/s]
[2m[36m(pid=70001)[0m json_to_info done!!
[2m[36m(pid=70001)[0m ./result/0001-0400/clip_103.jsonpickle
[2m[36m(pid=70001)[0m clip_id:  0001-0400/clip_104
100%|██████████| 1194/1194 [00:00<00:00, 47759.82it/s]
[2m[36m(pid=69961)[0m ./result/0001-0400/clip_277.jsonpickle
[2m[36m(pid=69961)[0m clip_id:  0001-0400/clip_278
100%|██████████| 1168/1168 [00:00<00:00, 24390.71it/s]
[2m[36m(pid=69979)[0m video_to_frames done!!
[2m[36m(pid=69943)[0m video_to_frames done!!
[2m[36m(pid=69979)[0m json_to_info done!!
[2m[36m(pid=69943)[0m json_to_info done!!
[2m[36m(pid=69943)[0m .

[2m[36m(pid=69978)[0m video_to_frames done!!
[2m[36m(pid=69944)[0m video_to_frames done!!
[2m[36m(pid=69951)[0m video_to_frames done!!
[2m[36m(pid=69978)[0m json_to_info done!!
  0%|          | 0/1573 [00:00<?, ?it/s]
[2m[36m(pid=69978)[0m ./result/0001-0400/clip_266.jsonpickle
[2m[36m(pid=69978)[0m clip_id:  0001-0400/clip_267
100%|██████████| 1573/1573 [00:00<00:00, 40060.48it/s]
[2m[36m(pid=69951)[0m json_to_info done!!
  0%|          | 0/1413 [00:00<?, ?it/s]
[2m[36m(pid=69944)[0m json_to_info done!!
  0%|          | 0/1386 [00:00<?, ?it/s]
[2m[36m(pid=69951)[0m ./result/0001-0400/clip_36.jsonpickle
[2m[36m(pid=69951)[0m clip_id:  0001-0400/clip_360
100%|██████████| 1413/1413 [00:00<00:00, 25552.42it/s]
[2m[36m(pid=69944)[0m ./result/0001-0400/clip_211.jsonpickle
[2m[36m(pid=69944)[0m clip_id:  0001-0400/clip_212
100%|██████████| 1386/1386 [00:00<00:00, 28772.61it/s]
[2m[36m(pid=69996)[0m video_to_frames done!!
[2m[36m(pid=69996)[0m json_to

[2m[36m(pid=69990)[0m json_to_info done!!
  0%|          | 0/1436 [00:00<?, ?it/s]
[2m[36m(pid=69990)[0m ./result/0001-0400/clip_162.jsonpickle
[2m[36m(pid=69990)[0m clip_id:  0001-0400/clip_163
100%|██████████| 1436/1436 [00:00<00:00, 30750.72it/s]
[2m[36m(pid=69942)[0m video_to_frames done!!
[2m[36m(pid=69942)[0m json_to_info done!!
  0%|          | 0/1821 [00:00<?, ?it/s]
[2m[36m(pid=69942)[0m ./result/0001-0400/clip_352.jsonpickle
[2m[36m(pid=69942)[0m clip_id:  0001-0400/clip_353
100%|██████████| 1821/1821 [00:00<00:00, 49014.47it/s]
[2m[36m(pid=69992)[0m video_to_frames done!!
[2m[36m(pid=69992)[0m json_to_info done!!
  0%|          | 0/2095 [00:00<?, ?it/s]
[2m[36m(pid=69992)[0m ./result/0001-0400/clip_349.jsonpickle
[2m[36m(pid=69992)[0m clip_id:  0001-0400/clip_35
100%|██████████| 2095/2095 [00:00<00:00, 36389.89it/s]
[2m[36m(pid=70006)[0m video_to_frames done!!
[2m[36m(pid=70006)[0m json_to_info done!!
  0%|          | 0/1111 [00:00<?, ?

[2m[36m(pid=70010)[0m json_to_info done!!
  0%|          | 0/1478 [00:00<?, ?it/s]
[2m[36m(pid=70010)[0m ./result/0001-0400/clip_249.jsonpickle
[2m[36m(pid=70010)[0m clip_id:  0001-0400/clip_25
100%|██████████| 1478/1478 [00:00<00:00, 27644.56it/s]
[2m[36m(pid=69943)[0m json_to_info done!!
  0%|          | 0/1502 [00:00<?, ?it/s]
[2m[36m(pid=69943)[0m ./result/0001-0400/clip_270.jsonpickle
[2m[36m(pid=69943)[0m clip_id:  0001-0400/clip_271
100%|██████████| 1502/1502 [00:00<00:00, 27837.97it/s]
[2m[36m(pid=69975)[0m video_to_frames done!!
[2m[36m(pid=69975)[0m json_to_info done!!
  0%|          | 0/1380 [00:00<?, ?it/s]
[2m[36m(pid=69975)[0m ./result/0001-0400/clip_223.jsonpickle
[2m[36m(pid=69975)[0m clip_id:  0001-0400/clip_224
100%|██████████| 1380/1380 [00:00<00:00, 42245.49it/s]
[2m[36m(pid=69970)[0m video_to_frames done!!
[2m[36m(pid=69970)[0m json_to_info done!!
  0%|          | 0/1084 [00:00<?, ?it/s]
[2m[36m(pid=69970)[0m ./result/0001-040

[2m[36m(pid=70005)[0m video_to_frames done!!
[2m[36m(pid=70005)[0m json_to_info done!!
  0%|          | 0/2081 [00:00<?, ?it/s]
[2m[36m(pid=70005)[0m ./result/0001-0400/clip_140.jsonpickle
100%|██████████| 2081/2081 [00:00<00:00, 26611.06it/s]
[2m[36m(pid=70005)[0m clip_id:  0001-0400/clip_141
[2m[36m(pid=70000)[0m video_to_frames done!!
[2m[36m(pid=70000)[0m json_to_info done!!
  0%|          | 0/1712 [00:00<?, ?it/s]
[2m[36m(pid=70000)[0m ./result/0001-0400/clip_274.jsonpickle
100%|██████████| 1712/1712 [00:00<00:00, 30092.53it/s]
[2m[36m(pid=70000)[0m clip_id:  0001-0400/clip_275
[2m[36m(pid=69937)[0m video_to_frames done!!
[2m[36m(pid=69937)[0m json_to_info done!!
  0%|          | 0/1739 [00:00<?, ?it/s]
[2m[36m(pid=69937)[0m ./result/0001-0400/clip_184.jsonpickle
[2m[36m(pid=69937)[0m clip_id:  0001-0400/clip_185
100%|██████████| 1739/1739 [00:00<00:00, 48244.51it/s]
[2m[36m(pid=69994)[0m video_to_frames done!!
[2m[36m(pid=69994)[0m json_t

[2m[36m(pid=69998)[0m json_to_info done!!
  0%|          | 0/1192 [00:00<?, ?it/s]
[2m[36m(pid=69998)[0m ./result/0001-0400/clip_257.jsonpickle
100%|██████████| 1192/1192 [00:00<00:00, 24980.44it/s]
[2m[36m(pid=69998)[0m ['0001-0400/clip_89', '0001-0400/clip_9', '0001-0400/clip_90', '0001-0400/clip_91']
[2m[36m(pid=69998)[0m clip_id:  0001-0400/clip_89
[2m[36m(pid=69983)[0m video_to_frames done!!
[2m[36m(pid=69983)[0m json_to_info done!!
  0%|          | 0/1990 [00:00<?, ?it/s]
[2m[36m(pid=69983)[0m ./result/0001-0400/clip_328.jsonpickle
100%|██████████| 1990/1990 [00:00<00:00, 32966.40it/s]
[2m[36m(pid=69983)[0m clip_id:  0001-0400/clip_329
[2m[36m(pid=70008)[0m video_to_frames done!!
[2m[36m(pid=70008)[0m json_to_info done!!
  0%|          | 0/1625 [00:00<?, ?it/s]
[2m[36m(pid=70008)[0m ./result/0001-0400/clip_302.jsonpickle
[2m[36m(pid=70008)[0m clip_id:  0001-0400/clip_303
100%|██████████| 1625/1625 [00:00<00:00, 22702.95it/s]
[2m[36m(pid=69957

[2m[36m(pid=69945)[0m video_to_frames done!!
[2m[36m(pid=69945)[0m json_to_info done!!
[2m[36m(pid=69945)[0m ./result/0001-0400/clip_253.jsonpickle
100%|██████████| 1141/1141 [00:00<00:00, 40040.33it/s]
[2m[36m(pid=70007)[0m video_to_frames done!!
[2m[36m(pid=70007)[0m json_to_info done!!
  0%|          | 0/1390 [00:00<?, ?it/s]
[2m[36m(pid=70007)[0m ./result/0001-0400/clip_178.jsonpickle
100%|██████████| 1390/1390 [00:00<00:00, 44108.48it/s]
[2m[36m(pid=69988)[0m video_to_frames done!!
[2m[36m(pid=69988)[0m json_to_info done!!
[2m[36m(pid=69988)[0m ./result/0001-0400/clip_181.jsonpickle
100%|██████████| 1161/1161 [00:00<00:00, 45801.66it/s]
[2m[36m(pid=69961)[0m video_to_frames done!!
[2m[36m(pid=69961)[0m json_to_info done!!
  0%|          | 0/1219 [00:00<?, ?it/s]
[2m[36m(pid=69961)[0m ./result/0001-0400/clip_279.jsonpickle
100%|██████████| 1219/1219 [00:00<00:00, 50191.98it/s]
[2m[36m(pid=69947)[0m video_to_frames done!!
[2m[36m(pid=69947)[

[2m[36m(pid=70009)[0m video_to_frames done!!
[2m[36m(pid=70009)[0m json_to_info done!!
[2m[36m(pid=70009)[0m ./result/0001-0400/clip_74.jsonpickle
100%|██████████| 1437/1437 [00:00<00:00, 53171.14it/s]
[2m[36m(pid=70009)[0m clip_id:  0001-0400/clip_75
[2m[36m(pid=69930)[0m video_to_frames done!!
[2m[36m(pid=69930)[0m json_to_info done!!
  0%|          | 0/715 [00:00<?, ?it/s]
[2m[36m(pid=69930)[0m ./result/0001-0400/clip_4.jsonpickle
[2m[36m(pid=69930)[0m clip_id:  0001-0400/clip_40
100%|██████████| 715/715 [00:00<00:00, 55435.09it/s]
[2m[36m(pid=69978)[0m video_to_frames done!!
[2m[36m(pid=69955)[0m video_to_frames done!!
[2m[36m(pid=69955)[0m json_to_info done!!
[2m[36m(pid=69955)[0m ./result/0001-0400/clip_85.jsonpickle
[2m[36m(pid=69955)[0m clip_id:  0001-0400/clip_86
100%|██████████| 1149/1149 [00:00<00:00, 56310.24it/s]
[2m[36m(pid=69978)[0m json_to_info done!!
[2m[36m(pid=69978)[0m ./result/0001-0400/clip_268.jsonpickle
100%|█████████

[2m[36m(pid=70012)[0m video_to_frames done!!
[2m[36m(pid=70012)[0m json_to_info done!!
[2m[36m(pid=70012)[0m ./result/0001-0400/clip_64.jsonpickle
100%|██████████| 1460/1460 [00:00<00:00, 80508.05it/s]
[2m[36m(pid=70012)[0m clip_id:  0001-0400/clip_65
[2m[36m(pid=69996)[0m video_to_frames done!!
[2m[36m(pid=69996)[0m json_to_info done!!
[2m[36m(pid=69996)[0m ./result/0001-0400/clip_42.jsonpickle
[2m[36m(pid=69996)[0m clip_id:  0001-0400/clip_43
0it [00:00, ?it/s]6)[0m 
[2m[36m(pid=69990)[0m video_to_frames done!!
[2m[36m(pid=69990)[0m json_to_info done!!
[2m[36m(pid=69990)[0m ./result/0001-0400/clip_68.jsonpickle
100%|██████████| 1499/1499 [00:00<00:00, 63907.28it/s]
[2m[36m(pid=69990)[0m clip_id:  0001-0400/clip_69
[2m[36m(pid=69940)[0m video_to_frames done!!
[2m[36m(pid=69973)[0m video_to_frames done!!
[2m[36m(pid=69940)[0m json_to_info done!!
[2m[36m(pid=69940)[0m ./result/0001-0400/clip_79.jsonpickle
[2m[36m(pid=69940)[0m clip_id:

[2m[36m(pid=69984)[0m video_to_frames done!!
[2m[36m(pid=69984)[0m json_to_info done!!
  0%|          | 0/2057 [00:00<?, ?it/s]
[2m[36m(pid=69984)[0m ./result/0001-0400/clip_134.jsonpickle
100%|██████████| 2057/2057 [00:00<00:00, 56859.08it/s]
[2m[36m(pid=70012)[0m video_to_frames done!!
[2m[36m(pid=70012)[0m json_to_info done!!
  0%|          | 0/1501 [00:00<?, ?it/s]
[2m[36m(pid=70012)[0m ./result/0001-0400/clip_65.jsonpickle
[2m[36m(pid=70012)[0m clip_id:  0001-0400/clip_66
100%|██████████| 1501/1501 [00:00<00:00, 66025.36it/s]
[2m[36m(pid=69998)[0m video_to_frames done!!
[2m[36m(pid=69998)[0m json_to_info done!!
  0%|          | 0/874 [00:00<?, ?it/s]
[2m[36m(pid=69998)[0m ./result/0001-0400/clip_90.jsonpickle
100%|██████████| 874/874 [00:00<00:00, 65800.68it/s]
[2m[36m(pid=69998)[0m clip_id:  0001-0400/clip_91
[2m[36m(pid=69936)[0m video_to_frames done!!
[2m[36m(pid=69936)[0m json_to_info done!!
[2m[36m(pid=69936)[0m ./result/0001-0400/cli

## 2. 파싱한 어노테이션을 읽어와서 일정 크기 이상의 얼굴만 가져옴 (현재 코드는 48x48 기준이며, 224x224 기준은 주석 처리되어 있음)

### 2.1 생성한 어노테이션 로딩

In [48]:
# Glob pattern matching the per-clip annotation pickles of this archive.
annotation_pattern = os.path.join(OUTPUT_FOLDER,CLIP_FOLDER+'/*.jsonpickle')
parsed_annotations = glob(annotation_pattern)

# Display how many files were found together with a small sample.
len(parsed_annotations), parsed_annotations[:2]

(400,
 ['./result/0001-0400/clip_142.jsonpickle',
  './result/0001-0400/clip_1.jsonpickle'])

In [6]:
@ray.remote
def load_filter(parsed_annotation_path:str, min_face_size:int=48):
    """ Keep only the annotated frames in which a large enough face is found (Ray task).

    Every frame image referenced by the annotation file is run through dlib's
    frontal face detector. A frame is dropped when no face is detected or when
    the first detected face is narrower or shorter than ``min_face_size``.
    For the frames that are kept, xtl/xbr/ytl/ybr are replaced by the
    rectangle of that detected face.

    inputs:
        parsed_annotation_path: path of a pickled ``{clip: {frame: info}}``
            dictionary; each info needs a 'frame_image_path' entry
        min_face_size: minimum width and height, in pixels, of the detected
            face rectangle
    outputs:
        dictionary with the same layout holding copies of the kept frames
        (a clip whose frames were all dropped maps to an empty dictionary)
    """
    detector = dlib.get_frontal_face_detector()

    # NOTE: pickle can execute code while loading; only open files produced
    # by this pipeline.
    with open(parsed_annotation_path, 'rb') as f:
        p_a = pickle.load(f)

    filtered = {}
    for clip_key, frames in p_a.items():
        kept_frames = filtered[clip_key] = {}

        for frame_key, frame_info in frames.items():
            img = plt.imread(frame_info['frame_image_path'])
            faces = detector(img)

            if len(faces) == 0:
                # No face found: drop the frame.
                continue

            # Only the first detection is considered.
            first_face = faces[0]
            minY, maxY = first_face.top(), first_face.bottom()
            minX, maxX = first_face.left(), first_face.right()

            if maxY - minY < min_face_size or maxX - minX < min_face_size:
                # Face too small: drop the frame.
                continue

            # Copy so the loaded annotations stay untouched, then swap in the
            # detected face rectangle.
            kept = copy.deepcopy(frame_info)
            kept.update(ytl=minY, ybr=maxY, xtl=minX, xbr=maxX)
            kept_frames[frame_key] = kept

    return filtered

def to_iterator(obj_ids):
    """ Yield the result of each Ray object id as soon as it is ready.

    inputs:
        obj_ids: list of Ray object ids
    outputs:
        generator of results, one per ray.wait() round
    """
    pending = obj_ids
    while pending:
        # ray.wait splits the ids into (ready, still pending).
        finished, pending = ray.wait(pending)
        first_ready = finished[0]
        yield ray.get(first_ready)


### 2.2 특정 사이즈의 얼굴 이미지가 포함된 데이터만 어노테이션 재생성

In [None]:
# Rebuild the annotations so that only frames containing a face of the
# required size remain: one load_filter task per annotation file.
obj_ids = list(map(load_filter.remote, parsed_annotations))

# Drain the tasks through tqdm purely to show progress; the results are
# collected below in submission order.
for x in tqdm(to_iterator(obj_ids), total=len(obj_ids)):
    pass

# ray_result = ray.get([load_filter.remote(p_a) for p_a in parsed_annotations])

ray_result = ray.get(obj_ids)

# One top-level key per unpacked archive.
total_result = {CLIP_FOLDER:ray_result}

annotation_json_path = CLIP_FOLDER+'_annotation.json'
with open(annotation_json_path, 'w') as f:
    json.dump(total_result, f)

In [8]:
with open(CLIP_FOLDER+'_annotation.json', 'r') as f:
    j_load_tmp = json.load(f)

# Flatten {archive: [{clip: {frame: info}}, ...]} into one list of frame infos.
# Plain loops replace the former comprehension that was used only for its
# append() side effect (and needed a trailing "" to hide its list of None).
frame_list = []
for clip_dicts in j_load_tmp.values():
    for clips in clip_dicts:
        for frames in clips.values():
            frame_list.extend(frames.values())

''

In [9]:
len(frame_list), frame_list[300000]

(470438,
 {'person_id': '20',
  'age': '40s',
  'gender': 'female',
  'emotion': {'image': {'emotion': 'surprise', 'valence': 4, 'arousal': 2}},
  'xtl': 880,
  'xbr': 966,
  'ytl': 285,
  'ybr': 371,
  'frame_image_path': './result/0001-0400/clip_372/688.jpg'})

In [30]:
def _write_emotion_csv(i_list:list, idx):
    """ Write the 48x48 grayscale face crops of `i_list` to one CSV partition.

    Each row holds the image emotion label and the 2304 pixel values joined by
    single spaces. The file is
    ``OUTPUT_FOLDER/CLIP_FOLDER/<CLIP_FOLDER>_<idx>.csv``.
    """
    result_list = []
    for i in i_list:
        # The context manager closes the frame file once it has been decoded.
        with Image.open(i['frame_image_path']) as frame:
            img = frame.convert('L')
        img = img.crop((i['xtl'],i['ytl'], i['xbr'], i['ybr']))

        w, h = img.size

        # NOTE(review): a crop of exactly 48x48 is skipped by this condition,
        # while load_filter keeps faces from 48 pixels up — confirm which
        # threshold is intended.
        if h > 48 or w > 48:
            resized_face_gray_img = np.array(img.resize((48, 48)), 'uint8')
            result_list.append((i['emotion']['image']['emotion'], resized_face_gray_img))

    output_dir = os.path.join(OUTPUT_FOLDER, CLIP_FOLDER)
    os.makedirs(output_dir, exist_ok=True)

    # newline='' is what the csv module expects of files it writes to.
    with open(os.path.join(output_dir, CLIP_FOLDER+'_'+str(idx)+'.csv'), 'w', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(['emotion','face_img'])
        # str() on plain ints replaces astype(np.str): the np.str alias is no
        # longer available in current NumPy releases.
        csv_writer.writerows(
            (emotion, " ".join(map(str, pixels.reshape(48*48).tolist())))
            for emotion, pixels in result_list
        )


@ray.remote
def parsing_json_to_csv_with_img(i_list:list, idx:int):
    """ Ray task: convert one partition of frame annotations to a CSV file.

    inputs:
        i_list: frame annotation dicts (frame_image_path, xtl/ytl/xbr/ybr, emotion)
        idx: partition number used in the output file name
    outputs:
        None; the CSV is written to disk
    """
    _write_emotion_csv(i_list, idx)


def parsing_json_to_csv_with_img_single(i_list:list, idx:int):
    """ Same conversion as parsing_json_to_csv_with_img, run in the calling process.

    `idx` is now an explicit parameter: the body previously read an undefined
    name `idx`, and the notebook calls this function with two arguments.

    inputs:
        i_list: frame annotation dicts (frame_image_path, xtl/ytl/xbr/ybr, emotion)
        idx: partition number used in the output file name
    outputs:
        None; the CSV is written to disk
    """
    _write_emotion_csv(i_list, idx)

In [42]:
num_partition = 20
# File-name suffix of the CSV that receives the leftover frames.
seq_rest_partition = 21
num_frame_list = len(frame_list)
num_sub_frame_list = num_frame_list // num_partition
num_rest_sub_frame_list = num_frame_list - (num_sub_frame_list * num_partition)

# Process the evenly sized partitions in parallel.
result_obj_ids = [parsing_json_to_csv_with_img.remote(frame_list[(0+i)*num_sub_frame_list:(1+i)*num_sub_frame_list], i) for i in range(num_partition) ]

# Process the few frames left over after the even split. They start right
# after the last full partition (num_sub_frame_list * num_partition); the
# former hard-coded factor of 100 pointed past the end of the list, so the
# slice was always empty. The remote variant is used because it already takes
# the partition index.
if num_rest_sub_frame_list:
    result_obj_ids.append(parsing_json_to_csv_with_img.remote(frame_list[num_sub_frame_list * num_partition:], seq_rest_partition))

for x in tqdm(to_iterator(result_obj_ids), total=len(result_obj_ids)):
    pass

In [43]:
len(frame_list)

470438

In [44]:
# Print the start and end frame index of each of the 20 partitions.
for i in range(20):
    start = i * num_sub_frame_list
    print(start, start + num_sub_frame_list)

0 23521
23521 47042
47042 70563
70563 94084
94084 117605
117605 141126
141126 164647
164647 188168
188168 211689
211689 235210
235210 258731
258731 282252
282252 305773
305773 329294
329294 352815
352815 376336
376336 399857
399857 423378
423378 446899
446899 470420


In [98]:


# detector = dlib.get_frontal_face_detector()

# for clip_idx in face_info_with_imgs:

#     for frame_idx in face_info_with_imgs[clip_idx]:
        
#         # load img
#         img = plt.imread(face_info_with_imgs[clip_idx][frame_idx]['frame_image_path'])
#         print('----------- img path: ', face_info_with_imgs[clip_idx][frame_idx]['frame_image_path'])
#         print(face_info_with_imgs[clip_idx][frame_idx]['emotion'])
        
#         # slice object
#         xtl = int(face_info_with_imgs[clip_idx][frame_idx]['xtl'])
#         xbr = int(face_info_with_imgs[clip_idx][frame_idx]['xbr'])
#         ytl = int(face_info_with_imgs[clip_idx][frame_idx]['ytl'])
#         ybr = int(face_info_with_imgs[clip_idx][frame_idx]['ybr'])

#         obj = img[ytl:ybr+1, xtl:xbr+1]
#         faces = detector(obj)
        
#         minY = faces[0].top()
#         maxY = faces[0].bottom()
#         minX = faces[0].left()
#         maxX = faces[0].right()
#         face = obj[minY:maxY,minX:maxX]
#         plt.imshow(face)
        
#         break
#     break