# Cook Video Analysis
   1. Analysis engine
   2. Vision analysis
   3. Text analysis
   4. Result

In [1]:
# SetUp
import os
from pathlib import Path
import json
import pandas as pd
import torch
import cv2
import copy
from tqdm import tqdm
from collections import Counter
import matplotlib.pyplot as plt
from mmaction.apis import init_recognizer, inference_recognizer
from model.run_model import communicator

In [None]:
# action_recog-requirements

# MMCV==1.3.6
# opencv-python==4.5.5.64
# tqdm

# folder

# mmaction
# tools

# 1. Analysis Engine 
## (from sogang-mmlab: JinhaSong/analysis-engine)
   - engine list
### hidf-engine-asr_main : Automatic Speech Recognition
### hidf-engine-food_main : Food Detection(Yolov4 + Efficientnet)
### hidf-engine-places_main : Places Recognition(Resnet)
### hidf-engine-object_main : Object Detection(Efficientdet)
### hidf-engine-scenetext_main : Scene text recognition
   - engine address
### hidf-engine-asr_main : http://mlamethyst.sogang.ac.kr:9002
### hidf-engine-food_main : http://mlamethyst.sogang.ac.kr:10000
### hidf-engine-places_main : http://mlamethyst.sogang.ac.kr:10001
### hidf-engine-object_main : http://mlamethyst.sogang.ac.kr:11000
### hidf-engine-scenetext_main : http://mlamethyst.sogang.ac.kr:12000

In [27]:
def analysis_engine_dir(module, video_dir, video_format, fps):
    """
    Run every requested analysis engine on every video found in a directory.

    Args:
        module: dict, engine name -> engine port number,
            e.g. {'asr': 9002, 'food': 10000, 'places': 10001, 'obj': 11000, 'scenetxt': 12000}.
            The 'asr' (speech recognition) engine is temporarily unusable and is skipped.
        video_dir: str, directory that holds the videos,
            e.g. '/home/Cook_Video_Analysis/Dataset/test/'.
        video_format: list, accepted video file extensions, e.g. ['mp4', 'avi'].
        fps: int, frames per second, forwarded to the engine request, e.g. 30.

    Returns:
        list: a one-element list ``[{video_stem: {engine_name: engine_result}}]``
        holding one entry per video in the directory. Videos that share a stem
        but differ in extension overwrite each other.
    """

    # engine-server communicator instance
    com = communicator()

    # Select files by their real extension (case-insensitive) instead of a
    # substring test, so e.g. 'mp4_notes.txt' is not mistaken for a video.
    wanted_ext = {fmt.lower().lstrip('.') for fmt in video_format}
    video_files = sorted(
        entry for entry in os.listdir(video_dir)
        if Path(entry).suffix.lower().lstrip('.') in wanted_ext
    )

    result_video = {}

    for video_file in video_files:

        print(f"_____Video-name is {video_file}______")

        video_path = os.path.join(video_dir, video_file)
        result_module = {}

        for name, addr in module.items():

            # asr (audio-speech-recognition) model cannot be used temporarily
            if name == 'asr':
                continue

            print(f"{name}-engine is running")

            result_module[name] = com.communicator_video(f"http://mllime.sogang.ac.kr:{addr}/video/", video_path, "", fps, "", "", "video")

        # Store inside the loop so every video is kept, keyed by the file name
        # without its extension (whatever the extension length).
        result_video[Path(video_file).stem] = result_module

    return [result_video]

In [None]:
# test: run the scene-text and food engines on the videos in the test directory
# that match video_format, then store the combined result as JSON.
# NOTE(review): save_load_json is defined in a later cell of this notebook, so that
# cell has to be executed before this one on a fresh kernel.

video_dir = '/home/Cook_Video_Analysis/Dataset/test/'
module = {'scenetxt': 12000, 'food':10000} #{'asr':9002, 'food':10000, 'places':10001, 'obj': 11000, 'scenetxt': 12000}
video_format = ['mp4']
fps = 30

result = analysis_engine_dir(module, video_dir, video_format, fps)
save_load_json(sav=True, data=result, file_path='./Dataset/engine_result/result9_30fps.json')

_____Video-name is carae_woodong.mp4______
scenetxt-engine is running
food-engine is running
_____Video-name is saewoogang.mp4______
scenetxt-engine is running
food-engine is running
_____Video-name is dorirocos.mp4______
scenetxt-engine is running
food-engine is running
_____Video-name is miyeok_gook.mp4______
scenetxt-engine is running
food-engine is running
_____Video-name is myeongrang_gui.mp4______
scenetxt-engine is running
food-engine is running
_____Video-name is gimchi_zigae.mp4______
scenetxt-engine is running
food-engine is running
_____Video-name is gimchi_geon.mp4______
scenetxt-engine is running


In [217]:
def analysis_engine(module, video_path, fps):
    """
    Send one video to each requested analysis engine and collect the responses.

    Args:
        module: dict, engine name -> engine port number,
            e.g. {'asr': 9002, 'food': 10000, 'places': 10001, 'obj': 11000, 'scenetxt': 12000}.
        video_path: str, path of the video file,
            e.g. '/home/Cook_Video_Analysis/Dataset/meat.mp4'.
        fps: int, frames per second, forwarded to the engine request, e.g. 30.

    Returns:
        dict: ``{video_stem: {engine_name: engine_response}}`` where video_stem
        is the file name without its extension.
    """
    # client used to talk to the engine servers
    client = communicator()

    # file name without extension, used as the key of the returned dict
    stem = Path(video_path).stem

    responses = {}

    for engine, port in module.items():

        print(f"{engine}-engine is running") 

        # asr (audio-speech-recognition) cannot be used temporarily, so no request is sent for it
        if engine != 'asr':
            responses[engine] = client.communicator_video(f"http://mllime.sogang.ac.kr:{port}/video/", video_path, "", fps, "", "", "video")

    return {stem: responses}

In [None]:
# test

# module = {'food':10000, 'scenetxt': 12000} #{'asr':9002, 'food':10000, 'places':10001, 'obj': 11000, 'scenetxt': 12000}
# video_path = '/home/Cook_Video_Analysis/Dataset/meat.mp4'
# fps=30

# result = analysis_engine(module, video_path, fps)

In [None]:
# example

# print(result)

# {'bundaegi': {'hy_scenetxt': {'model_name': 'scene_text_recognition',
#    'analysis_time': 6322.094475984573,
#    'frame_results': [{'frame_result': [{'label': [{'description': '박좆웑',
#          'score': 0.9837325215339661}],
#        'position': {'x': 96, 'y': 60, 'w': 309, 'h': 99}},
#       {'label': [{'description': 'drunken', 'score': 0.9999628067016602}],
#        'position': {'x': 144, 'y': 36, 'w': 96, 'h': 18}},
#       {'label': [{'description': 'talk', 'score': 0.9999697208404541}],
#        'position': {'x': 243, 'y': 36, 'w': 51, 'h': 18}},
#       {'label': [{'description': 'show', 'score': 0.9999678134918213}],
#        'position': {'x': 294, 'y': 36, 'w': 66, 'h': 18}}],
#      'frame_url': '/media/20220804/bundaegi/1.jpg',
#      'frame_number': 30,
#      'timestamp': '0:00:01'},
#     {'frame_result': [{'label': [{'description': '박좆웑',
#          'score': 0.9856455326080322}],
#        'position': {'x': 96, 'y': 60, 'w': 309, 'h': 99}},
#       {'label': [{'description': 'drunken', 'score': 0.9999593496322632}],
#        'position': {'x': 144, 'y': 36, 'w': 96, 'h': 18}},
#       {'label': [{'description': 'talk', 'score': 0.999971866607666}],
#        'position': {'x': 243, 'y': 36, 'w': 51, 'h': 18}},
#       {'label': [{'description': 'show', 'score': 0.9999687671661377}],
#        'position': {'x': 294, 'y': 36, 'w': 66, 'h': 18}}],
#      'frame_url': '/media/20220804/bundaegi/2.jpg',
#      'frame_number': 60,
#      'timestamp': '0:00:02'},

   - Save& Load result
      - json

In [19]:
def save_load_json(file_path, sav = True, data=None):
    """
    Save ``data`` to a JSON file, or load a JSON file back.

    Args:
        file_path: str, file to write (sav=True) or to read (sav=False),
            e.g. '/Cook_Video_Analysis/Dataset/engine_result/meat.json'.
        sav: bool, default True. True saves ``data``; False loads the file.
        data: JSON-serialisable object to store when sav=True,
            e.g. the result of analysis_engine_dir(...). Ignored when loading.

    Returns:
        None when saving; the parsed JSON content when loading.
    """
    target = Path(file_path)
    video_name = target.stem

    if sav:
        # Create the output folder first so a long analysis run is not lost
        # to a missing directory.
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, 'w', encoding='utf-8') as outfile:
            # indent=4 for readability; ensure_ascii=False keeps Korean text
            # as-is instead of \uXXXX escapes.
            json.dump(data, outfile, indent=4, ensure_ascii=False)

        print(f"{video_name} save done.")
        return None

    # Read with the same encoding that was used for writing; the platform
    # default (e.g. cp949 on Korean Windows) cannot decode the UTF-8 output.
    with open(target, 'r', encoding='utf-8') as json_file:
        json_data = json.load(json_file)
    print(f"{video_name} load done.")
    return json_data

In [None]:
#load result

# result_30 = save_load_json(sav=False, file_path='./Dataset/engine_result/result_30fps.json')

# 2. Vision Analysis
- Classification
### - food classification
   - YOLOv4: MS COCO Dataset (classes related to food), e.g. 'bowl', 'wine glass', 'cup', 'fork', 'knife', 'spoon', …
   - EfficientNet: AI hub, KIST (주)휴먼ICT, 2017, 한국 이미지(음식), 
   source link: https://aihub.or.kr/aihubdata/data/view.do?currMenu=115&topMenu=100&aihubDataSe=realm&dataSetSn=79
<br/><br/>
- Action_recognition
### - MMAction2
   - Kinetics400
   - labels related to food, e.g. 'cooking egg', 'cooking chicken', 'breading or breadcrumbing', …

In [181]:
# preprocess result by score
def preprocess(data, score, video_name, module_name):
    """
    Collect the labels of all detections whose score exceeds a threshold.

    Args:
        data: dict, analysis-engine result (e.g. the loaded JSON), laid out as
            ``data[video_name][module_name]['frame_results']``.
        score: number, detections are kept only when their score is strictly
            greater than this, e.g. 90 (food: 0-100) or 0.9
            (obj, scenetxt, places: 0-1).
        video_name: str, video name without the file extension, e.g. 'egg'.
        module_name: str, one of the engine names used as keys of ``module``,
            e.g. 'food'.

    Returns:
        list: descriptions of the first label candidate of every kept
        detection, in frame order (duplicates are preserved).
    """

    label_list = []

    for fr_results in data[video_name][module_name]['frame_results']:
        detections = fr_results['frame_result']

        # frames without any result carry None
        if detections is None:
            continue

        for detection in detections:
            candidates = detection['label']

            # a detection without label candidates has nothing to report
            if not candidates:
                continue

            # only the first candidate is considered (presumably the top-1 — verify against the engine)
            first = candidates[0]
            if first['score'] > score:
                label_list.append(first['description'])

    return label_list

In [168]:
# Test
### analysis food_classfication - Top1 Tag ###

# egg_food = preprocess(data=result_30, score=90, video_name='egg', module_name='food')
# pd.Series(egg_food).value_counts().index[0]

'조림/메추리알장조림'

In [None]:
def splitter(video_path, segment_sec, sav_dir, fourcc):
    """
    Cut a video into consecutive fixed-length segments.

    The segments are written to ``{sav_dir}{video_stem}/{video_stem}_{i}.mp4``
    with ``i`` starting at 1.

    Args:
        video_path: str, video to split, e.g. '/home/Cook_Video_Analysis/Dataset/meat.mp4'.
        segment_sec: int, length of one segment in seconds, e.g. 10.
        sav_dir: str, output root directory; it is concatenated as-is, so it
            must end with a path separator, e.g. '/home/Cook_Video_Analysis/Dataset/segment/'.
        fourcc: str, four character code selecting the codec, e.g. 'mp4v'.

    Returns:
        None. Prints a message when the video cannot be opened or read.

    Raises:
        ValueError: if the segment length works out to less than one frame
            (e.g. the container reports an fps of 0).
    """

    vc = cv2.VideoCapture(video_path)

    try:
        # check whether video can be opened or not
        if not vc.isOpened():
            print("fail to open video")
            return

        # read first frame of video; it also provides the output frame size
        ret, frame = vc.read()
        if not ret:
            print("fail to read video")
            return
        h, w = frame.shape[:2]

        fps = round(vc.get(cv2.CAP_PROP_FPS))
        # step: number of frames in one segment
        step = int(round(segment_sec * fps))
        if step <= 0:
            raise ValueError(f"segment length must be at least one frame (segment_sec={segment_sec}, fps={fps})")

        codec = cv2.VideoWriter_fourcc(*fourcc)

        # save directory
        video_name = Path(video_path).stem
        out_dir = f"{sav_dir}{video_name}"
        os.makedirs(out_dir, exist_ok=True)

        writer = None
        segment_idx = 0
        frame_idx = 0

        try:
            # keep going for as long as frames can be decoded; segments are
            # driven by the frames actually read rather than by the container's
            # frame-count metadata, so no frame is dropped and no empty
            # trailing segment file is created
            while ret:
                if frame_idx % step == 0:
                    # segment boundary: close the previous file, open the next
                    if writer is not None:
                        writer.release()
                    segment_idx += 1
                    writer = cv2.VideoWriter(f"{out_dir}/{video_name}_{segment_idx}.mp4", codec, fps, (w, h))

                writer.write(frame)
                frame_idx += 1

                # next frame
                ret, frame = vc.read()
        finally:
            # release the writer even if decoding/encoding fails midway
            if writer is not None:
                writer.release()

        print('split done.')
    finally:
        vc.release()

In [None]:
def action_recognition(video_path, sav_dir, config_file, checkpoint_file, device, topk, split=False, segment_sec=None, fourcc=None):
    """
    Tag a video with its top-k Kinetics-400 action labels.

    Args:
        video_path: str, video to analyse, e.g. '/home/Cook_Video_Analysis/Dataset/meat.mp4'.
        sav_dir: str, root directory for the split segments (used when
            split=True); must end with a path separator,
            e.g. '/home/Cook_Video_Analysis/Dataset/segment/'.
        config_file: str, recognizer config,
            e.g. 'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py'.
        checkpoint_file: str, weight checkpoint,
            e.g. 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'.
        device: str, device to run on, e.g. 'cuda:0' or 'cpu'.
        topk: int, number of top tags to return, e.g. 3.
        split: bool, default False. True splits the video into segments, runs
            the recognizer on each and sums the scores per label; False runs
            the recognizer once on the whole video.
        segment_sec: int, segment length in seconds (required when split=True), e.g. 10.
        fourcc: str, four character code of the segment codec (required when
            split=True), e.g. 'mp4v'.

    Returns:
        list: up to ``topk`` label names, best first.

    Raises:
        ValueError: if split=True but segment_sec or fourcc is missing.
    """

    # fail fast, before the model is loaded
    if split and (segment_sec is None or fourcc is None):
        raise ValueError("split=True requires both segment_sec and fourcc")

    video_name = Path(video_path).stem

    # assign the desired device.
    device = torch.device(device)

    # build the model from a config file and a checkpoint file
    model = init_recognizer(config_file, checkpoint_file, device=device)

    # label names, one per line; line index == class index
    with open('tools/data/kinetics/label_map_k400.txt') as label_file:
        labels = [line.strip() for line in label_file]

    # whole-video analysis
    if not split:
        results = inference_recognizer(model, video=video_path)
        return [labels[k[0]] for k in results[:topk]]

    # split video into segments
    splitter(video_path, segment_sec, sav_dir, fourcc)

    seg_dir = f'{sav_dir}{video_name}/'
    prefix = f'{video_name}_'

    # keep only '<video_name>_<number>.*' entries so stray files or folders
    # (e.g. '.ipynb_checkpoints') do not break the numeric sort below
    seg_list = [
        entry for entry in os.listdir(seg_dir)
        if entry.startswith(prefix) and Path(entry).stem[len(prefix):].isdecimal()
    ]
    # sort segments in ascending segment-number order
    seg_sort = sorted(seg_list, key = lambda x: int(Path(x).stem[len(prefix):]))

    # summed score per label over all segments
    results_count = {}

    # run action recognition on each segment video
    for name in tqdm(seg_sort):
        video = f'{seg_dir}{name}'
        print(name)
        results = inference_recognizer(model, video)

        for k in results:
            label, score = labels[k[0]], k[1]
            if label in results_count:
                results_count[label] += score
            else:
                results_count[label] = score

    # topk label tag list, highest summed score first
    topk_tags = [label[0] for label in sorted(results_count.items(), key = lambda x: -x[1])[:topk]]

    return topk_tags

In [None]:
# test

# video_path = '/home/Cook_Video_Analysis/Dataset/meat.mp4'
# sav_dir = '/home/Cook_Video_Analysis/Dataset/segment/'
# config_file = 'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py'
# checkpoint_file = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'
# device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# topk=3
# segment_sec = 10
# fourcc = 'mp4v'

# meat_tags = action_recognition(video_path, sav_dir, config_file, checkpoint_file, device, topk)
# meat_tags

# 3. Text Analysis
- Segmentation of Video
### - font_height_to_segmentation
   - 편집자가 편집할 때, 특정 장면에 특정 자막을 사용한다.
   - 특히, 자막의 높이가 고유한 값으로, 이를 통해 구간 분할 및 태그 목록을 생성한다.
   - 거꾸로, 편집을 할 때, 분할하고자 하는 지점에는 특정한 크기의 자막을 달도록 할 수 있다.
<br/><br/>
- Ingredient Tags
### - text_simple_match
   - a recognised text is kept only when it matches a DB entry exactly, e.g. '메추리알' vs '메추리알' → match (True), '메치루알' vs '메추리알' → no match (False)
   - matching Ingredient DB (source: 소스산업화센터, 식재료 DB, https://sauce.foodpolis.kr/home/index.do)

In [173]:
def frame_to_time(frame, fps):
    """
    Convert a frame number into a (minute, second) position.

    Args:
        frame: int, frame number, e.g. 2958.
        fps: int, frames per second, e.g. 30.

    Returns:
        tuple: (minute, second) as ints; fractions of a second are truncated.
    """
    elapsed_sec = frame / fps
    whole_minutes, leftover_sec = divmod(elapsed_sec, 60)
    return (int(whole_minutes), int(leftover_sec))

In [174]:
def time_to_frame(time, fps):
    """
    Convert a (minute, second) position into a frame number.

    Args:
        time: tuple, (minute, second), e.g. 1 min 38 s = (1, 38).
        fps: int, frames per second, e.g. 30.

    Returns:
        The frame number at that position: total seconds multiplied by fps.
    """
    minute, second = time[0], time[1]
    return (minute * 60 + second) * fps

In [175]:
def font_height_to_segmentation(data, video_name, score, fh_low, fh_high, freq, fps, find_range):
    """
    Find segment start times from subtitles of a characteristic font height.

    Args:
        data: dict, scene-text engine result (e.g. the loaded JSON), laid out
            as ``data[video_name]['scenetxt']['frame_results']``.
        video_name: str, video name without the file extension, e.g. 'egg'.
        score: float, minimum (exclusive) scene-text score, e.g. 0.7.
        fh_low: int, lower bound (inclusive) of the text-box height, e.g. 125.
        fh_high: int, upper bound (exclusive) of the text-box height, e.g. 140.
        freq: int, a second is kept only when more than ``freq`` of its
            analysed frames contain a matching text box, e.g. 10. The larger
            the number, the more likely a real subtitle appeared.
        fps: int, analysed frames per second, e.g. 30. The position of a frame
            in ``frame_results`` divided by ``fps`` is taken as its time in
            seconds (assumes the engine result holds ``fps`` frames per second
            of video — TODO confirm against the engine).
        find_range: int, seconds after each segment start from which tags are
            collected, e.g. 3.

    Returns:
        tuple: ``(seg_time, seg_tag)`` where ``seg_time`` is the sorted list of
        segment starts as (minute, second) and ``seg_tag`` maps each segment's
        start frame index to the set of texts seen within ``find_range``
        seconds of it.

    Raises:
        ValueError: if ``fps`` is not positive.
    """
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps}")

    frame_results = data[video_name]['scenetxt']['frame_results']

    # {frame index: description} of boxes passing the score and height filter.
    # NOTE(review): when several boxes in one frame pass, only the last one is
    # kept — this matches the previous behaviour; confirm it is intended.
    target_frame_label = {}
    for frame, entry in enumerate(frame_results):
        # frames without any detection may carry None instead of a list
        for box in entry['frame_result'] or []:
            first = box['label'][0]
            if (first['score'] > score) and (fh_low <= box['position']['h'] < fh_high):
                target_frame_label[frame] = first['description']

    # Bucket the matching frames by whole second (same arithmetic as
    # frame_to_time, using the caller's fps) and count frames per second.
    hits_per_sec = Counter(int(frame // fps) for frame in target_frame_label)

    # frequency filter: keep seconds with more than `freq` matching frames
    kept_secs = sorted(sec for sec, hits in hits_per_sec.items() if hits > freq)

    # Runs of consecutive seconds are represented by their first second.
    # Working in total seconds makes this correct across minute boundaries.
    seg_secs = [
        sec for idx, sec in enumerate(kept_secs)
        if idx == 0 or sec != kept_secs[idx - 1] + 1
    ]

    seg_time = [divmod(sec, 60) for sec in seg_secs]

    # collect the tags seen within find_range seconds of each segment start
    seg_tag = {}
    for sec in seg_secs:
        start = sec * fps
        end = start + (find_range * fps)
        seg_tag[start] = {
            label for frame, label in target_frame_label.items()
            if start <= frame < end
        }

    return seg_time, seg_tag

In [213]:
# data = result_30
# video_name = 'egg' #['bundaegi', 'egg', 'nuddle']
# score = 0.7
# fh_low = 125
# fh_high = 140
# freq = 10
# fps = 30
# find_range = 3

# seg_time, seg_tag = font_height_to_segmentation(data, video_name, score, fh_low, fh_high, freq, fps, find_range)
# seg_time, seg_tag

([(1, 38), (1, 50), (1, 54), (2, 8), (2, 23), (2, 38), (2, 57)],
 {2940: {'1팩', '메추리알'},
  3300: {'1컵', '시2컵', '진간장그컵', '황설탕'},
  3420: {'물4컵'},
  3840: {'10개', 'tht', 'tr', 'ttr'},
  4290: {'16개', '파리고추'},
  4740: {'3개', '청양고추3개'},
  5310: {'1조각'}})

In [184]:
# preprocessed scenetext_result

# egg_text = preprocess(data=result_30, score=0.9, video_name='egg', module_name='scenetxt')
# len(egg_text)

31001

In [186]:
# Load the ingredient DB used for tag matching; the CSV is read with the cp949 (Korean) encoding.
# The displayed frame has the columns 'ingr_ko' and 'ingr_en'.
df = pd.read_csv('./Dataset/dataset/refined/ingredients.csv', encoding = 'cp949' )
df

Unnamed: 0,ingr_ko,ingr_en
0,가다랑어,"Katsuo(일명), BONITO"
1,가다랑어포,katsuobushi
2,가리비,scallop
3,가리비살,Hotategai(일명)
4,가시복,Harisenbon(일명)
...,...,...
4928,strawberry guave열매,"Erdbeerguave, Cattley Guava, Purple Guava, pin..."
4929,sugar pea,snow pea
4930,tea oil plant씨,tea oil camellia
4931,wild endive,wild endive


In [214]:
def text_simple_match(data, db, topk):
    """
    Return the ingredient names that occur most often in the recognised text.

    A text counts as a match only when it is exactly equal to an entry of the
    ``'ingr_ko'`` column of the ingredient DB.

    Args:
        data: list, preprocessed scene-text results to look up, e.g.
            egg_text (= preprocess(data=result_30, score=0.9, video_name='egg', module_name='scenetxt')).
        db: pd.DataFrame with an 'ingr_ko' column, the DB to match against, e.g.
            df (= pd.read_csv('./Dataset/dataset/refined/ingredients.csv', encoding='cp949')).
        topk: int, number of most frequent tags to return, e.g. 5.

    Returns:
        list: up to ``topk`` matched ingredient names, most frequent first;
        ties keep the order in which the names first appeared in ``data``.
    """
    # Build the lookup once: a set membership test per query replaces a full
    # scan of the DB column per query.
    known_ingredients = set(db['ingr_ko'])

    # scenetxt_result <-> ingredient DB: count every exact match
    match_counts = Counter(query for query in data if query in known_ingredients)

    # Stable sort by descending count; Counter keeps first-seen order for ties.
    ranked = sorted(match_counts.items(), key = lambda x: -x[1])

    topk_tags = [label for label, _ in ranked[:topk]]

    return topk_tags

In [215]:
#Test

# data = egg_text
# db = df
# topk = 5

# top5_tags = text_simple_match(data, db, topk)
# top5_tags

['메추리알', '마늘', '다시마', '버터', '간장']

# 4. Result
   ## - Among Playlists (from "백종원의 요리비책 Paik's Cuisine")
   1. egg.mp4 : 백종원의 쿠킹로그 (313 videos (22.08.20 기준)) : 🦆 여보 여기 있던 내 알 못 봤어요│메추리알 장조림, https://youtu.be/RmZyxKOUbfs
   2. nuddle.mp4 : 집종원 (37 videos (22.08.20 기준)) : 🍅케첩이 된 멋쟁이 토마토! 나폴리탄으로 업그레이드해줬습니다!🍅, https://youtu.be/1PRD6zJRY4c
   3. bundaegi.mp4 : 바종원 (8 videos (22.08.20 기준)) : 오늘따라 야장 가고 싶네... 청양고추 잔뜩 넣은 번데기탕에 감귤 향 터지는 미상 한 병? 오늘 나 집에 안 가!, https://youtu.be/9zqOvGkGPp4
   <br/><br/>
   
   
   
   ## - In Playlist(of 백종원의 쿠킹로그)
   1. egg.mp4 : 🦆 여보 여기 있던 내 알 못 봤어요│메추리알 장조림, https://youtu.be/RmZyxKOUbfs
   2. mooksabal.mp4 : 단돈 2천원으로 방구석 피서! 다람이 거🐹 도토리🌰 뺏어 만든 쿨내 폴폴❄ 묵사발, https://youtu.be/-Imo0xgUQG0
   3. miyeok_gook.mp4 : 300원으로 한 냄비 가~득 국 끓이기! 가성비 끝판왕 맑은 미역국, https://youtu.be/xsTFsunt6-8
  <br/><br/>
   
   
   
  ## - In Video(egg.mp4 : 🦆 여보 여기 있던 내 알 못 봤어요│메추리알 장조림)
  1. fps=1, font_text_score=0.9
  2. fps=1, font_text_score=0.7
  3. fps=30, font_text_score=0.7

In [None]:
# among playlists

######################'egg'###################### 
#fps=1, font_text_score=0.95 font_height=(125, 140)

# {'food': '조림/메추리알장조림',
#  'action_recog': ['cooking egg', 'cooking chicken', 'scrambling eggs'],
#  'segment': {(1, 39): {'1팩', '메추리알'},
#   (1, 51): {'황설탕'},
#   (1, 54): {'물4컵'},
#   (2, 8): {'10개'},
#   (2, 23): {'16개'},
#   (2, 39): {'3개', '청양고추3개'},
#   (2, 57): {'1조각'}},
#  'match': ['메추리알', '마늘', '버터', '조', '다시마'],
#  'video': 'egg'}

#####################'nuddle'##################### 
#fps=1, font_text_score=0.95 font_height=(120, 130)

# {'food': '면/라면',
#  'action_recog': ['tasting food', 'eating burger', 'eating spaghetti'],
#  'segment': {(0, 29): {'라파'},
#   (0, 35): {'라파는?'},
#   (1, 53): {'잘게채'},
#   (2, 11): {'소시지와', '식용유'},
#   (2, 13): {'볶아주세요', '소시지와'},
#   (2, 54): {'라면1개'},
#   (2, 57): {'분위기'},
#   (3, 24): {'진간장'},
#   (3, 35): {'지금부터는'},
#   (3, 52): {'해먹지?'},
#   (4, 40): {'전', '집종원'},
#   (4, 42): {'집종원'},
#   (4, 55): {'궁금증', '생각난'}},
#  'match': ['토마토', '간장', '소시지', '양파', '조'],
#  'video': 'nuddle'}

###################'bundaegi' ###################
#fps=1, font_text_score=0.95 font_height=(145,154)

# {'food': '음청류/식혜',
#  'action_recog': ['tasting beer', 'dining', 'bartending'],
#  'segment': {(1, 56): {'시작된'},
#   (1, 59): {'백종원의', '야무지게'},
#   (2, 1): {'백종원의'},
#   (2, 30): {'있어요!', '지금'},
#   (2, 36): {'백종원의'},
#   (2, 44): {'전다음에'},
#   (2, 51): {'국물까지'},
#   (3, 10): {'연애할때'},
#   (3, 13): {'양파'},
#   (3, 24): {'반개만!'},
#   (4, 2): {'간장은'},
#   (4, 28): {'나도', '이번'},
#   (4, 55): {'백종원의'},
#   (5, 28): {'비빔국수'},
#   (5, 49): {'백종원의', '종원의'},
#   (6, 17): {'종원'}},
#  'match': ['멸치', '배', '감귤', '아마', '양파'],
#  'video': 'bundaegi'}

In [None]:
# in playlists

########################'egg'######################## 
#fps=1, font_text_score=0.95 font_height=(125, 140)

# {'food': '조림/메추리알장조림',
#  'action_recog': ['cooking egg', 'cooking chicken', 'scrambling eggs'],
#  'segment': {(1, 39): {'1팩', '메추리알'},
#   (1, 51): {'황설탕'},
#   (1, 54): {'물4컵'},
#   (2, 8): {'10개'},
#   (2, 23): {'16개'},
#   (2, 39): {'3개', '청양고추3개'},
#   (2, 57): {'1조각'}},
#  'match': ['메추리알', '마늘', '버터', '조', '다시마'],
#  'video': 'egg'}

##################'mooksabal'################## 
#fps=1, font_text_score=0.95 font_height=(87, 93)

# {'food': '무침/도토리묵',
#  'action_recog': ['cooking chicken', 'scrambling eggs', 'tossing salad'],
#  'segment': {(1, 16): {'1팩'},
#   (1, 42): {'신김치'},
#   (2, 0): {'14개', '오이약'},
#   (2, 6): {'대파약', '약'},
#   (2, 9): {'청양고추'},
#   (2, 23): {'간마늘'},
#   (3, 8): {'참기름'},
#   (3, 38): {'약'},
#   (3, 56): {'수있도록'},
#   (4, 0): {'수있도록'},
#   (4, 22): {'정수물'},
#   (4, 34): {'국간장'},
#   (4, 48): {'황설탕약'},
#   (5, 4): {'식초'},
#   (5, 24): {'부어'},
#   (5, 31): {'약간'},
#   (6, 27): {'김까지', '먹으려고'}},
#  'match': ['소고기', '멸치', '식초', '김', '마늘'],
#  'video': 'mooksabal'}

##################'miyeok_gook'##################
#fps=1, font_text_score=0.95 font_height=(87, 93)

# {'food': '국/미역국',
#  'action_recog': ['scrambling eggs', 'cooking chicken', 'cooking egg'],
#  'segment': {(0, 54): {'정수물'},
#   (1, 7): {'불린'},
#   (1, 10): {'불린'},
#   (1, 18): {'국간장'},
#   (1, 23): {'간마늘'},
#   (1, 39): {'멸치액젓'},
#   (1, 47): {'맛소금'},
#   (3, 30): {'주세요'}},
#  'match': ['미역', '닭고기', '소금', '소고기', '다시마'],
#  'video': 'miyeok_gook'}

In [None]:
# Compare by adjusting fps, font_text_score
################# 'egg' ###################

#######fps =1, score=0.9 ########

# {'food': '조림/메추리알장조림',
#  'action_recog': ['cooking egg', 'cooking chicken', 'scrambling eggs'],
#  'segment': {99: {'1팩', '메추리알'},
#   111: {'황설탕'},
#   114: {'물4컵'},
#   128: {'10개', 'tr'},
#   143: {'16개', '파리고추'},
#   159: {'3개', '청양고추3개'},
#   177: {'1조각'}},
#  'match': ['메추리알', '마늘', '버터', '조', '다시마']}

#######fps =1, score=0.7 ########

# {'food': '조림/메추리알장조림',
#  'action_recog': ['cooking egg', 'cooking chicken', 'scrambling eggs'],
#  'segment': {99: {'1팩', '메추리알'},
#   111: {'황설탕'},
#   114: {'물4컵'},
#   128: {'10개', 'tr'},
#   143: {'16개', '파리고추'},
#   159: {'3개', '청양고추3개'},
#   165: {'th'},
#   177: {'1조각'}},
#  'match': ['메추리알', '마늘', '버터', '조', '다시마']}

#########fps=30, score=0.7 ########

# {'food': '조림/메추리알장조림',
#  'action_recog': ['cooking chicken', 'cooking egg', 'scrambling eggs'],
#  'segment': {2940: {'1팩', '메추리알'},
#   3300: {'1컵', '시2컵', '진간장그컵', '황설탕'},
#   3420: {'물4컵'},
#   3840: {'10개', 'tht', 'tr', 'ttr'},
#   4290: {'16개', '파리고추'},
#   4740: {'3개', '청양고추3개'},
#   5310: {'1조각'}},
#  'match': ['메추리알', '마늘', '다시마', '버터', '간장'],
#  'video': 'egg'}

# Appendix
   - Hangeul encoding check
   - kinetics400: label list
   - opencv: video_to_image  

In [None]:
# Check which text encodings the interpreter uses for stdin and stdout
# (relevant because the notebook prints and stores Korean text).
# NOTE(review): this import sits outside the import cell at the top of the notebook.
import sys
sys.stdin.encoding, sys.stdout.encoding

In [None]:
# Dataset-Kinetics400
# Read the label names, one per line; the context manager closes the file
# handle, which the bare open(...).readlines() form left open.
with open('tools/data/kinetics/label_map_k400.txt') as label_file:
    labels = [x.strip() for x in label_file]
labels[:5]

In [None]:
# Adjust the number of frames
# cv2.VideoCapture lets us grab the images of a video one after another
vidcap = cv2.VideoCapture('{영상의 경로}.mp4')

count = 0

while (vidcap.isOpened()):
    # read() combines grab() and retrieve() in one call; when no frame is
    # available it reports that through ret (False) and image (None)
    ret, image = vidcap.read()

    # Stop at the end of the stream (or on a read failure). isOpened() stays
    # True after the last frame, so without this check the loop never ends
    # or hands None to cv2.imwrite.
    if not ret:
        break

    # CAP_PROP_POS_FRAMES is the property id 1 that the loop queried before
    frame_pos = int(vidcap.get(cv2.CAP_PROP_POS_FRAMES))

    if (frame_pos % 10 == 0): # with a 30 fps source, keeping every 10th frame gives 30/10 = 3 fps
        print('Saved frame number : ' + str(frame_pos))
        cv2.imwrite("C:/Users/BIT/Desktop/miniproj/img/%d.jpg" % count, image)
        print('Saved frame%d.jpg' % count)
        count += 1

vidcap.release()

In [62]:
# ret: whether a frame was returned; frame: the very first frame of the video
# NOTE(review): `vc` is not created in any visible top-level cell (it only exists
# as a local inside splitter), so this cell relies on leftover kernel state —
# open the capture with cv2.VideoCapture(...) first.
ret, frame = vc.read()
h, w, c = frame.shape
print(h, w, c)

1080 1920 3


In [86]:
# Work out how many 10-second segments the opened video yields.
# NOTE(review): depends on the same notebook-level `vc` capture as the cell above.
segment = 10 #sec
fps = round(vc.get(cv2.CAP_PROP_FPS))
step = segment * fps
frame_num = round(vc.get(cv2.CAP_PROP_FRAME_COUNT))
out_frame_num = frame_num // step + 1 # +1: the leftover frames form one extra segment

round(vc.get(cv2.CAP_PROP_FRAME_COUNT)), round(vc.get(cv2.CAP_PROP_FPS)), out_frame_num

(11096, 30, 37)

In [69]:
# Frame height and width reported by the capture (uses the notebook-level `vc`).
round(vc.get(cv2.CAP_PROP_FRAME_HEIGHT)), round(vc.get(cv2.CAP_PROP_FRAME_WIDTH))

(1080, 1920)