In [2]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from glob import glob
import shutil

In [3]:
data_dir = '/media/minjee/4970a4b3-9bec-42aa-8022-ddff6e7b8f80/'
img_dir = os.path.join(data_dir, 'images/anesthesia/')
excel_path = os.path.join(os.getcwd(), 'hh_video_data_v2.xlsx')
label_path = os.path.join(os.getcwd(), 'handhygiene_label.csv') 

In [4]:
df_ex = pd.read_excel(excel_path)

In [6]:
def classify_frames(img_dir):
    for image in tqdm(glob(img_dir+'*.jpg')):
        img_name= image.split('/')[-1].split('.')[0]
        
        video_id = int(img_name.split('_')[0])
        frame_num = img_name[-6:] # ex: 000001
        
        # classes : surgery, still, target(handhygiene)
        df = df_ex[df_ex['video_id'] == video_id]
        surgery_list = df['surgery_frame'].iloc[0]
        still_list = df['still_frame'].iloc[0]
        target_list = df['target_frame'].iloc[0]
        
        frames = {'surgery':surgery_list, 'still':still_list, 'anesthesia/handhygiene':target_list}
        
        
        for procedure in ['surgery', 'still', 'anesthesia/handhygiene']:
            if frames[procedure] is np.nan:
                continue
                
            lists = frames[procedure].strip().split(',')
            for el in lists:
                start_frame = el.strip().split('-')[0]
                end_frame = el.strip().split('-')[1]
                if int(frame_num) >= int(start_frame) and int(frame_num) <= int(end_frame):
                    shutil.move(image, os.path.join(data_dir, 'images/'+procedure))
            #       cnt += 1
            #print('{} images are moved to images/{}'.format(cnt, procedure))
            
    #shutil.move(glob(img_dir+'*'), os.path.join(data_dir, 'images/anesthesia'))
    #!mv /media/minjee/4970a4b3-9bec-42aa-8022-ddff6e7b8f80/labeled/* /media/minjee/4970a4b3-9bec-42aa-8022-ddff6e7b8f80/images/anesthesia/
    return

In [7]:
classify_frames(img_dir)

100%|██████████| 313789/313789 [04:15<00:00, 1226.60it/s]


In [6]:
hh_dict = {'video_id':[], 'date':[], 'img_name':[]}
def create_dataframe(img_dir):
    for image in tqdm(glob(img_dir+'*.jpg')):
        img_name= image.split('/')[-1].split('.')[0]

        video_id = img_name.split('_')[0]
        date = img_name.split('_')[1]
        frame_num = img_name[-6:]
        
        hh_dict['video_id'].append(video_id)
        hh_dict['date'].append(date)
        hh_dict['img_name'].append(img_name+'.jpg')
        
        df = pd.DataFrame(hh_dict)
        df.sort_values('img_name', inplace=True)
        df.reset_index(inplace=True)
        df.drop('index', 1, inplace=True)
    return df

In [7]:
df = create_dataframe(img_dir)

 17%|█▋        | 6785/39569 [00:24<03:09, 172.87it/s]

KeyboardInterrupt: 

 17%|█▋        | 6785/39569 [00:40<03:09, 172.87it/s]

In [19]:
df_excel

Unnamed: 0,video_id,video_name,date,surgery_frame_start,surgery_frame_end,target_frame,frame_rate
0,1,1_20180806,20181107,,,,
1,2,2_20180920,20181112,8760.0,37200.0,1686-1771,
2,3,3_20180920,20181116,,,,
3,4,4_20180920,20181119,,,,
4,5,5_20180920,20180920,,,,
5,6,6_20180920,20180920,,,,
6,7,7_20180920,20180920,,,,


In [9]:
hh_dict = {'date':[], 'img_name':[], 'video_id':[], 'target':[] }
temp_array = []
for row in df.values[:]:
    video_id = int(row[2])
    frame_num = row[1].split('_')[-1].split('.')[0][-6:]
    for vid, target, length in df_excel[['video_id', 'target_frame', 'frame_length']].values:
        if video_id is not vid:
            continue
        if target is np.nan:
            continue
        
        target_frames = target.strip().split(',')
        target_frames = list(map(lambda x: "%.6d" % int(x) ,target_frames))
    
        if frame_num in target_frames:
            # update target frame list
            temp_array = list(map(lambda n: "%.6d" % (n + int(frame_num)), range(int(length))))
            row = np.append(row, [1])
            continue
        if frame_num in temp_array:
            row = np.append(row, [1])
        else:
            row = np.append(row, [0])
        print(row)   
        hh_dict['date'].append(row[0]) 
        hh_dict['img_name'].append(row[1])
        hh_dict['video_id'].append(row[2])
        hh_dict['target'].append(row[3])

['20180806' '1_20180806_frames000354.jpg' '1' 0]
['20180806' '1_20180806_frames000355.jpg' '1' 0]
['20180806' '1_20180806_frames000356.jpg' '1' 0]
['20180806' '1_20180806_frames000357.jpg' '1' 0]
['20180806' '1_20180806_frames000358.jpg' '1' 0]
['20180806' '1_20180806_frames000359.jpg' '1' 0]
['20180806' '1_20180806_frames000360.jpg' '1' 0]
['20180806' '1_20180806_frames000361.jpg' '1' 0]
['20180806' '1_20180806_frames000362.jpg' '1' 0]
['20180806' '1_20180806_frames000363.jpg' '1' 0]
['20180806' '1_20180806_frames000364.jpg' '1' 0]
['20180806' '1_20180806_frames000365.jpg' '1' 0]
['20180806' '1_20180806_frames000366.jpg' '1' 0]
['20180806' '1_20180806_frames000367.jpg' '1' 0]
['20180806' '1_20180806_frames000368.jpg' '1' 0]
['20180806' '1_20180806_frames000369.jpg' '1' 0]
['20180806' '1_20180806_frames000370.jpg' '1' 0]
['20180806' '1_20180806_frames000371.jpg' '1' 0]
['20180806' '1_20180806_frames000372.jpg' '1' 0]
['20180806' '1_20180806_frames000373.jpg' '1' 0]
['20180806' '1_20180

['20180806' '1_20180806_frames000576.jpg' '1' 0]
['20180806' '1_20180806_frames000577.jpg' '1' 0]
['20180806' '1_20180806_frames000578.jpg' '1' 0]
['20180806' '1_20180806_frames000579.jpg' '1' 0]
['20180806' '1_20180806_frames000580.jpg' '1' 0]
['20180806' '1_20180806_frames000581.jpg' '1' 0]
['20180806' '1_20180806_frames000582.jpg' '1' 0]
['20180806' '1_20180806_frames000583.jpg' '1' 0]
['20180806' '1_20180806_frames000584.jpg' '1' 0]
['20180806' '1_20180806_frames000585.jpg' '1' 0]
['20180806' '1_20180806_frames000586.jpg' '1' 0]
['20180806' '1_20180806_frames000587.jpg' '1' 0]
['20180806' '1_20180806_frames000588.jpg' '1' 0]
['20180806' '1_20180806_frames000589.jpg' '1' 0]
['20180806' '1_20180806_frames000590.jpg' '1' 0]
['20180806' '1_20180806_frames000591.jpg' '1' 0]
['20180806' '1_20180806_frames000592.jpg' '1' 0]
['20180806' '1_20180806_frames000593.jpg' '1' 0]
['20180806' '1_20180806_frames000594.jpg' '1' 0]
['20180806' '1_20180806_frames000595.jpg' '1' 0]
['20180806' '1_20180

['20180806' '1_20180806_frames000823.jpg' '1' 0]
['20180806' '1_20180806_frames000824.jpg' '1' 0]
['20180806' '1_20180806_frames000825.jpg' '1' 0]
['20180806' '1_20180806_frames000826.jpg' '1' 0]
['20180806' '1_20180806_frames000827.jpg' '1' 0]
['20180806' '1_20180806_frames000828.jpg' '1' 0]
['20180806' '1_20180806_frames000829.jpg' '1' 0]
['20180806' '1_20180806_frames000830.jpg' '1' 0]
['20180806' '1_20180806_frames000831.jpg' '1' 0]
['20180806' '1_20180806_frames000832.jpg' '1' 0]
['20180806' '1_20180806_frames000833.jpg' '1' 0]
['20180806' '1_20180806_frames000834.jpg' '1' 0]
['20180806' '1_20180806_frames000835.jpg' '1' 0]
['20180806' '1_20180806_frames000836.jpg' '1' 0]
['20180806' '1_20180806_frames000837.jpg' '1' 0]
['20180806' '1_20180806_frames000838.jpg' '1' 0]
['20180806' '1_20180806_frames000839.jpg' '1' 0]
['20180806' '1_20180806_frames000840.jpg' '1' 0]
['20180806' '1_20180806_frames000841.jpg' '1' 0]
['20180806' '1_20180806_frames000842.jpg' '1' 0]
['20180806' '1_20180

['20180806' '1_20180806_frames001080.jpg' '1' 0]
['20180806' '1_20180806_frames001081.jpg' '1' 0]
['20180806' '1_20180806_frames001082.jpg' '1' 0]
['20180806' '1_20180806_frames001083.jpg' '1' 0]
['20180806' '1_20180806_frames001084.jpg' '1' 0]
['20180806' '1_20180806_frames001085.jpg' '1' 0]
['20180806' '1_20180806_frames001086.jpg' '1' 0]
['20180806' '1_20180806_frames001087.jpg' '1' 0]
['20180806' '1_20180806_frames001088.jpg' '1' 0]
['20180806' '1_20180806_frames001089.jpg' '1' 0]
['20180806' '1_20180806_frames001090.jpg' '1' 0]
['20180806' '1_20180806_frames001091.jpg' '1' 0]
['20180806' '1_20180806_frames001092.jpg' '1' 0]
['20180806' '1_20180806_frames001093.jpg' '1' 0]
['20180806' '1_20180806_frames001094.jpg' '1' 0]
['20180806' '1_20180806_frames001095.jpg' '1' 0]
['20180806' '1_20180806_frames001096.jpg' '1' 0]
['20180806' '1_20180806_frames001097.jpg' '1' 0]
['20180806' '1_20180806_frames001098.jpg' '1' 0]
['20180806' '1_20180806_frames001099.jpg' '1' 0]
['20180806' '1_20180

['20180806' '1_20180806_frames001335.jpg' '1' 0]
['20180806' '1_20180806_frames001336.jpg' '1' 0]
['20180806' '1_20180806_frames001337.jpg' '1' 0]
['20180806' '1_20180806_frames001338.jpg' '1' 0]
['20180806' '1_20180806_frames001339.jpg' '1' 0]
['20180806' '1_20180806_frames001340.jpg' '1' 0]
['20180806' '1_20180806_frames001341.jpg' '1' 0]
['20180806' '1_20180806_frames001342.jpg' '1' 0]
['20180806' '1_20180806_frames001343.jpg' '1' 0]
['20180806' '1_20180806_frames001344.jpg' '1' 0]
['20180806' '1_20180806_frames001345.jpg' '1' 0]
['20180806' '1_20180806_frames001346.jpg' '1' 0]
['20180806' '1_20180806_frames001347.jpg' '1' 0]
['20180806' '1_20180806_frames001348.jpg' '1' 0]
['20180806' '1_20180806_frames001349.jpg' '1' 0]
['20180806' '1_20180806_frames001350.jpg' '1' 0]
['20180806' '1_20180806_frames001351.jpg' '1' 0]
['20180806' '1_20180806_frames001352.jpg' '1' 0]
['20180806' '1_20180806_frames001353.jpg' '1' 0]
['20180806' '1_20180806_frames001354.jpg' '1' 0]
['20180806' '1_20180

['20180806' '1_20180806_frames001589.jpg' '1' 0]
['20180806' '1_20180806_frames001590.jpg' '1' 0]
['20180806' '1_20180806_frames001591.jpg' '1' 0]
['20180806' '1_20180806_frames001592.jpg' '1' 0]
['20180806' '1_20180806_frames001593.jpg' '1' 0]
['20180806' '1_20180806_frames001594.jpg' '1' 0]
['20180806' '1_20180806_frames001595.jpg' '1' 0]
['20180806' '1_20180806_frames001596.jpg' '1' 0]
['20180806' '1_20180806_frames001597.jpg' '1' 0]
['20180806' '1_20180806_frames001598.jpg' '1' 0]
['20180806' '1_20180806_frames001599.jpg' '1' 0]
['20180806' '1_20180806_frames001601.jpg' '1' 1]
['20180806' '1_20180806_frames001602.jpg' '1' 1]
['20180806' '1_20180806_frames001603.jpg' '1' 1]
['20180806' '1_20180806_frames001604.jpg' '1' 1]
['20180806' '1_20180806_frames001605.jpg' '1' 1]
['20180806' '1_20180806_frames001606.jpg' '1' 1]
['20180806' '1_20180806_frames001607.jpg' '1' 1]
['20180806' '1_20180806_frames001608.jpg' '1' 1]
['20180806' '1_20180806_frames001609.jpg' '1' 1]
['20180806' '1_20180

['20180920' '2_20180920_frames000221.jpg' '2' 1]
['20180920' '2_20180920_frames000222.jpg' '2' 1]
['20180920' '2_20180920_frames000223.jpg' '2' 1]
['20180920' '2_20180920_frames000224.jpg' '2' 1]
['20180920' '2_20180920_frames000225.jpg' '2' 1]
['20180920' '2_20180920_frames000226.jpg' '2' 1]
['20180920' '2_20180920_frames000227.jpg' '2' 1]
['20180920' '2_20180920_frames000228.jpg' '2' 1]
['20180920' '2_20180920_frames000229.jpg' '2' 1]
['20180920' '2_20180920_frames000230.jpg' '2' 0]
['20180920' '2_20180920_frames000231.jpg' '2' 0]
['20180920' '2_20180920_frames000232.jpg' '2' 0]
['20180920' '2_20180920_frames000233.jpg' '2' 0]
['20180920' '2_20180920_frames000234.jpg' '2' 0]
['20180920' '2_20180920_frames000235.jpg' '2' 0]
['20180920' '2_20180920_frames000236.jpg' '2' 0]
['20180920' '2_20180920_frames000237.jpg' '2' 0]
['20180920' '2_20180920_frames000238.jpg' '2' 0]
['20180920' '2_20180920_frames000239.jpg' '2' 0]
['20180920' '2_20180920_frames000240.jpg' '2' 0]
['20180920' '2_20180

['20180920' '4_20180920_frames000053.jpg' '4' 0]
['20180920' '4_20180920_frames000054.jpg' '4' 0]
['20180920' '4_20180920_frames000055.jpg' '4' 0]
['20180920' '4_20180920_frames000056.jpg' '4' 0]
['20180920' '4_20180920_frames000057.jpg' '4' 0]
['20180920' '4_20180920_frames000058.jpg' '4' 0]
['20180920' '4_20180920_frames000059.jpg' '4' 0]
['20180920' '4_20180920_frames000060.jpg' '4' 0]
['20180920' '4_20180920_frames000061.jpg' '4' 0]
['20180920' '4_20180920_frames000062.jpg' '4' 0]
['20180920' '4_20180920_frames000063.jpg' '4' 0]
['20180920' '4_20180920_frames000064.jpg' '4' 0]
['20180920' '4_20180920_frames000065.jpg' '4' 0]
['20180920' '4_20180920_frames000066.jpg' '4' 0]
['20180920' '4_20180920_frames000067.jpg' '4' 0]
['20180920' '4_20180920_frames000068.jpg' '4' 0]
['20180920' '4_20180920_frames000069.jpg' '4' 0]
['20180920' '4_20180920_frames000070.jpg' '4' 0]
['20180920' '4_20180920_frames000071.jpg' '4' 0]
['20180920' '4_20180920_frames000072.jpg' '4' 0]
['20180920' '4_20180

['20180920' '4_20180920_frames000301.jpg' '4' 1]
['20180920' '4_20180920_frames000302.jpg' '4' 1]
['20180920' '4_20180920_frames000303.jpg' '4' 1]
['20180920' '4_20180920_frames000304.jpg' '4' 1]
['20180920' '4_20180920_frames000305.jpg' '4' 1]
['20180920' '4_20180920_frames000306.jpg' '4' 1]
['20180920' '4_20180920_frames000307.jpg' '4' 1]
['20180920' '4_20180920_frames000308.jpg' '4' 1]
['20180920' '4_20180920_frames000309.jpg' '4' 1]
['20180920' '4_20180920_frames000310.jpg' '4' 1]
['20180920' '4_20180920_frames000311.jpg' '4' 1]
['20180920' '4_20180920_frames000312.jpg' '4' 1]
['20180920' '4_20180920_frames000313.jpg' '4' 1]
['20180920' '4_20180920_frames000314.jpg' '4' 1]
['20180920' '4_20180920_frames000315.jpg' '4' 0]
['20180920' '4_20180920_frames000316.jpg' '4' 0]
['20180920' '4_20180920_frames000317.jpg' '4' 0]
['20180920' '4_20180920_frames000318.jpg' '4' 0]
['20180920' '4_20180920_frames000319.jpg' '4' 0]
['20180920' '4_20180920_frames000320.jpg' '4' 0]
['20180920' '4_20180

['20180920' '4_20180920_frames000545.jpg' '4' 0]
['20180920' '5_20180920_frames000000.jpg' '5' 0]
['20180920' '5_20180920_frames000001.jpg' '5' 0]
['20180920' '5_20180920_frames000002.jpg' '5' 0]
['20180920' '5_20180920_frames000003.jpg' '5' 0]
['20180920' '5_20180920_frames000004.jpg' '5' 0]
['20180920' '5_20180920_frames000005.jpg' '5' 0]
['20180920' '5_20180920_frames000006.jpg' '5' 0]
['20180920' '5_20180920_frames000007.jpg' '5' 0]
['20180920' '5_20180920_frames000008.jpg' '5' 0]
['20180920' '5_20180920_frames000009.jpg' '5' 0]
['20180920' '5_20180920_frames000010.jpg' '5' 0]
['20180920' '5_20180920_frames000011.jpg' '5' 0]
['20180920' '5_20180920_frames000012.jpg' '5' 0]
['20180920' '5_20180920_frames000013.jpg' '5' 0]
['20180920' '5_20180920_frames000014.jpg' '5' 0]
['20180920' '5_20180920_frames000015.jpg' '5' 0]
['20180920' '5_20180920_frames000016.jpg' '5' 0]
['20180920' '5_20180920_frames000017.jpg' '5' 0]
['20180920' '5_20180920_frames000018.jpg' '5' 0]
['20180920' '5_20180

['20180920' '5_20180920_frames000260.jpg' '5' 0]
['20180920' '5_20180920_frames000261.jpg' '5' 0]
['20180920' '5_20180920_frames000262.jpg' '5' 0]
['20180920' '5_20180920_frames000263.jpg' '5' 0]
['20180920' '5_20180920_frames000264.jpg' '5' 0]
['20180920' '5_20180920_frames000265.jpg' '5' 0]
['20180920' '5_20180920_frames000266.jpg' '5' 0]
['20180920' '5_20180920_frames000267.jpg' '5' 0]
['20180920' '5_20180920_frames000268.jpg' '5' 0]
['20180920' '5_20180920_frames000269.jpg' '5' 0]
['20180920' '5_20180920_frames000270.jpg' '5' 0]
['20180920' '5_20180920_frames000271.jpg' '5' 0]
['20180920' '5_20180920_frames000272.jpg' '5' 0]
['20180920' '5_20180920_frames000273.jpg' '5' 0]
['20180920' '5_20180920_frames000274.jpg' '5' 0]
['20180920' '5_20180920_frames000276.jpg' '5' 1]
['20180920' '5_20180920_frames000277.jpg' '5' 1]
['20180920' '5_20180920_frames000278.jpg' '5' 1]
['20180920' '5_20180920_frames000279.jpg' '5' 1]
['20180920' '5_20180920_frames000280.jpg' '5' 1]
['20180920' '5_20180

['20180920' '5_20180920_frames000510.jpg' '5' 0]
['20180920' '5_20180920_frames000511.jpg' '5' 0]
['20180920' '5_20180920_frames000512.jpg' '5' 0]
['20180920' '5_20180920_frames000513.jpg' '5' 0]
['20180920' '5_20180920_frames000514.jpg' '5' 0]
['20180920' '5_20180920_frames000515.jpg' '5' 0]
['20180920' '5_20180920_frames000516.jpg' '5' 0]
['20180920' '5_20180920_frames000517.jpg' '5' 0]
['20180920' '5_20180920_frames000518.jpg' '5' 0]
['20180920' '5_20180920_frames000519.jpg' '5' 0]
['20180920' '5_20180920_frames000520.jpg' '5' 0]
['20180920' '5_20180920_frames000521.jpg' '5' 0]
['20180920' '5_20180920_frames000522.jpg' '5' 0]
['20180920' '5_20180920_frames000523.jpg' '5' 0]
['20180920' '5_20180920_frames000524.jpg' '5' 0]
['20180920' '5_20180920_frames000525.jpg' '5' 0]
['20180920' '5_20180920_frames000526.jpg' '5' 0]
['20180920' '5_20180920_frames000527.jpg' '5' 0]
['20180920' '5_20180920_frames000528.jpg' '5' 0]
['20180920' '5_20180920_frames000529.jpg' '5' 0]
['20180920' '5_20180

In [10]:
labeling = pd.DataFrame(hh_dict, index =range(len(hh_dict['img_name'])))

In [11]:
pd.DataFrame.to_csv(labeling, label_path, index=False)