In [None]:
import os
import shutil
import subprocess
from glob import glob

import numpy as np
import pandas as pd
from tqdm import tqdm

In [None]:
!pip3 install xlrd

In [None]:
# Root of the dataset; every other location below is derived from it.
DATA_DIR = './data/'

# Frame images: label folders are created under IMG_DIR by the sorting step.
IMG_DIR = os.path.join(DATA_DIR, 'images')
# Folder that is scanned for *.jpg frames to be sorted into label folders.
TARGET_DIR = os.path.join(DATA_DIR, 'images/handhygiene')
# Root folder for the encoded clips.
VID_DIR = os.path.join(DATA_DIR, 'videos')

# Label spreadsheet; later cells read its 'video_id', 'video_name', 'fps'
# and per-label frame-range columns.
EXCEL_PATH = os.path.join(DATA_DIR, 'hh_label.xlsx')
df_ex = pd.read_excel(EXCEL_PATH)

In [None]:
"""
Separating raw images into 3 categories: surgery, anesthesia, still (useless images)
"""
# classes directories
# /images
### /surgery
### /surgery_label
### /anesthesia
### /anesthesia_label
#####    /handhygiene (target)
### /still
### /simulation
# /videos

### /anesthesia_label

def get_classes(cls, df):
    """Collect the label columns of ``df`` whose name contains ``cls``.

    Args:
        cls: Substring to look for in the column names (e.g. ``'surgery'``
            matches both ``'surgery'`` and ``'surgery/<sublabel>'``).
        df: DataFrame holding the label row(s) of a single video. Only the
            first row is read.

    Returns:
        dict mapping each matching column name to the value in the first row
        of ``df``. An empty dict is returned when no column matches or when
        ``df`` has no rows, so callers can always iterate over the result.
    """
    # Look at the frame that was passed in, not at a notebook-level global,
    # so the function works on any DataFrame and after a kernel restart.
    matching = [col for col in df.columns if isinstance(col, str) and cls in col]

    if not matching or len(df) == 0:
        return {}

    first_row = df.iloc[0]
    return {col: first_row[col] for col in matching}

            
def _parse_frame_ranges(spec):
    """Turn a 'start-end, start-end' string into inclusive (start, end) int pairs."""
    ranges = []
    for chunk in spec.split(','):
        chunk = chunk.strip()
        if not chunk:
            continue  # tolerate trailing or doubled commas
        bounds = chunk.split('-')
        start = int(bounds[0])
        # A bare number is treated as a one-frame range.
        end = int(bounds[1]) if len(bounds) > 1 else start
        ranges.append((start, end))
    return ranges


def _label_dir(IMG_DIR, procedure):
    """Map a label column name to its folder: 'x' -> IMG_DIR/x, 'x/y' -> IMG_DIR/x_label/y."""
    if '/' not in procedure:
        return os.path.join(IMG_DIR, procedure)
    parts = procedure.split('/')
    return os.path.join(IMG_DIR, parts[0] + '_label', parts[1])


def _find_destination(df, IMG_DIR, video_name, frame_num):
    """Return the folder for the first label whose ranges contain frame_num, else None."""
    for phase in ('surgery', 'anesthesia', 'still'):
        frames = get_classes(phase, df) or {}
        # Reverse order puts 'phase/sublabel' columns ahead of the plain 'phase' column.
        for procedure in sorted(frames, reverse=True):
            spec = frames[procedure]
            if not isinstance(spec, str):
                continue  # empty cell (NaN) or otherwise not a range string
            for start, end in _parse_frame_ranges(spec):
                if start <= frame_num <= end:
                    return os.path.join(_label_dir(IMG_DIR, procedure), video_name)
    return None


def sort_frames_into_categories(TARGET_DIR, IMG_DIR):
    """Move every labelled frame from TARGET_DIR into its label folder under IMG_DIR.

    File names are parsed as ``<video_id>_<name>_...<6-digit frame number>.jpg``.
    The label row is looked up in the notebook-level ``df_ex`` by ``video_id``;
    the first label column (phases checked in the order surgery, anesthesia,
    still; sub-labels before their parent label) whose frame ranges contain the
    frame number decides the destination
    ``IMG_DIR/<label>[_label/<sublabel>]/<video_id>_<name>``.

    Frames without a label row or without a matching range stay in TARGET_DIR.

    Args:
        TARGET_DIR: Folder scanned (non-recursively) for ``*.jpg`` frames.
        IMG_DIR: Root folder under which label folders are created.

    Returns:
        None. Files are moved on disk as a side effect.
    """
    images = sorted(glob(os.path.join(TARGET_DIR, '*.jpg')))

    for image in tqdm(images):
        # basename/splitext instead of splitting on '/' so Windows paths work too.
        img_name = os.path.splitext(os.path.basename(image))[0]
        name_parts = img_name.split('_')
        video_id = int(name_parts[0])
        video_name = str(video_id) + '_' + name_parts[1]
        frame_num = int(img_name[-6:])  # ex: 000001

        df = df_ex[df_ex['video_id'] == video_id]
        if df.empty:
            continue  # no label row for this video

        dst_path = _find_destination(df, IMG_DIR, video_name, frame_num)
        if dst_path is None:
            continue

        # Creates IMG_DIR/<label>[...]/<video_name> in one go, parents included.
        os.makedirs(dst_path, exist_ok=True)
        shutil.move(image, dst_path)

    return

In [None]:
# Moves the labelled frames out of TARGET_DIR into their label folders under IMG_DIR.
sort_frames_into_categories(TARGET_DIR=TARGET_DIR, IMG_DIR=IMG_DIR)

In [None]:
# Inclusive "start-end" frame ranges, kept as a single ', '-separated string.
# Only referenced by the disabled non-target branch of images_to_video.
# NOTE(review): the pair '2412-2458', '2468-2479' appears twice in a row -
# confirm whether the repetition is intentional.
CUSTOM_LIST = ', '.join((
    '109-145', '159-178', '187-239', '247-263', '272-344', '382-402', '431-462', '538-610',
    '640-805', '812-829', '848-926', '1004-1013', '1069-1092', '1096-1104', '1149-1164', '1233-1249',
    '1260-1280', '1352-1380', '1432-1453', '1532-1611', '1630-1674', '1689-1728', '1749-1785', '1789-1810',
    '1894-1903', '2027-2083', '2154-2173', '2191-2198', '2255-2342', '2353-2380', '2381-2405', '2412-2458',
    '2468-2479', '2412-2458', '2468-2479', '2495-2540', '2588-2608', '2613-2628', '2629-2649', '2658-2675',
    '2696-2712', '2743-2774', '2839-2846', '2859-2871', '2915-2937', '2944-2978', '2986-3036', '3113-3120',
    '3152-3159', '3184-3206', '3241-3248', '3359-3400', '3401-3434', '3544-3573', '3589-3604', '3766-3802',
    '3847-3862', '3896-3916', '3940-3960', '4034-4070', '4283-4298', '4411-4445', '4758-4782', '5327-5354',
    '5471-5490', '5507-5522', '5575-5591', '5759-5780', '5795-5811', '5853-5868', '5919-5934', '5935-5960',
    '6019-6050', '6077-6092', '6222-6246', '6438-6453', '6587-6612', '6667-6684', '6768-6787', '6793-6808',
))

In [None]:
# Label whose frame ranges are encoded into clips.
TARGET_LABEL = 'anesthesia_label/handhygiene'
# Only used by the disabled non-target branch of images_to_video.
NONE_TARGET_LABEL = 'anesthesia/'
VIDEO_EXT = '.mp4'

# Frames are read from IMG_DIR/<label>; clips are written to VID_DIR/<label>.
INPUT_DIR, OUTPUT_DIR = (os.path.join(root, TARGET_LABEL) for root in (IMG_DIR, VID_DIR))

def make_video(INPUT_DIR, OUTPUT_DIR, start, vname, fps, ext):
    """Encode a frame sequence into an H.264 clip by running ffmpeg.

    Frames are read with the pattern ``INPUT_DIR/<vname>/<vname>_frames%06d.jpg``
    beginning at frame number ``start``. The clip is written to
    ``OUTPUT_DIR/<vname>_<start zero-padded to 6 digits><ext>``. If that file
    already exists nothing is done, so re-running the notebook does not
    re-encode finished clips.

    Args:
        INPUT_DIR: Folder containing one sub-folder of frames per video.
        OUTPUT_DIR: Folder receiving the clips; created if missing.
        start: First frame number (str or int).
        vname: Video name; used as frame sub-folder and file-name prefix.
        fps: Frame rate passed to ffmpeg's ``-r`` option.
        ext: Output file extension including the dot (e.g. ``'.mp4'``).

    Returns:
        None.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not on PATH.
    """
    start = str(start)
    images_path = os.path.join(INPUT_DIR, vname, vname + '_frames%06d.jpg')
    target_path = os.path.join(OUTPUT_DIR, vname + '_{}'.format(start.zfill(6)) + ext)
    if os.path.exists(target_path):
        return

    # ffmpeg does not create missing output folders itself.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    command = [
        'ffmpeg',
        '-r', str(fps),
        '-start_number', start,
        '-i', images_path,
        '-c:v', 'libx264',
        '-profile:v', 'high',
        '-crf', '20',
        '-pix_fmt', 'yuv420p',
        target_path,
        '-y',
    ]
    result = subprocess.run(command, check=False)
    if result.returncode != 0:
        # Best effort: report the failure and let the caller carry on with other clips.
        print('ffmpeg exited with code {} for {}'.format(result.returncode, target_path))
    return

def images_to_video(OUTPUT_DIR, NONE_TARGET_LABEL=None):
    """Encode one clip per hand-hygiene frame range listed in the label sheet.

    Iterates over the ``video_name``, ``anesthesia/handhygiene`` and ``fps``
    columns of the notebook-level ``df_ex``. For every comma-separated
    ``start-end`` entry of a row, ``make_video`` is called with the entry's
    start frame, reading frames from the notebook-level ``INPUT_DIR`` and
    using ``VIDEO_EXT`` as extension. Rows with an empty range cell are skipped.

    Args:
        OUTPUT_DIR: Folder that receives the clips.
        NONE_TARGET_LABEL: Currently unused; it is only referenced by the
            disabled non-target export kept below as comments.

    Returns:
        None.
    """
    rows = df_ex[['video_name', 'anesthesia/handhygiene', 'fps']].values
    for vname, target, fps in tqdm(rows):
        # pd.isna instead of `is np.nan`: an identity test only matches the
        # np.nan singleton and misses any other NaN object.
        if pd.isna(target):
            continue

        ## target
        for el in target.strip().split(','):
            start = el.strip().split('-')[0].strip()
            if not start:
                continue  # empty element, e.g. from a trailing comma
            make_video(INPUT_DIR, OUTPUT_DIR, start, vname, fps, VIDEO_EXT)

        ## none-target
#         if NONE_TARGET_LABEL is not None:
#             start = '0'
#             for el in CUSTOM_LIST.strip().split(','):
#                 end = str(int(el.strip().split('-')[0])-1)
#                 if int(end)-int(start) > 16:
#                     print(vname, start, end)
#                     make_video(os.path.join(IMG_DIR, NONE_TARGET_LABEL), 
#                                 os.path.join(VID_DIR, NONE_TARGET_LABEL), 
#                                 start, vname, fps, VIDEO_EXT)
#                 start = str(int(el.strip().split('-')[1])+1)

In [None]:
# Encode a clip for every hand-hygiene range listed in the label sheet.
images_to_video(OUTPUT_DIR=OUTPUT_DIR, NONE_TARGET_LABEL=NONE_TARGET_LABEL)