In [1]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from glob import glob
import shutil
import random

In [2]:
# All project paths are resolved relative to the current working directory.
data_dir = os.getcwd()
# Frames are copied into this folder and later sorted into clean/ and notclean/ subfolders.
img_dir = os.path.join(data_dir, 'jpg/')
# Annotation spreadsheet read below.
excel_path = os.path.join(data_dir, 'hh_label.xlsx')
# CSV file the final cell writes the collected labels to.
label_path = os.path.join(data_dir, 'hh_label.csv')
# Annotation table; later cells read the columns video_name, video_id, date,
# anesthesia, anesthesia/handhygiene and anesthesia/intubation from it.
df_ex = pd.read_excel(excel_path)

# Directory the source frames are copied from.
# NOTE(review): machine-specific absolute path - adjust before running on another host.
anesthesia_dir = '/media/minjee/4970a4b3-9bec-42aa-8022-ddff6e7b8f80/images/anesthesia/'

In [3]:
def move_target_data(image, target_dir):
    """Move one file into ``target_dir``, creating the directory when needed.

    Args:
        image: Path of the file to move.
        target_dir: Destination directory. Missing parent directories are
            created as well.

    Raises:
        shutil.Error: Propagated from ``shutil.move`` (e.g. when a file with
            the same name already exists in ``target_dir``).
    """
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # os.path.exists + os.mkdir and also works when the parent is missing.
    os.makedirs(target_dir, exist_ok=True)
    shutil.move(image, target_dir)

In [4]:
def add_to_data_dict(video_id, img_date, img_name, clip_num, category, data_dict=None):
    """Append one image record to a column-oriented label dictionary.

    Args:
        video_id: Identifier of the video the image belongs to.
        img_date: Date string taken from the image name.
        img_name: Image name without extension; ``'.jpg'`` is appended.
        clip_num: Clip index of the image (callers pass NaN for images that
            are not part of a clip).
        category: Label of the image, e.g. ``'clean'`` or ``'notclean'``.
        data_dict: Dictionary with the list-valued keys ``video_id``,
            ``date``, ``img_name``, ``clip_num`` and ``category``. When
            omitted, the module-level ``hh_dict`` is used.
    """
    # Resolve the default at call time so the global may be defined in a later cell.
    records = hh_dict if data_dict is None else data_dict
    records['video_id'].append(video_id)
    records['date'].append(img_date)
    records['img_name'].append(img_name+'.jpg')
    records['clip_num'].append(clip_num)
    records['category'].append(category)

In [5]:
def pick_random_frames(df=None, src_dir=None, dst_dir=None, min_len=16, max_len=40):
    """Copy randomly placed runs of consecutive frames for each annotated video.

    For every row whose ``anesthesia/handhygiene`` cell is filled in, one run
    is sampled per comma-separated entry of that cell. Each run starts at a
    random position inside a randomly chosen ``start-end`` span from the
    ``anesthesia`` cell and is ``min_len``..``max_len`` frames long. The
    frames ``<video_name>_frames<6-digit index>.jpg`` are copied from
    ``src_dir`` to ``dst_dir``.

    Args:
        df: Annotation table with the columns ``video_name``, ``anesthesia``
            and ``anesthesia/handhygiene``. Defaults to the global ``df_ex``.
        src_dir: Directory holding the source frames. Defaults to the global
            ``anesthesia_dir``.
        dst_dir: Directory the frames are copied to (created when missing).
            Defaults to the global ``img_dir``.
        min_len: Smallest run length drawn, in frames.
        max_len: Largest run length drawn, in frames.

    Raises:
        FileNotFoundError: If a sampled frame does not exist in ``src_dir``.
        ValueError: If a span is not of the form ``start-end``.
    """
    # Defaults are resolved at call time so the globals can be (re)defined in other cells.
    frame = df_ex if df is None else df
    src_dir = anesthesia_dir if src_dir is None else src_dir
    dst_dir = img_dir if dst_dir is None else dst_dir
    os.makedirs(dst_dir, exist_ok=True)

    rows = zip(frame['video_name'], frame['anesthesia'], frame['anesthesia/handhygiene'])
    for vname, clips, target in rows:
        # pd.isna catches every NaN/None value; an identity test against
        # np.nan misses NaN objects that are not the numpy singleton.
        if pd.isna(target):
            continue
        if pd.isna(clips):
            print("video_id {} has no anesthesia span to sample from; skipped.".format(vname))
            continue

        n_target = len(target.strip().split(','))
        print("target has {} clip/clips. pick a random {} case from video_id {}.".format(n_target, n_target, vname))

        # Candidate spans do not change between picks, so split them once.
        spans = clips.strip().split(',')
        for _ in range(n_target):
            first, last = (int(bound) for bound in random.choice(spans).split('-'))
            # Clamp the run so it fits into the span; without this,
            # random.randint raises ValueError for spans shorter than the run.
            length = min(random.randint(min_len, max_len), last - first)
            if length <= 0:
                continue
            offset = random.randint(first, last - length)

            for i in range(length):
                imgname = vname + '_frames' + str(offset+i).zfill(6) + '.jpg'
                shutil.copy(os.path.join(src_dir, imgname), dst_dir)
            

In [6]:
# Copy the randomly sampled frame runs into img_dir; prints one line per annotated video.
pick_random_frames()

target has 1 clip/clips. pick a random 1 case from video_id 2_20181112.
target has 5 clip/clips. pick a random 5 case from video_id 5_20181119.
target has 1 clip/clips. pick a random 1 case from video_id 8_20181122.
target has 5 clip/clips. pick a random 5 case from video_id 11_20181126.
target has 1 clip/clips. pick a random 1 case from video_id 13_20181129.
target has 5 clip/clips. pick a random 5 case from video_id 17_20181203.
target has 1 clip/clips. pick a random 1 case from video_id 18_20181204.


In [7]:
# Column-oriented accumulator filled by add_to_data_dict (one list entry per image);
# it is converted to a DataFrame and written to CSV in the last cell.
hh_dict = {'video_id':[], 'date':[], 'img_name':[], 'clip_num':[], 'category':[]}

def is_target(clip_num, frame_num, targets):
    """Tell whether a frame lies inside one of the annotated spans.

    Args:
        clip_num: Running clip counter carried between calls.
        frame_num: Frame index; anything ``int()`` accepts (e.g. ``'000012'``).
        targets: Iterable of ``'start-end'`` strings, both bounds inclusive.

    Returns:
        ``(hit, clip_num)``: ``hit`` is True when the frame falls into a
        span. The counter is returned incremented by one only when the frame
        is exactly the first frame of the matching span; otherwise it is
        returned unchanged.
    """
    for span in targets:
        first, last = span.split('-')
        frame = int(frame_num)
        first, last = int(first), int(last)
        if first <= frame <= last:
            # Only the opening frame of a span starts a new clip.
            return True, (clip_num + 1 if frame == first else clip_num)

    return False, clip_num
    
def label_data(img_dir):
    """Sort the frames in ``img_dir`` into ``clean``/``notclean`` and record labels.

    Every ``*.jpg`` directly inside ``img_dir`` is matched against the
    ``anesthesia/handhygiene`` spans of its video in the global ``df_ex``.
    Frames inside a span are moved to ``<img_dir>/clean``, all others to
    ``<img_dir>/notclean``, and one record per frame is appended through
    ``add_to_data_dict``. File names are parsed as
    ``<video_id>_<date>_...<6-digit frame index>.jpg``.

    The clip counter starts at -1 and is carried across all images (it is not
    reset per video). Videos whose annotation cell is empty contribute only
    ``notclean`` frames.

    Args:
        img_dir: Directory containing the frames to label.

    Raises:
        IndexError: If an image belongs to a video that has no row in ``df_ex``.
    """
    img_total = sorted(glob(os.path.join(img_dir, '*.jpg')))
    clean_dir = os.path.join(img_dir, 'clean')
    notclean_dir = os.path.join(img_dir, 'notclean')

    clip_num = -1
    cnt = 0
    # Annotation spans per video_id, so the table is queried once per video
    # instead of once per image.
    spans_by_video = {}

    for image in tqdm(img_total):
        # os.path handles the platform separator and keeps dots inside the stem.
        img_name = os.path.splitext(os.path.basename(image))[0]
        name_parts = img_name.split('_')
        video_id = int(name_parts[0])
        video_date = name_parts[1]
        img_frame_num = img_name[-6:] # ex: 000001

        if video_id not in spans_by_video:
            annotations = df_ex.loc[df_ex['video_id'] == video_id, 'anesthesia/handhygiene']
            if annotations.empty:
                raise IndexError("video_id {} not found in the label sheet".format(video_id))
            annotation = annotations.iloc[0]
            if pd.isna(annotation):
                # No hand-hygiene annotation: nothing in this video is 'clean'.
                spans_by_video[video_id] = []
            else:
                # Skip blank entries such as the one left by a trailing comma.
                spans_by_video[video_id] = [
                    span for span in annotation.strip().split(',') if span.strip()
                ]

        is_clean, clip_num = is_target(clip_num, img_frame_num, spans_by_video[video_id])
        if is_clean:
            move_target_data(image, clean_dir)
            add_to_data_dict(video_id, video_date, img_name, clip_num, 'clean')
            cnt += 1
        else:
            move_target_data(image, notclean_dir)
            add_to_data_dict(video_id, video_date, img_name, np.nan, 'notclean')

    print("{} images moved to {}".format(cnt, os.path.join(img_dir, 'clean')))

    return

In [8]:
# Move every frame in img_dir into clean/ or notclean/ and record its label in hh_dict.
label_data(img_dir)

100%|██████████| 1077/1077 [00:01<00:00, 1001.24it/s]

539 images moved to /data/handhygiene/jpg/clean





In [9]:
# Turn the collected per-image records into a table and save it without the index column.
df = pd.DataFrame(hh_dict)
df.to_csv(label_path, index=False)