# ラベリング済みディレクトリからラベル情報を抽出して画像情報に貼り付け

In [71]:
import pandas as pd
from glob import glob 
import os
import subprocess
from shutil import copyfile
import json


## ラベリング済みディレクトリからラベル情報CSVを生成


In [None]:

def make_perlabel_df(label_dir):
    """Build a table of the track IDs found directly under one label directory.

    Args:
        label_dir: Path to a directory whose base name is the label. Every
            entry matched by ``<label_dir>/*`` is treated as one track ID.

    Returns:
        A DataFrame with a ``track_id`` column (base names of the matched
        entries, sorted) and a ``label`` column holding the directory's
        base name on every row.
    """
    # normpath drops a trailing separator; without it basename("x/alice/")
    # is "" and every row would get an empty label.
    label = os.path.basename(os.path.normpath(label_dir))

    # glob returns entries in arbitrary order; sort so the output is stable.
    track_ids = sorted(
        os.path.basename(path) for path in glob(os.path.join(label_dir, "*"))
    )

    per_label_df = pd.DataFrame({"track_id": track_ids})
    per_label_df["label"] = label
    return per_label_df



In [34]:
def make_label_csv(base_label_dir, csv_path):
    """Write a CSV mapping every track ID to the label directory it sits in.

    Each entry matched by ``<base_label_dir>/*`` is passed to
    ``make_perlabel_df`` and the resulting tables are stacked into one.

    Args:
        base_label_dir: Directory containing one sub-directory per label.
        csv_path: Destination path of the CSV (written with the index column).
    """
    frames = []
    for label_dir in glob(os.path.join(base_label_dir, "*")):
        print("label: ", os.path.basename(label_dir))
        frames.append(make_perlabel_df(label_dir))

    if frames:
        # DataFrame.append was removed in pandas 2.0; a single concat also
        # avoids re-copying the accumulated frame on every iteration.
        label_df = pd.concat(frames)
    else:
        # Keep the expected header even when no label directory was found.
        label_df = pd.DataFrame(columns=["track_id", "label"])
    label_df.to_csv(csv_path)


## 抽出画像情報CSVとラベル情報CSVを合成

In [95]:
def combine_label_df(csv_path, label_df):
    """Attach labels to one image-info CSV and save the result next to it.

    Every row starts with the label ``"unknown"``. For each distinct
    ``track_id`` in the CSV, the first matching ``label`` found in
    ``label_df`` (if any) is written to all rows of that track. The result is
    saved as ``<csv_path>_labelled.csv`` only when at least one track matched.

    Args:
        csv_path: Path of the image-info CSV; must contain a ``track_id``
            column.
        label_df: DataFrame with ``track_id`` and ``label`` columns.
    """
    info_df = pd.read_csv(csv_path)
    info_df["label"] = "unknown"
    labelled = False

    # Group by the bare column name: with a one-element list, pandas >= 2.0
    # yields 1-tuples as keys, so the equality lookups below would no longer
    # be comparing against the scalar track ID.
    for track_id, _ in info_df.groupby("track_id"):
        matches = label_df.loc[label_df["track_id"] == track_id, "label"].values
        print(matches)
        if len(matches) > 0:
            labelled = True
            info_df.loc[info_df["track_id"] == track_id, "label"] = matches[0]

    if labelled:
        info_df.to_csv(csv_path + "_labelled.csv")
            
#     return info_df
#     label_df.to_csv(csv_path)


In [96]:
def combine_infoNlabel(base_info_dir, label_path):
    """Apply the label CSV to every ``*mkv.csv`` file in a directory.

    Args:
        base_info_dir: Directory searched for files matching ``*mkv.csv``.
        label_path: Path of the label CSV, loaded once and passed to
            ``combine_label_df`` for each matched file.
    """
    pattern = os.path.join(base_info_dir, "*mkv.csv")
    matched_csvs = glob(pattern)
    labels = pd.read_csv(label_path)
    for csv_file in matched_csvs:
        print(csv_file)
        combine_label_df(csv_file, labels)

            
#     return base_df
#     label_df.to_csv(csv_path)


## ラベル合成後CSVから画像を抽出


In [108]:
def select_images_percsv(info_csv_path, base_image_dir, base_output_image_dir, conditions_dict):
    """Copy the images listed in one labelled CSV that satisfy the conditions.

    Rows are filtered column by column, then grouped by ``label``. Rows
    labelled ``"unknown"`` are skipped. Each remaining image is copied from
    ``<base_image_dir>/<label>/<track_id>/<image_filename>`` to
    ``<base_output_image_dir>/<label>/<label>_<image_filename>``.

    Args:
        info_csv_path: CSV with ``label``, ``track_id`` and ``image_filename``
            columns plus every column named in the conditions.
        base_image_dir: Root directory of the source images.
        base_output_image_dir: Root directory for the copies; created if
            missing.
        conditions_dict: Mapping whose ``"condition"`` entry maps a column
            name to ``{"max": v}``, ``{"min": v}`` and/or ``{"equal": v}``.
            Other operator names are ignored.
    """
    os.makedirs(base_output_image_dir, exist_ok=True)
    df = pd.read_csv(info_csv_path)

    # Filters are applied one at a time here rather than as a single query.
    for cond_name, cond_dict in conditions_dict["condition"].items():
        for logic, value in cond_dict.items():
            if logic == "max":
                df = df[df[cond_name] <= value]
            elif logic == "min":
                df = df[df[cond_name] >= value]
            elif logic == "equal":
                df = df[df[cond_name] == value]

    # Group by the bare column name: with a one-element list, pandas >= 2.0
    # yields 1-tuples as keys, which breaks the "unknown" check and the
    # path joins below.
    for label, track_df in df.groupby("label"):
        print("key:", label)
        print("len(track_df.values):", len(track_df.values))
        if label == "unknown":
            continue

        input_dir = os.path.join(base_image_dir, label)
        output_dir = os.path.join(base_output_image_dir, label)
        os.makedirs(output_dir, exist_ok=True)

        # NOTE(review): images with the same file name in different tracks of
        # one label map to the same destination and overwrite each other.
        for _, row in track_df.iterrows():
            # read_csv parses all-numeric IDs as integers; join needs str.
            track_id = str(row["track_id"])
            raw_image_filename = row["image_filename"]
            new_image_filename = "{}_{}".format(label, raw_image_filename)
            src = os.path.join(input_dir, track_id, raw_image_filename)
            dst = os.path.join(output_dir, new_image_filename)
            copyfile(src, dst)

            
#     return base_df
#     label_df.to_csv(csv_path)



In [109]:
def select_images(base_infocsv_dir, base_image_dir, base_output_image_dir, conditions_json_path):
    """Run ``select_images_percsv`` on every ``*labelled.csv`` in a directory.

    Args:
        base_infocsv_dir: Directory searched for files matching
            ``*labelled.csv``.
        base_image_dir: Root directory of the source images.
        base_output_image_dir: Root directory for the copies; created if
            missing.
        conditions_json_path: JSON file holding the selection conditions,
            loaded once and passed unchanged to ``select_images_percsv``.
    """
    os.makedirs(base_output_image_dir, exist_ok=True)

    # The context manager closes the file even if the JSON fails to parse.
    with open(conditions_json_path, encoding="utf-8") as f:
        conditions_dict = json.load(f)

    for info_csv in glob(os.path.join(base_infocsv_dir, "*labelled.csv")):
        print(info_csv)
        select_images_percsv(info_csv, base_image_dir, base_output_image_dir, conditions_dict)

            
#     return base_df
#     label_df.to_csv(csv_path)



In [112]:
# Arguments for the select_images call below.
# Directory searched for "*labelled.csv" files.
base_infocsv_dir = 'video'
# Root of the source images, laid out as <label>/<track_id>/<image_filename>.
base_image_dir = 'images/liquid_employees/'
# Root directory that receives the copied images.
base_output_image_dir = 'images/select_test'
# JSON file with the selection conditions.
conditions_json_path = 'image_condition.json'

In [114]:
# Copy the images that pass the conditions, for every labelled CSV found.
select_images(base_infocsv_dir, base_image_dir, base_output_image_dir, conditions_json_path)

video/66-20190720201028.mkv.csv_labelled.csv
key: ogura
len(track_df.values): 6
key: ohiwa
len(track_df.values): 23
key: ohsu
len(track_df.values): 53
video/000-2019081412221.mkv.csv_labelled.csv
key: keita
len(track_df.values): 76
key: ohiwa
len(track_df.values): 60
key: teppei
len(track_df.values): 20
key: umeki
len(track_df.values): 17
key: unknown
len(track_df.values): 31


In [58]:
# Directory holding one sub-directory per label.
base_label_dir = "images/liquid_employees/"
# Path of the label CSV.
label_path = 'test_label2.csv'
# A single image-info CSV.
base_csv_path = 'video/000-2019081412221.mkv.csv'

In [59]:
# Scan the label directories and write the track_id -> label CSV.
make_label_csv(base_label_dir, label_path)

label:  keita
label:  teppei
label:  ogura
label:  ohsu
label:  ohiwa
label:  umeki


In [97]:
# Directory searched for "*mkv.csv" image-info files.
base_info_dir = "video"

# Label every image-info CSV; matches are saved as "<csv>_labelled.csv".
combine_infoNlabel(base_info_dir, label_path)

video/66-20190720201028.mkv.csv
['ohiwa']
['ohiwa']
['ohsu']
['ogura']
['ohsu']
['ohsu']
['ohsu']
['ohsu']
video/000-2019081412221.mkv.csv
['ohiwa']
['ohiwa']
['teppei']
['umeki']
['keita']
['umeki']
[]
['teppei']
['ohiwa']
[]
['keita']
['ohiwa']
[]
[]


In [46]:
# NOTE(review): neither `combine_label` nor a module-level `label_df` is
# defined in the visible cells, so this looks like a stale cell. Presumably
# it should call combine_label_df with a DataFrame read from label_path;
# confirm before re-running.
combine_label(base_csv_path, label_df)

['ohiwa']
['ohiwa']
['teppei']
['umeki']
['keita']
['umeki']
[]
['teppei']
['ohiwa']
[]
['keita']
['ohiwa']
[]
[]
