# ラベリング済みディレクトリからラベル情報を抽出して画像情報に貼り付け

In [1]:
import pandas as pd
from glob import glob 
import os
import subprocess
from shutil import copyfile
import json


## ラベリング済みディレクトリからラベル情報CSVを生成


In [2]:

def make_perlabel_df(label_dir):
    """Build a table of the entries found directly under one label directory.

    The label is the last path component of ``label_dir``; each entry matched
    by ``<label_dir>/*`` contributes one row whose ``track_id`` is that
    entry's base name.

    Args:
        label_dir: Path of a single label directory (no trailing separator,
            otherwise the derived label is an empty string).

    Returns:
        A DataFrame with the columns ``track_id`` and ``label``.
    """
    label_name = os.path.basename(label_dir)
    track_names = [
        os.path.basename(entry) for entry in glob(os.path.join(label_dir, "*"))
    ]
    # Every row of this frame carries the same label.
    return pd.DataFrame({"track_id": track_names}).assign(label=label_name)



In [3]:
def make_label_csv(base_label_dir, csv_path):
    """Collect the per-label tables of every label directory into one CSV.

    Each entry matched by ``<base_label_dir>/*`` is treated as a label
    directory and passed to ``make_perlabel_df``; the resulting frames are
    concatenated and written to ``csv_path``.

    Args:
        base_label_dir: Directory whose direct children are label directories.
        csv_path: Destination path of the label CSV.
    """
    perlabel_dfs = []
    for label_dir in glob(os.path.join(base_label_dir, "*")):
        print("label: ", os.path.basename(label_dir))
        perlabel_dfs.append(make_perlabel_df(label_dir))

    if perlabel_dfs:
        # DataFrame.append was removed in pandas 2.0; a single concat is the
        # supported replacement and avoids re-copying the frame per label.
        # Like append, concat keeps each per-label index as-is.
        label_df = pd.concat(perlabel_dfs)
    else:
        # Still emit the header so the CSV can be read back with read_csv.
        label_df = pd.DataFrame(columns=["track_id", "label"])
    label_df.to_csv(csv_path)


## 抽出画像情報CSVとラベル情報CSVを合成

In [13]:
def combine_label_df(csv_path, label_df):
    """Attach labels to one image-info CSV and save the labelled copy.

    Every row starts with the label ``"unknown"``. For each ``track_id`` in
    the info CSV, the first matching row of ``label_df`` (if any) supplies the
    label for all rows of that track. The result is written to
    ``csv_path + "_labelled.csv"`` only when at least one track was labelled.

    Args:
        csv_path: Path of the image-info CSV; it must contain a ``track_id``
            column.
        label_df: DataFrame with ``track_id`` and ``label`` columns.

    Returns:
        None. Nothing is written when the CSV has no rows or no track matches.
    """
    info_df = pd.read_csv(csv_path)
    if info_df.size == 0:
        return

    info_df["label"] = "unknown"
    labelled = False
    # Group by the column *name*, not a one-element list: with a list,
    # pandas >= 2.0 yields 1-tuples as keys and the equality tests below
    # would no longer compare against the plain track id.
    for key, _ in info_df.groupby("track_id"):
        matches = label_df.loc[label_df["track_id"] == key, "label"]
        print(matches.values)
        if len(matches.values) > 0:
            labelled = True
            info_df.loc[info_df["track_id"] == key, "label"] = matches.values[0]

    if labelled:
        info_df.to_csv(csv_path + "_labelled.csv")
            
#     return info_df
#     label_df.to_csv(csv_path)


In [14]:
def combine_infoNlabel(base_info_dir, label_path):
    info_csvs = glob(os.path.join(base_info_dir, "*mkv.csv"))
    label_df = pd.read_csv(label_path)
    for info_csv in info_csvs:
        print(info_csv)
        combine_label_df(info_csv, label_df)

            
#     return base_df
#     label_df.to_csv(csv_path)


## ラベル合成後CSVから画像を抽出


In [15]:
def select_images_percsv(info_csv_path, base_image_dir, base_output_image_dir, conditions_dict):
    """Copy the images of one labelled info CSV that satisfy the conditions.

    Rows are first filtered by ``conditions_dict["condition"]``, a mapping of
    column name to ``{"max": v, "min": v, "equal": v}`` (any subset; other
    keys are ignored). The surviving rows are grouped by ``label``; rows
    labelled ``"unknown"`` are skipped. Each remaining image is copied from
    ``<base_image_dir>/<label>/<track_id>/<image_filename>`` to
    ``<base_output_image_dir>/<label>/<label>_<image_filename>``.

    Args:
        info_csv_path: Labelled info CSV with at least the columns
            ``label``, ``track_id`` and ``image_filename``.
        base_image_dir: Root directory of the source images.
        base_output_image_dir: Root directory for the selected images;
            created if missing.
        conditions_dict: Parsed filter conditions (see above).

    Returns:
        None.
    """
    os.makedirs(base_output_image_dir, exist_ok=True)
    df = pd.read_csv(info_csv_path)
    if df.size == 0:
        return

    # Ideally this filtering would be expressed as a single query, but the
    # conditions are applied one by one instead.
    for cond_name, cond_dict in conditions_dict["condition"].items():
        for logic, value in cond_dict.items():
            if logic == "max":
                df = df[df[cond_name] <= value]
            elif logic == "min":
                df = df[df[cond_name] >= value]
            elif logic == "equal":
                df = df[df[cond_name] == value]

    # Group by the column *name*: a one-element list makes pandas >= 2.0
    # return 1-tuple keys, which would never equal "unknown" and cannot be
    # passed to os.path.join.
    for key, track_df in df.groupby("label"):
        print("key:", key)
        print("len(track_df.values):", len(track_df.values))
        if key == "unknown" or len(track_df.values) == 0:
            continue

        input_dir = os.path.join(base_image_dir, key)
        output_dir = os.path.join(base_output_image_dir, key)
        os.makedirs(output_dir, exist_ok=True)

        for _, row in track_df.iterrows():
            # os.path.join needs str; read_csv may parse numeric-looking
            # track ids as numbers.
            track_id = str(row["track_id"])
            raw_image_filename = row["image_filename"]
            new_image_filename = "{}_{}".format(key, raw_image_filename)
            src = os.path.join(input_dir, track_id, raw_image_filename)
            # NOTE(review): the destination name omits track_id, so two tracks
            # of one label sharing a file name overwrite each other - confirm
            # that file names are unique per label.
            dst = os.path.join(output_dir, new_image_filename)
            copyfile(src, dst)

            
#     return base_df
#     label_df.to_csv(csv_path)



In [16]:
def select_images(base_infocsv_dir, base_image_dir, base_output_image_dir, conditions_json_path):
    """Run ``select_images_percsv`` for every labelled info CSV in a directory.

    Args:
        base_infocsv_dir: Directory searched for files matching
            ``*labelled.csv``.
        base_image_dir: Root directory of the source images.
        base_output_image_dir: Root directory for the selected images;
            created if missing.
        conditions_json_path: Path of the JSON file holding the filter
            conditions passed on to ``select_images_percsv``.
    """
    os.makedirs(base_output_image_dir, exist_ok=True)
    # The context manager closes the file even if the JSON is malformed.
    with open(conditions_json_path) as f:
        conditions_dict = json.load(f)

    for info_csv in glob(os.path.join(base_infocsv_dir, "*labelled.csv")):
        print(info_csv)
        select_images_percsv(info_csv, base_image_dir, base_output_image_dir, conditions_dict)

            
#     return base_df
#     label_df.to_csv(csv_path)



## 抽出後画像をPerson_reID_baseline_pytorch の評価のフォルダ構成になるようにグループ分け

In [11]:
import random
import shutil
import os
from glob import glob
import subprocess
def group_test_images(src_dir, dst_dir, q_t_ratio, n_min, n_max):
    """Split the extracted images of each label into query and template sets.

    For every label directory under ``src_dir`` the ``*.jpg`` files are
    randomly divided and copied into ``<dst_dir>/query/<label>`` and
    ``<dst_dir>/template/<label>``.

    Args:
        src_dir: Directory of extracted images, one sub-directory per label.
        dst_dir: Evaluation image directory; the ``query`` and ``template``
            directories are created inside it.
        q_t_ratio: Ratio of query images to template images.
        n_min: Minimum number of images a label needs; labels with fewer
            images are skipped.
        n_max: Maximum number of images used per label. The cap is applied
            before the query/template split, so it bounds the total rather
            than only the templates.
    """
    base_template_dir = os.path.join(dst_dir, "template")
    base_query_dir = os.path.join(dst_dir, "query")
    # os.makedirs is portable and raises on failure, unlike spawning `mkdir`.
    os.makedirs(base_template_dir, exist_ok=True)
    os.makedirs(base_query_dir, exist_ok=True)

    for label_dir in glob(os.path.join(src_dir, "*")):
        label_name = os.path.basename(label_dir)
        print("label: ", label_name)
        src_images = glob(os.path.join(label_dir, "*.jpg"))
        if len(src_images) < n_min:
            print("not enough images!")
            print("number of images: ", len(src_images))
            print("n_min: ", n_min)
            continue

        template_dir = os.path.join(base_template_dir, label_name)
        query_dir = os.path.join(base_query_dir, label_name)
        os.makedirs(template_dir, exist_ok=True)
        os.makedirs(query_dir, exist_ok=True)

        if len(src_images) > n_max:
            src_images = src_images[:n_max]
        # query : template = q_t_ratio : 1, hence the q / (1 + q) share.
        query_num = int(len(src_images) * (q_t_ratio / (1.0 + q_t_ratio)))
        query_images = random.sample(src_images, query_num)
        chosen = set(query_images)
        template_images = [image for image in src_images if image not in chosen]

        for query_image in query_images:
            shutil.copy(query_image, query_dir)
        for template_image in template_images:
            shutil.copy(template_image, template_dir)



In [12]:
# Driver cell: split the extracted images into query/template folders.
# The paths are hard-coded for the machine this notebook was run on.
src_dir = "/home/liquid/data/surv_test/test1_20190820"
dst_dir = "/home/liquid/data/surv_test/test1_20190820_grouped/"
q_t_ratio = 0.2
# t_min / t_max are passed as the n_min / n_max arguments below;
# a minimum of 0 means no label is skipped for having too few images.
t_min = 0
t_max = 100

group_test_images(src_dir, dst_dir, q_t_ratio, t_min, t_max)
    
    
    
    

label:  teppei
label:  ogura
label:  tanaka
label:  keita
label:  ohiwa
label:  utsumi
label:  nakayama
label:  hasegawa
label:  kakoi
label:  hoshina
label:  izumiya
label:  kondo
label:  iwasaki
label:  kobayashi
label:  benzo
label:  nonoyama
label:  natsuo
label:  umeki
label:  kuda
label:  fujimoto
label:  yoshiwara
label:  shirai
label:  kano
label:  yo
label:  maemichi
label:  obo


In [19]:
# Inputs for select_images: directory holding the *labelled.csv files,
# source image root, output image root, and the JSON filter conditions.
base_infocsv_dir = 'video'
base_image_dir = '/home/liquid/data/surv_test/test1'
base_output_image_dir = '/home/liquid/data/surv_test/test1_20190820'
conditions_json_path = 'image_condition.json'

In [20]:
# Copy the labelled images that satisfy the JSON conditions into the output directory.
select_images(base_infocsv_dir, base_image_dir, base_output_image_dir, conditions_json_path)

video/09-20190719180553.mkv.csv_labelled.csv
key: ogura
len(track_df.values): 25
key: unknown
len(track_df.values): 65
video/417-20190726193901.mkv.csv_labelled.csv
key: utsumi
len(track_df.values): 15
video/457-20190729104205.mkv.csv_labelled.csv
key: hasegawa
len(track_df.values): 16
key: unknown
len(track_df.values): 16
video/34-20190723164617.mkv.csv_labelled.csv
key: kobayashi
len(track_df.values): 35
key: shirai
len(track_df.values): 49
key: unknown
len(track_df.values): 99
video/157-20190725155621.mkv.csv_labelled.csv
key: hoshina
len(track_df.values): 33
key: unknown
len(track_df.values): 77
video/16-20190722152105.mkv.csv_labelled.csv
key: fujimoto
len(track_df.values): 35
key: nonoyama
len(track_df.values): 31
key: unknown
len(track_df.values): 18
video/02-20190724172443.mkv.csv_labelled.csv
key: nonoyama
len(track_df.values): 27
key: teppei
len(track_df.values): 91
key: unknown
len(track_df.values): 58
video/71-20190722183601.mkv.csv_labelled.csv
key: hoshina
len(track_df.va

In [11]:
# Inputs for make_label_csv: the directory of label directories and the
# path of the label CSV to write.
base_label_dir = "images/liquid_employees/"
label_path = 'label_test1_20190820.csv'


In [59]:
# Write the track_id -> label table for every label directory to label_path.
make_label_csv(base_label_dir, label_path)

label:  keita
label:  teppei
label:  ogura
label:  ohsu
label:  ohiwa
label:  umeki


In [17]:
# Driver cell: attach the labels in label_path to every "*mkv.csv" info CSV
# under base_info_dir.
base_info_dir = "video"
label_path = 'label_test1_20190820.csv'
combine_infoNlabel(base_info_dir, label_path)

video/227-20190725204737.mkv.csv
[]
[]
[]
[]
[]
[]
[]
[]
video/593-20190729210736.mkv.csv
[]
[]
video/513-20190729141559.mkv.csv
[]
[]
[]
[]
[]
[]
[]
video/48-20190722165411.mkv.csv
['izumiya']
video/788-20190730222536.mkv.csv
[]
[]
[]
video/215-20190725195433.mkv.csv
[]
[]
video/280-20190726112427.mkv.csv
[]
[]
video/57-20190725085757.mkv.csv
video/31-20190724185303.mkv.csv
['maemichi']
[]
video/769-20190730194921.mkv.csv
[]
[]
[]
video/727-20190730164340.mkv.csv
['shirai']
['kano']
[]
video/124-20190725133205.mkv.csv
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
video/322-20190726141930.mkv.csv
['kobayashi']
[]
[]
[]
video/364-20190726162507.mkv.csv
[]
[]
[]
[]
[]
video/420-20190726195414.mkv.csv
[]
video/160-20190723121149.mkv.csv
[]
video/27-20190724184121.mkv.csv
['iwasaki']
video/175-20190725165742.mkv.csv
[]
[]
[]
[]
[]
[]
[]
[]
video/58-20190722172707.mkv.csv
[]
[]
[]
video/723-20190730162748.mkv.csv
[]
video/32-20190723164006.mkv.csv
[]
video/563-20190729182325.mkv.csv
[]
[]
[]
[]
[]
[]

In [46]:
# NOTE(review): neither `combine_label` nor `base_csv_path` is defined in the
# visible part of this notebook; presumably a stale cell from before the
# function became `combine_label_df` - confirm before re-running.
combine_label(base_csv_path, label_df)

['ohiwa']
['ohiwa']
['teppei']
['umeki']
['keita']
['umeki']
[]
['teppei']
['ohiwa']
[]
['keita']
['ohiwa']
[]
[]
