In [9]:
import numpy as np
import pandas as pd
from pathlib import Path
from load_data import *
from utils import *
import subprocess
from PIL import Image
import random
import math

In [10]:
# Resolve the data locations for one person, make sure the output folder
# exists, and load the two metadata tables used by the cells below.
person_id = 'u1_extra'
data_root = Path.cwd().parent / 'data'
person_root = data_root / person_id

# Output folder for the generated test set; created only when missing.
test_path = person_root / 'puzzle_test'
test_dir = str(test_path)
if not osp.exists(test_dir):
    os.makedirs(test_dir)

# Metadata tables are stored per person as <folder>/<person_id>*.csv.
minute_meta_fp = data_root / 'minute_based_table'
vs_meta_fp = data_root / 'visual_concepts'
minute_based_df = load_csv_file(minute_meta_fp, '{}.csv'.format(person_id))
visual_concept_df = load_csv_file(vs_meta_fp, '{}_categories_attr_concepts.csv'.format(person_id))

# Every sub-directory of the query folder is one query id (sorted by name).
query_fold_path = person_root / 'puzzle_query'
query_ids = []
for entry in sorted(os.listdir(str(query_fold_path))):
    if osp.isdir(str(query_fold_path / entry)):
        query_ids.append(entry)

Function load_csv_file elapsed Time: 0:00:00.716695
Function load_csv_file elapsed Time: 0:00:01.289899


In [12]:
# Create ground truth metadata
# For every query folder, collect the visual-concept rows of the images found
# on disk and the minute-table rows that mention those images, then save both
# tables next to the query as CSV files.
for query in query_ids:
    query_path = query_fold_path / query
    # Sorted like every other directory listing in this notebook, so the row
    # order of the saved ground truth does not depend on the filesystem.
    dates = sorted(d for d in os.listdir(str(query_path)) if osp.isdir(str(query_path / d)))
    # Get visual concepts info from image path
    vs_frames = []
    for date in dates:
        date_path = query_path / date
        for name in sorted(os.listdir(str(date_path))):
            rel_path = osp.join(date, name)
            # A boolean mask selects the same rows as a string-built
            # DataFrame.query(), but keeps working when the path contains
            # quotes or backslashes.
            vs_frames.append(visual_concept_df[visual_concept_df['image_path'] == rel_path])
    # Frames are gathered in one flat list, so an empty date folder simply
    # contributes nothing instead of aborting the whole query.
    vs_metadata_df = pd.concat(vs_frames)
    # Get minute_based_df info from vs_metadata_df
    minute_frames = []
    # The first column of the visual-concept table holds the image id.
    for image_id in vs_metadata_df.iloc[:, 0]:
        # Keep every minute row in which any column equals this image id.
        mask = minute_based_df.isin([image_id])
        minute_frames.append(minute_based_df[mask.any(axis=1)])
    minute_metadata_df = pd.concat(minute_frames)
    # Save the ground truth
    vs_metadata_df.to_csv(str(query_path / 'visual_concepts.csv'), index=False, na_rep='NULL')
    minute_metadata_df.to_csv(str(query_path / 'minute_based_table.csv'), index=False, na_rep='NULL')
    print("Processing query {}".format(query))

Processing query 001
Processing query 002
Processing query 003
Processing query 004
Processing query 005
Processing query 006
Processing query 007
Processing query 008
Processing query 009
Processing query 010


In [13]:
# Create test
# For every query: copy its images into <test_path>/<query>/images under
# shuffled names (001.JPG, 002.JPG, ...), write metadata tables keyed by the
# new names with the listed columns removed, and save the
# new-name -> original-image mapping next to the query's ground truth.

# Columns removed from the published test tables (loop-invariant).
min_drop_cols = ['minute_ID', 'utc_time', 'local_time'] \
                + ['img%02d_id' % i for i in range(20)] \
                + ['cam%02d_id' % i for i in range(15)]
vs_drop_cols = ['image_id', 'image_path']

for query in query_ids:
    query_path = query_fold_path / query
    query_test_path = test_path / query
    query_image_path = query_test_path / 'images'
    # Always start from an empty images folder so files from an earlier run
    # (with a different shuffle) cannot survive. makedirs also creates
    # query_test_path when it is missing.
    if osp.exists(str(query_image_path)):
        shutil.rmtree(str(query_image_path))
    os.makedirs(str(query_image_path))

    qminute_metadata_df = load_csv_file(query_path, 'minute_based_table.csv')
    qvs_metadata_df = load_csv_file(query_path, 'visual_concepts.csv')
    num_query = len(qvs_metadata_df)
    print(num_query)
    # Row i of both tables is given the same new image name below, so the
    # tables must describe the same images in the same order.
    if len(qminute_metadata_df) != num_query:
        raise ValueError(
            'query {}: minute_based_table.csv has {} rows but visual_concepts.csv has {}; '
            'the tables cannot be paired row by row'.format(query, len(qminute_metadata_df), num_query))

    # Random one-to-one assignment of new file names to the query images.
    # NOTE(review): the shuffle is unseeded, so every run yields a different
    # assignment; map_df.csv records the one that was actually used.
    rand_list = ['%03d.JPG' % i for i in range(1, num_query + 1)]
    random.shuffle(rand_list)
    new_name = list(rand_list)
    # The first two columns of the visual-concept table are the original
    # image id and its path relative to the query folder.
    org_name = qvs_metadata_df.iloc[:, 0].tolist()
    org_path = qvs_metadata_df.iloc[:, 1].tolist()

    # Copy image
    # shutil.copy replaces the former shell `cp` string: it handles paths with
    # spaces or shell metacharacters, works on every platform, and raises on
    # failure instead of silently leaving an image out of the test set.
    for rel_path, image_name in zip(org_path, new_name):
        shutil.copy(str(query_path / rel_path), str(query_image_path / image_name))

    # drop() returns a new frame, so the tables loaded above are not modified.
    minute_metadata_df = qminute_metadata_df.drop(min_drop_cols, axis=1)
    minute_metadata_df.insert(0, 'image_name', new_name)
    minute_metadata_df = minute_metadata_df.sort_values('image_name', axis=0)
    minute_metadata_df.to_csv(str(query_test_path / 'minute_based_table.csv'), index=False, na_rep='NULL')

    vs_metadata_df = qvs_metadata_df.drop(vs_drop_cols, axis=1)
    vs_metadata_df.insert(0, 'image_name', new_name)
    vs_metadata_df = vs_metadata_df.sort_values('image_name', axis=0)
    vs_metadata_df.to_csv(str(query_test_path / 'visual_concepts.csv'), index=False, na_rep='NULL')

    # The mapping is stored with the ground truth (query folder), not with
    # the test set.
    map_df = pd.DataFrame({'image_name': new_name, 'image_id': org_name, 'image_path': org_path},
                          columns=['image_name', 'image_id', 'image_path'])
    map_df = map_df.sort_values('image_name', axis=0)
    map_df.to_csv(str(query_path / 'map_df.csv'), index=False, na_rep='NULL')
    print('Processing query {}'.format(query))

Function load_csv_file elapsed Time: 0:00:00.013456
Function load_csv_file elapsed Time: 0:00:00.012998
25
Processing query 001
Function load_csv_file elapsed Time: 0:00:00.013175
Function load_csv_file elapsed Time: 0:00:00.012631
25
Processing query 002
Function load_csv_file elapsed Time: 0:00:00.008715
Function load_csv_file elapsed Time: 0:00:00.015082
25
Processing query 003
Function load_csv_file elapsed Time: 0:00:00.008664
Function load_csv_file elapsed Time: 0:00:00.013211
25
Processing query 004
Function load_csv_file elapsed Time: 0:00:00.010686
Function load_csv_file elapsed Time: 0:00:00.013133
25
Processing query 005
Function load_csv_file elapsed Time: 0:00:00.010920
Function load_csv_file elapsed Time: 0:00:00.014732
25
Processing query 006
Function load_csv_file elapsed Time: 0:00:00.010301
Function load_csv_file elapsed Time: 0:00:00.014870
25
Processing query 007
Function load_csv_file elapsed Time: 0:00:00.009182
Function load_csv_file elapsed Time: 0:00:00.012817
