In [1]:
import os
import re
import sys
import pprint
import collections
import multiprocessing

pelops_dir = os.path.abspath('../..')
sys.path.insert(0, pelops_dir)

from pelops.datasets.featuredataset import FeatureDataset
from pelops.experiment_api.experiment import ExperimentGenerator
from pelops.analysis import analysis
import matplotlib.pyplot as plt

In [2]:
# Set experiment constants

# Passed to ExperimentGenerator as (CAMERAS, ITEMS_PER_CAMERA, DROPPED, Y_RANDOM)
ITEMS_PER_CAMERA = 10
Y_RANDOM = 1024
CAMERAS = 2
DROPPED = 0
# Passed to analysis.repeat_pre_cmc as NUMCMC and EXPPERCMC respectively
CMC_CNT = 100
EXPERIMENTS = 400

# Set data constants
# The defaults are the original author's local paths. Set PELOPS_DATA_ROOT /
# PELOPS_OUT_DIR in the environment to run on another machine without editing
# this cell.
DATA_ROOT = os.environ.get('PELOPS_DATA_ROOT', '/Users/schuylerx/Data')
OUT_DIR = os.environ.get('PELOPS_OUT_DIR', '/Users/schuylerx/Results')

In [3]:
# Scan for and locate feature datasets

# Two views of the same files:
#   files_by_network[network_name][dataset_name] -> path
#   files_by_dataset[dataset_name][network_name] -> path
files_by_network = collections.defaultdict(dict)
files_by_dataset = collections.defaultdict(dict)
# Raw string so that \d and \. reach the regex engine instead of being treated
# as (invalid) Python string escapes. The 'key' group is the network name.
file_pattern = re.compile(r'^(?P<key>.+?)_\d+px_[A-Za-z]+\.hdf5', re.S | re.I)

for root, sub_dirs, file_names in os.walk(DATA_ROOT):
    # The directory that holds the feature files names the dataset
    dataset_name = os.path.basename(root)
    for fn in file_names:
        if not fn.endswith('.hdf5'):
            continue
        file_name = os.path.join(root, fn)
        mch = file_pattern.search(fn)
        if mch is None:
            # Report the unexpected name and skip it; falling through would
            # raise AttributeError on mch.group below.
            print('ERROR: {}'.format(file_name))
            continue
        network_name = mch.group('key')
        files_by_network[network_name][dataset_name] = file_name
        files_by_dataset[dataset_name][network_name] = file_name


def formatter(d, s):
    """Return heading `s` followed by a pretty-printed {key: [inner keys]} view of `d`."""
    return '{}:\n{}'.format(s, pprint.pformat({k: list(v) for k, v in d.items()}))


print(formatter(files_by_network, 'Files By Network'))
print(formatter(files_by_dataset, 'Files By Dataset'))

Files By Network:
{'compcars_color': ['VeRI_100px',
                    'VeRI_110px',
                    'VeRI_120px',
                    'VeRI_130px',
                    'VeRI_140px',
                    'VeRI_150px',
                    'VeRI_160px',
                    'VeRI_30px',
                    'VeRI_40px',
                    'VeRI_50px',
                    'VeRI_60px',
                    'VeRI_70px',
                    'VeRI_80px',
                    'VeRI_90px'],
 'compcars_make_model': ['VeRI_100px',
                         'VeRI_110px',
                         'VeRI_120px',
                         'VeRI_130px',
                         'VeRI_140px',
                         'VeRI_150px',
                         'VeRI_160px',
                         'VeRI_30px',
                         'VeRI_40px',
                         'VeRI_50px',
                         'VeRI_60px',
                         'VeRI_70px',
                         'VeRI_80px',
           

In [4]:
# Define experiment behavior and processing output

def run_experiment(args):
    """Run one CMC experiment on a feature file and save the resulting plot.

    The inputs arrive as a single tuple so the function can be handed
    directly to ``Pool.map``.

    Args:
        args: ``(feature_file, title, output_file)`` where ``feature_file`` is
            the path given to ``FeatureDataset``, ``title`` is printed and
            used as the plot heading, and ``output_file`` is the path the
            figure is written to.

    Returns:
        None. The result is the image written to ``output_file``.
    """
    feature_file, title, output_file = args
    print(title)

    # Require output dir. exist_ok avoids a failure when several pool workers
    # race to create the same directory; a bare file name has no directory
    # component, so there is nothing to create in that case.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Run experiment
    features = FeatureDataset(feature_file)
    experiment_gen = ExperimentGenerator(features, CAMERAS, ITEMS_PER_CAMERA, DROPPED, Y_RANDOM)
    experiment_hldr = analysis.repeat_pre_cmc(features, experiment_gen, NUMCMC=CMC_CNT, EXPPERCMC=EXPERIMENTS)
    # Only the plottable data is used here; the stats are discarded
    _, gdata = analysis.make_cmc_stats(experiment_hldr, ITEMS_PER_CAMERA)

    # Plot experiment results
    figure = plt.figure()
    try:
        ax = figure.add_subplot(111)
        # presumably gdata has one row per curve (-stddev, avg, +stddev), going
        # by the legend labels -- TODO confirm against make_cmc_stats
        ax.plot(gdata.transpose())
        ax.set_title('{}\n({} CMC curves with {} experiments / curve)'.format(title, CMC_CNT, EXPERIMENTS))
        ax.legend(('-stddev','avg','+stddev'), bbox_to_anchor=(1, -0.05), fancybox=True, shadow=True, ncol=5)
        figure.savefig(output_file)
    finally:
        # pyplot keeps every figure alive until it is closed; a pool worker
        # handles many jobs, so release each figure once it has been saved.
        plt.close(figure)

In [None]:
# Run all the experiments

def get_jobs(index, out_dir=None):
    """Yield one ``run_experiment`` argument tuple per feature file in ``index``.

    Args:
        index: two-level mapping ``{training_set: {test_set: feature_file}}``.
        out_dir: directory prefix for the output image paths. Defaults to the
            notebook-level ``OUT_DIR`` when omitted.

    Yields:
        ``(feature_file, title, out_file)`` tuples, where ``out_file`` is
        ``<out_dir>/<test_set>_<training_set>.png``.
    """
    if out_dir is None:
        out_dir = OUT_DIR
    for training_set, feature_files in index.items():
        for test_set, feature_file in feature_files.items():
            title = "Processing {} using {} trained network".format(test_set, training_set)
            out_file = "{}/{}_{}.png".format(out_dir, test_set, training_set)
            yield feature_file, title, out_file


# Fan the jobs out over four worker processes
pool = multiprocessing.Pool(4)
try:
    pool.map(run_experiment, get_jobs(files_by_network))
except BaseException:
    # On failure or interrupt, stop the outstanding jobs instead of leaving
    # them running in the background after the cell has already errored.
    pool.terminate()
    raise
else:
    pool.close()
finally:
    # close()/terminate() only signal the workers; join() waits for them to
    # exit so no worker processes are left behind by this cell.
    pool.join()

Processing VeRI_30px using compcars_color trained network
Processing VeRI_100px using compcars_make_model trained network
Processing VeRI_100px using compcars_color trained network
Processing VeRI_30px using compcars_make_model trained network
Processing VeRI_100px using dgcars_body_type trained network


  dist = 1.0 - np.dot(u, v) / (norm(u) * norm(v))
