In [1]:
import os, time, shutil, itertools, json
from collections import defaultdict
from tqdm import tqdm


def crawl_directory(dirname):
    """ Recursively yield the path of every file found below ``dirname``.

    Files whose name ends with '.DS_Store' are skipped. Paths are built by
    joining the directory reported by ``os.walk`` with the file name.
    """
    for folder, _, entries in os.walk(dirname):
        yield from (
            os.path.join(folder, entry)
            for entry in entries
            if not entry.endswith('.DS_Store')
        )


def remove_empty_dirs(path):
    """ Try to remove every directory below ``path``, deepest ones first.

    The tree is walked bottom-up so that a directory whose only content was
    empty sub-directories can itself be removed afterwards. Each candidate is
    resolved with ``os.path.realpath`` and handed to ``remove_empty_dir``.
    ``path`` itself is never passed to ``remove_empty_dir``.
    """
    for parent, subdirs, _ in os.walk(path, topdown=False):
        for sub in subdirs:
            candidate = os.path.join(parent, sub)
            remove_empty_dir(os.path.realpath(candidate))


def remove_empty_dir(path):
    """ Remove ``path`` with ``os.rmdir``, ignoring any failure.

    Best-effort: every OSError raised by ``os.rmdir`` (for example because
    the directory is not empty or does not exist) is swallowed on purpose.
    Always returns None.
    """
    try:
        os.rmdir(path)
    except OSError:
        # Whatever os.rmdir refused to delete is simply left in place.
        return
    

def nested_pickle_dict():
    """ Create an arbitrarily nestable defaultdict.

    The default factory is this function itself, so any missing key yields
    another nested defaultdict. Using a named function as the factory
    (instead of a lambda) is what the original docstring calls "picklable".
    """
    tree = defaultdict(nested_pickle_dict)
    return tree


def format_e(n):
    """ Format ``n`` in lower-case scientific notation without padding zeros.

    The number is rendered with '%E', trailing zeros (and a then-dangling
    decimal point) are stripped from the mantissa, and the result is
    lower-cased, e.g. 0.02 -> '2e-02'.
    """
    pieces = ('%E' % n).split('E')
    mantissa = pieces[0].rstrip('0').rstrip('.')
    exponent = pieces[1]
    return '{0}E{1}'.format(mantissa, exponent).lower()

In [2]:
def get_best_performance_multivariate(mypath):
    """ For a trial, get the best performance for multivariate data

    Every entry of ``mypath`` except '.DS_Store' is loaded as a JSON file.
    The loops below read each file as ``{model: {distribution: metrics}}``,
    where ``metrics`` holds the "LR", "HDIM" and "BSIZE" hyperparameters next
    to the metric entries. For every (model, distribution, metric) the entry
    with the smallest ``min()`` is kept; on a tie the one read first wins.

    Args:
        mypath: Directory holding one trial's result files. A trailing path
            separator is optional.

    Returns:
        Nested defaultdict in which ``optimal[model][distribution][metric]``
        has a "value" key (the winning metric values) and a "parameters" key
        (``[LR, HDIM, BSIZE]`` of the run that produced them).
    """
    hyperparameter_keys = ("LR", "HDIM", "BSIZE")

    # Read in the files
    results = []
    for file in os.listdir(mypath):
        if file == '.DS_Store':
            continue

        # os.path.join rather than ``mypath + file``: a ``mypath`` without a
        # trailing separator would otherwise produce a wrong file name.
        with open(os.path.join(mypath, file), 'r') as f:
            results.append(json.load(f))

    # Initialize best dictionary
    optimal = nested_pickle_dict()

    # Go through all models, distributions, metrics, and record the best
    for result in results:
        for model, distributions in result.items():
            for distribution, metrics in distributions.items():
                for metric, values in metrics.items():
                    if metric in hyperparameter_keys:
                        continue

                    best = optimal[model][distribution]

                    # A metric seen for the first time is the best by
                    # definition; otherwise it must beat the recorded minimum.
                    if metric not in best or min(best[metric]["value"]) > min(values):
                        best[metric]["value"] = values
                        best[metric]["parameters"] = [metrics[key] for key in hyperparameter_keys]

    return optimal


def get_best_performance_mnist(*args):
    """ For a trial, get the best performance for MNIST data

    Thin alias: all positional arguments are forwarded unchanged to
    ``get_best_performance_multivariate`` and its result is returned as is.
    """
    best = get_best_performance_multivariate(*args)
    return best


def merge_mixture(dirname):
    """ Regroup the result files below ``dirname`` into ``trial_<k>/`` folders.

    Every file found recursively under each entry of ``dirname`` is moved to
    ``<outdir>trial_<k>/``, where k is the smallest index >= 1 whose trial
    folder does not yet contain a file with the same name. Afterwards the
    directories left empty below ``dirname`` are removed.

    ``outdir`` is chosen per file from the '/'-separated components of its
    path: if component 7 does not contain 'dims', it is components 0-6 plus
    component 8; otherwise it is ``dirname``.

    Args:
        dirname: Root directory. It is concatenated directly with entry
            names, so it must end with '/'.

    NOTE(review): the fixed component indices tie this function to the
    absolute directory depth it was written for; a shallower path raises
    IndexError -- confirm the expected layout before reusing it elsewhere.
    """
    for file in os.listdir(dirname):

        if '.DS_Store' in file:
            continue

        for nest in crawl_directory(dirname + file):
            parts = nest.split('/')

            if 'dims' not in parts[7]:
                outdir = '/'.join(parts[:7] + parts[8:9]) + '/'
            else:
                outdir = dirname

            # Find the first trial folder without a file of this name up
            # front, rather than attempting the move and catching every
            # possible exception to detect the name clash.
            basename = parts[-1]
            index = 1
            while os.path.exists(outdir + 'trial_{0}/'.format(index) + basename):
                index += 1

            trial_dir = outdir + 'trial_{0}/'.format(index)
            os.makedirs(trial_dir, exist_ok=True)
            shutil.move(nest, trial_dir)

    remove_empty_dirs(dirname)
    
    
def merge_multivariate(dirname):
    """ Regroup the result files below ``dirname`` into ``trial_<k>/`` folders.

    Every file found recursively under each entry of ``dirname`` is moved to
    ``<outdir>trial_<k>/``, where k is the smallest index >= 1 whose trial
    folder does not yet contain a file with the same name. Afterwards the
    directories left empty below ``dirname`` are removed.

    ``outdir`` is chosen per file from the '/'-separated components of its
    path: if component 6 does not contain 'dims', it is components 0-6 plus
    component 8; otherwise it is ``dirname`` followed by component 6.

    Args:
        dirname: Root directory. It is concatenated directly with entry
            names, so it must end with '/'.

    NOTE(review): the fixed component indices tie this function to the
    absolute directory depth it was written for; a shallower path raises
    IndexError -- confirm the expected layout before reusing it elsewhere.
    """
    for file in os.listdir(dirname):

        if '.DS_Store' in file:
            continue

        for nest in crawl_directory(dirname + file):
            parts = nest.split('/')

            if 'dims' not in parts[6]:
                outdir = '/'.join(parts[:7] + parts[8:9]) + '/'
            else:
                outdir = dirname + parts[6] + '/'

            # Find the first trial folder without a file of this name up
            # front, rather than attempting the move and catching every
            # possible exception to detect the name clash.
            basename = parts[-1]
            index = 1
            while os.path.exists(outdir + 'trial_{0}/'.format(index) + basename):
                index += 1

            trial_dir = outdir + 'trial_{0}/'.format(index)
            os.makedirs(trial_dir, exist_ok=True)
            shutil.move(nest, trial_dir)

    remove_empty_dirs(dirname)
    
    
def merge_mnist(dirname):
    """ Regroup the result files below ``dirname`` into ``trial_<k>/`` folders.

    Every file found recursively under each entry of ``dirname`` is moved to
    ``<outdir>trial_<k>/``, where k is the smallest index >= 1 whose trial
    folder does not yet contain a file with the same name. Afterwards the
    directories left empty below ``dirname`` are removed.

    ``outdir`` is chosen per file from the '/'-separated components of its
    path: if component 5 contains 'dims', it is components 0-5; otherwise it
    is ``dirname`` followed by component 7.

    Args:
        dirname: Root directory. It is concatenated directly with entry
            names, so it must end with '/'.

    NOTE(review): the fixed component indices tie this function to the
    absolute directory depth it was written for; a shallower path raises
    IndexError -- confirm the expected layout before reusing it elsewhere.
    """
    for file in os.listdir(dirname):

        if '.DS_Store' in file:
            continue

        for nest in crawl_directory(dirname + file):
            parts = nest.split('/')

            if 'dims' in parts[5]:
                outdir = '/'.join(parts[:6]) + '/'
            else:
                outdir = dirname + parts[7] + '/'

            # Find the first trial folder without a file of this name up
            # front, rather than attempting the move and catching every
            # possible exception to detect the name clash.
            basename = parts[-1]
            index = 1
            while os.path.exists(outdir + 'trial_{0}/'.format(index) + basename):
                index += 1

            trial_dir = outdir + 'trial_{0}/'.format(index)
            os.makedirs(trial_dir, exist_ok=True)
            shutil.move(nest, trial_dir)

    remove_empty_dirs(dirname)
    

def get_stats(dirname):
    """ Get missing runs for all trials

    Builds the expected result file name for every combination of learning
    rate, hidden dimension and batch size, then compares it with what is on
    disk below ``dirname`` (which is concatenated directly with entry names).

    Two layouts are handled for each entry of ``dirname``:
      * the entry holds sub-directories: each sub-directory with fewer than
        60 files contributes its missing combinations, and a
        '<complete>/20' summary is printed;
      * the entry holds files directly: listing one of them as a directory
        raises NotADirectoryError, and the entry itself is then compared
        against the expected file names.

    Returns:
        List of ``(learning rate, hidden dim, batch size)`` string tuples,
        one per missing result file; the learning rate is the unscaled value
        formatted by ``format_e``.
    """
    hidden_dims = [32, 64, 128, 256, 512]
    batch_sizes = [128, 256, 512, 1024]
    learning_rates = [2e-1, 2e-2, 2e-3]

    grid = list(itertools.product(learning_rates, hidden_dims, batch_sizes))

    # File names carry the learning rate scaled by the batch size, while the
    # reported tuple carries the unscaled one.
    expected_names = [
        'results_{0}.json'.format(
            "_".join(str(part) for part in (lr * min(batch_sizes)/bsize, hdim, bsize))
        )
        for lr, hdim, bsize in grid
    ]
    labels = [(str(format_e(lr)), str(hdim), str(bsize)) for lr, hdim, bsize in grid]

    def absent(present):
        # Labels of every expected file name that is not in ``present``.
        return [label for name, label in zip(expected_names, labels) if name not in present]

    todo = []
    for entry in os.listdir(dirname):
        if '.DS_Store' in entry:
            continue

        entry_path = dirname + entry
        print(entry, len(os.listdir(entry_path)))
        complete = 0
        try:
            for child in os.listdir(entry_path):
                if '.DS_Store' in child:
                    continue

                found = os.listdir(entry_path + '/' + child)
                count = len(found)
                print(child, count)

                if count < 60:
                    todo.extend(absent(found))
                else:
                    complete += 1

            print('{0}/20'.format(complete))
            print('\n')
        except NotADirectoryError:
            # ``child`` was a plain file, so ``entry`` is itself a folder of
            # result files: compare it directly.
            todo.extend(absent(os.listdir(entry_path)))

    return todo

In [80]:
# Collect the hyperparameter combinations whose result files are missing
# under the given 64_dims_100000_samples hypertuning folder.
TODO = get_stats('/Users/sob/Desktop/gan_results/hypertuning/multivariate/64_dims_100000_samples/')

trial_1 60
trial_10 59
trial_11 59
trial_12 57
trial_13 56
trial_14 51
trial_15 49
trial_16 45
trial_17 39
trial_18 25
trial_19 10
trial_2 60
trial_20 2
trial_3 60
trial_4 60
trial_5 60
trial_6 60
trial_7 60
trial_8 60


In [86]:
# Display the missing-run list from index 128 onwards.
TODO[128:]

[('2e-02', '256', '128'),
 ('2e-02', '256', '256'),
 ('2e-02', '256', '512'),
 ('2e-02', '256', '1024'),
 ('2e-02', '512', '128'),
 ('2e-02', '512', '256'),
 ('2e-02', '512', '512'),
 ('2e-03', '32', '128'),
 ('2e-03', '32', '256'),
 ('2e-03', '32', '512'),
 ('2e-03', '32', '1024'),
 ('2e-03', '64', '128'),
 ('2e-03', '64', '256'),
 ('2e-03', '128', '128'),
 ('2e-03', '128', '256'),
 ('2e-03', '128', '512'),
 ('2e-03', '128', '1024'),
 ('2e-03', '256', '128'),
 ('2e-03', '512', '128'),
 ('2e-03', '512', '256'),
 ('2e-03', '512', '512'),
 ('2e-03', '512', '1024'),
 ('2e-01', '32', '128'),
 ('2e-01', '32', '256'),
 ('2e-01', '32', '512'),
 ('2e-01', '32', '1024'),
 ('2e-01', '64', '128'),
 ('2e-01', '64', '256'),
 ('2e-01', '64', '512'),
 ('2e-01', '64', '1024'),
 ('2e-01', '128', '128'),
 ('2e-01', '128', '256'),
 ('2e-01', '128', '512'),
 ('2e-01', '128', '1024'),
 ('2e-01', '256', '128'),
 ('2e-01', '256', '256'),
 ('2e-01', '256', '512'),
 ('2e-01', '256', '1024'),
 ('2e-01', '512', 

In [64]:
# Dry run that mirrors the structure of merge_multivariate: the move and
# clean-up steps at the bottom are commented out, so this cell only prints
# the crawled paths that contain '64_dims_100000'.
dirname = '/Users/sob/Desktop/hypertuning/multivariate/'
outdir = dirname
for idx, file in enumerate(os.listdir(dirname)):

    if '.DS_Store' in file:
        continue

    for nest in crawl_directory(dirname + file):

        # Only used by the commented-out move logic below.
        index = 1

        # ``outdir`` is computed from fixed '/'-separated path components but
        # is not read while the move logic stays commented out.
        if 'dims' not in nest.split('/')[6]:
            outdir = '/'.join(nest.split('/')[:6]) + '/'# + nest.split('/')[8:9]) + '/'
        else:
            # Uncomment the + for mixture
            outdir = dirname + nest.split('/')[6]
        
        if '64_dims_100000' in nest:
            print(nest)
            
#        # Initialize directory
#         if not os.path.exists(outdir + 'trial_{0}/'.format(index)):
#             os.makedirs(outdir + 'trial_{0}/'.format(index))

#         try:
#             shutil.move(nest, outdir + 'trial_{0}/'.format(index))
#         except:
#             extension = nest.split('/')[-1]
#             while os.path.exists(outdir + 'trial_{0}/'.format(index) + extension):
#                 index += 1

#             if not os.path.exists(outdir + 'trial_{0}/'.format(index)):
#                 os.makedirs(outdir + 'trial_{0}/'.format(index))

#             shutil.move(nest, outdir + 'trial_{0}/'.format(index))

# remove_empty_dirs(dirname)

In [46]:
# Scan whatever directory the notebook-level ``dirname`` currently points to
# for missing runs.
TODO = get_stats(dirname)

trial_1 60
trial_10 59
trial_11 58
trial_12 55
trial_13 54
trial_14 51
trial_15 49
trial_16 48
trial_17 47
trial_18 45
trial_19 43
trial_2 60
trial_20 40
trial_3 60
trial_4 60
trial_5 60
trial_6 60
trial_7 60
trial_8 60
trial_9 60


In [51]:
# Display the missing-run list from index 64 onwards.
TODO[64:]

[('2e-02', '32', '128'),
 ('2e-02', '64', '128'),
 ('2e-02', '128', '128'),
 ('2e-02', '256', '128'),
 ('2e-02', '512', '128'),
 ('2e-03', '32', '128'),
 ('2e-03', '64', '128'),
 ('2e-03', '128', '128'),
 ('2e-03', '256', '128'),
 ('2e-03', '512', '128'),
 ('2e-01', '32', '128'),
 ('2e-01', '64', '128'),
 ('2e-01', '128', '128'),
 ('2e-01', '256', '128'),
 ('2e-01', '512', '128'),
 ('2e-02', '32', '128'),
 ('2e-02', '64', '128'),
 ('2e-02', '128', '128'),
 ('2e-02', '128', '1024'),
 ('2e-02', '256', '128'),
 ('2e-02', '512', '128'),
 ('2e-02', '512', '1024'),
 ('2e-03', '32', '128'),
 ('2e-03', '64', '128'),
 ('2e-03', '128', '128'),
 ('2e-03', '256', '128'),
 ('2e-03', '512', '128'),
 ('2e-01', '32', '128'),
 ('2e-01', '64', '128'),
 ('2e-01', '128', '128'),
 ('2e-01', '128', '256'),
 ('2e-01', '256', '128'),
 ('2e-01', '512', '128'),
 ('2e-01', '512', '512'),
 ('2e-02', '32', '128'),
 ('2e-02', '64', '128'),
 ('2e-02', '128', '128'),
 ('2e-02', '128', '1024'),
 ('2e-02', '256', '128'

In [44]:
# Collect the missing hyperparameter combinations for the
# 64_dims_100000_samples folder under the 'october' directory.
TODO = get_stats('/Users/sob/Desktop/october/64_dims_100000_samples/')

trial_1 60
trial_10 59
trial_11 59
trial_12 57
trial_13 56
trial_14 51
trial_15 49
trial_16 45
trial_17 39
trial_18 25
trial_19 10
trial_2 60
trial_20 2
trial_21 1
trial_3 60
trial_4 60
trial_5 60
trial_6 60
trial_7 60
trial_8 60


In [32]:
""" MOVE HYPERTUNING RESULTS TO BEST FOLDER """
# For every trial folder under ``dirname``, compute the best result per
# model/distribution/metric and write it to ``best_path`` as a JSON file.
#
# NOTE(review): ``import tqdm`` binds the name ``tqdm`` to the module. The
# first cell does ``from tqdm import tqdm``, so any ``tqdm(...)`` call made
# after this cell has run would hit the module instead of the function.
import tqdm
best_path = '/Users/sob/Desktop/gan_results/best/multivariate/64_dims_10000_samples/'
dirname = '/Users/sob/Desktop/gan_results/hypertuning/multivariate/64_dims_10000_samples/'
if not os.path.exists(best_path):
    os.makedirs(best_path)

files = os.listdir(dirname)
files = [f for f in files if f != '.DS_Store']
# NOTE(review): enumerate() has no length, so the progress bar presumably
# cannot show a total -- consider passing total=len(files).
for idx, f in tqdm.tqdm_notebook(enumerate(files)):
    
    optimal = get_best_performance_mnist(dirname + f + '/')
    # Report trial folders that hold fewer than 60 entries.
    if len(os.listdir(dirname + f + '/')) < 60:
        print(f, len(os.listdir(dirname + f + '/')))
    
    # The output file is numbered by position in the os.listdir() result, not
    # by the name of the trial folder ``f``. ``best_path`` already ends with
    # '/', so the resulting path contains '//'.
    with open(best_path + '/trial_{0}.json'.format(idx+1), 'w') as outfile:
        json.dump(optimal, outfile)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


