In [140]:
import numpy as np
%matplotlib inline
from matplotlib import pyplot as plt
import os

### TODO:
1. Decreasing step size in FW (-1 for rho/gamma)
2. Full dataset training (deterministic FW)

In [259]:
# SMALL setting: exactly one configuration per optimizer.
# Key order matters: it is the order in which the flags are later printed.
common = {
    'image_side': 10,
    'giveup': 100,
    'accuracy_threshold': 0.0,
    'repetitions': 1,
}
param_groups = {
    'sgd': [
        {
            'optimizer': 'sgd',
            'train_batch_size': 1000,
            'learning_rate': 0.1,
            'epochs': 100,
            **common,
        },
    ],

    'frankwolfe': [
        {
            'optimizer': 'frankwolfe',
            'train_batch_size': 1000,
            'p': 2.0,
            'R': 100.0,
            'gamma': 0.01,
            'ro': 0.6,
            'epochs': 200,
            **common,
        },
    ],

    'adam': [
        {
            'optimizer': 'adam',
            'train_batch_size': 1000,
            'p': 2.0,
            'learning_rate': 0.001,
            'beta1': 0.9,
            'beta2': 0.999,
            'epsilon': 1e-08,
            'epochs': 200,
            **common,
        },
    ],
}

In [352]:
# medium setting: a grid of configurations per optimizer.
#   sgd / adam  -> one entry per learning rate
#   frankwolfe  -> one entry per (gamma, ro) pair, gamma varying slowest
common = {
    'image_side': 5,
    'giveup': 100,
    'accuracy_threshold': 0,
    'p': 3.0,
    'repetitions': 3,
}
param_groups = {
    'sgd': [
        {
            'optimizer': 'sgd',
            'train_batch_size': 1000,
            'learning_rate': lr,
            'epochs': 100,
            **common,
        }
        for lr in [0.1, 0.01, 0.001, 0.0001]
    ],

    'frankwolfe': [
        {
            'optimizer': 'frankwolfe',
            'train_batch_size': 1000,
            'R': 20,
            'gamma': gamma,
            'ro': ro,
            'epochs': 100,
            **common,
        }
        for gamma in [-1] + list(np.linspace(0, 1, 3))
        for ro in [-1] + list(np.linspace(0, 1, 3))
    ],

    'adam': [
        {
            'optimizer': 'adam',
            'train_batch_size': 1000,
            'learning_rate': lr,
            'beta1': 0.9,
            'beta2': 0.999,
            'epsilon': 1e-08,
            'epochs': 100,
            **common,
        }
        for lr in [0.1, 0.01, 0.001, 0.0001]
    ],
}

In [353]:
# Flatten the per-optimizer groups into one list, keeping group order and in-group order.
parameters = [
    setting
    for settings in param_groups.values()
    for setting in settings
]

In [354]:
def varying_for_optim(d):
    """Return the parameter names that are not identical across all settings.

    Args:
        d: list of parameter dicts belonging to one optimizer.

    Returns:
        List of keys, in order of first appearance, that are missing from at
        least one dict or whose value differs from the one in ``d[0]``.
        An empty list is returned when ``d`` is empty.
    """
    if not d:
        return []
    reference, others = d[0], d[1:]

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is deterministic (a set of strings has no stable order).
    all_keys = dict.fromkeys(key for setting in d for key in setting)

    varying = []
    for key in all_keys:
        if key not in reference:
            # Only present in a later setting, hence not shared by all of them.
            varying.append(key)
        elif any(key not in other or reference[key] != other[key] for other in others):
            varying.append(key)
    return varying

# optimizer group -> names of the parameters that change inside that group
varying = {
    group: varying_for_optim(settings)
    for group, settings in param_groups.items()
}

print(varying)

{'sgd': ['learning_rate'], 'frankwolfe': ['gamma', 'ro'], 'adam': ['learning_rate']}


In [355]:
def print_nice(y):
    """Convert a parameter value to text.

    Floats are rounded to 10 decimal places before conversion; every other
    value is passed to str() unchanged.
    """
    value = round(y, 10) if isinstance(y, float) else y
    return str(value)

In [356]:
def print_one(**kwargs):
    """Print one backgrounded `python experiment.py` shell command.

    Each keyword argument becomes a `--name value` flag, in the order the
    keyword arguments were given; values are formatted with print_nice.
    """
    flags = []
    for name, value in kwargs.items():
        flags.append('--' + name + ' ' + print_nice(value))
    print('python experiment.py ' + " ".join(flags) + ' &')

In [357]:
# arguments in the correct order

# Read the experiment script once; the context manager guarantees the file
# handle is closed as soon as the lines have been read.
with open('experiment.py', 'r') as script:
    f = script.readlines()

# Only lines that start (at column 0) with exactly this prefix are recognised.
k = 'parser.add_argument(\'--'

# The option name is the text between the prefix and the next single quote;
# names are kept in the order in which they appear in the script.
args_in_order = [line[len(k):].split('\'')[0] for line in f if line.startswith(k)]

In [358]:
def get_file(**kwargs):
    """Build the output file name for one parameter setting.

    Every name in args_in_order contributes a `name-value` part (value
    formatted with print_nice, None when the argument was not supplied);
    the parts are joined with '_' and suffixed with '.output'.
    """
    parts = []
    for arg in args_in_order:
        parts.append(arg + '-' + print_nice(kwargs.get(arg)))
    return "_".join(parts) + '.output'

In [359]:
# Emit one launch command per parameter setting.
for it, params in enumerate(parameters):
    print_one(**params)
    # A 'sleep 5' line follows every command with an even 0-based position.
    if not it % 2:
        print('sleep 5')

it = len(parameters)
print('Total runs: ', it)
print('Total time: ', common['repetitions'] * 3 * it / 4)

python experiment.py --optimizer sgd --train_batch_size 1000 --learning_rate 0.1 --epochs 100 --image_side 5 --giveup 100 --accuracy_threshold 0 --p 3.0 --repetitions 3 &
sleep 5
python experiment.py --optimizer sgd --train_batch_size 1000 --learning_rate 0.01 --epochs 100 --image_side 5 --giveup 100 --accuracy_threshold 0 --p 3.0 --repetitions 3 &
python experiment.py --optimizer sgd --train_batch_size 1000 --learning_rate 0.001 --epochs 100 --image_side 5 --giveup 100 --accuracy_threshold 0 --p 3.0 --repetitions 3 &
sleep 5
python experiment.py --optimizer sgd --train_batch_size 1000 --learning_rate 0.0001 --epochs 100 --image_side 5 --giveup 100 --accuracy_threshold 0 --p 3.0 --repetitions 3 &
python experiment.py --optimizer frankwolfe --train_batch_size 1000 --R 20 --gamma -1 --ro -1 --epochs 100 --image_side 5 --giveup 100 --accuracy_threshold 0 --p 3.0 --repetitions 3 &
sleep 5
python experiment.py --optimizer frankwolfe --train_batch_size 1000 --R 20 --gamma -1 --ro 0.0 --epoch

In [267]:
def arr_of_dicts_to_dict_of_arrays(arr):
    """Transpose a list of dicts into a dict of lists.

    The keys are taken from the first element only; each resulting list
    holds that key's value from every element, in input order.
    """
    columns = {}
    for key in arr[0].keys():
        columns[key] = [record[key] for record in arr]
    return columns

In [274]:
def process_dict(d, do_plot = True, use_random = True):
    """Summarise the runs stored in one output file and optionally plot them.

    Args:
        d: sequence of per-run dicts. The keys of the first run define which
            metrics are summarised. The plot additionally needs the
            'train_loss', 'test_loss', 'train_acc' and 'test_acc' curves.
        do_plot: when True, draw the loss curves (left axis) and accuracy
            curves (right axis) of every run, save the figure to
            'figures/0.eps' and show it.
        use_random: accepted for interface compatibility; currently unused.

    Returns:
        Dict mapping each metric name to a list with one entry per run:
        the last element for list-valued metrics, the value itself otherwise.
        'hessian_eigens' is replaced by 'hessian_eigens_mean' and
        'hessian_eigens_max' (both empty when the runs carry no eigenvalues).
    """
    runs = d
    name = '0'  # basename of the saved figure

    # The first run decides which keys are reported; the raw eigenvalue list
    # is only reported through its mean and max.
    metric_keys = list(runs[0])
    results = {key: [] for key in metric_keys if key != 'hessian_eigens'}
    results['hessian_eigens_mean'] = []
    results['hessian_eigens_max'] = []

    for run in runs:
        for key in metric_keys:
            value = run[key]
            if key == 'hessian_eigens':
                results['hessian_eigens_mean'].append(np.mean(value))
                results['hessian_eigens_max'].append(np.max(value))
            elif isinstance(value, list):
                results[key].append(value[-1]) # appending LAST loss/accuracy
            else:
                results[key].append(value)

    if do_plot:
        fig, ax1 = plt.subplots()
        ax2 = ax1.twinx()  # second y-axis sharing the epoch axis

        ax1.set_xlabel('epochs')
        ax1.set_ylabel('loss', color='b')
        ax1.tick_params('y', colors='b')

        ax2.set_ylabel('accuracy', color='r')
        ax2.tick_params('y', colors='r')

        for run in runs:
            ax1.plot(run['train_loss'], label = 'train_loss')
            ax1.plot(run['test_loss'], label = 'test_loss')

            ax2.plot(run['train_acc'], label = 'train_acc')
            ax2.plot(run['test_acc'], label = 'test_acc')

        fig.tight_layout()
        fig.legend()
        # savefig does not create missing directories.
        os.makedirs('figures', exist_ok = True)
        plt.savefig('figures/' + name + '.eps', bbox_inches = 'tight')
        plt.show()

    return results

In [275]:
def process_file(f, do_plot = False):
    """Load one experiment output file and summarise it with process_dict.

    Args:
        f: path of the output file.
        do_plot: forwarded to process_dict.

    Returns:
        Whatever process_dict returns, or None (after printing a message)
        when the file does not exist or its content starts with 'Nothing['.
    """
    if not os.path.isfile(f):
        print('File ' + f + ' missing')
        return None

    # Context manager so the handle is closed even if reading fails.
    with open(f, 'r') as handle:
        content = handle.read()

    if content.startswith('Nothing['):
        print('File ' + f + " is empty")
        return None

    # SECURITY NOTE(review): eval() executes arbitrary code found in the file.
    # Only use this on output files you produced yourself. If the files hold
    # plain Python literals, ast.literal_eval would be a safe replacement.
    # TODO: confirm the output format before switching.
    d = eval(content)
    return process_dict(d, do_plot)

In [307]:
# i-th entry = processed summary of parameters[i]
# (None when process_file found the output file missing or empty)
params_to_processed = [
    process_file(get_file(**setting), do_plot = False)
    for setting in parameters
]

In [308]:
# Print every parameter setting followed by its processed summary.
for param in parameters:
    print(param)
    # get_file builds the output file name for this setting; process_file
    # loads it (printing a notice and returning None if it is missing/empty).
    fs = process_file(get_file(**param))
    print(fs)

{'optimizer': 'sgd', 'train_batch_size': 1000, 'learning_rate': 0.1, 'epochs': 100, 'image_side': 10, 'giveup': 100, 'accuracy_threshold': 0.0, 'repetitions': 1}
{'train_loss': [1.8829033], 'test_loss': [1.8760109], 'train_acc': [0.61038333], 'test_acc': [0.6179], 'hessian_eigens_mean': [0.000596617091062123], 'hessian_eigens_max': [0.6542341]}
{'optimizer': 'frankwolfe', 'train_batch_size': 1000, 'p': 2.0, 'R': 100.0, 'gamma': 0.01, 'ro': 0.6, 'epochs': 200, 'image_side': 10, 'giveup': 100, 'accuracy_threshold': 0.0, 'repetitions': 1}
{'train_loss': [1.8721464], 'test_loss': [1.8653136], 'train_acc': [0.61845], 'test_acc': [0.6254], 'hessian_eigens_mean': [0.0003984750364007894], 'hessian_eigens_max': [0.4615565]}
{'optimizer': 'adam', 'train_batch_size': 1000, 'p': 2.0, 'learning_rate': 0.001, 'beta1': 0.9, 'beta2': 0.999, 'epsilon': 1e-08, 'epochs': 200, 'image_side': 10, 'giveup': 100, 'accuracy_threshold': 0.0, 'repetitions': 1}
{'train_loss': [1.4957184], 'test_loss': [1.5014652]

In [309]:
params_to_processed

[{'train_loss': [1.8829033],
  'test_loss': [1.8760109],
  'train_acc': [0.61038333],
  'test_acc': [0.6179],
  'hessian_eigens_mean': [0.000596617091062123],
  'hessian_eigens_max': [0.6542341]},
 {'train_loss': [1.8721464],
  'test_loss': [1.8653136],
  'train_acc': [0.61845],
  'test_acc': [0.6254],
  'hessian_eigens_mean': [0.0003984750364007894],
  'hessian_eigens_max': [0.4615565]},
 {'train_loss': [1.4957184],
  'test_loss': [1.5014652],
  'train_acc': [0.9698167],
  'test_acc': [0.963],
  'p_norm': [101.4558],
  'hessian_eigens_mean': [0.0006790847303972931],
  'hessian_eigens_max': [0.8046694]}]

In [310]:
def selection_metric(summary):
    """Score one entry of params_to_processed with a single number.

    The score is the mean of the entry's 'test_acc' values (one per run).
    """
    test_accuracies = summary['test_acc']
    return np.mean(test_accuracies)

In [318]:
def param_to_result(param):
    """Return the processed summary stored for a parameter setting.

    The setting is located in `parameters` and the entry at the same
    position of `params_to_processed` is returned.
    """
    return params_to_processed[parameters.index(param)]

In [319]:
def select_best(optim):
    """Select best parameters for an optimizer.

    Args:
        optim: key of param_groups naming the optimizer.

    Returns:
        Index into `parameters` of the setting in that group with the
        highest selection_metric. Settings whose processed result is None
        are skipped.

    Raises:
        ValueError: if no setting of the group has a processed result.
    """
    candidates = []
    metrics = []
    for p in param_groups[optim]:
        summary = param_to_result(p)
        if summary is None:
            # process_file yields None for a missing or empty output file;
            # such a setting cannot be scored, so leave it out.
            continue
        candidates.append(p)
        metrics.append(selection_metric(summary))

    if not metrics:
        raise ValueError('No processed results available for optimizer ' + repr(optim))

    # np.argmax returns the first index of the maximum, so ties go to the
    # earliest setting in the group.
    best_idx = np.argmax(metrics)
    return parameters.index(candidates[best_idx])

In [322]:
select_best('adam')

2