In [1]:
# Config sampler for PPO hyperparameter tuning
%load_ext autoreload
%autoreload 2

import copy
import json
import os
import random

import numpy as np
import yaml
from scipy.stats import loguniform

In [2]:
# Default root directory under which sample_configs() writes generated YAML
# files (it is the default value of that function's `config_dir` argument).
# NOTE(review): this is an absolute, machine-specific path, while other cells
# in this notebook read/write under the relative 'configs/' directory --
# confirm which location is intended before running on another machine.
CONFIG_DIR = '/global/scratch/open_spiel/open_spiel/notebooks/configs'

def sample_config(base_config):
    """Return a copy of ``base_config`` with randomly drawn hyperparameters.

    ``base_config`` itself is never modified; a deep copy is taken first and
    the sampled entries are written into (or overwrite entries of) that copy.
    Keys of ``base_config`` that are not sampled here are carried over as-is
    (``gamma`` is deliberately not sampled).

    Draws come from the global ``random`` and ``np.random`` generators, so
    seed both beforehand for reproducible output. The order of the draws
    below is significant: reordering them changes which values a given seed
    produces.

    Returns:
        dict: the new configuration, including the nested
        ``optimizer_kwargs`` and ``agent_fn_kwargs`` dictionaries.

    Raises:
        ValueError: if the drawn optimizer name is not one of the known ones.
    """

    def coin_flip():
        # Fair True/False draw from the NumPy global generator.
        return bool(np.random.choice([True, False]))

    def pick(options, cast):
        # Uniform draw from a discrete set, converted to a plain Python type.
        return cast(np.random.choice(options))

    def log_uniform(low, high):
        # Single log-uniform draw on [low, high] as a Python float.
        return float(loguniform.rvs(low, high, size=1)[0])

    sampled = copy.deepcopy(base_config)

    # Core training hyperparameters (dict displays evaluate top to bottom,
    # which keeps the draw sequence fixed).
    sampled.update({
        'steps_per_batch': pick([64, 128], int),
        'num_minibatches': pick([4, 8], int),
        'update_epochs': pick([4, 8, 16], int),
        'learning_rate': pick([3e-5, 6e-6, 9e-5, 1e-4, 3e-4], float),
        'gae': coin_flip(),
        'anneal_lr': coin_flip(),
        'gae_lambda': float(np.random.uniform(0.94, 1.)),
        'clip_coef': log_uniform(0.0003, .3),
        'clip_vloss': coin_flip(),
        'entropy_coef': pick([1e-4, 1e-5, 1e-6, 0, 3e-5, 3e-6], float),
        'value_coef': log_uniform(0.1, 1.3),
        'num_envs': pick([4, 8, 16], int),
        'normalize_advantages': coin_flip(),
    })

    # Optimizer choice; only adam gets extra keyword arguments.
    optimizer_name = random.choice(['adam', 'rmsprop', 'sgd'])
    if optimizer_name not in ('adam', 'sgd', 'rmsprop'):
        raise ValueError()
    optimizer_kwargs = {}
    if optimizer_name == 'adam':
        optimizer_kwargs['betas'] = [
            float(np.random.uniform(0.8, .9)),
            float(np.random.uniform(0.8, .999)),
        ]

    # Remaining optimizer settings and the agent network description.
    sampled.update({
        'optimizer': optimizer_name,
        'optimizer_kwargs': optimizer_kwargs,
        'max_grad_norm': float(np.random.uniform(0.1, 1)),
        'agent_fn_kwargs': {
            'activation': pick(['relu', 'tanh'], str),
            'hidden_sizes': random.choice([
                [32, 32, 32], [64, 64, 64], [128, 128, 128],
            ]),
            'add_skip_connections': coin_flip(),
            'use_torso': coin_flip(),
        },
    })

    return sampled

In [None]:
### CONFIG GENERATOR
# Base configuration that the sampled configs below are derived from.
# Opened in a `with` block so the file handle is closed deterministically
# instead of being left to the garbage collector.
with open('configs/ppo_may31_23/ppo_76.yml') as f:
    base = yaml.safe_load(f)


In [None]:
N_CONFIGS = 50
SEED = 1234  # fixed so that Restart & Run All regenerates the same sweep

# sample_config draws from both the stdlib `random` module and the NumPy
# global generator (scipy's loguniform.rvs also uses the NumPy global state
# when no random_state is given), so both are seeded here.
random.seed(SEED)
np.random.seed(SEED)

# Each entry is an independently sampled variation of `base`. This reuses
# sample_config rather than repeating its body inline, so the search space is
# defined in exactly one place.
configs = [sample_config(base) for _ in range(N_CONFIGS)]

def sample_configs(config_subdir, base_config, n_configs, seed=1234, config_dir=CONFIG_DIR):
    """Sample ``n_configs`` configurations and write each to its own YAML file.

    Both the stdlib ``random`` module and the NumPy global generator are
    re-seeded with ``seed`` first, so the same arguments always produce the
    same set of configurations.

    Files are written to ``{config_dir}/{config_subdir}/config_{i}.yml`` for
    ``i`` in ``range(n_configs)``. The target directory is created if it does
    not exist yet; existing files with the same names are overwritten.

    Args:
        config_subdir: Sub-directory (relative to ``config_dir``) to write into.
        base_config: Configuration passed to ``sample_config`` for every draw.
        n_configs: Number of configurations to sample.
        seed: Seed applied to ``random`` and ``np.random`` before sampling.
        config_dir: Root directory for generated configs.

    Returns:
        list: the sampled configurations, in the order they were written.
    """
    random.seed(seed)
    np.random.seed(seed)
    configs = [sample_config(base_config) for _ in range(n_configs)]

    # Create the destination up front; open(..., 'w') does not create
    # missing parent directories and would fail for a new sub-directory.
    out_dir = f'{config_dir}/{config_subdir}'
    os.makedirs(out_dir, exist_ok=True)

    for i, config in enumerate(configs):
        with open(f'{out_dir}/config_{i}.yml', 'w') as f:
            yaml.dump(config, f)

    return configs

In [None]:
new_date = 'jun15_23'

# Write every sampled config to configs/<new_date>/ppo_<i>.yml.
# os.makedirs(..., exist_ok=True) is the portable equivalent of `mkdir -p`.
out_dir = f'configs/{new_date}'
os.makedirs(out_dir, exist_ok=True)
for i, config in enumerate(configs):
    # `with` guarantees each file is flushed and closed before the next one
    # is opened, rather than relying on garbage collection of the handle.
    with open(f'{out_dir}/ppo_{i}.yml', 'w') as f:
        yaml.dump(config, f)
    