In [16]:
# Config sampler for PPO hyperparameter tuning
%load_ext autoreload
%autoreload 2

import copy
import json
import os
import random
from tqdm import tqdm
import yaml

import numpy as np
from scipy.stats import loguniform

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
# Default root directory for generated configs: sample_configs() creates one
# subdirectory per sweep under this path unless a different config_dir is passed.
# NOTE(review): this is an absolute, machine-specific path — confirm it is valid
# on the host where the notebook is run.
CONFIG_DIR = '/global/scratch/open_spiel/open_spiel/notebooks/configs'

def sample_config(base_config):
    """Draw one random PPO hyperparameter configuration.

    A deep copy of ``base_config`` is taken and the sampled keys are written
    on top of it, so ``base_config`` itself is never modified and any keys
    that are not sampled here are carried over unchanged.

    Randomness comes from two global generators: numpy's ``np.random`` (also
    used by ``loguniform.rvs`` when no ``random_state`` is passed) and
    Python's ``random`` module. Seed both before calling for reproducible
    output. The order of the draws below is deliberate: reordering them
    changes which values a given seed produces.

    Args:
        base_config: dict of baseline settings. Must contain ``'agent_fn'``,
            whose lower-cased value must be ``'auctionnet'`` or ``'ppoagent'``.

    Returns:
        A new dict: the copied base config with sampled values for the
        training hyperparameters, ``'optimizer'``, ``'optimizer_kwargs'``,
        ``'max_grad_norm'`` and ``'agent_fn_kwargs'``.

    Raises:
        KeyError: if ``base_config`` has no ``'agent_fn'`` entry.
        ValueError: if ``agent_fn`` is not one of the recognized agents.
    """
    # Validate up front so an unusable base config fails before any random
    # numbers are consumed from the global generators.
    agent_fn = base_config['agent_fn'].lower()
    if agent_fn not in ('auctionnet', 'ppoagent'):
        raise ValueError(f'Unrecognized agent_fn {agent_fn}')

    config = copy.deepcopy(base_config)

    # Values are cast to builtin types so the result contains no numpy scalars.
    config['steps_per_batch'] = int(np.random.choice([64, 128]))
    config['num_minibatches'] = int(np.random.choice([4, 8]))
    config['update_epochs'] = int(np.random.choice([4, 8, 16]))
    config['learning_rate'] = float(np.random.choice([3e-5, 6e-6, 9e-5, 1e-4, 3e-4]))
    config['gae'] = bool(np.random.choice([True, False]))
    # 'gamma' is intentionally NOT sampled; it keeps whatever base_config has.
    config['anneal_lr'] = bool(np.random.choice([True, False]))
    config['gae_lambda'] = float(np.random.uniform(0.94, 1.))
    config['clip_coef'] = float(loguniform.rvs(0.0003, .3, size=1)[0])
    config['clip_vloss'] = bool(np.random.choice([True, False]))
    config['entropy_coef'] = float(np.random.choice([1e-4, 1e-5, 1e-6, 0, 3e-5, 3e-6]))
    config['value_coef'] = float(loguniform.rvs(0.1, 1.3, size=1)[0])
    config['num_envs'] = int(np.random.choice([4, 8, 16]))
    config['normalize_advantages'] = bool(np.random.choice([True, False]))

    config['optimizer'] = random.choice(['adam', 'rmsprop', 'sgd'])
    optimizer_kwargs = dict()
    if config['optimizer'] == 'adam':
        beta1 = float(np.random.uniform(0.8, .9))
        beta2 = float(np.random.uniform(0.8, .999))
        optimizer_kwargs['betas'] = [beta1, beta2]
    elif config['optimizer'] in ('sgd', 'rmsprop'):
        # No optimizer-specific hyperparameters are sampled for these.
        pass
    else:
        # Only reachable if the choice list above and these branches diverge.
        raise ValueError(f"Unrecognized optimizer {config['optimizer']}")

    config['optimizer_kwargs'] = optimizer_kwargs
    config['max_grad_norm'] = float(np.random.uniform(0.1, 1))

    agent_kwargs = dict()
    if agent_fn == 'auctionnet':
        agent_kwargs['activation'] = str(np.random.choice(['relu', 'tanh']))
        agent_kwargs['hidden_sizes'] = random.choice([
            [32, 32, 32], [64, 64, 64], [128, 128, 128],
        ])
        agent_kwargs['add_skip_connections'] = bool(np.random.choice([True, False]))
        agent_kwargs['use_torso'] = bool(np.random.choice([True, False]))
    else:  # 'ppoagent' (MLP): actor and critic get the same architecture
        hidden_sizes = random.choice([
            [64, 64], [128, 128], [256, 256],
            [64, 64, 64], [128, 128, 128], [256, 256, 256],
        ])
        activation = str(np.random.choice(['relu', 'tanh']))
        # Store independent copies: a shared list object would make PyYAML
        # emit an anchor/alias pair (&id001 / *id001) in the dumped file and
        # would let a later in-place edit of one entry change the other.
        agent_kwargs['actor_hidden_sizes'] = list(hidden_sizes)
        agent_kwargs['critic_hidden_sizes'] = list(hidden_sizes)
        agent_kwargs['actor_activation'] = activation
        agent_kwargs['critic_activation'] = activation

    config['agent_fn_kwargs'] = agent_kwargs

    return config

def sample_configs(config_subdir, base_config, n_configs, seed=1234, config_dir=CONFIG_DIR, overwrite=False):
    """Sample ``n_configs`` configurations and write each one to a YAML file.

    Files are written as ``<config_dir>/<config_subdir>/ppo_<i>.yml`` for
    ``i`` in ``range(n_configs)``. Both Python's ``random`` module and
    numpy's global RNG are re-seeded with ``seed`` before sampling (a side
    effect visible to the rest of the process).

    Args:
        config_subdir: name of the subdirectory to create under ``config_dir``.
        base_config: base configuration passed to ``sample_config`` for
            every draw.
        n_configs: number of configuration files to produce.
        seed: seed applied to both random number generators.
        config_dir: root directory that holds the config subdirectories.
        overwrite: when False, an existing target directory is an error.
            When True, the directory is reused; files with matching names
            are rewritten and any other files already there are left alone.

    Raises:
        ValueError: if the target directory exists and ``overwrite`` is False.
    """
    target_dir = f'{config_dir}/{config_subdir}'

    already_present = os.path.exists(target_dir)
    if already_present and not overwrite:
        raise ValueError(f'Config subdir {config_subdir} already exists')
    os.makedirs(target_dir, exist_ok=True)

    # Seed both generators: sample_config draws from each of them.
    random.seed(seed)
    np.random.seed(seed)

    print(f'Writing {n_configs} configs to {target_dir}...')
    for idx in tqdm(range(n_configs)):
        sampled = sample_config(base_config)
        out_path = f'{target_dir}/ppo_{idx}.yml'
        with open(out_path, 'w') as out_file:
            yaml.dump(sampled, out_file)
    print('All done, goodbye!')

# Feb10: PPO tuning

In [10]:
# Generate 30 configs for the AuctionNet sweep from a previously written config.
# NOTE(review): presumably ppo_76.yml has agent_fn set to AuctionNet, since the
# value is used as loaded — confirm. The path is relative to the notebook's
# working directory.
# A context manager is used so the file handle is closed right after parsing.
with open('configs/ppo_may31_23/ppo_76.yml') as base_file:
    base_auction_net = yaml.safe_load(base_file)
sample_configs('feb10_ppo_auctionnet', base_auction_net, 30, seed=1234)


In [18]:
# Generate 30 configs for the MLP sweep: same base file as the AuctionNet
# sweep, with the agent switched to PPOAgent before sampling.
# A context manager is used so the file handle is closed right after parsing.
with open('configs/ppo_may31_23/ppo_76.yml') as base_file:
    base_mlp = yaml.safe_load(base_file)
base_mlp['agent_fn'] = 'PPOAgent'
# overwrite=True reuses an existing 'feb10_ppo_mlp' directory instead of raising.
sample_configs('feb10_ppo_mlp', base_mlp, 30, seed=1234, overwrite=True)

Writing 30 configs to /global/scratch/open_spiel/open_spiel/notebooks/configs/feb10_ppo_mlp...


  0%|          | 0/30 [00:00<?, ?it/s]

100%|██████████| 30/30 [00:00<00:00, 126.74it/s]
