In [2]:
import pickle
import pandas as pd
from datetime import datetime
import os
import numpy as np

from pathlib import Path
import shutil
import itertools

# Baseline BART Environment

Only active trials, both start/stop or hold, number of colors used

### Basic Experiment

In [4]:
colors = 3
num_trials = 3

rows = []

for t in range(num_trials):
    exp_name = f'3color_test_t{t}'
    env_kwargs = {
        'colors_used': colors,
    }
    row = env_kwargs.copy()
    seed = t
    row['name'] = exp_name
    row['seed'] = seed

    rows.append(row)
    config = {
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 300000,
        'env_name': 'BartEnv-v0',
        'algo': 'ppo',
        'num_mini_batch': 16,
        'num_processes': 16,
        'num_steps': 128,

        'checkpoint_interval': 10,
        'save_dir': 'basics',

        'seed': seed,
        'no_cuda': True,
        'env_kwargs': env_kwargs,

        'nn_base': 'FlexBaseAux',
        'nn_base_kwargs': {'hidden_size': 8, 'recurrent': True},
    }

    
    row['name'] = config['exp_name']
    row['batch_size'] = config['num_steps']
    row['seed'] = config['seed']
    row['processes'] = config['num_processes']
    row['mini_batch'] = config['num_mini_batch']
    row['total_steps'] = config['num_env_steps']
    
    if 'recurrent' in config:
        row['recurrent'] = True
    else:
        row['recurrent'] = False
    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,name,seed,batch_size,processes,mini_batch,total_steps,recurrent
0,3,3color_test_t0,0,128,16,16,300000,True
1,3,3color_test_t1,1,128,16,16,300000,True
2,3,3color_test_t2,2,128,16,16,300000,True


## Fundamental Hyperparams

### Batch Size

In [17]:
batch_sizes = [16, 32, 64, 128, 256]
colors = [1, 3]
num_trials = 3
rows = []

for i, (batch, color) in enumerate(itertools.product(batch_sizes, colors)):
    for t in range(num_trials):
        exp_name = f'{color}color_mlp_bs{batch}_t{t}'
    
        env_kwargs = {'colors_used': color}
        row = env_kwargs.copy()
        seed = t + i*10
        row['name'] = exp_name
        row['batch_size'] = batch
        row['seed'] = seed
    
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 8,
            'num_processes': 2,
            # 'num_mini_batch': 1,
            # 'num_processes': 1,
            'num_steps': batch,
    
            'checkpoint_interval': 10,
            'save_dir': 'basics',
    
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
    
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }
    
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,name,batch_size,seed
0,1,1color_mlp_bs16_t0,16,0
1,1,1color_mlp_bs16_t1,16,1
2,1,1color_mlp_bs16_t2,16,2
3,3,3color_mlp_bs16_t0,16,10
4,3,3color_mlp_bs16_t1,16,11
5,3,3color_mlp_bs16_t2,16,12
6,1,1color_mlp_bs32_t0,32,20
7,1,1color_mlp_bs32_t1,32,21
8,1,1color_mlp_bs32_t2,32,22
9,3,3color_mlp_bs32_t0,32,30


### Network Size

In [19]:
network_sizes = [2, 4, 8, 16, 32, 64]
colors = [1, 3]
batch = 64
num_trials = 3
rows = []

for i, (network, color) in enumerate(itertools.product(network_sizes, colors)):
    for t in range(num_trials):
        exp_name = f'{color}color_mlp_ns{network}_t{t}'
    
        env_kwargs = {'colors_used': color}
        row = env_kwargs.copy()
        seed = t + i*10
        row['name'] = exp_name
        row['batch_size'] = batch
        row['seed'] = seed
    
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 8,
            'num_processes': 2,
            # 'num_mini_batch': 1,
            # 'num_processes': 1,
            'num_steps': batch,
    
            'checkpoint_interval': 10,
            'save_dir': 'basics',
    
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
    
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': network},
        }
    
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,name,batch_size,seed
0,1,1color_mlp_ns2_t0,64,0
1,1,1color_mlp_ns2_t1,64,1
2,1,1color_mlp_ns2_t2,64,2
3,3,3color_mlp_ns2_t0,64,10
4,3,3color_mlp_ns2_t1,64,11
5,3,3color_mlp_ns2_t2,64,12
6,1,1color_mlp_ns4_t0,64,20
7,1,1color_mlp_ns4_t1,64,21
8,1,1color_mlp_ns4_t2,64,22
9,3,3color_mlp_ns4_t0,64,30


### Number of processes

In [None]:
num_processes = [2, 4, 8, 16, 32, 64]
network = 64
colors = [1, 3]
batch = 64
num_trials = 3
rows = []

for i, (processes, color) in enumerate(itertools.product(num_processes, colors)):
    for t in range(num_trials):
        exp_name = f'{color}color_mlp_nump{processes}_t{t}'
    
        env_kwargs = {'colors_used': color}
        row = env_kwargs.copy()
        seed = t + i*10
        row['name'] = exp_name
        row['batch_size'] = batch
        row['seed'] = seed
    
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 8,
            'num_processes': processes,
            # 'num_mini_batch': 1,
            # 'num_processes': 1,
            'num_steps': batch,
    
            'checkpoint_interval': 10,
            'save_dir': 'basics',
    
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
    
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': network},
        }
    
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,name,batch_size,seed
0,1,1color_mlp_nump2_t0,64,0
1,1,1color_mlp_nump2_t1,64,1
2,1,1color_mlp_nump2_t2,64,2
3,3,3color_mlp_nump2_t0,64,10
4,3,3color_mlp_nump2_t1,64,11
5,3,3color_mlp_nump2_t2,64,12
6,1,1color_mlp_nump4_t0,64,20
7,1,1color_mlp_nump4_t1,64,21
8,1,1color_mlp_nump4_t2,64,22
9,3,3color_mlp_nump4_t0,64,30


Testing even more processes to see if it scales up

In [5]:
num_processes = [128, 256, 512]
network = 64
colors = [3]
batch = 64
num_trials = 3
rows = []

for i, (processes, color) in enumerate(itertools.product(num_processes, colors)):
    for t in range(num_trials):
        exp_name = f'{color}color_mlp_nump{processes}_t{t}'
    
        env_kwargs = {'colors_used': color}
        row = env_kwargs.copy()
        seed = t + i*10
        row['name'] = exp_name
        row['batch_size'] = batch
        row['seed'] = seed
    
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 300000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 8,
            'num_processes': processes,
            # 'num_mini_batch': 1,
            # 'num_processes': 1,
            'num_steps': batch,
    
            'checkpoint_interval': 10,
            'save_dir': 'basics',
    
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
    
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': network},
        }
    
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,name,batch_size,seed
0,3,3color_mlp_nump128_t0,64,0
1,3,3color_mlp_nump128_t1,64,1
2,3,3color_mlp_nump128_t2,64,2
3,3,3color_mlp_nump256_t0,64,10
4,3,3color_mlp_nump256_t1,64,11
5,3,3color_mlp_nump256_t2,64,12
6,3,3color_mlp_nump512_t0,64,20
7,3,3color_mlp_nump512_t1,64,21
8,3,3color_mlp_nump512_t2,64,22


## num_mini_batch

In [6]:
processes = 64
network = 16
color = 3
batch = 64
num_trials = 3
num_mini_batches = [4, 8, 16, 32]
rows = []

for i, num_mini_batch in enumerate(num_mini_batches):
    for t in range(num_trials):
        exp_name = f'{color}color_mlp_minib{num_mini_batch}_t{t}'
    
        env_kwargs = {'colors_used': color}
        row = env_kwargs.copy()
        seed = t + i*10
        row['name'] = exp_name
        row['batch_size'] = batch
        row['seed'] = seed
    
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 300000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': num_mini_batch,
            'num_processes': processes,
            'num_steps': batch,
    
            'checkpoint_interval': 10,
            'save_dir': 'basics',
    
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
    
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': network},
        }
    
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,name,batch_size,seed
0,3,3color_mlp_minib4_t0,64,0
1,3,3color_mlp_minib4_t1,64,1
2,3,3color_mlp_minib4_t2,64,2
3,3,3color_mlp_minib8_t0,64,10
4,3,3color_mlp_minib8_t1,64,11
5,3,3color_mlp_minib8_t2,64,12
6,3,3color_mlp_minib16_t0,64,20
7,3,3color_mlp_minib16_t1,64,21
8,3,3color_mlp_minib16_t2,64,22
9,3,3color_mlp_minib32_t0,64,30


# Recurrent Network

In [11]:
network = 16
colors = 3
batch = 64
num_trials = 3
rows = []

network_types = ['rnn', 'mlp']
recurrents = [True, False]

for i, network_type in enumerate(network_types):
    for t in range(num_trials):
        exp_name = f'memory_{network_type}_t{t}'

        env_kwargs = {'colors_used': colors,
                    'give_size': False}
        row = env_kwargs.copy()
        seed = t
        
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 8,
            'num_processes': 64,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'rnn',

            'subproc_vec': True,
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': network},
        }
        
        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        
        if recurrents[i]:
            config['nn_base_kwargs']['recurrent'] = True
            row['recurrent'] = True

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,give_size,name,batch_size,seed,processes,mini_batch,recurrent
0,3,False,memory_rnn_t0,64,0,64,8,True
1,3,False,memory_rnn_t1,64,1,64,8,True
2,3,False,memory_rnn_t2,64,2,64,8,True
3,3,False,memory_mlp_t0,64,0,64,8,
4,3,False,memory_mlp_t1,64,1,64,8,
5,3,False,memory_mlp_t2,64,2,64,8,


In [3]:
network_sizes = [2, 4, 8, 16]
colors = 3
batch = 256
num_trials = 3
rows = []

for i, network_size in enumerate(network_sizes):
    for t in range(num_trials):
        exp_name = f'memory_ns{network_size}_t{t}'

        env_kwargs = {'colors_used': colors,
                    'give_size': False}
        row = env_kwargs.copy()
        seed = t
        
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 300000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'rnn',

            'subproc_vec': True,
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': network_size,
            'recurrent': True},
        }
        
        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['recurrent'] = config['recurrent']
        row['network_size'] = config['nn_base_kwargs']['hidden_size']
        

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,give_size,name,batch_size,seed,processes,mini_batch,recurrent,network_size
0,3,False,memory_ns2_t0,256,0,16,16,True,2
1,3,False,memory_ns2_t1,256,1,16,16,True,2
2,3,False,memory_ns2_t2,256,2,16,16,True,2
3,3,False,memory_ns4_t0,256,0,16,16,True,4
4,3,False,memory_ns4_t1,256,1,16,16,True,4
5,3,False,memory_ns4_t2,256,2,16,16,True,4
6,3,False,memory_ns8_t0,256,0,16,16,True,8
7,3,False,memory_ns8_t1,256,1,16,16,True,8
8,3,False,memory_ns8_t2,256,2,16,16,True,8
9,3,False,memory_ns16_t0,256,0,16,16,True,16


In [None]:
network_sizes = [2, 4, 8, 16]
colors = 3
batch = 256
num_trials = 3
rows = []

for i, network_size in enumerate(network_sizes):
    for t in range(num_trials):
        exp_name = f'memory_ns{network_size}_t{t}'

        env_kwargs = {'colors_used': colors,
                    'give_size': False}
        row = env_kwargs.copy()
        seed = t
        
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 300000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'rnn',

            'subproc_vec': True,
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': network_size,
            'recurrent': True},
        }
        
        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['recurrent'] = config['recurrent']
        row['network_size'] = config['nn_base_kwargs']['hidden_size']
        

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,give_size,name,batch_size,seed,processes,mini_batch,recurrent,network_size
0,3,False,memory_ns2_t0,256,0,16,16,True,2
1,3,False,memory_ns2_t1,256,1,16,16,True,2
2,3,False,memory_ns2_t2,256,2,16,16,True,2
3,3,False,memory_ns4_t0,256,0,16,16,True,4
4,3,False,memory_ns4_t1,256,1,16,16,True,4
5,3,False,memory_ns4_t2,256,2,16,16,True,4
6,3,False,memory_ns8_t0,256,0,16,16,True,8
7,3,False,memory_ns8_t1,256,1,16,16,True,8
8,3,False,memory_ns8_t2,256,2,16,16,True,8
9,3,False,memory_ns16_t0,256,0,16,16,True,16


In [None]:
network_sizes = [2, 4, 8, 16]
colors = 3
batch = 256
num_trials = 3
rows = []

for i, network_size in enumerate(network_sizes):
    for t in range(num_trials):
        exp_name = f'memory_ns{network_size}_t{t}'

        env_kwargs = {'colors_used': colors,
                    'give_size': False}
        row = env_kwargs.copy()
        seed = t
        
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 300000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'rnn',

            'subproc_vec': True,
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': network_size,
                               'recurrent': True},
        }
        
        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['recurrent'] = config['recurrent']
        row['network_size'] = config['nn_base_kwargs']['hidden_size']
        

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,give_size,name,batch_size,seed,processes,mini_batch,recurrent,network_size
0,3,False,memory_ns2_t0,256,0,16,16,True,2
1,3,False,memory_ns2_t1,256,1,16,16,True,2
2,3,False,memory_ns2_t2,256,2,16,16,True,2
3,3,False,memory_ns4_t0,256,0,16,16,True,4
4,3,False,memory_ns4_t1,256,1,16,16,True,4
5,3,False,memory_ns4_t2,256,2,16,16,True,4
6,3,False,memory_ns8_t0,256,0,16,16,True,8
7,3,False,memory_ns8_t1,256,1,16,16,True,8
8,3,False,memory_ns8_t2,256,2,16,16,True,8
9,3,False,memory_ns16_t0,256,0,16,16,True,16


# Toggle task

Testing the task where the agent has to toggle the inflation on or off

In [5]:
network = 8
colors = 3
batch = 256
num_trials = 3
rows = []

network_types = ['rnn', 'mlp']

for i, network_type in enumerate(network_types):
    for t in range(num_trials):
        exp_name = f'toggle_{network_type}_t{t}'

        env_kwargs = {'colors_used': colors,
                      'hold_to_inflate': False}
        row = env_kwargs.copy()
        seed = t
        
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 300000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'toggle',

            'subproc_vec': True,
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': network},
        }
        
        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        
        if network_type == 'rnn':
            config['nn_base_kwargs']['recurrent'] = True
            row['recurrent'] = True
        
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,hold_to_inflate,name,batch_size,seed,processes,mini_batch,total_steps,recurrent
0,3,False,toggle_rnn_t0,256,0,16,16,300000,True
1,3,False,toggle_rnn_t1,256,1,16,16,300000,True
2,3,False,toggle_rnn_t2,256,2,16,16,300000,True
3,3,False,toggle_mlp_t0,256,0,16,16,300000,
4,3,False,toggle_mlp_t1,256,1,16,16,300000,
5,3,False,toggle_mlp_t2,256,2,16,16,300000,


In [7]:
network = 8
colors = 3
batch = 256
num_trials = 3
rows = []

network_types = ['rnn', 'mlp']

for i, network_type in enumerate(network_types):
    for t in range(num_trials):
        exp_name = f'3color_{network_type}_t{t}'

        env_kwargs = {'colors_used': colors,
                      'hold_to_inflate': True}
        row = env_kwargs.copy()
        seed = t
        
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 300000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'toggle',

            'subproc_vec': True,
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': network},
        }
        
        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        
        if network_type == 'rnn':
            config['nn_base_kwargs']['recurrent'] = True
            row['recurrent'] = True
        
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,hold_to_inflate,name,batch_size,seed,processes,mini_batch,total_steps,recurrent
0,3,True,3color_rnn_t0,256,0,16,16,300000,True
1,3,True,3color_rnn_t1,256,1,16,16,300000,True
2,3,True,3color_rnn_t2,256,2,16,16,300000,True
3,3,True,3color_mlp_t0,256,0,16,16,300000,
4,3,True,3color_mlp_t1,256,1,16,16,300000,
5,3,True,3color_mlp_t2,256,2,16,16,300000,


## Toggle with memory

In [8]:
network = 8
colors = 3
batch = 256
num_trials = 3
rows = []

network_types = ['rnn', 'mlp']

for i, network_type in enumerate(network_types):
    for t in range(num_trials):
        exp_name = f'togglemem_{network_type}_t{t}'

        env_kwargs = {'colors_used': colors,
                      'hold_to_inflate': False,
                      'give_size': False}
        row = env_kwargs.copy()
        seed = t
        
        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 300000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'toggle',

            'subproc_vec': True,
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': network},
        }
        
        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        
        if network_type == 'rnn':
            config['nn_base_kwargs']['recurrent'] = True
            row['recurrent'] = True
        
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,colors_used,hold_to_inflate,give_size,name,batch_size,seed,processes,mini_batch,total_steps,recurrent
0,3,False,False,togglemem_rnn_t0,256,0,16,16,300000,True
1,3,False,False,togglemem_rnn_t1,256,1,16,16,300000,True
2,3,False,False,togglemem_rnn_t2,256,2,16,16,300000,True
3,3,False,False,togglemem_mlp_t0,256,0,16,16,300000,
4,3,False,False,togglemem_mlp_t1,256,1,16,16,300000,
5,3,False,False,togglemem_mlp_t2,256,2,16,16,300000,


# Behavior comparisons

These are similar to tasks we ran before, but now recording per episode data for plotting


In [10]:

colors = 3
num_trials = 3

variations = [
    {'key': '', 'config': {}},
    {'key': 'ns2_', 'config': {'nn_base_kwargs': {'hidden_size': 2,
                                                  'recurrent': True}}},
    {'key': 'batch16_', 'config': {'num_steps': 16}},
    {'key': 'mlp_', 'config': {'nn_base_kwargs': {'hidden_size': 8,
                                                  'recurrent': False}}}
]

rows = []

for i, variation in enumerate(variations):
    for t in range(num_trials):
        exp_name = f'behav_{variation["key"]}t{t}'
        env_kwargs = {}
        row = env_kwargs.copy()
        seed = i*10+t
        row['name'] = exp_name
        row['seed'] = seed

        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 300000,
            'env_name': 'BartEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': 128,

            'checkpoint_interval': 10,
            'save_dir': 'behav',

            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 8,
                                'recurrent': True},
        }

        for k, v in variation['config'].items():
            config[k] = v

        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        row['recurrent'] = config['nn_base_kwargs']['recurrent']
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,name,seed,batch_size,processes,mini_batch,total_steps,recurrent
0,behav_t0,0,128,16,16,300000,True
1,behav_t1,1,128,16,16,300000,True
2,behav_t2,2,128,16,16,300000,True
3,behav_ns2_t0,10,128,16,16,300000,True
4,behav_ns2_t1,11,128,16,16,300000,True
5,behav_ns2_t2,12,128,16,16,300000,True
6,behav_batch16_t0,20,16,16,16,300000,True
7,behav_batch16_t1,21,16,16,16,300000,True
8,behav_batch16_t2,22,16,16,16,300000,True
9,behav_mlp_t0,30,128,16,16,300000,False


# DelayedRNNPPO

In [7]:
colors = 3
num_trials = 3
rows = []

for t in range(num_trials):
    exp_name = f'behav_delayedrnn_t{t}'
    env_kwargs = {}
    row = env_kwargs.copy()
    seed = 1*10+t
    row['name'] = exp_name
    row['seed'] = seed

    rows.append(row)
    config = {
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 200000,
        'env_name': 'BartEnv-v0',
        'algo': 'ppo',
        'num_mini_batch': 16,
        'num_processes': 16,
        'num_steps': 128,

        'checkpoint_interval': 10,
        'ep_verbosity': 100,
        'save_dir': 'behav',

        'seed': seed,
        'no_cuda': True,
        'env_kwargs': env_kwargs,

        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': 8,},
    }
    # for k, v in variation['config'].items():
    #     config[k] = v

    row['name'] = config['exp_name']
    row['batch_size'] = config['num_steps']
    row['seed'] = config['seed']
    row['processes'] = config['num_processes']
    row['mini_batch'] = config['num_mini_batch']
    row['total_steps'] = config['num_env_steps']
    # row['recurrent'] = config['nn_base_kwargs']['recurrent']
    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,name,seed,batch_size,processes,mini_batch,total_steps
0,behav_delayedrnn_t0,10,128,16,16,200000
1,behav_delayedrnn_t1,11,128,16,16,200000
2,behav_delayedrnn_t2,12,128,16,16,200000


# Meta BART

In [9]:
colors = 3
num_trials = 3
rows = []

for t in range(num_trials):
    exp_name = f'meta_t{t}'
    env_kwargs = {}
    row = env_kwargs.copy()
    seed = 1*10+t
    row['name'] = exp_name
    row['seed'] = seed

    rows.append(row)
    config = {
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 500000,
        'env_name': 'BartMetaEnv',
        'algo': 'ppo',
        'num_mini_batch': 16,
        'num_processes': 16,
        'num_steps': 128,

        'checkpoint_interval': 10,
        'ep_verbosity': 1,
        'save_dir': 'meta',

        'seed': seed,
        'no_cuda': True,
        'env_kwargs': env_kwargs,

        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': 32,},
    }
    # for k, v in variation['config'].items():
    #     config[k] = v

    row['name'] = config['exp_name']
    row['batch_size'] = config['num_steps']
    row['seed'] = config['seed']
    row['processes'] = config['num_processes']
    row['mini_batch'] = config['num_mini_batch']
    row['total_steps'] = config['num_env_steps']
    # row['recurrent'] = config['nn_base_kwargs']['recurrent']
    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,name,seed,batch_size,processes,mini_batch,total_steps
0,meta_t0,10,128,16,16,500000
1,meta_t1,11,128,16,16,500000
2,meta_t2,12,128,16,16,500000


## MetaSetup 1: non-ordered balloon mean sizes

In [10]:
colors = 3
num_trials = 3
rows = []

for t in range(num_trials):
    exp_name = f'meta_uni_t{t}'
    env_kwargs = {'meta_setup': 1}
    row = env_kwargs.copy()
    seed = 1*10+t
    row['name'] = exp_name
    row['seed'] = seed

    rows.append(row)
    config = {
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 500000,
        'env_name': 'BartMetaEnv',
        'algo': 'ppo',
        'num_mini_batch': 16,
        'num_processes': 16,
        'num_steps': 128,

        'checkpoint_interval': 10,
        'ep_verbosity': 1,
        'save_dir': 'meta',

        'seed': seed,
        'no_cuda': True,
        'env_kwargs': env_kwargs,

        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': 64,},
    }
    # for k, v in variation['config'].items():
    #     config[k] = v

    row['name'] = config['exp_name']
    row['batch_size'] = config['num_steps']
    row['seed'] = config['seed']
    row['processes'] = config['num_processes']
    row['mini_batch'] = config['num_mini_batch']
    row['total_steps'] = config['num_env_steps']
    # row['recurrent'] = config['nn_base_kwargs']['recurrent']
    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,name,seed,batch_size,processes,mini_batch,total_steps
0,1,meta_uni_t0,10,128,16,16,500000
1,1,meta_uni_t1,11,128,16,16,500000
2,1,meta_uni_t2,12,128,16,16,500000


## 1 Color

In [3]:
colors = 1
num_trials = 3
rows = []

for t in range(num_trials):
    exp_name = f'meta_1c_t{t}'
    env_kwargs = {'meta_setup': 1}
    row = env_kwargs.copy()
    seed = 1*10+t
    row['name'] = exp_name
    row['seed'] = seed

    rows.append(row)
    config = {
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 500000,
        'env_name': 'BartMetaEnv',
        'algo': 'ppo',
        'num_mini_batch': 16,
        'num_processes': 16,
        'num_steps': 128,

        'checkpoint_interval': 10,
        'ep_verbosity': 1,
        'save_dir': 'meta',

        'seed': seed,
        'no_cuda': True,
        'env_kwargs': env_kwargs,

        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': 64,},
    }
    # for k, v in variation['config'].items():
    #     config[k] = v

    row['name'] = config['exp_name']
    row['batch_size'] = config['num_steps']
    row['seed'] = config['seed']
    row['processes'] = config['num_processes']
    row['mini_batch'] = config['num_mini_batch']
    row['total_steps'] = config['num_env_steps']
    # row['recurrent'] = config['nn_base_kwargs']['recurrent']
    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,name,seed,batch_size,processes,mini_batch,total_steps
0,1,meta_1c_t0,10,128,16,16,500000
1,1,meta_1c_t1,11,128,16,16,500000
2,1,meta_1c_t2,12,128,16,16,500000


In [4]:
colors = 1
num_trials = 3
rows = []

for t in range(num_trials):
    exp_name = f'meta_1c1p_t{t}'
    env_kwargs = {'meta_setup': 1}
    row = env_kwargs.copy()
    seed = 1*10+t
    row['name'] = exp_name
    row['seed'] = seed

    rows.append(row)
    config = {
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 500000,
        'env_name': 'BartMetaEnv',
        'algo': 'ppo',
        'num_mini_batch': 1,
        'num_processes': 1,
        'num_steps': 4000,

        'checkpoint_interval': 10,
        'ep_verbosity': 1,
        'save_dir': 'meta',

        'seed': seed,
        'no_cuda': True,
        'env_kwargs': env_kwargs,

        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': 64,},
    }
    # for k, v in variation['config'].items():
    #     config[k] = v

    row['name'] = config['exp_name']
    row['batch_size'] = config['num_steps']
    row['seed'] = config['seed']
    row['processes'] = config['num_processes']
    row['mini_batch'] = config['num_mini_batch']
    row['total_steps'] = config['num_env_steps']
    # row['recurrent'] = config['nn_base_kwargs']['recurrent']
    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,name,seed,batch_size,processes,mini_batch,total_steps
0,1,meta_1c1p_t0,10,4000,1,1,500000
1,1,meta_1c1p_t1,11,4000,1,1,500000
2,1,meta_1c1p_t2,12,4000,1,1,500000


## Easier version of meta bart

The earlier versions looked like they may not train the correct things. This variation tests

* Give points throughout inflation
* Give points at a scaling rate of 2x^1.3
* Remove randomization of inflate speed and size

In [8]:
num_trials = 3

rew_structures = [0, 1, 2, 3]
rows = []

for rew_struct in rew_structures:
    for t in range(num_trials):
        exp_name = f'meta_easyr{rew_struct}_{t}'
        env_kwargs = {'meta_setup': 1, 'colors_used': 1, 
                    'inflate_noise': 0, 'pop_noise': 0,
                    'rew_structure': rew_struct, 'max_steps': 1000}
        row = env_kwargs.copy()
        seed = rew_struct*10+t
        row['name'] = exp_name
        row['seed'] = seed

        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 500000,
            'env_name': 'BartMetaEnv',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': 128,

            'checkpoint_interval': 10,
            'ep_verbosity': 1,
            'save_dir': 'meta',

            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64,},
        }
        # for k, v in variation['config'].items():
        #     config[k] = v

        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        # row['recurrent'] = config['nn_base_kwargs']['recurrent']
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,colors_used,inflate_noise,pop_noise,rew_structure,max_steps,name,seed,batch_size,processes,mini_batch,total_steps
0,1,1,0,0,0,1000,meta_easyr0_0,0,128,16,16,500000
1,1,1,0,0,0,1000,meta_easyr0_1,1,128,16,16,500000
2,1,1,0,0,0,1000,meta_easyr0_2,2,128,16,16,500000
3,1,1,0,0,1,1000,meta_easyr1_0,10,128,16,16,500000
4,1,1,0,0,1,1000,meta_easyr1_1,11,128,16,16,500000
5,1,1,0,0,1,1000,meta_easyr1_2,12,128,16,16,500000
6,1,1,0,0,2,1000,meta_easyr2_0,20,128,16,16,500000
7,1,1,0,0,2,1000,meta_easyr2_1,21,128,16,16,500000
8,1,1,0,0,2,1000,meta_easyr2_2,22,128,16,16,500000
9,1,1,0,0,3,1000,meta_easyr3_0,30,128,16,16,500000


In [10]:
num_trials = 3

rew_structures = [4, 5]
rows = []

for rew_struct in rew_structures:
    for t in range(num_trials):
        exp_name = f'meta_easyr{rew_struct}_{t}'
        env_kwargs = {'meta_setup': 1, 'colors_used': 1, 
                    'inflate_noise': 0, 'pop_noise': 0,
                    'rew_structure': rew_struct, 'max_steps': 1000}
        row = env_kwargs.copy()
        seed = rew_struct*10+t
        row['name'] = exp_name
        row['seed'] = seed

        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartMetaEnv',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': 128,

            'checkpoint_interval': 10,
            'ep_verbosity': 1,
            'save_dir': 'meta',

            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64,},
        }
        # for k, v in variation['config'].items():
        #     config[k] = v

        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        # row['recurrent'] = config['nn_base_kwargs']['recurrent']
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,colors_used,inflate_noise,pop_noise,rew_structure,max_steps,name,seed,batch_size,processes,mini_batch,total_steps
0,1,1,0,0,4,1000,meta_easyr4_0,40,128,16,16,1000000
1,1,1,0,0,4,1000,meta_easyr4_1,41,128,16,16,1000000
2,1,1,0,0,4,1000,meta_easyr4_2,42,128,16,16,1000000
3,1,1,0,0,5,1000,meta_easyr5_0,50,128,16,16,1000000
4,1,1,0,0,5,1000,meta_easyr5_1,51,128,16,16,1000000
5,1,1,0,0,5,1000,meta_easyr5_2,52,128,16,16,1000000


In [11]:
num_trials = 3

batch_sizes = [256, 512]
rows = []

for i, batch_size in enumerate(batch_sizes):
    for t in range(num_trials):
        exp_name = f'meta_easyr4b{batch_size}_{t}'
        env_kwargs = {'meta_setup': 1, 'colors_used': 1, 
                    'inflate_noise': 0, 'pop_noise': 0,
                    'rew_structure': 4, 'max_steps': 1000}
        row = env_kwargs.copy()
        seed = i*10+t
        row['name'] = exp_name
        row['seed'] = seed

        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartMetaEnv',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': batch_size,

            'checkpoint_interval': 10,
            'ep_verbosity': 1,
            'save_dir': 'meta',

            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64,},
        }
        # for k, v in variation['config'].items():
        #     config[k] = v

        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        # row['recurrent'] = config['nn_base_kwargs']['recurrent']
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,colors_used,inflate_noise,pop_noise,rew_structure,max_steps,name,seed,batch_size,processes,mini_batch,total_steps
0,1,1,0,0,4,1000,meta_easyr4b256_0,0,256,16,16,1000000
1,1,1,0,0,4,1000,meta_easyr4b256_1,1,256,16,16,1000000
2,1,1,0,0,4,1000,meta_easyr4b256_2,2,256,16,16,1000000
3,1,1,0,0,4,1000,meta_easyr4b512_0,10,512,16,16,1000000
4,1,1,0,0,4,1000,meta_easyr4b512_1,11,512,16,16,1000000
5,1,1,0,0,4,1000,meta_easyr4b512_2,12,512,16,16,1000000


### Limit by num balloons

The exact number of balloons seems to not be hugely important in 

In [15]:
num_trials = 3

num_balloons = [30, 50, 70]
rows = []

for i, n in enumerate(num_balloons):
    for t in range(num_trials):
        exp_name = f'meta_easyr0n{n}_{t}'
        env_kwargs = {'meta_setup': 1, 'colors_used': 1, 
                    'inflate_noise': 0, 'pop_noise': 0,
                    'rew_structure': 0, 'max_steps': 1500,
                    'num_balloons': n}
        row = env_kwargs.copy()
        seed = i*10+t
        row['name'] = exp_name
        row['seed'] = seed

        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartMetaEnv',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': 128,

            'checkpoint_interval': 10,
            'ep_verbosity': 1,
            'save_dir': 'meta',

            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64,},
        }
        # for k, v in variation['config'].items():
        #     config[k] = v

        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        # row['recurrent'] = config['nn_base_kwargs']['recurrent']
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,colors_used,inflate_noise,pop_noise,rew_structure,max_steps,num_balloons,name,seed,batch_size,processes,mini_batch,total_steps
0,1,1,0,0,0,1500,30,meta_easyr0n30_0,0,128,16,16,1000000
1,1,1,0,0,0,1500,30,meta_easyr0n30_1,1,128,16,16,1000000
2,1,1,0,0,0,1500,30,meta_easyr0n30_2,2,128,16,16,1000000
3,1,1,0,0,0,1500,50,meta_easyr0n50_0,10,128,16,16,1000000
4,1,1,0,0,0,1500,50,meta_easyr0n50_1,11,128,16,16,1000000
5,1,1,0,0,0,1500,50,meta_easyr0n50_2,12,128,16,16,1000000
6,1,1,0,0,0,1500,70,meta_easyr0n70_0,20,128,16,16,1000000
7,1,1,0,0,0,1500,70,meta_easyr0n70_1,21,128,16,16,1000000
8,1,1,0,0,0,1500,70,meta_easyr0n70_2,22,128,16,16,1000000


In [14]:
num_trials = 3

rows = []
exp_types = ['r0', 'r4', 'easyr4']

for i, exp_type in enumerate(exp_types):
    for t in range(num_trials):
        r = exp_type[-1]
        exp_name = f'meta_{exp_type}n50_{t}'
        env_kwargs = {'meta_setup': 1, 'colors_used': 1, 
                    'rew_structure': r, 'max_steps': 1500,
                    'num_balloons': 50}
        
        if 'easy' in exp_type:
            env_kwargs['inflate_noise'] = 0
            env_kwargs['pop_noise'] = 0
        row = env_kwargs.copy()
        seed = i*10+t
        row['name'] = exp_name
        row['seed'] = seed

        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartMetaEnv',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': 128,

            'checkpoint_interval': 10,
            'ep_verbosity': 1,
            'save_dir': 'meta',

            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64,},
        }
        # for k, v in variation['config'].items():
        #     config[k] = v

        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        # row['recurrent'] = config['nn_base_kwargs']['recurrent']
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,colors_used,rew_structure,max_steps,num_balloons,name,seed,batch_size,processes,mini_batch,total_steps,inflate_noise,pop_noise
0,1,1,0,1500,50,meta_r0n50_0,0,128,16,16,1000000,,
1,1,1,0,1500,50,meta_r0n50_1,1,128,16,16,1000000,,
2,1,1,0,1500,50,meta_r0n50_2,2,128,16,16,1000000,,
3,1,1,4,1500,50,meta_r4n50_0,10,128,16,16,1000000,,
4,1,1,4,1500,50,meta_r4n50_1,11,128,16,16,1000000,,
5,1,1,4,1500,50,meta_r4n50_2,12,128,16,16,1000000,,
6,1,1,4,1500,50,meta_easyr4n50_0,20,128,16,16,1000000,0.0,0.0
7,1,1,4,1500,50,meta_easyr4n50_1,21,128,16,16,1000000,0.0,0.0
8,1,1,4,1500,50,meta_easyr4n50_2,22,128,16,16,1000000,0.0,0.0


In [17]:
num_trials = 3

rows = []
exp_type = 'r4'

for t in range(num_trials):
    r = exp_type[-1]
    exp_name = f'meta_{exp_type}_{t}'
    env_kwargs = {'meta_setup': 1, 'colors_used': 1, 
                'rew_structure': 4, 'max_steps': 1500,} 
    row = env_kwargs.copy()
    seed = i*10+t
    row['name'] = exp_name
    row['seed'] = seed

    rows.append(row)
    config = {
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 1000000,
        'env_name': 'BartMetaEnv',
        'algo': 'ppo',
        'num_mini_batch': 16,
        'num_processes': 16,
        'num_steps': 128,

        'checkpoint_interval': 10,
        'ep_verbosity': 1,
        'save_dir': 'meta',

        'seed': seed,
        'no_cuda': True,
        'env_kwargs': env_kwargs,

        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': 64,},
    }
    # for k, v in variation['config'].items():
    #     config[k] = v

    row['name'] = config['exp_name']
    row['batch_size'] = config['num_steps']
    row['seed'] = config['seed']
    row['processes'] = config['num_processes']
    row['mini_batch'] = config['num_mini_batch']
    row['total_steps'] = config['num_env_steps']
    # row['recurrent'] = config['nn_base_kwargs']['recurrent']
    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,colors_used,rew_structure,max_steps,name,seed,batch_size,processes,mini_batch,total_steps
0,1,1,4,1500,meta_r4_0,20,128,16,16,1000000
1,1,1,4,1500,meta_r4_1,21,128,16,16,1000000
2,1,1,4,1500,meta_r4_2,22,128,16,16,1000000


#### Batch size with num balloon limit

In [16]:
num_trials = 3

rows = []
batch_sizes = [128, 256, 512]
for i, b in enumerate(batch_sizes):
    for t in range(num_trials):
        exp_name = f'meta_r0n50b{b}_{t}'
        env_kwargs = {'meta_setup': 1, 'colors_used': 1, 
                    'rew_structure': 0, 'max_steps': 1500,
                    'num_balloons': 50}
        row = env_kwargs.copy()
        seed = i*10+t
        row['name'] = exp_name
        row['seed'] = seed

        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartMetaEnv',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': b,

            'checkpoint_interval': 10,
            'ep_verbosity': 1,
            'save_dir': 'meta',

            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64,},
        }
        # for k, v in variation['config'].items():
        #     config[k] = v

        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        # row['recurrent'] = config['nn_base_kwargs']['recurrent']
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,colors_used,rew_structure,max_steps,num_balloons,name,seed,batch_size,processes,mini_batch,total_steps
0,1,1,0,1500,50,meta_r0n50b128_0,0,128,16,16,1000000
1,1,1,0,1500,50,meta_r0n50b128_1,1,128,16,16,1000000
2,1,1,0,1500,50,meta_r0n50b128_2,2,128,16,16,1000000
3,1,1,0,1500,50,meta_r0n50b256_0,10,256,16,16,1000000
4,1,1,0,1500,50,meta_r0n50b256_1,11,256,16,16,1000000
5,1,1,0,1500,50,meta_r0n50b256_2,12,256,16,16,1000000
6,1,1,0,1500,50,meta_r0n50b512_0,20,512,16,16,1000000
7,1,1,0,1500,50,meta_r0n50b512_1,21,512,16,16,1000000
8,1,1,0,1500,50,meta_r0n50b512_2,22,512,16,16,1000000


In [18]:
num_trials = 3

rows = []
batch_sizes = [128, 256, 512]
for i, b in enumerate(batch_sizes):
    for t in range(num_trials):
        exp_name = f'meta_easyr0n50b{b}_{t}'
        env_kwargs = {'meta_setup': 1, 'colors_used': 1, 
                    'rew_structure': 0, 'max_steps': 1500,
                    'num_balloons': 50}
        env_kwargs['inflate_noise'] = 0
        env_kwargs['pop_noise'] = 0
        row = env_kwargs.copy()
        seed = i*10+t
        row['name'] = exp_name
        row['seed'] = seed

        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartMetaEnv',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': b,

            'checkpoint_interval': 10,
            'ep_verbosity': 1,
            'save_dir': 'meta',

            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64,},
        }
        # for k, v in variation['config'].items():
        #     config[k] = v

        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        # row['recurrent'] = config['nn_base_kwargs']['recurrent']
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,colors_used,rew_structure,max_steps,num_balloons,inflate_noise,pop_noise,name,seed,batch_size,processes,mini_batch,total_steps
0,1,1,0,1500,50,0,0,meta_easyr0n50b128_0,0,128,16,16,1000000
1,1,1,0,1500,50,0,0,meta_easyr0n50b128_1,1,128,16,16,1000000
2,1,1,0,1500,50,0,0,meta_easyr0n50b128_2,2,128,16,16,1000000
3,1,1,0,1500,50,0,0,meta_easyr0n50b256_0,10,256,16,16,1000000
4,1,1,0,1500,50,0,0,meta_easyr0n50b256_1,11,256,16,16,1000000
5,1,1,0,1500,50,0,0,meta_easyr0n50b256_2,12,256,16,16,1000000
6,1,1,0,1500,50,0,0,meta_easyr0n50b512_0,20,512,16,16,1000000
7,1,1,0,1500,50,0,0,meta_easyr0n50b512_1,21,512,16,16,1000000
8,1,1,0,1500,50,0,0,meta_easyr0n50b512_2,22,512,16,16,1000000


In [19]:
num_trials = 3

rows = []
batch_sizes = [128, 256, 512]
for i, b in enumerate(batch_sizes):
    for t in range(num_trials):
        exp_name = f'meta_r0n30b{b}_{t}'
        env_kwargs = {'meta_setup': 1, 'colors_used': 1, 
                    'rew_structure': 0, 'max_steps': 1500,
                    'num_balloons': 30}
        row = env_kwargs.copy()
        seed = i*10+t
        row['name'] = exp_name
        row['seed'] = seed

        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartMetaEnv',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': b,

            'checkpoint_interval': 10,
            'ep_verbosity': 1,
            'save_dir': 'meta',

            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64,},
        }
        # for k, v in variation['config'].items():
        #     config[k] = v

        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        # row['recurrent'] = config['nn_base_kwargs']['recurrent']
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,colors_used,rew_structure,max_steps,num_balloons,name,seed,batch_size,processes,mini_batch,total_steps
0,1,1,0,1500,30,meta_r0n30b128_0,0,128,16,16,1000000
1,1,1,0,1500,30,meta_r0n30b128_1,1,128,16,16,1000000
2,1,1,0,1500,30,meta_r0n30b128_2,2,128,16,16,1000000
3,1,1,0,1500,30,meta_r0n30b256_0,10,256,16,16,1000000
4,1,1,0,1500,30,meta_r0n30b256_1,11,256,16,16,1000000
5,1,1,0,1500,30,meta_r0n30b256_2,12,256,16,16,1000000
6,1,1,0,1500,30,meta_r0n30b512_0,20,512,16,16,1000000
7,1,1,0,1500,30,meta_r0n30b512_1,21,512,16,16,1000000
8,1,1,0,1500,30,meta_r0n30b512_2,22,512,16,16,1000000


## Increasing Gamma

In [None]:
num_trials = 3

rows = []
batch_sizes = [128, 256, 512]
for i, b in enumerate(batch_sizes):
    for t in range(num_trials):
        exp_name = f'meta_easyr0n50b{b}_{t}'
        env_kwargs = {'meta_setup': 1, 'colors_used': 1, 
                    'rew_structure': 0, 'max_steps': 1500,
                    'num_balloons': 50}
        env_kwargs['inflate_noise'] = 0
        env_kwargs['pop_noise'] = 0
        row = env_kwargs.copy()
        seed = i*10+t
        row['name'] = exp_name
        row['seed'] = seed

        rows.append(row)
        config = {
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 1000000,
            'env_name': 'BartMetaEnv',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'num_steps': b,

            'checkpoint_interval': 10,
            'ep_verbosity': 1,
            'save_dir': 'meta',

            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64,},
        }
        # for k, v in variation['config'].items():
        #     config[k] = v

        row['name'] = config['exp_name']
        row['batch_size'] = config['num_steps']
        row['seed'] = config['seed']
        row['processes'] = config['num_processes']
        row['mini_batch'] = config['num_mini_batch']
        row['total_steps'] = config['num_env_steps']
        # row['recurrent'] = config['nn_base_kwargs']['recurrent']
        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))


pd.DataFrame(rows)

Unnamed: 0,meta_setup,colors_used,rew_structure,max_steps,num_balloons,inflate_noise,pop_noise,name,seed,batch_size,processes,mini_batch,total_steps
0,1,1,0,1500,50,0,0,meta_easyr0n50b128_0,0,128,16,16,1000000
1,1,1,0,1500,50,0,0,meta_easyr0n50b128_1,1,128,16,16,1000000
2,1,1,0,1500,50,0,0,meta_easyr0n50b128_2,2,128,16,16,1000000
3,1,1,0,1500,50,0,0,meta_easyr0n50b256_0,10,256,16,16,1000000
4,1,1,0,1500,50,0,0,meta_easyr0n50b256_1,11,256,16,16,1000000
5,1,1,0,1500,50,0,0,meta_easyr0n50b256_2,12,256,16,16,1000000
6,1,1,0,1500,50,0,0,meta_easyr0n50b512_0,20,512,16,16,1000000
7,1,1,0,1500,50,0,0,meta_easyr0n50b512_1,21,512,16,16,1000000
8,1,1,0,1500,50,0,0,meta_easyr0n50b512_2,22,512,16,16,1000000
