# Write Experiments

* This file is used to generate the config files for running experiments
* It also records the env_kwargs that are added to the trained_models folder, so that agents can be loaded together with their corresponding envs

JSON files will be written for main.py to read its arguments from, rather than taking them from the command line

In [2]:
import pickle
import pandas as pd
from datetime import datetime
import os

## Experiment 1: Number of Objects, dist vs goal

Basic toy example: each config pairs a number of objects with a reward structure ('dist' or 'goal').

In [33]:
# Experiment 1 (recurrent variant): write one pickled config per
# (reward structure, object count) pair into ../experiment_configs/.
num_objs = [7]
rew_structures = ['dist', 'goal']

for rew_struct in rew_structures:
    for num_obj in num_objs:
        exp_name = f'nav_{num_obj}_obj_{rew_struct}_recurrent'
        config = {
            'wandb_project_name': 'nav1',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 250000,
            'env_name': 'NavEnv-v0',
            'recurrent_policy': True,
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'env_kwargs': {'num_objects': num_obj, 'rew_structure': rew_struct},
            'track': True,
            'capture_video': 1,
        }

        # Context manager closes the file deterministically instead of
        # leaving the handle open until garbage collection.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

In [8]:
# Experiment 1 (no 'recurrent_policy' key): write one pickled config per
# (reward structure, object count) pair into ../experiment_configs/.
num_objs = [0, 1, 2, 3, 4]
rew_structures = ['dist', 'goal']

for rew_struct in rew_structures:
    for num_obj in num_objs:
        exp_name = f'nav_{num_obj}_obj_{rew_struct}'
        config = {
            'wandb_project_name': 'nav1',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 250000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'env_kwargs': {'num_objects': num_obj, 'rew_structure': rew_struct},
            'track': True,
            'capture_video': 1,
        }

        # Context manager closes the file deterministically instead of
        # leaving the handle open until garbage collection.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

In [4]:
# Single small-scale config (4 processes) for the zero-object nav task.
# NOTE(review): this writes to the same path ('nav_0_obj_goal') that the
# num_objs/rew_structures sweep produces for (0 objects, 'goal'), so whichever
# cell runs last wins.
# NOTE(review): the name says 'goal' but env_kwargs sets no 'rew_structure';
# presumably the env default is relied upon -- confirm.
exp_name = 'nav_0_obj_goal'
config = {
    'wandb_project_name': 'nav1',
    'exp_name': exp_name,
    'save_name': exp_name,
    'num_env_steps': 250000,
    'env_name': 'NavEnv-v0',
    'recurrent_policy': True,
    'algo': 'ppo',
    'num_mini_batch': 4,
    'num_processes': 4,
    'env_kwargs': {'num_objects': 0},
    'track': True,
    'capture_video': True,
}

# Context manager closes the file deterministically.
with open('../experiment_configs/' + exp_name, 'wb') as fh:
    pickle.dump(config, fh)

In [18]:
# Read back a previously written config to inspect its contents.
# The file is opened in a context manager so the handle is closed right away;
# the trailing bare name keeps the notebook displaying the loaded dict.
with open('../experiment_configs/nav_0_obj', 'rb') as fh:
    loaded_config = pickle.load(fh)
loaded_config

{'wandb_project_name': 'nav1',
 'exp_name': 'nav_0_obj',
 'save_name': 'nav_0_obj',
 'num_env_steps': 10000,
 'env_name': 'NavEnv-v0',
 'recurrent_policy': True,
 'algo': 'ppo',
 'num_mini_batch': 4,
 'num_processes': 4,
 'env_kwargs': {'num_objects': 0},
 'track': True,
 'capture_video': True}

In [9]:
# Collect the names of all entries currently in the experiment config folder.
files = os.listdir('../experiment_configs/')

In [10]:
# Print every name in `files`, skipping the notebook checkpoint folder and
# the archive folder.
for file in files:
    if file in ('.ipynb_checkpoints', 'archive'):
        continue
    print(file)

hello_world


In [16]:
# Inspect the pickled experiment log.
# The file is opened in a context manager so the handle is closed right away;
# the trailing bare name keeps the notebook displaying the loaded object.
with open('experiment_log', 'rb') as fh:
    experiment_log = pickle.load(fh)
experiment_log

Unnamed: 0,file,begin,end,exp_name,save_name,num_env_steps,env_name,recurrent_policy,algo,num_mini_batch,num_processes,success,env_kwargs,wandb_project_name
0,hello_world,2021-10-25 18:23:03.197613,,hello_world,hello_world,10000,CartPole-v0,True,ppo,4,4,,,
1,hello_world,2021-10-25 18:28:38.870516,2021-10-25 18:43:41.004250,hello_world,hello_world,10000,CartPole-v0,True,ppo,4,4,,,
2,hello_world,2021-10-25 18:58:10.351818,2021-10-25 18:59:07.591750,hello_world,hello_world,10000,CartPole-v0,True,ppo,4,4,True,,
3,nav_0_obj,2021-10-27 13:05:19.228026,,nav_0_obj,nav_0_obj,10000,NavEnv-v0,True,ppo,4,4,,{'num_objects': 0},nav1


## Scheduler functions

These functions live in scheduler.py and are tested here; they are used to run and track the experiments in experiment_configs

In [3]:
from scheduler import *
import torch

In [3]:
# Load the pickled object stored in 'exp_log.json' (it is read with pickle,
# not a JSON parser). The context manager closes the handle immediately.
with open('exp_log.json', 'rb') as fh:
    js = pickle.load(fh)

In [6]:
# Parse the loaded `js` object into a DataFrame.
# NOTE(review): presumably `js` is a JSON string produced by DataFrame.to_json -- confirm.
df = pd.read_json(js)

In [7]:
# Persist the DataFrame as a pickle named 'experiment_log' in the working directory.
df.to_pickle('experiment_log')

In [5]:
# Run the named experiment through the scheduler helper.
# NOTE(review): cont=True presumably resumes from an existing checkpoint -- confirm in scheduler.py.
run_experiment('morris_epstruct_1_recurrent_sep_false', cont=True)

In [4]:
# Show the command-line string that the scheduler builds from this config
# (the cell output below is the resulting `python main.py ...` command).
convert_config_to_command('morris_epstruct_1_recurrent_sep_false', cont=True)

'python main.py --wandb-project-name nav2 --exp-name morris_epstruct_1_recurrent_sep_false --save-name morris_epstruct_1_recurrent_sep_false --num-env-steps 500000 --env-name MorrisEnv-v0 --recurrent-policy --algo ppo --num-mini-batch 16 --num-processes 16 --env-kwargs ep_struct=1 character_sep=False --track --capture-video 1 --cont --config-file-name morris_epstruct_1_recurrent_sep_false '

In [6]:
# Load a saved checkpoint and unpack its two elements.
# NOTE(review): obs_rms is presumably the observation running-statistics object -- confirm.
# NOTE(review): torch.load unpickles arbitrary objects; only load checkpoints from a trusted source.
actor_critic, obs_rms = torch.load('trained_models/ppo/morris_epstruct_1_recurrent_sep_false.pt')

In [9]:
# Display obs_rms.count converted to an int.
int(obs_rms.count)

192096

In [10]:
from a2c_ppo_acktr import algo, utils


In [15]:
# Smoke test: build a PPO object around the loaded actor_critic.
# NOTE(review): the positional values are presumably clip_param, ppo_epoch,
# num_mini_batch, value_loss_coef and entropy_coef -- confirm against the
# a2c_ppo_acktr.algo.PPO signature.
algo.PPO(actor_critic, 1.2, 100, 16, 0.1, 0.1, lr=0.005, eps=0.01, )

<a2c_ppo_acktr.algo.ppo.PPO at 0x26bc10eba48>

In [17]:
# Display the recurrent hidden state size reported by the loaded model.
actor_critic.recurrent_hidden_state_size

64

In [3]:
# Run the 'hello_world' experiment through the scheduler helper.
run_experiment('hello_world')

# Morris Environment Experiments

In [30]:
# Morris environment: write one pickled config per episode structure.
# NOTE(review): this config uses the key 'recurrent', while the earlier nav
# configs use 'recurrent_policy'; confirm main.py accepts this spelling.
ep_structs = [1, 3]

for ep_struct in ep_structs:
    exp_name = f'morris_epstruct_{ep_struct}_sep_false_visible_negrew'
    config = {
        'wandb_project_name': 'nav2',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 500000,
        'env_name': 'MorrisEnv-v0',
        'algo': 'ppo',
        'num_mini_batch': 16,
        'num_processes': 16,
        'recurrent': True,
        'env_kwargs': {
            'ep_struct': ep_struct,
            'character_sep': False,
            'platform_visible': True,
            'reward_shift': -1,
            'platform_fixed_duration': 5,
            'platform_size': 30,
            'max_steps': 1000,
        },
        'track': True,
        'capture_video': 1,
    }

    # Context manager closes the file deterministically.
    with open('../experiment_configs/' + exp_name, 'wb') as fh:
        pickle.dump(config, fh)

# Gridworld Experiments

In [9]:
# Test local: single-process Gridworld config (no 'track' key is set).
# The name is a plain string; the original f-string had no placeholders.
exp_name = 'gridworld_basic'
config = {
    'wandb_project_name': 'nav3',
    'exp_name': exp_name,
    'save_name': exp_name,
    'num_env_steps': 500000,
    'env_name': 'Gridworld-v0',
    'algo': 'ppo',
    'num_mini_batch': 1,
    'num_processes': 1,
    'recurrent': True,
    'capture_video': 1,
}

# Context manager closes the file deterministically.
with open('../experiment_configs/' + exp_name, 'wb') as fh:
    pickle.dump(config, fh)

In [13]:
# Test local: 96-process Gridworld config written under the name 'test'.
# NOTE(review): this cell spells the key 'no-cuda' while other cells use
# 'no_cuda'; confirm which form the config-to-command conversion expects.
# The name is a plain string; the original f-string had no placeholders.
exp_name = 'test'
config = {
    'wandb_project_name': 'test',
    'exp_name': exp_name,
    'save_name': exp_name,
    'num_env_steps': 200000,
    'env_name': 'Gridworld-v0',
    'algo': 'ppo',
    'num_mini_batch': 16,
    'num_processes': 96,
    'recurrent': True,
    'capture_video': 1,
    'no-cuda': True,
}

# Context manager closes the file deterministically.
with open('../experiment_configs/' + exp_name, 'wb') as fh:
    pickle.dump(config, fh)

In [1]:

# Tracked 16-process Gridworld config.
# NOTE(review): writes to the same 'gridworld_basic' path as the single-process
# "Test local" cell, so whichever cell runs last wins.
# The name is a plain string; the original f-string had no placeholders.
exp_name = 'gridworld_basic'
config = {
    'wandb_project_name': 'nav3',
    'exp_name': exp_name,
    'save_name': exp_name,
    'num_env_steps': 500000,
    'env_name': 'Gridworld-v0',
    'algo': 'ppo',
    'num_mini_batch': 16,
    'num_processes': 16,
    'recurrent': True,
    'track': True,
    'capture_video': 1,
}

# Context manager closes the file deterministically.
with open('../experiment_configs/' + exp_name, 'wb') as fh:
    pickle.dump(config, fh)

In [4]:
# Gridworld sweep with 'give_dist' enabled: one pickled config per
# (world size, obstacle count) pair. Each pair is printed as it is written.
world_sizes = [6, 10, 14, 18]
num_objs = [0, 5, 10]

for size in world_sizes:
    for obj in num_objs:
        print(size, obj)
        exp_name = f'grid_dist_{size}_{obj}'
        config = {
            'wandb_project_name': 'nav3',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 500000,
            'env_name': 'Gridworld-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'recurrent': True,
            'track': True,
            'capture_video': 1,
            'env_kwargs': {'world_size': size, 'num_obstacles': obj, 'give_dist': True},
        }

        # Context manager closes the file deterministically.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

6 0
6 5
6 10
10 0
10 5
10 10
14 0
14 5
14 10
18 0
18 5
18 10


In [2]:
# Gridworld sweep without 'give_dist': one pickled config per
# (world size, obstacle count) pair. Each pair is printed as it is written.
world_sizes = [6, 10, 14, 18]
num_objs = [0, 5, 10]

for size in world_sizes:
    for obj in num_objs:
        print(size, obj)
        exp_name = f'grid_{size}_{obj}'
        config = {
            'wandb_project_name': 'nav3',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 100000,
            'env_name': 'Gridworld-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'recurrent': True,
            'track': True,
            'capture_video': 1,
            'env_kwargs': {'world_size': size, 'num_obstacles': obj},
        }

        # Context manager closes the file deterministically.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

6 0
6 5
6 10
10 0
10 5
10 10
14 0
14 5
14 10
18 0
18 5
18 10


## Reshaping experiments

In [3]:
# Reward-shaping sweep: for every shaping mode, write `num_trials` configs
# that differ only in name and seed (the trial index doubles as the seed).
reward_shaping = [0, 1, 2, 3]
num_trials = 10

for shape in reward_shaping:
    for i in range(num_trials):
        exp_name = f'grid_reshaping_{shape}_{i}'
        config = {
            'wandb_project_name': 'GridNav_Reward_Shaping',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 100000,
            'env_name': 'Gridworld-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 16,
            'recurrent': True,
            'track': True,
            'capture_video': 1,
            'seed': i,
            'env_kwargs': {
                'world_size': 14,
                'num_obstacles': 0,
                'give_dist': True,
                'give_time': True,
                'reward_shaping': shape,
            },
        }

        # Context manager closes the file deterministically.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

In [4]:
# Save env kwargs: one pickle per reward-shaping mode, written into the
# trained-models folder.
reward_shaping = [0, 1, 2, 3]
num_trials = 10

for shape in reward_shaping:
    exp_name = f'grid_reshaping_{shape}_env'
    env_kwargs = {
        'world_size': 14,
        'num_obstacles': 0,
        'give_dist': True,
        'give_time': True,
        'reward_shaping': shape,
    }
    # Context manager closes the file deterministically.
    with open('../trained_models/ppo/visible_reshaping/' + exp_name, 'wb') as fh:
        pickle.dump(env_kwargs, fh)

In [13]:
# Single reduced-scale reward-shaping config (2 processes, project 'nav3').
# NOTE(review): the generated name 'grid_reshaping_1_0' is also produced by
# the full reward-shaping sweep, so this cell overwrites that file when run
# afterwards.
reward_shaping = [1]
num_trials = 1

for shape in reward_shaping:
    for i in range(num_trials):
        exp_name = f'grid_reshaping_{shape}_{i}'
        config = {
            'wandb_project_name': 'nav3',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 100000,
            'env_name': 'Gridworld-v0',
            'algo': 'ppo',
            'num_mini_batch': 2,
            'num_processes': 2,
            'recurrent': True,
            'track': True,
            'capture_video': 1,
            'seed': i,
            'env_kwargs': {
                'world_size': 14,
                'num_obstacles': 0,
                'give_dist': True,
                'give_time': True,
                'reward_shaping': shape,
            },
        }

        # Context manager closes the file deterministically.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

In [10]:
# Wall-colour sweep with reward shaping fixed to 1: for every colour setting,
# write `num_trials` configs that differ only in name and seed.
color_configs = [1, 2, 2.5, 4]
num_trials = 10
shape = 1
for color in color_configs:
    for i in range(num_trials):
        exp_name = f'wallcolors_{color}_{i}'
        config = {
            'wandb_project_name': 'GridNav_Visible_Platform_Colored_Walls',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 400000,
            'env_name': 'Gridworld-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 96,
            'recurrent': True,
            'track': True,
            'capture_video': 1,
            'seed': i,
            'env_kwargs': {
                'world_size': 14,
                'num_obstacles': 0,
                'give_dist': True,
                'give_time': True,
                'reward_shaping': shape,
                'wall_colors': color,
            },
        }

        # Context manager closes the file deterministically.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

In [6]:
# Save the env kwargs for each wall-colour setting into the trained-models
# folder.
color_configs = [1, 2, 2.5, 4]
# Set the shaping mode explicitly (1, as in the wallcolors config sweep)
# instead of relying on whatever `shape` a previously run cell left behind.
shape = 1
for color in color_configs:
    exp_name = f'wallcolors_{color}_env'
    env_kwargs = {
        'world_size': 14,
        'num_obstacles': 0,
        'give_dist': True,
        'give_time': True,
        'reward_shaping': shape,
        'wall_colors': color,
    }

    # Context manager closes the file deterministically.
    with open('../trained_models/ppo/visible_wallcolors/' + exp_name, 'wb') as fh:
        pickle.dump(env_kwargs, fh)

# Invisible goal trials

First, a proof-of-concept test of whether the invisible goal task can be learned, with or without reward shaping, and with or without wall colors

In [20]:
# Invisible-goal sweep over wall colours and reward shaping: write one config
# per (colour, shaping, trial) and collect the env kwargs plus experiment name
# in `rows` for the summary table displayed at the end of the cell.
color_configs = [1, 2, 2.5, 4]
reward_shapings = [0, 1]
num_trials = 3
rows = []
for color in color_configs:
    for shape in reward_shapings:
        for i in range(num_trials):
            exp_name = f'invisible_{shape}_{color}_{i}'

            env_kwargs = {
                'world_size': 14,
                'num_obstacles': 0,
                'give_dist': True,
                'give_time': True,
                'reward_shaping': shape,
                'wall_colors': color,
                'task_structure': 2,
            }
            # Copy so the extra 'name' column never leaks into env_kwargs.
            row = env_kwargs.copy()
            row['name'] = exp_name

            rows.append(row)
            config = {
                'wandb_project_name': 'GridNav_Invisible_Reward_Colors',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 200000,
                'env_name': 'Gridworld-v0',
                'algo': 'ppo',
                'num_mini_batch': 16,
                'num_processes': 96,
                'recurrent': True,
                'track': True,
                'capture_video': 1,
                'seed': i,
                'env_kwargs': env_kwargs,
            }

            # Context manager closes the file deterministically.
            with open('../experiment_configs/' + exp_name, 'wb') as fh:
                pickle.dump(config, fh)

pd.DataFrame(rows)

Unnamed: 0,world_size,num_obstacles,give_dist,give_time,reward_shaping,wall_colors,task_structure,name
0,14,0,True,True,0,1.0,2,invisible_0_1_0
1,14,0,True,True,0,1.0,2,invisible_0_1_1
2,14,0,True,True,0,1.0,2,invisible_0_1_2
3,14,0,True,True,1,1.0,2,invisible_1_1_0
4,14,0,True,True,1,1.0,2,invisible_1_1_1
5,14,0,True,True,1,1.0,2,invisible_1_1_2
6,14,0,True,True,0,2.0,2,invisible_0_2_0
7,14,0,True,True,0,2.0,2,invisible_0_2_1
8,14,0,True,True,0,2.0,2,invisible_0_2_2
9,14,0,True,True,1,2.0,2,invisible_1_2_0


In [7]:
# Save the env kwargs for every (colour, shaping) combination of the
# invisible-goal task into the trained-models folder.
color_configs = [1, 2, 2.5, 4]
reward_shapings = [0, 1]
num_trials = 3
rows = []
for color in color_configs:
    for shape in reward_shapings:
        exp_name = f'invisible_{shape}_{color}_env'

        env_kwargs = {
            'world_size': 14,
            'num_obstacles': 0,
            'give_dist': True,
            'give_time': True,
            'reward_shaping': shape,
            'wall_colors': color,
            'task_structure': 2,
        }

        # Context manager closes the file deterministically.
        with open('../trained_models/ppo/invisible_wallcolors/' + exp_name, 'wb') as fh:
            pickle.dump(env_kwargs, fh)
            

In [6]:
# Invisible-goal configs for wall colour 1 only, single process, trials
# start_trial_idx..num_trials-1. `rows` collects env kwargs plus experiment
# name for the summary table displayed at the end of the cell.
# NOTE(review): names such as 'invisible_0_1_0' are also produced by the
# full colour sweep, so this cell overwrites those files when run afterwards.
color_configs = [1]
reward_shapings = [0, 1]
num_trials = 10
start_trial_idx = 0
rows = []
for color in color_configs:
    for shape in reward_shapings:
        for i in range(start_trial_idx, num_trials):
            exp_name = f'invisible_{shape}_{color}_{i}'

            env_kwargs = {
                'world_size': 14,
                'num_obstacles': 0,
                'give_dist': True,
                'give_time': True,
                'reward_shaping': shape,
                'wall_colors': color,
                'task_structure': 2,
            }
            # Copy so the extra 'name' column never leaks into env_kwargs.
            row = env_kwargs.copy()
            row['name'] = exp_name

            rows.append(row)
            config = {
                'wandb_project_name': 'GridNav_Invisible_Reward_Colors',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 400000,
                'env_name': 'Gridworld-v0',
                'algo': 'ppo',
                'num_mini_batch': 1,
                'num_processes': 1,
                'recurrent': True,
                'track': True,
                'capture_video': 1,
                'seed': i,
                'env_kwargs': env_kwargs,
            }

            # Context manager closes the file deterministically.
            with open('../experiment_configs/' + exp_name, 'wb') as fh:
                pickle.dump(config, fh)

pd.DataFrame(rows)

Unnamed: 0,world_size,num_obstacles,give_dist,give_time,reward_shaping,wall_colors,task_structure,name
0,14,0,True,True,0,1,2,invisible_0_1_0
1,14,0,True,True,0,1,2,invisible_0_1_1
2,14,0,True,True,0,1,2,invisible_0_1_2
3,14,0,True,True,0,1,2,invisible_0_1_3
4,14,0,True,True,0,1,2,invisible_0_1_4
5,14,0,True,True,0,1,2,invisible_0_1_5
6,14,0,True,True,0,1,2,invisible_0_1_6
7,14,0,True,True,0,1,2,invisible_0_1_7
8,14,0,True,True,0,1,2,invisible_0_1_8
9,14,0,True,True,0,1,2,invisible_0_1_9


# Hanging up poster

In [5]:
# Poster experiments: one config per (poster setting, shaping, trial).
# `rows` collects env kwargs plus experiment name for the summary table
# displayed at the end of the cell.
poster_pos = [0, 1]
reward_shapings = [0, 1]
start_trial_idx = 0
num_trials = 5
rows = []
for poster in poster_pos:
    for shape in reward_shapings:
        for i in range(start_trial_idx, num_trials):
            exp_name = f'invisible_poster_{poster}_shape_{shape}_{i}'

            env_kwargs = {
                'world_size': 14,
                'num_obstacles': 0,
                'give_dist': True,
                'give_time': True,
                'reward_shaping': shape,
                'wall_colors': 1,
                'task_structure': 2,
                'poster': poster,
            }
            # Copy so the extra 'name' column never leaks into env_kwargs.
            row = env_kwargs.copy()
            row['name'] = exp_name

            rows.append(row)
            config = {
                'wandb_project_name': 'GridNav_Invisible_Reward_Poster',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 400000,
                'env_name': 'Gridworld-v0',
                'algo': 'ppo',
                'num_mini_batch': 16,
                'num_processes': 96,
                'recurrent': True,
                'track': True,
                'capture_video': 1,
                'seed': i,
                'env_kwargs': env_kwargs,
            }

            # Context manager closes the file deterministically.
            with open('../experiment_configs/' + exp_name, 'wb') as fh:
                pickle.dump(config, fh)

pd.DataFrame(rows)

Unnamed: 0,world_size,num_obstacles,give_dist,give_time,reward_shaping,wall_colors,task_structure,poster,name
0,14,0,True,True,0,1,2,0,invisible_poster_0_shape_0_0
1,14,0,True,True,0,1,2,0,invisible_poster_0_shape_0_1
2,14,0,True,True,0,1,2,0,invisible_poster_0_shape_0_2
3,14,0,True,True,0,1,2,0,invisible_poster_0_shape_0_3
4,14,0,True,True,0,1,2,0,invisible_poster_0_shape_0_4
5,14,0,True,True,1,1,2,0,invisible_poster_0_shape_1_0
6,14,0,True,True,1,1,2,0,invisible_poster_0_shape_1_1
7,14,0,True,True,1,1,2,0,invisible_poster_0_shape_1_2
8,14,0,True,True,1,1,2,0,invisible_poster_0_shape_1_3
9,14,0,True,True,1,1,2,0,invisible_poster_0_shape_1_4


In [8]:
# Save the env kwargs for every (poster setting, shaping) combination into
# the trained-models folder.
poster_pos = [0, 1]
reward_shapings = [0, 1]
for poster in poster_pos:
    for shape in reward_shapings:
        exp_name = f'invisible_poster_{poster}_shape_{shape}_env'

        env_kwargs = {
            'world_size': 14,
            'num_obstacles': 0,
            'give_dist': True,
            'give_time': True,
            'reward_shaping': shape,
            'wall_colors': 1,
            'task_structure': 2,
            'poster': poster,
        }
        # Context manager closes the file deterministically.
        with open('../trained_models/ppo/invisible_poster/' + exp_name, 'wb') as fh:
            pickle.dump(env_kwargs, fh)
            


<br><br><br><br><br><br><br><br><br><br>

**!!**

**!! IMPORTANT - Use code after here to ensure track and capture video off, as well as no_cuda true, since these are not working on school computers**

**!!**


<br><br><br><br><br><br><br><br>

# Layer Sharing

Here we are testing one of the tasks we have already learned - 4 wall colors with an invisible platform - since this has been shown to be reasonably easy to learn. We are going to change how many layers are shared between the actor and the critic

In [5]:
# Layer-sharing sweep on the gridworld invisible-goal task: one config per
# (number of shared layers, trial). No 'track'/'capture_video' keys are set
# and 'no_cuda' is True. `rows` feeds the summary table displayed at the end.
num_shared_layers = [0, 1, 2]
num_trials = 5
rows = []
for shared in num_shared_layers:
    for i in range(num_trials):
        exp_name = f'invisible_shared{shared}_t{i}'

        env_kwargs = {
            'world_size': 14,
            'num_obstacles': 0,
            'give_dist': True,
            'give_time': True,
            'reward_shaping': 1,
            'wall_colors': 4,
            'task_structure': 2,
        }
        # Copy so the extra summary columns never leak into env_kwargs.
        row = env_kwargs.copy()
        row['name'] = exp_name
        row['num_shared_layers'] = shared

        rows.append(row)
        config = {
            'wandb_project_name': 'GridNav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 200000,
            'env_name': 'Gridworld-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 96,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'num_shared_layers': shared},
        }

        # Context manager closes the file deterministically.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

pd.DataFrame(rows)

Unnamed: 0,world_size,num_obstacles,give_dist,give_time,reward_shaping,wall_colors,task_structure,name,num_shared_layers
0,14,0,True,True,1,4,2,invisible_shared0_t0,0
1,14,0,True,True,1,4,2,invisible_shared0_t1,0
2,14,0,True,True,1,4,2,invisible_shared0_t2,0
3,14,0,True,True,1,4,2,invisible_shared0_t3,0
4,14,0,True,True,1,4,2,invisible_shared0_t4,0
5,14,0,True,True,1,4,2,invisible_shared1_t0,1
6,14,0,True,True,1,4,2,invisible_shared1_t1,1
7,14,0,True,True,1,4,2,invisible_shared1_t2,1
8,14,0,True,True,1,4,2,invisible_shared1_t3,1
9,14,0,True,True,1,4,2,invisible_shared1_t4,1


## Shared layers for continuous nav

In [5]:
# Layer-sharing sweep on the continuous nav task: one config per
# (number of shared layers, trial). `rows` feeds the summary table displayed
# at the end of the cell.
num_shared_layers = [0, 1, 2]
num_trials = 5
rows = []
for shared in num_shared_layers:
    for i in range(num_trials):
        exp_name = f'nav_invisible_shared{shared}_t{i}'

        env_kwargs = {
            'num_objects': 0,
            'rew_structure': 'dist',
            'task_structure': 2,
            'wall_colors': 4.0,
        }
        # Copy so the extra summary columns never leak into env_kwargs.
        row = env_kwargs.copy()
        row['name'] = exp_name
        row['num_shared_layers'] = shared

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 400000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 96,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'num_shared_layers': shared},
        }

        # Context manager closes the file deterministically.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,name,num_shared_layers
0,0,dist,2,4.0,nav_invisible_shared0_t0,0
1,0,dist,2,4.0,nav_invisible_shared0_t1,0
2,0,dist,2,4.0,nav_invisible_shared0_t2,0
3,0,dist,2,4.0,nav_invisible_shared0_t3,0
4,0,dist,2,4.0,nav_invisible_shared0_t4,0
5,0,dist,2,4.0,nav_invisible_shared1_t0,1
6,0,dist,2,4.0,nav_invisible_shared1_t1,1
7,0,dist,2,4.0,nav_invisible_shared1_t2,1
8,0,dist,2,4.0,nav_invisible_shared1_t3,1
9,0,dist,2,4.0,nav_invisible_shared1_t4,1


# Continuous Space

## Reward Shaping Visible Goal

In [21]:
# Visible-goal continuous nav: one config per (reward structure, trial), plus
# one env-kwargs pickle per reward structure. `rows` feeds the summary table
# displayed at the end of the cell.
# NOTE(review): this cell spells the key 'no-cuda' while other cells use
# 'no_cuda'; confirm which form the config-to-command conversion expects.
num_trials = 5
shaping = ['none', 'dist']
rows = []
for shape in shaping:
    for i in range(num_trials):
        exp_name = f'nav_visible_{shape}_{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': shape}
        # Copy so the extra summary columns never leak into env_kwargs.
        row = env_kwargs.copy()
        row['name'] = exp_name
        row['env'] = 'NavEnv-v0'

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Visible',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 200000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 96,
            'recurrent': True,
            'no-cuda': True,
            # 'capture_video': 1,
            'seed': i,
            'env_kwargs': env_kwargs,
        }

        # Context manager closes the file deterministically.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

    # The env kwargs are identical across trials, so the dict left over from
    # the last trial is saved once per reward structure.
    # NOTE(review): this lands in ../experiment_configs/ alongside the run
    # configs, unlike other env-kwargs pickles that go under
    # ../trained_models/ppo/ -- confirm this is intended.
    env_name = f'nav_visible_{shape}_env'
    with open('../experiment_configs/' + env_name, 'wb') as fh:
        pickle.dump(env_kwargs, fh)
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,name,env
0,0,none,nav_visible_none_0,NavEnv-v0
1,0,none,nav_visible_none_1,NavEnv-v0
2,0,none,nav_visible_none_2,NavEnv-v0
3,0,none,nav_visible_none_3,NavEnv-v0
4,0,none,nav_visible_none_4,NavEnv-v0
5,0,dist,nav_visible_dist_0,NavEnv-v0
6,0,dist,nav_visible_dist_1,NavEnv-v0
7,0,dist,nav_visible_dist_2,NavEnv-v0
8,0,dist,nav_visible_dist_3,NavEnv-v0
9,0,dist,nav_visible_dist_4,NavEnv-v0


## Invisible Wall Colors

In [5]:
# Invisible-goal continuous nav: one config per (wall colour, reward
# structure, trial), plus one env-kwargs pickle per (colour, reward structure).
# `rows` feeds the summary table displayed at the end of the cell.
# NOTE(review): this cell spells the key 'no-cuda' while other cells use
# 'no_cuda'; confirm which form the config-to-command conversion expects.
num_trials = 5
wall_colors = [1, 2, 2.5, 4]
shaping = ['none', 'dist']
rows = []
for color in wall_colors:
    for shape in shaping:
        for i in range(num_trials):
            exp_name = f'nav_invisible_color_{color}_{shape}_{i}'

            env_kwargs = {
                'num_objects': 0,
                'rew_structure': shape,
                'task_structure': 2,
                'wall_colors': color,
            }
            # Copy so the extra summary columns never leak into env_kwargs.
            row = env_kwargs.copy()
            row['name'] = exp_name
            row['env'] = 'NavEnv-v0'

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Invisible_Color_Shape',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 500000,
                'env_name': 'NavEnv-v0',
                'algo': 'ppo',
                'num_mini_batch': 16,
                'num_processes': 96,
                'recurrent': True,
                'no-cuda': True,
                # 'capture_video': 1,
                'seed': i,
                'env_kwargs': env_kwargs,
            }

            # Context manager closes the file deterministically.
            with open('../experiment_configs/' + exp_name, 'wb') as fh:
                pickle.dump(config, fh)

        # The env kwargs are identical across trials, so the dict left over
        # from the last trial is saved once per (colour, reward structure).
        # NOTE(review): this lands in ../experiment_configs/ alongside the run
        # configs, unlike other env-kwargs pickles that go under
        # ../trained_models/ppo/ -- confirm this is intended.
        env_name = f'nav_invisible_color_{color}_{shape}_env'
        with open('../experiment_configs/' + env_name, 'wb') as fh:
            pickle.dump(env_kwargs, fh)
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,name,env
0,0,none,2,1.0,nav_invisible_color_1_none_0,NavEnv-v0
1,0,none,2,1.0,nav_invisible_color_1_none_1,NavEnv-v0
2,0,none,2,1.0,nav_invisible_color_1_none_2,NavEnv-v0
3,0,none,2,1.0,nav_invisible_color_1_none_3,NavEnv-v0
4,0,none,2,1.0,nav_invisible_color_1_none_4,NavEnv-v0
5,0,dist,2,1.0,nav_invisible_color_1_dist_0,NavEnv-v0
6,0,dist,2,1.0,nav_invisible_color_1_dist_1,NavEnv-v0
7,0,dist,2,1.0,nav_invisible_color_1_dist_2,NavEnv-v0
8,0,dist,2,1.0,nav_invisible_color_1_dist_3,NavEnv-v0
9,0,dist,2,1.0,nav_invisible_color_1_dist_4,NavEnv-v0


# Auxiliary Tasks

Try running a simple experiment with and without an auxiliary head

In [21]:

# Auxiliary-task sweep (wall_colors 4.0): one config per (aux setting, trial)
# for trial indices 5..9. `rows` feeds the summary table displayed at the end.
# num_trials is not used by the loop below; the trial range is hard-coded.
num_trials = 5
rows = []
auxiliary = [0, 1, 2]
for aux in auxiliary:
    for i in range(5, 10):
        exp_name = f'nav_euclid_start_{aux}_t{i}'
        # aux 0: no auxiliary head or task; aux 1: head + task id 0;
        # aux 2: head + task id 1.
        if aux == 2:
            auxiliary_heads = [[-1, 0, 1]]
            auxiliary_tasks = [1]
        elif aux == 1:
            auxiliary_heads = [[-1, 0, 1]]
            auxiliary_tasks = [0]
        elif aux == 0:
            auxiliary_heads = []
            auxiliary_tasks = []
        env_kwargs = {
            'num_objects': 0,
            'rew_structure': 'goal',
            'task_structure': 2,
            'wall_colors': 4.0,
            'auxiliary_tasks': auxiliary_tasks,
        }
        # Copy so the extra summary columns never leak into env_kwargs.
        row = env_kwargs.copy()
        row['name'] = exp_name
        row['num_shared_layers'] = 0

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Auxiliary_Euclidean_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 400000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 96,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'num_shared_layers': 0, 'auxiliary_heads': auxiliary_heads},
        }

        # Context manager closes the file deterministically.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,auxiliary_tasks,name,num_shared_layers
0,0,goal,2,4.0,[],nav_euclid_start_0_t5,0
1,0,goal,2,4.0,[],nav_euclid_start_0_t6,0
2,0,goal,2,4.0,[],nav_euclid_start_0_t7,0
3,0,goal,2,4.0,[],nav_euclid_start_0_t8,0
4,0,goal,2,4.0,[],nav_euclid_start_0_t9,0
5,0,goal,2,4.0,[0],nav_euclid_start_1_t5,0
6,0,goal,2,4.0,[0],nav_euclid_start_1_t6,0
7,0,goal,2,4.0,[0],nav_euclid_start_1_t7,0
8,0,goal,2,4.0,[0],nav_euclid_start_1_t8,0
9,0,goal,2,4.0,[0],nav_euclid_start_1_t9,0


In [17]:
# Save the env kwargs for each auxiliary setting (wall_colors 4.0) into the
# trained-models folder.
rows = []
auxiliary = [0, 1, 2]
for aux in auxiliary:
    exp_name = f'nav_euclid_start_{aux}'
    # aux 0: no auxiliary head or task; aux 1: head + task id 0;
    # aux 2: head + task id 1. Only auxiliary_tasks is stored here.
    if aux == 2:
        auxiliary_heads = [[-1, 0, 1]]
        auxiliary_tasks = [1]
    elif aux == 1:
        auxiliary_heads = [[-1, 0, 1]]
        auxiliary_tasks = [0]
    elif aux == 0:
        auxiliary_heads = []
        auxiliary_tasks = []
    env_kwargs = {
        'num_objects': 0,
        'rew_structure': 'goal',
        'task_structure': 2,
        'wall_colors': 4.0,
        'auxiliary_tasks': auxiliary_tasks,
    }

    # Context manager closes the file deterministically.
    with open('../trained_models/ppo/nav_auxiliary_tasks/' + exp_name, 'wb') as fh:
        pickle.dump(env_kwargs, fh)



In [22]:

# Auxiliary task id 2 with a per-run argument (wall_colors 4.0): one config
# per (wall argument, trial) for trial indices 5..9. `rows` feeds the summary
# table displayed at the end of the cell.
# num_trials is not used by the loop below; the trial range is hard-coded.
num_trials = 5
rows = []
auxiliary_wall = [1, 3]
for wall in auxiliary_wall:
    for i in range(5, 10):
        exp_name = f'nav_aux_wall_{wall}_t{i}'
        auxiliary_heads = [[-1, 0, 1]]
        auxiliary_tasks = [2]
        auxiliary_task_args = [wall]
        env_kwargs = {
            'num_objects': 0,
            'rew_structure': 'goal',
            'task_structure': 2,
            'wall_colors': 4.0,
            'auxiliary_tasks': auxiliary_tasks,
            'auxiliary_task_args': auxiliary_task_args,
        }
        # Copy so the extra summary columns never leak into env_kwargs.
        row = env_kwargs.copy()
        row['name'] = exp_name
        row['num_shared_layers'] = 0

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Auxiliary_Euclidean_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 400000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 96,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'num_shared_layers': 0, 'auxiliary_heads': auxiliary_heads},
        }

        # Context manager closes the file deterministically.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,auxiliary_tasks,auxiliary_task_args,name,num_shared_layers
0,0,goal,2,4.0,[2],[1],nav_aux_wall_1_t5,0
1,0,goal,2,4.0,[2],[1],nav_aux_wall_1_t6,0
2,0,goal,2,4.0,[2],[1],nav_aux_wall_1_t7,0
3,0,goal,2,4.0,[2],[1],nav_aux_wall_1_t8,0
4,0,goal,2,4.0,[2],[1],nav_aux_wall_1_t9,0
5,0,goal,2,4.0,[2],[3],nav_aux_wall_3_t5,0
6,0,goal,2,4.0,[2],[3],nav_aux_wall_3_t6,0
7,0,goal,2,4.0,[2],[3],nav_aux_wall_3_t7,0
8,0,goal,2,4.0,[2],[3],nav_aux_wall_3_t8,0
9,0,goal,2,4.0,[2],[3],nav_aux_wall_3_t9,0


In [18]:
# Save the env kwargs for each wall argument of auxiliary task id 2
# (wall_colors 4.0) into the trained-models folder.
rows = []
auxiliary_wall = [1, 3]
for wall in auxiliary_wall:
    exp_name = f'nav_aux_wall_{wall}'
    auxiliary_heads = [[-1, 0, 1]]
    auxiliary_tasks = [2]
    auxiliary_task_args = [wall]
    env_kwargs = {
        'num_objects': 0,
        'rew_structure': 'goal',
        'task_structure': 2,
        'wall_colors': 4.0,
        'auxiliary_tasks': auxiliary_tasks,
        'auxiliary_task_args': auxiliary_task_args,
    }

    # Context manager closes the file deterministically.
    with open('../trained_models/ppo/nav_auxiliary_tasks/' + exp_name, 'wb') as fh:
        pickle.dump(env_kwargs, fh)



## 4 Wall colors was too easy... try 2 symmetrical

In [6]:

# Auxiliary-task sweep with wall_colors 2: one config per (aux setting,
# trial). `rows` feeds the summary table displayed at the end of the cell.
num_trials = 5
rows = []
auxiliary = [0, 1, 2]
for aux in auxiliary:
    for i in range(num_trials):
        exp_name = f'nav_euclid_c2_start_{aux}_t{i}'
        # aux 0: no auxiliary head or task; aux 1: head + task id 0;
        # aux 2: head + task id 1.
        if aux == 2:
            auxiliary_heads = [[-1, 0, 1]]
            auxiliary_tasks = [1]
        elif aux == 1:
            auxiliary_heads = [[-1, 0, 1]]
            auxiliary_tasks = [0]
        elif aux == 0:
            auxiliary_heads = []
            auxiliary_tasks = []
        env_kwargs = {
            'num_objects': 0,
            'rew_structure': 'goal',
            'task_structure': 2,
            'wall_colors': 2,
            'auxiliary_tasks': auxiliary_tasks,
        }
        # Copy so the extra summary columns never leak into env_kwargs.
        row = env_kwargs.copy()
        row['name'] = exp_name
        row['num_shared_layers'] = 0

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Auxiliary_Euclidean_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 400000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 96,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'num_shared_layers': 0, 'auxiliary_heads': auxiliary_heads},
        }

        # Context manager closes the file deterministically.
        with open('../experiment_configs/' + exp_name, 'wb') as fh:
            pickle.dump(config, fh)

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,auxiliary_tasks,name,num_shared_layers
0,0,goal,2,2,[],nav_euclid_c2_start_0_t0,0
1,0,goal,2,2,[],nav_euclid_c2_start_0_t1,0
2,0,goal,2,2,[],nav_euclid_c2_start_0_t2,0
3,0,goal,2,2,[],nav_euclid_c2_start_0_t3,0
4,0,goal,2,2,[],nav_euclid_c2_start_0_t4,0
5,0,goal,2,2,[0],nav_euclid_c2_start_1_t0,0
6,0,goal,2,2,[0],nav_euclid_c2_start_1_t1,0
7,0,goal,2,2,[0],nav_euclid_c2_start_1_t2,0
8,0,goal,2,2,[0],nav_euclid_c2_start_1_t3,0
9,0,goal,2,2,[0],nav_euclid_c2_start_1_t4,0


In [7]:
# Record the env_kwargs of the `nav_euclid_c2_start_{aux}` experiments in
# the trained-models folder: one pickle per auxiliary setting, named
# without a trial suffix.
auxiliary = [0, 1, 2]
# Auxiliary setting -> value passed as the env's 'auxiliary_tasks'.
aux_task_lookup = {0: [], 1: [0], 2: [1]}

for aux in auxiliary:
    exp_name = f'nav_euclid_c2_start_{aux}'
    auxiliary_tasks = list(aux_task_lookup[aux])
    env_kwargs = {
        'num_objects': 0,
        'rew_structure': 'goal',
        'task_structure': 2,
        'wall_colors': 2,
        'auxiliary_tasks': auxiliary_tasks,
    }

    # Context manager closes (and flushes) the file right after the dump.
    with open('../trained_models/ppo/nav_auxiliary_tasks/' + exp_name, 'wb') as f:
        pickle.dump(env_kwargs, f)



In [10]:

# Write one training config per (wall argument, trial) for the
# 2-wall-color `nav_aux_c2_wall_*` experiments and collect a summary row
# for each, displayed as a DataFrame at the end of the cell.
num_trials = 5
rows = []
auxiliary_wall = [1, 3]

for wall in auxiliary_wall:
    for i in range(num_trials):
        exp_name = f'nav_aux_c2_wall_{wall}_t{i}'
        auxiliary_heads = [[-1, 0, 1]]
        auxiliary_tasks = [2]
        auxiliary_task_args = [wall]

        env_kwargs = {
            'num_objects': 0,
            'rew_structure': 'goal',
            'task_structure': 2,
            'wall_colors': 2,
            'auxiliary_tasks': auxiliary_tasks,
            'auxiliary_task_args': auxiliary_task_args,
        }
        rows.append({**env_kwargs, 'name': exp_name, 'num_shared_layers': 0})

        config = {
            'wandb_project_name': 'Nav_Auxiliary_Euclidean_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 400000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 16,
            'num_processes': 96,
            'recurrent': True,
            'seed': i,  # trial index doubles as the random seed
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'num_shared_layers': 0, 'auxiliary_heads': auxiliary_heads},
        }

        # Close the file deterministically rather than leaking one handle
        # per generated config.
        with open('../experiment_configs/' + exp_name, 'wb') as f:
            pickle.dump(config, f)

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,auxiliary_tasks,auxiliary_task_args,name,num_shared_layers
0,0,goal,2,2,[2],[1],nav_aux_c2_wall_1_t0,0
1,0,goal,2,2,[2],[1],nav_aux_c2_wall_1_t1,0
2,0,goal,2,2,[2],[1],nav_aux_c2_wall_1_t2,0
3,0,goal,2,2,[2],[1],nav_aux_c2_wall_1_t3,0
4,0,goal,2,2,[2],[1],nav_aux_c2_wall_1_t4,0
5,0,goal,2,2,[2],[3],nav_aux_c2_wall_3_t0,0
6,0,goal,2,2,[2],[3],nav_aux_c2_wall_3_t1,0
7,0,goal,2,2,[2],[3],nav_aux_c2_wall_3_t2,0
8,0,goal,2,2,[2],[3],nav_aux_c2_wall_3_t3,0
9,0,goal,2,2,[2],[3],nav_aux_c2_wall_3_t4,0


In [9]:
# Record the env_kwargs of the `nav_aux_c2_wall_{wall}` experiments in the
# trained-models folder: one pickle per wall argument, named without a
# trial suffix.
auxiliary_wall = [1, 3]
for wall in auxiliary_wall:
    exp_name = f'nav_aux_c2_wall_{wall}'
    auxiliary_tasks = [2]
    auxiliary_task_args = [wall]
    env_kwargs = {
        'num_objects': 0,
        'rew_structure': 'goal',
        'task_structure': 2,
        'wall_colors': 2,
        'auxiliary_tasks': auxiliary_tasks,
        'auxiliary_task_args': auxiliary_task_args,
    }

    # Context manager closes (and flushes) the file right after the dump.
    with open('../trained_models/ppo/nav_auxiliary_tasks/' + exp_name, 'wb') as f:
        pickle.dump(env_kwargs, f)



## Again... with poster now

In [7]:

# Write one training config per (auxiliary setting, poster, trial) for the
# `nav_euclid_p{label}_start_*` poster experiments and collect a summary
# row for each, displayed as a DataFrame at the end of the cell.
num_trials = 3
rows = []
auxiliary = [0, 1, 2]
posters = [0, 1]
poster_labels = ['proxim', 'distal']
# Auxiliary setting -> value passed as the env's 'auxiliary_tasks'.
# Setting 0 has no auxiliary task and therefore gets no auxiliary head.
aux_task_lookup = {0: [], 1: [0], 2: [1]}

for aux in auxiliary:
    # Each poster value is paired with the label used in experiment names.
    for poster, label in zip(posters, poster_labels):
        for i in range(num_trials):
            exp_name = f'nav_euclid_p{label}_start_{aux}_t{i}'
            auxiliary_tasks = list(aux_task_lookup[aux])
            auxiliary_heads = [[-1, 0, 1]] if auxiliary_tasks else []

            env_kwargs = {
                'num_objects': 0,
                'rew_structure': 'goal',
                'task_structure': 2,
                'wall_colors': 1,
                'auxiliary_tasks': auxiliary_tasks,
                'num_rays': 12,
                'fov': 1,
                'poster': poster,
            }
            rows.append({**env_kwargs, 'name': exp_name, 'num_shared_layers': 0})

            config = {
                'wandb_project_name': 'Nav_Auxiliary_Euclidean_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 1000000,
                'env_name': 'NavEnv-v0',
                'algo': 'ppo',
                'num_mini_batch': 16,
                'num_processes': 96,
                'recurrent': True,
                'seed': i,  # trial index doubles as the random seed
                'no_cuda': True,
                'env_kwargs': env_kwargs,
                'nn_base': 'FlexBase',
                'nn_base_kwargs': {'num_shared_layers': 0, 'auxiliary_heads': auxiliary_heads},
            }

            # Close the file deterministically rather than leaking one
            # handle per generated config.
            with open('../experiment_configs/' + exp_name, 'wb') as f:
                pickle.dump(config, f)

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,auxiliary_tasks,num_rays,fov,poster,name,num_shared_layers
0,0,goal,2,1,[],12,1,0,nav_euclid_pproxim_start_0_t0,0
1,0,goal,2,1,[],12,1,0,nav_euclid_pproxim_start_0_t1,0
2,0,goal,2,1,[],12,1,0,nav_euclid_pproxim_start_0_t2,0
3,0,goal,2,1,[],12,1,1,nav_euclid_pdistal_start_0_t0,0
4,0,goal,2,1,[],12,1,1,nav_euclid_pdistal_start_0_t1,0
5,0,goal,2,1,[],12,1,1,nav_euclid_pdistal_start_0_t2,0
6,0,goal,2,1,[0],12,1,0,nav_euclid_pproxim_start_1_t0,0
7,0,goal,2,1,[0],12,1,0,nav_euclid_pproxim_start_1_t1,0
8,0,goal,2,1,[0],12,1,0,nav_euclid_pproxim_start_1_t2,0
9,0,goal,2,1,[0],12,1,1,nav_euclid_pdistal_start_1_t0,0


In [4]:
# Record the env_kwargs of the `nav_euclid_p{label}_start_*` poster
# experiments in the trained-models folder.
#
# The previous version of this cell had its body dedented out of the inner
# loops, so it wrote a single file per auxiliary setting (last poster, last
# trial) and never saved the kwargs of the other poster condition. Every
# (auxiliary setting, poster, trial) combination is now written; the files
# the old cell produced are still produced with identical contents.
#
# NOTE(review): the other env_kwargs cells in this notebook use names
# without the `_t{i}` trial suffix -- confirm which naming the model loader
# expects for these poster experiments.

# Defined here so the cell does not depend on whatever value another cell
# left behind; matches the poster config-generation cell.
num_trials = 3
auxiliary = [0, 1, 2]
posters = [0, 1]
poster_labels = ['proxim', 'distal']
# Auxiliary setting -> value passed as the env's 'auxiliary_tasks'.
aux_task_lookup = {0: [], 1: [0], 2: [1]}

for aux in auxiliary:
    for poster, label in zip(posters, poster_labels):
        env_kwargs = {
            'num_objects': 0,
            'rew_structure': 'goal',
            'task_structure': 2,
            'wall_colors': 1,
            'auxiliary_tasks': list(aux_task_lookup[aux]),
            'num_rays': 12,
            'fov': 1,
            'poster': poster,
        }
        for i in range(num_trials):
            exp_name = f'nav_euclid_p{label}_start_{aux}_t{i}'
            # Context manager closes (and flushes) the file after the dump.
            with open('../trained_models/ppo/nav_auxiliary_tasks/' + exp_name, 'wb') as f:
                pickle.dump(env_kwargs, f)



In [8]:

# Write one training config per (wall argument, poster, trial) for the
# `nav_aux_p{label}_wall_*` poster experiments and collect a summary row
# for each, displayed as a DataFrame at the end of the cell.
num_trials = 3
rows = []
auxiliary_wall = [1, 2]
posters = [0, 1]
poster_labels = ['proxim', 'distal']

for wall in auxiliary_wall:
    # Each poster value is paired with the label used in experiment names.
    for poster, label in zip(posters, poster_labels):
        for i in range(num_trials):
            exp_name = f'nav_aux_p{label}_wall_{wall}_t{i}'
            auxiliary_heads = [[-1, 0, 1]]
            auxiliary_tasks = [2]
            auxiliary_task_args = [wall]

            env_kwargs = {
                'num_objects': 0,
                'rew_structure': 'goal',
                'task_structure': 2,
                'wall_colors': 1,
                'auxiliary_tasks': auxiliary_tasks,
                'auxiliary_task_args': auxiliary_task_args,
                'num_rays': 12,
                'fov': 1,
                'poster': poster,
            }
            rows.append({**env_kwargs, 'name': exp_name, 'num_shared_layers': 0})

            config = {
                'wandb_project_name': 'Nav_Auxiliary_Euclidean_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 1000000,
                'env_name': 'NavEnv-v0',
                'algo': 'ppo',
                'num_mini_batch': 16,
                'num_processes': 96,
                'recurrent': True,
                'seed': i,  # trial index doubles as the random seed
                'no_cuda': True,
                'env_kwargs': env_kwargs,
                'nn_base': 'FlexBase',
                'nn_base_kwargs': {'num_shared_layers': 0, 'auxiliary_heads': auxiliary_heads},
            }

            # Close the file deterministically rather than leaking one
            # handle per generated config.
            with open('../experiment_configs/' + exp_name, 'wb') as f:
                pickle.dump(config, f)

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,auxiliary_tasks,auxiliary_task_args,num_rays,fov,poster,name,num_shared_layers
0,0,goal,2,1,[2],[1],12,1,0,nav_aux_pproxim_wall_1_t0,0
1,0,goal,2,1,[2],[1],12,1,0,nav_aux_pproxim_wall_1_t1,0
2,0,goal,2,1,[2],[1],12,1,0,nav_aux_pproxim_wall_1_t2,0
3,0,goal,2,1,[2],[1],12,1,1,nav_aux_pdistal_wall_1_t0,0
4,0,goal,2,1,[2],[1],12,1,1,nav_aux_pdistal_wall_1_t1,0
5,0,goal,2,1,[2],[1],12,1,1,nav_aux_pdistal_wall_1_t2,0
6,0,goal,2,1,[2],[2],12,1,0,nav_aux_pproxim_wall_2_t0,0
7,0,goal,2,1,[2],[2],12,1,0,nav_aux_pproxim_wall_2_t1,0
8,0,goal,2,1,[2],[2],12,1,0,nav_aux_pproxim_wall_2_t2,0
9,0,goal,2,1,[2],[2],12,1,1,nav_aux_pdistal_wall_2_t0,0


In [9]:
# Record the env_kwargs of the `nav_aux_c2_wall_{wall}` experiments in the
# trained-models folder: one pickle per wall argument, named without a
# trial suffix.
#
# NOTE(review): this cell sits in the poster section but still writes the
# 2-wall-color `nav_aux_c2_wall_*` kwargs (no 'poster', 'num_rays' or 'fov'
# keys, walls [1, 3]). It looks like a copy of the earlier c2 cell that was
# presumably meant to be adapted to the `nav_aux_p{label}_wall_*`
# experiments -- confirm before relying on it. Outputs are left unchanged.
auxiliary_wall = [1, 3]
for wall in auxiliary_wall:
    exp_name = f'nav_aux_c2_wall_{wall}'
    auxiliary_tasks = [2]
    auxiliary_task_args = [wall]
    env_kwargs = {
        'num_objects': 0,
        'rew_structure': 'goal',
        'task_structure': 2,
        'wall_colors': 2,
        'auxiliary_tasks': auxiliary_tasks,
        'auxiliary_task_args': auxiliary_task_args,
    }

    # Context manager closes (and flushes) the file right after the dump.
    with open('../trained_models/ppo/nav_auxiliary_tasks/' + exp_name, 'wb') as f:
        pickle.dump(env_kwargs, f)



# Testing null task in more ways

In [14]:

# Write training configs for the null-task test on standard Gym
# environments: for every environment, with and without an auxiliary head,
# nine trials (t1..t9). A summary row per config is collected and shown as
# a DataFrame at the end of the cell.
num_trials = 1  # not used by the loops below; trials are range(1, 10)
rows = []
env_names = ['CartPole-v1', 'Pendulum-v1', 'Acrobot-v1', 'MountainCar-v0']
names = ['cartpole', 'pendulum', 'acrobot', 'mountaincar']
timesteps = [50000, 200000, 200000, 200000]

auxes = ['aux', 'noaux']

# The three lists above are parallel: short name, Gym id, training budget.
for name, env_name, total_timesteps in zip(names, env_names, timesteps):
    for aux in auxes:
        auxiliary_heads = [[-1, 0, 1]] if aux == 'aux' else []
        for t in range(1, 10):
            exp_name = f'{name}_{aux}_t{t}'
            env_kwargs = {}
            rows.append({**env_kwargs, 'name': exp_name, 'num_shared_layers': 0})

            config = {
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': total_timesteps,
                'env_name': env_name,
                'algo': 'ppo',
                'num_mini_batch': 2,
                'num_processes': 4,
                'recurrent': True,
                # Seed by trial index. The previous code used the
                # environment index here, which gave all nine trials of an
                # environment the same seed.
                'seed': t,
                'no_cuda': True,
                'env_kwargs': env_kwargs,
                'nn_base': 'FlexBase',
                'nn_base_kwargs': {'num_shared_layers': 0, 'auxiliary_heads': auxiliary_heads},
            }

            # Close the file deterministically rather than leaking one
            # handle per generated config.
            with open('../experiment_configs/' + exp_name, 'wb') as f:
                pickle.dump(config, f)

pd.DataFrame(rows)

Unnamed: 0,name,num_shared_layers
0,cartpole_aux_t1,0
1,cartpole_aux_t2,0
2,cartpole_aux_t3,0
3,cartpole_aux_t4,0
4,cartpole_aux_t5,0
...,...,...
67,mountaincar_noaux_t5,0
68,mountaincar_noaux_t6,0
69,mountaincar_noaux_t7,0
70,mountaincar_noaux_t8,0
