In [1]:
import pickle
import pandas as pd
from datetime import datetime
import os
import numpy as np

from pathlib import Path
import shutil

# Collection of all settings to run auxiliary tasks that we set up
aux_task_configs = {
    'none': {
        'auxiliary_tasks': [], #Inside env_kwargs
        'auxiliary_task_args': [], #Inside env_kwargs
        'auxiliary_heads': [], #Inside nn_base_kwargs
        'auxiliary_truth_sizes': [], #Inside config (parseargs)
        'aux_wrapper_kwargs': {}, #Inside config (parserags)
        'rew_structure': 'goal' #Inside env_kwargs
    },
    
    'rewexplore': {
        'auxiliary_tasks': [],
        'auxiliary_task_args': [],
        'auxiliary_heads': [],
        'auxiliary_truth_sizes': [],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'explore'
    },
    
    'rewdist': {
        'auxiliary_tasks': [],
        'auxiliary_task_args': [],
        'auxiliary_heads': [],
        'auxiliary_truth_sizes': [],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'dist'
    },
    
    'wall0': {
        'auxiliary_tasks': [2],
        'auxiliary_task_args': [0],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'wall1': {
        'auxiliary_tasks': [2],
        'auxiliary_task_args': [1],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'wall01': {
        'auxiliary_tasks': [2, 2],
        'auxiliary_task_args': [0, 1],
        'auxiliary_heads': [[-1, 0, 0, 1], [-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1, 1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'goaldist': {
        'auxiliary_tasks': [3],
        'auxiliary_task_args': [None],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'terminal': {
        'auxiliary_tasks': [],
        'auxiliary_task_args': [],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {'auxiliary_tasks': [0], 
                               'auxiliary_task_args': [None]},
        'rew_structure': 'goal'
    },
    
    'catwall0': {
        'auxiliary_tasks': [5],
        'auxiliary_task_args': [0],
        'auxiliary_heads': [[-1, 0, 1, 2]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'catwall0': {
        'auxiliary_tasks': [5],
        'auxiliary_task_args': [1],
        'auxiliary_heads': [[-1, 0, 1, 2]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'catwall01': {
        'auxiliary_tasks': [5, 5],
        'auxiliary_task_args': [0, 1],
        'auxiliary_heads': [[-1, 0, 1, 2], [-1, 0, 1, 2]],
        'auxiliary_truth_sizes': [1, 1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'catfacewall': {
        'auxiliary_tasks': [4],
        'auxiliary_task_args': [None],
        'auxiliary_heads': [[-1, 0, 1, 4]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'catquad': {
        'auxiliary_tasks': [6],
        'auxiliary_task_args': [None],
        'auxiliary_heads': [[-1, 0, 1, 4]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
}

# MWM North Poster Baseline

These were the experiments by which others were compared. Specifically batch size 16 and 32

In [None]:
batch_sizes = [16, 32, 64]
num_trials = 3
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_pdistal_batch{batch}auxnone_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 2, 'wall_colors': 1, 'num_rays': 12, 'fov': 1,
                      'poster': 1, 'character_reset_pos': 1}

        aux_heads = auxiliary_heads[n]
        aux_kwargs = aux_wrapper_kwargs[n]

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['layer_width'] = width
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            'checkpoint_interval': 10,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'hidden_size': width,
                               'auxiliary_heads': aux_heads},
            'aux_wrapper_kwargs': aux_kwargs
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

# Testing one-hot observation encodings and retesting exploration MWM variation

## One-hot north poster

In [4]:
batch_sizes = [16, 32, 64]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_pnorth_batch{batch}onehot_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 2, 'wall_colors': 1, 'num_rays': 12, 'fov': 1,
                      'poster': 1, 'character_reset_pos': 1,  'one_hot_obs': True}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            'checkpoint_interval': 10,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'hidden_size': 16},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,poster,character_reset_pos,one_hot_obs,name,batch_size
0,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t0,16
1,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t1,16
2,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t2,16
3,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t3,16
4,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t4,16
5,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t0,32
6,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t1,32
7,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t2,32
8,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t3,32
9,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t4,32


### No vecnormalize

In [12]:
batch_sizes = [16, 32, 64]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_pnorth_batch{batch}onehotnovec_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 2, 'wall_colors': 1, 'num_rays': 12, 'fov': 1,
                      'poster': 1, 'character_reset_pos': 1,  'one_hot_obs': True}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            'checkpoint_interval': 10,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'hidden_size': 16},
            'normalize_env': False
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,poster,character_reset_pos,one_hot_obs,name,batch_size
0,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehotnovec_t0,16
1,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehotnovec_t1,16
2,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehotnovec_t2,16
3,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehotnovec_t3,16
4,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehotnovec_t4,16
5,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehotnovec_t0,32
6,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehotnovec_t1,32
7,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehotnovec_t2,32
8,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehotnovec_t3,32
9,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehotnovec_t4,32


In [11]:
import sys
sys.path.append('../')
from scheduler import convert_config_to_command

convert_config_to_command('nav_pnorth_batch64onehotnovec_t0', config_folder='../experiment_configs/')

'python main.py --wandb-project-name Nav_Shared_Layers --exp-name nav_pnorth_batch64onehotnovec_t0 --save-name nav_pnorth_batch64onehotnovec_t0 --num-env-steps 3000000 --env-name NavEnv-v0 --algo ppo --num-mini-batch 1 --num-processes 1 --num-steps 64 --checkpoint-interval 10 --recurrent --seed 0 --no-cuda --env-kwargs num_objects=0 rew_structure=goal task_structure=2 wall_colors=1 num_rays=12 fov=1 poster=1 character_reset_pos=1 one_hot_obs=True --nn-base FlexBase --nn-base-kwargs hidden_size=16 --normalize-env False --config-file-name nav_pnorth_batch64onehotnovec_t0 '

# Explore env

In [5]:
batch_sizes = [16, 64, 128]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_batch{batch}onehot_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 1,
            'num_processes': 1,
            'num_steps': batch,
            'checkpoint_interval': 10,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,name,batch_size
0,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t0,16
1,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t1,16
2,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t2,16
3,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t3,16
4,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t4,16
5,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t0,64
6,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t1,64
7,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t2,64
8,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t3,64
9,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t4,64


## Tweaking some environment parameters

It appears from looking at trajectories that the agent can in fact explore, locate, and head back to the goal. So going to increase turn speed and move speed and number of steps per episode to encourage this even more

In [24]:
batch_sizes = [64, 128]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_batch{batch}faster_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 15, 'max_steps': 500}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'checkpoint_dir': '4wallexplore',
            'save_dir': '4wallexplore',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,name,batch_size
0,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster_t0,64
1,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster_t1,64
2,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster_t2,64
3,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster_t3,64
4,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster_t4,64
5,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster_t0,128
6,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster_t1,128
7,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster_t2,128
8,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster_t3,128
9,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster_t4,128


In [25]:
batch_sizes = [64, 128]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_batch{batch}faster20m_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 15, 'max_steps': 500}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 20000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'checkpoint_dir': '4wallexplore',
            'save_dir': '4wallexplore',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,name,batch_size
0,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster20m_t0,64
1,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster20m_t1,64
2,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster20m_t2,64
3,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster20m_t3,64
4,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster20m_t4,64
5,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20m_t0,128
6,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20m_t1,128
7,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20m_t2,128
8,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20m_t3,128
9,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20m_t4,128


In [3]:
batch_sizes = [128]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_batch{batch}faster20mrewexplore_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'explore',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 20000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'checkpoint_dir': '4wallexplore',
            'save_dir': '4wallexplore',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,name,batch_size
0,0,explore,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20mrewexplore_t0,128
1,0,explore,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20mrewexplore_t1,128
2,0,explore,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20mrewexplore_t2,128
3,0,explore,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20mrewexplore_t3,128
4,0,explore,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20mrewexplore_t4,128


In [3]:
batch_sizes = [128]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_batch{batch}explorepunish_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'checkpoint_dir': '4wallexplore',
            'save_dir': '4wallexplore',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,name,batch_size
0,0,explorepunish,3,4,12,1,1,True,0.5,10,500,nav_4wallexplore_batch128explorepunish_t0,128
1,0,explorepunish,3,4,12,1,1,True,0.5,10,500,nav_4wallexplore_batch128explorepunish_t1,128
2,0,explorepunish,3,4,12,1,1,True,0.5,10,500,nav_4wallexplore_batch128explorepunish_t2,128
3,0,explorepunish,3,4,12,1,1,True,0.5,10,500,nav_4wallexplore_batch128explorepunish_t3,128
4,0,explorepunish,3,4,12,1,1,True,0.5,10,500,nav_4wallexplore_batch128explorepunish_t4,128


In [6]:
sub_reward_sizes = [0.01, 0.02, 0.05, 0.1]
num_trials = 3

batch = 128
rows = []
for sub_reward in sub_reward_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_ep{sub_reward}_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500,
                      'sub_goal_reward': sub_reward}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'save_dir': '4we_punish',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,sub_goal_reward,name,batch_size
0,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_ep0.01_t0,128
1,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_ep0.01_t1,128
2,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_ep0.01_t2,128
3,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_ep0.02_t0,128
4,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_ep0.02_t1,128
5,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_ep0.02_t2,128
6,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_ep0.05_t0,128
7,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_ep0.05_t1,128
8,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_ep0.05_t2,128
9,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.1,nav_4wallexplore_ep0.1_t0,128


In [3]:
sub_reward_sizes = [0.01, 0.02, 0.05, 0.1]
num_trials = 3

batch = 128
rows = []
for sub_reward in sub_reward_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_2ep{sub_reward}_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish2',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500,
                      'sub_goal_reward': sub_reward}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'save_dir': '4we_punish',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,sub_goal_reward,name,batch_size
0,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_2ep0.01_t0,128
1,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_2ep0.01_t1,128
2,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_2ep0.01_t2,128
3,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_2ep0.02_t0,128
4,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_2ep0.02_t1,128
5,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_2ep0.02_t2,128
6,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_2ep0.05_t0,128
7,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_2ep0.05_t1,128
8,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_2ep0.05_t2,128
9,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.1,nav_4wallexplore_2ep0.1_t0,128


In [6]:
sub_reward_sizes = [0.01, 0.02, 0.05, 0.1]
num_trials = 3

batch = 128
rows = []
for sub_reward in sub_reward_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wefc_2ep{sub_reward}_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish2',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500,
                      'sub_goal_reward': sub_reward}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'save_dir': '4wefc_punish',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,sub_goal_reward,name,batch_size
0,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wefc_2ep0.01_t0,128
1,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wefc_2ep0.01_t1,128
2,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wefc_2ep0.01_t2,128
3,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wefc_2ep0.02_t0,128
4,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wefc_2ep0.02_t1,128
5,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wefc_2ep0.02_t2,128
6,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wefc_2ep0.05_t0,128
7,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wefc_2ep0.05_t1,128
8,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wefc_2ep0.05_t2,128
9,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.1,nav_4wefc_2ep0.1_t0,128


In [2]:
mult = 2
punish_arg = 5
sub_reward = 0.04



num_trials = 3

batch = 128
rows = []
for punish_arg in punish_args:
    for mult in mults:
        
        for sub_reward in sub_reward_sizes:
            for i in range(num_trials):
                exp_name = f'nav_4wefc_ep{sub_reward}mult{mult}arg{punish_arg}_t{i}'

                env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish1_explorebonus',
                              'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                              'character_reset_pos': 1,  'one_hot_obs': True,
                              'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500,
                              'sub_goal_reward': sub_reward, 'bonus_multiplier': mult,
                              'explore_punish_arg': punish_arg}

                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 5000000,
                    'env_name': 'NavEnv-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'checkpoint_interval': 10,
                    'save_dir': '4wefc_punish',

                    'recurrent': True,
                    'seed': i,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'DelayedRNNPPO',
                    'nn_base_kwargs': {'hidden_size': 64},
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,sub_goal_reward,bonus_multiplier,explore_punish_arg,name,batch_size
0,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t0,128
1,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t1,128
2,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t2,128
3,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t0,128
4,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t1,128
5,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t2,128
6,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t0,128
7,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t1,128
8,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t2,128
9,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,5,3,nav_4wefc_ep0.01mult5arg3_t0,128


In [2]:
sub_reward_sizes = [0.01, 0.02, 0.04]
punish_args = [3, 5, 7]
mults = [2, 5]
num_trials = 3

batch = 128
rows = []
for punish_arg in punish_args:
    for mult in mults:
        
        for sub_reward in sub_reward_sizes:
            for i in range(num_trials):
                exp_name = f'nav_4wefc_ep{sub_reward}mult{mult}arg{punish_arg}_t{i}'

                env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish1_explorebonus',
                              'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                              'character_reset_pos': 1,  'one_hot_obs': True,
                              'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500,
                              'sub_goal_reward': sub_reward, 'bonus_multiplier': mult,
                              'explore_punish_arg': punish_arg}

                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 5000000,
                    'env_name': 'NavEnv-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'checkpoint_interval': 10,
                    'save_dir': '4wefc_punish',

                    'recurrent': True,
                    'seed': i,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'DelayedRNNPPO',
                    'nn_base_kwargs': {'hidden_size': 64},
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,sub_goal_reward,bonus_multiplier,explore_punish_arg,name,batch_size
0,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t0,128
1,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t1,128
2,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t2,128
3,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t0,128
4,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t1,128
5,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t2,128
6,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t0,128
7,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t1,128
8,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t2,128
9,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,5,3,nav_4wefc_ep0.01mult5arg3_t0,128


# Giving hidden location information

In [7]:
num_trials = 3

batch = 128
rows = []
eps = [0.01, 0.02]
for ep in eps:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_punishbonusep{ep}_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': ep, 'bonus_multiplier': 5,
                      'explore_punish_arg': 5, 'obs_set': 3, }

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.01,5,5,3,expl_goalpos_punishbonusep0.01_t0,128
1,explorepunish1_explorebonus,0.01,5,5,3,expl_goalpos_punishbonusep0.01_t1,128
2,explorepunish1_explorebonus,0.01,5,5,3,expl_goalpos_punishbonusep0.01_t2,128
3,explorepunish1_explorebonus,0.02,5,5,3,expl_goalpos_punishbonusep0.02_t0,128
4,explorepunish1_explorebonus,0.02,5,5,3,expl_goalpos_punishbonusep0.02_t1,128
5,explorepunish1_explorebonus,0.02,5,5,3,expl_goalpos_punishbonusep0.02_t2,128


In [9]:
num_trials = 3

batch = 128
rows = []
eps = [0.01, 0.02]
goal_sizes = [30, 40, 50]
for goal in goal_sizes:
    for ep in eps:
        for i in range(num_trials):
            exp_name = f'expl_goalpos_punishbonusep{ep}goal{goal}_t{i}'

            env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                          'sub_goal_reward': ep, 'bonus_multiplier': 5,
                          'explore_punish_arg': 5, 'obs_set': 3, 'goal_size': goal}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 5000000,
                'env_name': 'ExploreNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'explorenav_pos',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,goal_size,name,batch_size
0,explorepunish1_explorebonus,0.01,5,5,3,30,expl_goalpos_punishbonusep0.01goal30_t0,128
1,explorepunish1_explorebonus,0.01,5,5,3,30,expl_goalpos_punishbonusep0.01goal30_t1,128
2,explorepunish1_explorebonus,0.01,5,5,3,30,expl_goalpos_punishbonusep0.01goal30_t2,128
3,explorepunish1_explorebonus,0.02,5,5,3,30,expl_goalpos_punishbonusep0.02goal30_t0,128
4,explorepunish1_explorebonus,0.02,5,5,3,30,expl_goalpos_punishbonusep0.02goal30_t1,128
5,explorepunish1_explorebonus,0.02,5,5,3,30,expl_goalpos_punishbonusep0.02goal30_t2,128
6,explorepunish1_explorebonus,0.01,5,5,3,40,expl_goalpos_punishbonusep0.01goal40_t0,128
7,explorepunish1_explorebonus,0.01,5,5,3,40,expl_goalpos_punishbonusep0.01goal40_t1,128
8,explorepunish1_explorebonus,0.01,5,5,3,40,expl_goalpos_punishbonusep0.01goal40_t2,128
9,explorepunish1_explorebonus,0.02,5,5,3,40,expl_goalpos_punishbonusep0.02goal40_t0,128


# Hidden location information and network sizes

In [5]:
num_trials = 3

batch = 128
rows = []
obs_sets = [2, 3, 4, 5]
network_sizes = [64, 128, 256]

for obs in obs_sets:
    for hidden in network_sizes:
        for i in range(num_trials):
            exp_name = f'expl_goalpos_obs{obs}hidden{hidden}_t{i}'

            env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                          'sub_goal_reward': 0.04, 'bonus_multiplier': 5,
                          'explore_punish_arg': 5, 'obs_set': obs}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 5000000,
                'env_name': 'ExploreNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'explorenav_pos',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': hidden},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden64_t0,128
1,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden64_t1,128
2,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden64_t2,128
3,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden128_t0,128
4,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden128_t1,128
5,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden128_t2,128
6,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden256_t0,128
7,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden256_t1,128
8,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden256_t2,128
9,explorepunish1_explorebonus,0.04,5,5,3,expl_goalpos_obs3hidden64_t0,128


## Longer training - only obs set 3

In [7]:
num_trials = 3

batch = 256
rows = []
network_sizes = [64, 128, 256]
obs = 3

for hidden in network_sizes:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64longer_t0,256
1,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64longer_t1,256
2,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64longer_t2,256
3,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden128longer_t0,256
4,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden128longer_t1,256
5,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden128longer_t2,256
6,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden256longer_t0,256
7,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden256longer_t1,256
8,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden256longer_t2,256


In [2]:
num_trials = 3

batch = 128
rows = []
network_sizes = [64, 128, 256]
obs = 3

for hidden in network_sizes:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_batch128obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden64longer_t0,128
1,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden64longer_t1,128
2,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden64longer_t2,128
3,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden128longer_t0,128
4,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden128longer_t1,128
5,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden128longer_t2,128
6,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden256longer_t0,128
7,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden256longer_t1,128
8,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden256longer_t2,128


In [30]:
num_trials = 3

batch = 128
rows = []
network_sizes = [16, 32, 64]
obs = 2

for hidden in network_sizes:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_batch128obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden16longer_t0,128
1,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden16longer_t1,128
2,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden16longer_t2,128
3,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden32longer_t0,128
4,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden32longer_t1,128
5,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden32longer_t2,128
6,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden64longer_t0,128
7,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden64longer_t1,128
8,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden64longer_t2,128


## Smaller networks - obs set 3

In [2]:
num_trials = 3

batch = 256
rows = []
network_sizes = [16, 32]
obs = 3

for hidden in network_sizes:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden16longer_t0,256
1,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden16longer_t1,256
2,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden16longer_t2,256
3,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden32longer_t0,256
4,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden32longer_t1,256
5,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden32longer_t2,256


## Hidden 64, obs set 1/2

In [3]:
num_trials = 3

batch = 256
rows = []
obs_sets = [1, 2]
hidden = 64

for obs in obs_sets:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_obs1hidden64longer_t0,256
1,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_obs1hidden64longer_t1,256
2,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_obs1hidden64longer_t2,256
3,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_obs2hidden64longer_t0,256
4,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_obs2hidden64longer_t1,256
5,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_obs2hidden64longer_t2,256


In [14]:
num_trials = 3

batch = 256
rows = []
obs_sets = ['2b', '2c']
hidden = 64

for obs in obs_sets:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,2b,expl_goalpos_obs2bhidden64longer_t0,256
1,explorepunish1_explorebonus,0.04,1,5,2b,expl_goalpos_obs2bhidden64longer_t1,256
2,explorepunish1_explorebonus,0.04,1,5,2b,expl_goalpos_obs2bhidden64longer_t2,256
3,explorepunish1_explorebonus,0.04,1,5,2c,expl_goalpos_obs2chidden64longer_t0,256
4,explorepunish1_explorebonus,0.04,1,5,2c,expl_goalpos_obs2chidden64longer_t1,256
5,explorepunish1_explorebonus,0.04,1,5,2c,expl_goalpos_obs2chidden64longer_t2,256


### Give goal pos for number of steps

In [4]:
num_trials = 3

batch = 256
rows = []
num_steps = [2, 5, 10, 25, 50]
hidden = 64

for num in num_steps:
    for i in range(num_trials):
        exp_name = f'expl_givegoal_num{num}_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': '2d', 
                      'obs_set_arg': num}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2d,2,expl_givegoal_num2_t0,256
1,explorepunish1,0.04,1,5,2d,2,expl_givegoal_num2_t1,256
2,explorepunish1,0.04,1,5,2d,2,expl_givegoal_num2_t2,256
3,explorepunish1,0.04,1,5,2d,5,expl_givegoal_num5_t0,256
4,explorepunish1,0.04,1,5,2d,5,expl_givegoal_num5_t1,256
5,explorepunish1,0.04,1,5,2d,5,expl_givegoal_num5_t2,256
6,explorepunish1,0.04,1,5,2d,10,expl_givegoal_num10_t0,256
7,explorepunish1,0.04,1,5,2d,10,expl_givegoal_num10_t1,256
8,explorepunish1,0.04,1,5,2d,10,expl_givegoal_num10_t2,256
9,explorepunish1,0.04,1,5,2d,25,expl_givegoal_num25_t0,256


#### With step sched

In [14]:
num_trials = 3

batch = 256
rows = []

step_schedule = [0, 5e6, 1e7, 1.5e7, 2e7]
schedule = [
    [500, 100, 50, 25, 10],
    step_schedule
]

hidden = 64

for i in range(num_trials):
    exp_name = f'expl_givegoal_sched1_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': '2e', 
                  'obs_set_arg': schedule}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 25000000,
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,
        
        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
        
        'use_universal_step': {'on': True, 'schedule': step_schedule}
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 25, 10], [0, 5000000.0, 100000...",expl_givegoal_sched1_t0,256
1,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 25, 10], [0, 5000000.0, 100000...",expl_givegoal_sched1_t1,256
2,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 25, 10], [0, 5000000.0, 100000...",expl_givegoal_sched1_t2,256


In [3]:
for t in range(3):
    copy = Path(f'../saved_checkpoints/explorenav_pos/expl_givegoal_sched2cont_t{t}/770.pt')
    paste = Path(f'../saved_models/explorenav_pos/expl_givegoal_sched2cont_t{t}_770.pt')
    shutil.copy(copy, paste)

In [18]:
num_trials = 3

batch = 256
rows = []

step_schedule = [0, 5e6, 1e7, 2e7, 3e7, 4e7]
schedule = [
    [500, 100, 50, 30, 20, 10],
    step_schedule
]

hidden = 64

for i in range(num_trials):
    exp_name = f'expl_givegoal_sched2cont_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': '2e', 
                  'obs_set_arg': schedule}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 50000000,
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,
        
        'cont': True,
        'cont_file_name': f'saved_models/explorenav_pos/expl_givegoal_sched1_t{i}_580.pt',
        
        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
        
        'use_universal_step': {'on': True, 'schedule': step_schedule}
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30, 20, 10], [0, 5000000.0, 10...",expl_givegoal_sched2cont_t0,256
1,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30, 20, 10], [0, 5000000.0, 10...",expl_givegoal_sched2cont_t1,256
2,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30, 20, 10], [0, 5000000.0, 10...",expl_givegoal_sched2cont_t2,256


In [4]:
num_trials = 3

batch = 256
rows = []

step_schedule = [0, 5e6, 1e7, 2e7]
schedule = [
    [500, 100, 50, 30],
    step_schedule
]

hidden = 64

for i in range(num_trials):
    exp_name = f'expl_givegoal_sched2bcont_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': '2e', 
                  'obs_set_arg': schedule}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 50000000,
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,
        
        'cont': True,
        'cont_file_name': f'saved_models/explorenav_pos/expl_givegoal_sched2cont_t{i}_770.pt',
        
        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
        
        'use_universal_step': {'on': True, 'schedule': step_schedule}
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30], [0, 5000000.0, 10000000.0...",expl_givegoal_sched2bcont_t0,256
1,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30], [0, 5000000.0, 10000000.0...",expl_givegoal_sched2bcont_t1,256
2,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30], [0, 5000000.0, 10000000.0...",expl_givegoal_sched2bcont_t2,256


#### Sched 3

In [27]:
num_trials = 3

batch = 256
rows = []

step_schedule = list(np.append([0, 5e6], 1e7 + np.arange(70)*2e6))
val_schedule = list(np.append([500, 100], 99 - np.arange(70)))

schedule = [
    val_schedule,
    step_schedule
]

hidden = 64

for i in range(num_trials):
    exp_name = f'expl_givegoal_sched3_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': '2e', 
                  'obs_set_arg': schedule}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': int(step_schedule[-1]),
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,
                
        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
        
        'use_universal_step': {'on': True, 'schedule': step_schedule}
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3_t0,256
1,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3_t1,256
2,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3_t2,256


#### Sched 3 cont

In [31]:
num_trials = 3

batch = 256
rows = []

step_schedule = list(np.append([0, 5e6], 1e7 + np.arange(70)*2e6))
val_schedule = list(np.append([500, 100], 99 - np.arange(70)))

schedule = [
    val_schedule,
    step_schedule
]

hidden = 64

for i in range(num_trials):
    exp_name = f'expl_givegoal_sched3cont_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': '2e', 
                  'obs_set_arg': schedule}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': int(step_schedule[-1]),
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,
        
        'cont': True,
        'cont_file_name': f'saved_models/explorenav_pos/expl_givegoal_sched3_t{i}.pt',
        
        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
        
        'use_universal_step': {'on': True, 'schedule': step_schedule}
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3cont_t0,256
1,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3cont_t1,256
2,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3cont_t2,256


## Hidden 64, last action given

In [5]:
num_trials = 3

batch = 256
rows = []
hidden = 64
obs = 1
for i in range(num_trials):
    exp_name = f'expl_goalpos_lastacthidden{hidden}longer_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': obs}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 25000000,
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,

        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_lastacthidden64longer_t0,256
1,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_lastacthidden64longer_t1,256
2,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_lastacthidden64longer_t2,256


## Obs set 3, reward shaping options

In [13]:
num_trials = 3

batch = 256
rows = []
shaping_names = ['bonus', 'punish', 'noshaping', 'bonuspunish']
rew_structures = ['explorebonus', 'explorepunish1', 'goal', 'explorepunish1_explorebonus']
hidden = 64

for n in range(len(shaping_names)):
    for i in range(num_trials):
        shape_name = shaping_names[n]
        rew_struct = rew_structures[n]
        
        exp_name = f'expl_goalpos_obs3hidden64{shape_name}longer_t{i}'

        env_kwargs = {'rew_structure': rew_struct,
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': 3}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t0,256
1,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t1,256
2,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t2,256
3,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t0,256
4,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t1,256
5,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t2,256
6,goal,0.04,1,5,3,expl_goalpos_obs3hidden64noshapinglonger_t0,256
7,goal,0.04,1,5,3,expl_goalpos_obs3hidden64noshapinglonger_t1,256
8,goal,0.04,1,5,3,expl_goalpos_obs3hidden64noshapinglonger_t2,256
9,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuspunishlonger_t0,256


In [10]:
num_trials = 3

batch = 256
rows = []
hidden = 64

for n in range(len(shaping_names)):
    for i in range(num_trials):
        shape_name = shaping_names[n]
        rew_struct = rew_structures[n]
        
        exp_name = f'expl_goalpos_obs3hidden64{shape_name}longer_t{i}'

        env_kwargs = {'rew_structure': rew_struct,
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': 3}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 1,
            'num_processes': 1,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t0,256
1,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t1,256
2,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t2,256
3,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t0,256
4,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t1,256
5,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t2,256


# Shortcut Nav

In [4]:
num_trials = 3
batch_sizes = [32, 64, 128]
rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'shortcutnav_shortcutprob0batch{batch}_t{i}'

        env_kwargs = {'shortcut_probability': 0}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'checkpoint_dir': 'shortcuts',
            'save_dir': 'shortcuts',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

    pd.DataFrame(rows)

In [9]:
num_trials = 3
batch_sizes = [32, 64, 128]
shortcut_probs = [0.1, 0.2, 0.4]
rows = []
for shortcut in shortcut_probs:
    for batch in batch_sizes:
        for i in range(num_trials):
            exp_name = f'shortcutnav_shortcutprob{shortcut}batch{batch}_t{i}'

            env_kwargs = {'shortcut_probability': shortcut}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'checkpoint_dir': 'shortcuts',
                'save_dir': 'shortcuts',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'FlexBaseAux',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,name,batch_size
0,0.1,shortcutnav_shortcutprob0.1batch32_t0,32
1,0.1,shortcutnav_shortcutprob0.1batch32_t1,32
2,0.1,shortcutnav_shortcutprob0.1batch32_t2,32
3,0.1,shortcutnav_shortcutprob0.1batch64_t0,64
4,0.1,shortcutnav_shortcutprob0.1batch64_t1,64
5,0.1,shortcutnav_shortcutprob0.1batch64_t2,64
6,0.1,shortcutnav_shortcutprob0.1batch128_t0,128
7,0.1,shortcutnav_shortcutprob0.1batch128_t1,128
8,0.1,shortcutnav_shortcutprob0.1batch128_t2,128
9,0.2,shortcutnav_shortcutprob0.2batch32_t0,32


In [13]:
num_trials = 3
character_reset_poss = [1, 2, 3]
shortcut_probs = [0.1, 0.2, 0.4]
batch_sizes = [32, 64]
rows = []
for batch in batch_sizes:
    for shortcut in shortcut_probs:
        for char_reset in character_reset_poss:
            for i in range(num_trials):
                exp_name = f'shortcutnav_p{shortcut}reset{char_reset}batch{batch}_t{i}'

                env_kwargs = {'shortcut_probability': shortcut,
                              'character_reset_pos': char_reset}

                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 3000000,
                    'env_name': 'ShortcutNav-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'checkpoint_interval': 10,
                    'save_dir': 'shortcut_resets',

                    'recurrent': True,
                    'seed': i,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'FlexBaseAux',
                    'nn_base_kwargs': {'hidden_size': 64},
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,name,batch_size
0,0.1,1,shortcutnav_p0.1reset1batch32_t0,32
1,0.1,1,shortcutnav_p0.1reset1batch32_t1,32
2,0.1,1,shortcutnav_p0.1reset1batch32_t2,32
3,0.1,2,shortcutnav_p0.1reset2batch32_t0,32
4,0.1,2,shortcutnav_p0.1reset2batch32_t1,32
5,0.1,2,shortcutnav_p0.1reset2batch32_t2,32
6,0.1,3,shortcutnav_p0.1reset3batch32_t0,32
7,0.1,3,shortcutnav_p0.1reset3batch32_t1,32
8,0.1,3,shortcutnav_p0.1reset3batch32_t2,32
9,0.2,1,shortcutnav_p0.2reset1batch32_t0,32


In [14]:
num_trials = 3
character_reset_poss = [1, 2, 3]
shortcut_probs = [0.1, 0.2, 0.4]
batch_sizes = [32, 64]
rows = []
for batch in batch_sizes:
    for shortcut in shortcut_probs:
        for char_reset in character_reset_poss:
            for i in range(num_trials):
                exp_name = f'shortcutnav_fcp{shortcut}reset{char_reset}batch{batch}_t{i}'

                env_kwargs = {'shortcut_probability': shortcut,
                              'character_reset_pos': char_reset}

                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 3000000,
                    'env_name': 'ShortcutNav-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'checkpoint_interval': 10,
                    'save_dir': 'shortcut_resets',

                    'recurrent': True,
                    'seed': i,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'DelayedRNNPPO',
                    'nn_base_kwargs': {'hidden_size': 64},
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,name,batch_size
0,0.1,1,shortcutnav_fcp0.1reset1batch32_t0,32
1,0.1,1,shortcutnav_fcp0.1reset1batch32_t1,32
2,0.1,1,shortcutnav_fcp0.1reset1batch32_t2,32
3,0.1,2,shortcutnav_fcp0.1reset2batch32_t0,32
4,0.1,2,shortcutnav_fcp0.1reset2batch32_t1,32
5,0.1,2,shortcutnav_fcp0.1reset2batch32_t2,32
6,0.1,3,shortcutnav_fcp0.1reset3batch32_t0,32
7,0.1,3,shortcutnav_fcp0.1reset3batch32_t1,32
8,0.1,3,shortcutnav_fcp0.1reset3batch32_t2,32
9,0.2,1,shortcutnav_fcp0.2reset1batch32_t0,32


## Longer training

In [24]:
num_trials = 3
batch_sizes = [32, 64]

char_reset = 3
shortcut = 0.4

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'shortcutnav_fcp{shortcut}reset{char_reset}batch{batch}longer_t{i}'

        env_kwargs = {'shortcut_probability': shortcut,
                      'character_reset_pos': char_reset}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 15000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'shortcut_resets',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,name,batch_size
0,0.4,3,shortcutnav_fcp0.4reset3batch32longer_t0,32
1,0.4,3,shortcutnav_fcp0.4reset3batch32longer_t1,32
2,0.4,3,shortcutnav_fcp0.4reset3batch32longer_t2,32
3,0.4,3,shortcutnav_fcp0.4reset3batch64longer_t0,64
4,0.4,3,shortcutnav_fcp0.4reset3batch64longer_t1,64
5,0.4,3,shortcutnav_fcp0.4reset3batch64longer_t2,64


In [32]:
num_trials = 3
batch_sizes = [32, 64]

char_reset = 3
shortcut = 0.1

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'shortcutnav_fcp{shortcut}reset{char_reset}batch{batch}longer_t{i}'

        env_kwargs = {'shortcut_probability': shortcut,
                      'character_reset_pos': char_reset}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 15000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'shortcut_resets',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,name,batch_size
0,0.1,3,shortcutnav_fcp0.1reset3batch32longer_t0,32
1,0.1,3,shortcutnav_fcp0.1reset3batch32longer_t1,32
2,0.1,3,shortcutnav_fcp0.1reset3batch32longer_t2,32
3,0.1,3,shortcutnav_fcp0.1reset3batch64longer_t0,64
4,0.1,3,shortcutnav_fcp0.1reset3batch64longer_t1,64
5,0.1,3,shortcutnav_fcp0.1reset3batch64longer_t2,64


## Wall colors


In [7]:
num_trials = 3
shortcut_probs = [0.1, 0.2, 0.4]
wall_colors = [1, 1.5]
rows = []
batch = 64
for p in shortcut_probs:
    for wc in wall_colors:
        for i in range(num_trials):
            exp_name = f'shortcut_wc{wc}p{p}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': wc}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_wc',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size
0,0.1,3,1.0,shortcut_wc1p0.1_t0,64
1,0.1,3,1.0,shortcut_wc1p0.1_t1,64
2,0.1,3,1.0,shortcut_wc1p0.1_t2,64
3,0.1,3,1.5,shortcut_wc1.5p0.1_t0,64
4,0.1,3,1.5,shortcut_wc1.5p0.1_t1,64
5,0.1,3,1.5,shortcut_wc1.5p0.1_t2,64
6,0.2,3,1.0,shortcut_wc1p0.2_t0,64
7,0.2,3,1.0,shortcut_wc1p0.2_t1,64
8,0.2,3,1.0,shortcut_wc1p0.2_t2,64
9,0.2,3,1.5,shortcut_wc1.5p0.2_t0,64


## 2 Corridors

In [9]:
num_trials = 3
shortcut_probs = [0.2, 0.4, 0.6, 0.8]
shortcut_configs = [2, 2.5]
rows = []
batch = 64
for p in shortcut_probs:
    for shortcut in shortcut_configs:
        for i in range(num_trials):
            exp_name = f'shortcut_sc{shortcut}p{p}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'shortcut_config': shortcut}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 1,
                'num_processes': 1,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_sc',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,shortcut_config,name,batch_size
0,0.2,3,2.0,shortcut_sc2p0.2_t0,64
1,0.2,3,2.0,shortcut_sc2p0.2_t1,64
2,0.2,3,2.0,shortcut_sc2p0.2_t2,64
3,0.2,3,2.5,shortcut_sc2.5p0.2_t0,64
4,0.2,3,2.5,shortcut_sc2.5p0.2_t1,64
5,0.2,3,2.5,shortcut_sc2.5p0.2_t2,64
6,0.4,3,2.0,shortcut_sc2p0.4_t0,64
7,0.4,3,2.0,shortcut_sc2p0.4_t1,64
8,0.4,3,2.0,shortcut_sc2p0.4_t2,64
9,0.4,3,2.5,shortcut_sc2.5p0.4_t0,64
