In [1]:
import pickle
import pandas as pd
from datetime import datetime
import os
import numpy as np

from pathlib import Path
import shutil

# Collection of all settings to run auxiliary tasks that we set up
aux_task_configs = {
    'none': {
        'auxiliary_tasks': [], #Inside env_kwargs
        'auxiliary_task_args': [], #Inside env_kwargs
        'auxiliary_heads': [], #Inside nn_base_kwargs
        'auxiliary_truth_sizes': [], #Inside config (parseargs)
        'aux_wrapper_kwargs': {}, #Inside config (parserags)
        'rew_structure': 'goal' #Inside env_kwargs
    },
    
    'rewexplore': {
        'auxiliary_tasks': [],
        'auxiliary_task_args': [],
        'auxiliary_heads': [],
        'auxiliary_truth_sizes': [],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'explore'
    },
    
    'rewdist': {
        'auxiliary_tasks': [],
        'auxiliary_task_args': [],
        'auxiliary_heads': [],
        'auxiliary_truth_sizes': [],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'dist'
    },
    
    'wall0': {
        'auxiliary_tasks': [2],
        'auxiliary_task_args': [0],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'wall1': {
        'auxiliary_tasks': [2],
        'auxiliary_task_args': [1],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'wall01': {
        'auxiliary_tasks': [2, 2],
        'auxiliary_task_args': [0, 1],
        'auxiliary_heads': [[-1, 0, 0, 1], [-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1, 1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'goaldist': {
        'auxiliary_tasks': [3],
        'auxiliary_task_args': [None],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'terminal': {
        'auxiliary_tasks': [],
        'auxiliary_task_args': [],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {'auxiliary_tasks': [0], 
                               'auxiliary_task_args': [None]},
        'rew_structure': 'goal'
    },
    
    'catwall0': {
        'auxiliary_tasks': [5],
        'auxiliary_task_args': [0],
        'auxiliary_heads': [[-1, 0, 1, 2]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'catwall0': {
        'auxiliary_tasks': [5],
        'auxiliary_task_args': [1],
        'auxiliary_heads': [[-1, 0, 1, 2]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'catwall01': {
        'auxiliary_tasks': [5, 5],
        'auxiliary_task_args': [0, 1],
        'auxiliary_heads': [[-1, 0, 1, 2], [-1, 0, 1, 2]],
        'auxiliary_truth_sizes': [1, 1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'catfacewall': {
        'auxiliary_tasks': [4],
        'auxiliary_task_args': [None],
        'auxiliary_heads': [[-1, 0, 1, 4]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
    
    'catquad': {
        'auxiliary_tasks': [6],
        'auxiliary_task_args': [None],
        'auxiliary_heads': [[-1, 0, 1, 4]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
}

# MWM North Poster Baseline

These were the experiments by which others were compared. Specifically batch size 16 and 32

In [None]:
batch_sizes = [16, 32, 64]
num_trials = 3
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_pdistal_batch{batch}auxnone_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 2, 'wall_colors': 1, 'num_rays': 12, 'fov': 1,
                      'poster': 1, 'character_reset_pos': 1}

        aux_heads = auxiliary_heads[n]
        aux_kwargs = aux_wrapper_kwargs[n]

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['layer_width'] = width
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            'checkpoint_interval': 10,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'hidden_size': width,
                               'auxiliary_heads': aux_heads},
            'aux_wrapper_kwargs': aux_kwargs
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

# Testing one-hot observation encodings and retesting exploration MWM variation

## One-hot north poster

In [4]:
batch_sizes = [16, 32, 64]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_pnorth_batch{batch}onehot_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 2, 'wall_colors': 1, 'num_rays': 12, 'fov': 1,
                      'poster': 1, 'character_reset_pos': 1,  'one_hot_obs': True}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            'checkpoint_interval': 10,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'hidden_size': 16},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,poster,character_reset_pos,one_hot_obs,name,batch_size
0,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t0,16
1,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t1,16
2,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t2,16
3,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t3,16
4,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t4,16
5,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t0,32
6,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t1,32
7,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t2,32
8,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t3,32
9,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t4,32


### No vecnormalize

In [12]:
batch_sizes = [16, 32, 64]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_pnorth_batch{batch}onehotnovec_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 2, 'wall_colors': 1, 'num_rays': 12, 'fov': 1,
                      'poster': 1, 'character_reset_pos': 1,  'one_hot_obs': True}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            'checkpoint_interval': 10,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'hidden_size': 16},
            'normalize_env': False
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,poster,character_reset_pos,one_hot_obs,name,batch_size
0,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehotnovec_t0,16
1,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehotnovec_t1,16
2,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehotnovec_t2,16
3,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehotnovec_t3,16
4,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehotnovec_t4,16
5,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehotnovec_t0,32
6,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehotnovec_t1,32
7,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehotnovec_t2,32
8,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehotnovec_t3,32
9,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehotnovec_t4,32


In [11]:
import sys
sys.path.append('../')
from scheduler import convert_config_to_command

convert_config_to_command('nav_pnorth_batch64onehotnovec_t0', config_folder='../experiment_configs/')

'python main.py --wandb-project-name Nav_Shared_Layers --exp-name nav_pnorth_batch64onehotnovec_t0 --save-name nav_pnorth_batch64onehotnovec_t0 --num-env-steps 3000000 --env-name NavEnv-v0 --algo ppo --num-mini-batch 1 --num-processes 1 --num-steps 64 --checkpoint-interval 10 --recurrent --seed 0 --no-cuda --env-kwargs num_objects=0 rew_structure=goal task_structure=2 wall_colors=1 num_rays=12 fov=1 poster=1 character_reset_pos=1 one_hot_obs=True --nn-base FlexBase --nn-base-kwargs hidden_size=16 --normalize-env False --config-file-name nav_pnorth_batch64onehotnovec_t0 '

# Explore env

In [5]:
batch_sizes = [16, 64, 128]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_batch{batch}onehot_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 1,
            'num_processes': 1,
            'num_steps': batch,
            'checkpoint_interval': 10,
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,name,batch_size
0,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t0,16
1,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t1,16
2,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t2,16
3,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t3,16
4,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t4,16
5,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t0,64
6,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t1,64
7,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t2,64
8,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t3,64
9,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t4,64


## Tweaking some environment parameters

It appears from looking at trajectories that the agent can in fact explore, locate, and head back to the goal. So going to increase turn speed and move speed and number of steps per episode to encourage this even more

In [24]:
batch_sizes = [64, 128]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_batch{batch}faster_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 15, 'max_steps': 500}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'checkpoint_dir': '4wallexplore',
            'save_dir': '4wallexplore',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,name,batch_size
0,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster_t0,64
1,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster_t1,64
2,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster_t2,64
3,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster_t3,64
4,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster_t4,64
5,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster_t0,128
6,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster_t1,128
7,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster_t2,128
8,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster_t3,128
9,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster_t4,128


In [25]:
batch_sizes = [64, 128]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_batch{batch}faster20m_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 15, 'max_steps': 500}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 20000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'checkpoint_dir': '4wallexplore',
            'save_dir': '4wallexplore',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,name,batch_size
0,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster20m_t0,64
1,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster20m_t1,64
2,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster20m_t2,64
3,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster20m_t3,64
4,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch64faster20m_t4,64
5,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20m_t0,128
6,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20m_t1,128
7,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20m_t2,128
8,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20m_t3,128
9,0,goal,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20m_t4,128


In [3]:
batch_sizes = [128]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_batch{batch}faster20mrewexplore_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'explore',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 20000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'checkpoint_dir': '4wallexplore',
            'save_dir': '4wallexplore',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,name,batch_size
0,0,explore,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20mrewexplore_t0,128
1,0,explore,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20mrewexplore_t1,128
2,0,explore,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20mrewexplore_t2,128
3,0,explore,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20mrewexplore_t3,128
4,0,explore,3,4,12,1,1,True,0.5,15,500,nav_4wallexplore_batch128faster20mrewexplore_t4,128


In [3]:
batch_sizes = [128]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_batch{batch}explorepunish_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'checkpoint_dir': '4wallexplore',
            'save_dir': '4wallexplore',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,name,batch_size
0,0,explorepunish,3,4,12,1,1,True,0.5,10,500,nav_4wallexplore_batch128explorepunish_t0,128
1,0,explorepunish,3,4,12,1,1,True,0.5,10,500,nav_4wallexplore_batch128explorepunish_t1,128
2,0,explorepunish,3,4,12,1,1,True,0.5,10,500,nav_4wallexplore_batch128explorepunish_t2,128
3,0,explorepunish,3,4,12,1,1,True,0.5,10,500,nav_4wallexplore_batch128explorepunish_t3,128
4,0,explorepunish,3,4,12,1,1,True,0.5,10,500,nav_4wallexplore_batch128explorepunish_t4,128


In [6]:
sub_reward_sizes = [0.01, 0.02, 0.05, 0.1]
num_trials = 3

batch = 128
rows = []
for sub_reward in sub_reward_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_ep{sub_reward}_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500,
                      'sub_goal_reward': sub_reward}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'save_dir': '4we_punish',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,sub_goal_reward,name,batch_size
0,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_ep0.01_t0,128
1,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_ep0.01_t1,128
2,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_ep0.01_t2,128
3,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_ep0.02_t0,128
4,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_ep0.02_t1,128
5,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_ep0.02_t2,128
6,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_ep0.05_t0,128
7,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_ep0.05_t1,128
8,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_ep0.05_t2,128
9,0,explorepunish,3,4,12,1,1,True,0.5,10,500,0.1,nav_4wallexplore_ep0.1_t0,128


In [3]:
sub_reward_sizes = [0.01, 0.02, 0.05, 0.1]
num_trials = 3

batch = 128
rows = []
for sub_reward in sub_reward_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_2ep{sub_reward}_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish2',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500,
                      'sub_goal_reward': sub_reward}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'save_dir': '4we_punish',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,sub_goal_reward,name,batch_size
0,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_2ep0.01_t0,128
1,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_2ep0.01_t1,128
2,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wallexplore_2ep0.01_t2,128
3,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_2ep0.02_t0,128
4,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_2ep0.02_t1,128
5,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wallexplore_2ep0.02_t2,128
6,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_2ep0.05_t0,128
7,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_2ep0.05_t1,128
8,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wallexplore_2ep0.05_t2,128
9,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.1,nav_4wallexplore_2ep0.1_t0,128


In [6]:
sub_reward_sizes = [0.01, 0.02, 0.05, 0.1]
num_trials = 3

batch = 128
rows = []
for sub_reward in sub_reward_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wefc_2ep{sub_reward}_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish2',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True,
                      'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500,
                      'sub_goal_reward': sub_reward}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            
            'checkpoint_interval': 10,
            'save_dir': '4wefc_punish',
            
            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            
            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))
        
pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,sub_goal_reward,name,batch_size
0,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wefc_2ep0.01_t0,128
1,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wefc_2ep0.01_t1,128
2,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.01,nav_4wefc_2ep0.01_t2,128
3,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wefc_2ep0.02_t0,128
4,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wefc_2ep0.02_t1,128
5,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.02,nav_4wefc_2ep0.02_t2,128
6,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wefc_2ep0.05_t0,128
7,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wefc_2ep0.05_t1,128
8,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.05,nav_4wefc_2ep0.05_t2,128
9,0,explorepunish2,3,4,12,1,1,True,0.5,10,500,0.1,nav_4wefc_2ep0.1_t0,128


In [2]:
mult = 2
punish_arg = 5
sub_reward = 0.04



num_trials = 3

batch = 128
rows = []
for punish_arg in punish_args:
    for mult in mults:
        
        for sub_reward in sub_reward_sizes:
            for i in range(num_trials):
                exp_name = f'nav_4wefc_ep{sub_reward}mult{mult}arg{punish_arg}_t{i}'

                env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish1_explorebonus',
                              'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                              'character_reset_pos': 1,  'one_hot_obs': True,
                              'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500,
                              'sub_goal_reward': sub_reward, 'bonus_multiplier': mult,
                              'explore_punish_arg': punish_arg}

                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 5000000,
                    'env_name': 'NavEnv-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'checkpoint_interval': 10,
                    'save_dir': '4wefc_punish',

                    'recurrent': True,
                    'seed': i,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'DelayedRNNPPO',
                    'nn_base_kwargs': {'hidden_size': 64},
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,sub_goal_reward,bonus_multiplier,explore_punish_arg,name,batch_size
0,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t0,128
1,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t1,128
2,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t2,128
3,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t0,128
4,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t1,128
5,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t2,128
6,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t0,128
7,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t1,128
8,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t2,128
9,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,5,3,nav_4wefc_ep0.01mult5arg3_t0,128


In [2]:
sub_reward_sizes = [0.01, 0.02, 0.04]
punish_args = [3, 5, 7]
mults = [2, 5]
num_trials = 3

batch = 128
rows = []
for punish_arg in punish_args:
    for mult in mults:
        
        for sub_reward in sub_reward_sizes:
            for i in range(num_trials):
                exp_name = f'nav_4wefc_ep{sub_reward}mult{mult}arg{punish_arg}_t{i}'

                env_kwargs = {'num_objects': 0, 'rew_structure': 'explorepunish1_explorebonus',
                              'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                              'character_reset_pos': 1,  'one_hot_obs': True,
                              'turn_speed': 0.5, 'move_speed': 10, 'max_steps': 500,
                              'sub_goal_reward': sub_reward, 'bonus_multiplier': mult,
                              'explore_punish_arg': punish_arg}

                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 5000000,
                    'env_name': 'NavEnv-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'checkpoint_interval': 10,
                    'save_dir': '4wefc_punish',

                    'recurrent': True,
                    'seed': i,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'DelayedRNNPPO',
                    'nn_base_kwargs': {'hidden_size': 64},
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,turn_speed,move_speed,max_steps,sub_goal_reward,bonus_multiplier,explore_punish_arg,name,batch_size
0,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t0,128
1,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t1,128
2,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,2,3,nav_4wefc_ep0.01mult2arg3_t2,128
3,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t0,128
4,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t1,128
5,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.02,2,3,nav_4wefc_ep0.02mult2arg3_t2,128
6,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t0,128
7,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t1,128
8,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.04,2,3,nav_4wefc_ep0.04mult2arg3_t2,128
9,0,explorepunish1_explorebonus,3,4,12,1,1,True,0.5,10,500,0.01,5,3,nav_4wefc_ep0.01mult5arg3_t0,128


# Giving hidden location information

In [7]:
num_trials = 3

batch = 128
rows = []
eps = [0.01, 0.02]
for ep in eps:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_punishbonusep{ep}_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': ep, 'bonus_multiplier': 5,
                      'explore_punish_arg': 5, 'obs_set': 3, }

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.01,5,5,3,expl_goalpos_punishbonusep0.01_t0,128
1,explorepunish1_explorebonus,0.01,5,5,3,expl_goalpos_punishbonusep0.01_t1,128
2,explorepunish1_explorebonus,0.01,5,5,3,expl_goalpos_punishbonusep0.01_t2,128
3,explorepunish1_explorebonus,0.02,5,5,3,expl_goalpos_punishbonusep0.02_t0,128
4,explorepunish1_explorebonus,0.02,5,5,3,expl_goalpos_punishbonusep0.02_t1,128
5,explorepunish1_explorebonus,0.02,5,5,3,expl_goalpos_punishbonusep0.02_t2,128


In [9]:
num_trials = 3

batch = 128
rows = []
eps = [0.01, 0.02]
goal_sizes = [30, 40, 50]
for goal in goal_sizes:
    for ep in eps:
        for i in range(num_trials):
            exp_name = f'expl_goalpos_punishbonusep{ep}goal{goal}_t{i}'

            env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                          'sub_goal_reward': ep, 'bonus_multiplier': 5,
                          'explore_punish_arg': 5, 'obs_set': 3, 'goal_size': goal}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 5000000,
                'env_name': 'ExploreNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'explorenav_pos',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,goal_size,name,batch_size
0,explorepunish1_explorebonus,0.01,5,5,3,30,expl_goalpos_punishbonusep0.01goal30_t0,128
1,explorepunish1_explorebonus,0.01,5,5,3,30,expl_goalpos_punishbonusep0.01goal30_t1,128
2,explorepunish1_explorebonus,0.01,5,5,3,30,expl_goalpos_punishbonusep0.01goal30_t2,128
3,explorepunish1_explorebonus,0.02,5,5,3,30,expl_goalpos_punishbonusep0.02goal30_t0,128
4,explorepunish1_explorebonus,0.02,5,5,3,30,expl_goalpos_punishbonusep0.02goal30_t1,128
5,explorepunish1_explorebonus,0.02,5,5,3,30,expl_goalpos_punishbonusep0.02goal30_t2,128
6,explorepunish1_explorebonus,0.01,5,5,3,40,expl_goalpos_punishbonusep0.01goal40_t0,128
7,explorepunish1_explorebonus,0.01,5,5,3,40,expl_goalpos_punishbonusep0.01goal40_t1,128
8,explorepunish1_explorebonus,0.01,5,5,3,40,expl_goalpos_punishbonusep0.01goal40_t2,128
9,explorepunish1_explorebonus,0.02,5,5,3,40,expl_goalpos_punishbonusep0.02goal40_t0,128


# Hidden location information and network sizes

In [5]:
num_trials = 3

batch = 128
rows = []
obs_sets = [2, 3, 4, 5]
network_sizes = [64, 128, 256]

for obs in obs_sets:
    for hidden in network_sizes:
        for i in range(num_trials):
            exp_name = f'expl_goalpos_obs{obs}hidden{hidden}_t{i}'

            env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                          'sub_goal_reward': 0.04, 'bonus_multiplier': 5,
                          'explore_punish_arg': 5, 'obs_set': obs}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 5000000,
                'env_name': 'ExploreNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'explorenav_pos',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': hidden},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden64_t0,128
1,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden64_t1,128
2,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden64_t2,128
3,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden128_t0,128
4,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden128_t1,128
5,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden128_t2,128
6,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden256_t0,128
7,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden256_t1,128
8,explorepunish1_explorebonus,0.04,5,5,2,expl_goalpos_obs2hidden256_t2,128
9,explorepunish1_explorebonus,0.04,5,5,3,expl_goalpos_obs3hidden64_t0,128


## Longer training - only obs set 3

In [7]:
num_trials = 3

batch = 256
rows = []
network_sizes = [64, 128, 256]
obs = 3

for hidden in network_sizes:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64longer_t0,256
1,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64longer_t1,256
2,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64longer_t2,256
3,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden128longer_t0,256
4,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden128longer_t1,256
5,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden128longer_t2,256
6,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden256longer_t0,256
7,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden256longer_t1,256
8,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden256longer_t2,256


In [2]:
num_trials = 3

batch = 128
rows = []
network_sizes = [64, 128, 256]
obs = 3

for hidden in network_sizes:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_batch128obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden64longer_t0,128
1,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden64longer_t1,128
2,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden64longer_t2,128
3,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden128longer_t0,128
4,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden128longer_t1,128
5,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden128longer_t2,128
6,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden256longer_t0,128
7,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden256longer_t1,128
8,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_batch128obs3hidden256longer_t2,128


In [30]:
num_trials = 3

batch = 128
rows = []
network_sizes = [16, 32, 64]
obs = 2

for hidden in network_sizes:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_batch128obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden16longer_t0,128
1,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden16longer_t1,128
2,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden16longer_t2,128
3,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden32longer_t0,128
4,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden32longer_t1,128
5,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden32longer_t2,128
6,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden64longer_t0,128
7,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden64longer_t1,128
8,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_batch128obs2hidden64longer_t2,128


## Smaller networks - obs set 3

In [2]:
num_trials = 3

batch = 256
rows = []
network_sizes = [16, 32]
obs = 3

for hidden in network_sizes:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden16longer_t0,256
1,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden16longer_t1,256
2,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden16longer_t2,256
3,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden32longer_t0,256
4,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden32longer_t1,256
5,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden32longer_t2,256


## Hidden 64, obs set 1/2

In [3]:
num_trials = 3

batch = 256
rows = []
obs_sets = [1, 2]
hidden = 64

for obs in obs_sets:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_obs1hidden64longer_t0,256
1,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_obs1hidden64longer_t1,256
2,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_obs1hidden64longer_t2,256
3,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_obs2hidden64longer_t0,256
4,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_obs2hidden64longer_t1,256
5,explorepunish1_explorebonus,0.04,1,5,2,expl_goalpos_obs2hidden64longer_t2,256


In [14]:
num_trials = 3

batch = 256
rows = []
obs_sets = ['2b', '2c']
hidden = 64

for obs in obs_sets:
    for i in range(num_trials):
        exp_name = f'expl_goalpos_obs{obs}hidden{hidden}longer_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': obs}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,2b,expl_goalpos_obs2bhidden64longer_t0,256
1,explorepunish1_explorebonus,0.04,1,5,2b,expl_goalpos_obs2bhidden64longer_t1,256
2,explorepunish1_explorebonus,0.04,1,5,2b,expl_goalpos_obs2bhidden64longer_t2,256
3,explorepunish1_explorebonus,0.04,1,5,2c,expl_goalpos_obs2chidden64longer_t0,256
4,explorepunish1_explorebonus,0.04,1,5,2c,expl_goalpos_obs2chidden64longer_t1,256
5,explorepunish1_explorebonus,0.04,1,5,2c,expl_goalpos_obs2chidden64longer_t2,256


### Give goal pos for number of steps

In [4]:
num_trials = 3

batch = 256
rows = []
num_steps = [2, 5, 10, 25, 50]
hidden = 64

for num in num_steps:
    for i in range(num_trials):
        exp_name = f'expl_givegoal_num{num}_t{i}'

        env_kwargs = {'rew_structure': 'explorepunish1',
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': '2d', 
                      'obs_set_arg': num}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2d,2,expl_givegoal_num2_t0,256
1,explorepunish1,0.04,1,5,2d,2,expl_givegoal_num2_t1,256
2,explorepunish1,0.04,1,5,2d,2,expl_givegoal_num2_t2,256
3,explorepunish1,0.04,1,5,2d,5,expl_givegoal_num5_t0,256
4,explorepunish1,0.04,1,5,2d,5,expl_givegoal_num5_t1,256
5,explorepunish1,0.04,1,5,2d,5,expl_givegoal_num5_t2,256
6,explorepunish1,0.04,1,5,2d,10,expl_givegoal_num10_t0,256
7,explorepunish1,0.04,1,5,2d,10,expl_givegoal_num10_t1,256
8,explorepunish1,0.04,1,5,2d,10,expl_givegoal_num10_t2,256
9,explorepunish1,0.04,1,5,2d,25,expl_givegoal_num25_t0,256


#### With step sched

In [14]:
num_trials = 3

batch = 256
rows = []

step_schedule = [0, 5e6, 1e7, 1.5e7, 2e7]
schedule = [
    [500, 100, 50, 25, 10],
    step_schedule
]

hidden = 64

for i in range(num_trials):
    exp_name = f'expl_givegoal_sched1_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': '2e', 
                  'obs_set_arg': schedule}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 25000000,
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,
        
        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
        
        'use_universal_step': {'on': True, 'schedule': step_schedule}
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 25, 10], [0, 5000000.0, 100000...",expl_givegoal_sched1_t0,256
1,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 25, 10], [0, 5000000.0, 100000...",expl_givegoal_sched1_t1,256
2,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 25, 10], [0, 5000000.0, 100000...",expl_givegoal_sched1_t2,256


In [3]:
for t in range(3):
    copy = Path(f'../saved_checkpoints/explorenav_pos/expl_givegoal_sched2cont_t{t}/770.pt')
    paste = Path(f'../saved_models/explorenav_pos/expl_givegoal_sched2cont_t{t}_770.pt')
    shutil.copy(copy, paste)

In [18]:
num_trials = 3

batch = 256
rows = []

step_schedule = [0, 5e6, 1e7, 2e7, 3e7, 4e7]
schedule = [
    [500, 100, 50, 30, 20, 10],
    step_schedule
]

hidden = 64

for i in range(num_trials):
    exp_name = f'expl_givegoal_sched2cont_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': '2e', 
                  'obs_set_arg': schedule}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 50000000,
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,
        
        'cont': True,
        'cont_file_name': f'saved_models/explorenav_pos/expl_givegoal_sched1_t{i}_580.pt',
        
        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
        
        'use_universal_step': {'on': True, 'schedule': step_schedule}
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30, 20, 10], [0, 5000000.0, 10...",expl_givegoal_sched2cont_t0,256
1,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30, 20, 10], [0, 5000000.0, 10...",expl_givegoal_sched2cont_t1,256
2,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30, 20, 10], [0, 5000000.0, 10...",expl_givegoal_sched2cont_t2,256


In [4]:
num_trials = 3

batch = 256
rows = []

step_schedule = [0, 5e6, 1e7, 2e7]
schedule = [
    [500, 100, 50, 30],
    step_schedule
]

hidden = 64

for i in range(num_trials):
    exp_name = f'expl_givegoal_sched2bcont_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': '2e', 
                  'obs_set_arg': schedule}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 50000000,
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,
        
        'cont': True,
        'cont_file_name': f'saved_models/explorenav_pos/expl_givegoal_sched2cont_t{i}_770.pt',
        
        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
        
        'use_universal_step': {'on': True, 'schedule': step_schedule}
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30], [0, 5000000.0, 10000000.0...",expl_givegoal_sched2bcont_t0,256
1,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30], [0, 5000000.0, 10000000.0...",expl_givegoal_sched2bcont_t1,256
2,explorepunish1,0.04,1,5,2e,"[[500, 100, 50, 30], [0, 5000000.0, 10000000.0...",expl_givegoal_sched2bcont_t2,256


#### Sched 3

In [27]:
num_trials = 3

batch = 256
rows = []

step_schedule = list(np.append([0, 5e6], 1e7 + np.arange(70)*2e6))
val_schedule = list(np.append([500, 100], 99 - np.arange(70)))

schedule = [
    val_schedule,
    step_schedule
]

hidden = 64

for i in range(num_trials):
    exp_name = f'expl_givegoal_sched3_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': '2e', 
                  'obs_set_arg': schedule}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': int(step_schedule[-1]),
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,
                
        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
        
        'use_universal_step': {'on': True, 'schedule': step_schedule}
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3_t0,256
1,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3_t1,256
2,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3_t2,256


#### Sched 3 cont

In [31]:
num_trials = 3

batch = 256
rows = []

step_schedule = list(np.append([0, 5e6], 1e7 + np.arange(70)*2e6))
val_schedule = list(np.append([500, 100], 99 - np.arange(70)))

schedule = [
    val_schedule,
    step_schedule
]

hidden = 64

for i in range(num_trials):
    exp_name = f'expl_givegoal_sched3cont_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': '2e', 
                  'obs_set_arg': schedule}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': int(step_schedule[-1]),
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,
        
        'cont': True,
        'cont_file_name': f'saved_models/explorenav_pos/expl_givegoal_sched3_t{i}.pt',
        
        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
        
        'use_universal_step': {'on': True, 'schedule': step_schedule}
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,obs_set_arg,name,batch_size
0,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3cont_t0,256
1,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3cont_t1,256
2,explorepunish1,0.04,1,5,2e,"[[500, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91...",expl_givegoal_sched3cont_t2,256


## Hidden 64, last action given

In [5]:
num_trials = 3

batch = 256
rows = []
hidden = 64
obs = 1
for i in range(num_trials):
    exp_name = f'expl_goalpos_lastacthidden{hidden}longer_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1_explorebonus',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': obs}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 25000000,
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,

        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_lastacthidden64longer_t0,256
1,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_lastacthidden64longer_t1,256
2,explorepunish1_explorebonus,0.04,1,5,1,expl_goalpos_lastacthidden64longer_t2,256


## Obs set 3, reward shaping options

In [13]:
num_trials = 3

batch = 256
rows = []
shaping_names = ['bonus', 'punish', 'noshaping', 'bonuspunish']
rew_structures = ['explorebonus', 'explorepunish1', 'goal', 'explorepunish1_explorebonus']
hidden = 64

for n in range(len(shaping_names)):
    for i in range(num_trials):
        shape_name = shaping_names[n]
        rew_struct = rew_structures[n]
        
        exp_name = f'expl_goalpos_obs3hidden64{shape_name}longer_t{i}'

        env_kwargs = {'rew_structure': rew_struct,
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': 3}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t0,256
1,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t1,256
2,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t2,256
3,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t0,256
4,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t1,256
5,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t2,256
6,goal,0.04,1,5,3,expl_goalpos_obs3hidden64noshapinglonger_t0,256
7,goal,0.04,1,5,3,expl_goalpos_obs3hidden64noshapinglonger_t1,256
8,goal,0.04,1,5,3,expl_goalpos_obs3hidden64noshapinglonger_t2,256
9,explorepunish1_explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuspunishlonger_t0,256


In [10]:
num_trials = 3

batch = 256
rows = []
hidden = 64

for n in range(len(shaping_names)):
    for i in range(num_trials):
        shape_name = shaping_names[n]
        rew_struct = rew_structures[n]
        
        exp_name = f'expl_goalpos_obs3hidden64{shape_name}longer_t{i}'

        env_kwargs = {'rew_structure': rew_struct,
                      'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                      'explore_punish_arg': 5, 'obs_set': 3}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 25000000,
            'env_name': 'ExploreNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 1,
            'num_processes': 1,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'explorenav_pos',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': hidden},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,rew_structure,sub_goal_reward,bonus_multiplier,explore_punish_arg,obs_set,name,batch_size
0,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t0,256
1,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t1,256
2,explorebonus,0.04,1,5,3,expl_goalpos_obs3hidden64bonuslonger_t2,256
3,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t0,256
4,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t1,256
5,explorepunish1,0.04,1,5,3,expl_goalpos_obs3hidden64punishlonger_t2,256


# Shortcut Nav

In [4]:
num_trials = 3
batch_sizes = [32, 64, 128]
rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'shortcutnav_shortcutprob0batch{batch}_t{i}'

        env_kwargs = {'shortcut_probability': 0}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'checkpoint_dir': 'shortcuts',
            'save_dir': 'shortcuts',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'FlexBaseAux',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

    pd.DataFrame(rows)

In [9]:
num_trials = 3
batch_sizes = [32, 64, 128]
shortcut_probs = [0.1, 0.2, 0.4]
rows = []
for shortcut in shortcut_probs:
    for batch in batch_sizes:
        for i in range(num_trials):
            exp_name = f'shortcutnav_shortcutprob{shortcut}batch{batch}_t{i}'

            env_kwargs = {'shortcut_probability': shortcut}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'checkpoint_dir': 'shortcuts',
                'save_dir': 'shortcuts',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'FlexBaseAux',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,name,batch_size
0,0.1,shortcutnav_shortcutprob0.1batch32_t0,32
1,0.1,shortcutnav_shortcutprob0.1batch32_t1,32
2,0.1,shortcutnav_shortcutprob0.1batch32_t2,32
3,0.1,shortcutnav_shortcutprob0.1batch64_t0,64
4,0.1,shortcutnav_shortcutprob0.1batch64_t1,64
5,0.1,shortcutnav_shortcutprob0.1batch64_t2,64
6,0.1,shortcutnav_shortcutprob0.1batch128_t0,128
7,0.1,shortcutnav_shortcutprob0.1batch128_t1,128
8,0.1,shortcutnav_shortcutprob0.1batch128_t2,128
9,0.2,shortcutnav_shortcutprob0.2batch32_t0,32


In [13]:
num_trials = 3
character_reset_poss = [1, 2, 3]
shortcut_probs = [0.1, 0.2, 0.4]
batch_sizes = [32, 64]
rows = []
for batch in batch_sizes:
    for shortcut in shortcut_probs:
        for char_reset in character_reset_poss:
            for i in range(num_trials):
                exp_name = f'shortcutnav_p{shortcut}reset{char_reset}batch{batch}_t{i}'

                env_kwargs = {'shortcut_probability': shortcut,
                              'character_reset_pos': char_reset}

                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 3000000,
                    'env_name': 'ShortcutNav-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'checkpoint_interval': 10,
                    'save_dir': 'shortcut_resets',

                    'recurrent': True,
                    'seed': i,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'FlexBaseAux',
                    'nn_base_kwargs': {'hidden_size': 64},
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,name,batch_size
0,0.1,1,shortcutnav_p0.1reset1batch32_t0,32
1,0.1,1,shortcutnav_p0.1reset1batch32_t1,32
2,0.1,1,shortcutnav_p0.1reset1batch32_t2,32
3,0.1,2,shortcutnav_p0.1reset2batch32_t0,32
4,0.1,2,shortcutnav_p0.1reset2batch32_t1,32
5,0.1,2,shortcutnav_p0.1reset2batch32_t2,32
6,0.1,3,shortcutnav_p0.1reset3batch32_t0,32
7,0.1,3,shortcutnav_p0.1reset3batch32_t1,32
8,0.1,3,shortcutnav_p0.1reset3batch32_t2,32
9,0.2,1,shortcutnav_p0.2reset1batch32_t0,32


In [14]:
num_trials = 3
character_reset_poss = [1, 2, 3]
shortcut_probs = [0.1, 0.2, 0.4]
batch_sizes = [32, 64]
rows = []
for batch in batch_sizes:
    for shortcut in shortcut_probs:
        for char_reset in character_reset_poss:
            for i in range(num_trials):
                exp_name = f'shortcutnav_fcp{shortcut}reset{char_reset}batch{batch}_t{i}'

                env_kwargs = {'shortcut_probability': shortcut,
                              'character_reset_pos': char_reset}

                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 3000000,
                    'env_name': 'ShortcutNav-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'checkpoint_interval': 10,
                    'save_dir': 'shortcut_resets',

                    'recurrent': True,
                    'seed': i,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'DelayedRNNPPO',
                    'nn_base_kwargs': {'hidden_size': 64},
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,name,batch_size
0,0.1,1,shortcutnav_fcp0.1reset1batch32_t0,32
1,0.1,1,shortcutnav_fcp0.1reset1batch32_t1,32
2,0.1,1,shortcutnav_fcp0.1reset1batch32_t2,32
3,0.1,2,shortcutnav_fcp0.1reset2batch32_t0,32
4,0.1,2,shortcutnav_fcp0.1reset2batch32_t1,32
5,0.1,2,shortcutnav_fcp0.1reset2batch32_t2,32
6,0.1,3,shortcutnav_fcp0.1reset3batch32_t0,32
7,0.1,3,shortcutnav_fcp0.1reset3batch32_t1,32
8,0.1,3,shortcutnav_fcp0.1reset3batch32_t2,32
9,0.2,1,shortcutnav_fcp0.2reset1batch32_t0,32


## Longer training

In [24]:
num_trials = 3
batch_sizes = [32, 64]

char_reset = 3
shortcut = 0.4

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'shortcutnav_fcp{shortcut}reset{char_reset}batch{batch}longer_t{i}'

        env_kwargs = {'shortcut_probability': shortcut,
                      'character_reset_pos': char_reset}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 15000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'shortcut_resets',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,name,batch_size
0,0.4,3,shortcutnav_fcp0.4reset3batch32longer_t0,32
1,0.4,3,shortcutnav_fcp0.4reset3batch32longer_t1,32
2,0.4,3,shortcutnav_fcp0.4reset3batch32longer_t2,32
3,0.4,3,shortcutnav_fcp0.4reset3batch64longer_t0,64
4,0.4,3,shortcutnav_fcp0.4reset3batch64longer_t1,64
5,0.4,3,shortcutnav_fcp0.4reset3batch64longer_t2,64


In [32]:
num_trials = 3
batch_sizes = [32, 64]

char_reset = 3
shortcut = 0.1

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'shortcutnav_fcp{shortcut}reset{char_reset}batch{batch}longer_t{i}'

        env_kwargs = {'shortcut_probability': shortcut,
                      'character_reset_pos': char_reset}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 15000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'shortcut_resets',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,name,batch_size
0,0.1,3,shortcutnav_fcp0.1reset3batch32longer_t0,32
1,0.1,3,shortcutnav_fcp0.1reset3batch32longer_t1,32
2,0.1,3,shortcutnav_fcp0.1reset3batch32longer_t2,32
3,0.1,3,shortcutnav_fcp0.1reset3batch64longer_t0,64
4,0.1,3,shortcutnav_fcp0.1reset3batch64longer_t1,64
5,0.1,3,shortcutnav_fcp0.1reset3batch64longer_t2,64


## Wall colors


In [12]:
num_trials = 3
shortcut_probs = [0.1, 0.2, 0.4]
wall_colors = [1, 1.5]
rows = []
batch = 64
for p in shortcut_probs:
    for wc in wall_colors:
        for i in range(num_trials):
            exp_name = f'shortcut_wc{wc}p{p}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': wc}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_wc',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size
0,0.1,3,1.0,shortcut_wc1p0.1_t0,64
1,0.1,3,1.0,shortcut_wc1p0.1_t1,64
2,0.1,3,1.0,shortcut_wc1p0.1_t2,64
3,0.1,3,1.5,shortcut_wc1.5p0.1_t0,64
4,0.1,3,1.5,shortcut_wc1.5p0.1_t1,64
5,0.1,3,1.5,shortcut_wc1.5p0.1_t2,64
6,0.2,3,1.0,shortcut_wc1p0.2_t0,64
7,0.2,3,1.0,shortcut_wc1p0.2_t1,64
8,0.2,3,1.0,shortcut_wc1p0.2_t2,64
9,0.2,3,1.5,shortcut_wc1.5p0.2_t0,64


### Non-normalized env 0.4 to transfer with

In [17]:
num_trials = 3
p = 0.4
wall_colors = [1.5, 4]
rows = []
batch = 64
for wc in wall_colors:
    for i in range(num_trials):
        exp_name = f'shortcut_nonorm_wc{wc}p{p}_t{i}'

        env_kwargs = {'shortcut_probability': p,
                      'character_reset_pos': 3,
                      'wall_colors': wc}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'normalize': False,

            'checkpoint_interval': 10,
            'save_dir': 'shortcut_wc',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size
0,0.4,3,1.5,shortcut_nonorm_wc1.5p0.4_t0,64
1,0.4,3,1.5,shortcut_nonorm_wc1.5p0.4_t1,64
2,0.4,3,1.5,shortcut_nonorm_wc1.5p0.4_t2,64
3,0.4,3,4.0,shortcut_nonorm_wc4p0.4_t0,64
4,0.4,3,4.0,shortcut_nonorm_wc4p0.4_t1,64
5,0.4,3,4.0,shortcut_nonorm_wc4p0.4_t2,64


#### No norm, longer batch 0.4

In [12]:
num_trials = 3
p = 0.4
wall_colors = [1.5, 4]
rows = []
batch = 256
for wc in wall_colors:
    for i in range(num_trials):
        exp_name = f'shortcut_nonorm_wc{wc}p{p}batch256_t{i}'

        env_kwargs = {'shortcut_probability': p,
                      'character_reset_pos': 3,
                      'wall_colors': wc}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'normalize': False,

            'checkpoint_interval': 10,
            'save_dir': 'shortcut_wc',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size
0,0.4,3,1.5,shortcut_nonorm_wc1.5p0.4batch256_t0,256
1,0.4,3,1.5,shortcut_nonorm_wc1.5p0.4batch256_t1,256
2,0.4,3,1.5,shortcut_nonorm_wc1.5p0.4batch256_t2,256
3,0.4,3,4.0,shortcut_nonorm_wc4p0.4batch256_t0,256
4,0.4,3,4.0,shortcut_nonorm_wc4p0.4batch256_t1,256
5,0.4,3,4.0,shortcut_nonorm_wc4p0.4batch256_t2,256


#### No norm, batch 256, p 0.1

In [13]:
num_trials = 3
p = 0.1
wall_colors = [1.5, 4]
rows = []
batch = 256
for wc in wall_colors:
    for i in range(num_trials):
        exp_name = f'shortcut_nonorm_wc{wc}p{p}batch256_t{i}'

        env_kwargs = {'shortcut_probability': p,
                      'character_reset_pos': 3,
                      'wall_colors': wc}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'normalize': False,

            'checkpoint_interval': 10,
            'save_dir': 'shortcut_wc',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size
0,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t0,256
1,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t1,256
2,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t2,256
3,0.1,3,4.0,shortcut_nonorm_wc4p0.1batch256_t0,256
4,0.1,3,4.0,shortcut_nonorm_wc4p0.1batch256_t1,256
5,0.1,3,4.0,shortcut_nonorm_wc4p0.1batch256_t2,256


#### No norm, batch 256, p 0.1, 30m steps

In [3]:
num_trials = 3
p = 0.1
wall_colors = [1.5, 4]
rows = []
batch = 256
for wc in wall_colors:
    for i in range(num_trials):
        exp_name = f'shortcut_nonorm_wc{wc}p{p}batch256longer_t{i}'

        env_kwargs = {'shortcut_probability': p,
                      'character_reset_pos': 3,
                      'wall_colors': wc}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 30000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'normalize': False,

            'checkpoint_interval': 10,
            'save_dir': 'shortcut_wc',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size
0,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256longer_t0,256
1,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256longer_t1,256
2,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256longer_t2,256
3,0.1,3,4.0,shortcut_nonorm_wc4p0.1batch256longer_t0,256
4,0.1,3,4.0,shortcut_nonorm_wc4p0.1batch256longer_t1,256
5,0.1,3,4.0,shortcut_nonorm_wc4p0.1batch256longer_t2,256


### Clone experiments on non-normalized 0.4

In [11]:
num_trials = 3
p = 0.1
wall_colors = [1.5, 4]
rows = []
batch = 256

clone_sets = ['closhared', 'clofrshared',  'clocritic', 'clofrcritic']
clone_params = [
    {'clone_layers': 'gru,shared0',
     'freeze_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': True,
     },
    {'clone_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': True},
    {'clone_layers': 'gru,shared0,critic0,critic1',
     'freeze_layers': 'gru,shared0,critic0,critic1',
     'copy_obs_rms': True,
     'freeze_obs_rms': True,
     },
    {'clone_layers': 'gru,shared0,critic0,critic1',
     'copy_obs_rms': True,
     'freeze_obs_rms': True}
]
clone_path_formatter = 'saved_models/shortcut_wc/shortcut_nonorm_wc{wc}p0.4batch256_t{t}.pt'
for j in range(len(clone_sets)):
    for wc in wall_colors:
        for i in range(num_trials):
            clone_path = clone_path_formatter.format(wc=wc, t=i)
            clone_param = clone_params[j]
            clone_param['clone_path'] = clone_path
            
            exp_name = f'shortcut_nonorm{clone_sets[j]}_wc{wc}p{p}batch{batch}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': wc}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['clone_target'] = clone_path

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 5000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'normalize': False,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_clonewc',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
                
                'clone_parameter_experiment': True,
                'clone_args': clone_param
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,clone_target
0,0.1,3,1.5,shortcut_nonormcloshared_wc1.5p0.1batch256_t0,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
1,0.1,3,1.5,shortcut_nonormcloshared_wc1.5p0.1batch256_t1,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
2,0.1,3,1.5,shortcut_nonormcloshared_wc1.5p0.1batch256_t2,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
3,0.1,3,4.0,shortcut_nonormcloshared_wc4p0.1batch256_t0,256,saved_models/shortcut_wc/shortcut_nonorm_wc4p0...
4,0.1,3,4.0,shortcut_nonormcloshared_wc4p0.1batch256_t1,256,saved_models/shortcut_wc/shortcut_nonorm_wc4p0...
5,0.1,3,4.0,shortcut_nonormcloshared_wc4p0.1batch256_t2,256,saved_models/shortcut_wc/shortcut_nonorm_wc4p0...
6,0.1,3,1.5,shortcut_nonormclofrshared_wc1.5p0.1batch256_t0,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
7,0.1,3,1.5,shortcut_nonormclofrshared_wc1.5p0.1batch256_t1,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
8,0.1,3,1.5,shortcut_nonormclofrshared_wc1.5p0.1batch256_t2,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
9,0.1,3,4.0,shortcut_nonormclofrshared_wc4p0.1batch256_t0,256,saved_models/shortcut_wc/shortcut_nonorm_wc4p0...


#### Clone actor side

In [16]:
num_trials = 3
p = 0.1
wall_colors = [1.5, 4]
rows = []
batch = 256

clone_sets = ['cloactor0', 'clofractor0',  'cloactor1', 'clofractor1']
clone_params = [
    {'clone_layers': 'gru,shared0,actor0',
     'freeze_layers': 'gru,shared0,actor0',
     'copy_obs_rms': True,
     'freeze_obs_rms': True,
     },
    {'clone_layers': 'gru,shared0,actor0',
     'copy_obs_rms': True,
     'freeze_obs_rms': True},
    {'clone_layers': 'gru,shared0,actor0,actor1',
     'freeze_layers': 'gru,shared0,actor0,actor1',
     'copy_obs_rms': True,
     'freeze_obs_rms': True,
     },
    {'clone_layers': 'gru,shared0,actor0,actor1',
     'copy_obs_rms': True,
     'freeze_obs_rms': True}
]
clone_path_formatter = 'saved_models/shortcut_wc/shortcut_nonorm_wc{wc}p0.4batch256_t{t}.pt'
for j in range(len(clone_sets)):
    for wc in wall_colors:
        for i in range(num_trials):
            clone_path = clone_path_formatter.format(wc=wc, t=i)
            clone_param = clone_params[j]
            clone_param['clone_path'] = clone_path
            
            exp_name = f'shortcut_nonorm{clone_sets[j]}_wc{wc}p{p}batch{batch}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': wc}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['clone_target'] = clone_path

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 5000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'normalize': False,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_clonewc',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
                
                'clone_parameter_experiment': True,
                'clone_args': clone_param
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,clone_target
0,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t0,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
1,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t1,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
2,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t2,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
3,0.1,3,4.0,shortcut_nonormcloactor0_wc4p0.1batch256_t0,256,saved_models/shortcut_wc/shortcut_nonorm_wc4p0...
4,0.1,3,4.0,shortcut_nonormcloactor0_wc4p0.1batch256_t1,256,saved_models/shortcut_wc/shortcut_nonorm_wc4p0...
5,0.1,3,4.0,shortcut_nonormcloactor0_wc4p0.1batch256_t2,256,saved_models/shortcut_wc/shortcut_nonorm_wc4p0...
6,0.1,3,1.5,shortcut_nonormclofractor0_wc1.5p0.1batch256_t0,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
7,0.1,3,1.5,shortcut_nonormclofractor0_wc1.5p0.1batch256_t1,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
8,0.1,3,1.5,shortcut_nonormclofractor0_wc1.5p0.1batch256_t2,256,saved_models/shortcut_wc/shortcut_nonorm_wc1.5...
9,0.1,3,4.0,shortcut_nonormclofractor0_wc4p0.1batch256_t0,256,saved_models/shortcut_wc/shortcut_nonorm_wc4p0...


### Clone experiments

In [12]:
num_trials = 3
rows = []
batch = 64
p = 0.1
wc = 1.5

clone_sets = ['clone_and_freeze', 'clone']
clone_params = [
    {'clone_layers': 'gru,shared0',
     'freeze_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': True,
     },
    {'clone_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': True}
]
clone_path_formatter = 'saved_models/shortcut_wc/shortcut_wc1p0.4_t{}.pt'
for j in range(len(clone_sets)):
    for i in range(num_trials):
        clone_path = clone_path_formatter.format(i)
        clone_param = clone_params[j]
        clone_param['clone_path'] = clone_path
        
        exp_name = f'shortcut_wc{wc}p{p}{clone_sets[j]}_t{i}'

        env_kwargs = {'shortcut_probability': p,
                      'character_reset_pos': 3,
                      'wall_colors': wc}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'shortcut_clonewc',

            'recurrent': True,
            'seed': i+100,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
            
            'clone_parameter_experiment': True,
            'clone_args': clone_param
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size
0,0.1,3,1.5,shortcut_wc1.5p0.1clone_and_freeze_t0,64
1,0.1,3,1.5,shortcut_wc1.5p0.1clone_and_freeze_t1,64
2,0.1,3,1.5,shortcut_wc1.5p0.1clone_and_freeze_t2,64
3,0.1,3,1.5,shortcut_wc1.5p0.1clone_t0,64
4,0.1,3,1.5,shortcut_wc1.5p0.1clone_t1,64
5,0.1,3,1.5,shortcut_wc1.5p0.1clone_t2,64


In [14]:
num_trials = 3
rows = []
batch = 64
p = 0.1
wc = 4

clone_sets = ['clone_and_freeze', 'clone']
clone_params = [
    {'clone_layers': 'gru,shared0',
     'freeze_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': True,
     },
    {'clone_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': True}
]
clone_path_formatter = 'saved_models/shortcut_resets/shortcutnav_fcp0.4reset3batch64_t{}.pt'
for j in range(len(clone_sets)):
    for i in range(num_trials):
        clone_path = clone_path_formatter.format(i)
        clone_param = clone_params[j]
        clone_param['clone_path'] = clone_path
        
        exp_name = f'shortcut_wc{wc}p{p}{clone_sets[j]}_t{i}'

        env_kwargs = {'shortcut_probability': p,
                      'character_reset_pos': 3,
                      'wall_colors': wc}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'shortcut_clonewc',

            'recurrent': True,
            'seed': i+100,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
            
            'clone_parameter_experiment': True,
            'clone_args': clone_param
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size
0,0.1,3,4,shortcut_wc4p0.1clone_and_freeze_t0,64
1,0.1,3,4,shortcut_wc4p0.1clone_and_freeze_t1,64
2,0.1,3,4,shortcut_wc4p0.1clone_and_freeze_t2,64
3,0.1,3,4,shortcut_wc4p0.1clone_t0,64
4,0.1,3,4,shortcut_wc4p0.1clone_t1,64
5,0.1,3,4,shortcut_wc4p0.1clone_t2,64


## Additional trials of p=0.4 and p=0.1; differing seeds

In [7]:
num_trials = 10
shortcut_probs = [0.1, 0.4]
wall_colors = [1.5, 4]
rows = []
batch = 64
for m, p in enumerate(shortcut_probs):
    for n, wc in enumerate(wall_colors):
        for i in range(num_trials):
            exp_name = f'shortcut_wc{wc}p{p}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': wc}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['seed'] = i+(n+1)*100+(m+1)*1000

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_wc2',

                'recurrent': True,
                'seed': i+(n+1)*100+(m+1)*1000,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,seed
0,0.1,3,1.5,shortcut_wc1.5p0.1_t0,64,1100
1,0.1,3,1.5,shortcut_wc1.5p0.1_t1,64,1101
2,0.1,3,1.5,shortcut_wc1.5p0.1_t2,64,1102
3,0.1,3,1.5,shortcut_wc1.5p0.1_t3,64,1103
4,0.1,3,1.5,shortcut_wc1.5p0.1_t4,64,1104
5,0.1,3,1.5,shortcut_wc1.5p0.1_t5,64,1105
6,0.1,3,1.5,shortcut_wc1.5p0.1_t6,64,1106
7,0.1,3,1.5,shortcut_wc1.5p0.1_t7,64,1107
8,0.1,3,1.5,shortcut_wc1.5p0.1_t8,64,1108
9,0.1,3,1.5,shortcut_wc1.5p0.1_t9,64,1109


## wc1.5, p=0.2, 0.3

In [4]:
num_trials = 10
shortcut_probs = [0.2, 0.3]
wall_colors = [1.5]
rows = []
batch = 64
for m, p in enumerate(shortcut_probs):
    for n, wc in enumerate(wall_colors):
        for i in range(num_trials):
            exp_name = f'shortcut_wc{wc}p{p}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': wc}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['seed'] = i+(n+5)*100+(m+1)*1000

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_wc2',

                'recurrent': True,
                'seed': i+(n+1)*100+(m+1)*1000,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,seed
0,0.2,3,1.5,shortcut_wc1.5p0.2_t0,64,1500
1,0.2,3,1.5,shortcut_wc1.5p0.2_t1,64,1501
2,0.2,3,1.5,shortcut_wc1.5p0.2_t2,64,1502
3,0.2,3,1.5,shortcut_wc1.5p0.2_t3,64,1503
4,0.2,3,1.5,shortcut_wc1.5p0.2_t4,64,1504
5,0.2,3,1.5,shortcut_wc1.5p0.2_t5,64,1505
6,0.2,3,1.5,shortcut_wc1.5p0.2_t6,64,1506
7,0.2,3,1.5,shortcut_wc1.5p0.2_t7,64,1507
8,0.2,3,1.5,shortcut_wc1.5p0.2_t8,64,1508
9,0.2,3,1.5,shortcut_wc1.5p0.2_t9,64,1509


## Additional trials of nonorm cloning experiments

In [12]:
num_trials = 10
wall_colors = [1.5, 4]
probs = [0.1, 0.4]
rows = []
batch = 256
for m, p in enumerate(probs):
    for n, wc in enumerate(wall_colors):
        for i in range(num_trials):
            exp_name = f'shortcut_nonorm_wc{wc}p{p}batch256_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': wc}

            seed = i + 10 + (n+1)*100 + (m+1)*1000

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['seed'] = seed

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 10000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'normalize': False,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_clonewc2',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,seed
0,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t0,256,1110
1,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t1,256,1111
2,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t2,256,1112
3,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t3,256,1113
4,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t4,256,1114
5,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t5,256,1115
6,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t6,256,1116
7,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t7,256,1117
8,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t8,256,1118
9,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t9,256,1119


### Clone up to actor

In [14]:
num_trials = 10
p = 0.1
wall_colors = [1.5, 4]
rows = []
batch = 256

clone_sets = ['cloactor0', 'clofractor0']
clone_params = [
    {'clone_layers': 'gru,shared0,actor0',
     'copy_obs_rms': True,
     'freeze_obs_rms': True},
    {'clone_layers': 'gru,shared0,actor0',
     'freeze_layers': 'gru,shared0,actor0',
     'copy_obs_rms': True,
     'freeze_obs_rms': True,
     },
]
clone_path_formatter = 'saved_models/shortcut_clonewc2/shortcut_nonorm_wc{wc}p0.4batch256_t{t}.pt'
for j in range(len(clone_sets)):
    for n, wc in enumerate(wall_colors):
        for i in range(num_trials):
            clone_path = clone_path_formatter.format(wc=wc, t=i)
            clone_param = clone_params[j]
            clone_param['clone_path'] = clone_path
            
            exp_name = f'shortcut_nonorm{clone_sets[j]}_wc{wc}p{p}batch{batch}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': wc}
            seed = i + 100*(n+1) + 200 + (j+1)*1000
            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['clone_target'] = clone_path
            row['seed'] = seed

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 5000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'normalize': False,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_clonewc2',

                'recurrent': True,
                'seed': seed,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
                
                'clone_parameter_experiment': True,
                'clone_args': clone_param
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,clone_target,seed
0,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t0,256,saved_models/shortcut_clonewc2/shortcut_nonorm...,1300
1,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t1,256,saved_models/shortcut_clonewc2/shortcut_nonorm...,1301
2,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t2,256,saved_models/shortcut_clonewc2/shortcut_nonorm...,1302
3,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t3,256,saved_models/shortcut_clonewc2/shortcut_nonorm...,1303
4,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t4,256,saved_models/shortcut_clonewc2/shortcut_nonorm...,1304
5,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t5,256,saved_models/shortcut_clonewc2/shortcut_nonorm...,1305
6,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t6,256,saved_models/shortcut_clonewc2/shortcut_nonorm...,1306
7,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t7,256,saved_models/shortcut_clonewc2/shortcut_nonorm...,1307
8,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t8,256,saved_models/shortcut_clonewc2/shortcut_nonorm...,1308
9,0.1,3,1.5,shortcut_nonormcloactor0_wc1.5p0.1batch256_t9,256,saved_models/shortcut_clonewc2/shortcut_nonorm...,1309


#### No norm, batch 256, p 0.1

In [13]:
num_trials = 3
p = 0.1
wall_colors = [1.5, 4]
rows = []
batch = 256
for wc in wall_colors:
    for i in range(num_trials):
        exp_name = f'shortcut_nonorm_wc{wc}p{p}batch256_t{i}'

        env_kwargs = {'shortcut_probability': p,
                      'character_reset_pos': 3,
                      'wall_colors': wc}

        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'ShortcutNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'normalize': False,

            'checkpoint_interval': 10,
            'save_dir': 'shortcut_wc',

            'recurrent': True,
            'seed': i,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size
0,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t0,256
1,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t1,256
2,0.1,3,1.5,shortcut_nonorm_wc1.5p0.1batch256_t2,256
3,0.1,3,4.0,shortcut_nonorm_wc4p0.1batch256_t0,256
4,0.1,3,4.0,shortcut_nonorm_wc4p0.1batch256_t1,256
5,0.1,3,4.0,shortcut_nonorm_wc4p0.1batch256_t2,256


## 2 Corridors

In [11]:
num_trials = 3
shortcut_probs = [0.2, 0.4, 0.6, 0.8]
shortcut_configs = [2, 2.5]
rows = []
batch = 64
for p in shortcut_probs:
    for shortcut in shortcut_configs:
        for i in range(num_trials):
            exp_name = f'shortcut_sc{shortcut}p{p}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'shortcut_config': shortcut}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 6000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_sc',

                'recurrent': True,
                'seed': i,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,shortcut_config,name,batch_size
0,0.2,3,2.0,shortcut_sc2p0.2_t0,64
1,0.2,3,2.0,shortcut_sc2p0.2_t1,64
2,0.2,3,2.0,shortcut_sc2p0.2_t2,64
3,0.2,3,2.5,shortcut_sc2.5p0.2_t0,64
4,0.2,3,2.5,shortcut_sc2.5p0.2_t1,64
5,0.2,3,2.5,shortcut_sc2.5p0.2_t2,64
6,0.4,3,2.0,shortcut_sc2p0.4_t0,64
7,0.4,3,2.0,shortcut_sc2p0.4_t1,64
8,0.4,3,2.0,shortcut_sc2p0.4_t2,64
9,0.4,3,2.5,shortcut_sc2.5p0.4_t0,64


### 2 corridors with wall colors

In [13]:
num_trials = 3
shortcut_probs = [0.6, 0.8]
shortcut_configs = [2, 2.5]
wall_colors = [1, 1.5]
rows = []
batch = 64
for p in shortcut_probs:
    for shortcut in shortcut_configs:
        for wc in wall_colors:
            for i in range(num_trials):
                exp_name = f'shortcut_sc{shortcut}wc{wc}p{p}_t{i}'

                env_kwargs = {'shortcut_probability': p,
                              'character_reset_pos': 3,
                              'shortcut_config': shortcut,
                              'wall_colors': wc}

                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 10000000,
                    'env_name': 'ShortcutNav-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'checkpoint_interval': 10,
                    'save_dir': 'shortcut_sc',

                    'recurrent': True,
                    'seed': i,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'DelayedRNNPPO',
                    'nn_base_kwargs': {'hidden_size': 64},
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,shortcut_config,wall_colors,name,batch_size
0,0.6,3,2.0,1.0,shortcut_sc2wc1p0.6_t0,64
1,0.6,3,2.0,1.0,shortcut_sc2wc1p0.6_t1,64
2,0.6,3,2.0,1.0,shortcut_sc2wc1p0.6_t2,64
3,0.6,3,2.0,1.5,shortcut_sc2wc1.5p0.6_t0,64
4,0.6,3,2.0,1.5,shortcut_sc2wc1.5p0.6_t1,64
5,0.6,3,2.0,1.5,shortcut_sc2wc1.5p0.6_t2,64
6,0.6,3,2.5,1.0,shortcut_sc2.5wc1p0.6_t0,64
7,0.6,3,2.5,1.0,shortcut_sc2.5wc1p0.6_t1,64
8,0.6,3,2.5,1.0,shortcut_sc2.5wc1p0.6_t2,64
9,0.6,3,2.5,1.5,shortcut_sc2.5wc1.5p0.6_t0,64


# Shortcut Nav pt2

## Training p=0.1 longer

In [2]:
num_trials = 10
shortcut_probs = [0.1]
wall_colors = [1.5]
rows = []
batch = 64
for m, p in enumerate(shortcut_probs):
    for n, wc in enumerate(wall_colors):
        for i in range(num_trials):
            exp_name = f'shortcut_wc{wc}p{p}longer_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': wc}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['seed'] = i+(n+1)*100+(m+1)*1000

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 10000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_wc2',

                'recurrent': True,
                'seed': i+(n+1)*100+(m+1)*1000,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,seed
0,0.1,3,1.5,shortcut_wc1.5p0.1longer_t0,64,1100
1,0.1,3,1.5,shortcut_wc1.5p0.1longer_t1,64,1101
2,0.1,3,1.5,shortcut_wc1.5p0.1longer_t2,64,1102
3,0.1,3,1.5,shortcut_wc1.5p0.1longer_t3,64,1103
4,0.1,3,1.5,shortcut_wc1.5p0.1longer_t4,64,1104
5,0.1,3,1.5,shortcut_wc1.5p0.1longer_t5,64,1105
6,0.1,3,1.5,shortcut_wc1.5p0.1longer_t6,64,1106
7,0.1,3,1.5,shortcut_wc1.5p0.1longer_t7,64,1107
8,0.1,3,1.5,shortcut_wc1.5p0.1longer_t8,64,1108
9,0.1,3,1.5,shortcut_wc1.5p0.1longer_t9,64,1109


In [3]:
num_trials = 10
shortcut_probs = [0.4]
wall_colors = [1.5]
rows = []
batch = 64
for m, p in enumerate(shortcut_probs):
    for n, wc in enumerate(wall_colors):
        for i in range(num_trials):
            exp_name = f'shortcut_wc{wc}p{p}longer_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': wc}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['seed'] = i+(n+1)*200+(m+1)*1000

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 10000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_wc2',

                'recurrent': True,
                'seed': i+(n+1)*100+(m+1)*1000,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,seed
0,0.4,3,1.5,shortcut_wc1.5p0.4longer_t0,64,1200
1,0.4,3,1.5,shortcut_wc1.5p0.4longer_t1,64,1201
2,0.4,3,1.5,shortcut_wc1.5p0.4longer_t2,64,1202
3,0.4,3,1.5,shortcut_wc1.5p0.4longer_t3,64,1203
4,0.4,3,1.5,shortcut_wc1.5p0.4longer_t4,64,1204
5,0.4,3,1.5,shortcut_wc1.5p0.4longer_t5,64,1205
6,0.4,3,1.5,shortcut_wc1.5p0.4longer_t6,64,1206
7,0.4,3,1.5,shortcut_wc1.5p0.4longer_t7,64,1207
8,0.4,3,1.5,shortcut_wc1.5p0.4longer_t8,64,1208
9,0.4,3,1.5,shortcut_wc1.5p0.4longer_t9,64,1209


## Training p=0, p=0.8 agents

In [6]:
num_trials = 10
shortcut_probs = [0., 0.8, 1.0]
wall_colors = [1.5]
rows = []
batch = 64
for m, p in enumerate(shortcut_probs):
    for n, wc in enumerate(wall_colors):
        for i in range(num_trials):
            exp_name = f'shortcut_wc{wc}p{p}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': wc}

            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['seed'] = i+(n+4)*100+(m+1)*1000

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 5000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'checkpoint_interval': 10,
                'save_dir': 'shortcut_wc2',

                'recurrent': True,
                'seed': i+(n+1)*100+(m+1)*1000,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,seed
0,0.0,3,1.5,shortcut_wc1.5p0.0_t0,64,1400
1,0.0,3,1.5,shortcut_wc1.5p0.0_t1,64,1401
2,0.0,3,1.5,shortcut_wc1.5p0.0_t2,64,1402
3,0.0,3,1.5,shortcut_wc1.5p0.0_t3,64,1403
4,0.0,3,1.5,shortcut_wc1.5p0.0_t4,64,1404
5,0.0,3,1.5,shortcut_wc1.5p0.0_t5,64,1405
6,0.0,3,1.5,shortcut_wc1.5p0.0_t6,64,1406
7,0.0,3,1.5,shortcut_wc1.5p0.0_t7,64,1407
8,0.0,3,1.5,shortcut_wc1.5p0.0_t8,64,1408
9,0.0,3,1.5,shortcut_wc1.5p0.0_t9,64,1409


# Test step
Make sure that changing total number of steps doesn't change the learning

In [5]:
rows = []
batch = 64
steps = [1000000, 3000000, 5000000]
p = 0.4
wc = 1.5
for step in steps:
    exp_name = f'teststep_step{step}'

    env_kwargs = {'shortcut_probability': p,
                  'character_reset_pos': 3,
                  'wall_colors': wc}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch
    seed = 0
    row['seed'] = seed

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': step,
        'env_name': 'ShortcutNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'teststep',

        'recurrent': True,
        'seed': seed,
        'no_cuda': True,
        'env_kwargs': env_kwargs,

        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': 64},
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,seed
0,0.4,3,1.5,teststep_step1000000,64,0
1,0.4,3,1.5,teststep_step3000000,64,0
2,0.4,3,1.5,teststep_step5000000,64,0


# PlumNav

New environment to test different "raising conditions"

In [4]:
num_trials = 3
task_structures = [1, 1.5, 2, 2.5]
rows = []
batch = 64
for m, task in enumerate(task_structures):
    for i in range(num_trials):
        exp_name = f'plum_task{task}_t{i}'

        env_kwargs = {'task_structure': task}
        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch
        seed = i+m*100
        row['seed'] = seed

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 5000000,
            'env_name': 'PlumNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'plum',

            'recurrent': True,
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,task_structure,name,batch_size,seed
0,1.0,plum_task1_t0,64,0
1,1.0,plum_task1_t1,64,1
2,1.0,plum_task1_t2,64,2
3,1.5,plum_task1.5_t0,64,100
4,1.5,plum_task1.5_t1,64,101
5,1.5,plum_task1.5_t2,64,102
6,2.0,plum_task2_t0,64,200
7,2.0,plum_task2_t1,64,201
8,2.0,plum_task2_t2,64,202
9,2.5,plum_task2.5_t0,64,300


In [15]:
num_trials = 3
task_structures = [1, 1.5, 2, 2.5]
rows = []
batch = 64
for m, task in enumerate(task_structures):
    for i in range(num_trials):
        exp_name = f'plum_task{task}_1plum_t{i}'

        env_kwargs = {'task_structure': task, }
        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch
        seed = i+m*100
        row['seed'] = seed

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'PlumNav-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,

            'checkpoint_interval': 10,
            'save_dir': 'plum',

            'recurrent': True,
            'seed': seed,
            'no_cuda': True,
            'env_kwargs': env_kwargs,

            'nn_base': 'DelayedRNNPPO',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,task_structure,name,batch_size,seed
0,1.0,plum_task1_1plum_t0,64,0
1,1.0,plum_task1_1plum_t1,64,1
2,1.0,plum_task1_1plum_t2,64,2
3,1.5,plum_task1.5_1plum_t0,64,100
4,1.5,plum_task1.5_1plum_t1,64,101
5,1.5,plum_task1.5_1plum_t2,64,102
6,2.0,plum_task2_1plum_t0,64,200
7,2.0,plum_task2_1plum_t1,64,201
8,2.0,plum_task2_1plum_t2,64,202
9,2.5,plum_task2.5_1plum_t0,64,300


## Transfer from plum nav to shortcut p=0.1, p=0.4

In [8]:
num_trials = 3
probs = [0.1, 0.4]
tasks = [1, 1.5, 2, 2.5] # original plum envs
rows = []
batch = 64

clone_sets = ['shared', 'actor0', 'actor1']
clone_params = [
    {'clone_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': False},
    {'clone_layers': 'gru,shared0,actor0',
     'copy_obs_rms': True,
     'freeze_obs_rms': False,
     },
    {'clone_layers': 'gru,shared0,actor0,actor1',
     'copy_obs_rms': True,
     'freeze_obs_rms': False,
     },

]
clone_path_formatter = 'saved_models/plum/plum_task{task}_t{t}.pt'
for j in range(len(clone_sets)):
    for m, task in enumerate(tasks):
        for n, p in enumerate(probs):
            for i in range(num_trials):
                clone_path = clone_path_formatter.format(task=task, t=i)
                clone_param = clone_params[j]
                clone_param['clone_path'] = clone_path

                exp_name = f'plumtosc_{clone_sets[j]}_plumtask{task}p{p}_t{i}'

                env_kwargs = {'shortcut_probability': p,
                              'character_reset_pos': 3,
                              'wall_colors': 1.5}
                seed = i + 10*(m+1) + 100*(n+1) + 200 + (j+1)*1000
                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch
                row['clone_target'] = clone_path
                row['seed'] = seed

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 3000000,
                    'env_name': 'ShortcutNav-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'normalize': False,

                    'checkpoint_interval': 10,
                    'save_dir': 'plumtosc',

                    'recurrent': True,
                    'seed': seed,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'DelayedRNNPPO',
                    'nn_base_kwargs': {'hidden_size': 64},

                    'clone_parameter_experiment': True,
                    'clone_args': clone_param
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,clone_target,seed
0,0.1,3,1.5,plumtosc_shared_plumtask1p0.1_t0,64,saved_models/plum/plum_task1_t0.pt,1310
1,0.1,3,1.5,plumtosc_shared_plumtask1p0.1_t1,64,saved_models/plum/plum_task1_t1.pt,1311
2,0.1,3,1.5,plumtosc_shared_plumtask1p0.1_t2,64,saved_models/plum/plum_task1_t2.pt,1312
3,0.4,3,1.5,plumtosc_shared_plumtask1p0.4_t0,64,saved_models/plum/plum_task1_t0.pt,1410
4,0.4,3,1.5,plumtosc_shared_plumtask1p0.4_t1,64,saved_models/plum/plum_task1_t1.pt,1411
...,...,...,...,...,...,...,...
67,0.1,3,1.5,plumtosc_actor1_plumtask2.5p0.1_t1,64,saved_models/plum/plum_task2.5_t1.pt,3341
68,0.1,3,1.5,plumtosc_actor1_plumtask2.5p0.1_t2,64,saved_models/plum/plum_task2.5_t2.pt,3342
69,0.4,3,1.5,plumtosc_actor1_plumtask2.5p0.4_t0,64,saved_models/plum/plum_task2.5_t0.pt,3440
70,0.4,3,1.5,plumtosc_actor1_plumtask2.5p0.4_t1,64,saved_models/plum/plum_task2.5_t1.pt,3441


## Transfer with plum indicator

In [8]:
num_trials = 3

batch = 256
rows = []

step_schedule = [0, 5e6, 1e7, 1.5e7, 2e7]
schedule = [
    [500, 100, 50, 25, 10],
    step_schedule
]

hidden = 64

for i in range(num_trials):
    exp_name = f'expl_givegoal_sched1_t{i}'

    env_kwargs = {'rew_structure': 'explorepunish1',
                  'sub_goal_reward': 0.04, 'bonus_multiplier': 1,
                  'explore_punish_arg': 5, 'obs_set': '2e', 
                  'obs_set_arg': schedule}

    row = env_kwargs.copy()
    row['name'] = exp_name
    row['batch_size'] = batch

    rows.append(row)
    config = {
        'wandb_project_name': 'Nav_Shared_Layers',
        'exp_name': exp_name,
        'save_name': exp_name,
        'num_env_steps': 25000000,
        'env_name': 'ExploreNav-v0',
        'algo': 'ppo',
        'num_mini_batch': 10,
        'num_processes': 100,
        'num_steps': batch,

        'checkpoint_interval': 10,
        'save_dir': 'explorenav_pos',

        'recurrent': True,
        'seed': i,
        'no_cuda': True,
        'env_kwargs': env_kwargs,
        
        'nn_base': 'DelayedRNNPPO',
        'nn_base_kwargs': {'hidden_size': hidden},
        
        'use_universal_step': {'on': True, 'schedule': step_schedule}
    }

    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,name,batch_size,clone_target,seed
0,0.1,3,1.5,plumtosc_shared_plumtask1p0.1_t0,64,saved_models/plum/plum_task1_t0.pt,1310
1,0.1,3,1.5,plumtosc_shared_plumtask1p0.1_t1,64,saved_models/plum/plum_task1_t1.pt,1311
2,0.1,3,1.5,plumtosc_shared_plumtask1p0.1_t2,64,saved_models/plum/plum_task1_t2.pt,1312
3,0.4,3,1.5,plumtosc_shared_plumtask1p0.4_t0,64,saved_models/plum/plum_task1_t0.pt,1410
4,0.4,3,1.5,plumtosc_shared_plumtask1p0.4_t1,64,saved_models/plum/plum_task1_t1.pt,1411
...,...,...,...,...,...,...,...
67,0.1,3,1.5,plumtosc_actor1_plumtask2.5p0.1_t1,64,saved_models/plum/plum_task2.5_t1.pt,3341
68,0.1,3,1.5,plumtosc_actor1_plumtask2.5p0.1_t2,64,saved_models/plum/plum_task2.5_t2.pt,3342
69,0.4,3,1.5,plumtosc_actor1_plumtask2.5p0.4_t0,64,saved_models/plum/plum_task2.5_t0.pt,3440
70,0.4,3,1.5,plumtosc_actor1_plumtask2.5p0.4_t1,64,saved_models/plum/plum_task2.5_t1.pt,3441


In [13]:
num_trials = 3
probs = [0.1, 0.4]
tasks = [1, 2] # original plum envs
rows = []
batch = 64
plum_steps = [100_000, 200_000, 500_000]
schedule = [100_001, 200_001, 500_001]
clone_sets = ['actor1']
clone_params = [
    {'clone_layers': 'gru,shared0,actor0,actor1',
     'copy_obs_rms': True,
     'freeze_obs_rms': False,
     },

]
clone_path_formatter = 'saved_models/plum/plum_task{task}_t{t}.pt'
for j in range(len(clone_sets)):
    for m, task in enumerate(tasks):
        for n, p in enumerate(probs):
            for l, plum_step in enumerate(plum_steps):
                for i in range(num_trials):
                    clone_path = clone_path_formatter.format(task=task, t=i)
                    clone_param = clone_params[j]
                    clone_param['clone_path'] = clone_path

                    exp_name = f'plumtosc_{clone_sets[j]}_wplum{plum_step}_plumtask{task}p{p}_t{i}'

                    env_kwargs = {'shortcut_probability': p,
                                  'character_reset_pos': 3,
                                  'wall_colors': 1.5,
                                  'plum_steps': plum_step}
                    seed = i + 10*(m+1) + 100*(n+1) + 200 + (j+1)*1000 + 10000*(l+1)
                    row = env_kwargs.copy()
                    row['name'] = exp_name
                    row['batch_size'] = batch
                    row['clone_target'] = clone_path
                    row['seed'] = seed

                    rows.append(row)
                    config = {
                        'wandb_project_name': 'Nav_Shared_Layers',
                        'exp_name': exp_name,
                        'save_name': exp_name,
                        'num_env_steps': 3000000,
                        'env_name': 'ShortcutNav-v0',
                        'algo': 'ppo',
                        'num_mini_batch': 10,
                        'num_processes': 100,
                        'num_steps': batch,

                        'normalize': False,

                        'checkpoint_interval': 10,
                        'save_dir': 'plumtosc',

                        'recurrent': True,
                        'seed': seed,
                        'no_cuda': True,
                        'env_kwargs': env_kwargs,

                        'nn_base': 'DelayedRNNPPO',
                        'nn_base_kwargs': {'hidden_size': 64},

                        'clone_parameter_experiment': True,
                        'clone_args': clone_param,
                        
                        'use_universal_step': {'on': True, 'schedule': schedule}
                    }

                    pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,plum_steps,name,batch_size,clone_target,seed
0,0.1,3,1.5,100000,plumtosc_actor1_wplum100000_plumtask1p0.1_t0,64,saved_models/plum/plum_task1_t0.pt,11310
1,0.1,3,1.5,100000,plumtosc_actor1_wplum100000_plumtask1p0.1_t1,64,saved_models/plum/plum_task1_t1.pt,11311
2,0.1,3,1.5,100000,plumtosc_actor1_wplum100000_plumtask1p0.1_t2,64,saved_models/plum/plum_task1_t2.pt,11312
3,0.1,3,1.5,200000,plumtosc_actor1_wplum200000_plumtask1p0.1_t0,64,saved_models/plum/plum_task1_t0.pt,21310
4,0.1,3,1.5,200000,plumtosc_actor1_wplum200000_plumtask1p0.1_t1,64,saved_models/plum/plum_task1_t1.pt,21311
5,0.1,3,1.5,200000,plumtosc_actor1_wplum200000_plumtask1p0.1_t2,64,saved_models/plum/plum_task1_t2.pt,21312
6,0.1,3,1.5,500000,plumtosc_actor1_wplum500000_plumtask1p0.1_t0,64,saved_models/plum/plum_task1_t0.pt,31310
7,0.1,3,1.5,500000,plumtosc_actor1_wplum500000_plumtask1p0.1_t1,64,saved_models/plum/plum_task1_t1.pt,31311
8,0.1,3,1.5,500000,plumtosc_actor1_wplum500000_plumtask1p0.1_t2,64,saved_models/plum/plum_task1_t2.pt,31312
9,0.4,3,1.5,100000,plumtosc_actor1_wplum100000_plumtask1p0.4_t0,64,saved_models/plum/plum_task1_t0.pt,11410


## Scheduler

### Without transfer

In [7]:
num_trials = 3
probs = [0.1, 0.4]
tasks = [1, 2] # original plum envs
rows = []
batch = 64

schedules = [
    [0, 100_000, 200_000, 300_000, 400_000],
    [0, 200_000, 400_000, 600_000, 800_000]
]
# step_args = {0: {'plum_pos': 0},
#             1: {'plum_pos': 1},
#             2: {'plum_pos': 2},
#             3: {'plum_pos': 3},
#             4: {'plum_pos': -1}}
step_args = [0, 1, 2, 3, -1]
for n, p in enumerate(probs):
    for l, schedule in enumerate(schedules):
        for i in range(num_trials):
            exp_name = f'scwplum_schedule{l}p{p}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': 1.5,
                          'plum_pos': 0}
            seed = i + 100*(n+1) + 10000*(l+1)
            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['seed'] = seed

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'normalize': False,

                'checkpoint_interval': 10,
                'save_dir': 'scwplum',

                'recurrent': True,
                'seed': seed,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},

                'use_universal_step': {'on': True, 'schedule': schedule, 'step_args': step_args}
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,plum_pos,name,batch_size,seed
0,0.1,3,1.5,0,scwplum_schedule0p0.1_t0,64,10100
1,0.1,3,1.5,0,scwplum_schedule0p0.1_t1,64,10101
2,0.1,3,1.5,0,scwplum_schedule0p0.1_t2,64,10102
3,0.1,3,1.5,0,scwplum_schedule1p0.1_t0,64,20100
4,0.1,3,1.5,0,scwplum_schedule1p0.1_t1,64,20101
5,0.1,3,1.5,0,scwplum_schedule1p0.1_t2,64,20102
6,0.4,3,1.5,0,scwplum_schedule0p0.4_t0,64,10200
7,0.4,3,1.5,0,scwplum_schedule0p0.4_t1,64,10201
8,0.4,3,1.5,0,scwplum_schedule0p0.4_t2,64,10202
9,0.4,3,1.5,0,scwplum_schedule1p0.4_t0,64,20200


In [9]:
num_trials = 3
probs = [0.1, 0.4]
tasks = [1, 2] # original plum envs
rows = []
batch = 64

schedules = [
    [0, 200_000, 400_000, 600_000],
    [0, 200_000, 400_000]
]
schedule_labels = [3, 4]
step_args = [[0, 1, 3, -1],
             [1, 3, -1]]

for n, p in enumerate(probs):
    for l, schedule in enumerate(schedules):
        for i in range(num_trials):
            schedule_label = schedule_labels[l]
            
            exp_name = f'scwplum_schedule{schedule_label}p{p}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': 1.5,
                          'plum_pos': 0}
            seed = i + 100*(n+1) + 10000*(l+1)+20000
            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['seed'] = seed
            
            step_arg = step_args[l]
            

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'normalize': False,

                'checkpoint_interval': 10,
                'save_dir': 'scwplum',

                'recurrent': True,
                'seed': seed,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},

                'use_universal_step': {'on': True, 'schedule': schedule, 
                                       'step_args': step_arg}
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,plum_pos,name,batch_size,seed
0,0.1,3,1.5,0,scwplum_schedule3p0.1_t0,64,30100
1,0.1,3,1.5,0,scwplum_schedule3p0.1_t1,64,30101
2,0.1,3,1.5,0,scwplum_schedule3p0.1_t2,64,30102
3,0.1,3,1.5,0,scwplum_schedule4p0.1_t0,64,40100
4,0.1,3,1.5,0,scwplum_schedule4p0.1_t1,64,40101
5,0.1,3,1.5,0,scwplum_schedule4p0.1_t2,64,40102
6,0.4,3,1.5,0,scwplum_schedule3p0.4_t0,64,30200
7,0.4,3,1.5,0,scwplum_schedule3p0.4_t1,64,30201
8,0.4,3,1.5,0,scwplum_schedule3p0.4_t2,64,30202
9,0.4,3,1.5,0,scwplum_schedule4p0.4_t0,64,40200


In [17]:
num_trials = 3
probs = [0.1, 0.4]
tasks = [1, 2] # original plum envs
rows = []
batch = 64

schedules = [
    [0, 100_000, 200_000, 300_000],
    [0, 200_000, 400_000, 600_000],
    [0]
]
schedule_labels = ['0b', '1b', 5]
step_args = [[0, 1, 2, 3], [0, 1, 2, 3], [3]]

for n, p in enumerate(probs):
    for l, schedule in enumerate(schedules):
        for i in range(num_trials):
            schedule_label = schedule_labels[l]
            step_arg = step_args[l]
            
            exp_name = f'scwplum_schedule{schedule_label}p{p}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': 1.5,
                          'plum_pos': step_arg[0]}
            seed = i + 100*(n+1) + 10000*(l+1)+20000
            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['seed'] = seed
            
            

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'normalize': False,

                'checkpoint_interval': 10,
                'save_dir': 'scwplum',

                'recurrent': True,
                'seed': seed,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},

                'use_universal_step': {'on': True, 'schedule': schedule, 
                                       'step_args': step_arg}
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,plum_pos,name,batch_size,seed
0,0.1,3,1.5,0,scwplum_schedule0bp0.1_t0,64,30100
1,0.1,3,1.5,0,scwplum_schedule0bp0.1_t1,64,30101
2,0.1,3,1.5,0,scwplum_schedule0bp0.1_t2,64,30102
3,0.1,3,1.5,0,scwplum_schedule1bp0.1_t0,64,40100
4,0.1,3,1.5,0,scwplum_schedule1bp0.1_t1,64,40101
5,0.1,3,1.5,0,scwplum_schedule1bp0.1_t2,64,40102
6,0.1,3,1.5,3,scwplum_schedule5p0.1_t0,64,50100
7,0.1,3,1.5,3,scwplum_schedule5p0.1_t1,64,50101
8,0.1,3,1.5,3,scwplum_schedule5p0.1_t2,64,50102
9,0.4,3,1.5,0,scwplum_schedule0bp0.4_t0,64,30200


#### Larger batch

In [5]:
num_trials = 3
probs = [0.1, 0.4]
tasks = [1, 2] # original plum envs
rows = []
batch_sizes = [256, 1024]

schedule = [0, 200_000, 400_000, 600_000]
schedule_label = '1b'
step_arg = [0, 1, 2, 3]

for n, p in enumerate(probs):
    for l, batch in enumerate(batch_sizes):
        for i in range(num_trials):            
            exp_name = f'scwplum_batch{batch}schedule{schedule_label}p{p}_t{i}'

            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': 1.5,
                          'plum_pos': step_arg[0]}
            seed = i + 100*(n+1) + 10000*(l+1)+20000
            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['seed'] = seed
            
            

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'normalize': False,

                'checkpoint_interval': 10,
                'save_dir': 'scwplum',

                'recurrent': True,
                'seed': seed,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},

                'use_universal_step': {'on': True, 'schedule': schedule, 
                                       'step_args': step_arg}
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,plum_pos,name,batch_size,seed
0,0.1,3,1.5,0,scwplum_batch256schedule1bp0.1_t0,256,30100
1,0.1,3,1.5,0,scwplum_batch256schedule1bp0.1_t1,256,30101
2,0.1,3,1.5,0,scwplum_batch256schedule1bp0.1_t2,256,30102
3,0.1,3,1.5,0,scwplum_batch1024schedule1bp0.1_t0,1024,40100
4,0.1,3,1.5,0,scwplum_batch1024schedule1bp0.1_t1,1024,40101
5,0.1,3,1.5,0,scwplum_batch1024schedule1bp0.1_t2,1024,40102
6,0.4,3,1.5,0,scwplum_batch256schedule1bp0.4_t0,256,30200
7,0.4,3,1.5,0,scwplum_batch256schedule1bp0.4_t1,256,30201
8,0.4,3,1.5,0,scwplum_batch256schedule1bp0.4_t2,256,30202
9,0.4,3,1.5,0,scwplum_batch1024schedule1bp0.4_t0,1024,40200


### Transfer from 1plum agents to scheduled plum shortcut

In [10]:
num_trials = 3
probs = [0.1, 0.4]
tasks = [1, 2] # original plum envs
rows = []
batch = 64
# plum schedule in the sc env
schedule = [0, 200_000, 400_000, 600_000, 800_000]
step_args = [0, 1, 2, 3, -1]

clone_sets = ['shared', 'actor0', 'actor1']
clone_params = [
    {'clone_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': False},
    {'clone_layers': 'gru,shared0,actor0',
     'copy_obs_rms': True,
     'freeze_obs_rms': False,
     },
    {'clone_layers': 'gru,shared0,actor0,actor1',
     'copy_obs_rms': True,
     'freeze_obs_rms': False,
     },

]
clone_path_formatter = 'saved_models/plum/plum_task{task}_1plum_t{t}.pt'
for j in range(len(clone_sets)):
    for m, task in enumerate(tasks):
        for n, p in enumerate(probs):
            for i in range(num_trials):
                clone_path = clone_path_formatter.format(task=task, t=i)
                clone_param = clone_params[j]
                clone_param['clone_path'] = clone_path

                exp_name = f'plumtosc_{clone_sets[j]}_plumsched_1plumtask{task}p{p}_t{i}'

                env_kwargs = {'shortcut_probability': p,
                              'character_reset_pos': 3,
                              'wall_colors': 1.5,
                              'plum_pos': 0}
                seed = i + 10*(m+1) + 100*(n+1) + 200 + (j+1)*1000 + 10000*(l+1)
                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch
                row['clone_target'] = clone_path
                row['seed'] = seed

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 3000000,
                    'env_name': 'ShortcutNav-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'normalize': False,

                    'checkpoint_interval': 10,
                    'save_dir': 'plumtosc',

                    'recurrent': True,
                    'seed': seed,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'DelayedRNNPPO',
                    'nn_base_kwargs': {'hidden_size': 64},

                    'clone_parameter_experiment': True,
                    'clone_args': clone_param,

                    'use_universal_step': {'on': True, 'schedule': schedule, 'step_args': step_args}
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,plum_pos,name,batch_size,clone_target,seed
0,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.1_t0,64,saved_models/plum/plum_task1_1plum_t0.pt,21310
1,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.1_t1,64,saved_models/plum/plum_task1_1plum_t1.pt,21311
2,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.1_t2,64,saved_models/plum/plum_task1_1plum_t2.pt,21312
3,0.4,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.4_t0,64,saved_models/plum/plum_task1_1plum_t0.pt,21410
4,0.4,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.4_t1,64,saved_models/plum/plum_task1_1plum_t1.pt,21411
5,0.4,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.4_t2,64,saved_models/plum/plum_task1_1plum_t2.pt,21412
6,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask2p0.1_t0,64,saved_models/plum/plum_task2_1plum_t0.pt,21320
7,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask2p0.1_t1,64,saved_models/plum/plum_task2_1plum_t1.pt,21321
8,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask2p0.1_t2,64,saved_models/plum/plum_task2_1plum_t2.pt,21322
9,0.4,3,1.5,0,plumtosc_shared_plumsched_1plumtask2p0.4_t0,64,saved_models/plum/plum_task2_1plum_t0.pt,21420


In [10]:
num_trials = 3
probs = [0.1, 0.4]
tasks = [1, 2] # original plum envs
rows = []
batch = 64
# plum schedule in the sc env
schedule = [0, 200_000, 400_000, 600_000, 800_000]
step_args = [0, 1, 2, 3, -1]

clone_sets = ['shared', 'actor0', 'actor1']
clone_params = [
    {'clone_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': False},
    {'clone_layers': 'gru,shared0,actor0',
     'copy_obs_rms': True,
     'freeze_obs_rms': False,
     },
    {'clone_layers': 'gru,shared0,actor0,actor1',
     'copy_obs_rms': True,
     'freeze_obs_rms': False,
     },

]
clone_path_formatter = 'saved_models/plum/plum_task{task}_1plum_t{t}.pt'
for j in range(len(clone_sets)):
    for m, task in enumerate(tasks):
        for n, p in enumerate(probs):
            for i in range(num_trials):
                clone_path = clone_path_formatter.format(task=task, t=i)
                clone_param = clone_params[j]
                clone_param['clone_path'] = clone_path

                exp_name = f'plumtosc_{clone_sets[j]}_plumsched_1plumtask{task}p{p}_t{i}'

                env_kwargs = {'shortcut_probability': p,
                              'character_reset_pos': 3,
                              'wall_colors': 1.5,
                              'plum_pos': 0}
                seed = i + 10*(m+1) + 100*(n+1) + 200 + (j+1)*1000 + 10000*(l+1)
                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch
                row['clone_target'] = clone_path
                row['seed'] = seed

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 3000000,
                    'env_name': 'ShortcutNav-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'normalize': False,

                    'checkpoint_interval': 10,
                    'save_dir': 'plumtosc',

                    'recurrent': True,
                    'seed': seed,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'DelayedRNNPPO',
                    'nn_base_kwargs': {'hidden_size': 64},

                    'clone_parameter_experiment': True,
                    'clone_args': clone_param,

                    'use_universal_step': {'on': True, 'schedule': schedule, 'step_args': step_args}
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,plum_pos,name,batch_size,clone_target,seed
0,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.1_t0,64,saved_models/plum/plum_task1_1plum_t0.pt,21310
1,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.1_t1,64,saved_models/plum/plum_task1_1plum_t1.pt,21311
2,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.1_t2,64,saved_models/plum/plum_task1_1plum_t2.pt,21312
3,0.4,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.4_t0,64,saved_models/plum/plum_task1_1plum_t0.pt,21410
4,0.4,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.4_t1,64,saved_models/plum/plum_task1_1plum_t1.pt,21411
5,0.4,3,1.5,0,plumtosc_shared_plumsched_1plumtask1p0.4_t2,64,saved_models/plum/plum_task1_1plum_t2.pt,21412
6,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask2p0.1_t0,64,saved_models/plum/plum_task2_1plum_t0.pt,21320
7,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask2p0.1_t1,64,saved_models/plum/plum_task2_1plum_t1.pt,21321
8,0.1,3,1.5,0,plumtosc_shared_plumsched_1plumtask2p0.1_t2,64,saved_models/plum/plum_task2_1plum_t2.pt,21322
9,0.4,3,1.5,0,plumtosc_shared_plumsched_1plumtask2p0.4_t0,64,saved_models/plum/plum_task2_1plum_t0.pt,21420


In [19]:
num_trials = 3
probs = [0.1, 0.4]
tasks = [1, 2] # original plum envs
rows = []
batch = 64
# plum schedule in the sc env
schedules = [
    [0, 100_000, 200_000, 300_000],
    [0, 200_000, 400_000, 600_000],
    [0]
]
schedule_labels = ['0b', '1b', 5]
step_args = [[0, 1, 2, 3], [0, 1, 2, 3], [3]]

clone_set = ['shared', 'actor0', 'actor1']
clone_param = {'clone_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': False}

clone_path_formatter = 'saved_models/plum/plum_task{task}_1plum_t{t}.pt'
for j, schedule in enumerate(schedules):
    for m, task in enumerate(tasks):
        for n, p in enumerate(probs):
            for i in range(num_trials):
                clone_path = clone_path_formatter.format(task=task, t=i)
                clone_param['clone_path'] = clone_path
                
                schedule_label = schedule_labels[j]
                step_arg = step_args[j]

                exp_name = f'plumtosc_shared_plumsched{schedule_label}_1plumtask{task}p{p}_t{i}'

                env_kwargs = {'shortcut_probability': p,
                              'character_reset_pos': 3,
                              'wall_colors': 1.5,
                              'plum_pos': 0}
                seed = i + 10*(m+1) + 100*(n+1) + 200 + (j+1)*1000 + 10000*(l+1)
                row = env_kwargs.copy()
                row['name'] = exp_name
                row['batch_size'] = batch
                row['clone_target'] = clone_path
                row['seed'] = seed

                rows.append(row)
                config = {
                    'wandb_project_name': 'Nav_Shared_Layers',
                    'exp_name': exp_name,
                    'save_name': exp_name,
                    'num_env_steps': 3000000,
                    'env_name': 'ShortcutNav-v0',
                    'algo': 'ppo',
                    'num_mini_batch': 10,
                    'num_processes': 100,
                    'num_steps': batch,

                    'normalize': False,

                    'checkpoint_interval': 10,
                    'save_dir': 'plumtosc',

                    'recurrent': True,
                    'seed': seed,
                    'no_cuda': True,
                    'env_kwargs': env_kwargs,

                    'nn_base': 'DelayedRNNPPO',
                    'nn_base_kwargs': {'hidden_size': 64},

                    'clone_parameter_experiment': True,
                    'clone_args': clone_param,

                    'use_universal_step': {'on': True, 'schedule': schedule, 'step_args': step_arg}
                }

                pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,plum_pos,name,batch_size,clone_target,seed
0,0.1,3,1.5,0,plumtosc_shared_plumsched0b_1plumtask1p0.1_t0,64,saved_models/plum/plum_task1_1plum_t0.pt,31310
1,0.1,3,1.5,0,plumtosc_shared_plumsched0b_1plumtask1p0.1_t1,64,saved_models/plum/plum_task1_1plum_t1.pt,31311
2,0.1,3,1.5,0,plumtosc_shared_plumsched0b_1plumtask1p0.1_t2,64,saved_models/plum/plum_task1_1plum_t2.pt,31312
3,0.4,3,1.5,0,plumtosc_shared_plumsched0b_1plumtask1p0.4_t0,64,saved_models/plum/plum_task1_1plum_t0.pt,31410
4,0.4,3,1.5,0,plumtosc_shared_plumsched0b_1plumtask1p0.4_t1,64,saved_models/plum/plum_task1_1plum_t1.pt,31411
5,0.4,3,1.5,0,plumtosc_shared_plumsched0b_1plumtask1p0.4_t2,64,saved_models/plum/plum_task1_1plum_t2.pt,31412
6,0.1,3,1.5,0,plumtosc_shared_plumsched0b_1plumtask2p0.1_t0,64,saved_models/plum/plum_task2_1plum_t0.pt,31320
7,0.1,3,1.5,0,plumtosc_shared_plumsched0b_1plumtask2p0.1_t1,64,saved_models/plum/plum_task2_1plum_t1.pt,31321
8,0.1,3,1.5,0,plumtosc_shared_plumsched0b_1plumtask2p0.1_t2,64,saved_models/plum/plum_task2_1plum_t2.pt,31322
9,0.4,3,1.5,0,plumtosc_shared_plumsched0b_1plumtask2p0.4_t0,64,saved_models/plum/plum_task2_1plum_t0.pt,31420


### Transfer to sched using 1.5, 2.5 task and middle of training, schedule 0

In [15]:
num_trials = 3
probs = [0.1, 0.4]
tasks = [1.5, 2.5] # original plum envs
rows = []
batch = 64
# plum schedule in the sc env
schedule = [0, 100_000, 200_000, 300_000, 400_000]
step_args = [0, 1, 2, 3, -1]

clone_sets = ['shared']
clone_params = [
    {'clone_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': False},
]
j = 0

# fix chk 150 in clone_path_formatter
clone_path_formatter = 'saved_checkpoints/plum/plum_task{task}_1plum_t{t}/150.pt'
for m, task in enumerate(tasks):
    for n, p in enumerate(probs):
        for i in range(num_trials):
            clone_path = clone_path_formatter.format(task=task, t=i)
            clone_param = clone_params[j]
            clone_param['clone_path'] = clone_path

            exp_name = f'plumtosc_{clone_sets[j]}_plumsched0_chk150task{task}p{p}_t{i}'
            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': 1.5,
                          'plum_pos': 0}
            seed = i + 10*(m+1) + 100*(n+1) + 200 + (j+1)*1000 + 10000*(l+1)
            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['clone_target'] = clone_path
            row['seed'] = seed

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'normalize': False,

                'checkpoint_interval': 10,
                'save_dir': 'plumtosc',

                'recurrent': True,
                'seed': seed,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},

                'clone_parameter_experiment': True,
                'clone_args': clone_param,

                'use_universal_step': {'on': True, 'schedule': schedule, 'step_args': step_args}
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,plum_pos,name,batch_size,clone_target,seed
0,0.1,3,1.5,0,plumtosc_shared_plumsched0_chk150task1.5p0.1_t0,64,saved_checkpoints/plum/plum_task1.5_1plum_t0/1...,21310
1,0.1,3,1.5,0,plumtosc_shared_plumsched0_chk150task1.5p0.1_t1,64,saved_checkpoints/plum/plum_task1.5_1plum_t1/1...,21311
2,0.1,3,1.5,0,plumtosc_shared_plumsched0_chk150task1.5p0.1_t2,64,saved_checkpoints/plum/plum_task1.5_1plum_t2/1...,21312
3,0.4,3,1.5,0,plumtosc_shared_plumsched0_chk150task1.5p0.4_t0,64,saved_checkpoints/plum/plum_task1.5_1plum_t0/1...,21410
4,0.4,3,1.5,0,plumtosc_shared_plumsched0_chk150task1.5p0.4_t1,64,saved_checkpoints/plum/plum_task1.5_1plum_t1/1...,21411
5,0.4,3,1.5,0,plumtosc_shared_plumsched0_chk150task1.5p0.4_t2,64,saved_checkpoints/plum/plum_task1.5_1plum_t2/1...,21412
6,0.1,3,1.5,0,plumtosc_shared_plumsched0_chk150task2.5p0.1_t0,64,saved_checkpoints/plum/plum_task2.5_1plum_t0/1...,21320
7,0.1,3,1.5,0,plumtosc_shared_plumsched0_chk150task2.5p0.1_t1,64,saved_checkpoints/plum/plum_task2.5_1plum_t1/1...,21321
8,0.1,3,1.5,0,plumtosc_shared_plumsched0_chk150task2.5p0.1_t2,64,saved_checkpoints/plum/plum_task2.5_1plum_t2/1...,21322
9,0.4,3,1.5,0,plumtosc_shared_plumsched0_chk150task2.5p0.4_t0,64,saved_checkpoints/plum/plum_task2.5_1plum_t0/1...,21420


In [3]:
num_trials = 3
probs = [0.1, 0.4]
tasks = [1.5, 2.5] # original plum envs
rows = []
batch = 64
# plum schedule in the sc env
schedule = [0, 200_000, 400_000, 600_000]
step_args = [0, 1, 2, 3]

clone_sets = ['shared']
clone_param = {'clone_layers': 'gru,shared0',
     'copy_obs_rms': True,
     'freeze_obs_rms': False}
j = 0

# fix chk 150 in clone_path_formatter
# clone_path_formatter = 'saved_checkpoints/plum/plum_task{task}_1plum_t{t}/150.pt'
clone_path_formatter = 'saved_models/plum/plum_task{task}_1plum_t{t}.pt'
for m, task in enumerate(tasks):
    for n, p in enumerate(probs):
        for i in range(num_trials):
            clone_path = clone_path_formatter.format(task=task, t=i)
            clone_param['clone_path'] = clone_path

            exp_name = f'plumtosc_shared_plumsched1b_task{task}p{p}_t{i}'
            env_kwargs = {'shortcut_probability': p,
                          'character_reset_pos': 3,
                          'wall_colors': 1.5,
                          'plum_pos': 0}
            seed = i + 10*(m+1) + 100*(n+1) + 200 + (j+1)*1000
            row = env_kwargs.copy()
            row['name'] = exp_name
            row['batch_size'] = batch
            row['clone_target'] = clone_path
            row['seed'] = seed

            rows.append(row)
            config = {
                'wandb_project_name': 'Nav_Shared_Layers',
                'exp_name': exp_name,
                'save_name': exp_name,
                'num_env_steps': 3000000,
                'env_name': 'ShortcutNav-v0',
                'algo': 'ppo',
                'num_mini_batch': 10,
                'num_processes': 100,
                'num_steps': batch,

                'normalize': False,

                'checkpoint_interval': 10,
                'save_dir': 'plumtosc',

                'recurrent': True,
                'seed': seed,
                'no_cuda': True,
                'env_kwargs': env_kwargs,

                'nn_base': 'DelayedRNNPPO',
                'nn_base_kwargs': {'hidden_size': 64},

                'clone_parameter_experiment': True,
                'clone_args': clone_param,

                'use_universal_step': {'on': True, 'schedule': schedule, 'step_args': step_args}
            }

            pickle.dump(config, open('../experiment_configs/' + exp_name, 'wb'))

pd.DataFrame(rows)

Unnamed: 0,shortcut_probability,character_reset_pos,wall_colors,plum_pos,name,batch_size,clone_target,seed
0,0.1,3,1.5,0,plumtosc_shared_plumsched1b_task1.5p0.1_t0,64,saved_models/plum/plum_task1.5_1plum_t0.pt,1310
1,0.1,3,1.5,0,plumtosc_shared_plumsched1b_task1.5p0.1_t1,64,saved_models/plum/plum_task1.5_1plum_t1.pt,1311
2,0.1,3,1.5,0,plumtosc_shared_plumsched1b_task1.5p0.1_t2,64,saved_models/plum/plum_task1.5_1plum_t2.pt,1312
3,0.4,3,1.5,0,plumtosc_shared_plumsched1b_task1.5p0.4_t0,64,saved_models/plum/plum_task1.5_1plum_t0.pt,1410
4,0.4,3,1.5,0,plumtosc_shared_plumsched1b_task1.5p0.4_t1,64,saved_models/plum/plum_task1.5_1plum_t1.pt,1411
5,0.4,3,1.5,0,plumtosc_shared_plumsched1b_task1.5p0.4_t2,64,saved_models/plum/plum_task1.5_1plum_t2.pt,1412
6,0.1,3,1.5,0,plumtosc_shared_plumsched1b_task2.5p0.1_t0,64,saved_models/plum/plum_task2.5_1plum_t0.pt,1320
7,0.1,3,1.5,0,plumtosc_shared_plumsched1b_task2.5p0.1_t1,64,saved_models/plum/plum_task2.5_1plum_t1.pt,1321
8,0.1,3,1.5,0,plumtosc_shared_plumsched1b_task2.5p0.1_t2,64,saved_models/plum/plum_task2.5_1plum_t2.pt,1322
9,0.4,3,1.5,0,plumtosc_shared_plumsched1b_task2.5p0.4_t0,64,saved_models/plum/plum_task2.5_1plum_t0.pt,1420
