In [1]:
import pickle
import pandas as pd
from datetime import datetime
import os
import numpy as np

# Collection of all settings to run auxiliary tasks that we set up.
# Each entry maps a short setting name to the pieces that have to be placed in
# different parts of an experiment config (see the per-key comments on 'none').
# NOTE: dict literals silently keep only the last value of a repeated key, so
# every setting name below must be unique.
aux_task_configs = {
    # No auxiliary task, default goal reward
    'none': {
        'auxiliary_tasks': [], #Inside env_kwargs
        'auxiliary_task_args': [], #Inside env_kwargs
        'auxiliary_heads': [], #Inside nn_base_kwargs
        'auxiliary_truth_sizes': [], #Inside config (parseargs)
        'aux_wrapper_kwargs': {}, #Inside config (parseargs)
        'rew_structure': 'goal' #Inside env_kwargs
    },

    # Reward-structure variants (no auxiliary heads)
    'rewexplore': {
        'auxiliary_tasks': [],
        'auxiliary_task_args': [],
        'auxiliary_heads': [],
        'auxiliary_truth_sizes': [],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'explore'
    },

    'rewdist': {
        'auxiliary_tasks': [],
        'auxiliary_task_args': [],
        'auxiliary_heads': [],
        'auxiliary_truth_sizes': [],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'dist'
    },

    # Task 2 with argument 0, 1, or both
    'wall0': {
        'auxiliary_tasks': [2],
        'auxiliary_task_args': [0],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },

    'wall1': {
        'auxiliary_tasks': [2],
        'auxiliary_task_args': [1],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },

    'wall01': {
        'auxiliary_tasks': [2, 2],
        'auxiliary_task_args': [0, 1],
        'auxiliary_heads': [[-1, 0, 0, 1], [-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1, 1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },

    'goaldist': {
        'auxiliary_tasks': [3],
        'auxiliary_task_args': [None],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },

    # This setting specifies its task through aux_wrapper_kwargs rather than
    # through the env-level 'auxiliary_tasks' list.
    'terminal': {
        'auxiliary_tasks': [],
        'auxiliary_task_args': [],
        'auxiliary_heads': [[-1, 0, 0, 1]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {'auxiliary_tasks': [0],
                               'auxiliary_task_args': [None]},
        'rew_structure': 'goal'
    },

    # Task 5 with argument 0, 1, or both
    'catwall0': {
        'auxiliary_tasks': [5],
        'auxiliary_task_args': [0],
        'auxiliary_heads': [[-1, 0, 1, 2]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },

    # Was previously also keyed 'catwall0', which overwrote the entry above and
    # left no 'catwall1' setting at all.
    'catwall1': {
        'auxiliary_tasks': [5],
        'auxiliary_task_args': [1],
        'auxiliary_heads': [[-1, 0, 1, 2]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },

    'catwall01': {
        'auxiliary_tasks': [5, 5],
        'auxiliary_task_args': [0, 1],
        'auxiliary_heads': [[-1, 0, 1, 2], [-1, 0, 1, 2]],
        'auxiliary_truth_sizes': [1, 1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },

    'catfacewall': {
        'auxiliary_tasks': [4],
        'auxiliary_task_args': [None],
        'auxiliary_heads': [[-1, 0, 1, 4]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },

    'catquad': {
        'auxiliary_tasks': [6],
        'auxiliary_task_args': [None],
        'auxiliary_heads': [[-1, 0, 1, 4]],
        'auxiliary_truth_sizes': [1],
        'aux_wrapper_kwargs': {},
        'rew_structure': 'goal'
    },
}

# MWM North Poster Baseline

These are the baseline experiments against which the others were compared, specifically the runs with batch sizes 16 and 32.

In [None]:
# Write one pickled experiment config per (batch size, trial) for the baseline
# runs without any auxiliary task, and collect a summary row for each of them.
batch_sizes = [16, 32, 64]
num_trials = 3

# NOTE(review): `width` was never defined in this cell, so it raised NameError on
# a fresh kernel. 16 matches the hidden size used by the one-hot north poster
# cell further down - TODO confirm this is the intended baseline width.
width = 16

# The experiment names say "auxnone", so take the heads / wrapper kwargs from the
# 'none' auxiliary setting instead of the undefined `auxiliary_heads[n]` and
# `aux_wrapper_kwargs[n]` lookups this cell used before.
aux_setting = aux_task_configs['none']

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_pdistal_batch{batch}auxnone_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 2, 'wall_colors': 1, 'num_rays': 12, 'fov': 1,
                      'poster': 1, 'character_reset_pos': 1}

        # Fresh copies so no two configs share the same mutable objects
        aux_heads = list(aux_setting['auxiliary_heads'])
        aux_kwargs = dict(aux_setting['aux_wrapper_kwargs'])

        # Summary row: the env settings plus the identifying run parameters
        row = env_kwargs.copy()
        row['name'] = exp_name
        row['layer_width'] = width
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            'checkpoint_interval': 10,
            'recurrent': True,
            'seed': i,  # the trial index doubles as the seed
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'hidden_size': width,
                               'auxiliary_heads': aux_heads},
            'aux_wrapper_kwargs': aux_kwargs
        }

        # Context manager so the file handle is closed (and flushed) right away
        with open('../experiment_configs/' + exp_name, 'wb') as config_file:
            pickle.dump(config, config_file)

# Testing one-hot observation encodings and retesting exploration MWM variation

## One-hot north poster

In [4]:
# Write one pickled experiment config per (batch size, trial) with one-hot
# observations enabled ('one_hot_obs': True), then display a summary table.
batch_sizes = [16, 32, 64]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_pnorth_batch{batch}onehot_t{i}'

        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 2, 'wall_colors': 1, 'num_rays': 12, 'fov': 1,
                      'poster': 1, 'character_reset_pos': 1,  'one_hot_obs': True}

        # Summary row: the env settings plus the identifying run parameters
        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 10,
            'num_processes': 100,
            'num_steps': batch,
            'checkpoint_interval': 10,
            'recurrent': True,
            'seed': i,  # the trial index doubles as the seed
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'hidden_size': 16},
        }

        # Context manager so the file handle is closed (and flushed) right away
        with open('../experiment_configs/' + exp_name, 'wb') as config_file:
            pickle.dump(config, config_file)

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,poster,character_reset_pos,one_hot_obs,name,batch_size
0,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t0,16
1,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t1,16
2,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t2,16
3,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t3,16
4,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch16onehot_t4,16
5,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t0,32
6,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t1,32
7,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t2,32
8,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t3,32
9,0,goal,2,1,12,1,1,1,True,nav_pnorth_batch32onehot_t4,32


In [5]:
# Write one pickled experiment config per (batch size, trial) for the
# 'nav_4wallexplore' one-hot runs, then display a summary table.
# Unlike the cells above, these use a single process and a single mini-batch.
batch_sizes = [16, 64, 128]
num_trials = 5

rows = []
for batch in batch_sizes:
    for i in range(num_trials):
        exp_name = f'nav_4wallexplore_batch{batch}onehot_t{i}'

        # NOTE(review): the name says "explore" but rew_structure is 'goal' -
        # presumably task_structure 3 selects the exploration task; confirm.
        env_kwargs = {'num_objects': 0, 'rew_structure': 'goal',
                      'task_structure': 3, 'wall_colors': 4, 'num_rays': 12, 'fov': 1,
                      'character_reset_pos': 1,  'one_hot_obs': True}

        # Summary row: the env settings plus the identifying run parameters
        row = env_kwargs.copy()
        row['name'] = exp_name
        row['batch_size'] = batch

        rows.append(row)
        config = {
            'wandb_project_name': 'Nav_Shared_Layers',
            'exp_name': exp_name,
            'save_name': exp_name,
            'num_env_steps': 3000000,
            'env_name': 'NavEnv-v0',
            'algo': 'ppo',
            'num_mini_batch': 1,
            'num_processes': 1,
            'num_steps': batch,
            'checkpoint_interval': 10,
            'recurrent': True,
            'seed': i,  # the trial index doubles as the seed
            'no_cuda': True,
            'env_kwargs': env_kwargs,
            'nn_base': 'FlexBase',
            'nn_base_kwargs': {'hidden_size': 64},
        }

        # Context manager so the file handle is closed (and flushed) right away
        with open('../experiment_configs/' + exp_name, 'wb') as config_file:
            pickle.dump(config, config_file)

pd.DataFrame(rows)

Unnamed: 0,num_objects,rew_structure,task_structure,wall_colors,num_rays,fov,character_reset_pos,one_hot_obs,name,batch_size
0,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t0,16
1,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t1,16
2,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t2,16
3,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t3,16
4,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch16onehot_t4,16
5,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t0,64
6,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t1,64
7,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t2,64
8,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t3,64
9,0,goal,3,4,12,1,1,True,nav_4wallexplore_batch64onehot_t4,64
