In [2]:
import pickle
import pandas as pd
from datetime import datetime
import os
import numpy as np

# Basic CartPole sequence length testing

In [6]:
# Sweep grid for the CartPole runs: every sequence length is paired with every
# burn-in length, and each pair is repeated once per trial (trial index = seed).
sequence_lengths = [2, 4, 8, 16]
burn_in_lengths = [2, 4, 8]
trials = 3

for sequence_length in sequence_lengths:
    for burn_in_length in burn_in_lengths:
        for t in range(trials):
            # The experiment name doubles as the config file name.
            exp_name = f'cartpole_seq{sequence_length}bil{burn_in_length}_t{t}'

            config = {
                'exp_name': exp_name,
                'seed': t,
                'sequence_length': sequence_length,
                'burn_in_length': burn_in_length
            }

            # The context manager closes the file as soon as the pickle is
            # written instead of leaving an open handle behind.
            with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
                pickle.dump(config, config_file)

# NavEnv-v0 Hyperparameter Testing

Running some experiments to see how the new hyperparameters introduced by the R2D2 algorithm (such as sequence length and burn-in length) affect training.

In [20]:

# Three trials of a single NavEnv-v0 setting; the trial index is also the seed.
for trial in range(3):
    # The experiment name doubles as the config file name and the save name.
    exp_name = f'mwm_test_seqbil8n1t8s20000l1_t{trial}'

    config = {
        'exp_name': exp_name,

        'total_timesteps': 800000,
        'buffer_size': 20000,
        'batch_size': 256,
        'sequence_length': 8,
        'burn_in_length': 8,
        'learning_starts': 10000,

        'save_name': exp_name,
        'save_dir': 'mwm_seqbil',
        'checkpoint_interval': 4000,
        'checkpoint_dir': 'mwm_seqbil',
        'n_envs': 1,
        'train_frequency': 8,
        'seed': trial,
        'alpha': 0,

        'env_id': 'NavEnv-v0',
        'env_kwargs': {
            'num_objects': 0, 'rew_structure': 'goal',
            'task_structure': 2, 'wall_colors': 4,
            'num_rays': 12, 'fov': 1
        }
    }

    # Close the file deterministically once the config has been pickled.
    with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
        pickle.dump(config, config_file)

In [26]:

# One-off test config: sets 'n_envs' to 4 and 'dummy_vec_env' to True.
# NOTE(review): no 'seed' key is written here — confirm the training script
# chooses its own seed.
exp_name = 'mwm_test3'

config = {
    'exp_name': exp_name,

    'total_timesteps': 800000,
    'buffer_size': 10000,
    'batch_size': 256,
    'sequence_length': 8,
    'burn_in_length': 4,
    'learning_starts': 4000,

    'save_name': exp_name,
    'save_dir': 'mwm_seqbil',
    'checkpoint_interval': 4000,
    'checkpoint_dir': 'mwm_seqbil',
    'n_envs': 4,
    'train_frequency': 4,
    'dummy_vec_env': True,

    'env_id': 'NavEnv-v0',
    'env_kwargs': {
        'num_objects': 0, 'rew_structure': 'goal',
        'task_structure': 2, 'wall_colors': 4,
        'num_rays': 12, 'fov': 1
    }
}

# Close the file deterministically once the config has been pickled.
with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
    pickle.dump(config, config_file)

## Sequence Length and Burn-in Length

In [18]:

# Full grid of sequence length x burn-in length on NavEnv-v0, `trials` configs
# per grid point.
# NOTE(review): no 'seed' key is written, so repeated trials differ only by
# name — confirm the training script chooses its own seed.
sequence_lengths = [2, 4, 8, 16, 32]
burn_in_lengths = [4, 8, 16]
trials = 3

for seq in sequence_lengths:
    for bil in burn_in_lengths:
        for trial in range(trials):
            # The experiment name doubles as the config file name and the save name.
            exp_name = f'mwm_seq{seq}bil{bil}_t{trial}'

            config = {
                'exp_name': exp_name,

                'total_timesteps': 800000,
                'buffer_size': 20000,
                'batch_size': 256,
                'sequence_length': seq,
                'burn_in_length': bil,

                'save_name': exp_name,
                'save_dir': 'mwm_seqbil',
                'checkpoint_interval': 4000,
                'checkpoint_dir': 'mwm_seqbil',

                'env_id': 'NavEnv-v0',
                'env_kwargs': {
                    'num_objects': 0, 'rew_structure': 'goal',
                    'task_structure': 2, 'wall_colors': 4,
                    'num_rays': 12, 'fov': 1
                }
            }

            # Close the file deterministically once the config has been pickled.
            with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
                pickle.dump(config, config_file)

In [2]:

# Extends the sequence-length / burn-in grid with the shorter burn-in lengths
# 0 and 2; the [4, 8, 16] burn-in values are generated by the preceding cell,
# which uses the same naming scheme.
# NOTE(review): no 'seed' key is written, so repeated trials differ only by
# name — confirm the training script chooses its own seed.
sequence_lengths = [2, 4, 8, 16, 32]
burn_in_lengths = [0, 2]
trials = 3

for seq in sequence_lengths:
    for bil in burn_in_lengths:
        for trial in range(trials):
            # The experiment name doubles as the config file name and the save name.
            exp_name = f'mwm_seq{seq}bil{bil}_t{trial}'

            config = {
                'exp_name': exp_name,

                'total_timesteps': 800000,
                'buffer_size': 20000,
                'batch_size': 256,
                'sequence_length': seq,
                'burn_in_length': bil,

                'save_name': exp_name,
                'save_dir': 'mwm_seqbil',
                'checkpoint_interval': 4000,
                'checkpoint_dir': 'mwm_seqbil',

                'env_id': 'NavEnv-v0',
                'env_kwargs': {
                    'num_objects': 0, 'rew_structure': 'goal',
                    'task_structure': 2, 'wall_colors': 4,
                    'num_rays': 12, 'fov': 1
                }
            }

            # Close the file deterministically once the config has been pickled.
            with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
                pickle.dump(config, config_file)

## Batch size and Buffer size

In [5]:
# Buffer-size sweep: only 'buffer_size' varies, everything else is held fixed.
# NOTE(review): no 'seed' key is written, so repeated trials differ only by
# name — confirm the training script chooses its own seed.
buffer_sizes = [10000, 20000, 40000, 80000]
trials = 3

for buffer_size in buffer_sizes:
    for trial in range(trials):
        # The experiment name doubles as the config file name and the save name.
        exp_name = f'mwm_buffer{buffer_size}_t{trial}'

        config = {
            'exp_name': exp_name,

            'total_timesteps': 800000,
            'buffer_size': buffer_size,
            'batch_size': 256,
            'sequence_length': 8,
            'burn_in_length': 8,
            'train_frequency': 10,

            'save_name': exp_name,
            'save_dir': 'mwm_seqbil',
            'checkpoint_interval': 4000,
            'checkpoint_dir': 'mwm_seqbil',

            'env_id': 'NavEnv-v0',
            'env_kwargs': {
                'num_objects': 0, 'rew_structure': 'goal',
                'task_structure': 2, 'wall_colors': 4,
                'num_rays': 12, 'fov': 1
            }
        }

        # Close the file deterministically once the config has been pickled.
        with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
            pickle.dump(config, config_file)

In [6]:
# Batch-size sweep: only 'batch_size' varies, everything else is held fixed.
# NOTE(review): no 'seed' key is written, so repeated trials differ only by
# name — confirm the training script chooses its own seed.
batch_sizes = [16, 32, 64, 128, 256, 512, 1024]
trials = 3

for batch_size in batch_sizes:
    for trial in range(trials):
        # The experiment name doubles as the config file name and the save name.
        exp_name = f'mwm_batch{batch_size}_t{trial}'

        config = {
            'exp_name': exp_name,

            'total_timesteps': 800000,
            'buffer_size': 20000,
            'batch_size': batch_size,
            'sequence_length': 8,
            'burn_in_length': 8,
            'train_frequency': 10,

            'save_name': exp_name,
            'save_dir': 'mwm_seqbil',
            'checkpoint_interval': 4000,
            'checkpoint_dir': 'mwm_seqbil',

            'env_id': 'NavEnv-v0',
            'env_kwargs': {
                'num_objects': 0, 'rew_structure': 'goal',
                'task_structure': 2, 'wall_colors': 4,
                'num_rays': 12, 'fov': 1
            }
        }

        # Close the file deterministically once the config has been pickled.
        with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
            pickle.dump(config, config_file)

## Adam epsilon

In [16]:
# 'adam_epsilon' fixed at 1.5e-4, crossed with two batch sizes.
# The "15e-5" in the experiment name is the same number as the 1.5e-4 value.
# NOTE(review): no 'seed' key is written, so repeated trials differ only by
# name — confirm the training script chooses its own seed.
trials = 3
batch_sizes = [64, 256]
for batch in batch_sizes:
    for trial in range(trials):
        # The experiment name doubles as the config file name and the save name.
        exp_name = f'mwm_adam15e-5batch{batch}_t{trial}'

        config = {
            'exp_name': exp_name,

            'total_timesteps': 800000,
            'buffer_size': 20000,
            'batch_size': batch,
            'sequence_length': 8,
            'burn_in_length': 8,
            'train_frequency': 10,
            'adam_epsilon': 1.5e-4,

            'save_name': exp_name,
            'save_dir': 'mwm_seqbil',
            'checkpoint_interval': 4000,
            'checkpoint_dir': 'mwm_seqbil',

            'env_id': 'NavEnv-v0',
            'env_kwargs': {
                'num_objects': 0, 'rew_structure': 'goal',
                'task_structure': 2, 'wall_colors': 4,
                'num_rays': 12, 'fov': 1
            }
        }

        # Close the file deterministically once the config has been pickled.
        with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
            pickle.dump(config, config_file)

In [18]:
# 'adam_epsilon' fixed at 1e-6, crossed with two batch sizes.
# NOTE(review): no 'seed' key is written, so repeated trials differ only by
# name — confirm the training script chooses its own seed.
trials = 3
batch_sizes = [64, 256]
for batch in batch_sizes:
    for trial in range(trials):
        # The experiment name doubles as the config file name and the save name.
        exp_name = f'mwm_adam1e-6batch{batch}_t{trial}'

        config = {
            'exp_name': exp_name,

            'total_timesteps': 800000,
            'buffer_size': 20000,
            'batch_size': batch,
            'sequence_length': 8,
            'burn_in_length': 8,
            'train_frequency': 10,
            'adam_epsilon': 1e-6,

            'save_name': exp_name,
            'save_dir': 'mwm_seqbil',
            'checkpoint_interval': 4000,
            'checkpoint_dir': 'mwm_seqbil',

            'env_id': 'NavEnv-v0',
            'env_kwargs': {
                'num_objects': 0, 'rew_structure': 'goal',
                'task_structure': 2, 'wall_colors': 4,
                'num_rays': 12, 'fov': 1
            }
        }

        # Close the file deterministically once the config has been pickled.
        with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
            pickle.dump(config, config_file)

In [7]:

# Three trials of the 'mwm_per' setting; the trial index is also the seed.
for trial in range(3):
    # The experiment name doubles as the config file name and the save name.
    exp_name = f'mwm_per_t{trial}'

    config = {
        'exp_name': exp_name,

        'total_timesteps': 800000,
        'buffer_size': 20000,
        'batch_size': 256,
        'sequence_length': 8,
        'burn_in_length': 8,
        'learning_starts': 50000,

        'save_name': exp_name,
        'save_dir': 'mwm_seqbil',
        'checkpoint_interval': 4000,
        'checkpoint_dir': 'mwm_seqbil',
        'n_envs': 1,
        'train_frequency': 8,
        'seed': trial,

        'env_id': 'NavEnv-v0',
        'env_kwargs': {
            'num_objects': 0, 'rew_structure': 'goal',
            'task_structure': 2, 'wall_colors': 4,
            'num_rays': 12, 'fov': 1
        }
    }

    # Close the file deterministically once the config has been pickled.
    with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
        pickle.dump(config, config_file)

In [11]:
# 2 x 2 x 2 grid, three trials each. The lists below are grouped: entries at
# the same position within a group always go together.
#   group 1: (alpha, beta)
#   group 2: (n_envs, train_frequency, buffer_size, learning_starts)
#   group 3: (use_segment_tree, one-letter flag used in the experiment name)
alphas = [0.6, 0]
betas = [0.4, 0]

n_envs = [4, 1]
tfs = [2, 8]
buffer_sizes = [5000, 20000]
learning_starts = [2500, 10000]

use_segment_trees = [True, False]
use_st_flags = ['t', 'f']

# zip() walks each group in lockstep, replacing manual index bookkeeping.
for alpha, beta in zip(alphas, betas):
    for n_env, tf, buffer_size, learning_start in zip(n_envs, tfs, buffer_sizes, learning_starts):
        for use_segment_tree, use_st_flag in zip(use_segment_trees, use_st_flags):
            for trial in range(3):
                # The experiment name doubles as the config file name and the save name.
                exp_name = f'mwm_persegmentree_alpha{alpha}n{n_env}usest{use_st_flag}_t{trial}'

                # NOTE(review): 'save_dir' is 'mwm_seqbil' while 'checkpoint_dir'
                # is 'mwm_persegtree' — confirm the split is intended.
                config = {
                    'exp_name': exp_name,

                    'total_timesteps': 800000,
                    'buffer_size': buffer_size,
                    'batch_size': 256,
                    'sequence_length': 8,
                    'burn_in_length': 8,
                    'learning_starts': learning_start,
                    'alpha': alpha,
                    'beta': beta,
                    'use_segment_tree': use_segment_tree,

                    'save_name': exp_name,
                    'save_dir': 'mwm_seqbil',
                    'checkpoint_interval': 4000,
                    'checkpoint_dir': 'mwm_persegtree',
                    'n_envs': n_env,
                    'train_frequency': tf,
                    'seed': trial,

                    'env_id': 'NavEnv-v0',
                    'env_kwargs': {
                        'num_objects': 0, 'rew_structure': 'goal',
                        'task_structure': 2, 'wall_colors': 4,
                        'num_rays': 12, 'fov': 1
                    }
                }

                # Close the file deterministically once the config has been pickled.
                with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
                    pickle.dump(config, config_file)

In [8]:

# Three trials of the 'mwm_pertest' setting ('alpha' set to 0); the trial index
# is also the seed.
for trial in range(3):
    # The experiment name doubles as the config file name and the save name.
    exp_name = f'mwm_pertest_t{trial}'

    config = {
        'exp_name': exp_name,

        'total_timesteps': 800000,
        'buffer_size': 20000,
        'batch_size': 256,
        'sequence_length': 8,
        'burn_in_length': 8,
        'learning_starts': 10000,
        'alpha': 0,

        'save_name': exp_name,
        'save_dir': 'mwm_seqbil',
        'checkpoint_interval': 4000,
        'checkpoint_dir': 'mwm_seqbil',
        'n_envs': 1,
        'train_frequency': 8,
        'seed': trial,

        'env_id': 'NavEnv-v0',
        'env_kwargs': {
            'num_objects': 0, 'rew_structure': 'goal',
            'task_structure': 2, 'wall_colors': 4,
            'num_rays': 12, 'fov': 1
        }
    }

    # Close the file deterministically once the config has been pickled.
    with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
        pickle.dump(config, config_file)

In [10]:

# Three trials of the 'mwm_pertest2' setting (both 'alpha' and 'beta' set to 0);
# the trial index is also the seed.
for trial in range(3):
    # The experiment name doubles as the config file name and the save name.
    exp_name = f'mwm_pertest2_t{trial}'

    config = {
        'exp_name': exp_name,

        'total_timesteps': 800000,
        'buffer_size': 20000,
        'batch_size': 256,
        'sequence_length': 8,
        'burn_in_length': 8,
        'learning_starts': 10000,
        'alpha': 0,
        'beta': 0,

        'save_name': exp_name,
        'save_dir': 'mwm_seqbil',
        'checkpoint_interval': 4000,
        'checkpoint_dir': 'mwm_seqbil',
        'n_envs': 1,
        'train_frequency': 8,
        'seed': trial,

        'env_id': 'NavEnv-v0',
        'env_kwargs': {
            'num_objects': 0, 'rew_structure': 'goal',
            'task_structure': 2, 'wall_colors': 4,
            'num_rays': 12, 'fov': 1
        }
    }

    # Close the file deterministically once the config has been pickled.
    with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
        pickle.dump(config, config_file)

In [3]:
# Grid over 'dummy_vec_env' (True/False) x number of environments, three trials
# each. `n_envs` and `tf` are paired by position (1 env -> train_frequency 8,
# 4 envs -> train_frequency 2), as are `dummies` and `dummy_flags`.
n_envs = [1, 4]
tf = [8, 2]
dummies = [True, False]
dummy_flags = ['t', 'f']

# zip() walks each pair of lists in lockstep, replacing enumerate() + indexing.
for dummy, dummy_flag in zip(dummies, dummy_flags):
    for n_env, train_frequency in zip(n_envs, tf):
        for trial in range(3):
            # The experiment name doubles as the config file name and the save name.
            exp_name = f'mwm_perv2n{n_env}dummy{dummy_flag}_t{trial}'

            config = {
                'exp_name': exp_name,

                'total_timesteps': 800000,
                'buffer_size': 20000,
                'batch_size': 256,
                'sequence_length': 8,
                'burn_in_length': 4,
                'learning_starts': 10000,
                'alpha': 0,

                'save_name': exp_name,
                'save_dir': 'mwm_seqbil',
                'checkpoint_interval': 4000,
                'checkpoint_dir': 'mwm_seqbil',
                'n_envs': n_env,
                'dummy_vec_env': dummy,
                'train_frequency': train_frequency,
                'seed': trial,

                'env_id': 'NavEnv-v0',
                'env_kwargs': {
                    'num_objects': 0, 'rew_structure': 'goal',
                    'task_structure': 2, 'wall_colors': 4,
                    'num_rays': 12, 'fov': 1
                }
            }

            # Close the file deterministically once the config has been pickled.
            with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
                pickle.dump(config, config_file)

In [5]:
# Sanity check: read one generated config back from disk and display it.
# Only unpickle files this project wrote itself; pickle.load on untrusted data
# can execute arbitrary code.
# NOTE(review): the saved output below has no 'alpha' key although the cell that
# writes the mwm_perv2 configs sets 'alpha': 0 — the output looks stale; re-run
# both cells to refresh it.
with open('../experiment_configs/mwm_perv2n4dummyf_t1', 'rb') as config_file:
    loaded_config = pickle.load(config_file)
loaded_config

{'exp_name': 'mwm_perv2n4dummyf_t1',
 'total_timesteps': 800000,
 'buffer_size': 20000,
 'batch_size': 256,
 'sequence_length': 8,
 'burn_in_length': 4,
 'learning_starts': 10000,
 'save_name': 'mwm_perv2n4dummyf_t1',
 'save_dir': 'mwm_seqbil',
 'checkpoint_interval': 4000,
 'checkpoint_dir': 'mwm_seqbil',
 'n_envs': 4,
 'dummy_vec_env': False,
 'train_frequency': 2,
 'seed': 1,
 'env_id': 'NavEnv-v0',
 'env_kwargs': {'num_objects': 0,
  'rew_structure': 'goal',
  'task_structure': 2,
  'wall_colors': 4,
  'num_rays': 12,
  'fov': 1}}

In [4]:
from pathlib import Path

In [14]:
# Checkpoint directory for the 'mwm_seqbil_t0' run, under the shared
# 'mwm_seqbil' checkpoint folder.
path = Path('../saved_checkpoints/mwm_seqbil/').joinpath('mwm_seqbil_t0')

In [15]:
# Create the directory together with any missing parents; do nothing if it
# already exists.
path.mkdir(
    parents=True,
    exist_ok=True,
)