In [1]:
import pickle
import pandas as pd
from datetime import datetime
import os
import numpy as np

# Basic CartPole sequence length testing

In [6]:
# CartPole sweep: write one pickled config file for every combination of
# sequence length, burn-in length, and trial index.
sequence_lengths = [2, 4, 8, 16]
burn_in_lengths = [2, 4, 8]
trials = 3

for sequence_length in sequence_lengths:
    for burn_in_length in burn_in_lengths:
        for t in range(trials):
            exp_name = f'cartpole_seq{sequence_length}bil{burn_in_length}_t{t}'

            config = {
                'exp_name': exp_name,
                'seed': t,  # the trial index doubles as the seed
                'sequence_length': sequence_length,
                'burn_in_length': burn_in_length
            }

            # Use a context manager so the file handle is flushed and closed
            # deterministically instead of being left to the garbage collector.
            with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
                pickle.dump(config, config_file)

# NavEnv-v0 Hyperparameter Testing

These experiments measure how the hyperparameters introduced by the R2D2 algorithm (sequence length, burn-in length, replay buffer size, and batch size) affect training on NavEnv-v0.

## Sequence Length and Burn-in Length

In [18]:

# NavEnv-v0 sweep: write one pickled config file for every combination of
# sequence length, burn-in length, and trial index.
sequence_lengths = [2, 4, 8, 16, 32]
burn_in_lengths = [4, 8, 16]
trials = 3

for seq in sequence_lengths:
    for bil in burn_in_lengths:
        for trial in range(trials):
            exp_name = f'mwm_seq{seq}bil{bil}_t{trial}'

            config = {
                'exp_name': exp_name,

                # Training hyperparameters; only the sequence and burn-in
                # lengths vary across this sweep.
                'total_timesteps': 800000,
                'buffer_size': 20000,
                'batch_size': 256,
                'sequence_length': seq,
                'burn_in_length': bil,

                # Saving / checkpointing settings.
                'save_name': exp_name,
                'save_dir': 'mwm_seqbil',
                'checkpoint_interval': 4000,
                'checkpoint_dir': 'mwm_seqbil',

                # Environment settings, identical for every run in the sweep.
                'env_id': 'NavEnv-v0',
                'env_kwargs': {
                    'num_objects': 0, 'rew_structure': 'goal',
                    'task_structure': 2, 'wall_colors': 4,
                    'num_rays': 12, 'fov': 1
                }
            }

            # Use a context manager so the file handle is flushed and closed
            # deterministically instead of being left to the garbage collector.
            with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
                pickle.dump(config, config_file)

In [2]:

# NavEnv-v0 sweep over sequence length with the short burn-in lengths 0 and 2.
# (This cell previously used burn-in lengths [4, 8, 16].)
sequence_lengths = [2, 4, 8, 16, 32]
burn_in_lengths = [0, 2]
trials = 3

for seq in sequence_lengths:
    for bil in burn_in_lengths:
        for trial in range(trials):
            exp_name = f'mwm_seq{seq}bil{bil}_t{trial}'

            config = {
                'exp_name': exp_name,

                # Training hyperparameters; only the sequence and burn-in
                # lengths vary across this sweep.
                'total_timesteps': 800000,
                'buffer_size': 20000,
                'batch_size': 256,
                'sequence_length': seq,
                'burn_in_length': bil,

                # Saving / checkpointing settings.
                'save_name': exp_name,
                'save_dir': 'mwm_seqbil',
                'checkpoint_interval': 4000,
                'checkpoint_dir': 'mwm_seqbil',

                # Environment settings, identical for every run in the sweep.
                'env_id': 'NavEnv-v0',
                'env_kwargs': {
                    'num_objects': 0, 'rew_structure': 'goal',
                    'task_structure': 2, 'wall_colors': 4,
                    'num_rays': 12, 'fov': 1
                }
            }

            # Use a context manager so the file handle is flushed and closed
            # deterministically instead of being left to the garbage collector.
            with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
                pickle.dump(config, config_file)

## Batch size and Buffer size

In [5]:
# NavEnv-v0 sweep over replay buffer size: write one pickled config file per
# (buffer size, trial) pair. All other hyperparameters are held fixed.
buffer_sizes = [10000, 20000, 40000, 80000]
trials = 3

for buffer_size in buffer_sizes:
    for trial in range(trials):
        exp_name = f'mwm_buffer{buffer_size}_t{trial}'

        config = {
            'exp_name': exp_name,

            # Training hyperparameters; only the buffer size varies here.
            'total_timesteps': 800000,
            'buffer_size': buffer_size,
            'batch_size': 256,
            'sequence_length': 8,
            'burn_in_length': 8,
            'train_frequency': 10,

            # Saving / checkpointing settings.
            # NOTE(review): save_dir and checkpoint_dir are 'mwm_seqbil', the
            # same value the sequence/burn-in configs use -- confirm that
            # sharing the directory is intended for the buffer-size runs.
            'save_name': exp_name,
            'save_dir': 'mwm_seqbil',
            'checkpoint_interval': 4000,
            'checkpoint_dir': 'mwm_seqbil',

            # Environment settings, identical for every run in the sweep.
            'env_id': 'NavEnv-v0',
            'env_kwargs': {
                'num_objects': 0, 'rew_structure': 'goal',
                'task_structure': 2, 'wall_colors': 4,
                'num_rays': 12, 'fov': 1
            }
        }

        # Use a context manager so the file handle is flushed and closed
        # deterministically instead of being left to the garbage collector.
        with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
            pickle.dump(config, config_file)

In [6]:
# NavEnv-v0 sweep over batch size: write one pickled config file per
# (batch size, trial) pair. All other hyperparameters are held fixed.
batch_sizes = [16, 32, 64, 128, 256, 512, 1024]
trials = 3

for batch_size in batch_sizes:
    for trial in range(trials):
        exp_name = f'mwm_batch{batch_size}_t{trial}'

        config = {
            'exp_name': exp_name,

            # Training hyperparameters; only the batch size varies here.
            'total_timesteps': 800000,
            'buffer_size': 20000,
            'batch_size': batch_size,
            'sequence_length': 8,
            'burn_in_length': 8,
            'train_frequency': 10,

            # Saving / checkpointing settings.
            # NOTE(review): save_dir and checkpoint_dir are 'mwm_seqbil', the
            # same value the sequence/burn-in configs use -- confirm that
            # sharing the directory is intended for the batch-size runs.
            'save_name': exp_name,
            'save_dir': 'mwm_seqbil',
            'checkpoint_interval': 4000,
            'checkpoint_dir': 'mwm_seqbil',

            # Environment settings, identical for every run in the sweep.
            'env_id': 'NavEnv-v0',
            'env_kwargs': {
                'num_objects': 0, 'rew_structure': 'goal',
                'task_structure': 2, 'wall_colors': 4,
                'num_rays': 12, 'fov': 1
            }
        }

        # Use a context manager so the file handle is flushed and closed
        # deterministically instead of being left to the garbage collector.
        with open(f'../experiment_configs/{exp_name}', 'wb') as config_file:
            pickle.dump(config, config_file)

In [4]:
from pathlib import Path

In [14]:
# Checkpoint directory for the 'mwm_seqbil_t0' run, built from its two path segments.
path = Path('../saved_checkpoints/mwm_seqbil/', 'mwm_seqbil_t0')

In [15]:
# Create the directory along with any missing parents; no error if it already exists.
path.mkdir(parents=True, exist_ok=True)