<a href="https://colab.research.google.com/github/nathanwispinski/meta-rl/blob/main/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# train.ipynb

This is a Google Colab notebook that demonstrates a single-threaded version of agent training.

For a (faster) distributed version of training, see the [meta-rl GitHub repo](https://github.com/nathanwispinski/meta-rl/).

In the Setup section below, you will need to enter your GitHub username and access key, because this is a private repo.

For any questions, contact nathan3@ualberta.ca.

# Setup

In [2]:
#@title Change working directory to meta-rl.
%cd meta-rl
%pwd

[Errno 2] No such file or directory: 'meta-rl'
/home/natha/meta-rl


'/home/natha/meta-rl'

In [None]:
#@title Install dependencies from `requirements.txt`.
# Disabled by default: as written, this cell installs nothing.
# Uncomment the line below to install the packages listed in requirements.txt
# on a fresh runtime. In a notebook, `%pip` is generally preferred over `!pip`
# because it targets the running kernel's environment.
#!pip install -r requirements.txt

In [3]:
#@title Import dependencies after install.

import json
import numpy as np
import pickle

import modules.envs as envs
import modules.agents as agents
import modules.loggers as loggers

In [4]:
#@title Import config for training.
from configs.bandit_config_train import get_config

# Build the training config object that the cells below read from.
config = get_config()

# Keep a parsed-JSON snapshot of the config so it can be displayed.
config_as_json_text = config.to_json_best_effort()
json_config = json.loads(config_as_json_text)

In [None]:
#@title Print loaded config.
# A bare expression on the last line makes the notebook display its value.
# `json_config` is the parsed-JSON snapshot taken when the config was loaded,
# so it does not reflect edits made to `config` afterwards.
json_config

In [None]:
#@title Modify config (optional).
#@markdown Add as many lines as needed in the code here.
# Gather the overrides in one mapping, then apply them to the config in place.
config_overrides = {'random_seed': 100}
config.update(config_overrides)

# Re-serialize the config and display it so the changes are visible.
json.loads(config.to_json_best_effort())

# Training

In [8]:
#@title Unpack config.
# Sub-configs handed to the environment and agent factories.
env_config, agent_config = config.environment, config.agent

# Individual settings pulled out of the config.
random_seed = config.random_seed
total_training_steps = config.agent.total_training_steps
log_every_steps, params_filename = config.log_every_steps, config.params_filename

In [9]:
#@title Set random seed.
# Seed NumPy's legacy global random generator with the configured seed.
# NOTE(review): only NumPy is seeded here; if the environment or agent draws
# randomness from another library, that library needs its own seed — confirm.
np.random.seed(random_seed)

In [9]:
#@title Initialize environment.
# Build the environment from the `environment` section of the config.
env = envs.create_env(env_config)
# Reset once to obtain the first observation; it is passed to the agent
# factory and is the first input to the training loop.
observation = env.reset()

In [10]:
#@title Initialize agent.
# Collect the factory arguments first, then build the agent from them.
agent_kwargs = dict(
    observation=observation,
    num_actions=env.num_actions,
    agent_config=agent_config,
)
agent = agents.create_agent(**agent_kwargs)



In [11]:
#@title Initialize performance logger.
# Spell the logger options out one per line, then create the logger from them.
logger_kwargs = {
    'logger_name': 'bandit',
    'config': config,
    'log_to_console': True,
}
logger = loggers.create_logger(**logger_kwargs)

In [12]:
#@title Initialize LSTM recurrent state to zeros.
# Keep the initial state under its own name so the training loop can restore it
# when an episode ends; the running state starts out as that same object.
lstm_state = initial_lstm_state = agent.get_initial_lstm_state()

In [None]:
#@title Main training loop (with default settings, will take ~4.5 hours).

# Step/episode counters, plus the latest loss (0 until the first update returns).
step = 0
episode = 0
loss = 0

while step < total_training_steps:

    # Ask the agent for an action, then advance the environment with it.
    action, _, v_out, new_lstm_state, _ = agent.get_action(observation, lstm_state)
    next_observation, reward, done, info = env.step(action)

    # Record the transition, including the LSTM state the action was chosen from.
    transition = dict(
        obs=observation,
        action=action,
        reward=reward,
        next_obs=next_observation,
        done=done,
        lstm_state=lstm_state,
    )
    agent.buffer.append(**transition)

    # Roll the observation and recurrent state forward for the next step.
    observation, lstm_state = next_observation, new_lstm_state

    # Report this step; `loss` is the value returned by the previous update call.
    step_log = dict(
        global_step=step,
        worker_step=step,
        reward=reward,
        info=info,
        loss=loss,
        entropy_coef=agent.e_loss_coef,
    )
    logger.log_step(**step_log)

    # Update agent parameters if an episode is done, or
    # if the agent experience buffer == max_unroll_steps
    loss, grads, num_steps = agent.update(done, update_params=True)
    step += 1

    # At the end of an episode, start a fresh one: reset the environment and
    # restore the initial LSTM state.
    if done:
        episode += 1
        done = False
        lstm_state = initial_lstm_state
        observation = env.reset()

print('Done training!')

In [None]:
#@title Save model.
# Bundle the trained parameters with the config that produced them.
results = {"params": agent.params, "config": config.to_dict()}

# The bundle is written as a one-element list to `<params_filename>.pickle`.
output_path = params_filename + '.pickle'
with open(output_path, 'wb') as params_file:
    pickle.dump([results], params_file)

print("Saved parameters.")