In [1]:
from mlrl.runners.dqn_runner import DQNRun
from mlrl.experiments.procgen_dqn import make_procgen, create_rainbow_agent

pygame 2.1.0 (SDL 2.0.16, Python 3.8.10)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [4]:
# List the contents of the synced categorical DQN agent directory to see which runs are available.
!ls sync/dqn/categorical_dqn_agent

checkpoint
run-16823461236877840
run-16823461236877840.data-00000-of-00001
run-16823461236877840.index
run-16823470883794980
run-16823470883794980.data-00000-of-00001
run-16823470883794980.index
run-16823527592836354
run-16823527592836354.data-00000-of-00001
run-16823527592836354.index


In [5]:
import json

# Directory of the run under inspection; its config.json holds the
# hyper-parameters the run was launched with.
folder = './sync/dqn/categorical_dqn_agent/run-16823527592836354'

with open(f'{folder}/config.json') as config_file:
    config = json.load(config_file)

# Show the loaded configuration as the cell output.
config

{'action_repeats': 4,
 'agent_name': 'categorical_dqn_agent',
 'collect_steps_per_iteration': 1,
 'discount': 0.999,
 'env': 'bigfish',
 'epsilon': 0.1,
 'epsilon_decay_steps': 250000,
 'epsilon_schedule': False,
 'eval_steps': 1000,
 'experience_batch_size': 64,
 'final_epsilon': 0.1,
 'frame_stack': 0,
 'grayscale': False,
 'initial_collect_steps': 500,
 'initial_epsilon': 1.0,
 'learning_rate': 0.00025,
 'max_epochs': 500,
 'metrics': [],
 'model_config': 'N/A',
 'n_collect_envs': 64,
 'n_eval_envs': 64,
 'n_video_envs': 12,
 'name': 'run',
 'optimiser_config': {'amsgrad': False,
  'beta_1': 0.9,
  'beta_2': 0.999,
  'decay': 0.0,
  'epsilon': 1e-07,
  'learning_rate': 0.00025,
  'name': 'Adam'},
 'procgen_env_name': 'bigfish',
 'replay_buffer_capacity': 1024,
 'run_dir': './runs/categorical_dqn_agent/run-16823527592836354',
 'target_network_update_period': 10000,
 'train_steps_per_epoch': 20000,
 'video_seconds': 60}

In [6]:
# Environment name stored in the run config (None if the 'env' key is missing).
env_name = config.get('env')
# Build the environment from the saved run configuration.
env = make_procgen(env_name, config)
# Build the Q-network and the agent from the same configuration, so that the
# saved weights can be loaded into q_net later.
q_net, agent = create_rainbow_agent(env, config)

Building Categorical Q-Network weights...


In [7]:
# Sanity check: call the network on the environment's current observation.
# The second element of the returned pair is discarded.
q_vals, _ = q_net(env.current_time_step().observation)
# Displayed shape is (64, 15, 51) — presumably (envs, actions, atoms); TODO confirm.
q_vals.shape

TensorShape([64, 15, 51])

In [36]:
import re
from typing import Tuple


def parse_model_weights_string(path: str) -> Tuple[int, float]:
    """Extract the epoch and score encoded in a saved-weights index file name.

    The expected name is ``sequential_best_<epoch>_<value>.index`` where
    ``<epoch>`` is a non-negative integer and ``<value>`` is a decimal number
    (optional sign, optional fractional part, optional exponent).

    Args:
        path: Bare file name (not a full path) of the ``.index`` file.

    Returns:
        A ``(epoch, value)`` tuple of ``(int, float)``.

    Raises:
        ValueError: If ``path`` does not follow the naming scheme above.
    """
    # The dot before "index" is escaped so it only matches a literal '.', and
    # the value group also accepts signed, integer and exponent forms so that
    # e.g. negative scores do not make the whole listing fail.
    pattern = r"sequential_best_(\d+)_([-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)\.index"

    # fullmatch rejects trailing junk such as ".index.bak".
    match = re.fullmatch(pattern, path)
    if match is None:
        raise ValueError(f"Could not parse string: {path}")

    epoch_text, value_text = match.groups()
    return int(epoch_text), float(value_text)

In [38]:
from pathlib import Path

# Collect (checkpoint_prefix, epoch, value) for every saved-weights index file
# in the run's model_weights directory.
# with_suffix('') drops only the trailing '.index'; str.replace would also
# rewrite any '.index' occurring elsewhere in the path (e.g. a directory name).
model_paths = [
    (str(path.with_suffix('')), *parse_model_weights_string(path.name))
    for path in Path(f'{folder}/model_weights').glob('*.index')
    if path.is_file()
]

In [39]:
# Pick the checkpoint whose parsed value (last element of each
# (path, epoch, value) entry) is the largest.
path, epoch, ret_val = max(
    model_paths,
    key=lambda candidate: candidate[-1],
)
(path, epoch, ret_val)

('sync/dqn/categorical_dqn_agent/run-16823527592836354/model_weights/sequential_best_23_27.598986',
 23,
 27.598986)

In [40]:
# Load the weights of the best-scoring checkpoint selected above into q_net;
# the return value is kept in chkpt_status for later inspection.
chkpt_status = q_net.load_weights(path)

In [41]:
# Print the network's layer/parameter summary as a quick check of the model.
q_net.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 CategoricalQNetwork (Catego  ((64, 15, 51),           638909    
 ricalQNetwork)               ())                                
                                                                 
Total params: 638,909
Trainable params: 638,909
Non-trainable params: 0
_________________________________________________________________


In [43]:
from mlrl.runners.eval_runner import EvalRunner

In [62]:
# Evaluate the agent's policy on the same environment.
# NOTE(review): the first argument (2000 * batch size) is presumably the total
# number of evaluation steps — confirm against EvalRunner's signature.
eval_runner = EvalRunner(2000 * env.batch_size, env, agent.policy)

In [63]:
# Run the evaluation; the returned statistics are displayed as the cell output.
eval_runner.run()

Evaluation stats:
EvalNumberOfEpisodes: 428.000, EvalEnvironmentSteps: 128020.000, EvalAverageReturn: 33.140, EvalAverageEpisodeLength: 260.953, EvalTime: 20.766


{'EvalNumberOfEpisodes': 428,
 'EvalEnvironmentSteps': 128020,
 'EvalAverageReturn': 33.140186,
 'EvalAverageEpisodeLength': 260.95328,
 'EvalTime': 20.765726566314697}

In [59]:
from mlrl.experiments.procgen_dqn import create_video_renderer
from mlrl.utils.render_utils import embed_mp4

In [56]:
# Build a video renderer for this environment from the saved run configuration.
renderer = create_video_renderer(env_name, config)

In [60]:
# Render the agent's policy with 'video.mp4' as the target file name and embed
# the result in the notebook.
embed_mp4(renderer(agent.policy, 'video.mp4'))