In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
# Switch on seaborn's default plot styling for the figures drawn in this notebook.
sns.set()



In [2]:
from mlrl.experiments.train_maze_agent import (
    create_maze_meta_env, RestrictedActionsMazeState, create_batched_tf_meta_env, 
    create_agent, create_training_run, get_maze_name
)
from tf_agents.environments.tf_py_environment import TFPyEnvironment
from tf_agents.environments.gym_wrapper import GymWrapper
from tf_agents.environments.batched_py_environment import BatchedPyEnvironment
from tf_agents.train.utils import spec_utils

# Experiment configuration handed to the maze meta-environment factory.
args = {'agent': 'ppo_agent', 'env_batch_size': 2}

# One Gym-wrapped maze meta-environment per batch entry. The count is read from
# args['env_batch_size'] (instead of a second hard-coded literal) so the
# configured batch size and the real number of environments cannot drift apart.
env = BatchedPyEnvironment([
    GymWrapper(create_maze_meta_env(RestrictedActionsMazeState, args))
    for _ in range(args['env_batch_size'])
])

env.reset()

# Tensor specs derived from the batched environment; the cells below use them
# to build the networks and the agent.
observation_tensor_spec, action_tensor_spec, time_step_tensor_spec = (
    spec_utils.get_tensor_specs(env))

pygame 2.1.0 (SDL 2.0.16, Python 3.8.10)
Hello from the pygame community. https://www.pygame.org/contribute.html


ALSA lib confmisc.c:767:(parse_card) cannot find card '0'
ALSA lib conf.c:4732:(_snd_config_evaluate) function snd_func_card_driver returned error: No such file or directory
ALSA lib confmisc.c:392:(snd_func_concat) error evaluating strings
ALSA lib conf.c:4732:(_snd_config_evaluate) function snd_func_concat returned error: No such file or directory
ALSA lib confmisc.c:1246:(snd_func_refer) error evaluating name
ALSA lib conf.c:4732:(_snd_config_evaluate) function snd_func_refer returned error: No such file or directory
ALSA lib conf.c:5220:(snd_config_expand) Evaluate error: No such file or directory
ALSA lib pcm.c:2642:(snd_pcm_open_noupdate) Unknown PCM default
ALSA lib confmisc.c:767:(parse_card) cannot find card '0'
ALSA lib conf.c:4732:(_snd_config_evaluate) function snd_func_card_driver returned error: No such file or directory
ALSA lib confmisc.c:392:(snd_func_concat) error evaluating strings
ALSA lib conf.c:4732:(_snd_config_evaluate) function snd_func_concat returned error: N

In [3]:
from tf_agents.agents.ppo.ppo_agent import PPOAgent
from tf_agents.networks.sequential import Sequential
from tf_agents.networks.value_network import ValueNetwork
from tf_agents.networks.actor_distribution_network import ActorDistributionNetwork
from tf_agents.train.utils import train_utils

from mlrl.meta.search_networks import SearchActorNetwork, SearchValueNetwork
from mlrl.meta.search_networks import SearchTransformer, CategoricalNetwork, SearchActorLogitsNetwork


# Name -> class mapping for the mlrl Keras classes, registered via the
# custom-object scope opened below.
custom_objects = {
    'SearchActorLogitsNetwork': SearchActorLogitsNetwork,
    'SearchTransformer': SearchTransformer
}

# The actor network is constructed while the custom-object scope is active.
# NOTE(review): presumably the scope is needed because the preprocessing layer
# gets rebuilt from its Keras config during network construction, which would
# require resolving these class names — TODO confirm.
with tf.keras.utils.custom_object_scope(custom_objects):
    # SearchActorLogitsNetwork is plugged in as the preprocessing layer, no
    # fully connected layer sizes are given (fc_layer_params=None), and the
    # discrete projection is delegated to mlrl's CategoricalNetwork.
    actor_dist_net = ActorDistributionNetwork(
        observation_tensor_spec, action_tensor_spec,
        preprocessing_layers=SearchActorLogitsNetwork(),
        fc_layer_params=None,
        discrete_projection_net=lambda spec: CategoricalNetwork(spec)
    )

# Critic network: observations pass through a SearchTransformer; the combiner
# then takes the first preprocessed output and sums it over axis -2. No fully
# connected layer sizes are configured.
value_net = ValueNetwork(
    input_tensor_spec=observation_tensor_spec,
    preprocessing_layers=SearchTransformer(3, 16, 2),
    preprocessing_combiner=tf.keras.layers.Lambda(
        lambda encoded: tf.reduce_sum(encoded[0], axis=-2)
    ),
    fc_layer_params=None,
    batch_squash=True,
)

# Step counter handed to the agent as its train_step_counter.
train_step = train_utils.create_train_step()

# PPO agent wired to the actor and value networks defined above. Advantage
# computation and normalizer updates inside agent.train() are both switched
# off, and observation normalization is disabled.
agent = PPOAgent(
    time_step_spec=time_step_tensor_spec,
    action_spec=action_tensor_spec,
    optimizer=tf.keras.optimizers.Adam(),
    actor_net=actor_dist_net,
    value_net=value_net,
    discount_factor=0.99,
    normalize_observations=False,
    update_normalizers_in_train=False,
    compute_value_and_advantage_in_train=False,
    num_epochs=1,  # deprecated param
    train_step_counter=train_step,
)

2022-10-03 07:51:29.534138: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-03 07:51:29.545437: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-03 07:51:29.546179: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:961] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-10-03 07:51:29.547741: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate

In [4]:
from tf_agents.replay_buffers import tf_uniform_replay_buffer

# Replay storage shaped by the agent's collect data spec. The buffer's batch
# size follows the environment, falling back to 1 when env.batch_size is falsy.
replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    max_length=1000,
    batch_size=env.batch_size or 1,
    data_spec=agent.collect_data_spec,
)


def preprocess_seq(experience, info):
    """Apply the agent's sequence preprocessing to one dataset element.

    Args:
        experience: Value forwarded to ``agent.preprocess_sequence``.
        info: Companion value of the dataset element; passed through untouched.

    Returns:
        A ``(preprocessed_experience, info)`` tuple.
    """
    processed = agent.preprocess_sequence(experience)
    return processed, info


def dataset_fn():
    """Create the dataset that feeds the learner.

    Returns:
        The replay buffer's dataset, requested with ``sample_batch_size=8`` and
        ``num_steps=8``, with ``preprocess_seq`` mapped over every element.
    """
    return replay_buffer.as_dataset(
        sample_batch_size=8,
        num_steps=8,
    ).map(preprocess_seq)

In [8]:
from tf_agents.train import actor
# Imported under an alias on purpose: a later cell assigns the name `learner`
# to the PPOLearner instance. With a plain `import learner`, re-running this
# cell would rebind `learner` to the module and break later `learner.run()`
# calls (and vice versa for `learner.TRAIN_DIR`).
from tf_agents.train import learner as learner_lib
from tf_agents.metrics import py_metrics

import os

# Counts environment steps taken during collection; also used as the reference
# metric for the actor's summaries.
collect_env_step_metric = py_metrics.EnvironmentSteps()
# Root directory under which the training summaries are written.
root_dir = './test1'

# Actor that drives `env` with the agent's collect policy and appends every
# collected batch to the replay buffer through the add_batch observer.
# NOTE(review): TF-Agents examples usually wrap a TF policy in PyTFEagerPolicy
# before handing it to an Actor that steps a Python environment — confirm that
# passing agent.collect_policy directly is intended.
collect_actor = actor.Actor(
    env,
    agent.collect_policy,
    train_step,
    steps_per_run=8,
    observers=[replay_buffer.add_batch],
    metrics=actor.collect_metrics(buffer_size=10) + [collect_env_step_metric],
    reference_metrics=[collect_env_step_metric],
    summary_dir=os.path.join(root_dir, learner_lib.TRAIN_DIR),
    summary_interval=100)

# Initial collection run so the replay buffer holds data before training starts.
collect_actor.run()

In [9]:
from tf_agents.train.ppo_learner import PPOLearner

# Learner that trains `agent`, writing under root_dir. The same dataset factory
# supplies both the experience data and the normalization data.
learner = PPOLearner(
    root_dir=root_dir,
    train_step=train_step,
    agent=agent,
    num_epochs=1,
    num_samples=1,
    normalization_dataset_fn=dataset_fn,
    experience_dataset_fn=dataset_fn,
)

In [10]:
# Alternate collection and training for ten rounds: every round first runs the
# collect actor, then performs one learner update.
# NOTE(review): the replay buffer is never cleared between rounds, so updates
# may sample experience gathered under older policies — confirm this is
# intended for PPO.
for iteration in range(10):
    print(f'Iteration: {iteration}')
    collect_actor.run()
    learner.run()

Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9


In [11]:
# One additional learner update; as the cell's last expression, the returned
# loss information is shown as the cell output.
learner.run()

LossInfo(loss=<tf.Tensor: shape=(), dtype=float32, numpy=1.9071126>, extra=PPOLossInfo(policy_gradient_loss=<tf.Tensor: shape=(), dtype=float32, numpy=-0.00576571>, value_estimation_loss=<tf.Tensor: shape=(), dtype=float32, numpy=1.8566148>, l2_regularization_loss=<tf.Tensor: shape=(), dtype=float32, numpy=0.0>, entropy_regularization_loss=<tf.Tensor: shape=(), dtype=float32, numpy=0.0>, kl_penalty_loss=<tf.Tensor: shape=(), dtype=float32, numpy=0.056263443>))

In [12]:
from mlrl.utils.render_utils import create_policy_eval_video, embed_mp4

# Record the agent's collect policy acting in `env` (max_steps=30) and embed
# the resulting video in the notebook.
eval_video = create_policy_eval_video(agent.collect_policy, env, max_steps=30)
embed_mp4(eval_video)