In [10]:
%matplotlib inline

import os
import os.path as osp
import sys

import gym
import imageio
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm import tqdm_notebook as tqdm

from tf_agents.agents.sac import sac_agent
from tf_agents.policies import random_tf_policy
from tf_agents.environments import tf_py_environment
from tf_agents.environments import gym_wrapper
from tf_agents.metrics import tf_metrics
from tf_agents.networks import actor_distribution_network
from tf_agents.networks import normal_projection_network
from tf_agents.agents.ddpg import critic_network
from tf_agents.drivers import dynamic_step_driver

import envs
import algos

In [11]:
# Create the Gym environment from its registered id, then wrap it in two
# steps so TF-Agents can drive it. The raw `env` handle is kept because the
# rollout cell calls `env.render(...)` on it directly.
env = gym.make('MinitaurGoalVelocityEnv-v0')
py_env = gym_wrapper.GymWrapper(env)
tf_env = tf_py_environment.TFPyEnvironment(py_env)

In [3]:
def normal_projection_net(action_spec,
                          init_action_stddev=0.35,
                          init_means_output_factor=0.1):
    """Build the `NormalProjectionNetwork` used as the actor's output head.

    Args:
        action_spec: Action spec passed straight through to
            `NormalProjectionNetwork`.
        init_action_stddev: Accepted but intentionally unused; it is deleted
            on entry and never reaches the network.
        init_means_output_factor: Forwarded unchanged under the same keyword.

    Returns:
        A `normal_projection_network.NormalProjectionNetwork` configured with
        a state-dependent standard deviation, `sac_agent.std_clip_transform`
        as the std transform, no mean transform, and `scale_distribution=True`.
    """
    del init_action_stddev  # part of the signature only; not used here

    # Fixed settings for the projection head, gathered in one place.
    projection_kwargs = dict(
        mean_transform=None,
        state_dependent_std=True,
        init_means_output_factor=init_means_output_factor,
        std_transform=sac_agent.std_clip_transform,
        scale_distribution=True,
    )
    return normal_projection_network.NormalProjectionNetwork(
        action_spec, **projection_kwargs)

In [4]:
# Step counter handed to the agent as `train_step_counter`; the video cell
# also reads it to name the output directory.
global_step = tf.compat.v1.train.get_or_create_global_step()

# Specs that describe what the environment emits and accepts.
time_step_spec = tf_env.time_step_spec()
observation_spec = time_step_spec.observation
action_spec = tf_env.action_spec()

# Hyperparameters shared by several constructors below.
HIDDEN_LAYERS = (256, 256)
LEARNING_RATE = 3e-4

# Actor: observation -> action distribution, using the projection head
# defined by `normal_projection_net`.
actor_net = actor_distribution_network.ActorDistributionNetwork(
    observation_spec,
    action_spec,
    fc_layer_params=HIDDEN_LAYERS,
    continuous_projection_net=normal_projection_net,
)

# Critic: takes the (observation, action) pair through joint dense layers.
critic_net = critic_network.CriticNetwork(
    (observation_spec, action_spec),
    joint_fc_layer_params=HIDDEN_LAYERS,
)

# NOTE(review): gamma=0 looks like it gives future rewards zero weight in the
# TD target -- confirm this is intended rather than a more typical discount
# such as 0.99.
tf_agent = sac_agent.SacAgent(
    time_step_spec,
    action_spec,
    actor_network=actor_net,
    critic_network=critic_net,
    actor_optimizer=tf.compat.v1.train.AdamOptimizer(
        learning_rate=LEARNING_RATE),
    critic_optimizer=tf.compat.v1.train.AdamOptimizer(
        learning_rate=LEARNING_RATE),
    alpha_optimizer=tf.compat.v1.train.AdamOptimizer(
        learning_rate=LEARNING_RATE),
    target_update_tau=0.005,
    target_update_period=1,
    td_errors_loss_fn=tf.keras.losses.mse,
    gamma=0,
    reward_scale_factor=1.,
    gradient_clipping=1.,
    debug_summaries=False,
    summarize_grads_and_vars=False,
    train_step_counter=global_step,
)

W1019 14:07:40.541437 140264089372416 module_wrapper.py:137] From /scr1/.virtualenvs/tfagents/lib/python3.6/site-packages/tf_agents/agents/ddpg/critic_network.py:136: The name tf.keras.initializers.RandomUniform is deprecated. Please use tf.compat.v1.keras.initializers.RandomUniform instead.

W1019 14:07:40.546610 140264089372416 deprecation.py:323] From /scr1/.virtualenvs/tfagents/lib/python3.6/site-packages/tf_agents/specs/tensor_spec.py:295: SeedStream.__init__ (from tensorflow_probability.python.util.seed_stream) is deprecated and will be removed after 2019-10-01.
Instructions for updating:
SeedStream has moved to `tfp.util.SeedStream`.
W1019 14:07:41.392197 140264089372416 deprecation.py:323] From /scr1/.virtualenvs/tfagents/lib/python3.6/site-packages/tf_agents/distributions/utils.py:92: AffineScalar.__init__ (from tensorflow_probability.python.bijectors.affine_scalar) is deprecated and will be removed after 2020-01-01.
Instructions for updating:
`AffineScalar` bijector is deprec

In [13]:
# Display the current value of the step counter; the video-writing cell
# embeds this same value in the output directory name.
global_step.numpy()

0

In [5]:
# Policy used for the rollout below, taken from the agent's `policy` attribute.
policy = tf_agent.policy

In [6]:
# Roll out one episode with `policy`, capturing a rendered frame of the raw
# gym env after the reset and after every step.
time_step = tf_env.reset()
frames = [env.render('rgb_array')]
pol_state = policy.get_initial_state(1)  # batch size 1

while not time_step.is_last():
    # Choose an action, carrying the policy state forward between steps.
    action_step = policy.action(time_step, pol_state)
    action = action_step.action
    pol_state = action_step.state

    time_step = tf_env.step(action)
    frames.append(env.render('rgb_array'))

# One frame was captured before the first step, so the number of steps taken
# is one less than the number of frames.
traj_len = len(frames) - 1

In [7]:
# Display the number of environment steps taken in the rollout above.
traj_len

363

In [11]:
# Save the captured frames to
#   ./videos/sac/<global step>-steps/episode-<i>.mp4
# using the first index i whose file does not exist yet, so earlier
# recordings for the same step count are never overwritten.
video_dir = './videos/sac/{}-steps'.format(global_step.numpy())

# Create the output directory up front: the writer cannot open a file inside
# a directory that does not exist (e.g. fresh checkout or a new step count).
os.makedirs(video_dir, exist_ok=True)

path_template = video_dir + '/episode-{}.mp4'
i = 0
while osp.exists(path_template.format(i)):
    i += 1
path = path_template.format(i)

# The context manager closes the writer even if appending a frame raises,
# so a failed run does not leak the writer.
with imageio.get_writer(path) as writer:
    for frame in frames:
        writer.append_data(frame)

