In [1]:
import re


def _major_minor(version):
    """Return the leading ``(major, minor)`` of a version string as ints.

    Version strings must not be compared directly: string comparison is
    lexicographic, so ``"0.9" >= "0.20"`` is True and ``"10.0" >= "2.0"`` is
    False. Comparing integer tuples gives the intended ordering. Anything
    after the minor number (patch level, "rc1", "+cpu", ...) is ignored.

    Raises:
        ValueError: if ``version`` does not start with ``<digits>.<digits>``.
    """
    match = re.match(r"(\d+)\.(\d+)", version)
    if match is None:
        raise ValueError("unrecognised version string: {!r}".format(version))
    return int(match.group(1)), int(match.group(2))


# Scikit-Learn ≥0.20 is required
import sklearn
assert _major_minor(sklearn.__version__) >= (0, 20)
import PIL
# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert _major_minor(tf.__version__) >= (2, 0)

# Common imports
import numpy as np
import os

# NOTE(review): no active code visible in this cell references jupyter_beeper;
# the only beep call left (after training) is commented out.
import jupyter_beeper
import time  # used to time the training run near the end of the cell

# to make this notebook's output stable across runs
# (both seeds are set a second time further down, after save_fig is defined)
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Default font sizes for axis labels and tick labels.
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# To get smooth animations
import matplotlib.animation as animation
mpl.rc('animation', html='jshtml')
# Imported as a module too, so the wrapper classes it defines can be listed
# with dir() further down in this cell.
import tf_agents.environments.wrappers

import gym
# gym.envs.registry.all()

from tf_agents.environments.wrappers import ActionRepeat

# Gym id used by the exploratory gym.make / suite_gym.load calls below.
# The agent itself is trained on environment_name, which is set separately.
game_name = "Pong-v4"

# Output directory for saved figures (./images/rl). It is created up front so
# that later saves do not fail on a missing folder.
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "rl"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under IMAGES_PATH.

    The file is named ``<fig_id>.<fig_extension>``.

    Args:
        fig_id: Base name of the output file, without extension.
        tight_layout: When true, ``plt.tight_layout()`` is applied first.
        fig_extension: File extension; also passed to savefig as the format.
        resolution: DPI passed to ``plt.savefig``.
    """
    file_name = "".join((fig_id, ".", fig_extension))
    destination = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(destination, format=fig_extension, dpi=resolution)
    
    
# Seeds are set a second time here; they were already set to the same value
# right after the common imports at the top of the cell.
tf.random.set_seed(42)
np.random.seed(42)

from tf_agents.environments import suite_gym
import ale_py

from functools import partial
from gym.wrappers import TimeLimit

from tf_agents.environments import suite_atari
from tf_agents.environments.atari_preprocessing import AtariPreprocessing
from tf_agents.environments.atari_wrappers import FrameStack4
from tf_agents.environments.wrappers import ActionRepeat
import tf_agents.environments.wrappers
from functools import partial
from gym.wrappers import TimeLimit
from tf_agents.environments import suite_atari
from tf_agents.environments.atari_preprocessing import AtariPreprocessing
from tf_agents.environments.atari_wrappers import FrameStack4
from tf_agents.environments.tf_py_environment import TFPyEnvironment
from tf_agents.networks.q_network import QNetwork
from tf_agents.agents.dqn.dqn_agent import DqnAgent
from tf_agents.metrics import tf_metrics
from tf_agents.eval.metric_utils import log_metrics
from tf_agents.drivers.dynamic_step_driver import DynamicStepDriver
from tf_agents.policies.random_tf_policy import RandomTFPolicy
from tf_agents.trajectories.trajectory import to_transition
from tf_agents.utils.common import function
from tf_agents.utils import common
from tf_agents.policies import policy_saver

import logging


# Metrics updated during collection: the list is appended to the collect
# driver's observers and logged every 1000 iterations by train_agent.
train_metrics = [
    tf_metrics.NumberOfEpisodes(),
    tf_metrics.EnvironmentSteps(),
    tf_metrics.AverageReturnMetric(),
    tf_metrics.AverageEpisodeLengthMetric(),
]

# Raise the root logger to INFO; the metric dumps appear in the cell output
# as "INFO:absl:" lines.
logging.getLogger().setLevel(logging.INFO)
# Logs the metrics once before any environment step has been taken.
log_metrics(train_metrics)

# Exploratory section: build the plain Gym environment and print its wrapper
# chain. `env` is rebound to a suite_atari environment further down.
env = gym.make(game_name)
print(env)

env.seed(42)
env.reset()

# env.step(1) # Fire
repeating_env = ActionRepeat(env, times=4)
# Bare expression: its value is discarded because it is not the last
# statement of the cell.
repeating_env.unwrapped

# Print the name and first docstring line of every class in
# tf_agents.environments.wrappers that subclasses PyEnvironmentBaseWrapper.
# The hasattr(..., "__base__") test filters out non-class attributes before
# issubclass() is called.
for name in dir(tf_agents.environments.wrappers):
    obj = getattr(tf_agents.environments.wrappers, name)
    if hasattr(obj, "__base__") and issubclass(obj, tf_agents.environments.wrappers.PyEnvironmentBaseWrapper):
        print("{:27s} {}".format(name, obj.__doc__.split("\n")[0]))

        
# Demonstrates passing wrappers to suite_gym.load: a Gym-level TimeLimit of
# 100 steps plus a TF-Agents ActionRepeat of 4.
# NOTE(review): limited_repeating_env is not referenced by later code visible
# in this cell.
limited_repeating_env = suite_gym.load(
    game_name,
    gym_env_wrappers=[partial(TimeLimit, max_episode_steps=100)],
    env_wrappers=[partial(ActionRepeat, times=4)],
)

max_episode_steps = 27000 # <=> 108k ALE frames since 1 step = 4 frames
# Environment id passed to suite_atari.load for training.
environment_name = "PongNoFrameskip-v4"

# class AtariPreprocessingWithSkipStart(AtariPreprocessing):
#     def skip_frames(self, num_skip):
#         for _ in range(num_skip):
#           super().step(0) # NOOP for num_skip steps
#     def reset(self, **kwargs):
#         obs = super().reset(**kwargs)
#         self.skip_frames(40)
#         return obs
#     def step(self, action):
#         lives_before_action = self.ale.lives()
#         obs, rewards, done, info = super().step(action)
#         if self.ale.lives() < lives_before_action and not done:
#             self.skip_frames(40)
#         return obs, rewards, done, info

def plot_observation(obs):
    """Show a 4-frame stacked observation as a single RGB image.

    Only three colour channels are available, so the first three frames are
    mapped to R, G and B. The newest frame is shown by adding its positive
    difference from the mean of the older frames to both the red and blue
    channels, which makes it appear pink.

    Args:
        obs: Array whose last axis holds the four stacked frames. It is copied
            by the float32 conversion, so the caller's array is not modified.
    """
    stacked = obs.astype(np.float32)
    older_mean = stacked[..., :3].mean(axis=-1)
    newest_delta = np.maximum(stacked[..., 3] - older_mean, 0.)
    rgb = stacked[..., :3]
    for channel in (0, 2):  # red and blue
        rgb[..., channel] += newest_delta
    plt.imshow(np.clip(rgb / 150, 0, 1))
    plt.axis("off")

# Load the Atari environment with preprocessing and 4-frame stacking applied
# at the Gym level. The same load call is repeated later, just before the
# TFPyEnvironment is built, so this instance is only used for the seed/reset
# and the ActionRepeat demo that follow.
env = suite_atari.load(
    environment_name,
    max_episode_steps=max_episode_steps,
    gym_env_wrappers=[AtariPreprocessing, FrameStack4])

env.seed(42)
env.reset()
# for _ in range(4):
#     time_step = env.step(3) # LEFT
    
# Same wrapper demo as earlier in the cell, this time around the suite_atari
# environment.
repeating_env = ActionRepeat(env, times=4)

# Identical to the earlier suite_gym.load call; it rebinds
# limited_repeating_env to a fresh environment.
# NOTE(review): neither name is used by later code visible in this cell.
limited_repeating_env = suite_gym.load(
    game_name,
    gym_env_wrappers=[partial(TimeLimit, max_episode_steps=100)],
    env_wrappers=[partial(ActionRepeat, times=4)],
)

# Re-assigns the same value that was set before the first suite_atari.load.
max_episode_steps = 27000 # <=> 108k ALE frames since 1 step = 4 frames
# environment_name = "PomgNoFrameskip-v4"
# SpaceInvadersNoFrameskip-v4

# class AtariPreprocessingWithSkipStart(AtariPreprocessing):
#     def skip_frames(self, num_skip):
#         for _ in range(num_skip):
#           super().step(0) # NOOP for num_skip steps
#     def reset(self, **kwargs):
#         obs = super().reset(**kwargs)
#         self.skip_frames(40)
#         return obs
#     def step(self, action):
#         lives_before_action = self.ale.lives()
#         obs, rewards, done, info = super().step(action)
#         if self.ale.lives() < lives_before_action and not done:
#             self.skip_frames(40)
#         return obs, rewards, done, info

class ShowProgress:
    """Driver observer that prints a running ``done/total`` step counter.

    Each call counts one step unless the trajectory is an episode boundary.
    The counter is printed on the same line (carriage return, no newline)
    whenever it is a multiple of 100.
    """

    def __init__(self, total):
        # `total` is only used for display.
        self.total = total
        self.counter = 0

    def __call__(self, trajectory):
        counts_as_step = not trajectory.is_boundary()
        if counts_as_step:
            self.counter += 1
        steps_done = self.counter
        if steps_done % 100 != 0:
            return
        print("\r{}/{}".format(steps_done, self.total), end="")
            
def train_agent(n_iterations):
    """Run the collect/train loop for ``n_iterations`` iterations.

    Each iteration runs the collect driver once, pulls one batch from the
    replay-buffer dataset and trains the agent on it. The loss is printed on
    a single refreshing line, and the training metrics are logged every 1000
    iterations (including iteration 0).

    Relies on the module-level ``agent``, ``tf_env``, ``dataset``,
    ``collect_driver`` and ``train_metrics``.
    """
    time_step = None
    policy_state = agent.collect_policy.get_initial_state(tf_env.batch_size)
    batches = iter(dataset)
    for step in range(n_iterations):
        # Collect first, so the buffer has fresh experience before sampling.
        time_step, policy_state = collect_driver.run(time_step, policy_state)
        experience, _sample_info = next(batches)
        loss_info = agent.train(experience)
        status = "\r{} loss:{:.5f}".format(step, loss_info.loss.numpy())
        print(status, end="")
        if step % 1000 == 0:
            log_metrics(train_metrics)

def update_scene(num, frames, patch):
    """Animation callback: display frame number ``num`` on ``patch``.

    Returns a one-element tuple containing the updated artist.
    """
    current_frame = frames[num]
    patch.set_data(current_frame)
    return (patch,)

def plot_animation(frames, repeat=False, interval=40):
    """Build a matplotlib animation that plays ``frames`` in order.

    Args:
        frames: Indexable sequence of images accepted by ``plt.imshow``.
        repeat: Passed to ``FuncAnimation``.
        interval: Passed to ``FuncAnimation``.

    Returns:
        The ``FuncAnimation`` object. The figure is closed before returning.
    """
    figure = plt.figure()
    image_artist = plt.imshow(frames[0])
    plt.axis('off')
    animation_options = dict(
        fargs=(frames, image_artist),
        frames=len(frames),
        repeat=repeat,
        interval=interval,
    )
    result = animation.FuncAnimation(figure, update_scene, **animation_options)
    plt.close()
    return result

def create_gif(frames, gif_name="myAgentPlays_3.gif", duration=30, loop=0):
    """Save a sequence of frames as an animated GIF under ``images/rl``.

    Args:
        frames: Iterable of arrays accepted by ``PIL.Image.fromarray``, one
            per animation frame.
        gif_name: File name of the GIF inside ``images/rl``. The default is
            the name that was previously hard-coded.
        duration: Per-frame display time passed to Pillow's GIF writer.
        loop: Loop count passed to Pillow's GIF writer.

    Returns:
        The path the GIF was written to.

    Raises:
        ValueError: if ``frames`` is empty (previously an IndexError with no
            indication of the cause).
    """
    frame_images = [PIL.Image.fromarray(frame) for frame in frames]
    # Check the converted list, not `frames`: truth-testing a NumPy array is
    # ambiguous, and a generator is always truthy.
    if not frame_images:
        raise ValueError("create_gif() needs at least one frame")

    image_dir = os.path.join("images", "rl")
    # Do not rely on another part of the notebook having created the folder.
    os.makedirs(image_dir, exist_ok=True)
    image_path = os.path.join(image_dir, gif_name)

    first_image, *remaining_images = frame_images
    first_image.save(image_path, format='GIF',
                     append_images=remaining_images,
                     save_all=True,
                     duration=duration,
                     loop=loop)
    return image_path

# Environment actually used for training: Atari preprocessing plus 4-frame
# stacking, wrapped below as a TF environment.
env = suite_atari.load(
    environment_name,
    max_episode_steps=max_episode_steps,
    gym_env_wrappers=[AtariPreprocessing, FrameStack4])


tf_env = TFPyEnvironment(env)

# Cast observations to float32 and divide by 255 inside the network
# (assumes pixel values in 0..255 -- TODO confirm against the observation spec).
preprocessing_layer = keras.layers.Lambda(
                          lambda obs: tf.cast(obs, np.float32) / 255.)
# One (filters, kernel size, stride) tuple per convolutional layer, followed
# by a single 512-unit dense layer.
conv_layer_params=[(32, (8, 8), 4), (64, (4, 4), 2), (64, (3, 3), 1)]
fc_layer_params=[512]

q_net = QNetwork(
    tf_env.observation_spec(),
    tf_env.action_spec(),
    preprocessing_layers=preprocessing_layer,
    conv_layer_params=conv_layer_params,
    fc_layer_params=fc_layer_params)

train_step = tf.Variable(0) # training-step counter (stands in for a global step)
update_period = 4 # run a training step every 4 collect steps
optimizer = keras.optimizers.RMSprop(learning_rate=2.5e-4, rho=0.95, momentum=0.0,
                                     epsilon=0.00001, centered=True)
# A learning-rate schedule is reused here to decay epsilon: it is evaluated
# at train_step by the lambda passed to the agent below.
epsilon_fn = keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=1.0, # initial ε
    decay_steps=100000// update_period, # 25,000 training steps <=> 400,000 ALE frames
    end_learning_rate=0.01) # final ε
agent = DqnAgent(tf_env.time_step_spec(),
                 tf_env.action_spec(),
                 q_network=q_net,
                 optimizer=optimizer,
                 target_update_period=2000, # <=> 32,000 ALE frames
                 td_errors_loss_fn=keras.losses.Huber(reduction="none"),
                 gamma=0.99, # discount factor
                 train_step_counter=train_step,
                 epsilon_greedy=lambda: epsilon_fn(train_step))
agent.initialize()


from tf_agents.replay_buffers import tf_uniform_replay_buffer

# Replay buffer holding up to 100,000 items per batch entry.
# NOTE(review): the run log shows a warning about a 2,822,400,000-byte
# allocation, presumably for this buffer -- lower max_length if memory is tight.
replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    data_spec=agent.collect_data_spec,
    batch_size=tf_env.batch_size,
    max_length=100000) # reduce if OOM error

# Checkpointer tracking the agent, its policy, the replay buffer and the
# training-step counter.
# NOTE(review): no active code visible in this cell calls
# train_checkpointer.save(); the only save call is commented out after
# training, so nothing here writes a checkpoint to restore from.
checkpoint_dir = os.path.join(os.getcwd(), 'lastModelCheckpoint')
train_checkpointer = common.Checkpointer(
    ckpt_dir=checkpoint_dir,
    max_to_keep=1,
    agent=agent,
    policy=agent.policy,
    replay_buffer=replay_buffer,
    global_step=train_step
)

# Restores the latest checkpoint if one exists; the run log reports
# "No checkpoint available" for this run.
train_checkpointer.initialize_or_restore()
# NOTE(review): global_step is not referenced again in the visible code;
# train_step is the counter the agent and checkpointer actually use.
global_step = tf.compat.v1.train.get_global_step()

replay_buffer_observer = replay_buffer.add_batch

# Driver used during training: every run() takes update_period steps with the
# collect policy, feeding the replay buffer and the training metrics.
collect_driver = DynamicStepDriver(
    tf_env,
    agent.collect_policy,
    observers=[replay_buffer_observer] + train_metrics,
    num_steps=update_period) # collect 4 steps for each training iteration

# Despite the name, this value is passed as num_steps (agent steps) below;
# per the comment on that line, 20,000 steps correspond to 80,000 ALE frames.
ale_frames = 20000

# Warm-up: fill the replay buffer with experience from a random policy
# before training starts.
initial_collect_policy = RandomTFPolicy(tf_env.time_step_spec(),
                                        tf_env.action_spec())
init_driver = DynamicStepDriver(
    tf_env,
    initial_collect_policy,
    observers=[replay_buffer.add_batch, ShowProgress(ale_frames)],
    num_steps=ale_frames) # <=> 80,000 ALE frames

final_time_step, final_policy_state = init_driver.run()

# NOTE: this replaces the global TensorFlow seed of 42 set earlier.
tf.random.set_seed(9) # chosen to show an example of trajectory at the end of an episode

#trajectories, buffer_info = replay_buffer.get_next( # get_next() is deprecated
#    sample_batch_size=2, num_steps=3)

# Exploratory sample: one batch of 2 trajectories, 3 steps each.
# NOTE(review): trajectories, buffer_info and the to_transition() results are
# not used by later code visible in this cell.
trajectories, buffer_info = next(iter(replay_buffer.as_dataset(
    sample_batch_size=2,
    num_steps=3,
    single_deterministic_pass=False)))

time_steps, action_steps, next_time_steps = to_transition(trajectories)

# Training dataset: batches of 64 samples, each holding 2 consecutive steps.
dataset = replay_buffer.as_dataset(
    sample_batch_size=64,
    num_steps=2,
    num_parallel_calls=3).prefetch(3)

# Wrap the two hot-path methods with tf_agents.utils.common.function
# (presumably to run them as TensorFlow functions for speed -- TODO confirm).
collect_driver.run = function(collect_driver.run)
agent.train = function(agent.train)

# Long-running step: 300,000 training iterations, timed with wall-clock time.
start_time = time.time()
train_agent(n_iterations=300000)
print("\nTime Taken: ", time.time() - start_time)
# beep_when_finished()

# NOTE(review): eval_policy is only referenced by commented-out code below;
# the watch driver uses agent.policy directly.
eval_policy = agent.policy
# eval_policy_dir = os.path.join(os.getcwd(), 'eval_policy')
# policy_dir = os.path.join(os.getcwd(), 'savedPolicy')
# tf_policy_saver = policy_saver.PolicySaver(eval_policy)
# tf_policy_saver.save(policy_dir)
# tf_policy_saver.save(eval_policy_dir)
# train_checkpointer.save(train_step)
# checkpoint_path = os.path.join(os.getcwd(), "savedModel", "cpkt")
# os.makedirs(checkpoint_path, exist_ok=True)
# model_checkpoint = tf.train.Checkpoint(model = agent, step = train_step)
# saved_path = model_checkpoint.save(file_prefix = checkpoint_path)
# print("Model saced in: \n", saved_path)


# Rendered frames, appended to by save_frames while a driver is running.
frames = []


def save_frames(trajectory):
    """Driver observer: render the first wrapped Python environment and store the frame.

    The ``trajectory`` argument is required by the observer interface but is
    not used. The frame is appended to the module-level ``frames`` list.
    """
    rendered = tf_env.pyenv.envs[0].render(mode="rgb_array")
    frames.append(rendered)

# Number of steps to play with the trained (greedy) policy while recording.
num_frames = 1000

# Run agent.policy for num_frames steps; save_frames records a rendered frame
# on every observer call and ShowProgress prints the step count.
watch_driver = DynamicStepDriver(
    tf_env,
    agent.policy,
    observers=[save_frames, ShowProgress(num_frames)],
    num_steps=num_frames)
final_time_step, final_policy_state = watch_driver.run()

# Write the recorded frames to an animated GIF.
create_gif(frames)

# NOTE(review): anim is assigned but is not the last expression of the cell,
# so the notebook will not display it unless it is shown explicitly.
anim = plot_animation(frames)

print("\n------------Program Execution Complete-------------\n")

2022-12-14 17:03:20.051620: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-14 17:03:20.222342: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-12-14 17:03:20.945243: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2022-12-14 17:03:20.945358: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer

<TimeLimit<AtariEnv<Pong-v4>>>
ActionClipWrapper           Wraps an environment and clips actions to spec before applying.
ActionDiscretizeWrapper     Wraps an environment with continuous actions and discretizes them.
ActionOffsetWrapper         Offsets actions to be zero-based.
ActionRepeat                Repeates actions over n-steps while acummulating the received reward.
ExtraDisabledActionsWrapper Adds extra unavailable actions.
FixedLength                 Truncates long episodes and pads short episodes to have a fixed length.
FlattenActionWrapper        Flattens the action.
FlattenObservationsWrapper  Wraps an environment and flattens nested multi-dimensional observations.
GoalReplayEnvWrapper        Adds a goal to the observation, used for HER (Hindsight Experience Replay).
HistoryWrapper              Adds observation and action history to the environment's observations.
ObservationFilterWrapper    Filters observations based on an array of indexes.
OneHotActionWrapper         Co

2022-12-14 17:03:26.514578: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8200
2022-12-14 17:03:27.785956: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 2822400000 exceeds 10% of free system memory.
INFO:absl:No checkpoint available at /home/jupyter/Reinforcement_Learning_Atari/lastModelCheckpoint


Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.


Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.


Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.foldr(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.foldr(fn, elems))


Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.foldr(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.foldr(fn, elems))
INFO:absl: 
		 NumberOfEpisodes = 0
		 EnvironmentSteps = 4
		 AverageReturn = 0.0
		 AverageEpisodeLength = 0.0


994 loss:0.01827

INFO:absl: 
		 NumberOfEpisodes = 4
		 EnvironmentSteps = 4004
		 AverageReturn = -17.5
		 AverageEpisodeLength = 868.75


1996 loss:0.00615

INFO:absl: 
		 NumberOfEpisodes = 8
		 EnvironmentSteps = 8004
		 AverageReturn = -19.125
		 AverageEpisodeLength = 875.25


2996 loss:0.00016

INFO:absl: 
		 NumberOfEpisodes = 13
		 EnvironmentSteps = 12004
		 AverageReturn = -20.600000381469727
		 AverageEpisodeLength = 941.7000122070312


3995 loss:0.00035

INFO:absl: 
		 NumberOfEpisodes = 17
		 EnvironmentSteps = 16004
		 AverageReturn = -20.600000381469727
		 AverageEpisodeLength = 964.7999877929688


4992 loss:0.00027

INFO:absl: 
		 NumberOfEpisodes = 21
		 EnvironmentSteps = 20004
		 AverageReturn = -20.200000762939453
		 AverageEpisodeLength = 968.5999755859375


5992 loss:0.00015

INFO:absl: 
		 NumberOfEpisodes = 25
		 EnvironmentSteps = 24004
		 AverageReturn = -20.299999237060547
		 AverageEpisodeLength = 917.5999755859375


6996 loss:0.00152

INFO:absl: 
		 NumberOfEpisodes = 29
		 EnvironmentSteps = 28004
		 AverageReturn = -19.899999618530273
		 AverageEpisodeLength = 977.7000122070312


7995 loss:0.00014

INFO:absl: 
		 NumberOfEpisodes = 33
		 EnvironmentSteps = 32004
		 AverageReturn = -20.100000381469727
		 AverageEpisodeLength = 964.7999877929688


8992 loss:0.00121

INFO:absl: 
		 NumberOfEpisodes = 38
		 EnvironmentSteps = 36004
		 AverageReturn = -20.299999237060547
		 AverageEpisodeLength = 926.4000244140625


9997 loss:0.00926

INFO:absl: 
		 NumberOfEpisodes = 42
		 EnvironmentSteps = 40004
		 AverageReturn = -20.299999237060547
		 AverageEpisodeLength = 937.7000122070312


10991 loss:0.00117

INFO:absl: 
		 NumberOfEpisodes = 46
		 EnvironmentSteps = 44004
		 AverageReturn = -20.299999237060547
		 AverageEpisodeLength = 941.2999877929688


11994 loss:0.00758

INFO:absl: 
		 NumberOfEpisodes = 50
		 EnvironmentSteps = 48004
		 AverageReturn = -20.100000381469727
		 AverageEpisodeLength = 968.5


12991 loss:0.00421

INFO:absl: 
		 NumberOfEpisodes = 55
		 EnvironmentSteps = 52004
		 AverageReturn = -20.5
		 AverageEpisodeLength = 942.7999877929688


13998 loss:0.00202

INFO:absl: 
		 NumberOfEpisodes = 58
		 EnvironmentSteps = 56004
		 AverageReturn = -20.0
		 AverageEpisodeLength = 988.9000244140625


14998 loss:0.00072

INFO:absl: 
		 NumberOfEpisodes = 63
		 EnvironmentSteps = 60004
		 AverageReturn = -19.600000381469727
		 AverageEpisodeLength = 1000.4000244140625


15993 loss:0.00050

INFO:absl: 
		 NumberOfEpisodes = 67
		 EnvironmentSteps = 64004
		 AverageReturn = -19.5
		 AverageEpisodeLength = 973.7000122070312


16991 loss:0.00117

INFO:absl: 
		 NumberOfEpisodes = 71
		 EnvironmentSteps = 68004
		 AverageReturn = -19.5
		 AverageEpisodeLength = 961.0


17996 loss:0.00045

INFO:absl: 
		 NumberOfEpisodes = 75
		 EnvironmentSteps = 72004
		 AverageReturn = -19.399999618530273
		 AverageEpisodeLength = 994.7999877929688


18998 loss:0.00461

INFO:absl: 
		 NumberOfEpisodes = 79
		 EnvironmentSteps = 76004
		 AverageReturn = -19.799999237060547
		 AverageEpisodeLength = 1011.7999877929688


19995 loss:0.00124

INFO:absl: 
		 NumberOfEpisodes = 83
		 EnvironmentSteps = 80004
		 AverageReturn = -20.299999237060547
		 AverageEpisodeLength = 1000.4000244140625


20997 loss:0.00069

INFO:absl: 
		 NumberOfEpisodes = 86
		 EnvironmentSteps = 84004
		 AverageReturn = -20.200000762939453
		 AverageEpisodeLength = 1057.0999755859375


21995 loss:0.00287

INFO:absl: 
		 NumberOfEpisodes = 90
		 EnvironmentSteps = 88004
		 AverageReturn = -20.0
		 AverageEpisodeLength = 1119.5999755859375


22997 loss:0.00216

INFO:absl: 
		 NumberOfEpisodes = 93
		 EnvironmentSteps = 92004
		 AverageReturn = -20.200000762939453
		 AverageEpisodeLength = 1151.699951171875


23999 loss:0.00086

INFO:absl: 
		 NumberOfEpisodes = 97
		 EnvironmentSteps = 96004
		 AverageReturn = -20.399999618530273
		 AverageEpisodeLength = 1090.9000244140625


24994 loss:0.00073

INFO:absl: 
		 NumberOfEpisodes = 100
		 EnvironmentSteps = 100004
		 AverageReturn = -20.0
		 AverageEpisodeLength = 1140.300048828125


25999 loss:0.00092

INFO:absl: 
		 NumberOfEpisodes = 103
		 EnvironmentSteps = 104004
		 AverageReturn = -20.100000381469727
		 AverageEpisodeLength = 1224.9000244140625


26998 loss:0.00518

INFO:absl: 
		 NumberOfEpisodes = 105
		 EnvironmentSteps = 108004
		 AverageReturn = -20.0
		 AverageEpisodeLength = 1371.5


27998 loss:0.00241

INFO:absl: 
		 NumberOfEpisodes = 108
		 EnvironmentSteps = 112004
		 AverageReturn = -19.299999237060547
		 AverageEpisodeLength = 1534.300048828125


28991 loss:0.00118

INFO:absl: 
		 NumberOfEpisodes = 109
		 EnvironmentSteps = 116004
		 AverageReturn = -19.0
		 AverageEpisodeLength = 1627.5


29995 loss:0.00039

INFO:absl: 
		 NumberOfEpisodes = 111
		 EnvironmentSteps = 120004
		 AverageReturn = -18.399999618530273
		 AverageEpisodeLength = 1773.300048828125


30992 loss:0.00102

INFO:absl: 
		 NumberOfEpisodes = 112
		 EnvironmentSteps = 124004
		 AverageReturn = -18.100000381469727
		 AverageEpisodeLength = 1892.699951171875


31995 loss:0.00258

INFO:absl: 
		 NumberOfEpisodes = 114
		 EnvironmentSteps = 128004
		 AverageReturn = -17.899999618530273
		 AverageEpisodeLength = 2133.800048828125


32996 loss:0.00065

INFO:absl: 
		 NumberOfEpisodes = 115
		 EnvironmentSteps = 132004
		 AverageReturn = -17.5
		 AverageEpisodeLength = 2260.800048828125


33996 loss:0.00166

INFO:absl: 
		 NumberOfEpisodes = 117
		 EnvironmentSteps = 136004
		 AverageReturn = -16.899999618530273
		 AverageEpisodeLength = 2572.89990234375


34994 loss:0.00151

INFO:absl: 
		 NumberOfEpisodes = 118
		 EnvironmentSteps = 140004
		 AverageReturn = -16.799999237060547
		 AverageEpisodeLength = 2738.10009765625


35998 loss:0.00245

INFO:absl: 
		 NumberOfEpisodes = 119
		 EnvironmentSteps = 144004
		 AverageReturn = -16.399999618530273
		 AverageEpisodeLength = 2873.0


36997 loss:0.00119

INFO:absl: 
		 NumberOfEpisodes = 120
		 EnvironmentSteps = 148004
		 AverageReturn = -16.700000762939453
		 AverageEpisodeLength = 2863.800048828125


37991 loss:0.00057

INFO:absl: 
		 NumberOfEpisodes = 122
		 EnvironmentSteps = 152004
		 AverageReturn = -16.799999237060547
		 AverageEpisodeLength = 2961.10009765625


38992 loss:0.00157

INFO:absl: 
		 NumberOfEpisodes = 123
		 EnvironmentSteps = 156004
		 AverageReturn = -16.600000381469727
		 AverageEpisodeLength = 3014.300048828125


39991 loss:0.00255

INFO:absl: 
		 NumberOfEpisodes = 124
		 EnvironmentSteps = 160004
		 AverageReturn = -16.5
		 AverageEpisodeLength = 3044.60009765625


40992 loss:0.00172

INFO:absl: 
		 NumberOfEpisodes = 125
		 EnvironmentSteps = 164004
		 AverageReturn = -16.399999618530273
		 AverageEpisodeLength = 3117.300048828125


41993 loss:0.00242

INFO:absl: 
		 NumberOfEpisodes = 126
		 EnvironmentSteps = 168004
		 AverageReturn = -16.899999618530273
		 AverageEpisodeLength = 3095.199951171875


42992 loss:0.00104

INFO:absl: 
		 NumberOfEpisodes = 127
		 EnvironmentSteps = 172004
		 AverageReturn = -16.200000762939453
		 AverageEpisodeLength = 3240.800048828125


43994 loss:0.00266

INFO:absl: 
		 NumberOfEpisodes = 128
		 EnvironmentSteps = 176004
		 AverageReturn = -15.699999809265137
		 AverageEpisodeLength = 3292.800048828125


44994 loss:0.00222

INFO:absl: 
		 NumberOfEpisodes = 129
		 EnvironmentSteps = 180004
		 AverageReturn = -15.899999618530273
		 AverageEpisodeLength = 3360.5


45997 loss:0.00066

INFO:absl: 
		 NumberOfEpisodes = 130
		 EnvironmentSteps = 184004
		 AverageReturn = -15.0
		 AverageEpisodeLength = 3563.0


46997 loss:0.00249

INFO:absl: 
		 NumberOfEpisodes = 131
		 EnvironmentSteps = 188004
		 AverageReturn = -14.5
		 AverageEpisodeLength = 3648.10009765625


47994 loss:0.00590

INFO:absl: 
		 NumberOfEpisodes = 132
		 EnvironmentSteps = 192004
		 AverageReturn = -13.199999809265137
		 AverageEpisodeLength = 3947.10009765625


48992 loss:0.00164

INFO:absl: 
		 NumberOfEpisodes = 133
		 EnvironmentSteps = 196004
		 AverageReturn = -12.399999618530273
		 AverageEpisodeLength = 4065.10009765625


49994 loss:0.00099

INFO:absl: 
		 NumberOfEpisodes = 134
		 EnvironmentSteps = 200004
		 AverageReturn = -11.800000190734863
		 AverageEpisodeLength = 4259.5


50996 loss:0.00109

INFO:absl: 
		 NumberOfEpisodes = 135
		 EnvironmentSteps = 204004
		 AverageReturn = -11.300000190734863
		 AverageEpisodeLength = 4287.5


51996 loss:0.00116

INFO:absl: 
		 NumberOfEpisodes = 136
		 EnvironmentSteps = 208004
		 AverageReturn = -10.600000381469727
		 AverageEpisodeLength = 4318.7998046875


52998 loss:0.00129

INFO:absl: 
		 NumberOfEpisodes = 136
		 EnvironmentSteps = 212004
		 AverageReturn = -10.600000381469727
		 AverageEpisodeLength = 4318.7998046875


53994 loss:0.00146

INFO:absl: 
		 NumberOfEpisodes = 137
		 EnvironmentSteps = 216004
		 AverageReturn = -10.199999809265137
		 AverageEpisodeLength = 4439.60009765625


54994 loss:0.00109

INFO:absl: 
		 NumberOfEpisodes = 138
		 EnvironmentSteps = 220004
		 AverageReturn = -9.800000190734863
		 AverageEpisodeLength = 4488.7998046875


55997 loss:0.00176

INFO:absl: 
		 NumberOfEpisodes = 139
		 EnvironmentSteps = 224004
		 AverageReturn = -9.5
		 AverageEpisodeLength = 4454.39990234375


56991 loss:0.00120

INFO:absl: 
		 NumberOfEpisodes = 140
		 EnvironmentSteps = 228004
		 AverageReturn = -9.800000190734863
		 AverageEpisodeLength = 4379.2998046875


57996 loss:0.00140

INFO:absl: 
		 NumberOfEpisodes = 141
		 EnvironmentSteps = 232004
		 AverageReturn = -9.600000381469727
		 AverageEpisodeLength = 4425.2998046875


58998 loss:0.00128

INFO:absl: 
		 NumberOfEpisodes = 142
		 EnvironmentSteps = 236004
		 AverageReturn = -10.199999809265137
		 AverageEpisodeLength = 4269.5


59995 loss:0.00047

INFO:absl: 
		 NumberOfEpisodes = 143
		 EnvironmentSteps = 240004
		 AverageReturn = -10.5
		 AverageEpisodeLength = 4150.7998046875


60998 loss:0.00093

INFO:absl: 
		 NumberOfEpisodes = 144
		 EnvironmentSteps = 244004
		 AverageReturn = -10.199999809265137
		 AverageEpisodeLength = 4104.89990234375


61996 loss:0.00128

INFO:absl: 
		 NumberOfEpisodes = 145
		 EnvironmentSteps = 248004
		 AverageReturn = -10.5
		 AverageEpisodeLength = 4115.2001953125


62995 loss:0.00152

INFO:absl: 
		 NumberOfEpisodes = 146
		 EnvironmentSteps = 252004
		 AverageReturn = -10.600000381469727
		 AverageEpisodeLength = 4093.39990234375


63992 loss:0.00063

INFO:absl: 
		 NumberOfEpisodes = 148
		 EnvironmentSteps = 256004
		 AverageReturn = -11.300000190734863
		 AverageEpisodeLength = 3833.5


64994 loss:0.00101

INFO:absl: 
		 NumberOfEpisodes = 149
		 EnvironmentSteps = 260004
		 AverageReturn = -11.0
		 AverageEpisodeLength = 3840.60009765625


65997 loss:0.00105

INFO:absl: 
		 NumberOfEpisodes = 150
		 EnvironmentSteps = 264004
		 AverageReturn = -10.399999618530273
		 AverageEpisodeLength = 3840.89990234375


66993 loss:0.00242

INFO:absl: 
		 NumberOfEpisodes = 151
		 EnvironmentSteps = 268004
		 AverageReturn = -10.800000190734863
		 AverageEpisodeLength = 3704.60009765625


67993 loss:0.00158

INFO:absl: 
		 NumberOfEpisodes = 152
		 EnvironmentSteps = 272004
		 AverageReturn = -10.899999618530273
		 AverageEpisodeLength = 3697.60009765625


68997 loss:0.00172

INFO:absl: 
		 NumberOfEpisodes = 153
		 EnvironmentSteps = 276004
		 AverageReturn = -10.800000190734863
		 AverageEpisodeLength = 3722.60009765625


69992 loss:0.00065

INFO:absl: 
		 NumberOfEpisodes = 155
		 EnvironmentSteps = 280004
		 AverageReturn = -11.300000190734863
		 AverageEpisodeLength = 3498.800048828125


70997 loss:0.00104

INFO:absl: 
		 NumberOfEpisodes = 156
		 EnvironmentSteps = 284004
		 AverageReturn = -11.5
		 AverageEpisodeLength = 3366.699951171875


71996 loss:0.00103

INFO:absl: 
		 NumberOfEpisodes = 158
		 EnvironmentSteps = 288004
		 AverageReturn = -12.199999809265137
		 AverageEpisodeLength = 3147.699951171875


72997 loss:0.00059

INFO:absl: 
		 NumberOfEpisodes = 159
		 EnvironmentSteps = 292004
		 AverageReturn = -12.600000381469727
		 AverageEpisodeLength = 3060.60009765625


73999 loss:0.00348

INFO:absl: 
		 NumberOfEpisodes = 160
		 EnvironmentSteps = 296004
		 AverageReturn = -12.800000190734863
		 AverageEpisodeLength = 2994.300048828125


74993 loss:0.00355

INFO:absl: 
		 NumberOfEpisodes = 161
		 EnvironmentSteps = 300004
		 AverageReturn = -12.300000190734863
		 AverageEpisodeLength = 3023.800048828125


75994 loss:0.00162

INFO:absl: 
		 NumberOfEpisodes = 163
		 EnvironmentSteps = 304004
		 AverageReturn = -11.199999809265137
		 AverageEpisodeLength = 3037.300048828125


76991 loss:0.00146

INFO:absl: 
		 NumberOfEpisodes = 164
		 EnvironmentSteps = 308004
		 AverageReturn = -10.300000190734863
		 AverageEpisodeLength = 3044.39990234375


77999 loss:0.00264

INFO:absl: 
		 NumberOfEpisodes = 165
		 EnvironmentSteps = 312004
		 AverageReturn = -10.100000381469727
		 AverageEpisodeLength = 3068.800048828125


78999 loss:0.00089

INFO:absl: 
		 NumberOfEpisodes = 166
		 EnvironmentSteps = 316004
		 AverageReturn = -10.300000190734863
		 AverageEpisodeLength = 3093.10009765625


79992 loss:0.00210

INFO:absl: 
		 NumberOfEpisodes = 167
		 EnvironmentSteps = 320004
		 AverageReturn = -9.300000190734863
		 AverageEpisodeLength = 3260.89990234375


80995 loss:0.00111

INFO:absl: 
		 NumberOfEpisodes = 169
		 EnvironmentSteps = 324004
		 AverageReturn = -8.399999618530273
		 AverageEpisodeLength = 3393.300048828125


81993 loss:0.00143

INFO:absl: 
		 NumberOfEpisodes = 170
		 EnvironmentSteps = 328004
		 AverageReturn = -8.5
		 AverageEpisodeLength = 3370.699951171875


82995 loss:0.00090

INFO:absl: 
		 NumberOfEpisodes = 171
		 EnvironmentSteps = 332004
		 AverageReturn = -9.199999809265137
		 AverageEpisodeLength = 3300.89990234375


83991 loss:0.00122

INFO:absl: 
		 NumberOfEpisodes = 172
		 EnvironmentSteps = 336004
		 AverageReturn = -9.100000381469727
		 AverageEpisodeLength = 3376.300048828125


84992 loss:0.00252

INFO:absl: 
		 NumberOfEpisodes = 173
		 EnvironmentSteps = 340004
		 AverageReturn = -9.199999809265137
		 AverageEpisodeLength = 3402.5


85999 loss:0.00103

INFO:absl: 
		 NumberOfEpisodes = 174
		 EnvironmentSteps = 344004
		 AverageReturn = -9.399999618530273
		 AverageEpisodeLength = 3467.60009765625


86995 loss:0.00173

INFO:absl: 
		 NumberOfEpisodes = 175
		 EnvironmentSteps = 348004
		 AverageReturn = -9.399999618530273
		 AverageEpisodeLength = 3522.699951171875


87993 loss:0.00158

INFO:absl: 
		 NumberOfEpisodes = 176
		 EnvironmentSteps = 352004
		 AverageReturn = -8.600000381469727
		 AverageEpisodeLength = 3673.89990234375


88999 loss:0.00203

INFO:absl: 
		 NumberOfEpisodes = 177
		 EnvironmentSteps = 356004
		 AverageReturn = -9.199999809265137
		 AverageEpisodeLength = 3698.0


89997 loss:0.00288

INFO:absl: 
		 NumberOfEpisodes = 178
		 EnvironmentSteps = 360004
		 AverageReturn = -9.0
		 AverageEpisodeLength = 3836.0


90995 loss:0.00102

INFO:absl: 
		 NumberOfEpisodes = 178
		 EnvironmentSteps = 364004
		 AverageReturn = -9.0
		 AverageEpisodeLength = 3836.0


91991 loss:0.00148

INFO:absl: 
		 NumberOfEpisodes = 179
		 EnvironmentSteps = 368004
		 AverageReturn = -7.400000095367432
		 AverageEpisodeLength = 4038.300048828125


92995 loss:0.00184

INFO:absl: 
		 NumberOfEpisodes = 180
		 EnvironmentSteps = 372004
		 AverageReturn = -6.800000190734863
		 AverageEpisodeLength = 4189.89990234375


93991 loss:0.00147

INFO:absl: 
		 NumberOfEpisodes = 181
		 EnvironmentSteps = 376004
		 AverageReturn = -6.699999809265137
		 AverageEpisodeLength = 4284.7998046875


94992 loss:0.00195

INFO:absl: 
		 NumberOfEpisodes = 182
		 EnvironmentSteps = 380004
		 AverageReturn = -6.300000190734863
		 AverageEpisodeLength = 4386.39990234375


95991 loss:0.00091

INFO:absl: 
		 NumberOfEpisodes = 183
		 EnvironmentSteps = 384004
		 AverageReturn = -6.5
		 AverageEpisodeLength = 4440.2998046875


96991 loss:0.00137

INFO:absl: 
		 NumberOfEpisodes = 184
		 EnvironmentSteps = 388004
		 AverageReturn = -6.0
		 AverageEpisodeLength = 4512.39990234375


97991 loss:0.00055

INFO:absl: 
		 NumberOfEpisodes = 185
		 EnvironmentSteps = 392004
		 AverageReturn = -6.199999809265137
		 AverageEpisodeLength = 4503.7001953125


98992 loss:0.00269

INFO:absl: 
		 NumberOfEpisodes = 186
		 EnvironmentSteps = 396004
		 AverageReturn = -5.800000190734863
		 AverageEpisodeLength = 4600.0


99991 loss:0.00122

INFO:absl: 
		 NumberOfEpisodes = 187
		 EnvironmentSteps = 400004
		 AverageReturn = -5.5
		 AverageEpisodeLength = 4551.10009765625


100996 loss:0.00210

INFO:absl: 
		 NumberOfEpisodes = 187
		 EnvironmentSteps = 404004
		 AverageReturn = -5.5
		 AverageEpisodeLength = 4551.10009765625


101994 loss:0.00093

INFO:absl: 
		 NumberOfEpisodes = 188
		 EnvironmentSteps = 408004
		 AverageReturn = -5.0
		 AverageEpisodeLength = 4511.5


102996 loss:0.00083

INFO:absl: 
		 NumberOfEpisodes = 189
		 EnvironmentSteps = 412004
		 AverageReturn = -4.800000190734863
		 AverageEpisodeLength = 4559.10009765625


103994 loss:0.00091

INFO:absl: 
		 NumberOfEpisodes = 190
		 EnvironmentSteps = 416004
		 AverageReturn = -4.800000190734863
		 AverageEpisodeLength = 4655.39990234375


104998 loss:0.00161

INFO:absl: 
		 NumberOfEpisodes = 190
		 EnvironmentSteps = 420004
		 AverageReturn = -4.800000190734863
		 AverageEpisodeLength = 4655.39990234375


105992 loss:0.00202

INFO:absl: 
		 NumberOfEpisodes = 191
		 EnvironmentSteps = 424004
		 AverageReturn = -3.0999999046325684
		 AverageEpisodeLength = 4875.89990234375


106993 loss:0.00125

INFO:absl: 
		 NumberOfEpisodes = 192
		 EnvironmentSteps = 428004
		 AverageReturn = -2.5999999046325684
		 AverageEpisodeLength = 4915.60009765625


107998 loss:0.00212

INFO:absl: 
		 NumberOfEpisodes = 192
		 EnvironmentSteps = 432004
		 AverageReturn = -2.5999999046325684
		 AverageEpisodeLength = 4915.60009765625


108999 loss:0.00126

INFO:absl: 
		 NumberOfEpisodes = 193
		 EnvironmentSteps = 436004
		 AverageReturn = -1.399999976158142
		 AverageEpisodeLength = 4960.89990234375


109991 loss:0.00253

INFO:absl: 
		 NumberOfEpisodes = 194
		 EnvironmentSteps = 440004
		 AverageReturn = -0.6000000238418579
		 AverageEpisodeLength = 4956.10009765625


110993 loss:0.00209

INFO:absl: 
		 NumberOfEpisodes = 195
		 EnvironmentSteps = 444004
		 AverageReturn = 1.2000000476837158
		 AverageEpisodeLength = 5039.2001953125


111997 loss:0.00129

INFO:absl: 
		 NumberOfEpisodes = 196
		 EnvironmentSteps = 448004
		 AverageReturn = 2.700000047683716
		 AverageEpisodeLength = 4961.5


112999 loss:0.00248

INFO:absl: 
		 NumberOfEpisodes = 197
		 EnvironmentSteps = 452004
		 AverageReturn = 3.700000047683716
		 AverageEpisodeLength = 5003.39990234375


113992 loss:0.00420

INFO:absl: 
		 NumberOfEpisodes = 198
		 EnvironmentSteps = 456004
		 AverageReturn = 3.299999952316284
		 AverageEpisodeLength = 5113.2998046875


114998 loss:0.00165

INFO:absl: 
		 NumberOfEpisodes = 198
		 EnvironmentSteps = 460004
		 AverageReturn = 3.299999952316284
		 AverageEpisodeLength = 5113.2998046875


115993 loss:0.00546

INFO:absl: 
		 NumberOfEpisodes = 199
		 EnvironmentSteps = 464004
		 AverageReturn = 2.5
		 AverageEpisodeLength = 5114.10009765625


116995 loss:0.00249

INFO:absl: 
		 NumberOfEpisodes = 200
		 EnvironmentSteps = 468004
		 AverageReturn = 3.200000047683716
		 AverageEpisodeLength = 5127.10009765625


117993 loss:0.00283

INFO:absl: 
		 NumberOfEpisodes = 201
		 EnvironmentSteps = 472004
		 AverageReturn = 4.400000095367432
		 AverageEpisodeLength = 4960.0


118997 loss:0.00114

INFO:absl: 
		 NumberOfEpisodes = 202
		 EnvironmentSteps = 476004
		 AverageReturn = 4.699999809265137
		 AverageEpisodeLength = 4877.2998046875


119994 loss:0.00127

INFO:absl: 
		 NumberOfEpisodes = 203
		 EnvironmentSteps = 480004
		 AverageReturn = 5.900000095367432
		 AverageEpisodeLength = 4720.7001953125


120996 loss:0.00174

INFO:absl: 
		 NumberOfEpisodes = 203
		 EnvironmentSteps = 484004
		 AverageReturn = 5.900000095367432
		 AverageEpisodeLength = 4720.7001953125


121996 loss:0.00042

INFO:absl: 
		 NumberOfEpisodes = 204
		 EnvironmentSteps = 488004
		 AverageReturn = 5.699999809265137
		 AverageEpisodeLength = 4735.2001953125


122991 loss:0.00073

INFO:absl: 
		 NumberOfEpisodes = 205
		 EnvironmentSteps = 492004
		 AverageReturn = 5.599999904632568
		 AverageEpisodeLength = 4829.89990234375


123991 loss:0.00061

INFO:absl: 
		 NumberOfEpisodes = 206
		 EnvironmentSteps = 496004
		 AverageReturn = 4.0
		 AverageEpisodeLength = 4958.89990234375


124992 loss:0.00243

INFO:absl: 
		 NumberOfEpisodes = 207
		 EnvironmentSteps = 500004
		 AverageReturn = 3.5999999046325684
		 AverageEpisodeLength = 5010.0


125997 loss:0.00072

INFO:absl: 
		 NumberOfEpisodes = 207
		 EnvironmentSteps = 504004
		 AverageReturn = 3.5999999046325684
		 AverageEpisodeLength = 5010.0


126991 loss:0.00569

INFO:absl: 
		 NumberOfEpisodes = 208
		 EnvironmentSteps = 508004
		 AverageReturn = 4.599999904632568
		 AverageEpisodeLength = 4889.5


127991 loss:0.00079

INFO:absl: 
		 NumberOfEpisodes = 209
		 EnvironmentSteps = 512004
		 AverageReturn = 5.400000095367432
		 AverageEpisodeLength = 4800.0


128996 loss:0.00150

INFO:absl: 
		 NumberOfEpisodes = 211
		 EnvironmentSteps = 516004
		 AverageReturn = 6.400000095367432
		 AverageEpisodeLength = 4510.39990234375


129993 loss:0.00139

INFO:absl: 
		 NumberOfEpisodes = 211
		 EnvironmentSteps = 520004
		 AverageReturn = 6.400000095367432
		 AverageEpisodeLength = 4510.39990234375


130998 loss:0.00373

INFO:absl: 
		 NumberOfEpisodes = 212
		 EnvironmentSteps = 524004
		 AverageReturn = 6.199999809265137
		 AverageEpisodeLength = 4525.60009765625


131992 loss:0.00367

INFO:absl: 
		 NumberOfEpisodes = 213
		 EnvironmentSteps = 528004
		 AverageReturn = 5.199999809265137
		 AverageEpisodeLength = 4636.60009765625


132999 loss:0.00136

INFO:absl: 
		 NumberOfEpisodes = 214
		 EnvironmentSteps = 532004
		 AverageReturn = 4.900000095367432
		 AverageEpisodeLength = 4596.60009765625


133992 loss:0.00156

INFO:absl: 
		 NumberOfEpisodes = 215
		 EnvironmentSteps = 536004
		 AverageReturn = 5.300000190734863
		 AverageEpisodeLength = 4453.2001953125


134991 loss:0.00151

INFO:absl: 
		 NumberOfEpisodes = 216
		 EnvironmentSteps = 540004
		 AverageReturn = 6.599999904632568
		 AverageEpisodeLength = 4314.2001953125


135997 loss:0.00088

INFO:absl: 
		 NumberOfEpisodes = 217
		 EnvironmentSteps = 544004
		 AverageReturn = 6.699999809265137
		 AverageEpisodeLength = 4249.10009765625


136996 loss:0.00135

INFO:absl: 
		 NumberOfEpisodes = 218
		 EnvironmentSteps = 548004
		 AverageReturn = 6.599999904632568
		 AverageEpisodeLength = 4239.2998046875


137998 loss:0.00273

INFO:absl: 
		 NumberOfEpisodes = 219
		 EnvironmentSteps = 552004
		 AverageReturn = 7.300000190734863
		 AverageEpisodeLength = 4185.10009765625


138995 loss:0.00123

INFO:absl: 
		 NumberOfEpisodes = 220
		 EnvironmentSteps = 556004
		 AverageReturn = 6.300000190734863
		 AverageEpisodeLength = 4336.39990234375


139998 loss:0.00100

INFO:absl: 
		 NumberOfEpisodes = 220
		 EnvironmentSteps = 560004
		 AverageReturn = 6.300000190734863
		 AverageEpisodeLength = 4336.39990234375


140995 loss:0.00181

INFO:absl: 
		 NumberOfEpisodes = 221
		 EnvironmentSteps = 564004
		 AverageReturn = 5.400000095367432
		 AverageEpisodeLength = 4456.89990234375


141999 loss:0.00384

INFO:absl: 
		 NumberOfEpisodes = 222
		 EnvironmentSteps = 568004
		 AverageReturn = 5.5
		 AverageEpisodeLength = 4374.5


142992 loss:0.00102

INFO:absl: 
		 NumberOfEpisodes = 223
		 EnvironmentSteps = 572004
		 AverageReturn = 6.199999809265137
		 AverageEpisodeLength = 4346.0


143995 loss:0.00120

INFO:absl: 
		 NumberOfEpisodes = 224
		 EnvironmentSteps = 576004
		 AverageReturn = 6.599999904632568
		 AverageEpisodeLength = 4370.89990234375


144992 loss:0.00250

INFO:absl: 
		 NumberOfEpisodes = 225
		 EnvironmentSteps = 580004
		 AverageReturn = 6.5
		 AverageEpisodeLength = 4456.60009765625


145992 loss:0.00101

INFO:absl: 
		 NumberOfEpisodes = 226
		 EnvironmentSteps = 584004
		 AverageReturn = 6.699999809265137
		 AverageEpisodeLength = 4509.89990234375


146997 loss:0.00118

INFO:absl: 
		 NumberOfEpisodes = 227
		 EnvironmentSteps = 588004
		 AverageReturn = 7.199999809265137
		 AverageEpisodeLength = 4540.2001953125


147993 loss:0.00075

INFO:absl: 
		 NumberOfEpisodes = 227
		 EnvironmentSteps = 592004
		 AverageReturn = 7.199999809265137
		 AverageEpisodeLength = 4540.2001953125


148999 loss:0.00125

INFO:absl: 
		 NumberOfEpisodes = 228
		 EnvironmentSteps = 596004
		 AverageReturn = 7.300000190734863
		 AverageEpisodeLength = 4548.60009765625


149998 loss:0.00098

INFO:absl: 
		 NumberOfEpisodes = 229
		 EnvironmentSteps = 600004
		 AverageReturn = 7.199999809265137
		 AverageEpisodeLength = 4550.7998046875


150995 loss:0.00124

INFO:absl: 
		 NumberOfEpisodes = 230
		 EnvironmentSteps = 604004
		 AverageReturn = 8.0
		 AverageEpisodeLength = 4460.60009765625


151999 loss:0.00092

INFO:absl: 
		 NumberOfEpisodes = 231
		 EnvironmentSteps = 608004
		 AverageReturn = 8.899999618530273
		 AverageEpisodeLength = 4383.7998046875


152994 loss:0.00110

INFO:absl: 
		 NumberOfEpisodes = 232
		 EnvironmentSteps = 612004
		 AverageReturn = 8.899999618530273
		 AverageEpisodeLength = 4406.60009765625


153993 loss:0.00246

INFO:absl: 
		 NumberOfEpisodes = 233
		 EnvironmentSteps = 616004
		 AverageReturn = 8.699999809265137
		 AverageEpisodeLength = 4345.60009765625


154999 loss:0.00165

INFO:absl: 
		 NumberOfEpisodes = 234
		 EnvironmentSteps = 620004
		 AverageReturn = 9.0
		 AverageEpisodeLength = 4318.7998046875


155999 loss:0.00170

INFO:absl: 
		 NumberOfEpisodes = 235
		 EnvironmentSteps = 624004
		 AverageReturn = 8.899999618530273
		 AverageEpisodeLength = 4293.60009765625


156998 loss:0.00116

INFO:absl: 
		 NumberOfEpisodes = 236
		 EnvironmentSteps = 628004
		 AverageReturn = 9.199999809265137
		 AverageEpisodeLength = 4243.89990234375


157997 loss:0.00117

INFO:absl: 
		 NumberOfEpisodes = 237
		 EnvironmentSteps = 632004
		 AverageReturn = 10.300000190734863
		 AverageEpisodeLength = 4087.300048828125


158995 loss:0.00104

INFO:absl: 
		 NumberOfEpisodes = 238
		 EnvironmentSteps = 636004
		 AverageReturn = 10.300000190734863
		 AverageEpisodeLength = 4081.800048828125


159995 loss:0.00104

INFO:absl: 
		 NumberOfEpisodes = 239
		 EnvironmentSteps = 640004
		 AverageReturn = 10.100000381469727
		 AverageEpisodeLength = 4021.0


160993 loss:0.00234

INFO:absl: 
		 NumberOfEpisodes = 240
		 EnvironmentSteps = 644004
		 AverageReturn = 9.300000190734863
		 AverageEpisodeLength = 4086.199951171875


161999 loss:0.00187

INFO:absl: 
		 NumberOfEpisodes = 241
		 EnvironmentSteps = 648004
		 AverageReturn = 8.699999809265137
		 AverageEpisodeLength = 4070.60009765625


162995 loss:0.00154

INFO:absl: 
		 NumberOfEpisodes = 242
		 EnvironmentSteps = 652004
		 AverageReturn = 8.899999618530273
		 AverageEpisodeLength = 4014.39990234375


163992 loss:0.00075

INFO:absl: 
		 NumberOfEpisodes = 243
		 EnvironmentSteps = 656004
		 AverageReturn = 8.199999809265137
		 AverageEpisodeLength = 4016.39990234375


164995 loss:0.00125

INFO:absl: 
		 NumberOfEpisodes = 244
		 EnvironmentSteps = 660004
		 AverageReturn = 8.199999809265137
		 AverageEpisodeLength = 3927.800048828125


165999 loss:0.00121

INFO:absl: 
		 NumberOfEpisodes = 246
		 EnvironmentSteps = 664004
		 AverageReturn = 7.699999809265137
		 AverageEpisodeLength = 3830.800048828125


166997 loss:0.00152

INFO:absl: 
		 NumberOfEpisodes = 247
		 EnvironmentSteps = 668004
		 AverageReturn = 6.699999809265137
		 AverageEpisodeLength = 3873.199951171875


167994 loss:0.00089

INFO:absl: 
		 NumberOfEpisodes = 248
		 EnvironmentSteps = 672004
		 AverageReturn = 7.599999904632568
		 AverageEpisodeLength = 3728.800048828125


168999 loss:0.00073

INFO:absl: 
		 NumberOfEpisodes = 249
		 EnvironmentSteps = 676004
		 AverageReturn = 8.399999618530273
		 AverageEpisodeLength = 3724.5


169995 loss:0.00148

INFO:absl: 
		 NumberOfEpisodes = 250
		 EnvironmentSteps = 680004
		 AverageReturn = 9.699999809265137
		 AverageEpisodeLength = 3535.89990234375


170993 loss:0.00127

INFO:absl: 
		 NumberOfEpisodes = 252
		 EnvironmentSteps = 684004
		 AverageReturn = 10.600000381469727
		 AverageEpisodeLength = 3460.10009765625


171991 loss:0.00109

INFO:absl: 
		 NumberOfEpisodes = 253
		 EnvironmentSteps = 688004
		 AverageReturn = 11.699999809265137
		 AverageEpisodeLength = 3403.5


172994 loss:0.00226

INFO:absl: 
		 NumberOfEpisodes = 254
		 EnvironmentSteps = 692004
		 AverageReturn = 12.300000190734863
		 AverageEpisodeLength = 3342.699951171875


173996 loss:0.00099

INFO:absl: 
		 NumberOfEpisodes = 255
		 EnvironmentSteps = 696004
		 AverageReturn = 13.399999618530273
		 AverageEpisodeLength = 3259.39990234375


174993 loss:0.00151

INFO:absl: 
		 NumberOfEpisodes = 257
		 EnvironmentSteps = 700004
		 AverageReturn = 14.0
		 AverageEpisodeLength = 3171.39990234375


175998 loss:0.00166

INFO:absl: 
		 NumberOfEpisodes = 258
		 EnvironmentSteps = 704004
		 AverageReturn = 13.800000190734863
		 AverageEpisodeLength = 3134.199951171875


176994 loss:0.00103

INFO:absl: 
		 NumberOfEpisodes = 259
		 EnvironmentSteps = 708004
		 AverageReturn = 13.800000190734863
		 AverageEpisodeLength = 3066.39990234375


177995 loss:0.00083

INFO:absl: 
		 NumberOfEpisodes = 261
		 EnvironmentSteps = 712004
		 AverageReturn = 13.600000381469727
		 AverageEpisodeLength = 3109.10009765625


178993 loss:0.00283

INFO:absl: 
		 NumberOfEpisodes = 262
		 EnvironmentSteps = 716004
		 AverageReturn = 13.699999809265137
		 AverageEpisodeLength = 3091.10009765625


179998 loss:0.00115

INFO:absl: 
		 NumberOfEpisodes = 263
		 EnvironmentSteps = 720004
		 AverageReturn = 13.699999809265137
		 AverageEpisodeLength = 3056.699951171875


180992 loss:0.00345

INFO:absl: 
		 NumberOfEpisodes = 265
		 EnvironmentSteps = 724004
		 AverageReturn = 13.300000190734863
		 AverageEpisodeLength = 3062.699951171875


181999 loss:0.00145

INFO:absl: 
		 NumberOfEpisodes = 266
		 EnvironmentSteps = 728004
		 AverageReturn = 14.199999809265137
		 AverageEpisodeLength = 2953.39990234375


182991 loss:0.00098

INFO:absl: 
		 NumberOfEpisodes = 268
		 EnvironmentSteps = 732004
		 AverageReturn = 14.699999809265137
		 AverageEpisodeLength = 2910.39990234375


183999 loss:0.00070

INFO:absl: 
		 NumberOfEpisodes = 270
		 EnvironmentSteps = 736004
		 AverageReturn = 15.699999809265137
		 AverageEpisodeLength = 2701.39990234375


184999 loss:0.00201

INFO:absl: 
		 NumberOfEpisodes = 271
		 EnvironmentSteps = 740004
		 AverageReturn = 15.899999618530273
		 AverageEpisodeLength = 2628.89990234375


185992 loss:0.00223

INFO:absl: 
		 NumberOfEpisodes = 272
		 EnvironmentSteps = 744004
		 AverageReturn = 15.399999618530273
		 AverageEpisodeLength = 2678.0


186999 loss:0.00126

INFO:absl: 
		 NumberOfEpisodes = 274
		 EnvironmentSteps = 748004
		 AverageReturn = 14.5
		 AverageEpisodeLength = 2681.300048828125


187991 loss:0.00143

INFO:absl: 
		 NumberOfEpisodes = 275
		 EnvironmentSteps = 752004
		 AverageReturn = 14.199999809265137
		 AverageEpisodeLength = 2735.60009765625


188993 loss:0.00106

INFO:absl: 
		 NumberOfEpisodes = 276
		 EnvironmentSteps = 756004
		 AverageReturn = 13.100000381469727
		 AverageEpisodeLength = 2748.5


189991 loss:0.00128

INFO:absl: 
		 NumberOfEpisodes = 278
		 EnvironmentSteps = 760004
		 AverageReturn = 12.300000190734863
		 AverageEpisodeLength = 2768.800048828125


190999 loss:0.00152

INFO:absl: 
		 NumberOfEpisodes = 280
		 EnvironmentSteps = 764004
		 AverageReturn = 12.600000381469727
		 AverageEpisodeLength = 2680.800048828125


191994 loss:0.00150

INFO:absl: 
		 NumberOfEpisodes = 283
		 EnvironmentSteps = 768004
		 AverageReturn = 14.800000190734863
		 AverageEpisodeLength = 2346.89990234375


192993 loss:0.00111

INFO:absl: 
		 NumberOfEpisodes = 284
		 EnvironmentSteps = 772004
		 AverageReturn = 15.100000381469727
		 AverageEpisodeLength = 2355.5


193991 loss:0.00115

INFO:absl: 
		 NumberOfEpisodes = 285
		 EnvironmentSteps = 776004
		 AverageReturn = 15.800000190734863
		 AverageEpisodeLength = 2367.699951171875


194996 loss:0.00137

INFO:absl: 
		 NumberOfEpisodes = 287
		 EnvironmentSteps = 780004
		 AverageReturn = 17.600000381469727
		 AverageEpisodeLength = 2340.60009765625


195999 loss:0.00124

INFO:absl: 
		 NumberOfEpisodes = 288
		 EnvironmentSteps = 784004
		 AverageReturn = 17.100000381469727
		 AverageEpisodeLength = 2442.39990234375


196999 loss:0.00069

INFO:absl: 
		 NumberOfEpisodes = 290
		 EnvironmentSteps = 788004
		 AverageReturn = 17.299999237060547
		 AverageEpisodeLength = 2464.10009765625


197997 loss:0.00257

INFO:absl: 
		 NumberOfEpisodes = 292
		 EnvironmentSteps = 792004
		 AverageReturn = 17.100000381469727
		 AverageEpisodeLength = 2539.10009765625


198995 loss:0.00188

INFO:absl: 
		 NumberOfEpisodes = 294
		 EnvironmentSteps = 796004
		 AverageReturn = 17.700000762939453
		 AverageEpisodeLength = 2398.10009765625


199997 loss:0.00142

INFO:absl: 
		 NumberOfEpisodes = 295
		 EnvironmentSteps = 800004
		 AverageReturn = 17.799999237060547
		 AverageEpisodeLength = 2294.39990234375


200997 loss:0.00094

INFO:absl: 
		 NumberOfEpisodes = 297
		 EnvironmentSteps = 804004
		 AverageReturn = 15.800000190734863
		 AverageEpisodeLength = 2252.199951171875


201995 loss:0.00077

INFO:absl: 
		 NumberOfEpisodes = 299
		 EnvironmentSteps = 808004
		 AverageReturn = 15.199999809265137
		 AverageEpisodeLength = 2175.89990234375


202999 loss:0.00067

INFO:absl: 
		 NumberOfEpisodes = 301
		 EnvironmentSteps = 812004
		 AverageReturn = 15.699999809265137
		 AverageEpisodeLength = 2080.199951171875


203996 loss:0.00069

INFO:absl: 
		 NumberOfEpisodes = 304
		 EnvironmentSteps = 816004
		 AverageReturn = 16.100000381469727
		 AverageEpisodeLength = 2036.800048828125


204994 loss:0.00155

INFO:absl: 
		 NumberOfEpisodes = 306
		 EnvironmentSteps = 820004
		 AverageReturn = 16.299999237060547
		 AverageEpisodeLength = 1971.0999755859375


205991 loss:0.00093

INFO:absl: 
		 NumberOfEpisodes = 308
		 EnvironmentSteps = 824004
		 AverageReturn = 18.600000381469727
		 AverageEpisodeLength = 1906.9000244140625


206998 loss:0.00298

INFO:absl: 
		 NumberOfEpisodes = 309
		 EnvironmentSteps = 828004
		 AverageReturn = 18.700000762939453
		 AverageEpisodeLength = 1978.699951171875


207994 loss:0.00076

INFO:absl: 
		 NumberOfEpisodes = 312
		 EnvironmentSteps = 832004
		 AverageReturn = 18.899999618530273
		 AverageEpisodeLength = 1962.5999755859375


208997 loss:0.00216

INFO:absl: 
		 NumberOfEpisodes = 314
		 EnvironmentSteps = 836004
		 AverageReturn = 18.899999618530273
		 AverageEpisodeLength = 1960.0999755859375


209998 loss:0.00052

INFO:absl: 
		 NumberOfEpisodes = 316
		 EnvironmentSteps = 840004
		 AverageReturn = 19.100000381469727
		 AverageEpisodeLength = 1904.4000244140625


210992 loss:0.00074

INFO:absl: 
		 NumberOfEpisodes = 318
		 EnvironmentSteps = 844004
		 AverageReturn = 19.200000762939453
		 AverageEpisodeLength = 1896.5


211993 loss:0.00045

INFO:absl: 
		 NumberOfEpisodes = 320
		 EnvironmentSteps = 848004
		 AverageReturn = 20.0
		 AverageEpisodeLength = 1815.199951171875


212997 loss:0.00060

INFO:absl: 
		 NumberOfEpisodes = 323
		 EnvironmentSteps = 852004
		 AverageReturn = 19.700000762939453
		 AverageEpisodeLength = 1858.4000244140625


213992 loss:0.00035

INFO:absl: 
		 NumberOfEpisodes = 325
		 EnvironmentSteps = 856004
		 AverageReturn = 19.799999237060547
		 AverageEpisodeLength = 1862.800048828125


214991 loss:0.00036

INFO:absl: 
		 NumberOfEpisodes = 327
		 EnvironmentSteps = 860004
		 AverageReturn = 20.0
		 AverageEpisodeLength = 1828.5


215997 loss:0.00062

INFO:absl: 
		 NumberOfEpisodes = 329
		 EnvironmentSteps = 864004
		 AverageReturn = 20.0
		 AverageEpisodeLength = 1811.199951171875


216994 loss:0.00052

INFO:absl: 
		 NumberOfEpisodes = 332
		 EnvironmentSteps = 868004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1773.300048828125


217992 loss:0.00051

INFO:absl: 
		 NumberOfEpisodes = 334
		 EnvironmentSteps = 872004
		 AverageReturn = 20.399999618530273
		 AverageEpisodeLength = 1748.9000244140625


218996 loss:0.00282

INFO:absl: 
		 NumberOfEpisodes = 336
		 EnvironmentSteps = 876004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1752.5


219992 loss:0.00348

INFO:absl: 
		 NumberOfEpisodes = 339
		 EnvironmentSteps = 880004
		 AverageReturn = 20.5
		 AverageEpisodeLength = 1721.4000244140625


220999 loss:0.00200

INFO:absl: 
		 NumberOfEpisodes = 341
		 EnvironmentSteps = 884004
		 AverageReturn = 20.5
		 AverageEpisodeLength = 1707.5999755859375


221995 loss:0.00044

INFO:absl: 
		 NumberOfEpisodes = 343
		 EnvironmentSteps = 888004
		 AverageReturn = 20.600000381469727
		 AverageEpisodeLength = 1691.4000244140625


222993 loss:0.00037

INFO:absl: 
		 NumberOfEpisodes = 346
		 EnvironmentSteps = 892004
		 AverageReturn = 20.600000381469727
		 AverageEpisodeLength = 1715.300048828125


223992 loss:0.00042

INFO:absl: 
		 NumberOfEpisodes = 348
		 EnvironmentSteps = 896004
		 AverageReturn = 20.600000381469727
		 AverageEpisodeLength = 1713.5999755859375


224994 loss:0.00109

INFO:absl: 
		 NumberOfEpisodes = 350
		 EnvironmentSteps = 900004
		 AverageReturn = 20.399999618530273
		 AverageEpisodeLength = 1726.800048828125


225991 loss:0.00081

INFO:absl: 
		 NumberOfEpisodes = 352
		 EnvironmentSteps = 904004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1743.5


226993 loss:0.00041

INFO:absl: 
		 NumberOfEpisodes = 355
		 EnvironmentSteps = 908004
		 AverageReturn = 20.0
		 AverageEpisodeLength = 1788.5999755859375


227998 loss:0.00062

INFO:absl: 
		 NumberOfEpisodes = 357
		 EnvironmentSteps = 912004
		 AverageReturn = 20.100000381469727
		 AverageEpisodeLength = 1758.800048828125


228994 loss:0.00055

INFO:absl: 
		 NumberOfEpisodes = 359
		 EnvironmentSteps = 916004
		 AverageReturn = 20.200000762939453
		 AverageEpisodeLength = 1775.699951171875


229995 loss:0.00018

INFO:absl: 
		 NumberOfEpisodes = 361
		 EnvironmentSteps = 920004
		 AverageReturn = 19.899999618530273
		 AverageEpisodeLength = 1794.5999755859375


230999 loss:0.00053

INFO:absl: 
		 NumberOfEpisodes = 364
		 EnvironmentSteps = 924004
		 AverageReturn = 19.600000381469727
		 AverageEpisodeLength = 1815.300048828125


231998 loss:0.00029

INFO:absl: 
		 NumberOfEpisodes = 366
		 EnvironmentSteps = 928004
		 AverageReturn = 19.799999237060547
		 AverageEpisodeLength = 1784.5999755859375


232997 loss:0.00047

INFO:absl: 
		 NumberOfEpisodes = 368
		 EnvironmentSteps = 932004
		 AverageReturn = 19.700000762939453
		 AverageEpisodeLength = 1764.5999755859375


233996 loss:0.00030

INFO:absl: 
		 NumberOfEpisodes = 371
		 EnvironmentSteps = 936004
		 AverageReturn = 20.200000762939453
		 AverageEpisodeLength = 1718.199951171875


234992 loss:0.00025

INFO:absl: 
		 NumberOfEpisodes = 373
		 EnvironmentSteps = 940004
		 AverageReturn = 20.799999237060547
		 AverageEpisodeLength = 1662.699951171875


235993 loss:0.00054

INFO:absl: 
		 NumberOfEpisodes = 375
		 EnvironmentSteps = 944004
		 AverageReturn = 20.799999237060547
		 AverageEpisodeLength = 1693.9000244140625


236997 loss:0.00023

INFO:absl: 
		 NumberOfEpisodes = 378
		 EnvironmentSteps = 948004
		 AverageReturn = 20.899999618530273
		 AverageEpisodeLength = 1692.0


237994 loss:0.00027

INFO:absl: 
		 NumberOfEpisodes = 380
		 EnvironmentSteps = 952004
		 AverageReturn = 20.700000762939453
		 AverageEpisodeLength = 1737.9000244140625


238999 loss:0.00048

INFO:absl: 
		 NumberOfEpisodes = 382
		 EnvironmentSteps = 956004
		 AverageReturn = 20.5
		 AverageEpisodeLength = 1751.0999755859375


239994 loss:0.00023

INFO:absl: 
		 NumberOfEpisodes = 384
		 EnvironmentSteps = 960004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1748.800048828125


240997 loss:0.00022

INFO:absl: 
		 NumberOfEpisodes = 387
		 EnvironmentSteps = 964004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1744.5999755859375


241995 loss:0.00042

INFO:absl: 
		 NumberOfEpisodes = 389
		 EnvironmentSteps = 968004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1740.300048828125


242997 loss:0.00023

INFO:absl: 
		 NumberOfEpisodes = 391
		 EnvironmentSteps = 972004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1724.300048828125


243999 loss:0.00010

INFO:absl: 
		 NumberOfEpisodes = 394
		 EnvironmentSteps = 976004
		 AverageReturn = 20.600000381469727
		 AverageEpisodeLength = 1698.0


244995 loss:0.00062

INFO:absl: 
		 NumberOfEpisodes = 396
		 EnvironmentSteps = 980004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1740.199951171875


245995 loss:0.00008

INFO:absl: 
		 NumberOfEpisodes = 398
		 EnvironmentSteps = 984004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1746.300048828125


246991 loss:0.00049

INFO:absl: 
		 NumberOfEpisodes = 400
		 EnvironmentSteps = 988004
		 AverageReturn = 19.100000381469727
		 AverageEpisodeLength = 1813.5999755859375


247996 loss:0.00051

INFO:absl: 
		 NumberOfEpisodes = 403
		 EnvironmentSteps = 992004
		 AverageReturn = 19.100000381469727
		 AverageEpisodeLength = 1809.4000244140625


248993 loss:0.00065

INFO:absl: 
		 NumberOfEpisodes = 405
		 EnvironmentSteps = 996004
		 AverageReturn = 19.100000381469727
		 AverageEpisodeLength = 1799.0


249995 loss:0.00029

INFO:absl: 
		 NumberOfEpisodes = 407
		 EnvironmentSteps = 1000004
		 AverageReturn = 19.200000762939453
		 AverageEpisodeLength = 1779.4000244140625


250993 loss:0.00029

INFO:absl: 
		 NumberOfEpisodes = 410
		 EnvironmentSteps = 1004004
		 AverageReturn = 20.5
		 AverageEpisodeLength = 1699.199951171875


251994 loss:0.00022

INFO:absl: 
		 NumberOfEpisodes = 412
		 EnvironmentSteps = 1008004
		 AverageReturn = 20.5
		 AverageEpisodeLength = 1712.5999755859375


252994 loss:0.00024

INFO:absl: 
		 NumberOfEpisodes = 414
		 EnvironmentSteps = 1012004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1743.0999755859375


253997 loss:0.00016

INFO:absl: 
		 NumberOfEpisodes = 417
		 EnvironmentSteps = 1016004
		 AverageReturn = 20.600000381469727
		 AverageEpisodeLength = 1731.800048828125


254991 loss:0.00031

INFO:absl: 
		 NumberOfEpisodes = 419
		 EnvironmentSteps = 1020004
		 AverageReturn = 20.200000762939453
		 AverageEpisodeLength = 1773.0


255991 loss:0.00021

INFO:absl: 
		 NumberOfEpisodes = 421
		 EnvironmentSteps = 1024004
		 AverageReturn = 20.200000762939453
		 AverageEpisodeLength = 1772.800048828125


256999 loss:0.00029

INFO:absl: 
		 NumberOfEpisodes = 423
		 EnvironmentSteps = 1028004
		 AverageReturn = 20.399999618530273
		 AverageEpisodeLength = 1746.0999755859375


257993 loss:0.00064

INFO:absl: 
		 NumberOfEpisodes = 426
		 EnvironmentSteps = 1032004
		 AverageReturn = 20.5
		 AverageEpisodeLength = 1712.4000244140625


258991 loss:0.00022

INFO:absl: 
		 NumberOfEpisodes = 428
		 EnvironmentSteps = 1036004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1712.5999755859375


259996 loss:0.00053

INFO:absl: 
		 NumberOfEpisodes = 430
		 EnvironmentSteps = 1040004
		 AverageReturn = 20.399999618530273
		 AverageEpisodeLength = 1704.800048828125


260994 loss:0.00070

INFO:absl: 
		 NumberOfEpisodes = 433
		 EnvironmentSteps = 1044004
		 AverageReturn = 19.899999618530273
		 AverageEpisodeLength = 1749.5999755859375


261994 loss:0.00098

INFO:absl: 
		 NumberOfEpisodes = 435
		 EnvironmentSteps = 1048004
		 AverageReturn = 19.600000381469727
		 AverageEpisodeLength = 1782.5999755859375


262995 loss:0.00038

INFO:absl: 
		 NumberOfEpisodes = 437
		 EnvironmentSteps = 1052004
		 AverageReturn = 19.299999237060547
		 AverageEpisodeLength = 1828.0


263995 loss:0.00046

INFO:absl: 
		 NumberOfEpisodes = 439
		 EnvironmentSteps = 1056004
		 AverageReturn = 19.600000381469727
		 AverageEpisodeLength = 1814.0999755859375


264994 loss:0.00012

INFO:absl: 
		 NumberOfEpisodes = 442
		 EnvironmentSteps = 1060004
		 AverageReturn = 19.899999618530273
		 AverageEpisodeLength = 1793.800048828125


265994 loss:0.00075

INFO:absl: 
		 NumberOfEpisodes = 444
		 EnvironmentSteps = 1064004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1739.800048828125


266999 loss:0.00118

INFO:absl: 
		 NumberOfEpisodes = 446
		 EnvironmentSteps = 1068004
		 AverageReturn = 20.299999237060547
		 AverageEpisodeLength = 1732.0999755859375


267997 loss:0.00022

INFO:absl: 
		 NumberOfEpisodes = 449
		 EnvironmentSteps = 1072004
		 AverageReturn = 20.399999618530273
		 AverageEpisodeLength = 1711.9000244140625


268993 loss:0.00014

INFO:absl: 
		 NumberOfEpisodes = 451
		 EnvironmentSteps = 1076004
		 AverageReturn = 20.100000381469727
		 AverageEpisodeLength = 1736.0999755859375


269996 loss:0.00013

INFO:absl: 
		 NumberOfEpisodes = 453
		 EnvironmentSteps = 1080004
		 AverageReturn = 20.0
		 AverageEpisodeLength = 1744.9000244140625


270993 loss:0.00020

INFO:absl: 
		 NumberOfEpisodes = 455
		 EnvironmentSteps = 1084004
		 AverageReturn = 20.0
		 AverageEpisodeLength = 1748.0999755859375


271995 loss:0.00027

INFO:absl: 
		 NumberOfEpisodes = 457
		 EnvironmentSteps = 1088004
		 AverageReturn = 19.700000762939453
		 AverageEpisodeLength = 1791.699951171875


272995 loss:0.00044

INFO:absl: 
		 NumberOfEpisodes = 460
		 EnvironmentSteps = 1092004
		 AverageReturn = 19.899999618530273
		 AverageEpisodeLength = 1782.800048828125


273992 loss:0.00025

INFO:absl: 
		 NumberOfEpisodes = 462
		 EnvironmentSteps = 1096004
		 AverageReturn = 19.5
		 AverageEpisodeLength = 1804.9000244140625


274992 loss:0.00093

INFO:absl: 
		 NumberOfEpisodes = 464
		 EnvironmentSteps = 1100004
		 AverageReturn = 19.299999237060547
		 AverageEpisodeLength = 1823.800048828125


275997 loss:0.00014

INFO:absl: 
		 NumberOfEpisodes = 466
		 EnvironmentSteps = 1104004
		 AverageReturn = 19.299999237060547
		 AverageEpisodeLength = 1821.800048828125


276996 loss:0.00052

INFO:absl: 
		 NumberOfEpisodes = 469
		 EnvironmentSteps = 1108004
		 AverageReturn = 19.299999237060547
		 AverageEpisodeLength = 1789.9000244140625


277993 loss:0.00040

INFO:absl: 
		 NumberOfEpisodes = 471
		 EnvironmentSteps = 1112004
		 AverageReturn = 19.700000762939453
		 AverageEpisodeLength = 1783.5


278992 loss:0.00032

INFO:absl: 
		 NumberOfEpisodes = 473
		 EnvironmentSteps = 1116004
		 AverageReturn = 19.299999237060547
		 AverageEpisodeLength = 1806.0999755859375


279993 loss:0.00008

INFO:absl: 
		 NumberOfEpisodes = 475
		 EnvironmentSteps = 1120004
		 AverageReturn = 19.399999618530273
		 AverageEpisodeLength = 1813.300048828125


280997 loss:0.00027

INFO:absl: 
		 NumberOfEpisodes = 477
		 EnvironmentSteps = 1124004
		 AverageReturn = 19.5
		 AverageEpisodeLength = 1822.5999755859375


281998 loss:0.00032

INFO:absl: 
		 NumberOfEpisodes = 479
		 EnvironmentSteps = 1128004
		 AverageReturn = 19.299999237060547
		 AverageEpisodeLength = 1851.0999755859375


282995 loss:0.00009

INFO:absl: 
		 NumberOfEpisodes = 482
		 EnvironmentSteps = 1132004
		 AverageReturn = 19.5
		 AverageEpisodeLength = 1845.800048828125


283999 loss:0.00064

INFO:absl: 
		 NumberOfEpisodes = 484
		 EnvironmentSteps = 1136004
		 AverageReturn = 19.799999237060547
		 AverageEpisodeLength = 1831.5999755859375


284993 loss:0.00032

INFO:absl: 
		 NumberOfEpisodes = 486
		 EnvironmentSteps = 1140004
		 AverageReturn = 19.899999618530273
		 AverageEpisodeLength = 1861.800048828125


285996 loss:0.00015

INFO:absl: 
		 NumberOfEpisodes = 488
		 EnvironmentSteps = 1144004
		 AverageReturn = 19.700000762939453
		 AverageEpisodeLength = 1895.9000244140625


286999 loss:0.00028

INFO:absl: 
		 NumberOfEpisodes = 490
		 EnvironmentSteps = 1148004
		 AverageReturn = 20.100000381469727
		 AverageEpisodeLength = 1862.699951171875


287997 loss:0.00019

INFO:absl: 
		 NumberOfEpisodes = 492
		 EnvironmentSteps = 1152004
		 AverageReturn = 20.200000762939453
		 AverageEpisodeLength = 1850.800048828125


288992 loss:0.00069

INFO:absl: 
		 NumberOfEpisodes = 495
		 EnvironmentSteps = 1156004
		 AverageReturn = 20.100000381469727
		 AverageEpisodeLength = 1838.300048828125


289995 loss:0.00017

INFO:absl: 
		 NumberOfEpisodes = 497
		 EnvironmentSteps = 1160004
		 AverageReturn = 20.200000762939453
		 AverageEpisodeLength = 1803.800048828125


290993 loss:0.00033

INFO:absl: 
		 NumberOfEpisodes = 499
		 EnvironmentSteps = 1164004
		 AverageReturn = 20.200000762939453
		 AverageEpisodeLength = 1794.4000244140625


291992 loss:0.00027

INFO:absl: 
		 NumberOfEpisodes = 501
		 EnvironmentSteps = 1168004
		 AverageReturn = 19.799999237060547
		 AverageEpisodeLength = 1825.699951171875


292992 loss:0.00026

INFO:absl: 
		 NumberOfEpisodes = 503
		 EnvironmentSteps = 1172004
		 AverageReturn = 19.600000381469727
		 AverageEpisodeLength = 1824.4000244140625


293998 loss:0.00024

INFO:absl: 
		 NumberOfEpisodes = 505
		 EnvironmentSteps = 1176004
		 AverageReturn = 19.100000381469727
		 AverageEpisodeLength = 1850.300048828125


294994 loss:0.00028

INFO:absl: 
		 NumberOfEpisodes = 508
		 EnvironmentSteps = 1180004
		 AverageReturn = 19.100000381469727
		 AverageEpisodeLength = 1879.5999755859375


295997 loss:0.00066

INFO:absl: 
		 NumberOfEpisodes = 510
		 EnvironmentSteps = 1184004
		 AverageReturn = 19.399999618530273
		 AverageEpisodeLength = 1891.4000244140625


296993 loss:0.00018

INFO:absl: 
		 NumberOfEpisodes = 512
		 EnvironmentSteps = 1188004
		 AverageReturn = 19.799999237060547
		 AverageEpisodeLength = 1880.199951171875


297999 loss:0.00056

INFO:absl: 
		 NumberOfEpisodes = 514
		 EnvironmentSteps = 1192004
		 AverageReturn = 20.200000762939453
		 AverageEpisodeLength = 1843.699951171875


298993 loss:0.00048

INFO:absl: 
		 NumberOfEpisodes = 517
		 EnvironmentSteps = 1196004
		 AverageReturn = 20.600000381469727
		 AverageEpisodeLength = 1785.199951171875


299999 loss:0.00068
Time Taken:  7536.4341950416565
1000/1000
------------Program Execution Complete-------------



In [2]:
# Persist the results of training in three ways: export the agent's policy,
# save the training checkpointer, and write a tf.train.Checkpoint of the agent.
# `agent`, `policy_saver`, `train_checkpointer` and `train_step` are defined in
# earlier cells of this notebook.

# Export the same policy object to two directories under the working directory.
eval_policy = agent.policy
eval_policy_dir = os.path.join(os.getcwd(), 'eval_policy')
policy_dir = os.path.join(os.getcwd(), 'savedPolicy')
tf_policy_saver = policy_saver.PolicySaver(eval_policy)
tf_policy_saver.save(policy_dir)
tf_policy_saver.save(eval_policy_dir)

# Save through the checkpointer created earlier, tagged with the current step.
train_checkpointer.save(train_step)

# `checkpoint_path` is a file *prefix*, not a directory: Checkpoint.save()
# appends "-<save counter>" to it (e.g. savedModel/cpkt-1). Only the parent
# directory has to exist; creating a directory at the prefix itself would just
# leave an empty 'cpkt' folder sitting next to the checkpoint files.
checkpoint_path = os.path.join(os.getcwd(), "savedModel", "cpkt")
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
model_checkpoint = tf.train.Checkpoint(model=agent, step=train_step)
saved_path = model_checkpoint.save(file_prefix=checkpoint_path)
print("Model saved in: \n", saved_path)



INFO:tensorflow:Assets written to: /home/jupyter/Reinforcement_Learning_Atari/savedPolicy/assets


  "imported and registered." % type_spec_class_name)
INFO:tensorflow:Assets written to: /home/jupyter/Reinforcement_Learning_Atari/savedPolicy/assets
  "imported and registered." % type_spec_class_name)


INFO:tensorflow:Assets written to: /home/jupyter/Reinforcement_Learning_Atari/eval_policy/assets


INFO:tensorflow:Assets written to: /home/jupyter/Reinforcement_Learning_Atari/eval_policy/assets


Model saced in: 
 /home/jupyter/Reinforcement_Learning_Atari/savedModel/cpkt-1


In [3]:
# Display `anim` as this cell's output. `anim` is created in an earlier cell
# (not visible here); presumably a matplotlib animation, since the notebook
# sets mpl.rc('animation', html='jshtml') at the top — TODO confirm.
anim

INFO:matplotlib.animation:Animation.save using <class 'matplotlib.animation.HTMLWriter'>
