In [22]:
%matplotlib inline
import bisect
import copy 
import os 
from collections import deque, Counter
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import editdistance
import sys
import RNA
from typing import Dict, List, Tuple

# import path 
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from utils.sequence_utils import translate_one_hot_to_string,generate_random_mutant
from utils.sequence_utils import translate_string_to_one_hot, translate_one_hot_to_string
from models.Theoretical_models import *
from models.Noise_wrapper import *
from exploration_strategies.CE import *
from utils.landscape_utils import *
from models.RNA_landscapes import *
from models.Multi_dimensional_model import *

import tensorflow as tf
from tf_agents.drivers import dynamic_step_driver
from tf_agents.metrics import tf_metrics
from tf_agents.agents import tf_agent
from tf_agents.policies import random_tf_policy
from tf_agents.agents.ppo import ppo_policy, ppo_agent, ppo_utils
from tf_agents.environments import py_environment, tf_py_environment
from tf_agents.environments.utils import validate_py_environment
from tf_agents.drivers import dynamic_episode_driver
from tf_agents.networks import network, normal_projection_network
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.utils import common
from tf_agents.trajectories import time_step as ts
from tf_agents.specs import array_spec

In [23]:
# --- Experiment configuration and landscape setup -------------------------
# NOTE(review): no random seed is set in the visible cells, so `wt` and
# everything derived from it differs between kernel runs.
RAA="UGCA" # RNA alphabet; the order of letters fixes the one-hot row order
alphabet_len=len(RAA)
# TODO: UNDO THIS
# length=40
length=20
noise_alpha=1
generations = 10
experiment_batch_size = 1000
# Wild-type: the first (and only) sequence returned by the generator.
wt=generate_random_sequences(length,1,alphabet=RAA)[0]
# Two landscape objects built from the same wild-type; landscape1 is wrapped
# below and used for environment validation, landscape2 is used for training.
landscape1=RNA_landscape(wt)
landscape2=RNA_landscape(wt)
noisy_landscape=Noise_wrapper(landscape1,
                              noise_alpha=noise_alpha,
                              always_costly=True)
# wt plus experiment_batch_size*10 mutants of wt, de-duplicated through a set
# and truncated to experiment_batch_size. Set iteration order is arbitrary,
# so which genotypes survive the truncation is not reproducible.
initial_genotypes=list(set([wt]+[generate_random_mutant(wt,0.05,RAA) 
                                 for i in range(experiment_batch_size*10)]))[:experiment_batch_size]
noisy_landscape.reset()
noisy_landscape.measure_true_landscape(initial_genotypes)
noisy_landscape.natural_mode=False
noisy_landscape.local_mode=False
# Bare expression. NOTE(review): it is only displayed if it is the last
# statement of its notebook cell -- confirm the cell boundary in the .ipynb.
noisy_landscape.cost

def renormalize_moves(one_hot_input, rewards_output):
    """Zero the reward of every "move" that keeps the current state.

    Args:
        one_hot_input: array whose entries are 1 at the currently occupied
            cells and 0 elsewhere.
        rewards_output: array of rewards, broadcast-compatible with
            ``one_hot_input``.

    Returns:
        ``rewards_output`` multiplied element-wise by the complement of
        ``one_hot_input`` (1 where the input is 0, 0 where it is 1).
    """
    # (x - 1) * -1 flips a 0/1 mask: occupied cells become 0, free cells 1.
    shifted = one_hot_input - 1
    complement_mask = np.multiply(shifted, -1)
    return np.multiply(rewards_output, complement_mask)

def walk_away_renormalize_moves(one_hot_input, one_hot_wt, rewards_output):
    """Zero the reward of moves that stay in place or land on the wild-type.

    Args:
        one_hot_input: 0/1 array marking the current state.
        one_hot_wt: 0/1 array marking the wild-type state.
        rewards_output: array of rewards, broadcast-compatible with both
            masks.

    Returns:
        ``rewards_output`` with every cell set in either one-hot array
        multiplied by zero; all other cells are unchanged.
    """
    # Complement each 0/1 mask, then combine them: a cell survives only when
    # it is set in neither the current state nor the wild-type.
    not_current = np.multiply(one_hot_input - 1, -1)
    not_wildtype = np.multiply(one_hot_wt - 1, -1)
    allowed_moves = np.multiply(not_wildtype, not_current)
    return np.multiply(rewards_output, allowed_moves)

def get_all_singles_fitness(model,sequence,alphabet):
    """Score every single-position substitution of ``sequence``.

    Args:
        model: object exposing ``get_fitness(sequence)``.
        sequence: the base sequence (sliceable and concatenable, e.g. a str).
        alphabet: indexable collection of symbols to substitute.

    Returns:
        Float array of shape ``(len(alphabet), len(sequence))`` where entry
        ``[j][i]`` is the fitness of ``sequence`` with position ``i`` replaced
        by ``alphabet[j]`` (this includes the unchanged sequence itself).
    """
    n_symbols = len(alphabet)
    n_positions = len(sequence)
    fitness_grid = np.zeros((n_symbols, n_positions))
    # Positions in the outer loop, symbols in the inner loop: the model is
    # queried in the same order regardless of how the grid is laid out.
    for position in range(n_positions):
        head = sequence[:position]
        tail = sequence[position + 1:]
        for row, symbol in enumerate(alphabet):
            fitness_grid[row][position] = model.get_fitness(head + symbol + tail)
    return fitness_grid

def get_all_mutants(sequence):
    """Enumerate every one-hot matrix reachable by rewriting one column.

    Args:
        sequence: 2-D array; each column is overwritten in turn.

    Returns:
        Array stacking ``shape[0] * shape[1]`` copies of ``sequence``; in the
        copy for ``(i, j)`` column ``j`` is cleared and cell ``(i, j)`` is set
        to 1. Copies are ordered row-major over ``(i, j)`` and include
        matrices identical to the input.
    """
    n_rows, n_cols = sequence.shape[0], sequence.shape[1]
    variants = []
    # np.ndindex iterates row-major: row index outermost, column innermost.
    for row, col in np.ndindex(n_rows, n_cols):
        variant = sequence.copy()
        variant[:, col] = 0
        variant[row, col] = 1
        variants.append(variant)
    return np.array(variants)

def sample_greedy(matrix):
    """Keep only the single largest entry of a 2-D matrix.

    Args:
        matrix: 2-D array of scores.

    Returns:
        Float array of the same shape that is zero everywhere except at the
        position of the (first) maximum, where it holds the maximum value.
    """
    n_rows, n_cols = matrix.shape
    flat_index = np.argmax(matrix)
    # Convert the flat (row-major) index back to 2-D coordinates.
    row, col = divmod(flat_index, n_cols)
    kept = np.zeros((n_rows, n_cols))
    kept[row][col] = matrix[row][col]
    return kept

def sample_multi_greedy(matrix, n=5):
    """Keep the ``n`` largest entries of a 2-D matrix and zero the rest.

    Args:
        matrix: 2-D array of scores.
        n: number of entries to keep (default 5, the value that used to be
            hard-coded). Clamped to the number of cells in ``matrix`` so that
            small matrices no longer make ``np.argpartition`` raise.

    Returns:
        Float array with the shape of ``matrix`` holding the selected entries
        at their original positions and zeros elsewhere. With ``n <= 0`` or an
        empty matrix the result is all zeros.
    """
    n_rows, n_cols = matrix.shape
    output = np.zeros((n_rows, n_cols))
    flat_scores = matrix.ravel()
    n = min(int(n), flat_scores.size)
    if n <= 0:
        # Guard explicitly: a slice of [-0:] would select *everything*.
        return output
    # argpartition places the n largest values in the last n slots (unordered).
    top_flat = np.argpartition(flat_scores, -n)[-n:]
    rows, cols = np.unravel_index(top_flat, (n_rows, n_cols))
    output[rows, cols] = matrix[rows, cols]
    return output

def sample_random(matrix):
    """Pick one cell uniformly at random among the non-zero cells.

    If ``matrix`` has no non-zero cell, a cell is drawn uniformly from the
    whole matrix instead.

    Args:
        matrix: 2-D array.

    Returns:
        Float array with the shape of ``matrix`` that is 1 at the chosen cell
        and 0 elsewhere (the chosen cell's original value is not copied).
    """
    n_rows, n_cols = matrix.shape
    hits = np.nonzero(matrix)
    n_axes = len(hits)
    n_hits = len(hits[0])
    if n_axes == 0 or n_hits == 0:
        # Nothing to choose from: fall back to any cell (row drawn first).
        chosen = [random.randint(0, n_rows - 1), random.randint(0, n_cols - 1)]
    else:
        # One [row, col, ...] coordinate list per non-zero cell.
        candidates = [list(coords) for coords in zip(*hits)]
        chosen = random.choice(candidates)
    row, col = chosen[0], chosen[1]
    picked = np.zeros((n_rows, n_cols))
    picked[row][col] = 1
    return picked

def action_to_scalar(matrix):
    """Return the flat (row-major) index of the first non-zero entry.

    Args:
        matrix: array to scan.

    Returns:
        The index as a Python ``int``, or ``None`` when every entry is zero.
    """
    flattened = matrix.ravel()
    return next(
        (index for index, value in enumerate(flattened) if value != 0),
        None,
    )
    
def construct_mutant_from_sample(pwm_sample, one_hot_base):
    """Apply the substitutions marked in ``pwm_sample`` to a one-hot matrix.

    Args:
        pwm_sample: 2-D array; every non-zero cell ``(i, j)`` requests that
            column ``j`` be rewritten to have its single 1 in row ``i``.
        one_hot_base: 2-D one-hot matrix to mutate (left untouched).

    Returns:
        A new float array: a copy of ``one_hot_base`` with the requested
        columns rewritten. When several cells of one column are non-zero the
        last one in row-major order wins.
    """
    mutant = np.zeros(one_hot_base.shape)
    mutant += one_hot_base
    hits = np.nonzero(pwm_sample)
    # Selection is by "non-zero", so a sampled value of exactly 0 (e.g. a
    # zero fitness) is silently ignored -- this can be problematic for
    # non-positive fitnesses.
    for row, col in zip(hits[0], hits[1]):
        mutant[:, col] = 0
        mutant[row, col] = 1
    return mutant

def best_predicted_new_gen(actor, genotypes, alphabet, pop_size):
    """Return the ``pop_size`` mutants that ``actor`` scores highest.

    Args:
        actor: callable taking a float torch tensor and returning a tensor of
            predictions (``.detach().numpy()`` is called on the result).
        genotypes: input handed to ``get_all_mutants``.
            NOTE(review): ``get_all_mutants`` reads ``.shape`` and so expects
            a 2-D array, while each mutant is then passed to
            ``translate_string_to_one_hot``, which elsewhere in this file is
            given strings -- confirm the intended input type.
        alphabet: alphabet forwarded to ``translate_string_to_one_hot``.
        pop_size: number of top-scoring mutants to keep.

    Returns:
        The entries of the mutant array selected by the last ``pop_size``
        indices of ``predictions.argsort()``.
    """
    # torch is not imported at the top of this notebook; import it here so the
    # function does not depend on a name leaking in through a star import.
    import torch

    mutants = get_all_mutants(genotypes)
    one_hot_mutants = np.array([translate_string_to_one_hot(mutant, alphabet) for mutant in mutants])
    # A leading axis of size 1 is added before the batch is given to the actor.
    torch_one_hot_mutants = torch.from_numpy(np.expand_dims(one_hot_mutants, axis=0)).float()
    predictions = actor(torch_one_hot_mutants)
    predictions = predictions.detach().numpy()
    # argsort is ascending, so the tail holds the highest predictions.
    # NOTE(review): this assumes `predictions` is 1-D -- confirm actor output.
    best_pred_ind = predictions.argsort()[-pop_size:]
    return mutants[best_pred_ind]

def make_one_hot_train_test(genotypes, model, alphabet):
    """Build one-hot inputs and fitness targets for a list of genotypes.

    Args:
        genotypes: iterable of sequences (iterated twice).
        model: object exposing ``get_fitness(genotype)``.
        alphabet: alphabet forwarded to ``translate_string_to_one_hot``.

    Returns:
        Tuple ``(one_hot, fitnesses)``: an array stacking the one-hot encoding
        of every genotype, and an array of the matching model fitness values.
    """
    # Encode everything first, then query the model, one genotype at a time.
    encoded = [translate_string_to_one_hot(seq, alphabet) for seq in genotypes]
    genotypes_one_hot = np.array(encoded)
    genotype_fitnesses = np.array([model.get_fitness(seq) for seq in genotypes])
    return genotypes_one_hot, genotype_fitnesses

In [24]:
# Parameters for the PPO agent.
# `generations` and `experiment_batch_size` repeat the values already assigned
# in the configuration cell above.
generations = 10
experiment_batch_size = 1000
# Global step variable (created on first use, otherwise the existing one).
global_step = tf.compat.v1.train.get_or_create_global_step()
optimizer = tf.keras.optimizers.Adam(1e-5)
# Tie the optimizer's iteration counter to the global step.
# NOTE(review): `train_eval` below creates its own AdamOptimizer, so this
# `optimizer` does not appear to be used by the training run -- confirm.
optimizer.iterations = global_step

In [33]:
# environment for PPO Agent
class FitnessLandscapeEnvironment(py_environment.PyEnvironment):
    """TF-Agents Python environment that walks a sequence fitness landscape.

    The observation is the current sequence as an
    ``(alphabet_len, seq_len)`` one-hot matrix. An action is a ``(1, 2)``
    array of floats in ``[0, 1]``; its two components are scaled to a row
    (letter) and a column (position) and the corresponding single-site
    substitution is applied. The reward of a step is the landscape fitness of
    the resulting sequence.

    An episode ends when
      * the step budget ``max_episodes`` is exhausted,
      * the action falls outside ``[0, 1]`` (or is NaN),
      * the action would leave the sequence unchanged, or
      * the resulting sequence has already been visited by this environment.

    ``seen_sequences`` is not cleared by ``_reset``, so "already visited"
    spans all episodes of one environment instance.

    Args:
        alphabet: string of allowed letters; fixes the one-hot row order.
        seq_len: length of the sequences.
        landscape: object exposing ``get_fitness(sequence)``; deep-copied.
        max_episodes: maximum number of steps per episode (the name is
            historical -- it is compared against the per-episode step
            counter ``ctr``).
    """

    # Based on this: https://www.mikulskibartosz.name/how-to-create-an-environment-for-a-tensorflow-agent/
    def __init__(self, alphabet, seq_len, landscape, max_episodes):
        # Let the base class set up its own bookkeeping.
        super().__init__()
        self.alphabet = alphabet
        self.alphabet_len = len(self.alphabet)
        self.landscape = copy.deepcopy(landscape)
        self.seq_len = seq_len
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(1, 2), dtype=np.float32, minimum=0,
            maximum=1, name='action_x')
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(self.alphabet_len, self.seq_len), dtype=np.float32, minimum=0,
            maximum=1, name='observation')
        self._time_step_spec = ts.time_step_spec(self._observation_spec)
        # Placeholder state taken from the notebook-level wild-type `wt`;
        # it is replaced by `_reset` before the first real step.
        self._state = translate_string_to_one_hot(wt, self.alphabet)
        self._episode_ended = False
        self.ctr = 0
        self.max_episodes = max_episodes
        self.seen_sequences = {}

    def _reset(self):
        """Start a new episode from a freshly generated random sequence."""
        self.ctr = 0
        # TODO: UNDO THIS
#         self._state = translate_string_to_one_hot(wt, self.alphabet)
        # Use this environment's own length/alphabet rather than the notebook
        # globals so the observation always matches `_observation_spec`.
        start_sequence = generate_random_sequences(
            self.seq_len, 1, alphabet=self.alphabet)[0]
        self._state = translate_string_to_one_hot(start_sequence, self.alphabet)
        self._episode_ended = False
        return ts.restart(np.array(self._state, dtype=np.float32))

    def time_step_spec(self):
        return self._time_step_spec

    def action_spec(self):
        return self._action_spec

    def observation_spec(self):
        return self._observation_spec

    def get_state_string(self):
        """Return the current state decoded back to a sequence string."""
        return translate_one_hot_to_string(self._state, self.alphabet)

    def _terminate(self):
        """Mark the episode as finished and emit a zero-reward LAST step."""
        self._episode_ended = True
        return ts.termination(np.array(self._state, dtype=np.float32), 0)

    def _step(self, action):
        """Apply one substitution; see the class docstring for the rules."""
        if self._episode_ended:
            # The previous step ended the episode: ignore this action and
            # start a new episode, as the TF-Agents convention requires.
            return self.reset()

        if self.ctr >= self.max_episodes:
            assert self._state.sum() == self._state.shape[1]
            return self._terminate()

        self.ctr += 1
        action = np.asarray(action)
        # Written as a positive range test so that NaN also counts as invalid.
        if not np.all((action >= 0) & (action <= 1)):
            return self._terminate()

        x, y = action[0]
        # A component of exactly 1.0 is allowed by the action spec but would
        # scale to an index one past the end, so clamp to the last index.
        x = min(int(self.alphabet_len * x), self.alphabet_len - 1)
        y = min(int(self.seq_len * y), self.seq_len - 1)

        # Invariant: exactly one letter per position.
        assert self._state.sum() == self._state.shape[1]
        if self._state[x, y] == 1:
            # The chosen letter is already at that position: no-op move.
            return self._terminate()

        action_one_hot = np.zeros((self.alphabet_len, self.seq_len))
        action_one_hot[x, y] = 1
        self._state = construct_mutant_from_sample(action_one_hot, self._state)
        state_string = translate_one_hot_to_string(self._state, self.alphabet)

        if state_string in self.seen_sequences:
            return self._terminate()
        self.seen_sequences[state_string] = 1

        reward = self.landscape.get_fitness(state_string)
        assert self._state.sum() == self._state.shape[1]
        return ts.transition(np.array(self._state, dtype=np.float32), reward=reward)

In [34]:
# Per-episode step budget passed to the environments as `max_episodes`.
#max_iter = experiment_batch_size * generations 
max_iter = 10 ** 6
# A throw-away environment on landscape1, used only for validation.
fle2 = FitnessLandscapeEnvironment(RAA, length, landscape1, max_iter)
print("starting validating environment...")
validate_py_environment(fle2, episodes=2)
print("done validating environment.")
# The environment on landscape2, wrapped for use from TensorFlow.
fle = FitnessLandscapeEnvironment(RAA, length, landscape2, max_iter)
tf_env = tf_py_environment.TFPyEnvironment(fle)

starting validating environment...
done validating environment.


In [35]:
# Specs read back from the wrapped TensorFlow environment.
time_step_spec = tf_env.time_step_spec()
observation_spec = tf_env.observation_spec()
action_spec = tf_env.action_spec()
# Convenience aliases; `alphabet_len` repeats the value set in the
# configuration cell.
alphabet_len = len(RAA)
seq_len = length

def BoostedEnvironment():
    """Factory for a fresh ``FitnessLandscapeEnvironment`` on ``landscape2``.

    Takes no arguments so it can serve as the ``env_load_fn`` of
    ``train_eval``; the alphabet, sequence length and step budget come from
    the notebook-level ``RAA``, ``length`` and ``max_iter``.
    """
    return FitnessLandscapeEnvironment(
        alphabet=RAA,
        seq_len=length,
        landscape=landscape2,
        max_episodes=max_iter,
    )

In [38]:
# coding=utf-8
# Copyright 2018 The TF-Agents Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

r"""Train and Eval PPO.
To run:
```bash
tensorboard --logdir $HOME/tmp/ppo/gym/HalfCheetah-v2/ --port 2223 &
python tf_agents/agents/ppo/examples/v2/train_eval.py \
  --root_dir=$HOME/tmp/ppo/gym/HalfCheetah-v2/ \
  --logtostderr
```
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

from absl import logging

import gin
import tensorflow as tf

from tf_agents.agents.ppo import ppo_agent
from tf_agents.drivers import dynamic_episode_driver
from tf_agents.environments import parallel_py_environment
from tf_agents.environments import tf_py_environment
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.networks import actor_distribution_network
from tf_agents.networks import actor_distribution_rnn_network
from tf_agents.networks import value_network
from tf_agents.networks import value_rnn_network
from tf_agents.policies import policy_saver
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.utils import common


def train_eval(
    root_dir,
    env_name='HalfCheetah-v2',
    env_load_fn=BoostedEnvironment,
    random_seed=0,
    # TODO(b/127576522): rename to policy_fc_layers.
    actor_fc_layers=(200, 100),
    value_fc_layers=(200, 100),
    use_rnns=False,
    # Params for collect
    num_environment_steps=10000000,
    collect_episodes_per_iteration=30,
    num_parallel_environments=30,
    replay_buffer_capacity=1001,  # Per-environment
    # Params for train
    num_epochs=25,
    learning_rate=1e-5,
    # Params for eval
    num_eval_episodes=30,
    eval_interval=500,
    # Params for summaries and logging
    train_checkpoint_interval=500,
    policy_checkpoint_interval=500,
    log_interval=50,
    summary_interval=50,
    summaries_flush_secs=1,
    use_tf_functions=True,
    debug_summaries=True,
    summarize_grads_and_vars=False):
  """Train a PPO agent on environments built by `env_load_fn` and evaluate it.

  Adapted from the TF-Agents PPO `train_eval` example. Each iteration of the
  main loop collects `collect_episodes_per_iteration` episodes from the
  parallel training environments into a replay buffer, trains the agent on
  everything in the buffer, then clears the buffer.

  Args:
    root_dir: Output directory; `train/`, `eval/` and `policy_saved_model/`
      are created underneath it. `~` is expanded. Must not be None.
    env_name: Not referenced in the function body; kept from the original
      example's signature.
    env_load_fn: Zero-argument callable returning a Python environment. It is
      called once for evaluation and once per parallel training environment.
    random_seed: Passed to `tf.compat.v1.set_random_seed`.
    actor_fc_layers: Fully connected layer sizes of the actor network.
    value_fc_layers: Fully connected layer sizes of the value network.
    use_rnns: If True, build the RNN variants of the actor/value networks.
    num_environment_steps: Training stops once the environment-steps metric
      reaches this value.
    collect_episodes_per_iteration: Episodes collected per loop iteration.
    num_parallel_environments: Number of environments run in parallel; also
      the batch size of the replay buffer and of the training metrics.
    replay_buffer_capacity: `max_length` of the replay buffer (per
      environment).
    num_epochs: `num_epochs` passed to the PPO agent.
    learning_rate: Learning rate of the Adam optimizer created here.
    num_eval_episodes: Episodes per evaluation; also the eval metrics'
      buffer size.
    eval_interval: Evaluate when the global step is a multiple of this.
    train_checkpoint_interval: Save the training checkpoint when the global
      step is a multiple of this (only checked on logging iterations).
    policy_checkpoint_interval: Save the policy checkpoint and SavedModel
      when the global step is a multiple of this (only checked on logging
      iterations).
    log_interval: Log loss and throughput when the global step is a multiple
      of this.
    summary_interval: Record summaries when the global step is a multiple of
      this.
    summaries_flush_secs: Flush period of the summary writers, in seconds.
    use_tf_functions: If True, wrap the collect driver's `run`, the agent's
      `train` and the local `train_step` with `common.function`.
    debug_summaries: Forwarded to the PPO agent.
    summarize_grads_and_vars: Forwarded to the PPO agent.

  Returns:
    The trained `PPOAgent`.

  Raises:
    AttributeError: If `root_dir` is None.
  """
  if root_dir is None:
    raise AttributeError('train_eval requires a root_dir.')

  root_dir = os.path.expanduser(root_dir)
  train_dir = os.path.join(root_dir, 'train')
  eval_dir = os.path.join(root_dir, 'eval')
  saved_model_dir = os.path.join(root_dir, 'policy_saved_model')

  train_summary_writer = tf.compat.v2.summary.create_file_writer(
      train_dir, flush_millis=summaries_flush_secs * 1000)
  train_summary_writer.set_as_default()

  eval_summary_writer = tf.compat.v2.summary.create_file_writer(
      eval_dir, flush_millis=summaries_flush_secs * 1000)
  eval_metrics = [
      tf_metrics.AverageReturnMetric(buffer_size=num_eval_episodes),
      tf_metrics.AverageEpisodeLengthMetric(buffer_size=num_eval_episodes)
  ]

  global_step = tf.compat.v1.train.get_or_create_global_step()
  # Everything below records summaries only every `summary_interval` steps.
  with tf.compat.v2.summary.record_if(
      lambda: tf.math.equal(global_step % summary_interval, 0)):
    tf.compat.v1.set_random_seed(random_seed)
    # One environment for evaluation, `num_parallel_environments` for
    # collection; each is built by its own call to `env_load_fn`.
    eval_tf_env = tf_py_environment.TFPyEnvironment(env_load_fn())
    tf_env = tf_py_environment.TFPyEnvironment(
        parallel_py_environment.ParallelPyEnvironment(
            [lambda: env_load_fn()] * num_parallel_environments))
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)

    if use_rnns:
      actor_net = actor_distribution_rnn_network.ActorDistributionRnnNetwork(
          tf_env.observation_spec(),
          tf_env.action_spec(),
          input_fc_layer_params=actor_fc_layers,
          output_fc_layer_params=None)
      value_net = value_rnn_network.ValueRnnNetwork(
          tf_env.observation_spec(),
          input_fc_layer_params=value_fc_layers,
          output_fc_layer_params=None)
    else:
      actor_net = actor_distribution_network.ActorDistributionNetwork(
          tf_env.observation_spec(),
          tf_env.action_spec(),
          fc_layer_params=actor_fc_layers)
      value_net = value_network.ValueNetwork(
          tf_env.observation_spec(), fc_layer_params=value_fc_layers)

    tf_agent = ppo_agent.PPOAgent(
        tf_env.time_step_spec(),
        tf_env.action_spec(),
        optimizer,
        actor_net=actor_net,
        value_net=value_net,
        num_epochs=num_epochs,
        debug_summaries=debug_summaries,
        summarize_grads_and_vars=summarize_grads_and_vars,
        train_step_counter=global_step)
    tf_agent.initialize()

    environment_steps_metric = tf_metrics.EnvironmentSteps()
    step_metrics = [
        tf_metrics.NumberOfEpisodes(),
        environment_steps_metric,
    ]

    train_metrics = step_metrics + [
        tf_metrics.AverageReturnMetric(
            batch_size=num_parallel_environments),
        tf_metrics.AverageEpisodeLengthMetric(
            batch_size=num_parallel_environments),
    ]

    eval_policy = tf_agent.policy
    collect_policy = tf_agent.collect_policy

    replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
        tf_agent.collect_data_spec,
        batch_size=num_parallel_environments,
        max_length=replay_buffer_capacity)

    train_checkpointer = common.Checkpointer(
        ckpt_dir=train_dir,
        agent=tf_agent,
        global_step=global_step,
        metrics=metric_utils.MetricsGroup(train_metrics, 'train_metrics'))
    policy_checkpointer = common.Checkpointer(
        ckpt_dir=os.path.join(train_dir, 'policy'),
        policy=eval_policy,
        global_step=global_step)
    saved_model = policy_saver.PolicySaver(
        eval_policy, train_step=global_step)

    # Resumes from an existing checkpoint in `train_dir` when one is present,
    # so re-running with the same `root_dir` continues a previous run.
    train_checkpointer.initialize_or_restore()

    collect_driver = dynamic_episode_driver.DynamicEpisodeDriver(
        tf_env,
        collect_policy,
        observers=[replay_buffer.add_batch] + train_metrics,
        num_episodes=collect_episodes_per_iteration)

    def train_step():
      # Train on the full contents of the replay buffer.
      trajectories = replay_buffer.gather_all()
      return tf_agent.train(experience=trajectories)

    if use_tf_functions:
      # TODO(b/123828980): Enable once the cause for slowdown was identified.
      collect_driver.run = common.function(collect_driver.run, autograph=False)
      tf_agent.train = common.function(tf_agent.train, autograph=False)
      train_step = common.function(train_step)

    # Timers accumulate between two log lines and are reset after logging.
    collect_time = 0
    train_time = 0
    timed_at_step = global_step.numpy()

    while environment_steps_metric.result() < num_environment_steps:
      # Step value *before* this iteration's training; all interval checks
      # below use this value.
      global_step_val = global_step.numpy()
      if global_step_val % eval_interval == 0:
        metric_utils.eager_compute(
            eval_metrics,
            eval_tf_env,
            eval_policy,
            num_episodes=num_eval_episodes,
            train_step=global_step,
            summary_writer=eval_summary_writer,
            summary_prefix='Metrics',
        )

      start_time = time.time()
      collect_driver.run()
      collect_time += time.time() - start_time

      start_time = time.time()
      total_loss, _ = train_step()
      # The buffer is emptied after every training call, so each iteration
      # trains only on the episodes it just collected.
      replay_buffer.clear()
      train_time += time.time() - start_time

      for train_metric in train_metrics:
        train_metric.tf_summaries(
            train_step=global_step, step_metrics=step_metrics)

      if global_step_val % log_interval == 0:
        logging.info('step = %d, loss = %f', global_step_val, total_loss)
        steps_per_sec = (
            (global_step_val - timed_at_step) / (collect_time + train_time))
        logging.info('%.3f steps/sec', steps_per_sec)
        logging.info('collect_time = {}, train_time = {}'.format(
            collect_time, train_time))
        with tf.compat.v2.summary.record_if(True):
          tf.compat.v2.summary.scalar(
              name='global_steps_per_sec', data=steps_per_sec, step=global_step)

        # Checkpointing is nested inside the logging branch, so a save only
        # happens on steps that are multiples of both intervals.
        if global_step_val % train_checkpoint_interval == 0:
          train_checkpointer.save(global_step=global_step_val)

        if global_step_val % policy_checkpoint_interval == 0:
          policy_checkpointer.save(global_step=global_step_val)
          saved_model_path = os.path.join(
              saved_model_dir, 'policy_' + ('%d' % global_step_val).zfill(9))
          saved_model.save(saved_model_path)

        timed_at_step = global_step_val
        collect_time = 0
        train_time = 0

    # One final eval before exiting.
    metric_utils.eager_compute(
        eval_metrics,
        eval_tf_env,
        eval_policy,
        num_episodes=num_eval_episodes,
        train_step=global_step,
        summary_writer=eval_summary_writer,
        summary_prefix='Metrics',
    )
    print("EXITED")
    
    return tf_agent

# Launch training. INFO-level absl logging produces the step/loss/throughput
# lines shown in the cell output.
logging.set_verbosity(logging.INFO)
tf.compat.v1.enable_v2_behavior()
# NOTE(review): the output directory is a hard-coded absolute path on one
# machine; `train_eval` restores any checkpoint it finds there, so results
# depend on what earlier runs left in that directory.
# `env_name` is only a label here -- `train_eval` does not read it; the
# environment comes from the default `env_load_fn`.
agent = train_eval(
      '/Users/richard/Documents/Research/FLRL/dump',
      env_name='BoostedEnvironment',
      use_rnns=False,
      num_environment_steps=100000,
      collect_episodes_per_iteration=30,
      num_parallel_environments=4,
      replay_buffer_capacity=1000,
      num_epochs=25,
      num_eval_episodes=30)

INFO:absl:Spawning all processes.
INFO:absl:All processes started.
INFO:absl:Checkpoint available: /Users/richard/Documents/Research/FLRL/dump/train/ckpt-6000
INFO:absl:Checkpoint available: /Users/richard/Documents/Research/FLRL/dump/train/policy/ckpt-6000
INFO:absl:step = 6050, loss = 0.327176
INFO:absl:0.488 steps/sec
INFO:absl:collect_time = 0.913827657699585, train_time = 50.274866342544556
INFO:absl:step = 6100, loss = 0.593229
INFO:absl:258.382 steps/sec
INFO:absl:collect_time = 0.08973288536071777, train_time = 0.10377907752990723
INFO:absl:step = 6150, loss = 0.491860
INFO:absl:225.788 steps/sec
INFO:absl:collect_time = 0.1052849292755127, train_time = 0.11616206169128418
INFO:absl:step = 6200, loss = 0.431152
INFO:absl:198.255 steps/sec
INFO:absl:collect_time = 0.11707305908203125, train_time = 0.13512778282165527
INFO:absl:step = 6250, loss = 3.424420
INFO:absl:199.480 steps/sec
INFO:absl:collect_time = 0.14392495155334473, train_time = 0.10672688484191895
INFO:absl:step = 6

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000006500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000006500/assets
INFO:absl:step = 6550, loss = 0.457388
INFO:absl:301.313 steps/sec
INFO:absl:collect_time = 0.07382988929748535, train_time = 0.09211039543151855
INFO:absl:step = 6600, loss = 2.678300
INFO:absl:263.632 steps/sec
INFO:absl:collect_time = 0.08650803565979004, train_time = 0.1031501293182373
INFO:absl:step = 6650, loss = 2.355242
INFO:absl:248.940 steps/sec
INFO:absl:collect_time = 0.08457183837890625, train_time = 0.11627960205078125
INFO:absl:step = 6700, loss = 3.222640
INFO:absl:302.447 steps/sec
INFO:absl:collect_time = 0.07122921943664551, train_time = 0.09408926963806152
INFO:absl:step = 6750, loss = 0.195254
INFO:absl:277.279 steps/sec
INFO:absl:collect_time = 0.07951688766479492, train_time = 0.10080718994140625
INFO:absl:step = 6800, loss = 0.496647
INFO:absl:226.840 steps/sec
INFO:absl:collect_time = 0.10196495056152344, train_time = 0.1184544563293457
INFO

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000007000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000007000/assets
INFO:absl:step = 7050, loss = 1.937962
INFO:absl:266.433 steps/sec
INFO:absl:collect_time = 0.07274985313415527, train_time = 0.1149144172668457
INFO:absl:step = 7100, loss = 0.306742
INFO:absl:271.658 steps/sec
INFO:absl:collect_time = 0.07436990737915039, train_time = 0.10968518257141113
INFO:absl:step = 7150, loss = 0.339729
INFO:absl:288.976 steps/sec
INFO:absl:collect_time = 0.07502388954162598, train_time = 0.09800100326538086
INFO:absl:step = 7200, loss = 0.431425
INFO:absl:276.842 steps/sec
INFO:absl:collect_time = 0.08433699607849121, train_time = 0.09627127647399902
INFO:absl:step = 7250, loss = 0.269292
INFO:absl:287.913 steps/sec
INFO:absl:collect_time = 0.08380722999572754, train_time = 0.08985614776611328
INFO:absl:step = 7300, loss = 4.611185
INFO:absl:322.968 steps/sec
INFO:absl:collect_time = 0.06992697715759277, train_time = 0.08488702774047852
INF

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000007500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000007500/assets
INFO:absl:step = 7550, loss = 0.419848
INFO:absl:304.936 steps/sec
INFO:absl:collect_time = 0.07573127746582031, train_time = 0.08823776245117188
INFO:absl:step = 7600, loss = 0.624562
INFO:absl:310.141 steps/sec
INFO:absl:collect_time = 0.06985688209533691, train_time = 0.09136009216308594
INFO:absl:step = 7650, loss = 0.933148
INFO:absl:293.328 steps/sec
INFO:absl:collect_time = 0.07640576362609863, train_time = 0.09405207633972168
INFO:absl:step = 7700, loss = 0.608489
INFO:absl:302.729 steps/sec
INFO:absl:collect_time = 0.0725867748260498, train_time = 0.09257721900939941
INFO:absl:step = 7750, loss = 0.210870
INFO:absl:299.651 steps/sec
INFO:absl:collect_time = 0.07546114921569824, train_time = 0.0913994312286377
INFO:absl:step = 7800, loss = 0.437889
INFO:absl:289.831 steps/sec
INFO:absl:collect_time = 0.07190132141113281, train_time = 0.10061287879943848
INFO

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000008000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000008000/assets
INFO:absl:step = 8050, loss = 1.309322
INFO:absl:307.204 steps/sec
INFO:absl:collect_time = 0.07191824913024902, train_time = 0.09083986282348633
INFO:absl:step = 8100, loss = 1.232553
INFO:absl:341.034 steps/sec
INFO:absl:collect_time = 0.06228017807006836, train_time = 0.08433294296264648
INFO:absl:step = 8150, loss = 0.193988
INFO:absl:328.718 steps/sec
INFO:absl:collect_time = 0.06548810005187988, train_time = 0.08661818504333496
INFO:absl:step = 8200, loss = 0.737521
INFO:absl:309.207 steps/sec
INFO:absl:collect_time = 0.06781888008117676, train_time = 0.09388518333435059
INFO:absl:step = 8250, loss = 1.769880
INFO:absl:280.868 steps/sec
INFO:absl:collect_time = 0.07486581802368164, train_time = 0.10315394401550293
INFO:absl:step = 8300, loss = 2.907954
INFO:absl:270.684 steps/sec
INFO:absl:collect_time = 0.08800125122070312, train_time = 0.09671616554260254
IN

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000008500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000008500/assets
INFO:absl:step = 8550, loss = 2.763157
INFO:absl:305.681 steps/sec
INFO:absl:collect_time = 0.0662531852722168, train_time = 0.09731578826904297
INFO:absl:step = 8600, loss = 0.397839
INFO:absl:289.563 steps/sec
INFO:absl:collect_time = 0.06387686729431152, train_time = 0.10879707336425781
INFO:absl:step = 8650, loss = 0.531880
INFO:absl:193.691 steps/sec
INFO:absl:collect_time = 0.12361407279968262, train_time = 0.13452887535095215
INFO:absl:step = 8700, loss = 0.901585
INFO:absl:217.511 steps/sec
INFO:absl:collect_time = 0.10374808311462402, train_time = 0.12612581253051758
INFO:absl:step = 8750, loss = 0.342611
INFO:absl:231.161 steps/sec
INFO:absl:collect_time = 0.10838198661804199, train_time = 0.10791707038879395
INFO:absl:step = 8800, loss = 0.857926
INFO:absl:285.709 steps/sec
INFO:absl:collect_time = 0.07627224922180176, train_time = 0.09873080253601074
INF

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000009000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000009000/assets
INFO:absl:step = 9050, loss = 0.446990
INFO:absl:248.140 steps/sec
INFO:absl:collect_time = 0.08246493339538574, train_time = 0.1190342903137207
INFO:absl:step = 9100, loss = 0.870448
INFO:absl:292.735 steps/sec
INFO:absl:collect_time = 0.07385897636413574, train_time = 0.09694385528564453
INFO:absl:step = 9150, loss = 0.333921
INFO:absl:243.206 steps/sec
INFO:absl:collect_time = 0.08575320243835449, train_time = 0.11983370780944824
INFO:absl:step = 9200, loss = 0.468378
INFO:absl:309.426 steps/sec
INFO:absl:collect_time = 0.07043075561523438, train_time = 0.09115886688232422
INFO:absl:step = 9250, loss = 0.988557
INFO:absl:271.993 steps/sec
INFO:absl:collect_time = 0.07933330535888672, train_time = 0.10449528694152832
INFO:absl:step = 9300, loss = 0.589139
INFO:absl:315.100 steps/sec
INFO:absl:collect_time = 0.0632009506225586, train_time = 0.09547901153564453
INFO

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000009500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000009500/assets
INFO:absl:step = 9550, loss = 0.553869
INFO:absl:302.187 steps/sec
INFO:absl:collect_time = 0.0704808235168457, train_time = 0.09497976303100586
INFO:absl:step = 9600, loss = 0.166596
INFO:absl:300.626 steps/sec
INFO:absl:collect_time = 0.07394981384277344, train_time = 0.09237003326416016
INFO:absl:step = 9650, loss = 0.326670
INFO:absl:324.197 steps/sec
INFO:absl:collect_time = 0.06292891502380371, train_time = 0.09129810333251953
INFO:absl:step = 9700, loss = 0.840337
INFO:absl:298.213 steps/sec
INFO:absl:collect_time = 0.06781315803527832, train_time = 0.09985208511352539
INFO:absl:step = 9750, loss = 0.253269
INFO:absl:275.317 steps/sec
INFO:absl:collect_time = 0.08143901824951172, train_time = 0.10017013549804688
INFO:absl:step = 9800, loss = 0.373666
INFO:absl:242.934 steps/sec
INFO:absl:collect_time = 0.10892891883850098, train_time = 0.09688806533813477
INF

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000010000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000010000/assets
INFO:absl:step = 10050, loss = 1.308379
INFO:absl:325.529 steps/sec
INFO:absl:collect_time = 0.06386399269104004, train_time = 0.08973193168640137
INFO:absl:step = 10100, loss = 0.174692
INFO:absl:277.958 steps/sec
INFO:absl:collect_time = 0.08427596092224121, train_time = 0.09560751914978027
INFO:absl:step = 10150, loss = 0.280670
INFO:absl:289.711 steps/sec
INFO:absl:collect_time = 0.07226872444152832, train_time = 0.10031700134277344
INFO:absl:step = 10200, loss = 0.964488
INFO:absl:271.435 steps/sec
INFO:absl:collect_time = 0.07799005508422852, train_time = 0.1062159538269043
INFO:absl:step = 10250, loss = 1.047303
INFO:absl:194.629 steps/sec
INFO:absl:collect_time = 0.12446761131286621, train_time = 0.1324319839477539
INFO:absl:step = 10300, loss = 0.287490
INFO:absl:250.601 steps/sec
INFO:absl:collect_time = 0.08243417739868164, train_time = 0.1170859336853027

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000010500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000010500/assets
INFO:absl:step = 10550, loss = 2.326523
INFO:absl:290.012 steps/sec
INFO:absl:collect_time = 0.07677698135375977, train_time = 0.09562969207763672
INFO:absl:step = 10600, loss = 1.399264
INFO:absl:278.484 steps/sec
INFO:absl:collect_time = 0.08447504043579102, train_time = 0.09506821632385254
INFO:absl:step = 10650, loss = 1.339389
INFO:absl:295.072 steps/sec
INFO:absl:collect_time = 0.07990121841430664, train_time = 0.08954906463623047
INFO:absl:step = 10700, loss = 1.193581
INFO:absl:312.385 steps/sec
INFO:absl:collect_time = 0.07043099403381348, train_time = 0.08962798118591309
INFO:absl:step = 10750, loss = 1.513503
INFO:absl:338.675 steps/sec
INFO:absl:collect_time = 0.06405925750732422, train_time = 0.08357477188110352
INFO:absl:step = 10800, loss = 0.741922
INFO:absl:282.860 steps/sec
INFO:absl:collect_time = 0.08315086364746094, train_time = 0.09361481666564

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000011000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000011000/assets
INFO:absl:step = 11050, loss = 0.305020
INFO:absl:252.240 steps/sec
INFO:absl:collect_time = 0.08259701728820801, train_time = 0.11562705039978027
INFO:absl:step = 11100, loss = 0.517850
INFO:absl:257.036 steps/sec
INFO:absl:collect_time = 0.08251667022705078, train_time = 0.11200857162475586
INFO:absl:step = 11150, loss = 1.212381
INFO:absl:243.084 steps/sec
INFO:absl:collect_time = 0.10069441795349121, train_time = 0.1049957275390625
INFO:absl:step = 11200, loss = 2.142819
INFO:absl:218.023 steps/sec
INFO:absl:collect_time = 0.11751413345336914, train_time = 0.11181998252868652
INFO:absl:step = 11250, loss = 0.817836
INFO:absl:277.946 steps/sec
INFO:absl:collect_time = 0.07600998878479004, train_time = 0.1038811206817627
INFO:absl:step = 11300, loss = 0.409746
INFO:absl:292.860 steps/sec
INFO:absl:collect_time = 0.07764720916748047, train_time = 0.0930826663970947

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000011500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000011500/assets
INFO:absl:step = 11550, loss = 1.328672
INFO:absl:247.316 steps/sec
INFO:absl:collect_time = 0.10334014892578125, train_time = 0.0988306999206543
INFO:absl:step = 11600, loss = 2.014152
INFO:absl:316.749 steps/sec
INFO:absl:collect_time = 0.06581425666809082, train_time = 0.09203934669494629
INFO:absl:step = 11650, loss = 0.295014
INFO:absl:276.185 steps/sec
INFO:absl:collect_time = 0.08395862579345703, train_time = 0.09707975387573242
INFO:absl:step = 11700, loss = 0.039500
INFO:absl:245.239 steps/sec
INFO:absl:collect_time = 0.10793399810791016, train_time = 0.09594893455505371
INFO:absl:step = 11750, loss = 0.747795
INFO:absl:256.776 steps/sec
INFO:absl:collect_time = 0.08163666725158691, train_time = 0.11308574676513672
INFO:absl:step = 11800, loss = 0.340987
INFO:absl:195.799 steps/sec
INFO:absl:collect_time = 0.09890222549438477, train_time = 0.156461954116821

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000012000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000012000/assets
INFO:absl:step = 12050, loss = 0.711768
INFO:absl:270.078 steps/sec
INFO:absl:collect_time = 0.08239984512329102, train_time = 0.10273218154907227
INFO:absl:step = 12100, loss = 0.748969
INFO:absl:297.148 steps/sec
INFO:absl:collect_time = 0.07575321197509766, train_time = 0.09251284599304199
INFO:absl:step = 12150, loss = 1.462781
INFO:absl:278.672 steps/sec
INFO:absl:collect_time = 0.08157515525817871, train_time = 0.09784698486328125
INFO:absl:step = 12200, loss = 0.173812
INFO:absl:311.970 steps/sec
INFO:absl:collect_time = 0.07246685028076172, train_time = 0.08780503273010254
INFO:absl:step = 12250, loss = 0.740221
INFO:absl:211.538 steps/sec
INFO:absl:collect_time = 0.09465861320495605, train_time = 0.14170598983764648
INFO:absl:step = 12300, loss = 2.489814
INFO:absl:261.378 steps/sec
INFO:absl:collect_time = 0.08911275863647461, train_time = 0.10218095779418

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000012500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000012500/assets
INFO:absl:step = 12550, loss = 0.492930
INFO:absl:264.439 steps/sec
INFO:absl:collect_time = 0.08505558967590332, train_time = 0.10402417182922363
INFO:absl:step = 12600, loss = 0.692839
INFO:absl:229.526 steps/sec
INFO:absl:collect_time = 0.09990382194519043, train_time = 0.11793684959411621
INFO:absl:step = 12650, loss = 0.761500
INFO:absl:197.421 steps/sec
INFO:absl:collect_time = 0.1198725700378418, train_time = 0.1333932876586914
INFO:absl:step = 12700, loss = 1.089145
INFO:absl:220.943 steps/sec
INFO:absl:collect_time = 0.10120892524719238, train_time = 0.12509417533874512
INFO:absl:step = 12750, loss = 0.440038
INFO:absl:201.992 steps/sec
INFO:absl:collect_time = 0.11603999137878418, train_time = 0.13149499893188477
INFO:absl:step = 12800, loss = 0.243470
INFO:absl:349.986 steps/sec
INFO:absl:collect_time = 0.054795026779174805, train_time = 0.088068008422851

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000013000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000013000/assets
INFO:absl:step = 13050, loss = 0.586321
INFO:absl:217.536 steps/sec
INFO:absl:collect_time = 0.1100921630859375, train_time = 0.11975479125976562
INFO:absl:step = 13100, loss = 2.351941
INFO:absl:271.252 steps/sec
INFO:absl:collect_time = 0.07713723182678223, train_time = 0.1071929931640625
INFO:absl:step = 13150, loss = 3.640187
INFO:absl:232.239 steps/sec
INFO:absl:collect_time = 0.09544706344604492, train_time = 0.11984872817993164
INFO:absl:step = 13200, loss = 0.602617
INFO:absl:258.062 steps/sec
INFO:absl:collect_time = 0.08468818664550781, train_time = 0.10906386375427246
INFO:absl:step = 13250, loss = 0.555162
INFO:absl:229.168 steps/sec
INFO:absl:collect_time = 0.09299683570861816, train_time = 0.1251838207244873
INFO:absl:step = 13300, loss = 2.115784
INFO:absl:223.555 steps/sec
INFO:absl:collect_time = 0.10345578193664551, train_time = 0.12020277976989746

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000013500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000013500/assets
INFO:absl:step = 13550, loss = 0.533616
INFO:absl:247.143 steps/sec
INFO:absl:collect_time = 0.09393191337585449, train_time = 0.10837984085083008
INFO:absl:step = 13600, loss = 1.653349
INFO:absl:238.450 steps/sec
INFO:absl:collect_time = 0.08508682250976562, train_time = 0.12460064888000488
INFO:absl:step = 13650, loss = 1.433314
INFO:absl:241.080 steps/sec
INFO:absl:collect_time = 0.08287692070007324, train_time = 0.12452316284179688
INFO:absl:step = 13700, loss = 0.609254
INFO:absl:209.991 steps/sec
INFO:absl:collect_time = 0.10683059692382812, train_time = 0.13127517700195312
INFO:absl:step = 13750, loss = 0.408103
INFO:absl:237.925 steps/sec
INFO:absl:collect_time = 0.08275699615478516, train_time = 0.12739300727844238
INFO:absl:step = 13800, loss = 2.558806
INFO:absl:277.162 steps/sec
INFO:absl:collect_time = 0.07716226577758789, train_time = 0.10323786735534

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000014000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000014000/assets
INFO:absl:step = 14050, loss = 0.325211
INFO:absl:290.238 steps/sec
INFO:absl:collect_time = 0.07297492027282715, train_time = 0.09929776191711426
INFO:absl:step = 14100, loss = 0.375552
INFO:absl:294.492 steps/sec
INFO:absl:collect_time = 0.07273697853088379, train_time = 0.09704709053039551
INFO:absl:step = 14150, loss = 0.729911
INFO:absl:285.162 steps/sec
INFO:absl:collect_time = 0.07390642166137695, train_time = 0.10143280029296875
INFO:absl:step = 14200, loss = 0.426856
INFO:absl:328.286 steps/sec
INFO:absl:collect_time = 0.06015896797180176, train_time = 0.0921471118927002
INFO:absl:step = 14250, loss = 1.179721
INFO:absl:292.296 steps/sec
INFO:absl:collect_time = 0.07336091995239258, train_time = 0.09769868850708008
INFO:absl:step = 14300, loss = 1.339566
INFO:absl:287.103 steps/sec
INFO:absl:collect_time = 0.07261013984680176, train_time = 0.101543188095092

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000014500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000014500/assets
INFO:absl:step = 14550, loss = 0.590504
INFO:absl:290.168 steps/sec
INFO:absl:collect_time = 0.07311606407165527, train_time = 0.0991981029510498
INFO:absl:step = 14600, loss = 0.981694
INFO:absl:230.332 steps/sec
INFO:absl:collect_time = 0.09207701683044434, train_time = 0.1250009536743164
INFO:absl:step = 14650, loss = 0.431778
INFO:absl:253.242 steps/sec
INFO:absl:collect_time = 0.09139871597290039, train_time = 0.10604095458984375
INFO:absl:step = 14700, loss = 0.731260
INFO:absl:183.595 steps/sec
INFO:absl:collect_time = 0.12279391288757324, train_time = 0.14954400062561035
INFO:absl:step = 14750, loss = 1.887379
INFO:absl:242.114 steps/sec
INFO:absl:collect_time = 0.09748506546020508, train_time = 0.10902905464172363
INFO:absl:step = 14800, loss = 1.014925
INFO:absl:276.533 steps/sec
INFO:absl:collect_time = 0.07685732841491699, train_time = 0.1039528846740722

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000015000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000015000/assets
INFO:absl:step = 15050, loss = 0.626225
INFO:absl:249.802 steps/sec
INFO:absl:collect_time = 0.0877084732055664, train_time = 0.11245036125183105
INFO:absl:step = 15100, loss = 0.571794
INFO:absl:280.293 steps/sec
INFO:absl:collect_time = 0.07042980194091797, train_time = 0.1079549789428711
INFO:absl:step = 15150, loss = 2.408306
INFO:absl:254.626 steps/sec
INFO:absl:collect_time = 0.08498692512512207, train_time = 0.11137914657592773
INFO:absl:step = 15200, loss = 0.465194
INFO:absl:281.108 steps/sec
INFO:absl:collect_time = 0.0761568546295166, train_time = 0.10171103477478027
INFO:absl:step = 15250, loss = 0.218246
INFO:absl:290.212 steps/sec
INFO:absl:collect_time = 0.07130908966064453, train_time = 0.10097885131835938
INFO:absl:step = 15300, loss = 0.401065
INFO:absl:246.988 steps/sec
INFO:absl:collect_time = 0.08827733993530273, train_time = 0.11416172981262207

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000015500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000015500/assets
INFO:absl:step = 15550, loss = 0.774512
INFO:absl:273.624 steps/sec
INFO:absl:collect_time = 0.07900571823120117, train_time = 0.10372662544250488
INFO:absl:step = 15600, loss = 1.421342
INFO:absl:268.792 steps/sec
INFO:absl:collect_time = 0.08097481727600098, train_time = 0.10504293441772461
INFO:absl:step = 15650, loss = 2.079892
INFO:absl:277.698 steps/sec
INFO:absl:collect_time = 0.08473777770996094, train_time = 0.09531402587890625
INFO:absl:step = 15700, loss = 0.661402
INFO:absl:259.944 steps/sec
INFO:absl:collect_time = 0.07261919975280762, train_time = 0.11972999572753906
INFO:absl:step = 15750, loss = 0.873353
INFO:absl:266.913 steps/sec
INFO:absl:collect_time = 0.08523011207580566, train_time = 0.1020967960357666
INFO:absl:step = 15800, loss = 0.669580
INFO:absl:299.503 steps/sec
INFO:absl:collect_time = 0.07195210456848145, train_time = 0.094990968704223

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000016000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000016000/assets
INFO:absl:step = 16050, loss = 1.064889
INFO:absl:241.970 steps/sec
INFO:absl:collect_time = 0.09121918678283691, train_time = 0.1154177188873291
INFO:absl:step = 16100, loss = 0.442866
INFO:absl:279.975 steps/sec
INFO:absl:collect_time = 0.07173633575439453, train_time = 0.10685133934020996
INFO:absl:step = 16150, loss = 1.181929
INFO:absl:271.998 steps/sec
INFO:absl:collect_time = 0.08653545379638672, train_time = 0.0972893238067627
INFO:absl:step = 16200, loss = 0.114283
INFO:absl:280.247 steps/sec
INFO:absl:collect_time = 0.07498335838317871, train_time = 0.10343074798583984
INFO:absl:step = 16250, loss = 2.011961
INFO:absl:246.881 steps/sec
INFO:absl:collect_time = 0.09777331352233887, train_time = 0.10475301742553711
INFO:absl:step = 16300, loss = 1.241492
INFO:absl:273.808 steps/sec
INFO:absl:collect_time = 0.07871222496032715, train_time = 0.1038978099822998

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000016500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000016500/assets
INFO:absl:step = 16550, loss = 0.346138
INFO:absl:285.086 steps/sec
INFO:absl:collect_time = 0.07293701171875, train_time = 0.10244870185852051
INFO:absl:step = 16600, loss = 0.773807
INFO:absl:284.576 steps/sec
INFO:absl:collect_time = 0.07518982887268066, train_time = 0.1005103588104248
INFO:absl:step = 16650, loss = 1.585618
INFO:absl:308.878 steps/sec
INFO:absl:collect_time = 0.07108592987060547, train_time = 0.0907900333404541
INFO:absl:step = 16700, loss = 0.437658
INFO:absl:291.819 steps/sec
INFO:absl:collect_time = 0.07916712760925293, train_time = 0.09217214584350586
INFO:absl:step = 16750, loss = 0.611632
INFO:absl:307.754 steps/sec
INFO:absl:collect_time = 0.07336115837097168, train_time = 0.08910608291625977
INFO:absl:step = 16800, loss = 0.959614
INFO:absl:321.847 steps/sec
INFO:absl:collect_time = 0.07027006149291992, train_time = 0.0850832462310791
IN

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000017000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000017000/assets
INFO:absl:step = 17050, loss = 0.554681
INFO:absl:278.444 steps/sec
INFO:absl:collect_time = 0.07921290397644043, train_time = 0.1003561019897461
INFO:absl:step = 17100, loss = 0.775526
INFO:absl:302.040 steps/sec
INFO:absl:collect_time = 0.07654738426208496, train_time = 0.08899378776550293
INFO:absl:step = 17150, loss = 0.930405
INFO:absl:293.638 steps/sec
INFO:absl:collect_time = 0.07629084587097168, train_time = 0.09398698806762695
INFO:absl:step = 17200, loss = 1.560384
INFO:absl:268.157 steps/sec
INFO:absl:collect_time = 0.08968210220336914, train_time = 0.09677577018737793
INFO:absl:step = 17250, loss = 0.448295
INFO:absl:318.984 steps/sec
INFO:absl:collect_time = 0.06867480278015137, train_time = 0.0880727767944336
INFO:absl:step = 17300, loss = 2.647143
INFO:absl:272.853 steps/sec
INFO:absl:collect_time = 0.08514761924743652, train_time = 0.0981009006500244

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000017500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000017500/assets
INFO:absl:step = 17550, loss = 1.049228
INFO:absl:284.740 steps/sec
INFO:absl:collect_time = 0.07564783096313477, train_time = 0.09995102882385254
INFO:absl:step = 17600, loss = 3.411746
INFO:absl:263.107 steps/sec
INFO:absl:collect_time = 0.08758378028869629, train_time = 0.10245299339294434
INFO:absl:step = 17650, loss = 0.468525
INFO:absl:297.109 steps/sec
INFO:absl:collect_time = 0.06685638427734375, train_time = 0.10143184661865234
INFO:absl:step = 17700, loss = 0.705234
INFO:absl:235.911 steps/sec
INFO:absl:collect_time = 0.10207700729370117, train_time = 0.10986757278442383
INFO:absl:step = 17750, loss = 0.300021
INFO:absl:262.510 steps/sec
INFO:absl:collect_time = 0.08543109893798828, train_time = 0.10503816604614258
INFO:absl:step = 17800, loss = 0.319752
INFO:absl:280.233 steps/sec
INFO:absl:collect_time = 0.08050823211669922, train_time = 0.09791493415832

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000018000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000018000/assets
INFO:absl:step = 18050, loss = 2.342985
INFO:absl:263.664 steps/sec
INFO:absl:collect_time = 0.0839688777923584, train_time = 0.1056666374206543
INFO:absl:step = 18100, loss = 0.890483
INFO:absl:287.107 steps/sec
INFO:absl:collect_time = 0.07341575622558594, train_time = 0.10073518753051758
INFO:absl:step = 18150, loss = 0.948546
INFO:absl:271.837 steps/sec
INFO:absl:collect_time = 0.08021211624145508, train_time = 0.10372185707092285
INFO:absl:step = 18200, loss = 1.839524
INFO:absl:254.775 steps/sec
INFO:absl:collect_time = 0.08007097244262695, train_time = 0.1161808967590332
INFO:absl:step = 18250, loss = 1.009753
INFO:absl:273.753 steps/sec
INFO:absl:collect_time = 0.06986832618713379, train_time = 0.1127779483795166
INFO:absl:step = 18300, loss = 1.131612
INFO:absl:273.673 steps/sec
INFO:absl:collect_time = 0.08103489875793457, train_time = 0.10166501998901367


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000018500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000018500/assets
INFO:absl:step = 18550, loss = 2.145594
INFO:absl:296.190 steps/sec
INFO:absl:collect_time = 0.07253146171569824, train_time = 0.09627914428710938
INFO:absl:step = 18600, loss = 2.677756
INFO:absl:277.271 steps/sec
INFO:absl:collect_time = 0.08463621139526367, train_time = 0.09569287300109863
INFO:absl:step = 18650, loss = 0.287529
INFO:absl:287.863 steps/sec
INFO:absl:collect_time = 0.07635807991027832, train_time = 0.0973358154296875
INFO:absl:step = 18700, loss = 0.199711
INFO:absl:332.110 steps/sec
INFO:absl:collect_time = 0.06759309768676758, train_time = 0.08295941352844238
INFO:absl:step = 18750, loss = 1.054546
INFO:absl:264.175 steps/sec
INFO:absl:collect_time = 0.09795689582824707, train_time = 0.09131169319152832
INFO:absl:step = 18800, loss = 1.642310
INFO:absl:276.110 steps/sec
INFO:absl:collect_time = 0.0846858024597168, train_time = 0.0964016914367675

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000019000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000019000/assets
INFO:absl:step = 19050, loss = 0.258332
INFO:absl:261.149 steps/sec
INFO:absl:collect_time = 0.0817112922668457, train_time = 0.10975027084350586
INFO:absl:step = 19100, loss = 0.319581
INFO:absl:268.956 steps/sec
INFO:absl:collect_time = 0.0803060531616211, train_time = 0.10559797286987305
INFO:absl:step = 19150, loss = 0.886249
INFO:absl:274.698 steps/sec
INFO:absl:collect_time = 0.0838019847869873, train_time = 0.09821605682373047
INFO:absl:step = 19200, loss = 8.176774
INFO:absl:240.671 steps/sec
INFO:absl:collect_time = 0.0935831069946289, train_time = 0.11416912078857422
INFO:absl:step = 19250, loss = 1.571269
INFO:absl:248.843 steps/sec
INFO:absl:collect_time = 0.09409570693969727, train_time = 0.10683393478393555
INFO:absl:step = 19300, loss = 0.171076
INFO:absl:327.353 steps/sec
INFO:absl:collect_time = 0.07039499282836914, train_time = 0.08234524726867676


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000019500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000019500/assets
INFO:absl:step = 19550, loss = 0.555005
INFO:absl:323.241 steps/sec
INFO:absl:collect_time = 0.06580519676208496, train_time = 0.08887791633605957
INFO:absl:step = 19600, loss = 2.917080
INFO:absl:277.823 steps/sec
INFO:absl:collect_time = 0.08420228958129883, train_time = 0.09576845169067383
INFO:absl:step = 19650, loss = 0.333149
INFO:absl:305.738 steps/sec
INFO:absl:collect_time = 0.07011985778808594, train_time = 0.09341907501220703
INFO:absl:step = 19700, loss = 0.231886
INFO:absl:289.871 steps/sec
INFO:absl:collect_time = 0.07820415496826172, train_time = 0.09428620338439941
INFO:absl:step = 19750, loss = 0.761755
INFO:absl:272.907 steps/sec
INFO:absl:collect_time = 0.08734321594238281, train_time = 0.09586906433105469
INFO:absl:step = 19800, loss = 0.859842
INFO:absl:257.010 steps/sec
INFO:absl:collect_time = 0.09461188316345215, train_time = 0.09993314743041

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000020000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000020000/assets
INFO:absl:step = 20050, loss = 0.783475
INFO:absl:260.880 steps/sec
INFO:absl:collect_time = 0.07433104515075684, train_time = 0.11732769012451172
INFO:absl:step = 20100, loss = 0.582978
INFO:absl:284.921 steps/sec
INFO:absl:collect_time = 0.08052301406860352, train_time = 0.09496402740478516
INFO:absl:step = 20150, loss = 0.748215
INFO:absl:268.500 steps/sec
INFO:absl:collect_time = 0.08720660209655762, train_time = 0.09901309013366699
INFO:absl:step = 20200, loss = 0.511851
INFO:absl:310.435 steps/sec
INFO:absl:collect_time = 0.07375979423522949, train_time = 0.08730435371398926
INFO:absl:step = 20250, loss = 0.336765
INFO:absl:315.264 steps/sec
INFO:absl:collect_time = 0.07317399978637695, train_time = 0.08542299270629883
INFO:absl:step = 20300, loss = 1.909381
INFO:absl:282.716 steps/sec
INFO:absl:collect_time = 0.08259892463684082, train_time = 0.09425687789916

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000020500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000020500/assets
INFO:absl:step = 20550, loss = 1.109267
INFO:absl:322.878 steps/sec
INFO:absl:collect_time = 0.06535816192626953, train_time = 0.08949899673461914
INFO:absl:step = 20600, loss = 0.529880
INFO:absl:323.839 steps/sec
INFO:absl:collect_time = 0.06579971313476562, train_time = 0.08859801292419434
INFO:absl:step = 20650, loss = 1.769297
INFO:absl:273.677 steps/sec
INFO:absl:collect_time = 0.08045697212219238, train_time = 0.10224008560180664
INFO:absl:step = 20700, loss = 1.651883
INFO:absl:300.563 steps/sec
INFO:absl:collect_time = 0.07368016242980957, train_time = 0.09267425537109375
INFO:absl:step = 20750, loss = 1.025844
INFO:absl:255.737 steps/sec
INFO:absl:collect_time = 0.09016919136047363, train_time = 0.10534405708312988
INFO:absl:step = 20800, loss = 1.521464
INFO:absl:276.414 steps/sec
INFO:absl:collect_time = 0.07941317558288574, train_time = 0.10147500038146

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000021000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000021000/assets
INFO:absl:step = 21050, loss = 2.445268
INFO:absl:282.973 steps/sec
INFO:absl:collect_time = 0.07939934730529785, train_time = 0.09729576110839844
INFO:absl:step = 21100, loss = 0.847258
INFO:absl:276.992 steps/sec
INFO:absl:collect_time = 0.08600401878356934, train_time = 0.09450674057006836
INFO:absl:step = 21150, loss = 0.487143
INFO:absl:289.541 steps/sec
INFO:absl:collect_time = 0.07222628593444824, train_time = 0.10046076774597168
INFO:absl:step = 21200, loss = 2.390611
INFO:absl:233.453 steps/sec
INFO:absl:collect_time = 0.11032295227050781, train_time = 0.10385298728942871
INFO:absl:step = 21250, loss = 0.656210
INFO:absl:308.165 steps/sec
INFO:absl:collect_time = 0.06946897506713867, train_time = 0.09278202056884766
INFO:absl:step = 21300, loss = 1.622292
INFO:absl:289.618 steps/sec
INFO:absl:collect_time = 0.07548689842224121, train_time = 0.09715414047241

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000021500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000021500/assets
INFO:absl:step = 21550, loss = 0.956250
INFO:absl:308.873 steps/sec
INFO:absl:collect_time = 0.07081079483032227, train_time = 0.09106802940368652
INFO:absl:step = 21600, loss = 3.301107
INFO:absl:246.563 steps/sec
INFO:absl:collect_time = 0.09644103050231934, train_time = 0.10634684562683105
INFO:absl:step = 21650, loss = 1.391212
INFO:absl:315.356 steps/sec
INFO:absl:collect_time = 0.07228207588195801, train_time = 0.08626890182495117
INFO:absl:step = 21700, loss = 4.618543
INFO:absl:281.693 steps/sec
INFO:absl:collect_time = 0.08724617958068848, train_time = 0.09025192260742188
INFO:absl:step = 21750, loss = 4.759870
INFO:absl:289.852 steps/sec
INFO:absl:collect_time = 0.07515120506286621, train_time = 0.0973503589630127
INFO:absl:step = 21800, loss = 1.073486
INFO:absl:260.093 steps/sec
INFO:absl:collect_time = 0.08922410011291504, train_time = 0.103014945983886

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000022000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000022000/assets
INFO:absl:step = 22050, loss = 2.890740
INFO:absl:277.737 steps/sec
INFO:absl:collect_time = 0.07994413375854492, train_time = 0.1000821590423584
INFO:absl:step = 22100, loss = 0.592835
INFO:absl:264.462 steps/sec
INFO:absl:collect_time = 0.08749508857727051, train_time = 0.10156798362731934
INFO:absl:step = 22150, loss = 1.314694
INFO:absl:284.912 steps/sec
INFO:absl:collect_time = 0.07896733283996582, train_time = 0.09652566909790039
INFO:absl:step = 22200, loss = 0.728177
INFO:absl:285.222 steps/sec
INFO:absl:collect_time = 0.07714986801147461, train_time = 0.09815192222595215
INFO:absl:step = 22250, loss = 1.908714
INFO:absl:251.489 steps/sec
INFO:absl:collect_time = 0.07688379287719727, train_time = 0.12193179130554199
INFO:absl:step = 22300, loss = 0.516932
INFO:absl:294.181 steps/sec
INFO:absl:collect_time = 0.07644152641296387, train_time = 0.093522071838378

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000022500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000022500/assets
INFO:absl:step = 22550, loss = 0.327956
INFO:absl:312.182 steps/sec
INFO:absl:collect_time = 0.062274932861328125, train_time = 0.09788799285888672
INFO:absl:step = 22600, loss = 2.295183
INFO:absl:267.137 steps/sec
INFO:absl:collect_time = 0.08711910247802734, train_time = 0.10005068778991699
INFO:absl:step = 22650, loss = 0.476926
INFO:absl:266.173 steps/sec
INFO:absl:collect_time = 0.08427667617797852, train_time = 0.10357093811035156
INFO:absl:step = 22700, loss = 0.451653
INFO:absl:284.131 steps/sec
INFO:absl:collect_time = 0.07968926429748535, train_time = 0.09628605842590332
INFO:absl:step = 22750, loss = 0.381163
INFO:absl:282.745 steps/sec
INFO:absl:collect_time = 0.07924580574035645, train_time = 0.09759187698364258
INFO:absl:step = 22800, loss = 0.345754
INFO:absl:317.822 steps/sec
INFO:absl:collect_time = 0.0711979866027832, train_time = 0.08612298965454

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000023000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000023000/assets
INFO:absl:step = 23050, loss = 3.873787
INFO:absl:291.867 steps/sec
INFO:absl:collect_time = 0.07251095771789551, train_time = 0.0988001823425293
INFO:absl:step = 23100, loss = 0.472751
INFO:absl:289.081 steps/sec
INFO:absl:collect_time = 0.08509492874145508, train_time = 0.08786702156066895
INFO:absl:step = 23150, loss = 1.555418
INFO:absl:285.656 steps/sec
INFO:absl:collect_time = 0.07810091972351074, train_time = 0.09693503379821777
INFO:absl:step = 23200, loss = 1.413200
INFO:absl:286.980 steps/sec
INFO:absl:collect_time = 0.07930803298950195, train_time = 0.09492039680480957
INFO:absl:step = 23250, loss = 1.300969
INFO:absl:262.049 steps/sec
INFO:absl:collect_time = 0.09003496170043945, train_time = 0.10076904296875
INFO:absl:step = 23300, loss = 1.174142
INFO:absl:268.369 steps/sec
INFO:absl:collect_time = 0.08750605583190918, train_time = 0.09880447387695312


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000023500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000023500/assets
INFO:absl:step = 23550, loss = 0.494340
INFO:absl:267.113 steps/sec
INFO:absl:collect_time = 0.08048295974731445, train_time = 0.1067037582397461
INFO:absl:step = 23600, loss = 0.643438
INFO:absl:270.151 steps/sec
INFO:absl:collect_time = 0.08191990852355957, train_time = 0.10316181182861328
INFO:absl:step = 23650, loss = 0.518403
INFO:absl:251.028 steps/sec
INFO:absl:collect_time = 0.09195232391357422, train_time = 0.10722899436950684
INFO:absl:step = 23700, loss = 0.887440
INFO:absl:267.669 steps/sec
INFO:absl:collect_time = 0.08516407012939453, train_time = 0.10163402557373047
INFO:absl:step = 23750, loss = 0.566847
INFO:absl:257.873 steps/sec
INFO:absl:collect_time = 0.09540486335754395, train_time = 0.09848904609680176
INFO:absl:step = 23800, loss = 4.435362
INFO:absl:290.779 steps/sec
INFO:absl:collect_time = 0.08134293556213379, train_time = 0.090609073638916

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000024000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000024000/assets
INFO:absl:step = 24050, loss = 0.813378
INFO:absl:300.539 steps/sec
INFO:absl:collect_time = 0.06981801986694336, train_time = 0.09654998779296875
INFO:absl:step = 24100, loss = 1.106540
INFO:absl:266.497 steps/sec
INFO:absl:collect_time = 0.08289027214050293, train_time = 0.10472917556762695
INFO:absl:step = 24150, loss = 1.893652
INFO:absl:278.309 steps/sec
INFO:absl:collect_time = 0.08099102973937988, train_time = 0.09866523742675781
INFO:absl:step = 24200, loss = 0.534160
INFO:absl:283.158 steps/sec
INFO:absl:collect_time = 0.08340263366699219, train_time = 0.09317731857299805
INFO:absl:step = 24250, loss = 0.937282
INFO:absl:265.433 steps/sec
INFO:absl:collect_time = 0.0916740894317627, train_time = 0.09669709205627441
INFO:absl:step = 24300, loss = 0.445490
INFO:absl:257.274 steps/sec
INFO:absl:collect_time = 0.08299612998962402, train_time = 0.111349105834960

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000024500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000024500/assets
INFO:absl:step = 24550, loss = 1.594351
INFO:absl:247.029 steps/sec
INFO:absl:collect_time = 0.09108853340148926, train_time = 0.11131715774536133
INFO:absl:step = 24600, loss = 0.730341
INFO:absl:259.118 steps/sec
INFO:absl:collect_time = 0.0891718864440918, train_time = 0.1037900447845459
INFO:absl:step = 24650, loss = 1.830380
INFO:absl:279.460 steps/sec
INFO:absl:collect_time = 0.07945656776428223, train_time = 0.09945988655090332
INFO:absl:step = 24700, loss = 0.486215
INFO:absl:290.229 steps/sec
INFO:absl:collect_time = 0.07272171974182129, train_time = 0.09955620765686035
INFO:absl:step = 24750, loss = 1.060379
INFO:absl:271.139 steps/sec
INFO:absl:collect_time = 0.08797478675842285, train_time = 0.09643220901489258
INFO:absl:step = 24800, loss = 0.531606
INFO:absl:297.560 steps/sec
INFO:absl:collect_time = 0.0778207778930664, train_time = 0.09021282196044922

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000025000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000025000/assets
INFO:absl:step = 25050, loss = 0.835343
INFO:absl:232.551 steps/sec
INFO:absl:collect_time = 0.07848596572875977, train_time = 0.1365208625793457
INFO:absl:step = 25100, loss = 0.413110
INFO:absl:289.209 steps/sec
INFO:absl:collect_time = 0.07625222206115723, train_time = 0.0966329574584961
INFO:absl:step = 25150, loss = 0.489721
INFO:absl:270.949 steps/sec
INFO:absl:collect_time = 0.08155107498168945, train_time = 0.10298562049865723
INFO:absl:step = 25200, loss = 1.572600
INFO:absl:264.529 steps/sec
INFO:absl:collect_time = 0.08698797225952148, train_time = 0.10202693939208984
INFO:absl:step = 25250, loss = 0.211656
INFO:absl:256.731 steps/sec
INFO:absl:collect_time = 0.08633208274841309, train_time = 0.10842418670654297
INFO:absl:step = 25300, loss = 0.747391
INFO:absl:270.993 steps/sec
INFO:absl:collect_time = 0.08886027336120605, train_time = 0.0956463813781738

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000025500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000025500/assets
INFO:absl:step = 25550, loss = 0.536993
INFO:absl:237.570 steps/sec
INFO:absl:collect_time = 0.07028579711914062, train_time = 0.14017844200134277
INFO:absl:step = 25600, loss = 2.447760
INFO:absl:231.495 steps/sec
INFO:absl:collect_time = 0.11080598831176758, train_time = 0.10518097877502441
INFO:absl:step = 25650, loss = 1.264019
INFO:absl:301.888 steps/sec
INFO:absl:collect_time = 0.07447004318237305, train_time = 0.09115409851074219
INFO:absl:step = 25700, loss = 0.877153
INFO:absl:263.758 steps/sec
INFO:absl:collect_time = 0.08013486862182617, train_time = 0.10943293571472168
INFO:absl:step = 25750, loss = 0.449109
INFO:absl:258.330 steps/sec
INFO:absl:collect_time = 0.09589052200317383, train_time = 0.09766006469726562
INFO:absl:step = 25800, loss = 0.795348
INFO:absl:197.553 steps/sec
INFO:absl:collect_time = 0.11270880699157715, train_time = 0.14038801193237

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000026000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000026000/assets
INFO:absl:step = 26050, loss = 0.378757
INFO:absl:310.428 steps/sec
INFO:absl:collect_time = 0.07035160064697266, train_time = 0.09071612358093262
INFO:absl:step = 26100, loss = 0.887760
INFO:absl:291.774 steps/sec
INFO:absl:collect_time = 0.07647895812988281, train_time = 0.09488677978515625
INFO:absl:step = 26150, loss = 0.805764
INFO:absl:284.074 steps/sec
INFO:absl:collect_time = 0.08417701721191406, train_time = 0.09183335304260254
INFO:absl:step = 26200, loss = 2.180315
INFO:absl:268.206 steps/sec
INFO:absl:collect_time = 0.09009599685668945, train_time = 0.0963280200958252
INFO:absl:step = 26250, loss = 1.652327
INFO:absl:283.271 steps/sec
INFO:absl:collect_time = 0.07558393478393555, train_time = 0.10092568397521973
INFO:absl:step = 26300, loss = 0.585726
INFO:absl:157.663 steps/sec
INFO:absl:collect_time = 0.13385915756225586, train_time = 0.183272123336792

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000026500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000026500/assets
INFO:absl:step = 26550, loss = 1.965551
INFO:absl:217.202 steps/sec
INFO:absl:collect_time = 0.0870518684387207, train_time = 0.14314866065979004
INFO:absl:step = 26600, loss = 0.445690
INFO:absl:227.311 steps/sec
INFO:absl:collect_time = 0.10922002792358398, train_time = 0.11074304580688477
INFO:absl:step = 26650, loss = 1.004504
INFO:absl:245.999 steps/sec
INFO:absl:collect_time = 0.09008097648620605, train_time = 0.11317229270935059
INFO:absl:step = 26700, loss = 4.466619
INFO:absl:249.994 steps/sec
INFO:absl:collect_time = 0.09631705284118652, train_time = 0.10368800163269043
INFO:absl:step = 26750, loss = 1.704247
INFO:absl:235.521 steps/sec
INFO:absl:collect_time = 0.1139230728149414, train_time = 0.09837198257446289
INFO:absl:step = 26800, loss = 2.746903
INFO:absl:244.734 steps/sec
INFO:absl:collect_time = 0.0909123420715332, train_time = 0.11339092254638672

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000027000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000027000/assets
INFO:absl:step = 27050, loss = 0.408760
INFO:absl:305.265 steps/sec
INFO:absl:collect_time = 0.0717463493347168, train_time = 0.09204578399658203
INFO:absl:step = 27100, loss = 0.879673
INFO:absl:309.975 steps/sec
INFO:absl:collect_time = 0.07365226745605469, train_time = 0.08765125274658203
INFO:absl:step = 27150, loss = 1.021609
INFO:absl:239.545 steps/sec
INFO:absl:collect_time = 0.09834623336791992, train_time = 0.1103830337524414
INFO:absl:step = 27200, loss = 1.297038
INFO:absl:233.900 steps/sec
INFO:absl:collect_time = 0.10482263565063477, train_time = 0.10894417762756348
INFO:absl:step = 27250, loss = 0.479521
INFO:absl:283.284 steps/sec
INFO:absl:collect_time = 0.08217310905456543, train_time = 0.09432816505432129
INFO:absl:step = 27300, loss = 0.403717
INFO:absl:266.683 steps/sec
INFO:absl:collect_time = 0.08611297607421875, train_time = 0.1013758182525634

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000027500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000027500/assets
INFO:absl:step = 27550, loss = 7.513808
INFO:absl:299.267 steps/sec
INFO:absl:collect_time = 0.07366394996643066, train_time = 0.09341096878051758
INFO:absl:step = 27600, loss = 0.894643
INFO:absl:306.246 steps/sec
INFO:absl:collect_time = 0.06992244720458984, train_time = 0.09334492683410645
INFO:absl:step = 27650, loss = 1.725776
INFO:absl:250.374 steps/sec
INFO:absl:collect_time = 0.0899970531463623, train_time = 0.10970449447631836
INFO:absl:step = 27700, loss = 4.268197
INFO:absl:277.463 steps/sec
INFO:absl:collect_time = 0.0820620059967041, train_time = 0.09814214706420898
INFO:absl:step = 27750, loss = 0.324778
INFO:absl:302.004 steps/sec
INFO:absl:collect_time = 0.07788252830505371, train_time = 0.0876779556274414
INFO:absl:step = 27800, loss = 0.614794
INFO:absl:248.889 steps/sec
INFO:absl:collect_time = 0.0702810287475586, train_time = 0.13061165809631348


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000028000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000028000/assets
INFO:absl:step = 28050, loss = 0.378856
INFO:absl:269.273 steps/sec
INFO:absl:collect_time = 0.08524799346923828, train_time = 0.10043692588806152
INFO:absl:step = 28100, loss = 0.428745
INFO:absl:249.124 steps/sec
INFO:absl:collect_time = 0.10127973556518555, train_time = 0.09942317008972168
INFO:absl:step = 28150, loss = 2.905816
INFO:absl:237.699 steps/sec
INFO:absl:collect_time = 0.10096907615661621, train_time = 0.10938072204589844
INFO:absl:step = 28200, loss = 0.569874
INFO:absl:190.569 steps/sec
INFO:absl:collect_time = 0.1138918399810791, train_time = 0.14848017692565918
INFO:absl:step = 28250, loss = 0.570912
INFO:absl:181.235 steps/sec
INFO:absl:collect_time = 0.10558629035949707, train_time = 0.17029905319213867
INFO:absl:step = 28300, loss = 2.516598
INFO:absl:215.742 steps/sec
INFO:absl:collect_time = 0.1137089729309082, train_time = 0.1180496215820312

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000028500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000028500/assets
INFO:absl:step = 28550, loss = 0.579684
INFO:absl:269.834 steps/sec
INFO:absl:collect_time = 0.07964920997619629, train_time = 0.10565018653869629
INFO:absl:step = 28600, loss = 1.870920
INFO:absl:268.427 steps/sec
INFO:absl:collect_time = 0.08520793914794922, train_time = 0.10106277465820312
INFO:absl:step = 28650, loss = 0.648379
INFO:absl:255.340 steps/sec
INFO:absl:collect_time = 0.0903160572052002, train_time = 0.10550093650817871
INFO:absl:step = 28700, loss = 1.386619
INFO:absl:275.491 steps/sec
INFO:absl:collect_time = 0.08320188522338867, train_time = 0.09829235076904297
INFO:absl:step = 28750, loss = 0.498725
INFO:absl:322.142 steps/sec
INFO:absl:collect_time = 0.06831502914428711, train_time = 0.08689594268798828
INFO:absl:step = 28800, loss = 6.724638
INFO:absl:258.328 steps/sec
INFO:absl:collect_time = 0.08902502059936523, train_time = 0.104527235031127

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000029000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000029000/assets
INFO:absl:step = 29050, loss = 0.933534
INFO:absl:257.639 steps/sec
INFO:absl:collect_time = 0.0955667495727539, train_time = 0.09850311279296875
INFO:absl:step = 29100, loss = 4.761210
INFO:absl:234.125 steps/sec
INFO:absl:collect_time = 0.10189104080200195, train_time = 0.11166977882385254
INFO:absl:step = 29150, loss = 2.838459
INFO:absl:237.591 steps/sec
INFO:absl:collect_time = 0.09544897079467773, train_time = 0.11499714851379395
INFO:absl:step = 29200, loss = 0.653517
INFO:absl:264.517 steps/sec
INFO:absl:collect_time = 0.09827375411987305, train_time = 0.09075021743774414
INFO:absl:step = 29250, loss = 1.460132
INFO:absl:267.339 steps/sec
INFO:absl:collect_time = 0.08472323417663574, train_time = 0.10230493545532227
INFO:absl:step = 29300, loss = 12.987043
INFO:absl:284.049 steps/sec
INFO:absl:collect_time = 0.07246112823486328, train_time = 0.10356473922729

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000029500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000029500/assets
INFO:absl:step = 29550, loss = 2.732585
INFO:absl:297.116 steps/sec
INFO:absl:collect_time = 0.07446622848510742, train_time = 0.09381794929504395
INFO:absl:step = 29600, loss = 0.682435
INFO:absl:298.952 steps/sec
INFO:absl:collect_time = 0.0706472396850586, train_time = 0.0966038703918457
INFO:absl:step = 29650, loss = 5.801513
INFO:absl:268.424 steps/sec
INFO:absl:collect_time = 0.08833479881286621, train_time = 0.09793734550476074
INFO:absl:step = 29700, loss = 0.581281
INFO:absl:234.732 steps/sec
INFO:absl:collect_time = 0.10187625885009766, train_time = 0.11113262176513672
INFO:absl:step = 29750, loss = 2.728086
INFO:absl:230.652 steps/sec
INFO:absl:collect_time = 0.09859681129455566, train_time = 0.1181800365447998
INFO:absl:step = 29800, loss = 0.905490
INFO:absl:255.839 steps/sec
INFO:absl:collect_time = 0.09436535835266113, train_time = 0.10106968879699707

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000030000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000030000/assets
INFO:absl:step = 30050, loss = 1.188113
INFO:absl:296.620 steps/sec
INFO:absl:collect_time = 0.06789684295654297, train_time = 0.10066914558410645
INFO:absl:step = 30100, loss = 0.694215
INFO:absl:282.778 steps/sec
INFO:absl:collect_time = 0.07943201065063477, train_time = 0.09738516807556152
INFO:absl:step = 30150, loss = 0.466721
INFO:absl:295.305 steps/sec
INFO:absl:collect_time = 0.07459092140197754, train_time = 0.09472537040710449
INFO:absl:step = 30200, loss = 4.888458
INFO:absl:299.168 steps/sec
INFO:absl:collect_time = 0.07754325866699219, train_time = 0.08958697319030762
INFO:absl:step = 30250, loss = 0.648154
INFO:absl:271.069 steps/sec
INFO:absl:collect_time = 0.08657479286193848, train_time = 0.09787988662719727
INFO:absl:step = 30300, loss = 0.762757
INFO:absl:271.357 steps/sec
INFO:absl:collect_time = 0.08637571334838867, train_time = 0.09788322448730

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000030500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000030500/assets
INFO:absl:step = 30550, loss = 0.605174
INFO:absl:304.727 steps/sec
INFO:absl:collect_time = 0.06640076637268066, train_time = 0.09768056869506836
INFO:absl:step = 30600, loss = 0.917678
INFO:absl:268.826 steps/sec
INFO:absl:collect_time = 0.07989192008972168, train_time = 0.10610198974609375
INFO:absl:step = 30650, loss = 0.475770
INFO:absl:275.406 steps/sec
INFO:absl:collect_time = 0.08759808540344238, train_time = 0.09395217895507812
INFO:absl:step = 30700, loss = 1.059051
INFO:absl:291.279 steps/sec
INFO:absl:collect_time = 0.07495522499084473, train_time = 0.09670162200927734
INFO:absl:step = 30750, loss = 0.369909
INFO:absl:286.057 steps/sec
INFO:absl:collect_time = 0.0824439525604248, train_time = 0.0923464298248291
INFO:absl:step = 30800, loss = 1.482520
INFO:absl:285.220 steps/sec
INFO:absl:collect_time = 0.08186125755310059, train_time = 0.0934422016143798

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000031000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000031000/assets
INFO:absl:step = 31050, loss = 0.310472
INFO:absl:292.087 steps/sec
INFO:absl:collect_time = 0.07698678970336914, train_time = 0.09419536590576172
INFO:absl:step = 31100, loss = 2.376977
INFO:absl:259.125 steps/sec
INFO:absl:collect_time = 0.09050321578979492, train_time = 0.10245370864868164
INFO:absl:step = 31150, loss = 1.674321
INFO:absl:201.873 steps/sec
INFO:absl:collect_time = 0.11825871467590332, train_time = 0.12942194938659668
INFO:absl:step = 31200, loss = 0.674099
INFO:absl:235.643 steps/sec
INFO:absl:collect_time = 0.09374094009399414, train_time = 0.11844420433044434
INFO:absl:step = 31250, loss = 1.272683
INFO:absl:242.391 steps/sec
INFO:absl:collect_time = 0.09772777557373047, train_time = 0.1085507869720459
INFO:absl:step = 31300, loss = 0.458871
INFO:absl:269.732 steps/sec
INFO:absl:collect_time = 0.09366393089294434, train_time = 0.091705083847045

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000031500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000031500/assets
INFO:absl:step = 31550, loss = 1.567216
INFO:absl:210.018 steps/sec
INFO:absl:collect_time = 0.1082615852355957, train_time = 0.12981343269348145
INFO:absl:step = 31600, loss = 2.030240
INFO:absl:253.644 steps/sec
INFO:absl:collect_time = 0.09418988227844238, train_time = 0.10293698310852051
INFO:absl:step = 31650, loss = 3.007765
INFO:absl:210.151 steps/sec
INFO:absl:collect_time = 0.11694836616516113, train_time = 0.12097597122192383
INFO:absl:step = 31700, loss = 1.400626
INFO:absl:239.973 steps/sec
INFO:absl:collect_time = 0.09968113899230957, train_time = 0.10867595672607422
INFO:absl:step = 31750, loss = 0.403495
INFO:absl:248.698 steps/sec
INFO:absl:collect_time = 0.09313082695007324, train_time = 0.10791611671447754
INFO:absl:step = 31800, loss = 1.299856
INFO:absl:234.657 steps/sec
INFO:absl:collect_time = 0.09699630737304688, train_time = 0.116080284118652

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000032000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000032000/assets
INFO:absl:step = 32050, loss = 1.407326
INFO:absl:250.287 steps/sec
INFO:absl:collect_time = 0.0860891342163086, train_time = 0.11368179321289062
INFO:absl:step = 32100, loss = 1.851470
INFO:absl:251.407 steps/sec
INFO:absl:collect_time = 0.09052681922912598, train_time = 0.10835409164428711
INFO:absl:step = 32150, loss = 0.733193
INFO:absl:264.318 steps/sec
INFO:absl:collect_time = 0.08456587791442871, train_time = 0.10460019111633301
INFO:absl:step = 32200, loss = 0.596464
INFO:absl:274.234 steps/sec
INFO:absl:collect_time = 0.08276510238647461, train_time = 0.09956073760986328
INFO:absl:step = 32250, loss = 7.080750
INFO:absl:245.436 steps/sec
INFO:absl:collect_time = 0.09919500350952148, train_time = 0.10452389717102051
INFO:absl:step = 32300, loss = 2.630346
INFO:absl:239.397 steps/sec
INFO:absl:collect_time = 0.10487890243530273, train_time = 0.103979110717773

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000032500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000032500/assets
INFO:absl:step = 32550, loss = 2.126656
INFO:absl:215.895 steps/sec
INFO:absl:collect_time = 0.11461400985717773, train_time = 0.11697983741760254
INFO:absl:step = 32600, loss = 1.533375
INFO:absl:258.650 steps/sec
INFO:absl:collect_time = 0.08762717247009277, train_time = 0.10568428039550781
INFO:absl:step = 32650, loss = 1.598842
INFO:absl:234.538 steps/sec
INFO:absl:collect_time = 0.09958124160766602, train_time = 0.11360406875610352
INFO:absl:step = 32700, loss = 0.861880
INFO:absl:278.932 steps/sec
INFO:absl:collect_time = 0.08414626121520996, train_time = 0.0951089859008789
INFO:absl:step = 32750, loss = 0.824923
INFO:absl:283.766 steps/sec
INFO:absl:collect_time = 0.0794367790222168, train_time = 0.09676504135131836
INFO:absl:step = 32800, loss = 1.907054
INFO:absl:238.394 steps/sec
INFO:absl:collect_time = 0.1097710132598877, train_time = 0.09996604919433594

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000033000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000033000/assets
INFO:absl:step = 33050, loss = 2.834840
INFO:absl:289.363 steps/sec
INFO:absl:collect_time = 0.07982492446899414, train_time = 0.09296822547912598
INFO:absl:step = 33100, loss = 3.549296
INFO:absl:312.276 steps/sec
INFO:absl:collect_time = 0.07255315780639648, train_time = 0.08756184577941895
INFO:absl:step = 33150, loss = 0.677678
INFO:absl:280.998 steps/sec
INFO:absl:collect_time = 0.07775092124938965, train_time = 0.10018610954284668
INFO:absl:step = 33200, loss = 0.345876
INFO:absl:252.103 steps/sec
INFO:absl:collect_time = 0.08165740966796875, train_time = 0.11667394638061523
INFO:absl:step = 33250, loss = 1.029427
INFO:absl:287.137 steps/sec
INFO:absl:collect_time = 0.0792238712310791, train_time = 0.0949091911315918
INFO:absl:step = 33300, loss = 3.493516
INFO:absl:263.310 steps/sec
INFO:absl:collect_time = 0.08498024940490723, train_time = 0.1049096584320068

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000033500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000033500/assets
INFO:absl:step = 33550, loss = 2.110599
INFO:absl:247.182 steps/sec
INFO:absl:collect_time = 0.09521794319152832, train_time = 0.10706186294555664
INFO:absl:step = 33600, loss = 1.145651
INFO:absl:267.334 steps/sec
INFO:absl:collect_time = 0.0854949951171875, train_time = 0.10153698921203613
INFO:absl:step = 33650, loss = 0.392991
INFO:absl:270.298 steps/sec
INFO:absl:collect_time = 0.08736610412597656, train_time = 0.09761476516723633
INFO:absl:step = 33700, loss = 1.169528
INFO:absl:263.608 steps/sec
INFO:absl:collect_time = 0.09301972389221191, train_time = 0.09665584564208984
INFO:absl:step = 33750, loss = 1.095726
INFO:absl:240.736 steps/sec
INFO:absl:collect_time = 0.08820676803588867, train_time = 0.11948990821838379
INFO:absl:step = 33800, loss = 1.731503
INFO:absl:240.693 steps/sec
INFO:absl:collect_time = 0.0881502628326416, train_time = 0.1195828914642334

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000034000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000034000/assets
INFO:absl:step = 34050, loss = 0.396428
INFO:absl:286.894 steps/sec
INFO:absl:collect_time = 0.07799839973449707, train_time = 0.09628176689147949
INFO:absl:step = 34100, loss = 0.199051
INFO:absl:264.624 steps/sec
INFO:absl:collect_time = 0.07528114318847656, train_time = 0.11366605758666992
INFO:absl:step = 34150, loss = 1.924595
INFO:absl:235.741 steps/sec
INFO:absl:collect_time = 0.0992729663848877, train_time = 0.1128244400024414
INFO:absl:step = 34200, loss = 4.167245
INFO:absl:206.468 steps/sec
INFO:absl:collect_time = 0.09980583190917969, train_time = 0.14236211776733398
INFO:absl:step = 34250, loss = 2.801863
INFO:absl:232.020 steps/sec
INFO:absl:collect_time = 0.10662221908569336, train_time = 0.10887670516967773
INFO:absl:step = 34300, loss = 0.731667
INFO:absl:235.782 steps/sec
INFO:absl:collect_time = 0.10233330726623535, train_time = 0.1097266674041748

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000034500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000034500/assets
INFO:absl:step = 34550, loss = 1.171617
INFO:absl:221.814 steps/sec
INFO:absl:collect_time = 0.0966801643371582, train_time = 0.12873363494873047
INFO:absl:step = 34600, loss = 2.108478
INFO:absl:244.299 steps/sec
INFO:absl:collect_time = 0.1005702018737793, train_time = 0.10409688949584961
INFO:absl:step = 34650, loss = 0.966168
INFO:absl:290.604 steps/sec
INFO:absl:collect_time = 0.07935500144958496, train_time = 0.09270071983337402
INFO:absl:step = 34700, loss = 3.244466
INFO:absl:252.825 steps/sec
INFO:absl:collect_time = 0.09168386459350586, train_time = 0.10608124732971191
INFO:absl:step = 34750, loss = 0.495699
INFO:absl:225.362 steps/sec
INFO:absl:collect_time = 0.10927677154541016, train_time = 0.11258888244628906
INFO:absl:step = 34800, loss = 0.346429
INFO:absl:246.652 steps/sec
INFO:absl:collect_time = 0.09988212585449219, train_time = 0.1028330326080322

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000035000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000035000/assets
INFO:absl:step = 35050, loss = 1.490644
INFO:absl:242.735 steps/sec
INFO:absl:collect_time = 0.08630013465881348, train_time = 0.11968612670898438
INFO:absl:step = 35100, loss = 0.466728
INFO:absl:240.682 steps/sec
INFO:absl:collect_time = 0.09090805053710938, train_time = 0.11683487892150879
INFO:absl:step = 35150, loss = 2.058692
INFO:absl:260.737 steps/sec
INFO:absl:collect_time = 0.08952045440673828, train_time = 0.10224390029907227
INFO:absl:step = 35200, loss = 1.181963
INFO:absl:262.889 steps/sec
INFO:absl:collect_time = 0.0864405632019043, train_time = 0.10375404357910156
INFO:absl:step = 35250, loss = 0.543122
INFO:absl:256.623 steps/sec
INFO:absl:collect_time = 0.09139299392700195, train_time = 0.10344529151916504
INFO:absl:step = 35300, loss = 0.130337
INFO:absl:282.628 steps/sec
INFO:absl:collect_time = 0.08133101463317871, train_time = 0.095579862594604

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000035500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000035500/assets
INFO:absl:step = 35550, loss = 1.368705
INFO:absl:258.773 steps/sec
INFO:absl:collect_time = 0.08989930152893066, train_time = 0.10332012176513672
INFO:absl:step = 35600, loss = 6.075073
INFO:absl:234.287 steps/sec
INFO:absl:collect_time = 0.09320330619812012, train_time = 0.12021017074584961
INFO:absl:step = 35650, loss = 7.673312
INFO:absl:213.058 steps/sec
INFO:absl:collect_time = 0.10325336456298828, train_time = 0.1314249038696289
INFO:absl:step = 35700, loss = 1.707029
INFO:absl:212.561 steps/sec
INFO:absl:collect_time = 0.1124422550201416, train_time = 0.12278461456298828
INFO:absl:step = 35750, loss = 1.259003
INFO:absl:219.275 steps/sec
INFO:absl:collect_time = 0.09997844696044922, train_time = 0.12804603576660156
INFO:absl:step = 35800, loss = 0.996759
INFO:absl:273.873 steps/sec
INFO:absl:collect_time = 0.0878748893737793, train_time = 0.09469175338745117

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000036000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000036000/assets
INFO:absl:step = 36050, loss = 1.850601
INFO:absl:251.011 steps/sec
INFO:absl:collect_time = 0.08333182334899902, train_time = 0.11586284637451172
INFO:absl:step = 36100, loss = 0.407940
INFO:absl:174.885 steps/sec
INFO:absl:collect_time = 0.13950800895690918, train_time = 0.1463940143585205
INFO:absl:step = 36150, loss = 4.397059
INFO:absl:229.775 steps/sec
INFO:absl:collect_time = 0.10614824295043945, train_time = 0.11145591735839844
INFO:absl:step = 36200, loss = 1.414541
INFO:absl:252.686 steps/sec
INFO:absl:collect_time = 0.0915679931640625, train_time = 0.10630607604980469
INFO:absl:step = 36250, loss = 16.056690
INFO:absl:231.357 steps/sec
INFO:absl:collect_time = 0.09676814079284668, train_time = 0.11934804916381836
INFO:absl:step = 36300, loss = 0.743653
INFO:absl:258.411 steps/sec
INFO:absl:collect_time = 0.08725500106811523, train_time = 0.106235027313232

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000036500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000036500/assets
INFO:absl:step = 36550, loss = 4.648633
INFO:absl:284.842 steps/sec
INFO:absl:collect_time = 0.07350683212280273, train_time = 0.10202884674072266
INFO:absl:step = 36600, loss = 2.351145
INFO:absl:266.894 steps/sec
INFO:absl:collect_time = 0.08963823318481445, train_time = 0.0977020263671875
INFO:absl:step = 36650, loss = 2.190078
INFO:absl:263.816 steps/sec
INFO:absl:collect_time = 0.09280109405517578, train_time = 0.0967249870300293
INFO:absl:step = 36700, loss = 1.442540
INFO:absl:249.475 steps/sec
INFO:absl:collect_time = 0.09654974937438965, train_time = 0.10387110710144043
INFO:absl:step = 36750, loss = 0.965823
INFO:absl:288.947 steps/sec
INFO:absl:collect_time = 0.07859992980957031, train_time = 0.09444189071655273
INFO:absl:step = 36800, loss = 2.155527
INFO:absl:251.694 steps/sec
INFO:absl:collect_time = 0.0913538932800293, train_time = 0.1073000431060791


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000037000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000037000/assets
INFO:absl:step = 37050, loss = 1.071191
INFO:absl:243.078 steps/sec
INFO:absl:collect_time = 0.08958578109741211, train_time = 0.11610960960388184
INFO:absl:step = 37100, loss = 1.173108
INFO:absl:173.064 steps/sec
INFO:absl:collect_time = 0.1251671314239502, train_time = 0.1637427806854248
INFO:absl:step = 37150, loss = 0.654155
INFO:absl:231.289 steps/sec
INFO:absl:collect_time = 0.09881782531738281, train_time = 0.11736226081848145
INFO:absl:step = 37200, loss = 0.735436
INFO:absl:224.398 steps/sec
INFO:absl:collect_time = 0.10135507583618164, train_time = 0.12146377563476562
INFO:absl:step = 37250, loss = 0.513879
INFO:absl:276.960 steps/sec
INFO:absl:collect_time = 0.07997441291809082, train_time = 0.10055685043334961
INFO:absl:step = 37300, loss = 0.782167
INFO:absl:206.111 steps/sec
INFO:absl:collect_time = 0.1162269115447998, train_time = 0.12636113166809082

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000037500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000037500/assets
INFO:absl:step = 37550, loss = 1.675645
INFO:absl:216.198 steps/sec
INFO:absl:collect_time = 0.10341572761535645, train_time = 0.1278536319732666
INFO:absl:step = 37600, loss = 1.898650
INFO:absl:236.362 steps/sec
INFO:absl:collect_time = 0.09566712379455566, train_time = 0.11587309837341309
INFO:absl:step = 37650, loss = 1.311205
INFO:absl:291.209 steps/sec
INFO:absl:collect_time = 0.08080482482910156, train_time = 0.09089303016662598
INFO:absl:step = 37700, loss = 0.832676
INFO:absl:251.806 steps/sec
INFO:absl:collect_time = 0.09399247169494629, train_time = 0.10457301139831543
INFO:absl:step = 37750, loss = 1.387591
INFO:absl:227.194 steps/sec
INFO:absl:collect_time = 0.09772014617919922, train_time = 0.12235593795776367
INFO:absl:step = 37800, loss = 2.446346
INFO:absl:272.191 steps/sec
INFO:absl:collect_time = 0.08569884300231934, train_time = 0.097995996475219

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000038000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000038000/assets
INFO:absl:step = 38050, loss = 0.933312
INFO:absl:278.232 steps/sec
INFO:absl:collect_time = 0.07887959480285645, train_time = 0.10082626342773438
INFO:absl:step = 38100, loss = 0.847768
INFO:absl:239.412 steps/sec
INFO:absl:collect_time = 0.10166501998901367, train_time = 0.10717964172363281
INFO:absl:step = 38150, loss = 1.064269
INFO:absl:250.083 steps/sec
INFO:absl:collect_time = 0.09068894386291504, train_time = 0.10924434661865234
INFO:absl:step = 38200, loss = 2.797059
INFO:absl:217.960 steps/sec
INFO:absl:collect_time = 0.09288573265075684, train_time = 0.13651418685913086
INFO:absl:step = 38250, loss = 1.040124
INFO:absl:294.341 steps/sec
INFO:absl:collect_time = 0.0772249698638916, train_time = 0.09264612197875977
INFO:absl:step = 38300, loss = 5.056213
INFO:absl:245.249 steps/sec
INFO:absl:collect_time = 0.09043455123901367, train_time = 0.113439798355102

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000038500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000038500/assets
INFO:absl:step = 38550, loss = 1.430433
INFO:absl:283.586 steps/sec
INFO:absl:collect_time = 0.07352805137634277, train_time = 0.10278511047363281
INFO:absl:step = 38600, loss = 2.853502
INFO:absl:240.032 steps/sec
INFO:absl:collect_time = 0.08947372436523438, train_time = 0.11883163452148438
INFO:absl:step = 38650, loss = 0.384708
INFO:absl:161.051 steps/sec
INFO:absl:collect_time = 0.13736510276794434, train_time = 0.1730959415435791
INFO:absl:step = 38700, loss = 10.464804
INFO:absl:127.145 steps/sec
INFO:absl:collect_time = 0.2258462905883789, train_time = 0.1674048900604248
INFO:absl:step = 38750, loss = 0.934846
INFO:absl:244.920 steps/sec
INFO:absl:collect_time = 0.10315322875976562, train_time = 0.10099530220031738
INFO:absl:step = 38800, loss = 0.691305
INFO:absl:227.056 steps/sec
INFO:absl:collect_time = 0.09345793724060059, train_time = 0.1267518997192382

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000039000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000039000/assets
INFO:absl:step = 39050, loss = 1.004665
INFO:absl:238.489 steps/sec
INFO:absl:collect_time = 0.10400724411010742, train_time = 0.10564589500427246
INFO:absl:step = 39100, loss = 4.797210
INFO:absl:265.040 steps/sec
INFO:absl:collect_time = 0.08745479583740234, train_time = 0.1011960506439209
INFO:absl:step = 39150, loss = 2.797630
INFO:absl:271.602 steps/sec
INFO:absl:collect_time = 0.08649873733520508, train_time = 0.09759402275085449
INFO:absl:step = 39200, loss = 1.394187
INFO:absl:261.184 steps/sec
INFO:absl:collect_time = 0.0935208797454834, train_time = 0.0979151725769043
INFO:absl:step = 39250, loss = 1.452845
INFO:absl:245.471 steps/sec
INFO:absl:collect_time = 0.10929083824157715, train_time = 0.09439897537231445
INFO:absl:step = 39300, loss = 6.774278
INFO:absl:276.290 steps/sec
INFO:absl:collect_time = 0.08175015449523926, train_time = 0.09921884536743164

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000039500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000039500/assets
INFO:absl:step = 39550, loss = 1.095227
INFO:absl:239.970 steps/sec
INFO:absl:collect_time = 0.09378194808959961, train_time = 0.1145777702331543
INFO:absl:step = 39600, loss = 3.105994
INFO:absl:255.002 steps/sec
INFO:absl:collect_time = 0.09366917610168457, train_time = 0.10240793228149414
INFO:absl:step = 39650, loss = 0.778284
INFO:absl:266.780 steps/sec
INFO:absl:collect_time = 0.09293794631958008, train_time = 0.094482421875
INFO:absl:step = 39700, loss = 0.678014
INFO:absl:277.750 steps/sec
INFO:absl:collect_time = 0.09238100051879883, train_time = 0.08763694763183594
INFO:absl:step = 39750, loss = 1.073202
INFO:absl:230.785 steps/sec
INFO:absl:collect_time = 0.10317516326904297, train_time = 0.11347699165344238
INFO:absl:step = 39800, loss = 0.313932
INFO:absl:330.223 steps/sec
INFO:absl:collect_time = 0.06897091865539551, train_time = 0.08244204521179199
IN

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000040000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000040000/assets
INFO:absl:step = 40050, loss = 0.880053
INFO:absl:245.394 steps/sec
INFO:absl:collect_time = 0.09202384948730469, train_time = 0.11173009872436523
INFO:absl:step = 40100, loss = 0.336147
INFO:absl:286.262 steps/sec
INFO:absl:collect_time = 0.0765533447265625, train_time = 0.09811186790466309
INFO:absl:step = 40150, loss = 2.165032
INFO:absl:214.952 steps/sec
INFO:absl:collect_time = 0.10695576667785645, train_time = 0.12565398216247559
INFO:absl:step = 40200, loss = 4.183830
INFO:absl:265.417 steps/sec
INFO:absl:collect_time = 0.08863997459411621, train_time = 0.09974288940429688
INFO:absl:step = 40250, loss = 1.691617
INFO:absl:224.220 steps/sec
INFO:absl:collect_time = 0.1043088436126709, train_time = 0.11868596076965332
INFO:absl:step = 40300, loss = 1.368943
INFO:absl:214.041 steps/sec
INFO:absl:collect_time = 0.08572983741760254, train_time = 0.1478700637817382

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000040500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000040500/assets
INFO:absl:step = 40550, loss = 1.954580
INFO:absl:239.920 steps/sec
INFO:absl:collect_time = 0.09152436256408691, train_time = 0.11687874794006348
INFO:absl:step = 40600, loss = 4.067834
INFO:absl:254.165 steps/sec
INFO:absl:collect_time = 0.09283828735351562, train_time = 0.10388398170471191
INFO:absl:step = 40650, loss = 11.431011
INFO:absl:235.388 steps/sec
INFO:absl:collect_time = 0.10564327239990234, train_time = 0.10677242279052734
INFO:absl:step = 40700, loss = 0.621280
INFO:absl:243.189 steps/sec
INFO:absl:collect_time = 0.0987858772277832, train_time = 0.10681533813476562
INFO:absl:step = 40750, loss = 2.799237
INFO:absl:250.028 steps/sec
INFO:absl:collect_time = 0.08781909942626953, train_time = 0.11215877532958984
INFO:absl:step = 40800, loss = 1.605796
INFO:absl:285.683 steps/sec
INFO:absl:collect_time = 0.07778167724609375, train_time = 0.09723734855651

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000041000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000041000/assets
INFO:absl:step = 41050, loss = 3.579572
INFO:absl:257.538 steps/sec
INFO:absl:collect_time = 0.08667302131652832, train_time = 0.10747313499450684
INFO:absl:step = 41100, loss = 1.309279
INFO:absl:256.198 steps/sec
INFO:absl:collect_time = 0.09236621856689453, train_time = 0.10279512405395508
INFO:absl:step = 41150, loss = 3.685194
INFO:absl:226.725 steps/sec
INFO:absl:collect_time = 0.10482597351074219, train_time = 0.11570596694946289
INFO:absl:step = 41200, loss = 0.685019
INFO:absl:223.036 steps/sec
INFO:absl:collect_time = 0.11249208450317383, train_time = 0.11168694496154785
INFO:absl:step = 41250, loss = 0.543510
INFO:absl:228.626 steps/sec
INFO:absl:collect_time = 0.10290884971618652, train_time = 0.11578917503356934
INFO:absl:step = 41300, loss = 2.433960
INFO:absl:248.452 steps/sec
INFO:absl:collect_time = 0.09869027137756348, train_time = 0.10255599021911

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000041500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000041500/assets
INFO:absl:step = 41550, loss = 0.530079
INFO:absl:240.818 steps/sec
INFO:absl:collect_time = 0.10121273994445801, train_time = 0.10641336441040039
INFO:absl:step = 41600, loss = 0.762750
INFO:absl:285.936 steps/sec
INFO:absl:collect_time = 0.077545166015625, train_time = 0.09731888771057129
INFO:absl:step = 41650, loss = 1.616031
INFO:absl:249.992 steps/sec
INFO:absl:collect_time = 0.09372782707214355, train_time = 0.10627865791320801
INFO:absl:step = 41700, loss = 0.627024
INFO:absl:235.955 steps/sec
INFO:absl:collect_time = 0.10397791862487793, train_time = 0.10792708396911621
INFO:absl:step = 41750, loss = 0.745157
INFO:absl:198.608 steps/sec
INFO:absl:collect_time = 0.13353180885314941, train_time = 0.11822080612182617
INFO:absl:step = 41800, loss = 2.072354
INFO:absl:213.636 steps/sec
INFO:absl:collect_time = 0.11831927299499512, train_time = 0.1157238483428955

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000042000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000042000/assets
INFO:absl:step = 42050, loss = 1.835827
INFO:absl:175.419 steps/sec
INFO:absl:collect_time = 0.11735105514526367, train_time = 0.16768097877502441
INFO:absl:step = 42100, loss = 1.360408
INFO:absl:262.288 steps/sec
INFO:absl:collect_time = 0.07732415199279785, train_time = 0.11330604553222656
INFO:absl:step = 42150, loss = 0.555300
INFO:absl:225.274 steps/sec
INFO:absl:collect_time = 0.11328291893005371, train_time = 0.10866904258728027
INFO:absl:step = 42200, loss = 0.906540
INFO:absl:244.035 steps/sec
INFO:absl:collect_time = 0.09851884841918945, train_time = 0.1063694953918457
INFO:absl:step = 42250, loss = 2.048461
INFO:absl:172.924 steps/sec
INFO:absl:collect_time = 0.12950682640075684, train_time = 0.1596376895904541
INFO:absl:step = 42300, loss = 1.710151
INFO:absl:238.213 steps/sec
INFO:absl:collect_time = 0.08544325828552246, train_time = 0.1244530677795410

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000042500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000042500/assets
INFO:absl:step = 42550, loss = 0.571647
INFO:absl:222.468 steps/sec
INFO:absl:collect_time = 0.10216689109802246, train_time = 0.12258481979370117
INFO:absl:step = 42600, loss = 1.472187
INFO:absl:216.412 steps/sec
INFO:absl:collect_time = 0.1147451400756836, train_time = 0.11629533767700195
INFO:absl:step = 42650, loss = 2.424568
INFO:absl:273.562 steps/sec
INFO:absl:collect_time = 0.08893680572509766, train_time = 0.09383726119995117
INFO:absl:step = 42700, loss = 0.608042
INFO:absl:174.086 steps/sec
INFO:absl:collect_time = 0.13883495330810547, train_time = 0.14837908744812012
INFO:absl:step = 42750, loss = 0.884667
INFO:absl:235.498 steps/sec
INFO:absl:collect_time = 0.1034700870513916, train_time = 0.10884618759155273
INFO:absl:step = 42800, loss = 1.375546
INFO:absl:257.394 steps/sec
INFO:absl:collect_time = 0.09474515914916992, train_time = 0.0995094776153564

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000043000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000043000/assets
INFO:absl:step = 43050, loss = 0.700653
INFO:absl:239.806 steps/sec
INFO:absl:collect_time = 0.10036492347717285, train_time = 0.10813713073730469
INFO:absl:step = 43100, loss = 3.010526
INFO:absl:240.381 steps/sec
INFO:absl:collect_time = 0.09621858596801758, train_time = 0.1117849349975586
INFO:absl:step = 43150, loss = 2.117424
INFO:absl:248.150 steps/sec
INFO:absl:collect_time = 0.09457898139953613, train_time = 0.10691213607788086
INFO:absl:step = 43200, loss = 0.244867
INFO:absl:267.382 steps/sec
INFO:absl:collect_time = 0.09091019630432129, train_time = 0.09608793258666992
INFO:absl:step = 43250, loss = 1.153368
INFO:absl:161.525 steps/sec
INFO:absl:collect_time = 0.10365939140319824, train_time = 0.20589089393615723
INFO:absl:step = 43300, loss = 0.845266
INFO:absl:187.129 steps/sec
INFO:absl:collect_time = 0.13612985610961914, train_time = 0.131065130233764

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000043500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000043500/assets
INFO:absl:step = 43550, loss = 1.061434
INFO:absl:210.129 steps/sec
INFO:absl:collect_time = 0.13091111183166504, train_time = 0.10703802108764648
INFO:absl:step = 43600, loss = 1.062253
INFO:absl:244.808 steps/sec
INFO:absl:collect_time = 0.09111142158508301, train_time = 0.11313056945800781
INFO:absl:step = 43650, loss = 0.767741
INFO:absl:255.438 steps/sec
INFO:absl:collect_time = 0.08447599411010742, train_time = 0.11126589775085449
INFO:absl:step = 43700, loss = 1.802525
INFO:absl:182.641 steps/sec
INFO:absl:collect_time = 0.13793492317199707, train_time = 0.13582611083984375
INFO:absl:step = 43750, loss = 0.968453
INFO:absl:254.867 steps/sec
INFO:absl:collect_time = 0.08516693115234375, train_time = 0.11101412773132324
INFO:absl:step = 43800, loss = 0.555552
INFO:absl:169.448 steps/sec
INFO:absl:collect_time = 0.13654422760009766, train_time = 0.15853118896484

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000044000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000044000/assets
INFO:absl:step = 44050, loss = 3.197127
INFO:absl:218.587 steps/sec
INFO:absl:collect_time = 0.10610127449035645, train_time = 0.12264108657836914
INFO:absl:step = 44100, loss = 20.193684
INFO:absl:192.651 steps/sec
INFO:absl:collect_time = 0.130202054977417, train_time = 0.1293351650238037
INFO:absl:step = 44150, loss = 2.693974
INFO:absl:214.470 steps/sec
INFO:absl:collect_time = 0.10340404510498047, train_time = 0.1297292709350586
INFO:absl:step = 44200, loss = 1.913730
INFO:absl:262.099 steps/sec
INFO:absl:collect_time = 0.08699512481689453, train_time = 0.10377216339111328
INFO:absl:step = 44250, loss = 0.936060
INFO:absl:230.454 steps/sec
INFO:absl:collect_time = 0.09778618812561035, train_time = 0.11917686462402344
INFO:absl:step = 44300, loss = 0.498136
INFO:absl:201.155 steps/sec
INFO:absl:collect_time = 0.12550711631774902, train_time = 0.12305784225463867

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000044500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000044500/assets
INFO:absl:step = 44550, loss = 0.607143
INFO:absl:259.189 steps/sec
INFO:absl:collect_time = 0.08687710762023926, train_time = 0.1060326099395752
INFO:absl:step = 44600, loss = 0.797925
INFO:absl:263.007 steps/sec
INFO:absl:collect_time = 0.08870577812194824, train_time = 0.10140323638916016
INFO:absl:step = 44650, loss = 1.971471
INFO:absl:247.724 steps/sec
INFO:absl:collect_time = 0.10388350486755371, train_time = 0.09795403480529785
INFO:absl:step = 44700, loss = 1.786437
INFO:absl:251.284 steps/sec
INFO:absl:collect_time = 0.08588480949401855, train_time = 0.11309361457824707
INFO:absl:step = 44750, loss = 4.331830
INFO:absl:227.698 steps/sec
INFO:absl:collect_time = 0.10663414001464844, train_time = 0.11295485496520996
INFO:absl:step = 44800, loss = 0.330798
INFO:absl:280.870 steps/sec
INFO:absl:collect_time = 0.08270716667175293, train_time = 0.095311403274536

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000045000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000045000/assets
INFO:absl:step = 45050, loss = 1.389466
INFO:absl:261.270 steps/sec
INFO:absl:collect_time = 0.08147192001342773, train_time = 0.10990118980407715
INFO:absl:step = 45100, loss = 3.292096
INFO:absl:257.247 steps/sec
INFO:absl:collect_time = 0.0961906909942627, train_time = 0.0981748104095459
INFO:absl:step = 45150, loss = 2.558369
INFO:absl:247.973 steps/sec
INFO:absl:collect_time = 0.09328675270080566, train_time = 0.10834789276123047
INFO:absl:step = 45200, loss = 0.748684
INFO:absl:259.615 steps/sec
INFO:absl:collect_time = 0.09045600891113281, train_time = 0.10213708877563477
INFO:absl:step = 45250, loss = 0.988218
INFO:absl:291.949 steps/sec
INFO:absl:collect_time = 0.07491087913513184, train_time = 0.09635186195373535
INFO:absl:step = 45300, loss = 1.406425
INFO:absl:286.020 steps/sec
INFO:absl:collect_time = 0.08020305633544922, train_time = 0.0946099758148193

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000045500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000045500/assets
INFO:absl:step = 45550, loss = 1.444727
INFO:absl:255.948 steps/sec
INFO:absl:collect_time = 0.0850670337677002, train_time = 0.11028504371643066
INFO:absl:step = 45600, loss = 0.700916
INFO:absl:264.064 steps/sec
INFO:absl:collect_time = 0.08444523811340332, train_time = 0.10490298271179199
INFO:absl:step = 45650, loss = 1.423931
INFO:absl:278.470 steps/sec
INFO:absl:collect_time = 0.0780029296875, train_time = 0.10154938697814941
INFO:absl:step = 45700, loss = 7.915609
INFO:absl:219.789 steps/sec
INFO:absl:collect_time = 0.11099815368652344, train_time = 0.11649298667907715
INFO:absl:step = 45750, loss = 0.786308
INFO:absl:235.306 steps/sec
INFO:absl:collect_time = 0.09957218170166016, train_time = 0.11291694641113281
INFO:absl:step = 45800, loss = 0.897320
INFO:absl:269.892 steps/sec
INFO:absl:collect_time = 0.07762980461120605, train_time = 0.10762977600097656
I

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000046000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000046000/assets
INFO:absl:step = 46050, loss = 0.525152
INFO:absl:276.270 steps/sec
INFO:absl:collect_time = 0.07808423042297363, train_time = 0.10289812088012695
INFO:absl:step = 46100, loss = 4.254232
INFO:absl:250.952 steps/sec
INFO:absl:collect_time = 0.08958816528320312, train_time = 0.10965347290039062
INFO:absl:step = 46150, loss = 1.282307
INFO:absl:269.443 steps/sec
INFO:absl:collect_time = 0.0860600471496582, train_time = 0.09950804710388184
INFO:absl:step = 46200, loss = 1.508260
INFO:absl:249.901 steps/sec
INFO:absl:collect_time = 0.0966951847076416, train_time = 0.10338401794433594
INFO:absl:step = 46250, loss = 4.496926
INFO:absl:224.989 steps/sec
INFO:absl:collect_time = 0.10585427284240723, train_time = 0.1163790225982666
INFO:absl:step = 46300, loss = 5.545374
INFO:absl:272.816 steps/sec
INFO:absl:collect_time = 0.07936358451843262, train_time = 0.10390996932983398

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000046500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000046500/assets
INFO:absl:step = 46550, loss = 3.163092
INFO:absl:258.024 steps/sec
INFO:absl:collect_time = 0.08653688430786133, train_time = 0.10724329948425293
INFO:absl:step = 46600, loss = 0.705267
INFO:absl:268.251 steps/sec
INFO:absl:collect_time = 0.08765316009521484, train_time = 0.0987393856048584
INFO:absl:step = 46650, loss = 1.108124
INFO:absl:246.474 steps/sec
INFO:absl:collect_time = 0.09312605857849121, train_time = 0.10973501205444336
INFO:absl:step = 46700, loss = 1.101244
INFO:absl:244.839 steps/sec
INFO:absl:collect_time = 0.09989619255065918, train_time = 0.10432004928588867
INFO:absl:step = 46750, loss = 1.060301
INFO:absl:259.309 steps/sec
INFO:absl:collect_time = 0.0917959213256836, train_time = 0.10102415084838867
INFO:absl:step = 46800, loss = 4.080630
INFO:absl:238.713 steps/sec
INFO:absl:collect_time = 0.1002199649810791, train_time = 0.10923624038696289

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000047000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000047000/assets
INFO:absl:step = 47050, loss = 1.776473
INFO:absl:260.162 steps/sec
INFO:absl:collect_time = 0.08893322944641113, train_time = 0.10325503349304199
INFO:absl:step = 47100, loss = 0.792905
INFO:absl:217.376 steps/sec
INFO:absl:collect_time = 0.10541296005249023, train_time = 0.1246027946472168
INFO:absl:step = 47150, loss = 2.242041
INFO:absl:257.303 steps/sec
INFO:absl:collect_time = 0.09684491157531738, train_time = 0.09747886657714844
INFO:absl:step = 47200, loss = 4.923855
INFO:absl:249.709 steps/sec
INFO:absl:collect_time = 0.0924062728881836, train_time = 0.10782694816589355
INFO:absl:step = 47250, loss = 2.357864
INFO:absl:244.009 steps/sec
INFO:absl:collect_time = 0.10050415992736816, train_time = 0.10440611839294434
INFO:absl:step = 47300, loss = 1.834432
INFO:absl:245.375 steps/sec
INFO:absl:collect_time = 0.09780383110046387, train_time = 0.1059660911560058

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000047500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000047500/assets
INFO:absl:step = 47550, loss = 0.967490
INFO:absl:262.084 steps/sec
INFO:absl:collect_time = 0.08632898330688477, train_time = 0.10444974899291992
INFO:absl:step = 47600, loss = 6.600316
INFO:absl:215.356 steps/sec
INFO:absl:collect_time = 0.12694001197814941, train_time = 0.10523390769958496
INFO:absl:step = 47650, loss = 1.209136
INFO:absl:226.798 steps/sec
INFO:absl:collect_time = 0.10865330696105957, train_time = 0.11180710792541504
INFO:absl:step = 47700, loss = 4.462541
INFO:absl:249.642 steps/sec
INFO:absl:collect_time = 0.09984087944030762, train_time = 0.10044622421264648
INFO:absl:step = 47750, loss = 1.096350
INFO:absl:268.572 steps/sec
INFO:absl:collect_time = 0.08692193031311035, train_time = 0.09924769401550293
INFO:absl:step = 47800, loss = 1.037406
INFO:absl:207.639 steps/sec
INFO:absl:collect_time = 0.11765408515930176, train_time = 0.12314891815185

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000048000/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000048000/assets
INFO:absl:step = 48050, loss = 1.281160
INFO:absl:257.386 steps/sec
INFO:absl:collect_time = 0.09092068672180176, train_time = 0.10334014892578125
INFO:absl:step = 48100, loss = 0.592749
INFO:absl:256.048 steps/sec
INFO:absl:collect_time = 0.08860135078430176, train_time = 0.1066746711730957
INFO:absl:step = 48150, loss = 0.762767
INFO:absl:243.615 steps/sec
INFO:absl:collect_time = 0.09011578559875488, train_time = 0.11512613296508789
INFO:absl:step = 48200, loss = 6.125124
INFO:absl:242.147 steps/sec
INFO:absl:collect_time = 0.10020756721496582, train_time = 0.1062781810760498
INFO:absl:step = 48250, loss = 1.878250
INFO:absl:255.054 steps/sec
INFO:absl:collect_time = 0.09403872489929199, train_time = 0.10199785232543945
INFO:absl:step = 48300, loss = 1.391526
INFO:absl:265.453 steps/sec
INFO:absl:collect_time = 0.0825951099395752, train_time = 0.10576200485229492

INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000048500/assets


INFO:tensorflow:Assets written to: /Users/richard/Documents/Research/FLRL/dump/policy_saved_model/policy_000048500/assets
INFO:absl:step = 48550, loss = 1.129066
INFO:absl:240.681 steps/sec
INFO:absl:collect_time = 0.10470914840698242, train_time = 0.10303497314453125
INFO:absl:step = 48600, loss = 14.142413
INFO:absl:241.037 steps/sec
INFO:absl:collect_time = 0.09703803062438965, train_time = 0.11039900779724121
INFO:absl:step = 48650, loss = 1.087065
INFO:absl:239.407 steps/sec
INFO:absl:collect_time = 0.09114694595336914, train_time = 0.11770200729370117
INFO:absl:step = 48700, loss = 3.698543
INFO:absl:221.039 steps/sec
INFO:absl:collect_time = 0.11099624633789062, train_time = 0.11520791053771973
INFO:absl:step = 48750, loss = 2.650952
INFO:absl:232.311 steps/sec
INFO:absl:collect_time = 0.10853195190429688, train_time = 0.10669708251953125
INFO:absl:step = 48800, loss = 0.968679
INFO:absl:273.325 steps/sec
INFO:absl:collect_time = 0.08434486389160156, train_time = 0.0985872745513

EXITED


In [39]:
# Evaluation cell: roll out the trained agent's policy on a fresh
# BoostedEnvironment and print per-step and per-episode rewards.
# `agent` and `BoostedEnvironment` must already exist from earlier cells.
NUM_EVAL_EPISODES = 50  # how many evaluation episodes to roll out

be = BoostedEnvironment()
# Check the Python environment with TF-Agents' validator before wrapping it.
validate_py_environment(be, episodes=1)
tf_env = tf_py_environment.TFPyEnvironment(be)

# see how agent performs
for _ in range(NUM_EVAL_EPISODES):
    # reset() returns the first time step of the new episode, so the rollout
    # is driven by the returned time steps rather than by re-querying
    # tf_env.current_time_step() twice per step.
    time_step = tf_env.reset()
    episode_reward = 0
    episode_steps = 0
    while not time_step.is_last():
        action = agent.policy.action(time_step).action
        print('predicted action', action)
        next_time_step = tf_env.step(action)
        episode_steps += 1
        print("Reward", next_time_step.reward.numpy())
        # reward is a numpy array, so episode_reward becomes an array after
        # the first step (hence the bracketed values in the printout).
        episode_reward += next_time_step.reward.numpy()
        time_step = next_time_step
    print("Steps:", episode_steps, "Reward:", episode_reward)

predicted action tf.Tensor([[[0.32016593 0.52141124]]], shape=(1, 1, 2), dtype=float32)
Reward [0.00235294]
predicted action tf.Tensor([[[0.35313064 0.5240068 ]]], shape=(1, 1, 2), dtype=float32)
Reward [0.]
Steps: 2 Reward: [0.00235294]
predicted action tf.Tensor([[[0.47748858 0.4257298 ]]], shape=(1, 1, 2), dtype=float32)
Reward [0.02941176]
predicted action tf.Tensor([[[0.54622453 0.4379269 ]]], shape=(1, 1, 2), dtype=float32)
Reward [0.01882353]
predicted action tf.Tensor([[[0.47748858 0.4257298 ]]], shape=(1, 1, 2), dtype=float32)
Reward [0.]
Steps: 3 Reward: [0.0482353]
predicted action tf.Tensor([[[0.5850633 0.5429086]]], shape=(1, 1, 2), dtype=float32)
Reward [0.]
Steps: 1 Reward: [0.]
predicted action tf.Tensor([[[0.39333037 0.4595622 ]]], shape=(1, 1, 2), dtype=float32)
Reward [0.00235294]
predicted action tf.Tensor([[[0.44661033 0.4924605 ]]], shape=(1, 1, 2), dtype=float32)
Reward [0.]
Steps: 2 Reward: [0.00235294]
predicted action tf.Tensor([[[0.45497605 0.45990297]]], sha

predicted action tf.Tensor([[[0.45336407 0.47315836]]], shape=(1, 1, 2), dtype=float32)
Reward [0.02705882]
predicted action tf.Tensor([[[0.48258638 0.47715747]]], shape=(1, 1, 2), dtype=float32)
Reward [0.]
Steps: 2 Reward: [0.02705882]
predicted action tf.Tensor([[[0.608012   0.54048145]]], shape=(1, 1, 2), dtype=float32)
Reward [0.01294118]
predicted action tf.Tensor([[[0.57309616 0.52038664]]], shape=(1, 1, 2), dtype=float32)
Reward [0.]
Steps: 2 Reward: [0.01294118]
predicted action tf.Tensor([[[0.3862327  0.44728348]]], shape=(1, 1, 2), dtype=float32)
Reward [0.00823529]
predicted action tf.Tensor([[[0.43184984 0.4117758 ]]], shape=(1, 1, 2), dtype=float32)
Reward [0.]
Steps: 2 Reward: [0.00823529]
predicted action tf.Tensor([[[0.4680928  0.46774697]]], shape=(1, 1, 2), dtype=float32)
Reward [0.02235294]
predicted action tf.Tensor([[[0.4990568  0.49323973]]], shape=(1, 1, 2), dtype=float32)
Reward [0.]
Steps: 2 Reward: [0.02235294]
