In [1]:
# # in google colab uncomment this

# import os

# os.system('apt-get install -y xvfb')
# os.system('wget https://raw.githubusercontent.com/yandexdataschool/Practical_DL/fall18/xvfb -O ../xvfb')
# os.system('apt-get install -y python-opengl ffmpeg')
# os.system('pip install pyglet==1.2.4')
# os.system('pip install gym')

# prefix = 'https://raw.githubusercontent.com/yandexdataschool/Practical_RL/master/week09_policy_II/'

# os.system('wget ' + prefix + 'runners.py')
# os.system('wget ' + prefix + 'mujoco_wrappers.py')

# print('setup complete')

# XVFB will be launched if you run on a server
import os
if type(os.environ.get("DISPLAY")) is not str or len(os.environ.get("DISPLAY")) == 0:
    !bash ../xvfb start
    %env DISPLAY = : 1

# Implementing Proximal Policy Optimization 


In this notebook you will be implementing Proximal Policy Optimization algorithm, 
scaled up version of which was used to train [OpenAI Five](https://openai.com/blog/openai-five/) 
to [win](https://openai.com/blog/how-to-train-your-openai-five/) against the
world champions in Dota 2.
You will be solving a continuous control environment on which it may be easier and faster 
to train an agent, however note that PPO here may not be the best algorithm as, for example,
Deep Deterministic Policy Gradient and Soft Actor Critic may be more suited 
for continuous control environments. ~~To run the environment you will need to install 
[pybullet-gym](https://github.com/benelot/pybullet-gym) which unlike MuJoCo 
does not require you to have a license.~~

To install the library:

#### Installed pybullet  instead of pybulletgym
https://github.com/bulletphysics/bullet3
 

The overall structure of the code is similar to the one in the A2C optional homework, but don't worry if you haven't done it, it should be relatively easy to figure it out. 
First, we will create an instance of the environment. 
We will normalize the observations and rewards, but before that you will need a wrapper that will 
write summaries, mainly, the total reward during an episode. You can either use one for `TensorFlow` 
implemented in `atari_wrappers.py` file from the optional A2C homework, or implement your own. 

In [29]:
import gym 
import pybullet_envs


env = gym.make("HalfCheetahBulletEnv-v0")
print("observation space: ", env.observation_space,
      "\nobservations:", env.reset())
print("action space: ", env.action_space, 
      "\naction_sample: ", env.action_space.sample())

observation space:  Box(26,) 
observations: [ 0.          0.          1.          0.          0.          0.
  0.         -0.         -0.38433948  0.         -0.1040039   0.
 -0.17367972  0.          0.33260956  0.          0.11239259  0.
  1.2187908   0.          0.          0.          0.          0.
  0.          0.        ]
action space:  Box(6,) 
action_sample:  [0.57659733 0.26563177 0.5228279  0.74690473 0.75159585 0.23399161]


In [30]:
import torch
import numpy as np
from collections import deque
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

def torch_steps():
    #https://discuss.pytorch.org/t/current-step-from-optimizer/19370
    try:
        stepval = optimizer.state[optimizer.param_groups[0]["params"][-1]]
    except NameError:
        stepval=0
    return stepval

class Summaries(gym.Wrapper):
    """ Writes env summaries."""
    def __init__(self, env, prefix=None, running_mean_size=100, step_var=None):
        super(Summaries, self).__init__(env)
        self.episode_counter = 0
        self.prefix = prefix or self.env.spec.id
        self.step_count =0
        self.step_var = (step_var if step_var is not None
                         else torch_steps())

        nenvs = getattr(self.env.unwrapped, "nenvs", 1)
        self.rewards = np.zeros(nenvs)
        self.had_ended_episodes = np.zeros(nenvs, dtype=np.bool)
        self.episode_lengths = np.zeros(nenvs)
        self.reward_queues = [deque([], maxlen=running_mean_size)
                              for _ in range(nenvs)]
        self.rewards_step =0

    def should_write_summaries(self):
        """ Returns true if it's time to write summaries. """
        return np.all(self.had_ended_episodes)

    def add_summaries(self):
        """ Writes summaries. """
        print(f"Step={self.step_count}, rewards={self.rewards_step}")
        
        if 'step' not in torch_steps().keys():
            self.step_var =0
        else:
            self.step_var = torch_steps()["step"]
            
        writer.add_scalar(
            f"{self.prefix}/total_reward",
            np.mean([q[-1] for q in self.reward_queues]),
            global_step=self.step_var)
        writer.add_scalar(
            f"{self.prefix}/reward_mean_{self.reward_queues[0].maxlen}",
            np.mean([np.mean(q) for q in self.reward_queues]),
            global_step=self.step_var)
        '''
        writer.add_scalar(
            f"{self.prefix}/rewards_update",
            self.rewards_step,
            global_step=self.step_var)
        
        writer.add_scalar(
            f"{self.prefix}/episode_length",
            np.mean(self.episode_lengths),
            global_step=self.step_var)
        '''
        if self.had_ended_episodes.size > 1:
            '''
            writer.add_scalar(
                f"{self.prefix}/min_reward",
                min(q[-1] for q in self.reward_queues),
                global_step=self.step_var)
            writer.add_scalar(
                f"{self.prefix}/max_reward",
                max(q[-1] for q in self.reward_queues),
                global_step=self.step_var)
            '''
        self.episode_lengths.fill(0)
        self.had_ended_episodes.fill(False)

    def step(self, action):
        
        self.step_count +=1
        clipped_action = np.clip(action.tolist(), self.env.action_space.low, self.env.action_space.high)
        obs, rew, done, info = self.env.step(clipped_action)
        self.rewards += rew
        self.rewards_step += rew
        self.episode_lengths[~self.had_ended_episodes] += 1

        info_collection = [info] if isinstance(info, dict) else info
        done_collection = [done] if isinstance(done, bool) else done
        done_indices = [i for i, info in enumerate(info_collection)
                        if info.get("real_done", done_collection[i])]
        
        for i in done_indices:
            if not self.had_ended_episodes[i]:
                self.had_ended_episodes[i] = True
            self.reward_queues[i].append(self.rewards[i])
            self.rewards[i] = 0

        if self.should_write_summaries():
            self.add_summaries()
        return obs, rew, done, info

    def reset(self, **kwargs):
        self.rewards_step =0
        self.rewards.fill(0)
        self.episode_lengths.fill(0)
        self.had_ended_episodes.fill(False)
        return self.env.reset(**kwargs)


The normalization wrapper will subtract running mean from observations and rewards and divide 
the resulting quantities by the  running variances.

In [31]:
from mujoco_wrappers import Normalize

env = Normalize(Summaries(gym.make("HalfCheetahBulletEnv-v0")));
env.unwrapped.seed(0);

Next, you will need to define a model for training. We suggest that you use two separate networks: one for policy
and another for value function. Each network should be a 3-layer MLP with 64 hidden units, $\mathrm{tanh}$ 
activation function, kernel matrices initialized with orthogonal initializer with parameter $\sqrt{2}$
and biases initialized with zeros. 

Our policy distribution is going to be multivariate normal with diagonal covariance. 
The network from above will predict the mean, and the covariance should be represented by a single 
(learned) vector of size 6 (corresponding to the dimensionality of the action space from above). 
You should initialize this vector to zero and take the exponent of it to always
have a non-negative quantity. 

Overall the model should return three things: predicted mean of the distribution, variance vector, 
value function. 

In [32]:
# import tensorflow as tf
# import torch
import torch.nn as nn

hidden_units =64
input_shape = np.array(env.observation_space.shape).prod()
n_actions = np.prod(env.action_space.shape)


def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    # https://github.com/vwxyzjn/cleanrl/blob/418bfc01fe69712c5b617d49d810a1df7f4f0c14/cleanrl/ppo_continuous_action.py#L221
    nn.init.orthogonal_(layer.weight, std)
    nn.init.constant_(layer.bias, bias_const)
    return layer

class Model(nn.Module):
    def __init__(self,num_inputs,num_outputs):
        super(Model,self).__init__()
        self.fc1 = layer_init(nn.Linear(input_shape,64))
        self.fc2 = layer_init(nn.Linear(64,64))
        self.fc_Policy = layer_init(nn.Linear(64,n_actions))
        self.fc_Value = layer_init(nn.Linear(64,1))
        self.covariance = nn.Parameter(torch.zeros(1,n_actions))
        
    def Policy_network(self,x):
        '''
        The network predicts the mean and covariance(log of standard deviation)
        '''
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        mean = self.fc_Policy(x)

        logstd = self.covariance.expand_as(mean)
        return mean,logstd
    
    def Value_network(self,x):
        '''
        The network predicts the Value Function
        '''
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = self.fc_Value(x)
        return x


This model will be wrapped by a `Policy`. The policy can work in two modes, but in either case 
it is going to return dictionary with string-type keys. The first mode is when the policy is 
used to sample actions for a trajectory which will later be used for training. In this case 
the flag `training` passed to `act` method is `False` and the method should return 
a `dict` with the following keys: 

* `"actions"`: actions to pass to the environment
* `"log_probs"`: log-probabilities of sampled actions
* `"values"`: value function $V^\pi(s)$ predictions.

We don't need to use the values under these keys for training, so all of them should be of type `np.ndarray`.

When `training` is `True`, the model is training on a given batch of observations. In this
case it should return a `dict` with the following keys

* `"distribution"`: an instance of multivariate normal distribution (`torch.distributions.MultivariateNormal` or `tf.distributions.MultivariateNormalDiag`)
* `"values"`: value function $V^\pi(s)$ prediction.

The distinction about the modes comes into play depending on where the policy is used: if it is called from `EnvRunner`, 
the `training` flag is `False`, if it is called from `PPO`, the `training` flag is `True`. These classed 
will be described below. 

In [33]:
from torch.distributions.normal import Normal
class Policy:
  def __init__(self, model):
    self.model = model
        
    
  def act(self, inputs, training=False):
    x = torch.Tensor(inputs)
    if not training:
        '''
        training=False -- Value
        Input is Observation 
        Sample action for a Trajectory
        return {"actions:","log_probs","values"}
        '''
        with torch.no_grad():
            x = torch.Tensor(inputs).unsqueeze(0)
            mean,logstd = self.model.Policy_network(x)
            std = torch.exp(logstd)
            distrib = Normal(mean,std)
            action = distrib.sample()[0]
            log_prob = distrib.log_prob(action).sum(1).view(-1).cpu().detach().numpy()
            value = self.model.Value_network(x).view(-1).cpu().detach().numpy()
        return {"actions":action.detach().numpy(),"log_probs":log_prob,"values":value}
    else: 
        '''
        training=True - - Policy & Value
        
        Input is Observations
        return {"distribution","values"}
        '''
        x = torch.Tensor(inputs)
        mean,logstd = self.model.Policy_network(x)
        std = torch.exp(logstd)
        distrib = Normal(mean,std)
        value = self.model.Value_network(x)
        return {"distribution":distrib,"values":value}
        

We will use `EnvRunner` to perform interactions with an environment with a policy for a fixed number of timesteps. Calling `.get_next()` on a runner will return a trajectory &mdash; dictionary 
containing keys

* `"observations"`
* `"rewards"` 
* `"resets"`
* `"actions"`
* all other keys that you defined in `Policy`,

under each of these keys there is a `np.ndarray` of specified length $T$ &mdash; the size of partial trajectory. 

Additionally, before returning a trajectory this runner can apply a list of transformations. 
Each transformation is simply a callable that should modify passed trajectory in-place.

In [34]:
class AsArray:
  """ 
  Converts lists of interactions to ndarray.
  """
  def __call__(self, trajectory):
    # Modify trajectory inplace. 
    for k, v in filter(lambda kv: kv[0] != "state",
                       trajectory.items()):
      trajectory[k] = np.asarray(v)

In [35]:
import numpy as np
from runners import EnvRunner

class DummyPolicy:
  def act(self, inputs, training=False):
    assert not training
    return {"actions": np.random.randn(6), "values": np.nan}
  
runner = EnvRunner(env, DummyPolicy(), 3,
                   transforms=[AsArray()])
trajectory = runner.get_next()

{k: v.shape for k, v in trajectory.items() if k != "state"}

{'actions': (3, 6),
 'values': (3,),
 'observations': (3, 26),
 'rewards': (3,),
 'resets': (3,)}

You will need to implement the following two transformations. 

The first is `GAE` that implements [Generalized Advantage Estimator](https://arxiv.org/abs/1506.02438).
In it you should add two keys to the trajectory: `"advantages"` and `"value_targets"`. In GAE the advantages
$A_t^{\mathrm{GAE}(\gamma,\lambda)}$ are essentially defined as the exponential 
moving average with parameter $\lambda$ of the regular advantages 
$\hat{A}^{(n)}(s_t) = \sum_{l=0}^{T-1} \gamma^l r_{t+l} + \gamma^{T} V^\pi(s_{t+l}) - V^\pi(s_t)$. 
The exact formula for the computation is the following

$$
A_t^{\mathrm{GAE}(\gamma,\lambda)} = \sum_{l=0}^{T-1} (\gamma\lambda)^l\delta_{t + l}^V,
$$
where $\delta_{t+l}^V = r_{t+l} + \gamma V^\pi(s_{t+l+1}) - V^\pi(s_{t+l})$. You can look at the 
derivation (formulas 11-16) in the paper. Don't forget to reset the summation on terminal
states as determined by the flags `trajectory["resets"]`. You can use `trajectory["values"]`
to get values of all observations except the most recent which is stored under 
 `trajectory["state"]["latest_observation"]`. For this observation you will need to call the policy 
 to get the value prediction.

Once you computed the advantages, you can get the targets for training the value function by adding 
back values:
$$
\hat{V}(s_{t+l}) = A_{t+l}^{\mathrm{GAE}(\gamma,\lambda)} + V(s_{t + l}),
$$
where $\hat{V}$ is a tensor of value targets that are used to train the value function. 

In [36]:
class GAE:
  """ Generalized Advantage Estimator. """
  def __init__(self, policy, gamma=0.99, lambda_=0.95):
    self.policy = policy
    self.gamma = gamma
    self.lambda_ = lambda_
    
  def __call__(self, trajectory):
    advantages = []
    returns =[]
    lastgae = 0
    rewards = trajectory["rewards"]
    values = trajectory["values"]
    dones = 1- trajectory["resets"]
    
    #Get the latest state
    last_state = trajectory["state"]["latest_observation"]
    # Output of the network for the 'next_state' input
    network_output  =self.policy.act(last_state, training=False)
    last_value = network_output["values"]
    values = np.append(values,[last_value])# Append the next  value
    
   #https://github.com/colinskow/move37/blob/f57afca9d15ce0233b27b2b0d6508b99b46d4c7f/ppo/ppo_train.py#L69
    for step in reversed(range(len(rewards))):            
        td_delta = rewards[step] + self.gamma * values[step+1] * dones[step] - values[step]
        advantage =lastgae= td_delta + self.gamma*self.lambda_*dones[step]*lastgae
        advantages.insert(0,advantage)
        returns.insert(0,advantage+values[step])

    trajectory ["advantages"] = advantages
    trajectory ["value_targets"] = returns   
    

The main advantage of PPO over simpler policy based methods like A2C is that it is possible
to train on the same trajectory for multiple gradient steps. The following class wraps 
an `EnvRunner`. It should call the runner to get a trajectory, then return minibatches 
from it for a number of epochs, shuffling the data before each epoch.

In [37]:
class TrajectorySampler:
  """ Samples minibatches from trajectory for a number of epochs. """
  def __init__(self, runner, num_epochs, num_minibatches, transforms=None):
    self.runner = runner
    self.num_epochs = num_epochs
    self.num_minibatches = num_minibatches
    self.transforms = transforms or []
    self.minibatch_count = 0
    self.epoch_count = 0
    self.trajectory = None
    self.minibatchsize = self.num_epochs // self.num_minibatches
    self.indices = []
    
  def shuffle_trajectory(self):
    """ Shuffles all elements in trajectory.
    
    Should be called at the beginning of each epoch.
    """
    num_elements = len(self.trajectory["values"])
    self.indices  = np.arange(num_elements)
    np.random.shuffle(self.indices)
    for k,v in self.trajectory.items():
        if k != "state" :
            self.trajectory[k] = np.array(v)[self.indices.astype(int)]
    
  def get_next(self):
    """ Returns next minibatch.  """
    trajectory_minibatch ={}

    if self.epoch_count > self.num_epochs:
        return trajectory_minibatch

    if self.epoch_count ==0 and self.minibatch_count == 0:
        '''Initial Start '''
        self.trajectory = self.runner.get_next()
        self.shuffle_trajectory()
        start = 0
        end = start + self.minibatchsize-1       
        
    elif self.minibatch_count == self.num_minibatches:
        '''New Epoch'''
        self.minibatch_count=0
        self.epoch_count += 1
        self.trajectory = self.runner.get_next()
        self.shuffle_trajectory()
        start = 0
        end = start + self.minibatchsize-1
    else:
        '''Batches in the same Epoch '''
        start = self.minibatch_count*self.minibatchsize
        end = start + self.minibatchsize-1
    
    for k,v in self.trajectory.items():
        if k != "state": 
            trajectory_minibatch[k] = v[self.indices [start:end]]
    
    self.minibatch_count+=1
    
    return trajectory_minibatch
        
    

A common trick to use with GAE is to normalize advantages, the following transformation does that. 

In [38]:
class NormalizeAdvantages:
  """ Normalizes advantages to have zero mean and variance 1. """
  def __call__(self, trajectory):
    adv = trajectory["advantages"]
    adv = (adv - adv.mean()) / (adv.std() + 1e-8)
    trajectory["advantages"] = adv

Finally, we can create our PPO runner. 

In [39]:
def make_ppo_runner(env, policy, num_runner_steps=2048,
                    gamma=0.99, lambda_=0.95, 
                    num_epochs=10, num_minibatches=32):
  """ Creates runner for PPO algorithm. """
  runner_transforms = [AsArray(),
                       GAE(policy, gamma=gamma, lambda_=lambda_)]
  runner = EnvRunner(env, policy, num_runner_steps, 
                     transforms=runner_transforms)
  
  sampler_transforms = [NormalizeAdvantages()]
  sampler = TrajectorySampler(runner, num_epochs=num_epochs, 
                              num_minibatches=num_minibatches,
                              transforms=sampler_transforms)
  return sampler

In the next cell you will need to implement Proximal Policy Optimization algorithm itself. The algorithm
modifies the typical policy gradient loss in the following way:

$$
L_{\pi} = \frac{1}{T-1}\sum_{l=0}^{T-1}
\frac{\pi_\theta(a_{t+l}|s_{t+l})}{\pi_\theta^{\text{old}}(a_{t+l}|s_{t+l})}
A^{\mathrm{GAE}(\gamma,\lambda)}_{t+l}\\
L_{\pi}^{\text{clipped}} = \frac{1}{T-1}\sum_{l=0}^{T-1}\mathrm{clip}\left(
\frac{\pi_\theta(a_{t+l}|s_{t+l})}{\pi_{\theta^{\text{old}}}(a_{t+l}|s_{t+l})}
\cdot A^{\mathrm{GAE(\gamma, \lambda)}}_{t+l},
1 - \text{cliprange}, 1 + \text{cliprange}\right)\\
L_{\text{policy}} = \max\left(L_\pi, L_{\pi}^{\text{clipped}}\right).
$$

Additionally, the value loss is modified in the following way:

$$
L_V = \frac{1}{T-1}\sum_{l=0}^{T-1}(V_\theta(s_{t+l}) - \hat{V}(s_{t+l}))^2\\
L_{V}^{\text{clipped}} = \frac{1}{T-1}\sum_{l=0}^{T-1}
V_{\theta^{\text{old}}}(s_{t+l}) +
\text{clip}\left(
V_\theta(s_{t+l}) - V_{\theta^\text{old}}(s_{t+l}),
-\text{cliprange}, \text{cliprange}
\right)\\
L_{\text{value}} = \max\left(L_V, L_V^{\text{clipped}}\right).
$$

In [40]:
class PPO:
  def __init__(self, policy, optimizer,
               cliprange=0.2,
               value_loss_coef=0.25,
               max_grad_norm=0.5):
    self.policy = policy
    self.optimizer = optimizer
    self.cliprange = cliprange
    self.value_loss_coef = value_loss_coef
    # Note that we don't need entropy regularization for this env.
    self.max_grad_norm = max_grad_norm
    self.Policy_loss = 0
    self.Value_loss = 0
    self.Total_loss =0
    
  def policy_loss(self, trajectory, act):
    """ Computes and returns policy loss on a given trajectory. """
    
    actions = torch.tensor(trajectory["actions"]).to(device) 
    old_log_probs = torch.tensor(trajectory["log_probs"]).to(device).flatten() # datatype - numpy array
    new_distrib = act["distribution"] 
    new_logprobs = new_distrib.log_prob(actions).sum(1)

    
    ratio = torch.exp(new_logprobs - old_log_probs)
    surrogate1 = ratio * -torch.Tensor(trajectory["advantages"]).to(device)
    #surrogate2 = ratio.clamp(1-self.cliprange,1+self.cliprange)*-torch.Tensor(trajectory["advantages"])
    surrogate2 = torch.clamp(ratio,1-self.cliprange,1+self.cliprange)*-torch.Tensor(trajectory["advantages"])
    policy_loss = torch.mean(torch.max(surrogate1,surrogate2))
    self.Policy_loss = policy_loss
    return policy_loss
        
      
  def value_loss(self, trajectory, act):
    """ Computes and returns value loss on a given trajectory. """
    new_values = act["values"].flatten() 
    returns = torch.tensor(trajectory["value_targets"]).to(device)
    values = torch.tensor(trajectory["values"]).to(device).flatten()

    v_loss1  =(returns - new_values ).pow(2)
    clipped_values = values + torch.clamp(new_values - values,-self.cliprange,self.cliprange)
    v_loss2 = (clipped_values - returns ).pow(2)
    
    value_loss = 0.5*(torch.max(v_loss1,v_loss2)).mean()
    self.Value_loss = value_loss
    return value_loss

  def loss(self, trajectory):
    act = self.policy.act(trajectory["observations"], training=True)
    policy_loss = self.policy_loss(trajectory, act)
    value_loss = self.value_loss(trajectory, act)
    total_loss = policy_loss + self.value_loss_coef * value_loss
    self.Total_loss = total_loss # just for monitoring
    return total_loss
      
  def step(self, trajectory):
    """ Computes the loss function and performs a single gradient step. """
    #print(trajectory.keys())
    loss = self.loss(trajectory)
    optimizer.zero_grad()
    loss.backward()
    nn.utils.clip_grad_norm(self.policy.model.parameters(),self.max_grad_norm)
    optimizer.step()

Now everything is ready to do training. In one million of interactions it should be possible to 
achieve the total raw reward of about 1500. You should plot this quantity with respect to 
`runner.step_var` &mdash; the number of interactions with the environment. It is highly 
encouraged to also provide plots of the following quantities (these are useful for debugging as well):

* [Coefficient of Determination](https://en.wikipedia.org/wiki/Coefficient_of_determination) between 
value targets and value predictions
* Entropy of the policy $\pi$
* Value loss
* Policy loss
* Value targets
* Value predictions
* Gradient norm
* Advantages

For optimization it is suggested to use Adam optimizer with linearly annealing learning rate 
from 3e-4 to 0 and epsilon 1e-5.

In [41]:
import time
from gym.wrappers import  Monitor

In [42]:
from mujoco_wrappers import Normalize

suffix = f"ppo_notebook_{int(time.time())}"
writer = SummaryWriter(f"runs/{suffix}")

env = Normalize(Summaries(gym.make("HalfCheetahBulletEnv-v0")));
env.unwrapped.seed(0);

experiment_name = env.spec.id + suffix
#env = Monitor(env, f'videos/{experiment_name}')

# Enableing thrws error in Jupyter notebbok .But works in the scriot.
# Video recored with the .py file (same code)


In [43]:
# Parameters:
hidden_units =64
input_shape = np.array(env.observation_space.shape).prod()
n_actions = np.prod(env.action_space.shape)

# Linear Anneling Helper
lr = lambda f: f * learning_rate
anneal_lr = False

#PPO Hyper Params:
learning_rate = 3e-4
cliprange=0.2
value_loss_coef=0.25
max_grad_norm=0.5

##Training

'''
nsteps:number of steps of the  environment per update
total_timesteps:number of timesteps (i.e. number of actions taken in the environment)
noptepochs: number of training epochs per update
nminibatches: number of training minibatches per update

'''
nenvs =1
nsteps = num_runner_steps=2048 
num_batch = nsteps * nenvs

gamma=0.99
lambda_=0.95
num_epochs=100
num_minibatches=32

total_timesteps = 10000

#https://github.com/openai/baselines/blob/
#ea25b9e8b234e6ee1bca43083f8f3cf974143998/baselines/ppo2/ppo2.py#L99
train_mini_batch = num_batch//num_minibatches
num_updates = total_timesteps//num_minibatches

print(f"TotalSteps={total_timesteps},minibatchsize = {train_mini_batch},numUpdates={num_updates}")

TotalSteps=40000,minibatchsize = 64,numUpdates=1250


In [44]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ppo_agent = Model(input_shape,n_actions).to(device)
policy = Policy(ppo_agent)
optimizer = torch.optim.Adam(policy.model.parameters(),lr= learning_rate,eps= 1e-5 )
ppo = PPO(policy, optimizer,cliprange=0.2,value_loss_coef=0.25,max_grad_norm=0.5)


minibatcher = make_ppo_runner(env, policy, num_runner_steps,
                    gamma, lambda_, 
                    num_epochs, num_minibatches)

minibatcher.minibatchsize = train_mini_batch



In [45]:
# Training
for update in range(1,num_updates+1):
    # Linear Annealing
    minibatcher.epoch_count=0
    minibatcher.minibatch_count =0
    total_loss =[]
    value_loss =[]
    policy_loss =[]
    if anneal_lr:
        frac = 1.0 - (update - 1.0) / num_updates
        lrnow = lr(frac)
        ppo.optimizer.param_groups[0]['lr'] = lrnow
    for _ in range (num_epochs):
        for _ in range(num_minibatches):
            trajectory_mbatch = minibatcher.get_next()
            ppo.step(trajectory_mbatch)
            step = optimizer.state[optimizer.param_groups[0]["params"][-1]]["step"]

            total_loss.append(ppo.Total_loss.detach().numpy())
            value_loss.append(ppo.Value_loss.detach().numpy())
            policy_loss.append(ppo.Policy_loss.detach().numpy())
        
        writer.add_scalar(
            f"{env.spec.id}/total_loss",
            np.mean(total_loss),
            global_step=step)
        writer.add_scalar(
            f"{env.spec.id}/value_loss",
            np.mean(value_loss),
            global_step=step)
        writer.add_scalar(
            f"{env.spec.id}/policy_loss",
            np.mean(policy_loss),
            global_step=step)
    

Step=1000, rewards=-1449.6471991975902
Step=2000, rewards=-1584.3469496983053




Step=3000, rewards=-1188.830579690747
Step=4000, rewards=-1375.0890424226423
Step=5000, rewards=-1623.277119108456
Step=6000, rewards=-1553.380158702709
Step=7000, rewards=-1395.2868026014485
Step=8000, rewards=-1353.5120846179323
Step=9000, rewards=-1421.1730971309262
Step=10000, rewards=-1715.798400365278
Step=11000, rewards=-1649.3676157167752
Step=12000, rewards=-1314.4896851017681
Step=13000, rewards=-1436.934075716229
Step=14000, rewards=-1543.8626415501249
Step=15000, rewards=-1403.8575652899056
Step=16000, rewards=-1350.909989023313
Step=17000, rewards=-1492.1348738716024
Step=18000, rewards=-1485.9582353110143
Step=19000, rewards=-1526.9880541487344
Step=20000, rewards=-1640.128845749762
Step=21000, rewards=-1524.0068278302747
Step=22000, rewards=-1396.8687654773044
Step=23000, rewards=-1293.94684001562
Step=24000, rewards=-1346.5131318072506
Step=25000, rewards=-1280.6993186267039
Step=26000, rewards=-1410.3895765411096
Step=27000, rewards=-1304.744916215505
Step=28000, rewar

Step=209000, rewards=447.044709271437
Step=210000, rewards=481.89511240769957
Step=211000, rewards=410.0589119034106
Step=212000, rewards=476.2706404883315
Step=213000, rewards=470.7973033473839
Step=214000, rewards=464.41251711813544
Step=215000, rewards=427.58089198666136
Step=216000, rewards=472.4329378268423
Step=217000, rewards=471.2798007033295
Step=218000, rewards=458.5038634517065
Step=219000, rewards=464.1641169233499
Step=220000, rewards=491.3847789926069
Step=221000, rewards=484.09452554466304
Step=222000, rewards=498.41728756949794
Step=223000, rewards=438.0637326350283
Step=224000, rewards=339.38303242734514
Step=225000, rewards=487.4954208522535
Step=226000, rewards=332.5232515511285
Step=227000, rewards=478.2915142296385
Step=228000, rewards=407.6525456734079
Step=229000, rewards=455.1861685829014
Step=230000, rewards=354.8040898049377
Step=231000, rewards=303.1811711071609
Step=232000, rewards=459.7046413738642
Step=233000, rewards=455.0215673217588
Step=234000, rewards

Step=418000, rewards=528.804066013394
Step=419000, rewards=545.0402882825388
Step=420000, rewards=547.7290377734781
Step=421000, rewards=545.1163677812898
Step=422000, rewards=546.8050928057693
Step=423000, rewards=536.2373014859959
Step=424000, rewards=-71.35208624356373
Step=425000, rewards=537.6420741774473
Step=426000, rewards=553.2659931690099
Step=427000, rewards=531.1398540745867
Step=428000, rewards=547.5027304199681
Step=429000, rewards=518.2640459734245
Step=430000, rewards=532.5388427673095
Step=431000, rewards=538.4126136210847
Step=432000, rewards=548.5788398147604
Step=433000, rewards=533.068359372254
Step=434000, rewards=562.1412831817898
Step=435000, rewards=520.8617179980712
Step=436000, rewards=530.8470670242473
Step=437000, rewards=416.82945162987176
Step=438000, rewards=585.9309281883048
Step=439000, rewards=506.4144605485316
Step=440000, rewards=540.6113694301775
Step=441000, rewards=545.0969439471594
Step=442000, rewards=529.7109889211007
Step=443000, rewards=536.

Step=628000, rewards=535.5271051728918
Step=629000, rewards=536.9955374743281
Step=630000, rewards=554.3126298166123
Step=631000, rewards=549.1871025898753
Step=632000, rewards=555.8040504124473
Step=633000, rewards=561.2559341067748
Step=634000, rewards=552.8452714127802
Step=635000, rewards=568.2769177239531
Step=636000, rewards=549.6461369711932
Step=637000, rewards=561.1979489779876
Step=638000, rewards=561.5197405449492
Step=639000, rewards=574.1658316338674
Step=640000, rewards=564.8147058798153
Step=641000, rewards=557.4803144968248
Step=642000, rewards=547.3631094598097
Step=643000, rewards=556.1935891122234
Step=644000, rewards=559.2634169269722
Step=645000, rewards=566.5794350549737
Step=646000, rewards=567.9289929090812
Step=647000, rewards=562.9559178379486
Step=648000, rewards=574.8651444529496
Step=649000, rewards=554.2210489498596
Step=650000, rewards=552.7848320034384
Step=651000, rewards=560.7688521575476
Step=652000, rewards=546.0102143629784
Step=653000, rewards=583.

Step=838000, rewards=581.0909150207021
Step=839000, rewards=592.8314103701191
Step=840000, rewards=588.4960806297232
Step=841000, rewards=588.3928745119078
Step=842000, rewards=574.4175293638237
Step=843000, rewards=565.0629934275049
Step=844000, rewards=574.0750751627022
Step=845000, rewards=584.0966832034605
Step=846000, rewards=573.2444515661334
Step=847000, rewards=601.4959337699783
Step=848000, rewards=595.006270942472
Step=849000, rewards=586.4474849847292
Step=850000, rewards=582.711068793097
Step=851000, rewards=582.6053913360064
Step=852000, rewards=586.885450449017
Step=853000, rewards=588.6134366229678
Step=854000, rewards=601.6686512050929
Step=855000, rewards=584.2206424253762
Step=856000, rewards=592.5042310019612
Step=857000, rewards=609.1841850487264
Step=858000, rewards=606.1729757046518
Step=859000, rewards=583.78132939919
Step=860000, rewards=571.5484958540351
Step=861000, rewards=592.5172810537155
Step=862000, rewards=581.4636839024438
Step=863000, rewards=572.61758

Step=1047000, rewards=603.2468936379639
Step=1048000, rewards=620.2414407949151
Step=1049000, rewards=601.7303451394625
Step=1050000, rewards=611.8417077881231
Step=1051000, rewards=605.6267994466243
Step=1052000, rewards=607.8051130138988
Step=1053000, rewards=610.4923457151472
Step=1054000, rewards=602.2927745508892
Step=1055000, rewards=632.1862166375595
Step=1056000, rewards=612.6625372654661
Step=1057000, rewards=597.7333719352964
Step=1058000, rewards=612.6611436332457
Step=1059000, rewards=612.6723428836672
Step=1060000, rewards=606.5191263665803
Step=1061000, rewards=607.6942249930315
Step=1062000, rewards=612.051122405475
Step=1063000, rewards=614.1506700868433
Step=1064000, rewards=600.3594854184637
Step=1065000, rewards=599.7721127674498
Step=1066000, rewards=615.999056917059
Step=1067000, rewards=620.3065405432892
Step=1068000, rewards=594.2918167388034
Step=1069000, rewards=625.594188583984
Step=1070000, rewards=611.4843395562314
Step=1071000, rewards=610.508664521583
Step

Step=1252000, rewards=646.7027603294968
Step=1253000, rewards=618.6546819519476
Step=1254000, rewards=661.3959176625433
Step=1255000, rewards=660.0940759517335
Step=1256000, rewards=635.6067768751275
Step=1257000, rewards=647.0440951713945
Step=1258000, rewards=635.3653785437303
Step=1259000, rewards=635.8997530509278
Step=1260000, rewards=644.3596502729557
Step=1261000, rewards=614.9139650705634
Step=1262000, rewards=617.1421697047035
Step=1263000, rewards=636.049224570499
Step=1264000, rewards=653.6467201911519
Step=1265000, rewards=650.8606043301073
Step=1266000, rewards=635.296569622724
Step=1267000, rewards=656.3254837410724
Step=1268000, rewards=616.7292689561491
Step=1269000, rewards=656.8415314961765
Step=1270000, rewards=642.4901510201682
Step=1271000, rewards=657.6466766652894
Step=1272000, rewards=652.71673136737
Step=1273000, rewards=664.5774677694762
Step=1274000, rewards=662.2981358072714
Step=1275000, rewards=626.3473400935944
Step=1276000, rewards=674.2878537005096
Step

Step=1457000, rewards=741.6237443399632
Step=1458000, rewards=731.3566144561115
Step=1459000, rewards=728.5792621883476
Step=1460000, rewards=719.6539717844241
Step=1461000, rewards=718.659561912765
Step=1462000, rewards=757.4414553673676
Step=1463000, rewards=742.5682875090808
Step=1464000, rewards=734.5313624072038
Step=1465000, rewards=708.8857557036223
Step=1466000, rewards=735.7663859396658
Step=1467000, rewards=692.0484652463888
Step=1468000, rewards=695.2661320584549
Step=1469000, rewards=732.2390286420354
Step=1470000, rewards=730.1548565435945
Step=1471000, rewards=779.0131366133872
Step=1472000, rewards=734.3682340797071
Step=1473000, rewards=756.4081694242008
Step=1474000, rewards=728.0190613782543
Step=1475000, rewards=751.1354289079782
Step=1476000, rewards=795.7470107527752
Step=1477000, rewards=788.9000382755438
Step=1478000, rewards=710.9776384851223
Step=1479000, rewards=735.7851986139615
Step=1480000, rewards=729.1076607080741
Step=1481000, rewards=706.3135913040478
S

Step=1661000, rewards=999.3072671682385
Step=1662000, rewards=656.5411403120823
Step=1663000, rewards=910.9334701987152
Step=1664000, rewards=942.5746824249287
Step=1665000, rewards=1006.2605776807429
Step=1666000, rewards=1009.788360895652
Step=1667000, rewards=1057.863196107529
Step=1668000, rewards=1033.8509291519622
Step=1669000, rewards=978.9281052694417
Step=1670000, rewards=1011.7474787988018
Step=1671000, rewards=1029.181244042882
Step=1672000, rewards=979.3304536757632
Step=1673000, rewards=1097.9096901144774
Step=1674000, rewards=1019.1834371352012
Step=1675000, rewards=1030.6480680039517
Step=1676000, rewards=1025.1301972271392
Step=1677000, rewards=1018.7977782178234
Step=1678000, rewards=1035.5491085241288
Step=1679000, rewards=1016.0561803541805
Step=1680000, rewards=946.7423178633105
Step=1681000, rewards=1003.674439339537
Step=1682000, rewards=1076.8139505036916
Step=1683000, rewards=1026.075551537201
Step=1684000, rewards=1010.0665864362248
Step=1685000, rewards=1041.4

Step=1862000, rewards=1354.14658766737
Step=1863000, rewards=1295.4435705670776
Step=1864000, rewards=1224.2803140114802
Step=1865000, rewards=1285.5563554986554
Step=1866000, rewards=1298.3818514213613
Step=1867000, rewards=1318.978710699671
Step=1868000, rewards=1387.6853889464855
Step=1869000, rewards=1199.8399966571706
Step=1870000, rewards=1389.7733441178625
Step=1871000, rewards=1170.5760949365956
Step=1872000, rewards=1243.364791453255
Step=1873000, rewards=1255.9295879030894
Step=1874000, rewards=1296.4134947474813
Step=1875000, rewards=1272.4261721990597
Step=1876000, rewards=1332.1953153803852
Step=1877000, rewards=1343.9505450496986
Step=1878000, rewards=1433.8252537850021
Step=1879000, rewards=1257.249044518328
Step=1880000, rewards=1253.2709663763922
Step=1881000, rewards=1324.1955665982193
Step=1882000, rewards=1313.0227868945847
Step=1883000, rewards=1265.0484576864546
Step=1884000, rewards=1162.685776849133
Step=1885000, rewards=1176.0610109251093
Step=1886000, rewards=

Step=2063000, rewards=1448.3597420536428
Step=2064000, rewards=1430.0117151248194
Step=2065000, rewards=1560.7002410210098
Step=2066000, rewards=1600.6704288597819
Step=2067000, rewards=1529.0760558167106
Step=2068000, rewards=1495.804934466675
Step=2069000, rewards=1511.8558915978297
Step=2070000, rewards=1551.5601719102997
Step=2071000, rewards=1527.2764853811593
Step=2072000, rewards=1596.7329223699196
Step=2073000, rewards=1379.959405247287
Step=2074000, rewards=1516.04270537678
Step=2075000, rewards=1592.6348878873805
Step=2076000, rewards=1591.5370641458603
Step=2077000, rewards=1359.6278679349093
Step=2078000, rewards=1519.1214671691196
Step=2079000, rewards=1579.9976810537253
Step=2080000, rewards=1484.192808290223
Step=2081000, rewards=1465.1983163465611
Step=2082000, rewards=1455.9536669686015
Step=2083000, rewards=1423.7213757150819
Step=2084000, rewards=1447.1716690010367
Step=2085000, rewards=1388.044268340376
Step=2086000, rewards=1521.3420567184874
Step=2087000, rewards=

Step=2263000, rewards=1330.1271481376446
Step=2264000, rewards=1612.9389694389274
Step=2265000, rewards=1617.5355448292535
Step=2266000, rewards=1564.781711235208
Step=2267000, rewards=1560.6046531146164
Step=2268000, rewards=1586.6783439102812
Step=2269000, rewards=1598.4927110281285
Step=2270000, rewards=1604.5813449041598
Step=2271000, rewards=1480.8057985683377
Step=2272000, rewards=1470.527476669861
Step=2273000, rewards=1634.9597471785175
Step=2274000, rewards=1554.310466460715
Step=2275000, rewards=1571.9411717288526
Step=2276000, rewards=1584.3999177360874
Step=2277000, rewards=1464.7567865180274
Step=2278000, rewards=1514.1922071697286
Step=2279000, rewards=1544.9165080278024
Step=2280000, rewards=1535.0016877823289
Step=2281000, rewards=1521.947274820081
Step=2282000, rewards=1530.33268670634
Step=2283000, rewards=1501.9055376704475
Step=2284000, rewards=1556.5903499747762
Step=2285000, rewards=1614.6411579321339
Step=2286000, rewards=1527.3387002869197
Step=2287000, rewards=

Step=2463000, rewards=1536.5019898108724
Step=2464000, rewards=1571.3853594149957
Step=2465000, rewards=1535.6256078837603
Step=2466000, rewards=1489.8547370965857
Step=2467000, rewards=1575.3547751588571
Step=2468000, rewards=1569.661936775762
Step=2469000, rewards=1653.8500048056135
Step=2470000, rewards=1519.1941479406632
Step=2471000, rewards=1508.05903557134
Step=2472000, rewards=1660.519324937318
Step=2473000, rewards=1550.9051806782982
Step=2474000, rewards=1612.1247176120116
Step=2475000, rewards=1611.930505057291
Step=2476000, rewards=1637.576250020926
Step=2477000, rewards=1582.760979775683
Step=2478000, rewards=1618.0725876359627
Step=2479000, rewards=1610.617846908283
Step=2480000, rewards=1540.0805085790441
Step=2481000, rewards=1667.4619572219287
Step=2482000, rewards=1497.3769690163963
Step=2483000, rewards=1594.0397061374504
Step=2484000, rewards=1576.3694863752853
Step=2485000, rewards=1688.2000115729822
Step=2486000, rewards=1618.793787698679
Step=2487000, rewards=165

Step=2663000, rewards=1597.8964250020833
Step=2664000, rewards=1668.1648642790617
Step=2665000, rewards=1590.8622216719973
Step=2666000, rewards=1637.739248191829
Step=2667000, rewards=1649.165086826995
Step=2668000, rewards=1662.7129872997587
Step=2669000, rewards=1655.718894077688
Step=2670000, rewards=1682.8737473648166
Step=2671000, rewards=1628.09047164396
Step=2672000, rewards=1604.9133390668703
Step=2673000, rewards=1568.2259808194676
Step=2674000, rewards=1660.9266932745643
Step=2675000, rewards=1594.900866338075
Step=2676000, rewards=1526.3761720205296
Step=2677000, rewards=1413.306402085948
Step=2678000, rewards=1668.2776476108081
Step=2679000, rewards=1605.9603727242413
Step=2680000, rewards=1619.9145825977598
Step=2681000, rewards=1572.2646978943715
Step=2682000, rewards=1652.726148725854
Step=2683000, rewards=1688.6597059051212
Step=2684000, rewards=1664.3640411792885
Step=2685000, rewards=1661.1117608428924
Step=2686000, rewards=1622.9776443679248
Step=2687000, rewards=16

Step=2864000, rewards=1638.9268431002035
Step=2865000, rewards=1650.2305511075745
Step=2866000, rewards=1633.1145593472374
Step=2867000, rewards=1615.1620499733872
Step=2868000, rewards=1647.0991127229502
Step=2869000, rewards=1721.753937194186
Step=2870000, rewards=1714.0125564660932
Step=2871000, rewards=1567.8851666377875
Step=2872000, rewards=1676.6682549245315
Step=2873000, rewards=1577.1406080164772
Step=2874000, rewards=1632.1967271113213
Step=2875000, rewards=1661.1692145122954
Step=2876000, rewards=1657.7810537401651
Step=2877000, rewards=1686.9183721758761
Step=2878000, rewards=1400.879796385084
Step=2879000, rewards=1724.0457789863513
Step=2880000, rewards=1809.0657109659103
Step=2881000, rewards=1690.5201152596642
Step=2882000, rewards=1713.5553936001445
Step=2883000, rewards=1690.105604734279
Step=2884000, rewards=1497.1048047389374
Step=2885000, rewards=1646.1595538658496
Step=2886000, rewards=1563.080718060155
Step=2887000, rewards=1675.6236966957474
Step=2888000, reward

Step=3065000, rewards=1733.3154771929032
Step=3066000, rewards=1666.2439306503695
Step=3067000, rewards=1800.0405746498293
Step=3068000, rewards=1759.6993189112886
Step=3069000, rewards=1731.0560308884526
Step=3070000, rewards=1583.7140005579777
Step=3071000, rewards=1676.3026910703686
Step=3072000, rewards=1757.898607650264
Step=3073000, rewards=1729.1062451089015
Step=3074000, rewards=1736.197643867383
Step=3075000, rewards=1833.5510551751568
Step=3076000, rewards=1748.9716544213848
Step=3077000, rewards=1764.72123117632
Step=3078000, rewards=1709.985884421658
Step=3079000, rewards=1711.3535106942074
Step=3080000, rewards=1706.9449479261432
Step=3081000, rewards=1709.1993775512956
Step=3082000, rewards=1612.5266259043747
Step=3083000, rewards=1847.1558149974392
Step=3084000, rewards=1737.752534732081
Step=3085000, rewards=1626.7453083053908
Step=3086000, rewards=1469.3985753635327
Step=3087000, rewards=1759.5195488085128
Step=3088000, rewards=1756.7548219408086
Step=3089000, rewards=

Step=3265000, rewards=1722.9343396931865
Step=3266000, rewards=1627.9760505455145
Step=3267000, rewards=1751.6021905515763
Step=3268000, rewards=1630.199835931394
Step=3269000, rewards=1752.8039219078394
Step=3270000, rewards=1791.210200103558
Step=3271000, rewards=1594.2724655052855
Step=3272000, rewards=1737.633796046268
Step=3273000, rewards=1747.6247299467207
Step=3274000, rewards=1631.8964756464086
Step=3275000, rewards=1744.6942082650482
Step=3276000, rewards=1655.7872575145796
Step=3277000, rewards=1851.6381969717454
Step=3278000, rewards=1689.9272263393498
Step=3279000, rewards=1622.6680953261448
Step=3280000, rewards=1715.9222247035534
Step=3281000, rewards=1745.8097330530018
Step=3282000, rewards=1614.8516743934752
Step=3283000, rewards=1560.8020755517832
Step=3284000, rewards=1730.4117202137875
Step=3285000, rewards=1667.9886614515447
Step=3286000, rewards=1778.5469264157991
Step=3287000, rewards=1742.319443146726
Step=3288000, rewards=1572.269209242289
Step=3289000, rewards

Step=3466000, rewards=1807.7050301797294
Step=3467000, rewards=1703.4722797638913
Step=3468000, rewards=1743.8071287920725
Step=3469000, rewards=1780.1559335531938
Step=3470000, rewards=1760.037427276459
Step=3471000, rewards=1743.316092032431
Step=3472000, rewards=1618.800605853744
Step=3473000, rewards=1749.2648485267277
Step=3474000, rewards=1738.1647968707532
Step=3475000, rewards=1777.621085921539
Step=3476000, rewards=1691.8875474496201
Step=3477000, rewards=1699.2011774001135
Step=3478000, rewards=1693.2929840637228
Step=3479000, rewards=1756.781345127604
Step=3480000, rewards=1743.1794627185586
Step=3481000, rewards=1596.9327947658385
Step=3482000, rewards=1734.3981812509285
Step=3483000, rewards=1643.9549253909036
Step=3484000, rewards=1668.0925300092424
Step=3485000, rewards=1685.986720625153
Step=3486000, rewards=1690.9609431492431
Step=3487000, rewards=1662.7407432106947
Step=3488000, rewards=1648.2376812605944
Step=3489000, rewards=1748.0055217727565
Step=3490000, rewards=

Step=3667000, rewards=1648.8619989655997
Step=3668000, rewards=1624.1489536788338
Step=3669000, rewards=1688.2812568715967
Step=3670000, rewards=1649.6090653074039
Step=3671000, rewards=1619.232766026786
Step=3672000, rewards=1693.713889076557
Step=3673000, rewards=1586.1309922635733
Step=3674000, rewards=1667.464103169151
Step=3675000, rewards=1621.445926551533
Step=3676000, rewards=1681.8134477997407
Step=3677000, rewards=1647.9811230903197
Step=3678000, rewards=1657.5699989575864
Step=3679000, rewards=1610.1902981985036
Step=3680000, rewards=1683.8098283574227
Step=3681000, rewards=1680.086008237745
Step=3682000, rewards=1675.6401603232205
Step=3683000, rewards=1682.2198350900335
Step=3684000, rewards=1638.927920221975
Step=3685000, rewards=1607.251538949014
Step=3686000, rewards=1635.123982009986
Step=3687000, rewards=1646.357211508111
Step=3688000, rewards=1716.4920923603106
Step=3689000, rewards=1681.7137265637277
Step=3690000, rewards=1710.461444867572
Step=3691000, rewards=1682

Step=3868000, rewards=1659.8834753306141
Step=3869000, rewards=1657.344768955794
Step=3870000, rewards=1677.8832247888763
Step=3871000, rewards=1684.8392865703552
Step=3872000, rewards=1726.342873566014
Step=3873000, rewards=1701.347557243174
Step=3874000, rewards=1722.0603063963608
Step=3875000, rewards=1738.015886964938
Step=3876000, rewards=1669.7729739399801
Step=3877000, rewards=1687.6444366290157
Step=3878000, rewards=1767.2269949842582
Step=3879000, rewards=1720.4844371779543
Step=3880000, rewards=1690.4762964780627
Step=3881000, rewards=1613.2885102463572
Step=3882000, rewards=1769.4746571471276
Step=3883000, rewards=1672.6988220005476
Step=3884000, rewards=1758.5083849471296
Step=3885000, rewards=1729.5626957055472
Step=3886000, rewards=1621.6963327629983
Step=3887000, rewards=1752.661639324138
Step=3888000, rewards=1638.7167505932682
Step=3889000, rewards=1642.9894508562977
Step=3890000, rewards=1753.2562862625614
Step=3891000, rewards=1639.2495768865872
Step=3892000, rewards

Step=4069000, rewards=1781.0443687325305
Step=4070000, rewards=1586.7809520389067
Step=4071000, rewards=1588.4906658557766
Step=4072000, rewards=1667.8455461218214
Step=4073000, rewards=1786.9376088481922
Step=4074000, rewards=1685.9750359955526
Step=4075000, rewards=1622.6087667419158
Step=4076000, rewards=1698.2527078369908
Step=4077000, rewards=1784.6237400733526
Step=4078000, rewards=1627.7983954724411
Step=4079000, rewards=1827.139304682656
Step=4080000, rewards=1696.7744750324741
Step=4081000, rewards=1793.6532874372533
Step=4082000, rewards=1853.3125395741015
Step=4083000, rewards=1627.2782466604751
Step=4084000, rewards=1671.1111535280859
Step=4085000, rewards=1554.7471207546068
Step=4086000, rewards=1785.4938697651826
Step=4087000, rewards=1595.4598295884189
Step=4088000, rewards=1683.181677254948
Step=4089000, rewards=1692.2754203757456
Step=4090000, rewards=1754.8690925401997
Step=4091000, rewards=1687.3491367527417
Step=4092000, rewards=1594.7467778912849
Step=4093000, rewa

Step=4270000, rewards=1720.188379849049
Step=4271000, rewards=1635.603131724684
Step=4272000, rewards=1621.9134085938638
Step=4273000, rewards=1699.5221440163084
Step=4274000, rewards=1561.2113385876255
Step=4275000, rewards=1693.5307372052307
Step=4276000, rewards=1573.1481754590834
Step=4277000, rewards=1624.544225973491
Step=4278000, rewards=1593.0909204076822
Step=4279000, rewards=1549.5634205919869
Step=4280000, rewards=1872.0631493561916
Step=4281000, rewards=1639.4882954027191
Step=4282000, rewards=1655.4534962448536
Step=4283000, rewards=1785.3660875142823
Step=4284000, rewards=1480.8053058363287
Step=4285000, rewards=1745.7098492698574
Step=4286000, rewards=1636.288100403588
Step=4287000, rewards=1628.648642939459
Step=4288000, rewards=1784.7748871738402
Step=4289000, rewards=1720.4711456166376
Step=4290000, rewards=1642.2561858439647
Step=4291000, rewards=1730.5961609123992
Step=4292000, rewards=1736.6093779401997
Step=4293000, rewards=1669.0330879258374
Step=4294000, rewards

Step=4470000, rewards=1746.623376667737
Step=4471000, rewards=1784.0414511004928
Step=4472000, rewards=1857.0920248344164
Step=4473000, rewards=1725.7384407561149
Step=4474000, rewards=1550.2403123688625
Step=4475000, rewards=1732.825567806339
Step=4476000, rewards=1787.1437469629807
Step=4477000, rewards=1715.83662418399
Step=4478000, rewards=1805.8506223122472
Step=4479000, rewards=1650.6347971008522
Step=4480000, rewards=1842.863043681029
Step=4481000, rewards=1686.1046702018411
Step=4482000, rewards=1895.61660393794
Step=4483000, rewards=1816.5538724763471
Step=4484000, rewards=1702.2652114427744
Step=4485000, rewards=1773.1220351532245
Step=4486000, rewards=1695.5556939526869
Step=4487000, rewards=1821.4042964067642
Step=4488000, rewards=1864.012448348691
Step=4489000, rewards=1773.615490672051
Step=4490000, rewards=1812.864139064301
Step=4491000, rewards=1800.4814607075193
Step=4492000, rewards=1697.5953169774973
Step=4493000, rewards=1805.5071762862683
Step=4494000, rewards=1780

Step=4671000, rewards=1629.6123682235404
Step=4672000, rewards=1685.5322977603169
Step=4673000, rewards=1660.769034291436
Step=4674000, rewards=1668.0779277442477
Step=4675000, rewards=1811.4702064069738
Step=4676000, rewards=1753.7589810330726
Step=4677000, rewards=1791.0143945990058
Step=4678000, rewards=1832.0809196381476
Step=4679000, rewards=1792.4454856375685
Step=4680000, rewards=1597.4003784358465
Step=4681000, rewards=1798.470954690841
Step=4682000, rewards=1739.430828480539
Step=4683000, rewards=1733.7004186025906
Step=4684000, rewards=1674.2664785154193
Step=4685000, rewards=1683.1285344888868
Step=4686000, rewards=1842.8161020001633
Step=4687000, rewards=1633.1422187535713
Step=4688000, rewards=1509.6984368699943
Step=4689000, rewards=1732.46297645062
Step=4690000, rewards=1683.3615942501638
Step=4691000, rewards=1677.390788529404
Step=4692000, rewards=1581.8852904767239
Step=4693000, rewards=1769.9571741493528
Step=4694000, rewards=1719.4348681445147
Step=4695000, rewards=

Step=4872000, rewards=1881.2975866916793
Step=4873000, rewards=1828.2534102515808
Step=4874000, rewards=1852.809151631035
Step=4875000, rewards=1806.0815706033927
Step=4876000, rewards=1778.1720673605048
Step=4877000, rewards=1644.0250754040262
Step=4878000, rewards=1878.0418887430174
Step=4879000, rewards=1683.824829018125
Step=4880000, rewards=1779.4106748902877
Step=4881000, rewards=1730.2740004382724
Step=4882000, rewards=1749.3593380212405
Step=4883000, rewards=1890.0358459654544
Step=4884000, rewards=1728.6255825640278
Step=4885000, rewards=1712.3347329733356
Step=4886000, rewards=1725.459128355468
Step=4887000, rewards=1674.5439662252368
Step=4888000, rewards=1754.3304614922652
Step=4889000, rewards=1723.8779716558543
Step=4890000, rewards=1731.0136626941048
Step=4891000, rewards=1792.6870497165612
Step=4892000, rewards=1741.1351889047207
Step=4893000, rewards=1803.1128069409
Step=4894000, rewards=1834.7737991315057
Step=4895000, rewards=1769.916416996012
Step=4896000, rewards=1

Step=5072000, rewards=1772.3870437378766
Step=5073000, rewards=1749.6454669021007
Step=5074000, rewards=1685.3658745378668
Step=5075000, rewards=1823.4395073067471
Step=5076000, rewards=1771.027692728609
Step=5077000, rewards=1678.0393259651034
Step=5078000, rewards=1795.393972473036
Step=5079000, rewards=1932.519236988411
Step=5080000, rewards=1911.4721602107124
Step=5081000, rewards=1814.829209425444
Step=5082000, rewards=1763.9090828602011
Step=5083000, rewards=1802.2458167206225
Step=5084000, rewards=1796.9313441220304
Step=5085000, rewards=1837.0447205128744
Step=5086000, rewards=1807.736253272652
Step=5087000, rewards=1921.42067100701
Step=5088000, rewards=1826.7534301730532
Step=5089000, rewards=1632.595681391078
Step=5090000, rewards=1750.7770531443537
Step=5091000, rewards=1801.3398278124141
Step=5092000, rewards=1727.8314389428278
Step=5093000, rewards=1720.8646640900567
Step=5094000, rewards=1810.7150598023845
Step=5095000, rewards=1912.98157003579
Step=5096000, rewards=1773

Step=5273000, rewards=1858.9012815638007
Step=5274000, rewards=1774.5025067892395
Step=5275000, rewards=1831.1099811466624
Step=5276000, rewards=1710.712499087713
Step=5277000, rewards=1771.046235570932
Step=5278000, rewards=1931.7026618384018
Step=5279000, rewards=1842.5403869614358
Step=5280000, rewards=1781.6736824471259
Step=5281000, rewards=1896.1465314981385
Step=5282000, rewards=1812.573769116144
Step=5283000, rewards=1688.0948418816786
Step=5284000, rewards=1775.1858626545886
Step=5285000, rewards=1704.3797924475352
Step=5286000, rewards=1677.2024697394654
Step=5287000, rewards=1807.5576332783019
Step=5288000, rewards=1634.9834262792538
Step=5289000, rewards=1834.3857176949691
Step=5290000, rewards=1894.487628273989
Step=5291000, rewards=1829.0503650104251
Step=5292000, rewards=1956.0509215295358
Step=5293000, rewards=1805.8403936791467
Step=5294000, rewards=1786.5595161550898
Step=5295000, rewards=1843.0363467268457
Step=5296000, rewards=1718.379319698563
Step=5297000, rewards

Step=5474000, rewards=1761.9317044579802
Step=5475000, rewards=1846.2458352183519
Step=5476000, rewards=1927.0657141321326
Step=5477000, rewards=1857.908238724999
Step=5478000, rewards=1712.67268253206
Step=5479000, rewards=1777.9897161101571
Step=5480000, rewards=1605.6510799818245
Step=5481000, rewards=1741.5687632379318
Step=5482000, rewards=1631.4815912456736
Step=5483000, rewards=1895.5741801049908
Step=5484000, rewards=1609.1251788346133
Step=5485000, rewards=1853.3115702695675
Step=5486000, rewards=1720.557738013065
Step=5487000, rewards=1740.7506860200774
Step=5488000, rewards=1852.805303977841
Step=5489000, rewards=1782.745640842575
Step=5490000, rewards=1839.1903729228309
Step=5491000, rewards=1844.1620314348013
Step=5492000, rewards=1856.0669842011896
Step=5493000, rewards=1930.8127424195982
Step=5494000, rewards=1848.0808056064025
Step=5495000, rewards=1774.7221050510623
Step=5496000, rewards=1825.6065042270034
Step=5497000, rewards=1685.4287102722453
Step=5498000, rewards=

Step=5675000, rewards=1798.8505080748346
Step=5676000, rewards=1812.383541402334
Step=5677000, rewards=1760.8711426925363
Step=5678000, rewards=1796.6650945049375
Step=5679000, rewards=1787.724736406746
Step=5680000, rewards=1815.3280395664137
Step=5681000, rewards=1848.5844057905508
Step=5682000, rewards=1916.8979980498275
Step=5683000, rewards=1830.1752614406507
Step=5684000, rewards=1848.044443410174
Step=5685000, rewards=1868.4989253484584
Step=5686000, rewards=1775.6200816445382
Step=5687000, rewards=1653.6784758115741
Step=5688000, rewards=1763.1218163814763
Step=5689000, rewards=1897.2709885414242
Step=5690000, rewards=1906.1940640469543
Step=5691000, rewards=1961.484448034148
Step=5692000, rewards=1904.4767762983702
Step=5693000, rewards=1666.3803038185868
Step=5694000, rewards=1795.2658724870812
Step=5695000, rewards=1836.7260826539211
Step=5696000, rewards=1817.2315415599646
Step=5697000, rewards=1756.1859843362527
Step=5698000, rewards=1756.7508741644356
Step=5699000, reward

Step=5875000, rewards=1693.5785099481475
Step=5876000, rewards=1666.4392708381697
Step=5877000, rewards=1611.103220815492
Step=5878000, rewards=1780.2702672731164
Step=5879000, rewards=1890.7160306973428
Step=5880000, rewards=1735.003395499226
Step=5881000, rewards=1666.6158939586587
Step=5882000, rewards=1790.6509532066545
Step=5883000, rewards=1774.2467049188795
Step=5884000, rewards=1782.8365793134762
Step=5885000, rewards=1763.8302254181838
Step=5886000, rewards=-207.95518528437086
Step=5887000, rewards=1764.6345327035408
Step=5888000, rewards=1799.2272817312712
Step=5889000, rewards=1809.9327240641342
Step=5890000, rewards=1683.821796630996
Step=5891000, rewards=1728.5218178702555
Step=5892000, rewards=1674.350243484686
Step=5893000, rewards=1807.9641227062202
Step=5894000, rewards=1823.5992083322374
Step=5895000, rewards=1774.2130589663655
Step=5896000, rewards=1776.4107009975644
Step=5897000, rewards=1850.0187243444184
Step=5898000, rewards=1743.3444711445343
Step=5899000, rewar

Step=6075000, rewards=1595.0501113361604
Step=6076000, rewards=1755.145322320173
Step=6077000, rewards=1763.1711673325476
Step=6078000, rewards=1735.7330712268367
Step=6079000, rewards=1840.8204137161088
Step=6080000, rewards=1752.9943912692302
Step=6081000, rewards=1842.2713902121309
Step=6082000, rewards=1865.3397959671208
Step=6083000, rewards=1848.0141669722912
Step=6084000, rewards=1749.0505530521534
Step=6085000, rewards=1792.3384432077667
Step=6086000, rewards=1816.986439912584
Step=6087000, rewards=1635.5655800608736
Step=6088000, rewards=1664.4226877730164
Step=6089000, rewards=1743.2903399787947
Step=6090000, rewards=1775.0024614638548
Step=6091000, rewards=-611.1104075371442
Step=6092000, rewards=1699.3967121578823
Step=6093000, rewards=1747.652919143207
Step=6094000, rewards=1868.0485827197433
Step=6095000, rewards=1853.7220901087421
Step=6096000, rewards=1686.5547020094434
Step=6097000, rewards=1816.3975573002108
Step=6098000, rewards=1874.5113040936662
Step=6099000, rewar

Step=6275000, rewards=1692.249471881563
Step=6276000, rewards=1860.0639766040467
Step=6277000, rewards=1793.1804496282791
Step=6278000, rewards=1732.422971806035
Step=6279000, rewards=1772.8516202405326
Step=6280000, rewards=1667.0164617555574
Step=6281000, rewards=1825.9868463597518
Step=6282000, rewards=1741.8586789482906
Step=6283000, rewards=1839.3480426646572
Step=6284000, rewards=1664.8975601809475
Step=6285000, rewards=1834.814759080429
Step=6286000, rewards=1753.3220702358587
Step=6287000, rewards=1774.482738908755
Step=6288000, rewards=1940.5965791032377
Step=6289000, rewards=1723.0283282972691
Step=6290000, rewards=1854.028322672656
Step=6291000, rewards=1797.9863587174038
Step=6292000, rewards=1634.220729016814
Step=6293000, rewards=1831.7117848352261
Step=6294000, rewards=1755.7240999103285
Step=6295000, rewards=1826.6604386277595
Step=6296000, rewards=1840.2429516364882
Step=6297000, rewards=1847.5853123512359
Step=6298000, rewards=1706.8350105384288
Step=6299000, rewards=

Step=6475000, rewards=1773.9553936948234
Step=6476000, rewards=1743.609143981973
Step=6477000, rewards=1671.8093948636342
Step=6478000, rewards=1812.3800233398426
Step=6479000, rewards=1702.623655549635
Step=6480000, rewards=1814.1008915378181
Step=6481000, rewards=1849.820458077276
Step=6482000, rewards=1770.3188816604056
Step=6483000, rewards=1874.8203381449282
Step=6484000, rewards=1724.1284892938072
Step=6485000, rewards=1807.4957873734659
Step=6486000, rewards=1836.996122044072
Step=6487000, rewards=1711.9197960312883
Step=6488000, rewards=1796.6422414817796
Step=6489000, rewards=1756.858476291515
Step=6490000, rewards=1736.4350533266363
Step=6491000, rewards=1830.4758222000291
Step=6492000, rewards=1883.0045811787645
Step=6493000, rewards=1686.2894367037813
Step=6494000, rewards=1818.2400655903032
Step=6495000, rewards=1855.2944388958217
Step=6496000, rewards=1807.56859286682
Step=6497000, rewards=1709.5369982595653
Step=6498000, rewards=1830.1613024179724
Step=6499000, rewards=1

Step=6676000, rewards=1621.1780178044478
Step=6677000, rewards=1745.8629714381527
Step=6678000, rewards=1758.8026056526483
Step=6679000, rewards=1869.3152615468448
Step=6680000, rewards=1679.3980148294518
Step=6681000, rewards=1648.5162569600823
Step=6682000, rewards=1724.2131386067203
Step=6683000, rewards=1708.3289233232174
Step=6684000, rewards=1655.2176547277918
Step=6685000, rewards=1688.0756179035925
Step=6686000, rewards=1946.4466682119923
Step=6687000, rewards=1712.926502177874
Step=6688000, rewards=1747.004706174696
Step=6689000, rewards=1772.181987051861
Step=6690000, rewards=1839.5361763729993
Step=6691000, rewards=1638.3969948661472
Step=6692000, rewards=1785.9277498765055
Step=6693000, rewards=1856.2614635898622
Step=6694000, rewards=1660.841695778389
Step=6695000, rewards=1707.4426167908332
Step=6696000, rewards=1816.6569371420187
Step=6697000, rewards=1962.465388710823
Step=6698000, rewards=1758.053192207881
Step=6699000, rewards=1648.6566148999561
Step=6700000, rewards=

Step=6877000, rewards=1664.3277389926423
Step=6878000, rewards=1802.9435212578237
Step=6879000, rewards=1820.286252229123
Step=6880000, rewards=1789.3829054394319
Step=6881000, rewards=1786.0593397418045
Step=6882000, rewards=1598.5412735977416
Step=6883000, rewards=1822.36637254961
Step=6884000, rewards=1856.607003800604
Step=6885000, rewards=1729.953991203554
Step=6886000, rewards=1788.5576688115696
Step=6887000, rewards=1811.7913576474987
Step=6888000, rewards=1837.986017423642
Step=6889000, rewards=1658.7472213669264
Step=6890000, rewards=1753.946739200969
Step=6891000, rewards=1703.5056844103763
Step=6892000, rewards=1701.1016387299085
Step=6893000, rewards=1827.7403285485768
Step=6894000, rewards=1573.2401202180765
Step=6895000, rewards=1726.6155680694033
Step=6896000, rewards=1735.453083604346
Step=6897000, rewards=1736.9892441343925
Step=6898000, rewards=1769.4259293021644
Step=6899000, rewards=1747.5644033061728
Step=6900000, rewards=1795.2783158471766
Step=6901000, rewards=17

Step=7078000, rewards=1730.036319794626
Step=7079000, rewards=1731.0049830465725
Step=7080000, rewards=1732.2090840277062
Step=7081000, rewards=1648.9185138696141
Step=7082000, rewards=1784.5419431793955
Step=7083000, rewards=1933.6948579598416
Step=7084000, rewards=1837.170575857725
Step=7085000, rewards=1791.409609182332
Step=7086000, rewards=1752.80051181245
Step=7087000, rewards=1690.3149328394538
Step=7088000, rewards=1746.275551913196
Step=7089000, rewards=1728.2240691081204
Step=7090000, rewards=1794.8433743407033
Step=7091000, rewards=1758.1926162468212
Step=7092000, rewards=1851.0924733566326
Step=7093000, rewards=1825.4762302463025
Step=7094000, rewards=1278.7299867014344
Step=7095000, rewards=1814.9476586786639
Step=7096000, rewards=1799.6526705112453
Step=7097000, rewards=1695.39964252029
Step=7098000, rewards=1891.9025247169639
Step=7099000, rewards=1701.534551386899
Step=7100000, rewards=1841.571416134393
Step=7101000, rewards=1618.5391921866135
Step=7102000, rewards=1697

Step=7279000, rewards=1800.5534732658602
Step=7280000, rewards=1786.3276515458715
Step=7281000, rewards=1830.5432758353522
Step=7282000, rewards=1854.5652466695444
Step=7283000, rewards=1927.9186939973126
Step=7284000, rewards=1567.8133493525165
Step=7285000, rewards=1911.121533359817
Step=7286000, rewards=1779.333634050368
Step=7287000, rewards=1762.840464185011
Step=7288000, rewards=1887.4787146717772
Step=7289000, rewards=1777.2822872763122
Step=7290000, rewards=1747.2582557783078
Step=7291000, rewards=1884.8923186814598
Step=7292000, rewards=1819.1066655508632
Step=7293000, rewards=1728.5276291111286
Step=7294000, rewards=1678.5068616630654
Step=7295000, rewards=1786.2765150001987
Step=7296000, rewards=1804.0535907771512
Step=7297000, rewards=1793.0881580199518
Step=7298000, rewards=1895.7954741666135
Step=7299000, rewards=1888.710451153054
Step=7300000, rewards=1908.0559040078797
Step=7301000, rewards=1692.9787073887808
Step=7302000, rewards=1894.2498357312063
Step=7303000, reward

Step=7479000, rewards=1897.0794023449685
Step=7480000, rewards=1851.7774334873864
Step=7481000, rewards=1822.0628744844482
Step=7482000, rewards=1868.2004809594573
Step=7483000, rewards=1807.2737096972917
Step=7484000, rewards=1715.1721335035456
Step=7485000, rewards=1754.948124497474
Step=7486000, rewards=1828.8619926387937
Step=7487000, rewards=1812.4301286371656
Step=7488000, rewards=1832.040844914901
Step=7489000, rewards=1848.8175763678166
Step=7490000, rewards=1811.7859446642178
Step=7491000, rewards=1719.9517423044063
Step=7492000, rewards=1804.8777447479113
Step=7493000, rewards=1845.9784939971833
Step=7494000, rewards=1855.2875947723637
Step=7495000, rewards=1910.6066655815298
Step=7496000, rewards=1907.7825239829754
Step=7497000, rewards=1850.619878854866
Step=7498000, rewards=1892.4741914729887
Step=7499000, rewards=1714.297955563374
Step=7500000, rewards=1761.1877186650402
Step=7501000, rewards=1824.584614386465
Step=7502000, rewards=1832.947615962393
Step=7503000, rewards=

Step=7680000, rewards=1752.5264121698676
Step=7681000, rewards=1767.5849035646393
Step=7682000, rewards=1843.7508125453512
Step=7683000, rewards=1795.011018385189
Step=7684000, rewards=1830.8465273148875
Step=7685000, rewards=1892.3227836730623
Step=7686000, rewards=1794.7301904575804
Step=7687000, rewards=1787.8145550739398
Step=7688000, rewards=1813.8496897552384
Step=7689000, rewards=1817.584845197584
Step=7690000, rewards=1835.7764048665865
Step=7691000, rewards=1824.4671469045009
Step=7692000, rewards=1730.8796576706666
Step=7693000, rewards=1856.4052945689539
Step=7694000, rewards=1814.5870012849782
Step=7695000, rewards=1873.5478696115445
Step=7696000, rewards=1869.0669815925637
Step=7697000, rewards=1815.1096802393608
Step=7698000, rewards=1824.528240674441
Step=7699000, rewards=1762.9300657476663
Step=7700000, rewards=1829.0745812581006
Step=7701000, rewards=1966.582250936914
Step=7702000, rewards=1842.7414255573726
Step=7703000, rewards=1744.7991342522691
Step=7704000, reward

Step=7880000, rewards=1896.2070937398269
Step=7881000, rewards=1898.1648946214375
Step=7882000, rewards=1588.6042159246913
Step=7883000, rewards=1721.401699218681
Step=7884000, rewards=1823.345010806629
Step=7885000, rewards=1880.7885281456786
Step=7886000, rewards=1757.1558018695152
Step=7887000, rewards=1870.6153669042812
Step=7888000, rewards=1834.855806202676
Step=7889000, rewards=1746.9081708100275
Step=7890000, rewards=1790.182142118872
Step=7891000, rewards=1823.680063485705
Step=7892000, rewards=1842.5000665207044
Step=7893000, rewards=1866.1806215538236
Step=7894000, rewards=1855.8373888008487
Step=7895000, rewards=1871.6668452461356
Step=7896000, rewards=1863.533112100295
Step=7897000, rewards=1866.7394398174845
Step=7898000, rewards=1912.8219330708937
Step=7899000, rewards=1951.9954162632948
Step=7900000, rewards=1727.9830586685794
Step=7901000, rewards=1761.2446511704807
Step=7902000, rewards=1912.5664964414298
Step=7903000, rewards=1777.9491673742466
Step=7904000, rewards=

Step=8081000, rewards=1789.783399266484
Step=8082000, rewards=1883.82525503051
Step=8083000, rewards=1864.18338348135
Step=8084000, rewards=1843.3235923826612
Step=8085000, rewards=1860.393249336232
Step=8086000, rewards=1756.3116289952213
Step=8087000, rewards=1864.9846425345795
Step=8088000, rewards=1875.243452176181
Step=8089000, rewards=1969.54592869422
Step=8090000, rewards=1865.332030105237
Step=8091000, rewards=1746.4988043223598
Step=8092000, rewards=1886.8546375747042
Step=8093000, rewards=1888.1494026134844
Step=8094000, rewards=1937.6895567525837
Step=8095000, rewards=1911.8783005406472
Step=8096000, rewards=1825.6185595218672
Step=8097000, rewards=1922.588143106126
Step=8098000, rewards=1892.290499500847
Step=8099000, rewards=1750.1599009701683
Step=8100000, rewards=1729.643022362158
Step=8101000, rewards=1707.0954976085475
Step=8102000, rewards=1784.1738583727695
Step=8103000, rewards=1837.7217861163542
Step=8104000, rewards=1825.9706591215936
Step=8105000, rewards=1798.66

Step=8282000, rewards=1729.2919729129
Step=8283000, rewards=1830.278265708257
Step=8284000, rewards=1853.391288674546
Step=8285000, rewards=1728.1814733941767
Step=8286000, rewards=1729.5341132550984
Step=8287000, rewards=1852.569465058255
Step=8288000, rewards=1699.0235621348231
Step=8289000, rewards=1846.7246173266574
Step=8290000, rewards=1682.8251355660655
Step=8291000, rewards=1690.9178710427625
Step=8292000, rewards=1756.7137961961741
Step=8293000, rewards=1925.4782969716439
Step=8294000, rewards=1879.9356865331524
Step=8295000, rewards=1970.3351377023546
Step=8296000, rewards=1755.8516671511813
Step=8297000, rewards=1859.2406258984042
Step=8298000, rewards=1808.4223588916846
Step=8299000, rewards=1649.3818968887579
Step=8300000, rewards=1797.6507242815712
Step=8301000, rewards=1949.7695790425419
Step=8302000, rewards=1792.1028325454224
Step=8303000, rewards=1639.925506305195
Step=8304000, rewards=1771.1618530359488
Step=8305000, rewards=1732.8215421091238
Step=8306000, rewards=1

Step=8483000, rewards=1754.991888456873
Step=8484000, rewards=1932.3030273061083
Step=8485000, rewards=1691.6210933263978
Step=8486000, rewards=1924.3049123291662
Step=8487000, rewards=1850.1843016054509
Step=8488000, rewards=1872.982557978738
Step=8489000, rewards=1898.7399509083236
Step=8490000, rewards=1812.4499914236671
Step=8491000, rewards=1859.0020171232104
Step=8492000, rewards=1932.5651194514157
Step=8493000, rewards=1866.6700896822028
Step=8494000, rewards=1943.0611890557605
Step=8495000, rewards=1863.0777187549331
Step=8496000, rewards=1979.5518761201902
Step=8497000, rewards=1882.4207931239937
Step=8498000, rewards=1804.2559142740247
Step=8499000, rewards=1775.2075847535077
Step=8500000, rewards=1872.408218235906
Step=8501000, rewards=1839.2077629610255
Step=8502000, rewards=1730.8572309901876
Step=8503000, rewards=1913.0580561631543
Step=8504000, rewards=1832.3859005099282
Step=8505000, rewards=1848.156428932522
Step=8506000, rewards=1760.6415112825769
Step=8507000, reward

Step=8684000, rewards=1800.4172525858255
Step=8685000, rewards=1736.6267985671482
Step=8686000, rewards=1831.6176064541653
Step=8687000, rewards=1892.9833235781125
Step=8688000, rewards=1808.1888530712229
Step=8689000, rewards=1933.2078935863797
Step=8690000, rewards=1782.8613721435245
Step=8691000, rewards=1861.3780124024252
Step=8692000, rewards=1856.4559106127588
Step=8693000, rewards=1771.9548148308688
Step=8694000, rewards=1794.0617329424852
Step=8695000, rewards=1883.1233374462004
Step=8696000, rewards=1727.093542091906
Step=8697000, rewards=1869.8636565568422
Step=8698000, rewards=1912.5547597668076
Step=8699000, rewards=1848.523743765101
Step=8700000, rewards=1898.935452565189
Step=8701000, rewards=1916.7764539711811
Step=8702000, rewards=1741.2455500963608
Step=8703000, rewards=1828.47342470587
Step=8704000, rewards=1890.5937747776168
Step=8705000, rewards=1904.8929768458993
Step=8706000, rewards=1869.185343038892
Step=8707000, rewards=1776.8428814612614
Step=8708000, rewards=

Step=8884000, rewards=1855.4843558525638
Step=8885000, rewards=1819.3856907959953
Step=8886000, rewards=1912.2792209810034
Step=8887000, rewards=1861.0387458570704
Step=8888000, rewards=1830.7322613034003
Step=8889000, rewards=1898.7208360604984
Step=8890000, rewards=1864.7437144630612
Step=8891000, rewards=1802.05767219943
Step=8892000, rewards=2016.4853935467706
Step=8893000, rewards=1865.2493721925975
Step=8894000, rewards=1813.4877147958937
Step=8895000, rewards=1835.4018777312526
Step=8896000, rewards=1763.061382508694
Step=8897000, rewards=1880.4799672183742
Step=8898000, rewards=1707.487796194336
Step=8899000, rewards=1856.3613261638463
Step=8900000, rewards=1742.9280802572202
Step=8901000, rewards=1805.4476254061751
Step=8902000, rewards=1992.7890940605819
Step=8903000, rewards=1905.7682907472783
Step=8904000, rewards=1914.8804792522058
Step=8905000, rewards=1910.4921382497278
Step=8906000, rewards=1783.9688721688947
Step=8907000, rewards=1896.3449617143187
Step=8908000, reward

Step=9085000, rewards=1891.052998942064
Step=9086000, rewards=1891.2785007318803
Step=9087000, rewards=1942.0203022038347
Step=9088000, rewards=1914.803133604627
Step=9089000, rewards=1821.9505523538523
Step=9090000, rewards=1676.725614438317
Step=9091000, rewards=1888.0056047595515
Step=9092000, rewards=1676.3538271201273
Step=9093000, rewards=1876.5898987746211
Step=9094000, rewards=1578.9136866152144
Step=9095000, rewards=1839.2340412711135
Step=9096000, rewards=1907.2053676375904
Step=9097000, rewards=1909.9436485244607
Step=9098000, rewards=1891.9918054065238
Step=9099000, rewards=1818.765796201247
Step=9100000, rewards=1835.2960374564993
Step=9101000, rewards=1842.089172181666
Step=9102000, rewards=1832.7220194347863
Step=9103000, rewards=1812.9268947324897
Step=9104000, rewards=1717.3550231860556
Step=9105000, rewards=1952.790064373361
Step=9106000, rewards=1830.8392502401216
Step=9107000, rewards=1872.6543955145846
Step=9108000, rewards=1857.9828090289677
Step=9109000, rewards=

Step=9285000, rewards=1935.255885308891
Step=9286000, rewards=1953.7882649294463
Step=9287000, rewards=1937.4748783017003
Step=9288000, rewards=1983.3610115677552
Step=9289000, rewards=1967.2104570979295
Step=9290000, rewards=1952.62262469473
Step=9291000, rewards=1838.3738850415607
Step=9292000, rewards=1849.2327647657562
Step=9293000, rewards=1742.8303246735636
Step=9294000, rewards=1828.120538504912
Step=9295000, rewards=1793.9976990207908
Step=9296000, rewards=1923.2999964481128
Step=9297000, rewards=1925.1919875603076
Step=9298000, rewards=1906.2857712227897
Step=9299000, rewards=1981.6842192695008
Step=9300000, rewards=1922.5540712681463
Step=9301000, rewards=1926.1732950494027
Step=9302000, rewards=1926.9813761071018
Step=9303000, rewards=1878.5606937441783
Step=9304000, rewards=1924.8071669632802
Step=9305000, rewards=1988.065367513026
Step=9306000, rewards=1854.1945521764385
Step=9307000, rewards=1852.6395430168461
Step=9308000, rewards=1873.5636032277985
Step=9309000, rewards

Step=9486000, rewards=1864.83917016758
Step=9487000, rewards=1855.4594391319954
Step=9488000, rewards=1859.1930201479706
Step=9489000, rewards=1889.6046670098851
Step=9490000, rewards=1860.2788679160483
Step=9491000, rewards=1853.3149625548901
Step=9492000, rewards=1646.4651061146621
Step=9493000, rewards=1846.349048208214
Step=9494000, rewards=1756.4483877274993
Step=9495000, rewards=1949.6411078027788
Step=9496000, rewards=1796.4139981271721
Step=9497000, rewards=1803.0956810584028
Step=9498000, rewards=1752.4358807832848
Step=9499000, rewards=1825.5625415172262
Step=9500000, rewards=1948.3614770874583
Step=9501000, rewards=1819.9611526559927
Step=9502000, rewards=1824.49323925911
Step=9503000, rewards=1695.6001219324546
Step=9504000, rewards=1823.8770593115787
Step=9505000, rewards=1803.850176870776
Step=9506000, rewards=1657.0633985627285
Step=9507000, rewards=1753.9534612897075
Step=9508000, rewards=1840.9969519323704
Step=9509000, rewards=1845.4096760046514
Step=9510000, rewards=

Step=9687000, rewards=1941.8846141394688
Step=9688000, rewards=1671.727251086736
Step=9689000, rewards=1697.5262565735893
Step=9690000, rewards=1822.739248206988
Step=9691000, rewards=1857.350217828099
Step=9692000, rewards=1919.965089768814
Step=9693000, rewards=1877.4643664935224
Step=9694000, rewards=1912.9073653058874
Step=9695000, rewards=1880.0156157763527
Step=9696000, rewards=1783.467925552422
Step=9697000, rewards=1786.0628230697048
Step=9698000, rewards=1797.0587363898542
Step=9699000, rewards=1905.6608331913333
Step=9700000, rewards=1777.972963942107
Step=9701000, rewards=1856.0276444674187
Step=9702000, rewards=1879.09940249575
Step=9703000, rewards=1812.545765340197
Step=9704000, rewards=1878.4282419720264
Step=9705000, rewards=1817.4355205691002
Step=9706000, rewards=1906.4340088334998
Step=9707000, rewards=1948.5132248732725
Step=9708000, rewards=1704.5927979180863
Step=9709000, rewards=1847.4510249643984
Step=9710000, rewards=1743.6395822939428
Step=9711000, rewards=188

Step=9888000, rewards=1815.4155309462835
Step=9889000, rewards=1832.3433201097805
Step=9890000, rewards=1790.9362718994105
Step=9891000, rewards=1813.1711779523691
Step=9892000, rewards=1800.6837868911743
Step=9893000, rewards=1895.2832121942845
Step=9894000, rewards=1983.6980698858574
Step=9895000, rewards=1953.2888861236913
Step=9896000, rewards=1846.8994102927365
Step=9897000, rewards=1909.5825053524343
Step=9898000, rewards=1897.269151261001
Step=9899000, rewards=1883.8715644126632
Step=9900000, rewards=1861.2539564731755
Step=9901000, rewards=1752.0872163733923
Step=9902000, rewards=1806.3030987440682
Step=9903000, rewards=1833.3810390684205
Step=9904000, rewards=1896.6500209180413
Step=9905000, rewards=1842.3944361816102
Step=9906000, rewards=1828.4375634891219
Step=9907000, rewards=1878.4759132737602
Step=9908000, rewards=1974.8453635164146
Step=9909000, rewards=1888.6504838808967
Step=9910000, rewards=1874.1581581829594
Step=9911000, rewards=1907.2514873808086
Step=9912000, rew

Step=10086000, rewards=1858.0267973590578
Step=10087000, rewards=1844.4937474656429
Step=10088000, rewards=1827.5724072765006
Step=10089000, rewards=1870.996240091426
Step=10090000, rewards=1878.2538866751406
Step=10091000, rewards=1887.2332278996341
Step=10092000, rewards=1870.8306047956114
Step=10093000, rewards=1835.8932096824358
Step=10094000, rewards=1907.524100094957
Step=10095000, rewards=1896.8397887392018
Step=10096000, rewards=1968.613230488491
Step=10097000, rewards=1851.4043838775174
Step=10098000, rewards=1901.098727592526
Step=10099000, rewards=1901.1314348376816
Step=10100000, rewards=1894.6748672189417
Step=10101000, rewards=1732.2535311388717
Step=10102000, rewards=1883.105806247672
Step=10103000, rewards=1855.433287938408
Step=10104000, rewards=1945.5891908292972
Step=10105000, rewards=1743.64750357279
Step=10106000, rewards=1944.9192389042037
Step=10107000, rewards=1824.4325130467437
Step=10108000, rewards=1961.6489436262532
Step=10109000, rewards=1858.422370715466
S

Step=10282000, rewards=1888.8670528315058
Step=10283000, rewards=1904.9208999231143
Step=10284000, rewards=1787.3115064394349
Step=10285000, rewards=1874.494637795554
Step=10286000, rewards=1824.9337042167037
Step=10287000, rewards=1730.2714610567996
Step=10288000, rewards=1710.2299283471145
Step=10289000, rewards=1812.684460074043
Step=10290000, rewards=1772.399747790727
Step=10291000, rewards=1897.5808972056943
Step=10292000, rewards=1935.0809368634152
Step=10293000, rewards=1890.8895329702186
Step=10294000, rewards=1829.430408552211
Step=10295000, rewards=1839.6950994655601
Step=10296000, rewards=1899.9101098038857
Step=10297000, rewards=1865.335448399431
Step=10298000, rewards=1926.0033222150134
Step=10299000, rewards=1953.3193185948544
Step=10300000, rewards=1700.1673174458858
Step=10301000, rewards=1936.508844396384
Step=10302000, rewards=1882.7673880072805
Step=10303000, rewards=1865.4595339692046
Step=10304000, rewards=1800.4971783683225
Step=10305000, rewards=1932.923440202120

Step=10478000, rewards=1795.9442238485562
Step=10479000, rewards=1723.3487871815582
Step=10480000, rewards=1786.7950541165567
Step=10481000, rewards=1767.319725422549
Step=10482000, rewards=1770.980462458851
Step=10483000, rewards=1876.2967644486475
Step=10484000, rewards=1847.073475338127
Step=10485000, rewards=1852.5403353548243
Step=10486000, rewards=1751.2010704997606
Step=10487000, rewards=1835.797937584413
Step=10488000, rewards=1713.6168397914462
Step=10489000, rewards=1759.8109974984636
Step=10490000, rewards=1880.6932221202392
Step=10491000, rewards=1847.5481237243848
Step=10492000, rewards=1880.5040703020738
Step=10493000, rewards=1901.9362629511932
Step=10494000, rewards=1870.1179025393294
Step=10495000, rewards=1878.3925331447197
Step=10496000, rewards=1884.3673697037088
Step=10497000, rewards=1826.2106954380756
Step=10498000, rewards=1889.0946802829988
Step=10499000, rewards=1899.2173306038223
Step=10500000, rewards=1914.355938992135
Step=10501000, rewards=1853.75572035157

Step=10674000, rewards=1605.1578886708157
Step=10675000, rewards=1612.0039972771872
Step=10676000, rewards=1876.8435539748111
Step=10677000, rewards=1808.2999715694543
Step=10678000, rewards=1826.936927006032
Step=10679000, rewards=1934.0460310476558
Step=10680000, rewards=1905.415066399909
Step=10681000, rewards=1755.8194817123106
Step=10682000, rewards=1744.2967666617108
Step=10683000, rewards=1820.390347475367
Step=10684000, rewards=1839.3966512863074
Step=10685000, rewards=1832.5468192255082
Step=10686000, rewards=1832.7606286924452
Step=10687000, rewards=1868.0416552643294
Step=10688000, rewards=1877.7196652818322
Step=10689000, rewards=1754.2771058898754
Step=10690000, rewards=1835.393034502704
Step=10691000, rewards=1875.2831111806408
Step=10692000, rewards=1854.7707570880184
Step=10693000, rewards=1790.9863004328279
Step=10694000, rewards=1862.6703865058141
Step=10695000, rewards=1784.9349191700423
Step=10696000, rewards=1911.3513445306062
Step=10697000, rewards=1360.2797648219

Step=10870000, rewards=1745.434344089544
Step=10871000, rewards=1723.3845602236074
Step=10872000, rewards=1797.5612159843654
Step=10873000, rewards=1915.7677515453881
Step=10874000, rewards=1865.8931394021038
Step=10875000, rewards=1765.0606266923048
Step=10876000, rewards=1858.2333515905966
Step=10877000, rewards=1868.5015201431427
Step=10878000, rewards=1803.1874012524406
Step=10879000, rewards=1772.9616043478077
Step=10880000, rewards=1890.3712724356863
Step=10881000, rewards=1881.8777567857155
Step=10882000, rewards=1755.1045695557368
Step=10883000, rewards=1732.5471953187177
Step=10884000, rewards=1913.7912994596566
Step=10885000, rewards=1879.4644082874122
Step=10886000, rewards=1807.9942263252035
Step=10887000, rewards=1863.6836928973435
Step=10888000, rewards=1929.875272490711
Step=10889000, rewards=1746.6543227619509
Step=10890000, rewards=1832.8727669880611
Step=10891000, rewards=1750.3136244049053
Step=10892000, rewards=1825.7241326557005
Step=10893000, rewards=1771.49873283

Step=11066000, rewards=1868.618667714962
Step=11067000, rewards=1837.9969438663122
Step=11068000, rewards=1744.8944976041519
Step=11069000, rewards=1901.1769214207106
Step=11070000, rewards=1807.690685252187
Step=11071000, rewards=1836.4521644877948
Step=11072000, rewards=1829.0050207650818
Step=11073000, rewards=1884.8577798126069
Step=11074000, rewards=1863.4531465898997
Step=11075000, rewards=1945.05397460458
Step=11076000, rewards=1885.7273803755697
Step=11077000, rewards=1817.350138633241
Step=11078000, rewards=1860.1701486557615
Step=11079000, rewards=1924.56704452866
Step=11080000, rewards=1734.8276187585882
Step=11081000, rewards=1888.5231425188379
Step=11082000, rewards=1909.453313568002
Step=11083000, rewards=1854.3511056894856
Step=11084000, rewards=1819.7777359764725
Step=11085000, rewards=1694.3019029030568
Step=11086000, rewards=1894.6192297828682
Step=11087000, rewards=1867.2718094242402
Step=11088000, rewards=1726.0166694432246
Step=11089000, rewards=1833.0704198305853


Step=11262000, rewards=1919.3605474972276
Step=11263000, rewards=1866.985880177737
Step=11264000, rewards=1863.2392843022176
Step=11265000, rewards=1922.5002012185946
Step=11266000, rewards=1879.1139368955378
Step=11267000, rewards=1875.8975435478899
Step=11268000, rewards=1805.3372182553967
Step=11269000, rewards=1760.6958667971262
Step=11270000, rewards=1876.8915068099977
Step=11271000, rewards=1804.753449155278
Step=11272000, rewards=1801.566292234849
Step=11273000, rewards=1862.6006788092607
Step=11274000, rewards=1876.1053458386475
Step=11275000, rewards=1872.808008461738
Step=11276000, rewards=1809.4779946037381
Step=11277000, rewards=1856.9463321293667
Step=11278000, rewards=1847.2346613328914
Step=11279000, rewards=1810.9327783939339
Step=11280000, rewards=1696.8711897993203
Step=11281000, rewards=1710.9506518700705
Step=11282000, rewards=1855.41599974295
Step=11283000, rewards=1851.9128790009981
Step=11284000, rewards=1767.9051203245467
Step=11285000, rewards=1800.026714826772

Step=11458000, rewards=1831.5114870016225
Step=11459000, rewards=1820.153586010926
Step=11460000, rewards=1895.222878121926
Step=11461000, rewards=2004.758521553931
Step=11462000, rewards=1901.697829438681
Step=11463000, rewards=1898.8535207972325
Step=11464000, rewards=1958.1463185991654
Step=11465000, rewards=1886.8138029301513
Step=11466000, rewards=1922.9606744549985
Step=11467000, rewards=1840.602835709121
Step=11468000, rewards=1857.0494334789128
Step=11469000, rewards=1784.122593245377
Step=11470000, rewards=1853.5981498475805
Step=11471000, rewards=1845.2106435757241
Step=11472000, rewards=1905.891406741142
Step=11473000, rewards=1807.8324209540817
Step=11474000, rewards=1778.636558829026
Step=11475000, rewards=1868.542700470602
Step=11476000, rewards=1909.6485928712068
Step=11477000, rewards=1814.3115099470592
Step=11478000, rewards=1699.2531489984153
Step=11479000, rewards=1871.337479979352
Step=11480000, rewards=1837.615228601443
Step=11481000, rewards=1727.0506637666542
Ste

Step=11654000, rewards=1765.1965316971891
Step=11655000, rewards=1797.0801952415463
Step=11656000, rewards=1749.1522998509824
Step=11657000, rewards=1940.1213720195503
Step=11658000, rewards=1895.401463844896
Step=11659000, rewards=1775.3841394785832
Step=11660000, rewards=1746.4015090203595
Step=11661000, rewards=1904.6715392824383
Step=11662000, rewards=1847.8239803593467
Step=11663000, rewards=1818.6459472734177
Step=11664000, rewards=1729.6451386355154
Step=11665000, rewards=1821.3311964985596
Step=11666000, rewards=1877.4507884578065
Step=11667000, rewards=1796.8521476236385
Step=11668000, rewards=1637.3172606169808
Step=11669000, rewards=1757.597584891791
Step=11670000, rewards=1865.5051689567797
Step=11671000, rewards=1769.5488349203683
Step=11672000, rewards=1909.8734725814013
Step=11673000, rewards=1721.9134855474795
Step=11674000, rewards=1855.5326849963328
Step=11675000, rewards=1958.7580935323706
Step=11676000, rewards=1903.8959485430275
Step=11677000, rewards=1874.69599475

Step=11849000, rewards=1906.1617233254185
Step=11850000, rewards=1755.4673936906188
Step=11851000, rewards=1854.2417060667408
Step=11852000, rewards=1854.5390009908053
Step=11853000, rewards=1990.6302672238826
Step=11854000, rewards=1942.9390594980914
Step=11855000, rewards=1907.5743762703028
Step=11856000, rewards=1783.93700754839
Step=11857000, rewards=1906.2252585891922
Step=11858000, rewards=1900.1916302501281
Step=11859000, rewards=1802.6549580403343
Step=11860000, rewards=1860.820188644594
Step=11861000, rewards=1943.0682999912897
Step=11862000, rewards=1806.1101569919474
Step=11863000, rewards=1952.9977656649198
Step=11864000, rewards=1712.6016949088732
Step=11865000, rewards=1696.2032121489449
Step=11866000, rewards=1913.6292046361455
Step=11867000, rewards=1859.3429989356928
Step=11868000, rewards=1816.8192462085879
Step=11869000, rewards=1958.6905933787866
Step=11870000, rewards=1893.142360570916
Step=11871000, rewards=1878.3684312999092
Step=11872000, rewards=1929.1000958379

Step=12044000, rewards=1966.3129204918887
Step=12045000, rewards=1949.9696649738496
Step=12046000, rewards=1867.2599521943873
Step=12047000, rewards=1728.662511384961
Step=12048000, rewards=1920.882046304841
Step=12049000, rewards=1878.9175982693785
Step=12050000, rewards=1882.0052839432472
Step=12051000, rewards=1858.444354573618
Step=12052000, rewards=1868.6350171315867
Step=12053000, rewards=1828.340962731353
Step=12054000, rewards=1861.8035326378395
Step=12055000, rewards=1919.0944680570067
Step=12056000, rewards=1879.5106701702084
Step=12057000, rewards=1921.3878044070407
Step=12058000, rewards=1913.140320161607
Step=12059000, rewards=1882.676125862748
Step=12060000, rewards=1969.2886486271764
Step=12061000, rewards=1841.760265682752
Step=12062000, rewards=1751.9212382462767
Step=12063000, rewards=1958.2953299351414
Step=12064000, rewards=1660.084474002651
Step=12065000, rewards=1930.2846227329142
Step=12066000, rewards=1953.4057688250455
Step=12067000, rewards=1916.0956570983817


Step=12240000, rewards=1837.5683765851743
Step=12241000, rewards=1819.0955876923608
Step=12242000, rewards=1906.371879209155
Step=12243000, rewards=1972.5440738794418
Step=12244000, rewards=1873.5785427516503
Step=12245000, rewards=1877.7627292917325
Step=12246000, rewards=1799.9943307354881
Step=12247000, rewards=1864.8213190510119
Step=12248000, rewards=1844.3741939375793
Step=12249000, rewards=1888.4606119635093
Step=12250000, rewards=1959.2715195452197
Step=12251000, rewards=1791.3646518074822
Step=12252000, rewards=1680.6357509543918
Step=12253000, rewards=1803.577435662103
Step=12254000, rewards=1925.2884741848789
Step=12255000, rewards=1931.639184143389
Step=12256000, rewards=1866.4188016805801
Step=12257000, rewards=1962.167733125725
Step=12258000, rewards=1832.7289435892028
Step=12259000, rewards=1990.488230985002
Step=12260000, rewards=1921.4338921388533
Step=12261000, rewards=1880.5182635149692
Step=12262000, rewards=1958.4421387235682
Step=12263000, rewards=1731.13552816480

Step=12435000, rewards=1943.760352545388
Step=12436000, rewards=1885.5109982939348
Step=12437000, rewards=1857.2194179934272
Step=12438000, rewards=1847.9722806109064
Step=12439000, rewards=1890.7984990623765
Step=12440000, rewards=1842.5949148714185
Step=12441000, rewards=1890.9121304226871
Step=12442000, rewards=1914.617452874112
Step=12443000, rewards=1977.7523706529223
Step=12444000, rewards=1895.8205025412813
Step=12445000, rewards=1751.964993745778
Step=12446000, rewards=1876.6534536844545
Step=12447000, rewards=1780.68043120879
Step=12448000, rewards=1920.7298186586386
Step=12449000, rewards=1831.6592725998319
Step=12450000, rewards=1863.94785203924
Step=12451000, rewards=1847.5412123802926
Step=12452000, rewards=1830.2618195921013
Step=12453000, rewards=1985.779213742509
Step=12454000, rewards=1741.0247635846358
Step=12455000, rewards=1785.471009865844
Step=12456000, rewards=1905.6439550601121
Step=12457000, rewards=1875.8195530588137
Step=12458000, rewards=1855.292577093116
St

Step=12631000, rewards=1943.503025728361
Step=12632000, rewards=1808.4844180672687
Step=12633000, rewards=1913.9715200891815
Step=12634000, rewards=1891.5261009447504
Step=12635000, rewards=1931.0135219136566
Step=12636000, rewards=1871.6369206328566
Step=12637000, rewards=1730.7077750748304
Step=12638000, rewards=1853.770803121592
Step=12639000, rewards=1921.435880991036
Step=12640000, rewards=1860.4982744341312
Step=12641000, rewards=1887.9000840268266
Step=12642000, rewards=1730.3157770521466
Step=12643000, rewards=1952.1691237095854
Step=12644000, rewards=1938.6952340254297
Step=12645000, rewards=1863.4044006427025
Step=12646000, rewards=1711.4310228514842
Step=12647000, rewards=1945.0142630982662
Step=12648000, rewards=1941.2701878818098
Step=12649000, rewards=1866.1781090692489
Step=12650000, rewards=1951.5073583221858
Step=12651000, rewards=1933.3742801275885
Step=12652000, rewards=1947.3923017662748
Step=12653000, rewards=1915.4526565264255
Step=12654000, rewards=1989.024753536

Step=12827000, rewards=1900.3905985946267
Step=12828000, rewards=1819.5121830633445
Step=12829000, rewards=1884.8251173251974
Step=12830000, rewards=1843.5965007054253
Step=12831000, rewards=1803.131765252061
Step=12832000, rewards=1920.3966977000257
Step=12833000, rewards=1878.074836734477
Step=12834000, rewards=1903.3603725722392
Step=12835000, rewards=1902.1284010149682
Step=12836000, rewards=1787.2629273426323
Step=12837000, rewards=1623.342997584014
Step=12838000, rewards=1874.4154329404628
Step=12839000, rewards=1713.0515641127156
Step=12840000, rewards=1837.4672094790947
Step=12841000, rewards=1804.7389453884325
Step=12842000, rewards=1856.3885311041845
Step=12843000, rewards=1861.371027005634
Step=12844000, rewards=1819.7697303675418
Step=12845000, rewards=1888.1283877514131
Step=12846000, rewards=1767.4973977543452
Step=12847000, rewards=1655.341690281351
Step=12848000, rewards=1814.2376759119868
Step=12849000, rewards=1867.2942758202937
Step=12850000, rewards=1937.83223816958

Step=13023000, rewards=1768.153861987928
Step=13024000, rewards=1809.8561613757086
Step=13025000, rewards=1832.9057976417455
Step=13026000, rewards=1811.923633643095
Step=13027000, rewards=1726.2071746809004
Step=13028000, rewards=1821.5025039727386
Step=13029000, rewards=1845.2977275870269
Step=13030000, rewards=1929.613816825089
Step=13031000, rewards=1867.3782282972586
Step=13032000, rewards=1932.251626515338
Step=13033000, rewards=1992.9283853064817
Step=13034000, rewards=1885.9987403915247
Step=13035000, rewards=1839.8652156387561
Step=13036000, rewards=1758.198450145058
Step=13037000, rewards=2011.603476610881
Step=13038000, rewards=1930.9277625882228
Step=13039000, rewards=1922.6365177592274
Step=13040000, rewards=1578.500360693349
Step=13041000, rewards=1967.4950262271284
Step=13042000, rewards=1904.3847369612479
Step=13043000, rewards=1836.917885281804
Step=13044000, rewards=1739.4786024044236
Step=13045000, rewards=1875.5039546966193
Step=13046000, rewards=1713.6382952644565


Step=13219000, rewards=1849.4178924781188
Step=13220000, rewards=1928.470745025137
Step=13221000, rewards=1900.9731277222204
Step=13222000, rewards=1853.975663842383
Step=13223000, rewards=1912.1278293693188
Step=13224000, rewards=1913.5873265981706
Step=13225000, rewards=1959.935911705694
Step=13226000, rewards=1884.1997868651101
Step=13227000, rewards=1809.1556498484154
Step=13228000, rewards=1942.5805328566726
Step=13229000, rewards=1933.1692632584236
Step=13230000, rewards=1860.1622682490543
Step=13231000, rewards=1821.971396938451
Step=13232000, rewards=1937.1657137094755
Step=13233000, rewards=1861.0665695199973
Step=13234000, rewards=1935.0393737181153
Step=13235000, rewards=1913.3357314430204
Step=13236000, rewards=1950.2915851035877
Step=13237000, rewards=1802.496609731017
Step=13238000, rewards=1911.3786318462433
Step=13239000, rewards=1927.5487847225536
Step=13240000, rewards=1830.9489870981818
Step=13241000, rewards=1918.77337385458
Step=13242000, rewards=1749.49137818942
S

Step=13415000, rewards=1788.8213654759018
Step=13416000, rewards=1859.0410491398707
Step=13417000, rewards=1839.6375636371986
Step=13418000, rewards=1844.9144603819057
Step=13419000, rewards=1854.4843018794222
Step=13420000, rewards=1879.0493124352272
Step=13421000, rewards=1748.3739396592957
Step=13422000, rewards=1845.0427405419966
Step=13423000, rewards=1912.146417399507
Step=13424000, rewards=1962.5836802814542
Step=13425000, rewards=1894.2936426957983
Step=13426000, rewards=1942.0358541366727
Step=13427000, rewards=1800.091970683146
Step=13428000, rewards=1935.184670887937
Step=13429000, rewards=1870.4193044503074
Step=13430000, rewards=1958.0843448128842
Step=13431000, rewards=1927.80431998118
Step=13432000, rewards=1860.2617908906345
Step=13433000, rewards=1827.201641077537
Step=13434000, rewards=1878.4145534555387
Step=13435000, rewards=1858.0751715979004
Step=13436000, rewards=1874.868454144515
Step=13437000, rewards=1879.0891677881348
Step=13438000, rewards=1908.8315033494212

Step=13611000, rewards=1856.2009152634334
Step=13612000, rewards=1919.0302094203216
Step=13613000, rewards=1879.3492484367944
Step=13614000, rewards=1994.0533925488717
Step=13615000, rewards=1985.3599201322459
Step=13616000, rewards=1854.6493959839295
Step=13617000, rewards=1919.3666655869602
Step=13618000, rewards=1958.8499475455742
Step=13619000, rewards=1864.1787925643237
Step=13620000, rewards=1837.5332463462958
Step=13621000, rewards=1878.5796393786054
Step=13622000, rewards=1765.08083549562
Step=13623000, rewards=1891.6647016315587
Step=13624000, rewards=1910.22058648919
Step=13625000, rewards=1919.9590291399281
Step=13626000, rewards=1953.5452249924276
Step=13627000, rewards=1917.1386885764641
Step=13628000, rewards=1922.1933844404052
Step=13629000, rewards=1838.805214879564
Step=13630000, rewards=1856.1053043744066
Step=13631000, rewards=1854.612878325638
Step=13632000, rewards=1869.5163784317604
Step=13633000, rewards=1970.5886442469769
Step=13634000, rewards=1952.644666136605

Step=13807000, rewards=1879.8211008364904
Step=13808000, rewards=1857.0912068741648
Step=13809000, rewards=1879.7485646044663
Step=13810000, rewards=1963.272714144813
Step=13811000, rewards=2003.1071131148528
Step=13812000, rewards=1956.1495008790387
Step=13813000, rewards=1752.9377532487945
Step=13814000, rewards=1881.196611175845
Step=13815000, rewards=1962.4841400044763
Step=13816000, rewards=2056.2047509490812
Step=13817000, rewards=1872.1269182140363
Step=13818000, rewards=1925.203693422941
Step=13819000, rewards=1923.6697285737384
Step=13820000, rewards=1798.4134252241035
Step=13821000, rewards=1938.3784068175203
Step=13822000, rewards=1980.462991888168
Step=13823000, rewards=1898.5948332759674
Step=13824000, rewards=1939.2511308023002
Step=13825000, rewards=1909.80119842259
Step=13826000, rewards=1770.2788079153493
Step=13827000, rewards=1930.6952653105625
Step=13828000, rewards=1971.1587351212363
Step=13829000, rewards=1885.0046898855248
Step=13830000, rewards=1895.776789308845

Step=14003000, rewards=1887.1394450097139
Step=14004000, rewards=1781.8111372986639
Step=14005000, rewards=1777.8596702877462
Step=14006000, rewards=1853.9483767914112
Step=14007000, rewards=1871.5334268447784
Step=14008000, rewards=1872.1656911359187
Step=14009000, rewards=1883.16167991112
Step=14010000, rewards=1738.9200436940669
Step=14011000, rewards=1820.3338069097306
Step=14012000, rewards=1759.1932960740382
Step=14013000, rewards=1916.4931764267847
Step=14014000, rewards=1804.496838531044
Step=14015000, rewards=1922.1023592851875
Step=14016000, rewards=1922.1908134140865
Step=14017000, rewards=1877.4208532178334
Step=14018000, rewards=1907.9001183965372
Step=14019000, rewards=1673.786271805099
Step=14020000, rewards=1712.7500682008247
Step=14021000, rewards=1920.1919161926169
Step=14022000, rewards=1876.9833038454324
Step=14023000, rewards=1916.9575928287966
Step=14024000, rewards=1927.885043239716
Step=14025000, rewards=1813.0097823508465
Step=14026000, rewards=1783.20142397473

Step=14199000, rewards=1789.0584033510827
Step=14200000, rewards=1817.978439010795
Step=14201000, rewards=1808.9939079050034
Step=14202000, rewards=1903.708819321432
Step=14203000, rewards=1826.6178562800778
Step=14204000, rewards=1803.8791106937565
Step=14205000, rewards=1968.8449109353257
Step=14206000, rewards=1882.7273119556462
Step=14207000, rewards=1736.930702044526
Step=14208000, rewards=1746.9930755606583
Step=14209000, rewards=1791.7897153149077
Step=14210000, rewards=1828.0374846350724
Step=14211000, rewards=1886.7013943612649
Step=14212000, rewards=1855.0903751929636
Step=14213000, rewards=1886.8603004185175
Step=14214000, rewards=1932.1992995453802
Step=14215000, rewards=1923.181798157692
Step=14216000, rewards=1740.6255054440019
Step=14217000, rewards=1709.9490858703791
Step=14218000, rewards=1843.9466215498703
Step=14219000, rewards=1878.7108255929998
Step=14220000, rewards=1827.232365782369
Step=14221000, rewards=1943.3104085883158
Step=14222000, rewards=1835.42657326410

Step=14395000, rewards=1924.0290924191493
Step=14396000, rewards=1928.0405401784908
Step=14397000, rewards=1945.1971708838776
Step=14398000, rewards=1876.4272513695687
Step=14399000, rewards=1684.9239035410626
Step=14400000, rewards=1949.9942793622088
Step=14401000, rewards=1904.1827957580829
Step=14402000, rewards=2009.6072855439904
Step=14403000, rewards=1910.608205251737
Step=14404000, rewards=1925.7141324001504
Step=14405000, rewards=1978.3315523533506
Step=14406000, rewards=1940.86287056443
Step=14407000, rewards=1815.1980866988163
Step=14408000, rewards=1848.5319926189004
Step=14409000, rewards=1902.823744860648
Step=14410000, rewards=1794.5579482728192
Step=14411000, rewards=1945.1911393469238
Step=14412000, rewards=1994.947863679054
Step=14413000, rewards=1885.8243461390186
Step=14414000, rewards=1806.3941294141002
Step=14415000, rewards=1810.230778190475
Step=14416000, rewards=1762.52117878698
Step=14417000, rewards=1895.6261963617067
Step=14418000, rewards=1856.0625321450598


Step=14591000, rewards=1897.9957199683602
Step=14592000, rewards=1890.5989929172474
Step=14593000, rewards=1863.4779990501659
Step=14594000, rewards=1846.0894421037847
Step=14595000, rewards=1906.8371796813333
Step=14596000, rewards=1973.8284119391626
Step=14597000, rewards=1920.0854431419034
Step=14598000, rewards=1901.8224028225438
Step=14599000, rewards=1839.347891680249
Step=14600000, rewards=1862.5186304377949
Step=14601000, rewards=1900.6069922936188
Step=14602000, rewards=1756.893975624322
Step=14603000, rewards=1861.0660811441064
Step=14604000, rewards=1775.3012978228737
Step=14605000, rewards=1889.591677329079
Step=14606000, rewards=1834.4052519203801
Step=14607000, rewards=1856.608539111789
Step=14608000, rewards=1953.6386507981
Step=14609000, rewards=1861.543932390679
Step=14610000, rewards=1885.6788578069052
Step=14611000, rewards=1742.7668847706937
Step=14612000, rewards=1894.702991429936
Step=14613000, rewards=1831.672893128236
Step=14614000, rewards=1770.262870818761
Ste

Step=14787000, rewards=1830.4804910850796
Step=14788000, rewards=1937.9896892009342
Step=14789000, rewards=1745.1313102781096
Step=14790000, rewards=1903.782998239665
Step=14791000, rewards=1767.9117618189537
Step=14792000, rewards=1902.393089446796
Step=14793000, rewards=1847.8896621780948
Step=14794000, rewards=1825.7826721913937
Step=14795000, rewards=1893.8886566732242
Step=14796000, rewards=1802.757600939744
Step=14797000, rewards=1782.277573559329
Step=14798000, rewards=1822.9694564844197
Step=14799000, rewards=1942.2675130961327
Step=14800000, rewards=1839.9214682265413
Step=14801000, rewards=1738.5004582821305
Step=14802000, rewards=1842.297489497435
Step=14803000, rewards=1817.9233719448926
Step=14804000, rewards=1812.678146872503
Step=14805000, rewards=1674.8958820343191
Step=14806000, rewards=1689.6057709753259
Step=14807000, rewards=1881.8337343262438
Step=14808000, rewards=1823.9065027243375
Step=14809000, rewards=1902.021147584315
Step=14810000, rewards=1869.6518502593328

Step=14983000, rewards=1730.1005816420798
Step=14984000, rewards=1849.607831455329
Step=14985000, rewards=1789.40661505091
Step=14986000, rewards=1854.9672865004025
Step=14987000, rewards=1887.9585084402618
Step=14988000, rewards=1800.8520676960952
Step=14989000, rewards=1804.956684982503
Step=14990000, rewards=1796.4312012351652
Step=14991000, rewards=1770.41080168396
Step=14992000, rewards=1901.35704169528
Step=14993000, rewards=1841.91045263678
Step=14994000, rewards=1822.7504542361048
Step=14995000, rewards=1810.4432797479112
Step=14996000, rewards=1879.5342325359848
Step=14997000, rewards=1713.2226924676088
Step=14998000, rewards=1828.6694092787507
Step=14999000, rewards=1680.5327229150646
Step=15000000, rewards=1850.3790233220066
Step=15001000, rewards=1822.345961528747
Step=15002000, rewards=1795.2062119279465
Step=15003000, rewards=1885.1183774607634
Step=15004000, rewards=1877.014524755009
Step=15005000, rewards=1783.2672946487148
Step=15006000, rewards=1726.2306090872082
Step

Step=15179000, rewards=1833.247901447883
Step=15180000, rewards=1652.9315498224637
Step=15181000, rewards=1849.061067247449
Step=15182000, rewards=1961.395138578164
Step=15183000, rewards=1800.8060858308088
Step=15184000, rewards=1887.793664840734
Step=15185000, rewards=1902.1209411067925
Step=15186000, rewards=1838.6716901541556
Step=15187000, rewards=1704.4360811688568
Step=15188000, rewards=1840.1965199332576
Step=15189000, rewards=1868.2073118308329
Step=15190000, rewards=1783.3752563671649
Step=15191000, rewards=1826.123723509512
Step=15192000, rewards=1974.9727469827935
Step=15193000, rewards=1954.91144894901
Step=15194000, rewards=1909.9448219601716
Step=15195000, rewards=1864.4814455080927
Step=15196000, rewards=1774.118251671805
Step=15197000, rewards=1768.2544423467584
Step=15198000, rewards=1818.897422068791
Step=15199000, rewards=1870.7931838783625
Step=15200000, rewards=1851.5991328585449
Step=15201000, rewards=1875.7763050809006
Step=15202000, rewards=1835.1196327826117
S

Step=15375000, rewards=1921.8452707030062
Step=15376000, rewards=1974.8904177805662
Step=15377000, rewards=1839.7744570415227
Step=15378000, rewards=1849.5432866632893
Step=15379000, rewards=1870.764027701479
Step=15380000, rewards=1998.1015285736078
Step=15381000, rewards=1826.0335454267927
Step=15382000, rewards=1905.6591155477547
Step=15383000, rewards=1869.8496207297771
Step=15384000, rewards=1983.4345884885684
Step=15385000, rewards=1737.1428520274708
Step=15386000, rewards=1930.5780849148405
Step=15387000, rewards=1898.2286198104655
Step=15388000, rewards=1959.895908811542
Step=15389000, rewards=1697.876341067754
Step=15390000, rewards=1850.7145034645632
Step=15391000, rewards=1987.841377388975
Step=15392000, rewards=1584.220761965711
Step=15393000, rewards=1965.295539886975
Step=15394000, rewards=1856.697716073312
Step=15395000, rewards=1817.2561210373449
Step=15396000, rewards=1816.7743570061425
Step=15397000, rewards=1951.2790177676534
Step=15398000, rewards=1884.1115611545147

Step=15571000, rewards=1925.7831854158987
Step=15572000, rewards=1927.152489410722
Step=15573000, rewards=2056.7913502714187
Step=15574000, rewards=1959.3795219263488
Step=15575000, rewards=1936.557552839932
Step=15576000, rewards=1848.3679634787313
Step=15577000, rewards=1830.9831083519894
Step=15578000, rewards=1881.4706365110835
Step=15579000, rewards=1860.8673552715632
Step=15580000, rewards=1792.5120742365539
Step=15581000, rewards=1909.9763679125927
Step=15582000, rewards=1929.0050628876825
Step=15583000, rewards=1936.4775807941644
Step=15584000, rewards=1948.6588561212225
Step=15585000, rewards=1840.5182455009501
Step=15586000, rewards=1892.3365071827568
Step=15587000, rewards=1980.327189396922
Step=15588000, rewards=1892.3589561011954
Step=15589000, rewards=1829.873377863047
Step=15590000, rewards=1842.1535608545103
Step=15591000, rewards=1839.929190534449
Step=15592000, rewards=1847.7962912076105
Step=15593000, rewards=1909.5117243211066
Step=15594000, rewards=1850.21009379686

Step=15767000, rewards=1841.6252793842007
Step=15768000, rewards=1825.1393247724066
Step=15769000, rewards=1917.706801415755
Step=15770000, rewards=1991.0107936096922
Step=15771000, rewards=1969.9291018004944
Step=15772000, rewards=1975.5123068680136
Step=15773000, rewards=1999.3304228521572
Step=15774000, rewards=1784.3690655871615
Step=15775000, rewards=1855.6776800833777
Step=15776000, rewards=1880.9877409473627
Step=15777000, rewards=1870.5413503429174
Step=15778000, rewards=1824.0781964671198
Step=15779000, rewards=1819.0360258740359
Step=15780000, rewards=1997.152334864244
Step=15781000, rewards=1856.8704829467217
Step=15782000, rewards=1929.2065468534643
Step=15783000, rewards=1895.030058695831
Step=15784000, rewards=1849.8818309301935
Step=15785000, rewards=1841.9706838359714
Step=15786000, rewards=1891.5524309806003
Step=15787000, rewards=1846.4618156052313
Step=15788000, rewards=1862.7247428963858
Step=15789000, rewards=1712.0802959797095
Step=15790000, rewards=1913.817163738

Step=15963000, rewards=1950.8190470885422
Step=15964000, rewards=1835.0211125239691
Step=15965000, rewards=1925.5044480731945
Step=15966000, rewards=1918.5368157243358
Step=15967000, rewards=1887.771533484453
Step=15968000, rewards=1992.5753525015816
Step=15969000, rewards=1685.7541083232538
Step=15970000, rewards=1877.9918000097666
Step=15971000, rewards=1914.9911137339714
Step=15972000, rewards=1992.4192970868908
Step=15973000, rewards=1992.745530091756
Step=15974000, rewards=1970.8556236763638
Step=15975000, rewards=1913.2203022291576
Step=15976000, rewards=1880.5013577622767
Step=15977000, rewards=1985.791153495092
Step=15978000, rewards=1959.5659703888111
Step=15979000, rewards=1820.1007298988532
Step=15980000, rewards=1873.0930597640745
Step=15981000, rewards=1688.254161691558
Step=15982000, rewards=1937.0173800154464
Step=15983000, rewards=1955.4024112008378
Step=15984000, rewards=2001.1679730194087
Step=15985000, rewards=1784.3856282390707
Step=15986000, rewards=1924.3897732418

Step=16158000, rewards=1838.0118371805954
Step=16159000, rewards=1889.7361500634872
Step=16160000, rewards=1885.0515322032197
Step=16161000, rewards=1944.7946617777109
Step=16162000, rewards=1854.9777454836922
Step=16163000, rewards=1937.365997546364
Step=16164000, rewards=1812.7302794413679
Step=16165000, rewards=1865.6157389016773
Step=16166000, rewards=1899.176692662437
Step=16167000, rewards=1928.2720126540057
Step=16168000, rewards=1740.2694254081
Step=16169000, rewards=1735.3888312342615
Step=16170000, rewards=1936.843411134966
Step=16171000, rewards=1921.7018667855205
Step=16172000, rewards=1808.214603914495
Step=16173000, rewards=1869.5003566814119
Step=16174000, rewards=1913.1251573352836
Step=16175000, rewards=1850.8863388700815
Step=16176000, rewards=1845.3537836459684
Step=16177000, rewards=1812.909161077954
Step=16178000, rewards=1960.8173019941526
Step=16179000, rewards=1925.199364867297
Step=16180000, rewards=1913.134084645591
Step=16181000, rewards=1875.8439558610753
St

Step=16354000, rewards=1907.8846612484
Step=16355000, rewards=1831.4963797031044
Step=16356000, rewards=1921.3665504125775
Step=16357000, rewards=1860.4210673014934
Step=16358000, rewards=1951.3172184392783
Step=16359000, rewards=1887.0892146173885
Step=16360000, rewards=1886.061219380459
Step=16361000, rewards=1852.3978367079558
Step=16362000, rewards=1871.8009934518986
Step=16363000, rewards=1927.9455529394609
Step=16364000, rewards=1845.9382555956993
Step=16365000, rewards=1828.9090112729693
Step=16366000, rewards=1890.906464533998
Step=16367000, rewards=1729.2128672842562
Step=16368000, rewards=1849.4757405732355
Step=16369000, rewards=1831.3348095000185
Step=16370000, rewards=1927.4172640783218
Step=16371000, rewards=1965.7673714536234
Step=16372000, rewards=1981.8456920589836
Step=16373000, rewards=1952.144984424073
Step=16374000, rewards=1810.790026794542
Step=16375000, rewards=2010.9732453398813
Step=16376000, rewards=1922.5643145582058
Step=16377000, rewards=1939.0469040717303

Step=16550000, rewards=1945.4732603625337
Step=16551000, rewards=1919.1939736648226
Step=16552000, rewards=1914.2920376846812
Step=16553000, rewards=1861.900903764983
Step=16554000, rewards=1755.9702821935946
Step=16555000, rewards=1947.150128683845
Step=16556000, rewards=1892.9587212245003
Step=16557000, rewards=1935.0100506745955
Step=16558000, rewards=1997.9868001082582
Step=16559000, rewards=1914.9229118690525
Step=16560000, rewards=2019.0494312206163
Step=16561000, rewards=1849.7886488733295
Step=16562000, rewards=1878.3205895878689
Step=16563000, rewards=1892.0428462068621
Step=16564000, rewards=1922.6518058974796
Step=16565000, rewards=1858.673020011235
Step=16566000, rewards=1993.1818740016777
Step=16567000, rewards=1874.1217903323384
Step=16568000, rewards=1887.0097312009348
Step=16569000, rewards=1904.7955745596596
Step=16570000, rewards=1934.987054748196
Step=16571000, rewards=1868.9266856213928
Step=16572000, rewards=1919.1614838306857
Step=16573000, rewards=1928.3603717878

Step=16746000, rewards=1941.6690379182023
Step=16747000, rewards=1828.8222370498177
Step=16748000, rewards=1918.0825705893162
Step=16749000, rewards=1897.6323741352974
Step=16750000, rewards=1931.4324315034219
Step=16751000, rewards=1870.8966932762225
Step=16752000, rewards=1727.5151571001338
Step=16753000, rewards=1954.3127652570845
Step=16754000, rewards=1953.4712469254052
Step=16755000, rewards=1791.2007814439953
Step=16756000, rewards=1944.0470892837795
Step=16757000, rewards=1912.9336164688623
Step=16758000, rewards=1804.5879385567919
Step=16759000, rewards=1909.3123860619446
Step=16760000, rewards=1944.0569446045183
Step=16761000, rewards=1884.6902658357374
Step=16762000, rewards=2007.1078689251847
Step=16763000, rewards=1967.602260077469
Step=16764000, rewards=1839.8141260540485
Step=16765000, rewards=1926.8493765611936
Step=16766000, rewards=1870.1468101699788
Step=16767000, rewards=1895.4372012876702
Step=16768000, rewards=1822.6249490684572
Step=16769000, rewards=1881.8232987

Step=16942000, rewards=1855.3771672599144
Step=16943000, rewards=1874.6896459436584
Step=16944000, rewards=1862.2729958660739
Step=16945000, rewards=1712.9960570473486
Step=16946000, rewards=1798.188675361244
Step=16947000, rewards=1810.4764495437753
Step=16948000, rewards=1843.8903664144564
Step=16949000, rewards=1772.907777500451
Step=16950000, rewards=1934.6272647670366
Step=16951000, rewards=1826.835090100304
Step=16952000, rewards=1884.3678582676823
Step=16953000, rewards=1874.794769818868
Step=16954000, rewards=1980.8345913602298
Step=16955000, rewards=1878.6515323018737
Step=16956000, rewards=1856.9424657623847
Step=16957000, rewards=1899.6147734959804
Step=16958000, rewards=1777.341696982813
Step=16959000, rewards=1869.9534450522974
Step=16960000, rewards=1860.2151271508544
Step=16961000, rewards=1815.3435521622787
Step=16962000, rewards=1905.1767878959456
Step=16963000, rewards=1794.4965021186977
Step=16964000, rewards=1874.1050603033584
Step=16965000, rewards=1887.26112834131

Step=17138000, rewards=1863.58825722264
Step=17139000, rewards=1869.975157057416
Step=17140000, rewards=1989.3093162599885
Step=17141000, rewards=1874.5045753674963
Step=17142000, rewards=1791.0304998070142
Step=17143000, rewards=1946.1808592127359
Step=17144000, rewards=1977.757735782212
Step=17145000, rewards=1884.6480698257656
Step=17146000, rewards=1896.9812230167472
Step=17147000, rewards=1933.442535809512
Step=17148000, rewards=1863.841635921805
Step=17149000, rewards=1864.9901070186445
Step=17150000, rewards=1934.0043932745043
Step=17151000, rewards=1940.1153438344336
Step=17152000, rewards=1901.867395351439
Step=17153000, rewards=1882.009543686079
Step=17154000, rewards=1982.179132061452
Step=17155000, rewards=1892.4498940129515
Step=17156000, rewards=1860.3630994344096
Step=17157000, rewards=1930.4004463181611
Step=17158000, rewards=1984.6762552988453
Step=17159000, rewards=1917.3303327948286
Step=17160000, rewards=1963.3878169610018
Step=17161000, rewards=1974.9249727715471
S

Step=17334000, rewards=1906.5902069849024
Step=17335000, rewards=1919.6698575187268
Step=17336000, rewards=1879.093925994168
Step=17337000, rewards=1843.165024881721
Step=17338000, rewards=1810.9250588231573
Step=17339000, rewards=1883.4301948434788
Step=17340000, rewards=1937.38076669864
Step=17341000, rewards=1899.576648175825
Step=17342000, rewards=1869.4368567565978
Step=17343000, rewards=1886.0749962430461
Step=17344000, rewards=1849.0962784810617
Step=17345000, rewards=1989.1998410995764
Step=17346000, rewards=1887.728859227164
Step=17347000, rewards=1924.8091943166698
Step=17348000, rewards=1854.012971449337
Step=17349000, rewards=1849.5702028284256
Step=17350000, rewards=1920.5610798462835
Step=17351000, rewards=1885.0267752972532
Step=17352000, rewards=1867.2255361314296
Step=17353000, rewards=1831.6268736116372
Step=17354000, rewards=1845.203858721104
Step=17355000, rewards=1871.891595161069
Step=17356000, rewards=1918.957575370696
Step=17357000, rewards=1834.1625219128464
St

Step=17530000, rewards=1912.8970673456736
Step=17531000, rewards=1771.3176102371012
Step=17532000, rewards=1787.9031267648647
Step=17533000, rewards=1860.4263655164832
Step=17534000, rewards=1879.0959990631927
Step=17535000, rewards=1741.7865149545141
Step=17536000, rewards=1826.2965721417322
Step=17537000, rewards=2000.7748771482854
Step=17538000, rewards=1901.050940629881
Step=17539000, rewards=1757.0794912397143
Step=17540000, rewards=1770.7619407384668
Step=17541000, rewards=1923.8092156742168
Step=17542000, rewards=1903.7463394605165
Step=17543000, rewards=1926.4871273301767
Step=17544000, rewards=1897.253430339728
Step=17545000, rewards=1938.6531682368468
Step=17546000, rewards=1846.5421244715885
Step=17547000, rewards=1931.0056320597735
Step=17548000, rewards=2051.175993202762
Step=17549000, rewards=1972.778646838991
Step=17550000, rewards=1928.2866493811985
Step=17551000, rewards=1797.7373635792737
Step=17552000, rewards=1846.89089951439
Step=17553000, rewards=1856.766989708917

Step=17726000, rewards=1962.4691310258715
Step=17727000, rewards=1923.1153281733627
Step=17728000, rewards=1924.7929609319053
Step=17729000, rewards=1873.8112484310461
Step=17730000, rewards=1852.740275274111
Step=17731000, rewards=1854.7952561223362
Step=17732000, rewards=1789.142495797941
Step=17733000, rewards=1887.3786542478606
Step=17734000, rewards=1897.137810648818
Step=17735000, rewards=1840.2432087627296
Step=17736000, rewards=1846.114033148987
Step=17737000, rewards=1871.5504722976937
Step=17738000, rewards=1825.919066527051
Step=17739000, rewards=1921.126803445623
Step=17740000, rewards=1652.6325472636142
Step=17741000, rewards=1890.2268954689712
Step=17742000, rewards=1916.3275273679167
Step=17743000, rewards=1859.670117515745
Step=17744000, rewards=1824.5159640950915
Step=17745000, rewards=1805.6231672071656
Step=17746000, rewards=1912.681665406813
Step=17747000, rewards=1848.5677958698889
Step=17748000, rewards=1944.8142001250221
Step=17749000, rewards=1993.2874871820582


Step=17922000, rewards=1957.2986676521807
Step=17923000, rewards=1923.0278849789258
Step=17924000, rewards=1943.4913153740697
Step=17925000, rewards=1753.0726737060177
Step=17926000, rewards=1838.8949593765076
Step=17927000, rewards=1842.5687000955809
Step=17928000, rewards=1934.5179714499252
Step=17929000, rewards=1796.2150460005748
Step=17930000, rewards=1710.300205851845
Step=17931000, rewards=1796.0376206266374
Step=17932000, rewards=1931.2126767483974
Step=17933000, rewards=1750.9814839700477
Step=17934000, rewards=1962.7653078834774
Step=17935000, rewards=1835.2011393366176
Step=17936000, rewards=1790.9732385192294
Step=17937000, rewards=1857.84641453544
Step=17938000, rewards=1861.0100643933579
Step=17939000, rewards=1520.4871536230607
Step=17940000, rewards=1810.4102090856327
Step=17941000, rewards=1806.4295315346585
Step=17942000, rewards=1908.57332884834
Step=17943000, rewards=1880.9207635648247
Step=17944000, rewards=1982.1782612334866
Step=17945000, rewards=1886.31365895570

Step=18118000, rewards=1702.5661022274855
Step=18119000, rewards=1685.2158784375526
Step=18120000, rewards=1814.1127682993058
Step=18121000, rewards=1813.9791558257264
Step=18122000, rewards=1791.4796694560482
Step=18123000, rewards=1870.5822857119642
Step=18124000, rewards=1824.1134258956658
Step=18125000, rewards=1815.7321003629015
Step=18126000, rewards=1855.988586424214
Step=18127000, rewards=1789.435636908686
Step=18128000, rewards=1801.8190168940214
Step=18129000, rewards=1798.9597395942365
Step=18130000, rewards=1809.0436459915704
Step=18131000, rewards=1823.8061614469275
Step=18132000, rewards=1860.026745956796
Step=18133000, rewards=1934.031061470834
Step=18134000, rewards=1671.6208758277392
Step=18135000, rewards=1839.3994344660148
Step=18136000, rewards=1773.56913617981
Step=18137000, rewards=1915.500867505305
Step=18138000, rewards=1685.7445486103331
Step=18139000, rewards=1797.9298569775362
Step=18140000, rewards=1801.6984620287587
Step=18141000, rewards=1764.1832590621532

Step=18314000, rewards=1970.587423871244
Step=18315000, rewards=1814.2738827079665
Step=18316000, rewards=1909.339968748456
Step=18317000, rewards=1912.74478519767
Step=18318000, rewards=1883.2186959788653
Step=18319000, rewards=1921.9325704082917
Step=18320000, rewards=1818.9272709174236
Step=18321000, rewards=1948.834299181928
Step=18322000, rewards=1785.5525472464801
Step=18323000, rewards=1816.9889458934474
Step=18324000, rewards=1747.8010528156076
Step=18325000, rewards=1971.136667398656
Step=18326000, rewards=1781.128124946229
Step=18327000, rewards=1867.9939498112988
Step=18328000, rewards=1802.0109879105935
Step=18329000, rewards=1902.4576772266696
Step=18330000, rewards=1837.1218237841128
Step=18331000, rewards=1856.0956915605937
Step=18332000, rewards=1873.5288203685614
Step=18333000, rewards=1851.7430122022354
Step=18334000, rewards=1737.028273443957
Step=18335000, rewards=1761.0201347375892
Step=18336000, rewards=1830.6332698554102
Step=18337000, rewards=1849.0861831657526


Step=18510000, rewards=1907.5972333124628
Step=18511000, rewards=1996.9371299954967
Step=18512000, rewards=1956.371779735084
Step=18513000, rewards=1957.7671017141154
Step=18514000, rewards=1932.0918391912194
Step=18515000, rewards=1937.8228537356947
Step=18516000, rewards=1853.7595954408307
Step=18517000, rewards=1939.8369755667272
Step=18518000, rewards=1845.1074520192014
Step=18519000, rewards=1921.4053974838282
Step=18520000, rewards=1920.4980306339319
Step=18521000, rewards=1826.7852287336307
Step=18522000, rewards=1930.0360604974162
Step=18523000, rewards=1856.0386762141823
Step=18524000, rewards=1927.8516129404754
Step=18525000, rewards=1821.2999571382127
Step=18526000, rewards=1934.7987275611083
Step=18527000, rewards=1980.7285407302336
Step=18528000, rewards=1929.5680927057324
Step=18529000, rewards=1929.3588253702728
Step=18530000, rewards=1844.093367314427
Step=18531000, rewards=1922.0168269198434
Step=18532000, rewards=1953.7742027619652
Step=18533000, rewards=1880.15292514

Step=18705000, rewards=1923.935563024044
Step=18706000, rewards=1831.1107894917827
Step=18707000, rewards=1951.791757898402
Step=18708000, rewards=1923.3668974378509
Step=18709000, rewards=1897.2485662466427
Step=18710000, rewards=1938.2577597276425
Step=18711000, rewards=1941.903753923343
Step=18712000, rewards=1905.8545964324817
Step=18713000, rewards=1906.5562704624342
Step=18714000, rewards=1848.1239169100156
Step=18715000, rewards=1837.943437342041
Step=18716000, rewards=1920.1455302283016
Step=18717000, rewards=1875.0047923115387
Step=18718000, rewards=1929.7081188248214
Step=18719000, rewards=1903.540139049549
Step=18720000, rewards=1903.077323667887
Step=18721000, rewards=1799.6155745721364
Step=18722000, rewards=1950.526228696295
Step=18723000, rewards=1899.3682027382663
Step=18724000, rewards=1836.7852720654002
Step=18725000, rewards=1886.2906067886615
Step=18726000, rewards=1812.0556720133723
Step=18727000, rewards=1819.371018141102
Step=18728000, rewards=1911.9494594072423


Step=18901000, rewards=1937.5220473704771
Step=18902000, rewards=1925.2820492025076
Step=18903000, rewards=2055.5474203691097
Step=18904000, rewards=1957.7634028141267
Step=18905000, rewards=1939.3018753126344
Step=18906000, rewards=1922.2793859372798
Step=18907000, rewards=1786.5743609864442
Step=18908000, rewards=1945.9054931600808
Step=18909000, rewards=1813.6172853595658
Step=18910000, rewards=1956.5223217881426
Step=18911000, rewards=1909.0814865500365
Step=18912000, rewards=1959.0564801544717
Step=18913000, rewards=1786.8648082792934
Step=18914000, rewards=1783.197917743448
Step=18915000, rewards=1963.5837871034332
Step=18916000, rewards=1932.1695956013707
Step=18917000, rewards=1828.2238641186145
Step=18918000, rewards=1955.9503639343145
Step=18919000, rewards=1926.0649061975514
Step=18920000, rewards=1825.11816877637
Step=18921000, rewards=1945.7491829296657
Step=18922000, rewards=1911.952338537621
Step=18923000, rewards=1831.2106031095957
Step=18924000, rewards=1935.9278426496

Step=19097000, rewards=1801.334664550408
Step=19098000, rewards=1781.9634076314387
Step=19099000, rewards=1845.3403788343455
Step=19100000, rewards=1892.279726997123
Step=19101000, rewards=1875.840838650315
Step=19102000, rewards=1907.0027771366179
Step=19103000, rewards=1827.64645109313
Step=19104000, rewards=1859.870875207
Step=19105000, rewards=1908.9282677659548
Step=19106000, rewards=1922.8967691928058
Step=19107000, rewards=1923.936009866877
Step=19108000, rewards=1893.6922095836492
Step=19109000, rewards=1856.5954028057195
Step=19110000, rewards=1928.8320968951766
Step=19111000, rewards=1756.9168450174266
Step=19112000, rewards=1870.0524876378263
Step=19113000, rewards=1794.110388233785
Step=19114000, rewards=1875.8125233506833
Step=19115000, rewards=1791.7259779576063
Step=19116000, rewards=1906.586945661748
Step=19117000, rewards=1900.7808425762112
Step=19118000, rewards=1887.3399307689397
Step=19119000, rewards=1865.6960306265141
Step=19120000, rewards=1821.5885194796454
Step

Step=19293000, rewards=1910.294635072535
Step=19294000, rewards=1828.6889562311808
Step=19295000, rewards=1690.8085218434073
Step=19296000, rewards=1762.5473056803937
Step=19297000, rewards=1858.0037231677036
Step=19298000, rewards=1741.175098063636
Step=19299000, rewards=1924.9682539809298
Step=19300000, rewards=1866.4935840743158
Step=19301000, rewards=1764.5415782336643
Step=19302000, rewards=1875.1365410494668
Step=19303000, rewards=1833.5377826527483
Step=19304000, rewards=1836.1045126213921
Step=19305000, rewards=1780.019848727102
Step=19306000, rewards=1798.3149305139486
Step=19307000, rewards=1869.1985007459964
Step=19308000, rewards=1879.73843489086
Step=19309000, rewards=1846.4783525509765
Step=19310000, rewards=1810.8574939396206
Step=19311000, rewards=1788.6918688567912
Step=19312000, rewards=1753.4650953120245
Step=19313000, rewards=1901.3703650704922
Step=19314000, rewards=2009.135179441236
Step=19315000, rewards=1631.2773917266156
Step=19316000, rewards=1962.154392251986

Step=19488000, rewards=1874.172515896201
Step=19489000, rewards=1744.6769476947861
Step=19490000, rewards=1894.7028424007697
Step=19491000, rewards=1884.5790592443916
Step=19492000, rewards=1926.4175764796614
Step=19493000, rewards=1799.4467016034773
Step=19494000, rewards=1965.0389805546984
Step=19495000, rewards=1887.8232282986917
Step=19496000, rewards=1881.8002703476475
Step=19497000, rewards=1879.7101779498291
Step=19498000, rewards=1807.259436208722
Step=19499000, rewards=1782.980794675369
Step=19500000, rewards=1701.7757664745864
Step=19501000, rewards=1807.8340982368202
Step=19502000, rewards=1728.8926516908994
Step=19503000, rewards=1833.4448741023643
Step=19504000, rewards=1791.3855967339898
Step=19505000, rewards=1867.3032759399111
Step=19506000, rewards=1811.5843326089544
Step=19507000, rewards=1800.766148720718
Step=19508000, rewards=1805.7079736587539
Step=19509000, rewards=1895.5111778911617
Step=19510000, rewards=1792.0713042103544
Step=19511000, rewards=1900.1065719221

Step=19684000, rewards=1864.719516671947
Step=19685000, rewards=1830.4669209712993
Step=19686000, rewards=1895.6389733970204
Step=19687000, rewards=1851.4673344774717
Step=19688000, rewards=1904.391675126905
Step=19689000, rewards=1806.5787691314085
Step=19690000, rewards=1822.8552540089352
Step=19691000, rewards=1876.2389824071322
Step=19692000, rewards=1909.723140622178
Step=19693000, rewards=1875.3186287781139
Step=19694000, rewards=1917.5202916273947
Step=19695000, rewards=2005.1163930760733
Step=19696000, rewards=1905.5529269334834
Step=19697000, rewards=1904.2010175520286
Step=19698000, rewards=1888.4750886167128
Step=19699000, rewards=1946.7751454572744
Step=19700000, rewards=1916.7784461452484
Step=19701000, rewards=1902.9897533283931
Step=19702000, rewards=1892.8880929257227
Step=19703000, rewards=1856.1492252536002
Step=19704000, rewards=1940.5113574441457
Step=19705000, rewards=1927.0511066538256
Step=19706000, rewards=1924.4087541082024
Step=19707000, rewards=1871.260110942

Step=19880000, rewards=1938.8601783590477
Step=19881000, rewards=1914.5729525065533
Step=19882000, rewards=1908.057016711227
Step=19883000, rewards=1890.7470290115562
Step=19884000, rewards=1925.466315595996
Step=19885000, rewards=1834.0856115029142
Step=19886000, rewards=1871.0062838480917
Step=19887000, rewards=1847.1173020929873
Step=19888000, rewards=1906.338754005575
Step=19889000, rewards=1921.951963895705
Step=19890000, rewards=1968.198584095069
Step=19891000, rewards=1931.3039603601037
Step=19892000, rewards=1916.0488967843107
Step=19893000, rewards=1929.1547741305246
Step=19894000, rewards=1947.241724537834
Step=19895000, rewards=1899.8114228613108
Step=19896000, rewards=1876.1783554467725
Step=19897000, rewards=1975.2298798880179
Step=19898000, rewards=1921.600481492816
Step=19899000, rewards=1980.7825179730548
Step=19900000, rewards=1891.5444716195286
Step=19901000, rewards=2001.9376185427216
Step=19902000, rewards=1878.5079394910867
Step=19903000, rewards=2011.0392013308574

Step=20076000, rewards=1877.9437324108992
Step=20077000, rewards=1846.8336839300116
Step=20078000, rewards=1886.609605540006
Step=20079000, rewards=1916.7539276701286
Step=20080000, rewards=2009.9911809373054
Step=20081000, rewards=1968.5017708432013
Step=20082000, rewards=1983.7695092924275
Step=20083000, rewards=1857.1653482289264
Step=20084000, rewards=1957.8430848103624
Step=20085000, rewards=2005.404856557077
Step=20086000, rewards=1956.472172101497
Step=20087000, rewards=1885.006283587448
Step=20088000, rewards=1900.8585315064718
Step=20089000, rewards=2004.284373803925
Step=20090000, rewards=1904.8207180096456
Step=20091000, rewards=1782.130496956242
Step=20092000, rewards=1869.8150515670989
Step=20093000, rewards=1988.9291490802118
Step=20094000, rewards=2001.8121845002922
Step=20095000, rewards=1774.4010002079706
Step=20096000, rewards=1888.5546760926218
Step=20097000, rewards=1868.9428219365607
Step=20098000, rewards=1892.280335256396
Step=20099000, rewards=1913.1449556314442

Step=20272000, rewards=1951.0354470130596
Step=20273000, rewards=1910.3122797138276
Step=20274000, rewards=1932.0234146366056
Step=20275000, rewards=1846.7504649842979
Step=20276000, rewards=1890.9644375125274
Step=20277000, rewards=1918.8922241186829
Step=20278000, rewards=1906.5441721418497
Step=20279000, rewards=1920.858199370912
Step=20280000, rewards=1807.199060012367
Step=20281000, rewards=1936.8581040139463
Step=20282000, rewards=1950.7400818692008
Step=20283000, rewards=1833.0503645022077
Step=20284000, rewards=1919.8536647897572
Step=20285000, rewards=1911.9265854046275
Step=20286000, rewards=1886.3383287810104
Step=20287000, rewards=1935.0742677699768
Step=20288000, rewards=1906.7567839969797
Step=20289000, rewards=1919.3877913435433
Step=20290000, rewards=1992.8696420127185
Step=20291000, rewards=1896.0724952098403
Step=20292000, rewards=1958.4374758287101
Step=20293000, rewards=1850.0727900548384
Step=20294000, rewards=1860.6339859056905
Step=20295000, rewards=1919.36723483

Step=20468000, rewards=1939.5551041347887
Step=20469000, rewards=1967.350181842542
Step=20470000, rewards=1971.666329039421
Step=20471000, rewards=1944.8464955196548
Step=20472000, rewards=1923.245467662009
Step=20473000, rewards=1958.067053946964
Step=20474000, rewards=1956.0789868310135
Step=20475000, rewards=1954.0588324792598
Step=20476000, rewards=1930.6569501889574
Step=20477000, rewards=2007.0503619066812
Step=20478000, rewards=2000.3070382340175
Step=20479000, rewards=1989.2317746056558
Step=20480000, rewards=1972.4317904095526
Step=20481000, rewards=1963.3618600681036
Step=20482000, rewards=1912.6187514935748
Step=20483000, rewards=2013.786138460637
Step=20484000, rewards=1919.7602332500917
Step=20485000, rewards=1895.155131350384
Step=20486000, rewards=1930.259918123756
Step=20487000, rewards=1954.0654765535394
Step=20488000, rewards=1970.0630769300155
Step=20489000, rewards=1953.373259946376
Step=20490000, rewards=1934.4284827937363
Step=20491000, rewards=1957.5670981704168


Step=20664000, rewards=1960.5473657010955
Step=20665000, rewards=1884.5212103619915
Step=20666000, rewards=1815.6420655163538
Step=20667000, rewards=1888.929772931909
Step=20668000, rewards=1846.4102579102334
Step=20669000, rewards=1872.3312923589253
Step=20670000, rewards=1892.2761565271733
Step=20671000, rewards=1894.6036716285616
Step=20672000, rewards=1966.7107010106047
Step=20673000, rewards=1919.1515148142987
Step=20674000, rewards=1950.9706460815053
Step=20675000, rewards=1927.1964061483054
Step=20676000, rewards=1796.9326166914907
Step=20677000, rewards=1891.6139916010395
Step=20678000, rewards=1869.5532032199633
Step=20679000, rewards=1950.3822241871267
Step=20680000, rewards=1916.6638521798632
Step=20681000, rewards=1906.5436194981899
Step=20682000, rewards=1845.0234540739323
Step=20683000, rewards=1973.953780503728
Step=20684000, rewards=1947.1494575886632
Step=20685000, rewards=1820.8834646184707
Step=20686000, rewards=1874.4627131822747
Step=20687000, rewards=1939.99898397

Step=20860000, rewards=1900.407333842713
Step=20861000, rewards=1918.3514764284796
Step=20862000, rewards=1922.7629274501583
Step=20863000, rewards=1954.4368694858615
Step=20864000, rewards=1901.199375572464
Step=20865000, rewards=1932.577967242848
Step=20866000, rewards=1831.972804832941
Step=20867000, rewards=1891.1639664851732
Step=20868000, rewards=1898.2175005357726
Step=20869000, rewards=1889.5758689411327
Step=20870000, rewards=1872.635477163114
Step=20871000, rewards=1903.0108319589194
Step=20872000, rewards=1865.0068188905284
Step=20873000, rewards=1810.0363619562409
Step=20874000, rewards=1919.5304974654941
Step=20875000, rewards=1860.7710815936982
Step=20876000, rewards=1814.1206335468808
Step=20877000, rewards=1958.4833481641372
Step=20878000, rewards=1708.8531064678525
Step=20879000, rewards=2000.2323276698926
Step=20880000, rewards=1900.4246578818936
Step=20881000, rewards=1865.0145495684544
Step=20882000, rewards=1902.9791757583391
Step=20883000, rewards=1924.49449674369

Step=21056000, rewards=1995.2591162897913
Step=21057000, rewards=1936.0531347601725
Step=21058000, rewards=1855.1733481476613
Step=21059000, rewards=1954.732620380089
Step=21060000, rewards=1874.6689871139931
Step=21061000, rewards=1886.9450730996516
Step=21062000, rewards=1953.6208107870634
Step=21063000, rewards=1865.3651255045627
Step=21064000, rewards=1843.128702539634
Step=21065000, rewards=1920.9647025735883
Step=21066000, rewards=1916.9549201177317
Step=21067000, rewards=1877.1899487062174
Step=21068000, rewards=1916.5421315247354
Step=21069000, rewards=1962.7659335105407
Step=21070000, rewards=1918.7626789480723
Step=21071000, rewards=1957.2752227722563
Step=21072000, rewards=1899.6638864356999
Step=21073000, rewards=1787.4719025805805
Step=21074000, rewards=1953.9621552074575
Step=21075000, rewards=1830.0379670366485
Step=21076000, rewards=1960.0443634945773
Step=21077000, rewards=1907.1571648357817
Step=21078000, rewards=1884.1259118234307
Step=21079000, rewards=1940.06005454

Step=21252000, rewards=1960.0749268769669
Step=21253000, rewards=1931.938456231479
Step=21254000, rewards=1862.4051775531789
Step=21255000, rewards=1836.122253842145
Step=21256000, rewards=1906.4944140752614
Step=21257000, rewards=1823.97553638651
Step=21258000, rewards=1953.0166256276816
Step=21259000, rewards=1892.995428197495
Step=21260000, rewards=1828.4709892849153
Step=21261000, rewards=1937.7684972333102
Step=21262000, rewards=1815.1192221069455
Step=21263000, rewards=1916.6625262409493
Step=21264000, rewards=1842.614590854334
Step=21265000, rewards=1886.6333639217526
Step=21266000, rewards=1906.600077374476
Step=21267000, rewards=1966.0757833367154
Step=21268000, rewards=1736.3886062694369
Step=21269000, rewards=1935.3116631263917
Step=21270000, rewards=1947.145676117962
Step=21271000, rewards=1869.8003288414463
Step=21272000, rewards=1889.4564110986355
Step=21273000, rewards=1779.439849038056
Step=21274000, rewards=1881.9501794324942
Step=21275000, rewards=1925.9608611834046
S

Step=21448000, rewards=1907.658708566714
Step=21449000, rewards=1969.4799384346884
Step=21450000, rewards=1811.0305994587
Step=21451000, rewards=1844.063256633944
Step=21452000, rewards=1931.1624563719286
Step=21453000, rewards=1944.8445745224162
Step=21454000, rewards=1861.7000624209861
Step=21455000, rewards=1950.7708201996697
Step=21456000, rewards=1878.8136605146583
Step=21457000, rewards=1951.263752617843
Step=21458000, rewards=1896.3635528322525
Step=21459000, rewards=1816.106690640947
Step=21460000, rewards=1880.1535864936113
Step=21461000, rewards=1923.4533323372912
Step=21462000, rewards=1913.9450488081666
Step=21463000, rewards=1842.9752196099819
Step=21464000, rewards=1928.748800699131
Step=21465000, rewards=1947.04979645723
Step=21466000, rewards=1815.6562838373268
Step=21467000, rewards=1915.431531820273
Step=21468000, rewards=1814.884101188096
Step=21469000, rewards=1582.0865331972357
Step=21470000, rewards=1907.8921427414675
Step=21471000, rewards=1826.0502962003666
Step

Step=21644000, rewards=1864.1660875064724
Step=21645000, rewards=1989.7093370598502
Step=21646000, rewards=2038.8002431407647
Step=21647000, rewards=1899.205035660625
Step=21648000, rewards=1934.0966221756687
Step=21649000, rewards=1903.4448613985248
Step=21650000, rewards=1928.9525721668401
Step=21651000, rewards=2025.9600841072624
Step=21652000, rewards=1913.9542289004414
Step=21653000, rewards=1882.800174857695
Step=21654000, rewards=1930.9842275331996
Step=21655000, rewards=1951.9158749778765
Step=21656000, rewards=1860.7910188982667
Step=21657000, rewards=1920.4098665728206
Step=21658000, rewards=1943.3717625372253
Step=21659000, rewards=1918.6736413250555
Step=21660000, rewards=1937.7415015194847
Step=21661000, rewards=1929.279303911479
Step=21662000, rewards=1956.0677180062798
Step=21663000, rewards=1920.1130760222281
Step=21664000, rewards=1960.7505383177634
Step=21665000, rewards=1938.309107112178
Step=21666000, rewards=1918.5076376140173
Step=21667000, rewards=1916.8085683057

Step=21840000, rewards=1951.9581159438487
Step=21841000, rewards=1972.0287988080393
Step=21842000, rewards=1894.4189101977638
Step=21843000, rewards=1920.7216553098456
Step=21844000, rewards=1959.9674256054775
Step=21845000, rewards=1962.5427660052912
Step=21846000, rewards=1979.051714753898
Step=21847000, rewards=1933.7316680593099
Step=21848000, rewards=1845.918911340948
Step=21849000, rewards=1829.9408605399456
Step=21850000, rewards=2041.7873712928326
Step=21851000, rewards=1963.0257017571926
Step=21852000, rewards=1946.6025760723435
Step=21853000, rewards=1920.712713922015
Step=21854000, rewards=1985.2961685679556
Step=21855000, rewards=1945.9269289529009
Step=21856000, rewards=1884.586903892555
Step=21857000, rewards=1859.7952884188603
Step=21858000, rewards=1913.0722928318437
Step=21859000, rewards=1979.9033875259022
Step=21860000, rewards=1930.1198905084343
Step=21861000, rewards=1893.5117922335041
Step=21862000, rewards=1943.3862219963023
Step=21863000, rewards=1928.8469765980

Step=22036000, rewards=1840.2745882156573
Step=22037000, rewards=1974.4367620703497
Step=22038000, rewards=1918.4839718368669
Step=22039000, rewards=2002.8300287258378
Step=22040000, rewards=1967.0288732943695
Step=22041000, rewards=1912.257626032419
Step=22042000, rewards=2012.3209725060713
Step=22043000, rewards=1953.750925394334
Step=22044000, rewards=1891.3623479619503
Step=22045000, rewards=1850.4353899476027
Step=22046000, rewards=1972.3888780996526
Step=22047000, rewards=1904.7106683396532
Step=22048000, rewards=1955.2150454641903
Step=22049000, rewards=1924.7413982393346
Step=22050000, rewards=1967.7207074002138
Step=22051000, rewards=1980.0859690686248
Step=22052000, rewards=1984.8528687940702
Step=22053000, rewards=1922.364620697464
Step=22054000, rewards=1926.0839935919673
Step=22055000, rewards=1909.6819813434702
Step=22056000, rewards=1803.2922253253153
Step=22057000, rewards=1894.0762041997777
Step=22058000, rewards=1941.1965391065762
Step=22059000, rewards=1880.691828100

Step=22232000, rewards=1877.1298677609288
Step=22233000, rewards=1918.3323060531536
Step=22234000, rewards=1979.4583514601766
Step=22235000, rewards=1956.7099627801906
Step=22236000, rewards=1910.6977683190046
Step=22237000, rewards=1839.1207175696204
Step=22238000, rewards=1872.8331165419027
Step=22239000, rewards=1934.7122973942749
Step=22240000, rewards=1845.7442142548693
Step=22241000, rewards=1809.2295567865558
Step=22242000, rewards=1903.7539556608904
Step=22243000, rewards=1839.6006864084625
Step=22244000, rewards=1991.7499224791168
Step=22245000, rewards=1833.5202506812175
Step=22246000, rewards=1930.0947123821807
Step=22247000, rewards=1873.030122450735
Step=22248000, rewards=1940.0273898888368
Step=22249000, rewards=1962.3763371247187
Step=22250000, rewards=1954.475405862072
Step=22251000, rewards=1946.7402452362812
Step=22252000, rewards=1899.4485625444897
Step=22253000, rewards=1918.288588324127
Step=22254000, rewards=1967.3978104574999
Step=22255000, rewards=1866.695495518

Step=22428000, rewards=1915.8651777730017
Step=22429000, rewards=1946.1405201422783
Step=22430000, rewards=1905.7477649078403
Step=22431000, rewards=1958.8595480618205
Step=22432000, rewards=1886.1188007528344
Step=22433000, rewards=1905.8161694397047
Step=22434000, rewards=1957.632510831492
Step=22435000, rewards=1900.0337381135005
Step=22436000, rewards=1908.2962910687595
Step=22437000, rewards=1926.1153418826195
Step=22438000, rewards=1833.337287241164
Step=22439000, rewards=1953.4295151986125
Step=22440000, rewards=1828.8306548838664
Step=22441000, rewards=1907.7178488447962
Step=22442000, rewards=1874.7465610980244
Step=22443000, rewards=1924.9549641533108
Step=22444000, rewards=1937.1628764834497
Step=22445000, rewards=1910.3457061614783
Step=22446000, rewards=1966.6779187645234
Step=22447000, rewards=1928.1899223260168
Step=22448000, rewards=1863.6194860852681
Step=22449000, rewards=2013.9477489704655
Step=22450000, rewards=1881.2232915463526
Step=22451000, rewards=1929.65658642

Step=22624000, rewards=1806.114917268794
Step=22625000, rewards=1816.4934122100137
Step=22626000, rewards=1923.3597148917136
Step=22627000, rewards=2047.4489983132266
Step=22628000, rewards=2009.8522686997035
Step=22629000, rewards=1898.6043146317797
Step=22630000, rewards=1916.5862030830885
Step=22631000, rewards=1931.9351335317615
Step=22632000, rewards=1910.7109551404856
Step=22633000, rewards=1959.0803250998017
Step=22634000, rewards=2000.3767251779063
Step=22635000, rewards=1970.7297670387507
Step=22636000, rewards=1940.1057674102037
Step=22637000, rewards=2027.1470525305701
Step=22638000, rewards=1978.930259896918
Step=22639000, rewards=1887.6341495182398
Step=22640000, rewards=2002.591911668409
Step=22641000, rewards=1854.6443486524488
Step=22642000, rewards=1916.8597589200938
Step=22643000, rewards=1919.176653114972
Step=22644000, rewards=2020.4558841194087
Step=22645000, rewards=1778.6564516601318
Step=22646000, rewards=1981.7240581429553
Step=22647000, rewards=1820.0085021407

Step=22820000, rewards=2011.386966169268
Step=22821000, rewards=1917.6321057703915
Step=22822000, rewards=1920.39143720114
Step=22823000, rewards=1923.0334404221626
Step=22824000, rewards=1997.0395330451836
Step=22825000, rewards=1962.6217081388645
Step=22826000, rewards=1999.9993212526595
Step=22827000, rewards=2020.949978338118
Step=22828000, rewards=1976.2904587161631
Step=22829000, rewards=1862.7381641561433
Step=22830000, rewards=2025.2934655539548
Step=22831000, rewards=1999.2700664518168
Step=22832000, rewards=2041.7015339427185
Step=22833000, rewards=1992.585230335366
Step=22834000, rewards=1956.0080061051988
Step=22835000, rewards=1931.6159803036394
Step=22836000, rewards=1917.819670389513
Step=22837000, rewards=2000.998858227814
Step=22838000, rewards=1897.9562836167388
Step=22839000, rewards=1909.2833939146137
Step=22840000, rewards=1944.3141925300577
Step=22841000, rewards=1911.9089723926263
Step=22842000, rewards=1923.8722583843114
Step=22843000, rewards=1952.1677236900443

Step=23016000, rewards=1943.264570364673
Step=23017000, rewards=1884.9131443507897
Step=23018000, rewards=1894.3259305561871
Step=23019000, rewards=1986.2116553262797
Step=23020000, rewards=1933.9866084034866
Step=23021000, rewards=1891.911556993435
Step=23022000, rewards=1771.8427446630317
Step=23023000, rewards=1942.7965819056694
Step=23024000, rewards=1894.981914115826
Step=23025000, rewards=1878.5283268168293
Step=23026000, rewards=1859.498430291916
Step=23027000, rewards=1995.4287439195766
Step=23028000, rewards=1913.9851179728103
Step=23029000, rewards=1967.0459498976256
Step=23030000, rewards=1958.9554049269311
Step=23031000, rewards=1784.5481557684516
Step=23032000, rewards=1891.803384678765
Step=23033000, rewards=1903.25887924649
Step=23034000, rewards=1911.6299059475718
Step=23035000, rewards=1885.4530015691626
Step=23036000, rewards=1728.7934534999745
Step=23037000, rewards=1926.1103350647752
Step=23038000, rewards=1836.405693553784
Step=23039000, rewards=1899.0482135318923


Step=23212000, rewards=1864.238070556063
Step=23213000, rewards=1745.3536075831814
Step=23214000, rewards=1890.9465182776407
Step=23215000, rewards=1882.0220954014503
Step=23216000, rewards=1908.8273505701688
Step=23217000, rewards=1896.1669767177464
Step=23218000, rewards=1855.5704985457542
Step=23219000, rewards=1799.3081824702715
Step=23220000, rewards=1891.4642575480564
Step=23221000, rewards=1694.5557432610897
Step=23222000, rewards=1859.7494751236345
Step=23223000, rewards=1831.044463788577
Step=23224000, rewards=1812.6047153349864
Step=23225000, rewards=1810.0998230949708
Step=23226000, rewards=1926.622796599862
Step=23227000, rewards=1775.3944249812605
Step=23228000, rewards=1707.1021500110917
Step=23229000, rewards=1771.5847726755912
Step=23230000, rewards=1885.0690441665333
Step=23231000, rewards=1921.756435497314
Step=23232000, rewards=1832.2522707145963
Step=23233000, rewards=1910.9180657279041
Step=23234000, rewards=1823.1301013169073
Step=23235000, rewards=1846.9643783917

Step=23408000, rewards=1954.939169503755
Step=23409000, rewards=1892.6638470645983
Step=23410000, rewards=1971.0933518141405
Step=23411000, rewards=1914.0499160605361
Step=23412000, rewards=1890.7910014780898
Step=23413000, rewards=1916.2509056144636
Step=23414000, rewards=1875.0378741088966
Step=23415000, rewards=1833.9369260437027
Step=23416000, rewards=2013.434768312431
Step=23417000, rewards=1881.9108430604738
Step=23418000, rewards=2009.19439090443
Step=23419000, rewards=1785.4614255300444
Step=23420000, rewards=1895.5795338339556
Step=23421000, rewards=1970.5642892896801
Step=23422000, rewards=1991.2558135309828
Step=23423000, rewards=1958.6422504979482
Step=23424000, rewards=1930.7271763998913
Step=23425000, rewards=1959.9678694151387
Step=23426000, rewards=1812.1526005656312
Step=23427000, rewards=2026.681501201892
Step=23428000, rewards=1859.503347514696
Step=23429000, rewards=1866.204735005518
Step=23430000, rewards=1837.2613856303274
Step=23431000, rewards=1998.3898294733756

Step=23604000, rewards=1881.1248003441121
Step=23605000, rewards=1859.8480782077597
Step=23606000, rewards=1969.8075686855198
Step=23607000, rewards=1944.6311783305277
Step=23608000, rewards=1779.9321524228967
Step=23609000, rewards=1866.1892078909
Step=23610000, rewards=1927.2795385029483
Step=23611000, rewards=2003.703170813567
Step=23612000, rewards=2017.9311047469662
Step=23613000, rewards=1976.1552630792044
Step=23614000, rewards=1963.849595655266
Step=23615000, rewards=1838.5003135706886
Step=23616000, rewards=1967.232332078169
Step=23617000, rewards=1977.5941548374792
Step=23618000, rewards=1977.8212369118185
Step=23619000, rewards=1938.1823142613896
Step=23620000, rewards=1833.4209478406847
Step=23621000, rewards=1907.234326982428
Step=23622000, rewards=1991.1595435890722
Step=23623000, rewards=1939.1213928412906
Step=23624000, rewards=1865.983988316402
Step=23625000, rewards=1896.7301277811105
Step=23626000, rewards=1855.698435250989
Step=23627000, rewards=1949.4435661705518
S

Step=23800000, rewards=1883.4267502072055
Step=23801000, rewards=1971.7264846197872
Step=23802000, rewards=1902.5396306917198
Step=23803000, rewards=1988.3750505886649
Step=23804000, rewards=1888.5098077729156
Step=23805000, rewards=1897.43573147191
Step=23806000, rewards=1943.7575700670202
Step=23807000, rewards=1864.475282575285
Step=23808000, rewards=2005.0296000398985
Step=23809000, rewards=1969.798743601261
Step=23810000, rewards=1944.2239784287503
Step=23811000, rewards=1985.2308174876162
Step=23812000, rewards=1994.0384008406365
Step=23813000, rewards=1950.5762459405198
Step=23814000, rewards=1915.2199847303282
Step=23815000, rewards=1923.771063835295
Step=23816000, rewards=1909.0758384097644
Step=23817000, rewards=1936.2643421209834
Step=23818000, rewards=1832.5535533631735
Step=23819000, rewards=1944.7009979462948
Step=23820000, rewards=1869.8510142853734
Step=23821000, rewards=1990.298247666324
Step=23822000, rewards=1888.1224352425631
Step=23823000, rewards=1820.788697467032

Step=23996000, rewards=1783.5834916164317
Step=23997000, rewards=1939.9821364344082
Step=23998000, rewards=1945.4046628938788
Step=23999000, rewards=1922.0034000891671
Step=24000000, rewards=1910.265957299755
Step=24001000, rewards=1952.628717519732
Step=24002000, rewards=1909.2182075699047
Step=24003000, rewards=1924.8976680327805
Step=24004000, rewards=1996.4438602128646
Step=24005000, rewards=1954.2153870058496
Step=24006000, rewards=1959.9282640059334
Step=24007000, rewards=1959.987674749053
Step=24008000, rewards=1909.560782394609
Step=24009000, rewards=1974.6492370193478
Step=24010000, rewards=1998.6481606571606
Step=24011000, rewards=1915.0568630987643
Step=24012000, rewards=1943.375750209034
Step=24013000, rewards=1971.632109188464
Step=24014000, rewards=1954.8016329232605
Step=24015000, rewards=1856.066431609164
Step=24016000, rewards=1944.0665301814283
Step=24017000, rewards=1900.029010742151
Step=24018000, rewards=1966.167513694721
Step=24019000, rewards=1901.783356214542
St

Step=24192000, rewards=1949.8249094292332
Step=24193000, rewards=1988.0264276639211
Step=24194000, rewards=1939.2325066934427
Step=24195000, rewards=1993.669859138999
Step=24196000, rewards=1891.4818385597202
Step=24197000, rewards=1907.143261431577
Step=24198000, rewards=1895.2749242288546
Step=24199000, rewards=1998.10952090663
Step=24200000, rewards=1948.8643081436605
Step=24201000, rewards=2016.2530676480058
Step=24202000, rewards=1967.6154080204167
Step=24203000, rewards=1894.1821151488823
Step=24204000, rewards=1982.4454428733472
Step=24205000, rewards=1945.1818592245302
Step=24206000, rewards=1900.7011113735143
Step=24207000, rewards=1862.8518221437487
Step=24208000, rewards=1923.5755147503212
Step=24209000, rewards=1991.5098469533864
Step=24210000, rewards=1979.050774204048
Step=24211000, rewards=1950.4134271200312
Step=24212000, rewards=1791.7923465056492
Step=24213000, rewards=2000.8371680631415
Step=24214000, rewards=2002.8186173100362
Step=24215000, rewards=1962.50330041825

Step=24388000, rewards=1844.5854984256998
Step=24389000, rewards=1937.2218038445208
Step=24390000, rewards=1671.86365328475
Step=24391000, rewards=1760.705853140057
Step=24392000, rewards=1961.403191929231
Step=24393000, rewards=1974.4907583521676
Step=24394000, rewards=1934.8006145080594
Step=24395000, rewards=1737.6865837148266
Step=24396000, rewards=1881.1170643614973
Step=24397000, rewards=1900.3712077772423
Step=24398000, rewards=2013.6207718955006
Step=24399000, rewards=2020.3244016043302
Step=24400000, rewards=1866.261049480015
Step=24401000, rewards=1754.7213938707582
Step=24402000, rewards=2004.493835130369
Step=24403000, rewards=1828.7752718847235
Step=24404000, rewards=1959.4154624393077
Step=24405000, rewards=1652.2139636861575
Step=24406000, rewards=1935.1520346255325
Step=24407000, rewards=1964.9754347457645
Step=24408000, rewards=1885.535679702156
Step=24409000, rewards=1976.6345478354403
Step=24410000, rewards=1870.8396101627427
Step=24411000, rewards=1888.0803960903827

Step=24584000, rewards=1870.648149306408
Step=24585000, rewards=1899.3356490836277
Step=24586000, rewards=1953.3795870854365
Step=24587000, rewards=1634.7176961239359
Step=24588000, rewards=1883.5172232832263
Step=24589000, rewards=1882.8334129681584
Step=24590000, rewards=1782.0756093944244
Step=24591000, rewards=1880.6468201677067
Step=24592000, rewards=1904.24809016486
Step=24593000, rewards=1871.3012426426167
Step=24594000, rewards=1861.5117271593263
Step=24595000, rewards=1915.9429787552212
Step=24596000, rewards=1906.7708075081187
Step=24597000, rewards=1937.8921203667137
Step=24598000, rewards=1872.510731263395
Step=24599000, rewards=1856.2726577077435
Step=24600000, rewards=1793.9748287437178
Step=24601000, rewards=1902.9729441511593
Step=24602000, rewards=1782.474730400294
Step=24603000, rewards=1862.5249965278613
Step=24604000, rewards=1782.050779730371
Step=24605000, rewards=1908.8069070021888
Step=24606000, rewards=1765.774487134611
Step=24607000, rewards=1847.0083072443508

Step=24780000, rewards=1963.675587715414
Step=24781000, rewards=1876.1680996888008
Step=24782000, rewards=1963.4298667801472
Step=24783000, rewards=1787.8988402165542
Step=24784000, rewards=1959.1082027107323
Step=24785000, rewards=1970.822681327963
Step=24786000, rewards=1946.1772022883622
Step=24787000, rewards=1982.9657388799142
Step=24788000, rewards=1993.0233763981378
Step=24789000, rewards=1869.9900534157075
Step=24790000, rewards=1991.7322978116727
Step=24791000, rewards=1908.5357656870872
Step=24792000, rewards=1941.2813973657665
Step=24793000, rewards=1958.1880793815844
Step=24794000, rewards=1886.0216252903574
Step=24795000, rewards=1979.6469085818258
Step=24796000, rewards=1944.3528906463212
Step=24797000, rewards=1953.326190538869
Step=24798000, rewards=1974.7544464531861
Step=24799000, rewards=1850.7496269435733
Step=24800000, rewards=1951.7400924642263
Step=24801000, rewards=1912.838827974138
Step=24802000, rewards=1969.544885253099
Step=24803000, rewards=1734.15119478432

Step=24976000, rewards=1982.588389105799
Step=24977000, rewards=1900.7661962953332
Step=24978000, rewards=1940.8418632245964
Step=24979000, rewards=1926.0827451143496
Step=24980000, rewards=1860.1573925001244
Step=24981000, rewards=1930.4383357610948
Step=24982000, rewards=1991.3392485992217
Step=24983000, rewards=1889.7017931262906
Step=24984000, rewards=1888.5064623571873
Step=24985000, rewards=1945.5116092213939
Step=24986000, rewards=1944.6940219479711
Step=24987000, rewards=1955.4115589279347
Step=24988000, rewards=1911.3955180264963
Step=24989000, rewards=1924.4475245906815
Step=24990000, rewards=1943.305556244508
Step=24991000, rewards=1941.990857086008
Step=24992000, rewards=1924.2118440561203
Step=24993000, rewards=1942.36612473264
Step=24994000, rewards=1958.8403380769382
Step=24995000, rewards=1918.9605349231854
Step=24996000, rewards=1873.0492954717295
Step=24997000, rewards=1923.2425520365268
Step=24998000, rewards=1979.0500500533722
Step=24999000, rewards=1873.94018124711

Step=25172000, rewards=1989.4301557314964
Step=25173000, rewards=1976.663646519047
Step=25174000, rewards=1956.3036753189253
Step=25175000, rewards=1930.5678066867981
Step=25176000, rewards=2006.795467087547
Step=25177000, rewards=1819.4610877991286
Step=25178000, rewards=1900.6263774267488
Step=25179000, rewards=1894.0722961203792
Step=25180000, rewards=1815.6820102218048
Step=25181000, rewards=1875.574801685232
Step=25182000, rewards=1906.9623649522698
Step=25183000, rewards=1920.1573693290893
Step=25184000, rewards=1948.2953314072677
Step=25185000, rewards=1975.865851596193
Step=25186000, rewards=1957.219039726419
Step=25187000, rewards=1962.6938874996176
Step=25188000, rewards=1988.867925650394
Step=25189000, rewards=2065.120341338684
Step=25190000, rewards=1902.3677244237588
Step=25191000, rewards=1999.5171722301834
Step=25192000, rewards=1959.0579510881248
Step=25193000, rewards=1975.373389472192
Step=25194000, rewards=1907.205608403641
Step=25195000, rewards=1896.5789765103023
S

Step=25368000, rewards=1892.8820399457156
Step=25369000, rewards=1961.893788101287
Step=25370000, rewards=1908.503490019099
Step=25371000, rewards=1855.9940724482785
Step=25372000, rewards=1893.4220662780233
Step=25373000, rewards=1905.5738611516222
Step=25374000, rewards=1839.424862482245
Step=25375000, rewards=1962.2243391481047
Step=25376000, rewards=1938.456412360289
Step=25377000, rewards=1907.9390602539906
Step=25378000, rewards=1953.1760428897317
Step=25379000, rewards=1983.9177602854763
Step=25380000, rewards=1915.1357512135855
Step=25381000, rewards=1851.929105376308
Step=25382000, rewards=1928.6863771006904
Step=25383000, rewards=1863.8847215222468
Step=25384000, rewards=1970.7892174219228
Step=25385000, rewards=2054.250820959164
Step=25386000, rewards=1899.2972637487635
Step=25387000, rewards=1890.0016665710637
Step=25388000, rewards=1921.2100778517372
Step=25389000, rewards=1963.5712356443516
Step=25390000, rewards=1975.7147381638038
Step=25391000, rewards=1926.660445467407

Step=25564000, rewards=1975.6348391308936
Step=25565000, rewards=1902.8647796053594
Step=25566000, rewards=1987.2525849318658
Step=25567000, rewards=1931.3853703012717
Step=25568000, rewards=1925.9734457696254
Step=25569000, rewards=1969.816967217776
Step=25570000, rewards=1995.4357908666411
Step=25571000, rewards=1916.8846761104737
Step=25572000, rewards=1942.5679220428333
Step=25573000, rewards=1958.0024812430777
Step=25574000, rewards=1871.41948774726
Step=25575000, rewards=1924.9948868558317
Step=25576000, rewards=1916.6835591274773
Step=25577000, rewards=1962.176351998423
Step=25578000, rewards=1970.0542251479008
Step=25579000, rewards=1976.143947673792
Step=25580000, rewards=1957.7898848222367
Step=25581000, rewards=1978.1776099197677
Step=25582000, rewards=2031.3034668654852
Step=25583000, rewards=2010.4191620165873
Step=25584000, rewards=1887.7985069223587
Step=25585000, rewards=1996.3569806974654
Step=25586000, rewards=1911.0721456620709
Step=25587000, rewards=1907.07144781694

Step=25760000, rewards=1890.983029252466
Step=25761000, rewards=1910.5206395826824
Step=25762000, rewards=1907.7856958399525
Step=25763000, rewards=2060.624612731344
Step=25764000, rewards=1968.4286769619798
Step=25765000, rewards=2001.7477734985878
Step=25766000, rewards=1964.5163827284996
Step=25767000, rewards=1938.5186290861652
Step=25768000, rewards=1925.317832121165
Step=25769000, rewards=1992.0972884095922
Step=25770000, rewards=2003.3855283141454
Step=25771000, rewards=2011.8245766603598
Step=25772000, rewards=1955.6939721513768
Step=25773000, rewards=1980.2264776820982
Step=25774000, rewards=1902.0663150920489
Step=25775000, rewards=1910.1019091591743
Step=25776000, rewards=1986.520910005273
Step=25777000, rewards=1989.5102742526292
Step=25778000, rewards=1956.4158502869482
Step=25779000, rewards=2007.508801123548
Step=25780000, rewards=1951.091732021941
Step=25781000, rewards=1963.1058484140183
Step=25782000, rewards=2037.2798021917488
Step=25783000, rewards=1935.715207400631

Step=25956000, rewards=1997.144218596626
Step=25957000, rewards=2006.5786759318407
Step=25958000, rewards=1904.222101200158
Step=25959000, rewards=1930.9323095854825
Step=25960000, rewards=1996.8056898823563
Step=25961000, rewards=1926.1570463986711
Step=25962000, rewards=1973.7872388343449
Step=25963000, rewards=1941.2677100445783
Step=25964000, rewards=2021.319457498758
Step=25965000, rewards=1849.410619739338
Step=25966000, rewards=1929.4263300946654
Step=25967000, rewards=1947.1181485452514
Step=25968000, rewards=1911.8043663996557
Step=25969000, rewards=2007.5590668569153
Step=25970000, rewards=1975.3296070307936
Step=25971000, rewards=1890.1889484026888
Step=25972000, rewards=1872.1064645764318
Step=25973000, rewards=1852.3686055217242
Step=25974000, rewards=1962.426741376694
Step=25975000, rewards=1969.4257794849652
Step=25976000, rewards=1880.1764038955396
Step=25977000, rewards=1874.3063918812306
Step=25978000, rewards=2009.0742030971226
Step=25979000, rewards=1919.17211883327

Step=26151000, rewards=1993.391612954512
Step=26152000, rewards=1933.0345543287171
Step=26153000, rewards=2000.923820296808
Step=26154000, rewards=1898.0810005153562
Step=26155000, rewards=1924.6573220711623
Step=26156000, rewards=2029.1639728402388
Step=26157000, rewards=1969.0994223659407
Step=26158000, rewards=1948.8578398762995
Step=26159000, rewards=1973.6045820792897
Step=26160000, rewards=1920.3336282371927
Step=26161000, rewards=1873.5373233811
Step=26162000, rewards=1933.393214569612
Step=26163000, rewards=2027.6613955818957
Step=26164000, rewards=1981.9476894789123
Step=26165000, rewards=1889.872581921078
Step=26166000, rewards=1996.193492884875
Step=26167000, rewards=1989.842281728067
Step=26168000, rewards=1912.4180805850592
Step=26169000, rewards=1949.4563247495305
Step=26170000, rewards=1870.4929968084114
Step=26171000, rewards=1949.3855085797788
Step=26172000, rewards=2005.9834587667688
Step=26173000, rewards=2029.415039309463
Step=26174000, rewards=1935.3950308842939
St

Step=26347000, rewards=2028.9362483174011
Step=26348000, rewards=1945.6440142585527
Step=26349000, rewards=1868.7189027067036
Step=26350000, rewards=1952.7815948774432
Step=26351000, rewards=1928.63524107783
Step=26352000, rewards=1976.183533888409
Step=26353000, rewards=1910.4977881921461
Step=26354000, rewards=2056.710327298779
Step=26355000, rewards=1975.0752163637335
Step=26356000, rewards=1932.2143555532828
Step=26357000, rewards=1885.9140812180642
Step=26358000, rewards=1912.6096784199988
Step=26359000, rewards=1974.2410538081465
Step=26360000, rewards=1862.5059315108347
Step=26361000, rewards=1949.65409813098
Step=26362000, rewards=2019.089288143424
Step=26363000, rewards=1807.7595412941464
Step=26364000, rewards=1900.1787994693498
Step=26365000, rewards=1924.2628677883981
Step=26366000, rewards=1977.2626835827596
Step=26367000, rewards=2003.5600448708883
Step=26368000, rewards=1866.1656447536477
Step=26369000, rewards=1983.3104089851936
Step=26370000, rewards=1970.9516201698298

Step=26543000, rewards=1877.256510912179
Step=26544000, rewards=1905.931024288826
Step=26545000, rewards=1944.9970236430777
Step=26546000, rewards=1917.6671061567863
Step=26547000, rewards=2005.6225187687926
Step=26548000, rewards=1823.008613139368
Step=26549000, rewards=1962.109894439092
Step=26550000, rewards=1990.3515071020856
Step=26551000, rewards=1918.5706635305348
Step=26552000, rewards=1676.4191690374992
Step=26553000, rewards=1886.2418620639667
Step=26554000, rewards=1815.765269082122
Step=26555000, rewards=1915.0159161636077
Step=26556000, rewards=1880.559540295537
Step=26557000, rewards=1993.7573911904674
Step=26558000, rewards=1956.166275483244
Step=26559000, rewards=1786.1372628210318
Step=26560000, rewards=1933.1780173212414
Step=26561000, rewards=1932.9561986428348
Step=26562000, rewards=1999.0274046824493
Step=26563000, rewards=1873.9726664431203
Step=26564000, rewards=1843.8007379731162
Step=26565000, rewards=1891.2306804019454
Step=26566000, rewards=1916.850578469725


Step=26739000, rewards=1947.7566776844747
Step=26740000, rewards=1966.7923800869848
Step=26741000, rewards=1835.7696544394798
Step=26742000, rewards=1880.9704868064712
Step=26743000, rewards=1846.8360186391833
Step=26744000, rewards=1950.5836785520062
Step=26745000, rewards=1954.9190070328084
Step=26746000, rewards=1894.5392700929656
Step=26747000, rewards=1889.9851609919945
Step=26748000, rewards=1865.2832692088243
Step=26749000, rewards=1861.301076836909
Step=26750000, rewards=1886.766978412028
Step=26751000, rewards=1943.3739192051087
Step=26752000, rewards=1837.0387633019632
Step=26753000, rewards=1764.9676374759738
Step=26754000, rewards=1894.111889388176
Step=26755000, rewards=1825.8118846762366
Step=26756000, rewards=1778.247371575116
Step=26757000, rewards=1890.2739751797617
Step=26758000, rewards=2004.8165000042713
Step=26759000, rewards=1952.5700792433795
Step=26760000, rewards=1791.5832334034244
Step=26761000, rewards=1903.6792973395773
Step=26762000, rewards=1975.4174024518

Step=26935000, rewards=1793.092127346289
Step=26936000, rewards=1882.7493152311938
Step=26937000, rewards=1926.0579894924247
Step=26938000, rewards=1899.4435321161557
Step=26939000, rewards=1923.2830573807476
Step=26940000, rewards=1922.4458019946408
Step=26941000, rewards=1957.9670791453625
Step=26942000, rewards=1956.684502995374
Step=26943000, rewards=1803.4156051280013
Step=26944000, rewards=1940.251445578874
Step=26945000, rewards=1919.286570236528
Step=26946000, rewards=1902.7962295858342
Step=26947000, rewards=1931.564835113529
Step=26948000, rewards=1918.1676058064888
Step=26949000, rewards=1911.5915950999545
Step=26950000, rewards=1875.6326546672442
Step=26951000, rewards=1891.075219175354
Step=26952000, rewards=1915.4313799772553
Step=26953000, rewards=1747.0109443715742
Step=26954000, rewards=1961.3620077114922
Step=26955000, rewards=1831.5698604958911
Step=26956000, rewards=1901.840877659242
Step=26957000, rewards=1857.8491569001214
Step=26958000, rewards=1927.3180375204163

Step=27131000, rewards=1896.01864668191
Step=27132000, rewards=1863.1110892514266
Step=27133000, rewards=1828.6490503047016
Step=27134000, rewards=1875.6112003307665
Step=27135000, rewards=1854.6358720716353
Step=27136000, rewards=1901.9450487880972
Step=27137000, rewards=1897.2151685641152
Step=27138000, rewards=1737.7861036307927
Step=27139000, rewards=1909.1173622061422
Step=27140000, rewards=1722.1699475693956
Step=27141000, rewards=1827.2927481667857
Step=27142000, rewards=1809.4718901633366
Step=27143000, rewards=1797.8665715549164
Step=27144000, rewards=1893.6353515174617
Step=27145000, rewards=1876.5691817794373
Step=27146000, rewards=1812.0852123213594
Step=27147000, rewards=1763.7625648065848
Step=27148000, rewards=1783.577211284731
Step=27149000, rewards=1879.4891031021932
Step=27150000, rewards=1844.784501426338
Step=27151000, rewards=1722.8990297150222
Step=27152000, rewards=1944.4120619034854
Step=27153000, rewards=1810.203375635056
Step=27154000, rewards=1869.81644997615

Step=27327000, rewards=1979.2598727244551
Step=27328000, rewards=1878.5664107876569
Step=27329000, rewards=1766.9701249233751
Step=27330000, rewards=1881.9899766634433
Step=27331000, rewards=1792.5403873908735
Step=27332000, rewards=1945.6580043394279
Step=27333000, rewards=1794.1926124619138
Step=27334000, rewards=1813.3642821269875
Step=27335000, rewards=1881.9053193879577
Step=27336000, rewards=1983.2370705500944
Step=27337000, rewards=1944.6084789455017
Step=27338000, rewards=1703.9973729907222
Step=27339000, rewards=1850.9407599761014
Step=27340000, rewards=1749.367530977256
Step=27341000, rewards=1861.3617701047735
Step=27342000, rewards=1745.2329273542596
Step=27343000, rewards=1801.7622734860424
Step=27344000, rewards=1985.6399022650774
Step=27345000, rewards=1953.772746560179
Step=27346000, rewards=1920.2423279688892
Step=27347000, rewards=1879.4911503625965
Step=27348000, rewards=1927.9363994339676
Step=27349000, rewards=1978.785625461228
Step=27350000, rewards=1870.971438129

Step=27522000, rewards=1930.8369551265484
Step=27523000, rewards=1850.7229585487498
Step=27524000, rewards=1944.275373383903
Step=27525000, rewards=1741.272397270911
Step=27526000, rewards=1915.9449152919985
Step=27527000, rewards=1852.6941087863684
Step=27528000, rewards=1804.9441916803235
Step=27529000, rewards=1859.4470351937714
Step=27530000, rewards=1971.6922694362495
Step=27531000, rewards=1942.4955443229703
Step=27532000, rewards=1726.3839555148136
Step=27533000, rewards=1862.6989897826588
Step=27534000, rewards=1791.0497589156867
Step=27535000, rewards=1854.6274175092465
Step=27536000, rewards=1930.6415662089414
Step=27537000, rewards=1978.7321381353665
Step=27538000, rewards=1779.5326089328287
Step=27539000, rewards=1859.0730011961864
Step=27540000, rewards=1853.2622042496425
Step=27541000, rewards=1920.463814118037
Step=27542000, rewards=1838.2371397592924
Step=27543000, rewards=1749.6397402999614
Step=27544000, rewards=1895.2630085302771
Step=27545000, rewards=1854.800970897

Step=27718000, rewards=1976.321483700404
Step=27719000, rewards=1904.5078122680488
Step=27720000, rewards=1950.0557310160398
Step=27721000, rewards=1958.02460846783
Step=27722000, rewards=1921.3400858586717
Step=27723000, rewards=1926.621698961777
Step=27724000, rewards=1898.0691754076602
Step=27725000, rewards=1857.1863404872697
Step=27726000, rewards=1932.5187113835043
Step=27727000, rewards=1837.2496691693134
Step=27728000, rewards=1918.7341957957071
Step=27729000, rewards=2008.5871594390721
Step=27730000, rewards=1890.756528798744
Step=27731000, rewards=1796.9418536492308
Step=27732000, rewards=1965.861794373259
Step=27733000, rewards=1827.5987264309733
Step=27734000, rewards=1901.4151212620566
Step=27735000, rewards=1897.8184193327945
Step=27736000, rewards=1894.1654241179303
Step=27737000, rewards=1928.5169067540917
Step=27738000, rewards=1956.202823572457
Step=27739000, rewards=1894.1239064577603
Step=27740000, rewards=1981.5634866320868
Step=27741000, rewards=1877.1001764350383

Step=27914000, rewards=1930.487627654271
Step=27915000, rewards=1697.1475013439574
Step=27916000, rewards=1842.7691541765287
Step=27917000, rewards=1905.9027971015662
Step=27918000, rewards=1902.181926371779
Step=27919000, rewards=1859.0219407991922
Step=27920000, rewards=1963.9141103149525
Step=27921000, rewards=1805.448753700327
Step=27922000, rewards=1894.5972272882536
Step=27923000, rewards=1896.4518133478032
Step=27924000, rewards=1979.990896415216
Step=27925000, rewards=1941.9310011269515
Step=27926000, rewards=1941.0134129050389
Step=27927000, rewards=1917.5369445347528
Step=27928000, rewards=1877.0738168625658
Step=27929000, rewards=1888.4692251779568
Step=27930000, rewards=1906.9399024254235
Step=27931000, rewards=1799.7075669022072
Step=27932000, rewards=1965.1949366292347
Step=27933000, rewards=1852.5732288177765
Step=27934000, rewards=1811.7328772500812
Step=27935000, rewards=1915.1515582154202
Step=27936000, rewards=1973.1352697623568
Step=27937000, rewards=1915.2028008264

Step=28109000, rewards=1918.4410976070344
Step=28110000, rewards=1851.9169420755566
Step=28111000, rewards=1760.7836733137697
Step=28112000, rewards=1972.8708807902972
Step=28113000, rewards=1750.3521439508208
Step=28114000, rewards=1868.8622124115598
Step=28115000, rewards=1972.1885244944206
Step=28116000, rewards=1929.4108658063185
Step=28117000, rewards=1893.2864657332816
Step=28118000, rewards=1792.9973616839793
Step=28119000, rewards=1843.7170893167029
Step=28120000, rewards=1891.9171022464118
Step=28121000, rewards=1923.0886619083867
Step=28122000, rewards=1984.5754360652672
Step=28123000, rewards=1896.3118046339378
Step=28124000, rewards=1862.7273897735581
Step=28125000, rewards=1930.3050274625923
Step=28126000, rewards=1908.694857502868
Step=28127000, rewards=1915.7366268519595
Step=28128000, rewards=1855.2684779159229
Step=28129000, rewards=1825.4676369770025
Step=28130000, rewards=1920.2448911997235
Step=28131000, rewards=1949.1257069283954
Step=28132000, rewards=1916.9581503

Step=28305000, rewards=1863.0042708346589
Step=28306000, rewards=1848.7066993134006
Step=28307000, rewards=1984.2360206508833
Step=28308000, rewards=1906.9600167248582
Step=28309000, rewards=1857.3290701396716
Step=28310000, rewards=1911.6098725611855
Step=28311000, rewards=1950.5829905683192
Step=28312000, rewards=1956.9226163513479
Step=28313000, rewards=1929.9469630557464
Step=28314000, rewards=1933.166158689689
Step=28315000, rewards=1921.6180621514745
Step=28316000, rewards=1739.3376575238265
Step=28317000, rewards=1850.1965089648468
Step=28318000, rewards=1890.1999588161339
Step=28319000, rewards=1919.9494515964366
Step=28320000, rewards=1868.9183568535868
Step=28321000, rewards=1890.322318581614
Step=28322000, rewards=1920.1718406907814
Step=28323000, rewards=1875.7943830441382
Step=28324000, rewards=1957.3440431966512
Step=28325000, rewards=1852.6441519028142
Step=28326000, rewards=1976.052935876751
Step=28327000, rewards=1938.528121978974
Step=28328000, rewards=1942.2275048516

Step=28501000, rewards=1998.8079057524826
Step=28502000, rewards=1952.0741199410986
Step=28503000, rewards=1881.1118956299276
Step=28504000, rewards=1842.706205785314
Step=28505000, rewards=1953.2812866148768
Step=28506000, rewards=1974.363919010311
Step=28507000, rewards=1888.3491544535898
Step=28508000, rewards=1938.7247377026854
Step=28509000, rewards=1954.4938584013378
Step=28510000, rewards=1858.9393896772483
Step=28511000, rewards=1983.841649192605
Step=28512000, rewards=1976.4034983498125
Step=28513000, rewards=1944.4235363043194
Step=28514000, rewards=1794.5027387806979
Step=28515000, rewards=1975.910244501953
Step=28516000, rewards=1887.9351084815603
Step=28517000, rewards=1972.95866278984
Step=28518000, rewards=1936.5248696002297
Step=28519000, rewards=2006.115920621935
Step=28520000, rewards=1958.9311170250949
Step=28521000, rewards=1940.8718762213632
Step=28522000, rewards=1942.2041455431881
Step=28523000, rewards=1958.8686343493746
Step=28524000, rewards=1955.0990820756485

Step=28697000, rewards=1891.8091974431675
Step=28698000, rewards=1625.3252567817572
Step=28699000, rewards=1920.8440755022484
Step=28700000, rewards=1937.6791156695126
Step=28701000, rewards=1911.2049327888974
Step=28702000, rewards=1917.8674732171305
Step=28703000, rewards=1892.0530657788302
Step=28704000, rewards=1914.0391351465296
Step=28705000, rewards=1900.5966923853564
Step=28706000, rewards=1920.0636819194563
Step=28707000, rewards=1856.2145479097935
Step=28708000, rewards=1916.5685168083319
Step=28709000, rewards=1817.98843316509
Step=28710000, rewards=1881.5325137792584
Step=28711000, rewards=1854.913678648124
Step=28712000, rewards=1902.874483285862
Step=28713000, rewards=1866.9375809309092
Step=28714000, rewards=1918.7822103025449
Step=28715000, rewards=1928.3420411696495
Step=28716000, rewards=1867.1681196764455
Step=28717000, rewards=1973.6672600772351
Step=28718000, rewards=1920.7369181584506
Step=28719000, rewards=1877.72290776125
Step=28720000, rewards=1905.570137747566

Step=28893000, rewards=1851.577146239571
Step=28894000, rewards=1845.4815175261967
Step=28895000, rewards=1874.0275664783308
Step=28896000, rewards=1934.3858412805737
Step=28897000, rewards=1922.8271797575599
Step=28898000, rewards=1989.0227743810478
Step=28899000, rewards=1931.186279830722
Step=28900000, rewards=1938.7803264942215
Step=28901000, rewards=1998.3596855159285
Step=28902000, rewards=1971.0278622404396
Step=28903000, rewards=1976.6248512273974
Step=28904000, rewards=1928.7078339441643
Step=28905000, rewards=1945.0185135408083
Step=28906000, rewards=1868.9020241935723
Step=28907000, rewards=1940.5221915373775
Step=28908000, rewards=1881.6463833010152
Step=28909000, rewards=1935.520891632065
Step=28910000, rewards=1989.013565065607
Step=28911000, rewards=1964.3920963571036
Step=28912000, rewards=1891.7450704834603
Step=28913000, rewards=1812.8761216943067
Step=28914000, rewards=1854.0314526129025
Step=28915000, rewards=1733.251702362407
Step=28916000, rewards=1777.01708426366

Step=29089000, rewards=1885.8554738033758
Step=29090000, rewards=2016.7780416272367
Step=29091000, rewards=1981.6949341415884
Step=29092000, rewards=1971.6976581282124
Step=29093000, rewards=1944.5403600523211
Step=29094000, rewards=1952.5048725437903
Step=29095000, rewards=1952.5137131523227
Step=29096000, rewards=2038.5192591267346
Step=29097000, rewards=1943.8101133372313
Step=29098000, rewards=1926.7091883689468
Step=29099000, rewards=1947.4374215253092
Step=29100000, rewards=1828.492160823701
Step=29101000, rewards=1981.419313700003
Step=29102000, rewards=1836.8899589425384
Step=29103000, rewards=1949.6904542426191
Step=29104000, rewards=1994.2513164927366
Step=29105000, rewards=1909.039379526392
Step=29106000, rewards=1976.1269816472966
Step=29107000, rewards=2001.0744313498442
Step=29108000, rewards=1855.886054749115
Step=29109000, rewards=1995.155429696358
Step=29110000, rewards=1929.464587815649
Step=29111000, rewards=1963.188831226977
Step=29112000, rewards=1976.876798162498


Step=29285000, rewards=1908.0426677123332
Step=29286000, rewards=1891.5294773400772
Step=29287000, rewards=1938.4439892069013
Step=29288000, rewards=1889.772318488239
Step=29289000, rewards=1816.9903180581596
Step=29290000, rewards=1931.8936772508928
Step=29291000, rewards=1860.48487737581
Step=29292000, rewards=1976.6504017398172
Step=29293000, rewards=1937.4818486953839
Step=29294000, rewards=1950.5266458655399
Step=29295000, rewards=1999.6830047759142
Step=29296000, rewards=1959.5625666210556
Step=29297000, rewards=1900.3669190815808
Step=29298000, rewards=1993.1068039459697
Step=29299000, rewards=1910.2996642491662
Step=29300000, rewards=2010.3689916673873
Step=29301000, rewards=1953.2994962341786
Step=29302000, rewards=2024.5807390035707
Step=29303000, rewards=1984.3796513481575
Step=29304000, rewards=1836.5784815972688
Step=29305000, rewards=1912.3824694864072
Step=29306000, rewards=1969.471339659226
Step=29307000, rewards=1932.9377972137765
Step=29308000, rewards=1969.0411112919

Step=29481000, rewards=1890.8234948111499
Step=29482000, rewards=1805.8451023420405
Step=29483000, rewards=1896.103904548482
Step=29484000, rewards=1794.6897394191801
Step=29485000, rewards=1903.696727786607
Step=29486000, rewards=1824.302424424023
Step=29487000, rewards=1846.0724456268156
Step=29488000, rewards=1819.0747257682035
Step=29489000, rewards=1860.8058749605905
Step=29490000, rewards=1978.1939132233626
Step=29491000, rewards=1819.235810149318
Step=29492000, rewards=1706.526621628615
Step=29493000, rewards=1827.3470203714105
Step=29494000, rewards=1842.941114574239
Step=29495000, rewards=1956.2150679572985
Step=29496000, rewards=1872.2694638760913
Step=29497000, rewards=1897.86444608937
Step=29498000, rewards=1729.8866729894376
Step=29499000, rewards=1884.4130712158214
Step=29500000, rewards=1786.7712350505037
Step=29501000, rewards=1740.250206965706
Step=29502000, rewards=1756.9996440083767
Step=29503000, rewards=1880.0510750520627
Step=29504000, rewards=1880.4005953668825
S

Step=29677000, rewards=1905.082366467539
Step=29678000, rewards=1772.0373793590156
Step=29679000, rewards=1821.661699934928
Step=29680000, rewards=1885.517660777886
Step=29681000, rewards=1929.0539072333768
Step=29682000, rewards=1909.9765045048355
Step=29683000, rewards=1816.9153454087543
Step=29684000, rewards=1819.0674677571658
Step=29685000, rewards=1954.575550297336
Step=29686000, rewards=1916.7588673167854
Step=29687000, rewards=1768.9941652252485
Step=29688000, rewards=1844.8608798106359
Step=29689000, rewards=1852.400583118598
Step=29690000, rewards=1907.2047357469883
Step=29691000, rewards=1932.507251285172
Step=29692000, rewards=2006.036249091125
Step=29693000, rewards=1752.7657436865013
Step=29694000, rewards=1828.238744359859
Step=29695000, rewards=1939.1988065855753
Step=29696000, rewards=1831.879658999766
Step=29697000, rewards=2027.2356287552125
Step=29698000, rewards=1807.6118639216447
Step=29699000, rewards=1857.9079280296742
Step=29700000, rewards=1808.3666581804728
S

Step=29873000, rewards=1841.709440394412
Step=29874000, rewards=1933.5691381075944
Step=29875000, rewards=1933.462327895829
Step=29876000, rewards=1937.5585935469583
Step=29877000, rewards=1891.3516227443881
Step=29878000, rewards=1938.8117333294758
Step=29879000, rewards=1856.248614348615
Step=29880000, rewards=1922.7790336671012
Step=29881000, rewards=1922.6845852597094
Step=29882000, rewards=1868.819110626201
Step=29883000, rewards=1919.9981856872182
Step=29884000, rewards=1908.4262040362482
Step=29885000, rewards=1862.8888534430805
Step=29886000, rewards=1896.6870587756669
Step=29887000, rewards=1947.8288152017744
Step=29888000, rewards=1832.4778719178516
Step=29889000, rewards=1751.7160337087976
Step=29890000, rewards=1795.9357247418639
Step=29891000, rewards=1934.4310895148462
Step=29892000, rewards=1770.3773319320057
Step=29893000, rewards=1914.4923071281873
Step=29894000, rewards=1849.9837304663345
Step=29895000, rewards=1870.5610187479056
Step=29896000, rewards=1819.8685692938

Step=30069000, rewards=2008.055442182051
Step=30070000, rewards=1949.0432348655702
Step=30071000, rewards=1999.209712666012
Step=30072000, rewards=1883.9242438745266
Step=30073000, rewards=1976.7988913569802
Step=30074000, rewards=1844.5144921863086
Step=30075000, rewards=1909.6578867665662
Step=30076000, rewards=1944.0800027566615
Step=30077000, rewards=1953.4930141109432
Step=30078000, rewards=1988.10823303288
Step=30079000, rewards=1885.5095845539565
Step=30080000, rewards=2091.392180621149
Step=30081000, rewards=1948.8888844089388
Step=30082000, rewards=1896.9041200358608
Step=30083000, rewards=2028.7911705821
Step=30084000, rewards=1921.9023326430072
Step=30085000, rewards=1925.2651301605881
Step=30086000, rewards=1958.4185104693988
Step=30087000, rewards=1900.2397972607578
Step=30088000, rewards=2016.8799279008124
Step=30089000, rewards=1933.238567281592
Step=30090000, rewards=2006.5344276831813
Step=30091000, rewards=1957.7084371918172
Step=30092000, rewards=1956.938943685227
St

Step=30265000, rewards=1769.3998274142311
Step=30266000, rewards=1934.7577485634931
Step=30267000, rewards=1901.6633096176613
Step=30268000, rewards=1914.558275446389
Step=30269000, rewards=1885.4329064865408
Step=30270000, rewards=1825.3023215971796
Step=30271000, rewards=1897.9331063996335
Step=30272000, rewards=1941.0427159635199
Step=30273000, rewards=1860.571486724278
Step=30274000, rewards=1909.5815043418118
Step=30275000, rewards=1729.3181332528588
Step=30276000, rewards=1797.7944044756237
Step=30277000, rewards=1861.3101059423302
Step=30278000, rewards=1853.8760127451922
Step=30279000, rewards=1911.2870129336757
Step=30280000, rewards=1845.3044109745597
Step=30281000, rewards=1867.3479715142512
Step=30282000, rewards=1874.447084233248
Step=30283000, rewards=1832.3773721760183
Step=30284000, rewards=1866.1408463946989
Step=30285000, rewards=1736.6075800058309
Step=30286000, rewards=1889.708072221528
Step=30287000, rewards=1885.2934341877606
Step=30288000, rewards=1800.1872531832

Step=30461000, rewards=1938.4550947726773
Step=30462000, rewards=1886.7575971747235
Step=30463000, rewards=1887.433886908481
Step=30464000, rewards=1913.7456920520863
Step=30465000, rewards=1926.2934813122133
Step=30466000, rewards=1756.57561519491
Step=30467000, rewards=1856.355372886513
Step=30468000, rewards=1840.9381526398079
Step=30469000, rewards=1893.5601841624264
Step=30470000, rewards=1861.7690245090805
Step=30471000, rewards=1870.6938028394336
Step=30472000, rewards=1808.9460659649314
Step=30473000, rewards=1945.776557465061
Step=30474000, rewards=1896.684185417747
Step=30475000, rewards=1931.6996104300626
Step=30476000, rewards=1904.7610955703062
Step=30477000, rewards=1928.233192668377
Step=30478000, rewards=1869.2312466448202
Step=30479000, rewards=1898.38839164775
Step=30480000, rewards=1972.5271067032954
Step=30481000, rewards=1907.3500178788656
Step=30482000, rewards=1866.926461228204
Step=30483000, rewards=1864.2450040956066
Step=30484000, rewards=1923.5943300669178
St

Step=30657000, rewards=1937.2571335040363
Step=30658000, rewards=1786.4319455348507
Step=30659000, rewards=1970.2141487207816
Step=30660000, rewards=1955.367566356524
Step=30661000, rewards=1895.5387408959846
Step=30662000, rewards=1873.9522564371616
Step=30663000, rewards=1945.9888885630073
Step=30664000, rewards=1916.8838810694097
Step=30665000, rewards=1957.1477800978391
Step=30666000, rewards=1912.2653084321532
Step=30667000, rewards=2011.7362649950642
Step=30668000, rewards=1981.3032620055833
Step=30669000, rewards=1962.601678626908
Step=30670000, rewards=1855.0080845365867
Step=30671000, rewards=1925.2734373933242
Step=30672000, rewards=1952.0359681028774
Step=30673000, rewards=1986.2109811909575
Step=30674000, rewards=1909.137692831546
Step=30675000, rewards=1912.3251103877817
Step=30676000, rewards=1877.9366618864583
Step=30677000, rewards=1973.477125379057
Step=30678000, rewards=1982.2959921346217
Step=30679000, rewards=1911.1427488552656
Step=30680000, rewards=2019.7349108436

KeyboardInterrupt: 

### Tensorboard Monitor

In [20]:
%load_ext tensorboard
%tensorboard --logdir runs

Reusing TensorBoard on port 6006 (pid 12212), started 6:50:27 ago. (Use '!kill 12212' to kill it.)

### Tensorboard logs snap:
After around 8 hours.
![img](./runs/tensorboardsnapshot.JPG)

### Video
The video with running as python fine

In [27]:
# show video
from IPython.display import HTML
import os

video_names = list(
    filter(lambda s: s.endswith(".mp4"), os.listdir("./videos/")))
HTML("""
<video width="640" height="480" controls>
  <source src="{}" type="video/mp4">
</video>
""".format("./videos/"+video_names[-1]))  # this may or may not be _last_ video. Try other indices

In [28]:
env.close()