In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '4'  # or any {'0', '1', '2'}
from imagenet_dataset import get_dataset
from retina_env import RetinaEnv, calculate_retinal_filter
from rl_networks import create_actor_model, create_critic_model, policy
from rl_core import Buffer, update_target
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

import types
# Experiment configuration: one namespace collects every setting used below.
config = types.SimpleNamespace(batch_size=32, margin=20, image_h=224, image_w=224)

# Padded image size: the margin is added on both sides of each dimension.
config.image_hm = config.image_h + 2 * config.margin
config.image_wm = config.image_w + 2 * config.margin

vars(config).update(
    foveate=None,
    do_grayscale=True,
    history_length=16,
    t_ignore=16,
    t_max=50,
    motion_mode='velocity',
    use_dones=True,
    gym_mode=False,  # selects which env attributes are read for sizes/bounds
)

# 16 evenly spaced sample points from 0 to 150 at which the filter is evaluated.
t_vec = np.linspace(0, 150, 16)

# Retinal filter, reshaped to rank 4 with all filter samples on the third axis.
balanced_filter = calculate_retinal_filter(t_vec, R=1.0)
config.filter = balanced_filter.reshape([1, 1, -1, 1])

vars(config).update(
    min_freq=1,
    max_freq=13,
    action_upper_bound=np.array([2.0, 2.0]),
)

# NOTE(review): absolute, machine-specific path; it is not referenced by any
# of the visible cells -- confirm whether it is still needed.
dataset_dir = '/home/bnapp/datasets/tensorflow_datasets/imagenet2012/5.0.0/'

def epsilon_scheduler(episode, floor_episode=200, epsilon_floor=0.1):
    """Return the exploration epsilon for a given episode index.

    Epsilon starts at 1.0 for episode 0 and decreases linearly until it
    reaches ``epsilon_floor`` at ``floor_episode``; from then on it stays
    at ``epsilon_floor``.

    Args:
        episode: Index of the current episode.
        floor_episode: Episode at which the linear decay ends.
        epsilon_floor: Value epsilon is held at once the decay is over.

    Returns:
        The epsilon value for ``episode``.
    """
    # Past the decay window: epsilon is pinned to its floor.
    if not episode < floor_episode:
        return epsilon_floor
    decay_span = 1. - epsilon_floor
    return 1. - decay_span * episode / floor_episode



# Environment used for training.
# NOTE(review): `BatchMountainCarContinuous` is not imported in any visible
# cell (only `RetinaEnv` is), while the code below reads retina-style
# attributes such as `env.image_h` -- confirm which environment is intended;
# on a fresh kernel this line fails unless the class is defined elsewhere.
env = BatchMountainCarContinuous(batch_size=config.batch_size)

# State/action sizes and action limits: taken from the env's own attributes,
# or from its gym-style spaces when running in gym mode.
if not config.gym_mode:
    num_states, num_actions = env.observation_size, env.action_size
    upper_bound, lower_bound = env.action_upper_bound, env.action_lower_bound
else:
    num_states = env.observation_space.shape[0]
    num_actions = env.action_space.shape[0]

    upper_bound = env.action_space.high[0]
    lower_bound = env.action_space.low[0]

# Hyperparameters (tune as needed).
actor_lr, critic_lr = 1e-4, 1e-3
gamma = 0.99
tau = 0.005

buffer_capacity = 10_000

# All four networks are built from the same set of size arguments.
network_dims = (
    env.image_h,
    env.image_w,
    env.spectral_density_size,
    env.location_history_size,
    env.timestep_size,
    env.action_size,
)

# Online actor and critic.
actor_model = create_actor_model(*network_dims)
critic_model = create_critic_model(*network_dims)

# Target actor and critic.
target_actor = create_actor_model(*network_dims)
target_critic = create_critic_model(*network_dims)

# Start the targets as exact copies of the online networks.
for target_net, online_net in ((target_actor, actor_model), (target_critic, critic_model)):
    target_net.set_weights(online_net.get_weights())

# One Adam optimizer per network, each with its own learning rate.
critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

# Experience replay buffer.
buffer = Buffer(
    buffer_capacity,
    config.batch_size,
    num_states=num_states,
    num_actions=num_actions,
    state_reshape_fn=env.unflatten_observation,
    use_dones=config.use_dones,
)

 The versions of TensorFlow you are currently using is 2.6.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
# Training loop
#
# Each episode: reset the environment, then step until it reports `done`.
# Every step records the transition in the replay buffer, runs one learning
# update and logs statistics of the actor's (pre-exploration) action.
#
# NOTE(review): `images` is not defined in any visible cell -- it has to be
# loaded before this cell runs (presumably via `get_dataset`; confirm).

# Factor applied to the action right before it is sent to the environment.
ACTION_SCALE = 10

# Action box used for uniform exploration samples; same bounds the policy
# receives, so exploration and policy share one action range.
action_low = np.asarray(lower_bound, dtype=np.float64)
action_high = np.asarray(upper_bound, dtype=np.float64)

reward_records = []
epsilon_records = []
action_mean_records = []
action_var_records = []
episodes = 10000
for ep in range(episodes):
    prev_state = env.reset(images=images)
    episodic_reward = 0
    epsilon = epsilon_scheduler(ep, floor_episode=1000)

    while True:
        deterministic_action = policy(env.unflatten_observation(prev_state), actor_model, lower_bound, upper_bound)
        random_action = action_low + (action_high - action_low) * np.random.uniform(
            size=(config.batch_size, num_actions))

        # Until the environment reports its warm-up as done, act purely at
        # random; afterwards blend random and policy actions by epsilon.
        if env.warmup_done:
            action = epsilon*random_action + (1-epsilon)*deterministic_action
        else:
            action = random_action

        # Receive state and reward from the environment.
        state, reward, done, info = env.step(ACTION_SCALE*action)

        # The buffer stores the unscaled action.
        if config.use_dones:
            buffer.record((prev_state, action, reward, state, done))
        else:
            buffer.record((prev_state, action, reward, state))

        episodic_reward += reward

        buffer.learn(actor_model, target_actor, critic_model, target_critic, actor_optimizer, critic_optimizer, gamma, tau)
        # NOTE(review): `buffer.learn` is also handed the target networks and
        # `tau` -- confirm it does not already soft-update the targets, which
        # would make the two calls below a second update per step.
        update_target(target_actor.variables, actor_model.variables, tau)
        update_target(target_critic.variables, critic_model.variables, tau)

        # Per-step mean/variance of the policy action, taken over axis 0.
        deterministic_np = deterministic_action.numpy()
        action_mean_records.append(deterministic_np.mean(axis=0))
        action_var_records.append(deterministic_np.var(axis=0))

        if done:
            break

        # Copy instead of aliasing so later changes to `state` cannot leak
        # into `prev_state`.
        prev_state = np.copy(state)

    mean_episodic_reward = episodic_reward.numpy().mean()
    print(f"Episode * {ep} * exploration epsilon {epsilon} * Episodic Reward is ==> {mean_episodic_reward}")
    print("action means and variances at step -10:", action_mean_records[-10],action_var_records[-10])
    print("action means and variances at step -5:", action_mean_records[-5],action_var_records[-5])
    reward_records.append(mean_episodic_reward)
    epsilon_records.append(epsilon)

Episode * 0 * exploration epsilon 1.0 * Episodic Reward is ==> -12.085285186767578
action means and variances at step -10: [ 0.1957888 -0.4297591] [0.00220437 0.00051978]
action means and variances at step -5: [ 0.18302326 -0.5028217 ] [0.00221128 0.0005898 ]
Episode * 1 * exploration epsilon 0.9991 * Episodic Reward is ==> -11.289146423339844
action means and variances at step -10: [-0.3747945 -0.5470153] [0.00062857 0.00033525]
action means and variances at step -5: [-0.47265172 -0.52397805] [0.00052581 0.00020283]
Episode * 2 * exploration epsilon 0.9982 * Episodic Reward is ==> -11.516767501831055
action means and variances at step -10: [-0.59991205 -0.74750835] [0.00025864 0.00034701]
action means and variances at step -5: [-0.63800186 -0.76517045] [0.0002392 0.0003766]
Episode * 3 * exploration epsilon 0.9973 * Episodic Reward is ==> -12.017725944519043
action means and variances at step -10: [-0.7687416 -0.9391137] [0.00036351 0.00055186]
action means and variances at step -5: [

Episode * 16 * exploration epsilon 0.9856 * Episodic Reward is ==> -11.727266311645508
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 17 * exploration epsilon 0.9847 * Episodic Reward is ==> -11.993698120117188
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 18 * exploration epsilon 0.9838 * Episodic Reward is ==> -11.47765827178955
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 19 * exploration epsilon 0.9829 * Episodic Reward is ==> -11.764754295349121
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 20 * exploration epsilon 0.982 * Episodic Reward is ==> -11.477373123168945
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] 

Episode * 34 * exploration epsilon 0.9694 * Episodic Reward is ==> -12.105091094970703
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 35 * exploration epsilon 0.9685 * Episodic Reward is ==> -11.543885231018066
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 36 * exploration epsilon 0.9676 * Episodic Reward is ==> -12.195247650146484
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 37 * exploration epsilon 0.9667 * Episodic Reward is ==> -11.67061710357666
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 38 * exploration epsilon 0.9658 * Episodic Reward is ==> -11.701221466064453
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-1.91844

Episode * 52 * exploration epsilon 0.9532 * Episodic Reward is ==> -11.700263977050781
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 53 * exploration epsilon 0.9523 * Episodic Reward is ==> -11.548986434936523
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 54 * exploration epsilon 0.9514 * Episodic Reward is ==> -11.211820602416992
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 55 * exploration epsilon 0.9505 * Episodic Reward is ==> -11.544827461242676
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 56 * exploration epsilon 0.9496 * Episodic Reward is ==> -11.676017761230469
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.

Episode * 70 * exploration epsilon 0.937 * Episodic Reward is ==> -11.715580940246582
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 71 * exploration epsilon 0.9361 * Episodic Reward is ==> -11.844799041748047
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 72 * exploration epsilon 0.9352 * Episodic Reward is ==> -11.811332702636719
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 73 * exploration epsilon 0.9343 * Episodic Reward is ==> -11.816896438598633
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.] [0. 0.]
Episode * 74 * exploration epsilon 0.9334 * Episodic Reward is ==> -11.015264511108398
action means and variances at step -10: [-2. -2.] [0. 0.]
action means and variances at step -5: [-2. -2.]

Episode * 87 * exploration epsilon 0.9217 * Episodic Reward is ==> -11.522330284118652
action means and variances at step -10: [ 0.05299662 -2.        ] [8.828745e-06 0.000000e+00]
action means and variances at step -5: [ 0.04248764 -2.        ] [1.0314303e-05 0.0000000e+00]
Episode * 88 * exploration epsilon 0.9208 * Episodic Reward is ==> -11.452474594116211
action means and variances at step -10: [ 0.03298045 -2.        ] [2.0144988e-05 0.0000000e+00]
action means and variances at step -5: [ 0.0949224 -2.       ] [7.476223e-06 0.000000e+00]
Episode * 89 * exploration epsilon 0.9198999999999999 * Episodic Reward is ==> -11.574450492858887
action means and variances at step -10: [ 0.02240251 -2.        ] [2.5663436e-05 0.0000000e+00]
action means and variances at step -5: [ 0.00799531 -2.        ] [2.330365e-05 0.000000e+00]
Episode * 90 * exploration epsilon 0.919 * Episodic Reward is ==> -11.663267135620117
action means and variances at step -10: [ 0.01512686 -2.        ] [4.062711e

Episode * 103 * exploration epsilon 0.9073 * Episodic Reward is ==> -11.321850776672363
action means and variances at step -10: [ 0.44129097 -2.        ] [1.3957833e-05 0.0000000e+00]
action means and variances at step -5: [ 0.43848622 -2.        ] [9.046789e-06 0.000000e+00]
Episode * 104 * exploration epsilon 0.9064 * Episodic Reward is ==> -11.679078102111816
action means and variances at step -10: [ 0.18266824 -2.        ] [1.649458e-05 0.000000e+00]
action means and variances at step -5: [ 0.17325577 -2.        ] [9.590587e-06 0.000000e+00]
Episode * 105 * exploration epsilon 0.9055 * Episodic Reward is ==> -11.714622497558594
action means and variances at step -10: [ 0.22902715 -2.        ] [1.1113764e-05 0.0000000e+00]
action means and variances at step -5: [ 0.24155693 -2.        ] [1.4504758e-05 0.0000000e+00]
Episode * 106 * exploration epsilon 0.9046 * Episodic Reward is ==> -11.81828498840332
action means and variances at step -10: [ 0.15600106 -2.        ] [4.4120497e-06 0

Episode * 119 * exploration epsilon 0.8929 * Episodic Reward is ==> -11.555164337158203
action means and variances at step -10: [ 0.07965539 -2.        ] [3.2125856e-06 0.0000000e+00]
action means and variances at step -5: [ 0.09111769 -2.        ] [2.2522668e-06 0.0000000e+00]
Episode * 120 * exploration epsilon 0.892 * Episodic Reward is ==> -11.50143051147461
action means and variances at step -10: [ 0.09407436 -2.        ] [4.9823343e-06 0.0000000e+00]
action means and variances at step -5: [ 0.11894236 -2.        ] [4.4108024e-06 0.0000000e+00]
Episode * 121 * exploration epsilon 0.8911 * Episodic Reward is ==> -11.578878402709961
action means and variances at step -10: [ 0.45552933 -2.        ] [2.6222233e-05 0.0000000e+00]
action means and variances at step -5: [ 0.39687002 -2.        ] [2.2283435e-05 0.0000000e+00]
Episode * 122 * exploration epsilon 0.8902 * Episodic Reward is ==> -12.106035232543945
action means and variances at step -10: [ 0.44766653 -2.        ] [1.7341654e

Episode * 135 * exploration epsilon 0.8785000000000001 * Episodic Reward is ==> -11.759062767028809
action means and variances at step -10: [ 0.68578535 -2.        ] [8.83889e-06 0.00000e+00]
action means and variances at step -5: [ 0.69393814 -2.        ] [1.10461915e-05 0.00000000e+00]
Episode * 136 * exploration epsilon 0.8775999999999999 * Episodic Reward is ==> -11.601463317871094
action means and variances at step -10: [ 1.1035392 -2.       ] [2.1456333e-05 0.0000000e+00]
action means and variances at step -5: [ 1.385614 -2.      ] [7.5844444e-05 0.0000000e+00]
Episode * 137 * exploration epsilon 0.8767 * Episodic Reward is ==> -11.493741989135742
action means and variances at step -10: [ 1.450594 -2.      ] [5.2756008e-05 0.0000000e+00]
action means and variances at step -5: [ 1.5198454 -2.       ] [1.5943771e-05 0.0000000e+00]
Episode * 138 * exploration epsilon 0.8758 * Episodic Reward is ==> -11.767711639404297
action means and variances at step -10: [ 1.4609115 -2.       ] [

Episode * 151 * exploration epsilon 0.8641 * Episodic Reward is ==> -11.378840446472168
action means and variances at step -10: [ 1.3922825 -2.       ] [5.154903e-06 0.000000e+00]
action means and variances at step -5: [ 1.3704644 -2.       ] [1.5453954e-05 0.0000000e+00]
Episode * 152 * exploration epsilon 0.8632 * Episodic Reward is ==> -11.664072036743164
action means and variances at step -10: [ 1.5391712 -2.       ] [3.1512347e-05 0.0000000e+00]
action means and variances at step -5: [ 1.5832916 -2.       ] [4.4376815e-05 0.0000000e+00]
Episode * 153 * exploration epsilon 0.8623 * Episodic Reward is ==> -11.942819595336914
action means and variances at step -10: [ 1.6131777 -2.       ] [2.7589344e-05 0.0000000e+00]
action means and variances at step -5: [ 1.5980971 -2.       ] [1.3947822e-05 0.0000000e+00]
Episode * 154 * exploration epsilon 0.8613999999999999 * Episodic Reward is ==> -11.727705001831055
action means and variances at step -10: [ 1.613717 -2.      ] [1.1338745e-05 

Episode * 167 * exploration epsilon 0.8497 * Episodic Reward is ==> -11.630290985107422
action means and variances at step -10: [ 1.4026021 -2.       ] [1.1113341e-05 0.0000000e+00]
action means and variances at step -5: [ 1.3300027 -2.       ] [5.3353965e-06 0.0000000e+00]
Episode * 168 * exploration epsilon 0.8488 * Episodic Reward is ==> -11.115232467651367
action means and variances at step -10: [ 1.3300626 -2.       ] [8.006901e-06 0.000000e+00]
action means and variances at step -5: [ 1.3661679 -2.       ] [8.039699e-06 0.000000e+00]
Episode * 169 * exploration epsilon 0.8479 * Episodic Reward is ==> -11.68170166015625
action means and variances at step -10: [ 1.3837794 -2.       ] [9.529383e-06 0.000000e+00]
action means and variances at step -5: [ 1.3516368 -2.       ] [6.891375e-06 0.000000e+00]
Episode * 170 * exploration epsilon 0.847 * Episodic Reward is ==> -11.96133804321289
action means and variances at step -10: [ 1.4169778 -2.       ] [7.650649e-06 0.000000e+00]
action

Episode * 183 * exploration epsilon 0.8352999999999999 * Episodic Reward is ==> -11.271961212158203
action means and variances at step -10: [ 1.293413 -2.      ] [7.727178e-06 0.000000e+00]
action means and variances at step -5: [ 1.2765368 -2.       ] [9.575524e-06 0.000000e+00]
Episode * 184 * exploration epsilon 0.8344 * Episodic Reward is ==> -11.816490173339844
action means and variances at step -10: [ 1.309581 -2.      ] [2.6044822e-06 0.0000000e+00]
action means and variances at step -5: [ 1.3305112 -2.       ] [2.604632e-06 0.000000e+00]
Episode * 185 * exploration epsilon 0.8335 * Episodic Reward is ==> -11.701468467712402
action means and variances at step -10: [ 1.382561 -2.      ] [4.5400843e-06 0.0000000e+00]
action means and variances at step -5: [ 1.3875693 -2.       ] [3.8133494e-06 0.0000000e+00]
Episode * 186 * exploration epsilon 0.8326 * Episodic Reward is ==> -11.488277435302734
action means and variances at step -10: [ 1.3183268 -2.       ] [4.8362963e-06 0.000000

Episode * 199 * exploration epsilon 0.8209 * Episodic Reward is ==> -11.708271026611328
action means and variances at step -10: [ 1.3371181 -2.       ] [5.447581e-06 0.000000e+00]
action means and variances at step -5: [ 1.3635461 -2.       ] [8.6819955e-06 0.0000000e+00]
Episode * 200 * exploration epsilon 0.8200000000000001 * Episodic Reward is ==> -11.508810997009277
action means and variances at step -10: [ 1.4239087 -2.       ] [4.9998084e-06 0.0000000e+00]
action means and variances at step -5: [ 1.4511833 -2.       ] [3.404634e-06 0.000000e+00]
Episode * 201 * exploration epsilon 0.8190999999999999 * Episodic Reward is ==> -11.811673164367676
action means and variances at step -10: [ 1.4739796 -2.       ] [6.391183e-06 0.000000e+00]
action means and variances at step -5: [ 1.4575287 -2.       ] [4.0353007e-06 0.0000000e+00]
Episode * 202 * exploration epsilon 0.8182 * Episodic Reward is ==> -12.016965866088867
action means and variances at step -10: [ 1.4436188 -2.       ] [6.37

Episode * 215 * exploration epsilon 0.8065 * Episodic Reward is ==> -11.995675086975098
action means and variances at step -10: [ 1.5323914 -2.       ] [4.374662e-06 0.000000e+00]
action means and variances at step -5: [ 1.5353985 -2.       ] [2.574579e-06 0.000000e+00]
Episode * 216 * exploration epsilon 0.8056 * Episodic Reward is ==> -12.115348815917969
action means and variances at step -10: [ 1.5071856 -2.       ] [2.539419e-06 0.000000e+00]
action means and variances at step -5: [ 1.5027816 -2.       ] [3.103559e-06 0.000000e+00]
Episode * 217 * exploration epsilon 0.8047 * Episodic Reward is ==> -11.460977554321289
action means and variances at step -10: [ 1.489978 -2.      ] [5.572518e-06 0.000000e+00]
action means and variances at step -5: [ 1.5060683 -2.       ] [3.71001e-06 0.00000e+00]
Episode * 218 * exploration epsilon 0.8038 * Episodic Reward is ==> -11.93082046508789
action means and variances at step -10: [ 1.4727548 -2.       ] [4.4717885e-06 0.0000000e+00]
action mea

Episode * 231 * exploration epsilon 0.7921 * Episodic Reward is ==> -11.808183670043945
action means and variances at step -10: [ 1.4778502 -2.       ] [1.5946445e-06 0.0000000e+00]
action means and variances at step -5: [ 1.4844778 -2.       ] [1.5085657e-06 0.0000000e+00]
Episode * 232 * exploration epsilon 0.7912 * Episodic Reward is ==> -11.548084259033203
action means and variances at step -10: [ 1.4491941 -2.       ] [2.8076356e-06 0.0000000e+00]
action means and variances at step -5: [ 1.453901 -2.      ] [2.4679696e-06 0.0000000e+00]
Episode * 233 * exploration epsilon 0.7903 * Episodic Reward is ==> -12.296975135803223
action means and variances at step -10: [ 1.4315231 -2.       ] [2.3818247e-06 0.0000000e+00]
action means and variances at step -5: [ 1.4691453 -2.       ] [3.1874374e-06 0.0000000e+00]
Episode * 234 * exploration epsilon 0.7894 * Episodic Reward is ==> -11.568120956420898
action means and variances at step -10: [ 1.4979832 -2.       ] [6.742781e-06 0.000000e+0

Episode * 247 * exploration epsilon 0.7777000000000001 * Episodic Reward is ==> -11.950448989868164
action means and variances at step -10: [ 1.6102839 -2.       ] [4.758629e-06 0.000000e+00]
action means and variances at step -5: [ 1.6398927 -2.       ] [4.764107e-06 0.000000e+00]
Episode * 248 * exploration epsilon 0.7767999999999999 * Episodic Reward is ==> -11.735652923583984
action means and variances at step -10: [ 1.6148889 -2.       ] [3.0290673e-06 0.0000000e+00]
action means and variances at step -5: [ 1.6416562 -2.       ] [3.3629199e-06 0.0000000e+00]
Episode * 249 * exploration epsilon 0.7759 * Episodic Reward is ==> -11.370162963867188
action means and variances at step -10: [ 1.6517105 -2.       ] [7.391721e-06 0.000000e+00]
action means and variances at step -5: [ 1.630151 -2.      ] [5.529851e-06 0.000000e+00]
Episode * 250 * exploration epsilon 0.775 * Episodic Reward is ==> -11.757647514343262
action means and variances at step -10: [ 1.6831344 -2.       ] [2.0287184

Episode * 263 * exploration epsilon 0.7633 * Episodic Reward is ==> -12.075033187866211
action means and variances at step -10: [ 1.5814902 -2.       ] [4.5095767e-06 0.0000000e+00]
action means and variances at step -5: [ 1.6154176 -2.       ] [3.1095515e-06 0.0000000e+00]
Episode * 264 * exploration epsilon 0.7624 * Episodic Reward is ==> -11.478556632995605
action means and variances at step -10: [ 1.6180718 -2.       ] [6.6303674e-06 0.0000000e+00]
action means and variances at step -5: [ 1.6233462 -2.       ] [3.8558737e-06 0.0000000e+00]
Episode * 265 * exploration epsilon 0.7615000000000001 * Episodic Reward is ==> -12.156965255737305
action means and variances at step -10: [ 1.6181659 -2.       ] [2.7360206e-06 0.0000000e+00]
action means and variances at step -5: [ 1.6408471 -2.       ] [2.137026e-06 0.000000e+00]
Episode * 266 * exploration epsilon 0.7605999999999999 * Episodic Reward is ==> -11.848278045654297
action means and variances at step -10: [ 1.6201895 -2.       ] [

Episode * 279 * exploration epsilon 0.7489 * Episodic Reward is ==> -12.17797565460205
action means and variances at step -10: [ 1.4584926 -2.       ] [1.9727845e-06 0.0000000e+00]
action means and variances at step -5: [ 1.4553334 -2.       ] [2.1565345e-06 0.0000000e+00]
Episode * 280 * exploration epsilon 0.748 * Episodic Reward is ==> -11.823040008544922
action means and variances at step -10: [ 1.4454441 -2.       ] [2.0106136e-06 0.0000000e+00]
action means and variances at step -5: [ 1.4683 -2.    ] [2.084398e-06 0.000000e+00]
Episode * 281 * exploration epsilon 0.7471 * Episodic Reward is ==> -11.96685791015625
action means and variances at step -10: [ 1.5224675 -2.       ] [1.7783565e-06 0.0000000e+00]
action means and variances at step -5: [ 1.5198519 -2.       ] [9.858201e-07 0.000000e+00]
Episode * 282 * exploration epsilon 0.7462 * Episodic Reward is ==> -11.501890182495117
action means and variances at step -10: [ 1.5496155 -2.       ] [1.3247619e-06 0.0000000e+00]
action

Episode * 295 * exploration epsilon 0.7344999999999999 * Episodic Reward is ==> -12.091139793395996
action means and variances at step -10: [ 1.4431082 -2.       ] [1.4901302e-06 0.0000000e+00]
action means and variances at step -5: [ 1.4290303 -2.       ] [1.7508786e-06 0.0000000e+00]
Episode * 296 * exploration epsilon 0.7336 * Episodic Reward is ==> -11.271041870117188
action means and variances at step -10: [ 1.446078 -2.      ] [2.0599484e-06 0.0000000e+00]
action means and variances at step -5: [ 1.4311403 -2.       ] [1.9700508e-06 0.0000000e+00]
Episode * 297 * exploration epsilon 0.7326999999999999 * Episodic Reward is ==> -11.624017715454102
action means and variances at step -10: [ 1.426221 -2.      ] [1.941515e-06 0.000000e+00]
action means and variances at step -5: [ 1.4313835 -2.       ] [1.1268372e-06 0.0000000e+00]
Episode * 298 * exploration epsilon 0.7318 * Episodic Reward is ==> -11.693442344665527
action means and variances at step -10: [ 1.4743356 -2.       ] [1.62

Episode * 311 * exploration epsilon 0.7201 * Episodic Reward is ==> -11.677860260009766
action means and variances at step -10: [ 1.4500207 -2.       ] [8.838522e-07 0.000000e+00]
action means and variances at step -5: [ 1.4429764 -2.       ] [9.177082e-07 0.000000e+00]
Episode * 312 * exploration epsilon 0.7192000000000001 * Episodic Reward is ==> -11.97281265258789
action means and variances at step -10: [ 1.5019405 -2.       ] [1.0570384e-06 0.0000000e+00]
action means and variances at step -5: [ 1.4868321 -2.       ] [1.1995005e-06 0.0000000e+00]
Episode * 313 * exploration epsilon 0.7182999999999999 * Episodic Reward is ==> -11.790736198425293
action means and variances at step -10: [ 1.4100789 -2.       ] [2.0547504e-06 0.0000000e+00]
action means and variances at step -5: [ 1.3982337 -2.       ] [1.629403e-06 0.000000e+00]
Episode * 314 * exploration epsilon 0.7174 * Episodic Reward is ==> -12.40772819519043
action means and variances at step -10: [ 1.5561382 -2.       ] [2.1203

Episode * 327 * exploration epsilon 0.7057 * Episodic Reward is ==> -12.16618824005127
action means and variances at step -10: [ 1.5963821 -2.       ] [1.9139154e-06 0.0000000e+00]
action means and variances at step -5: [ 1.583168 -2.      ] [9.955226e-07 0.000000e+00]
Episode * 328 * exploration epsilon 0.7048000000000001 * Episodic Reward is ==> -11.98116683959961
action means and variances at step -10: [ 1.5607197 -2.       ] [1.4622877e-06 0.0000000e+00]
action means and variances at step -5: [ 1.6015371 -2.       ] [1.9469296e-06 0.0000000e+00]
Episode * 329 * exploration epsilon 0.7039 * Episodic Reward is ==> -12.008499145507812
action means and variances at step -10: [ 1.5114115 -2.       ] [3.71598e-06 0.00000e+00]
action means and variances at step -5: [ 1.564892 -2.      ] [1.9468164e-06 0.0000000e+00]
Episode * 330 * exploration epsilon 0.7030000000000001 * Episodic Reward is ==> -11.89178466796875
action means and variances at step -10: [ 1.5516232 -2.       ] [2.3108964e-

Episode * 343 * exploration epsilon 0.6913 * Episodic Reward is ==> -12.223471641540527
action means and variances at step -10: [ 1.6822572 -2.       ] [2.0859252e-06 0.0000000e+00]
action means and variances at step -5: [ 1.6882138 -2.       ] [1.5852501e-06 0.0000000e+00]
Episode * 344 * exploration epsilon 0.6903999999999999 * Episodic Reward is ==> -12.14313793182373
action means and variances at step -10: [ 1.6957642 -2.       ] [1.8306692e-06 0.0000000e+00]
action means and variances at step -5: [ 1.6683881 -2.       ] [1.4813252e-06 0.0000000e+00]
Episode * 345 * exploration epsilon 0.6895 * Episodic Reward is ==> -11.947635650634766
action means and variances at step -10: [ 1.6281081 -2.       ] [2.0454963e-06 0.0000000e+00]
action means and variances at step -5: [ 1.629454 -2.      ] [2.6835298e-06 0.0000000e+00]
Episode * 346 * exploration epsilon 0.6886 * Episodic Reward is ==> -12.183666229248047
action means and variances at step -10: [ 1.6033489 -2.       ] [9.4141836e-07

Episode * 359 * exploration epsilon 0.6769000000000001 * Episodic Reward is ==> -12.526069641113281
action means and variances at step -10: [ 1.2832507 -2.       ] [1.4484369e-06 0.0000000e+00]
action means and variances at step -5: [ 1.2917278 -2.       ] [7.417008e-07 0.000000e+00]
Episode * 360 * exploration epsilon 0.6759999999999999 * Episodic Reward is ==> -11.92110538482666
action means and variances at step -10: [ 1.3085188 -2.       ] [8.1173357e-07 0.0000000e+00]
action means and variances at step -5: [ 1.3345969 -2.       ] [8.5877787e-07 0.0000000e+00]
Episode * 361 * exploration epsilon 0.6751 * Episodic Reward is ==> -12.120820999145508
action means and variances at step -10: [ 1.2915289 -2.       ] [2.116789e-06 0.000000e+00]
action means and variances at step -5: [ 1.263816 -2.      ] [1.5181611e-06 0.0000000e+00]
Episode * 362 * exploration epsilon 0.6741999999999999 * Episodic Reward is ==> -11.857542991638184
action means and variances at step -10: [ 1.3231733 -2.   

Episode * 375 * exploration epsilon 0.6625 * Episodic Reward is ==> -11.957870483398438
action means and variances at step -10: [ 1.4508015 -2.       ] [1.1757465e-06 0.0000000e+00]
action means and variances at step -5: [ 1.4886141 -2.       ] [9.2998954e-07 0.0000000e+00]
Episode * 376 * exploration epsilon 0.6616 * Episodic Reward is ==> -12.069818496704102
action means and variances at step -10: [ 1.504111 -2.      ] [9.666767e-07 0.000000e+00]
action means and variances at step -5: [ 1.5239428 -2.       ] [8.321233e-07 0.000000e+00]
Episode * 377 * exploration epsilon 0.6607000000000001 * Episodic Reward is ==> -12.143548965454102
action means and variances at step -10: [ 1.4694566 -2.       ] [1.973929e-06 0.000000e+00]
action means and variances at step -5: [ 1.4928391 -2.       ] [1.2979896e-06 0.0000000e+00]
Episode * 378 * exploration epsilon 0.6597999999999999 * Episodic Reward is ==> -12.223855972290039
action means and variances at step -10: [ 1.4556978 -2.       ] [1.4187

Episode * 391 * exploration epsilon 0.6480999999999999 * Episodic Reward is ==> -12.087672233581543
action means and variances at step -10: [-0.40455607 -2.        ] [8.679651e-07 0.000000e+00]
action means and variances at step -5: [-0.40251502 -2.        ] [1.0935341e-06 0.0000000e+00]
Episode * 392 * exploration epsilon 0.6472 * Episodic Reward is ==> -11.942333221435547
action means and variances at step -10: [-0.4217188 -2.       ] [6.94788e-07 0.00000e+00]
action means and variances at step -5: [-0.42276472 -2.        ] [6.9372805e-07 0.0000000e+00]
Episode * 393 * exploration epsilon 0.6463 * Episodic Reward is ==> -12.1542329788208
action means and variances at step -10: [-0.44588503 -2.        ] [1.4569111e-06 0.0000000e+00]
action means and variances at step -5: [-0.44417483 -2.        ] [1.3180132e-06 0.0000000e+00]
Episode * 394 * exploration epsilon 0.6454 * Episodic Reward is ==> -12.071185111999512
action means and variances at step -10: [-0.41723835 -2.        ] [1.6782

Episode * 407 * exploration epsilon 0.6336999999999999 * Episodic Reward is ==> -11.848520278930664
action means and variances at step -10: [-0.43421653 -2.        ] [1.6934157e-06 0.0000000e+00]
action means and variances at step -5: [-0.44938987 -2.        ] [1.5497814e-06 0.0000000e+00]
Episode * 408 * exploration epsilon 0.6328 * Episodic Reward is ==> -12.043724060058594
action means and variances at step -10: [-0.46304926 -2.        ] [4.5878463e-07 0.0000000e+00]
action means and variances at step -5: [-0.47376087 -2.        ] [6.4575124e-07 0.0000000e+00]
Episode * 409 * exploration epsilon 0.6318999999999999 * Episodic Reward is ==> -12.058656692504883
action means and variances at step -10: [-0.4976264 -2.       ] [4.4716785e-07 0.0000000e+00]
action means and variances at step -5: [-0.48883978 -2.        ] [4.3769703e-07 0.0000000e+00]
Episode * 410 * exploration epsilon 0.631 * Episodic Reward is ==> -12.364858627319336
action means and variances at step -10: [-0.5141056 -2

Episode * 422 * exploration epsilon 0.6202 * Episodic Reward is ==> -11.870060920715332
action means and variances at step -10: [-0.43601188 -2.        ] [3.9853143e-07 0.0000000e+00]
action means and variances at step -5: [-0.4357407 -2.       ] [3.3907685e-07 0.0000000e+00]
Episode * 423 * exploration epsilon 0.6193 * Episodic Reward is ==> -12.086993217468262
action means and variances at step -10: [-0.48921517 -2.        ] [5.190812e-07 0.000000e+00]
action means and variances at step -5: [-0.48517686 -2.        ] [4.3491616e-07 0.0000000e+00]
Episode * 424 * exploration epsilon 0.6184 * Episodic Reward is ==> -12.30179214477539
action means and variances at step -10: [-0.45677415 -2.        ] [4.369852e-07 0.000000e+00]
action means and variances at step -5: [-0.47068137 -2.        ] [2.916468e-07 0.000000e+00]
Episode * 425 * exploration epsilon 0.6174999999999999 * Episodic Reward is ==> -11.729440689086914
action means and variances at step -10: [-0.4670898 -2.       ] [5.73085

Episode * 438 * exploration epsilon 0.6058 * Episodic Reward is ==> -11.552511215209961
action means and variances at step -10: [-0.02224686 -2.        ] [6.2901114e-07 0.0000000e+00]
action means and variances at step -5: [-0.01241607 -2.        ] [6.7066287e-07 0.0000000e+00]
Episode * 439 * exploration epsilon 0.6049 * Episodic Reward is ==> -11.75342845916748
action means and variances at step -10: [-0.01947619 -2.        ] [8.833565e-07 0.000000e+00]
action means and variances at step -5: [-0.02496328 -2.        ] [2.96867e-07 0.00000e+00]
Episode * 440 * exploration epsilon 0.604 * Episodic Reward is ==> -11.844756126403809
action means and variances at step -10: [-0.01489476 -2.        ] [3.072346e-07 0.000000e+00]
action means and variances at step -5: [-0.01035637 -2.        ] [3.26123e-07 0.00000e+00]
Episode * 441 * exploration epsilon 0.6031 * Episodic Reward is ==> -11.936244010925293
action means and variances at step -10: [-0.01407654 -2.        ] [2.5353216e-07 0.000000

Episode * 454 * exploration epsilon 0.5913999999999999 * Episodic Reward is ==> -11.791109085083008
action means and variances at step -10: [-0.03816124 -2.        ] [3.5449858e-07 0.0000000e+00]
action means and variances at step -5: [-0.02460591 -2.        ] [2.8440164e-07 0.0000000e+00]
Episode * 455 * exploration epsilon 0.5905 * Episodic Reward is ==> -11.907027244567871
action means and variances at step -10: [-0.05949074 -2.        ] [2.9775651e-07 0.0000000e+00]
action means and variances at step -5: [-0.05522942 -2.        ] [2.7335233e-07 0.0000000e+00]
Episode * 456 * exploration epsilon 0.5895999999999999 * Episodic Reward is ==> -11.681609153747559
action means and variances at step -10: [-0.0430658 -2.       ] [2.0903738e-07 0.0000000e+00]
action means and variances at step -5: [-0.04107049 -2.        ] [1.8270518e-07 0.0000000e+00]
Episode * 457 * exploration epsilon 0.5887 * Episodic Reward is ==> -11.57514762878418
action means and variances at step -10: [-0.03758851 -

Episode * 469 * exploration epsilon 0.5779 * Episodic Reward is ==> -11.34129810333252
action means and variances at step -10: [-0.05744385 -2.        ] [1.09283924e-07 0.00000000e+00]
action means and variances at step -5: [-0.0515311 -2.       ] [7.0186445e-08 0.0000000e+00]
Episode * 470 * exploration epsilon 0.577 * Episodic Reward is ==> -12.044756889343262
action means and variances at step -10: [-0.05790449 -2.        ] [1.3265957e-07 0.0000000e+00]
action means and variances at step -5: [-0.07021615 -2.        ] [1.9052008e-07 0.0000000e+00]
Episode * 471 * exploration epsilon 0.5761 * Episodic Reward is ==> -12.001693725585938
action means and variances at step -10: [-0.05961664 -2.        ] [2.1024147e-07 0.0000000e+00]
action means and variances at step -5: [-0.05433519 -2.        ] [9.683788e-08 0.000000e+00]
Episode * 472 * exploration epsilon 0.5751999999999999 * Episodic Reward is ==> -11.973928451538086
action means and variances at step -10: [-0.04696859 -2.        ] [

Episode * 485 * exploration epsilon 0.5635 * Episodic Reward is ==> -11.786407470703125
action means and variances at step -10: [-0.03363837 -2.        ] [2.2691225e-07 0.0000000e+00]
action means and variances at step -5: [-0.03072947 -2.        ] [1.7999113e-07 0.0000000e+00]
Episode * 486 * exploration epsilon 0.5626 * Episodic Reward is ==> -11.698358535766602
action means and variances at step -10: [-0.04901771 -2.        ] [7.158432e-08 0.000000e+00]
action means and variances at step -5: [-0.05106673 -2.        ] [6.449999e-08 0.000000e+00]
Episode * 487 * exploration epsilon 0.5617 * Episodic Reward is ==> -11.211576461791992
action means and variances at step -10: [-0.0581716 -2.       ] [1.8484643e-07 0.0000000e+00]
action means and variances at step -5: [-0.08292204 -2.        ] [3.1147607e-07 0.0000000e+00]
Episode * 488 * exploration epsilon 0.5608 * Episodic Reward is ==> -12.32052230834961
action means and variances at step -10: [-0.05652659 -2.        ] [2.9283632e-07 0

Episode * 500 * exploration epsilon 0.55 * Episodic Reward is ==> -11.898035049438477
action means and variances at step -10: [-0.07408251 -2.        ] [1.3053223e-07 0.0000000e+00]
action means and variances at step -5: [-0.0632136 -2.       ] [7.978545e-08 0.000000e+00]
Episode * 501 * exploration epsilon 0.5490999999999999 * Episodic Reward is ==> -11.7425537109375
action means and variances at step -10: [-0.05812882 -2.        ] [1.4228844e-07 0.0000000e+00]
action means and variances at step -5: [-0.04380697 -2.        ] [1.976665e-07 0.000000e+00]
Episode * 502 * exploration epsilon 0.5482 * Episodic Reward is ==> -11.794587135314941
action means and variances at step -10: [-0.04990387 -2.        ] [1.099999e-07 0.000000e+00]
action means and variances at step -5: [-0.04748359 -2.        ] [5.4145776e-08 0.0000000e+00]
Episode * 503 * exploration epsilon 0.5473 * Episodic Reward is ==> -11.882684707641602
action means and variances at step -10: [-0.03707073 -2.        ] [1.454590

Episode * 516 * exploration epsilon 0.5356 * Episodic Reward is ==> -12.066084861755371
action means and variances at step -10: [-0.11180563 -2.        ] [2.7423746e-07 0.0000000e+00]
action means and variances at step -5: [-0.11558931 -2.        ] [1.5344388e-07 0.0000000e+00]
Episode * 517 * exploration epsilon 0.5347 * Episodic Reward is ==> -12.052565574645996
action means and variances at step -10: [-0.09588376 -2.        ] [1.08236954e-07 0.00000000e+00]
action means and variances at step -5: [-0.10995742 -2.        ] [9.376992e-08 0.000000e+00]
Episode * 518 * exploration epsilon 0.5338 * Episodic Reward is ==> -12.43392276763916
action means and variances at step -10: [-0.10231657 -2.        ] [1.249366e-07 0.000000e+00]
action means and variances at step -5: [-0.09960248 -2.        ] [1.083923e-07 0.000000e+00]
Episode * 519 * exploration epsilon 0.5328999999999999 * Episodic Reward is ==> -12.278606414794922
action means and variances at step -10: [-0.09114566 -2.        ] [5

Episode * 531 * exploration epsilon 0.5221 * Episodic Reward is ==> -12.399872779846191
action means and variances at step -10: [-0.0818286 -2.       ] [4.3438693e-08 0.0000000e+00]
action means and variances at step -5: [-0.09073523 -2.        ] [6.878756e-08 0.000000e+00]
Episode * 532 * exploration epsilon 0.5212 * Episodic Reward is ==> -11.849737167358398
action means and variances at step -10: [-0.10195161 -2.        ] [1.8926545e-07 0.0000000e+00]
action means and variances at step -5: [-0.10054377 -2.        ] [1.2436882e-07 0.0000000e+00]
Episode * 533 * exploration epsilon 0.5203 * Episodic Reward is ==> -12.195201873779297
action means and variances at step -10: [-0.11045262 -2.        ] [6.097876e-07 0.000000e+00]
action means and variances at step -5: [-0.09928797 -2.        ] [2.510588e-07 0.000000e+00]
Episode * 534 * exploration epsilon 0.5194 * Episodic Reward is ==> -12.145849227905273
action means and variances at step -10: [-0.07271669 -2.        ] [5.3234714e-08 0.

Episode * 547 * exploration epsilon 0.5077 * Episodic Reward is ==> -11.931896209716797
action means and variances at step -10: [-0.06327209 -2.        ] [1.6980066e-07 0.0000000e+00]
action means and variances at step -5: [-0.06264518 -2.        ] [1.654405e-07 0.000000e+00]
Episode * 548 * exploration epsilon 0.5068 * Episodic Reward is ==> -12.116262435913086
action means and variances at step -10: [-0.08390432 -2.        ] [1.6042662e-07 0.0000000e+00]
action means and variances at step -5: [-0.0740623 -2.       ] [6.207515e-08 0.000000e+00]
Episode * 549 * exploration epsilon 0.5059 * Episodic Reward is ==> -12.175307273864746
action means and variances at step -10: [-0.07809513 -2.        ] [1.05947066e-07 0.00000000e+00]
action means and variances at step -5: [-0.08194752 -2.        ] [1.2147703e-07 0.0000000e+00]
Episode * 550 * exploration epsilon 0.505 * Episodic Reward is ==> -11.912994384765625
action means and variances at step -10: [-0.07774685 -2.        ] [1.521211e-07 

Episode * 563 * exploration epsilon 0.49329999999999996 * Episodic Reward is ==> -12.342105865478516
action means and variances at step -10: [-0.06887615 -2.        ] [9.527331e-08 0.000000e+00]
action means and variances at step -5: [-0.07256627 -2.        ] [1.10916595e-07 0.00000000e+00]
Episode * 564 * exploration epsilon 0.49239999999999995 * Episodic Reward is ==> -12.241230010986328
action means and variances at step -10: [-0.05988704 -2.        ] [5.1897167e-08 0.0000000e+00]
action means and variances at step -5: [-0.07591165 -2.        ] [1.0440602e-07 0.0000000e+00]
Episode * 565 * exploration epsilon 0.49150000000000005 * Episodic Reward is ==> -12.147424697875977
action means and variances at step -10: [-0.0772412 -2.       ] [8.849728e-08 0.000000e+00]
action means and variances at step -5: [-0.06100623 -2.        ] [3.9800984e-08 0.0000000e+00]
Episode * 566 * exploration epsilon 0.4905999999999999 * Episodic Reward is ==> -12.026692390441895
action means and variances a

Episode * 578 * exploration epsilon 0.4798 * Episodic Reward is ==> -11.773337364196777
action means and variances at step -10: [-0.08017971 -2.        ] [1.297763e-07 0.000000e+00]
action means and variances at step -5: [-0.05404857 -2.        ] [8.1452725e-08 0.0000000e+00]
Episode * 579 * exploration epsilon 0.4789 * Episodic Reward is ==> -11.68204402923584
action means and variances at step -10: [-0.05549369 -2.        ] [5.8509958e-08 0.0000000e+00]
action means and variances at step -5: [-0.06863433 -2.        ] [3.301409e-08 0.000000e+00]
Episode * 580 * exploration epsilon 0.478 * Episodic Reward is ==> -11.996965408325195
action means and variances at step -10: [-0.07686866 -2.        ] [5.372148e-08 0.000000e+00]
action means and variances at step -5: [-0.0659437 -2.       ] [2.9979493e-08 0.0000000e+00]
Episode * 581 * exploration epsilon 0.47709999999999997 * Episodic Reward is ==> -11.619327545166016
action means and variances at step -10: [-0.0428156 -2.       ] [7.5427e

Episode * 594 * exploration epsilon 0.4653999999999999 * Episodic Reward is ==> -12.323518753051758
action means and variances at step -10: [-0.01060027 -2.        ] [1.1404922e-07 0.0000000e+00]
action means and variances at step -5: [-9.190303e-05 -2.000000e+00] [1.844746e-07 0.000000e+00]
Episode * 595 * exploration epsilon 0.4645 * Episodic Reward is ==> -11.546588897705078
action means and variances at step -10: [-0.03908496 -2.        ] [7.836859e-08 0.000000e+00]
action means and variances at step -5: [-0.01977044 -2.        ] [6.602271e-08 0.000000e+00]
Episode * 596 * exploration epsilon 0.4636 * Episodic Reward is ==> -11.638731956481934
action means and variances at step -10: [-0.0113255 -2.       ] [1.8863211e-07 0.0000000e+00]
action means and variances at step -5: [-0.03262201 -2.        ] [3.9284647e-08 0.0000000e+00]
Episode * 597 * exploration epsilon 0.4626999999999999 * Episodic Reward is ==> -11.914205551147461
action means and variances at step -10: [ 0.01004448 -2

Episode * 609 * exploration epsilon 0.45189999999999997 * Episodic Reward is ==> -12.198981285095215
action means and variances at step -10: [ 0.03839878 -2.        ] [1.3665667e-07 0.0000000e+00]
action means and variances at step -5: [ 0.03386382 -2.        ] [9.118911e-08 0.000000e+00]
Episode * 610 * exploration epsilon 0.45099999999999996 * Episodic Reward is ==> -11.687796592712402
action means and variances at step -10: [ 0.02733769 -2.        ] [1.0567331e-07 0.0000000e+00]
action means and variances at step -5: [ 0.04670698 -2.        ] [1.8567113e-07 0.0000000e+00]
Episode * 611 * exploration epsilon 0.45010000000000006 * Episodic Reward is ==> -12.57534408569336
action means and variances at step -10: [ 0.02596277 -2.        ] [1.0066336e-07 0.0000000e+00]
action means and variances at step -5: [ 0.03205685 -2.        ] [1.25289e-07 0.00000e+00]
Episode * 612 * exploration epsilon 0.44919999999999993 * Episodic Reward is ==> -12.088330268859863
action means and variances at 

Episode * 625 * exploration epsilon 0.4375 * Episodic Reward is ==> -12.232244491577148
action means and variances at step -10: [-0.04362422 -2.        ] [5.408923e-08 0.000000e+00]
action means and variances at step -5: [-0.03388183 -2.        ] [2.4208296e-08 0.0000000e+00]
Episode * 626 * exploration epsilon 0.4366 * Episodic Reward is ==> -12.100410461425781
action means and variances at step -10: [-0.01547509 -2.        ] [7.1172884e-08 0.0000000e+00]
action means and variances at step -5: [-0.01486096 -2.        ] [4.6914817e-08 0.0000000e+00]
Episode * 627 * exploration epsilon 0.4357 * Episodic Reward is ==> -11.714212417602539
action means and variances at step -10: [-0.01001988 -2.        ] [6.136639e-08 0.000000e+00]
action means and variances at step -5: [-0.01079113 -2.        ] [6.184286e-08 0.000000e+00]
Episode * 628 * exploration epsilon 0.43479999999999996 * Episodic Reward is ==> -11.857421875
action means and variances at step -10: [-0.01155351 -2.        ] [6.46071

Episode * 640 * exploration epsilon 0.42400000000000004 * Episodic Reward is ==> -12.01679801940918
action means and variances at step -10: [-3.6348385e-04 -2.0000000e+00] [4.3598785e-08 0.0000000e+00]
action means and variances at step -5: [ 0.00681741 -2.        ] [3.78014e-08 0.00000e+00]
Episode * 641 * exploration epsilon 0.42310000000000003 * Episodic Reward is ==> -11.930929183959961
action means and variances at step -10: [ 0.0332632 -2.       ] [1.1014822e-07 0.0000000e+00]
action means and variances at step -5: [ 0.02828055 -2.        ] [7.206486e-08 0.000000e+00]
Episode * 642 * exploration epsilon 0.4221999999999999 * Episodic Reward is ==> -12.109487533569336
action means and variances at step -10: [ 0.01043807 -2.        ] [3.7958564e-08 0.0000000e+00]
action means and variances at step -5: [ 0.0105208 -2.       ] [4.008438e-08 0.000000e+00]
Episode * 643 * exploration epsilon 0.4213 * Episodic Reward is ==> -11.960118293762207
action means and variances at step -10: [-0.

Episode * 656 * exploration epsilon 0.4096000000000001 * Episodic Reward is ==> -12.459772109985352
action means and variances at step -10: [ 0.01655593 -2.        ] [1.4285554e-07 0.0000000e+00]
action means and variances at step -5: [ 0.01633358 -2.        ] [1.2053576e-07 0.0000000e+00]
Episode * 657 * exploration epsilon 0.40869999999999995 * Episodic Reward is ==> -11.876667022705078
action means and variances at step -10: [ 0.01232726 -2.        ] [4.4087205e-08 0.0000000e+00]
action means and variances at step -5: [ 0.02191787 -2.        ] [4.9861065e-08 0.0000000e+00]
Episode * 658 * exploration epsilon 0.40779999999999994 * Episodic Reward is ==> -12.258454322814941
action means and variances at step -10: [ 0.01896364 -2.        ] [5.3439322e-08 0.0000000e+00]
action means and variances at step -5: [ 0.02023347 -2.        ] [4.3443357e-08 0.0000000e+00]
Episode * 659 * exploration epsilon 0.40689999999999993 * Episodic Reward is ==> -12.358063697814941
action means and varianc

Episode * 671 * exploration epsilon 0.3961 * Episodic Reward is ==> -11.831658363342285
action means and variances at step -10: [ 0.00708946 -2.        ] [2.5776151e-08 0.0000000e+00]
action means and variances at step -5: [-1.5661935e-05 -2.0000000e+00] [2.4429134e-08 0.0000000e+00]
Episode * 672 * exploration epsilon 0.3951999999999999 * Episodic Reward is ==> -11.968169212341309
action means and variances at step -10: [-0.00778627 -2.        ] [2.695013e-08 0.000000e+00]
action means and variances at step -5: [-0.02513024 -2.        ] [5.783924e-08 0.000000e+00]
Episode * 673 * exploration epsilon 0.3943 * Episodic Reward is ==> -11.751055717468262
action means and variances at step -10: [ 0.00581338 -2.        ] [2.7463274e-08 0.0000000e+00]
action means and variances at step -5: [ 1.8388848e-04 -2.0000000e+00] [2.1788681e-08 0.0000000e+00]
Episode * 674 * exploration epsilon 0.39339999999999997 * Episodic Reward is ==> -12.238525390625
action means and variances at step -10: [-0.0

Episode * 687 * exploration epsilon 0.38169999999999993 * Episodic Reward is ==> -12.139802932739258
action means and variances at step -10: [-0.04220659 -2.        ] [7.2091616e-08 0.0000000e+00]
action means and variances at step -5: [-0.03694508 -2.        ] [5.3026145e-08 0.0000000e+00]
Episode * 688 * exploration epsilon 0.3807999999999999 * Episodic Reward is ==> -12.316553115844727
action means and variances at step -10: [-0.03039874 -2.        ] [2.4673408e-08 0.0000000e+00]
action means and variances at step -5: [-0.03112085 -2.        ] [2.6129674e-08 0.0000000e+00]
Episode * 689 * exploration epsilon 0.3799 * Episodic Reward is ==> -12.065239906311035
action means and variances at step -10: [-0.06174329 -2.        ] [6.8209864e-08 0.0000000e+00]
action means and variances at step -5: [-0.05047105 -2.        ] [5.1214887e-08 0.0000000e+00]
Episode * 690 * exploration epsilon 0.379 * Episodic Reward is ==> -12.019694328308105
action means and variances at step -10: [-0.0280716

Episode * 702 * exploration epsilon 0.36819999999999997 * Episodic Reward is ==> -12.259759902954102
action means and variances at step -10: [-0.03439054 -2.        ] [4.3374012e-08 0.0000000e+00]
action means and variances at step -5: [-0.02526855 -2.        ] [4.7131202e-08 0.0000000e+00]
Episode * 703 * exploration epsilon 0.36729999999999996 * Episodic Reward is ==> -12.294390678405762
action means and variances at step -10: [-0.04750373 -2.        ] [3.645545e-08 0.000000e+00]
action means and variances at step -5: [-0.03802916 -2.        ] [3.9432333e-08 0.0000000e+00]
Episode * 704 * exploration epsilon 0.36639999999999995 * Episodic Reward is ==> -12.228042602539062
action means and variances at step -10: [ 0.00936973 -2.        ] [7.5820836e-08 0.0000000e+00]
action means and variances at step -5: [-1.9070187e-03 -2.0000000e+00] [5.6292656e-08 0.0000000e+00]
Episode * 705 * exploration epsilon 0.36550000000000005 * Episodic Reward is ==> -12.25076675415039
action means and var

Episode * 718 * exploration epsilon 0.3538 * Episodic Reward is ==> -12.52598762512207
action means and variances at step -10: [-0.03505344 -2.        ] [2.7888621e-08 0.0000000e+00]
action means and variances at step -5: [-0.01882741 -2.        ] [3.690587e-08 0.000000e+00]
Episode * 719 * exploration epsilon 0.3529 * Episodic Reward is ==> -12.494102478027344
action means and variances at step -10: [-0.01560434 -2.        ] [3.727131e-08 0.000000e+00]
action means and variances at step -5: [-0.0138356 -2.       ] [3.918919e-08 0.000000e+00]
Episode * 720 * exploration epsilon 0.352 * Episodic Reward is ==> -12.02610969543457
action means and variances at step -10: [-0.0283314 -2.       ] [2.2262741e-08 0.0000000e+00]
action means and variances at step -5: [-0.02078141 -2.        ] [2.782344e-08 0.000000e+00]
Episode * 721 * exploration epsilon 0.35109999999999997 * Episodic Reward is ==> -12.042426109313965
action means and variances at step -10: [-0.02030304 -2.        ] [3.1701784e

Episode * 733 * exploration epsilon 0.34029999999999994 * Episodic Reward is ==> -12.182494163513184
action means and variances at step -10: [-0.01602296 -2.        ] [1.980233e-08 0.000000e+00]
action means and variances at step -5: [-0.01282667 -2.        ] [1.9850217e-08 0.0000000e+00]
Episode * 734 * exploration epsilon 0.3393999999999999 * Episodic Reward is ==> -12.259100914001465
action means and variances at step -10: [-0.0127049 -2.       ] [2.2408248e-08 0.0000000e+00]
action means and variances at step -5: [-0.01852711 -2.        ] [2.207743e-08 0.000000e+00]
Episode * 735 * exploration epsilon 0.3385 * Episodic Reward is ==> -12.058308601379395
action means and variances at step -10: [-0.02434424 -2.        ] [1.8056662e-08 0.0000000e+00]
action means and variances at step -5: [-0.0253531 -2.       ] [1.8936055e-08 0.0000000e+00]
Episode * 736 * exploration epsilon 0.3376 * Episodic Reward is ==> -12.461986541748047
action means and variances at step -10: [-0.00447564 -2.  

Episode * 749 * exploration epsilon 0.32589999999999997 * Episodic Reward is ==> -12.407867431640625
action means and variances at step -10: [ 1.9881537e-03 -2.0000000e+00] [2.9728557e-08 0.0000000e+00]
action means and variances at step -5: [-0.01298452 -2.        ] [1.5261534e-08 0.0000000e+00]
Episode * 750 * exploration epsilon 0.32499999999999996 * Episodic Reward is ==> -12.13194465637207
action means and variances at step -10: [ 0.00743151 -2.        ] [4.1369624e-08 0.0000000e+00]
action means and variances at step -5: [ 0.00259467 -2.        ] [3.5716806e-08 0.0000000e+00]
Episode * 751 * exploration epsilon 0.32410000000000005 * Episodic Reward is ==> -12.554286003112793
action means and variances at step -10: [-0.02256428 -2.        ] [2.0271232e-08 0.0000000e+00]
action means and variances at step -5: [-0.01005953 -2.        ] [2.7744836e-08 0.0000000e+00]
Episode * 752 * exploration epsilon 0.32319999999999993 * Episodic Reward is ==> -12.164236068725586
action means and v

Episode * 764 * exploration epsilon 0.3124 * Episodic Reward is ==> -12.293292045593262
action means and variances at step -10: [ 0.01662775 -2.        ] [9.159476e-08 0.000000e+00]
action means and variances at step -5: [ 0.01618079 -2.        ] [8.303108e-08 0.000000e+00]
Episode * 765 * exploration epsilon 0.3115 * Episodic Reward is ==> -12.084770202636719
action means and variances at step -10: [ 0.00373041 -2.        ] [4.6298577e-08 0.0000000e+00]
action means and variances at step -5: [ 4.8033142e-04 -2.0000000e+00] [3.8818328e-08 0.0000000e+00]
Episode * 766 * exploration epsilon 0.3106 * Episodic Reward is ==> -12.324296951293945
action means and variances at step -10: [-0.00342261 -2.        ] [3.0144488e-08 0.0000000e+00]
action means and variances at step -5: [-0.0066246 -2.       ] [2.425707e-08 0.000000e+00]
Episode * 767 * exploration epsilon 0.3097 * Episodic Reward is ==> -12.159582138061523
action means and variances at step -10: [-4.5795733e-04 -2.0000000e+00] [3.85

Episode * 779 * exploration epsilon 0.29889999999999994 * Episodic Reward is ==> -12.29765796661377
action means and variances at step -10: [-0.00842532 -2.        ] [2.2051363e-08 0.0000000e+00]
action means and variances at step -5: [ 0.0025485 -2.       ] [1.6472097e-08 0.0000000e+00]
Episode * 780 * exploration epsilon 0.29800000000000004 * Episodic Reward is ==> -12.02682876586914
action means and variances at step -10: [-0.01033632 -2.        ] [9.4751e-09 0.0000e+00]
action means and variances at step -5: [-0.00308696 -2.        ] [5.874463e-09 0.000000e+00]
Episode * 781 * exploration epsilon 0.29710000000000003 * Episodic Reward is ==> -11.813788414001465
action means and variances at step -10: [ 1.8234628e-03 -2.0000000e+00] [1.0356739e-08 0.0000000e+00]
action means and variances at step -5: [-1.8896107e-03 -2.0000000e+00] [9.22288e-09 0.00000e+00]
Episode * 782 * exploration epsilon 0.2961999999999999 * Episodic Reward is ==> -11.953073501586914
action means and variances a

Episode * 795 * exploration epsilon 0.2845 * Episodic Reward is ==> -12.367855072021484
action means and variances at step -10: [-0.03690122 -2.        ] [3.2474908e-08 0.0000000e+00]
action means and variances at step -5: [-0.03419939 -2.        ] [2.4656257e-08 0.0000000e+00]
Episode * 796 * exploration epsilon 0.2836000000000001 * Episodic Reward is ==> -12.674535751342773
action means and variances at step -10: [-1.7249304e-03 -2.0000000e+00] [3.4266971e-09 0.0000000e+00]
action means and variances at step -5: [-0.01262498 -2.        ] [2.7858922e-09 0.0000000e+00]
Episode * 797 * exploration epsilon 0.28269999999999995 * Episodic Reward is ==> -12.419893264770508
action means and variances at step -10: [-0.00460185 -2.        ] [5.4841265e-09 0.0000000e+00]
action means and variances at step -5: [ 1.2215127e-03 -2.0000000e+00] [5.734203e-09 0.000000e+00]
Episode * 798 * exploration epsilon 0.28179999999999994 * Episodic Reward is ==> -12.546884536743164
action means and variances 

Episode * 810 * exploration epsilon 0.271 * Episodic Reward is ==> -13.030828475952148
action means and variances at step -10: [-0.06079178 -2.        ] [1.0061519e-07 0.0000000e+00]
action means and variances at step -5: [-0.05935116 -2.        ] [8.209999e-08 0.000000e+00]
Episode * 811 * exploration epsilon 0.2701 * Episodic Reward is ==> -12.525904655456543
action means and variances at step -10: [ 0.00265189 -2.        ] [9.668386e-08 0.000000e+00]
action means and variances at step -5: [-1.0366557e-03 -2.0000000e+00] [7.927136e-08 0.000000e+00]
Episode * 812 * exploration epsilon 0.2691999999999999 * Episodic Reward is ==> -12.605049133300781
action means and variances at step -10: [-0.0222608 -2.       ] [7.592544e-07 0.000000e+00]
action means and variances at step -5: [-0.00621965 -2.        ] [8.59168e-07 0.00000e+00]
Episode * 813 * exploration epsilon 0.2683 * Episodic Reward is ==> -12.358390808105469
action means and variances at step -10: [-0.02814328 -2.        ] [1.326

Episode * 826 * exploration epsilon 0.25660000000000005 * Episodic Reward is ==> -12.446269989013672
action means and variances at step -10: [-0.02276491 -2.        ] [1.222376e-08 0.000000e+00]
action means and variances at step -5: [-0.03054943 -2.        ] [1.465422e-08 0.000000e+00]
Episode * 827 * exploration epsilon 0.2556999999999999 * Episodic Reward is ==> -12.551462173461914
action means and variances at step -10: [ 0.00757163 -2.        ] [3.093326e-08 0.000000e+00]
action means and variances at step -5: [ 0.00201836 -2.        ] [2.0374054e-08 0.0000000e+00]
Episode * 828 * exploration epsilon 0.2547999999999999 * Episodic Reward is ==> -12.268136978149414
action means and variances at step -10: [-0.02255728 -2.        ] [7.875168e-09 0.000000e+00]
action means and variances at step -5: [-0.01742836 -2.        ] [6.21691e-09 0.00000e+00]
Episode * 829 * exploration epsilon 0.2539 * Episodic Reward is ==> -12.026410102844238
action means and variances at step -10: [-0.012487

Episode * 841 * exploration epsilon 0.24309999999999998 * Episodic Reward is ==> -12.57036018371582
action means and variances at step -10: [-0.05888284 -2.        ] [6.3961124e-08 0.0000000e+00]
action means and variances at step -5: [-0.07118901 -2.        ] [7.380799e-08 0.000000e+00]
Episode * 842 * exploration epsilon 0.24219999999999997 * Episodic Reward is ==> -12.509147644042969
action means and variances at step -10: [-0.06738415 -2.        ] [7.70027e-08 0.00000e+00]
action means and variances at step -5: [-0.06343511 -2.        ] [6.2257904e-08 0.0000000e+00]
Episode * 843 * exploration epsilon 0.24129999999999996 * Episodic Reward is ==> -12.491369247436523
action means and variances at step -10: [-0.06049239 -2.        ] [6.440531e-08 0.000000e+00]
action means and variances at step -5: [-0.05513351 -2.        ] [4.0106677e-08 0.0000000e+00]
Episode * 844 * exploration epsilon 0.24039999999999995 * Episodic Reward is ==> -12.362935066223145
action means and variances at st

Episode * 857 * exploration epsilon 0.2286999999999999 * Episodic Reward is ==> -12.678902626037598
action means and variances at step -10: [-0.08141997 -2.        ] [7.814955e-08 0.000000e+00]
action means and variances at step -5: [-0.07672012 -2.        ] [8.1685364e-08 0.0000000e+00]
Episode * 858 * exploration epsilon 0.2278 * Episodic Reward is ==> -12.170822143554688
action means and variances at step -10: [-0.09632777 -2.        ] [1.966679e-07 0.000000e+00]
action means and variances at step -5: [-0.09631669 -2.        ] [1.7404271e-07 0.0000000e+00]
Episode * 859 * exploration epsilon 0.2269 * Episodic Reward is ==> -12.516019821166992
action means and variances at step -10: [-0.06511604 -2.        ] [6.730191e-08 0.000000e+00]
action means and variances at step -5: [-0.06872361 -2.        ] [7.142864e-08 0.000000e+00]
Episode * 860 * exploration epsilon 0.22599999999999998 * Episodic Reward is ==> -12.593585968017578
action means and variances at step -10: [-0.10928354 -2.  

Episode * 872 * exploration epsilon 0.21519999999999995 * Episodic Reward is ==> -12.94150161743164
action means and variances at step -10: [-0.19780879 -2.        ] [2.2724984e-07 0.0000000e+00]
action means and variances at step -5: [-0.19977087 -2.        ] [1.8814904e-07 0.0000000e+00]
Episode * 873 * exploration epsilon 0.21429999999999993 * Episodic Reward is ==> -12.21565055847168
action means and variances at step -10: [-0.2164615 -2.       ] [2.8871617e-07 0.0000000e+00]
action means and variances at step -5: [-0.21008934 -2.        ] [2.2915145e-07 0.0000000e+00]
Episode * 874 * exploration epsilon 0.21339999999999992 * Episodic Reward is ==> -12.091976165771484
action means and variances at step -10: [-0.1997765 -2.       ] [2.4637717e-07 0.0000000e+00]
action means and variances at step -5: [-0.21609707 -2.        ] [2.4327076e-07 0.0000000e+00]
Episode * 875 * exploration epsilon 0.21250000000000002 * Episodic Reward is ==> -12.372199058532715
action means and variances at

Episode * 888 * exploration epsilon 0.20079999999999998 * Episodic Reward is ==> -13.110353469848633
action means and variances at step -10: [-0.24513918 -2.        ] [3.9197693e-07 0.0000000e+00]
action means and variances at step -5: [-0.24969636 -2.        ] [3.9565717e-07 0.0000000e+00]
Episode * 889 * exploration epsilon 0.19989999999999997 * Episodic Reward is ==> -12.638006210327148
action means and variances at step -10: [-0.26731044 -2.        ] [3.709024e-07 0.000000e+00]
action means and variances at step -5: [-0.27008897 -2.        ] [3.836113e-07 0.000000e+00]
Episode * 890 * exploration epsilon 0.19899999999999995 * Episodic Reward is ==> -12.695365905761719
action means and variances at step -10: [-0.2936834 -2.       ] [6.9041795e-07 0.0000000e+00]
action means and variances at step -5: [-0.29949424 -2.        ] [9.822976e-07 0.000000e+00]
Episode * 891 * exploration epsilon 0.19810000000000005 * Episodic Reward is ==> -13.223365783691406
action means and variances at s

Episode * 903 * exploration epsilon 0.1872999999999999 * Episodic Reward is ==> -12.280216217041016
action means and variances at step -10: [-0.34342274 -2.        ] [3.779635e-07 0.000000e+00]
action means and variances at step -5: [-0.35655716 -2.        ] [3.2437208e-07 0.0000000e+00]
Episode * 904 * exploration epsilon 0.1864 * Episodic Reward is ==> -12.395438194274902
action means and variances at step -10: [-0.33443275 -2.        ] [3.5505997e-07 0.0000000e+00]
action means and variances at step -5: [-0.3294807 -2.       ] [3.1146783e-07 0.0000000e+00]
Episode * 905 * exploration epsilon 0.1855 * Episodic Reward is ==> -12.870134353637695
action means and variances at step -10: [-0.34474736 -2.        ] [3.010033e-07 0.000000e+00]
action means and variances at step -5: [-0.3387421 -2.       ] [2.4214665e-07 0.0000000e+00]
Episode * 906 * exploration epsilon 0.1846 * Episodic Reward is ==> -12.760478973388672
action means and variances at step -10: [-0.26846033 -2.        ] [2.26

Episode * 919 * exploration epsilon 0.17289999999999994 * Episodic Reward is ==> -12.431671142578125
action means and variances at step -10: [-0.22627328 -2.        ] [1.354041e-07 0.000000e+00]
action means and variances at step -5: [-0.23687297 -2.        ] [1.3339924e-07 0.0000000e+00]
Episode * 920 * exploration epsilon 0.17200000000000004 * Episodic Reward is ==> -12.784842491149902
action means and variances at step -10: [-0.2177893 -2.       ] [1.06014774e-07 0.00000000e+00]
action means and variances at step -5: [-0.22019258 -2.        ] [8.587415e-08 0.000000e+00]
Episode * 921 * exploration epsilon 0.17110000000000003 * Episodic Reward is ==> -13.330246925354004
action means and variances at step -10: [-0.22021757 -2.        ] [1.14175684e-07 0.00000000e+00]
action means and variances at step -5: [-0.21874164 -2.        ] [9.7337306e-08 0.0000000e+00]
Episode * 922 * exploration epsilon 0.1701999999999999 * Episodic Reward is ==> -12.807622909545898
action means and variances

Episode * 934 * exploration epsilon 0.1594 * Episodic Reward is ==> -12.244670867919922
action means and variances at step -10: [-0.22554708 -2.        ] [1.2244995e-07 0.0000000e+00]
action means and variances at step -5: [-0.23492125 -2.        ] [1.3812884e-07 0.0000000e+00]
Episode * 935 * exploration epsilon 0.15849999999999997 * Episodic Reward is ==> -12.72158432006836
action means and variances at step -10: [-0.22161667 -2.        ] [1.6395578e-07 0.0000000e+00]
action means and variances at step -5: [-0.21009637 -2.        ] [1.219598e-07 0.000000e+00]
Episode * 936 * exploration epsilon 0.15760000000000007 * Episodic Reward is ==> -12.632375717163086
action means and variances at step -10: [-0.21177715 -2.        ] [1.2471648e-07 0.0000000e+00]
action means and variances at step -5: [-0.19352965 -2.        ] [8.895574e-08 0.000000e+00]
Episode * 937 * exploration epsilon 0.15669999999999995 * Episodic Reward is ==> -12.194480895996094
action means and variances at step -10: [

Episode * 949 * exploration epsilon 0.14590000000000003 * Episodic Reward is ==> -12.83404541015625
action means and variances at step -10: [-0.17597641 -2.        ] [1.0240697e-07 0.0000000e+00]
action means and variances at step -5: [-0.17811036 -2.        ] [1.00534045e-07 0.00000000e+00]
Episode * 950 * exploration epsilon 0.14500000000000002 * Episodic Reward is ==> -12.15590763092041
action means and variances at step -10: [-0.19614477 -2.        ] [1.4193111e-07 0.0000000e+00]
action means and variances at step -5: [-0.18448281 -2.        ] [1.09156815e-07 0.00000000e+00]
Episode * 951 * exploration epsilon 0.1441 * Episodic Reward is ==> -12.66855239868164
action means and variances at step -10: [-0.19495428 -2.        ] [9.2659164e-08 0.0000000e+00]
action means and variances at step -5: [-0.20182252 -2.        ] [9.6993425e-08 0.0000000e+00]
Episode * 952 * exploration epsilon 0.14319999999999988 * Episodic Reward is ==> -12.55073356628418
action means and variances at step -

Episode * 965 * exploration epsilon 0.13149999999999995 * Episodic Reward is ==> -12.868209838867188
action means and variances at step -10: [-0.21731511 -2.        ] [1.3412527e-07 0.0000000e+00]
action means and variances at step -5: [-0.1901351 -2.       ] [1.0865189e-07 0.0000000e+00]
Episode * 966 * exploration epsilon 0.13060000000000005 * Episodic Reward is ==> -12.929750442504883
action means and variances at step -10: [-0.17919405 -2.        ] [1.9635107e-07 0.0000000e+00]
action means and variances at step -5: [-0.17929964 -2.        ] [1.6108574e-07 0.0000000e+00]
Episode * 967 * exploration epsilon 0.12969999999999993 * Episodic Reward is ==> -13.053567886352539
action means and variances at step -10: [-0.15940262 -2.        ] [1.0544635e-07 0.0000000e+00]
action means and variances at step -5: [-0.15833302 -2.        ] [7.065177e-08 0.000000e+00]
Episode * 968 * exploration epsilon 0.12879999999999991 * Episodic Reward is ==> -12.629609107971191
action means and variances 

Episode * 980 * exploration epsilon 0.118 * Episodic Reward is ==> -12.800602912902832
action means and variances at step -10: [-0.11304511 -2.        ] [2.1409663e-08 0.0000000e+00]
action means and variances at step -5: [-0.11342267 -2.        ] [2.5575039e-08 0.0000000e+00]
Episode * 981 * exploration epsilon 0.11709999999999998 * Episodic Reward is ==> -12.579147338867188
action means and variances at step -10: [-0.13334261 -2.        ] [4.3645105e-08 0.0000000e+00]
action means and variances at step -5: [-0.12606217 -2.        ] [3.2148357e-08 0.0000000e+00]
Episode * 982 * exploration epsilon 0.11619999999999997 * Episodic Reward is ==> -12.423250198364258
action means and variances at step -10: [-0.12492072 -2.        ] [1.8479156e-08 0.0000000e+00]
action means and variances at step -5: [-0.1329222 -2.       ] [2.0773715e-08 0.0000000e+00]
Episode * 983 * exploration epsilon 0.11529999999999996 * Episodic Reward is ==> -12.772517204284668
action means and variances at step -10:

Episode * 995 * exploration epsilon 0.10450000000000004 * Episodic Reward is ==> -12.53712272644043
action means and variances at step -10: [-0.12786691 -2.        ] [3.5792752e-08 0.0000000e+00]
action means and variances at step -5: [-0.14379872 -2.        ] [3.421911e-08 0.000000e+00]
Episode * 996 * exploration epsilon 0.10360000000000003 * Episodic Reward is ==> -12.692541122436523
action means and variances at step -10: [-0.11604541 -2.        ] [2.725567e-08 0.000000e+00]
action means and variances at step -5: [-0.12700473 -2.        ] [3.273221e-08 0.000000e+00]
Episode * 997 * exploration epsilon 0.1026999999999999 * Episodic Reward is ==> -12.922408103942871
action means and variances at step -10: [-0.11825092 -2.        ] [3.9334143e-08 0.0000000e+00]
action means and variances at step -5: [-0.12019759 -2.        ] [3.4128412e-08 0.0000000e+00]
Episode * 998 * exploration epsilon 0.1018 * Episodic Reward is ==> -12.606709480285645
action means and variances at step -10: [-0.

Episode * 1011 * exploration epsilon 0.1 * Episodic Reward is ==> -12.625829696655273
action means and variances at step -10: [-0.16407165 -2.        ] [4.5468894e-08 0.0000000e+00]
action means and variances at step -5: [-0.15550417 -2.        ] [3.8279534e-08 0.0000000e+00]
Episode * 1012 * exploration epsilon 0.1 * Episodic Reward is ==> -12.569599151611328
action means and variances at step -10: [-0.14283025 -2.        ] [4.7415185e-08 0.0000000e+00]
action means and variances at step -5: [-0.1430707 -2.       ] [4.7878615e-08 0.0000000e+00]
Episode * 1013 * exploration epsilon 0.1 * Episodic Reward is ==> -12.393587112426758
action means and variances at step -10: [-0.16249825 -2.        ] [4.1911367e-08 0.0000000e+00]
action means and variances at step -5: [-0.15855537 -2.        ] [3.8398678e-08 0.0000000e+00]
Episode * 1014 * exploration epsilon 0.1 * Episodic Reward is ==> -12.457439422607422
action means and variances at step -10: [-0.1673023 -2.       ] [4.1991974e-08 0.0000

Episode * 1027 * exploration epsilon 0.1 * Episodic Reward is ==> -13.006269454956055
action means and variances at step -10: [-0.18364385 -2.        ] [3.2164614e-08 0.0000000e+00]
action means and variances at step -5: [-0.17845128 -2.        ] [2.9095743e-08 0.0000000e+00]
Episode * 1028 * exploration epsilon 0.1 * Episodic Reward is ==> -12.252463340759277
action means and variances at step -10: [-0.19594508 -2.        ] [4.228453e-08 0.000000e+00]
action means and variances at step -5: [-0.19236873 -2.        ] [4.1611536e-08 0.0000000e+00]
Episode * 1029 * exploration epsilon 0.1 * Episodic Reward is ==> -12.675909042358398
action means and variances at step -10: [-0.2012511 -2.       ] [3.910911e-08 0.000000e+00]
action means and variances at step -5: [-0.20214458 -2.        ] [3.8042298e-08 0.0000000e+00]
Episode * 1030 * exploration epsilon 0.1 * Episodic Reward is ==> -12.834870338439941
action means and variances at step -10: [-0.1763423 -2.       ] [3.6204927e-08 0.0000000e

Episode * 1043 * exploration epsilon 0.1 * Episodic Reward is ==> -12.703545570373535
action means and variances at step -10: [-0.21411005 -2.        ] [4.1989146e-08 0.0000000e+00]
action means and variances at step -5: [-0.22528753 -2.        ] [5.386552e-08 0.000000e+00]
Episode * 1044 * exploration epsilon 0.1 * Episodic Reward is ==> -12.848766326904297
action means and variances at step -10: [-0.23414288 -2.        ] [5.456046e-08 0.000000e+00]
action means and variances at step -5: [-0.23176281 -2.        ] [5.2113677e-08 0.0000000e+00]
Episode * 1045 * exploration epsilon 0.1 * Episodic Reward is ==> -12.72152328491211
action means and variances at step -10: [-0.21253772 -2.        ] [3.6118006e-08 0.0000000e+00]
action means and variances at step -5: [-0.20967636 -2.        ] [3.090445e-08 0.000000e+00]
Episode * 1046 * exploration epsilon 0.1 * Episodic Reward is ==> -12.952606201171875
action means and variances at step -10: [-0.20304747 -2.        ] [2.9231868e-08 0.0000000

Episode * 1059 * exploration epsilon 0.1 * Episodic Reward is ==> -12.750282287597656
action means and variances at step -10: [-0.21351583 -2.        ] [5.8127e-08 0.0000e+00]
action means and variances at step -5: [-0.20921436 -2.        ] [5.08791e-08 0.00000e+00]
Episode * 1060 * exploration epsilon 0.1 * Episodic Reward is ==> -12.764123916625977
action means and variances at step -10: [-0.21106267 -2.        ] [5.547432e-08 0.000000e+00]
action means and variances at step -5: [-0.21134229 -2.        ] [4.2714014e-08 0.0000000e+00]
Episode * 1061 * exploration epsilon 0.1 * Episodic Reward is ==> -12.818150520324707
action means and variances at step -10: [-0.21318536 -2.        ] [4.8083134e-08 0.0000000e+00]
action means and variances at step -5: [-0.2097648 -2.       ] [3.7500588e-08 0.0000000e+00]
Episode * 1062 * exploration epsilon 0.1 * Episodic Reward is ==> -12.28032398223877
action means and variances at step -10: [-0.1989823 -2.       ] [2.6268044e-08 0.0000000e+00]
acti

Episode * 1075 * exploration epsilon 0.1 * Episodic Reward is ==> -12.591835975646973
action means and variances at step -10: [-0.21296002 -2.        ] [2.1037406e-08 0.0000000e+00]
action means and variances at step -5: [-0.21790548 -2.        ] [2.0456806e-08 0.0000000e+00]
Episode * 1076 * exploration epsilon 0.1 * Episodic Reward is ==> -12.733912467956543
action means and variances at step -10: [-0.22106595 -2.        ] [2.032671e-08 0.000000e+00]
action means and variances at step -5: [-0.22683923 -2.        ] [1.7822632e-08 0.0000000e+00]
Episode * 1077 * exploration epsilon 0.1 * Episodic Reward is ==> -12.857358932495117
action means and variances at step -10: [-0.20615268 -2.        ] [2.4252842e-08 0.0000000e+00]
action means and variances at step -5: [-0.2081616 -2.       ] [2.13469e-08 0.00000e+00]
Episode * 1078 * exploration epsilon 0.1 * Episodic Reward is ==> -13.038298606872559
action means and variances at step -10: [-0.22564892 -2.        ] [2.8657183e-08 0.0000000e

Episode * 1091 * exploration epsilon 0.1 * Episodic Reward is ==> -12.692667007446289
action means and variances at step -10: [-0.2469542 -2.       ] [2.1137238e-08 0.0000000e+00]
action means and variances at step -5: [-0.25083777 -2.        ] [1.9380884e-08 0.0000000e+00]
Episode * 1092 * exploration epsilon 0.1 * Episodic Reward is ==> -13.072433471679688
action means and variances at step -10: [-0.24348685 -2.        ] [3.4676386e-08 0.0000000e+00]
action means and variances at step -5: [-0.23704937 -2.        ] [2.6624953e-08 0.0000000e+00]
Episode * 1093 * exploration epsilon 0.1 * Episodic Reward is ==> -12.61618423461914
action means and variances at step -10: [-0.23159638 -2.        ] [2.1653673e-08 0.0000000e+00]
action means and variances at step -5: [-0.23292883 -2.        ] [1.9085135e-08 0.0000000e+00]
Episode * 1094 * exploration epsilon 0.1 * Episodic Reward is ==> -12.38908576965332
action means and variances at step -10: [-0.23460434 -2.        ] [2.0126105e-08 0.0000

Episode * 1107 * exploration epsilon 0.1 * Episodic Reward is ==> -12.548685073852539
action means and variances at step -10: [-0.27758345 -2.        ] [4.3384155e-08 0.0000000e+00]
action means and variances at step -5: [-0.28729033 -2.        ] [4.1084842e-08 0.0000000e+00]
Episode * 1108 * exploration epsilon 0.1 * Episodic Reward is ==> -12.77096176147461
action means and variances at step -10: [-0.3070953 -2.       ] [4.7668987e-08 0.0000000e+00]
action means and variances at step -5: [-0.29875967 -2.        ] [3.975e-08 0.000e+00]
Episode * 1109 * exploration epsilon 0.1 * Episodic Reward is ==> -12.909965515136719
action means and variances at step -10: [-0.2973895 -2.       ] [4.7459423e-08 0.0000000e+00]
action means and variances at step -5: [-0.29220173 -2.        ] [4.209153e-08 0.000000e+00]
Episode * 1110 * exploration epsilon 0.1 * Episodic Reward is ==> -12.975926399230957
action means and variances at step -10: [-0.2888817 -2.       ] [2.9912854e-08 0.0000000e+00]
acti

Episode * 1123 * exploration epsilon 0.1 * Episodic Reward is ==> -12.498291015625
action means and variances at step -10: [-0.33240193 -2.        ] [4.3348315e-08 0.0000000e+00]
action means and variances at step -5: [-0.3317321 -2.       ] [4.015166e-08 0.000000e+00]
Episode * 1124 * exploration epsilon 0.1 * Episodic Reward is ==> -12.864657402038574
action means and variances at step -10: [-0.33285862 -2.        ] [3.121395e-08 0.000000e+00]
action means and variances at step -5: [-0.33038706 -2.        ] [2.5731401e-08 0.0000000e+00]
Episode * 1125 * exploration epsilon 0.1 * Episodic Reward is ==> -12.762907028198242
action means and variances at step -10: [-0.31443226 -2.        ] [2.6699936e-08 0.0000000e+00]
action means and variances at step -5: [-0.31167457 -2.        ] [2.6888756e-08 0.0000000e+00]
Episode * 1126 * exploration epsilon 0.1 * Episodic Reward is ==> -12.865467071533203
action means and variances at step -10: [-0.31238267 -2.        ] [2.5178824e-08 0.0000000e+

Episode * 1139 * exploration epsilon 0.1 * Episodic Reward is ==> -13.315120697021484
action means and variances at step -10: [-0.35352364 -2.        ] [4.490858e-08 0.000000e+00]
action means and variances at step -5: [-0.34789923 -2.        ] [3.9071875e-08 0.0000000e+00]
Episode * 1140 * exploration epsilon 0.1 * Episodic Reward is ==> -12.840227127075195
action means and variances at step -10: [-0.36317155 -2.        ] [5.304556e-08 0.000000e+00]
action means and variances at step -5: [-0.3517485 -2.       ] [4.2745516e-08 0.0000000e+00]
Episode * 1141 * exploration epsilon 0.1 * Episodic Reward is ==> -13.20923137664795
action means and variances at step -10: [-0.35514683 -2.        ] [5.111139e-08 0.000000e+00]
action means and variances at step -5: [-0.34985483 -2.        ] [4.4041833e-08 0.0000000e+00]
Episode * 1142 * exploration epsilon 0.1 * Episodic Reward is ==> -12.90290355682373
action means and variances at step -10: [-0.32800892 -2.        ] [3.2740846e-08 0.0000000e+0

Episode * 1155 * exploration epsilon 0.1 * Episodic Reward is ==> -12.884593963623047
action means and variances at step -10: [-0.37994477 -2.        ] [3.4284746e-08 0.0000000e+00]
action means and variances at step -5: [-0.38560972 -2.        ] [3.0549643e-08 0.0000000e+00]
Episode * 1156 * exploration epsilon 0.1 * Episodic Reward is ==> -13.02342414855957
action means and variances at step -10: [-0.36689347 -2.        ] [3.394663e-08 0.000000e+00]
action means and variances at step -5: [-0.36593524 -2.        ] [3.1278624e-08 0.0000000e+00]
Episode * 1157 * exploration epsilon 0.1 * Episodic Reward is ==> -13.237687110900879
action means and variances at step -10: [-0.36781415 -2.        ] [3.8409986e-08 0.0000000e+00]
action means and variances at step -5: [-0.3750881 -2.       ] [4.3165645e-08 0.0000000e+00]
Episode * 1158 * exploration epsilon 0.1 * Episodic Reward is ==> -12.88573169708252
action means and variances at step -10: [-0.36028355 -2.        ] [4.4110994e-08 0.000000

Episode * 1171 * exploration epsilon 0.1 * Episodic Reward is ==> -13.063396453857422
action means and variances at step -10: [-0.38374752 -2.        ] [4.2159563e-08 0.0000000e+00]
action means and variances at step -5: [-0.37609285 -2.        ] [3.431349e-08 0.000000e+00]
Episode * 1172 * exploration epsilon 0.1 * Episodic Reward is ==> -13.062583923339844
action means and variances at step -10: [-0.39593062 -2.        ] [4.3116856e-08 0.0000000e+00]
action means and variances at step -5: [-0.40561166 -2.        ] [4.1042203e-08 0.0000000e+00]
Episode * 1173 * exploration epsilon 0.1 * Episodic Reward is ==> -13.055916786193848
action means and variances at step -10: [-0.371573 -2.      ] [4.4244143e-08 0.0000000e+00]
action means and variances at step -5: [-0.36876082 -2.        ] [3.8897014e-08 0.0000000e+00]
Episode * 1174 * exploration epsilon 0.1 * Episodic Reward is ==> -13.628243446350098
action means and variances at step -10: [-0.37574673 -2.        ] [4.003163e-08 0.000000e

Episode * 1187 * exploration epsilon 0.1 * Episodic Reward is ==> -13.393989562988281
action means and variances at step -10: [-0.42502603 -2.        ] [3.31531e-08 0.00000e+00]
action means and variances at step -5: [-0.41623345 -2.        ] [3.126086e-08 0.000000e+00]
Episode * 1188 * exploration epsilon 0.1 * Episodic Reward is ==> -13.046304702758789
action means and variances at step -10: [-0.4340877 -2.       ] [2.9693005e-08 0.0000000e+00]
action means and variances at step -5: [-0.43146425 -2.        ] [2.6009634e-08 0.0000000e+00]
Episode * 1189 * exploration epsilon 0.1 * Episodic Reward is ==> -13.290924072265625
action means and variances at step -10: [-0.40468082 -2.        ] [3.187043e-08 0.000000e+00]
action means and variances at step -5: [-0.4033314 -2.       ] [3.0233632e-08 0.0000000e+00]
Episode * 1190 * exploration epsilon 0.1 * Episodic Reward is ==> -13.075772285461426
action means and variances at step -10: [-0.40284938 -2.        ] [3.0922298e-08 0.0000000e+00]

Episode * 1203 * exploration epsilon 0.1 * Episodic Reward is ==> -13.051589012145996
action means and variances at step -10: [-0.43465316 -2.        ] [5.9201344e-08 0.0000000e+00]
action means and variances at step -5: [-0.44519794 -2.        ] [5.1365998e-08 0.0000000e+00]
Episode * 1204 * exploration epsilon 0.1 * Episodic Reward is ==> -13.206249237060547
action means and variances at step -10: [-0.45699054 -2.        ] [3.9851198e-08 0.0000000e+00]
action means and variances at step -5: [-0.45632988 -2.        ] [4.2337287e-08 0.0000000e+00]
Episode * 1205 * exploration epsilon 0.1 * Episodic Reward is ==> -12.916828155517578
action means and variances at step -10: [-0.45657125 -2.        ] [3.290348e-08 0.000000e+00]
action means and variances at step -5: [-0.45323387 -2.        ] [3.010743e-08 0.000000e+00]
Episode * 1206 * exploration epsilon 0.1 * Episodic Reward is ==> -13.46480941772461
action means and variances at step -10: [-0.4425358 -2.       ] [4.5668887e-08 0.0000000

Episode * 1219 * exploration epsilon 0.1 * Episodic Reward is ==> -13.523662567138672
action means and variances at step -10: [-0.5103084 -2.       ] [3.4860506e-08 0.0000000e+00]
action means and variances at step -5: [-0.51136833 -2.        ] [3.0688295e-08 0.0000000e+00]
Episode * 1220 * exploration epsilon 0.1 * Episodic Reward is ==> -13.1358642578125
action means and variances at step -10: [-0.484774 -2.      ] [2.9541377e-08 0.0000000e+00]
action means and variances at step -5: [-0.48833883 -2.        ] [2.8744223e-08 0.0000000e+00]
Episode * 1221 * exploration epsilon 0.1 * Episodic Reward is ==> -13.37433910369873
action means and variances at step -10: [-0.47458705 -2.        ] [3.007519e-08 0.000000e+00]
action means and variances at step -5: [-0.4755204 -2.       ] [2.6863654e-08 0.0000000e+00]
Episode * 1222 * exploration epsilon 0.1 * Episodic Reward is ==> -13.274442672729492
action means and variances at step -10: [-0.54622835 -2.        ] [4.3533504e-08 0.0000000e+00]


Episode * 1235 * exploration epsilon 0.1 * Episodic Reward is ==> -12.958704948425293
action means and variances at step -10: [-0.49019232 -2.        ] [2.5372325e-08 0.0000000e+00]
action means and variances at step -5: [-0.48636526 -2.        ] [2.6630119e-08 0.0000000e+00]
Episode * 1236 * exploration epsilon 0.1 * Episodic Reward is ==> -12.878517150878906
action means and variances at step -10: [-0.49351293 -2.        ] [3.734125e-08 0.000000e+00]
action means and variances at step -5: [-0.4926453 -2.       ] [3.4370867e-08 0.0000000e+00]
Episode * 1237 * exploration epsilon 0.1 * Episodic Reward is ==> -13.328619003295898
action means and variances at step -10: [-0.49562532 -2.        ] [3.0693798e-08 0.0000000e+00]
action means and variances at step -5: [-0.49215007 -2.        ] [3.0159228e-08 0.0000000e+00]
Episode * 1238 * exploration epsilon 0.1 * Episodic Reward is ==> -13.23548698425293
action means and variances at step -10: [-0.514715 -2.      ] [3.2950922e-08 0.0000000e+

Episode * 1251 * exploration epsilon 0.1 * Episodic Reward is ==> -13.189431190490723
action means and variances at step -10: [-0.48286748 -2.        ] [2.085668e-08 0.000000e+00]
action means and variances at step -5: [-0.4861154 -2.       ] [2.3920954e-08 0.0000000e+00]
Episode * 1252 * exploration epsilon 0.1 * Episodic Reward is ==> -13.369011878967285
action means and variances at step -10: [-0.48201293 -2.        ] [2.562734e-08 0.000000e+00]
action means and variances at step -5: [-0.48338842 -2.        ] [2.7015583e-08 0.0000000e+00]
Episode * 1253 * exploration epsilon 0.1 * Episodic Reward is ==> -13.545513153076172
action means and variances at step -10: [-0.4867253 -2.       ] [3.3907693e-08 0.0000000e+00]
action means and variances at step -5: [-0.48807415 -2.        ] [3.3762753e-08 0.0000000e+00]
Episode * 1254 * exploration epsilon 0.1 * Episodic Reward is ==> -13.161372184753418
action means and variances at step -10: [-0.48543122 -2.        ] [2.6110568e-08 0.0000000e

Episode * 1267 * exploration epsilon 0.1 * Episodic Reward is ==> -13.053596496582031
action means and variances at step -10: [-0.49227008 -2.        ] [3.0156134e-08 0.0000000e+00]
action means and variances at step -5: [-0.493994 -2.      ] [2.994514e-08 0.000000e+00]
Episode * 1268 * exploration epsilon 0.1 * Episodic Reward is ==> -13.051372528076172
action means and variances at step -10: [-0.49675754 -2.        ] [2.2765379e-08 0.0000000e+00]
action means and variances at step -5: [-0.49860984 -2.        ] [2.5583406e-08 0.0000000e+00]
Episode * 1269 * exploration epsilon 0.1 * Episodic Reward is ==> -13.431583404541016
action means and variances at step -10: [-0.5024744 -2.       ] [3.3879466e-08 0.0000000e+00]
action means and variances at step -5: [-0.5009715 -2.       ] [3.778972e-08 0.000000e+00]
Episode * 1270 * exploration epsilon 0.1 * Episodic Reward is ==> -12.870712280273438
action means and variances at step -10: [-0.4994191 -2.       ] [2.0710756e-08 0.0000000e+00]
a

Episode * 1283 * exploration epsilon 0.1 * Episodic Reward is ==> -13.098852157592773
action means and variances at step -10: [-0.4610819 -2.       ] [1.5887167e-08 0.0000000e+00]
action means and variances at step -5: [-0.4594634 -2.       ] [1.5264527e-08 0.0000000e+00]
Episode * 1284 * exploration epsilon 0.1 * Episodic Reward is ==> -13.37907600402832
action means and variances at step -10: [-0.46213892 -2.        ] [1.5689137e-08 0.0000000e+00]
action means and variances at step -5: [-0.46262565 -2.        ] [1.5307192e-08 0.0000000e+00]
Episode * 1285 * exploration epsilon 0.1 * Episodic Reward is ==> -13.200820922851562
action means and variances at step -10: [-0.45936877 -2.        ] [1.1891509e-08 0.0000000e+00]
action means and variances at step -5: [-0.46153682 -2.        ] [1.0903431e-08 0.0000000e+00]
Episode * 1286 * exploration epsilon 0.1 * Episodic Reward is ==> -13.111072540283203
action means and variances at step -10: [-0.44253466 -2.        ] [1.5576946e-08 0.00000

Episode * 1299 * exploration epsilon 0.1 * Episodic Reward is ==> -13.226890563964844
action means and variances at step -10: [-0.3965471 -2.       ] [7.2327913e-09 0.0000000e+00]
action means and variances at step -5: [-0.3994187 -2.       ] [6.9141786e-09 0.0000000e+00]
Episode * 1300 * exploration epsilon 0.1 * Episodic Reward is ==> -13.204100608825684
action means and variances at step -10: [-0.39231253 -2.        ] [1.0253375e-08 0.0000000e+00]
action means and variances at step -5: [-0.39257315 -2.        ] [9.999217e-09 0.000000e+00]
Episode * 1301 * exploration epsilon 0.1 * Episodic Reward is ==> -13.177836418151855
action means and variances at step -10: [-0.4090835 -2.       ] [1.9661416e-08 0.0000000e+00]
action means and variances at step -5: [-0.4161393 -2.       ] [1.80347e-08 0.00000e+00]
Episode * 1302 * exploration epsilon 0.1 * Episodic Reward is ==> -13.111513137817383
action means and variances at step -10: [-0.44551566 -2.        ] [1.5230569e-08 0.0000000e+00]
a

Episode * 1315 * exploration epsilon 0.1 * Episodic Reward is ==> -13.227909088134766
action means and variances at step -10: [-0.6164664 -2.       ] [1.7551469e-07 0.0000000e+00]
action means and variances at step -5: [-0.61495596 -2.        ] [1.4150307e-07 0.0000000e+00]
Episode * 1316 * exploration epsilon 0.1 * Episodic Reward is ==> -13.195327758789062
action means and variances at step -10: [-0.5878524 -2.       ] [1.7348276e-07 0.0000000e+00]
action means and variances at step -5: [-0.5946046 -2.       ] [1.3256883e-07 0.0000000e+00]
Episode * 1317 * exploration epsilon 0.1 * Episodic Reward is ==> -13.534137725830078
action means and variances at step -10: [-0.6204277 -2.       ] [1.1569438e-07 0.0000000e+00]
action means and variances at step -5: [-0.621275 -2.      ] [9.034759e-08 0.000000e+00]
Episode * 1318 * exploration epsilon 0.1 * Episodic Reward is ==> -13.381235122680664
action means and variances at step -10: [-0.6329072 -2.       ] [1.3416813e-07 0.0000000e+00]
act

Episode * 1331 * exploration epsilon 0.1 * Episodic Reward is ==> -12.7476806640625
action means and variances at step -10: [-0.21990873 -2.        ] [7.985414e-07 0.000000e+00]
action means and variances at step -5: [-0.22068614 -2.        ] [8.183466e-07 0.000000e+00]
Episode * 1332 * exploration epsilon 0.1 * Episodic Reward is ==> -12.93027114868164
action means and variances at step -10: [-0.22714695 -2.        ] [7.770481e-07 0.000000e+00]
action means and variances at step -5: [-0.230414 -2.      ] [8.055805e-07 0.000000e+00]
Episode * 1333 * exploration epsilon 0.1 * Episodic Reward is ==> -12.682772636413574
action means and variances at step -10: [-0.18138292 -2.        ] [9.021527e-07 0.000000e+00]
action means and variances at step -5: [-0.15892147 -2.        ] [9.3557395e-07 0.0000000e+00]
Episode * 1334 * exploration epsilon 0.1 * Episodic Reward is ==> -13.076077461242676
action means and variances at step -10: [ 1.4776031e-03 -2.0000000e+00] [1.2925493e-06 0.0000000e+00

Episode * 1347 * exploration epsilon 0.1 * Episodic Reward is ==> -12.934834480285645
action means and variances at step -10: [ 0.08242512 -2.        ] [6.292976e-07 0.000000e+00]
action means and variances at step -5: [ 0.10954434 -2.        ] [7.682327e-07 0.000000e+00]
Episode * 1348 * exploration epsilon 0.1 * Episodic Reward is ==> -12.673233032226562
action means and variances at step -10: [ 0.12684722 -2.        ] [5.288755e-07 0.000000e+00]
action means and variances at step -5: [ 0.11691783 -2.        ] [5.952627e-07 0.000000e+00]
Episode * 1349 * exploration epsilon 0.1 * Episodic Reward is ==> -12.516149520874023
action means and variances at step -10: [ 0.11040545 -2.        ] [6.356791e-07 0.000000e+00]
action means and variances at step -5: [ 0.09133601 -2.        ] [7.1849576e-07 0.0000000e+00]
Episode * 1350 * exploration epsilon 0.1 * Episodic Reward is ==> -13.11717414855957
action means and variances at step -10: [ 0.1352572 -2.       ] [6.042362e-07 0.000000e+00]
ac

Episode * 1363 * exploration epsilon 0.1 * Episodic Reward is ==> -12.719854354858398
action means and variances at step -10: [ 0.19998677 -2.        ] [3.5152803e-07 0.0000000e+00]
action means and variances at step -5: [ 0.20000227 -2.        ] [3.9862485e-07 0.0000000e+00]
Episode * 1364 * exploration epsilon 0.1 * Episodic Reward is ==> -12.346816062927246
action means and variances at step -10: [ 0.18672195 -2.        ] [3.6433147e-07 0.0000000e+00]
action means and variances at step -5: [ 0.16942436 -2.        ] [3.080799e-07 0.000000e+00]
Episode * 1365 * exploration epsilon 0.1 * Episodic Reward is ==> -13.032758712768555
action means and variances at step -10: [ 0.20602249 -2.        ] [3.3300572e-07 0.0000000e+00]
action means and variances at step -5: [ 0.19500324 -2.        ] [3.1103448e-07 0.0000000e+00]
Episode * 1366 * exploration epsilon 0.1 * Episodic Reward is ==> -12.776531219482422
action means and variances at step -10: [ 0.17871025 -2.        ] [2.8622287e-07 0.00

Episode * 1379 * exploration epsilon 0.1 * Episodic Reward is ==> -12.776849746704102
action means and variances at step -10: [ 0.22678828 -2.        ] [2.9279877e-07 0.0000000e+00]
action means and variances at step -5: [ 0.23445755 -2.        ] [2.464911e-07 0.000000e+00]
Episode * 1380 * exploration epsilon 0.1 * Episodic Reward is ==> -12.147307395935059
action means and variances at step -10: [ 0.28069675 -2.        ] [4.219712e-07 0.000000e+00]
action means and variances at step -5: [ 0.27874118 -2.        ] [3.7935055e-07 0.0000000e+00]
Episode * 1381 * exploration epsilon 0.1 * Episodic Reward is ==> -12.817022323608398
action means and variances at step -10: [ 0.2464211 -2.       ] [3.167566e-07 0.000000e+00]
action means and variances at step -5: [ 0.2382208 -2.       ] [3.0114813e-07 0.0000000e+00]
Episode * 1382 * exploration epsilon 0.1 * Episodic Reward is ==> -12.326787948608398
action means and variances at step -10: [ 0.22897747 -2.        ] [2.5177206e-07 0.0000000e+0

Episode * 1395 * exploration epsilon 0.1 * Episodic Reward is ==> -12.833976745605469
action means and variances at step -10: [ 0.2308902 -2.       ] [1.4579793e-07 0.0000000e+00]
action means and variances at step -5: [ 0.22869405 -2.        ] [1.708013e-07 0.000000e+00]
Episode * 1396 * exploration epsilon 0.1 * Episodic Reward is ==> -12.196352005004883
action means and variances at step -10: [ 0.2556353 -2.       ] [7.338849e-08 0.000000e+00]
action means and variances at step -5: [ 0.23856848 -2.        ] [1.0038394e-07 0.0000000e+00]
Episode * 1397 * exploration epsilon 0.1 * Episodic Reward is ==> -12.577082633972168
action means and variances at step -10: [ 0.2633146 -2.       ] [8.4202576e-08 0.0000000e+00]
action means and variances at step -5: [ 0.25145242 -2.        ] [9.56539e-08 0.00000e+00]
Episode * 1398 * exploration epsilon 0.1 * Episodic Reward is ==> -12.64098834991455
action means and variances at step -10: [ 0.2541792 -2.       ] [1.2843871e-07 0.0000000e+00]
acti

Episode * 1411 * exploration epsilon 0.1 * Episodic Reward is ==> -12.599164962768555
action means and variances at step -10: [ 0.284663 -2.      ] [4.578797e-08 0.000000e+00]
action means and variances at step -5: [ 0.2817082 -2.       ] [3.916488e-08 0.000000e+00]
Episode * 1412 * exploration epsilon 0.1 * Episodic Reward is ==> -12.367040634155273
action means and variances at step -10: [ 0.2842006 -2.       ] [9.860154e-08 0.000000e+00]
action means and variances at step -5: [ 0.2839472 -2.       ] [9.90405e-08 0.00000e+00]
Episode * 1413 * exploration epsilon 0.1 * Episodic Reward is ==> -12.660935401916504
action means and variances at step -10: [ 0.26184806 -2.        ] [5.8385247e-08 0.0000000e+00]
action means and variances at step -5: [ 0.26417157 -2.        ] [5.0312398e-08 0.0000000e+00]
Episode * 1414 * exploration epsilon 0.1 * Episodic Reward is ==> -12.720909118652344
action means and variances at step -10: [ 0.25683197 -2.        ] [8.7343395e-08 0.0000000e+00]
action 

Episode * 1427 * exploration epsilon 0.1 * Episodic Reward is ==> -12.829275131225586
action means and variances at step -10: [ 0.2732188 -2.       ] [1.0740798e-07 0.0000000e+00]
action means and variances at step -5: [ 0.26787505 -2.        ] [8.40417e-08 0.00000e+00]
Episode * 1428 * exploration epsilon 0.1 * Episodic Reward is ==> -13.58082389831543
action means and variances at step -10: [ 0.24006236 -2.        ] [7.233811e-08 0.000000e+00]
action means and variances at step -5: [ 0.23653913 -2.        ] [8.964459e-08 0.000000e+00]
Episode * 1429 * exploration epsilon 0.1 * Episodic Reward is ==> -12.708813667297363
action means and variances at step -10: [ 0.28145024 -2.        ] [8.1172246e-08 0.0000000e+00]
action means and variances at step -5: [ 0.28852558 -2.        ] [8.260886e-08 0.000000e+00]
Episode * 1430 * exploration epsilon 0.1 * Episodic Reward is ==> -13.14954662322998
action means and variances at step -10: [ 0.23312694 -2.        ] [9.564895e-08 0.000000e+00]
act

Episode * 1443 * exploration epsilon 0.1 * Episodic Reward is ==> -13.347572326660156
action means and variances at step -10: [ 0.29123235 -2.        ] [7.066702e-08 0.000000e+00]
action means and variances at step -5: [ 0.28595653 -2.        ] [5.758185e-08 0.000000e+00]
Episode * 1444 * exploration epsilon 0.1 * Episodic Reward is ==> -12.551239013671875
action means and variances at step -10: [ 0.28132161 -2.        ] [6.9927246e-08 0.0000000e+00]
action means and variances at step -5: [ 0.28792766 -2.        ] [5.8468387e-08 0.0000000e+00]
Episode * 1445 * exploration epsilon 0.1 * Episodic Reward is ==> -12.787054061889648
action means and variances at step -10: [ 0.26696685 -2.        ] [6.442614e-08 0.000000e+00]
action means and variances at step -5: [ 0.2661189 -2.       ] [8.427712e-08 0.000000e+00]
Episode * 1446 * exploration epsilon 0.1 * Episodic Reward is ==> -12.841854095458984
action means and variances at step -10: [ 0.28445432 -2.        ] [8.0589864e-08 0.0000000e+0

Episode * 1459 * exploration epsilon 0.1 * Episodic Reward is ==> -12.775424003601074
action means and variances at step -10: [ 0.29453984 -2.        ] [4.2391797e-08 0.0000000e+00]
action means and variances at step -5: [ 0.28395078 -2.        ] [5.0549726e-08 0.0000000e+00]
Episode * 1460 * exploration epsilon 0.1 * Episodic Reward is ==> -12.569653511047363
action means and variances at step -10: [ 0.29363218 -2.        ] [4.240792e-07 0.000000e+00]
action means and variances at step -5: [ 0.31111854 -2.        ] [3.177345e-07 0.000000e+00]
Episode * 1461 * exploration epsilon 0.1 * Episodic Reward is ==> -12.576652526855469
action means and variances at step -10: [ 0.2961775 -2.       ] [2.0980502e-07 0.0000000e+00]
action means and variances at step -5: [ 0.2853217 -2.       ] [5.7763767e-08 0.0000000e+00]
Episode * 1462 * exploration epsilon 0.1 * Episodic Reward is ==> -12.866999626159668
action means and variances at step -10: [ 0.2784365 -2.       ] [8.970791e-08 0.000000e+00]

Episode * 1475 * exploration epsilon 0.1 * Episodic Reward is ==> -12.188474655151367
action means and variances at step -10: [ 0.33230856 -2.        ] [3.639151e-08 0.000000e+00]
action means and variances at step -5: [ 0.34718606 -2.        ] [4.3389292e-08 0.0000000e+00]
Episode * 1476 * exploration epsilon 0.1 * Episodic Reward is ==> -13.037464141845703
action means and variances at step -10: [ 0.33490422 -2.        ] [3.7263625e-08 0.0000000e+00]
action means and variances at step -5: [ 0.31688297 -2.        ] [4.376453e-08 0.000000e+00]
Episode * 1477 * exploration epsilon 0.1 * Episodic Reward is ==> -12.413800239562988
action means and variances at step -10: [ 0.34380433 -2.        ] [4.436026e-08 0.000000e+00]
action means and variances at step -5: [ 0.32374197 -2.        ] [4.4358185e-08 0.0000000e+00]
Episode * 1478 * exploration epsilon 0.1 * Episodic Reward is ==> -12.701583862304688
action means and variances at step -10: [ 0.30348593 -2.        ] [3.756335e-08 0.000000e

Episode * 1491 * exploration epsilon 0.1 * Episodic Reward is ==> -12.726181030273438
action means and variances at step -10: [ 0.3451368 -2.       ] [5.5841372e-08 0.0000000e+00]
action means and variances at step -5: [ 0.35376537 -2.        ] [6.2398115e-08 0.0000000e+00]
Episode * 1492 * exploration epsilon 0.1 * Episodic Reward is ==> -12.380636215209961
action means and variances at step -10: [ 0.34692127 -2.        ] [6.5744025e-08 0.0000000e+00]
action means and variances at step -5: [ 0.35678297 -2.        ] [3.3880983e-07 0.0000000e+00]
Episode * 1493 * exploration epsilon 0.1 * Episodic Reward is ==> -12.907293319702148
action means and variances at step -10: [ 0.35654843 -2.        ] [4.9668504e-08 0.0000000e+00]
action means and variances at step -5: [ 0.3356474 -2.       ] [4.309981e-08 0.000000e+00]
Episode * 1494 * exploration epsilon 0.1 * Episodic Reward is ==> -12.611221313476562
action means and variances at step -10: [ 0.35593054 -2.        ] [4.9432266e-08 0.000000

Episode * 1507 * exploration epsilon 0.1 * Episodic Reward is ==> -14.67926025390625
action means and variances at step -10: [ 1.6734685 -2.       ] [2.0560941e-05 0.0000000e+00]
action means and variances at step -5: [ 1.828145 -2.      ] [1.0078987e-05 0.0000000e+00]
Episode * 1508 * exploration epsilon 0.1 * Episodic Reward is ==> -15.057947158813477
action means and variances at step -10: [ 1.6893461 -2.       ] [2.312309e-05 0.000000e+00]
action means and variances at step -5: [ 1.6307975 -2.       ] [1.0121819e-05 0.0000000e+00]
Episode * 1509 * exploration epsilon 0.1 * Episodic Reward is ==> -14.645784378051758
action means and variances at step -10: [ 1.4186445 -2.       ] [5.6458725e-06 0.0000000e+00]
action means and variances at step -5: [ 1.4588314 -2.       ] [3.6636409e-06 0.0000000e+00]
Episode * 1510 * exploration epsilon 0.1 * Episodic Reward is ==> -14.74972152709961
action means and variances at step -10: [ 1.7922696 -2.       ] [3.7280624e-06 0.0000000e+00]
action 

Episode * 1523 * exploration epsilon 0.1 * Episodic Reward is ==> -15.201730728149414
action means and variances at step -10: [ 1.8370446 -2.       ] [5.3836015e-07 0.0000000e+00]
action means and variances at step -5: [ 1.8407245 -2.       ] [4.0300867e-07 0.0000000e+00]
Episode * 1524 * exploration epsilon 0.1 * Episodic Reward is ==> -15.335396766662598
action means and variances at step -10: [ 1.8359358 -2.       ] [7.0319123e-07 0.0000000e+00]
action means and variances at step -5: [ 1.8357815 -2.       ] [6.643585e-07 0.000000e+00]
Episode * 1525 * exploration epsilon 0.1 * Episodic Reward is ==> -15.421957969665527
action means and variances at step -10: [ 1.8404922 -2.       ] [6.5987456e-07 0.0000000e+00]
action means and variances at step -5: [ 1.8466246 -2.       ] [5.074324e-07 0.000000e+00]
Episode * 1526 * exploration epsilon 0.1 * Episodic Reward is ==> -15.385712623596191
action means and variances at step -10: [ 1.8461767 -2.       ] [4.795924e-07 0.000000e+00]
action 

Episode * 1539 * exploration epsilon 0.1 * Episodic Reward is ==> -14.16387939453125
action means and variances at step -10: [ 1.2535473 -2.       ] [7.7102817e-07 0.0000000e+00]
action means and variances at step -5: [ 1.203726 -2.      ] [6.5633026e-07 0.0000000e+00]
Episode * 1540 * exploration epsilon 0.1 * Episodic Reward is ==> -14.001974105834961
action means and variances at step -10: [ 1.2755058 -2.       ] [5.165868e-07 0.000000e+00]
action means and variances at step -5: [ 1.2772276 -2.       ] [3.1938316e-07 0.0000000e+00]
Episode * 1541 * exploration epsilon 0.1 * Episodic Reward is ==> -13.879846572875977
action means and variances at step -10: [ 1.2444229 -2.       ] [6.0266666e-07 0.0000000e+00]
action means and variances at step -5: [ 1.2638022 -2.       ] [6.215886e-07 0.000000e+00]
Episode * 1542 * exploration epsilon 0.1 * Episodic Reward is ==> -14.111631393432617
action means and variances at step -10: [ 1.2541233 -2.       ] [6.094183e-07 0.000000e+00]
action mea

Episode * 1555 * exploration epsilon 0.1 * Episodic Reward is ==> -14.543824195861816
action means and variances at step -10: [ 1.3808875 -2.       ] [7.928517e-07 0.000000e+00]
action means and variances at step -5: [ 1.3882571 -2.       ] [8.4452284e-07 0.0000000e+00]
Episode * 1556 * exploration epsilon 0.1 * Episodic Reward is ==> -14.424638748168945
action means and variances at step -10: [ 1.4233805 -2.       ] [6.454253e-07 0.000000e+00]
action means and variances at step -5: [ 1.43578 -2.     ] [6.303234e-07 0.000000e+00]
Episode * 1557 * exploration epsilon 0.1 * Episodic Reward is ==> -14.456470489501953
action means and variances at step -10: [ 1.4118327 -2.       ] [2.8756537e-07 0.0000000e+00]
action means and variances at step -5: [ 1.4018382 -2.       ] [2.425418e-07 0.000000e+00]
Episode * 1558 * exploration epsilon 0.1 * Episodic Reward is ==> -14.770408630371094
action means and variances at step -10: [ 1.5075457 -2.       ] [3.7145327e-07 0.0000000e+00]
action means 

Episode * 1571 * exploration epsilon 0.1 * Episodic Reward is ==> -14.27918529510498
action means and variances at step -10: [ 1.3709277 -2.       ] [6.5972495e-07 0.0000000e+00]
action means and variances at step -5: [ 1.4100032 -2.       ] [5.2401396e-07 0.0000000e+00]
Episode * 1572 * exploration epsilon 0.1 * Episodic Reward is ==> -14.995378494262695
action means and variances at step -10: [ 1.3146546 -2.       ] [5.3246947e-07 0.0000000e+00]
action means and variances at step -5: [ 1.2858394 -2.       ] [3.1627553e-07 0.0000000e+00]
Episode * 1573 * exploration epsilon 0.1 * Episodic Reward is ==> -14.369856834411621
action means and variances at step -10: [ 1.3670884 -2.       ] [6.087691e-07 0.000000e+00]
action means and variances at step -5: [ 1.4062719 -2.       ] [5.45578e-07 0.00000e+00]
Episode * 1574 * exploration epsilon 0.1 * Episodic Reward is ==> -14.41110897064209
action means and variances at step -10: [ 1.3987689 -2.       ] [6.901151e-07 0.000000e+00]
action mean

Episode * 1587 * exploration epsilon 0.1 * Episodic Reward is ==> -14.099668502807617
action means and variances at step -10: [ 1.4169126 -2.       ] [5.083143e-07 0.000000e+00]
action means and variances at step -5: [ 1.4046103 -2.       ] [3.9699395e-07 0.0000000e+00]
Episode * 1588 * exploration epsilon 0.1 * Episodic Reward is ==> -14.566917419433594
action means and variances at step -10: [ 1.400685 -2.      ] [2.1168185e-07 0.0000000e+00]
action means and variances at step -5: [ 1.3750515 -2.       ] [2.083971e-07 0.000000e+00]
Episode * 1589 * exploration epsilon 0.1 * Episodic Reward is ==> -14.551469802856445
action means and variances at step -10: [ 1.4273636 -2.       ] [3.0665117e-07 0.0000000e+00]
action means and variances at step -5: [ 1.4443951 -2.       ] [2.778119e-07 0.000000e+00]
Episode * 1590 * exploration epsilon 0.1 * Episodic Reward is ==> -14.37287425994873
action means and variances at step -10: [ 1.3861275 -2.       ] [3.5588687e-07 0.0000000e+00]
action mea

Episode * 1603 * exploration epsilon 0.1 * Episodic Reward is ==> -14.669807434082031
action means and variances at step -10: [ 1.3561342 -2.       ] [3.211581e-07 0.000000e+00]
action means and variances at step -5: [ 1.3562701 -2.       ] [2.3128294e-07 0.0000000e+00]
Episode * 1604 * exploration epsilon 0.1 * Episodic Reward is ==> -14.502167701721191
action means and variances at step -10: [ 1.3990467 -2.       ] [3.2133423e-07 0.0000000e+00]
action means and variances at step -5: [ 1.4037952 -2.       ] [2.7521162e-07 0.0000000e+00]
Episode * 1605 * exploration epsilon 0.1 * Episodic Reward is ==> -14.69697380065918
action means and variances at step -10: [ 1.402542 -2.      ] [3.0752517e-07 0.0000000e+00]
action means and variances at step -5: [ 1.411381 -2.      ] [2.8705534e-07 0.0000000e+00]
Episode * 1606 * exploration epsilon 0.1 * Episodic Reward is ==> -14.335214614868164
action means and variances at step -10: [ 1.3586627 -2.       ] [2.56949e-07 0.00000e+00]
action means

Episode * 1619 * exploration epsilon 0.1 * Episodic Reward is ==> -14.428438186645508
action means and variances at step -10: [ 1.4830062 -2.       ] [4.348434e-07 0.000000e+00]
action means and variances at step -5: [ 1.4839224 -2.       ] [2.6410393e-07 0.0000000e+00]
Episode * 1620 * exploration epsilon 0.1 * Episodic Reward is ==> -14.528146743774414
action means and variances at step -10: [ 1.3607863 -2.       ] [3.0630386e-07 0.0000000e+00]
action means and variances at step -5: [ 1.3761272 -2.       ] [2.9775666e-07 0.0000000e+00]
Episode * 1621 * exploration epsilon 0.1 * Episodic Reward is ==> -14.262701988220215
action means and variances at step -10: [ 1.4035065 -2.       ] [4.1050242e-07 0.0000000e+00]
action means and variances at step -5: [ 1.389184 -2.      ] [3.8144375e-07 0.0000000e+00]
Episode * 1622 * exploration epsilon 0.1 * Episodic Reward is ==> -14.544835090637207
action means and variances at step -10: [ 1.3913788 -2.       ] [3.1578867e-07 0.0000000e+00]
actio

Episode * 1635 * exploration epsilon 0.1 * Episodic Reward is ==> -13.61918830871582
action means and variances at step -10: [ 1.0653592 -2.       ] [3.4378e-07 0.0000e+00]
action means and variances at step -5: [ 1.0639318 -2.       ] [2.1335325e-07 0.0000000e+00]
Episode * 1636 * exploration epsilon 0.1 * Episodic Reward is ==> -14.166948318481445
action means and variances at step -10: [ 1.0953763 -2.       ] [2.7494244e-07 0.0000000e+00]
action means and variances at step -5: [ 1.1128606 -2.       ] [2.5914983e-07 0.0000000e+00]
Episode * 1637 * exploration epsilon 0.1 * Episodic Reward is ==> -14.597122192382812
action means and variances at step -10: [ 1.1314366 -2.       ] [2.3068729e-07 0.0000000e+00]
action means and variances at step -5: [ 1.1252056 -2.       ] [2.2753059e-07 0.0000000e+00]
Episode * 1638 * exploration epsilon 0.1 * Episodic Reward is ==> -13.908893585205078
action means and variances at step -10: [ 1.1216114 -2.       ] [1.6260535e-07 0.0000000e+00]
action m

Episode * 1651 * exploration epsilon 0.1 * Episodic Reward is ==> -13.66163444519043
action means and variances at step -10: [ 1.0676465 -2.       ] [1.2264886e-07 0.0000000e+00]
action means and variances at step -5: [ 1.0895 -2.    ] [1.3330558e-07 0.0000000e+00]
Episode * 1652 * exploration epsilon 0.1 * Episodic Reward is ==> -14.242225646972656
action means and variances at step -10: [ 1.0710628 -2.       ] [1.8732467e-07 0.0000000e+00]
action means and variances at step -5: [ 1.0458926 -2.       ] [1.4801017e-07 0.0000000e+00]
Episode * 1653 * exploration epsilon 0.1 * Episodic Reward is ==> -13.938386917114258
action means and variances at step -10: [ 1.1114558 -2.       ] [1.5106818e-07 0.0000000e+00]
action means and variances at step -5: [ 1.082034 -2.      ] [1.16320116e-07 0.00000000e+00]
Episode * 1654 * exploration epsilon 0.1 * Episodic Reward is ==> -13.89883804321289
action means and variances at step -10: [ 1.103868 -2.      ] [2.074527e-07 0.000000e+00]
action means 

Episode * 1667 * exploration epsilon 0.1 * Episodic Reward is ==> -13.532615661621094
action means and variances at step -10: [ 1.0525827 -2.       ] [1.1974004e-07 0.0000000e+00]
action means and variances at step -5: [ 1.0870844 -2.       ] [1.5614692e-07 0.0000000e+00]
Episode * 1668 * exploration epsilon 0.1 * Episodic Reward is ==> -13.962503433227539
action means and variances at step -10: [ 1.065944 -2.      ] [1.7448677e-07 0.0000000e+00]
action means and variances at step -5: [ 1.0383898 -2.       ] [1.4062536e-07 0.0000000e+00]
Episode * 1669 * exploration epsilon 0.1 * Episodic Reward is ==> -13.866488456726074
action means and variances at step -10: [ 1.0970546 -2.       ] [1.6978825e-07 0.0000000e+00]
action means and variances at step -5: [ 1.0752939 -2.       ] [1.8504369e-07 0.0000000e+00]
Episode * 1670 * exploration epsilon 0.1 * Episodic Reward is ==> -13.9432373046875
action means and variances at step -10: [ 1.1223335 -2.       ] [1.3447499e-07 0.0000000e+00]
actio

Episode * 1683 * exploration epsilon 0.1 * Episodic Reward is ==> -13.825565338134766
action means and variances at step -10: [ 1.1021583 -2.       ] [7.657847e-08 0.000000e+00]
action means and variances at step -5: [ 1.0794586 -2.       ] [8.603937e-08 0.000000e+00]
Episode * 1684 * exploration epsilon 0.1 * Episodic Reward is ==> -13.908554077148438
action means and variances at step -10: [ 1.0672156 -2.       ] [1.10384065e-07 0.00000000e+00]
action means and variances at step -5: [ 1.0614687 -2.       ] [9.8827726e-08 0.0000000e+00]
Episode * 1685 * exploration epsilon 0.1 * Episodic Reward is ==> -13.8580961227417
action means and variances at step -10: [ 1.0658726 -2.       ] [1.13640354e-07 0.00000000e+00]
action means and variances at step -5: [ 1.0785878 -2.       ] [1.2579254e-07 0.0000000e+00]
Episode * 1686 * exploration epsilon 0.1 * Episodic Reward is ==> -14.021470069885254
action means and variances at step -10: [ 1.0691674 -2.       ] [9.5498294e-08 0.0000000e+00]
act

Episode * 1699 * exploration epsilon 0.1 * Episodic Reward is ==> -14.125008583068848
action means and variances at step -10: [ 1.0614023 -2.       ] [9.232196e-08 0.000000e+00]
action means and variances at step -5: [ 1.0605159 -2.       ] [8.247354e-08 0.000000e+00]
Episode * 1700 * exploration epsilon 0.1 * Episodic Reward is ==> -13.666044235229492
action means and variances at step -10: [ 1.0974928 -2.       ] [9.590199e-08 0.000000e+00]
action means and variances at step -5: [ 1.0884286 -2.       ] [1.0232741e-07 0.0000000e+00]
Episode * 1701 * exploration epsilon 0.1 * Episodic Reward is ==> -14.075536727905273
action means and variances at step -10: [ 1.0468165 -2.       ] [1.01216344e-07 0.00000000e+00]
action means and variances at step -5: [ 1.0484896 -2.       ] [1.08422604e-07 0.00000000e+00]
Episode * 1702 * exploration epsilon 0.1 * Episodic Reward is ==> -14.525196075439453
action means and variances at step -10: [ 1.0544398 -2.       ] [6.328638e-08 0.000000e+00]
actio

Episode * 1715 * exploration epsilon 0.1 * Episodic Reward is ==> -14.560382843017578
action means and variances at step -10: [ 1.086858 -2.      ] [6.0804425e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1079826 -2.       ] [8.846672e-08 0.000000e+00]
Episode * 1716 * exploration epsilon 0.1 * Episodic Reward is ==> -13.533050537109375
action means and variances at step -10: [ 1.0924566 -2.       ] [6.512004e-08 0.000000e+00]
action means and variances at step -5: [ 1.1171294 -2.       ] [7.503513e-08 0.000000e+00]
Episode * 1717 * exploration epsilon 0.1 * Episodic Reward is ==> -13.56010627746582
action means and variances at step -10: [ 1.051344 -2.      ] [6.033194e-08 0.000000e+00]
action means and variances at step -5: [ 1.0701803 -2.       ] [6.067481e-08 0.000000e+00]
Episode * 1718 * exploration epsilon 0.1 * Episodic Reward is ==> -13.969627380371094
action means and variances at step -10: [ 1.082736 -2.      ] [7.466149e-08 0.000000e+00]
action means and var

Episode * 1731 * exploration epsilon 0.1 * Episodic Reward is ==> -14.018213272094727
action means and variances at step -10: [ 1.0982329 -2.       ] [7.55237e-08 0.00000e+00]
action means and variances at step -5: [ 1.0887775 -2.       ] [7.515065e-08 0.000000e+00]
Episode * 1732 * exploration epsilon 0.1 * Episodic Reward is ==> -13.683387756347656
action means and variances at step -10: [ 1.0706995 -2.       ] [7.095622e-08 0.000000e+00]
action means and variances at step -5: [ 1.0602753 -2.       ] [7.3289215e-08 0.0000000e+00]
Episode * 1733 * exploration epsilon 0.1 * Episodic Reward is ==> -13.702967643737793
action means and variances at step -10: [ 1.0778943 -2.       ] [5.57541e-08 0.00000e+00]
action means and variances at step -5: [ 1.0865725 -2.       ] [6.078821e-08 0.000000e+00]
Episode * 1734 * exploration epsilon 0.1 * Episodic Reward is ==> -14.174270629882812
action means and variances at step -10: [ 1.0730833 -2.       ] [7.0811716e-08 0.0000000e+00]
action means an

Episode * 1747 * exploration epsilon 0.1 * Episodic Reward is ==> -13.836538314819336
action means and variances at step -10: [ 1.0682634 -2.       ] [6.3300305e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0385057 -2.       ] [4.1783302e-08 0.0000000e+00]
Episode * 1748 * exploration epsilon 0.1 * Episodic Reward is ==> -14.017459869384766
action means and variances at step -10: [ 1.1093403 -2.       ] [7.514298e-08 0.000000e+00]
action means and variances at step -5: [ 1.0999929 -2.       ] [3.822152e-08 0.000000e+00]
Episode * 1749 * exploration epsilon 0.1 * Episodic Reward is ==> -14.197675704956055
action means and variances at step -10: [ 1.0895181 -2.       ] [7.0573954e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0650564 -2.       ] [5.837071e-08 0.000000e+00]
Episode * 1750 * exploration epsilon 0.1 * Episodic Reward is ==> -13.59233570098877
action means and variances at step -10: [ 1.1189752 -2.       ] [5.436716e-08 0.000000e+00]
action mea

Episode * 1763 * exploration epsilon 0.1 * Episodic Reward is ==> -14.33712387084961
action means and variances at step -10: [ 1.1014321 -2.       ] [8.806604e-08 0.000000e+00]
action means and variances at step -5: [ 1.0852331 -2.       ] [7.9441605e-08 0.0000000e+00]
Episode * 1764 * exploration epsilon 0.1 * Episodic Reward is ==> -14.080002784729004
action means and variances at step -10: [ 1.1323515 -2.       ] [1.1413312e-07 0.0000000e+00]
action means and variances at step -5: [ 1.128021 -2.      ] [1.0928308e-07 0.0000000e+00]
Episode * 1765 * exploration epsilon 0.1 * Episodic Reward is ==> -14.019843101501465
action means and variances at step -10: [ 1.1463252 -2.       ] [1.1969206e-07 0.0000000e+00]
action means and variances at step -5: [ 1.1494173 -2.       ] [1.364005e-07 0.000000e+00]
Episode * 1766 * exploration epsilon 0.1 * Episodic Reward is ==> -14.248966217041016
action means and variances at step -10: [ 1.1197218 -2.       ] [9.48168e-08 0.00000e+00]
action means

Episode * 1779 * exploration epsilon 0.1 * Episodic Reward is ==> -13.414048194885254
action means and variances at step -10: [ 1.1027231 -2.       ] [7.727676e-08 0.000000e+00]
action means and variances at step -5: [ 1.0849594 -2.       ] [6.531112e-08 0.000000e+00]
Episode * 1780 * exploration epsilon 0.1 * Episodic Reward is ==> -13.878467559814453
action means and variances at step -10: [ 1.0937909 -2.       ] [5.8925274e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0974607 -2.       ] [5.520416e-08 0.000000e+00]
Episode * 1781 * exploration epsilon 0.1 * Episodic Reward is ==> -14.072742462158203
action means and variances at step -10: [ 1.0937204 -2.       ] [5.066015e-08 0.000000e+00]
action means and variances at step -5: [ 1.0873642 -2.       ] [5.3726218e-08 0.0000000e+00]
Episode * 1782 * exploration epsilon 0.1 * Episodic Reward is ==> -14.085939407348633
action means and variances at step -10: [ 1.08455 -2.     ] [4.44943e-08 0.00000e+00]
action means and 

Episode * 1795 * exploration epsilon 0.1 * Episodic Reward is ==> -14.270896911621094
action means and variances at step -10: [ 1.1081525 -2.       ] [6.0153894e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1016834 -2.       ] [7.025051e-08 0.000000e+00]
Episode * 1796 * exploration epsilon 0.1 * Episodic Reward is ==> -14.06399154663086
action means and variances at step -10: [ 1.0854913 -2.       ] [5.0181896e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0815922 -2.       ] [4.7042988e-08 0.0000000e+00]
Episode * 1797 * exploration epsilon 0.1 * Episodic Reward is ==> -14.083846092224121
action means and variances at step -10: [ 1.0657021 -2.       ] [3.718144e-08 0.000000e+00]
action means and variances at step -5: [ 1.0790349 -2.       ] [2.7703058e-08 0.0000000e+00]
Episode * 1798 * exploration epsilon 0.1 * Episodic Reward is ==> -13.621604919433594
action means and variances at step -10: [ 1.0729172 -2.       ] [3.734083e-08 0.000000e+00]
action m

Episode * 1811 * exploration epsilon 0.1 * Episodic Reward is ==> -14.353256225585938
action means and variances at step -10: [ 1.1111454 -2.       ] [1.0350129e-07 0.0000000e+00]
action means and variances at step -5: [ 1.1025627 -2.       ] [7.589432e-08 0.000000e+00]
Episode * 1812 * exploration epsilon 0.1 * Episodic Reward is ==> -14.265095710754395
action means and variances at step -10: [ 1.1234797 -2.       ] [3.7454495e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1183461 -2.       ] [4.8789715e-08 0.0000000e+00]
Episode * 1813 * exploration epsilon 0.1 * Episodic Reward is ==> -14.07049560546875
action means and variances at step -10: [ 1.0809389 -2.       ] [7.744201e-08 0.000000e+00]
action means and variances at step -5: [ 1.0906136 -2.       ] [7.3680944e-08 0.0000000e+00]
Episode * 1814 * exploration epsilon 0.1 * Episodic Reward is ==> -13.990703582763672
action means and variances at step -10: [ 1.0949669 -2.       ] [6.886899e-08 0.000000e+00]
action m

Episode * 1827 * exploration epsilon 0.1 * Episodic Reward is ==> -13.936058044433594
action means and variances at step -10: [ 1.1133987 -2.       ] [5.740005e-08 0.000000e+00]
action means and variances at step -5: [ 1.1341486 -2.       ] [6.434127e-08 0.000000e+00]
Episode * 1828 * exploration epsilon 0.1 * Episodic Reward is ==> -14.273677825927734
action means and variances at step -10: [ 1.0968486 -2.       ] [7.2057716e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1104776 -2.       ] [5.7384888e-08 0.0000000e+00]
Episode * 1829 * exploration epsilon 0.1 * Episodic Reward is ==> -14.032845497131348
action means and variances at step -10: [ 1.1136827 -2.       ] [6.048779e-08 0.000000e+00]
action means and variances at step -5: [ 1.1049918 -2.       ] [5.0105246e-08 0.0000000e+00]
Episode * 1830 * exploration epsilon 0.1 * Episodic Reward is ==> -13.984208106994629
action means and variances at step -10: [ 1.1086812 -2.       ] [5.279378e-08 0.000000e+00]
action me

Episode * 1843 * exploration epsilon 0.1 * Episodic Reward is ==> -14.006095886230469
action means and variances at step -10: [ 1.0866426 -2.       ] [5.8982547e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1066097 -2.       ] [5.479645e-08 0.000000e+00]
Episode * 1844 * exploration epsilon 0.1 * Episodic Reward is ==> -14.260828018188477
action means and variances at step -10: [ 1.1130346 -2.       ] [7.0115306e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1051072 -2.       ] [6.663585e-08 0.000000e+00]
Episode * 1845 * exploration epsilon 0.1 * Episodic Reward is ==> -14.462737083435059
action means and variances at step -10: [ 1.1002097 -2.       ] [3.5783316e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0901291 -2.       ] [3.631987e-08 0.000000e+00]
Episode * 1846 * exploration epsilon 0.1 * Episodic Reward is ==> -14.14889907836914
action means and variances at step -10: [ 1.1077194 -2.       ] [6.060047e-08 0.000000e+00]
action mea

Episode * 1859 * exploration epsilon 0.1 * Episodic Reward is ==> -14.157760620117188
action means and variances at step -10: [ 1.0995127 -2.       ] [5.3456453e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0830724 -2.       ] [4.4899316e-08 0.0000000e+00]
Episode * 1860 * exploration epsilon 0.1 * Episodic Reward is ==> -14.055521011352539
action means and variances at step -10: [ 1.1154481 -2.       ] [7.780186e-08 0.000000e+00]
action means and variances at step -5: [ 1.1203629 -2.       ] [6.09512e-08 0.00000e+00]
Episode * 1861 * exploration epsilon 0.1 * Episodic Reward is ==> -14.063264846801758
action means and variances at step -10: [ 1.0934542 -2.       ] [1.0335646e-07 0.0000000e+00]
action means and variances at step -5: [ 1.0994965 -2.       ] [1.265915e-07 0.000000e+00]
Episode * 1862 * exploration epsilon 0.1 * Episodic Reward is ==> -14.113718032836914
action means and variances at step -10: [ 1.1030451 -2.       ] [8.5546695e-08 0.0000000e+00]
action me

Episode * 1875 * exploration epsilon 0.1 * Episodic Reward is ==> -13.692218780517578
action means and variances at step -10: [ 1.0612285 -2.       ] [8.185047e-08 0.000000e+00]
action means and variances at step -5: [ 1.0759765 -2.       ] [6.94704e-08 0.00000e+00]
Episode * 1876 * exploration epsilon 0.1 * Episodic Reward is ==> -13.768514633178711
action means and variances at step -10: [ 1.1084846 -2.       ] [9.293695e-08 0.000000e+00]
action means and variances at step -5: [ 1.0946498 -2.       ] [6.90441e-08 0.00000e+00]
Episode * 1877 * exploration epsilon 0.1 * Episodic Reward is ==> -13.63718318939209
action means and variances at step -10: [ 1.1314152 -2.       ] [5.8199134e-08 0.0000000e+00]
action means and variances at step -5: [ 1.120647 -2.      ] [5.822223e-08 0.000000e+00]
Episode * 1878 * exploration epsilon 0.1 * Episodic Reward is ==> -14.295479774475098
action means and variances at step -10: [ 1.0820224 -2.       ] [1.1746287e-07 0.0000000e+00]
action means and v

Episode * 1891 * exploration epsilon 0.1 * Episodic Reward is ==> -14.203756332397461
action means and variances at step -10: [ 1.0709784 -2.       ] [1.2443569e-07 0.0000000e+00]
action means and variances at step -5: [ 1.0650412 -2.       ] [9.308146e-08 0.000000e+00]
Episode * 1892 * exploration epsilon 0.1 * Episodic Reward is ==> -14.29293155670166
action means and variances at step -10: [ 1.1463457 -2.       ] [2.0507014e-06 0.0000000e+00]
action means and variances at step -5: [ 1.1452532 -2.       ] [1.01732546e-07 0.00000000e+00]
Episode * 1893 * exploration epsilon 0.1 * Episodic Reward is ==> -14.063056945800781
action means and variances at step -10: [ 1.1000855 -2.       ] [1.1495355e-07 0.0000000e+00]
action means and variances at step -5: [ 1.0916082 -2.       ] [1.0961851e-07 0.0000000e+00]
Episode * 1894 * exploration epsilon 0.1 * Episodic Reward is ==> -14.071109771728516
action means and variances at step -10: [ 1.0988998 -2.       ] [1.09373296e-07 0.00000000e+00]


Episode * 1907 * exploration epsilon 0.1 * Episodic Reward is ==> -14.194817543029785
action means and variances at step -10: [ 1.2747477 -2.       ] [8.546454e-08 0.000000e+00]
action means and variances at step -5: [ 1.2735846 -2.       ] [1.2607151e-07 0.0000000e+00]
Episode * 1908 * exploration epsilon 0.1 * Episodic Reward is ==> -14.381232261657715
action means and variances at step -10: [ 1.2778828 -2.       ] [6.1983684e-08 0.0000000e+00]
action means and variances at step -5: [ 1.2779226 -2.       ] [6.208898e-08 0.000000e+00]
Episode * 1909 * exploration epsilon 0.1 * Episodic Reward is ==> -14.353401184082031
action means and variances at step -10: [ 1.2494036 -2.       ] [2.8022429e-08 0.0000000e+00]
action means and variances at step -5: [ 1.2479174 -2.       ] [2.5707964e-08 0.0000000e+00]
Episode * 1910 * exploration epsilon 0.1 * Episodic Reward is ==> -14.091593742370605
action means and variances at step -10: [ 1.2352169 -2.       ] [2.6198455e-08 0.0000000e+00]
actio

Episode * 1923 * exploration epsilon 0.1 * Episodic Reward is ==> -14.09391975402832
action means and variances at step -10: [ 1.2280385 -2.       ] [2.9833817e-08 0.0000000e+00]
action means and variances at step -5: [ 1.2267611 -2.       ] [1.4388846e-08 0.0000000e+00]
Episode * 1924 * exploration epsilon 0.1 * Episodic Reward is ==> -14.450053215026855
action means and variances at step -10: [ 1.2057608 -2.       ] [3.9630493e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1921538 -2.       ] [3.4857898e-08 0.0000000e+00]
Episode * 1925 * exploration epsilon 0.1 * Episodic Reward is ==> -14.226308822631836
action means and variances at step -10: [ 1.1815498 -2.       ] [2.8773291e-08 0.0000000e+00]
action means and variances at step -5: [ 1.17926 -2.     ] [2.4016689e-08 0.0000000e+00]
Episode * 1926 * exploration epsilon 0.1 * Episodic Reward is ==> -14.288997650146484
action means and variances at step -10: [ 1.1240945 -2.       ] [3.1633775e-08 0.0000000e+00]
action

Episode * 1939 * exploration epsilon 0.1 * Episodic Reward is ==> -13.99472427368164
action means and variances at step -10: [ 1.1968569 -2.       ] [3.4603318e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1867541 -2.       ] [2.9437325e-08 0.0000000e+00]
Episode * 1940 * exploration epsilon 0.1 * Episodic Reward is ==> -14.076769828796387
action means and variances at step -10: [ 1.1957452 -2.       ] [3.555219e-08 0.000000e+00]
action means and variances at step -5: [ 1.194761 -2.      ] [3.2321733e-08 0.0000000e+00]
Episode * 1941 * exploration epsilon 0.1 * Episodic Reward is ==> -14.224069595336914
action means and variances at step -10: [ 1.1846662 -2.       ] [2.793044e-08 0.000000e+00]
action means and variances at step -5: [ 1.1858711 -2.       ] [2.471257e-08 0.000000e+00]
Episode * 1942 * exploration epsilon 0.1 * Episodic Reward is ==> -14.066774368286133
action means and variances at step -10: [ 1.1689276 -2.       ] [2.9735977e-08 0.0000000e+00]
action mea

Episode * 1955 * exploration epsilon 0.1 * Episodic Reward is ==> -13.675284385681152
action means and variances at step -10: [ 1.1590068 -2.       ] [2.1979528e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1586007 -2.       ] [1.638093e-08 0.000000e+00]
Episode * 1956 * exploration epsilon 0.1 * Episodic Reward is ==> -14.438308715820312
action means and variances at step -10: [ 1.1598275 -2.       ] [1.871091e-08 0.000000e+00]
action means and variances at step -5: [ 1.1604937 -2.       ] [1.9769528e-08 0.0000000e+00]
Episode * 1957 * exploration epsilon 0.1 * Episodic Reward is ==> -14.340957641601562
action means and variances at step -10: [ 1.1581557 -2.       ] [2.5084894e-08 0.0000000e+00]
action means and variances at step -5: [ 1.157882 -2.      ] [2.1605137e-08 0.0000000e+00]
Episode * 1958 * exploration epsilon 0.1 * Episodic Reward is ==> -14.323453903198242
action means and variances at step -10: [ 1.1559733 -2.       ] [2.203273e-08 0.000000e+00]
action me

Episode * 1971 * exploration epsilon 0.1 * Episodic Reward is ==> -14.213937759399414
action means and variances at step -10: [ 1.1049496 -2.       ] [1.8570805e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0923635 -2.       ] [1.496304e-08 0.000000e+00]
Episode * 1972 * exploration epsilon 0.1 * Episodic Reward is ==> -14.111398696899414
action means and variances at step -10: [ 1.1213531 -2.       ] [2.4846315e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1080874 -2.       ] [2.3884258e-08 0.0000000e+00]
Episode * 1973 * exploration epsilon 0.1 * Episodic Reward is ==> -13.890416145324707
action means and variances at step -10: [ 1.0914887 -2.       ] [2.0612985e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0868652 -2.       ] [1.648539e-08 0.000000e+00]
Episode * 1974 * exploration epsilon 0.1 * Episodic Reward is ==> -14.176307678222656
action means and variances at step -10: [ 1.1389511 -2.       ] [2.047739e-08 0.000000e+00]
action 

Episode * 1987 * exploration epsilon 0.1 * Episodic Reward is ==> -13.895283699035645
action means and variances at step -10: [ 1.1157618 -2.       ] [2.6115414e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1140187 -2.       ] [2.722693e-08 0.000000e+00]
Episode * 1988 * exploration epsilon 0.1 * Episodic Reward is ==> -13.96505355834961
action means and variances at step -10: [ 1.0930375 -2.       ] [5.76507e-08 0.00000e+00]
action means and variances at step -5: [ 1.1130062 -2.       ] [1.2632081e-07 0.0000000e+00]
Episode * 1989 * exploration epsilon 0.1 * Episodic Reward is ==> -13.690702438354492
action means and variances at step -10: [ 1.1044989 -2.       ] [3.3700804e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0886524 -2.       ] [3.7188286e-08 0.0000000e+00]
Episode * 1990 * exploration epsilon 0.1 * Episodic Reward is ==> -13.688972473144531
action means and variances at step -10: [ 1.104069 -2.      ] [3.8754038e-08 0.0000000e+00]
action mea

Episode * 2003 * exploration epsilon 0.1 * Episodic Reward is ==> -14.513229370117188
action means and variances at step -10: [ 1.1146318 -2.       ] [2.6795043e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1091386 -2.       ] [3.3759672e-08 0.0000000e+00]
Episode * 2004 * exploration epsilon 0.1 * Episodic Reward is ==> -14.10627555847168
action means and variances at step -10: [ 1.1138483 -2.       ] [2.5370989e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1147344 -2.       ] [3.545719e-08 0.000000e+00]
Episode * 2005 * exploration epsilon 0.1 * Episodic Reward is ==> -13.845970153808594
action means and variances at step -10: [ 1.132276 -2.      ] [2.293357e-08 0.000000e+00]
action means and variances at step -5: [ 1.1341231 -2.       ] [3.0071305e-08 0.0000000e+00]
Episode * 2006 * exploration epsilon 0.1 * Episodic Reward is ==> -13.766203880310059
action means and variances at step -10: [ 1.1371086 -2.       ] [2.2839828e-08 0.0000000e+00]
action m

Episode * 2019 * exploration epsilon 0.1 * Episodic Reward is ==> -14.256235122680664
action means and variances at step -10: [ 1.1293818 -2.       ] [4.1211266e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1299453 -2.       ] [3.9494196e-08 0.0000000e+00]
Episode * 2020 * exploration epsilon 0.1 * Episodic Reward is ==> -14.308206558227539
action means and variances at step -10: [ 1.1083444 -2.       ] [3.5467853e-08 0.0000000e+00]
action means and variances at step -5: [ 1.118139 -2.      ] [4.2871008e-08 0.0000000e+00]
Episode * 2021 * exploration epsilon 0.1 * Episodic Reward is ==> -14.299854278564453
action means and variances at step -10: [ 1.1241769 -2.       ] [3.6231953e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1331564 -2.       ] [3.9911807e-08 0.0000000e+00]
Episode * 2022 * exploration epsilon 0.1 * Episodic Reward is ==> -13.990460395812988
action means and variances at step -10: [ 1.1377704 -2.       ] [2.333829e-08 0.000000e+00]
actio

Episode * 2035 * exploration epsilon 0.1 * Episodic Reward is ==> -13.97689151763916
action means and variances at step -10: [ 1.1395628 -2.       ] [3.718546e-08 0.000000e+00]
action means and variances at step -5: [ 1.1325657 -2.       ] [3.6851006e-08 0.0000000e+00]
Episode * 2036 * exploration epsilon 0.1 * Episodic Reward is ==> -13.82558822631836
action means and variances at step -10: [ 1.1509027 -2.       ] [3.5890153e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1570879 -2.       ] [4.628388e-08 0.000000e+00]
Episode * 2037 * exploration epsilon 0.1 * Episodic Reward is ==> -14.194392204284668
action means and variances at step -10: [ 1.0835632 -2.       ] [4.561508e-08 0.000000e+00]
action means and variances at step -5: [ 1.0876497 -2.       ] [5.1824596e-08 0.0000000e+00]
Episode * 2038 * exploration epsilon 0.1 * Episodic Reward is ==> -14.257255554199219
action means and variances at step -10: [ 1.1265416 -2.       ] [5.1933824e-08 0.0000000e+00]
action me

Episode * 2051 * exploration epsilon 0.1 * Episodic Reward is ==> -13.927223205566406
action means and variances at step -10: [ 1.1184752 -2.       ] [4.0642146e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1268543 -2.       ] [4.3631264e-08 0.0000000e+00]
Episode * 2052 * exploration epsilon 0.1 * Episodic Reward is ==> -14.17933177947998
action means and variances at step -10: [ 1.1193905 -2.       ] [4.2069892e-08 0.0000000e+00]
action means and variances at step -5: [ 1.10505 -2.     ] [3.3625035e-08 0.0000000e+00]
Episode * 2053 * exploration epsilon 0.1 * Episodic Reward is ==> -13.781452178955078
action means and variances at step -10: [ 1.15307 -2.     ] [4.929891e-08 0.000000e+00]
action means and variances at step -5: [ 1.1519074 -2.       ] [4.0268084e-08 0.0000000e+00]
Episode * 2054 * exploration epsilon 0.1 * Episodic Reward is ==> -13.619447708129883
action means and variances at step -10: [ 1.157295 -2.      ] [4.666259e-08 0.000000e+00]
action means and

Episode * 2067 * exploration epsilon 0.1 * Episodic Reward is ==> -14.370231628417969
action means and variances at step -10: [ 1.1075091 -2.       ] [2.2792326e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1102716 -2.       ] [2.601869e-08 0.000000e+00]
Episode * 2068 * exploration epsilon 0.1 * Episodic Reward is ==> -13.952226638793945
action means and variances at step -10: [ 1.0975534 -2.       ] [2.3575847e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1042655 -2.       ] [1.9201071e-08 0.0000000e+00]
Episode * 2069 * exploration epsilon 0.1 * Episodic Reward is ==> -14.287449836730957
action means and variances at step -10: [ 1.053065 -2.      ] [2.3233822e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0478028 -2.       ] [2.179468e-08 0.000000e+00]
Episode * 2070 * exploration epsilon 0.1 * Episodic Reward is ==> -13.896624565124512
action means and variances at step -10: [ 1.0359633 -2.       ] [1.738505e-08 0.000000e+00]
action me

Episode * 2083 * exploration epsilon 0.1 * Episodic Reward is ==> -14.032861709594727
action means and variances at step -10: [ 1.0420351 -2.       ] [2.0010837e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0445992 -2.       ] [2.1197376e-08 0.0000000e+00]
Episode * 2084 * exploration epsilon 0.1 * Episodic Reward is ==> -13.943204879760742
action means and variances at step -10: [ 1.0612382 -2.       ] [1.9894015e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0690602 -2.       ] [2.7846015e-08 0.0000000e+00]
Episode * 2085 * exploration epsilon 0.1 * Episodic Reward is ==> -13.743261337280273
action means and variances at step -10: [ 1.1124768 -2.       ] [5.994263e-08 0.000000e+00]
action means and variances at step -5: [ 1.0990984 -2.       ] [6.251515e-08 0.000000e+00]
Episode * 2086 * exploration epsilon 0.1 * Episodic Reward is ==> -14.201935768127441
action means and variances at step -10: [ 1.163443 -2.      ] [1.1680409e-07 0.0000000e+00]
action 

Episode * 2099 * exploration epsilon 0.1 * Episodic Reward is ==> -13.838544845581055
action means and variances at step -10: [ 1.1470164 -2.       ] [3.3327524e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1528795 -2.       ] [3.6332068e-08 0.0000000e+00]
Episode * 2100 * exploration epsilon 0.1 * Episodic Reward is ==> -14.380611419677734
action means and variances at step -10: [ 1.1483314 -2.       ] [1.7715777e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1461546 -2.       ] [1.912087e-08 0.000000e+00]
Episode * 2101 * exploration epsilon 0.1 * Episodic Reward is ==> -13.8013916015625
action means and variances at step -10: [ 1.1490947 -2.       ] [1.7209052e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1485586 -2.       ] [1.9086036e-08 0.0000000e+00]
Episode * 2102 * exploration epsilon 0.1 * Episodic Reward is ==> -14.227680206298828
action means and variances at step -10: [ 1.1494823 -2.       ] [2.0565516e-08 0.0000000e+00]
actio

Episode * 2115 * exploration epsilon 0.1 * Episodic Reward is ==> -14.366316795349121
action means and variances at step -10: [ 1.1381286 -2.       ] [2.892242e-08 0.000000e+00]
action means and variances at step -5: [ 1.1416247 -2.       ] [3.696474e-08 0.000000e+00]
Episode * 2116 * exploration epsilon 0.1 * Episodic Reward is ==> -14.205217361450195
action means and variances at step -10: [ 1.1378629 -2.       ] [2.3146981e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1354111 -2.       ] [2.3759291e-08 0.0000000e+00]
Episode * 2117 * exploration epsilon 0.1 * Episodic Reward is ==> -14.281509399414062
action means and variances at step -10: [ 1.1293746 -2.       ] [1.08001945e-08 0.00000000e+00]
action means and variances at step -5: [ 1.1283128 -2.       ] [1.4453438e-08 0.0000000e+00]
Episode * 2118 * exploration epsilon 0.1 * Episodic Reward is ==> -14.32121467590332
action means and variances at step -10: [ 1.1196756 -2.       ] [1.8157705e-08 0.0000000e+00]
acti

Episode * 2131 * exploration epsilon 0.1 * Episodic Reward is ==> -14.113580703735352
action means and variances at step -10: [ 1.1350768 -2.       ] [2.2571516e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1353701 -2.       ] [2.2975819e-08 0.0000000e+00]
Episode * 2132 * exploration epsilon 0.1 * Episodic Reward is ==> -13.943772315979004
action means and variances at step -10: [ 1.1248158 -2.       ] [1.698466e-08 0.000000e+00]
action means and variances at step -5: [ 1.1248018 -2.       ] [1.8334504e-08 0.0000000e+00]
Episode * 2133 * exploration epsilon 0.1 * Episodic Reward is ==> -14.066720008850098
action means and variances at step -10: [ 1.1309516 -2.       ] [1.0860342e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1323352 -2.       ] [9.266779e-09 0.000000e+00]
Episode * 2134 * exploration epsilon 0.1 * Episodic Reward is ==> -13.744693756103516
action means and variances at step -10: [ 1.1524878 -2.       ] [1.2443877e-08 0.0000000e+00]
actio

Episode * 2147 * exploration epsilon 0.1 * Episodic Reward is ==> -14.045503616333008
action means and variances at step -10: [ 1.2749207 -2.       ] [1.0455933e-07 0.0000000e+00]
action means and variances at step -5: [ 1.2749964 -2.       ] [1.0444305e-07 0.0000000e+00]
Episode * 2148 * exploration epsilon 0.1 * Episodic Reward is ==> -14.270844459533691
action means and variances at step -10: [ 1.2506163 -2.       ] [8.262228e-08 0.000000e+00]
action means and variances at step -5: [ 1.2438555 -2.       ] [7.3610416e-08 0.0000000e+00]
Episode * 2149 * exploration epsilon 0.1 * Episodic Reward is ==> -13.652360916137695
action means and variances at step -10: [ 1.2346425 -2.       ] [3.6270563e-08 0.0000000e+00]
action means and variances at step -5: [ 1.2308123 -2.       ] [4.1810168e-08 0.0000000e+00]
Episode * 2150 * exploration epsilon 0.1 * Episodic Reward is ==> -14.272789001464844
action means and variances at step -10: [ 1.2259214 -2.       ] [2.7153725e-08 0.0000000e+00]
act

Episode * 2163 * exploration epsilon 0.1 * Episodic Reward is ==> -14.380610466003418
action means and variances at step -10: [ 1.2153945 -2.       ] [1.0316398e-08 0.0000000e+00]
action means and variances at step -5: [ 1.2100189 -2.       ] [1.5119777e-08 0.0000000e+00]
Episode * 2164 * exploration epsilon 0.1 * Episodic Reward is ==> -13.809919357299805
action means and variances at step -10: [ 1.1996632 -2.       ] [2.346876e-08 0.000000e+00]
action means and variances at step -5: [ 1.2006435 -2.       ] [2.1350576e-08 0.0000000e+00]
Episode * 2165 * exploration epsilon 0.1 * Episodic Reward is ==> -14.545419692993164
action means and variances at step -10: [ 1.1987545 -2.       ] [1.4105842e-08 0.0000000e+00]
action means and variances at step -5: [ 1.2105094 -2.       ] [1.7820769e-08 0.0000000e+00]
Episode * 2166 * exploration epsilon 0.1 * Episodic Reward is ==> -14.303434371948242
action means and variances at step -10: [ 1.1963267 -2.       ] [2.0148482e-08 0.0000000e+00]
act

Episode * 2179 * exploration epsilon 0.1 * Episodic Reward is ==> -14.03515625
action means and variances at step -10: [ 1.2070495 -2.       ] [2.021065e-08 0.000000e+00]
action means and variances at step -5: [ 1.2041346 -2.       ] [2.3328273e-08 0.0000000e+00]
Episode * 2180 * exploration epsilon 0.1 * Episodic Reward is ==> -13.967037200927734
action means and variances at step -10: [ 1.1941615 -2.       ] [1.1750593e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1888375 -2.       ] [1.4800191e-08 0.0000000e+00]
Episode * 2181 * exploration epsilon 0.1 * Episodic Reward is ==> -14.168218612670898
action means and variances at step -10: [ 1.1818748 -2.       ] [1.2829874e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1928766 -2.       ] [8.478976e-09 0.000000e+00]
Episode * 2182 * exploration epsilon 0.1 * Episodic Reward is ==> -14.474475860595703
action means and variances at step -10: [ 1.202985 -2.      ] [2.809125e-08 0.000000e+00]
action means and

Episode * 2195 * exploration epsilon 0.1 * Episodic Reward is ==> -13.866931915283203
action means and variances at step -10: [ 1.0836124 -2.       ] [1.2099756e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0845941 -2.       ] [1.0754517e-08 0.0000000e+00]
Episode * 2196 * exploration epsilon 0.1 * Episodic Reward is ==> -13.975152969360352
action means and variances at step -10: [ 1.0717156 -2.       ] [1.5978998e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0692413 -2.       ] [1.4073908e-08 0.0000000e+00]
Episode * 2197 * exploration epsilon 0.1 * Episodic Reward is ==> -13.864828109741211
action means and variances at step -10: [ 1.055115 -2.      ] [1.022528e-08 0.000000e+00]
action means and variances at step -5: [ 1.0626817 -2.       ] [1.1738862e-08 0.0000000e+00]
Episode * 2198 * exploration epsilon 0.1 * Episodic Reward is ==> -13.525872230529785
action means and variances at step -10: [ 1.0747956 -2.       ] [1.4687442e-08 0.0000000e+00]
actio

Episode * 2211 * exploration epsilon 0.1 * Episodic Reward is ==> -14.436349868774414
action means and variances at step -10: [ 1.0399234 -2.       ] [1.5118767e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0396922 -2.       ] [1.5109821e-08 0.0000000e+00]
Episode * 2212 * exploration epsilon 0.1 * Episodic Reward is ==> -14.287126541137695
action means and variances at step -10: [ 1.0426168 -2.       ] [1.26887e-08 0.00000e+00]
action means and variances at step -5: [ 1.0525205 -2.       ] [1.6199653e-08 0.0000000e+00]
Episode * 2213 * exploration epsilon 0.1 * Episodic Reward is ==> -13.853767395019531
action means and variances at step -10: [ 1.0825185 -2.       ] [8.029147e-09 0.000000e+00]
action means and variances at step -5: [ 1.0827581 -2.       ] [1.2332066e-08 0.0000000e+00]
Episode * 2214 * exploration epsilon 0.1 * Episodic Reward is ==> -13.794160842895508
action means and variances at step -10: [ 1.0997102 -2.       ] [1.24352155e-08 0.00000000e+00]
actio

Episode * 2227 * exploration epsilon 0.1 * Episodic Reward is ==> -13.778177261352539
action means and variances at step -10: [ 1.024587 -2.      ] [4.0490644e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0228869 -2.       ] [2.6304578e-08 0.0000000e+00]
Episode * 2228 * exploration epsilon 0.1 * Episodic Reward is ==> -13.963955879211426
action means and variances at step -10: [ 1.0160679 -2.       ] [1.7850507e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0120748 -2.       ] [1.5900046e-08 0.0000000e+00]
Episode * 2229 * exploration epsilon 0.1 * Episodic Reward is ==> -13.751688003540039
action means and variances at step -10: [ 1.0074027 -2.       ] [2.1760737e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0085481 -2.       ] [1.3849677e-08 0.0000000e+00]
Episode * 2230 * exploration epsilon 0.1 * Episodic Reward is ==> -13.708200454711914
action means and variances at step -10: [ 1.0064801 -2.       ] [1.45975765e-08 0.00000000e+00]
a

Episode * 2243 * exploration epsilon 0.1 * Episodic Reward is ==> -14.349125862121582
action means and variances at step -10: [ 1.257766 -2.      ] [1.2903408e-07 0.0000000e+00]
action means and variances at step -5: [ 1.2542061 -2.       ] [6.944664e-08 0.000000e+00]
Episode * 2244 * exploration epsilon 0.1 * Episodic Reward is ==> -14.345834732055664
action means and variances at step -10: [ 1.2019022 -2.       ] [7.238169e-08 0.000000e+00]
action means and variances at step -5: [ 1.191821 -2.      ] [4.9920338e-08 0.0000000e+00]
Episode * 2245 * exploration epsilon 0.1 * Episodic Reward is ==> -13.947729110717773
action means and variances at step -10: [ 1.1985024 -2.       ] [4.543722e-08 0.000000e+00]
action means and variances at step -5: [ 1.1919053 -2.       ] [3.4879278e-08 0.0000000e+00]
Episode * 2246 * exploration epsilon 0.1 * Episodic Reward is ==> -14.434322357177734
action means and variances at step -10: [ 1.1914223 -2.       ] [5.9462188e-08 0.0000000e+00]
action mean

Episode * 2259 * exploration epsilon 0.1 * Episodic Reward is ==> -14.096981048583984
action means and variances at step -10: [ 1.2033391 -2.       ] [7.2075017e-09 0.0000000e+00]
action means and variances at step -5: [ 1.2021811 -2.       ] [7.942742e-09 0.000000e+00]
Episode * 2260 * exploration epsilon 0.1 * Episodic Reward is ==> -14.464920043945312
action means and variances at step -10: [ 1.2047775 -2.       ] [1.9730132e-08 0.0000000e+00]
action means and variances at step -5: [ 1.2056503 -2.       ] [1.8525817e-08 0.0000000e+00]
Episode * 2261 * exploration epsilon 0.1 * Episodic Reward is ==> -14.54361629486084
action means and variances at step -10: [ 1.2157426 -2.       ] [5.9934333e-09 0.0000000e+00]
action means and variances at step -5: [ 1.2170063 -2.       ] [7.6685485e-09 0.0000000e+00]
Episode * 2262 * exploration epsilon 0.1 * Episodic Reward is ==> -14.40739631652832
action means and variances at step -10: [ 1.2161086 -2.       ] [8.510691e-09 0.000000e+00]
action 

Episode * 2275 * exploration epsilon 0.1 * Episodic Reward is ==> -14.35024642944336
action means and variances at step -10: [ 1.2516608 -2.       ] [9.279713e-09 0.000000e+00]
action means and variances at step -5: [ 1.2523742 -2.       ] [1.0624447e-08 0.0000000e+00]
Episode * 2276 * exploration epsilon 0.1 * Episodic Reward is ==> -13.95227336883545
action means and variances at step -10: [ 1.2529218 -2.       ] [6.9033166e-09 0.0000000e+00]
action means and variances at step -5: [ 1.2503228 -2.       ] [6.3216485e-09 0.0000000e+00]
Episode * 2277 * exploration epsilon 0.1 * Episodic Reward is ==> -14.138731956481934
action means and variances at step -10: [ 1.2476306 -2.       ] [7.0737367e-09 0.0000000e+00]
action means and variances at step -5: [ 1.2457483 -2.       ] [7.719475e-09 0.000000e+00]
Episode * 2278 * exploration epsilon 0.1 * Episodic Reward is ==> -14.47982120513916
action means and variances at step -10: [ 1.2549826 -2.       ] [8.268257e-09 0.000000e+00]
action mea

Episode * 2291 * exploration epsilon 0.1 * Episodic Reward is ==> -14.294792175292969
action means and variances at step -10: [ 1.2021925 -2.       ] [4.2833928e-09 0.0000000e+00]
action means and variances at step -5: [ 1.2034534 -2.       ] [3.2655598e-09 0.0000000e+00]
Episode * 2292 * exploration epsilon 0.1 * Episodic Reward is ==> -14.208025932312012
action means and variances at step -10: [ 1.2049094 -2.       ] [4.5582595e-09 0.0000000e+00]
action means and variances at step -5: [ 1.2075148 -2.       ] [5.171625e-09 0.000000e+00]
Episode * 2293 * exploration epsilon 0.1 * Episodic Reward is ==> -14.062294006347656
action means and variances at step -10: [ 1.2034961 -2.       ] [5.603917e-09 0.000000e+00]
action means and variances at step -5: [ 1.2029375 -2.       ] [4.8804374e-09 0.0000000e+00]
Episode * 2294 * exploration epsilon 0.1 * Episodic Reward is ==> -13.70785903930664
action means and variances at step -10: [ 1.1871371 -2.       ] [9.716414e-09 0.000000e+00]
action m

Episode * 2307 * exploration epsilon 0.1 * Episodic Reward is ==> -14.364616394042969
action means and variances at step -10: [ 1.190926 -2.      ] [9.064337e-09 0.000000e+00]
action means and variances at step -5: [ 1.1859118 -2.       ] [1.3060198e-08 0.0000000e+00]
Episode * 2308 * exploration epsilon 0.1 * Episodic Reward is ==> -13.984796524047852
action means and variances at step -10: [ 1.1585193 -2.       ] [1.2119129e-08 0.0000000e+00]
action means and variances at step -5: [ 1.15642 -2.     ] [1.22629e-08 0.00000e+00]
Episode * 2309 * exploration epsilon 0.1 * Episodic Reward is ==> -14.210881233215332
action means and variances at step -10: [ 1.1569995 -2.       ] [1.4019474e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1638653 -2.       ] [1.5485403e-08 0.0000000e+00]
Episode * 2310 * exploration epsilon 0.1 * Episodic Reward is ==> -14.104049682617188
action means and variances at step -10: [ 1.1663837 -2.       ] [1.5314289e-08 0.0000000e+00]
action means 

Episode * 2323 * exploration epsilon 0.1 * Episodic Reward is ==> -14.056732177734375
action means and variances at step -10: [ 1.1927148 -2.       ] [9.285769e-09 0.000000e+00]
action means and variances at step -5: [ 1.1918921 -2.       ] [1.36361855e-08 0.00000000e+00]
Episode * 2324 * exploration epsilon 0.1 * Episodic Reward is ==> -14.110372543334961
action means and variances at step -10: [ 1.2002226 -2.       ] [1.1012572e-08 0.0000000e+00]
action means and variances at step -5: [ 1.1923711 -2.       ] [9.958493e-09 0.000000e+00]
Episode * 2325 * exploration epsilon 0.1 * Episodic Reward is ==> -13.908601760864258
action means and variances at step -10: [ 1.1879473 -2.       ] [4.6664184e-09 0.0000000e+00]
action means and variances at step -5: [ 1.1882216 -2.       ] [4.857531e-09 0.000000e+00]
Episode * 2326 * exploration epsilon 0.1 * Episodic Reward is ==> -14.08651351928711
action means and variances at step -10: [ 1.1781042 -2.       ] [6.2809504e-09 0.0000000e+00]
action

Episode * 2339 * exploration epsilon 0.1 * Episodic Reward is ==> -14.187704086303711
action means and variances at step -10: [ 1.0847483 -2.       ] [1.4625483e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0734153 -2.       ] [1.0100065e-08 0.0000000e+00]
Episode * 2340 * exploration epsilon 0.1 * Episodic Reward is ==> -13.645650863647461
action means and variances at step -10: [ 1.0314636 -2.       ] [1.01486375e-08 0.00000000e+00]
action means and variances at step -5: [ 1.025738 -2.      ] [9.4867065e-09 0.0000000e+00]
Episode * 2341 * exploration epsilon 0.1 * Episodic Reward is ==> -13.801933288574219
action means and variances at step -10: [ 1.0194266 -2.       ] [1.664267e-08 0.000000e+00]
action means and variances at step -5: [ 1.027408 -2.      ] [1.5769976e-08 0.0000000e+00]
Episode * 2342 * exploration epsilon 0.1 * Episodic Reward is ==> -13.86552906036377
action means and variances at step -10: [ 1.0410066 -2.       ] [1.5194772e-08 0.0000000e+00]
action

Episode * 2355 * exploration epsilon 0.1 * Episodic Reward is ==> -14.205201148986816
action means and variances at step -10: [ 1.0168381 -2.       ] [5.201759e-09 0.000000e+00]
action means and variances at step -5: [ 1.0209194 -2.       ] [2.7471434e-09 0.0000000e+00]
Episode * 2356 * exploration epsilon 0.1 * Episodic Reward is ==> -13.510947227478027
action means and variances at step -10: [ 1.0206572 -2.       ] [5.6319536e-09 0.0000000e+00]
action means and variances at step -5: [ 1.0196775 -2.       ] [5.3751896e-09 0.0000000e+00]
Episode * 2357 * exploration epsilon 0.1 * Episodic Reward is ==> -14.031039237976074
action means and variances at step -10: [ 1.0205371 -2.       ] [5.9648486e-09 0.0000000e+00]
action means and variances at step -5: [ 1.0193075 -2.       ] [6.80345e-09 0.00000e+00]
Episode * 2358 * exploration epsilon 0.1 * Episodic Reward is ==> -13.716171264648438
action means and variances at step -10: [ 1.0202514 -2.       ] [5.2448152e-09 0.0000000e+00]
action 

Episode * 2371 * exploration epsilon 0.1 * Episodic Reward is ==> -13.767566680908203
action means and variances at step -10: [ 1.0281968 -2.       ] [4.7011373e-09 0.0000000e+00]
action means and variances at step -5: [ 1.0259814 -2.       ] [4.7742184e-09 0.0000000e+00]
Episode * 2372 * exploration epsilon 0.1 * Episodic Reward is ==> -13.868054389953613
action means and variances at step -10: [ 1.0175306 -2.       ] [4.3259734e-09 0.0000000e+00]
action means and variances at step -5: [ 1.0157689 -2.       ] [4.2248964e-09 0.0000000e+00]
Episode * 2373 * exploration epsilon 0.1 * Episodic Reward is ==> -13.968900680541992
action means and variances at step -10: [ 1.015427 -2.      ] [4.806982e-09 0.000000e+00]
action means and variances at step -5: [ 1.0226773 -2.       ] [4.458109e-09 0.000000e+00]
Episode * 2374 * exploration epsilon 0.1 * Episodic Reward is ==> -14.064322471618652
action means and variances at step -10: [ 1.0505762 -2.       ] [1.0617848e-08 0.0000000e+00]
action 

Episode * 2387 * exploration epsilon 0.1 * Episodic Reward is ==> -13.66917610168457
action means and variances at step -10: [ 0.9989082 -2.       ] [3.1719054e-09 0.0000000e+00]
action means and variances at step -5: [ 1.0008899 -2.       ] [2.623013e-09 0.000000e+00]
Episode * 2388 * exploration epsilon 0.1 * Episodic Reward is ==> -13.622791290283203
action means and variances at step -10: [ 0.99141204 -2.        ] [6.155221e-09 0.000000e+00]
action means and variances at step -5: [ 0.9884883 -2.       ] [5.5461653e-09 0.0000000e+00]
Episode * 2389 * exploration epsilon 0.1 * Episodic Reward is ==> -13.832650184631348
action means and variances at step -10: [ 0.9674712 -2.       ] [6.8403483e-09 0.0000000e+00]
action means and variances at step -5: [ 0.9687323 -2.       ] [6.633393e-09 0.000000e+00]
Episode * 2390 * exploration epsilon 0.1 * Episodic Reward is ==> -13.464900016784668
action means and variances at step -10: [ 0.9564659 -2.       ] [5.964498e-09 0.000000e+00]
action m

Episode * 2403 * exploration epsilon 0.1 * Episodic Reward is ==> -13.45643138885498
action means and variances at step -10: [ 0.92310727 -2.        ] [4.912497e-09 0.000000e+00]
action means and variances at step -5: [ 0.9252428 -2.       ] [4.8513176e-09 0.0000000e+00]
Episode * 2404 * exploration epsilon 0.1 * Episodic Reward is ==> -13.197385787963867
action means and variances at step -10: [ 0.9181927 -2.       ] [4.023148e-09 0.000000e+00]
action means and variances at step -5: [ 0.9204928 -2.       ] [3.8616865e-09 0.0000000e+00]
Episode * 2405 * exploration epsilon 0.1 * Episodic Reward is ==> -13.782384872436523
action means and variances at step -10: [ 0.9226494 -2.       ] [8.236718e-09 0.000000e+00]
action means and variances at step -5: [ 0.91912556 -2.        ] [6.8665886e-09 0.0000000e+00]
Episode * 2406 * exploration epsilon 0.1 * Episodic Reward is ==> -13.797879219055176
action means and variances at step -10: [ 0.9190941 -2.       ] [5.1584865e-09 0.0000000e+00]
acti

Episode * 2419 * exploration epsilon 0.1 * Episodic Reward is ==> -13.620719909667969
action means and variances at step -10: [ 0.9064764 -2.       ] [3.1470044e-09 0.0000000e+00]
action means and variances at step -5: [ 0.9123457 -2.       ] [3.4008605e-09 0.0000000e+00]
Episode * 2420 * exploration epsilon 0.1 * Episodic Reward is ==> -13.804039001464844
action means and variances at step -10: [ 0.9028565 -2.       ] [5.799348e-09 0.000000e+00]
action means and variances at step -5: [ 0.9055897 -2.       ] [6.061754e-09 0.000000e+00]
Episode * 2421 * exploration epsilon 0.1 * Episodic Reward is ==> -13.897872924804688
action means and variances at step -10: [ 0.902614 -2.      ] [2.8899745e-09 0.0000000e+00]
action means and variances at step -5: [ 0.90514135 -2.        ] [2.3818045e-09 0.0000000e+00]
Episode * 2422 * exploration epsilon 0.1 * Episodic Reward is ==> -13.597528457641602
action means and variances at step -10: [ 0.89895815 -2.        ] [4.332098e-09 0.000000e+00]
actio

Episode * 2435 * exploration epsilon 0.1 * Episodic Reward is ==> -13.350269317626953
action means and variances at step -10: [ 1.077122 -2.      ] [6.4038757e-09 0.0000000e+00]
action means and variances at step -5: [ 1.0888292 -2.       ] [9.47637e-09 0.00000e+00]
Episode * 2436 * exploration epsilon 0.1 * Episodic Reward is ==> -13.83471393585205
action means and variances at step -10: [ 1.0126721 -2.       ] [9.022752e-09 0.000000e+00]
action means and variances at step -5: [ 0.9949988 -2.       ] [1.0788035e-08 0.0000000e+00]
Episode * 2437 * exploration epsilon 0.1 * Episodic Reward is ==> -13.559391975402832
action means and variances at step -10: [ 0.97506183 -2.        ] [1.6599772e-08 0.0000000e+00]
action means and variances at step -5: [ 0.9743367 -2.       ] [1.5063756e-08 0.0000000e+00]
Episode * 2438 * exploration epsilon 0.1 * Episodic Reward is ==> -13.871182441711426
action means and variances at step -10: [ 0.97925806 -2.        ] [9.894946e-09 0.000000e+00]
action m

Episode * 2451 * exploration epsilon 0.1 * Episodic Reward is ==> -13.781919479370117
action means and variances at step -10: [ 1.0061711 -2.       ] [1.2464199e-08 0.0000000e+00]
action means and variances at step -5: [ 1.0133479 -2.       ] [1.9350159e-08 0.0000000e+00]
Episode * 2452 * exploration epsilon 0.1 * Episodic Reward is ==> -13.970895767211914
action means and variances at step -10: [ 0.99899954 -2.        ] [9.962841e-09 0.000000e+00]
action means and variances at step -5: [ 0.9952047 -2.       ] [1.0773682e-08 0.0000000e+00]
Episode * 2453 * exploration epsilon 0.1 * Episodic Reward is ==> -13.469076156616211
action means and variances at step -10: [ 0.97258717 -2.        ] [6.746031e-09 0.000000e+00]
action means and variances at step -5: [ 0.979638 -2.      ] [5.606573e-09 0.000000e+00]
Episode * 2454 * exploration epsilon 0.1 * Episodic Reward is ==> -13.694889068603516
action means and variances at step -10: [ 0.97305495 -2.        ] [4.4419357e-09 0.0000000e+00]
act

Episode * 2467 * exploration epsilon 0.1 * Episodic Reward is ==> -13.748562812805176
action means and variances at step -10: [ 0.84244645 -2.        ] [8.563241e-09 0.000000e+00]
action means and variances at step -5: [ 0.8515188 -2.       ] [6.7547905e-09 0.0000000e+00]
Episode * 2468 * exploration epsilon 0.1 * Episodic Reward is ==> -13.497836112976074
action means and variances at step -10: [ 0.8497637 -2.       ] [1.0103708e-08 0.0000000e+00]
action means and variances at step -5: [ 0.8469493 -2.       ] [7.484217e-09 0.000000e+00]
Episode * 2469 * exploration epsilon 0.1 * Episodic Reward is ==> -13.394111633300781
action means and variances at step -10: [ 0.84228384 -2.        ] [7.346444e-09 0.000000e+00]
action means and variances at step -5: [ 0.84981066 -2.        ] [5.580115e-09 0.000000e+00]
Episode * 2470 * exploration epsilon 0.1 * Episodic Reward is ==> -13.907837867736816
action means and variances at step -10: [ 0.8958672 -2.       ] [1.3475077e-08 0.0000000e+00]
act

Episode * 2483 * exploration epsilon 0.1 * Episodic Reward is ==> -13.760512351989746
action means and variances at step -10: [ 0.99547803 -2.        ] [2.7010506e-08 0.0000000e+00]
action means and variances at step -5: [ 0.9927548 -2.       ] [1.8393274e-08 0.0000000e+00]
Episode * 2484 * exploration epsilon 0.1 * Episodic Reward is ==> -13.577118873596191
action means and variances at step -10: [ 0.88337994 -2.        ] [1.06612e-08 0.00000e+00]
action means and variances at step -5: [ 0.8661711 -2.       ] [5.905029e-09 0.000000e+00]
Episode * 2485 * exploration epsilon 0.1 * Episodic Reward is ==> -13.706274032592773
action means and variances at step -10: [ 0.91387 -2.     ] [1.3561646e-08 0.0000000e+00]
action means and variances at step -5: [ 0.898493 -2.      ] [1.1402384e-08 0.0000000e+00]
Episode * 2486 * exploration epsilon 0.1 * Episodic Reward is ==> -13.58873462677002
action means and variances at step -10: [ 0.856207 -2.      ] [5.5435603e-09 0.0000000e+00]
action means

Episode * 2499 * exploration epsilon 0.1 * Episodic Reward is ==> -13.987886428833008
action means and variances at step -10: [ 0.77296937 -2.        ] [7.925786e-09 0.000000e+00]
action means and variances at step -5: [ 0.771329 -2.      ] [6.619736e-09 0.000000e+00]
Episode * 2500 * exploration epsilon 0.1 * Episodic Reward is ==> -13.29994010925293
action means and variances at step -10: [ 0.7603721 -2.       ] [5.5566303e-09 0.0000000e+00]
action means and variances at step -5: [ 0.7559302 -2.       ] [6.35123e-09 0.00000e+00]
Episode * 2501 * exploration epsilon 0.1 * Episodic Reward is ==> -13.268930435180664
action means and variances at step -10: [ 0.78289264 -2.        ] [6.294938e-09 0.000000e+00]
action means and variances at step -5: [ 0.79912645 -2.        ] [4.7335047e-09 0.0000000e+00]
Episode * 2502 * exploration epsilon 0.1 * Episodic Reward is ==> -13.436800003051758
action means and variances at step -10: [ 0.7502148 -2.       ] [7.07275e-09 0.00000e+00]
action means

Episode * 2515 * exploration epsilon 0.1 * Episodic Reward is ==> -13.600700378417969
action means and variances at step -10: [ 0.74060017 -2.        ] [6.3883614e-09 0.0000000e+00]
action means and variances at step -5: [ 0.7398034 -2.       ] [6.276786e-09 0.000000e+00]
Episode * 2516 * exploration epsilon 0.1 * Episodic Reward is ==> -13.546679496765137
action means and variances at step -10: [ 0.7139812 -2.       ] [7.1015465e-09 0.0000000e+00]
action means and variances at step -5: [ 0.7307824 -2.       ] [6.547999e-09 0.000000e+00]
Episode * 2517 * exploration epsilon 0.1 * Episodic Reward is ==> -13.920527458190918
action means and variances at step -10: [ 0.71631825 -2.        ] [3.5962593e-09 0.0000000e+00]
action means and variances at step -5: [ 0.72553194 -2.        ] [3.2747698e-09 0.0000000e+00]
Episode * 2518 * exploration epsilon 0.1 * Episodic Reward is ==> -13.424816131591797
action means and variances at step -10: [ 0.7308226 -2.       ] [5.8396017e-09 0.0000000e+00]

Episode * 2531 * exploration epsilon 0.1 * Episodic Reward is ==> -13.376683235168457
action means and variances at step -10: [ 0.6563605 -2.       ] [4.868545e-09 0.000000e+00]
action means and variances at step -5: [ 0.6465222 -2.       ] [2.9796063e-09 0.0000000e+00]
Episode * 2532 * exploration epsilon 0.1 * Episodic Reward is ==> -13.12155532836914
action means and variances at step -10: [ 0.6218381 -2.       ] [4.8980753e-09 0.0000000e+00]
action means and variances at step -5: [ 0.6197126 -2.       ] [3.8487067e-09 0.0000000e+00]
Episode * 2533 * exploration epsilon 0.1 * Episodic Reward is ==> -12.95893383026123
action means and variances at step -10: [ 0.56158394 -2.        ] [7.45695e-09 0.00000e+00]
action means and variances at step -5: [ 0.57039577 -2.        ] [7.2665407e-09 0.0000000e+00]
Episode * 2534 * exploration epsilon 0.1 * Episodic Reward is ==> -12.971261024475098
action means and variances at step -10: [ 0.58114725 -2.        ] [6.200072e-09 0.000000e+00]
actio

Episode * 2547 * exploration epsilon 0.1 * Episodic Reward is ==> -13.082618713378906
action means and variances at step -10: [ 0.4174327 -2.       ] [1.5712972e-08 0.0000000e+00]
action means and variances at step -5: [ 0.4264756 -2.       ] [1.2799665e-08 0.0000000e+00]
Episode * 2548 * exploration epsilon 0.1 * Episodic Reward is ==> -12.795154571533203
action means and variances at step -10: [ 0.44030774 -2.        ] [1.2516189e-08 0.0000000e+00]
action means and variances at step -5: [ 0.43635076 -2.        ] [1.08112355e-08 0.00000000e+00]
Episode * 2549 * exploration epsilon 0.1 * Episodic Reward is ==> -12.839984893798828
action means and variances at step -10: [ 0.5252898 -2.       ] [8.456436e-09 0.000000e+00]
action means and variances at step -5: [ 0.5434543 -2.       ] [7.534546e-09 0.000000e+00]
Episode * 2550 * exploration epsilon 0.1 * Episodic Reward is ==> -13.130523681640625
action means and variances at step -10: [ 0.5292025 -2.       ] [8.840896e-09 0.000000e+00]
a

Episode * 2563 * exploration epsilon 0.1 * Episodic Reward is ==> -13.018685340881348
action means and variances at step -10: [ 0.45156538 -2.        ] [1.181985e-08 0.000000e+00]
action means and variances at step -5: [ 0.45071635 -2.        ] [9.5078345e-09 0.0000000e+00]
Episode * 2564 * exploration epsilon 0.1 * Episodic Reward is ==> -12.833370208740234
action means and variances at step -10: [ 0.45171735 -2.        ] [1.6822762e-08 0.0000000e+00]
action means and variances at step -5: [ 0.44981506 -2.        ] [1.2122636e-08 0.0000000e+00]
Episode * 2565 * exploration epsilon 0.1 * Episodic Reward is ==> -13.12523365020752
action means and variances at step -10: [ 0.51013976 -2.        ] [1.4280256e-08 0.0000000e+00]
action means and variances at step -5: [ 0.5126507 -2.       ] [1.2315294e-08 0.0000000e+00]
Episode * 2566 * exploration epsilon 0.1 * Episodic Reward is ==> -13.17588996887207
action means and variances at step -10: [ 0.46724775 -2.        ] [1.6684902e-08 0.000000

Episode * 2579 * exploration epsilon 0.1 * Episodic Reward is ==> -12.931602478027344
action means and variances at step -10: [ 0.412608 -2.      ] [1.5612382e-08 0.0000000e+00]
action means and variances at step -5: [ 0.4169303 -2.       ] [1.2214341e-08 0.0000000e+00]
Episode * 2580 * exploration epsilon 0.1 * Episodic Reward is ==> -12.81395149230957
action means and variances at step -10: [ 0.3994592 -2.       ] [1.8292587e-08 0.0000000e+00]
action means and variances at step -5: [ 0.39749974 -2.        ] [1.6988135e-08 0.0000000e+00]
Episode * 2581 * exploration epsilon 0.1 * Episodic Reward is ==> -13.25118637084961
action means and variances at step -10: [ 0.37209967 -2.        ] [2.5126777e-08 0.0000000e+00]
action means and variances at step -5: [ 0.3631012 -2.       ] [1.9973537e-08 0.0000000e+00]
Episode * 2582 * exploration epsilon 0.1 * Episodic Reward is ==> -12.694698333740234
action means and variances at step -10: [ 0.35958502 -2.        ] [2.0258703e-08 0.0000000e+00]

Episode * 2595 * exploration epsilon 0.1 * Episodic Reward is ==> -13.037694931030273
action means and variances at step -10: [ 0.26910472 -2.        ] [2.1573605e-08 0.0000000e+00]
action means and variances at step -5: [ 0.26731914 -2.        ] [2.0904766e-08 0.0000000e+00]
Episode * 2596 * exploration epsilon 0.1 * Episodic Reward is ==> -12.899637222290039
action means and variances at step -10: [ 0.23896855 -2.        ] [7.511542e-08 0.000000e+00]
action means and variances at step -5: [ 0.23889166 -2.        ] [7.384678e-08 0.000000e+00]
Episode * 2597 * exploration epsilon 0.1 * Episodic Reward is ==> -13.01083755493164
action means and variances at step -10: [ 0.24319509 -2.        ] [1.02823776e-07 0.00000000e+00]
action means and variances at step -5: [ 0.23242427 -2.        ] [9.876078e-08 0.000000e+00]
Episode * 2598 * exploration epsilon 0.1 * Episodic Reward is ==> -12.562471389770508
action means and variances at step -10: [ 0.2047783 -2.       ] [9.940657e-08 0.000000e+

Episode * 2611 * exploration epsilon 0.1 * Episodic Reward is ==> -12.78857707977295
action means and variances at step -10: [ 0.16929297 -2.        ] [3.809762e-08 0.000000e+00]
action means and variances at step -5: [ 0.17234369 -2.        ] [3.31146e-08 0.00000e+00]
Episode * 2612 * exploration epsilon 0.1 * Episodic Reward is ==> -12.816094398498535
action means and variances at step -10: [ 0.15216666 -2.        ] [6.502298e-08 0.000000e+00]
action means and variances at step -5: [ 0.15952504 -2.        ] [5.8280868e-08 0.0000000e+00]
Episode * 2613 * exploration epsilon 0.1 * Episodic Reward is ==> -12.94337272644043
action means and variances at step -10: [ 0.14831401 -2.        ] [5.369776e-08 0.000000e+00]
action means and variances at step -5: [ 0.1520682 -2.       ] [4.969686e-08 0.000000e+00]
Episode * 2614 * exploration epsilon 0.1 * Episodic Reward is ==> -12.69569206237793
action means and variances at step -10: [ 0.16507074 -2.        ] [4.766344e-08 0.000000e+00]
action

Episode * 2627 * exploration epsilon 0.1 * Episodic Reward is ==> -12.86842155456543
action means and variances at step -10: [ 0.13113002 -2.        ] [3.664368e-08 0.000000e+00]
action means and variances at step -5: [ 0.1306505 -2.       ] [4.0276543e-08 0.0000000e+00]
Episode * 2628 * exploration epsilon 0.1 * Episodic Reward is ==> -12.848548889160156
action means and variances at step -10: [ 0.11280897 -2.        ] [6.0995816e-08 0.0000000e+00]
action means and variances at step -5: [ 0.12354259 -2.        ] [5.146185e-08 0.000000e+00]
Episode * 2629 * exploration epsilon 0.1 * Episodic Reward is ==> -12.389890670776367
action means and variances at step -10: [ 0.10826215 -2.        ] [3.345351e-08 0.000000e+00]
action means and variances at step -5: [ 0.10382658 -2.        ] [2.8998974e-08 0.0000000e+00]
Episode * 2630 * exploration epsilon 0.1 * Episodic Reward is ==> -12.791757583618164
action means and variances at step -10: [ 0.08833553 -2.        ] [3.216226e-08 0.000000e+00

Episode * 2643 * exploration epsilon 0.1 * Episodic Reward is ==> -12.785543441772461
action means and variances at step -10: [ 0.0656273 -2.       ] [3.0772824e-08 0.0000000e+00]
action means and variances at step -5: [ 0.07615633 -2.        ] [2.6528452e-08 0.0000000e+00]
Episode * 2644 * exploration epsilon 0.1 * Episodic Reward is ==> -12.976823806762695
action means and variances at step -10: [ 0.06569389 -2.        ] [3.0999946e-08 0.0000000e+00]
action means and variances at step -5: [ 0.06576093 -2.        ] [3.142562e-08 0.000000e+00]
Episode * 2645 * exploration epsilon 0.1 * Episodic Reward is ==> -12.989110946655273
action means and variances at step -10: [ 0.06745983 -2.        ] [2.2657373e-08 0.0000000e+00]
action means and variances at step -5: [ 0.07698482 -2.        ] [1.9972111e-08 0.0000000e+00]
Episode * 2646 * exploration epsilon 0.1 * Episodic Reward is ==> -13.192802429199219
action means and variances at step -10: [ 0.06407524 -2.        ] [4.3258936e-08 0.0000

Episode * 2659 * exploration epsilon 0.1 * Episodic Reward is ==> -13.256282806396484
action means and variances at step -10: [ 0.04870826 -2.        ] [2.3143818e-08 0.0000000e+00]
action means and variances at step -5: [ 0.04429656 -2.        ] [1.829062e-08 0.000000e+00]
Episode * 2660 * exploration epsilon 0.1 * Episodic Reward is ==> -12.882762908935547
action means and variances at step -10: [ 0.04386248 -2.        ] [2.0799197e-08 0.0000000e+00]
action means and variances at step -5: [ 0.0364143 -2.       ] [1.9935737e-08 0.0000000e+00]
Episode * 2661 * exploration epsilon 0.1 * Episodic Reward is ==> -13.432615280151367
action means and variances at step -10: [ 0.05565537 -2.        ] [2.0330223e-08 0.0000000e+00]
action means and variances at step -5: [ 0.05735831 -2.        ] [2.0393227e-08 0.0000000e+00]
Episode * 2662 * exploration epsilon 0.1 * Episodic Reward is ==> -13.534811019897461
action means and variances at step -10: [ 0.03744599 -2.        ] [2.4324775e-08 0.0000

Episode * 2675 * exploration epsilon 0.1 * Episodic Reward is ==> -13.547969818115234
action means and variances at step -10: [ 0.05757358 -2.        ] [1.520689e-08 0.000000e+00]
action means and variances at step -5: [ 0.05578457 -2.        ] [1.6196472e-08 0.0000000e+00]
Episode * 2676 * exploration epsilon 0.1 * Episodic Reward is ==> -12.674386978149414
action means and variances at step -10: [ 0.02335748 -2.        ] [2.0024304e-08 0.0000000e+00]
action means and variances at step -5: [ 0.0248075 -2.       ] [1.7925936e-08 0.0000000e+00]
Episode * 2677 * exploration epsilon 0.1 * Episodic Reward is ==> -13.442485809326172
action means and variances at step -10: [ 0.03667899 -2.        ] [1.7502959e-08 0.0000000e+00]
action means and variances at step -5: [ 0.03665913 -2.        ] [1.841552e-08 0.000000e+00]
Episode * 2678 * exploration epsilon 0.1 * Episodic Reward is ==> -13.204404830932617
action means and variances at step -10: [ 0.0282639 -2.       ] [1.7179223e-08 0.0000000e

Episode * 2691 * exploration epsilon 0.1 * Episodic Reward is ==> -13.693876266479492
action means and variances at step -10: [-0.00489099 -2.        ] [1.1490144e-08 0.0000000e+00]
action means and variances at step -5: [-0.00952913 -2.        ] [1.0463776e-08 0.0000000e+00]
Episode * 2692 * exploration epsilon 0.1 * Episodic Reward is ==> -13.674233436584473
action means and variances at step -10: [ 0.00648329 -2.        ] [9.698905e-09 0.000000e+00]
action means and variances at step -5: [ 0.00794832 -2.        ] [8.021642e-09 0.000000e+00]
Episode * 2693 * exploration epsilon 0.1 * Episodic Reward is ==> -13.4732666015625
action means and variances at step -10: [-0.01886757 -2.        ] [9.678157e-09 0.000000e+00]
action means and variances at step -5: [-0.02168008 -2.        ] [7.575602e-09 0.000000e+00]
Episode * 2694 * exploration epsilon 0.1 * Episodic Reward is ==> -12.755186080932617
action means and variances at step -10: [-0.01139088 -2.        ] [7.126542e-09 0.000000e+00]

Episode * 2707 * exploration epsilon 0.1 * Episodic Reward is ==> -13.288235664367676
action means and variances at step -10: [-0.01362117 -2.        ] [7.681533e-08 0.000000e+00]
action means and variances at step -5: [-0.01885471 -2.        ] [1.0089932e-07 0.0000000e+00]
Episode * 2708 * exploration epsilon 0.1 * Episodic Reward is ==> -13.3756103515625
action means and variances at step -10: [ 0.00487677 -2.        ] [5.5959752e-09 0.0000000e+00]
action means and variances at step -5: [ 0.00331311 -2.        ] [6.4533165e-09 0.0000000e+00]
Episode * 2709 * exploration epsilon 0.1 * Episodic Reward is ==> -13.113189697265625
action means and variances at step -10: [-0.01746496 -2.        ] [6.204494e-09 0.000000e+00]
action means and variances at step -5: [-0.02231998 -2.        ] [5.6966076e-09 0.0000000e+00]
Episode * 2710 * exploration epsilon 0.1 * Episodic Reward is ==> -12.954586029052734
action means and variances at step -10: [-0.01829113 -2.        ] [6.066867e-09 0.000000e

Episode * 2723 * exploration epsilon 0.1 * Episodic Reward is ==> -12.953044891357422
action means and variances at step -10: [-0.05394395 -2.        ] [7.663321e-09 0.000000e+00]
action means and variances at step -5: [-0.04988398 -2.        ] [9.29967e-09 0.00000e+00]
Episode * 2724 * exploration epsilon 0.1 * Episodic Reward is ==> -13.051329612731934
action means and variances at step -10: [-0.04837205 -2.        ] [9.277465e-09 0.000000e+00]
action means and variances at step -5: [-0.04900691 -2.        ] [9.061644e-09 0.000000e+00]
Episode * 2725 * exploration epsilon 0.1 * Episodic Reward is ==> -13.209033966064453
action means and variances at step -10: [-0.05182764 -2.        ] [7.589213e-09 0.000000e+00]
action means and variances at step -5: [-0.05069843 -2.        ] [6.474042e-09 0.000000e+00]
Episode * 2726 * exploration epsilon 0.1 * Episodic Reward is ==> -13.442472457885742
action means and variances at step -10: [-0.05846597 -2.        ] [7.854824e-09 0.000000e+00]
act

Episode * 2739 * exploration epsilon 0.1 * Episodic Reward is ==> -12.779740333557129
action means and variances at step -10: [-0.07067385 -2.        ] [1.9982969e-09 0.0000000e+00]
action means and variances at step -5: [-0.07264599 -2.        ] [1.869163e-09 0.000000e+00]
Episode * 2740 * exploration epsilon 0.1 * Episodic Reward is ==> -13.234140396118164
action means and variances at step -10: [-0.06174589 -2.        ] [2.3681124e-09 0.0000000e+00]
action means and variances at step -5: [-0.06590797 -2.        ] [2.138874e-09 0.000000e+00]
Episode * 2741 * exploration epsilon 0.1 * Episodic Reward is ==> -13.133746147155762
action means and variances at step -10: [-0.06239096 -2.        ] [2.2511104e-09 0.0000000e+00]
action means and variances at step -5: [-0.06671502 -2.        ] [1.8248224e-09 0.0000000e+00]
Episode * 2742 * exploration epsilon 0.1 * Episodic Reward is ==> -12.432125091552734
action means and variances at step -10: [-0.08900098 -2.        ] [2.1854354e-09 0.0000

Episode * 2755 * exploration epsilon 0.1 * Episodic Reward is ==> -13.391691207885742
action means and variances at step -10: [-0.13548088 -2.        ] [1.019834e-09 0.000000e+00]
action means and variances at step -5: [-0.12894036 -2.        ] [4.354125e-10 0.000000e+00]
Episode * 2756 * exploration epsilon 0.1 * Episodic Reward is ==> -12.497812271118164
action means and variances at step -10: [-0.13721122 -2.        ] [1.5840176e-10 0.0000000e+00]
action means and variances at step -5: [-0.141511 -2.      ] [2.2869476e-10 0.0000000e+00]
Episode * 2757 * exploration epsilon 0.1 * Episodic Reward is ==> -12.955392837524414
action means and variances at step -10: [-0.12417976 -2.        ] [2.6724428e-10 0.0000000e+00]
action means and variances at step -5: [-0.12583414 -2.        ] [2.2388807e-10 0.0000000e+00]
Episode * 2758 * exploration epsilon 0.1 * Episodic Reward is ==> -12.745046615600586
action means and variances at step -10: [-0.13326931 -2.        ] [4.3221318e-10 0.0000000e

Episode * 2771 * exploration epsilon 0.1 * Episodic Reward is ==> -12.746148109436035
action means and variances at step -10: [-0.18275152 -2.        ] [6.321339e-10 0.000000e+00]
action means and variances at step -5: [-0.1787277 -2.       ] [8.6781093e-10 0.0000000e+00]
Episode * 2772 * exploration epsilon 0.1 * Episodic Reward is ==> -12.75163459777832
action means and variances at step -10: [-0.17143247 -2.        ] [2.0266582e-09 0.0000000e+00]
action means and variances at step -5: [-0.17585123 -2.        ] [1.0318131e-09 0.0000000e+00]
Episode * 2773 * exploration epsilon 0.1 * Episodic Reward is ==> -12.79980182647705
action means and variances at step -10: [-0.18800682 -2.        ] [6.5816824e-10 0.0000000e+00]
action means and variances at step -5: [-0.19071211 -2.        ] [4.4968565e-10 0.0000000e+00]
Episode * 2774 * exploration epsilon 0.1 * Episodic Reward is ==> -12.685832977294922
action means and variances at step -10: [-0.19918238 -2.        ] [1.231528e-09 0.000000e

Episode * 2787 * exploration epsilon 0.1 * Episodic Reward is ==> -12.69636344909668
action means and variances at step -10: [-0.17828606 -2.        ] [1.6527094e-09 0.0000000e+00]
action means and variances at step -5: [-0.18042815 -2.        ] [1.4289304e-09 0.0000000e+00]
Episode * 2788 * exploration epsilon 0.1 * Episodic Reward is ==> -12.525382995605469
action means and variances at step -10: [-0.18251374 -2.        ] [1.4660031e-09 0.0000000e+00]
action means and variances at step -5: [-0.1783389 -2.       ] [1.5625626e-09 0.0000000e+00]
Episode * 2789 * exploration epsilon 0.1 * Episodic Reward is ==> -12.825395584106445
action means and variances at step -10: [-0.18215281 -2.        ] [1.6622442e-09 0.0000000e+00]
action means and variances at step -5: [-0.18522981 -2.        ] [1.6662396e-09 0.0000000e+00]
Episode * 2790 * exploration epsilon 0.1 * Episodic Reward is ==> -12.760892868041992
action means and variances at step -10: [-0.18807082 -2.        ] [1.96709e-09 0.00000

Episode * 2803 * exploration epsilon 0.1 * Episodic Reward is ==> -12.857854843139648
action means and variances at step -10: [-0.24857289 -2.        ] [7.7787476e-10 0.0000000e+00]
action means and variances at step -5: [-0.24414313 -2.        ] [7.2456746e-10 0.0000000e+00]
Episode * 2804 * exploration epsilon 0.1 * Episodic Reward is ==> -12.664745330810547
action means and variances at step -10: [-0.24258089 -2.        ] [9.0380603e-10 0.0000000e+00]
action means and variances at step -5: [-0.24640732 -2.        ] [8.31862e-10 0.00000e+00]
Episode * 2805 * exploration epsilon 0.1 * Episodic Reward is ==> -12.691858291625977
action means and variances at step -10: [-0.25316665 -2.        ] [9.884287e-10 0.000000e+00]
action means and variances at step -5: [-0.2511399 -2.       ] [9.804195e-10 0.000000e+00]
Episode * 2806 * exploration epsilon 0.1 * Episodic Reward is ==> -12.56289291381836
action means and variances at step -10: [-0.23478046 -2.        ] [1.0931319e-09 0.0000000e+00

Episode * 2819 * exploration epsilon 0.1 * Episodic Reward is ==> -12.667543411254883
action means and variances at step -10: [-0.2585184 -2.       ] [4.5214957e-10 0.0000000e+00]
action means and variances at step -5: [-0.25912794 -2.        ] [4.5386053e-10 0.0000000e+00]
Episode * 2820 * exploration epsilon 0.1 * Episodic Reward is ==> -12.855121612548828
action means and variances at step -10: [-0.24798156 -2.        ] [7.347469e-10 0.000000e+00]
action means and variances at step -5: [-0.24575357 -2.        ] [7.318104e-10 0.000000e+00]
Episode * 2821 * exploration epsilon 0.1 * Episodic Reward is ==> -12.910138130187988
action means and variances at step -10: [-0.25882682 -2.        ] [7.9459617e-10 0.0000000e+00]
action means and variances at step -5: [-0.26257193 -2.        ] [8.158779e-10 0.000000e+00]
Episode * 2822 * exploration epsilon 0.1 * Episodic Reward is ==> -12.63599681854248
action means and variances at step -10: [-0.27595183 -2.        ] [5.8278793e-10 0.0000000e+

Episode * 2835 * exploration epsilon 0.1 * Episodic Reward is ==> -13.168951034545898
action means and variances at step -10: [-0.28105414 -2.        ] [6.760269e-10 0.000000e+00]
action means and variances at step -5: [-0.282131 -2.      ] [6.7359307e-10 0.0000000e+00]
Episode * 2836 * exploration epsilon 0.1 * Episodic Reward is ==> -12.715010643005371
action means and variances at step -10: [-0.2812603 -2.       ] [7.994203e-10 0.000000e+00]
action means and variances at step -5: [-0.2766704 -2.       ] [6.994999e-10 0.000000e+00]
Episode * 2837 * exploration epsilon 0.1 * Episodic Reward is ==> -13.32720947265625
action means and variances at step -10: [-0.26419342 -2.        ] [9.756284e-10 0.000000e+00]
action means and variances at step -5: [-0.26143077 -2.        ] [9.397773e-10 0.000000e+00]
Episode * 2838 * exploration epsilon 0.1 * Episodic Reward is ==> -12.633857727050781
action means and variances at step -10: [-0.27133912 -2.        ] [5.866665e-10 0.000000e+00]
action m

Episode * 2851 * exploration epsilon 0.1 * Episodic Reward is ==> -12.874155044555664
action means and variances at step -10: [-0.33358946 -2.        ] [4.890611e-10 0.000000e+00]
action means and variances at step -5: [-0.33728677 -2.        ] [4.8784843e-10 0.0000000e+00]
Episode * 2852 * exploration epsilon 0.1 * Episodic Reward is ==> -12.845975875854492
action means and variances at step -10: [-0.32579112 -2.        ] [5.572218e-10 0.000000e+00]
action means and variances at step -5: [-0.32206652 -2.        ] [4.8762105e-10 0.0000000e+00]
Episode * 2853 * exploration epsilon 0.1 * Episodic Reward is ==> -12.891376495361328
action means and variances at step -10: [-0.31687734 -2.        ] [4.45952e-10 0.00000e+00]
action means and variances at step -5: [-0.3175067 -2.       ] [3.4341677e-10 0.0000000e+00]
Episode * 2854 * exploration epsilon 0.1 * Episodic Reward is ==> -12.921035766601562
action means and variances at step -10: [-0.30169135 -2.        ] [6.614045e-10 0.000000e+00]

Episode * 2867 * exploration epsilon 0.1 * Episodic Reward is ==> -13.086918830871582
action means and variances at step -10: [-0.34598678 -2.        ] [4.0575077e-10 0.0000000e+00]
action means and variances at step -5: [-0.3504821 -2.       ] [4.1343695e-10 0.0000000e+00]
Episode * 2868 * exploration epsilon 0.1 * Episodic Reward is ==> -12.724581718444824
action means and variances at step -10: [-0.3429019 -2.       ] [3.804512e-10 0.000000e+00]
action means and variances at step -5: [-0.34332946 -2.        ] [3.1301295e-10 0.0000000e+00]
Episode * 2869 * exploration epsilon 0.1 * Episodic Reward is ==> -13.2869873046875
action means and variances at step -10: [-0.33878735 -2.        ] [2.4137342e-10 0.0000000e+00]
action means and variances at step -5: [-0.3360857 -2.       ] [2.4942967e-10 0.0000000e+00]
Episode * 2870 * exploration epsilon 0.1 * Episodic Reward is ==> -12.687393188476562
action means and variances at step -10: [-0.33654422 -2.        ] [3.6641862e-10 0.0000000e+0

Episode * 2883 * exploration epsilon 0.1 * Episodic Reward is ==> -12.905245780944824
action means and variances at step -10: [-0.3253971 -2.       ] [8.831209e-10 0.000000e+00]
action means and variances at step -5: [-0.32150078 -2.        ] [6.96036e-10 0.00000e+00]
Episode * 2884 * exploration epsilon 0.1 * Episodic Reward is ==> -12.861557006835938
action means and variances at step -10: [-0.33068436 -2.        ] [4.1238848e-10 0.0000000e+00]
action means and variances at step -5: [-0.3339001 -2.       ] [3.6429187e-10 0.0000000e+00]
Episode * 2885 * exploration epsilon 0.1 * Episodic Reward is ==> -13.094983100891113
action means and variances at step -10: [-0.35002437 -2.        ] [2.7972621e-10 0.0000000e+00]
action means and variances at step -5: [-0.3544467 -2.       ] [2.5290936e-10 0.0000000e+00]
Episode * 2886 * exploration epsilon 0.1 * Episodic Reward is ==> -12.828259468078613
action means and variances at step -10: [-0.3380063 -2.       ] [2.2135385e-10 0.0000000e+00]
a

Episode * 2899 * exploration epsilon 0.1 * Episodic Reward is ==> -13.305216789245605
action means and variances at step -10: [-0.38527033 -2.        ] [2.0084606e-10 0.0000000e+00]
action means and variances at step -5: [-0.38769704 -2.        ] [2.558346e-10 0.000000e+00]
Episode * 2900 * exploration epsilon 0.1 * Episodic Reward is ==> -12.809525489807129
action means and variances at step -10: [-0.38548908 -2.        ] [3.8692063e-10 0.0000000e+00]
action means and variances at step -5: [-0.37843215 -2.        ] [2.8386127e-10 0.0000000e+00]
Episode * 2901 * exploration epsilon 0.1 * Episodic Reward is ==> -13.19764232635498
action means and variances at step -10: [-0.3793414 -2.       ] [2.756154e-10 0.000000e+00]
action means and variances at step -5: [-0.38172546 -2.        ] [2.8532063e-10 0.0000000e+00]
Episode * 2902 * exploration epsilon 0.1 * Episodic Reward is ==> -12.432119369506836
action means and variances at step -10: [-0.39840585 -2.        ] [3.888017e-10 0.000000e+

Episode * 2915 * exploration epsilon 0.1 * Episodic Reward is ==> -12.955218315124512
action means and variances at step -10: [-0.4164023 -2.       ] [3.9543313e-10 0.0000000e+00]
action means and variances at step -5: [-0.4190267 -2.       ] [4.5441628e-10 0.0000000e+00]
Episode * 2916 * exploration epsilon 0.1 * Episodic Reward is ==> -13.0420503616333
action means and variances at step -10: [-0.43390456 -2.        ] [1.0128235e-09 0.0000000e+00]
action means and variances at step -5: [-0.43812776 -2.        ] [1.8611203e-09 0.0000000e+00]
Episode * 2917 * exploration epsilon 0.1 * Episodic Reward is ==> -13.180261611938477
action means and variances at step -10: [-0.4120282 -2.       ] [5.5376514e-10 0.0000000e+00]
action means and variances at step -5: [-0.40373352 -2.        ] [4.635704e-10 0.000000e+00]
Episode * 2918 * exploration epsilon 0.1 * Episodic Reward is ==> -12.452892303466797
action means and variances at step -10: [-0.39693314 -2.        ] [6.58099e-10 0.00000e+00]
a

Episode * 2931 * exploration epsilon 0.1 * Episodic Reward is ==> -12.788270950317383
action means and variances at step -10: [-0.25097626 -2.        ] [6.537241e-09 0.000000e+00]
action means and variances at step -5: [-0.2361072 -2.       ] [5.8850276e-09 0.0000000e+00]
Episode * 2932 * exploration epsilon 0.1 * Episodic Reward is ==> -12.77882194519043
action means and variances at step -10: [-0.1210942 -2.       ] [1.6161769e-08 0.0000000e+00]
action means and variances at step -5: [-0.08877818 -2.        ] [1.6613779e-08 0.0000000e+00]
Episode * 2933 * exploration epsilon 0.1 * Episodic Reward is ==> -12.804817199707031
action means and variances at step -10: [ 0.21171089 -2.        ] [8.8183185e-07 0.0000000e+00]
action means and variances at step -5: [ 0.30492795 -2.        ] [7.195041e-07 0.000000e+00]
Episode * 2934 * exploration epsilon 0.1 * Episodic Reward is ==> -13.015113830566406
action means and variances at step -10: [ 1.2918037 -2.       ] [1.3570047e-07 0.0000000e+00

Episode * 2948 * exploration epsilon 0.1 * Episodic Reward is ==> -15.478199005126953
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2949 * exploration epsilon 0.1 * Episodic Reward is ==> -15.706141471862793
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2950 * exploration epsilon 0.1 * Episodic Reward is ==> -15.195135116577148
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2951 * exploration epsilon 0.1 * Episodic Reward is ==> -15.405769348144531
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2952 * exploration epsilon 0.1 * Episodic Reward is ==> -15.317176818847656
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 2966 * exploration epsilon 0.1 * Episodic Reward is ==> -15.518716812133789
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2967 * exploration epsilon 0.1 * Episodic Reward is ==> -15.320287704467773
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2968 * exploration epsilon 0.1 * Episodic Reward is ==> -15.327945709228516
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2969 * exploration epsilon 0.1 * Episodic Reward is ==> -15.306855201721191
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2970 * exploration epsilon 0.1 * Episodic Reward is ==> -15.441923141479492
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 2984 * exploration epsilon 0.1 * Episodic Reward is ==> -15.804555892944336
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2985 * exploration epsilon 0.1 * Episodic Reward is ==> -15.499380111694336
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2986 * exploration epsilon 0.1 * Episodic Reward is ==> -15.345664978027344
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2987 * exploration epsilon 0.1 * Episodic Reward is ==> -15.778237342834473
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 2988 * exploration epsilon 0.1 * Episodic Reward is ==> -15.208535194396973
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 3002 * exploration epsilon 0.1 * Episodic Reward is ==> -15.210272789001465
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3003 * exploration epsilon 0.1 * Episodic Reward is ==> -15.639116287231445
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3004 * exploration epsilon 0.1 * Episodic Reward is ==> -15.274415969848633
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3005 * exploration epsilon 0.1 * Episodic Reward is ==> -15.137589454650879
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3006 * exploration epsilon 0.1 * Episodic Reward is ==> -15.909252166748047
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 3020 * exploration epsilon 0.1 * Episodic Reward is ==> -15.517473220825195
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3021 * exploration epsilon 0.1 * Episodic Reward is ==> -15.380901336669922
action means and variances at step -10: [ 1.9978901 -2.       ] [1.908433e-07 0.000000e+00]
action means and variances at step -5: [ 1.9903729 -2.       ] [1.188609e-07 0.000000e+00]
Episode * 3022 * exploration epsilon 0.1 * Episodic Reward is ==> -15.803699493408203
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3023 * exploration epsilon 0.1 * Episodic Reward is ==> -15.44300365447998
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3024 * exploration epsilon 0.1 * Episodic Reward is ==> -15.697471618652344
action means and variances at step -10: [ 2

Episode * 3038 * exploration epsilon 0.1 * Episodic Reward is ==> -15.494571685791016
action means and variances at step -10: [ 1.977683 -2.      ] [6.230454e-08 0.000000e+00]
action means and variances at step -5: [ 1.9849961 -2.       ] [6.557536e-08 0.000000e+00]
Episode * 3039 * exploration epsilon 0.1 * Episodic Reward is ==> -15.42600154876709
action means and variances at step -10: [ 1.988354 -2.      ] [5.7524893e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9999723 -2.       ] [5.551535e-09 0.000000e+00]
Episode * 3040 * exploration epsilon 0.1 * Episodic Reward is ==> -15.636521339416504
action means and variances at step -10: [ 1.9624678 -2.       ] [9.034587e-08 0.000000e+00]
action means and variances at step -5: [ 1.9653924 -2.       ] [5.3816244e-08 0.0000000e+00]
Episode * 3041 * exploration epsilon 0.1 * Episodic Reward is ==> -15.41916561126709
action means and variances at step -10: [ 1.9835347 -2.       ] [1.09051925e-07 0.00000000e+00]
action means 

Episode * 3054 * exploration epsilon 0.1 * Episodic Reward is ==> -15.734957695007324
action means and variances at step -10: [ 1.9726393 -2.       ] [9.383127e-08 0.000000e+00]
action means and variances at step -5: [ 1.9494028 -2.       ] [8.08645e-08 0.00000e+00]
Episode * 3055 * exploration epsilon 0.1 * Episodic Reward is ==> -15.493009567260742
action means and variances at step -10: [ 1.9736878 -2.       ] [1.5167011e-07 0.0000000e+00]
action means and variances at step -5: [ 1.9762397 -2.       ] [9.2598064e-08 0.0000000e+00]
Episode * 3056 * exploration epsilon 0.1 * Episodic Reward is ==> -15.477602005004883
action means and variances at step -10: [ 1.977126 -2.      ] [7.4656825e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9713486 -2.       ] [8.581469e-08 0.000000e+00]
Episode * 3057 * exploration epsilon 0.1 * Episodic Reward is ==> -15.414337158203125
action means and variances at step -10: [ 1.9712902 -2.       ] [1.0091686e-07 0.0000000e+00]
action mean

Episode * 3070 * exploration epsilon 0.1 * Episodic Reward is ==> -15.290613174438477
action means and variances at step -10: [ 1.9946301 -2.       ] [6.01971e-08 0.00000e+00]
action means and variances at step -5: [ 1.9765949 -2.       ] [4.8336748e-08 0.0000000e+00]
Episode * 3071 * exploration epsilon 0.1 * Episodic Reward is ==> -15.191900253295898
action means and variances at step -10: [ 1.9433876 -2.       ] [6.5425645e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9391034 -2.       ] [4.341826e-08 0.000000e+00]
Episode * 3072 * exploration epsilon 0.1 * Episodic Reward is ==> -15.521869659423828
action means and variances at step -10: [ 1.9422433 -2.       ] [3.8334576e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9512391 -2.       ] [2.8026841e-08 0.0000000e+00]
Episode * 3073 * exploration epsilon 0.1 * Episodic Reward is ==> -15.368978500366211
action means and variances at step -10: [ 1.9404818 -2.       ] [4.5444175e-08 0.0000000e+00]
action 

Episode * 3086 * exploration epsilon 0.1 * Episodic Reward is ==> -15.28958511352539
action means and variances at step -10: [ 1.9639574 -2.       ] [4.9648655e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9512522 -2.       ] [3.666088e-08 0.000000e+00]
Episode * 3087 * exploration epsilon 0.1 * Episodic Reward is ==> -15.17245864868164
action means and variances at step -10: [ 1.9505523 -2.       ] [5.5616105e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9366599 -2.       ] [4.1410747e-08 0.0000000e+00]
Episode * 3088 * exploration epsilon 0.1 * Episodic Reward is ==> -15.451196670532227
action means and variances at step -10: [ 1.9268601 -2.       ] [5.744878e-08 0.000000e+00]
action means and variances at step -5: [ 1.9313923 -2.       ] [3.8987572e-08 0.0000000e+00]
Episode * 3089 * exploration epsilon 0.1 * Episodic Reward is ==> -15.590319633483887
action means and variances at step -10: [ 1.9308088 -2.       ] [9.782448e-08 0.000000e+00]
action me

Episode * 3102 * exploration epsilon 0.1 * Episodic Reward is ==> -15.399185180664062
action means and variances at step -10: [ 1.9556482 -2.       ] [9.533833e-08 0.000000e+00]
action means and variances at step -5: [ 1.9352255 -2.       ] [5.159979e-08 0.000000e+00]
Episode * 3103 * exploration epsilon 0.1 * Episodic Reward is ==> -15.899953842163086
action means and variances at step -10: [ 1.9503379 -2.       ] [5.474451e-08 0.000000e+00]
action means and variances at step -5: [ 1.9682614 -2.       ] [3.4926423e-08 0.0000000e+00]
Episode * 3104 * exploration epsilon 0.1 * Episodic Reward is ==> -15.303092956542969
action means and variances at step -10: [ 1.9169734 -2.       ] [5.854264e-08 0.000000e+00]
action means and variances at step -5: [ 1.8942147 -2.       ] [3.7915417e-08 0.0000000e+00]
Episode * 3105 * exploration epsilon 0.1 * Episodic Reward is ==> -15.358556747436523
action means and variances at step -10: [ 1.9225789 -2.       ] [3.607886e-08 0.000000e+00]
action mean

Episode * 3118 * exploration epsilon 0.1 * Episodic Reward is ==> -16.04741668701172
action means and variances at step -10: [ 1.9375936 -2.       ] [4.4441997e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9339087 -2.       ] [3.452177e-08 0.000000e+00]
Episode * 3119 * exploration epsilon 0.1 * Episodic Reward is ==> -15.44765853881836
action means and variances at step -10: [ 1.9011823 -2.       ] [3.4370277e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9028593 -2.       ] [2.5024093e-08 0.0000000e+00]
Episode * 3120 * exploration epsilon 0.1 * Episodic Reward is ==> -15.90434455871582
action means and variances at step -10: [ 1.9568527 -2.       ] [4.8589435e-08 0.0000000e+00]
action means and variances at step -5: [ 1.943329 -2.      ] [3.4904993e-08 0.0000000e+00]
Episode * 3121 * exploration epsilon 0.1 * Episodic Reward is ==> -15.470861434936523
action means and variances at step -10: [ 1.9477553 -2.       ] [3.8192304e-08 0.0000000e+00]
action m

Episode * 3134 * exploration epsilon 0.1 * Episodic Reward is ==> -15.667388916015625
action means and variances at step -10: [ 1.936475 -2.      ] [3.9507704e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9358823 -2.       ] [3.8327496e-08 0.0000000e+00]
Episode * 3135 * exploration epsilon 0.1 * Episodic Reward is ==> -15.268796920776367
action means and variances at step -10: [ 1.9244084 -2.       ] [3.6345007e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9216797 -2.       ] [2.7567673e-08 0.0000000e+00]
Episode * 3136 * exploration epsilon 0.1 * Episodic Reward is ==> -15.367338180541992
action means and variances at step -10: [ 1.9209663 -2.       ] [6.736481e-08 0.000000e+00]
action means and variances at step -5: [ 1.9305226 -2.       ] [3.6967833e-08 0.0000000e+00]
Episode * 3137 * exploration epsilon 0.1 * Episodic Reward is ==> -15.35312557220459
action means and variances at step -10: [ 1.9791 -2.    ] [4.8201233e-08 0.0000000e+00]
action means

Episode * 3150 * exploration epsilon 0.1 * Episodic Reward is ==> -15.569147109985352
action means and variances at step -10: [ 1.9405323 -2.       ] [5.1787328e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9553876 -2.       ] [3.9777703e-08 0.0000000e+00]
Episode * 3151 * exploration epsilon 0.1 * Episodic Reward is ==> -15.438753128051758
action means and variances at step -10: [ 1.9549882 -2.       ] [5.4241944e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9309981 -2.       ] [2.6902198e-08 0.0000000e+00]
Episode * 3152 * exploration epsilon 0.1 * Episodic Reward is ==> -15.65397834777832
action means and variances at step -10: [ 1.9380549 -2.       ] [2.320435e-08 0.000000e+00]
action means and variances at step -5: [ 1.9340975 -2.       ] [2.5336277e-08 0.0000000e+00]
Episode * 3153 * exploration epsilon 0.1 * Episodic Reward is ==> -15.75672435760498
action means and variances at step -10: [ 1.9493852 -2.       ] [3.555764e-08 0.000000e+00]
action 

Episode * 3166 * exploration epsilon 0.1 * Episodic Reward is ==> -15.520405769348145
action means and variances at step -10: [ 1.9308187 -2.       ] [5.028655e-08 0.000000e+00]
action means and variances at step -5: [ 1.9301348 -2.       ] [4.4139426e-08 0.0000000e+00]
Episode * 3167 * exploration epsilon 0.1 * Episodic Reward is ==> -15.565340042114258
action means and variances at step -10: [ 1.959932 -2.      ] [7.956093e-08 0.000000e+00]
action means and variances at step -5: [ 1.9638667 -2.       ] [5.5596324e-08 0.0000000e+00]
Episode * 3168 * exploration epsilon 0.1 * Episodic Reward is ==> -15.543758392333984
action means and variances at step -10: [ 1.9293903 -2.       ] [6.422567e-08 0.000000e+00]
action means and variances at step -5: [ 1.9167778 -2.       ] [6.1224874e-08 0.0000000e+00]
Episode * 3169 * exploration epsilon 0.1 * Episodic Reward is ==> -15.520313262939453
action means and variances at step -10: [ 1.9793578 -2.       ] [8.0617525e-08 0.0000000e+00]
action me

Episode * 3182 * exploration epsilon 0.1 * Episodic Reward is ==> -15.515270233154297
action means and variances at step -10: [ 1.9362336 -2.       ] [4.2539597e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9401779 -2.       ] [4.5680157e-08 0.0000000e+00]
Episode * 3183 * exploration epsilon 0.1 * Episodic Reward is ==> -15.1805419921875
action means and variances at step -10: [ 1.9583327 -2.       ] [5.289839e-08 0.000000e+00]
action means and variances at step -5: [ 1.9642196 -2.       ] [4.0007798e-08 0.0000000e+00]
Episode * 3184 * exploration epsilon 0.1 * Episodic Reward is ==> -15.17794418334961
action means and variances at step -10: [ 1.9315015 -2.       ] [6.9565786e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9454414 -2.       ] [5.8818458e-08 0.0000000e+00]
Episode * 3185 * exploration epsilon 0.1 * Episodic Reward is ==> -15.972599983215332
action means and variances at step -10: [ 1.934521 -2.      ] [6.79637e-08 0.00000e+00]
action means

Episode * 3198 * exploration epsilon 0.1 * Episodic Reward is ==> -15.860675811767578
action means and variances at step -10: [ 1.9385961 -2.       ] [3.9631193e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9555312 -2.       ] [5.201782e-08 0.000000e+00]
Episode * 3199 * exploration epsilon 0.1 * Episodic Reward is ==> -15.501648902893066
action means and variances at step -10: [ 1.94116 -2.     ] [3.7571866e-08 0.0000000e+00]
action means and variances at step -5: [ 1.925948 -2.      ] [2.7343173e-08 0.0000000e+00]
Episode * 3200 * exploration epsilon 0.1 * Episodic Reward is ==> -15.064023971557617
action means and variances at step -10: [ 1.9224564 -2.       ] [3.0972792e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9341943 -2.       ] [2.7939802e-08 0.0000000e+00]
Episode * 3201 * exploration epsilon 0.1 * Episodic Reward is ==> -15.258373260498047
action means and variances at step -10: [ 1.9332296 -2.       ] [3.7913154e-08 0.0000000e+00]
action me

Episode * 3214 * exploration epsilon 0.1 * Episodic Reward is ==> -15.66479778289795
action means and variances at step -10: [ 1.971594 -2.      ] [2.7429913e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9344795 -2.       ] [2.4078593e-08 0.0000000e+00]
Episode * 3215 * exploration epsilon 0.1 * Episodic Reward is ==> -15.577840805053711
action means and variances at step -10: [ 1.9233704 -2.       ] [3.035243e-08 0.000000e+00]
action means and variances at step -5: [ 1.9320569 -2.       ] [1.9761005e-08 0.0000000e+00]
Episode * 3216 * exploration epsilon 0.1 * Episodic Reward is ==> -15.280431747436523
action means and variances at step -10: [ 1.9566426 -2.       ] [3.641737e-08 0.000000e+00]
action means and variances at step -5: [ 1.947249 -2.      ] [1.8630532e-08 0.0000000e+00]
Episode * 3217 * exploration epsilon 0.1 * Episodic Reward is ==> -15.413576126098633
action means and variances at step -10: [ 1.9522747 -2.       ] [5.3991798e-08 0.0000000e+00]
action mea

Episode * 3230 * exploration epsilon 0.1 * Episodic Reward is ==> -15.685449600219727
action means and variances at step -10: [ 1.9329541 -2.       ] [3.7474866e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9484408 -2.       ] [3.3517704e-08 0.0000000e+00]
Episode * 3231 * exploration epsilon 0.1 * Episodic Reward is ==> -15.439628601074219
action means and variances at step -10: [ 1.9204499 -2.       ] [2.6127633e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9034897 -2.       ] [1.7235259e-08 0.0000000e+00]
Episode * 3232 * exploration epsilon 0.1 * Episodic Reward is ==> -15.405977249145508
action means and variances at step -10: [ 1.9438033 -2.       ] [2.9564116e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9380318 -2.       ] [2.8844156e-08 0.0000000e+00]
Episode * 3233 * exploration epsilon 0.1 * Episodic Reward is ==> -15.030704498291016
action means and variances at step -10: [ 1.9343872 -2.       ] [2.8120404e-08 0.0000000e+00]
a

Episode * 3246 * exploration epsilon 0.1 * Episodic Reward is ==> -15.377313613891602
action means and variances at step -10: [ 1.952687 -2.      ] [4.973961e-08 0.000000e+00]
action means and variances at step -5: [ 1.9661189 -2.       ] [2.4314964e-08 0.0000000e+00]
Episode * 3247 * exploration epsilon 0.1 * Episodic Reward is ==> -15.036909103393555
action means and variances at step -10: [ 1.9511825 -2.       ] [4.7619686e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9534897 -2.       ] [3.17094e-08 0.00000e+00]
Episode * 3248 * exploration epsilon 0.1 * Episodic Reward is ==> -15.184947967529297
action means and variances at step -10: [ 1.9464638 -2.       ] [3.4993377e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9395789 -2.       ] [2.7407696e-08 0.0000000e+00]
Episode * 3249 * exploration epsilon 0.1 * Episodic Reward is ==> -15.289087295532227
action means and variances at step -10: [ 1.9600545 -2.       ] [2.8756759e-08 0.0000000e+00]
action me

Episode * 3262 * exploration epsilon 0.1 * Episodic Reward is ==> -15.387762069702148
action means and variances at step -10: [ 1.9671715 -2.       ] [3.2180164e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9762363 -2.       ] [2.6349115e-08 0.0000000e+00]
Episode * 3263 * exploration epsilon 0.1 * Episodic Reward is ==> -15.248184204101562
action means and variances at step -10: [ 1.9679853 -2.       ] [2.4886797e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9527774 -2.       ] [2.1481288e-08 0.0000000e+00]
Episode * 3264 * exploration epsilon 0.1 * Episodic Reward is ==> -15.353662490844727
action means and variances at step -10: [ 1.9084074 -2.       ] [2.3990228e-08 0.0000000e+00]
action means and variances at step -5: [ 1.927649 -2.      ] [2.3443368e-08 0.0000000e+00]
Episode * 3265 * exploration epsilon 0.1 * Episodic Reward is ==> -15.427045822143555
action means and variances at step -10: [ 1.9451371 -2.       ] [3.1173503e-08 0.0000000e+00]
act

Episode * 3278 * exploration epsilon 0.1 * Episodic Reward is ==> -15.460000991821289
action means and variances at step -10: [ 1.939512 -2.      ] [2.9244658e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9463961 -2.       ] [2.3295284e-08 0.0000000e+00]
Episode * 3279 * exploration epsilon 0.1 * Episodic Reward is ==> -15.464005470275879
action means and variances at step -10: [ 1.9649154 -2.       ] [3.557752e-08 0.000000e+00]
action means and variances at step -5: [ 1.9798692 -2.       ] [2.5347234e-08 0.0000000e+00]
Episode * 3280 * exploration epsilon 0.1 * Episodic Reward is ==> -15.378268241882324
action means and variances at step -10: [ 1.9766979 -2.       ] [2.4297801e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9585643 -2.       ] [1.5302069e-08 0.0000000e+00]
Episode * 3281 * exploration epsilon 0.1 * Episodic Reward is ==> -15.614078521728516
action means and variances at step -10: [ 1.9388349 -2.       ] [1.5357156e-08 0.0000000e+00]
actio

Episode * 3294 * exploration epsilon 0.1 * Episodic Reward is ==> -14.925514221191406
action means and variances at step -10: [ 1.9588213 -2.       ] [3.4060495e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9646041 -2.       ] [3.355745e-08 0.000000e+00]
Episode * 3295 * exploration epsilon 0.1 * Episodic Reward is ==> -15.672286033630371
action means and variances at step -10: [ 1.9308373 -2.       ] [4.9495615e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9180135 -2.       ] [4.0240653e-08 0.0000000e+00]
Episode * 3296 * exploration epsilon 0.1 * Episodic Reward is ==> -15.387739181518555
action means and variances at step -10: [ 1.952688 -2.      ] [2.687581e-08 0.000000e+00]
action means and variances at step -5: [ 1.9539902 -2.       ] [2.198112e-08 0.000000e+00]
Episode * 3297 * exploration epsilon 0.1 * Episodic Reward is ==> -15.556705474853516
action means and variances at step -10: [ 1.9627477 -2.       ] [1.9888546e-08 0.0000000e+00]
action me

Episode * 3310 * exploration epsilon 0.1 * Episodic Reward is ==> -15.30356216430664
action means and variances at step -10: [ 1.9383662 -2.       ] [1.3790826e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9464546 -2.       ] [1.5873807e-08 0.0000000e+00]
Episode * 3311 * exploration epsilon 0.1 * Episodic Reward is ==> -15.336688041687012
action means and variances at step -10: [ 1.9439554 -2.       ] [2.967262e-08 0.000000e+00]
action means and variances at step -5: [ 1.9340266 -2.       ] [1.6599653e-08 0.0000000e+00]
Episode * 3312 * exploration epsilon 0.1 * Episodic Reward is ==> -15.352149963378906
action means and variances at step -10: [ 1.9682746 -2.       ] [2.7495302e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9727689 -2.       ] [2.1974785e-08 0.0000000e+00]
Episode * 3313 * exploration epsilon 0.1 * Episodic Reward is ==> -15.371480941772461
action means and variances at step -10: [ 1.9723825 -2.       ] [4.3757836e-08 0.0000000e+00]
acti

Episode * 3326 * exploration epsilon 0.1 * Episodic Reward is ==> -15.340720176696777
action means and variances at step -10: [ 1.9889799 -2.       ] [1.12622814e-07 0.00000000e+00]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3327 * exploration epsilon 0.1 * Episodic Reward is ==> -15.476195335388184
action means and variances at step -10: [ 1.942987 -2.      ] [6.360643e-08 0.000000e+00]
action means and variances at step -5: [ 1.9550908 -2.       ] [5.5389883e-08 0.0000000e+00]
Episode * 3328 * exploration epsilon 0.1 * Episodic Reward is ==> -15.667572975158691
action means and variances at step -10: [ 1.9673222 -2.       ] [7.038276e-08 0.000000e+00]
action means and variances at step -5: [ 1.9464691 -2.       ] [4.5407965e-08 0.0000000e+00]
Episode * 3329 * exploration epsilon 0.1 * Episodic Reward is ==> -15.519174575805664
action means and variances at step -10: [ 1.9756429 -2.       ] [8.2492605e-08 0.0000000e+00]
action means and variances at step -5: [ 

Episode * 3342 * exploration epsilon 0.1 * Episodic Reward is ==> -15.160385131835938
action means and variances at step -10: [ 1.9397602 -2.       ] [3.5800078e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9367938 -2.       ] [2.7330959e-08 0.0000000e+00]
Episode * 3343 * exploration epsilon 0.1 * Episodic Reward is ==> -15.622286796569824
action means and variances at step -10: [ 1.9587371 -2.       ] [2.6147044e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9524547 -2.       ] [2.211234e-08 0.000000e+00]
Episode * 3344 * exploration epsilon 0.1 * Episodic Reward is ==> -16.014284133911133
action means and variances at step -10: [ 1.9541656 -2.       ] [2.7088289e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9502951 -2.       ] [2.1978014e-08 0.0000000e+00]
Episode * 3345 * exploration epsilon 0.1 * Episodic Reward is ==> -15.420515060424805
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: 

Episode * 3358 * exploration epsilon 0.1 * Episodic Reward is ==> -15.631338119506836
action means and variances at step -10: [ 1.9543353 -2.       ] [3.5391565e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9711447 -2.       ] [3.697776e-08 0.000000e+00]
Episode * 3359 * exploration epsilon 0.1 * Episodic Reward is ==> -15.361083984375
action means and variances at step -10: [ 1.9623842 -2.       ] [5.008504e-08 0.000000e+00]
action means and variances at step -5: [ 1.9571112 -2.       ] [4.8399848e-08 0.0000000e+00]
Episode * 3360 * exploration epsilon 0.1 * Episodic Reward is ==> -15.061029434204102
action means and variances at step -10: [ 1.9793968 -2.       ] [3.0704904e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9636009 -2.       ] [3.856666e-08 0.000000e+00]
Episode * 3361 * exploration epsilon 0.1 * Episodic Reward is ==> -15.533300399780273
action means and variances at step -10: [ 1.9377947 -2.       ] [2.8566863e-08 0.0000000e+00]
action mea

Episode * 3374 * exploration epsilon 0.1 * Episodic Reward is ==> -15.430364608764648
action means and variances at step -10: [ 1.9221725 -2.       ] [2.1355195e-08 0.0000000e+00]
action means and variances at step -5: [ 1.934118 -2.      ] [2.2070537e-08 0.0000000e+00]
Episode * 3375 * exploration epsilon 0.1 * Episodic Reward is ==> -15.416792869567871
action means and variances at step -10: [ 1.9538764 -2.       ] [4.318436e-08 0.000000e+00]
action means and variances at step -5: [ 1.9596167 -2.       ] [2.6387491e-08 0.0000000e+00]
Episode * 3376 * exploration epsilon 0.1 * Episodic Reward is ==> -15.35980224609375
action means and variances at step -10: [ 1.9545629 -2.       ] [3.660849e-08 0.000000e+00]
action means and variances at step -5: [ 1.9717175 -2.       ] [3.6246423e-08 0.0000000e+00]
Episode * 3377 * exploration epsilon 0.1 * Episodic Reward is ==> -15.45564079284668
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 1.99

Episode * 3390 * exploration epsilon 0.1 * Episodic Reward is ==> -15.221242904663086
action means and variances at step -10: [ 1.9843738 -2.       ] [1.5333695e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9760953 -2.       ] [1.8276854e-08 0.0000000e+00]
Episode * 3391 * exploration epsilon 0.1 * Episodic Reward is ==> -15.228418350219727
action means and variances at step -10: [ 1.9556744 -2.       ] [1.2466403e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9681374 -2.       ] [1.0682141e-08 0.0000000e+00]
Episode * 3392 * exploration epsilon 0.1 * Episodic Reward is ==> -15.25748062133789
action means and variances at step -10: [ 1.9628074 -2.       ] [8.356853e-09 0.000000e+00]
action means and variances at step -5: [ 1.9574795 -2.       ] [1.1298567e-08 0.0000000e+00]
Episode * 3393 * exploration epsilon 0.1 * Episodic Reward is ==> -15.17583179473877
action means and variances at step -10: [ 1.9680438 -2.       ] [1.7878016e-08 0.0000000e+00]
actio

Episode * 3406 * exploration epsilon 0.1 * Episodic Reward is ==> -15.7359037399292
action means and variances at step -10: [ 1.9588498 -2.       ] [1.8934136e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9580737 -2.       ] [2.3903626e-08 0.0000000e+00]
Episode * 3407 * exploration epsilon 0.1 * Episodic Reward is ==> -15.132291793823242
action means and variances at step -10: [ 1.9514297 -2.       ] [1.3975745e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9318421 -2.       ] [8.982331e-09 0.000000e+00]
Episode * 3408 * exploration epsilon 0.1 * Episodic Reward is ==> -15.711905479431152
action means and variances at step -10: [ 1.9498818 -2.       ] [1.1937807e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9412153 -2.       ] [8.313197e-09 0.000000e+00]
Episode * 3409 * exploration epsilon 0.1 * Episodic Reward is ==> -15.343231201171875
action means and variances at step -10: [ 1.9587433 -2.       ] [1.1936999e-08 0.0000000e+00]
action 

Episode * 3422 * exploration epsilon 0.1 * Episodic Reward is ==> -15.605167388916016
action means and variances at step -10: [ 1.9767817 -2.       ] [1.9735015e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9772121 -2.       ] [1.8720558e-08 0.0000000e+00]
Episode * 3423 * exploration epsilon 0.1 * Episodic Reward is ==> -15.652193069458008
action means and variances at step -10: [ 1.972498 -2.      ] [6.7853514e-09 0.0000000e+00]
action means and variances at step -5: [ 1.9870442 -2.       ] [9.302637e-09 0.000000e+00]
Episode * 3424 * exploration epsilon 0.1 * Episodic Reward is ==> -15.452554702758789
action means and variances at step -10: [ 1.9702545 -2.       ] [2.0376532e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9709785 -2.       ] [1.9677385e-08 0.0000000e+00]
Episode * 3425 * exploration epsilon 0.1 * Episodic Reward is ==> -15.216886520385742
action means and variances at step -10: [ 1.958452 -2.      ] [1.9788017e-08 0.0000000e+00]
action 

Episode * 3438 * exploration epsilon 0.1 * Episodic Reward is ==> -15.29745101928711
action means and variances at step -10: [ 1.9585259 -2.       ] [1.1228549e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9749591 -2.       ] [1.236727e-08 0.000000e+00]
Episode * 3439 * exploration epsilon 0.1 * Episodic Reward is ==> -15.190951347351074
action means and variances at step -10: [ 1.9628172 -2.       ] [2.6909095e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9706447 -2.       ] [2.1608642e-08 0.0000000e+00]
Episode * 3440 * exploration epsilon 0.1 * Episodic Reward is ==> -15.606782913208008
action means and variances at step -10: [ 1.918065 -2.      ] [1.8457275e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9364575 -2.       ] [1.3823891e-08 0.0000000e+00]
Episode * 3441 * exploration epsilon 0.1 * Episodic Reward is ==> -15.430495262145996
action means and variances at step -10: [ 1.9283354 -2.       ] [2.2594273e-08 0.0000000e+00]
action

Episode * 3454 * exploration epsilon 0.1 * Episodic Reward is ==> -15.599286079406738
action means and variances at step -10: [ 1.9769174 -2.       ] [2.6863063e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9895525 -2.       ] [1.5478589e-08 0.0000000e+00]
Episode * 3455 * exploration epsilon 0.1 * Episodic Reward is ==> -15.310815811157227
action means and variances at step -10: [ 1.959865 -2.      ] [2.5753739e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9771607 -2.       ] [1.7426943e-08 0.0000000e+00]
Episode * 3456 * exploration epsilon 0.1 * Episodic Reward is ==> -15.661495208740234
action means and variances at step -10: [ 1.9815071 -2.       ] [1.6334079e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9935873 -2.       ] [1.1131741e-08 0.0000000e+00]
Episode * 3457 * exploration epsilon 0.1 * Episodic Reward is ==> -15.80207347869873
action means and variances at step -10: [ 1.9702725 -2.       ] [2.2113712e-08 0.0000000e+00]
acti

Episode * 3470 * exploration epsilon 0.1 * Episodic Reward is ==> -15.371694564819336
action means and variances at step -10: [ 1.9533198 -2.       ] [2.104355e-08 0.000000e+00]
action means and variances at step -5: [ 1.9701682 -2.       ] [9.466682e-09 0.000000e+00]
Episode * 3471 * exploration epsilon 0.1 * Episodic Reward is ==> -15.427236557006836
action means and variances at step -10: [ 1.9602475 -2.       ] [2.774603e-08 0.000000e+00]
action means and variances at step -5: [ 1.9391034 -2.       ] [1.0240685e-08 0.0000000e+00]
Episode * 3472 * exploration epsilon 0.1 * Episodic Reward is ==> -15.774410247802734
action means and variances at step -10: [ 1.9946023 -2.       ] [3.664995e-08 0.000000e+00]
action means and variances at step -5: [ 1.9925004 -2.       ] [1.5987005e-08 0.0000000e+00]
Episode * 3473 * exploration epsilon 0.1 * Episodic Reward is ==> -15.483613014221191
action means and variances at step -10: [ 1.9553019 -2.       ] [1.15291865e-08 0.00000000e+00]
action 

Episode * 3486 * exploration epsilon 0.1 * Episodic Reward is ==> -15.241194725036621
action means and variances at step -10: [ 1.967236 -2.      ] [2.141271e-08 0.000000e+00]
action means and variances at step -5: [ 1.9680003 -2.       ] [1.635063e-08 0.000000e+00]
Episode * 3487 * exploration epsilon 0.1 * Episodic Reward is ==> -15.410417556762695
action means and variances at step -10: [ 1.9801959 -2.       ] [3.2566007e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9913267 -2.       ] [1.8151244e-08 0.0000000e+00]
Episode * 3488 * exploration epsilon 0.1 * Episodic Reward is ==> -15.613140106201172
action means and variances at step -10: [ 1.9762855 -2.       ] [1.666062e-08 0.000000e+00]
action means and variances at step -5: [ 1.9764348 -2.       ] [7.766608e-09 0.000000e+00]
Episode * 3489 * exploration epsilon 0.1 * Episodic Reward is ==> -15.286369323730469
action means and variances at step -10: [ 1.9926615 -2.       ] [4.0738286e-08 0.0000000e+00]
action mean

Episode * 3502 * exploration epsilon 0.1 * Episodic Reward is ==> -15.629002571105957
action means and variances at step -10: [ 1.9972085 -2.       ] [3.6447304e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9944625 -2.       ] [2.3536723e-08 0.0000000e+00]
Episode * 3503 * exploration epsilon 0.1 * Episodic Reward is ==> -15.264619827270508
action means and variances at step -10: [ 1.9814765 -2.       ] [3.8541806e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9849657 -2.       ] [1.6951725e-08 0.0000000e+00]
Episode * 3504 * exploration epsilon 0.1 * Episodic Reward is ==> -15.638683319091797
action means and variances at step -10: [ 1.9984468 -2.       ] [3.5971585e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9807727 -2.       ] [1.930914e-08 0.000000e+00]
Episode * 3505 * exploration epsilon 0.1 * Episodic Reward is ==> -15.575111389160156
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: 

Episode * 3519 * exploration epsilon 0.1 * Episodic Reward is ==> -15.365253448486328
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3520 * exploration epsilon 0.1 * Episodic Reward is ==> -15.358699798583984
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3521 * exploration epsilon 0.1 * Episodic Reward is ==> -15.437180519104004
action means and variances at step -10: [ 1.9996674 -2.       ] [2.6863841e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9979817 -2.       ] [2.1090342e-08 0.0000000e+00]
Episode * 3522 * exploration epsilon 0.1 * Episodic Reward is ==> -15.055593490600586
action means and variances at step -10: [ 1.9683878 -2.       ] [1.8007997e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9681765 -2.       ] [7.2228308e-09 0.0000000e+00]
Episode * 3523 * exploration epsilon 0.1 * Episodic 

Episode * 3536 * exploration epsilon 0.1 * Episodic Reward is ==> -15.59619140625
action means and variances at step -10: [ 1.9976305 -2.       ] [2.7205731e-08 0.0000000e+00]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3537 * exploration epsilon 0.1 * Episodic Reward is ==> -15.74786376953125
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3538 * exploration epsilon 0.1 * Episodic Reward is ==> -15.493947982788086
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 1.9941237 -2.       ] [2.1010836e-08 0.0000000e+00]
Episode * 3539 * exploration epsilon 0.1 * Episodic Reward is ==> -15.364934921264648
action means and variances at step -10: [ 1.9797565 -2.       ] [2.5611776e-08 0.0000000e+00]
action means and variances at step -5: [ 1.9863275 -2.       ] [1.6470874e-08 0.0000000e+00]
Episode * 3540 * exploration epsilon 0.1 * Episodic Rewar

Episode * 3553 * exploration epsilon 0.1 * Episodic Reward is ==> -15.823507308959961
action means and variances at step -10: [ 1.9953638 -2.       ] [3.457074e-08 0.000000e+00]
action means and variances at step -5: [ 1.9862773 -2.       ] [1.813445e-08 0.000000e+00]
Episode * 3554 * exploration epsilon 0.1 * Episodic Reward is ==> -15.251184463500977
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3555 * exploration epsilon 0.1 * Episodic Reward is ==> -15.527013778686523
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 1.9973621 -2.       ] [2.3281245e-08 0.0000000e+00]
Episode * 3556 * exploration epsilon 0.1 * Episodic Reward is ==> -15.918807983398438
action means and variances at step -10: [ 1.9917424 -2.       ] [6.215187e-09 0.000000e+00]
action means and variances at step -5: [ 1.9855177 -2.       ] [2.7022224e-09 0.0000000e+00]
Episode * 3557 * explo

Episode * 3570 * exploration epsilon 0.1 * Episodic Reward is ==> -15.783346176147461
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3571 * exploration epsilon 0.1 * Episodic Reward is ==> -15.623231887817383
action means and variances at step -10: [ 1.9986205 -2.       ] [5.643675e-08 0.000000e+00]
action means and variances at step -5: [ 1.9713899 -2.       ] [3.7220982e-08 0.0000000e+00]
Episode * 3572 * exploration epsilon 0.1 * Episodic Reward is ==> -15.357049942016602
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3573 * exploration epsilon 0.1 * Episodic Reward is ==> -15.63626480102539
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3574 * exploration epsilon 0.1 * Episodic Reward is ==> -15.281749725341797
action means and variances at step -10: [

Episode * 3588 * exploration epsilon 0.1 * Episodic Reward is ==> -15.504201889038086
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3589 * exploration epsilon 0.1 * Episodic Reward is ==> -15.768966674804688
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3590 * exploration epsilon 0.1 * Episodic Reward is ==> -15.706182479858398
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3591 * exploration epsilon 0.1 * Episodic Reward is ==> -15.577583312988281
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3592 * exploration epsilon 0.1 * Episodic Reward is ==> -15.896085739135742
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 1.9825526 -

Episode * 3606 * exploration epsilon 0.1 * Episodic Reward is ==> -15.616289138793945
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3607 * exploration epsilon 0.1 * Episodic Reward is ==> -15.85607624053955
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3608 * exploration epsilon 0.1 * Episodic Reward is ==> -15.231266021728516
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3609 * exploration epsilon 0.1 * Episodic Reward is ==> -15.457588195800781
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3610 * exploration epsilon 0.1 * Episodic Reward is ==> -15.828900337219238
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3624 * exploration epsilon 0.1 * Episodic Reward is ==> -15.101943016052246
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3625 * exploration epsilon 0.1 * Episodic Reward is ==> -15.858293533325195
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3626 * exploration epsilon 0.1 * Episodic Reward is ==> -15.738971710205078
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3627 * exploration epsilon 0.1 * Episodic Reward is ==> -15.286521911621094
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3628 * exploration epsilon 0.1 * Episodic Reward is ==> -15.393946647644043
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 3642 * exploration epsilon 0.1 * Episodic Reward is ==> -15.02275562286377
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3643 * exploration epsilon 0.1 * Episodic Reward is ==> -15.370773315429688
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3644 * exploration epsilon 0.1 * Episodic Reward is ==> -15.490989685058594
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3645 * exploration epsilon 0.1 * Episodic Reward is ==> -15.387617111206055
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3646 * exploration epsilon 0.1 * Episodic Reward is ==> -15.401537895202637
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3660 * exploration epsilon 0.1 * Episodic Reward is ==> -15.154212951660156
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3661 * exploration epsilon 0.1 * Episodic Reward is ==> -15.899067878723145
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3662 * exploration epsilon 0.1 * Episodic Reward is ==> -15.70302677154541
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3663 * exploration epsilon 0.1 * Episodic Reward is ==> -15.411423683166504
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3664 * exploration epsilon 0.1 * Episodic Reward is ==> -15.484018325805664
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3678 * exploration epsilon 0.1 * Episodic Reward is ==> -15.479687690734863
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3679 * exploration epsilon 0.1 * Episodic Reward is ==> -15.421615600585938
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3680 * exploration epsilon 0.1 * Episodic Reward is ==> -15.873454093933105
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3681 * exploration epsilon 0.1 * Episodic Reward is ==> -15.523097038269043
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3682 * exploration epsilon 0.1 * Episodic Reward is ==> -15.78911018371582
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3696 * exploration epsilon 0.1 * Episodic Reward is ==> -15.600784301757812
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3697 * exploration epsilon 0.1 * Episodic Reward is ==> -15.383377075195312
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3698 * exploration epsilon 0.1 * Episodic Reward is ==> -15.717756271362305
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3699 * exploration epsilon 0.1 * Episodic Reward is ==> -15.667482376098633
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3700 * exploration epsilon 0.1 * Episodic Reward is ==> -15.706840515136719
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 3714 * exploration epsilon 0.1 * Episodic Reward is ==> -15.456789016723633
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3715 * exploration epsilon 0.1 * Episodic Reward is ==> -15.271133422851562
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3716 * exploration epsilon 0.1 * Episodic Reward is ==> -15.860686302185059
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3717 * exploration epsilon 0.1 * Episodic Reward is ==> -15.610135078430176
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3718 * exploration epsilon 0.1 * Episodic Reward is ==> -15.912388801574707
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 3732 * exploration epsilon 0.1 * Episodic Reward is ==> -15.24416732788086
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3733 * exploration epsilon 0.1 * Episodic Reward is ==> -15.588849067687988
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3734 * exploration epsilon 0.1 * Episodic Reward is ==> -15.913808822631836
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3735 * exploration epsilon 0.1 * Episodic Reward is ==> -15.670576095581055
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3736 * exploration epsilon 0.1 * Episodic Reward is ==> -15.179193496704102
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3750 * exploration epsilon 0.1 * Episodic Reward is ==> -15.287541389465332
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3751 * exploration epsilon 0.1 * Episodic Reward is ==> -15.779020309448242
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3752 * exploration epsilon 0.1 * Episodic Reward is ==> -15.665793418884277
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3753 * exploration epsilon 0.1 * Episodic Reward is ==> -15.265958786010742
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3754 * exploration epsilon 0.1 * Episodic Reward is ==> -15.333617210388184
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 3768 * exploration epsilon 0.1 * Episodic Reward is ==> -15.444281578063965
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3769 * exploration epsilon 0.1 * Episodic Reward is ==> -15.25976276397705
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3770 * exploration epsilon 0.1 * Episodic Reward is ==> -15.409162521362305
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3771 * exploration epsilon 0.1 * Episodic Reward is ==> -15.82373046875
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3772 * exploration epsilon 0.1 * Episodic Reward is ==> -15.065868377685547
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]


Episode * 3786 * exploration epsilon 0.1 * Episodic Reward is ==> -15.88731575012207
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3787 * exploration epsilon 0.1 * Episodic Reward is ==> -15.520788192749023
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3788 * exploration epsilon 0.1 * Episodic Reward is ==> -15.825841903686523
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3789 * exploration epsilon 0.1 * Episodic Reward is ==> -15.440335273742676
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3790 * exploration epsilon 0.1 * Episodic Reward is ==> -15.280414581298828
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3804 * exploration epsilon 0.1 * Episodic Reward is ==> -15.78407096862793
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3805 * exploration epsilon 0.1 * Episodic Reward is ==> -15.686735153198242
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3806 * exploration epsilon 0.1 * Episodic Reward is ==> -15.558746337890625
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3807 * exploration epsilon 0.1 * Episodic Reward is ==> -15.335981369018555
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3808 * exploration epsilon 0.1 * Episodic Reward is ==> -15.578065872192383
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3822 * exploration epsilon 0.1 * Episodic Reward is ==> -15.69558334350586
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3823 * exploration epsilon 0.1 * Episodic Reward is ==> -15.544272422790527
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3824 * exploration epsilon 0.1 * Episodic Reward is ==> -15.821700096130371
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3825 * exploration epsilon 0.1 * Episodic Reward is ==> -15.470550537109375
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3826 * exploration epsilon 0.1 * Episodic Reward is ==> -15.198670387268066
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3840 * exploration epsilon 0.1 * Episodic Reward is ==> -15.163127899169922
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3841 * exploration epsilon 0.1 * Episodic Reward is ==> -15.33443832397461
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3842 * exploration epsilon 0.1 * Episodic Reward is ==> -15.936803817749023
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3843 * exploration epsilon 0.1 * Episodic Reward is ==> -15.60489559173584
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3844 * exploration epsilon 0.1 * Episodic Reward is ==> -15.361542701721191
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0

Episode * 3858 * exploration epsilon 0.1 * Episodic Reward is ==> -15.209030151367188
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3859 * exploration epsilon 0.1 * Episodic Reward is ==> -15.283866882324219
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3860 * exploration epsilon 0.1 * Episodic Reward is ==> -15.122880935668945
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3861 * exploration epsilon 0.1 * Episodic Reward is ==> -15.50365161895752
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3862 * exploration epsilon 0.1 * Episodic Reward is ==> -15.605999946594238
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3876 * exploration epsilon 0.1 * Episodic Reward is ==> -15.466658592224121
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3877 * exploration epsilon 0.1 * Episodic Reward is ==> -15.59849739074707
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3878 * exploration epsilon 0.1 * Episodic Reward is ==> -15.659698486328125
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3879 * exploration epsilon 0.1 * Episodic Reward is ==> -15.401519775390625
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3880 * exploration epsilon 0.1 * Episodic Reward is ==> -16.02684783935547
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0

Episode * 3894 * exploration epsilon 0.1 * Episodic Reward is ==> -15.717220306396484
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3895 * exploration epsilon 0.1 * Episodic Reward is ==> -15.327157020568848
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3896 * exploration epsilon 0.1 * Episodic Reward is ==> -15.410161972045898
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3897 * exploration epsilon 0.1 * Episodic Reward is ==> -15.62966537475586
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3898 * exploration epsilon 0.1 * Episodic Reward is ==> -15.501138687133789
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3912 * exploration epsilon 0.1 * Episodic Reward is ==> -15.424012184143066
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3913 * exploration epsilon 0.1 * Episodic Reward is ==> -15.166731834411621
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3914 * exploration epsilon 0.1 * Episodic Reward is ==> -15.287144660949707
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3915 * exploration epsilon 0.1 * Episodic Reward is ==> -15.44195556640625
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3916 * exploration epsilon 0.1 * Episodic Reward is ==> -15.901670455932617
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3930 * exploration epsilon 0.1 * Episodic Reward is ==> -14.819595336914062
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3931 * exploration epsilon 0.1 * Episodic Reward is ==> -14.698801040649414
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3932 * exploration epsilon 0.1 * Episodic Reward is ==> -15.60746955871582
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3933 * exploration epsilon 0.1 * Episodic Reward is ==> -15.779212951660156
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3934 * exploration epsilon 0.1 * Episodic Reward is ==> -15.191965103149414
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 3948 * exploration epsilon 0.1 * Episodic Reward is ==> -15.2424898147583
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3949 * exploration epsilon 0.1 * Episodic Reward is ==> -15.557604789733887
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3950 * exploration epsilon 0.1 * Episodic Reward is ==> -16.03651237487793
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3951 * exploration epsilon 0.1 * Episodic Reward is ==> -15.51218032836914
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3952 * exploration epsilon 0.1 * Episodic Reward is ==> -15.114540100097656
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]

Episode * 3966 * exploration epsilon 0.1 * Episodic Reward is ==> -15.692806243896484
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3967 * exploration epsilon 0.1 * Episodic Reward is ==> -15.345037460327148
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3968 * exploration epsilon 0.1 * Episodic Reward is ==> -15.52629280090332
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3969 * exploration epsilon 0.1 * Episodic Reward is ==> -15.725829124450684
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3970 * exploration epsilon 0.1 * Episodic Reward is ==> -15.30583381652832
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0

Episode * 3984 * exploration epsilon 0.1 * Episodic Reward is ==> -15.637073516845703
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3985 * exploration epsilon 0.1 * Episodic Reward is ==> -15.856776237487793
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3986 * exploration epsilon 0.1 * Episodic Reward is ==> -15.856761932373047
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3987 * exploration epsilon 0.1 * Episodic Reward is ==> -15.674627304077148
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 3988 * exploration epsilon 0.1 * Episodic Reward is ==> -15.372032165527344
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4002 * exploration epsilon 0.1 * Episodic Reward is ==> -15.297260284423828
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4003 * exploration epsilon 0.1 * Episodic Reward is ==> -15.798822402954102
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4004 * exploration epsilon 0.1 * Episodic Reward is ==> -15.46127700805664
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4005 * exploration epsilon 0.1 * Episodic Reward is ==> -15.30876350402832
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4006 * exploration epsilon 0.1 * Episodic Reward is ==> -15.538064956665039
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0

Episode * 4020 * exploration epsilon 0.1 * Episodic Reward is ==> -15.14929485321045
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4021 * exploration epsilon 0.1 * Episodic Reward is ==> -15.551807403564453
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4022 * exploration epsilon 0.1 * Episodic Reward is ==> -15.477361679077148
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4023 * exploration epsilon 0.1 * Episodic Reward is ==> -15.267024993896484
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4024 * exploration epsilon 0.1 * Episodic Reward is ==> -15.621052742004395
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4038 * exploration epsilon 0.1 * Episodic Reward is ==> -15.55774211883545
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4039 * exploration epsilon 0.1 * Episodic Reward is ==> -15.483989715576172
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4040 * exploration epsilon 0.1 * Episodic Reward is ==> -16.12060546875
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4041 * exploration epsilon 0.1 * Episodic Reward is ==> -15.471527099609375
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4042 * exploration epsilon 0.1 * Episodic Reward is ==> -16.032371520996094
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]


Episode * 4056 * exploration epsilon 0.1 * Episodic Reward is ==> -15.282184600830078
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4057 * exploration epsilon 0.1 * Episodic Reward is ==> -15.164724349975586
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4058 * exploration epsilon 0.1 * Episodic Reward is ==> -15.36766529083252
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4059 * exploration epsilon 0.1 * Episodic Reward is ==> -15.163558959960938
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4060 * exploration epsilon 0.1 * Episodic Reward is ==> -15.376324653625488
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4074 * exploration epsilon 0.1 * Episodic Reward is ==> -15.273294448852539
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4075 * exploration epsilon 0.1 * Episodic Reward is ==> -15.28072738647461
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4076 * exploration epsilon 0.1 * Episodic Reward is ==> -15.57929801940918
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4077 * exploration epsilon 0.1 * Episodic Reward is ==> -15.826473236083984
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4078 * exploration epsilon 0.1 * Episodic Reward is ==> -15.332883834838867
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0

Episode * 4092 * exploration epsilon 0.1 * Episodic Reward is ==> -15.0784912109375
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4093 * exploration epsilon 0.1 * Episodic Reward is ==> -15.828852653503418
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4094 * exploration epsilon 0.1 * Episodic Reward is ==> -15.43515682220459
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4095 * exploration epsilon 0.1 * Episodic Reward is ==> -15.649210929870605
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4096 * exploration epsilon 0.1 * Episodic Reward is ==> -15.378902435302734
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.

Episode * 4110 * exploration epsilon 0.1 * Episodic Reward is ==> -15.656936645507812
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4111 * exploration epsilon 0.1 * Episodic Reward is ==> -15.314619064331055
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4112 * exploration epsilon 0.1 * Episodic Reward is ==> -15.866964340209961
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4113 * exploration epsilon 0.1 * Episodic Reward is ==> -15.49268913269043
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4114 * exploration epsilon 0.1 * Episodic Reward is ==> -15.938008308410645
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4128 * exploration epsilon 0.1 * Episodic Reward is ==> -15.328865051269531
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4129 * exploration epsilon 0.1 * Episodic Reward is ==> -15.72350788116455
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4130 * exploration epsilon 0.1 * Episodic Reward is ==> -15.399588584899902
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4131 * exploration epsilon 0.1 * Episodic Reward is ==> -15.498273849487305
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4132 * exploration epsilon 0.1 * Episodic Reward is ==> -15.367117881774902
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4146 * exploration epsilon 0.1 * Episodic Reward is ==> -15.196134567260742
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4147 * exploration epsilon 0.1 * Episodic Reward is ==> -15.759647369384766
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4148 * exploration epsilon 0.1 * Episodic Reward is ==> -15.271509170532227
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4149 * exploration epsilon 0.1 * Episodic Reward is ==> -15.41191291809082
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4150 * exploration epsilon 0.1 * Episodic Reward is ==> -15.513860702514648
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4164 * exploration epsilon 0.1 * Episodic Reward is ==> -15.688114166259766
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4165 * exploration epsilon 0.1 * Episodic Reward is ==> -15.617474555969238
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4166 * exploration epsilon 0.1 * Episodic Reward is ==> -15.498037338256836
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4167 * exploration epsilon 0.1 * Episodic Reward is ==> -15.524730682373047
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4168 * exploration epsilon 0.1 * Episodic Reward is ==> -15.201791763305664
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4182 * exploration epsilon 0.1 * Episodic Reward is ==> -15.422842979431152
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4183 * exploration epsilon 0.1 * Episodic Reward is ==> -15.213876724243164
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4184 * exploration epsilon 0.1 * Episodic Reward is ==> -15.609838485717773
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4185 * exploration epsilon 0.1 * Episodic Reward is ==> -15.780597686767578
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4186 * exploration epsilon 0.1 * Episodic Reward is ==> -16.218870162963867
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4200 * exploration epsilon 0.1 * Episodic Reward is ==> -15.525445938110352
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4201 * exploration epsilon 0.1 * Episodic Reward is ==> -14.95809268951416
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4202 * exploration epsilon 0.1 * Episodic Reward is ==> -15.386139869689941
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4203 * exploration epsilon 0.1 * Episodic Reward is ==> -15.482791900634766
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4204 * exploration epsilon 0.1 * Episodic Reward is ==> -15.703083038330078
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4218 * exploration epsilon 0.1 * Episodic Reward is ==> -15.6694917678833
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4219 * exploration epsilon 0.1 * Episodic Reward is ==> -15.289286613464355
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4220 * exploration epsilon 0.1 * Episodic Reward is ==> -15.877457618713379
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4221 * exploration epsilon 0.1 * Episodic Reward is ==> -15.465803146362305
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4222 * exploration epsilon 0.1 * Episodic Reward is ==> -15.60818099975586
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.

Episode * 4236 * exploration epsilon 0.1 * Episodic Reward is ==> -15.211708068847656
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4237 * exploration epsilon 0.1 * Episodic Reward is ==> -15.402059555053711
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4238 * exploration epsilon 0.1 * Episodic Reward is ==> -15.47269058227539
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4239 * exploration epsilon 0.1 * Episodic Reward is ==> -15.540721893310547
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4240 * exploration epsilon 0.1 * Episodic Reward is ==> -15.888223648071289
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4254 * exploration epsilon 0.1 * Episodic Reward is ==> -15.398202896118164
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4255 * exploration epsilon 0.1 * Episodic Reward is ==> -15.527790069580078
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4256 * exploration epsilon 0.1 * Episodic Reward is ==> -15.768640518188477
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4257 * exploration epsilon 0.1 * Episodic Reward is ==> -15.319612503051758
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4258 * exploration epsilon 0.1 * Episodic Reward is ==> -15.561169624328613
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4272 * exploration epsilon 0.1 * Episodic Reward is ==> -15.063591957092285
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4273 * exploration epsilon 0.1 * Episodic Reward is ==> -15.493735313415527
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4274 * exploration epsilon 0.1 * Episodic Reward is ==> -15.633129119873047
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4275 * exploration epsilon 0.1 * Episodic Reward is ==> -15.814559936523438
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4276 * exploration epsilon 0.1 * Episodic Reward is ==> -15.190460205078125
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4290 * exploration epsilon 0.1 * Episodic Reward is ==> -15.504814147949219
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4291 * exploration epsilon 0.1 * Episodic Reward is ==> -15.411794662475586
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4292 * exploration epsilon 0.1 * Episodic Reward is ==> -15.249253273010254
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4293 * exploration epsilon 0.1 * Episodic Reward is ==> -15.567007064819336
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4294 * exploration epsilon 0.1 * Episodic Reward is ==> -15.64715576171875
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4308 * exploration epsilon 0.1 * Episodic Reward is ==> -15.742666244506836
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4309 * exploration epsilon 0.1 * Episodic Reward is ==> -15.55108642578125
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4310 * exploration epsilon 0.1 * Episodic Reward is ==> -15.130467414855957
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4311 * exploration epsilon 0.1 * Episodic Reward is ==> -15.698945999145508
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4312 * exploration epsilon 0.1 * Episodic Reward is ==> -15.2842435836792
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.

Episode * 4326 * exploration epsilon 0.1 * Episodic Reward is ==> -15.012246131896973
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4327 * exploration epsilon 0.1 * Episodic Reward is ==> -15.51007080078125
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4328 * exploration epsilon 0.1 * Episodic Reward is ==> -15.607048034667969
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4329 * exploration epsilon 0.1 * Episodic Reward is ==> -15.846912384033203
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4330 * exploration epsilon 0.1 * Episodic Reward is ==> -15.851022720336914
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4344 * exploration epsilon 0.1 * Episodic Reward is ==> -15.36958122253418
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4345 * exploration epsilon 0.1 * Episodic Reward is ==> -15.66093635559082
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4346 * exploration epsilon 0.1 * Episodic Reward is ==> -15.491207122802734
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4347 * exploration epsilon 0.1 * Episodic Reward is ==> -15.506450653076172
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4348 * exploration epsilon 0.1 * Episodic Reward is ==> -15.260744094848633
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0

Episode * 4362 * exploration epsilon 0.1 * Episodic Reward is ==> -15.239397048950195
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4363 * exploration epsilon 0.1 * Episodic Reward is ==> -15.658985137939453
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4364 * exploration epsilon 0.1 * Episodic Reward is ==> -15.072423934936523
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4365 * exploration epsilon 0.1 * Episodic Reward is ==> -15.739095687866211
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4366 * exploration epsilon 0.1 * Episodic Reward is ==> -15.183003425598145
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4380 * exploration epsilon 0.1 * Episodic Reward is ==> -15.612567901611328
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4381 * exploration epsilon 0.1 * Episodic Reward is ==> -15.588994979858398
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4382 * exploration epsilon 0.1 * Episodic Reward is ==> -15.703034400939941
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4383 * exploration epsilon 0.1 * Episodic Reward is ==> -15.617583274841309
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4384 * exploration epsilon 0.1 * Episodic Reward is ==> -15.765317916870117
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4398 * exploration epsilon 0.1 * Episodic Reward is ==> -15.49911117553711
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4399 * exploration epsilon 0.1 * Episodic Reward is ==> -15.305093765258789
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4400 * exploration epsilon 0.1 * Episodic Reward is ==> -15.6705961227417
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4401 * exploration epsilon 0.1 * Episodic Reward is ==> -15.71782112121582
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4402 * exploration epsilon 0.1 * Episodic Reward is ==> -14.908634185791016
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]

Episode * 4416 * exploration epsilon 0.1 * Episodic Reward is ==> -15.388742446899414
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4417 * exploration epsilon 0.1 * Episodic Reward is ==> -15.494359970092773
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4418 * exploration epsilon 0.1 * Episodic Reward is ==> -15.317864418029785
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4419 * exploration epsilon 0.1 * Episodic Reward is ==> -15.507932662963867
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4420 * exploration epsilon 0.1 * Episodic Reward is ==> -15.834933280944824
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4434 * exploration epsilon 0.1 * Episodic Reward is ==> -15.62605094909668
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4435 * exploration epsilon 0.1 * Episodic Reward is ==> -15.468907356262207
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4436 * exploration epsilon 0.1 * Episodic Reward is ==> -15.505867004394531
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4437 * exploration epsilon 0.1 * Episodic Reward is ==> -15.351091384887695
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4438 * exploration epsilon 0.1 * Episodic Reward is ==> -15.679725646972656
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4452 * exploration epsilon 0.1 * Episodic Reward is ==> -15.453381538391113
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4453 * exploration epsilon 0.1 * Episodic Reward is ==> -15.319854736328125
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4454 * exploration epsilon 0.1 * Episodic Reward is ==> -15.685720443725586
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4455 * exploration epsilon 0.1 * Episodic Reward is ==> -15.326299667358398
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4456 * exploration epsilon 0.1 * Episodic Reward is ==> -15.574718475341797
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4470 * exploration epsilon 0.1 * Episodic Reward is ==> -15.40766429901123
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4471 * exploration epsilon 0.1 * Episodic Reward is ==> -15.054342269897461
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4472 * exploration epsilon 0.1 * Episodic Reward is ==> -15.277627944946289
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4473 * exploration epsilon 0.1 * Episodic Reward is ==> -15.766429901123047
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4474 * exploration epsilon 0.1 * Episodic Reward is ==> -15.56762981414795
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0

Episode * 4488 * exploration epsilon 0.1 * Episodic Reward is ==> -15.689050674438477
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4489 * exploration epsilon 0.1 * Episodic Reward is ==> -15.343571662902832
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4490 * exploration epsilon 0.1 * Episodic Reward is ==> -15.603524208068848
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4491 * exploration epsilon 0.1 * Episodic Reward is ==> -15.395331382751465
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4492 * exploration epsilon 0.1 * Episodic Reward is ==> -15.470187187194824
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4506 * exploration epsilon 0.1 * Episodic Reward is ==> -16.214374542236328
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4507 * exploration epsilon 0.1 * Episodic Reward is ==> -15.817148208618164
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4508 * exploration epsilon 0.1 * Episodic Reward is ==> -15.22319221496582
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4509 * exploration epsilon 0.1 * Episodic Reward is ==> -15.42183780670166
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4510 * exploration epsilon 0.1 * Episodic Reward is ==> -15.621118545532227
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0

Episode * 4524 * exploration epsilon 0.1 * Episodic Reward is ==> -16.165767669677734
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4525 * exploration epsilon 0.1 * Episodic Reward is ==> -15.493280410766602
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4526 * exploration epsilon 0.1 * Episodic Reward is ==> -15.692804336547852
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4527 * exploration epsilon 0.1 * Episodic Reward is ==> -15.137622833251953
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4528 * exploration epsilon 0.1 * Episodic Reward is ==> -15.242250442504883
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4542 * exploration epsilon 0.1 * Episodic Reward is ==> -15.001789093017578
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4543 * exploration epsilon 0.1 * Episodic Reward is ==> -15.527196884155273
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4544 * exploration epsilon 0.1 * Episodic Reward is ==> -15.43760871887207
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4545 * exploration epsilon 0.1 * Episodic Reward is ==> -15.670280456542969
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4546 * exploration epsilon 0.1 * Episodic Reward is ==> -15.177287101745605
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4560 * exploration epsilon 0.1 * Episodic Reward is ==> -15.061487197875977
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4561 * exploration epsilon 0.1 * Episodic Reward is ==> -15.661510467529297
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4562 * exploration epsilon 0.1 * Episodic Reward is ==> -15.877462387084961
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4563 * exploration epsilon 0.1 * Episodic Reward is ==> -15.616549491882324
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4564 * exploration epsilon 0.1 * Episodic Reward is ==> -15.457721710205078
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4578 * exploration epsilon 0.1 * Episodic Reward is ==> -15.651430130004883
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4579 * exploration epsilon 0.1 * Episodic Reward is ==> -15.572577476501465
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4580 * exploration epsilon 0.1 * Episodic Reward is ==> -15.603422164916992
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4581 * exploration epsilon 0.1 * Episodic Reward is ==> -15.364845275878906
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4582 * exploration epsilon 0.1 * Episodic Reward is ==> -15.687993049621582
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4596 * exploration epsilon 0.1 * Episodic Reward is ==> -15.707572937011719
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4597 * exploration epsilon 0.1 * Episodic Reward is ==> -15.59253978729248
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4598 * exploration epsilon 0.1 * Episodic Reward is ==> -15.915457725524902
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4599 * exploration epsilon 0.1 * Episodic Reward is ==> -15.664078712463379
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4600 * exploration epsilon 0.1 * Episodic Reward is ==> -15.639033317565918
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4614 * exploration epsilon 0.1 * Episodic Reward is ==> -15.26527214050293
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4615 * exploration epsilon 0.1 * Episodic Reward is ==> -15.77535629272461
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4616 * exploration epsilon 0.1 * Episodic Reward is ==> -14.978668212890625
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4617 * exploration epsilon 0.1 * Episodic Reward is ==> -15.683712005615234
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4618 * exploration epsilon 0.1 * Episodic Reward is ==> -15.1925048828125
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]

Episode * 4632 * exploration epsilon 0.1 * Episodic Reward is ==> -15.80880069732666
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4633 * exploration epsilon 0.1 * Episodic Reward is ==> -15.239870071411133
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4634 * exploration epsilon 0.1 * Episodic Reward is ==> -15.989112854003906
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4635 * exploration epsilon 0.1 * Episodic Reward is ==> -15.508240699768066
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4636 * exploration epsilon 0.1 * Episodic Reward is ==> -15.829446792602539
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4650 * exploration epsilon 0.1 * Episodic Reward is ==> -15.16917610168457
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4651 * exploration epsilon 0.1 * Episodic Reward is ==> -15.591402053833008
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4652 * exploration epsilon 0.1 * Episodic Reward is ==> -15.198287963867188
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4653 * exploration epsilon 0.1 * Episodic Reward is ==> -15.520736694335938
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4654 * exploration epsilon 0.1 * Episodic Reward is ==> -15.677201271057129
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4668 * exploration epsilon 0.1 * Episodic Reward is ==> -15.61203384399414
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4669 * exploration epsilon 0.1 * Episodic Reward is ==> -15.475936889648438
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4670 * exploration epsilon 0.1 * Episodic Reward is ==> -15.247990608215332
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4671 * exploration epsilon 0.1 * Episodic Reward is ==> -15.119829177856445
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4672 * exploration epsilon 0.1 * Episodic Reward is ==> -15.540924072265625
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4686 * exploration epsilon 0.1 * Episodic Reward is ==> -15.635307312011719
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4687 * exploration epsilon 0.1 * Episodic Reward is ==> -15.502738952636719
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4688 * exploration epsilon 0.1 * Episodic Reward is ==> -15.614046096801758
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4689 * exploration epsilon 0.1 * Episodic Reward is ==> -15.684359550476074
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4690 * exploration epsilon 0.1 * Episodic Reward is ==> -15.605188369750977
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4704 * exploration epsilon 0.1 * Episodic Reward is ==> -15.865074157714844
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4705 * exploration epsilon 0.1 * Episodic Reward is ==> -15.499608039855957
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4706 * exploration epsilon 0.1 * Episodic Reward is ==> -15.59956169128418
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4707 * exploration epsilon 0.1 * Episodic Reward is ==> -15.54415225982666
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4708 * exploration epsilon 0.1 * Episodic Reward is ==> -15.504159927368164
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0

Episode * 4722 * exploration epsilon 0.1 * Episodic Reward is ==> -15.407689094543457
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4723 * exploration epsilon 0.1 * Episodic Reward is ==> -15.695062637329102
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4724 * exploration epsilon 0.1 * Episodic Reward is ==> -15.53088092803955
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4725 * exploration epsilon 0.1 * Episodic Reward is ==> -15.523686408996582
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4726 * exploration epsilon 0.1 * Episodic Reward is ==> -15.255390167236328
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4740 * exploration epsilon 0.1 * Episodic Reward is ==> -15.47833251953125
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4741 * exploration epsilon 0.1 * Episodic Reward is ==> -15.478925704956055
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4742 * exploration epsilon 0.1 * Episodic Reward is ==> -15.650762557983398
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4743 * exploration epsilon 0.1 * Episodic Reward is ==> -15.27665901184082
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4744 * exploration epsilon 0.1 * Episodic Reward is ==> -15.354862213134766
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0

Episode * 4758 * exploration epsilon 0.1 * Episodic Reward is ==> -15.299714088439941
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4759 * exploration epsilon 0.1 * Episodic Reward is ==> -15.656869888305664
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4760 * exploration epsilon 0.1 * Episodic Reward is ==> -15.552160263061523
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4761 * exploration epsilon 0.1 * Episodic Reward is ==> -15.651721954345703
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4762 * exploration epsilon 0.1 * Episodic Reward is ==> -15.180963516235352
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4776 * exploration epsilon 0.1 * Episodic Reward is ==> -15.497598648071289
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4777 * exploration epsilon 0.1 * Episodic Reward is ==> -15.575340270996094
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4778 * exploration epsilon 0.1 * Episodic Reward is ==> -15.759317398071289
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4779 * exploration epsilon 0.1 * Episodic Reward is ==> -15.718966484069824
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4780 * exploration epsilon 0.1 * Episodic Reward is ==> -15.477234840393066
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4794 * exploration epsilon 0.1 * Episodic Reward is ==> -15.111040115356445
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4795 * exploration epsilon 0.1 * Episodic Reward is ==> -15.779973983764648
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4796 * exploration epsilon 0.1 * Episodic Reward is ==> -15.18824577331543
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4797 * exploration epsilon 0.1 * Episodic Reward is ==> -15.525633811950684
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4798 * exploration epsilon 0.1 * Episodic Reward is ==> -15.641927719116211
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4812 * exploration epsilon 0.1 * Episodic Reward is ==> -16.098041534423828
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4813 * exploration epsilon 0.1 * Episodic Reward is ==> -15.336764335632324
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4814 * exploration epsilon 0.1 * Episodic Reward is ==> -15.330037117004395
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4815 * exploration epsilon 0.1 * Episodic Reward is ==> -15.34869384765625
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4816 * exploration epsilon 0.1 * Episodic Reward is ==> -15.136289596557617
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4830 * exploration epsilon 0.1 * Episodic Reward is ==> -15.524206161499023
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4831 * exploration epsilon 0.1 * Episodic Reward is ==> -15.942623138427734
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4832 * exploration epsilon 0.1 * Episodic Reward is ==> -15.738688468933105
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4833 * exploration epsilon 0.1 * Episodic Reward is ==> -15.356464385986328
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4834 * exploration epsilon 0.1 * Episodic Reward is ==> -15.09756088256836
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4848 * exploration epsilon 0.1 * Episodic Reward is ==> -15.466063499450684
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4849 * exploration epsilon 0.1 * Episodic Reward is ==> -15.962108612060547
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4850 * exploration epsilon 0.1 * Episodic Reward is ==> -15.644770622253418
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4851 * exploration epsilon 0.1 * Episodic Reward is ==> -15.566770553588867
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4852 * exploration epsilon 0.1 * Episodic Reward is ==> -15.511693954467773
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4866 * exploration epsilon 0.1 * Episodic Reward is ==> -15.625470161437988
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4867 * exploration epsilon 0.1 * Episodic Reward is ==> -15.852226257324219
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4868 * exploration epsilon 0.1 * Episodic Reward is ==> -15.833541870117188
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4869 * exploration epsilon 0.1 * Episodic Reward is ==> -15.810952186584473
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4870 * exploration epsilon 0.1 * Episodic Reward is ==> -15.553498268127441
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4884 * exploration epsilon 0.1 * Episodic Reward is ==> -16.01125717163086
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4885 * exploration epsilon 0.1 * Episodic Reward is ==> -15.072044372558594
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4886 * exploration epsilon 0.1 * Episodic Reward is ==> -15.602651596069336
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4887 * exploration epsilon 0.1 * Episodic Reward is ==> -15.60279655456543
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4888 * exploration epsilon 0.1 * Episodic Reward is ==> -15.66162109375
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
E

Episode * 4902 * exploration epsilon 0.1 * Episodic Reward is ==> -15.652616500854492
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4903 * exploration epsilon 0.1 * Episodic Reward is ==> -15.500267028808594
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4904 * exploration epsilon 0.1 * Episodic Reward is ==> -15.723336219787598
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4905 * exploration epsilon 0.1 * Episodic Reward is ==> -15.569757461547852
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4906 * exploration epsilon 0.1 * Episodic Reward is ==> -15.82682991027832
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4920 * exploration epsilon 0.1 * Episodic Reward is ==> -15.37652587890625
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4921 * exploration epsilon 0.1 * Episodic Reward is ==> -15.380122184753418
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4922 * exploration epsilon 0.1 * Episodic Reward is ==> -15.73073673248291
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4923 * exploration epsilon 0.1 * Episodic Reward is ==> -16.072277069091797
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4924 * exploration epsilon 0.1 * Episodic Reward is ==> -15.48326301574707
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.

Episode * 4938 * exploration epsilon 0.1 * Episodic Reward is ==> -15.939465522766113
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4939 * exploration epsilon 0.1 * Episodic Reward is ==> -15.043909072875977
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4940 * exploration epsilon 0.1 * Episodic Reward is ==> -15.479950904846191
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4941 * exploration epsilon 0.1 * Episodic Reward is ==> -15.493132591247559
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4942 * exploration epsilon 0.1 * Episodic Reward is ==> -15.769224166870117
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4956 * exploration epsilon 0.1 * Episodic Reward is ==> -15.476320266723633
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4957 * exploration epsilon 0.1 * Episodic Reward is ==> -15.50894546508789
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4958 * exploration epsilon 0.1 * Episodic Reward is ==> -15.719465255737305
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4959 * exploration epsilon 0.1 * Episodic Reward is ==> -15.694183349609375
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4960 * exploration epsilon 0.1 * Episodic Reward is ==> -15.569286346435547
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

Episode * 4974 * exploration epsilon 0.1 * Episodic Reward is ==> -15.838643074035645
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4975 * exploration epsilon 0.1 * Episodic Reward is ==> -15.699821472167969
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4976 * exploration epsilon 0.1 * Episodic Reward is ==> -15.787287712097168
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4977 * exploration epsilon 0.1 * Episodic Reward is ==> -15.562990188598633
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4978 * exploration epsilon 0.1 * Episodic Reward is ==> -15.681743621826172
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 4992 * exploration epsilon 0.1 * Episodic Reward is ==> -15.761277198791504
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4993 * exploration epsilon 0.1 * Episodic Reward is ==> -15.631977081298828
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4994 * exploration epsilon 0.1 * Episodic Reward is ==> -15.751220703125
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4995 * exploration epsilon 0.1 * Episodic Reward is ==> -15.450704574584961
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 4996 * exploration epsilon 0.1 * Episodic Reward is ==> -15.536587715148926
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.

Episode * 5010 * exploration epsilon 0.1 * Episodic Reward is ==> -15.064154624938965
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5011 * exploration epsilon 0.1 * Episodic Reward is ==> -15.274532318115234
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5012 * exploration epsilon 0.1 * Episodic Reward is ==> -15.625336647033691
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5013 * exploration epsilon 0.1 * Episodic Reward is ==> -15.4870023727417
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5014 * exploration epsilon 0.1 * Episodic Reward is ==> -15.489986419677734
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0

Episode * 5028 * exploration epsilon 0.1 * Episodic Reward is ==> -15.141779899597168
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5029 * exploration epsilon 0.1 * Episodic Reward is ==> -15.436552047729492
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5030 * exploration epsilon 0.1 * Episodic Reward is ==> -15.444096565246582
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5031 * exploration epsilon 0.1 * Episodic Reward is ==> -15.470304489135742
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5032 * exploration epsilon 0.1 * Episodic Reward is ==> -15.537304878234863
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 5046 * exploration epsilon 0.1 * Episodic Reward is ==> -15.645345687866211
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5047 * exploration epsilon 0.1 * Episodic Reward is ==> -15.806713104248047
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5048 * exploration epsilon 0.1 * Episodic Reward is ==> -15.442919731140137
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5049 * exploration epsilon 0.1 * Episodic Reward is ==> -15.754854202270508
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5050 * exploration epsilon 0.1 * Episodic Reward is ==> -15.418018341064453
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0.

Episode * 5064 * exploration epsilon 0.1 * Episodic Reward is ==> -15.431245803833008
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5065 * exploration epsilon 0.1 * Episodic Reward is ==> -15.764019012451172
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5066 * exploration epsilon 0.1 * Episodic Reward is ==> -15.187171936035156
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5067 * exploration epsilon 0.1 * Episodic Reward is ==> -15.424558639526367
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 0.]
Episode * 5068 * exploration epsilon 0.1 * Episodic Reward is ==> -15.65575122833252
action means and variances at step -10: [ 2. -2.] [0. 0.]
action means and variances at step -5: [ 2. -2.] [0. 

KeyboardInterrupt: 

In [None]:
# Plot the raw reward_records trace (no smoothing applied).
plt.plot(reward_records)

In [None]:
import misc

In [None]:
# Overlay the raw reward trace with its misc.smooth(..., 100) version.
# Each curve is labelled so the figure can be read without the code next to it.
plt.plot(reward_records, label='reward_records')
plt.plot(misc.smooth(reward_records, 100), label='misc.smooth(reward_records, 100)')
plt.legend()
plt.grid()


In [None]:
# Overlay the raw reward trace with its misc.smooth(..., 1000) version.
# Each curve is labelled so the figure can be read without the code next to it.
plt.plot(reward_records, label='reward_records')
plt.plot(misc.smooth(reward_records, 1000), label='misc.smooth(reward_records, 1000)')
plt.legend()
plt.grid()

In [None]:
# Critic loss with the first 100 entries dropped, raw and after
# misc.smooth(..., 100). The slice is taken once and both curves are labelled.
critic_loss_tail = buffer.critic_loss_buffer[100:]
plt.plot(critic_loss_tail, label='critic_loss_buffer[100:]')
plt.plot(misc.smooth(critic_loss_tail, 100), label='misc.smooth(..., 100)')
# Trailing semicolon keeps the Legend repr out of the cell output.
plt.legend();

In [None]:
# Zoom in on the first 10 entries of the critic loss buffer.
plt.plot(buffer.critic_loss_buffer[:10])


In [None]:
# Compare three misc.smooth settings on the critic loss (first 100 entries
# dropped). The y-axis is clipped to [0, 0.05]; each curve carries a legend
# entry naming the setting that produced it.
critic_loss_tail = buffer.critic_loss_buffer[100:]
for smooth_arg in (1, 100, 1000):
    plt.plot(misc.smooth(critic_loss_tail, smooth_arg),
             label='misc.smooth(..., {})'.format(smooth_arg))
plt.ylim([0, 0.05])
plt.legend()
plt.grid()

In [None]:
# Compare three misc.smooth settings on the critic loss (first 100 entries
# dropped) at the automatic y-range; each curve carries a legend entry naming
# the setting that produced it.
critic_loss_tail = buffer.critic_loss_buffer[100:]
for smooth_arg in (1, 100, 1000):
    plt.plot(misc.smooth(critic_loss_tail, smooth_arg),
             label='misc.smooth(..., {})'.format(smooth_arg))
plt.legend()
plt.grid()

In [None]:
# Compare three misc.smooth settings on the most recent (at most 500) critic
# loss entries; each curve carries a legend entry naming its setting.
# NOTE(review): the last setting (1000) is larger than the 500-entry slice. If
# the second argument of misc.smooth is a window length (its definition is not
# visible here), that curve may be degenerate - confirm.
critic_loss_recent = buffer.critic_loss_buffer[-500:]
for smooth_arg in (1, 100, 1000):
    plt.plot(misc.smooth(critic_loss_recent, smooth_arg),
             label='misc.smooth(..., {})'.format(smooth_arg))
plt.legend()
plt.grid()

In [None]:
## Debugging NaN values in action

In [None]:
# Actor loss history with the first 100 entries dropped.
plt.plot(buffer.actor_loss_buffer[100:])


In [None]:
# Show deterministic_action converted to a NumPy array.
# NOTE(review): deterministic_action is not assigned in any nearby cell;
# presumably it is left over from the training loop - confirm it is still
# defined after Restart & Run All.
deterministic_action.numpy()

In [None]:
# Display the current value of reward.
# NOTE(review): this shows whatever the last executed cell assigned to reward
# (presumably the interrupted training loop) - confirm which step it belongs to.
reward

In [None]:
# Step the environment with uniformly random actions and, after every step,
# print the first five rewards followed by the critic's output (first five
# entries, transposed) for deterministic_action and then for the random action.
# NOTE(review): deterministic_action is not recomputed here, so it is whatever an
# earlier cell left behind rather than the actor's action for the current state.
# NOTE(review): 55 steps is more than config.t_max (50 in the config cell);
# presumably this deliberately runs past the episode limit - confirm.
env.reset()
for ii in range(55):
    # Uniform samples in [-2, 2) for each of the 2 action components.
    random_action = 4*np.random.uniform(size=(config.batch_size,2)) - 2
    state, reward, done, info = env.step(random_action)
    print('----------')
    print(reward.numpy()[:5])
    for candidate_action in (deterministic_action, random_action):
        print(critic_model([env.unflatten_observation(state),
                            candidate_action]).numpy()[:5].T)

In [None]:
# Render entry 0 of component 0 of the unflattened observation as an image.
# NOTE(review): component 0 is presumably the image input and the leading axis
# the batch - confirm against env.unflatten_observation.
plt.imshow(env.unflatten_observation(state)[0].numpy()[0])

In [None]:
# Show entry 0 of component 1 of the unflattened observation.
# NOTE(review): presumably the spectral-density input for the first batch item
# (going by the actor's constructor arguments) - confirm.
env.unflatten_observation(state)[1].numpy()[0]

In [None]:
# Show entry 0 of component 2 of the unflattened observation.
# NOTE(review): presumably the location-history input for the first batch item
# (going by the actor's constructor arguments) - confirm.
env.unflatten_observation(state)[2].numpy()[0]

In [None]:
# Show entry 0 of component 3 of the unflattened observation.
# NOTE(review): presumably the timestep input for the first batch item
# (going by the actor's constructor arguments) - confirm.
env.unflatten_observation(state)[3].numpy()[0]

In [None]:
# Display the most recently assigned random_action (set in the random-rollout cell).
random_action

In [None]:
# Evaluate the critic on the current state's unflattened observation paired
# with random_action and display the full result.
critic_inputs = [env.unflatten_observation(state), random_action]
critic_model(critic_inputs)

In [None]:
# Display deterministic_action as-is.
# NOTE(review): not assigned in any nearby cell; presumably left over from the
# training loop - confirm it is still defined after Restart & Run All.
deterministic_action

In [None]:
# Run the actor on the current state's unflattened observation and display its output.
actor_model(env.unflatten_observation(state))

In [None]:
# Summarise each actor weight as (name, shape, contains-NaN) instead of dumping
# every raw tensor: the full dump floods the output, while the NaN flag is what
# this debugging section needs. Use actor_model.weights[i] to inspect one tensor.
[(w.name, tuple(w.shape), bool(np.isnan(w.numpy()).any())) for w in actor_model.weights]

In [None]:
# Summarise each critic weight as (name, shape, contains-NaN) instead of dumping
# every raw tensor: the full dump floods the output, while the NaN flag is what
# this debugging section needs. Use critic_model.weights[i] to inspect one tensor.
[(w.name, tuple(w.shape), bool(np.isnan(w.numpy()).any())) for w in critic_model.weights]