## Sloving Needle Master with Twin Delayed DDPG (TD3)
Code modified from https://github.com/nikhilbarhate99/TD3-PyTorch-BipedalWalker-v2 <br>


In [1]:
import numpy as np
import torch
import argparse
import os
import sys
import random
from environment import Environment
import utils
import TD3
import math
import matplotlib.pyplot as plt

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [12]:
class Args:
    policy_name = "TD3"
    env_name = "Needle Master"
    seed = 1e6
    start_timesteps = 5e3  # How many time steps purely random policy is run for
    eval_freq = 1e3 # How often (time steps) we evaluate
    max_timesteps = 1e6   # Max time steps to run environment for
    save_models = "store"
    expl_noise = 1    # Std of Gaussian exploration noise
    batch_size = 1000
    discount = 0.99   # Discount factor
    tau = 0.005         # Target network update rate
    policy_noise = 0.2   # Noise added to target policy during critic update
    noise_clip = 0.5
    policy_freq = 2  # Frequency of delayed policy updates
    max_size = 5e3
    random_freq = 2e3 
    random_interval = 5e2
    filename = 'environment_14.txt'
    
args=Args()

In [13]:
# Setup
random.seed(args.seed)
torch.manual_seed(random.randint(1, 10000))
if torch.cuda.is_available():
    args.device = torch.device('cuda')
    torch.cuda.manual_seed(random.randint(1, 10000))
    torch.backends.cudnn.enabled = False  # Disable nondeterministic ops (not sure if critical but better safe than sorry)
else:
    args.device = torch.device('cpu')

In [14]:
sys.path.insert(0, '/home/lifan/workspace/RL/needle_master_tools/data')

### Model evaluation

In [15]:
def evaluate_policy(policy, log_f, eval_episodes= 1):
    avg_reward = 0.
    eval_path = './evaluate/'
    if not os.path.exists(eval_path):
        os.mkdir(eval_path)
    for _ in range(eval_episodes):
        state = env.reset(log_f)
        done = False
        while not done:
            action = policy.select_action(state)
            # print("state: " + str(state))
            # print("action: " + str(action))
            next_state, reward, done = env.step(action, log_f)
            # print("next state: " + str(next_state))
            # print("done: " +str(done))
            state = next_state
            avg_reward += reward
        env.render(save_image=True, save_path=eval_path)

    avg_reward /= eval_episodes
    env.episode_reward = avg_reward

    print ("---------------------------------------")
    print ("Episode_num: %d, Evaluation over %d episodes: %f" % (env.episode_num, eval_episodes, avg_reward))
    print ("---------------------------------------")
    return avg_reward



In [None]:
file_name = "%s_%s_%s_%s" % (args.policy_name, args.env_name, str(args.seed),1)
print ("---------------------------------------")
print ("Settings: %s" % (file_name))
print ("---------------------------------------")

if not os.path.exists("./results"):
    os.makedirs("./results")
if not os.path.exists("./pytorch_models"):
    os.makedirs("./pytorch_models")

## environment set up


""" Adding the log file """
logfile = "%s_%s" % (args.policy_name, args.env_name)
log_f = open("log_"+logfile+".txt","w+")
env = Environment(log_f, filename=args.filename)

# env.GetTargetPoint()
state_dim = 10
action_dim = 2
""""  for PID controller """
action_constrain = [10, np.pi/20]
parameter = [0.1,0.0009]

""" [lower bound],[higher bound] """
env.action_bound = np.array(([-1, -1],[1, 1]))  ## modified lower bound
max_action = 1.0

### for plotting
Reward = []
save_path = './out/'
""" start straightly """
evaluations = []

# Initialize policy
policy = TD3.TD3(state_dim, action_dim, max_action)
replay_buffer = utils.ReplayBuffer(args.max_size)

# Evaluate untrained policy
# evaluations = [evaluate_policy(policy)]


env.total_timesteps = 0
env.random_fre = 1000
timesteps_since_eval = 0
done = True

while env.total_timesteps < args.max_timesteps:

    # Evaluate episode
    if timesteps_since_eval >= args.eval_freq:
        timesteps_since_eval %= args.eval_freq
        evaluations.append(evaluate_policy(policy, log_f))

        policy.save(file_name, directory="./pytorch_models")
        np.save("./results/%s" % (file_name), evaluations)

        continue


    ## finish one episode, and train episode_times
    if done:
        log_f.write('~~~~~~~~~~~~~~~~~~~~~~~~ iteration {} ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'.format(env.episode_num))


        ## load model
        # policy.load(file_name,"./pytorch_models")

        ## training as usual
            # if env.total_timesteps != 0 and env.episode_reward > 500:
        if env.total_timesteps != 0:
            print (("Total T: %d Episode Num: %d Episode T: %d Reward: %f") % (
                env.total_timesteps, env.episode_num, episode_timesteps, env.episode_reward))
            if env.episode_num % 1 == 0:
                env.render( save_image=True, save_path=save_path)

        if env.total_timesteps != 0:
            policy.train(replay_buffer, episode_timesteps, args.batch_size, args.discount, args.tau, args.policy_noise, args.noise_clip, args.policy_freq)


        Reward.append(env.episode_reward)

        # Reset environment
        state = env.reset(log_f)

        done = False

        env.episode_num += 1
        env.episode_reward = 0
        episode_timesteps = 0

        # """ exploration rate decay """
        # if env.total_timesteps % 1000 == 0 and args.expl_noise > 0:
        #     args.expl_noise -= 0.05

    # Select action randomly or according to policy
    if env.total_timesteps % args.random_freq < args.random_interval:
        action = env.sample_action()
#         print("randomly selected: " + str(action))
        # action = env.PIDcontroller(action_constrain, parameter, env.t)
        # print("PID controller: " +str(action))
    else:
        # print("state: " +str(state))
        action = policy.select_action(state)
        print("action based on polilcy:" + str(action))
        # print("action selected: " +str(action))
        if args.expl_noise != 0:
            action = (action + np.random.normal(0, args.expl_noise, size=2)).clip(
                env.action_bound[0,:], env.action_bound[1,:])


    ### select action only based on pure RL
    # action = policy.select_action(state)
    # print("action selected: " +str(action))


    # Perform action
    new_state, reward, done = env.step(action, log_f)

    done_bool = 0 if episode_timesteps + 1 == env.max_time else float(done)
    env.episode_reward += reward

    # Store data in replay buffer
    replay_buffer.add((state, new_state, action, reward, done_bool))
    # print("state: " + str(state))
    state = new_state

    episode_timesteps += 1
    env.total_timesteps += 1
    timesteps_since_eval += 1

plt.plot(range(len(Reward)), np.array(Reward), 'b')
plt.savefig('./results/episode reward.png')

plt.plot(range(len(policy.actor_loss)), policy.actor_loss, 'b')
plt.savefig('./results/actor loss.png')

plt.plot(range(len(policy.critic_loss)), policy.critic_loss, 'b')
plt.savefig('./results/critic loss.png')

plt.plot(range(len(evaluations)), np.array(evaluations), 'b')
plt.savefig('./results/evaluation reward.png')
print(evaluations)



---------------------------------------
Settings: TD3_Needle Master_1000000.0_1
---------------------------------------
Total T: 21 Episode Num: 1 Episode T: 21 Reward: -3263.097738
Total T: 32 Episode Num: 2 Episode T: 11 Reward: -1212.585337
Total T: 81 Episode Num: 3 Episode T: 49 Reward: -7205.027849
Total T: 125 Episode Num: 4 Episode T: 44 Reward: -7161.007733
Total T: 143 Episode Num: 5 Episode T: 18 Reward: -2370.589150
Total T: 193 Episode Num: 6 Episode T: 50 Reward: -5630.131528
Total T: 241 Episode Num: 7 Episode T: 48 Reward: -5555.100389
Total T: 250 Episode Num: 8 Episode T: 9 Reward: -1114.372978
Total T: 297 Episode Num: 9 Episode T: 47 Reward: -7064.102805
Total T: 349 Episode Num: 10 Episode T: 52 Reward: -7012.707205
Total T: 396 Episode Num: 11 Episode T: 47 Reward: -7524.107058
Total T: 446 Episode Num: 12 Episode T: 50 Reward: -7353.510149
Total T: 466 Episode Num: 13 Episode T: 20 Reward: -2746.967836
action based on polilcy:[-1. -1.]
action based on polilcy:[-1

action based on polilcy:[-1.         -0.99130017]
action based on polilcy:[-1.        -0.9983134]
action based on polilcy:[-1.        -0.9987683]
action based on polilcy:[-1.       -0.999966]
action based on polilcy:[-1.       -0.999321]
action based on polilcy:[-1.        -0.9564431]
action based on polilcy:[-1.        -0.9995839]
action based on polilcy:[-1.         -0.29496258]
action based on polilcy:[-1.         -0.55473757]
action based on polilcy:[-1.         -0.46476156]
action based on polilcy:[-1.        -0.9999744]
action based on polilcy:[-1.         0.0071043]
action based on polilcy:[-1.         -0.47137418]
action based on polilcy:[-1.        -0.9999991]
action based on polilcy:[-1. -1.]
action based on polilcy:[-1.        -0.9999999]
action based on polilcy:[-1.         -0.77180505]
action based on polilcy:[-1.        -0.9939459]
action based on polilcy:[-1.       -0.994128]
action based on polilcy:[-1.        -0.9943073]
action based on polilcy:[-1.         -0.99108565

action based on polilcy:[-1.        -0.9772363]
action based on polilcy:[-1.       -0.731555]
action based on polilcy:[-1.        -0.9276709]
action based on polilcy:[-1.         -0.65520394]
action based on polilcy:[-1.        -0.6090406]
action based on polilcy:[-1.        -0.5619935]
action based on polilcy:[-1.         -0.57899415]
action based on polilcy:[-1.        -0.6020205]
action based on polilcy:[-1.       -0.672865]
action based on polilcy:[-1.         -0.75162995]
action based on polilcy:[-1.         -0.92075956]
action based on polilcy:[-1.         -0.87207884]
action based on polilcy:[-1.        -0.4282342]
action based on polilcy:[-1.         -0.83958554]
action based on polilcy:[-1.         -0.13212968]
action based on polilcy:[-1.         -0.12437291]
action based on polilcy:[-1.         -0.03803368]
action based on polilcy:[-1.0000000e+00  7.1890827e-04]
action based on polilcy:[-1.          0.06363791]
action based on polilcy:[-1.          0.01326869]
action based o

action based on polilcy:[-1.     -0.9867]
action based on polilcy:[-1.         -0.75341123]
action based on polilcy:[-1.         -0.71386504]
action based on polilcy:[-1.         -0.75998056]
action based on polilcy:[-1.        -0.9738412]
action based on polilcy:[-1.        -0.8628572]
action based on polilcy:[-1.         -0.92506045]
action based on polilcy:[-1.        -0.6172056]
action based on polilcy:[-1.         -0.45205417]
action based on polilcy:[-0.99999994 -0.57645   ]
action based on polilcy:[-1.         -0.42987975]
action based on polilcy:[-1.       -0.376091]
action based on polilcy:[-1.         -0.23072918]
action based on polilcy:[-1.        -0.5731933]
action based on polilcy:[-1.         -0.19922839]
action based on polilcy:[-1.          0.01656745]
action based on polilcy:[-1.          0.03339807]
action based on polilcy:[-1.          0.16130213]
action based on polilcy:[-1.          0.05286923]
action based on polilcy:[-1.          0.10193295]
action based on poli

action based on polilcy:[-1.         -0.98444337]
action based on polilcy:[-1.         -0.62481356]
action based on polilcy:[-1.        -0.8801521]
action based on polilcy:[-1.        -0.6169386]
action based on polilcy:[-1.         -0.24326941]
action based on polilcy:[-1.         -0.21460535]
action based on polilcy:[-1.         -0.11848043]
action based on polilcy:[-1.         0.4337976]
action based on polilcy:[-1.          0.29659113]
action based on polilcy:[-1.          0.40119338]
action based on polilcy:[-1.          0.17964022]
action based on polilcy:[-1.          0.10929944]
action based on polilcy:[-1.          0.25770873]
action based on polilcy:[-1.          0.03158648]
action based on polilcy:[-1.         -0.10783487]
action based on polilcy:[-1.         -0.19531225]
action based on polilcy:[-1.       -0.478329]
action based on polilcy:[-1.         -0.36011857]
action based on polilcy:[-1.         -0.48062313]
action based on polilcy:[-1.         -0.24468352]
action bas

action based on polilcy:[-1.         -0.97384834]
action based on polilcy:[-1.         -0.55411077]
action based on polilcy:[-1.       -0.855838]
action based on polilcy:[-1.        -0.5541183]
action based on polilcy:[-1.         -0.31579855]
action based on polilcy:[-1.         -0.19497143]
action based on polilcy:[-1.         -0.28902277]
action based on polilcy:[-1.        -0.2993489]
action based on polilcy:[-1.         -0.23742676]
action based on polilcy:[-1.        -0.6864207]
action based on polilcy:[-1.         -0.24343182]
action based on polilcy:[-1.         -0.28594175]
action based on polilcy:[-1.         -0.22603422]
action based on polilcy:[-1.         -0.14485632]
action based on polilcy:[-1.         -0.08518564]
action based on polilcy:[-1.         -0.15742886]
action based on polilcy:[-1.         -0.02985108]
action based on polilcy:[-1.          0.07552949]
action based on polilcy:[-1.          0.17412584]
action based on polilcy:[-1.         -0.00704151]
action bas

action based on polilcy:[-1.         -0.95771754]
action based on polilcy:[-1.         -0.77965844]
action based on polilcy:[-1.        -0.5618372]
action based on polilcy:[-1.         -0.82539153]
action based on polilcy:[-1.         -0.52505183]
action based on polilcy:[-1.         -0.47788817]
action based on polilcy:[-1.        -0.4127362]
action based on polilcy:[-1.         -0.05682759]
action based on polilcy:[-1.          0.01343994]
action based on polilcy:[-1.          0.06908616]
action based on polilcy:[-1.         -0.05883734]
action based on polilcy:[-1.         -0.12864037]
action based on polilcy:[-1.         -0.03469598]
action based on polilcy:[-1.          0.00731506]
action based on polilcy:[-1.          0.09085074]
action based on polilcy:[-1.          0.07765389]
action based on polilcy:[-1.         -0.01965959]
action based on polilcy:[-1.          0.20080887]
action based on polilcy:[-1.          0.63912594]
action based on polilcy:[-1.         0.5325674]
action

action based on polilcy:[-1.        -0.9244061]
action based on polilcy:[-1.         -0.60531604]
action based on polilcy:[-1.         -0.91741633]
action based on polilcy:[-1.         -0.70273864]
action based on polilcy:[-1.         -0.62746763]
action based on polilcy:[-1.         -0.91749376]
action based on polilcy:[-1.        -0.6776514]
action based on polilcy:[-1.        -0.9729528]
action based on polilcy:[-1.       -0.689052]
action based on polilcy:[-1.        -0.6739773]
action based on polilcy:[-1.         -0.69465715]
action based on polilcy:[-1.         -0.98735785]
action based on polilcy:[-1.         -0.47720498]
action based on polilcy:[-1.        -0.5438593]
action based on polilcy:[-1.         -0.45416957]
action based on polilcy:[-1.         -0.33600536]
action based on polilcy:[-1.         -0.21966188]
action based on polilcy:[-1.         -0.11079668]
action based on polilcy:[-1.         -0.12700786]
action based on polilcy:[-1.         -0.10008207]
action based o

action based on polilcy:[-1.        -0.9361982]
action based on polilcy:[-0.99999994 -0.41136387]
action based on polilcy:[-1.         -0.69960916]
action based on polilcy:[-0.99999994 -0.41812655]
action based on polilcy:[-1.        -0.5155124]
action based on polilcy:[-1.        -0.2857786]
action based on polilcy:[-1.         -0.17116445]
action based on polilcy:[-1.       -0.043098]
action based on polilcy:[-1.        -0.1832441]
action based on polilcy:[-1.        -0.3488712]
action based on polilcy:[-1.        -0.3273261]
action based on polilcy:[-1.        -0.2527917]
action based on polilcy:[-1.        -0.3048273]
action based on polilcy:[-1.         -0.36675096]
action based on polilcy:[-1.         -0.33613738]
action based on polilcy:[-1.        -0.3430429]
action based on polilcy:[-1.         -0.46615106]
action based on polilcy:[-1.         -0.30682778]
action based on polilcy:[-1.         -0.21265016]
action based on polilcy:[-1.         -0.23811527]
action based on polilc

action based on polilcy:[-1.         -0.38908163]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 2847 Episode Num: 76 Episode T: 10 Reward: -482.197636
action based on polilcy:[-1.        -0.8510719]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.         0.9902629]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 2858 Episode Num: 77 Episode T: 11 Reward: -731.818399
action based on polilcy:[-1.         -0.46951303]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  

action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 3071 Episode Num: 98 Episode T: 8 Reward: -625.354246
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 3079 Episode Num: 99 Episode T: 8 Reward: -814.207266
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 3088 Episode Nu

action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 3286 Episode Num: 120 Episode T: 11 Reward: -603.577743
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 3295 Episode Num: 121 Episode T: 9 Reward: -634.657240
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polil

action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 3508 Episode Num: 143 Episode T: 9 Reward: -954.505538
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 3519 Episode Num: 144 Episode T: 11 Reward: -795.370410
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polil

action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 3732 Episode Num: 167 Episode T: 12 Reward: -482.012397
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 3739 Episode Num: 168 Episode T: 7 Reward: -646.239516
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polil

action based on polilcy:[-1.         0.9999971]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 3936 Episode Num: 188 Episode T: 10 Reward: -451.084312
action based on polilcy:[-1.          0.99603224]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
Total T: 3946 Episode Num: 189 Episode T: 10 Reward: -699.880632
action based on polilcy:[-1.        -0.9430491]
action based on polilcy:[-1.         -0.05107227]
action based on polilcy:[-1.         -0.15485148]
action based

action based on polilcy:[-1.         -0.20529716]
action based on polilcy:[-1.          0.03341372]
action based on polilcy:[-1.         0.0465213]
action based on polilcy:[-1.          0.05158545]
action based on polilcy:[-1.          0.04292153]
action based on polilcy:[-1.          0.06531065]
action based on polilcy:[-1.          0.06096146]
action based on polilcy:[-1.          0.06148241]
action based on polilcy:[-1.         -0.20694762]
action based on polilcy:[-1.         -0.16666396]
action based on polilcy:[-1.         -0.13873914]
action based on polilcy:[-1.         -0.13441399]
action based on polilcy:[-1.         -0.09217391]
action based on polilcy:[-1.         -0.15530995]
action based on polilcy:[-1.         -0.10305057]
action based on polilcy:[-1.         -0.14179689]
action based on polilcy:[-1.          0.01326399]
action based on polilcy:[-1.          0.16322045]
action based on polilcy:[-1.          0.21868423]
action based on polilcy:[-1.          0.03789853]
ac

action based on polilcy:[-1.         -0.03258858]
action based on polilcy:[-1.          0.02844387]
action based on polilcy:[-1.          0.06762929]
action based on polilcy:[-1.         0.1225684]
action based on polilcy:[-1.         0.0501825]
action based on polilcy:[-1.          0.02639962]
action based on polilcy:[-1.          0.03104415]
action based on polilcy:[-1.          0.01250402]
action based on polilcy:[-1.          0.04928688]
action based on polilcy:[-1.          0.07956713]
action based on polilcy:[-1.          0.08892459]
action based on polilcy:[-1.          0.08948132]
action based on polilcy:[-1.         -0.16320646]
action based on polilcy:[-1.        -0.1531438]
action based on polilcy:[-1.         -0.16058992]
action based on polilcy:[-1.         -0.16713451]
action based on polilcy:[-1.         -0.20729208]
action based on polilcy:[-1.         -0.16217779]
action based on polilcy:[-1.         -0.17281485]
action based on polilcy:[-1.         -0.16962029]
action

action based on polilcy:[-1.          0.07440183]
action based on polilcy:[-1.          0.11498363]
action based on polilcy:[-1.          0.08391356]
action based on polilcy:[-1.          0.07197016]
action based on polilcy:[-1.          0.04259952]
action based on polilcy:[-1.         0.0582106]
action based on polilcy:[-1.          0.08952106]
action based on polilcy:[-1.          0.06111902]
action based on polilcy:[-1.          0.05431072]
action based on polilcy:[-1.          0.05870367]
action based on polilcy:[-1.          0.09869123]
action based on polilcy:[-1.          0.06557807]
action based on polilcy:[-1.          0.04184475]
action based on polilcy:[-1.          0.06266373]
action based on polilcy:[-1.         0.0692706]
action based on polilcy:[-1.          0.01214433]
action based on polilcy:[-1.          0.04299191]
action based on polilcy:[-1.          0.03832455]
action based on polilcy:[-1.          0.13771011]
action based on polilcy:[-1.          0.11670072]
acti

action based on polilcy:[-1.         0.0958151]
action based on polilcy:[-1.        0.071363]
action based on polilcy:[-1.          0.12214466]
action based on polilcy:[-1.          0.12500514]
action based on polilcy:[-1.          0.11982553]
action based on polilcy:[-1.          0.09234955]
action based on polilcy:[-1.          0.10066321]
action based on polilcy:[-1.          0.05899186]
action based on polilcy:[-1.         0.0842005]
action based on polilcy:[-1.          0.09180389]
Total T: 5215 Episode Num: 229 Episode T: 10 Reward: -1024.080959
action based on polilcy:[-1.          0.08750173]
action based on polilcy:[-1.        -0.0155443]
action based on polilcy:[-1.         -0.07976394]
action based on polilcy:[-1.         -0.29670918]
action based on polilcy:[-1.         -0.01680735]
action based on polilcy:[-1.        -0.3960556]
action based on polilcy:[-1.         -0.01395125]
action based on polilcy:[-1.         -0.07536515]
action based on polilcy:[-1.         -0.217354

action based on polilcy:[-1.          0.15593868]
action based on polilcy:[-1.          0.16295415]
action based on polilcy:[-1.          0.21228752]
action based on polilcy:[-1.          0.19565098]
action based on polilcy:[-1.          0.13972583]
action based on polilcy:[-1.          0.13734798]
action based on polilcy:[-1.          0.13705815]
action based on polilcy:[-1.          0.19260065]
action based on polilcy:[-1.          0.18468167]
action based on polilcy:[-1.          0.19186854]
action based on polilcy:[-1.          0.17108986]
action based on polilcy:[-1.          0.16315497]
action based on polilcy:[-1.          0.18849167]
Total T: 5427 Episode Num: 234 Episode T: 13 Reward: -981.892259
action based on polilcy:[-1.          0.13097797]
action based on polilcy:[-1.          0.14358196]
action based on polilcy:[-1.          0.15645106]
action based on polilcy:[-1.          0.18066646]
action based on polilcy:[-1.          0.15866007]
action based on polilcy:[-1.       

action based on polilcy:[-1.          0.23590897]
action based on polilcy:[-1.          0.25664228]
action based on polilcy:[-1.          0.02134528]
action based on polilcy:[-1.          0.10702098]
action based on polilcy:[-1.          0.11471951]
action based on polilcy:[-1.          0.27032742]
action based on polilcy:[-1.         0.2435003]
action based on polilcy:[-1.          0.25383252]
action based on polilcy:[-1.          0.26825887]
action based on polilcy:[-1.         0.2035715]
action based on polilcy:[-1.          0.15429932]
action based on polilcy:[-1.          0.19082801]
action based on polilcy:[-1.         0.2043348]
action based on polilcy:[-1.          0.19224024]
action based on polilcy:[-1.          0.17913009]
action based on polilcy:[-1.         0.1772382]
action based on polilcy:[-1.          0.18947555]
action based on polilcy:[-1.          0.15565634]
Total T: 5607 Episode Num: 241 Episode T: 18 Reward: -1172.480168
action based on polilcy:[-1.          0.26

action based on polilcy:[-1.          0.32447737]
action based on polilcy:[-1.          0.18378475]
action based on polilcy:[-1.          0.30866647]
action based on polilcy:[-1.         0.3049669]
action based on polilcy:[-1.          0.40858108]
action based on polilcy:[-1.         0.4745322]
action based on polilcy:[-1.          0.30977508]
action based on polilcy:[-1.          0.27398795]
action based on polilcy:[-1.          0.29176107]
action based on polilcy:[-1.          0.26593414]
action based on polilcy:[-1.         0.2580659]
action based on polilcy:[-1.          0.25352982]
action based on polilcy:[-1.        0.203865]
action based on polilcy:[-1.          0.17761153]
Total T: 5827 Episode Num: 251 Episode T: 14 Reward: -912.417318
action based on polilcy:[-1.         0.2930692]
action based on polilcy:[-1.          0.30615714]
action based on polilcy:[-1.          0.37744904]
action based on polilcy:[-1.         0.2921714]
action based on polilcy:[-1.          0.29214638]

action based on polilcy:[-1.         0.2984837]
action based on polilcy:[-1.         -0.01707649]
action based on polilcy:[-1.       -0.238191]
action based on polilcy:[-1.         -0.20092085]
action based on polilcy:[-1.         -0.09729484]
action based on polilcy:[-1.          0.02366954]
action based on polilcy:[-1.         -0.08358285]
action based on polilcy:[-1.         -0.13517879]
action based on polilcy:[-1.         -0.06817075]
action based on polilcy:[-1.         -0.06755342]
action based on polilcy:[-1.         -0.02142464]
action based on polilcy:[-1.         -0.13956136]
action based on polilcy:[-1.         -0.10780671]
action based on polilcy:[-1.         -0.00318487]
action based on polilcy:[-1.         -0.01019203]
action based on polilcy:[-1.          0.02777129]
action based on polilcy:[-1.         -0.03136472]
action based on polilcy:[-1.         -0.02407923]
action based on polilcy:[-1.          0.04782132]
action based on polilcy:[-1.          0.23164439]
action

action based on polilcy:[-1.         0.7862742]
action based on polilcy:[-1.          0.70551753]
action based on polilcy:[-1.         0.6904429]
action based on polilcy:[-1.          0.44883186]
action based on polilcy:[-1.         0.4471788]
action based on polilcy:[-1.          0.44255355]
action based on polilcy:[-1.          0.47276235]
action based on polilcy:[-1.          0.43522212]
action based on polilcy:[-1.        0.778811]
action based on polilcy:[-1.          0.61266863]
action based on polilcy:[-1.         0.3849934]
action based on polilcy:[-1.        0.823135]
action based on polilcy:[-1.          0.42831028]
action based on polilcy:[-1.          0.47822154]
action based on polilcy:[-1.          0.38045824]
action based on polilcy:[-1.          0.32984948]
action based on polilcy:[-1.          0.21567261]
action based on polilcy:[-1.          0.13832307]
action based on polilcy:[-1.          0.24476062]
action based on polilcy:[-1.          0.21505429]
action based on 

action based on polilcy:[-1.         0.7997189]
action based on polilcy:[-1.          0.61517304]
action based on polilcy:[-1.         0.5110036]
action based on polilcy:[-1.         0.6013812]
action based on polilcy:[-1.          0.34945762]
action based on polilcy:[-1.        0.296026]
action based on polilcy:[-1.          0.18961814]
action based on polilcy:[-1.         0.2909345]
Total T: 6825 Episode Num: 293 Episode T: 8 Reward: -652.473144
action based on polilcy:[-1.         0.8204184]
action based on polilcy:[-1.         0.8191477]
action based on polilcy:[-1.         0.6918923]
action based on polilcy:[-1.          0.46390268]
action based on polilcy:[-1.         0.6571624]
action based on polilcy:[-1.          0.39774507]
action based on polilcy:[-1.         0.2999242]
action based on polilcy:[-1.          0.17595473]
action based on polilcy:[-1.          0.18405853]
Total T: 6834 Episode Num: 294 Episode T: 9 Reward: -887.574075
action based on polilcy:[-1.          0.8428

action based on polilcy:[-1.          0.85147476]
---------------------------------------
Episode_num: 306, Evaluation over 1 episodes: -376.751777
---------------------------------------
action based on polilcy:[-1.          0.62480617]
Total T: 7001 Episode Num: 306 Episode T: 2 Reward: -410.316498
action based on polilcy:[-1.          0.84869087]
action based on polilcy:[-1.          0.46064666]
action based on polilcy:[-1.         0.4100613]
action based on polilcy:[-1.          0.49354535]
action based on polilcy:[-1.          0.46591237]
action based on polilcy:[-1.          0.41223386]
action based on polilcy:[-1.         0.5127449]
action based on polilcy:[-1.          0.35375965]
action based on polilcy:[-1.          0.30147588]
action based on polilcy:[-1.          0.21322088]
action based on polilcy:[-1.          0.29458106]
action based on polilcy:[-1.          0.23079373]
Total T: 7013 Episode Num: 307 Episode T: 12 Reward: -718.383787
action based on polilcy:[-1.         

action based on polilcy:[-1.          0.90164024]
action based on polilcy:[-1.         0.7527495]
action based on polilcy:[-1.          0.38855147]
action based on polilcy:[-1.          0.43847865]
action based on polilcy:[-1.          0.46651766]
action based on polilcy:[-1.          0.53335464]
action based on polilcy:[-1.          0.43610844]
action based on polilcy:[-1.         0.4179004]
action based on polilcy:[-1.          0.48601526]
action based on polilcy:[-1.         0.2902079]
action based on polilcy:[-1.          0.22066401]
action based on polilcy:[-1.          0.23205492]
action based on polilcy:[-1.          0.34917158]
action based on polilcy:[-1.          0.36253026]
Total T: 7173 Episode Num: 315 Episode T: 14 Reward: -942.166332
action based on polilcy:[-1.          0.89776146]
action based on polilcy:[-1.          0.44988465]
action based on polilcy:[-1.         0.6096715]
action based on polilcy:[-1.         0.5613073]
action based on polilcy:[-1.          0.36471

action based on polilcy:[-1.         0.2122833]
action based on polilcy:[-1.          0.15166397]
action based on polilcy:[-1.          0.09006301]
Total T: 7327 Episode Num: 327 Episode T: 11 Reward: -592.966184
action based on polilcy:[-1.          0.93419814]
action based on polilcy:[-1.          0.46266505]
action based on polilcy:[-1.          0.42914274]
action based on polilcy:[-1.         0.6880454]
action based on polilcy:[-1.         0.4350337]
action based on polilcy:[-1.        0.507345]
action based on polilcy:[-1.          0.30971447]
action based on polilcy:[-1.          0.36398563]
action based on polilcy:[-1.          0.26252183]
Total T: 7336 Episode Num: 328 Episode T: 9 Reward: -929.942784
action based on polilcy:[-1.         0.9355576]
action based on polilcy:[-1.          0.49757394]
action based on polilcy:[-1.         0.6423257]
action based on polilcy:[-1.          0.40084422]
action based on polilcy:[-1.         0.8193006]
action based on polilcy:[-1.         

action based on polilcy:[-1.         0.9107498]
action based on polilcy:[-1.          0.72155297]
action based on polilcy:[-1.          0.43168724]
action based on polilcy:[-1.          0.44728065]
action based on polilcy:[-1.          0.42653263]
action based on polilcy:[-1.          0.43653873]
action based on polilcy:[-1.         0.7542393]
action based on polilcy:[-1.          0.41565868]
action based on polilcy:[-1.         0.4414416]
action based on polilcy:[-1.          0.53560966]
action based on polilcy:[-1.          0.41235867]
action based on polilcy:[-1.          0.42199132]
action based on polilcy:[-1.          0.38642085]
action based on polilcy:[-1.          0.56362677]
action based on polilcy:[-1.         0.4030995]
action based on polilcy:[-1.          0.39260992]
action based on polilcy:[-1.          0.33082882]
action based on polilcy:[-1.          0.30999964]
action based on polilcy:[-1.          0.37666336]
action based on polilcy:[-1.          0.22883895]
Total T:

action based on polilcy:[-1.         0.9235134]
action based on polilcy:[-1.         0.4427693]
action based on polilcy:[-1.          0.52497715]
action based on polilcy:[-1.          0.98667973]
action based on polilcy:[-1.         0.4849613]
action based on polilcy:[-1.         0.4136987]
action based on polilcy:[-1.        0.463502]
action based on polilcy:[-1.         0.4543513]
action based on polilcy:[-1.         0.4979001]
action based on polilcy:[-1.         0.4191528]
action based on polilcy:[-1.         0.3193148]
action based on polilcy:[-1.          0.28367516]
action based on polilcy:[-1.          0.34097126]
action based on polilcy:[-1.          0.43127874]
action based on polilcy:[-1.         0.3376655]
Total T: 7688 Episode Num: 353 Episode T: 15 Reward: -1215.593188
action based on polilcy:[-1.         0.9235423]
action based on polilcy:[-1.          0.45005172]
action based on polilcy:[-1.         0.5087637]
action based on polilcy:[-1.         0.5361062]
action based

action based on polilcy:[-1.          0.75345457]
action based on polilcy:[-1.          0.47781837]
action based on polilcy:[-1.         0.4796909]
action based on polilcy:[-1.          0.37261626]
action based on polilcy:[-1.         0.3433964]
action based on polilcy:[-1.          0.43213066]
action based on polilcy:[-1.          0.44538134]
action based on polilcy:[-1.          0.44650698]
action based on polilcy:[-1.          0.43306705]
action based on polilcy:[-1.          0.52975106]
action based on polilcy:[-1.         0.4795706]
action based on polilcy:[-1.         0.3152679]
action based on polilcy:[-1.          0.17589542]
action based on polilcy:[-1.       0.27004]
Total T: 7866 Episode Num: 367 Episode T: 14 Reward: -1136.766990
action based on polilcy:[-1.        0.768824]
action based on polilcy:[-1.          0.57357985]
action based on polilcy:[-1.         0.4828989]
action based on polilcy:[-1.         0.6282706]
action based on polilcy:[-1.         0.5061873]
action b

Total T: 8126 Episode Num: 379 Episode T: 45 Reward: -7388.142515
Total T: 8162 Episode Num: 380 Episode T: 36 Reward: -6024.256217
Total T: 8211 Episode Num: 381 Episode T: 49 Reward: -7350.729617
Total T: 8256 Episode Num: 382 Episode T: 45 Reward: -7421.291880
Total T: 8301 Episode Num: 383 Episode T: 45 Reward: -7521.497733
Total T: 8346 Episode Num: 384 Episode T: 45 Reward: -7586.294367
Total T: 8362 Episode Num: 385 Episode T: 16 Reward: -2180.917439
Total T: 8388 Episode Num: 386 Episode T: 26 Reward: -3485.887561
Total T: 8442 Episode Num: 387 Episode T: 54 Reward: -7221.357530
Total T: 8486 Episode Num: 388 Episode T: 44 Reward: -7149.400805
action based on polilcy:[-1.         -0.45328608]
action based on polilcy:[-1.         -0.28253114]
action based on polilcy:[-1.         -0.21414836]
action based on polilcy:[-1.         -0.15986753]
action based on polilcy:[-1.         -0.08130515]
action based on polilcy:[-1.          0.00919896]
action based on polilcy:[-1.         -0.

action based on polilcy:[-1.          0.87207097]
action based on polilcy:[-1.         0.4489049]
action based on polilcy:[-1.          0.86938196]
action based on polilcy:[-1.         0.3353651]
action based on polilcy:[-1.         0.6859108]
action based on polilcy:[-1.          0.36566275]
action based on polilcy:[-1.          0.48894557]
action based on polilcy:[-1.          0.26836893]
Total T: 8678 Episode Num: 400 Episode T: 8 Reward: -891.260299
action based on polilcy:[-1.         0.8777551]
action based on polilcy:[-1.         0.4573608]
action based on polilcy:[-1.          0.88194335]
action based on polilcy:[-1.          0.43555918]
action based on polilcy:[-1.          0.38918337]
action based on polilcy:[-1.          0.44547525]
action based on polilcy:[-1.          0.40873915]
action based on polilcy:[-1.          0.27973926]
action based on polilcy:[-1.         0.4113333]
action based on polilcy:[-1.          0.58025914]
action based on polilcy:[-1.          0.33168757

action based on polilcy:[-1.         0.9199441]
action based on polilcy:[-1.          0.45875958]
action based on polilcy:[-1.          0.74005544]
action based on polilcy:[-1.         0.3160356]
action based on polilcy:[-1.          0.40824702]
action based on polilcy:[-1.         0.5515293]
action based on polilcy:[-1.          0.16685247]
action based on polilcy:[-1.          0.12081955]
action based on polilcy:[-1.          0.04598038]
Total T: 8846 Episode Num: 413 Episode T: 9 Reward: -620.025677
action based on polilcy:[-1.         0.9189725]
action based on polilcy:[-1.          0.38554266]
action based on polilcy:[-1.          0.40767625]
action based on polilcy:[-1.         0.8142595]
action based on polilcy:[-1.         0.2822971]
action based on polilcy:[-1.         0.8164987]
action based on polilcy:[-1.          0.42103332]
action based on polilcy:[-1.          0.36957595]
action based on polilcy:[-1.          0.28941053]
action based on polilcy:[-1.          0.42359644]


action based on polilcy:[-1.         0.9489721]
action based on polilcy:[-1.         0.5988269]
action based on polilcy:[-1.         0.9079475]
action based on polilcy:[-1.         0.6585948]
action based on polilcy:[-1.         0.9964017]
action based on polilcy:[-1.         0.3629619]
action based on polilcy:[-1.          0.35138398]
action based on polilcy:[-1.          0.56842303]
action based on polilcy:[-1.         0.4614123]
action based on polilcy:[-1.          0.43992567]
action based on polilcy:[-1.          0.15226445]
action based on polilcy:[-1.          0.08945964]
action based on polilcy:[-1.          0.14327294]
action based on polilcy:[-1.          0.10862554]
Total T: 9034 Episode Num: 428 Episode T: 14 Reward: -1123.218773
action based on polilcy:[-1.          0.93960255]
action based on polilcy:[-1.          0.35114965]
action based on polilcy:[-1.          0.34696677]
action based on polilcy:[-1.          0.32207227]
action based on polilcy:[-1.         0.3053472]


action based on polilcy:[-1.         0.9470209]
action based on polilcy:[-0.9999999   0.41535902]
action based on polilcy:[-1.         0.3570198]
action based on polilcy:[-0.9999999   0.41620594]
action based on polilcy:[-0.9999999   0.40623644]
action based on polilcy:[-1.          0.45265055]
action based on polilcy:[-1.          0.33520588]
action based on polilcy:[-1.          0.45797965]
action based on polilcy:[-1.          0.12988739]
action based on polilcy:[-1.          0.16278866]
action based on polilcy:[-1.         0.1337844]
action based on polilcy:[-1.          0.35610744]
action based on polilcy:[-1.         0.6132988]
Total T: 9198 Episode Num: 442 Episode T: 13 Reward: -890.099443
action based on polilcy:[-1.         0.9361982]
action based on polilcy:[-1.          0.55806243]
action based on polilcy:[-1.          0.43516058]
action based on polilcy:[-1.          0.32690904]
action based on polilcy:[-1.          0.38018346]
action based on polilcy:[-1.          0.75483

action based on polilcy:[-0.5758338  0.8874341]
action based on polilcy:[-0.92009497  0.33806765]
action based on polilcy:[-0.8751688   0.27201164]
action based on polilcy:[-0.7609681  0.3864145]
action based on polilcy:[-0.65462524  0.5502423 ]
action based on polilcy:[-0.69753546  0.436669  ]
action based on polilcy:[-0.77527624  0.46639472]
action based on polilcy:[-0.9132947   0.28584528]
action based on polilcy:[-0.7001257   0.43249503]
action based on polilcy:[-0.81030166  0.18358019]
action based on polilcy:[-0.7755299   0.17383812]
action based on polilcy:[-0.29190484  0.20657761]
Total T: 9362 Episode Num: 455 Episode T: 12 Reward: -794.705795
action based on polilcy:[-0.5742186  0.8821945]
action based on polilcy:[-0.6780416  0.6855411]
action based on polilcy:[-0.6625123  0.7485833]
action based on polilcy:[-0.844827   0.2481853]
action based on polilcy:[-0.7684051   0.36895648]
action based on polilcy:[-0.7404804  0.4094758]
action based on polilcy:[-0.6338155   0.70298994]

action based on polilcy:[-0.48531413  0.87634736]
action based on polilcy:[-0.83416796  0.52184606]
action based on polilcy:[-0.88011295  0.4353833 ]
action based on polilcy:[-0.94068617  0.372277  ]
action based on polilcy:[-0.69258106  0.6274768 ]
action based on polilcy:[-0.88995594  0.2482503 ]
action based on polilcy:[-0.55720234  0.7776431 ]
action based on polilcy:[-0.84226376  0.6306311 ]
action based on polilcy:[-0.76493293  0.6790458 ]
action based on polilcy:[-0.9142046  0.2138538]
Total T: 9541 Episode Num: 464 Episode T: 10 Reward: -965.659474
action based on polilcy:[-0.4537923   0.87539506]
action based on polilcy:[-0.9527344   0.38836575]
action based on polilcy:[-0.87878895  0.4501028 ]
action based on polilcy:[-0.78330547  0.5123632 ]
action based on polilcy:[-0.8116698   0.49310955]
action based on polilcy:[-0.92223614  0.42693093]
action based on polilcy:[-0.58243   0.593722]
action based on polilcy:[-0.87036765  0.3810756 ]
action based on polilcy:[-0.9037295  0.58

action based on polilcy:[-0.48383602  0.88457745]
action based on polilcy:[-0.475634   0.8568348]
action based on polilcy:[-0.5782825  0.7259048]
action based on polilcy:[-0.9554875  0.3898239]
action based on polilcy:[-0.91311455  0.32776085]
action based on polilcy:[-0.8202971   0.31855127]
action based on polilcy:[-0.84474623  0.27388483]
action based on polilcy:[-0.90456295  0.21158601]
action based on polilcy:[-0.1868743   0.41231024]
action based on polilcy:[-0.25815895  0.1919125 ]
Total T: 9712 Episode Num: 477 Episode T: 10 Reward: -739.237530
action based on polilcy:[-0.5045333   0.88086164]
action based on polilcy:[-0.8646661  0.5012775]
action based on polilcy:[-0.702955   0.6309779]
action based on polilcy:[-0.6670333   0.63111556]
action based on polilcy:[-0.942489    0.44054598]
action based on polilcy:[-0.6793091  0.5301468]
action based on polilcy:[-0.96638167  0.41350603]
action based on polilcy:[-0.9558507  0.364783 ]
action based on polilcy:[-0.7736341   0.60596704]

action based on polilcy:[-0.46313712  0.8823243 ]
action based on polilcy:[-0.95560366  0.39462483]
action based on polilcy:[-0.84632456  0.3579372 ]
action based on polilcy:[-0.931155    0.48192102]
action based on polilcy:[-0.29541156  0.33825773]
action based on polilcy:[0.03967854 0.4537128 ]
Total T: 9876 Episode Num: 488 Episode T: 6 Reward: -584.323961
action based on polilcy:[-0.56117094  0.8876202 ]
action based on polilcy:[-0.96704113  0.40203527]
action based on polilcy:[-0.47246864  0.7498441 ]
action based on polilcy:[-0.9576619  0.8308876]
action based on polilcy:[-0.7754544   0.39709762]
action based on polilcy:[-0.90559435  0.26887769]
action based on polilcy:[-0.9289397   0.23871328]
action based on polilcy:[-0.5550177  0.2256158]
action based on polilcy:[-0.6972899   0.08982462]
Total T: 9885 Episode Num: 489 Episode T: 9 Reward: -650.256720
action based on polilcy:[-0.54666984  0.8785118 ]
action based on polilcy:[-0.9676801   0.39758462]
action based on polilcy:[-0.

action based on polilcy:[-0.9367565  0.985788 ]
action based on polilcy:[-0.82794863  0.9525658 ]
action based on polilcy:[-0.38434115  0.72703993]
action based on polilcy:[-0.80092776  0.15553647]
action based on polilcy:[-0.20535219  0.7699519 ]
action based on polilcy:[-0.42970872  0.28580436]
action based on polilcy:[-0.19061375  0.8458348 ]
action based on polilcy:[-0.1272191   0.58998954]
action based on polilcy:[-0.83619434  0.6532326 ]
action based on polilcy:[-0.977998   -0.21307652]
action based on polilcy:[-0.9337877  0.0292175]
action based on polilcy:[-0.6260896   0.32393527]
action based on polilcy:[-0.48753387  0.5067481 ]
Total T: 10513 Episode Num: 514 Episode T: 47 Reward: -7462.991293
action based on polilcy:[-0.60665596  0.8793984 ]
action based on polilcy:[-0.8604471  0.528955 ]
action based on polilcy:[-0.9019643   0.45867336]
action based on polilcy:[-0.9244365   0.33669245]
action based on polilcy:[-0.48710734  0.86745805]
action based on polilcy:[-0.858567    0

action based on polilcy:[-0.71680224  0.91460496]
action based on polilcy:[-0.8838551   0.54973346]
action based on polilcy:[-0.9707759   0.78461385]
action based on polilcy:[-0.96120584  0.48465216]
action based on polilcy:[-0.6150378   0.41925943]
action based on polilcy:[-0.97246206  0.34465232]
action based on polilcy:[-0.7711092   0.17283998]
action based on polilcy:[-0.51852965  0.1231287 ]
Total T: 10657 Episode Num: 526 Episode T: 8 Reward: -689.327201
action based on polilcy:[-0.73260504  0.91627294]
action based on polilcy:[-0.9668561   0.43512523]
action based on polilcy:[-0.6357533   0.99310774]
action based on polilcy:[-0.58850944  0.78278387]
action based on polilcy:[-0.8641986  0.7333207]
action based on polilcy:[-0.8287128  0.252316 ]
action based on polilcy:[-0.63845396  0.3247521 ]
action based on polilcy:[-0.67975307  0.42185387]
action based on polilcy:[-0.9310987  0.4339914]
action based on polilcy:[-0.90408474  0.18725948]
action based on polilcy:[-0.431815   0.31

action based on polilcy:[-0.7568438  0.9401777]
action based on polilcy:[-0.94305664  0.5274439 ]
action based on polilcy:[-0.8862485  0.5921414]
action based on polilcy:[-0.46769658  0.7900096 ]
action based on polilcy:[-0.92639136  0.456225  ]
action based on polilcy:[-0.98650646  0.82988733]
action based on polilcy:[-0.86827695  0.24936466]
action based on polilcy:[-0.78060305  0.712806  ]
action based on polilcy:[-0.86214435  0.23864783]
action based on polilcy:[-0.96151924  0.17044121]
action based on polilcy:[-0.85666335  0.14347792]
action based on polilcy:[-0.5127107  0.2622505]
Total T: 10815 Episode Num: 539 Episode T: 12 Reward: -935.813710
action based on polilcy:[-0.7567187  0.9297584]
action based on polilcy:[-0.9726242   0.44136104]
action based on polilcy:[-0.9010694   0.35871097]
action based on polilcy:[-0.7832101  0.479338 ]
action based on polilcy:[-0.871201   0.3418021]
action based on polilcy:[-0.7300665   0.34689373]
action based on polilcy:[-0.965739   0.3872029

action based on polilcy:[-0.7993476  0.9149255]
action based on polilcy:[-0.9778492   0.44666696]
action based on polilcy:[-0.9243009   0.70784473]
action based on polilcy:[-0.85432607  0.31483924]
action based on polilcy:[-0.9391711  0.6630531]
action based on polilcy:[-0.89310503  0.2320525 ]
action based on polilcy:[-0.9750983   0.15793633]
action based on polilcy:[-0.78567785  0.11239191]
action based on polilcy:[-0.6620246   0.02797585]
Total T: 10976 Episode Num: 552 Episode T: 9 Reward: -680.046545
action based on polilcy:[-0.81522846  0.90068966]
action based on polilcy:[-0.98104066  0.45023397]
action based on polilcy:[-0.81654215  0.6100889 ]
action based on polilcy:[-0.82650113  0.4529041 ]
action based on polilcy:[-0.89137757  0.41358978]
action based on polilcy:[-0.88889015  0.29241154]
action based on polilcy:[-0.6659362  0.7019483]
action based on polilcy:[-0.9809718   0.33453542]
action based on polilcy:[-0.98838115  0.51631975]
action based on polilcy:[-0.9586842   0.4

action based on polilcy:[-0.8488706  0.8995733]
action based on polilcy:[-0.9790851   0.43824476]
action based on polilcy:[-0.94213814  0.52754146]
action based on polilcy:[-0.9240037   0.42672846]
action based on polilcy:[-0.7212225   0.65026003]
action based on polilcy:[-0.8952635  0.2746649]
action based on polilcy:[-0.96494174  0.20439607]
action based on polilcy:[-0.875621    0.30264032]
action based on polilcy:[-0.8780781   0.12438247]
action based on polilcy:[-0.55145013  0.26016012]
action based on polilcy:[-0.96039647  0.22629741]
Total T: 11162 Episode Num: 567 Episode T: 11 Reward: -683.784601
action based on polilcy:[-0.8510927  0.8832017]
action based on polilcy:[-0.9806678   0.40991643]
action based on polilcy:[-0.9357161   0.39646682]
action based on polilcy:[-0.998286    0.29119262]
action based on polilcy:[-0.89860964  0.10226803]
action based on polilcy:[-0.9114035   0.16972482]
action based on polilcy:[-0.62922907  0.14882135]
action based on polilcy:[-0.9573219   0.

action based on polilcy:[-0.94380015  0.8991691 ]
action based on polilcy:[-0.97164744  0.4951441 ]
action based on polilcy:[-0.9714416   0.52631485]
action based on polilcy:[-0.92035985  0.43352666]
action based on polilcy:[-0.7345487   0.85237324]
action based on polilcy:[-0.88138556  0.32598522]
action based on polilcy:[-0.9982269   0.41516492]
action based on polilcy:[-0.9798672   0.35635108]
action based on polilcy:[-0.6746894   0.34398162]
action based on polilcy:[-0.9539487   0.19650835]
action based on polilcy:[-0.83211064  0.24955912]
action based on polilcy:[-0.8751398   0.20037863]
Total T: 11338 Episode Num: 579 Episode T: 12 Reward: -1027.244906
action based on polilcy:[-0.9688614   0.89027596]
action based on polilcy:[-0.90662444  0.58881724]
action based on polilcy:[-0.9849257  0.5695834]
action based on polilcy:[-0.87811244  0.5808155 ]
action based on polilcy:[-0.80781776  0.59818137]
action based on polilcy:[-0.95950115  0.98482496]
action based on polilcy:[-0.8049563

action based on polilcy:[-0.98299354  0.87866616]
action based on polilcy:[-0.9759581  0.4269771]
action based on polilcy:[-0.95042586  0.5780701 ]
action based on polilcy:[-0.72637606  0.78485507]
action based on polilcy:[-0.9768418   0.72818875]
action based on polilcy:[-0.962081    0.47381678]
action based on polilcy:[-0.71015966  0.3612691 ]
action based on polilcy:[-0.8441797   0.28569233]
action based on polilcy:[-0.5539535   0.38215303]
action based on polilcy:[-0.95913297  0.32216915]
action based on polilcy:[-0.9532975   0.21624343]
action based on polilcy:[-0.97390956  0.10670359]
action based on polilcy:[-0.5244422  0.2502072]
action based on polilcy:[-0.5362563   0.20957099]
Total T: 11521 Episode Num: 593 Episode T: 14 Reward: -800.526668
action based on polilcy:[-0.9876108   0.88112843]
action based on polilcy:[-0.9727175  0.5184908]
action based on polilcy:[-0.988685   0.5432323]
action based on polilcy:[-0.9003129  0.4502812]
action based on polilcy:[-0.9140782   0.4174

action based on polilcy:[-0.9899668  0.833274 ]
action based on polilcy:[-0.97382057  0.87368894]
action based on polilcy:[-0.93562824  0.477835  ]
action based on polilcy:[-0.9596616   0.57378703]
action based on polilcy:[-0.7017559   0.43093172]
action based on polilcy:[-0.73341835  0.35113263]
action based on polilcy:[-0.9602389   0.22986755]
action based on polilcy:[-0.15251264  0.3374176 ]
action based on polilcy:[-0.33599174  0.32054245]
Total T: 11691 Episode Num: 607 Episode T: 9 Reward: -839.250623
action based on polilcy:[-0.98902225  0.83269763]
action based on polilcy:[-0.9692148   0.46740964]
action based on polilcy:[-0.8080199   0.37715676]
action based on polilcy:[-0.9564405  0.2682051]
action based on polilcy:[-0.9897592  0.1667151]
action based on polilcy:[-0.9295605   0.07551202]
action based on polilcy:[-0.87713885  0.12442283]
action based on polilcy:[-0.8033481   0.24033712]
action based on polilcy:[-0.9906445  0.2660997]
action based on polilcy:[-0.9867035   0.214

action based on polilcy:[-0.9943113  0.8156321]
action based on polilcy:[-0.9654295   0.47057936]
action based on polilcy:[-0.91168845  0.5178657 ]
action based on polilcy:[-0.86702377  0.39944133]
action based on polilcy:[-0.714532   0.4212985]
action based on polilcy:[-0.87147987  0.50271606]
action based on polilcy:[-0.7120701  0.5683396]
action based on polilcy:[-0.93562526  0.49813497]
action based on polilcy:[-0.770514    0.33972064]
action based on polilcy:[-0.996897    0.18481717]
action based on polilcy:[-0.7543881   0.12459607]
action based on polilcy:[-0.9439764   0.18957427]
action based on polilcy:[-0.9955807   0.02054312]
Total T: 11854 Episode Num: 620 Episode T: 13 Reward: -801.184381
action based on polilcy:[-0.9953103   0.83221006]
action based on polilcy:[-0.9726655   0.44947788]
action based on polilcy:[-0.90797603  0.47545847]
action based on polilcy:[-0.9788148  0.6320907]
action based on polilcy:[-0.80348647  0.6173894 ]
action based on polilcy:[-0.95271516  0.47

Total T: 12093 Episode Num: 634 Episode T: 47 Reward: -7254.621853
Total T: 12120 Episode Num: 635 Episode T: 27 Reward: -2623.738545
Total T: 12140 Episode Num: 636 Episode T: 20 Reward: -2617.924521
Total T: 12154 Episode Num: 637 Episode T: 14 Reward: -2076.556464
Total T: 12203 Episode Num: 638 Episode T: 49 Reward: -7399.086437
Total T: 12251 Episode Num: 639 Episode T: 48 Reward: -7288.619343
Total T: 12300 Episode Num: 640 Episode T: 49 Reward: -7216.633764
Total T: 12320 Episode Num: 641 Episode T: 20 Reward: -3062.278777
Total T: 12365 Episode Num: 642 Episode T: 45 Reward: -7321.868503
Total T: 12374 Episode Num: 643 Episode T: 9 Reward: -1279.363096
Total T: 12412 Episode Num: 644 Episode T: 38 Reward: -5765.154787
Total T: 12461 Episode Num: 645 Episode T: 49 Reward: -7080.793673
action based on polilcy:[0.9969354 0.9999999]
action based on polilcy:[0.9956007 0.9999996]
action based on polilcy:[0.997848  0.9999279]
Total T: 12503 Episode Num: 646 Episode T: 42 Reward: -6194

action based on polilcy:[-0.94926876  0.87726694]
action based on polilcy:[-0.85951805  0.5961046 ]
action based on polilcy:[-0.90458447  0.5460454 ]
action based on polilcy:[-0.9060304  0.755107 ]
action based on polilcy:[-0.720808    0.60397375]
action based on polilcy:[-0.8980604   0.55999374]
action based on polilcy:[-0.22894785  0.6403371 ]
action based on polilcy:[-0.90674794  0.5631604 ]
action based on polilcy:[-0.7407861   0.51068175]
action based on polilcy:[-0.6552516   0.42176795]
action based on polilcy:[-0.82609546  0.3483513 ]
action based on polilcy:[-0.957878    0.25254455]
action based on polilcy:[-0.5516921   0.34929854]
action based on polilcy:[-0.96792006  0.24024259]
action based on polilcy:[-0.9864379 -0.0376695]
action based on polilcy:[-0.9497221   0.14344984]
Total T: 12654 Episode Num: 658 Episode T: 16 Reward: -736.179339
action based on polilcy:[-0.8909832   0.86814636]
action based on polilcy:[-0.5272069  0.7396337]
action based on polilcy:[-0.7095636  0.5

action based on polilcy:[-0.96131337  0.85199153]
action based on polilcy:[0.06677739 0.99318075]
action based on polilcy:[-0.9520325   0.53845596]
action based on polilcy:[-0.76946145  0.72190976]
action based on polilcy:[-0.49929056  0.7216711 ]
action based on polilcy:[-0.5893749   0.43953466]
action based on polilcy:[-0.5897453   0.54509735]
action based on polilcy:[-0.99477684  0.4039867 ]
action based on polilcy:[-0.951432   0.2529452]
action based on polilcy:[-0.7379862  0.2982949]
action based on polilcy:[-0.463262   0.4149197]
action based on polilcy:[-0.6830447   0.43994632]
action based on polilcy:[-0.5576271  0.3565843]
action based on polilcy:[-0.62930673  0.5085392 ]
Total T: 12823 Episode Num: 674 Episode T: 14 Reward: -1212.518918
action based on polilcy:[-0.9103383  0.8433653]
action based on polilcy:[-0.88147956  0.81178224]
action based on polilcy:[-0.63699263  0.4980857 ]
action based on polilcy:[-0.5533509   0.51874113]
action based on polilcy:[-0.8281833  0.712810

action based on polilcy:[-0.9222709   0.82785034]
action based on polilcy:[-0.54600894  0.4801445 ]
action based on polilcy:[-0.85128963  0.55925083]
action based on polilcy:[-0.30839387  0.5342513 ]
action based on polilcy:[-0.07214434  0.6895502 ]
action based on polilcy:[-0.6013646   0.48034778]
action based on polilcy:[-0.9999785   0.41316208]
action based on polilcy:[-0.932225    0.25708926]
action based on polilcy:[-0.906969    0.07700752]
Total T: 12981 Episode Num: 688 Episode T: 9 Reward: -810.380760
action based on polilcy:[-0.907654    0.83332694]
action based on polilcy:[-0.55553705  0.4891489 ]
action based on polilcy:[-0.82967985  0.66432965]
action based on polilcy:[-0.9412855   0.42247432]
action based on polilcy:[-0.9931278   0.27804813]
action based on polilcy:[-0.9893567   0.17933792]
action based on polilcy:[-0.95436054  0.14284557]
action based on polilcy:[-0.98369694  0.09792186]
action based on polilcy:[-0.8732247   0.21021271]
Total T: 12990 Episode Num: 689 Epi

action based on polilcy:[-0.85017633  0.8362181 ]
action based on polilcy:[-0.8421751  0.6811032]
action based on polilcy:[-0.1668729  0.5672945]
action based on polilcy:[-0.7823334  0.6595756]
action based on polilcy:[-0.07630157  0.63134   ]
action based on polilcy:[-0.9722966  0.6313876]
action based on polilcy:[-0.8069104  0.6585113]
action based on polilcy:[-0.1598426  0.5916932]
action based on polilcy:[-0.90382206  0.6495185 ]
action based on polilcy:[0.13701917 0.97595966]
action based on polilcy:[-0.0912136  0.6739101]
action based on polilcy:[0.25892788 0.9432953 ]
action based on polilcy:[0.21281864 0.99856174]
action based on polilcy:[-0.4033313  0.5798365]
action based on polilcy:[-0.5697249   0.42210028]
action based on polilcy:[-0.9999991   0.41734007]
action based on polilcy:[-0.48598936  0.31556454]
action based on polilcy:[-0.99994457  0.33112708]
action based on polilcy:[-0.9995224   0.18048918]
Total T: 13167 Episode Num: 704 Episode T: 19 Reward: -2297.819556
actio

action based on polilcy:[-0.6180296   0.39029044]
Total T: 13325 Episode Num: 717 Episode T: 8 Reward: -799.092330
action based on polilcy:[-0.857892   0.8639151]
action based on polilcy:[-0.588004   0.5604552]
action based on polilcy:[-0.99942213  0.67522323]
action based on polilcy:[-0.8097976   0.40938312]
action based on polilcy:[-0.9997456  0.2307272]
action based on polilcy:[-0.9999002   0.20577906]
action based on polilcy:[-0.5462866   0.35706025]
action based on polilcy:[-0.541322    0.40515083]
action based on polilcy:[-0.58528244  0.5118919 ]
Total T: 13334 Episode Num: 718 Episode T: 9 Reward: -756.437623
action based on polilcy:[-0.8773269  0.8632752]
action based on polilcy:[-0.538242    0.58206344]
action based on polilcy:[-0.3236811  0.7704549]
action based on polilcy:[-0.69153094  0.5326792 ]
action based on polilcy:[-0.40551722  0.51166606]
action based on polilcy:[-0.999804    0.46753162]
action based on polilcy:[-0.9740811   0.38188973]
action based on polilcy:[-0.99

action based on polilcy:[-0.86979735  0.8694588 ]
action based on polilcy:[-0.6553767  0.7719561]
action based on polilcy:[-0.6456709  0.5611762]
action based on polilcy:[-0.7232387  0.5371051]
action based on polilcy:[-0.2439039   0.68340766]
action based on polilcy:[-0.23454992  0.66297996]
action based on polilcy:[-0.78605133  0.8064362 ]
action based on polilcy:[-0.39578682  0.6598617 ]
action based on polilcy:[-0.44818467  0.52229536]
action based on polilcy:[-0.99995726  0.5091691 ]
action based on polilcy:[-0.9999998  0.3010456]
Total T: 13486 Episode Num: 731 Episode T: 11 Reward: -1446.857930
action based on polilcy:[-0.8290003  0.8434948]
action based on polilcy:[-0.6330424  0.7266462]
action based on polilcy:[-0.45249853  0.55134284]
action based on polilcy:[-0.9888218   0.53425336]
action based on polilcy:[-0.999961    0.37166756]
action based on polilcy:[-0.9922984   0.27020726]
action based on polilcy:[-0.6100439   0.36072516]
action based on polilcy:[-0.91318166  0.29977

action based on polilcy:[-0.85781914  0.86407757]
action based on polilcy:[-0.25437984  0.5520317 ]
action based on polilcy:[0.05082614 0.9999953 ]
action based on polilcy:[-0.04665489  0.9999321 ]
action based on polilcy:[-0.0422801  0.721624 ]
action based on polilcy:[-0.14147738  0.99999976]
action based on polilcy:[-0.28070873  0.8095449 ]
action based on polilcy:[-0.453333    0.57670844]
action based on polilcy:[0.9788664  0.99998474]
action based on polilcy:[-0.7816614   0.60833883]
action based on polilcy:[0.98397285 0.9994937 ]
action based on polilcy:[-0.30705988  0.6512001 ]
action based on polilcy:[0.98313785 0.7243298 ]
action based on polilcy:[ 0.67891186 -0.01268046]
action based on polilcy:[0.94810575 0.6459561 ]
Total T: 13644 Episode Num: 745 Episode T: 15 Reward: -1911.489296
action based on polilcy:[-0.80737466  0.842856  ]
action based on polilcy:[-0.24384989  0.5405007 ]
action based on polilcy:[-0.47551602  0.65522397]
action based on polilcy:[-0.50217605  0.50888

action based on polilcy:[-0.7483942   0.84636396]
action based on polilcy:[-0.04012915  0.99999994]
action based on polilcy:[-0.6058971   0.48132834]
action based on polilcy:[-0.07854964  0.7930237 ]
action based on polilcy:[-0.692765   0.6825591]
action based on polilcy:[-0.76095414  0.486448  ]
action based on polilcy:[-0.95357835  0.37003323]
action based on polilcy:[-0.7693291   0.33742753]
action based on polilcy:[-0.6959038  0.3512756]
action based on polilcy:[-0.8042774   0.32628626]
action based on polilcy:[-0.99981326  0.18330973]
action based on polilcy:[-0.99978256  0.11923106]
Total T: 13796 Episode Num: 759 Episode T: 12 Reward: -1103.900624
action based on polilcy:[-0.8134983  0.8638038]
action based on polilcy:[-0.37096995  0.56179667]
action based on polilcy:[-0.66804725  0.8262982 ]
action based on polilcy:[-0.9989359  0.6400832]
action based on polilcy:[-0.43461677  0.49432504]
action based on polilcy:[-0.9613326   0.39565685]
action based on polilcy:[-0.3879861   0.4

action based on polilcy:[-0.80931234  0.859524  ]
action based on polilcy:[-0.52651286  0.5177591 ]
action based on polilcy:[0.16443512 0.96817005]
action based on polilcy:[0.27483281 0.9997988 ]
action based on polilcy:[-0.08859368  0.9593961 ]
action based on polilcy:[-0.6874714  0.6970362]
action based on polilcy:[-0.99970704  0.7110171 ]
action based on polilcy:[-0.73553395  0.7343575 ]
action based on polilcy:[-0.37145987  0.58931184]
action based on polilcy:[0.05961485 0.9999969 ]
action based on polilcy:[-0.45320866  0.5100473 ]
action based on polilcy:[-0.7691971   0.56683016]
action based on polilcy:[-0.55736744  0.5088252 ]
action based on polilcy:[-0.45467058  0.43370184]
action based on polilcy:[-0.9995851  0.3689555]
action based on polilcy:[-0.9999978  0.1790923]
Total T: 13971 Episode Num: 775 Episode T: 16 Reward: -2009.461588
action based on polilcy:[-0.7815855   0.87622094]
action based on polilcy:[-0.18016279  0.966818  ]
action based on polilcy:[-0.39374286  0.54117

action based on polilcy:[-0.82742935  0.95500237]
action based on polilcy:[-0.65432966  0.72107744]
action based on polilcy:[-0.9815997   0.67310613]
action based on polilcy:[-0.4858415  0.4964647]
action based on polilcy:[-0.9926049  0.5299469]
action based on polilcy:[-0.99161154  0.31264314]
action based on polilcy:[-0.9909673   0.22784688]
action based on polilcy:[-0.9997302   0.05391468]
Total T: 14603 Episode Num: 798 Episode T: 8 Reward: -738.937342
action based on polilcy:[-0.81160474  0.95139414]
action based on polilcy:[-0.4150134   0.60374427]
action based on polilcy:[-0.6260028  0.5395416]
action based on polilcy:[-0.90808886  0.64308405]
action based on polilcy:[-0.5862553  0.9161099]
action based on polilcy:[-0.82966536  0.4954516 ]
action based on polilcy:[-0.9192592   0.34774682]
action based on polilcy:[-0.99591076  0.2369244 ]
action based on polilcy:[-0.99893427  0.16682349]
action based on polilcy:[-0.9470153   0.19389382]
action based on polilcy:[-0.9999688   0.254

action based on polilcy:[-0.54795206  0.63235843]
action based on polilcy:[-0.98287684  0.87543476]
action based on polilcy:[-0.78172684  0.40805793]
action based on polilcy:[-0.9921931   0.82005215]
action based on polilcy:[-0.5119051  0.5645331]
action based on polilcy:[-0.9994179   0.64816093]
action based on polilcy:[-0.9235544   0.33574176]
action based on polilcy:[-0.749449    0.31148592]
action based on polilcy:[-0.9997682   0.19279031]
Total T: 14755 Episode Num: 812 Episode T: 13 Reward: -1119.937827
action based on polilcy:[-0.6964891  0.9695397]
action based on polilcy:[-0.7461382   0.77954835]
action based on polilcy:[-0.47695374  0.94541806]
action based on polilcy:[-0.79814917  0.59362066]
action based on polilcy:[-0.7825935   0.40540895]
action based on polilcy:[-0.9999805   0.33506322]
action based on polilcy:[-0.9912566   0.23553447]
action based on polilcy:[-0.6683297  0.2854612]
action based on polilcy:[-0.99150693  0.23417008]
action based on polilcy:[-0.7318421   0

action based on polilcy:[-0.7171476  0.9850011]
action based on polilcy:[-0.71943474  0.8548517 ]
action based on polilcy:[-0.7132126   0.46560583]
action based on polilcy:[-0.99999547  0.6002644 ]
action based on polilcy:[-0.6059923  0.5693115]
action based on polilcy:[-0.5685482   0.47888386]
action based on polilcy:[-0.7130183   0.50400984]
action based on polilcy:[-0.96010256  0.3783941 ]
action based on polilcy:[-0.93121487  0.2944364 ]
Total T: 14908 Episode Num: 825 Episode T: 9 Reward: -840.721201
action based on polilcy:[-0.72889376  0.9773642 ]
action based on polilcy:[-0.85752136  0.9375103 ]
action based on polilcy:[-0.946996    0.55850685]
action based on polilcy:[-0.9602608   0.49037537]
action based on polilcy:[-0.9889756   0.42050532]
action based on polilcy:[-0.7997755   0.34523842]
action based on polilcy:[-0.99997085  0.20563829]
action based on polilcy:[-0.49232838  0.29922396]
action based on polilcy:[-0.9999652   0.20624399]
Total T: 14917 Episode Num: 826 Episode

action based on polilcy:[-0.7114899   0.98612934]
action based on polilcy:[-0.7294607   0.78109413]
action based on polilcy:[-0.8260705  0.9019739]
action based on polilcy:[-0.673403    0.47436625]
action based on polilcy:[-0.5441756  0.5067056]
action based on polilcy:[-0.6242708  0.4764429]
action based on polilcy:[-0.5505181   0.71501106]
action based on polilcy:[-0.97081935  0.5342652 ]
action based on polilcy:[-0.99968815  0.35199103]
action based on polilcy:[-0.80774885  0.31478992]
action based on polilcy:[-0.99983406  0.33190024]
Total T: 15067 Episode Num: 838 Episode T: 11 Reward: -951.819761
action based on polilcy:[-0.77070653  0.9833999 ]
action based on polilcy:[-0.6922071  0.7036905]
action based on polilcy:[-0.588289    0.54994667]
action based on polilcy:[-0.6738787  0.6414169]
action based on polilcy:[-0.99999195  0.55630887]
action based on polilcy:[-0.5575479   0.45153594]
action based on polilcy:[-0.4947061   0.74645174]
action based on polilcy:[-0.60224366  0.8774

action based on polilcy:[-0.60673785  0.986805  ]
action based on polilcy:[-0.62159395  0.5566245 ]
action based on polilcy:[-0.69884646  0.4879381 ]
action based on polilcy:[-0.8293712   0.42960685]
action based on polilcy:[-0.9747635   0.31321916]
action based on polilcy:[-0.48617285  0.3825524 ]
action based on polilcy:[-0.98921204  0.30085707]
action based on polilcy:[-0.6039264   0.32747477]
action based on polilcy:[-0.9937513   0.24894339]
action based on polilcy:[-0.9994724   0.16743778]
Total T: 15247 Episode Num: 853 Episode T: 10 Reward: -366.183387
action based on polilcy:[-0.6415453  0.9868781]
action based on polilcy:[-0.07291366  0.9813815 ]
action based on polilcy:[-0.6945929   0.62766236]
action based on polilcy:[-0.9998994   0.54122347]
action based on polilcy:[-0.99999976  0.3854406 ]
action based on polilcy:[-0.5710323  0.3462719]
action based on polilcy:[-0.99764013  0.24703512]
action based on polilcy:[-0.9855218   0.20602334]
action based on polilcy:[-0.72527057  

action based on polilcy:[0.1030222  0.94379807]
action based on polilcy:[-0.6783227  0.6154798]
action based on polilcy:[-0.68928653  0.5181768 ]
action based on polilcy:[0.11104665 0.86926764]
action based on polilcy:[-0.664232    0.58855295]
action based on polilcy:[-0.7070019  0.5079688]
action based on polilcy:[-0.76657504  0.47949156]
action based on polilcy:[-0.9994873   0.34369144]
action based on polilcy:[-0.9893211   0.44185424]
action based on polilcy:[-0.98030365  0.33052176]
action based on polilcy:[-0.5879579   0.35844564]
action based on polilcy:[-0.99155104  0.59422195]
action based on polilcy:[-0.7669474   0.41829836]
Total T: 15420 Episode Num: 868 Episode T: 13 Reward: -1150.722808
action based on polilcy:[-0.02526642  0.9379537 ]
action based on polilcy:[-0.6839525   0.63135076]
action based on polilcy:[-0.9897647  0.5975196]
action based on polilcy:[-0.7758516   0.43060452]
action based on polilcy:[-0.8269763   0.37899607]
action based on polilcy:[-0.99978316  0.284

action based on polilcy:[0.08938234 0.8929119 ]
action based on polilcy:[-0.39613843  0.93598384]
action based on polilcy:[-0.81300247  0.7013042 ]
action based on polilcy:[-0.46820018  0.52441347]
action based on polilcy:[-0.9992188  0.6191808]
action based on polilcy:[-0.990862   0.5284146]
action based on polilcy:[-0.5897825   0.49850014]
action based on polilcy:[-0.9999626   0.43583778]
action based on polilcy:[-0.99297225  0.3405921 ]
Total T: 15590 Episode Num: 883 Episode T: 9 Reward: -1311.243685
action based on polilcy:[0.16162778 0.89438605]
action based on polilcy:[0.50382257 0.993557  ]
action based on polilcy:[-0.25553736  0.9968749 ]
action based on polilcy:[-0.7261053   0.71793926]
action based on polilcy:[-0.53414917  0.52467465]
action based on polilcy:[-0.83668375  0.4424954 ]
action based on polilcy:[-0.75371647  0.40835273]
action based on polilcy:[-0.77228284  0.37832502]
action based on polilcy:[-0.9962388   0.40619352]
action based on polilcy:[-0.7137846   0.4010

action based on polilcy:[0.09502966 0.8438628 ]
action based on polilcy:[0.16691206 0.95934683]
action based on polilcy:[-0.9858575  0.8881219]
action based on polilcy:[-0.7146045  0.468291 ]
action based on polilcy:[-0.99996233  0.40227184]
action based on polilcy:[-0.61759746  0.3742153 ]
action based on polilcy:[-0.8990954   0.32544106]
action based on polilcy:[-0.99224436  0.26706514]
action based on polilcy:[-0.9985746   0.23147744]
action based on polilcy:[-0.9998239   0.14690158]
Total T: 15766 Episode Num: 900 Episode T: 10 Reward: -958.166334
action based on polilcy:[0.04139847 0.8453053 ]
action based on polilcy:[0.4621713  0.99880195]
action based on polilcy:[0.0222971  0.74134994]
action based on polilcy:[-0.5255799  0.7871028]
action based on polilcy:[0.04406668 0.7066586 ]
action based on polilcy:[0.3401201  0.99401635]
action based on polilcy:[0.00534037 0.87634754]
action based on polilcy:[-0.43807852  0.83276355]
action based on polilcy:[-0.6529937   0.51400244]
action

action based on polilcy:[0.13512465 0.86054003]
action based on polilcy:[-0.5732474  0.5226997]
action based on polilcy:[-0.59748954  0.523945  ]
action based on polilcy:[-0.41801998  0.5700345 ]
action based on polilcy:[-0.6662131  0.5239262]
action based on polilcy:[-0.9101044   0.67914975]
action based on polilcy:[-0.55088544  0.5046833 ]
action based on polilcy:[-0.43160683  0.5840745 ]
action based on polilcy:[-0.9838949  0.5300256]
action based on polilcy:[-0.4290506  0.5790058]
action based on polilcy:[-0.6613847   0.57042706]
action based on polilcy:[-0.99261636  0.73450345]
action based on polilcy:[-0.5083586  0.5025489]
action based on polilcy:[-0.8352365  0.4853338]
action based on polilcy:[-0.99998635  0.39471847]
action based on polilcy:[-0.67334294  0.4267612 ]
action based on polilcy:[-0.56322473  0.6664798 ]
Total T: 15927 Episode Num: 913 Episode T: 17 Reward: -1303.104728
action based on polilcy:[0.1395857 0.8691306]
action based on polilcy:[-0.3188182  0.8127729]
act

action based on polilcy:[0.00685778 0.9693716 ]
action based on polilcy:[-0.84583783  0.92658025]
action based on polilcy:[-0.47239923  0.5792478 ]
action based on polilcy:[-0.9228753   0.47698092]
action based on polilcy:[-0.42544273  0.60040486]
action based on polilcy:[-0.48194417  0.5564244 ]
action based on polilcy:[-0.55099326  0.5015781 ]
action based on polilcy:[-0.9824282   0.45647407]
action based on polilcy:[-0.99312365  0.2610867 ]
Total T: 16551 Episode Num: 938 Episode T: 9 Reward: -807.432295
action based on polilcy:[0.03486883 0.97550374]
action based on polilcy:[-0.5497681   0.93461883]
action based on polilcy:[-0.65407586  0.8373586 ]
action based on polilcy:[-0.48526654  0.58014125]
action based on polilcy:[-0.8580566  0.6914622]
action based on polilcy:[-0.8440868   0.41693076]
action based on polilcy:[-0.9025488   0.29576513]
action based on polilcy:[-0.44397417  0.46095526]
action based on polilcy:[-0.6429311   0.40779117]
action based on polilcy:[-0.94130474  0.2

action based on polilcy:[0.04039392 0.9498247 ]
action based on polilcy:[-0.5625767   0.61289865]
action based on polilcy:[-0.42789733  0.6050444 ]
action based on polilcy:[-0.24571192  0.52266884]
action based on polilcy:[-0.82714856  0.29331893]
action based on polilcy:[-0.997352    0.02744031]
action based on polilcy:[-0.97928894  0.12961498]
Total T: 16710 Episode Num: 954 Episode T: 7 Reward: -562.748006
action based on polilcy:[0.0724893 0.9511161]
action based on polilcy:[-0.6349095   0.84291637]
action based on polilcy:[-0.47175583  0.55948436]
action based on polilcy:[-0.4674172  0.7402375]
action based on polilcy:[-0.5359529  0.5605519]
action based on polilcy:[-0.48752278  0.5632439 ]
action based on polilcy:[-0.803426   0.3167414]
action based on polilcy:[-0.9956734   0.12551174]
action based on polilcy:[-0.92039615  0.20085566]
Total T: 16719 Episode Num: 955 Episode T: 9 Reward: -822.318796
action based on polilcy:[0.07138005 0.94858384]
action based on polilcy:[-0.542294

action based on polilcy:[0.03490337 0.9305775 ]
action based on polilcy:[-0.58327544  0.799572  ]
action based on polilcy:[-0.58001864  0.75138557]
action based on polilcy:[-0.54353285  0.6316676 ]
action based on polilcy:[-0.28819603  0.6297418 ]
action based on polilcy:[-0.44003633  0.450174  ]
action based on polilcy:[-0.30269954  0.42121235]
action based on polilcy:[-0.8519515   0.24906884]
action based on polilcy:[-0.7890905   0.22086015]
action based on polilcy:[-0.9671028   0.13826518]
Total T: 16872 Episode Num: 969 Episode T: 10 Reward: -796.502096
action based on polilcy:[0.10659709 0.9341638 ]
action based on polilcy:[-0.525311    0.83602875]
action based on polilcy:[-0.45238075  0.52713656]
action based on polilcy:[-0.41051954  0.4016438 ]
action based on polilcy:[-0.6904311   0.32752284]
action based on polilcy:[-0.9374708   0.26797944]
action based on polilcy:[-0.95631266  0.21939118]
action based on polilcy:[-0.99296623  0.14815268]
Total T: 16880 Episode Num: 970 Episod

action based on polilcy:[0.09908643 0.9251394 ]
action based on polilcy:[0.3115561 0.9987787]
action based on polilcy:[-0.2006852  0.9206658]
action based on polilcy:[0.40708885 1.        ]
action based on polilcy:[-0.38455743  0.94752896]
action based on polilcy:[-0.5766705  0.8478812]
action based on polilcy:[-0.54627675  0.6462548 ]
action based on polilcy:[-0.49707085  0.7596302 ]
action based on polilcy:[-0.50238836  0.6535299 ]
action based on polilcy:[-0.4113849  0.6979158]
action based on polilcy:[-0.5588601  0.7316897]
action based on polilcy:[-0.573969    0.54801375]
action based on polilcy:[-0.525189   0.6367246]
action based on polilcy:[-0.4663342  0.8198323]
action based on polilcy:[-0.5413542   0.56695473]
action based on polilcy:[-0.48135105  0.5977068 ]
action based on polilcy:[-0.6154417   0.35182822]
action based on polilcy:[-0.9390049  0.2344681]
action based on polilcy:[-0.9756415   0.11648332]
action based on polilcy:[-0.9599355   0.14921981]
Total T: 17042 Episode

action based on polilcy:[0.14228082 0.8646761 ]
action based on polilcy:[-0.73460674  0.7057041 ]
action based on polilcy:[-0.7490071  0.7320883]
action based on polilcy:[-0.406633    0.52346396]
action based on polilcy:[-0.25844508  0.5277599 ]
action based on polilcy:[-0.23801939  0.44986433]
action based on polilcy:[-0.3150215   0.43792534]
action based on polilcy:[-0.13929823  0.41867787]
Total T: 17184 Episode Num: 997 Episode T: 8 Reward: -1040.275281
action based on polilcy:[0.10786393 0.8805789 ]
action based on polilcy:[-0.8717798   0.77175516]
action based on polilcy:[-0.44343355  0.55228287]
action based on polilcy:[-0.2852633  0.5977061]
action based on polilcy:[-0.46113494  0.35044837]
action based on polilcy:[-0.85714114  0.23686822]
action based on polilcy:[-0.37635526  0.39503455]
action based on polilcy:[-0.47815225  0.44754642]
Total T: 17192 Episode Num: 998 Episode T: 8 Reward: -947.131273
action based on polilcy:[0.12445807 0.8842573 ]
action based on polilcy:[-0.6

action based on polilcy:[0.08295797 0.88504624]
action based on polilcy:[0.25698835 0.9990841 ]
action based on polilcy:[-0.53641886  0.52618873]
action based on polilcy:[-0.6043117   0.82617986]
action based on polilcy:[-0.36254176  0.6325002 ]
action based on polilcy:[-0.41415069  0.3955716 ]
action based on polilcy:[-0.9156998   0.21476929]
action based on polilcy:[-0.96810186  0.15010981]
action based on polilcy:[-0.99791646  0.07694298]
Total T: 17357 Episode Num: 1012 Episode T: 9 Reward: -954.174868
action based on polilcy:[0.07774342 0.8898744 ]
action based on polilcy:[-0.5503744  0.5608572]
action based on polilcy:[-0.12173722  0.7089411 ]
action based on polilcy:[-0.59577084  0.59504   ]
action based on polilcy:[-0.5171608  0.4826797]
action based on polilcy:[-0.39249936  0.48550102]
action based on polilcy:[-0.4870876   0.45989564]
action based on polilcy:[-0.43181914  0.44587716]
action based on polilcy:[-0.44970223  0.4258634 ]
action based on polilcy:[-0.64957887  0.3528

action based on polilcy:[0.12723047 0.89758545]
action based on polilcy:[-0.87787336  0.73716223]
action based on polilcy:[-0.53141505  0.5063175 ]
action based on polilcy:[-0.6771965   0.48981696]
action based on polilcy:[-0.55522066  0.54061997]
action based on polilcy:[-0.5019662   0.48625204]
action based on polilcy:[-0.45781416  0.43442467]
action based on polilcy:[-0.44832656  0.4082169 ]
action based on polilcy:[-0.44741923  0.3602077 ]
action based on polilcy:[-0.40533778  0.31046662]
action based on polilcy:[-0.44330615  0.34841716]
action based on polilcy:[-0.46814197  0.40229356]
action based on polilcy:[-0.35749698  0.49830684]
action based on polilcy:[-0.4971474  0.3545553]
Total T: 17537 Episode Num: 1027 Episode T: 14 Reward: -1040.289007
action based on polilcy:[0.10563578 0.86693   ]
action based on polilcy:[-0.60952497  0.6172006 ]
action based on polilcy:[-0.5338222  0.5217511]
action based on polilcy:[-0.42786288  0.58859086]
action based on polilcy:[-0.43267804  0.

action based on polilcy:[0.01992571 0.8689236 ]
action based on polilcy:[-0.86449534  0.7086949 ]
action based on polilcy:[-0.5620491   0.60533476]
action based on polilcy:[-0.46423042  0.5456389 ]
action based on polilcy:[-0.28500104  0.52072954]
action based on polilcy:[-0.29167402  0.35396174]
action based on polilcy:[-0.12033725  0.37496838]
Total T: 17679 Episode Num: 1040 Episode T: 7 Reward: -960.324042
action based on polilcy:[0.03637932 0.8247322 ]
action based on polilcy:[-0.89653695  0.7358907 ]
action based on polilcy:[-0.413431   0.5375736]
action based on polilcy:[-0.3835513  0.4001743]
action based on polilcy:[-0.42018896  0.2332539 ]
action based on polilcy:[-0.906682    0.03337797]
action based on polilcy:[-0.9533654  0.122269 ]
Total T: 17686 Episode Num: 1041 Episode T: 7 Reward: -713.269989
action based on polilcy:[-0.00358921  0.86663926]
action based on polilcy:[0.24953161 0.99999505]
action based on polilcy:[-0.12000439  0.9709685 ]
action based on polilcy:[0.202

action based on polilcy:[0.03639663 0.8518679 ]
action based on polilcy:[0.10612444 0.9917581 ]
action based on polilcy:[0.0310249  0.99994713]
action based on polilcy:[0.12440585 0.9550645 ]
action based on polilcy:[-0.846833    0.77882457]
action based on polilcy:[-0.6446612   0.60689753]
action based on polilcy:[-0.38041207  0.51932365]
action based on polilcy:[-0.43463132  0.4206074 ]
action based on polilcy:[-0.14733224  0.4348242 ]
action based on polilcy:[-0.78029364  0.24037765]
action based on polilcy:[-0.6135682   0.27519378]
action based on polilcy:[-0.24095158  0.33046764]
Total T: 17850 Episode Num: 1053 Episode T: 12 Reward: -1490.186455
action based on polilcy:[0.04140836 0.85801196]
action based on polilcy:[-0.6097827  0.6198056]
action based on polilcy:[-0.5001738   0.52983177]
action based on polilcy:[-0.42120355  0.458184  ]
action based on polilcy:[-0.371525    0.41975188]
action based on polilcy:[-0.43549776  0.4761013 ]
action based on polilcy:[-0.1792584   0.6281

Total T: 18056 Episode Num: 1068 Episode T: 55 Reward: -7535.362495
Total T: 18106 Episode Num: 1069 Episode T: 50 Reward: -7261.606598
Total T: 18120 Episode Num: 1070 Episode T: 14 Reward: -2234.512565
Total T: 18135 Episode Num: 1071 Episode T: 15 Reward: -1787.135833
Total T: 18180 Episode Num: 1072 Episode T: 45 Reward: -7286.878684
Total T: 18188 Episode Num: 1073 Episode T: 8 Reward: -1008.639887
Total T: 18233 Episode Num: 1074 Episode T: 45 Reward: -7319.289891
Total T: 18279 Episode Num: 1075 Episode T: 46 Reward: -7470.368464
Total T: 18292 Episode Num: 1076 Episode T: 13 Reward: -1683.745911
Total T: 18322 Episode Num: 1077 Episode T: 30 Reward: -4401.053814
Total T: 18368 Episode Num: 1078 Episode T: 46 Reward: -6999.473470
Total T: 18377 Episode Num: 1079 Episode T: 9 Reward: -1233.156372
Total T: 18398 Episode Num: 1080 Episode T: 21 Reward: -2563.440446
Total T: 18417 Episode Num: 1081 Episode T: 19 Reward: -2347.784772
Total T: 18464 Episode Num: 1082 Episode T: 47 Rew

action based on polilcy:[0.12001596 0.8429123 ]
action based on polilcy:[-0.5438551   0.61805993]
action based on polilcy:[-0.6342745  0.673726 ]
action based on polilcy:[-0.71277535  0.4836133 ]
action based on polilcy:[0.22583623 0.9901667 ]
action based on polilcy:[-0.7680502   0.71830475]
action based on polilcy:[0.2782792  0.99999917]
action based on polilcy:[0.1259842 0.9999997]
action based on polilcy:[-0.10702254  1.        ]
action based on polilcy:[0.4664615 0.9997421]
action based on polilcy:[-0.19761586  0.98996025]
action based on polilcy:[0.14530118 0.99998516]
action based on polilcy:[-0.7134813   0.79563516]
action based on polilcy:[-0.65080994  0.47949848]
action based on polilcy:[-0.86293304  0.68626463]
action based on polilcy:[-0.63636523  0.41088548]
action based on polilcy:[-0.60662484  0.3865177 ]
action based on polilcy:[-0.6770791  0.3256958]
action based on polilcy:[-0.7179829   0.32534382]
action based on polilcy:[-0.61358947  0.38235584]
action based on poli

action based on polilcy:[0.13990828 0.8599832 ]
action based on polilcy:[-0.9358761  0.851572 ]
action based on polilcy:[-0.47246388  0.5606346 ]
action based on polilcy:[-0.48972854  0.48381248]
action based on polilcy:[-0.3346873   0.46582922]
action based on polilcy:[-0.8510242  0.7799075]
action based on polilcy:[-0.5501276   0.48758566]
action based on polilcy:[-0.51667476  0.41851923]
action based on polilcy:[-0.56631786  0.38706928]
action based on polilcy:[-0.45281035  0.5374577 ]
action based on polilcy:[-0.55903435  0.63384485]
action based on polilcy:[-0.6449275   0.45577124]
action based on polilcy:[-0.59364665  0.39163643]
Total T: 18852 Episode Num: 1111 Episode T: 13 Reward: -1231.074751
action based on polilcy:[0.13618936 0.8375976 ]
action based on polilcy:[-0.88482046  0.7907765 ]
action based on polilcy:[-0.4747402  0.5122046]
action based on polilcy:[-0.4779302   0.56419396]
action based on polilcy:[-0.5076003   0.41675928]
action based on polilcy:[-0.5505193  0.564

action based on polilcy:[0.20076367 0.83850867]
action based on polilcy:[-0.77729166  0.7385696 ]
action based on polilcy:[0.3917838 0.9958064]
action based on polilcy:[-0.8311051   0.74808776]
action based on polilcy:[-0.4425051  0.5031784]
action based on polilcy:[-0.5659031   0.48500922]
action based on polilcy:[-0.4676995   0.53646374]
action based on polilcy:[-0.60439456  0.39574265]
action based on polilcy:[-0.51669157  0.42270708]
action based on polilcy:[-0.98535836  0.16174562]
action based on polilcy:[-0.9879466   0.08036361]
Total T: 19012 Episode Num: 1124 Episode T: 11 Reward: -1030.120993
action based on polilcy:[0.16066584 0.8407818 ]
action based on polilcy:[-0.93459517  0.8475119 ]
action based on polilcy:[-0.53241956  0.49102068]
action based on polilcy:[-0.49798745  0.42661384]
action based on polilcy:[-0.37207827  0.53206587]
action based on polilcy:[-0.5676882  0.6812258]
action based on polilcy:[-0.48219466  0.61781484]
action based on polilcy:[-0.5755299  0.48267

action based on polilcy:[0.1871711 0.854066 ]
action based on polilcy:[-0.9343337  0.8682364]
action based on polilcy:[-0.5860512   0.49154422]
action based on polilcy:[-0.6050533  0.6533735]
action based on polilcy:[-0.5180973   0.57932436]
action based on polilcy:[-0.35471132  0.5879857 ]
action based on polilcy:[-0.429832   0.5125598]
action based on polilcy:[-0.4512411  0.4979403]
action based on polilcy:[-0.6633698   0.44128895]
action based on polilcy:[-0.65559167  0.7552005 ]
action based on polilcy:[-0.59023744  0.49160174]
action based on polilcy:[-0.527843   0.4219818]
action based on polilcy:[-0.47399265  0.48029432]
Total T: 19162 Episode Num: 1138 Episode T: 13 Reward: -1300.034554
action based on polilcy:[0.21156885 0.8615977 ]
action based on polilcy:[-0.9429813  0.8552103]
action based on polilcy:[-0.5131297  0.5034896]
action based on polilcy:[-0.37727514  0.7499234 ]
action based on polilcy:[-0.35379666  0.8151706 ]
action based on polilcy:[-0.5528673   0.65395296]
ac

action based on polilcy:[0.17248622 0.8386192 ]
action based on polilcy:[-0.16932854  0.92878026]
action based on polilcy:[-0.7984728  0.7356733]
action based on polilcy:[-0.7470398   0.41210786]
action based on polilcy:[-0.41342533  0.42770165]
action based on polilcy:[-0.45600304  0.42055738]
action based on polilcy:[-0.50590825  0.5127177 ]
action based on polilcy:[-0.5899631   0.43579346]
Total T: 19335 Episode Num: 1152 Episode T: 8 Reward: -1143.531020
action based on polilcy:[0.12174413 0.8377359 ]
action based on polilcy:[-0.7940407  0.8597537]
action based on polilcy:[-0.51660293  0.5370273 ]
action based on polilcy:[-0.5449548   0.46470726]
action based on polilcy:[-0.5998087   0.35547152]
action based on polilcy:[-0.9594633   0.23799185]
action based on polilcy:[-0.2753412   0.31090298]
Total T: 19342 Episode Num: 1153 Episode T: 7 Reward: -639.801762
action based on polilcy:[0.17383651 0.84465724]
action based on polilcy:[-0.34737304  0.92203224]
action based on polilcy:[-0

action based on polilcy:[0.16489735 0.8420439 ]
action based on polilcy:[-0.93413603  0.88559955]
action based on polilcy:[-0.626585   0.6529381]
action based on polilcy:[-0.58453554  0.56634104]
action based on polilcy:[-0.49951348  0.59944797]
action based on polilcy:[-0.7721587   0.40494817]
action based on polilcy:[-0.7181475  0.7798214]
action based on polilcy:[-0.61822957  0.47722572]
action based on polilcy:[-0.5226412   0.41426486]
action based on polilcy:[-0.9889542   0.14689276]
Total T: 19502 Episode Num: 1167 Episode T: 10 Reward: -1218.972738
action based on polilcy:[0.13916473 0.8386791 ]
action based on polilcy:[-0.5966836  0.6594393]
action based on polilcy:[0.39803836 0.9930954 ]
action based on polilcy:[-0.75790983  0.73016787]
action based on polilcy:[-0.48717293  0.523191  ]
action based on polilcy:[-0.49941266  0.4422974 ]
action based on polilcy:[-0.44381312  0.4463681 ]
action based on polilcy:[-0.29731724  0.34918156]
action based on polilcy:[-0.5650737   0.2267

action based on polilcy:[0.18465874 0.8357035 ]
action based on polilcy:[-0.79050034  0.8010295 ]
action based on polilcy:[-0.52099943  0.58992994]
action based on polilcy:[-0.4402436   0.42302066]
action based on polilcy:[-0.31879795  0.41100645]
action based on polilcy:[-0.2985883   0.31044844]
action based on polilcy:[-0.14386335  0.2825162 ]
Total T: 19648 Episode Num: 1180 Episode T: 7 Reward: -825.917356
action based on polilcy:[0.0991993 0.8481304]
action based on polilcy:[-0.69316995  0.7570294 ]
action based on polilcy:[-0.5509125   0.53053075]
action based on polilcy:[-0.8744248   0.76387525]
action based on polilcy:[-0.700094   0.7356614]
action based on polilcy:[-0.64249647  0.5543798 ]
action based on polilcy:[-0.43892598  0.45262888]
action based on polilcy:[-0.48332763  0.8891772 ]
action based on polilcy:[-0.6365137   0.52165425]
action based on polilcy:[-0.64344645  0.5859989 ]
action based on polilcy:[-0.485026   0.4644496]
Total T: 19659 Episode Num: 1181 Episode T: 

action based on polilcy:[0.14383006 0.8482964 ]
action based on polilcy:[-0.6433133   0.66276133]
action based on polilcy:[-0.63451165  0.4997309 ]
action based on polilcy:[-0.52456033  0.57277524]
action based on polilcy:[-0.42970195  0.52674794]
action based on polilcy:[-0.20691288  0.6808667 ]
action based on polilcy:[-0.22689207  0.9004883 ]
action based on polilcy:[-0.87613356  0.9108793 ]
action based on polilcy:[-0.63581514  0.586253  ]
action based on polilcy:[-0.517033    0.42733797]
action based on polilcy:[-0.96845806  0.17746529]
action based on polilcy:[-0.29766303  0.2885452 ]
Total T: 19813 Episode Num: 1193 Episode T: 12 Reward: -1162.320517
action based on polilcy:[0.14409779 0.8420191 ]
action based on polilcy:[-0.22009626  1.        ]
action based on polilcy:[-0.30653042  0.828467  ]
action based on polilcy:[-0.51130944  0.574133  ]
action based on polilcy:[-0.5524338  0.7124881]
action based on polilcy:[-0.5507951   0.44017392]
action based on polilcy:[-0.4231134   

action based on polilcy:[0.14655165 0.84823084]
action based on polilcy:[-0.76924753  0.82267296]
action based on polilcy:[-0.42489415  0.52560055]
action based on polilcy:[-0.52239776  0.44094992]
action based on polilcy:[-0.9430535  0.2243514]
action based on polilcy:[-0.12189619  0.34243202]
action based on polilcy:[-0.9447938   0.18388794]
action based on polilcy:[-0.2944235   0.28968206]
Total T: 19974 Episode Num: 1209 Episode T: 8 Reward: -655.266909
action based on polilcy:[0.15898648 0.8344474 ]
action based on polilcy:[-0.24539544  0.6235051 ]
action based on polilcy:[-0.31680357  0.9309297 ]
action based on polilcy:[-0.46461523  0.5360625 ]
action based on polilcy:[-0.48144898  0.54286265]
action based on polilcy:[-0.39175463  0.47105506]
action based on polilcy:[-0.48474023  0.5746014 ]
action based on polilcy:[-0.18470387  0.50328887]
action based on polilcy:[0.35346946 0.97401565]
action based on polilcy:[-0.764758    0.80916804]
action based on polilcy:[-0.56424797  0.46

action based on polilcy:[0.0506075  0.84425896]
action based on polilcy:[-0.9178076  0.8744456]
action based on polilcy:[-0.8313062   0.74826014]
action based on polilcy:[-0.75777817  0.34319943]
action based on polilcy:[-0.5858074   0.34227976]
action based on polilcy:[-0.9006663   0.18102756]
action based on polilcy:[-0.7624085  0.1921516]
action based on polilcy:[-0.83920485  0.21209548]
action based on polilcy:[-0.6912672   0.25186807]
action based on polilcy:[-0.8476923   0.21561621]
Total T: 20630 Episode Num: 1234 Episode T: 10 Reward: -708.983785
action based on polilcy:[0.11004625 0.84894675]
action based on polilcy:[-0.7658646  0.763517 ]
action based on polilcy:[-0.51606727  0.6466038 ]
action based on polilcy:[-0.52485585  0.61809456]
action based on polilcy:[-0.9114645  0.8032917]
action based on polilcy:[-0.74391425  0.6218643 ]
action based on polilcy:[-0.718402   0.8378955]
action based on polilcy:[-0.94912064  0.84260786]
action based on polilcy:[-0.50249255  0.4803012

action based on polilcy:[0.06033938 0.88825196]
action based on polilcy:[-0.74647456  0.7386316 ]
action based on polilcy:[-0.84047043  0.7206364 ]
action based on polilcy:[-0.8121104  0.3607328]
action based on polilcy:[-0.8454044   0.21950714]
action based on polilcy:[-0.8156443  0.2018683]
action based on polilcy:[-0.80850095  0.17646448]
action based on polilcy:[-0.19034813  0.369639  ]
action based on polilcy:[-0.8201131   0.16788295]
Total T: 20809 Episode Num: 1250 Episode T: 9 Reward: -554.731555
action based on polilcy:[-0.00958465  0.87181723]
action based on polilcy:[-0.9696514  0.9622195]
action based on polilcy:[-0.8810357  0.8074434]
action based on polilcy:[-0.47841933  0.54082984]
action based on polilcy:[-0.7035327   0.41815093]
action based on polilcy:[-0.627398    0.35294616]
action based on polilcy:[-0.7103534   0.41883922]
action based on polilcy:[-0.44111747  0.40791827]
action based on polilcy:[-0.24133547  0.39225587]
Total T: 20818 Episode Num: 1251 Episode T: 

action based on polilcy:[-0.04548927  0.8902949 ]
action based on polilcy:[-0.77530175  0.73347163]
action based on polilcy:[-0.625969    0.51932156]
action based on polilcy:[-0.8653183   0.25140816]
action based on polilcy:[-0.31828997  0.42006633]
action based on polilcy:[-0.4757718   0.37797165]
action based on polilcy:[-0.31216368  0.41604364]
action based on polilcy:[-0.524073   0.3730816]
action based on polilcy:[-0.61089814  0.27310598]
action based on polilcy:[-0.31087005  0.5427395 ]
action based on polilcy:[-0.8102672   0.34277865]
Total T: 20969 Episode Num: 1265 Episode T: 11 Reward: -738.170746
action based on polilcy:[-0.05757837  0.887858  ]
action based on polilcy:[-0.71043944  0.686334  ]
action based on polilcy:[-0.56066996  0.6148131 ]
action based on polilcy:[-0.7735616   0.48598146]
action based on polilcy:[-0.61795175  0.37723726]
action based on polilcy:[-0.7415484   0.23808578]
action based on polilcy:[-0.8867783  0.6696726]
action based on polilcy:[-0.80654275 

action based on polilcy:[-0.02875837  0.9055804 ]
action based on polilcy:[-0.97627383  0.959286  ]
action based on polilcy:[-0.9270035  0.8937445]
action based on polilcy:[-0.8533621   0.60370135]
action based on polilcy:[-0.4218837  0.5671128]
action based on polilcy:[-0.8015376   0.57790947]
action based on polilcy:[-0.54463124  0.3874062 ]
action based on polilcy:[-0.8471271   0.23378032]
action based on polilcy:[-0.7018824   0.51875037]
Total T: 21117 Episode Num: 1280 Episode T: 9 Reward: -1150.448074
action based on polilcy:[-0.02314262  0.90884584]
action based on polilcy:[-0.03561153  0.8452816 ]
action based on polilcy:[0.05340678 0.87754136]
action based on polilcy:[-0.9295311   0.91332185]
action based on polilcy:[-0.56096077  0.61997306]
action based on polilcy:[-0.72360253  0.6511694 ]
action based on polilcy:[-0.7899512  0.454304 ]
action based on polilcy:[-0.6402015   0.40880167]
action based on polilcy:[-0.507216   0.5630401]
action based on polilcy:[-0.86244136  0.353

action based on polilcy:[-0.15180872  0.9224688 ]
action based on polilcy:[-0.80878305  0.73036706]
action based on polilcy:[-0.8104525  0.5520545]
action based on polilcy:[-0.7505408   0.42431247]
action based on polilcy:[-0.8905266  0.6605494]
action based on polilcy:[-0.7490432   0.87939566]
action based on polilcy:[-0.81146634  0.5872118 ]
action based on polilcy:[-0.87123924  0.36333263]
action based on polilcy:[-0.7135517   0.63799554]
action based on polilcy:[-0.9027546   0.37985942]
action based on polilcy:[-0.9207279   0.21871464]
action based on polilcy:[-0.92020124  0.17613031]
action based on polilcy:[-0.47925067  0.2842587 ]
Total T: 21270 Episode Num: 1293 Episode T: 13 Reward: -656.081484
action based on polilcy:[-0.08784041  0.91116   ]
action based on polilcy:[-0.9823317  0.9661098]
action based on polilcy:[-0.78611636  0.5376422 ]
action based on polilcy:[-0.85425806  0.2836162 ]
action based on polilcy:[-0.92072344  0.20423417]
action based on polilcy:[-0.93175685  0

action based on polilcy:[-0.12945275  0.93477094]
action based on polilcy:[-0.98125196  0.97956556]
action based on polilcy:[-0.68933856  0.6895399 ]
action based on polilcy:[-0.76083136  0.59381175]
action based on polilcy:[-0.5468995  0.660578 ]
action based on polilcy:[-0.7181889  0.8429915]
action based on polilcy:[-0.8204373  0.6764794]
action based on polilcy:[-0.7662943   0.55046797]
action based on polilcy:[-0.9007933   0.95297366]
action based on polilcy:[-0.96521217  0.7424282 ]
action based on polilcy:[-0.3989933  0.7231815]
action based on polilcy:[-0.68159163  0.70886815]
action based on polilcy:[-0.90804756  0.8257199 ]
action based on polilcy:[-0.73362863  0.62704015]
action based on polilcy:[-0.3356544   0.65761834]
action based on polilcy:[-0.8502545  0.6688354]
action based on polilcy:[-0.46227807  0.64895725]
action based on polilcy:[-0.67271554  0.6137091 ]
action based on polilcy:[-0.9441719   0.42500103]
action based on polilcy:[-0.9391157   0.22564086]
Total T: 2

action based on polilcy:[-0.17056961  0.9251102 ]
action based on polilcy:[-0.99162054  0.9739494 ]
action based on polilcy:[-0.6995226   0.78977937]
action based on polilcy:[-0.97362113  0.8973669 ]
action based on polilcy:[-0.72786653  0.6262512 ]
action based on polilcy:[-0.9215743  0.4117194]
action based on polilcy:[-0.91782475  0.2230462 ]
action based on polilcy:[-0.81612337  0.17508094]
action based on polilcy:[-0.84152913  0.20548472]
action based on polilcy:[-0.8260562   0.31776497]
action based on polilcy:[-0.8412292  0.2207678]
action based on polilcy:[-0.8639642   0.21020111]
Total T: 21589 Episode Num: 1323 Episode T: 12 Reward: -944.066014
action based on polilcy:[-0.145213    0.92780745]
action based on polilcy:[-0.7382641   0.43468058]
action based on polilcy:[-0.7796263  0.5498977]
action based on polilcy:[-0.8301363   0.84551084]
action based on polilcy:[-0.7361617  0.6085208]
action based on polilcy:[-0.9372031  0.9133687]
action based on polilcy:[-0.58401495  0.738

action based on polilcy:[-0.14963719  0.9233186 ]
action based on polilcy:[-0.12825799  0.9944921 ]
action based on polilcy:[-0.8224528  0.787465 ]
action based on polilcy:[-0.92795783  0.61157256]
action based on polilcy:[-0.56936854  0.61876404]
action based on polilcy:[-0.8289021   0.54241776]
action based on polilcy:[-0.9102153  0.288728 ]
action based on polilcy:[-0.5749462   0.33836517]
action based on polilcy:[-0.3010387  0.305442 ]
Total T: 21762 Episode Num: 1339 Episode T: 9 Reward: -787.281841
action based on polilcy:[-0.17388453  0.9209774 ]
action based on polilcy:[-0.78442955  0.78514576]
action based on polilcy:[-0.88653636  0.8883383 ]
action based on polilcy:[-0.9709006   0.85011685]
action based on polilcy:[-0.87732166  0.5400411 ]
action based on polilcy:[-0.92575884  0.26070297]
action based on polilcy:[-0.7212186   0.25488734]
action based on polilcy:[-0.28587055  0.31208163]
Total T: 21770 Episode Num: 1340 Episode T: 8 Reward: -534.865185
action based on polilcy:

action based on polilcy:[-0.08821245  0.87806803]
action based on polilcy:[-0.76580137  0.7551023 ]
action based on polilcy:[-0.9489934  0.7204375]
action based on polilcy:[-0.9511683  0.3521381]
action based on polilcy:[-0.92302805  0.18098752]
action based on polilcy:[-0.92038643  0.18866605]
action based on polilcy:[-0.8274007   0.19797857]
action based on polilcy:[-0.9249278  0.100137 ]
action based on polilcy:[-0.86019677  0.15851855]
action based on polilcy:[-0.91220576  0.17195225]
Total T: 21923 Episode Num: 1354 Episode T: 10 Reward: -438.574474
action based on polilcy:[-0.11840196  0.88375574]
action based on polilcy:[-0.9558464  0.8598312]
action based on polilcy:[-0.6407051   0.74068385]
action based on polilcy:[-0.92625785  0.76913834]
action based on polilcy:[-0.91942996  0.28388035]
action based on polilcy:[-0.88769364  0.33962506]
action based on polilcy:[-0.49841812  0.58666533]
action based on polilcy:[-0.9471537  0.4330329]
action based on polilcy:[-0.92749274  0.195

action based on polilcy:[-0.13199829  0.80438626]
action based on polilcy:[-0.8219146  0.5965455]
action based on polilcy:[-0.8408573  0.7557517]
action based on polilcy:[-0.7513031   0.55459327]
action based on polilcy:[-0.85746264  0.4360198 ]
action based on polilcy:[-0.8781319  0.2051953]
action based on polilcy:[-0.8691752  0.1873668]
action based on polilcy:[-0.42066866  0.3846235 ]
action based on polilcy:[-0.9320303   0.29019463]
action based on polilcy:[-0.8918149   0.21064387]
action based on polilcy:[-0.8299751   0.21561305]
action based on polilcy:[-0.9030776   0.23985903]
Total T: 22554 Episode Num: 1384 Episode T: 12 Reward: -949.552481
action based on polilcy:[-0.13628803  0.81386155]
action based on polilcy:[-0.09939481  0.8744559 ]
action based on polilcy:[-0.9292525  0.974433 ]
action based on polilcy:[-0.8189843   0.55601895]
action based on polilcy:[-0.5948299  0.5776205]
action based on polilcy:[-0.8913523   0.31857467]
action based on polilcy:[-0.9799433  0.154384

action based on polilcy:[-0.14774995  0.8001701 ]
action based on polilcy:[-0.8652637  0.6655654]
action based on polilcy:[-0.8682999  0.5336786]
action based on polilcy:[-0.8444663   0.57251614]
action based on polilcy:[-0.5084911   0.90617126]
action based on polilcy:[0.14642213 0.88530385]
action based on polilcy:[-0.838769   0.5409858]
action based on polilcy:[-0.8282945   0.57439315]
action based on polilcy:[-0.7636008  0.5833857]
action based on polilcy:[-0.8716588  0.5897279]
action based on polilcy:[-0.9643167  0.518296 ]
action based on polilcy:[-0.34491023  0.48405674]
action based on polilcy:[-0.8549993   0.40437925]
action based on polilcy:[-0.81663036  0.2246936 ]
action based on polilcy:[-0.86537164  0.22565341]
action based on polilcy:[-0.9183086   0.18855962]
action based on polilcy:[-0.9068434   0.15031485]
Total T: 22726 Episode Num: 1396 Episode T: 17 Reward: -1279.490317
action based on polilcy:[-0.07976334  0.7980186 ]
action based on polilcy:[-0.88671964  0.662788

action based on polilcy:[-0.16930985  0.75790423]
action based on polilcy:[-0.16931781  0.94312966]
action based on polilcy:[-0.8280012  0.5040344]
action based on polilcy:[-0.960004    0.72991604]
action based on polilcy:[-0.8841756   0.35007858]
action based on polilcy:[-0.8549536  0.3236655]
action based on polilcy:[-0.8911152   0.20356943]
action based on polilcy:[-0.97923064  0.15627684]
action based on polilcy:[-0.9188459   0.16851172]
action based on polilcy:[-0.89289457  0.15566377]
action based on polilcy:[-0.7747834   0.25078702]
Total T: 22873 Episode Num: 1407 Episode T: 11 Reward: -1087.091869
action based on polilcy:[-0.12447181  0.7405126 ]
action based on polilcy:[-0.8695822  0.6723337]
action based on polilcy:[-0.7571707   0.47073978]
action based on polilcy:[-0.7853416  0.5534005]
action based on polilcy:[-0.63028884  0.5125725 ]
action based on polilcy:[-0.8428618   0.42448077]
action based on polilcy:[-0.8930158   0.19616595]
action based on polilcy:[-0.8913203   0.

action based on polilcy:[-0.06901    0.7889664]
action based on polilcy:[-0.80507165  0.56559384]
action based on polilcy:[-0.9001332   0.62091964]
action based on polilcy:[-0.5195826  0.5254352]
action based on polilcy:[-0.85674965  0.52719855]
action based on polilcy:[-0.8229793   0.24958095]
action based on polilcy:[-0.6775683   0.30196244]
action based on polilcy:[-0.7330383  0.517248 ]
action based on polilcy:[-0.63792515  0.48506942]
action based on polilcy:[-0.89465636  0.38011357]
Total T: 23023 Episode Num: 1421 Episode T: 10 Reward: -940.295762
action based on polilcy:[-0.21513449  0.7651029 ]
action based on polilcy:[-0.97119504  0.962653  ]
action based on polilcy:[-0.861394    0.48805535]
action based on polilcy:[-0.9197027   0.23685868]
action based on polilcy:[-0.93880963  0.19656232]
action based on polilcy:[-0.9578104   0.16050678]
action based on polilcy:[-0.54473317  0.22851501]
action based on polilcy:[-0.7886144  0.2220805]
Total T: 23031 Episode Num: 1422 Episode 

action based on polilcy:[-0.07230036  0.7880641 ]
action based on polilcy:[-0.8847721   0.80207634]
action based on polilcy:[-0.84815353  0.7054087 ]
action based on polilcy:[-0.9335696  0.7719867]
action based on polilcy:[-0.71418774  0.5423989 ]
action based on polilcy:[-0.75187457  0.5430558 ]
action based on polilcy:[-0.7245633   0.42553324]
action based on polilcy:[-0.8531493   0.24504979]
action based on polilcy:[-0.9799048   0.30878076]
action based on polilcy:[-0.8614703  0.2299778]
action based on polilcy:[-0.797717   0.3789252]
action based on polilcy:[-0.6926687   0.33879843]
Total T: 23205 Episode Num: 1437 Episode T: 12 Reward: -1319.309427
action based on polilcy:[-0.07754815  0.76778466]
action based on polilcy:[0.0136208 0.838869 ]
action based on polilcy:[-0.11721167  0.96538544]
action based on polilcy:[-0.7617738   0.95122397]
action based on polilcy:[-0.7914579   0.64055693]
action based on polilcy:[-0.8185991   0.76213324]
action based on polilcy:[0.20774022 0.9098

action based on polilcy:[-0.10537029  0.7612113 ]
action based on polilcy:[-0.81669885  0.6371156 ]
action based on polilcy:[-0.8328209  0.5426227]
action based on polilcy:[-0.9009508  0.728395 ]
action based on polilcy:[-0.80774564  0.68475425]
action based on polilcy:[-0.822024    0.48644853]
action based on polilcy:[-0.88690394  0.48373583]
action based on polilcy:[-0.8857988   0.49343678]
action based on polilcy:[-0.32979557  0.8026704 ]
action based on polilcy:[-0.83505183  0.520523  ]
action based on polilcy:[-0.8090375  0.508183 ]
action based on polilcy:[-0.8894517  0.928565 ]
action based on polilcy:[-0.81031835  0.43648404]
action based on polilcy:[-0.5673958   0.34653643]
action based on polilcy:[-0.47937018  0.43708643]
action based on polilcy:[-0.77367735  0.6582947 ]
action based on polilcy:[-0.6885354   0.56996727]
action based on polilcy:[-0.8344567   0.49353296]
action based on polilcy:[-0.8534919   0.23642851]
action based on polilcy:[-0.77720606  0.20119888]
action b

action based on polilcy:[-0.09521402  0.80235344]
action based on polilcy:[-0.05966576  0.90071964]
action based on polilcy:[-0.7595047  0.940307 ]
action based on polilcy:[-0.6806131  0.5165567]
action based on polilcy:[-0.77514297  0.46140334]
action based on polilcy:[-0.99778724  0.38314337]
action based on polilcy:[-0.93138695  0.2327785 ]
action based on polilcy:[-0.9407566   0.21477327]
action based on polilcy:[-0.94877374  0.20433171]
action based on polilcy:[-0.9369418  0.1600402]
Total T: 23537 Episode Num: 1460 Episode T: 10 Reward: -903.750124
action based on polilcy:[-0.10905633  0.78641593]
action based on polilcy:[-0.76002645  0.6951746 ]
action based on polilcy:[-0.8746313   0.60062444]
action based on polilcy:[-0.91586363  0.5080013 ]
action based on polilcy:[-0.7737886   0.69636595]
action based on polilcy:[-0.79775953  0.67734015]
action based on polilcy:[-0.7720264  0.6719495]
action based on polilcy:[-0.5729705  0.5389103]
action based on polilcy:[-0.735948    0.479

action based on polilcy:[-0.08781115  0.76603603]
action based on polilcy:[-0.7867128   0.81461203]
action based on polilcy:[0.00326278 0.9063906 ]
action based on polilcy:[-0.73349595  0.6987444 ]
action based on polilcy:[-0.76346433  0.5280812 ]
action based on polilcy:[-0.6473044   0.46401167]
action based on polilcy:[-0.98799616  0.27289516]
action based on polilcy:[-0.8702225   0.15753385]
action based on polilcy:[-0.73619795  0.49943748]
action based on polilcy:[-0.69683856  0.4789918 ]
action based on polilcy:[-0.8002525  0.6181896]
action based on polilcy:[-0.5492774   0.47860238]
action based on polilcy:[-0.4442817   0.44355792]
action based on polilcy:[-0.9257029   0.34768245]
action based on polilcy:[-0.9071918   0.21645805]
Total T: 23696 Episode Num: 1472 Episode T: 15 Reward: -1313.114859
action based on polilcy:[-0.00545983  0.7845677 ]
action based on polilcy:[-0.7019489  0.8975017]
action based on polilcy:[-0.9058698  0.6247952]
action based on polilcy:[-0.99655914  0.

action based on polilcy:[-0.9466498   0.52978164]
action based on polilcy:[-0.9821059   0.24388029]
action based on polilcy:[-0.94782466  0.2033262 ]
action based on polilcy:[-0.97518903  0.07864568]
action based on polilcy:[-0.23335166 -0.02999822]
Total T: 23853 Episode Num: 1485 Episode T: 24 Reward: -2726.537820
action based on polilcy:[-0.04674787  0.8022084 ]
action based on polilcy:[-0.6033461  0.9509552]
action based on polilcy:[-0.7092513  0.5384871]
action based on polilcy:[-0.7232568  0.5057185]
action based on polilcy:[-0.83775353  0.52440476]
action based on polilcy:[-0.6795694  0.6927917]
action based on polilcy:[-0.8936199   0.57247484]
action based on polilcy:[-0.83135706  0.528383  ]
action based on polilcy:[-0.90248114  0.4792706 ]
action based on polilcy:[-0.8888931  0.5708327]
action based on polilcy:[-0.81045234  0.61947453]
action based on polilcy:[-0.7400734   0.73486197]
action based on polilcy:[-0.7613729   0.61785984]
action based on polilcy:[-0.6842823  0.505

Total T: 24044 Episode Num: 1497 Episode T: 43 Reward: -5201.196658
Total T: 24066 Episode Num: 1498 Episode T: 22 Reward: -3550.722180
Total T: 24115 Episode Num: 1499 Episode T: 49 Reward: -7308.915104
Total T: 24162 Episode Num: 1500 Episode T: 47 Reward: -7037.869413
Total T: 24176 Episode Num: 1501 Episode T: 14 Reward: -2088.381598
Total T: 24191 Episode Num: 1502 Episode T: 15 Reward: -2111.195097
Total T: 24238 Episode Num: 1503 Episode T: 47 Reward: -7151.749907
Total T: 24248 Episode Num: 1504 Episode T: 10 Reward: -1348.448519
Total T: 24259 Episode Num: 1505 Episode T: 11 Reward: -1524.306966
Total T: 24303 Episode Num: 1506 Episode T: 44 Reward: -7320.336753
Total T: 24323 Episode Num: 1507 Episode T: 20 Reward: -2732.691983
Total T: 24368 Episode Num: 1508 Episode T: 45 Reward: -7245.645662
Total T: 24422 Episode Num: 1509 Episode T: 54 Reward: -7125.217814
Total T: 24472 Episode Num: 1510 Episode T: 50 Reward: -7333.976560
Total T: 24500 Episode Num: 1511 Episode T: 28 R

action based on polilcy:[0.03096558 0.82982993]
action based on polilcy:[-0.15939534  0.9937875 ]
action based on polilcy:[-0.7562916   0.55231965]
action based on polilcy:[-0.81181645  0.49212813]
action based on polilcy:[-0.7890997   0.40196028]
action based on polilcy:[-0.86555475  0.24680063]
action based on polilcy:[-0.85889363  0.14898098]
action based on polilcy:[-0.73751765  0.4765312 ]
action based on polilcy:[-0.85611665  0.24748735]
action based on polilcy:[-0.7340569   0.30026022]
action based on polilcy:[-0.7331314   0.22695091]
action based on polilcy:[-0.9182515   0.15251462]
Total T: 24674 Episode Num: 1525 Episode T: 12 Reward: -807.897516
action based on polilcy:[0.05347089 0.8423787 ]
action based on polilcy:[-0.67916    0.8089335]
action based on polilcy:[-0.6916466  0.4673908]
action based on polilcy:[-0.67315525  0.5443531 ]
action based on polilcy:[-0.79596025  0.46742168]
action based on polilcy:[-0.7125912  0.5180962]
action based on polilcy:[-0.6762593   0.489

action based on polilcy:[0.02867614 0.8523057 ]
action based on polilcy:[-0.79970765  0.502117  ]
action based on polilcy:[-0.8085773   0.41682038]
action based on polilcy:[-0.81594396  0.2971764 ]
action based on polilcy:[-0.7706796   0.31607932]
action based on polilcy:[-0.8921416  0.2209845]
action based on polilcy:[-0.7554124   0.18453966]
action based on polilcy:[-0.84538347  0.16133988]
Total T: 24848 Episode Num: 1540 Episode T: 8 Reward: -422.945790
action based on polilcy:[0.03993347 0.8473261 ]
action based on polilcy:[-0.73710036  0.5584468 ]
action based on polilcy:[-0.72630334  0.45916986]
action based on polilcy:[-0.77542293  0.45564786]
action based on polilcy:[-0.7861464   0.21968313]
action based on polilcy:[-0.691282   0.5200958]
action based on polilcy:[-0.66518605  0.726382  ]
action based on polilcy:[-0.6685921  0.9769829]
action based on polilcy:[-0.77261996  0.40755934]
action based on polilcy:[-0.6329708   0.92940485]
action based on polilcy:[-0.61906964  0.8686

---------------------------------------
Episode_num: 1554, Evaluation over 1 episodes: -634.861443
---------------------------------------
action based on polilcy:[-0.90417725  0.17918801]
Total T: 25001 Episode Num: 1554 Episode T: 18 Reward: -702.188900
action based on polilcy:[0.01728227 0.78977084]
action based on polilcy:[-0.6622351  0.8423531]
action based on polilcy:[-0.7321571   0.46637988]
action based on polilcy:[-0.79379684  0.29060957]
action based on polilcy:[-0.7199478   0.31791067]
action based on polilcy:[-0.74236107  0.25731292]
action based on polilcy:[-0.87983745  0.1850628 ]
action based on polilcy:[-0.67768884  0.16780542]
Total T: 25009 Episode Num: 1555 Episode T: 8 Reward: -502.989039
action based on polilcy:[-0.01070462  0.7595157 ]
action based on polilcy:[-0.6440334  0.7013634]
action based on polilcy:[-0.73339033  0.44678506]
action based on polilcy:[-0.7246412   0.56414306]
action based on polilcy:[-0.90133315  0.22646847]
action based on polilcy:[-0.847639

action based on polilcy:[-0.09592836  0.7012553 ]
action based on polilcy:[-0.5913929  0.8793689]
action based on polilcy:[-0.6238071   0.74214774]
action based on polilcy:[-0.66627216  0.6033814 ]
action based on polilcy:[-0.68967414  0.6306591 ]
action based on polilcy:[-0.73886836  0.5733733 ]
action based on polilcy:[-0.62693036  0.79465806]
action based on polilcy:[-0.7625992  0.4633204]
action based on polilcy:[-0.685337    0.57527876]
action based on polilcy:[-0.87628484  0.23340943]
action based on polilcy:[-0.67267907  0.33249462]
Total T: 25172 Episode Num: 1568 Episode T: 11 Reward: -1411.371391
action based on polilcy:[-0.06629374  0.7057152 ]
action based on polilcy:[-0.68620515  0.85835004]
action based on polilcy:[-0.7572087   0.44998726]
action based on polilcy:[-0.6802163  0.3508612]
action based on polilcy:[-0.88537073  0.24258581]
action based on polilcy:[-0.841395    0.24261376]
action based on polilcy:[-0.8221627   0.20340538]
action based on polilcy:[-0.64846194  

action based on polilcy:[-0.06641112  0.6781317 ]
action based on polilcy:[-0.5613713   0.67902505]
action based on polilcy:[-0.755631   0.6478101]
action based on polilcy:[-0.665962   0.4231286]
action based on polilcy:[-0.75672257  0.31434035]
action based on polilcy:[-0.5566014  0.363375 ]
action based on polilcy:[-0.5192098   0.28700083]
Total T: 25336 Episode Num: 1584 Episode T: 7 Reward: -743.157640
action based on polilcy:[-0.10562926  0.6631257 ]
action based on polilcy:[-0.76004076  0.6257833 ]
action based on polilcy:[-0.7575017   0.48856553]
action based on polilcy:[-0.26614738  0.7155463 ]
action based on polilcy:[-0.75168926  0.5455262 ]
action based on polilcy:[-0.84664464  0.50441   ]
action based on polilcy:[-0.8082758  0.5489315]
action based on polilcy:[-0.79107964  0.51704454]
action based on polilcy:[-0.86825997  0.48276514]
action based on polilcy:[-0.7761463  0.5248265]
action based on polilcy:[-0.7249431  0.5243423]
action based on polilcy:[-0.69332856  0.533034

action based on polilcy:[-0.227206  0.662451]
action based on polilcy:[-0.47474605  0.8260169 ]
action based on polilcy:[-0.73286474  0.4798612 ]
action based on polilcy:[-0.7018363   0.46769354]
action based on polilcy:[-0.80437195  0.7542387 ]
action based on polilcy:[-0.6258777  0.5274322]
action based on polilcy:[-0.5190222   0.38524246]
action based on polilcy:[-0.5189167  0.2967241]
action based on polilcy:[-0.5862652   0.20890585]
Total T: 25505 Episode Num: 1596 Episode T: 9 Reward: -1029.818407
action based on polilcy:[-0.18562213  0.66977155]
action based on polilcy:[-0.73774874  0.5065787 ]
action based on polilcy:[-0.74214715  0.46965203]
action based on polilcy:[-0.70653504  0.4492572 ]
action based on polilcy:[-0.7879446   0.32243255]
action based on polilcy:[-0.8808874   0.24086067]
action based on polilcy:[-0.85398245  0.22126232]
action based on polilcy:[-0.6774596   0.28994492]
action based on polilcy:[-0.6071098  0.4494076]
action based on polilcy:[-0.7261424   0.248

action based on polilcy:[-0.20134294  0.647766  ]
action based on polilcy:[-0.5204655  0.7283087]
action based on polilcy:[-0.7114594   0.49643528]
action based on polilcy:[-0.70861214  0.44548947]
action based on polilcy:[-0.5180305  0.4001219]
action based on polilcy:[-0.59827584  0.30400312]
action based on polilcy:[-0.87394285  0.179127  ]
action based on polilcy:[-0.84526205  0.18658018]
action based on polilcy:[-0.8416923   0.18101408]
action based on polilcy:[-0.84459627  0.19372158]
Total T: 25668 Episode Num: 1609 Episode T: 10 Reward: -544.439012
action based on polilcy:[-0.22489084  0.64145863]
action based on polilcy:[-0.3804709  0.8107425]
action based on polilcy:[-0.72555065  0.5179235 ]
action based on polilcy:[-0.7173475  0.4942735]
action based on polilcy:[-0.64175236  0.6283368 ]
action based on polilcy:[-0.73857033  0.6468978 ]
action based on polilcy:[-0.70949894  0.48741114]
action based on polilcy:[-0.7266618  0.4675975]
action based on polilcy:[-0.7962328  0.6813

action based on polilcy:[-0.24081308  0.652432  ]
action based on polilcy:[-0.66954494  0.67460716]
action based on polilcy:[-0.7000361  0.4809914]
action based on polilcy:[-0.7588184  0.3458137]
action based on polilcy:[-0.70147866  0.37788108]
action based on polilcy:[-0.75633013  0.38212243]
action based on polilcy:[-0.7721815  0.2627115]
action based on polilcy:[-0.6492504   0.42318082]
action based on polilcy:[-0.7964491  0.2911592]
action based on polilcy:[-0.5894035  0.3587343]
action based on polilcy:[-0.70983875  0.35366473]
Total T: 25821 Episode Num: 1620 Episode T: 11 Reward: -815.534229
action based on polilcy:[-0.21294527  0.6593393 ]
action based on polilcy:[-0.5955532  0.6691146]
action based on polilcy:[-0.73903453  0.53139496]
action based on polilcy:[-0.8418747   0.27350023]
action based on polilcy:[-0.67434704  0.26588854]
action based on polilcy:[-0.6276727   0.36022455]
action based on polilcy:[-0.49456757  0.34011143]
action based on polilcy:[-0.56146705  0.33958

Total T: 25966 Episode Num: 1633 Episode T: 9 Reward: -1134.187486
action based on polilcy:[-0.20923181  0.6659715 ]
action based on polilcy:[-0.6288219  0.5898543]
action based on polilcy:[-0.6723089   0.48791513]
action based on polilcy:[-0.6087153   0.68336254]
action based on polilcy:[-0.64219576  0.86121917]
action based on polilcy:[-0.7336719  0.5263069]
action based on polilcy:[-0.6495117  0.8370171]
action based on polilcy:[-0.71368945  0.63606185]
action based on polilcy:[-0.6693547   0.44837102]
action based on polilcy:[-0.88721514  0.2139015 ]
action based on polilcy:[-0.74209434  0.22667941]
Total T: 25977 Episode Num: 1634 Episode T: 11 Reward: -1370.497430
action based on polilcy:[-0.19692504  0.66631067]
action based on polilcy:[-0.7467575   0.83088535]
action based on polilcy:[-0.7244109   0.45560795]
action based on polilcy:[-0.882275    0.24951482]
action based on polilcy:[-0.47358674  0.34389293]
action based on polilcy:[-0.8385574   0.40641087]
action based on polil

action based on polilcy:[-0.23307043  0.6740655 ]
action based on polilcy:[-0.6497915   0.65183425]
action based on polilcy:[-0.6839506   0.48238856]
action based on polilcy:[-0.65474284  0.45112935]
action based on polilcy:[-0.66927755  0.46274027]
action based on polilcy:[-0.91384643  0.21630907]
action based on polilcy:[-0.8180629   0.21461049]
action based on polilcy:[-0.84104717  0.16993882]
action based on polilcy:[-0.7443105   0.18050924]
Total T: 26605 Episode Num: 1661 Episode T: 9 Reward: -561.926670
action based on polilcy:[-0.3113811   0.67519736]
action based on polilcy:[-0.69593465  0.6314796 ]
action based on polilcy:[-0.6643828   0.63426346]
action based on polilcy:[-0.7449832   0.64798033]
action based on polilcy:[-0.7167305  0.5221807]
action based on polilcy:[-0.6763804  0.4455785]
action based on polilcy:[-0.88259095  0.24155954]
action based on polilcy:[-0.5256282   0.26076844]
action based on polilcy:[-0.76075995  0.14643589]
Total T: 26614 Episode Num: 1662 Episo

action based on polilcy:[-0.8263727  0.5865613]
action based on polilcy:[-0.75620425  0.52084637]
action based on polilcy:[-0.6683665  0.6279819]
action based on polilcy:[-0.72478884  0.5835426 ]
action based on polilcy:[-0.78068864  0.49611515]
action based on polilcy:[-0.7912564   0.38636515]
action based on polilcy:[-0.6831326   0.42546672]
action based on polilcy:[-0.87629694  0.22528759]
action based on polilcy:[-0.5343065  0.3593455]
action based on polilcy:[-0.8413975   0.22120363]
action based on polilcy:[-0.70300853  0.25588658]
action based on polilcy:[-0.8457627   0.21644999]
Total T: 26765 Episode Num: 1675 Episode T: 23 Reward: -2020.593161
action based on polilcy:[-0.21553017  0.6666414 ]
action based on polilcy:[-0.61392164  0.6934842 ]
action based on polilcy:[-0.73219097  0.5553412 ]
action based on polilcy:[-0.6343241  0.6880044]
action based on polilcy:[-0.7347467  0.5655322]
action based on polilcy:[-0.7079564  0.5138597]
action based on polilcy:[-0.6187438   0.6239

action based on polilcy:[-0.22383815  0.64534557]
action based on polilcy:[-0.74641573  0.77904236]
action based on polilcy:[-0.81484085  0.6477908 ]
action based on polilcy:[-0.66114515  0.37468895]
action based on polilcy:[-0.82555354  0.26265848]
action based on polilcy:[-0.7965      0.18723199]
action based on polilcy:[-0.80352736  0.13126917]
action based on polilcy:[-0.84437686  0.07504278]
Total T: 26918 Episode Num: 1688 Episode T: 8 Reward: -616.806331
action based on polilcy:[-0.2295058  0.6624553]
action based on polilcy:[-0.7221608  0.5846684]
action based on polilcy:[-0.6820266   0.53222597]
action based on polilcy:[-0.7839288   0.37351257]
action based on polilcy:[-0.7780884  0.3328098]
action based on polilcy:[-0.48935184  0.37516418]
action based on polilcy:[-0.6121144  0.2669641]
action based on polilcy:[-0.79289174  0.1183508 ]
Total T: 26926 Episode Num: 1689 Episode T: 8 Reward: -594.721109
action based on polilcy:[-0.20587598  0.6641861 ]
action based on polilcy:[-

action based on polilcy:[-0.17153117  0.6467519 ]
action based on polilcy:[-0.59181464  0.6886295 ]
action based on polilcy:[-0.719256   0.5392364]
action based on polilcy:[-0.7898798   0.40765214]
action based on polilcy:[-0.6162458  0.509367 ]
action based on polilcy:[-0.775662    0.31770188]
action based on polilcy:[-0.5353577   0.42055887]
action based on polilcy:[-0.57975984  0.40777096]
Total T: 27063 Episode Num: 1702 Episode T: 8 Reward: -829.221832
action based on polilcy:[-0.14128877  0.63891155]
action based on polilcy:[-0.18402457  0.81520545]
action based on polilcy:[-0.55759555  0.7528857 ]
action based on polilcy:[-0.6520605  0.5998527]
action based on polilcy:[-0.717236   0.5995617]
action based on polilcy:[-0.83643335  0.79946136]
action based on polilcy:[-0.6945356   0.57692766]
action based on polilcy:[-0.627609    0.65543455]
action based on polilcy:[-0.7759518  0.428496 ]
action based on polilcy:[-0.7057228   0.44969895]
action based on polilcy:[-0.7815693   0.4230

action based on polilcy:[-0.1697642   0.62108946]
action based on polilcy:[-0.70796347  0.751448  ]
action based on polilcy:[-0.70442855  0.46748024]
action based on polilcy:[-0.87117773  0.3011481 ]
action based on polilcy:[-0.618375   0.4503456]
action based on polilcy:[-0.5052961   0.41566333]
action based on polilcy:[-0.7219386  0.4987727]
action based on polilcy:[-0.8524857   0.28458375]
Total T: 27241 Episode Num: 1716 Episode T: 8 Reward: -859.021323
action based on polilcy:[-0.21252055  0.63366926]
action based on polilcy:[-0.25164393  0.7312385 ]
action based on polilcy:[-0.7520264   0.56606597]
action based on polilcy:[-0.61812544  0.68373495]
action based on polilcy:[-0.6357279  0.5268501]
action based on polilcy:[-0.62853837  0.5030407 ]
action based on polilcy:[-0.7570602  0.6168251]
action based on polilcy:[-0.68708265  0.36088806]
action based on polilcy:[-0.5021783  0.4479802]
action based on polilcy:[-0.4875667   0.41751024]
action based on polilcy:[-0.49954963  0.3728

action based on polilcy:[-0.29091233  0.6374198 ]
action based on polilcy:[-0.7095528  0.6709777]
action based on polilcy:[-0.73100245  0.6490938 ]
action based on polilcy:[-0.6009767   0.68506074]
action based on polilcy:[-0.7531748   0.56562173]
action based on polilcy:[-0.72988594  0.5577909 ]
action based on polilcy:[-0.65639937  0.6805489 ]
action based on polilcy:[-0.71181643  0.60475683]
action based on polilcy:[-0.8199685   0.44755006]
action based on polilcy:[-0.8629703  0.7044228]
action based on polilcy:[-0.85730886  0.38419688]
action based on polilcy:[-0.8291923   0.68040633]
action based on polilcy:[-0.8163186   0.42377117]
action based on polilcy:[-0.7105669  0.6318773]
action based on polilcy:[-0.674739    0.46782303]
action based on polilcy:[-0.8837563   0.29991737]
action based on polilcy:[-0.76651776  0.32324958]
Total T: 27422 Episode Num: 1728 Episode T: 17 Reward: -1552.839153
action based on polilcy:[-0.1768439   0.62606454]
action based on polilcy:[-0.530887   0

action based on polilcy:[-0.23119661  0.6328661 ]
action based on polilcy:[-0.72064614  0.679419  ]
action based on polilcy:[-0.74161386  0.46689808]
action based on polilcy:[-0.89387023  0.7146425 ]
action based on polilcy:[-0.59061027  0.5035987 ]
action based on polilcy:[-0.7785288   0.44121218]
action based on polilcy:[-0.56570935  0.3754511 ]
Total T: 27604 Episode Num: 1741 Episode T: 7 Reward: -1107.398060
action based on polilcy:[-0.21069895  0.62354124]
action based on polilcy:[-0.5431442  0.6880311]
action based on polilcy:[-0.34276688  0.7365174 ]
action based on polilcy:[-0.88219416  0.77498275]
action based on polilcy:[-0.8113516   0.40355998]
action based on polilcy:[-0.53480977  0.44054914]
action based on polilcy:[-0.7969761  0.2781597]
action based on polilcy:[-0.7947218   0.25876957]
action based on polilcy:[-0.7926783   0.21181226]
action based on polilcy:[-0.8175756  0.2202605]
action based on polilcy:[-0.7929494   0.21597916]
Total T: 27615 Episode Num: 1742 Episod

action based on polilcy:[-0.17589504  0.64612234]
action based on polilcy:[-0.6142498   0.57933414]
action based on polilcy:[-0.49412912  0.54837155]
action based on polilcy:[-0.6916871   0.54946554]
action based on polilcy:[-0.8029249  0.6481639]
action based on polilcy:[-0.81093353  0.3583281 ]
action based on polilcy:[-0.45952016  0.4585159 ]
action based on polilcy:[-0.48001114  0.42495552]
action based on polilcy:[-0.7533784   0.19961016]
Total T: 27780 Episode Num: 1753 Episode T: 9 Reward: -746.163252
action based on polilcy:[-0.1901703  0.6454747]
action based on polilcy:[-0.57727396  0.6186942 ]
action based on polilcy:[-0.5450663  0.6243788]
action based on polilcy:[-0.636871    0.59377366]
action based on polilcy:[-0.78934014  0.83344007]
action based on polilcy:[-0.76356083  0.8366741 ]
action based on polilcy:[-0.60593665  0.5376839 ]
action based on polilcy:[-0.8252811  0.6241821]
action based on polilcy:[-0.83707833  0.3333905 ]
action based on polilcy:[-0.7822062   0.30

action based on polilcy:[-0.1911622  0.630633 ]
action based on polilcy:[-0.6059686  0.6042286]
action based on polilcy:[-0.5650675   0.61540794]
action based on polilcy:[-0.7697655   0.74596596]
action based on polilcy:[-0.84446245  0.6564685 ]
action based on polilcy:[-0.8753704  0.2894646]
action based on polilcy:[-0.6536368  0.4444866]
action based on polilcy:[-0.596705   0.4771257]
action based on polilcy:[-0.7700479   0.39690977]
action based on polilcy:[-0.52718425  0.38801238]
action based on polilcy:[-0.6776151   0.33096325]
action based on polilcy:[-0.6174153   0.34079167]
Total T: 27955 Episode Num: 1763 Episode T: 12 Reward: -944.253158
action based on polilcy:[-0.18721446  0.62961745]
action based on polilcy:[-0.69140184  0.78686446]
action based on polilcy:[-0.59460413  0.62177575]
action based on polilcy:[-0.5633234   0.61230326]
action based on polilcy:[-0.6346202   0.64174676]
action based on polilcy:[-0.754176  0.568339]
action based on polilcy:[-0.704707    0.4074641

action based on polilcy:[-0.793403   0.7155402]
action based on polilcy:[-0.65420336  0.5274068 ]
action based on polilcy:[-0.6896682   0.49902824]
action based on polilcy:[-0.71943146  0.52800655]
action based on polilcy:[-0.6680202   0.47563457]
action based on polilcy:[-0.5215606   0.39328483]
action based on polilcy:[-0.6213075  0.5664422]
action based on polilcy:[-0.633036    0.54231143]
action based on polilcy:[-0.63846445  0.5061277 ]
action based on polilcy:[-0.5853724  0.5811569]
action based on polilcy:[-0.6997244  0.5438845]
action based on polilcy:[-0.8558585   0.29507467]
action based on polilcy:[-0.6840518  0.2994838]
Total T: 28605 Episode Num: 1790 Episode T: 13 Reward: -1080.051324
action based on polilcy:[-0.7933841  0.7205398]
action based on polilcy:[-0.84039915  0.8110051 ]
action based on polilcy:[-0.8374113   0.80167735]
action based on polilcy:[-0.61344004  0.52902234]
action based on polilcy:[-0.6750139   0.49700424]
action based on polilcy:[-0.63124585  0.3539

action based on polilcy:[-0.79660696  0.689051  ]
action based on polilcy:[-0.75630236  0.63324594]
action based on polilcy:[-0.86953485  0.61666954]
action based on polilcy:[-0.53650343  0.45957935]
action based on polilcy:[-0.69959605  0.63464904]
action based on polilcy:[-0.5482563   0.48066494]
action based on polilcy:[-0.7304357  0.56175  ]
action based on polilcy:[-0.41048414  0.39505646]
action based on polilcy:[-0.44718993  0.35433912]
action based on polilcy:[-0.3367567  0.3914439]
Total T: 28779 Episode Num: 1805 Episode T: 10 Reward: -980.827441
action based on polilcy:[-0.8116406   0.70356023]
action based on polilcy:[-0.8897277  0.7334063]
action based on polilcy:[-0.7049459  0.4913449]
action based on polilcy:[-0.8416038  0.4661104]
action based on polilcy:[-0.79584676  0.3192693 ]
action based on polilcy:[-0.7761154   0.30156165]
action based on polilcy:[-0.8406423  0.2836717]
action based on polilcy:[-0.4875823  0.3949039]
action based on polilcy:[-0.45101872  0.3724668

action based on polilcy:[-0.77549946  0.69404   ]
action based on polilcy:[-0.55147725  0.5424253 ]
action based on polilcy:[-0.6893048   0.62917286]
action based on polilcy:[-0.6544274   0.59862053]
action based on polilcy:[-0.6835303   0.52511406]
action based on polilcy:[-0.77747494  0.46411222]
action based on polilcy:[-0.3472857   0.45778483]
action based on polilcy:[-0.2311327   0.41429585]
action based on polilcy:[-0.36652133  0.3929539 ]
Total T: 28953 Episode Num: 1821 Episode T: 9 Reward: -825.233981
action based on polilcy:[-0.7842106  0.6952378]
action based on polilcy:[-0.7848214   0.62820035]
action based on polilcy:[-0.6989647  0.4906533]
action based on polilcy:[-0.7660309  0.4540075]
action based on polilcy:[-0.5034269   0.47621098]
action based on polilcy:[-0.8447661   0.28425488]
action based on polilcy:[-0.8900809   0.25361502]
action based on polilcy:[-0.43633217  0.3840788 ]
action based on polilcy:[-0.8900924  0.1635711]
Total T: 28962 Episode Num: 1822 Episode T

action based on polilcy:[-0.8038688  0.6901287]
action based on polilcy:[-0.3099565   0.71242845]
action based on polilcy:[-0.69319344  0.5511324 ]
action based on polilcy:[-0.6402203  0.5369397]
action based on polilcy:[-0.74457914  0.60229903]
action based on polilcy:[-0.7568493  0.4749588]
action based on polilcy:[-0.7202132   0.47507477]
action based on polilcy:[-0.77049094  0.45622322]
action based on polilcy:[-0.62566113  0.41661078]
action based on polilcy:[-0.85316235  0.51547885]
action based on polilcy:[-0.9482096   0.23143041]
action based on polilcy:[-0.5955039   0.36310306]
action based on polilcy:[-0.9118781   0.23627758]
action based on polilcy:[-0.9599727   0.19517908]
action based on polilcy:[-0.93967307  0.15755396]
action based on polilcy:[-0.743793    0.15430535]
Total T: 29117 Episode Num: 1836 Episode T: 16 Reward: -1036.908634
action based on polilcy:[-0.7984724  0.6819086]
action based on polilcy:[-0.671484    0.47835103]
action based on polilcy:[-0.63801765  0.

action based on polilcy:[0.78097385 0.34028003]
action based on polilcy:[0.87529904 0.37707508]
action based on polilcy:[0.51792437 0.3686333 ]
action based on polilcy:[0.6910176  0.30484253]
action based on polilcy:[0.9120665 0.3360505]
action based on polilcy:[0.91839486 0.32857597]
Total T: 29296 Episode Num: 1849 Episode T: 39 Reward: -5094.998210
action based on polilcy:[-0.78978693  0.68438786]
action based on polilcy:[-0.5929033   0.52046335]
action based on polilcy:[-0.63178265  0.5557562 ]
action based on polilcy:[-0.8420339  0.5812441]
action based on polilcy:[-0.60478604  0.49284956]
action based on polilcy:[-0.78107405  0.4896237 ]
action based on polilcy:[-0.8590388  0.4813509]
action based on polilcy:[-0.9547049   0.20591073]
action based on polilcy:[-0.93293345  0.09780978]
action based on polilcy:[-0.5869592   0.30427927]
Total T: 29306 Episode Num: 1850 Episode T: 10 Reward: -758.905466
action based on polilcy:[-0.79255736  0.67241013]
action based on polilcy:[-0.64839

action based on polilcy:[-0.8066735  0.6661421]
action based on polilcy:[-0.676519   0.5431955]
action based on polilcy:[-0.7208376   0.59276396]
action based on polilcy:[-0.7294229   0.57202166]
action based on polilcy:[-0.6447443   0.56178975]
action based on polilcy:[-0.70774883  0.58196914]
action based on polilcy:[-0.88466084  0.7003233 ]
action based on polilcy:[-0.7829728   0.43965974]
action based on polilcy:[-0.83392704  0.5005738 ]
action based on polilcy:[-0.98055893  0.21773069]
action based on polilcy:[-0.9265125   0.48944676]
action based on polilcy:[-0.8940171   0.37505665]
action based on polilcy:[-0.8529737   0.40373397]
action based on polilcy:[-0.98222697  0.19445248]
action based on polilcy:[-0.8831295   0.33780208]
Total T: 29463 Episode Num: 1863 Episode T: 15 Reward: -1474.278463
action based on polilcy:[-0.7893981   0.66194415]
action based on polilcy:[-0.7131171   0.54124105]
action based on polilcy:[-0.69806886  0.46845764]
action based on polilcy:[-0.5533583 

action based on polilcy:[-0.7976265   0.66653216]
action based on polilcy:[-0.81125104  0.65078324]
action based on polilcy:[-0.7742987   0.46437865]
action based on polilcy:[-0.82541394  0.48546407]
action based on polilcy:[-0.91409177  0.34370995]
action based on polilcy:[-0.98278296  0.24057701]
action based on polilcy:[-0.98577696  0.16730243]
action based on polilcy:[-0.99077064  0.13302726]
action based on polilcy:[-0.9906265   0.06748897]
action based on polilcy:[-0.99204355  0.04491375]
Total T: 29610 Episode Num: 1874 Episode T: 10 Reward: -493.998232
action based on polilcy:[-0.78827965  0.66341996]
action based on polilcy:[-0.67293847  0.4742553 ]
action based on polilcy:[-0.88147783  0.5022086 ]
action based on polilcy:[-0.5830653  0.5599092]
action based on polilcy:[-0.7091101   0.46969026]
action based on polilcy:[-0.57735896  0.560498  ]
action based on polilcy:[-0.68965983  0.6333151 ]
action based on polilcy:[-0.740397    0.45769173]
action based on polilcy:[-0.872324 

action based on polilcy:[-0.80425036  0.67148423]
action based on polilcy:[-0.9252929  0.6812893]
action based on polilcy:[-0.77767026  0.46267882]
action based on polilcy:[-0.93981844  0.5747097 ]
action based on polilcy:[-0.8922359   0.32432005]
action based on polilcy:[-0.9790864   0.24533965]
action based on polilcy:[-0.9870505  0.1885616]
action based on polilcy:[-0.92948353  0.25651398]
action based on polilcy:[-0.94849145  0.38805845]
Total T: 29767 Episode Num: 1887 Episode T: 9 Reward: -880.684155
action based on polilcy:[-0.7561983   0.66334385]
action based on polilcy:[-0.69177216  0.48618236]
action based on polilcy:[-0.76344603  0.728808  ]
action based on polilcy:[-0.5155221   0.52477765]
action based on polilcy:[-0.71034604  0.6367667 ]
action based on polilcy:[-0.6316763   0.56538683]
action based on polilcy:[-0.62520593  0.54311013]
action based on polilcy:[-0.62231046  0.56768525]
action based on polilcy:[-0.6873499   0.46918795]
action based on polilcy:[-0.80061716  

action based on polilcy:[-0.76866746  0.6462544 ]
action based on polilcy:[-0.49847093  0.6792656 ]
action based on polilcy:[-0.6798223   0.46711117]
action based on polilcy:[-0.57917804  0.5140551 ]
action based on polilcy:[-0.662952    0.47439775]
action based on polilcy:[-0.7847091  0.4277622]
action based on polilcy:[-0.85267556  0.35067877]
action based on polilcy:[-0.96345353  0.23267423]
action based on polilcy:[-0.985248    0.11942823]
action based on polilcy:[-0.9732564   0.17821242]
action based on polilcy:[-0.9626881   0.20753989]
action based on polilcy:[-0.62754864  0.32295868]
action based on polilcy:[-0.66867125  0.39293736]
Total T: 29932 Episode Num: 1897 Episode T: 13 Reward: -882.478439
action based on polilcy:[-0.8086096   0.65083104]
action based on polilcy:[-0.4660267  0.663744 ]
action based on polilcy:[-0.7116729   0.49723056]
action based on polilcy:[-0.55258894  0.59773797]
action based on polilcy:[-0.6905035  0.5224004]
action based on polilcy:[-0.66045576  0

action based on polilcy:[-0.6034155   0.55814064]
action based on polilcy:[-0.60809183  0.52415764]
action based on polilcy:[-0.69841015  0.43231177]
action based on polilcy:[-0.7616557   0.41663894]
action based on polilcy:[-0.6766156   0.41941848]
action based on polilcy:[-0.8816948   0.23852997]
action based on polilcy:[-0.91189116  0.14255333]
action based on polilcy:[-0.8202      0.18008548]
action based on polilcy:[-0.9523806   0.05620299]
action based on polilcy:[-0.964378    0.06633221]
Total T: 30561 Episode Num: 1922 Episode T: 10 Reward: -563.333680
action based on polilcy:[-0.61987746  0.5513878 ]
action based on polilcy:[-0.7006534   0.45817065]
action based on polilcy:[-0.7853762   0.50754905]
action based on polilcy:[-0.7270365   0.45090145]
action based on polilcy:[-0.7395857   0.42248678]
action based on polilcy:[-0.9702819   0.23489425]
action based on polilcy:[-0.95163393  0.15513244]
action based on polilcy:[-0.94997257  0.08309329]
Total T: 30569 Episode Num: 1923 

action based on polilcy:[-0.5883273   0.53902435]
action based on polilcy:[-0.63715124  0.48040816]
action based on polilcy:[-0.6929494  0.512635 ]
action based on polilcy:[-0.545076    0.53603977]
action based on polilcy:[-0.6556641  0.4782253]
action based on polilcy:[-0.5807853   0.43846935]
action based on polilcy:[-0.7542984   0.40316978]
action based on polilcy:[-0.6133169   0.47308534]
action based on polilcy:[-0.7892361   0.45370063]
action based on polilcy:[-0.5997652   0.51451087]
action based on polilcy:[-0.5344277  0.5359492]
action based on polilcy:[-0.7265105   0.40170994]
action based on polilcy:[-0.6871442   0.45651504]
action based on polilcy:[-0.6773926   0.37300518]
action based on polilcy:[-0.6282575   0.35878518]
action based on polilcy:[-0.7862904   0.28266826]
action based on polilcy:[-0.81592953  0.24760081]
action based on polilcy:[-0.6506932   0.28759247]
action based on polilcy:[-0.826602    0.20352416]
action based on polilcy:[-0.6736965   0.26974726]
Total 

action based on polilcy:[-0.59386736  0.57008314]
action based on polilcy:[-0.75253266  0.51524675]
action based on polilcy:[-0.6499449   0.43777463]
action based on polilcy:[-0.6183306  0.4254613]
action based on polilcy:[-0.72580624  0.44292963]
action based on polilcy:[-0.7304666  0.4938972]
action based on polilcy:[-0.8554921   0.33538473]
action based on polilcy:[-0.9226782  0.2046207]
action based on polilcy:[-0.9403957   0.16196391]
Total T: 30890 Episode Num: 1948 Episode T: 9 Reward: -956.066336
action based on polilcy:[-0.57106614  0.563197  ]
action based on polilcy:[-0.69273996  0.50307214]
action based on polilcy:[-0.61522865  0.4887984 ]
action based on polilcy:[-0.7237673   0.31012145]
action based on polilcy:[-0.85578334  0.28781608]
action based on polilcy:[-0.8319092   0.30606794]
action based on polilcy:[-0.6861067   0.28919145]
Total T: 30897 Episode Num: 1949 Episode T: 7 Reward: -796.017795
action based on polilcy:[-0.6226058  0.5816784]
action based on polilcy:[-

action based on polilcy:[-0.59289527  0.6003887 ]
action based on polilcy:[-0.38655952  0.6024176 ]
action based on polilcy:[-0.73323685  0.47179532]
action based on polilcy:[-0.65602064  0.4438006 ]
action based on polilcy:[-0.79671836  0.3004336 ]
action based on polilcy:[-0.9191761   0.18762858]
action based on polilcy:[-0.9601063   0.12841314]
action based on polilcy:[-0.96064615  0.09012186]
action based on polilcy:[-0.92323434  0.05837021]
action based on polilcy:[-0.9384808  0.0675884]
Total T: 31042 Episode Num: 1961 Episode T: 10 Reward: -515.268361
action based on polilcy:[-0.5926965  0.5993264]
action based on polilcy:[-0.7422519  0.4792582]
action based on polilcy:[-0.5075672   0.58464515]
action based on polilcy:[-0.6316491  0.5223603]
action based on polilcy:[-0.31539708  0.6323588 ]
action based on polilcy:[-0.7845344  0.4485584]
action based on polilcy:[-0.6932143   0.46009725]
action based on polilcy:[-0.66476595  0.43788633]
action based on polilcy:[-0.69224715  0.592

action based on polilcy:[-0.54763687  0.6071199 ]
action based on polilcy:[-0.26539412  0.6188956 ]
action based on polilcy:[-0.6403205  0.5362197]
action based on polilcy:[-0.7011846  0.4992048]
action based on polilcy:[-0.8230605  0.5167507]
action based on polilcy:[-0.6089253  0.4697921]
action based on polilcy:[-0.8029659   0.32915768]
action based on polilcy:[-0.63481146  0.39445868]
action based on polilcy:[-0.5658372  0.5001978]
action based on polilcy:[-0.52996147  0.4141894 ]
action based on polilcy:[-0.5864775   0.33896437]
action based on polilcy:[-0.6473529   0.55155003]
action based on polilcy:[-0.8065156   0.43539876]
action based on polilcy:[-0.8004542   0.29048237]
action based on polilcy:[-0.4460442  0.5537262]
Total T: 31204 Episode Num: 1972 Episode T: 15 Reward: -1386.372473
action based on polilcy:[-0.53253305  0.5946007 ]
action based on polilcy:[-0.55701166  0.5218345 ]
action based on polilcy:[-0.6179837  0.5673213]
action based on polilcy:[-0.7707406  0.5641502

action based on polilcy:[-0.44769913  0.5874064 ]
action based on polilcy:[-0.5556932  0.5368379]
action based on polilcy:[-0.7306893  0.4819493]
action based on polilcy:[-0.6924738  0.5468478]
action based on polilcy:[-0.73404855  0.32669526]
action based on polilcy:[-0.6907717  0.3936771]
action based on polilcy:[-0.85709393  0.4944587 ]
action based on polilcy:[-0.82497966  0.45898357]
action based on polilcy:[-0.83254033  0.33097374]
action based on polilcy:[-0.91615033  0.20022081]
action based on polilcy:[-0.90214574  0.13936883]
Total T: 31375 Episode Num: 1984 Episode T: 11 Reward: -770.652231
action based on polilcy:[-0.4282089  0.5780488]
action based on polilcy:[-0.35799015  0.5955049 ]
action based on polilcy:[-0.5259446  0.525386 ]
action based on polilcy:[-0.55485386  0.5234919 ]
action based on polilcy:[-0.71772844  0.46202132]
action based on polilcy:[-0.5568954  0.6367275]
action based on polilcy:[-0.22241616  0.5950419 ]
action based on polilcy:[-0.3199048  0.5475088]

action based on polilcy:[-0.4077195   0.56431186]
action based on polilcy:[-0.29718998  0.5876169 ]
action based on polilcy:[-0.43453237  0.5345867 ]
action based on polilcy:[-0.46682304  0.53454727]
action based on polilcy:[-0.703114   0.4775556]
action based on polilcy:[-0.86685276  0.50691664]
action based on polilcy:[-0.55342865  0.4634955 ]
action based on polilcy:[-0.8325364   0.31776372]
action based on polilcy:[-0.66013557  0.35706422]
action based on polilcy:[-0.59623206  0.549924  ]
action based on polilcy:[-0.6577133   0.40491226]
action based on polilcy:[-0.5536335  0.3879271]
action based on polilcy:[-0.8657864   0.21277471]
action based on polilcy:[-0.72869074  0.26952383]
action based on polilcy:[-0.6242427   0.33572906]
action based on polilcy:[-0.671605    0.30989826]
action based on polilcy:[-0.53582484  0.34618375]
Total T: 31562 Episode Num: 1996 Episode T: 17 Reward: -1395.095519
action based on polilcy:[-0.36541095  0.5614264 ]
action based on polilcy:[-0.752145  

action based on polilcy:[-0.39631572  0.5616242 ]
action based on polilcy:[-0.44183204  0.5279162 ]
action based on polilcy:[-0.5670701  0.5188222]
action based on polilcy:[-0.6154183   0.49855483]
action based on polilcy:[-0.81096894  0.5169867 ]
action based on polilcy:[-0.75585175  0.36007977]
action based on polilcy:[-0.84960896  0.28336447]
action based on polilcy:[-0.56053996  0.31948218]
action based on polilcy:[-0.5416124  0.3115085]
action based on polilcy:[-0.80693734  0.2625656 ]
Total T: 31724 Episode Num: 2008 Episode T: 10 Reward: -957.531101
action based on polilcy:[-0.43912488  0.5695053 ]
action based on polilcy:[-0.7921626  0.4952974]
action based on polilcy:[-0.6852794   0.45179254]
action based on polilcy:[-0.88976884  0.56844383]
action based on polilcy:[-0.82338107  0.3263846 ]
action based on polilcy:[-0.87913775  0.5846731 ]
action based on polilcy:[-0.82470155  0.31477022]
action based on polilcy:[-0.7099167   0.38916758]
action based on polilcy:[-0.6992794   0

action based on polilcy:[-0.14939776  0.5095666 ]
action based on polilcy:[-0.73626715  0.4978626 ]
action based on polilcy:[-0.78103626  0.53118134]
action based on polilcy:[-0.8516381  0.4605708]
action based on polilcy:[-0.8165716  0.3031416]
action based on polilcy:[-0.8133942   0.28177395]
action based on polilcy:[-0.7615152   0.35842067]
action based on polilcy:[-0.93776745  0.19085963]
action based on polilcy:[-0.64263815  0.2540182 ]
Total T: 31883 Episode Num: 2021 Episode T: 9 Reward: -786.499018
action based on polilcy:[-0.19342482  0.5111073 ]
action based on polilcy:[-0.8249764   0.50944054]
action based on polilcy:[-0.6829442   0.46833774]
action based on polilcy:[-0.810797   0.3259184]
action based on polilcy:[-0.8245952   0.51121354]
action based on polilcy:[-0.6376058   0.61413026]
action based on polilcy:[-0.44082326  0.50124013]
action based on polilcy:[-0.61931586  0.46132985]
action based on polilcy:[-0.67362195  0.42770135]
action based on polilcy:[-0.5075959   0.

action based on polilcy:[-0.24387272  0.5174035 ]
action based on polilcy:[-0.3696509  0.52608  ]
action based on polilcy:[-0.57766676  0.5106424 ]
action based on polilcy:[-0.74804676  0.56815624]
action based on polilcy:[-0.6780735  0.5239716]
action based on polilcy:[-0.6091548   0.47498304]
action based on polilcy:[-0.7622191   0.33377373]
action based on polilcy:[-0.86635864  0.2754255 ]
action based on polilcy:[-0.63420326  0.4488106 ]
action based on polilcy:[-0.6345978  0.5618651]
action based on polilcy:[-0.67291534  0.3982162 ]
action based on polilcy:[-0.6079534  0.3442875]
Total T: 32526 Episode Num: 2045 Episode T: 12 Reward: -1193.509157
action based on polilcy:[-0.16370964  0.51763487]
action based on polilcy:[-0.67747736  0.50319505]
action based on polilcy:[-0.78656316  0.48159137]
action based on polilcy:[-0.6713295  0.3764596]
action based on polilcy:[-0.85632646  0.29667538]
action based on polilcy:[-0.7807829   0.34804732]
action based on polilcy:[-0.72013557  0.43

action based on polilcy:[-0.6745285   0.52732086]
action based on polilcy:[-0.7046888  0.5434141]
action based on polilcy:[-0.6544951  0.3864416]
action based on polilcy:[-0.7554352  0.3284853]
action based on polilcy:[-0.87360215  0.2595177 ]
Total T: 32680 Episode Num: 2057 Episode T: 10 Reward: -1431.037969
action based on polilcy:[-0.638322   0.5992658]
action based on polilcy:[-0.33736965  0.59148204]
action based on polilcy:[-0.7802919  0.5329282]
action based on polilcy:[-0.64775807  0.5054405 ]
action based on polilcy:[-0.45382437  0.6101414 ]
action based on polilcy:[-0.40976003  0.573979  ]
action based on polilcy:[-0.7334443  0.4541418]
action based on polilcy:[-0.585423   0.5167687]
action based on polilcy:[-0.46498826  0.6783255 ]
action based on polilcy:[-0.52754927  0.5565287 ]
action based on polilcy:[-0.5423409   0.59447336]
action based on polilcy:[-0.39365155  0.53831977]
action based on polilcy:[-0.02611872  0.4788468 ]
action based on polilcy:[0.6693485  0.53852123

action based on polilcy:[-0.57881457  0.6244502 ]
action based on polilcy:[-0.41518247  0.5547366 ]
action based on polilcy:[-0.69693875  0.5594809 ]
action based on polilcy:[-0.68717414  0.535009  ]
action based on polilcy:[-0.7596654   0.43886608]
action based on polilcy:[-0.75038683  0.61355054]
action based on polilcy:[-0.65563726  0.5223793 ]
action based on polilcy:[-0.6369361   0.33207992]
action based on polilcy:[-0.6786175   0.28623888]
action based on polilcy:[-0.8365819   0.25522336]
Total T: 32837 Episode Num: 2068 Episode T: 10 Reward: -1012.987185
action based on polilcy:[-0.6229341   0.62357235]
action based on polilcy:[-0.6758001  0.5709168]
action based on polilcy:[-0.6669761   0.67219627]
action based on polilcy:[-0.2577782  0.6093607]
action based on polilcy:[-0.7202169   0.50034946]
action based on polilcy:[-0.21007802  0.596097  ]
action based on polilcy:[-0.55097115  0.43145096]
action based on polilcy:[-0.52298295  0.4325395 ]
action based on polilcy:[-0.9992823 

action based on polilcy:[-0.62675047  0.6535959 ]
action based on polilcy:[-0.7471235  0.68977  ]
action based on polilcy:[-0.83512944  0.58789885]
action based on polilcy:[-0.6894512   0.36101267]
action based on polilcy:[-0.8101589  0.5236368]
action based on polilcy:[-0.6920576   0.36711186]
action based on polilcy:[-0.6946448   0.33795828]
action based on polilcy:[-0.806125   0.3348037]
action based on polilcy:[-0.88686824  0.31615365]
Total T: 33010 Episode Num: 2080 Episode T: 9 Reward: -932.701684
action based on polilcy:[-0.6155954  0.6556717]
action based on polilcy:[-0.6345439   0.53114974]
action based on polilcy:[-0.55517745  0.46758673]
action based on polilcy:[-0.7210493   0.41479808]
action based on polilcy:[-0.66361034  0.3398461 ]
action based on polilcy:[-0.6758716   0.29418957]
action based on polilcy:[-0.6680155   0.27050975]
action based on polilcy:[-0.75865066  0.22239202]
action based on polilcy:[-0.6530499   0.24319972]
Total T: 33019 Episode Num: 2081 Episode T

action based on polilcy:[-0.6588855   0.66708314]
action based on polilcy:[-0.7360487   0.55608135]
action based on polilcy:[-0.7055231   0.56362563]
action based on polilcy:[-0.8079212  0.4190031]
action based on polilcy:[-0.7127528  0.3713108]
action based on polilcy:[-0.74997276  0.35242933]
action based on polilcy:[-0.7982321   0.40597975]
action based on polilcy:[-0.90834486  0.26414025]
action based on polilcy:[-0.854784    0.40980673]
action based on polilcy:[-0.716148   0.3256001]
action based on polilcy:[-0.71181285  0.2793468 ]
Total T: 33166 Episode Num: 2094 Episode T: 11 Reward: -725.310110
action based on polilcy:[-0.6568649  0.6670728]
action based on polilcy:[-0.5391774  0.5797234]
action based on polilcy:[-0.6383048  0.5584272]
action based on polilcy:[-0.59478766  0.61654985]
action based on polilcy:[-0.68948376  0.53453493]
action based on polilcy:[-0.79917     0.55874485]
action based on polilcy:[-0.76803976  0.38974237]
action based on polilcy:[-0.7317729  0.404636

action based on polilcy:[-0.6380987  0.664114 ]
action based on polilcy:[-0.63502306  0.560189  ]
action based on polilcy:[-0.67069256  0.53694654]
action based on polilcy:[-0.8096208   0.55045694]
action based on polilcy:[-0.7125433  0.5509637]
action based on polilcy:[-0.8340882   0.45522952]
action based on polilcy:[-0.7896457  0.5146786]
action based on polilcy:[-0.77462685  0.37567174]
action based on polilcy:[-0.86848754  0.37982607]
action based on polilcy:[-0.90057296  0.3828709 ]
action based on polilcy:[-0.85919964  0.286361  ]
action based on polilcy:[-0.77107155  0.29140705]
action based on polilcy:[-0.7599063   0.31124535]
Total T: 33322 Episode Num: 2108 Episode T: 13 Reward: -906.379808
action based on polilcy:[-0.653617   0.6692592]
action based on polilcy:[-0.7655632  0.6578096]
action based on polilcy:[-0.6976836   0.57637227]
action based on polilcy:[-0.63790405  0.7465678 ]
action based on polilcy:[-0.84786797  0.67157567]
action based on polilcy:[-0.6874696  0.4389

action based on polilcy:[-0.6105975  0.6695751]
action based on polilcy:[-0.7978343  0.6255394]
action based on polilcy:[-0.6462141   0.58153826]
action based on polilcy:[-0.58085525  0.5830151 ]
action based on polilcy:[-0.6436244  0.5684512]
action based on polilcy:[-0.6924009  0.6904137]
action based on polilcy:[-0.85754734  0.61978126]
action based on polilcy:[-0.6678487   0.48068675]
action based on polilcy:[-0.6749841   0.47367957]
action based on polilcy:[-0.7933506   0.38202253]
action based on polilcy:[-0.7668856  0.3197103]
action based on polilcy:[-0.69985545  0.28583556]
action based on polilcy:[-0.8004314   0.22472246]
action based on polilcy:[-0.81449616  0.26499063]
Total T: 33495 Episode Num: 2124 Episode T: 14 Reward: -1138.536110
action based on polilcy:[-0.61783403  0.67914444]
action based on polilcy:[-0.7677981  0.5824611]
action based on polilcy:[-0.8936447   0.58411974]
action based on polilcy:[-0.6327277  0.6943567]
action based on polilcy:[-0.6695155  0.5704509

action based on polilcy:[-0.613124   0.6690712]
action based on polilcy:[-0.6129391  0.5603323]
action based on polilcy:[-0.6296917  0.6688342]
action based on polilcy:[-0.53739333  0.7103535 ]
action based on polilcy:[-0.6994344  0.5681358]
action based on polilcy:[-0.72441065  0.7151656 ]
action based on polilcy:[-0.9025133  0.5158064]
action based on polilcy:[-0.8533526   0.35079005]
action based on polilcy:[-0.9115707  0.324963 ]
action based on polilcy:[-0.76333225  0.3480979 ]
action based on polilcy:[-0.7672206  0.3263683]
action based on polilcy:[-0.7274091  0.2988226]
action based on polilcy:[-0.7459428   0.25479805]
action based on polilcy:[-0.6678952  0.2550269]
Total T: 33673 Episode Num: 2137 Episode T: 14 Reward: -1381.433090
action based on polilcy:[-0.6215615  0.6751528]
action based on polilcy:[-0.7314573  0.5581809]
action based on polilcy:[-0.9309499   0.48045787]
action based on polilcy:[-0.94686127  0.34861416]
action based on polilcy:[-0.7760598   0.31284365]
acti

action based on polilcy:[-0.62003195  0.66931075]
action based on polilcy:[-0.5565949  0.6860721]
action based on polilcy:[-0.7742316  0.5912855]
action based on polilcy:[-0.8508346   0.67348343]
action based on polilcy:[-0.77476627  0.3523258 ]
action based on polilcy:[-0.9171115  0.3263322]
action based on polilcy:[-0.75108004  0.37590155]
action based on polilcy:[-0.748693    0.40097135]
action based on polilcy:[-0.746199    0.33435377]
action based on polilcy:[-0.9104496   0.31542373]
action based on polilcy:[-0.9673413   0.27139285]
Total T: 33824 Episode Num: 2150 Episode T: 11 Reward: -1177.401701
action based on polilcy:[-0.6496608  0.6686438]
action based on polilcy:[-0.6724197   0.62645423]
action based on polilcy:[-0.9310951   0.55520076]
action based on polilcy:[-0.8332273   0.33157894]
action based on polilcy:[-0.89391357  0.28772518]
action based on polilcy:[-0.8904901   0.28339222]
action based on polilcy:[-0.9811768   0.31443295]
action based on polilcy:[-0.94650674  0.

action based on polilcy:[-0.6137645   0.67381686]
action based on polilcy:[-0.6528277  0.5476202]
action based on polilcy:[-0.8059581   0.48263815]
action based on polilcy:[-0.8826166   0.35589266]
action based on polilcy:[-0.96511984  0.3208842 ]
action based on polilcy:[-0.9096468   0.30099195]
action based on polilcy:[-0.7702085  0.3303354]
action based on polilcy:[-0.91457516  0.32962897]
action based on polilcy:[-0.9386202   0.33586577]
---------------------------------------
Episode_num: 2164, Evaluation over 1 episodes: -575.001735
---------------------------------------
Total T: 34001 Episode Num: 2164 Episode T: 10 Reward: -719.322770
Total T: 34047 Episode Num: 2165 Episode T: 46 Reward: -7328.131861
Total T: 34093 Episode Num: 2166 Episode T: 46 Reward: -7219.568587
Total T: 34112 Episode Num: 2167 Episode T: 19 Reward: -2474.628798
Total T: 34155 Episode Num: 2168 Episode T: 43 Reward: -7179.824461
Total T: 34203 Episode Num: 2169 Episode T: 48 Reward: -7327.744391
Total T:

action based on polilcy:[-0.70786357  0.6744739 ]
action based on polilcy:[-0.8221407   0.67277396]
action based on polilcy:[-0.86185014  0.47660244]
action based on polilcy:[-0.8114114   0.36226615]
action based on polilcy:[-0.94886774  0.33354715]
action based on polilcy:[-0.94616944  0.28747562]
action based on polilcy:[-0.8004063   0.30222172]
action based on polilcy:[-0.7534719   0.27785817]
action based on polilcy:[-0.8412006   0.22476542]
Total T: 34635 Episode Num: 2188 Episode T: 9 Reward: -690.266675
action based on polilcy:[-0.65196836  0.66840386]
action based on polilcy:[-0.6853232  0.5292805]
action based on polilcy:[-0.9901504  0.493931 ]
action based on polilcy:[-0.8560189  0.3449262]
action based on polilcy:[-0.80407304  0.30204993]
action based on polilcy:[-0.89451075  0.2417166 ]
action based on polilcy:[-0.9810447   0.18358529]
Total T: 34642 Episode Num: 2189 Episode T: 7 Reward: -765.832208
action based on polilcy:[-0.68032944  0.66504407]
action based on polilcy:

action based on polilcy:[-0.6648605   0.68509173]
action based on polilcy:[-0.4142548  0.6059599]
action based on polilcy:[-0.5583771  0.5511193]
action based on polilcy:[-0.6628568  0.533912 ]
action based on polilcy:[-0.6135756  0.5859544]
action based on polilcy:[-0.84170413  0.61853945]
action based on polilcy:[-0.64921033  0.44353628]
action based on polilcy:[-0.7449      0.37094408]
action based on polilcy:[-0.9108002   0.37775654]
action based on polilcy:[-0.9814501   0.29674307]
action based on polilcy:[-0.7761001   0.29639268]
action based on polilcy:[-0.7722249   0.30372503]
action based on polilcy:[-0.8384627   0.29537982]
action based on polilcy:[-0.9967352   0.19794725]
action based on polilcy:[-0.87596494  0.2533318 ]
Total T: 34807 Episode Num: 2202 Episode T: 15 Reward: -1099.605510
action based on polilcy:[-0.6825454  0.6788048]
action based on polilcy:[-0.7509838  0.6736681]
action based on polilcy:[-0.98020625  0.6329551 ]
action based on polilcy:[-0.9439445   0.3733

action based on polilcy:[-0.6622206  0.6851721]
action based on polilcy:[-0.44767782  0.6962366 ]
action based on polilcy:[-0.7140709  0.5391476]
action based on polilcy:[-0.6981427  0.4936676]
action based on polilcy:[-0.99839544  0.3810884 ]
action based on polilcy:[-0.8274053  0.3225098]
action based on polilcy:[-0.97620773  0.29735318]
action based on polilcy:[-0.9927605   0.23108017]
action based on polilcy:[-0.89894193  0.27613106]
action based on polilcy:[-0.9336263  0.2600523]
Total T: 34961 Episode Num: 2214 Episode T: 10 Reward: -977.949858
action based on polilcy:[-0.68028903  0.67088807]
action based on polilcy:[-0.7090831  0.5783716]
action based on polilcy:[-0.91975754  0.5948305 ]
action based on polilcy:[-0.8101538   0.37606528]
action based on polilcy:[-0.9444945   0.35046962]
action based on polilcy:[-0.848653   0.3279297]
action based on polilcy:[-0.86712146  0.29536766]
action based on polilcy:[-0.9728506   0.27824968]
action based on polilcy:[-0.9169187  0.2621463]

action based on polilcy:[-0.67929935  0.68322384]
action based on polilcy:[-0.9374675   0.67933255]
action based on polilcy:[-0.9563608   0.42709056]
action based on polilcy:[-0.9687475   0.47714108]
action based on polilcy:[-0.9279896   0.35150573]
action based on polilcy:[-0.9731766   0.33622903]
action based on polilcy:[-0.7785052   0.34021637]
action based on polilcy:[-0.99269223  0.38886848]
action based on polilcy:[-0.8815628  0.3227939]
Total T: 35137 Episode Num: 2229 Episode T: 9 Reward: -1074.270030
action based on polilcy:[-0.69556713  0.6794615 ]
action based on polilcy:[-0.736876    0.59753764]
action based on polilcy:[-0.98770314  0.5414865 ]
action based on polilcy:[-0.77198213  0.45866132]
action based on polilcy:[-0.7736411   0.39005187]
action based on polilcy:[-0.98917526  0.41181692]
action based on polilcy:[-0.9903345   0.40626997]
action based on polilcy:[-0.9973716   0.31740916]
action based on polilcy:[-0.8926909   0.28703463]
Total T: 35146 Episode Num: 2230 Ep

action based on polilcy:[-0.6955246   0.70360124]
action based on polilcy:[-0.7348679  0.5593363]
action based on polilcy:[-0.9406538   0.44666973]
action based on polilcy:[-0.8509874   0.35725302]
action based on polilcy:[-0.8638193   0.34499452]
action based on polilcy:[-0.9834729   0.28459468]
action based on polilcy:[-0.8749147   0.30516812]
action based on polilcy:[-0.9534104   0.27622807]
action based on polilcy:[-0.996149   0.2912155]
Total T: 35285 Episode Num: 2242 Episode T: 9 Reward: -520.527425
action based on polilcy:[-0.6771661   0.68996274]
action based on polilcy:[-0.7550135  0.5703293]
action based on polilcy:[-0.6729263  0.6178236]
action based on polilcy:[-0.6411238  0.6402693]
action based on polilcy:[-0.61885023  0.53446466]
action based on polilcy:[-0.6300435   0.53081846]
action based on polilcy:[-0.5651431  0.5322393]
action based on polilcy:[-0.4267919   0.48455623]
action based on polilcy:[-0.68190354  0.4999236 ]
action based on polilcy:[-0.6541662  0.5108527

action based on polilcy:[-0.69559073  0.68539214]
action based on polilcy:[-0.94306433  0.6779712 ]
action based on polilcy:[-0.7961695   0.54203516]
action based on polilcy:[-0.9585241   0.70223945]
action based on polilcy:[-0.971307    0.43807387]
action based on polilcy:[-0.9618727   0.35817036]
action based on polilcy:[-0.9562249  0.3129828]
action based on polilcy:[-0.9951173  0.3098957]
Total T: 35435 Episode Num: 2254 Episode T: 8 Reward: -914.754618
action based on polilcy:[-0.6727299  0.6778918]
action based on polilcy:[-0.89064604  0.61835176]
action based on polilcy:[-0.7026788   0.46534035]
action based on polilcy:[-0.8278976  0.7479939]
action based on polilcy:[-0.68876255  0.48251823]
action based on polilcy:[-0.77339     0.35975605]
action based on polilcy:[-0.8416873  0.3248948]
action based on polilcy:[-0.9983767   0.40484205]
action based on polilcy:[-0.989706    0.30238694]
action based on polilcy:[-0.96756816  0.27477178]
Total T: 35445 Episode Num: 2255 Episode T: 

action based on polilcy:[-0.70620847  0.69867283]
action based on polilcy:[-0.65804553  0.69126374]
action based on polilcy:[-0.75743854  0.6182225 ]
action based on polilcy:[-0.72963345  0.67353356]
action based on polilcy:[-0.5985228  0.618147 ]
action based on polilcy:[-0.7461661  0.5382051]
action based on polilcy:[-0.639048    0.60425943]
action based on polilcy:[-0.70322585  0.53027713]
action based on polilcy:[-0.6104251  0.6645911]
action based on polilcy:[-0.7651135  0.5205257]
action based on polilcy:[-0.88861465  0.55967915]
action based on polilcy:[-0.96567005  0.38753992]
action based on polilcy:[-0.85399276  0.3201144 ]
action based on polilcy:[-0.9785617   0.30848378]
action based on polilcy:[-0.93918437  0.22557524]
action based on polilcy:[-0.88908875  0.25024468]
action based on polilcy:[-0.88388145  0.26950675]
Total T: 35623 Episode Num: 2264 Episode T: 17 Reward: -1266.409219
action based on polilcy:[-0.7017259  0.6969001]
action based on polilcy:[-0.7251899  0.548

action based on polilcy:[-0.70921606  0.69632655]
action based on polilcy:[-0.7295681   0.54834735]
action based on polilcy:[-0.72470737  0.45703846]
action based on polilcy:[-0.82733464  0.36995688]
action based on polilcy:[-0.99358976  0.40643594]
action based on polilcy:[-0.8182073   0.36690527]
action based on polilcy:[-0.8663391  0.3442571]
action based on polilcy:[-0.9381787   0.31974772]
action based on polilcy:[-0.8573154   0.31112397]
action based on polilcy:[-0.94888186  0.31396684]
Total T: 35783 Episode Num: 2276 Episode T: 10 Reward: -478.121736
action based on polilcy:[-0.719109    0.69509554]
action based on polilcy:[-0.87441725  0.59695286]
action based on polilcy:[-0.95069414  0.58001316]
action based on polilcy:[-0.97550887  0.48242944]
action based on polilcy:[-0.94327223  0.36150742]
action based on polilcy:[-0.87190545  0.32641315]
action based on polilcy:[-0.98343974  0.28142944]
action based on polilcy:[-0.78139156  0.2643046 ]
action based on polilcy:[-0.7254495

action based on polilcy:[-0.72871965  0.70877635]
action based on polilcy:[-0.7444594  0.5562265]
action based on polilcy:[-0.8458153   0.50402987]
action based on polilcy:[-0.7910421  0.488286 ]
action based on polilcy:[-0.6989672  0.6038848]
action based on polilcy:[-0.91999876  0.65478134]
action based on polilcy:[-0.972777    0.46877384]
action based on polilcy:[-0.995137    0.39829656]
action based on polilcy:[-0.85663754  0.33183914]
action based on polilcy:[-0.9788783   0.34628627]
action based on polilcy:[-0.98466593  0.35202855]
action based on polilcy:[-0.94330734  0.32197982]
Total T: 35955 Episode Num: 2290 Episode T: 12 Reward: -871.045767
action based on polilcy:[-0.7238103  0.7114292]
action based on polilcy:[-0.62511474  0.68225116]
action based on polilcy:[-0.71540123  0.5457681 ]
action based on polilcy:[-0.908957    0.53761566]
action based on polilcy:[-0.9137993   0.40105328]
action based on polilcy:[-0.9439372   0.34798738]
action based on polilcy:[-0.8764875  0.32

action based on polilcy:[-0.7629225  0.6597034]
action based on polilcy:[-0.7883411   0.50318015]
action based on polilcy:[-0.78927404  0.5225921 ]
action based on polilcy:[-0.94378775  0.67575324]
action based on polilcy:[-0.99125516  0.4843567 ]
action based on polilcy:[-0.990053    0.45284322]
action based on polilcy:[-0.99925894  0.5932287 ]
action based on polilcy:[-0.7752763   0.47014663]
action based on polilcy:[-0.9790878  0.406945 ]
action based on polilcy:[-0.90082765  0.3398631 ]
action based on polilcy:[-0.9978373   0.33695224]
Total T: 36593 Episode Num: 2316 Episode T: 11 Reward: -1089.598584
action based on polilcy:[-0.7543738  0.6649128]
action based on polilcy:[-0.8752004   0.57486796]
action based on polilcy:[-0.9826191   0.46649927]
action based on polilcy:[-0.992924    0.38996953]
action based on polilcy:[-0.9058717   0.30230975]
action based on polilcy:[-0.93760073  0.27171233]
action based on polilcy:[-0.9749912   0.24899939]
action based on polilcy:[-0.9176889  0

action based on polilcy:[-0.79212666  0.6592854 ]
action based on polilcy:[-0.7749887   0.50487053]
action based on polilcy:[-0.6274452  0.6860435]
action based on polilcy:[-0.9694139  0.5667139]
action based on polilcy:[-0.7509204   0.56331503]
action based on polilcy:[-0.8281429  0.7571495]
action based on polilcy:[-0.9004491   0.51745224]
action based on polilcy:[-0.70489943  0.5685258 ]
action based on polilcy:[-0.74831104  0.44081327]
action based on polilcy:[-0.73441505  0.42449027]
action based on polilcy:[-0.9982423   0.47985968]
action based on polilcy:[-0.94493407  0.34391773]
action based on polilcy:[-0.9070138   0.30305812]
action based on polilcy:[-0.8892508  0.280539 ]
action based on polilcy:[-0.8601756  0.2549863]
Total T: 36780 Episode Num: 2329 Episode T: 15 Reward: -1104.927765
action based on polilcy:[-0.8022327   0.65864277]
action based on polilcy:[-0.94149494  0.57035756]
action based on polilcy:[-0.94274426  0.6920997 ]
action based on polilcy:[-0.77682376  0.44

action based on polilcy:[-0.8603473  0.647223 ]
action based on polilcy:[-0.9280743   0.54197586]
action based on polilcy:[-0.7829443   0.70325804]
action based on polilcy:[-0.7997632   0.48720416]
action based on polilcy:[-0.97352177  0.4698523 ]
action based on polilcy:[-0.8708273  0.3810541]
action based on polilcy:[-0.99617267  0.5883033 ]
action based on polilcy:[-0.86943924  0.37223023]
action based on polilcy:[-0.92992634  0.33154625]
action based on polilcy:[-0.98439765  0.30558255]
action based on polilcy:[-0.9996233  0.2561925]
Total T: 36945 Episode Num: 2341 Episode T: 11 Reward: -1109.743944
action based on polilcy:[-0.83924204  0.6443569 ]
action based on polilcy:[-0.69535196  0.6011855 ]
action based on polilcy:[-0.4414027   0.57136786]
action based on polilcy:[-0.6168664   0.52683103]
action based on polilcy:[-0.6446626   0.52847743]
action based on polilcy:[-0.3455416  0.590855 ]
action based on polilcy:[-0.324271   0.5664209]
action based on polilcy:[-0.69516337  0.30

action based on polilcy:[-0.9167198  0.6545075]
action based on polilcy:[-0.86739767  0.5718129 ]
action based on polilcy:[-0.9427613   0.56022274]
action based on polilcy:[-0.7077892   0.62273794]
action based on polilcy:[-0.7201417  0.6419706]
action based on polilcy:[-0.54902256  0.5846211 ]
action based on polilcy:[-0.93056947  0.5105331 ]
action based on polilcy:[-0.7881391   0.45036986]
action based on polilcy:[-0.7674217   0.47557637]
action based on polilcy:[-0.71377516  0.5585288 ]
action based on polilcy:[-0.98226714  0.44920003]
action based on polilcy:[-0.89350533  0.34759447]
action based on polilcy:[-0.8497991   0.32973415]
action based on polilcy:[-0.9570534   0.30106828]
action based on polilcy:[-0.85781753  0.3199493 ]
action based on polilcy:[-0.9996291   0.35867825]
action based on polilcy:[-0.9414866   0.29051816]
action based on polilcy:[-0.9696199   0.25885832]
Total T: 37111 Episode Num: 2352 Episode T: 18 Reward: -1180.730103
action based on polilcy:[-0.9162725 

action based on polilcy:[-0.93776685  0.6459764 ]
action based on polilcy:[-0.7996572   0.47877094]
action based on polilcy:[-0.79677933  0.5031743 ]
action based on polilcy:[-0.7811065  0.6411247]
action based on polilcy:[-0.7117654   0.59777176]
action based on polilcy:[-0.8718942   0.55710614]
action based on polilcy:[-0.8550195   0.69053686]
action based on polilcy:[-0.90527177  0.48706347]
action based on polilcy:[-0.7523732   0.51881754]
action based on polilcy:[-0.8090002  0.4796914]
action based on polilcy:[-0.8448683  0.5296843]
action based on polilcy:[-0.9769848  0.4001477]
action based on polilcy:[-0.7746855  0.3767971]
action based on polilcy:[-0.72483975  0.5205986 ]
action based on polilcy:[-0.7369156   0.47368142]
action based on polilcy:[-0.82140154  0.36481646]
action based on polilcy:[-0.73497283  0.45114478]
action based on polilcy:[-0.81816494  0.35994032]
action based on polilcy:[-0.73133063  0.44856063]
action based on polilcy:[-0.76572484  0.37706923]
action bas

action based on polilcy:[-0.9599345   0.64256406]
action based on polilcy:[-0.8698402  0.5926194]
action based on polilcy:[-0.9146138   0.49162564]
action based on polilcy:[-0.7563706  0.6057403]
action based on polilcy:[-0.9727421   0.69990313]
action based on polilcy:[-0.7185568  0.5239857]
action based on polilcy:[-0.9254217  0.4838129]
action based on polilcy:[-0.9821133  0.4054891]
action based on polilcy:[-0.7899161   0.39016426]
action based on polilcy:[-0.9921242   0.38230163]
action based on polilcy:[-0.96211886  0.28175032]
action based on polilcy:[-0.86086226  0.31141222]
action based on polilcy:[-0.84304786  0.37820256]
action based on polilcy:[-0.78385985  0.63658   ]
action based on polilcy:[-0.8083061   0.39118135]
action based on polilcy:[-0.7153708  0.5542052]
action based on polilcy:[-0.9339001  0.6754885]
action based on polilcy:[-0.8999145  0.6040201]
action based on polilcy:[-0.8576872   0.41716972]
action based on polilcy:[-0.82842135  0.5681853 ]
action based on 

action based on polilcy:[-0.9597386   0.63921285]
action based on polilcy:[-0.7052133   0.57578087]
action based on polilcy:[-0.69160485  0.5419383 ]
action based on polilcy:[-0.97816294  0.55063814]
action based on polilcy:[-0.6711905  0.5622821]
action based on polilcy:[-0.90180784  0.50853854]
action based on polilcy:[-0.82204807  0.43569392]
action based on polilcy:[-0.8803303   0.53118867]
action based on polilcy:[-0.8444412   0.46826595]
action based on polilcy:[-0.89169234  0.3604091 ]
action based on polilcy:[-0.9995584  0.3188259]
action based on polilcy:[-0.9391398  0.2863239]
action based on polilcy:[-0.9977408   0.27737784]
action based on polilcy:[-0.98189765  0.2362348 ]
action based on polilcy:[-0.9841727   0.19431001]
Total T: 37632 Episode Num: 2384 Episode T: 15 Reward: -961.683439
action based on polilcy:[-0.95586276  0.6405968 ]
action based on polilcy:[-0.7995168   0.46555153]
action based on polilcy:[-0.7934643   0.44038174]
action based on polilcy:[-0.9379134   0

action based on polilcy:[-0.95745003  0.6529938 ]
action based on polilcy:[-0.9163769  0.686049 ]
action based on polilcy:[-0.8940422   0.51731706]
action based on polilcy:[-0.961381    0.65634686]
action based on polilcy:[-0.82358253  0.44703102]
action based on polilcy:[-0.70577073  0.65209204]
action based on polilcy:[-0.9785387  0.5003759]
action based on polilcy:[-0.93985146  0.6391238 ]
action based on polilcy:[-0.6879474   0.56270444]
action based on polilcy:[-0.72526276  0.5934851 ]
action based on polilcy:[-0.94735664  0.4649585 ]
action based on polilcy:[-0.5275968  0.6406651]
action based on polilcy:[-0.7732752  0.3970587]
action based on polilcy:[-0.74609196  0.42297256]
action based on polilcy:[-0.6589701   0.58264124]
action based on polilcy:[-0.7623096   0.52957386]
action based on polilcy:[-0.98797834  0.45193183]
action based on polilcy:[-0.65469366  0.54482627]
action based on polilcy:[-0.6909037   0.45196924]
action based on polilcy:[-0.7401608   0.40746614]
action b

action based on polilcy:[-0.9564615   0.63479555]
action based on polilcy:[-0.8167603   0.46083188]
action based on polilcy:[-0.84372663  0.40009862]
action based on polilcy:[-0.7488891   0.58935106]
action based on polilcy:[-0.94491833  0.41235223]
action based on polilcy:[-0.9583456   0.33562103]
action based on polilcy:[-0.96821976  0.29836664]
action based on polilcy:[-0.9603629  0.2871432]
action based on polilcy:[-0.9963902   0.32044485]
action based on polilcy:[-0.9896888   0.25264516]
action based on polilcy:[-0.98377925  0.22197577]
action based on polilcy:[-0.9731786   0.20954956]
Total T: 37965 Episode Num: 2406 Episode T: 12 Reward: -588.385259
action based on polilcy:[-0.9575954   0.63588506]
action based on polilcy:[-0.8955719  0.5759431]
action based on polilcy:[-0.93456787  0.47482306]
action based on polilcy:[-0.9975683   0.49129403]
action based on polilcy:[-0.83398485  0.38228396]
action based on polilcy:[-0.8126812  0.3743943]
action based on polilcy:[-0.99938804  0

action based on polilcy:[-0.9530968  0.5890151]
action based on polilcy:[-0.8599323   0.44902033]
action based on polilcy:[-0.8984109   0.36274472]
action based on polilcy:[-0.986892    0.30430436]
action based on polilcy:[-0.88176596  0.36482757]
action based on polilcy:[-0.98615867  0.30498722]
action based on polilcy:[-0.99265844  0.2625073 ]
action based on polilcy:[-0.9755648   0.26662576]
action based on polilcy:[-0.99940044  0.2998597 ]
action based on polilcy:[-0.87589335  0.3364926 ]
action based on polilcy:[-0.9975205   0.30543306]
Total T: 38645 Episode Num: 2428 Episode T: 11 Reward: -704.281098
action based on polilcy:[-0.9503273  0.5872081]
action based on polilcy:[-0.8270751  0.5341209]
action based on polilcy:[-0.96710354  0.4768485 ]
action based on polilcy:[-0.83038133  0.4294621 ]
action based on polilcy:[-0.92895323  0.3653562 ]
action based on polilcy:[-0.99347264  0.29467392]
action based on polilcy:[-0.99628437  0.21859622]
action based on polilcy:[-0.991421    0

action based on polilcy:[-0.9595087  0.5865648]
action based on polilcy:[-0.9428178  0.4631362]
action based on polilcy:[-0.93319     0.36886743]
action based on polilcy:[-0.9690888   0.35340574]
action based on polilcy:[-0.9992263   0.44646898]
action based on polilcy:[-0.9953217   0.31497154]
action based on polilcy:[-0.99780536  0.2979016 ]
action based on polilcy:[-0.9934825   0.28056946]
action based on polilcy:[-0.97637403  0.28967854]
action based on polilcy:[-0.98655045  0.2823855 ]
action based on polilcy:[-0.99566364  0.28033155]
Total T: 38805 Episode Num: 2440 Episode T: 11 Reward: -670.481478
action based on polilcy:[-0.9523972  0.584511 ]
action based on polilcy:[-0.85203916  0.44791308]
action based on polilcy:[-0.9127634  0.3714856]
action based on polilcy:[-0.9807507  0.3636901]
action based on polilcy:[-0.9947802   0.31070542]
action based on polilcy:[-0.9993242   0.27289122]
action based on polilcy:[-0.9975309   0.26961875]
action based on polilcy:[-0.98737764  0.182

action based on polilcy:[-0.95937556  0.5904189 ]
action based on polilcy:[-0.9907015   0.42017484]
action based on polilcy:[-0.9867478   0.37704927]
action based on polilcy:[-0.992311    0.33288187]
action based on polilcy:[-0.9999051   0.31574726]
action based on polilcy:[-0.8269842  0.3925489]
action based on polilcy:[-0.98594403  0.33428514]
action based on polilcy:[-0.99092543  0.28681844]
action based on polilcy:[-0.9878361  0.2668359]
Total T: 38961 Episode Num: 2452 Episode T: 9 Reward: -806.075149
action based on polilcy:[-0.9642176   0.59983593]
action based on polilcy:[-0.756685  0.545604]
action based on polilcy:[-0.97284037  0.58263063]
action based on polilcy:[-0.74628705  0.5507767 ]
action based on polilcy:[-0.7900501   0.49289924]
action based on polilcy:[-0.91866875  0.38098162]
action based on polilcy:[-0.9976956   0.43529564]
action based on polilcy:[-0.79134184  0.56720793]
action based on polilcy:[-0.97848934  0.47135916]
action based on polilcy:[-0.94018173  0.35

action based on polilcy:[-0.9637309  0.587399 ]
action based on polilcy:[-0.8628459  0.592618 ]
action based on polilcy:[-0.75572014  0.48219094]
action based on polilcy:[-0.9438844   0.44181138]
action based on polilcy:[-0.7630165   0.58544016]
action based on polilcy:[-0.9956995   0.39923972]
action based on polilcy:[-0.9848568   0.41012603]
action based on polilcy:[-0.7384223   0.49240133]
action based on polilcy:[-0.97741586  0.34882173]
action based on polilcy:[-0.90547496  0.37821725]
action based on polilcy:[-0.91275865  0.51947856]
action based on polilcy:[-0.69334126  0.51327384]
action based on polilcy:[-0.96902215  0.43595076]
action based on polilcy:[-0.68392473  0.521075  ]
action based on polilcy:[-0.99754363  0.36227614]
action based on polilcy:[-0.9960504   0.46948412]
action based on polilcy:[-0.9567561   0.33390906]
action based on polilcy:[-0.9955236   0.26272023]
action based on polilcy:[-0.7698173   0.30847684]
action based on polilcy:[-0.93715537  0.3204486 ]
Tota

action based on polilcy:[-0.9665556  0.5887902]
action based on polilcy:[-0.955824    0.44423923]
action based on polilcy:[-0.99997514  0.3555213 ]
action based on polilcy:[-0.9992683  0.3269097]
action based on polilcy:[-0.99976635  0.2702849 ]
action based on polilcy:[-0.9898605  0.2522596]
action based on polilcy:[-0.9994198  0.2917736]
action based on polilcy:[-0.9976288   0.28183544]
action based on polilcy:[-0.9995327   0.25593188]
Total T: 39265 Episode Num: 2475 Episode T: 9 Reward: -572.688958
action based on polilcy:[-0.9625074  0.5875864]
action based on polilcy:[-0.7533729   0.57589936]
action based on polilcy:[-0.72079337  0.5460262 ]
action based on polilcy:[-0.90286756  0.5465789 ]
action based on polilcy:[-0.9865794   0.43072215]
action based on polilcy:[-0.98108935  0.4286112 ]
action based on polilcy:[-0.9987059   0.37449127]
action based on polilcy:[-0.99928755  0.29310578]
action based on polilcy:[-0.9981041   0.46231326]
action based on polilcy:[-0.9942498   0.3189

action based on polilcy:[-0.9609392   0.59523016]
action based on polilcy:[-0.7773839  0.5295745]
action based on polilcy:[-0.9566187   0.44207633]
action based on polilcy:[-0.958759   0.5651889]
action based on polilcy:[-0.9966837   0.46329048]
action based on polilcy:[-0.99602175  0.36832926]
action based on polilcy:[-0.94791794  0.40193772]
action based on polilcy:[-0.9899411  0.5319514]
action based on polilcy:[-0.9763602  0.3821614]
action based on polilcy:[-0.99995244  0.3376405 ]
action based on polilcy:[-0.9979731  0.3098544]
action based on polilcy:[-0.99938     0.25037262]
Total T: 39432 Episode Num: 2486 Episode T: 12 Reward: -1199.220465
action based on polilcy:[-0.95455515  0.604811  ]
action based on polilcy:[-0.8925914  0.5594175]
action based on polilcy:[-0.7919774  0.5044106]
action based on polilcy:[-0.84685737  0.65203273]
action based on polilcy:[-0.3865557   0.44360545]
action based on polilcy:[-0.3713177  0.4838441]
action based on polilcy:[-0.30558586  0.50026804

action based on polilcy:[-0.9544721  0.6293099]
action based on polilcy:[-0.7619536   0.58232236]
action based on polilcy:[-0.79744077  0.52099717]
action based on polilcy:[-0.76789224  0.55520976]
action based on polilcy:[-0.9975599   0.45797807]
action based on polilcy:[-0.9990268   0.45048904]
action based on polilcy:[-0.9990933   0.34264073]
action based on polilcy:[-0.99963385  0.29021358]
action based on polilcy:[-0.9998828  0.3498075]
action based on polilcy:[-0.9998656   0.19765736]
Total T: 39622 Episode Num: 2498 Episode T: 10 Reward: -917.511745
action based on polilcy:[-0.9567594  0.6229496]
action based on polilcy:[-0.85608107  0.5036179 ]
action based on polilcy:[-0.92852426  0.67509604]
action based on polilcy:[-0.96875685  0.515152  ]
action based on polilcy:[-0.77901095  0.5720001 ]
action based on polilcy:[-0.99945045  0.37531447]
action based on polilcy:[-0.9118469   0.43439087]
action based on polilcy:[-0.9954308   0.37227347]
action based on polilcy:[-0.98790807  0

action based on polilcy:[-0.94905764  0.64267915]
action based on polilcy:[-0.9933844   0.53743684]
action based on polilcy:[-0.9812669   0.44166058]
action based on polilcy:[-0.8626314   0.47518146]
action based on polilcy:[-0.99994636  0.4403919 ]
action based on polilcy:[-0.99742573  0.3770149 ]
action based on polilcy:[-0.9997504   0.32411098]
action based on polilcy:[-0.9997194   0.26841775]
action based on polilcy:[-0.9978187   0.27340615]
action based on polilcy:[-0.99573135  0.22113487]
action based on polilcy:[-0.99427325  0.25884283]
Total T: 39799 Episode Num: 2512 Episode T: 11 Reward: -736.911621
action based on polilcy:[-0.944142   0.6387032]
action based on polilcy:[-0.96838117  0.43431738]
action based on polilcy:[-0.9996038   0.45803323]
action based on polilcy:[-0.7496077  0.5904105]
action based on polilcy:[-0.9912368   0.51801133]
action based on polilcy:[-0.759159   0.5992475]
action based on polilcy:[-0.8480592   0.57655543]
action based on polilcy:[-0.9960237   0

action based on polilcy:[-0.93250954  0.63041496]
action based on polilcy:[-0.9745304   0.43014348]
action based on polilcy:[-0.77387214  0.55321825]
action based on polilcy:[-0.99927413  0.4744263 ]
action based on polilcy:[-0.77898175  0.59308314]
action based on polilcy:[-0.9997137  0.4201353]
action based on polilcy:[-0.9999879   0.37038392]
action based on polilcy:[-0.9994152  0.458606 ]
action based on polilcy:[-0.9995687   0.34475166]
action based on polilcy:[-0.9993563  0.272727 ]
action based on polilcy:[-0.9408792  0.3268076]
action based on polilcy:[-0.9995768   0.45685118]
Total T: 39974 Episode Num: 2525 Episode T: 12 Reward: -1239.371355
action based on polilcy:[-0.9395305   0.63471466]
action based on polilcy:[-0.7888355   0.60225534]
action based on polilcy:[-0.7945744  0.5587591]
action based on polilcy:[-0.9914751   0.40392974]
action based on polilcy:[-0.98386014  0.42114156]
action based on polilcy:[-0.9927501   0.41083157]
action based on polilcy:[-0.9964014   0.40

action based on polilcy:[-0.6950425   0.66687953]
action based on polilcy:[-0.823032   0.6083775]
action based on polilcy:[-0.763979    0.59083235]
action based on polilcy:[-0.64695835  0.6120051 ]
action based on polilcy:[-0.77398515  0.5288594 ]
action based on polilcy:[-0.7906046   0.62700117]
action based on polilcy:[-0.6366228   0.58729774]
action based on polilcy:[-0.9215505  0.5640017]
action based on polilcy:[-0.99241245  0.51248634]
action based on polilcy:[-0.95552814  0.35714862]
action based on polilcy:[-0.99892294  0.35211998]
action based on polilcy:[-0.9935134   0.28586212]
action based on polilcy:[-0.9638531   0.28277567]
action based on polilcy:[-0.96381795  0.07449206]
Total T: 40610 Episode Num: 2553 Episode T: 14 Reward: -1326.514355
action based on polilcy:[-0.752309   0.6801276]
action based on polilcy:[-0.7764899   0.59082973]
action based on polilcy:[-0.99305075  0.6001857 ]
action based on polilcy:[-0.9999769   0.48769665]
action based on polilcy:[-0.9999042   

action based on polilcy:[-0.72558606  0.6661117 ]
action based on polilcy:[-0.96915376  0.6690756 ]
action based on polilcy:[-0.98034084  0.41235054]
action based on polilcy:[-0.97655326  0.39385813]
action based on polilcy:[-0.99825597  0.36369723]
action based on polilcy:[-0.99935406  0.34350276]
action based on polilcy:[-0.9992218   0.38059866]
action based on polilcy:[-0.9983216   0.33130842]
action based on polilcy:[-0.99782425  0.32168743]
action based on polilcy:[-0.9998173   0.26784852]
action based on polilcy:[-0.9982735   0.22333212]
Total T: 40767 Episode Num: 2566 Episode T: 11 Reward: -708.447165
action based on polilcy:[-0.73168254  0.6678815 ]
action based on polilcy:[-0.9129273  0.6662117]
action based on polilcy:[-0.9579838  0.4708502]
action based on polilcy:[-0.999914    0.53636205]
action based on polilcy:[-0.76696575  0.5338645 ]
action based on polilcy:[-0.9457227   0.67824686]
action based on polilcy:[-0.99867433  0.4977411 ]
action based on polilcy:[-0.99971265 

action based on polilcy:[-0.73064303  0.7267591 ]
action based on polilcy:[-0.45407555  0.7327806 ]
action based on polilcy:[-0.7967171  0.6792989]
action based on polilcy:[-0.7671139  0.5581479]
action based on polilcy:[-0.7489904  0.5204265]
action based on polilcy:[-0.93505406  0.48669228]
action based on polilcy:[-0.98333156  0.3739294 ]
action based on polilcy:[-0.9999051   0.37807506]
action based on polilcy:[-0.9992685   0.23361054]
action based on polilcy:[-0.99943084  0.2044585 ]
action based on polilcy:[-0.99857104  0.2302384 ]
action based on polilcy:[-0.9986054   0.42694578]
action based on polilcy:[-0.8567709   0.48165464]
action based on polilcy:[-0.74923134  0.5586747 ]
Total T: 40935 Episode Num: 2580 Episode T: 14 Reward: -1092.459510
action based on polilcy:[-0.71500313  0.73762953]
action based on polilcy:[-0.7730286  0.5378148]
action based on polilcy:[-0.97155666  0.4250862 ]
action based on polilcy:[-0.9975915   0.38692182]
action based on polilcy:[-0.99936426  0.

Total T: 41088 Episode Num: 2592 Episode T: 16 Reward: -1150.670841
action based on polilcy:[-0.73075664  0.7297622 ]
action based on polilcy:[-0.8017823  0.5421953]
action based on polilcy:[-0.7942552   0.50929797]
action based on polilcy:[-0.8126161  0.591583 ]
action based on polilcy:[-0.8022388  0.5537766]
action based on polilcy:[-0.75796205  0.70208716]
action based on polilcy:[-0.7666707   0.48526952]
action based on polilcy:[-0.9413101   0.60801023]
action based on polilcy:[-0.8137183   0.61660194]
action based on polilcy:[-0.7804236  0.6712451]
action based on polilcy:[-0.7913792   0.60336554]
action based on polilcy:[-0.97984946  0.6181154 ]
action based on polilcy:[-0.8878549   0.33898017]
action based on polilcy:[-0.97617525  0.32706633]
action based on polilcy:[-0.97809994  0.2277987 ]
action based on polilcy:[-0.97321427  0.20082216]
Total T: 41104 Episode Num: 2593 Episode T: 16 Reward: -1225.710836
action based on polilcy:[-0.71858335  0.7335942 ]
action based on polilc

action based on polilcy:[-0.74922335  0.7682606 ]
action based on polilcy:[-0.80166173  0.553056  ]
action based on polilcy:[-0.81295866  0.5880772 ]
action based on polilcy:[-0.799798   0.5507693]
action based on polilcy:[-0.82663006  0.55944175]
action based on polilcy:[-0.8422643   0.45108664]
action based on polilcy:[-0.94195956  0.7219454 ]
action based on polilcy:[-0.74023306  0.4559748 ]
action based on polilcy:[-0.93687075  0.47901523]
action based on polilcy:[-0.92228544  0.41056743]
action based on polilcy:[-0.6548413  0.4499181]
Total T: 41259 Episode Num: 2606 Episode T: 11 Reward: -787.690961
action based on polilcy:[-0.7292348   0.76518804]
action based on polilcy:[-0.7920048  0.5403382]
action based on polilcy:[-0.84413856  0.6141642 ]
action based on polilcy:[-0.8984197  0.6531025]
action based on polilcy:[-0.78001225  0.43087554]
action based on polilcy:[-0.3630622  0.8059031]
action based on polilcy:[-0.1887505  0.5844661]
Total T: 41266 Episode Num: 2607 Episode T: 7

action based on polilcy:[-0.24829417  0.88157386]
action based on polilcy:[0.01692097 0.92954934]
action based on polilcy:[-0.7203869  0.6480849]
action based on polilcy:[-0.7461728  0.5752069]
action based on polilcy:[-0.2512046   0.92319024]
action based on polilcy:[-0.6960318  0.5234568]
action based on polilcy:[-0.7447485   0.62479585]
action based on polilcy:[-0.71308684  0.67585045]
action based on polilcy:[-0.5370275   0.74663806]
action based on polilcy:[0.23607548 0.85408026]
action based on polilcy:[-0.49626076  0.97136676]
action based on polilcy:[-0.77490747  0.45315108]
action based on polilcy:[-0.72106564  0.4440104 ]
action based on polilcy:[-0.36155114  0.5253763 ]
action based on polilcy:[-0.39136592  0.4274124 ]
Total T: 41434 Episode Num: 2623 Episode T: 15 Reward: -1575.759584
action based on polilcy:[-0.33047703  0.8880368 ]
action based on polilcy:[-0.79489124  0.58006793]
action based on polilcy:[-0.753162    0.51049745]
action based on polilcy:[-0.9108983  0.430

action based on polilcy:[-0.2981171  0.940358 ]
action based on polilcy:[-0.7019931  0.6738527]
action based on polilcy:[-0.28870517  0.86400926]
action based on polilcy:[0.13542731 0.6476136 ]
action based on polilcy:[0.03274783 0.9912628 ]
action based on polilcy:[-0.01744832  0.9983045 ]
action based on polilcy:[0.5975791 0.9332854]
Total T: 41597 Episode Num: 2641 Episode T: 7 Reward: -586.289578
action based on polilcy:[-0.2994029  0.944017 ]
action based on polilcy:[-0.71773064  0.5857062 ]
action based on polilcy:[-0.73675567  0.8077605 ]
action based on polilcy:[-0.75964075  0.6069436 ]
action based on polilcy:[-0.6038298  0.8548814]
action based on polilcy:[-0.6583936   0.54678273]
action based on polilcy:[-0.754789    0.70655847]
action based on polilcy:[-0.7151511  0.5289582]
action based on polilcy:[-0.8197299  0.6491462]
action based on polilcy:[-0.7719157   0.58699006]
action based on polilcy:[-0.59503484  0.9537105 ]
action based on polilcy:[-0.67653286  0.79781586]
acti

action based on polilcy:[-0.29360932  0.9729303 ]
action based on polilcy:[-0.24958634  0.7134418 ]
action based on polilcy:[0.03836003 0.68825305]
action based on polilcy:[0.41418278 0.9677576 ]
action based on polilcy:[0.29098535 0.9974215 ]
action based on polilcy:[-0.39068034  0.99921584]
action based on polilcy:[0.37693855 0.6343982 ]
Total T: 41748 Episode Num: 2658 Episode T: 7 Reward: -703.125220
action based on polilcy:[-0.19776104  0.97604626]
action based on polilcy:[0.1512751  0.90935725]
action based on polilcy:[0.17750405 0.98617154]
action based on polilcy:[0.16006227 0.9908297 ]
action based on polilcy:[-0.36603114  0.99979967]
action based on polilcy:[0.49618503 0.80045265]
Total T: 41754 Episode Num: 2659 Episode T: 6 Reward: -748.410862
action based on polilcy:[-0.2582494  0.9781763]
action based on polilcy:[-0.20857136  0.8055449 ]
action based on polilcy:[-0.01469657  0.8002287 ]
action based on polilcy:[0.09209346 0.8754797 ]
action based on polilcy:[0.27288514 0.

action based on polilcy:[-0.22029401  0.9835803 ]
action based on polilcy:[-0.0267889  0.9849214]
action based on polilcy:[0.10408576 0.95550126]
action based on polilcy:[0.30313423 0.96180904]
action based on polilcy:[0.37662223 0.9948861 ]
action based on polilcy:[-0.6177468  0.9998946]
Total T: 41897 Episode Num: 2676 Episode T: 6 Reward: -764.142127
action based on polilcy:[-0.22636405  0.9839619 ]
action based on polilcy:[0.00273612 0.9708663 ]
action based on polilcy:[-0.29610914  0.99984014]
action based on polilcy:[0.31620824 0.9565209 ]
action based on polilcy:[-0.46003976  0.99984926]
action based on polilcy:[0.22102064 0.7506957 ]
action based on polilcy:[-0.00509191  0.99978113]
action based on polilcy:[0.4652594 0.9857344]
Total T: 41905 Episode Num: 2677 Episode T: 8 Reward: -1022.986764
action based on polilcy:[-0.21314476  0.98359114]
action based on polilcy:[-0.1474383   0.94242704]
action based on polilcy:[-0.00786986  0.99682015]
action based on polilcy:[-0.10396004 

action based on polilcy:[-0.25179896  0.9855988 ]
action based on polilcy:[-0.3014287  0.7236875]
action based on polilcy:[-0.9745617  0.9999932]
action based on polilcy:[-0.99912924  0.54512566]
action based on polilcy:[-0.97219723  0.7162515 ]
action based on polilcy:[-0.49608293  0.99799603]
action based on polilcy:[0.3726554 0.9533712]
action based on polilcy:[-0.9945521   0.99999523]
action based on polilcy:[0.26796335 0.7507934 ]
Total T: 42551 Episode Num: 2704 Episode T: 9 Reward: -972.057701
action based on polilcy:[-0.21991913  0.9855472 ]
action based on polilcy:[-0.6513898   0.68905455]
action based on polilcy:[-0.75682265  0.9995488 ]
action based on polilcy:[-0.9885493   0.99999785]
action based on polilcy:[0.14754522 0.71149725]
action based on polilcy:[-0.9783504   0.99999964]
action based on polilcy:[0.43904132 0.7997919 ]
Total T: 42558 Episode Num: 2705 Episode T: 7 Reward: -740.156105
action based on polilcy:[-0.20242035  0.98494434]
action based on polilcy:[-0.4946

action based on polilcy:[-0.25166577  0.98358613]
action based on polilcy:[-0.48511392  0.99026734]
action based on polilcy:[-0.612682   0.9984451]
action based on polilcy:[-0.9884373   0.99962175]
action based on polilcy:[-0.98039514  0.9999969 ]
action based on polilcy:[0.41313243 0.8753025 ]
Total T: 42714 Episode Num: 2721 Episode T: 6 Reward: -822.628159
action based on polilcy:[-0.30757242  0.9840305 ]
action based on polilcy:[-0.64174867  0.99469507]
action based on polilcy:[-0.99932224  0.9998761 ]
action based on polilcy:[-0.9801683   0.99998456]
action based on polilcy:[-0.6770413  0.9994418]
action based on polilcy:[-0.921527   0.9999977]
Total T: 42720 Episode Num: 2722 Episode T: 6 Reward: -1082.074136
action based on polilcy:[-0.33218879  0.98391604]
action based on polilcy:[-0.49229574  0.9609238 ]
action based on polilcy:[-0.34428254  0.98058105]
action based on polilcy:[-0.98540413  0.9999961 ]
action based on polilcy:[-0.9843295  0.9999995]
action based on polilcy:[0.

action based on polilcy:[-0.26968524  0.98378444]
action based on polilcy:[-0.8565191  0.9861503]
action based on polilcy:[0.00999554 0.75118047]
action based on polilcy:[-0.97783965  0.9999983 ]
action based on polilcy:[0.4124083  0.99521047]
action based on polilcy:[0.07456639 0.9956855 ]
Total T: 42868 Episode Num: 2739 Episode T: 6 Reward: -747.351229
action based on polilcy:[-0.29661313  0.98360324]
action based on polilcy:[-0.99023443  0.99673426]
action based on polilcy:[0.14961955 0.97280186]
action based on polilcy:[-0.9564303   0.99999934]
action based on polilcy:[0.27984688 0.8132629 ]
action based on polilcy:[-0.47344154  0.99939156]
action based on polilcy:[0.48197624 0.9509282 ]
Total T: 42875 Episode Num: 2740 Episode T: 7 Reward: -800.730332
action based on polilcy:[-0.30111665  0.98345864]
action based on polilcy:[-0.11087506  0.71253866]
action based on polilcy:[-0.9013388  0.9999706]
action based on polilcy:[-0.980293    0.75106955]
action based on polilcy:[0.2008476

action based on polilcy:[-0.2536832  0.9849281]
action based on polilcy:[-0.42108735  0.97380507]
action based on polilcy:[-0.9836204   0.99947804]
action based on polilcy:[0.13623968 0.7921281 ]
action based on polilcy:[-0.7586862   0.99999976]
action based on polilcy:[0.38053164 0.8183045 ]
action based on polilcy:[-0.06038287  0.9999983 ]
Total T: 43014 Episode Num: 2757 Episode T: 7 Reward: -690.821604
action based on polilcy:[-0.30656114  0.98550904]
action based on polilcy:[-0.974985   0.6122164]
action based on polilcy:[-0.6029937  0.7027463]
action based on polilcy:[-0.15333505  0.9576966 ]
action based on polilcy:[-0.8004434  0.9999172]
action based on polilcy:[0.13332199 0.88533765]
action based on polilcy:[-0.12655051  0.99683905]
action based on polilcy:[0.3221555 0.998804 ]
action based on polilcy:[0.32635692 0.8149405 ]
action based on polilcy:[0.6815064 0.9995647]
Total T: 43024 Episode Num: 2758 Episode T: 10 Reward: -857.162393
action based on polilcy:[-0.27653015  0.9

action based on polilcy:[-0.32349828  0.98552614]
action based on polilcy:[-0.14263937  0.7894907 ]
action based on polilcy:[-0.9999578  0.9998623]
action based on polilcy:[0.10507914 0.9321494 ]
action based on polilcy:[0.32977355 0.9870789 ]
action based on polilcy:[-0.19642714  0.99998474]
action based on polilcy:[-0.13941738  0.99999887]
Total T: 43171 Episode Num: 2772 Episode T: 7 Reward: -731.739725
action based on polilcy:[-0.26068503  0.9853478 ]
action based on polilcy:[-0.1525721   0.78496665]
action based on polilcy:[-0.9084498  0.9999904]
action based on polilcy:[0.26903975 0.9486883 ]
action based on polilcy:[0.41429734 0.9416885 ]
action based on polilcy:[0.23943275 0.99983424]
action based on polilcy:[0.35297188 0.80254513]
action based on polilcy:[0.6062263 0.9971593]
Total T: 43179 Episode Num: 2773 Episode T: 8 Reward: -749.209874
action based on polilcy:[-0.3307539   0.98537093]
action based on polilcy:[0.00350741 0.9663825 ]
action based on polilcy:[-0.5170653   0.

action based on polilcy:[-0.29244444  0.9874247 ]
action based on polilcy:[-0.12884821  0.812935  ]
action based on polilcy:[-0.3578105   0.99896663]
action based on polilcy:[0.19172719 0.8746612 ]
action based on polilcy:[0.3571008 0.9942524]
action based on polilcy:[0.43081495 0.9299977 ]
action based on polilcy:[0.42860353 0.8187534 ]
action based on polilcy:[-0.19007745  0.99999994]
Total T: 43321 Episode Num: 2789 Episode T: 8 Reward: -642.568507
action based on polilcy:[-0.2967317   0.98721546]
action based on polilcy:[-0.15829149  0.77285284]
action based on polilcy:[-0.69061804  0.999793  ]
action based on polilcy:[-0.3154272  0.9999101]
action based on polilcy:[0.10573514 0.9999836 ]
action based on polilcy:[-0.30093282  1.        ]
Total T: 43327 Episode Num: 2790 Episode T: 6 Reward: -733.971442
action based on polilcy:[-0.2883042  0.9868125]
action based on polilcy:[-0.8204353   0.88412017]
action based on polilcy:[-0.39236873  0.9636471 ]
action based on polilcy:[-0.242822

action based on polilcy:[-0.3070189   0.98926103]
action based on polilcy:[0.04070472 0.98213184]
action based on polilcy:[-0.942231    0.99999905]
action based on polilcy:[-0.9930787   0.83062786]
action based on polilcy:[-0.9889317   0.91972244]
action based on polilcy:[-0.9257051  0.9999979]
action based on polilcy:[0.12117346 0.82885647]
action based on polilcy:[-0.99995524  0.9998679 ]
action based on polilcy:[0.12751545 0.99873894]
action based on polilcy:[0.3464698 0.8347152]
Total T: 43480 Episode Num: 2807 Episode T: 10 Reward: -1248.693100
action based on polilcy:[-0.24365158  0.9895    ]
action based on polilcy:[-0.97440463  0.99886876]
action based on polilcy:[-0.8715437  0.9999941]
action based on polilcy:[0.25464964 0.9879264 ]
action based on polilcy:[0.40852645 0.9922801 ]
action based on polilcy:[-0.5196638   0.99994946]
action based on polilcy:[0.42968336 0.99970096]
Total T: 43487 Episode Num: 2808 Episode T: 7 Reward: -913.304662
action based on polilcy:[-0.32469493

action based on polilcy:[-0.27270913  0.9927298 ]
action based on polilcy:[-0.9999922  0.9998414]
action based on polilcy:[-0.08977894  0.81787884]
action based on polilcy:[0.06579349 0.88196295]
action based on polilcy:[-0.9207004   0.99999994]
action based on polilcy:[0.46988007 0.9959705 ]
action based on polilcy:[0.72901374 0.9996505 ]
action based on polilcy:[0.52460444 0.96430945]
Total T: 43647 Episode Num: 2826 Episode T: 8 Reward: -814.741479
action based on polilcy:[-0.33479217  0.9930049 ]
action based on polilcy:[-0.5063764   0.99832594]
action based on polilcy:[-0.15846086  0.92318034]
action based on polilcy:[-0.9653616  0.9999991]
action based on polilcy:[0.14790341 0.94439536]
action based on polilcy:[0.2560858  0.86042726]
action based on polilcy:[-0.06732021  0.99999845]
action based on polilcy:[0.71807796 0.99971044]
Total T: 43655 Episode Num: 2827 Episode T: 8 Reward: -977.293267
action based on polilcy:[-0.2895914   0.99271226]
action based on polilcy:[0.0361659  

action based on polilcy:[-0.30086675  0.9947426 ]
action based on polilcy:[-0.0455879  0.9979718]
action based on polilcy:[-0.70657516  0.9999662 ]
action based on polilcy:[0.11911096 0.89007235]
action based on polilcy:[0.201789  0.8767256]
action based on polilcy:[0.04943252 0.99999964]
action based on polilcy:[-0.12702648  1.        ]
Total T: 43790 Episode Num: 2844 Episode T: 7 Reward: -720.283740
action based on polilcy:[-0.29982758  0.99488765]
action based on polilcy:[-0.11251242  0.851531  ]
action based on polilcy:[-0.89273846  0.9666534 ]
action based on polilcy:[-0.11228157  0.8385921 ]
action based on polilcy:[-0.04545615  0.8321039 ]
action based on polilcy:[-0.9977252  0.6863289]
action based on polilcy:[-0.29752436  0.79087836]
action based on polilcy:[-0.97266895  0.76783293]
action based on polilcy:[-0.01246196  0.9354072 ]
action based on polilcy:[-1.          0.99987715]
action based on polilcy:[-0.33674636  0.9972006 ]
action based on polilcy:[-0.13309044  0.811105

action based on polilcy:[-0.2768251   0.99685556]
action based on polilcy:[-0.48533016  0.981839  ]
action based on polilcy:[-0.01975821  0.96084285]
action based on polilcy:[-0.9999812   0.99995613]
action based on polilcy:[0.16426575 0.92334145]
action based on polilcy:[0.2945438  0.97955585]
action based on polilcy:[-0.6246383  0.9999998]
action based on polilcy:[0.50397086 0.9487091 ]
action based on polilcy:[0.5415286  0.91788256]
Total T: 43940 Episode Num: 2860 Episode T: 9 Reward: -855.804370
action based on polilcy:[-0.28166184  0.9972377 ]
action based on polilcy:[-0.99997437  0.9999707 ]
action based on polilcy:[0.14155011 0.94265324]
action based on polilcy:[0.1889704  0.90086985]
action based on polilcy:[-0.7114599  1.       ]
action based on polilcy:[-0.2746424  1.       ]
Total T: 43946 Episode Num: 2861 Episode T: 6 Reward: -796.920946
action based on polilcy:[-0.31793004  0.9972849 ]
action based on polilcy:[-0.54776394  0.9873573 ]
action based on polilcy:[-0.2900649 

action based on polilcy:[-0.32269752  0.9963668 ]
action based on polilcy:[-0.3177946  0.9950065]
action based on polilcy:[-0.9974776  0.9997966]
action based on polilcy:[-0.04612476  0.91421187]
action based on polilcy:[-0.6179991  0.7887639]
action based on polilcy:[-0.05540159  0.8894729 ]
action based on polilcy:[-0.10632274  0.88841397]
action based on polilcy:[-0.09213994  0.8499283 ]
action based on polilcy:[-0.10633297  0.98687226]
action based on polilcy:[-0.20067984  1.        ]
Total T: 44589 Episode Num: 2890 Episode T: 10 Reward: -773.493392
action based on polilcy:[-0.32680622  0.9963373 ]
action based on polilcy:[-0.10900945  0.95908886]
action based on polilcy:[-0.13767749  0.9997297 ]
action based on polilcy:[-0.18443029  0.9999967 ]
action based on polilcy:[0.10989677 0.9974414 ]
action based on polilcy:[0.47902584 0.9985731 ]
Total T: 44595 Episode Num: 2891 Episode T: 6 Reward: -560.429626
action based on polilcy:[-0.28187558  0.9964193 ]
action based on polilcy:[-0

action based on polilcy:[-0.26756316  0.9969258 ]
action based on polilcy:[-0.12505364  0.9486383 ]
action based on polilcy:[-0.9999962   0.99909306]
action based on polilcy:[-0.8422545   0.94056094]
action based on polilcy:[-0.27373466  0.9848681 ]
action based on polilcy:[-0.40429223  0.9986754 ]
action based on polilcy:[-0.90919435  0.9993405 ]
action based on polilcy:[-0.07462437  0.84386283]
action based on polilcy:[-0.2780552  0.999963 ]
action based on polilcy:[-0.03195588  0.8542298 ]
action based on polilcy:[-0.08205931  0.88283   ]
action based on polilcy:[-0.00354663  1.        ]
action based on polilcy:[0.36727607 0.9972272 ]
Total T: 44739 Episode Num: 2906 Episode T: 13 Reward: -1394.355407
action based on polilcy:[-0.33125266  0.99726856]
action based on polilcy:[-0.15858643  0.99796563]
action based on polilcy:[-0.14084622  0.87807566]
action based on polilcy:[-0.09679602  0.86163896]
action based on polilcy:[-0.13047774  0.9818578 ]
action based on polilcy:[0.03753811 

action based on polilcy:[-0.33044666  0.9974643 ]
action based on polilcy:[-0.13479385  0.978894  ]
action based on polilcy:[-0.16269347  0.9975025 ]
action based on polilcy:[-0.26095563  0.9999932 ]
action based on polilcy:[0.00426658 0.9999927 ]
action based on polilcy:[0.16901562 0.99773246]
action based on polilcy:[0.48320603 0.9999979 ]
Total T: 44900 Episode Num: 2924 Episode T: 7 Reward: -555.069114
action based on polilcy:[-0.33004355  0.99758446]
action based on polilcy:[-0.20226717  0.9997129 ]
action based on polilcy:[-0.74619997  1.        ]
action based on polilcy:[-1.          0.99953574]
action based on polilcy:[-0.7149868  0.9950883]
action based on polilcy:[-0.99996746  0.97578275]
action based on polilcy:[-0.24343507  0.99989825]
action based on polilcy:[-0.26685157  0.99996966]
action based on polilcy:[-0.09980341  0.89314395]
action based on polilcy:[-0.35611275  0.9582171 ]
action based on polilcy:[-0.37431824  0.9999999 ]
Total T: 44911 Episode Num: 2925 Episode T

action based on polilcy:[-0.34514585  0.9977671 ]
action based on polilcy:[-0.1703194  0.9996588]
action based on polilcy:[-0.2890099  0.9999816]
action based on polilcy:[-0.25582337  0.99997395]
action based on polilcy:[-0.21261622  0.9999979 ]
action based on polilcy:[-0.06378181  0.902276  ]
action based on polilcy:[0.4067985 0.9953856]
Total T: 45040 Episode Num: 2942 Episode T: 7 Reward: -628.913804
action based on polilcy:[-0.3482387  0.9978241]
action based on polilcy:[-0.9492314  0.9994753]
action based on polilcy:[-0.12831876  0.9997854 ]
action based on polilcy:[-0.49111024  0.99999976]
action based on polilcy:[-0.99998367  0.999945  ]
action based on polilcy:[-0.76684797  1.        ]
action based on polilcy:[-0.17734931  0.8929465 ]
action based on polilcy:[-0.1666676  0.9999972]
Total T: 45048 Episode Num: 2943 Episode T: 8 Reward: -1048.570861
action based on polilcy:[-0.31162503  0.99787265]
action based on polilcy:[-0.13547947  0.9608783 ]
action based on polilcy:[-0.997

action based on polilcy:[-0.35216838  0.99809146]
action based on polilcy:[-0.9999219  0.9999971]
action based on polilcy:[-0.06056246  0.89733607]
action based on polilcy:[-0.13076402  0.9908063 ]
action based on polilcy:[-0.1433387  0.878608 ]
action based on polilcy:[-0.21983327  0.9999914 ]
action based on polilcy:[-0.5907604  1.       ]
action based on polilcy:[-0.01804838  0.9459153 ]
action based on polilcy:[-0.02815096  0.9248766 ]
Total T: 45199 Episode Num: 2959 Episode T: 9 Reward: -770.016961
action based on polilcy:[-0.336913  0.998176]
action based on polilcy:[-0.14461248  0.90649647]
action based on polilcy:[-0.2787458  0.9999726]
action based on polilcy:[-0.9999528  0.7066176]
action based on polilcy:[-0.06644956  0.9076495 ]
action based on polilcy:[-0.8189332  1.       ]
action based on polilcy:[-0.15236478  0.9279283 ]
action based on polilcy:[-0.02293291  0.9192133 ]
action based on polilcy:[0.60565424 0.999999  ]
Total T: 45208 Episode Num: 2960 Episode T: 9 Reward

action based on polilcy:[-0.32492572  0.99857855]
action based on polilcy:[-0.9154534   0.99987906]
action based on polilcy:[-0.10863386  0.8946588 ]
action based on polilcy:[-0.9999941   0.99999994]
action based on polilcy:[-0.03948323  0.86121875]
action based on polilcy:[-0.8938262  1.       ]
action based on polilcy:[-0.10754814  0.94313014]
action based on polilcy:[-0.00723664  0.9415626 ]
action based on polilcy:[0.91022617 1.        ]
Total T: 45350 Episode Num: 2977 Episode T: 9 Reward: -989.143464
action based on polilcy:[-0.32669172  0.99857914]
action based on polilcy:[-0.27041024  0.99012107]
action based on polilcy:[-0.17044316  0.84607977]
action based on polilcy:[-0.91225463  0.851594  ]
action based on polilcy:[-0.12497551  0.90885735]
action based on polilcy:[-0.18955703  0.99989635]
action based on polilcy:[-0.26599324  0.99999297]
action based on polilcy:[-0.46121487  0.99999994]
action based on polilcy:[-0.12239996  0.942093  ]
action based on polilcy:[0.05074676 0.

action based on polilcy:[-0.32561058  0.9990708 ]
action based on polilcy:[-0.9999997  0.9999999]
action based on polilcy:[-0.61468875  0.99999994]
action based on polilcy:[-0.19042714  0.9999789 ]
action based on polilcy:[-0.9388716  1.       ]
action based on polilcy:[-0.2680919   0.97782385]
action based on polilcy:[0.38839868 1.        ]
Total T: 45497 Episode Num: 2994 Episode T: 7 Reward: -983.953671
action based on polilcy:[-0.3059178  0.9990441]
action based on polilcy:[-0.11280079  0.93223804]
action based on polilcy:[-0.05948767  0.96933186]
action based on polilcy:[-0.39916527  0.87853265]
action based on polilcy:[-0.14937626  0.9637264 ]
action based on polilcy:[-0.945965  1.      ]
action based on polilcy:[-0.51446974  1.        ]
Total T: 45504 Episode Num: 2995 Episode T: 7 Reward: -694.281600
action based on polilcy:[-0.3397652  0.9990373]
action based on polilcy:[-0.18551032  0.99991626]
action based on polilcy:[-0.00712488  0.95875716]
action based on polilcy:[-0.1893

action based on polilcy:[-0.33496934  0.9991481 ]
action based on polilcy:[-0.2034764  0.9999151]
action based on polilcy:[-0.9483278  1.       ]
action based on polilcy:[-0.39029562  0.99999976]
action based on polilcy:[-0.28823382  0.9962149 ]
action based on polilcy:[-0.14242356  0.99214166]
action based on polilcy:[0.9649062 1.       ]
Total T: 45644 Episode Num: 3010 Episode T: 7 Reward: -626.098742
action based on polilcy:[-0.34630156  0.999145  ]
action based on polilcy:[-0.5753319   0.99708104]
action based on polilcy:[-0.42313272  0.98800814]
action based on polilcy:[-0.3374392   0.99974436]
action based on polilcy:[-0.5396893  0.9999997]
action based on polilcy:[-0.79651505  0.99977356]
action based on polilcy:[-1.          0.99972504]
action based on polilcy:[-0.26779246  0.87149084]
action based on polilcy:[-0.99999857  0.99594027]
action based on polilcy:[-0.73279643  0.99999994]
action based on polilcy:[-0.1264971   0.99989223]
action based on polilcy:[-0.8889141  1.     

action based on polilcy:[-0.30815488  0.9994367 ]
action based on polilcy:[-0.8465977  0.9686686]
action based on polilcy:[-0.9993282  0.9999916]
action based on polilcy:[-0.00533949  0.98111194]
action based on polilcy:[-0.9839002  1.       ]
action based on polilcy:[-0.31053025  0.9954576 ]
action based on polilcy:[-0.20826487  0.9970559 ]
action based on polilcy:[0.67361045 1.        ]
action based on polilcy:[0.02644568 1.        ]
Total T: 45801 Episode Num: 3028 Episode T: 9 Reward: -772.765756
action based on polilcy:[-0.31797948  0.99944204]
action based on polilcy:[-0.11490143  0.9688521 ]
action based on polilcy:[-0.8305507  1.       ]
action based on polilcy:[-0.4041092  0.9936502]
action based on polilcy:[-0.49934927  0.9933675 ]
action based on polilcy:[-0.08627924  0.9999992 ]
action based on polilcy:[-0.2989455   0.99719346]
Total T: 45808 Episode Num: 3029 Episode T: 7 Reward: -510.393130
action based on polilcy:[-0.35323128  0.9994438 ]
action based on polilcy:[-0.1192

action based on polilcy:[-0.30235627  0.9995204 ]
action based on polilcy:[-0.98851776  0.9999845 ]
action based on polilcy:[-0.05458711  0.9891011 ]
action based on polilcy:[-0.99184686  1.        ]
action based on polilcy:[0.01164086 0.9999771 ]
action based on polilcy:[0.6661246 1.       ]
Total T: 45954 Episode Num: 3046 Episode T: 6 Reward: -650.869358
action based on polilcy:[-0.30164683  0.9995242 ]
action based on polilcy:[-0.06605278  0.9743324 ]
action based on polilcy:[-0.24208143  0.9999855 ]
action based on polilcy:[-0.9923445  1.       ]
action based on polilcy:[-0.3087148  0.9964185]
action based on polilcy:[0.5878926  0.99999994]
action based on polilcy:[0.82771707 0.99999905]
action based on polilcy:[0.7817062 0.9997366]
Total T: 45962 Episode Num: 3047 Episode T: 8 Reward: -561.985346
action based on polilcy:[-0.2977366   0.99952054]
action based on polilcy:[-1.         0.9999999]
action based on polilcy:[-0.04733726  0.9845068 ]
action based on polilcy:[-0.8132755  1

action based on polilcy:[-0.99765223  0.9998265 ]
action based on polilcy:[-0.03095833  0.9784082 ]
action based on polilcy:[-0.6466194   0.99999934]
action based on polilcy:[-0.99641484  1.        ]
action based on polilcy:[-0.40440607  0.99927837]
action based on polilcy:[0.06964014 0.99962187]
action based on polilcy:[0.7792119 1.       ]
Total T: 46586 Episode Num: 3074 Episode T: 7 Reward: -601.545776
action based on polilcy:[-0.9966152   0.99984205]
action based on polilcy:[-0.15520938  0.99925786]
action based on polilcy:[-0.9987952  0.9999341]
action based on polilcy:[-0.7129684  0.999843 ]
action based on polilcy:[-0.9992351  1.       ]
action based on polilcy:[-0.6889838   0.99711365]
action based on polilcy:[-0.00863829  1.        ]
Total T: 46593 Episode Num: 3075 Episode T: 7 Reward: -754.547356
action based on polilcy:[-0.99589914  0.9998539 ]
action based on polilcy:[-0.9708782   0.96997225]
action based on polilcy:[-0.6241964  0.9937529]
action based on polilcy:[-0.9999

action based on polilcy:[-0.99687266  0.9998853 ]
action based on polilcy:[-0.06768663  0.9820115 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.64576894  0.99999326]
action based on polilcy:[-0.9845377  1.       ]
action based on polilcy:[-0.52471197  0.9989558 ]
action based on polilcy:[-0.2952409   0.99999136]
action based on polilcy:[0.07937739 0.99999994]
Total T: 46736 Episode Num: 3091 Episode T: 8 Reward: -803.467979
action based on polilcy:[-0.9982045  0.9998745]
action based on polilcy:[-0.12757507  0.99970335]
action based on polilcy:[-1.         0.9999998]
action based on polilcy:[-0.11395788  0.9775567 ]
action based on polilcy:[-0.9995667   0.99999994]
action based on polilcy:[-0.77558184  0.99719584]
action based on polilcy:[-0.6018268  0.9991812]
action based on polilcy:[-0.36700672  0.99948424]
action based on polilcy:[0.36562493 0.99972826]
Total T: 46745 Episode Num: 3092 Episode T: 9 Reward: -677.301861
action based on polilcy:[-0.99897814  0.999870

action based on polilcy:[-0.9901832  0.9999065]
action based on polilcy:[-0.10094721  0.9815505 ]
action based on polilcy:[-0.25777224  0.8985162 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.9482152  1.       ]
action based on polilcy:[-0.5360083  0.9989242]
action based on polilcy:[-0.24536519  0.9994826 ]
action based on polilcy:[0.88681775 1.        ]
Total T: 46887 Episode Num: 3108 Episode T: 8 Reward: -633.268511
action based on polilcy:[-0.9898351  0.9999093]
action based on polilcy:[-0.12376224  0.9990333 ]
action based on polilcy:[-1.          0.99999934]
action based on polilcy:[-0.4310629   0.98727584]
action based on polilcy:[-0.69077665  0.9998394 ]
action based on polilcy:[-0.8303645  1.       ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.          0.99999976]
action based on polilcy:[-0.5166631   0.99999756]
action based on polilcy:[-0.72512466  0.9958058 ]
action based on polilcy:[-0.99999857  0.98172045]
action based on polilcy:[-0.

action based on polilcy:[-0.99563366  0.9998774 ]
action based on polilcy:[-0.4434204  0.9960753]
action based on polilcy:[-0.09605096  0.9777678 ]
action based on polilcy:[-1.          0.99999994]
action based on polilcy:[-0.63408995  0.9689276 ]
action based on polilcy:[-1.         0.9999996]
action based on polilcy:[-0.6433953   0.98759663]
action based on polilcy:[-0.86056674  0.99348146]
action based on polilcy:[-0.4192633  0.9999992]
action based on polilcy:[-0.70594656  0.99599546]
action based on polilcy:[0.05381768 1.        ]
Total T: 47050 Episode Num: 3126 Episode T: 11 Reward: -1041.081608
action based on polilcy:[-0.99541146  0.99986136]
action based on polilcy:[-0.17427409  0.8655261 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.64166987  0.9997001 ]
action based on polilcy:[-0.586007   0.9999997]
action based on polilcy:[-1.          0.99995244]
action based on polilcy:[-0.8803847  1.       ]
action based on polilcy:[-0.23605561  0.99999845]
action bas

action based on polilcy:[-0.9969583   0.99978364]
action based on polilcy:[-0.08763083  0.9683689 ]
action based on polilcy:[-0.69748414  0.9996605 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.6290077  0.9977417]
action based on polilcy:[-0.30633178  0.99929136]
action based on polilcy:[0.6290551 1.       ]
Total T: 47206 Episode Num: 3143 Episode T: 7 Reward: -513.988583
action based on polilcy:[-0.9969954  0.9997592]
action based on polilcy:[-0.553588    0.99115443]
action based on polilcy:[-0.6319182   0.87716174]
action based on polilcy:[-0.99900854  0.7714499 ]
action based on polilcy:[-0.40842286  0.9692121 ]
action based on polilcy:[-0.37938154  0.99942577]
action based on polilcy:[-0.41212603  0.82739675]
action based on polilcy:[-0.9999989  0.9999831]
action based on polilcy:[-0.17208657  0.7726244 ]
action based on polilcy:[-0.4971388  0.9849923]
action based on polilcy:[-0.7697259   0.99304736]
action based on polilcy:[-0.71985453  0.9998292 ]
action based

action based on polilcy:[-0.9970621   0.99921536]
action based on polilcy:[-0.99333346  0.7530114 ]
action based on polilcy:[-0.31357265  0.9968675 ]
action based on polilcy:[-0.998712    0.99741995]
action based on polilcy:[-0.89087355  0.9217922 ]
action based on polilcy:[-0.9708577   0.99995893]
action based on polilcy:[-0.70770735  0.9999584 ]
action based on polilcy:[-0.3311195   0.99999976]
action based on polilcy:[0.18105148 0.9999999 ]
action based on polilcy:[0.41037384 1.        ]
action based on polilcy:[-0.99999994  1.        ]
Total T: 47374 Episode Num: 3160 Episode T: 11 Reward: -1174.399216
action based on polilcy:[-0.9976638   0.99919105]
action based on polilcy:[-0.8610023   0.98972315]
action based on polilcy:[-0.0994302  0.8782161]
action based on polilcy:[-0.5247814  0.9998749]
action based on polilcy:[-1.         0.9995778]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.74760854  0.99244577]
action based on polilcy:[-0.99989474  0.9999986 ]
action b

action based on polilcy:[-0.99934894  0.9986652 ]
action based on polilcy:[-0.12255571  0.99488807]
action based on polilcy:[-0.64122605  0.9769811 ]
action based on polilcy:[-0.8683737  1.       ]
action based on polilcy:[0.35013708 1.        ]
action based on polilcy:[0.6470864 1.       ]
action based on polilcy:[0.5178902  0.99999994]
Total T: 47539 Episode Num: 3180 Episode T: 7 Reward: -566.073805
action based on polilcy:[-0.99963033  0.9985194 ]
action based on polilcy:[-0.18644078  0.8378519 ]
action based on polilcy:[-0.6678915  0.9804489]
action based on polilcy:[-0.99687415  1.        ]
action based on polilcy:[-0.99993926  0.9999999 ]
action based on polilcy:[0.23112677 1.        ]
action based on polilcy:[-0.02571166  1.        ]
Total T: 47546 Episode Num: 3181 Episode T: 7 Reward: -736.929722
action based on polilcy:[-0.9997578  0.9984622]
action based on polilcy:[-1.          0.99999905]
action based on polilcy:[-0.20188528  0.8469729 ]
action based on polilcy:[-0.528570

action based on polilcy:[-0.99981135  0.9974644 ]
action based on polilcy:[-0.9999998  0.9994436]
action based on polilcy:[-0.18782501  0.9286332 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.9180495  0.9749184]
action based on polilcy:[-0.74445033  0.9963767 ]
action based on polilcy:[-0.55114126  0.999111  ]
action based on polilcy:[-0.22681582  0.9998375 ]
action based on polilcy:[0.7342752 1.       ]
Total T: 47687 Episode Num: 3197 Episode T: 9 Reward: -613.686888
action based on polilcy:[-0.99966896  0.9976143 ]
action based on polilcy:[-0.26930985  0.80419004]
action based on polilcy:[-1.         0.9999987]
action based on polilcy:[-0.23085263  0.88231885]
action based on polilcy:[-0.9390839  0.8353236]
action based on polilcy:[-0.61119163  0.98386914]
action based on polilcy:[-0.5891639  1.       ]
action based on polilcy:[-0.6233214   0.99999946]
action based on polilcy:[-0.76936835  0.99230105]
action based on polilcy:[-0.5658293   0.99946564]
action based o

action based on polilcy:[-0.999859    0.98765576]
action based on polilcy:[-0.22070475  0.8966568 ]
action based on polilcy:[-0.6403459  0.9848347]
action based on polilcy:[-0.4754855   0.99998444]
action based on polilcy:[-0.53445154  0.998712  ]
action based on polilcy:[0.31698766 1.        ]
action based on polilcy:[0.44819024 1.        ]
Total T: 47850 Episode Num: 3215 Episode T: 7 Reward: -427.401899
action based on polilcy:[-0.99984586  0.98639196]
action based on polilcy:[-1.         0.9999832]
action based on polilcy:[-0.24973282  0.9388085 ]
action based on polilcy:[-0.9916229  1.       ]
action based on polilcy:[-0.77919716  0.9749704 ]
action based on polilcy:[-0.9307769  1.       ]
action based on polilcy:[-0.98757994  0.98068583]
action based on polilcy:[-0.1301159  0.9999991]
action based on polilcy:[0.45568597 1.        ]
Total T: 47859 Episode Num: 3216 Episode T: 9 Reward: -850.331708
action based on polilcy:[-0.9997262   0.98373306]
action based on polilcy:[-0.933907

Total T: 48025 Episode Num: 3234 Episode T: 10 Reward: -1306.786726
Total T: 48073 Episode Num: 3235 Episode T: 48 Reward: -7379.767255
Total T: 48118 Episode Num: 3236 Episode T: 45 Reward: -6390.214162
Total T: 48145 Episode Num: 3237 Episode T: 27 Reward: -4194.382196
Total T: 48165 Episode Num: 3238 Episode T: 20 Reward: -2761.579981
Total T: 48210 Episode Num: 3239 Episode T: 45 Reward: -7290.853905
Total T: 48255 Episode Num: 3240 Episode T: 45 Reward: -7312.903928
Total T: 48299 Episode Num: 3241 Episode T: 44 Reward: -7455.761716
Total T: 48311 Episode Num: 3242 Episode T: 12 Reward: -1698.993678
Total T: 48338 Episode Num: 3243 Episode T: 27 Reward: -3151.003379
Total T: 48386 Episode Num: 3244 Episode T: 48 Reward: -7598.570596
Total T: 48438 Episode Num: 3245 Episode T: 52 Reward: -6781.779460
Total T: 48484 Episode Num: 3246 Episode T: 46 Reward: -7122.985065
Total T: 48499 Episode Num: 3247 Episode T: 15 Reward: -2473.686166
action based on polilcy:[-0.9999921   0.97624487

action based on polilcy:[-0.99996495  0.98503864]
action based on polilcy:[-0.14195196  0.9684196 ]
action based on polilcy:[-0.03183536  0.99999994]
action based on polilcy:[-0.51509213  0.999773  ]
action based on polilcy:[-0.65091276  1.        ]
action based on polilcy:[0.12407295 1.        ]
Total T: 48637 Episode Num: 3263 Episode T: 6 Reward: -656.991525
action based on polilcy:[-0.99997073  0.9844779 ]
action based on polilcy:[-0.14198679  0.96723807]
action based on polilcy:[-0.10335255  1.        ]
action based on polilcy:[-0.28265157  0.9999989 ]
action based on polilcy:[-0.5221384  0.9990651]
action based on polilcy:[-0.14863333  1.        ]
action based on polilcy:[-0.11288224  1.        ]
Total T: 48644 Episode Num: 3264 Episode T: 7 Reward: -788.021201
action based on polilcy:[-0.9999808  0.9846139]
action based on polilcy:[-0.23893744  0.9384974 ]
action based on polilcy:[-0.19682196  0.9476642 ]
action based on polilcy:[-0.17078514  0.9983466 ]
action based on polilcy:

action based on polilcy:[-0.9999849  0.9933755]
action based on polilcy:[-0.22048847  0.9854316 ]
action based on polilcy:[-0.14576799  0.99710876]
action based on polilcy:[-0.51245296  1.        ]
action based on polilcy:[-0.35540134  0.9999983 ]
action based on polilcy:[-0.2659148  1.       ]
action based on polilcy:[-0.20414582  1.        ]
Total T: 48799 Episode Num: 3282 Episode T: 7 Reward: -541.367803
action based on polilcy:[-0.9999883   0.99328136]
action based on polilcy:[-0.9995012  0.9997553]
action based on polilcy:[-0.9999982  0.9999985]
action based on polilcy:[-0.3475368  0.9995387]
action based on polilcy:[-0.16631706  1.        ]
action based on polilcy:[-0.18611564  0.99999154]
action based on polilcy:[0.30677286 1.        ]
Total T: 48806 Episode Num: 3283 Episode T: 7 Reward: -586.452709
action based on polilcy:[-0.9999896  0.993432 ]
action based on polilcy:[-0.9998226  0.7742263]
action based on polilcy:[-0.10730578  0.90481335]
action based on polilcy:[-1.      

action based on polilcy:[-0.9999941  0.9954816]
action based on polilcy:[-0.61479056  0.9995362 ]
action based on polilcy:[-0.99228066  0.93726873]
action based on polilcy:[-0.27373418  0.9081634 ]
action based on polilcy:[-0.23308069  0.9309053 ]
action based on polilcy:[-0.18874261  0.99534285]
action based on polilcy:[-0.30404    0.9948903]
action based on polilcy:[-0.35080233  1.        ]
action based on polilcy:[-0.99985266  1.        ]
action based on polilcy:[-0.43068606  0.9999398 ]
action based on polilcy:[-0.11853439  1.        ]
Total T: 48959 Episode Num: 3299 Episode T: 11 Reward: -931.507682
action based on polilcy:[-0.9999967  0.9951899]
action based on polilcy:[-0.999895  0.81358 ]
action based on polilcy:[-0.9999973   0.99637306]
action based on polilcy:[-0.9983267  0.9997712]
action based on polilcy:[-0.99994236  0.8587037 ]
action based on polilcy:[-0.9999799  0.7865639]
action based on polilcy:[-0.9998504  0.8276398]
action based on polilcy:[-0.99131227  0.9569499 ]

action based on polilcy:[-0.9999963  0.9967081]
action based on polilcy:[-1.         0.9999992]
action based on polilcy:[-0.11521633  0.9981195 ]
action based on polilcy:[-0.99978155  0.99999976]
action based on polilcy:[-0.45409447  0.99969625]
action based on polilcy:[-0.52781713  0.9994722 ]
action based on polilcy:[-0.9687643  1.       ]
action based on polilcy:[-0.9999971  1.       ]
action based on polilcy:[-0.55899346  0.9994848 ]
action based on polilcy:[-0.22111419  0.99999607]
Total T: 49113 Episode Num: 3316 Episode T: 10 Reward: -824.383316
action based on polilcy:[-0.9999973  0.9969154]
action based on polilcy:[-0.9996308  0.9047003]
action based on polilcy:[-1.          0.99965733]
action based on polilcy:[-1.        0.999631]
action based on polilcy:[-0.9999996  0.8569139]
action based on polilcy:[-0.16697748  0.9679703 ]
action based on polilcy:[-0.37971148  0.99999243]
action based on polilcy:[-0.36367422  1.        ]
action based on polilcy:[-0.2885504  1.       ]
act

action based on polilcy:[-0.9999996   0.99803966]
action based on polilcy:[-1.          0.99999994]
action based on polilcy:[-1.          0.99999976]
action based on polilcy:[-0.19380935  0.9897275 ]
action based on polilcy:[-0.64350116  1.        ]
action based on polilcy:[-0.33831134  1.        ]
action based on polilcy:[-0.18946628  1.        ]
Total T: 49266 Episode Num: 3332 Episode T: 7 Reward: -1000.044607
action based on polilcy:[-0.99999976  0.9979752 ]
action based on polilcy:[-0.22563668  0.9804173 ]
action based on polilcy:[-0.9999998  0.9769777]
action based on polilcy:[-0.12151682  0.9991822 ]
action based on polilcy:[-0.45022023  1.        ]
action based on polilcy:[-0.2719275   0.99999994]
action based on polilcy:[-0.0853799  1.       ]
action based on polilcy:[0.07555512 0.99999744]
action based on polilcy:[0.5109352 1.       ]
Total T: 49275 Episode Num: 3333 Episode T: 9 Reward: -520.200819
action based on polilcy:[-0.99999976  0.99791694]
action based on polilcy:[-0

action based on polilcy:[-0.99999994  0.9988149 ]
action based on polilcy:[-1.          0.99998474]
action based on polilcy:[-0.13212764  0.99999636]
action based on polilcy:[-0.99994534  1.        ]
action based on polilcy:[-0.44393283  0.9999415 ]
action based on polilcy:[-0.15693709  1.        ]
action based on polilcy:[-0.9984949  1.       ]
action based on polilcy:[-0.16953464  0.9999925 ]
Total T: 49418 Episode Num: 3349 Episode T: 8 Reward: -695.766816
action based on polilcy:[-0.99999994  0.99880755]
action based on polilcy:[-1.         0.9999941]
action based on polilcy:[-0.13602348  0.99999595]
action based on polilcy:[-0.99171656  1.        ]
action based on polilcy:[-0.2677472  0.9999901]
action based on polilcy:[-0.01980762  0.99999666]
action based on polilcy:[0.36135226 1.        ]
Total T: 49425 Episode Num: 3350 Episode T: 7 Reward: -670.226678
action based on polilcy:[-0.99999994  0.9987009 ]
action based on polilcy:[-0.21644619  0.9854914 ]
action based on polilcy:[-

action based on polilcy:[-0.99999845  0.9989949 ]
action based on polilcy:[-1.          0.99999774]
action based on polilcy:[-0.2011505  0.9989309]
action based on polilcy:[-0.9999956  1.       ]
action based on polilcy:[-0.39958563  0.99980074]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.32093534  0.9998639 ]
action based on polilcy:[-0.99999994  0.9999969 ]
action based on polilcy:[-0.39198673  0.99991494]
action based on polilcy:[-0.00841277  1.        ]
Total T: 49579 Episode Num: 3367 Episode T: 10 Reward: -1060.914781
action based on polilcy:[-0.99999774  0.9990123 ]
action based on polilcy:[-0.99996835  0.92578554]
action based on polilcy:[-1.        0.999998]
action based on polilcy:[-0.17806937  0.97365206]
action based on polilcy:[-0.41633353  0.9999982 ]
action based on polilcy:[-0.4524386   0.99999434]
action based on polilcy:[-0.2912256  1.       ]
action based on polilcy:[-0.04141315  1.        ]
Total T: 49587 Episode Num: 3368 Episode T: 8 Reward: -711

action based on polilcy:[-0.99999934  0.9986479 ]
action based on polilcy:[-0.22562471  0.9789525 ]
action based on polilcy:[-0.05718261  0.9998618 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.3005476   0.99996555]
action based on polilcy:[-0.31045422  0.9998429 ]
action based on polilcy:[0.03584507 1.        ]
Total T: 49727 Episode Num: 3383 Episode T: 7 Reward: -600.676128
action based on polilcy:[-0.9999996   0.99863416]
action based on polilcy:[-0.22671846  0.94419426]
action based on polilcy:[-1.         0.9999974]
action based on polilcy:[-0.12044466  0.9987742 ]
action based on polilcy:[-0.9994429  1.       ]
action based on polilcy:[-0.4457427   0.99959576]
action based on polilcy:[-0.99358714  1.        ]
action based on polilcy:[-0.30742553  0.99995255]
action based on polilcy:[-0.44359836  0.9988076 ]
Total T: 49736 Episode Num: 3384 Episode T: 9 Reward: -791.315749
action based on polilcy:[-0.9999992   0.99877024]
action based on polilcy:[-0.2575208  0.9

action based on polilcy:[-0.999999   0.9982384]
action based on polilcy:[-0.9999219   0.84082925]
action based on polilcy:[-0.24704348  0.9168731 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.18261279  0.9922731 ]
action based on polilcy:[-0.27166694  1.        ]
action based on polilcy:[-0.24880372  1.        ]
action based on polilcy:[0.0320969 1.       ]
Total T: 49887 Episode Num: 3400 Episode T: 8 Reward: -780.283642
action based on polilcy:[-0.99999917  0.99839264]
action based on polilcy:[-0.9996772  0.8262551]
action based on polilcy:[-0.99964374  0.99999857]
action based on polilcy:[-0.22651848  0.9479231 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.09863221  0.9991226 ]
action based on polilcy:[-0.76433897  1.        ]
action based on polilcy:[0.03059789 1.        ]
Total T: 49895 Episode Num: 3401 Episode T: 8 Reward: -1086.020882
action based on polilcy:[-0.9999984   0.99850047]
action based on polilcy:[-1.          0.99985653]
action ba

action based on polilcy:[-1.         0.5838491]
action based on polilcy:[-1.         0.9200701]
action based on polilcy:[-0.24447377  0.9336518 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.15389562  1.        ]
action based on polilcy:[-0.132618  1.      ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.0560653  0.9999572]
Total T: 50527 Episode Num: 3431 Episode T: 8 Reward: -1006.773907
action based on polilcy:[-1.         0.5308125]
action based on polilcy:[-0.9999992   0.36920094]
action based on polilcy:[-0.9999841   0.60526854]
action based on polilcy:[-0.24351402  0.9416957 ]
action based on polilcy:[-0.17694423  0.9999969 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.40234318  0.99999964]
action based on polilcy:[-0.00204728  0.999753  ]
action based on polilcy:[-0.06901005  1.        ]
action based on polilcy:[0.01775092 1.        ]
Total T: 50537 Episode Num: 3432 Episode T: 10 Reward: -767.157909
action based on polilcy:[-1.

action based on polilcy:[-1.         0.4918903]
action based on polilcy:[-0.9993831  0.6969662]
action based on polilcy:[-0.24920246  0.9268924 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.9898203  0.9984466]
action based on polilcy:[-0.16706088  0.9952417 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.04760484  0.99954504]
Total T: 50705 Episode Num: 3447 Episode T: 9 Reward: -886.724885
action based on polilcy:[-1.          0.48159418]
action based on polilcy:[-0.27754566  0.8978436 ]
action based on polilcy:[-0.22674386  0.97321403]
action based on polilcy:[-0.22173667  0.97270375]
action based on polilcy:[-1.         0.9942907]
action based on polilcy:[-0.24494451  0.96408314]
action based on polilcy:[-1.        0.999999]
action based on polilcy:[-0.9999976  0.9990653]
action based on polilcy:[-0.39483795  1.        ]
action based on polilcy:[-0.2815593  1.       ]
Total T: 50715 Episode Num: 3448 Episode T: 10 R

action based on polilcy:[-1.          0.47818094]
action based on polilcy:[-0.99999946  0.41021845]
action based on polilcy:[-0.9994155   0.44089973]
action based on polilcy:[-0.82703614  0.76874185]
action based on polilcy:[-0.19761966  0.96655786]
action based on polilcy:[-0.0546882   0.99951094]
action based on polilcy:[-0.27290842  0.998162  ]
action based on polilcy:[-0.10186776  0.99966437]
action based on polilcy:[-0.00466087  0.9999721 ]
action based on polilcy:[-0.05953464  1.        ]
action based on polilcy:[-0.00118637  1.        ]
action based on polilcy:[0.22601016 0.9999998 ]
Total T: 50870 Episode Num: 3464 Episode T: 12 Reward: -820.410453
action based on polilcy:[-1.          0.46384966]
action based on polilcy:[-0.27187082  0.9304706 ]
action based on polilcy:[-0.9994918  0.9995583]
action based on polilcy:[-0.9999991  0.6704021]
action based on polilcy:[-1.         0.9669524]
action based on polilcy:[-1.          0.99977636]
action based on polilcy:[-0.22594994  0.9

action based on polilcy:[-1.          0.49860057]
action based on polilcy:[-0.23350649  0.88307273]
action based on polilcy:[-0.21853387  0.9720622 ]
action based on polilcy:[-0.9999957  1.       ]
action based on polilcy:[-0.24063738  1.        ]
action based on polilcy:[0.0295469  0.99999934]
action based on polilcy:[0.41994214 0.99999815]
Total T: 51027 Episode Num: 3478 Episode T: 7 Reward: -509.004995
action based on polilcy:[-1.          0.45827103]
action based on polilcy:[-0.99999964  0.30233446]
action based on polilcy:[-0.99999905  0.35116932]
action based on polilcy:[-0.9988072  0.5911695]
action based on polilcy:[-0.9999982  0.4461536]
action based on polilcy:[-0.23367198  0.9429144 ]
action based on polilcy:[-0.9999994  0.6332302]
action based on polilcy:[-0.21681331  0.9662011 ]
action based on polilcy:[-0.99950695  1.        ]
action based on polilcy:[-0.53382266  0.9996745 ]
action based on polilcy:[-0.1389255  1.       ]
action based on polilcy:[-1.  1.]
action based o

action based on polilcy:[-1.          0.56363595]
action based on polilcy:[-0.99999124  0.37185434]
action based on polilcy:[-0.3786726   0.75866777]
action based on polilcy:[-1.         0.9996526]
action based on polilcy:[-0.25215465  0.9909373 ]
action based on polilcy:[-0.38428265  1.        ]
action based on polilcy:[-0.28735283  1.        ]
action based on polilcy:[0.02076774 0.99999994]
action based on polilcy:[0.42873856 0.9999984 ]
Total T: 51211 Episode Num: 3495 Episode T: 9 Reward: -728.984664
action based on polilcy:[-1.          0.57180005]
action based on polilcy:[-0.9999906   0.43563855]
action based on polilcy:[-1.          0.95504797]
action based on polilcy:[-1.          0.99574065]
action based on polilcy:[-0.18205313  0.9763569 ]
action based on polilcy:[-0.3121559  1.       ]
action based on polilcy:[-0.0644585  0.9997919]
action based on polilcy:[-0.115215  1.      ]
action based on polilcy:[0.43655753 1.        ]
Total T: 51220 Episode Num: 3496 Episode T: 9 Rewa

action based on polilcy:[-0.99999994  0.5932417 ]
action based on polilcy:[-0.16105218  0.9357885 ]
action based on polilcy:[-0.3073078  0.9996349]
action based on polilcy:[-0.2975127  1.       ]
action based on polilcy:[-0.38042262  1.        ]
action based on polilcy:[0.02398068 0.9998856 ]
Total T: 51371 Episode Num: 3511 Episode T: 6 Reward: -549.258868
action based on polilcy:[-0.99999994  0.5321852 ]
action based on polilcy:[-0.99999636  0.37201604]
action based on polilcy:[-0.9999971   0.26838565]
action based on polilcy:[-0.9999993  0.2988337]
action based on polilcy:[-0.99998987  0.30945155]
action based on polilcy:[-0.99291265  0.8762018 ]
action based on polilcy:[-0.2250994   0.97730994]
action based on polilcy:[-0.17912576  0.99827456]
action based on polilcy:[-0.09743004  0.99988437]
action based on polilcy:[-0.20647052  1.        ]
action based on polilcy:[-0.00758876  1.        ]
action based on polilcy:[0.66438293 1.        ]
Total T: 51383 Episode Num: 3512 Episode T: 

action based on polilcy:[-0.9999995   0.57813686]
action based on polilcy:[-0.17129216  0.8942375 ]
action based on polilcy:[-0.6827532  0.8402384]
action based on polilcy:[-0.99999875  0.41574955]
action based on polilcy:[-0.99992895  0.90216136]
action based on polilcy:[-0.9431944  0.9999956]
action based on polilcy:[-0.32401186  0.9999999 ]
action based on polilcy:[-0.25836372  1.        ]
action based on polilcy:[0.12011791 1.        ]
Total T: 51541 Episode Num: 3528 Episode T: 9 Reward: -769.490829
action based on polilcy:[-0.9999996   0.57721627]
action based on polilcy:[-0.20488246  0.8617458 ]
action based on polilcy:[-0.28523648  0.9956136 ]
action based on polilcy:[-1.          0.99999934]
action based on polilcy:[-0.3502127  0.9974561]
action based on polilcy:[-0.5449996  0.9817778]
action based on polilcy:[-0.14784493  0.9995502 ]
action based on polilcy:[0.09327788 0.99997216]
Total T: 51549 Episode Num: 3529 Episode T: 8 Reward: -532.802186
action based on polilcy:[-0.99

action based on polilcy:[-0.9999989  0.5702282]
action based on polilcy:[-0.99999684  0.9170182 ]
action based on polilcy:[-0.96500957  0.9548431 ]
action based on polilcy:[-0.41517255  0.99999446]
action based on polilcy:[-0.17908302  1.        ]
action based on polilcy:[-0.11318717  1.        ]
action based on polilcy:[0.22876078 1.        ]
Total T: 51701 Episode Num: 3544 Episode T: 7 Reward: -605.023118
action based on polilcy:[-0.9999988  0.597754 ]
action based on polilcy:[-0.99998444  0.3596589 ]
action based on polilcy:[-1.          0.71125233]
action based on polilcy:[-0.23377985  0.94801354]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.35167328  0.99990755]
action based on polilcy:[-0.15287836  1.        ]
action based on polilcy:[-0.08489534  1.        ]
action based on polilcy:[0.38106108 0.99999577]
Total T: 51710 Episode Num: 3545 Episode T: 9 Reward: -712.081328
action based on polilcy:[-0.99999917  0.55567133]
action based on polilcy:[-0.17266184  0.92

action based on polilcy:[-0.9999975  0.592324 ]
action based on polilcy:[-0.9999655  0.3838044]
action based on polilcy:[-0.20065726  0.8659144 ]
action based on polilcy:[-0.9969333  0.6885172]
action based on polilcy:[-0.9999815   0.39795846]
action based on polilcy:[-0.99999475  0.38722566]
action based on polilcy:[-0.9999973  0.4987993]
action based on polilcy:[-1.          0.82763445]
action based on polilcy:[-0.2009759  0.9801013]
action based on polilcy:[-0.38456944  0.9995691 ]
action based on polilcy:[-0.02493641  0.99981326]
action based on polilcy:[0.32780063 0.99998885]
action based on polilcy:[0.8156322 1.       ]
Total T: 51856 Episode Num: 3561 Episode T: 13 Reward: -817.866401
action based on polilcy:[-0.999997   0.5822238]
action based on polilcy:[-0.18066956  0.92986554]
action based on polilcy:[-1.         0.9999941]
action based on polilcy:[-0.5077424  0.9999809]
action based on polilcy:[-0.08177648  0.99968725]
action based on polilcy:[0.06000322 1.        ]
action 

Total T: 52065 Episode Num: 3570 Episode T: 13 Reward: -1715.001647
Total T: 52074 Episode Num: 3571 Episode T: 9 Reward: -1244.278420
Total T: 52096 Episode Num: 3572 Episode T: 22 Reward: -1960.128066
Total T: 52144 Episode Num: 3573 Episode T: 48 Reward: -7082.439909
Total T: 52194 Episode Num: 3574 Episode T: 50 Reward: -7033.012114
Total T: 52241 Episode Num: 3575 Episode T: 47 Reward: -5132.032442
Total T: 52287 Episode Num: 3576 Episode T: 46 Reward: -7358.178308
Total T: 52333 Episode Num: 3577 Episode T: 46 Reward: -7079.228410
Total T: 52384 Episode Num: 3578 Episode T: 51 Reward: -7066.806398
Total T: 52401 Episode Num: 3579 Episode T: 17 Reward: -2056.648737
Total T: 52449 Episode Num: 3580 Episode T: 48 Reward: -6957.038529
Total T: 52458 Episode Num: 3581 Episode T: 9 Reward: -1121.734939
action based on polilcy:[-0.9999768   0.58996344]
action based on polilcy:[-1.         0.5307825]
action based on polilcy:[-0.32205674  0.72903293]
action based on polilcy:[-0.9936512   

action based on polilcy:[-0.9996292   0.64438635]
action based on polilcy:[-0.9990634   0.41374633]
action based on polilcy:[-0.9999625  0.3051086]
action based on polilcy:[-0.99999523  0.5026521 ]
action based on polilcy:[-0.9999825   0.36733392]
action based on polilcy:[-0.9999981   0.39858076]
action based on polilcy:[-0.99977607  0.43277314]
action based on polilcy:[-0.9999363  0.4397463]
action based on polilcy:[-0.99977624  0.39947724]
action based on polilcy:[-0.9982091  0.830536 ]
action based on polilcy:[-0.9999991   0.85778785]
action based on polilcy:[-0.9999993   0.89641696]
action based on polilcy:[-0.9737762  0.8172268]
action based on polilcy:[-0.79641986  0.78903556]
action based on polilcy:[-0.7141226  0.7836425]
action based on polilcy:[-0.8136673  0.9816906]
action based on polilcy:[-0.5987176  0.8017601]
action based on polilcy:[-0.5138884  0.8765957]
action based on polilcy:[-0.11708898  0.85127336]
action based on polilcy:[0.44708553 0.9318144 ]
action based on po

action based on polilcy:[-0.99993175  0.6263021 ]
action based on polilcy:[-0.02670168  0.91258025]
action based on polilcy:[-1.         0.9150224]
action based on polilcy:[-0.99995035  0.38419816]
action based on polilcy:[-0.9998139   0.35094467]
action based on polilcy:[-0.9999704   0.32666245]
action based on polilcy:[-0.9999977  0.4012535]
action based on polilcy:[-0.9999027   0.35617512]
action based on polilcy:[-0.9988569  0.5267435]
action based on polilcy:[-0.9999896   0.37359598]
action based on polilcy:[-0.13085428  0.9447234 ]
action based on polilcy:[-0.9999995  0.9999998]
action based on polilcy:[0.13577537 0.9950596 ]
action based on polilcy:[-0.23272166  1.        ]
action based on polilcy:[0.1146607 1.       ]
Total T: 52832 Episode Num: 3613 Episode T: 15 Reward: -1228.304813
action based on polilcy:[-0.9998701  0.6118111]
action based on polilcy:[-1.         0.9399288]
action based on polilcy:[-0.99995846  0.36608458]
action based on polilcy:[-0.9999408  0.5742041]
ac

action based on polilcy:[-0.9998282  0.6478704]
action based on polilcy:[-0.00336013  0.91458404]
action based on polilcy:[-0.48470634  0.99920017]
action based on polilcy:[-0.95447564  0.7651523 ]
action based on polilcy:[-0.97386533  0.6912738 ]
action based on polilcy:[-0.16303812  0.9848894 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[0.00513437 0.99124956]
action based on polilcy:[-0.36451116  0.9999997 ]
action based on polilcy:[-0.9050128  1.       ]
Total T: 52997 Episode Num: 3626 Episode T: 10 Reward: -759.343976
action based on polilcy:[-0.9998034   0.62799495]
action based on polilcy:[0.07596169 0.91470414]
action based on polilcy:[-0.17522201  0.9874543 ]
---------------------------------------
Episode_num: 3627, Evaluation over 1 episodes: -567.990319
---------------------------------------
action based on polilcy:[-1.  1.]
Total T: 53001 Episode Num: 3627 Episode T: 4 Reward: -620.614112
action based on polilcy:[-0.99980617  0.64283216]
action based on po

action based on polilcy:[-0.9999105   0.64183974]
action based on polilcy:[-0.9999706   0.33954036]
action based on polilcy:[-0.9997749   0.30207026]
action based on polilcy:[-1.          0.77583575]
action based on polilcy:[-0.04572789  0.8272507 ]
action based on polilcy:[-0.9999942   0.43733132]
action based on polilcy:[-0.9999079   0.41804203]
action based on polilcy:[-0.9999983   0.57369804]
action based on polilcy:[0.06226429 0.9398989 ]
action based on polilcy:[-0.999951   0.9997766]
action based on polilcy:[-0.3057532  1.       ]
action based on polilcy:[-0.33966044  0.9999998 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.16309185  0.9549604 ]
Total T: 53156 Episode Num: 3640 Episode T: 14 Reward: -1263.437721
action based on polilcy:[-0.9998871  0.6240392]
action based on polilcy:[0.05349277 0.8307303 ]
action based on polilcy:[-0.9999965  0.5012297]
action based on polilcy:[-1.         0.9999724]
action based on polilcy:[0.16584426 0.9963529 ]
action based o

action based on polilcy:[-0.99987286  0.65245855]
action based on polilcy:[0.15096886 0.8882168 ]
action based on polilcy:[-1.         0.9999959]
action based on polilcy:[0.04883931 0.99901575]
action based on polilcy:[0.4371917 0.999997 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[0.59525734 0.9996943 ]
Total T: 53313 Episode Num: 3656 Episode T: 7 Reward: -707.927509
action based on polilcy:[-0.99993646  0.6256417 ]
action based on polilcy:[-0.9996145  0.275206 ]
action based on polilcy:[-0.9998345   0.29012096]
action based on polilcy:[0.07145191 0.92864144]
action based on polilcy:[-1.          0.99999994]
action based on polilcy:[-0.99994177  1.        ]
action based on polilcy:[0.54566836 0.9996323 ]
action based on polilcy:[0.8288397 1.       ]
action based on polilcy:[0.04861298 1.        ]
Total T: 53322 Episode Num: 3657 Episode T: 9 Reward: -707.872742
action based on polilcy:[-0.9999526  0.6572372]
action based on polilcy:[0.0704688  0.89069825]
action based

action based on polilcy:[-0.99990606  0.63643515]
action based on polilcy:[-0.23067565  0.69926566]
action based on polilcy:[-0.99906814  0.9618427 ]
action based on polilcy:[-0.3565963   0.79541606]
action based on polilcy:[-0.9999975  0.999959 ]
action based on polilcy:[-0.11374474  0.9561576 ]
action based on polilcy:[-0.17768164  0.9257315 ]
action based on polilcy:[-0.4537559  1.       ]
action based on polilcy:[0.6250891 0.9990891]
Total T: 53471 Episode Num: 3671 Episode T: 9 Reward: -806.343393
action based on polilcy:[-0.99987215  0.6438308 ]
action based on polilcy:[-0.9999597  0.3104887]
action based on polilcy:[-0.99998975  0.3945307 ]
action based on polilcy:[-0.99993664  0.28220212]
action based on polilcy:[-0.9996764   0.33407447]
action based on polilcy:[-0.99999976  0.68818724]
action based on polilcy:[-0.99994975  0.69136417]
action based on polilcy:[-0.99999386  0.33438307]
action based on polilcy:[-0.9999713  0.3954811]
action based on polilcy:[-0.9999639   0.346731

action based on polilcy:[-0.9999767  0.312038 ]
action based on polilcy:[-0.99996936  0.28932682]
action based on polilcy:[-0.99999267  0.3603289 ]
action based on polilcy:[-0.9999942  0.2581404]
action based on polilcy:[-0.99999577  0.32541707]
action based on polilcy:[-0.9999854   0.27315116]
action based on polilcy:[-0.99999046  0.27877843]
action based on polilcy:[-0.99987537  0.29923823]
action based on polilcy:[-0.9999361  0.3503594]
action based on polilcy:[0.07429739 0.84357995]
action based on polilcy:[0.28255948 0.983662  ]
action based on polilcy:[-0.9474082  1.       ]
action based on polilcy:[0.7497798  0.99941593]
action based on polilcy:[0.8256773 1.       ]
Total T: 53626 Episode Num: 3686 Episode T: 14 Reward: -849.080155
action based on polilcy:[-0.99996805  0.37040848]
action based on polilcy:[-0.9999669   0.40330577]
action based on polilcy:[-0.999358    0.33078462]
action based on polilcy:[-1.          0.84116423]
action based on polilcy:[-0.48665208  0.99968034]
a

action based on polilcy:[-0.9999538  0.3480541]
action based on polilcy:[-0.9999415  0.2635901]
action based on polilcy:[-0.9999661  0.2915042]
action based on polilcy:[-0.99999815  0.45377517]
action based on polilcy:[-0.9999999  0.5067886]
action based on polilcy:[-0.9999989   0.20842017]
action based on polilcy:[-0.99999964  0.36024398]
action based on polilcy:[-0.9999918   0.03497779]
action based on polilcy:[-0.9999978   0.45487094]
action based on polilcy:[-0.99998325  0.7272288 ]
action based on polilcy:[-0.9753081  0.7033423]
action based on polilcy:[-0.8358003   0.66085523]
action based on polilcy:[-0.6597585  0.5639942]
action based on polilcy:[-0.60355043  0.74927545]
action based on polilcy:[-0.8963235   0.99952245]
action based on polilcy:[-0.13037364  0.9806302 ]
action based on polilcy:[0.10489154 1.        ]
action based on polilcy:[0.47383028 0.7132416 ]
action based on polilcy:[0.94768476 1.        ]
action based on polilcy:[0.5189078 1.       ]
Total T: 53785 Episode

action based on polilcy:[-0.9999818  0.3354633]
action based on polilcy:[-1.         0.5631137]
action based on polilcy:[-0.99999946  0.9044967 ]
action based on polilcy:[-0.32513654  0.9996358 ]
action based on polilcy:[-0.22345954  1.        ]
action based on polilcy:[0.27017695 1.        ]
Total T: 53942 Episode Num: 3712 Episode T: 6 Reward: -848.864870
action based on polilcy:[-0.9999843   0.28669894]
action based on polilcy:[-0.9999955   0.30937156]
action based on polilcy:[-0.9998044   0.30053076]
action based on polilcy:[0.20725214 0.92106396]
action based on polilcy:[-1.         0.9999995]
action based on polilcy:[0.29324028 0.9919275 ]
action based on polilcy:[-1.         0.9789894]
action based on polilcy:[-0.08235419  0.99972725]
action based on polilcy:[0.39984673 0.99999917]
Total T: 53951 Episode Num: 3713 Episode T: 9 Reward: -931.338560
action based on polilcy:[-0.9999805   0.33390442]
action based on polilcy:[-0.99995047  0.2927335 ]
action based on polilcy:[-0.999999

action based on polilcy:[-0.9999949   0.36341047]
action based on polilcy:[-1.         0.6035886]
action based on polilcy:[-1.         0.6665379]
action based on polilcy:[0.2729313  0.98900646]
action based on polilcy:[0.55736053 0.99583066]
action based on polilcy:[-0.306222  1.      ]
action based on polilcy:[0.6311434 0.9998771]
action based on polilcy:[0.550967   0.99245894]
Total T: 54581 Episode Num: 3742 Episode T: 8 Reward: -744.521424
action based on polilcy:[-0.99999356  0.45275736]
action based on polilcy:[-0.99999976  0.5516641 ]
action based on polilcy:[-0.9999032   0.41959754]
action based on polilcy:[-0.9999849   0.78200126]
action based on polilcy:[0.27116883 0.99095374]
action based on polilcy:[0.31252372 0.9997003 ]
action based on polilcy:[0.12551476 1.        ]
Total T: 54588 Episode Num: 3743 Episode T: 7 Reward: -763.152991
action based on polilcy:[-0.9999969  0.3259185]
action based on polilcy:[-0.99996126  0.35274994]
action based on polilcy:[-0.9999111  0.29231

action based on polilcy:[-0.9999959   0.36556232]
action based on polilcy:[-0.9999994   0.32128492]
action based on polilcy:[-0.9999984   0.38983107]
action based on polilcy:[-0.99994206  0.32702053]
action based on polilcy:[-0.99997705  0.3682615 ]
action based on polilcy:[-1.         0.5978794]
action based on polilcy:[-1.         0.8022661]
action based on polilcy:[0.3998958 0.9886108]
action based on polilcy:[-1.  1.]
action based on polilcy:[0.6335992 0.9908041]
action based on polilcy:[-1.         0.9997979]
action based on polilcy:[0.6479342 0.9909584]
action based on polilcy:[0.3242853  0.99844724]
Total T: 54765 Episode Num: 3758 Episode T: 13 Reward: -1452.340320
action based on polilcy:[-0.9999969   0.38019955]
action based on polilcy:[-0.9999918   0.47384864]
action based on polilcy:[-0.99999774  0.28180662]
action based on polilcy:[-0.99986255  0.3284057 ]
action based on polilcy:[0.13472778 0.9215647 ]
action based on polilcy:[-1.         0.9999984]
action based on polilc

action based on polilcy:[-0.99999607  0.35617575]
action based on polilcy:[-0.99999243  0.4252575 ]
action based on polilcy:[-0.9999954  0.3261015]
action based on polilcy:[-0.99998164  0.3892873 ]
action based on polilcy:[-0.9999905   0.33201185]
action based on polilcy:[-0.9999695   0.30942112]
action based on polilcy:[-1.         0.5481937]
action based on polilcy:[-1.         0.6337611]
action based on polilcy:[-1.          0.63287103]
action based on polilcy:[-1.        0.979815]
action based on polilcy:[0.6850182  0.98973286]
action based on polilcy:[-0.3998756  1.       ]
action based on polilcy:[0.9964657 0.9997309]
Total T: 54919 Episode Num: 3774 Episode T: 13 Reward: -1220.407623
action based on polilcy:[-0.9999972   0.37040892]
action based on polilcy:[-0.9997865   0.30901772]
action based on polilcy:[0.15200518 0.95011497]
action based on polilcy:[-1.        0.723378]
action based on polilcy:[0.20556116 0.9800788 ]
action based on polilcy:[-0.48077264  1.        ]
action b

action based on polilcy:[-0.9999964   0.36062127]
action based on polilcy:[-0.9999795  0.3270837]
action based on polilcy:[-0.11538568  0.786323  ]
action based on polilcy:[0.14831746 0.9736726 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[0.8420289  0.98943514]
action based on polilcy:[0.44788477 1.        ]
Total T: 55075 Episode Num: 3792 Episode T: 7 Reward: -586.160407
action based on polilcy:[-0.9999952   0.40159485]
action based on polilcy:[-0.9999918   0.35906595]
action based on polilcy:[-1.         0.6426537]
action based on polilcy:[0.22013724 0.9734667 ]
action based on polilcy:[-0.9999986   0.81556267]
action based on polilcy:[0.24778187 0.95545137]
action based on polilcy:[0.52703714 0.9936476 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.18755546  1.        ]
Total T: 55084 Episode Num: 3793 Episode T: 9 Reward: -864.405877
action based on polilcy:[-0.999996   0.3846072]
action based on polilcy:[-0.99961734  0.32867563]
action based on po

action based on polilcy:[-0.9999918  0.4069641]
action based on polilcy:[-0.9999501   0.34619406]
action based on polilcy:[-0.06074478  0.8128575 ]
action based on polilcy:[0.20513763 0.97341865]
action based on polilcy:[-1.  1.]
action based on polilcy:[0.90535116 0.9875781 ]
action based on polilcy:[0.64165497 1.        ]
Total T: 55224 Episode Num: 3807 Episode T: 7 Reward: -658.815317
action based on polilcy:[-0.999992  0.379251]
action based on polilcy:[-0.9999988   0.36688185]
action based on polilcy:[-0.9999815  0.3325734]
action based on polilcy:[-0.99998003  0.3526939 ]
action based on polilcy:[-0.13466312  0.7311733 ]
action based on polilcy:[-1.         0.7303886]
action based on polilcy:[-0.99998814  0.3983319 ]
action based on polilcy:[-0.9994828   0.36229968]
action based on polilcy:[-0.9999998   0.52616906]
action based on polilcy:[0.35381863 0.9576721 ]
action based on polilcy:[-0.42267385  1.        ]
action based on polilcy:[0.22290754 1.        ]
Total T: 55236 Episo

action based on polilcy:[-0.9999958  0.3772845]
action based on polilcy:[-0.9999946   0.42555314]
action based on polilcy:[-0.99999917  0.47946236]
action based on polilcy:[-1.          0.04526667]
action based on polilcy:[-0.9999998  0.4185349]
action based on polilcy:[-0.999999    0.31184503]
action based on polilcy:[-0.99999285  0.35912687]
action based on polilcy:[-0.99999875  0.33919433]
action based on polilcy:[-0.87233865  0.7421996 ]
action based on polilcy:[-0.92335063  0.83905613]
action based on polilcy:[-0.98400086  0.9973408 ]
action based on polilcy:[-0.7121258   0.74149907]
action based on polilcy:[-0.59633267  0.54248846]
action based on polilcy:[-0.67342794  0.9986609 ]
action based on polilcy:[-0.11119697  0.4810032 ]
action based on polilcy:[-0.539787   0.9998204]
action based on polilcy:[0.00911024 0.70974314]
action based on polilcy:[-0.35325947  1.        ]
Total T: 55386 Episode Num: 3820 Episode T: 18 Reward: -1707.109400
action based on polilcy:[-0.99999726  0.

action based on polilcy:[-0.99999946  0.37847552]
action based on polilcy:[-0.9999977   0.38582608]
action based on polilcy:[-0.9999988  0.3910281]
action based on polilcy:[-0.9999994   0.37859473]
action based on polilcy:[-0.9999945  0.2920509]
action based on polilcy:[-0.99993676  0.4747972 ]
action based on polilcy:[0.13235317 0.8346914 ]
action based on polilcy:[-1.          0.87554884]
action based on polilcy:[-0.9958922   0.53722763]
action based on polilcy:[-0.9999791  0.4487063]
action based on polilcy:[-0.9999983   0.44715476]
action based on polilcy:[-0.99999976  0.51459444]
action based on polilcy:[-0.999998    0.33941263]
action based on polilcy:[-0.9999998  0.3685318]
action based on polilcy:[-0.99999994  0.49810094]
action based on polilcy:[-0.99999994  0.56845725]
action based on polilcy:[-1.          0.49525836]
action based on polilcy:[-0.99999994  0.4159136 ]
action based on polilcy:[-0.99999994  0.5680624 ]
action based on polilcy:[-0.99999994  0.42166886]
action bas

action based on polilcy:[-0.9999993   0.32456037]
action based on polilcy:[-1.          0.79571414]
action based on polilcy:[-0.99938595  0.4490441 ]
action based on polilcy:[-0.31662977  0.7191343 ]
action based on polilcy:[-0.5978805  0.5772177]
action based on polilcy:[-1.         0.9998985]
action based on polilcy:[0.11606246 0.94119185]
action based on polilcy:[-0.48864263  0.9985327 ]
action based on polilcy:[-0.99977756  0.9947355 ]
Total T: 55694 Episode Num: 3840 Episode T: 9 Reward: -932.318871
action based on polilcy:[-0.9999992   0.34509254]
action based on polilcy:[-0.999999   0.4276742]
action based on polilcy:[-0.9999995   0.34067386]
action based on polilcy:[-0.99999857  0.33377582]
action based on polilcy:[-0.9999994   0.32024154]
action based on polilcy:[-0.9999964  0.360651 ]
action based on polilcy:[-0.9999988   0.35541233]
action based on polilcy:[-0.9999991  0.2929991]
action based on polilcy:[-0.9999988  0.348585 ]
action based on polilcy:[-0.9999997  0.3773914]


action based on polilcy:[-0.99999696  0.36142874]
action based on polilcy:[-0.9997652  0.4108176]
action based on polilcy:[-0.999854   0.5240087]
action based on polilcy:[-0.99998015  0.38518313]
action based on polilcy:[-0.99999124  0.33145034]
action based on polilcy:[-0.99999446  0.29897872]
action based on polilcy:[-0.9998261  0.4608411]
action based on polilcy:[-1.          0.95624197]
action based on polilcy:[-1.         0.9867378]
action based on polilcy:[-1.       0.99966]
action based on polilcy:[0.01101257 0.5863916 ]
action based on polilcy:[0.7584085 0.8466705]
action based on polilcy:[-0.13855042  0.7724052 ]
Total T: 55859 Episode Num: 3854 Episode T: 13 Reward: -1148.604449
action based on polilcy:[-0.9999965   0.32850224]
action based on polilcy:[-0.99986446  0.34921664]
action based on polilcy:[-0.9999931   0.40095195]
action based on polilcy:[-0.99999166  0.26246268]
action based on polilcy:[-0.99095225  0.47802162]
action based on polilcy:[-0.9993274  0.5472852]
acti

Total T: 56051 Episode Num: 3865 Episode T: 50 Reward: -7116.604768
Total T: 56098 Episode Num: 3866 Episode T: 47 Reward: -7072.184556
Total T: 56147 Episode Num: 3867 Episode T: 49 Reward: -7118.328094
Total T: 56182 Episode Num: 3868 Episode T: 35 Reward: -5574.588184
Total T: 56227 Episode Num: 3869 Episode T: 45 Reward: -7183.960449
Total T: 56273 Episode Num: 3870 Episode T: 46 Reward: -7437.265933
Total T: 56321 Episode Num: 3871 Episode T: 48 Reward: -7054.582838
Total T: 56352 Episode Num: 3872 Episode T: 31 Reward: -4255.645677
Total T: 56372 Episode Num: 3873 Episode T: 20 Reward: -2823.292400
Total T: 56390 Episode Num: 3874 Episode T: 18 Reward: -3070.898896
Total T: 56442 Episode Num: 3875 Episode T: 52 Reward: -6998.925404
Total T: 56461 Episode Num: 3876 Episode T: 19 Reward: -2457.784943
action based on polilcy:[-0.9903683   0.56675255]
action based on polilcy:[-0.96843356  0.66096365]
action based on polilcy:[-0.77624124  0.29918557]
action based on polilcy:[-0.762134

action based on polilcy:[-0.9999704  0.3881777]
action based on polilcy:[-0.99996793  0.3752044 ]
action based on polilcy:[-0.99999994  0.7208264 ]
action based on polilcy:[-0.9878663   0.58185405]
action based on polilcy:[-0.8927014  0.7026336]
action based on polilcy:[-0.7552772   0.60379803]
action based on polilcy:[-0.46703967  0.60833466]
action based on polilcy:[-0.99944305  0.99949163]
action based on polilcy:[-0.7977407  0.6030454]
action based on polilcy:[-0.87810147  0.73706317]
action based on polilcy:[-0.4404554   0.58867466]
action based on polilcy:[-0.45735353  0.9841219 ]
Total T: 56647 Episode Num: 3889 Episode T: 12 Reward: -945.164115
action based on polilcy:[-0.99996465  0.38945952]
action based on polilcy:[-0.9999202   0.32950193]
action based on polilcy:[-0.9998648   0.33278903]
action based on polilcy:[-0.9999326  0.2647123]
action based on polilcy:[-0.9998735  0.3197793]
action based on polilcy:[-0.99995327  0.31501648]
action based on polilcy:[-0.99999917  0.339

action based on polilcy:[-0.9999623  0.4042112]
action based on polilcy:[-0.9999869  0.3529616]
action based on polilcy:[-0.99996686  0.26905146]
action based on polilcy:[-0.9998985   0.34580734]
action based on polilcy:[-0.98695654  0.49887013]
action based on polilcy:[-0.75321686  0.58228326]
action based on polilcy:[-0.6732224  0.9904065]
action based on polilcy:[-0.783059    0.60833883]
action based on polilcy:[-0.8532434  0.6190446]
action based on polilcy:[-0.80597633  0.5854243 ]
action based on polilcy:[-0.7286211  0.6004201]
action based on polilcy:[-0.49063942  0.5751606 ]
action based on polilcy:[-0.6488108  0.9819678]
action based on polilcy:[-0.30739352  0.7752712 ]
action based on polilcy:[-0.5578512  0.6177598]
action based on polilcy:[-0.20801193  0.60676706]
Total T: 56817 Episode Num: 3901 Episode T: 16 Reward: -1010.606212
action based on polilcy:[-0.9999683   0.37853473]
action based on polilcy:[-0.9999492  0.3330674]
action based on polilcy:[-0.99999857  0.4138004 

action based on polilcy:[-0.99987286  0.39788693]
action based on polilcy:[-0.9868313   0.54000235]
action based on polilcy:[-0.85660994  0.5981304 ]
action based on polilcy:[-0.850473   0.6323854]
action based on polilcy:[-0.8543289  0.6693796]
action based on polilcy:[-0.8799387  0.6525867]
action based on polilcy:[-0.9974985   0.49527356]
action based on polilcy:[-0.9981013   0.40269125]
action based on polilcy:[-0.97504985  0.51824784]
action based on polilcy:[-1.          0.99157053]
action based on polilcy:[-0.6487607   0.64271533]
action based on polilcy:[-0.1841804   0.87327945]
action based on polilcy:[0.45919627 0.85011995]
action based on polilcy:[-0.47269368  0.749884  ]
Total T: 56969 Episode Num: 3911 Episode T: 14 Reward: -967.190143
action based on polilcy:[-0.99990416  0.37240413]
action based on polilcy:[-0.9996508  0.2907323]
action based on polilcy:[-0.9999165   0.26889116]
action based on polilcy:[-0.9995991   0.31701037]
action based on polilcy:[-1.         0.8676

action based on polilcy:[-0.999903    0.39491463]
action based on polilcy:[-0.99379724  0.46348965]
action based on polilcy:[-0.9973819  0.4527067]
action based on polilcy:[-0.92199993  0.5570079 ]
action based on polilcy:[-0.9561485   0.94936216]
action based on polilcy:[-0.6507973  0.6101512]
action based on polilcy:[-0.82799506  0.621741  ]
action based on polilcy:[-0.7727985  0.6096598]
action based on polilcy:[-0.5119224   0.98712724]
action based on polilcy:[-0.12142308  0.9538673 ]
Total T: 57142 Episode Num: 3925 Episode T: 10 Reward: -751.997508
action based on polilcy:[-0.9999049   0.41407165]
action based on polilcy:[-0.8809906  0.5856234]
action based on polilcy:[-0.9451742   0.77540237]
action based on polilcy:[-1.         0.9388335]
action based on polilcy:[-0.9965598  0.9984231]
action based on polilcy:[-0.68987155  0.67499745]
action based on polilcy:[-0.6956918  0.6038668]
action based on polilcy:[-0.29749092  0.6224754 ]
action based on polilcy:[-0.747729   0.9782801]

action based on polilcy:[-0.9999607   0.37511954]
action based on polilcy:[-0.993459    0.44945186]
action based on polilcy:[-0.99999666  0.3162968 ]
action based on polilcy:[-0.9666532  0.4935756]
action based on polilcy:[-0.9788112  0.5549341]
action based on polilcy:[-0.9904813  0.9876718]
action based on polilcy:[-0.81474894  0.5529876 ]
action based on polilcy:[-0.4722662   0.95326483]
action based on polilcy:[0.19809794 0.7191833 ]
action based on polilcy:[0.04776607 0.82111967]
Total T: 57301 Episode Num: 3939 Episode T: 10 Reward: -1007.985052
action based on polilcy:[-0.9999401   0.40161392]
action based on polilcy:[-0.87551594  0.5578979 ]
action based on polilcy:[-0.908829    0.57816184]
action based on polilcy:[-0.85011387  0.5681735 ]
action based on polilcy:[-0.88270086  0.97688085]
action based on polilcy:[-0.65045846  0.5668839 ]
action based on polilcy:[-0.72977245  0.9931292 ]
action based on polilcy:[0.09073331 0.6993443 ]
action based on polilcy:[-0.696059   0.99655

action based on polilcy:[-0.99995244  0.42340297]
action based on polilcy:[-0.86877507  0.7755395 ]
action based on polilcy:[-0.8044355  0.642378 ]
action based on polilcy:[-0.9769824  0.5600866]
action based on polilcy:[-0.8585358  0.5757371]
action based on polilcy:[-0.79977936  0.7844079 ]
action based on polilcy:[-0.79161084  0.9754828 ]
action based on polilcy:[-0.8372538  0.5976398]
action based on polilcy:[-0.6417718   0.53648365]
action based on polilcy:[-0.78777003  0.9756572 ]
action based on polilcy:[-0.764243   0.8452748]
action based on polilcy:[-0.799977   0.5381557]
action based on polilcy:[-0.37604117  0.7385899 ]
action based on polilcy:[0.07843332 0.6489941 ]
Total T: 57471 Episode Num: 3952 Episode T: 14 Reward: -1091.782891
action based on polilcy:[-0.9999435  0.4335636]
action based on polilcy:[-0.9052685  0.5788817]
action based on polilcy:[-0.87139773  0.6145705 ]
action based on polilcy:[-0.9054257   0.58313394]
action based on polilcy:[-0.61259997  0.8975109 ]


action based on polilcy:[-0.99996704  0.42006722]
action based on polilcy:[-0.8976095   0.77968585]
action based on polilcy:[-0.79541916  0.69760406]
action based on polilcy:[-0.894441    0.52328527]
action based on polilcy:[-0.88045365  0.78574127]
action based on polilcy:[-0.48416963  0.9219231 ]
action based on polilcy:[-0.41621    0.9789942]
action based on polilcy:[0.4248779 0.72498  ]
action based on polilcy:[0.5966861 0.9552681]
Total T: 57627 Episode Num: 3967 Episode T: 9 Reward: -708.426730
action based on polilcy:[-0.99997264  0.38962787]
action based on polilcy:[-0.9041629   0.84780514]
action based on polilcy:[-0.843045    0.60606647]
action based on polilcy:[-0.88084304  0.83486414]
action based on polilcy:[-0.5185346  0.998187 ]
action based on polilcy:[-0.25403365  0.98808116]
action based on polilcy:[-0.00727321  0.6637306 ]
action based on polilcy:[-0.38138235  0.5884179 ]
action based on polilcy:[-0.4622736   0.99965143]
Total T: 57636 Episode Num: 3968 Episode T: 9 

action based on polilcy:[-0.9999486   0.52606475]
action based on polilcy:[-0.9999939   0.34627962]
action based on polilcy:[-0.9999768   0.32430863]
action based on polilcy:[-0.86424625  0.6620246 ]
action based on polilcy:[-0.99578565  0.5440488 ]
action based on polilcy:[-0.9996214   0.39197206]
action based on polilcy:[-0.9785144  0.4996526]
action based on polilcy:[-0.9998711  0.3808183]
action based on polilcy:[-0.9999995   0.36380956]
action based on polilcy:[-0.9999996   0.33435896]
action based on polilcy:[-0.9890876   0.65278447]
action based on polilcy:[-0.8930842  0.9035407]
action based on polilcy:[-0.53279936  0.6984566 ]
action based on polilcy:[-0.42965376  0.94120455]
action based on polilcy:[-0.28340304  0.6084628 ]
action based on polilcy:[-0.47087023  0.64618814]
action based on polilcy:[-0.07417729  0.59664464]
action based on polilcy:[-0.810136    0.27887177]
Total T: 57803 Episode Num: 3983 Episode T: 18 Reward: -1357.655898
action based on polilcy:[-0.9999305  0

action based on polilcy:[-0.9998593  0.5861118]
action based on polilcy:[-0.8916264   0.94425344]
action based on polilcy:[-0.92719245  0.57086706]
action based on polilcy:[-0.7984343  0.6420902]
action based on polilcy:[-0.55698806  0.98682284]
action based on polilcy:[-0.37409323  0.94730926]
action based on polilcy:[0.4433308 0.6746565]
Total T: 57949 Episode Num: 3998 Episode T: 7 Reward: -800.267939
action based on polilcy:[-0.9997635  0.5939652]
action based on polilcy:[-0.99848837  0.49146318]
action based on polilcy:[-0.9877814  0.5321585]
action based on polilcy:[-0.9890423   0.54280174]
action based on polilcy:[-0.90896034  0.5786155 ]
action based on polilcy:[-0.77594805  0.8859418 ]
action based on polilcy:[-0.59939635  0.5269112 ]
action based on polilcy:[-0.4024081   0.95847344]
action based on polilcy:[0.02520488 0.6478472 ]
action based on polilcy:[0.3955958  0.91053647]
Total T: 57959 Episode Num: 3999 Episode T: 10 Reward: -606.831095
action based on polilcy:[-0.99983

action based on polilcy:[-0.9999268  0.6563647]
action based on polilcy:[-0.9974556  0.4896413]
action based on polilcy:[-0.99975663  0.42749166]
action based on polilcy:[-0.99349797  0.63781214]
action based on polilcy:[-0.92781466  0.55294794]
action based on polilcy:[-0.7692622   0.66805005]
action based on polilcy:[-0.8559082  0.4240339]
action based on polilcy:[-0.4245313   0.95605356]
action based on polilcy:[-0.5772468  0.6556425]
action based on polilcy:[-0.8503427   0.45180482]
action based on polilcy:[-0.8879426  0.4894061]
action based on polilcy:[-0.6908495   0.59359765]
Total T: 58593 Episode Num: 4030 Episode T: 12 Reward: -1137.211273
action based on polilcy:[-0.9999494  0.6423824]
action based on polilcy:[-0.9631643   0.50705826]
action based on polilcy:[-0.99894434  0.4667249 ]
action based on polilcy:[-0.999417    0.62677187]
action based on polilcy:[-0.997117    0.45873702]
action based on polilcy:[-0.99999505  0.56398827]
action based on polilcy:[-0.99986464  0.3620

Total T: 58786 Episode Num: 4040 Episode T: 37 Reward: -2802.697787
action based on polilcy:[-0.999962    0.55585575]
action based on polilcy:[-0.99988806  0.4467946 ]
action based on polilcy:[-0.9993054   0.47695085]
action based on polilcy:[-0.9999548   0.36364105]
action based on polilcy:[-0.99999416  0.31415936]
action based on polilcy:[-0.99999946  0.3184848 ]
action based on polilcy:[-0.99999994  0.30935234]
action based on polilcy:[-0.9999999   0.33545646]
action based on polilcy:[-1.          0.29518676]
action based on polilcy:[-0.99999887  0.2587483 ]
action based on polilcy:[-0.99991864  0.3694038 ]
action based on polilcy:[-0.99999833  0.406014  ]
action based on polilcy:[-0.99999446  0.37144825]
action based on polilcy:[-0.9999424   0.42713702]
action based on polilcy:[-0.9735473  0.5520854]
action based on polilcy:[-0.9993526  0.6143027]
action based on polilcy:[-0.985601  0.94524 ]
action based on polilcy:[-0.9111233  0.9337364]
action based on polilcy:[-0.574322   0.574

action based on polilcy:[-0.9999824   0.48253024]
action based on polilcy:[-0.9999823   0.49413705]
action based on polilcy:[-0.9999991   0.40681908]
action based on polilcy:[-0.99999917  0.297754  ]
action based on polilcy:[-0.99999285  0.4339648 ]
action based on polilcy:[-0.967129   0.4684943]
action based on polilcy:[-0.94538397  0.39648253]
action based on polilcy:[-0.979617    0.41598564]
action based on polilcy:[-0.5917512   0.62707806]
action based on polilcy:[-0.94958985  0.4087306 ]
action based on polilcy:[-0.8920946  0.4138126]
action based on polilcy:[-0.8639288   0.47427243]
action based on polilcy:[-0.9166596   0.42604125]
action based on polilcy:[-0.8885775   0.47499034]
action based on polilcy:[-0.8324031   0.48553017]
action based on polilcy:[-0.54801404  0.5584135 ]
action based on polilcy:[-0.26097816  0.8436268 ]
Total T: 58957 Episode Num: 4053 Episode T: 17 Reward: -1771.760457
action based on polilcy:[-0.9999918   0.42373547]
action based on polilcy:[-0.9999988 

action based on polilcy:[-0.99999887  0.43015257]
action based on polilcy:[-0.99999106  0.42264268]
action based on polilcy:[-0.9999652  0.4114784]
action based on polilcy:[-0.9948397  0.5431678]
action based on polilcy:[-0.9956356  0.4174527]
action based on polilcy:[-0.9964246  0.419557 ]
action based on polilcy:[-0.9951111   0.42412862]
action based on polilcy:[-0.9696851   0.38685182]
action based on polilcy:[-0.39449015  0.58134294]
action based on polilcy:[-0.42796263  0.7466967 ]
action based on polilcy:[-0.34946638  0.4233541 ]
action based on polilcy:[-0.6721268   0.40480682]
Total T: 59112 Episode Num: 4063 Episode T: 12 Reward: -697.659143
action based on polilcy:[-0.9999987  0.4286633]
action based on polilcy:[-0.99983287  0.53808236]
action based on polilcy:[-0.9973104   0.45521778]
action based on polilcy:[-0.9850317  0.4208139]
action based on polilcy:[-0.8460562  0.4298061]
action based on polilcy:[-0.58032316  0.6650816 ]
action based on polilcy:[-0.31081018  0.8840043

action based on polilcy:[-0.9999998   0.43392736]
action based on polilcy:[-0.9999985   0.42103645]
action based on polilcy:[-0.9999993   0.36607116]
action based on polilcy:[-0.99999994  0.3173786 ]
action based on polilcy:[-0.9999999   0.38876274]
action based on polilcy:[-0.99999547  0.42190108]
action based on polilcy:[-0.9999999   0.31996655]
action based on polilcy:[-0.99999994  0.2877449 ]
action based on polilcy:[-0.9998903   0.44186345]
action based on polilcy:[-0.9992111   0.40445527]
action based on polilcy:[-0.99899626  0.46745825]
action based on polilcy:[-0.99954903  0.38211027]
action based on polilcy:[-0.99754673  0.3884882 ]
action based on polilcy:[-0.99142456  0.3868472 ]
action based on polilcy:[-0.9311273   0.44189507]
action based on polilcy:[-0.97793984  0.44936162]
action based on polilcy:[-0.96667594  0.46581522]
action based on polilcy:[-0.9899546  0.3941433]
action based on polilcy:[-0.99992806  0.43124992]
action based on polilcy:[-0.9999632  0.4383219]
acti

action based on polilcy:[-1.          0.42568442]
action based on polilcy:[-0.9999903  0.5366088]
action based on polilcy:[-0.9999154   0.43421015]
action based on polilcy:[-0.9998821  0.5480658]
action based on polilcy:[-0.99973863  0.4332952 ]
action based on polilcy:[-0.9893812  0.5082533]
action based on polilcy:[-0.96901107  0.3929151 ]
action based on polilcy:[-0.45176852  0.8788414 ]
action based on polilcy:[-0.23773551  0.84944034]
Total T: 59430 Episode Num: 4084 Episode T: 9 Reward: -820.166578
action based on polilcy:[-1.         0.4262603]
action based on polilcy:[-0.99999005  0.52958846]
action based on polilcy:[-0.99995047  0.43618938]
action based on polilcy:[-0.9997206   0.43008307]
action based on polilcy:[-0.92964494  0.41451636]
action based on polilcy:[-0.5885563   0.40460047]
action based on polilcy:[-0.935989    0.40297174]
action based on polilcy:[-0.3443191  0.5107879]
action based on polilcy:[0.19692855 0.6260867 ]
action based on polilcy:[-0.2832578   0.776224

action based on polilcy:[-1.         0.4332118]
action based on polilcy:[-0.9999951   0.44500908]
action based on polilcy:[-0.9999696   0.49439397]
action based on polilcy:[-0.94022995  0.6269673 ]
action based on polilcy:[-0.580418   0.6155617]
action based on polilcy:[-0.03793841  0.7039603 ]
action based on polilcy:[-0.3755312  0.8587749]
Total T: 59605 Episode Num: 4098 Episode T: 7 Reward: -624.948626
action based on polilcy:[-1.          0.40641835]
action based on polilcy:[-0.9999952   0.43458837]
action based on polilcy:[-0.9999708  0.4254079]
action based on polilcy:[-0.99999875  0.44132593]
action based on polilcy:[-0.99999994  0.32191277]
action based on polilcy:[-0.9999999   0.36747426]
action based on polilcy:[-0.9999985  0.5278384]
action based on polilcy:[-0.99894977  0.3994406 ]
action based on polilcy:[-0.7357136  0.3753168]
action based on polilcy:[-0.03515259  0.63803315]
action based on polilcy:[-0.85709196  0.34578803]
action based on polilcy:[-0.9888315  0.6670855

action based on polilcy:[-1.          0.41671845]
action based on polilcy:[-1.          0.38096505]
action based on polilcy:[-1.          0.33442494]
action based on polilcy:[-0.99999934  0.50700426]
action based on polilcy:[-0.9999796   0.40348342]
action based on polilcy:[-0.99960387  0.4698684 ]
action based on polilcy:[-0.87677157  0.75682145]
action based on polilcy:[-0.06640715  0.6746656 ]
action based on polilcy:[0.02971315 0.8106882 ]
action based on polilcy:[-0.49224344  0.36247876]
Total T: 59776 Episode Num: 4108 Episode T: 10 Reward: -925.314008
action based on polilcy:[-1.         0.4130021]
action based on polilcy:[-0.9999999   0.42806536]
action based on polilcy:[-0.99999994  0.41264996]
action based on polilcy:[-0.99999994  0.3647243 ]
action based on polilcy:[-1.          0.36074477]
action based on polilcy:[-0.9999999  0.4866709]
action based on polilcy:[-0.99999905  0.52243024]
action based on polilcy:[-0.9999959   0.45571032]
action based on polilcy:[-0.9999513   0

action based on polilcy:[-1.          0.47352612]
action based on polilcy:[-1.          0.35765806]
action based on polilcy:[-1.          0.42032182]
action based on polilcy:[-1.          0.34971923]
action based on polilcy:[-1.          0.30032557]
action based on polilcy:[-1.          0.40521997]
action based on polilcy:[-0.99999994  0.36312675]
action based on polilcy:[-0.9999969  0.5025705]
action based on polilcy:[-0.9999993  0.416907 ]
action based on polilcy:[-1.          0.29448453]
action based on polilcy:[-1.          0.29799625]
action based on polilcy:[-1.          0.40510863]
action based on polilcy:[-1.         0.3391064]
action based on polilcy:[-1.          0.35622665]
action based on polilcy:[-1.          0.36638725]
action based on polilcy:[-1.         0.4001655]
action based on polilcy:[-1.         0.3373689]
action based on polilcy:[-1.          0.35045707]
action based on polilcy:[-1.          0.39613956]
action based on polilcy:[-1.          0.28612107]
action bas

action based on polilcy:[-1.         0.5736139]
action based on polilcy:[-1.        0.588816]
action based on polilcy:[-0.99999994  0.4389971 ]
action based on polilcy:[-0.99999994  0.47462812]
action based on polilcy:[-1.          0.64222795]
action based on polilcy:[-0.9999997   0.42469108]
action based on polilcy:[-0.9999817   0.40710607]
action based on polilcy:[-0.9999848  0.4051849]
action based on polilcy:[-0.99994254  0.3899694 ]
action based on polilcy:[-0.8822372   0.33094466]
action based on polilcy:[0.18231438 0.723891  ]
Total T: 60596 Episode Num: 4143 Episode T: 11 Reward: -766.470078
action based on polilcy:[-1.         0.5244639]
action based on polilcy:[-1.         0.3114953]
action based on polilcy:[-1.         0.4566472]
action based on polilcy:[-1.          0.48767638]
action based on polilcy:[-1.          0.37022936]
action based on polilcy:[-1.         0.3129446]
action based on polilcy:[-1.          0.28109664]
action based on polilcy:[-1.          0.36466298]
a

action based on polilcy:[-1.         0.4987328]
action based on polilcy:[-0.99999994  0.43764168]
action based on polilcy:[-0.9999998   0.45446122]
action based on polilcy:[-0.99999565  0.4237679 ]
action based on polilcy:[-0.97371197  0.31936795]
action based on polilcy:[-0.999913    0.35151282]
action based on polilcy:[-0.92858195  0.47800428]
action based on polilcy:[-0.46872157  0.31523693]
action based on polilcy:[-0.14490654  0.31089053]
Total T: 60749 Episode Num: 4155 Episode T: 14 Reward: -711.319190
action based on polilcy:[-1.          0.51403964]
action based on polilcy:[-1.          0.53336835]
action based on polilcy:[-1.         0.5284091]
action based on polilcy:[-1.       0.47471]
action based on polilcy:[-1.         0.4077418]
action based on polilcy:[-0.99999994  0.46979582]
action based on polilcy:[-0.99999994  0.48834446]
action based on polilcy:[-0.9996679  0.827263 ]
action based on polilcy:[-0.32590607  0.60466135]
action based on polilcy:[0.04061629 0.43140417]

action based on polilcy:[-1.         0.5107681]
action based on polilcy:[-1.          0.42566234]
action based on polilcy:[-1.          0.43530142]
action based on polilcy:[-1.          0.41717228]
action based on polilcy:[-1.          0.43439963]
action based on polilcy:[-1.          0.57699496]
action based on polilcy:[-0.9999958   0.41677317]
action based on polilcy:[-0.99999875  0.43615875]
action based on polilcy:[-0.99999934  0.45012343]
action based on polilcy:[-0.9824793  0.5590974]
action based on polilcy:[-0.91301423  0.35402796]
action based on polilcy:[-0.7920266   0.29603854]
action based on polilcy:[-0.20880665  0.36210817]
Total T: 60905 Episode Num: 4168 Episode T: 13 Reward: -629.819602
action based on polilcy:[-1.          0.49943438]
action based on polilcy:[-1.         0.4204069]
action based on polilcy:[-1.         0.5381408]
action based on polilcy:[-0.99999994  0.49972647]
action based on polilcy:[-0.9999999   0.50219226]
action based on polilcy:[-0.99999917  0.4

action based on polilcy:[-1.         0.4517246]
action based on polilcy:[-1.          0.33973065]
action based on polilcy:[-1.          0.40690055]
action based on polilcy:[-1.          0.55951715]
action based on polilcy:[-0.9999976   0.41647238]
action based on polilcy:[-0.9969131  0.6139972]
action based on polilcy:[-0.31514066  0.58162355]
action based on polilcy:[-0.54335594  0.29084316]
Total T: 61066 Episode Num: 4182 Episode T: 8 Reward: -684.692942
action based on polilcy:[-1.          0.44100162]
action based on polilcy:[-1.          0.38775644]
action based on polilcy:[-1.         0.5701207]
action based on polilcy:[-1.          0.42768478]
action based on polilcy:[-1.          0.30037764]
action based on polilcy:[-1.          0.32969758]
action based on polilcy:[-1.          0.30172354]
action based on polilcy:[-1.          0.45055628]
action based on polilcy:[-1.          0.41360247]
action based on polilcy:[-1.          0.40963823]
action based on polilcy:[-1.          0.

action based on polilcy:[-1.         0.4672519]
action based on polilcy:[-1.         0.5230515]
action based on polilcy:[-1.         0.4575379]
action based on polilcy:[-1.          0.35348332]
action based on polilcy:[-1.          0.41251087]
action based on polilcy:[-0.99999595  0.62523115]
action based on polilcy:[-0.99999964  0.50138485]
action based on polilcy:[-0.9999936   0.39277267]
action based on polilcy:[-0.9898045  0.7084037]
action based on polilcy:[-0.8105411   0.24995342]
Total T: 61251 Episode Num: 4197 Episode T: 10 Reward: -918.252837
action based on polilcy:[-1.          0.46558252]
action based on polilcy:[-1.          0.41645277]
action based on polilcy:[-1.          0.43112633]
action based on polilcy:[-1.          0.62999165]
action based on polilcy:[-0.99999994  0.45849413]
action based on polilcy:[-0.9992325   0.35651144]
action based on polilcy:[-0.5887696   0.31277496]
action based on polilcy:[-0.98845446  0.72595495]
action based on polilcy:[-0.97073394  0.6

action based on polilcy:[-1.          0.49490649]
action based on polilcy:[-1.          0.33060354]
action based on polilcy:[-1.          0.36798033]
action based on polilcy:[-1.          0.31426948]
action based on polilcy:[-1.          0.38117012]
action based on polilcy:[-1.          0.33595362]
action based on polilcy:[-1.          0.37460163]
action based on polilcy:[-1.          0.57315093]
action based on polilcy:[-1.         0.4594324]
action based on polilcy:[-1.          0.32284737]
action based on polilcy:[-1.          0.42831504]
action based on polilcy:[-1.          0.42060956]
action based on polilcy:[-1.          0.44368416]
action based on polilcy:[-1.          0.34417918]
action based on polilcy:[-1.          0.38551807]
action based on polilcy:[-1.          0.41066816]
action based on polilcy:[-0.99999994  0.41520315]
action based on polilcy:[-1.          0.32925963]
action based on polilcy:[-1.          0.39842695]
action based on polilcy:[-1.          0.42090666]
ac

action based on polilcy:[-1.          0.48063946]
action based on polilcy:[-1.          0.41101143]
action based on polilcy:[-1.          0.42993438]
action based on polilcy:[-1.          0.54546857]
action based on polilcy:[-1.         0.3788177]
action based on polilcy:[-1.         0.4622827]
action based on polilcy:[-1.         0.4325573]
action based on polilcy:[-1.         0.4037284]
action based on polilcy:[-1.          0.42363366]
action based on polilcy:[-1.         0.4218389]
action based on polilcy:[-1.          0.39313996]
action based on polilcy:[-1.         0.4609407]
action based on polilcy:[-1.         0.4343097]
action based on polilcy:[-1.          0.32629395]
action based on polilcy:[-1.          0.56991804]
action based on polilcy:[-1.          0.37221843]
action based on polilcy:[-1.          0.38311642]
action based on polilcy:[-1.         0.4887798]
action based on polilcy:[-1.          0.35661963]
action based on polilcy:[-1.          0.42625645]
action based on 

action based on polilcy:[-1.         0.4440415]
action based on polilcy:[-1.          0.40640774]
action based on polilcy:[-0.99999994  0.41902366]
action based on polilcy:[-1.          0.69765735]
action based on polilcy:[-0.99999994  0.45691803]
action based on polilcy:[-1.          0.42152742]
action based on polilcy:[-0.9999998   0.42742914]
action based on polilcy:[-1.          0.50994396]
action based on polilcy:[-1.          0.43211898]
action based on polilcy:[-0.9999997   0.42424324]
action based on polilcy:[-0.9999998  0.4657613]
action based on polilcy:[-1.         0.5760156]
action based on polilcy:[-0.999999    0.42537868]
action based on polilcy:[-0.9998996   0.39651012]
action based on polilcy:[-0.9999619   0.42994806]
action based on polilcy:[-0.999999    0.42637756]
action based on polilcy:[-0.99999845  0.41899282]
action based on polilcy:[-0.99955475  0.38037828]
action based on polilcy:[-0.8088199   0.34414813]
Total T: 61788 Episode Num: 4228 Episode T: 19 Reward: -

action based on polilcy:[-1.          0.47760645]
action based on polilcy:[-1.         0.6639454]
action based on polilcy:[-0.9999999   0.46785653]
action based on polilcy:[-0.99999994  0.5084312 ]
action based on polilcy:[-1.         0.4936829]
action based on polilcy:[-1.          0.64102685]
action based on polilcy:[-0.9999992   0.46909216]
action based on polilcy:[-0.99999225  0.9183326 ]
action based on polilcy:[-0.5657388   0.40163848]
action based on polilcy:[-0.59729636  0.43280765]
action based on polilcy:[0.09866153 0.45742816]
Total T: 61958 Episode Num: 4242 Episode T: 11 Reward: -960.714808
action based on polilcy:[-1.         0.5077516]
action based on polilcy:[-1.         0.5175099]
action based on polilcy:[-1.          0.79819584]
action based on polilcy:[-0.9999992  0.8817024]
action based on polilcy:[-0.90577334  0.39471486]
action based on polilcy:[-0.9512833  0.3125867]
action based on polilcy:[0.1174242 0.6898743]
Total T: 61965 Episode Num: 4243 Episode T: 7 Rewar

action based on polilcy:[-1.         0.4872529]
action based on polilcy:[-1.          0.43389073]
action based on polilcy:[-1.          0.35671803]
action based on polilcy:[-1.         0.4145604]
action based on polilcy:[-1.          0.38675946]
action based on polilcy:[-1.          0.47088608]
action based on polilcy:[-1.         0.3501577]
action based on polilcy:[-1.         0.3469722]
action based on polilcy:[-1.         0.4088498]
action based on polilcy:[-0.9999996   0.37917042]
action based on polilcy:[-0.99994856  0.749059  ]
action based on polilcy:[-0.9860747  0.53714  ]
action based on polilcy:[-0.99985063  0.9737215 ]
action based on polilcy:[-0.6011975   0.39292285]
action based on polilcy:[-0.99672735  0.60469735]
action based on polilcy:[-0.6299603   0.66307545]
action based on polilcy:[-0.61081433  0.35371777]
action based on polilcy:[-0.72431016  0.32675946]
action based on polilcy:[-0.9929828   0.30126932]
Total T: 62628 Episode Num: 4270 Episode T: 19 Reward: -1253.8

action based on polilcy:[-1.         0.5263716]
action based on polilcy:[-1.         0.5450498]
action based on polilcy:[-0.99999994  0.57071495]
action based on polilcy:[-0.9999919  0.9228878]
action based on polilcy:[-0.9291689   0.46714053]
action based on polilcy:[-0.06571385  0.6009823 ]
action based on polilcy:[-0.02292988  0.74350035]
action based on polilcy:[0.4686327 0.6370413]
action based on polilcy:[0.9718398 0.7720747]
Total T: 62772 Episode Num: 4280 Episode T: 9 Reward: -682.557693
action based on polilcy:[-1.          0.49668673]
action based on polilcy:[-1.          0.33582848]
action based on polilcy:[-1.        0.357468]
action based on polilcy:[-1.          0.37518728]
action based on polilcy:[-1.       0.39064]
action based on polilcy:[-1.         0.4119784]
action based on polilcy:[-1.          0.39286238]
action based on polilcy:[-1.          0.36862603]
action based on polilcy:[-1.         0.5055026]
action based on polilcy:[-1.         0.7141917]
action based o

action based on polilcy:[-1.          0.52479863]
action based on polilcy:[-1.         0.4512669]
action based on polilcy:[-0.99999994  0.4971911 ]
action based on polilcy:[-0.99999994  0.44128808]
action based on polilcy:[-0.9999928  0.5002518]
action based on polilcy:[-0.95864666  0.48063746]
action based on polilcy:[-0.18007557  0.8273683 ]
action based on polilcy:[-0.97961795  0.35350427]
action based on polilcy:[-0.9881847   0.42187026]
action based on polilcy:[-0.9969651  0.5983285]
action based on polilcy:[-0.5642953   0.62800825]
action based on polilcy:[-0.9902851  0.3753637]
action based on polilcy:[-0.5312372  0.5070734]
Total T: 62930 Episode Num: 4290 Episode T: 13 Reward: -637.347512
action based on polilcy:[-1.          0.51763177]
action based on polilcy:[-1.          0.43809885]
action based on polilcy:[-0.999999   0.5489272]
action based on polilcy:[-0.9998543   0.80492103]
action based on polilcy:[-0.52888024  0.5351249 ]
action based on polilcy:[-0.84913456  0.46904

action based on polilcy:[-1.         0.5053543]
action based on polilcy:[-1.          0.46248415]
action based on polilcy:[-0.9999997  0.6313343]
action based on polilcy:[-0.99804634  0.7983384 ]
action based on polilcy:[-0.17176451  0.77748764]
action based on polilcy:[0.42416126 0.9712936 ]
action based on polilcy:[0.25864246 0.96729076]
Total T: 63095 Episode Num: 4304 Episode T: 7 Reward: -727.733015
action based on polilcy:[-1.         0.5295601]
action based on polilcy:[-0.99999964  0.4473184 ]
action based on polilcy:[-0.9999956   0.99751186]
action based on polilcy:[-0.99989784  0.98729545]
action based on polilcy:[-0.51353514  0.79224175]
action based on polilcy:[0.28904098 0.7503274 ]
action based on polilcy:[0.519121 0.983648]
Total T: 63102 Episode Num: 4305 Episode T: 7 Reward: -802.670554
action based on polilcy:[-1.          0.52659154]
action based on polilcy:[-1.         0.4698618]
action based on polilcy:[-0.99999994  0.53276515]
action based on polilcy:[-0.99997836  

action based on polilcy:[-0.9999652  0.5468197]
action based on polilcy:[-0.9736726   0.56807023]
action based on polilcy:[-0.41846558  0.7959055 ]
action based on polilcy:[-0.7420118  0.9912957]
action based on polilcy:[-0.48023456  0.96473205]
action based on polilcy:[0.14060682 0.83400595]
action based on polilcy:[0.06693514 0.96984804]
action based on polilcy:[-0.5688542  0.9982496]
action based on polilcy:[0.5634282  0.76124835]
Total T: 63260 Episode Num: 4321 Episode T: 9 Reward: -931.576598
action based on polilcy:[-0.9998963  0.5454786]
action based on polilcy:[-0.9999585   0.48588604]
action based on polilcy:[-0.99999595  0.50797975]
action based on polilcy:[-1.         0.4015993]
action based on polilcy:[-1.          0.42894363]
action based on polilcy:[-1.          0.41734487]
action based on polilcy:[-1.         0.5745605]
action based on polilcy:[-1.          0.42101902]
action based on polilcy:[-1.         0.5765073]
action based on polilcy:[-0.99994725  0.70774055]
acti

action based on polilcy:[-0.23029882  0.9892423 ]
action based on polilcy:[-0.21972564  0.8437567 ]
action based on polilcy:[-0.98361117  1.        ]
action based on polilcy:[0.0652573  0.75508624]
action based on polilcy:[-0.10612081  0.99015176]
action based on polilcy:[0.45404387 0.9154383 ]
action based on polilcy:[0.731333   0.99799424]
Total T: 63420 Episode Num: 4338 Episode T: 7 Reward: -672.754369
action based on polilcy:[-0.24480109  0.98897636]
action based on polilcy:[-0.0953094  0.8940907]
action based on polilcy:[-0.8032328  0.9999992]
action based on polilcy:[-0.18746561  0.9497792 ]
action based on polilcy:[-0.64952    0.9999931]
action based on polilcy:[-0.06320168  0.7848666 ]
action based on polilcy:[-0.4029333  0.9985575]
action based on polilcy:[0.31761003 0.7249607 ]
action based on polilcy:[0.84887075 0.8667767 ]
action based on polilcy:[0.9848566 0.9813312]
Total T: 63430 Episode Num: 4339 Episode T: 10 Reward: -992.592199
action based on polilcy:[-0.23727754  0

action based on polilcy:[-0.25072977  0.9960816 ]
action based on polilcy:[-0.2034666  0.9999276]
action based on polilcy:[-0.0596375  0.9118159]
action based on polilcy:[0.04083477 0.9641474 ]
action based on polilcy:[-0.05512307  0.9981801 ]
action based on polilcy:[0.4903451 0.8571838]
action based on polilcy:[0.7590956  0.99732757]
Total T: 63581 Episode Num: 4359 Episode T: 7 Reward: -825.641936
action based on polilcy:[-0.2612644  0.9965983]
action based on polilcy:[-0.18220773  0.9998036 ]
action based on polilcy:[-0.5407096  0.9998248]
action based on polilcy:[0.14387017 0.6942061 ]
action based on polilcy:[0.45836124 0.8730998 ]
action based on polilcy:[0.2557651  0.60604966]
action based on polilcy:[0.53900325 0.6199784 ]
action based on polilcy:[0.74883235 0.99481803]
Total T: 63589 Episode Num: 4360 Episode T: 8 Reward: -704.685656
action based on polilcy:[-0.26833382  0.99687296]
action based on polilcy:[-0.21175148  0.9854709 ]
action based on polilcy:[-0.21735875  0.9902

action based on polilcy:[-0.28157762  0.9978545 ]
action based on polilcy:[-0.3642009  0.9999245]
action based on polilcy:[-0.10444625  0.73646724]
action based on polilcy:[-0.99424905  1.        ]
action based on polilcy:[0.0721499 0.6867026]
action based on polilcy:[-0.37878478  0.99992424]
action based on polilcy:[0.31459197 0.58205545]
Total T: 63732 Episode Num: 4377 Episode T: 7 Reward: -830.366324
action based on polilcy:[-0.24695088  0.9983263 ]
action based on polilcy:[-0.16661018  0.7874556 ]
action based on polilcy:[-0.05602768  0.805778  ]
action based on polilcy:[-0.9798163   0.99999994]
action based on polilcy:[-0.04050067  0.99566126]
action based on polilcy:[0.5063766  0.72928727]
action based on polilcy:[0.9608072 0.9926648]
Total T: 63739 Episode Num: 4378 Episode T: 7 Reward: -765.573562
action based on polilcy:[-0.2551056  0.9987015]
action based on polilcy:[-0.26123968  0.99782187]
action based on polilcy:[-0.01735125  0.7472149 ]
action based on polilcy:[-0.550863

action based on polilcy:[-0.32253417  0.9988416 ]
action based on polilcy:[-0.40300685  0.999998  ]
action based on polilcy:[-0.12623987  0.86794984]
action based on polilcy:[-0.91745853  0.99999976]
action based on polilcy:[-0.07437638  0.7227391 ]
action based on polilcy:[-0.9438964   0.99999976]
action based on polilcy:[-0.03699813  0.9748754 ]
action based on polilcy:[0.1353407 0.6118206]
action based on polilcy:[0.2536162  0.80072284]
action based on polilcy:[-0.4036537  0.9999474]
Total T: 63881 Episode Num: 4395 Episode T: 10 Reward: -1114.526105
action based on polilcy:[-0.3273763  0.9990167]
action based on polilcy:[-0.83132267  0.99999994]
action based on polilcy:[-0.04762584  0.7448522 ]
action based on polilcy:[-0.9149568   0.99999917]
action based on polilcy:[0.1334223 0.8160255]
action based on polilcy:[0.09082142 0.673864  ]
action based on polilcy:[0.34241655 0.9871134 ]
Total T: 63888 Episode Num: 4396 Episode T: 7 Reward: -816.398655
action based on polilcy:[-0.323834

Total T: 64408 Episode Num: 4421 Episode T: 49 Reward: -7413.012199
Total T: 64418 Episode Num: 4422 Episode T: 10 Reward: -1362.293196
Total T: 64468 Episode Num: 4423 Episode T: 50 Reward: -6322.730859
action based on polilcy:[-0.9999991  0.6656133]
action based on polilcy:[-0.9979499  0.819121 ]
action based on polilcy:[-0.29326183  0.85404336]
action based on polilcy:[-0.99999976  0.72862726]
action based on polilcy:[0.7065802 0.8878475]
action based on polilcy:[-0.8959679   0.98841095]
action based on polilcy:[0.9990621 0.9139338]
action based on polilcy:[0.824174 1.      ]
action based on polilcy:[0.9991821  0.99999994]
action based on polilcy:[0.99855274 0.9999999 ]
action based on polilcy:[0.99934125 0.9800348 ]
action based on polilcy:[0.99996   0.9161101]
action based on polilcy:[0.999996  0.7831222]
action based on polilcy:[0.9999893 0.765869 ]
action based on polilcy:[0.99999934 0.9878239 ]
Total T: 64515 Episode Num: 4424 Episode T: 47 Reward: -7154.065046
action based on 

action based on polilcy:[-0.4318811  0.9996218]
action based on polilcy:[-0.16755031  0.9609037 ]
action based on polilcy:[-0.24058384  0.99612427]
action based on polilcy:[-0.15245804  0.9801044 ]
action based on polilcy:[-0.2767597  0.9997348]
action based on polilcy:[-0.28951177  0.99999523]
action based on polilcy:[-0.22878923  0.7789888 ]
action based on polilcy:[-0.1200425   0.72576165]
action based on polilcy:[-0.9975546  1.       ]
action based on polilcy:[-0.02493875  0.66756314]
action based on polilcy:[-0.9894217  1.       ]
action based on polilcy:[-4.1634587e-04  6.8093145e-01]
action based on polilcy:[0.3267203  0.88963205]
Total T: 64664 Episode Num: 4442 Episode T: 13 Reward: -1520.920441
action based on polilcy:[-0.450354   0.9994212]
action based on polilcy:[-0.25977266  0.9998862 ]
action based on polilcy:[-0.12540807  0.7590451 ]
action based on polilcy:[-0.9489781  0.9999999]
action based on polilcy:[0.10236558 0.63189596]
action based on polilcy:[-0.63292646  0.99

action based on polilcy:[-0.47875217  0.9995286 ]
action based on polilcy:[-0.25474158  0.8075469 ]
action based on polilcy:[-0.5417582  0.9999913]
action based on polilcy:[-0.20196718  0.93285984]
action based on polilcy:[0.02653231 0.6770256 ]
action based on polilcy:[0.0165781  0.66041565]
action based on polilcy:[0.10599211 0.8221683 ]
action based on polilcy:[0.23012406 0.56728494]
Total T: 64808 Episode Num: 4460 Episode T: 8 Reward: -539.287700
action based on polilcy:[-0.45630583  0.9995906 ]
action based on polilcy:[-0.31679025  0.94558114]
action based on polilcy:[-0.64151657  1.        ]
action based on polilcy:[-0.22296922  0.80457115]
action based on polilcy:[-0.22750306  0.9999584 ]
action based on polilcy:[-0.829705  1.      ]
action based on polilcy:[-0.18263742  0.76142275]
action based on polilcy:[-0.11732998  0.7527698 ]
action based on polilcy:[-0.9978187  1.       ]
action based on polilcy:[-0.01885369  0.89666426]
action based on polilcy:[-0.04952721  0.9939825 ]


Total T: 64948 Episode Num: 4476 Episode T: 10 Reward: -862.146062
action based on polilcy:[-0.41681772  0.9997167 ]
action based on polilcy:[-0.30422887  0.8863263 ]
action based on polilcy:[-0.15935132  0.7740357 ]
action based on polilcy:[-0.6673647  0.999926 ]
action based on polilcy:[-0.19952346  0.98570913]
action based on polilcy:[-0.32889193  0.99998534]
action based on polilcy:[0.02141598 0.67296904]
action based on polilcy:[0.01503092 0.8000209 ]
action based on polilcy:[-0.96062255  0.99999994]
Total T: 64957 Episode Num: 4477 Episode T: 9 Reward: -891.988800
action based on polilcy:[-0.42453855  0.99963677]
action based on polilcy:[-0.55588543  0.99999994]
action based on polilcy:[-0.39090574  0.99791473]
action based on polilcy:[-0.9390172  0.7611275]
action based on polilcy:[-0.21640512  0.9681049 ]
action based on polilcy:[-0.28840274  0.8643378 ]
action based on polilcy:[-0.33005273  0.9984758 ]
action based on polilcy:[-0.2636215   0.79459894]
action based on polilcy:[

action based on polilcy:[-0.42590007  0.9997512 ]
action based on polilcy:[-0.2901583  0.9701259]
action based on polilcy:[-0.1126157  0.7918644]
action based on polilcy:[-0.48874825  0.99994886]
action based on polilcy:[-0.4299462   0.99924856]
action based on polilcy:[0.04143818 0.71336186]
action based on polilcy:[-0.8383695  0.9999971]
action based on polilcy:[0.03934483 0.776259  ]
action based on polilcy:[0.07866849 0.8093149 ]
Total T: 65113 Episode Num: 4494 Episode T: 9 Reward: -771.956678
action based on polilcy:[-0.4293872  0.9997909]
action based on polilcy:[-0.60261995  1.        ]
action based on polilcy:[-0.31076398  0.99996996]
action based on polilcy:[-0.16979861  0.76646227]
action based on polilcy:[-0.01873094  0.7078062 ]
action based on polilcy:[-0.57430875  0.9998218 ]
action based on polilcy:[-0.12756465  0.7364459 ]
action based on polilcy:[-0.9897122  1.       ]
action based on polilcy:[-0.0406854  0.7856469]
action based on polilcy:[0.28785855 0.99998987]
Tota

action based on polilcy:[-0.3535143  0.99985  ]
action based on polilcy:[-0.69098794  1.        ]
action based on polilcy:[-0.8804388  1.       ]
action based on polilcy:[-0.13443297  0.77472687]
action based on polilcy:[-0.25721428  0.9968687 ]
action based on polilcy:[-0.0927837   0.99936724]
action based on polilcy:[-0.2641488  0.9996391]
action based on polilcy:[-0.01055912  0.7223145 ]
Total T: 65268 Episode Num: 4511 Episode T: 8 Reward: -1051.868843
action based on polilcy:[-0.38885018  0.9998328 ]
action based on polilcy:[-0.33341753  0.99620885]
action based on polilcy:[-0.31706226  0.9908422 ]
action based on polilcy:[-0.03222384  0.71346104]
action based on polilcy:[-0.01586218  0.68247586]
action based on polilcy:[-0.18561223  0.9251604 ]
action based on polilcy:[0.20173436 0.8461081 ]
action based on polilcy:[0.09417386 0.6565174 ]
Total T: 65276 Episode Num: 4512 Episode T: 8 Reward: -659.195727
action based on polilcy:[-0.3609388  0.9998141]
action based on polilcy:[-0.5

action based on polilcy:[-0.41715395  0.9998423 ]
action based on polilcy:[-0.30296817  0.99982685]
action based on polilcy:[-0.36761233  0.9999994 ]
action based on polilcy:[-0.11528985  0.99953276]
action based on polilcy:[-0.20347737  0.8423687 ]
action based on polilcy:[-0.9901911  1.       ]
action based on polilcy:[-0.07186792  0.87189794]
action based on polilcy:[0.07217342 0.6369082 ]
action based on polilcy:[0.04388239 0.651664  ]
action based on polilcy:[0.53778887 0.9999468 ]
Total T: 65432 Episode Num: 4528 Episode T: 10 Reward: -1151.347993
action based on polilcy:[-0.40567604  0.99985135]
action based on polilcy:[-0.302828   0.9974026]
action based on polilcy:[-0.36944115  0.9998515 ]
action based on polilcy:[-0.08903206  0.745496  ]
action based on polilcy:[-0.972435  1.      ]
action based on polilcy:[0.20775892 0.61141646]
action based on polilcy:[0.38118517 0.65973103]
Total T: 65439 Episode Num: 4529 Episode T: 7 Reward: -603.501219
action based on polilcy:[-0.398468

action based on polilcy:[-0.42595506  0.99987894]
action based on polilcy:[-0.3047379   0.99701643]
action based on polilcy:[-0.10762539  0.74910176]
action based on polilcy:[-0.96167654  1.        ]
action based on polilcy:[-0.9420604  1.       ]
action based on polilcy:[0.04360521 0.8933253 ]
action based on polilcy:[0.4068603  0.82573956]
Total T: 65581 Episode Num: 4546 Episode T: 7 Reward: -773.677947
action based on polilcy:[-0.45302656  0.999891  ]
action based on polilcy:[-0.23207688  0.80384403]
action based on polilcy:[-0.85285735  1.        ]
action based on polilcy:[-0.19169885  0.7875428 ]
action based on polilcy:[-0.14901577  0.7699443 ]
action based on polilcy:[-0.33649403  0.9997106 ]
action based on polilcy:[-0.13985427  0.8408388 ]
action based on polilcy:[-0.04278223  0.8292693 ]
action based on polilcy:[-0.33215746  0.9998795 ]
action based on polilcy:[-0.9957938  1.       ]
action based on polilcy:[-0.03275082  0.88627857]
action based on polilcy:[0.1025251 0.92596

action based on polilcy:[-0.42494687  0.9999328 ]
action based on polilcy:[-0.602656  1.      ]
action based on polilcy:[-0.9967079  1.       ]
action based on polilcy:[-0.70815945  0.9999929 ]
action based on polilcy:[-0.27922577  0.9996457 ]
action based on polilcy:[0.07748952 0.7916951 ]
action based on polilcy:[0.3062225 0.9197284]
Total T: 65728 Episode Num: 4563 Episode T: 7 Reward: -968.799507
action based on polilcy:[-0.44977057  0.9999224 ]
action based on polilcy:[-0.23535056  0.8062817 ]
action based on polilcy:[-0.9297898  1.       ]
action based on polilcy:[-0.09644072  0.8842237 ]
action based on polilcy:[-0.4819342  0.9999985]
action based on polilcy:[-0.28388983  0.9995402 ]
action based on polilcy:[0.04219046 0.99963886]
Total T: 65735 Episode Num: 4564 Episode T: 7 Reward: -818.499315
action based on polilcy:[-0.4176002  0.9999172]
action based on polilcy:[-0.27425337  0.85106045]
action based on polilcy:[-0.1300752  0.7509704]
action based on polilcy:[0.01516643 0.69

Total T: 65873 Episode Num: 4579 Episode T: 8 Reward: -619.840610
action based on polilcy:[-0.40162036  0.9999371 ]
action based on polilcy:[-0.72182107  1.        ]
action based on polilcy:[-0.9828031  1.       ]
action based on polilcy:[0.06691916 0.6964598 ]
action based on polilcy:[-0.14669478  1.        ]
action based on polilcy:[0.160321  0.6720545]
action based on polilcy:[0.04119397 0.7745578 ]
Total T: 65880 Episode Num: 4580 Episode T: 7 Reward: -879.961908
action based on polilcy:[-0.33707556  0.9999447 ]
action based on polilcy:[-0.255703    0.99969935]
action based on polilcy:[-0.402737    0.99999994]
action based on polilcy:[-0.2957612  0.9992835]
action based on polilcy:[-0.03269722  0.73336434]
action based on polilcy:[-0.99726856  1.        ]
action based on polilcy:[0.01731923 0.71291494]
action based on polilcy:[-0.10567032  0.999976  ]
action based on polilcy:[0.2591472  0.59062386]
action based on polilcy:[0.4468997  0.70048976]
Total T: 65890 Episode Num: 4581 Epi

Total T: 66416 Episode Num: 4607 Episode T: 52 Reward: -7407.635717
Total T: 66466 Episode Num: 4608 Episode T: 50 Reward: -6926.822787
Total T: 66491 Episode Num: 4609 Episode T: 25 Reward: -2830.844383
action based on polilcy:[-0.20094495  0.99997306]
action based on polilcy:[-0.155096    0.75165933]
action based on polilcy:[-0.9954742  1.       ]
action based on polilcy:[0.0489075 0.6730908]
action based on polilcy:[0.02738657 0.7579679 ]
action based on polilcy:[-0.3790075  0.9999899]
action based on polilcy:[-0.735574   0.9999991]
Total T: 66507 Episode Num: 4610 Episode T: 16 Reward: -1906.297721
action based on polilcy:[-0.33703265  0.99991226]
action based on polilcy:[-0.22457545  0.99994963]
action based on polilcy:[-0.2021927  0.8584943]
action based on polilcy:[-0.9899018  1.       ]
action based on polilcy:[-0.07858993  0.7552722 ]
action based on polilcy:[-0.336297   0.9995289]
action based on polilcy:[-0.4564537   0.99997526]
action based on polilcy:[-0.985083  1.      ]


action based on polilcy:[-0.30682865  0.9999159 ]
action based on polilcy:[-0.20418055  0.8375496 ]
action based on polilcy:[-0.23170759  0.98431903]
action based on polilcy:[-0.87689936  0.99999994]
action based on polilcy:[-0.01785042  0.85897636]
action based on polilcy:[-0.461346    0.99994093]
action based on polilcy:[-0.0327788  0.883441 ]
action based on polilcy:[-0.9272023  1.       ]
action based on polilcy:[0.14555256 0.6569064 ]
action based on polilcy:[0.26820582 0.6365585 ]
Total T: 66673 Episode Num: 4628 Episode T: 10 Reward: -874.950555
action based on polilcy:[-0.29129308  0.99993366]
action based on polilcy:[-0.09992405  0.9183341 ]
action based on polilcy:[-0.16875584  0.99999994]
action based on polilcy:[-0.6433104  1.       ]
action based on polilcy:[-0.05134129  0.8490335 ]
action based on polilcy:[-0.03530762  0.91556036]
action based on polilcy:[-0.05147953  0.86713374]
action based on polilcy:[0.39588302 0.9999971 ]
Total T: 66681 Episode Num: 4629 Episode T: 8

action based on polilcy:[-0.25305283  0.99996287]
action based on polilcy:[-0.45350304  1.        ]
action based on polilcy:[-0.11874752  0.83956695]
action based on polilcy:[-0.99825186  1.        ]
action based on polilcy:[-0.6163354  1.       ]
action based on polilcy:[-0.1620217  0.9999918]
action based on polilcy:[-0.12097551  0.9893145 ]
action based on polilcy:[-0.2047367  0.8427595]
action based on polilcy:[-0.9903283  1.       ]
action based on polilcy:[0.00228107 0.75150955]
action based on polilcy:[0.12797703 0.671108  ]
action based on polilcy:[0.34494716 0.9995086 ]
Total T: 66835 Episode Num: 4648 Episode T: 12 Reward: -1288.184236
action based on polilcy:[-0.25951713  0.9999582 ]
action based on polilcy:[-0.96780854  1.        ]
action based on polilcy:[-0.11956008  0.79622495]
action based on polilcy:[-0.42461213  0.9997825 ]
action based on polilcy:[0.03262013 0.745478  ]
action based on polilcy:[-0.7351172  1.       ]
action based on polilcy:[0.20985252 0.6325775 ]
ac

action based on polilcy:[-0.21870749  0.9999629 ]
action based on polilcy:[-0.24488196  0.9999808 ]
action based on polilcy:[-0.9981261  1.       ]
action based on polilcy:[0.02104813 0.85581493]
action based on polilcy:[0.11856021 0.99967784]
action based on polilcy:[0.21915577 0.68994975]
Total T: 66988 Episode Num: 4668 Episode T: 6 Reward: -692.205562
action based on polilcy:[-0.20638429  0.9999665 ]
action based on polilcy:[-0.19943026  0.8558341 ]
action based on polilcy:[-0.14543483  0.8490727 ]
action based on polilcy:[-0.30076766  1.        ]
action based on polilcy:[-0.1775371  0.8283241]
action based on polilcy:[-0.01807196  0.7785995 ]
action based on polilcy:[-0.03294831  0.91228503]
action based on polilcy:[0.06978196 0.79496735]
action based on polilcy:[0.34027356 1.        ]
Total T: 66997 Episode Num: 4669 Episode T: 9 Reward: -847.230168
action based on polilcy:[-0.2669023   0.99996686]
action based on polilcy:[-0.16908039  0.9999992 ]
action based on polilcy:[-0.3229

action based on polilcy:[-0.19884646  0.9999657 ]
action based on polilcy:[-0.2748575  0.999962 ]
action based on polilcy:[-0.08911373  0.77515715]
action based on polilcy:[0.02598834 0.87834245]
action based on polilcy:[-0.17866653  0.9999996 ]
action based on polilcy:[-0.9970494  1.       ]
action based on polilcy:[0.14304091 0.68525636]
Total T: 67132 Episode Num: 4686 Episode T: 7 Reward: -806.056760
action based on polilcy:[-0.2548657   0.99996346]
action based on polilcy:[-0.08497176  0.95756453]
action based on polilcy:[-0.19854821  0.99999994]
action based on polilcy:[-0.14692114  0.8713296 ]
action based on polilcy:[-0.9276044  1.       ]
action based on polilcy:[-0.6974052  1.       ]
action based on polilcy:[-0.4775703  0.9999998]
action based on polilcy:[-0.9342981  1.       ]
action based on polilcy:[-0.03927046  0.74682117]
action based on polilcy:[0.02328264 0.71058476]
action based on polilcy:[0.0765738  0.83174133]
action based on polilcy:[0.4715096 0.8491195]
Total T:

action based on polilcy:[-0.232776   0.9999755]
action based on polilcy:[-0.9997435   0.28894293]
action based on polilcy:[-0.9976916   0.25497326]
action based on polilcy:[-0.13430442  0.88877255]
action based on polilcy:[-0.19887201  0.98422325]
action based on polilcy:[-0.22761235  0.9992411 ]
action based on polilcy:[-0.10787772  0.99979687]
action based on polilcy:[-0.61288375  1.        ]
action based on polilcy:[0.2815071  0.61894643]
action based on polilcy:[0.3576349  0.59197676]
Total T: 67297 Episode Num: 4703 Episode T: 10 Reward: -1021.129869
action based on polilcy:[-0.24768123  0.99997795]
action based on polilcy:[-0.27556542  0.99998605]
action based on polilcy:[-0.9896521  1.       ]
action based on polilcy:[-0.03739916  0.7696599 ]
action based on polilcy:[-0.41970336  0.99991894]
action based on polilcy:[-0.97982484  1.        ]
action based on polilcy:[0.10848604 0.7377905 ]
action based on polilcy:[0.12245609 0.8195317 ]
action based on polilcy:[0.11284435 0.958240

action based on polilcy:[-0.31358287  0.9999818 ]
action based on polilcy:[-0.37219185  1.        ]
action based on polilcy:[-0.02319447  0.7834954 ]
action based on polilcy:[0.11819261 0.72198427]
action based on polilcy:[0.24013904 1.        ]
action based on polilcy:[-0.03066248  1.        ]
Total T: 67459 Episode Num: 4723 Episode T: 6 Reward: -720.058599
action based on polilcy:[-0.30623436  0.99998313]
action based on polilcy:[-0.9827095  1.       ]
action based on polilcy:[-0.07597984  0.91390276]
action based on polilcy:[-0.13274726  0.9132347 ]
action based on polilcy:[-0.9984113  1.       ]
action based on polilcy:[0.08835735 0.7066941 ]
action based on polilcy:[-0.03882079  0.992503  ]
action based on polilcy:[0.01782948 0.9528207 ]
action based on polilcy:[-0.6988888  1.       ]
Total T: 67468 Episode Num: 4724 Episode T: 9 Reward: -994.130153
action based on polilcy:[-0.32049963  0.9999878 ]
action based on polilcy:[-0.19694264  0.99999994]
action based on polilcy:[-0.2043

action based on polilcy:[-0.3394285   0.99999154]
action based on polilcy:[-0.24819681  0.9999757 ]
action based on polilcy:[-0.07878201  0.81535447]
action based on polilcy:[-0.8576133  1.       ]
action based on polilcy:[0.13356516 0.709507  ]
action based on polilcy:[-0.30897173  0.99999845]
action based on polilcy:[0.17479986 0.8823008 ]
action based on polilcy:[-0.14729716  0.9999541 ]
action based on polilcy:[0.10720312 0.73382527]
action based on polilcy:[0.2547186  0.66197115]
Total T: 67626 Episode Num: 4743 Episode T: 10 Reward: -843.895307
action based on polilcy:[-0.32572183  0.99999225]
action based on polilcy:[-0.24605462  0.9999737 ]
action based on polilcy:[-0.26251155  0.9999992 ]
action based on polilcy:[-0.9736478  1.       ]
action based on polilcy:[-0.99887633  1.        ]
action based on polilcy:[-0.96173406  1.        ]
action based on polilcy:[-0.02094156  0.8798392 ]
action based on polilcy:[-0.03459825  0.9946624 ]
action based on polilcy:[-0.99935365  1.     

action based on polilcy:[-0.33281866  0.99999315]
action based on polilcy:[-0.20888712  0.9999813 ]
action based on polilcy:[-0.17852417  0.99959505]
action based on polilcy:[-0.99944377  1.        ]
action based on polilcy:[0.09653569 0.74384165]
action based on polilcy:[0.21408263 0.71853113]
action based on polilcy:[0.26937482 1.        ]
Total T: 67775 Episode Num: 4760 Episode T: 7 Reward: -804.876059
action based on polilcy:[-0.38204336  0.9999897 ]
action based on polilcy:[-0.9216717  1.       ]
action based on polilcy:[-0.5479033  1.       ]
action based on polilcy:[-0.9986504   0.32763603]
action based on polilcy:[-0.02261914  0.95687157]
action based on polilcy:[-0.26554492  0.83634126]
action based on polilcy:[-0.16914864  0.8525835 ]
action based on polilcy:[-0.12098242  0.7855041 ]
action based on polilcy:[-0.57274354  0.9999998 ]
action based on polilcy:[0.14399642 0.7459811 ]
action based on polilcy:[0.3026111  0.62471664]
action based on polilcy:[0.767444  0.8010211]
To

action based on polilcy:[-0.33752605  0.9999928 ]
action based on polilcy:[-0.20991348  1.        ]
action based on polilcy:[-0.99972004  1.        ]
action based on polilcy:[0.15170401 0.8574953 ]
action based on polilcy:[0.12030316 0.9846823 ]
action based on polilcy:[0.41300273 0.99999994]
Total T: 67925 Episode Num: 4775 Episode T: 6 Reward: -831.281120
action based on polilcy:[-0.35957667  0.99999386]
action based on polilcy:[-0.41463387  1.        ]
action based on polilcy:[-0.04611646  0.79447055]
action based on polilcy:[0.04445885 0.9253028 ]
action based on polilcy:[-0.07779323  0.9993313 ]
action based on polilcy:[-0.17889656  1.        ]
action based on polilcy:[-0.8387381  1.       ]
Total T: 67932 Episode Num: 4776 Episode T: 7 Reward: -898.755190
action based on polilcy:[-0.3513892   0.99999326]
action based on polilcy:[-0.22165316  0.9990625 ]
action based on polilcy:[-0.99986243  1.        ]
action based on polilcy:[0.1228985 0.9144485]
action based on polilcy:[-0.0607

action based on polilcy:[-0.40327546  0.99999726]
action based on polilcy:[-0.13943766  0.99901783]
action based on polilcy:[-0.99210036  1.        ]
action based on polilcy:[0.1972867 0.7543679]
action based on polilcy:[-0.00534678  0.99947363]
action based on polilcy:[-0.18690431  0.9999853 ]
action based on polilcy:[0.02099191 0.999984  ]
action based on polilcy:[-0.7930025  1.       ]
action based on polilcy:[0.28244084 0.7104465 ]
Total T: 68551 Episode Num: 4809 Episode T: 9 Reward: -827.832881
action based on polilcy:[-0.40810323  0.99999833]
action based on polilcy:[-0.9816469  1.       ]
action based on polilcy:[-0.57430243  0.9999999 ]
action based on polilcy:[0.114627  0.8033377]
action based on polilcy:[0.20437855 0.9084034 ]
action based on polilcy:[-0.9245746  1.       ]
action based on polilcy:[0.1826752 0.7638216]
action based on polilcy:[0.10097472 1.        ]
Total T: 68559 Episode Num: 4810 Episode T: 8 Reward: -954.273710
action based on polilcy:[-0.4043761   0.9999

action based on polilcy:[-0.4284811  0.9999986]
action based on polilcy:[-0.22517477  1.        ]
action based on polilcy:[-0.49694887  0.99999994]
action based on polilcy:[0.21256529 0.91236925]
action based on polilcy:[-0.8344955  1.       ]
action based on polilcy:[0.41108578 0.72389376]
action based on polilcy:[0.3623655 0.8397652]
Total T: 68714 Episode Num: 4829 Episode T: 7 Reward: -746.899703
action based on polilcy:[-0.40052712  0.99999845]
action based on polilcy:[-0.21077283  0.9126942 ]
action based on polilcy:[-0.11374895  0.9604197 ]
action based on polilcy:[-0.99973917  1.        ]
action based on polilcy:[0.23720832 0.83816826]
action based on polilcy:[-0.7980275  1.       ]
action based on polilcy:[0.3394161 0.7764868]
action based on polilcy:[-0.22208822  1.        ]
action based on polilcy:[0.4235266  0.71525896]
Total T: 68723 Episode Num: 4830 Episode T: 9 Reward: -958.934392
action based on polilcy:[-0.42268646  0.99999845]
action based on polilcy:[-0.17960352  0.

action based on polilcy:[-0.99943894  1.        ]
action based on polilcy:[0.41509107 0.8141238 ]
action based on polilcy:[0.4425259 0.7616863]
Total T: 68876 Episode Num: 4849 Episode T: 10 Reward: -1106.509198
action based on polilcy:[-0.4023798   0.99999934]
action based on polilcy:[0.02307947 0.98284775]
action based on polilcy:[-0.99992716  1.        ]
action based on polilcy:[-0.99715686  1.        ]
action based on polilcy:[0.0296526 0.9024516]
action based on polilcy:[-0.10806221  1.        ]
action based on polilcy:[0.2693523  0.89236015]
action based on polilcy:[-0.9925988  1.       ]
action based on polilcy:[0.2746303 0.9999723]
Total T: 68885 Episode Num: 4850 Episode T: 9 Reward: -1189.461151
action based on polilcy:[-0.40874633  0.9999993 ]
action based on polilcy:[-0.18913975  1.        ]
action based on polilcy:[-0.6905191  1.       ]
action based on polilcy:[0.33507934 0.86735094]
action based on polilcy:[0.1190982  0.99990165]
action based on polilcy:[0.49574795 0.986

action based on polilcy:[-0.97978157  1.        ]
action based on polilcy:[-0.9997586  1.       ]
action based on polilcy:[0.35216406 0.8788652 ]
action based on polilcy:[-0.04431603  0.9999957 ]
action based on polilcy:[-0.22091235  1.        ]
action based on polilcy:[0.44906184 0.73841006]
Total T: 69031 Episode Num: 4870 Episode T: 10 Reward: -1091.217794
action based on polilcy:[-0.38749555  0.9999997 ]
action based on polilcy:[-0.04320718  0.93878305]
action based on polilcy:[-0.37748745  0.9999997 ]
action based on polilcy:[-0.31679592  1.        ]
action based on polilcy:[-0.5849204  1.       ]
action based on polilcy:[0.27055988 0.9504462 ]
action based on polilcy:[0.3065188 0.8332757]
action based on polilcy:[0.4279084 1.       ]
Total T: 69039 Episode Num: 4871 Episode T: 8 Reward: -965.745854
action based on polilcy:[-0.4015018   0.99999964]
action based on polilcy:[-0.99971116  1.        ]
action based on polilcy:[0.3081105 0.9181231]
action based on polilcy:[-0.05662113  

action based on polilcy:[-0.40134493  0.99999976]
action based on polilcy:[-0.12776664  0.9977017 ]
action based on polilcy:[0.08294849 0.9450415 ]
action based on polilcy:[0.26758385 0.9410518 ]
action based on polilcy:[0.03453125 0.99971503]
action based on polilcy:[-0.00156065  0.99999136]
action based on polilcy:[0.4298228  0.82978076]
action based on polilcy:[0.3623393  0.79203594]
action based on polilcy:[0.48830557 0.7032089 ]
action based on polilcy:[0.7649896  0.83721757]
Total T: 69197 Episode Num: 4891 Episode T: 10 Reward: -560.978858
action based on polilcy:[-0.39527044  0.9999997 ]
action based on polilcy:[0.03572924 0.93062216]
action based on polilcy:[-0.92401034  1.        ]
action based on polilcy:[0.2613556 0.9354491]
action based on polilcy:[0.3677066 0.8980557]
action based on polilcy:[-0.99880993  1.        ]
action based on polilcy:[0.41774774 0.83688337]
action based on polilcy:[-0.9614203  1.       ]
action based on polilcy:[0.40847364 0.8117231 ]
action based 

action based on polilcy:[-0.38140705  0.99999976]
action based on polilcy:[0.16265975 0.95789474]
action based on polilcy:[-0.18358679  0.9999973 ]
action based on polilcy:[-0.999993  1.      ]
action based on polilcy:[-0.99995995  1.        ]
action based on polilcy:[0.38610178 0.90864426]
action based on polilcy:[-0.9999927  1.       ]
action based on polilcy:[0.36968118 0.9092499 ]
action based on polilcy:[0.28480288 0.93044955]
Total T: 69357 Episode Num: 4911 Episode T: 9 Reward: -1118.876625
action based on polilcy:[-0.36230946  0.9999998 ]
action based on polilcy:[-0.9291829  1.       ]
action based on polilcy:[0.2997139 0.9476008]
action based on polilcy:[-0.71392184  1.        ]
action based on polilcy:[0.4444518 0.7738   ]
action based on polilcy:[0.42846572 0.99633366]
action based on polilcy:[0.36126298 0.9999085 ]
Total T: 69364 Episode Num: 4912 Episode T: 7 Reward: -793.127318
action based on polilcy:[-0.3743206  0.9999998]
action based on polilcy:[-0.9272026  1.       ]

action based on polilcy:[-0.38944572  0.9999998 ]
action based on polilcy:[-0.11233982  0.9999924 ]
action based on polilcy:[-0.9999002  1.       ]
action based on polilcy:[0.35133263 0.9367445 ]
action based on polilcy:[-0.99999523  1.        ]
action based on polilcy:[0.18344812 0.9857976 ]
action based on polilcy:[-0.8977322  1.       ]
action based on polilcy:[0.26840115 0.94973797]
action based on polilcy:[-0.9999246  1.       ]
action based on polilcy:[0.41790926 0.80353045]
action based on polilcy:[0.41157916 0.74918795]
Total T: 69529 Episode Num: 4932 Episode T: 11 Reward: -1200.635175
action based on polilcy:[-0.3789993   0.99999994]
action based on polilcy:[-0.9999808  1.       ]
action based on polilcy:[0.3732406  0.94142413]
action based on polilcy:[-0.9884915  1.       ]
action based on polilcy:[0.29398024 0.9785296 ]
action based on polilcy:[-0.70296526  1.        ]
action based on polilcy:[0.50542235 0.73928964]
Total T: 69536 Episode Num: 4933 Episode T: 7 Reward: -931

action based on polilcy:[-0.3805524   0.99999994]
action based on polilcy:[-0.99999154  1.        ]
action based on polilcy:[-0.99999243  1.        ]
action based on polilcy:[0.51618797 0.9381732 ]
action based on polilcy:[0.593395   0.85243905]
action based on polilcy:[0.71836615 1.        ]
Total T: 69693 Episode Num: 4952 Episode T: 6 Reward: -832.262721
action based on polilcy:[-0.38197675  0.99999994]
action based on polilcy:[-0.9999818  1.       ]
action based on polilcy:[0.21320954 0.9392028 ]
action based on polilcy:[0.35066926 0.9656911 ]
action based on polilcy:[-0.99849653  1.        ]
action based on polilcy:[0.61803156 0.8464468 ]
action based on polilcy:[0.8605485  0.96818405]
Total T: 69700 Episode Num: 4953 Episode T: 7 Reward: -764.602787
action based on polilcy:[-0.41084957  0.99999994]
action based on polilcy:[0.07335822 0.99912584]
action based on polilcy:[-0.9941245  1.       ]
action based on polilcy:[0.29166242 0.9102507 ]
action based on polilcy:[0.3045324 0.955

action based on polilcy:[-0.40003055  0.99999994]
action based on polilcy:[-0.92642176  1.        ]
action based on polilcy:[0.30162257 0.9605648 ]
action based on polilcy:[-0.9988705  1.       ]
action based on polilcy:[0.49432468 0.9612817 ]
action based on polilcy:[-0.62466294  1.        ]
action based on polilcy:[0.7583177  0.95148414]
Total T: 69859 Episode Num: 4973 Episode T: 7 Reward: -804.109156
action based on polilcy:[-0.38095295  0.99999994]
action based on polilcy:[-0.06958338  0.99950796]
action based on polilcy:[-0.99459946  1.        ]
action based on polilcy:[-0.6272161  1.       ]
action based on polilcy:[0.5923867 0.9984946]
action based on polilcy:[-0.5126956  1.       ]
action based on polilcy:[0.5721756 0.8569486]
action based on polilcy:[0.53607893 1.        ]
Total T: 69867 Episode Num: 4974 Episode T: 8 Reward: -809.735631
action based on polilcy:[-0.4050811   0.99999994]
action based on polilcy:[-0.99999774  1.        ]
action based on polilcy:[0.40663108 0.96

Total T: 70090 Episode Num: 4992 Episode T: 43 Reward: -7568.568493
Total T: 70104 Episode Num: 4993 Episode T: 14 Reward: -2292.143628
Total T: 70156 Episode Num: 4994 Episode T: 52 Reward: -6790.831137
Total T: 70203 Episode Num: 4995 Episode T: 47 Reward: -7337.995274
Total T: 70249 Episode Num: 4996 Episode T: 46 Reward: -7401.063974
Total T: 70263 Episode Num: 4997 Episode T: 14 Reward: -1811.422910
Total T: 70279 Episode Num: 4998 Episode T: 16 Reward: -2325.594509
Total T: 70328 Episode Num: 4999 Episode T: 49 Reward: -7054.706662
Total T: 70348 Episode Num: 5000 Episode T: 20 Reward: -2681.019984
Total T: 70393 Episode Num: 5001 Episode T: 45 Reward: -7693.832877
Total T: 70404 Episode Num: 5002 Episode T: 11 Reward: -1806.685420
Total T: 70434 Episode Num: 5003 Episode T: 30 Reward: -4818.717221
Total T: 70444 Episode Num: 5004 Episode T: 10 Reward: -1188.386905
Total T: 70492 Episode Num: 5005 Episode T: 48 Reward: -7273.541817
action based on polilcy:[-0.9999938  0.97139  ]


action based on polilcy:[-0.5328673  1.       ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.27574345  0.991067  ]
action based on polilcy:[-0.9997281  1.       ]
action based on polilcy:[-0.9008653  1.       ]
action based on polilcy:[0.37809098 0.9999999 ]
action based on polilcy:[0.36473295 0.9999999 ]
action based on polilcy:[0.26194984 1.        ]
action based on polilcy:[0.9827695 0.9998089]
Total T: 70639 Episode Num: 5022 Episode T: 9 Reward: -1092.938297
action based on polilcy:[-0.5484477  1.       ]
action based on polilcy:[-0.9637269  1.       ]
action based on polilcy:[0.25904295 0.9877159 ]
action based on polilcy:[0.55302644 0.9839624 ]
action based on polilcy:[0.26780528 0.9998356 ]
action based on polilcy:[-0.8702815  1.       ]
action based on polilcy:[0.33931655 1.        ]
action based on polilcy:[0.96708333 0.8648626 ]
Total T: 70647 Episode Num: 5023 Episode T: 8 Reward: -821.030105
action based on polilcy:[-0.5596828  1.       ]
action based on p

action based on polilcy:[-0.6199055  1.       ]
action based on polilcy:[-0.6601606  0.9999723]
action based on polilcy:[-1.         0.7799647]
action based on polilcy:[-0.21836914  0.9913221 ]
action based on polilcy:[-0.07538845  0.9877349 ]
action based on polilcy:[-0.9999974  1.       ]
action based on polilcy:[0.4313601  0.98006254]
action based on polilcy:[-0.44178176  1.        ]
action based on polilcy:[0.8351625  0.90153605]
action based on polilcy:[0.57845336 0.9999946 ]
action based on polilcy:[0.7749109 1.       ]
Total T: 70806 Episode Num: 5041 Episode T: 11 Reward: -1087.520102
action based on polilcy:[-0.81657386  1.        ]
action based on polilcy:[-0.5197967   0.99999976]
action based on polilcy:[0.1891761 0.9833447]
action based on polilcy:[-0.99839634  1.        ]
action based on polilcy:[0.04187842 0.87379086]
action based on polilcy:[-0.99999994  1.        ]
action based on polilcy:[-0.9999785  1.       ]
action based on polilcy:[-0.5594084  1.       ]
action bas

action based on polilcy:[-0.9987856  1.       ]
action based on polilcy:[0.12462738 0.98855937]
action based on polilcy:[-0.99990976  1.        ]
action based on polilcy:[0.7711675 0.9692935]
action based on polilcy:[0.42961383 1.        ]
action based on polilcy:[0.7310431 1.       ]
action based on polilcy:[0.97605133 0.999873  ]
Total T: 70963 Episode Num: 5060 Episode T: 7 Reward: -687.532289
action based on polilcy:[-0.9987173  1.       ]
action based on polilcy:[0.13220002 0.9853302 ]
action based on polilcy:[0.09570325 0.9999896 ]
action based on polilcy:[-0.99999964  1.        ]
action based on polilcy:[0.72962236 0.99962   ]
action based on polilcy:[0.5230576 1.       ]
action based on polilcy:[0.42051348 1.        ]
Total T: 70970 Episode Num: 5061 Episode T: 7 Reward: -890.445240
action based on polilcy:[-0.99849725  1.        ]
action based on polilcy:[0.1135529 0.9861305]
action based on polilcy:[-0.99564177  1.        ]
action based on polilcy:[-0.04596608  0.947943  ]
ac

action based on polilcy:[-0.9990957  1.       ]
action based on polilcy:[-0.3697719   0.89719987]
action based on polilcy:[-0.9816041  0.9954651]
action based on polilcy:[-0.9959243  1.       ]
action based on polilcy:[-0.87848026  1.        ]
action based on polilcy:[-0.99993867  1.        ]
action based on polilcy:[0.18164441 0.9341165 ]
action based on polilcy:[-0.09431992  1.        ]
action based on polilcy:[0.75371236 0.96756417]
action based on polilcy:[0.41739237 1.        ]
action based on polilcy:[0.99944156 1.        ]
Total T: 71132 Episode Num: 5081 Episode T: 11 Reward: -1068.731189
action based on polilcy:[-0.9993528  1.       ]
action based on polilcy:[-0.09255719  0.95825416]
action based on polilcy:[-0.999548  1.      ]
action based on polilcy:[-0.18034013  1.        ]
action based on polilcy:[-0.99956924  1.        ]
action based on polilcy:[0.94718647 0.94453263]
action based on polilcy:[0.82345474 1.        ]
action based on polilcy:[0.9980938 1.       ]
Total T: 7

action based on polilcy:[-0.99954444  1.        ]
action based on polilcy:[-0.03199777  0.998722  ]
action based on polilcy:[-0.40487337  1.        ]
action based on polilcy:[-0.9984341  1.       ]
action based on polilcy:[0.36207372 0.9229033 ]
action based on polilcy:[-0.99028295  1.        ]
action based on polilcy:[0.97112036 0.9397521 ]
Total T: 71289 Episode Num: 5101 Episode T: 7 Reward: -779.448996
action based on polilcy:[-0.9996009  1.       ]
action based on polilcy:[-0.76895773  1.        ]
action based on polilcy:[0.01132814 0.9234211 ]
action based on polilcy:[-0.9999545  1.       ]
action based on polilcy:[-0.7302166  1.       ]
action based on polilcy:[-1.  1.]
action based on polilcy:[0.12896366 0.93887955]
action based on polilcy:[-1.  1.]
action based on polilcy:[0.44717917 0.96205235]
action based on polilcy:[0.40809488 1.        ]
action based on polilcy:[0.9776462 1.       ]
Total T: 71300 Episode Num: 5102 Episode T: 11 Reward: -1313.663200
action based on polilc

action based on polilcy:[-0.99964523  1.        ]
action based on polilcy:[-0.06301191  0.9330988 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[0.59723806 0.9571714 ]
action based on polilcy:[0.9569234 0.9208543]
action based on polilcy:[-0.9790805  1.       ]
action based on polilcy:[0.9642377 0.9336184]
action based on polilcy:[0.53724605 1.        ]
Total T: 71459 Episode Num: 5122 Episode T: 8 Reward: -810.093298
action based on polilcy:[-0.9996806  1.       ]
action based on polilcy:[-0.17205311  0.9144543 ]
action based on polilcy:[0.00519036 0.99291986]
action based on polilcy:[-0.95871073  1.        ]
action based on polilcy:[-1.  1.]
action based on polilcy:[0.6992961 0.9562254]
action based on polilcy:[0.8216027 1.       ]
action based on polilcy:[0.9999906 1.       ]
Total T: 71467 Episode Num: 5123 Episode T: 8 Reward: -844.804303
action based on polilcy:[-0.99974036  1.        ]
action based on polilcy:[0.05181778 0.95648116]
action based on polilcy:[-0.9974

action based on polilcy:[-0.99990237  1.        ]
action based on polilcy:[-0.9999925  1.       ]
action based on polilcy:[-0.0303323   0.99845165]
action based on polilcy:[-0.23363894  1.        ]
action based on polilcy:[0.5921118 1.       ]
action based on polilcy:[0.23377877 1.        ]
action based on polilcy:[0.99670285 1.        ]
Total T: 71613 Episode Num: 5142 Episode T: 7 Reward: -869.611414
action based on polilcy:[-0.9999074  1.       ]
action based on polilcy:[-0.04228801  0.9328498 ]
action based on polilcy:[-0.9930011  1.       ]
action based on polilcy:[-0.02462839  0.8640247 ]
action based on polilcy:[0.43513373 0.99999887]
action based on polilcy:[0.1188029 1.       ]
action based on polilcy:[0.9966857 1.       ]
Total T: 71620 Episode Num: 5143 Episode T: 7 Reward: -736.159371
action based on polilcy:[-0.99990433  1.        ]
action based on polilcy:[-0.67852473  1.        ]
action based on polilcy:[-0.45504016  1.        ]
action based on polilcy:[-0.6230844  1.   

action based on polilcy:[-0.9999154  1.       ]
action based on polilcy:[-0.99999744  1.        ]
action based on polilcy:[-0.93379444  1.        ]
action based on polilcy:[0.64883804 0.9383435 ]
action based on polilcy:[0.9192893 1.       ]
action based on polilcy:[0.9969825 1.       ]
Total T: 71783 Episode Num: 5162 Episode T: 6 Reward: -717.305481
action based on polilcy:[-0.99988484  1.        ]
action based on polilcy:[-0.05791591  0.914194  ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.99999976  1.        ]
action based on polilcy:[0.68233764 0.938795  ]
action based on polilcy:[-0.07863402  1.        ]
action based on polilcy:[0.5893429 1.       ]
Total T: 71790 Episode Num: 5163 Episode T: 7 Reward: -887.404996
action based on polilcy:[-0.9998832  1.       ]
action based on polilcy:[-0.28041166  0.9136088 ]
action based on polilcy:[-0.955182  1.      ]
action based on polilcy:[-0.28359023  0.8831188 ]
action based on polilcy:[-0.38360378  0.88876486]
action b

action based on polilcy:[-0.9997893  1.       ]
action based on polilcy:[-0.00694003  0.9079131 ]
action based on polilcy:[-0.99999857  1.        ]
action based on polilcy:[0.47249088 0.99997425]
action based on polilcy:[0.377959 1.      ]
action based on polilcy:[0.99993443 1.        ]
Total T: 71945 Episode Num: 5182 Episode T: 6 Reward: -675.514965
action based on polilcy:[-0.999781  1.      ]
action based on polilcy:[-0.01032986  0.9060493 ]
action based on polilcy:[-0.99999547  1.        ]
action based on polilcy:[0.62457645 0.9244979 ]
action based on polilcy:[0.8620728 1.       ]
action based on polilcy:[0.99999994 1.        ]
Total T: 71951 Episode Num: 5183 Episode T: 6 Reward: -677.528561
action based on polilcy:[-0.99979275  1.        ]
action based on polilcy:[-0.02792048  0.90763783]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.30660143  0.99999887]
action based on polilcy:[-0.13297108  1.        ]
action based on polilcy:[0.6953527  0.99250335]
action bas

action based on polilcy:[-0.99801284  0.99997616]
action based on polilcy:[-0.781638   0.5598235]
action based on polilcy:[-0.9358914   0.54788244]
action based on polilcy:[-0.78515595  0.55432504]
action based on polilcy:[-0.5631102  0.6462939]
action based on polilcy:[-0.68368983  0.46732405]
action based on polilcy:[0.5220026 0.9999999]
Total T: 72594 Episode Num: 5215 Episode T: 8 Reward: -690.115409
action based on polilcy:[-0.9999371   0.99996567]
action based on polilcy:[-0.9917357   0.99893606]
action based on polilcy:[-0.8405649  0.580796 ]
action based on polilcy:[-0.65938234  0.63797927]
action based on polilcy:[-0.30252844  0.7217978 ]
action based on polilcy:[-0.5882687  0.5028808]
action based on polilcy:[0.83049726 0.9291545 ]
Total T: 72601 Episode Num: 5216 Episode T: 7 Reward: -597.064174
action based on polilcy:[-0.9999362   0.99993736]
action based on polilcy:[-0.99999994  1.        ]
action based on polilcy:[-0.86953056  0.5879345 ]
action based on polilcy:[-0.6509

action based on polilcy:[-0.99996185  0.99978346]
action based on polilcy:[-0.81803215  0.6092796 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.6946258   0.53349847]
action based on polilcy:[-0.5527568   0.50372994]
action based on polilcy:[-0.5004092   0.45665509]
action based on polilcy:[-0.27684224  0.42272717]
action based on polilcy:[-0.08593801  0.43296596]
action based on polilcy:[-0.0700266   0.37071916]
action based on polilcy:[0.91946673 0.65329623]
Total T: 72756 Episode Num: 5231 Episode T: 10 Reward: -573.669273
action based on polilcy:[-0.9999542  0.999769 ]
action based on polilcy:[-0.8211409  0.8036242]
action based on polilcy:[-0.8936976   0.71853787]
action based on polilcy:[-1.         0.7716346]
action based on polilcy:[-1.          0.72371274]
action based on polilcy:[-1.         0.7345511]
action based on polilcy:[-1.         0.4113329]
action based on polilcy:[-1.          0.40377954]
action based on polilcy:[-1.         0.7247188]
action based 

action based on polilcy:[-0.9998303   0.99829334]
action based on polilcy:[-0.9999305   0.99981165]
action based on polilcy:[-0.7505293  0.5686273]
action based on polilcy:[-0.72332865  0.52356577]
action based on polilcy:[-0.9996996   0.99998575]
action based on polilcy:[-0.6424788   0.52891815]
action based on polilcy:[-0.42549208  0.6104336 ]
action based on polilcy:[0.1395711 0.6560192]
action based on polilcy:[-0.09890537  0.38112563]
action based on polilcy:[0.36834744 0.36045778]
Total T: 72913 Episode Num: 5245 Episode T: 10 Reward: -681.987819
action based on polilcy:[-0.9998445   0.99853885]
action based on polilcy:[-0.7502616   0.59385645]
action based on polilcy:[-1.         0.9098452]
action based on polilcy:[-0.9999986   0.99971116]
action based on polilcy:[-0.8066871  0.5304366]
action based on polilcy:[-0.6707473  0.5235155]
action based on polilcy:[-0.7961285   0.60861856]
action based on polilcy:[-0.27709252  0.8858106 ]
action based on polilcy:[-0.30262676  0.9541284

action based on polilcy:[-0.99984515  0.9994223 ]
action based on polilcy:[-0.99926126  0.7981814 ]
action based on polilcy:[-0.85433286  0.56930476]
action based on polilcy:[-1.          0.99881554]
action based on polilcy:[-0.9819035  0.6157343]
action based on polilcy:[-0.9680899   0.77141976]
action based on polilcy:[-1.          0.99783057]
action based on polilcy:[-0.9986144   0.91989934]
action based on polilcy:[-0.840713    0.51457703]
action based on polilcy:[-0.98760796  0.6466819 ]
action based on polilcy:[-0.9074298  0.8375049]
action based on polilcy:[-0.82550806  0.7502922 ]
action based on polilcy:[-0.6721773  0.9411146]
action based on polilcy:[-0.47458068  0.7911818 ]
action based on polilcy:[-0.3699349  0.7744664]
action based on polilcy:[0.2674472  0.95060086]
action based on polilcy:[0.9137473  0.99012107]
Total T: 73082 Episode Num: 5260 Episode T: 17 Reward: -1422.276654
action based on polilcy:[-0.9999043   0.99956644]
action based on polilcy:[-0.9344171  0.79016

action based on polilcy:[-0.99989504  0.9996142 ]
action based on polilcy:[-0.83712125  0.65451   ]
action based on polilcy:[-0.5959612   0.53106034]
action based on polilcy:[-0.43860754  0.48582757]
action based on polilcy:[0.41476575 0.9045043 ]
action based on polilcy:[0.57691216 0.9537517 ]
action based on polilcy:[-0.36007565  0.54672587]
action based on polilcy:[-0.41571832  0.5061158 ]
action based on polilcy:[-0.39826715  0.46840158]
Total T: 73231 Episode Num: 5276 Episode T: 9 Reward: -749.253017
action based on polilcy:[-0.9999322  0.9997669]
action based on polilcy:[-0.99962175  0.99672765]
action based on polilcy:[-0.6963159  0.5409883]
action based on polilcy:[-0.60101205  0.5341028 ]
action based on polilcy:[-0.60812104  0.6075928 ]
action based on polilcy:[-0.5828531  0.589272 ]
action based on polilcy:[-0.09377966  0.70345384]
action based on polilcy:[-0.22456053  0.40787807]
action based on polilcy:[0.04782741 0.4061161 ]
Total T: 73240 Episode Num: 5277 Episode T: 9 

action based on polilcy:[-0.99987674  0.99966   ]
action based on polilcy:[-0.6741751   0.61647546]
action based on polilcy:[-1.         0.9400386]
action based on polilcy:[-0.7960807   0.58988816]
action based on polilcy:[-0.840803   0.6527904]
action based on polilcy:[-0.99394965  0.9240559 ]
action based on polilcy:[-0.67674536  0.5141599 ]
action based on polilcy:[-0.6098635  0.5189663]
action based on polilcy:[-0.50256115  0.47226477]
action based on polilcy:[-0.47083923  0.4791682 ]
action based on polilcy:[-0.42475215  0.4325541 ]
action based on polilcy:[-0.37306336  0.40388694]
action based on polilcy:[0.37547183 0.99957764]
Total T: 73381 Episode Num: 5293 Episode T: 13 Reward: -951.920661
action based on polilcy:[-0.9998515  0.9996472]
action based on polilcy:[-0.6624437  0.5484251]
action based on polilcy:[-0.9709653   0.88086617]
action based on polilcy:[-0.5855274  0.5299097]
action based on polilcy:[-0.6504086   0.56568813]
action based on polilcy:[-0.64583695  0.6090373

action based on polilcy:[-0.9998387   0.99967176]
action based on polilcy:[-0.99999994  0.9999996 ]
action based on polilcy:[-0.66166544  0.60827315]
action based on polilcy:[-0.959175   0.7845605]
action based on polilcy:[-0.5265594   0.60027885]
action based on polilcy:[-0.47155502  0.5537443 ]
action based on polilcy:[-0.09493815  0.7724956 ]
action based on polilcy:[0.35054848 0.70666397]
action based on polilcy:[0.52909124 0.9910975 ]
Total T: 73553 Episode Num: 5310 Episode T: 9 Reward: -1066.283455
action based on polilcy:[-0.99987125  0.99975896]
action based on polilcy:[-0.63250375  0.6051327 ]
action based on polilcy:[-0.6148374   0.53147376]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.67828923  0.57431567]
action based on polilcy:[-0.90805864  0.7140497 ]
action based on polilcy:[-0.99993926  0.9968183 ]
action based on polilcy:[-0.54183245  0.48032367]
action based on polilcy:[-0.5377252  0.5241992]
action based on polilcy:[-0.635425   0.5425726]
action ba

action based on polilcy:[-0.9999582  0.9998216]
action based on polilcy:[-0.60265505  0.6014333 ]
action based on polilcy:[-0.62121046  0.54408646]
action based on polilcy:[-0.51221824  0.6320894 ]
action based on polilcy:[-0.44574684  0.5015079 ]
action based on polilcy:[-0.6872726  0.9997394]
action based on polilcy:[-0.49147764  0.51713836]
action based on polilcy:[-0.39738464  0.6299902 ]
action based on polilcy:[0.66448843 0.92519337]
Total T: 73734 Episode Num: 5326 Episode T: 9 Reward: -794.373973
action based on polilcy:[-0.99995404  0.9998207 ]
action based on polilcy:[-0.87660956  0.69801456]
action based on polilcy:[-0.999992    0.99999994]
action based on polilcy:[-0.4821496  0.6553023]
action based on polilcy:[-0.3675911  0.6112588]
action based on polilcy:[-0.32117307  0.5474889 ]
action based on polilcy:[0.5149549  0.80572265]
Total T: 73741 Episode Num: 5327 Episode T: 7 Reward: -699.095395
action based on polilcy:[-0.9999518  0.9998258]
action based on polilcy:[-0.9988

action based on polilcy:[-0.60469484  0.5984566 ]
action based on polilcy:[-0.99995625  0.9999376 ]
action based on polilcy:[-0.572647   0.5371903]
action based on polilcy:[-0.95389545  0.7133641 ]
action based on polilcy:[-0.65833056  0.5786562 ]
action based on polilcy:[-0.5704477   0.56508243]
action based on polilcy:[-0.5154048  0.6056632]
action based on polilcy:[0.64449626 0.97087455]
Total T: 73884 Episode Num: 5342 Episode T: 9 Reward: -918.896812
action based on polilcy:[-0.9999776  0.9998017]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.6443479   0.51602226]
action based on polilcy:[-0.57345784  0.51610136]
action based on polilcy:[-0.2915628  0.6831088]
action based on polilcy:[-0.44865912  0.6104497 ]
action based on polilcy:[0.03716739 0.71326303]
action based on polilcy:[0.9386998 0.8829763]
Total T: 73892 Episode Num: 5343 Episode T: 8 Reward: -623.637812
action based on polilcy:[-0.9999729   0.99978983]
action based on polilcy:[-1.          0.99999934]


action based on polilcy:[-0.4873185   0.47290114]
action based on polilcy:[-0.37430903  0.5684532 ]
action based on polilcy:[0.65965647 0.8580215 ]
Total T: 74544 Episode Num: 5369 Episode T: 15 Reward: -1207.877451
action based on polilcy:[-0.99998933  0.99970406]
action based on polilcy:[-0.7540917   0.53049797]
action based on polilcy:[-0.8079191  0.5211017]
action based on polilcy:[-0.5166625   0.46328014]
action based on polilcy:[-0.03047826  0.6598565 ]
action based on polilcy:[0.92149746 0.99846655]
Total T: 74550 Episode Num: 5370 Episode T: 6 Reward: -526.869236
action based on polilcy:[-0.9999892   0.99974334]
action based on polilcy:[-0.73254025  0.5419611 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.9421007  0.6484451]
action based on polilcy:[-0.62242603  0.47220242]
action based on polilcy:[-0.55829406  0.4835749 ]
action based on polilcy:[-0.9896309  0.761201 ]
action based on polilcy:[-0.9318988   0.81059736]
action based on polilcy:[-0.414133   0.503

action based on polilcy:[-0.99999297  0.99965644]
action based on polilcy:[-0.96012163  0.72029936]
action based on polilcy:[-0.9927238  0.7530564]
action based on polilcy:[-0.53291595  0.4311244 ]
action based on polilcy:[-0.39847884  0.44721842]
action based on polilcy:[-0.26983708  0.52258885]
action based on polilcy:[-0.24809554  0.7909566 ]
action based on polilcy:[-0.37502182  0.44679487]
action based on polilcy:[-0.19353509  0.5262038 ]
Total T: 74704 Episode Num: 5384 Episode T: 9 Reward: -605.577369
action based on polilcy:[-0.9999937  0.9996894]
action based on polilcy:[-0.8504813   0.57795835]
action based on polilcy:[-0.6929866   0.55185866]
action based on polilcy:[-0.990571   0.7770809]
action based on polilcy:[-0.9017319   0.47105595]
action based on polilcy:[-0.45352736  0.42035687]
action based on polilcy:[-0.65491796  0.7415776 ]
action based on polilcy:[-0.42611384  0.42198062]
action based on polilcy:[0.29152483 0.72069633]
Total T: 74713 Episode Num: 5385 Episode T

action based on polilcy:[-0.9999976  0.9998944]
action based on polilcy:[-0.7976372   0.53530097]
action based on polilcy:[-1.          0.99998605]
action based on polilcy:[-0.5220943  0.4286706]
action based on polilcy:[0.15866236 0.7763207 ]
action based on polilcy:[0.92464876 0.983682  ]
Total T: 74861 Episode Num: 5401 Episode T: 6 Reward: -662.418001
action based on polilcy:[-0.9999978   0.99989104]
action based on polilcy:[-0.99926156  0.9262395 ]
action based on polilcy:[-0.7526731   0.50796866]
action based on polilcy:[-0.954007    0.62813485]
action based on polilcy:[-0.66662234  0.4446493 ]
action based on polilcy:[-0.9568805   0.79809976]
action based on polilcy:[-0.5054225   0.43378785]
action based on polilcy:[-0.37675837  0.4710719 ]
action based on polilcy:[-0.6057062   0.93604195]
action based on polilcy:[-0.39748508  0.43383902]
action based on polilcy:[-0.4054224  0.4488164]
Total T: 74872 Episode Num: 5402 Episode T: 11 Reward: -879.886226
action based on polilcy:[-0

action based on polilcy:[-0.99999845  0.9999588 ]
action based on polilcy:[-0.7947856  0.5402798]
action based on polilcy:[-1.         0.9999279]
action based on polilcy:[-0.5209677   0.44444543]
action based on polilcy:[-0.85261047  0.5211128 ]
action based on polilcy:[-0.47455496  0.44512382]
action based on polilcy:[-0.41119227  0.4415255 ]
action based on polilcy:[-0.3469253   0.46624714]
action based on polilcy:[0.38233662 0.8327906 ]
Total T: 75026 Episode Num: 5416 Episode T: 9 Reward: -598.386615
action based on polilcy:[-0.9999981  0.9999555]
action based on polilcy:[-0.9111273   0.65910065]
action based on polilcy:[-0.6944266  0.4725355]
action based on polilcy:[-0.43480498  0.88575846]
action based on polilcy:[-0.32657775  0.46131447]
action based on polilcy:[-0.21460448  0.5110401 ]
action based on polilcy:[0.32102028 0.68177855]
Total T: 75033 Episode Num: 5417 Episode T: 7 Reward: -521.462353
action based on polilcy:[-0.9999983  0.9999642]
action based on polilcy:[-0.8033

action based on polilcy:[-0.9999991  0.9999877]
action based on polilcy:[-0.99988794  0.99634916]
action based on polilcy:[-0.9397912  0.7098303]
action based on polilcy:[-0.99720216  0.89468604]
action based on polilcy:[-0.7386755  0.5600544]
action based on polilcy:[-0.78262174  0.5304767 ]
action based on polilcy:[-0.98832613  0.7898242 ]
action based on polilcy:[-0.76268005  0.8607466 ]
action based on polilcy:[-0.3708392   0.45400882]
action based on polilcy:[-0.11604191  0.981256  ]
Total T: 75204 Episode Num: 5432 Episode T: 10 Reward: -1095.774817
action based on polilcy:[-0.9999989  0.9999871]
action based on polilcy:[-0.9981622  0.9027229]
action based on polilcy:[-0.9894866  0.7608661]
action based on polilcy:[-0.6543304  0.5053069]
action based on polilcy:[-0.6910287   0.52466035]
action based on polilcy:[-0.927924    0.63078016]
action based on polilcy:[-0.80649245  0.5081277 ]
action based on polilcy:[0.01414664 0.87489766]
action based on polilcy:[0.18638134 0.6123202 ]


action based on polilcy:[-0.99999845  0.9999798 ]
action based on polilcy:[-0.77305174  0.59235656]
action based on polilcy:[-0.92707   0.677228]
action based on polilcy:[-0.60672677  0.4923595 ]
action based on polilcy:[-0.3697234   0.54533863]
action based on polilcy:[0.16314588 0.63165486]
action based on polilcy:[0.930592  0.8295903]
Total T: 75358 Episode Num: 5448 Episode T: 7 Reward: -478.952592
action based on polilcy:[-0.9999987  0.9999821]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.694632  0.544292]
action based on polilcy:[-0.9637893   0.79224527]
action based on polilcy:[-0.73899984  0.5600296 ]
action based on polilcy:[-0.72460055  0.5511522 ]
action based on polilcy:[-0.94861215  0.7055049 ]
action based on polilcy:[-0.59893095  0.5063094 ]
action based on polilcy:[-0.03268527  0.9989333 ]
action based on polilcy:[0.13374938 0.87393427]
Total T: 75368 Episode Num: 5449 Episode T: 10 Reward: -1044.082580
action based on polilcy:[-0.9999991   0.99999017]


action based on polilcy:[-0.97869194  0.83175516]
action based on polilcy:[-0.8888655  0.602016 ]
action based on polilcy:[-0.49421957  0.49784338]
action based on polilcy:[-0.98351187  0.81490946]
action based on polilcy:[-0.43660548  0.9103342 ]
action based on polilcy:[0.27772263 0.99889493]
Total T: 75526 Episode Num: 5463 Episode T: 7 Reward: -804.624532
action based on polilcy:[-0.99999857  0.99998736]
action based on polilcy:[-1.        0.999999]
action based on polilcy:[-0.63611436  0.5217101 ]
action based on polilcy:[-0.67567015  0.821031  ]
action based on polilcy:[-0.17610963  0.6299056 ]
action based on polilcy:[0.12844442 0.7557869 ]
action based on polilcy:[0.48033115 0.9453078 ]
Total T: 75533 Episode Num: 5464 Episode T: 7 Reward: -827.108561
action based on polilcy:[-0.9999986   0.99998724]
action based on polilcy:[-0.7592552  0.5733286]
action based on polilcy:[-0.98618054  0.83590585]
action based on polilcy:[-0.44934294  0.6858121 ]
action based on polilcy:[-0.9963

action based on polilcy:[-0.9999987  0.9999881]
action based on polilcy:[-0.8298836   0.61042285]
action based on polilcy:[-0.81788033  0.658528  ]
action based on polilcy:[-0.70698667  0.60061777]
action based on polilcy:[-0.7660302   0.59973687]
action based on polilcy:[-0.7662145  0.6324769]
action based on polilcy:[-1.         0.9999988]
action based on polilcy:[-0.8028081   0.64741826]
action based on polilcy:[-0.7708856   0.54996186]
action based on polilcy:[-0.6119026  0.5141877]
action based on polilcy:[-1.         0.9999895]
action based on polilcy:[-0.62038726  0.5116869 ]
action based on polilcy:[-0.45072958  0.47572216]
action based on polilcy:[-0.29772833  0.999974  ]
action based on polilcy:[-0.19745907  0.80560136]
action based on polilcy:[-0.39025778  0.49865073]
action based on polilcy:[-0.5780653  0.6680076]
action based on polilcy:[-0.28468156  0.51037717]
Total T: 75715 Episode Num: 5483 Episode T: 18 Reward: -1344.140155
action based on polilcy:[-0.99999845  0.9999

action based on polilcy:[-0.9999987   0.99998343]
action based on polilcy:[-0.6785261   0.60164094]
action based on polilcy:[-0.866173   0.6382693]
action based on polilcy:[-0.44046998  0.49464375]
action based on polilcy:[-0.14300832  0.580842  ]
action based on polilcy:[-0.30583802  0.5026403 ]
action based on polilcy:[-0.9945256  0.9120563]
action based on polilcy:[-0.4628836  0.5080194]
action based on polilcy:[0.19065627 0.8453127 ]
action based on polilcy:[-0.19913025  0.51131356]
Total T: 75857 Episode Num: 5497 Episode T: 10 Reward: -682.402038
action based on polilcy:[-0.99999917  0.999984  ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.7645713   0.60696656]
action based on polilcy:[-0.66657984  0.5765367 ]
action based on polilcy:[-0.57359546  0.5127287 ]
action based on polilcy:[-0.12045567  0.8860505 ]
action based on polilcy:[-0.14520167  0.5033223 ]
action based on polilcy:[0.30681556 0.65539885]
action based on polilcy:[0.8426217  0.87247396]
Total T: 75

Total T: 76048 Episode Num: 5513 Episode T: 47 Reward: -7452.557954
Total T: 76093 Episode Num: 5514 Episode T: 45 Reward: -7177.048577
Total T: 76122 Episode Num: 5515 Episode T: 29 Reward: -3097.896202
Total T: 76131 Episode Num: 5516 Episode T: 9 Reward: -1237.510308
Total T: 76165 Episode Num: 5517 Episode T: 34 Reward: -4645.317763
Total T: 76218 Episode Num: 5518 Episode T: 53 Reward: -7492.269703
Total T: 76231 Episode Num: 5519 Episode T: 13 Reward: -1517.329778
Total T: 76261 Episode Num: 5520 Episode T: 30 Reward: -3134.463854
Total T: 76300 Episode Num: 5521 Episode T: 39 Reward: -5807.579661
Total T: 76317 Episode Num: 5522 Episode T: 17 Reward: -2853.703659
Total T: 76365 Episode Num: 5523 Episode T: 48 Reward: -7203.532978
Total T: 76416 Episode Num: 5524 Episode T: 51 Reward: -7126.817758
Total T: 76461 Episode Num: 5525 Episode T: 45 Reward: -7031.851738
Total T: 76481 Episode Num: 5526 Episode T: 20 Reward: -2338.990255
action based on polilcy:[-0.99999964  1.        ]

action based on polilcy:[-0.999998   0.9999647]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.5841255  0.5326812]
action based on polilcy:[-0.9158332  0.692768 ]
action based on polilcy:[-0.45855108  0.96057814]
action based on polilcy:[0.09068928 0.77231205]
action based on polilcy:[0.46982893 0.8382448 ]
Total T: 76650 Episode Num: 5542 Episode T: 7 Reward: -835.272858
action based on polilcy:[-0.9999983  0.9999621]
action based on polilcy:[-0.9999787  0.9941088]
action based on polilcy:[-0.56839824  0.5411701 ]
action based on polilcy:[-0.435328   0.5204532]
action based on polilcy:[0.29824057 0.9986687 ]
action based on polilcy:[0.3353161 0.6034192]
action based on polilcy:[0.32955024 0.5605054 ]
Total T: 76657 Episode Num: 5543 Episode T: 7 Reward: -582.741267
action based on polilcy:[-0.99999815  0.9999595 ]
action based on polilcy:[-0.6645072  0.5975727]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.44373566  0.48691887]
action based on polilcy:[-

action based on polilcy:[-0.9999991  0.9999794]
action based on polilcy:[-0.8812078  0.73626  ]
action based on polilcy:[-0.58770514  0.53165686]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.5891919  0.5430788]
action based on polilcy:[-0.47884777  0.54672325]
action based on polilcy:[-0.5530159  0.6651912]
action based on polilcy:[0.14459288 0.8936968 ]
Total T: 76818 Episode Num: 5556 Episode T: 8 Reward: -870.051901
action based on polilcy:[-0.9999991  0.9999781]
action based on polilcy:[-0.83462924  0.7092844 ]
action based on polilcy:[-0.6933752  0.611764 ]
action based on polilcy:[-0.62024486  0.6042298 ]
action based on polilcy:[-0.9363966   0.99526507]
action based on polilcy:[-0.9535748  0.7194796]
action based on polilcy:[-0.50517356  0.55362165]
action based on polilcy:[-0.4211045   0.48996493]
action based on polilcy:[-0.99913615  0.9999605 ]
action based on polilcy:[-0.52727807  0.5556465 ]
action based on polilcy:[-0.54351467  0.6858959 ]
Total T: 76829 E

action based on polilcy:[-0.9999991  0.9999859]
action based on polilcy:[-0.65924287  0.61011124]
action based on polilcy:[-0.9999955  0.9997674]
action based on polilcy:[-0.685235   0.5984471]
action based on polilcy:[-0.5898583  0.6018676]
action based on polilcy:[-0.579036   0.5643802]
action based on polilcy:[-0.9999986   0.99956214]
action based on polilcy:[-0.47650522  0.50868964]
action based on polilcy:[-0.41300863  0.9999813 ]
action based on polilcy:[-0.2844509   0.50502086]
action based on polilcy:[0.4022228 0.831767 ]
Total T: 76985 Episode Num: 5572 Episode T: 11 Reward: -929.739408
action based on polilcy:[-0.9999988   0.99997866]
action based on polilcy:[-0.8195909  0.6435099]
action based on polilcy:[-0.61667347  0.58441806]
action based on polilcy:[-0.99588853  0.87390256]
action based on polilcy:[-0.5589302  0.5687455]
action based on polilcy:[-0.57630515  0.58083665]
action based on polilcy:[-0.8997922   0.58259356]
action based on polilcy:[-0.90137136  0.5849289 ]
a

action based on polilcy:[-0.9700644  0.7988461]
action based on polilcy:[-0.8028742   0.64364207]
action based on polilcy:[-0.7190369   0.59071887]
action based on polilcy:[-0.99249125  0.8758813 ]
action based on polilcy:[-1.          0.99999505]
action based on polilcy:[-0.54773957  0.52411574]
action based on polilcy:[-0.3400124  0.7794685]
action based on polilcy:[-0.7428415   0.99997747]
action based on polilcy:[-0.2845904   0.54105663]
Total T: 77133 Episode Num: 5586 Episode T: 16 Reward: -1452.533640
action based on polilcy:[-0.9999997  0.9999886]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.9431425   0.61931586]
action based on polilcy:[-1.         0.9999956]
action based on polilcy:[-0.84559     0.66087127]
action based on polilcy:[-0.8018993  0.6275045]
action based on polilcy:[-0.7060094   0.59450203]
action based on polilcy:[-0.59323     0.55621827]
action based on polilcy:[-0.55346084  0.5388596 ]
action based on polilcy:[-0.9891349   0.85343784]
action b

action based on polilcy:[-0.9999997   0.99999434]
action based on polilcy:[-0.99400777  0.89081   ]
action based on polilcy:[-0.69971806  0.6226557 ]
action based on polilcy:[-0.6338134  0.5967071]
action based on polilcy:[-0.64730775  0.5716181 ]
action based on polilcy:[-0.13705292  0.969789  ]
action based on polilcy:[0.3439832  0.82329214]
Total T: 77294 Episode Num: 5600 Episode T: 7 Reward: -637.054511
action based on polilcy:[-0.9999999  0.9999966]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.60419774  0.57668686]
action based on polilcy:[-0.5095216   0.54757553]
action based on polilcy:[-0.08301723  0.70934343]
action based on polilcy:[-0.20484294  0.68470997]
action based on polilcy:[0.22807539 0.9573595 ]
Total T: 77301 Episode Num: 5601 Episode T: 7 Reward: -766.527641
action based on polilcy:[-0.99999994  0.9999971 ]
action based on polilcy:[-0.93057644  0.7938404 ]
action based on polilcy:[-1.         0.9999999]
action based on polilcy:[-0.591953   0.62585

action based on polilcy:[-0.99999917  0.99995995]
action based on polilcy:[-0.9999975  0.9999025]
action based on polilcy:[-0.60580194  0.6035387 ]
action based on polilcy:[-0.90309    0.6249361]
action based on polilcy:[-0.40881824  0.5807729 ]
action based on polilcy:[-0.4515918   0.56684506]
action based on polilcy:[-0.5290198   0.58728564]
action based on polilcy:[-0.98688984  0.9195476 ]
action based on polilcy:[-0.4548539  0.5136233]
action based on polilcy:[-0.28195682  0.96288764]
Total T: 77451 Episode Num: 5615 Episode T: 10 Reward: -887.227375
action based on polilcy:[-0.9999993  0.9999564]
action based on polilcy:[-0.8501839  0.5833478]
action based on polilcy:[-0.74189216  0.59730023]
action based on polilcy:[-0.6786253  0.600965 ]
action based on polilcy:[-0.66515726  0.6008363 ]
action based on polilcy:[-0.93997824  0.71467555]
action based on polilcy:[-0.9865394  0.7930769]
action based on polilcy:[-0.5476781  0.6621407]
action based on polilcy:[-0.12576757  0.5479684 ]

action based on polilcy:[-0.9999987  0.9999286]
action based on polilcy:[-0.9826936  0.8571192]
action based on polilcy:[-0.9999996  0.9999076]
action based on polilcy:[-0.566713   0.5450391]
action based on polilcy:[-0.83375865  0.6193825 ]
action based on polilcy:[-0.55572045  0.5298669 ]
action based on polilcy:[-0.44330135  0.50719523]
action based on polilcy:[-0.20620677  0.9745977 ]
action based on polilcy:[-0.40432093  0.53811836]
action based on polilcy:[-0.62840104  0.64685917]
action based on polilcy:[-0.6964723  0.6525964]
action based on polilcy:[-0.7332232  0.6012281]
Total T: 77614 Episode Num: 5631 Episode T: 12 Reward: -969.290615
action based on polilcy:[-0.9999989   0.99989784]
action based on polilcy:[-0.7203087  0.5987085]
action based on polilcy:[-0.60375583  0.59799314]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.58190143  0.5187451 ]
action based on polilcy:[-0.99993885  0.95658225]
action based on polilcy:[-0.58143616  0.5631338 ]
action based 

action based on polilcy:[-0.999997    0.99983865]
action based on polilcy:[-0.79493153  0.7542421 ]
action based on polilcy:[-0.80890894  0.63122505]
action based on polilcy:[-0.40115112  0.5039811 ]
action based on polilcy:[0.14751683 0.9134286 ]
action based on polilcy:[0.24464391 0.9127731 ]
Total T: 77777 Episode Num: 5647 Episode T: 6 Reward: -569.810232
action based on polilcy:[-0.99999774  0.9998879 ]
action based on polilcy:[-0.73868185  0.7261726 ]
action based on polilcy:[-0.57504296  0.56814265]
action based on polilcy:[-0.68256474  0.6575217 ]
action based on polilcy:[-0.9355903  0.6904367]
action based on polilcy:[-0.36792657  0.99998003]
action based on polilcy:[0.2746411  0.98291385]
Total T: 77784 Episode Num: 5648 Episode T: 7 Reward: -789.818358
action based on polilcy:[-0.9999979  0.9999058]
action based on polilcy:[-0.59001803  0.6169492 ]
action based on polilcy:[-0.9999896  0.9867777]
action based on polilcy:[-1.          0.99999946]
action based on polilcy:[-0.99

action based on polilcy:[-0.99999714  0.99986404]
action based on polilcy:[-0.638947   0.6222614]
action based on polilcy:[-0.5550036  0.5910296]
action based on polilcy:[-0.8281459   0.73695326]
action based on polilcy:[-0.21576467  0.5626496 ]
action based on polilcy:[-0.20537122  0.74976206]
action based on polilcy:[0.11081196 0.68464947]
action based on polilcy:[0.22671711 0.60867095]
Total T: 77949 Episode Num: 5664 Episode T: 8 Reward: -509.680731
action based on polilcy:[-0.99999714  0.99988365]
action based on polilcy:[-0.77830356  0.5767238 ]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.55029726  0.60631585]
action based on polilcy:[-0.5724338   0.52860224]
action based on polilcy:[-0.829466    0.61097765]
action based on polilcy:[-0.4395567   0.49716347]
action based on polilcy:[-0.44164407  0.5040127 ]
action based on polilcy:[-0.237412   0.5696411]
action based on polilcy:[0.00467814 0.6117554 ]
action based on polilcy:[0.2580397 0.6081815]
action based on 

action based on polilcy:[-0.99999577  0.99965346]
action based on polilcy:[-0.9700115  0.5253594]
action based on polilcy:[-1.          0.99973774]
action based on polilcy:[-0.8403442  0.7060577]
action based on polilcy:[-0.584167   0.6105555]
action based on polilcy:[-0.7283485  0.7113075]
action based on polilcy:[-0.5953814   0.50884044]
action based on polilcy:[-0.46388707  0.48585907]
action based on polilcy:[-0.45208535  0.60724545]
action based on polilcy:[-0.16163959  0.545249  ]
action based on polilcy:[0.16309915 0.96523464]
action based on polilcy:[0.57480884 0.85089844]
Total T: 78582 Episode Num: 5695 Episode T: 12 Reward: -880.818533
action based on polilcy:[-0.99999636  0.9997931 ]
action based on polilcy:[-0.5768907   0.61169803]
action based on polilcy:[-0.5885038   0.50487125]
action based on polilcy:[-0.55902565  0.47620142]
action based on polilcy:[-0.5498825  0.4811722]
action based on polilcy:[-0.1997443  0.9577536]
action based on polilcy:[0.32064137 0.8695087 ]
T

action based on polilcy:[-0.99999774  0.9998619 ]
action based on polilcy:[-0.9306644  0.5515275]
action based on polilcy:[-0.69581795  0.6047777 ]
action based on polilcy:[-0.64073557  0.599772  ]
action based on polilcy:[-0.6800177   0.69467735]
action based on polilcy:[-0.6043175   0.80354625]
action based on polilcy:[-0.0741405   0.62240386]
action based on polilcy:[-0.10909784  0.999927  ]
action based on polilcy:[-0.40160608  0.88314855]
action based on polilcy:[-0.85623443  0.7762395 ]
Total T: 78742 Episode Num: 5710 Episode T: 10 Reward: -1057.115791
action based on polilcy:[-0.99999785  0.9998462 ]
action based on polilcy:[-0.64800936  0.6730888 ]
action based on polilcy:[-0.670751   0.5966512]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.81989014  0.6209726 ]
action based on polilcy:[-0.61480206  0.94975716]
action based on polilcy:[-0.13811773  0.5898853 ]
action based on polilcy:[-0.0272495  0.7628757]
action based on polilcy:[0.14398186 0.7760266 ]
Total 

action based on polilcy:[-0.99999744  0.99989104]
action based on polilcy:[-0.999999    0.99997807]
action based on polilcy:[-0.628232    0.61283827]
action based on polilcy:[-0.6096952   0.52693987]
action based on polilcy:[-0.37489763  0.9409756 ]
action based on polilcy:[-0.43273956  0.6003159 ]
action based on polilcy:[0.07040377 0.6558887 ]
action based on polilcy:[0.4878367 0.9302511]
Total T: 78894 Episode Num: 5726 Episode T: 8 Reward: -759.509871
action based on polilcy:[-0.9999963  0.9997741]
action based on polilcy:[-0.8343717   0.83628327]
action based on polilcy:[-0.9159299  0.8343116]
action based on polilcy:[-0.6034558  0.5507113]
action based on polilcy:[-0.24267294  0.5995964 ]
action based on polilcy:[0.15671006 0.59708154]
action based on polilcy:[0.05313416 0.7668607 ]
action based on polilcy:[0.47890508 0.80267274]
action based on polilcy:[0.47637162 0.6207458 ]
Total T: 78903 Episode Num: 5727 Episode T: 9 Reward: -627.996270
action based on polilcy:[-0.9999975  0

action based on polilcy:[-0.99999774  0.9998085 ]
action based on polilcy:[-0.59855044  0.62333494]
action based on polilcy:[-0.99985784  0.9742489 ]
action based on polilcy:[-0.5435611   0.48121583]
action based on polilcy:[-0.03520458  0.6868542 ]
action based on polilcy:[0.49725816 0.85393006]
Total T: 79059 Episode Num: 5743 Episode T: 6 Reward: -634.353538
action based on polilcy:[-0.99999803  0.9998335 ]
action based on polilcy:[-0.5872942  0.6097906]
action based on polilcy:[-0.5930486  0.5848165]
action based on polilcy:[-0.5971139   0.50707674]
action based on polilcy:[-0.59631044  0.8053688 ]
action based on polilcy:[0.0861841 0.7964958]
action based on polilcy:[0.45880345 0.7506787 ]
action based on polilcy:[0.7801335  0.66412055]
Total T: 79067 Episode Num: 5744 Episode T: 8 Reward: -535.890868
action based on polilcy:[-0.9999977  0.999813 ]
action based on polilcy:[-0.5811089  0.6189378]
action based on polilcy:[-0.74026096  0.59024453]
action based on polilcy:[-1.        

action based on polilcy:[-0.9999973   0.99965304]
action based on polilcy:[-0.9714201  0.9136064]
action based on polilcy:[-0.83410275  0.78028166]
action based on polilcy:[-0.55724716  0.47989276]
action based on polilcy:[0.04244998 0.84510255]
action based on polilcy:[0.32913637 0.6769949 ]
action based on polilcy:[0.52846694 0.839992  ]
Total T: 79223 Episode Num: 5760 Episode T: 7 Reward: -715.628605
action based on polilcy:[-0.9999976  0.9996828]
action based on polilcy:[-0.6747972   0.60628754]
action based on polilcy:[-0.99999976  0.9999117 ]
action based on polilcy:[-0.54678833  0.4784323 ]
action based on polilcy:[-0.5073664   0.47936934]
action based on polilcy:[-0.94913495  0.78241825]
action based on polilcy:[-0.30944455  0.60641825]
action based on polilcy:[-0.28377113  0.9188854 ]
action based on polilcy:[-0.18254595  0.8227778 ]
Total T: 79232 Episode Num: 5761 Episode T: 9 Reward: -1042.012195
action based on polilcy:[-0.99999803  0.99967307]
action based on polilcy:[-0

action based on polilcy:[-0.99999714  0.9982645 ]
action based on polilcy:[-0.8957381   0.57104266]
action based on polilcy:[-0.6890093   0.60176283]
action based on polilcy:[-1.  1.]
action based on polilcy:[-0.9528116  0.7195946]
action based on polilcy:[-0.8009666   0.63370895]
action based on polilcy:[-1.         0.9999813]
action based on polilcy:[-0.9979155  0.589432 ]
action based on polilcy:[-1.          0.85863656]
action based on polilcy:[-0.99999666  0.8971516 ]
action based on polilcy:[-0.9997056  0.6760168]
action based on polilcy:[-0.99606633  0.71430063]
action based on polilcy:[-0.9421487  0.6451732]
action based on polilcy:[-0.9982341  0.9997648]
action based on polilcy:[-0.91091937  0.62629753]
action based on polilcy:[-0.85240805  0.6141021 ]
action based on polilcy:[-0.74537975  0.64737344]
action based on polilcy:[-0.5149776   0.69853306]
action based on polilcy:[-0.28043598  0.743603  ]
action based on polilcy:[-0.01204543  0.88033164]
action based on polilcy:[0.0

action based on polilcy:[-0.9999993  0.9994744]
action based on polilcy:[-0.6349493   0.59846306]
action based on polilcy:[-0.64199954  0.5298579 ]
action based on polilcy:[-0.66172314  0.6250385 ]
action based on polilcy:[-0.06006554  0.62996244]
action based on polilcy:[0.27612257 0.6418476 ]
action based on polilcy:[0.3962076 0.9980198]
Total T: 79548 Episode Num: 5795 Episode T: 7 Reward: -512.896284
action based on polilcy:[-0.99999887  0.99937266]
action based on polilcy:[-0.99998194  0.98815894]
action based on polilcy:[-1.         0.9999982]
action based on polilcy:[-0.7418282   0.74747694]
action based on polilcy:[-0.9997655   0.93055886]
action based on polilcy:[-0.45842412  0.5554242 ]
action based on polilcy:[0.23066339 0.82663906]
action based on polilcy:[0.3360617 0.9046843]
Total T: 79556 Episode Num: 5796 Episode T: 8 Reward: -1002.113228
action based on polilcy:[-0.9999987  0.9990289]
action based on polilcy:[-0.99998224  0.90351343]
action based on polilcy:[-0.7279864

action based on polilcy:[-0.9999983  0.9986355]
action based on polilcy:[-0.9833543   0.69228935]
action based on polilcy:[-0.694598   0.5938184]
action based on polilcy:[-0.6487689  0.590312 ]
action based on polilcy:[-0.629943   0.5923038]
action based on polilcy:[-0.61413926  0.5285178 ]
action based on polilcy:[-0.55003154  0.54990244]
action based on polilcy:[0.14772119 0.8769043 ]
action based on polilcy:[0.43457356 0.813053  ]
action based on polilcy:[0.41672072 0.81499857]
Total T: 79710 Episode Num: 5812 Episode T: 10 Reward: -765.895955
action based on polilcy:[-0.9999979  0.9988731]
action based on polilcy:[-0.7427691  0.5916413]
action based on polilcy:[-0.63955665  0.70738196]
action based on polilcy:[-0.6159994   0.55375516]
action based on polilcy:[-0.75750715  0.592945  ]
action based on polilcy:[0.07708962 0.82538605]
action based on polilcy:[0.17743818 0.9188354 ]
action based on polilcy:[0.21422705 0.7058109 ]
Total T: 79718 Episode Num: 5813 Episode T: 8 Reward: -67

action based on polilcy:[-0.99999833  0.99767864]
action based on polilcy:[-0.5973258  0.5751157]
action based on polilcy:[-0.59325105  0.5858093 ]
action based on polilcy:[-0.6044197   0.52204895]
action based on polilcy:[-0.5429774   0.58481014]
action based on polilcy:[-0.99999976  0.9999842 ]
action based on polilcy:[-0.41128024  0.50262153]
action based on polilcy:[0.1900639 0.8420664]
action based on polilcy:[0.32260653 0.69865197]
action based on polilcy:[0.19776365 0.9791224 ]
Total T: 79867 Episode Num: 5826 Episode T: 10 Reward: -943.967055
action based on polilcy:[-0.99999833  0.996496  ]
action based on polilcy:[-0.69198644  0.59890884]
action based on polilcy:[-0.82275915  0.81128216]
action based on polilcy:[-0.5911647   0.48192546]
action based on polilcy:[-0.5188108  0.4746694]
action based on polilcy:[-0.7879538  0.7431418]
action based on polilcy:[-0.68657434  0.70746434]
action based on polilcy:[-0.71214616  0.74042165]
action based on polilcy:[0.12741695 0.95506537]

Total T: 80034 Episode Num: 5844 Episode T: 12 Reward: -1388.914607
Total T: 80062 Episode Num: 5845 Episode T: 28 Reward: -3478.301113
Total T: 80107 Episode Num: 5846 Episode T: 45 Reward: -7135.953782
Total T: 80117 Episode Num: 5847 Episode T: 10 Reward: -1616.067633
Total T: 80163 Episode Num: 5848 Episode T: 46 Reward: -7246.785438
Total T: 80171 Episode Num: 5849 Episode T: 8 Reward: -927.344162
Total T: 80189 Episode Num: 5850 Episode T: 18 Reward: -2722.412089
Total T: 80239 Episode Num: 5851 Episode T: 50 Reward: -6939.634773
Total T: 80290 Episode Num: 5852 Episode T: 51 Reward: -7066.426633
Total T: 80334 Episode Num: 5853 Episode T: 44 Reward: -7612.038331
Total T: 80355 Episode Num: 5854 Episode T: 21 Reward: -2729.236054
Total T: 80380 Episode Num: 5855 Episode T: 25 Reward: -3263.018673
Total T: 80409 Episode Num: 5856 Episode T: 29 Reward: -4566.530958
Total T: 80446 Episode Num: 5857 Episode T: 37 Reward: -5135.548997
Total T: 80465 Episode Num: 5858 Episode T: 19 Rew

action based on polilcy:[-0.99999976  0.94122845]
action based on polilcy:[-0.4229183  0.6687683]
action based on polilcy:[-0.89850557  0.6597016 ]
action based on polilcy:[0.01584274 0.6624073 ]
action based on polilcy:[-0.00895387  0.9587645 ]
action based on polilcy:[0.18601106 0.6222451 ]
action based on polilcy:[0.44027838 0.77138484]
Total T: 80657 Episode Num: 5875 Episode T: 7 Reward: -709.343877
action based on polilcy:[-0.9999997  0.9308873]
action based on polilcy:[-0.99650604  0.83549064]
action based on polilcy:[-0.3936085   0.62644506]
action based on polilcy:[-0.5112487  0.5393827]
action based on polilcy:[-0.49062052  0.55241156]
action based on polilcy:[-0.47678632  0.603364  ]
action based on polilcy:[-0.5237939   0.53204644]
action based on polilcy:[-0.41190094  0.59798694]
action based on polilcy:[-1.         0.9379926]
action based on polilcy:[-0.4359701  0.5379896]
action based on polilcy:[-0.59215295  0.7780068 ]
action based on polilcy:[-0.44578174  0.5064017 ]


action based on polilcy:[-1.         0.9272274]
action based on polilcy:[-0.60594857  0.7192486 ]
action based on polilcy:[-0.5291058  0.5297912]
action based on polilcy:[-0.39628768  0.63047683]
action based on polilcy:[-0.5406225  0.5301155]
action based on polilcy:[-0.8908291  0.5820636]
action based on polilcy:[-0.05752207  0.7772906 ]
action based on polilcy:[0.11541717 0.90485317]
action based on polilcy:[-0.44124976  0.9273899 ]
Total T: 80810 Episode Num: 5890 Episode T: 9 Reward: -971.898005
action based on polilcy:[-1.          0.93163145]
action based on polilcy:[-0.42026606  0.6272316 ]
action based on polilcy:[-0.4518056   0.69833434]
action based on polilcy:[-0.7841675   0.64182985]
action based on polilcy:[-0.508672    0.55859023]
action based on polilcy:[-0.20798087  0.5995169 ]
action based on polilcy:[-0.40105352  0.94978315]
action based on polilcy:[0.12113298 0.6197499 ]
action based on polilcy:[0.5802654 0.7958938]
Total T: 80819 Episode Num: 5891 Episode T: 9 Rewa

action based on polilcy:[-1.         0.9295932]
action based on polilcy:[-1.         0.9999486]
action based on polilcy:[-0.98595554  0.48320132]
action based on polilcy:[-1.          0.98931277]
action based on polilcy:[-0.9936219  0.4755336]
action based on polilcy:[-1.          0.94767565]
action based on polilcy:[-0.721421   0.5415616]
action based on polilcy:[-0.9786417  0.6729698]
action based on polilcy:[-1.          0.97769994]
action based on polilcy:[-0.5004132  0.6478392]
action based on polilcy:[0.03112791 0.61801124]
action based on polilcy:[0.27924663 0.7915937 ]
action based on polilcy:[0.58798003 0.73693526]
Total T: 80982 Episode Num: 5906 Episode T: 13 Reward: -1598.226066
action based on polilcy:[-1.         0.9287565]
action based on polilcy:[-0.9847242  0.7048813]
action based on polilcy:[-0.7409246  0.5742661]
action based on polilcy:[-0.00170017  0.6315752 ]
action based on polilcy:[0.16712433 0.7646336 ]
action based on polilcy:[0.11276793 0.8302602 ]
action bas

action based on polilcy:[-1.          0.93102854]
action based on polilcy:[-1.         0.9967177]
action based on polilcy:[-0.9989099   0.66980255]
action based on polilcy:[-0.47356498  0.5992849 ]
action based on polilcy:[-0.791898   0.6248396]
action based on polilcy:[-0.7698234   0.57675517]
action based on polilcy:[-0.974047    0.59666836]
action based on polilcy:[0.05319092 0.5899926 ]
action based on polilcy:[0.07096369 0.60287124]
action based on polilcy:[0.1601955  0.60724926]
Total T: 81136 Episode Num: 5922 Episode T: 10 Reward: -822.639541
action based on polilcy:[-1.         0.9133878]
action based on polilcy:[-0.441436   0.6457093]
action based on polilcy:[-0.99999183  0.73776805]
action based on polilcy:[-0.99823076  0.46682885]
action based on polilcy:[-0.94637525  0.470211  ]
action based on polilcy:[-0.94913304  0.6718021 ]
action based on polilcy:[-0.99991155  0.7198666 ]
action based on polilcy:[-0.7069219  0.5332038]
action based on polilcy:[-0.4817206  0.6102352]
a

action based on polilcy:[-1.          0.90586627]
action based on polilcy:[-0.43579715  0.6099076 ]
action based on polilcy:[-0.54687893  0.5160217 ]
action based on polilcy:[-0.3783772  0.6256577]
action based on polilcy:[-0.22914399  0.6148374 ]
action based on polilcy:[-0.9914073  0.6109496]
action based on polilcy:[-0.2536105  0.5573845]
action based on polilcy:[0.08393797 0.6131953 ]
action based on polilcy:[0.22481668 0.73739415]
Total T: 81308 Episode Num: 5938 Episode T: 9 Reward: -705.064051
action based on polilcy:[-1.          0.90314096]
action based on polilcy:[-1.         0.8759909]
action based on polilcy:[-0.7631992   0.54881155]
action based on polilcy:[-0.00493023  0.6219547 ]
action based on polilcy:[-0.64174855  0.60307264]
action based on polilcy:[-0.5919356   0.59477437]
action based on polilcy:[-0.92706513  0.5455786 ]
action based on polilcy:[-0.9999681  0.7302034]
action based on polilcy:[-0.5899875   0.52046275]
action based on polilcy:[-1.         0.9143782]


action based on polilcy:[-1.          0.89326537]
action based on polilcy:[-0.9999945  0.7546383]
action based on polilcy:[-1.         0.8941416]
action based on polilcy:[-0.41233143  0.6338805 ]
action based on polilcy:[-1.         0.9027964]
action based on polilcy:[-0.8789968  0.6489589]
action based on polilcy:[-1.         0.8121118]
action based on polilcy:[-0.5820841  0.5254921]
action based on polilcy:[-0.5938616   0.50386274]
action based on polilcy:[-0.41843978  0.53499115]
action based on polilcy:[-0.45605287  0.51417154]
action based on polilcy:[-0.9151136   0.54336596]
action based on polilcy:[-0.02937506  0.6853472 ]
action based on polilcy:[0.12697722 0.83704007]
Total T: 81479 Episode Num: 5953 Episode T: 14 Reward: -1318.162929
action based on polilcy:[-1.          0.88911587]
action based on polilcy:[-0.48369053  0.66106206]
action based on polilcy:[-0.601443    0.54892695]
action based on polilcy:[-0.07202867  0.7000822 ]
action based on polilcy:[0.10458712 0.81737816

action based on polilcy:[-1.          0.88802826]
action based on polilcy:[-1.          0.99180716]
action based on polilcy:[-1.          0.92031384]
action based on polilcy:[-0.5914332   0.51505816]
action based on polilcy:[-0.21825393  0.58639383]
action based on polilcy:[0.0363304 0.6131426]
action based on polilcy:[0.18066297 0.655591  ]
action based on polilcy:[0.4764234 0.5781431]
Total T: 81624 Episode Num: 5965 Episode T: 8 Reward: -797.603356
action based on polilcy:[-1.          0.88424414]
action based on polilcy:[-0.99726754  0.44640806]
action based on polilcy:[-1.        0.995366]
action based on polilcy:[-0.99592614  0.43549716]
action based on polilcy:[-0.9999085   0.48020127]
action based on polilcy:[-1.          0.46727872]
action based on polilcy:[-1.          0.74304366]
action based on polilcy:[-1.         0.7032515]
action based on polilcy:[-0.9935712   0.42801294]
action based on polilcy:[-0.9446988  0.6084671]
action based on polilcy:[-0.95028436  0.5676087 ]
ac

action based on polilcy:[-1.          0.87072057]
action based on polilcy:[-0.9593179   0.43641058]
action based on polilcy:[-0.41132534  0.6084598 ]
action based on polilcy:[-1.         0.7749514]
action based on polilcy:[-0.9879714   0.62322396]
action based on polilcy:[-1.          0.76821184]
action based on polilcy:[-0.5733347  0.5145091]
action based on polilcy:[-0.5287243   0.47318324]
action based on polilcy:[0.02523144 0.6170418 ]
action based on polilcy:[-0.8519791   0.50530434]
action based on polilcy:[0.07691129 0.6523752 ]
action based on polilcy:[0.23623796 0.6860343 ]
Total T: 81787 Episode Num: 5977 Episode T: 12 Reward: -1072.046346
action based on polilcy:[-1.          0.85934037]
action based on polilcy:[-0.99999917  0.7560772 ]
action based on polilcy:[-0.5682354   0.46305662]
action based on polilcy:[-0.56884795  0.4737803 ]
action based on polilcy:[-0.6504755   0.55137545]
action based on polilcy:[-0.9910498  0.5058366]
action based on polilcy:[0.06833559 0.599855

action based on polilcy:[-1.          0.85441726]
action based on polilcy:[-0.93123144  0.50913936]
action based on polilcy:[-0.42302153  0.6176517 ]
action based on polilcy:[-0.99902195  0.63040984]
action based on polilcy:[-0.71351016  0.54335487]
action based on polilcy:[-0.78056026  0.58713555]
action based on polilcy:[-0.99999374  0.6521205 ]
action based on polilcy:[0.05082322 0.61333215]
action based on polilcy:[0.15777491 0.5953466 ]
action based on polilcy:[0.54605913 0.76093394]
Total T: 81933 Episode Num: 5991 Episode T: 10 Reward: -808.954886
action based on polilcy:[-1.         0.8628652]
action based on polilcy:[-0.48924825  0.6111312 ]
action based on polilcy:[-0.93213356  0.5788062 ]
action based on polilcy:[-0.60188824  0.4720007 ]
action based on polilcy:[-0.4801633  0.4923809]
action based on polilcy:[0.03441257 0.6157651 ]
action based on polilcy:[0.0627133  0.61682093]
action based on polilcy:[0.21549152 0.6673475 ]
action based on polilcy:[0.29796097 0.54055095]
a

action based on polilcy:[-1.         0.8176241]
action based on polilcy:[-0.9971405   0.42236862]
action based on polilcy:[-1.         0.9340466]
action based on polilcy:[-0.65091205  0.4469925 ]
action based on polilcy:[-0.6423154  0.5079012]
action based on polilcy:[-0.9998937  0.6016443]
action based on polilcy:[-0.99996495  0.64644194]
action based on polilcy:[-0.06691711  0.7132611 ]
action based on polilcy:[-0.96180147  0.5050783 ]
Total T: 82560 Episode Num: 6021 Episode T: 9 Reward: -1157.456064
action based on polilcy:[-1.          0.82625943]
action based on polilcy:[-0.7847525   0.58475256]
action based on polilcy:[-0.99973994  0.59763724]
action based on polilcy:[-0.5523581  0.5008534]
action based on polilcy:[0.02047959 0.70368445]
action based on polilcy:[0.14793026 0.59460795]
action based on polilcy:[0.1984945 0.5113125]
action based on polilcy:[0.4776242 0.5371522]
Total T: 82568 Episode Num: 6022 Episode T: 8 Reward: -572.282593
action based on polilcy:[-1.         0.

action based on polilcy:[-0.99999994  0.8163332 ]
action based on polilcy:[-0.6338073   0.48680854]
action based on polilcy:[-0.7133297   0.52277327]
action based on polilcy:[-0.6444433   0.48338127]
action based on polilcy:[-0.60347694  0.42560634]
action based on polilcy:[-0.03330753  0.5830474 ]
action based on polilcy:[-0.9971413  0.5408886]
action based on polilcy:[0.10044637 0.5643064 ]
action based on polilcy:[-0.73299474  0.5184611 ]
action based on polilcy:[0.09192339 0.5510257 ]
Total T: 82734 Episode Num: 6038 Episode T: 10 Reward: -740.355053
action based on polilcy:[-0.99999994  0.81948555]
action based on polilcy:[-0.7883644  0.6051619]
action based on polilcy:[-0.5490266   0.50655043]
action based on polilcy:[-0.9990363   0.70993185]
action based on polilcy:[-1.         0.7999541]
action based on polilcy:[-0.5599912   0.51672053]
action based on polilcy:[-0.99999905  0.80928254]
action based on polilcy:[-0.99996495  0.57667357]
action based on polilcy:[-0.99999946  0.508

action based on polilcy:[-0.99999994  0.8334579 ]
action based on polilcy:[-0.65415835  0.5476825 ]
action based on polilcy:[-0.5452758   0.58564055]
action based on polilcy:[-0.9999619   0.60556674]
action based on polilcy:[-0.90312076  0.47098726]
action based on polilcy:[-0.99986255  0.59431714]
action based on polilcy:[-0.8797191   0.47221938]
action based on polilcy:[-0.9261247   0.46285382]
action based on polilcy:[-0.9882331   0.44579548]
action based on polilcy:[-0.9995775  0.7210367]
action based on polilcy:[-0.8685854   0.54127574]
action based on polilcy:[-1.        0.770638]
action based on polilcy:[-0.3675018  0.5890616]
action based on polilcy:[-0.999588    0.73794484]
action based on polilcy:[-0.6384851   0.45046532]
action based on polilcy:[-0.6731726   0.57609713]
action based on polilcy:[0.12258872 0.66847456]
Total T: 82896 Episode Num: 6051 Episode T: 17 Reward: -1440.901429
action based on polilcy:[-0.99999994  0.83299077]
action based on polilcy:[-0.6667656  0.487

action based on polilcy:[-0.99999994  0.81577337]
action based on polilcy:[-0.6666628   0.48083648]
action based on polilcy:[-0.6497922   0.47071153]
action based on polilcy:[-0.50399494  0.5481743 ]
action based on polilcy:[-0.14801176  0.6241591 ]
action based on polilcy:[-1.         0.7482228]
action based on polilcy:[-0.44435507  0.5606204 ]
action based on polilcy:[-0.88958406  0.4795581 ]
action based on polilcy:[-0.4032193   0.48261434]
action based on polilcy:[0.03137342 0.557719  ]
action based on polilcy:[0.17386805 0.71514857]
Total T: 83039 Episode Num: 6065 Episode T: 11 Reward: -811.551294
action based on polilcy:[-0.99999994  0.8216875 ]
action based on polilcy:[-0.65921336  0.5081209 ]
action based on polilcy:[-0.6763929   0.43282992]
action based on polilcy:[-0.82914066  0.63580596]
action based on polilcy:[-0.99999934  0.75411564]
action based on polilcy:[-0.51511663  0.45333984]
action based on polilcy:[-0.5695164  0.6074581]
action based on polilcy:[0.13356504 0.579

action based on polilcy:[-0.99999994  0.83167547]
action based on polilcy:[-0.77644265  0.5674947 ]
action based on polilcy:[-0.613042   0.5157348]
action based on polilcy:[-0.80973566  0.4743226 ]
action based on polilcy:[-0.20038226  0.5302813 ]
action based on polilcy:[0.0406129 0.5384649]
action based on polilcy:[0.00718025 0.73008716]
action based on polilcy:[-0.9897168   0.49238506]
action based on polilcy:[0.06769413 0.5370436 ]
Total T: 83200 Episode Num: 6080 Episode T: 9 Reward: -816.306450
action based on polilcy:[-0.99999994  0.820256  ]
action based on polilcy:[-0.6660968   0.48087186]
action based on polilcy:[-1.          0.81630874]
action based on polilcy:[-0.6146629  0.4679484]
action based on polilcy:[-0.6042414  0.4136227]
action based on polilcy:[-1.         0.7761075]
action based on polilcy:[-0.494727   0.5389837]
action based on polilcy:[-1.         0.9318844]
action based on polilcy:[-1.          0.94653505]
action based on polilcy:[-0.9595553  0.6045126]
action

action based on polilcy:[-1.         0.8320943]
action based on polilcy:[-0.71550035  0.5279864 ]
action based on polilcy:[-0.66003716  0.54745835]
action based on polilcy:[-0.99493563  0.6784711 ]
action based on polilcy:[-0.66954494  0.44675538]
action based on polilcy:[-0.53552914  0.55089176]
action based on polilcy:[-0.4845769  0.5387303]
action based on polilcy:[-1.         0.8057879]
action based on polilcy:[-0.68827033  0.41403595]
action based on polilcy:[-1.         0.8339934]
action based on polilcy:[-0.8609631  0.670967 ]
action based on polilcy:[-1.       0.99974]
action based on polilcy:[-0.666154  0.516255]
action based on polilcy:[-0.65354776  0.4585703 ]
action based on polilcy:[-0.23644966  0.529462  ]
Total T: 83375 Episode Num: 6095 Episode T: 15 Reward: -1468.284754
action based on polilcy:[-0.99999994  0.8217331 ]
action based on polilcy:[-0.70165     0.47871384]
action based on polilcy:[-0.9637856  0.5083874]
action based on polilcy:[-0.1589332  0.5313642]
action

action based on polilcy:[-1.          0.82030857]
action based on polilcy:[-1.          0.85289973]
action based on polilcy:[-1.         0.5919256]
action based on polilcy:[-1.         0.6975335]
action based on polilcy:[-0.99986815  0.6219003 ]
action based on polilcy:[-0.69931215  0.47828078]
action based on polilcy:[-0.56033075  0.5504594 ]
action based on polilcy:[-0.5819402   0.55362713]
action based on polilcy:[-0.6339042  0.514187 ]
action based on polilcy:[-0.74263763  0.42312834]
action based on polilcy:[-0.67598504  0.38330123]
action based on polilcy:[-0.9824085  0.5042287]
action based on polilcy:[-0.04298938  0.6369827 ]
action based on polilcy:[0.24189667 0.665547  ]
Total T: 83549 Episode Num: 6109 Episode T: 14 Reward: -1069.883757
action based on polilcy:[-1.          0.81676173]
action based on polilcy:[-0.9999995  0.7609606]
action based on polilcy:[-0.71519166  0.48838222]
action based on polilcy:[-0.68283725  0.44533232]
action based on polilcy:[-0.7107916  0.49937

action based on polilcy:[-1.          0.81050384]
action based on polilcy:[-0.9209604   0.49367523]
action based on polilcy:[-0.68015367  0.47905827]
action based on polilcy:[-0.99998164  0.68145114]
action based on polilcy:[-0.7152105   0.35079414]
action based on polilcy:[-0.61150014  0.4688949 ]
action based on polilcy:[-0.9998507   0.49163377]
action based on polilcy:[-0.6525311   0.42081112]
action based on polilcy:[-0.97604275  0.4417912 ]
action based on polilcy:[-0.5457611   0.51550555]
action based on polilcy:[-0.4693006   0.60277253]
action based on polilcy:[-0.6463347  0.4847168]
action based on polilcy:[-0.8498777   0.48702353]
action based on polilcy:[-0.5477454  0.5894315]
action based on polilcy:[-0.73620963  0.6325549 ]
Total T: 83711 Episode Num: 6122 Episode T: 15 Reward: -1115.774494
action based on polilcy:[-1.         0.8003596]
action based on polilcy:[-0.99898225  0.6711421 ]
action based on polilcy:[-0.6076876  0.5659597]
action based on polilcy:[-0.99913377  0.

action based on polilcy:[-1.        0.804037]
action based on polilcy:[-0.87034637  0.59191084]
action based on polilcy:[-0.697557   0.5308927]
action based on polilcy:[-0.8813979  0.5871104]
action based on polilcy:[-0.60401595  0.47813404]
action based on polilcy:[-0.72714365  0.3417943 ]
action based on polilcy:[-0.71924186  0.34792757]
action based on polilcy:[-0.27228934  0.43595415]
action based on polilcy:[-0.99999785  0.5322066 ]
action based on polilcy:[-0.54779905  0.5749373 ]
action based on polilcy:[-0.99780136  0.68130517]
action based on polilcy:[-0.60879934  0.55343753]
action based on polilcy:[-0.66144025  0.53139913]
action based on polilcy:[-0.74697626  0.5264853 ]
action based on polilcy:[-0.8442112  0.6383977]
action based on polilcy:[-0.6346904   0.45284426]
action based on polilcy:[-0.72597396  0.42263356]
Total T: 83868 Episode Num: 6134 Episode T: 17 Reward: -1313.079343
action based on polilcy:[-1.          0.79705775]
action based on polilcy:[-0.691453    0.48

Total T: 84047 Episode Num: 6145 Episode T: 46 Reward: -7319.189240
Total T: 84061 Episode Num: 6146 Episode T: 14 Reward: -1825.510994
Total T: 84070 Episode Num: 6147 Episode T: 9 Reward: -1182.042260
Total T: 84115 Episode Num: 6148 Episode T: 45 Reward: -7638.093607
Total T: 84143 Episode Num: 6149 Episode T: 28 Reward: -2650.664089
Total T: 84170 Episode Num: 6150 Episode T: 27 Reward: -3100.720463
Total T: 84180 Episode Num: 6151 Episode T: 10 Reward: -1494.113069
Total T: 84194 Episode Num: 6152 Episode T: 14 Reward: -1553.925367
Total T: 84242 Episode Num: 6153 Episode T: 48 Reward: -6971.688271
Total T: 84251 Episode Num: 6154 Episode T: 9 Reward: -1100.849977
Total T: 84270 Episode Num: 6155 Episode T: 19 Reward: -2825.402901
Total T: 84284 Episode Num: 6156 Episode T: 14 Reward: -2058.757606
Total T: 84296 Episode Num: 6157 Episode T: 12 Reward: -1570.211465
Total T: 84349 Episode Num: 6158 Episode T: 53 Reward: -6994.984818
Total T: 84393 Episode Num: 6159 Episode T: 44 Rew

action based on polilcy:[-1.         0.5613551]
action based on polilcy:[-0.99996203  0.48093098]
action based on polilcy:[-0.9988319   0.43892524]
action based on polilcy:[-0.9999759   0.46427336]
action based on polilcy:[-0.9999979   0.48405084]
action based on polilcy:[-1.         0.5823598]
action based on polilcy:[-0.99997216  0.47993734]
action based on polilcy:[-1.         0.6201982]
action based on polilcy:[-0.77956015  0.4461899 ]
action based on polilcy:[-1.         0.6603365]
action based on polilcy:[-0.99815726  0.5037652 ]
action based on polilcy:[-0.7809199   0.28741828]
action based on polilcy:[-0.52681786  0.374529  ]
action based on polilcy:[-0.29218394  0.42070934]
action based on polilcy:[-0.9999991   0.46631327]
action based on polilcy:[-0.52529275  0.3377404 ]
Total T: 84655 Episode Num: 6176 Episode T: 16 Reward: -1377.419970
action based on polilcy:[-1.         0.5811173]
action based on polilcy:[-0.77923226  0.42726207]
action based on polilcy:[-1.         0.673

action based on polilcy:[-1.          0.55626464]
action based on polilcy:[-0.85555434  0.44257838]
action based on polilcy:[-0.75540066  0.46101794]
action based on polilcy:[-0.7765009   0.47451568]
action based on polilcy:[-0.6542915   0.46398118]
action based on polilcy:[-0.99999887  0.6199596 ]
action based on polilcy:[-0.6371752   0.44603917]
action based on polilcy:[-1.         0.5813854]
action based on polilcy:[-0.6652378  0.3872764]
action based on polilcy:[-1.          0.77391267]
action based on polilcy:[-0.6092123   0.46455437]
action based on polilcy:[-0.9999935   0.60193074]
action based on polilcy:[-0.7225162   0.41468814]
action based on polilcy:[-0.9352942   0.47733855]
action based on polilcy:[-0.9013225   0.33487603]
Total T: 84811 Episode Num: 6188 Episode T: 15 Reward: -1402.304584
action based on polilcy:[-1.         0.5624666]
action based on polilcy:[-0.90144753  0.4637794 ]
action based on polilcy:[-0.6551915  0.4270279]
action based on polilcy:[-0.8820145  0.5

action based on polilcy:[-1.          0.56278664]
action based on polilcy:[-0.99999744  0.46192643]
action based on polilcy:[-0.9999999   0.44979867]
action based on polilcy:[-1.         0.7773207]
action based on polilcy:[-0.9999841   0.39848638]
action based on polilcy:[-0.9122535   0.36239588]
action based on polilcy:[-0.7908573   0.43173513]
action based on polilcy:[-0.73032284  0.41296256]
action based on polilcy:[-0.78236324  0.27170736]
action based on polilcy:[-0.7545023  0.3017057]
action based on polilcy:[-0.62981284  0.5109604 ]
action based on polilcy:[-0.7704218   0.28867924]
action based on polilcy:[0.11818143 0.61056674]
action based on polilcy:[0.5423299 0.5842444]
Total T: 84984 Episode Num: 6202 Episode T: 14 Reward: -1002.097396
action based on polilcy:[-1.        0.554857]
action based on polilcy:[-0.99999225  0.43228438]
action based on polilcy:[-1.          0.47565034]
action based on polilcy:[-1.          0.46593893]
action based on polilcy:[-1.         0.4598312

action based on polilcy:[-1.          0.57893467]
action based on polilcy:[-1.         0.9942917]
action based on polilcy:[-0.99999756  0.4237847 ]
action based on polilcy:[-0.99999917  0.4346363 ]
action based on polilcy:[-0.9999626   0.36084443]
action based on polilcy:[-0.9999834   0.60024065]
action based on polilcy:[-0.99971354  0.53277075]
action based on polilcy:[-0.82054996  0.5868293 ]
action based on polilcy:[-1.         0.7401166]
action based on polilcy:[-0.64095914  0.5133077 ]
action based on polilcy:[-0.6774384   0.36893395]
action based on polilcy:[-0.7769326   0.24998024]
action based on polilcy:[-0.7419788   0.28795198]
action based on polilcy:[-0.745591   0.3051981]
action based on polilcy:[0.23342635 0.60593957]
Total T: 85136 Episode Num: 6213 Episode T: 15 Reward: -1464.066359
action based on polilcy:[-1.         0.6054858]
action based on polilcy:[-0.9999271   0.43009037]
action based on polilcy:[-0.99999785  0.46246836]
action based on polilcy:[-0.99998546  0.42

action based on polilcy:[-1.         0.5808599]
action based on polilcy:[-0.9545373   0.34906185]
action based on polilcy:[-0.991965    0.40537336]
action based on polilcy:[-1.          0.72948456]
action based on polilcy:[-0.999821    0.41228274]
action based on polilcy:[-0.9999988   0.47038513]
action based on polilcy:[-0.99999803  0.4271494 ]
action based on polilcy:[-0.9999321   0.40199837]
action based on polilcy:[-0.9999097  0.5774597]
action based on polilcy:[-0.74436164  0.38635013]
action based on polilcy:[-0.63177764  0.49666014]
action based on polilcy:[-0.69955325  0.36191225]
action based on polilcy:[-0.6620543  0.3820502]
action based on polilcy:[-0.65499914  0.45995608]
action based on polilcy:[-0.5976439  0.4845455]
action based on polilcy:[-0.89510006  0.39857167]
action based on polilcy:[-0.7952712   0.36056155]
action based on polilcy:[-0.7621019   0.33086738]
action based on polilcy:[-0.9019879   0.34237164]
action based on polilcy:[-0.6707866  0.2735168]
action bas

action based on polilcy:[-1.         0.5882244]
action based on polilcy:[-0.9620268  0.3400694]
action based on polilcy:[-0.815652    0.42962316]
action based on polilcy:[-1.         0.6896274]
action based on polilcy:[-0.97842956  0.35937202]
action based on polilcy:[-0.77555263  0.42972475]
action based on polilcy:[-0.77696955  0.28309435]
action based on polilcy:[-0.575076   0.4546452]
action based on polilcy:[-0.998852   0.4031526]
action based on polilcy:[-0.76762444  0.27068022]
action based on polilcy:[-0.99999267  0.60241497]
action based on polilcy:[-0.9972549  0.4855557]
action based on polilcy:[-0.73085225  0.33259672]
action based on polilcy:[-0.6582527   0.38464817]
Total T: 85443 Episode Num: 6234 Episode T: 14 Reward: -1106.680843
action based on polilcy:[-1.          0.60390455]
action based on polilcy:[-0.9999913   0.43641764]
action based on polilcy:[-0.9999687  0.3817776]
action based on polilcy:[-0.99417675  0.3717961 ]
action based on polilcy:[-0.8065572   0.380379

action based on polilcy:[-0.99999994  0.5913756 ]
action based on polilcy:[-0.99997467  0.40848392]
action based on polilcy:[-0.9996742   0.35556096]
action based on polilcy:[-0.8735132  0.3507664]
action based on polilcy:[-0.7263049   0.42083716]
action based on polilcy:[-0.7775809  0.4151584]
action based on polilcy:[-0.7252523   0.36121964]
action based on polilcy:[-0.7070934   0.33879513]
action based on polilcy:[-0.73967445  0.37028676]
action based on polilcy:[-0.8408996   0.41310135]
action based on polilcy:[-0.02078165  0.40874934]
Total T: 85603 Episode Num: 6245 Episode T: 11 Reward: -660.433726
action based on polilcy:[-0.99999994  0.60669637]
action based on polilcy:[-0.9498596   0.33613527]
action based on polilcy:[-0.6939459   0.37389952]
action based on polilcy:[-0.6063775   0.46368697]
action based on polilcy:[-0.8509731   0.55432606]
action based on polilcy:[-0.70433855  0.44085035]
action based on polilcy:[-1.         0.7180892]
action based on polilcy:[-0.686131    0

action based on polilcy:[0.2706295 0.4857327]
Total T: 85743 Episode Num: 6257 Episode T: 9 Reward: -685.248259
action based on polilcy:[-0.9999999   0.60309196]
action based on polilcy:[-0.97073865  0.32038194]
action based on polilcy:[-0.8088373   0.37761226]
action based on polilcy:[-0.7355491  0.3094151]
action based on polilcy:[-0.7007133   0.34533054]
action based on polilcy:[-0.6731976  0.5102301]
action based on polilcy:[-0.90411043  0.38010415]
action based on polilcy:[-0.7894699   0.24117778]
action based on polilcy:[-0.3994129   0.31278116]
action based on polilcy:[0.2558766  0.43643007]
action based on polilcy:[0.42698243 0.67464197]
Total T: 85754 Episode Num: 6258 Episode T: 11 Reward: -760.446632
action based on polilcy:[-0.9999999   0.59260607]
action based on polilcy:[-0.9999684   0.39261696]
action based on polilcy:[-1.         0.6363586]
action based on polilcy:[-1.         0.5525968]
action based on polilcy:[-1.         0.7620286]
action based on polilcy:[-0.9999999

action based on polilcy:[-0.99999976  0.5962995 ]
action based on polilcy:[-0.99999994  0.63341856]
action based on polilcy:[-0.7835664  0.3144178]
action based on polilcy:[-0.8878574   0.23687655]
action based on polilcy:[-0.5493775   0.27184683]
action based on polilcy:[-0.9351777  0.3587003]
action based on polilcy:[-0.94509095  0.2829281 ]
action based on polilcy:[-0.8079331   0.23250163]
Total T: 85907 Episode Num: 6268 Episode T: 8 Reward: -709.736547
action based on polilcy:[-0.99999976  0.6040302 ]
action based on polilcy:[-0.99999285  0.43797255]
action based on polilcy:[-1.          0.45894718]
action based on polilcy:[-0.99999684  0.37086275]
action based on polilcy:[-1.         0.8622407]
action based on polilcy:[-1.          0.62432516]
action based on polilcy:[-0.9999261  0.5136745]
action based on polilcy:[-0.99997365  0.38356283]
action based on polilcy:[-0.9999307  0.3752149]
action based on polilcy:[-0.9929553   0.42819476]
action based on polilcy:[-0.86411434  0.4061

action based on polilcy:[-0.9999871  0.6832838]
action based on polilcy:[-0.8897359   0.33600068]
action based on polilcy:[-0.99997026  0.7292861 ]
action based on polilcy:[-1.         0.8593396]
action based on polilcy:[-0.91353345  0.34017852]
action based on polilcy:[-0.78846264  0.40575114]
action based on polilcy:[-0.8037478  0.3586706]
action based on polilcy:[-0.9746161   0.55644953]
action based on polilcy:[-0.7551054  0.3796306]
action based on polilcy:[-0.78796345  0.50559974]
action based on polilcy:[-0.8049919   0.35414457]
action based on polilcy:[-0.85755104  0.34479353]
action based on polilcy:[-0.8270643  0.2226646]
action based on polilcy:[-0.99878675  0.39320764]
action based on polilcy:[-0.9997513   0.38699397]
action based on polilcy:[-0.84608996  0.4212196 ]
action based on polilcy:[-0.85601044  0.34229934]
action based on polilcy:[-0.6851498   0.36514273]
Total T: 86570 Episode Num: 6295 Episode T: 18 Reward: -1454.662269
action based on polilcy:[-0.9999863  0.697

action based on polilcy:[-0.9992614   0.72176015]
action based on polilcy:[-0.8923423   0.56956685]
action based on polilcy:[-0.9989907  0.7034191]
action based on polilcy:[-0.7855455   0.33745852]
action based on polilcy:[-0.9958789  0.5617842]
action based on polilcy:[-0.88479304  0.29924828]
action based on polilcy:[-0.9556051  0.3168939]
action based on polilcy:[-0.8088337   0.23738669]
action based on polilcy:[-0.69486547  0.2148259 ]
action based on polilcy:[-0.6529641   0.23231217]
action based on polilcy:[-0.7663584   0.23186532]
Total T: 86717 Episode Num: 6305 Episode T: 11 Reward: -767.897745
action based on polilcy:[-0.9984496  0.7114082]
action based on polilcy:[-1.          0.89317894]
action based on polilcy:[-0.9999453   0.42633513]
action based on polilcy:[-0.99882483  0.38686776]
action based on polilcy:[-0.99437153  0.4059918 ]
action based on polilcy:[-0.9999982   0.55934954]
action based on polilcy:[-0.9975672  0.5999193]
action based on polilcy:[-0.99999523  0.749

action based on polilcy:[-0.9872746  0.7271659]
action based on polilcy:[-0.81092256  0.59389853]
action based on polilcy:[-0.87164974  0.5640023 ]
action based on polilcy:[-0.6704423   0.44823682]
action based on polilcy:[-0.8499753   0.46138674]
action based on polilcy:[-0.6975986   0.48427162]
action based on polilcy:[-0.8510866  0.3365699]
action based on polilcy:[-0.7240006   0.38017648]
action based on polilcy:[-0.8992652  0.2931385]
action based on polilcy:[-0.7985789   0.25777102]
action based on polilcy:[-0.7466475   0.36245328]
Total T: 86901 Episode Num: 6317 Episode T: 11 Reward: -843.774781
action based on polilcy:[-0.98544085  0.7193321 ]
action based on polilcy:[-0.7936395   0.36939207]
action based on polilcy:[-0.94568205  0.42843226]
action based on polilcy:[-1.          0.81552887]
action based on polilcy:[-0.68145376  0.43452418]
action based on polilcy:[-0.8844465   0.53007686]
action based on polilcy:[-0.79728204  0.2945656 ]
action based on polilcy:[-0.84200466  0

action based on polilcy:[-0.98639107  0.72508734]
action based on polilcy:[-0.78755075  0.4186488 ]
action based on polilcy:[-0.67521894  0.46112195]
action based on polilcy:[-0.7843568  0.617931 ]
action based on polilcy:[-0.86631244  0.6150043 ]
action based on polilcy:[-1.         0.7794958]
action based on polilcy:[-0.7967692   0.48625022]
action based on polilcy:[-0.8408786   0.41594213]
action based on polilcy:[-0.77856076  0.42363665]
action based on polilcy:[-0.84299165  0.48854718]
action based on polilcy:[-0.72177243  0.47392288]
action based on polilcy:[-0.80706596  0.3080009 ]
action based on polilcy:[-0.99996203  0.46784785]
action based on polilcy:[-0.9941002   0.36822507]
action based on polilcy:[-0.78953123  0.26242185]
action based on polilcy:[-0.9414684   0.39995772]
action based on polilcy:[-0.72990525  0.35139406]
action based on polilcy:[-0.8043306   0.24714181]
Total T: 87055 Episode Num: 6331 Episode T: 18 Reward: -1339.667531
action based on polilcy:[-0.9920634 

action based on polilcy:[-0.9819594   0.72232026]
action based on polilcy:[-0.73491335  0.41772676]
action based on polilcy:[-0.99999994  0.75772166]
action based on polilcy:[-0.99975455  0.6520383 ]
action based on polilcy:[-0.69724184  0.39531004]
action based on polilcy:[-0.89595616  0.37105536]
action based on polilcy:[-0.7491703   0.37306744]
action based on polilcy:[-0.98329043  0.46030727]
action based on polilcy:[-0.8063255   0.23952053]
action based on polilcy:[-0.93585235  0.30708194]
action based on polilcy:[-0.64153063  0.23213021]
action based on polilcy:[0.23177391 0.43147472]
Total T: 87210 Episode Num: 6342 Episode T: 12 Reward: -1048.384994
action based on polilcy:[-0.97890466  0.7189019 ]
action based on polilcy:[-0.99993336  0.6531446 ]
action based on polilcy:[-0.9996797   0.62862486]
action based on polilcy:[-0.99897575  0.66972953]
action based on polilcy:[-0.72711384  0.41052502]
action based on polilcy:[-0.6817722   0.43491927]
action based on polilcy:[-0.981136

action based on polilcy:[-0.9516246  0.7114916]
action based on polilcy:[-0.999968    0.70499593]
action based on polilcy:[-0.7383146   0.31552687]
action based on polilcy:[-0.6189337   0.45120457]
action based on polilcy:[-0.84842783  0.5507587 ]
action based on polilcy:[-0.6652602   0.39446297]
action based on polilcy:[-0.999735   0.5893157]
action based on polilcy:[-0.7652215   0.27080688]
action based on polilcy:[-0.75085574  0.28111586]
action based on polilcy:[-0.99949336  0.38408646]
action based on polilcy:[-0.67344207  0.38243696]
action based on polilcy:[-0.94753313  0.31358153]
Total T: 87388 Episode Num: 6357 Episode T: 12 Reward: -1023.193192
action based on polilcy:[-0.94038916  0.7044284 ]
action based on polilcy:[-0.79692507  0.46797794]
action based on polilcy:[-0.6310607   0.44446468]
action based on polilcy:[-0.6919682  0.472119 ]
action based on polilcy:[-0.69567263  0.46575263]
action based on polilcy:[-0.9999249  0.7806274]
action based on polilcy:[-0.9999969  0.7

action based on polilcy:[-0.95467305  0.7052094 ]
action based on polilcy:[-0.6652681   0.45641732]
action based on polilcy:[-0.704985    0.37000683]
action based on polilcy:[-0.76499355  0.47673467]
action based on polilcy:[-0.79353684  0.5501566 ]
action based on polilcy:[-0.8755426  0.5626652]
action based on polilcy:[-0.8803369   0.43689388]
action based on polilcy:[-0.7971355   0.23994634]
action based on polilcy:[-0.66503537  0.21645829]
action based on polilcy:[-0.853775    0.29297137]
action based on polilcy:[-0.71309674  0.40636337]
action based on polilcy:[-0.8606778   0.37568918]
action based on polilcy:[-0.66532683  0.41379148]
action based on polilcy:[-0.84636056  0.4507927 ]
action based on polilcy:[-0.7840507   0.25343063]
action based on polilcy:[-0.78289574  0.24694909]
Total T: 87561 Episode Num: 6366 Episode T: 16 Reward: -953.459658
action based on polilcy:[-0.971887   0.7020807]
action based on polilcy:[-0.67025083  0.4052794 ]
action based on polilcy:[-0.9624296  

action based on polilcy:[-0.9312109  0.6776793]
action based on polilcy:[-0.7535786   0.45716253]
action based on polilcy:[-0.8532529   0.42319497]
action based on polilcy:[-0.73084     0.29195213]
action based on polilcy:[-0.9999433   0.45094964]
action based on polilcy:[-0.7310567   0.27391708]
action based on polilcy:[-0.8523733   0.46266335]
action based on polilcy:[-0.9076914   0.28584626]
action based on polilcy:[-0.6970737   0.31625333]
action based on polilcy:[-0.6981424  0.3156568]
action based on polilcy:[-0.93217295  0.5327507 ]
action based on polilcy:[-0.697445    0.33964851]
action based on polilcy:[-0.6589787   0.42081782]
Total T: 87716 Episode Num: 6377 Episode T: 13 Reward: -1158.061858
action based on polilcy:[-0.94698274  0.6916546 ]
action based on polilcy:[-0.6923777  0.513728 ]
action based on polilcy:[-0.7448704  0.4069527]
action based on polilcy:[-0.88602376  0.5926579 ]
action based on polilcy:[-0.7512289  0.4477973]
action based on polilcy:[-0.8719732   0.57

action based on polilcy:[-0.7683735   0.26839074]
action based on polilcy:[-0.9885174  0.4216387]
action based on polilcy:[-0.66018045  0.42569134]
action based on polilcy:[-0.7232559   0.41912732]
action based on polilcy:[-0.7510423  0.2875317]
action based on polilcy:[-0.9959763   0.41524145]
action based on polilcy:[-0.92712295  0.30622542]
action based on polilcy:[-0.7488723   0.23875876]
action based on polilcy:[-0.17609254  0.38846043]
Total T: 87883 Episode Num: 6389 Episode T: 14 Reward: -1229.088311
action based on polilcy:[-0.9680887  0.6846184]
action based on polilcy:[-0.995976    0.60516727]
action based on polilcy:[-0.93053746  0.45301566]
action based on polilcy:[-0.94124377  0.41784343]
action based on polilcy:[-0.996985    0.45204532]
action based on polilcy:[-0.9998976   0.44469586]
action based on polilcy:[-0.9999981  0.4692416]
action based on polilcy:[-0.9999993  0.418604 ]
action based on polilcy:[-0.9897403   0.37033817]
action based on polilcy:[-0.8663555   0.39

action based on polilcy:[-0.96877646  0.65195465]
action based on polilcy:[-0.63628703  0.46000573]
action based on polilcy:[-0.81911397  0.5138659 ]
action based on polilcy:[-0.6766839   0.33012274]
action based on polilcy:[-0.77402556  0.51558846]
action based on polilcy:[-0.67685854  0.4866845 ]
action based on polilcy:[-0.85944825  0.455613  ]
action based on polilcy:[-0.999999   0.6575233]
action based on polilcy:[-0.99831617  0.4961632 ]
action based on polilcy:[-0.9820579   0.39070138]
action based on polilcy:[-0.9689375   0.43844056]
action based on polilcy:[-0.7659261   0.50203454]
action based on polilcy:[-0.8130965   0.58469933]
action based on polilcy:[-0.99950427  0.9723566 ]
action based on polilcy:[-0.6450361  0.4853815]
action based on polilcy:[-0.84732443  0.52343875]
action based on polilcy:[-0.50747645  0.45430723]
action based on polilcy:[-0.24544683  0.49515164]
action based on polilcy:[0.0298136 0.499611 ]
Total T: 88528 Episode Num: 6415 Episode T: 19 Reward: -15

action based on polilcy:[-0.9507696   0.64416707]
action based on polilcy:[-0.99827653  0.60445803]
action based on polilcy:[-0.64332426  0.31424403]
action based on polilcy:[-0.66612077  0.23895259]
action based on polilcy:[-0.64562976  0.2983464 ]
action based on polilcy:[-0.7725134   0.43625423]
action based on polilcy:[-0.67227364  0.34407043]
action based on polilcy:[-0.67591596  0.23770286]
action based on polilcy:[0.01765846 0.5031625 ]
action based on polilcy:[0.6354537 0.5437105]
Total T: 88693 Episode Num: 6424 Episode T: 10 Reward: -769.024398
action based on polilcy:[-0.9527619  0.6502143]
action based on polilcy:[-0.6476221   0.35820812]
action based on polilcy:[-0.9792007   0.38083938]
action based on polilcy:[-0.692191   0.2859533]
action based on polilcy:[0.3732082 0.581404 ]
action based on polilcy:[0.523875   0.64118785]
action based on polilcy:[-0.91202116  0.38216105]
action based on polilcy:[-0.65558875  0.29030925]
Total T: 88701 Episode Num: 6425 Episode T: 8 Rew

action based on polilcy:[-0.9535346   0.63987803]
action based on polilcy:[-0.9721722   0.57310057]
action based on polilcy:[-0.6357502   0.30716917]
action based on polilcy:[-0.70555985  0.24563178]
action based on polilcy:[-0.6362746   0.34473675]
action based on polilcy:[-0.85210395  0.47128034]
action based on polilcy:[-0.63300836  0.34136686]
action based on polilcy:[-0.60675776  0.3725423 ]
action based on polilcy:[-0.66079223  0.26774597]
action based on polilcy:[-0.28241086  0.32620376]
action based on polilcy:[-0.61183417  0.3224743 ]
action based on polilcy:[-0.21448058  0.44525167]
Total T: 88847 Episode Num: 6437 Episode T: 12 Reward: -799.442986
action based on polilcy:[-0.9506932  0.6375113]
action based on polilcy:[-0.601653   0.3692258]
action based on polilcy:[-0.599082  0.448355]
action based on polilcy:[-0.9150542   0.45888373]
action based on polilcy:[-0.670514    0.36604828]
action based on polilcy:[-0.95220125  0.3428547 ]
action based on polilcy:[-0.7121409  0.41

action based on polilcy:[-0.9423526  0.637539 ]
action based on polilcy:[-0.8811059  0.5534986]
action based on polilcy:[-0.66067433  0.55626583]
action based on polilcy:[-0.785099    0.48354277]
action based on polilcy:[-0.941664    0.51460737]
action based on polilcy:[-0.6439535   0.44683594]
action based on polilcy:[-0.9999634   0.63234246]
action based on polilcy:[-0.6416588   0.27370346]
action based on polilcy:[-0.6498618   0.42425457]
action based on polilcy:[-0.688959    0.42345354]
---------------------------------------
Episode_num: 6450, Evaluation over 1 episodes: -834.169956
---------------------------------------
action based on polilcy:[-0.78720444  0.408899  ]
Total T: 89001 Episode Num: 6450 Episode T: 11 Reward: -914.690031
action based on polilcy:[-0.9370766  0.6365174]
action based on polilcy:[-0.5490989   0.37301388]
action based on polilcy:[-0.99325055  0.42033345]
action based on polilcy:[-0.84757125  0.373018  ]
action based on polilcy:[-0.5771253  0.3482781]
ac

action based on polilcy:[-0.9151306  0.6373619]
action based on polilcy:[-0.57540274  0.4023852 ]
action based on polilcy:[-0.5085627  0.5071647]
action based on polilcy:[-0.708073    0.44971636]
action based on polilcy:[-0.6718545   0.34631714]
action based on polilcy:[-0.60767174  0.3117373 ]
action based on polilcy:[-0.5707356   0.24479416]
action based on polilcy:[-0.6447606   0.37359586]
action based on polilcy:[-0.6205466   0.29572305]
action based on polilcy:[-0.29407197  0.32087338]
action based on polilcy:[-0.73956794  0.3216671 ]
action based on polilcy:[-0.6297071  0.3567135]
action based on polilcy:[-0.7906088   0.29063755]
action based on polilcy:[-0.6205176   0.32102352]
action based on polilcy:[-0.6224301   0.27703276]
Total T: 89165 Episode Num: 6463 Episode T: 15 Reward: -838.495202
action based on polilcy:[-0.9195494  0.6493832]
action based on polilcy:[-0.57008016  0.5162229 ]
action based on polilcy:[-0.7481179   0.45770708]
action based on polilcy:[-0.5935257   0.3

action based on polilcy:[-0.78938526  0.6780697 ]
action based on polilcy:[-0.73559415  0.5762987 ]
action based on polilcy:[-0.5035671  0.3624756]
action based on polilcy:[-0.65510046  0.4607914 ]
action based on polilcy:[-0.8152212   0.42979553]
action based on polilcy:[-0.99842393  0.49027792]
action based on polilcy:[-0.5907116   0.37156132]
action based on polilcy:[-0.7802548  0.2871551]
action based on polilcy:[-0.6614088  0.4216149]
action based on polilcy:[-0.58432925  0.40497184]
action based on polilcy:[-0.69131136  0.32685953]
action based on polilcy:[-0.5797061   0.26929796]
action based on polilcy:[-0.9969291  0.5618775]
action based on polilcy:[-0.59902626  0.395776  ]
action based on polilcy:[-0.992316    0.57300556]
Total T: 89334 Episode Num: 6476 Episode T: 15 Reward: -1325.947458
action based on polilcy:[-0.8238057   0.66552603]
action based on polilcy:[-0.6508684  0.580572 ]
action based on polilcy:[-0.9999575  0.7114217]
action based on polilcy:[-0.5597267   0.4880

action based on polilcy:[-0.8148076   0.68450546]
action based on polilcy:[-0.49088272  0.48796782]
action based on polilcy:[-0.6631105   0.48762596]
action based on polilcy:[-0.56126887  0.39315176]
action based on polilcy:[-0.5793633   0.41718134]
action based on polilcy:[-0.7647042   0.56215215]
action based on polilcy:[-0.59557104  0.30585927]
action based on polilcy:[-0.9621094   0.40585572]
action based on polilcy:[-0.87199295  0.34913573]
action based on polilcy:[-0.61595136  0.28402424]
action based on polilcy:[0.5996226  0.58270806]
action based on polilcy:[0.8385702  0.71347386]
Total T: 89496 Episode Num: 6487 Episode T: 12 Reward: -1085.850969
action based on polilcy:[-0.77206695  0.67668915]
action based on polilcy:[-0.53803885  0.40525824]
action based on polilcy:[-0.8731679   0.37613806]
action based on polilcy:[-0.9608884   0.43720207]
action based on polilcy:[-0.99076307  0.5008018 ]
action based on polilcy:[-0.5969982   0.26756236]
action based on polilcy:[-0.4838639 

action based on polilcy:[-0.830066    0.67942756]
action based on polilcy:[-0.5429032   0.39403448]
action based on polilcy:[-0.6945956  0.4212113]
action based on polilcy:[-0.6364852   0.27503383]
action based on polilcy:[-0.8282433   0.36398315]
action based on polilcy:[-0.66111165  0.40463454]
action based on polilcy:[-0.6747919   0.48481578]
action based on polilcy:[-0.7797782   0.62287617]
action based on polilcy:[-0.78440547  0.53913665]
action based on polilcy:[-0.7501246  0.3238037]
action based on polilcy:[-0.6243979  0.3516531]
action based on polilcy:[-0.6318951  0.4267266]
action based on polilcy:[-0.93840355  0.47983682]
action based on polilcy:[-0.76487416  0.28929082]
action based on polilcy:[0.4144586 0.6040831]
Total T: 89654 Episode Num: 6499 Episode T: 15 Reward: -1261.362754
action based on polilcy:[-0.77653944  0.68651456]
action based on polilcy:[-0.53233457  0.3896596 ]
action based on polilcy:[-0.8637628   0.38065696]
action based on polilcy:[-0.9945122  0.68692

action based on polilcy:[-0.83012867  0.6885909 ]
action based on polilcy:[-0.5494478   0.39043456]
action based on polilcy:[-0.86099035  0.7192168 ]
action based on polilcy:[-0.69656545  0.5256541 ]
action based on polilcy:[-0.8372898   0.55592406]
action based on polilcy:[-0.79421103  0.3166922 ]
action based on polilcy:[0.4939359  0.58736086]
action based on polilcy:[-0.66941524  0.4641502 ]
action based on polilcy:[0.9392118  0.80737275]
Total T: 89801 Episode Num: 6511 Episode T: 9 Reward: -1143.640108
action based on polilcy:[-0.8052043  0.6901151]
action based on polilcy:[-0.63141114  0.4667414 ]
action based on polilcy:[-0.5915615  0.3505471]
action based on polilcy:[-0.66084075  0.2783636 ]
action based on polilcy:[-0.8378307  0.4218138]
action based on polilcy:[0.8676024  0.72095495]
action based on polilcy:[0.95039   0.6301596]
Total T: 89808 Episode Num: 6512 Episode T: 7 Reward: -756.195857
action based on polilcy:[-0.78401357  0.68592095]
action based on polilcy:[-0.53926

action based on polilcy:[-0.8036918  0.6842557]
action based on polilcy:[-0.7660584  0.5683626]
action based on polilcy:[-0.5998033   0.39248905]
action based on polilcy:[-0.6225885   0.30235407]
action based on polilcy:[-0.62616026  0.34091368]
action based on polilcy:[-0.9843768   0.44166806]
action based on polilcy:[-0.66815865  0.4540949 ]
action based on polilcy:[-0.9994935  0.8026911]
action based on polilcy:[-0.64471024  0.4454814 ]
action based on polilcy:[-0.6466355  0.6081563]
action based on polilcy:[-0.7146198  0.5495134]
action based on polilcy:[-0.6580785   0.42679924]
action based on polilcy:[-0.63371074  0.36910585]
action based on polilcy:[-0.6534698   0.45593667]
action based on polilcy:[-0.75212365  0.4838283 ]
action based on polilcy:[-0.6999416   0.47510895]
action based on polilcy:[-0.8289436  0.5486542]
action based on polilcy:[-0.6401396   0.34901097]
action based on polilcy:[-0.6519121   0.31190276]
action based on polilcy:[-0.7672532   0.27738437]
action based

action based on polilcy:[-0.6807732   0.72051716]
action based on polilcy:[-0.67108035  0.9920083 ]
action based on polilcy:[-1.          0.43934816]
action based on polilcy:[-0.9618376   0.44615087]
action based on polilcy:[-0.970907    0.39265776]
action based on polilcy:[-0.93597573  0.53615916]
action based on polilcy:[-0.8708697   0.38815162]
action based on polilcy:[-0.8386662   0.69122887]
action based on polilcy:[-0.9628633   0.45430934]
action based on polilcy:[-0.9846119   0.47065437]
action based on polilcy:[-0.99831486  0.5501138 ]
action based on polilcy:[-0.99460506  0.7144295 ]
action based on polilcy:[-0.9945675  0.7576844]
action based on polilcy:[-0.9842496   0.66254884]
action based on polilcy:[-0.9464809  0.6074735]
action based on polilcy:[-0.58419496  0.44861943]
action based on polilcy:[-0.6611192   0.45275778]
action based on polilcy:[-0.85558176  0.44265172]
action based on polilcy:[-0.32899216  0.4254712 ]
action based on polilcy:[-0.31763366  0.37303707]
acti

action based on polilcy:[-0.6443507   0.73809516]
action based on polilcy:[-0.41753498  0.99995387]
action based on polilcy:[-0.5636164   0.99732906]
action based on polilcy:[-0.5013542  0.4150878]
action based on polilcy:[-0.6913545   0.39270434]
action based on polilcy:[-0.71147776  0.34872317]
action based on polilcy:[-0.8510173  0.5573906]
action based on polilcy:[-0.64233065  0.43380544]
action based on polilcy:[-0.6404078   0.45562813]
action based on polilcy:[-0.5602886   0.38854906]
action based on polilcy:[-0.65788805  0.42549562]
action based on polilcy:[-0.8573885  0.7473453]
action based on polilcy:[-0.95207393  0.54812825]
action based on polilcy:[-0.93526316  0.41563568]
action based on polilcy:[-0.68749094  0.33459473]
action based on polilcy:[-0.92078125  0.4107869 ]
action based on polilcy:[0.6123647 0.6580826]
Total T: 90750 Episode Num: 6558 Episode T: 17 Reward: -1890.801974
action based on polilcy:[-0.64394426  0.73904085]
action based on polilcy:[-0.5045366   0.85

action based on polilcy:[-0.6323865  0.7521367]
action based on polilcy:[-0.9843934  0.7879374]
action based on polilcy:[-0.48798558  0.41938302]
action based on polilcy:[-0.8040733   0.62707114]
action based on polilcy:[-0.5513926   0.41355976]
action based on polilcy:[-0.96687764  0.83742595]
action based on polilcy:[-0.4747481   0.46259952]
action based on polilcy:[-0.7657124  0.638255 ]
action based on polilcy:[-0.43585655  0.41265845]
action based on polilcy:[-0.5725311  0.3453427]
action based on polilcy:[-0.3735201  0.4459513]
action based on polilcy:[-0.9239172   0.45784974]
action based on polilcy:[-0.22626187  0.38446683]
action based on polilcy:[0.4242339 0.652711 ]
Total T: 90940 Episode Num: 6574 Episode T: 14 Reward: -1591.512651
action based on polilcy:[-0.6434351  0.7470013]
action based on polilcy:[-0.30189422  0.45657685]
action based on polilcy:[-0.7963203  0.6186232]
action based on polilcy:[-0.84947413  0.45729586]
action based on polilcy:[-0.8111922  0.3920516]
ac

action based on polilcy:[-0.5919601  0.7509828]
action based on polilcy:[-0.26672724  0.46658382]
action based on polilcy:[-0.98611504  0.775172  ]
action based on polilcy:[-0.7983336  0.3684977]
action based on polilcy:[-0.63521457  0.26835504]
action based on polilcy:[-0.44871446  0.50472367]
action based on polilcy:[-0.35737273  0.6116299 ]
action based on polilcy:[0.6891991  0.58929086]
Total T: 91087 Episode Num: 6587 Episode T: 8 Reward: -861.165001
action based on polilcy:[-0.5925871  0.7512875]
action based on polilcy:[-0.6453358  0.5395673]
action based on polilcy:[-0.58600795  0.57435644]
action based on polilcy:[-0.6011776  0.8133476]
action based on polilcy:[-0.60639495  0.6715732 ]
action based on polilcy:[-0.42325425  0.42578387]
action based on polilcy:[-0.5187124   0.44071907]
action based on polilcy:[-0.6546391   0.52793556]
action based on polilcy:[-0.6008238   0.48458004]
action based on polilcy:[-0.44288477  0.87393034]
action based on polilcy:[-0.4415621  0.4537675

action based on polilcy:[-0.55108047  0.75959134]
action based on polilcy:[-0.5320678   0.62888163]
action based on polilcy:[-0.9853451  0.5887711]
action based on polilcy:[-0.6547612   0.31812084]
action based on polilcy:[-0.7029278  0.5782536]
action based on polilcy:[-0.871714    0.62226874]
action based on polilcy:[-0.5452967  0.4528738]
action based on polilcy:[-0.48317453  0.36824843]
action based on polilcy:[-0.8168873  0.7186241]
action based on polilcy:[-0.753515    0.62233704]
action based on polilcy:[-0.5810046   0.29354662]
action based on polilcy:[0.38631934 0.5683149 ]
action based on polilcy:[0.97287035 0.80660164]
Total T: 91265 Episode Num: 6603 Episode T: 13 Reward: -1481.622645
action based on polilcy:[-0.5995552  0.7608998]
action based on polilcy:[-0.4480566   0.54205596]
action based on polilcy:[-0.87847286  0.6789193 ]
action based on polilcy:[-0.7934413   0.44031498]
action based on polilcy:[-0.62880814  0.31228366]
action based on polilcy:[-0.6456592  0.3369165

action based on polilcy:[-0.10073663  0.81825095]
action based on polilcy:[-0.38282457  0.78952366]
action based on polilcy:[-0.93639183  0.5543201 ]
action based on polilcy:[0.6418241 0.7226304]
action based on polilcy:[0.9326504 0.7707673]
action based on polilcy:[0.9955005  0.88574135]
action based on polilcy:[-0.04910446  0.5262866 ]
action based on polilcy:[0.1371545  0.53143704]
Total T: 91421 Episode Num: 6617 Episode T: 8 Reward: -951.036632
action based on polilcy:[-0.04110925  0.8246665 ]
action based on polilcy:[-0.78040344  0.7778982 ]
action based on polilcy:[-0.6345281   0.30002767]
action based on polilcy:[0.2596713  0.56547093]
action based on polilcy:[-0.98093253  0.55319417]
action based on polilcy:[0.76903033 0.8184535 ]
action based on polilcy:[-0.9655511  0.5084059]
action based on polilcy:[0.65821075 0.64734876]
Total T: 91429 Episode Num: 6618 Episode T: 8 Reward: -1100.183523
action based on polilcy:[-0.11528974  0.8262718 ]
action based on polilcy:[-0.33647323 

action based on polilcy:[-0.09400848  0.8659542 ]
action based on polilcy:[-0.14710999  0.72460806]
action based on polilcy:[-0.2909353  0.8464332]
action based on polilcy:[-0.230807   0.8817342]
action based on polilcy:[-0.35967484  0.6299639 ]
action based on polilcy:[-0.4246853  0.4629965]
action based on polilcy:[-0.6359155  0.880755 ]
action based on polilcy:[-0.6138468  0.4087556]
action based on polilcy:[-0.79607916  0.45962825]
action based on polilcy:[-0.58981466  0.39202166]
action based on polilcy:[-0.6232176   0.30744556]
action based on polilcy:[0.6735502 0.7173263]
action based on polilcy:[0.9869801 0.8075974]
action based on polilcy:[-0.66541576  0.6122011 ]
Total T: 91581 Episode Num: 6633 Episode T: 14 Reward: -1537.918487
action based on polilcy:[-0.08238249  0.8687089 ]
action based on polilcy:[-0.7503833  0.6973843]
action based on polilcy:[-0.7426696   0.60247236]
action based on polilcy:[-0.7771912   0.47963595]
action based on polilcy:[0.5489896  0.59749997]
acti

action based on polilcy:[-0.05879371  0.87911415]
action based on polilcy:[-0.34002683  0.47827372]
action based on polilcy:[-0.3924958   0.41379413]
action based on polilcy:[-0.8671157   0.61062765]
action based on polilcy:[-0.5391519   0.40984115]
action based on polilcy:[-0.62717056  0.39466074]
action based on polilcy:[-0.7323327   0.50173336]
action based on polilcy:[-0.63000536  0.30648416]
action based on polilcy:[-0.49697816  0.45856473]
action based on polilcy:[0.53762066 0.72182214]
action based on polilcy:[-0.8076782   0.46398965]
action based on polilcy:[-0.8976195  0.5145104]
action based on polilcy:[0.5097158  0.60248804]
Total T: 91764 Episode Num: 6649 Episode T: 13 Reward: -1204.834259
action based on polilcy:[-0.10141387  0.8749881 ]
action based on polilcy:[-0.35654005  0.4293815 ]
action based on polilcy:[-0.2839525  0.8434926]
action based on polilcy:[-0.3728174   0.41950813]
action based on polilcy:[-0.9905043   0.75459945]
action based on polilcy:[-0.99301636  0.

action based on polilcy:[-0.00876003  0.88537586]
action based on polilcy:[-0.35285667  0.43092325]
action based on polilcy:[-0.9912271   0.62369424]
action based on polilcy:[-0.50733995  0.42613184]
action based on polilcy:[-0.11649718  0.8976241 ]
action based on polilcy:[-0.2152131  0.9999933]
action based on polilcy:[-0.3628906   0.50590384]
action based on polilcy:[-0.4820274   0.47946948]
action based on polilcy:[-0.23151332  0.65389293]
action based on polilcy:[-0.76425564  0.5593568 ]
action based on polilcy:[0.4888315 0.6677978]
action based on polilcy:[0.67293227 0.7269076 ]
action based on polilcy:[0.9979914  0.88115656]
Total T: 91915 Episode Num: 6666 Episode T: 13 Reward: -1529.891858
action based on polilcy:[-0.11779434  0.8811538 ]
action based on polilcy:[-0.16896653  0.8001682 ]
action based on polilcy:[-0.31941593  0.5032191 ]
action based on polilcy:[-0.3413577  0.4508754]
action based on polilcy:[-0.6288327  0.8589496]
action based on polilcy:[-0.46728358  0.379998

action based on polilcy:[-0.02669614  0.883685  ]
action based on polilcy:[-0.30868143  0.43346718]
action based on polilcy:[-0.8100823  0.3525033]
action based on polilcy:[0.52533716 0.59892553]
action based on polilcy:[0.9626914 0.8048111]
action based on polilcy:[0.99754506 0.9287282 ]
Total T: 92539 Episode Num: 6696 Episode T: 6 Reward: -638.072449
action based on polilcy:[-0.0108344  0.8844407]
action based on polilcy:[-0.25594118  0.82184815]
action based on polilcy:[0.05125556 0.9017526 ]
action based on polilcy:[-0.32689324  0.4603049 ]
action based on polilcy:[-0.15618695  0.76258224]
action based on polilcy:[-0.5908253   0.63527286]
action based on polilcy:[-0.9092976  0.6020303]
action based on polilcy:[-0.70585227  0.3555141 ]
action based on polilcy:[-0.69162965  0.29170665]
action based on polilcy:[-0.8871962  0.6514273]
action based on polilcy:[0.08918896 0.529758  ]
action based on polilcy:[0.89115244 0.8152273 ]
action based on polilcy:[0.8452792  0.86168677]
Total T:

action based on polilcy:[-0.02428131  0.8874149 ]
action based on polilcy:[-0.3169095   0.48486763]
action based on polilcy:[-0.9562621  0.7436642]
action based on polilcy:[-0.78019404  0.40966642]
action based on polilcy:[-0.7478601  0.7909531]
action based on polilcy:[-0.25367472  0.89860934]
action based on polilcy:[-0.6370943   0.46671012]
action based on polilcy:[0.4025512 0.5764954]
action based on polilcy:[0.9935319  0.91895175]
action based on polilcy:[0.99947304 0.8236793 ]
Total T: 92701 Episode Num: 6711 Episode T: 10 Reward: -1456.689894
action based on polilcy:[0.01477252 0.8856663 ]
action based on polilcy:[-0.06096207  0.8103392 ]
action based on polilcy:[-0.32105875  0.913695  ]
action based on polilcy:[-0.45588008  0.6674451 ]
action based on polilcy:[-0.37206048  0.4475245 ]
action based on polilcy:[-0.36641523  0.4586053 ]
action based on polilcy:[-0.9817728   0.69787717]
action based on polilcy:[-0.66132176  0.29848203]
action based on polilcy:[-0.6920458   0.272849

action based on polilcy:[0.04060777 0.89171   ]
action based on polilcy:[-0.45245796  0.57751644]
action based on polilcy:[-0.9722163   0.57135165]
action based on polilcy:[0.73900807 0.7370843 ]
action based on polilcy:[0.9941     0.82520115]
action based on polilcy:[0.9271567  0.61783695]
Total T: 92867 Episode Num: 6727 Episode T: 6 Reward: -749.893348
action based on polilcy:[-0.13731965  0.8871933 ]
action based on polilcy:[-0.3914604  0.5971235]
action based on polilcy:[-0.9518993  0.7594404]
action based on polilcy:[-0.98408306  0.50937843]
action based on polilcy:[-0.5679894  0.399098 ]
action based on polilcy:[-0.6352171   0.32872468]
action based on polilcy:[-0.33358938  0.8562443 ]
action based on polilcy:[-0.9794988  0.6387227]
action based on polilcy:[-0.47781208  0.44189143]
action based on polilcy:[-0.9290214  0.5488153]
action based on polilcy:[-0.97173566  0.7614496 ]
action based on polilcy:[-0.87836164  0.5906454 ]
action based on polilcy:[-0.9860417   0.57397985]
ac

action based on polilcy:[-0.08549045  0.88320553]
action based on polilcy:[-0.9169046   0.75442654]
action based on polilcy:[-0.47195053  0.7589108 ]
action based on polilcy:[-0.8619987   0.40276626]
action based on polilcy:[-0.82687235  0.42560828]
action based on polilcy:[0.4711935 0.5890275]
action based on polilcy:[-0.8065492   0.42899495]
action based on polilcy:[0.594561  0.6811863]
action based on polilcy:[-0.8540654   0.53153324]
action based on polilcy:[0.8633011 0.7388872]
Total T: 93044 Episode Num: 6743 Episode T: 10 Reward: -1303.847760
action based on polilcy:[-0.04573861  0.8820408 ]
action based on polilcy:[-0.6240231  0.6762443]
action based on polilcy:[-0.9577024  0.52484  ]
action based on polilcy:[0.4344584  0.58774126]
action based on polilcy:[-0.8784661  0.5547496]
action based on polilcy:[-0.39711285  0.59861517]
action based on polilcy:[-0.69912934  0.4610402 ]
action based on polilcy:[-0.9734309  0.4904048]
action based on polilcy:[0.5497042  0.61225057]
Total 

action based on polilcy:[-0.1014728   0.87790024]
action based on polilcy:[-0.7781855   0.72722006]
action based on polilcy:[-0.60613304  0.28266403]
action based on polilcy:[-0.56833565  0.39187977]
action based on polilcy:[-0.9784244  0.5059717]
action based on polilcy:[0.94697237 0.8799795 ]
action based on polilcy:[0.59737766 0.548152  ]
action based on polilcy:[0.935372  0.6702424]
action based on polilcy:[0.8342882 0.5384027]
Total T: 93229 Episode Num: 6759 Episode T: 9 Reward: -870.707045
action based on polilcy:[-0.07555173  0.8714869 ]
action based on polilcy:[-0.4985711  0.5558882]
action based on polilcy:[-0.98525584  0.5871459 ]
action based on polilcy:[0.7709031 0.7507012]
action based on polilcy:[0.95412755 0.7791772 ]
action based on polilcy:[0.9976425  0.70665276]
Total T: 93235 Episode Num: 6760 Episode T: 6 Reward: -858.477967
action based on polilcy:[-0.08866625  0.8682283 ]
action based on polilcy:[-0.31049103  0.5002478 ]
action based on polilcy:[-0.6269556  0.916

action based on polilcy:[-0.07623633  0.86820114]
action based on polilcy:[-0.07795681  0.8919962 ]
action based on polilcy:[-0.6401096  0.5864973]
action based on polilcy:[-0.6119151   0.37197065]
action based on polilcy:[0.45270625 0.61640203]
action based on polilcy:[0.9871561  0.90897834]
action based on polilcy:[0.99884087 0.8257062 ]
Total T: 93380 Episode Num: 6772 Episode T: 7 Reward: -788.849625
action based on polilcy:[-0.05567312  0.8650668 ]
action based on polilcy:[-0.29751983  0.98721164]
action based on polilcy:[-0.1125195  0.9998708]
action based on polilcy:[-0.08885728  0.80506444]
action based on polilcy:[-0.30865958  0.9839998 ]
action based on polilcy:[0.02712073 0.85069174]
action based on polilcy:[-0.46835878  0.49397293]
action based on polilcy:[-0.6976043   0.46669036]
action based on polilcy:[0.3135277 0.7014008]
action based on polilcy:[0.96753716 0.85717463]
action based on polilcy:[0.9988654 0.8042347]
Total T: 93391 Episode Num: 6773 Episode T: 11 Reward: -

action based on polilcy:[-0.1145518  0.8586165]
action based on polilcy:[-0.60202587  0.62130165]
action based on polilcy:[-0.68288916  0.28574273]
action based on polilcy:[-0.96405876  0.45813337]
action based on polilcy:[0.1866697 0.5835452]
action based on polilcy:[0.5960257 0.6581497]
action based on polilcy:[0.9994614 0.9291339]
Total T: 93546 Episode Num: 6788 Episode T: 7 Reward: -713.425434
action based on polilcy:[-0.1053947   0.85609454]
action based on polilcy:[-0.30069485  0.43854427]
action based on polilcy:[-0.73910475  0.6841641 ]
action based on polilcy:[-0.7718405  0.3733148]
action based on polilcy:[0.7813327 0.7669498]
action based on polilcy:[0.99935746 0.93258923]
Total T: 93552 Episode Num: 6789 Episode T: 6 Reward: -766.472768
action based on polilcy:[-0.1280732   0.86353856]
action based on polilcy:[-0.2819045   0.44456857]
action based on polilcy:[-0.6067573   0.30556443]
action based on polilcy:[0.60654426 0.71085167]
action based on polilcy:[-0.6554775   0.36

action based on polilcy:[-0.09166888  0.8586372 ]
action based on polilcy:[-0.2298037  0.7208494]
action based on polilcy:[-0.68178475  0.4203632 ]
action based on polilcy:[0.44058424 0.580872  ]
action based on polilcy:[0.995507   0.96165395]
action based on polilcy:[0.97988474 0.88022095]
action based on polilcy:[0.99843293 0.8016836 ]
Total T: 93699 Episode Num: 6804 Episode T: 7 Reward: -878.170517
action based on polilcy:[-0.11309737  0.8593025 ]
action based on polilcy:[-0.86513424  0.7721924 ]
action based on polilcy:[-0.6779152  0.4992562]
action based on polilcy:[-0.6745112  0.4482471]
action based on polilcy:[-0.56404865  0.5108801 ]
action based on polilcy:[-0.38537538  0.49707597]
action based on polilcy:[-0.31351626  0.43236956]
action based on polilcy:[-0.35906184  0.490703  ]
action based on polilcy:[-0.35111958  0.45104057]
action based on polilcy:[-0.707266    0.42956266]
action based on polilcy:[-0.7614304  0.3848587]
action based on polilcy:[-0.8780586  0.7078912]
ac

action based on polilcy:[-0.11166496  0.8576518 ]
action based on polilcy:[-0.3238899  0.4259218]
action based on polilcy:[-0.68870133  0.29938558]
action based on polilcy:[-0.5554211   0.40938953]
action based on polilcy:[-0.61461437  0.32308522]
action based on polilcy:[-0.7198595   0.32193354]
action based on polilcy:[-0.8170264  0.3757512]
action based on polilcy:[-0.640215    0.34999403]
action based on polilcy:[-0.70490277  0.31998903]
action based on polilcy:[-0.6101176   0.29047528]
action based on polilcy:[0.52945065 0.5755347 ]
action based on polilcy:[0.9762337  0.87098527]
Total T: 93852 Episode Num: 6821 Episode T: 12 Reward: -770.409311
action based on polilcy:[-0.11185442  0.8570651 ]
action based on polilcy:[-0.5540593   0.59122384]
action based on polilcy:[-0.60803866  0.34225598]
action based on polilcy:[-0.6244093  0.3652706]
action based on polilcy:[0.30305356 0.54192674]
action based on polilcy:[0.6950184 0.6754118]
action based on polilcy:[0.8365116  0.66464245]
a

action based on polilcy:[-0.07106546  0.8523344 ]
action based on polilcy:[-0.3048376  0.5370196]
---------------------------------------
Episode_num: 6838, Evaluation over 1 episodes: -800.843192
---------------------------------------
Total T: 94001 Episode Num: 6838 Episode T: 3 Reward: -938.355678
Total T: 94052 Episode Num: 6839 Episode T: 51 Reward: -7044.478249
Total T: 94067 Episode Num: 6840 Episode T: 15 Reward: -2204.960667
Total T: 94111 Episode Num: 6841 Episode T: 44 Reward: -7198.347448
Total T: 94160 Episode Num: 6842 Episode T: 49 Reward: -5975.085578
Total T: 94211 Episode Num: 6843 Episode T: 51 Reward: -7281.419210
Total T: 94227 Episode Num: 6844 Episode T: 16 Reward: -2251.357453
Total T: 94276 Episode Num: 6845 Episode T: 49 Reward: -6884.434602
Total T: 94324 Episode Num: 6846 Episode T: 48 Reward: -7126.959779
Total T: 94374 Episode Num: 6847 Episode T: 50 Reward: -7554.442665
Total T: 94422 Episode Num: 6848 Episode T: 48 Reward: -7102.078508
Total T: 94450 Ep

action based on polilcy:[-0.09345779  0.8325314 ]
action based on polilcy:[-0.7683847   0.74450326]
action based on polilcy:[-0.37729308  0.4245324 ]
action based on polilcy:[-0.35756806  0.474213  ]
action based on polilcy:[-0.30064172  0.5344172 ]
action based on polilcy:[-0.08451763  0.539773  ]
action based on polilcy:[-0.2828252   0.57626456]
action based on polilcy:[-0.3579217   0.47951964]
action based on polilcy:[-0.1640388  0.7491282]
action based on polilcy:[-0.35230803  0.97918177]
action based on polilcy:[-0.26661345  0.6203246 ]
action based on polilcy:[-0.3981929   0.45167273]
action based on polilcy:[-0.79747486  0.7210747 ]
action based on polilcy:[-0.671832    0.47065282]
action based on polilcy:[-0.63596153  0.5404725 ]
action based on polilcy:[-0.7891283   0.36850783]
action based on polilcy:[-0.6073992   0.35166883]
action based on polilcy:[-0.5406263   0.37084556]
action based on polilcy:[-0.83074534  0.6210238 ]
action based on polilcy:[-0.7367002   0.72335017]
ac

action based on polilcy:[-0.08429553  0.8351083 ]
action based on polilcy:[-0.28663093  0.40956923]
action based on polilcy:[-0.44487083  0.5359808 ]
action based on polilcy:[-0.4938617   0.84354115]
action based on polilcy:[-0.9516026  0.4067528]
action based on polilcy:[0.8858603 0.802073 ]
action based on polilcy:[0.9990621  0.84236753]
action based on polilcy:[0.99841267 0.64331716]
Total T: 94796 Episode Num: 6880 Episode T: 8 Reward: -1009.577517
action based on polilcy:[-0.09484097  0.8325609 ]
action based on polilcy:[-0.29988635  0.68880755]
action based on polilcy:[-0.75558966  0.42812303]
action based on polilcy:[-0.9657547  0.4096814]
action based on polilcy:[-0.9844994  0.426261 ]
action based on polilcy:[-0.58086646  0.35635072]
action based on polilcy:[-0.53057903  0.2880913 ]
action based on polilcy:[-0.9549285   0.40686074]
action based on polilcy:[-0.66377234  0.2513361 ]
action based on polilcy:[-0.4948769   0.30990654]
action based on polilcy:[-0.96238726  0.3430782

action based on polilcy:[-0.07383593  0.83365893]
action based on polilcy:[-0.29461837  0.43455973]
action based on polilcy:[-0.8779545   0.33913714]
action based on polilcy:[0.55092865 0.56128675]
action based on polilcy:[0.9608976 0.7152751]
action based on polilcy:[0.9995421 0.8341304]
Total T: 94955 Episode Num: 6894 Episode T: 6 Reward: -640.195070
action based on polilcy:[-0.08224352  0.83370125]
action based on polilcy:[-0.10159457  0.82603955]
action based on polilcy:[-0.25849676  0.9996064 ]
action based on polilcy:[-0.24623495  0.43280736]
action based on polilcy:[-0.91991264  0.5173786 ]
action based on polilcy:[0.26017237 0.6437746 ]
action based on polilcy:[0.5956067 0.5872439]
action based on polilcy:[0.9896577 0.8970867]
action based on polilcy:[0.9969096  0.84257996]
action based on polilcy:[0.9964931  0.81016105]
Total T: 94965 Episode Num: 6895 Episode T: 10 Reward: -1272.049088
action based on polilcy:[-0.09209736  0.83529204]
action based on polilcy:[-0.2430141  0.7

action based on polilcy:[-0.05760822  0.83447796]
action based on polilcy:[-0.5002823  0.644781 ]
action based on polilcy:[-0.99965876  0.55758196]
action based on polilcy:[0.5023391 0.5537809]
action based on polilcy:[0.9976701 0.8889753]
action based on polilcy:[0.9751641 0.6697774]
Total T: 95130 Episode Num: 6910 Episode T: 6 Reward: -816.802228
action based on polilcy:[-0.03290617  0.83883625]
action based on polilcy:[-0.48941678  0.6713801 ]
action based on polilcy:[-0.14555933  0.9989714 ]
action based on polilcy:[-0.58613366  0.76456916]
action based on polilcy:[-0.47496676  0.32289633]
action based on polilcy:[0.80725485 0.7481338 ]
action based on polilcy:[0.99830157 0.9392284 ]
action based on polilcy:[0.976161   0.76674044]
Total T: 95138 Episode Num: 6911 Episode T: 8 Reward: -1195.034958
action based on polilcy:[-0.06359552  0.83311486]
action based on polilcy:[-0.14300667  0.9729189 ]
action based on polilcy:[-0.72724205  0.56867695]
action based on polilcy:[-0.9921464  

action based on polilcy:[-0.05301158  0.82842946]
action based on polilcy:[-0.4411889   0.85500497]
action based on polilcy:[-0.892577    0.38494506]
action based on polilcy:[0.55355525 0.5641688 ]
action based on polilcy:[0.9882146 0.7783236]
action based on polilcy:[0.99983287 0.6721374 ]
Total T: 95281 Episode Num: 6927 Episode T: 6 Reward: -872.609885
action based on polilcy:[-0.07552429  0.82304305]
action based on polilcy:[-0.3663506  0.4640819]
action based on polilcy:[-0.85303307  0.72018373]
action based on polilcy:[-0.5042362   0.40312305]
action based on polilcy:[0.5599153 0.5699153]
action based on polilcy:[0.9967579 0.8468305]
action based on polilcy:[0.99278927 0.6491989 ]
Total T: 95288 Episode Num: 6928 Episode T: 7 Reward: -830.940092
action based on polilcy:[-0.06947535  0.8185117 ]
action based on polilcy:[-0.37804165  0.833444  ]
action based on polilcy:[-0.7323542  0.3714951]
action based on polilcy:[0.5231993 0.5578675]
action based on polilcy:[-0.9997959   0.6062

action based on polilcy:[-0.03564166  0.8206374 ]
action based on polilcy:[-0.17727984  0.9633093 ]
action based on polilcy:[-0.42197266  0.82801265]
action based on polilcy:[-0.6166252   0.26081556]
action based on polilcy:[0.5381911  0.55342376]
action based on polilcy:[0.99919486 0.83462435]
action based on polilcy:[0.9993546  0.88078356]
Total T: 95431 Episode Num: 6942 Episode T: 7 Reward: -1096.326097
action based on polilcy:[-0.00813657  0.81776124]
action based on polilcy:[-0.18242191  0.99741304]
action based on polilcy:[-0.53357506  0.6151636 ]
action based on polilcy:[-0.9364648   0.34788176]
action based on polilcy:[-0.14720078  0.45637307]
action based on polilcy:[-0.9894751   0.33529723]
action based on polilcy:[0.58220255 0.56724036]
action based on polilcy:[0.6316037 0.5440109]
action based on polilcy:[-0.99411166  0.34214836]
action based on polilcy:[0.6226082  0.53630316]
Total T: 95441 Episode Num: 6943 Episode T: 10 Reward: -1194.317869
action based on polilcy:[-0.0

action based on polilcy:[-0.08698043  0.814456  ]
action based on polilcy:[-0.43590304  0.6158463 ]
action based on polilcy:[-0.3607504   0.36171886]
action based on polilcy:[0.9745284  0.86447185]
action based on polilcy:[0.6537944 0.5907607]
action based on polilcy:[0.92285085 0.62183094]
action based on polilcy:[0.99991244 0.7285572 ]
Total T: 95597 Episode Num: 6959 Episode T: 7 Reward: -722.025488
action based on polilcy:[-0.01780597  0.8165947 ]
action based on polilcy:[-0.3669215   0.54111475]
action based on polilcy:[-0.29799244  0.56049883]
action based on polilcy:[-0.28378373  0.4475282 ]
action based on polilcy:[-0.3301167  0.5024352]
action based on polilcy:[-0.59579235  0.50234205]
action based on polilcy:[-0.69833374  0.30164382]
action based on polilcy:[-0.97606015  0.40155378]
action based on polilcy:[-0.5497329  0.56399  ]
action based on polilcy:[-0.8837411   0.45193925]
action based on polilcy:[0.620746  0.6743571]
action based on polilcy:[0.58641446 0.5472528 ]
acti

action based on polilcy:[-0.0530075  0.8144599]
action based on polilcy:[-0.5491462   0.76862407]
action based on polilcy:[-0.94899344  0.47283378]
action based on polilcy:[-0.8215832   0.71465844]
action based on polilcy:[-0.48499036  0.4489864 ]
action based on polilcy:[-0.38682386  0.52177423]
action based on polilcy:[-0.41017175  0.48004052]
action based on polilcy:[0.0550129 0.5425927]
action based on polilcy:[-0.6642666   0.36212683]
action based on polilcy:[-0.63033587  0.42100754]
action based on polilcy:[0.07261299 0.5417727 ]
action based on polilcy:[0.9931016 0.9049037]
Total T: 95755 Episode Num: 6976 Episode T: 12 Reward: -1538.464483
action based on polilcy:[-0.04056572  0.8076742 ]
action based on polilcy:[-0.3605402  0.7399   ]
action based on polilcy:[-0.65446174  0.3199699 ]
action based on polilcy:[-0.5985837   0.41409856]
action based on polilcy:[-0.5613414   0.40606868]
action based on polilcy:[-0.36955914  0.6127804 ]
action based on polilcy:[-0.41351017  0.353916

action based on polilcy:[-0.03980214  0.80821115]
action based on polilcy:[-0.3256869  0.7036723]
action based on polilcy:[-0.5117831  0.9241754]
action based on polilcy:[-0.13331784  0.7796666 ]
action based on polilcy:[-0.33638898  0.44700423]
action based on polilcy:[-0.21070288  0.950241  ]
action based on polilcy:[-0.62849194  0.69512993]
action based on polilcy:[-0.84489816  0.3267223 ]
action based on polilcy:[0.5663525  0.59397864]
action based on polilcy:[0.98659855 0.72303665]
action based on polilcy:[0.9999762 0.6414895]
Total T: 95906 Episode Num: 6989 Episode T: 11 Reward: -1523.079488
action based on polilcy:[0.02236256 0.80223644]
action based on polilcy:[-0.02864762  0.7369296 ]
action based on polilcy:[-0.14061758  0.99756527]
action based on polilcy:[-0.05949812  0.7302953 ]
action based on polilcy:[-0.43876538  0.6030826 ]
action based on polilcy:[-0.5652814  0.3568316]
action based on polilcy:[0.26168415 0.48997685]
action based on polilcy:[0.5613097 0.5682721]
acti

action based on polilcy:[-0.02458203  0.8029248 ]
action based on polilcy:[-0.02973387  0.7947949 ]
action based on polilcy:[-0.30669683  0.5472518 ]
action based on polilcy:[-0.23273572  0.443445  ]
action based on polilcy:[0.47395548 0.581179  ]
action based on polilcy:[0.99362755 0.92077017]
action based on polilcy:[0.98828727 0.7043152 ]
action based on polilcy:[0.9999763 0.6247308]
Total T: 96546 Episode Num: 7017 Episode T: 8 Reward: -832.881535
action based on polilcy:[-0.00649789  0.7961537 ]
action based on polilcy:[-0.27298662  0.59101605]
action based on polilcy:[-0.09369933  0.7940955 ]
action based on polilcy:[-0.27454945  0.57619035]
action based on polilcy:[0.69214034 0.63729894]
action based on polilcy:[0.99750155 0.9094608 ]
action based on polilcy:[0.9985044 0.9072157]
action based on polilcy:[0.99978745 0.70502806]
Total T: 96554 Episode Num: 7018 Episode T: 8 Reward: -1059.931172
action based on polilcy:[-0.03640418  0.79581624]
action based on polilcy:[-0.3484396  

action based on polilcy:[0.01532801 0.7993351 ]
action based on polilcy:[-0.04582086  0.7152193 ]
action based on polilcy:[-0.23186406  0.46779123]
action based on polilcy:[-0.23365565  0.90458685]
action based on polilcy:[-0.3324554   0.54030085]
action based on polilcy:[-0.20078103  0.41767156]
action based on polilcy:[-0.5769214   0.81504047]
action based on polilcy:[-0.33372808  0.49271613]
action based on polilcy:[0.59627044 0.6976051 ]
action based on polilcy:[0.99851644 0.877479  ]
action based on polilcy:[0.9540287 0.7427788]
Total T: 96702 Episode Num: 7033 Episode T: 11 Reward: -1451.070779
action based on polilcy:[0.01706345 0.79675215]
action based on polilcy:[-0.30478635  0.9975151 ]
action based on polilcy:[-0.31681347  0.52811104]
action based on polilcy:[-0.20712274  0.5672568 ]
action based on polilcy:[-0.17854135  0.43382758]
action based on polilcy:[-0.4167521   0.59564567]
action based on polilcy:[0.8511506 0.8259393]
action based on polilcy:[0.9745539 0.7482674]
ac

action based on polilcy:[-0.02175124  0.79889333]
action based on polilcy:[-0.32282633  0.55230284]
action based on polilcy:[-0.4012955   0.48896596]
action based on polilcy:[-0.28426135  0.6753499 ]
action based on polilcy:[-0.6669393   0.60710156]
action based on polilcy:[0.5634413 0.5747952]
action based on polilcy:[0.99958956 0.8204658 ]
action based on polilcy:[0.97735614 0.8130111 ]
Total T: 96866 Episode Num: 7049 Episode T: 8 Reward: -1027.682452
action based on polilcy:[-0.07954174  0.80655956]
action based on polilcy:[-0.3440361   0.73650825]
action based on polilcy:[-0.31571135  0.62153053]
action based on polilcy:[-0.36900154  0.82904696]
action based on polilcy:[-0.3272163  0.684357 ]
action based on polilcy:[-0.26365945  0.52578235]
action based on polilcy:[-0.42041913  0.7852213 ]
action based on polilcy:[-0.49557382  0.6263168 ]
action based on polilcy:[0.83197737 0.78583086]
action based on polilcy:[0.9996153  0.81457734]
Total T: 96876 Episode Num: 7050 Episode T: 10 

action based on polilcy:[-0.0548115  0.8010481]
action based on polilcy:[-0.2907957  0.5130514]
action based on polilcy:[-0.35797787  0.88551337]
action based on polilcy:[-0.0929384  0.9229065]
action based on polilcy:[-0.07899371  0.48088145]
action based on polilcy:[0.5475764  0.59048766]
action based on polilcy:[-0.84767145  0.6408267 ]
action based on polilcy:[0.33319184 0.53520507]
action based on polilcy:[-0.4363375   0.46464974]
action based on polilcy:[0.61675704 0.5754853 ]
action based on polilcy:[0.99972147 0.7855987 ]
Total T: 97012 Episode Num: 7068 Episode T: 11 Reward: -1379.247310
action based on polilcy:[-0.02733065  0.8008286 ]
action based on polilcy:[-0.33573836  0.566272  ]
action based on polilcy:[-0.46494448  0.63202864]
action based on polilcy:[0.6890645 0.6253401]
action based on polilcy:[0.9989266 0.7320681]
action based on polilcy:[0.9989276  0.56982446]
Total T: 97018 Episode Num: 7069 Episode T: 6 Reward: -799.823496
action based on polilcy:[-0.05315422  0.

action based on polilcy:[-0.09543251  0.80692697]
action based on polilcy:[-0.13463254  0.84822357]
action based on polilcy:[-0.34121698  0.591674  ]
action based on polilcy:[-0.43148795  0.52442646]
action based on polilcy:[-0.19064002  0.44537082]
action based on polilcy:[-0.34558874  0.5306364 ]
action based on polilcy:[-0.34296533  0.4762117 ]
action based on polilcy:[-0.3087904  0.5763614]
action based on polilcy:[-0.28352845  0.5117557 ]
action based on polilcy:[-0.28196216  0.49261463]
action based on polilcy:[-0.14430211  0.81726545]
action based on polilcy:[-0.2582677   0.39977875]
action based on polilcy:[-0.3777501   0.41514602]
action based on polilcy:[-0.04717536  0.49266264]
action based on polilcy:[0.75198877 0.8053639 ]
action based on polilcy:[0.9986899  0.85731035]
action based on polilcy:[0.999962  0.6951345]
Total T: 97181 Episode Num: 7086 Episode T: 17 Reward: -1638.778066
action based on polilcy:[-0.10498495  0.80476105]
action based on polilcy:[-0.35386464  0.70

action based on polilcy:[-0.05321285  0.8047781 ]
action based on polilcy:[-0.20894279  0.45244825]
action based on polilcy:[-0.35516745  0.5929997 ]
action based on polilcy:[0.6630775 0.7130127]
action based on polilcy:[0.999088   0.76556623]
action based on polilcy:[0.9995042  0.59546596]
Total T: 97334 Episode Num: 7102 Episode T: 6 Reward: -744.030451
action based on polilcy:[-0.03692492  0.8053738 ]
action based on polilcy:[-0.28632042  0.70536   ]
action based on polilcy:[-0.23245914  0.6970336 ]
action based on polilcy:[-0.3018649  0.5367514]
action based on polilcy:[-0.38396564  0.6766025 ]
action based on polilcy:[0.5507016 0.6231606]
action based on polilcy:[0.7798579  0.67498916]
action based on polilcy:[0.99515474 0.70921427]
Total T: 97342 Episode Num: 7103 Episode T: 8 Reward: -1088.262892
action based on polilcy:[-0.04519082  0.8084837 ]
action based on polilcy:[-0.17246257  0.8556635 ]
action based on polilcy:[-0.32280496  0.56602144]
action based on polilcy:[-0.1007677

action based on polilcy:[-0.06650665  0.8075968 ]
action based on polilcy:[-0.37042746  0.7949438 ]
action based on polilcy:[-0.43632963  0.84373736]
action based on polilcy:[-0.5043993  0.5932501]
action based on polilcy:[0.59994435 0.6634552 ]
action based on polilcy:[0.9820692 0.8360661]
action based on polilcy:[0.99989766 0.7165466 ]
Total T: 97488 Episode Num: 7116 Episode T: 7 Reward: -1242.228633
action based on polilcy:[-0.06964686  0.8024095 ]
action based on polilcy:[-0.31928295  0.51754075]
action based on polilcy:[0.45891368 0.600351  ]
action based on polilcy:[0.98178214 0.92676985]
action based on polilcy:[0.9995969 0.813043 ]
action based on polilcy:[0.9999811 0.627658 ]
Total T: 97494 Episode Num: 7117 Episode T: 6 Reward: -832.688097
action based on polilcy:[-0.09138549  0.8114986 ]
action based on polilcy:[-0.15726462  0.43548834]
action based on polilcy:[-0.37691733  0.62544763]
action based on polilcy:[0.5410576 0.6110586]
action based on polilcy:[0.9973331 0.930590

action based on polilcy:[-0.06374765  0.8038794 ]
action based on polilcy:[-0.29108602  0.6050467 ]
action based on polilcy:[-0.16712315  0.4531459 ]
action based on polilcy:[-0.33421227  0.74295676]
action based on polilcy:[0.48778725 0.56128895]
action based on polilcy:[0.9992638  0.82748336]
action based on polilcy:[0.99987626 0.642665  ]
action based on polilcy:[0.99724585 0.5933998 ]
Total T: 97663 Episode Num: 7134 Episode T: 8 Reward: -950.391404
action based on polilcy:[-0.04573199  0.8033345 ]
action based on polilcy:[-0.32289633  0.80295336]
action based on polilcy:[0.03848236 0.6258303 ]
action based on polilcy:[0.6615546  0.74258137]
action based on polilcy:[0.9981433 0.8186881]
action based on polilcy:[0.9996501  0.65155023]
Total T: 97669 Episode Num: 7135 Episode T: 6 Reward: -943.339883
action based on polilcy:[-0.07357348  0.78966045]
action based on polilcy:[-0.38043234  0.78604823]
action based on polilcy:[0.6177723 0.6315446]
action based on polilcy:[0.84126425 0.74

action based on polilcy:[-0.15302978  0.8023139 ]
action based on polilcy:[-0.18274792  0.8695384 ]
action based on polilcy:[-0.26040807  0.54092246]
action based on polilcy:[-0.23650874  0.45337114]
action based on polilcy:[-0.32074705  0.4896742 ]
action based on polilcy:[0.477698   0.55937564]
action based on polilcy:[-0.74652565  0.5300364 ]
action based on polilcy:[0.736457  0.7173504]
action based on polilcy:[0.99921304 0.80185646]
action based on polilcy:[0.90654016 0.7724925 ]
Total T: 97831 Episode Num: 7150 Episode T: 10 Reward: -1185.073845
action based on polilcy:[-0.13261986  0.7867187 ]
action based on polilcy:[-0.2302788  0.4167765]
action based on polilcy:[-0.36272573  0.47177762]
action based on polilcy:[-0.25418183  0.47472152]
action based on polilcy:[-0.42443594  0.9799936 ]
action based on polilcy:[-0.20084389  0.8491707 ]
action based on polilcy:[-0.3710073   0.53263223]
action based on polilcy:[-0.3573685  0.7645019]
action based on polilcy:[-0.3291903   0.407862

action based on polilcy:[-0.09442665  0.7971374 ]
action based on polilcy:[-0.117092    0.83415675]
action based on polilcy:[-0.39030728  0.98947245]
action based on polilcy:[-0.14946496  0.52086014]
action based on polilcy:[-0.25043735  0.5471216 ]
action based on polilcy:[-0.36159557  0.94430333]
action based on polilcy:[-0.23421629  0.6076993 ]
action based on polilcy:[-0.16827013  0.785013  ]
action based on polilcy:[-0.26599023  0.38756958]
action based on polilcy:[0.93144655 0.9129656 ]
action based on polilcy:[0.9968021 0.9238521]
action based on polilcy:[-0.5338559  0.5762197]
action based on polilcy:[-0.5548531   0.56618214]
action based on polilcy:[0.5820451  0.69595873]
Total T: 97990 Episode Num: 7163 Episode T: 14 Reward: -2228.413080
action based on polilcy:[-0.07890682  0.7926838 ]
action based on polilcy:[-0.15699187  0.43242475]
action based on polilcy:[0.5670234  0.60950184]
action based on polilcy:[0.50730383 0.53424805]
action based on polilcy:[0.9998437 0.7669405]


action based on polilcy:[-0.20139372  0.7623166 ]
action based on polilcy:[-0.25208938  0.5572915 ]
action based on polilcy:[-0.1831042   0.45255917]
action based on polilcy:[0.47964743 0.72755855]
action based on polilcy:[0.9927875  0.87844837]
action based on polilcy:[0.99945676 0.74875605]
action based on polilcy:[0.9873109 0.5886292]
Total T: 98626 Episode Num: 7188 Episode T: 7 Reward: -864.669037
action based on polilcy:[-0.19784224  0.75881904]
action based on polilcy:[-0.11976098  0.44892707]
action based on polilcy:[-0.2634684   0.48216906]
action based on polilcy:[0.53366464 0.71869016]
action based on polilcy:[0.98820186 0.82240134]
action based on polilcy:[0.9998617 0.6796406]
Total T: 98632 Episode Num: 7189 Episode T: 6 Reward: -710.854961
action based on polilcy:[-0.1952019  0.7582232]
action based on polilcy:[-0.23968127  0.4737562 ]
action based on polilcy:[-0.38564745  0.61877465]
action based on polilcy:[0.34176072 0.5623201 ]
action based on polilcy:[-0.7550294   0.

action based on polilcy:[-0.20473099  0.75852025]
action based on polilcy:[-0.33311132  0.99516106]
action based on polilcy:[-0.10875437  0.49754333]
action based on polilcy:[-0.39664727  0.6911041 ]
action based on polilcy:[-0.25186777  0.5234293 ]
action based on polilcy:[0.30481738 0.5411771 ]
action based on polilcy:[0.92335325 0.8551954 ]
action based on polilcy:[0.99522316 0.7578635 ]
action based on polilcy:[0.999583  0.7653103]
Total T: 98781 Episode Num: 7204 Episode T: 9 Reward: -1140.746039
action based on polilcy:[-0.22748123  0.7512094 ]
action based on polilcy:[-0.3679356   0.71782327]
action based on polilcy:[-0.23164842  0.4402855 ]
action based on polilcy:[0.54854906 0.7625955 ]
action based on polilcy:[-0.35453066  0.40977642]
action based on polilcy:[0.3337997  0.54428446]
action based on polilcy:[0.98792344 0.85376143]
action based on polilcy:[0.9915737 0.7524243]
Total T: 98789 Episode Num: 7205 Episode T: 8 Reward: -911.984130
action based on polilcy:[-0.21717745 

action based on polilcy:[-0.26967585  0.7439378 ]
action based on polilcy:[-0.02535347  0.48658183]
action based on polilcy:[-0.19054501  0.48463482]
action based on polilcy:[-0.1607268   0.44900763]
action based on polilcy:[-0.31496915  0.5561484 ]
action based on polilcy:[0.30009103 0.6385317 ]
action based on polilcy:[-0.52128863  0.5299144 ]
action based on polilcy:[-0.5980708  0.6249634]
action based on polilcy:[0.9471039  0.91756517]
action based on polilcy:[0.47407448 0.580199  ]
action based on polilcy:[-0.57533145  0.53107053]
Total T: 98940 Episode Num: 7221 Episode T: 11 Reward: -1101.667179
action based on polilcy:[-0.23653533  0.7389132 ]
action based on polilcy:[-0.26138154  0.5102504 ]
action based on polilcy:[-0.19568191  0.59294385]
action based on polilcy:[-0.43015096  0.72004235]
action based on polilcy:[-0.45440912  0.6891555 ]
action based on polilcy:[-0.21654615  0.49657753]
action based on polilcy:[-0.55898666  0.6937864 ]
action based on polilcy:[-0.23110338  0.

action based on polilcy:[-0.27323392  0.73674583]
action based on polilcy:[-0.3833424  0.7826948]
action based on polilcy:[-0.06665132  0.47369102]
action based on polilcy:[-0.31746688  0.51617724]
action based on polilcy:[0.14993803 0.5015943 ]
action based on polilcy:[-0.44545776  0.5174755 ]
action based on polilcy:[0.32909754 0.5353595 ]
action based on polilcy:[0.5727089 0.5950049]
action based on polilcy:[0.9978406 0.775372 ]
Total T: 99094 Episode Num: 7234 Episode T: 9 Reward: -971.186498
action based on polilcy:[-0.27727455  0.73254776]
action based on polilcy:[0.03040967 0.5141611 ]
action based on polilcy:[-0.45832568  0.84659576]
action based on polilcy:[-0.48956376  0.76163185]
action based on polilcy:[-0.39054802  0.5273162 ]
action based on polilcy:[-0.2806312  0.5179986]
action based on polilcy:[-0.66871715  0.60717934]
action based on polilcy:[-0.26576522  0.41094145]
action based on polilcy:[0.42669564 0.6112908 ]
action based on polilcy:[0.61054647 0.6666557 ]
Total 

action based on polilcy:[-0.29034206  0.72878826]
action based on polilcy:[0.06258691 0.51605487]
action based on polilcy:[-0.5432761  0.6386442]
action based on polilcy:[-0.31151918  0.57558715]
action based on polilcy:[-0.25154993  0.49273193]
action based on polilcy:[-0.41252285  0.53646564]
action based on polilcy:[0.46864805 0.5388182 ]
Total T: 99253 Episode Num: 7250 Episode T: 7 Reward: -944.187852
action based on polilcy:[-0.26972008  0.72834367]
action based on polilcy:[-0.30677265  0.7541844 ]
action based on polilcy:[-0.03362154  0.46171886]
action based on polilcy:[-0.3502093  0.6312429]
action based on polilcy:[-0.44586226  0.7742481 ]
action based on polilcy:[-0.15004675  0.5166204 ]
action based on polilcy:[-0.17697704  0.5155857 ]
action based on polilcy:[-0.2205052   0.52432007]
action based on polilcy:[-0.44336128  0.8643701 ]
action based on polilcy:[0.06625896 0.49475387]
action based on polilcy:[-0.38744727  0.548085  ]
action based on polilcy:[0.43837065 0.529178

action based on polilcy:[-0.27814153  0.72427464]
action based on polilcy:[-0.27558362  0.73136467]
action based on polilcy:[-0.20879911  0.57570773]
action based on polilcy:[-0.24068044  0.52011377]
action based on polilcy:[-0.5941584  0.6280265]
action based on polilcy:[-0.20566335  0.40244198]
action based on polilcy:[-0.41857082  0.5035391 ]
action based on polilcy:[0.48165995 0.5154084 ]
action based on polilcy:[-0.5469271  0.4003101]
Total T: 99401 Episode Num: 7265 Episode T: 9 Reward: -1050.604485
action based on polilcy:[-0.27403337  0.7237731 ]
action based on polilcy:[0.07530583 0.5132923 ]
action based on polilcy:[-0.53193784  0.86897326]
action based on polilcy:[-0.31941327  0.69965076]
action based on polilcy:[-0.07382091  0.46675414]
action based on polilcy:[-0.30135977  0.53026927]
action based on polilcy:[-0.22938244  0.48204902]
action based on polilcy:[0.46084136 0.6051507 ]
action based on polilcy:[-0.6124041  0.6240097]
action based on polilcy:[0.41908088 0.5363502

action based on polilcy:[-0.2512133   0.71988356]
action based on polilcy:[0.05961009 0.5088706 ]
action based on polilcy:[-0.36003816  0.60458994]
action based on polilcy:[0.52894914 0.6131321 ]
action based on polilcy:[-0.17977917  0.54646647]
action based on polilcy:[-0.27754566  0.56044346]
action based on polilcy:[0.5480207  0.54721105]
Total T: 99551 Episode Num: 7281 Episode T: 7 Reward: -918.901953
action based on polilcy:[-0.28349727  0.71320045]
action based on polilcy:[-0.24791922  0.5600722 ]
action based on polilcy:[-0.04371373  0.4559253 ]
action based on polilcy:[-0.23511454  0.596843  ]
action based on polilcy:[0.5806483  0.57242966]
action based on polilcy:[0.9808116  0.68001735]
Total T: 99557 Episode Num: 7282 Episode T: 6 Reward: -744.645777
action based on polilcy:[-0.2877786  0.7211411]
action based on polilcy:[-0.08794642  0.51564556]
action based on polilcy:[-0.30906832  0.7134795 ]
action based on polilcy:[-0.17206348  0.5723393 ]
action based on polilcy:[-0.31

action based on polilcy:[-0.31615078  0.71742994]
action based on polilcy:[-0.29691905  0.7967487 ]
action based on polilcy:[-0.18048674  0.5160239 ]
action based on polilcy:[-0.21212648  0.50852436]
action based on polilcy:[-0.24417758  0.47765738]
action based on polilcy:[-0.47540808  0.90379494]
action based on polilcy:[-0.10244649  0.5032091 ]
action based on polilcy:[-0.4246774  0.8265225]
action based on polilcy:[-0.4242      0.79676205]
action based on polilcy:[-0.3582262  0.6367025]
action based on polilcy:[-0.35157317  0.7487806 ]
action based on polilcy:[-0.0043494   0.48538697]
action based on polilcy:[-0.650555   0.6329901]
action based on polilcy:[-0.02986991  0.48236758]
action based on polilcy:[0.53140175 0.5311359 ]
action based on polilcy:[-0.51609457  0.5120344 ]
action based on polilcy:[0.6022147 0.538462 ]
Total T: 99719 Episode Num: 7299 Episode T: 17 Reward: -2259.871066
action based on polilcy:[-0.31430873  0.7161809 ]
action based on polilcy:[-0.27088246  0.6696

action based on polilcy:[-0.32602975  0.71751046]
action based on polilcy:[-0.01083867  0.49769348]
action based on polilcy:[-0.5042956  0.8189872]
action based on polilcy:[0.0186035  0.49813482]
action based on polilcy:[-0.30904084  0.6052753 ]
action based on polilcy:[0.22881934 0.52729636]
action based on polilcy:[-0.43336722  0.6739562 ]
action based on polilcy:[-0.18953481  0.46981174]
action based on polilcy:[-0.4444885   0.79370856]
action based on polilcy:[-0.1803501   0.49667704]
action based on polilcy:[-0.24855368  0.80998814]
action based on polilcy:[-0.07989961  0.49998438]
action based on polilcy:[-0.2512868   0.51620126]
action based on polilcy:[-0.43795812  0.9941266 ]
action based on polilcy:[-0.22322494  0.6944406 ]
action based on polilcy:[-0.28233626  0.812742  ]
action based on polilcy:[-0.1691512  0.7198578]
action based on polilcy:[-0.18471949  0.7012303 ]
action based on polilcy:[-0.04954846  0.4585964 ]
action based on polilcy:[-0.18798056  0.40551394]
action b

Total T: 100068 Episode Num: 7325 Episode T: 54 Reward: -7059.208515
Total T: 100114 Episode Num: 7326 Episode T: 46 Reward: -6216.113745
Total T: 100137 Episode Num: 7327 Episode T: 23 Reward: -3464.615009
Total T: 100187 Episode Num: 7328 Episode T: 50 Reward: -7311.038353
Total T: 100232 Episode Num: 7329 Episode T: 45 Reward: -7134.754539
Total T: 100245 Episode Num: 7330 Episode T: 13 Reward: -1796.794114
Total T: 100268 Episode Num: 7331 Episode T: 23 Reward: -3291.066765
Total T: 100296 Episode Num: 7332 Episode T: 28 Reward: -4602.013515
Total T: 100345 Episode Num: 7333 Episode T: 49 Reward: -7491.816864
Total T: 100380 Episode Num: 7334 Episode T: 35 Reward: -5175.689420
Total T: 100396 Episode Num: 7335 Episode T: 16 Reward: -2251.342915
Total T: 100409 Episode Num: 7336 Episode T: 13 Reward: -1613.214145
Total T: 100456 Episode Num: 7337 Episode T: 47 Reward: -6989.186833
Total T: 100469 Episode Num: 7338 Episode T: 13 Reward: -1810.865136
action based on polilcy:[-0.412482

action based on polilcy:[-0.49020207  0.6931938 ]
action based on polilcy:[-0.5423423   0.81097037]
action based on polilcy:[-0.37564096  0.973404  ]
action based on polilcy:[-0.26040724  0.45736313]
action based on polilcy:[-0.45895773  0.5639845 ]
action based on polilcy:[-0.6394191   0.64554304]
action based on polilcy:[-0.37924328  0.48071164]
action based on polilcy:[-0.43563756  0.4953585 ]
action based on polilcy:[-0.4545731   0.40849063]
Total T: 100653 Episode Num: 7352 Episode T: 9 Reward: -1360.623861
action based on polilcy:[-0.46339056  0.7010921 ]
action based on polilcy:[-0.44848096  0.65502924]
action based on polilcy:[-0.30703554  0.49488857]
action based on polilcy:[-0.31026748  0.47412667]
action based on polilcy:[-0.34253082  0.43925998]
action based on polilcy:[-0.33522055  0.33516645]
action based on polilcy:[-0.5283826   0.59748334]
action based on polilcy:[-0.44175678  0.32493594]
action based on polilcy:[-0.40553418  0.36812916]
action based on polilcy:[-0.5128

action based on polilcy:[-0.4920666  0.7046021]
action based on polilcy:[-0.32597956  0.54225   ]
action based on polilcy:[-0.3436004  0.5032439]
action based on polilcy:[-0.5452745   0.65330136]
action based on polilcy:[-0.31970853  0.4577615 ]
action based on polilcy:[-0.35798573  0.48744345]
action based on polilcy:[-0.36375353  0.41797805]
action based on polilcy:[-0.37079504  0.41033223]
action based on polilcy:[-0.5697474  0.5990858]
action based on polilcy:[-0.47629908  0.32250053]
action based on polilcy:[-0.43194827  0.37812987]
action based on polilcy:[-0.44352525  0.37569723]
Total T: 100817 Episode Num: 7365 Episode T: 12 Reward: -921.498525
action based on polilcy:[-0.4907489  0.7104603]
action based on polilcy:[-0.4868025  0.6931913]
action based on polilcy:[-0.4636714   0.73466825]
action based on polilcy:[-0.56803584  0.6942178 ]
action based on polilcy:[-0.5200099   0.64633447]
action based on polilcy:[-0.3008134  0.430403 ]
action based on polilcy:[-0.410748   0.54630

action based on polilcy:[-0.36887917  0.39964113]
Total T: 100974 Episode Num: 7380 Episode T: 11 Reward: -1191.930892
action based on polilcy:[-0.44370914  0.72039235]
action based on polilcy:[-0.26841542  0.46480826]
action based on polilcy:[-0.33173564  0.520051  ]
action based on polilcy:[-0.35744554  0.54116344]
action based on polilcy:[-0.3952116   0.61916816]
action based on polilcy:[-0.29792765  0.46645936]
action based on polilcy:[-0.33222762  0.49245515]
action based on polilcy:[-0.48427922  0.5655296 ]
action based on polilcy:[-0.4677324  0.3311696]
action based on polilcy:[-0.43772447  0.47567213]
action based on polilcy:[-0.4438893   0.40182117]
action based on polilcy:[-0.7587942   0.55930185]
action based on polilcy:[-0.5104179  0.3409355]
Total T: 100987 Episode Num: 7381 Episode T: 13 Reward: -1004.039322
action based on polilcy:[-0.4299457  0.71818  ]
action based on polilcy:[-0.39197645  0.5833514 ]
action based on polilcy:[-0.44118288  0.5660677 ]
action based on po

action based on polilcy:[-0.44361764  0.68948054]
action based on polilcy:[-0.27483594  0.55122286]
action based on polilcy:[-0.32694498  0.5050119 ]
action based on polilcy:[-0.6205494  0.6566318]
action based on polilcy:[-0.29720926  0.48427206]
action based on polilcy:[-0.29530138  0.37559956]
action based on polilcy:[-0.33094424  0.35983855]
action based on polilcy:[-0.2457961  0.2781238]
Total T: 101141 Episode Num: 7396 Episode T: 8 Reward: -864.293419
action based on polilcy:[-0.43714672  0.6924135 ]
action based on polilcy:[-0.27474433  0.45222852]
action based on polilcy:[-0.4913155  0.5998291]
action based on polilcy:[-0.24935171  0.4434761 ]
action based on polilcy:[-0.41620252  0.6565704 ]
action based on polilcy:[-0.24280027  0.51284087]
action based on polilcy:[-0.6591091  0.6596067]
action based on polilcy:[-0.40843308  0.31795052]
action based on polilcy:[-0.3615995  0.2996539]
action based on polilcy:[-0.3779731   0.29943237]
action based on polilcy:[-0.40899488  0.258

action based on polilcy:[-0.41440555  0.7055042 ]
action based on polilcy:[-0.41603774  0.709821  ]
action based on polilcy:[-0.40504217  0.64276624]
action based on polilcy:[-0.41507185  0.5851044 ]
action based on polilcy:[-0.4207264  0.6513192]
action based on polilcy:[-0.4675922  0.5862543]
action based on polilcy:[-0.6122322  0.6146396]
action based on polilcy:[-0.33552     0.42545846]
action based on polilcy:[-0.740214    0.56720877]
action based on polilcy:[-0.40444425  0.31573132]
action based on polilcy:[-0.5712129   0.50726616]
action based on polilcy:[-0.40475875  0.4078023 ]
action based on polilcy:[-0.40109208  0.35169166]
Total T: 101293 Episode Num: 7410 Episode T: 13 Reward: -1644.907064
action based on polilcy:[-0.4370793   0.70279443]
action based on polilcy:[-0.44441596  0.71586347]
action based on polilcy:[-0.33297214  0.43804148]
action based on polilcy:[-0.50109017  0.5510372 ]
action based on polilcy:[-0.38685924  0.3335566 ]
action based on polilcy:[-0.43324828 

action based on polilcy:[-0.36799267  0.69603586]
action based on polilcy:[-0.41838992  0.67871666]
action based on polilcy:[-0.39607453  0.4191241 ]
action based on polilcy:[-0.32377142  0.5331529 ]
action based on polilcy:[-0.3987295  0.7423275]
action based on polilcy:[-0.49840903  0.62334126]
action based on polilcy:[-0.45273197  0.5244454 ]
action based on polilcy:[-0.2800802  0.4990817]
action based on polilcy:[-0.4096125   0.48576182]
action based on polilcy:[-0.48165894  0.30358687]
action based on polilcy:[-0.58699393  0.5449214 ]
action based on polilcy:[-0.82997924  0.56732136]
action based on polilcy:[-0.5357226  0.2906049]
action based on polilcy:[-0.40646482  0.33666694]
action based on polilcy:[-0.31177208  0.28806254]
action based on polilcy:[-0.28791937  0.26367396]
Total T: 101470 Episode Num: 7425 Episode T: 16 Reward: -1532.799258
action based on polilcy:[-0.43330336  0.6940025 ]
action based on polilcy:[-0.54994464  0.617517  ]
action based on polilcy:[-0.37546614 

action based on polilcy:[-0.48861277  0.70341706]
action based on polilcy:[-0.6343014  0.6106432]
action based on polilcy:[-0.4216326   0.49145684]
action based on polilcy:[-0.6101805  0.3473464]
action based on polilcy:[-0.73677963  0.59676033]
action based on polilcy:[-0.47386956  0.40887052]
action based on polilcy:[-0.85960054  0.5373596 ]
action based on polilcy:[-0.43815142  0.38432413]
action based on polilcy:[-0.5043303   0.31980687]
action based on polilcy:[-0.41205147  0.2927797 ]
action based on polilcy:[-0.44930205  0.24234389]
action based on polilcy:[-0.409557    0.23324783]
Total T: 101616 Episode Num: 7437 Episode T: 12 Reward: -968.377410
action based on polilcy:[-0.40726197  0.7191328 ]
action based on polilcy:[-0.4488478  0.7147697]
action based on polilcy:[-0.57037467  0.34565184]
action based on polilcy:[-0.43042055  0.447756  ]
action based on polilcy:[-0.5543934   0.34254265]
action based on polilcy:[-0.4567943  0.4044619]
action based on polilcy:[-0.6999867   0.

action based on polilcy:[-0.4652602   0.72983176]
action based on polilcy:[-0.7978246   0.62365675]
action based on polilcy:[-0.47227964  0.5030655 ]
action based on polilcy:[-0.48034498  0.4383717 ]
action based on polilcy:[-0.49896052  0.46811748]
action based on polilcy:[-0.69711524  0.5294448 ]
action based on polilcy:[-0.42323261  0.31538564]
action based on polilcy:[-0.4179528   0.36311784]
action based on polilcy:[-0.4180335   0.37166718]
Total T: 101767 Episode Num: 7451 Episode T: 9 Reward: -930.358442
action based on polilcy:[-0.5295843  0.7216648]
action based on polilcy:[-0.5226071  0.8781536]
action based on polilcy:[-0.37104547  0.5315146 ]
action based on polilcy:[-0.37363753  0.53078693]
action based on polilcy:[-0.82878935  0.9876132 ]
action based on polilcy:[-0.6299243   0.38668546]
action based on polilcy:[-0.4218467   0.58622026]
action based on polilcy:[-0.6360297   0.35684243]
action based on polilcy:[-0.73186296  0.69011265]
action based on polilcy:[-0.08548096 

action based on polilcy:[-0.4849432   0.72817737]
action based on polilcy:[-0.5184364  0.6003995]
action based on polilcy:[-0.3793084  0.5288051]
action based on polilcy:[-0.9246476   0.60098773]
action based on polilcy:[-0.47997758  0.68010825]
action based on polilcy:[-0.6648363  0.3467745]
action based on polilcy:[-0.4626768   0.49248827]
action based on polilcy:[-0.59523433  0.63581693]
action based on polilcy:[-0.9641399   0.58982587]
action based on polilcy:[-0.49031332  0.33898664]
action based on polilcy:[-0.40566987  0.37790748]
action based on polilcy:[-0.41309813  0.37485892]
action based on polilcy:[-0.40952754  0.32580742]
action based on polilcy:[-0.41651925  0.29913375]
Total T: 101936 Episode Num: 7462 Episode T: 14 Reward: -1277.543377
action based on polilcy:[-0.52916646  0.71868485]
action based on polilcy:[-0.5364165  0.4446426]
action based on polilcy:[-0.4624038  0.5339333]
action based on polilcy:[-0.88749385  0.6397401 ]
action based on polilcy:[-0.52643466  0.4

action based on polilcy:[-0.49190712  0.74662614]
action based on polilcy:[-0.78136575  0.7395172 ]
action based on polilcy:[-0.48894346  0.65877056]
action based on polilcy:[-0.975255   0.9758467]
action based on polilcy:[-0.48999345  0.53524387]
action based on polilcy:[-0.44044092  0.52024084]
action based on polilcy:[-0.7591068  0.6884588]
action based on polilcy:[-0.43627375  0.52100503]
action based on polilcy:[-0.52737486  0.65969527]
action based on polilcy:[-0.99984986  0.46316245]
action based on polilcy:[-0.99577177  0.40318444]
action based on polilcy:[-0.9965747  0.4019865]
action based on polilcy:[-0.9961335   0.40941924]
action based on polilcy:[-0.9966595   0.38883504]
action based on polilcy:[-0.99997956  0.61778635]
action based on polilcy:[-0.21360566  0.60223186]
action based on polilcy:[-0.21782662  0.59792775]
action based on polilcy:[-0.04041205  0.6618452 ]
action based on polilcy:[0.10211648 0.66608775]
action based on polilcy:[-0.5557921  0.6998879]
action bas

action based on polilcy:[-0.8906272  0.5971539]
action based on polilcy:[-0.8823735   0.45759976]
action based on polilcy:[-0.77532035  0.32332397]
action based on polilcy:[-0.70056003  0.36917362]
action based on polilcy:[-0.66168106  0.39477363]
action based on polilcy:[-0.81310195  0.651417  ]
action based on polilcy:[-0.6499886   0.42550647]
action based on polilcy:[-0.59266174  0.4214555 ]
action based on polilcy:[-0.64550877  0.35887587]
action based on polilcy:[-0.58117783  0.51968604]
action based on polilcy:[-0.8337662   0.61777806]
action based on polilcy:[-0.7134132   0.48745826]
action based on polilcy:[-0.64614636  0.47709367]
action based on polilcy:[-0.59619224  0.42741755]
action based on polilcy:[-0.60423744  0.3760518 ]
action based on polilcy:[-0.6240965   0.42699426]
action based on polilcy:[-0.6251694  0.5607766]
action based on polilcy:[-0.5981371   0.44793528]
action based on polilcy:[-0.67531705  0.47846302]
action based on polilcy:[-0.9511462   0.55284965]
acti

action based on polilcy:[-0.95371246  0.6048423 ]
action based on polilcy:[-0.97068816  0.6705916 ]
action based on polilcy:[-0.64767796  0.575788  ]
action based on polilcy:[-0.5375956   0.33480203]
action based on polilcy:[-0.63794076  0.54882085]
action based on polilcy:[-0.43056992  0.325591  ]
action based on polilcy:[-0.4901496   0.39261818]
action based on polilcy:[-0.26941547  0.50385165]
Total T: 102924 Episode Num: 7515 Episode T: 8 Reward: -934.106123
action based on polilcy:[-0.9568865   0.60754335]
action based on polilcy:[-0.7340834   0.34980625]
action based on polilcy:[-0.54911613  0.39796606]
action based on polilcy:[-0.5060936   0.41909364]
action based on polilcy:[-0.49551132  0.44103694]
action based on polilcy:[-0.52021855  0.3671227 ]
action based on polilcy:[-0.44521314  0.33217433]
action based on polilcy:[-0.4147591  0.3418304]
action based on polilcy:[-0.41386908  0.33405197]
action based on polilcy:[-0.55943507  0.4895839 ]
action based on polilcy:[-0.4280425

action based on polilcy:[-0.96454823  0.6121083 ]
action based on polilcy:[-0.60203326  0.54567677]
action based on polilcy:[-0.58087504  0.37401885]
action based on polilcy:[-0.8337556  0.6824034]
action based on polilcy:[-0.63696593  0.4869935 ]
action based on polilcy:[-0.48908517  0.45059255]
action based on polilcy:[-0.48930332  0.40614456]
action based on polilcy:[-0.75373363  0.5834817 ]
action based on polilcy:[-0.58230627  0.33950186]
action based on polilcy:[-0.47067437  0.38738543]
action based on polilcy:[-0.4628125  0.3059964]
action based on polilcy:[-0.7424697  0.4850658]
Total T: 103076 Episode Num: 7529 Episode T: 12 Reward: -973.562268
action based on polilcy:[-0.9637038  0.6000148]
action based on polilcy:[-0.6957374  0.3573674]
action based on polilcy:[-0.910422   0.6097585]
action based on polilcy:[-0.6479982   0.39501983]
action based on polilcy:[-0.6303024   0.37002948]
action based on polilcy:[-0.48046377  0.36793464]
action based on polilcy:[-0.6301271   0.5800

action based on polilcy:[-0.64269674  0.43028253]
action based on polilcy:[-0.651646    0.42573375]
action based on polilcy:[-0.9945033   0.74263597]
action based on polilcy:[-0.9106085  0.5098692]
action based on polilcy:[-0.6812632  0.418738 ]
action based on polilcy:[-0.94248706  0.6131445 ]
action based on polilcy:[-0.7382033  0.4460448]
action based on polilcy:[-0.68469083  0.474765  ]
action based on polilcy:[-0.65892166  0.44850478]
action based on polilcy:[-0.83386415  0.5908778 ]
action based on polilcy:[-0.7430692  0.5362575]
action based on polilcy:[-0.7331387   0.47165105]
action based on polilcy:[-0.9081921   0.59635806]
action based on polilcy:[-0.6202819  0.4556695]
action based on polilcy:[-0.65813184  0.4595005 ]
action based on polilcy:[-0.96725833  0.8029069 ]
action based on polilcy:[-0.68286765  0.56397283]
action based on polilcy:[-0.61374986  0.55635226]
action based on polilcy:[-0.44376245  0.44718683]
action based on polilcy:[-0.62544     0.48572546]
action bas

action based on polilcy:[-0.98176414  0.6012714 ]
action based on polilcy:[-0.68569696  0.37016442]
action based on polilcy:[-0.8047337  0.5911561]
action based on polilcy:[-0.538241   0.4804043]
action based on polilcy:[-0.37421644  0.41121605]
action based on polilcy:[-0.50748324  0.3310799 ]
action based on polilcy:[-0.4135536   0.27020788]
Total T: 103417 Episode Num: 7550 Episode T: 7 Reward: -656.613525
action based on polilcy:[-0.9797739   0.60325384]
action based on polilcy:[-0.60906243  0.4453075 ]
action based on polilcy:[-0.98129904  0.60109484]
action based on polilcy:[-0.6103851  0.5388907]
action based on polilcy:[-0.5768498   0.40591007]
action based on polilcy:[-0.53279376  0.47500786]
action based on polilcy:[-0.47018144  0.4255346 ]
action based on polilcy:[-0.6449207   0.52221537]
action based on polilcy:[-0.46722677  0.41946244]
action based on polilcy:[-0.5849466   0.44340327]
action based on polilcy:[-0.7851302  0.5326346]
action based on polilcy:[-0.43174866  0.3

action based on polilcy:[-0.98575276  0.62252903]
action based on polilcy:[-0.6607178   0.40802923]
action based on polilcy:[-0.58850443  0.40176845]
action based on polilcy:[-0.8387555  0.6794125]
action based on polilcy:[-0.60627806  0.47470564]
action based on polilcy:[-0.4429908  0.4713606]
action based on polilcy:[-0.4107555   0.32196105]
action based on polilcy:[-0.48159674  0.24458754]
Total T: 103600 Episode Num: 7563 Episode T: 8 Reward: -658.940237
action based on polilcy:[-0.98614055  0.59818405]
action based on polilcy:[-0.75654984  0.4506531 ]
action based on polilcy:[-0.9918147   0.64463156]
action based on polilcy:[-0.6518975   0.38560432]
action based on polilcy:[-0.60301197  0.6051384 ]
action based on polilcy:[-0.60304916  0.5898227 ]
action based on polilcy:[-0.5933511   0.41807768]
action based on polilcy:[-0.5938822  0.4767761]
action based on polilcy:[-0.33039835  0.4717379 ]
action based on polilcy:[-0.43667167  0.416214  ]
action based on polilcy:[-0.42784628  0

action based on polilcy:[-0.9869981  0.6071483]
action based on polilcy:[-0.6633326  0.3822536]
action based on polilcy:[-0.45752826  0.4145118 ]
action based on polilcy:[-0.37373325  0.3804286 ]
action based on polilcy:[-0.73650163  0.5274943 ]
action based on polilcy:[-0.5669116  0.5421225]
action based on polilcy:[-0.38989863  0.3951734 ]
action based on polilcy:[-0.37994325  0.39821163]
action based on polilcy:[-0.52783096  0.49493858]
action based on polilcy:[-0.4061852   0.36382306]
Total T: 103775 Episode Num: 7575 Episode T: 10 Reward: -660.015565
action based on polilcy:[-0.9871991  0.609747 ]
action based on polilcy:[-0.6447235   0.45644826]
action based on polilcy:[-0.64689755  0.45000538]
action based on polilcy:[-0.70180404  0.6013328 ]
action based on polilcy:[-0.77097464  0.61124766]
action based on polilcy:[-0.35753807  0.47115672]
action based on polilcy:[-0.42914703  0.51346254]
action based on polilcy:[-0.55562145  0.50101185]
action based on polilcy:[-0.54305923  0.

action based on polilcy:[-0.98630047  0.5832907 ]
action based on polilcy:[-0.6849088   0.48448652]
action based on polilcy:[-0.69849765  0.39876387]
action based on polilcy:[-0.9643636  0.5360911]
action based on polilcy:[-0.6412984  0.4196745]
action based on polilcy:[-0.6183476  0.3736047]
action based on polilcy:[-0.6627133   0.59505314]
action based on polilcy:[-0.48714     0.61506796]
action based on polilcy:[-0.36315897  0.48379207]
action based on polilcy:[-0.45369497  0.39318404]
action based on polilcy:[-0.4559746  0.2526786]
action based on polilcy:[-0.42312863  0.33918306]
Total T: 103954 Episode Num: 7587 Episode T: 12 Reward: -991.299700
action based on polilcy:[-0.98497933  0.5793526 ]
action based on polilcy:[-0.8170609   0.49125826]
action based on polilcy:[-0.71546495  0.3827728 ]
action based on polilcy:[-0.9694861   0.58241904]
action based on polilcy:[-0.63398266  0.4333586 ]
action based on polilcy:[-0.6249174  0.432913 ]
action based on polilcy:[-0.5993482   0.40

action based on polilcy:[-0.9858584  0.5166008]
action based on polilcy:[-0.5815077   0.44193846]
action based on polilcy:[-0.602165   0.5372181]
action based on polilcy:[-0.6695546   0.68841827]
action based on polilcy:[-0.52186155  0.4513465 ]
action based on polilcy:[-0.6132517  0.2964863]
action based on polilcy:[-0.6780044   0.49217123]
action based on polilcy:[-0.59026366  0.45680162]
action based on polilcy:[-0.61477447  0.5538788 ]
action based on polilcy:[-0.35640168  0.4310246 ]
action based on polilcy:[-0.45733544  0.30948773]
Total T: 104585 Episode Num: 7611 Episode T: 11 Reward: -1041.497823
action based on polilcy:[-0.98756325  0.5209769 ]
action based on polilcy:[-0.703043    0.35255742]
action based on polilcy:[-0.62566566  0.43762568]
action based on polilcy:[-0.64023876  0.3964035 ]
action based on polilcy:[-0.61205506  0.3973467 ]
action based on polilcy:[-0.5569824  0.6451013]
action based on polilcy:[-0.6701704   0.43020666]
action based on polilcy:[-0.87896293  0

action based on polilcy:[-0.9857108   0.53332585]
action based on polilcy:[-0.6457287   0.45927536]
action based on polilcy:[-0.5138989   0.60723436]
action based on polilcy:[-0.5915308   0.41511336]
action based on polilcy:[-0.97524744  0.54996073]
action based on polilcy:[-0.57174265  0.42937154]
action based on polilcy:[-0.45059884  0.6200916 ]
action based on polilcy:[-0.38174877  0.47779194]
action based on polilcy:[-0.5266216   0.45071062]
action based on polilcy:[-0.5097203  0.3961782]
action based on polilcy:[-0.55737853  0.45315656]
action based on polilcy:[-0.55878115  0.5482589 ]
action based on polilcy:[-0.44482827  0.40590227]
action based on polilcy:[-0.40018228  0.43974873]
action based on polilcy:[-0.5678992   0.51336336]
action based on polilcy:[-0.4072257   0.39043546]
Total T: 104743 Episode Num: 7623 Episode T: 16 Reward: -1271.469359
action based on polilcy:[-0.9869924  0.5349865]
action based on polilcy:[-0.5850233  0.6409856]
action based on polilcy:[-0.5365075  

action based on polilcy:[-0.9872718  0.5504021]
action based on polilcy:[-0.69844735  0.4282866 ]
action based on polilcy:[-0.9538056   0.62769294]
action based on polilcy:[-0.5306977   0.65188277]
action based on polilcy:[-0.64994746  0.71487623]
action based on polilcy:[-0.35891643  0.47849014]
action based on polilcy:[-0.3811757  0.4594357]
action based on polilcy:[-0.40197188  0.35854807]
action based on polilcy:[-0.54785943  0.41897896]
action based on polilcy:[-0.46622923  0.4027938 ]
action based on polilcy:[-0.44499844  0.32851696]
action based on polilcy:[-0.46310744  0.26832673]
Total T: 104910 Episode Num: 7637 Episode T: 12 Reward: -1120.026511
action based on polilcy:[-0.9874406  0.5470995]
action based on polilcy:[-0.68619466  0.4253827 ]
action based on polilcy:[-0.4422377   0.59593403]
action based on polilcy:[-0.41490722  0.47831655]
action based on polilcy:[-0.45171845  0.5665492 ]
action based on polilcy:[-0.44999078  0.49138442]
action based on polilcy:[-0.6013918  

action based on polilcy:[-0.98867816  0.54951143]
action based on polilcy:[-0.46673745  0.63037276]
action based on polilcy:[-0.57053226  0.50247973]
action based on polilcy:[-0.69188654  0.45734408]
action based on polilcy:[-0.99695635  0.72559094]
action based on polilcy:[-0.675459   0.4799723]
action based on polilcy:[-0.62339795  0.43208823]
action based on polilcy:[-0.706537   0.6682066]
action based on polilcy:[-0.62409437  0.63920087]
action based on polilcy:[-0.41752708  0.3839799 ]
action based on polilcy:[-0.5226003   0.45594668]
action based on polilcy:[-0.4571226  0.6942549]
action based on polilcy:[-0.4019063   0.47541985]
action based on polilcy:[-0.4118707   0.40686375]
Total T: 105074 Episode Num: 7650 Episode T: 14 Reward: -1439.840832
action based on polilcy:[-0.9877243   0.55168945]
action based on polilcy:[-0.6485375   0.39461428]
action based on polilcy:[-0.48229548  0.66874623]
action based on polilcy:[-0.4751792   0.66763043]
action based on polilcy:[-0.63720846 

action based on polilcy:[-0.9983909  0.5007813]
action based on polilcy:[0.08599526 0.67801774]
action based on polilcy:[-0.7095492   0.58805287]
action based on polilcy:[-0.99993175  0.839144  ]
action based on polilcy:[-0.9925719  0.5246937]
action based on polilcy:[0.15884271 0.6784017 ]
action based on polilcy:[-0.15894765  0.9010774 ]
action based on polilcy:[0.36899674 0.78391   ]
action based on polilcy:[0.33091965 0.6105145 ]
action based on polilcy:[0.35186937 0.6911409 ]
action based on polilcy:[-0.31297073  0.57982564]
action based on polilcy:[0.6440358  0.73713285]
action based on polilcy:[-0.19292939  0.50156885]
action based on polilcy:[-0.11810002  0.4310553 ]
Total T: 105263 Episode Num: 7664 Episode T: 32 Reward: -2775.910463
action based on polilcy:[-0.98096085  0.5443597 ]
action based on polilcy:[-0.5898977  0.5326084]
action based on polilcy:[-0.6746655   0.44033957]
action based on polilcy:[-0.5146397   0.49339855]
action based on polilcy:[-0.41081807  0.460179  ]

action based on polilcy:[-0.97468513  0.55898756]
action based on polilcy:[-0.90993094  0.5412071 ]
action based on polilcy:[-0.66138524  0.4765895 ]
action based on polilcy:[-0.6374969   0.42275745]
action based on polilcy:[-0.8501477  0.6076274]
action based on polilcy:[-0.6776843   0.46928006]
action based on polilcy:[-0.5565369   0.46096164]
action based on polilcy:[-0.6200411  0.6357779]
action based on polilcy:[-0.35102275  0.45316717]
action based on polilcy:[-0.36758432  0.41325682]
action based on polilcy:[-0.36402413  0.3769569 ]
action based on polilcy:[-0.3984775   0.23279083]
Total T: 105415 Episode Num: 7676 Episode T: 12 Reward: -958.407841
action based on polilcy:[-0.9752125  0.5824036]
action based on polilcy:[-0.9996238  0.9995877]
action based on polilcy:[-0.82406294  0.5082004 ]
action based on polilcy:[-0.4630817  0.6825037]
action based on polilcy:[-0.52415836  0.54790974]
action based on polilcy:[-0.3591113   0.40042862]
action based on polilcy:[-0.6309695  0.536

action based on polilcy:[-0.9669224  0.5648135]
action based on polilcy:[-0.5572376   0.68520117]
action based on polilcy:[-0.493323   0.4475129]
action based on polilcy:[-0.47569913  0.6278    ]
action based on polilcy:[-0.52295864  0.42559305]
action based on polilcy:[-0.4193208   0.43317205]
action based on polilcy:[-0.47222188  0.45950073]
action based on polilcy:[-0.4491212  0.4385495]
action based on polilcy:[-0.4571829  0.3858532]
action based on polilcy:[-0.6778772  0.5095261]
action based on polilcy:[-0.51159745  0.45425567]
action based on polilcy:[-0.46256384  0.3030231 ]
action based on polilcy:[-0.4805313   0.23635015]
Total T: 105590 Episode Num: 7692 Episode T: 13 Reward: -961.912955
action based on polilcy:[-0.9680823  0.5785421]
action based on polilcy:[-0.982493   0.6169388]
action based on polilcy:[-0.6281985  0.4336036]
action based on polilcy:[-0.52915907  0.47281328]
action based on polilcy:[-0.62490535  0.6100091 ]
action based on polilcy:[-0.43360037  0.4340229 

action based on polilcy:[-0.9482801  0.5669102]
action based on polilcy:[-0.5473155  0.6718645]
action based on polilcy:[-0.4850495   0.41698447]
action based on polilcy:[-0.4407337  0.3792257]
action based on polilcy:[-0.42928126  0.3858999 ]
action based on polilcy:[-0.42698678  0.33120808]
action based on polilcy:[-0.4760867   0.39393377]
action based on polilcy:[-0.4529307   0.32789388]
action based on polilcy:[-0.4592927   0.27901357]
Total T: 105762 Episode Num: 7706 Episode T: 9 Reward: -492.689445
action based on polilcy:[-0.94854605  0.57619274]
action based on polilcy:[-0.5390235  0.6726961]
action based on polilcy:[-0.49383235  0.44115898]
action based on polilcy:[-0.6530969  0.6239203]
action based on polilcy:[-0.52681375  0.6704787 ]
action based on polilcy:[-0.7295139  0.6827462]
action based on polilcy:[-0.6719507  0.6066835]
action based on polilcy:[-0.6488488   0.68529856]
action based on polilcy:[-0.6339631   0.64217985]
action based on polilcy:[-0.43673018  0.4480226

action based on polilcy:[-0.91922957  0.5775393 ]
action based on polilcy:[-0.5633408  0.6181606]
action based on polilcy:[-0.55577976  0.71370363]
action based on polilcy:[-0.38958606  0.51015633]
action based on polilcy:[-0.63717514  0.5235224 ]
action based on polilcy:[-0.6756597  0.5434507]
action based on polilcy:[-0.3877126   0.37096587]
action based on polilcy:[-0.47179216  0.3905714 ]
Total T: 105915 Episode Num: 7717 Episode T: 8 Reward: -1034.665307
action based on polilcy:[-0.9169978   0.58041596]
action based on polilcy:[-0.5537018  0.4347553]
action based on polilcy:[-0.49407512  0.4715337 ]
action based on polilcy:[-0.4812391   0.69726205]
action based on polilcy:[-0.3422418  0.5198113]
action based on polilcy:[-0.31669322  0.43827358]
action based on polilcy:[-0.76625025  0.52100986]
action based on polilcy:[-0.43809855  0.46060836]
action based on polilcy:[-0.4204709  0.3539465]
action based on polilcy:[-0.55794036  0.44787848]
Total T: 105925 Episode Num: 7718 Episode 

action based on polilcy:[-0.2384112  0.6328951]
action based on polilcy:[-0.2252298  0.6890261]
action based on polilcy:[-0.09040874  0.6702738 ]
action based on polilcy:[-0.26323715  0.6848599 ]
action based on polilcy:[-0.41899118  0.63195515]
action based on polilcy:[-0.35734853  0.68089545]
action based on polilcy:[-0.5598095  0.6708269]
action based on polilcy:[-0.38656408  0.68573046]
action based on polilcy:[-0.186639   0.6209823]
action based on polilcy:[-0.41510585  0.47880504]
action based on polilcy:[-0.5861268   0.67359436]
action based on polilcy:[-0.4279785   0.48062405]
action based on polilcy:[-0.3525705   0.43399656]
Total T: 106573 Episode Num: 7745 Episode T: 13 Reward: -2060.449582
action based on polilcy:[-0.2548892   0.63924134]
action based on polilcy:[-0.19672917  0.7248211 ]
action based on polilcy:[-0.5645649  0.7259785]
action based on polilcy:[-0.22294542  0.48977157]
action based on polilcy:[-0.26741418  0.51582146]
action based on polilcy:[-0.15952191  0.4

action based on polilcy:[-0.29897866  0.65341985]
action based on polilcy:[-0.5587603  0.4384766]
action based on polilcy:[-0.39279827  0.44830737]
action based on polilcy:[-0.47321853  0.5672085 ]
action based on polilcy:[-0.27284652  0.4856787 ]
action based on polilcy:[-0.35077602  0.54653597]
action based on polilcy:[-0.60726166  0.5186642 ]
action based on polilcy:[-0.4860464   0.38190866]
action based on polilcy:[-0.52695465  0.24132152]
Total T: 106728 Episode Num: 7759 Episode T: 9 Reward: -632.659028
action based on polilcy:[-0.3035704  0.65465  ]
action based on polilcy:[-0.42981592  0.6725501 ]
action based on polilcy:[-0.35060313  0.4906891 ]
action based on polilcy:[-0.2599763   0.52735984]
action based on polilcy:[-0.66893286  0.6792954 ]
action based on polilcy:[-0.35974178  0.54612935]
action based on polilcy:[-0.53256226  0.6095713 ]
action based on polilcy:[-0.34590277  0.5068092 ]
action based on polilcy:[-0.5495534  0.6496242]
action based on polilcy:[-0.6682563  0.

action based on polilcy:[-0.24050516  0.66394687]
action based on polilcy:[-0.5107051  0.5501993]
action based on polilcy:[-0.5394901   0.47814944]
action based on polilcy:[-0.41407102  0.48320493]
action based on polilcy:[-0.47666624  0.57703876]
action based on polilcy:[-0.33192554  0.44193196]
action based on polilcy:[-0.19503736  0.49940026]
action based on polilcy:[-0.7482778   0.46301773]
action based on polilcy:[-0.53533864  0.33976036]
Total T: 106880 Episode Num: 7773 Episode T: 9 Reward: -720.473974
action based on polilcy:[-0.26174754  0.657555  ]
action based on polilcy:[-0.52352667  0.61956626]
action based on polilcy:[-0.5227853   0.62304103]
action based on polilcy:[-0.50036854  0.6687981 ]
action based on polilcy:[-0.31645444  0.4837376 ]
action based on polilcy:[-0.48403227  0.52618945]
action based on polilcy:[-0.3404829  0.5666208]
action based on polilcy:[-0.22538547  0.42202517]
action based on polilcy:[-0.13720125  0.5263846 ]
action based on polilcy:[-0.6129252  

action based on polilcy:[-0.16232201  0.6580694 ]
action based on polilcy:[-0.28528446  0.7248999 ]
action based on polilcy:[-0.33267102  0.5241475 ]
action based on polilcy:[-0.15484448  0.51286113]
action based on polilcy:[-0.54973686  0.45714858]
action based on polilcy:[-0.13705261  0.5166805 ]
action based on polilcy:[-0.62912345  0.5205916 ]
action based on polilcy:[-0.20835741  0.49355704]
action based on polilcy:[-0.4782819   0.34383026]
Total T: 107026 Episode Num: 7786 Episode T: 9 Reward: -717.861830
action based on polilcy:[-0.12350076  0.6493518 ]
action based on polilcy:[-0.40812823  0.59702957]
action based on polilcy:[-0.4618394  0.5120945]
action based on polilcy:[-0.43912375  0.5179861 ]
action based on polilcy:[-0.26098663  0.5134096 ]
action based on polilcy:[-0.434151   0.5208214]
action based on polilcy:[-0.3179975  0.3980035]
action based on polilcy:[-0.395107    0.28738484]
action based on polilcy:[-0.2734049   0.14376411]
Total T: 107035 Episode Num: 7787 Episo

action based on polilcy:[-0.14117348  0.66084254]
action based on polilcy:[-0.330998    0.73908633]
action based on polilcy:[-0.628486   0.7179373]
action based on polilcy:[-0.43456647  0.46592918]
action based on polilcy:[-0.47338438  0.40652236]
action based on polilcy:[-0.49811044  0.24387853]
action based on polilcy:[-0.20747481  0.44391432]
Total T: 107180 Episode Num: 7802 Episode T: 7 Reward: -854.041288
action based on polilcy:[-0.07857081  0.66834795]
action based on polilcy:[-0.00344234  0.7957059 ]
action based on polilcy:[-0.38639325  0.48350668]
action based on polilcy:[-0.54527783  0.6856423 ]
action based on polilcy:[-0.20015909  0.44159293]
action based on polilcy:[-0.4391109   0.34772852]
action based on polilcy:[-0.35825792  0.2658232 ]
action based on polilcy:[-0.38737476  0.25089806]
Total T: 107188 Episode Num: 7803 Episode T: 8 Reward: -752.797082
action based on polilcy:[-0.00439196  0.66893286]
action based on polilcy:[-0.49688184  0.47738895]
action based on po

action based on polilcy:[-0.11066964  0.6683282 ]
action based on polilcy:[-0.53409743  0.59980196]
action based on polilcy:[-0.43747967  0.56104255]
action based on polilcy:[-0.5238266  0.5919261]
action based on polilcy:[-0.40597266  0.44631714]
action based on polilcy:[-0.48102355  0.44048986]
action based on polilcy:[-0.38884255  0.3366282 ]
Total T: 107359 Episode Num: 7818 Episode T: 7 Reward: -729.903725
action based on polilcy:[0.01062034 0.6876608 ]
action based on polilcy:[-0.46157768  0.5350704 ]
action based on polilcy:[-0.32355097  0.48087028]
action based on polilcy:[-0.22345226  0.48668948]
action based on polilcy:[-0.71708184  0.6314318 ]
action based on polilcy:[-0.5104922  0.6008261]
action based on polilcy:[-0.53156734  0.6151564 ]
action based on polilcy:[-0.59124637  0.7342844 ]
action based on polilcy:[-0.66780484  0.5633641 ]
action based on polilcy:[-0.32199666  0.72699153]
action based on polilcy:[-0.526999   0.7486055]
Total T: 107370 Episode Num: 7819 Episode

action based on polilcy:[-0.0102587  0.6680422]
action based on polilcy:[0.03217508 0.6727663 ]
action based on polilcy:[-0.43313956  0.805935  ]
action based on polilcy:[-0.31807637  0.5671737 ]
action based on polilcy:[0.6058018 0.8481748]
action based on polilcy:[0.07990921 0.7223665 ]
action based on polilcy:[-0.3455773  0.5383742]
action based on polilcy:[-0.4974306   0.52550125]
action based on polilcy:[-0.20746303  0.5963665 ]
action based on polilcy:[-0.40965587  0.5227921 ]
action based on polilcy:[-0.5282512  0.5268996]
action based on polilcy:[-0.37338257  0.5930988 ]
action based on polilcy:[-0.4584447   0.63195944]
action based on polilcy:[-0.45065963  0.73262656]
action based on polilcy:[-0.471368    0.50931084]
action based on polilcy:[-0.53302294  0.49638358]
action based on polilcy:[-0.3779206  0.5066444]
Total T: 107543 Episode Num: 7834 Episode T: 17 Reward: -2335.509337
action based on polilcy:[-0.20318012  0.6383544 ]
action based on polilcy:[-0.37076208  0.6907268

action based on polilcy:[-0.17432444  0.65617883]
action based on polilcy:[-0.51632416  0.49204603]
action based on polilcy:[-0.5310989  0.7055019]
action based on polilcy:[-0.501554   0.6035644]
action based on polilcy:[-0.28495488  0.46744728]
action based on polilcy:[-0.43597296  0.37457868]
action based on polilcy:[-0.54909635  0.22274117]
action based on polilcy:[-0.39603302  0.40615934]
Total T: 107694 Episode Num: 7851 Episode T: 8 Reward: -627.674768
action based on polilcy:[-0.20549962  0.6480131 ]
action based on polilcy:[-0.56942743  0.5662253 ]
action based on polilcy:[-0.6776666   0.53741384]
action based on polilcy:[-0.77985966  0.45776868]
action based on polilcy:[-0.6461983   0.48502204]
action based on polilcy:[-0.5848387   0.53811634]
action based on polilcy:[-0.7816489   0.45800686]
action based on polilcy:[-0.5570973  0.4781742]
action based on polilcy:[-0.73923934  0.47177285]
action based on polilcy:[-0.4967875   0.51754075]
action based on polilcy:[-0.46739244  0

action based on polilcy:[-0.25889876  0.64271057]
action based on polilcy:[-0.1639719  0.7381553]
action based on polilcy:[-0.50204676  0.58868897]
action based on polilcy:[-0.24970628  0.47235006]
action based on polilcy:[-0.27628884  0.4234349 ]
action based on polilcy:[-0.6086874   0.49106333]
action based on polilcy:[-0.5431683  0.3381323]
action based on polilcy:[-0.41300443  0.35917747]
Total T: 107872 Episode Num: 7865 Episode T: 8 Reward: -802.498080
action based on polilcy:[-0.18700123  0.6556618 ]
action based on polilcy:[-0.9978018  0.6012023]
action based on polilcy:[-0.9531793   0.47816518]
action based on polilcy:[-0.99996006  0.8080274 ]
action based on polilcy:[-0.85461915  0.6250993 ]
action based on polilcy:[0.40807435 0.70587665]
action based on polilcy:[-0.25434342  0.61295784]
action based on polilcy:[0.92266536 0.91118854]
action based on polilcy:[0.86379766 0.9669284 ]
action based on polilcy:[0.09966248 0.76816994]
action based on polilcy:[-0.26298887  0.8838997

Total T: 108258 Episode Num: 7885 Episode T: 47 Reward: -7012.807203
Total T: 108307 Episode Num: 7886 Episode T: 49 Reward: -6926.704544
Total T: 108357 Episode Num: 7887 Episode T: 50 Reward: -7006.498405
Total T: 108365 Episode Num: 7888 Episode T: 8 Reward: -827.623179
Total T: 108410 Episode Num: 7889 Episode T: 45 Reward: -7342.879101
Total T: 108456 Episode Num: 7890 Episode T: 46 Reward: -7520.240313
Total T: 108477 Episode Num: 7891 Episode T: 21 Reward: -3115.828707
action based on polilcy:[-0.9999994  -0.28317544]
action based on polilcy:[-0.6167993  -0.05143932]
Total T: 108502 Episode Num: 7892 Episode T: 25 Reward: -2656.915889
action based on polilcy:[-0.26288995  0.6086025 ]
action based on polilcy:[-0.6937221   0.41897634]
action based on polilcy:[-0.6308799  0.6004602]
action based on polilcy:[-0.3807003  0.4478832]
action based on polilcy:[-0.40389326  0.37925977]
action based on polilcy:[-0.49207166  0.36762324]
action based on polilcy:[-0.5775002   0.43825173]
acti

action based on polilcy:[-0.26397836  0.61896276]
action based on polilcy:[-0.6585058  0.5600792]
action based on polilcy:[-0.46228617  0.48428726]
action based on polilcy:[-0.64390254  0.45085645]
action based on polilcy:[-0.6476983   0.44019362]
action based on polilcy:[-0.8221751   0.65940905]
action based on polilcy:[-0.4211261   0.48334333]
action based on polilcy:[-0.6999221   0.51361275]
action based on polilcy:[-0.4805065   0.32858875]
action based on polilcy:[-0.4848602   0.32697022]
action based on polilcy:[-0.48851848  0.38252926]
action based on polilcy:[-0.4985067   0.37911165]
action based on polilcy:[-0.5288306   0.34544438]
Total T: 108675 Episode Num: 7904 Episode T: 13 Reward: -806.031860
action based on polilcy:[-0.19870803  0.61532104]
action based on polilcy:[-0.70680726  0.44499603]
action based on polilcy:[-0.6312964  0.4764183]
action based on polilcy:[-0.9247428   0.47227374]
action based on polilcy:[-0.10979591  0.6555001 ]
action based on polilcy:[-0.3381302 

action based on polilcy:[-0.31936502  0.6065617 ]
action based on polilcy:[-0.99765927  0.5198197 ]
action based on polilcy:[-0.6722824  0.4458967]
action based on polilcy:[-0.9999085   0.54107225]
action based on polilcy:[-0.72352266  0.42078856]
action based on polilcy:[-0.68971705  0.48191255]
action based on polilcy:[-0.57434475  0.44819894]
action based on polilcy:[-0.7971266   0.59284824]
action based on polilcy:[-0.42243695  0.46938616]
action based on polilcy:[-0.38707644  0.42148384]
action based on polilcy:[-0.5059836  0.4117706]
action based on polilcy:[-0.4901538   0.37532738]
action based on polilcy:[-0.46593973  0.25349233]
action based on polilcy:[-0.44867852  0.19257903]
Total T: 108834 Episode Num: 7918 Episode T: 14 Reward: -1061.421199
action based on polilcy:[-0.28051502  0.6019633 ]
action based on polilcy:[-0.98838425  0.44356787]
action based on polilcy:[-0.9609204   0.42187923]
action based on polilcy:[-0.99982595  0.46840647]
action based on polilcy:[-0.9950585

action based on polilcy:[-0.27314025  0.6135078 ]
action based on polilcy:[-0.74296176  0.42312732]
action based on polilcy:[-0.62258625  0.43145972]
action based on polilcy:[-0.67326146  0.44435662]
action based on polilcy:[-0.6732242  0.6236818]
action based on polilcy:[-0.8497753  0.6584978]
action based on polilcy:[-0.7798502  0.5564904]
action based on polilcy:[-0.5378549   0.45081878]
action based on polilcy:[-0.48649845  0.33342424]
Total T: 108990 Episode Num: 7930 Episode T: 9 Reward: -945.212795
action based on polilcy:[-0.325037    0.61953694]
action based on polilcy:[-0.72118807  0.39655012]
action based on polilcy:[-0.75604427  0.44214982]
action based on polilcy:[-0.74386     0.41268083]
action based on polilcy:[-0.7490101   0.43455115]
action based on polilcy:[-0.70276576  0.51349115]
action based on polilcy:[-0.44032773  0.44966415]
action based on polilcy:[-0.44917303  0.49724948]
action based on polilcy:[-0.5563434   0.63615614]
action based on polilcy:[-0.50105006  0

action based on polilcy:[-0.32291633  0.61158717]
action based on polilcy:[-0.49054208  0.63432103]
action based on polilcy:[-0.68985003  0.5444859 ]
action based on polilcy:[-0.3673943   0.40882304]
action based on polilcy:[-0.4611987   0.33802715]
action based on polilcy:[-0.4946524   0.23194335]
action based on polilcy:[-0.44550872  0.16472197]
Total T: 109138 Episode Num: 7944 Episode T: 7 Reward: -570.549795
action based on polilcy:[-0.31763077  0.6127025 ]
action based on polilcy:[-0.56519663  0.6151189 ]
action based on polilcy:[-0.69332314  0.57624364]
action based on polilcy:[-0.64107966  0.52596164]
action based on polilcy:[-0.73750275  0.567662  ]
action based on polilcy:[-0.4895055   0.43007836]
action based on polilcy:[-0.6366397   0.48822176]
action based on polilcy:[-0.49038285  0.34250587]
action based on polilcy:[-0.7452796  0.5329268]
action based on polilcy:[-0.4990231  0.3534076]
action based on polilcy:[-0.53220606  0.29482928]
Total T: 109149 Episode Num: 7945 Epi

action based on polilcy:[-0.32756355  0.6343629 ]
action based on polilcy:[-0.44161457  0.46950546]
action based on polilcy:[-0.2797441  0.6010097]
action based on polilcy:[-0.47530714  0.61474705]
action based on polilcy:[-0.68559253  0.4417636 ]
action based on polilcy:[-0.5379652   0.58208215]
action based on polilcy:[-0.33585697  0.44121683]
action based on polilcy:[-0.343294   0.5012362]
action based on polilcy:[-0.66972256  0.5350453 ]
action based on polilcy:[-0.439307    0.35246488]
action based on polilcy:[-0.7278248   0.51144594]
action based on polilcy:[-0.49473968  0.2675425 ]
action based on polilcy:[-0.49947727  0.19698593]
Total T: 109308 Episode Num: 7955 Episode T: 13 Reward: -981.491435
action based on polilcy:[-0.27811575  0.6405591 ]
action based on polilcy:[-0.23182568  0.78928876]
action based on polilcy:[-0.6070549  0.4410911]
action based on polilcy:[-0.5500386   0.47257975]
action based on polilcy:[-0.99975365  0.54225266]
action based on polilcy:[-0.61905295  

action based on polilcy:[-0.30989808  0.629733  ]
action based on polilcy:[-0.7054426   0.40207648]
action based on polilcy:[-0.5902531   0.49072456]
action based on polilcy:[-0.5764942  0.6238713]
action based on polilcy:[-0.4667858   0.45860267]
action based on polilcy:[-0.64943314  0.644127  ]
action based on polilcy:[-0.6401439  0.6361034]
action based on polilcy:[-0.52842236  0.40418047]
action based on polilcy:[-0.45985484  0.40650028]
action based on polilcy:[-0.4457996   0.25893041]
Total T: 109461 Episode Num: 7968 Episode T: 10 Reward: -931.847854
action based on polilcy:[-0.35641703  0.6245873 ]
action based on polilcy:[-0.35170415  0.6016191 ]
action based on polilcy:[-0.6573403  0.5903599]
action based on polilcy:[-0.42460528  0.43966335]
action based on polilcy:[-0.42008707  0.37778234]
action based on polilcy:[-0.39842045  0.41729453]
action based on polilcy:[-0.37334847  0.45715073]
action based on polilcy:[-0.6516148   0.67417824]
action based on polilcy:[-0.56414914  

action based on polilcy:[-0.34708187  0.6374606 ]
action based on polilcy:[-0.6483356  0.4894743]
action based on polilcy:[-0.5946356   0.49202758]
action based on polilcy:[-0.57447207  0.5595971 ]
action based on polilcy:[-0.5317645   0.61046267]
action based on polilcy:[-0.5151289  0.5568438]
action based on polilcy:[-0.6533226  0.6263332]
action based on polilcy:[-0.45526028  0.41417035]
action based on polilcy:[-0.42955032  0.2877606 ]
Total T: 109630 Episode Num: 7980 Episode T: 9 Reward: -827.443923
action based on polilcy:[-0.39426377  0.6347959 ]
action based on polilcy:[-0.39231196  0.49433255]
action based on polilcy:[-0.51078415  0.6141766 ]
action based on polilcy:[-0.2985311   0.66745126]
action based on polilcy:[-0.6891743   0.43836072]
action based on polilcy:[-0.29799497  0.74566245]
action based on polilcy:[-0.35537535  0.67240214]
action based on polilcy:[-0.6397133   0.60282063]
action based on polilcy:[-0.51777506  0.35653585]
action based on polilcy:[-0.5544507   0

action based on polilcy:[-0.4101995  0.6369134]
action based on polilcy:[-0.3865554   0.66313326]
action based on polilcy:[-0.58451605  0.4942005 ]
action based on polilcy:[-0.57913613  0.55614924]
action based on polilcy:[-0.46740273  0.36241588]
action based on polilcy:[-0.6012825   0.30545393]
action based on polilcy:[-0.6080378  0.292306 ]
action based on polilcy:[-0.62178314  0.31074527]
action based on polilcy:[-0.6323903   0.27415934]
Total T: 109786 Episode Num: 7992 Episode T: 9 Reward: -709.420130
action based on polilcy:[-0.40203342  0.64183354]
action based on polilcy:[-0.341038    0.66822946]
action based on polilcy:[-0.5635535  0.5045457]
action based on polilcy:[-0.54044294  0.34497356]
action based on polilcy:[-0.53791916  0.3015523 ]
action based on polilcy:[-0.5970671  0.1855679]
action based on polilcy:[ 0.09330848 -0.27093908]
Total T: 109793 Episode Num: 7993 Episode T: 7 Reward: -543.345072
action based on polilcy:[-0.3951054  0.64518  ]
action based on polilcy:[-

action based on polilcy:[-0.5037302  0.621619 ]
action based on polilcy:[-0.3230719  0.7383648]
action based on polilcy:[-0.56272274  0.6724309 ]
action based on polilcy:[-0.586308   0.3624093]
action based on polilcy:[-0.5663867   0.40990573]
action based on polilcy:[-0.51223385  0.3356934 ]
action based on polilcy:[-0.5707724  0.4060305]
action based on polilcy:[-0.65671253  0.20235664]
Total T: 109952 Episode Num: 8007 Episode T: 8 Reward: -795.788851
action based on polilcy:[-0.40667993  0.63499486]
action based on polilcy:[-0.39365715  0.4843312 ]
action based on polilcy:[-0.36187616  0.7219137 ]
action based on polilcy:[-0.41889915  0.44542772]
action based on polilcy:[-0.99995315  0.629112  ]
action based on polilcy:[-0.55367637  0.4188529 ]
action based on polilcy:[-0.41799533  0.45120132]
action based on polilcy:[-0.58420235  0.50988084]
action based on polilcy:[-0.42891476  0.694403  ]
action based on polilcy:[-0.7555318   0.44973758]
action based on polilcy:[-0.6944566   0.4

action based on polilcy:[-0.45367444  0.6321102 ]
action based on polilcy:[-0.4437014   0.63359296]
action based on polilcy:[-0.7270024   0.32892302]
action based on polilcy:[-0.7034606   0.57145053]
action based on polilcy:[-0.7344618   0.28557724]
action based on polilcy:[-0.7179055   0.22608921]
action based on polilcy:[-0.681666    0.33280423]
action based on polilcy:[-0.718474   0.2539706]
Total T: 110579 Episode Num: 8034 Episode T: 8 Reward: -808.002500
action based on polilcy:[-0.52514493  0.6175138 ]
action based on polilcy:[-0.6958132   0.40272304]
action based on polilcy:[-0.6563903   0.37999105]
action based on polilcy:[-0.652867   0.6212812]
action based on polilcy:[-0.5671165  0.4548852]
action based on polilcy:[-0.4649065  0.3725112]
action based on polilcy:[-0.6848347   0.33723497]
action based on polilcy:[-0.65962625  0.39262885]
action based on polilcy:[-0.72630143  0.26367947]
action based on polilcy:[-0.70768905  0.20091988]
Total T: 110589 Episode Num: 8035 Episode

action based on polilcy:[-0.5157896  0.6099949]
action based on polilcy:[-0.7763492   0.94057745]
action based on polilcy:[-0.55706745  0.5127984 ]
action based on polilcy:[-0.45725128  0.61410964]
action based on polilcy:[-0.42324108  0.8744682 ]
action based on polilcy:[-0.4720534  0.5757013]
action based on polilcy:[-0.75049406  0.5701803 ]
action based on polilcy:[-0.6604599   0.42178997]
action based on polilcy:[-0.6537143   0.43287143]
action based on polilcy:[-0.27076185  0.7284177 ]
action based on polilcy:[-0.64207906  0.44976103]
action based on polilcy:[-0.5885961  0.5061941]
action based on polilcy:[-0.47086716  0.47166196]
action based on polilcy:[-0.6356064   0.47042668]
action based on polilcy:[-0.65129256  0.4520505 ]
action based on polilcy:[-0.75066304  0.59087455]
action based on polilcy:[-0.5582634   0.54812086]
action based on polilcy:[-0.6925744  0.4181693]
action based on polilcy:[-0.48366207  0.59423864]
action based on polilcy:[-0.62352836  0.5291933 ]
action b

action based on polilcy:[-0.50660944  0.60366464]
action based on polilcy:[-0.41019925  0.50758576]
action based on polilcy:[-0.7312838   0.40081522]
action based on polilcy:[-0.67334676  0.4451039 ]
action based on polilcy:[-0.65192086  0.42730445]
action based on polilcy:[-0.6430466   0.44972277]
action based on polilcy:[-0.7462009  0.3191838]
action based on polilcy:[-0.7314739   0.47442794]
action based on polilcy:[-0.73965615  0.4748484 ]
action based on polilcy:[-0.75802886  0.6281911 ]
action based on polilcy:[-0.6959914  0.4077829]
action based on polilcy:[-0.8040901  0.5860159]
action based on polilcy:[-0.7404788   0.28550178]
Total T: 110908 Episode Num: 8058 Episode T: 13 Reward: -1056.585267
action based on polilcy:[-0.5318757   0.60872966]
action based on polilcy:[-0.7394337   0.39474836]
action based on polilcy:[-0.747275    0.42387977]
action based on polilcy:[-0.7317103   0.37197536]
action based on polilcy:[-0.7286277   0.26527342]
action based on polilcy:[-0.78159934 

action based on polilcy:[-0.48769176  0.6259862 ]
action based on polilcy:[-0.64311326  0.42375523]
action based on polilcy:[-0.42307085  0.71527135]
action based on polilcy:[-0.6052325   0.51826274]
action based on polilcy:[-0.5815904   0.42245904]
action based on polilcy:[-0.40851155  0.9083625 ]
action based on polilcy:[-0.59771454  0.63681984]
action based on polilcy:[-0.6848906   0.78732085]
action based on polilcy:[-0.5637535  0.4752574]
action based on polilcy:[-0.25216514  0.80355245]
action based on polilcy:[-0.665993   0.9104407]
action based on polilcy:[-0.37683856  0.6490003 ]
action based on polilcy:[-0.24336657  0.7585498 ]
action based on polilcy:[-0.38342446  0.7676304 ]
action based on polilcy:[-0.863237    0.43934605]
action based on polilcy:[-0.21323314  0.52967125]
action based on polilcy:[-0.4310849   0.44261792]
Total T: 111075 Episode Num: 8072 Episode T: 17 Reward: -1718.479762
action based on polilcy:[-0.48012495  0.6159334 ]
action based on polilcy:[-0.6052198

action based on polilcy:[-0.11103202  0.21835318]
Total T: 111249 Episode Num: 8085 Episode T: 24 Reward: -2447.055548
action based on polilcy:[-0.5242219   0.61264026]
action based on polilcy:[-0.725893    0.52424693]
action based on polilcy:[-0.7730858   0.55303633]
action based on polilcy:[-0.70876074  0.39181414]
action based on polilcy:[-0.7300883  0.5583097]
action based on polilcy:[-0.71491563  0.31059042]
action based on polilcy:[-0.75085175  0.36693394]
action based on polilcy:[-0.66956306  0.33694464]
action based on polilcy:[-0.5374014  0.3015663]
action based on polilcy:[-0.7157264   0.21956845]
Total T: 111259 Episode Num: 8086 Episode T: 10 Reward: -688.979675
action based on polilcy:[-0.53742564  0.61903507]
action based on polilcy:[-0.4661011   0.42084897]
action based on polilcy:[-0.6050073   0.60911196]
action based on polilcy:[-0.72405344  0.4281385 ]
action based on polilcy:[-0.43004695  0.60188746]
action based on polilcy:[-0.7934351   0.44730592]
action based on p

action based on polilcy:[-0.4880588   0.61971176]
action based on polilcy:[-0.41826296  0.45901582]
action based on polilcy:[-0.5830349   0.65809494]
action based on polilcy:[-0.6735345  0.5062169]
action based on polilcy:[-0.8189205   0.50708795]
action based on polilcy:[-0.33135998  0.614439  ]
action based on polilcy:[-0.7310308  0.6131848]
action based on polilcy:[-0.75138265  0.67324364]
action based on polilcy:[-0.70756817  0.32383165]
action based on polilcy:[-0.59999216  0.30240425]
action based on polilcy:[-0.72005105  0.42329016]
action based on polilcy:[-0.65088904  0.21984544]
Total T: 111417 Episode Num: 8098 Episode T: 12 Reward: -1195.526541
action based on polilcy:[-0.5034814   0.59888077]
action based on polilcy:[-0.64674044  0.5600805 ]
action based on polilcy:[-0.83634496  0.48324087]
action based on polilcy:[-0.28510857  0.77834827]
action based on polilcy:[-0.71704257  0.41689858]
action based on polilcy:[-0.68800974  0.43408394]
action based on polilcy:[-0.7757610

action based on polilcy:[-0.515383    0.60414666]
action based on polilcy:[-0.79749626  0.43827844]
action based on polilcy:[-0.690771    0.44159207]
action based on polilcy:[-0.6510254   0.45513827]
action based on polilcy:[-0.6199026  0.5859712]
action based on polilcy:[-0.7673769   0.45986316]
action based on polilcy:[-0.8229611  0.6483813]
action based on polilcy:[-0.7375672   0.33576873]
action based on polilcy:[-0.72422206  0.3187771 ]
action based on polilcy:[-0.804068   0.4100842]
action based on polilcy:[-0.75615185  0.39963132]
action based on polilcy:[-0.7739742  0.4073682]
action based on polilcy:[-0.8653447   0.56779546]
action based on polilcy:[-0.757416   0.4284876]
Total T: 111600 Episode Num: 8109 Episode T: 14 Reward: -1233.329576
action based on polilcy:[-0.48807725  0.6156019 ]
action based on polilcy:[-0.57709014  0.4855077 ]
action based on polilcy:[-0.62345874  0.43485466]
action based on polilcy:[-0.80769837  0.58558583]
action based on polilcy:[-0.7122662   0.4

action based on polilcy:[-0.5007562   0.61700225]
action based on polilcy:[-0.68212223  0.4245257 ]
action based on polilcy:[-0.6724626   0.44967455]
action based on polilcy:[-0.8298849   0.58022094]
action based on polilcy:[-0.7218927   0.40753958]
action based on polilcy:[-0.7189909   0.42038643]
action based on polilcy:[-0.7656487   0.46703735]
action based on polilcy:[-0.74393225  0.34787273]
action based on polilcy:[-0.8256432   0.46321324]
action based on polilcy:[-0.8256234  0.4813619]
action based on polilcy:[-0.79688     0.41224983]
action based on polilcy:[-0.7774356  0.4079679]
Total T: 111759 Episode Num: 8121 Episode T: 12 Reward: -904.911613
action based on polilcy:[-0.52589613  0.6179023 ]
action based on polilcy:[-0.726019    0.42437816]
action based on polilcy:[-0.31627998  0.82918787]
action based on polilcy:[-0.6190751  0.5895479]
action based on polilcy:[-0.70956945  0.6813772 ]
action based on polilcy:[-0.73855305  0.56700516]
action based on polilcy:[-0.843643   0

action based on polilcy:[-0.5078436   0.62243176]
action based on polilcy:[-0.4750837  0.7388222]
action based on polilcy:[-0.7440424  0.5371387]
action based on polilcy:[-0.43384984  0.43105903]
action based on polilcy:[-0.43552974  0.48461652]
action based on polilcy:[-0.75209564  0.5203817 ]
action based on polilcy:[-0.459071    0.53474593]
action based on polilcy:[-0.7551567   0.54914093]
action based on polilcy:[-0.7070031   0.43897724]
action based on polilcy:[-0.73391163  0.41286713]
action based on polilcy:[-0.8029302  0.5394788]
action based on polilcy:[-0.6599058   0.32148242]
action based on polilcy:[-0.78822446  0.45275137]
Total T: 111911 Episode Num: 8132 Episode T: 13 Reward: -1282.356964
action based on polilcy:[-0.5561283  0.6015837]
action based on polilcy:[-0.7825587   0.52379334]
action based on polilcy:[-0.47014755  0.4081362 ]
action based on polilcy:[-0.4041482   0.47477952]
action based on polilcy:[-0.54528815  0.39415124]
action based on polilcy:[-0.5585565  0.

action based on polilcy:[-0.50123703  0.626288  ]
action based on polilcy:[-0.44252896  0.74532205]
action based on polilcy:[-0.50681746  0.5344631 ]
action based on polilcy:[-0.80645204  0.52988255]
action based on polilcy:[-0.7077817  0.3987145]
action based on polilcy:[-0.78712726  0.48716447]
action based on polilcy:[-0.42897978  0.4974892 ]
action based on polilcy:[-0.7490225   0.56823444]
action based on polilcy:[-0.7802405  0.5206388]
action based on polilcy:[-0.48366117  0.47940475]
action based on polilcy:[-0.8303212   0.49481526]
action based on polilcy:[-0.6955242   0.35619155]
action based on polilcy:[-0.6577356   0.40826833]
action based on polilcy:[-0.7475289   0.43443325]
action based on polilcy:[-0.652132    0.46695337]
action based on polilcy:[-0.73052585  0.4685331 ]
action based on polilcy:[-0.8650674  0.516827 ]
action based on polilcy:[-0.76186055  0.6131345 ]
Total T: 112554 Episode Num: 8157 Episode T: 18 Reward: -1590.002568
action based on polilcy:[-0.5161874  

action based on polilcy:[-0.40869853  0.6355911 ]
action based on polilcy:[-0.33472568  0.47438273]
action based on polilcy:[-0.45694613  0.46008074]
action based on polilcy:[-0.67852736  0.55657303]
action based on polilcy:[-0.43712327  0.73189735]
action based on polilcy:[-0.6710359   0.44305405]
action based on polilcy:[-0.38445786  0.4443135 ]
action based on polilcy:[-0.31078854  0.6692339 ]
action based on polilcy:[-0.2528542   0.81003165]
action based on polilcy:[-0.24694903  0.7103683 ]
action based on polilcy:[-0.0718727  0.6996734]
action based on polilcy:[-0.67639613  0.754593  ]
action based on polilcy:[-0.21407983  0.8319561 ]
action based on polilcy:[-0.12464829  0.62435496]
action based on polilcy:[-0.1650441   0.61206824]
action based on polilcy:[-0.54381657  0.5382638 ]
Total T: 112723 Episode Num: 8170 Episode T: 16 Reward: -1694.909200
action based on polilcy:[-0.46369502  0.62205803]
action based on polilcy:[-0.44883025  0.6244143 ]
action based on polilcy:[-0.74423

action based on polilcy:[-0.4795514  0.6315521]
action based on polilcy:[-0.6060294  0.4421142]
action based on polilcy:[-0.35362005  0.78169715]
action based on polilcy:[-0.7282721   0.42503056]
action based on polilcy:[-0.87582785  0.55763495]
action based on polilcy:[-0.51826376  0.5075526 ]
action based on polilcy:[-0.68942106  0.441464  ]
action based on polilcy:[-0.8520232  0.5371278]
action based on polilcy:[-0.7383275   0.45526984]
action based on polilcy:[-0.6249439   0.49088565]
action based on polilcy:[-0.70433044  0.36967424]
action based on polilcy:[-0.8890448   0.48680022]
action based on polilcy:[-0.6826211  0.2640019]
Total T: 112892 Episode Num: 8184 Episode T: 13 Reward: -1287.064255
action based on polilcy:[-0.47476438  0.63131773]
action based on polilcy:[-0.47178766  0.54322445]
action based on polilcy:[-0.7270932  0.4206201]
action based on polilcy:[-0.71220917  0.3574334 ]
action based on polilcy:[-0.6594495   0.37336013]
action based on polilcy:[-0.66431653  0.2

action based on polilcy:[-0.44299597  0.62946177]
action based on polilcy:[-0.41362298  0.7613672 ]
action based on polilcy:[-0.5959326   0.45109633]
action based on polilcy:[-0.32175434  0.69745934]
action based on polilcy:[-0.36374268  0.7124961 ]
action based on polilcy:[-0.45294487  0.55174184]
action based on polilcy:[-0.66797495  0.4726912 ]
action based on polilcy:[-0.7511669   0.45756596]
action based on polilcy:[-0.51792705  0.529771  ]
action based on polilcy:[-0.30591798  0.45371595]
action based on polilcy:[-0.20664161  0.6446526 ]
action based on polilcy:[-0.7280812   0.40953067]
action based on polilcy:[-0.1340658   0.62116385]
action based on polilcy:[-0.33348113  0.45301837]
action based on polilcy:[-0.30789012  0.7391343 ]
action based on polilcy:[-0.36786067  0.62954384]
action based on polilcy:[-0.40673083  0.4381807 ]
action based on polilcy:[-0.91812336  0.45494944]
action based on polilcy:[-0.6040829   0.45584616]
action based on polilcy:[-0.36768615  0.5476633 ]


action based on polilcy:[-0.5790361   0.40436897]
action based on polilcy:[-0.6752845   0.29807395]
action based on polilcy:[-0.6256373   0.18830822]
Total T: 113222 Episode Num: 8208 Episode T: 8 Reward: -633.625572
action based on polilcy:[-0.47519538  0.63228333]
action based on polilcy:[-0.43708602  0.49441016]
action based on polilcy:[-0.36562237  0.61451066]
action based on polilcy:[-0.32969755  0.5486243 ]
action based on polilcy:[-0.9396557  0.8511772]
action based on polilcy:[-0.5123668   0.42206645]
action based on polilcy:[-0.565174    0.43770844]
action based on polilcy:[-0.5192497  0.4635906]
action based on polilcy:[-0.67864424  0.5314423 ]
action based on polilcy:[-0.78810316  0.5367434 ]
action based on polilcy:[-0.7781012   0.55408454]
action based on polilcy:[-0.79893136  0.4906313 ]
action based on polilcy:[-0.6234983  0.309982 ]
action based on polilcy:[-0.8100593   0.10809755]
Total T: 113236 Episode Num: 8209 Episode T: 14 Reward: -1385.095944
action based on poli

action based on polilcy:[-0.48207235  0.64491004]
action based on polilcy:[-0.4146121  0.7243602]
action based on polilcy:[-0.65683115  0.43662125]
action based on polilcy:[-0.75574875  0.4269094 ]
action based on polilcy:[-0.65945375  0.4357608 ]
action based on polilcy:[-0.6452186   0.45160174]
action based on polilcy:[-0.7873396   0.50313437]
action based on polilcy:[-0.86817485  0.55170727]
action based on polilcy:[-0.81598556  0.53202575]
action based on polilcy:[-0.8073913   0.43428105]
action based on polilcy:[-0.60417616  0.47180617]
action based on polilcy:[-0.6258389  0.4670099]
action based on polilcy:[-0.6328233   0.48537728]
action based on polilcy:[-0.72723746  0.35072666]
Total T: 113385 Episode Num: 8221 Episode T: 14 Reward: -1012.268229
action based on polilcy:[-0.42252123  0.633533  ]
action based on polilcy:[-0.64215606  0.5492481 ]
action based on polilcy:[-0.7733301  0.5702844]
action based on polilcy:[-0.7027749  0.3383665]
action based on polilcy:[-0.6485057   0

action based on polilcy:[-0.44911987  0.48750854]
action based on polilcy:[-0.17609103  0.7016864 ]
action based on polilcy:[-0.69341516  0.42184657]
action based on polilcy:[-0.6933907  0.5592382]
action based on polilcy:[-0.59281075  0.6201239 ]
action based on polilcy:[-0.28750956  0.5377258 ]
action based on polilcy:[-0.14695773  0.7936992 ]
action based on polilcy:[-0.29223275  0.57172775]
action based on polilcy:[-0.4468497  0.5602442]
action based on polilcy:[-0.62458694  0.5255275 ]
action based on polilcy:[-0.7773177  0.5375552]
action based on polilcy:[-0.60287416  0.52648413]
action based on polilcy:[-0.45790344  0.50272524]
action based on polilcy:[-0.49060857  0.456558  ]
action based on polilcy:[-0.99821025  0.5278282 ]
action based on polilcy:[-0.995821   0.5852022]
action based on polilcy:[-0.9997073   0.97902435]
action based on polilcy:[-0.42777285  0.55294865]
action based on polilcy:[-0.5464326   0.40161404]
action based on polilcy:[-0.47500378  0.46850207]
action b

action based on polilcy:[-0.4440039  0.6269784]
action based on polilcy:[-0.4985392   0.43278617]
action based on polilcy:[-0.41737214  0.5298666 ]
action based on polilcy:[-0.73469126  0.5533906 ]
action based on polilcy:[-0.28820276  0.59418225]
action based on polilcy:[-0.78814125  0.5406102 ]
action based on polilcy:[-0.7201692   0.36835188]
action based on polilcy:[-0.81787395  0.49987078]
action based on polilcy:[-0.66241145  0.2851846 ]
action based on polilcy:[-0.6018517   0.43909776]
action based on polilcy:[-0.66742826  0.31934127]
action based on polilcy:[-0.6795031   0.48495352]
action based on polilcy:[-0.55820906  0.5055171 ]
action based on polilcy:[-0.24398439  0.73208666]
action based on polilcy:[-0.6638732   0.45205888]
action based on polilcy:[-0.5916399   0.46803832]
Total T: 113722 Episode Num: 8245 Episode T: 16 Reward: -1340.432541
action based on polilcy:[-0.43837348  0.63409555]
action based on polilcy:[-0.38132775  0.5488928 ]
action based on polilcy:[-0.41354

action based on polilcy:[-0.41048115  0.6402248 ]
action based on polilcy:[-0.36399364  0.47281092]
action based on polilcy:[-0.5334245   0.45008612]
action based on polilcy:[-0.36572134  0.659505  ]
action based on polilcy:[-0.74758744  0.46463594]
action based on polilcy:[-0.41001934  0.6132561 ]
action based on polilcy:[-0.76614964  0.5441073 ]
action based on polilcy:[-0.6537911   0.44822156]
action based on polilcy:[-0.8123909  0.5367775]
action based on polilcy:[-0.75073695  0.5402777 ]
action based on polilcy:[-0.6671823   0.48146063]
action based on polilcy:[-0.5831541   0.49738017]
action based on polilcy:[-0.79979956  0.52630174]
action based on polilcy:[-0.7202862  0.4388727]
action based on polilcy:[-0.6375202  0.42261  ]
Total T: 113892 Episode Num: 8258 Episode T: 15 Reward: -1219.359292
action based on polilcy:[-0.41567323  0.6311323 ]
action based on polilcy:[-0.40915114  0.5159844 ]
action based on polilcy:[-0.5947895  0.5008631]
action based on polilcy:[-0.7587917  0.

action based on polilcy:[-0.34650844  0.65054095]
action based on polilcy:[-0.6488222   0.54547906]
action based on polilcy:[-0.76951337  0.58664227]
action based on polilcy:[-0.90260327  0.54004896]
action based on polilcy:[-0.7212542   0.49606913]
action based on polilcy:[-0.7896311   0.45688504]
action based on polilcy:[-0.7631048   0.28824642]
action based on polilcy:[-0.70320463  0.3435513 ]
Total T: 114512 Episode Num: 8284 Episode T: 8 Reward: -823.830807
action based on polilcy:[-0.3478378  0.6623055]
action based on polilcy:[-0.32687393  0.8493885 ]
action based on polilcy:[-0.48401466  0.45284837]
action based on polilcy:[-0.34453022  0.6070554 ]
action based on polilcy:[-0.27401334  0.6643007 ]
action based on polilcy:[-0.5004009   0.45300397]
action based on polilcy:[-0.35778475  0.46802303]
action based on polilcy:[-0.30963257  0.5151136 ]
action based on polilcy:[-0.2443669  0.6500579]
action based on polilcy:[-0.695649   0.5198004]
action based on polilcy:[-0.29085442  0

action based on polilcy:[-0.33981004  0.6585901 ]
action based on polilcy:[-0.32292777  0.6161432 ]
action based on polilcy:[-0.48203743  0.4426244 ]
action based on polilcy:[-0.26638582  0.6745213 ]
action based on polilcy:[-0.4192714  0.4648036]
action based on polilcy:[-0.704508    0.41784722]
action based on polilcy:[-0.75767887  0.5050982 ]
action based on polilcy:[-0.8830648   0.47204903]
action based on polilcy:[-0.82784384  0.5066936 ]
action based on polilcy:[-0.684628    0.31282917]
action based on polilcy:[-0.7977591   0.22061063]
Total T: 114672 Episode Num: 8296 Episode T: 11 Reward: -891.903256
action based on polilcy:[-0.36506817  0.6655244 ]
action based on polilcy:[-0.5227523   0.46854195]
action based on polilcy:[-0.69985294  0.66258717]
action based on polilcy:[-0.70465565  0.38175297]
action based on polilcy:[-0.6656592   0.36980453]
action based on polilcy:[-0.65906656  0.32151634]
action based on polilcy:[-0.8602412   0.44329703]
action based on polilcy:[-0.738129

action based on polilcy:[-0.3544213  0.6566688]
action based on polilcy:[-0.7041898  0.4860637]
action based on polilcy:[-0.72187495  0.37923932]
action based on polilcy:[-0.7670289   0.41505682]
action based on polilcy:[-0.95775    0.6524102]
action based on polilcy:[-0.6512523  0.3226068]
action based on polilcy:[-0.85306996  0.51492375]
action based on polilcy:[-0.6614934   0.34927306]
action based on polilcy:[-0.5827984   0.48550287]
action based on polilcy:[-0.6311892   0.42911562]
Total T: 114842 Episode Num: 8309 Episode T: 10 Reward: -816.309605
action based on polilcy:[-0.29378128  0.6484339 ]
action based on polilcy:[-0.34420636  0.98415226]
action based on polilcy:[-0.28420678  0.66244996]
action based on polilcy:[-0.19201894  0.6696595 ]
action based on polilcy:[-0.68500334  0.42445448]
action based on polilcy:[-0.5133523  0.4869124]
action based on polilcy:[-0.58712775  0.6490399 ]
action based on polilcy:[-0.44580045  0.4860304 ]
action based on polilcy:[-0.2625947   0.46

---------------------------------------
Episode_num: 8322, Evaluation over 1 episodes: -638.504782
---------------------------------------
action based on polilcy:[-0.17126578  0.63206637]
Total T: 115001 Episode Num: 8322 Episode T: 11 Reward: -669.151671
action based on polilcy:[-0.32313985  0.6701459 ]
action based on polilcy:[-0.35868302  0.8536679 ]
action based on polilcy:[-0.37823007  0.8256887 ]
action based on polilcy:[-0.28889528  0.8385688 ]
action based on polilcy:[-0.64692587  0.4162843 ]
action based on polilcy:[-0.77032924  0.5674926 ]
action based on polilcy:[-0.47772586  0.4975601 ]
action based on polilcy:[-0.7646331  0.5398878]
action based on polilcy:[-0.6022682   0.45812857]
action based on polilcy:[-0.7379166   0.52123106]
action based on polilcy:[-0.8875819  0.6720829]
action based on polilcy:[-0.74882245  0.40982634]
action based on polilcy:[-0.60535175  0.34754482]
Total T: 115014 Episode Num: 8323 Episode T: 13 Reward: -1381.269532
action based on polilcy:[-0.

action based on polilcy:[-0.27904922  0.66666174]
action based on polilcy:[-0.20905067  0.651073  ]
action based on polilcy:[-0.30974087  0.7411697 ]
action based on polilcy:[-0.42469287  0.8905184 ]
action based on polilcy:[-0.37290695  0.9988885 ]
action based on polilcy:[-0.23646139  0.5441938 ]
action based on polilcy:[-0.27078944  0.7153756 ]
action based on polilcy:[-0.34183905  0.99526656]
action based on polilcy:[-0.3859025  0.7416293]
action based on polilcy:[-0.42522854  0.799733  ]
action based on polilcy:[-0.46018878  0.89228225]
action based on polilcy:[-0.02679915  0.83803546]
action based on polilcy:[-0.18545221  0.64315116]
action based on polilcy:[0.22966473 0.84718317]
action based on polilcy:[0.06793282 0.72989374]
action based on polilcy:[-0.3132261   0.79126155]
action based on polilcy:[-0.05585867  0.73060155]
action based on polilcy:[-0.07237545  0.86209905]
Total T: 115183 Episode Num: 8337 Episode T: 18 Reward: -2226.740590
action based on polilcy:[-0.29483065 

action based on polilcy:[-0.2845683   0.67877114]
action based on polilcy:[-0.29513958  0.6965183 ]
action based on polilcy:[-0.7958683  0.5603092]
action based on polilcy:[-0.7344266   0.50174415]
action based on polilcy:[-0.7997752  0.5369921]
action based on polilcy:[-0.79481566  0.44850892]
action based on polilcy:[-0.5877569  0.4680637]
action based on polilcy:[-0.67427206  0.32153106]
action based on polilcy:[-0.91060686  0.48152614]
action based on polilcy:[-0.756006   0.5205246]
Total T: 115325 Episode Num: 8349 Episode T: 10 Reward: -890.474122
action based on polilcy:[-0.30377966  0.6913012 ]
action based on polilcy:[-0.41060275  0.42440456]
action based on polilcy:[-0.49451116  0.5268649 ]
action based on polilcy:[-0.54992276  0.45953992]
action based on polilcy:[-0.77452683  0.6837065 ]
action based on polilcy:[-0.70771325  0.37405404]
action based on polilcy:[-0.74172115  0.5272684 ]
action based on polilcy:[-0.70847106  0.38227496]
action based on polilcy:[-0.6863881   0.

action based on polilcy:[-0.2892282  0.81813  ]
action based on polilcy:[-0.36067024  0.5025178 ]
action based on polilcy:[-0.64588785  0.5565078 ]
action based on polilcy:[-0.72783303  0.46447465]
action based on polilcy:[-0.9396167  0.5903466]
action based on polilcy:[-0.6431869  0.2786854]
action based on polilcy:[-0.68197954  0.2174972 ]
action based on polilcy:[-0.70928717  0.13359295]
Total T: 115489 Episode Num: 8361 Episode T: 8 Reward: -700.849396
action based on polilcy:[-0.27592587  0.81490767]
action based on polilcy:[-0.64690137  0.50837576]
action based on polilcy:[-0.7040995  0.3922685]
action based on polilcy:[-0.5852075   0.45442474]
action based on polilcy:[-0.8848143  0.6579058]
action based on polilcy:[-0.638621   0.4147671]
action based on polilcy:[-0.9397356   0.40300858]
action based on polilcy:[-0.5930271   0.23248492]
action based on polilcy:[-0.5883749   0.27606356]
Total T: 115498 Episode Num: 8362 Episode T: 9 Reward: -674.326274
action based on polilcy:[-0.

action based on polilcy:[-0.29216537  0.86689824]
action based on polilcy:[-0.28767186  0.9586458 ]
action based on polilcy:[-0.5739876   0.52323294]
action based on polilcy:[-0.7029711  0.5153876]
action based on polilcy:[-0.6064862  0.7114688]
action based on polilcy:[-0.2978075   0.48934913]
action based on polilcy:[-0.6511365  0.4662081]
action based on polilcy:[-0.69284827  0.5686318 ]
action based on polilcy:[-0.6582229  0.4298693]
action based on polilcy:[-0.61139184  0.68308246]
action based on polilcy:[-0.7439542  0.5952814]
action based on polilcy:[-0.7603563   0.50035423]
action based on polilcy:[-0.6978999   0.47126296]
action based on polilcy:[-0.73144615  0.4483704 ]
Total T: 115672 Episode Num: 8376 Episode T: 14 Reward: -1549.027542
action based on polilcy:[-0.3023102  0.8676921]
action based on polilcy:[-0.66218114  0.4825423 ]
action based on polilcy:[-0.562325    0.48529047]
action based on polilcy:[-0.74555254  0.5325428 ]
action based on polilcy:[-0.78496075  0.450

action based on polilcy:[-0.2756891  0.8876782]
action based on polilcy:[-0.2473196  0.7472229]
action based on polilcy:[-0.69135845  0.4234959 ]
action based on polilcy:[-0.6271642   0.44429547]
action based on polilcy:[-0.640447    0.35376257]
action based on polilcy:[-0.60794646  0.37635508]
action based on polilcy:[-0.6659893   0.29028425]
action based on polilcy:[-0.8881449   0.23251618]
action based on polilcy:[-0.6523413   0.24104403]
Total T: 115832 Episode Num: 8393 Episode T: 9 Reward: -534.941652
action based on polilcy:[-0.2643977   0.89680886]
action based on polilcy:[-0.32775137  0.6960658 ]
action based on polilcy:[-0.67901856  0.42419428]
action based on polilcy:[-0.85595906  0.7482383 ]
action based on polilcy:[-0.60979104  0.49950987]
action based on polilcy:[-0.3316349  0.5272722]
action based on polilcy:[-0.39295623  0.510129  ]
action based on polilcy:[-0.36745942  0.6410109 ]
action based on polilcy:[-0.84419376  0.7184938 ]
action based on polilcy:[-0.6269656   0

action based on polilcy:[-0.23573299  0.89351916]
action based on polilcy:[-0.4414373  0.525241 ]
action based on polilcy:[-0.70195293  0.5181153 ]
action based on polilcy:[-0.54698604  0.5063715 ]
action based on polilcy:[-0.43531027  0.5381214 ]
action based on polilcy:[-0.6934525  0.5000571]
action based on polilcy:[-0.634326   0.4659191]
action based on polilcy:[-0.5909072   0.42537317]
action based on polilcy:[-0.61252236  0.3607696 ]
action based on polilcy:[-0.75893956  0.45866543]
action based on polilcy:[-0.8064928  0.5177908]
action based on polilcy:[-0.8962314   0.71937466]
action based on polilcy:[-0.77523917  0.64684904]
---------------------------------------
Episode_num: 8407, Evaluation over 1 episodes: -640.652166
---------------------------------------
Total T: 116000 Episode Num: 8407 Episode T: 13 Reward: -640.652166
Total T: 116042 Episode Num: 8408 Episode T: 42 Reward: -7419.709107
Total T: 116089 Episode Num: 8409 Episode T: 47 Reward: -7256.499595
Total T: 1161

action based on polilcy:[0.59009737 0.6770724 ]
action based on polilcy:[-0.20655222  0.7560318 ]
action based on polilcy:[-0.1509591  0.6125965]
action based on polilcy:[0.5661944  0.77770036]
action based on polilcy:[-0.90706575  0.70961964]
action based on polilcy:[-0.8513159   0.41220653]
action based on polilcy:[-0.47029915  0.30240166]
action based on polilcy:[-0.44046766  0.2236752 ]
Total T: 116649 Episode Num: 8432 Episode T: 9 Reward: -788.344540
action based on polilcy:[-0.11643333  0.9313096 ]
action based on polilcy:[-0.41599876  0.8318408 ]
action based on polilcy:[0.59369874 0.6983735 ]
action based on polilcy:[-0.9640925   0.63325644]
action based on polilcy:[-0.44145724  0.36085007]
action based on polilcy:[-0.4652664   0.26739994]
Total T: 116655 Episode Num: 8433 Episode T: 6 Reward: -651.880766
action based on polilcy:[0.00544075 0.931524  ]
action based on polilcy:[0.15392542 0.88227165]
action based on polilcy:[-0.8949903   0.78133047]
action based on polilcy:[0.2

action based on polilcy:[-0.02736589  0.9434305 ]
action based on polilcy:[-0.01399668  0.8348032 ]
action based on polilcy:[-0.0488034  0.9257966]
action based on polilcy:[-0.8078014  0.7692688]
action based on polilcy:[0.5550678 0.739264 ]
action based on polilcy:[-0.0534141  0.8067549]
action based on polilcy:[-0.6075133  0.6856541]
action based on polilcy:[-0.7666476  0.4936013]
action based on polilcy:[0.48602253 0.77638096]
action based on polilcy:[-0.40935886  0.45933598]
Total T: 116805 Episode Num: 8448 Episode T: 10 Reward: -1125.844330
action based on polilcy:[-0.02552076  0.94116986]
action based on polilcy:[-0.16430631  0.95688546]
action based on polilcy:[0.5500277 0.8863726]
action based on polilcy:[-0.95043576  0.70648646]
action based on polilcy:[-0.97529745  0.5412481 ]
action based on polilcy:[-0.79804325  0.28029627]
action based on polilcy:[-0.50123125  0.2573602 ]
Total T: 116812 Episode Num: 8449 Episode T: 7 Reward: -883.649016
action based on polilcy:[-0.046103

action based on polilcy:[-0.07649789  0.954239  ]
action based on polilcy:[0.4653699  0.84430224]
action based on polilcy:[-0.19564036  0.8054852 ]
action based on polilcy:[-0.4772937   0.83739245]
action based on polilcy:[0.3168966  0.85303247]
action based on polilcy:[0.09493256 0.9346362 ]
action based on polilcy:[-0.90112555  0.78707755]
action based on polilcy:[0.48235443 0.75458395]
action based on polilcy:[0.40647325 0.732043  ]
action based on polilcy:[-0.9376643   0.68401396]
action based on polilcy:[-0.19295135  0.44880086]
action based on polilcy:[-0.9844006  0.7316476]
Total T: 116968 Episode Num: 8465 Episode T: 12 Reward: -1349.771461
action based on polilcy:[-0.09547108  0.9508611 ]
action based on polilcy:[-0.19576557  0.97618514]
action based on polilcy:[-0.25815466  0.7856684 ]
action based on polilcy:[-0.03411936  0.6725668 ]
action based on polilcy:[0.39820853 0.6867076 ]
action based on polilcy:[0.42274743 0.8453039 ]
action based on polilcy:[0.5130358 0.8045194]
a

action based on polilcy:[-0.03830775  0.96404016]
action based on polilcy:[-0.1742061  0.9514382]
action based on polilcy:[0.54690385 0.9299332 ]
action based on polilcy:[-0.08547276  0.8457533 ]
action based on polilcy:[-0.9659425   0.55785406]
action based on polilcy:[-0.45634106  0.3290346 ]
action based on polilcy:[-0.9365824   0.23263021]
Total T: 117113 Episode Num: 8481 Episode T: 7 Reward: -882.713098
action based on polilcy:[-0.12466765  0.9649779 ]
action based on polilcy:[-0.12798569  0.96391636]
action based on polilcy:[0.08227389 0.9657715 ]
action based on polilcy:[-0.65510327  0.7472255 ]
action based on polilcy:[0.49604884 0.74241483]
action based on polilcy:[-0.9497344   0.39423248]
action based on polilcy:[-0.5191448   0.27717644]
action based on polilcy:[-0.8582423   0.31015518]
action based on polilcy:[-0.5583638  0.3031864]
Total T: 117122 Episode Num: 8482 Episode T: 9 Reward: -832.228413
action based on polilcy:[-0.06347774  0.96425384]
action based on polilcy:[-

action based on polilcy:[-0.05185233  0.9728686 ]
action based on polilcy:[-0.03097555  0.9812855 ]
action based on polilcy:[0.52075386 0.85140085]
action based on polilcy:[-0.54380405  0.9222422 ]
action based on polilcy:[0.5249897 0.7261398]
action based on polilcy:[-0.96421653  0.5017982 ]
action based on polilcy:[-0.67372644  0.22848848]
action based on polilcy:[0.45679566 0.77286065]
action based on polilcy:[-0.7674822   0.18286814]
Total T: 117263 Episode Num: 8498 Episode T: 9 Reward: -877.788964
action based on polilcy:[-0.13040969  0.9721041 ]
action based on polilcy:[-0.11830477  0.9849578 ]
action based on polilcy:[-0.06725724  0.97467774]
action based on polilcy:[0.33037484 0.8053349 ]
action based on polilcy:[-0.23148872  0.97233576]
action based on polilcy:[0.1526494  0.92340016]
action based on polilcy:[-0.9172043  0.5637178]
action based on polilcy:[-0.69801116  0.32724997]
action based on polilcy:[-0.46192622  0.2734056 ]
action based on polilcy:[-0.5101965   0.2187391

action based on polilcy:[-0.13257599  0.9831544 ]
action based on polilcy:[0.38531315 0.7477561 ]
action based on polilcy:[0.42192885 0.79523265]
action based on polilcy:[-0.0687584  0.9827555]
action based on polilcy:[-0.21966612  0.96944994]
action based on polilcy:[-0.95907825  0.69195795]
action based on polilcy:[0.49665508 0.7896207 ]
action based on polilcy:[0.33081532 0.96299976]
action based on polilcy:[0.4818378 0.8505699]
action based on polilcy:[0.005047   0.97967553]
action based on polilcy:[-0.44262758  0.9163038 ]
Total T: 117421 Episode Num: 8516 Episode T: 11 Reward: -1364.417583
action based on polilcy:[-0.13562645  0.9833948 ]
action based on polilcy:[-0.28124848  0.9872441 ]
action based on polilcy:[0.2000571 0.7599796]
action based on polilcy:[-0.04335788  0.92980134]
action based on polilcy:[-0.15073697  0.96642244]
action based on polilcy:[0.12894821 0.9897735 ]
action based on polilcy:[0.3274366  0.98188204]
action based on polilcy:[-0.95065176  0.6984568 ]
actio

action based on polilcy:[-0.13635404  0.98724604]
action based on polilcy:[0.46873552 0.943237  ]
action based on polilcy:[-0.11935055  0.96374404]
action based on polilcy:[0.5387615  0.82275796]
action based on polilcy:[-0.9822421  0.4941386]
action based on polilcy:[0.5118015 0.7047451]
action based on polilcy:[-0.6548472   0.10706487]
Total T: 117573 Episode Num: 8534 Episode T: 7 Reward: -727.088782
action based on polilcy:[-0.14862804  0.98744315]
action based on polilcy:[-0.25671178  0.97974503]
action based on polilcy:[-0.29785922  0.7865788 ]
action based on polilcy:[0.16283198 0.8316406 ]
action based on polilcy:[0.3150078 0.9925579]
action based on polilcy:[0.4338961 0.967112 ]
action based on polilcy:[0.6057886 0.7373822]
action based on polilcy:[-0.99143124  0.16867068]
action based on polilcy:[0.6128496  0.80469114]
action based on polilcy:[-0.9416447   0.30671614]
Total T: 117583 Episode Num: 8535 Episode T: 10 Reward: -1077.538640
action based on polilcy:[-0.16802311  0.

action based on polilcy:[-0.24746741  0.9916767 ]
action based on polilcy:[0.24152367 0.99589103]
action based on polilcy:[0.4105779  0.99476564]
action based on polilcy:[0.25315973 0.9948432 ]
action based on polilcy:[0.6202327  0.97700214]
action based on polilcy:[-0.21679929  0.8641656 ]
action based on polilcy:[-0.91958284  0.29381534]
action based on polilcy:[0.24073173 0.8191894 ]
Total T: 117720 Episode Num: 8551 Episode T: 8 Reward: -1043.135739
action based on polilcy:[-0.14661947  0.9915387 ]
action based on polilcy:[-0.16991769  0.9897544 ]
action based on polilcy:[0.4695535  0.98301286]
action based on polilcy:[0.48576453 0.9815238 ]
action based on polilcy:[0.62859297 0.98510224]
action based on polilcy:[-0.96248096  0.6679683 ]
action based on polilcy:[0.65672886 0.74392986]
action based on polilcy:[-0.9910203   0.08067907]
Total T: 117728 Episode Num: 8552 Episode T: 8 Reward: -998.983623
action based on polilcy:[-0.26136896  0.99204755]
action based on polilcy:[0.372921

action based on polilcy:[-0.1966393   0.99385107]
action based on polilcy:[0.5096625  0.87878764]
action based on polilcy:[0.6064464 0.9822224]
action based on polilcy:[-0.9199725  0.6628151]
action based on polilcy:[0.5624202 0.7745472]
action based on polilcy:[-0.99522907  0.23423436]
action based on polilcy:[0.6291109 0.8083733]
action based on polilcy:[0.6117425 0.8478828]
Total T: 117893 Episode Num: 8569 Episode T: 8 Reward: -720.335261
action based on polilcy:[-0.14866905  0.9936517 ]
action based on polilcy:[0.49445757 0.99713314]
action based on polilcy:[-0.75990534  0.8160763 ]
action based on polilcy:[0.60409415 0.7149757 ]
action based on polilcy:[-0.9885316  0.2476297]
action based on polilcy:[-0.7613237   0.16856226]
Total T: 117899 Episode Num: 8570 Episode T: 6 Reward: -736.837426
action based on polilcy:[-0.24866202  0.9939912 ]
action based on polilcy:[-0.34268597  0.9817421 ]
action based on polilcy:[0.3617753 0.8193166]
action based on polilcy:[0.598035   0.99557334

action based on polilcy:[0.03311298 0.99637437]
action based on polilcy:[0.535195   0.97310483]
action based on polilcy:[0.12286799 0.99959683]
action based on polilcy:[0.59715    0.99869907]
action based on polilcy:[0.45958647 0.92612255]
action based on polilcy:[0.59774613 0.99427253]
action based on polilcy:[0.6300564 0.8312   ]
action based on polilcy:[-0.9951204  0.3995931]
action based on polilcy:[0.7286035 0.8145961]
Total T: 118531 Episode Num: 8600 Episode T: 9 Reward: -1183.999212
action based on polilcy:[0.30417266 0.99602294]
action based on polilcy:[0.7552116 0.9864915]
action based on polilcy:[0.73372734 0.9982659 ]
action based on polilcy:[0.9354373 0.9942586]
action based on polilcy:[-0.985407   0.4548105]
action based on polilcy:[0.92060274 0.9761411 ]
Total T: 118537 Episode Num: 8601 Episode T: 6 Reward: -903.922266
action based on polilcy:[0.146761  0.9953128]
action based on polilcy:[0.7301606 0.9986996]
action based on polilcy:[0.5885601  0.99700594]
action based 

action based on polilcy:[0.2854872 0.9944063]
action based on polilcy:[0.7062176 0.9558004]
action based on polilcy:[0.69050014 0.9953479 ]
action based on polilcy:[-0.9747693  0.6243695]
action based on polilcy:[0.6900772  0.90762377]
action based on polilcy:[-0.97806174  0.6169246 ]
action based on polilcy:[0.77997094 0.78065354]
Total T: 118675 Episode Num: 8620 Episode T: 7 Reward: -928.603642
action based on polilcy:[0.10343117 0.9946119 ]
action based on polilcy:[0.59461164 0.81220895]
action based on polilcy:[0.23412709 0.9990867 ]
action based on polilcy:[0.28435755 0.9969877 ]
action based on polilcy:[-0.9748545  0.6449671]
action based on polilcy:[0.55351985 0.84918076]
action based on polilcy:[0.2180302 0.9979697]
action based on polilcy:[-0.23090743  0.99591523]
action based on polilcy:[0.5796904 0.903249 ]
Total T: 118684 Episode Num: 8621 Episode T: 9 Reward: -1501.184757
action based on polilcy:[0.2913462  0.99526036]
action based on polilcy:[0.31788275 0.9991488 ]
actio

action based on polilcy:[0.27965063 0.9937512 ]
action based on polilcy:[0.7633028 0.9395475]
action based on polilcy:[0.84462786 0.9962239 ]
action based on polilcy:[-0.98454005  0.56111836]
action based on polilcy:[0.6991832 0.8410454]
action based on polilcy:[0.33234507 0.971731  ]
action based on polilcy:[0.85553116 0.97855437]
Total T: 118822 Episode Num: 8638 Episode T: 7 Reward: -945.727036
action based on polilcy:[0.170956  0.9942246]
action based on polilcy:[0.59934056 0.9994093 ]
action based on polilcy:[0.68546236 0.771224  ]
action based on polilcy:[0.07228956 0.98683834]
action based on polilcy:[-0.97414434  0.50097495]
action based on polilcy:[0.75529456 0.7969079 ]
action based on polilcy:[-0.4300966  0.1424878]
Total T: 118829 Episode Num: 8639 Episode T: 7 Reward: -748.584464
action based on polilcy:[0.26901698 0.99341214]
action based on polilcy:[0.714025   0.76625365]
action based on polilcy:[0.35914794 0.9936032 ]
action based on polilcy:[0.74251807 0.9184001 ]
acti

action based on polilcy:[0.2700239 0.9924477]
action based on polilcy:[0.46350935 0.9672136 ]
action based on polilcy:[0.30278632 0.99894166]
action based on polilcy:[0.7111444  0.75668263]
action based on polilcy:[0.24513744 0.9884463 ]
action based on polilcy:[0.76987636 0.914065  ]
action based on polilcy:[0.8803951 0.9315182]
action based on polilcy:[-0.92398477  0.41982374]
action based on polilcy:[0.76434135 0.7887576 ]
action based on polilcy:[0.7513492 0.8025935]
Total T: 118985 Episode Num: 8659 Episode T: 10 Reward: -1001.035387
action based on polilcy:[0.20883076 0.99201226]
action based on polilcy:[0.73569226 0.9642256 ]
action based on polilcy:[-0.54966486  0.95915854]
action based on polilcy:[0.6699976 0.7959805]
action based on polilcy:[-0.06223968  0.74610597]
action based on polilcy:[-0.9974944  0.3973064]
action based on polilcy:[0.8966514 0.9770786]
Total T: 118992 Episode Num: 8660 Episode T: 7 Reward: -911.555573
action based on polilcy:[0.30201063 0.99159694]
acti

action based on polilcy:[0.23205179 0.99114054]
action based on polilcy:[0.59091496 0.99843174]
action based on polilcy:[0.47645113 0.99434656]
action based on polilcy:[-0.93394774  0.69625133]
action based on polilcy:[0.6728404 0.7936082]
action based on polilcy:[0.6732615 0.8106909]
action based on polilcy:[0.55827105 0.92519957]
Total T: 119142 Episode Num: 8677 Episode T: 7 Reward: -1025.596434
action based on polilcy:[0.33235836 0.9918215 ]
action based on polilcy:[-0.02705756  0.9885888 ]
action based on polilcy:[-0.99999696  0.99837285]
action based on polilcy:[-0.46962568  0.9576475 ]
action based on polilcy:[-0.99998724  0.9993332 ]
action based on polilcy:[0.29316    0.80521744]
action based on polilcy:[0.53150874 0.9986858 ]
action based on polilcy:[0.6520475 0.9259836]
action based on polilcy:[-0.9996277   0.99252874]
action based on polilcy:[0.17910233 0.96322906]
action based on polilcy:[-0.05277057  0.9777422 ]
action based on polilcy:[-0.9006284  0.824872 ]
action based

action based on polilcy:[0.39325154 0.9932541 ]
action based on polilcy:[0.5323229  0.99614644]
action based on polilcy:[0.7441472  0.99684507]
action based on polilcy:[0.39781877 0.9978218 ]
action based on polilcy:[0.23235668 0.9921445 ]
action based on polilcy:[0.8524039  0.95259786]
action based on polilcy:[-0.99776125  0.3795992 ]
action based on polilcy:[-0.9047395  0.2905675]
Total T: 119317 Episode Num: 8698 Episode T: 8 Reward: -1307.726252
action based on polilcy:[0.32859594 0.9931381 ]
action based on polilcy:[0.702827   0.93562734]
action based on polilcy:[0.15168238 0.999351  ]
action based on polilcy:[0.7269913  0.97943264]
action based on polilcy:[0.01188811 0.9915006 ]
action based on polilcy:[0.6581641 0.7937392]
action based on polilcy:[-0.9836119  0.4381859]
action based on polilcy:[-0.96330833  0.5545737 ]
action based on polilcy:[0.69988334 0.8122943 ]
Total T: 119326 Episode Num: 8699 Episode T: 9 Reward: -1291.747427
action based on polilcy:[0.310285   0.99268395

action based on polilcy:[0.37545115 0.9939683 ]
action based on polilcy:[0.70047563 0.9989815 ]
action based on polilcy:[0.75526524 0.7929719 ]
action based on polilcy:[0.66763014 0.99539804]
action based on polilcy:[-0.3435313  0.9751085]
action based on polilcy:[-0.23816442  0.9821708 ]
action based on polilcy:[0.86513716 0.98822576]
action based on polilcy:[-0.67735785  0.9665717 ]
action based on polilcy:[0.5584146 0.9860486]
Total T: 119465 Episode Num: 8716 Episode T: 9 Reward: -1679.942247
action based on polilcy:[0.36768645 0.99441725]
action based on polilcy:[0.68371475 0.7969532 ]
action based on polilcy:[0.8659105 0.9729849]
action based on polilcy:[-0.9792862  0.534125 ]
action based on polilcy:[0.7617743 0.8014354]
action based on polilcy:[-0.62656975  0.06152378]
action based on polilcy:[0.8345306  0.91918695]
action based on polilcy:[0.7961751  0.88753027]
Total T: 119473 Episode Num: 8717 Episode T: 8 Reward: -704.099087
action based on polilcy:[0.3770783 0.9943428]
act

action based on polilcy:[0.33899087 0.99528384]
action based on polilcy:[0.5166476 0.9200225]
action based on polilcy:[0.7559854  0.99697256]
action based on polilcy:[-0.84368664  0.9062777 ]
action based on polilcy:[0.6377846 0.8507798]
action based on polilcy:[-0.998599   0.5654278]
action based on polilcy:[0.7240778 0.8998471]
action based on polilcy:[0.73211217 0.99484825]
action based on polilcy:[-0.9193804  0.5396819]
action based on polilcy:[0.8382962 0.9085511]
Total T: 119627 Episode Num: 8737 Episode T: 10 Reward: -1272.781708
action based on polilcy:[0.35560519 0.995354  ]
action based on polilcy:[0.6819565 0.998716 ]
action based on polilcy:[0.61417985 0.99804306]
action based on polilcy:[0.8426556 0.989426 ]
action based on polilcy:[0.9623191 0.9944248]
action based on polilcy:[-0.7274815  0.7840131]
action based on polilcy:[0.8926742 0.9744062]
action based on polilcy:[-0.9994637   0.53399366]
action based on polilcy:[0.73057044 0.8781106 ]
Total T: 119636 Episode Num: 87

action based on polilcy:[0.3595216  0.99636877]
action based on polilcy:[0.12606652 0.97600466]
action based on polilcy:[0.5833395 0.9996492]
action based on polilcy:[0.59326804 0.95091397]
action based on polilcy:[0.84802836 0.99677515]
action based on polilcy:[0.89521646 0.9921813 ]
action based on polilcy:[0.97343045 0.99537283]
action based on polilcy:[-0.999741    0.61683655]
action based on polilcy:[0.94646996 0.995746  ]
Total T: 119789 Episode Num: 8758 Episode T: 9 Reward: -1393.424464
action based on polilcy:[0.40072614 0.99638104]
action based on polilcy:[0.67353106 0.9935404 ]
action based on polilcy:[-0.9926772  0.7767189]
action based on polilcy:[0.67385125 0.83361995]
action based on polilcy:[0.8978756  0.99106616]
action based on polilcy:[-0.9814223  0.3673426]
action based on polilcy:[-0.9945274  0.5158189]
Total T: 119796 Episode Num: 8759 Episode T: 7 Reward: -881.240750
action based on polilcy:[0.39562482 0.99631864]
action based on polilcy:[0.43750888 0.9997052 ]
a

action based on polilcy:[0.4033127 0.9961837]
action based on polilcy:[0.5685801 0.9999063]
action based on polilcy:[0.7155651 0.9845413]
action based on polilcy:[-0.9966992   0.69839585]
action based on polilcy:[0.6787243 0.8654071]
action based on polilcy:[-0.99326813  0.7590212 ]
action based on polilcy:[0.76185197 0.78477764]
action based on polilcy:[0.97225827 0.9821424 ]
Total T: 119957 Episode Num: 8779 Episode T: 8 Reward: -1111.489414
action based on polilcy:[0.329218  0.9964682]
action based on polilcy:[0.61278045 0.81212795]
action based on polilcy:[-0.99192846  0.7702403 ]
action based on polilcy:[0.7718123  0.79153466]
action based on polilcy:[0.27066672 0.7968771 ]
action based on polilcy:[-0.9990805   0.16131118]
Total T: 119963 Episode Num: 8780 Episode T: 6 Reward: -693.450762
action based on polilcy:[0.40361303 0.99644524]
action based on polilcy:[-0.15016295  0.970926  ]
action based on polilcy:[0.5706736 0.8385306]
action based on polilcy:[0.7269614 0.9990658]
actio

action based on polilcy:[0.48236415 0.9908954 ]
action based on polilcy:[0.8108571 0.9941576]
action based on polilcy:[0.91602117 0.96446645]
action based on polilcy:[0.9488516  0.98778903]
action based on polilcy:[0.9921597  0.99619454]
action based on polilcy:[-0.8760871   0.09135337]
action based on polilcy:[0.90712583 0.7855536 ]
Total T: 120585 Episode Num: 8812 Episode T: 7 Reward: -782.951181
action based on polilcy:[0.39968848 0.9908381 ]
action based on polilcy:[0.7316961 0.9977996]
action based on polilcy:[-0.98650014  0.7239652 ]
action based on polilcy:[0.776354 0.772238]
action based on polilcy:[-0.99974823  0.44729242]
action based on polilcy:[0.964645  0.9760772]
Total T: 120591 Episode Num: 8813 Episode T: 6 Reward: -926.813313
action based on polilcy:[0.48839253 0.99087113]
action based on polilcy:[0.77524    0.99779755]
action based on polilcy:[-0.9400849   0.76152295]
action based on polilcy:[0.806267   0.76454526]
action based on polilcy:[-0.9589328   0.33723578]
ac

action based on polilcy:[0.44375136 0.9851201 ]
action based on polilcy:[0.7117605  0.99750346]
action based on polilcy:[0.938446   0.98196083]
action based on polilcy:[0.9624966 0.981708 ]
action based on polilcy:[0.99468946 0.99832   ]
action based on polilcy:[-0.9992538   0.34264943]
action based on polilcy:[0.97980326 0.98694146]
Total T: 120735 Episode Num: 8830 Episode T: 7 Reward: -835.616007
action based on polilcy:[0.556007   0.98426193]
action based on polilcy:[0.8008988 0.9969745]
action based on polilcy:[-0.94185215  0.68321216]
action based on polilcy:[0.90750766 0.992498  ]
action based on polilcy:[0.58780026 0.97936016]
action based on polilcy:[-0.46698716  0.89641714]
action based on polilcy:[0.94496685 0.96843284]
action based on polilcy:[-0.99814904  0.4186664 ]
Total T: 120743 Episode Num: 8831 Episode T: 8 Reward: -1488.302652
action based on polilcy:[0.43553823 0.98366416]
action based on polilcy:[0.75374776 0.99749124]
action based on polilcy:[-0.95795053  0.72395

action based on polilcy:[0.60802543 0.9726123 ]
action based on polilcy:[0.8041297 0.9953071]
action based on polilcy:[0.73873436 0.78004193]
action based on polilcy:[-0.98905593  0.5948103 ]
action based on polilcy:[0.9778317  0.98572356]
action based on polilcy:[-0.91353065  0.32599628]
action based on polilcy:[0.8803187 0.8079722]
Total T: 120899 Episode Num: 8850 Episode T: 7 Reward: -895.565299
action based on polilcy:[0.6507959  0.97357386]
action based on polilcy:[0.66944003 0.91033804]
action based on polilcy:[-0.97684675  0.6875495 ]
action based on polilcy:[0.8208694  0.81755745]
action based on polilcy:[0.8252852  0.81106836]
action based on polilcy:[0.08075078 0.83374584]
action based on polilcy:[0.9971024  0.99329156]
Total T: 120906 Episode Num: 8851 Episode T: 7 Reward: -815.841815
action based on polilcy:[0.54709136 0.97157526]
action based on polilcy:[0.67424417 0.96926135]
action based on polilcy:[0.7688733 0.8022709]
action based on polilcy:[-0.80210817  0.42320004]


action based on polilcy:[0.6437113 0.9590207]
action based on polilcy:[0.5991324  0.97728133]
action based on polilcy:[0.6809652  0.74826854]
action based on polilcy:[0.9361426 0.9999671]
action based on polilcy:[0.69305027 0.9995116 ]
action based on polilcy:[0.6881038  0.90940714]
action based on polilcy:[0.68333876 0.9983559 ]
action based on polilcy:[0.82368386 0.9923721 ]
action based on polilcy:[-0.66944265  0.8303199 ]
action based on polilcy:[0.93610036 0.9409741 ]
action based on polilcy:[-0.965726   0.3628202]
action based on polilcy:[0.9022725  0.81353146]
Total T: 121067 Episode Num: 8872 Episode T: 12 Reward: -2012.345870
action based on polilcy:[0.6064283 0.9669245]
action based on polilcy:[0.60109913 0.9992827 ]
action based on polilcy:[0.914362   0.99804527]
action based on polilcy:[0.7817415  0.81467146]
action based on polilcy:[0.9703569 0.9981148]
action based on polilcy:[0.87990534 0.89048404]
action based on polilcy:[-0.9555821  0.6519761]
action based on polilcy:[

action based on polilcy:[0.63425875 0.9600371 ]
action based on polilcy:[0.84358674 0.99897575]
action based on polilcy:[0.9302705  0.99999017]
action based on polilcy:[0.4289449  0.98308814]
action based on polilcy:[0.6472243 0.9985108]
action based on polilcy:[0.60928285 0.8800147 ]
action based on polilcy:[0.97776437 0.99977666]
action based on polilcy:[0.99560434 0.99944556]
action based on polilcy:[0.98801    0.98111504]
action based on polilcy:[0.92135364 0.8357814 ]
action based on polilcy:[0.9942652  0.99272287]
Total T: 121228 Episode Num: 8893 Episode T: 11 Reward: -1634.981027
action based on polilcy:[0.61939   0.9588831]
action based on polilcy:[0.71811235 0.9907416 ]
action based on polilcy:[0.601093  0.8402769]
action based on polilcy:[0.9796475 0.9997984]
action based on polilcy:[0.999496   0.99992514]
action based on polilcy:[0.99993974 0.9999591 ]
action based on polilcy:[0.99997663 0.9999479 ]
Total T: 121235 Episode Num: 8894 Episode T: 7 Reward: -1213.374101
action 

action based on polilcy:[0.9993205  0.99829566]
Total T: 121386 Episode Num: 8914 Episode T: 8 Reward: -1287.469562
action based on polilcy:[0.5427793 0.9738222]
action based on polilcy:[0.6481363 0.9995426]
action based on polilcy:[0.9373977 0.9973974]
action based on polilcy:[0.9708529 0.9975539]
action based on polilcy:[0.91310203 0.9996313 ]
action based on polilcy:[0.9370524 0.9999093]
action based on polilcy:[0.9470017 0.9925001]
action based on polilcy:[0.9984439  0.99938697]
Total T: 121394 Episode Num: 8915 Episode T: 8 Reward: -1420.594013
action based on polilcy:[0.545536  0.9736125]
action based on polilcy:[0.5282494 0.9478588]
action based on polilcy:[0.9316606  0.99878436]
action based on polilcy:[0.9951331 0.9999544]
action based on polilcy:[0.9861088 0.9999092]
action based on polilcy:[0.99650335 0.99616396]
Total T: 121400 Episode Num: 8916 Episode T: 6 Reward: -1003.289410
action based on polilcy:[0.52816635 0.97619593]
action based on polilcy:[0.6088868 0.8068016]
ac

action based on polilcy:[0.60696554 0.97319114]
action based on polilcy:[0.54934835 0.88095605]
action based on polilcy:[0.6796857 0.7898697]
action based on polilcy:[0.99381036 0.9999714 ]
action based on polilcy:[0.99959093 0.99997956]
action based on polilcy:[0.9922053 0.970537 ]
Total T: 121544 Episode Num: 8936 Episode T: 6 Reward: -701.001667
action based on polilcy:[0.588567  0.9708145]
action based on polilcy:[0.6533232 0.9866813]
action based on polilcy:[0.7314018 0.9078146]
action based on polilcy:[0.99814093 0.99975383]
action based on polilcy:[0.7991671 0.7974101]
action based on polilcy:[0.99615073 0.99878097]
action based on polilcy:[0.9998936 0.9996497]
Total T: 121551 Episode Num: 8937 Episode T: 7 Reward: -744.005783
action based on polilcy:[0.5341908 0.9725609]
action based on polilcy:[0.5605823  0.99980116]
action based on polilcy:[0.85643095 0.99992955]
action based on polilcy:[0.9973042 0.9999819]
action based on polilcy:[0.99799204 0.99997586]
action based on poli

action based on polilcy:[0.6298866 0.9997957]
action based on polilcy:[0.6395413  0.80840194]
action based on polilcy:[0.7247067 0.9519854]
action based on polilcy:[0.706013 0.999892]
action based on polilcy:[0.96512675 0.9926026 ]
action based on polilcy:[0.99903333 0.99996966]
Total T: 121707 Episode Num: 8959 Episode T: 8 Reward: -1232.807395
action based on polilcy:[0.5938941 0.9706742]
action based on polilcy:[0.8342317 0.9998921]
action based on polilcy:[0.57579195 0.99981755]
action based on polilcy:[0.76054263 0.98081887]
action based on polilcy:[0.64975506 0.8096075 ]
action based on polilcy:[0.642244   0.91102296]
action based on polilcy:[0.98326176 0.9979022 ]
action based on polilcy:[0.5269327  0.99972177]
action based on polilcy:[0.72065556 0.80099404]
action based on polilcy:[0.98977405 0.9999557 ]
action based on polilcy:[0.99632835 0.9998138 ]
Total T: 121718 Episode Num: 8960 Episode T: 11 Reward: -1728.869988
action based on polilcy:[0.5606619  0.97068036]
action base

action based on polilcy:[0.6639177 0.9652333]
action based on polilcy:[0.652357   0.99978554]
action based on polilcy:[0.8661231 0.999341 ]
action based on polilcy:[0.9764684 0.9998337]
action based on polilcy:[0.9975402  0.99861676]
action based on polilcy:[0.94795114 0.99025697]
Total T: 121859 Episode Num: 8980 Episode T: 6 Reward: -1037.661561
action based on polilcy:[0.66308653 0.9597805 ]
action based on polilcy:[0.7392477 0.999137 ]
action based on polilcy:[0.69701934 0.99971884]
action based on polilcy:[0.718118   0.99982435]
action based on polilcy:[0.7865455 0.7875413]
action based on polilcy:[0.99811524 0.99951136]
action based on polilcy:[0.95419955 0.99276316]
Total T: 121866 Episode Num: 8981 Episode T: 7 Reward: -1176.268548
action based on polilcy:[0.66261995 0.9593041 ]
action based on polilcy:[0.66962826 0.9997719 ]
action based on polilcy:[0.75721276 0.99979967]
action based on polilcy:[0.7549445 0.9997726]
action based on polilcy:[0.7074961  0.99966633]
action based

Total T: 122095 Episode Num: 9001 Episode T: 49 Reward: -7465.867066
Total T: 122108 Episode Num: 9002 Episode T: 13 Reward: -1871.971507
Total T: 122128 Episode Num: 9003 Episode T: 20 Reward: -2994.042913
Total T: 122137 Episode Num: 9004 Episode T: 9 Reward: -1007.027713
Total T: 122188 Episode Num: 9005 Episode T: 51 Reward: -7293.069217
Total T: 122210 Episode Num: 9006 Episode T: 22 Reward: -2617.016913
Total T: 122236 Episode Num: 9007 Episode T: 26 Reward: -3787.192299
Total T: 122252 Episode Num: 9008 Episode T: 16 Reward: -1848.257274
Total T: 122293 Episode Num: 9009 Episode T: 41 Reward: -7534.233686
Total T: 122307 Episode Num: 9010 Episode T: 14 Reward: -2240.929279
Total T: 122355 Episode Num: 9011 Episode T: 48 Reward: -6976.063412
Total T: 122370 Episode Num: 9012 Episode T: 15 Reward: -2524.190080
Total T: 122399 Episode Num: 9013 Episode T: 29 Reward: -4144.561839
Total T: 122444 Episode Num: 9014 Episode T: 45 Reward: -7610.282282
Total T: 122466 Episode Num: 9015 E

action based on polilcy:[0.78436214 0.9348819 ]
action based on polilcy:[0.7918487 0.9695575]
action based on polilcy:[0.7091829 0.8691254]
action based on polilcy:[0.59892046 0.9995848 ]
action based on polilcy:[0.60173166 0.740672  ]
action based on polilcy:[0.99804336 0.99877113]
action based on polilcy:[0.9999438 0.9975765]
action based on polilcy:[0.9952696  0.71035135]
Total T: 122632 Episode Num: 9036 Episode T: 8 Reward: -1107.614214
action based on polilcy:[0.7877635 0.9400469]
action based on polilcy:[0.74118674 0.97538716]
action based on polilcy:[0.92768234 0.9993114 ]
action based on polilcy:[0.9994774  0.99905866]
action based on polilcy:[0.9988316 0.9992663]
action based on polilcy:[0.9997828 0.9979576]
Total T: 122638 Episode Num: 9037 Episode T: 6 Reward: -965.815228
action based on polilcy:[0.74797356 0.92399096]
action based on polilcy:[0.93257505 0.99990624]
action based on polilcy:[0.6346247 0.8826796]
action based on polilcy:[0.52664375 0.9992842 ]
action based on

action based on polilcy:[0.8025769 0.9302341]
action based on polilcy:[0.76787674 0.9679621 ]
action based on polilcy:[0.5947163  0.87988204]
action based on polilcy:[0.9203891 0.9996152]
action based on polilcy:[0.9282696 0.9996863]
action based on polilcy:[0.97701514 0.9999283 ]
action based on polilcy:[0.930188  0.9991695]
action based on polilcy:[0.9687636  0.99986005]
action based on polilcy:[0.9838809  0.99694675]
action based on polilcy:[0.9958415  0.99448925]
action based on polilcy:[0.99880147 0.98362774]
Total T: 122779 Episode Num: 9057 Episode T: 11 Reward: -2248.816624
action based on polilcy:[0.79731274 0.9226368 ]
action based on polilcy:[0.95720786 0.99933153]
action based on polilcy:[0.95981187 0.96349514]
action based on polilcy:[0.77319753 0.7696132 ]
action based on polilcy:[0.9998264  0.95759493]
action based on polilcy:[0.9996464 0.7869256]
Total T: 122785 Episode Num: 9058 Episode T: 6 Reward: -892.514751
action based on polilcy:[0.81804425 0.9329379 ]
action bas

action based on polilcy:[0.74619526 0.9343765 ]
action based on polilcy:[0.89295506 0.997759  ]
action based on polilcy:[0.96148187 0.98664695]
action based on polilcy:[0.9841415  0.99275196]
action based on polilcy:[0.99915034 0.98899496]
action based on polilcy:[0.99918336 0.7823119 ]
action based on polilcy:[0.99997985 0.99315596]
Total T: 122930 Episode Num: 9079 Episode T: 7 Reward: -1263.596593
action based on polilcy:[0.7418078  0.93368906]
action based on polilcy:[0.9485195  0.99970955]
action based on polilcy:[0.9937165 0.9991828]
action based on polilcy:[0.9986968 0.9924012]
action based on polilcy:[0.9999737  0.99311763]
action based on polilcy:[0.9953423 0.6213397]
Total T: 122936 Episode Num: 9080 Episode T: 6 Reward: -1097.972030
action based on polilcy:[0.7729183  0.93292356]
action based on polilcy:[0.95111775 0.9918836 ]
action based on polilcy:[0.9982308  0.99793005]
action based on polilcy:[0.9996777 0.9470945]
action based on polilcy:[0.9999895 0.9976521]
Total T: 1

Total T: 123088 Episode Num: 9102 Episode T: 9 Reward: -1727.677730
action based on polilcy:[0.81413555 0.93207586]
action based on polilcy:[0.91429126 0.97515833]
action based on polilcy:[0.9763054 0.9994417]
action based on polilcy:[0.92680365 0.9290194 ]
action based on polilcy:[0.99809   0.9977876]
action based on polilcy:[0.9910823 0.9983213]
action based on polilcy:[0.8945696 0.92555  ]
action based on polilcy:[0.91193634 0.947913  ]
action based on polilcy:[0.99802816 0.9681588 ]
action based on polilcy:[0.99848086 0.68788   ]
Total T: 123098 Episode Num: 9103 Episode T: 10 Reward: -1818.990163
action based on polilcy:[0.78105426 0.92435735]
action based on polilcy:[0.98361945 0.9986294 ]
action based on polilcy:[0.9970934 0.9844332]
action based on polilcy:[0.999338   0.72646344]
action based on polilcy:[0.9999831  0.99761933]
action based on polilcy:[0.99995494 0.99672604]
Total T: 123104 Episode Num: 9104 Episode T: 6 Reward: -945.142545
action based on polilcy:[0.7870706  0.

action based on polilcy:[0.82763857 0.92211396]
action based on polilcy:[0.99862945 0.99800885]
action based on polilcy:[0.9996387 0.9665093]
action based on polilcy:[0.99983126 0.80281436]
action based on polilcy:[0.9998285  0.83270514]
action based on polilcy:[0.9998988  0.90721166]
Total T: 123325 Episode Num: 9117 Episode T: 6 Reward: -1258.542693
action based on polilcy:[0.81470144 0.92746985]
action based on polilcy:[0.95416427 0.98867774]
action based on polilcy:[0.9707694 0.9998335]
action based on polilcy:[0.7491194 0.9043692]
action based on polilcy:[0.99924344 0.9953826 ]
action based on polilcy:[0.99928236 0.9820949 ]
action based on polilcy:[0.9993993  0.97956306]
action based on polilcy:[0.9998959 0.9728877]
action based on polilcy:[0.9992804 0.759318 ]
Total T: 123334 Episode Num: 9118 Episode T: 9 Reward: -1647.760192
action based on polilcy:[0.8186732 0.9248179]
action based on polilcy:[0.6597765 0.8093556]
action based on polilcy:[0.99920976 0.99425566]
action based o

action based on polilcy:[0.85498214 0.93447936]
action based on polilcy:[0.87358165 0.9238465 ]
action based on polilcy:[0.9998754  0.97739017]
action based on polilcy:[0.99996084 0.8820808 ]
action based on polilcy:[0.99999684 0.99775225]
Total T: 123471 Episode Num: 9137 Episode T: 5 Reward: -829.797108
action based on polilcy:[0.84012824 0.93198866]
action based on polilcy:[0.99910986 0.99840176]
action based on polilcy:[0.99929684 0.9991897 ]
action based on polilcy:[0.9372638  0.91148835]
action based on polilcy:[0.999741  0.9792747]
action based on polilcy:[0.9998421 0.9733314]
action based on polilcy:[0.9999485 0.990937 ]
action based on polilcy:[0.99996966 0.99610174]
Total T: 123479 Episode Num: 9138 Episode T: 8 Reward: -986.304926
action based on polilcy:[0.7949567  0.92365366]
action based on polilcy:[0.9986872 0.998102 ]
action based on polilcy:[0.9997088 0.9922712]
action based on polilcy:[0.9993096 0.9435213]
action based on polilcy:[0.9996471  0.78729683]
Total T: 12348

action based on polilcy:[0.82324874 0.9284198 ]
action based on polilcy:[0.98917335 0.99977165]
action based on polilcy:[0.7303084 0.8557537]
action based on polilcy:[0.99987227 0.99407893]
action based on polilcy:[0.9990312 0.9994639]
action based on polilcy:[0.9998681 0.9930993]
action based on polilcy:[0.9999809  0.93175715]
action based on polilcy:[0.97933906 0.7507481 ]
Total T: 123630 Episode Num: 9160 Episode T: 8 Reward: -1402.414846
action based on polilcy:[0.85626316 0.93224597]
action based on polilcy:[0.7594462 0.8690392]
action based on polilcy:[0.9926117  0.99258107]
action based on polilcy:[0.99316454 0.9989968 ]
action based on polilcy:[0.9993326 0.9991049]
action based on polilcy:[0.9999615 0.9963677]
action based on polilcy:[0.9999889  0.95208097]
action based on polilcy:[0.9999914 0.9895135]
Total T: 123638 Episode Num: 9161 Episode T: 8 Reward: -1372.602073
action based on polilcy:[0.8207866  0.92707926]
action based on polilcy:[0.7339299 0.884568 ]
action based on 

action based on polilcy:[0.852456  0.9408841]
action based on polilcy:[0.99958634 0.99943787]
action based on polilcy:[0.99687207 0.97659564]
action based on polilcy:[0.9999716 0.9252183]
action based on polilcy:[0.9840382 0.8921179]
action based on polilcy:[0.9999259 0.999401 ]
action based on polilcy:[0.9998891 0.9979647]
Total T: 123785 Episode Num: 9181 Episode T: 7 Reward: -1206.614845
action based on polilcy:[0.8674987 0.9399893]
action based on polilcy:[0.9995434 0.9991982]
action based on polilcy:[0.99995136 0.9942611 ]
action based on polilcy:[0.9999864 0.9539148]
action based on polilcy:[0.99996066 0.932838  ]
Total T: 123790 Episode Num: 9182 Episode T: 5 Reward: -933.517974
action based on polilcy:[0.8509039  0.93625075]
action based on polilcy:[0.99825877 0.99785453]
action based on polilcy:[0.9992355 0.9901571]
action based on polilcy:[0.9999805 0.9382236]
action based on polilcy:[0.99994075 0.9038268 ]
Total T: 123795 Episode Num: 9183 Episode T: 5 Reward: -897.827092
ac

action based on polilcy:[0.84289473 0.9346111 ]
action based on polilcy:[0.92910063 0.9729824 ]
action based on polilcy:[0.99442583 0.9998708 ]
action based on polilcy:[0.9945544  0.99926645]
action based on polilcy:[0.99854314 0.9994368 ]
action based on polilcy:[0.99960357 0.9916434 ]
action based on polilcy:[0.9999036  0.83393955]
action based on polilcy:[0.9999348 0.9178247]
action based on polilcy:[0.9999879 0.9940322]
Total T: 123947 Episode Num: 9203 Episode T: 9 Reward: -1655.744522
action based on polilcy:[0.8294295 0.9342252]
action based on polilcy:[0.6019078 0.8323169]
action based on polilcy:[0.9985095  0.99969167]
action based on polilcy:[0.99965537 0.9969503 ]
action based on polilcy:[0.99964076 0.99904025]
action based on polilcy:[0.99972034 0.9972862 ]
action based on polilcy:[0.9996583  0.99776554]
action based on polilcy:[0.99996966 0.9915574 ]
action based on polilcy:[0.9999481  0.92616343]
Total T: 123956 Episode Num: 9204 Episode T: 9 Reward: -1597.804326
action b

action based on polilcy:[0.8421951  0.93361795]
action based on polilcy:[0.39296752 0.9862843 ]
action based on polilcy:[0.33738828 0.9223716 ]
action based on polilcy:[0.33464688 0.99018544]
action based on polilcy:[0.92951936 0.9914826 ]
action based on polilcy:[0.997033   0.96311235]
action based on polilcy:[0.95545447 0.97924006]
action based on polilcy:[0.99670064 0.97427917]
action based on polilcy:[0.9957692 0.9770156]
action based on polilcy:[0.9951398 0.736419 ]
action based on polilcy:[0.9993972  0.39910966]
action based on polilcy:[0.99923164 0.41732126]
action based on polilcy:[0.99969846 0.274706  ]
Total T: 124571 Episode Num: 9240 Episode T: 13 Reward: -2732.050118
action based on polilcy:[0.8273526 0.9252371]
action based on polilcy:[0.8775979  0.99985117]
action based on polilcy:[0.3808649  0.99982506]
action based on polilcy:[0.18377747 0.9778589 ]
action based on polilcy:[0.96678144 0.98642826]
action based on polilcy:[0.8219696 0.79004  ]
action based on polilcy:[0.

action based on polilcy:[0.86701214 0.9230412 ]
action based on polilcy:[0.6897016 0.7801425]
action based on polilcy:[0.83616924 0.8436248 ]
action based on polilcy:[0.99773324 0.9560895 ]
action based on polilcy:[0.9989869 0.7143299]
action based on polilcy:[0.99984026 0.62015826]
action based on polilcy:[0.99987245 0.7496354 ]
Total T: 124721 Episode Num: 9256 Episode T: 7 Reward: -1066.351026
action based on polilcy:[0.8203798 0.9162071]
action based on polilcy:[0.5062265  0.85275376]
action based on polilcy:[0.5059085 0.7863021]
action based on polilcy:[0.98998696 0.9478963 ]
action based on polilcy:[0.99576175 0.9380744 ]
action based on polilcy:[0.99881506 0.8670263 ]
action based on polilcy:[0.9994001  0.86732185]
action based on polilcy:[0.9995266 0.5530686]
action based on polilcy:[0.99990803 0.92743874]
action based on polilcy:[0.9985197 0.8194121]
Total T: 124731 Episode Num: 9257 Episode T: 10 Reward: -1228.039423
action based on polilcy:[0.8493536 0.9197869]
action based 

action based on polilcy:[0.83739674 0.91969734]
action based on polilcy:[-0.9427018   0.70329404]
action based on polilcy:[-0.6882961  0.6822468]
action based on polilcy:[0.97692007 0.83344185]
action based on polilcy:[0.9002032 0.9079235]
action based on polilcy:[-0.9721228  0.3106502]
action based on polilcy:[0.9905496 0.8017455]
action based on polilcy:[-0.20920278  0.8746206 ]
action based on polilcy:[0.985312   0.81280565]
action based on polilcy:[0.9716135 0.8439082]
action based on polilcy:[-0.99458027  0.34272602]
Total T: 124896 Episode Num: 9277 Episode T: 11 Reward: -1537.930622
action based on polilcy:[0.86466527 0.90877926]
action based on polilcy:[0.9632402 0.8562397]
action based on polilcy:[-0.9764912  0.2990257]
action based on polilcy:[0.99365133 0.777041  ]
action based on polilcy:[0.9964987  0.87467533]
action based on polilcy:[-0.9707599   0.30097422]
action based on polilcy:[0.9971785  0.71606743]
action based on polilcy:[-0.99997365 -0.36556363]
Total T: 124904 E

action based on polilcy:[0.7915649 0.9259317]
action based on polilcy:[-0.9797893  0.9140027]
action based on polilcy:[0.9506186  0.87824774]
action based on polilcy:[-0.9993771  0.3425924]
action based on polilcy:[0.6727514  0.88634986]
action based on polilcy:[-0.99986655  0.2708305 ]
action based on polilcy:[0.8869451 0.87387  ]
action based on polilcy:[0.9494921 0.8868603]
action based on polilcy:[-0.99941814  0.18511152]
action based on polilcy:[0.9901423  0.80584836]
action based on polilcy:[-0.99997747  0.21724127]
action based on polilcy:[0.96147275 0.8370984 ]
action based on polilcy:[-0.9995849  0.0997207]
action based on polilcy:[0.99229795 0.80649495]
action based on polilcy:[0.99409896 0.79818106]
Total T: 125059 Episode Num: 9293 Episode T: 15 Reward: -2087.423621
action based on polilcy:[0.7888669  0.92513263]
action based on polilcy:[0.95826435 0.866575  ]
action based on polilcy:[-0.99969685  0.32432887]
action based on polilcy:[0.99107224 0.83843756]
action based on p

action based on polilcy:[0.95681393 0.945904  ]
action based on polilcy:[-0.91530734  0.49337468]
action based on polilcy:[0.9900456  0.91291016]
action based on polilcy:[-0.8052249   0.69423294]
action based on polilcy:[0.9934246 0.9070771]
action based on polilcy:[-0.96498954  0.3839576 ]
action based on polilcy:[0.99678844 0.90791094]
action based on polilcy:[-0.99938035  0.23930284]
action based on polilcy:[0.9986941 0.8829189]
action based on polilcy:[-0.99736196  0.2773855 ]
action based on polilcy:[0.9992435  0.87330425]
action based on polilcy:[-0.9915429  0.3894137]
Total T: 125216 Episode Num: 9312 Episode T: 12 Reward: -1640.291769
action based on polilcy:[0.95431876 0.948795  ]
action based on polilcy:[-0.18758127  0.70512676]
action based on polilcy:[0.02159278 0.7472434 ]
action based on polilcy:[-0.9997585   0.34000233]
action based on polilcy:[0.9973393 0.9095421]
action based on polilcy:[0.72111297 0.7251657 ]
action based on polilcy:[-0.999991   0.3136163]
action base

action based on polilcy:[0.9958505  0.96700114]
action based on polilcy:[0.46810696 0.7437978 ]
action based on polilcy:[0.99812055 0.9479409 ]
action based on polilcy:[-0.999698   0.2554305]
action based on polilcy:[0.999487  0.9157724]
action based on polilcy:[-0.99761724  0.33407333]
action based on polilcy:[0.99973273 0.91050446]
action based on polilcy:[-0.9999983   0.15694948]
Total T: 125377 Episode Num: 9329 Episode T: 8 Reward: -1032.385229
action based on polilcy:[0.9956217 0.9690734]
action based on polilcy:[-0.98087364  0.5171064 ]
action based on polilcy:[0.9992284 0.9178721]
action based on polilcy:[0.8816554 0.8019427]
action based on polilcy:[-0.99773365  0.38124222]
action based on polilcy:[0.9998292 0.8872596]
action based on polilcy:[-0.9999997   0.29721266]
Total T: 125384 Episode Num: 9330 Episode T: 7 Reward: -997.678989
action based on polilcy:[0.9951218 0.9689875]
action based on polilcy:[0.99698806 0.95269525]
action based on polilcy:[-0.9479065  0.4631146]
act

action based on polilcy:[0.99740064 0.97312915]
action based on polilcy:[-0.12326335  0.80511093]
action based on polilcy:[-0.827339    0.60940075]
action based on polilcy:[0.9972251 0.9143799]
action based on polilcy:[0.996815   0.89307415]
action based on polilcy:[-0.9998684   0.07800896]
action based on polilcy:[0.99964744 0.9187755 ]
action based on polilcy:[-0.0486916   0.67287445]
action based on polilcy:[0.9990525 0.9540219]
action based on polilcy:[-0.9996372   0.16499753]
action based on polilcy:[-0.99185866  0.21482538]
Total T: 125530 Episode Num: 9345 Episode T: 11 Reward: -1568.059936
action based on polilcy:[0.9975452 0.9761174]
action based on polilcy:[-0.9232046  0.5309992]
action based on polilcy:[0.9991694  0.95822495]
action based on polilcy:[-0.99670255  0.40678352]
action based on polilcy:[0.9993545  0.94705385]
action based on polilcy:[-0.69715995  0.5450126 ]
action based on polilcy:[-0.9999993  0.2141687]
action based on polilcy:[0.99968135 0.9344164 ]
action ba

action based on polilcy:[0.99667805 0.974699  ]
action based on polilcy:[-0.99548656  0.5418143 ]
action based on polilcy:[0.99877894 0.8725656 ]
action based on polilcy:[-0.9999985   0.34805724]
action based on polilcy:[0.9990776 0.9219947]
action based on polilcy:[-0.9999992  0.2769478]
action based on polilcy:[0.9989123  0.90784985]
Total T: 125700 Episode Num: 9363 Episode T: 7 Reward: -929.421111
action based on polilcy:[0.99691725 0.9742259 ]
action based on polilcy:[-0.99512184  0.5334058 ]
action based on polilcy:[0.9989613  0.84551287]
action based on polilcy:[0.4559214  0.68078375]
action based on polilcy:[0.99976104 0.9001462 ]
action based on polilcy:[-0.99999994  0.31663975]
action based on polilcy:[0.9921705 0.8945278]
Total T: 125707 Episode Num: 9364 Episode T: 7 Reward: -920.989905
action based on polilcy:[0.9975494  0.97628033]
action based on polilcy:[-0.9948275  0.5244164]
action based on polilcy:[0.999107   0.85369253]
action based on polilcy:[-0.8080936  0.5655859

action based on polilcy:[0.9935983  0.97597826]
action based on polilcy:[-0.9965796  0.579117 ]
action based on polilcy:[0.9966737  0.96865004]
action based on polilcy:[-0.9999028   0.44942284]
action based on polilcy:[0.6875708 0.8843135]
action based on polilcy:[-0.9999789   0.31934062]
action based on polilcy:[-0.98466086  0.69765764]
action based on polilcy:[-0.2756727  0.7279186]
action based on polilcy:[-0.99442464  0.41219434]
action based on polilcy:[-0.98630977  0.3276666 ]
action based on polilcy:[0.99830824 0.94134027]
action based on polilcy:[-0.99990577  0.20431112]
Total T: 125881 Episode Num: 9380 Episode T: 12 Reward: -2330.112007
action based on polilcy:[0.9942719 0.9758313]
action based on polilcy:[-0.22146066  0.93819684]
action based on polilcy:[0.9801239  0.93396956]
action based on polilcy:[0.9030019  0.87438846]
action based on polilcy:[0.99821335 0.8309084 ]
action based on polilcy:[-0.99999976  0.21721877]
action based on polilcy:[0.9927569 0.9154533]
action ba

action based on polilcy:[-1.        0.626782]
action based on polilcy:[-1.        -0.1569037]
action based on polilcy:[-1.          0.43806154]
action based on polilcy:[-1.        0.610596]
action based on polilcy:[-1.         0.5625069]
action based on polilcy:[-1.          0.12302395]
action based on polilcy:[-1.         0.4890837]
action based on polilcy:[-1.          0.17793958]
action based on polilcy:[-1.         -0.04897585]
action based on polilcy:[-1.         -0.00546225]
action based on polilcy:[-1.        -0.3816013]
action based on polilcy:[-1.          0.27939606]
action based on polilcy:[-1.        -0.7816042]
action based on polilcy:[-0.99999976  0.08233361]
action based on polilcy:[-1.          0.17933246]
action based on polilcy:[-1.          0.22917315]
action based on polilcy:[-1.        0.281943]
action based on polilcy:[-1.          0.10900246]
action based on polilcy:[-0.9972149  0.0263962]
action based on polilcy:[-0.9999983   0.09646101]
action based on polilcy:

action based on polilcy:[0.44773   0.9893573]
action based on polilcy:[-0.31954733  0.9080344 ]
action based on polilcy:[0.95553863 0.98342454]
action based on polilcy:[-0.9821538   0.45478693]
action based on polilcy:[0.99649894 0.9905432 ]
action based on polilcy:[-0.998885    0.32368103]
action based on polilcy:[0.99912584 0.9942936 ]
Total T: 126683 Episode Num: 9422 Episode T: 7 Reward: -704.225482
action based on polilcy:[0.5373411 0.9919996]
action based on polilcy:[0.68350375 0.9848178 ]
action based on polilcy:[-0.87556905  0.8391174 ]
action based on polilcy:[0.94707805 0.98281723]
action based on polilcy:[-0.9999987  0.555379 ]
action based on polilcy:[0.99388295 0.9931559 ]
action based on polilcy:[-0.9999999   0.40243506]
action based on polilcy:[0.9971267 0.9915104]
Total T: 126691 Episode Num: 9423 Episode T: 8 Reward: -1099.180318
action based on polilcy:[0.4016179 0.9930792]
action based on polilcy:[-0.9996225  0.5737113]
action based on polilcy:[0.83278346 0.96880877]

action based on polilcy:[0.55718744 0.99586385]
action based on polilcy:[-0.9997001  0.4344652]
action based on polilcy:[0.49248487 0.98500764]
action based on polilcy:[0.7760248 0.9739592]
action based on polilcy:[-0.99999994  0.48522508]
action based on polilcy:[-0.7397487  0.6725224]
action based on polilcy:[0.974037  0.9927661]
action based on polilcy:[-1.          0.39592758]
Total T: 126857 Episode Num: 9442 Episode T: 8 Reward: -900.642267
action based on polilcy:[0.464838   0.99553776]
action based on polilcy:[0.31754154 0.99296147]
action based on polilcy:[0.5640049 0.9850959]
action based on polilcy:[0.7529309  0.97460395]
action based on polilcy:[0.961226   0.99193734]
action based on polilcy:[-1.          0.40530375]
action based on polilcy:[0.9880462 0.9918396]
action based on polilcy:[-1.         -0.01859833]
Total T: 126865 Episode Num: 9443 Episode T: 8 Reward: -765.464682
action based on polilcy:[0.54235387 0.9957434 ]
action based on polilcy:[0.05331468 0.9429342 ]
ac

action based on polilcy:[0.56818277 0.99682575]
action based on polilcy:[0.58943963 0.98402846]
action based on polilcy:[-0.9998553   0.55714154]
action based on polilcy:[0.578616   0.98645246]
action based on polilcy:[-0.99999994  0.61248684]
action based on polilcy:[0.98840773 0.992528  ]
action based on polilcy:[-0.9997263  0.4415626]
action based on polilcy:[0.9114502 0.9939405]
Total T: 127022 Episode Num: 9460 Episode T: 8 Reward: -970.931653
action based on polilcy:[0.43186596 0.9966536 ]
action based on polilcy:[-0.41684565  0.9999027 ]
action based on polilcy:[0.01598016 0.9987142 ]
action based on polilcy:[0.366436   0.98115766]
action based on polilcy:[-0.7775998   0.99665123]
action based on polilcy:[0.614161  0.9867196]
action based on polilcy:[-0.9998098   0.34874204]
action based on polilcy:[-0.58470035  0.88206476]
action based on polilcy:[0.5214149  0.96494585]
action based on polilcy:[-0.99999994  0.49941728]
action based on polilcy:[0.96747637 0.986778  ]
action base

action based on polilcy:[0.58025765 0.99788094]
action based on polilcy:[-0.99784374  0.553811  ]
action based on polilcy:[0.03730227 0.99353576]
action based on polilcy:[-0.9422082   0.60738623]
action based on polilcy:[-0.9999319  0.587695 ]
action based on polilcy:[0.9066112 0.9937546]
action based on polilcy:[-1.          0.49529645]
action based on polilcy:[0.99117935 0.9970825 ]
Total T: 127192 Episode Num: 9479 Episode T: 8 Reward: -991.867450
action based on polilcy:[0.5283041  0.99784905]
action based on polilcy:[0.5608883 0.9816477]
action based on polilcy:[0.74517345 0.99213254]
action based on polilcy:[-0.992842   0.5220016]
action based on polilcy:[0.9450188  0.99433655]
action based on polilcy:[-1.          0.47544596]
action based on polilcy:[0.9798646 0.9894955]
Total T: 127199 Episode Num: 9480 Episode T: 7 Reward: -740.697433
action based on polilcy:[0.58189774 0.9980398 ]
action based on polilcy:[0.6651488 0.9933577]
action based on polilcy:[0.30060777 0.9907685 ]
ac

action based on polilcy:[0.4290972  0.99833256]
action based on polilcy:[0.3859079  0.99417204]
action based on polilcy:[-0.9556148  0.5886593]
action based on polilcy:[-0.22051533  0.9957557 ]
action based on polilcy:[0.02889413 0.99592954]
action based on polilcy:[-0.9999731  0.604154 ]
action based on polilcy:[-0.9999869   0.62097424]
action based on polilcy:[-1.       0.51526]
action based on polilcy:[0.96059406 0.9962648 ]
Total T: 127341 Episode Num: 9496 Episode T: 9 Reward: -1099.015305
action based on polilcy:[0.4832052  0.99835604]
action based on polilcy:[0.07285832 0.99927306]
action based on polilcy:[0.27139926 0.998506  ]
action based on polilcy:[0.2235239 0.9751782]
action based on polilcy:[0.6887818 0.9961169]
action based on polilcy:[0.8724899 0.990747 ]
action based on polilcy:[-0.9999999   0.61039114]
action based on polilcy:[0.81084096 0.99283195]
action based on polilcy:[0.9647369 0.9888331]
action based on polilcy:[-1.       0.43917]
action based on polilcy:[0.993

action based on polilcy:[0.428412   0.99832135]
action based on polilcy:[-0.999954    0.56054175]
action based on polilcy:[-0.17287236  0.9975102 ]
action based on polilcy:[-0.41347924  0.9996882 ]
action based on polilcy:[0.5486793 0.9945289]
action based on polilcy:[-0.99998116  0.58037037]
action based on polilcy:[0.66788757 0.98982614]
action based on polilcy:[-1.         0.5238126]
action based on polilcy:[-1.          0.45781398]
action based on polilcy:[-1.          0.69584626]
action based on polilcy:[0.77988434 0.99299777]
Total T: 127495 Episode Num: 9514 Episode T: 11 Reward: -1521.867045
action based on polilcy:[0.5457627 0.9983125]
action based on polilcy:[-0.9999756  0.5492704]
action based on polilcy:[0.56595343 0.99109685]
action based on polilcy:[-1.          0.56194985]
action based on polilcy:[0.64203185 0.99400574]
action based on polilcy:[-0.92566556  0.38121763]
action based on polilcy:[0.95184946 0.9945383 ]
Total T: 127502 Episode Num: 9515 Episode T: 7 Reward: 

action based on polilcy:[0.5661075 0.9982894]
action based on polilcy:[0.6240212 0.985811 ]
action based on polilcy:[-0.9998101  0.5068867]
action based on polilcy:[0.43957475 0.99774843]
action based on polilcy:[-0.9990572   0.61104894]
action based on polilcy:[-0.3065735  0.9989945]
action based on polilcy:[-0.999607   0.4953701]
action based on polilcy:[-0.6468797  0.9996162]
action based on polilcy:[-0.11231562  0.99874973]
action based on polilcy:[-0.9994019   0.49761462]
action based on polilcy:[0.32688993 0.9914752 ]
action based on polilcy:[0.9062142 0.989144 ]
action based on polilcy:[-0.99999905  0.49667034]
action based on polilcy:[0.90544826 0.9912069 ]
action based on polilcy:[0.97926   0.9896787]
action based on polilcy:[-1.          0.04747767]
Total T: 127656 Episode Num: 9531 Episode T: 16 Reward: -1726.750592
action based on polilcy:[0.53753483 0.9983176 ]
action based on polilcy:[-0.99877334  0.5660838 ]
action based on polilcy:[0.29044923 0.9967178 ]
action based on

action based on polilcy:[0.6697597 0.9981458]
action based on polilcy:[0.47219265 0.97702736]
action based on polilcy:[-0.7196431  0.6816756]
action based on polilcy:[0.69737023 0.99344003]
action based on polilcy:[0.4440901  0.99397445]
action based on polilcy:[-1.          0.66301125]
action based on polilcy:[0.7589215  0.99352044]
action based on polilcy:[-0.9999995   0.68017626]
action based on polilcy:[0.71910983 0.9942328 ]
action based on polilcy:[-0.99997073  0.42664376]
Total T: 127814 Episode Num: 9547 Episode T: 10 Reward: -937.869633
action based on polilcy:[0.6713066 0.9981989]
action based on polilcy:[-0.9972314  0.56573  ]
action based on polilcy:[-0.99739647  0.56725943]
action based on polilcy:[0.73558706 0.99998945]
action based on polilcy:[0.66394806 0.99804795]
action based on polilcy:[-0.97564614  0.7038408 ]
action based on polilcy:[0.99551046 0.99998415]
action based on polilcy:[-1.        -0.9998602]
action based on polilcy:[1. 1.]
action based on polilcy:[-1.  

action based on polilcy:[0.77732235 0.99869466]
action based on polilcy:[-0.9993977   0.59381783]
action based on polilcy:[0.4093393 0.9991157]
action based on polilcy:[-0.9963599  0.5245849]
action based on polilcy:[0.78270656 0.9984873 ]
action based on polilcy:[-0.9999998  0.6328126]
action based on polilcy:[0.76943976 0.99374056]
action based on polilcy:[-1.         0.5316137]
action based on polilcy:[0.56171626 0.99550843]
action based on polilcy:[-0.99999994  0.45686233]
action based on polilcy:[0.605563  0.9961116]
Total T: 127986 Episode Num: 9565 Episode T: 11 Reward: -1416.606203
action based on polilcy:[0.74696815 0.9986472 ]
action based on polilcy:[-0.9999797  0.5878217]
action based on polilcy:[-0.73960555  0.76964504]
action based on polilcy:[-0.11990403  0.99885046]
action based on polilcy:[-0.9999999  0.6584582]
action based on polilcy:[0.54817    0.99804497]
action based on polilcy:[-0.99802786  0.54581034]
action based on polilcy:[-0.9999758   0.59443045]
action base

action based on polilcy:[0.99998045 0.9999981 ]
action based on polilcy:[0.99999905 0.99999875]
action based on polilcy:[0.92198694 0.9870223 ]
action based on polilcy:[-0.9999995  0.6424217]
action based on polilcy:[0.9915738 0.9998951]
action based on polilcy:[-0.9997797  0.529934 ]
action based on polilcy:[0.9544048  0.99945205]
action based on polilcy:[-1.         0.7609147]
action based on polilcy:[-0.9998076  0.580245 ]
action based on polilcy:[-0.9999997   0.42776006]
action based on polilcy:[0.8358953 0.9973984]
Total T: 128633 Episode Num: 9594 Episode T: 11 Reward: -1619.399557
action based on polilcy:[0.99997675 0.99999774]
action based on polilcy:[0.96851647 0.99598145]
action based on polilcy:[-0.99902946  0.54164404]
action based on polilcy:[0.9991213 0.9999842]
action based on polilcy:[-0.9998992  0.5073745]
action based on polilcy:[0.75995487 0.99817795]
action based on polilcy:[0.624444  0.9976738]
action based on polilcy:[-1.         0.5130048]
action based on polilcy

action based on polilcy:[0.9999986  0.99999976]
action based on polilcy:[-0.999907    0.57130325]
action based on polilcy:[0.5912739 0.9984289]
action based on polilcy:[-1.         0.5948503]
action based on polilcy:[0.64576083 0.99743015]
action based on polilcy:[0.63265437 0.997941  ]
action based on polilcy:[-0.5302441  0.5661359]
action based on polilcy:[0.904943  0.9968187]
Total T: 128783 Episode Num: 9611 Episode T: 8 Reward: -754.541626
action based on polilcy:[0.9999987 0.9999997]
action based on polilcy:[0.99991655 0.99999416]
action based on polilcy:[0.931717  0.9985971]
action based on polilcy:[-0.99992585  0.6825055 ]
action based on polilcy:[0.9952334  0.99973416]
action based on polilcy:[-0.99932826  0.53602904]
action based on polilcy:[0.9650126 0.9996678]
action based on polilcy:[-0.88546073  0.58603156]
action based on polilcy:[0.8406457  0.98996145]
action based on polilcy:[-0.9999998   0.43414962]
action based on polilcy:[0.8770333 0.9961122]
Total T: 128794 Episode

action based on polilcy:[0.9999997 0.9999999]
action based on polilcy:[-0.9999088  0.557683 ]
action based on polilcy:[0.9875217  0.99972403]
action based on polilcy:[-1.          0.79938924]
action based on polilcy:[0.7662655  0.98444456]
action based on polilcy:[0.7899548 0.9940633]
action based on polilcy:[-1.          0.42992947]
action based on polilcy:[0.96386915 0.98810345]
Total T: 128936 Episode Num: 9628 Episode T: 8 Reward: -983.739367
action based on polilcy:[0.99999976 0.9999999 ]
action based on polilcy:[0.96786326 0.99105054]
action based on polilcy:[-0.9999885  0.7167707]
action based on polilcy:[0.5134922  0.99807316]
action based on polilcy:[-1.          0.48850194]
action based on polilcy:[-0.999999    0.41605654]
action based on polilcy:[0.870546  0.9964637]
Total T: 128943 Episode Num: 9629 Episode T: 7 Reward: -817.570042
action based on polilcy:[0.9999998  0.99999994]
action based on polilcy:[1. 1.]
action based on polilcy:[0.9999838  0.99996936]
action based on 

action based on polilcy:[1. 1.]
action based on polilcy:[0.99022615 0.99730104]
action based on polilcy:[0.12637886 0.9698774 ]
action based on polilcy:[-0.9998105  0.6832142]
action based on polilcy:[0.5396839  0.99852926]
action based on polilcy:[-1.         0.6362217]
action based on polilcy:[0.92149925 0.99564207]
action based on polilcy:[-1.         0.6304007]
Total T: 129106 Episode Num: 9646 Episode T: 8 Reward: -1137.547066
action based on polilcy:[1. 1.]
action based on polilcy:[0.07508266 0.9681396 ]
action based on polilcy:[-0.9993642  0.647678 ]
action based on polilcy:[0.9924408  0.99996436]
action based on polilcy:[-0.9950455   0.73368484]
action based on polilcy:[0.49576572 0.99851143]
action based on polilcy:[-0.9999999  0.562762 ]
action based on polilcy:[0.81433433 0.998408  ]
action based on polilcy:[-1.          0.54166555]
action based on polilcy:[0.9290218 0.9979941]
Total T: 129116 Episode Num: 9647 Episode T: 10 Reward: -1275.268296
action based on polilcy:[1. 1

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.9958875  0.8420805]
action based on polilcy:[0.9314682 0.9991934]
action based on polilcy:[-0.9999542   0.45079693]
action based on polilcy:[0.5998459  0.99896085]
action based on polilcy:[-0.9990609   0.66853017]
action based on polilcy:[0.67569834 0.9985327 ]
action based on polilcy:[-1.          0.54721075]
action based on polilcy:[0.9332669 0.9983104]
Total T: 129287 Episode Num: 9667 Episode T: 12 Reward: -1516.155011
action based on polilcy:[1. 1.]
action based on polilcy:[0.9999979  0.99997234]
action based on polilcy:[-0.99999636  0.8007889 ]
action based on polilcy:[0.72105825 0.9480083 ]
action based on polilcy:[-1.         0.7754659]
action based on polilcy:[0.8291695  0.99776644]
action based on polilcy:[-0.99999887  0.42616054]
action based on polilcy:[0.90961176 0.99977404]
Total T: 129295 Episode Num: 9668 Episode T: 

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.99954927 0.9998946 ]
action based on polilcy:[-0.9999722  0.7119075]
action based on polilcy:[0.50254303 0.9991362 ]
action based on polilcy:[0.85538656 0.9990841 ]
action based on polilcy:[-0.99931407  0.321978  ]
action based on polilcy:[-0.9999998   0.46700194]
action based on polilcy:[0.7056363  0.99899644]
Total T: 129459 Episode Num: 9686 Episode T: 9 Reward: -1182.084865
action based on polilcy:[1. 1.]
action based on polilcy:[0.99999994 0.9999974 ]
action based on polilcy:[-0.9997113   0.75953496]
action based on polilcy:[0.5903174 0.9991906]
action based on polilcy:[-0.99997777  0.54067194]
action based on polilcy:[0.9851789  0.99998057]
action based on polilcy:[0.9933652 0.9999166]
action based on polilcy:[0.8257472  0.99883753]
action based on polilcy:[-0.9950165   0.32485422]
Total T: 129468 Episode Num: 9687 Episode T: 9 Reward: -1081.618900
action based on polilcy:[1. 1.]
action bas

action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999988]
action based on polilcy:[-0.99999917  0.8429503 ]
action based on polilcy:[-0.99991715  0.45349702]
action based on polilcy:[0.8093212 0.9775923]
action based on polilcy:[-1.         0.5842079]
action based on polilcy:[0.92181516 0.9994185 ]
Total T: 129627 Episode Num: 9706 Episode T: 7 Reward: -1078.211259
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999997]
action based on polilcy:[0.99508685 0.9987323 ]
action based on polilcy:[-1.          0.70909584]
action based on polilcy:[0.7494404 0.9991872]
action based on polilcy:[-1.        0.702816]
action based on polilcy:[0.9772532  0.99944395]
Total T: 129634 Episode Num: 9707 Episode T: 7 Reward: -839.286582
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999998]
action based on polilcy:[-0.9999842   0.80680805]
action based on polilcy:[0.6239228 0.9992712]
action based on polilcy:[-1.          0.61929363]
actio

action based on polilcy:[-1.         0.7834482]
action based on polilcy:[0.5811963  0.99920666]
action based on polilcy:[-1.          0.52755153]
Total T: 129803 Episode Num: 9724 Episode T: 12 Reward: -1905.087703
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999934]
action based on polilcy:[0.7298182  0.99918044]
action based on polilcy:[-0.9999981   0.50506276]
action based on polilcy:[0.8023796 0.9992869]
action based on polilcy:[-1.         0.5679152]
action based on polilcy:[-0.9981193   0.26248515]
Total T: 129810 Episode Num: 9725 Episode T: 7 Reward: -837.455718
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999953]
action based on polilcy:[-0.99994135  0.7873177 ]
action based on polilcy:[-0.9542941   0.39090267]
action based on polilcy:[0.73003775 0.99945265]
action based on polilcy:[-0.9999992  0.397606 ]
action based on polilcy:[0.85486877 0.9993938 ]
Total T: 129817 Episode Num: 9726 Episode T: 7 Reward: -779.778923
action 

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.9965864  0.99987316]
action based on polilcy:[-0.99999994  0.7788831 ]
action based on polilcy:[0.8016656  0.99894977]
action based on polilcy:[-1.          0.49082118]
action based on polilcy:[-0.951833   0.5505189]
action based on polilcy:[0.73989177 0.9995612 ]
Total T: 129980 Episode Num: 9745 Episode T: 8 Reward: -1282.812761
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999997]
action based on polilcy:[-0.99999756  0.8425776 ]
action based on polilcy:[-0.98653877  0.6353242 ]
action based on polilcy:[-0.9972275   0.47534958]
action based on polilcy:[0.96955204 0.9999702 ]
action based on polilcy:[0.9820064 0.9998691]
action based on polilcy:[-0.99999994  0.78376806]
Total T: 129988 Episode Num: 9746 Episode T: 8 Reward: -1245.854966
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999997]
action based on polilcy:[0.9999911 0.9999916]
action based

action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999982]
action based on polilcy:[-0.9999968  0.8213956]
action based on polilcy:[0.6109396 0.9997436]
action based on polilcy:[-0.99958736  0.6674726 ]
action based on polilcy:[0.6855338  0.99974376]
action based on polilcy:[-1.         0.6322477]
Total T: 130612 Episode Num: 9779 Episode T: 7 Reward: -801.168790
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999934]
action based on polilcy:[-0.99999726  0.8207399 ]
action based on polilcy:[0.5782429  0.99974066]
action based on polilcy:[-0.993966    0.65362847]
action based on polilcy:[0.6092837 0.9997566]
action based on polilcy:[-1.          0.61198723]
Total T: 130619 Episode Num: 9780 Episode T: 7 Reward: -865.943493
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999994]
action based on polilcy:[-0.8056069   0.55701953]
action based on polilcy:[0.7703935 0.9969241]
action based on polilcy:[-0.2288269  0.6484779]


action based on polilcy:[1. 1.]
action based on polilcy:[0.9999906 0.9999995]
action based on polilcy:[1.         0.99999994]
action based on polilcy:[-0.99998355  0.8718917 ]
action based on polilcy:[-0.9999983  0.6391613]
action based on polilcy:[-0.99262697  0.52268046]
action based on polilcy:[0.72302973 0.9997982 ]
Total T: 130783 Episode Num: 9800 Episode T: 7 Reward: -899.628217
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999946]
action based on polilcy:[0.9999978 0.9999584]
action based on polilcy:[-0.99999976  0.8400833 ]
action based on polilcy:[-0.99921554  0.46805972]
action based on polilcy:[0.6419057 0.9998755]
action based on polilcy:[-0.9281205   0.35091516]
action based on polilcy:[0.8787095 0.9998178]
Total T: 130791 Episode Num: 9801 Episode T: 8 Reward: -1057.574803
action based on polilcy:[1. 1.]
action based on polilcy:[0.9999906  0.99999946]
action based on polilcy:[-0.9999177  0.8230716]
action based on polilcy:[0.6719741  0.92750293]

action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999946]
action based on polilcy:[1.         0.99999946]
action based on polilcy:[-0.99998975  0.8683249 ]
action based on polilcy:[0.49309942 0.99986553]
action based on polilcy:[0.6318186 0.9998439]
action based on polilcy:[-1.          0.78098667]
action based on polilcy:[0.7101371  0.99986017]
Total T: 130957 Episode Num: 9822 Episode T: 8 Reward: -1076.667975
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.9999999 1.       ]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.966372    0.95523745]
action based on polilcy:[1. 1.]
action based on polilcy:[-1.        -0.9999959]
action based on polilcy:[1. 1.]
action based on polilcy:[-1.         -0.99999547]
action based on polilcy:[1. 1.]
action based on polilcy:[-1.        -0.9999836]
action based on polilcy:[0.99492306 0.99600697]
Total T: 130971 

action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999999]
action based on polilcy:[0.97079605 0.9862287 ]
action based on polilcy:[-0.99687785  0.5213564 ]
action based on polilcy:[0.74981153 0.99987257]
action based on polilcy:[-1.          0.65194243]
action based on polilcy:[0.8521697 0.9998693]
Total T: 131134 Episode Num: 9841 Episode T: 7 Reward: -753.652961
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999999]
action based on polilcy:[-0.9999768  0.8958335]
action based on polilcy:[0.9414582 0.9997283]
action based on polilcy:[-0.99999976  0.81024903]
action based on polilcy:[-0.8950547   0.48608226]
action based on polilcy:[0.69848466 0.9998821 ]
Total T: 131141 Episode Num: 9842 Episode T: 7 Reward: -973.331458
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.9999747 0.99998  ]
action based on polilcy:[-0.99999887  0.8611171 ]
action based on polilcy:[0.9302874 0.9999822]
action based on poli

action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999999]
action based on polilcy:[-0.9983332   0.95770293]
action based on polilcy:[0.4853246 0.999898 ]
action based on polilcy:[0.9208751  0.99702454]
action based on polilcy:[-1.          0.70289433]
action based on polilcy:[0.9822382  0.99994236]
Total T: 131293 Episode Num: 9861 Episode T: 7 Reward: -853.338832
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.6996721 0.9988109]
action based on polilcy:[0.99998266 0.9998644 ]
action based on polilcy:[-0.9111582   0.59700316]
action based on polilcy:[0.7140206 0.9998934]
action based on polilcy:[-1.          0.71290517]
action based on polilcy:[0.9342109  0.99998015]
Total T: 131302 Episode Num: 9862 Episode T: 9 Reward: -1445.563299
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999999]
action based on polilcy:[-0.9999927   0.89178693]
action based on polilcy:[0.999478

action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999998]
action based on polilcy:[0.99956644 0.9984387 ]
action based on polilcy:[-0.99999964  0.889095  ]
action based on polilcy:[0.8486372 0.9968316]
action based on polilcy:[-0.99988276  0.6226243 ]
action based on polilcy:[0.8944562 0.9893009]
Total T: 131462 Episode Num: 9882 Episode T: 7 Reward: -1045.937631
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.19803174  0.9983941 ]
action based on polilcy:[0.40394166 0.9998393 ]
action based on polilcy:[-0.9995152   0.32292762]
action based on polilcy:[0.74346703 0.9998071 ]
action based on polilcy:[-1.          0.69396985]
Total T: 131469 Episode Num: 9883 Episode T: 7 Reward: -824.536400
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.999321  0.9999276]
action based on polilcy:[-0.9999713  0.7615063]
action based on

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.9058866   0.99418974]
action based on polilcy:[0.4352224 0.9998786]
action based on polilcy:[-0.99999964  0.32946342]
action based on polilcy:[0.8065878 0.9999238]
Total T: 131638 Episode Num: 9905 Episode T: 6 Reward: -852.464877
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.9999906 0.999988 ]
action based on polilcy:[0.9999996 0.9999855]
action based on polilcy:[-0.9999998  0.8839149]
action based on polilcy:[0.4889296 0.9998722]
action based on polilcy:[-0.9999989  0.3655663]
action based on polilcy:[0.7835677 0.9998784]
Total T: 131649 Episode Num: 9906 Episode T: 11 Reward: -1489.157257
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999994]
action based on polilcy:

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.40721482  0.9981889 ]
action based on polilcy:[0.999998   0.99999994]
action based on polilcy:[0.96812665 0.9962164 ]
action based on polilcy:[0.6770017  0.99750876]
action based on polilcy:[-0.9999195  0.2849232]
action based on polilcy:[0.73625493 0.9997855 ]
Total T: 131812 Episode Num: 9926 Episode T: 10 Reward: -1446.632946
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999964]
action based on polilcy:[-0.9999993   0.89591336]
action based on polilcy:[-0.9999672  0.5768634]
action based on polilcy:[-0.4589009  0.6343118]
Total T: 131818 Episode Num: 9927 Episode T: 6 Reward: -983.244481
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999994]
action based on polilcy:[-0.9999954  0.8868494]
action based on polilcy:[1.         0.99999976]
action based

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999726]
action based on polilcy:[0.9407286 0.9999612]
action based on polilcy:[0.9489552 0.999751 ]
action based on polilcy:[-0.9997744  0.9395439]
action based on polilcy:[0.31747583 0.9998024 ]
action based on polilcy:[-0.9996867  0.3126415]
action based on polilcy:[0.8774554  0.99981105]
Total T: 131982 Episode Num: 9944 Episode T: 11 Reward: -1697.952391
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.99451953 0.999626  ]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.99999994  0.8477712 ]
action based on polilcy:[0.719599   0.99205893]
action based on polilcy:[-0.9999978   0.41941604]
action based on polilcy:[0.17053801 0.96053725]
Total T: 131991 Episode Num: 9945 Episode T: 9 Reward: -1102.084118
action based on polilcy:[1. 1.]

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.4897665  0.9957724]
action based on polilcy:[-1.         0.7447527]
action based on polilcy:[-1.          0.44285434]
action based on polilcy:[0.9906036 0.9999642]
Total T: 132637 Episode Num: 9975 Episode T: 6 Reward: -779.966697
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.       0.999998]
action based on polilcy:[-0.9421966  0.9907089]
action based on polilcy:[0.52475756 0.952643  ]
action based on polilcy:[-0.9999746   0.67783976]
action based on polilcy:[0.830818   0.99980843]
Total T: 132644 Episode Num: 9976 Episode T: 7 Reward: -1067.307172
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.9983145   0.96747154]
action based on polilcy:[-0.9765286  0.5378097]
action based on polilcy:[0.99999976 0.99999994]
action based on polilcy:[0.9809456 0.9999846]
action based on polilcy:[-1.         0.7780231]
act

action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999905]
action based on polilcy:[-0.9629458  0.9437071]
action based on polilcy:[-0.99999225  0.7730809 ]
action based on polilcy:[0.86373985 0.9999646 ]
action based on polilcy:[-1.          0.42418757]
Total T: 132802 Episode Num: 9996 Episode T: 6 Reward: -920.317951
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999954]
action based on polilcy:[0.9970107 0.9989675]
action based on polilcy:[-0.9999987   0.84851736]
action based on polilcy:[-0.964062   0.7168131]
action based on polilcy:[0.7667004  0.99994093]
action based on polilcy:[0.901868   0.99986404]
Total T: 132810 Episode Num: 9997 Episode T: 8 Reward: -1039.639598
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999976]
action based on polilcy:[0.999986   0.99987435]
action based on polilcy:[-0.17255656  0.98223555]
action based on polilcy:[0.90853006 0.9999752 ]
action based 

action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999946]
action based on polilcy:[1.         0.99999994]
action based on polilcy:[-0.99999756  0.7685628 ]
action based on polilcy:[0.59622693 0.9708955 ]
action based on polilcy:[-0.9987823   0.33856845]
action based on polilcy:[0.81066763 0.98337096]
Total T: 132987 Episode Num: 10018 Episode T: 7 Reward: -860.486199
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.99999994 0.9999624 ]
action based on polilcy:[-0.99999386  0.50473267]
action based on polilcy:[0.9999997  0.99999964]
action based on polilcy:[-0.9999994   0.72303677]
action based on polilcy:[0.5749775 0.9995984]
action based on polilcy:[-0.9999998   0.42299286]
Total T: 132999 Episode Num: 10019 Episode T: 12 Reward: -1618.206548
action based on polilcy:[1. 1.]
----------------------

action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999961]
action based on polilcy:[-0.99907345  0.7325879 ]
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99997824]
action based on polilcy:[0.99636865 0.9994158 ]
action based on polilcy:[-0.9999997   0.64561963]
action based on polilcy:[0.82669383 0.9998881 ]
Total T: 133157 Episode Num: 10039 Episode T: 8 Reward: -1150.133056
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.9576266  0.8182107]
action based on polilcy:[0.99999994 1.        ]
action based on polilcy:[-0.9999915  0.6958721]
action based on polilcy:[0.9357425  0.99908537]
action based on polilcy:[0.63340837 0.9998455 ]
action based on polilcy:[-0.9982757   0.16244534]
Total T: 133165 Episode Num: 10040 Episode T: 8 Reward: -793.192430
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99997383]
action base

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.9999744  0.99926597]
action based on polilcy:[-0.99998516  0.70716906]
action based on polilcy:[0.9530937 0.9993975]
action based on polilcy:[-0.9999997   0.46867076]
action based on polilcy:[0.96754456 0.99996096]
Total T: 133319 Episode Num: 10060 Episode T: 7 Reward: -924.656161
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.90887046 0.9825788 ]
action based on polilcy:[0.91385615 0.9686078 ]
action based on polilcy:[0.9658514 0.9999912]
action based on polilcy:[-0.99999964  0.5545898 ]
action based on polilcy:[-0.9992887   0.25046518]
Total T: 133326 Episode Num: 10061 Episode T: 7 Reward: -804.773261
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.99992114 0.9992681 ]
action based on polilcy:[-0.52222854  0.8232063 ]
action based on polilcy:[0.90081745 0.9995736 ]
action based on polilcy:[-0.9999943   0.68

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999881]
action based on polilcy:[-0.99620265  0.71743906]
action based on polilcy:[0.99999964 0.99999994]
action based on polilcy:[-0.9999899  0.6863028]
action based on polilcy:[0.827797   0.99989927]
action based on polilcy:[0.97210515 0.9962801 ]
Total T: 133479 Episode Num: 10080 Episode T: 8 Reward: -1175.139319
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99998885]
action based on polilcy:[-0.99554145  0.7553892 ]
action based on polilcy:[0.5722641  0.98454326]
action based on polilcy:[0.96106964 0.9994191 ]
action based on polilcy:[-0.9999584  0.4506413]
action based on polilcy:[0.8609376  0.99988115]
Total T: 133486 Episode Num: 10081 Episode T: 7 Reward: -902.957343
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999995]
action based on polilcy:[0.9999918 0.9989608]
action based on polilcy:[-0.9942106  0.7756971]
action based on p

action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999898]
action based on polilcy:[-0.8711146  0.7672764]
action based on polilcy:[0.9253268  0.99998385]
action based on polilcy:[-0.9999993  0.6615242]
action based on polilcy:[0.9512153 0.9999644]
Total T: 133628 Episode Num: 10101 Episode T: 6 Reward: -876.735622
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999505]
action based on polilcy:[-0.9806583   0.74416775]
action based on polilcy:[0.8364314 0.9999603]
action based on polilcy:[-0.9999547  0.5274408]
action based on polilcy:[0.939095  0.9999484]
action based on polilcy:[-0.99881256  0.4087171 ]
Total T: 133636 Episode Num: 10102 Episode T: 8 Reward: -992.958702
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999985]
action based on polilcy:[-0.9997439  0.7021519]
action based on polilcy:[0.9999908  0.99999994]
action based on polilcy:[-0.99999744  0.69833493]
action based on poli

action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999971]
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999992]
action based on polilcy:[1.         0.99998105]
action based on polilcy:[-0.99984777  0.575123  ]
action based on polilcy:[0.821787  0.9999035]
action based on polilcy:[-0.99999875  0.53101647]
action based on polilcy:[0.92423123 0.9998869 ]
Total T: 133793 Episode Num: 10123 Episode T: 9 Reward: -1287.920244
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999994]
action based on polilcy:[0.9998898 0.99863  ]
action based on polilcy:[-0.9999541   0.64011234]
action based on polilcy:[-0.19728424  0.87643456]
action based on polilcy:[0.79004973 0.99038637]
action based on polilcy:[0.9775205  0.99821436]
Total T: 133801 Episode Num: 10124 Episode T: 8 Reward: -1338.505301
action based on polilcy:[1. 1.]
action based on polilcy:[1.       0.999987]
action based on polilcy:[-0.999355

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999555]
action based on polilcy:[-0.95299155  0.67559814]
action based on polilcy:[0.99813586 0.9973611 ]
action based on polilcy:[0.9934418 0.9772141]
action based on polilcy:[0.85891557 0.9135831 ]
action based on polilcy:[-0.99999   0.574229]
Total T: 133956 Episode Num: 10144 Episode T: 8 Reward: -1616.943835
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.3866863  0.8920096]
action based on polilcy:[0.79800206 0.9999411 ]
action based on polilcy:[-0.999831    0.63600904]
action based on polilcy:[-0.720138    0.56458205]
action based on polilcy:[0.8560343  0.99991345]
Total T: 133963 Episode Num: 10145 Episode T: 7 Reward: -816.238957
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999809]
action based on polilcy:[0.9999914  0.99999887]
action based on polilcy:[-0.9998328  0.6283748]
action based on polilcy:[-0.99234

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99997854]
action based on polilcy:[0.9997797 0.9999984]
action based on polilcy:[-0.99995923  0.34803972]
action based on polilcy:[-0.9999974   0.36250994]
action based on polilcy:[0.87368166 0.999858  ]
Total T: 134585 Episode Num: 10177 Episode T: 8 Reward: -1237.849710
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999947]
action based on polilcy:[-0.4250904   0.78126705]
action based on polilcy:[0.8920093 0.9996263]
action based on polilcy:[-0.9990816   0.36399457]
action based on polilcy:[0.99641716 0.9998345 ]
Total T: 134591 Episode Num: 10178 Episode T: 6 Reward: -771.854865
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999864]
action based on polilcy:[-0.99933803  0.41896546]
action based on polilcy:[-0.98421335  0.45585918]
action based on polilcy:[0.81534606 0.99973

action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999993]
action based on polilcy:[1.         0.99999946]
action based on polilcy:[0.9886787 0.9713546]
action based on polilcy:[-0.9996933  0.4615712]
action based on polilcy:[0.8666301  0.99987304]
action based on polilcy:[-0.99999774  0.34789324]
action based on polilcy:[0.71170187 0.99958193]
Total T: 134753 Episode Num: 10197 Episode T: 8 Reward: -1289.348415
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.99766994  0.4978354 ]
action based on polilcy:[1.        0.9999796]
action based on polilcy:[0.9989511 0.9999966]
action based on polilcy:[-0.99998856  0.47477135]
action based on polilcy:[-0.85081726  0.4418833 ]
action based on polilcy:[-0.45055178  0.893095  ]
Total T: 134761 Episode Num: 10198 Episode T: 8 Reward: -993.228727
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based 

action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999974]
action based on polilcy:[-0.99711865  0.34506115]
action based on polilcy:[0.99571687 0.9999567 ]
action based on polilcy:[-0.9999856   0.33485255]
action based on polilcy:[-0.9999826   0.30059624]
Total T: 134933 Episode Num: 10215 Episode T: 6 Reward: -930.493986
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999999]
action based on polilcy:[-0.9967653   0.31962928]
action based on polilcy:[0.9999999  0.99999994]
action based on polilcy:[-0.99401426  0.3109929 ]
action based on polilcy:[0.99996144 0.9999989 ]
action based on polilcy:[-0.9999325   0.26956713]
action based on polilcy:[0.9995255  0.99825704]
action based on polilcy:[-0.9999195   0.25485557]
Total T: 134944 Episode Num: 10216 Episode T: 11 Reward: -1819.665541
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999845]
action based on poli

action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999857]
action based on polilcy:[-0.99826294  0.4333528 ]
action based on polilcy:[1. 1.]
action based on polilcy:[0.99999994 0.99992794]
action based on polilcy:[-0.9994      0.37692377]
action based on polilcy:[-0.00353349  0.89553326]
action based on polilcy:[-0.9998996  0.3924804]
Total T: 135109 Episode Num: 10232 Episode T: 8 Reward: -1431.808401
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.9989408   0.46676546]
action based on polilcy:[-0.32730314  0.8700472 ]
action based on polilcy:[0.7331686 0.9445743]
action based on polilcy:[0.96937466 0.99946964]
action based on polilcy:[-0.9997893  0.3724169]
action based on polilcy:[0.9858324 0.9999882]
Total T: 135117 Episode Num: 10233 Episode T: 8 Reward: -1306.712758
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999993]
action bas

action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999857]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.9999645 0.9957379]
action based on polilcy:[-0.99720365  0.28284293]
action based on polilcy:[0.9999199 0.9999987]
action based on polilcy:[0.9999938 0.9948691]
action based on polilcy:[-0.998105    0.04466959]
action based on polilcy:[0.99999917 0.999999  ]
Total T: 135283 Episode Num: 10252 Episode T: 12 Reward: -2186.509421
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999994]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.99999994 0.9999442 ]
action based on polilcy:[-0.9007657   0.44082335]
action based on polilcy:[-0.46412995  0.7269405 ]
action based on polilcy:[0.99999684 0.99986315]


action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999992]
action based on polilcy:[0.63857687 0.85353583]
action based on polilcy:[-0.99999034  0.2800888 ]
action based on polilcy:[0.93247396 0.9934935 ]
action based on polilcy:[-0.99999607  0.25868195]
action based on polilcy:[0.9902726 0.9999085]
Total T: 135458 Episode Num: 10273 Episode T: 8 Reward: -1388.081918
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.9995764 0.9913938]
action based on polilcy:[0.999996  0.9999901]
action based on polilcy:[0.84351015 0.94908655]
action based on polilcy:[-0.999923    0.42345124]
action based on polilcy:[0.9999427 0.9999995]
action based on polilcy:[-0.99999356  0.3466629 ]
action based on polilcy:[0.9945781 0.9999562]
Total T: 135467 Episode Num: 10274 Episode T: 9 Reward: -1185.311459
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999806]
action based on polilcy:[1.         

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.9999128  0.99777275]
action based on polilcy:[-0.99994063  0.5314769 ]
action based on polilcy:[0.96283704 0.997123  ]
action based on polilcy:[-0.999997    0.48991778]
action based on polilcy:[0.9904953 0.9999333]
Total T: 135632 Episode Num: 10292 Episode T: 8 Reward: -1218.535471
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.9960449  0.5703006]
action based on polilcy:[1.         0.99999994]
action based on polilcy:[-0.9999963   0.48922738]
action based on polilcy:[-0.99996656  0.39481995]
action based on polilcy:[0.98184174 0.9999059 ]
Total T: 135639 Episode Num: 10293 Episode T: 7 Reward: -989.070457
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.99994093 0.9970339 ]
action based on polilcy:[-0.999993   0.2828514]
action based on polilcy:[

action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999994]
action based on polilcy:[0.2542811  0.93460774]
action based on polilcy:[0.9800238 0.9999166]
action based on polilcy:[-1.          0.29857233]
action based on polilcy:[0.99740374 0.9999851 ]
Total T: 135805 Episode Num: 10308 Episode T: 6 Reward: -704.456581
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999994]
action based on polilcy:[0.999999   0.99999994]
action based on polilcy:[1.         0.99999934]
action based on polilcy:[-0.99986887  0.4277651 ]
action based on polilcy:[1. 1.]
action based on polilcy:[0.9999998 0.9999885]
action based on polilcy:[-0.99982435  0.41062787]
action based on polilcy:[-0.9993284   0.14022894]
action based on polilcy:[0.9993045 0.999979 ]
Total T: 135815 Episode Num: 10309 Episode T: 10 Reward: -1234.185283
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.9991612  0.5875373]
action based on polilcy:[-0.99

action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.9961513   0.50556624]
action based on polilcy:[0.8687818  0.99983096]
action based on polilcy:[-1.          0.25739986]
action based on polilcy:[-1.         -0.03173553]
Total T: 135966 Episode Num: 10326 Episode T: 6 Reward: -801.032623
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999999]
action based on polilcy:[-0.9762802  0.5073055]
action based on polilcy:[0.8928896  0.99984694]
action based on polilcy:[-0.9999997  0.418033 ]
action based on polilcy:[0.99782884 0.9999246 ]
Total T: 135972 Episode Num: 10327 Episode T: 6 Reward: -860.426687
action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999981]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.9999906   0.39763477]
action based on polilcy:[0.9991905  0.99999803]
action based on polilcy:[1.        0.9999806]
action based on polilcy:[-0.99993694  0.3546577 ]
action based on polilcy:[0.9

action based on polilcy:[1. 1.]
action based on polilcy:[1.        0.9999998]
action based on polilcy:[-0.9999582  0.5586137]
action based on polilcy:[0.76126087 0.9998769 ]
action based on polilcy:[-0.99999905  0.49710944]
action based on polilcy:[-0.47285345  0.9395157 ]
action based on polilcy:[0.780391   0.99981767]
action based on polilcy:[-0.99999976  0.16354676]
Total T: 136626 Episode Num: 10360 Episode T: 8 Reward: -919.197095
action based on polilcy:[1. 1.]
action based on polilcy:[0.99999994 0.99999493]
action based on polilcy:[0.9989873 0.9999822]
action based on polilcy:[-1.         0.5637365]
action based on polilcy:[-1.         0.3507298]
action based on polilcy:[-0.9951972  -0.20234679]
Total T: 136632 Episode Num: 10361 Episode T: 6 Reward: -919.586402
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999976]
action based on polilcy:[0.9999745 0.9999249]
action based on polilcy:[-1.          0.42045897]
action based

action based on polilcy:[1. 1.]
action based on polilcy:[0.9999987 0.9999951]
action based on polilcy:[-0.72968066  0.9800306 ]
action based on polilcy:[0.9933008 0.9995114]
action based on polilcy:[0.99999994 0.9999999 ]
action based on polilcy:[-0.9999995  0.6202593]
action based on polilcy:[0.89503807 0.9998786 ]
action based on polilcy:[-1.        0.321453]
Total T: 136785 Episode Num: 10381 Episode T: 8 Reward: -1237.475314
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[0.9247458 0.9981761]
action based on polilcy:[-1.        0.472875]
action based on polilcy:[-0.88274944  0.1743589 ]
action based on polilcy:[0.9388124 0.9998344]
action based on polilcy:[-0.94150794  0.15495077]
action based on polilcy:[-0.94164     0.34338084]
Total T: 136795 Episode Num: 10382 Episode T: 10 Reward: -1314.004967
action based on polilcy:[1. 1.]
action based on polilcy:[1.         0.99999934]
a

action based on polilcy:[1. 1.]
action based on polilcy:[0.96582556 0.9999678 ]
action based on polilcy:[-0.9999998   0.75168645]
action based on polilcy:[-0.9999691   0.74854326]
action based on polilcy:[0.9828047  0.99937975]
action based on polilcy:[-0.9911624   0.35043693]
action based on polilcy:[0.86850834 0.9998122 ]
Total T: 136970 Episode Num: 10400 Episode T: 7 Reward: -1010.748022
action based on polilcy:[1. 1.]
action based on polilcy:[0.999658 1.      ]
action based on polilcy:[0.91054845 0.99999666]
action based on polilcy:[-0.9999879   0.69141555]
action based on polilcy:[0.47475314 0.9998705 ]
action based on polilcy:[-0.9999999   0.58188236]
action based on polilcy:[0.9507141 0.9999452]
action based on polilcy:[-1.         -0.83697325]
Total T: 136978 Episode Num: 10401 Episode T: 8 Reward: -750.464780
action based on polilcy:[1. 1.]
action based on polilcy:[1. 1.]
action based on polilcy:[-0.8896386  0.9982211]
action based on polilcy:[0.3420434 0.9998103]
action base

action based on polilcy:[1. 1.]
action based on polilcy:[-0.98735124  0.90981054]
action based on polilcy:[0.98640656 0.9999998 ]
action based on polilcy:[-0.9999263   0.72963774]
action based on polilcy:[-0.99999684  0.670679  ]
action based on polilcy:[0.06096527 0.9921678 ]
action based on polilcy:[-0.9985724  0.4794881]
action based on polilcy:[0.30820647 0.994868  ]
action based on polilcy:[-0.999806  0.234536]
action based on polilcy:[0.9956112  0.99996775]
Total T: 137136 Episode Num: 10420 Episode T: 10 Reward: -1053.746992
action based on polilcy:[1. 1.]
action based on polilcy:[-0.72536886  0.9926835 ]
action based on polilcy:[0.5209081 0.9997639]
action based on polilcy:[-0.9974449  0.6493545]
action based on polilcy:[-0.15736192  0.9784376 ]
action based on polilcy:[-0.99999994  0.75670886]
action based on polilcy:[0.97504437 0.9999845 ]
action based on polilcy:[0.5126149 0.9964838]
action based on polilcy:[-1.          0.68385506]
action based on polilcy:[-0.99940944  0.64

action based on polilcy:[0.9991888  0.99999976]
action based on polilcy:[-0.00906452  0.9999948 ]
action based on polilcy:[-0.8651293   0.98514754]
action based on polilcy:[-0.96489   0.729532]
action based on polilcy:[0.7134023 0.9998349]
action based on polilcy:[-1.        -0.6818422]
action based on polilcy:[-0.9999954   0.16742764]
action based on polilcy:[-1.          0.62887406]
Total T: 137301 Episode Num: 10437 Episode T: 8 Reward: -860.635239
action based on polilcy:[0.9986944 0.9999996]
action based on polilcy:[-0.96449447  0.8138117 ]
action based on polilcy:[-0.9554969  0.7200948]
action based on polilcy:[0.6034363  0.99904644]
action based on polilcy:[-1.        -0.6681321]
action based on polilcy:[0.9450456  0.99962884]
action based on polilcy:[-1.         -0.93693626]
action based on polilcy:[0.99633926 0.9998785 ]
Total T: 137309 Episode Num: 10438 Episode T: 8 Reward: -632.482722
action based on polilcy:[0.9985079  0.99999946]
action based on polilcy:[-0.99996436  0.73

action based on polilcy:[-0.03612752  0.99994045]
action based on polilcy:[-0.99999285  0.804742  ]
action based on polilcy:[-0.9592248   0.64608485]
action based on polilcy:[0.3186262  0.99991244]
action based on polilcy:[-0.99966544  0.751729  ]
action based on polilcy:[-1.         0.7443838]
action based on polilcy:[-0.9999951   0.61027217]
action based on polilcy:[0.95917934 0.9996762 ]
Total T: 137461 Episode Num: 10455 Episode T: 8 Reward: -981.292783
action based on polilcy:[-0.22725458  0.99990296]
action based on polilcy:[-0.9909041   0.85829175]
action based on polilcy:[0.24096432 0.99951565]
action based on polilcy:[-1.         0.6678392]
action based on polilcy:[-0.9424761  0.432018 ]
action based on polilcy:[-0.9382213  -0.05008227]
action based on polilcy:[-0.99999964 -0.590831  ]
Total T: 137468 Episode Num: 10456 Episode T: 7 Reward: -639.534645
action based on polilcy:[-0.12553525  0.9998716 ]
action based on polilcy:[-0.9578707   0.91178703]
action based on polilcy:[0

action based on polilcy:[-0.13048153  0.9998095 ]
action based on polilcy:[0.71708333 0.99999917]
action based on polilcy:[-0.9998451   0.83512723]
action based on polilcy:[0.10359773 0.99984026]
action based on polilcy:[0.5125387  0.99976766]
action based on polilcy:[-0.99418247  0.736112  ]
action based on polilcy:[0.602615   0.99947846]
action based on polilcy:[-1.         0.7776909]
action based on polilcy:[-0.88868713  0.41175407]
action based on polilcy:[0.5506095 0.9860303]
action based on polilcy:[-1.         -0.47470734]
action based on polilcy:[0.9955393 0.9996252]
Total T: 137624 Episode Num: 10473 Episode T: 12 Reward: -1201.128986
action based on polilcy:[-0.09456787  0.99983335]
action based on polilcy:[-0.96725357  0.8281298 ]
action based on polilcy:[-0.14723116  0.9999832 ]
action based on polilcy:[-0.24272363  0.967999  ]
action based on polilcy:[0.44180623 0.9996622 ]
action based on polilcy:[-0.9999064  0.7326448]
action based on polilcy:[0.7820415  0.99870443]
acti

action based on polilcy:[-0.06558739  0.9998616 ]
action based on polilcy:[0.5164146 0.9977414]
action based on polilcy:[-0.99969655  0.8347933 ]
action based on polilcy:[-0.3743349   0.96146214]
action based on polilcy:[-0.99997824  0.76639855]
action based on polilcy:[0.51333845 0.99996513]
action based on polilcy:[0.8909258  0.99984103]
action based on polilcy:[-0.87130624  0.4602225 ]
action based on polilcy:[0.3670649 0.9999093]
action based on polilcy:[-0.9185995   0.62196565]
action based on polilcy:[0.77972084 0.9997607 ]
action based on polilcy:[-0.9999981   0.03408896]
action based on polilcy:[0.8763412 0.9995747]
action based on polilcy:[-0.9999822  -0.80893755]
action based on polilcy:[0.5331768 0.9643016]
action based on polilcy:[-0.999973  -0.9717208]
action based on polilcy:[0.92764705 0.99815637]
action based on polilcy:[-0.99359196 -0.862931  ]
action based on polilcy:[0.9598596  0.99581236]
action based on polilcy:[0.18845469 0.81376326]
action based on polilcy:[-0.96

action based on polilcy:[-0.13227485  0.9998504 ]
action based on polilcy:[-0.92228633  0.9232333 ]
action based on polilcy:[0.03235358 0.9999872 ]
action based on polilcy:[0.95399845 0.99999624]
action based on polilcy:[-0.9474912   0.73881686]
action based on polilcy:[-0.8965249  0.6675674]
action based on polilcy:[-0.90401787  0.6703079 ]
action based on polilcy:[0.8272244  0.99981713]
action based on polilcy:[-1.          0.75772816]
action based on polilcy:[0.26356396 0.98350024]
action based on polilcy:[-1.         0.7125497]
action based on polilcy:[0.9931315  0.99978817]
action based on polilcy:[-0.9999998  0.5511985]
action based on polilcy:[0.99738514 0.9998566 ]
action based on polilcy:[0.9997008 0.9999623]
Total T: 137966 Episode Num: 10508 Episode T: 15 Reward: -1709.259662
action based on polilcy:[0.00150323 0.9998304 ]
action based on polilcy:[-0.9815964  0.8221706]
action based on polilcy:[-0.99998844  0.7333844 ]
action based on polilcy:[-1.          0.67264855]
action

action based on polilcy:[0.13767901 0.99905694]
action based on polilcy:[-0.30062345  0.9983715 ]
action based on polilcy:[-0.27402496  0.9942565 ]
action based on polilcy:[-1.         0.6808903]
action based on polilcy:[-0.99981576  0.11553383]
action based on polilcy:[-0.7031647  0.872559 ]
action based on polilcy:[-0.99996483 -0.40132925]
action based on polilcy:[-0.9999998   0.48618445]
action based on polilcy:[0.8472946  0.99883896]
Total T: 138619 Episode Num: 10535 Episode T: 9 Reward: -704.111267
action based on polilcy:[0.11830258 0.99913895]
action based on polilcy:[-0.9999921   0.93929434]
action based on polilcy:[0.4620864 0.9998969]
action based on polilcy:[-0.9999836  0.8104588]
action based on polilcy:[-0.9728291   0.45308903]
action based on polilcy:[0.66956973 0.9983449 ]
action based on polilcy:[-1.         0.7498523]
action based on polilcy:[-1.          0.37789196]
action based on polilcy:[0.05741416 0.97051877]
Total T: 138628 Episode Num: 10536 Episode T: 9 Reward

action based on polilcy:[-0.01675023  0.99865997]
action based on polilcy:[-0.48088142  0.9988209 ]
action based on polilcy:[-0.9999999   0.90126073]
action based on polilcy:[0.7681805 0.9987724]
action based on polilcy:[-1.         -0.48614648]
action based on polilcy:[0.86306155 0.99774367]
action based on polilcy:[-1.        -0.5920223]
action based on polilcy:[0.8389791 0.9969847]
Total T: 138788 Episode Num: 10552 Episode T: 8 Reward: -860.155350
action based on polilcy:[0.44719288 0.99901897]
action based on polilcy:[0.5525603  0.99996907]
action based on polilcy:[-0.99898666  0.8349737 ]
action based on polilcy:[0.82437736 0.99884784]
action based on polilcy:[-0.97225326  0.49809816]
action based on polilcy:[0.99437255 0.999867  ]
action based on polilcy:[-1. -1.]
action based on polilcy:[0.94754076 0.9979237 ]
Total T: 138796 Episode Num: 10553 Episode T: 8 Reward: -605.243890
action based on polilcy:[-0.07868553  0.9987579 ]
action based on polilcy:[0.24598807 0.99995095]
acti

action based on polilcy:[-0.34502184  0.99945563]
action based on polilcy:[-0.6879488  0.9999254]
action based on polilcy:[-0.9987244   0.84229773]
action based on polilcy:[0.6528338 0.9993437]
action based on polilcy:[-1.         0.4665785]
action based on polilcy:[-0.99999976  0.42350596]
action based on polilcy:[0.90222937 0.9995644 ]
action based on polilcy:[-0.99636436  0.669236  ]
action based on polilcy:[-1.         0.6298271]
Total T: 138940 Episode Num: 10569 Episode T: 9 Reward: -802.154975
action based on polilcy:[-0.28646803  0.9992264 ]
action based on polilcy:[-0.99998546  0.9316863 ]
action based on polilcy:[-0.99999684  0.68950975]
action based on polilcy:[0.73662734 0.99930793]
action based on polilcy:[-1.       -0.801094]
action based on polilcy:[0.8290617 0.9989327]
action based on polilcy:[-1.        -0.9999309]
Total T: 138947 Episode Num: 10570 Episode T: 7 Reward: -770.708094
action based on polilcy:[-0.24662243  0.9990652 ]
action based on polilcy:[0.18334109 0.

action based on polilcy:[0.3900711 0.9994152]
action based on polilcy:[-0.9999998   0.97794855]
action based on polilcy:[0.49840635 0.9997275 ]
action based on polilcy:[-0.46893358  0.9697067 ]
action based on polilcy:[0.51033694 0.99840224]
action based on polilcy:[-1.         0.6607293]
action based on polilcy:[-0.98345214  0.6736503 ]
action based on polilcy:[0.9674621 0.9995723]
Total T: 139107 Episode Num: 10589 Episode T: 8 Reward: -773.346896
action based on polilcy:[-0.19094901  0.9990735 ]
action based on polilcy:[-0.99869895  0.9231925 ]
action based on polilcy:[-0.99940187  0.7447444 ]
action based on polilcy:[-0.9812196  0.6682792]
action based on polilcy:[0.7513346 0.9987712]
action based on polilcy:[-1.        -0.9999995]
Total T: 139113 Episode Num: 10590 Episode T: 6 Reward: -668.400389
action based on polilcy:[-0.28969425  0.99875164]
action based on polilcy:[-0.21532643  0.99916327]
action based on polilcy:[-0.38714588  0.99818707]
action based on polilcy:[-0.99901694

action based on polilcy:[-0.33381617  0.99907523]
action based on polilcy:[-0.6180369  0.9990546]
action based on polilcy:[0.38466245 0.99976724]
action based on polilcy:[0.36640295 0.99975353]
action based on polilcy:[-0.99999994  0.6707795 ]
action based on polilcy:[-0.99593234  0.73840725]
action based on polilcy:[0.91610444 0.9995252 ]
action based on polilcy:[-1.         -0.99559426]
action based on polilcy:[0.79242134 0.99693274]
Total T: 139260 Episode Num: 10607 Episode T: 9 Reward: -641.590595
action based on polilcy:[-0.2286736   0.99922615]
action based on polilcy:[-0.9999998   0.97032166]
action based on polilcy:[-0.99898046  0.72905827]
action based on polilcy:[-0.773477   0.9100193]
action based on polilcy:[-0.9872646  0.7540202]
action based on polilcy:[-0.9888063  0.751637 ]
action based on polilcy:[0.6026336  0.99419135]
action based on polilcy:[-1.         -0.24926817]
Total T: 139268 Episode Num: 10608 Episode T: 8 Reward: -698.552129
action based on polilcy:[-0.2736

action based on polilcy:[-0.99410063  0.99371773]
action based on polilcy:[0.11467227 0.9999252 ]
action based on polilcy:[-1.         0.9210904]
action based on polilcy:[-0.9999991   0.75798076]
action based on polilcy:[-0.99999994  0.9433991 ]
action based on polilcy:[-0.99686223  0.59293926]
action based on polilcy:[-0.81311125  0.9255517 ]
action based on polilcy:[0.6694249 0.9987105]
action based on polilcy:[-0.99999994  0.64470243]
action based on polilcy:[-0.8983338  0.7839065]
action based on polilcy:[0.9713184 0.9997566]
Total T: 139423 Episode Num: 10625 Episode T: 11 Reward: -978.165412
action based on polilcy:[-0.9948043  0.9934292]
action based on polilcy:[0.26122138 0.9998799 ]
action based on polilcy:[-1.          0.86362135]
action based on polilcy:[0.73065686 0.9988846 ]
action based on polilcy:[-1.         0.4536065]
action based on polilcy:[0.9857537 0.999893 ]
Total T: 139429 Episode Num: 10626 Episode T: 6 Reward: -524.940051
action based on polilcy:[-0.9949673   0

action based on polilcy:[-0.99757284  0.9956264 ]
action based on polilcy:[-1.          0.98403853]
action based on polilcy:[0.394881 0.999562]
action based on polilcy:[-1.         0.9838134]
action based on polilcy:[-0.99999374  0.67100954]
action based on polilcy:[-0.987516  0.426038]
action based on polilcy:[0.81825894 0.99870867]
action based on polilcy:[-1.         -0.12485093]
Total T: 139583 Episode Num: 10645 Episode T: 8 Reward: -1031.253991
action based on polilcy:[-0.9974648   0.99542356]
action based on polilcy:[0.31391555 0.99982804]
action based on polilcy:[-1.          0.97706056]
action based on polilcy:[-1.         0.6517663]
action based on polilcy:[0.58774185 0.99923193]
action based on polilcy:[-1.          0.68747735]
action based on polilcy:[-0.9998302  0.7129253]
action based on polilcy:[0.917434  0.9988313]
Total T: 139591 Episode Num: 10646 Episode T: 8 Reward: -947.817596
action based on polilcy:[-0.99750966  0.9954937 ]
action based on polilcy:[-0.92171097  0

action based on polilcy:[-0.9985094   0.99429744]
action based on polilcy:[-0.9999971  0.838789 ]
action based on polilcy:[0.5737424 0.998818 ]
action based on polilcy:[-1.         0.5731089]
action based on polilcy:[-1.         0.4041621]
action based on polilcy:[-0.9991767   0.61915565]
Total T: 139746 Episode Num: 10661 Episode T: 6 Reward: -767.200083
action based on polilcy:[-0.9982387   0.99452037]
action based on polilcy:[-0.45636594  0.99999714]
action based on polilcy:[-0.9999722  0.9852746]
action based on polilcy:[-0.98781985  0.9751099 ]
action based on polilcy:[0.49974155 0.9998188 ]
action based on polilcy:[-0.99998826  0.76429117]
action based on polilcy:[0.56989837 0.99966437]
action based on polilcy:[-1.         0.8766613]
action based on polilcy:[0.7414727  0.99710333]
action based on polilcy:[-1.          0.47251514]
action based on polilcy:[0.9503575 0.9985877]
action based on polilcy:[-1.          0.62040544]
Total T: 139758 Episode Num: 10662 Episode T: 12 Reward:

action based on polilcy:[-0.9991143   0.98597574]
action based on polilcy:[-0.999877    0.64859796]
action based on polilcy:[-0.87887865  0.99762636]
action based on polilcy:[-0.9999997  0.9470523]
action based on polilcy:[0.515219  0.9997305]
action based on polilcy:[-0.9997636  0.504159 ]
action based on polilcy:[-0.9949071  0.4987368]
action based on polilcy:[0.897071 0.995488]
action based on polilcy:[-1.          0.46433634]
action based on polilcy:[0.98992604 0.9993717 ]
action based on polilcy:[0.9978786 0.9998096]
Total T: 139931 Episode Num: 10676 Episode T: 11 Reward: -876.873445
action based on polilcy:[-0.9991146  0.9839597]
action based on polilcy:[-1.         0.9810753]
action based on polilcy:[-1.          0.68153477]
action based on polilcy:[0.66753775 0.99835366]
action based on polilcy:[-1.          0.47400674]
action based on polilcy:[0.91212195 0.9965021 ]
action based on polilcy:[-1.         0.4537304]
action based on polilcy:[0.99807674 0.9998492 ]
Total T: 139939

action based on polilcy:[-0.9969743  0.849072 ]
action based on polilcy:[0.74471843 0.9993712 ]
action based on polilcy:[-0.9999999  0.7317848]
action based on polilcy:[-1.          0.85703343]
action based on polilcy:[0.96153086 0.9980277 ]
action based on polilcy:[-1.          0.87057257]
action based on polilcy:[-0.80213964  0.32388824]
action based on polilcy:[0.5489936  0.97410476]
action based on polilcy:[-0.99997675  0.7469586 ]
action based on polilcy:[-0.99993354  0.6955298 ]
action based on polilcy:[0.4959367 0.992538 ]
action based on polilcy:[-0.9999792   0.62268114]
action based on polilcy:[-0.9926127   0.39565662]
action based on polilcy:[-0.9556341  0.5362936]
Total T: 140562 Episode Num: 10706 Episode T: 14 Reward: -1473.602299
action based on polilcy:[-0.9977285  0.8340244]
action based on polilcy:[0.71805304 0.9991116 ]
action based on polilcy:[-0.991497    0.33199257]
action based on polilcy:[0.9839089 0.9985667]
action based on polilcy:[-1.         0.2829763]
action

action based on polilcy:[-0.99639344  0.83591336]
action based on polilcy:[-0.99727803  0.85689753]
action based on polilcy:[0.884715  0.9993491]
action based on polilcy:[-0.92787546  0.37760445]
action based on polilcy:[-0.6817429   0.38827056]
action based on polilcy:[0.9909551  0.99841636]
action based on polilcy:[-1.          0.33234638]
action based on polilcy:[-0.9999436  0.5057972]
Total T: 140711 Episode Num: 10724 Episode T: 8 Reward: -675.439147
action based on polilcy:[-0.99720895  0.816233  ]
action based on polilcy:[0.8562623 0.9993256]
action based on polilcy:[0.876371   0.99929124]
action based on polilcy:[-1.          0.86109495]
action based on polilcy:[0.9808403 0.9987102]
action based on polilcy:[-1.         0.3825464]
action based on polilcy:[0.9174477  0.97898066]
action based on polilcy:[-1.          0.40000853]
action based on polilcy:[0.9947472 0.9987268]
Total T: 140720 Episode Num: 10725 Episode T: 9 Reward: -880.313882
action based on polilcy:[-0.9972658  0.8

action based on polilcy:[-0.9965887  0.8000506]
action based on polilcy:[0.83557767 0.9991477 ]
action based on polilcy:[-0.9999975   0.54472387]
action based on polilcy:[-1.         0.6357553]
action based on polilcy:[-1.         0.5040554]
action based on polilcy:[-0.9801835   0.55461735]
action based on polilcy:[-0.996785    0.50773907]
Total T: 140862 Episode Num: 10741 Episode T: 7 Reward: -717.591741
action based on polilcy:[-0.9967088  0.7979997]
action based on polilcy:[-0.99847096 -0.3590441 ]
action based on polilcy:[0.0416844  0.99999774]
action based on polilcy:[-0.99984026 -0.9098815 ]
action based on polilcy:[-0.01884599  0.9999952 ]
action based on polilcy:[-0.8526767   0.80649984]
action based on polilcy:[0.13830349 0.999995  ]
action based on polilcy:[-0.98566604  0.24865781]
action based on polilcy:[0.99994254 1.        ]
action based on polilcy:[-1.        -0.8890271]
action based on polilcy:[-1.       -0.766157]
action based on polilcy:[0.9881745 0.9999989]
action b

action based on polilcy:[-0.9960389  0.8045443]
action based on polilcy:[0.8763656 0.9991981]
action based on polilcy:[-1.         0.7541689]
action based on polilcy:[0.99647826 0.9992582 ]
action based on polilcy:[-1.          0.42208895]
action based on polilcy:[0.994409   0.99848366]
action based on polilcy:[-0.9999997  0.4037231]
Total T: 141016 Episode Num: 10758 Episode T: 7 Reward: -641.069482
action based on polilcy:[-0.99586475  0.80127317]
action based on polilcy:[0.8972712 0.9993412]
action based on polilcy:[-0.9892941  0.5431341]
action based on polilcy:[0.96151793 0.9993754 ]
action based on polilcy:[-1.          0.63309026]
action based on polilcy:[0.98363465 0.9984209 ]
action based on polilcy:[-1.        0.596087]
action based on polilcy:[0.94524044 0.99788684]
action based on polilcy:[-1.          0.60647476]
action based on polilcy:[-0.7164709   0.45921445]
Total T: 141026 Episode Num: 10759 Episode T: 10 Reward: -1045.670157
action based on polilcy:[-0.99695593  0.78

action based on polilcy:[-0.99561346  0.79032123]
action based on polilcy:[0.76382834 0.99999756]
action based on polilcy:[0.9989828  0.99999976]
action based on polilcy:[0.74576277 0.95733654]
action based on polilcy:[-0.9999095 -0.9719251]
action based on polilcy:[0.7616184  0.99988854]
action based on polilcy:[-0.99963903  0.38695088]
action based on polilcy:[0.9999999 1.       ]
action based on polilcy:[-1.        -0.9877419]
action based on polilcy:[-0.99900883  0.7170818 ]
action based on polilcy:[-0.9998981   0.47005194]
action based on polilcy:[1. 1.]
action based on polilcy:[-1.         -0.94788104]
action based on polilcy:[0.9999963  0.99999994]
action based on polilcy:[-1.        -0.9892288]
action based on polilcy:[-1.         -0.98790264]
action based on polilcy:[0.9999999 1.       ]
action based on polilcy:[-1.        -0.6768522]
action based on polilcy:[0.99999994 1.        ]
action based on polilcy:[-1.        -0.9997559]
action based on polilcy:[0.99999523 0.9999999 ]


action based on polilcy:[-0.9968848   0.76559424]
action based on polilcy:[0.87165594 0.99961215]
action based on polilcy:[-0.9999984   0.93410003]
action based on polilcy:[-0.9999399   0.58195716]
action based on polilcy:[0.9967046 0.9997307]
action based on polilcy:[-0.57889056  0.31867048]
action based on polilcy:[0.9960385  0.99940604]
action based on polilcy:[-1.         0.3249456]
action based on polilcy:[0.9948319 0.9987854]
action based on polilcy:[-1.          0.39707896]
Total T: 141352 Episode Num: 10792 Episode T: 10 Reward: -1098.962404
action based on polilcy:[-0.9971785  0.7694324]
action based on polilcy:[-0.91650546  0.43711388]
action based on polilcy:[0.82601696 0.9996714 ]
action based on polilcy:[-1.          0.73366296]
action based on polilcy:[-0.67604184  0.40505156]
action based on polilcy:[0.9933394 0.9997043]
action based on polilcy:[-1.          0.42011496]
action based on polilcy:[-1.          0.29338604]
action based on polilcy:[0.15579659 0.53164923]
Tota

action based on polilcy:[-0.9974217  0.771065 ]
action based on polilcy:[-0.9578786   0.53024507]
action based on polilcy:[-0.999995    0.60247195]
action based on polilcy:[-0.99999994  0.45194784]
action based on polilcy:[-0.65056133  0.31618893]
action based on polilcy:[-1.          0.43050125]
action based on polilcy:[-0.99981713  0.24656963]
action based on polilcy:[-0.9996054  0.214266 ]
Total T: 141519 Episode Num: 10809 Episode T: 8 Reward: -780.283868
action based on polilcy:[-0.9972107  0.7736605]
action based on polilcy:[0.7202761 0.9999969]
action based on polilcy:[-0.94804007  0.6695123 ]
action based on polilcy:[-1.        0.705098]
action based on polilcy:[0.94858944 0.9996159 ]
action based on polilcy:[-1.         0.5105442]
action based on polilcy:[0.96281564 0.99977714]
action based on polilcy:[-1.          0.29371306]
action based on polilcy:[0.9985659 0.9996732]
action based on polilcy:[-1.          0.24685304]
action based on polilcy:[0.9177052 0.9523281]
Total T: 1

action based on polilcy:[-0.9985068  0.7035322]
action based on polilcy:[0.8998726  0.99976885]
action based on polilcy:[-0.99854547  0.4884987 ]
action based on polilcy:[0.99798787 0.99980766]
action based on polilcy:[-1.          0.36592817]
action based on polilcy:[-0.99897856  0.23088127]
action based on polilcy:[-0.55512017  0.33399472]
action based on polilcy:[0.9999932 0.9999976]
Total T: 141676 Episode Num: 10819 Episode T: 8 Reward: -711.969388
action based on polilcy:[-0.9984013   0.71355456]
action based on polilcy:[0.9118103 0.9998678]
action based on polilcy:[-0.99992007 -0.0612513 ]
action based on polilcy:[0.9004284 0.9996073]
action based on polilcy:[-0.8574731  0.7753155]
action based on polilcy:[-0.9969292  0.6376059]
action based on polilcy:[0.98001474 0.9991565 ]
action based on polilcy:[-1.          0.49856728]
action based on polilcy:[-0.9859176   0.45812002]
action based on polilcy:[-0.9999469   0.41731504]
action based on polilcy:[0.9963967 0.9994172]
action bas

action based on polilcy:[-0.9988491  0.664977 ]
action based on polilcy:[0.95887226 0.99918646]
action based on polilcy:[-0.9139752   0.50021535]
action based on polilcy:[0.6571443  0.96799684]
action based on polilcy:[-1.         0.6327438]
action based on polilcy:[-0.99988675  0.5580257 ]
action based on polilcy:[-0.9999801   0.49100187]
action based on polilcy:[0.99980223 0.99988073]
action based on polilcy:[-0.99999976  0.27214125]
action based on polilcy:[0.9999287  0.99996316]
Total T: 141834 Episode Num: 10835 Episode T: 10 Reward: -951.496772
action based on polilcy:[-0.9990431   0.66656756]
action based on polilcy:[-0.99976045  0.49426308]
action based on polilcy:[0.9893657 0.9996539]
action based on polilcy:[-1.          0.63237774]
action based on polilcy:[0.9994656 0.9998366]
action based on polilcy:[-1.          0.41651976]
action based on polilcy:[-1.         0.3442404]
action based on polilcy:[0.9978062 0.9996187]
Total T: 141842 Episode Num: 10836 Episode T: 8 Reward: -

action based on polilcy:[-0.99928254  0.6462289 ]
action based on polilcy:[-0.99192363  0.5340823 ]
action based on polilcy:[-0.9870599  0.555912 ]
action based on polilcy:[0.7517142  0.99998724]
action based on polilcy:[-0.9999994  0.7822265]
action based on polilcy:[-0.5584034  0.7932083]
action based on polilcy:[-0.99867105  0.5335195 ]
action based on polilcy:[0.99869466 0.9996228 ]
action based on polilcy:[-1.          0.15864275]
action based on polilcy:[0.99986595 0.9998761 ]
action based on polilcy:[-1.          0.13254619]
action based on polilcy:[0.67785096 0.5840374 ]
action based on polilcy:[0.99999946 0.9999994 ]
Total T: 141998 Episode Num: 10853 Episode T: 13 Reward: -1332.743846
action based on polilcy:[-0.99938977  0.64587986]
action based on polilcy:[0.9659102 0.9994997]
---------------------------------------
Episode_num: 10854, Evaluation over 1 episodes: -725.658537
---------------------------------------
Total T: 142001 Episode Num: 10854 Episode T: 3 Reward: -818

action based on polilcy:[-0.9998109   0.65718377]
action based on polilcy:[-0.999987    0.53963345]
action based on polilcy:[-0.9902731  0.3626766]
action based on polilcy:[-1.          0.46121904]
action based on polilcy:[0.9996927 0.9995668]
action based on polilcy:[-0.99999887  0.25054786]
action based on polilcy:[0.99998504 0.99994   ]
action based on polilcy:[-1.          0.13675913]
Total T: 142627 Episode Num: 10882 Episode T: 8 Reward: -762.990935
action based on polilcy:[-0.9998348   0.63946414]
action based on polilcy:[0.7922042 0.9999469]
action based on polilcy:[-1.          0.76520306]
action based on polilcy:[-0.9999832   0.52783465]
action based on polilcy:[-1.         0.5069489]
action based on polilcy:[0.98764074 0.9886296 ]
action based on polilcy:[-1.          0.26126423]
action based on polilcy:[-0.99999976  0.33690947]
action based on polilcy:[0.9348748 0.96366  ]
action based on polilcy:[-0.99998367  0.43155304]
action based on polilcy:[0.9999788  0.99991775]
Tota

action based on polilcy:[-0.9998837   0.62662303]
action based on polilcy:[-0.30842566  0.8367576 ]
action based on polilcy:[0.9582925 0.9941309]
action based on polilcy:[-0.999982   0.5451509]
action based on polilcy:[0.02048359 0.76261216]
action based on polilcy:[-0.9995102   0.51595175]
action based on polilcy:[-0.98049194  0.4709805 ]
action based on polilcy:[0.96998626 0.9926121 ]
action based on polilcy:[-1.          0.32031447]
action based on polilcy:[0.99671453 0.99730515]
action based on polilcy:[-1.          0.07178246]
action based on polilcy:[0.99987406 0.9995922 ]
action based on polilcy:[-1.          0.02863457]
action based on polilcy:[0.58892226 0.7569436 ]
action based on polilcy:[-0.9999953   0.05238093]
Total T: 142801 Episode Num: 10893 Episode T: 15 Reward: -1501.272225
action based on polilcy:[-0.9998834   0.62546647]
action based on polilcy:[0.9968838 0.9988606]
action based on polilcy:[-1.          0.37223572]
action based on polilcy:[0.9999017 0.9997602]
acti

action based on polilcy:[-0.999938   0.6189159]
action based on polilcy:[0.930347  0.9989539]
action based on polilcy:[-1.          0.63211393]
action based on polilcy:[-0.9999934  0.570103 ]
action based on polilcy:[0.99716073 0.9975052 ]
action based on polilcy:[-1.          0.26957083]
action based on polilcy:[0.9999693  0.99978983]
action based on polilcy:[-0.99999934  0.16974697]
action based on polilcy:[-0.9999982   0.19752374]
action based on polilcy:[0.99985844 0.99933577]
Total T: 142951 Episode Num: 10904 Episode T: 10 Reward: -1081.496333
action based on polilcy:[-0.9999489  0.6277386]
action based on polilcy:[0.99596304 0.9980811 ]
action based on polilcy:[-1.        0.408814]
action based on polilcy:[0.9980716 0.9984733]
action based on polilcy:[-1.         0.5923935]
action based on polilcy:[0.99878573 0.99863315]
action based on polilcy:[-1.        0.378241]
action based on polilcy:[0.9999082  0.99962395]
action based on polilcy:[-1.         0.3493851]
action based on po

action based on polilcy:[-0.9999785  0.6158385]
action based on polilcy:[-1.          0.67030585]
action based on polilcy:[-0.9880273  0.4250973]
action based on polilcy:[-0.99998444  0.3705011 ]
action based on polilcy:[-1.          0.24623919]
action based on polilcy:[-0.99991035  0.22109564]
action based on polilcy:[0.9955808  0.99356717]
action based on polilcy:[-1.          0.26722303]
action based on polilcy:[0.9999989 0.9999797]
Total T: 143130 Episode Num: 10919 Episode T: 9 Reward: -1020.157846
action based on polilcy:[-0.9999789   0.61510146]
action based on polilcy:[0.98816127 0.9961748 ]
action based on polilcy:[0.9744715  0.99956894]
action based on polilcy:[-0.99999934  0.53185713]
action based on polilcy:[0.99895465 0.99861795]
action based on polilcy:[-1.          0.29660526]
action based on polilcy:[-0.9999995   0.48791322]
action based on polilcy:[0.99957764 0.9992137 ]
action based on polilcy:[-1.          0.22565758]
action based on polilcy:[-0.997913  0.250744]
act

action based on polilcy:[-1.          0.13950719]
action based on polilcy:[-0.9999744  -0.04089132]
Total T: 143302 Episode Num: 10933 Episode T: 7 Reward: -689.287956
action based on polilcy:[-0.9999899   0.66710204]
action based on polilcy:[-1.          0.62007964]
action based on polilcy:[-0.5926293   0.51218367]
action based on polilcy:[-0.99999714  0.38660118]
action based on polilcy:[-0.9999969   0.21415523]
action based on polilcy:[0.96599567 0.97237533]
action based on polilcy:[-1.         0.3126679]
Total T: 143309 Episode Num: 10934 Episode T: 7 Reward: -626.581228
action based on polilcy:[-0.9999924  0.6566813]
action based on polilcy:[-0.87398565  0.59748435]
action based on polilcy:[-0.9999986   0.59824127]
action based on polilcy:[-1.         0.7151217]
action based on polilcy:[0.9784466 0.9940522]
action based on polilcy:[-1.         0.4968897]
action based on polilcy:[-0.999964    0.40754986]
action based on polilcy:[-0.9999137   0.26876915]
action based on polilcy:[0.9

action based on polilcy:[-0.99999505  0.7305814 ]
action based on polilcy:[0.98837674 0.9955509 ]
action based on polilcy:[-1.          0.68528354]
action based on polilcy:[0.99996984 0.9997615 ]
action based on polilcy:[-1.          0.64649844]
action based on polilcy:[0.99999917 0.99998146]
action based on polilcy:[-1.          0.31005278]
Total T: 143501 Episode Num: 10945 Episode T: 7 Reward: -742.108153
action based on polilcy:[-0.9999959  0.7317887]
action based on polilcy:[0.95272994 0.99129236]
action based on polilcy:[-1.          0.71154314]
action based on polilcy:[0.9999132 0.9995643]
action based on polilcy:[-1.          0.64869964]
action based on polilcy:[0.99992   0.9996449]
action based on polilcy:[-1.          0.50854754]
action based on polilcy:[1.         0.99999887]
Total T: 143509 Episode Num: 10946 Episode T: 8 Reward: -959.367068
action based on polilcy:[-0.99999607  0.72928727]
action based on polilcy:[-0.9986961   0.53172255]
action based on polilcy:[-1.      

action based on polilcy:[-0.99999714  0.75289637]
action based on polilcy:[0.9450459  0.99556345]
action based on polilcy:[-1.         0.7495048]
action based on polilcy:[-0.99999994  0.66108763]
action based on polilcy:[-0.9938343  0.4450579]
action based on polilcy:[0.7726243 0.9511896]
action based on polilcy:[-1.         0.4432878]
action based on polilcy:[0.99966127 0.9994248 ]
action based on polilcy:[-1.         0.5413431]
action based on polilcy:[-0.9116613   0.29037628]
action based on polilcy:[0.9999984 0.9999683]
Total T: 143664 Episode Num: 10959 Episode T: 11 Reward: -1076.733205
action based on polilcy:[-0.9999975  0.7469271]
action based on polilcy:[-0.5536895   0.81932336]
action based on polilcy:[0.8297044 0.9999373]
action based on polilcy:[0.9962902 0.9999983]
action based on polilcy:[-0.92905605  0.8860147 ]
action based on polilcy:[-0.9707264  0.6281425]
action based on polilcy:[0.9567884  0.98857826]
action based on polilcy:[-1.        -0.9975665]
action based on 

action based on polilcy:[-0.9999987  0.7529303]
action based on polilcy:[0.8703714 0.9924561]
action based on polilcy:[-1.         0.7343863]
action based on polilcy:[0.98867446 0.99852633]
action based on polilcy:[-1.         0.7161715]
action based on polilcy:[-0.99999994  0.5720818 ]
action based on polilcy:[0.97795063 0.9954472 ]
action based on polilcy:[-1.        -0.9857204]
action based on polilcy:[-0.99671084  0.4969274 ]
action based on polilcy:[-1.         0.5368591]
action based on polilcy:[0.9998721 0.9994897]
action based on polilcy:[-1.         0.5178144]
action based on polilcy:[0.79346293 0.9371384 ]
action based on polilcy:[-1.          0.45245314]
action based on polilcy:[0.99999964 0.99999017]
Total T: 143837 Episode Num: 10971 Episode T: 15 Reward: -1983.574313
action based on polilcy:[-0.9999985  0.7578302]
action based on polilcy:[0.9440989 0.9907917]
action based on polilcy:[-1.          0.74461913]
action based on polilcy:[0.23812528 0.82005495]
action based on 

action based on polilcy:[-0.99999857  0.7564416 ]
action based on polilcy:[-0.9999979   0.62938344]
action based on polilcy:[-1.          0.72009313]
action based on polilcy:[-0.9999136  0.5299556]
action based on polilcy:[0.9934986  0.99311125]
action based on polilcy:[-1.         0.3740857]
action based on polilcy:[1.        0.9999997]
---------------------------------------
Episode_num: 10986, Evaluation over 1 episodes: -672.999007
---------------------------------------
Total T: 144000 Episode Num: 10986 Episode T: 7 Reward: -672.999007
Total T: 144049 Episode Num: 10987 Episode T: 49 Reward: -7149.280936
Total T: 144065 Episode Num: 10988 Episode T: 16 Reward: -2413.105520
Total T: 144085 Episode Num: 10989 Episode T: 20 Reward: -3123.774233
Total T: 144111 Episode Num: 10990 Episode T: 26 Reward: -3534.063808
Total T: 144157 Episode Num: 10991 Episode T: 46 Reward: -7197.895104
Total T: 144174 Episode Num: 10992 Episode T: 17 Reward: -2212.091673
Total T: 144206 Episode Num: 109

action based on polilcy:[-1.         0.7468462]
action based on polilcy:[0.34249073 0.94691503]
action based on polilcy:[-0.99999994  0.54527915]
action based on polilcy:[0.99649477 0.99586535]
action based on polilcy:[-1.          0.27622184]
action based on polilcy:[-1.         -0.03366357]
action based on polilcy:[0.9999992 0.9999713]
Total T: 144638 Episode Num: 11015 Episode T: 7 Reward: -610.176662
action based on polilcy:[-1.         0.7471775]
action based on polilcy:[0.44764277 0.95523137]
action based on polilcy:[-0.99413425  0.5109553 ]
action based on polilcy:[0.98845065 0.99217725]
action based on polilcy:[-1.          0.34869426]
action based on polilcy:[0.9999961  0.99995196]
action based on polilcy:[-1.         0.3222096]
Total T: 144645 Episode Num: 11016 Episode T: 7 Reward: -650.227696
action based on polilcy:[-1.      0.7405]
action based on polilcy:[-1.         0.7041191]
action based on polilcy:[0.53828186 0.98934424]
action based on polilcy:[-1.         0.7847146

action based on polilcy:[-1.         0.7101034]
action based on polilcy:[-0.9999355  0.532511 ]
action based on polilcy:[0.79185414 0.9565339 ]
action based on polilcy:[0.86914915 0.9603675 ]
action based on polilcy:[-1.          0.36135817]
action based on polilcy:[0.96784997 0.9783281 ]
action based on polilcy:[-1.         0.3080144]
action based on polilcy:[0.99756235 0.99363357]
Total T: 144787 Episode Num: 11031 Episode T: 8 Reward: -734.474057
action based on polilcy:[-1.          0.70796275]
action based on polilcy:[0.64509255 0.9480565 ]
action based on polilcy:[-1.         0.5621695]
action based on polilcy:[0.889152   0.96831024]
action based on polilcy:[-1.          0.63752794]
action based on polilcy:[0.93926716 0.9834184 ]
action based on polilcy:[-1.         0.4064382]
action based on polilcy:[0.49256134 0.94908625]
action based on polilcy:[-0.92387545  0.53758264]
action based on polilcy:[0.9945781 0.9976886]
action based on polilcy:[0.36885312 0.88505185]
action based o

action based on polilcy:[-1.          0.69885063]
action based on polilcy:[-0.9295876  0.4306153]
action based on polilcy:[-0.9413424   0.33041185]
action based on polilcy:[-0.9344249   0.32941473]
action based on polilcy:[-1.          0.40758577]
action based on polilcy:[-1.          0.31902304]
action based on polilcy:[-0.99999994  0.36654812]
action based on polilcy:[-0.92236817  0.3408258 ]
action based on polilcy:[-1.          0.24146806]
Total T: 144964 Episode Num: 11047 Episode T: 9 Reward: -697.175044
action based on polilcy:[-1.          0.68550396]
action based on polilcy:[-1.          0.54990417]
action based on polilcy:[-1.         0.4080088]
action based on polilcy:[-1.         0.4013248]
action based on polilcy:[-0.99913394  0.34704274]
action based on polilcy:[-1.         0.3640932]
action based on polilcy:[-1.          0.16773418]
action based on polilcy:[-1.         0.3505288]
Total T: 144972 Episode Num: 11048 Episode T: 8 Reward: -934.659396
action based on polilcy:

action based on polilcy:[-1.        0.695513]
action based on polilcy:[-0.9999533   0.47875592]
action based on polilcy:[-0.9999809   0.44552484]
action based on polilcy:[-0.9999893   0.40838414]
action based on polilcy:[-1.          0.51698756]
action based on polilcy:[-0.9999918  0.4064819]
action based on polilcy:[-1.          0.48690107]
action based on polilcy:[-0.9999891   0.40741554]
action based on polilcy:[-0.9999417   0.43182248]
action based on polilcy:[-1.          0.56446064]
action based on polilcy:[-0.9999548   0.42759606]
action based on polilcy:[-0.99991184  0.4120404 ]
action based on polilcy:[-1.          0.41389638]
action based on polilcy:[-0.9999646   0.36651406]
action based on polilcy:[-1.          0.30300787]
action based on polilcy:[-1.         0.3811365]
action based on polilcy:[-1.          0.32405657]
Total T: 145129 Episode Num: 11059 Episode T: 17 Reward: -1243.918287
action based on polilcy:[-1.         0.6907134]
action based on polilcy:[-1.         0.5

action based on polilcy:[-1.         0.6313394]
action based on polilcy:[-1.          0.41371414]
action based on polilcy:[-0.9999942  0.286112 ]
action based on polilcy:[-1.          0.29213896]
action based on polilcy:[-1.          0.14045842]
action based on polilcy:[-0.9999999   0.03346941]
action based on polilcy:[-1.          0.01866303]
action based on polilcy:[-0.9999999   0.04946988]
Total T: 145302 Episode Num: 11070 Episode T: 8 Reward: -538.826690
action based on polilcy:[-1.          0.63621926]
action based on polilcy:[-1.         0.5263519]
action based on polilcy:[-0.9999958   0.51214826]
action based on polilcy:[-1.        0.497265]
action based on polilcy:[-0.9997604   0.46580887]
action based on polilcy:[-1.         0.4112602]
action based on polilcy:[-0.99999315  0.3226839 ]
action based on polilcy:[-0.99999374  0.30832466]
action based on polilcy:[-1.         0.2074138]
action based on polilcy:[-1.          0.19341114]
action based on polilcy:[-0.9997805  0.2937986

action based on polilcy:[-1.          0.69924116]
action based on polilcy:[-0.9999599  0.4875631]
action based on polilcy:[-1.          0.53733104]
action based on polilcy:[-1.          0.46066967]
action based on polilcy:[-1.         0.3515106]
action based on polilcy:[-1.          0.46246868]
action based on polilcy:[-0.999989    0.27362987]
action based on polilcy:[-1.          0.31681654]
Total T: 145457 Episode Num: 11081 Episode T: 8 Reward: -719.606659
action based on polilcy:[-1.         0.6957507]
action based on polilcy:[-1.         0.5723242]
action based on polilcy:[-0.9999999   0.40089512]
action based on polilcy:[-1.          0.47452587]
action based on polilcy:[-1.          0.41457433]
action based on polilcy:[-1.          0.45548323]
action based on polilcy:[-0.99990714  0.44059083]
action based on polilcy:[-1.          0.43213633]
action based on polilcy:[-0.9998693  0.4200818]
action based on polilcy:[-0.9999283   0.40416822]
action based on polilcy:[-0.9999626   0.38

action based on polilcy:[-1.          0.64900947]
action based on polilcy:[-1.          0.52890027]
action based on polilcy:[-0.999992    0.35784262]
action based on polilcy:[-0.9999956  0.2933324]
action based on polilcy:[-0.9999871   0.23109312]
action based on polilcy:[-1.          0.16578372]
action based on polilcy:[-1.          0.13623425]
action based on polilcy:[-1.          0.03108719]
action based on polilcy:[-0.99981874  0.17657486]
Total T: 145626 Episode Num: 11093 Episode T: 9 Reward: -511.863865
action based on polilcy:[-1.          0.64995754]
action based on polilcy:[-0.99994993  0.47857398]
action based on polilcy:[-0.9999956   0.61776084]
action based on polilcy:[-0.99992436  0.5281537 ]
action based on polilcy:[-0.99987847  0.45081252]
action based on polilcy:[-0.99989176  0.5024134 ]
action based on polilcy:[-0.99853987  0.22999458]
action based on polilcy:[-0.9984061  0.8401589]
action based on polilcy:[-0.99970853  0.28986531]
action based on polilcy:[-0.9990228 

action based on polilcy:[-1.          0.64747214]
action based on polilcy:[-0.99997634  0.52839565]
action based on polilcy:[-0.9999568  0.4556487]
action based on polilcy:[-1.         0.5315113]
action based on polilcy:[-0.99991393  0.43603116]
action based on polilcy:[-0.9999105   0.43055314]
action based on polilcy:[-0.99997616  0.37962762]
action based on polilcy:[-1.         0.4499996]
action based on polilcy:[-0.9999795   0.34019232]
action based on polilcy:[-1.          0.35155922]
action based on polilcy:[-1.          0.21701993]
action based on polilcy:[-1.          0.25036883]
action based on polilcy:[-1.          0.28107288]
action based on polilcy:[-0.99999994  0.20994842]
action based on polilcy:[-1.          0.23829319]
action based on polilcy:[-0.99993706  0.17350426]
Total T: 145817 Episode Num: 11104 Episode T: 16 Reward: -1299.507328
action based on polilcy:[-1.       0.64513]
action based on polilcy:[-1.          0.49922934]
action based on polilcy:[-1.          0.32

action based on polilcy:[-1.          0.64491737]
action based on polilcy:[-1.          0.55133903]
action based on polilcy:[-1.          0.52171934]
action based on polilcy:[-1.         0.4653873]
action based on polilcy:[-0.99999166  0.31099266]
action based on polilcy:[-0.99999267  0.29515612]
action based on polilcy:[-0.99999315  0.29563698]
action based on polilcy:[-0.9999755   0.24407697]
action based on polilcy:[-0.9999845  0.2589504]
action based on polilcy:[-1.          0.38094473]
action based on polilcy:[-0.9999952  0.3297516]
action based on polilcy:[-0.99997675  0.25942022]
action based on polilcy:[-1.          0.34605652]
Total T: 145969 Episode Num: 11116 Episode T: 13 Reward: -910.928859
action based on polilcy:[-1.         0.6092497]
action based on polilcy:[-0.99998623  0.31092244]
action based on polilcy:[-1.          0.30901232]
action based on polilcy:[-1.         0.2558551]
action based on polilcy:[-0.99996257  0.13809736]
action based on polilcy:[-1.          0.2

action based on polilcy:[-1.         0.5698289]
action based on polilcy:[-0.9999305   0.51510525]
action based on polilcy:[-0.9999832   0.37166587]
action based on polilcy:[-0.9999992   0.35321262]
action based on polilcy:[-1.          0.38778928]
action based on polilcy:[-1.         0.3186995]
action based on polilcy:[-1.          0.34323132]
action based on polilcy:[-0.9998739   0.33730662]
action based on polilcy:[-0.99940723  0.37223887]
action based on polilcy:[-0.9999322   0.36105278]
action based on polilcy:[-0.9977648   0.38185757]
action based on polilcy:[-0.99890995  0.39133534]
action based on polilcy:[-0.99918556  0.05684608]
action based on polilcy:[-0.98303485  0.3186729 ]
action based on polilcy:[-0.9950667   0.01380989]
action based on polilcy:[-0.99957025  0.70629215]
action based on polilcy:[-0.9989045  -0.18682647]
action based on polilcy:[-0.99759644  0.9964258 ]
action based on polilcy:[-0.9976109   0.17714845]
action based on polilcy:[-0.99357796  0.3899015 ]
acti

action based on polilcy:[-1.          0.58273673]
action based on polilcy:[-0.9999938   0.33886933]
action based on polilcy:[-0.999998    0.32696843]
action based on polilcy:[-1.        0.273368]
action based on polilcy:[-0.999994   0.3171561]
action based on polilcy:[-0.9999829   0.22027056]
action based on polilcy:[-0.9999478   0.19889326]
action based on polilcy:[-0.9999901   0.34952044]
action based on polilcy:[-0.99986166  0.18737023]
Total T: 146810 Episode Num: 11148 Episode T: 9 Reward: -388.594326
action based on polilcy:[-1.         0.5881504]
action based on polilcy:[-1.         0.4481757]
action based on polilcy:[-0.9999496   0.43964243]
action based on polilcy:[-0.9999165   0.53432655]
action based on polilcy:[-0.99988765  0.46370706]
action based on polilcy:[-0.9994682   0.41034028]
action based on polilcy:[-0.9962674   0.39732412]
action based on polilcy:[-0.99980426  0.5764656 ]
action based on polilcy:[-0.99979734  0.24361964]
action based on polilcy:[-0.9998377   0.43

action based on polilcy:[-1.       0.59443]
action based on polilcy:[-1.          0.49717307]
action based on polilcy:[-0.9999976   0.37285984]
action based on polilcy:[-1.         0.3862249]
action based on polilcy:[-0.9999658  0.4109025]
action based on polilcy:[-0.9999999   0.36910278]
action based on polilcy:[-0.99999994  0.38504854]
action based on polilcy:[-0.999999    0.33192396]
action based on polilcy:[-1.          0.34969482]
action based on polilcy:[-1.          0.32079425]
action based on polilcy:[-0.99999684  0.2934358 ]
action based on polilcy:[-1.          0.30206007]
action based on polilcy:[-1.         0.2249378]
action based on polilcy:[-0.99999243  0.2521343 ]
Total T: 146970 Episode Num: 11157 Episode T: 14 Reward: -849.019678
action based on polilcy:[-1.         0.5939156]
action based on polilcy:[-1.         0.4440926]
action based on polilcy:[-1.         0.3948483]
action based on polilcy:[-0.9999958  0.3969746]
action based on polilcy:[-1.          0.33170214]
a

action based on polilcy:[-1.         0.6075709]
action based on polilcy:[-0.9999955   0.40908492]
action based on polilcy:[-0.9999984   0.42810726]
action based on polilcy:[-0.99999577  0.4231936 ]
action based on polilcy:[-0.99996376  0.4380652 ]
action based on polilcy:[-1.          0.46358708]
action based on polilcy:[-0.9999965   0.37365097]
action based on polilcy:[-0.99999624  0.4399443 ]
action based on polilcy:[-1.         0.4464051]
action based on polilcy:[-0.9979998   0.42582852]
action based on polilcy:[-0.9997701   0.36346787]
action based on polilcy:[-0.99979824  0.34990284]
action based on polilcy:[-1.         0.3751637]
action based on polilcy:[-0.9997654   0.33067527]
action based on polilcy:[-1.          0.28037137]
action based on polilcy:[-1.         0.2300621]
action based on polilcy:[-0.9999895   0.26226732]
action based on polilcy:[-0.9999918  0.2372063]
action based on polilcy:[-0.99999994  0.21452437]
action based on polilcy:[-0.99998474  0.22412752]
action bas

action based on polilcy:[-1.         0.6560824]
action based on polilcy:[-0.9999815   0.42253852]
action based on polilcy:[-1.          0.61269003]
action based on polilcy:[-0.99933976  0.46712366]
action based on polilcy:[-0.9999982  0.5007992]
action based on polilcy:[-0.99968123  0.44046345]
action based on polilcy:[-0.99992573  0.45780075]
action based on polilcy:[-0.99968904  0.4429478 ]
action based on polilcy:[-0.99954695  0.42387888]
action based on polilcy:[-0.9993116  0.3960166]
action based on polilcy:[-0.9996366   0.34561992]
action based on polilcy:[-1.          0.80525523]
action based on polilcy:[-0.99999976  0.3997538 ]
action based on polilcy:[0.3437956  0.95730424]
action based on polilcy:[-1.         0.8411583]
action based on polilcy:[0.9991505 0.9966613]
action based on polilcy:[-0.99999917  0.49177718]
action based on polilcy:[0.99986565 0.99878514]
action based on polilcy:[-1.          0.79990613]
action based on polilcy:[0.99999595 0.99945736]
action based on po

action based on polilcy:[-1.         0.6842698]
action based on polilcy:[-0.99882096  0.5292977 ]
action based on polilcy:[-0.9996155   0.50779235]
action based on polilcy:[-0.9999983   0.55300605]
action based on polilcy:[-1.          0.49611503]
action based on polilcy:[-1.         0.4189966]
action based on polilcy:[-1.          0.39091292]
action based on polilcy:[-1.          0.44606146]
action based on polilcy:[-0.99984574  0.3831539 ]
action based on polilcy:[-0.99969083  0.42741606]
action based on polilcy:[-1.          0.34915698]
action based on polilcy:[-0.9978776   0.40336606]
action based on polilcy:[-0.9999746  0.3676771]
action based on polilcy:[-1.          0.38213515]
action based on polilcy:[-1.          0.37379384]
action based on polilcy:[-0.9993049  0.304203 ]
action based on polilcy:[-0.9991066   0.31947544]
action based on polilcy:[-0.99840385  0.27696237]
Total T: 147453 Episode Num: 11184 Episode T: 18 Reward: -1463.488051
action based on polilcy:[-1.         0

action based on polilcy:[-1.         0.7277864]
action based on polilcy:[-0.99941564  0.49707016]
action based on polilcy:[-1.         0.6145391]
action based on polilcy:[-1.          0.56102026]
action based on polilcy:[-0.99915123  0.46300456]
action based on polilcy:[-0.9961742   0.51957905]
action based on polilcy:[-0.9999989   0.49676976]
action based on polilcy:[-0.99996996  0.4979264 ]
action based on polilcy:[-1.         0.4447447]
action based on polilcy:[-1.          0.44780815]
action based on polilcy:[-0.9865635   0.35563162]
action based on polilcy:[-1.          0.40987518]
action based on polilcy:[-1.          0.31110296]
action based on polilcy:[-0.95427275  0.32121593]
action based on polilcy:[-0.87679255  0.29169476]
action based on polilcy:[-0.9999826   0.32492185]
action based on polilcy:[-0.67381954  0.28165904]
action based on polilcy:[-1.          0.26985976]
action based on polilcy:[-1.          0.20228978]
Total T: 147669 Episode Num: 11196 Episode T: 19 Reward:

action based on polilcy:[-1.          0.75656825]
action based on polilcy:[-0.9990722  0.5264571]
action based on polilcy:[-0.99999994  0.5373581 ]
action based on polilcy:[-0.9329354   0.49629658]
action based on polilcy:[-1.         0.4605489]
action based on polilcy:[-0.9894052  0.536071 ]
action based on polilcy:[-0.99650806  0.54883754]
action based on polilcy:[-0.96340084  0.4995151 ]
action based on polilcy:[-0.999905  0.460426]
Total T: 147816 Episode Num: 11207 Episode T: 9 Reward: -572.717148
action based on polilcy:[-1.         0.7505123]
action based on polilcy:[-0.9997106   0.49041408]
action based on polilcy:[-0.9996758  0.4967831]
action based on polilcy:[-0.9988375  0.5008806]
action based on polilcy:[-0.9985906   0.52031636]
action based on polilcy:[-0.99696475  0.5149674 ]
action based on polilcy:[-0.9656179   0.44767532]
action based on polilcy:[-0.9749341   0.49347854]
action based on polilcy:[-0.9865761  0.5068457]
action based on polilcy:[-0.9999997   0.49952763]


action based on polilcy:[-1.         0.7864201]
action based on polilcy:[-0.99976146  0.62774044]
action based on polilcy:[-0.99875987  0.5327628 ]
action based on polilcy:[-1.         0.6777556]
action based on polilcy:[-0.999999   0.6050596]
action based on polilcy:[-1.          0.52170587]
action based on polilcy:[-1.         0.4996869]
action based on polilcy:[-0.95232177  0.43244374]
action based on polilcy:[-1.          0.44541627]
action based on polilcy:[-0.9999979   0.45760703]
action based on polilcy:[-1.          0.31352043]
Total T: 147979 Episode Num: 11220 Episode T: 11 Reward: -1027.781638
action based on polilcy:[-1.         0.7771137]
action based on polilcy:[-0.9991105  0.5342606]
action based on polilcy:[-1.          0.54662055]
action based on polilcy:[-0.999359   0.5792284]
action based on polilcy:[-1.         0.5095351]
action based on polilcy:[-0.99852675  0.5454412 ]
action based on polilcy:[-0.9872393   0.47684368]
action based on polilcy:[-0.99999976  0.506266

action based on polilcy:[-1.          0.79351056]
action based on polilcy:[-0.9995959  0.7295972]
action based on polilcy:[-0.99944246  0.7475829 ]
action based on polilcy:[-0.99977434  0.633157  ]
action based on polilcy:[-0.99997973  0.6402321 ]
action based on polilcy:[-0.9929833  0.6038245]
action based on polilcy:[-0.99998087  0.632359  ]
action based on polilcy:[-0.9999928  0.6334885]
action based on polilcy:[-0.99750185  0.5925573 ]
action based on polilcy:[-0.99980587  0.62654054]
action based on polilcy:[-0.99999994  0.54196703]
action based on polilcy:[-0.8954197  0.5171721]
action based on polilcy:[-0.9999386   0.50432456]
action based on polilcy:[-0.9674505  0.5136616]
action based on polilcy:[-0.99890876  0.45855856]
Total T: 148641 Episode Num: 11250 Episode T: 15 Reward: -754.494627
action based on polilcy:[-1.         0.7952026]
action based on polilcy:[-0.9998357  0.6724504]
action based on polilcy:[-1.          0.64322096]
action based on polilcy:[-0.9999398  0.563691

action based on polilcy:[-1.          0.79747707]
action based on polilcy:[-0.9998998  0.6809258]
action based on polilcy:[-0.9996208  0.6660936]
action based on polilcy:[-0.99999857  0.64970946]
action based on polilcy:[-1.          0.62369406]
action based on polilcy:[-0.99991393  0.67408633]
action based on polilcy:[-0.9999995   0.65704596]
action based on polilcy:[-1.         0.5788021]
action based on polilcy:[-0.9997238   0.62976205]
action based on polilcy:[-0.9598917   0.56650233]
action based on polilcy:[-0.50149596  0.5316528 ]
Total T: 148796 Episode Num: 11264 Episode T: 11 Reward: -756.066898
action based on polilcy:[-1.         0.7933526]
action based on polilcy:[-0.9997111  0.7424109]
action based on polilcy:[-0.9999552   0.67043424]
action based on polilcy:[-0.9979828  0.648865 ]
action based on polilcy:[-0.9999887   0.56420326]
action based on polilcy:[0.44005927 0.58911985]
action based on polilcy:[-1.        0.412413]
action based on polilcy:[0.994703  0.6156241]
Tot

action based on polilcy:[-1.         0.7915747]
action based on polilcy:[-0.99995065  0.8812821 ]
action based on polilcy:[-1.          0.67584205]
action based on polilcy:[-1.         0.6625921]
action based on polilcy:[-0.9933526   0.61674297]
action based on polilcy:[-0.9999476  0.5697546]
action based on polilcy:[-0.99831027  0.5615169 ]
action based on polilcy:[-0.9999994  0.5837186]
action based on polilcy:[-1.          0.46864045]
Total T: 148954 Episode Num: 11278 Episode T: 9 Reward: -870.307661
action based on polilcy:[-1.          0.78497607]
action based on polilcy:[-0.99984026  0.7570626 ]
action based on polilcy:[-0.9999995  0.913417 ]
action based on polilcy:[-0.99986833  0.6436851 ]
action based on polilcy:[-0.99970466  0.6365925 ]
action based on polilcy:[-0.99797636  0.6165652 ]
action based on polilcy:[-0.9985342   0.64177954]
action based on polilcy:[-1.        0.519713]
action based on polilcy:[-0.7586452   0.54518664]
action based on polilcy:[-0.5929132   0.548107

action based on polilcy:[-1.          0.78452975]
action based on polilcy:[-0.999932    0.80852556]
action based on polilcy:[-0.99991137  0.6587837 ]
action based on polilcy:[-0.9997316   0.64116144]
action based on polilcy:[-0.993315    0.60212874]
action based on polilcy:[-0.99995667  0.64558953]
action based on polilcy:[-0.99553776  0.6099026 ]
action based on polilcy:[-0.9999625  0.6509892]
action based on polilcy:[-0.99996936  0.65552354]
action based on polilcy:[-1.         0.6402999]
action based on polilcy:[-0.9872164  0.567313 ]
action based on polilcy:[-1.          0.65343535]
action based on polilcy:[-0.9834813   0.55097103]
Total T: 149126 Episode Num: 11294 Episode T: 13 Reward: -655.803608
action based on polilcy:[-1.         0.7738681]
action based on polilcy:[-0.9999471  0.6512338]
action based on polilcy:[-0.9999965   0.63867784]
action based on polilcy:[-0.9966879  0.614825 ]
action based on polilcy:[-0.87267244  0.5569185 ]
action based on polilcy:[-1.          0.560

action based on polilcy:[-1.         0.7679699]
action based on polilcy:[-0.9999741  0.775985 ]
action based on polilcy:[-0.9999652  0.81846  ]
action based on polilcy:[-0.99998355  0.9551387 ]
action based on polilcy:[-1.         0.6034254]
action based on polilcy:[-0.9999644  0.6333201]
action based on polilcy:[-0.9999996   0.63887846]
action based on polilcy:[-0.99983746  0.6620145 ]
action based on polilcy:[-0.99508107  0.5905808 ]
action based on polilcy:[-0.997437    0.57384825]
action based on polilcy:[-0.9999998  0.558451 ]
action based on polilcy:[0.5966121  0.56752765]
Total T: 149291 Episode Num: 11309 Episode T: 12 Reward: -670.748187
action based on polilcy:[-1.          0.77138567]
action based on polilcy:[-1.         0.6539785]
action based on polilcy:[-1.          0.67005086]
action based on polilcy:[-1.         0.6892376]
action based on polilcy:[-1.         0.6395774]
action based on polilcy:[-0.9999588   0.69772017]
action based on polilcy:[-1.          0.62452674]
a

action based on polilcy:[-1.         0.7743756]
action based on polilcy:[-0.9999935   0.66769564]
action based on polilcy:[-1.          0.69413644]
action based on polilcy:[-0.99998987  0.71888673]
action based on polilcy:[-0.9999367  0.7283659]
action based on polilcy:[-0.99999577  0.7286682 ]
action based on polilcy:[-0.99997765  0.7191787 ]
action based on polilcy:[-0.99996436  0.72735137]
action based on polilcy:[-0.99997807  0.71783376]
action based on polilcy:[-0.9999325  0.7240653]
action based on polilcy:[-0.99767685  0.73057723]
Total T: 149462 Episode Num: 11323 Episode T: 11 Reward: -451.377505
action based on polilcy:[-1.          0.77773035]
action based on polilcy:[-0.9999924   0.65658176]
action based on polilcy:[-0.99999076  0.690565  ]
action based on polilcy:[-0.99999684  0.7124676 ]
action based on polilcy:[-1.          0.76665545]
action based on polilcy:[-0.99993646  0.93139046]
action based on polilcy:[-0.9999833  0.6083212]
action based on polilcy:[-0.9999656   0

action based on polilcy:[-1.         0.7941573]
action based on polilcy:[-0.9999992   0.65505105]
action based on polilcy:[-0.9999963   0.69210005]
action based on polilcy:[-1.         0.7226739]
action based on polilcy:[-0.99997157  0.73594385]
action based on polilcy:[-0.99999714  0.7183411 ]
action based on polilcy:[-0.99996936  0.73156404]
action based on polilcy:[-0.9996218   0.76522547]
action based on polilcy:[-0.9981088  0.7787577]
action based on polilcy:[-0.99792325  0.7939806 ]
Total T: 149617 Episode Num: 11337 Episode T: 10 Reward: -491.213421
action based on polilcy:[-1.          0.78015745]
action based on polilcy:[-0.9999998   0.97457105]
action based on polilcy:[-0.9999952  0.8581674]
action based on polilcy:[-0.99999064  0.79578626]
action based on polilcy:[-1.          0.80811566]
action based on polilcy:[-0.9999845   0.57129264]
action based on polilcy:[-0.9999604   0.60518074]
action based on polilcy:[-0.9999039   0.64099795]
action based on polilcy:[-0.99994195  0

action based on polilcy:[-1.          0.80034673]
action based on polilcy:[-1.          0.68165535]
action based on polilcy:[-1.          0.80596185]
action based on polilcy:[-0.9999898   0.61779773]
action based on polilcy:[-0.9999917  0.6604992]
action based on polilcy:[-1.          0.79323053]
action based on polilcy:[-0.99999094  0.6545006 ]
action based on polilcy:[-0.9999998  0.6378578]
action based on polilcy:[-1.          0.66337556]
action based on polilcy:[-1.         0.6830558]
action based on polilcy:[-0.9999628   0.72947806]
Total T: 149790 Episode Num: 11353 Episode T: 11 Reward: -1017.968148
action based on polilcy:[-1.         0.7968074]
action based on polilcy:[-0.9999972  0.6217574]
action based on polilcy:[-1.          0.75498176]
action based on polilcy:[-1.          0.76526064]
action based on polilcy:[-0.99992704  0.77388614]
action based on polilcy:[-0.99999946  0.833016  ]
action based on polilcy:[-1.         0.7606127]
action based on polilcy:[-0.9999244   0.76

action based on polilcy:[-1.         0.8371931]
action based on polilcy:[-0.9990811   0.82429206]
action based on polilcy:[-0.99981827  0.82226765]
Total T: 149953 Episode Num: 11369 Episode T: 12 Reward: -471.934248
action based on polilcy:[-1.         0.8330544]
action based on polilcy:[-0.99999046  0.947566  ]
action based on polilcy:[-0.99999565  0.8618364 ]
action based on polilcy:[-0.9999976  0.5906302]
action based on polilcy:[-1.          0.77080417]
action based on polilcy:[-0.9999937  0.6724963]
action based on polilcy:[-0.9999815   0.74079895]
action based on polilcy:[-0.9998378  0.7910037]
action based on polilcy:[-1.         0.8577192]
action based on polilcy:[-1.          0.90412915]
Total T: 149963 Episode Num: 11370 Episode T: 10 Reward: -505.485343
action based on polilcy:[-1.         0.8328116]
action based on polilcy:[-0.999985   0.9738656]
action based on polilcy:[-0.9999989   0.73022425]
action based on polilcy:[-1.         0.7621045]
action based on polilcy:[-1.  

action based on polilcy:[-1.         0.8867471]
action based on polilcy:[-1.         0.7739826]
action based on polilcy:[-1.          0.59635925]
action based on polilcy:[-1.         0.6482737]
action based on polilcy:[-0.99999976  0.5204009 ]
action based on polilcy:[-0.9999996  0.5896376]
action based on polilcy:[-1.         0.8324718]
action based on polilcy:[-1.         0.7510738]
action based on polilcy:[-0.9998967   0.80549693]
action based on polilcy:[-0.9998584  0.8664456]
action based on polilcy:[-0.9981678  0.8991567]
Total T: 150608 Episode Num: 11394 Episode T: 11 Reward: -903.999563
action based on polilcy:[-1.         0.8868097]
action based on polilcy:[-0.9999992   0.98381126]
action based on polilcy:[-0.9999996  0.9409977]
action based on polilcy:[-1.          0.40341243]
action based on polilcy:[-0.99999994  0.6639287 ]
action based on polilcy:[-0.99999994  0.3656073 ]
action based on polilcy:[-0.9999997   0.51358896]
action based on polilcy:[-0.9999977  0.6910292]
act

action based on polilcy:[-1.          0.86511666]
action based on polilcy:[-0.9999997  0.9431887]
action based on polilcy:[-1.         0.4382599]
action based on polilcy:[-0.9999999   0.48336914]
action based on polilcy:[-0.99999887  0.6786321 ]
action based on polilcy:[-0.99998266  0.8068299 ]
action based on polilcy:[-0.99999017  0.7967202 ]
action based on polilcy:[-0.9999104  0.8660824]
action based on polilcy:[-0.99999285  0.7757988 ]
action based on polilcy:[-1.         0.7498755]
Total T: 150772 Episode Num: 11407 Episode T: 12 Reward: -702.559400
action based on polilcy:[-1.          0.92612416]
action based on polilcy:[-0.99999994  0.94635284]
action based on polilcy:[-1.          0.39849925]
action based on polilcy:[-1.         0.8858993]
action based on polilcy:[-1.         0.8361963]
action based on polilcy:[-0.99999917  0.6786613 ]
action based on polilcy:[-1.         0.7918749]
action based on polilcy:[-1.          0.65207726]
action based on polilcy:[-0.9999927  0.737677

Total T: 150914 Episode Num: 11420 Episode T: 7 Reward: -595.264176
action based on polilcy:[-1.         0.9423783]
action based on polilcy:[-1.          0.67960906]
action based on polilcy:[-0.99999994  0.93912077]
action based on polilcy:[-1.          0.40573734]
action based on polilcy:[-1.         0.6853318]
action based on polilcy:[-0.99999994  0.581879  ]
action based on polilcy:[-1.         0.7842122]
action based on polilcy:[-1.          0.71195483]
action based on polilcy:[-0.99999565  0.73867273]
action based on polilcy:[-0.9999957   0.74037576]
action based on polilcy:[-0.99999774  0.69498384]
action based on polilcy:[-1.         0.8458623]
Total T: 150926 Episode Num: 11421 Episode T: 12 Reward: -723.479107
action based on polilcy:[-1.          0.93564826]
action based on polilcy:[-1.          0.44099566]
action based on polilcy:[-1.          0.76463455]
action based on polilcy:[-1.         0.5822292]
action based on polilcy:[-1.         0.5207597]
action based on polilcy:[

action based on polilcy:[-1.          0.95098543]
action based on polilcy:[-0.9999998  0.9880834]
action based on polilcy:[-1.         0.8899741]
action based on polilcy:[-1.          0.35556728]
action based on polilcy:[-0.99999976  0.97553927]
action based on polilcy:[-1.          0.35049182]
action based on polilcy:[-1.         0.5387621]
action based on polilcy:[-0.99999994  0.3878672 ]
action based on polilcy:[-0.99999994  0.40348694]
action based on polilcy:[-1.          0.38943034]
action based on polilcy:[-1.         0.6089253]
action based on polilcy:[-1.        0.761981]
action based on polilcy:[-0.99999917  0.8609104 ]
action based on polilcy:[-0.99999946  0.3380732 ]
action based on polilcy:[-0.9999979  0.3960885]
action based on polilcy:[-0.99999857  0.38793886]
action based on polilcy:[-1.         0.6020167]
action based on polilcy:[-0.999997    0.36098862]
action based on polilcy:[-1.         0.6212189]
action based on polilcy:[-0.9999942   0.64603525]
action based on po

action based on polilcy:[-1.         0.9724041]
action based on polilcy:[-1.          0.40256613]
action based on polilcy:[-1.          0.45897767]
action based on polilcy:[-0.99999994  0.9821704 ]
action based on polilcy:[-1.          0.39739233]
action based on polilcy:[-1.          0.40811014]
action based on polilcy:[-1.          0.44378188]
action based on polilcy:[-1.          0.39364865]
action based on polilcy:[-1.          0.67768794]
action based on polilcy:[-1.          0.47631478]
action based on polilcy:[-1.         0.5050479]
action based on polilcy:[-1.          0.50930625]
action based on polilcy:[-1.         0.7330073]
action based on polilcy:[-1.         0.6898217]
action based on polilcy:[-0.99999964  0.7680297 ]
action based on polilcy:[-0.9998608  0.7908818]
Total T: 151273 Episode Num: 11451 Episode T: 16 Reward: -895.828142
action based on polilcy:[-1.          0.97117054]
action based on polilcy:[-1.          0.66051924]
action based on polilcy:[-1.         0.90

action based on polilcy:[-1.          0.96922493]
action based on polilcy:[-1.          0.39088875]
action based on polilcy:[-1.         0.3585619]
action based on polilcy:[-1.         0.4843634]
action based on polilcy:[-1.          0.83209616]
action based on polilcy:[-0.9999999  0.877725 ]
action based on polilcy:[-1.         0.9213616]
Total T: 151429 Episode Num: 11462 Episode T: 7 Reward: -418.230946
action based on polilcy:[-1.         0.9698685]
action based on polilcy:[-1.          0.37980855]
action based on polilcy:[-1.          0.41067547]
action based on polilcy:[-1.          0.39258265]
action based on polilcy:[-1.          0.41215762]
action based on polilcy:[-1.          0.88480604]
action based on polilcy:[-1.          0.41386038]
action based on polilcy:[-1.          0.31862244]
action based on polilcy:[-1.         0.8362634]
action based on polilcy:[-1.        0.638264]
action based on polilcy:[-1.         0.7770478]
action based on polilcy:[-0.99999994  0.3142476 ]


action based on polilcy:[-1.         0.9791518]
action based on polilcy:[-1.          0.37754905]
action based on polilcy:[-1.          0.70502365]
action based on polilcy:[-1.          0.85259503]
action based on polilcy:[-1.         0.7411054]
action based on polilcy:[-1.          0.40634272]
action based on polilcy:[-0.9999999  0.5489178]
action based on polilcy:[-0.99999636  0.72887564]
action based on polilcy:[-1.          0.77337456]
action based on polilcy:[-0.9999588  0.8055123]
Total T: 151586 Episode Num: 11475 Episode T: 10 Reward: -611.825999
action based on polilcy:[-1.          0.97831833]
action based on polilcy:[-1.          0.66790056]
action based on polilcy:[-1.         0.8910761]
action based on polilcy:[-1.          0.75771886]
action based on polilcy:[-1.          0.29096675]
action based on polilcy:[-1.         0.7432372]
action based on polilcy:[-0.99999964  0.59971744]
Total T: 151593 Episode Num: 11476 Episode T: 7 Reward: -863.292554
action based on polilcy:[

action based on polilcy:[-1.         0.9796574]
action based on polilcy:[-1.          0.37611705]
action based on polilcy:[-1.          0.36635646]
action based on polilcy:[-1.         0.5643097]
action based on polilcy:[-1.         0.4735504]
action based on polilcy:[-1.         0.3346754]
action based on polilcy:[-1.          0.29852661]
action based on polilcy:[-1.          0.23692486]
action based on polilcy:[-1.         0.1337972]
action based on polilcy:[-0.99999994  0.1073992 ]
action based on polilcy:[-0.9999997   0.21450737]
action based on polilcy:[-1.          0.22992042]
Total T: 151752 Episode Num: 11489 Episode T: 12 Reward: -534.764267
action based on polilcy:[-1.         0.9775944]
action based on polilcy:[-1.         0.8166962]
action based on polilcy:[-1.          0.41048267]
action based on polilcy:[-1.          0.24497214]
action based on polilcy:[-1.          0.13905591]
action based on polilcy:[-1.        0.128725]
action based on polilcy:[-1.          0.11702175]

action based on polilcy:[-1.         0.9635708]
action based on polilcy:[-1.          0.41149187]
action based on polilcy:[-1.         0.3957157]
action based on polilcy:[-1.          0.30925795]
action based on polilcy:[-1.         0.8382581]
action based on polilcy:[-1.          0.56547314]
action based on polilcy:[-1.         0.4211774]
action based on polilcy:[-1.          0.42490014]
action based on polilcy:[-1.         0.3835825]
action based on polilcy:[-1.          0.29585406]
action based on polilcy:[-1.         0.4067336]
action based on polilcy:[-1.          0.43007663]
action based on polilcy:[-1.         0.5028428]
action based on polilcy:[-0.9999998   0.20044073]
Total T: 151922 Episode Num: 11501 Episode T: 14 Reward: -1078.838416
action based on polilcy:[-1.         0.9654529]
action based on polilcy:[-1.         0.9092281]
action based on polilcy:[-1.         0.2834093]
action based on polilcy:[-1.          0.18110844]
action based on polilcy:[-1.          0.38651964]


action based on polilcy:[-1.         0.8972923]
action based on polilcy:[-1.         0.4520418]
action based on polilcy:[-1.         0.7620465]
action based on polilcy:[-1.         0.5700407]
action based on polilcy:[-1.          0.46257135]
action based on polilcy:[-1.          0.43862516]
action based on polilcy:[-1.       0.41589]
action based on polilcy:[-1.          0.58138204]
action based on polilcy:[-1.         0.5062252]
action based on polilcy:[-1.         0.6551747]
action based on polilcy:[-0.9999958   0.35353225]
action based on polilcy:[-0.99999654  0.9001773 ]
action based on polilcy:[-1.        0.539191]
action based on polilcy:[-1.          0.66610885]
action based on polilcy:[-0.999999    0.38844314]
action based on polilcy:[-0.99999815  0.33433   ]
action based on polilcy:[-1.          0.33366832]
action based on polilcy:[-1.          0.37370998]
action based on polilcy:[-0.99999076  0.28284964]
action based on polilcy:[-1.          0.45882282]
action based on polilc

action based on polilcy:[-1.         0.8684141]
action based on polilcy:[-1.         0.3452688]
action based on polilcy:[-1.         0.2813956]
action based on polilcy:[-1.          0.42722404]
action based on polilcy:[-1.          0.26957715]
action based on polilcy:[-1.         0.2232405]
action based on polilcy:[-1.          0.22353624]
action based on polilcy:[-1.          0.17026964]
action based on polilcy:[-1.          0.42666247]
action based on polilcy:[-1.          0.14524308]
action based on polilcy:[-0.9999999   0.06440208]
action based on polilcy:[-0.99999994  0.07424979]
Total T: 152746 Episode Num: 11539 Episode T: 12 Reward: -535.473904
action based on polilcy:[-1.         0.8830195]
action based on polilcy:[-1.          0.34406608]
action based on polilcy:[-1.         0.5900879]
action based on polilcy:[-1.          0.42776108]
action based on polilcy:[-1.         0.7388884]
action based on polilcy:[-1.         0.2961914]
action based on polilcy:[-1.          0.2325410

action based on polilcy:[-1.         0.8594458]
action based on polilcy:[-1.          0.40421554]
action based on polilcy:[-1.        0.323603]
action based on polilcy:[-1.          0.34420228]
action based on polilcy:[-1.          0.33503443]
action based on polilcy:[-1.          0.41291076]
action based on polilcy:[-1.         0.6430659]
action based on polilcy:[-1.          0.22941661]
action based on polilcy:[-0.99999994  0.13507293]
action based on polilcy:[-1.          0.19800372]
action based on polilcy:[-1.          0.18299527]
action based on polilcy:[-1.          0.16976324]
action based on polilcy:[-0.9999997   0.03760209]
Total T: 152919 Episode Num: 11551 Episode T: 13 Reward: -659.977325
action based on polilcy:[-1.         0.8584224]
action based on polilcy:[-1.         0.4166798]
action based on polilcy:[-1.          0.73715043]
action based on polilcy:[-1.          0.39118066]
action based on polilcy:[-1.         0.6434494]
action based on polilcy:[-1.          0.66558

action based on polilcy:[-1.         0.8390924]
action based on polilcy:[-1.          0.38838947]
action based on polilcy:[-1.         0.6017275]
action based on polilcy:[-1.       0.34357]
action based on polilcy:[-1.          0.33186054]
action based on polilcy:[-1.          0.26954138]
action based on polilcy:[-1.         0.3166625]
action based on polilcy:[-1.          0.20042697]
action based on polilcy:[-1.          0.24036911]
action based on polilcy:[-1.          0.04079273]
Total T: 153075 Episode Num: 11561 Episode T: 10 Reward: -842.121394
action based on polilcy:[-1.          0.83188176]
action based on polilcy:[-1.          0.41931042]
action based on polilcy:[-1.          0.37896606]
action based on polilcy:[-1.          0.28143495]
action based on polilcy:[-1.         0.1977208]
action based on polilcy:[-1.          0.11781661]
action based on polilcy:[-1.         0.0847014]
action based on polilcy:[-1.          0.10383519]
action based on polilcy:[-1.          0.0830723

action based on polilcy:[-1.         0.8358561]
action based on polilcy:[-1.         0.3702426]
action based on polilcy:[-1.          0.27921104]
action based on polilcy:[-1.         0.2138657]
action based on polilcy:[-1.          0.19330294]
action based on polilcy:[-1.          0.13840215]
action based on polilcy:[-1.          0.20226324]
action based on polilcy:[-1.          0.18755086]
action based on polilcy:[-1.          0.22070807]
action based on polilcy:[-1.         0.4674899]
action based on polilcy:[-1.          0.52386355]
action based on polilcy:[-1.          0.30075866]
action based on polilcy:[-0.99999994  0.12465693]
Total T: 153238 Episode Num: 11573 Episode T: 13 Reward: -697.230725
action based on polilcy:[-1.         0.8175886]
action based on polilcy:[-1.         0.7856252]
action based on polilcy:[-1.         0.6601242]
action based on polilcy:[-1.          0.29389334]
action based on polilcy:[-1.         0.5368801]
action based on polilcy:[-1.         0.3006452]

action based on polilcy:[-1.         0.8360884]
action based on polilcy:[-1.          0.40834352]
action based on polilcy:[-1.          0.69502664]
action based on polilcy:[-1.          0.73494273]
action based on polilcy:[-1.          0.38602766]
action based on polilcy:[-1.          0.36512664]
action based on polilcy:[-1.          0.35891253]
action based on polilcy:[-1.          0.40143818]
action based on polilcy:[-1.          0.59090424]
action based on polilcy:[-0.9999998   0.28360435]
action based on polilcy:[-0.99999994  0.2326759 ]
action based on polilcy:[-0.99999964  0.1548322 ]
action based on polilcy:[-0.9999999   0.14744575]
action based on polilcy:[-1.          0.09252874]
action based on polilcy:[-0.9999954   0.01075844]
Total T: 153397 Episode Num: 11582 Episode T: 15 Reward: -974.354101
action based on polilcy:[-1.         0.7921324]
action based on polilcy:[-1.          0.39619863]
action based on polilcy:[-1.         0.2392058]
action based on polilcy:[-1.         

action based on polilcy:[-1.        0.812259]
action based on polilcy:[-1.          0.33769724]
action based on polilcy:[-1.          0.31110504]
action based on polilcy:[-1.         0.2051105]
action based on polilcy:[-1.          0.55484056]
action based on polilcy:[-1.         0.4962325]
action based on polilcy:[-1.          0.24129984]
action based on polilcy:[-0.99999994  0.09271925]
Total T: 153587 Episode Num: 11592 Episode T: 8 Reward: -815.733644
action based on polilcy:[-1.         0.7980088]
action based on polilcy:[-1.          0.33470216]
action based on polilcy:[-1.         0.3202608]
action based on polilcy:[-1.          0.52976644]
action based on polilcy:[-1.          0.16583657]
action based on polilcy:[-1.          0.29728848]
action based on polilcy:[-1.          0.10270676]
action based on polilcy:[-1.          0.24910036]
action based on polilcy:[-1.          0.20493631]
action based on polilcy:[-0.99999994  0.1416821 ]
action based on polilcy:[-1.          0.1593

action based on polilcy:[-1.          0.82954615]
action based on polilcy:[-1.          0.33696195]
action based on polilcy:[-1.          0.32088187]
action based on polilcy:[-1.          0.77503884]
action based on polilcy:[-1.        0.253563]
action based on polilcy:[-1.          0.60304314]
action based on polilcy:[-1.          0.62156117]
action based on polilcy:[-1.         0.5145787]
action based on polilcy:[-0.9999999   0.34476802]
action based on polilcy:[-0.99999994  0.36101392]
action based on polilcy:[-0.9999996   0.30948013]
action based on polilcy:[-0.9999997   0.20589161]
action based on polilcy:[-0.99999964  0.14189701]
action based on polilcy:[-0.9999998   0.14533485]
action based on polilcy:[-1.          0.37237564]
action based on polilcy:[-1.        0.356104]
action based on polilcy:[-0.9999981   0.19412164]
action based on polilcy:[-1.         0.0772163]
Total T: 153759 Episode Num: 11605 Episode T: 18 Reward: -1374.748316
action based on polilcy:[-1.        0.8166

action based on polilcy:[-1.         0.7749001]
action based on polilcy:[-1.         0.3524978]
action based on polilcy:[-1.          0.30158398]
action based on polilcy:[-1.          0.23478186]
action based on polilcy:[-1.          0.22835469]
action based on polilcy:[-1.          0.29495913]
action based on polilcy:[-1.          0.37647018]
action based on polilcy:[-1.         0.3433152]
action based on polilcy:[-1.         0.1994644]
action based on polilcy:[-0.9999999   0.08420558]
action based on polilcy:[-1.         0.0221038]
Total T: 153924 Episode Num: 11616 Episode T: 11 Reward: -566.851129
action based on polilcy:[-1.         0.8038043]
action based on polilcy:[-1.          0.24651754]
action based on polilcy:[-1.          0.43136728]
action based on polilcy:[-1.          0.37344557]
action based on polilcy:[-1.          0.27024144]
action based on polilcy:[-1.          0.72268844]
action based on polilcy:[-1.          0.33859742]
action based on polilcy:[-1.          0.413

action based on polilcy:[-1.         0.8001023]
action based on polilcy:[-1.          0.35264787]
action based on polilcy:[-1.          0.33901235]
action based on polilcy:[-1.          0.45129803]
action based on polilcy:[-1.          0.19892575]
action based on polilcy:[-1.          0.15328497]
action based on polilcy:[-1.         0.0294792]
action based on polilcy:[-1.         -0.05977536]
Total T: 154567 Episode Num: 11642 Episode T: 8 Reward: -584.626551
action based on polilcy:[-1.          0.80370116]
action based on polilcy:[-1.          0.34870464]
action based on polilcy:[-1.         0.8479488]
action based on polilcy:[-1.         0.6482463]
action based on polilcy:[-1.          0.53893363]
action based on polilcy:[-1.         0.6547191]
action based on polilcy:[-1.          0.29527083]
action based on polilcy:[-1.          0.47860754]
action based on polilcy:[-1.         0.5897405]
action based on polilcy:[-1.          0.58372724]
action based on polilcy:[-0.9999999   0.4112

action based on polilcy:[-1.        0.789625]
action based on polilcy:[-1.          0.49558255]
action based on polilcy:[-1.          0.28731146]
action based on polilcy:[-1.         0.4721012]
action based on polilcy:[-1.          0.44354853]
action based on polilcy:[-1.          0.30287567]
action based on polilcy:[-1.          0.31499723]
action based on polilcy:[-1.          0.27229232]
action based on polilcy:[-1.         0.7169316]
action based on polilcy:[-1.         0.3555488]
action based on polilcy:[-1.          0.32607892]
action based on polilcy:[-1.          0.32887277]
action based on polilcy:[-1.         0.2515725]
action based on polilcy:[-1.          0.24156818]
action based on polilcy:[-1.          0.20404051]
Total T: 154727 Episode Num: 11653 Episode T: 15 Reward: -1114.091053
action based on polilcy:[-1.          0.79464144]
action based on polilcy:[-1.         0.7881732]
action based on polilcy:[-1.          0.27042323]
action based on polilcy:[-1.          0.2080

action based on polilcy:[-1.          0.29910606]
action based on polilcy:[-1.          0.44071844]
action based on polilcy:[-1.         0.2467253]
action based on polilcy:[-1.          0.07702868]
action based on polilcy:[-1.         -0.00834337]
action based on polilcy:[-1.         -0.11026333]
Total T: 154870 Episode Num: 11664 Episode T: 8 Reward: -514.834117
action based on polilcy:[-1.         0.7986547]
action based on polilcy:[-1.          0.33948982]
action based on polilcy:[-1.          0.38386977]
action based on polilcy:[-1.          0.21344495]
action based on polilcy:[-1.          0.33810207]
action based on polilcy:[-1.         0.4935728]
action based on polilcy:[-1.          0.36715484]
action based on polilcy:[-1.          0.15110932]
action based on polilcy:[-1.          0.41040128]
action based on polilcy:[-1.         0.3474722]
Total T: 154880 Episode Num: 11665 Episode T: 10 Reward: -772.999673
action based on polilcy:[-1.         0.8062652]
action based on polilcy

action based on polilcy:[-1.         0.8054926]
action based on polilcy:[-1.          0.63963485]
action based on polilcy:[-1.         0.8105356]
action based on polilcy:[-1.          0.33850643]
action based on polilcy:[-1.         0.5758357]
action based on polilcy:[-1.          0.33530074]
action based on polilcy:[-1.          0.45490935]
action based on polilcy:[-1.          0.31547752]
action based on polilcy:[-1.          0.42224258]
action based on polilcy:[-1.          0.30356538]
action based on polilcy:[-1.          0.44823107]
action based on polilcy:[-1.          0.23396444]
Total T: 155038 Episode Num: 11679 Episode T: 12 Reward: -1194.806707
action based on polilcy:[-1.          0.80020237]
action based on polilcy:[-1.          0.42932457]
action based on polilcy:[-1.          0.52969277]
action based on polilcy:[-1.          0.47183007]
action based on polilcy:[-1.          0.28783217]
action based on polilcy:[-1.          0.22998291]
action based on polilcy:[-1.        

action based on polilcy:[-1.         0.8086964]
action based on polilcy:[-1.          0.35482663]
action based on polilcy:[-1.          0.56940997]
action based on polilcy:[-1.          0.45302022]
action based on polilcy:[-1.          0.43604377]
action based on polilcy:[-1.          0.36167273]
action based on polilcy:[-1.          0.76829946]
action based on polilcy:[-1.          0.34759954]
action based on polilcy:[-1.         0.2833509]
action based on polilcy:[-1.          0.33201396]
action based on polilcy:[-1.         0.1310356]
action based on polilcy:[-1.          0.03741179]
Total T: 155203 Episode Num: 11690 Episode T: 12 Reward: -982.531381
action based on polilcy:[-1.          0.81344014]
action based on polilcy:[-1.          0.41205612]
action based on polilcy:[-1.          0.41592845]
action based on polilcy:[-1.          0.53944623]
action based on polilcy:[-1.          0.56548125]
action based on polilcy:[-1.         0.7461063]
action based on polilcy:[-1.         0.

action based on polilcy:[-1.         0.8218709]
action based on polilcy:[-1.         0.4480087]
action based on polilcy:[-1.          0.35756716]
action based on polilcy:[-1.          0.49974525]
action based on polilcy:[-1.          0.46768087]
action based on polilcy:[-1.          0.47125873]
action based on polilcy:[-1.         0.4641973]
action based on polilcy:[-1.          0.44808432]
action based on polilcy:[-1.          0.46677512]
action based on polilcy:[-1.          0.38513902]
action based on polilcy:[-1.          0.37228593]
action based on polilcy:[-1.          0.29063722]
action based on polilcy:[-0.99999994  0.2682996 ]
action based on polilcy:[-0.99999994  0.25543252]
action based on polilcy:[-0.99999994  0.25135353]
action based on polilcy:[-0.99999994  0.32175684]
action based on polilcy:[-0.99999994  0.2542448 ]
action based on polilcy:[-0.99999976  0.36660317]
action based on polilcy:[-1.         0.5859625]
action based on polilcy:[-0.99999905  0.42831233]
Total T:

action based on polilcy:[-1.         0.8167881]
action based on polilcy:[-1.          0.49395868]
action based on polilcy:[-1.          0.44967636]
action based on polilcy:[-1.          0.80259967]
action based on polilcy:[-1.          0.59874773]
action based on polilcy:[-1.          0.28787783]
action based on polilcy:[-1.         0.3779325]
action based on polilcy:[-1.          0.18582849]
action based on polilcy:[-1.          0.09794439]
action based on polilcy:[-1.         -0.00948603]
action based on polilcy:[-1.          0.09126911]
Total T: 155548 Episode Num: 11711 Episode T: 11 Reward: -916.837549
action based on polilcy:[-1.          0.82293105]
action based on polilcy:[-1.         0.4570243]
action based on polilcy:[-1.         0.3739769]
action based on polilcy:[-1.         0.3940203]
action based on polilcy:[-1.          0.34427816]
action based on polilcy:[-1.         0.3869237]
action based on polilcy:[-1.          0.51960677]
action based on polilcy:[-1.         0.5168

action based on polilcy:[-1.          0.81334686]
action based on polilcy:[-1.          0.37192857]
action based on polilcy:[-1.         0.3540038]
action based on polilcy:[-1.         0.4451855]
action based on polilcy:[-1.          0.43814388]
action based on polilcy:[-1.         0.5291048]
action based on polilcy:[-1.         0.6038918]
action based on polilcy:[-1.         0.7325947]
action based on polilcy:[-1.          0.31408793]
action based on polilcy:[-1.          0.23699895]
action based on polilcy:[-1.          0.26372147]
action based on polilcy:[-1.          0.30112746]
action based on polilcy:[-1.          0.24034533]
action based on polilcy:[-1.          0.24035478]
action based on polilcy:[-1.         0.2240036]
action based on polilcy:[-0.99999994  0.09439916]
action based on polilcy:[-1.          0.03679866]
Total T: 155718 Episode Num: 11723 Episode T: 17 Reward: -957.211968
action based on polilcy:[-1.          0.82165694]
action based on polilcy:[-1.          0.374

action based on polilcy:[-1.          0.81554246]
action based on polilcy:[-1.          0.59487164]
action based on polilcy:[-1.          0.29998407]
action based on polilcy:[-1.          0.37537533]
action based on polilcy:[-1.        0.306644]
action based on polilcy:[-1.          0.29369125]
action based on polilcy:[-1.          0.25644204]
action based on polilcy:[-1.          0.32171127]
action based on polilcy:[-1.         0.4852748]
action based on polilcy:[-1.          0.24345475]
Total T: 155885 Episode Num: 11734 Episode T: 10 Reward: -748.386426
action based on polilcy:[-1.          0.81591535]
action based on polilcy:[-1.          0.35471547]
action based on polilcy:[-1.          0.67052555]
action based on polilcy:[-1.          0.34069622]
action based on polilcy:[-1.         0.2880605]
action based on polilcy:[-1.          0.40832523]
action based on polilcy:[-1.          0.28416982]
action based on polilcy:[-1.          0.16035032]
action based on polilcy:[-1.          0

action based on polilcy:[-1.          0.83283293]
action based on polilcy:[-1.         0.8861029]
action based on polilcy:[-1.         0.8533659]
action based on polilcy:[-1.          0.37606338]
action based on polilcy:[-1.         0.6060228]
action based on polilcy:[-1.          0.29028094]
action based on polilcy:[-1.          0.23164833]
action based on polilcy:[-1.          0.13235244]
action based on polilcy:[-1.          0.07356268]
action based on polilcy:[-1.          0.04110387]
action based on polilcy:[-1.         -0.21181351]
Total T: 156538 Episode Num: 11763 Episode T: 11 Reward: -908.600108
action based on polilcy:[-1.         0.8374473]
action based on polilcy:[-1.         0.3851426]
action based on polilcy:[-1.          0.39659765]
action based on polilcy:[-1.          0.88159245]
action based on polilcy:[-1.          0.40390795]
action based on polilcy:[-1.          0.71911824]
action based on polilcy:[-1.          0.28657776]
action based on polilcy:[-1.          0.2

action based on polilcy:[-1.          0.84258187]
action based on polilcy:[-1.          0.40845075]
action based on polilcy:[-1.          0.35601714]
action based on polilcy:[-1.          0.49391925]
action based on polilcy:[-1.          0.27196667]
action based on polilcy:[-1.          0.15400136]
action based on polilcy:[-1.          0.17595185]
action based on polilcy:[-1.         0.2452266]
action based on polilcy:[-1.          0.21457915]
action based on polilcy:[-1.          0.18659036]
Total T: 156700 Episode Num: 11775 Episode T: 10 Reward: -589.747297
action based on polilcy:[-1.         0.8364965]
action based on polilcy:[-1.         0.4209547]
action based on polilcy:[-1.         0.7485348]
action based on polilcy:[-1.          0.49892828]
action based on polilcy:[-1.         0.5411422]
action based on polilcy:[-1.         0.5710255]
action based on polilcy:[-1.         0.7831103]
action based on polilcy:[-1.          0.39423016]
action based on polilcy:[-1.          0.43570

action based on polilcy:[-1.         0.8566916]
action based on polilcy:[-1.         0.9087349]
action based on polilcy:[-1.          0.46460557]
action based on polilcy:[-1.          0.39331698]
action based on polilcy:[-1.         0.1657998]
action based on polilcy:[-1.          0.08236731]
action based on polilcy:[-1.          0.02455368]
action based on polilcy:[-1.          0.08183539]
Total T: 156850 Episode Num: 11787 Episode T: 8 Reward: -700.481565
action based on polilcy:[-1.         0.8604442]
action based on polilcy:[-1.         0.8272784]
action based on polilcy:[-1.         0.4132201]
action based on polilcy:[-1.          0.48247257]
action based on polilcy:[-1.          0.60157394]
action based on polilcy:[-1.          0.41142073]
action based on polilcy:[-1.          0.60551935]
action based on polilcy:[-1.          0.34455335]
action based on polilcy:[-1.        0.286684]
action based on polilcy:[-1.          0.27527407]
action based on polilcy:[-1.          0.24080944

action based on polilcy:[-1.         0.8776715]
action based on polilcy:[-1.         0.8113963]
action based on polilcy:[-1.          0.42274716]
action based on polilcy:[-1.          0.93176836]
action based on polilcy:[-1.        0.576845]
action based on polilcy:[-1.         0.5910181]
action based on polilcy:[-1.          0.43778458]
action based on polilcy:[-1.          0.39270043]
action based on polilcy:[-1.         0.2691151]
action based on polilcy:[-1.          0.29064935]
action based on polilcy:[-1.          0.26134568]
action based on polilcy:[-1.          0.27556953]
action based on polilcy:[-1.         0.3567084]
action based on polilcy:[-1.          0.28997907]
action based on polilcy:[-1.         0.2299489]
action based on polilcy:[-1.          0.25660735]
Total T: 157029 Episode Num: 11798 Episode T: 16 Reward: -1005.776741
action based on polilcy:[-1.          0.87900084]
action based on polilcy:[-1.         0.9228554]
action based on polilcy:[-1.         0.4437951]


action based on polilcy:[-1.          0.88651997]
action based on polilcy:[-1.         0.7871058]
action based on polilcy:[-1.          0.46767977]
action based on polilcy:[-1.         0.7877841]
action based on polilcy:[-1.         0.9083348]
action based on polilcy:[-1.          0.67754567]
action based on polilcy:[-1.          0.44502583]
action based on polilcy:[-1.         0.3799878]
action based on polilcy:[-1.          0.33148268]
action based on polilcy:[-1.          0.33648014]
action based on polilcy:[-1.          0.23128209]
action based on polilcy:[-1.          0.22861971]
action based on polilcy:[-1.          0.42872304]
action based on polilcy:[-1.          0.30233067]
action based on polilcy:[-1.          0.19979902]
Total T: 157196 Episode Num: 11812 Episode T: 15 Reward: -946.032344
action based on polilcy:[-1.         0.8952358]
action based on polilcy:[-1.          0.92788815]
action based on polilcy:[-1.         0.6012267]
action based on polilcy:[-1.          0.467

action based on polilcy:[-1.          0.89419687]
action based on polilcy:[-1.      0.7206]
action based on polilcy:[-1.          0.48305422]
action based on polilcy:[-1.         0.7950616]
action based on polilcy:[-1.          0.39564478]
action based on polilcy:[-1.          0.30820602]
action based on polilcy:[-1.          0.34414786]
action based on polilcy:[-1.          0.18713726]
action based on polilcy:[-1.        -0.0140201]
Total T: 157355 Episode Num: 11824 Episode T: 9 Reward: -639.333369
action based on polilcy:[-1.         0.8942672]
action based on polilcy:[-1.         0.6855044]
action based on polilcy:[-1.          0.59068084]
action based on polilcy:[-1.         0.3573188]
action based on polilcy:[-1.          0.36904648]
action based on polilcy:[-1.         0.9092099]
action based on polilcy:[-1.          0.42626122]
action based on polilcy:[-1.          0.43237796]
action based on polilcy:[-1.          0.27415264]
action based on polilcy:[-1.          0.43379715]
ac

action based on polilcy:[-1.          0.91046375]
action based on polilcy:[-1.         0.9264914]
action based on polilcy:[-1.        0.371553]
action based on polilcy:[-1.         0.3336811]
action based on polilcy:[-1.         0.3572814]
action based on polilcy:[-1.        0.611374]
action based on polilcy:[-1.         0.2961013]
action based on polilcy:[-1.          0.51448685]
action based on polilcy:[-1.          0.28809497]
action based on polilcy:[-1.          0.19654289]
action based on polilcy:[-1.          0.15671887]
action based on polilcy:[-1.          0.15802406]
Total T: 157538 Episode Num: 11839 Episode T: 12 Reward: -833.396078
action based on polilcy:[-1.         0.9055628]
action based on polilcy:[-1.          0.45527032]
action based on polilcy:[-1.          0.55419946]
action based on polilcy:[-1.        0.516996]
action based on polilcy:[-1.          0.46658078]
action based on polilcy:[-1.          0.42547384]
action based on polilcy:[-1.          0.77378803]
act

action based on polilcy:[-1.         0.9099827]
action based on polilcy:[-1.          0.44423223]
action based on polilcy:[-1.          0.49853206]
action based on polilcy:[-1.          0.31309292]
action based on polilcy:[-1.          0.38132018]
action based on polilcy:[-1.         0.3876847]
action based on polilcy:[-1.          0.37747264]
action based on polilcy:[-1.         0.4734865]
action based on polilcy:[-1.          0.30484098]
action based on polilcy:[-1.          0.46515563]
action based on polilcy:[-1.          0.68784475]
action based on polilcy:[-1.          0.40565002]
action based on polilcy:[-1.         0.5261358]
action based on polilcy:[-1.         0.5456789]
Total T: 157690 Episode Num: 11851 Episode T: 14 Reward: -993.336329
action based on polilcy:[-1.          0.91029197]
action based on polilcy:[-1.          0.92540604]
action based on polilcy:[-1.         0.4507447]
action based on polilcy:[-1.         0.5308988]
action based on polilcy:[-1.         0.685775

action based on polilcy:[-1.          0.90781343]
action based on polilcy:[-1.          0.45496032]
action based on polilcy:[-1.        0.392348]
action based on polilcy:[-1.          0.36806875]
action based on polilcy:[-1.          0.46336284]
action based on polilcy:[-1.          0.26644376]
action based on polilcy:[-1.          0.33423918]
action based on polilcy:[-1.          0.31342378]
action based on polilcy:[-1.          0.24927883]
action based on polilcy:[-1.         0.2074418]
action based on polilcy:[-1.         0.1479177]
Total T: 157859 Episode Num: 11865 Episode T: 11 Reward: -542.509989
action based on polilcy:[-1.         0.9042767]
action based on polilcy:[-1.          0.45627892]
action based on polilcy:[-1.        0.713997]
action based on polilcy:[-1.         0.6577135]
action based on polilcy:[-1.         0.4962324]
action based on polilcy:[-1.          0.41526407]
action based on polilcy:[-1.         0.3408012]
action based on polilcy:[-1.         0.2809746]
act

---------------------------------------
Episode_num: 11876, Evaluation over 1 episodes: -379.385618
---------------------------------------
Total T: 158000 Episode Num: 11876 Episode T: 12 Reward: -379.385618
Total T: 158033 Episode Num: 11877 Episode T: 33 Reward: -5178.949419
Total T: 158078 Episode Num: 11878 Episode T: 45 Reward: -7224.856063
Total T: 158090 Episode Num: 11879 Episode T: 12 Reward: -1716.634745
Total T: 158136 Episode Num: 11880 Episode T: 46 Reward: -7096.739282
Total T: 158184 Episode Num: 11881 Episode T: 48 Reward: -7177.268361
Total T: 158194 Episode Num: 11882 Episode T: 10 Reward: -1085.333902
Total T: 158241 Episode Num: 11883 Episode T: 47 Reward: -5621.850776
Total T: 158286 Episode Num: 11884 Episode T: 45 Reward: -7544.060818
Total T: 158312 Episode Num: 11885 Episode T: 26 Reward: -3533.521950
Total T: 158335 Episode Num: 11886 Episode T: 23 Reward: -4210.175960
Total T: 158365 Episode Num: 11887 Episode T: 30 Reward: -4191.544552
Total T: 158386 Episo

action based on polilcy:[-1.          0.94084495]
action based on polilcy:[-1.         0.8806125]
action based on polilcy:[-1.         0.4898185]
action based on polilcy:[-1.         0.5785755]
action based on polilcy:[-1.          0.23515199]
action based on polilcy:[-1.          0.16182745]
action based on polilcy:[-1.          0.18953195]
action based on polilcy:[-1.          0.12093633]
Total T: 158657 Episode Num: 11903 Episode T: 8 Reward: -726.808137
action based on polilcy:[-1.         0.9410131]
action based on polilcy:[-1.          0.87491775]
action based on polilcy:[-1.         0.5285717]
action based on polilcy:[-1.         0.9096139]
action based on polilcy:[-1.         0.4175986]
action based on polilcy:[-1.          0.43028107]
action based on polilcy:[-1.          0.79883283]
action based on polilcy:[-1.         0.5576383]
action based on polilcy:[-1.        0.632617]
action based on polilcy:[-1.          0.37011135]
action based on polilcy:[-1.          0.24643466]
ac

action based on polilcy:[-1.         0.9390558]
action based on polilcy:[-1.        0.931499]
action based on polilcy:[-1.          0.77415246]
action based on polilcy:[-1.         0.3012586]
action based on polilcy:[-1.         0.6928614]
action based on polilcy:[-1.          0.38071546]
action based on polilcy:[-1.         0.3602924]
action based on polilcy:[-1.          0.33037597]
action based on polilcy:[-1.          0.35013583]
action based on polilcy:[-1.          0.43886396]
action based on polilcy:[-1.          0.42649603]
action based on polilcy:[-1.          0.34096068]
Total T: 158821 Episode Num: 11914 Episode T: 12 Reward: -990.100467
action based on polilcy:[-1.        0.935266]
action based on polilcy:[-1.         0.4604173]
action based on polilcy:[-1.          0.59909236]
action based on polilcy:[-1.          0.43665102]
action based on polilcy:[-1.         0.5565525]
action based on polilcy:[-1.          0.40417433]
action based on polilcy:[-1.         0.3524084]
act

action based on polilcy:[-1.         0.9245865]
action based on polilcy:[-1.          0.44533512]
action based on polilcy:[-1.          0.40420073]
action based on polilcy:[-1.         0.3583173]
action based on polilcy:[-1.          0.33582819]
action based on polilcy:[-1.         0.3853538]
action based on polilcy:[-1.         0.3482566]
action based on polilcy:[-1.          0.39493755]
action based on polilcy:[-1.         0.2970637]
action based on polilcy:[-1.          0.17525475]
action based on polilcy:[-1.          0.35282764]
action based on polilcy:[-1.         0.2919336]
Total T: 158993 Episode Num: 11927 Episode T: 12 Reward: -581.857840
action based on polilcy:[-1.         0.9166226]
action based on polilcy:[-1.         0.9549195]
action based on polilcy:[-1.         0.4118528]
action based on polilcy:[-1.          0.34043303]
action based on polilcy:[-1.          0.44569626]
action based on polilcy:[-1.          0.26507384]
action based on polilcy:[-1.          0.16722034]

action based on polilcy:[-1.        0.923377]
action based on polilcy:[-1.         0.4793811]
action based on polilcy:[-1.         0.5634353]
action based on polilcy:[-1.          0.51826334]
action based on polilcy:[-1.          0.83988136]
action based on polilcy:[-1.         0.8903022]
action based on polilcy:[-1.         0.4584598]
action based on polilcy:[-1.          0.43829814]
action based on polilcy:[-1.          0.39833897]
action based on polilcy:[-1.          0.32251766]
action based on polilcy:[-1.          0.35470718]
action based on polilcy:[-1.          0.26096445]
action based on polilcy:[-1.          0.18776731]
action based on polilcy:[-1.          0.35144264]
action based on polilcy:[-1.          0.30085358]
action based on polilcy:[-1.          0.65036976]
action based on polilcy:[-1.          0.33709973]
action based on polilcy:[-1.          0.26687667]
Total T: 159169 Episode Num: 11942 Episode T: 18 Reward: -1166.149558
action based on polilcy:[-1.         0.921

action based on polilcy:[-1.         0.9042416]
action based on polilcy:[-1.         0.4325972]
action based on polilcy:[-1.          0.44846192]
action based on polilcy:[-1.          0.28466055]
action based on polilcy:[-1.         0.2597542]
action based on polilcy:[-1.          0.31022897]
action based on polilcy:[-1.          0.17553532]
action based on polilcy:[-1.          0.13674141]
Total T: 159320 Episode Num: 11951 Episode T: 8 Reward: -459.822708
action based on polilcy:[-1.         0.9016304]
action based on polilcy:[-1.        0.948515]
action based on polilcy:[-1.        0.540012]
action based on polilcy:[-1.        0.751048]
action based on polilcy:[-1.         0.4573138]
action based on polilcy:[-1.          0.35787818]
action based on polilcy:[-1.          0.31771386]
action based on polilcy:[-1.         0.2698913]
action based on polilcy:[-1.          0.35639787]
action based on polilcy:[-1.        0.283786]
action based on polilcy:[-1.         0.3570818]
action based

action based on polilcy:[-1.         0.5129299]
action based on polilcy:[-1.         0.2523487]
action based on polilcy:[-1.          0.54280114]
action based on polilcy:[-1.          0.41700456]
Total T: 159502 Episode Num: 11964 Episode T: 18 Reward: -1314.894945
action based on polilcy:[-1.         0.8780835]
action based on polilcy:[-1.          0.37865162]
action based on polilcy:[-1.          0.28560525]
action based on polilcy:[-1.          0.61706936]
action based on polilcy:[-1.          0.46243978]
action based on polilcy:[-1.          0.23271419]
action based on polilcy:[-1.          0.18438892]
action based on polilcy:[-1.          0.13155226]
action based on polilcy:[-1.         0.1216115]
action based on polilcy:[-1.          0.18094736]
Total T: 159512 Episode Num: 11965 Episode T: 10 Reward: -750.781215
action based on polilcy:[-1.         0.8738116]
action based on polilcy:[-1.         0.9177342]
action based on polilcy:[-1.          0.41787615]
action based on polilcy

action based on polilcy:[-1.          0.85264057]
action based on polilcy:[-1.         0.4033725]
action based on polilcy:[-1.          0.40791735]
action based on polilcy:[-1.          0.36974502]
action based on polilcy:[-1.         0.8996377]
action based on polilcy:[-1.          0.43971837]
action based on polilcy:[-1.          0.34424207]
action based on polilcy:[-1.          0.26320907]
action based on polilcy:[-1.         0.2585939]
action based on polilcy:[-1.         0.3819971]
action based on polilcy:[-1.          0.14485936]
action based on polilcy:[-1.          0.08456479]
action based on polilcy:[-1.          0.13839695]
Total T: 159665 Episode Num: 11974 Episode T: 13 Reward: -839.046714
action based on polilcy:[-1.          0.85033387]
action based on polilcy:[-1.          0.41774282]
action based on polilcy:[-1.        0.561122]
action based on polilcy:[-1.         0.4704684]
action based on polilcy:[-1.          0.27255648]
action based on polilcy:[-1.          0.21458

action based on polilcy:[-1.         0.8184656]
action based on polilcy:[-1.         0.8756479]
action based on polilcy:[-1.        0.406776]
action based on polilcy:[-1.          0.31207603]
action based on polilcy:[-1.          0.29464903]
action based on polilcy:[-1.         0.6572411]
action based on polilcy:[-1.         0.4852423]
action based on polilcy:[-1.          0.17316987]
action based on polilcy:[-1.         0.1188977]
action based on polilcy:[-1.          0.20648803]
action based on polilcy:[-1.          0.09693533]
Total T: 159855 Episode Num: 11987 Episode T: 11 Reward: -1205.229136
action based on polilcy:[-1.         0.8129686]
action based on polilcy:[-1.          0.38366812]
action based on polilcy:[-1.          0.52873564]
action based on polilcy:[-1.          0.37712702]
action based on polilcy:[-1.          0.34073222]
action based on polilcy:[-1.         0.5889801]
action based on polilcy:[-1.         0.7677067]
action based on polilcy:[-1.         0.6969719]
ac

Total T: 160059 Episode Num: 11998 Episode T: 12 Reward: -1459.783388
Total T: 160111 Episode Num: 11999 Episode T: 52 Reward: -7130.854856
Total T: 160122 Episode Num: 12000 Episode T: 11 Reward: -1619.884331
Total T: 160138 Episode Num: 12001 Episode T: 16 Reward: -2222.071219
Total T: 160180 Episode Num: 12002 Episode T: 42 Reward: -7429.719836
Total T: 160199 Episode Num: 12003 Episode T: 19 Reward: -2969.731910
Total T: 160215 Episode Num: 12004 Episode T: 16 Reward: -2171.102471
Total T: 160228 Episode Num: 12005 Episode T: 13 Reward: -1885.276671
Total T: 160240 Episode Num: 12006 Episode T: 12 Reward: -2011.262456
Total T: 160262 Episode Num: 12007 Episode T: 22 Reward: -3178.595071
Total T: 160306 Episode Num: 12008 Episode T: 44 Reward: -7504.919140
Total T: 160336 Episode Num: 12009 Episode T: 30 Reward: -3576.376642
Total T: 160385 Episode Num: 12010 Episode T: 49 Reward: -7135.815220
Total T: 160417 Episode Num: 12011 Episode T: 32 Reward: -4346.256981
Total T: 160440 Epis

action based on polilcy:[-1.          0.63406694]
action based on polilcy:[-1.         0.5088035]
action based on polilcy:[-1.          0.32044026]
action based on polilcy:[-1.          0.42214528]
action based on polilcy:[-1.          0.39704117]
action based on polilcy:[-1.          0.45413858]
action based on polilcy:[-1.         0.2708889]
action based on polilcy:[-1.          0.20463844]
action based on polilcy:[-1.          0.16938233]
action based on polilcy:[-1.          0.20691633]
action based on polilcy:[-1.          0.20675598]
action based on polilcy:[-1.          0.20325212]
action based on polilcy:[-1.         0.1824822]
action based on polilcy:[-1.         0.2503943]
action based on polilcy:[-1.          0.17443913]
action based on polilcy:[-0.99999994  0.13078825]
action based on polilcy:[-0.99999994  0.06759458]
action based on polilcy:[-1.          0.07246136]
action based on polilcy:[-1.          0.14409363]
Total T: 160685 Episode Num: 12023 Episode T: 19 Reward: -

action based on polilcy:[-1.          0.61712205]
action based on polilcy:[-1.          0.58147943]
action based on polilcy:[-1.         0.3098739]
action based on polilcy:[-0.99999994  0.26458126]
action based on polilcy:[-1.          0.22792198]
action based on polilcy:[-1.          0.25804856]
action based on polilcy:[-1.          0.32562914]
action based on polilcy:[-1.          0.36031035]
action based on polilcy:[-1.          0.46114537]
action based on polilcy:[-1.          0.45279032]
action based on polilcy:[0.9995832 0.992531 ]
action based on polilcy:[-0.9999695  0.8061645]
action based on polilcy:[0.9999979  0.99970907]
action based on polilcy:[-0.99999994  0.36164537]
action based on polilcy:[0.9975388 0.9486892]
action based on polilcy:[0.87640357 0.6224095 ]
action based on polilcy:[-0.99998766  0.1623176 ]
action based on polilcy:[0.98295826 0.65437794]
action based on polilcy:[-0.9999996   0.30780673]
action based on polilcy:[-0.9994829   0.19965862]
action based on po

action based on polilcy:[-1.         0.6446796]
action based on polilcy:[-1.         0.3525997]
action based on polilcy:[-1.         0.5844579]
action based on polilcy:[-1.          0.51995194]
action based on polilcy:[-1.          0.59598666]
action based on polilcy:[-1.          0.33488366]
action based on polilcy:[-1.          0.37650853]
action based on polilcy:[-1.          0.30883598]
action based on polilcy:[-0.9999987   0.19423175]
action based on polilcy:[-0.99999535  0.1648251 ]
action based on polilcy:[-0.9999904   0.09964769]
action based on polilcy:[-0.99997985  0.01506417]
action based on polilcy:[-0.99999917  0.11930918]
action based on polilcy:[-0.9999999   0.14162305]
action based on polilcy:[-0.99999934  0.17147538]
Total T: 161024 Episode Num: 12045 Episode T: 15 Reward: -1102.181409
action based on polilcy:[-1.         0.6411761]
action based on polilcy:[-1.          0.71721566]
action based on polilcy:[-0.99999994  0.29931968]
action based on polilcy:[-0.9999999  0

action based on polilcy:[-1.          0.65206003]
action based on polilcy:[-1.         0.5810259]
action based on polilcy:[-1.        0.278906]
action based on polilcy:[-1.         0.3023261]
action based on polilcy:[-1.         0.3579098]
action based on polilcy:[-1.         0.5838096]
action based on polilcy:[-0.99999994  0.3089372 ]
action based on polilcy:[-1.          0.43638587]
action based on polilcy:[-1.         0.3014311]
action based on polilcy:[-0.99999994  0.30838814]
action based on polilcy:[-0.99999976  0.26209277]
action based on polilcy:[-0.99999994  0.26496023]
action based on polilcy:[-0.9999993   0.19906162]
action based on polilcy:[-0.9999998   0.23908722]
action based on polilcy:[-0.9999985   0.21232907]
action based on polilcy:[-0.99999994  0.2520379 ]
action based on polilcy:[-0.99999565  0.23231348]
Total T: 161216 Episode Num: 12056 Episode T: 17 Reward: -1458.871159
action based on polilcy:[-1.          0.64832926]
action based on polilcy:[-1.         0.35334

action based on polilcy:[-1.       0.63265]
action based on polilcy:[-1.          0.36543366]
action based on polilcy:[-1.          0.41082227]
action based on polilcy:[-1.          0.33037272]
action based on polilcy:[-1.         0.6156963]
action based on polilcy:[-1.        0.554151]
action based on polilcy:[-1.          0.35611898]
action based on polilcy:[-1.         0.2843527]
action based on polilcy:[-1.        0.404182]
action based on polilcy:[-1.          0.26710504]
action based on polilcy:[-0.9999994   0.22995333]
action based on polilcy:[-0.9999972   0.16049542]
action based on polilcy:[-0.9999954  0.0989778]
action based on polilcy:[-0.9999997  0.1254595]
Total T: 161382 Episode Num: 12069 Episode T: 14 Reward: -1149.259980
action based on polilcy:[-1.         0.6411785]
action based on polilcy:[-1.          0.33953652]
action based on polilcy:[-1.         0.3938951]
action based on polilcy:[-1.         0.6220592]
action based on polilcy:[-1.        0.414823]
action based

action based on polilcy:[-1.         0.6463151]
action based on polilcy:[-1.          0.39306545]
action based on polilcy:[-1.          0.41174477]
action based on polilcy:[-1.          0.26909098]
action based on polilcy:[-1.          0.30595002]
action based on polilcy:[-1.         0.5674667]
action based on polilcy:[-1.          0.32133853]
action based on polilcy:[-0.9999996   0.18683322]
action based on polilcy:[-0.99999976  0.14365457]
action based on polilcy:[-0.9999995   0.11699972]
action based on polilcy:[-0.99999285  0.06420311]
action based on polilcy:[-0.99999976  0.13949673]
Total T: 161559 Episode Num: 12078 Episode T: 12 Reward: -923.672122
action based on polilcy:[-1.         0.6538823]
action based on polilcy:[-1.          0.36525026]
action based on polilcy:[-1.         0.6548698]
action based on polilcy:[-0.99999994  0.25910977]
action based on polilcy:[-1.          0.37465405]
action based on polilcy:[-0.99999994  0.26263738]
action based on polilcy:[-0.9999974   0

action based on polilcy:[-1.          0.62405384]
action based on polilcy:[-1.          0.36118442]
action based on polilcy:[-1.          0.24337901]
action based on polilcy:[-1.          0.24279019]
action based on polilcy:[-1.          0.23810484]
action based on polilcy:[-1.         0.2035397]
action based on polilcy:[-0.99999976  0.13904089]
action based on polilcy:[-0.9999994   0.06369928]
action based on polilcy:[-0.9999807   0.10533985]
action based on polilcy:[-0.99999696  0.05320201]
action based on polilcy:[-0.9999893   0.05665138]
Total T: 161736 Episode Num: 12088 Episode T: 11 Reward: -521.544353
action based on polilcy:[-1.         0.6285584]
action based on polilcy:[-1.         0.6674588]
action based on polilcy:[-1.         0.4217802]
action based on polilcy:[-1.          0.41215417]
action based on polilcy:[-1.          0.27448276]
action based on polilcy:[-1.         0.2133629]
action based on polilcy:[-1.         0.3391799]
action based on polilcy:[-0.9999996   0.177

action based on polilcy:[-1.          0.63635755]
action based on polilcy:[-1.         0.3267205]
action based on polilcy:[-1.         0.3304503]
action based on polilcy:[-1.          0.39727965]
action based on polilcy:[-1.          0.48001617]
action based on polilcy:[-1.         0.2698711]
action based on polilcy:[-0.99999994  0.29003075]
action based on polilcy:[-0.99999976  0.1208831 ]
action based on polilcy:[-0.9999999   0.20032017]
action based on polilcy:[-0.99999934  0.07503274]
action based on polilcy:[-1.          0.12209215]
action based on polilcy:[-1.          0.24284317]
action based on polilcy:[-0.99999994  0.20798422]
action based on polilcy:[-0.9999993   0.13812003]
Total T: 161916 Episode Num: 12100 Episode T: 14 Reward: -868.001449
action based on polilcy:[-1.          0.62899244]
action based on polilcy:[-1.          0.33556718]
action based on polilcy:[-1.         0.2844939]
action based on polilcy:[-1.          0.22269672]
action based on polilcy:[-0.9999999   0

action based on polilcy:[-1.          0.63825595]
action based on polilcy:[-1.         0.3196555]
action based on polilcy:[-1.          0.26491117]
action based on polilcy:[-1.          0.22353053]
action based on polilcy:[-1.         0.2645281]
action based on polilcy:[-0.99999994  0.168497  ]
action based on polilcy:[-0.99999976  0.14533375]
action based on polilcy:[-0.99999994  0.09286013]
action based on polilcy:[-1.          0.10949119]
action based on polilcy:[-1.          0.15111506]
action based on polilcy:[-0.99999994  0.2251668 ]
Total T: 162549 Episode Num: 12125 Episode T: 11 Reward: -462.604824
action based on polilcy:[-1.         0.6343305]
action based on polilcy:[-1.          0.31006068]
action based on polilcy:[-1.          0.29278433]
action based on polilcy:[-1.          0.47799522]
action based on polilcy:[-1.         0.4569453]
action based on polilcy:[-1.         0.3362046]
action based on polilcy:[-1.         0.2330405]
action based on polilcy:[-1.          0.235

action based on polilcy:[-1.          0.63795304]
action based on polilcy:[-1.          0.31514186]
action based on polilcy:[-1.         0.3680283]
action based on polilcy:[-1.          0.18297057]
action based on polilcy:[-0.9999994   0.21881515]
action based on polilcy:[-1.         0.1496302]
action based on polilcy:[-0.9999997   0.17015222]
action based on polilcy:[-0.9999997   0.14246339]
Total T: 162703 Episode Num: 12136 Episode T: 8 Reward: -559.362930
action based on polilcy:[-1.         0.6377652]
action based on polilcy:[-1.          0.35773534]
action based on polilcy:[-1.          0.33205855]
action based on polilcy:[-1.         0.4449265]
action based on polilcy:[-1.          0.24402799]
action based on polilcy:[-1.          0.23385039]
action based on polilcy:[-1.          0.25928292]
action based on polilcy:[-0.99999994  0.25075024]
action based on polilcy:[-1.          0.29825017]
action based on polilcy:[-1.          0.32209137]
action based on polilcy:[-1.         0.2

action based on polilcy:[-0.9999995   0.05236931]
Total T: 162862 Episode Num: 12148 Episode T: 9 Reward: -641.495893
action based on polilcy:[-1.          0.64033604]
action based on polilcy:[-1.         0.3982273]
action based on polilcy:[-1.          0.34543726]
action based on polilcy:[-1.          0.33197188]
action based on polilcy:[-1.          0.33477902]
action based on polilcy:[-1.          0.30277646]
action based on polilcy:[-1.          0.31833494]
action based on polilcy:[-1.          0.30634442]
action based on polilcy:[-1.          0.40447825]
action based on polilcy:[-1.         0.3107075]
action based on polilcy:[-1.         0.5949941]
action based on polilcy:[-1.         0.5158309]
action based on polilcy:[-0.99999964  0.28210914]
action based on polilcy:[-0.9999998   0.23586465]
action based on polilcy:[-1.          0.35599908]
action based on polilcy:[-0.9999999   0.24194735]
action based on polilcy:[-0.99999994  0.29871395]
action based on polilcy:[-0.99999964  0.

action based on polilcy:[-1.         0.6515894]
action based on polilcy:[-1.         0.3058553]
action based on polilcy:[-1.          0.35784152]
action based on polilcy:[-1.          0.31654432]
action based on polilcy:[-1.         0.5692036]
action based on polilcy:[-1.         0.3018013]
action based on polilcy:[-1.         0.2993764]
action based on polilcy:[-0.99999994  0.29805192]
action based on polilcy:[-1.         0.2942011]
action based on polilcy:[-0.99999994  0.31947204]
action based on polilcy:[-0.9999998   0.22951555]
action based on polilcy:[-0.9999996  0.1769217]
action based on polilcy:[-0.9999993   0.14188428]
action based on polilcy:[-0.9999962   0.11805672]
action based on polilcy:[-0.9999871   0.02251007]
action based on polilcy:[-0.9999998  0.108926 ]
action based on polilcy:[-1.          0.13901015]
action based on polilcy:[-0.9999928   0.08877914]
Total T: 163065 Episode Num: 12159 Episode T: 18 Reward: -836.927937
action based on polilcy:[-1.          0.6461013

action based on polilcy:[-1.         0.6652112]
action based on polilcy:[-1.          0.61252856]
action based on polilcy:[-1.         0.3859259]
action based on polilcy:[-1.          0.27652463]
action based on polilcy:[-1.          0.53986335]
action based on polilcy:[-1.          0.68725455]
action based on polilcy:[-1.          0.32498163]
action based on polilcy:[-0.99999976  0.2161854 ]
action based on polilcy:[-0.99999976  0.16416049]
action based on polilcy:[-1.          0.20660572]
action based on polilcy:[-1.          0.23846875]
action based on polilcy:[-1.         0.2425811]
action based on polilcy:[-0.9999995   0.20136745]
action based on polilcy:[-1.         0.3110564]
action based on polilcy:[-0.9999982   0.16428934]
action based on polilcy:[-1.          0.19461675]
Total T: 163219 Episode Num: 12169 Episode T: 16 Reward: -1186.473143
action based on polilcy:[-1.          0.65947795]
action based on polilcy:[-1.          0.33881918]
action based on polilcy:[-1.         0

action based on polilcy:[-1.         0.6601828]
action based on polilcy:[-1.         0.6278926]
action based on polilcy:[-1.         0.3240977]
action based on polilcy:[-1.         0.3864601]
action based on polilcy:[-1.          0.22288716]
action based on polilcy:[-1.          0.20647891]
action based on polilcy:[-1.         0.2469155]
action based on polilcy:[-0.99999994  0.1648819 ]
action based on polilcy:[-0.99999994  0.14040785]
action based on polilcy:[-0.9999991   0.05338136]
Total T: 163367 Episode Num: 12180 Episode T: 10 Reward: -745.231702
action based on polilcy:[-1.         0.6708185]
action based on polilcy:[-1.         0.3325806]
action based on polilcy:[-1.          0.30718374]
action based on polilcy:[-1.          0.25794187]
action based on polilcy:[-1.          0.38181385]
action based on polilcy:[-1.         0.4215126]
action based on polilcy:[-1.          0.27697092]
action based on polilcy:[-1.          0.28945154]
action based on polilcy:[-0.9999999   0.2221280

action based on polilcy:[-1.          0.68644667]
action based on polilcy:[-1.        0.287844]
action based on polilcy:[-1.         0.2952058]
action based on polilcy:[-1.          0.35783508]
action based on polilcy:[-1.          0.34845233]
action based on polilcy:[-1.          0.37762013]
action based on polilcy:[-1.         0.5087142]
action based on polilcy:[-1.          0.47953975]
action based on polilcy:[-0.99999994  0.33759364]
action based on polilcy:[-0.99999994  0.29687646]
action based on polilcy:[-1.          0.39556578]
action based on polilcy:[-1.          0.39350307]
action based on polilcy:[-0.9999999   0.30280504]
action based on polilcy:[-0.9999995   0.24533589]
action based on polilcy:[-0.9999999   0.32749507]
action based on polilcy:[-0.99999994  0.24074382]
action based on polilcy:[-0.99999964  0.23028117]
action based on polilcy:[-0.9999999   0.24354641]
action based on polilcy:[-0.9999997   0.23797686]
action based on polilcy:[-0.9999937   0.20477262]
action b

action based on polilcy:[-1.         0.6803543]
action based on polilcy:[-1.          0.35777557]
action based on polilcy:[-1.          0.32373267]
action based on polilcy:[-1.         0.2659312]
action based on polilcy:[-0.99999994  0.20938538]
action based on polilcy:[-0.99999994  0.18893255]
action based on polilcy:[-0.99999994  0.13204423]
action based on polilcy:[-0.9999935   0.00382599]
action based on polilcy:[-1.          0.10674353]
action based on polilcy:[-1.          0.11195463]
Total T: 163697 Episode Num: 12201 Episode T: 10 Reward: -583.168853
action based on polilcy:[-1.         0.6726204]
action based on polilcy:[-1.         0.7586901]
action based on polilcy:[-1.          0.35914543]
action based on polilcy:[-1.          0.34960112]
action based on polilcy:[-1.          0.37721616]
action based on polilcy:[-1.          0.33487946]
action based on polilcy:[-1.          0.32402623]
action based on polilcy:[-1.          0.29984316]
action based on polilcy:[-1.          0

action based on polilcy:[-1.         0.6815281]
action based on polilcy:[-1.          0.30519515]
action based on polilcy:[-1.         0.8467714]
action based on polilcy:[-1.         0.3788326]
action based on polilcy:[-1.          0.27498886]
action based on polilcy:[-0.9999999  0.1947976]
action based on polilcy:[-1.          0.24028826]
action based on polilcy:[-1.          0.26021433]
action based on polilcy:[-1.         0.2692647]
action based on polilcy:[-0.9999999   0.21396457]
action based on polilcy:[-1.          0.24310961]
action based on polilcy:[-0.9999999   0.26741108]
action based on polilcy:[-1.          0.23215605]
action based on polilcy:[-1.         0.4578517]
Total T: 163884 Episode Num: 12209 Episode T: 14 Reward: -1131.704236
action based on polilcy:[-1.         0.6641435]
action based on polilcy:[-1.         0.7246925]
action based on polilcy:[-1.          0.26341015]
action based on polilcy:[-1.          0.22276959]
action based on polilcy:[-1.          0.229096

action based on polilcy:[-1.          0.45372283]
action based on polilcy:[-1.          0.31945807]
action based on polilcy:[-1.          0.30236223]
action based on polilcy:[-1.          0.30531666]
action based on polilcy:[-1.          0.34734377]
action based on polilcy:[-0.99999994  0.23986657]
action based on polilcy:[-0.9999998  0.1796461]
action based on polilcy:[-0.9999998   0.33115968]
action based on polilcy:[-1.          0.23909205]
action based on polilcy:[-0.99999994  0.6014789 ]
action based on polilcy:[-0.9999998   0.21996593]
action based on polilcy:[-0.9999988   0.57156926]
action based on polilcy:[-0.99999994  0.24918841]
action based on polilcy:[-1.         0.2683115]
action based on polilcy:[-1.          0.33850977]
action based on polilcy:[-1.          0.37198937]
action based on polilcy:[-1.          0.47886062]
action based on polilcy:[-0.99999946  0.3728705 ]
action based on polilcy:[-0.9999999   0.42170802]
Total T: 164536 Episode Num: 12233 Episode T: 19 Rewar

action based on polilcy:[-0.9999988   0.21921162]
action based on polilcy:[-0.9999957   0.21455225]
action based on polilcy:[-0.9998405   0.17200974]
action based on polilcy:[-0.99996257  0.21702704]
action based on polilcy:[-0.9998038   0.11590742]
action based on polilcy:[-0.9999711   0.16905165]
action based on polilcy:[-0.9999901   0.18297292]
Total T: 164694 Episode Num: 12241 Episode T: 13 Reward: -669.889357
action based on polilcy:[-1.         0.4981124]
action based on polilcy:[-0.99999934  0.29572567]
action based on polilcy:[-1.          0.31983808]
action based on polilcy:[-1.          0.31529677]
action based on polilcy:[-1.         0.3137415]
action based on polilcy:[-1.          0.33564666]
action based on polilcy:[-0.9999999   0.26170954]
action based on polilcy:[-0.99999917  0.42852443]
action based on polilcy:[-0.99999976  0.3582325 ]
action based on polilcy:[-1.          0.49385646]
action based on polilcy:[-0.9999916  0.3527117]
action based on polilcy:[-0.99998665 

action based on polilcy:[-1.         0.5335102]
action based on polilcy:[-0.9983739   0.32157442]
action based on polilcy:[-0.99212706  0.29799023]
action based on polilcy:[-0.99087715  0.38910693]
action based on polilcy:[-0.9968314  0.3791651]
action based on polilcy:[-0.99558365  0.15916619]
action based on polilcy:[-0.9823717   0.44022074]
action based on polilcy:[-0.9939962   0.21913332]
action based on polilcy:[-0.99849206  0.25380495]
Total T: 164853 Episode Num: 12251 Episode T: 9 Reward: -813.603168
action based on polilcy:[-0.99999994  0.50619495]
action based on polilcy:[-0.9999987   0.26449683]
action based on polilcy:[-0.99977386  0.34806812]
action based on polilcy:[-0.9999078  0.3750386]
action based on polilcy:[-0.9944366  0.3114046]
action based on polilcy:[-0.99857974  0.33139575]
action based on polilcy:[-0.9924576   0.29460832]
action based on polilcy:[-0.9960097   0.41488138]
action based on polilcy:[-0.9996859   0.35718077]
action based on polilcy:[-0.9962284   0.

action based on polilcy:[-0.9855304  0.5791192]
action based on polilcy:[-0.71040124  0.4796324 ]
action based on polilcy:[-0.8680567  0.5539917]
action based on polilcy:[-0.69812226  0.46033478]
action based on polilcy:[-0.94379014  0.4426778 ]
action based on polilcy:[-0.99999994  0.31708747]
action based on polilcy:[-0.920112    0.46974444]
action based on polilcy:[-0.53091216  0.5538975 ]
action based on polilcy:[-0.69908214  0.5798347 ]
action based on polilcy:[-0.32660174  0.39990744]
action based on polilcy:[-0.779455    0.50491285]
action based on polilcy:[-0.5579456   0.55998707]
action based on polilcy:[-0.19629914  0.43187693]
action based on polilcy:[-0.2769415   0.47139177]
action based on polilcy:[-0.29415485  0.4638166 ]
action based on polilcy:[-0.81620693  0.48449048]
Total T: 165042 Episode Num: 12264 Episode T: 16 Reward: -1499.201803
action based on polilcy:[-0.9828413  0.5890317]
action based on polilcy:[-0.78085345  0.4632109 ]
action based on polilcy:[-0.9472799 

action based on polilcy:[-0.3807467  0.7022413]
action based on polilcy:[-0.7875091   0.50894374]
action based on polilcy:[-0.63180935  0.6138165 ]
action based on polilcy:[-0.4784447   0.42846918]
action based on polilcy:[-0.77044326  0.43784246]
action based on polilcy:[-0.6118595   0.33285275]
action based on polilcy:[-0.6558578   0.44359976]
action based on polilcy:[-0.47195753  0.48488528]
action based on polilcy:[-0.8379372  0.4995426]
action based on polilcy:[-0.81962466  0.49071392]
action based on polilcy:[-0.40185225  0.41787758]
action based on polilcy:[-0.67086554  0.4021246 ]
Total T: 165210 Episode Num: 12277 Episode T: 12 Reward: -1018.733515
action based on polilcy:[-0.31167415  0.7187377 ]
action based on polilcy:[-0.47858432  0.478247  ]
action based on polilcy:[-0.32244208  0.48265842]
action based on polilcy:[-0.44129094  0.54114896]
action based on polilcy:[-0.7863317  0.5461613]
action based on polilcy:[-0.55921423  0.31944472]
action based on polilcy:[-0.69672704

action based on polilcy:[-0.37860948  0.74126077]
action based on polilcy:[-0.5257608   0.48487416]
action based on polilcy:[-0.59829056  0.73051643]
action based on polilcy:[-0.5411036  0.7444111]
action based on polilcy:[-0.46372786  0.4834384 ]
action based on polilcy:[-0.47579855  0.45969546]
action based on polilcy:[-0.5194664  0.4669593]
action based on polilcy:[-0.39022914  0.35380208]
action based on polilcy:[-0.79859066  0.40578592]
action based on polilcy:[-0.39722937  0.44571176]
action based on polilcy:[-0.49046782  0.29995793]
Total T: 165364 Episode Num: 12290 Episode T: 11 Reward: -1068.380403
action based on polilcy:[-0.35928917  0.7373282 ]
action based on polilcy:[-0.9930629   0.33652192]
action based on polilcy:[-0.8096345   0.46502942]
action based on polilcy:[-0.6113548  0.508869 ]
action based on polilcy:[-0.9999996  0.7886328]
action based on polilcy:[-0.5810337   0.59278977]
action based on polilcy:[-0.7033067   0.55873066]
action based on polilcy:[-0.751039   0

action based on polilcy:[-0.38599652  0.7443606 ]
action based on polilcy:[-0.62910426  0.6213232 ]
action based on polilcy:[-0.45681214  0.47284228]
action based on polilcy:[-0.623767    0.53683233]
action based on polilcy:[-0.8131323  0.4374889]
action based on polilcy:[-0.45036048  0.37597075]
action based on polilcy:[-0.5250354   0.30243504]
action based on polilcy:[-0.49841544  0.30460694]
action based on polilcy:[-0.7284273   0.48967543]
action based on polilcy:[-0.3951244  0.406806 ]
Total T: 165531 Episode Num: 12305 Episode T: 10 Reward: -925.302082
action based on polilcy:[-0.3496569  0.7525344]
action based on polilcy:[-0.5908819  0.6421102]
action based on polilcy:[-0.45850387  0.5641593 ]
action based on polilcy:[-0.5825057  0.5831352]
action based on polilcy:[-0.39757994  0.41874048]
action based on polilcy:[-0.3986251   0.46375406]
action based on polilcy:[-0.7098713   0.52533484]
action based on polilcy:[-0.3469638   0.42416766]
action based on polilcy:[-0.6422727   0.5

action based on polilcy:[-0.29611504  0.7671845 ]
action based on polilcy:[-0.9999997  0.4256224]
action based on polilcy:[-0.9999523   0.35980773]
action based on polilcy:[-1.          0.36043385]
action based on polilcy:[-1.          0.21927173]
action based on polilcy:[-0.99999994  0.35831118]
action based on polilcy:[-1.          0.38640118]
action based on polilcy:[-1.          0.50296175]
action based on polilcy:[-0.99999905  0.33086535]
action based on polilcy:[-1.         0.2766918]
action based on polilcy:[-0.9838539   0.59407413]
action based on polilcy:[-0.41625902  0.38813105]
action based on polilcy:[-0.68793046  0.64051104]
action based on polilcy:[-0.804232   0.8407896]
action based on polilcy:[-0.3625004   0.35171005]
action based on polilcy:[-0.56959224  0.3762507 ]
action based on polilcy:[-0.55937743  0.49138337]
action based on polilcy:[-0.44072756  0.4840182 ]
action based on polilcy:[-0.5553111   0.56752306]
action based on polilcy:[-0.80831426  0.7960413 ]
action

action based on polilcy:[-0.39852765  0.75070804]
action based on polilcy:[-0.53101754  0.6125195 ]
action based on polilcy:[-0.5922061  0.604419 ]
action based on polilcy:[-0.4107914  0.4665764]
action based on polilcy:[-0.5952977   0.34832373]
action based on polilcy:[-0.4056672   0.38963246]
action based on polilcy:[-0.6254446   0.20333163]
action based on polilcy:[-0.68046546  0.13088857]
Total T: 165838 Episode Num: 12331 Episode T: 8 Reward: -797.861655
action based on polilcy:[-0.3896859  0.7525552]
action based on polilcy:[-0.5922718   0.67259467]
action based on polilcy:[-0.5561689  0.7544491]
action based on polilcy:[-0.5822549  0.6008874]
action based on polilcy:[-0.72638553  0.5069532 ]
action based on polilcy:[-0.43486056  0.3741881 ]
action based on polilcy:[-0.5830872   0.29948744]
action based on polilcy:[-0.65851617  0.09931177]
Total T: 165846 Episode Num: 12332 Episode T: 8 Reward: -948.109992
action based on polilcy:[-0.39069706  0.7522471 ]
action based on polilcy:

action based on polilcy:[-0.39122647  0.7592012 ]
action based on polilcy:[-0.46455112  0.6443896 ]
action based on polilcy:[-0.37488323  0.5018996 ]
action based on polilcy:[-0.46807578  0.48740026]
action based on polilcy:[-0.66577446  0.53389275]
action based on polilcy:[-0.6343149  0.6152604]
action based on polilcy:[-0.4798884   0.51395917]
action based on polilcy:[-0.6400018   0.35620412]
Total T: 165994 Episode Num: 12347 Episode T: 8 Reward: -889.575390
action based on polilcy:[-0.36388046  0.756609  ]
action based on polilcy:[-0.57617486  0.6286304 ]
action based on polilcy:[-0.35583282  0.4820695 ]
action based on polilcy:[-0.3428754   0.44800588]
action based on polilcy:[-0.6497948   0.29144457]
action based on polilcy:[-0.54740584  0.43719646]
---------------------------------------
Episode_num: 12348, Evaluation over 1 episodes: -711.343239
---------------------------------------
Total T: 166001 Episode Num: 12348 Episode T: 7 Reward: -758.878455
Total T: 166028 Episode Nu

action based on polilcy:[-0.47043777  0.77442884]
action based on polilcy:[-0.38525584  0.7201293 ]
action based on polilcy:[-0.3590798   0.56832916]
action based on polilcy:[-0.46765265  0.5891559 ]
action based on polilcy:[-0.29035977  0.48197663]
action based on polilcy:[-0.58476174  0.32292584]
action based on polilcy:[-0.6464391   0.18824512]
action based on polilcy:[-0.37653574  0.35452813]
action based on polilcy:[-0.4999826   0.23966229]
Total T: 166628 Episode Num: 12376 Episode T: 9 Reward: -514.628928
action based on polilcy:[-0.48064944  0.77267724]
action based on polilcy:[-0.39373133  0.5998457 ]
action based on polilcy:[-0.52005845  0.71386856]
action based on polilcy:[-0.4346834  0.5581366]
action based on polilcy:[-0.3739963   0.53943425]
action based on polilcy:[-0.29011416  0.49035862]
action based on polilcy:[-0.55871284  0.5554786 ]
action based on polilcy:[-0.54436827  0.55202067]
action based on polilcy:[-0.73822826  0.6033002 ]
action based on polilcy:[-0.259058

action based on polilcy:[-0.512586   0.7753606]
action based on polilcy:[-0.3451535  0.6263039]
action based on polilcy:[-0.46789497  0.7380345 ]
action based on polilcy:[-0.5741814   0.79402614]
action based on polilcy:[-0.45680037  0.6754057 ]
action based on polilcy:[-0.4323855   0.56739104]
action based on polilcy:[-0.31498277  0.55908346]
action based on polilcy:[-0.64781326  0.5346788 ]
action based on polilcy:[-0.24329655  0.47107103]
action based on polilcy:[-0.3530187   0.37540743]
action based on polilcy:[-0.44927225  0.19032158]
action based on polilcy:[-0.6041642   0.12881981]
Total T: 166786 Episode Num: 12390 Episode T: 12 Reward: -1061.901254
action based on polilcy:[-0.53158116  0.7739337 ]
action based on polilcy:[-0.56039417  0.6911243 ]
action based on polilcy:[-0.75066805  0.63050455]
action based on polilcy:[-0.34455684  0.5943995 ]
action based on polilcy:[-0.60496646  0.57735497]
action based on polilcy:[-0.34050977  0.4529183 ]
action based on polilcy:[-0.620162

action based on polilcy:[-0.50916487  0.7851405 ]
action based on polilcy:[-0.28676233  0.6324973 ]
action based on polilcy:[-0.36183587  0.6635249 ]
action based on polilcy:[-0.4170726  0.7161541]
action based on polilcy:[-0.73288226  0.67250115]
action based on polilcy:[-0.24458578  0.5075319 ]
action based on polilcy:[-0.40418327  0.38081944]
action based on polilcy:[-0.52779937  0.28523365]
action based on polilcy:[-0.6626334   0.15521845]
Total T: 166942 Episode Num: 12405 Episode T: 9 Reward: -698.745062
action based on polilcy:[-0.5117599  0.787617 ]
action based on polilcy:[-0.3673739   0.64670706]
action based on polilcy:[-0.37805378  0.6185806 ]
action based on polilcy:[-0.25893322  0.546505  ]
action based on polilcy:[-0.731355    0.44240165]
action based on polilcy:[-0.50573003  0.30727154]
action based on polilcy:[-0.62748176  0.29844442]
action based on polilcy:[-0.28433895  0.45954606]
action based on polilcy:[-0.38034865  0.379766  ]
Total T: 166951 Episode Num: 12406 E

action based on polilcy:[-0.49113807  0.79991287]
action based on polilcy:[-0.2384732   0.73803425]
action based on polilcy:[-0.47001842  0.73115224]
action based on polilcy:[-0.23432033  0.5551845 ]
action based on polilcy:[-0.70828557  0.6576458 ]
action based on polilcy:[-0.6917064  0.5515415]
action based on polilcy:[-0.5198365  0.3855932]
action based on polilcy:[-0.20098378  0.48936027]
action based on polilcy:[-0.3279801   0.56407624]
Total T: 167108 Episode Num: 12417 Episode T: 9 Reward: -1181.245146
action based on polilcy:[-0.51945543  0.7998601 ]
action based on polilcy:[-0.34190014  0.58709866]
action based on polilcy:[-0.17809263  0.86167634]
action based on polilcy:[-0.37289998  0.71032006]
action based on polilcy:[-0.36104825  0.5738553 ]
action based on polilcy:[-0.44249845  0.6600349 ]
action based on polilcy:[-0.32442755  0.5574009 ]
action based on polilcy:[-0.58750594  0.64729416]
action based on polilcy:[-0.79430944  0.5218805 ]
action based on polilcy:[-0.7778021

action based on polilcy:[-0.23689565  0.6583852 ]
action based on polilcy:[-0.4965236  0.8198511]
action based on polilcy:[-0.5587989  0.7665351]
action based on polilcy:[-0.4954176  0.734902 ]
action based on polilcy:[-0.33137655  0.5572638 ]
action based on polilcy:[-0.5051955   0.73776597]
action based on polilcy:[-0.49247882  0.7355399 ]
action based on polilcy:[-0.504473    0.71208966]
action based on polilcy:[-0.5981889   0.77166134]
action based on polilcy:[-0.37357357  0.54451466]
action based on polilcy:[-0.5089817   0.72458506]
action based on polilcy:[-0.4098862  0.5914103]
action based on polilcy:[-0.39346355  0.64406633]
action based on polilcy:[-0.42151862  0.6684722 ]
action based on polilcy:[-0.998772    0.46248454]
action based on polilcy:[-0.99997437  0.44052333]
action based on polilcy:[-0.9997126   0.46264827]
action based on polilcy:[-0.5467367   0.64266014]
action based on polilcy:[-1.          0.57913923]
action based on polilcy:[-0.66138697  0.59309864]
action b

action based on polilcy:[-0.4952792  0.7907946]
action based on polilcy:[-0.40741795  0.61878127]
action based on polilcy:[-0.4982127  0.7476456]
action based on polilcy:[-0.62267303  0.7448627 ]
action based on polilcy:[-0.3015244  0.4973584]
action based on polilcy:[-0.75200075  0.4232881 ]
action based on polilcy:[-0.5529424  0.2565676]
Total T: 167450 Episode Num: 12446 Episode T: 7 Reward: -879.861577
action based on polilcy:[-0.4905221  0.7915565]
action based on polilcy:[-0.47553402  0.6776507 ]
action based on polilcy:[-0.41226327  0.5704204 ]
action based on polilcy:[-0.2894766   0.57587945]
action based on polilcy:[-0.32460517  0.62474227]
action based on polilcy:[-0.44788578  0.8175707 ]
action based on polilcy:[-0.39039135  0.5596577 ]
action based on polilcy:[-0.50792074  0.763372  ]
action based on polilcy:[-0.61667717  0.6823633 ]
action based on polilcy:[-0.29165804  0.4821967 ]
action based on polilcy:[-0.28011078  0.5434183 ]
action based on polilcy:[-0.42252862  0.58

action based on polilcy:[-0.52661216  0.78313696]
action based on polilcy:[-0.4929189  0.6719415]
action based on polilcy:[-0.3904858   0.66481215]
action based on polilcy:[-0.3361299   0.65484726]
action based on polilcy:[-0.48031506  0.69541323]
action based on polilcy:[-0.52234834  0.63180375]
action based on polilcy:[-0.31212816  0.53162664]
action based on polilcy:[-0.29801214  0.5218389 ]
action based on polilcy:[-0.67381597  0.6124847 ]
action based on polilcy:[-0.4004173   0.49399185]
action based on polilcy:[-0.45556     0.35482118]
action based on polilcy:[-0.4662116   0.23705162]
Total T: 167614 Episode Num: 12460 Episode T: 12 Reward: -834.828575
action based on polilcy:[-0.5296834  0.7875437]
action based on polilcy:[-0.88183075  0.61083806]
action based on polilcy:[-0.9270351   0.44923732]
action based on polilcy:[-0.43557376  0.6725174 ]
action based on polilcy:[-0.39166707  0.6859025 ]
action based on polilcy:[-0.40630713  0.79781973]
action based on polilcy:[-0.3219355

action based on polilcy:[-0.33083677  0.7046978 ]
action based on polilcy:[-0.4872056  0.81216  ]
action based on polilcy:[-0.3186373   0.52475715]
action based on polilcy:[-0.63890576  0.68756026]
action based on polilcy:[-0.29148802  0.514849  ]
action based on polilcy:[-0.5348675  0.6743784]
action based on polilcy:[-0.59016347  0.5530839 ]
action based on polilcy:[-0.41315114  0.37343928]
action based on polilcy:[-0.43723685  0.34510142]
Total T: 167765 Episode Num: 12474 Episode T: 11 Reward: -1120.113658
action based on polilcy:[-0.5465038  0.7824583]
action based on polilcy:[-0.3850638   0.55226415]
action based on polilcy:[-0.43471193  0.6608232 ]
action based on polilcy:[-0.46925467  0.56709254]
action based on polilcy:[-0.6267911  0.3891816]
action based on polilcy:[-0.78970766  0.4252893 ]
action based on polilcy:[-0.29383978  0.46924758]
Total T: 167772 Episode Num: 12475 Episode T: 7 Reward: -805.933828
action based on polilcy:[-0.5733875  0.7783503]
action based on polilc

action based on polilcy:[-0.5926113  0.7670791]
action based on polilcy:[-0.39141265  0.7070381 ]
action based on polilcy:[-0.40235797  0.8218255 ]
action based on polilcy:[-0.47770298  0.58562875]
action based on polilcy:[-0.50570303  0.7167586 ]
action based on polilcy:[-0.4292875   0.54398966]
action based on polilcy:[-0.44256997  0.5649731 ]
action based on polilcy:[-0.7110491  0.5655514]
action based on polilcy:[-0.72095394  0.37275597]
action based on polilcy:[-0.40646714  0.3959146 ]
Total T: 167916 Episode Num: 12488 Episode T: 10 Reward: -1235.949684
action based on polilcy:[-0.5861895  0.7684778]
action based on polilcy:[-0.6483559  0.7593297]
action based on polilcy:[-0.4626296  0.7938437]
action based on polilcy:[-0.41891083  0.5727347 ]
action based on polilcy:[-0.31357405  0.50886065]
action based on polilcy:[-0.353946    0.45996216]
action based on polilcy:[-0.42277104  0.3868299 ]
action based on polilcy:[-0.37171233  0.42671737]
action based on polilcy:[-0.38635486  0.

action based on polilcy:[-0.72618556  0.7208615 ]
action based on polilcy:[-0.614571   0.6331086]
action based on polilcy:[-0.531986   0.5309334]
action based on polilcy:[-0.63210356  0.7972625 ]
action based on polilcy:[-0.70122313  0.6522093 ]
action based on polilcy:[-0.39255342  0.47069606]
action based on polilcy:[-0.6283163   0.24904534]
Total T: 168559 Episode Num: 12518 Episode T: 7 Reward: -898.636971
action based on polilcy:[-0.7270578  0.7156919]
action based on polilcy:[-0.5583073  0.5450642]
action based on polilcy:[-0.5184417  0.5465461]
action based on polilcy:[-0.67590797  0.63590276]
action based on polilcy:[-0.5786617   0.53894573]
action based on polilcy:[-0.4390465   0.47756496]
action based on polilcy:[-0.56337655  0.55201364]
action based on polilcy:[-0.646026   0.6682948]
action based on polilcy:[-0.63812584  0.4638481 ]
action based on polilcy:[-0.6775015   0.24015364]
action based on polilcy:[-0.8470776   0.08947618]
action based on polilcy:[-0.60285383  0.2454

action based on polilcy:[-0.7605637  0.6916275]
action based on polilcy:[-0.6051463   0.49631694]
action based on polilcy:[-0.62555075  0.5500865 ]
action based on polilcy:[-0.5373403  0.5640528]
action based on polilcy:[-0.5212488  0.6272392]
action based on polilcy:[-0.46099365  0.44028714]
action based on polilcy:[-0.6556511  0.2819714]
action based on polilcy:[-0.7425409   0.18903312]
action based on polilcy:[-0.78639865 -0.12263861]
action based on polilcy:[-0.8051129   0.09505162]
Total T: 168733 Episode Num: 12533 Episode T: 10 Reward: -588.057125
action based on polilcy:[-0.77152324  0.6898258 ]
action based on polilcy:[-0.69749755  0.69936895]
action based on polilcy:[-0.63394    0.5778128]
action based on polilcy:[-0.67464745  0.76313955]
action based on polilcy:[-0.50464576  0.41242358]
action based on polilcy:[-0.6550577   0.65064627]
action based on polilcy:[-0.572994  0.661179]
action based on polilcy:[-0.43246815  0.4988533 ]
action based on polilcy:[-0.6728935   0.66872

action based on polilcy:[-0.8285134   0.68026483]
action based on polilcy:[-0.73300064  0.5118507 ]
action based on polilcy:[-0.6075822  0.5409442]
action based on polilcy:[-0.7291471  0.7144402]
action based on polilcy:[-0.55839866  0.48062727]
action based on polilcy:[-0.8024762  0.7635326]
action based on polilcy:[-0.5933803  0.6471006]
action based on polilcy:[-0.47834137  0.49089286]
action based on polilcy:[-0.47239152  0.49762583]
action based on polilcy:[-0.67946184  0.3224027 ]
action based on polilcy:[-0.86751777  0.15999772]
action based on polilcy:[-0.8847295   0.09620459]
Total T: 168897 Episode Num: 12546 Episode T: 12 Reward: -821.676961
action based on polilcy:[-0.8348768   0.66420925]
action based on polilcy:[-0.7456019   0.71026254]
action based on polilcy:[-0.68424463  0.550848  ]
action based on polilcy:[-0.49436966  0.6175947 ]
action based on polilcy:[-0.6602372   0.36588076]
action based on polilcy:[-0.8627264   0.16268708]
action based on polilcy:[-0.9248631   0

action based on polilcy:[-0.82016647  0.6493479 ]
action based on polilcy:[-0.75437325  0.4253314 ]
action based on polilcy:[-0.63527966  0.5210532 ]
action based on polilcy:[-0.47072172  0.65066576]
action based on polilcy:[-0.6560619   0.35191426]
action based on polilcy:[-0.6973536   0.31146592]
action based on polilcy:[-0.5837547   0.35441092]
action based on polilcy:[-0.53053904  0.5721206 ]
Total T: 169038 Episode Num: 12559 Episode T: 8 Reward: -775.162139
action based on polilcy:[-0.8116189   0.65019214]
action based on polilcy:[-0.6196525   0.55792344]
action based on polilcy:[-0.8628488   0.64791703]
action based on polilcy:[-0.73029494  0.45081648]
action based on polilcy:[-0.6205581  0.5399438]
action based on polilcy:[-0.5331813   0.55579865]
action based on polilcy:[-0.76057744  0.49892932]
action based on polilcy:[-0.82344025  0.18874738]
action based on polilcy:[-0.78957355  0.1965152 ]
action based on polilcy:[-0.593686  0.351771]
action based on polilcy:[-0.4529734   

action based on polilcy:[-0.7887931   0.66045773]
action based on polilcy:[-0.7151534   0.54827416]
action based on polilcy:[-0.5739759   0.45265412]
action based on polilcy:[-0.48504263  0.67105746]
action based on polilcy:[-0.5590778   0.65454453]
action based on polilcy:[-0.36833283  0.54349387]
action based on polilcy:[-0.81147236  0.23092099]
action based on polilcy:[-0.6611352   0.32234526]
Total T: 169187 Episode Num: 12572 Episode T: 8 Reward: -647.585077
action based on polilcy:[-0.7964846  0.6547638]
action based on polilcy:[-0.8653677  0.6298015]
action based on polilcy:[-0.7190106   0.54087245]
action based on polilcy:[-0.5966008  0.4469173]
action based on polilcy:[-0.45779854  0.4460256 ]
action based on polilcy:[-0.40160993  0.46117413]
action based on polilcy:[-0.42092195  0.48574167]
action based on polilcy:[-0.498446    0.41817975]
action based on polilcy:[-0.88805616  0.13554132]
action based on polilcy:[-0.9268936   0.05556401]
Total T: 169197 Episode Num: 12573 Epi

action based on polilcy:[-0.8035314   0.63236225]
action based on polilcy:[-0.60491747  0.6225393 ]
action based on polilcy:[-0.6627701   0.44527483]
action based on polilcy:[-0.51564455  0.56821954]
action based on polilcy:[-1.          0.43296185]
action based on polilcy:[-1.        0.616225]
action based on polilcy:[-1.          0.36074635]
action based on polilcy:[-0.91454494  0.42470002]
action based on polilcy:[-0.6619286  0.5004409]
action based on polilcy:[-0.7956909  0.6188315]
action based on polilcy:[-0.38079095  0.49853048]
action based on polilcy:[-0.20342466  0.45655897]
action based on polilcy:[-0.26506317  0.351406  ]
action based on polilcy:[-0.37652305  0.8199856 ]
action based on polilcy:[-0.42510778  0.8121046 ]
action based on polilcy:[-0.55949664  0.34500438]
action based on polilcy:[-0.2148614  0.5331218]
action based on polilcy:[-0.35921556  0.47956347]
action based on polilcy:[-0.65948546  0.47073188]
action based on polilcy:[-0.2554878  0.5197097]
action based

action based on polilcy:[-0.7961864   0.64227605]
action based on polilcy:[-0.7448452   0.36641094]
action based on polilcy:[-0.58293605  0.40303347]
action based on polilcy:[-0.5725944   0.60280824]
action based on polilcy:[-0.27668878  0.5585935 ]
action based on polilcy:[-0.42437345  0.65455353]
action based on polilcy:[-0.41924682  0.58267134]
Total T: 169540 Episode Num: 12604 Episode T: 7 Reward: -787.822305
action based on polilcy:[-0.78675246  0.6461874 ]
action based on polilcy:[-0.7417465   0.37211904]
action based on polilcy:[-0.6534373   0.68685305]
action based on polilcy:[-0.5442089   0.73543274]
action based on polilcy:[-0.42411    0.4451254]
action based on polilcy:[-0.46386373  0.70364773]
action based on polilcy:[-0.3479099  0.5635979]
action based on polilcy:[-0.34662718  0.47748736]
action based on polilcy:[-0.512854    0.55335784]
action based on polilcy:[-0.43715233  0.46384385]
Total T: 169550 Episode Num: 12605 Episode T: 10 Reward: -1032.182863
action based on 

action based on polilcy:[-0.77717507  0.6572064 ]
action based on polilcy:[-1.         0.5636996]
action based on polilcy:[-0.6805904  0.5773523]
action based on polilcy:[-1.          0.32648388]
action based on polilcy:[-0.63416344  0.51323134]
action based on polilcy:[-0.72903144  0.6275928 ]
action based on polilcy:[-0.59334314  0.47345197]
action based on polilcy:[-0.4909194   0.45637146]
action based on polilcy:[-0.4168652  0.6101854]
action based on polilcy:[-0.23922369  0.5618626 ]
action based on polilcy:[-0.4681923   0.36688444]
action based on polilcy:[-0.35532224  0.6126759 ]
action based on polilcy:[-0.41194487  0.42079005]
action based on polilcy:[-0.5013695   0.32508278]
Total T: 169733 Episode Num: 12622 Episode T: 14 Reward: -1356.200364
action based on polilcy:[-0.80834377  0.6376798 ]
action based on polilcy:[-0.7278885  0.5845113]
action based on polilcy:[-0.82794607  0.7515505 ]
action based on polilcy:[-0.76827943  0.73388124]
action based on polilcy:[-0.4132994   

action based on polilcy:[-0.7706101  0.6496339]
action based on polilcy:[-0.67645335  0.7195156 ]
action based on polilcy:[-0.6591494  0.60899  ]
action based on polilcy:[-0.60678834  0.40665475]
action based on polilcy:[-0.3980911   0.41582218]
action based on polilcy:[-0.46685848  0.34856677]
action based on polilcy:[-0.4796182  0.332117 ]
action based on polilcy:[-0.20342812  0.5520157 ]
action based on polilcy:[-0.19536386  0.5764693 ]
action based on polilcy:[-0.35798985  0.42236194]
action based on polilcy:[-0.21743406  0.5896603 ]
Total T: 169898 Episode Num: 12636 Episode T: 11 Reward: -737.837061
action based on polilcy:[-0.7814302   0.63708854]
action based on polilcy:[-0.67698586  0.56195104]
action based on polilcy:[-0.8203565  0.7560371]
action based on polilcy:[-0.838057   0.6288551]
action based on polilcy:[-0.7008408  0.673602 ]
action based on polilcy:[-0.4648287   0.42224827]
action based on polilcy:[-0.41069984  0.4547506 ]
action based on polilcy:[-0.43728742  0.385

action based on polilcy:[-0.7463503  0.6331141]
action based on polilcy:[-0.99579424  0.54531074]
action based on polilcy:[-0.6203958   0.49641657]
action based on polilcy:[-0.70999575  0.58926904]
action based on polilcy:[-0.8501468  0.6553632]
action based on polilcy:[-0.53842455  0.52154267]
action based on polilcy:[-0.3828382   0.45308107]
action based on polilcy:[-0.4026063   0.38279897]
action based on polilcy:[-0.80902815  0.10498262]
action based on polilcy:[-0.84652185  0.02642026]
action based on polilcy:[-0.4719382   0.21512088]
action based on polilcy:[-0.3303648   0.42032877]
Total T: 170548 Episode Num: 12666 Episode T: 12 Reward: -963.637112
action based on polilcy:[-0.71317405  0.6402775 ]
action based on polilcy:[-0.72992206  0.38333583]
action based on polilcy:[-0.8142142  0.7453346]
action based on polilcy:[-0.2617124   0.68296826]
action based on polilcy:[-0.26695126  0.524362  ]
action based on polilcy:[-0.29377028  0.61455595]
action based on polilcy:[-0.44368663 

action based on polilcy:[-0.42109463  0.47519612]
action based on polilcy:[-0.57867885  0.61777866]
action based on polilcy:[-0.74310523  0.71216524]
action based on polilcy:[-0.562881   0.5486752]
action based on polilcy:[-0.8425748   0.77103543]
action based on polilcy:[-0.6021272   0.71020734]
action based on polilcy:[-0.550958    0.34201458]
Total T: 170692 Episode Num: 12679 Episode T: 11 Reward: -1247.477586
action based on polilcy:[-0.74348927  0.63502216]
action based on polilcy:[-0.5930525  0.5049271]
action based on polilcy:[-0.67817795  0.60801804]
action based on polilcy:[-0.51599896  0.42229727]
action based on polilcy:[-0.53680956  0.50644726]
action based on polilcy:[-0.6805554   0.73363763]
action based on polilcy:[-0.6927018   0.75531155]
action based on polilcy:[-0.291862   0.5303774]
action based on polilcy:[-0.35011584  0.6680721 ]
action based on polilcy:[-0.33848998  0.47343817]
action based on polilcy:[-0.70674074  0.7530796 ]
action based on polilcy:[-0.48415238

action based on polilcy:[-0.75054234  0.63411325]
action based on polilcy:[-0.8827481  0.6013222]
action based on polilcy:[-0.64238787  0.38839248]
action based on polilcy:[-0.39367288  0.44524062]
action based on polilcy:[-0.46005684  0.37182304]
action based on polilcy:[-0.75015247  0.11496494]
action based on polilcy:[-0.8999281  0.0169241]
Total T: 170869 Episode Num: 12692 Episode T: 7 Reward: -568.439517
action based on polilcy:[-0.75106424  0.6278415 ]
action based on polilcy:[-0.7475412  0.6254882]
action based on polilcy:[-0.6359651   0.38156646]
action based on polilcy:[-0.44876754  0.6355957 ]
action based on polilcy:[-0.3485966   0.41501275]
action based on polilcy:[-0.48081663  0.52641326]
action based on polilcy:[-0.8163301   0.12389524]
action based on polilcy:[-0.43476197  0.37414762]
Total T: 170877 Episode Num: 12693 Episode T: 8 Reward: -812.574653
action based on polilcy:[-0.7105075  0.6467583]
action based on polilcy:[-0.6366582  0.4730469]
action based on polilcy:

action based on polilcy:[-0.74522674  0.62951636]
action based on polilcy:[-0.7471515  0.6568142]
action based on polilcy:[-0.7241074   0.65802443]
action based on polilcy:[-0.95089865  0.65052885]
action based on polilcy:[-0.8839255  0.5390047]
action based on polilcy:[-0.69673467  0.47506556]
action based on polilcy:[-0.7717869  0.6575287]
action based on polilcy:[-0.55579823  0.58412004]
action based on polilcy:[-0.6500702  0.5410383]
action based on polilcy:[-0.8579833   0.66630507]
action based on polilcy:[-0.53761804  0.6692183 ]
action based on polilcy:[-0.5991869  0.7169487]
action based on polilcy:[-0.45620665  0.5472307 ]
action based on polilcy:[-0.5169928   0.33332705]
action based on polilcy:[-0.81822044  0.12657213]
Total T: 171029 Episode Num: 12707 Episode T: 15 Reward: -1671.262947
action based on polilcy:[-0.7329116   0.62686074]
action based on polilcy:[-0.7420113   0.39441377]
action based on polilcy:[-0.6375201   0.38979062]
action based on polilcy:[-0.5341812  0.3

action based on polilcy:[-0.75896084  0.6278221 ]
action based on polilcy:[-0.6998472  0.6575648]
action based on polilcy:[-0.64519095  0.7659388 ]
action based on polilcy:[-0.43177477  0.4588275 ]
action based on polilcy:[-0.4522233  0.5693573]
action based on polilcy:[-0.34794584  0.46723512]
action based on polilcy:[-0.49605268  0.56323946]
action based on polilcy:[-0.6044687  0.460357 ]
action based on polilcy:[-0.41432068  0.54611516]
action based on polilcy:[-0.27569234  0.60429454]
Total T: 171191 Episode Num: 12721 Episode T: 10 Reward: -925.089294
action based on polilcy:[-0.77491754  0.61977863]
action based on polilcy:[-0.69273305  0.52659976]
action based on polilcy:[-0.62668437  0.39656484]
action based on polilcy:[-0.52219164  0.4564703 ]
action based on polilcy:[-0.18872394  0.60551685]
action based on polilcy:[-0.51943815  0.34368157]
action based on polilcy:[-0.51386535  0.33038726]
action based on polilcy:[-0.4887483   0.66967446]
action based on polilcy:[-0.4315698  

action based on polilcy:[-0.7779844   0.64586234]
action based on polilcy:[-0.47755414  0.5831921 ]
action based on polilcy:[-0.48006716  0.67214525]
action based on polilcy:[-0.78516304  0.60201204]
action based on polilcy:[-0.7308072  0.413764 ]
action based on polilcy:[-0.4688041  0.6065651]
action based on polilcy:[-0.36486125  0.48836094]
action based on polilcy:[-0.87706995  0.7510053 ]
action based on polilcy:[-0.4330055  0.4777041]
action based on polilcy:[-0.5814774   0.62537533]
action based on polilcy:[-0.5246797   0.44739914]
action based on polilcy:[-0.40558714  0.60266656]
Total T: 171348 Episode Num: 12735 Episode T: 12 Reward: -1083.975337
action based on polilcy:[-0.7584178   0.65097135]
action based on polilcy:[-0.669844   0.6159512]
action based on polilcy:[-0.6283238   0.40521428]
action based on polilcy:[-0.3705591   0.67827666]
action based on polilcy:[-0.45197323  0.5023291 ]
action based on polilcy:[-0.4291768   0.44216713]
action based on polilcy:[-0.58127487  

action based on polilcy:[-0.7742969  0.6483649]
action based on polilcy:[-0.72187054  0.40514702]
action based on polilcy:[-0.5504348   0.74153805]
action based on polilcy:[-0.3264752  0.6459651]
action based on polilcy:[-0.31427926  0.5752215 ]
action based on polilcy:[-0.3738559   0.36687455]
action based on polilcy:[-0.5381169  0.589823 ]
Total T: 171493 Episode Num: 12748 Episode T: 7 Reward: -897.873644
action based on polilcy:[-0.7818203  0.63845  ]
action based on polilcy:[-0.9162814   0.49770668]
action based on polilcy:[-0.79927576  0.6580608 ]
action based on polilcy:[-0.97248846  0.6352544 ]
action based on polilcy:[-0.6565244  0.6304543]
action based on polilcy:[-0.7305529  0.4052933]
action based on polilcy:[-0.68679786  0.72895026]
action based on polilcy:[-0.3648953  0.4737957]
action based on polilcy:[-0.49682882  0.47009128]
action based on polilcy:[-0.38925785  0.4487232 ]
action based on polilcy:[-0.74821943  0.14080495]
action based on polilcy:[-0.5949727   0.220050

action based on polilcy:[-0.82803416  0.64652777]
action based on polilcy:[-0.6504107   0.57333493]
action based on polilcy:[-0.8701404  0.7012995]
action based on polilcy:[-0.7152412  0.7435975]
action based on polilcy:[-0.43082377  0.460817  ]
action based on polilcy:[-0.39864925  0.68467957]
action based on polilcy:[-0.39454213  0.4166856 ]
action based on polilcy:[-0.36011636  0.43265247]
action based on polilcy:[-0.35107166  0.45973814]
action based on polilcy:[-0.43742263  0.3719506 ]
action based on polilcy:[-0.31908646  0.61215127]
Total T: 171662 Episode Num: 12763 Episode T: 11 Reward: -958.627612
action based on polilcy:[-0.82401925  0.6331322 ]
action based on polilcy:[-0.39459947  0.65548176]
action based on polilcy:[-0.59528446  0.4934526 ]
action based on polilcy:[-0.99999994  0.5619393 ]
action based on polilcy:[-0.6011548  0.494093 ]
action based on polilcy:[-0.7668865  0.6961471]
action based on polilcy:[-0.76253265  0.69474715]
action based on polilcy:[-0.93853945  0

action based on polilcy:[-0.8155194   0.64726853]
action based on polilcy:[-1.          0.56204236]
action based on polilcy:[-0.99783903  0.68442   ]
action based on polilcy:[-0.71155494  0.4876422 ]
action based on polilcy:[-0.5558008  0.6467259]
action based on polilcy:[-0.64693236  0.5105648 ]
action based on polilcy:[-0.46571222  0.5898416 ]
action based on polilcy:[-0.360011    0.46962482]
action based on polilcy:[-0.4054867  0.5049416]
action based on polilcy:[-0.75751704  0.77748084]
action based on polilcy:[-0.3319033   0.50088197]
action based on polilcy:[-0.7820779   0.75142264]
action based on polilcy:[-0.44200197  0.46440017]
action based on polilcy:[-0.6060194  0.7371377]
action based on polilcy:[-0.32846045  0.6312561 ]
action based on polilcy:[-0.45774883  0.5595046 ]
action based on polilcy:[-0.5266167   0.49793923]
Total T: 171817 Episode Num: 12775 Episode T: 17 Reward: -1539.548584
action based on polilcy:[-0.8379146  0.6294042]
action based on polilcy:[-0.7285997  0

action based on polilcy:[-0.778726   0.6457206]
action based on polilcy:[-0.604892    0.49282295]
action based on polilcy:[-0.54666495  0.63579094]
action based on polilcy:[-0.5585767   0.52340883]
action based on polilcy:[-0.68135107  0.7358172 ]
action based on polilcy:[-0.58185744  0.61597395]
action based on polilcy:[-0.88472974  0.6865683 ]
action based on polilcy:[-0.7008852   0.41219282]
action based on polilcy:[-0.2985052  0.5420746]
action based on polilcy:[-0.25694847  0.55020356]
action based on polilcy:[-0.26266712  0.56952614]
action based on polilcy:[-0.41384664  0.34592897]
Total T: 171966 Episode Num: 12788 Episode T: 12 Reward: -1109.197537
action based on polilcy:[-0.8133161  0.6370526]
action based on polilcy:[-1.        0.854658]
action based on polilcy:[-1.         0.3922979]
action based on polilcy:[-1.          0.29630664]
action based on polilcy:[-0.5442393  0.6006274]
action based on polilcy:[-0.6098753  0.6562053]
action based on polilcy:[-0.9999994  0.7332175

action based on polilcy:[-0.73514825  0.65667295]
action based on polilcy:[-0.9104934   0.65979743]
action based on polilcy:[-0.713118    0.57535964]
action based on polilcy:[-0.705654    0.38826364]
action based on polilcy:[-0.556112    0.48223585]
action based on polilcy:[-0.54492915  0.50476015]
action based on polilcy:[-0.5121579  0.5971569]
action based on polilcy:[-0.35849103  0.6923655 ]
action based on polilcy:[-1.        0.307165]
action based on polilcy:[-0.9945194  0.4605243]
action based on polilcy:[-0.5193615   0.61865824]
action based on polilcy:[-0.656893    0.48734507]
action based on polilcy:[-0.653881    0.72413975]
action based on polilcy:[-0.86024565  0.74680114]
action based on polilcy:[-0.4423547   0.48219705]
action based on polilcy:[-0.28065994  0.4873766 ]
action based on polilcy:[-0.9325548 -0.3276175]
action based on polilcy:[-0.28733775  0.46867943]
Total T: 172616 Episode Num: 12813 Episode T: 18 Reward: -1363.336289
action based on polilcy:[-0.749784   0.6

action based on polilcy:[-0.7641802  0.6150515]
action based on polilcy:[-0.69710827  0.509665  ]
action based on polilcy:[-0.62541616  0.62119627]
action based on polilcy:[-0.6900136   0.75169253]
action based on polilcy:[-0.5650066   0.53683555]
action based on polilcy:[-0.8312006  0.6146616]
action based on polilcy:[-0.9999994  0.8173838]
action based on polilcy:[-0.642208    0.51648706]
action based on polilcy:[-0.7107013   0.40487587]
action based on polilcy:[-0.5428013  0.4742399]
action based on polilcy:[-0.5740335   0.63528347]
action based on polilcy:[-0.37034532  0.39337873]
action based on polilcy:[-0.40958285  0.576686  ]
action based on polilcy:[-0.38141906  0.39528754]
action based on polilcy:[-0.5301014   0.28004503]
Total T: 172773 Episode Num: 12823 Episode T: 15 Reward: -1364.355118
action based on polilcy:[-0.7378942  0.6466925]
action based on polilcy:[-0.6867974  0.5547241]
action based on polilcy:[-0.88955563  0.6912056 ]
action based on polilcy:[-0.7145155   0.48

action based on polilcy:[-0.77694774  0.60925025]
action based on polilcy:[-0.7451894   0.36550045]
action based on polilcy:[-0.56866795  0.4046597 ]
action based on polilcy:[-0.44709405  0.5439767 ]
action based on polilcy:[-0.23296975  0.64129055]
action based on polilcy:[-0.4633909  0.7662646]
action based on polilcy:[-0.80093     0.82951987]
action based on polilcy:[-0.5214573  0.6928302]
action based on polilcy:[-0.33824322  0.46309778]
Total T: 172934 Episode Num: 12836 Episode T: 9 Reward: -1037.707300
action based on polilcy:[-0.7708982  0.6254052]
action based on polilcy:[-0.43319327  0.61818635]
action based on polilcy:[-1.         0.8391126]
action based on polilcy:[-0.6581925   0.67728335]
action based on polilcy:[-0.6915183  0.6160314]
action based on polilcy:[-0.48036754  0.6187341 ]
action based on polilcy:[-0.6666497  0.669726 ]
action based on polilcy:[-0.82974267  0.64323676]
action based on polilcy:[-0.60392314  0.51929235]
action based on polilcy:[-0.87101626  0.618

action based on polilcy:[-0.80650824  0.61350554]
action based on polilcy:[-0.73348796  0.3742128 ]
action based on polilcy:[-0.6169249   0.70932424]
action based on polilcy:[-0.53895587  0.65236413]
action based on polilcy:[-0.72374463  0.3813713 ]
action based on polilcy:[-0.57334757  0.51245683]
action based on polilcy:[-0.52581537  0.6799621 ]
action based on polilcy:[-0.94798136 -0.24825564]
action based on polilcy:[-0.47831747  0.54046476]
action based on polilcy:[-0.8846367  0.7000928]
action based on polilcy:[-0.5638059  0.4950789]
action based on polilcy:[-0.67233837  0.32861778]
action based on polilcy:[-0.35364753  0.55637586]
action based on polilcy:[-0.9933358 -0.9816458]
Total T: 173099 Episode Num: 12850 Episode T: 14 Reward: -1332.137043
action based on polilcy:[-0.80683845  0.632704  ]
action based on polilcy:[-0.3335426  0.6958271]
action based on polilcy:[-0.54578876  0.6706042 ]
action based on polilcy:[-1.          0.38327268]
action based on polilcy:[-1.         0

action based on polilcy:[-0.9024764   0.56933355]
action based on polilcy:[-0.96149707  0.60908294]
action based on polilcy:[-0.8455982   0.53529674]
action based on polilcy:[-0.6900782  -0.44472536]
action based on polilcy:[-0.6160399  -0.89102656]
action based on polilcy:[-0.5080451  -0.14709912]
action based on polilcy:[-0.00748912 -0.99948066]
action based on polilcy:[-0.739273   -0.34878018]
action based on polilcy:[-0.6840594 -0.9884   ]
action based on polilcy:[-0.5407573  -0.44268894]
action based on polilcy:[-0.66761756 -0.8830617 ]
action based on polilcy:[-0.73186934  0.5076846 ]
action based on polilcy:[-0.9553263  0.618101 ]
action based on polilcy:[-0.8272346   0.10995908]
action based on polilcy:[-0.9475759  -0.81932384]
action based on polilcy:[-0.73650706  0.5436458 ]
Total T: 173256 Episode Num: 12858 Episode T: 16 Reward: -1999.433208
action based on polilcy:[-0.9156372  0.5203068]
action based on polilcy:[-0.5601179   0.08554846]
action based on polilcy:[-0.7938293 

action based on polilcy:[-0.9796966   0.23728181]
action based on polilcy:[-0.99931496 -0.7671348 ]
action based on polilcy:[-0.99945354 -0.8293717 ]
action based on polilcy:[-0.9999791   0.25158936]
action based on polilcy:[-0.99999994  0.30867806]
action based on polilcy:[-1.          0.37468174]
action based on polilcy:[-1.          0.23292601]
action based on polilcy:[-0.9996984 -0.6544838]
action based on polilcy:[-1.          0.27249184]
action based on polilcy:[-0.999997    0.24195533]
action based on polilcy:[-1.          0.14334077]
action based on polilcy:[0.7472458 0.9935905]
action based on polilcy:[-0.12338592  0.9507079 ]
action based on polilcy:[-0.51506066  0.93015003]
action based on polilcy:[-0.44571638  0.9659799 ]
action based on polilcy:[-0.26398185  0.87763035]
action based on polilcy:[-0.5048338   0.64743274]
action based on polilcy:[-0.13351767  0.84450626]
action based on polilcy:[-0.35127622 -0.97925854]
action based on polilcy:[-0.36344805 -0.84030795]
Total 

action based on polilcy:[-0.9966856 -0.417094 ]
action based on polilcy:[-0.9999947 -0.7351345]
action based on polilcy:[-0.99997896 -0.974062  ]
action based on polilcy:[-0.9987346 -0.9956634]
action based on polilcy:[-0.99993783 -0.3778506 ]
action based on polilcy:[-0.9999991 -0.9235374]
action based on polilcy:[-0.9998432 -0.9934232]
action based on polilcy:[-0.9999897 -0.6354023]
action based on polilcy:[-0.99995965 -0.9860554 ]
action based on polilcy:[-0.9999932  -0.78947514]
action based on polilcy:[-1.        -0.9485419]
action based on polilcy:[-1.         -0.12078691]
action based on polilcy:[-0.99883014  0.9999975 ]
action based on polilcy:[-0.99986774  0.999999  ]
action based on polilcy:[-0.5847076  0.9999619]
action based on polilcy:[-0.8618074  0.9998135]
action based on polilcy:[0.60800296 0.9999678 ]
action based on polilcy:[-1.          0.80410933]
action based on polilcy:[-0.21772665  0.99774605]
action based on polilcy:[0.31812626 0.9996599 ]
action based on polilc

action based on polilcy:[-0.9961688  -0.99421877]
action based on polilcy:[-0.99993163 -0.40657493]
action based on polilcy:[-0.9985373 -0.9807863]
action based on polilcy:[-1.         -0.87730634]
action based on polilcy:[-1.         -0.99946135]
action based on polilcy:[-1.         0.5182394]
action based on polilcy:[-1.        -0.9486956]
action based on polilcy:[-1.          0.69381034]
action based on polilcy:[-1.       -0.968821]
action based on polilcy:[-1.          0.76126856]
action based on polilcy:[-1.        -0.9355761]
action based on polilcy:[-0.999969  -0.9696471]
action based on polilcy:[-1.        -0.9793701]
action based on polilcy:[-1.         0.9092843]
action based on polilcy:[-1.         -0.99717444]
action based on polilcy:[-1.         0.9483599]
action based on polilcy:[-1.        -0.7723766]
action based on polilcy:[-1.        -0.9943255]
action based on polilcy:[-0.99018687 -0.9937453 ]
action based on polilcy:[-1.         -0.44140378]
action based on polilcy:

Total T: 174047 Episode Num: 12883 Episode T: 47 Reward: -7480.123303
Total T: 174065 Episode Num: 12884 Episode T: 18 Reward: -2810.444692
Total T: 174115 Episode Num: 12885 Episode T: 50 Reward: -6047.855985
Total T: 174126 Episode Num: 12886 Episode T: 11 Reward: -1109.132714
Total T: 174171 Episode Num: 12887 Episode T: 45 Reward: -7506.569186
Total T: 174218 Episode Num: 12888 Episode T: 47 Reward: -4946.207486
Total T: 174233 Episode Num: 12889 Episode T: 15 Reward: -2098.341983
Total T: 174270 Episode Num: 12890 Episode T: 37 Reward: -5526.370782
Total T: 174326 Episode Num: 12891 Episode T: 56 Reward: -6820.275822
Total T: 174338 Episode Num: 12892 Episode T: 12 Reward: -2014.736160
Total T: 174350 Episode Num: 12893 Episode T: 12 Reward: -1286.016839
Total T: 174364 Episode Num: 12894 Episode T: 14 Reward: -1491.012607
Total T: 174391 Episode Num: 12895 Episode T: 27 Reward: -4397.694977
Total T: 174412 Episode Num: 12896 Episode T: 21 Reward: -2836.240492
Total T: 174465 Epis

action based on polilcy:[-0.008034   0.8627208]
action based on polilcy:[ 0.99272895 -0.99999994]
action based on polilcy:[-0.99767584 -0.9863152 ]
action based on polilcy:[-1.        -0.9998634]
action based on polilcy:[-1.         -0.99588835]
action based on polilcy:[-1.        -0.7737746]
action based on polilcy:[-0.24410528  0.56038666]
action based on polilcy:[-0.999967   -0.23410074]
action based on polilcy:[-0.9997555   0.36711636]
action based on polilcy:[-0.7759839   0.64374125]
action based on polilcy:[-0.46812987  0.50019896]
action based on polilcy:[-0.5186924  0.6802186]
action based on polilcy:[-0.09673589  0.69914913]
action based on polilcy:[-0.99815595  0.9846541 ]
action based on polilcy:[-0.98838764  0.9949476 ]
action based on polilcy:[-0.9999754   0.99966234]
action based on polilcy:[-0.9950169  0.9976613]
action based on polilcy:[-0.18378998  0.8462425 ]
action based on polilcy:[-0.9999633  -0.09636693]
action based on polilcy:[-0.9528274   0.58888483]
action bas

action based on polilcy:[-0.1328943   0.89172304]
action based on polilcy:[-0.9965748 -0.9946258]
action based on polilcy:[ 0.89919573 -0.9999916 ]
action based on polilcy:[-1.         -0.99990004]
action based on polilcy:[-0.99999887 -0.8482758 ]
action based on polilcy:[-0.9999996  -0.82527363]
action based on polilcy:[-1.         -0.76628995]
action based on polilcy:[-1.         -0.73183733]
action based on polilcy:[-1.         -0.75579107]
action based on polilcy:[-1.        -0.9451243]
action based on polilcy:[-0.9424724  0.3543745]
action based on polilcy:[-0.99719     0.44771835]
action based on polilcy:[-0.9183631   0.40530437]
action based on polilcy:[-0.99999905  0.00397033]
action based on polilcy:[-0.9520444   0.39831996]
action based on polilcy:[-0.9132452  0.4126174]
action based on polilcy:[-0.953977    0.41510522]
action based on polilcy:[-1.        -0.7629159]
action based on polilcy:[-0.9124562   0.39728162]
action based on polilcy:[-0.981828   0.3614075]
action based

action based on polilcy:[-0.2091901   0.87550426]
action based on polilcy:[-1.        -0.9999994]
action based on polilcy:[-1.        -0.9321239]
action based on polilcy:[-0.9999995  -0.83090484]
action based on polilcy:[-1.        -0.7432637]
action based on polilcy:[-1.         -0.65774643]
action based on polilcy:[-1.        -0.7297959]
action based on polilcy:[-1.        -0.6521249]
action based on polilcy:[-1.         -0.65371406]
action based on polilcy:[-1.        -0.5910704]
action based on polilcy:[-1.         0.9999896]
action based on polilcy:[-0.99999684  0.9989553 ]
action based on polilcy:[-0.9995102   0.98835236]
action based on polilcy:[-1.         0.9917216]
action based on polilcy:[-0.8073045   0.41398567]
action based on polilcy:[-0.9990521   0.51522934]
action based on polilcy:[-0.8757305   0.43242115]
action based on polilcy:[-0.9392936   0.43160075]
action based on polilcy:[-0.99999964 -0.92981076]
action based on polilcy:[-0.9332994   0.46100366]
action based on 

action based on polilcy:[-0.24141106  0.8770492 ]
action based on polilcy:[ 0.99619097 -0.99999607]
action based on polilcy:[-1.         -0.99727315]
action based on polilcy:[-0.9999996  -0.84875286]
action based on polilcy:[-1.        -0.9820852]
action based on polilcy:[-1.         -0.74904823]
action based on polilcy:[-1.        -0.7588937]
action based on polilcy:[-1.        -0.7831597]
action based on polilcy:[-1.         -0.70465803]
action based on polilcy:[-0.99993783  0.9547337 ]
action based on polilcy:[-0.98768735  0.4720479 ]
action based on polilcy:[-0.42060992  0.5128526 ]
action based on polilcy:[-0.84387803  0.47299412]
action based on polilcy:[-1.          0.42072204]
action based on polilcy:[-1.        -0.2294252]
action based on polilcy:[-0.6238017   0.43214765]
action based on polilcy:[-0.99421227 -0.0093793 ]
action based on polilcy:[-0.80440927  0.48426613]
action based on polilcy:[-0.9998364 -0.6398286]
action based on polilcy:[-0.78101915  0.4774859 ]
action bas

action based on polilcy:[-0.32310495  0.8144929 ]
action based on polilcy:[-1.        -0.9999976]
action based on polilcy:[-0.9999999 -0.9144101]
action based on polilcy:[-1.        -0.8010319]
action based on polilcy:[-1.         -0.71370244]
action based on polilcy:[-1.        -0.6074999]
action based on polilcy:[-1.        -0.9402946]
action based on polilcy:[-1.         -0.59564245]
action based on polilcy:[-1.          0.99999464]
action based on polilcy:[-1.         0.9994435]
action based on polilcy:[-0.98009145  0.5842299 ]
action based on polilcy:[-0.5787684   0.58764493]
action based on polilcy:[-0.9999997  0.9967464]
action based on polilcy:[-0.6624601   0.53242344]
action based on polilcy:[-0.99999994  0.37595454]
action based on polilcy:[-0.709057   0.5907271]
action based on polilcy:[-0.75077224  0.59135246]
action based on polilcy:[-0.9999914 -0.9999558]
action based on polilcy:[-0.7670608  0.1591307]
action based on polilcy:[-0.75198734  0.62815505]
action based on poli

action based on polilcy:[-0.24866141  0.8345622 ]
action based on polilcy:[-0.9999334  -0.89531624]
action based on polilcy:[-0.85134447 -0.9998751 ]
action based on polilcy:[-0.9860174 -0.9992771]
action based on polilcy:[-0.99997294 -0.9356322 ]
action based on polilcy:[-0.9999986  -0.85382295]
action based on polilcy:[-1.        -0.9618952]
action based on polilcy:[-1.        -0.7222564]
action based on polilcy:[-1.         -0.64534533]
action based on polilcy:[-1.         -0.57484144]
action based on polilcy:[-1.          0.99997103]
action based on polilcy:[-0.99999577  0.999945  ]
action based on polilcy:[-0.99966246  0.9999622 ]
action based on polilcy:[-1.         0.9999829]
action based on polilcy:[-0.98102105  0.9999792 ]
action based on polilcy:[-0.7868611  0.9947491]
action based on polilcy:[-1.         0.6068108]
action based on polilcy:[-0.99968636  0.98483855]
action based on polilcy:[-0.47350964  0.95022655]
action based on polilcy:[-0.6281413  0.8867191]
action based o

action based on polilcy:[-0.3005732   0.84136814]
action based on polilcy:[-0.99988586 -0.9760488 ]
action based on polilcy:[-0.9999984 -0.8885733]
action based on polilcy:[-1.        -0.9001615]
action based on polilcy:[-0.9999999  -0.81864256]
action based on polilcy:[-0.9999994 -0.8642675]
action based on polilcy:[-0.99999964 -0.81657606]
action based on polilcy:[-0.99999774 -0.8897922 ]
action based on polilcy:[-0.99999315 -0.90778124]
action based on polilcy:[-0.99863434 -0.99509215]
action based on polilcy:[-0.999953   -0.95051765]
action based on polilcy:[-0.99999964 -0.8970635 ]
action based on polilcy:[0.44974166 0.9830027 ]
action based on polilcy:[-0.9845005  0.6446756]
action based on polilcy:[-0.30113533  0.8873107 ]
action based on polilcy:[0.15938845 0.99607676]
action based on polilcy:[ 0.74242294 -0.99156266]
action based on polilcy:[-1.        -0.9998919]
action based on polilcy:[-0.99999624  0.06512345]
action based on polilcy:[0.20920773 0.9992826 ]
action based on 

action based on polilcy:[-0.29937243  0.80831224]
action based on polilcy:[ 0.9869377  -0.99999994]
action based on polilcy:[ 0.9973407  -0.99999994]
action based on polilcy:[ 0.99686146 -0.9999999 ]
action based on polilcy:[-1.        -0.9999989]
action based on polilcy:[-0.99999595 -0.8807148 ]
action based on polilcy:[-0.9999994  -0.82485235]
action based on polilcy:[-0.99999994 -0.8078095 ]
action based on polilcy:[-1.        -0.9977974]
action based on polilcy:[-1.         -0.73687327]
action based on polilcy:[-1.         -0.70836616]
action based on polilcy:[-1.         -0.71818936]
action based on polilcy:[-1.        -0.6562586]
action based on polilcy:[-1.         0.9831017]
action based on polilcy:[-1.         -0.62304723]
---------------------------------------
Episode_num: 12925, Evaluation over 1 episodes: -4114.098901
---------------------------------------
Total T: 176001 Episode Num: 12925 Episode T: 16 Reward: -4221.050361
Total T: 176047 Episode Num: 12926 Episode T: 4

action based on polilcy:[-0.37163806  0.7442287 ]
action based on polilcy:[-0.99998933 -0.99999964]
action based on polilcy:[-0.99998903 -0.8938897 ]
action based on polilcy:[-0.999903   -0.98720753]
action based on polilcy:[-0.9999791 -0.8836181]
action based on polilcy:[-1.         -0.92313945]
action based on polilcy:[-1.        -0.7216394]
action based on polilcy:[-1.        -0.7905714]
action based on polilcy:[-1.         -0.73453844]
action based on polilcy:[-0.99641144  0.9999999 ]
action based on polilcy:[-0.5460878   0.99999887]
action based on polilcy:[-0.8198355  0.9877793]
action based on polilcy:[0.0653107 1.       ]
action based on polilcy:[-0.52317977  1.        ]
action based on polilcy:[-0.910931  1.      ]
action based on polilcy:[-0.9717345  0.9999999]
action based on polilcy:[-0.94913214  1.        ]
action based on polilcy:[-0.9606279  0.9999864]
action based on polilcy:[-0.92403793  1.        ]
action based on polilcy:[-0.88150704  1.        ]
action based on poli

action based on polilcy:[-1.        -0.7634605]
action based on polilcy:[-1.         -0.69368315]
action based on polilcy:[-0.9999821   0.99998623]
action based on polilcy:[-1.          0.99998134]
action based on polilcy:[-0.8272251   0.99999994]
action based on polilcy:[-0.9931699   0.99999887]
action based on polilcy:[-0.9995812   0.99750215]
action based on polilcy:[-0.72115314  1.        ]
action based on polilcy:[-1.          0.99943906]
action based on polilcy:[0.44320142 1.        ]
action based on polilcy:[-0.9999841  0.6143716]
action based on polilcy:[0.5931856 1.       ]
action based on polilcy:[-0.7778181   0.99998635]
action based on polilcy:[0.50992703 1.        ]
action based on polilcy:[0.48973134 1.        ]
action based on polilcy:[0.6157694 1.       ]
action based on polilcy:[-0.51931286  0.98964715]
action based on polilcy:[0.63661027 1.        ]
action based on polilcy:[-0.5769592  1.       ]
action based on polilcy:[0.54460716 1.        ]
action based on polilcy:

action based on polilcy:[-0.12300844  0.7905991 ]
action based on polilcy:[-0.09107693 -0.9999999 ]
action based on polilcy:[-0.76768315 -0.9860551 ]
action based on polilcy:[-0.98342454 -0.9439698 ]
---------------------------------------
Episode_num: 12956, Evaluation over 1 episodes: -2509.613153
---------------------------------------
action based on polilcy:[-0.937606   -0.99222934]
Total T: 177001 Episode Num: 12956 Episode T: 5 Reward: -2762.113363
action based on polilcy:[-0.25380078  0.78827596]
action based on polilcy:[-0.97635335 -0.9739952 ]
action based on polilcy:[-0.99567664 -0.90149057]
action based on polilcy:[-0.9987861  -0.82502997]
action based on polilcy:[-1.         -0.97831076]
action based on polilcy:[-0.9999965  -0.72527075]
action based on polilcy:[-0.99999946 -0.7163416 ]
action based on polilcy:[-0.99964863 -0.79498065]
action based on polilcy:[-0.99998677 -0.7117232 ]
action based on polilcy:[-1.         -0.65495396]
action based on polilcy:[-0.9942308   0.

action based on polilcy:[-0.08862703  0.7638206 ]
action based on polilcy:[-0.7940459 -0.995491 ]
action based on polilcy:[-0.88121617 -0.9596857 ]
action based on polilcy:[-0.9680095  -0.87571806]
action based on polilcy:[-0.96469486 -0.84353524]
action based on polilcy:[-0.9994969  -0.77914727]
action based on polilcy:[-0.99988896 -0.72615206]
action based on polilcy:[-1.        -0.7512711]
action based on polilcy:[-1.        -0.5665402]
action based on polilcy:[-1.        -0.5316863]
action based on polilcy:[-0.9984318   0.99971133]
action based on polilcy:[-0.99957806  0.9999684 ]
action based on polilcy:[-0.58302575  0.99999887]
action based on polilcy:[-0.96170336  0.99985105]
action based on polilcy:[-0.95686615  0.99996567]
action based on polilcy:[-0.05841  1.     ]
action based on polilcy:[-0.9649206   0.99128795]
action based on polilcy:[-0.07151705  0.99963367]
action based on polilcy:[0.41694745 1.        ]
action based on polilcy:[-0.5834931  0.9970518]
action based on po

action based on polilcy:[0.13433531 0.8398242 ]
action based on polilcy:[-0.64121175 -0.98818016]
action based on polilcy:[-0.9487287  -0.87790877]
action based on polilcy:[-0.9433283  -0.87493217]
action based on polilcy:[-0.99709964 -0.7890347 ]
action based on polilcy:[-0.9998264  -0.74010724]
action based on polilcy:[-1.        -0.6615162]
action based on polilcy:[-1.        -0.5887164]
action based on polilcy:[-1.         -0.66357017]
action based on polilcy:[-1.        -0.6640984]
action based on polilcy:[-0.9999992  -0.68827844]
action based on polilcy:[-0.9748248  0.9999022]
action based on polilcy:[0.08295809 1.        ]
action based on polilcy:[-0.40941972  0.9973897 ]
action based on polilcy:[0.30593666 1.        ]
action based on polilcy:[-0.19176641  0.9991466 ]
action based on polilcy:[0.37168363 1.        ]
action based on polilcy:[0.3021925 1.       ]
action based on polilcy:[-0.11240507  1.        ]
action based on polilcy:[-0.7110189  1.       ]
action based on polilc

action based on polilcy:[0.02563891 0.87631917]
action based on polilcy:[-0.80871284 -0.9764943 ]
action based on polilcy:[-0.95534   -0.9029653]
action based on polilcy:[-0.9631258 -0.887904 ]
action based on polilcy:[-0.9999999  -0.96398664]
action based on polilcy:[-0.99986917 -0.7427356 ]
action based on polilcy:[-1.        -0.6539233]
action based on polilcy:[-0.99999994 -0.7245325 ]
action based on polilcy:[-1.         -0.65123475]
action based on polilcy:[-1.        -0.6029327]
action based on polilcy:[-1.         -0.48459503]
action based on polilcy:[-0.2784093   0.99999994]
action based on polilcy:[-0.9999967   0.99999434]
action based on polilcy:[-0.85031915  0.9993128 ]
action based on polilcy:[-0.99997187  0.83368826]
action based on polilcy:[-0.99962276  0.58478975]
action based on polilcy:[0.48894215 1.        ]
action based on polilcy:[-0.7866127   0.82449836]
action based on polilcy:[0.43736833 1.        ]
action based on polilcy:[0.05972039 0.99999994]
action based on 

action based on polilcy:[-0.02257921  0.8802572 ]
action based on polilcy:[-0.33008444 -0.9795435 ]
action based on polilcy:[-0.7561331 -0.9908129]
action based on polilcy:[-0.9932053  -0.80091804]
action based on polilcy:[-1.        -0.9156561]
action based on polilcy:[-1.         -0.75577855]
action based on polilcy:[-1.        -0.8012285]
action based on polilcy:[-1.        -0.6896523]
action based on polilcy:[-0.35716763  1.        ]
action based on polilcy:[-1.          0.99999756]
action based on polilcy:[-0.99999297  0.99993694]
action based on polilcy:[-0.24420989  1.        ]
action based on polilcy:[-0.4145827  1.       ]
action based on polilcy:[-0.970149  0.446828]
action based on polilcy:[-0.86702365  0.994872  ]
action based on polilcy:[0.5748735 1.       ]
action based on polilcy:[-0.75346076  0.8214208 ]
action based on polilcy:[-0.4937705  0.9999767]
action based on polilcy:[0.9406636 1.       ]
action based on polilcy:[0.4776642 1.       ]
action based on polilcy:[0.7

action based on polilcy:[-0.02862045  0.85949254]
action based on polilcy:[ 0.36398464 -0.99953455]
action based on polilcy:[-0.99108   -0.9073923]
action based on polilcy:[-0.9962274 -0.9985811]
action based on polilcy:[-0.99965084 -0.77084005]
action based on polilcy:[-0.9996505 -0.8383777]
action based on polilcy:[-0.9998812  -0.75190735]
action based on polilcy:[-1.         -0.65619063]
action based on polilcy:[-0.89902467  0.9999856 ]
action based on polilcy:[-0.3883548   0.99999994]
action based on polilcy:[-0.4435542  1.       ]
action based on polilcy:[-0.9984786  0.8562197]
action based on polilcy:[-0.45458704  0.99999785]
action based on polilcy:[-0.255419  1.      ]
action based on polilcy:[-0.9715591   0.99458957]
action based on polilcy:[-0.91184145  0.40851027]
action based on polilcy:[0.39925423 1.        ]
action based on polilcy:[-0.99974644  0.4511209 ]
action based on polilcy:[0.57184815 1.        ]
action based on polilcy:[-0.21500364  0.99709374]
action based on po

action based on polilcy:[-0.04101406  0.57892734]
action based on polilcy:[-0.07635925 -0.9487692 ]
action based on polilcy:[ 0.17079857 -0.9999997 ]
action based on polilcy:[-0.69674146 -0.9953674 ]
action based on polilcy:[-0.98884374 -0.83571297]
action based on polilcy:[-0.999987   -0.98181593]
action based on polilcy:[-0.999788  -0.7268226]
action based on polilcy:[0.0248542  0.99999934]
action based on polilcy:[-0.13195719  0.54593265]
action based on polilcy:[-0.1980035  0.8567341]
action based on polilcy:[0.1758598  0.37870443]
action based on polilcy:[0.15451558 0.25504723]
action based on polilcy:[0.29059732 0.4355584 ]
action based on polilcy:[-0.9674373   0.53628796]
action based on polilcy:[0.1797924  0.20132399]
action based on polilcy:[-0.306529    0.49657208]
action based on polilcy:[0.6552793 1.       ]
action based on polilcy:[0.9504046 1.       ]
action based on polilcy:[0.99773914 1.        ]
action based on polilcy:[0.97986823 1.        ]
action based on polilcy:[0