In [2]:
%matplotlib inline
import bisect
import copy 
import os 
from collections import deque, Counter
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import editdistance
import sys
import RNA
from typing import Dict, List, Tuple

# import path 
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from utils.sequence_utils import translate_one_hot_to_string,generate_random_mutant
from utils.sequence_utils import translate_string_to_one_hot, translate_one_hot_to_string
from models.Theoretical_models import *
from models.Noise_wrapper import *
from utils.landscape_utils import *
from models.RNA_landscapes import *
from models.Multi_dimensional_model import *

import tensorflow as tf
from tf_agents.drivers import dynamic_step_driver
from tf_agents.metrics import tf_metrics
from tf_agents.agents import tf_agent
from tf_agents.policies import random_tf_policy
from tf_agents.agents.ppo import ppo_policy, ppo_agent, ppo_utils
from tf_agents.environments import py_environment, tf_py_environment
from tf_agents.environments.utils import validate_py_environment
from tf_agents.drivers import dynamic_episode_driver
from tf_agents.networks import network, normal_projection_network
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.utils import common
from tf_agents.trajectories import time_step as ts
from tf_agents.specs import array_spec

In [2]:
RAA="UGCA" #alphabet
alphabet_len=len(RAA)
# TODO: UNDO THIS
# length=40
length=40
noise_alpha=1
generations = 10
experiment_batch_size = 1000
wt=generate_random_sequences(length,1,alphabet=RAA)[0]
landscape1=RNA_landscape(wt)
landscape2=RNA_landscape(wt)
noisy_landscape=Noise_wrapper(landscape1,
                              noise_alpha=noise_alpha,
                              always_costly=True)
initial_genotypes=list(set([wt]+[generate_random_mutant(wt,0.05,RAA) 
                                 for i in range(experiment_batch_size*10)]))[:experiment_batch_size]
noisy_landscape.reset()
noisy_landscape.measure_true_landscape(initial_genotypes)
noisy_landscape.natural_mode=False
noisy_landscape.local_mode=False
noisy_landscape.cost

def renormalize_moves(one_hot_input, rewards_output):
    """ensures that staying in place gives no reward"""
    zero_current_state = (one_hot_input - 1) * (-1)
    return np.multiply(rewards_output, zero_current_state)

def walk_away_renormalize_moves(one_hot_input, one_hot_wt, rewards_output):
    """ensures that moving toward wt is also not useful"""
    zero_current_state=(one_hot_input-1)*-1
    zero_wt=((one_hot_wt-1)*-1)
    zero_conservative_moves=np.multiply(zero_wt,zero_current_state)
    return np.multiply(rewards_output,zero_conservative_moves)

def get_all_singles_fitness(model,sequence,alphabet):
    prob_singles=np.zeros((len(alphabet),len(sequence)))
    for i in range(len(sequence)):
        for j in range(len(alphabet)):
            putative_seq=sequence[:i]+alphabet[j]+sequence[i+1:]
           # print (putative_seq)
            prob_singles[j][i]=model.get_fitness(putative_seq)
    return prob_singles

def get_all_mutants(sequence):
    mutants = []
    for i in range(sequence.shape[0]):
        for j in range(sequence.shape[1]):
            putative_seq = sequence.copy()
            putative_seq[:, j] = 0
            putative_seq[i, j] = 1
            mutants.append(putative_seq)
    return np.array(mutants)

def sample_greedy(matrix):
    i,j=matrix.shape
    max_arg=np.argmax(matrix)
    y=max_arg%j
    x=int(max_arg/j)
    output=np.zeros((i,j))
    output[x][y]=matrix[x][y]
    return output

def sample_multi_greedy(matrix):
    n = 5 # the number of base positions to greedily change
    max_args = np.argpartition(matrix.flatten(), -n)[-n:]
    i,j=matrix.shape
    output=np.zeros((i,j))
    for max_arg in max_args:
        y=max_arg%j
        x=int(max_arg/j)
        output[x][y]=matrix[x][y]
    return output

def sample_random(matrix):
    i,j=matrix.shape
    non_zero_moves=np.nonzero(matrix)
   # print (non_zero_moves)
    k=len(non_zero_moves)
    l=len(non_zero_moves[0])
    if k!=0 and l!=0:
        rand_arg=random.choice([[non_zero_moves[alph][pos] for alph in range(k)] for pos in range(l)])
    else:
        rand_arg=[random.randint(0,i-1),random.randint(0,j-1)]
    #print (rand_arg)
    y=rand_arg[1]
    x=rand_arg[0]
    output=np.zeros((i,j))
    output[x][y] = 1
    return output   

def action_to_scalar(matrix):
    matrix = matrix.ravel()
    for i in range(len(matrix)):
        if matrix[i] != 0:
            return i
    
def construct_mutant_from_sample(pwm_sample, one_hot_base):
    one_hot = np.zeros(one_hot_base.shape)
    one_hot += one_hot_base
    nonzero = np.nonzero(pwm_sample)
    nonzero = list(zip(nonzero[0], nonzero[1]))
    for nz in nonzero: # this can be problematic for non-positive fitnesses
        i, j = nz
        one_hot[:,j]=0
        one_hot[i,j]=1
    return one_hot

def best_predicted_new_gen(actor, genotypes, alphabet, pop_size):
    mutants = get_all_mutants(genotypes)
    one_hot_mutants = np.array([translate_string_to_one_hot(mutant, alphabet) for mutant in mutants])
    torch_one_hot_mutants = torch.from_numpy(np.expand_dims(one_hot_mutants, axis=0)).float()
    predictions = actor(torch_one_hot_mutants)
    predictions = predictions.detach().numpy()
    best_pred_ind = predictions.argsort()[-pop_size:]
    return mutants[best_pred_ind]

def make_one_hot_train_test(genotypes, model, alphabet):
    genotypes_one_hot = np.array([translate_string_to_one_hot(genotype, alphabet) for genotype in genotypes])
    genotype_fitnesses = []
    for genotype in genotypes:
        genotype_fitnesses.append(model.get_fitness(genotype))
    genotype_fitnesses = np.array(genotype_fitnesses)

    return genotypes_one_hot, genotype_fitnesses

In [3]:
# parameters for PPO Agent 
generations = 10
experiment_batch_size = 1000
global_step = tf.compat.v1.train.get_or_create_global_step()
optimizer = tf.keras.optimizers.Adam(1e-3)
optimizer.iterations = global_step

In [4]:
# environment for PPO Agent
class FitnessLandscapeEnvironment(py_environment.PyEnvironment):
    # Based on this: https://www.mikulskibartosz.name/how-to-create-an-environment-for-a-tensorflow-agent/
    def __init__(self, alphabet, seq_len, landscape, max_episodes):
        self.alphabet = alphabet
        self.alphabet_len = len(self.alphabet)
        self.landscape = copy.deepcopy(landscape)
        self.seq_len = seq_len
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(1, 2), dtype=np.float32, minimum=0, 
            maximum=1, name='action_x')
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(self.alphabet_len, self.seq_len), dtype=np.float32, minimum=0, 
            maximum=1, name='observation')
        self._time_step_spec = ts.time_step_spec(self._observation_spec)
        self._state = translate_string_to_one_hot(wt, self.alphabet)
        self._episode_ended = False
        self.ctr = 0
        self.max_episodes = max_episodes
        self.seen_sequences = {}
        
    def _reset(self):
        self.ctr = 0
        # TODO: UNDO THIS
#         self._state = translate_string_to_one_hot(wt, self.alphabet)
        self._state = translate_string_to_one_hot(generate_random_sequences(length,1,alphabet=RAA)[0], self.alphabet)
        self._episode_ended = False
        return ts.restart(np.array(self._state, dtype=np.float32))
    
    def time_step_spec(self):
        return self._time_step_spec 

    def action_spec(self):
        return self._action_spec

    def observation_spec(self):
        return self._observation_spec
    
    def get_state_string(self):
        return translate_one_hot_to_string(self._state, self.alphabet)
    
    def _step(self, action):
        if self.ctr < self.max_episodes:
            self.ctr += 1
            action_one_hot = np.zeros((self.alphabet_len, self.seq_len))
            print('action', action)
            x, y = action[0]
            x, y = int(self.alphabet_len*x), int(self.seq_len*y)
            action_one_hot[x, y] = 1
            assert self._state.sum() == self._state.shape[1]
            if self._state[x, y] == 1:
                self._episode_ended = True
                return ts.termination(np.array(self._state, dtype=np.float32), 0)
            else:
                self._state = construct_mutant_from_sample(action_one_hot, self._state)
                state_string = translate_one_hot_to_string(self._state, self.alphabet)
                
                if state_string in self.seen_sequences:
                    return ts.termination(np.array(self._state, dtype=np.float32), 0)
                self.seen_sequences[state_string] = 1
                
                reward = self.landscape.get_fitness(state_string)
                assert self._state.sum() == self._state.shape[1]
                print('New state', self._state)
                return ts.transition(np.array(self._state, dtype=np.float32), reward=reward)
        else:
            self._episode_ended = True
            assert self._state.sum() == self._state.shape[1]
            print('New state', self._state)
            return ts.termination(np.array(self._state, dtype=np.float32), 0)

In [5]:
#max_iter = experiment_batch_size * generations 
max_iter = 10 ** 6
fle2 = FitnessLandscapeEnvironment(RAA, length, landscape1, max_iter)
print("starting validating environment...")
validate_py_environment(fle2, episodes=2)
print("done validating environment.")
fle = FitnessLandscapeEnvironment(RAA, length, landscape2, max_iter)
tf_env = tf_py_environment.TFPyEnvironment(fle)

starting validating environment...
action [[0.2680582  0.78098875]]
New state [[1. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  1. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0. 1. 0. 1. 0. 1.
  0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0.
  0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 1. 0.]]
action [[0.87408054 0.7873583 ]]
New state [[1. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  1. 0. 0. 1. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0. 1. 0. 1. 0. 1.
  0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0

In [6]:
# specs 
time_step_spec = tf_env.time_step_spec()
observation_spec = tf_env.observation_spec()
action_spec = tf_env.action_spec()
alphabet_len = len(RAA)
seq_len = length

In [7]:
# run random agent for testing purposes only 
random_policy = random_tf_policy.RandomTFPolicy(tf_env.time_step_spec(), tf_env.action_spec())
random_collect_policy = random_tf_policy.RandomTFPolicy(tf_env.time_step_spec(), tf_env.action_spec())
random_agent = tf_agent.TFAgent(
    tf_env.time_step_spec(),
    tf_env.action_spec(),
    random_policy,
    random_collect_policy,
    None
)

collect_driver = dynamic_step_driver.DynamicStepDriver(
    tf_env,
    random_agent.collect_policy,
    num_steps=1000
)

collect_driver.run()

Instructions for updating:
SeedStream has moved to `tfp.util.SeedStream`.
action [[0.9681208  0.89113593]]
New state [[1. 0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1.
  0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0.
  0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1. 1.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0.
  1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0.]]
action [[0.9711348  0.85397613]]
New state [[1. 0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1.
  0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0.
  0. 1. 0. 0. 1

  0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0.]]
action [[0.519887   0.37943506]]
New state [[0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.
  1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0.]
 [1. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1.
  0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0.]]
action [[0.54694104 0.47810423]]
New state [[0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.
  1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0.]
 [1. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0.
  0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0.

New state [[0. 0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1.
  0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0.]
 [1. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0.
  0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]]
action [[0.29641128 0.13505507]]
New state [[0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1.
  0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0.]
 [1. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0.
  0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 

New state [[0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 1.
  1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0.
  0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
action [[0.409173   0.27424252]]
New state [[0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 1.
  1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 

  0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]]
action [[0.6051749  0.70576644]]
action [[0.01469874 0.5065607 ]]
New state [[0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 0. 0. 0.
  1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0.]
 [1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 1.
  0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]]
action [[0.5699457 0.1313194]]
action [[0.6166179  0.93451905]]
New state [[0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 0. 0. 0.
  1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0.]
 [1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 

New state [[0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 1.
  1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0.
  0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 0.]
 [0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1.]]
action [[0.26346934 0.660195  ]]
New state [[0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 1.
  1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0.
  0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 0.]
 [0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 

New state [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1.]
 [0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  1. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1.
  0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0.]
 [0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0.
  0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
action [[0.21551204 0.9757873 ]]
action [[0.6850115  0.33017862]]
New state [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1.]
 [0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  1. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1.
  0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0.]
 [0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0. 

action [[0.8140472  0.14664614]]
New state [[1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1.
  0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0.
  1. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 1.]
 [0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]
action [[0.42165267 0.86976635]]
New state [[1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1.
  0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0.
  1. 1. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1.]
 [0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 

action [[0.45277953 0.26850688]]
New state [[0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1.]
 [1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1.
  0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 1. 0.
  1. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]
action [[0.42867172 0.36684597]]
New state [[0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1.]
 [1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1.
  0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 

action [[0.05967963 0.79425657]]
action [[0.06758034 0.8940375 ]]
action [[0.15563846 0.7457074 ]]
action [[0.6950033 0.3477255]]
New state [[1. 1. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0.
  0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1.
  0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0.]]
action [[0.14450943 0.28550422]]
New state [[1. 1. 1. 0. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 1

action [[0.9421524  0.42053413]]
New state [[1. 1. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1.
  1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0.
  0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0.
  0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0.]]
action [[0.0343616  0.08026612]]
New state [[1. 1. 0. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1.
  1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0.
  0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 

action [[0.578817  0.3454889]]
New state [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 1.
  0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1.]
 [0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0.]]
action [[0.8911034  0.78037596]]
New state [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 1.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1.]
 [0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0.

action [[0.99376094 0.6934221 ]]
New state [[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1.
  1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1.]
 [1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 1. 1. 0. 0.
  0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0.]]
action [[0.7196764 0.8433931]]
action [[0.8884789 0.5586684]]
New state [[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1.
  1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1.]
 [1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0

  0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
action [[0.29238546 0.02561939]]
New state [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0.
  1. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 1. 0. 0.
  0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
action [[0.2574681 0.4767505]]
New state [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0.
  1. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1

action [[0.17482185 0.43367612]]
New state [[0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1.
  0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 1. 0. 1.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0.]
 [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0.
  1. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0.]]
action [[0.9953922 0.3342204]]
action [[0.2368344 0.8440156]]
New state [[0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1.
  0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0.]
 [0. 1. 0

  1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
action [[0.7556405 0.6456995]]
action [[0.29905558 0.41135585]]
action [[0.25952363 0.864712  ]]
New state [[1. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.
  0. 0. 0. 1. 0. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 1.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0.
  1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
action [[0.68554103 0.07804155]]
New state [[1. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.
  0. 0. 0. 1. 0. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 

action [[0.3532983  0.29137635]]
action [[0.68005884 0.8022363 ]]
New state [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0.
  0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1.]
 [1. 1. 1. 1. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1.
  1. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0.
  0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 0.]]
action [[0.79414415 0.93420696]]
action [[0.8082998  0.97324693]]
New state [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0.
  0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 1. 1. 1. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1.
  1. 0. 0. 0. 0. 0. 1. 

New state [[0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1.]
 [0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0.
  1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
action [[0.58653605 0.67867744]]
New state [[0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1.]
 [0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 

action [[0.6447654 0.9815345]]
New state [[0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1. 0.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 1.
  1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0.]]
action [[0.2202748  0.61974835]]
New state [[0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 1. 0.
  1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 1.
  0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 1.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.

action [[0.9032066 0.8374044]]
New state [[0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0.
  1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0. 0.]
 [0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 0. 1. 0. 0. 0.]]
action [[0.871801   0.33945417]]
New state [[0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0.
  1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 0. 1. 0. 0.]
 [0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 1. 0.

action [[0.836503   0.24868453]]
New state [[0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0.
  1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0.]
 [0. 1. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 1. 1. 0. 1. 0. 0. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
action [[0.90342176 0.04149532]]
action [[0.43966162 0.44290912]]
New state [[0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
  1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0.]
 [0. 

action [[0.8216853  0.44534075]]
New state [[0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0.
  0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1.]
 [1. 0. 0. 1. 1. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 1. 1. 0. 0. 0. 1.
  0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0.]]
action [[0.58017313 0.32870162]]
New state [[0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0.
  0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1.]
 [1. 0. 0. 1. 1. 0. 1. 0. 0. 1. 1. 0. 

(TimeStep(step_type=<tf.Tensor: id=42418, shape=(1,), dtype=int32, numpy=array([1], dtype=int32)>, reward=<tf.Tensor: id=42419, shape=(1,), dtype=float32, numpy=array([0.05058824], dtype=float32)>, discount=<tf.Tensor: id=42420, shape=(1,), dtype=float32, numpy=array([1.], dtype=float32)>, observation=<tf.Tensor: id=42421, shape=(1, 4, 40), dtype=float32, numpy=
 array([[[1., 1., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
          0., 0., 0., 1., 1., 0., 1., 1.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1.,
          1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1.,
          1., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0.,
          0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 

In [8]:
# https://github.com/Yawmus/MachineLearning/blob/32c7b7b5ee853fd188d1c22112ba16f031202a0d/rl/main.py
from tf_agents.utils import nest_utils
from tf_agents.networks import encoding_network
from tf_agents.networks import actor_distribution_network
from tf_agents.networks import value_network

# https://gist.github.com/Ujwal2910/885bc6f32ca4719d1cdd4fbd427b835e
# https://github.com/tensorflow/agents/blob/master/tf_agents/networks/normal_projection_network.pyd
print('action_spec', action_spec)
def normal_projection_net(action_spec, init_means_output_factor=0.1):
      return normal_projection_network.NormalProjectionNetwork(
          action_spec,
          init_means_output_factor=init_means_output_factor,
          state_dependent_std=True,
          scale_distribution=True)
      '''
      return normal_projection_network.NormalProjectionNetwork(
          action_spec,
          activation_fn=tf.nn.softmax,
          mean_transform=None,
          state_dependent_std=True,
          init_means_output_factor=init_means_output_factor,
          scale_distribution=True)
      '''

class ActorNet(network.DistributionNetwork):
    def __init__(self, input_tensor_spec, output_tensor_spec): 
        projection_networks = tf.nest.map_structure(normal_projection_net, 
                                                    output_tensor_spec)
        output_spec = tf.nest.map_structure(lambda proj_net: proj_net.output_spec,
                                            projection_networks)
        super(ActorNet, self).__init__(
            input_tensor_spec,
            (),
            output_spec,
            'ActorNet')
        self._projection_networks = projection_networks 
        self._output_tensor_spec = output_tensor_spec
        self._layers = [
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(seq_len, activation=tf.nn.relu),
            tf.keras.layers.Dense(5, activation=tf.nn.relu)
        ]
    
    def call(self, observations, step_type, network_state):
        print('Observations in ActorNet', observations)
        del step_type
        output = tf.keras.backend.reshape(observations, (-1, alphabet_len, seq_len))
        outer_rank = nest_utils.get_outer_rank(output, self.input_tensor_spec)
        for layer in self._layers:
            output = layer(output)
            print('Output of neural network for action', layer, output.numpy())
        output = tf.nest.map_structure(
            lambda proj_net: proj_net(output, outer_rank), self._projection_networks)
        return output, network_state


class ValueNet(network.Network):
    def __init__(self, input_tensor_spec):
        super(ValueNet, self).__init__(
            input_tensor_spec,
            (),
            'ValueNet')
        self._layers = [
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(
                seq_len, activation=tf.nn.relu),
            tf.keras.layers.Dense(
                5, activation=tf.nn.relu),
            tf.keras.layers.Dense(
                1, activation=tf.nn.sigmoid),
        ]
        
    def call(self, observations, step_type, network_state):
        del step_type
        output = tf.keras.backend.reshape(observations, (-1, alphabet_len, seq_len))
        for layer in self._layers:
            output = layer(output)
        return tf.transpose(output), network_state
        
actor_net = ActorNet(observation_spec, action_spec)
value_net = ValueNet(observation_spec)
'''
actor_net = actor_distribution_network.ActorDistributionNetwork(
    tf_env.observation_spec(),
    tf_env.action_spec(),
    fc_layer_params=(128,),
    continuous_projection_net=normal_projection_net
)
value_net = value_network.ValueNetwork(
    tf_env.observation_spec(),
    fc_layer_params=(40, 5)
)
'''

optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=3.0e-4)

agent = ppo_agent.PPOAgent(
    tf_env.time_step_spec(),
    tf_env.action_spec(),
    optimizer,
    actor_net=actor_net,
    value_net=value_net
)

action_spec BoundedTensorSpec(shape=(1, 2), dtype=tf.float32, name='action_x', minimum=array(0., dtype=float32), maximum=array(1., dtype=float32))
Observations in ActorNet tf.Tensor([], shape=(0, 4, 40), dtype=float32)
Output of neural network for action <tensorflow.python.keras.layers.core.Flatten object at 0x7efc0824ea90> []
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081ee1d0> []
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> []
Instructions for updating:
`AffineScalar` bijector is deprecated; please use `tfb.Shift(loc)(tfb.Scale(...))` instead.


In [9]:
replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    agent.collect_data_spec,
    batch_size=1,
    max_length=2
)
collect_driver = dynamic_step_driver.DynamicStepDriver(
    tf_env,
    agent.collect_policy,
    observers=[replay_buffer.add_batch],
    num_steps=2
)

In [13]:
print('Observations', replay_buffer.gather_all())
collect_driver.run()

Observations Trajectory(step_type=<tf.Tensor: id=784311, shape=(1, 0), dtype=int32, numpy=array([], shape=(1, 0), dtype=int32)>, observation=<tf.Tensor: id=784312, shape=(1, 0, 4, 40), dtype=float32, numpy=array([], shape=(1, 0, 4, 40), dtype=float32)>, action=<tf.Tensor: id=784313, shape=(1, 0, 1, 2), dtype=float32, numpy=array([], shape=(1, 0, 1, 2), dtype=float32)>, policy_info=DictWrapper({'loc': <tf.Tensor: id=784314, shape=(1, 0, 1, 2), dtype=float32, numpy=array([], shape=(1, 0, 1, 2), dtype=float32)>, 'scale': <tf.Tensor: id=784315, shape=(1, 0, 1, 2), dtype=float32, numpy=array([], shape=(1, 0, 1, 2), dtype=float32)>}), next_step_type=<tf.Tensor: id=784316, shape=(1, 0), dtype=int32, numpy=array([], shape=(1, 0), dtype=int32)>, reward=<tf.Tensor: id=784317, shape=(1, 0), dtype=float32, numpy=array([], shape=(1, 0), dtype=float32)>, discount=<tf.Tensor: id=784318, shape=(1, 0), dtype=float32, numpy=array([], shape=(1, 0), dtype=float32)>)
Observations in ActorNet tf.Tensor(
[[[

(TimeStep(step_type=<tf.Tensor: id=784533, shape=(1,), dtype=int32, numpy=array([1], dtype=int32)>, reward=<tf.Tensor: id=784534, shape=(1,), dtype=float32, numpy=array([0.09647059], dtype=float32)>, discount=<tf.Tensor: id=784535, shape=(1,), dtype=float32, numpy=array([1.], dtype=float32)>, observation=<tf.Tensor: id=784536, shape=(1, 4, 40), dtype=float32, numpy=
 array([[[1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0.,
          0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 1., 1., 0., 0., 1.],
         [0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 1., 1., 1.,
          1., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1.,
          1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
          0., 0., 1., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 

In [11]:
# trajectories = replay_buffer.gather_all()
# total_loss, _ = agent.train(experience=trajectories)
# replay_buffer.clear()

# https://www.mikulskibartosz.name/how-to-train-a-reinforcement-learning-agent-using-tensorflow-agents/
def train_agent():
    dataset = replay_buffer.as_dataset(
        sample_batch_size=100,
        num_steps=10
    )
    
    iterator = iter(dataset)
    
    loss = None
    for _ in range(10):
        trajectories, _ = next(iterator)
        loss = agent.train(experience=trajectories)
    
    print("Training loss:", loss.loss.numpy())
    return loss.loss.numpy()

In [15]:
# train agent 
for i in range(50):
    print('Iteration', i)
    collect_driver.run()
    print('Gathering trajectories...')
    trajectories = replay_buffer.gather_all()
    print('Replay buffer gather all', trajectories)
    total_loss, _ = agent.train(experience=trajectories)
    replay_buffer.clear()
    print('Total', total_loss)

Iteration 0
Observations in ActorNet tf.Tensor(
[[[ 0.         -0.42587212 -1.2093381  -1.1066072  -2.9669387
    0.          0.          1.2060769   0.          0.15988994
    0.          1.1103549   0.         -0.5383446  -0.3588906
   -0.7297873  -0.5830615  -1.0525551   2.66691     0.
   -0.15013991 -0.45707595 -0.5511859  -0.45919695  1.7543349
   -0.18913272 -0.52357084 -1.8185984   0.         -0.54478574
   -0.51913404  0.          0.         -0.36321494  0.
    0.          1.41234     0.         -0.24646446  0.        ]
  [ 0.          0.45192003  0.          4.788521   -0.28990066
    1.5055937   0.         -0.42523494  0.          0.
   -0.6324552  -0.19624195  0.         -0.13601542  0.
   -0.8373201  -1.1601118   3.1169424  -0.39664936 -1.3032991
   -0.36352918 -0.7056376   1.248919    0.8510008   0.
   -0.36349896 -1.7593246   3.3338575  -0.24245687  0.8703022
    1.3123169   0.639009    0.         -1.7593246  -0.6141761
    0.         -0.32545304  0.          0.27382016  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.42587212 -1.2093381  -1.1066072  -2.9669387
     0.          0.          1.2060769   0.          0.15988994
     0.          1.1103549   0.         -0.5383446  -0.3588906
    -0.7297873  -0.5830615  -1.0525551   2.66691     0.
    -0.15013991 -0.45707595 -0.5511859  -0.45919695  1.7543349
    -0.18913272 -0.52357084 -1.8185984   0.         -0.54478574
    -0.51913404  0.          0.         -0.36321494  0.
     0.          1.41234     0.         -0.24646446  0.        ]
   [ 0.          0.45192003  0.          4.788521   -0.28990066
     1.5055937   0.         -0.42523494  0.          0.
    -0.6324552  -0.19624195  0.         -0.13601542  0.
    -0.8373201  -1.1601118   3.1169424  -0.39664936 -1.3032991
    -0.36352918 -0.7056376   1.248919    0.8510008   0.
    -0.36349896 -1.7593246   3.3338575  -0.24245687  0.8703022
     1.3123169   0.639009    0.         -1.7593246  -0.6141761
     0.         -0.32545304  0.          0.273820

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.42587212 -1.2093381  -1.1066072  -2.9669387
     0.          0.          1.2060769   0.          0.15988994
     0.          1.1103549   0.         -0.5383446  -0.3588906
    -0.7297873  -0.5830615  -1.0525551   2.66691     0.
    -0.15013991 -0.45707595 -0.5511859  -0.45919695  1.7543349
    -0.18913272 -0.52357084 -1.8185984   0.         -0.54478574
    -0.51913404  0.          0.         -0.36321494  0.
     0.          1.41234     0.         -0.24646446  0.        ]
   [ 0.          0.45192003  0.          4.788521   -0.28990066
     1.5055937   0.         -0.42523494  0.          0.
    -0.6324552  -0.19624195  0.         -0.13601542  0.
    -0.8373201  -1.1601118   3.1169424  -0.39664936 -1.3032991
    -0.36352918 -0.7056376   1.248919    0.8510008   0.
    -0.36349896 -1.7593246   3.3338575  -0.24245687  0.8703022
     1.3123169   0.639009    0.         -1.7593246  -0.6141761
     0.         -0.32545304  0.          0.273820

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.42587212 -1.2093381  -1.1066072  -2.9669387
     0.          0.          1.2060769   0.          0.15988994
     0.          1.1103549   0.         -0.5383446  -0.3588906
    -0.7297873  -0.5830615  -1.0525551   2.66691     0.
    -0.15013991 -0.45707595 -0.5511859  -0.45919695  1.7543349
    -0.18913272 -0.52357084 -1.8185984   0.         -0.54478574
    -0.51913404  0.          0.         -0.36321494  0.
     0.          1.41234     0.         -0.24646446  0.        ]
   [ 0.          0.45192003  0.          4.788521   -0.28990066
     1.5055937   0.         -0.42523494  0.          0.
    -0.6324552  -0.19624195  0.         -0.13601542  0.
    -0.8373201  -1.1601118   3.1169424  -0.39664936 -1.3032991
    -0.36352918 -0.7056376   1.248919    0.8510008   0.
    -0.36349896 -1.7593246   3.3338575  -0.24245687  0.8703022
     1.3123169   0.639009    0.         -1.7593246  -0.6141761
     0.         -0.32545304  0.          0.273820

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.42089003 -1.1802423  -1.0824871  -2.7086718
     0.          0.          1.1771469   0.          0.1582489
     0.          1.0860648   0.         -0.5314758  -0.35486618
    -0.7187498  -0.57533556 -1.0307889   2.4705467   0.
    -0.14860626 -0.4516011  -0.5440763  -0.4536876   1.6858523
    -0.18716055 -0.51696795 -1.7438384   0.         -0.5377968
    -0.51261115  0.          0.         -0.35913008  0.
     0.          1.3712847   0.         -0.24386498  0.        ]
   [ 0.          0.44652843  0.          3.9251776  -0.2867659
     1.4580098   0.         -0.4202627   0.          0.
    -0.6237066  -0.194187    0.         -0.13458821  0.
    -0.8232859  -1.1334862   2.8237684  -0.3920936  -1.2690312
    -0.3594399  -0.69520575  1.2177174   0.8365488   0.
    -0.3594101  -1.6903726  -0.32450768 -0.23988794  0.8552408
     1.2775195   0.6301205   0.         -1.6903726  -0.60581505
     0.         -0.32187176  0.          0.2708876

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.42089003 -1.1802423  -1.0824871  -2.7086718
     0.          0.          1.1771469   0.          0.1582489
     0.          1.0860648   0.         -0.5314758  -0.35486618
    -0.7187498  -0.57533556 -1.0307889   2.4705467   0.
    -0.14860626 -0.4516011  -0.5440763  -0.4536876   1.6858523
    -0.18716055 -0.51696795 -1.7438384   0.         -0.5377968
    -0.51261115  0.          0.         -0.35913008  0.
     0.          1.3712847   0.         -0.24386498  0.        ]
   [ 0.          0.44652843  0.          3.9251776  -0.2867659
     1.4580098   0.         -0.4202627   0.          0.
    -0.6237066  -0.194187    0.         -0.13458821  0.
    -0.8232859  -1.1334862   2.8237684  -0.3920936  -1.2690312
    -0.3594399  -0.69520575  1.2177174   0.8365488   0.
    -0.3594101  -1.6903726  -0.32450768 -0.23988794  0.8552408
     1.2775195   0.6301205   0.         -1.6903726  -0.60581505
     0.         -0.32187176  0.          0.2708876

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.42089003 -1.1802423  -1.0824871  -2.7086718
     0.          0.          1.1771469   0.          0.1582489
     0.          1.0860648   0.         -0.5314758  -0.35486618
    -0.7187498  -0.57533556 -1.0307889   2.4705467   0.
    -0.14860626 -0.4516011  -0.5440763  -0.4536876   1.6858523
    -0.18716055 -0.51696795 -1.7438384   0.         -0.5377968
    -0.51261115  0.          0.         -0.35913008  0.
     0.          1.3712847   0.         -0.24386498  0.        ]
   [ 0.          0.44652843  0.          3.9251776  -0.2867659
     1.4580098   0.         -0.4202627   0.          0.
    -0.6237066  -0.194187    0.         -0.13458821  0.
    -0.8232859  -1.1334862   2.8237684  -0.3920936  -1.2690312
    -0.3594399  -0.69520575  1.2177174   0.8365488   0.
    -0.3594101  -1.6903726  -0.32450768 -0.23988794  0.8552408
     1.2775195   0.6301205   0.         -1.6903726  -0.60581505
     0.         -0.32187176  0.          0.2708876

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081ee1d0> [[1.6970410e+00 7.6911563e-01 1.1784511e+00 3.3540761e-01 5.9960020e-01
  0.0000000e+00 2.6126854e+00 8.8421595e-01 0.0000000e+00 2.4669844e-01
  0.0000000e+00 1.0940495e+00 0.0000000e+00 1.0403763e+00 0.0000000e+00
  0.0000000e+00 0.0000000e+00 0.0000000e+00 3.1652331e+00 1.6177281e+00
  0.0000000e+00 0.0000000e+00 1.7720383e-01 0.0000000e+00 1.1324470e+00
  7.1114469e-01 0.0000000e+00 1.5142560e-03 0.0000000e+00 0.0000000e+00
  1.9031936e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 3.8258234e-01
  0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[0.09111942 0.         1.6397916  1.6126196  0.        ]]
action [[0.41786608 0.9129008 ]]
New state [[1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.4160782  -1.1531545  -1.0598812  -2.507954
     0.          0.          1.1502078   0.          0.1566565
     0.          1.0633049   0.         -0.524863   -0.3509731
    -0.70819885 -0.56790835 -1.010322    2.3120577   0.
    -0.14711785 -0.44631758 -0.5372344  -0.44837114  1.6248244
    -0.18524724 -0.51060814 -1.6776152   0.         -0.53106976
    -0.5063276   0.          0.         -0.3551789   0.
     0.          1.3336207   0.         -0.2413452   0.        ]
   [ 0.          0.44132447  0.          3.4061513  -0.28372937
     1.4146788   0.         -0.4154602   0.          0.
    -0.6153109  -0.19219355  0.         -0.13320266  0.
    -0.80993533 -1.1086172   2.6003535  -0.38769028 -1.2373368
    -0.35548446 -0.6852236   1.1887479   0.8228098   0.
    -0.35545513 -1.6289535  -0.32100657 -0.23739757  0.84093654
    -0.778348    0.62159276  0.         -1.6289535  -0.59778607
     0.         -0.31840494  0.          0.268046

  -0.7948411   1.4060829   0.          0.        ]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081ee1d0> [[1.522953   1.2158449  1.52552    0.5689858  0.06139952 0.
  2.477025   1.2847091  0.         0.4052716  0.         0.71274257
  0.         1.6754874  0.         0.         0.         0.33855957
  2.682629   1.8890365  0.         0.         0.         0.
  1.6825435  1.0003467  0.05877376 0.         0.         0.2994135
  2.0882995  0.         0.         0.         0.31782225 0.
  0.         0.         0.         0.29882658]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[0.3796865 0.        1.5358696 2.5818722 0.       ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.4160782  -1.1531545  -1.0598812  -2.507954
     0.          0.          1.1502078   0.          0.1566565
     0.          1.0633049   0.         -0.524863   -0.3509731
    -0.70819885 -0.56790835 -

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.4160782  -1.1531545  -1.0598812  -2.507954
     0.          0.          1.1502078   0.          0.1566565
     0.          1.0633049   0.         -0.524863   -0.3509731
    -0.70819885 -0.56790835 -1.010322    2.3120577   0.
    -0.14711785 -0.44631758 -0.5372344  -0.44837114  1.6248244
    -0.18524724 -0.51060814 -1.6776152   0.         -0.53106976
    -0.5063276   0.          0.         -0.3551789   0.
     0.          1.3336207   0.         -0.2413452   0.        ]
   [ 0.          0.44132447  0.          3.4061513  -0.28372937
     1.4146788   0.         -0.4154602   0.          0.
    -0.6153109  -0.19219355  0.         -0.13320266  0.
    -0.80993533 -1.1086172   2.6003535  -0.38769028 -1.2373368
    -0.35548446 -0.6852236   1.1887479   0.8228098   0.
    -0.35545513 -1.6289535  -0.32100657 -0.23739757  0.84093654
    -0.778348    0.62159276  0.         -1.6289535  -0.59778607
     0.         -0.31840494  0.          0.268046

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081ee1d0> [[2.1854174  0.7861684  0.5254973  0.         0.17513733 0.
  2.7999022  1.2210221  0.         0.30302453 0.         1.0665923
  0.         0.64565825 0.         0.         0.         0.
  3.3338065  0.8151823  0.         0.         0.19972467 0.
  0.86115193 0.03764898 0.         0.         0.         0.4895085
  2.3707385  0.         0.         0.         0.         0.22158003
  0.         0.         0.         0.25749937]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[0.        0.        1.6988987 1.4114813 0.       ]]
action [[0.2832602  0.43871012]]
Observations in ActorNet tf.Tensor(
[[[ 0.         -0.4114271  -1.1278539  -1.0386376  -2.3461657
    0.          0.          1.125041    0.          0.15511012
    0.          1.0419209   0.         -0.51849055 -0.34720445
   -0.6980994  -0.5607612  -0.9910299   2.1806488

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.4114271  -1.1278539  -1.0386376  -2.3461657
     0.          0.          1.125041    0.          0.15511012
     0.          1.0419209   0.         -0.51849055 -0.34720445
    -0.6980994  -0.5607612  -0.9910299   2.1806488   0.
    -0.14567254 -0.44121456 -0.5306439  -0.44323662 -0.6197325
    -0.18338992 -0.50447685 -1.6184176   0.         -0.5245884
    -0.50026906  0.          0.         -0.3513543   0.
     0.         -0.66800827  0.         -0.23890114  0.        ]
   [ 0.          0.43629766  0.          3.0503378  -0.28078595
     1.375002    0.         -0.4108181   0.          0.
    -0.6072453  -0.19025853  0.         -0.13185675  0.
    -0.79721475 -1.0853196   2.4228034  -0.3834311  -1.207909
    -0.3516558  -0.6756595   1.1617563   0.8097271   0.
    -0.3516268  -1.5737885  -0.31761524 -0.23498179  0.8273275
    -0.766488    0.6134021   0.         -1.5737885  -0.5900678
     0.          3.024448    0.          0.2652912

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.4114271  -1.1278539  -1.0386376  -2.3461657
     0.          0.          1.125041    0.          0.15511012
     0.          1.0419209   0.         -0.51849055 -0.34720445
    -0.6980994  -0.5607612  -0.9910299   2.1806488   0.
    -0.14567254 -0.44121456 -0.5306439  -0.44323662 -0.6197325
    -0.18338992 -0.50447685 -1.6184176   0.         -0.5245884
    -0.50026906  0.          0.         -0.3513543   0.
     0.         -0.66800827  0.         -0.23890114  0.        ]
   [ 0.          0.43629766  0.          3.0503378  -0.28078595
     1.375002    0.         -0.4108181   0.          0.
    -0.6072453  -0.19025853  0.         -0.13185675  0.
    -0.79721475 -1.0853196   2.4228034  -0.3834311  -1.207909
    -0.3516558  -0.6756595   1.1617563   0.8097271   0.
    -0.3516268  -1.5737885  -0.31761524 -0.23498179  0.8273275
    -0.766488    0.6134021   0.         -1.5737885  -0.5900678
     0.          3.024448    0.          0.2652912

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.4114271  -1.1278539  -1.0386376  -2.3461657
     0.          0.          1.125041    0.          0.15511012
     0.          1.0419209   0.         -0.51849055 -0.34720445
    -0.6980994  -0.5607612  -0.9910299   2.1806488   0.
    -0.14567254 -0.44121456 -0.5306439  -0.44323662 -0.6197325
    -0.18338992 -0.50447685 -1.6184176   0.         -0.5245884
    -0.50026906  0.          0.         -0.3513543   0.
     0.         -0.66800827  0.         -0.23890114  0.        ]
   [ 0.          0.43629766  0.          3.0503378  -0.28078595
     1.375002    0.         -0.4108181   0.          0.
    -0.6072453  -0.19025853  0.         -0.13185675  0.
    -0.79721475 -1.0853196   2.4228034  -0.3834311  -1.207909
    -0.3516558  -0.6756595   1.1617563   0.8097271   0.
    -0.3516268  -1.5737885  -0.31761524 -0.23498179  0.8273275
    -0.766488    0.6134021   0.         -1.5737885  -0.5900678
     0.          3.024448    0.          0.2652912

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.4114271  -1.1278539  -1.0386376  -2.3461657
     0.          0.          1.125041    0.          0.15511012
     0.          1.0419209   0.         -0.51849055 -0.34720445
    -0.6980994  -0.5607612  -0.9910299   2.1806488   0.
    -0.14567254 -0.44121456 -0.5306439  -0.44323662 -0.6197325
    -0.18338992 -0.50447685 -1.6184176   0.         -0.5245884
    -0.50026906  0.          0.         -0.3513543   0.
     0.         -0.66800827  0.         -0.23890114  0.        ]
   [ 0.          0.43629766  0.          3.0503378  -0.28078595
     1.375002    0.         -0.4108181   0.          0.
    -0.6072453  -0.19025853  0.         -0.13185675  0.
    -0.79721475 -1.0853196   2.4228034  -0.3834311  -1.207909
    -0.3516558  -0.6756595   1.1617563   0.8097271   0.
    -0.3516268  -1.5737885  -0.31761524 -0.23498179  0.8273275
    -0.766488    0.6134021   0.         -1.5737885  -0.5900678
     0.          3.024448    0.          0.2652912

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.40692794 -1.104152   -1.0186245  -2.212157
     0.          0.          1.10146     0.          0.15360785
     0.          1.0217799   0.         -0.51234424 -0.34355363
    -0.6884202  -0.55387676 -0.9728042   2.0693936   0.
    -0.14426826 -0.4362819  -0.52428955 -0.43827373 -0.6117275
    -0.18158586 -0.49856067 -1.5650854   0.         -0.5183384
    -0.49442232  0.          0.         -0.34764963  0.
     0.         -0.6590017   0.         -0.23652911  0.        ]
   [ 0.          0.4314382   0.          2.786939   -0.27793103
     1.3384928   0.         -0.40632766  0.          0.
    -0.59948874 -0.18837917  0.         -0.13054863  0.
    -0.78507566 -1.0634346   2.2773066  -0.37930834 -1.1804898
    -0.34794718 -0.66648513  1.1365271  -1.1596365   0.
    -0.3479186  -1.5238835  -0.31432804  3.9548273   0.8143593
    -0.7551546   0.6055268   0.         -1.5238835  -0.58264065
     0.          2.7667825   0.          0.262618

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.40692794 -1.104152   -1.0186245  -2.212157
     0.          0.          1.10146     0.          0.15360785
     0.          1.0217799   0.         -0.51234424 -0.34355363
    -0.6884202  -0.55387676 -0.9728042   2.0693936   0.
    -0.14426826 -0.4362819  -0.52428955 -0.43827373 -0.6117275
    -0.18158586 -0.49856067 -1.5650854   0.         -0.5183384
    -0.49442232  0.          0.         -0.34764963  0.
     0.         -0.6590017   0.         -0.23652911  0.        ]
   [ 0.          0.4314382   0.          2.786939   -0.27793103
     1.3384928   0.         -0.40632766  0.          0.
    -0.59948874 -0.18837917  0.         -0.13054863  0.
    -0.78507566 -1.0634346   2.2773066  -0.37930834 -1.1804898
    -0.34794718 -0.66648513  1.1365271  -1.1596365   0.
    -0.3479186  -1.5238835  -0.31432804  3.9548273   0.8143593
    -0.7551546   0.6055268   0.         -1.5238835  -0.58264065
     0.          2.7667825   0.          0.262618

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.40692794 -1.104152   -1.0186245  -2.212157
     0.          0.          1.10146     0.          0.15360785
     0.          1.0217799   0.         -0.51234424 -0.34355363
    -0.6884202  -0.55387676 -0.9728042   2.0693936   0.
    -0.14426826 -0.4362819  -0.52428955 -0.43827373 -0.6117275
    -0.18158586 -0.49856067 -1.5650854   0.         -0.5183384
    -0.49442232  0.          0.         -0.34764963  0.
     0.         -0.6590017   0.         -0.23652911  0.        ]
   [ 0.          0.4314382   0.          2.786939   -0.27793103
     1.3384928   0.         -0.40632766  0.          0.
    -0.59948874 -0.18837917  0.         -0.13054863  0.
    -0.78507566 -1.0634346   2.2773066  -0.37930834 -1.1804898
    -0.34794718 -0.66648513  1.1365271  -1.1596365   0.
    -0.3479186  -1.5238835  -0.31432804  3.9548273   0.8143593
    -0.7551546   0.6055268   0.         -1.5238835  -0.58264065
     0.          2.7667825   0.          0.262618

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.40692794 -1.104152   -1.0186245  -2.212157
     0.          0.          1.10146     0.          0.15360785
     0.          1.0217799   0.         -0.51234424 -0.34355363
    -0.6884202  -0.55387676 -0.9728042   2.0693936   0.
    -0.14426826 -0.4362819  -0.52428955 -0.43827373 -0.6117275
    -0.18158586 -0.49856067 -1.5650854   0.         -0.5183384
    -0.49442232  0.          0.         -0.34764963  0.
     0.         -0.6590017   0.         -0.23652911  0.        ]
   [ 0.          0.4314382   0.          2.786939   -0.27793103
     1.3384928   0.         -0.40632766  0.          0.
    -0.59948874 -0.18837917  0.         -0.13054863  0.
    -0.78507566 -1.0634346   2.2773066  -0.37930834 -1.1804898
    -0.34794718 -0.66648513  1.1365271  -1.1596365   0.
    -0.3479186  -1.5238835  -0.31432804  3.9548273   0.8143593
    -0.7551546   0.6055268   0.         -1.5238835  -0.58264065
     0.          2.7667825   0.          0.262618

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.40257266 -1.0818868  -0.9997273  -2.0987914
     0.          0.          1.0793049   0.          0.15214777
     0.          1.0027654   0.         -0.506411   -0.3400146
    -0.6791327  -0.5472395  -0.95555013  1.9736168   0.
    -0.14290309 -0.43151033 -0.5181576  -0.43347317 -0.60402447
    -0.1798326  -0.4928472  -1.5167093   0.         -0.5123059
    -0.48877525  0.          0.         -0.3440587   0.
     0.         -0.6503499   0.         -0.23422557  0.        ]
   [ 0.          0.42673683  0.          2.5818262  -0.27516022
     1.3047509   0.         -0.40198064  0.          0.
    -0.5920216  -0.1865528   0.         -0.12927648  0.
    -0.7734752  -1.0428247   2.1552505  -0.37531492 -1.1548604
    -0.34435248 -0.6576747   1.1128762  -1.1349611   0.
    -0.34432426 -1.4784515  -0.31113973  3.450276   -1.2029746
    -0.7443099  -1.457496    0.         -1.4784515  -0.5754868
     0.          2.5654817   0.          0.260023

  -0.75955683  1.2976197   0.          0.        ]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081ee1d0> [[2.2241745  1.0872582  0.28677997 0.9825479  0.09631099 0.
  2.4235775  2.516789   0.         0.         0.         0.10405071
  0.         0.80152893 0.         0.         0.         0.59891796
  3.1223254  0.45184708 0.         1.1468278  0.18512765 0.
  1.3182864  0.         0.6501318  0.19401467 0.         0.
  1.8452532  0.         0.         0.         0.         0.9916009
  0.         0.14594221 0.         0.        ]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[0.         0.         0.54283375 2.0874195  0.        ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.40257266 -1.0818868  -0.9997273  -2.0987914
     0.          0.          1.0793049   0.          0.15214777
     0.          1.0027654   0.         -0.506411   -0.3400146
    -0.6791327  -0.547

  -0.75955683  1.2976197   0.          0.        ]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081ee1d0> [[2.2241745  1.0872582  0.28677997 0.9825479  0.09631099 0.
  2.4235775  2.516789   0.         0.         0.         0.10405071
  0.         0.80152893 0.         0.         0.         0.59891796
  3.1223254  0.45184708 0.         1.1468278  0.18512765 0.
  1.3182864  0.         0.6501318  0.19401467 0.         0.
  1.8452532  0.         0.         0.         0.         0.9916009
  0.         0.14594221 0.         0.        ]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[0.         0.         0.54283375 2.0874195  0.        ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.40257266 -1.0818868  -0.9997273  -2.0987914
     0.          0.          1.0793049   0.          0.15214777
     0.          1.0027654   0.         -0.506411   -0.3400146
    -0.6791327  -0.547

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.40257266 -1.0818868  -0.9997273  -2.0987914
     0.          0.          1.0793049   0.          0.15214777
     0.          1.0027654   0.         -0.506411   -0.3400146
    -0.6791327  -0.5472395  -0.95555013  1.9736168   0.
    -0.14290309 -0.43151033 -0.5181576  -0.43347317 -0.60402447
    -0.1798326  -0.4928472  -1.5167093   0.         -0.5123059
    -0.48877525  0.          0.         -0.3440587   0.
     0.         -0.6503499   0.         -0.23422557  0.        ]
   [ 0.          0.42673683  0.          2.5818262  -0.27516022
     1.3047509   0.         -0.40198064  0.          0.
    -0.5920216  -0.1865528   0.         -0.12927648  0.
    -0.7734752  -1.0428247   2.1552505  -0.37531492 -1.1548604
    -0.34435248 -0.6576747   1.1128762  -1.1349611   0.
    -0.34432426 -1.4784515  -0.31113973  3.450276   -1.2029746
    -0.7443099  -1.457496    0.         -1.4784515  -0.5754868
     0.          2.5654817   0.          0.260023

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.39835376 -1.0609188  -0.98184603 -2.001261
     0.          0.          1.0584376   0.         -4.069639
     0.          0.9847761   0.         -0.5006787  -0.33658183
    -0.6702114  -0.54083467 -0.93918425  1.8900325   0.
    -0.14157528 -0.4268913  -0.5122354  -0.42882624 -0.59660494
    -0.17812778 -0.48732522 -1.4725664   0.         -0.50647885
    -0.48331675  0.          0.         -0.3405758   0.
     0.         -0.64203024  0.         -0.23198733  0.        ]
   [ 0.          0.42218494  0.          2.4162538  -0.27246946
     1.2734431   0.         -0.39776963  0.          0.
    -0.5848264  -0.18477704  0.         -0.12803878  0.
    -0.76237464 -1.0233701   2.050949   -0.37144417 -1.130834
    -0.34086597 -0.6492046   1.0906456  -1.1117997   0.
    -0.34083807 -1.4368622  -0.30804545  3.099918   -1.176762
    -0.73391944 -1.4172603   0.         -1.4368622  -0.5685896
     0.          2.4025989   0.          0.25750375 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.39835376 -1.0609188  -0.98184603 -2.001261
     0.          0.          1.0584376   0.         -4.069639
     0.          0.9847761   0.         -0.5006787  -0.33658183
    -0.6702114  -0.54083467 -0.93918425  1.8900325   0.
    -0.14157528 -0.4268913  -0.5122354  -0.42882624 -0.59660494
    -0.17812778 -0.48732522 -1.4725664   0.         -0.50647885
    -0.48331675  0.          0.         -0.3405758   0.
     0.         -0.64203024  0.         -0.23198733  0.        ]
   [ 0.          0.42218494  0.          2.4162538  -0.27246946
     1.2734431   0.         -0.39776963  0.          0.
    -0.5848264  -0.18477704  0.         -0.12803878  0.
    -0.76237464 -1.0233701   2.050949   -0.37144417 -1.130834
    -0.34086597 -0.6492046   1.0906456  -1.1117997   0.
    -0.34083807 -1.4368622  -0.30804545  3.099918   -1.176762
    -0.73391944 -1.4172603   0.         -1.4368622  -0.5685896
     0.          2.4025989   0.          0.25750375 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.39835376 -1.0609188  -0.98184603 -2.001261
     0.          0.          1.0584376   0.         -4.069639
     0.          0.9847761   0.         -0.5006787  -0.33658183
    -0.6702114  -0.54083467 -0.93918425  1.8900325   0.
    -0.14157528 -0.4268913  -0.5122354  -0.42882624 -0.59660494
    -0.17812778 -0.48732522 -1.4725664   0.         -0.50647885
    -0.48331675  0.          0.         -0.3405758   0.
     0.         -0.64203024  0.         -0.23198733  0.        ]
   [ 0.          0.42218494  0.          2.4162538  -0.27246946
     1.2734431   0.         -0.39776963  0.          0.
    -0.5848264  -0.18477704  0.         -0.12803878  0.
    -0.76237464 -1.0233701   2.050949   -0.37144417 -1.130834
    -0.34086597 -0.6492046   1.0906456  -1.1117997   0.
    -0.34083807 -1.4368622  -0.30804545  3.099918   -1.176762
    -0.73391944 -1.4172603   0.         -1.4368622  -0.5685896
     0.          2.4025989   0.          0.25750375 

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[0.        0.        0.4373415 0.5647204 0.       ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.39426407 -1.0411268  -0.9648926  -1.9161917
     0.          0.          1.0387374   0.         -3.5432243
     0.          0.9677228   0.         -0.49513632 -0.33325008
    -0.6616325  -0.5346492  -0.9236325   1.8162551   0.
    -0.14028311 -0.42241675 -0.5065112  -0.42432487 -0.58945215
    -0.17646916 -0.4819842  -1.4320726   0.         -0.5008457
    -0.4780366   0.          0.         -0.3371957   0.
     0.         -0.63402176  0.         -0.22981133  0.        ]
   [ 0.          0.41777515  0.          2.2789593  -0.2698549
     1.2442901   0.         -0.39368773  0.          0.
    -0.5778871  -0.18304947  0.         -0.1268339   0.
    -0.75173897 -1.0049675   1.9604769  -0.36768982 -1.1082498
     2.8685997  -0.6410535   1.0696985  -1.0900033   0.
    -0.33745474 -

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.39426407 -1.0411268  -0.9648926  -1.9161917
     0.          0.          1.0387374   0.         -3.5432243
     0.          0.9677228   0.         -0.49513632 -0.33325008
    -0.6616325  -0.5346492  -0.9236325   1.8162551   0.
    -0.14028311 -0.42241675 -0.5065112  -0.42432487 -0.58945215
    -0.17646916 -0.4819842  -1.4320726   0.         -0.5008457
    -0.4780366   0.          0.         -0.3371957   0.
     0.         -0.63402176  0.         -0.22981133  0.        ]
   [ 0.          0.41777515  0.          2.2789593  -0.2698549
     1.2442901   0.         -0.39368773  0.          0.
    -0.5778871  -0.18304947  0.         -0.1268339   0.
    -0.75173897 -1.0049675   1.9604769  -0.36768982 -1.1082498
     2.8685997  -0.6410535   1.0696985  -1.0900033   0.
    -0.33745474 -1.3986021  -0.30504057  2.8384361  -1.1521944
    -0.7239525  -1.3801885   0.         -1.3986021  -0.5619344
     0.          2.2672882   0.          0.2550542

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.39426407 -1.0411268  -0.9648926  -1.9161917
     0.          0.          1.0387374   0.         -3.5432243
     0.          0.9677228   0.         -0.49513632 -0.33325008
    -0.6616325  -0.5346492  -0.9236325   1.8162551   0.
    -0.14028311 -0.42241675 -0.5065112  -0.42432487 -0.58945215
    -0.17646916 -0.4819842  -1.4320726   0.         -0.5008457
    -0.4780366   0.          0.         -0.3371957   0.
     0.         -0.63402176  0.         -0.22981133  0.        ]
   [ 0.          0.41777515  0.          2.2789593  -0.2698549
     1.2442901   0.         -0.39368773  0.          0.
    -0.5778871  -0.18304947  0.         -0.1268339   0.
    -0.75173897 -1.0049675   1.9604769  -0.36768982 -1.1082498
     2.8685997  -0.6410535   1.0696985  -1.0900033   0.
    -0.33745474 -1.3986021  -0.30504057  2.8384361  -1.1521944
    -0.7239525  -1.3801885   0.         -1.3986021  -0.5619344
     0.          2.2672882   0.          0.2550542

Total tf.Tensor(0.0, shape=(), dtype=float32)
Iteration 8
Observations in ActorNet tf.Tensor(
[[[ 0.         -0.39029735 -1.0224046  -0.9487893  -1.8411375
    0.          0.          1.0200996   0.         -3.1794603
    0.          0.95152736  0.         -0.4897735  -0.3300144
   -0.6533748  -0.5286707  -0.9088297   1.7505056   0.
   -0.13902505 -0.41807923 -0.50097424 -0.41996166 -0.5825501
   -0.17485471 -0.47681442 -1.3947512   0.         -0.4953958
   -0.47292528  0.          0.         -0.3339133   0.
    0.         -0.6263057   0.         -0.22769469  0.        ]
  [ 0.          0.41350007  0.          2.1627123  -0.26731312
    1.2170551   0.         -0.3897284   0.          0.
   -0.5711888  -0.18136802  0.         -0.12566042  0.
   -0.7415365  -0.9875246   1.8810246  -0.36404625 -1.0869693
    2.6581326  -0.6332019   1.0499156  -1.0694424   0.
   -0.3341693  -1.3632493  -0.30212095 -0.36148328 -1.1291069
   -0.71438116 -1.3458865   0.         -1.3632493  -0.555507
    0.   

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.39029735 -1.0224046  -0.9487893  -1.8411375
     0.          0.          1.0200996   0.         -3.1794603
     0.          0.95152736  0.         -0.4897735  -0.3300144
    -0.6533748  -0.5286707  -0.9088297   1.7505056   0.
    -0.13902505 -0.41807923 -0.50097424 -0.41996166 -0.5825501
    -0.17485471 -0.47681442 -1.3947512   0.         -0.4953958
    -0.47292528  0.          0.         -0.3339133   0.
     0.         -0.6263057   0.         -0.22769469  0.        ]
   [ 0.          0.41350007  0.          2.1627123  -0.26731312
     1.2170551   0.         -0.3897284   0.          0.
    -0.5711888  -0.18136802  0.         -0.12566042  0.
    -0.7415365  -0.9875246   1.8810246  -0.36404625 -1.0869693
     2.6581326  -0.6332019   1.0499156  -1.0694424   0.
    -0.3341693  -1.3632493  -0.30212095 -0.36148328 -1.1291069
    -0.71438116 -1.3458865   0.         -1.3632493  -0.555507
     0.          2.1525564   0.          0.25267267 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.39029735 -1.0224046  -0.9487893  -1.8411375
     0.          0.          1.0200996   0.         -3.1794603
     0.          0.95152736  0.         -0.4897735  -0.3300144
    -0.6533748  -0.5286707  -0.9088297   1.7505056   0.
    -0.13902505 -0.41807923 -0.50097424 -0.41996166 -0.5825501
    -0.17485471 -0.47681442 -1.3947512   0.         -0.4953958
    -0.47292528  0.          0.         -0.3339133   0.
     0.         -0.6263057   0.         -0.22769469  0.        ]
   [ 0.          0.41350007  0.          2.1627123  -0.26731312
     1.2170551   0.         -0.3897284   0.          0.
    -0.5711888  -0.18136802  0.         -0.12566042  0.
    -0.7415365  -0.9875246   1.8810246  -0.36404625 -1.0869693
     2.6581326  -0.6332019   1.0499156  -1.0694424   0.
    -0.3341693  -1.3632493  -0.30212095 -0.36148328 -1.1291069
    -0.71438116 -1.3458865   0.         -1.3632493  -0.555507
     0.          2.1525564   0.          0.25267267 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.39029735 -1.0224046  -0.9487893  -1.8411375
     0.          0.          1.0200996   0.         -3.1794603
     0.          0.95152736  0.         -0.4897735  -0.3300144
    -0.6533748  -0.5286707  -0.9088297   1.7505056   0.
    -0.13902505 -0.41807923 -0.50097424 -0.41996166 -0.5825501
    -0.17485471 -0.47681442 -1.3947512   0.         -0.4953958
    -0.47292528  0.          0.         -0.3339133   0.
     0.         -0.6263057   0.         -0.22769469  0.        ]
   [ 0.          0.41350007  0.          2.1627123  -0.26731312
     1.2170551   0.         -0.3897284   0.          0.
    -0.5711888  -0.18136802  0.         -0.12566042  0.
    -0.7415365  -0.9875246   1.8810246  -0.36404625 -1.0869693
     2.6581326  -0.6332019   1.0499156  -1.0694424   0.
    -0.3341693  -1.3632493  -0.30212095 -0.36148328 -1.1291069
    -0.71438116 -1.3458865   0.         -1.3632493  -0.555507
     0.          2.1525564   0.          0.25267267 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.38644743 -1.0046586  -0.9334674  -1.7742758
     0.          0.          1.002432    0.         -2.9088147
     0.         -1.0364181   0.         -0.4845807  -0.32687032
    -0.6454185  -0.522888   -0.89471734  1.691426    0.
    -0.13779958 -0.41387206 -0.4956144  -0.4157296  -0.57588476
    -0.17328247 -0.4718069  -1.3602085   0.         -0.49011955
    -0.46797383  0.          0.         -0.330724    0.
     0.         -0.6188645   0.         -0.22563481  0.        ]
   [ 0.          0.40935278  0.          2.0626326  -0.26484075
     1.1915369   0.         -0.3858856   0.          0.
    -0.56471777 -0.17973062  0.         -0.124517    0.
    -0.7317386  -0.9709611   1.8105229  -0.36050802 -1.0668713
     2.4881253  -0.62563175  1.0311928  -1.050005    0.
    -0.33097702 -1.3304533  -0.29928264 -0.35797608 -1.107356
    -0.7051795  -1.3140259   0.         -1.3304533  -0.5492949
     0.          2.0536656   0.          0.250355

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.38644743 -1.0046586  -0.9334674  -1.7742758
     0.          0.          1.002432    0.         -2.9088147
     0.         -1.0364181   0.         -0.4845807  -0.32687032
    -0.6454185  -0.522888   -0.89471734  1.691426    0.
    -0.13779958 -0.41387206 -0.4956144  -0.4157296  -0.57588476
    -0.17328247 -0.4718069  -1.3602085   0.         -0.49011955
    -0.46797383  0.          0.         -0.330724    0.
     0.         -0.6188645   0.         -0.22563481  0.        ]
   [ 0.          0.40935278  0.          2.0626326  -0.26484075
     1.1915369   0.         -0.3858856   0.          0.
    -0.56471777 -0.17973062  0.         -0.124517    0.
    -0.7317386  -0.9709611   1.8105229  -0.36050802 -1.0668713
     2.4881253  -0.62563175  1.0311928  -1.050005    0.
    -0.33097702 -1.3304533  -0.29928264 -0.35797608 -1.107356
    -0.7051795  -1.3140259   0.         -1.3304533  -0.5492949
     0.          2.0536656   0.          0.250355

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.38644743 -1.0046586  -0.9334674  -1.7742758
     0.          0.          1.002432    0.         -2.9088147
     0.         -1.0364181   0.         -0.4845807  -0.32687032
    -0.6454185  -0.522888   -0.89471734  1.691426    0.
    -0.13779958 -0.41387206 -0.4956144  -0.4157296  -0.57588476
    -0.17328247 -0.4718069  -1.3602085   0.         -0.49011955
    -0.46797383  0.          0.         -0.330724    0.
     0.         -0.6188645   0.         -0.22563481  0.        ]
   [ 0.          0.40935278  0.          2.0626326  -0.26484075
     1.1915369   0.         -0.3858856   0.          0.
    -0.56471777 -0.17973062  0.         -0.124517    0.
    -0.7317386  -0.9709611   1.8105229  -0.36050802 -1.0668713
     2.4881253  -0.62563175  1.0311928  -1.050005    0.
    -0.33097702 -1.3304533  -0.29928264 -0.35797608 -1.107356
    -0.7051795  -1.3140259   0.         -1.3304533  -0.5492949
     0.          2.0536656   0.          0.250355

  -0.70993716  1.161992    0.          0.        ]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081ee1d0> [[2.229027   0.4929905  1.1919398  0.         0.14957373 0.11834671
  3.169306   1.3832672  0.         1.1296012  0.45518115 0.7263922
  0.         0.         0.         1.8443114  1.6313342  1.6815553
  4.218713   0.69764274 1.465347   2.1744857  0.10831617 0.
  0.8538426  0.         2.3980653  1.2669486  0.64242846 0.
  1.010706   2.3779721  0.         0.         0.45917037 2.8850281
  0.         2.667912   1.3384036  0.18308921]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[0.        0.        1.1524048 1.0425512 0.       ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3827087  -0.98780733 -0.9188655  -1.7142165
     0.          0.          0.9856528   0.         -2.6973224
     0.         -1.0184822   0.         -0.47954923 -0.3238136
    -0.637746   -0.517

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3827087  -0.98780733 -0.9188655  -1.7142165
     0.          0.          0.9856528   0.         -2.6973224
     0.         -1.0184822   0.         -0.47954923 -0.3238136
    -0.637746   -0.5172904  -0.8812434   1.6379595   0.
    -0.13660531 -0.40978873 -0.4904225  -0.4116224  -0.56944263
    -0.1717507  -0.4669533  -1.3281155   0.         -0.48500785
    -0.4631742   0.          0.         -0.32762352  0.
     0.         -0.61168224  0.         -0.2236292   0.        ]
   [ 0.          0.40532732  0.          1.9752891  -0.26243457
     1.1675617   0.         -0.38215378  5.          0.
    -0.5584615  -0.17813542  0.         -0.12340237  0.
    -0.7223192  -0.95520496 -0.5318198  -0.3570702  -1.0478501
     2.3470802  -0.6183267   1.0134388  -1.0315919   0.
    -0.32787368 -1.2999197  -0.29652184 -0.35456824 -1.0868175
    -0.6963246  -1.2843295   0.         -1.2999197  -0.54328614
     0.          1.9672768   0.          0.24810

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3827087  -0.98780733 -0.9188655  -1.7142165
     0.          0.          0.9856528   0.         -2.6973224
     0.         -1.0184822   0.         -0.47954923 -0.3238136
    -0.637746   -0.5172904  -0.8812434   1.6379595   0.
    -0.13660531 -0.40978873 -0.4904225  -0.4116224  -0.56944263
    -0.1717507  -0.4669533  -1.3281155   0.         -0.48500785
    -0.4631742   0.          0.         -0.32762352  0.
     0.         -0.61168224  0.         -0.2236292   0.        ]
   [ 0.          0.40532732  0.          1.9752891  -0.26243457
     1.1675617   0.         -0.38215378  5.          0.
    -0.5584615  -0.17813542  0.         -0.12340237  0.
    -0.7223192  -0.95520496 -0.5318198  -0.3570702  -1.0478501
     2.3470802  -0.6183267   1.0134388  -1.0315919   0.
    -0.32787368 -1.2999197  -0.29652184 -0.35456824 -1.0868175
    -0.6963246  -1.2843295   0.         -1.2999197  -0.54328614
     0.          1.9672768   0.          0.24810

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081ee1d0> [[2.3572416  0.5985371  1.5866063  0.         0.5426415  0.40876386
  3.0866401  1.7624881  0.         1.1995952  0.9901674  0.44328454
  0.         0.         0.         1.9151473  1.5800855  2.1423988
  4.072989   0.86272585 0.87028897 2.131793   0.8024809  0.
  0.7459984  0.         2.4758844  1.3465735  0.67866963 0.
  1.1075135  2.3691647  0.         0.         1.1166943  2.9283333
  0.         2.6806667  1.4703922  0.        ]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[0.26502568 0.1422235  0.6277739  1.403796   0.        ]]
action [[0.8678484  0.07899526]]
New state [[1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1. 0.]


Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.37907583 -0.9717778  -0.90492874 -1.6598788
     0.          0.          0.9696901   0.         -2.5261645
     0.         -1.0014477   0.         -0.47467077 -0.32084018
    -0.63034046 -0.51186836 -0.8683611   1.5892696   0.
    -0.13544095 -0.4058232  -0.48538992 -0.40763375 -0.56321174
    -0.17025757 -0.46224588 -1.298196    0.         -0.4800524
     2.0841756   0.          0.         -0.32460776  0.
     0.         -0.60474426  0.         -0.22167541  0.        ]
   [ 0.          0.40141773  0.          1.8981907  -0.26009178
     1.1449802   0.         -0.37852758  5.          0.
    -0.5524082  -0.17658055  0.         -0.1223153   0.
    -0.71325445 -0.94019294 -0.5261812  -0.35372812 -1.0298128
     2.2276103  -0.6112715   0.99657255 -1.014116    0.
    -0.3248551  -1.2714001  -0.29383507 -0.35125512 -1.0673828
    -0.68779504 -1.2565631   0.         -1.2714001  -0.53747004
     0.          1.8909593   0.          0.24590

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.37907583 -0.9717778  -0.90492874 -1.6598788
     0.          0.          0.9696901   0.         -2.5261645
     0.         -1.0014477   0.         -0.47467077 -0.32084018
    -0.63034046 -0.51186836 -0.8683611   1.5892696   0.
    -0.13544095 -0.4058232  -0.48538992 -0.40763375 -0.56321174
    -0.17025757 -0.46224588 -1.298196    0.         -0.4800524
     2.0841756   0.          0.         -0.32460776  0.
     0.         -0.60474426  0.         -0.22167541  0.        ]
   [ 0.          0.40141773  0.          1.8981907  -0.26009178
     1.1449802   0.         -0.37852758  5.          0.
    -0.5524082  -0.17658055  0.         -0.1223153   0.
    -0.71325445 -0.94019294 -0.5261812  -0.35372812 -1.0298128
     2.2276103  -0.6112715   0.99657255 -1.014116    0.
    -0.3248551  -1.2714001  -0.29383507 -0.35125512 -1.0673828
    -0.68779504 -1.2565631   0.         -1.2714001  -0.53747004
     0.          1.8909593   0.          0.24590

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.37907583 -0.9717778  -0.90492874 -1.6598788
     0.          0.          0.9696901   0.         -2.5261645
     0.         -1.0014477   0.         -0.47467077 -0.32084018
    -0.63034046 -0.51186836 -0.8683611   1.5892696   0.
    -0.13544095 -0.4058232  -0.48538992 -0.40763375 -0.56321174
    -0.17025757 -0.46224588 -1.298196    0.         -0.4800524
     2.0841756   0.          0.         -0.32460776  0.
     0.         -0.60474426  0.         -0.22167541  0.        ]
   [ 0.          0.40141773  0.          1.8981907  -0.26009178
     1.1449802   0.         -0.37852758  5.          0.
    -0.5524082  -0.17658055  0.         -0.1223153   0.
    -0.71325445 -0.94019294 -0.5261812  -0.35372812 -1.0298128
     2.2276103  -0.6112715   0.99657255 -1.014116    0.
    -0.3248551  -1.2714001  -0.29383507 -0.35125512 -1.0673828
    -0.68779504 -1.2565631   0.         -1.2714001  -0.53747004
     0.          1.8909593   0.          0.24590

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.37554395 -0.9565052  -0.8916083  -1.6104085
     0.          0.          0.9544797   0.         -2.3839602
     0.         -0.9852415   0.         -0.4699377  -0.31794634
    -0.6231868  -0.50661284 -0.85602814  1.5446852   0.
    -0.13430525 -0.4019699  -0.48050848 -0.4037583  -0.55718046
    -0.16880156 -0.45767745 -1.2702144   0.         -0.47524524
     1.9973065   0.          0.         -0.3216729   0.
     0.         -0.5980369   0.         -0.21977137  0.        ]
   [ 0.          0.3976183   0.         -0.5335981  -0.25780958
     1.1236624   0.         -0.37500212  5.          0.
    -0.54654723 -0.17506437  0.         -0.12125471  0.
    -0.70452255 -0.92586803 -0.5207178  -0.3504773  -1.0126774
     2.124722   -0.60445213  0.98052245 -0.9975006   0.
    -0.32191753 -1.2446814  -0.29121912 -0.3480324  -1.0489564
    -0.6795714  -1.2305259   0.         -1.2446814  -0.5318363
     0.          1.822897    0.         -3.53457

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.37554395 -0.9565052  -0.8916083  -1.6104085
     0.          0.          0.9544797   0.         -2.3839602
     0.         -0.9852415   0.         -0.4699377  -0.31794634
    -0.6231868  -0.50661284 -0.85602814  1.5446852   0.
    -0.13430525 -0.4019699  -0.48050848 -0.4037583  -0.55718046
    -0.16880156 -0.45767745 -1.2702144   0.         -0.47524524
     1.9973065   0.          0.         -0.3216729   0.
     0.         -0.5980369   0.         -0.21977137  0.        ]
   [ 0.          0.3976183   0.         -0.5335981  -0.25780958
     1.1236624   0.         -0.37500212  5.          0.
    -0.54654723 -0.17506437  0.         -0.12125471  0.
    -0.70452255 -0.92586803 -0.5207178  -0.3504773  -1.0126774
     2.124722   -0.60445213  0.98052245 -0.9975006   0.
    -0.32191753 -1.2446814  -0.29121912 -0.3480324  -1.0489564
    -0.6795714  -1.2305259   0.         -1.2446814  -0.5318363
     0.          1.822897    0.         -3.53457

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081ee1d0> [[1.9469908  0.         1.9566349  0.         0.64259493 0.
  2.1510892  1.4522457  0.         0.573723   1.125987   0.
  0.         1.233039   0.         2.2841458  1.2343569  2.3921359
  3.1433904  0.6295048  0.         2.7656264  0.12262142 0.
  0.9568707  0.         2.0192254  0.38495332 0.         0.
  1.6121799  1.6261559  0.         0.         0.2771145  3.1331367
  0.1576812  2.6612263  0.8322118  0.        ]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[0.8810674  0.         0.39067823 1.2448767  0.        ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.37554395 -0.9565052  -0.8916083  -1.6104085
     0.          0.          0.9544797   0.         -2.3839602
     0.         -0.9852415   0.         -0.4699377  -0.31794634
    -0.6231868  -0.50661284 -0.85602814  1.5446852   0.
    -0.13430525 -0.4019699 

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[1.799267  0.        0.6237479 1.1051427 0.3975128]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.37210852 -0.9419314  -0.87885994 -1.5651197
     0.          0.          0.93996406  0.         -2.2633708
     0.         -0.9697984   0.         -0.46534303 -0.31512868
    -0.6162712  -0.5015155  -0.84420687  1.5036596   0.
    -0.13319711 -0.39822382 -0.47577104 -0.39999062 -0.5513386
    -0.16738108 -0.4532413  -1.2439705   0.         -0.4705792
     1.9204779   0.          0.         -0.31881547  0.
     0.         -0.59154767  0.         -0.21791494  0.        ]
   [ 0.          0.39392424  0.         -0.52811134 -0.2555855
     1.1034943   0.         -0.37157288  4.3885565   0.
    -0.5408687  -0.17358531  0.         -0.1202195   0.
    -0.6961038  -0.91217935 -0.51542073 -0.3473138  -0.9963711
     2.0349035  -0.5978559   0.96522474 -0.98167723  0.
    -0.31905743  0

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.37210852 -0.9419314  -0.87885994 -1.5651197
     0.          0.          0.93996406  0.         -2.2633708
     0.         -0.9697984   0.         -0.46534303 -0.31512868
    -0.6162712  -0.5015155  -0.84420687  1.5036596   0.
    -0.13319711 -0.39822382 -0.47577104 -0.39999062 -0.5513386
    -0.16738108 -0.4532413  -1.2439705   0.         -0.4705792
     1.9204779   0.          0.         -0.31881547  0.
     0.         -0.59154767  0.         -0.21791494  0.        ]
   [ 0.          0.39392424  0.         -0.52811134 -0.2555855
     1.1034943   0.         -0.37157288  4.3885565   0.
    -0.5408687  -0.17358531  0.         -0.1202195   0.
    -0.6961038  -0.91217935 -0.51542073 -0.3473138  -0.9963711
     2.0349035  -0.5978559   0.96522474 -0.98167723  0.
    -0.31905743  0.7505121  -0.28867096 -0.34489608 -1.0314537
    -0.6716357  -1.2060454  -5.         -1.2195823  -0.5263756
     0.          1.7617009   0.         -3.1985874 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.37210852 -0.9419314  -0.87885994 -1.5651197
     0.          0.          0.93996406  0.         -2.2633708
     0.         -0.9697984   0.         -0.46534303 -0.31512868
    -0.6162712  -0.5015155  -0.84420687  1.5036596   0.
    -0.13319711 -0.39822382 -0.47577104 -0.39999062 -0.5513386
    -0.16738108 -0.4532413  -1.2439705   0.         -0.4705792
     1.9204779   0.          0.         -0.31881547  0.
     0.         -0.59154767  0.         -0.21791494  0.        ]
   [ 0.          0.39392424  0.         -0.52811134 -0.2555855
     1.1034943   0.         -0.37157288  4.3885565   0.
    -0.5408687  -0.17358531  0.         -0.1202195   0.
    -0.6961038  -0.91217935 -0.51542073 -0.3473138  -0.9963711
     2.0349035  -0.5978559   0.96522474 -0.98167723  0.
    -0.31905743  0.7505121  -0.28867096 -0.34489608 -1.0314537
    -0.6716357  -1.2060454  -5.         -1.2195823  -0.5263756
     0.          1.7617009   0.         -3.1985874 

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[3.031381  0.        0.        1.8912013 0.       ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3687651  -0.9280051  -0.8666436  -1.523454
     0.          0.          0.926092    0.         -2.1594229
     0.         -0.95506036  0.         -0.46087998 -0.3123838
    -0.6095805  -0.49656856 -0.8328623   1.4657431   0.
    -0.13211535 -0.39457983 -0.47117049 -0.39632583  1.7825425
    -0.16599476 -0.44893107 -1.2192913   0.         -0.46604747
     1.8518945   0.          0.         -0.31603193  0.
     0.         -0.5852649   0.         -0.21610413  0.        ]
   [ 0.          0.3903308   0.         -0.5227899  -0.25341693
     1.0843761   0.         -0.36823556 -0.25399908  0.
    -0.5353631  -0.17214182  0.         -0.11920865  0.
    -0.6879796  -0.8990811  -0.5102817  -0.34423363 -0.98082876
     1.9556023  -0.59147084  0.9506222  -0.9665843   0.
    -0.31627145  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3687651  -0.9280051  -0.8666436  -1.523454
     0.          0.          0.926092    0.         -2.1594229
     0.         -0.95506036  0.         -0.46087998 -0.3123838
    -0.6095805  -0.49656856 -0.8328623   1.4657431   0.
    -0.13211535 -0.39457983 -0.47117049 -0.39632583  1.7825425
    -0.16599476 -0.44893107 -1.2192913   0.         -0.46604747
     1.8518945   0.          0.         -0.31603193  0.
     0.         -0.5852649   0.         -0.21610413  0.        ]
   [ 0.          0.3903308   0.         -0.5227899  -0.25341693
     1.0843761   0.         -0.36823556 -0.25399908  0.
    -0.5353631  -0.17214182  0.         -0.11920865  0.
    -0.6879796  -0.8990811  -0.5102817  -0.34423363 -0.98082876
     1.9556023  -0.59147084  0.9506222  -0.9665843   0.
    -0.31627145  0.7412964  -0.28618753 -0.34184226 -1.0148001
    -0.6639714  -1.1829724  -5.         -1.195945   -0.5210794
     0.          1.7062893   0.         -2.9432395

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3687651  -0.9280051  -0.8666436  -1.523454
     0.          0.          0.926092    0.         -2.1594229
     0.         -0.95506036  0.         -0.46087998 -0.3123838
    -0.6095805  -0.49656856 -0.8328623   1.4657431   0.
    -0.13211535 -0.39457983 -0.47117049 -0.39632583  1.7825425
    -0.16599476 -0.44893107 -1.2192913   0.         -0.46604747
     1.8518945   0.          0.         -0.31603193  0.
     0.         -0.5852649   0.         -0.21610413  0.        ]
   [ 0.          0.3903308   0.         -0.5227899  -0.25341693
     1.0843761   0.         -0.36823556 -0.25399908  0.
    -0.5353631  -0.17214182  0.         -0.11920865  0.
    -0.6879796  -0.8990811  -0.5102817  -0.34423363 -0.98082876
     1.9556023  -0.59147084  0.9506222  -0.9665843   0.
    -0.31627145  0.7412964  -0.28618753 -0.34184226 -1.0148001
    -0.6639714  -1.1829724  -5.         -1.195945   -0.5210794
     0.          1.7062893   0.         -2.9432395

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.36550972 -0.91467947 -0.85492337 -1.4849529
     0.          0.          0.912817    0.         -2.0686097
     0.         -0.940975    0.         -0.45654255 -0.30970865
    -0.6031029  -0.4917646  -0.82196355  1.4305615   0.
    -0.13105898 -0.39103344 -0.4667003  -0.39275935  1.7263699
    -0.1646412  -0.44474098 -1.1960272   0.         -0.46164364
     1.7901789   0.          0.         -0.31331933  0.
     0.         -0.57917804  0.         -0.21433717  0.        ]
   [ 0.          0.3868332   0.         -0.51762575 -0.2513017
     1.0662197   0.         -0.36498597 -0.25187826  0.
    -0.53002197 -0.17073256  0.         -0.11822124  0.
    -0.6801334  -0.8865319  -0.5052929  -0.34123328 -0.9659927
     1.8849146  -0.5852858   0.9366636  -0.95216787  0.
    -0.31355634  0.7324121  -0.28376633  2.8077588  -0.9989288
    -0.6565637  -1.1611772  -4.486178   -1.1736331  -0.51593953
     0.          1.6558044   0.         -2.740701

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.36550972 -0.91467947 -0.85492337 -1.4849529
     0.          0.          0.912817    0.         -2.0686097
     0.         -0.940975    0.         -0.45654255 -0.30970865
    -0.6031029  -0.4917646  -0.82196355  1.4305615   0.
    -0.13105898 -0.39103344 -0.4667003  -0.39275935  1.7263699
    -0.1646412  -0.44474098 -1.1960272   0.         -0.46164364
     1.7901789   0.          0.         -0.31331933  0.
     0.         -0.57917804  0.         -0.21433717  0.        ]
   [ 0.          0.3868332   0.         -0.51762575 -0.2513017
     1.0662197   0.         -0.36498597 -0.25187826  0.
    -0.53002197 -0.17073256  0.         -0.11822124  0.
    -0.6801334  -0.8865319  -0.5052929  -0.34123328 -0.9659927
     1.8849146  -0.5852858   0.9366636  -0.95216787  0.
    -0.31355634  0.7324121  -0.28376633  2.8077588  -0.9989288
    -0.6565637  -1.1611772  -4.486178   -1.1736331  -0.51593953
     0.          1.6558044   0.         -2.740701

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.36550972 -0.91467947 -0.85492337 -1.4849529
     0.          0.          0.912817    0.         -2.0686097
     0.         -0.940975    0.         -0.45654255 -0.30970865
    -0.6031029  -0.4917646  -0.82196355  1.4305615   0.
    -0.13105898 -0.39103344 -0.4667003  -0.39275935  1.7263699
    -0.1646412  -0.44474098 -1.1960272   0.         -0.46164364
     1.7901789   0.          0.         -0.31331933  0.
     0.         -0.57917804  0.         -0.21433717  0.        ]
   [ 0.          0.3868332   0.         -0.51762575 -0.2513017
     1.0662197   0.         -0.36498597 -0.25187826  0.
    -0.53002197 -0.17073256  0.         -0.11822124  0.
    -0.6801334  -0.8865319  -0.5052929  -0.34123328 -0.9659927
     1.8849146  -0.5852858   0.9366636  -0.95216787  0.
    -0.31355634  0.7324121  -0.28376633  2.8077588  -0.9989288
    -0.6565637  -1.1611772  -4.486178   -1.1736331  -0.51593953
     0.          1.6558044   0.         -2.740701

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[2.1550229  0.07069428 0.         1.7601755  0.        ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.36233854 -0.9019124  -0.84366655 -1.449235
     0.          0.          0.9000977   0.         -1.9883792
     0.         -0.9274959   0.         -0.45232484 -0.30710036
    -0.5968272  -0.48709705 -0.81148195  1.397801    0.
    -0.13002698 -0.38758042 -0.4623545  -0.38928682  1.6752005
     5.         -0.44066557 -1.1740477   0.         -0.45736188
     1.7342564   0.          0.         -0.3106746   0.
     0.         -0.573277    0.         -0.21261224  0.        ]
   [ 0.          0.38342738  0.         -0.5126112  -0.24923764
     1.0489469   0.         -0.3618205  -0.24980876  0.
    -0.52483714 -0.16935612  0.         -0.11725637  0.
    -0.67254937 -0.8744943  -0.5004473  -0.33830932 -0.95181084
     1.8213854  -0.5792905   0.92330325 -0.93837863  0.
    -0.3109

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.36233854 -0.9019124  -0.84366655 -1.449235
     0.          0.          0.9000977   0.         -1.9883792
     0.         -0.9274959   0.         -0.45232484 -0.30710036
    -0.5968272  -0.48709705 -0.81148195  1.397801    0.
    -0.13002698 -0.38758042 -0.4623545  -0.38928682  1.6752005
     5.         -0.44066557 -1.1740477   0.         -0.45736188
     1.7342564   0.          0.         -0.3106746   0.
     0.         -0.573277    0.         -0.21261224  0.        ]
   [ 0.          0.38342738  0.         -0.5126112  -0.24923764
     1.0489469   0.         -0.3618205  -0.24980876  0.
    -0.52483714 -0.16935612  0.         -0.11725637  0.
    -0.67254937 -0.8744943  -0.5004473  -0.33830932 -0.95181084
     1.8213854  -0.5792905   0.92330325 -0.93837863  0.
    -0.31090924  0.72384    -0.28140473  2.6312194  -0.98378056
    -0.64939827 -1.1405454  -3.8898485  -1.1525267  -0.51094824
     0.          1.6095567   0.         -2.5749

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.36233854 -0.9019124  -0.84366655 -1.449235
     0.          0.          0.9000977   0.         -1.9883792
     0.         -0.9274959   0.         -0.45232484 -0.30710036
    -0.5968272  -0.48709705 -0.81148195  1.397801    0.
    -0.13002698 -0.38758042 -0.4623545  -0.38928682  1.6752005
     5.         -0.44066557 -1.1740477   0.         -0.45736188
     1.7342564   0.          0.         -0.3106746   0.
     0.         -0.573277    0.         -0.21261224  0.        ]
   [ 0.          0.38342738  0.         -0.5126112  -0.24923764
     1.0489469   0.         -0.3618205  -0.24980876  0.
    -0.52483714 -0.16935612  0.         -0.11725637  0.
    -0.67254937 -0.8744943  -0.5004473  -0.33830932 -0.95181084
     1.8213854  -0.5792905   0.92330325 -0.93837863  0.
    -0.31090924  0.72384    -0.28140473  2.6312194  -0.98378056
    -0.64939827 -1.1405454  -3.8898485  -1.1525267  -0.51094824
     0.          1.6095567   0.         -2.5749

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35924807 -0.8896659  -0.83284324 -1.4159799
     0.          0.          0.88789606  0.         -1.916824
     0.         -0.9145803   0.         -0.4482214  -0.30455613
    -0.59074324 -0.4825594  -0.8013917   1.3671961   0.
    -0.12901846 -0.38421667 -0.45812735 -0.3859042   1.6283326
     4.361685   -0.4366996  -1.1532389   0.         -0.4531966
     1.6832736   0.          0.         -0.308095    0.
     0.         -0.5675525   0.         -0.21092774  0.        ]
   [ 0.          0.3801093   0.         -0.5077392  -0.24722266
     1.0324885   0.         -0.3587355  -0.24778849  0.
    -0.5198012  -0.16801128  0.         -0.11631316  0.
    -0.6652135  -0.8629347  -0.495738   -0.33545858 -0.9382365
     1.7638826  -0.5734756   0.9104991  -0.9251721   0.
    -0.30832726  0.71556175 -0.2791002   2.484298   -0.9693022
    -0.6424622  -1.1209774  -3.4817815  -1.1325213  -0.5060987
     0.          1.5669847   0.         -2.4360888 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35924807 -0.8896659  -0.83284324 -1.4159799
     0.          0.          0.88789606  0.         -1.916824
     0.         -0.9145803   0.         -0.4482214  -0.30455613
    -0.59074324 -0.4825594  -0.8013917   1.3671961   0.
    -0.12901846 -0.38421667 -0.45812735 -0.3859042   1.6283326
     4.361685   -0.4366996  -1.1532389   0.         -0.4531966
     1.6832736   0.          0.         -0.308095    0.
     0.         -0.5675525   0.         -0.21092774  0.        ]
   [ 0.          0.3801093   0.         -0.5077392  -0.24722266
     1.0324885   0.         -0.3587355  -0.24778849  0.
    -0.5198012  -0.16801128  0.         -0.11631316  0.
    -0.6652135  -0.8629347  -0.495738   -0.33545858 -0.9382365
     1.7638826  -0.5734756   0.9104991  -0.9251721   0.
    -0.30832726  0.71556175 -0.2791002   2.484298   -0.9693022
    -0.6424622  -1.1209774  -3.4817815  -1.1325213  -0.5060987
     0.          1.5669847   0.         -2.4360888 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35924807 -0.8896659  -0.83284324 -1.4159799
     0.          0.          0.88789606  0.         -1.916824
     0.         -0.9145803   0.         -0.4482214  -0.30455613
    -0.59074324 -0.4825594  -0.8013917   1.3671961   0.
    -0.12901846 -0.38421667 -0.45812735 -0.3859042   1.6283326
     4.361685   -0.4366996  -1.1532389   0.         -0.4531966
     1.6832736   0.          0.         -0.308095    0.
     0.         -0.5675525   0.         -0.21092774  0.        ]
   [ 0.          0.3801093   0.         -0.5077392  -0.24722266
     1.0324885   0.         -0.3587355  -0.24778849  0.
    -0.5198012  -0.16801128  0.         -0.11631316  0.
    -0.6652135  -0.8629347  -0.495738   -0.33545858 -0.9382365
     1.7638826  -0.5734756   0.9104991  -0.9251721   0.
    -0.30832726  0.71556175 -0.2791002   2.484298   -0.9693022
    -0.6424622  -1.1209774  -3.4817815  -1.1325213  -0.5060987
     0.          1.5669847   0.         -2.4360888 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35924807 -0.8896659  -0.83284324 -1.4159799
     0.          0.          0.88789606  0.         -1.916824
     0.         -0.9145803   0.         -0.4482214  -0.30455613
    -0.59074324 -0.4825594  -0.8013917   1.3671961   0.
    -0.12901846 -0.38421667 -0.45812735 -0.3859042   1.6283326
     4.361685   -0.4366996  -1.1532389   0.         -0.4531966
     1.6832736   0.          0.         -0.308095    0.
     0.         -0.5675525   0.         -0.21092774  0.        ]
   [ 0.          0.3801093   0.         -0.5077392  -0.24722266
     1.0324885   0.         -0.3587355  -0.24778849  0.
    -0.5198012  -0.16801128  0.         -0.11631316  0.
    -0.6652135  -0.8629347  -0.495738   -0.33545858 -0.9382365
     1.7638826  -0.5734756   0.9104991  -0.9251721   0.
    -0.30832726  0.71556175 -0.2791002   2.484298   -0.9693022
    -0.6424622  -1.1209774  -3.4817815  -1.1325213  -0.5060987
     0.          1.5669847   0.         -2.4360888 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35623482 -0.8779056  -0.8224262  -1.384917
     0.          0.          0.8761779   0.         -1.852485
     0.         -0.90219015  0.         -0.44422716 -0.30207336
    -0.58484125 -0.4781458  -0.7916688   1.33852     0.
    -0.12803254 -0.3809384  -0.45401365 -0.38260764  1.5851958
     3.8204234  -0.4328383  -1.1335001   0.         -0.44914258
     1.6365438   0.          0.         -0.30557784  0.
     0.         -0.5619959   0.         -0.2092821   0.        ]
   [ 0.          0.3768754   0.         -0.50300306 -0.24525493
     1.016782    0.         -0.35572752 -0.24581562  0.
    -0.51490706 -0.1666968   0.         -0.11539081  0.
    -0.65811235 -0.851822   -0.49115875 -0.332678   -0.9252274
     1.7115102  -0.56783205  0.89821386 -0.9125084   0.
    -0.30580786  0.70756125 -0.27685067  2.359543   -0.9554453
    -0.6357436  -1.1023844  -3.180046   -1.1135242   1.9339106
     0.          1.527626    0.         -2.3175187 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35623482 -0.8779056  -0.8224262  -1.384917
     0.          0.          0.8761779   0.         -1.852485
     0.         -0.90219015  0.         -0.44422716 -0.30207336
    -0.58484125 -0.4781458  -0.7916688   1.33852     0.
    -0.12803254 -0.3809384  -0.45401365 -0.38260764  1.5851958
     3.8204234  -0.4328383  -1.1335001   0.         -0.44914258
     1.6365438   0.          0.         -0.30557784  0.
     0.         -0.5619959   0.         -0.2092821   0.        ]
   [ 0.          0.3768754   0.         -0.50300306 -0.24525493
     1.016782    0.         -0.35572752 -0.24581562  0.
    -0.51490706 -0.1666968   0.         -0.11539081  0.
    -0.65811235 -0.851822   -0.49115875 -0.332678   -0.9252274
     1.7115102  -0.56783205  0.89821386 -0.9125084   0.
    -0.30580786  0.70756125 -0.27685067  2.359543   -0.9554453
    -0.6357436  -1.1023844  -3.180046   -1.1135242   1.9339106
     0.          1.527626    0.         -2.3175187 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35623482 -0.8779056  -0.8224262  -1.384917
     0.          0.          0.8761779   0.         -1.852485
     0.         -0.90219015  0.         -0.44422716 -0.30207336
    -0.58484125 -0.4781458  -0.7916688   1.33852     0.
    -0.12803254 -0.3809384  -0.45401365 -0.38260764  1.5851958
     3.8204234  -0.4328383  -1.1335001   0.         -0.44914258
     1.6365438   0.          0.         -0.30557784  0.
     0.         -0.5619959   0.         -0.2092821   0.        ]
   [ 0.          0.3768754   0.         -0.50300306 -0.24525493
     1.016782    0.         -0.35572752 -0.24581562  0.
    -0.51490706 -0.1666968   0.         -0.11539081  0.
    -0.65811235 -0.851822   -0.49115875 -0.332678   -0.9252274
     1.7115102  -0.56783205  0.89821386 -0.9125084   0.
    -0.30580786  0.70756125 -0.27685067  2.359543   -0.9554453
    -0.6357436  -1.1023844  -3.180046   -1.1135242   1.9339106
     0.          1.527626    0.         -2.3175187 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35329565 -0.86660004 -0.8123908  -1.3558156
     0.          0.          0.86491215  0.         -1.7942255
     0.         -0.8902908   0.         -0.4403374  -0.2996497
    -0.5791124  -0.47385067 -0.7822916   1.3115785   0.
    -0.12706837 -0.37774208 -0.45000827 -0.37939352  1.5453193
     3.440915   -0.42907715 -1.1147425   0.         -0.445195
     1.5935063   0.          0.         -0.30312067  0.
     0.         -0.556599    0.         -0.20767382  0.        ]
   [ 0.          0.37372208  0.         -0.49839664 -0.24333255
     1.0017719   0.         -0.35279357 -0.24388817  0.
    -0.5101482   5.          0.         -0.11448858  0.
    -0.6512337  -0.8411281  -0.48670378 -0.32996473 -0.9127454
     1.6635473  -0.56235164 -1.0865711  -0.9003515   0.
    -0.30334848  0.69982326 -0.2746538   2.2518883  -0.94216675
    -0.6292311  -1.0846883  -2.9452639  -1.0954533   1.869055
     0.          1.4910952   0.         -2.2147455  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35329565 -0.86660004 -0.8123908  -1.3558156
     0.          0.          0.86491215  0.         -1.7942255
     0.         -0.8902908   0.         -0.4403374  -0.2996497
    -0.5791124  -0.47385067 -0.7822916   1.3115785   0.
    -0.12706837 -0.37774208 -0.45000827 -0.37939352  1.5453193
     3.440915   -0.42907715 -1.1147425   0.         -0.445195
     1.5935063   0.          0.         -0.30312067  0.
     0.         -0.556599    0.         -0.20767382  0.        ]
   [ 0.          0.37372208  0.         -0.49839664 -0.24333255
     1.0017719   0.         -0.35279357 -0.24388817  0.
    -0.5101482   5.          0.         -0.11448858  0.
    -0.6512337  -0.8411281  -0.48670378 -0.32996473 -0.9127454
     1.6635473  -0.56235164 -1.0865711  -0.9003515   0.
    -0.30334848  0.69982326 -0.2746538   2.2518883  -0.94216675
    -0.6292311  -1.0846883  -2.9452639  -1.0954533   1.869055
     0.          1.4910952   0.         -2.2147455  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35329565 -0.86660004 -0.8123908  -1.3558156
     0.          0.          0.86491215  0.         -1.7942255
     0.         -0.8902908   0.         -0.4403374  -0.2996497
    -0.5791124  -0.47385067 -0.7822916   1.3115785   0.
    -0.12706837 -0.37774208 -0.45000827 -0.37939352  1.5453193
     3.440915   -0.42907715 -1.1147425   0.         -0.445195
     1.5935063   0.          0.         -0.30312067  0.
     0.         -0.556599    0.         -0.20767382  0.        ]
   [ 0.          0.37372208  0.         -0.49839664 -0.24333255
     1.0017719   0.         -0.35279357 -0.24388817  0.
    -0.5101482   5.          0.         -0.11448858  0.
    -0.6512337  -0.8411281  -0.48670378 -0.32996473 -0.9127454
     1.6635473  -0.56235164 -1.0865711  -0.9003515   0.
    -0.30334848  0.69982326 -0.2746538   2.2518883  -0.94216675
    -0.6292311  -1.0846883  -2.9452639  -1.0954533   1.869055
     0.          1.4910952   0.         -2.2147455  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35042763 -0.8557206  -0.80271417 -1.3284774
     0.          0.          0.8540703   0.         -1.7411447
     0.         -0.87885034  0.         -0.4365477  -0.2972827
    -0.57354844 -0.46966884 -0.77324     1.2862029   0.
    -0.12612516 -0.37462434 -0.4461066  -0.37625852  1.5083123
     3.1559067  -0.42541182 -1.0968878   0.         -0.4413492
     1.5536995   0.          0.         -0.30072114  0.
     0.         -0.55135447  0.         -0.20610155  0.        ]
   [ 0.          0.37064624  0.         -0.4939141  -0.24145383
     0.9874085   0.         -0.34993058 -0.24200453  0.
    -0.5055187   4.5759315   0.         -0.11360571  0.
    -0.6445661  -0.83082736 -0.4823675  -0.3273162  -0.9007555
     1.6194079  -0.5570267  -1.0696398   1.0552936   0.
    -0.30094677  0.69233346 -0.27250767  2.1577532  -0.9294274
    -0.6229147  -1.0678189  -2.7558513  -1.0782357   1.8103224
     0.          1.4570693   0.         -2.1245472 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35042763 -0.8557206  -0.80271417 -1.3284774
     0.          0.          0.8540703   0.         -1.7411447
     0.         -0.87885034  0.         -0.4365477  -0.2972827
    -0.57354844 -0.46966884 -0.77324     1.2862029   0.
    -0.12612516 -0.37462434 -0.4461066  -0.37625852  1.5083123
     3.1559067  -0.42541182 -1.0968878   0.         -0.4413492
     1.5536995   0.          0.         -0.30072114  0.
     0.         -0.55135447  0.         -0.20610155  0.        ]
   [ 0.          0.37064624  0.         -0.4939141  -0.24145383
     0.9874085   0.         -0.34993058 -0.24200453  0.
    -0.5055187   4.5759315   0.         -0.11360571  0.
    -0.6445661  -0.83082736 -0.4823675  -0.3273162  -0.9007555
     1.6194079  -0.5570267  -1.0696398   1.0552936   0.
    -0.30094677  0.69233346 -0.27250767  2.1577532  -0.9294274
    -0.6229147  -1.0678189  -2.7558513  -1.0782357   1.8103224
     0.          1.4570693   0.         -2.1245472 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.35042763 -0.8557206  -0.80271417 -1.3284774
     0.          0.          0.8540703   0.         -1.7411447
     0.         -0.87885034  0.         -0.4365477  -0.2972827
    -0.57354844 -0.46966884 -0.77324     1.2862029   0.
    -0.12612516 -0.37462434 -0.4461066  -0.37625852  1.5083123
     3.1559067  -0.42541182 -1.0968878   0.         -0.4413492
     1.5536995   0.          0.         -0.30072114  0.
     0.         -0.55135447  0.         -0.20610155  0.        ]
   [ 0.          0.37064624  0.         -0.4939141  -0.24145383
     0.9874085   0.         -0.34993058 -0.24200453  0.
    -0.5055187   4.5759315   0.         -0.11360571  0.
    -0.6445661  -0.83082736 -0.4823675  -0.3273162  -0.9007555
     1.6194079  -0.5570267  -1.0696398   1.0552936   0.
    -0.30094677  0.69233346 -0.27250767  2.1577532  -0.9294274
    -0.6229147  -1.0678189  -2.7558513  -1.0782357   1.8103224
     0.          1.4570693   0.         -2.1245472 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.34762782 -0.8452411  -0.7933753  -1.302731
     0.          0.          0.8436266   0.         -1.6925176
     0.         -0.86784047  0.         -0.4328536  -0.29497024
    -0.56814146 -0.46559525 -0.7644954   1.2622476   0.
    -0.12520213 -0.3715819  -0.4423042  -0.3731994   1.4738461
     2.931737   -0.4218383  -1.0798652   0.         -0.43760103
     1.5167375   0.          0.         -0.29837695  0.
     0.         -0.5462552   0.         -0.20456392  0.        ]
   [ 0.          0.36764407  0.         -0.4895499  -0.2396171
     0.97364604  0.         -0.34713563 -0.24016304  0.
    -0.50101244  3.9858265   0.         -0.11274154  0.
    -0.63809896 -0.82089627 -0.47814468 -0.3247297  -0.8892266
     1.57861    -0.55184984 -1.0534769   1.039571    0.
    -0.29860055  0.68507934 -0.27041033  2.0745258  -0.9171916
    -0.6167844   0.86294484 -2.5988567  -1.0618062   1.7568061
     0.          1.4252743   0.         -2.0445511 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.34762782 -0.8452411  -0.7933753  -1.302731
     0.          0.          0.8436266   0.         -1.6925176
     0.         -0.86784047  0.         -0.4328536  -0.29497024
    -0.56814146 -0.46559525 -0.7644954   1.2622476   0.
    -0.12520213 -0.3715819  -0.4423042  -0.3731994   1.4738461
     2.931737   -0.4218383  -1.0798652   0.         -0.43760103
     1.5167375   0.          0.         -0.29837695  0.
     0.         -0.5462552   0.         -0.20456392  0.        ]
   [ 0.          0.36764407  0.         -0.4895499  -0.2396171
     0.97364604  0.         -0.34713563 -0.24016304  0.
    -0.50101244  3.9858265   0.         -0.11274154  0.
    -0.63809896 -0.82089627 -0.47814468 -0.3247297  -0.8892266
     1.57861    -0.55184984 -1.0534769   1.039571    0.
    -0.29860055  0.68507934 -0.27041033  2.0745258  -0.9171916
    -0.6167844   0.86294484 -2.5988567  -1.0618062   1.7568061
     0.          1.4252743   0.         -2.0445511 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.34762782 -0.8452411  -0.7933753  -1.302731
     0.          0.          0.8436266   0.         -1.6925176
     0.         -0.86784047  0.         -0.4328536  -0.29497024
    -0.56814146 -0.46559525 -0.7644954   1.2622476   0.
    -0.12520213 -0.3715819  -0.4423042  -0.3731994   1.4738461
     2.931737   -0.4218383  -1.0798652   0.         -0.43760103
     1.5167375   0.          0.         -0.29837695  0.
     0.         -0.5462552   0.         -0.20456392  0.        ]
   [ 0.          0.36764407  0.         -0.4895499  -0.2396171
     0.97364604  0.         -0.34713563 -0.24016304  0.
    -0.50101244  3.9858265   0.         -0.11274154  0.
    -0.63809896 -0.82089627 -0.47814468 -0.3247297  -0.8892266
     1.57861    -0.55184984 -1.0534769   1.039571    0.
    -0.29860055  0.68507934 -0.27041033  2.0745258  -0.9171916
    -0.6167844   0.86294484 -2.5988567  -1.0618062   1.7568061
     0.          1.4252743   0.         -2.0445511 

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081ee1d0> [[0.         1.5775672  0.         1.0867015  0.         0.
  1.7035058  3.4983125  0.         0.         0.         0.
  0.04444927 0.8699025  0.         0.         1.9868503  0.
  0.         0.         0.         0.         0.         1.2120268
  0.         1.1507516  1.5703719  0.         0.         2.8131785
  2.0736783  0.43933037 0.         1.038042   0.77115047 0.2957129
  0.         0.55664295 0.38826025 0.        ]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[1.9599949  0.         0.         0.53541505 0.6719736 ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3448937  -0.83513767 -0.78435516 -1.2784277
     0.          0.          0.8335568   0.         -1.6477544
     0.         -0.8572343   0.         -0.42925137 -0.2927103
    -0.56288433 -0.4616255  -0.75604105  1.2395842   0.
    -0.12429865 -0.36

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3448937  -0.83513767 -0.78435516 -1.2784277
     0.          0.          0.8335568   0.         -1.6477544
     0.         -0.8572343   0.         -0.42925137 -0.2927103
    -0.56288433 -0.4616255  -0.75604105  1.2395842   0.
    -0.12429865 -0.36861196 -0.43859693 -0.37021318  1.4416424
     2.7494457  -0.4183529  -1.0636126   0.         -0.43394622
     1.4822974   0.          0.         -0.2960861   0.
     0.         -0.5412945   0.         -0.20305969  0.        ]
   [ 0.          0.36471367  0.         -0.48529896 -0.23782086
     0.9604442   0.         -0.34440625 -0.23836213  0.
    -0.4966243   3.5775769   0.         -0.11189537  0.
    -0.63182247 -0.81131315 -0.47403038 -0.32220298 -0.8781297
    -0.63442713 -0.5468145  -1.0380261   1.0245316   0.
    -0.29630765  0.67804825 -0.26835987  2.00025    -0.9054272
    -0.6108313   0.8524504  -2.4659767  -1.0461065   1.707777
     0.          1.3954763   0.         -1.9729692 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3448937  -0.83513767 -0.78435516 -1.2784277
     0.          0.          0.8335568   0.         -1.6477544
     0.         -0.8572343   0.         -0.42925137 -0.2927103
    -0.56288433 -0.4616255  -0.75604105  1.2395842   0.
    -0.12429865 -0.36861196 -0.43859693 -0.37021318  1.4416424
     2.7494457  -0.4183529  -1.0636126   0.         -0.43394622
     1.4822974   0.          0.         -0.2960861   0.
     0.         -0.5412945   0.         -0.20305969  0.        ]
   [ 0.          0.36471367  0.         -0.48529896 -0.23782086
     0.9604442   0.         -0.34440625 -0.23836213  0.
    -0.4966243   3.5775769   0.         -0.11189537  0.
    -0.63182247 -0.81131315 -0.47403038 -0.32220298 -0.8781297
    -0.63442713 -0.5468145  -1.0380261   1.0245316   0.
    -0.29630765  0.67804825 -0.26835987  2.00025    -0.9054272
    -0.6108313   0.8524504  -2.4659767  -1.0461065   1.707777
     0.          1.3954763   0.         -1.9729692 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3422226  -0.8253882  -0.7756357  -1.2554377
     0.          0.          0.8238394   0.         -1.6063691
     0.         -0.8470079   0.         -0.42573717 -0.29050082
    -0.5577702  -0.45775515 -0.7478611   1.218101    0.
    -0.12341395 -0.3657115  -0.43498087 -0.36729693  1.4114642
     2.5974317  -0.41495198 -1.0480728   0.         -0.43038106
     1.4501046   0.          0.         -0.29384652  0.
     0.         -0.5364664   0.         -0.20158765  0.        ]
   [ 0.          0.36185145  0.         -0.4811564  -0.23606357
     0.9477657   0.         -0.3417398  -0.2366003   0.
    -0.4923491   3.2736077   0.         -0.11106662  0.
    -0.6257275  -0.8020582  -0.47002012 -0.3197337  -0.8674381
    -0.62829244 -0.5419142  -1.0232364   1.0101274   0.
    -0.2940661   0.67122924 -0.26635468  1.9334285  -0.89410436
    -0.60504717  0.8423302  -2.351609   -1.0310838   1.662641
     0.          1.3674749   0.         -1.908422

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3422226  -0.8253882  -0.7756357  -1.2554377
     0.          0.          0.8238394   0.         -1.6063691
     0.         -0.8470079   0.         -0.42573717 -0.29050082
    -0.5577702  -0.45775515 -0.7478611   1.218101    0.
    -0.12341395 -0.3657115  -0.43498087 -0.36729693  1.4114642
     2.5974317  -0.41495198 -1.0480728   0.         -0.43038106
     1.4501046   0.          0.         -0.29384652  0.
     0.         -0.5364664   0.         -0.20158765  0.        ]
   [ 0.          0.36185145  0.         -0.4811564  -0.23606357
     0.9477657   0.         -0.3417398  -0.2366003   0.
    -0.4923491   3.2736077   0.         -0.11106662  0.
    -0.6257275  -0.8020582  -0.47002012 -0.3197337  -0.8674381
    -0.62829244 -0.5419142  -1.0232364   1.0101274   0.
    -0.2940661   0.67122924 -0.26635468  1.9334285  -0.89410436
    -0.60504717  0.8423302  -2.351609   -1.0310838   1.662641
     0.          1.3674749   0.         -1.908422

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3422226  -0.8253882  -0.7756357  -1.2554377
     0.          0.          0.8238394   0.         -1.6063691
     0.         -0.8470079   0.         -0.42573717 -0.29050082
    -0.5577702  -0.45775515 -0.7478611   1.218101    0.
    -0.12341395 -0.3657115  -0.43498087 -0.36729693  1.4114642
     2.5974317  -0.41495198 -1.0480728   0.         -0.43038106
     1.4501046   0.          0.         -0.29384652  0.
     0.         -0.5364664   0.         -0.20158765  0.        ]
   [ 0.          0.36185145  0.         -0.4811564  -0.23606357
     0.9477657   0.         -0.3417398  -0.2366003   0.
    -0.4923491   3.2736077   0.         -0.11106662  0.
    -0.6257275  -0.8020582  -0.47002012 -0.3197337  -0.8674381
    -0.62829244 -0.5419142  -1.0232364   1.0101274   0.
    -0.2940661   0.67122924 -0.26635468  1.9334285  -0.89410436
    -0.60504717  0.8423302  -2.351609   -1.0310838   1.662641
     0.          1.3674749   0.         -1.908422

action [[0.07367364 0.7313527 ]]
New state [[1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  1. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
  0. 0. 1. 0. 1. 0. 0. 1. 1. 1. 1. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0.
  0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0.]]
Gathering trajectories...
Replay buffer gather all Trajectory(step_type=<tf.Tensor: id=1140675, shape=(1, 2), dtype=int32, numpy=array([[1, 1]], dtype=int32)>, observation=<tf.Tensor: id=1140676, shape=(1, 2, 4, 40), dtype=float32, numpy=
array([[[[1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0.,
          0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
          1., 0., 0., 0., 0., 1., 0., 0., 0., 1.],
         [0., 1., 0., 0., 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3396122  -0.8159727  -0.76720095 -1.2336463
     0.          0.          0.8144543   0.         -1.567957
     5.         -0.8371392   0.         -0.42230746 -0.28834003
    -0.55279267 -0.45398006 -0.73994094  1.1976986   0.
    -0.12254741 -0.36287796 -0.43145236 -0.36444804  1.3831075
     2.4681473  -0.41163215 -1.0331957   0.         -0.42690188
     1.4199246   0.          0.         -0.29165637  0.
     0.         -0.5317649   0.         -0.20014669  0.        ]
   [ 0.          0.35905528  0.         -0.4771178  -0.23434395
     0.9355767   0.         -0.3391339  -0.2348762   0.
    -0.4881821   3.0359533   0.         -0.11025469  0.
    -0.6198052  -0.79311293 -0.46610963 -0.31731975 -0.8571283
    -0.62233216 -0.5371432  -1.0090622   0.9963146   0.
    -0.291874    0.6646117  -0.264393    1.8728918  -0.8831963
    -0.59942394  0.8325622   0.39317465 -1.0166912   1.6209084
     0.          1.3410962   0.         -1.8498285

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3396122  -0.8159727  -0.76720095 -1.2336463
     0.          0.          0.8144543   0.         -1.567957
     5.         -0.8371392   0.         -0.42230746 -0.28834003
    -0.55279267 -0.45398006 -0.73994094  1.1976986   0.
    -0.12254741 -0.36287796 -0.43145236 -0.36444804  1.3831075
     2.4681473  -0.41163215 -1.0331957   0.         -0.42690188
     1.4199246   0.          0.         -0.29165637  0.
     0.         -0.5317649   0.         -0.20014669  0.        ]
   [ 0.          0.35905528  0.         -0.4771178  -0.23434395
     0.9355767   0.         -0.3391339  -0.2348762   0.
    -0.4881821   3.0359533   0.         -0.11025469  0.
    -0.6198052  -0.79311293 -0.46610963 -0.31731975 -0.8571283
    -0.62233216 -0.5371432  -1.0090622   0.9963146   0.
    -0.291874    0.6646117  -0.264393    1.8728918  -0.8831963
    -0.59942394  0.8325622   0.39317465 -1.0166912   1.6209084
     0.          1.3410962   0.         -1.8498285

Observations in ActorNet tf.Tensor(
[[[ 0.         -0.33706015 -0.80687225 -0.75903547 -1.2129532
    0.          0.          0.8053827   0.         -1.5321783
    5.         -0.8276077   0.         -0.41895887 -0.28622612
   -0.54794574 -0.45029646 -0.7322673   1.1782894   0.
   -0.12169846 -0.3601088  -0.42800793 -0.36166388  1.356396
    2.3564389  -0.4083903  -1.0189353   0.          2.223403
    1.391556    0.          0.         -0.2895138   0.
    0.         -0.5271846   0.         -0.1987357   0.        ]
  [ 0.          0.3563223   0.         -0.4731788  -0.23266055
    0.9238465   0.         -0.3365863  -0.23318848  0.
   -0.48411876  2.8435495   0.         -0.10945898  0.
   -0.61404777 -0.7844605  -0.46229473 -0.31495902 -0.8471775
   -0.616538   -0.5324955  -0.9954615   0.98305386  0.
   -0.28972957  0.6581861  -0.26247346  1.817713   -0.87267816
   -0.59395456  0.82312655  0.39011908  0.9257405   1.582172
    0.          1.3161893   0.         -1.7963258   0.        ]
  [

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.33706015 -0.80687225 -0.75903547 -1.2129532
     0.          0.          0.8053827   0.         -1.5321783
     5.         -0.8276077   0.         -0.41895887 -0.28622612
    -0.54794574 -0.45029646 -0.7322673   1.1782894   0.
    -0.12169846 -0.3601088  -0.42800793 -0.36166388  1.356396
     2.3564389  -0.4083903  -1.0189353   0.          2.223403
     1.391556    0.          0.         -0.2895138   0.
     0.         -0.5271846   0.         -0.1987357   0.        ]
   [ 0.          0.3563223   0.         -0.4731788  -0.23266055
     0.9238465   0.         -0.3365863  -0.23318848  0.
    -0.48411876  2.8435495   0.         -0.10945898  0.
    -0.61404777 -0.7844605  -0.46229473 -0.31495902 -0.8471775
    -0.616538   -0.5324955  -0.9954615   0.98305386  0.
    -0.28972957  0.6581861  -0.26247346  1.817713   -0.87267816
    -0.59395456  0.82312655  0.39011908  0.9257405   1.582172
     0.          1.3161893   0.         -1.7963258  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.33706015 -0.80687225 -0.75903547 -1.2129532
     0.          0.          0.8053827   0.         -1.5321783
     5.         -0.8276077   0.         -0.41895887 -0.28622612
    -0.54794574 -0.45029646 -0.7322673   1.1782894   0.
    -0.12169846 -0.3601088  -0.42800793 -0.36166388  1.356396
     2.3564389  -0.4083903  -1.0189353   0.          2.223403
     1.391556    0.          0.         -0.2895138   0.
     0.         -0.5271846   0.         -0.1987357   0.        ]
   [ 0.          0.3563223   0.         -0.4731788  -0.23266055
     0.9238465   0.         -0.3365863  -0.23318848  0.
    -0.48411876  2.8435495   0.         -0.10945898  0.
    -0.61404777 -0.7844605  -0.46229473 -0.31495902 -0.8471775
    -0.616538   -0.5324955  -0.9954615   0.98305386  0.
    -0.28972957  0.6581861  -0.26247346  1.817713   -0.87267816
    -0.59395456  0.82312655  0.39011908  0.9257405   1.582172
     0.          1.3161893   0.         -1.7963258  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.33706015 -0.80687225 -0.75903547 -1.2129532
     0.          0.          0.8053827   0.         -1.5321783
     5.         -0.8276077   0.         -0.41895887 -0.28622612
    -0.54794574 -0.45029646 -0.7322673   1.1782894   0.
    -0.12169846 -0.3601088  -0.42800793 -0.36166388  1.356396
     2.3564389  -0.4083903  -1.0189353   0.          2.223403
     1.391556    0.          0.         -0.2895138   0.
     0.         -0.5271846   0.         -0.1987357   0.        ]
   [ 0.          0.3563223   0.         -0.4731788  -0.23266055
     0.9238465   0.         -0.3365863  -0.23318848  0.
    -0.48411876  2.8435495   0.         -0.10945898  0.
    -0.61404777 -0.7844605  -0.46229473 -0.31495902 -0.8471775
    -0.616538   -0.5324955  -0.9954615   0.98305386  0.
    -0.28972957  0.6581861  -0.26247346  1.817713   -0.87267816
    -0.59395456  0.82312655  0.39011908  0.9257405   1.582172
     0.          1.3161893   0.         -1.7963258  

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[1.2778659  0.         0.         0.19984543 1.194613  ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.33456448 -0.79806966 -0.75112504 -1.1932689
     5.          0.          0.796608    0.         -1.4987448
     5.         -0.8183946   0.         -0.41568828 -0.28415743
    -0.5432238  -0.44670066 -0.72482735  1.1597954   0.
    -0.12086644 -0.35740155 -0.42464423 -0.35894206  1.3311765
     2.2586539  -0.40522343 -1.0052501   0.          2.1392524
     1.3648247   0.          0.         -0.28741717  0.
     0.         -0.5227204   0.         -0.19735369  0.        ]
   [ 0.          0.35365033  0.         -0.46933532 -0.2310122
    -1.0689838   0.         -0.33409485 -0.2315358   0.
    -0.4801548   2.6836474   0.         -0.10867898  0.
    -0.60844755 -0.77608526 -0.45857158 -0.31264964 -0.8375656
    -0.6109026  -0.5279664  -0.9823968   0.9703095   0.
    -0.287631

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.33456448 -0.79806966 -0.75112504 -1.1932689
     5.          0.          0.796608    0.         -1.4987448
     5.         -0.8183946   0.         -0.41568828 -0.28415743
    -0.5432238  -0.44670066 -0.72482735  1.1597954   0.
    -0.12086644 -0.35740155 -0.42464423 -0.35894206  1.3311765
     2.2586539  -0.40522343 -1.0052501   0.          2.1392524
     1.3648247   0.          0.         -0.28741717  0.
     0.         -0.5227204   0.         -0.19735369  0.        ]
   [ 0.          0.35365033  0.         -0.46933532 -0.2310122
    -1.0689838   0.         -0.33409485 -0.2315358   0.
    -0.4801548   2.6836474   0.         -0.10867898  0.
    -0.60844755 -0.77608526 -0.45857158 -0.31264964 -0.8375656
    -0.6109026  -0.5279664  -0.9823968   0.9703095   0.
    -0.28763106  0.65194297 -0.26059437 -0.5484244  -0.86252725
    -0.58863187  0.81400466  0.38713336  0.9143977   1.5460896
     0.          1.2926221   0.         -1.7472173

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.33456448 -0.79806966 -0.75112504 -1.1932689
     5.          0.          0.796608    0.         -1.4987448
     5.         -0.8183946   0.         -0.41568828 -0.28415743
    -0.5432238  -0.44670066 -0.72482735  1.1597954   0.
    -0.12086644 -0.35740155 -0.42464423 -0.35894206  1.3311765
     2.2586539  -0.40522343 -1.0052501   0.          2.1392524
     1.3648247   0.          0.         -0.28741717  0.
     0.         -0.5227204   0.         -0.19735369  0.        ]
   [ 0.          0.35365033  0.         -0.46933532 -0.2310122
    -1.0689838   0.         -0.33409485 -0.2315358   0.
    -0.4801548   2.6836474   0.         -0.10867898  0.
    -0.60844755 -0.77608526 -0.45857158 -0.31264964 -0.8375656
    -0.6109026  -0.5279664  -0.9823968   0.9703095   0.
    -0.28763106  0.65194297 -0.26059437 -0.5484244  -0.86252725
    -0.58863187  0.81400466  0.38713336  0.9143977   1.5460896
     0.          1.2926221   0.         -1.7472173

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.33456448 -0.79806966 -0.75112504 -1.1932689
     5.          0.          0.796608    0.         -1.4987448
     5.         -0.8183946   0.         -0.41568828 -0.28415743
    -0.5432238  -0.44670066 -0.72482735  1.1597954   0.
    -0.12086644 -0.35740155 -0.42464423 -0.35894206  1.3311765
     2.2586539  -0.40522343 -1.0052501   0.          2.1392524
     1.3648247   0.          0.         -0.28741717  0.
     0.         -0.5227204   0.         -0.19735369  0.        ]
   [ 0.          0.35365033  0.         -0.46933532 -0.2310122
    -1.0689838   0.         -0.33409485 -0.2315358   0.
    -0.4801548   2.6836474   0.         -0.10867898  0.
    -0.60844755 -0.77608526 -0.45857158 -0.31264964 -0.8375656
    -0.6109026  -0.5279664  -0.9823968   0.9703095   0.
    -0.28763106  0.65194297 -0.26059437 -0.5484244  -0.86252725
    -0.58863187  0.81400466  0.38713336  0.9143977   1.5460896
     0.          1.2926221   0.         -1.7472173

Output of neural network for action <tensorflow.python.keras.layers.core.Flatten object at 0x7efc0824ea90> [[ 0.         -0.33212292 -0.7895492  -0.74345714 -1.1745137   5.
   0.          0.78811383  0.         -1.4674114  -0.1974325  -0.8094825
   0.         -0.4124928  -0.28213236 -0.5386215  -0.44318923 -0.71760964
   1.1421467   0.         -0.12005082 -0.3547539  -0.4213582  -0.35628027
   1.3073151   2.172122   -0.4021286  -0.99210244  0.          2.0639937
   1.3395787   0.          0.         -0.2853648   0.          0.
  -0.51836747  0.         -0.19599962  0.          0.          0.35103703
   0.         -0.46558368 -0.22939761 -1.0540975   0.         -0.33165747
  -0.22991711  0.         -0.47628638 -0.3803014   0.         -0.10791416
   0.         -0.6029977  -0.7679727  -0.45493668 -0.31038976 -0.82827365
  -0.6054187  -0.5235504  -0.96983397  0.95804864  0.         -0.28557694
   0.64587426 -0.25875437 -0.5437575  -0.8527227  -0.5834494   0.8051797
   0.3842144   0.9034625

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.33212292 -0.7895492  -0.74345714 -1.1745137
     5.          0.          0.78811383  0.         -1.4674114
    -0.1974325  -0.8094825   0.         -0.4124928  -0.28213236
    -0.5386215  -0.44318923 -0.71760964  1.1421467   0.
    -0.12005082 -0.3547539  -0.4213582  -0.35628027  1.3073151
     2.172122   -0.4021286  -0.99210244  0.          2.0639937
     1.3395787   0.          0.         -0.2853648   0.
     0.         -0.51836747  0.         -0.19599962  0.        ]
   [ 0.          0.35103703  0.         -0.46558368 -0.22939761
    -1.0540975   0.         -0.33165747 -0.22991711  0.
    -0.47628638 -0.3803014   0.         -0.10791416  0.
    -0.6029977  -0.7679727  -0.45493668 -0.31038976 -0.82827365
    -0.6054187  -0.5235504  -0.96983397  0.95804864  0.
    -0.28557694  0.64587426 -0.25875437 -0.5437575  -0.8527227
    -0.5834494   0.8051797   0.3842144   0.9034625   1.5123711
     0.          1.2702787   0.         -1.701931

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[0.8605872 0.        0.        0.        1.1455754]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.33212292 -0.7895492  -0.74345714 -1.1745137
     5.          0.          0.78811383  0.         -1.4674114
    -0.1974325  -0.8094825   0.         -0.4124928  -0.28213236
    -0.5386215  -0.44318923 -0.71760964  1.1421467   0.
    -0.12005082 -0.3547539  -0.4213582  -0.35628027  1.3073151
     2.172122   -0.4021286  -0.99210244  0.          2.0639937
     1.3395787   0.          0.         -0.2853648   0.
     0.         -0.51836747  0.         -0.19599962  0.        ]
   [ 0.          0.35103703  0.         -0.46558368 -0.22939761
    -1.0540975   0.         -0.33165747 -0.22991711  0.
    -0.47628638 -0.3803014   0.         -0.10791416  0.
    -0.6029977  -0.7679727  -0.45493668 -0.31038976 -0.82827365
    -0.6054187  -0.5235504  -0.96983397  0.95804864  0.
    -0.28557694 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32973373 -0.7812959  -0.73601913 -1.1566172
     5.          0.          0.77988565  0.         -1.4379666
    -0.19608052 -0.8008556   0.         -0.4093694  -0.2801494
    -0.5341341  -0.43975896 -0.71060324  1.1252807   0.
     4.531541   -0.35216382 -0.41814688 -0.35367632  1.2846937
     2.0948374  -0.39910316 -0.9794579   0.          1.9961641
     1.3156854   0.          0.         -0.2833552   0.
     0.         -0.51412123  0.         -0.19467263  0.        ]
   [ 0.          0.34848046  0.         -0.46192017 -0.2278157
    -1.039817    0.         -0.32927233 -0.2283311   0.
    -0.47250956 -0.37748036  0.          5.          0.
    -0.59769136 -0.76010936 -0.45138645 -0.30817762 -0.8192844
    -0.60007954 -0.51924324 -0.9577413   0.94624144  0.
    -0.28356558  0.63997173 -0.2569522  -0.5392075  -0.8432451
    -0.57840127  0.7966355   0.38136053  0.8929107   1.4807689
     0.          1.2490566   0.         -1.6599983  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32973373 -0.7812959  -0.73601913 -1.1566172
     5.          0.          0.77988565  0.         -1.4379666
    -0.19608052 -0.8008556   0.         -0.4093694  -0.2801494
    -0.5341341  -0.43975896 -0.71060324  1.1252807   0.
     4.531541   -0.35216382 -0.41814688 -0.35367632  1.2846937
     2.0948374  -0.39910316 -0.9794579   0.          1.9961641
     1.3156854   0.          0.         -0.2833552   0.
     0.         -0.51412123  0.         -0.19467263  0.        ]
   [ 0.          0.34848046  0.         -0.46192017 -0.2278157
    -1.039817    0.         -0.32927233 -0.2283311   0.
    -0.47250956 -0.37748036  0.          5.          0.
    -0.59769136 -0.76010936 -0.45138645 -0.30817762 -0.8192844
    -0.60007954 -0.51924324 -0.9577413   0.94624144  0.
    -0.28356558  0.63997173 -0.2569522  -0.5392075  -0.8432451
    -0.57840127  0.7966355   0.38136053  0.8929107   1.4807689
     0.          1.2490566   0.         -1.6599983  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32973373 -0.7812959  -0.73601913 -1.1566172
     5.          0.          0.77988565  0.         -1.4379666
    -0.19608052 -0.8008556   0.         -0.4093694  -0.2801494
    -0.5341341  -0.43975896 -0.71060324  1.1252807   0.
     4.531541   -0.35216382 -0.41814688 -0.35367632  1.2846937
     2.0948374  -0.39910316 -0.9794579   0.          1.9961641
     1.3156854   0.          0.         -0.2833552   0.
     0.         -0.51412123  0.         -0.19467263  0.        ]
   [ 0.          0.34848046  0.         -0.46192017 -0.2278157
    -1.039817    0.         -0.32927233 -0.2283311   0.
    -0.47250956 -0.37748036  0.          5.          0.
    -0.59769136 -0.76010936 -0.45138645 -0.30817762 -0.8192844
    -0.60007954 -0.51924324 -0.9577413   0.94624144  0.
    -0.28356558  0.63997173 -0.2569522  -0.5392075  -0.8432451
    -0.57840127  0.7966355   0.38136053  0.8929107   1.4807689
     0.          1.2490566   0.         -1.6599983  

Total tf.Tensor(0.0, shape=(), dtype=float32)
Iteration 29
Observations in ActorNet tf.Tensor(
[[[ 0.         -0.32739493 -0.77329624 -0.7287999  -1.1395155
    4.934617    0.         -1.2607864   0.         -1.4102279
   -0.1947551  -0.7924986   0.         -0.40631554 -0.27820706
   -0.5297566  -0.43640667 -0.703798    1.1091411   0.
    4.005755   -0.34962913 -0.4150074  -0.3511282   1.2632091
    2.0252624  -0.39614454 -0.96728504  0.          1.9346159
    1.293028    0.          0.         -0.28138685  0.
    0.         -0.50997734  0.         -0.19337177  0.        ]
  [ 0.          0.3459785   0.         -0.4583414  -0.22626534
   -1.026102    0.         -0.32693747 -0.22677681  0.
   -0.46882084 -0.3747207   0.          5.          0.
   -0.5925225  -0.75248265  2.0796177  -0.30601156 -0.81058174
   -0.5948791  -0.5150404  -0.94609046  0.9348607   0.
   -0.28159547  0.6342279  -0.25518647 -0.53476954 -0.8340768
   -0.5734816   0.78835773  0.3785684   0.8827205   1.4510708
    0

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32739493 -0.77329624 -0.7287999  -1.1395155
     4.934617    0.         -1.2607864   0.         -1.4102279
    -0.1947551  -0.7924986   0.         -0.40631554 -0.27820706
    -0.5297566  -0.43640667 -0.703798    1.1091411   0.
     4.005755   -0.34962913 -0.4150074  -0.3511282   1.2632091
     2.0252624  -0.39614454 -0.96728504  0.          1.9346159
     1.293028    0.          0.         -0.28138685  0.
     0.         -0.50997734  0.         -0.19337177  0.        ]
   [ 0.          0.3459785   0.         -0.4583414  -0.22626534
    -1.026102    0.         -0.32693747 -0.22677681  0.
    -0.46882084 -0.3747207   0.          5.          0.
    -0.5925225  -0.75248265  2.0796177  -0.30601156 -0.81058174
    -0.5948791  -0.5150404  -0.94609046  0.9348607   0.
    -0.28159547  0.6342279  -0.25518647 -0.53476954 -0.8340768
    -0.5734816   0.78835773  0.3785684   0.8827205   1.4510708
     0.          1.2288649   0.         -1.621022

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32739493 -0.77329624 -0.7287999  -1.1395155
     4.934617    0.         -1.2607864   0.         -1.4102279
    -0.1947551  -0.7924986   0.         -0.40631554 -0.27820706
    -0.5297566  -0.43640667 -0.703798    1.1091411   0.
     4.005755   -0.34962913 -0.4150074  -0.3511282   1.2632091
     2.0252624  -0.39614454 -0.96728504  0.          1.9346159
     1.293028    0.          0.         -0.28138685  0.
     0.         -0.50997734  0.         -0.19337177  0.        ]
   [ 0.          0.3459785   0.         -0.4583414  -0.22626534
    -1.026102    0.         -0.32693747 -0.22677681  0.
    -0.46882084 -0.3747207   0.          5.          0.
    -0.5925225  -0.75248265  2.0796177  -0.30601156 -0.81058174
    -0.5948791  -0.5150404  -0.94609046  0.9348607   0.
    -0.28159547  0.6342279  -0.25518647 -0.53476954 -0.8340768
    -0.5734816   0.78835773  0.3785684   0.8827205   1.4510708
     0.          1.2288649   0.         -1.621022

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32739493 -0.77329624 -0.7287999  -1.1395155
     4.934617    0.         -1.2607864   0.         -1.4102279
    -0.1947551  -0.7924986   0.         -0.40631554 -0.27820706
    -0.5297566  -0.43640667 -0.703798    1.1091411   0.
     4.005755   -0.34962913 -0.4150074  -0.3511282   1.2632091
     2.0252624  -0.39614454 -0.96728504  0.          1.9346159
     1.293028    0.          0.         -0.28138685  0.
     0.         -0.50997734  0.         -0.19337177  0.        ]
   [ 0.          0.3459785   0.         -0.4583414  -0.22626534
    -1.026102    0.         -0.32693747 -0.22677681  0.
    -0.46882084 -0.3747207   0.          5.          0.
    -0.5925225  -0.75248265  2.0796177  -0.30601156 -0.81058174
    -0.5948791  -0.5150404  -0.94609046  0.9348607   0.
    -0.28159547  0.6342279  -0.25518647 -0.53476954 -0.8340768
    -0.5734816   0.78835773  0.3785684   0.8827205   1.4510708
     0.          1.2288649   0.         -1.621022

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3251048  -0.7655373  -0.72178894 -1.1231513
     4.2867103   0.         -1.2404321   0.         -1.3840369
    -0.19345546 -0.7843978   0.         -0.40332866 -0.27630407
    -0.5254847  -0.4331295  -0.69718456  1.0936775   0.
    -0.19098803 -0.34714788 -0.41193727 -0.34863383  1.2427684
     1.9621947  -0.39325038 -0.9555556   0.          1.8784361
     1.2715037   0.          0.         -0.2794584   0.
     0.         -0.50593185  0.         -0.19209623  0.        ]
   [ 0.          0.34352922  0.         -0.45484415 -0.22474556
    -1.0129162   0.         -0.3246512  -0.2252531   0.
    -0.46521688 -0.37202024  0.          4.9351683   0.
    -0.58748513 -0.745081    2.0121567  -0.30389    -0.80215067
     1.6570885  -0.5109377  -0.9348549   0.9238812   0.
    -0.27966535  0.6286359  -0.25345603 -0.53043896 -0.8252012
    -0.5686851   0.7803329   0.3758366   0.8728713   1.4230926
     0.          1.2096229   0.         -1.584671

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3251048  -0.7655373  -0.72178894 -1.1231513
     4.2867103   0.         -1.2404321   0.         -1.3840369
    -0.19345546 -0.7843978   0.         -0.40332866 -0.27630407
    -0.5254847  -0.4331295  -0.69718456  1.0936775   0.
    -0.19098803 -0.34714788 -0.41193727 -0.34863383  1.2427684
     1.9621947  -0.39325038 -0.9555556   0.          1.8784361
     1.2715037   0.          0.         -0.2794584   0.
     0.         -0.50593185  0.         -0.19209623  0.        ]
   [ 0.          0.34352922  0.         -0.45484415 -0.22474556
    -1.0129162   0.         -0.3246512  -0.2252531   0.
    -0.46521688 -0.37202024  0.          4.9351683   0.
    -0.58748513 -0.745081    2.0121567  -0.30389    -0.80215067
     1.6570885  -0.5109377  -0.9348549   0.9238812   0.
    -0.27966535  0.6286359  -0.25345603 -0.53043896 -0.8252012
    -0.5686851   0.7803329   0.3758366   0.8728713   1.4230926
     0.          1.2096229   0.         -1.584671

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3251048  -0.7655373  -0.72178894 -1.1231513
     4.2867103   0.         -1.2404321   0.         -1.3840369
    -0.19345546 -0.7843978   0.         -0.40332866 -0.27630407
    -0.5254847  -0.4331295  -0.69718456  1.0936775   0.
    -0.19098803 -0.34714788 -0.41193727 -0.34863383  1.2427684
     1.9621947  -0.39325038 -0.9555556   0.          1.8784361
     1.2715037   0.          0.         -0.2794584   0.
     0.         -0.50593185  0.         -0.19209623  0.        ]
   [ 0.          0.34352922  0.         -0.45484415 -0.22474556
    -1.0129162   0.         -0.3246512  -0.2252531   0.
    -0.46521688 -0.37202024  0.          4.9351683   0.
    -0.58748513 -0.745081    2.0121567  -0.30389    -0.80215067
     1.6570885  -0.5109377  -0.9348549   0.9238812   0.
    -0.27966535  0.6286359  -0.25345603 -0.53043896 -0.8252012
    -0.5686851   0.7803329   0.3758366   0.8728713   1.4230926
     0.          1.2096229   0.         -1.584671

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32286176 -0.75800735  1.3533483  -1.1074731
     3.8413363   0.         -1.221034    0.         -1.3592546
    -0.19218063 -0.77654064  0.         -0.40040627 -0.27443898
    -0.52131426 -0.4299247  -0.69075394  1.0788437   0.
    -0.18974487 -0.34471834 -0.40893376 -0.3461915   1.2232901
     1.9046799  -0.39041823 -0.94424284  0.          1.8268872
     1.2510209   0.          0.         -0.27756846  0.
     0.         -0.5019809   0.         -0.19084515  0.        ]
   [ 0.          0.3411305   0.         -0.45142534 -0.22325528
    -1.0002263   0.         -0.32241192 -0.22375901  0.
    -0.4616944  -0.3693769   0.          4.2937455   0.
    -0.5825739  -0.73789346  1.9508673  -0.30181143 -0.79397714
     1.6192226  -0.5069311  -0.9240105   0.9132799   0.
    -0.2777738   0.62318885 -0.25175968 -0.5262117  -0.8166031
    -0.5640067   0.7725483   0.37316275  0.8633448   1.3966746
     0.          1.1912583   0.         -1.550664

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32286176 -0.75800735  1.3533483  -1.1074731
     3.8413363   0.         -1.221034    0.         -1.3592546
    -0.19218063 -0.77654064  0.         -0.40040627 -0.27443898
    -0.52131426 -0.4299247  -0.69075394  1.0788437   0.
    -0.18974487 -0.34471834 -0.40893376 -0.3461915   1.2232901
     1.9046799  -0.39041823 -0.94424284  0.          1.8268872
     1.2510209   0.          0.         -0.27756846  0.
     0.         -0.5019809   0.         -0.19084515  0.        ]
   [ 0.          0.3411305   0.         -0.45142534 -0.22325528
    -1.0002263   0.         -0.32241192 -0.22375901  0.
    -0.4616944  -0.3693769   0.          4.2937455   0.
    -0.5825739  -0.73789346  1.9508673  -0.30181143 -0.79397714
     1.6192226  -0.5069311  -0.9240105   0.9132799   0.
    -0.2777738   0.62318885 -0.25175968 -0.5262117  -0.8166031
    -0.5640067   0.7725483   0.37316275  0.8633448   1.3966746
     0.          1.1912583   0.         -1.550664

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32286176 -0.75800735  1.3533483  -1.1074731
     3.8413363   0.         -1.221034    0.         -1.3592546
    -0.19218063 -0.77654064  0.         -0.40040627 -0.27443898
    -0.52131426 -0.4299247  -0.69075394  1.0788437   0.
    -0.18974487 -0.34471834 -0.40893376 -0.3461915   1.2232901
     1.9046799  -0.39041823 -0.94424284  0.          1.8268872
     1.2510209   0.          0.         -0.27756846  0.
     0.         -0.5019809   0.         -0.19084515  0.        ]
   [ 0.          0.3411305   0.         -0.45142534 -0.22325528
    -1.0002263   0.         -0.32241192 -0.22375901  0.
    -0.4616944  -0.3693769   0.          4.2937455   0.
    -0.5825739  -0.73789346  1.9508673  -0.30181143 -0.79397714
     1.6192226  -0.5069311  -0.9240105   0.9132799   0.
    -0.2777738   0.62318885 -0.25175968 -0.5262117  -0.8166031
    -0.5640067   0.7725483   0.37316275  0.8633448   1.3966746
     0.          1.1912583   0.         -1.550664

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32066408 -0.75069517  1.3300822  -1.0924343
     3.5110717   0.         -1.2025193   0.         -1.335759
    -0.19092996 -0.7689149   0.         -0.39754605 -0.27261063
    -0.51724124 -0.42678964 -0.68449795  1.0645981   0.
    -0.18852526 -0.3423386  -0.4059947  -0.34379923  1.204701
     1.8519473  -0.38764602 -0.933323    0.         -0.53663415
     1.231498    0.          0.         -0.27571577  0.
     0.         -0.49812075  0.         -0.18961778  0.        ]
   [ 0.          0.33878112  0.         -0.4480822  -0.22179356
    -0.98800224  0.         -0.32021788 -0.22229357  0.
    -0.45825046 -0.3667887   0.          3.851212    0.
    -0.5777836  -0.73090994  1.8948624  -0.2997744  -0.7860486
     1.5838418  -0.5030172  -0.91353524  0.9030354   0.
    -0.27591953  0.61788106 -0.25009638 -0.5220836   1.200156
    -0.5594417  -1.1953      0.3705449   0.8541237   1.3716769
     0.          1.1737065   0.         -1.5187587  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32066408 -0.75069517  1.3300822  -1.0924343
     3.5110717   0.         -1.2025193   0.         -1.335759
    -0.19092996 -0.7689149   0.         -0.39754605 -0.27261063
    -0.51724124 -0.42678964 -0.68449795  1.0645981   0.
    -0.18852526 -0.3423386  -0.4059947  -0.34379923  1.204701
     1.8519473  -0.38764602 -0.933323    0.         -0.53663415
     1.231498    0.          0.         -0.27571577  0.
     0.         -0.49812075  0.         -0.18961778  0.        ]
   [ 0.          0.33878112  0.         -0.4480822  -0.22179356
    -0.98800224  0.         -0.32021788 -0.22229357  0.
    -0.45825046 -0.3667887   0.          3.851212    0.
    -0.5777836  -0.73090994  1.8948624  -0.2997744  -0.7860486
     1.5838418  -0.5030172  -0.91353524  0.9030354   0.
    -0.27591953  0.61788106 -0.25009638 -0.5220836   1.200156
    -0.5594417  -1.1953      0.3705449   0.8541237   1.3716769
     0.          1.1737065   0.         -1.5187587  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.32066408 -0.75069517  1.3300822  -1.0924343
     3.5110717   0.         -1.2025193   0.         -1.335759
    -0.19092996 -0.7689149   0.         -0.39754605 -0.27261063
    -0.51724124 -0.42678964 -0.68449795  1.0645981   0.
    -0.18852526 -0.3423386  -0.4059947  -0.34379923  1.204701
     1.8519473  -0.38764602 -0.933323    0.         -0.53663415
     1.231498    0.          0.         -0.27571577  0.
     0.         -0.49812075  0.         -0.18961778  0.        ]
   [ 0.          0.33878112  0.         -0.4480822  -0.22179356
    -0.98800224  0.         -0.32021788 -0.22229357  0.
    -0.45825046 -0.3667887   0.          3.851212    0.
    -0.5777836  -0.73090994  1.8948624  -0.2997744  -0.7860486
     1.5838418  -0.5030172  -0.91353524  0.9030354   0.
    -0.27591953  0.61788106 -0.25009638 -0.5220836   1.200156
    -0.5594417  -1.1953      0.3705449   0.8541237   1.3716769
     0.          1.1737065   0.         -1.5187587  

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3185103  -0.74359065  1.3079777  -1.0779926
     3.2535841   0.         -1.1848228   0.         -1.3134425
    -0.18970263 -0.7615092   0.         -0.394746   -0.27081785
    -0.513262   -0.4237218  -0.6784087   1.0509027   0.
    -0.18732849 -0.34000704 -0.40311763 -0.34145546 -0.82358736
     1.803369   -0.3849316  -0.92277354  0.         -0.53244007
     1.2128623   0.          0.         -0.27389917  0.
     0.         -0.4943481   0.         -0.1884134   0.        ]
   [ 0.          0.3364792   0.         -0.4448118  -0.22035956
    -0.97621596  0.         -0.31806776 -0.2208559   0.
    -0.45488206 -0.3642536   0.          3.5222926   0.
    -0.5731093  -0.72412103  1.8434241  -0.29777753 -0.7783528
     1.550686   -0.49919227 -0.9034085   0.8931283   0.
    -0.27410135  0.6127062  -0.248465   -0.5180509   1.1825348
    -0.5549855  -1.1778334   0.36798072  0.8451921   1.3479764
     0.          1.156909    0.         -1.48874

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3185103  -0.74359065  1.3079777  -1.0779926
     3.2535841   0.         -1.1848228   0.         -1.3134425
    -0.18970263 -0.7615092   0.         -0.394746   -0.27081785
    -0.513262   -0.4237218  -0.6784087   1.0509027   0.
    -0.18732849 -0.34000704 -0.40311763 -0.34145546 -0.82358736
     1.803369   -0.3849316  -0.92277354  0.         -0.53244007
     1.2128623   0.          0.         -0.27389917  0.
     0.         -0.4943481   0.         -0.1884134   0.        ]
   [ 0.          0.3364792   0.         -0.4448118  -0.22035956
    -0.97621596  0.         -0.31806776 -0.2208559   0.
    -0.45488206 -0.3642536   0.          3.5222926   0.
    -0.5731093  -0.72412103  1.8434241  -0.29777753 -0.7783528
     1.550686   -0.49919227 -0.9034085   0.8931283   0.
    -0.27410135  0.6127062  -0.248465   -0.5180509   1.1825348
    -0.5549855  -1.1778334   0.36798072  0.8451921   1.3479764
     0.          1.156909    0.         -1.48874

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3185103  -0.74359065  1.3079777  -1.0779926
     3.2535841   0.         -1.1848228   0.         -1.3134425
    -0.18970263 -0.7615092   0.         -0.394746   -0.27081785
    -0.513262   -0.4237218  -0.6784087   1.0509027   0.
    -0.18732849 -0.34000704 -0.40311763 -0.34145546 -0.82358736
     1.803369   -0.3849316  -0.92277354  0.         -0.53244007
     1.2128623   0.          0.         -0.27389917  0.
     0.         -0.4943481   0.         -0.1884134   0.        ]
   [ 0.          0.3364792   0.         -0.4448118  -0.22035956
    -0.97621596  0.         -0.31806776 -0.2208559   0.
    -0.45488206 -0.3642536   0.          3.5222926   0.
    -0.5731093  -0.72412103  1.8434241  -0.29777753 -0.7783528
     1.550686   -0.49919227 -0.9034085   0.8931283   0.
    -0.27410135  0.6127062  -0.248465   -0.5180509   1.1825348
    -0.5549855  -1.1778334   0.36798072  0.8451921   1.3479764
     0.          1.156909    0.         -1.48874

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.31639898 -0.7366839   1.286941   -1.0641094
     3.0455532   0.         -1.167886    0.         -1.2922095
    -0.18849796 -0.75431347  0.         -0.39200392 -0.26905942
    -0.5093727  -0.4207188  -0.672479    1.0377231   0.
    -0.18615381 -0.33772203 -0.40030056 -0.3391585  -0.815326
     1.758427   -0.38227302 -0.9125741   0.         -0.5283426
     1.1950487   0.          0.         -0.2721175   0.
     0.         -0.49065962  0.         -0.18723127  0.        ]
   [ 0.          0.33422303  0.         -0.4416117  -0.21895239
    -0.9648418   5.         -0.31595996 -0.21944514  0.
    -0.45158654 -0.36177     0.          3.2654421   0.
    -0.5685463  -0.71751773  1.7959629  -0.29581958 -0.77087885
     1.5195308  -0.49545306 -0.8936114   0.88354063  0.
    -0.27231815  0.6076592  -0.2468645  -0.51410985  1.1656682
    -0.5506338  -1.1611117   0.36546898  0.83653474  1.3254647
     0.         -0.8556099   0.         -1.4604484

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.31639898 -0.7366839   1.286941   -1.0641094
     3.0455532   0.         -1.167886    0.         -1.2922095
    -0.18849796 -0.75431347  0.         -0.39200392 -0.26905942
    -0.5093727  -0.4207188  -0.672479    1.0377231   0.
    -0.18615381 -0.33772203 -0.40030056 -0.3391585  -0.815326
     1.758427   -0.38227302 -0.9125741   0.         -0.5283426
     1.1950487   0.          0.         -0.2721175   0.
     0.         -0.49065962  0.         -0.18723127  0.        ]
   [ 0.          0.33422303  0.         -0.4416117  -0.21895239
    -0.9648418   5.         -0.31595996 -0.21944514  0.
    -0.45158654 -0.36177     0.          3.2654421   0.
    -0.5685463  -0.71751773  1.7959629  -0.29581958 -0.77087885
     1.5195308  -0.49545306 -0.8936114   0.88354063  0.
    -0.27231815  0.6076592  -0.2468645  -0.51410985  1.1656682
    -0.5506338  -1.1611117   0.36546898  0.83653474  1.3254647
     0.         -0.8556099   0.         -1.4604484

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.31639898 -0.7366839   1.286941   -1.0641094
     3.0455532   0.         -1.167886    0.         -1.2922095
    -0.18849796 -0.75431347  0.         -0.39200392 -0.26905942
    -0.5093727  -0.4207188  -0.672479    1.0377231   0.
    -0.18615381 -0.33772203 -0.40030056 -0.3391585  -0.815326
     1.758427   -0.38227302 -0.9125741   0.         -0.5283426
     1.1950487   0.          0.         -0.2721175   0.
     0.         -0.49065962  0.         -0.18723127  0.        ]
   [ 0.          0.33422303  0.         -0.4416117  -0.21895239
    -0.9648418   5.         -0.31595996 -0.21944514  0.
    -0.45158654 -0.36177     0.          3.2654421   0.
    -0.5685463  -0.71751773  1.7959629  -0.29581958 -0.77087885
     1.5195308  -0.49545306 -0.8936114   0.88354063  0.
    -0.27231815  0.6076592  -0.2468645  -0.51410985  1.1656682
    -0.5506338  -1.1611117   0.36546898  0.83653474  1.3254647
     0.         -0.8556099   0.         -1.4604484

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.31432876 -0.72996616  1.2668886  -1.0507497
     2.872937    0.         -1.1516563   0.         -1.2719753
    -0.18731521 -0.747318    0.         -0.38931787 -0.2673343
    -0.50557035 -0.41777828 -0.666702    1.0250275   0.
     3.7462618  -0.33548206 -0.39754134 -0.3369069  -0.8073082
     1.7166889  -0.37966835 -0.90270567  0.         -0.52433795
     1.1779984   0.          0.         -0.2703696   0.
     0.         -0.48705214  0.         -0.1860707   0.        ]
   [ 0.          0.33201146  0.         -0.4384793  -0.21757117
    -0.95385647  5.         -0.31389314 -0.2180604   0.
    -0.4483613  -0.3593361   0.         -0.31768125  0.
    -0.5640904  -0.7110917   1.751992   -0.29389924 -0.76361597
    -0.65669084 -0.49179626 -0.88412637  0.87425494  0.
    -0.27056873  0.6027347  -0.24529403 -0.51025707  1.1495042
    -0.5463826  -1.1450831   0.3630073   0.82813835  1.3040459
     0.         -0.8468598   0.         -1.433706

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.31432876 -0.72996616  1.2668886  -1.0507497
     2.872937    0.         -1.1516563   0.         -1.2719753
    -0.18731521 -0.747318    0.         -0.38931787 -0.2673343
    -0.50557035 -0.41777828 -0.666702    1.0250275   0.
     3.7462618  -0.33548206 -0.39754134 -0.3369069  -0.8073082
     1.7166889  -0.37966835 -0.90270567  0.         -0.52433795
     1.1779984   0.          0.         -0.2703696   0.
     0.         -0.48705214  0.         -0.1860707   0.        ]
   [ 0.          0.33201146  0.         -0.4384793  -0.21757117
    -0.95385647  5.         -0.31389314 -0.2180604   0.
    -0.4483613  -0.3593361   0.         -0.31768125  0.
    -0.5640904  -0.7110917   1.751992   -0.29389924 -0.76361597
    -0.65669084 -0.49179626 -0.88412637  0.87425494  0.
    -0.27056873  0.6027347  -0.24529403 -0.51025707  1.1495042
    -0.5463826  -1.1450831   0.3630073   0.82813835  1.3040459
     0.         -0.8468598   0.         -1.433706

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.31432876 -0.72996616  1.2668886  -1.0507497
     2.872937    0.         -1.1516563   0.         -1.2719753
    -0.18731521 -0.747318    0.         -0.38931787 -0.2673343
    -0.50557035 -0.41777828 -0.666702    1.0250275   0.
     3.7462618  -0.33548206 -0.39754134 -0.3369069  -0.8073082
     1.7166889  -0.37966835 -0.90270567  0.         -0.52433795
     1.1779984   0.          0.         -0.2703696   0.
     0.         -0.48705214  0.         -0.1860707   0.        ]
   [ 0.          0.33201146  0.         -0.4384793  -0.21757117
    -0.95385647  5.         -0.31389314 -0.2180604   0.
    -0.4483613  -0.3593361   0.         -0.31768125  0.
    -0.5640904  -0.7110917   1.751992   -0.29389924 -0.76361597
    -0.65669084 -0.49179626 -0.88412637  0.87425494  0.
    -0.27056873  0.6027347  -0.24529403 -0.51025707  1.1495042
    -0.5463826  -1.1450831   0.3630073   0.82813835  1.3040459
     0.         -0.8468598   0.         -1.433706

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.31229827 -0.7234287   1.2477465  -1.037881
     2.7267075   0.         -1.1360853   0.         -1.2526634
    -0.18615372 -0.7405134   0.          2.3845627  -0.26564142
    -0.5018516  -0.41489834 -0.6610711   1.0127871   0.
     3.4501715  -0.3332856  -0.39483798 -0.3346991  -0.79952234
     1.6777911  -0.3771158  -0.89315087  0.         -0.5204228
     1.1616583   0.          0.         -0.26865438  0.
     0.         -0.48352292  0.         -0.18493108  0.        ]
   [ 0.          0.32984257  0.         -0.4354123  -0.21621515
    -0.94323826  5.         -0.31186602 -0.21670097  0.
    -0.44520384 -0.35695025  0.         -0.31562042  0.
    -0.5597374  -0.7048352   1.711103   -0.29201528 -0.75655454
    -0.65119416 -0.48821902 -0.87493724  0.8652564   0.
    -0.26885208  0.5979277  -0.24375254 -0.50648946  1.1339948
    -0.54222816 -1.129701    0.36059475  0.8199899   1.2836336
     0.         -0.83837277  0.         -1.408383

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.31229827 -0.7234287   1.2477465  -1.037881
     2.7267075   0.         -1.1360853   0.         -1.2526634
    -0.18615372 -0.7405134   0.          2.3845627  -0.26564142
    -0.5018516  -0.41489834 -0.6610711   1.0127871   0.
     3.4501715  -0.3332856  -0.39483798 -0.3346991  -0.79952234
     1.6777911  -0.3771158  -0.89315087  0.         -0.5204228
     1.1616583   0.          0.         -0.26865438  0.
     0.         -0.48352292  0.         -0.18493108  0.        ]
   [ 0.          0.32984257  0.         -0.4354123  -0.21621515
    -0.94323826  5.         -0.31186602 -0.21670097  0.
    -0.44520384 -0.35695025  0.         -0.31562042  0.
    -0.5597374  -0.7048352   1.711103   -0.29201528 -0.75655454
    -0.65119416 -0.48821902 -0.87493724  0.8652564   0.
    -0.26885208  0.5979277  -0.24375254 -0.50648946  1.1339948
    -0.54222816 -1.129701    0.36059475  0.8199899   1.2836336
     0.         -0.83837277  0.         -1.408383

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.31229827 -0.7234287   1.2477465  -1.037881
     2.7267075   0.         -1.1360853   0.         -1.2526634
    -0.18615372 -0.7405134   0.          2.3845627  -0.26564142
    -0.5018516  -0.41489834 -0.6610711   1.0127871   0.
     3.4501715  -0.3332856  -0.39483798 -0.3346991  -0.79952234
     1.6777911  -0.3771158  -0.89315087  0.         -0.5204228
     1.1616583   0.          0.         -0.26865438  0.
     0.         -0.48352292  0.         -0.18493108  0.        ]
   [ 0.          0.32984257  0.         -0.4354123  -0.21621515
    -0.94323826  5.         -0.31186602 -0.21670097  0.
    -0.44520384 -0.35695025  0.         -0.31562042  0.
    -0.5597374  -0.7048352   1.711103   -0.29201528 -0.75655454
    -0.65119416 -0.48821902 -0.87493724  0.8652564   0.
    -0.26885208  0.5979277  -0.24375254 -0.50648946  1.1339948
    -0.54222816 -1.129701    0.36059475  0.8199899   1.2836336
     0.         -0.83837277  0.         -1.408383

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3103063  -0.7170639   1.2294471  -1.0254742
     2.600762    0.         -1.1211299   0.         -1.2342062
    -0.18501288 -0.7338913   0.          2.29603    -0.2639798
    -0.49821347 -0.41207668 -0.65558046 -0.9782258   0.
     3.214773   -0.33113134 -0.3921887  -0.33253372 -0.7919575
     1.6414255  -0.37461367 -0.8838933   0.         -0.51659375
     1.1459805   0.          0.         -0.26697093  0.
     0.         -0.48006907  0.         -0.18381174  0.        ]
   [ 0.          0.32771516  0.         -0.43240836 -0.21488354
    -0.932967    5.         -0.30987737 -0.21536599  0.
    -0.44211194 -0.35461083  0.         -0.31359872  0.
    -0.5554833  -0.6987409   1.672952   -0.2901667  -0.74968535
    -0.64583325 -0.48471853 -0.8660288   0.8565302   0.
    -0.26716715  0.59323394 -0.24223919 -0.5028037   1.1190975
    -0.5381668  -1.1149232   0.35822892  0.81207716 -0.77253723
     0.         -0.830136    0.         -1.38435

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3103063  -0.7170639   1.2294471  -1.0254742
     2.600762    0.         -1.1211299   0.         -1.2342062
    -0.18501288 -0.7338913   0.          2.29603    -0.2639798
    -0.49821347 -0.41207668 -0.65558046 -0.9782258   0.
     3.214773   -0.33113134 -0.3921887  -0.33253372 -0.7919575
     1.6414255  -0.37461367 -0.8838933   0.         -0.51659375
     1.1459805   0.          0.         -0.26697093  0.
     0.         -0.48006907  0.         -0.18381174  0.        ]
   [ 0.          0.32771516  0.         -0.43240836 -0.21488354
    -0.932967    5.         -0.30987737 -0.21536599  0.
    -0.44211194 -0.35461083  0.         -0.31359872  0.
    -0.5554833  -0.6987409   1.672952   -0.2901667  -0.74968535
    -0.64583325 -0.48471853 -0.8660288   0.8565302   0.
    -0.26716715  0.59323394 -0.24223919 -0.5028037   1.1190975
    -0.5381668  -1.1149232   0.35822892  0.81207716 -0.77253723
     0.         -0.830136    0.         -1.38435

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3103063  -0.7170639   1.2294471  -1.0254742
     2.600762    0.         -1.1211299   0.         -1.2342062
    -0.18501288 -0.7338913   0.          2.29603    -0.2639798
    -0.49821347 -0.41207668 -0.65558046 -0.9782258   0.
     3.214773   -0.33113134 -0.3921887  -0.33253372 -0.7919575
     1.6414255  -0.37461367 -0.8838933   0.         -0.51659375
     1.1459805   0.          0.         -0.26697093  0.
     0.         -0.48006907  0.         -0.18381174  0.        ]
   [ 0.          0.32771516  0.         -0.43240836 -0.21488354
    -0.932967    5.         -0.30987737 -0.21536599  0.
    -0.44211194 -0.35461083  0.         -0.31359872  0.
    -0.5554833  -0.6987409   1.672952   -0.2901667  -0.74968535
    -0.64583325 -0.48471853 -0.8660288   0.8565302   0.
    -0.26716715  0.59323394 -0.24223919 -0.5028037   1.1190975
    -0.5381668  -1.1149232   0.35822892  0.81207716 -0.77253723
     0.         -0.830136    0.         -1.38435

Observations in ActorNet tf.Tensor(
[[[ 0.         -0.30835164 -0.71086395  1.2119309  -1.0135024
    2.4908068   0.         -1.1067504   0.         -1.2165425
   -0.18389209 -0.7274437   0.         -0.41937277 -0.2623485
   -0.49465305 -0.40931153 -0.65022427 -0.96732485  0.
    3.0218132  -0.32901788 -0.38959172 -0.33040938 -0.78460336
    1.6073287  -0.3721603  -0.8749176   0.         -0.5128478
    1.1309214   0.          0.         -0.2653182   0.
    0.         -0.47668788  0.         -0.18271212  0.        ]
  [ 0.          0.32562828  0.         -0.42946538 -0.21357568
   -0.92302436  4.5049715  -0.3079259  -0.21405481  0.
   -0.43908328 -0.35231647  0.         -0.31161493  0.
    1.7589878  -0.69280183  1.6372467  -0.28835228 -0.74299985
   -0.6406023  -0.48129198 -0.857387    0.84806275  0.
   -0.26551306 -1.5697296  -0.24075314 -0.49919698  1.1047726
   -0.5341951  -1.1007111   0.3559091   0.80438936 -0.765496
    0.         -0.8221371   0.         -1.3615215   0.        ]
 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30835164 -0.71086395  1.2119309  -1.0135024
     2.4908068   0.         -1.1067504   0.         -1.2165425
    -0.18389209 -0.7274437   0.         -0.41937277 -0.2623485
    -0.49465305 -0.40931153 -0.65022427 -0.96732485  0.
     3.0218132  -0.32901788 -0.38959172 -0.33040938 -0.78460336
     1.6073287  -0.3721603  -0.8749176   0.         -0.5128478
     1.1309214   0.          0.         -0.2653182   0.
     0.         -0.47668788  0.         -0.18271212  0.        ]
   [ 0.          0.32562828  0.         -0.42946538 -0.21357568
    -0.92302436  4.5049715  -0.3079259  -0.21405481  0.
    -0.43908328 -0.35231647  0.         -0.31161493  0.
     1.7589878  -0.69280183  1.6372467  -0.28835228 -0.74299985
    -0.6406023  -0.48129198 -0.857387    0.84806275  0.
    -0.26551306  0.58864856 -0.24075314 -0.49919698  1.1047726
    -0.5341951  -1.1007111   0.3559091   0.80438936 -0.765496
     0.         -0.8221371   0.         -1.3615215

  0.         0.17494829 0.         0.3647902 ]]
Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[1.4675286  0.57750636 0.39836037 0.         0.        ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30835164 -0.71086395  1.2119309  -1.0135024
     2.4908068   0.         -1.1067504   0.         -1.2165425
    -0.18389209 -0.7274437   0.         -0.41937277 -0.2623485
    -0.49465305 -0.40931153 -0.65022427 -0.96732485  0.
     3.0218132  -0.32901788 -0.38959172 -0.33040938 -0.78460336
     1.6073287  -0.3721603  -0.8749176   0.         -0.5128478
     1.1309214   0.          0.         -0.2653182   0.
     0.         -0.47668788  0.         -0.18271212  0.        ]
   [ 0.          0.32562828  0.         -0.42946538 -0.21357568
    -0.92302436  4.5049715  -0.3079259  -0.21405481  0.
    -0.43908328 -0.35231647  0.         -0.31161493  0.
     1.7589878  -0.69280183  1.6372467  -0.28835228 -0.74299985
    -0.6406023  -0.

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30835164 -0.71086395  1.2119309  -1.0135024
     2.4908068   0.         -1.1067504   0.         -1.2165425
    -0.18389209 -0.7274437   0.         -0.41937277 -0.2623485
    -0.49465305 -0.40931153 -0.65022427 -0.96732485  0.
     3.0218132  -0.32901788 -0.38959172 -0.33040938 -0.78460336
     1.6073287  -0.3721603  -0.8749176   0.         -0.5128478
     1.1309214   0.          0.         -0.2653182   0.
     0.         -0.47668788  0.         -0.18271212  0.        ]
   [ 0.          0.32562828  0.         -0.42946538 -0.21357568
    -0.92302436  4.5049715  -0.3079259  -0.21405481  0.
    -0.43908328 -0.35231647  0.         -0.31161493  0.
     1.7589878  -0.69280183  1.6372467  -0.28835228 -0.74299985
    -0.6406023  -0.48129198 -0.857387    0.84806275  0.
    -0.26551306  0.58864856 -0.24075314 -0.49919698  1.1047726
    -0.5341951  -1.1007111   0.3559091   0.80438936 -0.765496
     0.         -0.8221371   0.         -1.3615215

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30643314 -0.70482206  1.1951432  -1.0019406
     2.3937213   0.         -1.0929106   0.         -1.1996164
    -0.18279073 -0.721163    0.         -0.41657764 -0.2607466
    -0.49116752 -0.40660095 -0.644997   -0.9567807   0.
     2.8599038  -0.326944   -0.38704517 -0.32832482 -0.77745026
     1.575274   -0.36975414 -0.86621016  0.         -0.5091818
     1.1164411   0.          0.         -0.26369533  0.
     0.         -0.47337693  0.         -0.18163165  0.        ]
   [ 0.          0.32358003  0.         -0.42658138 -0.2122908
    -0.9133931   4.037675   -0.3060105  -0.21276665  0.
    -0.43611565 -0.35006568  0.         -0.30966786  0.
     1.7190671  -0.68701166  1.6037359  -0.286571   -0.73648995
    -0.6354963  -0.4779368  -0.84899896  0.8398417   0.
    -0.26388878 -1.5821211  -0.23929358 -0.49566653  1.0909846
    -0.5303097  -1.0870295   0.35363317  0.79691577 -0.7586437
     0.         -0.8143653   0.         -1.3397807

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30643314 -0.70482206  1.1951432  -1.0019406
     2.3937213   0.         -1.0929106   0.         -1.1996164
    -0.18279073 -0.721163    0.         -0.41657764 -0.2607466
    -0.49116752 -0.40660095 -0.644997   -0.9567807   0.
     2.8599038  -0.326944   -0.38704517 -0.32832482 -0.77745026
     1.575274   -0.36975414 -0.86621016  0.         -0.5091818
     1.1164411   0.          0.         -0.26369533  0.
     0.         -0.47337693  0.         -0.18163165  0.        ]
   [ 0.          0.32358003  0.         -0.42658138 -0.2122908
    -0.9133931   4.037675   -0.3060105  -0.21276665  0.
    -0.43611565 -0.35006568  0.         -0.30966786  0.
     1.7190671  -0.68701166  1.6037359  -0.286571   -0.73648995
    -0.6354963  -0.4779368  -0.84899896  0.8398417   0.
    -0.26388878 -1.5821211  -0.23929358 -0.49566653  1.0909846
    -0.5303097  -1.0870295   0.35363317  0.79691577 -0.7586437
     0.         -0.8143653   0.         -1.3397807

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30643314 -0.70482206  1.1951432  -1.0019406
     2.3937213   0.         -1.0929106   0.         -1.1996164
    -0.18279073 -0.721163    0.         -0.41657764 -0.2607466
    -0.49116752 -0.40660095 -0.644997   -0.9567807   0.
     2.8599038  -0.326944   -0.38704517 -0.32832482 -0.77745026
     1.575274   -0.36975414 -0.86621016  0.         -0.5091818
     1.1164411   0.          0.         -0.26369533  0.
     0.         -0.47337693  0.         -0.18163165  0.        ]
   [ 0.          0.32358003  0.         -0.42658138 -0.2122908
    -0.9133931   4.037675   -0.3060105  -0.21276665  0.
    -0.43611565 -0.35006568  0.         -0.30966786  0.
     1.7190671  -0.68701166  1.6037359  -0.286571   -0.73648995
    -0.6354963  -0.4779368  -0.84899896  0.8398417   0.
    -0.26388878 -1.5821211  -0.23929358 -0.49566653  1.0909846
    -0.5303097  -1.0870295   0.35363317  0.79691577 -0.7586437
     0.         -0.8143653   0.         -1.3397807

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30454963 -0.6989315   1.1790347  -0.9907659
     2.3071764   0.         -1.079578    0.         -1.1833787
    -0.18170828 -0.7150419   0.         -0.41383734 -0.2591732
    -0.48775443 -0.4039432  -0.63989365 -0.9465739   0.
     2.7215219  -0.32490844 -0.38454768 -0.3262789  -0.77048934
     1.5450652  -0.36739367 -0.85775757  0.         -0.5055931
     1.1025035   0.          0.         -0.26210135  0.
     0.         -0.47013378  0.         -0.18056974  0.        ]
   [ 0.          0.32156992  0.         -0.42375442 -0.21102823
    -0.904057    3.6909852  -0.30413014 -0.2115009   0.
    -0.4332071  -0.34785706  0.         -0.30775642  0.
     1.6817489  -0.68136406  1.5722041  -0.2848219  -0.73014826
    -0.6305103  -0.4746506  -0.84085244  0.83185506  0.
    -0.2622935  -1.5515987  -0.23785973 -0.49220967  1.0777006
    -0.5265077  -1.0738461   0.35140014  0.7896464  -0.75197214
     0.         -0.8068097   0.         -1.31905

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30454963 -0.6989315   1.1790347  -0.9907659
     2.3071764   0.         -1.079578    0.         -1.1833787
    -0.18170828 -0.7150419   0.         -0.41383734 -0.2591732
    -0.48775443 -0.4039432  -0.63989365 -0.9465739   0.
     2.7215219  -0.32490844 -0.38454768 -0.3262789  -0.77048934
     1.5450652  -0.36739367 -0.85775757  0.         -0.5055931
     1.1025035   0.          0.         -0.26210135  0.
     0.         -0.47013378  0.         -0.18056974  0.        ]
   [ 0.          0.32156992  0.         -0.42375442 -0.21102823
    -0.904057    3.6909852  -0.30413014 -0.2115009   0.
    -0.4332071  -0.34785706  0.         -0.30775642  0.
     1.6817489  -0.68136406  1.5722041  -0.2848219  -0.73014826
    -0.6305103  -0.4746506  -0.84085244  0.83185506  0.
    -0.2622935  -1.5515987  -0.23785973 -0.49220967  1.0777006
    -0.5265077  -1.0738461   0.35140014  0.7896464  -0.75197214
     0.         -0.8068097   0.         -1.31905

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30454963 -0.6989315   1.1790347  -0.9907659
     2.3071764   0.         -1.079578    0.         -1.1833787
    -0.18170828 -0.7150419   0.         -0.41383734 -0.2591732
    -0.48775443 -0.4039432  -0.63989365 -0.9465739   0.
     2.7215219  -0.32490844 -0.38454768 -0.3262789  -0.77048934
     1.5450652  -0.36739367 -0.85775757  0.         -0.5055931
     1.1025035   0.          0.         -0.26210135  0.
     0.         -0.47013378  0.         -0.18056974  0.        ]
   [ 0.          0.32156992  0.         -0.42375442 -0.21102823
    -0.904057    3.6909852  -0.30413014 -0.2115009   0.
    -0.4332071  -0.34785706  0.         -0.30775642  0.
     1.6817489  -0.68136406  1.5722041  -0.2848219  -0.73014826
    -0.6305103  -0.4746506  -0.84085244  0.83185506  0.
    -0.2622935  -1.5515987  -0.23785973 -0.49220967  1.0777006
    -0.5265077  -1.0738461   0.35140014  0.7896464  -0.75197214
     0.         -0.8068097   0.         -1.31905

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30270013 -0.69318616  1.1635613  -0.97995716
     2.2293928   0.         -1.066722    0.         -1.1677836
    -0.18064418 -0.7090741   0.         -0.41115     3.6497228
    -0.4844113  -0.40133652 -0.63490933 -0.93668723  0.
     2.60147    -0.32291004 -0.38209748 -0.3242703  -0.763712
     1.516532   -0.36507747 -0.8495477   0.         -0.502079
     1.0890758   0.          0.         -0.2605355   0.
     0.         -0.46695617  0.         -0.17952588  0.        ]
   [ 0.          0.3195963   0.         -0.42098257 -0.20978735
    -0.8950017   3.4206016  -0.30228364 -0.21025692  0.
    -0.43035564 -0.3456894   0.          3.1735      0.
     1.6467621  -0.67585343  1.5424634  -0.28310397 -0.72396743
    -0.6256397  -0.47143102 -0.83293605  0.82409203  0.
    -0.26072627 -1.522779   -0.23645082 -0.4888239   1.0648906
    -0.522786   -1.0611312   0.3492086   0.7825726  -0.7454735
     0.         -0.79946053  0.         -1.2992544 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30270013 -0.69318616  1.1635613  -0.97995716
     2.2293928   0.         -1.066722    0.         -1.1677836
    -0.18064418 -0.7090741   0.         -0.41115     3.6497228
    -0.4844113  -0.40133652 -0.63490933 -0.93668723  0.
     2.60147    -0.32291004 -0.38209748 -0.3242703  -0.763712
     1.516532   -0.36507747 -0.8495477   0.         -0.502079
     1.0890758   0.          0.         -0.2605355   0.
     0.         -0.46695617  0.         -0.17952588  0.        ]
   [ 0.          0.3195963   0.         -0.42098257 -0.20978735
    -0.8950017   3.4206016  -0.30228364 -0.21025692  0.
    -0.43035564 -0.3456894   0.          3.1735      0.
     1.6467621  -0.67585343  1.5424634  -0.28310397 -0.72396743
    -0.6256397  -0.47143102 -0.83293605  0.82409203  0.
    -0.26072627 -1.522779   -0.23645082 -0.4888239   1.0648906
    -0.522786   -1.0611312   0.3492086   0.7825726  -0.7454735
     0.         -0.79946053  0.         -1.2992544 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.30270013 -0.69318616  1.1635613  -0.97995716
     2.2293928   0.         -1.066722    0.         -1.1677836
    -0.18064418 -0.7090741   0.         -0.41115     3.6497228
    -0.4844113  -0.40133652 -0.63490933 -0.93668723  0.
     2.60147    -0.32291004 -0.38209748 -0.3242703  -0.763712
     1.516532   -0.36507747 -0.8495477   0.         -0.502079
     1.0890758   0.          0.         -0.2605355   0.
     0.         -0.46695617  0.         -0.17952588  0.        ]
   [ 0.          0.3195963   0.         -0.42098257 -0.20978735
    -0.8950017   3.4206016  -0.30228364 -0.21025692  0.
    -0.43035564 -0.3456894   0.          3.1735      0.
     1.6467621  -0.67585343  1.5424634  -0.28310397 -0.72396743
    -0.6256397  -0.47143102 -0.83293605  0.82409203  0.
    -0.26072627 -1.522779   -0.23645082 -0.4888239   1.0648906
    -0.522786   -1.0611312   0.3492086   0.7825726  -0.7454735
     0.         -0.79946053  0.         -1.2992544 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3008836  -0.68758005  1.1486821  -0.9694947
     2.1589856   0.         -1.0543147   0.         -1.1527892
    -0.17959791 -0.7032531   0.          2.2760074   3.389931
    -0.48113567 -0.39877933 -0.6300395  -0.927104    0.
     2.4960296  -0.3209477  -0.3796932  -0.32229793 -0.7571101
     1.4895247  -0.36280417 -0.8415692   0.         -0.49863678
     1.0761273   0.          0.         -0.25899687  0.
     0.         -0.46384186  0.         -0.17849955  0.        ]
   [ 0.          0.31765795  0.         -0.41826412 -0.20856756
    -0.8862134   3.2020981  -0.30047005 -0.20903407  0.
    -0.4275595  -0.3435613   0.         -0.32472137  0.
     1.6138737  -0.67047423  1.5143508  -0.28141633 -0.7179408
    -0.6208798  -0.46827576 -0.82523894  0.8165424   0.
    -0.2591864  -1.4955091  -0.23506612 -0.48550665  1.0525272
    -0.5191419  -1.0488579  -2.5556023   0.77568555 -0.73914033
     0.         -0.7923084   0.         -1.2803245

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3008836  -0.68758005  1.1486821  -0.9694947
     2.1589856   0.         -1.0543147   0.         -1.1527892
    -0.17959791 -0.7032531   0.          2.2760074   3.389931
    -0.48113567 -0.39877933 -0.6300395  -0.927104    0.
     2.4960296  -0.3209477  -0.3796932  -0.32229793 -0.7571101
     1.4895247  -0.36280417 -0.8415692   0.         -0.49863678
     1.0761273   0.          0.         -0.25899687  0.
     0.         -0.46384186  0.         -0.17849955  0.        ]
   [ 0.          0.31765795  0.         -0.41826412 -0.20856756
    -0.8862134   3.2020981  -0.30047005 -0.20903407  0.
    -0.4275595  -0.3435613   0.         -0.32472137  0.
     1.6138737  -0.67047423  1.5143508  -0.28141633 -0.7179408
    -0.6208798  -0.46827576 -0.82523894  0.8165424   0.
    -0.2591864  -1.4955091  -0.23506612 -0.48550665  1.0525272
    -0.5191419  -1.0488579  -2.5556023   0.77568555 -0.73914033
     0.         -0.7923084   0.         -1.2803245

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.3008836  -0.68758005  1.1486821  -0.9694947
     2.1589856   0.         -1.0543147   0.         -1.1527892
    -0.17959791 -0.7032531   0.          2.2760074   3.389931
    -0.48113567 -0.39877933 -0.6300395  -0.927104    0.
     2.4960296  -0.3209477  -0.3796932  -0.32229793 -0.7571101
     1.4895247  -0.36280417 -0.8415692   0.         -0.49863678
     1.0761273   0.          0.         -0.25899687  0.
     0.         -0.46384186  0.         -0.17849955  0.        ]
   [ 0.          0.31765795  0.         -0.41826412 -0.20856756
    -0.8862134   3.2020981  -0.30047005 -0.20903407  0.
    -0.4275595  -0.3435613   0.         -0.32472137  0.
     1.6138737  -0.67047423  1.5143508  -0.28141633 -0.7179408
    -0.6208798  -0.46827576 -0.82523894  0.8165424   0.
    -0.2591864  -1.4955091  -0.23506612 -0.48550665  1.0525272
    -0.5191419  -1.0488579  -2.5556023   0.77568555 -0.73914033
     0.         -0.7923084   0.         -1.2803245

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29909906 -0.68210757  1.1343597  -0.95936054
     2.0948582   0.         -1.0423311   0.         -1.1383588
    -0.17856899 -0.69757295  0.          2.2024953   3.1787498
    -0.47792533 -0.39627007 -0.62527996 -0.91780925  0.
     2.4024587  -0.3190203  -0.3773334  -0.32036078 -0.75067675
     1.4639121  -0.3605724  -0.83381116  0.         -0.4952642
     1.0636303   0.          0.          3.733529    0.
     0.         -0.4607888   0.         -0.1774903   0.        ]
   [ 0.          0.31575465  0.         -0.41559726 -0.20736825
    -0.8776789   3.0207582  -0.2986884  -0.20783173  0.
    -0.42481688 -0.34147173  0.         -0.3227666   0.
     1.5828819  -0.6652213   1.4877224  -0.27975807 -0.71206206
    -0.616227   -0.46518284 -0.81775147  0.8091965   0.
    -0.2576729  -1.4696552  -0.233705   -0.48225582  1.0405848
    -0.5155727   0.891821   -2.456034   -1.2175469  -0.7329657
     0.         -0.78534496  0.         -1.26219

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29909906 -0.68210757  1.1343597  -0.95936054
     2.0948582   0.         -1.0423311   0.         -1.1383588
    -0.17856899 -0.69757295  0.          2.2024953   3.1787498
    -0.47792533 -0.39627007 -0.62527996 -0.91780925  0.
     2.4024587  -0.3190203  -0.3773334  -0.32036078 -0.75067675
     1.4639121  -0.3605724  -0.83381116  0.         -0.4952642
     1.0636303   0.          0.          3.733529    0.
     0.         -0.4607888   0.         -0.1774903   0.        ]
   [ 0.          0.31575465  0.         -0.41559726 -0.20736825
    -0.8776789   3.0207582  -0.2986884  -0.20783173  0.
    -0.42481688 -0.34147173  0.         -0.3227666   0.
     1.5828819  -0.6652213   1.4877224  -0.27975807 -0.71206206
    -0.616227   -0.46518284 -0.81775147  0.8091965   0.
    -0.2576729  -1.4696552  -0.233705   -0.48225582  1.0405848
    -0.5155727   0.891821   -2.456034   -1.2175469  -0.7329657
     0.         -0.78534496  0.         -1.26219

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29909906 -0.68210757  1.1343597  -0.95936054
     2.0948582   0.         -1.0423311   0.         -1.1383588
    -0.17856899 -0.69757295  0.          2.2024953   3.1787498
    -0.47792533 -0.39627007 -0.62527996 -0.91780925  0.
     2.4024587  -0.3190203  -0.3773334  -0.32036078 -0.75067675
     1.4639121  -0.3605724  -0.83381116  0.         -0.4952642
     1.0636303   0.          0.          3.733529    0.
     0.         -0.4607888   0.         -0.1774903   0.        ]
   [ 0.          0.31575465  0.         -0.41559726 -0.20736825
    -0.8776789   3.0207582  -0.2986884  -0.20783173  0.
    -0.42481688 -0.34147173  0.         -0.3227666   0.
     1.5828819  -0.6652213   1.4877224  -0.27975807 -0.71206206
    -0.616227   -0.46518284 -0.81775147  0.8091965   0.
    -0.2576729  -1.4696552  -0.233705   -0.48225582  1.0405848
    -0.5155727   0.891821   -2.456034   -1.2175469  -0.7329657
     0.         -0.78534496  0.         -1.26219

New state [[1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 1. 0. 0. 1. 1. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 1. 0.
  0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0.]]
Gathering trajectories...
Replay buffer gather all Trajectory(step_type=<tf.Tensor: id=1436407, shape=(1, 2), dtype=int32, numpy=array([[1, 1]], dtype=int32)>, observation=<tf.Tensor: id=1436408, shape=(1, 2, 4, 40), dtype=float32, numpy=
array([[[[1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
          0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
          1., 0., 0., 1., 0., 1., 0., 0., 0., 1.],
         [0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29734564 -0.67676365  1.1205605  -0.9495379
     2.036129    0.         -1.0307472   0.         -1.1244576
    -0.17755695 -0.6920282   0.          2.1356795   3.0026946
    -0.47477818 -0.3938073  -0.62062645 -0.90878844  0.
     2.3186848  -0.31712687 -0.3750168   3.0076563  -0.74440426
    -0.6748021  -0.35838103 -0.82626414  0.         -0.49195904
     1.051559    0.          0.          3.4628818   0.
     0.         -0.45779496  0.         -0.17649762  0.        ]
   [ 0.          0.31388426  0.         -0.4129805  -0.20618886
    -0.8693862   2.8671155  -0.29693785 -0.20664936  0.
    -0.42212608 -0.3394194   0.         -0.32084632  0.
     1.5536113  -0.66008973  1.4624525  -0.27812836 -0.70632535
    -0.6116769  -0.4621502  -0.81046414 -1.1819614   0.
     3.7573845  -1.4450984  -0.23236674 -0.47906917  1.0290401
    -0.5120759   0.88327825 -2.367271   -1.2014902  -0.7269433
     0.         -0.7785619   0.         -1.24482

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29734564 -0.67676365  1.1205605  -0.9495379
     2.036129    0.         -1.0307472   0.         -1.1244576
    -0.17755695 -0.6920282   0.          2.1356795   3.0026946
    -0.47477818 -0.3938073  -0.62062645 -0.90878844  0.
     2.3186848  -0.31712687 -0.3750168   3.0076563  -0.74440426
    -0.6748021  -0.35838103 -0.82626414  0.         -0.49195904
     1.051559    0.          0.          3.4628818   0.
     0.         -0.45779496  0.         -0.17649762  0.        ]
   [ 0.          0.31388426  0.         -0.4129805  -0.20618886
    -0.8693862   2.8671155  -0.29693785 -0.20664936  0.
    -0.42212608 -0.3394194   0.         -0.32084632  0.
     1.5536113  -0.66008973  1.4624525  -0.27812836 -0.70632535
    -0.6116769  -0.4621502  -0.81046414 -1.1819614   0.
     3.7573845  -1.4450984  -0.23236674 -0.47906917  1.0290401
    -0.5120759   0.88327825 -2.367271   -1.2014902  -0.7269433
     0.         -0.7785619   0.         -1.24482

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29734564 -0.67676365  1.1205605  -0.9495379
     2.036129    0.         -1.0307472   0.         -1.1244576
    -0.17755695 -0.6920282   0.          2.1356795   3.0026946
    -0.47477818 -0.3938073  -0.62062645 -0.90878844  0.
     2.3186848  -0.31712687 -0.3750168   3.0076563  -0.74440426
    -0.6748021  -0.35838103 -0.82626414  0.         -0.49195904
     1.051559    0.          0.          3.4628818   0.
     0.         -0.45779496  0.         -0.17649762  0.        ]
   [ 0.          0.31388426  0.         -0.4129805  -0.20618886
    -0.8693862   2.8671155  -0.29693785 -0.20664936  0.
    -0.42212608 -0.3394194   0.         -0.32084632  0.
     1.5536113  -0.66008973  1.4624525  -0.27812836 -0.70632535
    -0.6116769  -0.4621502  -0.81046414 -1.1819614   0.
     3.7573845  -1.4450984  -0.23236674 -0.47906917  1.0290401
    -0.5120759   0.88327825 -2.367271   -1.2014902  -0.7269433
     0.         -0.7785619   0.         -1.24482

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29562238 -0.6715433   1.1072533  -0.9400109
     1.982081    0.         -1.0195411   0.         -1.1110537
    -0.17656127 -0.6866135   0.          2.0746014   2.8529963
     2.017794   -0.39138955 -0.6160752  -0.9000287   0.
     2.24311    -0.3152664  -0.37274197  2.8572974  -0.7382866
    -0.6696054  -0.35622883 -0.8189181   0.         -0.48871875
     1.0398898   0.          0.          3.2436903   0.
     0.         -0.45485857  0.         -0.17552112  0.        ]
   [ 0.          0.31204653  0.         -0.41041228 -0.20502885
    -0.8613244   2.734769   -0.29521737 -0.20548643  0.
    -0.41948545 -0.3374033   0.         -0.3189596   0.
    -0.6376928  -0.655075    1.4384291  -0.27652636 -0.7007251
    -0.607226   -0.45917577 -0.8033681  -1.1670594   0.
     3.4847405  -1.421734   -0.23105071 -0.47594455  1.0178717
    -0.50864905  0.8749763  -2.2874901  -1.1860527  -0.72106683
     0.         -0.7719515   0.         -1.228146

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29562238 -0.6715433   1.1072533  -0.9400109
     1.982081    0.         -1.0195411   0.         -1.1110537
    -0.17656127 -0.6866135   0.          2.0746014   2.8529963
     2.017794   -0.39138955 -0.6160752  -0.9000287   0.
     2.24311    -0.3152664  -0.37274197  2.8572974  -0.7382866
    -0.6696054  -0.35622883 -0.8189181   0.         -0.48871875
     1.0398898   0.          0.          3.2436903   0.
     0.         -0.45485857  0.         -0.17552112  0.        ]
   [ 0.          0.31204653  0.         -0.41041228 -0.20502885
    -0.8613244   2.734769   -0.29521737 -0.20548643  0.
    -0.41948545 -0.3374033   0.         -0.3189596   0.
    -0.6376928  -0.655075    1.4384291  -0.27652636 -0.7007251
    -0.607226   -0.45917577 -0.8033681  -1.1670594   0.
     3.4847405  -1.421734   -0.23105071 -0.47594455  1.0178717
    -0.50864905  0.8749763  -2.2874901  -1.1860527  -0.72106683
     0.         -0.7719515   0.         -1.228146

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29562238 -0.6715433   1.1072533  -0.9400109
     1.982081    0.         -1.0195411   0.         -1.1110537
    -0.17656127 -0.6866135   0.          2.0746014   2.8529963
     2.017794   -0.39138955 -0.6160752  -0.9000287   0.
     2.24311    -0.3152664  -0.37274197  2.8572974  -0.7382866
    -0.6696054  -0.35622883 -0.8189181   0.         -0.48871875
     1.0398898   0.          0.          3.2436903   0.
     0.         -0.45485857  0.         -0.17552112  0.        ]
   [ 0.          0.31204653  0.         -0.41041228 -0.20502885
    -0.8613244   2.734769   -0.29521737 -0.20548643  0.
    -0.41948545 -0.3374033   0.         -0.3189596   0.
    -0.6376928  -0.655075    1.4384291  -0.27652636 -0.7007251
    -0.607226   -0.45917577 -0.8033681  -1.1670594   0.
     3.4847405  -1.421734   -0.23105071 -0.47594455  1.0178717
    -0.50864905  0.8749763  -2.2874901  -1.1860527  -0.72106683
     0.         -0.7719515   0.         -1.228146

action [[0.4489743 0.1846644]]
New state [[1. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1.
  0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 1. 0.
  0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0.]]
Observations in ActorNet tf.Tensor(
[[[ 0.         -0.2939284  -0.6664417   1.0944095  -0.9307652
    1.9321237   0.         -1.0086931   0.         -1.0981184
   -0.17558157 -0.6813237   0.          2.0184846   2.723674
    1.965517   -0.3890155  -0.6116224  -0.8915174   0.
    2.174478   -0.31343788 -0.37050772  2.7274547  -0.732317
   -0.6645267   2.7363398  -0.81176466  0.         -0.48554143
   -0.94631267  0.          0.          3.0614772   0.
   

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.2939284  -0.6664417   1.0944095  -0.9307652
     1.9321237   0.         -1.0086931   0.         -1.0981184
    -0.17558157 -0.6813237   0.          2.0184846   2.723674
     1.965517   -0.3890155  -0.6116224  -0.8915174   0.
     2.174478   -0.31343788 -0.37050772  2.7274547  -0.732317
    -0.6645267   2.7363398  -0.81176466  0.         -0.48554143
    -0.94631267  0.          0.          3.0614772   0.
     0.         -0.45197773  0.         -0.17456028  0.        ]
   [ 0.          0.3102405   0.         -0.40789098 -0.20388764
    -0.8534827   2.6192143  -0.29352617 -0.20434237  0.
    -0.4168935  -0.33542234  0.         -0.31710532  0.
    -0.63299394 -0.65017277  1.4155529  -0.27495137 -0.6952557
    -0.6028707  -0.45625788 -0.7964553  -1.1527076   0.
     3.2639763  -1.3994683  -0.22975633 -0.47288007  1.0070593
     1.9201012   0.86690414 -2.215272   -1.1711957  -0.71533036
     0.         -0.76550657  0.         -1.2121226 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.2939284  -0.6664417   1.0944095  -0.9307652
     1.9321237   0.         -1.0086931   0.         -1.0981184
    -0.17558157 -0.6813237   0.          2.0184846   2.723674
     1.965517   -0.3890155  -0.6116224  -0.8915174   0.
     2.174478   -0.31343788 -0.37050772  2.7274547  -0.732317
    -0.6645267   2.7363398  -0.81176466  0.         -0.48554143
    -0.94631267  0.          0.          3.0614772   0.
     0.         -0.45197773  0.         -0.17456028  0.        ]
   [ 0.          0.3102405   0.         -0.40789098 -0.20388764
    -0.8534827   2.6192143  -0.29352617 -0.20434237  0.
    -0.4168935  -0.33542234  0.         -0.31710532  0.
    -0.63299394 -0.65017277  1.4155529  -0.27495137 -0.6952557
    -0.6028707  -0.45625788 -0.7964553  -1.1527076   0.
     3.2639763  -1.3994683  -0.22975633 -0.47288007  1.0070593
     1.9201012   0.86690414 -2.215272   -1.1711957  -0.71533036
     0.         -0.76550657  0.         -1.2121226 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.2939284  -0.6664417   1.0944095  -0.9307652
     1.9321237   0.         -1.0086931   0.         -1.0981184
    -0.17558157 -0.6813237   0.          2.0184846   2.723674
     1.965517   -0.3890155  -0.6116224  -0.8915174   0.
     2.174478   -0.31343788 -0.37050772  2.7274547  -0.732317
    -0.6645267   2.7363398  -0.81176466  0.         -0.48554143
    -0.94631267  0.          0.          3.0614772   0.
     0.         -0.45197773  0.         -0.17456028  0.        ]
   [ 0.          0.3102405   0.         -0.40789098 -0.20388764
    -0.8534827   2.6192143  -0.29352617 -0.20434237  0.
    -0.4168935  -0.33542234  0.         -0.31710532  0.
    -0.63299394 -0.65017277  1.4155529  -0.27495137 -0.6952557
    -0.6028707  -0.45625788 -0.7964553  -1.1527076   0.
     3.2639763  -1.3994683  -0.22975633 -0.47288007  1.0070593
     1.9201012   0.86690414 -2.215272   -1.1711957  -0.71533036
     0.         -0.76550657  0.         -1.2121226 

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29226294 -0.66145456  1.0820028  -0.92178714
     1.8857654   0.         -0.99818414  0.         -1.0856248
    -0.17461744 -0.6761542   0.          1.9666913   2.6104882
     1.9171069  -0.3866838  -0.6072645  -0.8832432   0.
     2.1117873  -0.3116405  -0.36831284  2.613851   -0.7264899
    -0.65956163  2.6217504  -0.80479527  5.         -0.4824251
    -0.93704295  0.          0.          2.9068925   0.
     0.         -0.44915065  0.         -0.17361476  0.        ]
   [ 0.          0.308465    0.         -0.40541542 -0.20276478
    -0.84585136  2.5171762   2.853775   -0.20321669  0.
    -0.4143487  -0.33347553  0.         -0.31528267  0.
    -0.6283972  -0.64537877  1.3937354  -0.27340254 -0.6899122
    -0.5986075  -0.45339462 -0.78971773 -1.1388729   0.
     3.0804837  -1.3782178  -0.228483   -0.46987376  0.9965846
     1.8744674   0.85905135 -2.1494932  -1.1568838  -0.7097285
     0.         -0.7592203   0.         -1.1967106

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29226294 -0.66145456  1.0820028  -0.92178714
     1.8857654   0.         -0.99818414  0.         -1.0856248
    -0.17461744 -0.6761542   0.          1.9666913   2.6104882
     1.9171069  -0.3866838  -0.6072645  -0.8832432   0.
     2.1117873  -0.3116405  -0.36831284  2.613851   -0.7264899
    -0.65956163  2.6217504  -0.80479527  5.         -0.4824251
    -0.93704295  0.          0.          2.9068925   0.
     0.         -0.44915065  0.         -0.17361476  0.        ]
   [ 0.          0.308465    0.         -0.40541542 -0.20276478
    -0.84585136  2.5171762   2.853775   -0.20321669  0.
    -0.4143487  -0.33347553  0.         -0.31528267  0.
    -0.6283972  -0.64537877  1.3937354  -0.27340254 -0.6899122
    -0.5986075  -0.45339462 -0.78971773 -1.1388729   0.
     3.0804837  -1.3782178  -0.228483   -0.46987376  0.9965846
     1.8744674   0.85905135 -2.1494932  -1.1568838  -0.7097285
     0.         -0.7592203   0.         -1.1967106

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29226294 -0.66145456  1.0820028  -0.92178714
     1.8857654   0.         -0.99818414  0.         -1.0856248
    -0.17461744 -0.6761542   0.          1.9666913   2.6104882
     1.9171069  -0.3866838  -0.6072645  -0.8832432   0.
     2.1117873  -0.3116405  -0.36831284  2.613851   -0.7264899
    -0.65956163  2.6217504  -0.80479527  5.         -0.4824251
    -0.93704295  0.          0.          2.9068925   0.
     0.         -0.44915065  0.         -0.17361476  0.        ]
   [ 0.          0.308465    0.         -0.40541542 -0.20276478
    -0.84585136  2.5171762   2.853775   -0.20321669  0.
    -0.4143487  -0.33347553  0.         -0.31528267  0.
    -0.6283972  -0.64537877  1.3937354  -0.27340254 -0.6899122
    -0.5986075  -0.45339462 -0.78971773 -1.1388729   0.
     3.0804837  -1.3782178  -0.228483   -0.46987376  0.9965846
     1.8744674   0.85905135 -2.1494932  -1.1568838  -0.7097285
     0.         -0.7592203   0.         -1.1967106

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29062518 -0.65657765  1.0700088  -0.91306406
     1.8425937   0.         -0.9879973   0.         -1.0735487
    -0.17366843 -0.67110044  0.          1.9186931   2.5103402
     1.8721081  -0.38439322 -0.60299844 -0.87519515  0.
     2.0542274  -0.30987334 -0.36615625  2.513361   -0.7207996
    -0.6547061   2.5204537  -0.7980025   5.         -0.47936767
    -0.9280404   0.          5.          2.7735927   0.
     0.         -0.44637582  0.         -0.17268406  0.        ]
   [ 0.          0.3067193   0.         -0.40298405 -0.20165977
    -0.8384211   2.42621     2.726878   -0.20210889  0.
     2.2789023  -0.33156192  0.         -0.31349072  0.
    -0.623899   -0.64068925  1.3728976  -0.2718792  -0.68468994
    -0.5944334  -0.45058444 -0.78314835 -1.1255251   0.
     2.924832   -1.3579079  -0.22723013 -0.4669238   0.98643005
     1.8319421   0.8514081  -2.0892494  -1.1430846  -0.7042563
     0.         -0.7530863   0.         -1.1818

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29062518 -0.65657765  1.0700088  -0.91306406
     1.8425937   0.         -0.9879973   0.         -1.0735487
    -0.17366843 -0.67110044  0.          1.9186931   2.5103402
     1.8721081  -0.38439322 -0.60299844 -0.87519515  0.
     2.0542274  -0.30987334 -0.36615625  2.513361   -0.7207996
    -0.6547061   2.5204537  -0.7980025   5.         -0.47936767
    -0.9280404   0.          5.          2.7735927   0.
     0.         -0.44637582  0.         -0.17268406  0.        ]
   [ 0.          0.3067193   0.         -0.40298405 -0.20165977
    -0.8384211   2.42621     2.726878   -0.20210889  0.
     2.2789023  -0.33156192  0.         -0.31349072  0.
    -0.623899   -0.64068925  1.3728976  -0.2718792  -0.68468994
    -0.5944334  -0.45058444 -0.78314835 -1.1255251   0.
     2.924832   -1.3579079  -0.22723013 -0.4669238   0.98643005
     1.8319421   0.8514081  -2.0892494  -1.1430846  -0.7042563
     0.         -0.7530863   0.         -1.1818

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29062518 -0.65657765  1.0700088  -0.91306406
     1.8425937   0.         -0.9879973   0.         -1.0735487
    -0.17366843 -0.67110044  0.          1.9186931   2.5103402
     1.8721081  -0.38439322 -0.60299844 -0.87519515  0.
     2.0542274  -0.30987334 -0.36615625  2.513361   -0.7207996
    -0.6547061   2.5204537  -0.7980025   5.         -0.47936767
    -0.9280404   0.          5.          2.7735927   0.
     0.         -0.44637582  0.         -0.17268406  0.        ]
   [ 0.          0.3067193   0.         -0.40298405 -0.20165977
    -0.8384211   2.42621     2.726878   -0.20210889  0.
     2.2789023  -0.33156192  0.         -0.31349072  0.
    -0.623899   -0.64068925  1.3728976  -0.2718792  -0.68468994
    -0.5944334  -0.45058444 -0.78314835 -1.1255251   0.
     2.924832   -1.3579079  -0.22723013 -0.4669238   0.98643005
     1.8319421   0.8514081  -2.0892494  -1.1430846  -0.7042563
     0.         -0.7530863   0.         -1.1818

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.29062518 -0.65657765  1.0700088  -0.91306406
     1.8425937   0.         -0.9879973   0.         -1.0735487
    -0.17366843 -0.67110044  0.          1.9186931   2.5103402
     1.8721081  -0.38439322 -0.60299844 -0.87519515  0.
     2.0542274  -0.30987334 -0.36615625  2.513361   -0.7207996
    -0.6547061   2.5204537  -0.7980025   5.         -0.47936767
    -0.9280404   0.          5.          2.7735927   0.
     0.         -0.44637582  0.         -0.17268406  0.        ]
   [ 0.          0.3067193   0.         -0.40298405 -0.20165977
    -0.8384211   2.42621     2.726878   -0.20210889  0.
     2.2789023  -0.33156192  0.         -0.31349072  0.
    -0.623899   -0.64068925  1.3728976  -0.2718792  -0.68468994
    -0.5944334  -0.45058444 -0.78314835 -1.1255251   0.
     2.924832   -1.3579079  -0.22723013 -0.4669238   0.98643005
     1.8319421   0.8514081  -2.0892494  -1.1430846  -0.7042563
     0.         -0.7530863   0.         -1.1818

Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.28901437 -0.6518068   1.0584054  -0.904584
    -0.5454207   0.         -0.9781161   0.         -1.0618668
    -0.17273416 -0.6661582   0.          1.8740487   2.4209046
     1.8301389  -0.3821426  -0.5988208  -0.86736315  0.
     2.0011344  -0.3081355  -0.36403677  2.4236412  -0.7152408
    -0.64995617  2.4300647  -0.79137874  5.         -0.47636747
    -0.9192925   0.          5.          2.657103    0.
     0.         -0.44365156  0.         -0.1717679   0.        ]
   [ 0.          0.30500245  0.         -0.40059552 -0.2005721
    -0.8311833   2.3444479   2.615538   -0.20101851  0.
     2.209495   -0.32968044  0.         -0.31172863  0.
    -0.61949587 -0.63610023  1.352968   -0.27038068 -0.67958444
    -0.5903452  -0.4478256  -0.7767401  -1.1126361   0.
     2.790626   -1.3384707  -0.22599721 -0.4640285   0.9765799
     1.7921878   0.8439654  -2.033806   -1.1297679  -0.6989085
     0.         -0.7470986   0.         -1.1675731 

Output of neural network for action <tensorflow.python.keras.layers.core.Dense object at 0x7efc081eebd0> [[1.0366623 1.2717732 1.1851196 0.        0.       ]]
Observations in ActorNet tf.Tensor(
[[[[ 0.         -0.28901437 -0.6518068   1.0584054  -0.904584
    -0.5454207   0.         -0.9781161   0.         -1.0618668
    -0.17273416 -0.6661582   0.          1.8740487   2.4209046
     1.8301389  -0.3821426  -0.5988208  -0.86736315  0.
     2.0011344  -0.3081355  -0.36403677  2.4236412  -0.7152408
    -0.64995617  2.4300647  -0.79137874  5.         -0.47636747
    -0.9192925   0.          5.          2.657103    0.
     0.         -0.44365156  0.         -0.1717679   0.        ]
   [ 0.          0.30500245  0.         -0.40059552 -0.2005721
    -0.8311833   2.3444479   2.615538   -0.20101851  0.
     2.209495   -0.32968044  0.         -0.31172863  0.
    -0.61949587 -0.63610023  1.352968   -0.27038068 -0.67958444
    -0.5903452  -0.4478256  -0.7767401  -1.1126361   0.
     2.790626   -1

Output of neural network for action <tensorflow.python.keras.layers.core.Flatten object at 0x7efc0824ea90> [[ 0.         -0.28901437 -0.6518068   1.0584054  -0.904584   -0.5454207
   0.         -0.9781161   0.         -1.0618668  -0.17273416 -0.6661582
   0.          1.8740487   2.4209046   1.8301389  -0.3821426  -0.5988208
  -0.86736315  0.          2.0011344  -0.3081355  -0.36403677  2.4236412
  -0.7152408  -0.64995617  2.4300647  -0.79137874  5.         -0.47636747
  -0.9192925   0.          5.          2.657103    0.          0.
  -0.44365156  0.         -0.1717679   0.          0.          0.30500245
   0.         -0.40059552 -0.2005721  -0.8311833   2.3444479   2.615538
  -0.20101851  0.          2.209495   -0.32968044  0.         -0.31172863
   0.         -0.61949587 -0.63610023  1.352968   -0.27038068 -0.67958444
  -0.5903452  -0.4478256  -0.7767401  -1.1126361   0.          2.790626
  -1.3384707  -0.22599721 -0.4640285   0.9765799   1.7921878   0.8439654
  -2.033806   -1.12976

In [None]:
# see how agent performs 
time_step = tf_env.reset()
for _ in range(50):
    tf_env.reset()
    episode_reward = 0
    episode_steps = 0
    while not tf_env.current_time_step().is_last():
        action = agent.policy.action(tf_env.current_time_step()).action
        print('predicted action', action)
        next_time_step = tf_env.step(action)
        episode_steps += 1
        print("Reward", next_time_step.reward.numpy())
        episode_reward += next_time_step.reward.numpy()
    print("Steps:", episode_steps, "Reward:", episode_reward)

In [None]:
dir(agent.collect_policy)

In [None]:
agent.collect_policy._action_spec

In [None]:
agent.collect_policy._policy_step_spec