In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from state import get_state, queue
import os
import traci

In [3]:
from dotenv import load_dotenv
load_dotenv()


sumo_bin = os.getenv("SUMO")
sumo_gui_bin = os.getenv("SUMO-GUI")
simulConfig = os.getenv("SIMUL-CONFIG")

In [4]:
import tensorflow as tf
import numpy as np
from collections import deque

In [13]:

class DuelingDQN(tf.keras.Model):
    def __init__(self, input_shape, n_outputs):
        super(DuelingDQN, self).__init__()

        # Couche d'entrée commune
        self.shared_layers = tf.keras.Sequential([
            tf.keras.layers.Dense(128, activation="relu", input_shape=input_shape),
            tf.keras.layers.Dense(128, activation="relu"),
        ])

        # **Stream Valeur** V(s)
        self.value_stream = tf.keras.layers.Dense(1)  # Une seule sortie : la valeur de l'état

        # **Stream Avantage** A(s, a)
        self.advantage_stream = tf.keras.layers.Dense(n_outputs)  # Une sortie par action

    def call(self, inputs):
        x = self.shared_layers(inputs)

        V = self.value_stream(x)  # Calcul de la valeur de l’état
        A = self.advantage_stream(x)  # Calcul des avantages

        # Normalisation de A pour éviter le biais
        Q = V + (A - tf.reduce_mean(A, axis=1, keepdims=True))

        return Q


In [14]:
tf.random.set_seed(42)
input_shape = [48]  # Taille de l'état (exemple)
n_outputs = 4  # Nombre d'actions possibles

# Création du modèle principal et du réseau cible
model_action = DuelingDQN(input_shape, n_outputs)
target = DuelingDQN(input_shape, n_outputs)
target.set_weights(model_action.get_weights())  # Copie des poids initiaux

In [15]:
def epsilon_greedy_policy(state, epsilon=0):
    if np.random.rand() < epsilon:
        return np.random.randint(n_outputs)  # random action
    else:
        Q_values = model_action.predict(state[np.newaxis], verbose=0)[0]
        print(f"state : {state}")
        print(f"Q_values : {Q_values}")
        return Q_values.argmax()  # optimal action according to the DQN

In [16]:
def sample_experiences(batch_size):
    indices = np.random.randint(len(replay_buffer), size=batch_size)
    batch = [replay_buffer[index] for index in indices]
    states, actions, rewards, next_states = [
        np.array([experience[field_index] for experience in batch])
        for field_index in range(4)
    ]
    return states, actions, rewards, next_states

In [17]:
batch_size = 32
discount_factor = 0.5
optimizer = tf.keras.optimizers.Adam(learning_rate=0.05)
loss_fn = tf.keras.losses.MeanSquaredError()


def training_step(batch_size):
    experiences = sample_experiences(batch_size)
    states, actions, rewards, next_states = experiences  # a changer
        #################### CHANGED SECTION ####################
    next_Q_values = model_action.predict(next_states, verbose=0)  # ≠ target.predict()
    best_next_actions = next_Q_values.argmax(axis=1)
    next_mask = tf.one_hot(best_next_actions, n_outputs).numpy()
    max_next_Q_values = (target.predict(next_states, verbose=0) * next_mask
                        ).sum(axis=1)
    #########################################################
    next_Q_values = target.predict(next_states, verbose=0)
    max_next_Q_values = next_Q_values.max(axis=1)
    # runs = 1.0 - (dones | truncateds)  # episode is not done or truncated
    target_Q_values = rewards + discount_factor * max_next_Q_values
    target_Q_values = target_Q_values.reshape(-1, 1)
    mask = tf.one_hot(actions, n_outputs)
    with tf.GradientTape() as tape:
        all_Q_values = model_action(states)
        Q_values = tf.reduce_sum(all_Q_values * mask, axis=1, keepdims=True)
        loss = tf.reduce_mean(loss_fn(target_Q_values, Q_values))

    grads = tape.gradient(loss, model_action.trainable_variables)
    optimizer.apply_gradients(zip(grads, model_action.trainable_variables))

In [None]:
sumoConfig3 = r"Traditional_traffic/traditional_traffic.sumo.cfg"

replay_buffer = deque(maxlen=10000)


sumoCmd = [sumo_bin, "-c",simulConfig,'--start','--no-warnings']
print(sumoCmd)
for episode in range(40):
    if traci.isLoaded():
        traci.close()
    traci.start(sumoCmd)
    lane_ids =  traci.lane.getIDList()
    # print(lane_ids[0])

    trafic_light_ids = traci.trafficlight.getIDList()

    # state = np.array(queue(lane_ids))
    state=np.array(get_state(lane_ids))
    action=-1
    # print(state)
    for step in range(130000): ## TO CHANGED
        epsilon = max(1 - episode+120 / 160, 0.01)

        if step%2000 == 0:
            #######################################################################
            next_state = np.array(get_state(lane_ids))
            reward = np.sum(state[:24])- np.sum(next_state[:24])
            replay_buffer.append((state, action, reward, next_state))
            #######################################################################

            state=next_state
            action = epsilon_greedy_policy(state, epsilon)
            traci.trafficlight.setPhase(trafic_light_ids[0],2*action)

            if len(replay_buffer) >= batch_size*10:
                training_step(batch_size)

        traci.simulationStep()

    if episode%5==0:
        target.set_weights(model_action.get_weights())
    print(f'episode : {episode}')
    traci.close()


 Retrying in 1 seconds




Step #1260.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 924 ACT 61 BUF 0)                  #180.00 (1ms ~= 10.00*RT, ~77000.00UPS, TraCI: 1ms, vehicles TOT 148 ACT 77 BUF 8)   0.00 (0ms ?*RT. ?UPS, TraCI: 2ms, vehicles TOT 280 ACT 65 BUF 8)                   (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 412 ACT 53 BUF 8)                  s ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 559 ACT 70 BUF 5)                  RT. ?UPS, TraCI: 1ms, vehicles TOT 703 ACT 76 BUF 5)                   ?UPS, TraCI: 1ms, vehicles TOT 835 ACT 84 BUF 5)                 episode : 0
Step #1300.00 (0ms ?*RT. ?UPS, TraCI: 2ms, vehicles TOT 924 ACT 60 BUF 0)                 
 Retrying in 1 seconds




state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-14.45553  -10.549704 -10.3311   -10.484062]
state : [ 0  2  6  0  0  2  0  2  2  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0
  0  2 10  2  2  4  4  2  2  0  3  2  0  0  0  3  4  0  0  0  0  0  0  0]
Q_values : [ 0.6969881   0.88783586 -2.467606   -0.37102592]
Stepstate : [ 0  0  5  0  3  1  0  0  1  0  9  1  0  0  0  0  0  0  0  0  0  0  0  0 0)   
  0  0 10  0  4  2  4  0  2  0 13  2  2  2  1  4  0  0  2  3  0  2  0  0]
Q_values : [1.3208722  4.4934735  4.3333554  0.46226048]
state : [ 0  0  3  0  6  0  0  0  0  0 16  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  0  7  2  6  0  5  0  0  1 20  0  2  0  1  2  0  1  0  0  3  0  0  0]
Q_values : [ 5.9033813 -0.5551715 14.26734    2.647236 ]
state : [ 0  0 13  0  0  2  0  0  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  0 17  1  2  4  5  0  0  1  7  0  0  0  0  1  1  0  0  0  0  0  0  0]
Q_values : [-0.8799108   2.4646



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.6216574 -3.7940562 -3.988247  -3.8893688]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-14.45553  -10.549704 -10.3311   -10.484062]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [ 0.6371434   0.76693666 -2.5841815  -0.54121864]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-6.0034847 -6.4176893 -4.258577  -2.8215158]
state : [0 2 2 0 2 0 0 2 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 3 0 4 2 0 1 9 0 2
 0 1 4 0 0 0 0 3 0 0 0]
Q_values : [-0.18282914 -0.88344353  1.737735   -2.5936043 ]
state : [ 0  2  6  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  1  2  5  2  0  1  3  0  0  0  0  3  5  4  0  0  0  0  2  0]
Q_values : [-1.39



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.6216574 -3.7940562 -3.988247  -3.8893688]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-14.45553  -10.549704 -10.3311   -10.484062]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [ 0.6371434   0.76693666 -2.5841815  -0.54121864]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-6.0034847 -6.4176893 -4.258577  -2.8215158]
state : [0 2 2 0 2 0 0 2 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 3 0 4 2 0 1 9 0 2
 0 1 4 0 0 0 0 3 0 0 0]
Q_values : [-0.18282914 -0.88344353  1.737735   -2.5936043 ]
state : [ 0  2  6  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  1  2  5  2  0  1  3  0  0  0  0  3  5  4  0  0  0  0  2  0]
Q_values : [-1.39



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.6216574 -3.7940562 -3.988247  -3.8893688]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-14.45553  -10.549704 -10.3311   -10.484062]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [ 0.6371434   0.76693666 -2.5841815  -0.54121864]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-6.0034847 -6.4176893 -4.258577  -2.8215158]
state : [0 2 2 0 2 0 0 2 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 3 0 4 2 0 1 9 0 2
 0 1 4 0 0 0 0 3 0 0 0]
Q_values : [-0.18282914 -0.88344353  1.737735   -2.5936043 ]
state : [ 0  2  6  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  1  2  5  2  0  1  3  0  0  0  0  3  5  4  0  0  0  0  2  0]
Q_values : [-1.39



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.0857153 -3.7245877 -4.181203  -4.666076 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-17.561844 -12.051014 -12.872757 -15.431156]
state : [0 2 0 0 2 2 0 2 0 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 2 2 2 6 2 0 1 7 2 2
 0 0 2 0 3 2 0 0 2 0 2]
Q_values : [-0.9562988 -1.3047945  1.1900408 -0.8840104]
state : [0 2 2 0 0 2 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 6 2 2 4 4 2 0 0 3 2 0
 0 0 4 4 3 0 0 0 0 1 0]
Q_values : [-5.3134584 -3.7403088 -4.5513487 -5.034334 ]
state : [0 2 0 0 2 4 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 4 3 4 4 2 0 1 6 2 2
 0 0 3 1 5 0 0 0 0 0 0]
Q_values : [-4.781534  -4.3542647 -1.377845  -1.7018981]
state : [0 2 3 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 1 1 6 5 2 0 1 3 2 0
 0 0 3 3 3 0 0 0 0 2 0]
Q_values : [-1.7633811   0.76264966 -3.2038383  -2.6950479 ]
state : [0 2



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.7398963 -2.9636354 -4.0334015 -3.2049022]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-14.203784   -9.753445   -9.4504595  -9.3029175]
state : [0 2 2 0 2 0 0 2 2 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 2 0 6 2 2 1 7 0 2
 0 2 2 0 0 2 0 2 2 0 0]
Q_values : [-2.985694   0.9135542 -1.7610917 -3.8495069]
state : [ 0  2  0  0  2  0  0  2  0  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  2  3  2  4  2  4  2  0  0 10  0  0  0  0  4  0  7  0  0  0  0  0  1]
Q_values : [-3.4373324 -2.9551275  3.8432071 -1.0106655]
state : [0 2 2 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 2 4 2 0 1 3 0 2
 0 0 4 5 4 0 0 0 0 2 1]
Q_values : [-6.361442  -3.1476707 -3.8406105 -3.6424985]
state : [0 2 0 0 0 2 0 2 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 1 2 4 6 2 0 1 7 0 0
 0 0 2 0 6 0 0 0 0 0 0]
Q_values : [-6.158363



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.3512664 -2.4228935 -3.2988265 -2.8919334]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-10.105371 -12.882288 -11.457606 -15.09326 ]
state : [0 0 2 0 2 2 0 0 2 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 7 2 2 2 5 0 2 1 7 2 2
 1 0 2 0 0 1 1 1 2 0 0]
Q_values : [-2.9303007  -0.6049566  -0.69377327 -2.9127245 ]
state : [ 0  0  0  0  2  2  0  0  0  0  6  2  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  3  2  4  4  4  0  0  0 10  2  0  0  0  4  0  7  0  0  0  0  0  1]
Q_values : [-3.6849282  -3.1645672   2.308995   -0.04518998]
state : [0 0 2 0 0 4 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 6 4 1 4 4 0 0 1 3 2 2
 0 0 3 6 4 0 0 0 0 2 1]
Q_values : [-8.434507  -6.20093   -4.6488247 -4.6712246]
state : [ 0  0  6  0  0  4  0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0
  1  0 10  1  1  6  5  0  0  1  3  2  0  0 



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.7735577 -2.904879  -3.5714612 -3.6214461]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-13.371323 -13.613213 -11.822719 -14.130219]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [-2.90414    1.23347   -2.0842175 -1.4633837]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-6.3191605 -3.6365395 -2.527428  -1.2905815]
state : [0 2 2 0 2 0 0 2 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 3 0 4 2 0 1 9 0 2
 0 1 4 0 0 0 0 3 0 0 0]
Q_values : [-0.24288669 -0.92510456  3.519043   -2.9081006 ]
state : [ 0  2  6  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  1  2  5  2  0  1  3  0  0  0  0  3  5  4  0  0  0  0  2  0]
Q_values : [-2.711356



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.4530888 -3.082121  -3.533094  -2.8814492]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ -9.276674 -12.715954 -11.850243 -15.774473]
state : [0 0 2 0 2 2 0 0 2 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 7 2 2 2 5 0 2 1 7 2 2
 1 0 2 0 0 1 1 1 2 0 0]
Q_values : [-2.120123  -0.5481103  0.2937976 -2.060484 ]
state : [ 0  0  6  0  0  2  0  0  2  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0
  0  0 10  2  2  4  4  0  2  0  3  2  0  0  0  3  5  2  0  0  0  0  1  0]
Q_values : [-3.6769567   0.68842447 -1.8465636  -0.82127976]
state : [0 0 0 0 2 4 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 3 4 3 4 4 0 0 1 6 2 2
 0 0 3 1 9 0 0 0 0 0 1]
Q_values : [-4.83197   -3.5615993 -0.8285624 -1.3719871]
state : [0 0 3 0 0 4 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 7 1 1 6 5 0 0 1 3 2 0
 0 0 3 4 4 0 0 0 0 2 0]
Q_values : [-5.018014



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-6.0412674 -3.4447718 -4.006813  -3.143891 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.362479 -14.549455 -14.893714 -18.121613]
state : [0 0 2 0 2 2 0 0 2 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 7 2 2 2 5 0 2 1 7 2 2
 1 0 2 0 0 1 1 1 2 0 0]
Q_values : [-3.0150113 -1.4841359 -0.8073487 -2.9659917]
state : [ 0  0  6  0  0  2  0  0  2  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0
  0  0 10  2  2  4  4  0  2  0  3  2  0  0  0  3  5  2  0  0  0  0  1  0]
Q_values : [-4.869015   -0.34113765 -2.178647   -0.99076235]
state : [0 0 0 0 2 4 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 3 4 3 4 4 0 0 1 6 2 2
 0 0 3 1 9 0 0 0 0 0 1]
Q_values : [-7.5300045  -5.4552636  -0.86781645 -0.9355793 ]
state : [0 0 3 0 0 4 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 7 1 1 6 5 0 0 1 3 2 0
 0 0 3 4 4 0 0 0 0 2 0]
Q_values : [-7.21



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.829191  -3.442164  -4.0863056 -3.8038707]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-13.962647 -12.798452 -10.007381 -12.496861]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [-0.80268836  1.6406984  -1.798657   -0.8956307 ]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-5.4239635 -3.272282  -2.0938778 -1.500071 ]
state : [0 2 2 0 2 0 0 2 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 3 0 4 2 0 1 9 0 2
 0 1 4 0 0 0 0 3 0 0 0]
Q_values : [-0.82874095  0.68965006  3.1774118  -2.6026795 ]
state : [ 0  2  6  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  1  2  5  2  0  1  3  0  0  0  0  3  5  4  0  0  0  0  2  0]
Q_values : [-0.95



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.808902  -3.3771408 -4.3227663 -3.1168585]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.2251835 -10.573913   -9.764432   -9.640833 ]
state : [0 2 2 0 2 0 0 2 2 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 2 0 6 2 2 1 7 0 2
 0 2 2 0 0 2 0 2 2 0 0]
Q_values : [ 1.2414846  3.6205232  1.1174327 -2.3919072]
state : [ 0  2  0  0  2  0  0  2  0  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  2  3  2  4  2  4  2  0  0 10  0  0  0  0  4  0  7  0  0  0  0  0  1]
Q_values : [-1.002833  -0.6903907  3.2613966 -2.7493112]
state : [0 2 2 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 2 4 2 0 1 3 0 2
 0 0 4 5 4 0 0 0 0 2 1]
Q_values : [-5.683662  -2.3823256 -4.3194714 -6.681508 ]
state : [0 2 0 0 0 2 0 2 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 1 2 4 6 2 0 1 7 0 0
 0 0 2 0 6 0 0 0 0 0 0]
Q_values : [-4.258468



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-6.422105  -3.988254  -4.5478544 -3.5964632]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.419538 -11.183168 -10.512545 -11.43014 ]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [-1.860077   2.2487516 -2.3375795 -0.9400694]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-7.404167  -5.375766  -3.9125075 -1.814714 ]
state : [0 2 2 0 2 0 0 2 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 3 0 4 2 0 1 9 0 2
 0 1 4 0 0 0 0 3 0 0 0]
Q_values : [-1.0398302  -0.38865465  3.1321175  -2.647964  ]
state : [ 0  2  6  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  1  2  5  2  0  1  3  0  0  0  0  3  5  4  0  0  0  0  2  0]
Q_values : [-2.658888



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.575446  -3.7908094 -3.6093826 -3.5362027]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-11.678673 -11.018242 -11.693442 -13.61683 ]
state : [0 2 0 0 2 2 0 2 0 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 2 2 2 6 2 0 1 7 2 2
 0 0 2 0 3 2 0 0 2 0 2]
Q_values : [-1.5231655  -0.55650026  1.1970313  -0.7947356 ]
state : [0 2 2 0 0 2 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 6 2 2 4 4 2 0 0 3 2 0
 0 0 4 4 3 0 0 0 0 1 0]
Q_values : [-5.2777634 -3.6488762 -3.8065991 -3.8143442]
state : [0 2 0 0 2 4 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 4 3 4 4 2 0 1 6 2 2
 0 0 3 1 5 0 0 0 0 0 0]
Q_values : [-2.0501187  -1.5570153   0.331123    0.16622007]
state : [0 2 3 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 1 1 6 5 2 0 1 3 2 0
 0 0 3 3 3 0 0 0 0 2 0]
Q_values : [-2.8360493  -1.1732144  -3.2097178  -0.38635433]
stat



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.283289  -4.3298035 -4.020035  -4.4647217]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-13.941144 -10.492543 -11.012321 -12.473882]
state : [0 2 0 0 2 2 0 2 0 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 2 2 2 6 2 0 1 7 2 2
 0 0 2 0 3 2 0 0 2 0 2]
Q_values : [-0.5808561 -0.533844   2.602329   0.7464475]
state : [0 2 2 0 0 2 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 6 2 2 4 4 2 0 0 3 2 0
 0 0 4 4 3 0 0 0 0 1 0]
Q_values : [-4.7121124 -2.873125  -3.6141193 -3.462874 ]
state : [0 2 0 0 2 4 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 4 3 4 4 2 0 1 6 2 2
 0 0 3 1 5 0 0 0 0 0 0]
Q_values : [-2.197151  -0.8244593  1.0379286  0.6949339]
state : [0 2 3 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 1 1 6 5 2 0 1 3 2 0
 0 0 3 3 3 0 0 0 0 2 0]
Q_values : [-2.865941   -0.87637806 -4.3117847  -0.9969313 ]
state : [0 2



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.021991 -4.083812 -4.313337 -4.200325]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-11.809614  -9.699727  -9.472115  -7.493116]
state : [0 2 2 0 2 0 0 2 2 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 2 0 6 2 2 1 7 0 2
 0 2 2 0 0 2 0 2 2 0 0]
Q_values : [-2.8143816  3.4456906  1.9551936 -1.8736787]
state : [ 0  2  0  0  2  0  0  2  0  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  2  3  2  4  2  4  2  0  0 10  0  0  0  0  4  0  7  0  0  0  0  0  1]
Q_values : [-3.1206014 -1.7024615  3.0983548 -2.9115164]
state : [0 2 2 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 2 4 2 0 1 3 0 2
 0 0 4 5 4 0 0 0 0 2 1]
Q_values : [-5.0950203 -2.4608822 -3.574603  -4.835497 ]
state : [0 2 0 0 0 2 0 2 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 1 2 4 6 2 0 1 7 0 0
 0 0 2 0 6 0 0 0 0 0 0]
Q_values : [-6.447579 -4.8825



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.9136076 -4.034113  -3.877843  -3.7149713]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-10.403938   -9.8516445  -8.5672865 -10.947334 ]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [-3.3636284  1.9413736 -4.0221577 -0.9793688]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-6.971755  -4.9008965 -3.3600292 -2.135132 ]
state : [0 2 2 0 2 0 0 2 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 3 0 4 2 0 1 9 0 2
 0 1 4 0 0 0 0 3 0 0 0]
Q_values : [-4.2902055 -1.2843494  2.5480568 -2.6620722]
state : [ 0  2  6  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  1  2  5  2  0  1  3  0  0  0  0  3  5  4  0  0  0  0  2  0]
Q_values : [-3.394647



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.770223  -3.7252162 -4.0748825 -3.762247 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.5522175 -10.825605  -11.482132  -11.761547 ]
state : [0 2 0 0 2 2 0 2 0 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 2 2 2 6 2 0 1 7 2 2
 0 0 2 0 3 2 0 0 2 0 2]
Q_values : [ 0.27918643 -0.13517714  3.3958235   0.87651944]
state : [0 2 2 0 0 2 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 6 2 2 4 4 2 0 0 3 2 0
 0 0 4 4 3 0 0 0 0 1 0]
Q_values : [-5.071742  -2.6134055 -3.5837762 -2.5300777]
state : [0 2 6 0 2 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 9 4 3 0 4 2 0 1 6 0 2
 0 1 3 1 0 0 0 3 0 0 0]
Q_values : [-3.4125948  3.239831   3.005023  -3.5530272]
state : [ 0  2  0  0  2  0  0  2  0  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2  3  1  4  2  6  2  0  1 10  0  0  0  0  3  0  8  0  0  0  0  0  0]
Q_values : [-3.97



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.7610383 -4.237433  -4.1278467 -3.5727715]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-14.20285  -12.272405 -10.037391 -10.850399]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [-3.793438   2.314348  -2.0439103 -1.618721 ]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-6.205819  -4.465243  -1.3812444 -2.1602888]
state : [0 2 2 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 4 4 2 0 1 3 2 2
 0 0 4 3 2 0 0 0 0 1 0]
Q_values : [-4.747078  -2.3550334 -2.2235527 -0.7256999]
state : [ 0  2  6  0  0  0  0  2  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  2  2  6  2  0  1  7  0  0  0  1  2  0  0  0  0  3  0  0  0]
Q_values : [-5.042206    



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.32248   -4.0048985 -3.6808133 -3.6202972]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-11.320136 -11.581236  -9.393511  -8.852793]
state : [0 2 2 0 2 0 0 2 2 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 2 0 6 2 2 1 7 0 2
 0 2 2 0 0 2 0 2 2 0 0]
Q_values : [-3.5747259  1.6723626  2.1073906 -5.087042 ]
state : [ 0  2  6  0  0  0  0  2  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  2 10  2  1  2  4  2  2  0  3  0  0  0  0  4  4  2  0  0  0  0  2  0]
Q_values : [-4.824576   2.8975816 -2.0144868 -4.5378647]
state : [0 2 0 0 1 2 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 4 2 2 4 2 0 1 6 0 2
 0 0 4 0 9 0 0 0 0 0 1]
Q_values : [-5.30972   -5.5293517 -0.5420215 -3.6089506]
state : [0 2 3 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 1 1 4 5 2 0 1 3 0 0
 0 0 3 4 4 0 0 0 0 2 0]
Q_values : [-5.047551  -3



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.7997336 -3.374711  -3.47926   -3.389613 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.113569  -10.464868   -7.1482196 -10.562673 ]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [-1.9900212  2.2319696 -2.0132296 -1.2420757]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-5.124802  -4.5199957 -1.8438219 -2.135797 ]
state : [0 2 2 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 4 4 2 0 1 3 2 2
 0 0 4 3 2 0 0 0 0 1 0]
Q_values : [-3.7614403 -2.3538184 -2.9625509 -0.9380901]
state : [ 0  2  6  0  0  0  0  2  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  2  2  6  2  0  1  7  0  0  0  1  2  0  0  0  0  3  0  0  0]
Q_values : [-3.384541



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.113968  -3.6047144 -3.7359529 -2.9319243]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-13.351698 -10.206921 -10.602542 -10.926768]
state : [0 2 0 0 2 2 0 2 0 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 2 2 2 6 2 0 1 7 2 2
 0 0 2 0 3 2 0 0 2 0 2]
Q_values : [-0.85333866 -1.5696013   2.17401    -0.2217474 ]
state : [0 2 2 0 0 2 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 6 2 2 4 4 2 0 0 3 2 0
 0 0 4 4 3 0 0 0 0 1 0]
Q_values : [-6.114177  -4.656932  -4.5117426 -4.4198136]
state : [0 2 6 0 2 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 9 4 3 0 4 2 0 1 6 0 2
 0 1 3 1 0 0 0 3 0 0 0]
Q_values : [-4.4058237  3.1448033  2.2114708 -5.1548777]
state : [ 0  2  0  0  2  0  0  2  0  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2  3  1  4  2  6  2  0  1 10  0  0  0  0  3  0  8  0  0  0  0  0  0]
Q_values : [-2.545117



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-3.9287252 -3.559612  -3.5781841 -2.955978 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-13.079626 -11.639571 -10.567766  -9.702199]
state : [0 2 2 0 2 0 0 2 2 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 2 0 6 2 2 1 7 0 2
 0 2 2 0 0 2 0 2 2 0 0]
Q_values : [-2.764313   3.156959   1.2325118 -3.5305107]
state : [ 0  2  0  0  2  0  0  2  0  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  2  3  2  4  2  4  2  0  0 10  0  0  0  0  4  0  7  0  0  0  0  0  1]
Q_values : [-2.3720038 -1.4618915  3.358759  -1.1491303]
state : [0 2 2 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 2 4 2 0 1 3 0 2
 0 0 4 5 4 0 0 0 0 2 1]
Q_values : [-4.955585  -1.6298506 -3.1281817 -5.8043833]
state : [0 2 0 0 0 2 0 2 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 1 2 4 6 2 0 1 7 0 0
 0 0 2 0 6 0 0 0 0 0 0]
Q_values : [-5.714017  -4



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.0749154 -3.780776  -3.5406513 -3.2064545]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-13.382804 -11.959911 -10.210136  -9.140889]
state : [0 2 2 0 2 0 0 2 2 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 2 0 6 2 2 1 7 0 2
 0 2 2 0 0 2 0 2 2 0 0]
Q_values : [-3.2341077  2.3205729  1.056839  -3.4260335]
state : [ 0  2  0  0  2  0  0  2  0  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  2  3  2  4  2  4  2  0  0 10  0  0  0  0  4  0  7  0  0  0  0  0  1]
Q_values : [-2.079047   -1.6202492   4.1142287  -0.87197924]
state : [0 2 2 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 2 4 2 0 1 3 0 2
 0 0 4 5 4 0 0 0 0 2 1]
Q_values : [-4.6087427 -2.8336477 -3.3202472 -3.842051 ]
state : [0 2 0 0 0 2 0 2 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 1 2 4 6 2 0 1 7 0 0
 0 0 2 0 6 0 0 0 0 0 0]
Q_values : [-7.183896



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.474928  -3.4193428 -3.1479318 -2.914331 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-13.1617155 -11.116289   -8.575196   -9.57427  ]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [-2.7028172  2.0983899 -0.3595536  0.6234059]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-5.4156365  -3.9353132  -1.5288064  -0.25356913]
state : [0 2 2 0 2 0 0 2 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 3 0 4 2 0 1 9 0 2
 0 1 4 0 0 0 0 3 0 0 0]
Q_values : [-2.1593351  -0.97147804  3.5573545  -1.3025541 ]
state : [ 0  2  6  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  1  2  5  2  0  1  3  0  0  0  0  3  5  4  0  0  0  0  2  0]
Q_values : [-



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.401671  -3.3647137 -2.857027  -2.9877772]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-14.467264  -9.080585 -10.019273 -10.656298]
state : [0 2 0 0 2 2 0 2 0 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 2 2 2 6 2 0 1 7 2 2
 0 0 2 0 3 2 0 0 2 0 2]
Q_values : [-0.22281098 -0.33291352  2.5198019   0.21347412]
state : [0 2 2 0 0 2 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 6 2 2 4 4 2 0 0 3 2 0
 0 0 4 4 3 0 0 0 0 1 0]
Q_values : [-6.2960815 -3.232472  -3.1285641 -2.954241 ]
state : [0 2 6 0 2 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 9 4 3 0 4 2 0 1 6 0 2
 0 1 3 1 0 0 0 3 0 0 0]
Q_values : [-1.8825696   1.5014954   0.24689561 -2.4131887 ]
state : [ 0  2  0  0  2  0  0  2  0  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2  3  1  4  2  6  2  0  1 10  0  0  0  0  3  0  8  0  0  0  0  0  0]
Q_values : [-2.04



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.1938167 -3.434176  -3.3815286 -4.6848383]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-10.482264  -9.358299  -8.706555 -13.37307 ]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 1 1]
Q_values : [ 0.77452844  2.0728989   0.09071285 -0.94469476]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-3.2867308 -2.8505905 -1.2495751 -2.324496 ]
state : [0 2 2 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 4 4 2 0 1 3 2 2
 0 0 4 3 2 0 0 0 0 1 0]
Q_values : [-1.8699319  -1.2893662  -1.3061719  -0.40963018]
state : [ 0  2  6  0  0  0  0  2  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  2  2  6  2  0  1  7  0  0  0  1  2  0  0  0  0  3  0  0  0]
Q_values : [-0.36



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-1.5975556 -4.6220746 -4.1515603 -4.279535 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.079378 -12.450696 -11.286673 -13.604393]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 1 1]
Q_values : [-1.4364067   1.5674036  -0.41690853 -1.2222548 ]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-5.910142  -4.9809113 -1.9282756 -3.053452 ]
state : [0 2 2 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 4 4 2 0 1 3 2 2
 0 0 4 3 2 0 0 0 0 1 0]
Q_values : [-3.655777  -2.224196  -1.4809049 -1.1534436]
state : [ 0  2  6  0  0  0  0  2  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  2  2  6  2  0  1  7  0  0  0  1  2  0  0  0  0  3  0  0  0]
Q_values : [-4.619666



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.8360162 -3.3038387 -3.0371432 -3.6221704]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.911146 -15.205511 -14.927211 -17.121088]
state : [0 0 2 0 2 2 0 0 2 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 7 2 2 2 5 0 2 1 7 2 2
 1 0 2 0 0 1 1 1 2 0 0]
Q_values : [ 0.24182159  2.9826837   2.325035   -1.5782726 ]
state : [ 0  0  0  0  2  2  0  0  0  0  6  2  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  3  2  4  4  4  0  0  0 10  2  0  0  0  4  0  7  0  0  0  0  0  1]
Q_values : [ 1.7014263 -3.2092683  3.6049476 -1.9156926]
state : [0 0 2 0 0 4 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 6 4 1 4 4 0 0 1 3 2 2
 0 0 3 6 4 0 0 0 0 2 1]
Q_values : [-2.4840317 -3.2496676 -2.133363  -2.2857096]
state : [ 0  0  6  0  0  4  0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0
  1  0 10  1  1  6  5  0  0  1  3  2  0  0  0  



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.169037  -4.747755  -3.5766034 -3.524843 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-10.509091 -11.111388  -8.442715 -12.879887]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [-0.9558546  1.7448056 -1.4523153 -1.3699296]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-4.7207785 -5.9659443 -1.6724701 -2.5824037]
state : [0 2 2 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 4 4 2 0 1 3 2 2
 0 0 4 3 2 0 0 0 0 1 0]
Q_values : [-3.3871229 -2.3819106 -3.9284084 -2.1898541]
state : [ 0  2  6  0  0  0  0  2  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  2  2  6  2  0  1  7  0  0  0  1  2  0  0  0  0  3  0  0  0]
Q_values : [-0.9474582   



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.875836 -10.075389 -10.320193 -11.541818]
state : [0 2 0 0 2 2 0 2 0 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 2 2 2 6 2 0 1 7 2 2
 0 0 2 0 3 2 0 0 2 0 2]
Q_values : [-0.07796022 -0.43810737  2.5853112  -1.3696557 ]
state : [0 2 2 0 0 2 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 6 2 2 4 4 2 0 0 3 2 0
 0 0 4 4 3 0 0 0 0 1 0]
Q_values : [-3.8920815 -2.9119914 -3.3902698 -3.291538 ]
state : [0 2 0 0 2 4 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 4 3 4 4 2 0 1 6 2 2
 0 0 3 1 5 0 0 0 0 0 0]
Q_values : [-1.1940514  -0.9635335   0.74748826 -0.03082895]
state : [0 2 3 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 1 1 6 5 2 0 1 3 2 0
 0 0 3 3 3 0 0 0 0 2 0]
Q_values : [-2.1341374  -1.1700863  -2.6296754   0.41362977]
state : [ 0  2  6  0  1  0  0  2  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  2 10  2  4  1  4  2  0  0  6  0  2  0  1  3  0  1  0  0  5  0  0  0]
Q_values : [-



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.1382244 -4.8114715 -3.9608848 -3.466238 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-6.7244573 -8.585672  -8.644649  -6.9857655]
state : [0 0 2 0 2 2 0 0 2 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 7 2 2 2 5 0 2 1 7 2 2
 1 0 2 0 0 1 1 1 2 0 0]
Q_values : [-1.0125594   2.9710712   2.8920045  -0.87747693]
state : [ 0  0  0  0  2  2  0  0  0  0  6  2  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  3  2  4  4  4  0  0  0 10  2  0  0  0  4  0  7  0  0  0  0  0  1]
Q_values : [-1.4063201 -2.3152115  3.6759734 -0.5817116]
state : [0 0 2 0 0 4 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 6 4 1 4 4 0 0 1 3 2 2
 0 0 3 6 4 0 0 0 0 2 1]
Q_values : [-4.358521  -4.058467  -2.4089308 -1.7236619]
state : [ 0  0  6  0  0  0  0  0  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  0 10  1  2  2  5  0  0  1  7  0  0  0  1  



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-3.443359  -4.864213  -4.30062   -3.5717816]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-13.145035 -12.494018 -11.359333 -11.801396]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 1 1]
Q_values : [-1.9207973   1.2837377  -0.19567126 -0.01387721]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-2.6858635 -3.1291423 -0.9441973 -1.3877983]
state : [0 2 2 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 4 4 2 0 1 3 2 2
 0 0 4 3 2 0 0 0 0 1 0]
Q_values : [-1.7575533  -1.0039066  -1.3326302  -0.98227215]
state : [ 0  2  6  0  0  0  0  2  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  2  2  6  2  0  1  7  0  0  0  1  2  0  0  0  0  3  0  0  0]
Q_values : [-5.60



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-3.2402923 -3.8523903 -4.872718  -3.3015218]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-13.179458   -7.3306303 -11.040286  -10.164393 ]
state : [0 2 0 0 2 2 0 2 0 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 2 2 2 6 2 0 1 7 2 2
 0 0 2 0 3 2 0 0 2 0 2]
Q_values : [0.42500663 2.1781778  2.7365804  1.0805079 ]
state : [0 2 2 0 0 2 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 6 2 2 4 4 2 0 0 3 2 0
 0 0 4 4 3 0 0 0 0 1 0]
Q_values : [-3.7553189 -3.1751645 -3.2309244 -3.0977228]
state : [0 2 6 0 2 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 9 4 3 0 4 2 0 1 6 0 2
 0 1 3 1 0 0 0 3 0 0 0]
Q_values : [-0.9247979  2.9947374  1.42659   -3.0242128]
state : [ 0  2  0  0  2  0  0  2  0  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2  3  1  4  2  6  2  0  1 10  0  0  0  0  3  0  8  0  0  0  0  0  0]
Q_values : [-0.467251



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.46722   -4.2989783 -4.4394197 -3.583334 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-11.938997  -10.902732  -11.716827  -11.1166525]
state : [0 2 0 0 2 2 0 2 0 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 2 2 2 6 2 0 1 7 2 2
 0 0 2 0 3 2 0 0 2 0 2]
Q_values : [ 0.21651286 -0.0279122   2.1654434  -0.36126077]
state : [0 2 2 0 0 2 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 6 2 2 4 4 2 0 0 3 2 0
 0 0 4 4 3 0 0 0 0 1 0]
Q_values : [-2.825343  -3.506535  -2.3940206 -3.3182821]
state : [0 2 6 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 9 4 1 4 4 2 0 1 3 2 2
 0 0 3 4 0 0 0 0 0 1 0]
Q_values : [1.0890186 3.9598544 3.4029305 1.7247777]
state : [0 2 0 0 0 4 0 2 0 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 1 2 6 6 2 0 1 7 2 0
 0 0 2 0 8 0 0 0 0 0 0]
Q_values : [-0.8157149  -2.529609    0.7577858   0.42027783]
state : 



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.1884165 -3.976118  -4.732705  -4.1579194]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.9722   -10.215965 -11.136139 -10.427279]
state : [0 2 0 0 2 2 0 2 0 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 2 2 2 6 2 0 1 7 2 2
 0 0 2 0 3 2 0 0 2 0 2]
Q_values : [1.4689276 1.519573  2.412034  2.5295773]
state : [ 0  2  2  0  2  0  0  2  0  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  2  6  2  4  2  4  2  0  0 10  0  0  0  1  4  0  1  0  0  1  0  0  0]
Q_values : [-1.7115932 -2.1202722  3.142397  -0.9480819]
state : [0 2 6 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 9 4 1 2 4 2 0 1 3 0 2
 0 0 4 5 4 0 0 0 0 2 1]
Q_values : [-1.3288559   1.6066549  -0.12676156 -2.0707145 ]
state : [0 2 0 0 0 2 0 2 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 1 2 4 6 2 0 1 7 0 0
 0 0 2 0 8 0 0 0 0 0 0]
Q_values : [-4.769677 -3.



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.054762  -4.5304084 -4.7159677 -4.44375  ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-11.340146 -13.375751 -13.582655 -11.950918]
state : [0 0 2 0 2 2 0 0 2 0 3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 7 2 2 2 5 0 2 1 7 2 2
 1 0 2 0 0 1 1 1 2 0 0]
Q_values : [-1.4605365  1.6818798  1.332685  -2.7299354]
state : [ 0  0  0  0  2  2  0  0  0  0  6  2  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  3  2  4  4  4  0  0  0 10  2  0  0  0  4  0  7  0  0  0  0  0  1]
Q_values : [-2.87962   -0.8085054  2.3697598 -2.902063 ]
state : [0 0 2 0 0 4 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 6 4 1 4 4 0 0 1 3 2 2
 0 0 3 6 4 0 0 0 0 2 1]
Q_values : [-2.9258623 -2.217699  -1.5967234 -1.3420615]
state : [ 0  0  6  0  0  0  0  0  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  0 10  1  2  2  5  0  0  1  7  0  0  0  1  1  1



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.010467  -3.9377444 -4.5800414 -4.424393 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-10.665113 -10.014373  -8.661282 -10.300933]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [-0.50413305  1.9768608  -0.36201483  0.36495703]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-5.289276  -4.2441535 -2.5052183 -2.8503923]
state : [0 2 2 0 0 4 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 1 4 4 2 0 1 3 2 2
 0 0 4 3 2 0 0 0 0 1 0]
Q_values : [-3.5926604 -2.520935  -2.521415  -1.6476558]
state : [ 0  2  6  0  0  0  0  2  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  2  2  6  2  0  1  7  0  0  0  1  2  0  0  0  0  3  0  0  0]
Q_values : [-4.069272



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-5.3530426 -4.8761396 -4.541785  -4.513396 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 2 2 2 4 2 2 3 3 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-11.340983 -12.169029  -9.766932 -12.227298]
state : [0 2 2 0 0 2 0 2 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 1 2 7 2 0 2 5 2 2 1 3 2 2
 0 0 2 2 0 2 0 0 1 2 0]
Q_values : [ 0.3367328   1.8083503  -0.69677323 -0.80258644]
state : [0 2 0 0 0 2 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 2 2 4 4 2 0 0 6 2 0
 0 0 3 0 8 0 0 0 0 0 1]
Q_values : [-5.8408365 -5.72272   -3.8066425 -3.607748 ]
state : [0 2 2 0 2 0 0 2 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 6 4 3 0 4 2 0 1 9 0 2
 0 1 4 0 0 0 0 3 0 0 0]
Q_values : [-6.69405103e-01  1.19268894e-04  3.81231475e+00 -2.12421393e+00]
state : [ 0  2  6  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 10  1  1  2  5  2  0  1  3  0  0  0  0  3  5  4  0  0  0  0  2  0]
Q

KeyboardInterrupt: 

In [36]:
model_action.save_weights("model_3DQN.weights.h5")
model_action.save("full_model.keras")

In [49]:
sumoCmd = [sumo_gui_bin, "-c",simulConfig,"--start", "--no-warnings"]

if traci.isLoaded():
    traci.close()
traci.start(sumoCmd)
lane_ids =  traci.lane.getIDList()
trafic_light_ids = traci.trafficlight.getIDList()
print(lane_ids)
state = np.array(get_state(lane_ids))
action=1
# print(state)
alpha=0.1
for step in range(130000): ## TO CHANGED
    nom_du_feu= traci.trafficlight.getIDList()[0]
    print()
    if step%2000 == 0:
        state=np.array(get_state(lane_ids))
        action = epsilon_greedy_policy(state,0)*2
        traci.trafficlight.setPhase(trafic_light_ids[0],action)
    traci.simulationStep()

traci.close()


 Retrying in 1 seconds
(':junction_0_0', ':junction_10_0', ':junction_11_0', ':junction_1_0', ':junction_2_0', ':junction_3_0', ':junction_4_0', ':junction_5_0', ':junction_6_0', ':junction_7_0', ':junction_8_0', ':junction_9_0', 'end1_junction_0', 'end1_junction_1', 'end1_junction_2', 'end2_junction_0', 'end2_junction_1', 'end2_junction_2', 'end3_junction_0', 'end3_junction_1', 'end3_junction_2', 'end4_junction_0', 'end4_junction_1', 'end4_junction_2', 'junction_end1_0', 'junction_end1_1', 'junction_end1_2', 'junction_end2_0', 'junction_end2_1', 'junction_end2_2', 'junction_end3_0', 'junction_end3_1', 'junction_end3_2', 'junction_end4_0', 'junction_end4_1', 'junction_end4_2')

state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.452952  -4.45352   -4.8745112 -4.1132607]























































































































































FatalTraCIError: Connection closed by SUMO.