In [73]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [74]:
from state import get_state, queue
import os
import traci

In [75]:
from dotenv import load_dotenv
load_dotenv()


# Paths needed to launch the simulation, read from the process environment
# (each value is None when the corresponding variable is not set).
sumo_bin, sumo_gui_bin, simulConfig = (
    os.getenv(env_key) for env_key in ("SUMO", "SUMO-GUI", "SIMUL-CONFIG")
)

In [76]:
import numpy as np

from tensorflow.keras import Sequential, layers
from tensorflow.random import set_seed
from collections import deque
from tensorflow import keras
from tensorflow import reduce_sum, reduce_mean, one_hot, GradientTape
from tensorflow.keras.losses import MeanSquaredError
import matplotlib.pyplot as plt
import tensorflow as tf

In [77]:
# Seed every random source this notebook relies on so that runs are repeatable:
# TensorFlow drives the weight initialisation, while NumPy's global generator
# drives the exploration draws, the replay-buffer sampling and the per-episode
# SUMO seed. Seeding TensorFlow alone is not enough for reproducibility.
set_seed(42)
np.random.seed(42)

input_shape = [48]  # length of the state vector fed to the network (the states printed below have 48 entries)
n_outputs = 4  # one Q-value per selectable action
# Dump of the traffic-light program, kept for reference; the trailing list
# presumably holds the phase indices the 4 actions map to — TODO confirm.
#[((Phase(duration=30.0, state='GGrGrrGGrGrr', minDur=30.0, maxDur=30.0), Phase(duration=20.0, state='grGgrrgrGgrr', minDur=20.0, maxDur=20.0), Phase(duration=30.0, state='GrrGGrGrrGGr', minDur=30.0, maxDur=30.0), Phase(duration=20.0, state='grrgrGgrrgrG', minDur=20.0, maxDur=20.0)), [0, 2, 4, 6])]

# Q-network: two hidden layers of 32 LeakyReLU units and a linear head that
# outputs one Q-value per action.
model_action = Sequential([
    layers.Dense(32,activation=tf.keras.layers.LeakyReLU(alpha=0.01), input_shape=input_shape),
    layers.Dense(32, activation=tf.keras.layers.LeakyReLU(alpha=0.01)),
    layers.Dense(n_outputs, activation= 'linear')])

In [78]:
def epsilon_greedy_policy(state, epsilon=0):
    """Choose an action index for ``state`` with an epsilon-greedy rule.

    Args:
        state: 1-D NumPy array; a leading batch axis is added before it is
            passed to ``model_action``.
        epsilon: probability of exploring. A uniform draw in [0, 1) below
            this value triggers a random action.

    Returns:
        A random index in ``[0, n_outputs)`` when exploring, otherwise the
        index of the largest Q-value predicted by ``model_action``.

    Side effects:
        On the greedy branch the state and its predicted Q-values are
        printed to stdout.
    """
    exploring = np.random.rand() < epsilon
    if exploring:
        # Exploration: ignore the network and pick uniformly at random.
        return np.random.randint(n_outputs)

    # Exploitation: the model expects a batch, so wrap the single state.
    single_state_batch = state[np.newaxis]
    Q_values = model_action.predict(single_state_batch, verbose=0)[0]
    print(f"state : {state}")
    print(f"Q_values : {Q_values}")
    return Q_values.argmax()

In [79]:
def sample_experiences(batch_size):
    """Draw ``batch_size`` transitions from ``replay_buffer`` with replacement.

    Each stored experience is indexed by position: 0 = state, 1 = action,
    2 = reward, 3 = next state.

    Returns:
        A 4-tuple ``(states, actions, rewards, next_states)`` of NumPy
        arrays, each stacking the corresponding field of the drawn
        experiences along the first axis.
    """
    picked = np.random.randint(len(replay_buffer), size=batch_size)
    drawn = [replay_buffer[position] for position in picked]

    def stack_field(field_index):
        # Gather one field across all drawn experiences into a single array.
        return np.array([transition[field_index] for transition in drawn])

    return stack_field(0), stack_field(1), stack_field(2), stack_field(3)

In [80]:
# --- DQN training hyper-parameters ---
loss_fn = MeanSquaredError()  # regression loss between target and predicted Q-values
optimizer = keras.optimizers.Adam(learning_rate=0.05)
discount_factor = 0.5  # weight applied to the best next-state Q-value in the target
batch_size = 32  # number of transitions sampled per training step

def training_step(batch_size):
    """Run one gradient update of ``model_action`` on a sampled mini-batch.

    A batch of ``batch_size`` transitions is drawn with
    ``sample_experiences``. The regression target for each transition is
    ``reward + discount_factor * max(Q(next_state))``, with the next-state
    Q-values predicted by ``model_action`` itself. No terminal-state flag
    is used, so every transition is bootstrapped.

    The loss (``loss_fn``) compares that target with the Q-value the model
    assigns to the action actually taken, and ``optimizer`` applies the
    resulting gradients. Nothing is returned.
    """
    states, actions, rewards, next_states = sample_experiences(batch_size)

    # Bootstrap target built outside the tape: it is treated as a constant.
    best_next_q = model_action.predict(next_states, verbose=0).max(axis=1)
    td_targets = (rewards + discount_factor * best_next_q).reshape(-1, 1)

    # One-hot rows select, per sample, the Q-value of the action taken.
    action_mask = one_hot(actions, n_outputs)

    with GradientTape() as tape:
        predicted_q = model_action(states)
        chosen_q = reduce_sum(predicted_q * action_mask, axis=1, keepdims=True)
        loss = reduce_mean(loss_fn(td_targets, chosen_q))

    gradients = tape.gradient(loss, model_action.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model_action.trainable_variables))

In [88]:
# Bookkeeping containers and counters, kept at module level so that other
# cells can inspect them after training.
rewards = []
best_score = 0
reward = 0
total_reward = 0
list_values = []
weights = []
wait_times = []
replay_buffer = deque(maxlen=10000)
epoch = 50  # number of training episodes

steps_per_episode = 80000  # simulation steps run per episode (TODO: tune)
decision_interval = 2000  # simulation steps between two agent decisions
min_buffer_size = batch_size * 10  # transitions required before training starts

for episode in range(epoch):
    # Exploration rate decays linearly with the episode index, floored at 0.01.
    # It does not depend on the step, so it is computed once per episode.
    epsilon = max(1 - episode / epoch, 0.01)

    # Each episode runs the simulation with a freshly drawn SUMO seed.
    seed_value = np.random.randint(1000)
    sumoCmd = [sumo_bin, "-c", simulConfig, "--start", "--seed", str(seed_value), '--no-warnings']

    # Drop any connection left over from an interrupted previous run.
    if traci.isLoaded():
        traci.close()
    traci.start(sumoCmd)
    try:
        lane_ids = traci.lane.getIDList()
        trafic_light_ids = traci.trafficlight.getIDList()

        state = np.array(get_state(lane_ids))
        # No action has been taken yet in this episode. None (rather than a
        # placeholder index such as -1) keeps the first, meaningless
        # observation out of the replay buffer.
        action = None
        wait_times.append(0)

        for step in range(steps_per_episode):
            if step % decision_interval == 0:
                next_state = np.array(get_state(lane_ids))
                # Reward = decrease of the summed first 24 state entries since
                # the previous decision (presumably per-lane queue lengths —
                # confirm against state.get_state).
                reward = np.sum(state[:24]) - np.sum(next_state[:24])
                if action is not None:
                    # Only store transitions that result from a real action.
                    replay_buffer.append((state, action, reward, next_state))

                list_values.append(queue(lane_ids))

                state = next_state
                action = epsilon_greedy_policy(state, epsilon)
                # Action k selects phase index 2*k of the first traffic light
                # (presumably the odd indices are transition phases — confirm).
                traci.trafficlight.setPhase(trafic_light_ids[0], 2 * action)

                if len(replay_buffer) >= min_buffer_size:
                    training_step(batch_size)

            traci.simulationStep()
        print(f'episode : {episode}')
    finally:
        # Always release the SUMO connection, even if the episode failed.
        traci.close()


 Retrying in 1 seconds




Step #720.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 786 ACT 32 BUF 0)                   #180.00 (1ms ~= 10.00*RT, ~93000.00UPS, TraCI: 3ms, vehicles TOT 230 ACT 93 BUF 4)   0.00 (0ms ?*RT. ?UPS, TraCI: 7ms, vehicles TOT 485 ACT 124 BUF 0)                  (0ms ?*RT. ?UPS, TraCI: 7ms, vehicles TOT 715 ACT 169 BUF 1)                 episode : 0
Step #800.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 786 ACT 4 BUF 0)                   
 Retrying in 1 seconds




Step #179.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 207 ACT 102 BUF 0)                 state : [ 0  0 14  0  9 12  0  0 12  0  8  8  0  0  0  0  0  0  0  0  0  0  0  0
  3  3 15  4 12 15  8  2 12  2  8  9  1  1  0  3  0  0  2  0  0  0  0  0]
Q_values : [-9.970377 -5.387272 -4.266041 -3.680519]
Step #720.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 703 ACT 4 BUF 0)                   0.00 (0ms ?*RT. ?UPS, TraCI: 7ms, vehicles TOT 408 ACT 112 BUF 0)                  (0ms ?*RT. ?UPS, TraCI: 7ms, vehicles TOT 624 ACT 107 BUF 0)                 episode : 1
Step #800.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 703 ACT 0 BUF 0)                   
 Retrying in 1 seconds




Step #540.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 645 ACT 118 BUF 0)                  #180.00 (0ms ?*RT. ?UPS, TraCI: 3ms, vehicles TOT 197 ACT 74 BUF 1)                  0.00 (1ms ~= 10.00*RT, ~124000.00UPS, TraCI: 6ms, vehicles TOT 440 ACT 124 BUF 7) state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-1.2863379  -0.6454021  -0.65432364 -1.0530971 ]
Step #720.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 707 ACT 0 BUF 0)                   episode : 2
Step #800.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 707 ACT 0 BUF 0)                   
 Retrying in 1 seconds




Step #720.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 715 ACT 48 BUF 0)                   #180.00 (0ms ?*RT. ?UPS, TraCI: 7ms, vehicles TOT 211 ACT 116 BUF 2)                 0.00 (0ms ?*RT. ?UPS, TraCI: 3ms, vehicles TOT 420 ACT 94 BUF 3)                   (0ms ?*RT. ?UPS, TraCI: 4ms, vehicles TOT 627 ACT 99 BUF 3)                  episode : 3
Step #800.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 715 ACT 16 BUF 0)                  
 Retrying in 1 seconds




Stepstate : [ 0 16  8  0  0  3  0  7  2  0  0  5  0  0  0  0  0  0  0  0  0  0  0  0 0)   
  2 23 10  5  1  3  2 11  3  5  3  7  2  0  0  2  3  0  4  0  0  1  3  0]
Q_values : [ 5.731629 -3.469774 -7.295589 -3.11418 ]
Step #369.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 391 ACT 82 BUF 2)                  state : [ 0  0  1  0 12 13  0  0  2  0 10  2  0  0  0  0  0  0  0  0  0  0  0  0
  4  1  1  1 12 13  5  6  2  2 16  2  3  5  1  3  0  1  2  3  0  2  0  2]
Q_values : [-9.520152  -5.98915   -4.3526664 -1.9113783]
Step #540.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 598 ACT 109 BUF 3)                 state : [ 0  2 22  0  0  1  0  3 18  0  0  5  0  0  0  0  0  0  0  0  0  0  0  0
  1  2 23  2 12  4  1  4 19  2  9  7  2  0  3  3  9  4  3  0  0  1 13  0]
Q_values : [-11.300394   -3.708486   -4.5201554  -7.2953863]
Step #720.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 682 ACT 6 BUF 0)                   episode : 4
Step #800.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 682 ACT 6 BUF 0) 



state : [0 0 1 0 1 0 0 2 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 4 5 7 2 1 5 5 3 6 5 0
 0 1 1 0 0 3 0 2 1 0 0]
Q_values : [-6.2950315 -2.5115051 -3.1658587 -5.2782893]
state : [ 0  7  1  0  1  0  0  8  3  0  4  0  0  0  0  0  0  0  0  0  0  0  0  0
  3  9  3  3  4  3  1 10  7  6  9  3  1  0 11  3  0  0  3  0 11  2  0  0]
Q_values : [ 1.4371746 -0.8857857 -5.472938  -6.7423887]
Stepstate : [ 0  2  0  0  3  5  0  1  0  0 12  3  0  0  0  0  0  0  0  0  0  0  0  0      
  6  7  2  3  5  5  2  2  1  2 16  5  2  0  0  0  0  9  2  0  0  0  0  9]
Q_values : [ 5.87912   -9.621298  -4.77155    4.3284783]
state : [ 0 14  0  0  2  6  0 13  0  0  2 15  0  0  0  0  0  0  0  0  0  0  0  0
  4 19  1  7  6 10  4 16  3  6  3 18  5  0  0  3  0  7  2  0  0  2  0  6]
Q_values : [ 13.11539   -14.674018   -5.9476585   7.175049 ]
Step #369.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 445 ACT 122 BUF 2)                 state : [ 0  5  0  0  7 13  0  0  0  0  2 21  0  0  0  0  0  0  0  0  0  0  0  0
  3  8  3  6 12



Step #179.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 228 ACT 134 BUF 0)                 state : [ 0  3  7  0  3  0  0  4  6  0  4  0  0  0  0  0  0  0  0  0  0  0  0  0
  4  8  9  3  8  9  3  5  8  4  7  3  3  0 11  5  0  0  2  0 11  4  0  0]
Q_values : [ -4.0185595   1.8592317  -2.3526092 -11.278214 ]
Step #369.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 462 ACT 105 BUF 2)                 state : [ 0  2  1  0 13  0  0  4  3  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0
  2  4  2  4 16  3  4  7  4  6 13  2  3  0  1  4  0  0  2  0 11  6  0  0]
Q_values : [ -4.543215    4.5376596   1.7113421 -10.764806 ]
state : [ 0  8  1  0  0  6  0  4  1  0  0  4  0  0  0  0  0  0  0  0  0  0  0  0
  1 12  4  1 11  9  4 12  2  1 10  9  0  0  0  1 12  2  1  0  0  2 13  1]
Q_values : [-5.811893 -7.701044 -9.493757 -1.043771]
Step #540.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 685 ACT 100 BUF 1)                 state : [ 0  4  0  0  0 10  0  9  0  0  0 12  0  0  0  0  0  0  0  0  0  0  0  0
  0  4  0



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 1 2 1 0 0 2 1 1 3 1 1 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-3.1868124 -2.5655584 -3.037323  -2.1860495]
state : [ 0  0  0  0  9  5  0  1  0  0  9  4  0  0  0  0  0  0  0  0  0  0  0  0
  5  2  2  5  9  7  4  2  1  4 13  6  3  0  0  1  0  0  3  0  0  0  0  4]
Q_values : [-3.571425   -4.901232   -3.6588445   0.10499316]
Step #179.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 188 ACT 83 BUF 1)                  state : [ 0  4  4  0  0  1  0  9  5  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0
  3  6  6  5  3  5  3 13  9  2  8  6  4  0  0  1 13  0  4  0  0  3 12  1]
Q_values : [-1.898515  -4.2415323 -7.7894034 -4.846719 ]
Step #369.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 403 ACT 104 BUF 1)                 state : [ 0  6  0  0  3  2  0 11  0  0  6  1  0  0  0  0  0  0  0  0  0  0  0  0
  4 10  3  3  6  4  4 13  3  1  9  2  1  0  1  1  0 11  1  0  0  2  0 11]
Q_values : [  9.240209  -10.643643   -4.2309136   5.183241 ]
state : [ 0 



Step #179.00 (1ms ~= 10.00*RT, ~90000.00UPS, TraCI: 0ms, vehicles TOT 219 ACT 90 BUF 0)   state : [ 0  9 13  1  3  0  0 11 12  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  4  9 15  6  9  3  6 18 15  4 13  0  1  0  1  3  0  0  2  0  1  1  0  0]
Q_values : [ 6.643915  11.939324   2.6027465 -4.6433315]
state : [ 0  9  0  0  6  3  0 16  0  0 10  1  0  0  0  0  0  0  0  0  0  0  0  0
  6 10  4  6 10  7  7 18  6  4 14  3  3  0  1  3  0 11  1  0  0  4  0 11]
Q_values : [ 3.1711226  3.3008275 -3.7575805 -5.414584 ]
Step #369.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 441 ACT 122 BUF 0)                 state : [ 0  0  5  0  3  4  0  0  8  0  5 11  0  0  0  0  0  0  0  0  0  0  0  0
  1  3  9  2  8  7  4  1 10  8 10 15  1 10  0  4  0  0  3  5  0  0  0  0]
Q_values : [-4.341371  22.908937   5.5007095 16.459385 ]
state : [ 0  6  2  0  0  2  0  3  1  0  0  9  0  0  0  0  0  0  0  0  0  0  0  0
  4 10  7  2  3  2  2  9  4  7  2 10  2  0  2  1 12  0  5  0  0  1 10  1]
Q_values : [ 3.365728  -1.014



state : [0 1 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 2 4 3 5 4 0 8 5 1 6 3 3 1
 0 0 0 0 0 2 0 0 0 0 0]
Q_values : [ -6.8740435  -11.677376     0.50437355  -2.1779792 ]
state : [0 3 3 0 0 0 0 3 1 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 4 9 5 5 2 2 6 5 2 1 4 5 3
 0 0 4 2 0 5 0 0 0 4 0]
Q_values : [-0.02694651 -2.245054   -1.6310419  -3.8246655 ]
Step #369.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 455 ACT 145 BUF 1)                  #180.00 (0ms ?*RT. ?UPS, TraCI: 37ms, vehicles TOT 227 ACT 118 BUF 0)                state : [ 0  5  3  0  3  0  0 11  8  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
  4  7  7  2  4 12  4 14  9  2  4 15  2  0 11  3  0  0  1  0 11  4  0  0]
Q_values : [  1.8860275  -1.6824908  -7.0568047 -10.677202 ]
state : [ 0  1  8  0  6  0  0  0 12  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  4  4 10  8  8  3  3  2 14  3  9  5  3  0 11  5  0  0  0  0 12  2  0  0]
Q_values : [ 6.6539536  -0.35317826  2.1121993   0.25809324]
Step #720.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles T



state : [ 0 10 11  0  2  0  0  7  7  0  4  0  0  0  0  0  0  0  0  0  0  0  0  0
  5 15 15  2  4  1  3  9  9  7  7  2  4  0  2  1  0  0  3  0  2  1  0  0]
Q_values : [ 8.770723   2.8872805  1.7233523 -2.408736 ]
Stepstate : [ 0  0  0  0  7  8  0  1  0  0 10  4  0  0  0  0  0  0  0  0  0  0  0  0      
  1  2  9  2 10  9  2  2  0  6 12  7  1  0  0  4  0 11  2  0  0  3  0 10]
Q_values : [-5.5959306  -4.6933913   0.04115498 -4.6214843 ]
Step #369.00 (1ms ~= 10.00*RT, ~108000.00UPS, TraCI: 0ms, vehicles TOT 401 ACT 108 BUF 0) state : [ 0 14  0  0  7  0  0 10  2  0  7  0  0  0  0  0  0  0  0  0  0  0  0  0
  4 18  1  4 11  1  1 11  2  5 13  3  3  0  3  2  0  0  5  0  4  6  0  0]
Q_values : [ 8.126588  14.5704155 11.598622   3.447047 ]
Step #540.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 628 ACT 121 BUF 2)                 state : [ 0  1  6  0  0  9  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0
  3  5  9  2 11 11  5  4  2  2  8 14  5  0  0  0 13  0  1  0  0  3 10  3]
Q_values : [-11



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-1.0634303  -0.842325   -0.05638576 -1.0616312 ]
state : [0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 1 5 5 2 3 5 3 3 2 3 0
 0 0 0 0 0 2 0 0 3 0 1]
Q_values : [-10.33023   -8.714648  -8.115335 -13.235527]
Step #179.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 217 ACT 111 BUF 0)                 state : [ 0  0  4  0  7 10  0  0  9  0  6 15  0  0  0  0  0  0  0  0  0  0  0  0
  3  0  5  2 12 13  5  3 12  5  8 16  3  2  0  5  0  2  1  4  1  4  0  2]
Q_values : [-4.0900674  6.0769577  1.488404  13.391007 ]
state : [ 0  0  5  0 10  0  0  2 11  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  2  2  6  4 16  1  6  3 16  1  9  6  1  0 11  2  0  0  2  0 11  3  0  0]
Q_values : [-1.1154392  3.888905   6.557869  -0.5087154]
state : [ 0  1  6  0  0  1  0  2 15  0  0  6  0  0  0  0  0  0  0  0  0  0  0  0
  4  2 10  6  6  4  5  5 17  4  1  8  2  0  2  5  6  3  1  0  1  0 13  0]



state : [ 0  1  0  0  8  5  0  2  0  0  1  5  0  0  0  0  0  0  0  0  0  0  0  0
  2  2  3  4 11  7  4  6  2  2  2 10  0  0  0  5  1 11  1  0  0  0  0 11]
Q_values : [-2.9399905 -1.2861108  3.9683704 -1.2069181]
Step #179.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 228 ACT 97 BUF 2)                  state : [ 0  0  7  0  5  2  0  0  6  0  5  2  0  0  0  0  0  0  0  0  0  0  0  0
  2  0  9  5  8  3  4  5 12  7  9  6  4  9  2  1  0  0  1  8  0  4  0  0]
Q_values : [-12.767209   -6.3984394  -8.273056  -12.437148 ]
state : [ 0  0  0  0  7  3  0  3  0  0  8  6  0  0  0  0  0  0  0  0  0  0  0  0
  4  2  0  2  9  5  3 10  2  8 12  8  3  0  0  1  0  9  3  0  0  1  0 11]
Q_values : [-1.6953083 -3.972021   8.918212  -1.7948031]
state : [ 0  2  0  0  0  5  0  8  2  0  0  8  0  0  0  0  0  0  0  0  0  0  0  0
  4  4  2  3  1  5  2 14  4  5  5 10  2  0  0  2 11  0  6  0  0  1  7  2]
Q_values : [ 0.08872765 -2.1093793   1.7515478  -3.3474493 ]
state : [ 0  3  2  0  0  5  0 11  4  0  0 10  0  0  0 



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 3 3 0 3 3 1 3 2 1 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-7.872141  -5.4799895 -7.767131  -9.191481 ]
state : [0 5 0 0 1 6 0 4 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 5 8 1 4 3 8 7 6 0 6 1 5 2
 0 0 5 0 5 2 0 0 1 0 4]
Q_values : [ -6.912638   -5.7925267  -9.069876  -10.030122 ]
state : [ 0  8  0  0  5  0  0  7  2  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
  4 11  0  3  9  2  3  9  4  2  3  2  2  0  7  3  0  0  4  0  9  1  0  0]
Q_values : [ -4.45248    -5.705617   -7.7282867 -10.246712 ]
Step #179.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 205 ACT 82 BUF 0)                  state : [ 0  6  4  0  0  3  0  3  9  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
  6  9  4  5  3  3  3  7 10  4  0  4  2  0  2  1  9  0  1  0  0  1  6  0]
Q_values : [-0.37474555  0.15017205 -2.875056   -1.417056  ]
Step #369.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 439 ACT 101 BUF 0)                 state : [ 0  8  8  0 11  0  0 10  3  0 12  0  0  0  0  0



state : [0 2 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 4 6 3 6 3 6 4 3 3 2 5 2 2
 0 0 2 0 1 0 0 0 1 0 1]
Q_values : [ -7.233441   -3.7985063 -10.649689   -9.165502 ]
state : [ 0  0  6  1  4  1  0  0  5  0 10  4  0  0  0  0  0  0  0  0  0  0  0  0
  2  0  8  4  7  1  4  1  6  2 13  6  1  6  0  5  0  0  2  5  2  1  0  0]
Q_values : [-2.6319754  1.9031339  7.453132   0.5800563]
state : [ 0  0  9  0  0  2  0  0  8  0  2  7  0  0  0  0  0  0  0  0  0  0  0  0
  6  1 10  2  2  2  4  2 11  3  5  9  3  3  0  4  0  0  1  0  0  2  0  0]
Q_values : [-2.0617268  4.0267916  3.8075798  2.4609573]
state : [ 0  0  0  0  2  2  0  1  0  0  4  8  0  0  0  0  0  0  0  0  0  0  0  0
  4  2  2  4  5  4  5  4  3  5  5 11  1  0  0  2  0 10  1  0  0  3  0 11]
Q_values : [-8.644964  -2.232726  -0.9990262 -3.8462746]
Step #179.00 (1ms ~= 10.00*RT, ~80000.00UPS, TraCI: 0ms, vehicles TOT 208 ACT 80 BUF 2)   state : [ 0  8 16  0  2  0  0  5 17  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  7 13 19  2  4  4  3  8 



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 1 0 1 6 2 2 4 2 1 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-8.9159155 -6.390168  -4.764826  -5.5243235]
state : [ 0  9  5  0  5  0  0  8 10  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
  4 10  8  1  8  1  7 13 11  4  7  2  2  0 11  2  0  0  1  0  8  4  0  0]
Q_values : [-4.7246003   0.19205758 -5.5588803  -0.33027196]
state : [ 0  9  0  1  7  1  0 11  0  0  6  2  0  0  0  0  0  0  0  0  0  0  0  0
  4 12  0  4 13  2  4 17  3  7 13  4  0  0  1  3  0  8  0  0  0  3  0 11]
Q_values : [ 3.5918174 -1.9580755  8.13962    3.9327939]
Step #179.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 220 ACT 86 BUF 0)                  state : [ 0  3  0  0  3  8  0  7  0  0  2  9  0  0  0  0  0  0  0  0  0  0  0  0
  4  7  1  5  4 10  7  9  0  3  7 12  1  0  0  1  0  3  1  0  0  4  0  7]
Q_values : [-2.3300757  0.6166078 -2.5354729  6.123975 ]
state : [ 0  0  2  0  5  1  0  0  2  0  6  2  0  0  0  0  0  0  0  0  0  0  0  0
  4  1  5  2  6  2  6  2 



state : [ 0  0  6  0  8  3  0  0  3  0  2  2  0  0  0  0  0  0  0  0  0  0  0  0
  3  1  9  4  9  4  5  3  5  4  4 10  2  4  1  2  0  0  4  2  0  1  0  0]
Q_values : [-5.6289306 -5.2171116 -1.3046224 -2.158656 ]
state : [ 0  3  9  0  1  0  0  2  6  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  5  4 10  2  3  3  2  5  7  6  4  2  5  0 11  2  0  0  1  0  5  1  0  0]
Q_values : [-6.411518  -2.4469028 -2.4326828 -4.911589 ]
state : [ 0  4 10  0  0  2  0  2  7  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
  3  7 12  2  1  5  4  7  9  2  1  5  0  0  1  0  3  0  4  0  1  3  3  0]
Q_values : [-3.0080428   3.2544577  -6.47809    -0.14544931]
state : [ 0  7  0  0  2  7  0  7  0  0  1  5  0  0  0  0  0  0  0  0  0  0  0  0
  4 12  3  4  4 12  5  7  1  2  3  7  2  0  0  1  0  2  1  0  0  2  0  5]
Q_values : [-4.243734  -4.409682  -4.1348076 -1.4973255]
Stepstate : [ 0 11  2  0  3  0  1  8  1  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0      
  8 17  5  6  6  1  6  9  5  3  4  1  2  0  7  2  0  0  1



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.000474  -1.296668  -0.599753  -1.1791062]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 3 1 1 2 1 1 0 3 1 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-6.5087953 -5.4112105 -1.1323681 -6.127625 ]
state : [0 3 4 0 1 0 0 3 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 4 7 5 4 2 3 4 9 2 1 2 3 2
 0 3 2 0 0 1 0 2 1 0 0]
Q_values : [-2.4282408  -5.058781    0.25886643 -7.632503  ]
state : [ 0  0 13  0  3 10  0  0 14  0  2 11  0  0  0  0  0  0  0  0  0  0  0  0
  4  4 17  1  4 11  6  3 18  3  4 13  5  7  2  0  0  0  3  6  3  2  0  0]
Q_values : [ 7.331266 23.077774  7.305075 16.937897]
Step #179.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 214 ACT 104 BUF 2)                 state : [ 0  8  0  0 10  0  0 10  0  0  8  3  0  0  0  0  0  0  0  0  0  0  0  0
  2 14  4  4 12  0  3 13  1  2 12  3  1  0  0  0  0  9  1  0  1  4  0 10]
Q_values : [20.416218  10.572061  19.957418   6.239



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.1653943   0.10542959 -1.0670785   0.706403  ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 1 3 0 0 3 1 1 4 0 1 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-9.04926   -3.2892978 -6.353458  -2.4442253]
state : [0 2 2 0 0 1 0 0 4 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 4 3 4 5 1 4 4 4 6 1 1 3 1
 0 0 3 3 0 2 0 0 0 1 0]
Q_values : [-4.024987    0.05446567 -4.541946   -3.5071878 ]
state : [0 2 0 0 1 3 0 3 0 0 1 3 0 0 0 0 0 0 0 0 0 0 0 0 6 5 1 3 4 4 3 7 4 4 3 3 4
 0 0 2 0 4 0 0 0 2 0 6]
Q_values : [-3.449291    0.34801272 -4.5961566  -4.9649577 ]
state : [0 3 0 0 3 4 0 5 0 0 2 3 0 0 0 0 0 0 0 0 0 0 0 0 5 9 1 2 6 6 5 7 3 5 4 3 0
 0 0 3 0 1 3 0 0 3 0 4]
Q_values : [-2.2235107   0.59839165 -3.7108035  -3.2136161 ]
state : [ 0  0  5  0  8  6  0  0  1  0  3  6  0  0  0  0  0  0  0  0  0  0  0  0
  1  0  8  5 12 10  5  3  3  2  5 11  3 11  0  3  0  0  3  6  2  1  0  0]
Q_values 



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 2 1 2 1 1 2 3 0 3 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-4.5790043 -5.15678   -6.272648  -5.870522 ]
state : [0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 5 3 3 4 2 1 3 4 4 1 3 1
 0 0 0 0 0 1 0 0 0 0 0]
Q_values : [-7.538592 -7.981255 -9.701074 -9.460358]
state : [0 0 3 0 2 2 0 0 4 0 1 3 0 0 0 0 0 0 0 0 0 0 0 0 2 5 5 6 4 2 4 5 9 2 4 7 0
 1 0 0 0 0 2 4 0 1 0 0]
Q_values : [-12.180765 -11.047241 -13.513145 -12.260096]
state : [ 0  2  0  0  4  2  0  3  0  0  1  5  0  0  0  0  0  0  0  0  0  0  0  0
  5  4  3  2  9  4  7  7  4  4  7 10  4  0  0  1  0  6  1  0  0  0  0  8]
Q_values : [-13.173008 -13.566328 -16.149126 -13.999055]
state : [ 0  0  3  0  9  2  0  0  3  0  4  9  0  0  0  0  0  0  0  0  0  0  0  0
  8  2  3  5 12  7  4  2  8  5 10 10  0  8  0  3  0  1  3  2  1  1  0  1]
Q_values : [-14.140893   -7.0905614  -8.272494   -6.5303   ]
Stepstate : [ 0  2 14  0  0  1  0  1 11  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 2 0 4 2 0 4 0 1 3 1 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-16.850714 -19.185215  -9.083588  -6.187172]
state : [0 5 2 0 0 1 0 2 3 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 4 8 4 4 3 5 4 4 6 4 5 3 4
 0 1 1 0 0 2 0 0 4 5 0]
Q_values : [-8.625147  -6.5561333 -5.262153  -7.0566854]
state : [ 0  7  4  0  0  3  0  3  6  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0
  4 11  6  5  4  6  3  5  8  6  3  5  2  0  0  2  4  0  2  0  0  3  2  0]
Q_values : [-5.6689487 -4.476535  -3.982843  -4.6272593]
state : [ 0  9  6  0  0  6  0  5  8  0  0  5  0  0  0  0  0  0  0  0  0  0  0  0
  7 13  8  1  4  7  2  7 12  5  2  6  4  0  0  2  1  0  3  0  0  2  3  0]
Q_values : [ 4.9115405  8.493052  -1.2026554  5.647849 ]
state : [ 0 12  0  0  2  7  0  6  0  0  2  6  0  0  0  0  0  0  0  0  0  0  0  0
  2 15  3  2  7  9  2  8  5  3  4  7  2  0  0  1  0  8  4  0  0  5  0 11]
Q_values : [-2.0780137 -4.6131673 -3.6716905 -2.2900662]
Stepstate : [ 0  0  5  0  6 12  0  0



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.4394631 -1.5909984 -2.199368  -1.2425163]
state : [ 0  0  0  0  9  9  0  2  0  0  6  6  0  0  0  0  0  0  0  0  0  0  0  0
  3  3  1  6 11 12  4  5  3  3 10  8  2  0  0  2  0  1  3  0  0  4  0  4]
Q_values : [-0.89040315  1.440186   10.334511    7.9249196 ]
state : [ 0  3  1  0  0 12  0  4  2  0  0  7  0  0  0  0  0  0  0  0  0  0  0  0
  2  6  4  7  6 14  3  7  6  1  1  8  2  0  0  2  9  2  3  0  0  2  8  2]
Q_values : [-0.17314184 -4.694812   -5.559196    2.0665417 ]
state : [ 0  5  2  0  2  0  0  6  6  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
  3  5  5  6  6  6  2 11  9  4  5  0  5  0  8  1  0  0  2  0 11  2  0  0]
Q_values : [ 1.8723954 -5.736194  -3.0948973 -7.975663 ]
Step #179.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 238 ACT 100 BUF 0)                 state : [ 0  3 10  0  0  3  0  4 13  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0
  6  4 16  5  3  5  4



state : [0 1 1 0 2 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 5 4 4 6 1 3 4 5 2 2 1 0
 0 1 1 0 0 0 0 0 1 0 0]
Q_values : [ -6.0851445  -7.436354   -5.912716  -11.515224 ]
state : [0 2 2 0 0 1 0 3 5 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 6 7 2 0 1 5 9 5 2 2 3 2
 0 0 1 2 0 1 0 0 1 6 0]
Q_values : [-4.4618053 -4.94365   -5.104876  -6.768688 ]
state : [ 0  0  7  0  1  4  0  0  7  0  3  5  0  0  0  0  0  0  0  0  0  0  0  0
  5  1 11  5  3  8  7  6 11  5  3  6  1  3  0  3  0  0  1  2  0  0  0  0]
Q_values : [-7.0771656 -4.1916304 -6.3116713 -6.488854 ]
state : [ 0  0  3  0  8 13  0  0  4  0  8 11  0  0  0  0  0  0  0  0  0  0  0  0
  6  2  4  4 13 14  3  1  9  3 17 15  3  3  0  4  0  0  1  5  0  3  0  0]
Q_values : [ 4.41728  10.426573 12.649797 12.683348]
Step #179.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 234 ACT 119 BUF 1)                 state : [ 0  3  8  0 19  0  0  0 12  0 23  0  0  0  0  0  0  0  0  0  0  0  0  0
  4  8 11  4 21  1  3  5 14  5 24  1  3  0  8  1  0  0  4  0  4  3  0  0]
Q_v



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.9660468 -1.2804725 -2.3040533 -1.3042616]
state : [0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 4 1 4 7 2 2 3 4 2 4 1 1
 0 0 1 0 0 2 0 2 0 0 0]
Q_values : [-10.917607   -5.4768887  -5.431034   -7.617971 ]
state : [0 1 1 0 0 1 0 1 4 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 5 3 3 3 4 5 5 5 7 5 2 4 4
 0 1 0 3 0 2 0 1 1 5 0]
Q_values : [-9.452109  -4.1550565 -6.084886  -7.9390645]
state : [0 3 0 0 1 5 0 4 0 0 2 4 0 0 0 0 0 0 0 0 0 0 0 0 5 6 0 3 3 8 5 6 1 4 8 4 1
 0 0 2 0 3 2 0 0 2 0 7]
Q_values : [-4.0695896 -3.8010318 -3.1939678 -1.2498298]
state : [0 5 0 0 3 0 0 4 1 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 2 8 3 3 3 2 2 6 4 6 9 1 1
 0 4 4 0 0 4 0 8 4 0 1]
Q_values : [-2.654889  -2.9107807 -3.1169984 -6.7184973]
state : [ 0  0  2  1  4  1  0  0  4  0  9  1  0  0  0  0  0  0  0  0  0  0  0  0
  4  3  5  3  7  2  1  1  6  5 12  1  1  7  0  1  0  0  3  5  2  2  0  0]
Q_values : [-5.779421



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.3989613 -1.9247756 -3.528998  -1.977798 ]
state : [0 1 2 0 0 0 0 1 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 4 4 3 0 5 4 1 4 4 1 0
 0 2 0 0 0 0 0 2 2 0 0]
Q_values : [-7.0710306 -6.942453  -9.243421  -9.156984 ]
state : [0 3 0 0 2 0 0 3 0 0 3 1 0 0 0 0 0 0 0 0 0 0 0 0 4 7 1 4 3 3 4 9 1 1 4 5 2
 0 1 2 0 4 1 0 0 1 0 1]
Q_values : [-4.6738315 -8.849457  -8.119115  -6.657302 ]
state : [ 0  0  2  0  6  6  0  0  3  0  8  9  0  0  0  0  0  0  0  0  0  0  0  0
  6  4  3  4  9 10  2  2  9  4 11 13  5  1  0  3  0  0  2  1  0  3  0  0]
Q_values : [-4.616205  -3.308551   3.2460465  2.7591982]
Stepstate : [ 0  6  0  0  1 12  0  4  0  0  3 18  0  0  0  0  0  0  0  0  0  0  0  0 2)   
  3  9  2  2  8 13  2  7  3  3  5 20  3  0  0  3  0  1  1  0  0  2  0  3]
Q_values : [16.826918   6.4640393  7.597331  26.501974 ]
state : [ 0  6  0  0  4  1  0  9  0  0  2  1  0  0  0  0  0  0  0  0  0  0



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.4622076 -3.1462765 -3.0656576 -2.5462768]
state : [0 2 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 5 4 2 2 2 1 4 2 5 3 4 5 2
 0 0 1 1 0 1 0 0 0 0 0]
Q_values : [ -7.0568438  -9.530031   -8.161274  -10.850296 ]
state : [0 0 1 0 0 1 0 0 5 0 2 4 0 0 0 0 0 0 0 0 0 0 0 0 7 5 3 4 2 3 4 4 9 3 4 7 1
 0 0 2 0 0 2 2 1 3 0 0]
Q_values : [-8.8089695 -4.4425077 -7.4991407 -8.296606 ]
state : [ 0  8  0  0  3  3  0  5  0  0  5  7  0  0  0  0  0  0  0  0  0  0  0  0
  3 10  2  2  7  4  4  5  2  2 10  9  4  0  0  2  0  1  2  0  0  4  0  1]
Q_values : [4.991655  1.0128851 3.5308106 5.020374 ]
state : [ 1  9  2  0  5  0  0  5  2  0  7  0  0  0  0  0  0  0  0  0  0  0  0  0
  4 12  3  4  6  1  2 10  3  4 10  2  1  0  9  2  0  0  0  0  4  0  0  0]
Q_values : [ 9.942976   3.5341387  8.543439  -1.9470975]
state : [ 0  0  2  0  6  1  0  0  3  0  9  2  0  0  0  0  0  0  0  0  0  0  0  0
  1



state : [0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 4 8 2 3 2 2 2 3 1 4 1 1 1
 0 0 1 0 2 2 0 0 1 0 1]
Q_values : [-5.0606804 -6.518902  -7.9199033 -5.9303427]
state : [ 0  0  1  0  0  2  0  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
  3  2  3  3  4 10  1  3  4  4  4  1  0  2  0  1  0  1  4  6  1  2  0  0]
Q_values : [-11.918927   -6.1709857  -8.5363245  -8.905256 ]
state : [ 0  4  1  0  0 10  0  2  1  0  0  5  0  0  0  0  0  0  0  0  0  0  0  0
  3  4  4  1  3 14  4  6  5  0  0  7  3  0  0  2  5  0  2  0  0  0  5  1]
Q_values : [ 0.7252303  0.2084961 -2.0547335  7.2809305]
state : [ 0  4  4  0  3  0  0  5  3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  4  4  7  1  4  2  3  6  9  3  2  0  0  0  7  3  0  0  0  0 11  2  0  0]
Q_values : [4.077116  4.945499  8.47547   2.8621066]
state : [ 0  8  0  0  1  4  0  7  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
  6 12  2  4  8 10  3  9  4  6  3  4  2  0  0  2  0  7  2  0  0  3  0 10]
Q_values : [-7.622715  -8.488306  -7.6869426 



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-3.728159  -3.6855192 -2.7435076 -1.9035199]
state : [0 0 1 0 0 2 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 1 2 2 3 3 3 0 1 3 3 1 1
 1 0 0 0 0 0 1 0 2 0 0]
Q_values : [-9.693099 -8.917768 -5.334188 -5.796317]
state : [0 0 2 0 0 3 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 2 2 5 4 0 4 4 1 1 2 3 4 1
 0 0 3 0 0 1 0 0 2 2 0]
Q_values : [-8.323403  -6.3713217 -5.837735  -4.555726 ]
state : [0 2 3 0 0 0 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4 7 4 1 4 6 2 4 4 2 3 2
 0 4 1 0 0 0 0 4 1 0 0]
Q_values : [-10.932603   -7.929406   -6.5583     -6.9737325]
state : [ 0  6  0  0  1  6  0  3  0  0  2  4  0  0  0  0  0  0  0  0  0  0  0  0
  5 10  3  1  2 10  5  5  0  4  5  5  2  0  0  1  0  8  1  0  0  3  0  6]
Q_values : [ 0.70382446  0.4252137  -1.8753245   4.7675967 ]
state : [ 0  9  2  0  2  0  0  5  0  0  4  0  0  0  0  0  0  0  0  0  0  0  0  0
  8 13  5  3  3  2  4  5  1  4  6  1  1  0  5  



state : [ 0  2  0  0  5  6  0  1  0  0  1  3  0  0  0  0  0  0  0  0  0  0  0  0
  6  7  2  2  7 11  4  5  2  5  6  5  3  0  0  5  0  1  3  0  0  1  0  5]
Q_values : [ -8.127245  -10.393134   -4.8292794  -9.391457 ]
state : [ 0  5  7  0  3  0  0  0  6  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  5 10  9  4  5  9  2  2 11  0  6  2  1  0 10  1  0  0  1  0 11  4  0  0]
Q_values : [-8.894264  -4.1846156 -6.0958657 -5.883052 ]
Stepstate : [ 0  7  0  0  5  9  0  2  0  0  4  1  0  0  0  0  0  0  0  0  0  0  0  0      
  3 12  5  4 10  9  2  3  4  2  7  3  3  0  0  1  0  9  0  0  2  2  0 10]
Q_values : [-4.3210454  -5.274406    0.24573064 -1.8493738 ]
state : [ 0 10  5  0  0  9  0  2  4  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0
  7 12  9  4  5 12  3  4  9  4  1  4  2  0  0  1  7  0  1  0  0  2  8  1]
Q_values : [ 2.3380566  7.5834575 -3.8624334  7.700142 ]
state : [ 0 11  9  0  3  0  0  3  7  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  5 12 12  4  7  4  2  7  9  1  4  3  3  0  4  2  0  



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-1.6040542 -1.98522   -3.3652117 -1.0438087]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 2 4 2 2 3 1 1 1 2 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-7.0614343 -5.8443575 -9.982289  -7.1269145]
state : [ 0  3  2  0  0  6  0  4  1  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0
  3  7  4  3  1 10  5  5  3  3  1  8  2  0  0  0  1  0  1  0  0  1  2  0]
Q_values : [-6.0001073  -0.76000714 -9.986284   -3.8124232 ]
state : [ 0  6  0  0  1 10  0  4  0  0  1  8  0  0  0  0  0  0  0  0  0  0  0  0
  4 10  1  5  1 10  9  6  2  4  4 10  1  0  0  3  0  4  3  0  0  2  0  3]
Q_values : [ 1.4781145  5.31583   -6.3654337  8.491429 ]
state : [ 0 11  7  0  2  0  0 12  5  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  1 17  9  6  8  5  3 14  8  2  3  2  0  0  2  3  0  0  5  0  3  1  0  0]
Q_values : [ 7.7815595 -0.4429865 -4.3595147 -7.7351904]
state : [ 0  0  9  0  6  4  0  0



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.3102143 -3.0184622 -3.3907495 -1.3068278]
state : [0 3 1 0 0 2 0 4 4 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 4 5 2 4 4 5 3 9 7 6 5 4 2
 0 0 1 6 0 7 0 0 1 5 1]
Q_values : [ -7.435271  -6.8083    -9.036909 -10.462867]
state : [0 5 0 0 2 5 0 8 0 0 4 3 0 0 0 0 0 0 0 0 0 0 0 0 6 9 0 3 4 8 3 9 2 6 6 6 2
 0 0 1 0 2 4 0 0 3 0 7]
Q_values : [ 4.6092086  -1.405645    0.538388    0.64395595]
state : [ 1  2  0  0  4 10  0  1  0  0  7  6  0  0  0  0  0  0  0  0  0  0  0  0
  3  5  2  1  7 13  3  3  1  3 10  7  2  0  0  0  0  0  1  0  0  2  0  3]
Q_values : [0.7318502 1.707279  7.1033487 6.506435 ]
state : [ 0  4  1  0  0 13  0  1  1  0  0  7  0  0  0  0  0  0  0  0  0  0  0  0
  4  6  4  2  3 14  2  1  4  5  4 12  1  0  0  3  6  4  3  0  0  3  5  2]
Q_values : [ 2.0225353  3.1451058 -5.695093  11.126348 ]
state : [ 0  5  4  0  1  0  0  1  4  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  3



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-1.9409571 -3.505329  -3.7348094 -1.0487053]
state : [ 0 10  1  0  4  0  0  8  1  0  4  0  0  0  0  0  0  0  0  0  0  0  0  0
  4 11  2  7  7  2  2 13  5  6  8  4  1  0 10  2  0  1  0  0  4  1  0  1]
Q_values : [ 6.5060506   0.49333704  0.8340609  -5.1081724 ]
state : [ 0  0  2  0  6  2  0  0  5  0  6  3  0  0  0  0  0  0  0  0  0  0  0  0
  2  2  5  3  8  5  6  5  6  8  9  8  5 10  3  2  0  0  1  9  1  2  0  0]
Q_values : [-8.052875  -4.4397116 -7.138949  -6.493936 ]
state : [ 0  1  0  0  8  5  0  3  0  0  8  7  0  0  0  0  0  0  0  0  0  0  0  0
  4  2  1  5 13  8  5  6  7  4 10 11  2  0  0  2  0  5  6  0  0  1  0  6]
Q_values : [-4.6792555 -3.7027738  1.3849318  1.924026 ]
Step #179.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 232 ACT 90 BUF 1)                  state : [ 0  0  2  0 20  4  0  0  2  0 14  5  0  0  0  0  0  0  0  0  0  0  0  0
  7  1  7  3 22  6  2  2 



state : [0 0 1 0 1 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 4 6 3 2 2 0 7 3 0 5 5 1 2
 0 0 1 0 0 1 0 1 3 0 0]
Q_values : [-5.710999 -9.439988 -9.380805 -8.342936]
state : [ 0  0  3  0  2  0  0  0  0  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0
  2  3  6  4  4  2  4  2  0  4 10  2  1  3  0  4  0  0  3  5  0  2  0  0]
Q_values : [ -9.299335 -11.076569 -10.021594 -10.592787]
state : [ 0  0  6  0  3  1  0  0  0  0  9  2  0  0  0  0  0  0  0  0  0  0  0  0
  4  3  7  7  4  5  5  1  1  7 11  4  2  1  0  3  0  0  2  3  0  1  0  0]
Q_values : [-6.2994137 -6.1282845 -4.8573203 -4.057789 ]
state : [ 0  1  7  0  3  0  0  0  1  0 11  0  0  0  0  0  0  0  0  0  0  0  0  0
  5  7  7  3  7  2  3  3  5  4 13  0  4  0  4  4  0  0  4  0  5  2  0  0]
Q_values : [-2.0023248 -3.6133215 -2.0346131 -7.712244 ]
state : [ 0  0  7  0  6  2  0  0  4  0 13  0  0  0  0  0  0  0  0  0  0  0  0  0
  3  5 10  3 11  3  5  3  7  2 15  1  2  2  0  1  0  0  3  4  1  3  0  0]
Q_values : [-2.632928   2.6757033  6.3111677 -0.8



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.759206  -3.2940602 -3.6519299 -0.7246462]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 0 2 1 3 2 1 1 7 2 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-9.259559 -8.744858 -8.99067  -5.716463]
state : [0 2 2 0 3 0 0 1 2 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 7 8 4 2 8 2 7 2 5 4 6 2 0
 0 4 2 0 0 3 0 0 1 0 0]
Q_values : [-11.064832  -7.700284  -8.965548  -6.697759]
state : [ 0  7  4  1  9  0  0  1  5  0  5  0  0  0  0  0  0  0  0  0  0  0  0  0
  8 12  4  4 12  1  3  2  6  3  9  1  2  0  2  5  0  0  2  0  2  3  0  0]
Q_values : [ 4.2552867  -0.41092372  4.312619    1.9676994 ]
state : [ 0 11  4  0  0  1  0  1  6  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
  5 15  4  7  4  6  4  5  9  2  3  3  0  0  0  3  7  2  1  0  0  6  5  4]
Q_values : [-0.0726034   4.982732   -4.5665865   0.71450067]
state : [ 0 18  1  0  7  0  0  6  2  0  3  0  0  0  0  0  0  0  0  0  0  0  0  0



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.3440878 -2.3680618 -3.4820266 -1.3371923]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 2 0 3 1 2 6 1 2 3 1 3 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ -9.835191  -8.809924 -11.933027 -10.510546]
state : [0 1 0 0 0 1 0 1 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 3 3 0 3 3 6 5 6 1 2 1 5 0
 0 0 2 0 1 1 0 0 1 0 2]
Q_values : [-10.937066   -9.6383705 -13.141158  -11.201902 ]
state : [ 0  2  0  0  1  6  0  5  0  0  1  4  0  0  0  0  0  0  0  0  0  0  0  0
  5  7  1  4  5  7  3 10  2  1  3  6  1  0  0  3  0  0  2  0  0  1  0  1]
Q_values : [-7.1206503 -9.102996  -8.578883  -6.142891 ]
state : [ 0  0 10  0  3  3  0  0  6  0  1  1  0  0  0  0  0  0  0  0  0  0  0  0
  4  7 13  3  5  6  3  2 13  5  1  1  1 12  0  2  0  0  1  6  2  5  0  0]
Q_values : [ -7.7975163   2.8511548 -10.09215    -8.021609 ]
state : [ 0  4  0  1  5  6  0  2  0  0  1  1  0  0  0  0  0  0  0  0  0  0  



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.0865426  -1.7810228  -3.1592765  -0.19231111]
state : [0 1 0 0 1 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4 4 3 3 0 4 5 3 3 3 4 1
 0 0 0 0 0 2 0 0 0 0 0]
Q_values : [-10.5624895  -9.37923    -9.510098   -8.29448  ]
state : [0 3 4 0 2 0 0 4 3 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4 6 1 5 0 1 7 6 2 4 1 2
 0 4 3 0 0 2 0 0 1 0 0]
Q_values : [-3.6847265 -4.278676  -5.6812525 -5.5141077]
state : [0 0 6 0 4 1 0 0 6 0 3 1 0 0 0 0 0 0 0 0 0 0 0 0 4 4 7 3 4 2 2 2 8 6 6 3 0
 5 0 1 0 0 1 3 1 2 0 0]
Q_values : [-5.3597736 -1.1837336 -2.93091   -3.1479023]
state : [ 0  1  0  0  4  2  0  1  0  0  4  3  0  0  0  0  0  0  0  0  0  0  0  0
  4  7  2  4  5  5  3  4  2  4 12  9  2  0  0  1  0  7  2  0  0  2  0  8]
Q_values : [-13.652377   -7.0635304  -8.436408   -6.340157 ]
state : [ 0  4  1  0  4  0  0  2  1  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0
  4  9  7  4  9  1  5  5  4  5 14  3  1



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 2 1 1 3 2 2 2 3 7 2 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-10.6852665 -10.6983385  -8.323866  -12.104594 ]
state : [0 1 1 0 0 2 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 4 1 3 0 5 4 4 5 4 2 2 0
 0 0 1 1 0 3 0 0 3 1 0]
Q_values : [-7.426896  -7.3270535 -7.5348186 -8.071189 ]
state : [0 4 0 0 0 5 0 2 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 5 7 1 2 2 6 3 3 5 2 5 3 3
 0 0 2 0 1 4 0 0 2 0 5]
Q_values : [-2.6448455 -5.333292  -6.500905  -3.6024227]
state : [ 0  0  1  0  0  6  0  0  5  0  4  3  0  0  0  0  0  0  0  0  0  0  0  0
  3  6  1  2  5 11  1  0  6  5  6  7  0  3  0  2  0  0  0  6  0  2  0  1]
Q_values : [-8.584695  -3.8497114 -3.5361493 -1.8896972]
state : [0 5 1 0 2 0 0 0 5 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 3 7 3 3 7 3 3 2 8 1 8 2 1
 0 7 2 0 0 3 0 9 2 0 0]
Q_values : [-7.1511583 -5.603837  -3.971147  -9.320438 ]
state : [ 0  6  3  0  0  3  0  1  8  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0
  4  7  5  5  1  7  3  5 10  2  0  2  1  0  0  



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.7314878 -3.4273605 -3.9300265 -1.5132954]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4 3 2 1 2 2 3 2 2 2 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-11.545577 -13.344168 -11.136252 -10.432336]
state : [0 3 6 0 0 1 0 2 3 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 2 8 7 4 3 1 5 7 7 4 3 3 0
 0 0 3 6 1 2 0 0 1 6 0]
Q_values : [-4.507347 -4.201811 -8.509085 -6.994488]
state : [ 0  4  0  0  1  1  0  4  0  0  1  3  0  0  0  0  0  0  0  0  0  0  0  0
  1 10  1  2  3  4  5  7  3  2  4  5  4  0  0  4  0  8  2  0  0  0  0  7]
Q_values : [-2.8259506 -5.824645  -8.017531  -4.0982046]
state : [ 0  0  1  0  3  4  0  0  3  0  3  5  0  0  0  0  0  0  0  0  0  0  0  0
  5  4  2  6  5  5  4  5  3  9  9  6  1  7  0  3  0  0  1 10  0  0  0  1]
Q_values : [-6.4645395 -6.396571  -5.0735216 -4.646041 ]
state : [ 0  4  0  0  0  4  0  9  0  0  1  4  0  0  0  0  0  0  0  0  0  0  0  0
  5  8 



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.9979498 -2.9649591 -3.8248076 -1.8434888]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 1 1 5 2 1 3 2 1 1 3 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-10.51602  -10.549988  -9.847519  -9.603695]
state : [0 0 1 0 0 0 0 1 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 3 1 2 2 4 2 4 4 2 1 5 1 3
 0 0 0 0 0 0 0 1 1 0 0]
Q_values : [-9.445286 -9.321519 -8.926876 -8.869816]
state : [0 0 2 0 3 0 0 3 2 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4 4 0 4 1 2 3 4 1 7 0 3
 0 2 4 0 0 0 0 3 2 0 0]
Q_values : [-3.090237  -4.2091355 -4.61302   -4.6186438]
state : [0 0 3 0 3 1 0 0 4 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 9 5 8 3 6 2 2 2 5 3 8 3 0
 3 0 1 0 0 0 2 1 1 0 0]
Q_values : [-8.954202  -5.571733  -6.3451357 -7.3819766]
state : [0 2 0 0 4 2 0 1 0 0 7 3 0 0 0 0 0 0 0 0 0 0 0 0 4 8 3 6 9 4 2 2 0 4 9 3 0
 0 0 1 0 8 1 0 0 5 0 5]
Q_values : [-6.9802065 -7.650341  -3.8250942 -5.0917463]
state : [ 0  8  3  0



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-3.0245953 -3.3450649 -3.313613  -2.1430354]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 1 0 3 2 1 6 2 1 4 3 1 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-10.966353 -10.698026  -8.967236 -10.362233]
state : [0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 4 2 3 1 5 2 1 6 3 5 3 3 3
 0 0 3 3 0 1 0 0 4 1 0]
Q_values : [-12.773306 -11.81513   -9.637367 -11.629324]
state : [0 2 2 0 0 2 0 6 2 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 4 3 4 6 3 4 1 7 6 2 1 5 1
 0 0 0 4 0 2 0 0 1 4 0]
Q_values : [-3.162061  -2.6678562 -7.3567605 -2.525259 ]
state : [ 0  3  4  0  2  0  0  6  5  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0
  4  7  4  3  4  1  4 13  7  6  4  1  3  0  5  0  0  0  2  0  4  2  0  0]
Q_values : [ 0.71736836  0.31438994 -4.3934603  -5.9190426 ]
state : [ 0  0  4  0  3  1  0  0  7  0  2  1  0  0  0  0  0  0  0  0  0  0  0  0
  4  2  7  4  3  2  6  2 12  7  4  5  3  8  2  



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.8499537 -3.1480408 -3.0294003 -2.056497 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 3 3 0 3 1 2 4 2 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-10.810712 -11.101393  -8.99849  -10.218295]
state : [0 0 0 0 0 0 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 1 3 2 0 4 3 7 3 7 0 0
 0 0 1 2 0 0 0 0 1 4 0]
Q_values : [-11.181285  -7.348871 -11.622135 -18.359741]
state : [ 0  1  2  0  0  1  0 10  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
  3  2  3  3  2  3  4 13  2  4  1  1  3  0  0  1  8  0  4  0  1  2  6  0]
Q_values : [ 1.999399  -0.4174416 -6.631715  -2.777007 ]
state : [0 1 4 0 0 4 0 0 3 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 5 7 6 6 3 5 3 4 7 1 4 4 2
 0 0 3 1 0 5 0 0 3 2 0]
Q_values : [-11.89473    -8.852398  -10.954656   -7.8433876]
state : [ 0  4  5  0  2  0  0  3  6  0  2  0  0  0  0  0  0  0  0  0  0  0  0  0
  2  6  9  4  5  1  6 10  8  3  5  1  2  0  5  



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.7522073 -2.991529  -3.4447691 -2.0810227]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 1 0 3 0 0 1 1 0 4 4 3 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-8.94246  -8.93789  -9.130964 -8.961047]
state : [0 0 0 0 1 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 3 3 1 3 6 3 3 2 1 6 5 7 0
 0 0 1 0 0 2 0 0 3 0 0]
Q_values : [-16.601027 -16.54595  -12.600184 -14.984456]
state : [ 0  1  1  1  0  2  0  2  1  0  0  7  0  0  0  0  0  0  0  0  0  0  0  0
  5  2  4  6  1  7  7  8  1  7  7 12  1  0  0  1  3  1  2  0  0  2  3  0]
Q_values : [ -9.818192  -8.158292 -11.447984  -7.360271]
state : [ 0  0 11  0  0  6  0  2  9  0  0  7  0  0  0  0  0  0  0  0  0  0  0  0
  4  4 14  3  0  8  6  4 12  2  3 11  3  0  0  3  1  0  6  0  0  3  3  1]
Q_values : [-2.2752984  9.955841  -5.100649   4.5798855]
Stepstate : [ 0  3  0  0  0  7  0  4  0  0  2 11  0  0  0  0  0  0  0  0  0  0  0  0    



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-3.3153124 -3.6475189 -4.1842575 -2.4639852]
state : [0 1 1 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 5 6 2 4 3 1 3 5 4 3 1 2 0
 0 0 1 0 0 1 0 0 1 2 0]
Q_values : [ -7.903249  -9.381032 -10.944023  -9.509494]
state : [0 0 1 0 2 1 0 0 4 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 6 2 2 8 4 3 5 3 6 2 2 4 1
 2 1 2 0 0 1 6 0 3 0 0]
Q_values : [ -9.413741   -7.8484273 -10.033447   -7.7794795]
state : [0 0 2 0 2 0 0 2 5 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 3 1 6 0 5 3 4 6 6 3 6 3 7
 0 4 2 0 0 1 0 4 4 0 0]
Q_values : [-9.338768 -6.845971 -9.23884  -9.926526]
state : [0 1 0 0 3 3 0 5 0 0 4 2 0 0 0 0 0 0 0 0 0 0 0 0 3 2 3 4 6 5 7 8 2 4 6 6 1
 0 0 2 0 6 1 0 0 2 0 6]
Q_values : [-6.627329  -8.79821   -7.5306296 -5.9586473]
state : [ 0  1  1  0  3  0  0  7  2  0  6  0  0  0  0  0  0  0  0  0  0  0  0  0
  4  6  5  5  9  2  7 10  4  4  8  0  1  0  6  3  0  0  2  0  6  3  0  1]
Q_values : [ -4.5328946  



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.510713  -2.8520253 -3.6717677 -2.2303126]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 4 0 3 2 1 5 2 3 4 4 1 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ -9.72264  -10.506272 -10.868199 -12.086249]
state : [0 0 1 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 3 2 2 5 5 4 5 3 7 3 7 5 1
 1 0 2 0 0 0 3 0 0 0 0]
Q_values : [-15.212979 -13.258566 -12.345213 -14.961877]
state : [0 2 2 0 0 4 0 3 6 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 4 4 5 6 4 5 6 6 7 5 4 7 2
 0 0 3 3 2 2 0 0 1 5 0]
Q_values : [-4.499081 -2.343996 -9.024846 -2.911963]
state : [0 2 0 0 3 5 0 5 0 0 2 6 0 0 0 0 0 0 0 0 0 0 0 0 4 3 1 1 6 8 2 7 2 2 8 7 5
 0 0 5 0 6 3 0 0 5 0 7]
Q_values : [-4.370118  -8.498975  -5.96031   -1.9018762]
state : [ 0  2  1  0  5  0  0  6  1  0  7  0  0  0  0  0  0  0  0  0  0  0  0  0
  8  7  4  3  7  1  5  8  3  3 11  0  2  0  7  1  0  1  1  0  8  2  0  1]
Q_values : [ -3.6452825  -6.7



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.1697993 -2.297826  -2.3898153 -1.5222389]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 1 1 2 0 3 3 3 2 2 0 1 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-8.490961 -7.466122 -8.693947 -6.791644]
state : [0 1 1 0 0 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 1 2 0 7 8 3 4 3 1 1
 0 1 0 0 0 1 0 3 0 0 0]
Q_values : [-7.1950917 -7.031118  -7.952675  -7.8893642]
state : [0 2 0 0 1 0 0 5 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 3 5 0 2 3 1 7 9 3 7 4 2 0
 0 0 4 0 2 1 0 0 1 0 3]
Q_values : [-3.3432772 -7.1326737 -6.700262  -6.566234 ]
state : [0 0 1 0 2 1 0 0 2 0 4 2 0 0 0 0 0 0 0 0 0 0 0 0 3 6 1 6 3 2 2 1 4 5 9 3 1
 7 0 5 0 0 4 4 0 2 0 1]
Q_values : [-8.072057  -7.9290924 -5.0617576 -6.8687096]
state : [0 3 1 0 0 1 0 0 4 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 6 7 3 5 5 4 3 5 7 5 1 4 4
 0 0 2 6 1 5 0 0 3 2 1]
Q_values : [ -9.704382  -9.795588 -10.512609  -8.794707]
state : [ 0  7  0  0



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-3.1155987 -3.6042824 -3.505831  -1.979195 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 2 3 1 4 1 0 3 3 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.219868 -12.499163 -10.198603  -9.874588]
state : [0 2 1 0 1 0 0 1 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 3 5 3 1 2 2 2 3 4 1 1
 0 0 2 0 0 1 0 1 0 0 0]
Q_values : [ -9.198452 -10.011219  -8.493137  -8.935106]
state : [0 3 3 0 0 1 0 2 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 4 5 4 2 4 6 5 5 3 3 2 3 2
 0 0 0 3 0 2 0 1 1 3 0]
Q_values : [-8.422281  -8.683621  -9.916552  -7.1244745]
state : [0 4 4 0 2 0 0 5 2 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 5 7 6 6 2 2 9 4 5 3 1 2
 0 3 3 0 0 2 0 6 1 0 0]
Q_values : [-3.1497648 -5.209242  -6.2583804 -7.1779284]
state : [0 0 7 0 6 1 0 0 4 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 4 4 7 6 8 5 3 4 4 3 3 3 3
 7 1 0 0 0 2 3 2 2 0 0]
Q_values : [-6.1335335 -2.5499837 -4.4494133 -4.7796597]
state : [0 3 0 0



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.7745736 -3.7453933 -3.094846  -2.5863867]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 1 0 4 0 1 4 2 1 5 3 1 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [ -9.054026 -11.620963  -9.072876  -9.454142]
state : [0 0 0 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 7 3 3 3 2 1 3 0 1 6 3 3 2
 2 0 2 0 0 1 0 0 1 0 0]
Q_values : [-7.4237313 -8.961319  -8.221844  -7.300393 ]
state : [0 1 1 0 1 0 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 6 5 3 4 4 1 2 3 2 2 9 2 3
 0 3 1 0 0 4 0 1 5 0 0]
Q_values : [-11.779754 -15.398945 -10.217685 -14.02313 ]
state : [0 5 3 0 0 1 0 1 2 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 5 7 4 7 3 4 5 3 5 4 0 6 1
 0 0 1 5 1 2 0 0 3 3 0]
Q_values : [ -6.3369007  -9.564833  -10.474552   -6.7394176]
state : [0 0 4 0 1 3 0 0 4 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 5 2 6 5 9 5 4 5 9 6 2 7 4
 1 1 2 0 0 2 6 1 3 0 0]
Q_values : [-10.569316  -9.20468   -9.479128  -5.615142]
state : [ 0 



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.414886  -2.8222814 -2.8048458 -1.8246534]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 3 0 1 3 2 0 6 2 2 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-8.801778 -8.441262 -9.275198 -7.556676]
state : [0 1 1 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 3 2 1 0 4 3 3 5 6 3 1
 0 1 0 0 0 2 0 1 0 0 0]
Q_values : [-11.12399   -8.710468  -9.5912   -10.02558 ]
state : [ 0  1  0  0  1  0  0  2  0  0  5  3  0  0  0  0  0  0  0  0  0  0  0  0
  2  2  2  4  5  0  1  6  1  4 10  4  1  0  1  4  0  3  5  0  0  1  0  3]
Q_values : [-7.1076384 -9.858889  -3.6853347 -6.895349 ]
state : [0 2 2 0 0 0 0 4 1 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 2 6 4 2 4 5 6 8 3 2 2 8 2
 0 0 0 9 1 2 0 0 0 3 0]
Q_values : [-10.464136 -10.444897 -13.09949   -7.424513]
state : [ 0  3  4  0  1  0  0  7  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  1 10  5  2  6  3  8 11  6  3  5  3  1  0  8  3  0  0 



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.676548  -2.4878721 -3.0643673 -2.3081954]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 1 0 5 3 0 3 1 0 3 3 1 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-11.114915  -9.671401 -10.530094 -10.602102]
state : [0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 5 3 0 5 6 1 3 3 0 4 4 2 1
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.519873 -11.109139 -11.168244 -12.466236]
state : [0 2 0 0 5 1 0 2 0 0 4 2 0 0 0 0 0 0 0 0 0 0 0 0 4 6 2 7 9 4 4 5 2 3 6 4 3
 0 0 2 0 0 2 0 0 3 0 0]
Q_values : [-8.424225 -9.076491 -5.134228 -9.826292]
state : [ 0  4  2  0  0  4  0  4  2  0  0  4  0  0  0  0  0  0  0  0  0  0  0  0
  3  7  4  2  2  9  6 10  4  5  2  5  4  0  0  1  5  0  1  0  0  2  7  0]
Q_values : [ -4.8311853  -2.684461  -10.928347   -3.3963456]
state : [ 0  7  0  0  1  9  0  8  0  0  1  5  0  0  0  0  0  0  0  0  0  0  0  0
  5  8  1  5  5 11  6 12  4  6  3  6  1  0  0  3  0



state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-2.2470841 -2.8597608 -2.795867  -2.086863 ]
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 1 0 3 1 1 1 4 1 6 3 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-10.400682 -10.86664   -9.929914 -10.486454]
state : [0 0 3 0 0 3 0 0 2 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 7 2 9 4 1 7 3 1 3 5 1 5 0
 4 0 2 0 0 5 1 0 3 0 0]
Q_values : [-7.5799594 -2.9026923 -6.959626  -4.6924386]
state : [0 0 0 0 0 6 0 1 0 0 1 5 0 0 0 0 0 0 0 0 0 0 0 0 5 5 1 6 2 9 1 3 2 3 3 8 2
 0 0 2 0 7 3 0 0 4 0 3]
Q_values : [-7.682519  -7.300917  -7.7950754 -1.92875  ]
state : [0 3 1 0 1 0 0 3 2 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 4 6 3 2 9 0 1 4 3 1 5 0 4
 0 8 0 0 1 2 0 9 4 0 1]
Q_values : [-3.8443756 -4.749043  -3.9259634 -8.246258 ]
state : [0 0 3 0 7 0 0 0 3 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 4 1 5 5 9 2 2 3 4 3 8 2 2
 3 0 1 0 0 2 4 1 3 0 0]
Q_values : [-7.813082  -6.714816  -2.4277265 -6.7144923]
state : [0 1 5 0

In [92]:
# Greedy (epsilon = 0) rollout of the trained DQN in SUMO: every
# DECISION_INTERVAL simulation steps the current state is read, the policy
# picks an action, and the matching traffic-light phase is applied.

# Per-run bookkeeping, reset each time the cell is executed.
rewards = []
best_score = 0
reward = 0
total_reward = 0
list_values = []
weights = []
wait_times = []

# Run length and decision cadence, both expressed in simulation steps.
MAX_STEPS = 100000  # TODO: tune the run length
DECISION_INTERVAL = 2000

# A random SUMO seed makes every run different; replace it with a fixed
# value (e.g. 41) to reproduce a specific run.
seed_value = np.random.randint(1000)
sumoCmd = [sumo_gui_bin, "-c", simulConfig, "--start", "--seed", str(seed_value)]

# Drop any connection left over from a previous (possibly crashed) run.
if traci.isLoaded():
    traci.close()
traci.start(sumoCmd)
try:
    lane_ids = traci.lane.getIDList()
    trafic_light_ids = traci.trafficlight.getIDList()
    # The controlled traffic light is looked up once instead of on every step.
    nom_du_feu = trafic_light_ids[0]

    state = np.array(get_state(lane_ids))
    action = -1
    wait_times.append(0)
    for step in range(MAX_STEPS):
        if step % DECISION_INTERVAL == 0:
            state = np.array(get_state(lane_ids))
            # The policy returns an index in [0, n_outputs); doubling it yields
            # the phase index handed to SUMO (0, 2, 4 or 6).
            action = epsilon_greedy_policy(state, 0) * 2
            print("action", action)
            traci.trafficlight.setPhase(nom_du_feu, action)
        traci.simulationStep()
finally:
    # Always release the TraCI connection, even when the loop aborts (for
    # example with FatalTraCIError after SUMO closes the connection), so the
    # next run starts from a clean state.
    if traci.isLoaded():
        traci.close()


 Retrying in 1 seconds
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-1.9985943 -2.986529  -2.6044343 -1.9641954]
action 6
state : [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 5 3 6 3 3 5 6 3 5 2 0 0
 0 0 0 0 0 0 0 0 0 0 0]
Q_values : [-12.212148  -15.789782  -12.175854  -15.5898485]
action 4
state : [ 0  1  1  0  0  1  0  2  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  4  9  9  4  8  7  5 12  6  6  3  1  1  0  0  0  1  0  2  0  0  1  0  0]
Q_values : [-15.527282 -19.307728 -15.268804 -21.734404]
action 4
state : [ 0  8  7  1  0  6  0  8  6  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
  3 12 11  7  6  9  5 16  8  7  4  1  1  0  0  2  2  1  2  0  0  0  5  0]
Q_values : [ 5.564353   5.2033463 -9.127427  -4.615077 ]
action 0
state : [ 0  0 11  0  1  9  0  0  7  1  3  1  0  0  0  0  0  0  0  0  0  0  0  0
  5  6 15  6  6 10  5  9 10  9  4  5  4  5  4  3  0  0  5  5  3  2  0  0]
Q_values : [-5.9652185  3.7164

FatalTraCIError: Connection closed by SUMO.