In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import traci
import sys
import argparse
import tensorflow as tf
import numpy as np
from dotenv import load_dotenv

# Import internal modules
from rl_package.rl_logic.Environnement import EnvironnementSumo
from rl_package.rl_logic.Agent import AgentSumo
from rl_package.params import *
# Sanity check: echo a few settings before running anything. None of these
# names is defined in this notebook before this line, so they presumably come
# from the `rl_package.params` star import above.
print(BATCH_SIZE,NAME_SIMULATION,WINDOW)

32 simu_simple 200


In [3]:


# Load environment variables from a local .env file (if present) into
# os.environ so machine-specific settings do not have to be edited here.
load_dotenv()

# Path to the SUMO GUI binary. Set SUMO_GUI_BIN in the environment (or in
# .env) to override it on another machine; the fallback is the original
# macOS install location used by this notebook.
SUMO_GUI_BIN = os.environ.get(
    'SUMO_GUI_BIN',
    '/Library/Frameworks/EclipseSUMO.framework/Versions/1.22.0/EclipseSUMO/bin/sumo-gui',
)
#SUMO_BIN = r"C:/Program Files/rl_project/Eclipse/Sumo/bin/sumo.exe"
# SIMUL_CONFIG = r"double_traffic/double_traffic.sumo.cfg"

# Notebook-local overrides of the values echoed by the import cell.
# NOTE(review): the final run cell reassigns WINDOW again before training.
WINDOW=200
BATCH_SIZE=6

In [4]:
def preprocess():
    """
    Determines the number of inputs and outputs required for each agent.

    Opens a SUMO environment built from the module-level ``SUMO_BIN``,
    ``SIMUL_CONFIG`` and ``WINDOW``, queries every traffic light listed in
    ``env.trafficlights_ids``, and closes the environment again (also when a
    query raises).

    Returns:
        tuple: ``(inputs_per_agents, outputs_per_agents, positions_phases)``,
        three lists holding one entry per traffic light, in the order of
        ``env.trafficlights_ids``:

        - ``inputs_per_agents``: 3 * number of lanes returned by
          ``env.control_lanes``.
        - ``outputs_per_agents``: number of phases returned by
          ``env.get_phase_without_yellow``.
        - ``positions_phases``: the second value returned by
          ``env.get_phase_without_yellow`` (presumably the indices of the
          non-yellow phases -- TODO confirm against the environment class).
    """
    sumoCmd = [SUMO_BIN, "-c", SIMUL_CONFIG, '--start', '--no-warnings']
    env = EnvironnementSumo(sumoCmd, WINDOW)
    inputs_per_agents = []
    outputs_per_agents = []
    positions_phases = []

    try:
        for trafficlight in env.trafficlights_ids:
            # Three input features per controlled lane.
            # NOTE(review): the previous comment listed only "queue + vehicle
            # count"; confirm what the third per-lane feature is.
            n_lanes = len(env.control_lanes(trafficlight))
            inputs_per_agents.append(n_lanes * 3)

            # One network output per selectable (non-yellow) phase.
            phases, position = env.get_phase_without_yellow(trafficlight)
            positions_phases.append(position)
            outputs_per_agents.append(len(phases))
    finally:
        # Close the environment even if a query above fails, so a failed
        # probe does not leave the simulation open.
        env.close()

    return inputs_per_agents, outputs_per_agents, positions_phases


In [5]:
def train_models(inputs_per_agents, outputs_per_agents, position_phases, type_model="DQN",
                 steps_per_episode=50, target_update_every=5):
    """
    Trains multiple reinforcement learning agents to optimize traffic lights.
    Saves each model separately.

    One ``AgentSumo`` is created per (inputs, outputs) pair. If a saved model
    already exists for an agent it is loaded and training resumes from it.
    Training runs for ``EPISODE`` episodes (module-level constant); each
    episode starts a fresh SUMO environment at a traffic scale of 0.4.

    Args:
        inputs_per_agents: number of network inputs for each agent.
        outputs_per_agents: number of network outputs for each agent.
        position_phases: per-traffic-light phase positions, stored on the
            environment as ``env.position_phases``.
        type_model: model type passed to ``AgentSumo`` and used in the saved
            file names; any value other than ``'DQN'`` enables the periodic
            target-network synchronisation.
        steps_per_episode: maximum number of ``env.step`` calls per episode.
        target_update_every: copy the action weights into the target network
            on every episode whose index is a multiple of this value.

    Returns:
        None. Models are written to
        ``models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras``.
    """
    agents = [AgentSumo(type_model, inputs, outputs) for inputs, outputs in zip(inputs_per_agents, outputs_per_agents)]
    model_paths = [f"models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras" for i in range(len(agents))]

    # Load pre-trained models if available
    for i, (agent, model_path) in enumerate(zip(agents, model_paths)):
        agent.build_model()
        if os.path.exists(model_path):
            print(f"🔄 Loading pre-trained model for Agent {i} from {model_path}...")
            agent.model_action = tf.keras.models.load_model(model_path)
            # Only agents that carry a target network (Double/Dueling DQN) get
            # one restored; getattr avoids an AttributeError if the attribute
            # is absent for other model types.
            if getattr(agent, "model_target", None):
                agent.model_target = tf.keras.models.load_model(model_path)

    sumoCmd = [SUMO_BIN, "-c", SIMUL_CONFIG, '--start', '--no-warnings','--scale','0.4']

    for episode in range(EPISODE):
        print(f'🔄 Episode {episode}/{EPISODE}')
        env = EnvironnementSumo(sumoCmd, WINDOW)
        try:
            # Store the position phases of the traffic lights in the environment
            env.position_phases = position_phases

            # Exploration rate decays linearly from 1 towards a floor of 0.01.
            epsilon = max(1 - episode / EPISODE, 0.01)

            states = [env.get_states_per_traffic_light(traffic_light)
                      for traffic_light in env.trafficlights_ids]

            for _ in range(steps_per_episode):
                actions = [agent.epsilon_greedy_policy(np.array(states[i]), epsilon)
                           for i, agent in enumerate(agents)]
                next_states, rewards = env.step(actions)

                for i, agent in enumerate(agents):
                    agent.add_to_memory(np.array(states[i]), np.array(actions[i]),
                                        np.array(rewards[i]), np.array(next_states[i]))

                states = next_states

                # Start learning once one full batch is available. All agents
                # are fed on every step, so the first buffer is used as the
                # reference size.
                if agents and len(agents[0].replay_buffer) >= BATCH_SIZE:
                    for agent in agents:
                        agent.training_step(BATCH_SIZE)

                if env.get_total_number_vehicles() == 0:
                    break  # Stop simulation if no vehicles left
        finally:
            # Close the environment even when a step raises, so a failed
            # episode does not leave the simulation open.
            env.close()

        # Periodically sync target networks for Double/Dueling DQN
        if episode % target_update_every == 0 and type_model != 'DQN':
            for agent in agents:
                agent.model_target.set_weights(agent.model_action.get_weights())

    for i, (agent, model_path) in enumerate(zip(agents, model_paths)):
        # The output directory may not exist on a fresh checkout.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        agent.model_action.save(model_path)
        print(f"✅ Model saved for Agent {i} at: {model_path}")


In [5]:

def load_trained_agents(inputs_per_agents, outputs_per_agents, type_model="DQN"):
    """
    Recreates one agent per (inputs, outputs) pair and restores its saved
    action model from ``models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras``.

    The first agent whose file is missing aborts the run through
    ``sys.exit(1)``; otherwise the list of restored agents is returned.
    """
    agents = []
    for inputs, outputs in zip(inputs_per_agents, outputs_per_agents):
        agents.append(AgentSumo(type_model, inputs, outputs))

    for i in range(len(agents)):
        restored = agents[i]
        model_path = f"models/{NAME_SIMULATION}_{type_model}_Agent{i}.keras"

        # Guard clause: without a saved file there is nothing to restore.
        if not os.path.exists(model_path):
            print(f"❌ No pre-trained model found for Agent {i}.")
            sys.exit(1)

        print(f"🔄 Loading pre-trained model for Agent {i} from {model_path}...")
        restored.build_model()
        # The freshly built action model is replaced by the one read from disk.
        restored.model_action = tf.keras.models.load_model(model_path)

    return agents

In [6]:
def scenario(agents,positions_phases):
    """
    Plays a full simulation in the SUMO GUI with the given trained agents.

    The command line is built from ``SUMO_GUI_BIN`` and ``SIMUL_CONFIG`` at a
    traffic scale of 1 and is printed before the environment is created.
    """
    gui_command = [SUMO_GUI_BIN]
    gui_command += ["-c", SIMUL_CONFIG]
    gui_command += ['--start', '--no-warnings']
    gui_command += ['--scale', '1']
    print(gui_command)

    # This run always uses a window of 200; the module-level WINDOW is not
    # consulted here.
    simulation = EnvironnementSumo(gui_command, 200)
    # Hand the per-traffic-light phase positions to the environment.
    simulation.position_phases = positions_phases
    simulation.full_simul(agents)

In [17]:

# --- Experiment settings (override the defaults set in earlier cells) -------
EPISODE = 50
WINDOW=2000  # used by preprocess() and train_models(); scenario() uses a fixed 200
type_model = "2DQN"
SIMUL_CONFIG = "/Users/arseneclaustre/code/psels/RL_traffic/Traffic/Traditional_traffic/traditional_traffic.sumo.cfg"
print(NAME_SIMULATION)

# --- 1. Probe the network to size each agent -------------------------------
inputs_per_agents, outputs_per_agents,positions_phases = preprocess()
print(
    f'inputs : {inputs_per_agents}',
    f'outputs : {outputs_per_agents}',
    f'positions : {positions_phases}',
    sep='\n',
)

# --- 2. Train, 3. reload the saved models, 4. replay them in the GUI -------
train_models(inputs_per_agents, outputs_per_agents, positions_phases, type_model)
agents = load_trained_agents(inputs_per_agents, outputs_per_agents, type_model)
scenario(agents,positions_phases)


simu_simple
 Retrying in 1 seconds




Step #0.00 (0ms ?*RT. ?UPS, TraCI: 2ms, vehicles TOT 0 ACT 0 BUF 0)                      
inputs : [36]
outputs : [4]
positions : [[0, 2, 4, 6]]
🚀 Création d'un nouveau modèle 2DQN...
🔄 Loading pre-trained model for Agent 0 from models/simu_simple_2DQN_Agent0.keras...
🔄 Episode 0/50
 Retrying in 1 seconds




Step #1000.00 (1ms ~= 10.00*RT, ~40000.00UPS, TraCI: 53ms, vehicles TOT 324 ACT 40 BUF 0)   ~= 10.00*RT, ~28000.00UPS, TraCI: 52ms, vehicles TOT 235 ACT 28 BUF 5)  
🔄 Episode 1/50
 Retrying in 1 seconds




state [0.   0.   0.01 0.   0.26 0.   0.   0.   0.   0.   0.35 0.   0.   0.
 0.5  0.   1.   0.   0.   0.   0.   0.   1.5  0.   0.   0.   1.   0.
 1.   0.   0.   0.   0.   0.   2.   0.  ]
Q_values [-1.8922786  -0.11480398  1.610095   -0.5270375 ]
Step #720.00 (1ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 234 ACT 19 BUF 0)                  state [0.   0.   0.   0.   0.31 0.   0.   0.   0.   0.   0.4  0.   0.   0.
 0.   0.   1.5  0.   0.   0.   0.   0.   1.   0.   0.   0.   0.   1.5
 3.   0.5  0.   0.   0.   0.   1.   0.  ]
Q_values [-2.4163368  0.2584448  1.8139789 -0.7625415]
Step #1000.00 (0ms ?*RT. ?UPS, TraCI: 54ms, vehicles TOT 324 ACT 31 BUF 0)                 
🔄 Episode 2/50
 Retrying in 1 seconds




state [0.   0.   0.   0.   0.26 0.24 0.   0.   0.   0.   0.94 0.   0.   0.
 0.   0.   1.   1.   0.   0.   0.   0.   2.5  0.   0.   0.   0.   0.
 1.   1.   0.   0.   0.   0.   3.   0.  ]
Q_values [-2.5843313   0.37205225  1.9416676  -0.7460659 ]
Step #1000.00 (0ms ?*RT. ?UPS, TraCI: 54ms, vehicles TOT 324 ACT 34 BUF 0)                  ?*RT. ?UPS, TraCI: 55ms, vehicles TOT 235 ACT 18 BUF 5)                 
🔄 Episode 3/50
 Retrying in 1 seconds




state [0.   0.   0.   0.   0.   0.84 0.   0.   0.   0.   0.32 0.84 0.   0.
 0.   0.   0.5  1.   0.   0.   0.   0.   1.5  1.   0.   0.   0.   0.
 1.   2.   0.   0.   0.   0.   2.   1.  ]
Q_values [-3.0332575   1.3254639   2.5907555  -0.84076923]
state [0.   0.   0.   0.   0.   1.2  0.   0.   0.   0.   0.   1.04 0.   0.
 0.   0.   0.   2.   0.   0.   0.   0.   0.   1.   0.   0.   1.   0.
 0.   2.   1.   0.   0.   0.   1.   1.  ]
Q_values [-3.3409595 -2.5450618 -1.7949071  7.4004774]
Step #720.00 (1ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 234 ACT 28 BUF 0)                  state [0.   0.   0.2  0.   0.32 0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.5  0.   1.5  0.   0.   0.   0.   0.   0.   0.   0.   0.5  1.   1.5
 3.   0.5  1.   0.   0.   0.5  0.5  0.  ]
Q_values [-5.9566145 -4.5597625 -3.3664753 -4.6220694]
Step #1000.00 (0ms ?*RT. ?UPS, TraCI: 53ms, vehicles TOT 324 ACT 35 BUF 0)                 
🔄 Episode 4/50
 Retrying in 1 seconds




state [0.   0.   0.   0.   0.   1.13 0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   1.5  0.   0.   0.   0.   0.   0.   0.   0.   1.   0.
 0.   1.5  1.   0.   0.   0.   0.5  0.  ]
Q_values [-3.1973803   0.34306085  2.0092318   0.24697495]
state [0.   0.   0.71 0.   0.08 0.   0.   0.   0.   0.   0.   0.   0.   0.
 2.   0.   0.5  0.   0.   0.   0.   0.   0.   0.   0.   0.   3.   0.
 0.5  0.   1.   0.   0.   0.   1.   0.  ]
Q_values [-4.040493   1.0041981  2.519476  -1.9276351]
state [0.   0.   1.23 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 3.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.5  3.5  0.
 0.   0.   1.   0.   0.   0.5  0.5  0.  ]
Q_values [-3.7847896  1.564257   2.790337  -1.5972203]
Step #720.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 234 ACT 21 BUF 0)                  state [0.  0.  0.3 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  1.  1.  0.  0.  0.  0.  0.  0. ]
Q_values [-5.3082685 -1.7134007



state [0.   0.   1.32 0.   0.   0.   0.   0.   0.   0.   0.47 0.   0.   0.
 4.5  0.   0.   0.   0.   0.   0.   0.   1.5  0.   1.   1.   5.5  1.
 1.   1.   1.   1.   1.   1.   2.5  1.  ]
Q_values [-6.4255304 -1.9871657 -1.505723  -6.179785 ]
state [0.   0.04 2.29 0.   0.   0.04 0.   0.06 0.04 0.   0.   0.04 0.   1.
 5.5  0.   0.   1.   0.   1.   1.   0.   0.   1.   0.   1.   6.5  0.
 0.   1.   1.   1.   1.   0.5  0.5  1.  ]
Q_values [-3.951054   2.188609   2.561385  -1.7517424]
state [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.2 0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  1.  0.  1.  1.  2.5 1.  1.  1.  1.  1.  1.  1.  2.  1. ]
Q_values [-6.721943  -1.3164585 -2.24881   -7.00669  ]
state [0.   0.   0.   0.   0.93 1.11 0.   0.   0.   0.   1.65 0.7  0.   0.
 0.   0.   2.5  3.   0.   0.   0.   0.   2.   1.   0.   0.   1.   1.5
 3.5  4.   1.   0.   0.   0.5  2.5  1.  ]
Q_values [-2.82857     0.07702351  0.08488655  2.2341065 ]
Step #720.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 234 ACT 30



state [0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   0.06 0.04 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   1.   1.   0.   1.5  0.5  0.
 1.   1.   1.   1.   0.   0.5  1.5  1.  ]
Q_values [-2.7782986 -2.1605203 -2.7753205  3.2426243]
state [0.   0.   0.   0.   0.46 0.44 0.   0.   0.   0.   2.62 0.44 0.   0.
 0.   0.   1.   1.   0.   0.   0.   0.   4.   1.   0.   0.   1.5  0.
 1.   1.   1.   0.   0.   0.   5.   1.  ]
Q_values [-6.2768116 -0.809721  -4.823185  -6.9159164]
state [0.   0.   0.   0.   1.23 1.2  0.   0.   0.   0.   5.77 1.04 0.   0.
 0.   0.   2.   2.   0.   0.   0.   0.   6.   1.   0.   0.   1.   0.
 2.   2.   1.   0.   0.   0.   7.   1.  ]
Q_values [-6.2838306 -3.9998913 -9.016907  -9.340793 ]
state [0.   0.   0.   0.   0.06 0.04 0.   0.   0.   0.   0.79 0.   0.   0.
 0.   0.   1.   1.   0.   0.   0.   0.   3.   0.   0.   0.   1.   0.
 1.   1.   1.   0.   0.   0.5  3.5  0.  ]
Q_values [-4.535895    0.664099   -0.95798707 -4.012681  ]
state [0.   0.   0.3  0.   1.08 



state [0.   0.   0.   0.   0.09 0.14 0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.5  1.   0.   0.   0.   0.   0.   0.   0.   0.   1.   0.
 0.5  1.   1.   0.   0.   0.   1.   0.  ]
Q_values [-4.9532456  0.2121067  1.9925381 -3.2692187]
state [0.   0.05 0.   0.   0.06 0.04 0.   0.06 0.   0.   0.48 0.04 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   2.   1.   0.   1.5  0.5  0.
 1.   1.   1.   1.   0.   0.5  2.5  1.  ]
Q_values [-2.25752    -2.8442209   0.09244704  3.203762  ]
state [0.   0.   0.2  0.   0.46 0.   0.   0.   0.   0.   1.5  0.   0.   0.
 1.   0.   1.   0.   0.   0.   0.   0.   3.   0.   0.   0.   2.   0.
 1.   0.   1.   0.   0.   0.   4.   0.  ]
Q_values [ -7.525668  -10.568248   -3.1484523  -5.773334 ]
state [0.   0.   2.25 0.   0.36 0.   0.   0.   0.   0.   0.52 0.   0.   0.
 4.   0.   1.   0.   0.   0.   0.   0.   2.   0.   0.   0.   5.   0.
 1.   0.   1.   0.   0.   0.5  2.5  0.  ]
Q_values [-6.632737  -6.395916  -1.433351  -4.1888957]
state [0.   0.   4.08 0.   0



state [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.71 0.   0.   0.
 0.   0.   0.   0.   0.   0.   0.   0.   2.   0.   0.   0.   1.   1.
 1.   1.   1.   0.   0.   0.   3.   0.  ]
Q_values [-6.2340393  -1.2819228   0.42577195 -6.9121404 ]
state [0.   0.   2.65 0.   0.16 0.   0.   0.   0.   0.   0.2  0.   0.   0.
 4.   0.   1.   0.   0.   0.   0.   0.   1.   0.   0.   0.   5.   0.
 1.   0.   1.   0.   0.   0.   2.   0.  ]
Q_values [-5.6206584   0.09512806  1.9675542  -5.3120794 ]
state [0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   0.48 0.04 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   2.   1.   0.   1.5  0.5  0.
 1.   1.   1.   1.   0.   0.5  2.5  1.  ]
Q_values [-4.7445636  1.7072653  3.1851187 -3.3290677]
state [0.   0.46 0.   0.   0.   0.44 0.   0.46 0.   0.   0.   0.44 0.   1.
 0.   0.   0.   1.   0.   1.   0.   0.   0.   1.   0.   1.   1.   0.
 0.   1.   1.   1.   0.   0.   1.   1.  ]
Q_values [-1.6748896 -3.104527   0.7930095  1.6614118]
state [0.   0.66 0.1  0.   0



state [0.   0.   2.58 0.   0.16 0.   0.   0.   1.04 0.   0.2  0.   0.   0.
 4.   0.   1.   0.   0.   0.   1.   0.   1.   0.   0.   0.   5.   0.
 1.   0.   1.   0.   1.   0.   2.   0.  ]
Q_values [-1.735189   9.385843   0.813149  -2.9005692]
state [0.   0.   0.   0.   1.43 0.   0.   0.   0.   0.   3.08 0.   0.   0.
 0.   0.   2.   0.   0.   0.   0.   0.   4.5  0.   0.   0.   0.   0.
 2.   0.   0.   0.   0.   0.   5.   0.  ]
Q_values [-4.264712   2.312844   4.0895195 -2.2493913]
state [0.   0.   0.01 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.5  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   1.   0.
 0.   0.   0.   0.   0.   0.   0.   0.  ]
Q_values [-3.437784    0.11036229  1.4439371  -0.28480566]
state [0.   0.45 1.47 0.   0.   0.   0.   0.46 0.44 0.   0.   0.   0.   1.
 3.   0.   0.   0.   0.   1.   1.   0.   0.   0.   0.   1.   4.   0.
 0.   0.   1.   1.   1.   0.   0.5  0.  ]
Q_values [-3.4289014  8.851741   1.9950392 -3.3814468]
state [0.   0.   0.52 0.   0.36 



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-3.2666082  -2.1870418  -0.80579424  0.18076587]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-6.7696066 -1.7716179  3.3909276 -6.012677 ]
state [0.   0.   0.2  0.   0.76 0.   0.   0.   0.   0.   2.65 0.   0.   0.
 1.   0.   1.   0.   0.   0.   0.   0.   4.   0.   0.   0.   2.   1.
 2.   1.   1.   0.   0.   0.   5.   0.  ]
Q_values [-6.3004465 -1.6466768  3.35076   -5.7151237]
state [0.   0.   3.08 0.   0.   0.   0.   0.   0.   0.   0.33 0.   0.   0.
 4.5  0.   0.5  0.   0.   0.   0.   0.   1.5  0.   0.   0.   5.   0.
 1.   0.5  0.   0.   0.   0.   2.   0.  ]
Q_values [-4.1905427  1.6906908  3.7360735 -2.61915  ]
state [0.   0.   0.12 0.   0.   0.28 0.   0.   0.   0.   0.   0.   0.   0.
 1.   0.   0.   0.5  0.   0.   0.   0.   0.   0.   0.   0.   1.5  0.
 0.   0.5  0.   0.   0.   



state [0.   0.   0.   0.   0.25 1.39 0.   0.   0.   0.   0.33 0.   0.   0.
 0.   0.   1.   2.   0.   0.   0.   0.   1.5  0.   0.   0.   0.   0.
 1.   2.   0.   0.   0.   0.   2.   0.  ]
Q_values [-2.9969697  -0.81677425  0.7509122   0.33770204]
state [0.   0.   0.   0.   0.76 0.   0.   0.   0.   0.   2.11 0.   0.   0.
 0.   0.   1.   0.   0.   0.   0.   0.   3.5  0.   1.   1.   1.   1.
 2.   1.   1.   1.   1.   1.   4.5  1.  ]
Q_values [-7.9401064 -8.143881   2.831704  -5.476068 ]
state [0.   0.   8.51 0.   0.   0.   0.   0.   2.04 0.   0.   0.   0.   0.
 7.5  0.   0.   0.   0.   0.   1.   0.   0.   0.   0.   0.   8.   0.
 0.   0.   0.   0.   1.   0.   0.   0.  ]
Q_values [ 6.9832025  38.67111    -5.5191174   0.57982874]
state [0.   0.   3.23 0.   0.   1.72 0.   0.   0.84 0.   0.01 0.9  0.   0.
 4.   0.   0.   3.5  0.   0.   1.   0.   0.5  1.   0.   0.   4.5  0.
 1.   4.   0.   0.   1.   0.   1.   1.  ]
Q_values [-3.0065734  4.7587495  3.8007145 -2.4905446]
Step #720.00 (1ms ?*RT. ?UPS



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-6.449895  -4.1694603  4.7104287 -5.185478 ]
state [0.   0.06 0.03 0.   0.   0.04 0.   0.06 0.04 0.   0.   0.04 0.   1.
 1.   0.   0.   1.   0.   1.   1.   0.   0.   1.   0.   1.5  1.5  0.
 0.   1.   1.   1.   1.   0.5  0.5  1.  ]
Q_values [-2.8208919 -3.2075787 -1.2066526  2.107414 ]
state [0.   0.   0.71 0.   0.76 0.   0.   0.   0.   0.   1.53 0.   0.   0.
 2.   0.   1.   0.   0.   0.   0.   0.   3.   0.   0.   0.   3.   1.
 2.   1.   1.   0.   0.   0.   4.   0.  ]
Q_values [-6.9443192 -5.0360937  4.4923296 -5.9667397]
state [0.   0.   1.23 0.   0.   0.04 0.   0.   0.   0.   0.   0.   0.   0.
 3.   0.   0.   1.   0.   0.   0.   0.   0.   0.   0.   0.   4.   0.
 0.   1.   1.   0.   0.   0.5  0.5  0.  ]
Q_values [-4.86063    5.4149704  3.5799336 -2.5990484]
state [0.   0.   0.   0.   0.   0.24 0.   0.   0.   0.   0.01 0.   0.   0.
 0.   0.   0.   1.   0.   0.  



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-1.6813743  -0.59991145  0.5779886   1.7012181 ]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-5.795538  -3.7440703  3.8386228 -5.2677727]
state [0.   0.06 0.03 0.   0.   0.04 0.   0.06 0.04 0.   0.   0.04 0.   1.
 1.   0.   0.   1.   0.   1.   1.   0.   0.   1.   0.   1.5  1.5  0.
 0.   1.   1.   1.   1.   0.5  0.5  1.  ]
Q_values [-1.365417    0.33921123  0.92528105  1.6964984 ]
state [0.   1.26 1.23 0.   0.   0.   0.   1.26 0.   0.   0.   0.   0.   1.
 3.   0.   0.   0.   0.   1.   0.   0.   0.   0.   0.   1.5  3.5  0.
 0.   0.   1.   1.   0.   0.5  0.5  0.  ]
Q_values [-0.14960861 -1.4330803   0.21363401  4.3377447 ]
state [0.   2.46 0.21 0.   0.   0.64 0.   2.46 0.   0.   0.   0.   0.   1.
 1.5  0.   0.   1.   0.   1.   0.   0.   0.   0.   0.   1.5  2.   1.5
 1.   1.5  1.   1



state [0.   0.   0.1  0.   0.   0.64 0.   0.   0.   0.   0.   0.64 0.   0.
 1.   0.   0.   1.   0.   0.   0.   0.   0.   1.   0.   0.5  1.5  1.5
 1.   1.5  1.   0.   0.   0.5  0.5  1.  ]
Q_values [-2.91524   -2.1048539  0.5267377  2.1646414]
state [0.   0.   0.33 0.   0.   0.   0.   0.   0.   0.   0.01 0.   0.   0.
 1.5  0.   0.5  0.   0.   0.   0.   0.   0.5  0.   0.   0.   2.   0.
 1.   0.5  0.   0.   0.   0.   1.   0.  ]
Q_values [-4.5036535  3.9951792  2.3641245 -3.1898892]
state [0.   0.   0.32 0.   0.56 0.   0.   0.   0.   0.   0.94 0.   0.   0.
 1.5  0.   1.   0.   0.   0.   0.   0.   2.5  0.   0.   0.   2.   0.
 1.   0.   0.   0.   0.   0.   3.   0.  ]
Q_values [-4.152995   1.0471743  5.1821504 -2.9918568]
state [0.   0.   0.71 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 2.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   3.   1.
 1.   1.   1.   0.   0.   0.   0.5  0.  ]
Q_values [-5.0624914  6.0313945  0.6934104 -3.9377205]
state [0.   0.   0.94 0.   0.86 0. 



state [0.   0.06 0.03 0.   0.   0.04 0.   0.06 0.04 0.   0.   0.04 0.   1.
 1.   0.   0.   1.   0.   1.   1.   0.   0.   1.   0.   1.5  1.5  0.
 0.   1.   1.   1.   1.   0.5  0.5  1.  ]
Q_values [-3.1914692  3.0820096  2.5691144 -1.041501 ]
state [0.   0.   0.   0.   0.   0.44 0.   0.   0.   0.   0.2  0.44 0.   0.
 0.   0.   0.   1.   0.   0.   0.   0.   1.   1.   0.   0.   1.   0.
 0.   1.   1.   0.   0.   0.   2.   1.  ]
Q_values [-4.6908255  -3.3002124   0.86839485 -2.5910816 ]
state [0.   0.   0.1  0.   0.   0.64 0.   0.   0.   0.   0.   0.64 0.   0.
 1.   0.   0.   1.   0.   0.   0.   0.   0.   1.   0.   0.5  1.5  1.5
 1.   1.5  1.   0.   0.   0.5  0.5  1.  ]
Q_values [-2.5999193  -2.5773637   0.27369165  0.6334791 ]
state [0.   0.   0.33 0.   0.   0.   0.   0.   0.   0.   0.01 0.   0.   0.
 1.5  0.   0.5  0.   0.   0.   0.   0.   0.5  0.   0.   0.   2.   0.
 1.   0.5  0.   0.   0.   0.   1.   0.  ]
Q_values [-3.8519354   0.31264064  4.813023   -3.2377474 ]
state [0.   0.   0.96 0



state [0.   0.   0.1  0.   0.   0.64 0.   0.   0.   0.   0.1  0.64 0.   0.
 1.   0.   0.   1.   0.   0.   0.   0.   1.   1.   0.   0.5  1.5  1.5
 1.   1.5  1.   0.   0.   0.5  1.5  1.  ]
Q_values [-0.829797  -3.5152535 -1.2995176  1.7364874]
state [0.   0.   0.33 0.   0.   0.   0.   0.   0.   0.   0.32 0.   0.   0.
 1.5  0.   0.5  0.   0.   0.   0.   0.   1.5  0.   0.   0.   2.   0.
 1.   0.5  0.   0.   0.   0.   2.   0.  ]
Q_values [-3.6877332  -0.26523143  4.30771    -3.9234407 ]
state [0.   0.   0.2  0.   0.   0.38 0.   0.   0.   0.   0.   0.   0.   0.
 1.   0.   0.   0.5  0.   0.   0.   0.   0.   0.   0.   0.   2.   1.
 1.   1.5  1.   0.   0.   0.   1.   0.  ]
Q_values [-1.7877986  -3.7026532  -1.2475524  -0.60329294]
state [0.   0.   1.53 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 3.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   4.   0.
 0.   0.   1.   0.   0.   0.   0.5  0.  ]
Q_values [-3.155518   6.839686   3.4609723 -2.46255  ]
state [0.   0.   0.   0.   



state [0.   0.   0.26 0.   0.   0.24 0.   0.   0.24 0.   0.01 0.24 0.   0.
 1.5  0.   0.   1.   0.   0.   1.   0.   0.5  1.   0.   0.   2.   0.
 0.   1.   0.   0.   1.   0.   1.   1.  ]
Q_values [-0.93367577 -0.48342383 -1.2714381   2.8998911 ]
state [0.   0.   2.58 0.   0.   0.14 0.   0.   1.04 0.   0.   0.   0.   0.
 4.   0.   0.   1.   0.   0.   1.   0.   0.   0.   0.   0.   5.   0.
 0.   1.   1.   0.   1.   0.   0.5  0.  ]
Q_values [-2.9071078  16.035898    0.75279045  1.1491933 ]
state [0.   0.   0.   0.   0.   0.34 0.   0.   0.   0.   0.06 0.   0.   0.
 0.   0.   0.   1.   0.   0.   0.   0.   0.5  0.   0.   0.   1.   0.
 0.   1.   1.   0.   0.   0.5  1.   0.  ]
Q_values [-1.0536902 -1.0215108 -1.8710537  3.3156476]
state [0.   0.   0.01 0.   0.   0.24 0.   0.   0.   0.   0.   0.   0.   0.
 0.5  0.   0.   1.   0.   0.   0.   0.   0.   0.   0.   0.   1.   0.
 0.   1.   0.   0.   0.   0.   0.   0.  ]
Q_values [-1.2833395  -0.96740705 -1.861609    3.0662265 ]
state [0.   0.   0.01 0.



state [0.   0.06 0.03 0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   1.
 1.   0.   1.   0.   0.   1.   1.   0.   1.   0.   0.   1.5  1.5  0.
 1.   0.   1.   1.   1.   0.5  1.5  0.  ]
Q_values [-1.5535104 -1.9566623 -1.8077917  0.3573966]
state [0.   0.   1.16 0.   0.66 0.   0.   0.   0.64 0.   1.2  0.   0.   0.
 3.   0.   1.   0.   0.   0.   1.   0.   3.   0.   0.   0.5  3.5  1.5
 2.   0.5  1.   0.   1.   0.5  3.5  0.  ]
Q_values [-3.8466136  -0.19280839  4.7506986  -3.813128  ]
state [0.   0.   0.71 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 2.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   3.   1.
 1.   1.   1.   0.   0.   0.   1.   0.  ]
Q_values [-5.648484   -1.658205   -0.19356346 -6.5967827 ]
state [0.   0.   0.   0.   0.36 1.59 0.   0.   0.   0.   0.89 0.   0.   0.
 0.   0.   1.   2.   0.   0.   0.   0.   2.5  0.   0.   0.   1.   0.
 1.   2.   1.   0.   0.   0.5  3.   0.  ]
Q_values [-2.5925422  -0.69451004  2.6253304  -1.2409279 ]
state [0.   0.   0.   0



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-3.0742526  -1.9265031   1.08142     0.40800762]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-7.8642464 -7.448362  -7.0479283 -6.2346745]
state [0.   0.06 0.03 0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   1.
 1.   0.   1.   0.   0.   1.   1.   0.   1.   0.   0.   1.5  1.5  0.
 1.   0.   1.   1.   1.   0.5  1.5  0.  ]
Q_values [-3.6311991  -0.02719438  5.8511434  -2.9634092 ]
state [0.   0.   0.01 0.   0.   0.   0.   0.   0.   0.   0.32 0.   0.   0.
 0.5  0.   0.5  0.   0.   0.   0.   0.   1.5  0.   0.   0.   1.   0.
 1.   0.5  0.   0.   0.   0.   2.   0.  ]
Q_values [-3.7979195 -0.5752339  4.865418  -3.1104114]
state [0.   0.   0.2  0.   0.   0.08 0.   0.   0.   0.   0.   0.   0.   0.
 1.   0.   0.   0.5  0.   0.   0.   0.   0.   0.   0.   0.   2.   0.
 0.   0.5  1.   0.   0



state [0.   0.   0.   0.   0.26 0.24 0.   0.   0.   0.   0.28 0.24 0.   0.
 0.   0.   1.   1.   0.   0.   0.   0.   1.5  1.   0.   0.   0.   0.
 1.   1.   0.   0.   0.   0.   2.   1.  ]
Q_values [-3.3593078 -0.3462068  5.5167704 -2.6881409]
state [0.   0.   0.   0.   0.   0.44 0.   0.   0.   0.   0.   0.44 0.   0.
 0.   0.   0.   1.   0.   0.   0.   0.   0.   1.   0.   0.   1.   0.
 0.   1.   1.   0.   0.   0.   1.   1.  ]
Q_values [-3.2908406 -1.899289  -1.0114574  1.1157491]
state [0.   0.   0.33 0.   0.   0.   0.   0.   0.   0.   0.32 0.   0.   0.
 1.5  0.   0.5  0.   0.   0.   0.   0.   1.5  0.   0.   0.   2.   0.
 1.   1.   0.   0.   0.   0.   2.   0.  ]
Q_values [-3.9934814 -1.4890076  2.4395666 -3.229248 ]
state [0.   0.   1.23 0.   0.   0.34 0.   0.   0.   0.   0.06 0.   0.   0.
 3.   0.   0.   1.   0.   0.   0.   0.   0.5  0.   0.   0.5  3.5  0.
 0.   1.   1.   0.   0.   0.5  1.   0.  ]
Q_values [-3.5504189  3.2766457  3.315518  -1.5334868]
state [0.   0.   1.86 0.   0.   0.54



state [0.   0.   0.   0.   0.   1.2  0.   0.   0.   0.   0.   1.04 0.   0.
 0.   0.   0.   2.   0.   0.   0.   0.   0.   1.   0.   0.   1.   0.
 0.   2.   1.   0.   0.   0.   1.   1.  ]
Q_values [-3.5455992 -1.548172  -1.6566062  3.7443447]
state [0.   0.   0.32 0.   0.   0.   0.   0.   0.   0.   0.32 0.   0.   0.
 1.5  0.   0.   0.   0.   0.   0.   0.   1.5  0.   0.   0.   2.   0.
 0.   0.   0.   0.   0.   0.   2.   0.  ]
Q_values [-5.7309284  -2.565047    0.12393284 -5.0554748 ]
state [0.   0.   0.71 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 2.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   3.   1.
 1.   1.   1.   0.   0.   0.   0.5  0.  ]
Q_values [-6.3165073  0.5976074 -1.711021  -4.3209662]
state [0.   0.   0.   0.   0.46 0.44 0.   0.   0.   0.   0.47 0.   0.   0.
 0.   0.   1.   1.   0.   0.   0.   0.   1.5  0.   0.   0.   1.   0.
 1.   1.   1.   0.   0.   0.   2.5  0.  ]
Q_values [-4.397809  -1.681958   2.589236  -3.4788766]
state [0.   0.   0.   0.   0.   



state [0.   0.   2.58 0.   0.08 1.2  0.   0.   1.04 0.   0.   1.04 0.   0.
 4.   0.   0.5  2.   0.   0.   1.   0.   0.   1.   0.   0.   5.   0.
 0.5  2.   1.   0.   1.   0.   1.   1.  ]
Q_values [-2.9318457  9.251408   2.0790648 -2.642705 ]
state [0.   0.   0.   0.   0.18 1.6  0.   0.   0.   0.   0.1  1.24 0.   0.
 0.   0.   0.5  2.   0.   0.   0.   0.   1.   1.   0.   0.   1.   0.
 0.5  2.   1.   0.   0.   0.5  1.5  1.  ]
Q_values [-1.6755805 -1.890165  -2.0063357  4.7085896]
state [0.   0.   0.   0.   0.38 0.   0.   0.   0.   0.   0.71 0.   0.   0.
 0.   0.   0.5  0.   0.   0.   0.   0.   2.   0.   0.   0.   1.   1.
 1.5  1.   1.   0.   0.   0.   3.   0.  ]
Q_values [-2.1218677 -0.9519479  4.5537477 -2.7930577]
state [0.   0.   0.1  0.   0.   0.04 0.   0.   0.   0.   0.   0.   0.   0.
 1.   0.   0.   1.   0.   0.   0.   0.   0.   0.   0.   0.5  1.5  0.
 0.   1.   1.   0.   0.   0.5  0.5  0.  ]
Q_values [-2.696169  -1.8666397 -1.0738521  1.0321417]
state [0.   0.   1.24 0.   0.   0.  



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-1.1185776 -2.4590569 -1.4272008  1.9515266]
state [0.   0.46 0.   0.   0.   0.   0.   0.46 0.   0.   0.2  0.   0.   1.
 0.   0.   0.   0.   0.   1.   0.   0.   1.   0.   0.   1.   1.   0.
 0.   0.   1.   1.   0.   0.   2.   0.  ]
Q_values [-2.3660297 -2.0020554  2.6720817 -2.8760552]
state [0.   0.66 0.1  0.   0.   0.   0.   0.66 0.   0.   0.   0.   0.   1.
 1.   0.   0.   0.   0.   1.   0.   0.   0.   0.   0.   1.5  1.5  1.5
 1.   0.5  1.   1.   0.   0.5  0.5  0.  ]
Q_values [-1.7551575  -2.294126   -0.81252575  0.5619261 ]
state [0.   0.   1.23 0.   0.   0.18 0.   0.   0.   0.   0.   0.   0.   0.
 3.   0.   0.   0.5  0.   0.   0.   0.   0.   0.   0.   0.5  3.5  0.
 0.   0.5  1.   0.   0.   0.5  0.5  0.  ]
Q_values [-3.0585997  3.8018618  0.6292305 -2.6275058]
state [0.   0.   0.   0.   0.   0.28 0.   0.   0.   0.   0.01 0.   0.   0.
 0.   0.   0.   0.5  0.  



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-2.292021   -0.8552851  -0.6160946  -0.10001683]
state [0.   0.06 0.03 0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   1.
 1.   0.   1.   0.   0.   1.   1.   0.   1.   0.   0.   1.5  1.5  0.
 1.   0.   1.   1.   1.   0.5  1.5  0.  ]
Q_values [-5.0445733 -1.2087548 -4.9001703 -2.5821357]
state [0.   0.   0.   0.   0.46 0.   0.   0.   0.   0.   0.67 0.   0.   0.
 0.   0.   1.   0.   0.   0.   0.   0.   2.   0.   0.   0.   1.   0.
 1.   0.   1.   0.   0.   0.   3.   0.  ]
Q_values [-1.3625953 -1.1549127  5.4124837 -2.4570658]
state [0.   0.   0.   0.   0.36 0.   0.   0.   0.   0.   0.51 0.   0.   0.
 0.   0.   1.   0.   0.   0.   0.   0.   2.   0.   0.   0.   1.   0.
 1.   0.   1.   0.   0.   0.5  2.5  0.  ]
Q_values [-1.3434966 -1.1634259  5.464957  -2.4580867]
state [0.   0.   0.   0.   0.   0.24 0.   0.   0.   0.   0.01 0.   0.   0.
 0.   0.   0.   1.   0.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-1.3426038  -1.1605297  -1.0079365   0.65918136]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-2.402068  -3.5835261 -4.4738526 -7.4132023]
state [0.   0.   0.03 0.   0.06 0.04 0.   0.   0.04 0.   0.06 0.04 0.   0.
 1.   0.   1.   1.   0.   0.   1.   0.   1.   1.   0.   0.5  1.5  0.
 1.   1.   1.   0.   1.   0.5  1.5  1.  ]
Q_values [-1.4940917  -0.66734844 -0.6808257  -1.1211073 ]
state [0.   0.   0.   0.   0.46 0.44 0.   0.   0.   0.   0.67 0.44 0.   0.
 0.   0.   1.   1.   0.   0.   0.   0.   2.   1.   0.   0.   1.   0.
 1.   1.   1.   0.   0.   0.   3.   1.  ]
Q_values [-1.5445611 -1.6134341  4.6192675 -3.1473722]
state [0.   0.   0.1  0.   0.   0.64 0.   0.   0.   0.   0.   0.64 0.   0.
 1.   0.   0.   1.   0.   0.   0.   0.   0.   1.   0.   0.5  1.5  1.5
 1.   1.5  1.   0.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-5.3327093 -4.9857984 -6.0808725 -5.495896 ]
state [0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   0.06 0.04 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   1.   1.   0.   1.5  0.5  0.
 1.   1.   1.   1.   0.   0.5  1.5  1.  ]
Q_values [-3.3474548  -2.976747    0.02170706 -1.7461718 ]
state [0.   0.26 0.01 0.   0.   0.24 0.   0.26 0.   0.   0.   0.24 0.   1.
 0.5  0.   0.   1.   0.   1.   0.   0.   0.   1.   0.   1.   1.   0.
 0.   1.   0.   1.   0.   0.   0.   1.  ]
Q_values [-3.0577395  -1.8333083   0.96257067  1.1718585 ]
state [0.   0.   0.51 0.   0.36 0.   0.   0.   0.   0.   1.23 0.   0.   0.
 2.   0.   1.   0.   0.   0.   0.   0.   3.   0.   0.   0.5  2.5  0.
 1.   0.   1.   0.   0.   0.5  3.5  0.  ]
Q_values [-1.6595753 -1.4659419  5.9889097 -2.6280174]
state [0.   0.   0.   0.   0.06 0.04 0.   0.   0.   0.   0.1  0.   0.   0.
 0.   0.   1.   1.   0



state [0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   0.06 0.04 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   1.   1.   0.   1.5  0.5  0.
 1.   1.   1.   1.   0.   0.5  1.5  1.  ]
Q_values [-4.764042   -3.5725534  -0.7993598   0.11235261]
state [0.   0.26 0.01 0.   0.26 0.   0.   0.26 0.   0.   0.28 0.   0.   1.
 0.5  0.   1.   0.   0.   1.   0.   0.   1.5  0.   0.   1.   1.   0.
 1.   0.   0.5  1.   0.   0.   2.   0.  ]
Q_values [-1.8606635 -1.4376     6.0758653 -2.4774632]
state [0.   0.   0.94 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 2.5  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   3.   0.
 0.5  1.   0.   0.   0.   0.   0.   0.  ]
Q_values [-4.712784  -1.2015816 -0.6411662 -3.7268322]
state [0.   0.   0.   0.   0.   0.34 0.   0.   0.   0.   0.06 0.   0.   0.
 0.   0.   0.   1.   0.   0.   0.   0.   0.5  0.   0.   0.   1.   0.
 0.   1.   1.   0.   0.   0.5  1.   0.  ]
Q_values [-4.082329   -2.9759297  -0.01059341 -0.12319469]
state [0.   0.   0.   0.   0



state [0.   0.   0.64 0.   0.46 0.44 0.   0.   0.44 0.   0.67 0.44 0.   0.
 2.   0.   1.   1.   0.   0.   1.   0.   2.   1.   0.   0.   3.   0.
 1.   1.   1.   0.   1.   0.   3.   1.  ]
Q_values [-2.0903087  -1.274849    1.0240898   0.23329902]
state [0.   0.   1.16 0.   0.   0.64 0.   0.   0.64 0.   0.   0.64 0.   0.
 3.   0.   0.   1.   0.   0.   1.   0.   0.   1.   0.   0.   4.   1.5
 1.   1.5  1.   0.   1.   0.5  0.5  1.  ]
Q_values [-0.81421447  7.5977635   1.2319703  -0.75205314]
state [0.   0.   0.   0.   0.   0.84 0.   0.   0.   0.   0.01 0.84 0.   0.
 0.   0.   0.5  1.   0.   0.   0.   0.   0.5  1.   0.   0.   0.   0.
 1.   2.   0.   0.   0.   0.   1.   1.  ]
Q_values [-2.4689314  -0.94337124  2.4126487   4.66113   ]
state [0.   0.   0.   0.   0.16 0.   0.   0.   0.   0.   0.2  0.   0.   0.
 0.   0.   1.   0.   0.   0.   0.   0.   1.   0.   0.   0.   1.   0.
 1.   0.   1.   0.   0.   0.   2.   0.  ]
Q_values [-2.2244587 -1.4866468  4.494666  -3.0545933]
state [0.   0.   0.09 0



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-2.8115058 -5.6033807 -5.604626  -5.496423 ]
state [0.   0.   0.03 0.   0.06 0.04 0.   0.   0.04 0.   0.06 0.04 0.   0.
 1.   0.   1.   1.   0.   0.   1.   0.   1.   1.   0.   0.5  1.5  0.
 1.   1.   1.   0.   1.   0.5  1.5  1.  ]
Q_values [-2.0905771 -2.3512425  0.4922452  1.1273103]
state [0.   0.   0.26 0.   0.26 0.   0.   0.   0.24 0.   0.28 0.   0.   0.
 1.5  0.   1.   0.   0.   0.   1.   0.   1.5  0.   0.   0.   2.   0.
 1.   0.   0.   0.   1.   0.   2.   0.  ]
Q_values [-1.9508829  -0.7897166  -0.06648922 -2.985811  ]
state [0.   0.   0.64 0.   0.   0.   0.   0.   0.44 0.   0.   0.   0.   0.
 2.   0.   0.   0.   0.   0.   1.   0.   0.   0.   0.   0.   3.   0.
 0.   0.   1.   0.   1.   0.   0.5  0.  ]
Q_values [-1.5080566  4.355782  -1.2315164 -4.1938124]
state [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.06 0.   0.   0.
 0.   0.   0.   0.   0.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-2.7148018  -1.5024405   0.26700926  1.209866  ]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-2.2043803 -4.640626  -5.057915  -3.6376717]
state [0.   0.   0.03 0.   0.06 0.04 0.   0.   0.04 0.   0.06 0.04 0.   0.
 1.   0.   1.   1.   0.   0.   1.   0.   1.   1.   0.   0.5  1.5  0.
 1.   1.   1.   0.   1.   0.5  1.5  1.  ]
Q_values [-2.318134   -0.44063556  0.17503357  0.2838695 ]
state [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.01 0.   0.   0.
 0.   0.   0.5  0.   0.   0.   0.   0.   0.5  0.   0.   0.   0.   0.
 1.   1.   0.   0.   0.   0.   1.   0.  ]
Q_values [-1.654032   -1.9391332  -0.33473206 -3.1126332 ]
state [0.   0.   0.09 0.   0.   0.34 0.   0.   0.   0.   0.06 0.   0.   0.
 1.   0.   0.   1.   0.   0.   0.   0.   0.5  0.   0.   0.5  1.5  0.
 0.   1.   1.   0.



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-2.053677   -3.0704508  -0.29429865  1.7232904 ]
state [0.   0.26 0.   0.   0.26 0.24 0.   0.26 0.   0.   0.28 0.24 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   1.5  1.   0.   1.   0.   0.
 1.   1.   0.5  1.   0.   0.   2.   1.  ]
Q_values [-1.7121353  -3.1324944  -0.02490997 -0.4769827 ]
state [0.   0.46 0.   0.   0.   0.44 0.   0.46 0.   0.   0.   0.44 0.   1.
 0.   0.   0.   1.   0.   1.   0.   0.   0.   1.   0.   1.   1.   0.
 0.   1.   1.   1.   0.   0.   1.   1.  ]
Q_values [-1.6086175  -3.1191378  -0.00465202 -2.1445198 ]
state [0.   0.66 0.1  0.   0.   0.64 0.   0.66 0.   0.   0.   0.64 0.   1.
 1.   0.   0.   1.   0.   1.   0.   0.   0.   1.   0.   1.5  1.5  1.5
 1.   1.5  1.   1.   0.   0.5  0.5  1.  ]
Q_values [-2.068655   -3.3638697  -0.01355124  1.1742322 ]
state [0.   0.86 0.33 0.   0.   0.   0.   0.86 0.   0.   0.01 0.   0.   1.
 1.5  0.   0.



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-2.913123   -2.3079548  -0.05971098  0.21364975]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-2.42199   -5.1022167 -6.570389  -4.6295166]
state [0.   0.   0.26 0.   0.26 0.   0.   0.   0.24 0.   0.28 0.   0.   0.
 1.5  0.   1.   0.   0.   0.   1.   0.   1.5  0.   0.   0.   2.   0.
 1.   0.   0.   0.   1.   0.   2.   0.  ]
Q_values [-2.9415572   2.2368703  -0.98757267 -3.8482633 ]
state [0.   0.   0.   0.   0.46 0.   0.   0.   0.   0.   0.67 0.   0.   0.
 0.   0.   1.   0.   0.   0.   0.   0.   2.   0.   0.   0.   1.   0.
 1.   0.   1.   0.   0.   0.   3.   0.  ]
Q_values [-1.9877197 -1.4035121  6.972961  -2.2934103]
state [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.01 0.   0.   0.
 0.   0.   0.5  0.   0.   0.   0.   0.   0.5  0.   0.   0.   0.   0.
 1.   1.   0.   0.   0



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-1.2681427   0.53694415  0.92682314  0.21893167]
state [0.   0.   0.64 0.   0.   0.   0.   0.   0.44 0.   0.2  0.   0.   0.
 2.   0.   0.   0.   0.   0.   1.   0.   1.   0.   0.   0.   3.   0.
 0.   0.   1.   0.   1.   0.   2.   0.  ]
Q_values [-1.0457597  5.390568  -2.1794558 -3.267691 ]
state [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.94 0.   0.   0.
 0.   0.   0.5  0.   0.   0.   0.   0.   2.5  0.   0.   0.   0.   0.
 1.   1.   0.   0.   0.   0.   3.   0.  ]
Q_values [-1.6742196 -1.3036429  6.572557  -2.2656286]
state [0.   0.   0.71 0.   0.   0.74 0.   0.   0.   0.   0.   0.   0.   0.
 2.   0.   0.   1.   0.   0.   0.   0.   0.   0.   0.   0.   3.   1.
 1.   2.   1.   0.   0.   0.   1.   0.  ]
Q_values [-1.2537503  -0.28417885 -2.0669851  -2.414039  ]
state [0.   0.   0.   0.   0.05 0.99 0.   0.   0.   0.   0.1  0.   0.   0.
 0.   0.   1.   2.   0



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-4.742092  -5.83157   -7.748251  -5.6253057]
state [0.   0.   0.03 0.   0.06 0.04 0.   0.   0.04 0.   0.06 0.04 0.   0.
 1.   0.   1.   1.   0.   0.   1.   0.   1.   1.   0.   0.5  1.5  0.
 1.   1.   1.   0.   1.   0.5  1.5  1.  ]
Q_values [-4.8216867   0.22898245  1.0084486   1.7298365 ]
state [0.   0.   0.26 0.   0.26 0.   0.   0.   0.24 0.   0.28 0.   0.   0.
 1.5  0.   1.   0.   0.   0.   1.   0.   1.5  0.   0.   0.   2.   0.
 1.   0.   0.   0.   1.   0.   2.   0.  ]
Q_values [-4.2755346   2.0172377   0.04024458 -1.4428678 ]
state [0.   0.   0.   0.   0.46 0.   0.   0.   0.   0.   0.67 0.   0.   0.
 0.   0.   1.   0.   0.   0.   0.   0.   2.   0.   0.   0.   1.   0.
 1.   0.   1.   0.   0.   0.   3.   0.  ]
Q_values [-2.3941314 -1.7237213  6.3991737 -2.2564328]
state [0.  0.  0.1 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.
 0.  0.  0.  0.  0.



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-4.19532    -2.48326     0.7265873  -0.04110146]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-5.270444  -5.99938   -8.039625  -5.7220454]
state [0.   0.   0.03 0.   0.06 0.04 0.   0.   0.04 0.   0.06 0.04 0.   0.
 1.   0.   1.   1.   0.   0.   1.   0.   1.   1.   0.   0.5  1.5  0.
 1.   1.   1.   0.   1.   0.5  1.5  1.  ]
Q_values [-4.29093   -1.7187046  1.8901229  1.6353725]
state [0.   0.   0.26 0.   0.   0.24 0.   0.   0.24 0.   0.   0.24 0.   0.
 1.5  0.   0.   1.   0.   0.   1.   0.   0.   1.   0.   0.   2.   0.
 0.   1.   0.   0.   1.   0.   0.   1.  ]
Q_values [-3.8925776  2.7018151  1.0782275 -1.5949129]
state [0.   0.   0.   0.   0.   0.44 0.   0.   0.   0.   0.   0.44 0.   0.
 0.   0.   0.   1.   0.   0.   0.   0.   0.   1.   0.   0.   1.   0.
 0.   1.   1.   0.   0.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-4.404467   -2.058795    0.09455347 -1.7300858 ]
state [0.   0.06 0.03 0.   0.   0.04 0.   0.06 0.04 0.   0.   0.04 0.   1.
 1.   0.   0.   1.   0.   1.   1.   0.   0.   1.   0.   1.5  1.5  0.
 0.   1.   1.   1.   1.   0.5  0.5  1.  ]
Q_values [-4.2519565 -1.2678405 -1.3440061 -3.0175374]
state [0.   0.26 0.   0.   0.   0.24 0.   0.26 0.   0.   0.01 0.24 0.   1.
 0.   0.   0.   1.   0.   1.   0.   0.   0.5  1.   0.   1.   0.   0.
 0.   1.   0.5  1.   0.   0.   1.   1.  ]
Q_values [-4.241005  -2.4344144 -0.6779661 -3.1463723]
state [0.   0.46 0.   0.   0.   0.44 0.   0.46 0.   0.   0.   0.44 0.   1.
 0.   0.   0.   1.   0.   1.   0.   0.   0.   1.   0.   1.   1.   0.
 0.   1.   1.   1.   0.   0.   1.   1.  ]
Q_values [-4.3363013 -2.7879539 -1.1330309 -3.2696908]
state [0.   0.66 0.1  0.   0.   0.64 0.   0.66 0.   0.   0.   0.64 0.   1.
 1.   0.   0.   1.   0.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-6.0484076 -6.0695577 -6.3742404 -3.5700068]
state [0.   0.06 0.03 0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   1.
 1.   0.   1.   0.   0.   1.   1.   0.   1.   0.   0.   1.5  1.5  0.
 1.   0.   1.   1.   1.   0.5  1.5  0.  ]
Q_values [-3.880387  -1.189118  -0.375638  -2.2370682]
state [0.   0.26 0.26 0.   0.   0.   0.   0.26 0.24 0.   0.   0.   0.   1.
 1.5  0.   0.   0.   0.   1.   1.   0.   0.   0.   0.   1.   2.   0.
 0.   0.   0.   1.   1.   0.   0.   0.  ]
Q_values [-3.9532783  0.3035698 -1.6472788 -2.0200934]
state [0.   0.46 0.   0.   0.   0.   0.   0.46 0.   0.   0.   0.   0.   1.
 0.   0.   0.   0.   0.   1.   0.   0.   0.   0.   0.   1.   1.   0.
 0.   0.   1.   1.   0.   0.   1.   0.  ]
Q_values [-4.47051   -4.5033245 -1.8719301 -3.0848808]
state [0.   0.   0.   0.   0.56 0.54 0.   0.   0.   0.   0.94 0.   0.   0.
 0.   0.   1.   1.   0.   0.  



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-2.3739657 -4.6810994  1.5120473 -1.2064492]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-2.2874837 -5.1243143 -1.894464  -4.8660545]
state [0.   0.06 0.03 0.   0.   0.04 0.   0.06 0.04 0.   0.   0.04 0.   1.
 1.   0.   0.   1.   0.   1.   1.   0.   0.   1.   0.   1.5  1.5  0.
 0.   1.   1.   1.   1.   0.5  0.5  1.  ]
Q_values [-1.9045739  -0.20695078  0.78201675 -3.6833193 ]
state [0.   0.26 0.26 0.   0.   0.24 0.   0.26 0.24 0.   0.   0.24 0.   1.
 1.5  0.   0.   1.   0.   1.   1.   0.   0.   1.   0.   1.   2.   0.
 0.   1.   0.   1.   1.   0.   0.   1.  ]
Q_values [-1.8669318  2.8334603 -0.619987  -3.7418303]
state [0.   0.46 0.   0.   0.   0.44 0.   0.46 0.   0.   0.   0.44 0.   1.
 0.   0.   0.   1.   0.   1.   0.   0.   0.   1.   0.   1.   1.   0.
 0.   1.   1.   1.   0.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-2.6382666   1.323535    0.30072498 -1.2091837 ]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-2.9512348  2.7618194 -9.41628   -5.798241 ]
state [0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   0.06 0.04 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   1.   1.   0.   1.5  0.5  0.
 1.   1.   1.   1.   0.   0.5  1.5  1.  ]
Q_values [-2.1486096  1.7930682 -0.7248535 -3.5535946]
state [0.   0.26 0.   0.   0.26 0.24 0.   0.26 0.   0.   0.28 0.24 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   1.5  1.   0.   1.   0.   0.
 1.   1.   0.5  1.   0.   0.   2.   1.  ]
Q_values [-2.9274855  1.3504909  1.0117364  2.1229262]
state [0.   0.46 0.   0.   0.46 0.   0.   0.46 0.   0.   0.67 0.   0.   1.
 0.   0.   1.   0.   0.   1.   0.   0.   2.   0.   0.   1.   1.   0.
 1.   0.   1.   1.   0.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-1.6739756 -0.0572136 -8.501959  -3.7344272]
state [0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   0.06 0.04 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   1.   1.   0.   1.5  0.5  0.
 1.   1.   1.   1.   0.   0.5  1.5  1.  ]
Q_values [-1.5436532   0.7806813  -0.66591835 -1.3505532 ]
state [0.   0.26 0.   0.   0.26 0.24 0.   0.26 0.   0.   0.28 0.24 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   1.5  1.   0.   1.   0.   0.
 1.   1.   0.5  1.   0.   0.   2.   1.  ]
Q_values [-1.2152443  -0.77358997  2.7055335  -2.6719804 ]
state [0.   0.46 0.   0.   0.   0.44 0.   0.46 0.   0.   0.   0.44 0.   1.
 0.   0.   0.   1.   0.   1.   0.   0.   0.   1.   0.   1.   1.   0.
 0.   1.   1.   1.   0.   0.   1.   1.  ]
Q_values [-1.5398488   0.6933994  -0.04328728 -0.5862572 ]
state [0.   0.66 0.   0.   0.   0.64 0.   0.66 0.   0.   0.1  0.64 0.   1.
 0.   0.   0.   1.



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-4.2554493 -1.9081374 -0.9814806  1.444508 ]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-4.7864676 -3.2502086 -8.9233265 -4.565688 ]
state [0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   0.06 0.04 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   1.   1.   0.   1.5  0.5  0.
 1.   1.   1.   1.   0.   0.5  1.5  1.  ]
Q_values [-4.3473086 -2.18092   -1.3947315  1.2233135]
state [0.   0.26 0.01 0.   0.26 0.   0.   0.26 0.   0.   0.28 0.   0.   1.
 0.5  0.   1.   0.   0.   1.   0.   0.   1.5  0.   0.   1.   1.   0.
 1.   0.   0.5  1.   0.   0.   2.   0.  ]
Q_values [-2.8344047 -2.298767   1.7331986 -1.7487581]
state [0.   0.46 0.2  0.   0.   0.   0.   0.46 0.   0.   0.   0.   0.   1.
 1.   0.   0.   0.   0.   1.   0.   0.   0.   0.   0.   1.   2.   0.
 0.   0.   1.   1.   0.   0.  



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-2.440763  -2.5205772  2.7703137 -0.5418292]
state [0.   0.   0.03 0.   0.06 0.04 0.   0.   0.04 0.   0.06 0.04 0.   0.
 1.   0.   1.   1.   0.   0.   1.   0.   1.   1.   0.   0.5  1.5  0.
 1.   1.   1.   0.   1.   0.5  1.5  1.  ]
Q_values [-2.9101703 -2.706961   2.5252557  0.6408373]
state [0.   0.   0.26 0.   0.   0.24 0.   0.   0.24 0.   0.   0.24 0.   0.
 1.5  0.   0.   1.   0.   0.   1.   0.   0.   1.   0.   0.   2.   0.
 0.   1.   0.   0.   1.   0.   0.   1.  ]
Q_values [-1.4895855  2.9050179  2.2254205  0.4604665]
state [0.   0.   0.   0.   0.   0.44 0.   0.   0.   0.   0.   0.44 0.   0.
 0.   0.   0.   1.   0.   0.   0.   0.   0.   1.   0.   0.   1.   0.
 0.   1.   1.   0.   0.   0.   1.   1.  ]
Q_values [-3.4567957 -2.726738   2.2683024  2.6551538]
state [0.  0.  0.1 0.  0.  0.  0.  0.  0.  0.  0.1 0.  0.  0.  1.  0.  0.  0.
 0.  0.  0.  0.  1.  0.  0.



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-3.366395  -1.5860765 -1.3895149 -1.6139922]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-4.2441454 -2.4687586 -8.077803  -5.378902 ]
state [0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   0.06 0.04 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   1.   1.   0.   1.5  0.5  0.
 1.   1.   1.   1.   0.   0.5  1.5  1.  ]
Q_values [-3.3461766 -1.6388717 -1.4213705 -2.4244614]
state [0.   0.26 0.01 0.   0.   0.24 0.   0.26 0.   0.   0.   0.24 0.   1.
 0.5  0.   0.   1.   0.   1.   0.   0.   0.   1.   0.   1.   1.   0.
 0.   1.   0.   1.   0.   0.   0.   1.  ]
Q_values [-3.6450653 -1.603132  -1.1889315 -1.372916 ]
state [0.   0.46 0.2  0.   0.   0.44 0.   0.46 0.   0.   0.   0.44 0.   1.
 1.   0.   0.   1.   0.   1.   0.   0.   0.   1.   0.   1.   2.   0.
 0.   1.   1.   1.   0.   0.  



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-0.70849496 -1.7371054  -0.42507744 -2.3150043 ]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-0.8961647 -4.5066757 -8.054333  -5.804862 ]
state [0.   0.   0.03 0.   0.06 0.04 0.   0.   0.04 0.   0.06 0.04 0.   0.
 1.   0.   1.   1.   0.   0.   1.   0.   1.   1.   0.   0.5  1.5  0.
 1.   1.   1.   0.   1.   0.5  1.5  1.  ]
Q_values [-0.32058963  0.5754404   0.01807594 -2.4912376 ]
state [0.   0.   0.   0.   0.26 0.24 0.   0.   0.   0.   0.29 0.24 0.   0.
 0.   0.   1.   1.   0.   0.   0.   0.   1.5  1.   0.   0.   0.   0.
 1.   1.   0.   0.   0.   0.   2.   1.  ]
Q_values [-0.5196761  -1.9280512  -0.11906242  2.9310105 ]
state [0.   0.   0.   0.   0.86 0.   0.   0.   0.   0.   1.82 0.   0.   0.
 0.   0.   1.5  0.   0.   0.   0.   0.   3.5  0.   0.   0.   0.   0.
 2.   1.   0.   0.



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-5.051439  -5.2814093 -3.1128578 -6.034009 ]
state [0.   0.06 0.03 0.   0.   0.04 0.   0.06 0.04 0.   0.   0.04 0.   1.
 1.   0.   0.   1.   0.   1.   1.   0.   0.   1.   0.   1.5  1.5  0.
 0.   1.   1.   1.   1.   0.5  0.5  1.  ]
Q_values [-4.508072   0.5571362  0.3558731 -2.896238 ]
state [0.   0.26 0.   0.   0.   0.24 0.   0.26 0.   0.   0.01 0.24 0.   1.
 0.   0.   0.   1.   0.   1.   0.   0.   0.5  1.   0.   1.   0.   0.
 0.   1.   0.5  1.   0.   0.   1.   1.  ]
Q_values [-4.151025  -3.3644319  1.9131718 -1.3208582]
state [0.   0.46 0.   0.   0.   0.44 0.   0.46 0.   0.   0.   0.44 0.   1.
 0.   0.   0.   1.   0.   1.   0.   0.   0.   1.   0.   1.   1.   0.
 0.   1.   1.   1.   0.   0.   1.   1.  ]
Q_values [-4.011435  -3.5159814  1.5182238 -2.3036764]
state [0.   0.66 0.1  0.   0.   0.64 0.   0.66 0.   0.   0.   0.64 0.   1.
 1.   0.   0.   1.   0.   1.  



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-0.3142337 -1.6638418  1.482666  -1.7128675]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-1.7353607 -3.241036   1.1974955 -4.4434667]
state [0.   0.06 0.03 0.   0.   0.04 0.   0.06 0.04 0.   0.   0.04 0.   1.
 1.   0.   0.   1.   0.   1.   1.   0.   0.   1.   0.   1.5  1.5  0.
 0.   1.   1.   1.   1.   0.5  0.5  1.  ]
Q_values [-0.19308609  0.32991004  1.3970251  -2.9010034 ]
state [0.   0.26 0.26 0.   0.   0.24 0.   0.26 0.24 0.   0.   0.24 0.   1.
 1.5  0.   0.   1.   0.   1.   1.   0.   0.   1.   0.   1.   2.   0.
 0.   1.   0.   1.   1.   0.   0.   1.  ]
Q_values [ 0.3743791  2.2998228  0.7566004 -1.800518 ]
state [0.   0.46 0.   0.   0.   0.44 0.   0.46 0.   0.   0.   0.44 0.   1.
 0.   0.   0.   1.   0.   1.   0.   0.   0.   1.   0.   1.   1.   0.
 0.   1.   1.   1.   0.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-4.504301   -3.0117397  -0.7523184  -0.83519244]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-7.920811  -4.9338803 -7.8015833 -6.192869 ]
state [0.   0.06 0.   0.   0.06 0.04 0.   0.06 0.   0.   0.06 0.04 0.   1.
 0.   0.   1.   1.   0.   1.   0.   0.   1.   1.   0.   1.5  0.5  0.
 1.   1.   1.   1.   0.   0.5  1.5  1.  ]
Q_values [-3.8878942  -2.9740868  -0.27328968  0.78223205]
state [0.   0.26 0.01 0.   0.26 0.   0.   0.26 0.   0.   0.29 0.   0.   1.
 0.5  0.   1.   0.   0.   1.   0.   0.   1.5  0.   0.   1.   1.   0.
 1.   0.   0.5  1.   0.   0.   2.   0.  ]
Q_values [-3.0752006 -2.4635425  3.4524646 -3.44132  ]
state [0.   0.66 0.   0.   0.   0.   0.   0.66 0.   0.   0.06 0.   0.   1.
 0.   0.   0.   0.   0.   1.   0.   0.   0.5  0.   0.   1.   1.   1.5
 1.   0.5  1.   1.   



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-2.3299406  -1.4764807  -0.26594543 -0.20348787]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-3.3926077 -5.240678  -7.431978  -4.074883 ]
state [0.   0.   0.03 0.   0.06 0.04 0.   0.   0.04 0.   0.06 0.04 0.   0.
 1.   0.   1.   1.   0.   0.   1.   0.   1.   1.   0.   0.5  1.5  0.
 1.   1.   1.   0.   1.   0.5  1.5  1.  ]
Q_values [-3.0506687  2.8916721  0.8880186 -1.7881856]
state [0.   0.   0.   0.   0.26 0.24 0.   0.   0.   0.   0.28 0.24 0.   0.
 0.   0.   1.   1.   0.   0.   0.   0.   1.5  1.   0.   0.   0.   0.
 1.   1.   0.   0.   0.   0.   2.   1.  ]
Q_values [-1.3753242  -2.3206086   0.74660015  5.9204607 ]
state [0.   0.   0.   0.   0.46 0.   0.   0.   0.   0.   0.67 0.   0.   0.
 0.   0.   1.   0.   0.   0.   0.   0.   2.   0.   0.   0.   1.   0.
 1.   0.   1.   0.   0



state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-0.3228003  -2.6861684   0.42019176  0.69802976]
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Q_values [-0.3094094 -3.7665846 -3.725645  -2.7088253]
state [0.   0.   0.03 0.   0.06 0.04 0.   0.   0.04 0.   0.06 0.04 0.   0.
 1.   0.   1.   1.   0.   0.   1.   0.   1.   1.   0.   0.5  1.5  0.
 1.   1.   1.   0.   1.   0.5  1.5  1.  ]
Q_values [-1.3965603  2.2199655  0.3539467 -1.4008619]
state [0.   0.   0.   0.   0.26 0.24 0.   0.   0.   0.   0.28 0.24 0.   0.
 0.   0.   1.   1.   0.   0.   0.   0.   1.5  1.   0.   0.   0.   0.
 1.   1.   0.   0.   0.   0.   2.   1.  ]
Q_values [-0.45226836 -2.3109374   3.5431633  -2.2474005 ]
state [0.   0.   0.   0.   0.   0.44 0.   0.   0.   0.   0.   0.44 0.   0.
 0.   0.   0.   1.   0.   0.   0.   0.   0.   1.   0.   0.   1.   0.
 0.   1.   1.   0.   0

FatalTraCIError: Connection closed by SUMO.

In [None]:
# Sanity check: show which simulation name is currently active in the kernel
# (presumably supplied by the star import from rl_package.params — TODO confirm).
print(NAME_SIMULATION)

simu_simple


In [7]:
# Evaluation setup: load the saved "2DQN" agents for the traditional-traffic network.
type_model = "2DQN"

# preprocess() takes no arguments and reads the module-level SIMUL_CONFIG, so the
# config path has to be rebound here before calling it.
# The path can be overridden with the TRADITIONAL_TRAFFIC_CONFIG environment
# variable; the fallback is the original hard-coded location, so behavior is
# unchanged on the original machine when the variable is unset.
SIMUL_CONFIG = os.environ.get(
    "TRADITIONAL_TRAFFIC_CONFIG",
    "/Users/pr/code/psels/RL_traffic/Traffic/Traditional_traffic/traditional_traffic.sumo.cfg",
)

# Fail early with a readable message instead of letting SUMO abort and TraCI
# loop on "Connection refused" when the config file is not where we expect it.
if not os.path.isfile(SIMUL_CONFIG):
    raise FileNotFoundError(
        f"SUMO config not found: {SIMUL_CONFIG!r}. "
        "Set TRADITIONAL_TRAFFIC_CONFIG to the path of traditional_traffic.sumo.cfg."
    )

# Starts SUMO once to size each agent: number of inputs, number of outputs and
# the phase positions, one entry per traffic light.
inputs_per_agents, outputs_per_agents, positions_phases = preprocess()

# NOTE(review): the recorded output of this cell shows the weights being looked up
# under the NAME_SIMULATION prefix ("models/simu_simple_2DQN_Agent0.keras") and then
# a new model being created, even though the config above targets the
# traditional-traffic network — confirm the intended model file is actually loaded.
agents = load_trained_agents(inputs_per_agents, outputs_per_agents, type_model)

 Retrying in 1 seconds




Step #0.00 (0ms ?*RT. ?UPS, TraCI: 2ms, vehicles TOT 0 ACT 0 BUF 0)                      
🔄 Loading pre-trained model for Agent 0 from models/simu_simple_2DQN_Agent0.keras...
🚀 Création d'un nouveau modèle 2DQN...


In [8]:
# Quick inspection of what load_trained_agents returned (the recorded run shows a
# list containing a single AgentSumo instance).
print(agents)

[<rl_package.rl_logic.Agent.AgentSumo object at 0x14d2f2470>]


In [10]:
# Run scenario() with the loaded agents and the phase positions returned by
# preprocess(); scenario is defined in an earlier cell of this notebook.
# NOTE(review): the recorded run of this cell printed "X Fatal error." and several
# TraCI "Connection refused" retries — verify the SUMO binary and SIMUL_CONFIG
# before re-running.
scenario(agents,positions_phases)

 Retrying in 1 seconds


X Fatal error.


Could not connect to TraCI server at localhost:60662 [Errno 61] Connection refused
 Retrying in 1 seconds
Could not connect to TraCI server at localhost:60662 [Errno 61] Connection refused
 Retrying in 1 seconds
Could not connect to TraCI server at localhost:60662 [Errno 61] Connection refused
 Retrying in 1 seconds
Could not connect to TraCI server at localhost:60662 [Errno 61] Connection refused
 Retrying in 1 seconds
200
state [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Q_values [-1.8551202  -2.4557972  -0.83280563 -0.6843622 ]
200
state [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 0.  0.  0.  0.  0.  0.  1.5 2.  0.5 1.  1.  1.  2.  1.  1.  1.5 2.  0.5]
Q_values [-2.7647412 -4.1467075 -7.3522043 -4.015019 ]
200
state [0.   0.   0.01 0.   0.04 0.01 0.   0.   0.01 0.   0.05 0.02 0.   0.
 0.5  0.   1.   0.5  0.   0.   0.5  0.   1.   0.5  0.5  0.5  3.   1.
 1.   1.   2.5  0.   1.   0.5  3.5  1

X Fatal error.
