In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
from state import get_state, queue
import os
import traci

In [4]:
from dotenv import load_dotenv
load_dotenv()


# Locations of the SUMO executables and of the simulation config, read from the
# process environment (load_dotenv() has been called just above).
# os.getenv returns None for any key that is not set.
sumo_bin, sumo_gui_bin, simulConfig = (
    os.getenv(env_key) for env_key in ("SUMO", "SUMO-GUI", "SIMUL-CONFIG")
)

In [5]:
import tensorflow as tf
import numpy as np
from collections import deque

In [17]:
tf.random.set_seed(42)  # seeds TensorFlow's global RNG (weight initialisation)

# Length of the state vector fed to the network.
# NOTE(review): assumes get_state() returns 48 values per observation — confirm.
input_shape = [48]
# Number of discrete actions, i.e. one Q-value output per action.
n_outputs = 4

# Reference dump of the controlled traffic-light program (kept from exploration):
#   Phase(duration=30.0, state='GGrGrrGGrGrr', minDur=30.0, maxDur=30.0)
#   Phase(duration=20.0, state='grGgrrgrGgrr', minDur=20.0, maxDur=20.0)
#   Phase(duration=30.0, state='GrrGGrGrrGGr', minDur=30.0, maxDur=30.0)
#   Phase(duration=20.0, state='grrgrGgrrgrG', minDur=20.0, maxDur=20.0)
# paired with the indices [0, 2, 4, 6] — presumably the phase indices of these
# four phases, which would match the `2 * action` mapping used with setPhase.

# Online Q-network: maps one state vector to n_outputs Q-values.
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first Dense layer makes recent Keras versions warn.
model_action = tf.keras.Sequential([
    tf.keras.layers.Input(shape=input_shape),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(n_outputs, activation='linear'),  # raw Q-values
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
# Target network: a second network with the same architecture as model_action,
# initialised with identical weights. training_step() reads next-state Q-values
# from it, and the training loop copies model_action's weights into it again
# every 5 episodes.
target = tf.keras.models.clone_model(model_action)  # clone the model's architecture
target.set_weights(model_action.get_weights())  # copy the weights

In [19]:
def epsilon_greedy_policy(state, epsilon=0, verbose=False):
    """Choose an action index with an epsilon-greedy rule over ``model_action``.

    Args:
        state: NumPy array holding a single observation. A leading batch axis
            is added before it is passed to ``model_action.predict``.
        epsilon: probability of returning a uniformly random action in
            ``[0, n_outputs)`` instead of the greedy one. With the default of
            0 the greedy action is always returned.
        verbose: when True, print the state and the predicted Q-values. These
            prints used to be unconditional and flooded the cell output on
            every greedy decision; they are now opt-in.

    Returns:
        int: the selected action index.
    """
    if np.random.rand() < epsilon:
        return int(np.random.randint(n_outputs))  # exploration: random action

    # Exploitation: take the action with the highest predicted Q-value.
    Q_values = model_action.predict(state[np.newaxis], verbose=0)[0]
    if verbose:
        print(f"state : {state}")
        print(f"Q_values : {Q_values}")
    # Cast so both branches return a plain Python int (argmax yields np.int64).
    return int(Q_values.argmax())

In [20]:
def sample_experiences(batch_size):
    """Draw ``batch_size`` transitions from ``replay_buffer`` uniformly at random.

    Indices are drawn with replacement, so a transition may appear more than
    once in the batch. Each stored transition is read as a 4-field record
    ``(state, action, reward, next_state)``.

    Returns:
        tuple: ``(states, actions, rewards, next_states)``, each a NumPy array
        whose first axis has length ``batch_size``.
    """
    picks = np.random.randint(len(replay_buffer), size=batch_size)
    sampled = [replay_buffer[i] for i in picks]

    # Transpose the list of transitions into one array per field.
    columns = []
    for field in range(4):
        columns.append(np.array([transition[field] for transition in sampled]))

    states, actions, rewards, next_states = columns
    return states, actions, rewards, next_states

In [21]:
# Hyperparameters and training objects shared by training_step() and the
# training loop.
batch_size = 32  # transitions sampled from replay_buffer per training_step() call
discount_factor = 0.5  # weight of the next state's Q-value estimate in the TD target
# NOTE(review): 0.05 is a large step size for Adam on a Q-network — confirm it
# is intentional.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.05)
loss_fn = tf.keras.losses.MeanSquaredError()  # loss between TD targets and predicted Q-values


def training_step(batch_size, double_dqn=False):
    """Run one gradient update of ``model_action`` on a sampled mini-batch.

    Transitions come from ``sample_experiences``. The TD target for each one is
    ``reward + discount_factor * Q_next``, where ``Q_next`` is read from the
    ``target`` network. There is no terminal-state masking: every sampled
    transition bootstraps from its next state.

    Previously this function computed Double-DQN targets and then immediately
    overwrote them with plain DQN targets, paying for two extra ``predict``
    calls whose results were discarded. The variant is now an explicit choice.

    Args:
        batch_size: number of transitions to sample.
        double_dqn: if False (default, matches what the old code effectively
            did), ``Q_next`` is the maximum target-network Q-value. If True,
            the online network selects the next action and the target network
            evaluates it (Double DQN).

    Returns:
        None. ``model_action``'s weights are updated in place via ``optimizer``.
    """
    states, actions, rewards, next_states = sample_experiences(batch_size)

    target_next_Q = target.predict(next_states, verbose=0)
    if double_dqn:
        # Action selection by the online network, evaluation by the target network.
        best_next_actions = model_action.predict(next_states, verbose=0).argmax(axis=1)
        max_next_Q_values = target_next_Q[
            np.arange(len(best_next_actions)), best_next_actions
        ]
    else:
        max_next_Q_values = target_next_Q.max(axis=1)

    target_Q_values = rewards + discount_factor * max_next_Q_values
    target_Q_values = target_Q_values.reshape(-1, 1)

    # One-hot mask keeps only the Q-value of the action actually taken.
    mask = tf.one_hot(actions, n_outputs)
    with tf.GradientTape() as tape:
        all_Q_values = model_action(states)
        Q_values = tf.reduce_sum(all_Q_values * mask, axis=1, keepdims=True)
        loss = tf.reduce_mean(loss_fn(target_Q_values, Q_values))

    grads = tape.gradient(loss, model_action.trainable_variables)
    optimizer.apply_gradients(zip(grads, model_action.trainable_variables))

In [None]:
# NOTE(review): sumoConfig3 is not used below — sumoCmd is built from simulConfig.
sumoConfig3 = r"Traditional_traffic/traditional_traffic.sumo.cfg"

# Training schedule (previously inline magic numbers).
N_EPISODES = 80               # number of SUMO runs
STEPS_PER_EPISODE = 130000    # simulation steps per run -- TODO: tune
DECISION_INTERVAL = 2000      # simulation steps between two agent decisions
TARGET_SYNC_EPISODES = 5      # copy online weights into `target` every N episodes
MIN_EPSILON = 0.01            # floor of the exploration rate
WARMUP_TRANSITIONS = batch_size * 10  # buffer size required before training starts

replay_buffer = deque(maxlen=10000)

sumoCmd = [sumo_bin, "-c", simulConfig, '--start']
print(sumoCmd)
for episode in range(N_EPISODES):
    # A previous interrupted run may have left a connection open.
    if traci.isLoaded():
        traci.close()
    traci.start(sumoCmd)
    try:
        lane_ids = traci.lane.getIDList()
        trafic_light_ids = traci.trafficlight.getIDList()

        # Exploration rate decays linearly with the episode index; it does not
        # depend on the step, so it is computed once per episode.
        epsilon = max(1 - episode / N_EPISODES, MIN_EPSILON)

        state = np.array(get_state(lane_ids))
        action = None  # no action taken yet in this episode
        for step in range(STEPS_PER_EPISODE):
            if step % DECISION_INTERVAL == 0:
                next_state = np.array(get_state(lane_ids))
                # Store the transition produced by the previous decision. At
                # the first decision point there is none, so nothing is stored
                # (the old code pushed a placeholder with action=-1).
                if action is not None:
                    # Reward: decrease of the sum of the first 24 state entries.
                    reward = np.sum(state[:24]) - np.sum(next_state[:24])
                    replay_buffer.append((state, action, reward, next_state))

                state = next_state
                action = epsilon_greedy_policy(state, epsilon)
                # Action k selects phase index 2*k of the first traffic light.
                traci.trafficlight.setPhase(trafic_light_ids[0], 2 * action)

                if len(replay_buffer) >= WARMUP_TRANSITIONS:
                    training_step(batch_size)

            traci.simulationStep()
    finally:
        # Always release the SUMO connection, even if the episode raised.
        traci.close()

    if episode % TARGET_SYNC_EPISODES == 0:
        target.set_weights(model_action.get_weights())
    print(f'episode : {episode}')


['/Users/arseneclaustre/sumo/bin/sumo', '-c', 'Traditional_traffic/traditional_traffic.sumo.cfg', '--start']
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 1334616ms, vehicles TOT 0 ACT 0 BUF 0)                
 Retrying in 1 seconds




Step #100.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 84 ACT 51 BUF 0)                   



Step #141.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 108 ACT 44 BUF 0)                  



Step #180.00 (0ms ?*RT. ?UPS, TraCI: 2ms, vehicles TOT 148 ACT 67 BUF 8)                  



Step #220.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 176 ACT 55 BUF 4)                  



Step #240.00 (0ms ?*RT. ?UPS, TraCI: 2ms, vehicles TOT 189 ACT 53 BUF 3)                  



Step #300.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 243 ACT 64 BUF 3)                  



Step #320.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 252 ACT 64 BUF 0)                  



Step #363.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 284 ACT 59 BUF 4)                  



Step #440.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 344 ACT 71 BUF 4)                  



Step #660.00 (1ms ~= 10.00*RT, ~98000.00UPS, TraCI: 1ms, vehicles TOT 517 ACT 98 BUF 5)    #621.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 492 ACT 98 BUF 0)                  



Step #843.00 (0ms ?*RT. ?UPS, TraCI: 0ms, vehicles TOT 650 ACT 68 BUF 4)                  



In [None]:
# Greedy roll-out of the trained policy in the SUMO GUI.
sumoCmd = [sumo_gui_bin, "-c", simulConfig, "--start", "--no-warnings"]

# A previous interrupted run may have left a connection open.
if traci.isLoaded():
    traci.close()
traci.start(sumoCmd)
try:
    lane_ids = traci.lane.getIDList()
    trafic_light_ids = traci.trafficlight.getIDList()
    # Controlled traffic light. Looked up once here instead of issuing a
    # getIDList() request on every simulation step.
    nom_du_feu = trafic_light_ids[0]

    state = np.array(get_state(lane_ids))
    action = 1
    alpha = 0.5  # base of the waiting-time weighting is (1 + alpha)
    # Per-entry counter for the first 24 state entries; starts at 1.
    wating_times = np.ones(shape=(24))

    for step in range(130000):  # TODO: tune
        if step % 2000 == 0:
            next_state = np.array(get_state(lane_ids))
            # NOTE(review): if get_state() yields integers, these slice
            # assignments truncate the weighted values — confirm the dtype.
            next_state[:24] = next_state[:24] * (1 + alpha) ** wating_times

            # An entry that did not decrease (and was non-zero before) has its
            # counter incremented; any other entry is reset to 1.
            stalled = (next_state[:24] >= state[:24]) & (state[:24] != 0)
            wating_times = np.where(stalled, wating_times + 1, 1.0)

            # NOTE(review): the weighting is applied a second time here, now
            # with the updated counters, so each entry ends up scaled by
            # (1 + alpha) ** (old_count + new_count). Kept as in the original;
            # confirm whether one of the two applications should be removed.
            # The training loop in this notebook feeds unweighted get_state()
            # output to the network, so the inputs here differ from training.
            next_state[:24] = next_state[:24] * (1 + alpha) ** wating_times
            state = next_state
            print("etat", state)
            # Greedy action k maps to phase index 2*k.
            action = epsilon_greedy_policy(state, 0) * 2
            traci.trafficlight.setPhase(nom_du_feu, action)
        traci.simulationStep()
finally:
    # Always release the SUMO connection, even if the run raised.
    traci.close()


BrokenPipeError: [Errno 32] Broken pipe