In [1]:
# IMPORTS
##########################

import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

import agent
import doubledqn
import environment
import memory
import tools


# MAIN
##################################

# --- Experiment bookkeeping ---
experiment_id = "Reward_Waiting_Time"
env_name = "Simple_Cross"  # not referenced again in the cells visible here
monitoring = True # Store variables for TensorBoard monitoring and model_checkpoints

# --- Problem dimensions (shared by the networks, the environment and the replay memory) ---
num_actions = 2
state_shape = (1,11) # State var in rows

# --- Settings handed to the DoubleDQN constructor below ---
memory_size = 100000
gamma = 0.8
target_update_frequency = 100
num_init_samples_mem = 1000
batch_size = 50
max_episode_length = 100000
optimizer = 'adam'
loss = "mse"

# --- Passed as `eps` to ddqn.train together with the "epsGreedy" policy name ---
eps = 0.2

# Logging targets: both stay None unless monitoring is switched on
if not monitoring:
    output_dir = None
    summary_writer = None
else:
    output_dir = tools.get_output_folder("./Logs", experiment_id)
    summary_writer = tf.summary.FileWriter(logdir=output_dir)

# Value network and target network are built with identical arguments.
# The model input shape is the second entry of state_shape wrapped in a 1-tuple.
network_input_shape = (state_shape[1],)
q_network = agent.get_model('simple', network_input_shape, num_actions)
target_q_network = agent.get_model('simple', network_input_shape, num_actions)

# Training environment (GUI disabled)
sumo_env = environment.Env(
    "cross.net.xml",
    "cross.rou.xml",
    state_shape,
    num_actions,
    use_gui=False,
)

# Replay memory
mem = memory.ReplayMemory(memory_size, state_shape, num_actions)

# Double DQN algorithm; every argument is positional, so the order below must be kept
ddqn = doubledqn.DoubleDQN(
    q_network,
    target_q_network,
    mem,
    gamma,
    target_update_frequency,
    num_init_samples_mem,
    batch_size,
    optimizer,
    loss,
    max_episode_length,
    sumo_env,
    output_dir,
    experiment_id,
    summary_writer,
)

# Populate the replay memory before any training step
ddqn.fill_replay(sumo_env)

# Train. NOTE(review): the second positional argument (1) is presumably an
# episode/iteration count -- confirm against DoubleDQN.train.
# The return value is bound to `_` so the cell shows no result.
_ = ddqn.train(sumo_env, 1, "epsGreedy", eps=eps)

Instructions for updating:
Colocations handled automatically by placer.


Using TensorFlow backend.


Filling experience replay memory...
...Done
Instructions for updating:
Use tf.cast instead.

Current reward mean+std: 14.0 0.0


In [2]:
# NOTE(review): generate_routefile() is called before the environment is rebuilt;
# presumably it rewrites the route file loaded below -- confirm in tools.
tools.generate_routefile()

# Rebind sumo_env to a new environment built from the same files and shapes
# as the training one, this time with the GUI enabled.
sumo_env = environment.Env(
    "cross.net.xml",
    "cross.rou.xml",
    state_shape,
    num_actions,
    use_gui=True,
)

# Evaluate with the "greedy" policy name; the result is tabulated with pandas further down.
data = ddqn.evaluate(sumo_env, "greedy")

In [18]:
# Tabulate the evaluation output once and preview only the first rows instead of
# displaying the whole frame. pandas is imported with the other dependencies in
# the first cell, so this cell no longer carries its own import.
eval_df = pd.DataFrame(data)
eval_df.head(10)

Unnamed: 0,action,it,next_state,q_values,reward,state
0,1,1,"[[0.010146103896103896, 0.020292207792207792, ...","[[-8.057847, -5.194837]]",-0.0,"[[0.010146103896103896, 0.040584415584415584, ..."
1,1,2,"[[0.010146103896103896, 0.010146103896103896, ...","[[-10.506003, -0.0733846]]",-0.0,"[[0.010146103896103896, 0.020292207792207792, ..."
2,1,3,"[[0.010146103896103896, 0.010146103896103896, ...","[[-12.426409, -0.36252594]]",-0.0,"[[0.010146103896103896, 0.010146103896103896, ..."
3,1,4,"[[0.010146103896103896, 0.020292207792207792, ...","[[-11.726765, -2.9848783]]",-0.0,"[[0.010146103896103896, 0.010146103896103896, ..."
4,0,5,"[[0.0016038573966701188, 0.020292207792207792,...","[[-6.6298327, -7.932057]]",-6.0,"[[0.010146103896103896, 0.020292207792207792, ..."
5,1,6,"[[0.010146103896103896, 0.020292207792207792, ...","[[-11.901776, 3.529679]]",6.0,"[[0.0016038573966701188, 0.020292207792207792,..."
6,1,7,"[[0.020292207792207792, 0.010146103896103896, ...","[[-8.805028, -1.2283254]]",-0.0,"[[0.010146103896103896, 0.020292207792207792, ..."
7,1,8,"[[0.020292207792207792, 0.0022885455506515087,...","[[-8.848594, -3.6441333]]",-0.0,"[[0.020292207792207792, 0.010146103896103896, ..."
8,1,9,"[[0.020292207792207792, 0.020292207792207792, ...","[[-10.154388, -1.2444631]]",-0.0,"[[0.020292207792207792, 0.0022885455506515087,..."
9,1,10,"[[0.020292207792207792, 0.020292207792207792, ...","[[-14.500717, 2.5590017]]",-3.0,"[[0.020292207792207792, 0.020292207792207792, ..."


In [15]:
# Show the complete "state" entry stored at row label 22 of the evaluation output
pd.DataFrame(data)["state"].loc[22]

array([[1.01461039e-02, 2.02922078e-02, 1.01461039e-02, 2.02922078e-02,
        4.69677030e+00, 1.32240679e+01, 9.25382090e+00, 1.63081880e+01,
        2.00000000e+00, 0.00000000e+00, 1.40000000e+01]])