In [1]:
# IMPORTS
##########################

%load_ext autoreload
%autoreload 2

import agent
import environment
import doubledqn
import tools
import memory
import simulation
import multiprocessing
import pandas as pd
import os
import json

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

import numpy as np
import matplotlib.pyplot as plt
import time
import itertools
from keras import optimizers 

def iter_params(**kwargs):
    """Yield one dict per element of the Cartesian product of the keyword values.

    Each keyword argument maps a parameter name to an iterable of candidate
    values. Every yielded dict has the same keys as ``kwargs`` and one value
    picked from each iterable; combinations come out in
    ``itertools.product`` order (the last keyword varies fastest).
    """
    names = list(kwargs)
    candidate_lists = [kwargs[name] for name in names]
    for combination in itertools.product(*candidate_lists):
        yield {name: value for name, value in zip(names, combination)}
        
import re
import glob

Using TensorFlow backend.


In [2]:
def get_grid_search_results(path):
    """Summarise a grid search stored in ``<path>/GS_results.json`` as a DataFrame.

    The JSON file must hold a top-level ``"results"`` list. Each entry is read
    for ``"run"``, for ``"eval_delay"`` (``"unfinished_runs"``,
    ``"average_delay"``, ``"episode_mean_delays_fixed"``) and for ``"args"``
    (``"reward"``, ``"policy"``, ``"eps"``, ``"target_update_freq"``).

    Parameters
    ----------
    path : str
        Directory that contains ``GS_results.json``.

    Returns
    -------
    pandas.DataFrame
        One row per entry, with columns ``run_id``, ``unfinished_runs``,
        ``RL_mean_delay``, ``fixed_mean_delay`` (mean of
        ``episode_mean_delays_fixed``), ``reward``, ``policy``, ``eps`` and
        ``update_freq``.
    """
    results_file = os.path.join(path, 'GS_results.json')

    with open(results_file) as handle:
        entries = json.load(handle)['results']

    # Build every column in one pass over the entries; the dict order below
    # fixes the column order of the resulting frame.
    return pd.DataFrame({
        "run_id": [entry["run"] for entry in entries],
        "unfinished_runs": [entry["eval_delay"]["unfinished_runs"] for entry in entries],
        "RL_mean_delay": [entry["eval_delay"]["average_delay"] for entry in entries],
        "fixed_mean_delay": [
            np.mean(entry["eval_delay"]["episode_mean_delays_fixed"]) for entry in entries
        ],
        "reward": [entry["args"]["reward"] for entry in entries],
        "policy": [entry["args"]["policy"] for entry in entries],
        "eps": [entry["args"]["eps"] for entry in entries],
        "update_freq": [entry["args"]["target_update_freq"] for entry in entries],
    })

In [3]:
# Load the grid-search summary written under the
# "Complex Balanced_Negative_Policy" log directory and display it ordered by
# RL mean delay, lowest first. sort_values returns a sorted copy, so `res`
# itself keeps the original row order.
path = "./logs/Complex Balanced_Negative_Policy/"
res = get_grid_search_results(path)
res.sort_values("RL_mean_delay")

Unnamed: 0,run_id,unfinished_runs,RL_mean_delay,fixed_mean_delay,reward,policy,eps,update_freq
1,5,4,189.42062,470.290687,balanced,epsGreedy,0.05,5000
16,17,0,202.35015,425.52145,balanced,epsGreedy,0.05,10000
11,9,1,206.613914,455.903349,balanced,linDecEpsGreedy,0.1,5000
6,7,0,209.90637,430.923903,balanced,linDecEpsGreedy,0.2,5000
18,21,1,210.409072,448.069406,balanced,linDecEpsGreedy,0.1,10000
9,15,0,225.45007,441.90798,balanced,epsGreedy,0.1,10000
0,3,2,245.248098,457.63849,balanced,epsGreedy,0.1,5000
17,18,0,254.671569,461.606941,negative,epsGreedy,0.05,10000
12,14,0,269.231201,440.958992,negative,epsGreedy,0.2,10000
15,16,0,269.33238,460.8337,negative,epsGreedy,0.1,10000


We choose run 17! Run 5 has the lowest mean delay, but its training looks too unstable in TensorBoard.

In [13]:
# Evaluate the selected run: load the `..._iter90000.h5` checkpoint stored
# under run_17 and run 10 evaluation episodes. The value returned by
# evaluate() is the cell output.
# NOTE(review): simulation.simulator is defined outside this notebook; what
# load()/evaluate() do internally cannot be confirmed from here.
sumo_RL = simulation.simulator()
sumo_RL.load("./logs/Complex Balanced_Negative_Policy/run_17/model_checkpoints/runComplex Balanced_Negative_Policy_iter90000.h5")
sumo_RL.evaluate(runs=10)

Evaluate 17 -- running episode 1 / 10
Evaluate 17 -- running episode 2 / 10
Evaluate 17 -- running episode 3 / 10
Evaluate 17 -- running episode 4 / 10
Evaluate 17 -- running episode 5 / 10
Evaluate 17 -- running episode 6 / 10
Evaluate 17 -- running episode 7 / 10
Evaluate 17 -- running episode 8 / 10
Evaluate 17 -- running episode 9 / 10
Evaluate 17 -- running episode 10 / 10


{'runs': 10,
 'unfinished_runs': 0,
 'average_delay': 200.02846573303572,
 'episode_mean_delays': [265.60053499777086,
  178.96005706134093,
  214.82247992863515,
  160.03290083410565,
  213.92980009298,
  173.60754189944134,
  225.22365393465256,
  203.22432560340746,
  193.43245735361918,
  171.4509056244042],
 'episode_mean_delays_fixed': [353.1702317290553,
  304.5482185273159,
  309.10383244206776,
  289.48425925925926,
  296.6177426846261,
  313.48744186046514,
  303.7861085556578,
  310.7503546099291,
  327.5251036388761,
  293.2266666666667]}

In [None]:
# SELECTED RUN SIMPLE
# Load the `..._iter80000.h5` checkpoint of run_6 from the
# "Simple_run_4_reproduce" logs and evaluate it for a single episode.
# NOTE(review): use_gui=True presumably opens the simulator GUI so the
# episode can be watched — confirm against simulation.simulator.evaluate.
sumo_RL = simulation.simulator()
sumo_RL.load("./logs/Simple_run_4_reproduce/run_6/model_checkpoints/runSimple_run_4_reproduce_iter80000.h5")
sumo_RL.evaluate(runs=1, use_gui= True)

In [None]:
# SELECTED RUN COMPLEX
# Load the `..._iter90000.h5` checkpoint of run_4 from the
# "Complex_run_2_reproduce" logs and evaluate it for a single episode.
# NOTE(review): use_gui=True presumably opens the simulator GUI so the
# episode can be watched — confirm against simulation.simulator.evaluate.
sumo_RL = simulation.simulator()
sumo_RL.load("./logs/Complex_run_2_reproduce/run_4/model_checkpoints/runComplex_run_2_reproduce_iter90000.h5")
sumo_RL.evaluate(runs=1, use_gui= True)

In [None]:
# FOR EVALUATION OF OTHER DEMAND SET UP
# For every `run_<n>` entry found in `path`, load the most recently modified
# .h5 checkpoint two directory levels below it, evaluate it for one episode,
# and store all evaluation results in `<path>/<demand>_eval.json` as
# {"results": [...]}.
path = "../Experiment_results/Simple_run_4_reproduce"
runs = re.findall(r'run_\d+',' '.join(os.listdir(path)))

# `demand` is only used here to name the output file.
demand = "test"
eval_file = os.path.join(path, demand + "_eval.json")

sumo_RL = simulation.simulator()
all_results = []
# Iterate over the run names themselves (not over the characters of the
# first name), so every run directory is evaluated.
for run in runs:
    model_folder = os.path.join(path,run) + '/*/*.h5'
    # Newest checkpoint by modification time; max() raises ValueError when a
    # run directory contains no matching .h5 file.
    model_path = max(glob.iglob(model_folder), key=os.path.getmtime)
    sumo_RL.load(model_path)
    results = sumo_RL.evaluate(runs=1)
    all_results.append(results)

    # Rewrite the whole file after each run ("w", not "a"): the file is always
    # one valid JSON document, progress survives an interrupted loop, and
    # re-running the cell replaces the old content instead of appending to it.
    with open(eval_file, "w") as file:
        json.dump({"results": all_results}, file, indent=4)

# Still produce a (valid, empty) result file when no run directory was found.
if not runs:
    with open(eval_file, "w") as file:
        json.dump({"results": all_results}, file, indent=4)
    


In [None]:
º

In [None]:
# Load and display the grid-search summary written under the
# "Simple Balanced_Negative_Policy" log directory (rows in file order).
path = "./logs/Simple Balanced_Negative_Policy/"
get_grid_search_results(path)