In [1]:
# IMPORTS
##########################

import agent
import environment
import doubledqn
import tools
import memory
import simulation
import multiprocessing
import pandas as pd
import csv
import os

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

import numpy as np
import matplotlib.pyplot as plt
import time
import itertools
from keras import optimizers 

def iter_params(**kwargs):
    keys = kwargs.keys()
    vals = kwargs.values()
    for instance in itertools.product(*vals):
        yield dict(zip(keys, instance))

Using TensorFlow backend.


Instructions for updating:
Colocations handled automatically by placer.


In [None]:
# ONE RUN
#############################################

param = { 
    
    "batch_size" : 30, # 30,50,60,[70]
    "target_update_freq" : 10000,#10000], # 10000,20000,30000,[40000]
    "gamma" : 0.99,# # 0.98,0.99,0.995,[0.999]
    "train_freq" : 1, # 2,3,4,[5]
    "max_size" : 100000,#,70000], # 20000,50000,70000,[100000]
    "max_ep_length" : 2000, # 1000,2000,3000,[4000]
    "policy" : "epsGreedy",#, "epsGreedy"],
    "eps" : 0.1,#, 0.3, 0.1],
    "delta_time" : 10,
    #"optimizer": [optimizers.RMSprop(lr= 0.001), optimizers.Adagrad(), optimizers.Adam()]
}

sumo_RL = simulation.simulator(
                    connection_label = "single_worker",
                    q_network_type = 'simple',
                    target_q_network_type = 'simple',
                    gamma = param["gamma"],
                    target_update_freq = param["target_update_freq"],
                    train_freq = param["train_freq"],
                    num_burn_in = 1000,
                    batch_size = param["batch_size"],
                    optimizer = optimizers.RMSprop(lr= 0.001),
                    loss_func = "mse",
                    max_ep_length = param["max_ep_length"],
                    experiment_id = "Testing_Folders",
                    model_checkpoint = True,
                    opt_metric = None,

                   # environment parameters
                    net_file = "cross.net.xml",
                    route_file = "cross.rou.xml",
                    demand = "nominal",
                    state_shape = (1,11),
                    num_actions = 2,
                    use_gui = False,
                    delta_time = param["delta_time"],

                   # memory parameters
                    max_size = param["max_size"],

                   # additional parameters

                    policy = param["policy"],
                    eps = param["eps"],
                    num_episodes = 2,
                    monitoring = True,
                    episode_recording = False,
                    hparams = param.keys())

sumo_RL.train()
# agent.load("./logs/First gridsearch/run_144/model_checkpoints/runFirst gridsearch_iter165000.h5")

# agent.ddqn.train(env = agent.env, num_episodes = 100, policy = agent.policy, connection_label = agent.connection_label)

evaluation_results = sumo_RL.evaluate(runs=2, use_gui= False)
evaluation_results
# data= agent.ddqn.evaluate(env = agent.env, policy = "greedy")

In [2]:
# GRIDSEARCH
###############################################################

experiment_id = "TEsting arguments_2" 


param = { 
    
    "batch_size" : [30,100], # 30,50,60,[70]
    "target_update_freq" : [10000],#10000], # 10000,20000,30000,[40000]
    "gamma" : [0.99],# # 0.98,0.99,0.995,[0.999]
    "train_freq" : [1], # 2,3,4,[5]
    "max_size" : [100000],#,70000], # 20000,50000,70000,[100000]
    "max_ep_length" : [2000], # 1000,2000,3000,[4000]
    "policy" : ["epsGreedy"],#, "epsGreedy"],
    "eps" : [0.1,0.2,0.3,0.4],#, 0.3, 0.1],
    "delta_time" : [10,15,20,30,40],
    #"optimizer": [optimizers.RMSprop(lr= 0.001), optimizers.Adagrad(), optimizers.Adam()]
}

param_grid = iter_params(**param)



def worker(input, output):
    """Runs through a chunk of the grid"""

    for position, args in iter(input.get, 'STOP'):
        result = worker_task(position, args)
        output.put(result)


def worker_task(position, args):
    """Tells the worker what to do with grid chunk"""
    # initialise all obje
    
    # print('Run', position + 1, '-- parameters', args)
    
    sumo_RL = simulation.simulator(
                    connection_label = position + 1,
                    q_network_type = 'simple',
                    target_q_network_type = 'simple',
                    gamma = args["gamma"],
                    target_update_freq = args["target_update_freq"],
                    train_freq = args["train_freq"],
                    num_burn_in = 1000,
                    batch_size = args["batch_size"],
                    optimizer = optimizers.RMSprop(lr= 0.001),
                    loss_func = "mse",
                    max_ep_length = args["max_ep_length"],
                    experiment_id = experiment_id,
                    model_checkpoint = True,
                    opt_metric = None,

                   # environment parameters
                    net_file = "cross.net.xml",
                    route_file = "cross.rou.xml",
                    demand = "nominal",
                    state_shape = (1,11),
                    num_actions = 2,
                    use_gui = False,
                    delta_time = args["delta_time"],

                   # memory parameters
                    max_size = args["max_size"],

                   # additional parameters

                    policy = args["policy"],
                    eps = args["eps"],
                    num_episodes = 2,
                    monitoring = True,
                    episode_recording = False,
                    hparams = args.keys())
    
    # print("training agent", position + 1)
    sumo_RL.train()
    # print("evaluating agent", position + 1)
    evaluation_results = sumo_RL.evaluate(runs = 5)

    return (multiprocessing.current_process().name, position + 1, args, evaluation_results["unfinished_runs"],evaluation_results["average_delay"])


def gridsearch(param_grid):
    """Runs a parallelised gridsearch"""

    number_of_processes = multiprocessing.cpu_count()

    # Set up task list
    tasks = [(idx, val) for idx, val in enumerate(param_grid)]

    # Create queues
    task_queue = multiprocessing.Queue()
    done_queue = multiprocessing.Queue()

    # Submit tasks
    for task in tasks:
        task_queue.put(task)
    # Start worker processes
    for i in range(number_of_processes):
        print('Started process #', i + 1)
        multiprocessing.Process(target = worker,
                                args = (task_queue, done_queue)).start()
      
    # Get and print results
    results = []
    for i in range(len(tasks)):
        results.append(done_queue.get())
        with open(os.path.join("./logs",exp_id,"GS_results"), "a",newline='') as file:
            writer = csv.writer(file, dialect = 'excel')
            writer.writerow(results[-1])                
        print('%s -- [RESULTS]: Run %s -- Parameters %s -- Mean duration %6.0f' % results[-1])
        
    # Tell child processes to stop
    for i in range(number_of_processes):
        task_queue.put('STOP')

    # Now combine the results
    scores = [result[-1] for result in results]
    lowest = min(scores)
    winner = results[scores.index(lowest)]
    return winner, results

multiprocessing.freeze_support()
winner, results = gridsearch(param_grid)
winner

Started process # 1
Started process # 2
Started process # 3
Started process # 4
Started process # 5
Started process # 6
Started process # 7
Started process # 8
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during 

NameError: name 'exp_id' is not defined

Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Episode finished during memory replay fill. Starting new episode...
Run 9 -- running episode 1 / 2
Run 10 -- running

In [None]:
winner

In [None]:
decay_test = []
eps_test = 0.8
for i in range(300000):
    eps_test *= omega_test ** i
    decay_test.append(eps_test)

In [None]:
import matplotlib.pyplot as plt
plt.plot(np.arange(300000),decay_test)

In [None]:
first_results = pd.DataFrame(results).sort_values(by = 3)