In [None]:
# IMPORTS
##########################

%load_ext autoreload
%autoreload 2

import agent
import environment
import doubledqn
import tools
import memory
import simulation
import multiprocessing
import pandas as pd
import os
import json

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

import numpy as np
import matplotlib.pyplot as plt
import time
import itertools
from keras import optimizers 

def iter_params(**kwargs):
    """Yield one dict per point of the Cartesian product of the given values.

    Every keyword maps a parameter name to an iterable of candidate values.
    Each yielded dict maps every name to exactly one of its candidates; the
    last keyword varies fastest (itertools.product ordering).
    """
    names = list(kwargs)
    candidate_lists = [kwargs[name] for name in names]
    for combination in itertools.product(*candidate_lists):
        yield {name: value for name, value in zip(names, combination)}

In [None]:
# ONE RUN
#############################################
# Build one simulator with a single fixed hyperparameter set and train it.
# The trailing comment on each entry keeps the author's list of candidate values.

param = dict(
    batch_size=30,             # 30,50,60,[70]
    target_update_freq=5000,   # 10000,20000,30000,[40000]
    gamma=0.99,                # 0.98,0.99,0.995,[0.999]
    train_freq=1,              # 2,3,4,[5]
    max_size=100000,           # 20000,50000,70000,[100000]
    max_ep_length=2000,        # 1000,2000,3000,[4000]
    policy="linDecEpsGreedy",  # or "epsGreedy"
    eps=0.2,                   # 0.3, 0.1
    delta_time=10,
    # "optimizer": [optimizers.RMSprop(lr= 0.001), optimizers.Adagrad(), optimizers.Adam()]
)

sumo_RL = simulation.simulator(
    connection_label="single_worker",
    q_network_type="simple",
    target_q_network_type="simple",
    gamma=param["gamma"],
    target_update_freq=param["target_update_freq"],
    train_freq=param["train_freq"],
    num_burn_in=200,
    batch_size=param["batch_size"],
    optimizer="adam",
    loss_func="mse",
    max_ep_length=param["max_ep_length"],
    experiment_id="Check_output",
    model_checkpoint=True,
    opt_metric=None,

    # environment parameters
    network="simple",                 # complex
    net_file="simple_cross.net.xml",  # "complex_cross.net.xml"
    route_file="cross.rou.xml",
    demand="rush",
    state_shape=(1, 15),              # (1,11) or (1,29)
    num_actions=2,                    # 2 or 4
    use_gui=False,
    delta_time=param["delta_time"],

    # memory parameters
    max_size=param["max_size"],

    # additional parameters
    policy=param["policy"],
    eps=param["eps"],
    num_episodes=30,
    monitoring=True,
    episode_recording=False,
    hparams=param.keys(),
)

sumo_RL.train()

# Disabled scratch calls kept for reference:
# agent.load("./logs/First gridsearch/run_144/model_checkpoints/runFirst gridsearch_iter165000.h5")
# agent.ddqn.train(env = agent.env, num_episodes = 100, policy = agent.policy, connection_label = agent.connection_label)
# evaluation_results = agent.evaluate(runs=2, use_gui= False)
# evaluation_results
# data= agent.ddqn.evaluate(env = agent.env, policy = "greedy")

In [None]:
from ann_visualizer.visualize import ann_viz

# Visualise the Q-network of the simulator built in the single-run cell.
ann_viz(sumo_RL.ddqn.q_network, title=" Q network", filename="../network")

In [None]:
# GRIDSEARCH
###############################################################
# Hyperparameter grid: each key lists the values to combine for that setting.
# The trailing comment on each entry keeps the author's list of other candidates.

experiment_id = "Balanced_Negative_Policy"

param = dict(
    batch_size=[30],                           # 30,50,60,[70]
    target_update_freq=[5000, 10000],          # 10000,20000,30000,[40000], 100000
    gamma=[0.99],                              # 0.95, 0.98,0.99,0.995,[0.999]
    train_freq=[1],                            # 2,3,4,[5]
    max_size=[10000],                          # 20000,50000,70000,[100000]
    max_ep_length=[1000],
    policy=["epsGreedy", "linDecEpsGreedy"],
    eps=[0.2, 0.1, 0.05],
    delta_time=[10],
    reward=["balanced", "negative"],
    # "optimizer": [optimizers.RMSprop(lr= 0.001), optimizers.Adagrad(), optimizers.Adam()]
)

# iter_params is a generator function, so param_grid can be consumed only once;
# re-run this cell before starting another search.
param_grid = iter_params(**param)



def worker(input, output):
    """Process (position, args) tasks from `input` until 'STOP' is received.

    Every task is passed to worker_task and the value it returns is put on
    `output`.
    """
    while True:
        task = input.get()
        if task == 'STOP':
            break
        position, args = task
        output.put(worker_task(position, args))


def worker_task(position, args):
    """Train and evaluate one agent for a single point of the grid.

    position -- zero-based index of the grid point; position + 1 is used both
                as the reported run number and as the simulator's
                connection label.
    args     -- dict holding one hyperparameter combination.

    Returns a dict with the run number, the arguments, the evaluation results,
    their "average_delay" entry and the value returned by train().
    """
    build_simulator = simulation.simulator
    run_number = position + 1

    agent_settings = dict(
        connection_label=run_number,
        q_network_type="simple",
        target_q_network_type="simple",
        gamma=args["gamma"],
        target_update_freq=args["target_update_freq"],
        train_freq=args["train_freq"],
        num_burn_in=1000,
        batch_size=args["batch_size"],
        optimizer="adam",
        loss_func="mse",
        max_ep_length=args["max_ep_length"],
        experiment_id=experiment_id,  # notebook-level name set in the grid cell
        model_checkpoint=True,
        opt_metric=None,
    )
    environment_settings = dict(
        network="simple",                 # complex
        net_file="simple_cross.net.xml",  # "complex_cross.net.xml"
        route_file="cross.rou.xml",
        demand="rush",
        state_shape=(1, 15),              # (1,41)
        num_actions=2,                    # 4
        use_gui=False,
        delta_time=args["delta_time"],
        reward=args["reward"],
    )
    memory_settings = dict(max_size=args["max_size"])
    additional_settings = dict(
        policy=args["policy"],
        eps=args["eps"],
        num_episodes=10,
        monitoring=True,
        episode_recording=False,
        eval_fixed=True,
        hparams=args.keys(),
    )

    simulator = build_simulator(
        **agent_settings,
        **environment_settings,
        **memory_settings,
        **additional_settings,
    )

    training_output = simulator.train()
    evaluation_output = simulator.evaluate(runs=5)

    summary = {
        "run": run_number,
        "args": args,
        "eval_delay": evaluation_output,
        "eval_mean_delay": evaluation_output["average_delay"],
        "train_data": training_output,
    }
    return summary


def gridsearch(param_grid):
    """Run a parallelised grid search and stream the results to a JSON file.

    param_grid -- iterable of parameter dicts; each one becomes a task
                  (index, params) that a worker process handles.

    Results are written incrementally to ./logs/<experiment_id>/GS_results.json
    as {"results": [...]} and are also returned as a list, in the order in
    which the workers finished.

    NOTE(review): every result is passed to json.dump, so whatever worker_task
    returns must be JSON-serialisable — confirm for the training data.
    """
    # Set up task list
    tasks = list(enumerate(param_grid))

    # Never start more processes than there are tasks to hand out.
    number_of_processes = min(multiprocessing.cpu_count(), len(tasks))

    # Create queues
    task_queue = multiprocessing.Queue()
    done_queue = multiprocessing.Queue()

    # Submit tasks
    for task in tasks:
        task_queue.put(task)

    # Start worker processes, keeping the handles so they can be joined later.
    processes = []
    for i in range(number_of_processes):
        print('Started process #', i + 1)
        process = multiprocessing.Process(target=worker,
                                          args=(task_queue, done_queue))
        process.start()
        processes.append(process)

    results_dir = os.path.join("./logs", experiment_id)
    os.makedirs(results_dir, exist_ok=True)
    results_path = os.path.join(results_dir, "GS_results.json")

    results = []
    try:
        # "w" rather than "a": appending to a file left by an earlier search
        # with the same experiment id would yield invalid JSON.
        with open(results_path, "w") as file:
            file.write('{ "results": [')
            for i in range(len(tasks)):
                results.append(done_queue.get())
                json.dump(results[-1], file, indent=4)
                if i != len(tasks) - 1:
                    file.write(",\n")
                # Flush so partial results are on disk if the search is interrupted.
                file.flush()
            file.write("]}")
    finally:
        # Tell child processes to stop, even if collecting results failed.
        for _ in processes:
            task_queue.put('STOP')

    # All results have been received, so the workers only wait for 'STOP'.
    for process in processes:
        process.join()

    return results


multiprocessing.freeze_support()
results = gridsearch(param_grid)

In [None]:
# Scratch check of a linearly decaying epsilon schedule.
# NOTE(review): the original cell left the start value blank and never defined
# init_eps, so both endpoints below are placeholders — confirm them against
# the "linDecEpsGreedy" policy implementation.
init_eps = 0.2    # placeholder: same value as the "eps" setting used for training
final_eps = 0.0   # placeholder: value reached after total_it iterations
total_it = 100

eps_schedule = []
for itr in range(100):
    if itr < total_it:
        # Straight line from init_eps (itr = 0) towards final_eps (itr = total_it).
        curr_eps = (final_eps - init_eps) / total_it * itr + init_eps
    else:
        # Past the decay horizon: stay at the final value.
        curr_eps = final_eps
    eps_schedule.append(curr_eps)

In [None]:
# Scratch check of an exponentially decaying epsilon: decay_test[i] = 0.8 * omega_test ** i.
# NOTE(review): omega_test was never defined in the notebook; the value below
# is a placeholder — replace it with the decay factor actually under test.
omega_test = 0.99999

decay_test = []
eps_test = 0.8
for i in range(300000):
    decay_test.append(eps_test)
    # Multiply by the factor once per step. The earlier `omega_test ** i`
    # compounded the exponent, decaying as omega ** (i * (i + 1) / 2).
    eps_test *= omega_test

In [None]:
# Plot the decay curve. The x axis is derived from the data, so the cell keeps
# working if the number of iterations changes. `plt` and `np` come from the
# import cell at the top of the notebook.
plt.plot(np.arange(len(decay_test)), decay_test)
plt.xlabel("iteration")
plt.ylabel("epsilon")
plt.title("Epsilon decay test");

In [None]:
# Rank the grid-search runs by mean evaluation delay, lowest first.
# worker_task returns dicts, so the frame has named columns; the former
# positional key `3` (from tuple-shaped results) no longer exists.
# NOTE(review): `results` must hold the list of run dicts collected by the
# grid search — confirm it is defined before running this cell.
first_results = pd.DataFrame(results).sort_values(by="eval_mean_delay")

In [None]:
# Collect the mean evaluation delay of every stored run, keyed by the run
# number formatted as a string.

res = {}
with open('./logs/Test_evaluate/GS_results.json') as file:
    data = json.load(file)

res.update(
    (f"{entry['run']}", entry["eval_mean_delay"]) for entry in data['results']
)

res

In [None]:
# Show the stored grid-search results file as a DataFrame.
pd.read_json("./logs/Test_evaluate/GS_results.json")