In [1]:
# IMPORTS
##########################

import agent
import environment
import doubledqn
import tools
import memory
import simulation
import multiprocessing

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

import numpy as np
import matplotlib.pyplot as plt
import time
import itertools


Using TensorFlow backend.


In [2]:
# first_simulation = simulation.simulator()

In [3]:
# first_simulation.evaluate()

In [4]:
# Define the grid search parameters. Each list holds the candidate values
# for one hyper-parameter; the search evaluates every combination.
batch_size = [50]
target_update_frequency = [50, 100]
gamma = [0.7, 0.9]
eps = [0.1, 0.2]

# Each grid point is a tuple ordered as
# (batch_size, target_update_frequency, gamma, eps), which is the order
# worker_task() indexes into.
# itertools.product returns a one-shot iterator, so materialise it into a
# list: otherwise re-running the search cell without re-running this cell
# would silently see an empty grid.
param_grid = list(itertools.product(batch_size, target_update_frequency, gamma, eps))

In [5]:
def worker(input, output):
    """Consume grid tasks from a queue until the stop sentinel arrives.

    Args:
        input: queue-like object whose ``get()`` yields ``(position, args)``
            tuples, followed by the string ``'STOP'`` to end the loop.
        output: queue-like object; the return value of ``worker_task`` for
            each task is ``put()`` on it.
    """
    while True:
        task = input.get()
        # The sentinel tells this worker there is nothing left to do.
        if task == 'STOP':
            break
        position, args = task
        output.put(worker_task(position, args))


def worker_task(position, args):
    """Build a simulator for one grid point and evaluate it.

    Args:
        position: zero-based index of the grid point; also passed to the
            simulator as ``connection_label``.
        args: indexable of hyper-parameters ordered as
            ``(batch_size, target_update_freq, gamma, eps)``.

    Returns:
        Tuple ``(process_name, position + 1, args, result)`` where ``result``
        is whatever ``simulator.evaluate(cv=5)`` returns.
    """
    # Map the positional grid tuple onto the simulator's keyword arguments.
    sim = simulation.simulator(
        connection_label=position,
        batch_size=args[0],
        target_update_freq=args[1],
        gamma=args[2],
        eps=args[3],
    )

    # Grid positions are reported 1-based.
    grid_number = position + 1
    print(' training grid position', grid_number, 'with parameters', args)

    score = sim.evaluate(cv=5)
    process_name = multiprocessing.current_process().name
    return (process_name, grid_number, args, score)


def gridsearch(param_grid):
    """Runs a parallelised gridsearch

    Every grid point is placed on a task queue and evaluated by a pool of
    worker processes running ``worker``; results are collected from a second
    queue in completion order.

    Args:
        param_grid: iterable of parameter tuples (may be a one-shot iterator,
            e.g. the result of ``itertools.product``).

    Returns:
        ``(winner, results)`` where ``results`` is the list of tuples produced
        by the workers (in completion order) and ``winner`` is the first entry
        whose last element (the score) is the lowest.

    Raises:
        ValueError: if ``param_grid`` yields no grid points.
    """
    # Set up task list. Materialise first: the grid may be a one-shot
    # iterator and the number of tasks is needed below.
    tasks = list(enumerate(param_grid))
    if not tasks:
        # Fail before spawning anything; previously this surfaced only as an
        # opaque "min() arg is an empty sequence" after workers were started.
        raise ValueError('gridsearch() received an empty parameter grid')

    # Never start more workers than there are tasks to hand out.
    number_of_processes = min(multiprocessing.cpu_count(), len(tasks))

    # Create queues
    task_queue = multiprocessing.Queue()
    done_queue = multiprocessing.Queue()

    # Submit tasks
    for task in tasks:
        task_queue.put(task)

    # Keep the process handles so the children can be joined afterwards.
    processes = [multiprocessing.Process(target=worker,
                                         args=(task_queue, done_queue))
                 for _ in range(number_of_processes)]

    results = []
    try:
        # Start worker processes
        for process in processes:
            process.start()

        # Get and print results
        for _ in tasks:
            results.append(done_queue.get())
            print('%s working on task %s with parameters %s gives result %6.0f' % results[-1])
    finally:
        # Tell child processes to stop. Done in ``finally`` so that workers
        # are not left blocked on the task queue if collection is interrupted.
        for _ in processes:
            task_queue.put('STOP')

    # All results have been drained from done_queue, so joining is safe and
    # releases the child process resources.
    for process in processes:
        process.join()

    # Now combine the results: lowest score wins (first one on ties).
    winner = min(results, key=lambda result: result[-1])
    return winner, results

# Launch the grid search only when this code runs as the main module, so a
# child process that re-imports it does not start a search of its own.
if __name__ == '__main__':
    # Per the multiprocessing docs this call only has an effect in a frozen
    # Windows executable; elsewhere it does nothing.
    multiprocessing.freeze_support()
    # winner: the result tuple with the lowest score; results: all result
    # tuples in the order the workers finished them.
    winner, results = gridsearch(param_grid)

Filling experience replay memory...
Filling experience replay memory...
Filling experience replay memory...
Filling experience replay memory...
Filling experience replay memory...
Filling experience replay memory...
Filling experience replay memory...
Filling experience replay memory...
...done filling replay memory
...done filling replay memory
...done filling replay memory
...done filling replay memory
...done filling replay memory
...done filling replay memory
...done filling replay memory
...done filling replay memory
Running episode 2 / 2 training grid position 8 with parameters (50, 100, 0.9, 0.2)
Running episode 2 / 2 training grid position 6 with parameters (50, 100, 0.7, 0.2)
Running episode 2 / 2 training grid position 2 with parameters (50, 50, 0.7, 0.2)
Running episode 2 / 2 training grid position 4 with parameters (50, 50, 0.9, 0.2)
Running episode 2 / 2 training grid position 1 with parameters (50, 50, 0.7, 0.1)
Running episode 2 / 2 training grid position 3 with paramete

In [6]:
results

[('Process-8', 8, (50, 100, 0.9, 0.2), 3155.84),
 ('Process-6', 6, (50, 100, 0.7, 0.2), 3182.67956043956),
 ('Process-2', 2, (50, 50, 0.7, 0.2), 3158.317802197802),
 ('Process-4', 4, (50, 50, 0.9, 0.2), 3208.610989010989),
 ('Process-1', 1, (50, 50, 0.7, 0.1), 4090.5235164835162),
 ('Process-3', 3, (50, 50, 0.9, 0.1), 5580.743296703296),
 ('Process-5', 5, (50, 100, 0.7, 0.1), 5792.809670329671),
 ('Process-7', 7, (50, 100, 0.9, 0.1), 5654.965714285714)]

In [7]:
winner

('Process-8', 8, (50, 100, 0.9, 0.2), 3155.84)