In [1]:
import gym
import random
import os
import numpy as np
from multiprocessing  import Process, Queue
from queue            import Empty
from collections      import deque
from keras.models     import Sequential
from keras.layers     import Dense
from keras.optimizers import Adam

globalMemory = Queue()

class Agent():
    def __init__(self, state_size, action_size):
        self.weight_backup      = "cartpole_weight.h5"
        self.state_size         = state_size
        self.action_size        = action_size
        self.memory             = deque(maxlen=2000)
        self.learning_rate      = 0.001
        self.gamma              = 0.95
        self.exploration_rate   = 1.0
        self.exploration_min    = 0.01
        self.exploration_decay  = 0.995
        self.brain              = self._build_model()

    def _build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))

        if os.path.isfile(self.weight_backup):
            model.load_weights(self.weight_backup)
            self.exploration_rate = self.exploration_min
        return model

    def save_model(self):
            self.brain.save(self.weight_backup)

    def act(self, state):
        if np.random.rand() <= self.exploration_rate:
            return random.randrange(self.action_size)
        act_values = self.brain.predict(state)
        return np.argmax(act_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
        globalMemory.put((state, action, reward, next_state, done))

    def replay(self, sample_batch_size):
        if globalMemory.qsize() < sample_batch_size:
            return
        batch_indexes = set(random.sample(range(globalMemory.qsize()), sample_batch_size))
        replayCopy = []
        while True:
            try:
                item = globalMemory.get(block=True, timeout=1)
            except Empty:
                break
            else:
                replayCopy.append(item)
        for item in replayCopy:
            globalMemory.put(item)
        sample_batch = [replayCopy[index] for index in batch_indexes]
        for state, action, reward, next_state, done in sample_batch:
            target = reward
            if not done:
              target = reward + self.gamma * np.amax(self.brain.predict(next_state)[0])
            target_f = self.brain.predict(state)
            target_f[0][action] = target
            self.brain.fit(state, target_f, epochs=1, verbose=0)
        if self.exploration_rate > self.exploration_min:
            self.exploration_rate *= self.exploration_decay

class CartPole:
    def __init__(self):
        self.sample_batch_size = 32
        self.episodes          = 10
        self.env               = gym.make('CartPole-v1')

        self.state_size        = self.env.observation_space.shape[0]
        self.action_size       = self.env.action_space.n
        self.agent             = Agent(self.state_size, self.action_size)


    def run(self):
        try:
            for index_episode in range(self.episodes):
                state = self.env.reset()
                state = np.reshape(state, [1, self.state_size])

                done = False
                index = 0
                while not done:
                    # self.env.render()

                    action = self.agent.act(state)

                    next_state, reward, done, _ = self.env.step(action)
                    next_state = np.reshape(next_state, [1, self.state_size])
                    self.agent.remember(state, action, reward, next_state, done)
                    state = next_state
                    index += 1
                print("Episode {}# Score: {}".format(index_episode, index + 1))
                self.agent.replay(self.sample_batch_size)
        finally:
            self.agent.save_model()

def runDQN():
    cartpole = CartPole()
    cartpole.run()

if __name__ == "__main__":
    processes = []
    for n in range(3):
        p = Process(target=runDQN, args=())
        p.name = 'process' + str(n)
        processes.append(p)
        
    for p in processes:
        p.start()
    
    for p in processes:
        p.join()
        
    print("Training finished.")

Episode 0# Score: 19
Episode 1# Score: 37
Episode 0# Score: 31
Episode 1# Score: 25
Episode 2# Score: 13
Episode 3# Score: 22
Episode 0# Score: 20
Episode 4# Score: 19
Episode 1# Score: 13
Episode 5# Score: 17
Episode 2# Score: 16
Episode 6# Score: 59
Episode 3# Score: 14
Episode 4# Score: 26
Episode 5# Score: 32
Episode 7# Score: 14
Episode 6# Score: 21
Episode 8# Score: 66
Episode 7# Score: 24
Episode 8# Score: 20
Episode 9# Score: 25
Episode 9# Score: 11


Process process1:
Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/usr/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-1-7b3f4b6ef4da>", line 113, in runDQN
    cartpole.run()
  File "<ipython-input-1-7b3f4b6ef4da>", line 109, in run
    self.agent.save_model()
  File "<ipython-input-1-7b3f4b6ef4da>", line 40, in save_model
    self.brain.save(self.weight_backup)
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py", line 2002, in save
    signatures, options, save_traces)
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/saving/save.py", line 154, in save_model
    model, filepath, overwrite, include_optimizer)
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/saving/hdf5_format.py", line 108, in save_model_to_hdf5
    f = h5py.File(filepat

Episode 2# Score: 13
Episode 3# Score: 14
Episode 4# Score: 19
Episode 5# Score: 57
Episode 6# Score: 14
Episode 7# Score: 16
Episode 8# Score: 18
Episode 9# Score: 20


Process process0:
Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/process.py", line 300, in _bootstrap
    util._exit_function()
  File "/usr/lib/python3.7/multiprocessing/util.py", line 360, in _exit_function
    _run_finalizers()
  File "/usr/lib/python3.7/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/usr/lib/python3.7/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 192, in _finalize_join
    thread.join()
  File "/usr/lib/python3.7/threading.py", line 1044, in join
    self._wait_for_tstate_lock()
  File "/usr/lib/python3.7/threading.py", line 1060, in _wait_for_tstate_lock
    elif lock.acquire(block, timeout):
KeyboardInterrupt


KeyboardInterrupt: ignored