In [2]:
!pip install ray

import gym
import random
import os
import ray
import numpy as np
import multiprocessing as mp
from keras.models     import Sequential
from keras.layers     import Dense
from keras.optimizers import Adam

# Start the Ray runtime in this process. This has to happen before the
# `@ray.remote` actor class defined below is instantiated with `.remote()`.
ray.init()

@ray.remote
class FinalNetwork:
    """Ray actor holding the single network that all workers push updates to.

    The network is obtained by building a full ``CartPole`` wrapper and using
    the Keras model of its agent (``self.x.agent.brain``).
    """

    def __init__(self):
        # The actor only uses the agent's model and weights path, so the
        # replay buffer is an empty list and the actor handle is a dummy 0.
        replay_placeholder = []
        actor_placeholder = 0
        self.x = CartPole(replay_placeholder, actor_placeholder)

    def update_final_network(self, state, target):
        """Run one silent training epoch of the held model on (state, target)."""
        model = self.x.agent.brain
        model.fit(state, target, epochs=1, verbose=0)

    def save_final_network(self):
        """Save the held model to the agent's ``weights_file`` path."""
        agent = self.x.agent
        agent.brain.save(agent.weights_file)
    

class Agent:
    """Epsilon-greedy deep Q-learning agent used by one worker.

    Parameters
    ----------
    state_size : int
        Length of one observation vector (input width of the network).
    action_size : int
        Number of discrete actions (output width of the network).
    shared_replay : list-like
        Replay buffer of ``(state, action, reward, next_state, done)`` tuples.
        If it supports ``append`` (a plain ``list`` or a
        ``multiprocessing.Manager().list()`` proxy) it is used in place, so
        every agent given the same buffer sees the others' transitions.
        Any other iterable is copied into a private list.
    final_network_actor
        Handle whose ``update_final_network.remote(state, target)`` is called
        for every replayed sample. Only used by :meth:`replay`.
    """

    def __init__(self, state_size, action_size, shared_replay, final_network_actor):
        self.weights_file = "final_network.h5"
        self.state_size = state_size
        self.action_size = action_size
        # Keep a reference to the caller's buffer. Wrapping it in ``list(...)``
        # would copy a manager proxy into a process-local list, and nothing
        # appended here would ever reach the other workers.
        if hasattr(shared_replay, "append"):
            self.shared_replay = shared_replay
        else:
            self.shared_replay = list(shared_replay)
        self.learning_rate = 0.001
        self.gamma = 0.95
        self.exploration_rate = 1.0
        self.exploration_min = 0.01
        self.exploration_decay = 0.995
        # Must come after the exploration fields: _build_model() may lower
        # exploration_rate when saved weights are found.
        self.brain = self._build_model()
        self.final_network_actor = final_network_actor

    def _build_model(self):
        """Build and compile the Q-network, loading saved weights if present.

        When ``weights_file`` exists its weights are loaded and exploration is
        dropped straight to ``exploration_min``.
        """
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # ``learning_rate`` replaces the deprecated ``lr`` alias.
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))

        if os.path.isfile(self.weights_file):
            model.load_weights(self.weights_file)
            self.exploration_rate = self.exploration_min
        return model

    def act(self, state):
        """Return an action index for ``state`` using an epsilon-greedy policy.

        With probability ``exploration_rate`` a uniformly random action is
        returned; otherwise the action with the highest predicted value.
        """
        if np.random.rand() <= self.exploration_rate:
            return random.randrange(self.action_size)
        act_values = self.brain.predict(state)
        return np.argmax(act_values[0])

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.shared_replay.append((state, action, reward, next_state, done))

    def _replay_snapshot(self):
        """Return the current replay buffer contents as a plain list."""
        try:
            # One slice yields a real list for both ``list`` and a manager
            # list proxy (a single round trip for the proxy).
            return self.shared_replay[:]
        except TypeError:
            # Buffers that can be appended to but not sliced (e.g. a deque).
            return list(self.shared_replay)

    def replay(self, sample_batch_size):
        """Train on a random batch of remembered transitions.

        Does nothing until the buffer holds at least ``sample_batch_size``
        transitions. Each sampled transition is fitted on the local model and
        also sent to the final-network actor. Exploration is then decayed by
        ``exploration_decay`` while it is above ``exploration_min``.
        """
        # random.sample() needs a real sequence, which a manager proxy is not,
        # so sample from a local snapshot.
        memory = self._replay_snapshot()
        if len(memory) < sample_batch_size:
            return
        sample_batch = random.sample(memory, sample_batch_size)
        for state, action, reward, next_state, done in sample_batch:
            target = reward
            if not done:
                # Bootstrapped Q-learning target for non-terminal transitions.
                target = reward + self.gamma * np.amax(self.brain.predict(next_state)[0])
            target_f = self.brain.predict(state)
            target_f[0][action] = target
            self.brain.fit(state, target_f, epochs=1, verbose=0)
            self.final_network_actor.update_final_network.remote(state, target_f)
        if self.exploration_rate > self.exploration_min:
            self.exploration_rate *= self.exploration_decay

class CartPole:
    """Runs one agent on its own ``CartPole-v1`` environment.

    Parameters
    ----------
    l : list-like
        Replay buffer forwarded to the ``Agent``.
    f
        Final-network actor handle forwarded to the ``Agent`` and also kept
        as ``self.f_n``.
    """

    def __init__(self, l, f):
        self.sample_batch_size = 32
        self.episodes = 10
        self.env = gym.make('CartPole-v1')

        self.state_size = self.env.observation_space.shape[0]
        self.action_size = self.env.action_space.n
        self.agent = Agent(self.state_size, self.action_size, l, f)
        self.f_n = f

    def run(self):
        """Play ``self.episodes`` episodes, training the agent after each one.

        Each step stores the transition in the agent's replay memory. At the
        end of an episode the score (number of steps taken) is printed and
        one replay pass over ``sample_batch_size`` transitions is requested.
        """
        for index_episode in range(self.episodes):
            # The model expects a batch dimension: shape (1, state_size).
            state = np.reshape(self.env.reset(), [1, self.state_size])

            done = False
            steps = 0
            while not done:
                action = self.agent.act(state)

                next_state, reward, done, _ = self.env.step(action)
                next_state = np.reshape(next_state, [1, self.state_size])
                self.agent.remember(state, action, reward, next_state, done)
                state = next_state
                steps += 1
            # `steps` already counts every step taken; adding one more here
            # would overstate the score.
            print("Episode {}# Score: {}".format(index_episode, steps))
            self.agent.replay(self.sample_batch_size)

def worker(l, f):
    """Process entry point: build a ``CartPole`` runner and run its episodes.

    ``l`` is the replay buffer and ``f`` the final-network actor handle; both
    are passed straight through to ``CartPole``.
    """
    CartPole(l, f).run()

if __name__ == "__main__":
    # A manager-backed list can be handed to several processes at once.
    manager = mp.Manager()
    shared_list = manager.list()

    # Single actor that collects the updates from every worker.
    final_network = FinalNetwork.remote()

    processes = [
        mp.Process(target=worker, args=(shared_list, final_network))
        for _ in range(3)
    ]

    # Start all workers first, then wait for all of them, so they run
    # concurrently rather than one after another.
    for process in processes:
        process.start()

    for process in processes:
        process.join()

    print("\n\nTraining is finished.\n\n")

    # `.remote()` only schedules the call and returns immediately. Wait for
    # the result so the file is written before we report success and before
    # the script can exit.
    ray.get(final_network.save_final_network.remote())

    print("Neural network saved.\n\n")

Collecting ray
[?25l  Downloading https://files.pythonhosted.org/packages/11/14/15d0f0aec20a4674a996429160565a071688f27f49f789327ebed8188ffb/ray-1.2.0-cp37-cp37m-manylinux2014_x86_64.whl (47.5MB)
[K     |████████████████████████████████| 47.5MB 82kB/s 
[?25hCollecting gpustat
[?25l  Downloading https://files.pythonhosted.org/packages/b4/69/d8c849715171aeabd61af7da080fdc60948b5a396d2422f1f4672e43d008/gpustat-0.6.0.tar.gz (78kB)
[K     |████████████████████████████████| 81kB 7.0MB/s 
Collecting aiohttp
[?25l  Downloading https://files.pythonhosted.org/packages/88/c0/5890b4c8b04a79b7360e8fe4490feb0bb3ab179743f199f0e6220cebd568/aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3MB)
[K     |████████████████████████████████| 1.3MB 49.6MB/s 
Collecting opencensus
[?25l  Downloading https://files.pythonhosted.org/packages/e2/d6/b952f11b29c3a0cbec5620de3c4260cecd8c4329d83e91587edb48691e15/opencensus-0.7.12-py2.py3-none-any.whl (127kB)
[K     |████████████████████████████████| 

2021-04-06 00:35:09,048	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


Episode 0# Score: 26
Episode 1# Score: 14
Episode 0# Score: 24
Episode 0# Score: 22
Episode 1# Score: 11
Episode 1# Score: 11
Episode 2# Score: 26


[2m[36m(pid=217)[0m 2021-04-06 00:35:12.908248: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
[2m[36m(pid=217)[0m 2021-04-06 00:35:17.941343: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
[2m[36m(pid=217)[0m 2021-04-06 00:35:17.942592: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
[2m[36m(pid=217)[0m 2021-04-06 00:35:17.986482: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
[2m[36m(pid=217)[0m 2021-04-06 00:35:17.986559: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (5803b9a183e1): /proc/driver/nvidia/version does not exist
[2m[36m(pid=217)[0m 2021-04-06 00:35:17.987214: I tensorflow/compiler/jit/xla_gpu_device.cc:99

Episode 2# Score: 15
Episode 2# Score: 21
Episode 3# Score: 13
Episode 3# Score: 12
Episode 3# Score: 15
Episode 4# Score: 17
Episode 4# Score: 20
Episode 4# Score: 12
Episode 5# Score: 18
Episode 6# Score: 11
Episode 5# Score: 23
Episode 5# Score: 19
Episode 7# Score: 17
Episode 6# Score: 18
Episode 6# Score: 29
Episode 7# Score: 34
Episode 8# Score: 28
Episode 7# Score: 25
Episode 9# Score: 14
Episode 8# Score: 13
Episode 8# Score: 20
Episode 9# Score: 14
Episode 9# Score: 13


Training is finished.


Neural network saved.


