In [10]:
!pip install keras-rl2

Collecting keras-rl2
  Downloading keras_rl2-1.0.5-py3-none-any.whl (52 kB)
[?25l[K     |██████▎                         | 10 kB 25.3 MB/s eta 0:00:01[K     |████████████▋                   | 20 kB 10.1 MB/s eta 0:00:01[K     |██████████████████▉             | 30 kB 9.1 MB/s eta 0:00:01[K     |█████████████████████████▏      | 40 kB 8.4 MB/s eta 0:00:01[K     |███████████████████████████████▍| 51 kB 5.5 MB/s eta 0:00:01[K     |████████████████████████████████| 52 kB 683 kB/s 
Installing collected packages: keras-rl2
Successfully installed keras-rl2-1.0.5


In [1]:
import gym 
import random
import numpy as np


In [2]:
# Create the CartPole environment and reset it to an initial state.
environment = gym.make('CartPole-v0')
environment.reset()

# Problem dimensions; the model and agent cells further down are sized from these.
actions = environment.action_space.n                 # size of the discrete action space
states = environment.observation_space.shape[0]      # first dimension of the observation shape

In [3]:
# Show the two dimensions read from the environment.
dimension_report = 'States:{}  Actions:{}'.format(states, actions)
print(dimension_report)

States:4  Actions:2


In [4]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [7]:
def build_model(states, actions, hidden_units=22):
    """Build the Q-network: a small fully connected net with two hidden layers.

    Args:
        states: Size of the observation vector. The network input has shape
            ``(1, states)`` and is flattened before the first dense layer.
        actions: Number of units in the output layer (one per action).
        hidden_units: Width of each of the two ReLU hidden layers. Defaults to
            22, the width used for the results recorded in this notebook;
            raise it to experiment with a larger network.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    return Sequential([
        # NOTE(review): the leading 1 presumably corresponds to the
        # window_length=1 the replay memory is created with -- keep in sync.
        Flatten(input_shape=(1, states)),
        Dense(hidden_units, activation='relu'),
        Dense(hidden_units, activation='relu'),
        # No activation is passed here, so the layer's default is used.
        Dense(actions),
    ])

In [17]:
# Instantiate the network for this environment's dimensions and print its layers.
model = build_model(states=states, actions=actions)
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 4)                 0         
                                                                 
 dense_6 (Dense)             (None, 22)                110       
                                                                 
 dense_7 (Dense)             (None, 22)                506       
                                                                 
 dense_8 (Dense)             (None, 2)                 46        
                                                                 
Total params: 662
Trainable params: 662
Non-trainable params: 0
_________________________________________________________________


In [16]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [18]:
# Replay memory capped at 50000 entries, with a window length of 1.
memory = SequentialMemory(limit=50000, window_length=1)

# DQN agent wrapping the network above, using a Boltzmann exploration policy.
# NOTE(review): per the keras-rl docs, target_model_update < 1 is expected to
# select soft target-network updates -- confirm against the installed version.
agent = DQNAgent(
    model=model,
    nb_actions=actions,
    memory=memory,
    policy=BoltzmannQPolicy(),
    nb_steps_warmup=20,
    target_model_update=1e-2,
)

In [20]:
# Compile the agent with Adam (lr = 1e-3), tracking mean absolute error.
# If this cell raises when re-run, re-run the previous three cells in order
# first so that the model and agent are rebuilt before compiling again.
agent.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Train for 50000 environment steps. The returned History object is kept so the
# per-episode metrics can be inspected afterwards instead of being discarded
# (and so the cell no longer ends in an opaque object repr).
training_history = agent.fit(environment, nb_steps=50000, visualize=False, verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 15:54 - reward: 1.0000

  updates=self.state_updates,


50 episodes - episode_reward: 197.820 [181.000, 200.000] - loss: 3.726 - mae: 39.087 - mean_q: 79.114

Interval 2 (10000 steps performed)
50 episodes - episode_reward: 199.960 [198.000, 200.000] - loss: 3.081 - mae: 38.699 - mean_q: 78.042

Interval 3 (20000 steps performed)
50 episodes - episode_reward: 199.700 [185.000, 200.000] - loss: 4.476 - mae: 38.821 - mean_q: 77.975

Interval 4 (30000 steps performed)
51 episodes - episode_reward: 197.373 [169.000, 200.000] - loss: 10.855 - mae: 42.339 - mean_q: 84.834

Interval 5 (40000 steps performed)
done, took 695.869 seconds


<keras.callbacks.History at 0x7fc5555e5290>

In [27]:
# Evaluate the trained agent over 50 episodes.
# Set visualize=True to render the episodes when not running in Colab.
scores = agent.test(environment, nb_episodes=50, visualize=False)

# Average total reward per test episode.
episode_rewards = scores.history['episode_reward']
print(np.mean(episode_rewards))

Testing for 50 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 198.000, steps: 198
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 198.000, steps: 198
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200
Episode 11: reward: 200.000, steps: 200
Episode 12: reward: 200.000, steps: 200
Episode 13: reward: 200.000, steps: 200
Episode 14: reward: 200.000, steps: 200
Episode 15: reward: 200.000, steps: 200
Episode 16: reward: 200.000, steps: 200
Episode 17: reward: 200.000, steps: 200
Episode 18: reward: 200.000, steps: 200
Episode 19: reward: 200.000, steps: 200
Episode 20: reward: 200.000, steps: 200
Episode 21: reward: 200.000, steps: 200
Episode 22: reward: 200.000, steps: 200
Episode 23: reward: 200.000, steps: 200
Episode 24: reward: 200.000, steps: 200
Episode 25: reward: 2