In [2]:
# gym -> openAI gym package

import gym 
import random

In [2]:
# Set up the environment

# Create the CartPole-v0 environment and read the sizes the model needs.
env = gym.make('CartPole-v0')
# Length of the observation vector (first entry of the observation-space shape).
states = env.observation_space.shape[0] # 4 states
# Number of discrete actions available to the agent.
actions = env.action_space.n # 2 actions

In [7]:
# Render the environment while playing random actions to establish a baseline.

random_actions = 10  # number of baseline episodes to play
try:
    for episode in range(1, random_actions + 1):
        env.reset()  # start a fresh episode; the initial observation is not needed here
        done = False
        score = 0

        while not done:
            env.render()
            action = random.choice([0, 1])  # pick one of the two actions at random
            # Only the reward and the termination flag matter for this baseline.
            _observation, reward, done, _info = env.step(action)
            score += reward

        print("Action number: {} Score: {}".format(episode, score))
finally:
    # Always release the render window, even if an episode raises or is interrupted.
    env.close()

# Random actions peak at a score of 41 in this run; the target is 200.

Action number: 1 Score: 12.0
Action number: 2 Score: 14.0
Action number: 3 Score: 41.0
Action number: 4 Score: 13.0
Action number: 5 Score: 10.0
Action number: 6 Score: 28.0
Action number: 7 Score: 21.0
Action number: 8 Score: 25.0
Action number: 9 Score: 21.0
Action number: 10 Score: 37.0


In [4]:
# Deep Learning imports

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [5]:
# Building the Deep Learning Model

def build_model(states, actions):
    """Create the feed-forward network used by the agent.

    Args:
        states: Length of the observation vector. The network input has
            shape ``(1, states)`` and is flattened before the dense layers.
        actions: Number of units in the final linear layer.

    Returns:
        An uncompiled Keras ``Sequential`` model named
        "Reinforcement Learning Base".
    """
    # Layers are instantiated in the same order the network applies them.
    stack = [
        Flatten(input_shape=(1, states)),
        Dense(24, activation="relu"),
        Dense(24, activation="relu"),
        Dense(actions, activation="linear"),
    ]
    network = Sequential(stack, name="Reinforcement Learning Base")
    return network

In [37]:
# Instantiate the network for this environment's observation and action sizes.
model = build_model(states, actions)

In [38]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "Reinforcement Learning Base"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_4 (Flatten)         (None, 4)                 0         
                                                                 
 dense_11 (Dense)            (None, 24)                120       
                                                                 
 dense_12 (Dense)            (None, 24)                600       
                                                                 
 dense_13 (Dense)            (None, 2)                 50        
                                                                 
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


In [6]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [7]:
# Building our agent

def build_agent(model, actions, memory_limit=50000, nb_steps_warmup=10,
                target_model_update=1e-2):
    """Assemble a DQN agent around ``model``.

    Args:
        model: Keras model passed to ``DQNAgent`` as its ``model``.
        actions: Number of discrete actions (passed as ``nb_actions``).
        memory_limit: ``limit`` of the ``SequentialMemory`` replay buffer.
        nb_steps_warmup: Warm-up step count forwarded to ``DQNAgent``.
        target_model_update: Target-network update setting forwarded to
            ``DQNAgent``; per the keras-rl documentation, values below 1
            select a soft update with that rate.

    Returns:
        An uncompiled ``DQNAgent`` using a Boltzmann exploration policy.
    """
    policy = BoltzmannQPolicy()
    # window_length stays 1 so observations match the (1, states) input
    # shape that build_model declares.
    memory = SequentialMemory(limit=memory_limit, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   nb_actions=actions, nb_steps_warmup=nb_steps_warmup,
                   target_model_update=target_model_update)
    return dqn

In [40]:
# Using a DQN model
# Training, 50k steps

# Wrap the network in a DQN agent and compile it with Adam (learning rate 1e-3),
# tracking mean absolute error as a metric.
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
# Train for 50,000 steps without rendering; verbose=1 prints per-interval progress.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

  super(Adam, self).__init__(name, **kwargs)


Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 10:24 - reward: 1.0000

  updates=self.state_updates,


102 episodes - episode_reward: 97.814 [9.000, 200.000] - loss: 3.873 - mae: 20.653 - mean_q: 41.814

Interval 2 (10000 steps performed)
50 episodes - episode_reward: 197.040 [175.000, 200.000] - loss: 8.419 - mae: 42.950 - mean_q: 86.605

Interval 3 (20000 steps performed)
51 episodes - episode_reward: 197.569 [173.000, 200.000] - loss: 6.045 - mae: 43.884 - mean_q: 88.298

Interval 4 (30000 steps performed)
50 episodes - episode_reward: 198.900 [162.000, 200.000] - loss: 5.759 - mae: 41.267 - mean_q: 83.003

Interval 5 (40000 steps performed)
done, took 376.367 seconds


<keras.callbacks.History at 0x1d22fc31c10>

In [41]:
# Testing our model on 100 different scenarios

# Run 100 evaluation episodes without rendering, then print the mean episode reward.
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 200.000, steps: 200
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200
Episode 11: reward: 200.000, steps: 200
Episode 12: reward: 200.000, steps: 200
Episode 13: reward: 200.000, steps: 200
Episode 14: reward: 200.000, steps: 200
Episode 15: reward: 200.000, steps: 200
Episode 16: reward: 200.000, steps: 200
Episode 17: reward: 200.000, steps: 200
Episode 18: reward: 200.000, steps: 200
Episode 19: reward: 200.000, steps: 200
Episode 20: reward: 200.000, steps: 200
Episode 21: reward: 200.000, steps: 200
Episode 22: reward: 200.000, steps: 200
Episode 23: reward: 200.000, steps: 200
Episode 24: reward: 200.000, steps: 200
Episode 25: reward: 

In [42]:
# Visualize results for the model

# Render 15 evaluation episodes; the result is assigned to `_` so the returned
# object is not echoed as cell output. Close the render window afterwards.
_ = dqn.test(env, nb_episodes=15, visualize=True)
env.close()

Testing for 15 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 200.000, steps: 200
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200
Episode 11: reward: 200.000, steps: 200
Episode 12: reward: 200.000, steps: 200
Episode 13: reward: 200.000, steps: 200
Episode 14: reward: 200.000, steps: 200
Episode 15: reward: 200.000, steps: 200


In [43]:
# Save weights for future use

# overwrite=True allows an existing 'dqn_weights.h5f' to be replaced.
dqn.save_weights('dqn_weights.h5f', overwrite=True)

In [44]:
# Deleting key variables to test loading the model

# Remove the trained objects from the namespace so the cells below must
# rebuild them and rely only on the weights saved to disk.
del model
del dqn
del env

In [8]:
# Rebuild the environment, network and agent from scratch so the saved
# weights can be loaded into a fresh agent.
env = gym.make('CartPole-v0')
actions = env.action_space.n
states = env.observation_space.shape[0]
model = build_model(states, actions)
dqn = build_agent(model, actions)
# Use `learning_rate` (not the deprecated `lr` alias), matching the training cell.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

  super(Adam, self).__init__(name, **kwargs)


In [9]:
# Load weights

# Restore the weights previously saved to 'dqn_weights.h5f' into the rebuilt agent.
dqn.load_weights('dqn_weights.h5f')

In [10]:
# Testing on loaded weights, and visualizing the tests

# Render 5 evaluation episodes with the reloaded agent, then close the render window.
_ = dqn.test(env, nb_episodes=5, visualize=True)
env.close()

Testing for 5 episodes ...


  updates=self.state_updates,


Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
