In [1]:
import gym
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers.legacy import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory


In [2]:

# Create the CartPole environment and read the two sizes the rest of the
# notebook needs from its observation and action spaces.
env = gym.make('CartPole-v1')
states = env.observation_space.shape[0]  # first dimension of the observation shape
actions = env.action_space.n  # `n` attribute of the action space

In [3]:
# Display the observation size, the action count, and the observation space.
states,actions,env.observation_space

(4, 2, Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32))

In [4]:
# Baseline: play a few episodes with random actions and print the total
# reward collected in each one, before any agent is trained.
episodes = 20
for episode in range(1, episodes + 1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        # Sample from the environment's own action space instead of a
        # hard-coded [0, 1], so the loop stays valid for any action count.
        action = env.action_space.sample()
        # step() is unpacked as a 4-tuple; only reward and done are used below.
        n_state, reward, done, info = env.step(action)
        score += reward
    print('Episode:{} ,Score:{}'.format(episode, score))



Episode:1 ,Score:13.0
Episode:2 ,Score:9.0
Episode:3 ,Score:38.0
Episode:4 ,Score:42.0
Episode:5 ,Score:54.0
Episode:6 ,Score:16.0
Episode:7 ,Score:18.0
Episode:8 ,Score:19.0
Episode:9 ,Score:20.0
Episode:10 ,Score:13.0
Episode:11 ,Score:27.0
Episode:12 ,Score:16.0
Episode:13 ,Score:15.0
Episode:14 ,Score:19.0
Episode:15 ,Score:34.0
Episode:16 ,Score:26.0
Episode:17 ,Score:29.0
Episode:18 ,Score:39.0
Episode:19 ,Score:31.0
Episode:20 ,Score:13.0


In [5]:
def build_model(states, actions):
    """Assemble the feed-forward network that the DQN agent will train.

    Args:
        states: Size of one observation; the input shape is ``(1, states)``
            and is flattened before the dense layers.
        actions: Number of units in the final linear layer.

    Returns:
        An uncompiled ``Sequential`` model laid out as
        Flatten -> Dense(24, relu) -> Dense(24, relu) -> Dense(actions, linear).
    """
    network = Sequential()
    # NOTE(review): the leading axis of size 1 presumably corresponds to the
    # window_length=1 used by the agent's memory — confirm before changing either.
    network.add(Flatten(input_shape=(1, states)))
    # Two identical hidden layers.
    for hidden_units in (24, 24):
        network.add(Dense(hidden_units, activation="relu"))
    network.add(Dense(actions, activation="linear"))
    return network


# Build the network for this environment's observation and action sizes.
model = build_model(states=states, actions=actions)
print(model)


<keras.engine.sequential.Sequential object at 0x0000026030B05190>


In [6]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 4)                 0         
                                                                 
 dense (Dense)               (None, 24)                120       
                                                                 
 dense_1 (Dense)             (None, 24)                600       
                                                                 
 dense_2 (Dense)             (None, 2)                 50        
                                                                 
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


In [10]:
def build_agent(model, actions):
    """Wrap ``model`` in a compiled DQN agent.

    Args:
        model: The network the agent will use.
        actions: Passed to the agent as ``nb_actions``.

    Returns:
        A ``DQNAgent`` using a Boltzmann Q policy and a sequential memory
        (limit 50000, window length 1), already compiled with
        ``Adam(learning_rate=1e-3)`` and the ``'mae'`` metric.
    """
    agent = DQNAgent(
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        model=model,
        nb_actions=actions,
    )
    optimizer = Adam(learning_rate=1e-3)
    # The agent is compiled here, so callers can train it as returned.
    agent.compile(optimizer, metrics=['mae'])
    return agent

In [11]:
# build_agent() already compiles the agent (Adam, learning_rate=1e-3, 'mae'
# metric), so it can be trained as returned; compiling it again here was
# redundant and used the deprecated `lr` keyword.
dqn = build_agent(model, actions)
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

470 episodes - episode_reward: 21.243 [9.000, 124.000] - loss: 0.016 - mae: 1.536 - mean_q: 3.060

Interval 2 (10000 steps performed)
415 episodes - episode_reward: 24.104 [9.000, 115.000] - loss: 0.016 - mae: 1.980 - mean_q: 3.994

Interval 3 (20000 steps performed)
406 episodes - episode_reward: 24.635 [9.000, 124.000] - loss: 0.016 - mae: 2.398 - mean_q: 4.870

Interval 4 (30000 steps performed)
305 episodes - episode_reward: 32.790 [9.000, 139.000] - loss: 0.019 - mae: 2.823 - mean_q: 5.763

Interval 5 (40000 steps performed)
done, took 430.811 seconds


<keras.callbacks.History at 0x2606a2002d0>

In [12]:
# Run 100 test episodes without rendering and report the mean episode reward.
scores = dqn.test(env, visualize=False, nb_episodes=100)
episode_rewards = scores.history['episode_reward']
print(np.mean(episode_rewards))

# Save the agent's weights, overwriting any existing file of the same name.
dqn.save_weights('dqn_weights1.h5f', overwrite=True)

# Run 15 more episodes with rendering enabled; the returned history is discarded.
_ = dqn.test(env, nb_episodes=15, visualize=True)

Testing for 100 episodes ...
Episode 1: reward: 151.000, steps: 151
Episode 2: reward: 156.000, steps: 156
Episode 3: reward: 150.000, steps: 150
Episode 4: reward: 155.000, steps: 155
Episode 5: reward: 160.000, steps: 160
Episode 6: reward: 158.000, steps: 158
Episode 7: reward: 162.000, steps: 162
Episode 8: reward: 164.000, steps: 164
Episode 9: reward: 153.000, steps: 153
Episode 10: reward: 154.000, steps: 154
Episode 11: reward: 152.000, steps: 152
Episode 12: reward: 171.000, steps: 171
Episode 13: reward: 158.000, steps: 158
Episode 14: reward: 148.000, steps: 148
Episode 15: reward: 149.000, steps: 149
Episode 16: reward: 157.000, steps: 157
Episode 17: reward: 156.000, steps: 156
Episode 18: reward: 160.000, steps: 160
Episode 19: reward: 158.000, steps: 158
Episode 20: reward: 166.000, steps: 166
Episode 21: reward: 152.000, steps: 152
Episode 22: reward: 161.000, steps: 161
Episode 23: reward: 158.000, steps: 158
Episode 24: reward: 158.000, steps: 158
Episode 25: reward: 

: 

In [None]:
# Close the environment now that the notebook is finished with it.
env.close()