In [5]:
import gym
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [6]:
# Create the CartPole environment and read the two sizes the network needs:
# the length of the observation vector and the number of discrete actions.
env = gym.make('CartPole-v1')
states, actions = env.observation_space.shape[0], env.action_space.n

In [7]:
# episodes = 10
# for episode in range(1, episodes+1):
#     state = env.reset()
#     done = False
#     score = 0
    
#     while not done:
#         env.render()
#         action = random.choice([0,1])
#         n_state, reward, done, info = env.step(action)
#         score += reward
#     print('Episode:{} Score:{}'.format(episode, score))

# Create a Deep Learning Model with Keras

In [8]:
def build_model(states, actions):
    """Assemble the feed-forward network that the DQN agent trains.

    Args:
        states: Length of one observation vector. The network input has
            shape ``(1, states)`` and is flattened before the dense layers
            (the leading 1 presumably mirrors ``window_length=1`` used for
            the agent's memory -- confirm if either value changes).
        actions: Number of discrete actions; sets the width of the final
            linear layer, giving one output value per action.

    Returns:
        An uncompiled Keras ``Sequential`` model.
    """
    layers = [
        Flatten(input_shape=(1, states)),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layers)

In [9]:
# Instantiate the network for this environment's observation and action sizes.
model = build_model(states=states, actions=actions)

In [10]:
# Print the layer stack, output shapes and parameter counts as a sanity check.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 4)                 0         
                                                                 
 dense (Dense)               (None, 24)                120       
                                                                 
 dense_1 (Dense)             (None, 24)                600       
                                                                 
 dense_2 (Dense)             (None, 2)                 50        
                                                                 
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


In [11]:
def build_agent(model, actions):
    """Wrap a Keras model in a keras-rl ``DQNAgent``.

    Args:
        model: Keras model whose output layer has one unit per action.
        actions: Number of discrete actions, forwarded as ``nb_actions``.

    Returns:
        A ``DQNAgent`` using a Boltzmann Q policy and a sequential memory
        (limit 50000, window length 1). The agent is returned uncompiled;
        the notebook calls ``compile`` on it before training or testing.
    """
    exploration_policy = BoltzmannQPolicy()
    experience_memory = SequentialMemory(limit=50000, window_length=1)
    return DQNAgent(
        model=model,
        nb_actions=actions,
        memory=experience_memory,
        policy=exploration_policy,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [12]:
# Wrap the network in a DQN agent, compile it, and train on the environment.
dqn = build_agent(model, actions)
# Use `learning_rate`: the `lr` keyword is a deprecated alias that makes the
# Adam constructor emit a warning (and is rejected by newer Keras releases).
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

  super(Adam, self).__init__(name, **kwargs)


Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 10:50 - reward: 1.0000

  updates=self.state_updates,


92 episodes - episode_reward: 107.174 [9.000, 358.000] - loss: 2.527 - mae: 19.582 - mean_q: 39.752

Interval 2 (10000 steps performed)
36 episodes - episode_reward: 277.611 [200.000, 500.000] - loss: 4.290 - mae: 43.045 - mean_q: 87.135

Interval 3 (20000 steps performed)
35 episodes - episode_reward: 289.829 [216.000, 477.000] - loss: 3.342 - mae: 48.567 - mean_q: 97.884

Interval 4 (30000 steps performed)
34 episodes - episode_reward: 286.235 [214.000, 422.000] - loss: 2.244 - mae: 47.976 - mean_q: 96.582

Interval 5 (40000 steps performed)
done, took 303.417 seconds


<keras.callbacks.History at 0x17f6010b040>

In [13]:
# Run 100 evaluation episodes without rendering, then report the mean
# per-episode reward recorded in the returned history.
scores = dqn.test(env, visualize=False, nb_episodes=100)
print(np.asarray(scores.history['episode_reward']).mean())

Testing for 100 episodes ...
Episode 1: reward: 363.000, steps: 363
Episode 2: reward: 470.000, steps: 470
Episode 3: reward: 374.000, steps: 374
Episode 4: reward: 431.000, steps: 431
Episode 5: reward: 381.000, steps: 381
Episode 6: reward: 288.000, steps: 288
Episode 7: reward: 420.000, steps: 420
Episode 8: reward: 363.000, steps: 363
Episode 9: reward: 394.000, steps: 394
Episode 10: reward: 325.000, steps: 325
Episode 11: reward: 350.000, steps: 350
Episode 12: reward: 430.000, steps: 430
Episode 13: reward: 278.000, steps: 278
Episode 14: reward: 282.000, steps: 282
Episode 15: reward: 408.000, steps: 408
Episode 16: reward: 296.000, steps: 296
Episode 17: reward: 366.000, steps: 366
Episode 18: reward: 382.000, steps: 382
Episode 19: reward: 361.000, steps: 361
Episode 20: reward: 300.000, steps: 300
Episode 21: reward: 361.000, steps: 361
Episode 22: reward: 446.000, steps: 446
Episode 23: reward: 326.000, steps: 326
Episode 24: reward: 312.000, steps: 312
Episode 25: reward: 

In [14]:
#_ = dqn.test(env, nb_episodes =5, visualize = True)

# Reloading the Agent from Saved Weights

In [15]:
# Write the trained agent's weights to disk (overwrite=True permits replacing
# an existing file at this path).
# NOTE(review): '.h4f' looks like a typo for '.h5f' -- confirm. Whatever path
# is used here must be the exact path later passed to load_weights().
dqn.save_weights('dqn_weight.h4f', overwrite = True)

In [16]:
# Drop the trained objects so the following cells have to rebuild the
# environment, model and agent from scratch before loading saved weights.
del model, dqn, env

In [17]:
# Recreate the environment, network and agent with the same configuration
# used for training, so that saved weights can be loaded into them.
env = gym.make('CartPole-v1')
states = env.observation_space.shape[0]
actions = env.action_space.n
model = build_model(states, actions)
dqn = build_agent(model, actions)
# Use `learning_rate`: the `lr` keyword is a deprecated alias that makes the
# Adam constructor emit a warning (and is rejected by newer Keras releases).
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [19]:
# Show the current working directory (shell escape), i.e. where relative
# weight-file paths are looked up.
!pwd

C:\Users\Seedevice\SanghyunRyu\RL_tutorial


In [20]:
# Load from the same path the weights were saved to ('dqn_weight.h4f').
# The bare name 'dqn_weight' is a different path and only resolves if an
# unrelated earlier run happened to leave such a file behind.
dqn.load_weights('dqn_weight.h4f')

In [21]:
# Replay 5 rendered episodes with the reloaded agent; the returned history
# is discarded.
_ = dqn.test(env, visualize=True, nb_episodes=5)

Testing for 5 episodes ...
Episode 1: reward: 488.000, steps: 488
Episode 2: reward: 354.000, steps: 354
Episode 3: reward: 361.000, steps: 361
Episode 4: reward: 356.000, steps: 356
Episode 5: reward: 245.000, steps: 245
