# Import Libraries

In [2]:
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory

from keras.models import Sequential
from keras.layers import Dense
from keras.utils.vis_utils import plot_model

# Import Environment

In [52]:
# Gym environment id; passed to gym.make() below and embedded in the saved weights filename.
ENV_NAME = 'MountainCar-v0'

In [53]:
# Set gym's logger threshold to 40 (presumably ERROR-only, to hide warnings —
# confirm against the installed gym version).
gym.logger.set_level(40)

# One seed shared by NumPy's global RNG and the environment.
SEED = 123

env = gym.make(ENV_NAME)
np.random.seed(SEED)
env.seed(SEED)

# Number of discrete actions; sizes the output layer of the Q-network.
nb_actions = env.action_space.n

# Define Neural Network

In [54]:
# Q-network: flattens a single-observation window and emits one value per action.
# The leading 1 in the input shape presumably mirrors window_length=1 of the
# agent's replay memory — confirm if the memory settings change.
hidden_layer_sizes = (32, 32, 32, 32)

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))

# Dense and Activation are kept as separate layers, added in the same order as
# before, so the layer stack is unchanged.
for units in hidden_layer_sizes:
    model.add(Dense(units))
    model.add(Activation('relu'))

# Linear head with nb_actions outputs.
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# Define Agent

In [55]:
# Replay memory capped at limit=50000 with a window of one observation.
memory = SequentialMemory(limit=50000, window_length=1)
# Epsilon-greedy action selection using the policy's default arguments.
policy = EpsGreedyQPolicy()

# NOTE(review): in keras-rl a target_model_update below 1 is understood to
# select soft target-network updates — confirm against the installed version.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=10,
    target_model_update=1e-3,
    policy=policy,
)

# Compile with Adam (lr=1e-3) and track mean squared error as a metric.
optimizer = Adam(lr=1e-3)
dqn.compile(optimizer, metrics=['mse'])

# Training Phase - 5,000 Steps

In [56]:
# Train the agent on `env` for 5000 steps without rendering; verbose=1 prints
# per-interval progress. The returned History object is the cell's output.
dqn.fit(
    env,
    nb_steps=5000,
    visualize=False,
    verbose=1,
)

Training for 5000 steps ...
Interval 1 (0 steps performed)


<keras.callbacks.History at 0x1298870b8>

# Save Weights - 5,000 Steps

In [57]:
# Build the checkpoint filename from the environment id, then write the
# agent's weights there, replacing any existing file (overwrite=True).
weights_filename = 'dqn_{}_weights-exp-5K.h5f'.format(ENV_NAME)
dqn.save_weights(weights_filename, overwrite=True)

# Load Weights - 5,000 Steps

In [58]:
# Derive the checkpoint name from ENV_NAME using the same pattern as the
# save_weights call, so the two cannot drift apart if the environment id is
# changed. With ENV_NAME = 'MountainCar-v0' this resolves to the filename that
# was previously hard-coded here.
weights_filename = 'dqn_{}_weights-exp-5K.h5f'.format(ENV_NAME)
dqn.load_weights(weights_filename)

# Testing - Mean Reward over 100 Episodes (5,000 Steps)

In [59]:
# Evaluate the currently loaded weights for 100 episodes without rendering.
# `a` holds the returned history, whose 'episode_reward' entries are averaged
# in the following cell.
a = dqn.test(
    env,
    nb_episodes=100,
    visualize=False,
)

Testing for 100 episodes ...
Episode 1: reward: -200.000, steps: 200
Episode 2: reward: -200.000, steps: 200
Episode 3: reward: -200.000, steps: 200
Episode 4: reward: -200.000, steps: 200
Episode 5: reward: -200.000, steps: 200
Episode 6: reward: -200.000, steps: 200
Episode 7: reward: -200.000, steps: 200
Episode 8: reward: -200.000, steps: 200
Episode 9: reward: -200.000, steps: 200
Episode 10: reward: -200.000, steps: 200
Episode 11: reward: -200.000, steps: 200
Episode 12: reward: -200.000, steps: 200
Episode 13: reward: -200.000, steps: 200
Episode 14: reward: -200.000, steps: 200
Episode 15: reward: -200.000, steps: 200
Episode 16: reward: -200.000, steps: 200
Episode 17: reward: -200.000, steps: 200
Episode 18: reward: -200.000, steps: 200
Episode 19: reward: -200.000, steps: 200
Episode 20: reward: -200.000, steps: 200
Episode 21: reward: -200.000, steps: 200
Episode 22: reward: -200.000, steps: 200
Episode 23: reward: -200.000, steps: 200
Episode 24: reward: -200.000, steps: 

In [60]:
# Average the per-episode rewards recorded by the preceding test run.
episode_rewards = a.history['episode_reward']
print('Mean', np.mean(episode_rewards))

Mean -200.0


# Load Weights - 100,000 Steps

In [61]:
# NOTE(review): no cell visible in this notebook writes the 100K checkpoint;
# it must already exist on disk (presumably from a separate 100,000-step
# training run) or load_weights will fail on a fresh run.
# The name is derived from ENV_NAME, matching the pattern used when saving,
# instead of repeating the environment id as a literal.
weights_filename = 'dqn_{}_weights-exp-100K.h5f'.format(ENV_NAME)
dqn.load_weights(weights_filename)

# Testing - Mean Reward over 100 Episodes (100,000 Steps)

In [62]:
# Evaluate the currently loaded weights for 100 episodes without rendering.
# `a` is overwritten with this run's history for the mean computed next.
a = dqn.test(
    env,
    nb_episodes=100,
    visualize=False,
)

Testing for 100 episodes ...
Episode 1: reward: -112.000, steps: 112
Episode 2: reward: -110.000, steps: 110
Episode 3: reward: -108.000, steps: 108
Episode 4: reward: -115.000, steps: 115
Episode 5: reward: -111.000, steps: 111
Episode 6: reward: -112.000, steps: 112
Episode 7: reward: -111.000, steps: 111
Episode 8: reward: -114.000, steps: 114
Episode 9: reward: -96.000, steps: 96
Episode 10: reward: -114.000, steps: 114
Episode 11: reward: -98.000, steps: 98
Episode 12: reward: -111.000, steps: 111
Episode 13: reward: -115.000, steps: 115
Episode 14: reward: -107.000, steps: 107
Episode 15: reward: -114.000, steps: 114
Episode 16: reward: -112.000, steps: 112
Episode 17: reward: -115.000, steps: 115
Episode 18: reward: -114.000, steps: 114
Episode 19: reward: -110.000, steps: 110
Episode 20: reward: -113.000, steps: 113
Episode 21: reward: -113.000, steps: 113
Episode 22: reward: -114.000, steps: 114
Episode 23: reward: -114.000, steps: 114
Episode 24: reward: -112.000, steps: 112


In [63]:
# Average the per-episode rewards recorded by the preceding test run.
episode_rewards = a.history['episode_reward']
print('Mean', np.mean(episode_rewards))

Mean -110.96


# Visualize 5 Episodes

In [64]:
# Run five evaluation episodes with on-screen rendering (visualize=True).
dqn.test(env, nb_episodes=5, visualize=True)
# Release the rendering resources opened for visualization; without this the
# viewer window is left open after the cell finishes.
# NOTE(review): assumes the environment stays usable after close() in the
# installed gym version — confirm if later cells reuse `env`.
env.close()

Testing for 5 episodes ...
Episode 1: reward: -112.000, steps: 112
Episode 2: reward: -111.000, steps: 111
Episode 3: reward: -114.000, steps: 114
Episode 4: reward: -113.000, steps: 113
Episode 5: reward: -114.000, steps: 114


<keras.callbacks.History at 0x1296f4b00>