# Biblioteki i funkcje pomocnicze

In [None]:
from __future__ import division
import argparse

from PIL import Image
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Conv2D, Permute, Lambda
from keras.optimizers import Adam
import keras.backend as K

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

class AtariProcessor(Processor):
    """Pre-process Atari observations, state batches and rewards for the agent.

    Observations are resized to the module-level ``INPUT_SHAPE`` and converted
    to 8-bit grayscale, state batches are rescaled to ``float32`` by dividing
    by 255, and rewards are clipped to the range ``[-1, 1]``.
    """

    def process_observation(self, observation):
        """Return ``observation`` as a grayscale ``uint8`` array of ``INPUT_SHAPE``.

        ``observation`` must be a 3-D array laid out as (height, width, channel).
        """
        assert observation.ndim == 3  # (height, width, channel)
        img = Image.fromarray(observation)
        # PIL's resize() expects (width, height), whereas INPUT_SHAPE is checked
        # below against a NumPy shape, i.e. (rows, cols). Reversing it keeps the
        # square case unchanged and makes non-square shapes pass the check too.
        img = img.resize(INPUT_SHAPE[::-1]).convert('L')  # resize, then grayscale
        processed_observation = np.array(img)
        assert processed_observation.shape == INPUT_SHAPE
        # Stay in uint8 here; the float conversion happens in process_state_batch.
        return processed_observation.astype('uint8')

    def process_state_batch(self, batch):
        """Return ``batch`` converted to ``float32`` and divided by 255."""
        processed_batch = batch.astype('float32') / 255.
        return processed_batch

    def process_reward(self, reward):
        """Return ``reward`` clipped to the interval ``[-1, 1]``."""
        return np.clip(reward, -1., 1.)

# Konfiguracja gry Riverraid

![Riverraid](./images/riverraid.png)


#### OpenAI Gym
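- [Riverraid-v0](https://gym.openai.com/envs/Riverraid-v0/) (wersja `Riverraid-v4` powtarza każdą akcję przez losowo wybrane 2–4 klatki i nie stosuje „lepkich” akcji; stałe powtarzanie akcji przez 4 klatki zapewnia wariant `RiverraidDeterministic-v4`)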

In [None]:
# Gym environment id; also used further down to build the weight and log file names.
ENV_NAME = 'Riverraid-v4'

# Create the environment, then seed NumPy's global RNG and the environment
# with the same fixed value.
env = gym.make(ENV_NAME)
np.random.seed(1)
env.seed(1)
# Size of the environment's action space; used as the width of the network's output layer.
nb_actions = env.action_space.n

# Parametry i model

In [None]:
# Shape of a single pre-processed frame and the number of stacked frames per state.
INPUT_SHAPE = (84, 84)
WINDOW_LENGTH = 4

# The network input is a stack of WINDOW_LENGTH frames: (window, 84, 84).
input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE

# Q-network: three convolutions, a 512-unit dense layer and one linear output
# per action. The layers are the same, in the same order, as when added one by one.
model = Sequential([
    # Move the frame-window axis to the end so the stacked frames are the last
    # axis seen by Conv2D (assumes a channels-last backend setting - TODO confirm).
    Permute((2, 3, 1), input_shape=input_shape),
    Conv2D(32, (8, 8), strides=(4, 4)),
    Activation('relu'),
    Conv2D(64, (4, 4), strides=(2, 2)),
    Activation('relu'),
    Conv2D(64, (3, 3), strides=(1, 1)),
    Activation('relu'),
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])

print('Input shape: {}'.format(input_shape))
print('Output shape: {}'.format(nb_actions))
# print(model.summary())

# Nauka agenta

In [None]:
# Observation / batch / reward pre-processing (see AtariProcessor).
processor = AtariProcessor()

# Replay memory limited to 1,000,000 entries, using WINDOW_LENGTH-frame windows.
memory = SequentialMemory(window_length=WINDOW_LENGTH, limit=1000000)

# Epsilon-greedy policy whose `eps` attribute is annealed by LinearAnnealedPolicy
# between value_max and value_min over nb_steps; value_test is the test-time value.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1,
    value_min=0.1,
    value_test=.05,
    nb_steps=1000000)

# Double DQN (no dueling head) built around the model defined above.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    processor=processor,
    gamma=.99,
    delta_clip=1.,
    train_interval=4,
    nb_steps_warmup=500,
    target_model_update=100,
    enable_double_dqn=True,
    enable_dueling_network=False)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

# Output locations: final weights, training log and periodic checkpoints.
weights_filename = 'weights/{}_weights.h5f'.format(ENV_NAME)
log_filename = 'logs/{}_log.json'.format(ENV_NAME)
checkpoint_weights_filename = 'weights/' + ENV_NAME + '_weights_{step}.h5f'

# Checkpoint the weights and write the log every 100,000 steps.
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000),
    FileLogger(log_filename, interval=100000),
]

In [None]:
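# Training is disabled so that the notebook only evaluates already trained weights.
# Uncomment the lines below to train; the final save_weights() call writes
# weights_filename, i.e. the file loaded in the test cell.
# dqn.fit(env,
#         callbacks=callbacks,
#         nb_steps=2000000,
#         log_interval=100000,
#         visualize=True)
# dqn.save_weights(weights_filename, overwrite=True)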

# Testowanie agenta

In [None]:
# Load the trained weights for the current environment. weights_filename is
# built from ENV_NAME in the agent-setup cell, so the path is not duplicated here.
dqn.load_weights(filepath=weights_filename)
try:
    # Run 10 rendered evaluation episodes.
    dqn.test(env, nb_episodes=10, action_repetition=1, visualize=True)
finally:
    # Close the environment even when testing fails or is interrupted.
    env.close()

# Konfiguracja gry Breakout

![Breakout](./images/breakout.png)


#### OpenAI Gym
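- [Breakout-v0](https://gym.openai.com/envs/Breakout-v0/) (wersja `Breakout-v4` powtarza każdą akcję przez losowo wybrane 2–4 klatki i nie stosuje „lepkich” akcji; stałe powtarzanie akcji przez 4 klatki zapewnia wariant `BreakoutDeterministic-v4`)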

In [None]:
# Gym environment id; also used further down to build the weight and log file names.
ENV_NAME = 'Breakout-v4'

# Create the environment, then seed NumPy's global RNG and the environment
# with the same fixed value.
env = gym.make(ENV_NAME)
np.random.seed(1)
env.seed(1)
# Size of the environment's action space; used as the width of the network's output layer.
nb_actions = env.action_space.n

# Parametry i model

In [None]:
# Per-frame shape after pre-processing, and how many frames make up one state.
INPUT_SHAPE = (84, 84)
WINDOW_LENGTH = 4

# A state is WINDOW_LENGTH stacked frames: (window, 84, 84).
input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE

# Convolutional Q-network with one linear output per action; the layer list
# matches, in order, the sequence of add() calls it replaces.
model = Sequential([
    # Put the frame-window axis last so Conv2D receives the stacked frames as
    # its final axis (assumes a channels-last backend setting - TODO confirm).
    Permute((2, 3, 1), input_shape=input_shape),
    Conv2D(32, (8, 8), strides=(4, 4)),
    Activation('relu'),
    Conv2D(64, (4, 4), strides=(2, 2)),
    Activation('relu'),
    Conv2D(64, (3, 3), strides=(1, 1)),
    Activation('relu'),
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])

print('Input shape: {}'.format(input_shape))
print('Output shape: {}'.format(nb_actions))
# print(model.summary())

# Nauka agenta

In [None]:
# Observation / batch / reward pre-processing (see AtariProcessor).
processor = AtariProcessor()

# Replay memory limited to 1,000,000 entries, using WINDOW_LENGTH-frame windows.
memory = SequentialMemory(window_length=WINDOW_LENGTH, limit=1000000)

# Epsilon-greedy policy whose `eps` attribute is annealed by LinearAnnealedPolicy
# between value_max and value_min over nb_steps; value_test is the test-time value.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1,
    value_min=0.1,
    value_test=.05,
    nb_steps=1000000)

# Double DQN (no dueling head) built around the model defined above.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    processor=processor,
    gamma=.99,
    delta_clip=1.,
    train_interval=4,
    nb_steps_warmup=500,
    target_model_update=100,
    enable_double_dqn=True,
    enable_dueling_network=False)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

# Output locations: final weights, training log and periodic checkpoints.
weights_filename = 'weights/{}_weights.h5f'.format(ENV_NAME)
log_filename = 'logs/{}_log.json'.format(ENV_NAME)
checkpoint_weights_filename = 'weights/' + ENV_NAME + '_weights_{step}.h5f'

# Checkpoint the weights and write the log every 100,000 steps.
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000),
    FileLogger(log_filename, interval=100000),
]

In [None]:
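# Training is disabled so that the notebook only evaluates already trained weights.
# Uncomment the lines below to train; the final save_weights() call writes
# weights_filename, i.e. the file loaded in the test cell.
# dqn.fit(env,
#         callbacks=callbacks,
#         nb_steps=2000000,
#         log_interval=100000,
#         visualize=True)
# dqn.save_weights(weights_filename, overwrite=True)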

# Testowanie agenta

In [None]:
# Load the trained weights for the current environment. weights_filename is
# built from ENV_NAME in the agent-setup cell, so the path is not duplicated here.
dqn.load_weights(filepath=weights_filename)
try:
    # Run 10 rendered evaluation episodes.
    dqn.test(env, nb_episodes=10, action_repetition=1, visualize=True)
finally:
    # Close the environment even when testing fails or is interrupted.
    env.close()

# Źródło

1. [keras-rl Github](https://github.com/keras-rl/keras-rl)