In [124]:
import gym
import numpy as np
from collections import deque

from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
from keras.optimizers import Adam

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [3]:
# Create the Gym environment the agent is trained on ('Pong-v0').
env = gym.make('Pong-v0')

In [138]:
# Training hyperparameters.
EPISODES = 1000  # number of episodes the training loop runs
GAMMA = 0.99     # discount factor applied to the best next-state Q-value in the target

# Epsilon-greedy schedule: the exploration rate is interpolated linearly from
# EXPLORE_INIT to EXPLORE_FINAL over the first EXPLORE_FINAL_FRAME frames and
# then held at EXPLORE_FINAL.
EXPLORE_INIT = 1.0
EXPLORE_FINAL = 0.01
EXPLORE_FINAL_FRAME = 100000.0

# Replay memory: capacity of the buffer, and the number of stored experiences
# required before training on minibatches begins.
MEMORY_SIZE = 50000
# NOTE(review): name is misspelled ("MEMORT"); it is kept because the training
# cell refers to it by this exact name.
MEMORT_START_SIZE = 1000

BATCH_SIZE = 16                    # experiences sampled from memory per training step
ACTION_SIZE = env.action_space.n   # number of discrete actions; sizes the Q-network output layer

Crop the frame to the play area, downsample it by a factor of 3 along both spatial axes, and normalize pixel values to the 0-1 range

In [132]:
def preprocess(image):
    """Crop, downsample and normalize a raw frame.

    Keeps rows 34..193 and columns 0..159, taking every third pixel along both
    axes and the first three channels, then divides the values by 255. For any
    frame with at least 194 rows, 160 columns and 3 channels the result has
    shape (54, 54, 3).

    The crop is applied *before* the division. The values are identical to
    dividing first, but only the retained pixels are converted, and the
    returned array owns its data instead of being a strided view that keeps a
    full-size normalized frame alive for as long as the result is referenced
    (e.g. while it sits in the replay memory).

    Args:
        image: NumPy array indexed as [row, column, channel]; presumably a
            uint8 RGB frame with values in 0..255 -- TODO confirm with caller.

    Returns:
        A new float array holding the cropped, downsampled frame scaled by
        1/255. The input array is not modified.
    """
    return image[34:194:3, 0:160:3, 0:3] / 255

In [133]:
# env.reset()
# for i in range(0,1000):
#     action = env.action_space.sample()
#     obs, reward, done, _ = env.step(action)

# image = env.render(mode='rgb_array')
# d_image = preprocess(image)
# print(d_image.shape)

# plt.imshow(d_image)

In [134]:
def create_Q_model(learning_rate=0.001):
    """Build and compile the convolutional Q-network.

    The network takes (54, 54, 3) inputs and stacks three 3x3 'same'-padded
    ReLU convolutions (16, 32 and 64 filters), each followed by max pooling,
    then flattens into two 256-unit ReLU dense layers and a linear output
    layer with one unit per action (ACTION_SIZE).

    Args:
        learning_rate: step size passed to the Adam optimizer.

    Returns:
        The Sequential model, compiled with mean-squared-error loss.
    """
    network = Sequential([
        Conv2D(16, (3,3), padding='same', activation='relu', input_shape=(54, 54, 3)),
        MaxPooling2D(),
        Conv2D(32, (3,3), padding='same', activation='relu'),
        MaxPooling2D(),
        Conv2D(64, (3,3), padding='same', activation='relu'),
        MaxPooling2D(),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(256, activation='relu'),
        # Linear head: raw Q-value estimates, one per action.
        Dense(ACTION_SIZE, activation='linear'),
    ])

    network.compile(loss='mse', optimizer=Adam(lr=learning_rate))
    return network

In [135]:
class Memory():
    """Bounded replay buffer backed by a deque.

    Once `max_size` items are stored, appending a new one discards the oldest.
    """

    def __init__(self, max_size):
        """Create an empty buffer that holds at most `max_size` experiences."""
        self.buffer = deque(maxlen=max_size)

    def add(self, experience):
        """Append one experience to the end of the buffer."""
        self.buffer.append(experience)

    def size(self):
        """Return the number of experiences currently stored."""
        return len(self.buffer)

    def sample(self, batch_size):
        """Return `batch_size` distinct stored experiences chosen at random.

        Positions are drawn without replacement through NumPy's global random
        state; asking for more items than are stored raises ValueError.
        """
        positions = np.arange(len(self.buffer))
        chosen = np.random.choice(positions, size=batch_size, replace=False)
        picked = []
        for position in chosen:
            picked.append(self.buffer[position])
        return picked

In [136]:
# Replay buffer capped at MEMORY_SIZE experiences, and the Q-network built
# with its default learning rate.
memory = Memory(MEMORY_SIZE)
model = create_Q_model()

In [None]:
# Deep Q-learning training loop.
#
# Each step: pick an action epsilon-greedily, apply it to the environment,
# store the resulting transition in the replay memory and, once enough
# transitions have been collected, fit the Q-network on a random minibatch.
explore_rate = EXPLORE_INIT
frame = 0  # environment steps taken across all episodes; drives the epsilon schedule

for ep in range(0, EPISODES):
    # With the 4-tuple step() API used below, reset() returns the first
    # observation, so no throw-away random step is needed to get a state.
    state = preprocess(env.reset())

    total_reward = 0

    while True:
        # Linear epsilon decay from EXPLORE_INIT to EXPLORE_FINAL over the
        # first EXPLORE_FINAL_FRAME frames, constant afterwards.
        if frame <= EXPLORE_FINAL_FRAME:
            explore_rate = EXPLORE_INIT - (EXPLORE_INIT - EXPLORE_FINAL) * frame / EXPLORE_FINAL_FRAME
        else:
            explore_rate = EXPLORE_FINAL

        if np.random.rand() > explore_rate:
            # Exploit: predict() needs a leading batch axis -> (1, 54, 54, 3).
            q_values = model.predict(np.expand_dims(state, axis=0))[0]
            action = int(np.argmax(q_values))
        else:
            # Explore: uniformly random action.
            action = env.action_space.sample()

        # Apply the action that was actually selected above.
        next_state, reward, done, _ = env.step(action)
        next_state = preprocess(next_state)
        frame += 1

#         env.render()

        total_reward += reward

        # Store the transition BEFORE advancing `state`, so the stored tuple
        # holds the pre-step state and the post-step state.
        memory.add((state, action, reward, done, next_state))
        state = next_state

        if memory.size() >= MEMORT_START_SIZE:
            minibatch = memory.sample(BATCH_SIZE)

            states_b = np.array([exp[0] for exp in minibatch])
            actions_b = np.array([exp[1] for exp in minibatch], dtype=int)
            rewards_b = np.array([exp[2] for exp in minibatch], dtype=float)
            dones_b = np.array([exp[3] for exp in minibatch], dtype=bool)
            next_states_b = np.array([exp[4] for exp in minibatch])

            # Two batched forward passes instead of two predict() calls per sample.
            targets = model.predict(states_b)
            next_q_max = np.amax(model.predict(next_states_b), axis=1)

            # Target is r for terminal transitions, r + GAMMA * max_a' Q(s', a')
            # otherwise. Only the taken action's entry is replaced; the other
            # outputs keep the current prediction and contribute zero error.
            updated_q = np.where(dones_b, rewards_b, rewards_b + GAMMA * next_q_max)
            targets[np.arange(len(minibatch)), actions_b] = updated_q

            model.fit(states_b, targets, epochs=1, verbose=0)

        if done:
            print("Episode {}, Total Reward {}, Explore Rate {}".format(ep + 1, total_reward, explore_rate))
            break