# Play MountainCar

In [None]:
from gym.utils import play
import gym
# Create the environment and hand it to gym's interactive play loop.
env = gym.make("MountainCar-v0")
try:
    play.play(env)
finally:
    # Always release the environment, even when the play loop raises or the
    # cell is interrupted; otherwise close() would be skipped.
    env.close()

# Training

In [None]:
from qlearning import *
import gym

def state_function(state):
    """Discretize a two-component observation into a rounded tuple.

    The first component is rounded to 1 decimal place and the second to 2.
    For MountainCar these are presumably (position, velocity) -- see the Gym
    environment documentation.
    """
    first, second = state[0], state[1]
    return round(first, 1), round(second, 2)
# Create the environment and the player used for tabular Q-learning.
# state_function is handed to GamePlayer -- presumably it is applied to raw
# observations to obtain Q-table keys; confirm in qlearning.
env = gym.make("MountainCar-v0")
game = GamePlayer(env, state_function)

In [None]:
# Hyper-parameters passed to game.train below.
total_episodes = 4000
alpha = 0.2                # Learning rate
gamma = 0.9                 # Discounting rate
decay_rate = 0.0005        # Exponential decay rate for exploration prob
epsilon = 0.9                 # Exploration rate
# Uncomment to discard earlier training before starting again
# (presumably clears the learned Q-table -- confirm in qlearning).
#game.erase_training()
rewards = game.train(total_episodes, alpha, gamma, epsilon, decay_rate, logEvery=1000)
# Mean of the values returned by train (presumably one total reward per
# episode), followed by the number of entries stored in the Q-table.
print("Total reward average:", sum(rewards)/len(rewards))
print(len(game.qtable))

In [None]:
def action_function(state):
    """Return game.q_trained_action for the discretized form of ``state``."""
    discrete_state = state_function(state)
    return game.q_trained_action(discrete_state)


# The leading 5 is presumably the number of episodes to show -- confirm in
# qlearning.visualize_computer_playing.
visualize_computer_playing(5, env, action_function)

## Matplotlib display

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [None]:
# Lower and upper bounds of the two observation components.
xmin, ymin = env.observation_space.low
xmax, ymax = env.observation_space.high
# Regularly spaced sample values along each observation axis, starting at the
# lower bound.
X = list(np.arange(xmin, xmax, 0.05))
Y = list(np.arange(ymin, ymax, 0.005))
# One matplotlib colour code per action index (used to index by action below).
colors = "bgry"

In [None]:
def action_function(state):
    """Return game.q_trained_action for the discretized form of ``state``."""
    return game.q_trained_action(state_function(state))


# Look up the trained action for every (x, y) grid point and colour the point
# by that action. All points are drawn with ONE scatter call: calling scatter
# once per point creates a separate artist for each of them, which is
# needlessly slow for a grid of this size and produces the same picture.
grid_points = [(x, y) for x in X for y in Y]
point_colors = [colors[action_function(point)] for point in grid_points]
# NOTE(review): ``label`` only sets the legend text; if a dot-shaped marker was
# intended this should probably be ``marker='.'``. Left unchanged here.
plt.scatter(
    [x for x, _ in grid_points],
    [y for _, y in grid_points],
    c=point_colors,
    label='.',
)
plt.show()

# Using Keras

In [None]:
import importlib
import qlearning
# Re-execute the qlearning module so that edits made to it since it was first
# imported take effect without restarting the kernel.
importlib.reload(qlearning)
import gym

In [None]:
env = gym.make("MountainCar-v0")
# Redefine rewards
import types

# Keep references to the original bound reset/step so the replacement
# functions defined below can delegate to them, and attach a counter for the
# number of steps taken in the current episode.
env.reset_backup = env.reset
env.step_backup = env.step
env.episode_step = 0

def reset(self):
    """Zero the per-episode step counter, then delegate to the saved reset.

    Returns whatever ``self.reset_backup()`` returns.
    """
    self.episode_step = 0
    initial_state = self.reset_backup()
    return initial_state
    
def step(self, action):
    """Advance one step via the saved step and reshape the final reward.

    The step counter is incremented before delegating to ``self.step_backup``.
    When the episode finishes before step 200, the reward is replaced by
    ``10 + 200 - episode_step``, so finishing sooner yields a larger value.
    Otherwise the reward from ``step_backup`` is passed through unchanged.

    Returns the ``(state, reward, done, info)`` tuple.
    """
    self.episode_step += 1
    observation, reward, finished, extra = self.step_backup(action)
    ended_early = finished and self.episode_step < 200
    if ended_early:
        reward = 210 - self.episode_step
    return observation, reward, finished, extra

# Bind the replacement functions to this env instance so that env.reset() and
# env.step() run the step counter and reward logic defined above.
env.reset = types.MethodType(reset, env)
env.step = types.MethodType(step, env)

In [None]:
from collections import deque
import numpy as np

class DoneMemory:
    """Replay memory that keeps finished and unfinished transitions apart.

    Experiences whose ``done`` flag is truthy are stored in ``mem_done``; all
    others go to ``mem_not_done``. A sampled batch takes at most a tenth of
    its entries from ``mem_done`` and the remainder from ``mem_not_done``.
    """

    def __init__(self):
        # Two qlearning.Memory buffers; the arguments are presumably their
        # capacities -- confirm in qlearning.
        self.mem_done = qlearning.Memory(2000)
        self.mem_not_done = qlearning.Memory(200000)

    def add(self, experience):
        """Store a ``(state, action, reward, done, next_state)`` experience."""
        # Unpacking all five fields keeps the length check on the experience.
        _, _, _, done, _ = experience
        target = self.mem_done if done else self.mem_not_done
        target.add(experience)

    def sample(self, batch_size):
        """Return ``batch_size`` requested samples drawn from both buffers.

        The ``mem_done`` share is capped at ``int(batch_size / 10)`` and at
        the number of experiences currently held in ``mem_done``.
        """
        available_done = len(self.mem_done)
        done_quota = int(batch_size / 10)
        n_done = min(available_done, done_quota)
        batch = self.mem_not_done.sample(batch_size - n_done)
        batch += self.mem_done.sample(n_done)
        return batch

In [None]:
# New player for the network-based experiments; unlike the tabular run, no
# state function is passed.
game = qlearning.GamePlayer(env)
# Replace the player's memory with the DoneMemory defined above (presumably
# the replay buffer read by keras_dqn_replay -- confirm in qlearning).
game.memory = DoneMemory()

In [None]:
# NOTE(review): N is the first positional argument of keras_dqn_replay; its
# meaning is not visible in this notebook -- confirm in qlearning.
N = 200
total_episodes = 4000
# NOTE(review): decay_rate is 0.9995 here but 0.0005 in the tabular run, so the
# two training methods presumably apply it differently -- confirm.
game.keras_dqn_replay(N, total_episodes, alpha=0.1, gamma=0.9, layers_size=[50, 50, 50],
                      decay_rate=0.9995, epsilon=0.8, logEvery=200)

In [None]:
import numpy as np
def _predict_row(observation):
    """Return the model's prediction row for one 2-component observation."""
    return game.model.predict(np.array(observation).reshape(1, 2))[0]


# Request a sample as large as the not-done buffer and print the notable
# transitions found in it.
batch = game.memory.sample(len(game.memory.mem_not_done))
for state, action, reward, done, nstate in batch:
    if done:
        # Transition that ended an episode: show the prediction for its state
        # next to the action the trained network picks there.
        prediction = _predict_row(state)
        print(state, action, nstate, reward, prediction, game.keras_trained_action(state))
    if reward >=0:
        # Non-negative reward -- presumably only the bonus from the patched
        # step, assuming the underlying per-step reward is negative.
        prediction = _predict_row(state)
        prediction_n = _predict_row(nstate)
        print("Wow", state, action, reward, done, prediction, prediction_n)

In [None]:
# Let the trained network choose the actions; the leading 5 is presumably the
# number of episodes to show -- confirm in qlearning.
qlearning.visualize_computer_playing(5, env, game.keras_trained_action)

## Matplotlib display

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [None]:
# Lower and upper bounds of the two observation components.
xmin, ymin = env.observation_space.low
xmax, ymax = env.observation_space.high
# Regularly spaced sample values along each observation axis, starting at the
# lower bound.
X = list(np.arange(xmin, xmax, 0.05))
Y = list(np.arange(ymin, ymax, 0.005))
# One matplotlib colour code per action index (used to index by action below).
colors = "bgry"

In [None]:
# Ask the trained network for its action at every (x, y) grid point and colour
# the point by that action. All points are drawn with ONE scatter call:
# calling scatter once per point creates a separate artist for each of them,
# which is needlessly slow for a grid of this size and produces the same
# picture.
grid_points = [(x, y) for x in X for y in Y]
point_colors = [colors[game.keras_trained_action(point)] for point in grid_points]
# NOTE(review): ``label`` only sets the legend text; if a circle marker was
# intended this should probably be ``marker='o'``. Left unchanged here.
plt.scatter(
    [x for x, _ in grid_points],
    [y for _, y in grid_points],
    c=point_colors,
    label='o',
)
plt.show()