In [1]:
from retro_contest.local import make
from agent import NeuralAgent
import tensorflow as tf
import numpy as np
import sys
from functools import reduce

In [7]:
def step(x, thresh=0.5):
    """Hard-threshold ``x`` element-wise around ``thresh``.

    Args:
        x: A number or NumPy array.
        thresh: The cut-off value (default 0.5).

    Returns:
        1 where ``x > thresh``, 0 where ``x < thresh`` and 0.5 where ``x``
        equals ``thresh`` exactly (``np.sign`` is 0 at that point).
    """
    offset = x - thresh
    # sign is in {-1, 0, 1}; shift and halve to land on {0, 0.5, 1}.
    return 0.5 * (1 + np.sign(offset))

In [11]:
# ---- Hyperparameters -------------------------------------------------------
# All of these are forwarded to NeuralAgent below (except num_episodes and
# reset_n); their exact semantics are defined in agent.py, not in this notebook.
num_episodes = 10       # number of full episodes played by the loop below
learning_rate = 1e-6
num_hidden = 32
memory_size = 10000
batch_size = 32
tau = 1e-3              # presumably a soft target-update rate -- confirm in agent.py
explore_mu = 0          # explore_* look like Ornstein-Uhlenbeck noise parameters -- confirm
explore_theta = 0.15
explore_sigma = 1e-1
memory_a = 0.3          # memory_* look like prioritized-replay parameters -- confirm
memory_eps = 1e-2
reset_n = 50            # NOTE(review): not used anywhere in this cell

# Start from a clean TF graph so re-running this cell does not pile up
# duplicate variables, and silence INFO-level TF logging.
tf.reset_default_graph()
tf.logging.set_verbosity(tf.logging.WARN)

env = make(game='SonicTheHedgehog-Genesis', state='LabyrinthZone.Act1')

# Everything after the environment is created lives inside try/finally so the
# environment is closed even if building the agent or the saver fails.
try:
    # Observations are flattened to 1-D before being handed to the agent, so
    # the state size is the product of the observation dimensions.
    state_size = reduce(lambda x, y: x * y, env.observation_space.shape, 1)
    action_size = env.action_space.n
    agent = NeuralAgent(state_size=state_size,
                        action_size=action_size,
                        learning_rate=learning_rate,
                        num_hidden=num_hidden,
                        memory_size=memory_size,
                        batch_size=batch_size,
                        exploration_mu=explore_mu,
                        exploration_theta=explore_theta,
                        exploration_sigma=explore_sigma,
                        memory_a=memory_a,
                        memory_eps=memory_eps,
                        tau=tau
                       )

    rewards = [[] for _ in range(num_episodes)]  # per-step rewards, one list per episode
    total_rewards = []                           # summed reward of each finished episode
    saver = tf.train.Saver()

    # Saver.save fails when the parent directory is missing; create it up
    # front (no-op if it already exists) so a long run is not lost at the
    # first checkpoint.
    tf.gfile.MakeDirs("checkpoints")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Start at -inf so the first finished episode is always checkpointed,
        # even when every episode ends with a zero or negative total reward.
        best_reward = -np.inf
        i_best = 0
        last_reset = 0  # NOTE(review): never read in this cell
        for i_episode in range(1, num_episodes + 1):
            state = np.reshape(env.reset(), (-1))
            total_reward = 0
            done = False
            while not done:
                # Threshold the agent's raw output at 0.1 to obtain the
                # action vector passed to the environment.
                action = step(agent.act(sess, state), thresh=0.1)
                next_state, reward, done, info = env.step(action)
                next_state = np.reshape(next_state, (-1))
                agent.step(sess, state, action, reward, next_state, done)
                total_reward += reward
                rewards[i_episode - 1].append(reward)
                state = next_state

            total_rewards.append(total_reward)
            if total_reward > best_reward:
                best_reward = total_reward
                i_best = i_episode
                saver.save(sess, "checkpoints/retro-contest-bestckpt")
            # "\r" + end="" keeps the progress report on a single console line.
            print("\rEpisode = {:4d}, Total Reward = {:.3f}".format(
                i_episode, total_reward), end="")
            sys.stdout.flush()
        # Final weights after the last episode, regardless of score.
        saver.save(sess, "checkpoints/retro-contest.ckpt")
finally:
    env.close()

Episode =   10, Total Reward = 635.693

In [9]:
# Manually close the current environment.
# NOTE(review): presumably needed because only one emulator instance may be
# open per process -- confirm against the retro documentation.
env.close()

In [5]:
# Create a fresh environment for interactive inspection and keep the initial
# observation returned by reset() in `state`.
env = make(game='SonicTheHedgehog-Genesis', state='LabyrinthZone.Act1')
state = env.reset()

In [33]:
# Shape of the initial observation when flattened into a single row.
np.reshape(state, (1,-1)).shape

(1, 215040)

In [34]:
# Take one step with a randomly sampled action; the tuple is unpacked as
# (next observation, reward, done flag, info), matching the training loop.
ns, rw, d, i = env.step(env.action_space.sample())

In [38]:
# Shape of the post-step observation when flattened into a single row.
np.reshape(ns, (1,-1)).shape

(1, 215040)

In [6]:
# Time one environment step driven by a random action (the measurement
# includes the cost of sampling the action).
%timeit env.step(env.action_space.sample())

3.21 ms ± 50.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [12]:
# Time action sampling on its own, for comparison with the full step timing.
%timeit env.action_space.sample()

3.46 µs ± 13.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
