import tensorflow as tf  
# Build the session configuration with GPU memory growth enabled, then open
# a session using it.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)

In [1]:
# OpenAI Gym BreakoutDeterministic-v4 with A3C on GPU
# ---------------------------------------------------
#
# A3C implementation with GPU optimizer threads.
# 
# Made as part of blog series Let's make an A3C, available at
# https://jaromiru.com/2017/02/16/lets-make-an-a3c-theory/
#
# author: Jaromir Janisch, 2017

In [None]:
def calc_dimensions(env, n_frames=4, downscale=2):
    """Derive the model's state shape and action count from an environment.

    The state shape is built from the first two observation dimensions, each
    integer-divided by ``downscale``, followed by ``n_frames``.

    Args:
        env: Object exposing ``action_space.n`` and
            ``observation_space.shape``. The shape must have at least two
            entries; indexing ``shape[1]`` raises ``IndexError`` otherwise.
        n_frames: Size of the last axis of the state shape (presumably the
            number of stacked frames -- confirm against the environment
            wrapper). Defaults to 4, the previously hard-coded value.
        downscale: Integer divisor applied to the first two observation
            dimensions. Defaults to 2, the previously hard-coded value.

    Returns:
        Tuple ``(state_shape, n_actions)`` where ``state_shape`` is
        ``(height, width, n_frames)``.
    """
    n_actions = env.action_space.n
    obs_shape = env.observation_space.shape
    height = obs_shape[0] // downscale
    width = obs_shape[1] // downscale
    state_shape = (height, width, n_frames)
    return (state_shape, n_actions)

def start(threads):
    """Call ``start()`` on every worker in ``threads``, in iteration order."""
    for worker in threads:
        worker.start()

def stop(threads):
    """Ask every worker to stop, then wait for each of them to finish.

    All workers receive ``stop()`` before any ``join()`` is issued, so they
    can wind down concurrently instead of one after another.

    Args:
        threads: Iterable of objects exposing ``stop()`` and ``join()``.
            Any iterable is accepted, including one-shot generators.
    """
    # Snapshot first: the argument is traversed twice, and a one-shot
    # iterable would be exhausted by the stop pass, silently skipping joins.
    workers = list(threads)
    for worker in workers:
        worker.stop()
    for worker in workers:
        worker.join()

In [None]:
import numpy as np
import gym, time, random

from ai_agent import Agent
from ai_environment import Environment
from ai_brain import Brain
from ai_optimizer import Optimizer

# --- Training configuration -------------------------------------------------
ENV = 'BreakoutDeterministic-v4'
RUN_TIME = 30*60    # handed to time.sleep() below, i.e. seconds (30 minutes)
THREADS = 12        # number of Environment workers
OPTIMIZERS = 4      # number of Optimizer workers

# Environment instance used to read the observation/action dimensions; it is
# also reused later for the rendered evaluation run.
test_env = gym.make(ENV)
state_shape, actions_shape = calc_dimensions(test_env)

weights_name = "a3c_weights.h5"
# Optionally add model_weights=weights_name here -- presumably this starts
# from previously saved weights; confirm against ai_brain.Brain.
brain = Brain(state_shape, actions_shape)

# Every worker shares the single `brain` instance.
envs = [Environment(gym.make(ENV), Agent(brain, actions_shape)) for _ in range(THREADS)]
opts = [Optimizer(brain) for _ in range(OPTIMIZERS)]

start(opts); start(envs)
try:
    # The main thread just waits while the workers run.
    time.sleep(RUN_TIME)
finally:
    # Always shut the workers down, even when the wait is interrupted
    # (e.g. a kernel interrupt); otherwise they keep running in the background.
    stop(envs); stop(opts)

print("Training finished")

brain.save_weights(weights_name)

1, 1, 2, 2, 5, 4, 2, 2, 1, 0, 1, 2, 1, 1, 0, 0, 2, 2, 0, 3, 3, 3, 2, 3, 1, 2, 1, 3, 2, 2
1, 1, 2, 1, 3, 2, 0, 0, 3, 2, 1, 2, 2, 2, 4, 0, 2, 3, 0, 1, 2, 1, 3, 1, 2, 2, 0, 4, 0, 0
3, 3, 1, 2, 1, 3, 0, 1, 1, 0, 2, 1, 0, 2, 5, 1, 2, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 2, 1, 3


In [None]:
# Evaluation run: an agent built with both epsilon parameters set to zero,
# wrapped in a rendering environment and executed directly via run().
agent_test = Agent(
    brain,
    actions_shape,
    eps_start=0.0,
    eps_end=0.0,
)
env_test = Environment(test_env, agent_test, render=True)
env_test.run()