Playing Atari games from screen frames using neat-python and a convolutional autoencoder

# Import

In [None]:
import numpy as np
import gym
from gym import wrappers
from __future__ import print_function
import os
%matplotlib inline
import neat
import visualize
os.environ['DISPLAY']=':0'

from six.moves import cPickle as pickle
import gym
import numpy as np
from keras.models import model_from_json
import json
from object_detection import *
from skimage.transform import resize
from skimage.color import rgb2gray
from keras import backend as K
K.set_image_dim_ordering('th')

# Define config

In [None]:
# Base neat-python configuration file that the per-game config is derived from.
fc_config_filename = 'fc.config'

# The Gym environment id has the form "<name>-<version>".
game_name, game_version = 'Skiing', 'v0'
game = '-'.join((game_name, game_version))

num_evaluations = 1   # episodes averaged per genome in evaluate_network
num_cores = 11        # worker count handed to neat.parallel.ParallelEvaluator
population_size = 25  # written to the config's pop_size

# Create environment

In [None]:
# Instantiate the Gym environment whose id is stored in `game`. This instance
# is the one the fitness functions and the FPS benchmarks step through.
env = gym.make(game)

# Object detection features (frame -> lowres)

In [None]:
# Build the object detector from the environment.
# NOTE(review): presumably the constructor fits the detector on frames taken
# from `env` -- confirm in object_detection.ObjectDetectionFeatures2.
odf = ObjectDetectionFeatures2(env)
# File name under which the detector can be pickled for this game.
odf_filename = 'odf-' + game + '.config'
# Saving is currently disabled; uncomment to persist the detector.
#pickle.dump(odf, open(odf_filename, 'wb'))

In [None]:
# load detector
#odf = pickle.load(open(odf_filename, 'rb'))

In [None]:
# transform for detector
output_shape = (60, 60)


def process_image(x):
    """Reduce a screen frame to a low-resolution ``uint8`` image.

    The frame is passed through ``odf.get_simple_image``, resized to
    ``output_shape`` with ``order=0`` interpolation, multiplied by 255 and
    cast to ``uint8``.

    Args:
        x: Frame accepted by ``odf.get_simple_image`` (the callers in this
            notebook pass one channel of ``env.ale.getScreenGrayscale()``).

    Returns:
        Array of shape ``output_shape`` and dtype ``uint8``.
    """
    simple_image = odf.get_simple_image(x)
    return (resize(simple_image, output_shape, order=0) * 255).astype('uint8')

# Convolutional autoencoder

In [None]:
# Rebuild the encoder: the first line of the .txt file is JSON-decoded and
# handed to model_from_json; the weights are then read from the .h5 file.
with open('./data/{}_Encoder_08_02.txt'.format(game_name), 'r') as architecture_file:
    architecture = json.loads(next(architecture_file))

encoder = model_from_json(architecture)
encoder.load_weights('./data/{}_Encoder_08_02.h5'.format(game_name))

# Create neat-python population

In [None]:
# Load the base configuration from `fc_config_filename`.
config_initial = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                             neat.DefaultSpeciesSet, neat.DefaultStagnation, fc_config_filename)

# Override the values that depend on this game/run.
# NOTE(review): 64 is presumably the length of the encoder's feature vector
# (the trailing comment shows the earlier observation-space-based value) -- TODO confirm.
config_initial.genome_config.num_inputs = 64 #env.observation_space.shape[0]
# One network output per discrete action of the environment.
config_initial.genome_config.num_outputs = env.action_space.n
config_initial.pop_size = population_size

In [None]:
# Name of the per-game configuration file.
game_fc_config_filename = 'fc-' + game + '.config'

# Write the overridden settings to disk ...
config_initial.save(game_fc_config_filename)

# ... and build the working configuration by parsing that file again.
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation, game_fc_config_filename)

# Create the population, which is the top-level object for a NEAT run.
p = neat.Population(config)

# Add reporters

In [None]:
# Add a stdout reporter to show progress in the terminal.
p.add_reporter(neat.StdOutReporter())
# Statistics reporter; `stats` is later passed to the visualize helpers.
stats = neat.StatisticsReporter()
p.add_reporter(stats)
# NOTE(review): Checkpointer(5) presumably writes a checkpoint every 5
# generations -- confirm against the installed neat-python version.
p.add_reporter(neat.Checkpointer(5))

# Define fitness via game score

In [None]:
# Sliding window of the most recent processed frames. It is module-level
# state: get_reward clears it at the start of every episode.
buf = []


def transform_observation(observation):
    """Turn the current emulator screen into encoder features.

    ``observation`` is not used; the frame is read from the global ``env``
    via ``env.ale.getScreenGrayscale()`` and reduced with ``process_image``.
    The frame is pushed into the global ``buf`` window and the whole window
    is fed to ``encoder.predict`` as a batch of one.

    Returns:
        The first (only) row of the encoder's prediction.
    """
    frame = process_image(env.ale.getScreenGrayscale()[:, :, 0])
    if len(buf) >= 3:
        # Window already filled: drop the oldest frame, append the newest.
        del buf[0]
        buf.append(frame)
    else:
        # Fewer than three frames buffered (e.g. right after the buffer was
        # cleared): add the current frame three times.
        buf.extend([frame] * 3)
    batch = np.array([buf])
    return encoder.predict(batch)[0]

# a = argmax_a Q(s,a)
def predict_action(observation, network):
    """Return the index of the largest network output for this step.

    The observation is converted with ``transform_observation`` and the
    result is passed to ``network.activate``; ``np.argmax`` of the
    activation is returned as the action.
    """
    features = transform_observation(observation)
    return np.argmax(network.activate(features))

# play num_evaluations games, take mean
def evaluate_network(env, network):
    """Return the mean total reward of ``num_evaluations`` episodes.

    Each episode is played with ``get_reward(env, network)``.
    """
    episode_rewards = [get_reward(env, network) for _ in range(num_evaluations)]
    return np.array(episode_rewards).mean()

# play 1 game with network
def get_reward(env, network):
    """Play one episode on ``env`` and return the summed reward.

    The global frame buffer ``buf`` is replaced by an empty list before the
    episode starts. Actions come from ``predict_action``. There is no step
    cap: the loop runs until ``env.step`` reports ``done``.
    """
    global buf
    buf = []  # fresh frame window for the new episode

    observation = env.reset()
    total_reward = 0
    while True:
        action = predict_action(observation, network)
        observation, step_reward, finished, _ = env.step(action)
        total_reward += step_reward
        if finished:
            return total_reward

def evaluate_genome(genome, config):
    """Fitness function: build the genome's feed-forward network and score it.

    The network is created with ``neat.nn.FeedForwardNetwork.create`` and
    evaluated on the global ``env`` through ``evaluate_network``.
    """
    return evaluate_network(env, neat.nn.FeedForwardNetwork.create(genome, config))

# Parallel evaluator that scores genomes with evaluate_genome using
# `num_cores` workers and no timeout.
evaluator = neat.parallel.ParallelEvaluator(num_workers = num_cores, eval_function = evaluate_genome, timeout = None)

In [None]:
# %timeit -n1 print(evaluate_genome(p.species.get_species(1).members[1], config))

# Run evolution

In [None]:
# Run evolution; the genome returned by Population.run is kept as `winner`.
# NOTE(review): 200 is presumably the maximum number of generations -- confirm
# against the neat-python Population.run documentation.
winner = p.run(evaluator.evaluate, 200)

In [None]:
# Run again on the same population object with a limit of 2000; this
# overwrites `winner` from any earlier run.
winner = p.run(evaluator.evaluate, 2000)

# Print results

In [None]:
# Display the winning genome (disabled).
#print('\nBest genome:\n{!s}'.format(winner))

# Build the network of the winning genome (used later by evaluate_with_video)
# and produce the diagnostic plots from the visualize helper module.
winner_network = neat.nn.FeedForwardNetwork.create(winner, config)
# NOTE(review): the positional False is presumably the `view` flag -- confirm in visualize.draw_net.
visualize.draw_net(config, winner, False)
visualize.plot_stats(stats, ylog = False, view = False)
visualize.plot_species(stats, view = False)

# Example of resuming from a checkpoint (disabled).
#p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-4')
#p.run(eval_genomes, 10)

# Evaluate from checkpoint & send to OpenAI

In [None]:
# Replace the population with the one stored in the checkpoint file, call
# Population.run once more with a limit of 1, and take its best genome.
# NOTE(review): `winner_network` is not rebuilt here; it is only created in
# the "Print results" cell, so that cell must be re-run for the restored
# `winner` to be the one evaluated afterwards.
p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-1244')
p.run(evaluator.evaluate, 1)
winner = p.best_genome

In [None]:
# Separate environment instance used for the recorded evaluation episodes.
env_eval = gym.make(game)
# Directory handed to the Monitor wrapper and later to gym.upload.
monitor_path = '/tmp/' + game + '-eval'
env_eval = wrappers.Monitor(env_eval, monitor_path)
def evaluate_with_video(game, network):
    """Play 100 episodes on the monitored ``env_eval`` with ``network``.

    Every step renders the environment, picks an action with
    ``predict_action`` and accumulates the reward; a summary line is printed
    when an episode ends.

    Args:
        game: Unused; kept so existing calls keep working.
        network: Object with an ``activate`` method, as used by
            ``predict_action``.

    NOTE(review): ``predict_action`` -> ``transform_observation`` reads the
    screen from the global ``env``, not from ``env_eval``, so the frames fed
    to the network may not belong to the episode being recorded -- TODO
    confirm and pass the environment through.
    """
    global buf
    for i_episode in range(100):
        # Start each episode with an empty frame window, as get_reward does;
        # otherwise frames of the previous episode are mixed into the first
        # encoder inputs of the next one.
        buf = []
        observation = env_eval.reset()
        total_reward = 0
        t = 0
        done = False
        while not done:
            env_eval.render()
            action = predict_action(observation, network)
            observation, reward, done, info = env_eval.step(action)
            total_reward += reward
            t += 1
        # t already counts every executed step, so it is reported as is.
        print("Episode finished after {0} timesteps reward = {1}".format(t, total_reward))

In [None]:
# Record the evaluation episodes with the network built from `winner`.
evaluate_with_video(game, winner_network)

In [None]:
# Close the monitored environment before the results directory is uploaded.
env_eval.close()

In [None]:
# Upload the monitor directory. The API key is taken from the
# OPENAI_GYM_API_KEY environment variable instead of being hard-coded, so the
# secret does not end up in the notebook or in version control.
# NOTE(review): the key that used to be embedded here should be considered
# leaked and be revoked.
gym.upload(monitor_path, api_key=os.environ.get('OPENAI_GYM_API_KEY'))

# Measure performance

In [None]:
from time import time

In [None]:
# FPS total: emulator step + frame processing + encoder prediction,
# driven by random actions for F frames.
# The timer starts before env.reset(), so the reset is part of the measurement.
t_initial = time()
env.reset()
F = 100
buf = []
for i in range(F):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    
    # Same frame-window update and encoder call as transform_observation.
    image = process_image(env.ale.getScreenGrayscale()[:, :, 0])
    if len(buf) < 3:
        buf.append(image)
        buf.append(image)
        buf.append(image)
    else:
        buf.pop(0)
        buf.append(image)
    features = encoder.predict(np.array([buf]))[0]
t_end = time()
print("FPS: " + str(1. * F / (t_end - t_initial)))

In [None]:
# FPS emulator-only: random actions for F frames, no image processing.
# The timer starts before env.reset(), so the reset is part of the measurement.
t_initial = time()
env.reset()
F = 100
# Clears the shared frame buffer; this loop itself does not use it.
buf = []
for i in range(F):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
t_end = time()
print("FPS: " + str(1. * F / (t_end - t_initial)))

In [None]:
# FPS OD-only: emulator step plus process_image (object detection and
# resize), without the encoder.
# The timer starts before env.reset(), so the reset is part of the measurement.
t_initial = time()
env.reset()
F = 100
# Clears the shared frame buffer; this loop itself does not use it.
buf = []
for i in range(F):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    image = process_image(env.ale.getScreenGrayscale()[:, :, 0])
t_end = time()
print("FPS: " + str(1. * F / (t_end - t_initial)))

In [None]:
%timeit image = process_image()

In [None]:
# Time the detector step alone (screen grab + get_simple_image, no resize).
%timeit odf.get_simple_image(env.ale.getScreenGrayscale()[:, :, 0])

In [None]:
# Call the encoder 10000 times on the current contents of `buf`; only the
# last result is kept in `features`.
# NOTE(review): presumably a throughput/stability check -- confirm intent.
for i in range(10000):
    features = encoder.predict(np.array([buf]))[0]

In [None]:
# Make sure the frame buffer holds frames (filling it with three copies of
# the current screen if it has fewer than three), then time a single
# encoder prediction on it.
image = process_image(env.ale.getScreenGrayscale()[:, :, 0])
if len(buf) < 3:
    buf.append(image)
    buf.append(image)
    buf.append(image)
%timeit features = encoder.predict(np.array([buf]))[0]

In [None]:
# NOTE(review): `s` is not defined anywhere in the visible notebook; unless it
# comes from the star import of object_detection, this cell raises NameError
# -- TODO confirm or remove.
s

In [None]:
# Display the environment object as the cell output.
env