Playing ATARI-ram games using python-neat and convolutional autoencoder

# Import

In [1]:
from time import time
import numpy as np
import gym
from gym import wrappers
from __future__ import print_function
import os
%matplotlib inline
import neat
import visualize
os.environ['DISPLAY']=':0'

from six.moves import cPickle as pickle
import gym
import numpy as np
import json
from object_detection import *
from skimage.transform import resize
from skimage.color import rgb2gray
import multiprocessing
from multiprocessing.reduction import reduce_connection
from hashlib import sha256

# Define config

In [2]:
fc_config_filename = 'fc.config'
game_name = 'Skiing'
game_version = 'v0'
game = game_name + '-' + game_version
num_evaluations = 2
num_cores = 11
population_size = 80
encoder_filename = './data/{}_Encoder_08_02'.format(game_name)

# Create environment

In [32]:
env = gym.make(game)

[2017-02-09 14:39:26,274] Making new env: Skiing-v0


# Object detection features (frame -> lowres)

In [33]:
# fit detector and save
#odf = ObjectDetectionFeatures2(env)
odf_filename = 'odf-' + game + '.config'
#pickle.dump(odf, open(odf_filename, 'wb'))

In [None]:
# load detector
odf = pickle.load(open(odf_filename, 'rb'))

In [None]:
# transform for detector
output_shape = (60, 60)
#process_image = lambda x: (resize(odf.get_simple_image(x), output_shape, order=0) * 255).astype('uint8')
process_image = lambda x: (np.random.rand(60,60))

# Create neat-python population

In [6]:
# Load configuration.
config_initial = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                             neat.DefaultSpeciesSet, neat.DefaultStagnation, fc_config_filename)

config_initial.genome_config.num_inputs = 64 #env.observation_space.shape[0]
config_initial.genome_config.num_outputs = env.action_space.n
config_initial.pop_size = population_size

In [7]:
game_fc_config_filename = 'fc-' + game + '.config'

config_initial.save(game_fc_config_filename)

config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation, game_fc_config_filename)

# Create the population, which is the top-level object for a NEAT run.
p = neat.Population(config)

# Add reporters

In [8]:
# Add a stdout reporter to show progress in the terminal.
p.add_reporter(neat.StdOutReporter())
stats = neat.StatisticsReporter()
p.add_reporter(stats)
p.add_reporter(neat.Checkpointer(5))

# Stuff for pipes

In [9]:
def compress_pipe(p):
    pp = pickle.dumps(reduce_connection(p))
    return(pp)
def decompress_pipe(pp):
    upw = pickle.loads(pp)
    pp = upw[0](upw[1][0],upw[1][1],upw[1][2])
    return(pp)

# Spawn cuda conv autoencoder

In [10]:
cuda_process_p.terminate()
cuda_process_p.join()

NameError: name 'cuda_process_p' is not defined

In [11]:
m = multiprocessing.Manager()
cuda_q = m.Queue()

def cuda_process():
    print("Importing keras...")
    import os    
    os.environ['THEANO_FLAGS'] = "device=gpu1"
    from keras.models import model_from_json
    from keras import backend as K
    K.set_image_dim_ordering('th')
    
    print("Loading GPU encoder...")
    
    with open(encoder_filename + '.txt', 'r') as model_file:
        gpu_encoder = model_from_json(json.loads(next(model_file)))

    gpu_encoder.load_weights(encoder_filename + '.h5')
    
    print("Listening")
    global cuda_q
    while True:
        [buf, p] = cuda_q.get(block = True)
        #print("processing cuda...")
        try:
            features = gpu_encoder.predict(np.array([buf]))[0]
        except:
            print("Error encoding")
            features = np.zeros(64)
        p = decompress_pipe(p)
        p.send(features)
        
cuda_process_p = multiprocessing.Process(target = cuda_process, args = ())
cuda_process_p.daemon = True
cuda_process_p.start()

Importing keras...


Using Theano backend.


# Open CPU encoder

In [12]:
os.environ['THEANO_FLAGS'] = "device=cpu"
from keras.models import model_from_json
from keras import backend as K
K.set_image_dim_ordering('th')

print("Loading CPU encoder...")

with open(encoder_filename + '.txt', 'r') as model_file:
    cpu_encoder = model_from_json(json.loads(next(model_file)))

cpu_encoder.load_weights(encoder_filename + '.h5')

Using Theano backend.


Loading CPU encoder...


# Define fitness via game score

In [29]:
buf = []
use_gpu = False

def get_features_cpu(buf):
    return(cpu_encoder.predict(np.array([buf]))[0])

def get_features_gpu(buf):
    global cuda_q
    a, b = multiprocessing.Pipe()
    b = compress_pipe(b)
    cuda_q.put([buf, b])
    features = a.recv()
    return(features)

def get_features(buf):
    #global use_gpu
    
    s = np.random.rand(1)[0]
    use_gpu = (s > 0.3)
    
    if use_gpu:
        return(get_features_gpu(buf))
    return(get_features_cpu(buf))

def transform_observation(observation):
    global buf

    image = process_image(env.ale.getScreenGrayscale()[:, :, 0])
    if len(buf) < 3:
        buf.append(image)
        buf.append(image)
        buf.append(image)
    else:
        buf.pop(0)
        buf.append(image)

    features = get_features(buf)
    return features

# a = argmax_a Q(s,a)
def predict_action(observation, network):
    observation = transform_observation(observation)
    output = network.activate(observation)
    action = np.argmax(output)
    return(action)

# play num_evaluations games, take mean
def evaluate_network(env, network):
    rewards = []
    i = 0
    while i < num_evaluations:
        rewards += [get_reward(env, network)]
        i += 1

    res = np.array(rewards).mean()
    return res

# play 1 game with network
def get_reward(env, network):
    global buf
    buf = []
    observation = env.reset()
    done = False
    iteration, total_reward = 0, 0
        
    while not done:
        #env.render()
        action = predict_action(observation, network)
        observation, reward, done, info = env.step(action)
        total_reward += reward

        #if iteration % 500 == 0:
        #    print(str(iteration))

        if total_reward < -12000 or iteration >= 8000:
            break

        iteration += 1
        
    return total_reward

def evaluate_genome(genome, config):
    # updating randomstate
    h = sha256(genome.__str__())
    seed = np.frombuffer(h.digest(), dtype='uint32')
    rstate = np.random.RandomState(seed)
    
    network = neat.nn.FeedForwardNetwork.create(genome, config)
    fitness = evaluate_network(env, network)
    return fitness

evaluator = neat.parallel.ParallelEvaluator(num_workers = num_cores, eval_function = evaluate_genome, timeout = None)

In [None]:
%timeit -n1 print(evaluate_genome(p.species.get_species(1).members[1], config))

# Run evolution

In [30]:
# Run evolution
winner = p.run(evaluator.evaluate, 1)


 ****** Running generation 4 ****** 

Population's average fitness: -9511.40000 stdev: 1114.45630
Best fitness: -9013.00000 - size: (3, 186) - species 1 - id 128
Species length: 1 totaling 30 individuals
Species no improv: {1: 2}
Average adjusted fitness: 0.833
Spawn amounts: [30]
Species fitness  : [0.83336676696756951]
Mean genetic distance 1.20414163283, std dev 0.247341803784
Total extinctions: 0
Generation time: 204.250 sec (233.672 average)


In [None]:
winner = p.run(evaluator.evaluate, 2000)

# Print results

In [None]:
# Display the winning genome.
#print('\nBest genome:\n{!s}'.format(winner))

# Show output of the most fit genome against training data.
winner_network = neat.nn.FeedForwardNetwork.create(winner, config)
visualize.draw_net(config, winner, False)
visualize.plot_stats(stats, ylog = False, view = False)
visualize.plot_species(stats, view = False)

#p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-4')
#p.run(eval_genomes, 10)

# Evaluate from checkpoint & send to OpenAI

In [None]:
p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-1244')
p.run(evaluator.evaluate, 1)
winner = p.best_genome

In [None]:
env_eval = gym.make(game)
monitor_path = '/tmp/' + game + '-eval'
env_eval = wrappers.Monitor(env_eval, monitor_path)
def evaluate_with_video(game, network):
    for i_episode in range(100):
        observation = env_eval.reset()
        total_reward = 0
        t = 0
        while True:
            env_eval.render()
            action = predict_action(observation, network)
            observation, reward, done, info = env_eval.step(action)
            total_reward += reward
            t += 1
            if done:
                print("Episode finished after {0} timesteps reward = {1}".format(t+1, total_reward))
                break

In [None]:
evaluate_with_video(game, winner_network)

In [None]:
env_eval.close()

In [None]:
gym.upload(monitor_path, api_key='sk_ciz2F0csRzCkpESayoRuug')

# Measure performance

In [18]:
# FPS total
t_initial = time()
env.reset()
F = 100
buf = []
for i in range(F):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    features = transform_observation(1)
t_end = time()
print("FPS: " + str(1. * F / (t_end - t_initial)))

FPS: 92.2502232408


In [57]:
# FPS emulator-only
t_initial = time()
env.reset()
F = 100
buf = []
for i in range(F):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
t_end = time()
print("FPS: " + str(1. * F / (t_end - t_initial)))

FPS: 178.771620251


In [29]:
# FPS OD-only
t_initial = time()
env.reset()
F = 100
buf = []
for i in range(F):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    image = process_image(env.ale.getScreenGrayscale()[:, :, 0])
    buf = [image,image,image]
t_end = time()
print("FPS: " + str(1. * F / (t_end - t_initial)))

FPS: 165.642134662


In [24]:
for i in range(1000):
    get_features_gpu(buf)

In [15]:
get_features_gpu(buf)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [21]:
%timeit get_features_gpu(buf)

100 loops, best of 3: 4.83 ms per loop


In [22]:
%timeit get_features_cpu(buf)

10 loops, best of 3: 27 ms per loop
