Playing ATARI-ram games using python-neat and convolutional autoencoder

# Import

In [1]:
from time import time
import numpy as np
import gym
from gym import wrappers
from __future__ import print_function
import os
%matplotlib inline
import neat
import visualize
os.environ['DISPLAY']=':0'

from six.moves import cPickle as pickle
import gym
import numpy as np
import json
from object_detection import *
from skimage.transform import resize
from skimage.color import rgb2gray
import multiprocessing
from multiprocessing.reduction import reduce_connection
from hashlib import sha256
import SharedArray as sa

In [2]:
from time import clock

# Define config

In [3]:
fc_config_filename = 'fc.config'
game_name = 'Skiing'
game_version = 'v0'
game = game_name + '-' + game_version
num_evaluations = 1
num_cores = 11
population_size = 60
encoder_filename = './data/{}_Encoder_08_02'.format(game_name)
cpu_percent = 0.4

# Create environment

In [4]:
env = gym.make(game)

[2017-02-10 10:41:51,455] Making new env: Skiing-v0


# Object detection features (frame -> lowres)

In [6]:
# fit detector and save
odf = ObjectDetectionFeatures2(env)
#odf_filename = 'odf-' + game + '.config'
#pickle.dump(odf, open(odf_filename, 'wb'))

In [7]:
# load detector
#odf = pickle.load(open(odf_filename, 'rb'))

In [5]:
# transform for detector
output_shape = (60, 60)
process_image = lambda x: (resize(odf.get_simple_image(x), output_shape, order=0) * 255).astype('uint8')
#process_image = lambda x: (np.random.rand(60,60))

# Create neat-python population

In [6]:
# Load configuration.
config_initial = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                             neat.DefaultSpeciesSet, neat.DefaultStagnation, fc_config_filename)

config_initial.genome_config.num_inputs = 64 #env.observation_space.shape[0]
config_initial.genome_config.num_outputs = env.action_space.n
config_initial.pop_size = population_size

In [7]:
game_fc_config_filename = 'fc-' + game + '.config'

config_initial.save(game_fc_config_filename)

config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation, game_fc_config_filename)

# Create the population, which is the top-level object for a NEAT run.
p = neat.Population(config)

# Add reporters

In [8]:
# Add a stdout reporter to show progress in the terminal.
p.add_reporter(neat.StdOutReporter())
stats = neat.StatisticsReporter()
p.add_reporter(stats)
p.add_reporter(neat.Checkpointer(5))

# Stuff for pipes

In [9]:
def compress_pipe(p):
    pp = pickle.dumps(reduce_connection(p))
    return(pp)
def decompress_pipe(pp):
    upw = pickle.loads(pp)
    pp = upw[0](upw[1][0],upw[1][1],upw[1][2])
    return(pp)

# Spawn cuda conv autoencoder

In [10]:
cuda_process_p.terminate()
cuda_process_p.join()

NameError: name 'cuda_process_p' is not defined

In [11]:
m = multiprocessing.Manager()
cuda_q = m.Queue()

In [12]:
def cuda_process():
    print("Importing keras...")
    import os    
    os.environ['THEANO_FLAGS'] = "device=gpu1"
    from keras.models import model_from_json
    from keras import backend as K
    K.set_image_dim_ordering('th')
    
    print("Loading GPU encoder...")
    
    with open(encoder_filename + '.txt', 'r') as model_file:
        gpu_encoder = model_from_json(json.loads(next(model_file)))

    gpu_encoder.load_weights(encoder_filename + '.h5')
    
    print("Listening")
    global cuda_q
    while True:
        [sn_frame, sn_features, p] = cuda_q.get(block = True)
        fr = sa.attach(sn_frame)
        ft = sa.attach(sn_features)
        p = decompress_pipe(p)
        
        try:
            ft[:] = gpu_encoder.predict(np.array([fr]))[0]
        except:
            ft[:] = np.zeros(64)

        try:
            p.send(' ')
        except:
            continue
        
cuda_process_p = multiprocessing.Process(target = cuda_process, args = ())
cuda_process_p.daemon = True
cuda_process_p.start()

Importing keras...


Using Theano backend.


# Open CPU encoder

In [16]:
os.environ['THEANO_FLAGS'] = "device=cpu"
from keras.models import model_from_json
from keras import backend as K
K.set_image_dim_ordering('th')

print("Loading CPU encoder...")

with open(encoder_filename + '.txt', 'r') as model_file:
    cpu_encoder = model_from_json(json.loads(next(model_file)))

cpu_encoder.load_weights(encoder_filename + '.h5')

Using Theano backend.


Loading CPU encoder...


# Define fitness via game score

In [17]:
buf = []
use_gpu = False
shared_name_frame = None
shared_name_features = None
shared_array_frame = None
shared_array_features = None

def init_shared(obj):
    global shared_name_frame, shared_name_features, shared_array_frame, shared_array_features
    h = sha256(obj.__str__())
    seed = np.frombuffer(h.digest(), dtype='uint32')
    rstate = np.random.RandomState(seed)
    
    shared_name_frame = "shm://" + str(os.getpid()) + '_frame' + str(seed[0])
    shared_name_features = "shm://" + str(os.getpid()) + '_features' + str(seed[0])

    try:
        sa.delete(shared_name_frame[6:])
    except:
        print('-')
    try:
        sa.delete(shared_name_features[6:])
    except:
        print('s')
    
    shared_array_frame = sa.create(shared_name_frame, (3, 60, 60))
    shared_array_features = sa.create(shared_name_features, (64))

def get_features_cpu(buf):
    return(cpu_encoder.predict(np.array([buf]))[0])

def get_features_gpu(buf):
    global cuda_q
    a, b = multiprocessing.Pipe()
    b = compress_pipe(b)
    shared_array_frame[:] = np.array(buf)
    cuda_q.put([shared_name_frame, shared_name_features, b])
    a.recv()
    return(shared_array_features)

def get_features(buf):
    #global use_gpu
    
    s = np.random.rand(1)[0]
    use_gpu = (s > cpu_percent)
    
    if use_gpu:
        return(get_features_gpu(buf))
    return(get_features_cpu(buf))

def transform_observation(observation):
    global buf

    image = process_image(env.ale.getScreenGrayscale()[:, :, 0])
    if len(buf) < 3:
        buf.append(image)
        buf.append(image)
        buf.append(image)
    else:
        buf.pop(0)
        buf.append(image)

    features = get_features(buf)
    return features

# a = argmax_a Q(s,a)
def predict_action(observation, network):
    observation = transform_observation(observation)
    output = network.activate(observation)
    action = np.argmax(output)
    return(action)

# play num_evaluations games, take mean
def evaluate_network(env, network):
    rewards = []
    i = 0
    while i < num_evaluations:
        rewards += [get_reward(env, network)]
        i += 1

    res = np.array(rewards).mean()
    return res

# play 1 game with network
def get_reward(env, network):
    global buf
    buf = []
    observation = env.reset()
    done = False
    iteration, total_reward = 0, 0
        
    while not done:
        #env.render()
        action = predict_action(observation, network)
        observation, reward, done, info = env.step(action)
        total_reward += reward

        #if iteration % 500 == 0:
        #    print(str(iteration))

        if total_reward < -12000 or iteration >= 8000:
            break

        iteration += 1
        
    return total_reward

def evaluate_genome(genome, config):
    init_shared(genome)
    network = neat.nn.FeedForwardNetwork.create(genome, config)
    fitness = evaluate_network(env, network)
    return fitness

evaluator = neat.parallel.ParallelEvaluator(num_workers = num_cores, eval_function = evaluate_genome, timeout = None)

In [18]:
#%timeit -n1 print(evaluate_genome(p.species.get_species(1).members[1], config))

# Run evolution

In [None]:
# Run evolution
winner = p.run(evaluator.evaluate, 30)


 ****** Running generation 0 ****** 

Population's average fitness: -10797.91667 stdev: 1602.34915
Best fitness: -9013.00000 - size: (8, 512) - species 1 - id 2
Species length: 1 totaling 60 individuals
Species no improv: {1: 0}
Average adjusted fitness: 0.741
Spawn amounts: [60]
Species fitness  : [0.7406021411616529]
Mean genetic distance 1.13228420648, std dev 0.137559609654
Total extinctions: 0
Generation time: 1035.596 sec
Saving checkpoint to neat-checkpoint-0

 ****** Running generation 1 ****** 

Population's average fitness: -10045.86667 stdev: 1713.08026
Best fitness: -9013.00000 - size: (8, 512) - species 1 - id 2
Species length: 1 totaling 60 individuals
Species no improv: {1: 0}
Average adjusted fitness: 0.850
Spawn amounts: [60]
Species fitness  : [0.84989584847163691]
Mean genetic distance 1.25833458452, std dev 0.182277822776
Total extinctions: 0
Generation time: 785.711 sec (910.653 average)
Saving checkpoint to neat-checkpoint-1

 ****** Running generation 2 ****** 


In [None]:
# Run evolution
winner = p.run(evaluator.evaluate, 30)

# Print results

In [16]:
# Display the winning genome.
#print('\nBest genome:\n{!s}'.format(winner))

# Show output of the most fit genome against training data.
winner_network = neat.nn.FeedForwardNetwork.create(winner, config)
visualize.draw_net(config, winner, False)
visualize.plot_stats(stats, ylog = False, view = False)
visualize.plot_species(stats, view = False)

#p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-4')
#p.run(eval_genomes, 10)

ValueError: max() arg is an empty sequence

# Evaluate from checkpoint & send to OpenAI

In [17]:
def get_winner(p):
    max_fitness = -9999999999
    best_genome = None
    for v in p.population:
        genome = p.population[v]
        if genome.fitness > max_fitness:
            max_fitness = genome.fitness
            winner = genome
    print(max_fitness)
    return(winner)

In [23]:
s = 'neat-checkpoint-0 neat-checkpoint-15 neat-checkpoint-21 neat-checkpoint-28 neat-checkpoint-34 neat-checkpoint-40 neat-checkpoint-47 neat-checkpoint-53 neat-checkpoint-8 neat-checkpoint-1 neat-checkpoint-16 neat-checkpoint-22 neat-checkpoint-29 neat-checkpoint-35 neat-checkpoint-41 neat-checkpoint-48 neat-checkpoint-54 neat-checkpoint-9 neat-checkpoint-10 neat-checkpoint-17 neat-checkpoint-23 neat-checkpoint-3 neat-checkpoint-36 neat-checkpoint-42 neat-checkpoint-49 neat-checkpoint-55 neat-checkpoint-11 neat-checkpoint-18 neat-checkpoint-24 neat-checkpoint-30 neat-checkpoint-37 neat-checkpoint-43 neat-checkpoint-5 neat-checkpoint-56 neat-checkpoint-12 neat-checkpoint-19 neat-checkpoint-25 neat-checkpoint-31 neat-checkpoint-38 neat-checkpoint-44 neat-checkpoint-50 neat-checkpoint-57 neat-checkpoint-13 neat-checkpoint-2 neat-checkpoint-26 neat-checkpoint-32 neat-checkpoint-39 neat-checkpoint-45 neat-checkpoint-51 neat-checkpoint-6 neat-checkpoint-14 neat-checkpoint-20 neat-checkpoint-27 neat-checkpoint-33 neat-checkpoint-4 neat-checkpoint-46 neat-checkpoint-7 '
for x in s.split(' '):
    p = neat.Checkpointer.restore_checkpoint(x)
    winner = get_winner(p)

-9013.0
-9011.0
-7521.0
-7849.0
-7521.0
-9011.0
-9011.0
-9011.0
-7521.0
-9013.0
-7521.0
-7849.0
-7521.0
-7521.0
-9013.0
-9013.0
-8856.0
-7520.0
-9011.0
-7520.0
-7521.0
-7521.0
-7526.0
-9013.0
-9013.0
-9013.0
-9011.0
-7520.0
-7521.0
-7891.0
-7526.0
-9013.0
-9011.0
-9013.0
-7520.0
-7521.0
-9011.0
-7520.0
-9011.0
-9013.0
-9011.0
-9013.0
-7521.0
-9011.0
-9011.0
-7520.0
-9013.0
-8864.0
-9011.0
-9011.0
-8548.0
-9011.0
-7520.0
-7520.0
-7521.0
-9013.0
-9011.0


IOError: [Errno 2] No such file or directory: ''

-9013.0


In [None]:
env_eval = gym.make(game)
monitor_path = '/tmp/' + game + '-eval'
env_eval = wrappers.Monitor(env_eval, monitor_path)
def evaluate_with_video(game, network):
    for i_episode in range(100):
        observation = env_eval.reset()
        total_reward = 0
        t = 0
        while True:
            env_eval.render()
            action = predict_action(observation, network)
            observation, reward, done, info = env_eval.step(action)
            total_reward += reward
            t += 1
            if done:
                print("Episode finished after {0} timesteps reward = {1}".format(t+1, total_reward))
                break

In [None]:
evaluate_with_video(game, winner_network)

In [None]:
env_eval.close()

In [None]:
gym.upload(monitor_path, api_key='sk_ciz2F0csRzCkpESayoRuug')

# Measure performance

In [None]:
# FPS total
t_initial = time()
env.reset()
F = 100
buf = []
for i in range(F):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    features = transform_observation(1)
t_end = time()
print("FPS: " + str(1. * F / (t_end - t_initial)))

In [None]:
# FPS emulator-only
t_initial = time()
env.reset()
F = 100
buf = []
for i in range(F):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
t_end = time()
print("FPS: " + str(1. * F / (t_end - t_initial)))

In [28]:
# FPS OD-only
t_initial = time()
env.reset()
F = 100
buf = []
for i in range(F):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    image = process_image(env.ale.getScreenGrayscale()[:, :, 0])
    buf = [image,image,image]
t_end = time()
print("FPS: " + str(1. * F / (t_end - t_initial)))

FPS: 180.967358771


In [26]:
init_shared('sdsf')

In [29]:
for i in range(10):
    get_features_gpu(buf)

In [None]:
%timeit get_features_gpu(buf)