Playing Atari RAM games using neat-python

# Import

In [1]:
import numpy as np
import gym
from gym import wrappers
from __future__ import print_function
import os
%matplotlib inline
import neat
import visualize
# Set the DISPLAY environment variable of this process to ':0'.
# NOTE(review): presumably so that rendering can reach an X server — confirm
# this is still needed on the machine running the notebook.
os.environ['DISPLAY']=':0'

# Define config

In [2]:
# --- Run settings (everything a reader is expected to tune) ---

# Gym environment id to train on.
game = 'Skiing-ram-v0'
# Base NEAT configuration file that is loaded and then adapted to the game.
fc_config_filename = 'fc.config'

# Number of genomes per generation (copied into the NEAT config's pop_size).
population_size = 60
# Number of games played per network; their mean reward is the fitness.
num_evaluations = 3
# Worker count handed to the parallel evaluator.
num_cores = 10

# Create environment

In [3]:
# Build the Gym environment named by `game`. This module-level `env` is the
# one evaluate_genome() plays on, and its spaces size the network below.
env = gym.make(game)

[2017-02-07 20:35:36,060] Making new env: Skiing-ram-v0


# Create neat-python population

In [4]:
# Load the base configuration from `fc_config_filename`, using neat-python's
# default genome, reproduction, species-set and stagnation classes.
config_initial = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation, fc_config_filename)

# Adapt the base configuration to the chosen game: one network input per entry
# of the observation vector and one network output per discrete action.
config_initial.genome_config.num_inputs = env.observation_space.shape[0]
config_initial.genome_config.num_outputs = env.action_space.n
# Population size comes from the settings cell rather than from the file.
config_initial.pop_size = population_size

In [5]:
# Name of the game-specific configuration file, e.g. 'fc-<game>.config'.
game_fc_config_filename = 'fc-' + game + '.config'

# Write the adapted configuration to disk ...
config_initial.save(game_fc_config_filename)

# ... and load it back as the configuration actually used for the run.
# NOTE(review): presumably the save/reload round trip is there so that values
# derived from num_inputs/num_outputs are rebuilt consistently — confirm.
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation, game_fc_config_filename)

# Create the population, which is the top-level object for a NEAT run.
p = neat.Population(config)

# Add reporters

In [6]:
# Add a stdout reporter to show progress in the terminal.
p.add_reporter(neat.StdOutReporter())
# Keep a handle on the statistics reporter: the results cell passes `stats`
# to visualize.plot_stats() and visualize.plot_species().
stats = neat.StatisticsReporter()
p.add_reporter(stats)
# Periodically write 'neat-checkpoint-N' files so a run can be resumed.
# NOTE(review): 5 is the first positional argument of Checkpointer; confirm
# its meaning (generation interval) for the installed neat-python version.
p.add_reporter(neat.Checkpointer(5))

# Define fitness via game score

In [7]:
def transform_observation(observation):
    """Linearly rescale an observation so that 0 maps to -1 and 255 maps to 1.

    Works on anything that supports true division by a float and scalar
    arithmetic (plain numbers or numpy arrays); the input is not modified.
    """
    unit_scaled = observation / 255.
    return unit_scaled * 2 - 1

# a = argmax_a Q(s,a)
def predict_action(observation, network):
    """Pick the action whose network output is largest.

    The observation is rescaled with transform_observation(), fed through
    network.activate(), and the index of the maximum output is returned.
    """
    scores = network.activate(transform_observation(observation))
    return np.argmax(scores)

# play num_evaluations games, take mean
def evaluate_network(env, network):
    """Play `num_evaluations` games with `network` and return the mean reward.

    Each game is played by get_reward(env, network); the per-game totals are
    averaged with numpy.
    """
    episode_rewards = [get_reward(env, network) for _ in range(num_evaluations)]
    return np.array(episode_rewards).mean()

# play 1 game with network
def get_reward(env, network, min_total_reward=-12000, max_iterations=8000):
    """Play one game with `network` choosing every action; return the total reward.

    Parameters
    ----------
    env : environment exposing reset() and step(action), where step() returns
        the tuple (observation, reward, done, info).
    network : object accepted by predict_action() (must provide activate()).
    min_total_reward : the episode is abandoned as soon as the accumulated
        reward drops below this value. Defaults to the previously hard-coded
        -12000.
    max_iterations : cap on the iteration counter. Defaults to the previously
        hard-coded 8000. The cap is checked after a step has been taken and
        before the counter is incremented, so at most max_iterations + 1
        steps are played.

    Returns
    -------
    The sum of the rewards received up to the point where the episode ended
    or was abandoned.
    """
    observation = env.reset()
    total_reward = 0
    iteration = 0
    done = False

    while not done:
        action = predict_action(observation, network)
        observation, reward, done, _ = env.step(action)
        total_reward += reward

        # Cut off hopeless or never-ending episodes so that a single genome
        # cannot stall the whole generation.
        if total_reward < min_total_reward or iteration >= max_iterations:
            break

        iteration += 1

    return total_reward

def evaluate_genome(genome, config):
    """Fitness function for a single genome.

    Builds a feed-forward network from `genome` under `config` and scores it
    with evaluate_network() on the module-level `env`.
    """
    return evaluate_network(env, neat.nn.FeedForwardNetwork.create(genome, config))
    
# Manual smoke test (disabled): score a single genome of species 1 directly.
#print(evaluate_genome(p.species.get_species(1).members[1], config))

# Evaluator that calls evaluate_genome for the genomes of a generation using
# `num_cores` workers; `timeout = None` is passed through unchanged.
evaluator = neat.parallel.ParallelEvaluator(num_workers = num_cores, eval_function = evaluate_genome, timeout = None)

# Run evolution

In [8]:
# Run evolution, using the parallel evaluator as the fitness function.
# NOTE(review): 100 is the second positional argument of Population.run
# (presumably the number of generations — confirm). In the recorded output
# below this call ended with CompleteExtinctionException, so `winner` was not
# assigned by this cell.
winner = p.run(evaluator.evaluate, 100)


 ****** Running generation 0 ****** 

Population's average fitness: -11704.80000 stdev: 1029.51919
Best fitness: -9011.00000 - size: (3, 384) - species 1 - id 22
Species length: 1 totaling 60 individuals
Species no improv: {1: 0}
Average adjusted fitness: 0.412
Spawn amounts: [60]
Species fitness  : [0.41157710790738289]
Mean genetic distance 1.04247239079, std dev 0.222082918557
Total extinctions: 0
Generation time: 216.894 sec

 ****** Running generation 1 ****** 

Population's average fitness: -11305.59444 stdev: 1310.48218
Best fitness: -8925.00000 - size: (3, 382) - species 1 - id 82
Species length: 1 totaling 60 individuals
Species no improv: {1: 0}
Average adjusted fitness: 0.455
Spawn amounts: [60]
Species fitness  : [0.45549074921215682]
Mean genetic distance 1.6061062895, std dev 0.178414288046
Total extinctions: 0
Generation time: 196.075 sec (206.485 average)
Saving checkpoint to neat-checkpoint-1

 ****** Running generation 2 ****** 

Population's average fitness: -10231.

CompleteExtinctionException: 

# Print results

In [14]:
# Display the winning genome.
#print('\nBest genome:\n{!s}'.format(winner))
# Take the best genome directly from the population object instead of relying
# on the return value of p.run().
winner = p.best_genome
# Build the network for the most fit genome; it is reused by the evaluation
# cells further down.
winner_network = neat.nn.FeedForwardNetwork.create(winner, config)
# Draw the winner's topology and plot the statistics collected by `stats`
# (False / view = False are passed through to the local `visualize` helpers).
visualize.draw_net(config, winner, False)
visualize.plot_stats(stats, ylog = False, view = False)
visualize.plot_species(stats, view = False)

# Disabled example of resuming a run from a checkpoint.
# NOTE(review): `eval_genomes` is not defined anywhere in the visible notebook.
#p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-4')
#p.run(eval_genomes, 10)

# Evaluate from checkpoint & upload the results to OpenAI Gym

In [None]:
# Resume the population stored in a checkpoint file and run one more
# generation so that the population has a best genome to report.
p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-76')
p.run(evaluator.evaluate, 1)
winner = p.best_genome
# Rebuild the network from the restored winner. Without this, the evaluation
# cell would keep using a `winner_network` left over from an earlier cell
# (or fail with a NameError on a fresh kernel).
winner_network = neat.nn.FeedForwardNetwork.create(winner, p.config)

In [43]:
# Directory handed to the Monitor wrapper; also used later for the upload.
monitor_path = '/tmp/' + game + '-eval'
# Dedicated evaluation environment for `game`, wrapped in a Monitor.
env_eval = wrappers.Monitor(gym.make(game), monitor_path)
def evaluate_with_video(game, network, num_episodes=100):
    """Play rendered episodes on the module-level `env_eval` and print each result.

    Parameters
    ----------
    game : unused; kept so existing calls of the form
        evaluate_with_video(game, network) keep working. The environment that
        is played is always the module-level `env_eval`.
    network : object accepted by predict_action() (must provide activate()).
    num_episodes : number of episodes to play. Defaults to the previously
        hard-coded 100.

    Every episode runs until the environment reports `done`; there is no step
    cap here. One line with the number of steps taken and the total reward is
    printed per episode.
    """
    for _ in range(num_episodes):
        observation = env_eval.reset()
        total_reward = 0
        t = 0
        done = False
        while not done:
            env_eval.render()
            action = predict_action(observation, network)
            observation, reward, done, _info = env_eval.step(action)
            total_reward += reward
            t += 1
        # `t` already counts the final step, so it is reported as-is; the
        # previous `t+1` over-reported every episode by one timestep.
        print("Episode finished after {0} timesteps reward = {1}".format(t, total_reward))

In [None]:
# Play the evaluation episodes with the current `winner_network`.
evaluate_with_video(game, winner_network)

In [None]:
# Close the monitored evaluation environment before uploading its recordings.
env_eval.close()

In [None]:
# Upload the monitor recordings. The API key is read from the environment
# (export OPENAI_GYM_API_KEY=... before starting the notebook) so that no
# credential is stored in the notebook itself; a missing variable raises
# KeyError instead of attempting an unauthenticated upload.
# NOTE: the key that used to be hard-coded here has been published with the
# notebook and should be revoked.
gym.upload(monitor_path, api_key=os.environ['OPENAI_GYM_API_KEY'])