In [1]:
from six.moves import cPickle
import cv2
import numpy as np
import gym
from gym import wrappers
import sys, os
from functools import partial
from collections import defaultdict
sys.path.append('/home/etoestja/peas')
from peas.networks.rnn import NeuralNetwork
from peas.methods.neat import NEATPopulation, NEATGenotype

In [2]:
class GYMGame(object):
    def __init__(self, game = 'Skiing-v0'):
        self.env = gym.make(game)
        self.actions = self.env.action_space.n
        self.input_size = self.env.observation_space.shape[0]
    # Resizing to black-white 42x42
    def resize_frame(self, frame):
        frame = frame[34:34+160, :160]
        # Resize by half, then down to 42x42 (essentially mipmapping). If
        frame = cv2.resize(frame, (84, 84))
        frame = cv2.resize(frame, (42, 42))
        frame = cv2.resize(frame, (10, 10))
        frame = frame.mean(2)
        frame = frame.astype(np.float32)
        frame *= (1.0 / 255.0)
        return frame
    # a = argmax_a Q(s,a)
    def predict_action(self, observation, network):
        #compressed_observation = self.resize_frame(observation).flatten()
        #compressed_observation = self.env._get_ram()/255.*2-1
        s = network.feed(observation)[-self.actions:]
        action = np.argmax(s)
        return(action)
    def solve(self, network):
        return False
    def evaluate(self, network):
        if not isinstance(network, NeuralNetwork):
            network = NeuralNetwork(network)
        
        observation = self.env.reset()
        done = False
        iteration, total_reward = 0, 0
        
        while not done:
            #env.render()
            action = self.predict_action(observation, network)
            observation, reward, done, info = self.env.step(action)
            total_reward += reward

            #if iteration % 500 == 0:
                #print(str(iteration))

            if total_reward < -12000 or iteration >= 8000:
                break

            iteration += 1

        
        res = {'fitness': total_reward, 'steps': iteration}
        #print res
        return res
    def genotype(self):
        return lambda: NEATGenotype(inputs = self.input_size,
                                outputs = self.actions,
                                weight_range=(-3,3),
                                types=['tanh'])

In [3]:
# Create game
game = GYMGame('MsPacman-ram-v0')

# Genotype of the network
genotype = game.genotype()

[2017-02-06 17:42:11,568] Making new env: MsPacman-ram-v0


In [4]:
# Create a population
pop = NEATPopulation(genotype, popsize=60, max_cores=11)

# Run the evolution, tell it to use the task as an evaluator
pop.epoch(generations=100, evaluator=game, solution=game)

Running in 11 processes.

== Generation 1 ==
Best (570.00): NEATGenotype with 137 nodes and 1152 connections. {'steps': 716, 'fitness': 570.0}
Solved: None
Species: [60]
Age: [0]
No improvement: [0]
Running in 11 processes.

== Generation 2 ==
Best (460.00): NEATGenotype with 137 nodes and 1153 connections. {'steps': 691, 'fitness': 460.0}
Solved: None
Species: [27, 33]
Age: [1, 0]
No improvement: [1, 0]
Running in 11 processes.

== Generation 3 ==
Best (1650.00): NEATGenotype with 138 nodes and 1154 connections. {'steps': 803, 'fitness': 1650.0}
Solved: None
Species: [20, 23, 17]
Age: [2, 1, 0]
No improvement: [0, 0, 0]
Running in 11 processes.

== Generation 4 ==
Best (1650.00): NEATGenotype with 138 nodes and 1154 connections. {'steps': 764, 'fitness': 1650.0}
Solved: None
Species: [19, 16, 9, 3, 13]
Age: [3, 2, 1, 0, 0]
No improvement: [1, 0, 0, 0, 0]
Running in 11 processes.

== Generation 5 ==
Best (1650.00): NEATGenotype with 138 nodes and 1155 connections. {'steps': 803, 'fitne

{'champions': [<peas.methods.neat.NEATGenotype at 0x7f3d7fa60750>,
  <peas.methods.neat.NEATGenotype at 0x7f3d8c3fdd50>,
  <peas.methods.neat.NEATGenotype at 0x7f3d7fa60410>,
  <peas.methods.neat.NEATGenotype at 0x7f3d8c3fdcd0>,
  <peas.methods.neat.NEATGenotype at 0x7f3d8dec7590>,
  <peas.methods.neat.NEATGenotype at 0x7f3d7fa60c90>,
  <peas.methods.neat.NEATGenotype at 0x7f3d7fa60610>,
  <peas.methods.neat.NEATGenotype at 0x7f3d8dec71d0>,
  <peas.methods.neat.NEATGenotype at 0x7f3d7f9441d0>,
  <peas.methods.neat.NEATGenotype at 0x7f3d7fa60890>,
  <peas.methods.neat.NEATGenotype at 0x7f3d7fa60910>,
  <peas.methods.neat.NEATGenotype at 0x7f3d8c3fd250>,
  <peas.methods.neat.NEATGenotype at 0x7f3d8dec7190>,
  <peas.methods.neat.NEATGenotype at 0x7f3d7fa60850>,
  <peas.methods.neat.NEATGenotype at 0x7f3d8c3fdb10>,
  <peas.methods.neat.NEATGenotype at 0x7f3d8dec7f90>,
  <peas.methods.neat.NEATGenotype at 0x7f3d8c3fdfd0>,
  <peas.methods.neat.NEATGenotype at 0x7f3d7f9443d0>,
  <peas.methods

In [11]:
dir(pop.champions[0])

['__class__',
 '__delattr__',
 '__dict__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__hash__',
 '__init__',
 '__module__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'bias_as_node',
 'conn_genes',
 'distance',
 'distance_disjoint',
 'distance_excess',
 'distance_weight',
 'feedforward',
 'get_network_data',
 'initial_weight_stdev',
 'inputs',
 'mate',
 'max_depth',
 'max_nodes',
 'mutate',
 'node_genes',
 'outputs',
 'prob_add_conn',
 'prob_add_node',
 'prob_disable_conn',
 'prob_mutate_bias',
 'prob_mutate_response',
 'prob_mutate_type',
 'prob_mutate_weight',
 'prob_reenable_conn',
 'prob_reenable_parent',
 'prob_reset_weight',
 'response_default',
 'stats',
 'stdev_mutate_bias',
 'stdev_mutate_response',
 'stdev_mutate_weight',
 'types',
 'visualize',
 'weight_range']

# Adjusting image

In [11]:
observation, reward, done, info = game.env.step(0)

In [12]:
from scipy.misc import imsave

In [13]:
frame = observation[50:50+160, 30:-30]
frame = cv2.resize(frame, (84, 84))
frame = cv2.resize(frame, (30, 30))
frame = frame.mean(2)
frame = frame.astype(np.float32)
frame *= (1.0 / 255.0)

IndexError: too many indices for array