In [1]:
from six.moves import cPickle
import cv2
import numpy as np
import gym
from gym import wrappers
import sys, os
from functools import partial
from collections import defaultdict
sys.path.append('/home/etoestja/peas')
from peas.networks.rnn import NeuralNetwork
from peas.methods.neat import NEATPopulation, NEATGenotype

In [2]:
class GYMGame(object):
    def __init__(self, game = 'Skiing-v0'):
        self.env = gym.make(game)
        self.actions = self.env.action_space.n
        self.input_size = self.env.observation_space.shape[0]
        
    # Resizing to black-white 42x42
    def resize_frame(self, frame):
        frame = frame[34:34+160, :160]
        # Resize by half, then down to 42x42 (essentially mipmapping). If
        frame = cv2.resize(frame, (84, 84))
        frame = cv2.resize(frame, (42, 42))
        frame = cv2.resize(frame, (10, 10))
        frame = frame.mean(2)
        frame = frame.astype(np.float32)
        frame *= (1.0 / 255.0)
        return frame
    
    # a = argmax_a Q(s,a)
    def predict_action(self, observation, network):
        #compressed_observation = self.resize_frame(observation).flatten()
        #compressed_observation = self.env._get_ram()/255.*2-1
        observation = observation / 255. * 2 - 1
        s = network.feed(observation)[-self.actions:]
        action = np.argmax(s)
        return(action)
    
    def solve(self, network):
        return False
    
    def evaluate(self, network):
        rewards = []
        stepss = []
        N = 3
        i = 0
        while i < N:
            t_res = self.evaluateOne(network)
            rewards += [t_res['fitness']]
            stepss += [t_res['steps']]
            i += 1
        
        #print rewards
        
        res = {'fitness': np.array(rewards).mean(), 'steps': np.array(stepss).mean()}
        return res
    
    def evaluateOne(self, network):
        if not isinstance(network, NeuralNetwork):
            network = NeuralNetwork(network)
            
        observation = self.env.reset()
        done = False
        iteration, total_reward = 0, 0
        
        while not done:
            #env.render()
            action = self.predict_action(observation, network)
            observation, reward, done, info = self.env.step(action)
            total_reward += reward

            #if iteration % 500 == 0:
                #print(str(iteration))

            #if total_reward < -12000 or iteration >= 8000:
            #    break

            iteration += 1
        
        res = {'fitness': total_reward, 'steps': iteration}
        #print res
        return res
    
    def genotype(self):
        return lambda: NEATGenotype(inputs = self.input_size,
                                outputs = self.actions,
                                weight_range=(-3,3),
                                types=['tanh'])

In [3]:
# Create game
game = GYMGame('MsPacman-ram-v0')

# Genotype of the network
genotype = game.genotype()

[2017-02-07 10:58:16,023] Making new env: MsPacman-ram-v0


In [5]:
# Create a population
pop = NEATPopulation(genotype, popsize = 60, max_cores = 10)

In [6]:
# Run the evolution, tell it to use the task as an evaluator
pop.epoch(generations = 100, evaluator = game, solution = game)

Running in 10 processes.

== Generation 1 ==
Best (1780.00): NEATGenotype with 137 nodes and 1152 connections. {'steps': 959.0, 'fitness': 1780.0}
Solved: None
Species: [60]
Age: [0]
No improvement: [0]
Running in 10 processes.

== Generation 2 ==
Best (656.67): NEATGenotype with 137 nodes and 1153 connections. {'steps': 784.33333333333337, 'fitness': 656.66666666666663}
Solved: None
Species: [60]
Age: [1]
No improvement: [1]
Running in 10 processes.

== Generation 3 ==
Best (930.00): NEATGenotype with 138 nodes and 1155 connections. {'steps': 843.0, 'fitness': 930.0}
Solved: None
Species: [16, 17, 9, 18]
Age: [2, 0, 0, 0]
No improvement: [0, 0, 0, 0]
Running in 10 processes.

== Generation 4 ==
Best (930.00): NEATGenotype with 139 nodes and 1156 connections. {'steps': 799.0, 'fitness': 930.0}
Solved: None
Species: [15, 14, 8, 12, 3, 7]
Age: [3, 1, 1, 1, 0, 0]
No improvement: [1, 0, 0, 0, 0, 0]
Running in 10 processes.

== Generation 5 ==
Best (850.00): NEATGenotype with 138 nodes and 

{'champions': [<peas.methods.neat.NEATGenotype at 0x7f2402f3e3d0>,
  <peas.methods.neat.NEATGenotype at 0x7f2402f3e610>,
  <peas.methods.neat.NEATGenotype at 0x7f2402f3e550>,
  <peas.methods.neat.NEATGenotype at 0x7f2402f3e6d0>,
  <peas.methods.neat.NEATGenotype at 0x7f241809ab10>,
  <peas.methods.neat.NEATGenotype at 0x7f241809abd0>,
  <peas.methods.neat.NEATGenotype at 0x7f241acb8150>,
  <peas.methods.neat.NEATGenotype at 0x7f24027100d0>,
  <peas.methods.neat.NEATGenotype at 0x7f241acb8e90>,
  <peas.methods.neat.NEATGenotype at 0x7f241809ac90>,
  <peas.methods.neat.NEATGenotype at 0x7f241acb84d0>,
  <peas.methods.neat.NEATGenotype at 0x7f241809ad90>,
  <peas.methods.neat.NEATGenotype at 0x7f241acb8610>,
  <peas.methods.neat.NEATGenotype at 0x7f241acb8d50>,
  <peas.methods.neat.NEATGenotype at 0x7f2402f3e4d0>,
  <peas.methods.neat.NEATGenotype at 0x7f241acb8490>,
  <peas.methods.neat.NEATGenotype at 0x7f2402f3e410>,
  <peas.methods.neat.NEATGenotype at 0x7f2402f3e350>,
  <peas.methods

In [11]:
dir(pop.champions[0])

['__class__',
 '__delattr__',
 '__dict__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__hash__',
 '__init__',
 '__module__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'bias_as_node',
 'conn_genes',
 'distance',
 'distance_disjoint',
 'distance_excess',
 'distance_weight',
 'feedforward',
 'get_network_data',
 'initial_weight_stdev',
 'inputs',
 'mate',
 'max_depth',
 'max_nodes',
 'mutate',
 'node_genes',
 'outputs',
 'prob_add_conn',
 'prob_add_node',
 'prob_disable_conn',
 'prob_mutate_bias',
 'prob_mutate_response',
 'prob_mutate_type',
 'prob_mutate_weight',
 'prob_reenable_conn',
 'prob_reenable_parent',
 'prob_reset_weight',
 'response_default',
 'stats',
 'stdev_mutate_bias',
 'stdev_mutate_response',
 'stdev_mutate_weight',
 'types',
 'visualize',
 'weight_range']

# Adjusting image

In [None]:
a

In [11]:
observation, reward, done, info = game.env.step(0)

In [12]:
from scipy.misc import imsave

In [13]:
frame = observation[50:50+160, 30:-30]
frame = cv2.resize(frame, (84, 84))
frame = cv2.resize(frame, (30, 30))
frame = frame.mean(2)
frame = frame.astype(np.float32)
frame *= (1.0 / 255.0)

IndexError: too many indices for array