In [3]:
from six.moves import cPickle
import cv2
import numpy as np
import gym
from gym import wrappers
import sys, os
from functools import partial
from collections import defaultdict
sys.path.append('/home/etoestja/peas')
from peas.networks.rnn import NeuralNetwork
from peas.methods.neat import NEATPopulation, NEATGenotype

In [13]:
class GYMGame(object):
    def __init__(self, game = 'Skiing-v0'):
        self.env = gym.make(game)
        self.actions = self.env.action_space.n
        self.input_size = self.env.observation_space.shape[0]
        
    # Resizing to black-white 42x42
    def resize_frame(self, frame):
        frame = frame[34:34+160, :160]
        # Resize by half, then down to 42x42 (essentially mipmapping). If
        frame = cv2.resize(frame, (84, 84))
        frame = cv2.resize(frame, (42, 42))
        frame = cv2.resize(frame, (10, 10))
        frame = frame.mean(2)
        frame = frame.astype(np.float32)
        frame *= (1.0 / 255.0)
        return frame
    
    # a = argmax_a Q(s,a)
    def predict_action(self, observation, network):
        #compressed_observation = self.resize_frame(observation).flatten()
        #compressed_observation = self.env._get_ram()/255.*2-1
        observation = observation / 255. * 2 - 1
        s = network.feed(observation)[-self.actions:]
        action = np.argmax(s)
        return(action)
    
    def solve(self, network):
        return False
    
    def evaluate(self, network):
        rewards = []
        stepss = []
        N = 5
        i = 0
        while i < N:
            t_res = self.evaluateOne(network)
            rewards += [t_res['fitness']]
            stepss += [t_res['steps']]
            i += 1
        
        #print rewards
        
        res = {'fitness': np.array(rewards).mean(), 'steps': np.array(stepss).mean()}
        return res
    
    def evaluateOne(self, network):
        if not isinstance(network, NeuralNetwork):
            network = NeuralNetwork(network)
            
        observation = self.env.reset()
        done = False
        iteration, total_reward = 0, 0
        
        while not done:
            #env.render()
            action = self.predict_action(observation, network)
            observation, reward, done, info = self.env.step(action)
            total_reward += reward

            #if iteration % 500 == 0:
                #print(str(iteration))

            #if total_reward < -12000 or iteration >= 8000:
            #    break

            iteration += 1
        
        res = {'fitness': total_reward, 'steps': iteration}
        #print res
        return res
    
    def genotype(self):
        return lambda: NEATGenotype(inputs = self.input_size,
                                outputs = self.actions,
                                weight_range=(-3,3),
                                types=['tanh'])

In [14]:
# Create game
game = GYMGame('MsPacman-ram-v0')

# Genotype of the network
genotype = game.genotype()

[2017-02-06 23:36:49,461] Making new env: MsPacman-ram-v0


In [15]:
# Create a population
pop = NEATPopulation(genotype, popsize=11, max_cores=11)

# Run the evolution, tell it to use the task as an evaluator
pop.epoch(generations=2, evaluator=game, solution=game)

Running in 11 processes.
[60.0, 60.0, 60.0, 60.0, 60.0]
[170.0, 170.0, 490.0, 170.0, 170.0]
[290.0, 210.0, 250.0, 210.0, 230.0]
[190.0, 120.0, 150.0, 80.0, 120.0]
[260.0, 180.0, 240.0, 320.0, 250.0]
[360.0, 430.0, 400.0, 140.0, 360.0]
[110.0, 140.0, 140.0, 110.0, 140.0]
[1880.0, 510.0, 240.0, 200.0, 110.0]
[170.0, 200.0, 70.0, 170.0, 240.0]
[340.0, 460.0, 380.0, 360.0, 340.0]
[220.0, 340.0, 290.0, 290.0, 510.0]

== Generation 1 ==
Best (588.00): NEATGenotype with 137 nodes and 1152 connections. {'steps': 689.20000000000005, 'fitness': 588.0}
Solved: None
Species: [11]
Age: [0]
No improvement: [0]
Running in 11 processes.
[60.0, 60.0, 60.0, 60.0, 60.0]
[90.0, 110.0, 70.0, 110.0, 80.0]
[330.0, 200.0, 90.0, 110.0, 150.0]
[180.0, 180.0, 180.0, 110.0, 70.0]
[80.0, 200.0, 530.0, 190.0, 160.0]
[380.0, 270.0, 370.0, 240.0, 480.0]
[840.0, 260.0, 490.0, 330.0, 180.0]
[290.0, 570.0, 570.0, 570.0, 570.0]
[180.0, 220.0, 470.0, 480.0, 270.0]
[310.0, 320.0, 170.0, 620.0, 600.0]
[200.0, 300.0, 310.0, 

{'champions': [<peas.methods.neat.NEATGenotype at 0x7f27b46e5d10>,
  <peas.methods.neat.NEATGenotype at 0x7f27b46e5cd0>],
 'stats': defaultdict(list,
             {'fitness_avg': [258.54545454545456, 269.81818181818181],
              'fitness_max': [588.0, 514.0],
              'fitness_min': [60.0, 60.0],
              'solved': [False, False],
              'steps_avg': [613.80000000000007, 664.38181818181818],
              'steps_max': [824.20000000000005, 976.20000000000005],
              'steps_min': [430.0, 456.39999999999998]})}

In [11]:
dir(pop.champions[0])

['__class__',
 '__delattr__',
 '__dict__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__hash__',
 '__init__',
 '__module__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'bias_as_node',
 'conn_genes',
 'distance',
 'distance_disjoint',
 'distance_excess',
 'distance_weight',
 'feedforward',
 'get_network_data',
 'initial_weight_stdev',
 'inputs',
 'mate',
 'max_depth',
 'max_nodes',
 'mutate',
 'node_genes',
 'outputs',
 'prob_add_conn',
 'prob_add_node',
 'prob_disable_conn',
 'prob_mutate_bias',
 'prob_mutate_response',
 'prob_mutate_type',
 'prob_mutate_weight',
 'prob_reenable_conn',
 'prob_reenable_parent',
 'prob_reset_weight',
 'response_default',
 'stats',
 'stdev_mutate_bias',
 'stdev_mutate_response',
 'stdev_mutate_weight',
 'types',
 'visualize',
 'weight_range']

# Adjusting image

In [11]:
observation, reward, done, info = game.env.step(0)

In [12]:
from scipy.misc import imsave

In [13]:
frame = observation[50:50+160, 30:-30]
frame = cv2.resize(frame, (84, 84))
frame = cv2.resize(frame, (30, 30))
frame = frame.mean(2)
frame = frame.astype(np.float32)
frame *= (1.0 / 255.0)

IndexError: too many indices for array