In [1]:
import ipl 

organism = ipl.Organism()
organism.verbosity = 1

game = ipl.games.ElMazeGame(3,2)
organism.configure(game.player_config())

action_program = [
    [1, 0, 0, 0],
    [1, 0, 0, 0],
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [1, 0, 0, 0],
    [1, 0, 0, 0]
]


In [8]:
game = ipl.games.ElMazeGame(3,2)

organism.verbosity = 1
organism.reset_state()
for iturn, pa in enumerate(action_program):
  print('\nRUNNER: Turn {}'.format(iturn + 1))
  organism.handle_sensor_input(game.sensors())

  oa = organism.choose_action(pa)
  if oa.outcomes and len(oa.outcomes):
    print('Expected outcomes:')
    for outcome in oa.outcomes:
      print('\t{}'.format(outcome))
  else:
    print('(Action has no outcomes)')

  game.act(oa.actuators)
  

print()
print('RUNNER: Predefined action sequence complete.')
organism.handle_sensor_input(game.sensors())

organism.maintenance()


RUNNER: Turn 1
ORGANISM: Received sensor input: [1, 0, 0, 0, 0]
ORGANISM: Generated actions (len=3)
	 ACTION: [1, 0, 0, 0] (-> 3 outcomes) $1.00
		 OUTCOME: [1, 0, 0, 1, 0] (96% $1.00 = $0.97)
		 OUTCOME: [1, 1, 1, 0, 0] ( 1% $1.00 = $0.02)
		 OUTCOME: [1, 0, 1, 0, 0] ( 1% $1.00 = $0.01)
	 ACTION: [0, 0, 0, 0] (-> 3 outcomes) $1.00
		 OUTCOME: [1, 0, 0, 1, 0] (37% $1.00 = $0.38)
		 OUTCOME: [1, 1, 0, 0, 0] (37% $1.00 = $0.37)
		 OUTCOME: [1, 0, 0, 0, 0] (24% $1.00 = $0.25)
	 ACTION: [1, 0, 0, 1] (-> 3 outcomes) $1.00
		 OUTCOME: [1, 0, 0, 1, 0] (78% $1.00 = $0.79)
		 OUTCOME: [0, 0, 0, 0, 1] (16% $1.00 = $0.16)
		 OUTCOME: [0, 1, 0, 0, 1] ( 5% $1.00 = $0.05)
ORGANISM: Committing to action: ACTION: [1, 0, 0, 0] (-> 3 outcomes) $1.00
Expected outcomes:
	OUTCOME: [1, 0, 0, 1, 0] (96% $1.00 = $0.97)
	OUTCOME: [1, 1, 1, 0, 0] ( 1% $1.00 = $0.02)
	OUTCOME: [1, 0, 1, 0, 0] ( 1% $1.00 = $0.01)

RUNNER: Turn 2
ORGANISM: Received sensor input: [1, 0, 0, 1, 0]
ORGANISM: Experience repo size: 396

In [3]:
game.io_vector_labels()

{'sensors': ['FORWARD', 'LEFT', 'RIGHT', 'BACK', 'VICTORY'],
 'actuators': ['GO', 'TURN LEFT', 'TURN RIGHT', 'TURN BACK']}

In [4]:
print('# learned vectors: {}'.format(len(organism.experience_repo.experiences)))
retval = []

for i in range(0,31):
    a = [int(x) for x in list('{:05b}'.format(i))]
    est = organism.outcome_likelihood_estimator.estimate( ipl.nnplanner.Experience([0, 1, 0, 1, 0], [0, 1, 0, 0],  a))
    retval.append( (a, int(est*100) ) )

retval.sort(key=lambda x: -x[1])
retval

# learned vectors: 22


[([0, 0, 0, 0, 1], 100),
 ([1, 0, 0, 0, 1], 100),
 ([1, 1, 0, 0, 0], 100),
 ([1, 1, 0, 0, 1], 100),
 ([1, 1, 1, 0, 1], 87),
 ([0, 1, 0, 0, 1], 85),
 ([1, 0, 1, 0, 1], 70),
 ([1, 0, 0, 0, 0], 69),
 ([1, 1, 1, 0, 0], 65),
 ([0, 1, 0, 0, 0], 55),
 ([0, 1, 1, 1, 1], 47),
 ([0, 0, 1, 0, 1], 46),
 ([0, 0, 0, 0, 0], 42),
 ([1, 0, 1, 0, 0], 36),
 ([1, 1, 0, 1, 1], 33),
 ([0, 1, 0, 1, 1], 29),
 ([1, 0, 1, 1, 1], 26),
 ([0, 0, 1, 1, 1], 25),
 ([0, 1, 1, 0, 1], 22),
 ([0, 1, 1, 0, 0], 18),
 ([1, 0, 0, 1, 1], 15),
 ([1, 1, 0, 1, 0], 13),
 ([0, 0, 0, 1, 1], 10),
 ([0, 0, 1, 0, 0], 8),
 ([1, 1, 1, 1, 0], 8),
 ([0, 1, 1, 1, 0], 5),
 ([0, 0, 0, 1, 0], 0),
 ([0, 0, 1, 1, 0], 0),
 ([0, 1, 0, 1, 0], 0),
 ([1, 0, 0, 1, 0], 0),
 ([1, 0, 1, 1, 0], 0)]

In [5]:
organism.verbosity = 0
for i in range(0, 20):
    print('Run #{}'.format(i))
    game = ipl.games.ElMazeGame(3,2)    

    organism.reset_state()
    for pa in action_program:
      organism.handle_sensor_input(game.sensors())
      oa = organism.choose_action(pa)
      game.act(oa.actuators)
    

    organism.handle_sensor_input(game.sensors())
    organism.maintenance()
    

Run #0
Run #1
Run #2
Run #3
Run #4
Run #5
Run #6
Run #7
Run #8
Run #9
Run #10
Run #11
Run #12
Run #13
Run #14
Run #15
Run #16
Run #17
Run #18
Run #19


In [6]:
len(organism.experience_repo)

393

In [7]:
import sklearn.neural_network
neuralnet = sklearn.neural_network.MLPRegressor(
        hidden_layer_sizes=(32, 32),
        solver='lbfgs'
    )


td = organism.experience_repo.training_data()
neuralnet.fit(td[0], td[1])

retval = []

for i in range(0,31):
    a = [int(x) for x in list('{:05b}'.format(i))]
    est = neuralnet.predict([ [1, 0, 0, 1, 0] + [1, 0, 0, 0] + a ])[0]
    retval.append( (a, int(est*100) ) )

retval.sort(key=lambda x: -x[1])
retval

[([0, 0, 0, 0, 1], 36),
 ([1, 0, 0, 1, 0], 35),
 ([0, 1, 0, 1, 0], 34),
 ([1, 0, 0, 0, 1], 4),
 ([0, 1, 0, 0, 0], 2),
 ([0, 0, 1, 0, 1], 1),
 ([1, 0, 1, 0, 1], 1),
 ([1, 1, 0, 0, 1], 1),
 ([1, 1, 1, 0, 1], 1),
 ([0, 0, 0, 0, 0], 0),
 ([0, 0, 0, 1, 0], 0),
 ([0, 0, 0, 1, 1], 0),
 ([0, 0, 1, 1, 0], 0),
 ([0, 0, 1, 1, 1], 0),
 ([0, 1, 0, 1, 1], 0),
 ([0, 1, 1, 0, 1], 0),
 ([0, 1, 1, 1, 0], 0),
 ([0, 1, 1, 1, 1], 0),
 ([1, 0, 0, 0, 0], 0),
 ([1, 0, 0, 1, 1], 0),
 ([1, 0, 1, 1, 0], 0),
 ([1, 0, 1, 1, 1], 0),
 ([1, 1, 0, 1, 0], 0),
 ([1, 1, 0, 1, 1], 0),
 ([1, 1, 1, 0, 0], 0),
 ([1, 1, 1, 1, 0], 0),
 ([0, 1, 0, 0, 1], -1),
 ([0, 1, 1, 0, 0], -1),
 ([0, 0, 1, 0, 0], -2),
 ([1, 0, 1, 0, 0], -3),
 ([1, 1, 0, 0, 0], -3)]