In [1]:
import ipl 

organism = ipl.Organism()
organism.verbosity = 1

game = ipl.games.ElMazeGame(3,2)
organism.configure(game.player_config())

action_program = [
    [1, 0, 0, 0],
    [1, 0, 0, 0],
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [1, 0, 0, 0],
    [1, 0, 0, 0]
]


In [8]:
game = ipl.games.ElMazeGame(3,2)

organism.verbosity = 1
organism.reset_state()
for iturn, pa in enumerate(action_program):
  print('\nRUNNER: Turn {}'.format(iturn + 1))
  organism.handle_sensor_input(game.sensors())

  oa = organism.choose_action(pa)
  if oa.outcomes and len(oa.outcomes):
    print('Expected outcomes:')
    for outcome in oa.outcomes:
      print('\t{}'.format(outcome))
  else:
    print('(Action has no outcomes)')

  game.act(oa.actuators)
  

print()
print('RUNNER: Predefined action sequence complete.')
organism.handle_sensor_input(game.sensors())

organism.maintenance()


RUNNER: Turn 1
ORGANISM: Received sensor input: [1, 0, 0, 0, 0]
ORGANISM: Committing to action: ACTION: [1, 0, 0, 0] (-> 3 outcomes) $0.02
Expected outcomes:
	OUTCOME: [1, 0, 0, 1, 0] (98% $0.00 = $0.00)
	OUTCOME: [0, 1, 0, 1, 1] ( 1% $1.00 = $0.02)
	OUTCOME: [0, 1, 1, 1, 0] ( 0% $0.00 = $0.00)

RUNNER: Turn 2
ORGANISM: Received sensor input: [1, 0, 0, 1, 0]
ORGANISM: Experience repo size: 156
ORGANISM: Committing to action: ACTION: [1, 0, 0, 0] (-> 3 outcomes) $0.50
Expected outcomes:
	OUTCOME: [0, 1, 0, 1, 0] (50% $0.00 = $0.00)
	OUTCOME: [0, 0, 0, 0, 1] (49% $1.00 = $0.50)
	OUTCOME: [1, 0, 0, 1, 0] ( 0% $0.00 = $0.00)

RUNNER: Turn 3
ORGANISM: Received sensor input: [1, 0, 0, 1, 0]
ORGANISM: Experience repo size: 159
ORGANISM: Committing to action: ACTION: [1, 0, 0, 0] (-> 3 outcomes) $0.01
Expected outcomes:
	OUTCOME: [1, 0, 0, 1, 0] (98% $0.00 = $0.00)
	OUTCOME: [0, 0, 0, 0, 1] ( 1% $1.00 = $0.01)
	OUTCOME: [0, 1, 0, 1, 0] ( 0% $0.00 = $0.00)

RUNNER: Turn 4
ORGANISM: Received se

In [3]:
game.io_vector_labels()

{'sensors': ['FORWARD', 'LEFT', 'RIGHT', 'BACK', 'VICTORY'],
 'actuators': ['GO', 'TURN LEFT', 'TURN RIGHT', 'TURN BACK']}

In [4]:
print('# learned vectors: {}'.format(len(organism.experience_repo.experiences)))
retval = []

for i in range(0,31):
    a = [int(x) for x in list('{:05b}'.format(i))]
    est = organism.outcome_likelihood_estimator.estimate( ipl.nnplanner.Experience([0, 1, 0, 1, 0], [0, 1, 0, 0],  a))
    retval.append( (a, int(est*100) ) )

retval.sort(key=lambda x: -x[1])
retval

# learned vectors: 22


[([1, 0, 0, 0, 0], 158),
 ([0, 1, 0, 0, 1], 147),
 ([0, 0, 0, 0, 1], 144),
 ([0, 0, 0, 0, 0], 135),
 ([1, 0, 1, 0, 0], 134),
 ([0, 1, 0, 0, 0], 124),
 ([1, 0, 0, 0, 1], 121),
 ([0, 0, 1, 0, 0], 99),
 ([1, 1, 0, 0, 0], 99),
 ([0, 0, 1, 0, 1], 92),
 ([0, 1, 1, 0, 1], 89),
 ([1, 1, 0, 0, 1], 83),
 ([1, 0, 1, 0, 1], 82),
 ([0, 1, 1, 0, 0], 68),
 ([1, 0, 0, 1, 0], 62),
 ([1, 0, 1, 1, 0], 56),
 ([0, 1, 0, 1, 1], 50),
 ([1, 0, 0, 1, 1], 45),
 ([1, 1, 1, 0, 1], 35),
 ([1, 1, 0, 1, 1], 31),
 ([1, 1, 1, 0, 0], 22),
 ([0, 0, 1, 1, 0], 19),
 ([1, 0, 1, 1, 1], 15),
 ([0, 0, 0, 1, 0], 12),
 ([0, 0, 0, 1, 1], 8),
 ([0, 1, 0, 1, 0], 0),
 ([0, 1, 1, 1, 0], 0),
 ([1, 1, 0, 1, 0], 0),
 ([0, 1, 1, 1, 1], -8),
 ([0, 0, 1, 1, 1], -13),
 ([1, 1, 1, 1, 0], -22)]

In [5]:
organism.verbosity = 0
for i in range(0, 10):
    print('Run #{}'.format(i))
    game = ipl.games.ElMazeGame(3,2)    

    organism.reset_state()
    for pa in action_program:
      organism.handle_sensor_input(game.sensors())
      oa = organism.choose_action(pa)
      game.act(oa.actuators)
    

    organism.handle_sensor_input(game.sensors())
    organism.maintenance()
    

Run #0
Run #1
Run #2
Run #3
Run #4
Run #5
Run #6
Run #7
Run #8
Run #9


In [6]:
len(organism.experience_repo)

153

In [7]:
import sklearn.neural_network
neuralnet = sklearn.neural_network.MLPRegressor(
        hidden_layer_sizes=(32, 32),
        solver='lbfgs'
    )


td = organism.experience_repo.training_data()
neuralnet.fit(td[0], td[1])

retval = []

for i in range(0,31):
    a = [int(x) for x in list('{:05b}'.format(i))]
    est = neuralnet.predict([ [1, 0, 0, 1, 0] + [1, 0, 0, 0] + a ])[0]
    retval.append( (a, int(est*100) ) )

retval.sort(key=lambda x: -x[1])
retval

[([0, 0, 0, 0, 1], 36),
 ([0, 1, 0, 1, 0], 36),
 ([1, 0, 0, 1, 0], 35),
 ([0, 1, 1, 1, 1], 16),
 ([0, 1, 1, 1, 0], 8),
 ([0, 1, 1, 0, 1], 6),
 ([0, 0, 1, 1, 1], 1),
 ([1, 1, 0, 0, 0], 1),
 ([1, 1, 0, 0, 1], 1),
 ([0, 0, 0, 0, 0], 0),
 ([0, 0, 0, 1, 0], 0),
 ([0, 0, 0, 1, 1], 0),
 ([0, 0, 1, 0, 1], 0),
 ([0, 1, 0, 0, 0], 0),
 ([0, 1, 0, 0, 1], 0),
 ([0, 1, 0, 1, 1], 0),
 ([0, 1, 1, 0, 0], 0),
 ([1, 0, 0, 0, 0], 0),
 ([1, 0, 0, 0, 1], 0),
 ([1, 0, 1, 0, 1], 0),
 ([1, 1, 0, 1, 0], 0),
 ([1, 1, 1, 0, 0], 0),
 ([1, 1, 1, 1, 0], -3),
 ([1, 0, 0, 1, 1], -8),
 ([1, 0, 1, 1, 1], -9),
 ([1, 1, 0, 1, 1], -9),
 ([1, 1, 1, 0, 1], -9),
 ([1, 0, 1, 0, 0], -13),
 ([1, 0, 1, 1, 0], -14),
 ([0, 0, 1, 1, 0], -19),
 ([0, 0, 1, 0, 0], -26)]