In [1]:
import ipl 

organism = ipl.Organism()
organism.verbosity = 1

game = ipl.games.ElMazeGame(3,2)
organism.configure(game.player_config())

action_program = [
    [1, 0, 0, 0],
    [1, 0, 0, 0],
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [1, 0, 0, 0],
    [1, 0, 0, 0]
]


In [8]:
game = ipl.games.ElMazeGame(3,2)

organism.verbosity = 1
organism.reset_state()
for iturn, pa in enumerate(action_program):
  print('\nRUNNER: Turn {}'.format(iturn + 1))
  organism.handle_sensor_input(game.sensors())

  oa = organism.choose_action(pa)
  if oa.outcomes and len(oa.outcomes):
    print('Expected outcomes:')
    for outcome in oa.outcomes:
      print('\t{}'.format(outcome))
  else:
    print('(Action has no outcomes)')

  game.act(oa.actuators)
  

print()
print('RUNNER: Predefined action sequence complete.')
organism.handle_sensor_input(game.sensors())

organism.maintenance()


RUNNER: Turn 1
ORGANISM: Received sensor input: [0, 0, 0, 0, 1]
ORGANISM: Committing to action: ACTION: [1, 0, 0, 0] (-> 9 outcomes)
Expected outcomes:
	OUTCOME: [0, 1, 1, 1, 0] (-18260% $0.00 = $-0.00)
	OUTCOME: [0, 0, 1, 0, 1] (-41067% $1.00 = $-410.68)
	OUTCOME: [1, 0, 1, 0, 0] (280% $0.00 = $0.00)
	OUTCOME: [1, 0, 0, 0, 0] (53263% $0.00 = $0.00)
	OUTCOME: [1, 1, 1, 0, 0] (-17945% $0.00 = $-0.00)
	OUTCOME: [0, 0, 0, 1, 0] (39664% $0.00 = $0.00)
	OUTCOME: [0, 0, 0, 1, 1] (-3649% $1.00 = $-36.50)
	OUTCOME: [1, 0, 0, 1, 0] (-23723% $0.00 = $-0.00)
	OUTCOME: [1, 1, 0, 0, 0] (11538% $0.00 = $0.00)

RUNNER: Turn 2
ORGANISM: Received sensor input: [0, 0, 0, 0, 1]
ORGANISM: Experience repo size: 133
ORGANISM: Committing to action: ACTION: [1, 0, 0, 0] (-> 7 outcomes)
Expected outcomes:
	OUTCOME: [0, 1, 0, 0, 0] (2% $0.00 = $0.00)
	OUTCOME: [1, 0, 1, 1, 1] (8% $1.00 = $0.08)
	OUTCOME: [0, 0, 0, 1, 1] (-12% $1.00 = $-0.13)
	OUTCOME: [1, 0, 0, 1, 0] (3% $0.00 = $0.00)
	OUTCOME: [0, 1, 1, 0, 1

In [3]:
game.io_vector_labels()

{'sensors': ['FORWARD', 'LEFT', 'RIGHT', 'BACK', 'VICTORY'],
 'actuators': ['GO', 'TURN LEFT', 'TURN RIGHT', 'TURN BACK']}

In [4]:
print('# learned vectors: {}'.format(len(organism.experience_repo.experiences)))
retval = []

for i in range(0,31):
    a = [int(x) for x in list('{:05b}'.format(i))]
    est = organism.outcome_likelihood_estimator.estimate( ipl.nnplanner.Experience([1, 0, 0, 1, 0], [1, 0, 0, 0],  a))
    retval.append( (a, int(est*100) ) )

retval.sort(key=lambda x: -x[1])
retval

# learned vectors: 48


[([1, 0, 0, 1, 0], 99),
 ([0, 0, 0, 0, 1], 97),
 ([0, 1, 0, 1, 0], 53),
 ([0, 1, 0, 1, 1], 35),
 ([0, 0, 0, 1, 0], 25),
 ([0, 0, 1, 1, 0], 20),
 ([0, 0, 1, 1, 1], 11),
 ([0, 1, 0, 0, 1], 9),
 ([1, 1, 1, 0, 0], 7),
 ([0, 0, 1, 0, 1], 4),
 ([1, 1, 1, 0, 1], 3),
 ([0, 1, 0, 0, 0], 2),
 ([1, 0, 1, 1, 1], 2),
 ([1, 1, 0, 1, 1], 2),
 ([0, 0, 0, 1, 1], 1),
 ([0, 1, 1, 1, 0], 0),
 ([1, 0, 0, 0, 1], 0),
 ([1, 1, 0, 1, 0], 0),
 ([0, 0, 0, 0, 0], -1),
 ([1, 1, 0, 0, 0], -1),
 ([1, 0, 1, 0, 0], -2),
 ([1, 1, 0, 0, 1], -2),
 ([0, 1, 1, 1, 1], -3),
 ([1, 0, 1, 1, 0], -4),
 ([1, 1, 1, 1, 0], -4),
 ([0, 1, 1, 0, 0], -5),
 ([1, 0, 0, 1, 1], -6),
 ([1, 0, 1, 0, 1], -6),
 ([0, 0, 1, 0, 0], -8),
 ([0, 1, 1, 0, 1], -9),
 ([1, 0, 0, 0, 0], -10)]

In [5]:
organism.verbosity = 0
for i in range(0, 10):
    print('Run #{}'.format(i))
    game = ipl.games.ElMazeGame(3,2)    

    organism.reset_state()
    for pa in action_program:
      organism.handle_sensor_input(game.sensors())
      oa = organism.choose_action(pa)
      game.act(oa.actuators)
    
    organism.handle_sensor_input(game.sensors())
    organism.maintenance()
    

Run #0
Run #1
Run #2
Run #3
Run #4
Run #5
Run #6
Run #7
Run #8
Run #9


In [6]:
es = list(organism.experience_repo.experiences)
len(es)

116