In [1]:
import numpy as np
from agent import *
from maze import *
from neural_network import *
from hrr import *

In [2]:
# ---- Training schedule -------------------------------------------------
# Number of training cycles
episodes = 1000
# How many steps to take before quitting
steps_till_quit = 100
# Print frequency
p_freq = 100

# ---- HRR parameters ----------------------------------------------------
hrr_length = 1024
normalized = True

# ---- Maze arguments ----------------------------------------------------
size_of_maze = 5
non_obs_task_switch_rate = 12
num_non_obs_tasks = 3
num_obs_tasks = 3
# Fixed goal table: three rows of three goal indices each.
goals = [
    [0, 2, 1],
    [1, 0, 2],
    [2, 1, 0],
]
# Alternative: random goal setting (one row per observable task, one entry
# per non-observable task):
# goals = np.stack([np.random.choice(range(size_of_maze), num_non_obs_tasks, replace=False) for _ in range(num_obs_tasks)])

# ---- Neural network arguments ------------------------------------------
# The network input is one HRR vector, so its width follows hrr_length.
input_size = hrr_length
output_size = 1
bias = 1
discount = 0.9
alpha = 0.01

# ---- Temporal difference learning --------------------------------------
# Rewards for temporal difference learning
reward_bad, reward_good = 0, 1
# Exploration rate
e_soft = 0.01
# Threshold for non-observable task switching
threshold = -0.2

# ---- Eligibility trace -------------------------------------------------
# One zero-initialised entry per HRR component
eligibility = [0 for _ in range(hrr_length)]
# Eligibility trace rate
eli_lambda = 0.9

In [3]:
# Build the experiment objects from the parameters configured above.
#
# The names `agent` and `maze` are rebound below from the star-imported
# classes to instances. Calling `agent()` / `maze(...)` directly therefore
# only works the first time this cell runs; a second run would try to call
# the instances and fail. Importing the classes under private aliases keeps
# the cell re-runnable while leaving `agent` and `maze` bound to instances,
# exactly as the rest of the notebook expects.
from agent import agent as _AgentClass
from maze import maze as _MazeClass

agent = _AgentClass()
maze = _MazeClass(size_of_maze, non_obs_task_switch_rate, num_non_obs_tasks, num_obs_tasks, goals)
nn = NeuralNetwork(input_size, output_size, bias, discount, alpha)
# NOTE(review): the first LTM argument ("hrrs_1") looks like a name/identifier
# for the memory store -- confirm against hrr.LTM.
ltm = LTM("hrrs_1", hrr_length, normalized)

In [4]:
# Signal names; each one is encoded into the LTM on its own and as part of
# the combined state/task/signal keys.
signals = [
    "red",
    "blue",
    "green",
]

In [5]:
# Populate the LTM: encode every state, every non-observable task, every
# signal, and every "state+non_obs+signal" combination, echoing each
# combined key as it is encoded.
for state in range(size_of_maze):
    state_key = "state_{}".format(state)
    ltm.encode(state_key)
    for non_obs in range(num_non_obs_tasks):
        non_obs_key = "non_obs_{}".format(non_obs)
        ltm.encode(non_obs_key)
        for signal in signals:
            ltm.encode(signal)
            # Build the combined key once and reuse it for encode and print.
            combined_key = "+".join([state_key, non_obs_key, signal])
            ltm.encode(combined_key)
            print(combined_key)

state_0+non_obs_0+red
state_0+non_obs_0+blue
state_0+non_obs_0+green
state_0+non_obs_1+red
state_0+non_obs_1+blue
state_0+non_obs_1+green
state_0+non_obs_2+red
state_0+non_obs_2+blue
state_0+non_obs_2+green
state_1+non_obs_0+red
state_1+non_obs_0+blue
state_1+non_obs_0+green
state_1+non_obs_1+red
state_1+non_obs_1+blue
state_1+non_obs_1+green
state_1+non_obs_2+red
state_1+non_obs_2+blue
state_1+non_obs_2+green
state_2+non_obs_0+red
state_2+non_obs_0+blue
state_2+non_obs_0+green
state_2+non_obs_1+red
state_2+non_obs_1+blue
state_2+non_obs_1+green
state_2+non_obs_2+red
state_2+non_obs_2+blue
state_2+non_obs_2+green
state_3+non_obs_0+red
state_3+non_obs_0+blue
state_3+non_obs_0+green
state_3+non_obs_1+red
state_3+non_obs_1+blue
state_3+non_obs_1+green
state_3+non_obs_2+red
state_3+non_obs_2+blue
state_3+non_obs_2+green
state_4+non_obs_0+red
state_4+non_obs_0+blue
state_4+non_obs_0+green
state_4+non_obs_1+red
state_4+non_obs_1+blue
state_4+non_obs_1+green
state_4+non_obs_2+red
state_4+non_

In [6]:
# Dump the current contents of the long-term memory. In the recorded output
# this prints the LTM object followed by each stored name and its vector.
ltm.print()

<hrr.LTM object at 0x7f0d500bf978>
state_1+non_obs_0+blue [-0.0215362  -0.02156886 -0.0174406  ... -0.05685053  0.01029033
  0.08425531]
state_2+non_obs_0+green [-0.00693471 -0.00356956  0.07875096 ... -0.03770554  0.02417415
  0.06215579]
state_3+non_obs_0+red [-0.01072354  0.00847526 -0.0047046  ...  0.00330023 -0.03028382
  0.07770113]
state_4+non_obs_2+blue [-0.05620595  0.00642283 -0.04263474 ... -0.0547405   0.01682522
  0.03474663]
state_2+non_obs_0+blue [-0.04122582 -0.00619116  0.01495604 ... -0.05905387  0.00770521
  0.07546107]
state_3+non_obs_0+green [-0.00940261  0.02203953  0.03550946 ... -0.00177624 -0.00576735
  0.06450275]
state_3+non_obs_0+blue [-0.04369371  0.01941794 -0.02828546 ... -0.02312457 -0.02223629
  0.07780804]
state_0+non_obs_1+blue [-5.27708419e-02 -2.22876857e-02  2.75047778e-05 ... -4.93143333e-02
 -2.15792798e-02 -2.97082010e-02]
state_4+non_obs_0+blue [-0.03787172  0.00474404 -0.03033776 ... -0.07452283  0.04794531
  0.073822  ]
state_1 [ 0.01728509 -

In [7]:
# NOTE(review): clean() is a method of hrr.LTM whose body is not visible
# here -- confirm what it resets or removes before relying on this cell.
ltm.clean()
# Bare tuple as the cell's last expression: displays the reprs of the four
# experiment objects.
ltm, agent, maze, nn

(<hrr.LTM at 0x7f0d500bf978>,
 <agent.agent at 0x7f0d500bf860>,
 <maze.maze at 0x7f0d500bf898>,
 <neural_network.NeuralNetwork at 0x7f0d500bf940>)