In [1]:
import numpy as np
from agent import *
from maze import *
from neural_network import *
from hrr import *

In [2]:
# ---------------------------------------------------------------------------
# Experiment configuration: every tunable value used by the later cells.
# ---------------------------------------------------------------------------

# --- Training schedule ---
# Number of training cycles
episodes = 1000
# How many steps to take before quitting an episode
steps_till_quit = 1
# Print frequency
p_freq = 100

# --- HRR (holographic reduced representation) parameters ---
hrr_length = 1024
normalized = True

# --- Maze parameters ---
size_of_maze = 5
non_obs_task_switch_rate = 12
num_non_obs_tasks = 3
num_obs_tasks = 3
signals = ["red", "green", "blue"]
goals = [
    [0, 2, 1],
    [1, 0, 2],
    [2, 1, 0],
]
# Alternative: random goal setting
# goals = np.stack([np.random.choice(range(size_of_maze), num_non_obs_tasks, replace=False) for _ in range(num_obs_tasks)])

# --- Arguments for the neural network ---
# The network input is a single HRR vector, so its width follows hrr_length.
input_size = hrr_length
output_size = 1
bias = 1
discount = 0.9
alpha = 0.01

# --- Rewards for temporal difference learning ---
reward_good = 1
reward_bad = 0

# --- Exploration rate ---
e_soft = 0.01

# --- Threshold for non observable task switching ---
threshold = -0.2

# --- Eligibility trace: one zero-initialised entry per HRR component ---
eligibility = [0 for _ in range(hrr_length)]
# Eligibility trace rate
eli_lambda = 0.9

# Random initial values for `atr` and `wm`, drawn from the non-observable and
# observable task index ranges respectively (upper bound is exclusive).
atr = np.random.randint(0, num_non_obs_tasks)
wm = np.random.randint(0, num_obs_tasks)

In [3]:
# Build the objects the experiment uses: the agent, the maze, the value
# network and the long-term memory holding the HRRs.
# NOTE: the first two assignments rebind `agent` and `maze` from the
# star-imported callables to the objects they return, so re-running this cell
# without re-running the import cell would call those objects instead.
agent = agent()
maze = maze(
    size_of_maze,
    non_obs_task_switch_rate,
    num_non_obs_tasks,
    num_obs_tasks,
    goals,
)
nn = NeuralNetwork(
    input_size,
    output_size,
    bias,
    discount,
    alpha,
    reward_good,
    reward_bad,
)
ltm = LTM("hrrs_1", hrr_length, normalized)

In [4]:
# Register one long-term-memory entry for every combination of maze state,
# non-observable task index and signal colour.
for state in range(size_of_maze):
    for non_obs in range(num_non_obs_tasks):
        for signal in signals:
            # Key layout: state_<s>*non_obs_<t>*<signal>
            key_parts = [
                "state_" + str(state),
                "non_obs_" + str(non_obs),
                str(signal),
            ]
            ltm.encode("*".join(key_parts))

non_obs_0*red
non_obs_0*state_0
red*state_0
non_obs_0*red*state_0
green*non_obs_0
green*state_0
non_obs_0*state_0
green*non_obs_0*state_0
blue*non_obs_0
blue*state_0
non_obs_0*state_0
blue*non_obs_0*state_0
non_obs_1*red
non_obs_1*state_0
red*state_0
non_obs_1*red*state_0
green*non_obs_1
green*state_0
non_obs_1*state_0
green*non_obs_1*state_0
blue*non_obs_1
blue*state_0
non_obs_1*state_0
blue*non_obs_1*state_0
non_obs_2*red
non_obs_2*state_0
red*state_0
non_obs_2*red*state_0
green*non_obs_2
green*state_0
non_obs_2*state_0
green*non_obs_2*state_0
blue*non_obs_2
blue*state_0
non_obs_2*state_0
blue*non_obs_2*state_0
non_obs_0*red
non_obs_0*state_1
red*state_1
non_obs_0*red*state_1
green*non_obs_0
green*state_1
non_obs_0*state_1
green*non_obs_0*state_1
blue*non_obs_0
blue*state_1
non_obs_0*state_1
blue*non_obs_0*state_1
non_obs_1*red
non_obs_1*state_1
red*state_1
non_obs_1*red*state_1
green*non_obs_1
green*state_1
non_obs_1*state_1
green*non_obs_1*state_1
blue*non_obs_1
blue*state_1
non_ob

In [5]:
# Run a single demonstration episode: pick a random start state and signal,
# advance the maze, then for each step look up the HRR for the agent's current
# view and ask the agent to choose a move.
for x in range(1):

    # Starting state, uniform over 0 .. size_of_maze - 1.
    # Uses the explicitly imported numpy generator (exclusive upper bound)
    # instead of a `random` name that only exists via the star imports.
    current = int(np.random.randint(0, size_of_maze))

    # Signal for the maze run
    signal = np.random.choice(signals)

    # Maze progresses
    non_obs, goal = maze.step_maze(signals.index(signal))
    for y in range(steps_till_quit):

        # Build the lookup key once so the printed key and the key actually
        # looked up can never drift apart.
        view_key = str(signal) + "*" + "non_obs_" + str(non_obs) + "*state_" + str(current)
        print(view_key)
        current_view = ltm.lookup(view_key)
        print(current_view)

        # Ask for the moves available from the state the agent is really in;
        # this was previously the literal 4 regardless of `current`.
        # NOTE(review): assumes get_moves' first argument is the current
        # state - confirm against agent.get_moves.
        left, right = agent.get_moves(current, size_of_maze)

        # The chosen move is not applied yet; the episode logic stops here.
        move = agent.pick(left, right, atr, wm, nn)
    

blue*non_obs_0*state_4
[-0.00956401 -0.01685698 -0.01625829 ... -0.00229199 -0.01869434
 -0.0410843 ]


In [6]:
# Dump the long-term memory for inspection; the captured output lists stored
# keys alongside their vectors.
ltm.print()

<hrr.LTM object at 0x7f1e341ecb38>
non_obs_0*red*state_4 [ 7.51071004e-05  2.75871266e-03  5.10650721e-02 ... -2.32153622e-02
  1.79704537e-02 -3.27185259e-02]
blue*non_obs_1*state_4 [ 0.03905197 -0.03286212  0.04459123 ... -0.03156148  0.02470741
 -0.02922728]
blue*non_obs_2*state_4 [-0.06350512  0.01323179 -0.04528316 ... -0.00085292  0.01180637
  0.00401437]
green*state_3 [-0.04004424  0.00316993 -0.03713127 ... -0.02106244 -0.01686757
  0.0363668 ]
non_obs_2*red*state_4 [-0.04407889  0.02831904  0.00814255 ... -0.00633896  0.01195038
 -0.0279131 ]
red*state_2 [ 0.01170069  0.05035243 -0.0025673  ...  0.02453016  0.03788731
  0.01480834]
green*non_obs_0*state_1 [ 0.00064747 -0.00405346  0.00398968 ...  0.01079661  0.0226364
  0.05821929]
non_obs_2*state_1 [-0.03782605 -0.05692164 -0.05222756 ...  0.01009747  0.01460684
 -0.03197095]
blue*state_3 [ 0.03388999 -0.06989457  0.06230295 ...  0.01389296 -0.05868186
  0.03374484]
non_obs_1*state_0 [-0.00905972  0.03742995 -0.00755    ... -

In [7]:
# Tear down the experiment.
# NOTE(review): ltm.clean() presumably discards the stored HRRs - confirm
# against hrr.LTM before relying on it.
ltm.clean()

# Drop the experiment objects from the notebook namespace, one name at a time.
del ltm
del agent
del maze
del nn