In [1]:
import numpy as np
from agent import *
from maze import *
from neural_network import *
from hrr import *

In [2]:
# ---------------------------------------------------------------------------
# Experiment configuration. Every tunable value used by the later cells is
# defined here so that the notebook can be re-run from a fresh kernel.
# ---------------------------------------------------------------------------

# Number of training cycles
episodes = 1000

# HRR parameters
hrr_length = 1024
normalized = True

# How many steps to take before quitting
steps_till_quit = 100

# Maze parameters
size_of_maze = 5
non_obs_task_switch_rate = 12
num_non_obs_tasks = 3
num_obs_tasks = 3
goals = [[0,2,1], [1,0,2],[2,1,0]]
signals = ["red", "green", "blue"]

# Random goal setting (alternative to the fixed `goals` above)
# goals = np.stack([np.random.choice(range(size_of_maze), num_non_obs_tasks, replace=False) for _ in range(num_obs_tasks)])

# Arguments for neural network
input_size = hrr_length
output_size = 1
# `bias` used to be assigned 1 here and then overwritten with 0 at the end of
# this cell; only the final value (0) was ever read, so it is defined once.
bias = 0
discount = 0.9
alpha = 0.01

# Reward for temporal difference learning
reward_bad = 0
reward_good = 1

# Exploration rate
e_soft = 0.01

# Threshold for non observable task switching
threshold = -0.2

# Print frequency
p_freq = 100

# Eligibility trace (one entry per HRR component)
eligibility = [0] * hrr_length

# Eligibility trace rate
eli_lambda = 0.9

# Initial non-observable task guess and working-memory content.
# np.random.randint excludes the upper bound, so this draws from
# 0 .. num_non_obs_tasks inclusive, i.e. one more value than the task indices
# 0 .. num_non_obs_tasks - 1.
# NOTE(review): confirm whether the extra value is intentional.
atr = np.random.randint(0, num_non_obs_tasks + 1)
wm = " "

# Neural network weight vector (paired with the scalar `bias` defined above)
weights = hrr(hrr_length, normalized)

In [3]:
# Build the objects used by the experiment from the configuration values.
#
# NOTE: `agent` and `maze` are rebound from the callables provided by the
# imports to the instances created here. After this cell has run, the original
# callables are no longer reachable under these names, so the cell cannot be
# executed a second time without re-running the import cell first.
agent = agent()
maze = maze(size_of_maze, non_obs_task_switch_rate, num_non_obs_tasks, num_obs_tasks, goals)
# `bias` is the module-level value defined in the configuration cell.
nn = NeuralNetwork(input_size, output_size, bias, discount, alpha, reward_good, reward_bad)
# "hrrs_1" is presumably a name/identifier for the memory store -- TODO confirm
# against the LTM constructor in hrr.py.
ltm = LTM("hrrs_1", hrr_length, normalized)

In [4]:
# Ask long-term memory to encode a key for every combination of
# non-observable task, signal and maze state. The single-space string is
# included alongside the colour signals (presumably the "no signal"
# placeholder, matching the initial value of `wm` -- TODO confirm).
# The loop variable names are kept as-is: at notebook top level they are
# globals and remain bound after the cell finishes.
for non_obs in range(num_non_obs_tasks):
    for signal in [" ", *signals]:
        for state in range(size_of_maze):
            ltm.encode("state_{}*non_obs_{}*{}".format(state, non_obs, signal))

Making key from lookup: non_obs_0
Making key from lookup: state_0
 *non_obs_0
Making key from inner most loop:  *non_obs_0
 *state_0
Making key from inner most loop:  *state_0
non_obs_0*state_0
Making key from inner most loop: non_obs_0*state_0
 *non_obs_0*state_0
Making key from inner most loop:  *non_obs_0*state_0
Making key from lookup: state_1
 *non_obs_0
 *state_1
Making key from inner most loop:  *state_1
non_obs_0*state_1
Making key from inner most loop: non_obs_0*state_1
 *non_obs_0*state_1
Making key from inner most loop:  *non_obs_0*state_1
Making key from lookup: state_2
 *non_obs_0
 *state_2
Making key from inner most loop:  *state_2
non_obs_0*state_2
Making key from inner most loop: non_obs_0*state_2
 *non_obs_0*state_2
Making key from inner most loop:  *non_obs_0*state_2
Making key from lookup: state_3
 *non_obs_0
 *state_3
Making key from inner most loop:  *state_3
non_obs_0*state_3
Making key from inner most loop: non_obs_0*state_3
 *non_obs_0*state_3
Making key from in

In [5]:
def policy(moves, wms, non_obs, rand_on):
    """Choose the next move, working-memory content and non-observable task.

    Parameters
    ----------
    moves : sequence
        Candidate moves. The caller in this notebook passes ``[left, right]``.
    wms : sequence
        Candidate working-memory contents; the element at index 1 is returned.
    non_obs : sequence
        Candidate non-observable task values; the element at index 1 is
        returned.
    rand_on : int
        Exploration is only allowed when this equals 1.

    Returns
    -------
    tuple
        ``(move, wm, non_ob)``.

    Notes
    -----
    Reads the module-level exploration rate ``e_soft``.
    """
    # Epsilon-soft exploration: with probability e_soft (and only when
    # exploration is switched on) pick a uniformly random move.
    if (np.random.random() < e_soft) and (rand_on == 1):
        return np.random.choice(moves), wms[1], non_obs[1]

    # TODO: greedy selection is not implemented yet. The intended evaluation
    # of each (move, wm, non_ob) candidate was sketched as:
    #   val = np.dot(weights, ltm.lookup("state_" + str(current) + "*non_obs_" + str(non_obs) + "*" + str(signal))) + bias
    #
    # Until then, fall back to the first candidate move. The previous code
    # returned the *global* `left` here, which only worked because the calling
    # cell happened to bind `left` before calling and passed it as moves[0].
    return moves[0], wms[1], non_obs[1]

In [6]:
# Single trial run of the maze loop (range(1) keeps it to one episode).
for x in range(1):

    # Starting state: uniform over the maze cells 0 .. size_of_maze - 1.
    # Drawn with numpy (upper bound exclusive) because the `random` module is
    # never imported in this notebook; the previous `random.randint` call only
    # worked if one of the wildcard imports happened to leak `random`.
    current = np.random.randint(0, size_of_maze)

    # Signal for the maze run
    signal = np.random.choice(signals)

    # Maze progresses
    non_obs, goal = maze.step_maze(signals.index(signal))
    for y in range(steps_till_quit):

        # HRR for the current (working memory, non-observable task, state)
        # combination. Not consumed yet by the code below.
        current_view = ltm.lookup(str(wm) + "*non_obs_" + str(non_obs) + "*state_" + str(current))

        # NOTE(review): the position passed here is hard-coded to 4 rather
        # than `current` -- confirm whether that is intentional.
        left, right = agent.get_moves(4, size_of_maze)

        #move = agent.pick(left, right, atr, wm, nn)

        # Rebinds the notebook-level `wm` and `atr` with the policy's choice.
        move, wm, atr = policy([left, right], [signal, wm], [0, atr], 1)

In [7]:
# Dump the contents of long-term memory; the captured output lists each
# stored key followed by its vector.
ltm.print()

<hrr.LTM object at 0x7f29711d5470>
non_obs_0*red*state_4 [ 0.06553133 -0.04445942 -0.02784322 ... -0.00988799  0.03999624
  0.04380749]
green*non_obs_2*state_3 [ 0.01400037  0.01065049 -0.00166471 ... -0.01753405 -0.07614447
 -0.01340314]
blue*non_obs_2*state_4 [ 0.01438846 -0.02253167 -0.01813056 ... -0.01965262 -0.01526461
  0.03227338]
green*state_3 [-0.01579056  0.00707823  0.02555411 ... -0.00974809  0.02578209
  0.01430613]
 *state_1 [ 0.02700621 -0.01202156 -0.06309992 ...  0.02513871  0.0807961
 -0.0383933 ]
non_obs_2*red*state_4 [-0.00950293  0.05032489 -0.01232675 ... -0.02721509 -0.04329873
  0.01364717]
red*state_2 [ 0.03523559 -0.0502053   0.02512832 ...  0.00810139 -0.00629769
  0.02668734]
green*non_obs_0*state_1 [-0.00934198  0.02128203 -0.03092576 ...  0.03537252 -0.01444776
  0.02679818]
 *state_3 [ 0.0170712   0.0208782   0.01266966 ... -0.01526375  0.04545512
  0.00914981]
blue*state_3 [ 0.02022688  0.03585006 -0.04414287 ... -0.01718465  0.02665665
 -0.00530813]
no

In [8]:
# Tear down the experiment objects.
# NOTE(review): `ltm.clean()` presumably releases/removes whatever the LTM
# instance stored -- confirm against hrr.py.
ltm.clean()
# Remove the instances from the kernel namespace. Because `agent` and `maze`
# were rebound to instances in the construction cell, deleting them leaves
# those names undefined; re-run the import cell before constructing them again.
del ltm, agent, maze, nn