In [16]:
maze_definition = "
#######
#T    #
#     #
#     #
#     #
#    T#
#######
"

state_reward_function = ['T' => 0.0, '#' => -100]

Dict{Char,Any} with 2 entries:
  '#' => -100
  'T' => 0.0

# Convert definition to maze representation

In [17]:
function convert_string_to_string_array(s::String)
    index = 1
    while s[index] == ' ' || s[index] == '\n'
        index = index + 1
    end
    maze = nothing
    row = []
    for i=index:length(s)
        if s[i] == '\n'
            if maze == nothing
                maze = row
            else
                maze = hcat(maze, row)
            end
            row = []
        else
            row = [row, s[i]]
        end
    end
    return maze
end

actions = [:north, :east, :south, :west, :none]

maze   = convert_string_to_string_array(maze_definition)
V      = zeros(size(maze))
policy = reshape([actions[int64(ceil(rand() * (length(actions)-1)))] for i = 1:length(maze)], (7,7))
for i=1:length(maze)
    if maze[i] in ['#', 'T']
        V[i] = state_reward_function[maze[i]]
        policy[i] = :none
    end
end
V

7x7 Array{Float64,2}:
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0
 -100.0     0.0     0.0     0.0     0.0     0.0  -100.0
 -100.0     0.0     0.0     0.0     0.0     0.0  -100.0
 -100.0     0.0     0.0     0.0     0.0     0.0  -100.0
 -100.0     0.0     0.0     0.0     0.0     0.0  -100.0
 -100.0     0.0     0.0     0.0     0.0     0.0  -100.0
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0

In [18]:
function update_greedy_policy!(maze::Matrix{Char}, policy::Matrix{Any}, V::Matrix{Float64})
    for i = 2:size(maze)[1]-1
        for j = 2:size(maze)[2]-1
            if maze[i,j] == ' '
                possible_actions = []
                action_values = []
                if maze[i-1,j] != '#'
                    possible_actions = [possible_actions, :north]
                    action_values    = [action_values, V[i-1, j]]
                end
                if maze[i+1,j] != '#'
                    possible_actions = [possible_actions, :south]
                    action_values    = [action_values, V[i+1, j]]
                end

                if maze[i,j-1] != '#'
                    possible_actions = [possible_actions, :west]
                    action_values    = [action_values, V[i, j-1]]
                end
                if maze[i,j+1] != '#'
                    possible_actions = [possible_actions, :east]
                    action_values    = [action_values, V[i, j+1]]
                end
                best_action_value = maximum(action_values)
                possible_actions = possible_actions[findin(action_values,best_action_value)]
                action = possible_actions[int(ceil(rand() * length(possible_actions)))]
                policy[i,j] = action
            end
        end
    end
end

update_greedy_policy! (generic function with 1 method)

In [19]:
function update_state_value_function(maze::Matrix{Char}, policy::Matrix{Any}, V::Matrix{Float64})
    new_values = zeros(size(V))
    delta = 100.0
    while delta > 1.0
        for i=2:size(V)[1] - 1
            for j = 2:size(V)[2] - 1
                if policy[i,j] == :north
                    new_values[i,j] = V[i-1,j] - 1.0 # -1.0 for transition
                elseif policy[i,j] == :south
                    new_values[i,j] = V[i+1,j] - 1.0 # -1.0 for transition
                elseif policy[i,j] == :east
                    new_values[i,j] = V[i,j+1] - 1.0 # -1.0 for transition
                elseif policy[i,j] == :west
                    new_values[i,j] = V[i,j-1] - 1.0 # -1.0 for transition
                else
                    new_values[i,j] = V[i,j] # terminal states
                end
            end
        end
        delta = maximum(abs(new_values[2:end-1,2:end-1] .- V[2:end-1,2:end-1]))
        for i=2:size(V)[1] - 1
            for j=2:size(V)[2] - 1
                V[i,j] = new_values[i,j]
            end
        end
        println(delta)
    end
end

update_state_value_function (generic function with 1 method)

In [20]:
update_greedy_policy!(maze, policy, V)
policy

7x7 Array{Any,2}:
 :none  :none   :none   :none   :none   :none   :none
 :none  :none   :south  :west   :south  :south  :none
 :none  :south  :north  :north  :west   :west   :none
 :none  :north  :south  :west   :south  :south  :none
 :none  :north  :south  :north  :west   :south  :none
 :none  :east   :east   :north  :north  :none   :none
 :none  :none   :none   :none   :none   :none   :none

In [21]:
update_state_value_function(maze, policy, V)
V

1.0


7x7 Array{Float64,2}:
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0
 -100.0     0.0    -1.0    -1.0    -1.0    -1.0  -100.0
 -100.0    -1.0    -1.0    -1.0    -1.0    -1.0  -100.0
 -100.0    -1.0    -1.0    -1.0    -1.0    -1.0  -100.0
 -100.0    -1.0    -1.0    -1.0    -1.0    -1.0  -100.0
 -100.0    -1.0    -1.0    -1.0    -1.0     0.0  -100.0
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0

In [22]:
update_greedy_policy!(maze, policy, V)
policy

7x7 Array{Any,2}:
 :none  :none   :none   :none   :none   :none   :none
 :none  :none   :west   :west   :east   :west   :none
 :none  :north  :east   :east   :east   :south  :none
 :none  :east   :west   :north  :east   :north  :none
 :none  :north  :south  :west   :south  :south  :none
 :none  :north  :east   :north  :east   :none   :none
 :none  :none   :none   :none   :none   :none   :none

In [23]:
update_state_value_function(maze, policy, V)
V

1.0


7x7 Array{Float64,2}:
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0
 -100.0     0.0    -1.0    -2.0    -2.0    -2.0  -100.0
 -100.0    -1.0    -2.0    -2.0    -2.0    -2.0  -100.0
 -100.0    -2.0    -2.0    -2.0    -2.0    -2.0  -100.0
 -100.0    -2.0    -2.0    -2.0    -2.0    -1.0  -100.0
 -100.0    -2.0    -2.0    -2.0    -1.0     0.0  -100.0
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0

In [24]:
update_greedy_policy!(maze, policy, V)
policy

7x7 Array{Any,2}:
 :none  :none   :none   :none   :none  :none   :none
 :none  :none   :west   :west   :east  :west   :none
 :none  :north  :west   :north  :east  :west   :none
 :none  :north  :south  :south  :west  :south  :none
 :none  :north  :west   :west   :east  :south  :none
 :none  :east   :west   :east   :east  :none   :none
 :none  :none   :none   :none   :none  :none   :none

In [25]:
update_state_value_function(maze, policy, V)
V

1.0


7x7 Array{Float64,2}:
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0
 -100.0     0.0    -1.0    -2.0    -3.0    -3.0  -100.0
 -100.0    -1.0    -2.0    -3.0    -3.0    -3.0  -100.0
 -100.0    -2.0    -3.0    -3.0    -3.0    -2.0  -100.0
 -100.0    -3.0    -3.0    -3.0    -2.0    -1.0  -100.0
 -100.0    -3.0    -3.0    -2.0    -1.0     0.0  -100.0
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0

In [26]:
update_greedy_policy!(maze, policy, V)
policy

7x7 Array{Any,2}:
 :none  :none   :none  :none   :none   :none   :none
 :none  :none   :west  :west   :west   :south  :none
 :none  :north  :west  :north  :south  :south  :none
 :none  :north  :west  :north  :east   :south  :none
 :none  :north  :west  :south  :east   :south  :none
 :none  :east   :east  :east   :east   :none   :none
 :none  :none   :none  :none   :none   :none   :none

In [27]:
update_state_value_function(maze, policy, V)
V

1.0


7x7 Array{Float64,2}:
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0
 -100.0     0.0    -1.0    -2.0    -3.0    -4.0  -100.0
 -100.0    -1.0    -2.0    -3.0    -4.0    -3.0  -100.0
 -100.0    -2.0    -3.0    -4.0    -3.0    -2.0  -100.0
 -100.0    -3.0    -4.0    -3.0    -2.0    -1.0  -100.0
 -100.0    -4.0    -3.0    -2.0    -1.0     0.0  -100.0
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0

In [28]:
update_greedy_policy!(maze, policy, V)
policy

7x7 Array{Any,2}:
 :none  :none   :none   :none   :none   :none   :none
 :none  :none   :west   :west   :west   :south  :none
 :none  :north  :west   :north  :south  :south  :none
 :none  :north  :west   :south  :south  :south  :none
 :none  :north  :north  :east   :south  :south  :none
 :none  :east   :east   :east   :east   :none   :none
 :none  :none   :none   :none   :none   :none   :none

In [29]:
update_state_value_function(maze, policy, V)
V

0.0


7x7 Array{Float64,2}:
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0
 -100.0     0.0    -1.0    -2.0    -3.0    -4.0  -100.0
 -100.0    -1.0    -2.0    -3.0    -4.0    -3.0  -100.0
 -100.0    -2.0    -3.0    -4.0    -3.0    -2.0  -100.0
 -100.0    -3.0    -4.0    -3.0    -2.0    -1.0  -100.0
 -100.0    -4.0    -3.0    -2.0    -1.0     0.0  -100.0
 -100.0  -100.0  -100.0  -100.0  -100.0  -100.0  -100.0