In [1]:
using POMDPs
using Distributions

In [2]:
#Define the state and the observed state
type GridWorldState 
    x::Int64 # x tip position
    y::Int64 # y tip position
    θ::Int64 # θ tip angle
    done::Bool # are we in a terminal state; if so later will set reward to 0
    
    ######### NEED TO SET BUMPED AS AN ADDITIONAL STATE TO KEEP IN BOUND IN TRANSITION MODEL
end

type ObservedState
    xobs::Int64 # x observed tip position
    yobs::Int64 # y observed tip position
    θobs::Int64 # θ observed tip angle
end

In [3]:
#Define the POMDP container
type GridWorld <: POMDP{GridWorldState, Symbol, ObservedState}  #action is "symbol".. left or right
    size_x::Int64 #grid size
    size_y::Int64 #grid size
    size_θ::Int64 #thetas possible.  8 buckets of angles
    reward_states::Vector{GridWorldState} # the states in which agent recieves reward
    reward_values::Vector{Float64} # reward values for those states
    penalty::Float64 # penalty for taking more steps to reach
    tprob::Float64 # probability of transitioning to the desired state
    discount_factor::Float64 # discount factor
    scan_correctly::Float64 #Prob of scanning the tip correctly
end

In [4]:
#Constructor of gridworld with some default values
# initial state constructor
#Set target location at rs.
GridWorldState(x::Int64, y::Int64, θ::Int64) = GridWorldState(x,y,θ,false)
function GridWorld(;sx::Int64=10, # size_x
                    sy::Int64=10, # size_y
                    sθ::Int64=8, # size_theta
                    rs::Vector{GridWorldState}=[GridWorldState(4,3,1),GridWorldState(4,3,2),GridWorldState(4,3,3),GridWorldState(4,3,4),GridWorldState(4,3,5),GridWorldState(4,3,6),GridWorldState(4,3,7),GridWorldState(4,3,8)], # reward states.  x=4,y=3,theta any
                    rv::Vector{Float64}=rv = [100.,100,100,100,100,100,100,100], # reward value
                    pen::Float64=-1.0, # penalty for each move made
                    tp::Float64=0.7, ########## tprob depends on where you go to by transition distribution function which will be encoded later?
                    df::Float64=0.9, #discount factor
                    sc::Float64=0.6)  #prob of scan correctly  
    return GridWorld(sx, sy, sθ, rs, rv, pen, tp, df,sc)
end
# we can now create a GridWorld pomdp instance like this:
pomdp = GridWorld()
pomdp.reward_states # pomdp contains all the default values from the constructor

8-element Array{GridWorldState,1}:
 GridWorldState(4,3,1,false)
 GridWorldState(4,3,2,false)
 GridWorldState(4,3,3,false)
 GridWorldState(4,3,4,false)
 GridWorldState(4,3,5,false)
 GridWorldState(4,3,6,false)
 GridWorldState(4,3,7,false)
 GridWorldState(4,3,8,false)

In [5]:
#State Space
type StateSpace <: AbstractSpace
    states::Vector{GridWorldState}
end

#Alter the function states so that it "knows" the GridWorld datatype we defined
function POMDPs.states(pomdp::GridWorld)
    s = GridWorldState[] # initialize an array of GridWorldStates
    # loop over all our states, binary variable: done (d)
    for d = 0:1, y = 1:pomdp.size_y, x = 1:pomdp.size_x, θ = 1:pomdp.size_θ
        push!(s, GridWorldState(x,y,θ,d))
    end
    return StateSpace(s)
end

# How state space calls this function:
state_space = states(pomdp);
state_space.states[1] # remeber that our state space instance has an array called states in it

#Define iterator
function POMDPs.iterator(space::StateSpace)
    return space.states 
end

# Uniform sampling if have matrix of state space:
function POMDPs.rand(rng::AbstractRNG, space::StateSpace, s::GridWorldState)
    sp = space.states[rand(rng, 1:end)]
    copy!(s, sp)
    s
end;

In [6]:
# Action Space:
type ActionSpace <: AbstractSpace
    actions::Vector{Symbol}
end

# Function to return action space:
function POMDPs.actions(pomdp::GridWorld)
    acts = [:left, :right]
    return ActionSpace(acts)
end;
POMDPs.actions(pomdp::GridWorld, s::GridWorldState, as::ActionSpace=actions(pomdp)) = as;

# Iterator function for action space:
function POMDPs.iterator(space::ActionSpace)
    return space.actions 
end;

#Function to sample from action space:
function POMDPs.rand(rng::AbstractRNG, space::ActionSpace, a::Symbol)
    return space.actions[rand(rng, 1:end)]
end;

# Initialize action space and action space:
POMDPs.create_state(pomdp::GridWorld) = GridWorldState(1,1,0) # the 0 is an initial theta, x=1,y=1
POMDPs.create_action(pomdp::GridWorld) = :left;

In [7]:
# Observation Space:
type ObservationSpace <: AbstractSpace
    obs::Vector{GridWorldState} 
end

# function returning observation space
function POMDPs.observations(pomdp::GridWorld)
    obs= GridWorldState[]
    # loop over all states
    for y = 1:pomdp.size_y, x = 1:pomdp.size_x, θ = 1:pomdp.size_θ
        push!(s, GridWorldState(x,y,θ))
    end
    return ObservationSpace(obs);
end;
POMDPs.observations(::GridWorld, s::GridWorldState, obs::ObservationSpace) = obs;

# function returning an iterator over that space
function POMDPs.iterator(space::ObservationSpace)
    return space.observations
end

In [8]:
# Transition Distribution: (setup)
type TransitionDistribution <: AbstractDistribution
    neighbors::Array{GridWorldState} # the states s' in the distribution
    probs::Array{Float64} # the probability corresponding to each state s'
    cat::Categorical # this comes from Distributions.jl and is used for sampling
end

function POMDPs.create_transition_distribution(pomdp::GridWorld)
    # can have at most 9 neighbors in grid world
    neighbors =  [GridWorldState(i,i) for i = 1:9] #initializing. these will get overwritten by action and tprob=.7
    probabilities = zeros(9) + 1.0/9.0
    cat = Categorical(9)
    return TransitionDistribution(neighbors, probabilities, cat)
end;

#Iterator for distributions for d:
function POMDPs.iterator(d::TransitionDistribution)
    return d.neighbors
end;

#Prob. density function:
function POMDPs.pdf(d::TransitionDistribution, s::GridWorldState)
    for (i, sp) in enumerate(d.neighbors)
        #if s == sp  #HERE USE THE POSEQUAL FUNCTION TO COMPARE ALL ELEMENTS OF STATE RATHER THAN S==SP
            return d.probs[i]
        end
    end   
    return 0.0
end;

#Iterator for PDF:
function POMDPs.rand(rng::AbstractRNG, d::TransitionDistribution, s::GridWorldState)
    d.cat = Categorical(d.probs) # init the categorical distribution
    ns = d.neighbors[rand(d.cat)] # sample a neighbor state according to the distribution c
    copy!(s, ns)
    return s # return the pointer to s
end;

LoadError: LoadError: syntax: unexpected end
while loading In[8], in expression starting on line 29

In [9]:
#Observation distribution: (setup)
type ObservationDistribution <: AbstractDistribution
    neighbors::Array{GridWorldState} # the states s' in the distribution
    probs::Array{Float64} # the probability corresponding to each state s'
    cat::Categorical # this comes from Distributions.jl and is used for sampling
end

function POMDPs.create_observation_distribution(pomdp::GridWorld)
    # can have at most 9 neighbors in grid world
    neighbors =  [GridWorldState(i,i) for i = 1:9] #initializing. these will get overwritten by action and tprob=.7
    probabilities = zeros(9) + 1.0/9.0
    cat = Categorical(9)
    return ObservationDistribution(neighbors, probabilities, cat)
end;

#Iterator for distributions for d:
function POMDPs.iterator(d::ObservationDistribution)
    return d.neighbors
end;

#Prob. density function:
function POMDPs.pdf(d::ObservationDistribution, s::GridWorldState)
    for (i, sp) in enumerate(d.neighbors)
        if s == sp
            return d.probs[i]
        end
    end   
    return 0.0
end;

#Iterator for PDF:
function POMDPs.rand(rng::AbstractRNG, d::ObservationDistribution, s::GridWorldState)
    d.cat = Categorical(d.probs) # init the categorical distribution
    ns = d.neighbors[rand(d.cat)] # sample a neighbor state according to the distribution c
    copy!(s, ns)
    return s # return the pointer to s
end;

In [10]:
# # Transition Model:
# #at state s
# #for th1, only N5,N4,N6 allowed with N5 most likely.
# #For th2, only N6, N7, N5 allowed with N6 most likely.
# #Depends on current state or (action) Left or right which is the most likely

# # transition helpers
# function inbounds(pomdp::GridWorld,x::Int64,y::Int64)
#     if 1 <= x <= pomdp.size_x && 1 <= y <= pomdp.size_y
#         return true
#     else
#         return false
#     end
# end

# function inbounds(pomdp::GridWorld,state::GridWorldState)
#     x = state.x #point x of state
#     y = state.y
#     return inbounds(pomdp, x, y)
# end

# function fill_probability!(p::Vector{Float64}, val::Float64, index::Int64)
#     for i = 1:length(p)
#         if i == index
#             p[i] = val
#         else
#             p[i] = 0.0
#         end
#     end
# end;


# function POMDPs.transition(pomdp::GridWorld,
#                            state::GridWorldState,
#                            action::Symbol,
#                            d::GridWorldDistribution=create_transition_distribution(pomdp))
#     # re-assign names so we don't have to call actiond.direction every time etc
#     a = action
#     x = state.x
#     y = state.y
#     θ = state.θ
    
#     neighbors = d.neighbors
#     probability = d.probs
    
#     # let's handle the done case first
#     if state.done
#         # can only transition to the same done state
#         fill!(probability, 0.0)
#         probability[1] = 1.0
#         copy!(neighbors[1], state)
#         # when we sample d, we will only get the state in neighbors[1] - our done state
#         return d
#     end
    
#     fill!(probability, 0.0)    # probability of transition is 0 unless neighbor in one of the specific angle diections
#     probability[9] = 0.0

#     neighbors[1].x = x; neighbors[1].y = y-1
#     neighbors[2].x = x+1; neighbors[2].y = y-1
#     neighbors[3].x = x+1; neighbors[3].y = y
#     neighbors[4].x = x+1; neighbors[4].y = y+1
#     neighbors[5].x = x; neighbors[5].y = y+1
#     neighbors[6].x = x-1; neighbors[6].y = y+1
#     neighbors[7].x = x-1; neighbors[7].y = y
#     neighbors[8].x = x-1; neighbors[8].y = y-1
#     neighbors[9].x = x; neighbors[9].y = y
    
#     for i = 1:9 neighbors[i].bumped = false end
#     for i = 1:9 neighbors[i].done = false end
#     reward_states = pomdp.reward_states
#     reward_values = pomdp.reward_values
#     n = length(reward_states)
#     for i = 1:n
#         #if state == reward_states[i] && reward_values[i] > 0.0
#         if posequal(state, reward_states[i]) && reward_values[i] > 0.0
#             fill_probability!(probability, 1.0, 9)
#             neighbors[9].done = true
#             return d
#         end
#     end
#     if θ=1
#         if a == :right
        
#             if !inbounds(pomdp, neighbors[4])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[4] = 0.8
#             end
#             if !inbounds(pomdp, neighbors[5])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[4] = 0.15
#             end
#             if !inbounds(pomdp, neighbors[6])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[6] = 0.05
#             end
#         end
        
#         if a == :left
#             if !inbounds(pomdp, neighbors[4])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[4] = 0.05
#             end
#             if !inbounds(pomdp, neighbors[5])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[4] = 0.15
#             end
#             if !inbounds(pomdp, neighbors[6])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[6] = 0.8
#             end
#         end
#     end
    
#     if θ=2
#         if a == :right
        
#             if !inbounds(pomdp, neighbors[5])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[5] = 0.8
#             end
#             if !inbounds(pomdp, neighbors[6])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[6] = 0.15
#             end
#             if !inbounds(pomdp, neighbors[7])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[7] = 0.05
#             end
#         end
        
#         if a == :left
#             if !inbounds(pomdp, neighbors[5])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[5] = 0.05
#             end
#             if !inbounds(pomdp, neighbors[6])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[6] = 0.15
#             end
#             if !inbounds(pomdp, neighbors[7])       #If not in bounds set wall bump to true.
#                 fill_probability!(probability, 1.0, 9)
#                 neighbors[9].bumped = true
#             else
#                 probability[7] = 0.8
#             end
#         end
#     end
#     #### THIS ABOVE SHOULD BE REPEATED FOR CORRESPONDING VALUES IN THETA 3-8

    
    
    
#     ##THE following is used to null probability of states that arnet neighbors, and raise the other ones to compensate
#     ##This is really messy for our problem since the other probabilities arent constant.  Maybe instead if
#     ##a neighbor is out of bounds simulation just ends.  In this case can delete all the if inbounds above and not use
#     ##bumped as a state.
#     ## So at the beginning of the transition fucntion say if !inbounds, ENDSIM= TRUE
    
# #     count = 0
# #     new_probability = 0.1

# #     for i = 1:length(neighbors)
# #         if !inbounds(pomdp, neighbors[i])
# #          count += 1
# #             probability[i] = 0.0
# #          end
# #      end

# #     if count == 1
# #         new_probability = 0.15
# #     elseif count == 2
# #         new_probability = 0.3
# #     end

# #     if count > 0
# #         for i = 1:length(neighbors)
# #             if probability[i] == 0.1
# #                probability[i] = new_probability
# #             end
# #         end
# #     end
# #     d
# end;

In [11]:
type GridWorldObservationDistribution <: AbstractDistribution
    neighbor::Array{GridWorldState} # the states s' in the distribution
    prob::Array{Float64} # the probability corresponding to each state s'
    cata::Categorical # this comes from Distributions.jl and is used for sampling
end

function POMDPs.create_observation_distribution(pomdp::GridWorld)
    neighbor[1].x = x; neighbor[1].y = y; neighbor[1].θ = θ
    neighbor[2].x = x; neighbor[2].y = y; neighbor[2].θ = θ+1
    neighbor[3].x = x; neighbor[3].y = y; neighbor[3].θ = θ-1
    
    neighbor[4].x = x+1; neighbor[4].y = y; neighbor[4].θ = θ
    neighbor[5].x = x+1; neighbor[5].y = y; neighbor[5].θ = θ+1
    neighbor[6].x = x+1; neighbor[6].y = y; neighbor[6].θ = θ-1
    
    neighbor[7].x = x; neighbor[7].y = y+1; neighbor[7].θ = θ
    neighbor[8].x = x; neighbor[8].y = y+1; neighbor[8].θ = θ+1
    neighbor[9].x = x; neighbor[9].y = y+1; neighbor[9].θ = θ-1
    
    neighbor[10].x = x+1; neighbor[10].y = y+1; neighbor[10].θ = θ
    neighbor[11].x = x+1; neighbor[11].y = y+1; neighbor[11].θ = θ+1
    neighbor[12].x = x+1; neighbor[12].y = y+1; neighbor[12].θ = θ-1

    neighbor[13].x = x-1; neighbor[13].y = y; neighbor[13].θ = θ
    neighbor[14].x = x-1; neighbor[14].y = y; neighbor[14].θ = θ+1
    neighbor[15].x = x-1; neighbor[15].y = y; neighbor[15].θ = θ-1
    
    neighbor[16].x = x; neighbor[16].y = y-1; neighbor[16].θ = θ
    neighbor[17].x = x; neighbor[17].y = y-1; neighbor[17].θ = θ+1
    neighbor[18].x = x; neighbor[18].y = y-1; neighbor[18].θ = θ-1
    
    neighbor[19].x = x-1; neighbor[19].y = y-1; neighbor[19].θ = θ
    neighbor[20].x = x-1; neighbor[20].y = y-1; neighbor[20].θ = θ+1
    neighbor[21].x = x-1; neighbor[21].y = y-1; neighbor[21].θ = θ-1
    
    neighbor[22].x = x-1; neighbor[22].y = y+1; neighbor[22].θ = θ
    neighbor[23].x = x-1; neighbor[23].y = y+1; neighbor[23].θ = θ+1
    neighbor[24].x = x-1; neighbor[24].y = y+1; neighbor[24].θ = θ-1
    
    neighbor[25].x = x+1; neighbor[25].y = y-1; neighbor[25].θ = θ
    neighbor[26].x = x+1; neighbor[26].y = y-1; neighbor[26].θ = θ+1
    neighbor[27].x = x+1; neighbor[27].y = y-1; neighbor[27].θ = θ-1
    
    probabilities=[0.36,0.12,0.12,0.03,0.01,0.01,0.03,0.01,0.01,0.03,0.01,0.01,0.03,0.01,0.01,0.03,0.01,0.01,0.03,0.01,0.01,0.03,0.01,0.01,0.03,0.01,0.01];
    cata = Categorical(27)
    return GridWorldObservationDistribution(neighbor, probabilities, cata)
end;

function POMDPs.iterator(d::GridWorldObservationDistribution)
    return d.neighbor
end;

posequal(s1::GridWorldState, s2::GridWorldState) = s1.x == s2.x && s1.y == s2.y && s1.θ ==  s2.θ

function POMDPs.pdf(d::GridWorldObservationDistribution, s::GridWorldState)
    for (i, sobs) in enumerate(d.neighbor)
        if s == sobs
            return d.prob[i]
        end
    end   
    return 0.0
end;

function POMDPs.rand(rng::AbstractRNG, d::GridWorldObservationDistribution, s::GridWorldState)
    d.cata = Categorical(d.prob) # init the categorical distribution
    ns = d.neighbor[rand(d.cata)] # sample a neighbor state according to the distribution c
    copy!(s, ns)
    return s # return the pointer to s
end;




