In [1]:
using POMDPs
using Distributions

In [2]:
#Define the state and the observed state
# Mutable state of the grid world: tip position, tip angle, and a terminal flag.
type GridWorldState 
    x::Int64 # x tip position
    y::Int64 # y tip position
    θ::Int64 # θ tip angle, stored as an integer
    done::Bool # are we in a terminal state; if so later will set reward to 0
end

# Observation counterpart of the state: the same position/angle triple, without
# a terminal flag. It is used as the observation type parameter of GridWorld.
type ObservedState
    xobs::Int64 # x observed tip position
    yobs::Int64 # y observed tip position
    θobs::Int64 # θ observed tip angle
end

In [3]:
#Define the POMDP container
# Problem container. The POMDP type parameters are, in order:
# state type (GridWorldState), action type (Symbol), observation type (ObservedState).
type GridWorld <: POMDP{GridWorldState, Symbol, ObservedState}  # actions are Symbols: :left or :right
    size_x::Int64 # grid size along x
    size_y::Int64 # grid size along y
    size_θ::Int64 # number of discrete tip angles (the keyword constructor defaults this to 8)
    reward_states::Vector{GridWorldState} # the states in which the agent receives reward
    reward_values::Vector{Float64} # reward values for those states
    penalty::Float64 # penalty for taking more steps to reach
    tprob::Float64 # probability of transitioning to the desired state
    discount_factor::Float64 # discount factor
end

In [4]:
#Constructor of gridworld with some default values
# initial state constructor
#Set target location at rs.
# Convenience constructor: builds a non-terminal state (done = false) from a
# position and an angle.
function GridWorldState(x::Int64, y::Int64, θ::Int64)
    return GridWorldState(x, y, θ, false)
end
# Keyword constructor for GridWorld with default values.
#   sx, sy : grid size along x and y
#   sθ     : number of discrete tip angles
#   rs     : reward (target) states; by default x=4, y=3 for every angle in 1:sθ
#   rv     : reward value for each entry of rs; by default 100.0 per reward state
#   pen    : penalty for each move made
#   tp     : probability of transitioning to the desired state
#   df     : discount factor
# The defaults of rs and rv are derived from sθ and rs respectively, so the two
# vectors stay the same length and the reward states stay inside the angle range
# when only sθ (or only rs) is overridden. With all defaults the result is the
# same eight reward states with reward 100.0 each.
function GridWorld(;sx::Int64=10, # size_x
                    sy::Int64=10, # size_y
                    sθ::Int64=8, # size_theta
                    rs::Vector{GridWorldState}=GridWorldState[GridWorldState(4,3,θ) for θ = 1:sθ], # reward states.  x=4,y=3,theta any
                    rv::Vector{Float64}=fill(100.0, length(rs)), # reward value, one per reward state
                    pen::Float64=-1.0, # penalty for each move made
                    tp::Float64=0.7, # probability of reaching the desired state
                    df::Float64=0.9) #discount factor
    return GridWorld(sx, sy, sθ, rs, rv, pen, tp, df)
end
# we can now create a GridWorld pomdp instance like this:
# Build a problem instance using only the keyword defaults.
pomdp = GridWorld()
# Display the reward states; pomdp contains all the default values from the constructor.
pomdp.reward_states # pomdp contains all the default values from the constructor

8-element Array{GridWorldState,1}:
 GridWorldState(4,3,1,false)
 GridWorldState(4,3,2,false)
 GridWorldState(4,3,3,false)
 GridWorldState(4,3,4,false)
 GridWorldState(4,3,5,false)
 GridWorldState(4,3,6,false)
 GridWorldState(4,3,7,false)
 GridWorldState(4,3,8,false)

In [5]:
#State Space
# State space wrapper: holds the explicit list of states.
type StateSpace <: AbstractSpace
    states::Vector{GridWorldState} # the enumerated states
end

#Alter the function states so that it "knows" the GridWorld datatype we defined
# Enumerate the full state space of a GridWorld.
# The terminal flag varies slowest, then y, then x, and the angle θ varies fastest.
function POMDPs.states(pomdp::GridWorld)
    all_states = GridWorldState[]
    for terminal in 0:1
        for row in 1:pomdp.size_y, col in 1:pomdp.size_x, angle in 1:pomdp.size_θ
            # the Int flag is converted to Bool by the default constructor
            push!(all_states, GridWorldState(col, row, angle, terminal))
        end
    end
    return StateSpace(all_states)
end

# How state space calls this function:
# Build the state space for the problem instance (trailing `;` suppresses notebook output).
state_space = states(pomdp);
# Show the first enumerated state.
state_space.states[1] # remember that our state space instance has an array called states in it

#Define iterator
# Iterating a StateSpace means iterating its stored vector of states.
POMDPs.iterator(space::StateSpace) = space.states

# Uniform sampling from the state space (the sampled state is written into `s`):
# Draw a state uniformly at random from `space` and write it into `s`.
#   rng   : random number generator used to pick the index
#   space : state space to sample from
#   s     : preallocated state that is overwritten in place
# Returns `s`.
# The fields are copied one by one because no `copy!` method for
# GridWorldState is defined in this file.
function POMDPs.rand(rng::AbstractRNG, space::StateSpace, s::GridWorldState)
    sp = space.states[rand(rng, 1:length(space.states))]
    s.x = sp.x
    s.y = sp.y
    s.θ = sp.θ
    s.done = sp.done
    return s
end;

In [6]:
# Action Space:
# Action space wrapper: holds the explicit list of actions.
type ActionSpace <: AbstractSpace
    actions::Vector{Symbol} # the available actions, as Symbols
end

# Function to return action space:
# The action space of a GridWorld: the two moves :left and :right.
function POMDPs.actions(pomdp::GridWorld)
    return ActionSpace([:left, :right])
end;
# State-dependent action space: every state allows the same actions, so the
# supplied (or freshly built) action space is returned unchanged.
function POMDPs.actions(pomdp::GridWorld, s::GridWorldState, as::ActionSpace=actions(pomdp))
    return as
end;

# Iterator function for action space:
# Iterating an ActionSpace means iterating its stored vector of actions.
POMDPs.iterator(space::ActionSpace) = space.actions;

#Function to sample from action space:
# Pick an action uniformly at random from `space` using `rng`.
# The `a` argument is not modified; the sampled action is the return value.
function POMDPs.rand(rng::AbstractRNG, space::ActionSpace, a::Symbol)
    idx = rand(rng, 1:length(space.actions))
    return space.actions[idx]
end;

# Create an initial (placeholder) state and an initial action:
# Allocate an initial state at x=1, y=1 with an initial theta of 0 (non-terminal).
function POMDPs.create_state(pomdp::GridWorld)
    return GridWorldState(1, 1, 0)
end
# Allocate an initial action; :left is used as the starting value.
function POMDPs.create_action(pomdp::GridWorld)
    return :left
end;

In [7]:
# Observation Space:
# Observation space wrapper: holds the explicit list of observations.
# NOTE(review): the elements are GridWorldState, while GridWorld declares
# ObservedState as its observation type parameter — confirm which is intended.
type ObservationSpace <: AbstractSpace
    obs::Vector{GridWorldState} 
end

# function returning observation space
# Enumerate the observation space of a GridWorld: one non-terminal
# GridWorldState per (x, y, θ) combination, with θ varying fastest.
# Returns an ObservationSpace wrapping that list.
function POMDPs.observations(pomdp::GridWorld)
    obs = GridWorldState[]
    # loop over all states
    for y = 1:pomdp.size_y, x = 1:pomdp.size_x, θ = 1:pomdp.size_θ
        # push into `obs`; the original pushed into an undefined variable `s`
        push!(obs, GridWorldState(x,y,θ))
    end
    return ObservationSpace(obs);
end;
# State-dependent observation space: the supplied observation space is returned
# unchanged, whatever the state.
function POMDPs.observations(::GridWorld, s::GridWorldState, obs::ObservationSpace)
    return obs
end;

# function returning an iterator over that space
# Iterating an ObservationSpace means iterating its stored vector.
# The field is named `obs`; ObservationSpace has no `observations` field.
function POMDPs.iterator(space::ObservationSpace)
    return space.obs
end

In [8]:
# Transition Distribution:
# Discrete distribution over next states s': parallel arrays of candidate
# states and their probabilities, plus a Categorical used when sampling.
type TransitionDistribution <: AbstractDistribution
    neighbors::Array{GridWorldState} # the states s' in the distribution
    probs::Array{Float64} # the probability corresponding to each state s' (same order as neighbors)
    cat::Categorical # this comes from Distributions.jl and is used for sampling
end

# Allocate a placeholder transition distribution with 9 neighbor slots and a
# uniform probability of 1/9 each. The contents are meant to be overwritten
# later according to the action and tprob.
function POMDPs.create_transition_distribution(pomdp::GridWorld)
    # can have at most 9 neighbors in grid world
    nslots = 9
    # GridWorldState has no 2-argument constructor, so a placeholder angle of 1
    # is supplied; these states are only initial filler.
    neighbors = GridWorldState[GridWorldState(i, i, 1) for i = 1:nslots]
    # fill() avoids array + scalar addition, which newer Julia versions reject
    probabilities = fill(1.0/9.0, nslots)
    cat = Categorical(nslots)
    return TransitionDistribution(neighbors, probabilities, cat)
end;

#Iterator for distributions for d:
# Iterating a TransitionDistribution means iterating its candidate next states.
POMDPs.iterator(d::TransitionDistribution) = d.neighbors;

#Prob. density function:
# Probability of state `s` under the transition distribution `d`.
# Returns the probability stored for the first neighbor whose fields all match
# `s`, or 0.0 when no neighbor matches.
# The comparison is done field by field: GridWorldState is a mutable type and
# this file defines no `==` for it, so `s == sp` would only be true when both
# names refer to the very same object.
function POMDPs.pdf(d::TransitionDistribution, s::GridWorldState)
    for (i, sp) in enumerate(d.neighbors)
        if s.x == sp.x && s.y == sp.y && s.θ == sp.θ && s.done == sp.done
            return d.probs[i]
        end
    end
    return 0.0
end;

#Sample a next state from the distribution:
# Sample a next state from `d` and write it into `s`.
#   rng : random number generator argument required by the interface
#   d   : transition distribution; its `cat` field is rebuilt from `d.probs`
#   s   : preallocated state that is overwritten in place
# Returns `s`.
# NOTE(review): `rng` is not used; sampling goes through `rand(d.cat)`.
# Confirm whether the installed Distributions.jl accepts `rand(rng, d.cat)`.
function POMDPs.rand(rng::AbstractRNG, d::TransitionDistribution, s::GridWorldState)
    d.cat = Categorical(d.probs) # init the categorical distribution
    ns = d.neighbors[rand(d.cat)] # sample a neighbor state according to the distribution
    # copy field by field: no `copy!` method for GridWorldState is defined in this file
    s.x = ns.x
    s.y = ns.y
    s.θ = ns.θ
    s.done = ns.done
    return s # return the same (mutated) object that was passed in
end;

In [9]:
#Observation distribution:
# Discrete distribution over observations: parallel arrays of candidate
# states and their probabilities, plus a Categorical used when sampling.
type ObservationDistribution <: AbstractDistribution
    neighbors::Array{GridWorldState} # the candidate states in the distribution
    probs::Array{Float64} # the probability corresponding to each candidate (same order as neighbors)
    cat::Categorical # this comes from Distributions.jl and is used for sampling
end

# Allocate a placeholder observation distribution with 9 slots and a uniform
# probability of 1/9 each. The contents are meant to be overwritten later.
function POMDPs.create_observation_distribution(pomdp::GridWorld)
    # can have at most 9 neighbors in grid world
    nslots = 9
    # GridWorldState has no 2-argument constructor, so a placeholder angle of 1
    # is supplied; these states are only initial filler.
    neighbors = GridWorldState[GridWorldState(i, i, 1) for i = 1:nslots]
    # fill() avoids array + scalar addition, which newer Julia versions reject
    probabilities = fill(1.0/9.0, nslots)
    cat = Categorical(nslots)
    return ObservationDistribution(neighbors, probabilities, cat)
end;

#Iterator for distributions for d:
# Iterating an ObservationDistribution means iterating its candidate states.
POMDPs.iterator(d::ObservationDistribution) = d.neighbors;

#Prob. density function:
# Probability of `s` under the observation distribution `d`.
# Returns the probability stored for the first candidate whose fields all match
# `s`, or 0.0 when no candidate matches.
# The comparison is done field by field: GridWorldState is a mutable type and
# this file defines no `==` for it, so `s == sp` would only be true when both
# names refer to the very same object.
function POMDPs.pdf(d::ObservationDistribution, s::GridWorldState)
    for (i, sp) in enumerate(d.neighbors)
        if s.x == sp.x && s.y == sp.y && s.θ == sp.θ && s.done == sp.done
            return d.probs[i]
        end
    end
    return 0.0
end;

#Sample an observation from the distribution:
# Sample from the observation distribution `d` and write the result into `s`.
#   rng : random number generator argument required by the interface
#   d   : observation distribution; its `cat` field is rebuilt from `d.probs`
#   s   : preallocated state that is overwritten in place
# Returns `s`.
# NOTE(review): `rng` is not used; sampling goes through `rand(d.cat)`.
# Confirm whether the installed Distributions.jl accepts `rand(rng, d.cat)`.
function POMDPs.rand(rng::AbstractRNG, d::ObservationDistribution, s::GridWorldState)
    d.cat = Categorical(d.probs) # init the categorical distribution
    ns = d.neighbors[rand(d.cat)] # sample a candidate according to the distribution
    # copy field by field: no `copy!` method for GridWorldState is defined in this file
    s.x = ns.x
    s.y = ns.y
    s.θ = ns.θ
    s.done = ns.done
    return s # return the same (mutated) object that was passed in
end;

In [None]:
# Transition Model:


In [None]:
# Reward Model:


In [None]:
# Observation Model:
