In [3]:
using POMDPs # for MDP type
using DiscreteValueIteration
using POMDPPolicies
using POMDPModelTools
using Parameters
using Random
using Plots; default(fontfamily="Computer Modern", framestyle=:box) # LaTex-style
using QuickPOMDPs
using Distributions 

In [4]:
Random.seed!(123)

MersenneTwister(123)

**Environment Parameters**

In [5]:
# assume families normally distributed as integers around average family size in afghanistan of 8. 
# Truncated to include 1 or greater and rounded to be integers 
@with_kw struct EvacuationParameters
    # need the probability that a particular family shows up at the door....
    family_sizes::Vector{Int} =  [trunc(Int, x) for x in rand(TruncatedNormal(8, 4.0, 1, Inf), 10)] 
    visa_status::Vector{Int} = [-5, -4, -3, -2, 0, 1, 2, 3, 4, 5] #TODO: map to various status 
    capacity::Int = 20
    time::Int = 60 
    size::Tuple{Int, Int} = (length(visa_status), length(family_sizes)) # size of grid 
    p_transition::Real = 0.7 # don't we always transition into this since time moves forward? I'm confused... 
#    null_state::State = State(x, 0, x, x) == 0 [how would do this?]
end


EvacuationParameters

In [6]:
params = EvacuationParameters(); 

**States**

In [7]:
# A state s in the evacuation problem is a discrete set of 4 values: 
# chairs remaining, time remaining, family size, and visa status
struct State
    c::Int # chairs remaining 
    t::Int # time remaining 
    f::Int # family size 
    v::Int # visa status 
end 

In [8]:
# The state space S for the evacuation problem is the set of 
# all combinations 
# Question: Do I need the end state here? 
𝒮 = [[State(c, t, f, v) for  c=1:params.capacity, t=1:params.time, f=1:params.size[2], v=1:params.size[1]]]

1-element Vector{Array{State, 4}}:
 [State(1, 1, 1, 1) State(1, 2, 1, 1) … State(1, 59, 1, 1) State(1, 60, 1, 1); State(2, 1, 1, 1) State(2, 2, 1, 1) … State(2, 59, 1, 1) State(2, 60, 1, 1); … ; State(19, 1, 1, 1) State(19, 2, 1, 1) … State(19, 59, 1, 1) State(19, 60, 1, 1); State(20, 1, 1, 1) State(20, 2, 1, 1) … State(20, 59, 1, 1) State(20, 60, 1, 1)]

[State(1, 1, 2, 1) State(1, 2, 2, 1) … State(1, 59, 2, 1) State(1, 60, 2, 1); State(2, 1, 2, 1) State(2, 2, 2, 1) … State(2, 59, 2, 1) State(2, 60, 2, 1); … ; State(19, 1, 2, 1) State(19, 2, 2, 1) … State(19, 59, 2, 1) State(19, 60, 2, 1); State(20, 1, 2, 1) State(20, 2, 2, 1) … State(20, 59, 2, 1) State(20, 60, 2, 1)]

[State(1, 1, 3, 1) State(1, 2, 3, 1) … State(1, 59, 3, 1) State(1, 60, 3, 1); State(2, 1, 3, 1) State(2, 2, 3, 1) … State(2, 59, 3, 1) State(2, 60, 3, 1); … ; State(19, 1, 3, 1) State(19, 2, 3, 1) … State(19, 59, 3, 1) State(19, 60, 3, 1); State(20, 1, 3, 1) State(20, 2, 3, 1) … State(20, 59, 3, 1) State(20, 60, 3, 1)]

In [9]:
#size(𝒮[1])
#(20, 60, 10, 10)
#length(𝒮[1]) = 120000 = 20*60*10*10

**Actions**

In [10]:
# the possible actions are whether accept or reject a family at the gate 
@enum Action REJECT ACCEPT

In [11]:
𝒜 = [REJECT, ACCEPT]

2-element Vector{Action}:
 REJECT::Action = 0
 ACCEPT::Action = 1

**Transition Function** 

In [18]:
# The dynamics to transition the agent live in the transition function T(s'|s, a). The transition function 
# I think our transition function is wrong...
# function T(s::State, a::Action, s_prime::State)
#     # if we accept the person, we want to decriment by family size 
#     if a == ACCEPT
#         s_prime.c = s.c - s.f
#     end 
#     s_prime.t = s_prime.t - 1 # how to update  time
#     # maybe make some terminal state when time = 0?
#     # do I return something here? 
# end

# think about how to fix the transition function 
# The transition function returns a distribution over next states  given the current state and an action .
function T(s::State, a::Action)
#     Nₐ = length(𝒜)
#     next_states = Vector{State}(undef, Nₐ + 1)
#     probabilities = zeros(Nₐ + 1)
#     p_transition = params.p_transition # what are our probabilities here????    
#     for (i, a_prime) in enumerate(𝒜)
#     # if we accept the person, we want to decriment by family size 
    if a == ACCEPT
        s_prime.c = s.c - s.f
    end 
    s_prime.t = s_prime.t - 1 # how to update  time
    # maybe make some terminal state when time = 0?
    # do I return something here? 
    return s_prime 
end
    

T (generic function with 1 method)

**Reward Function**

In [19]:
function R(s::State, a::Action)
    # reward is just the visa status times family size i think! 
    return s.v*s.f
end 

R (generic function with 1 method)

In [20]:
#render(mdp; show_rewards=true)
#@bind γ Slider(0:0.05:1, default=0.95, show_value=true)
# hard code for now. maybe come back to it. 
γ = 0.95

0.95

In [21]:
# when time is 0...possibly encode this into the parameters 
termination(s::State) = s.t == 0

termination (generic function with 1 method)

**MDP Formulation**

In [22]:
# We define the Airport abstract MDP type so we can reference it in other methods.
abstract type Evacuation <: MDP{State, Action} end

In [23]:
mdp = QuickMDP(Evacuation,
    states       = 𝒮,
    actions      = 𝒜,
    transition   = T,
    reward       = R,
    discount     = γ,
    initialstate = 𝒮,
    isterminal   = termination,
    render       = render
    );

In [24]:
render(mdp)

LoadError: MethodError: no method matching render()
[0mClosest candidates are:
[0m  render([91m::Union{QuickMDP, QuickPOMDP}[39m, [91m::Any...[39m; kwargs...) at /Users/lisaeinstein/.julia/packages/QuickPOMDPs/mIT3P/src/quick.jl:211
[0m  render([91m::Union{MDP, POMDP}[39m) at /Users/lisaeinstein/.julia/packages/POMDPModelTools/SycBB/src/visualization.jl:50
[0m  render([91m::Union{MDP, POMDP}[39m, [91m::Any[39m) at /Users/lisaeinstein/.julia/packages/POMDPModelTools/SycBB/src/visualization.jl:30

In [25]:
solver = ValueIterationSolver(max_iterations=30, belres=1e-6, verbose=true);

**Policy**

In [26]:
# Set discount factor to variable gamma and solve the MDP to obptain policy pi mapping states to actions a

In [27]:
policy = solve(solver, mdp)

INFO: POMDPLinter requirements for [34msolve(::ValueIterationSolver, ::Union{MDP,POMDP})[39m and dependencies. ([✔] = implemented correctly; [X] = not implemented; [?] = could not determine)

For [34msolve(::ValueIterationSolver, ::Union{MDP,POMDP})[39m:
[32m  [✔] discount(::QuickMDP{EvacuationArray{State, 4}ActionNamedTuple{(:stateindex, :isterminal, :render, :transition, :reward, :states, :actions, :discount, :initialstate, :actionindex), Tuple{Dict{Array{State, 4}, Int64}, typeof(termination), typeof(render), typeof(T), typeof(R), Vector{Array{State, 4}}, Vector{Action}, Float64, Vector{Array{State, 4}}, Dict{Action, Int64}}}})[39m
[32m  [✔] transition(::QuickMDP{EvacuationArray{State, 4}ActionNamedTuple{(:stateindex, :isterminal, :render, :transition, :reward, :states, :actions, :discount, :initialstate, :actionindex), Tuple{Dict{Array{State, 4}, Int64}, typeof(termination), typeof(render), typeof(T), typeof(R), Vector{Array{State, 4}}, Vector{Action}, Float64, Vector{Array{

LoadError: MethodError: no method matching T(::Array{State, 4}, ::Action)
[0mClosest candidates are:
[0m  T([91m::State[39m, ::Action) at In[18]:15

In [16]:
@requirements_info ValueIterationSolver() mdp()

LoadError: UndefVarError: mdp not defined

In [None]:
# a = action(policy, s) # returns the optimal action for state s
# value(policy, s) # returns the optimal value at state s
