In [1]:
using POMDPs # for MDP type
using DiscreteValueIteration
using POMDPPolicies
using POMDPModelTools #for sparse cat 
using Parameters
using Random
using Plots; default(fontfamily="Computer Modern", framestyle=:box) # LaTex-style
using QuickPOMDPs
using Distributions 

In [2]:
Random.seed!(123)

MersenneTwister(123)

**Environment Parameters**

In [3]:
# moving below becasue need state defined...

**States**

In [4]:
# A state s in the evacuation problem is a discrete set of 4 values: 
# chairs remaining, time remaining, family size, and visa status
struct State
    c::Int # chairs remaining 
    t::Int # time remaining 
    f::Int # family size 
    v::Int # visa status 
end 

In [5]:
# assume families normally distributed as integers around average family size in afghanistan of 8. 
# Truncated to include 1 or greater and rounded to be integers 
@with_kw struct EvacuationParameters
    # need the probability that a particular family shows up at the door....
    family_sizes::Vector{Int} =  [trunc(Int, x) for x in rand(TruncatedNormal(3, 1, 1, Inf), 10)] 
    visa_status::Vector{Int} = [-4, -3, -2, 0, 1, 2, 3, 4] #TODO: map to various status 
    capacity::Int = 20
    time::Int = 60
    size::Tuple{Int, Int} = (length(visa_status), length(family_sizes)) # size of grid 
    p_transition::Real = 0.8 # don't we always transition into this since time moves forward? I'm confused... 
    # null_state::State = (-1, -1, Anything, Anything) # is there someway to do this???
end


EvacuationParameters

In [6]:
params = EvacuationParameters(); 

In [8]:
# The state space S for the evacuation problem is the set of 
# all combinations 
# Question: Do I need the end state here? 
#𝒮 = [[State(c, t, f, v) for  c=1:params.capacity, t=1:params.time, f=1:params.size[2], v=1:params.size[1]]]
𝒮 = []

for c in 0:params.capacity # capacity ends at 0 
    for t in 0:params.time # time ends at 0 
        for f in 1:params.size[2] # number of families 
            for v in 1:params.size[1] # number of visa statuses 
                new = State(c, t, f, v) 
                𝒮 = [𝒮; new]
            end
        end        
    end
end


In [9]:
@show 𝒮

𝒮 = Any[State(0, 0, 1, 1), State(0, 0, 1, 2), State(0, 0, 1, 3), State(0, 0, 1, 4), State(0, 0, 1, 5), State(0, 0, 1, 6), State(0, 0, 1, 7), State(0, 0, 1, 8), State(0, 0, 2, 1), State(0, 0, 2, 2), State(0, 0, 2, 3), State(0, 0, 2, 4), State(0, 0, 2, 5), State(0, 0, 2, 6), State(0, 0, 2, 7), State(0, 0, 2, 8), State(0, 0, 3, 1), State(0, 0, 3, 2), State(0, 0, 3, 3), State(0, 0, 3, 4), State(0, 0, 3, 5), State(0, 0, 3, 6), State(0, 0, 3, 7), State(0, 0, 3, 8), State(0, 0, 4, 1), State(0, 0, 4, 2), State(0, 0, 4, 3), State(0, 0, 4, 4), State(0, 0, 4, 5), State(0, 0, 4, 6), State(0, 0, 4, 7), State(0, 0, 4, 8), State(0, 0, 5, 1), State(0, 0, 5, 2), State(0, 0, 5, 3), State(0, 0, 5, 4), State(0, 0, 5, 5), State(0, 0, 5, 6), State(0, 0, 5, 7), State(0, 0, 5, 8), State(0, 0, 6, 1), State(0, 0, 6, 2), State(0, 0, 6, 3), State(0, 0, 6, 4), State(0, 0, 6, 5), State(0, 0, 6, 6), State(0, 0, 6, 7), State(0, 0, 6, 8), State(0, 0, 7, 1), State(0, 0, 7, 2), State(0, 0, 7, 3), State(0, 0, 7, 4), Stat

e(0, 41, 3, 5), State(0, 41, 3, 6), State(0, 41, 3, 7), State(0, 41, 3, 8), State(0, 41, 4, 1), State(0, 41, 4, 2), State(0, 41, 4, 3), State(0, 41, 4, 4), State(0, 41, 4, 5), State(0, 41, 4, 6), State(0, 41, 4, 7), State(0, 41, 4, 8), State(0, 41, 5, 1), State(0, 41, 5, 2), State(0, 41, 5, 3), State(0, 41, 5, 4), State(0, 41, 5, 5), State(0, 41, 5, 6), State(0, 41, 5, 7), State(0, 41, 5, 8), State(0, 41, 6, 1), State(0, 41, 6, 2), State(0, 41, 6, 3), State(0, 41, 6, 4), State(0, 41, 6, 5), State(0, 41, 6, 6), State(0, 41, 6, 7), State(0, 41, 6, 8), State(0, 41, 7, 1), State(0, 41, 7, 2), State(0, 41, 7, 3), State(0, 41, 7, 4), State(0, 41, 7, 5), State(0, 41, 7, 6), State(0, 41, 7, 7), State(0, 41, 7, 8), State(0, 41, 8, 1), State(0, 41, 8, 2), State(0, 41, 8, 3), State(0, 41, 8, 4), State(0, 41, 8, 5), State(0, 41, 8, 6), State(0, 41, 8, 7), State(0, 41, 8, 8), State(0, 41, 9, 1), State(0, 41, 9, 2), State(0, 41, 9, 3), State(0, 41, 9, 4), State(0, 41, 9, 5), State(0, 41, 9, 6), Stat

 6, 1), State(1, 21, 6, 2), State(1, 21, 6, 3), State(1, 21, 6, 4), State(1, 21, 6, 5), State(1, 21, 6, 6), State(1, 21, 6, 7), State(1, 21, 6, 8), State(1, 21, 7, 1), State(1, 21, 7, 2), State(1, 21, 7, 3), State(1, 21, 7, 4), State(1, 21, 7, 5), State(1, 21, 7, 6), State(1, 21, 7, 7), State(1, 21, 7, 8), State(1, 21, 8, 1), State(1, 21, 8, 2), State(1, 21, 8, 3), State(1, 21, 8, 4), State(1, 21, 8, 5), State(1, 21, 8, 6), State(1, 21, 8, 7), State(1, 21, 8, 8), State(1, 21, 9, 1), State(1, 21, 9, 2), State(1, 21, 9, 3), State(1, 21, 9, 4), State(1, 21, 9, 5), State(1, 21, 9, 6), State(1, 21, 9, 7), State(1, 21, 9, 8), State(1, 21, 10, 1), State(1, 21, 10, 2), State(1, 21, 10, 3), State(1, 21, 10, 4), State(1, 21, 10, 5), State(1, 21, 10, 6), State(1, 21, 10, 7), State(1, 21, 10, 8), State(1, 22, 1, 1), State(1, 22, 1, 2), State(1, 22, 1, 3), State(1, 22, 1, 4), State(1, 22, 1, 5), State(1, 22, 1, 6), State(1, 22, 1, 7), State(1, 22, 1, 8), State(1, 22, 2, 1), State(1, 22, 2, 2), Stat

2, 1, 4, 3), State(2, 1, 4, 4), State(2, 1, 4, 5), State(2, 1, 4, 6), State(2, 1, 4, 7), State(2, 1, 4, 8), State(2, 1, 5, 1), State(2, 1, 5, 2), State(2, 1, 5, 3), State(2, 1, 5, 4), State(2, 1, 5, 5), State(2, 1, 5, 6), State(2, 1, 5, 7), State(2, 1, 5, 8), State(2, 1, 6, 1), State(2, 1, 6, 2), State(2, 1, 6, 3), State(2, 1, 6, 4), State(2, 1, 6, 5), State(2, 1, 6, 6), State(2, 1, 6, 7), State(2, 1, 6, 8), State(2, 1, 7, 1), State(2, 1, 7, 2), State(2, 1, 7, 3), State(2, 1, 7, 4), State(2, 1, 7, 5), State(2, 1, 7, 6), State(2, 1, 7, 7), State(2, 1, 7, 8), State(2, 1, 8, 1), State(2, 1, 8, 2), State(2, 1, 8, 3), State(2, 1, 8, 4), State(2, 1, 8, 5), State(2, 1, 8, 6), State(2, 1, 8, 7), State(2, 1, 8, 8), State(2, 1, 9, 1), State(2, 1, 9, 2), State(2, 1, 9, 3), State(2, 1, 9, 4), State(2, 1, 9, 5), State(2, 1, 9, 6), State(2, 1, 9, 7), State(2, 1, 9, 8), State(2, 1, 10, 1), State(2, 1, 10, 2), State(2, 1, 10, 3), State(2, 1, 10, 4), State(2, 1, 10, 5), State(2, 1, 10, 6), State(2, 1, 

 42, 6, 2), State(2, 42, 6, 3), State(2, 42, 6, 4), State(2, 42, 6, 5), State(2, 42, 6, 6), State(2, 42, 6, 7), State(2, 42, 6, 8), State(2, 42, 7, 1), State(2, 42, 7, 2), State(2, 42, 7, 3), State(2, 42, 7, 4), State(2, 42, 7, 5), State(2, 42, 7, 6), State(2, 42, 7, 7), State(2, 42, 7, 8), State(2, 42, 8, 1), State(2, 42, 8, 2), State(2, 42, 8, 3), State(2, 42, 8, 4), State(2, 42, 8, 5), State(2, 42, 8, 6), State(2, 42, 8, 7), State(2, 42, 8, 8), State(2, 42, 9, 1), State(2, 42, 9, 2), State(2, 42, 9, 3), State(2, 42, 9, 4), State(2, 42, 9, 5), State(2, 42, 9, 6), State(2, 42, 9, 7), State(2, 42, 9, 8), State(2, 42, 10, 1), State(2, 42, 10, 2), State(2, 42, 10, 3), State(2, 42, 10, 4), State(2, 42, 10, 5), State(2, 42, 10, 6), State(2, 42, 10, 7), State(2, 42, 10, 8), State(2, 43, 1, 1), State(2, 43, 1, 2), State(2, 43, 1, 3), State(2, 43, 1, 4), State(2, 43, 1, 5), State(2, 43, 1, 6), State(2, 43, 1, 7), State(2, 43, 1, 8), State(2, 43, 2, 1), State(2, 43, 2, 2), State(2, 43, 2, 3), 

6), State(3, 22, 8, 7), State(3, 22, 8, 8), State(3, 22, 9, 1), State(3, 22, 9, 2), State(3, 22, 9, 3), State(3, 22, 9, 4), State(3, 22, 9, 5), State(3, 22, 9, 6), State(3, 22, 9, 7), State(3, 22, 9, 8), State(3, 22, 10, 1), State(3, 22, 10, 2), State(3, 22, 10, 3), State(3, 22, 10, 4), State(3, 22, 10, 5), State(3, 22, 10, 6), State(3, 22, 10, 7), State(3, 22, 10, 8), State(3, 23, 1, 1), State(3, 23, 1, 2), State(3, 23, 1, 3), State(3, 23, 1, 4), State(3, 23, 1, 5), State(3, 23, 1, 6), State(3, 23, 1, 7), State(3, 23, 1, 8), State(3, 23, 2, 1), State(3, 23, 2, 2), State(3, 23, 2, 3), State(3, 23, 2, 4), State(3, 23, 2, 5), State(3, 23, 2, 6), State(3, 23, 2, 7), State(3, 23, 2, 8), State(3, 23, 3, 1), State(3, 23, 3, 2), State(3, 23, 3, 3), State(3, 23, 3, 4), State(3, 23, 3, 5), State(3, 23, 3, 6), State(3, 23, 3, 7), State(3, 23, 3, 8), State(3, 23, 4, 1), State(3, 23, 4, 2), State(3, 23, 4, 3), State(3, 23, 4, 4), State(3, 23, 4, 5), State(3, 23, 4, 6), State(3, 23, 4, 7), State(3,

), State(4, 2, 7, 6), State(4, 2, 7, 7), State(4, 2, 7, 8), State(4, 2, 8, 1), State(4, 2, 8, 2), State(4, 2, 8, 3), State(4, 2, 8, 4), State(4, 2, 8, 5), State(4, 2, 8, 6), State(4, 2, 8, 7), State(4, 2, 8, 8), State(4, 2, 9, 1), State(4, 2, 9, 2), State(4, 2, 9, 3), State(4, 2, 9, 4), State(4, 2, 9, 5), State(4, 2, 9, 6), State(4, 2, 9, 7), State(4, 2, 9, 8), State(4, 2, 10, 1), State(4, 2, 10, 2), State(4, 2, 10, 3), State(4, 2, 10, 4), State(4, 2, 10, 5), State(4, 2, 10, 6), State(4, 2, 10, 7), State(4, 2, 10, 8), State(4, 3, 1, 1), State(4, 3, 1, 2), State(4, 3, 1, 3), State(4, 3, 1, 4), State(4, 3, 1, 5), State(4, 3, 1, 6), State(4, 3, 1, 7), State(4, 3, 1, 8), State(4, 3, 2, 1), State(4, 3, 2, 2), State(4, 3, 2, 3), State(4, 3, 2, 4), State(4, 3, 2, 5), State(4, 3, 2, 6), State(4, 3, 2, 7), State(4, 3, 2, 8), State(4, 3, 3, 1), State(4, 3, 3, 2), State(4, 3, 3, 3), State(4, 3, 3, 4), State(4, 3, 3, 5), State(4, 3, 3, 6), State(4, 3, 3, 7), State(4, 3, 3, 8), State(4, 3, 4, 1), S

Excessive output truncated after 524295 bytes.

102480-element Vector{Any}:
 State(0, 0, 1, 1)
 State(0, 0, 1, 2)
 State(0, 0, 1, 3)
 State(0, 0, 1, 4)
 State(0, 0, 1, 5)
 State(0, 0, 1, 6)
 State(0, 0, 1, 7)
 State(0, 0, 1, 8)
 State(0, 0, 2, 1)
 State(0, 0, 2, 2)
 State(0, 0, 2, 3)
 State(0, 0, 2, 4)
 State(0, 0, 2, 5)
 ⋮
 State(20, 60, 9, 5)
 State(20, 60, 9, 6)
 State(20, 60, 9, 7)
 State(20, 60, 9, 8)
 State(20, 60, 10, 1)
 State(20, 60, 10, 2)
 State(20, 60, 10, 3)
 State(20, 60, 10, 4)
 State(20, 60, 10, 5)
 State(20, 60, 10, 6)
 State(20, 60, 10, 7)
 State(20, 60, 10, 8)

In [10]:
@show size(𝒮)
#@show size[1,1,1,1]
#(20, 60, 10, 10)
#length(𝒮[1]) = 120000 = 100*60*10*10

size(𝒮) = (102480,)


(102480,)

**Actions**

In [11]:
# the possible actions are whether accept or reject a family at the gate 
@enum Action REJECT ACCEPT

In [12]:
𝒜 = [REJECT, ACCEPT]

2-element Vector{Action}:
 REJECT::Action = 0
 ACCEPT::Action = 1

In [13]:
length(𝒜)

2

In [14]:
v′ = rand(params.visa_status, 1)

1-element Vector{Int64}:
 3

In [15]:
v′ = rand(params.visa_status, 1)[1]

1

In [16]:
# only inbounds if room for the family [assuming would not separate even though might]
# and if time is available to enter the airport 
validstate(s::State) = 1 ≤ s.c && 1 < s.t 

validstate (generic function with 1 method)

**Transition Function** 

In [17]:
#XXXXXXXXXXXXXXXXXXXXXXXXXNote from Lilian:XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
# check out https://juliapomdp.github.io/POMDPModelTools.jl/stable/distributions/
# we are supposed to return a sparsecat structure with all the states and the prob of going to them next
# so i think this is incorrect because we seem to be not returning that
# SparseCat(values, probabilities) to create a sparse categorical distribution.
#textbook page 148

#Other notes:
#T = P(S' | S, A), so the probs are always 100% for each action 

#I figured it out, just havent coded it yet, just have to send back a SparseCat with 
# if action = accept, new state (S') to be time -1 and seats -1 100% and the rest 0%
# if action = reject, send back exact same state space (s') but with time - 1 100% of the time

#this is assuming an accept means 100% you get on the plane and reject means you can't sneak onto it. I 
#I think it might be realistic to make an accept give you a 80% chance of actually getting a seat if accepted.
#i read your article and it seemed like it was so disorganized that some of the accepted people didnt 
#make it. 

#need to add global rep of all the states so we can check it

# think about how to fix the transition function 
# The transition function returns a distribution over next states given the current state and an action .
function T(s::State, a::Action)
    # randomly sample a family size and visa status 
    f′ = rand(params.family_sizes, 1)[1]
    v′ = rand(params.visa_status, 1)[1] # TODO: possibly make this weighted in some way 
    #next_states = Vector{State}(undef, Nₐ) 
    if a == ACCEPT 
        Nₐ = 2 #80% get a seat, 20% lost it due to poor organization
        next_states = Vector{State}(undef, Nₐ) 
        probabilities = [.80, .20]
        next_state_accept = State(s.c - s.f, s.t - 1, f′, v′) # they get seats
        if validstate(next_state_accept)
            next_states[1] = next_state_accept
        end
        next_states[2] = State(s.c, s.t - 1, f′, v′) # where they don't get seats...should work becuase of terminal state
    
    elseif a == REJECT
        Nₐ = 1
        next_states = Vector{State}(undef, Nₐ) 
        probabilities = [1.0]
        next_states[1] = State(s.c, s.t - 1, f′, v′) #where they don't get seats
    # handle out-of-bounds transitions
    else Nₐ = 1
        next_states = Vector{State}(undef, Nₐ) 
        probabilities = [1.0]
        next_states[1] = s
    end
    return SparseCat(next_states, probabilities)
end

    

T (generic function with 1 method)

In [18]:
# struct State
#     c::Int # chairs remaining 
#     t::Int # time remaining 
# #     f::Int # family size 
# #     v::Int # visa status 
# # end 
T(State(6, 4, 4, 5), ACCEPT)


# ACCEPT: State(50, 9, ANYTHING, ANYTHING)
# ACCEPT: State(42, 9, ANYTHING, ANYTHING)
# REJECT: State(50, 9, ANYTHING, ANYTHING)

                               [1mSparseCat distribution[22m           
                     [90m┌                                        ┐[39m 
   [0mState(2, 3, 5, 0) [90m┤[39m[38;5;2m■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■[39m[0m 0.8 [90m [39m 
   [0mState(6, 3, 5, 0) [90m┤[39m[38;5;2m■■■■■■■■■[39m[0m 0.2                           [90m [39m 
                     [90m└                                        ┘[39m 

**Reward Function**

In [19]:
function R(s::State, a::Action)
    # reward is just the visa status times family size i think! 
    if a == ACCEPT
        return s.v*s.f
    end
    return 0
end 

R (generic function with 1 method)

In [20]:
#render(mdp; show_rewards=true)
#@bind γ Slider(0:0.05:1, default=0.95, show_value=true)
# hard code for now. maybe come back to it. 
γ = 0.95

0.95

In [21]:
# when time is 0...possibly encode this into the parameters 
termination(s::State) = (s.t == 0 || s.c == 0)

termination (generic function with 1 method)

**MDP Formulation**

In [22]:
# We define the Airport abstract MDP type so we can reference it in other methods.
abstract type Evacuation <: MDP{State, Action} end

In [23]:
  # struct State
        #     c::Int # chairs remaining 
        #     t::Int # time remaining 
        #     f::Int # family size 
        #     v::Int # visa status 
        # end 
c_initial = params.capacity
t_initial = params.time
f_initial = rand(params.family_sizes, 1)[1]
v_initial = rand(params.visa_status, 1)[1]

initial_state = State(c_initial, t_initial, f_initial, v_initial)

State(20, 60, 3, 3)

In [None]:
statetype = typeof(initial_state)
@show initialstate_array = [initial_state]

In [None]:
initialstate_array

In [None]:
mdp = QuickMDP(Evacuation,
    states       = 𝒮,
    actions      = 𝒜,
    transition   = T,
    reward       = R,
    discount     = γ,
    initialstate = initialstate_array,
    isterminal   = termination,
    render       = render,
    statetype    = statetype #
    );

In [None]:
#render(mdp)

In [None]:
solver = ValueIterationSolver(max_iterations=30, belres=1e-6, verbose=true);

**Policy**

In [None]:
# Set discount factor to variable gamma and solve the MDP to obptain policy pi mapping states to actions a

In [None]:
@show policy = solve(solver, mdp) 

In [None]:
@requirements_info ValueIterationSolver() mdp()

In [None]:
# a = action(policy, s) # returns the optimal action for state s
# value(policy, s) # returns the optimal value at state s
