In [1]:
using POMDPs # for MDP type
using DiscreteValueIteration
using POMDPPolicies
using POMDPModelTools #for sparse cat 
using Parameters
using Random
using Plots; default(fontfamily="Computer Modern", framestyle=:box) # LaTex-style
using QuickPOMDPs
using Distributions 

In [2]:
Random.seed!(123)

MersenneTwister(123)

**Environment Parameters**

In [3]:
# assume families normally distributed as integers around average family size in afghanistan of 8. 
# Truncated to include 1 or greater and rounded to be integers 
@with_kw struct EvacuationParameters
    # need the probability that a particular family shows up at the door....
    family_sizes::Vector{Int} =  [trunc(Int, x) for x in rand(TruncatedNormal(3, 1, 1, Inf), 10)] 
    visa_status::Vector{Int} = [-4, -3, -2, 0, 1, 2, 3, 4] #TODO: map to various status 
    capacity::Int = 20
    time::Int = 60
    size::Tuple{Int, Int} = (length(visa_status), length(family_sizes)) # size of grid 
    p_transition::Real = 0.8 # don't we always transition into this since time moves forward? I'm confused... 
#    null_state::State = State(x, 0, x, x) == 0 [how would do this?]
end


EvacuationParameters

In [4]:
params = EvacuationParameters(); 

**States**

In [5]:
# A state s in the evacuation problem is a discrete set of 4 values: 
# chairs remaining, time remaining, family size, and visa status
struct State
    c::Int # chairs remaining 
    t::Int # time remaining 
    f::Int # family size 
    v::Int # visa status 
end 

In [6]:
# The state space S for the evacuation problem is the set of 
# all combinations 
# Question: Do I need the end state here? 
#𝒮 = [[State(c, t, f, v) for  c=1:params.capacity, t=1:params.time, f=1:params.size[2], v=1:params.size[1]]]
𝒮 = []

for c in 1:params.capacity
    for t in 1:params.time
        for f in 1:params.size[2]
            for v in 1:params.size[1]
                new = State(c, t, f, v)
                𝒮 = [𝒮; new]
            end
        end        
    end
end


In [7]:
@show size(𝒮)
#@show size[1,1,1,1]
#(20, 60, 10, 10)
#length(𝒮[1]) = 120000 = 100*60*10*10

size(𝒮) = (96000,)


(96000,)

**Actions**

In [8]:
# the possible actions are whether accept or reject a family at the gate 
@enum Action REJECT ACCEPT

In [9]:
𝒜 = [REJECT, ACCEPT]

2-element Vector{Action}:
 REJECT::Action = 0
 ACCEPT::Action = 1

In [10]:
length(𝒜)

2

In [11]:
v′ = rand(params.visa_status, 1)

1-element Vector{Int64}:
 3

In [12]:
v′ = rand(params.visa_status, 1)[1]

1

**Transition Function** 

In [13]:
#XXXXXXXXXXXXXXXXXXXXXXXXXNote from Lilian:XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
# check out https://juliapomdp.github.io/POMDPModelTools.jl/stable/distributions/
# we are supposed to return a sparsecat structure with all the states and the prob of going to them next
# so i think this is incorrect because we seem to be not returning that
# SparseCat(values, probabilities) to create a sparse categorical distribution.
#textbook page 148

#Other notes:
#T = P(S' | S, A), so the probs are always 100% for each action 

#I figured it out, just havent coded it yet, just have to send back a SparseCat with 
# if action = accept, new state (S') to be time -1 and seats -1 100% and the rest 0%
# if action = reject, send back exact same state space (s') but with time - 1 100% of the time

#this is assuming an accept means 100% you get on the plane and reject means you can't sneak onto it. I 
#I think it might be realistic to make an accept give you a 80% chance of actually getting a seat if accepted.
#i read your article and it seemed like it was so disorganized that some of the accepted people didnt 
#make it. 

#need to add global rep of all the states so we can check it

# think about how to fix the transition function 
# The transition function returns a distribution over next states given the current state and an action .
function T(s::State, a::Action)
    
    f′ = rand(params.family_sizes, 1)[1]
    v′ = rand(params.visa_status, 1)[1] # TODO: possibly make this weighted in some way 
    
    if a == ACCEPT
        # struct State
        #     c::Int # chairs remaining 
        #     t::Int # time remaining 
        #     f::Int # family size 
        #     v::Int # visa status 
        # end 
        
        Nₐ = 2 #80% get a seat, 20% lost it due to poor orgnaization
        next_states = Vector{State}(undef, Nₐ) 
        probabilities = [.80, .20]
        
        next_states[1] = State(s.c - s.f, s.t - 1, f′, v′) #they get seats
        next_states[2] = State(s.c, s.t - 1, f′, v′) #where they don't get seats
        
    elseif a == REJECT
        Nₐ = 1
        next_states = Vector{State}(undef, Nₐ) 
        probabilities = [1.0]
        next_states[1] = State(s.c, s.t - 1, f′, v′) #where they don't get seats
    
    end
    return SparseCat(next_states, probabilities)
end

    

T (generic function with 1 method)

In [30]:
# struct State
#     c::Int # chairs remaining 
#     t::Int # time remaining 
# #     f::Int # family size 
# #     v::Int # visa status 
# # end 
T(State(50, 10, 8, 1), ACCEPT)


# ACCEPT: State(50, 9, ANYTHING, ANYTHING)
# ACCEPT: State(42, 9, ANYTHING, ANYTHING)
# REJECT: State(50, 9, ANYTHING, ANYTHING)

                                [1mSparseCat distribution[22m           
                      [90m┌                                        ┐[39m 
   [0mState(42, 9, 3, 2) [90m┤[39m[38;5;2m■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■[39m[0m 0.8 [90m [39m 
   [0mState(50, 9, 3, 2) [90m┤[39m[38;5;2m■■■■■■■■■[39m[0m 0.2                           [90m [39m 
                      [90m└                                        ┘[39m 

**Reward Function**

In [32]:
function R(s::State, a::Action)
    # reward is just the visa status times family size i think! 
    if a == ACCEPT
        return s.v*s.f
    end
    return 0
end 

R (generic function with 2 methods)

In [33]:
#render(mdp; show_rewards=true)
#@bind γ Slider(0:0.05:1, default=0.95, show_value=true)
# hard code for now. maybe come back to it. 
γ = 0.95

0.95

In [34]:
# when time is 0...possibly encode this into the parameters 
termination(s::State) = (s.t == 0 || s.c == 0)

termination (generic function with 1 method)

**MDP Formulation**

In [35]:
# We define the Airport abstract MDP type so we can reference it in other methods.
abstract type Evacuation <: MDP{State, Action} end

In [40]:
  # struct State
        #     c::Int # chairs remaining 
        #     t::Int # time remaining 
        #     f::Int # family size 
        #     v::Int # visa status 
        # end 
c_initial = params.capacity
t_initial = params.time
f_initial = rand(params.family_sizes, 1)[1]
v_initial = rand(params.visa_status, 1)[1]

initialstate = State(c_initial, t_initial, f_initial, v_initial) 

State(20, 60, 3, -2)

In [67]:
typeof(initialstate)

State

In [69]:

mdp = QuickMDP(Evacuation,
    states       = 𝒮,
    actions      = 𝒜,
    transition   = T,
    reward       = R,
    discount     = γ,
    initialstate = initialstate,
    isterminal   = termination,
    render       = render,
    statetype    = typeof(initialstate) #
    );

In [70]:
#render(mdp)

In [71]:
solver = ValueIterationSolver(max_iterations=30, belres=1e-6, verbose=true);

**Policy**

In [72]:
# Set discount factor to variable gamma and solve the MDP to obptain policy pi mapping states to actions a

In [73]:
@show policy = solve(solver, mdp) 

LoadError: KeyError: key State(1, 0, 4, 2) not found

In [27]:
@requirements_info ValueIterationSolver() mdp()

LoadError: MethodError: objects of type QuickMDP{Evacuation, Any, Action, NamedTuple{(:stateindex, :isterminal, :render, :transition, :reward, :states, :actions, :discount, :initialstate, :actionindex), Tuple{Dict{State, Int64}, typeof(termination), typeof(render), typeof(T), typeof(R), Vector{Any}, Vector{Action}, Float64, Vector{Any}, Dict{Action, Int64}}}} are not callable

In [None]:
# a = action(policy, s) # returns the optimal action for state s
# value(policy, s) # returns the optimal value at state s
