In [2]:
using POMDPs # for MDP type
using DiscreteValueIteration
using POMDPPolicies
using POMDPModelTools
using Parameters
using Random
using Plots; default(fontfamily="Computer Modern", framestyle=:box) # LaTex-style
using QuickPOMDPs
using Distributions 

In [3]:
Random.seed!(123)

MersenneTwister(123)

**Environment Parameters**

In [4]:
# assume families normally distributed as integers around average family size in afghanistan of 8. 
# Truncated to include 1 or greater and rounded to be integers 
@with_kw struct EvacuationParameters
    # need the probability that a particular family shows up at the door....
    family_sizes::Vector{Int} =  [trunc(Int, x) for x in rand(TruncatedNormal(8, 4.0, 1, Inf), 10)] 
    visa_status::Vector{Int} = [-5, -4, -3, -2, 0, 1, 2, 3, 4, 5] #TODO: map to various status 
    capacity::Int = 20
    time::Int = 60 
    size::Tuple{Int, Int} = (length(visa_status), length(family_sizes)) # size of grid 
    p_transition::Real = 0.7 # don't we always transition into this since time moves forward? I'm confused... 
#    null_state::State = State(x, 0, x, x) == 0 [how would do this?]
end


EvacuationParameters

In [5]:
params = EvacuationParameters(); 

**States**

In [6]:
# A state s in the evacuation problem is a discrete set of 4 values: 
# chairs remaining, time remaining, family size, and visa status
struct State
    c::Int # chairs remaining 
    t::Int # time remaining 
    f::Int # family size 
    v::Int # visa status 
end 

In [7]:
# The state space S for the evacuation problem is the set of 
# all combinations 
# Question: Do I need the end state here? 
#𝒮 = [[State(c, t, f, v) for  c=1:params.capacity, t=1:params.time, f=1:params.size[2], v=1:params.size[1]]]
𝒮 = []

for c in 1:params.capacity
    for t in 1:params.time
        for f in 1:params.size[2]
            for v in 1:params.size[1]
                new = State(c, t, f, v)
                𝒮 = [𝒮; new]
            end
        end        
    end
end


In [8]:
@show size(𝒮)
#@show size[1,1,1,1]
#(20, 60, 10, 10)
#length(𝒮[1]) = 120000 = 20*60*10*10

size(𝒮) = (120000,)


(120000,)

**Actions**

In [9]:
# the possible actions are whether accept or reject a family at the gate 
@enum Action REJECT ACCEPT

In [10]:
𝒜 = [REJECT, ACCEPT]

2-element Vector{Action}:
 REJECT::Action = 0
 ACCEPT::Action = 1

**Transition Function** 

In [15]:
#XXXXXXXXXXXXXXXXXXXXXXXXXNote from Lilian:XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
# check out https://juliapomdp.github.io/POMDPModelTools.jl/stable/distributions/
# we are supposed to return a sparsecat structure with all the states and the prob of going to them next
# so i think this is incorrect because we seem to be not returning that
# SparseCat(values, probabilities) to create a sparse categorical distribution.
#textbook page 148

#Other notes:
#T = P(S' | S, A), so the probs are always 100% for each action 

#I figured it out, just havent coded it yet, just have to send back a SparseCat with 
# if action = accept, new state (S') to be time -1 and seats -1 100% and the rest 0%
# if action = reject, send back exact same state space (s') but with time - 1 100% of the time

#this is assuming an accept means 100% you get on the plane and reject means you can't sneak onto it. I 
#I think it might be realistic to make an accept give you a 80% chance of actually getting a seat if accepted.
#i read your article and it seemed like it was so disorganized that some of the accepted people didnt 
#make it. 

#need to add global rep of all the states so we can check it

# think about how to fix the transition function 
# The transition function returns a distribution over next states given the current state and an action .
function T(s::State, a::Action)
     Nₐ = length(𝒜) #should be 2
     next_states = Vector{State}(undef, Nₐ + 1)
#     probabilities = zeros(Nₐ + 1)
#     p_transition = params.p_transition # what are our probabilities here????    
#     for (i, a_prime) in enumerate(𝒜)
#     # if we accept the person, we want to decriment by family size 
    #if a == ACCEPT
    #    s_prime.c = s.c - s.f
    #end 
    s_prime.t = s_prime.t - 1 # how to update  time
    # maybe make some terminal state when time = 0?
    # do I return something here? 
    return s_prime 
end

#marking out what each part does
#function T(s::State, a::Action)
#    #if R(s) != 0 
    #    return Deterministic(params.null_state)
    #end

#    Nₐ = length(𝒜) #length of action space, so 2
#    next_states = Vector{State}(undef, Nₐ + 1) #creates an uninitialized vector of 3 
#    probabilities = zeros(Nₐ + 1) #make a zero vector of length 3
#    p_transition = params.p_transition #transitions probs I've determined in the notes above

#    for (i, a′) in enumerate(𝒜)
#        prob = (a′ == a) ? p_transition : (1 - p_transition) / (Nₐ - 1)
#        destination = s + MOVEMENTS[a′]
#        next_states[i+1] = destination

#        if inbounds(destination)
#            probabilities[i+1] += prob
#        end
#    end
    
    # handle out-of-bounds transitions
#    next_states[1] = s
#    probabilities[1] = 1 - sum(probabilities)

#    return SparseCat(next_states, probabilities)
#end


    

T (generic function with 1 method)

**Reward Function**

In [14]:
function R(s::State, a::Action=missing)
    # reward is just the visa status times family size i think! 
    return s.v*s.f
end 

R (generic function with 2 methods)

In [20]:
#render(mdp; show_rewards=true)
#@bind γ Slider(0:0.05:1, default=0.95, show_value=true)
# hard code for now. maybe come back to it. 
γ = 0.95

0.95

In [21]:
# when time is 0...possibly encode this into the parameters 
termination(s::State) = s.t == 0

termination (generic function with 1 method)

**MDP Formulation**

In [22]:
# We define the Airport abstract MDP type so we can reference it in other methods.
abstract type Evacuation <: MDP{State, Action} end

In [23]:
mdp = QuickMDP(Evacuation,
    states       = 𝒮,
    actions      = 𝒜,
    transition   = T,
    reward       = R,
    discount     = γ,
    initialstate = 𝒮,
    isterminal   = termination,
    render       = render
    );

In [24]:
render(mdp)

LoadError: MethodError: no method matching render()
[0mClosest candidates are:
[0m  render([91m::Union{QuickMDP, QuickPOMDP}[39m, [91m::Any...[39m; kwargs...) at /Users/lisaeinstein/.julia/packages/QuickPOMDPs/mIT3P/src/quick.jl:211
[0m  render([91m::Union{MDP, POMDP}[39m) at /Users/lisaeinstein/.julia/packages/POMDPModelTools/SycBB/src/visualization.jl:50
[0m  render([91m::Union{MDP, POMDP}[39m, [91m::Any[39m) at /Users/lisaeinstein/.julia/packages/POMDPModelTools/SycBB/src/visualization.jl:30

In [25]:
solver = ValueIterationSolver(max_iterations=30, belres=1e-6, verbose=true);

**Policy**

In [26]:
# Set discount factor to variable gamma and solve the MDP to obptain policy pi mapping states to actions a

In [27]:
policy = solve(solver, mdp)

INFO: POMDPLinter requirements for [34msolve(::ValueIterationSolver, ::Union{MDP,POMDP})[39m and dependencies. ([✔] = implemented correctly; [X] = not implemented; [?] = could not determine)

For [34msolve(::ValueIterationSolver, ::Union{MDP,POMDP})[39m:
[32m  [✔] discount(::QuickMDP{EvacuationArray{State, 4}ActionNamedTuple{(:stateindex, :isterminal, :render, :transition, :reward, :states, :actions, :discount, :initialstate, :actionindex), Tuple{Dict{Array{State, 4}, Int64}, typeof(termination), typeof(render), typeof(T), typeof(R), Vector{Array{State, 4}}, Vector{Action}, Float64, Vector{Array{State, 4}}, Dict{Action, Int64}}}})[39m
[32m  [✔] transition(::QuickMDP{EvacuationArray{State, 4}ActionNamedTuple{(:stateindex, :isterminal, :render, :transition, :reward, :states, :actions, :discount, :initialstate, :actionindex), Tuple{Dict{Array{State, 4}, Int64}, typeof(termination), typeof(render), typeof(T), typeof(R), Vector{Array{State, 4}}, Vector{Action}, Float64, Vector{Array{

LoadError: MethodError: no method matching T(::Array{State, 4}, ::Action)
[0mClosest candidates are:
[0m  T([91m::State[39m, ::Action) at In[18]:15

In [16]:
@requirements_info ValueIterationSolver() mdp()

LoadError: UndefVarError: mdp not defined

In [None]:
# a = action(policy, s) # returns the optimal action for state s
# value(policy, s) # returns the optimal value at state s
