In [3]:
using POMDPs # for MDP type
using DiscreteValueIteration
using POMDPPolicies
using POMDPModelTools #for sparse cat 
using Parameters
using Random
using Plots; default(fontfamily="Computer Modern", framestyle=:box) # LaTex-style
using QuickPOMDPs
using Distributions 

In [4]:
# Seed the global RNG so the random draws made below (e.g. the default family sizes)
# are reproducible from run to run.
Random.seed!(123)

MersenneTwister(123)

**Environment Parameters**

In [5]:
# Problem parameters for the evacuation MDP.
# Family sizes are assumed normally distributed around 8 (the average family size in
# Afghanistan) with standard deviation 4. The distribution is truncated to [1, Inf) and
# each sample is then truncated toward zero (not rounded) to an integer, so sizes are >= 1.
@with_kw struct EvacuationParameters
    # TODO: need the probability that a particular family shows up at the door.
    family_sizes::Vector{Int} = trunc.(Int, rand(TruncatedNormal(8, 4.0, 1, Inf), 10))
    visa_status::Vector{Int} = [-5, -4, -3, -2, 0, 1, 2, 3, 4, 5] # TODO: map to various statuses
    capacity::Int = 100 # upper bound of the "chairs remaining" state variable
    time::Int = 60 # upper bound of the "time remaining" state variable
    size::Tuple{Int, Int} = (length(visa_status), length(family_sizes)) # size of grid
    # Probability that an accepted family actually gets on the plane.
    # Declared as a concrete Float64 (not the abstract `Real`) so the field is concretely typed.
    p_transition::Float64 = 0.8
    # TODO: null_state::State = State(x, 0, x, x) == 0 [how would we do this?]
end


EvacuationParameters

In [6]:
# Global parameter set built from the defaults; read by the state space and transition code below.
params = EvacuationParameters(); 

**States**

In [7]:
"""
    State(c, t, f, v)

One state of the evacuation problem, made of four discrete integer values.

# Fields
- `c`: chairs remaining
- `t`: time remaining
- `f`: family size
- `v`: visa status
"""
struct State
    c::Int
    t::Int
    f::Int
    v::Int
end

In [None]:
# The state space for the evacuation problem is the set of all combinations of
# chairs remaining, time remaining, family-size index and visa-status index.
#
# It is built into a typed `Vector{State}` with `push!`. Growing an untyped `[]` via
# `ùíÆ = [ùíÆ; new]` copies the whole array on every insertion, which is quadratic work
# over capacity * time * size[2] * size[1] states, and leaves the elements typed `Any`.
# The iteration order is unchanged: `c` varies slowest, `v` fastest.
#
# NOTE(review): states with c == 0 or t == 0 are not part of this set, and `f` / `v`
# range over indices here while `T` draws values from `params.family_sizes` and
# `params.visa_status`. Confirm the intended encoding and whether end states are needed.
ùíÆ = State[]
sizehint!(ùíÆ, params.capacity * params.time * prod(params.size))

for c in 1:params.capacity, t in 1:params.time, f in 1:params.size[2], v in 1:params.size[1]
    push!(ùíÆ, State(c, t, f, v))
end


In [None]:
@show size(ùíÆ)
# With the default parameters the state vector is expected to hold
# capacity * time * size[2] * size[1] = 100 * 60 * 10 * 10 = 600000 states.
# (An earlier note recorded 20 * 60 * 10 * 10 = 120000, presumably from a run with capacity = 20.)

**Actions**

In [None]:
# The two decisions available at the gate: turn a family away or let it through.
@enum Action begin
    REJECT
    ACCEPT
end

In [None]:
# Action space: every value of the `Action` enum, in declaration order (REJECT, ACCEPT).
ùíú = collect(instances(Action))

In [None]:
length(ùíú)

In [None]:
# Scratch check: draw one visa status, returned as a 1-element vector.
# (Identifier restored to `v′`; the mis-encoded form was not valid Julia.)
v′ = rand(params.visa_status, 1)

In [None]:
# Scratch check: draw one visa status and unwrap it to a scalar.
# (Identifier restored to `v′`; the mis-encoded form was not valid Julia.)
v′ = rand(params.visa_status, 1)[1]

**Transition Function** 

In [None]:
#XXXXXXXXXXXXXXXXXXXXXXXXXNote from Lilian:XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
# Check out https://juliapomdp.github.io/POMDPModelTools.jl/stable/distributions/
# We are supposed to return a SparseCat structure with all the next states and the probability of going to each,
# so I think this is incorrect because we do not seem to be returning that.
# Use SparseCat(values, probabilities) to create a sparse categorical distribution.
# Textbook page 148.

# Other notes:
# T = P(S' | S, A), so the probabilities are always 100% for each action.

# I figured it out, just haven't coded it yet: we just have to send back a SparseCat with
# - if action = accept, the new state (S') has time - 1 and seats - 1 with 100% probability and the rest 0%;
# - if action = reject, send back the exact same state (S') but with time - 1, 100% of the time.

# This assumes an accept means you get on the plane 100% of the time and a reject means you can't sneak onto it.
# I think it might be realistic to make an accept give you an 80% chance of actually getting a seat if accepted.
# I read your article and it seemed like things were so disorganized that some of the accepted people didn't
# make it.

# Need to add a global representation of all the states so we can check it.

# Think about how to fix the transition function.
# The transition function returns a distribution over next states given the current state and an action.
"""
    T(s::State, a::Action)

Return a `SparseCat` distribution over next states for taking action `a` in state `s`.

Time remaining always drops by one. The family size and visa status of each next state
are sampled uniformly from `params.family_sizes` and `params.visa_status`.

- `REJECT`: the chairs are unchanged with probability 1.
- `ACCEPT`: with probability `params.p_transition` the family boards and the chairs drop
  by `s.f`; otherwise the chairs are unchanged.

Every entry of the returned support is initialised and the probabilities sum to one.
"""
function T(s::State, a::Action)
    t_next = s.t - 1

    # Build a next state with `c` chairs and a freshly sampled arriving family.
    # TODO: possibly make the visa-status draw weighted in some way.
    # NOTE(review): sampling here makes the returned distribution vary between calls
    # for the same (s, a); enumerate all (f, v) pairs if an exact model is required.
    draw_state(c) = State(c, t_next, rand(params.family_sizes), rand(params.visa_status))

    # A rejected family cannot board, so only the clock and the next family change.
    a == ACCEPT || return SparseCat([draw_state(s.c)], [1.0])

    # Assume an accepted family actually gets on the plane with probability p_board.
    p_board = params.p_transition
    # NOTE(review): `s.c - s.f` can be negative when the family is larger than the
    # remaining chairs; confirm how over-capacity should be handled.
    boarded = draw_state(s.c - s.f)
    not_boarded = draw_state(s.c)
    return SparseCat([boarded, not_boarded], [p_board, 1 - p_board])
end

    

In [None]:
# Sanity check of the transition function on one hand-picked state.
# `State` fields, in order: c (chairs remaining), t (time remaining),
# f (family size), v (visa status).
T(State(50, 10, 8, 1), ACCEPT)


ACCEPT: State(50, 9, ANYTHING, ANYTHING)
ACCEPT: State(42, 9, ANYTHING, ANYTHING)
REJECT: State(50, 9, ANYTHING, ANYTHING)

**Reward Function**

In [None]:
"""
    R(s::State, a=missing)

Return the reward for state `s`: visa status times family size (`s.v * s.f`).

`a` may be an `Action` or `missing` and is currently ignored. The annotation is
`Union{Action,Missing}` so that the `missing` default is accepted; with `a::Action`
the one-argument call `R(s)` threw a `MethodError`.
"""
function R(s::State, a::Union{Action,Missing}=missing)
    # NOTE(review): the reward does not depend on `a`, so REJECT earns the same as
    # ACCEPT. Confirm whether only accepted families should contribute.
    return s.v * s.f
end

In [None]:
#render(mdp; show_rewards=true)
#@bind γ Slider(0:0.05:1, default=0.95, show_value=true)
# Discount factor, hard-coded for now; maybe come back to it.
# (Identifier restored to `γ`; the mis-encoded form did not parse.)
γ = 0.95

In [None]:
# A state is terminal once time has run out or no chairs remain.
# `<=` is used rather than `==` because the transition subtracts the whole family size
# from the chairs, so the count can skip past zero and become negative.
termination(s::State) = (s.t <= 0 || s.c <= 0)

**MDP Formulation**

In [None]:
# We define the Evacuation abstract MDP type so we can reference it in other methods.
abstract type Evacuation <: MDP{State, Action} end

In [None]:
# Assemble the MDP from the pieces defined in the cells above.
# `ùíÆ` and `ùíú` are the identifiers under which the state and action collections are
# defined earlier in this file.
# NOTE(review): `render = render` passes the generic `render` function itself rather
# than a problem-specific renderer; confirm this is intended.
mdp = QuickMDP(Evacuation,
    states       = ùíÆ,
    actions      = ùíú,
    transition   = T,
    reward       = R,
    discount     = γ,
    initialstate = ùíÆ,
    isterminal   = termination,
    render       = render
    );

In [None]:
#render(mdp)

In [None]:
# Value-iteration solver capped at 30 iterations, with a residual tolerance of 1e-6
# and progress printed while it runs.
solver = ValueIterationSolver(;
    max_iterations=30,
    belres=1e-6,
    verbose=true,
);

**Policy**

In [None]:
# Set the discount factor to the variable gamma and solve the MDP to obtain a policy pi mapping states s to actions a.

In [None]:
# Solve the MDP with the configured solver, keep the result in `policy`, and print it.
@show policy = solve(solver, mdp) 

In [None]:
# List the POMDPs.jl interface functions the solver needs from this problem.
# The problem argument is the `mdp` instance itself; `mdp()` would try to call the
# instance as a function.
@requirements_info ValueIterationSolver() mdp

In [None]:
# a = action(policy, s) # returns the optimal action for state s
# value(policy, s) # returns the optimal value at state s
