In [1]:
using POMDPs # for MDP type
using DiscreteValueIteration
using POMDPPolicies
using POMDPModelTools #for sparse cat 
using Parameters
using Random
using Plots; default(fontfamily="Computer Modern", framestyle=:box) # LaTex-style
using QuickPOMDPs
using Distributions 
#using PlotlyJS
using LinearAlgebra
using POMDPSimulators
using Measures

In [2]:
Random.seed!(0xC0FFEE)

MersenneTwister(12648430)

**States**

In [3]:
# A state s in the evacuation problem is a discrete set of 4 values: 
# chairs remaining, time remaining, family size, and visa status
struct State
    c::Int # chairs remaining 
    t::Int # time remaining 
    f::Int # family size 
    v::Int # visa status 
end 

**Environment Parameters**

In [4]:
@with_kw struct EvacuationParameters
    family_sizes::Vector{Int} = [1, 2, 3, 4, 5] # set with no repeats 
    family_prob = [.1, .2, .3, .2, .2]
    visa_status::Vector{Int} = [-2, -1, 0, 1, 2] #TODO: map to various status 
    visa_prob = [.1, .1, .4, .2, .2]
    # p_visa_status::Vector{Float} = # must sum to 1 
    capacity::Int = 60 # keeping these both as integers of 20 for now. 
    time::Int = 60
    size::Tuple{Int, Int} = (length(visa_status), length(family_sizes)) # size of grid 
    p_transition::Real = 0.8 # don't we always transition into this since time moves forward? I'm confused... 
    null_state::State = State(-1, -1, -1 ,-1) # is there someway to do this???
    accept_prob = [.80, .20]
    reject_prob = [1.0]
end

EvacuationParameters

In [5]:
params = EvacuationParameters(); 

In [6]:
@show number_states = params.capacity * params.time * size(params.family_sizes)[1] * size(params.visa_status)[1]

number_states = params.capacity * params.time * (size(params.family_sizes))[1] * (size(params.visa_status))[1] = 90000


90000

In [7]:
# The state space S for the evacuation problem is the set of all combinations 
𝒮 = []
for c in 0:params.capacity # capacity ends at 0 
    for t in 0:params.time # time ends at 0 
        for f in params.family_sizes # family size here we should have the ACTUAL family sizes 
            for v in params.visa_status # actual visa statuses 
                new = State(c, t, f, v) 
                𝒮 = [𝒮; new]
            end
        end        
    end
end
𝒮 = [𝒮; params.null_state]




93026-element Vector{Any}:
 State(0, 0, 1, -2)
 State(0, 0, 1, -1)
 State(0, 0, 1, 0)
 State(0, 0, 1, 1)
 State(0, 0, 1, 2)
 State(0, 0, 2, -2)
 State(0, 0, 2, -1)
 State(0, 0, 2, 0)
 State(0, 0, 2, 1)
 State(0, 0, 2, 2)
 State(0, 0, 3, -2)
 State(0, 0, 3, -1)
 State(0, 0, 3, 0)
 ⋮
 State(60, 60, 3, 2)
 State(60, 60, 4, -2)
 State(60, 60, 4, -1)
 State(60, 60, 4, 0)
 State(60, 60, 4, 1)
 State(60, 60, 4, 2)
 State(60, 60, 5, -2)
 State(60, 60, 5, -1)
 State(60, 60, 5, 0)
 State(60, 60, 5, 1)
 State(60, 60, 5, 2)
 State(-1, -1, -1, -1)

**Actions**

In [8]:
# the possible actions are whether accept or reject a family at the gate 
@enum Action REJECT ACCEPT

In [9]:
𝒜 = [REJECT, ACCEPT]

2-element Vector{Action}:
 REJECT::Action = 0
 ACCEPT::Action = 1

In [10]:
# only inbounds if room for the family [assuming would not separate even though might]
# and if time is available to enter the airport 
validtime(s::State) = 0 < s.t 

validtime (generic function with 1 method)

In [11]:
validcapacity(s::State) = 0 ≤ s.c # maybe try back to 0 

validcapacity (generic function with 1 method)

**Transition Function** 

In [69]:
#***** OLD ONE THAT WORKED ******
function T(s::State, a::Action)
    next_states = []
    if validtime(s) 
        
        f′ = rand(params.family_sizes) # pull according to those probabilities 
        v′ = rand(params.visa_status) # TODO: possibly make this weighted in some way 
        # keep pushing to next states and have a bigger associated probability mass with those possible next states. 
        # need a for loop iterating over all family sizes and and visa statuses 
        
        if a == ACCEPT 
            next_state_accept = State(s.c - s.f, s.t - 1, f′, v′) # they get seats
            next_state_reject = State(s.c, s.t - 1, f′, v′)
            push!(next_states, next_state_accept)
            push!(next_states, next_state_reject)
            if !validcapacity(next_state_accept) 
                probabilities = [0, 1] #no room for full family :( so we make probability 0 to accept and 1 reject
            else
                probabilities = [.80, .20]
            end

        elseif a == REJECT
            probabilities = [1.0]
            push!(next_states, State(s.c, s.t - 1, f′, v′))
        end
    else
        push!(next_states,params.null_state)
        probabilities = [1]
    end
    return SparseCat(next_states, probabilities)
end

T (generic function with 1 method)

In [53]:
# #***** ENUMERATING OVER ALL STATES ******

# function T(s::State, a::Action)
#     #c::Int # chairs remaining 
#     #t::Int # time remaining 
#     #f::Int # family size 
#     #v::Int # visa status 
#     next_states = []
#     probabilities = [] 

# #     if !validtime(s) 
# #         push!(next_states,params.null_state)
# #         push!(probabilities, 1) # double check 
# #     else
#     if validtime(s)
#         if a == ACCEPT 
#             next_state_accept = State(s.c - s.f, s.t - 1, 1, 1) # they get seats
#             if !validcapacity(next_state_accept) 
#                 prob = [0,1] #no room for full family :( so we make probability 0 to accept and 1 reject
#             else
#                 prob = params.accept_prob
#             end
#             for f in 1:length(params.family_sizes)
#                 for v in 1:length(params.visa_status)
#                      #if get on plan
#                     push!(next_states, State(s.c - s.f, s.t - 1, params.family_sizes[f], params.visa_status[v]))
#                     push!(probabilities, prob[1] * params.visa_prob[v] * params.family_prob[f])
#                      #if not
#                     push!(next_states, State(s.c, s.t - 1, params.family_sizes[f], params.visa_status[v]))
#                     push!(probabilities, prob[2] * params.visa_prob[v] * params.family_prob[f])                    
#                 end
#             end
#         else   #if reject     
#             for f in 1:length(params.family_sizes)
#                 for v in 1:length(params.visa_status)
#                     push!(next_states, State(s.c, s.t - 1, params.family_sizes[f], params.visa_status[v]))
#                     push!(probabilities, params.reject_prob[1] * params.visa_prob[v] * params.family_prob[f])    
#                 end
#             end  
#         end
#     end                
#     return SparseCat(next_states, probabilities)
# end      

T (generic function with 1 method)

In [70]:
check = T(State(2,0,2,2), ACCEPT)
#check = T(State(0,10,2,2), ACCEPT) # im confused with what should be happening here....

                                   [1mSparseCat distribution[22m           
                         [90m┌                                        ┐[39m 
   [0mState(-1, -1, -1, -1) [90m┤[39m[38;5;2m■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■[39m[0m 1.0 [90m [39m 
                         [90m└                                        ┘[39m 

**Reward Function**

In [71]:
function R(s::State, a::Action)
    # reward is just the visa status times family size i think! 
    if a == ACCEPT
        return s.v*s.f
    end
    return 0
end 

R (generic function with 1 method)

In [72]:
#render(mdp; show_rewards=true)
#@bind γ Slider(0:0.05:1, default=0.95, show_value=true)
# hard code for now. maybe come back to it. 
γ = 0.95

0.95

In [73]:
termination(s::State)= s == params.null_state # change to 1 or the other 

termination (generic function with 1 method)

**MDP Formulation**

In [74]:
# We define the Airport abstract MDP type so we can reference it in other methods.
abstract type Evacuation <: MDP{State, Action} end

In [75]:
c_initial = params.capacity
t_initial = params.time
f_initial = rand(params.family_sizes, 1)[1]
v_initial = rand(params.visa_status, 1)[1]

initial_state = State(c_initial, t_initial, f_initial, v_initial)

State(60, 60, 5, 0)

In [76]:
statetype = typeof(initial_state)
initialstate_array = [initial_state]

1-element Vector{State}:
 State(60, 60, 5, 0)

In [77]:
mdp = QuickMDP(Evacuation,
    states       = 𝒮,
    actions      = 𝒜,
    transition   = T,
    reward       = R,
    discount     = γ,
    initialstate = initialstate_array, 
    isterminal   = termination,
    render       = render,
    statetype    = statetype 
    );

In [78]:
solver = ValueIterationSolver(max_iterations=30, belres=1e-6, verbose=true);

In [79]:
# MY PREDICTION ABOUT WHAT IS GOING WRONG: 
# Read more about simulations. maybe we are supposed to plot the policy that is created from the simulation 
# we are close! 

**Policy**

In [91]:
mdp_policy = solve(solver, mdp) 

[Iteration 1   ] residual:       83.5 | iteration runtime:    375.053 ms, (     0.375 s total)
[Iteration 2   ] residual:       29.1 | iteration runtime:    346.695 ms, (     0.722 s total)
[Iteration 3   ] residual:       28.9 | iteration runtime:    360.088 ms, (      1.08 s total)
[Iteration 4   ] residual:         26 | iteration runtime:    345.268 ms, (      1.43 s total)
[Iteration 5   ] residual:       26.6 | iteration runtime:    355.430 ms, (      1.78 s total)
[Iteration 6   ] residual:       25.2 | iteration runtime:    346.626 ms, (      2.13 s total)
[Iteration 7   ] residual:       27.7 | iteration runtime:    350.874 ms, (      2.48 s total)
[Iteration 8   ] residual:       26.4 | iteration runtime:    345.487 ms, (      2.83 s total)
[Iteration 9   ] residual:         27 | iteration runtime:    354.675 ms, (      3.18 s total)
[Iteration 10  ] residual:       28.1 | iteration runtime:    352.277 ms, (      3.53 s total)
[Iteration 11  ] residual:       30.5 | iteration 

ValueIterationPolicy:
 State(0, 0, 1, -2) -> REJECT
 State(0, 0, 1, -1) -> REJECT
 State(0, 0, 1, 0) -> REJECT
 State(0, 0, 1, 1) -> ACCEPT
 State(0, 0, 1, 2) -> ACCEPT
 State(0, 0, 2, -2) -> REJECT
 State(0, 0, 2, -1) -> REJECT
 State(0, 0, 2, 0) -> REJECT
 State(0, 0, 2, 1) -> ACCEPT
 State(0, 0, 2, 2) -> ACCEPT
 State(0, 0, 3, -2) -> REJECT
 State(0, 0, 3, -1) -> REJECT
 State(0, 0, 3, 0) -> REJECT
 State(0, 0, 3, 1) -> ACCEPT
 State(0, 0, 3, 2) -> ACCEPT
 State(0, 0, 4, -2) -> REJECT
 State(0, 0, 4, -1) -> REJECT
 State(0, 0, 4, 0) -> REJECT
 State(0, 0, 4, 1) -> ACCEPT
 State(0, 0, 4, 2) -> ACCEPT
 State(0, 0, 5, -2) -> REJECT
 State(0, 0, 5, -1) -> REJECT
 State(0, 0, 5, 0) -> REJECT
 State(0, 0, 5, 1) -> ACCEPT
 State(0, 0, 5, 2) -> ACCEPT
 …

**Basesline Policies**

In [None]:
# Examples of how to make policies: 
#https://htmlview.glitch.me/?https://github.com/JuliaAcademy/Decision-Making-Under-Uncertainty/blob/master/html/2-POMDPs.jl.html

In [112]:
"""AcceptAll"""
struct AcceptAll <: Policy end

# accept everyone until capacity is 0
function POMDPs.action(::AcceptAll, s::State)    # action(policy, state)
    return ACCEPT
end;

AcceptAll_policy = AcceptAll()

simulations(AcceptAll_policy, mdp, 100) #how it will be called 

(0.27834994383923933, 15.08033104525932)

In [None]:
#simulations(test_policy, mdp, 100) #how it will be called 

In [116]:
"""AMCITS"""
struct AMCITS <: Policy end

function POMDPs.action(::AMCITS, s::State)
    return (s.v == 2)  ? ACCEPT : REJECT
end;

AMCITS_policy = AMCITS()

simulations(AMCITS_policy, mdp, 10) #how it will be called 

(24.206226109920827, 8.56462147939779)

In [118]:
"""SIV_AMCITS """
struct SIV_AMCITS <: Policy end

function POMDPs.action(::SIV_AMCITS, s::State)
    return (s.v == 2 || s.v == 1) ? ACCEPT : REJECT
end;
SIV_AMCITS_policy = SIV_AMCITS()


simulations(SIV_AMCITS_policy, mdp, 10) #how it will be called 

(34.29551665080522, 7.7824231666262875)

In [108]:
"""AfterThresholdAMCITS"""
# if want to change this need to make it a mutable struct 
@with_kw struct AfterThresholdAMCITS <: Policy
    threshold = 20 # could define this in parameters 
end

function POMDPs.action(policy::AfterThresholdAMCITS, s::State)
    if s.t <= 20 #policy.threshold
        return s.v == 5 ? ACCEPT : REJECT
    else
        return action(mdp_policy, s)
    end
end

# function POMDPs.action(::AfterThresholdAMCITS, s::State)
    
#     # regular MDP up until that point 
#     return (((s.c-s.f) >= 0 && s.t >= 0) && s.t <= 20 && s.v == 5) ? ACCEPT : REJECT # make the threshold a variable 
# end;
        
SIV_AfterThresholdAMCITS_policy = AfterThresholdAMCITS()
#simulations(SIV_AfterThresholdAMCITS_policy, mdp, 10)


AfterThresholdAMCITS
  threshold: Int64 20


In [109]:
"""BeforeThresholdAMCITS"""
@with_kw struct BeforeThresholdAMCITS <: Policy
    threshold = 20
end

function POMDPs.action(policy::BeforeThresholdAMCITS, s::State)
    if s.t >= 20 #policy.threshold
        return s.v == 5 ? ACCEPT : REJECT
    else
        return action(mdp_policy, s)
    end
end

        
BeforeThresholdAMCITS_policy = BeforeThresholdAMCITS()
#simulations(BeforeThresholdAMCITS_policy, mdp, 10)
# could play with changing this threshold


BeforeThresholdAMCITS
  threshold: Int64 20


In [96]:
function simulation(policy, mdp)
    sim = RolloutSimulator()
    r = simulate(sim, mdp, policy) #accumulated discounted reward 
    # could we also return the number of ppl on the plane? 
    return r
end

simulation (generic function with 1 method)

In [98]:
simulation(SIV_AfterThresholdAMCITS_policy, mdp)

30.039894189611125

In [97]:
simulation(mdp_policy, mdp)

30.629320285409488

**Simulation**

In [83]:
# # for reference, this is what is happening in sim
# # b = initialize_belief(up, b0)

# r_total = 0.0
# d = 1.0
# while !isterminal(pomdp, s)
#     a = action(policy, b)
#     s, o, r = @gen(:sp,:o,:r)(pomdp, s, a) # gen is 
#     r_total += d*r
#     d *= discount(pomdp)
#     b = update(up, b, a, o)
# end

# gen is     
#     sp = rand(transition(pomdp, s, a))
#     o = rand(observation(pomdp, s, a, sp))
#     r = reward(pomdp, s, a, sp, o)
#     s = sp
# function simulation(policy, mdp)
#     sim = RolloutSimulator()
#     r = simulate(sim, mdp, policy) #accumulated discounted reward 
#     # could we also return the number of ppl on the plane? 
#     return r
# end

# INTEGRATE HISTORY HERE https://juliapomdp.github.io/POMDPSimulators.jl/latest/histories/#Examples and use in stats
function simulation(policy, mdp)
    hr = HistoryRecorder()
    #sim = RolloutSimulator()
    history = simulate(hr, mdp, policy)
    #r = simulate(sim, mdp, policy) #accumulated discounted reward 
    # could we also return the number of ppl on the plane? 
    return history
end


simulation (generic function with 1 method)

In [87]:
hist = simulation(policy, mdp)

61-element SimHistory{NamedTuple{(:s, :a, :sp, :r, :info, :t, :action_info), Tuple{State, Action, State, Int64, Nothing, Int64, Nothing}}, Float64}:
 (s = State(60, 60, 5, 0), a = REJECT, sp = State(60, 59, 5, 0), r = 0, info = nothing, t = 1, action_info = nothing)
 (s = State(60, 59, 5, 0), a = ACCEPT, sp = State(55, 58, 1, -1), r = 0, info = nothing, t = 2, action_info = nothing)
 (s = State(55, 58, 1, -1), a = ACCEPT, sp = State(54, 57, 3, 2), r = -1, info = nothing, t = 3, action_info = nothing)
 (s = State(54, 57, 3, 2), a = ACCEPT, sp = State(51, 56, 1, -1), r = 6, info = nothing, t = 4, action_info = nothing)
 (s = State(51, 56, 1, -1), a = REJECT, sp = State(51, 55, 2, 1), r = 0, info = nothing, t = 5, action_info = nothing)
 (s = State(51, 55, 2, 1), a = ACCEPT, sp = State(49, 54, 2, 2), r = 2, info = nothing, t = 6, action_info = nothing)
 (s = State(49, 54, 2, 2), a = ACCEPT, sp = State(47, 53, 4, 1), r = 4, info = nothing, t = 7, action_info = nothing)
 (s = State(47, 53, 

In [89]:
hist[1][:s] # returns the first state in the history


State(60, 60, 5, 0)

In [90]:
hist[:a] # returns all of the actions in the history


Base.Generator{Vector{NamedTuple{(:s, :a, :sp, :r, :info, :t, :action_info), Tuple{State, Action, State, Int64, Nothing, Int64, Nothing}}}, POMDPSimulators.var"#8#9"{Symbol}}(POMDPSimulators.var"#8#9"{Symbol}(:a), NamedTuple{(:s, :a, :sp, :r, :info, :t, :action_info), Tuple{State, Action, State, Int64, Nothing, Int64, Nothing}}[(s = State(60, 60, 5, 0), a = REJECT, sp = State(60, 59, 5, 0), r = 0, info = nothing, t = 1, action_info = nothing), (s = State(60, 59, 5, 0), a = ACCEPT, sp = State(55, 58, 1, -1), r = 0, info = nothing, t = 2, action_info = nothing), (s = State(55, 58, 1, -1), a = ACCEPT, sp = State(54, 57, 3, 2), r = -1, info = nothing, t = 3, action_info = nothing), (s = State(54, 57, 3, 2), a = ACCEPT, sp = State(51, 56, 1, -1), r = 6, info = nothing, t = 4, action_info = nothing), (s = State(51, 56, 1, -1), a = REJECT, sp = State(51, 55, 2, 1), r = 0, info = nothing, t = 5, action_info = nothing), (s = State(51, 55, 2, 1), a = ACCEPT, sp = State(49, 54, 2, 2), r = 2, info

In [None]:
hr = HistoryRecorder(max_steps=5)
history = simulate(hr, mdp, policy);

#history = simulate(his, mdp, policy)
# r = simulate(sim, mdp, policy) #accumulated discounted reward 
hist[1][:s] # returns the first state in the history
hist[:a] # returns all of the actions in the history


In [99]:
function simulations(policy, mdp, n_sims) # n is number of times to run 
    policy_rewards = []
    for i in 1:n_sims
        push!(policy_rewards, simulation(policy, mdp))
    end
    std_policy_reward = std(policy_rewards)
    mean_policy_reward = mean(policy_rewards)
    return mean_policy_reward, std_policy_reward
end

simulations (generic function with 1 method)

In [105]:
function experiments()
    # policies and n_sims can probably be put in our params function as a list. here for now. 
    n_sims = 100
    policies = [AcceptAll_policy, AMCITS_policy, 
        SIV_AMCITS_policy, SIV_AfterThresholdAMCITS_policy, 
        BeforeThresholdAMCITS_policy]
    m_std_rewards = []
    for curr_policy in policies
        push!(m_std_rewards, simulations(curr_policy, mdp, n_sims))
    end
    # Could print them in a way that makes it more fun 
    return m_std_rewards

end

experiments (generic function with 1 method)

In [111]:
mean_std_rewards = experiments()

5-element Vector{Any}:
 (1.1547832999920435, 15.382295784885098)
 (0.0, 0.0)
 (0.0, 0.0)
 (19.88893457857716, 8.662625524326367)
 (1.8599272341900732, 1.139445687696571)

**Visualizations**

In [None]:
function vis_time_step(policy, c, t) # pass in policy and chairs and time remaing. Spit out graph of family size versus visa status.
    (v_size, f_size) = params.size  #visa, family 8, 15
    policyGraph = zeros(v_size, f_size) 
    visa_statuses=params.visa_status
    family_sizes=params.family_sizes
    
    for f in 1:f_size
        for v in 1:v_size
            act = action(policy, State(c, t, family_sizes[f], visa_statuses[v])) 
            if act == ACCEPT
                policyGraph[v,f] = 100
            else
                policyGraph[v,f] = 0
            end
        end
    end
    
    x=params.visa_status
    y=params.family_sizes
    z=policyGraph'
    title_time_cap="t = $t c = $c $policy" # MAKE title smaller 
    return heatmap(x, y, z, 
        aspect_ratio = :equal, 
        legend = :none, 
        xlims = (params.visa_status[1], 
        params.visa_status[length(params.visa_status)]), 
        xlabel = "Visa Status", 
        ylabel = "Family Size", 
        ylims = (params.family_sizes[1], 
        params.family_sizes[length(params.family_sizes)]), 
        title=title_time_cap, 
        xtickfont = font(5, "Courier"), 
        ytickfont = font(5, "Courier"),
        thickness_scaling = .5,
        palette = cgrad([:red, :green], [0, 1]),
        )
    
end

In [None]:
x = vis_time_step(AcceptAll_policy, 10, 30)

In [None]:
function vis_all(policy)
    total_time = params.time 
    total_capacity = params.capacity
    graph_per_n = 4
    heat_maps = []
    time_points = (total_time/graph_per_n) + 1 # to include 0 
    capacity_points = (total_capacity/graph_per_n) + 1 
    num_graphs = trunc(Int, time_points*capacity_points)
    
    for t in 0:total_time
        if t % graph_per_n == 0 
            for c in 0:total_capacity
                if c % graph_per_n == 0
                push!(heat_maps, vis_time_step(policy, c, t))
                end
            end
        end
    end 
    plot((heat_maps...), layout = num_graphs, margin = 5mm)

end

In [None]:
vis_all(policy) # THIS IS VISIA;L

In [None]:
vis_all(AcceptAll_policy)

In [None]:
vis_all(AMCITS_policy) 

       

In [None]:
vis_all(SIV_AMCITS_policy)

In [None]:
vis_all(SIV_AfterThresholdAMCITS_policy)


In [None]:
vis_all(BeforeThresholdAMCITS_policy)

**Aggregate Metrics**

In [None]:
function getmetrics(policy, mdp)
    # I think this somehow needs to simulated in our rollout 
    total_people = 0
    total_accept = 0
    total_reject = 0
    total_accepted_people = 0 
    
    # Initialize visa_statuses dictionary
    visa_statuses = params.visa_status
    visa_dict = Dict()
    for v in visa_statuses
        visa_dict[v] = 0
    end
    
    for c in 0:params.capacity # capacity ends at 0 
        for t in 0:params.time # time ends at 0 
            for f in params.family_sizes # family size here we should have the ACTUAL family sizes 
                for v in params.visa_status # actual visa statuses  
                    state = State(c, t, f, v)
                    total_people += f 
                    if action(policy, state) == ACCEPT
                        total_accept += 1
                        total_accepted_people += f
                        visa_dict[v] += 1
                    else 
                        total_reject += 1
                    end
                end        
            end
        end
    end
    
    print("Total people: ", total_people, )
    print("Total accept: ", total_accept)
    print("Total reject: ", total_reject)
    visa_dict 
       
end 

In [None]:
getmetrics(policy, mdp)