In [1]:
#import Pkg
#Pkg.add("POMDPs")
using POMDPs # for MDP type
# import Pkg
#Pkg.add("DiscreteValueIteration")
#Pkg.add("Parameters")
#Pkg.add("Random")
#Pkg.add("Plots")
#Pkg.add("POMDPModelTools")
#Pkg.add("POMDPPolicies")
#Pkg.add("Distributions")
using DiscreteValueIteration
using POMDPPolicies
using POMDPModelTools
using Parameters
using Random
using Plots; default(fontfamily="Computer Modern", framestyle=:box) # LaTex-style

In [2]:
using QuickPOMDPs
using Distributions 

In [3]:
# Seed the global random number generator so that random draws are reproducible.
Random.seed!(123)


MersenneTwister(123)

In [None]:
# average family size in Afghanistan
#family_sizes = Normal(8, 1.0)
#family_size = rand(family_sizes, 10)
#family_size = convert(Int64, RoundUp(family_size, digits=0))
# figure out how to round up or down for this 

In [None]:
#family_sizes = rand(Normal(8, 2.0), 10) # truncate positive #s

In [20]:
# Define the problem with QuickPOMDPs.jl


Truncated(Normal{Float64}(μ=8.0, σ=5.0), range=(1.0, Inf))

**Environment Parameters**

In [None]:
# Parameters defining the evacuation environment
# (rather than creating global variables, we
# consolidate them into a single EvacuationParameters structure)

In [4]:
#trunc(Int, 1.2)

1

In [5]:
#rand(Normal(8, 2.0), 10)

10-element Vector{Float64}:
 10.380535761972553
 12.09635941557848
 10.285301805734399
  8.91883124081416
  7.206641841409554
  6.670574909616625
  9.961935653517067
  7.849033867204488
  8.547630742424312
  7.611541865788559

In [22]:
#rand(TruncatedNormal(8, 5, 1, Inf), 10)


10-element Vector{Float64}:
  4.170081621700107
  7.004044236748712
 15.469801484596978
  8.11195129084749
  8.120795138501837
 13.538238533361696
  8.745929928379955
 16.44753477736475
  5.390476483426479
 11.127693645571131

In [23]:
#[trunc(Int, x) for x in rand(TruncatedNormal(8, 4.0, 1, Inf), 10)]

10-element Vector{Int64}:
  1
 10
 14
  2
  8
 11
 10
 11
  6
 11

In [18]:
# Settings describing one evacuation scenario, gathered in a single
# keyword-constructible struct (via `@with_kw`) instead of separate globals.
@with_kw struct EvacuationParameters
    # Family sizes: ten samples from Normal(8, 4.0) truncated to [1, Inf), with the
    # fractional part dropped by `trunc`.
    # TODO: decide between truncating and rounding the samples.
    family_sizes::Vector{Int} = trunc.(Int, rand(TruncatedNormal(8, 4.0, 1, Inf), 10))
    # Visa status scores.
    # TODO: map these to the actual statuses (should this include time and chairs?).
    visa_status::Vector{Int} = [-5, -4, -3, -2, 0, 1, 2, 3, 4, 5]
    capacity::Int = 20
    time::Int = 60
    # Grid dimensions: (number of visa statuses, number of family sizes).
    size::Tuple{Int, Int} = (length(visa_status), length(family_sizes))
    # TODO: add a `null_state::State` field once it is clear how to define it.
end


EvacuationParameters

In [None]:
#truncated(rand(Normal(8, 2.0), 10), 0.0, Inf) # truncate positive #s
#family_sizes = convert(Int64, round((rand(Normal(8, 2.0), 10), digits=0)

In [19]:
# Build the parameter set from the defaults declared in EvacuationParameters.
params = EvacuationParameters(); 

In [None]:
# params.family_sizes
# params.visa_status
# params.size
# render(mdp) graphing...

**States**

In [None]:
# A state s in the evacuation problem is a discrete tuple of 4 values:
# chairs remaining, time remaining, family size, and visa status.
# The struct is immutable: a successor state has to be built as a new `State`
# rather than by assigning to the fields of an existing one.
struct State
    c::Int # chairs remaining 
    t::Int # time remaining 
    f::Int # family size 
    v::Int # visa status 
end 

In [None]:
# The state space 𝒮 for the evacuation problem is the set of all combinations of
# (chairs remaining, time remaining, family size, visa status), stored as a flat
# vector with one `State` per element. (In a grid world it would be the set of all
# (x, y) cells of the grid.)
# `vec` flattens the 4-D comprehension. Wrapping the comprehension in a second pair
# of brackets would instead produce a one-element vector whose only entry is the
# whole 4-D array, which is not a collection of `State`s.
# NOTE(review): f and v run over 1:length(...), i.e. positions in
# params.family_sizes / params.visa_status, and c and t start at 1 — confirm whether
# the actual values and the zero-chairs / zero-time states should be included.
𝒮 = vec([State(c, t, f, v) for c in 1:params.capacity, t in 1:params.time,
                               f in 1:params.size[2], v in 1:params.size[1]])

In [None]:
#size(𝒮[1])
#(20, 60, 10, 10)
#length(𝒮[1]) = 120000 = 20*60*10*10


**Actions**

In [None]:
# An action is one of two choices: reject the family or accept it.
@enum Action begin
    REJECT
    ACCEPT
end

In [None]:
# The action space 𝒜: every value of the Action enum, in declaration order.
𝒜 = collect(instances(Action))

**Transition Function** 

In [None]:
# The dynamics of the problem live in the transition function T(s' | s, a).
#
# `State` is immutable, so the successor is returned as a new `State` instead of
# being written into the fields of `s_prime` (which raises an error).
#
# Arguments:
#   s        current state
#   a        action taken in `s`
#   s_prime  template for the successor; only its family size `f` and visa status
#            `v` are carried over into the result
#
# Returns the successor state: chairs remaining are reduced by the current family
# size when the family is accepted (unchanged on reject), and time remaining is
# reduced by one.
#
# NOTE(review): nothing here stops chairs from going negative or time from going
# below the range used for the state space — confirm how those cases (and a
# terminal state at time 0) should be handled.
# NOTE(review): QuickMDP's `transition` keyword is documented as taking a function
# of (s, a) that returns a distribution over next states — confirm how this
# three-argument form is meant to be plugged in.
function T(s::State, a::Action, s_prime::State)
    chairs = a == ACCEPT ? s.c - s.f : s.c
    return State(chairs, s.t - 1, s_prime.f, s_prime.v)
end
    

**Reward Function**

In [None]:
# The reward function R(s, a) returns the reward for taking action `a` in state `s`.
# (Certain formulations may use R(s) instead of R(s, a).)
# Accepting a family earns visa status times family size; rejecting earns nothing.
# Without the check on `a`, both actions would earn the same reward in every state.
function R(s::State, a::Action)
    return a == ACCEPT ? s.v * s.f : 0
end

In [None]:
#render(mdp; show_rewards=true)
#@bind γ Slider(0:0.05:1, default=0.95, show_value=true)
# Discount factor; passed to the MDP definition as `discount`.
γ = 0.95

In [None]:
# A state is terminal once there is no time remaining.
function termination(s::State)
    return iszero(s.t)
end

**MDP Formulation**

In [None]:
# We define the Evacuation abstract MDP type (states are `State`, actions are `Action`)
# so we can reference it in other methods.
abstract type Evacuation <: MDP{State, Action} end

In [None]:
# Assemble the evacuation MDP from the state space, action space, transition and
# reward functions, discount factor and terminal-state test.
# NOTE(review): `initialstate = 𝒮` passes the whole state space — presumably meaning
# "start from any state"; confirm.
# NOTE(review): no `render` function is defined in the visible part of this notebook —
# presumably the name resolves to a function exported by a loaded package; confirm.
mdp = QuickMDP(Evacuation,
    states       = 𝒮,
    actions      = 𝒜,
    transition   = T,
    reward       = R,
    discount     = γ,
    initialstate = 𝒮,
    isterminal   = termination,
    render       = render
    );

In [None]:
# Display the MDP via `render`.
render(mdp)

In [None]:
# Value iteration solver, capped at 30 iterations.
solver = ValueIterationSolver(max_iterations=30);

**Policy**

In [None]:
# Set the discount factor to the variable gamma and solve the MDP to obtain the policy pi, which maps states s to actions a

In [None]:
# Run the solver on the MDP and keep the result as `policy`.
policy = solve(solver, mdp)