In [1]:
#import Pkg
#Pkg.add("POMDPs")
using POMDPs # for MDP type
# import Pkg
#Pkg.add("DiscreteValueIteration")
#Pkg.add("Parameters")
#Pkg.add("Random")
#Pkg.add("Plots")
#Pkg.add("POMDPModelTools")
#Pkg.add("POMDPPolicies")
#Pkg.add("Distributions")
using DiscreteValueIteration
using POMDPPolicies
using POMDPModelTools
using Parameters
using Random
using Plots; default(fontfamily="Computer Modern", framestyle=:box) # LaTex-style

In [2]:
using QuickPOMDPs
using Distributions 

In [3]:
Random.seed!(123)


MersenneTwister(123)

In [None]:
# average family size in Afganistan 
#family_sizes = Normal(8, 1.0)
#family_size = rand(family_sizes, 10)
#family_size = convert(Int64, RoundUp(family_size, digits=0))
# figure out how to round up or down for this 

In [None]:
#family_sizes = rand(Normal(8, 2.0), 10) # truncate positive #s

In [None]:
# Define the problem with QuickPOMPDPs.kl 


**Environment Parameters**

In [None]:
# Parameters defining the evacuation environment 
# (rather than creating global variables we 
# consolodate them into a single EvacuationParams structure)

In [11]:
@with_kw struct EvacuationParameters
    # family distribution 
    # visa distribution [should this include time and chairs?]
    #convert(Int64, round(1.3, digits=0))
    family_sizes::Vector{Float64} =  rand(Normal(8, 2.0), 10) # truncate positive #s
    visa_status::Vector{Int} = [-5, -4, -3, -2, 0, 1, 2, 3, 4, 5]
    capacity::Int = 20
    time::Int = 60
    size::Tuple{Int, Int} = (length(visa_status), length(family_sizes)) # size of grid 
end



EvacuationParameters

In [None]:
#truncated(rand(Normal(8, 2.0), 10), 0.0, Inf) # truncate positive #s
#family_sizes = convert(Int64, round((rand(Normal(8, 2.0), 10), digits=0)

In [12]:
params = EvacuationParameters(); 

In [None]:
# params.family_sizes
# params.visa_status
# params.size
# render(mdp) graphing...

**States**

In [13]:
# A state s in the evacuation problem is a discrete set of 4 values: 
# chairs remaining, time remaining, family size, and visa status
struct State
    c::Int # chairs remaining 
    t::Int # time remaining 
    f::Int # family size 
    v::Int # visa status 
end 

In [14]:
# The state space S for the evacuation problem is the set of 
# all combinations 
# in a grird world it is the set of all (x, y values in the 10X 10 grid...)
# a list of all the structs ... 
𝒮 = [[State(c, t, f, v) for  c=1:params.capacity, t=1:params.time, f=1:params.size[2], v=1:params.size[1]]]


1-element Vector{Array{State, 4}}:
 [State(1, 1, 1, 1) State(1, 2, 1, 1) … State(1, 59, 1, 1) State(1, 60, 1, 1); State(2, 1, 1, 1) State(2, 2, 1, 1) … State(2, 59, 1, 1) State(2, 60, 1, 1); … ; State(19, 1, 1, 1) State(19, 2, 1, 1) … State(19, 59, 1, 1) State(19, 60, 1, 1); State(20, 1, 1, 1) State(20, 2, 1, 1) … State(20, 59, 1, 1) State(20, 60, 1, 1)]

[State(1, 1, 2, 1) State(1, 2, 2, 1) … State(1, 59, 2, 1) State(1, 60, 2, 1); State(2, 1, 2, 1) State(2, 2, 2, 1) … State(2, 59, 2, 1) State(2, 60, 2, 1); … ; State(19, 1, 2, 1) State(19, 2, 2, 1) … State(19, 59, 2, 1) State(19, 60, 2, 1); State(20, 1, 2, 1) State(20, 2, 2, 1) … State(20, 59, 2, 1) State(20, 60, 2, 1)]

[State(1, 1, 3, 1) State(1, 2, 3, 1) … State(1, 59, 3, 1) State(1, 60, 3, 1); State(2, 1, 3, 1) State(2, 2, 3, 1) … State(2, 59, 3, 1) State(2, 60, 3, 1); … ; State(19, 1, 3, 1) State(19, 2, 3, 1) … State(19, 59, 3, 1) State(19, 60, 3, 1); State(20, 1, 3, 1) State(20, 2, 3, 1) … State(20, 59, 3, 1) State(20, 60, 3, 1)]

In [23]:
#size(𝒮[1])
#(20, 60, 10, 10)
#length(𝒮[1]) = 120000 = 20*60*10*10


120000

**Actions**

In [25]:
# the possible actions \scrs<TAB> are wither accepting or rejecting a family
@enum Action REJECT ACCEPT

In [27]:
#A = [REJECT::Action=0, ACCEPT::Action=1]
𝒜 = [REJECT, ACCEPT]

2-element Vector{Action}:
 REJECT::Action = 0
 ACCEPT::Action = 1

**Transition Function** 

In [29]:
# The dynamics to transition the agent live in the transition function T(s'|s, a). The transition function 
function T(s::State, a::Action, s_prime::State)
    # if we accept the person, we want to decriment 
    if a == ACCEPT
        s_prime.c = s - s.c 
    end 
    s_prime.t = s_prime.t - 1 # how to update  time
end
    

T (generic function with 1 method)

**Reward Function**

In [None]:
# The reward functions R(s) and R(s,a) return rewards for a given State. Certain formulations may use R(s) or R(s,a) 
# to computer the rewards 
function R(s, a)
    if s == State()
        return 1
    elseif s == State()
        return 5
    elseif s == State()
        return 10
    elseif s == State()
        return -10
    else 
        return 0
    end 
end 

In [None]:
# We define the Airport abstract MDP type so we can reference it in other methods.
abstract type Airport <: MDP{State, Action} end

In [None]:
render(mdp; show_rewards=true)

**MDP Formulation**

In [None]:
mdp = QuickMDP(GridWorld,
    states       = 𝒮,
    actions      = 𝒜,
    transition   = T,
    reward       = R,
    discount     = γ,
    initialstate = 𝒮,
    isterminal   = termination,
    render       = render);

In [None]:
solver = ValueIterationSolver(max_iterations=30);

**Policy**

In [None]:
# Set discount factor to variable gamma and solve the MDP to obptain policy pi mapping states to actions a

In [None]:
policy = solve(solver, mdp)