## Belief State Reinforcement Learning

In [27]:
using Random
using Printf
using StaticArrays
using Flux
using FileIO
using BSON: @load
using JLD2
using ProgressMeter
using POMDPs
using POMDPModelTools
using DeepRL
using DeepQLearning
using LocalApproximationValueIteration
using DiscreteValueIteration
using AutomotiveDrivingModels
using AutoViz
using AutomotivePOMDPs
using MDPModelChecking
using PedCar
using AutomotiveSensors

In [6]:
# Evaluate the project-local helper scripts, preserving the original order
# (later scripts may depend on definitions made by earlier ones).
for helper in ("masking.jl", "util.jl", "masked_dqn.jl", "render_helpers.jl")
    include(helper)
end

In [8]:
# Evaluate the RNNFiltering source file, then bring the module's exported names
# into scope. The `Main.` prefix is used because the `include` above runs at top
# level, so the module it defines (presumably `module RNNFiltering`) lives in `Main`.
include("training_scripts/RNNFiltering/RNNFiltering.jl")
using Main.RNNFiltering



In [9]:
# Random number generator with a fixed seed (1) so runs are reproducible.
rng = MersenneTwister(1)
# Camera object for rendering; presumably the argument is the border/margin
# around the fitted content — TODO confirm against AutoViz.
cam = FitToContentCamera(0.);

## Environment

In [13]:
# Discretized pedestrian/car MDP; it is later handed to the value-iteration
# policy and the safety mask.
mdp = PedCarMDP(pos_res=2.0, vel_res=2.0, ped_birth=0.7, car_birth=0.7)

# POMDP built on the very same environment object as `mdp` (`env=mdp.env`).
# NOTE(review): ΔT is 0.1 here, while the printed PedCarMDP further down in this
# notebook reports ΔT = 0.5 — confirm the mismatch is intended.
pomdp = UrbanPOMDP(
    env=mdp.env,
    sensor=GaussianSensor(
        false_positive_rate=0.0,
        pos_noise=LinearNoise(min_noise=0.5, increase_rate=0.05),
        vel_noise=LinearNoise(min_noise=0.5, increase_rate=0.05),
    ),
    ego_goal=LaneTag(2, 1),
    obs_dist=ObstacleDistribution(
        mdp.env,
        upper_obs_pres_prob=0.0,
        left_obs_pres_prob=1.0,
        right_obs_pres_prob=1.0,
    ),
    max_cars=1,
    max_peds=1,
    car_birth=0.05,
    ped_birth=0.05,
    # NOTE(review): this line used to be annotated "no fixed obstacles", which
    # looks at odds with max_obstacles=1 and the 1.0 left/right presence
    # probabilities above — confirm which one is intended.
    max_obstacles=1,
    lidar=false,
    ego_start=20,
    ΔT=0.1,
);

## Load DRQN Policy

In [25]:
# Threshold handed to the SafetyMask constructor below.
threshold = 0.999
# Paths consumed by the next cell (`load(problem_file)` and `restore_policy(..., weights_file)`).
# NOTE(review): these use the directory "training-script", whereas the RNNFiltering
# paths elsewhere in this notebook use "training_scripts" — confirm which is correct.
problem_file="training-script/drqn/log7/problem.jld"
weights_file="training-script/drqn/log7/weights.jld"
# Read the variables `qmat`, `util` and `pol` from the BSON file into scope.
@load "pc_processed.bson" qmat util pol
# Rebuild the value-iteration policy for `mdp` from the loaded arrays and wrap it
# in a safety mask using `threshold`.
safe_policy = ValueIterationPolicy(mdp, qmat, util, pol)
mask = SafetyMask(mdp, safe_policy, threshold);

SafetyMask{PedCarMDP,ValueIterationPolicy}(PedCarMDP
  env: UrbanEnv
  ΔT: Float64 0.5
  pos_res: Float64 2.0
  vel_res: Float64 2.0
  vel_ped_res: Float64 1.0
  car_action_space: Array{Float64}((14,)) [-9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0]
  ped_action_space: Array{Float64}((3,)) [0.0, 1.0, 2.0]
  car_models: Dict{SArray{Tuple{2},LaneTag,1,2},DriverModel}
  car_type: VehicleDef
  ego_type: VehicleDef
  ped_type: VehicleDef
  a_noise: Float64 1.0
  v_noise: Float64 1.0
  ped_birth: Float64 0.7
  car_birth: Float64 0.7
  ego_start: Float64 20.0
  ego_goal: LaneTag
  off_grid: VehicleState
  collision_cost: Float64 -1.0
  action_cost: Float64 0.0
  goal_reward: Float64 1.0
  γ: Float64 0.95
  _ped_grid: Dict{LaneTag,GridInterpolations.RectangleGrid{3}}
  _car_grid: Dict{LaneTag,GridInterpolations.RectangleGrid{2}}
  _l_grid: Dict{LaneTag,GridInterpolations.RectangleGrid{1}}
  _v_grid: GridInterpolations.RectangleGrid{1}
  _car_transition_dict: Dic

In [26]:
# Restore the trained DRQN policy and wrap it with the safety mask.
# NOTE(review): in the recorded run this cell failed on the first line because
# loading a ".jld" file requires the JLD package, which was not installed
# (see the error output below); the notebook imports JLD2, not JLD.
solver = load(problem_file)["solver"]
env_ = POMDPEnvironment(pomdp)
# NOTE(review): no `using TensorFlow` is visible among the imports at the top of
# this notebook, so `TensorFlow` may be undefined here — confirm.
graph = TensorFlow.Graph()
train_graph = DeepQLearning.build_graph(solver, env_, graph)
policy = DeepQLearning.restore_policy(env_, solver, train_graph, weights_file)
# Combine the restored policy with `mask` (built in the previous cell).
masked_policy = MaskedDQNPolicy(pomdp, policy, mask);

Error encountered while loading "training-script/drqn/log7/problem.jld".
Fatal error:


ArgumentError: ArgumentError: Package JLD not found in current path:
- Run `Pkg.add("JLD")` to install the JLD package.


## Load RNN Belief Updater

In [31]:
# Load the ensemble of networks used by the RNN belief updater, one BSON file
# per ensemble member (`model_1.bson`, `model_2.bson`, ...).
n_models = 2
models = Vector{Chain}(undef, n_models)
for i in 1:n_models
    # `@load` is used instead of `BSON.load(...)[:model]`: the notebook only does
    # `using BSON: @load`, which brings the macro into scope but does not bind
    # the module name `BSON`, so `BSON.load` would raise an UndefVarError.
    @load "training_scripts/RNNFiltering/model_$i.bson" model
    models[i] = model
end

In [None]:
# Scratch cell: run one observation through every network of the ensemble and
# convert each raw prediction with `process_prediction`.
# NOTE(review): `o` (the observation) is not defined in any visible cell; it
# must be assigned before this cell is run.
# The `undef` initializer is required: the one-argument form
# `Vector{Vector{Float64}}(n)` is no longer valid in the Julia version this
# notebook otherwise targets (see `Vector{Chain}(undef, n_models)` above).
beliefs = Vector{Vector{Float64}}(undef, n_models)
for i in 1:n_models
    pred = models[i](o).tracker.data
    beliefs[i] = process_prediction(pomdp, pred, o)
end

In [17]:
# Belief updater backed by an ensemble of networks.
#   models  - the networks; `update` applies each one to the incoming observation
#   beliefs - one slot per network, overwritten in place by `update`
#   mdp     - PedCarMDP instance (not read by the `update` method defined below)
#   pomdp   - UrbanPOMDP instance (not read by the `update` method defined below)
struct PedCarRNNUpdater <: Updater 
    models::Vector{Chain}
    beliefs::Vector{Vector{Float64}}
    mdp::PedCarMDP
    pomdp::UrbanPOMDP
end

"""
    POMDPs.update(up::PedCarRNNUpdater, bold, a, o::Vector{Float64})

Feed the observation `o` through every network in `up.models` and store each
raw output in the matching slot of `up.beliefs`.

`bold` and `a` are accepted to satisfy the updater signature but are not used.

Returns `up.beliefs` itself: the vector is overwritten in place, so the value
returned by a previous call refers to the same container.
"""
function POMDPs.update(up::PedCarRNNUpdater, bold, a, o::Vector{Float64})
    # Use the networks owned by the updater, not a global `models` variable that
    # happens to exist in the notebook session.
    for (i, model) in enumerate(up.models)
        up.beliefs[i] = model(o).tracker.data
    end
    return up.beliefs
end

UndefVarError: UndefVarError: Chain not defined

In [None]:
"""
    action(policy::MaskedDQNPolicy, b::Vector{Vector{Float64}})

Pick an action for the ensemble belief `b`: compute the actions allowed by the
safety mask, evaluate the Q-values of `policy.q` on `b`, and let `best_action`
choose among the allowed actions.

NOTE(review): this defines `action` unqualified, whereas the updater method in
this notebook is written as `POMDPs.update`; confirm whether this should be
`POMDPs.action` so that POMDPs.jl tooling dispatches to it.
"""
function action(policy::MaskedDQNPolicy, b::Vector{Vector{Float64}})
    safe_acts = safe_actions(policy.problem, policy.mask, b)
    val = value(policy.q, b)
    # Pass the masked action set computed above; the previous code referred to
    # an undefined variable `acts` here.
    return best_action(safe_acts, val, policy.problem)
end

# Average the action values of `policy` over the elements of the belief `b`:
# each element is converted with `process_prediction`, evaluated with
# `value(policy, bb)`, and the results are summed and divided by `length(b)`.
# NOTE(review): `b` is typed Vector{UrbanState} here, but `action` in this
# notebook calls `value(policy.q, b)` with a Vector{Vector{Float64}} — confirm
# the intended belief type.
function value(policy::DQNPolicy, b::Vector{UrbanState})
    # NOTE(review): `n_features` is assigned but never used in this function.
    n_features = 4
    pomdp = policy.env.problem
    vals = zeros(n_actions(pomdp))
    for i=1:length(b)
        # NOTE(review): `pred` is not defined in this function; as written this
        # line raises an UndefVarError unless a global `pred` exists — confirm
        # what should be passed here.
        bb = process_prediction(pomdp, pred, b[i])
        vals += value(policy, bb)
    end
    return vals./length(b)
end

"""
    safe_actions(pomdp::UrbanPOMDP, mask::SafetyMask{PedCarMDP, P}, b::Vector{Vector{Float64}})

Return the actions considered safe for the ensemble belief `b`.

The per-action values returned by `compute_probas` are averaged over the
elements of `b`. Every action whose averaged value is strictly above
`mask.threshold` is returned; if no action clears the threshold, the single
action with the highest averaged value is returned instead.

NOTE(review): the method is declared as `POMDPs.safe_actions`, but `action` in
this notebook calls `safe_actions` unqualified — confirm that `safe_actions`
really belongs to the POMDPs module; otherwise this definition will fail.
"""
function POMDPs.safe_actions(pomdp::UrbanPOMDP, mask::SafetyMask{PedCarMDP, P}, b::Vector{Vector{Float64}}) where P <: Policy
    vals = zeros(n_actions(pomdp))
    for i in 1:length(b)
        # NOTE(review): `pred` and `o` are not defined in this function and `bb`
        # is never used. Presumably the scene should be built from the processed
        # belief element (e.g. `obs_to_scene(pomdp, bb)`) — confirm and fix
        # before relying on this method.
        bb = process_prediction(pomdp, pred, b[i])
        s = obs_to_scene(pomdp, o)
        vals += compute_probas(pomdp, mask, s, PED_ID, CAR_ID)
    end
    vals ./= length(b)

    safe_acts = UrbanAction[]
    sizehint!(safe_acts, n_actions(mask.mdp))
    action_space = actions(mask.mdp)
    # The threshold test uses the averaged values computed above (the previous
    # code referenced an undefined `p_sa`), and `argmax` replaces `indmax`,
    # which no longer exists in Julia 1.0.
    if maximum(vals) <= mask.threshold
        push!(safe_acts, action_space[argmax(vals)])
    else
        for (j, a) in enumerate(action_space)
            if vals[j] > mask.threshold
                push!(safe_acts, a)
            end
        end
    end
    return safe_acts
end