# Intersection with a crosswalk

**Load dependencies**

In [1]:
using Revise
using Random
using Printf
using Flux
using POMDPs
using POMDPModelTools
using POMDPSimulators
using BeliefUpdaters
using POMDPPolicies
using DiscreteValueIteration
using MDPModelChecking
using StaticArrays
using DeepRL
using DeepQLearning
using AutomotiveDrivingModels
using AutomotivePOMDPs
using AutomotiveSensors
using LocalApproximationValueIteration
using Reel
using AutoViz
using ProgressMeter
using JLD2
using FileIO
using BSON
using PedCar

loaded


┌ Info: Recompiling stale cache file /mnt/c/Users/Maxime/wsl/.julia/compiled/v1.0/LocalApproximationValueIteration/Dvh7I.ji for LocalApproximationValueIteration [a40420fb-f401-52da-a663-f502e5b95060]
└ @ Base loading.jl:1184
│ - If you have LocalApproximationValueIteration checked out for development and have
│   added Random as a dependency but haven't updated your primary
│   environment's manifest file, try `Pkg.resolve()`.
│ - Otherwise you may need to report an issue with LocalApproximationValueIteration
┌ Info: Recompiling stale cache file /mnt/c/Users/Maxime/wsl/.julia/compiled/v1.0/PedCar/NmDDZ.ji for PedCar [90cf7f26-d5c7-593d-a0e1-4a8367407571]
└ @ Base loading.jl:1184
│ - If you have PedCar checked out for development and have
│   added AutomotivePOMDPs as a dependency but haven't updated your primary
│   environment's manifest file, try `Pkg.resolve()`.
│ - Otherwise you may need to report an issue with PedCar


In [2]:
includet("../src/masking.jl")
includet("../src/util.jl")
includet("../src/masked_dqn.jl")
includet("../src/qmdp_approximation.jl")
includet("../src/render_helpers.jl")

In [3]:
rng = MersenneTwister(1);
cam = FitToContentCamera(0.);

## Scenario

In [4]:
params = UrbanParams(nlanes_main=1,
                     crosswalk_pos =[VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)],
                     crosswalk_length =  [14.0, 14., 14.0],
                     crosswalk_width = [4.0, 4.0, 3.1],
                     stop_line = 22.0)
env = UrbanEnv(params=params);

** Discrete states MDP **

In [5]:
mdp = PedCarMDP(env=env, pos_res=2.0, vel_res=2., ped_birth=0.7, car_birth=0.7);
init_transition!(mdp);

In [6]:
@printf("spatial resolution %2.1f m \n", mdp.pos_res)
@printf("pedestrian velocity resolution %2.1f m/s \n", mdp.vel_ped_res)
@printf("car velocity resolution %2.1f m/s \n", mdp.vel_res)
@printf("number of states %d \n", n_states(mdp))
@printf("number of actions %d \n", n_actions(mdp))

spatial resolution 2.0 m 
pedestrian velocity resolution 1.0 m/s 
car velocity resolution 2.0 m/s 
number of states 23456940 
number of actions 4 


**Continuous states MDP**

In [7]:
pomdp = UrbanPOMDP(env=env,
                   sensor = PerfectSensor(),
                   ego_goal = LaneTag(2, 1),
                   max_cars=1, 
                   max_peds=1, 
                   car_birth=0.7, 
                   ped_birth=0.7, 
                   max_obstacles=0., # no fixed obstacles
                   lidar=false,
                   ego_start=20,
                   ΔT=0.1);

## Load policies

In [8]:
@load "../pc_util_processed.jld2" qmat util pol
safe_policy = ValueIterationPolicy(mdp, qmat, util, pol);

In [9]:
threshold = 0.99
mask = SafetyMask(mdp, safe_policy, threshold);
continuous_safe_policy = SafePOMDPPolicy(mask, pomdp)
discrete_safe_random = MaskedEpsGreedyPolicy(mdp, 1.0, mask, rng)
continuous_safe_random = RandomMaskedPOMDPPolicy(mask, pomdp, rng);

In [12]:
qnetwork = BSON.load("../training_scripts/drqn-log/log11/model.bson")[:qnetwork]
dqn_policy = NNPolicy(pomdp, qnetwork, actions(pomdp), 1)
masked_policy = MaskedNNPolicy(pomdp, dqn_policy, mask);

In [10]:
# Load VI data for maksing
# @time state_space = states(mdp);
# vi_data = load("pedcar_utility.jld2")
# @showprogress for s in state_space
#     if !s.crash && isterminal(mdp, s)
#         si = stateindex(mdp, s)
#         vi_data["util"][si] = 1.0
#         vi_data["qmat"][si, :] = ones(n_actions(mdp))
#     end
# end
# policy = ValueIterationPolicy(mdp, vi_data["qmat"], vi_data["util"], vi_data["pol"]);
# util = policy.util
# qmat = policy.qmat
# pol = policy.policy 
# @save "pc_util_processed.jld2" util qmat pol

# Evaluation

**Discrete Environment: Safe Policy**

In [19]:
@time rewards_mask, steps_mask, violations_mask = evaluation_loop(mdp, safe_policy, n_ep=10000, max_steps=100, rng=rng);
print_summary(rewards_mask, steps_mask, violations_mask)

[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m

 33.186449 seconds (52.49 M allocations: 9.026 GiB, 5.31% gc time)
Summary for 

[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:33[39m


10000 episodes: 
Average reward: 0.189 
Average # of steps: 43.908 
Average # of violations: 0.000 


**Discrete Environment: Safe Random**

In [21]:
@time rewards_mask, steps_mask, violations_mask = evaluation_loop(mdp, discrete_rand_pol, n_ep=10000, max_steps=100, rng=rng);
print_summary(rewards_mask, steps_mask, violations_mask)

[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m

 55.923841 seconds (173.52 M allocations: 20.131 GiB, 7.47% gc time)
Summary for 10000 episodes: 
Average reward: 0.041 
Average # of steps: 77.997 
Average # of violations: 0.960 


[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:56[39m


**Continuous Environment: Safe Policy**

In [31]:
@time rewards_mask, steps_mask, violations_mask = evaluation_loop(pomdp, continuous_safe_policy, n_ep=10000, max_steps=100, rng=rng);
print_summary(rewards_mask, steps_mask, violations_mask)

[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m

975.104676 seconds (4.19 G allocations: 310.309 GiB, 9.76% gc time)


[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m[32mProgress: 100%|█████████████████████████████████████████| Time: 0:16:15[39m


Summary for 10000 episodes: 
Average reward: 0.002 
Average # of steps: 97.676 
Average # of violations: 0.110 


**Continuous Environment: Safe Random**

In [32]:
@time rewards_mask, steps_mask, violations_mask = evaluation_loop(pomdp, continuous_safe_random, n_ep=10000, max_steps=400, rng=rng);
print_summary(rewards_mask, steps_mask, violations_mask)

[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m

2072.605747 seconds (13.60 G allocations: 992.852 GiB, 15.94% gc time)
Summary for 10000 episodes: 
Average reward: -0.000 
Average # of steps: 99.992 
Average # of violations: 0.030 


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:34:32[39m


**Continuous Environment: Safe RL**

In [13]:
@time rewards_mask, steps_mask, violations_mask = evaluation_loop(pomdp, masked_policy, n_ep=1000, max_steps=400, rng=rng);
print_summary(rewards_mask, steps_mask, violations_mask)

[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m

469.990081 seconds (2.56 G allocations: 186.884 GiB, 17.60% gc time)
Summary for 1000 episodes: 
Average reward: 0.003 
Average # of steps: 246.514 
Average # of violations: 0.000 


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:07:50[39m


**Collisions analysis**

In [None]:
hr = HistoryRecorder(rng=rng, max_steps=100)
s0 = initialstate(mdp, rng)
@time hist2 = simulate(hr, mdp, safe_policy, s0);

In [26]:
@showprogress for ep=1:10000
    global hist2
    hr = HistoryRecorder(rng=rng, max_steps=100)
    s0 = initialstate(mdp, rng)
    hist2 = simulate(hr, mdp, rand_pol, s0)
    if sum(hist2.reward_hist .< 0.) != 0.
        println("Crash")
        break
    end
end
h = hist2
state_hist = h.state_hist
action_hist = h.action_hist
push!(action_hist, UrbanAction(NaN))
duration, fps, render_hist = animate_states(mdp, state_hist, action_hist, mask)
film = roll(render_hist, fps = fps, duration = duration)

[32mProgress:   0%|                                         |  ETA: 0:20:39[39m

Crash


[32mProgress:   0%|                                         |  ETA: 0:20:33[39m[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:05[39m


In [None]:
h = hist2
state_hist = h.state_hist
action_hist = h.action_hist
push!(action_hist, UrbanAction(NaN))
duration, fps, render_hist = animate_states(mdp, state_hist, action_hist, mask)
film = roll(render_hist, fps = fps, duration = duration)