# Intersection with a crosswalk

**Load dependencies**

In [1]:
using Revise
using Random
using Printf
using Flux
using POMDPs
using POMDPModelTools
using POMDPSimulators
using BeliefUpdaters
using POMDPPolicies
using DiscreteValueIteration
using MDPModelChecking
using StaticArrays
using RLInterface
using DeepQLearning
using AutomotiveDrivingModels
using AutomotivePOMDPs
using AutomotiveSensors
using LocalApproximationValueIteration
using Reel
using AutoViz
using ProgressMeter
using JLD2
using FileIO
using BSON
using PedCar

loaded


┌ Info: Recompiling stale cache file /mnt/c/Users/Maxime/wsl/.julia/compiled/v1.0/LocalApproximationValueIteration/Dvh7I.ji for LocalApproximationValueIteration [a40420fb-f401-52da-a663-f502e5b95060]
└ @ Base loading.jl:1184
│ - If you have LocalApproximationValueIteration checked out for development and have
│   added Random as a dependency but haven't updated your primary
│   environment's manifest file, try `Pkg.resolve()`.
│ - Otherwise you may need to report an issue with LocalApproximationValueIteration
┌ Info: Recompiling stale cache file /mnt/c/Users/Maxime/wsl/.julia/compiled/v1.0/PedCar/NmDDZ.ji for PedCar [90cf7f26-d5c7-593d-a0e1-4a8367407571]
└ @ Base loading.jl:1184
│ - If you have PedCar checked out for development and have
│   added AutomotivePOMDPs as a dependency but haven't updated your primary
│   environment's manifest file, try `Pkg.resolve()`.
│ - Otherwise you may need to report an issue with PedCar


In [2]:
includet("../src/masking.jl")
includet("../src/util.jl")
includet("../src/masked_dqn.jl")
includet("../src/qmdp_approximation.jl")
includet("../src/render_helpers.jl")

In [3]:
rng = MersenneTwister(1);
cam = FitToContentCamera(0.);

## Scenario

In [4]:
params = UrbanParams(nlanes_main=1,
                     crosswalk_pos =[VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)],
                     crosswalk_length =  [14.0, 14., 14.0],
                     crosswalk_width = [4.0, 4.0, 3.1],
                     stop_line = 22.0)
env = UrbanEnv(params=params);

** Discrete states MDP **

In [5]:
mdp = PedCarMDP(env=env, pos_res=2.0, vel_res=2., ped_birth=0.7, car_birth=0.7);
init_transition!(mdp);

In [6]:
@printf("spatial resolution %2.1f m \n", mdp.pos_res)
@printf("pedestrian velocity resolution %2.1f m/s \n", mdp.vel_ped_res)
@printf("car velocity resolution %2.1f m/s \n", mdp.vel_res)
@printf("number of states %d \n", n_states(mdp))
@printf("number of actions %d \n", n_actions(mdp))

spatial resolution 2.0 m 
pedestrian velocity resolution 1.0 m/s 
car velocity resolution 2.0 m/s 
number of states 23456940 
number of actions 4 


**Continuous states MDP**

In [7]:
pomdp = UrbanPOMDP(env=env,
                   sensor = PerfectSensor(),
                   ego_goal = LaneTag(2, 1),
                   max_cars=1, 
                   max_peds=1, 
                   car_birth=0.7, 
                   ped_birth=0.7, 
                   max_obstacles=0., # no fixed obstacles
                   lidar=false,
                   ego_start=20,
                   ΔT=0.5);

## Load policies

In [8]:
@load "../pc_util_processed.jld2" qmat util pol
safe_policy = ValueIterationPolicy(mdp, qmat, util, pol);

In [9]:
threshold = 0.99
mask = SafetyMask(mdp, safe_policy, threshold);
continuous_safe_policy = SafePOMDPPolicy(mask, pomdp)
discrete_safe_random = MaskedEpsGreedyPolicy(mdp, 1.0, mask, rng)
continuous_safe_random = RandomMaskedPOMDPPolicy(mask, pomdp, rng);

In [10]:
qnetwork = BSON.load("../training_scripts/drqn-log/log13/model.bson")[:qnetwork]
dqn_policy = NNPolicy(pomdp, qnetwork, actions(pomdp), 1)
masked_policy = MaskedNNPolicy(pomdp, dqn_policy, mask);

# Evaluation

**Discrete Environment: Safe Policy**

In [11]:
@time rewards_mask, steps_mask, violations_mask = evaluation_loop(mdp, safe_policy, n_ep=10000, max_steps=400, rng=rng);
print_summary(rewards_mask, steps_mask, violations_mask)

[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m

 32.214079 seconds (76.17 M allocations: 12.299 GiB, 5.74% gc time)
Summary for 10000 episodes: 
Average reward: 0.167 
Average # of steps: 51.077 
Average # of violations: 0.000 


[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:31[39m


**Discrete Environment: Safe Random**

In [12]:
@time rewards_mask, steps_mask, violations_mask = evaluation_loop(mdp, discrete_safe_random, n_ep=10000, max_steps=400, rng=rng);
print_summary(rewards_mask, steps_mask, violations_mask)

[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m

 53.469413 seconds (219.37 M allocations: 26.353 GiB, 7.81% gc time)
Summary for 10000 episodes: 
Average reward: 0.047 
Average # of steps: 89.573 
Average # of violations: 1.550 


[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:53[39m


**Continuous Environment: Safe Policy**

In [13]:
@time rewards_mask, steps_mask, violations_mask = evaluation_loop(pomdp, continuous_safe_policy, n_ep=1000, max_steps=400, rng=rng);
print_summary(rewards_mask, steps_mask, violations_mask)

[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m

 76.335680 seconds (554.82 M allocations: 40.384 GiB, 14.30% gc time)
Summary for 1000 episodes: 
Average reward: 0.176 
Average # of steps: 43.842 
Average # of violations: 2.000 


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:01:13[39m


**Continuous Environment: Safe Random**

In [14]:
@time rewards_mask, steps_mask, violations_mask = evaluation_loop(pomdp, continuous_safe_random, n_ep=1000, max_steps=400, rng=rng);
print_summary(rewards_mask, steps_mask, violations_mask)

[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m

132.912827 seconds (1.03 G allocations: 74.830 GiB, 15.27% gc time)
Summary for 1000 episodes: 
Average reward: 0.051 
Average # of steps: 78.408 
Average # of violations: 3.600 


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:02:13[39m


###### **Continuous Environment: Safe RL**

In [31]:
@time rewards_mask, steps_mask, violations_mask = evaluation_loop(pomdp, masked_policy, n_ep=1000, max_steps=400, rng=rng);
print_summary(rewards_mask, steps_mask, violations_mask)

[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m

 93.323057 seconds (422.53 M allocations: 30.931 GiB, 12.89% gc time)
Summary for 1000 episodes: 
Average reward: 0.175 
Average # of steps: 45.101 
Average # of violations: 3.900 


[32mProgress: 100%|█████████████████████████████████████████|  ETA: 0:00:00[39m[32mProgress: 100%|█████████████████████████████████████████| Time: 0:01:33[39m


In [32]:
evaluation_policy = masked_evaluation(mask)
scores_eval = DeepQLearning.evaluation(evaluation_policy, dqn_policy, POMDPEnvironment(pomdp),                                  
                         1000,
                         400,
                         true)

Evaluation ... Avg Reward 0.93 | Violations (%) 3.50 | Avg Steps 42.72

0.929

**Collisions analysis**

In [20]:
hr = HistoryRecorder(rng=rng, max_steps=400)
s0 = initialstate(pomdp, rng)
up = PreviousObservationUpdater()
o0 = generate_o(pomdp, s0, UrbanAction(0.), s0, rng)
b0 = initialize_belief(up, o0)
@time hist2 = simulate(hr, pomdp, masked_policy, up, b0, s0);

  0.103746 seconds (566.19 k allocations: 42.512 MiB, 20.05% gc time)


In [59]:
up = PreviousObservationUpdater()
@showprogress for ep=1:10000
    global hist2
    hr = HistoryRecorder(rng=rng, max_steps=100)
    s0 = initialstate(pomdp, rng)
    o0 = generate_o(pomdp, s0, UrbanAction(0.), s0, rng)
    b0 = initialize_belief(up, o0)
    hist2 = simulate(hr, pomdp, masked_policy, up, b0, s0)
    if sum(hist2.reward_hist .< 0.) != 0.
        println("Crash")
        break
    end
end

[32mProgress:   0%|                                         |  ETA: 0:12:49[39m

Crash


[32mProgress:   0%|                                         |  ETA: 0:12:28[39m[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m


In [21]:
h = hist2
state_hist = h.state_hist
action_hist = h.action_hist
belief_hist = h.belief_hist
safe_acts = [i[1] for i in h.ainfo_hist]
probas = [i[2] for i in h.ainfo_hist]
routes = [i[3] for i in h.ainfo_hist]

push!(safe_acts, [UrbanAction(NaN)])
push!(probas, [NaN])
push!(routes, PedCar.OFF_ROUTE)
push!(action_hist, UrbanAction(NaN))
duration, fps, render_hist = animate_states(pomdp, state_hist, action_hist, belief_hist, safe_acts, probas, routes, mask, interp=true, obsviz=true)
film = roll(render_hist, fps = fps, duration = duration)

In [18]:
actionvalues(policy, o0)

UndefVarError: UndefVarError: policy not defined