# Safe RL with multiple cars and pedestrians

In [1]:
# Seeded random number generator. It is passed explicitly to the random masked
# policy, the history recorders, the initial state/observation draws and the
# evaluation loop further down.
rng = MersenneTwister(2);

In [2]:
using AutomotivePOMDPs
using MDPModelChecking

The method will not be callable.


In [3]:
using GridInterpolations, StaticArrays, POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel
using DiscreteValueIteration, DeepQLearning, DeepRL
using ProgressMeter, Parameters, JLD

In [4]:
# AutoViz camera object. It is not referenced again in the visible cells
# (presumably picked up by the included rendering helpers — TODO confirm).
cam = FitToContentCamera(0.)

AutoViz.FitToContentCamera(0.0)

In [55]:
include("masking.jl")
include("util.jl")
include("render_helpers.jl")
include("masked_dqn.jl")

## Scenario

In [6]:
# Urban scenario: one main lane and three crosswalks. Each crosswalk_* vector
# has three entries (presumably one per crosswalk, in matching order — confirm).
params = UrbanParams(
    nlanes_main = 1,
    crosswalk_pos = [
        VecSE2(6, 0.0, pi / 2),
        VecSE2(-6, 0.0, pi / 2),
        VecSE2(0.0, -5.0, 0.0)
    ],
    crosswalk_length = [14.0, 14.0, 14.0],
    crosswalk_width = [4.0, 4.0, 3.1],
    stop_line = 22.0
)

# Environment built from the parameters above; shared by the POMDP and by the
# pedestrian and car MDPs constructed later.
env = UrbanEnv(params = params);

In [112]:
# Joint driving problem on `env`: up to two cars and two pedestrians, no fixed
# obstacles, no lidar, and zero observation noise.
pomdp = UrbanPOMDP(
    env = env,
    ego_goal = LaneTag(2, 1),
    max_cars = 2,
    max_peds = 2,
    car_birth = 0.05,
    ped_birth = 0.05,
    obstacles = false,    # no fixed obstacles
    lidar = false,
    pos_obs_noise = 0.0,  # fully observable
    vel_obs_noise = 0.0
);

## Build and load policy

In [8]:
# Threshold handed to both SafetyMask constructors below.
threshold = 0.9999

# Single-agent-type MDPs, both on the shared environment with the same
# velocity and position resolutions.
ped_mdp = PedMDP(
    env = env,
    vel_res = 2.0,
    pos_res = 2.0,
    ped_type = VehicleDef(AgentClass.PEDESTRIAN, 1.0, 3.0),
    ped_birth = 0.7
)
car_mdp = CarMDP(env = env, vel_res = 2.0, pos_res = 2.0)

# Mask data stored on disk; the "risk_vec" and "risk_mat" entries of each file
# are read below.
ped_mask_file = "pedmask_new.jld"
car_mask_file = "carmask_new.jld"
ped_mask_data = load(ped_mask_file)
car_mask_data = load(car_mask_file)

# Pedestrian mask: wrap the loaded data in a StormPolicy, then in a SafetyMask.
ped_storm_policy = StormPolicy(
    ped_mdp,
    ped_mask_data["risk_vec"],
    ped_mask_data["risk_mat"]
)
ped_mask = SafetyMask(ped_mdp, ped_storm_policy, threshold)

# Car mask: same construction with the car MDP and the car data.
car_storm_policy = StormPolicy(
    car_mdp,
    car_mask_data["risk_vec"],
    car_mask_data["risk_mat"]
)
car_mask = SafetyMask(car_mdp, car_storm_policy, threshold);

In [None]:
# Restore a DeepQLearning policy from the problem and weights files under
# jointmdp-log/log4. The rollouts in the visible cells use `rand_pol`, not
# this policy.
policy = DeepQLearning.restore(
    problem_file = "jointmdp-log/log4/problem.jld",
    weights_file = "jointmdp-log/log4/weights.jld"
);

In [113]:
include("decomposition.jl")

In [114]:
# Build a DecomposedMask from the joint POMDP and the two single-agent-type
# masks (car mask first, pedestrian mask second). It is used by the random
# masked policy and by the animation helper below.
dec_mask = DecomposedMask(pomdp, car_mask, ped_mask);

In [115]:
# Policy used for every rollout and for the evaluation below, built from the
# decomposed mask, the POMDP and the shared rng (presumably it samples among
# the actions the mask allows — confirm against its definition).
rand_pol = RandomMaskedPOMDPPolicy(dec_mask, pomdp, rng);

## Simulation

In [119]:
# Roll out one episode of at most 100 steps with the random masked policy and
# keep the recorded history in `hist2`.
hr = HistoryRecorder(rng=rng, max_steps=100)
# Initial state and its first observation; both calls are passed the shared rng.
s0 = initial_state(pomdp, rng)
o0 = generate_o(pomdp, s0, rng)
# Updater over UrbanObs observations, with its initial belief built from the
# first observation.
up = FastPreviousObservationUpdater{UrbanObs}()
b0 = initialize_belief(up, o0)
# Timed simulation; the trailing `;` suppresses the cell's value display.
@time hist2 = simulate(hr, pomdp, rand_pol, up, b0, s0);

  0.501341 seconds (4.53 M allocations: 145.586 MiB, 17.36% gc time)


In [103]:
# Animate the episode currently stored in `hist2`.
h = hist2
state_hist = h.state_hist

# Work on copies. Pushing the padding entries straight onto `h.action_hist` and
# `h.ainfo_hist` would modify the recorded history itself and append one more
# placeholder every time this cell is re-run.
action_hist = copy(h.action_hist)
safe_actions_hist = copy(h.ainfo_hist)

# Pad both sequences with a NaN placeholder action (presumably so they line up
# with `state_hist`, which would hold one more entry — confirm).
push!(action_hist, CarMDPAction(NaN))
push!(safe_actions_hist, [CarMDPAction(NaN)])

# Render the frames, then assemble them into a film at the returned frame rate
# and duration.
duration, fps, render_hist = animate_states(
    pomdp,
    state_hist,
    action_hist,
    safe_actions_hist,
    dec_mask
)
film = roll(render_hist, fps = fps, duration = duration)

## Evaluation

In [120]:
# Evaluate the random masked policy over 100 episodes of at most 100 steps
# each (timed), then print the reward / step / violation summary.
@time rewards_mask, steps_mask, violations_mask = evaluation_loop(
    pomdp,
    rand_pol,
    n_ep = 100,
    max_steps = 100,
    rng = rng
);
print_summary(rewards_mask, steps_mask, violations_mask)

 63.830275 seconds (541.40 M allocations: 17.082 GiB, 17.15% gc time)
Summary for 100 episodes: 
Average reward: 0.000 
Average # of steps: 100.000 
Average # of violations: 0.000 


In [90]:
# Simulate up to 1000 episodes (at most 100 steps each) with the random masked
# policy and stop at the first one containing a negative reward, which this
# cell reports as a crash.
# NOTE(review): the loop variables deliberately reuse the names of the notebook
# globals. On Julia 0.6 (which this notebook appears to target) the assignments
# update those globals, so after the `break` `hist2` holds the offending
# episode for the animation cell — confirm before renaming anything here.
@time for ep in 1:1000
    hr = HistoryRecorder(rng=rng, max_steps=100)
    s0 = initial_state(pomdp, rng)
    o0 = generate_o(pomdp, s0, rng)
    up = FastPreviousObservationUpdater{UrbanObs}()
    b0 = initialize_belief(up, o0)
    hist2 = simulate(hr, pomdp, rand_pol, up, b0, s0)
    # `any` stops at the first negative reward; this avoids building a temporary
    # mask and comparing an integer count against a float.
    if any(r -> r < 0., hist2.reward_hist)
        println("Crash")
        break
    end
end

Crash
  3.233593 seconds (7.35 M allocations: 237.220 MiB, 4.31% gc time)
