# Joint problem: Avoid 1 car and 1 pedestrian

**Load dependencies**

In [1]:
# Seeded random number generator; it is passed explicitly to the random policy,
# the history recorders and the state/observation samplers in the cells below.
rng = MersenneTwister(2);

In [2]:
using AutomotivePOMDPs
using MDPModelChecking



In [3]:
using GridInterpolations, StaticArrays, POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel
using DiscreteValueIteration, DeepQLearning, DeepRL
using ProgressMeter, Parameters, JLD



In [4]:
# AutoViz camera, constructed with argument 0.
# NOTE(review): `cam` is not referenced again in the visible cells — presumably
# it is picked up as a global by one of the included helper files; confirm.
cam = FitToContentCamera(0.)

AutoViz.FitToContentCamera(0.0)

In [5]:
include("masking.jl")
include("util.jl")
include("render_helpers.jl")
include("masked_dqn.jl")

## Driving environment

In [6]:
# Environment geometry: one main lane and three crosswalks. Entry i of
# `crosswalk_pos`, `crosswalk_length` and `crosswalk_width` presumably describes
# the same crosswalk (all three vectors have three entries).
params = UrbanParams(
    nlanes_main = 1,
    crosswalk_pos = [
        VecSE2(6, 0.0, pi / 2),
        VecSE2(-6, 0.0, pi / 2),
        VecSE2(0.0, -5.0, 0.0)
    ],
    crosswalk_length = [14.0, 14.0, 14.0],
    crosswalk_width = [4.0, 4.0, 3.1],
    stop_line = 22.0
)
env = UrbanEnv(params = params);

## Discretized MDPs

In [7]:
# Discretized single-agent models built on the shared environment: one for the
# pedestrian and one for the car, each with its own position/velocity resolution.
ped_mdp = PedMDP(
    env = env,
    pos_res = 1.0,
    vel_res = 1.0,
    ped_type = VehicleDef(AgentClass.PEDESTRIAN, 1.0, 3.0),
    ped_birth = 0.7
)
car_mdp = CarMDP(env = env, pos_res = 2.0, vel_res = 1.0);

In [8]:
# Load the precomputed safety masks; each JLD file stores its mask under the
# key "mask".
ped_mask_file = "pedmask.jld"
ped_mask = load(ped_mask_file)["mask"]

car_mask_file = "carmask.jld"
car_mask = load(car_mask_file)["mask"];

## Continuous space scenario

In [9]:
# Joint scenario on the same environment with at most one car and one pedestrian.
pomdp = UrbanPOMDP(
    env = env,
    ego_goal = LaneTag(2, 1),
    # traffic participants
    max_cars = 1,
    max_peds = 1,
    car_birth = 0.3,
    ped_birth = 0.7,
    # sensing
    obstacles = false,     # no fixed obstacles
    lidar = false,
    pos_obs_noise = 0.0,   # fully observable
    vel_obs_noise = 0.0
);

In [10]:
# Combine the per-agent masks into one joint mask. The three collections passed
# to `JointMask` are index-aligned: pedestrian first, car second.
# NOTE(review): `ids` presumably holds the scene entity ids of the pedestrian
# (101) and the car (2) — confirm against UrbanPOMDP.
ids = [101, 2]
masks = SafetyMask[ped_mask, car_mask]
joint_mask = JointMask([ped_mdp, car_mdp], masks, ids)

# Random policy built from the joint mask, the POMDP and the shared rng.
rand_pol = RandomMaskedPOMDPPolicy(joint_mask, pomdp, rng);

In [11]:
# Roll out a single recorded episode (at most 100 steps) under `rand_pol`.
up = FastPreviousObservationUpdater{UrbanObs}()
hr = HistoryRecorder(max_steps = 100, rng = rng)

s0 = initial_state(pomdp, rng)      # start state
o0 = generate_o(pomdp, s0, rng)     # observation generated from the start state
b0 = initialize_belief(up, o0)      # initial belief built from that observation

@time hist2 = simulate(hr, pomdp, rand_pol, up, b0, s0);

  5.441887 seconds (4.47 M allocations: 173.682 MiB, 5.22% gc time)


In [12]:
# Render the most recent recorded episode (`hist2`) as an animation.
h = hist2
state_hist = h.state_hist
# Pad copies rather than the recorder's own arrays: `push!` on `h.action_hist` /
# `h.ainfo_hist` directly would mutate `hist2`, so re-running this cell would
# append one more placeholder each time.
action_hist = copy(h.action_hist)
safe_actions_hist = copy(h.ainfo_hist)
# Append a NaN placeholder action (and a one-element safe-action set) —
# presumably so both histories line up with `state_hist`; confirm in
# `animate_states`.
push!(action_hist, CarMDPAction(NaN))
push!(safe_actions_hist, [CarMDPAction(NaN)])
duration, fps, render_hist = animate_states(pomdp, state_hist, action_hist, safe_actions_hist, joint_mask, interp=false)
# Last expression of the cell: the resulting film is what the notebook displays.
film = roll(render_hist, fps = fps, duration = duration)

In [13]:
# Run up to 20 episodes with `rand_pol` and stop at the first episode that
# contains a negative reward, printing "Crash".
@showprogress for ep=1:20
    hr = HistoryRecorder(rng=rng, max_steps=100)
    s0 = initial_state(pomdp, rng)
    o0 = generate_o(pomdp, s0, rng)
    up = FastPreviousObservationUpdater{UrbanObs}()
    b0 = initialize_belief(up, o0)
    hist2 = simulate(hr, pomdp, rand_pol, up, b0, s0)
    # `any` short-circuits on the first negative reward and avoids allocating a
    # temporary mask just to compare its sum with zero.
    if any(r -> r < 0., hist2.reward_hist)
        println("Crash")
        break
    end
end

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:18[39m


## Visualize trained policy

In [21]:
# Restore a previously saved DeepQLearning policy from its problem file and
# weights file under jointmdp-log/log2.
policy = DeepQLearning.restore(problem_file="jointmdp-log/log2/problem.jld", weights_file="jointmdp-log/log2/weights.jld");

2018-06-01 10:56:23.155730: I tensorflow/core/platform/cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA


MethodError: [91mMethodError: show(::IOContext{Base.AbstractIOBuffer{Array{UInt8,1}}}, ::Records.Frame{Records.Entity{AutomotiveDrivingModels.VehicleState,AutomotiveDrivingModels.VehicleDef,Int64}}) is ambiguous. Candidates:
  show(io, scene::Records.Frame{Records.Entity{AutomotiveDrivingModels.VehicleState,AutomotiveDrivingModels.VehicleDef,Int64}}) in AutomotiveDrivingModels at /mnt/c/Users/Maxime/wsl/.julia/v0.6/AutomotiveDrivingModels/src/2d/vehicles/scenes.jl:5
  show(io::IO, frame::Records.Frame{E}) where E in Records at /mnt/c/Users/Maxime/wsl/.julia/v0.6/Records/src/frames.jl:16
Possible fix, define
  show(::IO, ::Records.Frame{Records.Entity{AutomotiveDrivingModels.VehicleState,AutomotiveDrivingModels.VehicleDef,Int64}})[39m

In [23]:
# Combine the restored policy with the joint safety mask into a MaskedDQNPolicy
# for `pomdp`.
masked_policy = MaskedDQNPolicy(pomdp, policy, joint_mask);

In [26]:
"""
    POMDPToolbox.action_info(policy::MaskedDQNPolicy, s)

Return a tuple `(a, info)` where `a` is `action(policy, s)` and `info` is the
result of `safe_actions(policy.problem, policy.mask, s)`.

The animation cells in this notebook read this second element back from the
history's `ainfo_hist` as the per-step safe-action sets.
"""
function POMDPToolbox.action_info(policy::MaskedDQNPolicy, s)
    chosen = action(policy, s)
    allowed = safe_actions(policy.problem, policy.mask, s)
    return (chosen, allowed)
end

In [35]:
# Roll out a single recorded episode (at most 100 steps) under `masked_policy`.
up = FastPreviousObservationUpdater{UrbanObs}()
hr = HistoryRecorder(max_steps = 100, rng = rng)

s0 = initial_state(pomdp, rng)      # start state
o0 = generate_o(pomdp, s0, rng)     # observation generated from the start state
b0 = initialize_belief(up, o0)      # initial belief built from that observation

@time hist2 = simulate(hr, pomdp, masked_policy, up, b0, s0);

  0.251545 seconds (1.11 M allocations: 35.148 MiB, 17.30% gc time)


In [42]:
# Render the most recent recorded episode (`hist2`) as an animation.
h = hist2
state_hist = h.state_hist
# Pad copies rather than the recorder's own arrays: `push!` on `h.action_hist` /
# `h.ainfo_hist` directly would mutate `hist2`, so re-running this cell would
# append one more placeholder each time.
action_hist = copy(h.action_hist)
safe_actions_hist = copy(h.ainfo_hist)
# Append a NaN placeholder action (and a one-element safe-action set) —
# presumably so both histories line up with `state_hist`; confirm in
# `animate_states`.
push!(action_hist, CarMDPAction(NaN))
push!(safe_actions_hist, [CarMDPAction(NaN)])
duration, fps, render_hist = animate_states(pomdp, state_hist, action_hist, safe_actions_hist, joint_mask, interp=false)
# Last expression of the cell: the resulting film is what the notebook displays.
film = roll(render_hist, fps = fps, duration = duration)

In [None]:
# Look up entry 2 of the POMDP's `models` collection.
# NOTE(review): this cell was never executed (`In [None]`) and `model` is not
# used in the visible cells — looks like leftover debugging; confirm before removing.
model = pomdp.models[2]


In [41]:
# Run up to 20 episodes and stop at the first one that uses the whole step
# budget, printing "time out!".
# NOTE(review): this loop sits under "Visualize trained policy" but still
# simulates `rand_pol`, not `masked_policy` — confirm which policy was intended.
@showprogress for ep=1:20
    # One value drives both the recorder limit and the time-out test, so the two
    # cannot drift apart.
    step_limit = 100
    hr = HistoryRecorder(rng=rng, max_steps=step_limit)
    s0 = initial_state(pomdp, rng)
    o0 = generate_o(pomdp, s0, rng)
    up = FastPreviousObservationUpdater{UrbanObs}()
    b0 = initialize_belief(up, o0)
    hist2 = simulate(hr, pomdp, rand_pol, up, b0, s0)
    if n_steps(hist2) >= step_limit
        println("time out!")
        break
    end
end

[32mProgress:   5%|██                                       |  ETA: 0:00:03[39m

time out!


[32mProgress:  10%|████                                     |  ETA: 0:00:06[39m[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
