# Test the neural network policy in the training environment

In [1]:
using POMDPs, StatsBase, POMDPToolbox, QMDP, DeepRL, AutomotiveDrivingModels, AutoViz, SARSOP, Images, PyCall, Reel




In [2]:
include("occluded_crosswalk_env.jl")
include("pomdp_types.jl")
include("spaces.jl")
include("transition.jl")
include("observation.jl")
include("belief.jl")
include("decomposition.jl")
include("adm_helpers.jl")
include("render_helpers.jl")


 in module Logging at C:\Users\Maxime\.julia\v0.6\Logging\src\Logging.jl:115.


In [3]:
"""
    POMDPs.generate_o(pomdp::OCPOMDP, s::OCState, rng::AbstractRNG)

Sample an observation for state `s` when no action or successor state is available,
as is the case for the first observation of an episode.

Delegates to the five-argument `generate_o`, passing `OCAction(0.)` as the action and
`s` for both state arguments.
"""
function POMDPs.generate_o(pomdp::OCPOMDP, s::OCState, rng::AbstractRNG)
    placeholder_action = OCAction(0.)
    return generate_o(pomdp, s, placeholder_action, s, rng)
end

In [4]:
"""
    preprocess_o(o::OCObs, pomdp::OCPOMDP, queue=nothing; nframes=10)

Convert observation `o` to an array and merge it into a stack of recent frames.

# Arguments
- `o`: observation, converted with `convert_o(Vector{Float64}, o, pomdp)`. The result is
  expected to be a 3-dimensional array `(w, h, nch)`.
- `pomdp`: problem instance, passed through to `convert_o`.
- `queue`: existing frame stack, or `nothing` to start a new one.

# Keywords
- `nframes`: number of copies of the frame used to initialise a new stack. Ignored when
  `queue` is given.

# Returns
- With `queue === nothing`: a `(w, h, nch * nframes)` array holding `nframes` copies of
  the converted observation along the third dimension.
- Otherwise: the result of `efficient_dequeue(queue, o_mat)`.

# Throws
- `ArgumentError` if `nframes < 1`.
"""
function preprocess_o(o::OCObs, pomdp::OCPOMDP, queue=nothing; nframes::Integer=10)
    nframes >= 1 || throw(ArgumentError("nframes must be at least 1, got $nframes"))
    o_mat = convert_o(Vector{Float64}, o, pomdp)
    if queue === nothing
        # No history yet: fill every slot of the stack with the current frame.
        # `repeat` along the third dimension already yields (w, h, nch * nframes),
        # so no further reshape is needed.
        w, h, nch = size(o_mat)
        return repeat(reshape(o_mat, (w, h, nch)), outer=(1, 1, nframes))
    end
    return efficient_dequeue(queue, o_mat)
end
        
        

preprocess_o (generic function with 2 methods)

In [5]:
"""
    efficient_dequeue(queue, new_elem, nch=size(new_elem, 3),
                      nqueue=div(size(queue, 3), size(new_elem, 3)))

Push `new_elem` into a fixed-length queue and drop the oldest element.

`queue` holds `nqueue` elements of `nch` channels each, stacked along the third
dimension with the oldest element first. The returned array contains the previous
contents shifted by `nch` channels towards the front, with `new_elem` written into the
last `nch` channels.

`queue` itself is not modified: `circshift` returns a new array, so one array of the
size of `queue` is allocated per call.

# Throws
- `DimensionMismatch` if `size(queue, 3) != nqueue * nch`, because the slot that is
  overwritten would then not be the one holding the wrapped-around oldest element.
"""
function efficient_dequeue(
    queue::AbstractArray{<:Real,3},
    new_elem::AbstractArray{<:Real,3},
    nch::Integer=size(new_elem, 3),
    nqueue::Integer=div(size(queue, 3), size(new_elem, 3)),
)
    size(queue, 3) == nqueue * nch || throw(DimensionMismatch(
        "queue depth $(size(queue, 3)) is not nqueue * nch = $(nqueue * nch)"))
    # Shift every element one slot towards the front; the oldest element wraps
    # around into the last slot.
    shifted = circshift(queue, (0, 0, -nch))
    # Overwrite the wrapped-around oldest element with the new one.
    shifted[:, :, (nqueue - 1) * nch + 1:nqueue * nch] = new_elem
    return shifted
end

efficient_dequeue

In [6]:
# Evaluation environment: a default OCPOMDP with the fields below overridden.
pomdp = OCPOMDP()
pomdp.p_birth = 0.3
# Position and velocity use the same resolution and the same observation noise.
pomdp.pos_res = pomdp.vel_res = 0.5
pomdp.pos_obs_noise = pomdp.vel_obs_noise = 0.5
# Fixed seed: this generator is passed to every sampling call in the evaluation below.
rng = MersenneTwister(1)

MersenneTwister(UInt32[0x00000001], Base.dSFMT.DSFMT_state(Int32[1749029653, 1072851681, 1610647787, 1072862326, 1841712345, 1073426746, -198061126, 1073322060, -156153802, 1073567984  …  1977574422, 1073209915, 278919868, 1072835605, 1290372147, 18858467, 1815133874, -1716870370, 382, 0]), [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], 382)

In [7]:
# Load the Python TensorFlow package through PyCall and bind it to `tf`.
# NOTE(review): `tf` is not referenced in the cells visible here — confirm it is still needed.
@pyimport tensorflow as tf
# Load the Python module `dqn.nn_wrapper`; its `NNWrapper` class is instantiated below.
nn_wrapper = pyimport("dqn.nn_wrapper")


PyObject <module 'dqn.nn_wrapper' from 'C:\\Users\\Maxime\\OneDrive - Leland Stanford Junior University\\Research\\policy-correction\\dqn\\nn_wrapper.py'>

In [8]:
# Instantiate the Python class `NNWrapper` on the directory "../../dqn/test16/".
# The TensorFlow log printed by this cell shows parameters being restored from
# `model.ckpt` in that directory.
nn = nn_wrapper[:NNWrapper]("../../dqn/test16/")

INFO:tensorflow:Restoring parameters from ../../dqn/test16/model.ckpt


2017-10-18 11:59:43.536158: W c:\tf_jenkins\home\workspace\release-win\m\windows\py\35\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE instructions, but these are available on your machine and could speed up CPU computations.
2017-10-18 11:59:43.536196: W c:\tf_jenkins\home\workspace\release-win\m\windows\py\35\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE2 instructions, but these are available on your machine and could speed up CPU computations.
2017-10-18 11:59:43.536203: W c:\tf_jenkins\home\workspace\release-win\m\windows\py\35\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE3 instructions, but these are available on your machine and could speed up CPU computations.
2017-10-18 11:59:43.536209: W c:\tf_jenkins\home\workspace\release-win\m\windows\py\35\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn

PyObject <dqn.nn_wrapper.NNWrapper object at 0x000000002A8A7B70>

In [9]:
# Roll out the neural-network policy for `n_ep` episodes and tally the outcomes.
n_ep = 100          # number of evaluation episodes
max_steps = 100     # maximum number of transitions per episode
r_avg = 0.0         # sum of episode returns; divided by n_ep after the loop
step_avg = 0.0      # sum of final step counters; divided by n_ep after the loop
crash = 0
time_out = 0
success = 0
dist = initial_state_distribution(pomdp)
a_map = actions(pomdp)      # action set is the same for every episode
saved_states = OCState[]    # successor states of all episodes, consumed by the visualization

for episode in 1:n_ep
    # Discounting is currently disabled (the `disc` update below is commented out),
    # so `r_total` is the undiscounted return.
    disc = 1.0
    r_total = 0.0
    s = rand(rng, dist)
    # Without a previous frame stack, `preprocess_o` builds one from the first observation.
    o = preprocess_o(generate_o(pomdp, s, rng), pomdp)
    step = 1

    while !isterminal(pomdp, s) && step <= max_steps # TODO also check for terminal observation
        # NOTE(review): the `+1` presumably maps a 0-based index returned by the Python
        # wrapper to Julia's 1-based indexing — confirm against nn_wrapper.py.
        a = a_map[nn[:action](o)+1]
        sp, o_, r = generate_sor(pomdp, s, a, rng)
        push!(saved_states, sp)
        r_total += disc*r

        s = sp
        # Push the new observation into the frame stack fed to the network.
        o = preprocess_o(o_, pomdp, o)

        # disc *= discount(pomdp)
        step += 1
    end

    r_avg += r_total
    if r_total <= -1
        crash += 1
    elseif !isterminal(pomdp, s)
        # The loop only stops in a non-terminal state when the step budget ran out.
        # Checking the state rather than the counter keeps episodes that terminate on
        # one of the last allowed transitions from being counted as time-outs.
        time_out += 1
    else
        success += 1
    end
    # NOTE(review): `step` starts at 1 and is incremented after every transition, so it
    # ends at (number of transitions + 1). Kept as is so the reported figure stays
    # comparable with earlier runs.
    step_avg += step
end
r_avg /= n_ep
step_avg /= n_ep
println("Average reward: $r_avg; Average # of steps: $step_avg; # of crashes: $crash; # of time out $time_out; # of successes $success")

Average reward: 0.94; Average # of steps: 27.68; # of crashes: 3; # of time out 0; # of successes 97


## Visualize policy

In [10]:
# Render the states recorded during evaluation. `saved_states` accumulates the successor
# states of all episodes, so the animation covers every episode back to back.
# NOTE(review): `animate_states` is defined outside the visible cells (presumably in
# render_helpers.jl) — confirm the meaning and units of `duration` and `fps`.
duration, fps, render_states = animate_states(pomdp, saved_states)
# Playback speed multiplier; 1 leaves `fps` and `duration` unchanged.
speed_factor = 1
# The frame rate is multiplied and the duration divided by the same factor.
film = roll(render_states, fps = fps*speed_factor, duration = duration/speed_factor)