In [4]:
#---------------------------------------------------
# Written for ECE 1724 Winter 2020 by Michael D.
#---------------------------------------------------

using YOLO # Based on Julia's repository YOLO v2
push!(LOAD_PATH, "/home/ec2-user/efs/julia/")
using PSO # Based on https://github.com/yuehhua/PSO.jl
using Dates, LightXML


# Run-mode switches read by the rest of this cell.
# run_tests: when true, the YOLO model/dataset is not loaded and `classify`
#            serves synthetic detections from `test_vids` instead.
run_tests = true
# verbose: when true, `process_frames` prints one status line per frame.
verbose = true
# tvsize: number of frames generated for each synthetic test video.
tvsize = 5


"""
    test_frame(crossing=false, idx=1, n=tvsize)

Build one synthetic frame made of three randomly jittered `YOLO.TruthLabel`s.

When `crossing` is true the base box position is `(idx/n, idx/n)`, so it moves
along the diagonal as `idx` grows; otherwise the base position is drawn
uniformly from `[0, 0.2)` on each axis. Base width and height are drawn from
`[0, 0.01)`. Every coordinate of each of the three boxes then receives its own
uniform offset in `[0, 0.1)`.
"""
function test_frame(crossing=false, idx=1, n=tvsize)
    if crossing
        x = idx / n
        y = idx / n
    else
        x = rand() * 0.2
        y = rand() * 0.2
    end
    w = rand() * 0.01
    h = rand() * 0.01

    # Independent positive jitter per coordinate.
    jitter(v) = v + rand() * 0.1

    # NOTE(review): 15 and 1 are passed through to YOLO.TruthLabel unchanged;
    # presumably a class id and a confidence/flag -- confirm against YOLO.jl.
    return [
        YOLO.TruthLabel(YOLO.BBOX(jitter(x), jitter(y), jitter(w), jitter(h)), 15, 1)
        for _ in 1:3
    ]
end


#----------------------Setup code---------------------
# Builds the globals used by the functions below: the YOLO model and dataset
# (real mode only), the synthetic test videos, and the shared `allboxes` list.
println("Initializing...")

if !run_tests
    #YOLO.download_dataset("voc2007")
    settings = YOLO.pretrained.v2_tiny_voc.load(minibatch_size=1)
    model = YOLO.v2_tiny.load(settings)
    YOLO.loadWeights!(model, settings)
    voc = YOLO.datasets.VOC.populate()
end

# Ten random ground-truth flags (true = the synthetic video contains a crossing).
test_lbls = [rand() > 0.5 for _ in 1:10]

# One synthetic video of `tvsize` frames per flag.
test_vids = [[test_frame(lbl, i, tvsize) for i in 1:tvsize] for lbl in test_lbls]

# Per-frame detections of the video currently being processed; filled by
# `predict_crossing_pso` and cleared by `process_frames`.
allboxes = []

println("Initialization complete")
#----------------------------------------------------


"""
    classify(img_idx; render=false, vid="video_001")

Return the detections for frame `img_idx`.

In test mode (`run_tests == true`) the numeric suffix of `vid` (the text left
after removing `"video_"`) selects a synthetic video from `test_vids`, and
frame `img_idx` of that video is returned.

Otherwise the `img_idx`-th image path in sorted order is located in
`voc.image_paths`, loaded, run through `model`, and post-processed with
confidence and IoU thresholds of 0.3. With `render=true` the result is also
rendered via `YOLO.renderResult` with `save_file="test.jpg"`.
"""
function classify(img_idx; render=false, vid="video_001")
    # Test mode: serve pre-generated synthetic labels, no model involved.
    if run_tests
        video_number = parse(Int, replace(vid, r"video_" => ""))
        return test_vids[video_number][img_idx]
    end

    # Map the position in sorted order back to the dataset's own index.
    img = sort(voc.image_paths)[img_idx]
    img_idx = findfirst(p -> p == img, voc.image_paths)

    vocloaded = YOLO.load(voc, settings, indexes = [img_idx])
    res = model(vocloaded.imstack_mat)
    predictions = YOLO.postprocess(res, settings, conf_thresh=0.3, iou_thresh=0.3)

    if render
        #using Makie, YOLO
        mtx = vocloaded.imstack_mat[:, :, :, 1]
        YOLO.renderResult(mtx, predictions, settings, save_file="test.jpg")
        #display(scene)
    end

    return predictions
end

    
"""
    process_frames(video_id)

Score every frame found under the JAAD image directory of `video_id` and
return the highest crossing score seen (0.0 if no frame scored above zero).

For each file, detections come from `classify`; labels whose `class` is not
positive are discarded and the remaining bounding boxes are passed to
`predict_crossing_pso`. The global `allboxes` is emptied before processing
starts. In test mode the function returns as soon as the frame index reaches
`tvsize - 1`.
"""
function process_frames(video_id)
    best_score = 0.0
    pedestrians = []
    empty!(allboxes)
    frames_dir = string("/home/ec2-user/efs/julia/JAAD/images/", video_id)

    for (_, _, files) in walkdir(frames_dir)
        for (i, file) in enumerate(files)
            started = Dates.now()

            detections = classify(i, vid=video_id)
            labelled = filter(lbl -> lbl.class > 0, detections)
            bboxes = [lbl.bbox for lbl in labelled]

            cross_score = predict_crossing_pso(pedestrians, bboxes, i)
            if verbose
                println("process_frames() vid = ", video_id, ", file = ", file,
                        ", score = ", cross_score, ", ts = ", Dates.now() - started)
            end

            if cross_score > best_score
                best_score = cross_score
            end

            # Synthetic videos are short: stop once frame tvsize - 1 is done.
            if run_tests && i >= tvsize - 1
                return best_score
            end
        end
    end
    return best_score
end


"""
    process_labels(video_id)

Return `true` if the JAAD annotation file of `video_id` contains at least one
box whose `cross` attribute has the content `"crossing"`, `false` otherwise.

The file `/home/ec2-user/efs/julia/JAAD/annotations/<video_id>.xml` is parsed
with LightXML; the search walks `track` -> `box` -> `attribute` elements and
stops at the first match. A one-line summary is printed in both cases.

The parsed document is always released with `LightXML.free`, including on the
early-return path and when an error is thrown, so the underlying native
document is not leaked.
"""
function process_labels(video_id)
    xdoc = parse_file(string("/home/ec2-user/efs/julia/JAAD/annotations/", video_id, ".xml"))
    try
        # get the root element
        xroot = root(xdoc)  # an instance of XMLElement

        for t in xroot["track"]
            for c in t["box"]
                for a in c["attribute"]
                    if "cross" == attribute(a, "name") && "crossing" == content(a)
                        println(string(video_id, " cross = ", content(a)))
                        return true
                    end
                end
            end
        end
        println(string(video_id, " has no crossing pedistrians"))
        return false
    finally
        # LightXML documents wrap native memory that must be freed explicitly.
        LightXML.free(xdoc)
    end
end

"""
    centroid(box)

Return `(x + w/2, y + h/2)` for the rectangle stored in the first element of
`box`. `box` is any indexable container (for example a `(bbox, index)` tuple
or a one-element vector) whose first element has `x`, `y`, `w` and `h` fields.
"""
function centroid(box)
    rect = box[1]
    cx = rect.x + rect.w / 2
    cy = rect.y + rect.h / 2
    return (cx, cy)
end

"""
    distance(c1, c2)

Return the Euclidean distance between two 2-D points, each given as an
indexable pair `(x, y)`.
"""
function distance(c1, c2)
    dx = c2[1] - c1[1]
    dy = c2[2] - c1[2]
    return sqrt(dx^2 + dy^2)
end


"""
    closest(objects, x)

Return `(object, position)` for the element of `objects` whose centroid lies
nearest to the point `x`. On ties the earliest element wins. When `objects`
is empty the placeholder `((0.0, 0.0), 0)` is returned.
"""
function closest(objects, x)
    best_dist = -1.0          # -1 marks "nothing seen yet"
    best_obj = (0.0, 0.0)
    best_idx = 0
    for (idx, candidate) in enumerate(objects)
        d = distance(x, centroid(candidate))
        if best_dist == -1 || d < best_dist
            best_dist, best_obj, best_idx = d, candidate, idx
        end
    end
    return (best_obj, best_idx)
end


"""
    crossing_probability(x, args=allboxes[size(allboxes, 1)], kwargs...)

Objective function handed to `pso`: return the *negated* crossing score of the
detection in the latest frame of `allboxes` whose centroid is nearest to the
point `x`. `predict_crossing_pso` negates the optimiser's result again to get
the score back. `args` and the trailing positional `kwargs` are accepted so
the optimiser can pass extra arguments, but they are not used.

Scoring:
- No detection in the latest frame: the score is 0.
- No usable match in the previous frame (first frame, or the stored match
  index does not point into the previous frame): `1 - distance to (0.5, 0.5)`,
  plus a bias of 0.4 when the box contains the point `(0.5, 0.5)`.
- Otherwise: `1 - proximity`, plus/minus the centroid displacement since the
  previous frame (plus when the box did not move away from `(0.5, 0.5)`),
  minus a small penalty when the box area shrank.

The score is capped at 1.0 from above; it is not clamped from below.
"""
function crossing_probability(x, args=allboxes[size(allboxes, 1)], kwargs...)
    # TODO: Train vision module (YOLO) to identify the road
    #       compare box with the road's bounding box
    centre = (0.5, 0.5)
    frame = allboxes[end]

    # Without detections `closest` only returns a placeholder that has no
    # box fields, so there is nothing to score.
    isempty(frame) && return 0.0

    bbox, _ = closest(frame, x)
    box = bbox[1]
    prev_idx = Int(bbox[2])

    # `append_path` stores index 0 when the previous frame had no detections;
    # indexing with it would throw a BoundsError, so treat that case (and any
    # out-of-range index) like "no previous observation".
    has_prev = length(allboxes) > 1 && 1 <= prev_idx <= length(allboxes[end - 1])

    if !has_prev
        covers_centre = box.x < 0.5 && box.x + box.w > 0.5 &&
                        box.y < 0.5 && box.y + box.h > 0.5
        bias = covers_centre ? 0.4 : 0.0
        prob = min(1.0, 1 - distance(centroid(bbox), centre) + bias)
    else
        prev = allboxes[end - 1][prev_idx]
        prox = distance(centroid(bbox), centre)
        # +1 when the box is at least as close to the centre as before.
        dir = prox <= distance(centroid(prev), centre) ? 1 : -1
        momentum = distance(centroid(bbox), centroid(prev)) # < 1
        # Area ratio below 1 means the box shrank since the previous frame.
        rotation = 1.0 - min(1.0, (box.w * box.h) / (prev[1].w * prev[1].h)) # < 1
        prob = min(1.0, 1 - prox + dir * momentum - 0.025 * rotation)
    end
    return -prob
end


"""
    bounds(x, args=allboxes, kwargs...)

Constraint callback handed to `pso`. Finds the element of `args` whose
centroid is nearest to `x` and returns `[d, d]`, where `d` is the distance
from that centroid to `(0.5, 0.5)`. Extra positional arguments are ignored.

NOTE(review): how `pso` interprets the returned vector is defined by PSO.jl
and is not visible here.
"""
function bounds(x, args=allboxes, kwargs...)
    nearest, _ = closest(args, x)
    gap = distance(centroid(nearest), (0.5, 0.5))
    return [gap, gap]
end


"""
    append_path(paths, bboxes, frame)

Append a new frame entry to `paths` and return `paths`.

The new entry holds one `(bbox, i)` tuple per element of `bboxes`. For the
first frame `i` is always 1; for later frames `i` is the position, within the
previous frame's entry, of the item whose centroid is nearest to `bbox`
(as reported by `closest`). `frame` is accepted but not used.
"""
function append_path(paths, bboxes, frame)
    push!(paths, [])
    current = paths[end]
    for bbox in bboxes
        if length(paths) <= 1
            # Nothing to match against yet.
            match_idx = 1
        else
            # `centroid` expects a container, hence the one-element wrapper.
            _, match_idx = closest(paths[end - 1], centroid([bbox]))
        end
        push!(current, (bbox, match_idx))
    end
    return paths
end


"""
    predict_crossing_pso(paths, bboxes, frame)

Record the detections of the current frame and return its crossing score.

`bboxes` are appended to `paths` via `append_path`, a copy of the resulting
frame entry is pushed onto the global `allboxes`, and `pso` is run with
`crossing_probability` as objective and `bounds` as constraint over the box
`[0, 10] x [0, 10]`. The objective returns negated scores, so the optimum is
negated again before it is returned.
"""
function predict_crossing_pso(paths, bboxes, frame)
    append_path(paths, bboxes, frame)

    lower = [0.0, 0.0]
    upper = [10.0, 10.0]

    # Store this frame's (bbox, index) tuples in their own vector.
    push!(allboxes, [])
    append!(allboxes[end], paths[end])

    #pso(...,bounds,args, kwargs, swarm, omega, phip, phig, maxiter, minstep, minfn, vb, pout)
    # NOTE(review): by the order listed above the trailing values would be
    # swarm = 10, omega = phip = phig = 0.0, maxiter = 0, minstep = minfn = 0.0,
    # vb = false -- confirm against the PSO.jl signature in use.
    _, fopt = pso(crossing_probability, lower, upper, bounds, allboxes, Dict(),
                  10, 0.0, 0.0, 0.0, 0, 0.0, 0.0, false)
    return -fopt
end


"""
    format(s, nz, n)

Return `s` followed by the non-negative integer `n`, left-padded with zeros
to a width of `nz + 1` digits. Numbers that are already wider are appended
unchanged.

    format("video_", 3, 1)    -> "video_0001"
    format("video_", 3, 10)   -> "video_0010"
    format("video_", 3, 0)    -> "video_0000"
    format("video_", 3, 12345) -> "video_12345"

# Throws
- `ArgumentError` if `n` is negative.
"""
function format(s, nz, n)
    # The previous multiply-until-large-enough loop never terminated for
    # n <= 0; reject negatives explicitly and let `lpad` handle zero.
    n >= 0 || throw(ArgumentError("n must be non-negative, got $n"))
    return string(s, lpad(n, nz + 1, '0'))
end


#classify(1, render=false, generate=true)
# Driver: score every video and show the ground-truth flags next to the scores
# as the value of the cell.
if run_tests
    println(string("test_labels = ", test_lbls))
    # Synthetic videos are named video_0001, video_0002, ...
    scores = [process_frames(format("video_", 3, x)) for x in eachindex(test_lbls)]
    (test_lbls, scores)
else
    video_numbers = 1:1
    lbls = [process_labels(string("video_000", v)) for v in video_numbers]
    scores = [process_frames(string("video_000", v)) for v in video_numbers]
    (lbls, scores)
end


Initializing...
Initialization complete
test_labels = Bool[1, 1, 1, 1, 1, 1, 1, 1, 0, 0]










Stopping search: maximum iterations reached --> 0
process_frames() vid = video_0001, file = 00000.png, score = 0.758807663574212, ts = 285 milliseconds










Stopping search: maximum iterations reached --> 0
process_frames() vid = video_0001, file = 00001.png, score = 1.0, ts = 59 milliseconds










Stopping search: maximum iterations reached --> 0
process_frames() vid = video_0001, file = 00002.png, score = 0.4697725417170485, ts = 35 milliseconds










Stopping search: maximum iterations reached --> 0
process_frames() vid = video_0001, file = 00003.png, score = 0.10027264749387181, ts = 0 milliseconds










Stopping search: maximum iterations reached --> 0
process_frames() vid = video_0002, file = 00000.png, score = 0.6862589506209547, ts = 0 milliseconds










Stopping search: maximum iterations reached --> 0
process_frames() vid = video_0002, file = 00001.png, scor

(Bool[1, 1, 1, 1, 1, 1, 1, 1, 0, 0], [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.6594391970160263, 0.8262298108818378])