Delves deep into ngsim_env/scripts/extract_ngsim_demonstrations.jl to
unearth its inner workings and the exact form of the features
it writes to ngsim.h5

In [1]:
using AutomotiveDrivingModels
using AutoRisk
using HDF5
using NGSIM

In [2]:
"""
    build_feature_extractor()

Return a `MultiFeatureExtractor` that bundles the five sub-extractors used for
the NGSIM demonstrations: core, temporal, well-behaved, car-lidar and fore-fore.

The car-lidar extractor is built with a first argument of `20` (presumably the
number of lidar beams; confirm against AutoRisk) and `carlidar_max_range = 50.`.
"""
function build_feature_extractor()
    return MultiFeatureExtractor([
        CoreFeatureExtractor(),
        TemporalFeatureExtractor(),
        WellBehavedFeatureExtractor(),
        CarLidarFeatureExtractor(20, carlidar_max_range = 50.),
        ForeForeFeatureExtractor()
    ])
end

build_feature_extractor (generic function with 1 method)

In [3]:
"""
    extract_ngsim_features(timestep_delta = 1, record_length = 10, offset = 500,
                           prime = 10, maxframes = nothing;
                           output_filename = "ngsim.h5", n_expert_files = 1)

Extract features for trajectory files `1:n_expert_files` and write them to
`../data/trajectories/<output_filename>`.

# Arguments
- `timestep_delta`: timesteps between feature extractions.
- `record_length`: number of past frames the scene record keeps track of.
- `offset`: frame offset from the ends of the trajectories.
- `prime`: priming frame count, forwarded to `extract_features`.
- `maxframes`: cap on the number of frames processed; `nothing` for no cap.

# Keywords
- `output_filename`: file name of the HDF5 output.
- `n_expert_files`: number of time periods (trajectory files) to extract.

The elapsed extraction time is reported through `tic()`/`toc()`. The return
value is whatever `write_features` returns.
"""
function extract_ngsim_features(
        timestep_delta = 1,
        record_length = 10,
        offset = 500,
        prime = 10,
        maxframes = nothing;
        output_filename = "ngsim.h5",
        n_expert_files = 1)

    extractor = build_feature_extractor()
    # trajectory index => (vehicle id => feature array)
    features_by_traj = Dict{Int, Dict{Int, Array{Float64}}}()

    tic()
    for traj_idx in 1:n_expert_files
        # each trajectory file comes with its own roadway
        trajdata = load_trajdata(traj_idx)
        roadway = get_corresponding_roadway(traj_idx)
        features_by_traj[traj_idx] = extract_features(
            extractor, trajdata, roadway,
            timestep_delta, record_length, offset, prime, maxframes
        )
    end
    toc()

    output_filepath = joinpath("../data/trajectories/", output_filename)
    println("output filepath: $(output_filepath)")
    return write_features(features_by_traj, output_filepath, extractor)
end

extract_ngsim_features (generic function with 6 methods)

In [4]:
"""
    write_features(features, output_filepath, ext)

Write the extracted features to a new HDF5 file at `output_filepath`
(opened in `"w"` mode).

`features` maps a trajectory index to a dictionary of per-vehicle feature
arrays, each of size `(n_features, n_steps)`. For every trajectory one dataset
named `"\$(traj_idx)"` is written, of size
`(n_features, maxlen, n_vehicles)`, where `maxlen` is the longest `n_steps`
over all vehicles of all trajectories; shorter sequences are zero-padded at the
end. The feature names reported by `feature_names(ext)` are stored in the
file-level attribute `"feature_names"`.

Note that the third axis follows the iteration order of the per-vehicle
dictionary; the vehicle ids themselves are not written to the file.

The file handle is closed even if writing fails part-way. Returns `nothing`.
"""
function write_features(features, output_filepath, ext)
    n_features = length(ext)

    # longest per-vehicle sequence across every trajectory; this fixes the
    # second dimension of all datasets so they can be zero-padded uniformly
    maxlen = 0
    for (traj_idx, feature_dict) in features
        for (veh_id, veh_features) in feature_dict
            maxlen = max(maxlen, size(veh_features, 2))
        end
    end
    println("max length across samples: $(maxlen)")

    h5file = h5open(output_filepath, "w")
    try
        # write trajectory features
        for (traj_idx, feature_dict) in features
            feature_array = zeros(n_features, maxlen, length(feature_dict))
            for (idx, (veh_id, veh_features)) in enumerate(feature_dict)
                n_steps = size(veh_features, 2)
                # the reshape also checks that the row count equals n_features
                feature_array[:, 1:n_steps, idx] = reshape(veh_features, (n_features, n_steps))
            end
            h5file["$(traj_idx)"] = feature_array

            println("feature_array.shape = $(size(feature_array))")
        end

        # write feature names
        attrs(h5file)["feature_names"] = feature_names(ext)
    finally
        # release the handle on both the success and the error path
        close(h5file)
    end
    return nothing
end

write_features (generic function with 1 method)

In [32]:
"""
    extract_features(ext, trajdata, roadway, timestep_delta, record_length,
                     offset, prime, maxframes; frame_span = 33, probe_veh_id = 23)

Extract per-vehicle features from `trajdata` and return them as a
`Dict{Int, Array{Float64}}` that maps each vehicle id to an array of size
`(length(ext), n_steps)`, one column per frame in which features were pulled
for that vehicle.

# Arguments
- `ext`: feature extractor; `length(ext)` gives the number of features.
- `trajdata`, `roadway`: trajectory data and the roadway it was recorded on.
- `timestep_delta`: features are pulled on frames with `frame % timestep_delta == 0`.
- `record_length`: length passed to the `SceneRecord`.
- `offset`: first frame that is processed.
- `prime`: number of frames immediately before `offset` that are fed into the
  record before extraction starts.
- `maxframes`: the loop stops once the frame counter reaches this value
  (the frame on which it is reached is not processed); `nothing` disables the cap.

# Keywords
- `frame_span`: frames `offset : offset + frame_span` are processed. The default
  of 33 keeps this exploratory run short; the full extraction range would be
  `offset : (nframes(trajdata) - offset)`.
- `probe_veh_id`: vehicle id whose feature-array shape is printed as a sanity
  check. Nothing is looked up (and nothing throws) if that vehicle never appeared.
"""
function extract_features(
        ext,
        trajdata,
        roadway,
        timestep_delta,
        record_length,
        offset,
        prime,
        maxframes;
        frame_span = 33,
        probe_veh_id = 23)
    n_features = length(ext)
    n_frames = nframes(trajdata)
    # size the scene/record for the busiest frame of the whole trajectory
    max_n_objects = maximum(n_objects_in_frame(trajdata, i) for i in 1 : n_frames)
    scene = Scene(max_n_objects)
    # 0.1 is presumably the timestep in seconds (NGSIM is sampled at 10 Hz) - confirm
    rec = SceneRecord(record_length, 0.1, max_n_objects)
    features = Dict{Int, Array{Float64}}()
    ctr = 0

    # prime the rec with the frames just before the extraction window
    for frame in (offset - prime : offset - 1)
        AutomotiveDrivingModels.update!(rec, get!(scene, trajdata, frame))
    end

    # The result is not used, but the call is kept because pull_features! is a
    # mutating function and may update the extractor's internal state.
    pull_features!(ext, rec, roadway, 1)

    for frame in offset : offset + frame_span
        ctr += 1
        println("ctr = $(ctr)")
        if maxframes !== nothing && ctr >= maxframes
            break
        end

        print("\rframe $(frame) / $(n_frames - offset)")

        # update the rec
        AutomotiveDrivingModels.update!(rec, get!(scene, trajdata, frame))

        # every timestep_delta step, extract features
        if frame % timestep_delta == 0
            for (vidx, veh) in enumerate(scene)
                veh_features = pull_features!(ext, rec, roadway, vidx)

                # add entry to features if vehicle not yet encountered
                if !haskey(features, veh.id)
                    features[veh.id] = zeros(n_features, 0)
                end

                # append this frame's features as a new column
                features[veh.id] = hcat(features[veh.id],
                    reshape(veh_features, (n_features, 1)))
            end
        end
    end

    # sanity-check output; guarded so a missing vehicle does not raise a KeyError
    if haskey(features, probe_veh_id)
        println("shape features of vehicle $(probe_veh_id)= $(size(features[probe_veh_id]))")
    else
        println("vehicle $(probe_veh_id) was not encountered in the processed frames")
    end
    println("length of keys of features i.e total number \n        of distinct vehicles encountered \n $(length(keys(features)))")

    return features
end

extract_features (generic function with 1 method)

In [33]:
# Run the extraction for a single trajectory file; the features are written to
# ../data/trajectories/ngsim_understand.h5 (see `output_filepath` in extract_ngsim_features).
extract_ngsim_features(output_filename="ngsim_understand.h5", n_expert_files=1)

ctr = 1
frame 500 / 9036ctr = 2
frame 501 / 9036ctr = 3
frame 502 / 9036ctr = 4
frame 503 / 9036ctr = 5
frame 504 / 9036ctr = 6
frame 505 / 9036ctr = 7
frame 506 / 9036ctr = 8
frame 507 / 9036ctr = 9
frame 508 / 9036ctr = 10
frame 509 / 9036ctr = 11
frame 510 / 9036ctr = 12
frame 511 / 9036ctr = 13
frame 512 / 9036ctr = 14
frame 513 / 9036ctr = 15
frame 514 / 9036ctr = 16
frame 515 / 9036ctr = 17
frame 516 / 9036ctr = 18
frame 517 / 9036ctr = 19
frame 518 / 9036ctr = 20
frame 519 / 9036ctr = 21
frame 520 / 9036ctr = 22
frame 521 / 9036ctr = 23
frame 522 / 9036ctr = 24
frame 523 / 9036ctr = 25
frame 524 / 9036ctr = 26
frame 525 / 9036ctr = 27
frame 526 / 9036ctr = 28
frame 527 / 9036ctr = 29
frame 528 / 9036ctr = 30
frame 529 / 9036ctr = 31
frame 530 / 9036ctr = 32
frame 531 / 9036ctr = 33
frame 532 / 9036ctr = 34
frame 533 / 9036shape features of vehicle 23= (66, 3)
keys of features 
 125
numkeys = 125
elapsed time: 4.251027035 seconds
output filepath: ../data/trajectories/ngsim_unders

Raunak's understanding of feature_array.shape = (66, a, b), which is what the function `write_features` writes to the .h5 file:
- 66 is the number of features
- a is the maximum number of frames in which any single vehicle was present, taken over all the vehicles
    - e.g. if veh.id 15 was present in 3 of the frames the loop ran over, and veh.id 58 was present in 76 of them, then this number is 76
- b is the total number of distinct vehicles encountered over the entire loop
    - Continuing with the above example, this will be 2, because two vehicles (id 15 and id 58) were encountered while looping over the frames. Even though id 15 was present in only 3 of the many frames looped over, it was seen, so it gets its own slot in the feature array that we create