In [2]:
# NOTES

# Mind map
# https://miro.com/app/board/o9J_kwo5k8s=/

# Prediction labels
# [       0     ,      1    ,   2  ]
# ["pedestrian" , "bicycle" , "car"]

# Coordinate format: [longitude, latitude]

# Time format used:
# "%Y-%m-%dT%H:%M:%S.%fZ"

# Sequence format: Geojson
#     type
#     properties:
#       captured_at (start of sequence)
#       coordinateProperties
#         cas (camera angle between 0 and 360)
#         image_keys
#     geometry
#       coordinates [longitude, latitude]

# Image format: Geojson
#    type
#    properties:
#       captured_at, 
#       camera_model, camera_make, ca, pano, seq_key, key, username, user_key
#    geometry
    

# Potential features
#     Average velocity
#     Maximum velocity
#     Maximum stop time (calculate time between neighbouring points relative to avg speed)
#     Possible to use CAS coordinate property to estimate slope and the difference in velocity?
#     Smallest turn (small turn will indicate not a car road)
#     Trajectory measurements
#     length
#     
#     
#     Image detection
#       Traffic lights (number or boolean)
#       Roundabouts (number or boolean)
#       Bicycle tracks on the side
#       Highway/number of lanes
#       Sidewalk


# Imports

In [28]:
# Imports
import json
import math
import requests
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, date
from pprint import pprint as pp


# Utility functions

In [29]:
# Utility functions
def readJson(file):
    """Load a JSON document from disk.

    @params:
        file - Required : path of the JSON file to read (Str)
    Returns the decoded JSON content (whatever ``json.load`` produces).
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # text encoding, which is not UTF-8 everywhere.
    with open(file, "r", encoding="utf-8") as f:
        return json.load(f)

def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█'):
    """
    Redraw a single-line text progress bar; meant to be called once per loop step.
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : text printed before the bar (Str)
        suffix      - Optional  : text printed after the percentage (Str)
        decimals    - Optional  : number of decimals shown in the percentage (Int)
        length      - Optional  : width of the bar in characters (Int)
        fill        - Optional  : character used for the completed part (Str)
    """
    fraction_done = iteration / float(total)
    percent_template = "{0:." + str(decimals) + "f}"
    percent_text = percent_template.format(100 * fraction_done)

    done_cells = int(length * iteration // total)
    todo_cells = length - done_cells
    rendered_bar = fill * done_cells + '-' * todo_cells

    # The carriage returns keep the cursor on the same line so the next call overwrites it.
    print('\r%s |%s| %s%% %s' % (prefix, rendered_bar, percent_text, suffix), end = '\r')

    # Move on to a fresh line once the final iteration has been drawn.
    if iteration == total:
        print()

def requestImagesBySequence(sequence_key):
    """Fetch the images that belong to one sequence from the Mapillary v3 images endpoint.

    @params:
        sequence_key - Required : key of the sequence whose images are requested (Str)
    Returns the decoded JSON body of the response.
    Raises requests.HTTPError on a 4xx/5xx answer and requests.Timeout when the
    server does not respond in time.

    NOTE(review): reads the module-level name `client_id`, which is not defined in
    the visible cells -- it must be set before this function is called.
    """
    response = requests.get(
        "https://a.mapillary.com/v3/images/",
        # Let requests build and URL-encode the query string instead of concatenating it by hand.
        params={
            "sequence_keys": sequence_key,
            "per_page": 10000,
            "client_id": client_id,
        },
        # Without a timeout a stalled connection would block the notebook forever.
        timeout=30,
    )
    # Surface HTTP errors here rather than handing an error payload to the caller.
    response.raise_for_status()
    return response.json()

# Feature functions

In [8]:
# Functions
def avg_speed_n_forward(sequence, start,n):
    """Placeholder: ignores all three arguments and always returns 0."""
    return 0

def height_calculations(images):
    """Placeholder for height-based features; currently always returns (0, 0).

    @params:
        images - Required : iterable of images (printed and copied, not otherwise used yet)
    """
    print(images)
    # The original `data.append[im]` subscripted the method instead of calling it
    # and raised TypeError on the first image; collect the items properly.
    data = list(images)
    return 0,0

def calc_sd(feat_list):
    """Return the population standard deviation (divides by N, not N-1) of feat_list."""
    count = len(feat_list)
    centre = sum(feat_list)/count
    squared_offsets = [(value-centre)**2 for value in feat_list]
    variance = sum(squared_offsets)/len(squared_offsets)
    return np.sqrt(variance)

def calc_median(_min, _max):
    """Return the midpoint between _min and _max.

    Only the two extremes are available here, so this is the mid-range value,
    not a true sample median. The previous `(_max-_min)/2` was half the range
    and could fall below `_min` (e.g. 2.5 for the interval 5..10).
    """
    return (_min+_max)/2

In [32]:
class Sequence:
    """Speed, acceleration and shape features of one image sequence (trajectory).

    The constructor stores the id and the images (in reversed order) and then
    calls iterate(), which adds one instance attribute per feature when the
    sequence is valid. get_feature_names()/get_features() expose every instance
    attribute except ``images`` in the order the attributes were assigned, so
    that assignment order is also the column order of anything built from them.
    """

    # Layout of the images' "captured_at" timestamps.
    TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"

    def __init__(self, sequence):
        # sequence: {"id": ..., "images": [geojson image feature, ...]}
        self.id = sequence["id"]
        self.images = list(reversed(sequence["images"]))
        self.iterate()

    def iterate(self):
        """Walk the consecutive image pairs and store the features on the instance.

        Returns 0 without setting any feature when valid_sequence() is False,
        otherwise returns None after setting the attributes.
        """
        if not self.valid_sequence():
            return 0

        num_segments = len(self.images)-1
        dist = 0
        duration = 0
        max_speed = 0
        max_accel = 0
        num_stops = 0
        min_speed = 100000
        min_accel = 100000
        speeds = []
        accels = []
        # Uses _haversine rather than self.distance: the `distance` attribute
        # assigned at the end of this method shadows the method on the instance,
        # which made a second iterate() call fail.
        dist_start_end = self._haversine(
            self.images[0]["geometry"]["coordinates"],
            self.images[-1]["geometry"]["coordinates"],
        )

        prev = self.images[0]
        prev_time = self._captured_at(prev)
        prev_speed = 0

        # Counters indexed by the category returned by isWithinMeanSpeed.
        percent_within = [0,0,0,0]

        for i,feat in enumerate(self.images[1:]):
            feat_dist = self._haversine(prev["geometry"]["coordinates"], feat["geometry"]["coordinates"])
            feat_time = self._captured_at(feat)
            feat_dur = abs(feat_time-prev_time).total_seconds()

            if feat_dur == 0.0:
                # Two images with the same timestamp: no speed can be derived.
                feat_speed = 0
                feat_accel = 0
            else:
                feat_speed = feat_dist/feat_dur
                # Change in speed per second. The previous expression
                # `prev_speed-feat_speed/feat_dur` divided only feat_speed and
                # had the sign reversed.
                feat_accel = (feat_speed-prev_speed)/feat_dur

            percent_within[self.isWithinMeanSpeed(feat_speed)] += 1
            # speed features
            if feat_speed < min_speed:
                min_speed = feat_speed
            if feat_speed > max_speed:
                max_speed = feat_speed
            if feat_speed < 0.2:
                num_stops += 1
            # acceleration features; the first segment is measured against
            # prev_speed = 0 and is therefore skipped for the maximum
            if feat_accel > max_accel and i > 0:
                max_accel = feat_accel
            if feat_accel < min_accel:
                min_accel = feat_accel

            speeds.append(feat_speed)
            accels.append(feat_accel)

            dist += feat_dist
            duration += feat_dur
            prev = feat
            prev_time = feat_time
            prev_speed = feat_speed

        # Keep this assignment order: it defines the feature order.
        self.avg_speed = dist/duration
        self.min_speed = min_speed
        self.max_speed = max_speed
        self.max_accel = max_accel
        self.max_deaccel = min_accel
        self.med_speed, self.med_accel = calc_median(min_speed, max_speed),calc_median(min_accel, max_accel)
        self.sd_speed,self.sd_accel = calc_sd(speeds),calc_sd(accels)
        self.percent_ped_speed = 100*percent_within[0]/num_segments
        self.percent_bic_speed = 100*percent_within[1]/num_segments
        self.percent_car_speed = 100*percent_within[2]/num_segments
        self.num_stops = num_stops
        # a round trip (start == end) gets the sentinel value 100000
        self.sinuosity = dist/dist_start_end if dist_start_end!=0 else 100000
        # NOTE: from here on the instance attribute `distance` (total length)
        # hides the distance() method on this instance; the name is kept because
        # it is one of the exported feature names.
        self.distance = dist
        self.duration = duration

    def get_feature_names(self):
        """Return the names of all instance attributes except ``images``."""
        features = vars(self)
        excluded_keys = {"images"}
        return [x for x in features if x not in excluded_keys]

    def get_features(self):
        """Return the values matching get_feature_names(), in the same order."""
        features = vars(self)
        excluded_keys = {"images"}
        return [features[x] for x in features if x not in excluded_keys]

    def distance(self, start, end):
        """Great-circle distance between two [longitude, latitude] points in degrees.

        Only reachable on instances whose iterate() did not set the ``distance``
        feature (see the note in iterate()).
        """
        return self._haversine(start, end)

    @classmethod
    def _captured_at(cls, image):
        # Parse an image's "captured_at" property into a datetime.
        return datetime.strptime(image["properties"]["captured_at"], cls.TIME_FORMAT)

    @staticmethod
    def _haversine(start, end):
        # Haversine distance for [longitude, latitude] pairs given in degrees,
        # on a sphere of radius R (same unit as the result).
        R = 6373000
        lon1, lat1 = Sequence.deg2rad(start[0]), Sequence.deg2rad(start[1])
        lon2, lat2 = Sequence.deg2rad(end[0]), Sequence.deg2rad(end[1])
        dlon = lon2 - lon1
        dlat = lat2 - lat1
        a = (math.sin(dlat/2)**2)+math.cos(lat1)*math.cos(lat2)*math.sin(dlon/2)**2
        # Rounding can push `a` marginally outside [0, 1], which would make
        # sqrt(1-a) fail for near-antipodal points.
        a = min(1.0, max(0.0, a))
        c = 2*math.atan2(math.sqrt(a), math.sqrt(1-a))
        return R*c

    @staticmethod
    def deg2rad(deg):
        """Convert degrees to radians."""
        return deg*(math.pi/180)

    @staticmethod
    def isWithinMeanSpeed(speed):
        """Classify a speed: 0 pedestrian band, 1 bicycle band, 2 above the car minimum, 3 none."""
        # From research in order to calculate the percentage of "within expected speeds"
        mean_ped_speed = 1.25
        mean_bic_speed = 5.3
        min_car_speed = 9.44
        sd_ped = 0.245
        sd_bic = 1.56
        if abs(mean_ped_speed-speed) < sd_ped:
            return 0
        elif abs(mean_bic_speed-speed) < sd_bic:
            return 1
        elif speed > min_car_speed:
            return 2
        else:
            return 3

    def valid_sequence(self):
        """Return True when the sequence has at least 5 images and a non-zero time span."""
        # sequences with fewer than 5 images are rejected
        if len(self.images) < 5:
            return False
        # some sequences have invalid durations, not including those
        start = self._captured_at(self.images[0])
        end = self._captured_at(self.images[-1])
        if start == end:
            return False
        return True

    def plot_trajectory(self):
        """Plot the trajectory with longitude on the x axis and latitude on the y axis."""
        # coordinates are [longitude, latitude]
        lon = [image["geometry"]["coordinates"][0] for image in self.images]
        lat = [image["geometry"]["coordinates"][1] for image in self.images]
        plt.plot(lon, lat, 'bo--', linewidth=2, markersize=6)
        plt.xlabel("longitude")
        plt.ylabel("latitude")
        plt.show()

In [10]:
# Build the features of the first sequence and list the feature names.
# NOTE(review): `data` is assigned by the readJson cell further down, so this
# cell fails on a fresh top-to-bottom run; binding the module-level name `self`
# is also easy to confuse with a method's `self`.
self = Sequence(data["features"][0])
self.get_feature_names()


['id',
 'avg_speed',
 'min_speed',
 'max_speed',
 'max_accel',
 'max_deaccel',
 'med_speed',
 'med_accel',
 'sd_speed',
 'sd_accel',
 'percent_ped_speed',
 'percent_bic_speed',
 'percent_car_speed',
 'num_stops',
 'sinuosity',
 'distance',
 'duration']

In [37]:
# write features for an area
def area_features(data, f_loc="training_data/features.csv", write=False, stop_early=False, sample_size=20):
    """Compute the features of every sequence in an area and optionally write them as CSV.

    @params:
        data        - Required  : collection with the raw sequences under data["features"] (Dict)
        f_loc       - Optional  : path of the CSV file to (over)write (Str)
        write       - Optional  : only when True is f_loc opened and written (Bool)
        stop_early  - Optional  : when True, process only the first sample_size sequences (Bool)
        sample_size - Optional  : number of sequences processed when stop_early is True (Int)

    The CSV columns are the feature names of Sequence with the id moved to the
    last column. Sequences for which Sequence.valid_sequence() is False carry
    no features and are skipped instead of producing a row holding only the id.
    """
    raw_sequences = data["features"]
    num_seq = len(raw_sequences)
    print(f"[INFO] {num_seq} sequences:")
    if stop_early:
        raw_sequences = raw_sequences[:max(sample_size, 0)]
    total = len(raw_sequences)

    # Open (and thereby truncate) the output only when something will be written;
    # previously write=False still emptied an existing file.
    out_file = open(f_loc, "w") if write else None
    header_written = False
    try:
        for num, raw in enumerate(raw_sequences, start=1):
            seq = Sequence(raw)
            if out_file is not None and seq.valid_sequence():
                if not header_written:
                    # Take the header from the first sequence that actually has
                    # features, not blindly from data["features"][0].
                    names = seq.get_feature_names()
                    out_file.write(",".join(names[1:] + [names[0]]) + "\n")
                    header_written = True
                values = seq.get_features()
                out_file.write(",".join(str(v) for v in values[1:] + [values[0]]) + "\n")
            # Counting from 1 lets the bar reach 100% and print its closing newline.
            printProgressBar(num, total)
    finally:
        # Close the file even when a sequence raises half-way through.
        if out_file is not None:
            out_file.close()
    print("[INFO] Done generating features")

In [35]:
# Load the sequence collection; area_features reads the sequences from data["features"].
data = readJson("mapillary_data/mapillary_data_no_dupes.json")

In [38]:
# Process every sequence in `data` (no early stop) and write the features to the CSV given by f_loc.
area_features(data, f_loc="training_data/features_no_dupes.csv",stop_early=False, write=True)

[INFO] 2327 sequences:
[INFO] Done generating features██████████████████████████████████████████████████████████████████████-| 100.0% 
