# Stage 1: Map Matching

## **Input**: trajectories, 

## **Output**: each GPS point should have one corresponding matched road segment.


In [66]:
import numpy as np
import pandas as pd
import torch
from torch import nn
import torch.functional as F
import os, sys
import math
import osmnx as ox
from shapely.geometry import shape, Point, LineString, MultiLineString
from sklearn.metrics.pairwise import haversine_distances
import networkx as nx
import folium

RADIUS_OF_EARTH_M = 6371000
MILES_PER_METER = 0.000621371
HOURS_PER_SECOND = 3600.0

# Basic Algorithm

In [67]:
# great circle distance 
def great_circle_dist_x_to_z(x, z):
    # convert decimal degrees to radians 
    x = np.deg2rad(x)
    z = np.deg2rad(z)
    
    longitude = z[1]
    latitude = z[0]
    prev_longitude = x[1]
    prev_latitude = x[0]
    
    # haversine formula 
    dlon = longitude - prev_longitude # lon2 - lon1 
    dlat = latitude - prev_latitude # lat2 - lat1 
    
    # a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    a = np.add(np.square(np.sin(dlat / 2)),
               np.multiply(np.cos(prev_latitude), 
                           np.multiply(np.cos(latitude), np.square(np.sin(dlon / 2)))
                          )
              )
    
    # c = 2 * asin(sqrt(a)) 
    c = np.arcsin(np.sqrt(a)) * 2
    
    return RADIUS_OF_EARTH_M * c

In [68]:
# great circle distance 
def great_circle_dist_z_to_z(df):
    # convert decimal degrees to radians 
    df = df.copy()
    df = np.deg2rad(df)
    
    longitude = df[:, 3]
    latitude = df[:, 2]
    prev_longitude = df[:, 1]
    prev_latitude = df[:, 0]
    
    # haversine formula 
    dlon = longitude - prev_longitude # lon2 - lon1 
    dlat = latitude - prev_latitude # lat2 - lat1 
    
    # a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    a = np.add(np.square(np.sin(dlat / 2)),
               np.multiply(np.cos(prev_latitude), 
                           np.multiply(np.cos(latitude), np.square(np.sin(dlon / 2)))
                          )
              )
    
    # c = 2 * asin(sqrt(a)) 
    c = np.arcsin(np.sqrt(a)) * 2
    
    return RADIUS_OF_EARTH_M * c

In [69]:
# get the x_t_i
def get_perpendicular_point(point, line):
    # point: (x, y)
    # line: ((x1, y1), (x2, y2))
    
    x, y = point
    x1, y1 = line[0]
    x2, y2 = line[1]
    
    # calculate the slope of the line
    if x2 - x1 == 0:
        # vertical line
        x_intersect = x1
        y_intersect = y
    else:
        slope = (y2 - y1) / (x2 - x1)
        intercept = y1 - slope * x1
    
        # calculate the intersection point of the line and the perpendicular line
        x_intersect = (slope*y + x - slope*intercept) / (slope**2 + 1)
        y_intersect = (slope*x_intersect) + intercept
        
    return [x_intersect, y_intersect]

# HMM Model

In [70]:
## HMM model
class HMMModel(nn.Module):
    def __init__(self, sigma = 4.07, beta = 3.0, normalize=True, 
                 prob_floor=0.0, n = 50, viterbi_trellis=[], 
                 prev_candidate_roads = [], viterbi_list =[]):
        self.normalize = normalize
        self.prob_floor = min(max(prob_floor, 0), 1)
        self.n = n
        self.sigma = sigma
        self.beta = beta
        self.weights = np.zeros((1, self.n))
        self.viterbi_trellis = viterbi_trellis  # list of particle np arrays
        self.prev_candidate_roads = prev_candidate_roads
        self.viterbi_list = viterbi_list
      
    def apply_emission_model(self, sampled_states, obs_coords):
        # TODO: simplify the code below
        # get the sigma 
        # 1.4826 median𝑡(‖𝑧𝑡 − 𝑥𝑡,𝑖∗‖𝑔𝑟𝑒𝑎𝑡-𝑐𝑖𝑟𝑐𝑙𝑒)
        
        probs = []
        for obs in sampled_states:
            # ‖𝑧𝑡 − 𝑥𝑡,𝑖‖𝑔𝑟𝑒𝑎𝑡-𝑐𝑖𝑟𝑐𝑙𝑒
            dist_obs_roads = obs[1].Z_t_TO_X_t_i
            
            # 𝑝(𝑧𝑡|𝑟𝑖)
            probs.append(np.exp(np.power(dist_obs_roads / self.sigma, 2) * (-0.5)) * (1 / (math.sqrt(2 * math.pi) * self.sigma)))

        # Normalize result
        if self.normalize:
            probs = probs / np.sum(probs)
        return probs
    
    def apply_transition_model(self, candidate_roads, dist_prev):
        # TODO: simplify the code below
        # get the beta
        # 𝛽 = 1/ln(2) mediant (|‖𝑧𝑡 − 𝑧𝑡+1‖𝑔𝑟𝑒𝑎𝑡-𝑐𝑖𝑟𝑐𝑙𝑒 − ‖𝑥𝑡,𝑖∗ −𝑥𝑡+1,𝑗∗‖𝑟𝑜𝑢𝑡𝑒|)
        
        probs = []
        # NOTE: ‖𝑧𝑡 − 𝑧𝑡+1‖𝑔𝑟𝑒𝑎𝑡-𝑐𝑖𝑟𝑐𝑙𝑒 is given by dist_prev
        for point in candidate_roads:
            prob = []
            for prev_point in self.prev_candidate_roads:
                # calculate ‖𝑥𝑡,𝑖 − 𝑥𝑡+1,𝑗‖𝑟𝑜𝑢𝑡𝑒 
                # TODO: need to recalculate the route
                # using a conventional route planner configured 
                # to give the route with the shortest distance
                dist_road = great_circle_dist_x_to_z(prev_point[1].X_t_i, point[1].X_t_i)
                
                # TODO: terminate the search for a route when ‖𝑥𝑡,𝑖 − 𝑥𝑡+1,𝑗‖𝑟𝑜𝑢𝑡𝑒 
                # becomes greater than ‖𝑧𝑡 − 𝑧𝑡+1‖𝑔𝑟𝑒𝑎𝑡-𝑐𝑖𝑟𝑐𝑙𝑒 by 2000 meters
                # or more, and assign a probability of zero.
                
                # calculate the difference between ‖𝑧𝑡 − 𝑧𝑡+1‖𝑔𝑟𝑒𝑎𝑡-𝑐𝑖𝑟𝑐𝑙𝑒 and ‖𝑥𝑡,𝑖 − 𝑥𝑡+1,𝑗‖𝑟𝑜𝑢𝑡𝑒
                diff_dist = np.abs(np.subtract(dist_road, dist_prev))
                
                # calculate the transition probability
                prob.append(np.exp(-diff_dist / self.beta) * (1 / self.beta))
                
            probs.append(prob)
        
        # Normalize result?
        if self.normalize:
            probs = probs / np.sum(probs)
        return probs  # the probs is a list of lists transition prob
    
    def update_dist(self, obs, candidate_roads, num_iter, n, max_dist2=None):
#         print(obs)
        longitude = obs[1].longitude
        latitude = obs[1].latitude
        
        # n depends on how many candidate roads you have at time t
        self.n = n
        
        obs_coords = [latitude, longitude] 
        dist_prev = obs[1].dist_from_prev_m
        
        # transition probability
        if num_iter > 1:
            
            trans_probs = self.apply_transition_model(candidate_roads, 
                                                      dist_prev) # (n * c, 1) first n rows for first candidate, etc.
            sampled_states = candidate_roads
#             print("Transition: ")
#             print(trans_probs)
#             print("--------------------------------------------------------------")
        else:
            sampled_states = candidate_roads
        
        # emission probabilities
        emission_probs = self.apply_emission_model(sampled_states, obs_coords) # (n, 1)
#         print("Emission: ")
#         print(emission_probs)
#         print("--------------------------------------------------------------")
        
        # Joint prob, for viterbi backtracking.
        if num_iter > 1:
            # since the num_iter is different we need to v(j)
            max_v_list = []
            max_v = 0
            for i in range(len(emission_probs)):
                for j in range(len(trans_probs[i])):
                    # get the Recursion
                    joint = np.multiply(self.weights[j], np.multiply(emission_probs[i], trans_probs[i][j]))
                    if max_v < joint:
                        max_v = joint
                max_v_list.append(max_v)
                max_v = 0
#             print("Max_v: ")
#             print(max_v_list)
#             print("--------------------------------------------------------------")
            # the recursion for other states
            joint_prob = max_v_list
            # for other states, the prob for each road is the joint_prob
            self.viterbi_trellis.append(joint_prob)
            self.viterbi_list.append(candidate_roads)
        else:
            # the recursion for the first state
            joint_prob = emission_probs
            # since it is the first state, the prob for each road is the emission itself
            self.viterbi_trellis.append(joint_prob)
            # remember each candidate_roads place
            self.viterbi_list.append(candidate_roads)
            
        # rememeber the current recursion
        self.weights = joint_prob
        # remember the previous candidate_roads
        self.prev_candidate_roads = candidate_roads
        # TODO: Maybe record the back-pointers?
        
        # Estimate current particle filter fit quality of hypotheses to data; should research good metrics more.
        fit_quality = [np.max(self.weights), np.mean(self.weights), np.median(self.weights)]
        
        return fit_quality
    
    def viterbi(self):
        # Start with the last observation to the viterbi trellis
        best_last_state_idx = np.argmax(self.weights)
        backtracked_states = []
        # Backtrack through the viterbi trellis (#obs, n, 2) actual lat/long states
        for j in range(len(self.viterbi_trellis) - 1 , -1, -1):
            best_last_state = self.viterbi_list[j][best_last_state_idx]
            backtracked_states.append([best_last_state[1].StartNodeLat, best_last_state[1].StartNodeLong])
            best_last_state_idx = np.argmax(self.viterbi_trellis[j - 1])
        # Put in chronological order
        backtracked_states = backtracked_states[::-1]
        return backtracked_states

# Preprocess Data & Grab the Candidate road

In [71]:
# preprocess the data
def preprocess_traces(df, sigma=4.07):
    data = df.copy()

    #debug
    data[["latitude", "longitude"]] = data[["StartNodeLat", "StartNodeLong"]]
    data[["prev_latitude", "prev_longitude"]] = data[["latitude", "longitude"]].shift(-1)
    # Ignore warning about invalid value in arcsin (nan)
    data["dist_from_prev_m"] = great_circle_dist_z_to_z(data[["prev_latitude", "prev_longitude", "latitude", "longitude"]].values)  # 1.93 sec
    
    """
    data[["prev_latitude", "prev_longitude"]] = data[["latitude", "longitude"]].shift(-1)
    # Ignore warning about invalid value in arcsin (nan)
    data["dist_from_prev_m"] = great_circle_dist_z_to_z(data[["prev_latitude", "prev_longitude", "latitude", "longitude"]].values)  # 1.93 sec
    
    # compute up speed from time
    data['seconds'] = (data['time'].dt.hour * 3600) + (data['time'].dt.minute * 60) + data['time'].dt.second
    data['seconds_from_prev'] = data['seconds'] - data['seconds'].shift(-1)
    data['speed_from_prev_mps'] = data['dist_from_prev_m'] / data['seconds_from_prev'].shift(-1)

    # Take cumsum of dist
    dist_cum = data.dist_from_prev_m.cumsum()
    # removing points that are within 2𝜎 of the previous included point.
    # Select points closest to multiples of 2*sigma, in cumsum dist
    dist_cum_idx = dist_cum // (2 * sigma)
    filter_idx = np.subtract(dist_cum_idx, dist_cum_idx.shift(1)) == 0
    data = data[~filter_idx]
    """
    
    # seperate the route based on the occupancy
    trips_list = []
    trip = []
    # for loop all the point
    for obs in data.iterrows():
        print(obs) #debug
        # if this point's occupancy is 1
        #if obs[1].occupancy == 1:
        trip.append(obs)
        """
        else:
            trips_list.append(trip)
            trip = []"""
    trips_list = [x for x in trips_list if x != []]
    #return trips_list
    return trip #debug

In [72]:
# preprocess the Map
def preprocess_Map(G, nodes_p):
    # get the node
    data_node = pd.DataFrame(columns=['NodeID', 'Latitude', 'Longitude'])
    for nodeid, row in nodes_p.iterrows():
      nodeid, latitude,longitude = (nodeid,row['y'], row['x']) 
      new_row = {'NodeID': nodeid, 'Latitude': latitude, 'Longitude': longitude}
      data_node = pd.concat([data_node, pd.DataFrame([new_row])])
    # get the edge
    data_edge = pd.DataFrame(columns=['StartNodeID', 'EndNodeID'])
    for node_id_1, node_id_2,_ in G.edges:
      StartNodeID,EndNodeID = (node_id_1,node_id_2)    
      new_row = {'StartNodeID': node_id_1, 'EndNodeID': node_id_2}
      data_edge = pd.concat([data_edge, pd.DataFrame([new_row])])

    data_node.rename(columns = {'NodeID':'StartNodeID', 'Longitude':'StartNodeLong', 'Latitude':'StartNodeLat'}, inplace = True)
    df_merge = pd.merge(data_edge, data_node, on="StartNodeID")

    data_node.rename(columns = {'StartNodeID':'EndNodeID', 'StartNodeLong':'EndNodeLong', 'StartNodeLat':'EndNodeLat' }, inplace = True)
    df_merge = pd.merge(df_merge, data_node, on="EndNodeID")
    
    return df_merge

In [73]:
# get the candidate road
# TODO: 
# Since we know we are tracking ordinary vehicles on public streets, if a 
# calculated route would require the vehicle to exceed a speed of 50 m/s 
# (112 miles per hour, 180 kilometers per hour), or travel in excess of 
# three times the posted speed limit, we consider the route to be unreasonable,
# and set its probability to zero.

def get_candidate_roads(Map, data):
    # [candidate road for time 0, candidate road for time 1,....]
    candidate_roads = [] 
    # let's get the candidate roads for each time t
    for obs in data:
        # get the lat and long for the point
        z_long = obs[1].longitude
        z_lat = obs[1].latitude
        z = [z_lat, z_long]
        # match the coords and the dataset
        candidate_road = []
        for edge in Map.iterrows():
            p1 = np.array([edge[1].StartNodeLat, edge[1].StartNodeLong])
            p2 = np.array([edge[1].EndNodeLat, edge[1].EndNodeLong])
            p3 = np.array(z)
            
            # 𝑥𝑡,𝑖: the perpendicular point from the z to the road 
            x_t_i = get_perpendicular_point(p3, (p1, p2))
            
            # check if 𝑥𝑡,𝑖 is on the line
            dist_p1_x = great_circle_dist_x_to_z(p1, x_t_i)
            dist_p2_x = great_circle_dist_x_to_z(p2, x_t_i)
            dist_p1_p2 = great_circle_dist_x_to_z(p1, p2)
            
            if ((dist_p1_x + dist_p2_x) - dist_p1_p2)/2 > 50:
                continue
            
            # calculate the distance from the point to the road
            d = great_circle_dist_x_to_z(p3, x_t_i)
            
            # Any road segment more than 200 meters away from the GPS point.
            if d < 50:
#                 print(p1)
#                 print(p2)
#                 print(p3)
#                 print(x_t_i)
                edge[1]['X_t_i'] = x_t_i
                edge[1]['Z_t_TO_X_t_i'] = d
                edge[1]['Z_t_TO_Z_t1'] = obs[1].dist_from_prev_m
                candidate_road.append(edge)
        # TODO: When a break is detected between time step 𝑡 and time step 𝑡 + 1, 
        # we remove measured points 𝑧𝑡 and 𝑧𝑡+1 from the model, and check to see 
        # if the break has been healed. The break is considered healed if the 
        # measured points at 𝑡 − 1 and 𝑡 + 2 lead to a reconnection in the HMM 
        # after rechecking the points with the bulleted conditions above. If the 
        # break is still present, we continue to remove the points on either side 
        # of the break until either the break is healed, or the break is more than 
        # 180 seconds long. 
        if len(candidate_road) == 0:
            print("No matching roads found within max road distance! Aborting particle filter.")
            return None
        candidate_roads.append(candidate_road)
    return candidate_roads

In [74]:
# densify the trip
def trip_densify(trip):
    delta_t = 10
    temp_list = pd.DataFrame(columns=['latitude', 'longitude', 'occupancy', 'time'])
    # Loop through each pair of consecutive GPS points in the trajectory
    for i in range(len(trip) - 1):
        if trip[i][1]['dist_from_prev_m'] < 100:
            continue
        # Calculate the time difference between the two GPS points
        dt = (trip[i+1][1]['time'] - trip[i][1]['time']).total_seconds()
        # Calculate the number of additional points needed to achieve the desired sampling frequency
        n = int(np.ceil(dt / delta_t)) - 1
        # Interpolate between the two GPS points to generate the additional points
        for j in range(n):
            t = (j + 1) * delta_t / dt
            lat = (1 - t) * trip[i][1]['latitude'] + t * trip[i+1][1]['latitude']
            long = (1 - t) * trip[i][1]['longitude'] + t * trip[i+1][1]['longitude']
            new_time = trip[i][1]['time'] + pd.Timedelta((j + 1) * delta_t, unit='s')
            new_point =  {'latitude': lat,
                         'longitude': long,
                         'occupancy': 1,
                         'time': new_time}
            temp_list = pd.concat([temp_list, pd.DataFrame([new_point])]).reset_index(drop=True)
    # Combine the original trajectory with the new points to create the densified trajectory
    dens_trip = trip.copy()
    dealing_list = pd.DataFrame(columns=['latitude', 'longitude', 'occupancy', 'time'])
    num = 0
    for x in dens_trip:
        orig_point =  {'latitude': x[1].latitude,
                       'longitude': x[1].longitude,
                       'occupancy': 1,
                       'time': x[1].time}
        num += 1
        dealing_list = pd.concat([dealing_list, pd.DataFrame([orig_point])]).reset_index(drop=True)
    traj_dens = pd.concat([dealing_list, temp_list], ignore_index=True).reset_index(drop=True)
    traj_dens = traj_dens.sort_values('time').reset_index(drop=True)
    
    # print(c_trip)
    data = traj_dens.copy()
    data = data.sort_values(by='time', ascending=False).reset_index(drop=True)
    data[["prev_latitude", "prev_longitude"]] = data[["latitude", "longitude"]].shift(-1)
    # Ignore warning about invalid value in arcsin (nan)
    data["dist_from_prev_m"] = great_circle_dist_z_to_z(data[["prev_latitude", "prev_longitude", "latitude", "longitude"]].values)  # 1.93 sec

    # compute up speed from time
    data['seconds'] = (data['time'].dt.hour * 3600) + (data['time'].dt.minute * 60) + data['time'].dt.second
    data['seconds_from_prev'] = data['seconds'] - data['seconds'].shift(-1)
    data['speed_from_prev_mps'] = data['dist_from_prev_m'] / data['seconds_from_prev'].shift(-1)
    
    return data

# Load the Data and Training

In [75]:
# get the training path
train_path = './training/'

#debug
handcraft_path = './handcrafted2.csv'

In [76]:
# get the Map
bounds = [-122.5206, 37.8152, -122.3554, 37.7088]
north, south, east, west = bounds[1], bounds[3], bounds[2], bounds[0]
G = ox.graph_from_bbox(north, south, east, west, network_type='drive')
nodes_p, edges_p = ox.graph_to_gdfs(G, nodes= True, edges = True)
Map = preprocess_Map(G, nodes_p)

In [77]:
# get all the files in the training folder
#all_files = [f for f in os.listdir(train_path)]

"""
# read the 1st file in the folder as a test case
train_df = pd.read_csv(os.path.join(train_path, all_files[362]), sep=" ", index_col=None, header=None, 
                      names=['latitude', 'longitude', 'occupancy', 'time']) """

# debug\
train_df = pd.read_csv(handcraft_path, sep=",")

"""
# convert the time from unix into date time
train_df.loc[:, ["time"]] = pd.to_datetime(train_df.time, origin="unix", unit='s')
"""
# preprocess the data
trips_list = preprocess_traces(train_df)


# pick one trip for HMM training
# [0, 131]
# [0, 1, 129, 130, 131, 132, 133, 134, 135]
""" trip = trips_list[0]
trip = sorted(trip, key = lambda point: point[1]['time']) """

"""
# increase the point for one trip
c_trip = trip_densify(trip) #debug
final_trip = c_trip.copy()
final_trip.loc[len(final_trip)-1, 'prev_latitude'] = trip[0][1][4]
final_trip.loc[len(final_trip)-1, 'prev_longitude'] = trip[0][1][5]
final_trip.loc[len(final_trip)-1, 'dist_from_prev_m'] = trip[0][1][6]
trip = []
for data in final_trip.iterrows():
    trip.append(data)
"""

# get the candidate roads
trip = trips_list #debug
candidate_roads = get_candidate_roads(Map, trip)

(0, StartNodeLong                -122.4021
StartNodeLat                   37.7863
EndNodeLong                   -122.403
EndNodeLat                      37.787
time                2008-06-09 6:56:05
latitude                       37.7863
longitude                    -122.4021
prev_latitude                  37.7867
prev_longitude               -122.4026
dist_from_prev_m              62.52115
Name: 0, dtype: object)
(1, StartNodeLong                -122.4026
StartNodeLat                   37.7867
EndNodeLong                  -122.4034
EndNodeLat                     37.7876
time                2008-06-09 6:56:15
latitude                       37.7867
longitude                    -122.4026
prev_latitude                   37.787
prev_longitude                -122.403
dist_from_prev_m             48.459912
Name: 1, dtype: object)
(2, StartNodeLong                 -122.403
StartNodeLat                    37.787
EndNodeLong                  -122.4026
EndNodeLat                     37.7867
time

In [78]:
""" # c Roads
c_fg = folium.FeatureGroup(name='C Roads')

# for i in range(len(candidate_roads)):
for i in range(len(trip)-1):
    # Draw traces
    current_coord = [trip[i+1][1][0], trip[i+1][1][1]]
    previous_coord = [trip[i][1][0], trip[i][1][1]]
    traces = folium.PolyLine(locations=[previous_coord, current_coord], weight=3, color='Blue')
    
    # Draw nodes
    node_end = folium.Marker(current_coord,
                  popup=('End'),
                 icon = folium.Icon(color='red',icon=''))

    node_prev = folium.Marker(previous_coord,
                  popup=('Previous'),
                 icon = folium.Icon(color='blue',icon=''))

    # Add to the feature group
    c_fg.add_child(traces)
    c_fg.add_child(node_end)
    c_fg.add_child(node_prev)

# Trip picked for HMM Training
trip_fg = folium.FeatureGroup(name='One Trip for HMM Training')

for i in range(len(trip)):

    # Draw traces
    current_coord = [trip[i][1][0], trip[i][1][1]]
    previous_coord = [trip[i][1][4], trip[i][1][5]]
    traces = folium.PolyLine(locations=[previous_coord, current_coord], weight=3, color='Blue')
    
    # Draw nodes
    node_end = folium.Marker(current_coord,
                  popup=('End'),
                 icon = folium.Icon(color='red',icon=''))

    node_prev = folium.Marker(previous_coord,
                  popup=('Previous'),
                 icon = folium.Icon(color='blue',icon=''))

    # Add to the feature group
    trip_fg.add_child(traces)
    trip_fg.add_child(node_end)
    trip_fg.add_child(node_prev)


# Create a map centered at San Francisco
sf_map = folium.Map(location=[37.759457, -122.444781], zoom_start=12, prefer_canvas=True, control_scale=True)

# Add the feature groups to the map
# sf_map.add_child(perpendicular_fg)
sf_map.add_child(c_fg)
# sf_map.add_child(candidate_fg)
sf_map.add_child(trip_fg)

# Add a layer control feature
folium.LayerControl().add_to(sf_map)

# Add click for coords feature
sf_map.add_child(folium.LatLngPopup())

# save map
#sf_map.save('SF_MAP.html')

# display map
#sf_map
 """

" # c Roads\nc_fg = folium.FeatureGroup(name='C Roads')\n\n# for i in range(len(candidate_roads)):\nfor i in range(len(trip)-1):\n    # Draw traces\n    current_coord = [trip[i+1][1][0], trip[i+1][1][1]]\n    previous_coord = [trip[i][1][0], trip[i][1][1]]\n    traces = folium.PolyLine(locations=[previous_coord, current_coord], weight=3, color='Blue')\n    \n    # Draw nodes\n    node_end = folium.Marker(current_coord,\n                  popup=('End'),\n                 icon = folium.Icon(color='red',icon=''))\n\n    node_prev = folium.Marker(previous_coord,\n                  popup=('Previous'),\n                 icon = folium.Icon(color='blue',icon=''))\n\n    # Add to the feature group\n    c_fg.add_child(traces)\n    c_fg.add_child(node_end)\n    c_fg.add_child(node_prev)\n\n# Trip picked for HMM Training\ntrip_fg = folium.FeatureGroup(name='One Trip for HMM Training')\n\nfor i in range(len(trip)):\n\n    # Draw traces\n    current_coord = [trip[i][1][0], trip[i][1][1]]\n    previo

In [86]:
%%time
# initialize the model
model = HMMModel()

# training
def train(data, candidate_roads):
    data = data.copy()
    num_iter = 0
    converged = False
    for obs in data:
        num_iter += 1
        fit_quality = model.update_dist(obs, candidate_roads[num_iter-1], num_iter, len(candidate_roads[num_iter-1]))
        if fit_quality == "Aborted":
            return "Aborted"
        print("On iteration %d, fit quality of MAX %3.2f, MEAN %3.2f, MEDIAN %3.2f" % 
              (num_iter, fit_quality[0], fit_quality[1], fit_quality[2]))
    print("Done.")

train(trip, candidate_roads)

On iteration 1, fit quality of MAX 0.35, MEAN 0.17, MEDIAN 0.08
On iteration 2, fit quality of MAX 0.14, MEAN 0.14, MEDIAN 0.14
On iteration 3, fit quality of MAX 0.14, MEAN 0.14, MEDIAN 0.14
On iteration 4, fit quality of MAX 0.08, MEAN 0.03, MEDIAN 0.00
On iteration 5, fit quality of MAX 0.01, MEAN 0.01, MEDIAN 0.01
On iteration 6, fit quality of MAX 0.00, MEAN 0.00, MEDIAN 0.00
On iteration 7, fit quality of MAX 0.00, MEAN 0.00, MEDIAN 0.00
On iteration 8, fit quality of MAX 0.00, MEAN 0.00, MEDIAN 0.00
On iteration 9, fit quality of MAX 0.00, MEAN 0.00, MEDIAN 0.00
On iteration 10, fit quality of MAX 0.00, MEAN 0.00, MEDIAN 0.00
On iteration 11, fit quality of MAX 0.00, MEAN 0.00, MEDIAN 0.00
On iteration 12, fit quality of MAX 0.00, MEAN 0.00, MEDIAN 0.00
On iteration 13, fit quality of MAX 0.00, MEAN 0.00, MEDIAN 0.00
On iteration 14, fit quality of MAX 0.00, MEAN 0.00, MEDIAN 0.00
On iteration 15, fit quality of MAX 0.00, MEAN 0.00, MEDIAN 0.00
On iteration 16, fit quality of MA

# Get the result from model

In [87]:
backtracked_trace = model.viterbi()
print(backtracked_trace)

[[37.7850152, -122.4004778], [37.7862613, -122.4020414], [37.7862613, -122.4020414], [37.7879652, -122.4034972], [37.7879652, -122.4034972], [37.7877685, -122.4050568], [37.7881787, -122.4067203], [37.7873652, -122.4082456], [37.7871562, -122.4098893], [37.7869469, -122.4115357], [37.7869469, -122.4115357], [37.7878756, -122.411723], [37.7878756, -122.411723], [37.7876672, -122.4133664], [37.7878756, -122.411723], [37.7880844, -122.4100772], [37.7880844, -122.4100772], [37.7880844, -122.4100772], [37.7878249, -122.4083373], [37.7881787, -122.4067203], [37.7887033, -122.4052473], [37.788901, -122.4036842], [37.788901, -122.4036842], [37.7917031, -122.4042334], [37.7944619, -122.4047926], [37.7962229, -122.4051467], [37.7986989, -122.4056034], [37.7978752, -122.4066341], [37.7978752, -122.4066341], [37.7850152, -122.4004778], [37.7862613, -122.4020414], [37.7862613, -122.4020414], [37.7879652, -122.4034972], [37.7879652, -122.4034972], [37.7877685, -122.4050568], [37.7881787, -122.406720

# Post Process Results

In [88]:
#debug
""" # measure distance of matched nodes to trip nodes

for i in range(len(backtracked_trace)-1):

    # get closest point of matched node to trip node
    dists = []
    for j in range(len(trip)):
        df_dist = pd.DataFrame({'matched_latitude': backtracked_trace[i][0], 
                                'matched_longitude': backtracked_trace[i][1], 
                                'trip_latitude': trip[j][1][0], 
                                'trip_longitude': trip[j][1][1]}, index=[0])

        dist = great_circle_dist_z_to_z(df_dist[["matched_latitude", "matched_longitude", "trip_latitude", "trip_longitude"]].values)
        dists.append(dist)
    dists.sort() # sort grabbed distances to shortest first

    # -------------- SET POST PROCESS DISTANCE --------------
    if(dists[0] > 75): # meters
        backtracked_trace[i] = [-1, -1] # set far coordinates in list to [-1, -1], need list to be used in dictionary in the next part

# remove rows with [-1, -1]
backtracked_trace = [x for x in backtracked_trace if x != [-1, -1]]

# remove duplicate nodes in list
backtracked_trace = list(dict.fromkeys(tuple(x) for x in backtracked_trace)) """

' # measure distance of matched nodes to trip nodes\n\nfor i in range(len(backtracked_trace)-1):\n\n    # get closest point of matched node to trip node\n    dists = []\n    for j in range(len(trip)):\n        df_dist = pd.DataFrame({\'matched_latitude\': backtracked_trace[i][0], \n                                \'matched_longitude\': backtracked_trace[i][1], \n                                \'trip_latitude\': trip[j][1][0], \n                                \'trip_longitude\': trip[j][1][1]}, index=[0])\n\n        dist = great_circle_dist_z_to_z(df_dist[["matched_latitude", "matched_longitude", "trip_latitude", "trip_longitude"]].values)\n        dists.append(dist)\n    dists.sort() # sort grabbed distances to shortest first\n\n    # -------------- SET POST PROCESS DISTANCE --------------\n    if(dists[0] > 75): # meters\n        backtracked_trace[i] = [-1, -1] # set far coordinates in list to [-1, -1], need list to be used in dictionary in the next part\n\n# remove rows with [-1, -

# Visualization

In [89]:
# get the distance of trip
trip_list = []
for point in trip:
    trip_list.append([point[1].latitude, point[1].longitude])

total_dist_of_a_trip = 0
for i in range(len(trip_list) - 1):
    total_dist_of_a_trip += great_circle_dist_x_to_z(trip_list[i], trip_list[i+1])

# get the distance of matched
trip_list = []
for point in backtracked_trace:
    trip_list.append([point[0], point[1]])

total_dist_of_a_matched = 0
for i in range(len(trip_list) - 1):
    total_dist_of_a_matched += great_circle_dist_x_to_z(trip_list[i], trip_list[i+1])

# get the accuracy
# TODO: Find a better way to compute the accuracy
accuracy = np.abs(total_dist_of_a_trip-total_dist_of_a_matched)/total_dist_of_a_trip
print("The accuracy of our model: ", accuracy)

The accuracy of our model:  1.99276803303633


In [94]:
# Add the standard road network

""" # Add the matched roads
matched_fg = folium.FeatureGroup(name='Matched Roads')

#debug 
counter = 0
for i in range(len(trip_list)):

    # Draw traces
    if i < len(trip_list)-1:
        traces = folium.PolyLine(locations=[trip_list[i], trip_list[i+1]], weight=3, color='Green')
    
    # Draw nodes
    node = folium.Marker(trip_list[i],
                  popup=(i),
                 icon = folium.Icon(color='green',icon=''))

    # Add the polyline to the feature group
    matched_fg.add_child(traces)
    matched_fg.add_child(node)
    counter = counter + 1 #debug

#debug 
print("Matched Nodes = ", counter) """

# Trip picked for HMM Training
trip_fg = folium.FeatureGroup(name='One Trip for HMM Training')

# debug counter
counter = 0
for i in range(len(trip)-1):

    # Draw traces
    current_coord = [trip[i][1][5], trip[i][1][6]]
    previous_coord = [trip[i][1][7], trip[i][1][8]]
    traces = folium.PolyLine(locations=[previous_coord, current_coord], weight=3, color='Blue')

    # Draw nodes
    node = folium.Marker(current_coord,
                  popup=(i),
                 icon = folium.Icon(color='blue',icon=''))

    # Add to the feature group
    trip_fg.add_child(traces)
    trip_fg.add_child(node)
    counter = counter + 1 #debug

#debug 
print("Trip Nodes = ", counter)

# Create a map centered at San Francisco
sf_map = folium.Map(location=[37.759457, -122.444781], zoom_start=12, prefer_canvas=True, control_scale=True)

# Add the feature groups to the map
#sf_map.add_child(matched_fg)
sf_map.add_child(trip_fg)

# Add a layer control feature
folium.LayerControl().add_to(sf_map)

# Add click for coords feature
sf_map.add_child(folium.LatLngPopup())

# save map
#sf_map.save('SF_MAP.html')

# display map
sf_map

Trip Nodes =  28


In [139]:
""" print(trip[0][1])
print("\n")

print(trip[0][1][5])
print(trip[0][1][6])
print(trip[0][1][7])
print(trip[0][1][6]) """

' print(trip[0][1])\nprint("\n")\n\nprint(trip[0][1][5])\nprint(trip[0][1][6])\nprint(trip[0][1][7])\nprint(trip[0][1][6]) '

# Get Road Segments

In [155]:
def get_slope(coordA, coordB):
    #(x1, y1) and (x2, y2)
    x1 = coordA[0] # start lat
    y1 = coordA[1] # start long
    x2 = coordB[0] # end lat
    y2 = coordB[1] # end long

    if(x1 != x2):
            slope = (y2 - y1) / (x2 - x1)
    else:
        slope = 999 #undefined slope

    return slope

In [156]:
# get a list of all road segments

segment = [] # store nodes
road_segments = [] # store segments

# get a segment by checking changes in slope at the next node
for i in range(len(trip)):

    # start after first index to get previous slope
    if(i > 0):
        #(x1, y1) and (x2, y2)
        #current node
        curr_node = [trip[i][1][5], trip[i][1][6]]
        curr_node_prev = [trip[i][1][7], trip[i][1][8]]

        #previous node
        prev_node = [trip[i-1][1][5], trip[i-1][1][6]]
        prev_node_prev = [trip[i-1][1][7], trip[i-1][1][8]]
        
        #get slope of current node and slope of previous node
        slope = get_slope(curr_node, curr_node_prev)
        slope_prev = get_slope(prev_node, prev_node_prev)

        #debug
        print('\ni = ', i)
        print('slope = ', slope)
        print('|change| = ', abs(slope - slope_prev))

        # decide the change in slope parameter to check turning point
    
        p = 1 # ------- change in slope parameter ---------------------------

        #add node by index
        if(slope == 999): # same x coord, add node
            segment.append(i)
            print('Undefined adding: ', segment)

        elif(abs(slope - slope_prev) > p): # turning node found if change > parameter
            print('turning node found!') 
            print('appending = ', segment)
            road_segments.append(segment)
            segment = []
        
        else: 
            segment.append(i)
            print('Adding: ', segment)


print(road_segments)


i =  1
slope =  -1.333333333349123
|change| =  0.08333333336244575
Adding:  [1]

i =  2
slope =  -0.6666666666666666
|change| =  0.6666666666824564
Adding:  [1, 2]

i =  3
slope =  -0.24999999997335465
|change| =  0.416666666693312
Adding:  [1, 2, 3]

i =  4
slope =  5.000000000094739
|change| =  5.250000000068094
turning node found!
appending =  [1, 2, 3]

i =  5
slope =  15.999999999431566
|change| =  10.999999999336826
turning node found!
appending =  []

i =  6
slope =  8.500000000017764
|change| =  7.499999999413802
turning node found!
appending =  []

i =  7
slope =  8.0
|change| =  0.5000000000177636
Adding:  [7]

i =  8
slope =  5.333333333443862
|change| =  2.6666666665561376
turning node found!
appending =  [7]

i =  9
slope =  -0.16666666667258787
|change| =  5.50000000011645
turning node found!
appending =  []

i =  10
slope =  -0.4999999999467093
|change| =  0.3333333332741214
Adding:  [10]

i =  11
slope =  0.0
|change| =  0.4999999999467093
Adding:  [10, 11]

i =  12
sl