# Iterative Map Matching

In [54]:
import pandas as pd
import geopandas as gpd
from leuvenmapmatching.matcher.distance import DistanceMatcher
from leuvenmapmatching.map.inmem import InMemMap
from leuvenmapmatching import visualization as mmviz
import pickle
import time
import datetime
from pathlib import Path
from tqdm import tqdm
from shapely.ops import Point, LineString
import matplotlib.pyplot as plt

from importlib import reload

import map_match

## Set up the filepaths

In [55]:
#file paths
network_fp = Path.home() / "Documents/BikewaySimData/Projects/gdot/networks/reconciled_network.gpkg"
traces_fp = Path.home() / 'Documents/BikewaySimData/Projects/gdot/gps_traces'
export_fp = Path('D:/matched_traces')


# Trace Data

In [56]:
#load all traces
with (traces_fp/'cleaned_traces.pkl').open('rb') as fh:
    coords_dict, trips_df = pickle.load(fh)

#one trace
tripids = [29837,7257,9806,30000,8429]

subset = trips_df[(trips_df['total_distance_ft'] > 5280) & (trips_df['total_distance_ft'] < 5280*5)]
random_trips = subset['tripid'].sample(200).tolist()
random_trips = list(set(random_trips + tripids))

#random_trips = trips_df.loc[trips_df['tripid'].isin(tripids),'tripid'].tolist()

# #load existing matches/if none then create a new dict
# if (export_fp/'sample_matched.pkl').exists():
#     with (export_fp/'sample_matched.pkl').open('rb') as fh:
#         match_dict = pickle.load(fh)
# else:
#     matched_traces = dict()


# Prepare Network

In [57]:
#import network
edges = gpd.read_file(network_fp,layer="links_w_signals_elevation")[['A','B','linkid','oneway','geometry']]
nodes = gpd.read_file(network_fp,layer="nodes")[['N','geometry']]

# #turn oneway into boolean (figure out why this is changing)
edges['oneway'] = edges['oneway'] == 1

# prepare network
exploded_edges, exploded_nodes, map_con = map_match.prepare_network(edges,nodes,False)

# for examining exploding network
# exploded_edges.to_file(network_fp,layer="exploded_links")
# exploded_nodes.to_file(network_fp,layer="exploded_nodes")

The matching setting dictionary stores all of the settings used for map matching, so they can be retrieved later for study

In [84]:
if (export_fp / 'matching_settings_df.pkl').exists():
    with (export_fp / 'matching_settings_df.pkl').open('rb') as fh:
        matching_settings_df = pickle.load(fh)
else:
    matching_settings_df = pd.DataFrame()

[Leueven Documentation](https://github.com/wannesm/LeuvenMapMatching/blob/9ca9f0b73665252f2ee492fae9dd243feef2f39d/leuvenmapmatching/matcher/distance.py)

In [98]:
from importlib import reload
reload(map_match)

matching_settings = {
    'obs_noise': 50, #Standard deviation of noise
    'obs_noise_ne': 100, #Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
    'max_dist_init': 2000, #Maximum distance from start location (if not given, uses max_dist)
    'max_dist': 1000, #Maximum distance from path (this is a hard cut, min_prob_norm should be better)
    'min_prob_norm': 0.005, #Minimum normalized probability of observations (ema)
    'non_emitting_states': False, #Allow non-emitting states
    'non_emitting_length_factor': 0.75, #Reduce the probability of a sequence of non-emitting states the longer it is.
    'max_lattice_width': 55, #Restrict the lattice (or possible candidate states per observation) to this value.
    'dist_noise': 50, #Standard deviation of difference between distance between states and distance between observations.
    'dist_noise_ne': 200, #for no emitting If not given, set to dist_noise
    'restrained_ne': True, #Avoid non-emitting states if the distance between states and between observations is close to each other.
    'avoid_goingback': True, #If true, the probability is lowered for a transition that returns back to a previous edges or returns to a position on an edge.
    'increase_max_lattice_width': False,
    'export_graph': False
}

#add to matching_settings_tuple if contents are unique
row = pd.DataFrame([matching_settings])
matching_settings_df = pd.concat([matching_settings_df,row],ignore_index=True)
if matching_settings_df.duplicated().any():
    print('Settings have been used before')
matching_settings_df.drop_duplicates(inplace=True)

#use this in the qaqc section to line up the ratings with the settings used
matching_index = matching_settings_df[(matching_settings_df == tuple(row.loc[0,:])).all(axis=1)].index.item()

# Single Match


In [60]:
# tripid = 5265

# trace = coords_dict[tripid]
# match = map_match.leuven_match(trace,matching_settings,map_con,exploded_edges)

# Multi Match

In [61]:
match_dict = {}
for tripid in tqdm(random_trips):
    
    if (tripid in match_dict.keys()) == False:
        trace = coords_dict[tripid]
        match = map_match.leuven_match(trace,matching_settings,map_con,exploded_edges)
        match_dict[tripid] = match
    
    # may remove essential part of trace so just ignore for now
    # retrace = match['edges']
    
    # #find retraces and remove them?
    # #it doesn't seem like the thing removes much
    # #match['edges'] = retrace[-(retrace['linkid'] == retrace['linkid'].shift(1))]
    # match['edges'] = retrace[-(retrace['linkid'].duplicated(keep=False))]
    
    # if tripid in match_dict.keys():
    #     match_ratio_cond = match['match_ratio'] > match_dict[tripid]['match_ratio']
    #     match_distance_cond = match['match_lines'].length.sum() < match_dict[tripid]['match_lines'].length.sum()
    #     print(match_ratio_cond)
    #     print(match_distance_cond)
    #     if match_ratio_cond & match_distance_cond:
    #         print('success')
    #         match_dict[tripid] = match
    # else:
    #     match_dict[tripid] = match


100%|██████████| 205/205 [10:01<00:00,  2.93s/it]


In [99]:
if export_fp.exists() == False:
    export_fp.mkdir(parents=True)

# export the matching settings tested
with (export_fp/'matching_settings_df.pkl').open('wb') as fh:
    pickle.dump(matching_settings_df,fh)

# export the matched traces
with (export_fp/f'match_{matching_index}_{len(match_dict.keys())}trips.pkl').open('wb') as fh:
    pickle.dump(match_dict,fh)

# # export 
# with (export_fp/'200_sample.pkl').open('wb') as fh:
#     pickle.dump(match_dict,fh)

# Examine matches

In [49]:
# export 
with (export_fp/'all_trips_no_emitting.pkl').open('rb') as fh:
    match_dict = pickle.load(fh)

# # export the matching settings tested
# with (export_fp/'matching_settings_df.pkl').open('wb') as fh:
#     pickle.dump(matching_settings_df,fh)

EOFError: Ran out of input

In [111]:
#for exmamining trips 
if (export_fp/'qaqc.pkl').exists():
    with (export_fp/'qaqc.pkl').open('rb') as fh:
        qaqc_dict = pickle.load(fh)
else:
    qaqc_dict = {}

In [116]:
qaqc_dict

{(5635, 1): {'rating': '', 'notes': '', 'last_reviewed': 1708367418.6603637}}

'2024-02-19 13:35:27'

Visualize and take notes

In [129]:
import time
from IPython.display import display, clear_output

from importlib import reload
reload(map_match)

for tripid in match_dict.keys():

    if isinstance(match_dict.get(tripid,0),str):
        qaqc_dict[(tripid,matching_index)] = 'failed match'
        continue


#TODO get condition that won't write to dict if a trip is skipped

    if qaqc_dict.get((tripid,matching_index),0) == 0:

        html_map = map_match.visualize_match(tripid, match_dict, edges)
        clear_output(wait=True)
        display(html_map)
        
        # Wait for user input to proceed to the next trip
        user_input = input("Rate from 1-10 with 1 being no match to 10 being perfect match (press 'enter' to skip or 'q' to quit)")
        
        if user_input.lower() == 'q':
            break  # Exit the loop if the user enters 'q'
        if user_input == '':
            continue

        notes = input("Input notes if desired and press enter")

        #save user input for that matching index
        qaqc_dict[(tripid,matching_index)] = {
            'rating': user_input,
            'notes': notes,
            'last_reviewed': datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
        }
    
#TODO add the trip date

# Additional cleanup or actions after examining all trips can be added here
print("Finished examining trips.")

Finished examining trips.


In [131]:
qaqc_dict

{(5635, 1): {'rating': '', 'notes': '', 'last_reviewed': 1708367418.6603637},
 (6150, 1): {'rating': '10',
  'notes': '',
  'last_reviewed': '2024-02-19 13:48:50'},
 (3082, 1): {'rating': '10',
  'notes': '',
  'last_reviewed': '2024-02-19 13:49:13'},
 (9739, 1): {'rating': '1',
  'notes': 'only in park?',
  'last_reviewed': '2024-02-19 13:49:29'},
 (8204, 1): {'rating': '10',
  'notes': '',
  'last_reviewed': '2024-02-19 13:59:11'},
 (2575, 1): {'rating': '10',
  'notes': '',
  'last_reviewed': '2024-02-19 13:59:40'},
 (31254, 1): {'rating': '6',
  'notes': 'l5p',
  'last_reviewed': '2024-02-19 13:59:54'},
 (15382, 1): {'rating': '9',
  'notes': '',
  'last_reviewed': '2024-02-19 14:00:21'},
 (30745, 1): {'rating': '1',
  'notes': 'trader joes',
  'last_reviewed': '2024-02-19 14:03:13'},
 (26139, 1): {'rating': '10',
  'notes': '',
  'last_reviewed': '2024-02-19 14:03:31'},
 (1054, 1): {'rating': '4',
  'notes': 'grant park parking deck',
  'last_reviewed': '2024-02-19 14:04:01'},
 (1

In [130]:
with (export_fp/'qaqc.pkl').open('wb') as fh:
    pickle.dump(qaqc_dict,fh)

# Post Match Cleanup
Some trips have out-and-backing and or take the wrong link if two nodes have more than one link between them. This step goes through and cleans these trips.

For out and backing:
Subset network graph to only the edges between origin and destination and then use Dijkstra

In [None]:
qaqc_dict

In [138]:
new_dict = {key:item for key, item in qaqc_dict.items() if isinstance(item,str)==False}
qaqc_df = pd.DataFrame.from_dict(new_dict,orient='index')


Unnamed: 0,Unnamed: 1,rating,notes,last_reviewed
7766,1,9,out and back,2024-02-19 14:16:19
29281,1,9,out and back,2024-02-19 14:18:43
2307,1,8,some weird out and backing,2024-02-19 14:42:42
801,1,9,out and backing,2024-02-19 14:45:08
2355,1,9,out and backing,2024-02-19 14:50:13
3893,1,9,out and backing,2024-02-19 14:51:07
3389,1,9,out and backing,2024-02-19 14:51:57
13124,1,9,out and backing,2024-02-19 14:52:43
27975,1,10,out and backing,2024-02-19 14:54:05


In [1]:
qaqc_df

NameError: name 'qaqc_df' is not defined

Post match cleaning

In [158]:
qaqc_df[qaqc_df['notes'].str.contains('out and back')]
match_dict[801]['edges']

Unnamed: 0,linkid,forward
0,41671.0,True
1,45286.0,False
2,41723.0,False
3,41723.0,True
4,31683.0,False
5,41228.0,False
6,44378.0,True
7,31876.0,False
8,31875.0,True
9,31881.0,True


In [144]:
gpd.GeoDataFrame(match_dict[801]['edges'].merge(edges,on='linkid')).explore()

In [165]:
exploded_edges.columns

Index(['A', 'B', 'linkid', 'oneway', 'geometry', 'length_ft', 'A_sort',
       'B_sort'],
      dtype='object')

In [174]:
#TODO use to make network graph, then subset with trip
import networkx as nx

MDG = nx.MultiDiGraph()  # Create a MultiDiGraph
    #itertuples used to maintain the type
for idx, row in exploded_edges.iterrows():
    #edge_data = {linkid: row[2],'reverse_link': False, 'azimuth': row[4]}
    MDG.add_edge(int(row['A']), int(row['B']), **{'weight': row['length_ft']})#**edge_data)  # Add edge with linkid attribute
    #add reverse link if oneway is not true
    MDG.add_edge(int(row['B']), int(row['A']), **{'weight': row['length_ft']})
    # if row[3] == False:
    #     edge_data['reverse_link'] = True 
    #     #reverse the azimuth
    #     edge_data['azimuth'] = row[5]
    #     MDG.add_edge(row[1], row[0], **edge_data)

#exploded_edges, exploded_nodes

In [193]:
tripid = 801

test = match_dict[801]['edges'].merge(edges,on='linkid')
sub_nodes = test['A'].append(test['B']).unique().tolist()

In [194]:
#get start and end linkid
start = match_dict[tripid]['edges'].iloc[0,:]
end = match_dict[tripid]['edges'].iloc[-1,:]

#get start and end node
start_a_b = edges.loc[edges['linkid']==start['linkid'],['A','B']]
end_a_b = edges.loc[edges['linkid']==end['linkid'],['A','B']]

if start['forward']:
    start = start_a_b['A'].item()
else:
    start = start_a_b['B'].item()

if end['forward']:
    end = end_a_b['B'].item()
else:
    end = end_a_b['A'].item()

In [195]:
sub_nodes[0]

69616383

In [196]:
start

69616383

In [197]:
start in sub_nodes

True

In [198]:
end in sub_nodes

True

In [204]:
path

[69616383,
 69372783,
 69372780,
 69515827,
 69375784,
 69342281,
 69172103,
 3590387336,
 566371279,
 69375819,
 69347809,
 2508763915,
 6404727829,
 9498001936,
 6404727826,
 6404727825,
 6404727832,
 6404727824,
 6697070810,
 6404727808,
 69179068,
 9497354981,
 69258137,
 561031703,
 6011072497,
 593706784,
 549266898,
 549266973,
 344456219,
 344456241,
 344456236,
 926766945,
 8202468354,
 344456220,
 344456158,
 344456113,
 344456091,
 344456087,
 69342371,
 560084673,
 69528686,
 1088436852,
 1088436857,
 1088436921,
 1088436848,
 1088436861,
 2210737616,
 3416614561]

In [203]:
subgraph = MDG.subgraph(sub_nodes)
length, path = nx.single_source_dijkstra(subgraph,start,end,weight='weight')


Unnamed: 0,A,B,linkid,oneway,geometry,length_ft,A_sort,B_sort


In [None]:
#turn to edge list
edge_list = [(path[i],path[i+1]) for i in range(len(path)-1)]
edge_df = pd.DataFrame(edge_list,columns=['A','B'])
forward = pd.merge(edge_df,edges[['A','B','linkid','geometry']],on=['A','B'])#[['linkid','A','B','geometry']]
forward

KeyError: "['A', 'B'] not in index"

In [None]:
reverse = pd.merge(edge_df,edges[['A','B','linkid','geometry']],left_on=['B','A'],right_on=['A','B'])[['linkid','A','B','geometry']]
shortest_path = pd.concat([forward,reverse],ignore_index=True)

In [None]:
shortest_path = shortest_path.loc[shortest_path.groupby(['A','B'])['length_ft'].idxmin()]

KeyError: 'A'

In [None]:
gpd.GeoDataFrame(shortest_path).explore()

In [None]:
#TODO deal with duplicate links

In [210]:
shortest_path.explore()

AttributeError: 'DataFrame' object has no attribute 'explore'

For multi-edges, buffer the 2+ edges and take the one that hits the most gps points

In [163]:
import numpy as np
exploded_edges['A_sort'] = np.sort(exploded_edges[['A','B']].to_numpy())[:,0]
exploded_edges['B_sort'] = np.sort(exploded_edges[['A','B']].to_numpy())[:,1]
duplicate_edges = exploded_edges.loc[exploded_edges[['A_sort','B_sort']].duplicated(keep=False),'linkid'].unique()

array([ 1829.,  1962.,  1966.,  2226.,  2379.,  2377.,  3124.,  3026.,
        3681.,  3680.,  3950.,  2473.,  4022.,  4023.,  4117.,  1970.,
        3126.,  4403.,  4749.,  4747.,  3320.,  4930.,  4933.,  5065.,
        5062.,  5411.,  5835.,  6007.,  3866.,  6008.,  8300.,  9537.,
        9547., 10571., 10579., 12736., 12734., 13478., 13747., 13743.,
       14123., 17561., 17851., 19128., 19127., 19163., 12787., 19204.,
       19688., 20289., 20694., 20690., 15778., 15774., 21432., 21437.,
       21438., 22332.,  8448., 15225., 15229., 15566., 15563., 21129.,
       22986.,  6482.,  8940., 19823., 19831., 23398., 21411., 23402.,
        8301., 12623., 23709., 23805., 22621., 20868., 17051., 22455.,
       20596., 17850., 17572., 24473., 27818., 27819., 28007., 28513.,
       28490., 28826., 29028., 29031., 30250., 24436., 28263., 31690.,
       31884., 32505., 28069., 33669., 34207., 34211., 35498., 35497.,
       37947., 38065., 40904., 41867., 41874., 42242., 42330., 24904.,
      

In [None]:
gps_points = match_dict[tripid]['trace']

In [None]:
# matched_trip = match_dict[tripid]['edges'].merge(edges, on='linkid')
# matched_trip = gpd.GeoDataFrame(matched_trip)
# from shapely.ops import MultiLineString
# buffered_geo = MultiLineString(matched_trip.geometry.tolist()).buffer(100)
# match['trace'].intersects(buffered_geo).sum()

In [None]:
# # export 
# with (export_fp/'sample_matched.pkl').open('wb') as fh:
#     pickle.dump(match_dict,fh)

In [None]:
# with (export_fp/'sample_matched.pkl').open('rb') as fh:
#     match_dict = pickle.load(fh)

# Visualization


In [None]:
import folium
import geopandas as gpd
from folium.plugins import MarkerCluster, PolyLineTextPath
from folium.map import FeatureGroup

#tripid = 29837#7257#9806#30000#8429

# Your GeoDataFrames
matched_trip = match_dict[tripid]['edges'].merge(edges, on='linkid')
matched_trip = gpd.GeoDataFrame(matched_trip)
gps_points = match_dict[tripid]['trace']
match_lines = match_dict[tripid]['match_lines']

#get the start and end point for mapping
start_pt = gps_points.to_crs(epsg='4326').loc[gps_points['sequence'].idxmin(),'geometry']
end_pt = gps_points.to_crs(epsg='4326').loc[gps_points['sequence'].idxmax(),'geometry']

# reproject and get the center of the map
x_mean = gps_points.to_crs(epsg='4326')['geometry'].x.mean()
y_mean = gps_points.to_crs(epsg='4326')['geometry'].y.mean()

# Create a Folium map centered around the mean of the GPS points
center = [y_mean,x_mean]
mymap = folium.Map(location=center, zoom_start=14)

# Convert GeoDataFrames to GeoJSON
matched_trip_geojson = matched_trip[['linkid','geometry']].to_crs(epsg='4326').to_json()
gps_points_geojson = gps_points[['sequence','geometry']].to_crs(epsg='4326').to_json()
match_lines_geojson = match_lines[['sequence','match_lines']].to_crs(epsg='4326').to_json()

# Create FeatureGroups for each GeoDataFrame
matched_trip_fg = FeatureGroup(name='Matched Trip')
gps_points_fg = FeatureGroup(name='GPS Points')
match_lines_fg = FeatureGroup(name='Match Lines')

# Add GeoJSON data to FeatureGroups
folium.GeoJson(matched_trip_geojson, name='Matched Trip', style_function=lambda x: {'color': 'red'}).add_to(matched_trip_fg)

# Add circles to the GPS Points FeatureGroup
for idx, row in gps_points.iterrows():
    folium.Circle(location=[row['lat'], row['lon']], radius=5, color='grey', fill=True, fill_color='grey').add_to(gps_points_fg)

# Add GeoJSON data to Match Lines FeatureGroup with transparent and grey style
folium.GeoJson(match_lines_geojson, name='Match Lines', style_function=lambda x: {'color': 'grey', 'opacity': 0.5}).add_to(match_lines_fg)

# Add FeatureGroups to the map
matched_trip_fg.add_to(mymap)
gps_points_fg.add_to(mymap)
match_lines_fg.add_to(mymap)

# Add start and end points with play and stop buttons
start_icon = folium.Icon(color='green',icon='play',prefix='fa')
end_icon = folium.Icon(color='red',icon='stop',prefix='fa')
folium.Marker(location=[start_pt.y, start_pt.x],icon=start_icon).add_to(mymap)
folium.Marker(location=[end_pt.y, end_pt.x],icon=end_icon).add_to(mymap)

# Add layer control to toggle layers on/off
folium.LayerControl().add_to(mymap)

# Add legend with statistics
#TODO what happened to duration
legend_html = f'''
    <div style="position: fixed; 
            bottom: 5px; left: 5px; width: 300px; height: 250px; 
            border:2px solid grey; z-index:9999; font-size:14px;
            background-color: white;
            opacity: 0.9;">
    &nbsp; <b>Trip ID: {tripid} </b> <br>
    &nbsp; <b> Match Date: {match_dict[tripid]['time']} </b> <br>
    &nbsp; Start Point &nbsp; <i class="fa fa-play" style="color:green"></i>,
    End Point &nbsp; <i class="fa fa-stop" style="color:red"></i> <br>
    
    &nbsp; Matched Path &nbsp; <div style="width: 20px; height: 5px; background-color: red; display: inline-block;"></div> <br>
    &nbsp; Match Lines Path &nbsp; <div style="width: 20px; height: 5px; background-color: gray; display: inline-block;"></div> <br>
 
    &nbsp; Points Matched: {match_dict[tripid]['last_matched']}/{match_dict[tripid]['trace'].shape[0]} <br>
    &nbsp; Match Ratio: {match_dict[tripid]['match_ratio']:.2f} <br>
    &nbsp; GPS Distance: {match_dict[tripid]['gps_distance']:.1f} ft. <br>
    &nbsp; Matched Trace Distance: {matched_trip.length.sum():.0f} ft. <br>
    &nbsp; Mean Matching Distance: {match_dict[tripid]['match_lines'].length.mean():.0f} ft. 

    </div>
    '''
mymap.get_root().html.add_child(folium.Element(legend_html))

# Save the map to an HTML file or display it in a Jupyter notebook
#mymap.save('map.html')
# mymap.save('/path/to/save/map.html')  # Use an absolute path if needed
mymap  # Uncomment if you are using Jupyter notebook

#TODO add in the legend with trip info and then we're golden


In [None]:
match_dict[tripid].keys()

In [None]:
match_dict[tripid]['match_ratio']

In [None]:
# tripid

# #%% batch match
# for tripid, trace in tqdm(coords_dict.items()):
#     try:
#         matched_traces, trips_df = map_match.leuven_match(trace,tripid,matched_traces,matching_settings,edges,map_con)
#         #update trips_df
#     except:
#         if tripid in matched_traces.keys():
#             failed_match.append(tripid)
#         export_files = (matched_traces,trips_df,failed_match)
#         with (export_fp/'matched_traces.pkl').open('wb') as fh:
#             pickle.dump(export_files,fh)
        
        
# #export
# export_files = (matched_traces,trips_df,failed_match)
# with (export_fp/'matched_traces.pkl').open('wb') as fh:
#     pickle.dump(export_files,fh)
# #export filepath
# project_dir = Path.home() / 'Downloads/cleaned_trips'

# trace = gpd.read_file(project_dir/'example_trace0.gpkg',layer='example_trace0').reset_index().drop(columns=['sequence']).rename(columns={'index':'sequence'})

# #load network
# network_fp = project_dir / "networks/final_network.gpkg"
# edges = gpd.read_file(network_fp,layer="links")
# nodes = gpd.read_file(network_fp,layer="nodes")

# #turn network into dict to quickly retrieve geometries
# edges['tup'] = list(zip(edges['A'],edges['B']))
# geos_dict = dict(zip(edges['tup'],edges['geometry']))

# # create network graph needed for map matching
# map_con = InMemMap("osm", use_latlon = False)

# # create network graph needed for map matching (using a projected coordinate system so latlon false)
# map_con = InMemMap("marta_osm", use_latlon = False)

# #redo the latlon columns
# nodes['X'] = nodes.geometry.x
# nodes['Y'] = nodes.geometry.y

# #add edges and nodes to leuven graph network object (make sure latlon is in same order and crs as trace)
# for row in nodes[['N','X','Y']].itertuples(index=False):
#     map_con.add_node(row[0], (row[2], row[1]))
# for row in edges[['A','B']].itertuples(index=False):
#     map_con.add_edge(row[0], row[1])
# matcher = DistanceMatcher(map_con, # the network graph
#                      max_dist=1000,  # maximum distance for considering a link a candidate match for a GPS point
#                      min_prob_norm=0.001, # drops routes that are below a certain normalized probability  
#                      #non_emitting_length_factor=0.75, # not sure what this does, it's not in the documentation but can't be above 1
#                      non_emitting_states=False, # allow for states that don't have matching GPS points
#                      obs_noise=500, # the standard error in GPS measurement
#                      max_lattice_width=100)  # limits the number of possible routes to consider, can increment if no solution is found

In [None]:
help(InMemMap)

In [None]:
help(DistanceMatcher)

:param map_con: Map object to connect to map database
        :param obs_noise: Standard deviation of noise
        :param obs_noise_ne: Standard deviation of noise for non-emitting states (is set to obs_noise if not given)
        :param max_dist_init: Maximum distance from start location (if not given, uses max_dist)
        :param max_dist: Maximum distance from path (this is a hard cut, min_prob_norm should be better)
        :param min_prob_norm: Minimum normalized probability of observations (ema)
        :param non_emitting_states: Allow non-emitting states. A non-emitting state is a state that is
            not associated with an observation. Here we assume it can be associated with a location in between
            two observations to allow for pruning. It is advised to set min_prob_norm and/or max_dist to avoid
            visiting all possible nodes in the graph.
        :param non_emitting_length_factor: Reduce the probability of a sequence of non-emitting states the longer it
            is. This can be used to prefer shorter paths. This is separate from the transition probabilities because
            transition probabilities are averaged for non-emitting states and thus the length is also averaged out.
        :param max_lattice_width: Restrict the lattice (or possible candidate states per observation) to this value.
            If there are more possible next states, the states with the best likelihood so far are selected.

        :param dist_noise: Standard deviation of difference between distance between states and distance
            between observatoins. If not given, set to obs_noise
        :param dist_noise_ne: If not given, set to dist_noise
        :param restrained_ne: Avoid non-emitting states if the distance between states and between
            observations is close to each other.
        :param avoid_goingback: If true, the probability is lowered for a transition that returns back to a
            previous edges or returns to a position on an edge.


In [None]:
# #get list of coords
# gps_trace = list(zip(trace.geometry.y,trace.geometry.x))

# #perform matching
# states, last_matched = matcher.match(gps_trace)
# only_nodes = matcher.path_pred_onlynodes

# print("States\n------")
# print(states)
# print("Nodes\n------")
# print(only_nodes)
# print("")
# matcher.print_lattice_stats()
# fig, ax = plt.subplots(1, 1)
# mmviz.plot_map(map_con, matcher=matcher,
#                ax=ax,
#                show_labels=True, show_matching=True, show_graph=False,
#                filename="my_plot.png")
# test = matcher.lattice[4]
# m = max(test.values_all(), key=lambda m: m.logprob) # for the 4th point get the one with the highest logprob

# m.logprob
# import numpy as np
# t = {x.cname.split('_')[0] + '_' + x.cname.split('_')[1]: x.logprob for x in test.values_all()}
# check = pd.DataFrame.from_dict(t,orient='index',columns=['logprob']).sort_values('logprob',ascending=False)
# check
# (check.index == '5424132517_7151205661').sum()
# testing = trace.copy()
# testing.geometry = testing.buffer(1000)
# intersect = gpd.overlay(edges,testing)
# intersect[(intersect['A_B'] == '5424132517_7151205661') & (intersect['sequence'] == 4)]

# #reduce the states size with match_nodes
# reduced_states = list(set(edges))

# #calculate the match ratio
# match_ratio = last_matched / (len(gps_trace)-1)
    
# #retreive matched edges from network
# geos_list = [geos_dict.get(id,0) for id in reduced_states]

# #turn into geodataframe
# matched_trip = gpd.GeoDataFrame(data={'A_B':reduced_states,'geometry':geos_list},geometry='geometry',crs='epsg:2240')

# #turn tuple to str
# matched_trip['A_B'] = matched_trip['A_B'].apply(lambda row: f'{row[0]}_{row[1]}')

# #reset index to add an edge sequence column
# matched_trip.reset_index().rename(columns={'index':'edge_sequence'},inplace=True)

# trace['interpolated_point'] = pd.Series([ Point(x.edge_m.pi) for x in matcher.lattice_best ])
# trace = trace.loc[0:last_matched]
# trace['match_lines'] = trace.apply(lambda row: LineString([row['geometry'],row['interpolated_point']]),axis=1)

# interpolated_points = trace[['sequence','interpolated_point']]
# interpolated_points = gpd.GeoDataFrame(interpolated_points,geometry='interpolated_point')

# match_lines = trace[['sequence','match_lines']]
# match_lines = gpd.GeoDataFrame(match_lines,geometry='match_lines')
# match_lines['length'] = match_lines.length


# interpolated_points.to_file(project_dir/f"single_example/{tripid}.gpkg",layer='interpolated_points')
# match_lines.to_file(project_dir/f"single_example/{tripid}.gpkg",layer='match_lines')

# #%%






