# Preparing Matched Traces for Calibration
Before calibration, we need to decide which traces make the cut using the match_ratio. We then attach the start and end nodes to each trace and examine how well the shortest path (with/without turns) explains the routing behavior, as a baseline to compare against the optimization.

In [12]:
import pickle
from pathlib import Path
import geopandas as gpd
import pandas as pd
from tqdm import tqdm
from shapely.ops import MultiLineString

import folium
import geopandas as gpd
from folium.plugins import MarkerCluster, PolyLineTextPath
from folium.map import FeatureGroup

In [13]:
# Input locations: exported GPS traces and the reconciled network GeoPackage
export_fp = Path.home() / 'Documents/BikewaySimData/Projects/gdot/gps_traces'
network_fp = Path.home() / "Documents/BikewaySimData/Projects/gdot/networks/reconciled_network.gpkg"

# Read the link and node layers, keeping only the columns used in this notebook
edge_columns = ['A','B','linkid','oneway','geometry']
node_columns = ['N','geometry']
edges = gpd.read_file(network_fp, layer='links_w_signals_elevation')[edge_columns]
nodes = gpd.read_file(network_fp, layer="nodes")[node_columns]

# There appear to be duplicate rows in the layers (TODO: go back to network processing)
edges = edges.drop_duplicates()
nodes = nodes.drop_duplicates()

# Turn oneway into a boolean: True only where the stored value is the string "1"
edges['oneway'] = edges['oneway'].eq("1")

# linkid also seems to be turning into a float; the cast below is left disabled
#edges['linkid'] = edges['linkid'].apply(lambda x: int(x))

In [14]:
def make_multidigraph(network_df, source='source', target='target', linkid ='linkid', oneway='oneway'):
    """Build a networkx MultiDiGraph from a table of network links.

    Each row adds one edge from its `source` node to its `target` node.
    Rows whose `oneway` value equals False additionally add the opposite
    edge, marked with ``reverse_link=True``.

    Side effect: a ``length_ft`` column (copied from ``network_df.length``)
    is written onto `network_df` itself.

    Parameters
    ----------
    network_df : link table exposing ``.length`` and the columns named below
    source, target : names of the columns holding the from/to node ids
    linkid : name of the link id column; also used as the edge attribute key
    oneway : name of the one-way flag column

    Returns
    -------
    nx.MultiDiGraph whose edges carry ``<linkid>``, ``reverse_link`` and
    ``length_ft`` attributes.
    """
    network_df['length_ft'] = network_df.length

    graph = nx.MultiDiGraph()

    wanted = [source, target, linkid, oneway, 'length_ft']

    # itertuples is used (instead of iterrows) so the column types are maintained
    for from_node, to_node, link, is_oneway, length in network_df[wanted].itertuples(index=False):
        graph.add_edge(from_node, to_node,
                       **{linkid: link, 'reverse_link': False, 'length_ft': length})

        # Deliberately an equality test: a missing flag (None/NaN) does not
        # equal False and therefore does not get a reverse edge.
        if is_oneway == False:
            graph.add_edge(to_node, from_node,
                           **{linkid: link, 'reverse_link': True, 'length_ft': length})

    return graph

import networkx as nx

# Routing graph built from the link table: A/B are the link's end nodes
MDG = make_multidigraph(edges, source='A', target='B', linkid='linkid', oneway='oneway')

In [15]:
# Load the matched traces from the sample pickle
# (pickle files should only be loaded when they come from a trusted source)
match_dict = pickle.loads((export_fp/'200_sample.pkl').read_bytes())

### Turn matching dict into dataframe
- Include the tripid
- Find start and ending node for shortest path routing (last node is the last matched)
- Find Euclidean distance between start and ending node (for finding loop trips)
- Add the match ratio
- Add list of edges

In [16]:
# Per-trip summary records keyed by tripid, plus the trips that were skipped
matched_trips = {}
loop_trips = []      # shortest path has fewer than two nodes (start == end)
no_path_trips = []   # routing failed: no path, or a node missing from MDG

# Loop-invariant lookups, built once rather than once per trip.
# 'length_ft' is the column make_multidigraph() wrote onto edges.
link_lengths = edges.set_index('linkid')['length_ft']
link_geo = edges[['A','B','linkid','geometry']]
# Only links that may be travelled B -> A. This is the same test
# make_multidigraph() uses when it adds reverse edges, so one-way links
# are no longer picked up against their direction of travel.
two_way_link_geo = link_geo[edges['oneway'] == False]

#buffer distance for the buffer overlap measure
buffer_ft = 500

for tripid,items in tqdm(match_dict.items()):

    #failed matches won't be dicts
    if not isinstance(items,dict):
        continue

    matched_edges = items['edges']

    #first and last matched link (rows provide 'linkid' and 'forward')
    first_link = matched_edges.iloc[0,:]
    last_link = matched_edges.iloc[-1,:]

    #node pairs of those links
    start_a_b = edges.loc[edges['linkid']==first_link['linkid'],['A','B']]
    end_a_b = edges.loc[edges['linkid']==last_link['linkid'],['A','B']]

    #forward traversal: trip starts at A / ends at B; otherwise the opposite
    start = start_a_b['A' if first_link['forward'] else 'B'].item()
    end = end_a_b['B' if last_link['forward'] else 'A'].item()

    #node geometries for the euclidean distance between start and end
    snode = nodes.loc[nodes['N']==start,'geometry'].item()
    dnode = nodes.loc[nodes['N']==end,'geometry'].item()

    #add geo features
    edge_geo = pd.merge(matched_edges,edges[['linkid','geometry']],on=['linkid'],how='left')
    edge_geo = gpd.GeoDataFrame(edge_geo,geometry='geometry')
    edge_geo_dissolved = MultiLineString(edge_geo['geometry'].tolist())
    linkids = set(edge_geo['linkid'].tolist())
    chosen_length = edge_geo.length.sum()

    #shortest path routing here; one unroutable trip should not abort the batch
    try:
        impedance, path = nx.single_source_dijkstra(MDG,start,end,weight="length_ft")
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        no_path_trips.append(tripid)
        continue

    if len(path) < 2:
        loop_trips.append(tripid)
        continue

    #turn node path into consecutive (from, to) pairs
    edge_list = list(zip(path[:-1],path[1:]))
    edge_df = pd.DataFrame(edge_list,columns=['A','B'])

    #TODO use .array version to get rid of errors

    #links traversed A -> B, and two-way links traversed B -> A
    #NOTE(review): parallel links between the same node pair are all matched,
    #not only the one Dijkstra used -- confirm whether that matters here
    forward = pd.merge(edge_df,link_geo,on=['A','B'])[['linkid','geometry']]
    reverse = pd.merge(edge_df,two_way_link_geo,left_on=['B','A'],right_on=['A','B'])[['linkid','geometry']]
    shortest_path = pd.concat([forward,reverse],ignore_index=True)
    shortest_linkids = set(shortest_path['linkid'].tolist())
    shortest_geo = gpd.GeoDataFrame(shortest_path)
    shortest_geo_dissolved = MultiLineString(shortest_geo['geometry'].tolist())

    #exact overlap: length of links shared by both routes over the chosen length
    chosen_and_shortest = linkids & shortest_linkids
    overlap_length = link_lengths.loc[list(chosen_and_shortest)].sum()
    exact_overlap = overlap_length / chosen_length

    #buffer overlap: intersection area over union area of the buffered routes
    chosen = edge_geo_dissolved.buffer(buffer_ft)
    shortest = shortest_geo_dissolved.buffer(buffer_ft)
    intersection = chosen.intersection(shortest)
    buffer_overlap = intersection.area / (chosen.area + shortest.area - intersection.area)

    #collapse to multilinestring with length
    matched_trips[tripid] = {'start':start,
                          'end':end,
                          'start_end_dist_ft': snode.distance(dnode),
                          'match_ratio': items['match_ratio'],
                          'linkids':str(linkids),
                          'geometry':edge_geo_dissolved,
                          'length_ft':chosen_length,
                          'shortest_length_ft': impedance,
                          'shortest_linkids': shortest_linkids,
                          'shortest_geo': shortest_geo_dissolved,
                          'shortest_exact_overlap_length': overlap_length,
                          'shortest_exact_overlap_prop': exact_overlap,
                          'shortest_buffer_overlap': buffer_overlap,
                          'shortest_intersect_geo': intersection
                          }

100%|██████████| 205/205 [00:18<00:00, 11.36it/s]


## Use linkids to add network summaries 

In [17]:
# One row per matched trip, indexed by tripid
df = pd.DataFrame.from_dict(matched_trips,orient='index')

# Promote to a GeoDataFrame with the chosen route as the active geometry
gdf = gpd.GeoDataFrame(df,geometry='geometry',crs='epsg:2240')

# Move the tripid from the index into a regular column
gdf = gdf.reset_index().rename(columns={'index':'tripid'})

# Attach the attribute table by tripid; `prev` keeps the pre-merge frame
test_merge = pd.read_csv(export_fp.parent/'all_attrs.csv')
prev = gdf.copy()
gdf = gdf.merge(test_merge,on='tripid')

In [36]:
def visualize(tripid,gdf,nodes):
    """Return a folium map comparing one trip's chosen route with its shortest path.

    The map has three toggleable layers (chosen path, shortest path, buffer
    intersection), start/end markers, and a fixed legend listing the trip's
    overlap measures and attributes.

    Parameters
    ----------
    tripid : value identifying exactly one row of ``gdf['tripid']``
    gdf : GeoDataFrame holding the 'geometry', 'shortest_geo',
        'shortest_intersect_geo', 'start', 'end' columns plus the overlap and
        attribute columns read by the legend
    nodes : GeoDataFrame of network nodes with 'N' and 'geometry' columns

    Returns
    -------
    folium.Map
    """
    is_trip = gdf['tripid']==tripid

    def trip_value(column):
        # Scalar from the trip's row; .item() raises unless exactly one row matches
        return gdf.loc[is_trip,column].item()

    def layer_geojson(column):
        # GeoJSON (EPSG:4326) of one of the trip's secondary geometry columns
        layer = gdf.loc[is_trip,['tripid',column]].set_geometry(column).set_crs(gdf.crs)
        return layer.to_crs(epsg='4326').to_json()

    def solid_color(color):
        # Style callback drawing every feature in a single color
        return lambda feature: {'color': color}

    # Chosen route, reprojected once for both the map center and its layer
    chosen_path = gdf.loc[is_trip,['tripid','geometry']]
    chosen_wgs84 = chosen_path.to_crs(epsg='4326')

    # Start and end node locations in lon/lat
    nodes_wgs84 = nodes.to_crs('epsg:4326')
    start_pt = nodes_wgs84.loc[nodes['N']==trip_value('start'),'geometry'].item()
    end_pt = nodes_wgs84.loc[nodes['N']==trip_value('end'),'geometry'].item()

    # Center the map on the centroid of the chosen route
    centroid = chosen_wgs84.geometry.item().centroid
    mymap = folium.Map(location=[centroid.y,centroid.x], zoom_start=14)

    # (layer name, GeoJSON, line color, shown when the map opens)
    layers = (
        ('Chosen Path', chosen_wgs84.to_json(), 'red', True),
        ('Shortest Path', layer_geojson('shortest_geo'), 'blue', False),
        ('Buffer Intersection', layer_geojson('shortest_intersect_geo'), 'yellow', False),
    )
    for layer_name, geojson, color, shown in layers:
        group = FeatureGroup(name=layer_name,show=shown)
        folium.GeoJson(geojson, name=layer_name, style_function=solid_color(color)).add_to(group)
        group.add_to(mymap)

    # Start marker (green play icon) and end marker (red stop icon)
    start_icon = folium.Icon(color='green',icon='play',prefix='fa')
    end_icon = folium.Icon(color='red',icon='stop',prefix='fa')
    folium.Marker(location=[start_pt.y, start_pt.x],icon=start_icon).add_to(mymap)
    folium.Marker(location=[end_pt.y, end_pt.x],icon=end_icon).add_to(mymap)

    # folium.FitOverlays() is not available in the installed folium version
    #folium.FitOverlays().add_to(mymap)

    # Layer control to toggle layers on/off
    folium.LayerControl().add_to(mymap)

    # Values shown in the legend
    exact_overlap = trip_value('shortest_exact_overlap_prop')
    buffer_overlap = trip_value('shortest_buffer_overlap')
    attr = gdf.loc[is_trip].squeeze()

    # Legend with statistics
    legend_html = f'''
          <div style="position: fixed; 
                    bottom: 5px; left: 5px; width: 300px; height: 600px; 
                    border:2px solid grey; z-index:9999; font-size:14px;
                    background-color: white;
                    opacity: 0.9;">
          &nbsp; <b>Tripid: {tripid}</b> <br>
          &nbsp; Start Point &nbsp; <i class="fa fa-play" style="color:green"></i><br>
          &nbsp; End Point &nbsp; <i class="fa fa-stop" style="color:red"></i><br>
          &nbsp; Exact Overlap: {exact_overlap*100:.2f}% <br>
          &nbsp; Buffer Overlap: {buffer_overlap*100:.2f}% <br>
          
          &nbsp; Trip Type: {attr['trip_type']} <br>
          &nbsp; Length (mi): {attr['length_ft']/5280:.0f} <br>
          &nbsp; Age: {attr['age']} <br>
          &nbsp; Gender: {attr['gender']} <br>
          &nbsp; Income: {attr['income']} <br>
          &nbsp; Ethnicity: {attr['ethnicity']} <br>
          &nbsp; Cycling Frequency: {attr['cyclingfreq']} <br>
          &nbsp; Rider History: {attr['rider_history']} <br>
          &nbsp; Rider Type: {attr['rider_type']} <br><br>

          &nbsp; Bike Lane %: {attr['bl']*100:.2f}% <br>
          &nbsp; Protected Bike Lane %: {attr['pbl']*100:.2f}% <br>
          &nbsp; Multi-Use Path %: {attr['mu']*100:.2f}% <br>
          &nbsp; Residential %: {attr['highway.residential']*100:.2f}% <br>
          &nbsp; Secondary %: {attr['highway.secondary']*100:.2f}% <br>
          &nbsp; Tertiary %: {attr['highway.tertiary']*100:.2f}% <br>

          &nbsp; # of bridges: {attr['bridge']} <br>
          &nbsp; # of left turns: {attr['left']} <br>
          &nbsp; # of straight turns: {attr['straight']} <br>
          &nbsp; # of right turns: {attr['right']} <br>
          &nbsp; # of stressful turns: {attr['unsignalized_left_straight_nonlocal']} <br>
          &nbsp; # of signalized turns: {attr['signalized_left_straight']} <br>
          </div>
          '''

    # Legend candidates noted in an earlier revision but not shown:
    # percent_detour, shortest_buffer_overlap, overlap (modeled path), plus the
    # remaining link_type.*, highway.*, speedlimit_range_mph.*,
    # lanes_per_direction.*, bridge/tunnel and rider/trip attribute columns.

    mymap.get_root().html.add_child(folium.Element(legend_html))

    # The map is returned so a notebook displays it; call mymap.save(path) to export
    return mymap

#TODO add in the legend with trip info and then we're golden


In [37]:
# tripids that have been sampled for inspection (appended to by the sampling cell)
examined = []

In [38]:
# Draw one random trip, record it as examined, and show its comparison map
tripid = gdf['tripid'].sample(n=1).item()
examined.append(tripid)
visualize(tripid,gdf,nodes)

AttributeError: module 'folium' has no attribute 'FitOverlays'

In [None]:
# Persist the merged trip GeoDataFrame as a pickle next to the other exports
(export_fp/'test_matches.pkl').write_bytes(pickle.dumps(gdf))

In [None]:
#viz version (used for optimization too)
