## Setup

In [1]:
# TODO - Data Processing
# Figure out the clearest way to save the entire track data, probably [x0 y0 x1 y1 highway surface paved length cumlength]
# Save outputs to a file
# Subdivide entire trajectory into smaller batches and run those

# TODO - Data Analysis
# Set up another notebook that just does the rendering & analysis

In [2]:
# --- Preamble
from IPython.display import IFrame
from itertools import groupby 
from csv import writer
import networkx as nx
import pandas as pd
import numpy as np
import osmnx as ox
import warnings
import folium
%matplotlib inline



In [3]:
# --- Configure settings
trail = 'gr131' # Name of the trail file
delta = 0.005 # Bounding box tolerance [deg]
points_per_batch = 100 # Subdivide the trail into batches of this many points
paved_types = ["primary", "unclassified", "tertiary", "residential", "secondary", "service"] # Highway types that are paved

# OSM tags that OSMNX should output, so we have the information we need to determine paved status.
# Defined once so the two settings below can never drift apart.
osm_useful_tags = ["bridge","tunnel","oneway","lanes","ref","name","highway","maxspeed","service","access","area","landuse","width","est_width","junction","surface","tracktype"]

# Both setting names are assigned, as in the original notebook.
# NOTE(review): presumably these are the old and new names of the same OSMNX setting - confirm against the installed version.
# Each setting gets its own copy of the list so a change to one cannot leak into the other.
ox.settings.useful_tags_path = list(osm_useful_tags)
ox.settings.useful_tags_way = list(osm_useful_tags)

## Function definitions

In [4]:
# --- Defining some functions
def get_distances(node1,node2,point):
    """Return the distances point->node1 and point->node2 in kilometres.

    Every argument is indexed as [latitude, longitude] in degrees. Each
    location is placed on a sphere of radius R and the straight-line
    (chord) distance between the resulting 3D positions is returned.

    Returns:
        (d1, d2): distance from point to node1 and from point to node2 [km].
    """
    R = 6371.8 # Mean earth radius [km]

    def to_xyz(lat_deg, lon_deg):
        # Degrees -> radians, then spherical -> Cartesian coordinates
        lat_rad = np.radians(lat_deg)
        lon_rad = np.radians(lon_deg)
        return (R*np.cos(lat_rad)*np.cos(lon_rad),
                R*np.cos(lat_rad)*np.sin(lon_rad),
                R*np.sin(lat_rad))

    def separation(a, b):
        # Euclidean norm of the difference vector a - b
        dx = a[0]-b[0]
        dy = a[1]-b[1]
        dz = a[2]-b[2]
        return np.sqrt(dx*dx + dy*dy + dz*dz)

    origin = to_xyz(point[0], point[1])
    d1 = separation(to_xyz(node1[0], node1[1]), origin)
    d2 = separation(to_xyz(node2[0], node2[1]), origin)
    return d1,d2

# This function returns a list with the [lat, lon] of each point within an OSM edge
def edge_to_coords(network_edges,edge_id):
    """Return the vertices of one OSM edge as a list of [lat, lon] pairs.

    This definition must be active: the batch-processing cell calls
    edge_to_coords, so leaving it commented out raises NameError on a
    fresh kernel.

    Args:
        network_edges: table of edges with a 'geometry' column, indexed so
            that .loc[(u, v)] selects the rows for that node pair
            (presumably the edge GeoDataFrame from ox.graph_to_gdfs - confirm).
        edge_id: sequence whose first two items are the start and end node IDs.

    Returns:
        List of [lat, lon] lists, in the order stored in the edge geometry.

    Raises:
        KeyError: if there is no edge from edge_id[0] to edge_id[1].
    """
    # Several parallel edges can connect the same node pair; take the first one.
    # .iloc[0] is positional, so it also works when no row carries the label 0.
    geometry = network_edges.loc[(edge_id[0],edge_id[1])]['geometry'].iloc[0]
    lon, lat = geometry.coords.xy # x holds the longitudes, y the latitudes
    return [[y, x] for x, y in zip(lon, lat)]

# Distance between two points
def cartesian_distance(lat0,lon0,lat1,lon1):
    """Return the straight-line distance in metres between two lat/lon points.

    Both points (given in degrees) are mapped onto a sphere of radius R and
    the length of the chord joining them is computed. R is in kilometres, so
    the result is scaled by 1000 to give metres.
    """
    R = 6371.8 # Mean earth radius [km]
    phi0, lam0, phi1, lam1 = (np.radians(angle) for angle in (lat0, lon0, lat1, lon1))

    # Cartesian positions of the two points [km]
    start = (R*np.cos(phi0)*np.cos(lam0), R*np.cos(phi0)*np.sin(lam0), R*np.sin(phi0))
    end   = (R*np.cos(phi1)*np.cos(lam1), R*np.cos(phi1)*np.sin(lam1), R*np.sin(phi1))

    dx = start[0] - end[0]
    dy = start[1] - end[1]
    dz = start[2] - end[2]
    return 1000.0*np.sqrt(dx*dx + dy*dy + dz*dz)

## Batch processing

In [6]:
# --- Load and process the GPX track
filename_in = 'data/' + trail + '.csv'
hike = pd.read_csv(filename_in)

# --- Count number of batches
n_batch = int(np.ceil(hike.shape[0]/points_per_batch))

# --- Looping over all batches
# To resume an interrupted run, start the range at a later batch, e.g. range(64,n_batch)
for b in range(n_batch): # b is the batch counter

    ## --- PREPARING --- ##

    # Define the range of GPX points to process in the current batch
    n1 = b*points_per_batch # First point
    n2 = min(n1 + points_per_batch, len(hike)) # Last point
    # NOTE: .loc slices by label and includes the end label, so a batch holds points
    # n1..n2 inclusive and shares point n2 with the next batch.
    batch = hike.loc[n1:n2]
    coords = batch[['latitude','longitude']].values.tolist()
    print(f'Processing batch {b} of {n_batch-1}, covering GPX track points {n1} through {n2}')

    # Calculate the bounding box around points n1 through n2
    lat_min = batch['latitude'].min()
    lat_max = batch['latitude'].max()
    lon_min = batch['longitude'].min()
    lon_max = batch['longitude'].max()


    ## --- OSM NETWORK --- ##

    # Download the street network based on bounding box
    print('Downloading street network...')
    G = ox.graph_from_bbox(lat_max+delta, lat_min-delta,
                           lon_max+delta, lon_min-delta,
                           network_type="all_private", clean_periphery=False)

    # Processing the street network
    print('Processing street network...')
    network_points, network_edges = ox.graph_to_gdfs(G) # Convert the street network
    network_points.sort_index(inplace=True) # Sort the nodes for faster selections with .loc
    network_edges.sort_index(inplace=True) # Sort the edges for faster selections with .loc
    nodes = network_points[['y','x']].values.tolist() # Vector with [lat, lon] of all network nodes

    ## --- MAP MATCHING --- ##

    # Setup
    node_list_raw = [] # Will contain the nodes between which pathfinding should take place
    warnings.filterwarnings("ignore", category=UserWarning) # TODO: Figure out projection issue so this warning is not thrown

    print('Beginning main processing loop...')
    for k, point in enumerate(coords): # k is only used to display where we are in the loop

        txtstring = '\r' + 'Handling GPX point ' + str(k) + ' of ' + str(len(coords)-1)
        print(txtstring, end='\r', flush=True)

        # Step 1: Determine which edge this GPX point is closest to
        ox_nearest = ox.distance.nearest_edges(G, point[1], point[0]) # OSMNX call to find the nearest edge, returns a tuple
        nearest_edge = [ox_nearest[0], ox_nearest[1]] # Keep only the two node IDs of the edge
        nearest_edge_coords = edge_to_coords(network_edges, nearest_edge) # Get coordinates of the points that make up that edge

        # Step 2: Determine whether this GPX point is closer to the start/end of the edge
        vertex_start = nearest_edge_coords[0] # Start vertex coords of the edge
        vertex_end   = nearest_edge_coords[-1] # End vertex coords of the edge
        d_start, d_end = get_distances(vertex_start, vertex_end, point) # Distance point->vertex_start and point->vertex_end

        # Step 3: Store the ID of the nearest vertex in node_list_raw
        if d_start<d_end:
            node_list_raw.append(nearest_edge[0])
        else:
            node_list_raw.append(nearest_edge[1])

    print('')

    # Step 4: Remove successive duplicates from the node list
    node_list = [node_id for node_id, _ in groupby(node_list_raw)]

    ## --- PATH CONSTRUCTION --- ##

    # Setup
    route_list_raw = [] # Will contain IDs of all nodes that make up the shortest route between the nodes in node_list

    # Step 1: Use OSMNX to generate the shortest walking route between each node pair in node_list, store the corresponding nodes in route_list
    for i in range(0,len(node_list)-1):

        txtstring = '\r' + 'Handling node_list pair ' + str(i) + ' of ' + str(len(node_list)-2)
        print(txtstring, end='\r', flush=True)

        id_start = node_list[i] # ID of the start node
        id_end   = node_list[i+1] # ID of the end node

        # Pathfinding between node_start and node_end
        route1 = ox.shortest_path(G, id_start, id_end) # Route from node_start -> node_end
        route2 = ox.shortest_path(G, id_end, id_start) # Route from node_end -> node_start

        # A route is None when no path exists in that direction (e.g. against a one-way street)
        if route1 is None and route2 is None:
            print(f'\nNo route between nodes {id_start} and {id_end} in either direction, skipping this pair')
            continue

        # Select the direction that results in the shortest path (we do this to deal with one-way streets)
        if route2 is None or (route1 is not None and len(route1)<len(route2)):
            route_list_raw.extend(route1)
        else:
            route_list_raw.extend(route2[::-1]) # We flip route2 because it runs opposite to the GPX track

    print('')

    # Remove successive duplicates from the route_list
    route_list = [node_id for node_id, _ in groupby(route_list_raw)]

    ## --- SEGMENT EXTRACTION --- ##

    # Setup
    segment_list_raw = [] # Will contain all OSM edge segments that were matched to the GPX track [x0 y0 x1 y1 d_cart d_osm highway surface tracktype]

    for i in range(0,len(route_list)-1): # Loop over all pairs in the route_list

        txtstring = '\r' + 'Handling route_list pair ' + str(i) + ' of ' + str(len(route_list)-2)
        print(txtstring, end='\r', flush=True)

        id_start = route_list[i] # ID of the start node
        id_end   = route_list[i+1] # ID of the end node

        # Selecting the corresponding edge
        try: # To deal with one-way streets
            edge = network_edges.loc[(id_start, id_end)] # All edges running id_start -> id_end
            edge_coords = edge_to_coords(network_edges, [id_start, id_end]) # Get coordinates of the points that make up that edge
        except KeyError:
            try:
                edge = network_edges.loc[(id_end, id_start)] # Edge only exists in the opposite direction
                edge_coords = edge_to_coords(network_edges, [id_end, id_start])[::-1] # Flip so the coords follow the route
            except KeyError:
                # Happens when the two nodes come from routes that could not be joined
                print(f'\nNo edge between nodes {id_start} and {id_end}, skipping this pair')
                continue

        n_segments = len(edge_coords)-1
        if n_segments < 1: # Nothing to subdivide
            continue

        # Per-edge attributes: computed once per edge and reset for every edge, so a value
        # can never leak over from a previous edge when a column is missing.
        d_osm = edge['length'].iloc[0]/n_segments # just divide the length equally between segments
        highway = edge['highway'].iloc[0]
        surface = edge['surface'].iloc[0] if 'surface' in edge.columns else np.nan
        tracktype = edge['tracktype'].iloc[0] if 'tracktype' in edge.columns else np.nan

        # Filling the segment_list_raw list
        for j in range(n_segments): # Loop over all vertex pairs in the edge
            # edge_coords entries are [lat, lon], so x0/x1 are latitudes and y0/y1 longitudes
            x0, y0 = edge_coords[j]
            x1, y1 = edge_coords[j+1]
            d_cart = cartesian_distance(x0,y0,x1,y1)
            segment_list_raw.append([x0,y0,x1,y1,d_cart,d_osm,highway,surface,tracktype])

    print('')

    ## --- SAVING --- ##

    # --- Writing outputs to a file
    print('Writing outputs to file...')
    filename_out = 'data/' + trail + '_' + str(n1) + 'to' + str(n2) + '.csv'
    with open(filename_out, 'w', newline='') as file: # newline='' as the csv module requires, avoids blank rows on Windows
        csv_writer = writer(file)
        headers = ['x0','y0','x1','y1','d_cart','d_osm','highway','surface','tracktype']
        csv_writer.writerow(headers)
        csv_writer.writerows(segment_list_raw)
    print('')
    print('')

Processing batch 0 of 21, covering GPX track points 0 through 100
Downloading street network...
Processing street network...
Beginning main processing loop...
Handling GPX point 0 of 100



Handling GPX point 100 of 100
Handling node_list pair 19 of 19
Handling route_list pair 20 of 20
Writing outputs to file...


Processing batch 1 of 21, covering GPX track points 100 through 200
Downloading street network...
Processing street network...
Beginning main processing loop...
Handling GPX point 100 of 100
Handling node_list pair 28 of 28
Handling route_list pair 33 of 33
Writing outputs to file...


Processing batch 2 of 21, covering GPX track points 200 through 300
Downloading street network...
Processing street network...
Beginning main processing loop...
Handling GPX point 100 of 100
Handling node_list pair 30 of 30
Handling route_list pair 35 of 35
Writing outputs to file...


Processing batch 3 of 21, covering GPX track points 300 through 400
Downloading street network...
Processing street network...
Beginning main processing loop...
Handling GPX point 100 of 100
Handling node_list pair 45 of 45
Handling route_list pair 56 of 56
Writing outputs to file...


Processing ba