## Importing modules and packages

In [1]:
# Importing packages
import osmnx  as ox
import pandas as pd
import numpy  as np
import folium
import os.path
import shapely
import geopandas as gpd
from IPython.display import IFrame

# Importing modules
import gr_mapmatch # Contains functions that perform the map matching of roads
import gr_placematch # Contains functions that perform the map matching of places
import gr_utils # Contains useful geometry functions
import gr_plot

# Package configuration: the OSM way tags we want osmnx to obtain from OSM
ox.settings.useful_tags_way = [
    "bridge",
    "tunnel",
    "name",
    "highway",
    "area",
    "landuse",
    "surface",
    "tracktype",
]



## Input parameters

In [2]:
# Name of the hiking trail to be considered; the loading cell looks for
# data_input/<trailname>.gpx and data_output/<trailname>.csv
trailname = 'gr16'

# Tolerance around bounding box per trail section [deg]
delta = 0.005

# Subdivide the trail into batches of this many points
points_per_batch = 100

## Loading GPX file

In [None]:
# Load the trail points, converting the GPX source into a cleaned CSV file on first use.
filename_gpx = 'data_input/' + trailname + '.gpx'
filename_csv = 'data_output/' + trailname + '.csv'
if not os.path.isfile(filename_csv): # The GPX file was not processed into a clean CSV file before
    if not os.path.isfile(filename_gpx): # The GPX file does not exist either, throw error
        raise ValueError(f'The GPX file <{filename_gpx}> was not found! Please make sure it exists.')
    # The GPX file exists, so convert it into a clean CSV file
    print(f'Converting GPX file <{filename_gpx}> into cleaned CSV file <{filename_csv}>...')
    gr_utils.process_gpx(filename_gpx,filename_csv)
    print('Completed conversion.')
# The points are always read from the cleaned CSV file, so that is the file reported here
print(f'Loading trail points from <{filename_csv}>...')
trail = pd.read_csv(filename_csv) # Now read the cleaned CSV file into a DataFrame (latitude, longitude, elevation)
print('Finished loading.')

## Gathering road information from OSM network

In [4]:
# TODO - make this optional only if the roads file is not found!!!

# Matching GPX track to OSM network (uses _osm_network_download under the hood)
# The trail is processed in batches; each batch result is cached in its own CSV
# file so that an interrupted run can be resumed without redoing finished batches.
n_trail = len(trail) # Number of GPX points in the trail
n_batch = int(np.ceil(n_trail/points_per_batch)) # Number of batches to be run
for b in range(n_batch): # Using batch counter b

    # Define the range of GPX points to process in the current batch
    n1 = b*points_per_batch # First point of this batch
    n2 = min(n1 + points_per_batch, n_trail) # Last point of this batch (clipped)

    # Check if this batch was processed before
    # TODO - update name to have _roads_ in there
    batch_out = f'cache/{trailname}_{n1}to{n2}.csv'
    print(f'Handling {b} of {n_batch-1} that covers GPX track points {n1} through {n2}...')
    if os.path.isfile(batch_out): # It already exists
        print('   This batch was processed before, skipping.')
        continue

    # It does not exist, so process it. The batch inputs are only built here,
    # so cached batches do not pay for slicing and coordinate conversion.
    # Note: .loc slices include the end label, so consecutive batches share point n2.
    trail_section = trail.loc[n1:n2] # Select that range of GPX points
    trail_coords  = gr_mapmatch.trail_to_coords(trail_section) # Convert the points into a list of [lat, lon] pairs
    network, segment_list = gr_mapmatch.match_batch(trail_section, trail_coords, delta)
    gr_utils.write_batch(batch_out, segment_list)
    print('   Finished this batch.')
    print('')

Handling 0 of 60 that covers GPX track points 0 through 100...
   This batch was processed before, skipping.
Handling 1 of 60 that covers GPX track points 100 through 200...
   This batch was processed before, skipping.
Handling 2 of 60 that covers GPX track points 200 through 300...
   This batch was processed before, skipping.
Handling 3 of 60 that covers GPX track points 300 through 400...
   This batch was processed before, skipping.
Handling 4 of 60 that covers GPX track points 400 through 500...
   This batch was processed before, skipping.
Handling 5 of 60 that covers GPX track points 500 through 600...
   This batch was processed before, skipping.
Handling 6 of 60 that covers GPX track points 600 through 700...
   This batch was processed before, skipping.
Handling 7 of 60 that covers GPX track points 700 through 800...
   This batch was processed before, skipping.
Handling 8 of 60 that covers GPX track points 800 through 900...
   This batch was processed before, skipping.
Han

## Merging road information & removing backtracks

In [3]:
# Obtain the merged road segments: reuse the merged file when it is present,
# otherwise build it from the per-batch results and store it.
filename_roads = f'cache/{trailname}_roads.csv'
if os.path.isfile(filename_roads): # The merged file does exist
    print('Loading merged section file...')
    data_roads = gr_utils.read_roads(trailname) # Read the merged sections
    print('Loaded.')
else: # The merged file does not exist
    print('Merged section file was not found, merging and saving...')
    # Merge the different sections
    data_roads_raw = gr_utils.merge_roads(trailname, trail, points_per_batch)
    # Remove backtracked sections
    data_roads = gr_mapmatch.remove_repeat_segments(data_roads_raw)
    # Write the merged sections
    gr_utils.write_roads(trailname, data_roads)
    print('Saved.')

Loading merged section file...
Loaded.


## Gathering place information from OSM network

In [6]:
## Matching GPX track to OSM places (uses _osm_place_download under the hood)

# Batch settings
points_per_batch_places = 100 # Subdivide the trail into batches of this many segments
delta_places = 0.005 # bbox delta in deg

n_roads = len(data_roads) # Number of segments in data_roads
n_batch_places = int(np.ceil(n_roads/points_per_batch_places)) # Number of batches to be run

# Placeholder values; overwritten below for every batch that gets processed
data_roads['dev_dist'] = 0.0

# NOTE(review): .loc below is label-based and inclusive on both ends, so this loop
# assumes data_roads carries a 0..n_roads-1 index - TODO confirm
for b in range(n_batch_places): # Using batch counter b

    # First and last segment of this batch (last one clipped to the table length)
    n1 = b*points_per_batch_places
    n2 = min(n1 + points_per_batch_places, n_roads) - 1

    # Each batch is cached under its own file name
    batch_out = f'cache/{trailname}_places_{n1}to{n2}.csv'
    print(f'Handling {b} of {n_batch_places-1} that covers road segments {n1} through {n2}...')

    if os.path.isfile(batch_out): # It already exists
        print('   This batch was processed before, skipping.')
        continue

    # It does not exist, so process it (use loc to avoid selection error)
    data_roads.loc[n1:n2,'dev_dist'] = gr_placematch.match_batch(data_roads.loc[n1:n2], delta_places)
    # Slice again after the assignment so the written rows contain the new dev_dist values
    gr_utils.write_batch_places(batch_out, data_roads.loc[n1:n2])
    print('   Finished this batch.')

Handling 0 of 62 that covers road segments 0 through 99...
   This batch was processed before, skipping.
Handling 1 of 62 that covers road segments 100 through 199...
   This batch was processed before, skipping.
Handling 2 of 62 that covers road segments 200 through 299...
   This batch was processed before, skipping.
Handling 3 of 62 that covers road segments 300 through 399...
   This batch was processed before, skipping.
Handling 4 of 62 that covers road segments 400 through 499...
   This batch was processed before, skipping.
Handling 5 of 62 that covers road segments 500 through 599...
   This batch was processed before, skipping.
Handling 6 of 62 that covers road segments 600 through 699...
   This batch was processed before, skipping.
Handling 7 of 62 that covers road segments 700 through 799...
   This batch was processed before, skipping.
Handling 8 of 62 that covers road segments 800 through 899...
   This batch was processed before, skipping.
Handling 9 of 62 that covers ro

## Merging place information

In [4]:
# Obtain the merged place information: reuse the merged file when it is present,
# otherwise build it from the per-batch results and store it.
filename_places = f'cache/{trailname}_places.csv'
if os.path.isfile(filename_places): # The merged file does exist
    print('Loading merged section file...')
    data_places = gr_utils.read_places(trailname) # Read the merged sections
    print('Loaded.')
else: # The merged file does not exist
    print('Merged places file was not found, merging...')
    # Merge the different sections
    data_places = gr_utils.merge_places(trailname, data_roads, points_per_batch_places)
    print('Saving...')
    # Write the merged sections
    gr_utils.write_places(trailname, data_places)
    print('Saved.')

Loading merged section file...
Loaded.


## Establishing paving/traffic/development status & GR type

In [11]:
# Read the merged places file, forcing the OSM tag columns to be strings
filename = 'cache/' + trailname + '_places.csv'
data_places = pd.read_csv(
    filename,
    index_col=0, # The first CSV column holds the row index
    dtype={'highway': str, 'surface': str, 'tracktype': str},
)

In [10]:
# Print a summary of the places table: index range, columns, non-null counts and dtypes
data_places.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6258 entries, 0 to 6502
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   x0               6258 non-null   float64
 1   y0               6258 non-null   float64
 2   x1               6258 non-null   float64
 3   y1               6258 non-null   float64
 4   d_cart           6258 non-null   float64
 5   d_osm            6258 non-null   float64
 6   highway          6258 non-null   object 
 7   surface          6258 non-null   object 
 8   tracktype        6258 non-null   object 
 9   first_highway    6258 non-null   object 
 10  first_surface    6258 non-null   object 
 11  first_tracktype  6258 non-null   object 
 12  paved            6258 non-null   int64  
 13  traffic          6258 non-null   int64  
dtypes: float64(6), int64(2), object(6)
memory usage: 862.4+ KB


In [12]:
# --- Classification inputs ---------------------------------------------------
# OSM tag values per paving category
# NOTE(review): the p0/p1/p2 suffixes presumably denote increasing paving levels -
# confirm against gr_mapmatch.get_paved_type
tracktype_p0 = ['grade4', 'grade5']
tracktype_p1 = ['grade2', 'grade3']
tracktype_p2 = ['grade1']
surface_p0 = ['ground', 'grass', 'dirt', 'sand', 'earth', 'mud']
surface_p1 = [
    'unpaved', 'gravel', 'fine_gravel', 'wood', 'compacted', 'rocks',
    'pebblestone', 'woodchips', 'snow', 'ice', 'salt',
]
highway_p1 = ['track', 'path', 'footway', 'bridleway']

# OSM highway values per traffic category
types_slow = ['pedestrian', 'track', 'footway', 'bridleway', 'steps', 'corridor', 'path']
types_heavy = ['motorway', 'trunk', 'primary', 'secondary', 'tertiary']

# Tolerance passed to the development classification
tol_d = 0.5

# --- Classification ----------------------------------------------------------
# Establish paved status
data_places = gr_mapmatch.get_paved_type(
    data_places,
    tracktype_p0, tracktype_p1, tracktype_p2,
    surface_p0, surface_p1,
    highway_p1,
)
# Establish traffic status
data_places = gr_mapmatch.get_traffic_type(data_places, types_slow, types_heavy)
# Establish development status
data_places = gr_placematch.get_development_type(data_places, tol_d)

In [14]:
# Print a summary of the places table again, now that the classification cell has run
data_places.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6013 entries, 0 to 6012
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       6013 non-null   int64  
 1   x0               6013 non-null   float64
 2   y0               6013 non-null   float64
 3   x1               6013 non-null   float64
 4   y1               6013 non-null   float64
 5   d_cart           6013 non-null   float64
 6   d_osm            6013 non-null   float64
 7   highway          6013 non-null   object 
 8   surface          6013 non-null   object 
 9   tracktype        6013 non-null   object 
 10  dev_dist         6013 non-null   float64
 11  first_highway    6013 non-null   object 
 12  first_surface    6013 non-null   object 
 13  first_tracktype  6013 non-null   object 
 14  paved            6013 non-null   int64  
 15  traffic          6013 non-null   int64  
 16  development      6013 non-null   bool   
dtypes: bool(1), fl

## Saving completed data frame

## Plotting

In [None]:
# Development status
# Consider a segment developed if it lies closer than tol_d to a developed area
tol_d = 0.5
# show_development returns the path that is embedded below
filepath = gr_plot.show_development(data_places, tol_d)
# Last expression of the cell, so the frame is rendered as the cell output
IFrame(src=filepath, width=1000, height=500)