# EDA on "road" feature types

Here I'm exploring OpenStreetMap's metadata for roads,
with the aim of defining a consistent and representative set of metadata fields
for later analysis.

In [None]:
import numpy as np
import pandas as pd
import pyproj
import shapely
import osmnx

import plotly
from plotly.subplots import make_subplots
from plotly.graph_objects import Scatter

from geo_encodings import draw_shape

import polars as pl
# Raise the polars table row limit to 25 for the tables printed later on.
pl.Config.set_tbl_rows(25)


In [None]:
# Bounding boxes for selected locations around the world.
# Each entry is ((lat0, lon0), (lat1, lon1)): the south-west corner followed
# by the north-east corner, in decimal degrees.
locations = {
    'Somewhere in Belarus': ((53.107846, 28.792399), (53.221441, 28.989799)),
    'Minsk': ((53.894162, 27.536065), (53.910152, 27.570615)),
    'Milkovo, Kamchatka': ((54.680048, 158.585918), (54.714276, 158.656632)),
    'Asagiri, Japan': ((32.228572, 130.881277), (32.262670, 130.906660)),
    'Melo Uruguay': ((-32.395782, -54.201407), (-32.338938, -54.122384)),
    'Libreville Gabon': ((0.354229, 9.393414), (0.477928, 9.496069)),
    'Portsmouth NH': ((43.065028, -70.793336), (43.094909, -70.722353)),
}

# Change this name to run the rest of the analysis on a different area.
location_name = 'Portsmouth NH'
(lat0, lon0), (lat1, lon1) = locations[location_name]

# Handy derived stuff.
# query_bounds is ordered (west, south, east, north).
query_bounds = [lon0, lat0, lon1, lat1]
center_lon = (lon0 + lon1) / 2
center_lat = (lat0 + lat1) / 2


In [None]:
# Define a local map projection: a transverse Mercator centred on the query
# area, in metres. `offset` is used as both the false easting (x_0) and the
# false northing (y_0).
offset = 20000
proj_def = f"""
+proj=tmerc +lat_0={center_lat} +lon_0={center_lon} 
+k=1.0 +x_0={offset} +y_0={offset} +datum=WGS84 +units=m +no_defs
"""
ltm_crs = pyproj.CRS.from_proj4(proj_def)
wgs84_crs = pyproj.CRS.from_epsg(4326)

# Build one transformer per direction; always_xy=True is passed so that
# coordinates are given in (x, y) order on both sides.
to_local = pyproj.Transformer.from_crs(wgs84_crs, ltm_crs, always_xy=True)
to_wgs84 = pyproj.Transformer.from_crs(ltm_crs, wgs84_crs, always_xy=True)

# Expose the bound transform methods as plain callables.
proj_forward = to_local.transform
proj_inverse = to_wgs84.transform


## Roads

In [None]:
# Query OSM for everything inside the bounding box that carries a "highway"
# tag, and flatten the result's index into ordinary columns.
tags = {'highway': True}
features = osmnx.features.features_from_bbox(query_bounds, tags=tags)
features = features.reset_index()

# For each feature, record its geometry type and a copy of its geometry
# transformed with proj_forward.
geometries = list(features['geometry'])
features['gtype'] = [geom.geom_type for geom in geometries]
features['geomxy'] = [
    shapely.ops.transform(proj_forward, geom) for geom in geometries
]
print('%d features total' % len(features))

In [None]:
# Show every column of the query result, including the derived
# 'gtype' and 'geomxy' columns.
features.columns

In [None]:
# Count the combinations of 'highway' and 'crossing' tag values.
# NOTE(review): DataFrame.value_counts() drops rows containing NaN by default,
# so features without a 'crossing' tag presumably do not appear here --
# confirm, and pass dropna=False if they should be included.
features[['highway', 'crossing']].value_counts()

In [None]:
# Define a structure that can be used to translate OSM's metadata into
# a simpler consistent set of fields.
#
# Each entry describes one feature class:
#   'category' / 'label' -- the simplified fields assigned to a matching feature.
#   'gtype'              -- the geometry type the feature must have.
#   'keys'               -- maps an OSM tag name to the LIST of accepted values;
#                           every listed tag must match.
#
# Accepted values must always be lists. A bare string would turn the `in`
# test into a substring check (e.g. 'stop' in 'bus_stop' is True).
#
# More specific entries are listed before more general ones (for example,
# crossings with traffic signals come before plain crossings).
translator = [
    {
        'category': 'roadway feature',
        'label': 'traffic signals',
        'gtype': 'Point',
        'keys': {'highway': ['crossing'], 'crossing': ['traffic_signals']},
    },
    {
        'category': 'roadway feature',
        'label': 'traffic signals',
        'gtype': 'Point',
        'keys': {'highway': ['traffic_signals']},
    },
    {
        'category': 'roadway feature',
        'label': 'crosswalk',
        'gtype': 'Point',
        'keys': {'highway': ['crossing'], 'crossing': ['marked']},
    },
    {
        'category': 'roadway feature',
        'label': 'crosswalk',
        'gtype': 'Point',
        'keys': {'highway': ['crossing']},
    },
    {
        'category': 'roadway feature',
        'label': 'street lamp',
        'gtype': 'Point',
        'keys': {'highway': ['street_lamp']},
    },
    {
        'category': 'route',
        'label': 'pedestrian way',
        'gtype': 'LineString',
        'keys': {'highway': ['footway']},
    },
    {
        'category': 'roadway feature',
        'label': 'transit stop',
        'gtype': 'Point',
        'keys': {'highway': ['bus_stop']},
    },
    {
        'category': 'route',
        'label': 'highway',
        'gtype': 'LineString',
        'keys': {'highway': ['motorway', 'motorway_link']},
    },
    {
        'category': 'route',
        'label': 'primary road',
        'gtype': 'LineString',
        'keys': {'highway': ['primary', 'primary_link', 'trunk', 'trunk_link']},
    },
    {
        'category': 'route',
        'label': 'secondary road',
        'gtype': 'LineString',
        'keys': {'highway': ['secondary', 'secondary_link']},
    },
    {
        'category': 'route',
        'label': 'tertiary road',
        'gtype': 'LineString',
        'keys': {'highway': ['tertiary', 'tertiary_link']},
    },
    {
        'category': 'route',
        'label': 'residential road',
        'gtype': 'LineString',
        'keys': {'highway': ['residential']},
    },
    {
        'category': 'route',
        'label': 'service road',
        'gtype': 'LineString',
        'keys': {'highway': ['service', 'unclassified']},
    },
    {
        'category': 'route',
        'label': 'pedestrian way',
        'gtype': 'LineString',
        'keys': {'highway': ['pedestrian', 'steps', 'path']},
    },
    {
        'category': 'route',
        'label': 'cycle way',
        'gtype': 'LineString',
        'keys': {'highway': ['cycleway']},
    },
]


In [None]:
# Apply the translator. That is, go through the list of features, and check
# each one against the records of "translator". The FIRST record that matches
# decides the feature's category and label, so each feature is kept at most
# once. (Without this, a crossing tagged crossing=marked would match both the
# "marked crossing" record and the generic "crossing" record and be stored
# twice.) Features that match nothing are collected in `missed` for reporting.


def _matches(feature, fclass):
    """Return True if `feature` satisfies every constraint of `fclass`.

    The geometry type must be equal, and for every tag named in
    fclass['keys'] the feature's value must be one of the accepted values.
    A tag that is absent from the feature counts as a mismatch.
    """
    if feature['gtype'] != fclass['gtype']:
        return False
    for key_name, accepted in fclass['keys'].items():
        # Tolerate a bare string as shorthand for a one-element list, so that
        # `in` is never evaluated as a substring test.
        if isinstance(accepted, str):
            accepted = [accepted]
        if feature.get(key_name) not in accepted:
            return False
    return True


keepers = []
missed = []

for feature in features.to_dict('records'):
    # First matching feature class, or None if nothing matches.
    fclass = next((fc for fc in translator if _matches(feature, fc)), None)
    if fclass is None:
        missed.append({
            'highway': feature['highway'],
            'gtype': feature['gtype'],
        })
        continue
    keepers.append({
        'category': fclass['category'],
        'label': fclass['label'],
        'gtype': feature['gtype'],
        'geom': feature['geomxy'],
    })

In [None]:
# Report the features that no translator record matched, grouped by their
# 'highway' value and geometry type, most frequent first.
print('%d records unmatched' % len(missed))
if missed:
    # Only build the table when there is something to show: a DataFrame made
    # from an empty list has no columns, so selecting 'highway' would fail.
    m = pl.DataFrame(missed).select(['highway', 'gtype'])
    print(m.group_by(['highway', 'gtype']).agg(pl.len()).sort(by='len', descending=True))

In [None]:
# Collect the matched features into a polars frame and print how many
# landed in each (label, geometry type) pair, largest group first.
df = pl.DataFrame(keepers)
print('%d records matched' % len(df))
pl.Config.set_tbl_rows(25)
label_counts = (
    df.group_by(['label', 'gtype'])
    .agg(pl.len().alias('n'))
    .sort(by='n', descending=True)
)
print(label_counts)

In [None]:
# Vis: draw every kept geometry on one figure, coloured by label.
colors = {
    "highway": "magenta",
    "primary road": "#d7191c",
    "secondary road": "#fdae61",
    "tertiary road": "#ffffbf",
    "residential road": "#abd9e9",
    "service road": "#cccccc",
    'transit stop': 'green',
    'street lamp': 'yellow',
    'traffic signals': 'orange',
    'crosswalk': 'black',
}

# Labels left out of the plot.
hidden_labels = ('pedestrian way', 'service road')

# Labels that already have a legend entry; each label gets exactly one.
used = set()

fig = make_subplots(1, 1)
for row in df.rows(named=True):
    label = row['label']
    if label in hidden_labels:
        continue
    first_of_label = label not in used
    used.add(label)
    draw_shape(
        row['geom'],
        fig,
        name=label,
        color=colors.get(label, 'gray'),  # labels without a colour are drawn gray
        showlegend=first_of_label,
    )

fig.update_layout(width=800, height=800)
fig.show()


In [None]:
# Count how many features carry each 'highway' tag value.
features['highway'].value_counts()

In [None]:
# Flag which 'highway' values are missing.
# The column holds OSM tag strings (the translator above compares it to
# strings), and np.isnan() raises TypeError on such non-numeric arrays.
# pd.isna() works on object arrays and returns a boolean array of the
# same shape.
f = features['highway'].values
pd.isna(f)