In [34]:
import pathlib as pl
import datetime as dt

import shapely
import pandas as pd
import r5py as r5
import r5py.sampledata.helsinki as hs
import pyrosm as pr
import geopandas as gpd
import geohexgrid as ghg
from loguru import logger

gpd.options.io_engine = "pyogrio"


# Setup

In [32]:
DATA_DIR = pl.Path("../data") 
WGS84 = "epsg:4326"
NZTM = "epsg:2193"

%ls {DATA_DIR}

# Load Auckland OSM and GTFS DATA
%time akl_pbf_path = pr.get_data("Auckland", directory=DATA_DIR)
akl_gtfs_path = DATA_DIR / "auckland_gtfs_20230824.zip"



[0m[01;32mauckland_gtfs_20230824.zip[0m*  auckland_points.geojson
Auckland.osm.pbf             nz_tas.gpkg
CPU times: user 284 µs, sys: 9 µs, total: 293 µs
Wall time: 298 µs


In [None]:
%time transport_network = r5.TransportNetwork(akl_pbf_path, [akl_gtfs_path])
transport_modes = [
    r5.TransportMode.TRANSIT,
    r5.TransportMode.WALK,
]


# Write an isochrone function for r5py

To address [this Github issue](https://github.com/r5py/r5py/issues/311) .
Start by porting the [r5r isochrone code](https://github.com/ipeaGIT/r5r/blob/master/r-package/R/isochrone.R) to Python.

In [None]:
def get_osm_nodes(transport_network) -> gpd.GeoDataFrame:
    import com.conveyal.r5
    
    k = com.conveyal.r5.streets.VertexStore.FIXED_FACTOR
    v = transport_network._transport_network.streetLayer.vertexStore
    lonlats = zip(list(v.fixedLons.toArray()), list(v.fixedLats.toArray()))
    nodes = gpd.GeoDataFrame(
        geometry=[shapely.Point(lon / k, lat / k) for lon, lat in lonlats],
        crs="epsg:4326",
    )
    nodes["id"] = nodes.index
    return nodes

def get_osm_nodes_and_edges(osm_pbf_path: pl.Path, network_type: str="all") -> gpd.GeoDataFrame:
    """
    Read the OSM protobuf file at the given path and extract from it nodes
    of the given network type.
    Return the nodes as a GeoDataFrame with the columns 'id' (OSM ID), 'geometry'.
    Uses Pyrosm and can be slow at around 40 seconds for Helsinki.
    """
    osm = pr.OSM(str(osm_pbf_path))
    return osm.get_network(network_type="all", nodes=True)


In [132]:
def isochrone_g(
    transport_network: r5.TransportNetwork,
    transport_modes: list[r5.TransportMode],
    origins: gpd.GeoDataFrame,
    time_bounds: list[float],
    grid: gpd.GeoDataFrame,
    departure: dt.datetime|None=None,
    snap_to_network: bool|int=False,
    **kwargs: dict,
) -> gpd.GeoDataFrame:
    """
    Return a GeoDataFrame of isochrones (polygons) from the given origins and 
    of the given time bounds in minutes.
    Use the given transport nework, transport modes, and departure datetime, 
    the latter of which defaults to the current datetime.
       
    Further customise the isochrone calculation as follows.
    
    - Snap the origin points to the street network before routing if and only if ``snap_to_network``
      If ``True``, the default search radius 
      (defined in com.conveyal.r5.streets.StreetLayer.LINK_RADIUS_METERS) is used; 
      if int, then use that many meters as the search radius for snapping.
    - Pass in any keyword arguments accepted by :class:`r5py.RegionalTask`, 
      e.g. `departure_time_window`, `percentiles`, `max_time_walking`.
    
    """
    time_bounds = sorted(set(time_bounds))
    
    # Use the grid to get destination points
    logger.info("Make destinations")
    destinations = grid.assign(geometry=lambda x: x.representative_point())

    # Compute travel times
    logger.info("Compute travel times")
    ttm = r5.TravelTimeMatrixComputer(
        transport_network,
        origins=origins,
        destinations=destinations,
        departure=departure,
        transport_modes=transport_modes,
        snap_to_network=snap_to_network,
        max_time=dt.timedelta(seconds=(time_bounds[-1] + 5)* 60),  # Prune search tree
        **kwargs,
    )
    f = (
        ttm.compute_travel_times()
        .dropna()
        .rename(columns={"travel_time": "travel_time_p50"})
        # Melt in case of multiple travel time percentiles
        .melt(id_vars=["from_id", "to_id"], var_name="pctile", value_name="travel_time")
        .assign(pctile=lambda x: x["pctile"].str.split("_").str[-1])
    )
    if f.empty:
        return gpd.GeoDataFrame()


    # Build isochrones from grid cells with reachable points
    logger.info("Build isochrones")
    records = []
    for (from_id, pctile), group in f.groupby(["from_id", "pctile"]):
        for time_bound in time_bounds:
            iso = (
                grid
                .merge(
                    group
                    .loc[lambda x: x["travel_time"] <= time_bound]
                    .rename(columns={"to_id": "id"})
                )
                .dissolve()
            )
            records.append(
                {
                    "from_id": from_id,
                    "travel_time_percentile": pctile,
                    "time_bound": time_bound,
                    "geometry": iso["geometry"].iat[0],
                }
            )

    logger.info("Build GeoDataFrame")
    return gpd.GeoDataFrame(pd.DataFrame.from_records(records), crs=WGS84)

def fix_isochrone_g(isos: gpd.GeoDataFrame, meters_crs, buffer=10) -> gpd.GeoDataFrame:
    """
    Get rid of minor non-overlaps in cells.
    """
    return (            
        isos
        .to_crs(meters_crs)
        .assign(geometry=lambda x: x.buffer(buffer))
        .dissolve(by=["from_id", "travel_time_percentile", "time_bound"])
        .reset_index()
        .assign(geometry=lambda x: x.buffer(-buffer))
        .to_crs(isos.crs)
    )
    
def isochrone_ch(
    transport_network: r5.TransportNetwork,
    transport_modes: list[r5.TransportMode],
    origins: gpd.GeoDataFrame,
    time_bounds: list[float],
    departure: dt.datetime|None=None,
    snap_to_network: bool|int=False,
    sample_frac: float=0.8,
    concave_hull_ratio=0.15,
    **kwargs: dict,
) -> gpd.GeoDataFrame:
    """
    Return a GeoDataFrame of isochrones (polygons) from the given origins and 
    of the given time bounds in minutes.
    Use the given transport nework, transport modes, and departure datetime, 
    the latter of which defaults to the current datetime.
       
    Further customise the isochrone calculation as follows.
    
    - Use a random sample of ``sample_frac`` of all the underlying OSM nodes as potential destinations.
    - When making the isochrones using concave hulls of reachable points, use the given concave hull ratio.
    - Snap the origin points to the street network before routing if and only if ``snap_to_network``
      If ``True``, the default search radius 
      (defined in com.conveyal.r5.streets.StreetLayer.LINK_RADIUS_METERS) is used; 
      if int, then use that many meters as the search radius for snapping.
    - Pass in any keyword arguments accepted by :class:`r5py.RegionalTask`, 
      e.g. `departure_time_window`, `percentiles`, `max_time_walking`.
    
    """
    time_bounds = sorted(set(time_bounds))
    
    # Use a random sample of network nodes as destination points
    logger.info("Get OSM nodes for destinations destinations")
    osm_nodes = get_osm_nodes(transport_network).sample(frac=sample_frac, random_state=1)

    # Compute travel times
    logger.info("Compute travel times")
    ttm = r5.TravelTimeMatrixComputer(
        transport_network,
        origins=origins,
        destinations=osm_nodes,
        departure=departure,
        transport_modes=transport_modes,
        snap_to_network=snap_to_network,
        max_time=dt.timedelta(seconds=(time_bounds[-1] + 5)* 60),  # Prune search tree
        **kwargs,
    )
    f = ttm.compute_travel_times().dropna()
    if f.empty:
        return gpd.GeoDataFrame()

    # Build isochrones as concave hulls of reachable points
    logger.info("Build isochrones")
    records = []
    for from_id, group in f.groupby("from_id"):
        for time_bound in time_bounds:
            reachable_nodes = osm_nodes.merge(
                group
                .loc[lambda x: x["travel_time"] <= time_bound]
                .rename(columns={"to_id": "id"})
            )
            iso = shapely.concave_hull(reachable_nodes.unary_union, ratio=concave_hull_ratio)
            records.append(
                {
                    "origin_id": from_id,
                    "time_bound": time_bound,
                    "geometry": iso,
                }
            )

    logger.info("Build GeoDataFrame")
    return gpd.GeoDataFrame(pd.DataFrame.from_records(records), crs=WGS84)
  

# def isochrone_be(
#     transport_network: r5.TransportNetwork,
#     transport_modes: list[r5.TransportMode],
#     osm_pbf_path: pl.Path,
#     origins: gpd.GeoDataFrame,
#     time_bounds: list[float],
#     meters_crs:str,
#     buffer:float=10,
#     simplify:float=0,
#     osm_nodes: gpd.GeoDataFrame|None=None,
#     osm_edges: gpd.GeoDataFrame|None=None,
#     departure: dt.datetime|None=None,
#     snap_to_network:bool|int=False,
#     **kwargs: dict,
# ) -> gpd.GeoDataFrame:
#     """
#     Return a GeoDataFrame of isochrones (polygons) from the given origins and 
#     of the given time bounds in minutes.
#     Use the given transport nework, transport modes, the path to OSM protobuf file
#     underlying the transport network, and departure datetime, 
#     the latter of which defaults to the current datetime.

#     Further customise the isochrone calculation as follows.
    
#     - Use a random sample of ``sample_frac` `of all transport network nodes as potential destinations.
#     - Snap the origin points to the street network before routing if and only if ``snap_to_network``
#       If ``True``, the default search radius 
#       (defined in com.conveyal.r5.streets.StreetLayer.LINK_RADIUS_METERS) is used; 
#       if int, then use that many meters as the search radius for snapping.
#     - Pass in any keyword arguments accepted by :class:`r5py.RegionalTask`, 
#       e.g. `departure_time_window`, `percentiles`, `max_time_walking`.
    
#     """
#     time_bounds = sorted(set(time_bounds))
    
#     # Use a random sample of network nodes as destination points
#     logger.info("Get OSM nodes and edges")
#     if osm_nodes is None or osm_edges is None:
#         osm_nodes, osm_edges = get_osm_nodes_and_edges(osm_pbf_path)

#     osm_nodes = osm_nodes.filter(["id", "geometry"])
#     osm_edges = (
#         osm_edges
#         .filter(["v", "geometry"])
#         .rename(columns={"v": "id"})
#     )

#     # Compute travel times
#     logger.info("Compute travel times")
#     ttm = r5.TravelTimeMatrixComputer(
#         transport_network,
#         origins=origins,
#         destinations=osm_nodes,
#         departure=departure,
#         transport_modes=transport_modes,
#         snap_to_network=snap_to_network,
#         max_time=dt.timedelta(seconds=time_bounds[-1] * 60),
#         **kwargs,
#     )
#     bins = [0] + time_bounds
#     labels = time_bounds
#     f = ttm.compute_travel_times().dropna()

#     if f.empty:
#         return gpd.GeoDataFrame()

#     # Build isochrones as buffered edges
#     logger.info("Build isochrones")
#     records = []
#     for from_id, group in f.groupby("from_id"):
#         for time_bound in time_bounds:
#             reachable_nodes = osm_nodes.merge(
#                 group
#                 .loc[lambda x: x["travel_time_bin"] <= time_bound]
#                 .rename(columns={"to_id": "id"})
#             )
#             edge_blob = (
#                 osm_edges
#                 .merge(reachable_nodes.filter(["id"]))
#                 .to_crs(meters_crs)
#                 .unary_union
#             )
#             records.append(
#                 {
#                     "origin_id": from_id,
#                     "time_bound": time_bound,
#                     "geometry": edge_blob,
#                 }
#             )

#     logger.info("Build GeoDataFrame")
#     return (
#         gpd.GeoDataFrame(pd.DataFrame.from_records(records), crs=meters_crs)
#         .assign(geometry=lambda x: x.buffer(buffer).simplify(simplify))
#         .to_crs("epsg:4326")
#     )
  

In [43]:
%%time

# Make a hexagon grid of circumradius 100m covering the study area

study_area = gpd.GeoDataFrame(geometry=[transport_network.extent], crs=WGS84)
grid = (
    ghg.make_grid(study_area.to_crs(NZTM), 100, intersect=True)
    # Clip to land
    .clip(gpd.read_file(DATA_DIR / "auckland.gpkg"))
    .to_crs(WGS84)
)
display(grid.head())
grid.iloc[:100].explore()

Unnamed: 0,cell_id,geometry
508,1181328080,"POLYGON ((174.93686 -37.15165, 174.93573 -37.1..."
690,1181428080,"POLYGON ((174.93853 -37.15085, 174.93740 -37.1..."
689,1181228081,"POLYGON ((174.93515 -37.15090, 174.93402 -37.1..."
873,1181528080,"POLYGON ((174.94019 -37.15004, 174.93907 -37.1..."
872,1181328081,"POLYGON ((174.93682 -37.15009, 174.93569 -37.1..."


CPU times: user 1min 6s, sys: 160 ms, total: 1min 6s
Wall time: 1min 9s


In [138]:
# Get some origin points

origins = gpd.read_file(DATA_DIR / "auckland_points.geojson").assign(id=lambda x: x.index)
tmp = (
    grid
    .sample(100)
    .assign(geometry=lambda x: x.representative_point())
    .rename(columns={"cell_id": "id"})
)
origins = pd.concat([origins, tmp])

display(origins)
display(origins.assign(geometry=lambda x: x.buffer(0.001)).explore())


Unnamed: 0,geometry,id
0,POINT (174.79458 -36.82961),0
1,POINT (174.77850 -36.89609),1
2,POINT (174.89767 -36.88134),2
87710,POINT (175.00915 -36.77579),1186228295
64645,POINT (174.56422 -36.88083),1159628365
...,...,...
40914,POINT (174.93861 -36.97756),1181728189
65072,POINT (174.77618 -36.87629),1172228303
1993,POINT (175.03402 -37.14300),1187128055
94167,POINT (174.68931 -36.75266),1167228408



  display(origins.assign(geometry=lambda x: x.buffer(0.001)).explore())


In [143]:
%%time

# Can do 100 origins in 3 minutes
isos = (
    isochrone_g(
        transport_network=transport_network, 
        transport_modes=transport_modes,
        origins=origins,
        departure=dt.datetime(2023, 8, 28, 8, 0, 0),
        time_bounds=[45],
        grid=grid.rename(columns={"cell_id": "id"}),
        departure_time_window=dt.timedelta(seconds=35*60),
        percentiles=[1],
    )
    .pipe(fix_isochrone_g, NZTM)
)

display(isos.head())
display(isos.iloc[:5].explore(column="from_id", categorical=True, cmap="viridis"))

[32m2023-11-03 15:34:45.594[0m | [1mINFO    [0m | [36m__main__[0m:[36misochrone_g[0m:[36m30[0m - [1mMake destinations[0m
[32m2023-11-03 15:34:47.488[0m | [1mINFO    [0m | [36m__main__[0m:[36misochrone_g[0m:[36m34[0m - [1mCompute travel times[0m
[32m2023-11-03 15:37:41.274[0m | [1mINFO    [0m | [36m__main__[0m:[36misochrone_g[0m:[36m58[0m - [1mBuild isochrones[0m
[32m2023-11-03 15:37:51.467[0m | [1mINFO    [0m | [36m__main__[0m:[36misochrone_g[0m:[36m80[0m - [1mBuild GeoDataFrame[0m


CPU times: user 4min 11s, sys: 1.52 s, total: 4min 13s
Wall time: 3min 10s


Unnamed: 0,from_id,travel_time_percentile,time_bound,geometry
0,0,p1,45,"MULTIPOLYGON (((174.75959 -36.78911, 174.75959..."
1,1,p1,45,"MULTIPOLYGON (((174.86174 -36.91167, 174.86174..."
2,2,p1,45,"MULTIPOLYGON (((174.85829 -36.90938, 174.85829..."
3,1152828388,p1,45,"POLYGON ((174.44880 -36.88854, 174.44880 -36.8..."
4,1153028506,p1,45,"POLYGON ((174.44886 -36.71368, 174.44887 -36.7..."
