This notebook contains code to generate isochrones for the cells of 100 m x 100 m grids using Valhalla, an open-source routing engine.

In [None]:
import geopandas as gpd
from shapely.geometry import Polygon, box, shape
import requests
import subprocess
import os
import logging
import shutil
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
import glob
import pickle

In [None]:
# Glob pattern matching the grid files to process.
# The grids are expected to have been generated by 'generate_grids.ipynb'.
grid_path = 'data/*.parquet'

# Collect every parquet grid file matching the pattern. glob.glob already
# returns a list, but its order depends on the file system, so sort it to
# process the grids in the same order on every run.
grids_list = sorted(glob.glob(grid_path))
print(grids_list)

# # configure logging (recommended if you monitor processing over a lot of files)
# log_path = 'logs/isochrones.log'

# # ensure log directory exists
# log_dir = os.path.dirname(log_path)
# if not os.path.exists(log_dir):
#     os.makedirs(log_dir)

# logging.basicConfig(filename=log_path, level=logging.INFO,
#                     format='%(asctime)s:%(levelname)s:%(message)s', force=True)

In [None]:
# Pickle file that stores, per grid id, the row indices whose isochrone request
# failed. The functions defined further down in this notebook read both
# `failed_indices_file` and `failed_indices_dict`, so this cell has to run
# before them.
failed_indices_file = './failed_isochrones_indices.pkl'

# Load the failed indices recorded by earlier runs; start empty if there are none.
# NOTE: pickle.load can execute arbitrary code - only load a file that was
# written by this notebook.
try:
    with open(failed_indices_file, 'rb') as f:
        failed_indices_dict = pickle.load(f)
except FileNotFoundError:
    failed_indices_dict = {}

In [None]:
def wait_for_tar_file(file_path, timeout=3600, check_interval=5):
    """
    Poll the file system until a file shows up or the timeout runs out.

    Args:
    - file_path: Path of the file to wait for.
    - timeout: Maximum number of seconds to keep waiting.
    - check_interval: Seconds to sleep between two existence checks.

    Returns:
    - bool: True as soon as the file exists, False if the timeout elapsed first.
    """
    began = time.time()

    while not os.path.exists(file_path):
        waited = time.time() - began
        if waited > timeout:
            print(f"Timeout: File {file_path} not found within {timeout} seconds.")
            return False
        # not there yet: pause before looking again
        time.sleep(check_interval)

    print(f"File found: {file_path}")
    return True


def get_isochrone(lat, lon, duration=15, timeout=60):
    """
    Make an HTTP POST request to retrieve isochrone data for given coordinates.

    The request goes to the isochrone endpoint on localhost:8002 and asks for a
    single polygon contour using the "pedestrian" costing.

    Args:
    - lat: Latitude of the start location.
    - lon: Longitude of the start location.
    - duration: Value sent as the contour "time" (presumably minutes, as in
      Valhalla's isochrone API - verify against the API reference).
    - timeout: Seconds to wait for the server before giving up. Without it a
      stalled request would block its worker thread indefinitely.

    Returns:
    - The geometry of the first feature in the GeoJSON response, converted with
      shapely's `shape`, or an empty Polygon when the response status is not 200.

    Raises:
    - requests exceptions (connection errors, timeouts) and KeyError/IndexError
      for a 200 response without features; these are left to the caller.
    """
    url = 'http://localhost:8002/isochrone'
    data = {
        "locations": [{"lat": lat, "lon": lon}],
        "costing": "pedestrian",
        "contours": [{"time": duration}],
        "polygons": True
    }
    # `json=` serialises the payload and sets the JSON Content-Type header itself
    response = requests.post(url, json=data, timeout=timeout)
    if response.status_code != 200:
        # any non-200 answer yields an empty geometry
        return Polygon()

    # Convert the GeoJSON response to a Shapely geometry
    geojson = response.json()
    return shape(geojson['features'][0]['geometry'])

def send_isochrone_request(row):
    """
    Fetch the isochrone for one grid row.

    Args:
    - row: Object exposing `street_walk_length` and a `centroid` with `x`/`y`.

    Returns:
    - The result of `get_isochrone` for the row's centroid (called with y as
      latitude and x as longitude), or an empty Polygon when the row's
      street walk length is 0, in which case no request is sent.
    """
    if row.street_walk_length == 0:
        return Polygon()

    centre = row.centroid
    return get_isochrone(centre.y, centre.x)

def save_failed_indices(grid_id, failed_indices):
    """
    Save failed indices for a grid to the pickle file.

    The indices are merged into the module-level `failed_indices_dict` under
    `grid_id` (duplicates dropped, first-seen order kept) and the whole dict is
    then written to `failed_indices_file`.

    If the notebook cell defining those two globals was not run, the function
    falls back to './failed_isochrones_indices.pkl' and to the content already
    stored in that file, so records from earlier runs are not overwritten.

    Args:
    - grid_id: The ID of the grid being processed.
    - failed_indices: List of (hashable) indices that failed.
    """
    global failed_indices_dict

    target_file = globals().get('failed_indices_file', './failed_isochrones_indices.pkl')

    if 'failed_indices_dict' not in globals():
        # NOTE: pickle.load can execute arbitrary code - only load a file that
        # was written by this notebook.
        try:
            with open(target_file, 'rb') as f:
                failed_indices_dict = pickle.load(f)
        except FileNotFoundError:
            failed_indices_dict = {}

    # merge with what is already recorded; dict.fromkeys removes duplicates
    # while keeping a deterministic (insertion) order
    merged = list(failed_indices_dict.get(grid_id, [])) + list(failed_indices)
    failed_indices_dict[grid_id] = list(dict.fromkeys(merged))

    # save to file
    with open(target_file, 'wb') as f:
        pickle.dump(failed_indices_dict, f)

def process_isochrones(gdf, max_workers=6):
    """
    Process isochrones for a given GeoDataFrame.

    One `send_isochrone_request` call is submitted per row to a thread pool.
    A request that raises is logged, its index is recorded as failed and it
    gets an empty Polygon, so the result always has one entry per row.

    Args:
    - gdf: GeoDataFrame containing grid data. Must have a 'grid_100000_id'
      column; its first unique value is used as the grid id when failures
      are recorded.
    - max_workers: Number of threads sending requests concurrently.

    Returns:
    - dict: A dictionary of isochrones indexed by grid index.
    """
    isochrones = {}
    failed_indices = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_index = {
            executor.submit(send_isochrone_request, row): index
            for index, row in gdf.iterrows()
        }
        for future in as_completed(future_to_index):
            index = future_to_index[future]
            try:
                isochrones[index] = future.result()
            except Exception as e:
                logging.error(f"Request failed for index {index}: {e}")
                # record the failed index and return an empty Polygon for it
                failed_indices.append(index)
                isochrones[index] = Polygon()

    # log failed indices for the grid
    if failed_indices:
        # Recording the failures is bookkeeping only: a problem here must not
        # throw away the isochrones that were just computed.
        try:
            grid_id = str(gdf['grid_100000_id'].unique()[0])
            save_failed_indices(grid_id, failed_indices)
        except Exception as e:
            logging.error(f"Could not save {len(failed_indices)} failed indices: {e}")
        else:
            saved_to = globals().get('failed_indices_file', 'the failed-indices file')
            logging.info(f"Failed indices for grid {grid_id} saved to {saved_to}")

    return isochrones

In [None]:
# Input data (placeholders - point them at your local copies):
# - the OSM extract that gets clipped per grid with `osmium extract`
# - the shapefile whose first geometry is used to check that a grid is covered
europe_osm_pbf_path = '/path/to/europe-latest.osm.pbf'
europe_bbox_shp_path = '/path/to/osm_eu_bbox.shp'


def _report(message, level=logging.INFO):
    """Send a progress message to both the log and the notebook output."""
    logging.log(level, message)
    print(message)


def _remove_valhalla_container(container_name):
    """Stop and remove a docker container, warning instead of raising on failure."""
    for docker_cmd in ('stop', 'rm'):
        result = subprocess.run(['docker', docker_cmd, container_name], check=False)
        if result.returncode != 0:
            _report(f'docker {docker_cmd} failed for container {container_name}', logging.WARNING)


def process_grid(grid_path):
    """
    Processes each grid to generate isochrones.

    Steps: read the grid, clip the Europe OSM extract to the grid's bounds
    buffered by 3000 CRS units (presumably metres - confirm the grid CRS),
    start a Valhalla docker container on that clip, wait for
    'valhalla_tiles.tar' to appear in the mounted directory, request one
    isochrone per row and write the result (reprojected to EPSG:3035) to
    'data/isochrones/'.

    A grid is skipped when it has no 'street_walk_length' column, when its
    output file already exists and it has no recorded failed indices, or when
    its buffered bounds are not contained in the OSM Europe polygon. For a grid
    with recorded failed indices only those rows are processed and the result
    goes to a separate '*_failed_indices.parquet' file.

    The container and the per-grid working directory are removed whether or not
    processing succeeded, so a failed grid cannot block port 8002 for the next.

    Args:
    - grid_path (str): Path to the grid file.

    Returns:
    - None. Errors after the grid was read are reported, not raised.
    """
    grid_gdf = gpd.read_parquet(grid_path)
    grid_id = str(grid_gdf['grid_100000_id'].unique()[0])

    if 'street_walk_length' not in grid_gdf.columns:
        _report(f'skipping grid {grid_id} as no street walk length')
        return

    # The failed-indices dict is optional notebook state; treat it as empty
    # when the cell that loads it has not been run.
    failed_lookup = globals().get('failed_indices_dict') or {}

    custom_dir = f'data/custom_files_{grid_id}'
    container_name = f'valhalla_{grid_id}'
    work_dir_created = False
    container_started = False

    try:
        # define output directory and file
        output_dir = 'data/isochrones'
        os.makedirs(output_dir, exist_ok=True)
        output_file = f'isochrones_{grid_id}.parquet'

        if grid_id in failed_lookup:
            # retry only the rows that failed before and keep them in their own file
            grid_gdf = grid_gdf.loc[failed_lookup[grid_id]]
            output_file = f'isochrones_{grid_id}_failed_indices.parquet'
        elif os.path.exists(os.path.join(output_dir, output_file)):
            _report(f'skipping grid {grid_id} as isochrone already calculated')
            return

        # make a buffer to cut europe.osm.pbf
        bounds_grid_gdf = grid_gdf.total_bounds
        bounds_box = box(bounds_grid_gdf[0], bounds_grid_gdf[1], bounds_grid_gdf[2], bounds_grid_gdf[3])
        buff_bounds_box = bounds_box.buffer(3000)

        buff_gdf = gpd.GeoDataFrame(geometry=[buff_bounds_box], crs=grid_gdf.crs)
        buff_gdf_4326 = buff_gdf.to_crs('epsg:4326')
        buff_bounds_4326 = buff_gdf_4326.total_bounds

        # check if grid_gdf is within the osm europe polygon
        osm_eu_bbox_gdf = gpd.read_file(europe_bbox_shp_path)
        if not osm_eu_bbox_gdf.geometry.values[0].contains(buff_gdf_4326.geometry.values[0]):
            _report(f'grid {grid_id} is outside osm eu bbox; skipping...')
            return

        # Start from an empty working directory: leftovers of an interrupted run
        # would make makedirs fail, and a stale tiles tar would end the wait
        # below before the new tiles are built.
        shutil.rmtree(custom_dir, ignore_errors=True)
        os.makedirs(custom_dir)
        work_dir_created = True
        _report(f'started processing grid {grid_id}...')

        # osmium extract pbf
        clipped_pbf_path = f'{custom_dir}/{grid_id}.pbf'
        bbox = f"{buff_bounds_4326[0]},{buff_bounds_4326[1]},{buff_bounds_4326[2]},{buff_bounds_4326[3]}"
        osmium_result = subprocess.run([
                                        'osmium', 'extract',
                                        f'-b{bbox}',
                                        europe_osm_pbf_path,
                                        '-o', clipped_pbf_path
                                    ], check=False)
        if osmium_result.returncode != 0:
            _report(f'osmium extraction error for grid {grid_id}', logging.ERROR)
            return
        _report(f'successfully clipped osm.pbf for grid {grid_id}; initiating docker...')

        # run docker
        # Mount the directory the clipped pbf was just written to. The host side
        # of a bind mount has to be an absolute path; realpath also resolves a
        # symlinked data directory.
        custom_files_path = f'{os.path.realpath(custom_dir)}:/custom_files'
        subprocess.run([
                        'docker', 'run', '-dt', '--name', container_name,
                        '-p', '8002:8002',
                        '-v', custom_files_path,
                        'ghcr.io/gis-ops/docker-valhalla/valhalla:latest'
                    ], check=True)
        container_started = True

        # wait for .tar file
        tar_file = f'{custom_dir}/valhalla_tiles.tar'
        if not wait_for_tar_file(tar_file):
            _report(f'failed to create valhalla tiles for grid {grid_id}', logging.ERROR)
            return
        _report(f'successfully created valhalla tiles for grid {grid_id}; sending isochrone reqs...')

        # requests are made for the cell centroids, in lon/lat
        grid_gdf['centroid'] = grid_gdf.centroid
        grid_gdf = grid_gdf.set_geometry('centroid')
        grid_gdf_4326 = grid_gdf.to_crs('epsg:4326')

        start_time = time.time()  # start timing

        # start processing isochrones
        isochrones = process_isochrones(gdf=grid_gdf_4326)

        elapsed_time = time.time() - start_time
        logging.info(f'isochrone calc for grid {grid_id} took {elapsed_time} seconds')

        # no length assertion here: grids retried from the failed indices only
        # contain a subset of the rows

        # save isochrones, ordered by grid index
        ordered_index = sorted(isochrones.keys())
        iso_gdf = gpd.GeoDataFrame({'geometry': [isochrones[k] for k in ordered_index]},
                                   index=ordered_index)
        iso_gdf = iso_gdf.set_crs('epsg:4326').to_crs('epsg:3035').reset_index()
        iso_gdf.to_parquet(os.path.join(output_dir, output_file))

        _report(f'successfully generated isochrones for grid {grid_id}; cleaning up...')
    except Exception as e:
        _report(f'Error processing grid {grid_id}: {e}', logging.ERROR)
    finally:
        # clean up on every exit path, not only after a successful run
        if container_started:
            _remove_valhalla_container(container_name)
        if work_dir_created:
            shutil.rmtree(custom_dir, ignore_errors=True)
In [None]:
# process the grids one at a time, in list order
for grid_file in grids_list:
    process_grid(grid_file)