This notebook collects street-intersection data from OpenStreetMap (OSM) for the generated grids and then counts the street intersections (nodes with `street_count >= 3`, i.e. where at least three street segments meet) in each 100 m x 100 m grid cell.

In [None]:
import geopandas as gpd
import osmnx as ox
import os
import glob
import logging
from multiprocess import Pool
import gc

In [None]:
# Glob pattern matching the grid files to process.
# These are typically the grids produced by 'generate_grids.ipynb'.
grid_path = 'data/*.parquet'

# Collect every parquet grid file matching the pattern. glob.glob already
# returns a list; sorting it makes the processing order reproducible,
# because glob yields matches in arbitrary (filesystem) order.
grids_list = sorted(glob.glob(grid_path))
print(grids_list)

# # configure logging (recommended if you monitor processing over a lot of files)
# log_path = 'logs/street_intersections.log'

# # ensure log directory exists
# log_dir = os.path.dirname(log_path)
# os.makedirs(log_dir, exist_ok=True)

# logging.basicConfig(filename=log_path, level=logging.INFO,
#                     format='%(asctime)s:%(levelname)s:%(message)s', force=True)

In [None]:
def _grid_number_from_path(grid_path):
    """Return the grid identifier encoded in a grid file name.

    The identifier is the text between the last underscore of the file name
    and the first dot after it, e.g. ``data/grid_12.parquet`` -> ``'12'``.
    Only the file name is inspected so that underscores or dots in directory
    names cannot leak into the result.
    """
    return os.path.basename(grid_path).split('_')[-1].split('.')[0]


def _osmnx_bbox(west, south, east, north):
    """Order bounding-box coordinates the way the installed OSMnx expects.

    Per the OSMnx documentation, the ``bbox`` tuple is
    ``(north, south, east, west)`` in the 1.9 series and
    ``(left, bottom, right, top)`` from 2.0 on.
    """
    try:
        major_version = int(ox.__version__.split('.')[0])
    except (AttributeError, ValueError):
        # Version not readable: keep the ordering this notebook was written for.
        major_version = 1

    if major_version >= 2:
        return (west, south, east, north)
    return (north, south, east, west)


def process_grid(grid_path):
    """
    Count the street intersections inside every cell of one grid file.

    The walking network covering the grid's bounding box is downloaded from
    OSM, nodes with ``street_count >= 3`` are kept as intersections, and the
    number of intersections per grid cell is stored in a
    ``num_street_intersections`` column.

    Side effects:
    - The grid file at ``grid_path`` is overwritten with the added column
      (and an ``index`` column if it was not present yet).
    - The intersection nodes are written to
      ``street_intersection_data/street_intersections_<grid number>.parquet``
      next to the grid file. If that file already exists the grid is skipped.

    Args:
    - grid_path (str): Path to the grid file.

    Returns:
    - None. Any exception is caught and reported with ``print`` so that one
      failing grid does not abort a batch run.
    """
    try:
        ox.settings.log_console = True
        grid_number = _grid_number_from_path(grid_path)

        output_dir = os.path.join(os.path.dirname(grid_path), 'street_intersection_data')
        output_file = f'street_intersections_{grid_number}.parquet'
        output_path = os.path.join(output_dir, output_file)

        os.makedirs(output_dir, exist_ok=True)

        # Decide whether to skip before loading anything: the nodes file is
        # the marker that this grid has already been processed.
        if os.path.exists(output_path):
            skip_message = f'Skipping {output_file} as it already exists'
            logging.info(skip_message)
            # Also print, like every other status message here; logging.info
            # alone is invisible unless logging has been configured.
            print(skip_message)
            return

        grid_gdf = gpd.read_parquet(grid_path)

        # The per-cell counts are keyed on an 'index' column.
        if 'index' not in grid_gdf.columns:
            grid_gdf.reset_index(inplace=True)

        # Drop counts from an earlier run so the merge below does not
        # produce suffixed duplicate columns.
        if 'num_street_intersections' in grid_gdf.columns:
            grid_gdf.drop(columns=['num_street_intersections'], inplace=True)

        # The OSM query needs the bounding box in longitude/latitude.
        grid_gdf_4326 = grid_gdf.to_crs('epsg:4326')
        west, south, east, north = grid_gdf_4326.total_bounds
        # logging.info(f'Started processing grid {grid_number}')
        print(f'Started processing grid {grid_number}')

        G = ox.graph_from_bbox(
            bbox=_osmnx_bbox(west, south, east, north),
            network_type='walk',
            simplify=False,
            retain_all=True,
        )
        gdf_nodes = ox.convert.graph_to_gdfs(G, nodes=True, edges=False)
        # Keep intersections only and bring them into the grid's CRS.
        # Assigning the result avoids an in-place change on a filtered slice.
        gdf_nodes = gdf_nodes[gdf_nodes.street_count >= 3].to_crs(grid_gdf.crs)

        # Join against the node geometries only, with an unnamed index, so
        # the joined column is reliably called 'index_right' (newer GeoPandas
        # versions otherwise reuse the right index's own name).
        nodes_for_join = gdf_nodes[[gdf_nodes.geometry.name]].rename_axis(None)
        joined = gpd.sjoin(grid_gdf, nodes_for_join, how='left', predicate='intersects')

        # Cells without any node have a missing 'index_right', which nunique
        # ignores, so they end up with a count of 0.
        node_counts_per_grid = (
            joined.groupby('index')['index_right']
            .nunique()
            .reset_index(name='num_street_intersections')
        )
        grid_gdf_f = grid_gdf.merge(node_counts_per_grid, on='index', how='left')
        grid_gdf_f.to_parquet(grid_path)

        # Write the nodes file last: its existence makes later runs skip this
        # grid, so it must only appear once the grid file has been updated.
        gdf_nodes.to_parquet(output_path)
        # logging.info(f'Saved intersection data to {output_file}')
        print(f'Saved intersection data to {output_file}')

        # logging.info(f'Successfully processed grid {grid_path}')
        print(f'Successfully processed grid {grid_path}')

        # Release the large frames before the next grid is processed.
        del G, gdf_nodes, nodes_for_join, joined, grid_gdf, grid_gdf_4326, grid_gdf_f
        gc.collect()
    except Exception as e:
        # Deliberate best-effort: report the failure and let the batch go on.
        # logging.error(f'Error processing grid {grid_path}: {e}')
        print(f'Error processing grid {grid_path}: {e}')

In [None]:
# Sequential run: handle the grid files one after another.
for grid_file in grids_list:
    process_grid(grid_file)

# # Parallel processing, useful when there are a lot of files
# num_processes = 5

# with Pool(processes=num_processes) as pool:
#     pool.map(process_grid, grids_list)