This notebook shows how to convert the isochrone polygons (and other component data) to arrays, which can then be processed in parallel on a GPU for tasks such as weighted distance decay.

In [None]:
import glob
import os

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
from geocube.api.core import make_geocube
from multiprocess import Pool

In [None]:
# Glob pattern matching the per-grid parquet files.
# These are expected to have been produced beforehand by 'generate_grids.ipynb'.
grid_path = 'data/*.parquet'

# Collect every parquet grid file matching the pattern. glob returns matches
# in arbitrary order, so sort them to make the processing order reproducible.
grids_list = sorted(glob.glob(grid_path))
print(grids_list)

# # configure logging (recommended if you monitor processing over a lot of files)
# log_path = 'logs/isochrones_to_gpu.log'

# # ensure log directory exists
# log_dir = os.path.dirname(log_path)
# if not os.path.exists(log_dir):
#     os.makedirs(log_dir)
    
# logging.basicConfig(filename=log_path, level=logging.INFO,
#                     format='%(asctime)s:%(levelname)s:%(message)s', force=True)

In [None]:
def find_neighbors(grid_gdf):
    """
    Collect the cells of adjacent grids that lie close to the border of the given grid.

    The union of all geometries in `grid_gdf` is buffered by 3000 (in the units of the
    geometries' CRS) with `cap_style=3`. Every row of the module-level `grid_100km`
    frame that intersects this buffer, and whose index label is not one of the grid's
    own `grid_100000_id` values, is treated as a neighbor. For each neighbor the file
    `data/grids_100_<id>.parquet` is read and reduced to the rows intersecting the buffer.

    NOTE(review): `grid_100km` is not defined in this notebook as shown; it must exist
    as a global (presumably indexed by the 100 km grid id) before this is called - confirm.

    Args:
    - grid_gdf (GeoDataFrame): GeoDataFrame of the grid to find neighbors for.
      Must contain a 'grid_100000_id' column.

    Returns:
    - List[GeoDataFrame]: One GeoDataFrame per neighboring grid, holding only the
      cells that intersect the buffered outline.
    """
    own_ids = grid_gdf['grid_100000_id'].unique()
    search_area = grid_gdf.unary_union.buffer(3000, cap_style=3)
    candidates = grid_100km[grid_100km.intersects(search_area)]

    def _load_border_cells(neighbor_id):
        # Read the neighbor's cells and keep only those touching the search area.
        cells = gpd.read_parquet(f'data/grids_100_{neighbor_id}.parquet')
        return cells[cells.intersects(search_area)]

    # Exclude the grid itself so only true neighbors are loaded.
    neighbor_ids = set(candidates.index) - set(own_ids)
    return [_load_border_cells(neighbor_id) for neighbor_id in neighbor_ids]

def rasterize_geodf(geodf, columns, resolution):
    """
    Burn the given attribute columns of a GeoDataFrame into a stacked raster array.

    The rasterization is delegated to `make_geocube`, always with an output CRS of
    EPSG:3035. Each requested column becomes one band, in the order given.

    Args:
    - geodf (GeoDataFrame): GeoDataFrame to rasterize.
    - columns (List[str]): Names of the columns to rasterize, one band each.
    - resolution (tuple): Pixel resolution, passed unchanged to `make_geocube`.
      NOTE(review): geocube documents this as (y, x) with the sign giving the
      direction, not (width, height) - confirm against the installed version.

    Returns:
    - Tuple: (array of the bands stacked along a new first axis, the cube's affine
      transform, the cube's CRS).
    """
    cube = make_geocube(
        vector_data=geodf,
        measurements=columns,
        resolution=resolution,
        output_crs="EPSG:3035",
    )
    stacked = np.stack([cube[name].values for name in columns])
    return stacked, cube.rio.transform(), cube.rio.crs

def process_grid(grid_path):
    """
    Rasterize one grid (plus the bordering cells of its neighbors) into a multi-band GeoTIFF.

    The grid file is read, its neighbors' border cells are appended via `find_neighbors`,
    and the selected attribute columns are rasterized with `rasterize_geodf` at a
    resolution of (-100, 100). The result is stored as
    `data/iso_for_gpu/img_<grid id>.tif`, one band per column. Grids whose output file
    already exists are skipped.

    The GeoTIFF is first written to a temporary file and only moved to its final name
    once complete, so an interrupted run cannot leave a truncated file that a later
    run would mistake for a finished result.

    Args:
    - grid_path (str): Path to the grid file.

    Returns:
    - None
    """
    grid_gdf = gpd.read_parquet(grid_path)
    # All rows of one file are expected to share the same id; the first one is used.
    grid_num = grid_gdf['grid_100000_id'].unique()[0]

    output_dir = 'data/iso_for_gpu'
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f'img_{grid_num}.tif')

    if os.path.exists(output_path):
        print(f'Skipping grid {grid_num} as iso_gpu already calculated')
        return

    nbr_gdfs = find_neighbors(grid_gdf=grid_gdf)
    new_grid_gdf = gpd.GeoDataFrame(pd.concat([grid_gdf] + nbr_gdfs, ignore_index=True))

    iso_path = f'data/isochrones/isochrones_{grid_num}.parquet'
    iso_gdf = gpd.read_parquet(iso_path)

    # NOTE(review): `joined_gdf` is never used below - the raster is built from
    # `new_grid_gdf`. Kept as-is because it is unclear whether the join is a leftover
    # or whether the joined frame was meant to be rasterized; confirm and then
    # either use it or drop the isochrone read and join.
    joined_gdf = iso_gdf.sjoin(new_grid_gdf, how='left', predicate='intersects')
    joined_gdf.drop(columns=['index_right'], inplace=True)

    columns = ['street_walk_length', 'num_street_intersections', 'ndvi', 'ent_5',
               'slope', 'population', 'pub_trans_count', 'index']
    raster_data, transform, crs = rasterize_geodf(new_grid_gdf, columns, resolution=(-100, 100))

    # Write to a temporary file first; the driver is given explicitly, so the
    # '.tmp' suffix does not affect the output format.
    tmp_path = f'{output_path}.tmp'
    try:
        with rasterio.open(tmp_path, 'w', driver='GTiff',
                           height=raster_data.shape[1], width=raster_data.shape[2],
                           count=len(columns), dtype=raster_data.dtype,
                           crs=crs, transform=transform) as dst:
            # Raster band numbers are 1-based.
            for band_number, band in enumerate(raster_data, start=1):
                dst.write(band, band_number)
        # Publish the finished file under its final name in a single step.
        os.replace(tmp_path, output_path)
    finally:
        # Only still present if writing or publishing failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

In [None]:
# sequential: handle the grid files one after another in this process
for grid_file in grids_list:
    process_grid(grid_file)

# # parallel
# num_processes = 5

# with Pool(processes=num_processes) as pool:
#     pool.map(process_grid, grids_list)