This notebook shows how to generate 100 m x 100 m grids from the Eurostat 100 km x 100 km grid file.

In [None]:
import geopandas as gpd
import os
from shapely.geometry import Polygon
from multiprocessing import Pool

In [None]:
# function to generate sub-grids within a given boundary
def create_grid(bounds, x_offset, y_offset, crs):
    """
    Creates a grid of rectangular polygons within the specified bounds.

    Cells are laid out starting at (minx, miny) and stepping by x_offset /
    y_offset. Only whole cells are generated: any remainder of the extent
    that is smaller than one cell is left uncovered.

    Args:
        bounds (dict): A dictionary containing the minx, miny, maxx, and maxy of the bounding box.
        x_offset (float): The width of each grid cell (must be positive).
        y_offset (float): The height of each grid cell (must be positive).
        crs (str): The coordinate reference system to use for the resulting GeoDataFrame.

    Returns:
        GeoDataFrame: A GeoDataFrame with a single 'geometry' column holding
        one polygon per grid cell, ordered column by column (x outer, y inner).
        It is empty when the bounds are smaller than one cell.

    Raises:
        ValueError: If x_offset or y_offset is not strictly positive.
    """
    # fail early with a clear message instead of a ZeroDivisionError (zero
    # offset) or a silently empty grid (negative offset)
    if x_offset <= 0 or y_offset <= 0:
        raise ValueError(
            f"x_offset and y_offset must be positive, got {x_offset} and {y_offset}"
        )

    minx, miny, maxx, maxy = bounds['minx'], bounds['miny'], bounds['maxx'], bounds['maxy']

    # calculate the number of whole cells in each direction; the small
    # tolerance keeps a float quotient such as 2.9999999999999996 from being
    # truncated to 2 and dropping the last row/column
    tolerance = 1e-9
    x_cells = int((maxx - minx) / x_offset + tolerance)
    y_cells = int((maxy - miny) / y_offset + tolerance)

    # create grid cells
    cells = []
    for i in range(x_cells):
        cell_minx = minx + i * x_offset
        cell_maxx = cell_minx + x_offset
        for j in range(y_cells):
            cell_miny = miny + j * y_offset
            cell_maxy = cell_miny + y_offset
            cells.append(Polygon([
                (cell_minx, cell_miny),
                (cell_maxx, cell_miny),
                (cell_maxx, cell_maxy),
                (cell_minx, cell_maxy),
            ]))

    # passing the cells via `geometry=` guarantees a geometry column exists
    # even when no cells were generated, so assigning the CRS cannot fail
    return gpd.GeoDataFrame(geometry=cells, crs=crs)

In [None]:
# function to create and assign sub-grids to parquet files
def create_and_assign_grid(index, row, x_offset, y_offset, crs):
    """
    Creates sub-grids for a given row and saves them to a parquet file.

    The output path is 'data/grids_100_<index+1>.parquet'. If that file
    already exists, the grid is not regenerated; the existing file is loaded
    and returned instead.

    Args:
        index (int): Index of the row.
        row (GeoSeries): Row containing geometry data.
        x_offset (float): Horizontal grid size.
        y_offset (float): Vertical grid size.
        crs (str): Coordinate reference system.

    Returns:
        tuple: The grid GeoDataFrame (newly generated, or read back from the
        existing parquet file) and the grid ID (index + 1).
    """
    grid_id = index + 1
    output_file = f'data/grids_100_{grid_id}.parquet'

    if os.path.exists(output_file):
        print(f"Skipping grid {grid_id} as it already exists.")
        # load the previously written grid so the return value is always
        # defined; previously this path hit an unassigned `grid_gdf`
        return gpd.read_parquet(output_file), grid_id

    print(f"Processing grid {grid_id}...")
    # shapely bounds are ordered (minx, miny, maxx, maxy)
    minx, miny, maxx, maxy = row.geometry.bounds
    bounds = {'minx': minx, 'miny': miny, 'maxx': maxx, 'maxy': maxy}

    grid_gdf = create_grid(bounds, x_offset, y_offset, crs)
    # tag every cell with the ID of the parent grid it was cut from
    grid_gdf['grid_100000_id'] = grid_id

    # make sure the target directory exists before writing
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    grid_gdf.to_parquet(output_file)

    return grid_gdf, grid_id

In [None]:
# read the Eurostat 100km grid file
# metadata: https://gisco-services.ec.europa.eu/grid/GISCO_grid_metadata.pdf
eu_gdf = gpd.read_file('data/grid_100km_surf.gpkg')

# grid parameters: reuse the CRS of the source grid; offsets are the cell
# width and height passed to 'create_grid'
crs = eu_gdf.crs
x_offset = 100
y_offset = 100

# prepare one argument tuple per 100km x 100km grid cell to process
# this is restricted to index == 610, an area that covers part of the Netherlands
# remove the `if` condition to generate 100m x 100m grids for all
# 100km x 100km grids
# the index here starts from 0; 'create_and_assign_grid' adds +1
# so that the output files are numbered from 1 (personal preference)
data = [(index, row, x_offset, y_offset, crs)
        for index, row in eu_gdf.iterrows() if index == 610]

# ensure the output directory exists; exist_ok avoids the race between
# checking for the directory and creating it
output_directory = 'data'
os.makedirs(output_directory, exist_ok=True)

# for testing, as we are only generating it for one grid,
# we can process it in sequence
for elem in data:
    create_and_assign_grid(*elem)

# # or process grids in parallel
# with Pool() as pool:
#     results = pool.starmap(create_and_assign_grid, data)