This notebook contains code for deriving slope from the NASADEM elevation data for the generated grids and then calculating the mean slope value for each 100 m x 100 m grid cell. The code relies on Google Earth Engine for processing and on Google Cloud Storage for intermediate storage of the data.

In [None]:
import ee
import geopandas as gpd
import rasterio
from rasterio.windows import from_bounds
import os
import numpy as np
import logging
from multiprocess import Pool
import glob
from functools import partial

### data collection

In [None]:
# glob pattern (relative to the working directory) that matches the grid files
grid_path = 'data/*.parquet'

# collect the path of every parquet grid file matching the pattern
grids_list = glob.glob(grid_path)

# authenticate with Google Earth Engine, then initialize the client library
ee.Authenticate()
ee.Initialize()

In [None]:
# load the NASADEM image, keep its elevation band and derive slope from it
NASADEM_ASSET_ID = 'NASA/NASADEM_HGT/001'
dataset = ee.Image(NASADEM_ASSET_ID)
elevation = dataset.select('elevation')
slope = ee.Terrain.slope(elevation)

In [None]:
# Function to define and start export tasks
def export_slope(grid_path):
    """
    Start an Earth Engine task that exports the slope image for one grid.

    The bounding box of the grid (reprojected to EPSG:4326) is used as the
    export region. The image is written to the Cloud Storage bucket as a
    cloud-optimized GeoTIFF in EPSG:3035 with a scale of 30.

    Args:
    - grid_path (str): Path to the grid file. The grid number is the text
      between the last underscore in the path and the dot that follows it.

    Returns:
    - The export task that was started, so callers can inspect its status.
      Callers that ignore the return value behave exactly as before.
    """
    grid_number = grid_path.split('_')[-1].split('.')[0]

    grid_data = gpd.read_parquet(grid_path)
    # total_bounds is ordered (minx, miny, maxx, maxy)
    min_x, min_y, max_x, max_y = grid_data.to_crs('epsg:4326').total_bounds

    # single-ring polygon tracing the corners of the bounding box
    ee_bounds = ee.Geometry.Polygon([
        [[min_x, min_y],
         [min_x, max_y],
         [max_x, max_y],
         [max_x, min_y]]
    ])

    export_params = {
        'image': slope,
        'bucket': 'cog-bucket-test',    # your bucket name
        'description': f'slope_30m_{grid_number}',
        'fileNamePrefix': f'eu_slope/slope_30m_{grid_number}',
        'scale': 30,
        # getInfo() resolves the geometry into plain coordinate lists
        'region': ee_bounds.getInfo()['coordinates'],
        'crs': 'EPSG:3035',
        'fileFormat': 'GeoTIFF',
        'formatOptions': {'cloudOptimized': True},
        'maxPixels': 1e12,
    }

    task = ee.batch.Export.image.toCloudStorage(**export_params)
    task.start()
    return task

In [None]:
# start one export task per grid file, one after another
for grid_file in grids_list:
    export_slope(grid_file)

### calculate slope for 100m x 100m grids

In [None]:
# tell Google Cloud client code where the credentials file lives
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'path/to/your/credentials.json'

# paths of all parquet grid files found in the data directory
grids_list = glob.glob('data/*.parquet')

# # configure logging (recommended if you monitor processing over a lot of files)
# log_path = 'logs/slope.log'

# # ensure log directory exists
# log_dir = os.path.dirname(log_path)
# if not os.path.exists(log_dir):
#     os.makedirs(log_dir)

# logging.basicConfig(filename=log_path, level=logging.INFO,
#                     format='%(asctime)s:%(levelname)s:%(message)s', force=True)

In [None]:
def calculate_slope(row, src):
    """
    Calculate the mean slope from a raster over the bounding box of a polygon.

    The raster window is derived from the bounds of the row's geometry, so for
    non-rectangular polygons the mean covers the whole bounding box rather
    than only the polygon interior. NaN pixels are ignored.

    Args:
    - row (GeoSeries): A GeoDataFrame row whose `geometry` is in the same
      coordinate reference system as the raster.
    - src (rasterio.io.DatasetReader): Open raster source to read data from.

    Returns:
    - float: Mean of the first raster band inside the window, or NaN when the
      window holds no pixels or only NaN pixels.
    """
    left, bottom, right, top = row.geometry.bounds
    window = from_bounds(left, bottom, right, top, src.transform)

    # only the first band is used, so avoid reading any additional bands
    band = src.read(1, window=window)

    # np.nanmean would emit a RuntimeWarning for an empty or all-NaN slice;
    # return NaN directly instead (same value, no warning)
    if band.size == 0 or np.isnan(band).all():
        return np.nan

    return np.nanmean(band)

def process_grid(grid_path):
    """
    Process a single grid file to calculate and update slope data.

    Reads the grid, samples the matching slope raster from Cloud Storage for
    every row, stores the result in a new `slope` column and overwrites the
    grid file. Grids that already have a `slope` column are skipped. Any
    error is logged (with traceback) instead of being raised, so one failing
    grid does not stop a batch run.

    Args:
    - grid_path (str): Path to the grid file. The grid number is the text
      between the last underscore in the path and the dot that follows it.
    """
    try:
        grid_number = grid_path.split('_')[-1].split('.')[0]
        grid_gdf = gpd.read_parquet(grid_path)

        if 'slope' in grid_gdf.columns:
            logging.info(f'Skipping grid {grid_number} as slope already calculated')
            return

        cog_path = f'gs://cog-bucket-test/eu_slope/slope_30m_{grid_number}.tif'
        logging.info(f'Started processing grid {grid_number} at {cog_path}')

        with rasterio.open(cog_path) as src:
            # Sample on a reprojected copy when the CRS differs, so the grid
            # written back to disk keeps its original CRS and geometries.
            if grid_gdf.crs != src.crs:
                sampling_gdf = grid_gdf.to_crs(src.crs)
            else:
                sampling_gdf = grid_gdf
            calculate_with_src = partial(calculate_slope, src=src)
            # to_crs keeps the index, so the result aligns row-for-row
            grid_gdf['slope'] = sampling_gdf.apply(calculate_with_src, axis=1)

        grid_gdf.to_parquet(grid_path)
        logging.info(f'Successfully processed grid {grid_path}')
    except Exception as e:
        # deliberate best-effort: record the failure with its traceback and
        # let the caller continue with the next grid
        logging.exception(f'Error processing grid {grid_path}: {e}')

In [None]:
# process the grid files one at a time
for grid_file in grids_list:
    process_grid(grid_file)

# # parallel alternative: spread the grid files over several worker processes
# num_processes = 5
# with Pool(processes=num_processes) as pool:
#     pool.map(process_grid, grids_list)