In [1]:
# import modules
import os
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from rasterio.enums import Resampling
from multiprocessing import Pool
from functools import partial


In [2]:
# generated with chatGPT 3.5
# Function to clip raster by geometry
def clip_raster(raster_path, output_path, geometry):
    """Clip a raster to the given geometries and write the result as a GeoTIFF.

    Parameters
    ----------
    raster_path : str
        Path of the raster file to read.
    output_path : str
        Path of the clipped GeoTIFF to write.
    geometry : iterable of geometries
        Shapes handed to ``rasterio.mask.mask`` (called with ``crop=True``).
        If the object carries a ``crs`` and a ``to_crs`` method (e.g. a
        geopandas GeoSeries) and that CRS differs from the raster's CRS, it
        is reprojected to the raster's CRS before masking.
    """
    with rasterio.open(raster_path) as src:
        # mask() interprets the shapes in the raster's own CRS, so CRS-aware
        # geometry in a different CRS has to be reprojected first.
        shapes_crs = getattr(geometry, "crs", None)
        if (
            shapes_crs is not None
            and src.crs is not None
            and hasattr(geometry, "to_crs")
            and shapes_crs != src.crs
        ):
            geometry = geometry.to_crs(src.crs)

        out_image, out_transform = mask(src, geometry, crop=True)
        out_meta = src.meta.copy()

    # Update metadata with the clipped dimensions and transform.
    # out_image is indexed as (bands, rows, cols).
    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform})

    # The clipped array is already in memory, so the source can be closed
    # before the output file is written.
    with rasterio.open(output_path, "w", **out_meta) as dest:
        dest.write(out_image)

In [3]:
# Load the area of interest (AOI) that is used as the clipping mask

# GeoPackage file; the AOI polygon is read from its 'aoi_extracted' layer
gpkg = '../../area_mask/area_of_interest.gpkg'
polygon = gpd.read_file(gpkg, layer='aoi_extracted')

# Report the CRS of the AOI layer
crs_message = 'Project CRS: {}'.format(polygon.crs)
print(crs_message)

Project CRS: EPSG:4326


In [4]:
# Path to the folder containing the GeoTIFF layers to clip.
# Clipped copies are written to a sibling folder named folder_path + "_clipped".
folder_path = "../../data_env/chelsa/bio"

In [5]:
def clip_raster_parallel(file_name, folder_path, polygon_geometry):
    """Clip one GeoTIFF from ``folder_path`` and save it in the ``_clipped`` folder.

    Files whose name does not end in ``.tif`` or ``.tiff`` (compared
    case-insensitively) are ignored. The clipped raster is written to
    ``folder_path + "_clipped"`` as ``"clipped_" + file_name``; that folder is
    created if it does not exist yet.

    Parameters
    ----------
    file_name : str
        Name of a file inside ``folder_path``.
    folder_path : str
        Folder that contains the source rasters.
    polygon_geometry : iterable of geometries
        Geometries forwarded unchanged to ``clip_raster``.
    """
    if not file_name.lower().endswith((".tif", ".tiff")):
        return

    raster_path = os.path.join(folder_path, file_name)
    output_dir = folder_path + "_clipped"
    # Several workers may reach this at once; exist_ok makes that harmless.
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, "clipped_" + file_name)

    clip_raster(raster_path, output_path, polygon_geometry)
    print("Clipped", file_name)

In [6]:
%%time
if __name__ == "__main__":
    # Number of processes to run in parallel
    num_processes = os.cpu_count()

    # Create a pool of worker processes
    with Pool(processes=num_processes) as pool:
        # Partially apply the clip_raster_parallel function with fixed arguments
        clip_partial = partial(clip_raster_parallel, folder_path=folder_path, polygon_geometry=polygon.geometry)
        
        # Iterate over GeoTIFF files in the folder and map the clip_raster_parallel function to each file
        pool.map(clip_partial, os.listdir(folder_path))

Clipped CHELSA_bio2_1981-2010_V.2.1.tif
Clipped CHELSA_bio15_1981-2010_V.2.1.tif
Clipped CHELSA_bio10_1981-2010_V.2.1.tif
Clipped CHELSA_bio9_1981-2010_V.2.1.tif
Clipped CHELSA_bio14_1981-2010_V.2.1.tif
Clipped CHELSA_bio8_1981-2010_V.2.1.tif
Clipped CHELSA_bio17_1981-2010_V.2.1.tif
Clipped CHELSA_bio13_1981-2010_V.2.1.tif
Clipped CHELSA_bio19_1981-2010_V.2.1.tif
Clipped CHELSA_bio18_1981-2010_V.2.1.tif
Clipped CHELSA_bio16_1981-2010_V.2.1.tif
Clipped CHELSA_bio12_1981-2010_V.2.1.tif
Clipped CHELSA_bio5_1981-2010_V.2.1.tif
Clipped CHELSA_bio1_1981-2010_V.2.1.tif
Clipped CHELSA_bio7_1981-2010_V.2.1.tif
Clipped CHELSA_bio11_1981-2010_V.2.1.tif
Clipped CHELSA_bio6_1981-2010_V.2.1.tif
Clipped CHELSA_bio4_1981-2010_V.2.1.tif
Clipped CHELSA_bio3_1981-2010_V.2.1.tif
CPU times: user 222 ms, sys: 138 ms, total: 360 ms
Wall time: 12.3 s
