In [1]:
import rasterio
import os
import glob
import yaml

import geopandas as gpd
import numpy as np

from rasterio.mask import mask
from shapely.geometry import mapping
from pathlib import Path
from pprint import pprint

In [11]:
path_data_folder = Path(r"../data/catchments")
path_subcatchments = path_data_folder.joinpath(r"subcatchments")
path_strahler = path_data_folder.joinpath(r"strahler_tana.map")
path_save_new_shapefiles = path_data_folder.joinpath(r"subcatchments_strahler")

shapefiles_fnames = list(path_subcatchments.glob("*.gpkg"))

print(shapefiles_fnames)
N = len(shapefiles_fnames)

# test = shapefiles_fnames[0]
# print(test.name)

[PosixPath('../data/catchments/subcatchments/fid_413.gpkg'), PosixPath('../data/catchments/subcatchments/fid_22.gpkg'), PosixPath('../data/catchments/subcatchments/fid_595.gpkg'), PosixPath('../data/catchments/subcatchments/fid_1036.gpkg'), PosixPath('../data/catchments/subcatchments/fid_346.gpkg'), PosixPath('../data/catchments/subcatchments/fid_653.gpkg'), PosixPath('../data/catchments/subcatchments/fid_1173.gpkg'), PosixPath('../data/catchments/subcatchments/fid_192.gpkg'), PosixPath('../data/catchments/subcatchments/fid_604.gpkg'), PosixPath('../data/catchments/subcatchments/fid_1889.gpkg'), PosixPath('../data/catchments/subcatchments/fid_350.gpkg'), PosixPath('../data/catchments/subcatchments/fid_1811.gpkg'), PosixPath('../data/catchments/subcatchments/fid_632.gpkg'), PosixPath('../data/catchments/subcatchments/fid_1112.gpkg'), PosixPath('../data/catchments/subcatchments/fid_777.gpkg'), PosixPath('../data/catchments/subcatchments/fid_409.gpkg'), PosixPath('../data/catchments/subca

In [14]:
execute = True

if execute:
    # Open the raster file
    with rasterio.open(path_strahler) as src:
        for i, subcatchment in enumerate(shapefiles_fnames):
            gdf = gpd.read_file(subcatchment)
            for idx, row in gdf.iterrows():
                # Extract the geometry of the shape
                geom = row.geometry
                print(subcatchment)
                file_name = os.path.basename(subcathment)
                print(file_name)
                clipped_river, out_transform = mask(src, [geom], crop=True)
                
                clipped_river[clipped_river <= 8] = 0
                strahler_numbers, counts = np.unique(clipped_river, return_counts=True)

                # Add the count as a new attribute to the shapefile
                gdf.loc[idx, 'strahler'] = max(strahler_numbers)

            # Save the updated shapefile
            gdf.to_file(path_save_new_shapefiles.joinpath(f'{subcatchment}'), driver="GPKG")

            if i%100 == 0:
                print(f"{i}/{N}")

../data/catchments/subcatchments/fid_413.gpkg


DriverError: sqlite3_open(../data/catchments/subcatchments_strahler/../data/catchments/subcatchments/fid_413.gpkg) failed: unable to open database file

If we want to save the neighbouring filenames:

In [4]:
execute = True

if execute:
    # Read all shapefiles into a list of GeoDataFrames
    shapefiles_fnames = list(path_save_new_shapefiles.glob("*.gpkg"))
    shapefiles = [gpd.read_file(shapefile) for shapefile in shapefiles_fnames]

    print("Initialized list of geodataframes...")

    # Create a dictionary to store neighboring shapefiles for each shapefile
    neighboring_shapefiles = dict()

    # Iterate over each shapefile
    for idx, shapefile in enumerate(shapefiles):
        # Remove the current shapefile from the list
        other_shapefiles_fnames = shapefiles_fnames[:idx] + shapefiles_fnames[idx+1:]
        other_shapefiles = shapefiles[:idx] + shapefiles[idx+1:]
        
        # Get the geometry of the current shapefile
        geometry = shapefile.geometry.unary_union
        
        # Initialize a list to store neighboring shapefiles
        neighbors = []
        
        # Iterate over the other shapefiles to find neighbors
        for idy, other_shapefile in enumerate(other_shapefiles):
            # Check if any geometry of other_shapefile intersects with the geometry of shapefile
            if any(geometry.intersects(other_geometry) for other_geometry in other_shapefile.geometry):
                neighbors.append(shapefiles_fnames[idy].name)
        
        # Store the neighboring shapefiles for the current shapefile
        neighboring_shapefiles[shapefiles_fnames[idx].name] = neighbors
        if idx%100 == 0:
            print(f"{idx}/{N}")

Initialized list of geodataframes...
0/263
100/263
200/263


In [5]:
# pprint(neighboring_shapefiles)

In [6]:
file=open(path_data_folder.joinpath(Path("neighbours.yaml")),"w")
yaml.dump(neighboring_shapefiles,file)
file.close()
print("YAML file saved.")

YAML file saved.


Now we filter on strahler number. We only save the one with the heighest strahler number:

In [7]:
downstream_catchments = dict()

for i, key in enumerate(neighboring_shapefiles.keys()):
    subc = gpd.read_file(path_save_new_shapefiles.joinpath(key))

    neighbouring_subc = neighboring_shapefiles[key]

    if len(neighbouring_subc)==0:
        pass
    else:
        gdf_list = [
            gpd.read_file(path_save_new_shapefiles.joinpath(item))
                    for item in neighbouring_subc]
        strahler_numbers = []
        for gdf in gdf_list:
            for idx, row in gdf.iterrows():
                # Extract the geometry of the shape
                # geom = row.geometry.strahler

                strahler_numbers.append(row["strahler"])

        downstream_catchments[key] = neighbouring_subc[np.argmax(strahler_numbers)]
    
    if i%100 == 0:
        print(f"{i}/{N}")

0/263
100/263
200/263


In [8]:
file=open(path_data_folder.joinpath(Path("downstream_catchments.yaml")),"w")
yaml.dump(downstream_catchments, file)
file.close()
print("YAML file saved.")

YAML file saved.
