In [2]:
import rasterio
import os
import glob
import yaml

import geopandas as gpd
import numpy as np

from rasterio.mask import mask
from shapely.geometry import mapping
from pathlib import Path
from pprint import pprint

Objective: given a map of each subcatchment and DEM-derived river segments with Strahler orders, we want to find the downstream neighbour of each subcatchment. We do this by following these steps:

1. Specify paths to find files and save new files.
2. Assign a Strahler order to each subcatchment
3. Find all neighbours of each subcatchment using the geometries.
4. Combine the Strahler orders with the neighbours to derive, for each subcatchment, the neighbour with the highest Strahler order, which is its downstream neighbour.
5. Save the dictionary containing the downstream neighbours to a .yaml file.

### 1. Specify paths to find files and save new files.

In [3]:
# Root folder holding every catchment-related input and output.
path_data_folder = Path("../data/catchments")

# Inputs: the folder with the subcatchment GeoPackages and the Strahler raster.
path_subcatchments = path_data_folder / "subcatchments"
path_strahler = path_data_folder / "strahler_tana.map"

# Output folder for the subcatchment files that carry a Strahler attribute.
path_save_new_shapefiles = path_data_folder / "subcatchments_strahler"

# Every subcatchment file found on disk and how many there are
# (N is the total shown in the progress printouts of later cells).
shapefiles_fnames = [*path_subcatchments.glob("*.gpkg")]
N = len(shapefiles_fnames)



### 2. Assign a Strahler order to each subcatchment

In [3]:
execute = True

# Raster cells whose value is at or below this cut-off are zeroed before the
# maximum is taken, so only higher values can become a feature's 'strahler'.
# NOTE(review): presumably this filters out minor tributaries — confirm the
# intended cut-off against the Strahler raster.
STRAHLER_CUTOFF = 8

if execute:
    # The output folder may not exist yet (e.g. on a fresh checkout); writing
    # into a missing folder would fail on the very first file.
    path_save_new_shapefiles.mkdir(parents=True, exist_ok=True)

    # Open the raster once and reuse the handle for every subcatchment.
    with rasterio.open(path_strahler) as src:
        for i, subcatchment in enumerate(shapefiles_fnames):
            gdf = gpd.read_file(subcatchment)
            fname = subcatchment.name

            for idx, row in gdf.iterrows():
                # Clip the raster to the outline of this feature; the returned
                # affine transform is not needed here.
                clipped_river, _ = mask(src, [row.geometry], crop=True)

                # Zero everything at or below the cut-off, then store the
                # highest remaining value as the feature's Strahler order.
                clipped_river[clipped_river <= STRAHLER_CUTOFF] = 0
                gdf.loc[idx, 'strahler'] = clipped_river.max()

            # Write the file, now carrying the 'strahler' attribute, under the
            # same file name in the output folder.
            gdf.to_file(path_save_new_shapefiles.joinpath(fname), driver="GPKG")

            if i%25 == 0:
                print(f"{i}/{N}")

0/263
25/263
50/263
75/263
100/263
125/263
150/263
175/263
200/263
225/263
250/263


### 3. Find all neighbours of each subcatchment using the geometries.

For each subcatchment we collect the file names of all other subcatchments whose geometry intersects it:

In [4]:
execute = True

if execute:
    # Read every Strahler-annotated subcatchment file. Sorting makes the order
    # of the dictionary keys and of each neighbour list reproducible, because
    # the order returned by glob depends on the filesystem.
    shapefiles_fnames = sorted(path_save_new_shapefiles.glob("*.gpkg"))
    shapefiles = [gpd.read_file(shapefile) for shapefile in shapefiles_fnames]
    n_shapefiles = len(shapefiles)

    print("Initialized list of geodataframes...")

    # Maps each file name to the file names of all subcatchments it intersects.
    neighboring_shapefiles = dict()

    for idx, shapefile in enumerate(shapefiles):
        # Merge all features of the current file into a single geometry.
        geometry = shapefile.geometry.unary_union

        neighbors = []

        # Walk the full list and skip the current file, so that idy indexes
        # shapefiles and shapefiles_fnames consistently. Indexing the full
        # name list with a position taken from a list that had the current
        # file removed would report the wrong file name for every entry
        # located after the current one.
        for idy, other_shapefile in enumerate(shapefiles):
            if idy == idx:
                continue
            # A file is a neighbour as soon as one of its features intersects.
            if any(geometry.intersects(other_geometry) for other_geometry in other_shapefile.geometry):
                neighbors.append(shapefiles_fnames[idy].name)

        neighboring_shapefiles[shapefiles_fnames[idx].name] = neighbors
        if idx%25 == 0:
            print(f"{idx}/{n_shapefiles}")

Initialized list of geodataframes...
0/263
25/263
50/263
75/263
100/263
125/263
150/263
175/263
200/263
225/263
250/263


Now we save the dictionary containing all neighbours.

In [6]:
# Write the neighbour lookup into the catchment data folder. The context
# manager closes the file even if serialisation fails part-way.
with open(path_data_folder.joinpath("neighbours.yaml"), "w") as file:
    yaml.dump(neighboring_shapefiles, file)
print("YAML file saved.")

YAML file saved.


### 4. Combine the Strahler orders with the neighbours to derive, for each subcatchment, the neighbour with the highest Strahler order, which is its downstream neighbour.

Now we filter on Strahler number. For each subcatchment we keep only the neighbour with the highest Strahler number:

In [7]:
# Maps each subcatchment file name to the file name of the neighbour with the
# highest Strahler order.
downstream_catchments = dict()

# Highest 'strahler' value per neighbour file, filled on first use so that a
# file shared by several subcatchments is read from disk only once.
max_strahler_by_file = dict()

n_catchments = len(neighboring_shapefiles)

for i, key in enumerate(neighboring_shapefiles.keys()):
    neighbouring_subc = neighboring_shapefiles[key]

    # Subcatchments without any neighbour get no entry in the result.
    if len(neighbouring_subc) > 0:
        strahler_numbers = []
        for item in neighbouring_subc:
            if item not in max_strahler_by_file:
                gdf = gpd.read_file(path_save_new_shapefiles.joinpath(item))
                max_strahler_by_file[item] = gdf["strahler"].max()
            # Exactly one value per neighbour file keeps this list aligned
            # with neighbouring_subc; collecting one value per feature would
            # shift the argmax position whenever a file holds several features.
            strahler_numbers.append(max_strahler_by_file[item])

        # NOTE(review): the highest-order neighbour is chosen even if its order
        # is lower than this subcatchment's own (as could happen at the basin
        # outlet) — confirm that this is the intended behaviour.
        downstream_catchments[key] = neighbouring_subc[np.argmax(strahler_numbers)]

    if i%25 == 0:
        print(f"{i}/{n_catchments}")

0/263
25/263
50/263
75/263
100/263
125/263
150/263
175/263
200/263
225/263
250/263


### 5. Save the dictionary containing the downstream neighbours to a .yaml file.

In [8]:
# Write the downstream lookup into the catchment data folder. The context
# manager closes the file even if serialisation fails part-way.
with open(path_data_folder.joinpath("downstream_catchments.yaml"), "w") as file:
    yaml.dump(downstream_catchments, file)
print("YAML file saved.")

YAML file saved.
