# Classify nested catchments

Author: Thiago Nascimento (thiago.nascimento@eawag.ch)

This notebook is part of the EStreams publication and was used to classify potential nested catchments within the dataset.

* Note that this code enables not only the replicability of the current database but also the extrapolation to new catchment areas. 
* Additionally, the user should download and insert the original raw-data in the folder of the same name prior to run this code. 
* The original third-party data used were not made available in this repository due to redistribution and storage-space reasons.  

## Requirements
**Python:**

* Python>=3.6
* Jupyter
* Geopandas=0.10.2
* Pandas
* tqdm

Check the Github repository for an environment.yml (for conda environments) or requirements.txt (pip) file.

**Files:**

* results/estreams_catchments.shp 

**Directory:**

* Clone the GitHub directory locally
* Place any third-data variables in their respective directory.
* ONLY update the "PATH" variable in the section "Configurations", with their relative path to the EStreams directory. 

# Import modules

In [1]:
import pandas as pd
import numpy as np
import datetime
import tqdm
import os
import geopandas as gpd
import networkx as nx
from shapely.geometry import Polygon, Point

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


# Configurations

In [2]:
# Only editable variable:
PATH = r"/Users/thiagomedeirosdonascimento/Library/CloudStorage/OneDrive-Personal/PhD/Eawag/Papers/Paper1_Database/Paper"

* #### The users should NOT change anything in the code below here. 

In [3]:
PATH_OUTPUT = "results/"

# Set the directory:
os.chdir(PATH)

# Import data
## Catchment boundaries

In [13]:
catchment_boundaries = gpd.read_file('results/estreams_catchments.shp')
catchment_boundaries

Unnamed: 0,basin_id,gauge_id,gauge_coun,area,area_calc,area_flag,area_perc,start_date,end_date,geometry
0,AT000001,200014,AT,4647.9,4668.379,0,-0.440608,1996-01-01,2019-12-31,"POLYGON Z ((9.69406 46.54322 0.00000, 9.69570 ..."
1,AT000002,200048,AT,102.0,102.287,0,-0.281373,1958-10-01,2019-12-31,"POLYGON Z ((10.13650 47.02949 0.00000, 10.1349..."
2,AT000003,231662,AT,535.2,536.299,0,-0.205344,1985-01-02,2019-12-31,"POLYGON Z ((10.11095 46.89437 0.00000, 10.1122..."
3,AT000004,200592,AT,66.6,66.286,0,0.471471,1998-01-02,2019-12-31,"POLYGON Z ((10.14189 47.09706 0.00000, 10.1404..."
4,AT000005,200097,AT,72.2,72.448,0,-0.343490,1990-01-01,2019-12-31,"POLYGON Z ((9.67851 47.06249 0.00000, 9.67888 ..."
...,...,...,...,...,...,...,...,...,...,...
15042,UAGR0017,6682300,UA,321.0,325.370,0,-1.361371,1978-01-01,1987-12-31,"POLYGON Z ((33.96791 44.63291 0.00000, 33.9679..."
15043,UAGR0018,6682500,UA,49.7,47.594,0,4.237425,1978-01-01,1987-12-31,"POLYGON Z ((34.19958 44.58291 0.00000, 34.2029..."
15044,UAGR0019,6683010,UA,261.0,244.731,1,6.233333,1978-01-01,1987-12-31,"POLYGON Z ((34.19624 44.88375 0.00000, 34.1962..."
15045,UAGR0020,6683200,UA,760.0,731.073,0,3.806184,1978-01-01,1987-12-31,"POLYGON Z ((35.78708 47.28708 0.00000, 35.7870..."


## Network information

In [7]:
network_EU = pd.read_excel("results/estreams_gauging_stations.xlsx")
network_EU.set_index("basin_id", inplace = True)

network_EU

Unnamed: 0_level_0,gauge_id,gauge_name,gauge_country,gauge_provider,river,lon_snap,lat_snap,lon,lat,area,...,area_flag,area_perc,start_date,end_date,num_years,num_months,num_days,num_days_gaps,num_continuous_days,duplicated_suspect
basin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AT000001,200014,Bangs,AT,AT_EHYD,Rhein,9.534835,47.273748,9.534835,47.273748,4647.9,...,0,-0.440608,1996-01-01,2019-12-31,24,288,8766,0.0,8766,CH000197
AT000002,200048,Schruns (Vonbunweg),AT,AT_EHYD,Litz,9.913677,47.080301,9.913677,47.080301,102.0,...,0,-0.281373,1958-10-01,2019-12-31,62,735,22372,0.0,22372,CH000221
AT000003,231662,Loruens-Aeule,AT,AT_EHYD,Ill,9.847765,47.132821,9.847765,47.132821,535.2,...,0,-0.205344,1985-01-02,2019-12-31,35,420,12782,0.0,12782,CH000215
AT000004,200592,Kloesterle (OEBB),AT,AT_EHYD,Alfenz,10.061843,47.128994,10.061843,47.128994,66.6,...,0,0.471471,1998-01-02,2019-12-31,22,264,8034,0.0,8034,CH000227
AT000005,200097,Buers (Bruecke L82),AT,AT_EHYD,Alvier,9.802668,47.150770,9.802668,47.150770,72.2,...,0,-0.343490,1990-01-01,2019-12-31,30,360,10957,0.0,10957,CH000214
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
UAGR0017,6682300,BASHTANOVKA,UA,UA_GRDC,KACHA,33.894739,44.691884,33.900000,44.683333,321.0,...,0,-1.361371,1978-01-01,1987-12-31,10,120,3652,0.0,3652,
UAGR0018,6682500,YALTA,UA,UA_GRDC,DERE-KIOY,34.166667,44.500000,34.166667,44.500000,49.7,...,0,4.237425,1978-01-01,1987-12-31,10,120,3652,0.0,3652,
UAGR0019,6683010,PIONERSKOE,UA,UA_GRDC,SALHYR,34.199841,44.887685,34.200000,44.883333,261.0,...,1,6.233333,1978-01-01,1987-12-31,10,120,3652,0.0,3652,
UAGR0020,6683200,TOKMAK,UA,UA_GRDC,TOKMAK,35.705833,47.251389,35.705833,47.251389,760.0,...,0,3.806184,1978-01-01,1987-12-31,10,120,3652,0.0,3652,


## Subset of the catchments to be used

In [7]:
catchments = catchment_boundaries.copy()
network = network_EU.copy()
catchments

Unnamed: 0,basin_id,gauge_id,gauge_coun,area,area_calc,area_flag,area_perc,start_date,end_date,geometry
0,AT000001,200014,AT,4647.9,4668.379,0,-0.440608,1996-01-01,2019-12-31,"POLYGON Z ((9.69406 46.54322 0.00000, 9.69570 ..."
1,AT000002,200048,AT,102.0,102.287,0,-0.281373,1958-10-01,2019-12-31,"POLYGON Z ((10.13650 47.02949 0.00000, 10.1349..."
2,AT000003,231662,AT,535.2,536.299,0,-0.205344,1985-01-02,2019-12-31,"POLYGON Z ((10.11095 46.89437 0.00000, 10.1122..."
3,AT000004,200592,AT,66.6,66.286,0,0.471471,1998-01-02,2019-12-31,"POLYGON Z ((10.14189 47.09706 0.00000, 10.1404..."
4,AT000005,200097,AT,72.2,72.448,0,-0.343490,1990-01-01,2019-12-31,"POLYGON Z ((9.67851 47.06249 0.00000, 9.67888 ..."
...,...,...,...,...,...,...,...,...,...,...
15042,UAGR0017,6682300,UA,321.0,325.370,0,-1.361371,1978-01-01,1987-12-31,"POLYGON Z ((33.96791 44.63291 0.00000, 33.9679..."
15043,UAGR0018,6682500,UA,49.7,47.594,0,4.237425,1978-01-01,1987-12-31,"POLYGON Z ((34.19958 44.58291 0.00000, 34.2029..."
15044,UAGR0019,6683010,UA,261.0,244.731,1,6.233333,1978-01-01,1987-12-31,"POLYGON Z ((34.19624 44.88375 0.00000, 34.1962..."
15045,UAGR0020,6683200,UA,760.0,731.073,0,3.806184,1978-01-01,1987-12-31,"POLYGON Z ((35.78708 47.28708 0.00000, 35.7870..."


## Make a buffer around the catchments

In [8]:
# Frst we assign a tolerance to overcome problems of catchments with delineations 
# slightly outside the other catchment. 
# This code may take a while.
tolerance = 0.01
catchments_buffer = catchments.copy()
catchments_buffer['geometry'] = catchments['geometry'].buffer(tolerance)


  catchments_buffer['geometry'] = catchments['geometry'].buffer(tolerance)


# Processing
## Nested catchments count

* First we classifiy the catchments according to their possibility of being nested.
* At the end we have groups (main watershed) to where each sub-catchment is assigned. 

In [9]:
# Nested catchments:
# Initialize an empty list to store nested catchments
nested_catchments = []

# Iterate over each catchment
for index, catchment in tqdm.tqdm(catchments.iterrows()):
    # Get the geometry of the current catchment
    geom = catchment['geometry']
    
    # Iterate over other catchments to check if they are nested
    for index2, other_catchment in catchments_buffer.iterrows():
        # Skip the same catchment
        if index == index2:
            continue
        
        other_geom = other_catchment['geometry']
        
        # Check if the current catchment is completely within the other catchment
        if geom.within(other_geom):
            nested_catchments.append((catchment.basin_id, other_catchment.basin_id))

15047it [4:04:35,  1.03it/s]


In [10]:
# Create the big-groups (main watershed):
# Initialize an empty graph
G = nx.Graph()

# Add nodes for each catchment
for index, catchment in catchments.iterrows():
    G.add_node(catchment['basin_id'])

# Add edges for nested catchments
for nested_pair in nested_catchments:
    G.add_edge(nested_pair[0], nested_pair[1])

# Find connected components
groups = list(nx.connected_components(G))

# Assign groups to catchments
group_assignment = {}
for i, group in enumerate(groups):
    for catchment_id in group:
        group_assignment[catchment_id] = i + 1  # Assigning group numbers starting from 1

# Update the catchments GeoDataFrame with the group assignments
catchments['watershed_main'] = catchments['basin_id'].map(group_assignment)

In [11]:
catchments.head(5)

Unnamed: 0,basin_id,gauge_id,gauge_coun,area,area_calc,area_flag,area_perc,start_date,end_date,geometry,watershed_main
0,AT000001,200014,AT,4647.9,4668.379,0,-0.440608,1996-01-01,2019-12-31,"POLYGON Z ((9.69406 46.54322 0.00000, 9.69570 ...",1
1,AT000002,200048,AT,102.0,102.287,0,-0.281373,1958-10-01,2019-12-31,"POLYGON Z ((10.13650 47.02949 0.00000, 10.1349...",1
2,AT000003,231662,AT,535.2,536.299,0,-0.205344,1985-01-02,2019-12-31,"POLYGON Z ((10.11095 46.89437 0.00000, 10.1122...",1
3,AT000004,200592,AT,66.6,66.286,0,0.471471,1998-01-02,2019-12-31,"POLYGON Z ((10.14189 47.09706 0.00000, 10.1404...",1
4,AT000005,200097,AT,72.2,72.448,0,-0.34349,1990-01-01,2019-12-31,"POLYGON Z ((9.67851 47.06249 0.00000, 9.67888 ...",1


In [13]:
catchments[catchments.watershed_main == 1]

Unnamed: 0,basin_id,gauge_id,gauge_coun,area,area_calc,area_flag,area_perc,start_date,end_date,geometry,watershed_main
0,AT000001,200014,AT,4647.9,4668.379,0,-0.440608,1996-01-01,2019-12-31,"POLYGON Z ((9.69406 46.54322 0.00000, 9.69570 ...",1
1,AT000002,200048,AT,102.0,102.287,0,-0.281373,1958-10-01,2019-12-31,"POLYGON Z ((10.13650 47.02949 0.00000, 10.1349...",1
2,AT000003,231662,AT,535.2,536.299,0,-0.205344,1985-01-02,2019-12-31,"POLYGON Z ((10.11095 46.89437 0.00000, 10.1122...",1
3,AT000004,200592,AT,66.6,66.286,0,0.471471,1998-01-02,2019-12-31,"POLYGON Z ((10.14189 47.09706 0.00000, 10.1404...",1
4,AT000005,200097,AT,72.2,72.448,0,-0.343490,1990-01-01,2019-12-31,"POLYGON Z ((9.67851 47.06249 0.00000, 9.67888 ...",1
...,...,...,...,...,...,...,...,...,...,...,...
12556,LU000016,3,LU,360.5,387.289,0,-7.431068,2002-01-01,2021-07-31,"POLYGON Z ((6.04625 49.39291 0.00000, 6.04625 ...",1
12557,LU000017,16,LU,4231.8,4255.524,0,-0.560613,2002-01-01,2021-07-31,"POLYGON Z ((6.12041 49.50791 0.00000, 6.12041 ...",1
12558,LU000018,5,LU,83.6,83.614,0,-0.016746,2002-01-01,2021-07-31,"POLYGON Z ((6.09374 49.72458 0.00000, 6.09874 ...",1
12559,LU000019,12,LU,641.3,638.434,0,0.446905,2002-01-01,2021-07-31,"POLYGON Z ((6.18958 49.99625 0.00000, 6.19041 ...",1


In [14]:
nested_catchments_df = pd.DataFrame(nested_catchments)
nested_catchments_df.columns = ["catchment_1", "catchment_2"]
nested_catchments_df

Unnamed: 0,catchment_1,catchment_2
0,AT000001,AT000013
1,AT000001,CH000026
2,AT000001,CH000042
3,AT000001,CH000092
4,AT000001,CH000185
...,...,...
187086,UAGR0011,UAGR0014
187087,UAGR0012,UAGR0014
187088,UAGR0013,UAGR0012
187089,UAGR0013,UAGR0014


In [15]:
nested_catchments_df.to_excel("results/estreams_nested_catchments.xlsx")

## Gauges hierarchy:
* Here we use the same classification used by Lamah-CE.
* A headwater catchment will have a number 1, while a downstream catchment that has two gauges within (not counting the outlet) has a number 3 (2 gauges + 1 outlet).

In [16]:
# Assign the index to the shapefile:
catchments.set_index("basin_id", inplace = True)

# Keep one field with the same name:
catchments["basin_id"] = catchments.index

In [17]:
# Create one field with the same name as the index:
network["basin_id"]= network.index
network

Unnamed: 0_level_0,gauge_id,gauge_name,gauge_country,gauge_provider,river,lon_snap,lat_snap,lon,lat,area,...,area_perc,start_date,end_date,num_years,num_months,num_days,num_days_gaps,num_continuous_days,duplicated_suspect,basin_id
basin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AT000001,200014,Bangs,AT,AT_EHYD,Rhein,9.534835,47.273748,9.534835,47.273748,4647.9,...,-0.440608,1996-01-01,2019-12-31,24,288,8766,0.0,8766,CH000197,AT000001
AT000002,200048,Schruns (Vonbunweg),AT,AT_EHYD,Litz,9.913677,47.080301,9.913677,47.080301,102.0,...,-0.281373,1958-10-01,2019-12-31,62,735,22372,0.0,22372,CH000221,AT000002
AT000003,231662,Loruens-Aeule,AT,AT_EHYD,Ill,9.847765,47.132821,9.847765,47.132821,535.2,...,-0.205344,1985-01-02,2019-12-31,35,420,12782,0.0,12782,CH000215,AT000003
AT000004,200592,Kloesterle (OEBB),AT,AT_EHYD,Alfenz,10.061843,47.128994,10.061843,47.128994,66.6,...,0.471471,1998-01-02,2019-12-31,22,264,8034,0.0,8034,CH000227,AT000004
AT000005,200097,Buers (Bruecke L82),AT,AT_EHYD,Alvier,9.802668,47.150770,9.802668,47.150770,72.2,...,-0.343490,1990-01-01,2019-12-31,30,360,10957,0.0,10957,CH000214,AT000005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
UAGR0017,6682300,BASHTANOVKA,UA,UA_GRDC,KACHA,33.894739,44.691884,33.900000,44.683333,321.0,...,-1.361371,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,UAGR0017
UAGR0018,6682500,YALTA,UA,UA_GRDC,DERE-KIOY,34.166667,44.500000,34.166667,44.500000,49.7,...,4.237425,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,UAGR0018
UAGR0019,6683010,PIONERSKOE,UA,UA_GRDC,SALHYR,34.199841,44.887685,34.200000,44.883333,261.0,...,6.233333,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,UAGR0019
UAGR0020,6683200,TOKMAK,UA,UA_GRDC,TOKMAK,35.705833,47.251389,35.705833,47.251389,760.0,...,3.806184,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,UAGR0020


In [18]:
# Create a geometry column with Point objects for being used:
geometry = [Point(lon, lat) for lon, lat in zip(network['lon_snap'], network['lat_snap'])]

# Create a GeoDataFrame
network = gpd.GeoDataFrame(network, geometry=geometry)

# Optional: Set the coordinate reference system (CRS) if known
# For example, if your coordinates are in WGS84 (EPSG:4326)
network.crs = 'EPSG:4326'

### Apply the count taking into account some filters:
       - Points to pay attention:
* Outlet is seldom slightly outside the shapefile. 
* Catchment outlet has one duplicate within the shapefile.
* Catchments within the shapefile also have duplicates. 

       - Solution:
* We exclude the outlet from the count, and count + 1 at the end for all catchments. 
* We apply a filter to delete the catchment outlet to count duplicated_suspects that are within the catchment shapefile. 
* We count the number of duplicates, and when it is even, we simply divide per 2 and substract at the end count = count - (n/2). If it is odd, we do count = count - ((n - 1)/2 + 1). The reason is that when we have a two duplicates, they could delete each other.

In [19]:
# Spatial join to count geometries within the catchments shapefile
joined = gpd.sjoin(catchments, network, how='inner', op='intersects')

# Exclude geometries with the same "basin_id" as in the network GeoDataFrame (exclude the outlet):
joined_filtered = joined[joined['basin_id_left'] != joined['basin_id_right']]

# Here we create a function to deal with the duplicates of the outlet when they happen to be within:
# Parse the "duplicated_suspect" column to extract individual basin_ids
def parse_duplicated_suspect(suspect):
    if pd.isna(suspect):
        return []
    else:
        return suspect.split(', ')

joined_filtered['duplicated_suspect_ids'] = joined_filtered['duplicated_suspect'].apply(parse_duplicated_suspect)

# Exclude basin IDs from the count when there are duplicated suspects
def exclude_duplicated_suspects(row):
    if len(row['duplicated_suspect_ids']) > 0:
        return row['basin_id_left'] not in row['duplicated_suspect_ids']
    else:
        return True

joined_filtered = joined_filtered[joined_filtered.apply(exclude_duplicated_suspects, axis=1)]

# Count the number of geometries for each unique "basin_id" in the catchments shapefile
count_per_basin = joined_filtered['basin_id_left'].value_counts()

# Count the number of non-null values in the "duplicated_suspect" column for each basin ID
duplicates_count = joined_filtered.groupby('basin_id_left')['duplicated_suspect'].count()

# Adjust the count based on the number of duplicates within each catchment
for basin_id, count in duplicates_count.items():
    if count % 2 == 0:
        count_per_basin[basin_id] -= count // 2
    else:
        count_per_basin[basin_id] -= (count - 1) // 2
        count_per_basin[basin_id] += 1

network["gauge_hierarchy"] = np.nan      
network["gauge_hierarchy"] = count_per_basin

# Take into account the outlet:
network['gauge_hierarchy'] = network['gauge_hierarchy'] + 1

network['gauge_hierarchy'] = network['gauge_hierarchy'].fillna(1)

network.head(10)

  if (await self.run_code(code, result,  async_=asy)):
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Unnamed: 0_level_0,gauge_id,gauge_name,gauge_country,gauge_provider,river,lon_snap,lat_snap,lon,lat,area,...,end_date,num_years,num_months,num_days,num_days_gaps,num_continuous_days,duplicated_suspect,basin_id,geometry,gauge_hierarchy
basin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AT000001,200014,Bangs,AT,AT_EHYD,Rhein,9.534835,47.273748,9.534835,47.273748,4647.9,...,2019-12-31,24,288,8766,0.0,8766,CH000197,AT000001,POINT (9.53484 47.27375),14.0
AT000002,200048,Schruns (Vonbunweg),AT,AT_EHYD,Litz,9.913677,47.080301,9.913677,47.080301,102.0,...,2019-12-31,62,735,22372,0.0,22372,CH000221,AT000002,POINT (9.91368 47.08030),1.0
AT000003,231662,Loruens-Aeule,AT,AT_EHYD,Ill,9.847765,47.132821,9.847765,47.132821,535.2,...,2019-12-31,35,420,12782,0.0,12782,CH000215,AT000003,POINT (9.84777 47.13282),2.0
AT000004,200592,Kloesterle (OEBB),AT,AT_EHYD,Alfenz,10.061843,47.128994,10.061843,47.128994,66.6,...,2019-12-31,22,264,8034,0.0,8034,CH000227,AT000004,POINT (10.06184 47.12899),1.0
AT000005,200097,Buers (Bruecke L82),AT,AT_EHYD,Alvier,9.802668,47.15077,9.802668,47.15077,72.2,...,2019-12-31,30,360,10957,0.0,10957,CH000214,AT000005,POINT (9.80267 47.15077),1.0
AT000006,200105,Garsella,AT,AT_EHYD,Lutz,9.875898,47.226658,9.875898,47.226658,95.5,...,2019-12-31,56,672,20454,0.0,20454,CH000218,AT000006,POINT (9.87590 47.22666),1.0
AT000007,231688,Beschling,AT,AT_EHYD,Ill,9.67885,47.200301,9.67885,47.200301,1118.6,...,2019-12-31,35,420,12783,0.0,12783,CH000205,AT000007,POINT (9.67885 47.20030),6.0
AT000008,200501,Amerluegen,AT,AT_EHYD,Samina,9.614203,47.205978,9.614203,47.205978,70.0,...,2019-12-31,29,348,10591,0.0,10591,CH000201,AT000008,POINT (9.61420 47.20598),1.0
AT000009,200147,Gisingen,AT,AT_EHYD,Ill,9.57888,47.260362,9.57888,47.260362,1281.0,...,2019-12-31,69,828,25202,0.0,25202,CH000199,AT000009,POINT (9.57888 47.26036),8.0
AT000010,200154,Laterns,AT,AT_EHYD,Frutz,9.728853,47.256933,9.728853,47.256933,33.4,...,2019-12-31,64,768,23373,3.0,9786,CH000209,AT000010,POINT (9.72885 47.25693),1.0


## Assign the new values to the network:

In [26]:
network_EU['watershed_group'] = catchments['watershed_main']
network_EU

Unnamed: 0_level_0,gauge_id,gauge_name,gauge_country,gauge_provider,river,lon_snap,lat_snap,lon,lat,area,...,area_perc,start_date,end_date,num_years,num_months,num_days,num_days_gaps,num_continuous_days,duplicated_suspect,watershed_main
basin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AT000001,200014,Bangs,AT,AT_EHYD,Rhein,9.534835,47.273748,9.534835,47.273748,4647.9,...,-0.440608,1996-01-01,2019-12-31,24,288,8766,0.0,8766,CH000197,1
AT000002,200048,Schruns (Vonbunweg),AT,AT_EHYD,Litz,9.913677,47.080301,9.913677,47.080301,102.0,...,-0.281373,1958-10-01,2019-12-31,62,735,22372,0.0,22372,CH000221,1
AT000003,231662,Loruens-Aeule,AT,AT_EHYD,Ill,9.847765,47.132821,9.847765,47.132821,535.2,...,-0.205344,1985-01-02,2019-12-31,35,420,12782,0.0,12782,CH000215,1
AT000004,200592,Kloesterle (OEBB),AT,AT_EHYD,Alfenz,10.061843,47.128994,10.061843,47.128994,66.6,...,0.471471,1998-01-02,2019-12-31,22,264,8034,0.0,8034,CH000227,1
AT000005,200097,Buers (Bruecke L82),AT,AT_EHYD,Alvier,9.802668,47.150770,9.802668,47.150770,72.2,...,-0.343490,1990-01-01,2019-12-31,30,360,10957,0.0,10957,CH000214,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
UAGR0017,6682300,BASHTANOVKA,UA,UA_GRDC,KACHA,33.894739,44.691884,33.900000,44.683333,321.0,...,-1.361371,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,1916
UAGR0018,6682500,YALTA,UA,UA_GRDC,DERE-KIOY,34.166667,44.500000,34.166667,44.500000,49.7,...,4.237425,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,1917
UAGR0019,6683010,PIONERSKOE,UA,UA_GRDC,SALHYR,34.199841,44.887685,34.200000,44.883333,261.0,...,6.233333,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,1918
UAGR0020,6683200,TOKMAK,UA,UA_GRDC,TOKMAK,35.705833,47.251389,35.705833,47.251389,760.0,...,3.806184,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,1919


In [28]:
network_EU['gauge_hierarchy'] = network['gauge_hierarchy'].astype(int)
network_EU

Unnamed: 0_level_0,gauge_id,gauge_name,gauge_country,gauge_provider,river,lon_snap,lat_snap,lon,lat,area,...,start_date,end_date,num_years,num_months,num_days,num_days_gaps,num_continuous_days,duplicated_suspect,watershed_main,gauge_hierarchy
basin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AT000001,200014,Bangs,AT,AT_EHYD,Rhein,9.534835,47.273748,9.534835,47.273748,4647.9,...,1996-01-01,2019-12-31,24,288,8766,0.0,8766,CH000197,1,14
AT000002,200048,Schruns (Vonbunweg),AT,AT_EHYD,Litz,9.913677,47.080301,9.913677,47.080301,102.0,...,1958-10-01,2019-12-31,62,735,22372,0.0,22372,CH000221,1,1
AT000003,231662,Loruens-Aeule,AT,AT_EHYD,Ill,9.847765,47.132821,9.847765,47.132821,535.2,...,1985-01-02,2019-12-31,35,420,12782,0.0,12782,CH000215,1,2
AT000004,200592,Kloesterle (OEBB),AT,AT_EHYD,Alfenz,10.061843,47.128994,10.061843,47.128994,66.6,...,1998-01-02,2019-12-31,22,264,8034,0.0,8034,CH000227,1,1
AT000005,200097,Buers (Bruecke L82),AT,AT_EHYD,Alvier,9.802668,47.150770,9.802668,47.150770,72.2,...,1990-01-01,2019-12-31,30,360,10957,0.0,10957,CH000214,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
UAGR0017,6682300,BASHTANOVKA,UA,UA_GRDC,KACHA,33.894739,44.691884,33.900000,44.683333,321.0,...,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,1916,1
UAGR0018,6682500,YALTA,UA,UA_GRDC,DERE-KIOY,34.166667,44.500000,34.166667,44.500000,49.7,...,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,1917,1
UAGR0019,6683010,PIONERSKOE,UA,UA_GRDC,SALHYR,34.199841,44.887685,34.200000,44.883333,261.0,...,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,1918,1
UAGR0020,6683200,TOKMAK,UA,UA_GRDC,TOKMAK,35.705833,47.251389,35.705833,47.251389,760.0,...,1978-01-01,1987-12-31,10,120,3652,0.0,3652,,1919,1


In [34]:
network_EU[network_EU.gauge_hierarchy== 1524]

Unnamed: 0_level_0,gauge_id,gauge_name,gauge_country,gauge_provider,river,lon_snap,lat_snap,lon,lat,area,...,start_date,end_date,num_years,num_months,num_days,num_days_gaps,num_continuous_days,duplicated_suspect,watershed_main,gauge_hierarchy
basin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ROGR0012,6742900,CEATAL IZMAIL,RO,RO_GRDC,DANUBE RIVER,28.717806,45.223394,28.716657,45.216667,807000.0,...,1931-01-01,2010-12-31,78,936,28483,737.0,22368,,2,1524


### Shapefile

In [12]:
catchment_boundaries

Unnamed: 0,id,area_km2,outlet_lat,outlet_lng,area_offic,area_calc,Code,basin_id,label_area,name,layer,path,geometry
0,DE01945,144000,50.937,6.963,144232,144432.885,DE01945,DE01945,0,BundespegelKoeln,,,"POLYGON Z ((7.96208 46.53708 0.00000, 7.96625 ..."
1,DE01946,148000,51.226,6.770,147680,147934.665,DE01946,DE01946,0,BundespegelDuesseldorf,,,"POLYGON Z ((7.96208 46.53708 0.00000, 7.96625 ..."
2,DE01947,144000,50.937,6.963,144232,144432.885,DE01947,DE01947,0,BundespegelKoeln,,,"POLYGON Z ((7.96208 46.53708 0.00000, 7.96625 ..."
3,DE01948,148000,51.226,6.770,147680,147934.665,DE01948,DE01948,0,BundespegelDuesseldorf,,,"POLYGON Z ((7.96208 46.53708 0.00000, 7.96625 ..."
4,DE01949,159000,51.757,6.395,159300,159352.653,DE01949,DE01949,0,BundespegelRees,,,"POLYGON Z ((7.96208 46.53708 0.00000, 7.96625 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15042,HUGR049,711,46.953,21.084,8985,711.166,HUGR049,HUGR049,888,6444040,HUGR049,C:/Users/nascimth/Documents/Thiago/Eawag/Pytho...,"POLYGON Z ((21.14875 46.98958 0.00000, 21.1487..."
15043,HUGR050,12900,48.357,21.692,12886,12894.134,HUGR050,HUGR050,0,6444750,HUGR050,C:/Users/nascimth/Documents/Thiago/Eawag/Pytho...,"POLYGON Z ((21.69208 48.38125 0.00000, 21.6895..."
15044,HUGR051,31000,46.298,16.886,30969,30990.193,HUGR051,HUGR051,0,6446200,HUGR051,C:/Users/nascimth/Documents/Thiago/Eawag/Pytho...,"POLYGON Z ((12.41625 46.66958 0.00000, 12.4162..."
15045,HUGR019,,,,62723,62532.211,HUGR019,HUGR019,0,6444300,HUGR019,C:\Users\nascimth\Documents\Thiago\Eawag\Pytho...,"POLYGON Z ((23.22125 46.62375 0.00000, 23.2170..."


In [14]:
catchment_boundaries.set_index("basin_id", inplace = True)
catchment_boundaries

Unnamed: 0_level_0,gauge_id,gauge_coun,area,area_calc,area_flag,area_perc,start_date,end_date,geometry
basin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AT000001,200014,AT,4647.9,4668.379,0,-0.440608,1996-01-01,2019-12-31,"POLYGON Z ((9.69406 46.54322 0.00000, 9.69570 ..."
AT000002,200048,AT,102.0,102.287,0,-0.281373,1958-10-01,2019-12-31,"POLYGON Z ((10.13650 47.02949 0.00000, 10.1349..."
AT000003,231662,AT,535.2,536.299,0,-0.205344,1985-01-02,2019-12-31,"POLYGON Z ((10.11095 46.89437 0.00000, 10.1122..."
AT000004,200592,AT,66.6,66.286,0,0.471471,1998-01-02,2019-12-31,"POLYGON Z ((10.14189 47.09706 0.00000, 10.1404..."
AT000005,200097,AT,72.2,72.448,0,-0.343490,1990-01-01,2019-12-31,"POLYGON Z ((9.67851 47.06249 0.00000, 9.67888 ..."
...,...,...,...,...,...,...,...,...,...
UAGR0017,6682300,UA,321.0,325.370,0,-1.361371,1978-01-01,1987-12-31,"POLYGON Z ((33.96791 44.63291 0.00000, 33.9679..."
UAGR0018,6682500,UA,49.7,47.594,0,4.237425,1978-01-01,1987-12-31,"POLYGON Z ((34.19958 44.58291 0.00000, 34.2029..."
UAGR0019,6683010,UA,261.0,244.731,1,6.233333,1978-01-01,1987-12-31,"POLYGON Z ((34.19624 44.88375 0.00000, 34.1962..."
UAGR0020,6683200,UA,760.0,731.073,0,3.806184,1978-01-01,1987-12-31,"POLYGON Z ((35.78708 47.28708 0.00000, 35.7870..."


In [17]:
# Retrieve the information needed:
catchment_boundaries["gauge_hierarchy"] = network['gauge_hierarchy'].astype(int)
catchment_boundaries['watershed_group'] = network['watershed_main'].astype(int)

# Adjust the columns order and names:
catchment_boundaries = catchment_boundaries[['gauge_id', 'gauge_coun', 'area', 'area_calc',
       'area_flag', 'area_perc', 'start_date', 'end_date', 'gauge_hierarchy', 'watershed_group', 'geometry']]

catchment_boundaries.columns = ['gauge_id', 'gauge_country', 'area', 'area_calc',
       'area_flag', 'area_perc', 'start_date', 'end_date', 'gauge_hierarchy', 'watershed_group', 'geometry']

In [18]:
catchment_boundaries

Unnamed: 0_level_0,gauge_id,gauge_country,area,area_calc,area_flag,area_perc,start_date,end_date,gauge_hierarchy,watershed_group,geometry
basin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AT000001,200014,AT,4647.9,4668.379,0,-0.440608,1996-01-01,2019-12-31,14,1,"POLYGON Z ((9.69406 46.54322 0.00000, 9.69570 ..."
AT000002,200048,AT,102.0,102.287,0,-0.281373,1958-10-01,2019-12-31,1,1,"POLYGON Z ((10.13650 47.02949 0.00000, 10.1349..."
AT000003,231662,AT,535.2,536.299,0,-0.205344,1985-01-02,2019-12-31,2,1,"POLYGON Z ((10.11095 46.89437 0.00000, 10.1122..."
AT000004,200592,AT,66.6,66.286,0,0.471471,1998-01-02,2019-12-31,1,1,"POLYGON Z ((10.14189 47.09706 0.00000, 10.1404..."
AT000005,200097,AT,72.2,72.448,0,-0.343490,1990-01-01,2019-12-31,1,1,"POLYGON Z ((9.67851 47.06249 0.00000, 9.67888 ..."
...,...,...,...,...,...,...,...,...,...,...,...
UAGR0017,6682300,UA,321.0,325.370,0,-1.361371,1978-01-01,1987-12-31,1,1916,"POLYGON Z ((33.96791 44.63291 0.00000, 33.9679..."
UAGR0018,6682500,UA,49.7,47.594,0,4.237425,1978-01-01,1987-12-31,1,1917,"POLYGON Z ((34.19958 44.58291 0.00000, 34.2029..."
UAGR0019,6683010,UA,261.0,244.731,1,6.233333,1978-01-01,1987-12-31,1,1918,"POLYGON Z ((34.19624 44.88375 0.00000, 34.1962..."
UAGR0020,6683200,UA,760.0,731.073,0,3.806184,1978-01-01,1987-12-31,1,1919,"POLYGON Z ((35.78708 47.28708 0.00000, 35.7870..."


## Save the data

In [19]:
# Save the dataframe:
network_EU.to_excel('results/estreams_gauging_stations.xlsx')

# Save the shapefile:
catchment_boundaries.to_file('results/estreams_catchments.shp')

  catchment_boundaries.to_file('results/estreams_catchments.shp')


## End