# Importing necessary libraries

In [None]:
import os
import geopandas as gpd
import pandas as pd

In [None]:
# Read the modified catchment shapefile into a GeoDataFrame.
# The path is machine-specific: point it at your local copy of the file.
mod_cat_path = '/home/paulc600/SMM/SMM HYPE files/Modified_SMMcat.shp' # modify if necessary
modifiedcat = gpd.read_file(mod_cat_path)

In [None]:
# Read the river shapefile into a GeoDataFrame.
# The path is machine-specific: point it at your local copy of the file.
riv_path = '/home/paulc600/github/StMaryMilk2023-UofC/modified_TGF/smm_tgf_modified/smm_riv.shp' # modify if necessary
riv = gpd.read_file(riv_path)

In [None]:
# Folder that receives the generated text files; GeoData.txt is written
# into it at the end of the notebook
output_folder_path = '/home/paulc600/local/HYPE Inputs/' # modify as needed

In [None]:
# Path to the CSV file holding the SLC information; it is read with pandas
# further down and joined to the catchments
slc_path = '/home/paulc600/SMM/HYPE_geospatial/HYPE_output.csv'

In [None]:
# Attribute (column) names the notebook assumes to exist in the inputs

# Catchment attributes
cat_str = "hru_nhm"      # id matched against the SLC table
cat_area = "Shape_Area"  # written to GeoData as `area`

# River attributes
riv_seg_str = "seg_nhm"        # written to GeoData as `subid`
riv_ds_seg_str = "ds_seg_nhm"  # written to GeoData as `maindown`
riv_seg_slope = "seg_slope"    # written to GeoData as `slope_mean`
riv_seg_len = "Shape_Leng"     # written to GeoData as `rivlen`

In [None]:
# Order both tables by the river segment id so they are easier to inspect
sorted_riv = riv.sort_values(riv_seg_str)
sorted_modifiedcat = modifiedcat.sort_values(riv_seg_str)

Checking a few outputs:

In [None]:
# Display the catchment table, sorted by segment id, for a visual check
sorted_modifiedcat

In [None]:
# Display the river table, sorted by segment id, for a visual check
sorted_riv

In [None]:
# Keep only the catchment rows whose segment id is 58662 or larger
has_high_seg_id = modifiedcat[riv_seg_str] >= 58662
selected_rows = modifiedcat[has_high_seg_id]

In [None]:
# Stack the selected catchment rows underneath the river table and
# renumber the index of the combined table
tables_to_stack = [riv, selected_rows]
merged_data = pd.concat(tables_to_stack, ignore_index=True)

In [None]:
# Wrap the stacked table in a GeoDataFrame that carries the CRS of the river layer
river_crs = riv.crs
merged_riv = gpd.GeoDataFrame(merged_data, crs=river_crs)

In [None]:
# Replace every missing value in the stacked table with 0
merged_riv = merged_riv.fillna(value=0)

In [None]:
# Display the stacked river/catchment table after filling missing values
merged_riv

Start creating `GeoData.txt`:

In [None]:
# Display the table that GeoData is built from
merged_riv

In [None]:
# Start GeoData from the segment id and downstream segment id columns
# (they are renamed to `subid` and `maindown` later on).
# The columns are selected by name instead of by position so the result
# does not depend on the column order of the input shapefile; every later
# cell already refers to these two columns by name.
geodata = merged_riv[[riv_seg_str, riv_ds_seg_str]]

In [None]:
# Attach the catchment area to GeoData, matching rows on the segment id
area_by_segment = modifiedcat[[riv_seg_str, cat_area]]
geodata = pd.merge(left=geodata, right=area_by_segment, on=riv_seg_str)

____

Add the coordinates (`lat` and `lon`) of the sub-basins' centroids, computed with gistool (optional):

In [None]:
# FIXME: add gistool script here

Add `elev_mean` from MERIT-Hydro, computed with gistool using the `smm_cat.shp` file (optional):

In [None]:
# FIXME: add gistool script here

____

In [None]:
# Attach the segment slope from merged_riv, matching rows on the segment id
slope_by_segment = merged_riv[[riv_seg_str, riv_seg_slope]]
geodata = pd.merge(left=geodata, right=slope_by_segment, on=riv_seg_str)

In [None]:
# Attach the segment length from merged_riv, matching rows on the segment id
length_by_segment = merged_riv[[riv_seg_str, riv_seg_len]]
geodata = pd.merge(left=geodata, right=length_by_segment, on=riv_seg_str)

In [None]:
# Display GeoData after the area, slope and length columns were attached
geodata

In [None]:
# Read the SLC information from the CSV file into a DataFrame.
# Its `Unnamed: 0` column is used further down to match rows to catchments.
slc = pd.read_csv(slc_path)

In [None]:
# Display the SLC table for a visual check
slc

In [None]:
# Join the SLC table to the catchments: the catchment id column is matched
# against the `Unnamed: 0` column of the CSV, keeping only ids found in both
merged_cat = pd.merge(
    left=modifiedcat,
    right=slc,
    how='inner',
    left_on=cat_str,
    right_on='Unnamed: 0',
)

In [None]:
# Display the catchments joined with the SLC table
merged_cat

In [None]:
# Pick the columns of the joined table that are carried over to GeoData:
# everything from position 14 onwards.
# NOTE(review): 14 is a hand-typed position that presumably marks where the
# SLC columns start; confirm it whenever the input shapefile or CSV changes.
first_carried_position = 14
column_range = merged_cat.columns[first_carried_position:]

In [None]:
# Display the names of the columns that will be carried over to GeoData
column_range

In [None]:
# Attach the selected columns of merged_cat to geodata, matching rows on
# the segment id, then display the result
carried_columns = [riv_seg_str, *column_range]
final_geodata = pd.merge(left=geodata, right=merged_cat[carried_columns], on=riv_seg_str)
final_geodata

In [None]:
# Overwrite the downstream segment id with -9999 for the nca rows
# (segment id 58662 or larger)
# FIXME: improve this method
nca_rows = final_geodata[riv_seg_str] >= 58662
final_geodata.loc[nca_rows, riv_ds_seg_str] = -9999

In [None]:
# Set every column from the area column through the length column to 0
# for the nca rows (segment id 58662 or larger)
# FIXME: improve this method
nca_rows = final_geodata[riv_seg_str] >= 58662
final_geodata.loc[nca_rows, cat_area:riv_seg_len] = 0

In [None]:
# Display GeoData after the nca rows were overwritten
final_geodata

In [None]:
# Rename the leading columns to the names used by the HYPE GeoData format
hype_column_names = {
    riv_seg_str: 'subid',
    riv_ds_seg_str: 'maindown',
    cat_area: 'area',
    riv_seg_slope: 'slope_mean',
    riv_seg_len: 'rivlen',
}
final_geodata.rename(columns=hype_column_names, inplace=True)

In [None]:
# Positions of the columns that are renamed to SLC_1, SLC_2, ...
# The first five columns are subid, maindown, area, slope_mean and rivlen,
# so the SLC columns start at position 5. The last position is read from
# the table itself instead of being typed in by hand, so the range follows
# the number of SLC columns in the input.
# NOTE(review): assumes every column after the first five is an SLC column,
# which is how final_geodata is assembled above.
start_index = 5
end_index = len(final_geodata.columns) - 1
offset = start_index - 1

In [None]:
# Build the names SLC_<k>, one for every column position from start_index
# to end_index, where k is the position minus the offset
first_number = start_index - offset
last_number = end_index - offset
new_column_names = [f'SLC_{number}' for number in range(first_number, last_number + 1)]

In [None]:
# Display the generated SLC column names
new_column_names

In [None]:
# Rename the columns of final_geodata at positions start_index..end_index
# to the generated SLC names.
# The labels are replaced by assigning a new list to `columns`: writing
# into `columns.values` edits the array behind the Index in place, which
# bypasses the Index API and is not a supported way to rename columns.
renamed_columns = list(final_geodata.columns)
renamed_columns[start_index:end_index + 1] = new_column_names
final_geodata.columns = renamed_columns

In [None]:
# Display GeoData with the renamed SLC columns
final_geodata

In [None]:
# Create a sorting key from the 'subid' and 'maindown' columns.
# The key is (number of routing steps from the sub-basin to the end of its
# flow path, row label). A sub-basin is always one step further from the
# end than the sub-basin it drains into, so sorting this key in descending
# order lists every sub-basin before its downstream neighbour.
# NOTE(review): HYPE is understood to require upstream sub-basins to come
# first in GeoData.txt; confirm against the HYPE file reference.
_downstream_of = dict(zip(final_geodata['subid'], final_geodata['maindown']))


def _hops_to_outlet(subid):
    """Count the routing steps from *subid* until the flow leaves the table."""
    hops = 0
    visited = {subid}  # guards against a circular routing definition
    current = _downstream_of.get(subid)
    # A downstream id that is not itself a subid (e.g. 0 or -9999) ends the path
    while current in _downstream_of and current not in visited:
        visited.add(current)
        hops += 1
        current = _downstream_of[current]
    return hops


final_geodata['sorting_key'] = final_geodata.apply(
    lambda row: (_hops_to_outlet(row['subid']), row.name), axis=1)

In [None]:
# Order the rows by the sorting key, largest key first
sort_column = 'sorting_key'
geodata_sorted = final_geodata.sort_values(by=sort_column, ascending=False)

In [None]:
# The helper column is only needed for ordering; drop it before writing
geodata_sorted = geodata_sorted.drop(columns='sorting_key')

In [None]:
# Display the sorted GeoData table that is about to be written
geodata_sorted

In [None]:
# Write GeoData.txt as a tab-separated file without the index column.
# The output folder is created first so the write does not fail when the
# folder does not exist yet.
os.makedirs(output_folder_path, exist_ok=True)
geodata_file = os.path.join(output_folder_path, 'GeoData.txt')
geodata_sorted.to_csv(geodata_file, sep='\t', index=False)