In [1]:
import os
import pandas as pd
import numpy as np
import geopandas as gpd

### Inputs

In [2]:
# Read the modified catchment shapefile
modifiedcat = gpd.read_file('../geospacial/modified_shapefiles/Modified_SMMcat.shp')

In [3]:
# Read the river shapefile
riv = gpd.read_file('../../../geofabric/modified_TGF/smm_tgf_modified/smm_riv.shp')

In [4]:
# Import latitude, longitude and adjusted elevation from gistool
lat_elev=pd.read_csv('../geospacial/elev_lat_stats_elv.csv')

In [5]:
slc = pd.read_csv('../geospacial/slc/sorted_final_slc.csv')

In [6]:
# Open the text file in read mode
with open('../geospacial/downstream_order.txt', 'r') as file:
    # Read the lines of the file into a list
    lines = file.readlines()

# Optionally, you may strip newline characters from each line
order = [line.strip() for line in lines]

### Create Geodata

In [7]:
# Sort the GeoDataFrame by a specific column
sorted_modifiedcat = modifiedcat.sort_values(by='seg_nhm')
sorted_riv = riv.sort_values(by='seg_nhm')

In [8]:
# Select the added river ID rows based on the range in a column
selected_rows = modifiedcat[(modifiedcat['seg_nhm'] >= 58662 )]

In [9]:
# Concatenate the selected rows with the river shapefile
merged_data = pd.concat([sorted_riv, selected_rows], ignore_index=True)

In [10]:
# Convert the merged data to a GeoDataFrame
merged_riv = gpd.GeoDataFrame(merged_data, crs=riv.crs)

In [11]:
merged_riv=merged_riv.fillna(0)

Add columns

In [12]:
# Extract the first two columns from the merged_riv GeoDataFrame (subid and maindown)
geodata = merged_riv.iloc[:, :2]

In [13]:
# Add subbasin area from catchment shapefile based on river segment
geodata = pd.merge(geodata, modifiedcat[['seg_nhm', 'Shape_Area']], on='seg_nhm')

In [14]:
geodata

Unnamed: 0,seg_nhm,ds_seg_nhm,Shape_Area
0,58183,0,3.652090e+07
1,58184,58183,1.750700e+07
2,58185,58184,5.667200e+06
3,58186,58185,3.803350e+07
4,58188,58186,2.801800e+06
...,...,...,...
468,58671,0,3.860121e+08
469,58672,0,9.586554e+08
470,58673,0,2.876408e+08
471,58674,0,2.727878e+08


In [15]:
# Merge the lat lon elev data with modifiedcat based on hru
lat_elev= pd.merge(modifiedcat, lat_elev, left_on='hru_nhm', right_on='hru_nhm', how='inner')

In [16]:
# merge lat lon elev info into geodata based on river ID
geodata = pd.merge(geodata, lat_elev[['seg_nhm', 'lon', 'lat', 'mean']], on='seg_nhm')

In [17]:
# merge river slope to the geodata from merged_riv
geodata = pd.merge(geodata, merged_riv[['seg_nhm', 'seg_slope']], on='seg_nhm')

In [18]:
# merge river length to the geodata from merged_riv
geodata = pd.merge(geodata, merged_riv[['seg_nhm', 'Shape_Leng']], on='seg_nhm')

In [19]:
geodata

Unnamed: 0,seg_nhm,ds_seg_nhm,Shape_Area,lon,lat,mean,seg_slope,Shape_Leng
0,58183,0,3.652090e+07,-112.919857,49.598073,921.890625,0.00242,8135.825626867998
1,58184,58183,1.750700e+07,-112.859951,49.577578,924.506165,1e-05,4020.941946660042
2,58185,58184,5.667200e+06,-112.839964,49.570476,909.549744,0.00163,1671.934917989161
3,58186,58185,3.803350e+07,-112.879443,49.530760,928.137573,0.00162,26483.13594952753
4,58188,58186,2.801800e+06,-112.934132,49.503520,956.165771,0.00662,6040.944351902939
...,...,...,...,...,...,...,...,...
468,58671,0,3.860121e+08,-108.661660,49.257402,922.959106,0,202460.006602
469,58672,0,9.586554e+08,-108.892603,49.169295,948.367615,0,295200.002203
470,58673,0,2.876408e+08,-109.130976,49.315396,988.979126,0,188640.001102
471,58674,0,2.727878e+08,-110.181345,48.645243,851.569946,0,105620.001701


Add SLC data to geodata

In [20]:
slc

Unnamed: 0.1,Unnamed: 0,0101,0102,0103,0104,0106,0109,0201,0202,0203,...,2112,2113,2116,2210,2212,2213,2214,2216,2310,2313
0,113656,0.0,0.0,0.000820,0.000205,0.0,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
1,113683,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000243,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
2,113696,0.0,0.0,0.001992,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
3,113703,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
4,113680,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
468,113533,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.003769,0.007538,0.000000,0.000000,0.008794,0.015075,0.0,0.0,0.000000,0.000000
469,113523,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.000000,0.004098,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
470,113494,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.000000,0.000000,0.000000,0.034358,0.000000,0.027125,0.0,0.0,0.009042,0.001808
471,113582,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.008130,0.000000,0.000813,0.000000,0.002439,0.000000,0.0,0.0,0.000000,0.000000


In [21]:
# Merge the data from the CSV file with modifiedcat
slc = pd.merge(modifiedcat, slc, left_on='hru_nhm', right_on='Unnamed: 0', how='inner')

In [22]:
slc

Unnamed: 0,hru_nhm,seg_nhm,POI_ID,hru_id,hru_segmen,hru_id_tb,hru_segm_1,Type_NCA,HUC04,Coastal_HR,...,2112,2113,2116,2210,2212,2213,2214,2216,2310,2313
0,113602,58183,6.500040e+13,113,3,3651,1723,0,0904,0,...,0.020893,0.005698,0.0,0.0,0.015195,0.011396,0.0,0.0,0.0,0.0
1,113598,58184,6.500040e+13,109,4,3647,1724,0,0904,0,...,0.008081,0.000000,0.0,0.0,0.050505,0.000000,0.0,0.0,0.0,0.0
2,113588,58185,6.500040e+13,99,5,3637,1725,0,0904,0,...,0.017857,0.000000,0.0,0.0,0.029762,0.000000,0.0,0.0,0.0,0.0
3,113584,58186,6.500040e+13,95,6,3633,1726,0,0904,0,...,0.021296,0.001852,0.0,0.0,0.039815,0.002778,0.0,0.0,0.0,0.0
4,113577,58188,6.500040e+13,88,8,3626,1728,0,0904,0,...,0.011905,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
468,114309,58671,2.300280e+13,699,0,4358,0,1,1005,0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
469,114313,58672,2.300280e+13,703,0,4362,0,1,1005,0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
470,114322,58673,2.300280e+13,712,0,4371,0,1,1005,0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
471,114388,58674,2.300280e+13,778,0,4437,0,1,1005,0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0


In [23]:
# Count the amount of slc columns
column_range = []
for column in slc.columns:
    # Check if the column header consists of either 3 or 4 digits
    if column.isdigit() and (len(column) == 3 or len(column) == 4):
        column_range.append(column)

In [24]:
len(column_range)

117

In [25]:
# Merge the selected columns from merged_data with data_to_write
final_geodata = pd.merge(geodata, slc[['seg_nhm'] + list(column_range)],  on='seg_nhm')
final_geodata

Unnamed: 0,seg_nhm,ds_seg_nhm,Shape_Area,lon,lat,mean,seg_slope,Shape_Leng,0101,0102,...,2112,2113,2116,2210,2212,2213,2214,2216,2310,2313
0,58183,0,3.652090e+07,-112.919857,49.598073,921.890625,0.00242,8135.825626867998,0.0,0.0,...,0.020893,0.005698,0.0,0.0,0.015195,0.011396,0.0,0.0,0.0,0.0
1,58184,58183,1.750700e+07,-112.859951,49.577578,924.506165,1e-05,4020.941946660042,0.0,0.0,...,0.008081,0.000000,0.0,0.0,0.050505,0.000000,0.0,0.0,0.0,0.0
2,58185,58184,5.667200e+06,-112.839964,49.570476,909.549744,0.00163,1671.934917989161,0.0,0.0,...,0.017857,0.000000,0.0,0.0,0.029762,0.000000,0.0,0.0,0.0,0.0
3,58186,58185,3.803350e+07,-112.879443,49.530760,928.137573,0.00162,26483.13594952753,0.0,0.0,...,0.021296,0.001852,0.0,0.0,0.039815,0.002778,0.0,0.0,0.0,0.0
4,58188,58186,2.801800e+06,-112.934132,49.503520,956.165771,0.00662,6040.944351902939,0.0,0.0,...,0.011905,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
468,58671,0,3.860121e+08,-108.661660,49.257402,922.959106,0,202460.006602,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
469,58672,0,9.586554e+08,-108.892603,49.169295,948.367615,0,295200.002203,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
470,58673,0,2.876408e+08,-109.130976,49.315396,988.979126,0,188640.001102,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
471,58674,0,2.727878e+08,-110.181345,48.645243,851.569946,0,105620.001701,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0


In [26]:
# set downstream values of 0 to -9999
final_geodata.loc[final_geodata['ds_seg_nhm'] == 0, 'ds_seg_nhm'] = -9999

In [27]:
final_geodata

Unnamed: 0,seg_nhm,ds_seg_nhm,Shape_Area,lon,lat,mean,seg_slope,Shape_Leng,0101,0102,...,2112,2113,2116,2210,2212,2213,2214,2216,2310,2313
0,58183,0,3.652090e+07,-112.919857,49.598073,921.890625,0.00242,8135.825626867998,0.0,0.0,...,0.020893,0.005698,0.0,0.0,0.015195,0.011396,0.0,0.0,0.0,0.0
1,58184,58183,1.750700e+07,-112.859951,49.577578,924.506165,1e-05,4020.941946660042,0.0,0.0,...,0.008081,0.000000,0.0,0.0,0.050505,0.000000,0.0,0.0,0.0,0.0
2,58185,58184,5.667200e+06,-112.839964,49.570476,909.549744,0.00163,1671.934917989161,0.0,0.0,...,0.017857,0.000000,0.0,0.0,0.029762,0.000000,0.0,0.0,0.0,0.0
3,58186,58185,3.803350e+07,-112.879443,49.530760,928.137573,0.00162,26483.13594952753,0.0,0.0,...,0.021296,0.001852,0.0,0.0,0.039815,0.002778,0.0,0.0,0.0,0.0
4,58188,58186,2.801800e+06,-112.934132,49.503520,956.165771,0.00662,6040.944351902939,0.0,0.0,...,0.011905,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
468,58671,-9999,3.860121e+08,-108.661660,49.257402,922.959106,0,202460.006602,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
469,58672,-9999,9.586554e+08,-108.892603,49.169295,948.367615,0,295200.002203,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
470,58673,-9999,2.876408e+08,-109.130976,49.315396,988.979126,0,188640.001102,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
471,58674,-9999,2.727878e+08,-110.181345,48.645243,851.569946,0,105620.001701,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0


In [28]:
#Rename the first columns to HYPE GeoData format
final_geodata.rename(columns={'seg_nhm':'subid' , 'ds_seg_nhm':'maindown', 'Shape_Area':'area', 'lon': 'longitude','lat': 'latitude', 'mean':'elev_mean', 'seg_slope':'slope_mean', 'Shape_Leng':'rivlen'}, inplace=True)   

Rename columns

In [29]:
# Generate the new column names
new_column_names = ['SLC_' + str(i + 1) for i in range(len(column_range))]

In [30]:
# Create a dictionary mapping old column names to new column names
column_name_mapping = {old_name: new_name for old_name, new_name in zip(column_range, new_column_names)}

# Rename the columns using the mapping
final_geodata.rename(columns=column_name_mapping, inplace=True)

In [31]:
final_geodata

Unnamed: 0,subid,maindown,area,longitude,latitude,elev_mean,slope_mean,rivlen,SLC_1,SLC_2,...,SLC_108,SLC_109,SLC_110,SLC_111,SLC_112,SLC_113,SLC_114,SLC_115,SLC_116,SLC_117
0,58183,0,3.652090e+07,-112.919857,49.598073,921.890625,0.00242,8135.825626867998,0.0,0.0,...,0.020893,0.005698,0.0,0.0,0.015195,0.011396,0.0,0.0,0.0,0.0
1,58184,58183,1.750700e+07,-112.859951,49.577578,924.506165,1e-05,4020.941946660042,0.0,0.0,...,0.008081,0.000000,0.0,0.0,0.050505,0.000000,0.0,0.0,0.0,0.0
2,58185,58184,5.667200e+06,-112.839964,49.570476,909.549744,0.00163,1671.934917989161,0.0,0.0,...,0.017857,0.000000,0.0,0.0,0.029762,0.000000,0.0,0.0,0.0,0.0
3,58186,58185,3.803350e+07,-112.879443,49.530760,928.137573,0.00162,26483.13594952753,0.0,0.0,...,0.021296,0.001852,0.0,0.0,0.039815,0.002778,0.0,0.0,0.0,0.0
4,58188,58186,2.801800e+06,-112.934132,49.503520,956.165771,0.00662,6040.944351902939,0.0,0.0,...,0.011905,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
468,58671,-9999,3.860121e+08,-108.661660,49.257402,922.959106,0,202460.006602,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
469,58672,-9999,9.586554e+08,-108.892603,49.169295,948.367615,0,295200.002203,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
470,58673,-9999,2.876408e+08,-109.130976,49.315396,988.979126,0,188640.001102,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
471,58674,-9999,2.727878e+08,-110.181345,48.645243,851.569946,0,105620.001701,0.0,0.0,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0


Sort geodata

In [32]:
# Convert string values to integers
order = list(map(int, order))

# Set 'seg_nhm' column as the index
final_geodata.set_index('subid', inplace=True)

# Convert index values to integers
final_geodata.index = final_geodata.index.astype(int)

# Reindex the DataFrame based on the order specified in the 'order' list
final_geodata_sorted = final_geodata.reindex(order)

In [33]:
final_geodata_sorted

Unnamed: 0_level_0,maindown,area,longitude,latitude,elev_mean,slope_mean,rivlen,SLC_1,SLC_2,SLC_3,...,SLC_108,SLC_109,SLC_110,SLC_111,SLC_112,SLC_113,SLC_114,SLC_115,SLC_116,SLC_117
subid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
58675,-9999,2.730521e+08,-107.976305,48.849060,860.715027,0,129339.991902,0.0,0.0,0.00013,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
58674,-9999,2.727878e+08,-110.181345,48.645243,851.569946,0,105620.001701,0.0,0.0,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
58673,-9999,2.876408e+08,-109.130976,49.315396,988.979126,0,188640.001102,0.0,0.0,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
58672,-9999,9.586554e+08,-108.892603,49.169295,948.367615,0,295200.002203,0.0,0.0,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
58671,-9999,3.860121e+08,-108.661660,49.257402,922.959106,0,202460.006602,0.0,0.0,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58185,58184,5.667200e+06,-112.839964,49.570476,909.549744,0.00163,1671.934917989161,0.0,0.0,0.00000,...,0.017857,0.000000,0.000000,0.000000,0.029762,0.000000,0.0,0.0,0.0,0.0
58231,58228,1.663663e+08,-113.103429,49.481403,1044.178101,0.00285,37256.33749757955,0.0,0.0,0.00000,...,0.012413,0.001052,0.002104,0.002104,0.003577,0.000000,0.0,0.0,0.0,0.0
58184,58183,1.750700e+07,-112.859951,49.577578,924.506165,1e-05,4020.941946660042,0.0,0.0,0.00000,...,0.008081,0.000000,0.000000,0.000000,0.050505,0.000000,0.0,0.0,0.0,0.0
58228,58183,2.611930e+07,-112.932045,49.556180,947.332703,0.00483,11789.84247158781,0.0,0.0,0.00000,...,0.009358,0.001337,0.000000,0.000000,0.022727,0.002674,0.0,0.0,0.0,0.0


In [34]:
final_geodata_sorted.to_csv('../model/add_HDS_GeoData.txt', sep='\t', index=True)