In [1]:
import pandas as pd
import geopandas as gpd
import networkx as nx
import numpy as np

### Read Data

In [2]:
# Load the tab-separated GeoData table; its first column is used as the index
geodata = pd.read_csv(
    '../model/add_HDS_GeoData.txt',
    sep='\t',
    header=0,
    index_col=0,
)

In [3]:
# Load the HYPE depression depths GeoPackage
hds_depths = gpd.read_file(
    '../geospacial/depressions/HYPE_depression_depths.gpkg'
)

In [4]:
# Name of the SLC column used as the ilake class for the Milk;
# it is filled with the HDS fraction further down.
milk_ilake = 'SLC_62'

### Remove HDS from St. Mary

In [5]:
# Build a directed graph of the river network from `geodata` (one edge per subid -> maindown)
riv_graph = nx.from_pandas_edgelist(
    geodata.reset_index(),      # turn the 'subid' index into a regular column
    source='subid',             # each edge starts at a subid ...
    target='maindown',          # ... and points to that row's 'maindown'
    create_using=nx.DiGraph,
)

In [6]:
# Collect the St. Mary segments: segment 58183 itself followed by every node
# in riv_graph from which 58183 can be reached.
stmary = [58183, *nx.ancestors(riv_graph, 58183)]

In [7]:
# Number of St. Mary segments: segment 58183 plus all of its ancestors in riv_graph
len(stmary)

40

In [8]:
# Drop every depression record whose 'seg_nhm' is a St. Mary segment,
# so that only the Milk is left.
is_stmary_segment = hds_depths['seg_nhm'].isin(stmary)
hdsdepths_filtered = hds_depths.loc[~is_stmary_segment]

### Format GeoData

In [9]:
# Index the filtered depression records by 'seg_nhm' so they can be joined
# to the GeoData rows on the index.
hdsdepths_filtered = hdsdepths_filtered.set_index(keys='seg_nhm')


In [10]:
# Left-join the depression statistics onto the GeoData rows using the shared index;
# every GeoData row is kept, rows without a match get NaN in the new columns.
depression_stats = hdsdepths_filtered[['_count', '_mean']]
merged_geodata = geodata.merge(
    depression_stats,
    how='left',
    left_index=True,
    right_index=True,
)

In [11]:
# Rows without a matching depression record carry NaN in the merged columns;
# replace those NaNs with 0.
# The frame is reassigned instead of calling fillna(inplace=True) on a selected
# column: that chained in-place form raises a FutureWarning in pandas 2.x and
# has no effect on the frame once Copy-on-Write is active.
merged_geodata = merged_geodata.fillna({'_count': 0, '_mean': 0})

In [12]:
# Give the '_mean' column its final name, 'hds_depth'
merged_geodata = merged_geodata.rename(columns={'_mean': 'hds_depth'})

In [13]:
# Find the positional index of the first and last SLC column.
# Later cells slice rows by position (first_slc_index .. last_slc_index), so both
# values must exist and the range must contain SLC columns only.
filtered_columns = merged_geodata.filter(like='SLC').columns

if len(filtered_columns) == 0:
    # Stop here instead of only printing: otherwise the following cells would hit a
    # NameError, or silently reuse indices left over from an earlier run.
    raise ValueError("No columns with 'SLC' in the name found.")

first_slc_index = merged_geodata.columns.get_loc(filtered_columns[0])
last_slc_index = merged_geodata.columns.get_loc(filtered_columns[-1])

# A positional slice is only correct if no other column sits between the SLC columns.
if last_slc_index - first_slc_index + 1 != len(filtered_columns):
    raise ValueError(
        "SLC columns are not contiguous; slicing by position would include other columns."
    )

print("First column index with 'SLC':", first_slc_index)
print("Last column index with 'SLC':", last_slc_index)

First column index with 'SLC': 7
Last column index with 'SLC': 123


In [14]:
# Turn the cell count into an area by multiplying with the area of a single cell
# (30 x 30 -- presumably a 30 m raster; confirm against the depression layer).
merged_geodata['_count'] = merged_geodata['_count'].mul(30 * 30)

In [15]:
# New column 'hds_frac': depression area ('_count', already converted) divided by 'area'
merged_geodata['hds_frac'] = merged_geodata['_count'].div(merged_geodata['area'])

In [16]:
# Store the HDS fraction in the ilake SLC column named by `milk_ilake`
merged_geodata[milk_ilake] = merged_geodata['hds_frac']

### Adjust Milk SLC fractions based on weights

In [17]:
# Rescale the non-ilake SLC fractions of every row so that, together with the
# ilake fraction, they add up to 1:
#
#     new_slc = old_slc / (sum of the other SLCs) * (1 - ilake fraction)
#
# This is done column-wise for all rows at once instead of writing one cell at a
# time inside a nested loop.

# Every 'SLC_*' column except the ilake one is rescaled.
other_slc_cols = [
    col for col in merged_geodata.columns
    if col.startswith('SLC_') and col != milk_ilake
]

ilake_frac = merged_geodata[milk_ilake]

# Sum of all SLC columns (positional range found earlier) minus the ilake column
other_slc_sum = (
    merged_geodata.iloc[:, first_slc_index:last_slc_index + 1].sum(axis=1)
    - ilake_frac
)

# Rows whose other SLCs sum to zero cannot be rescaled (0 / 0 would give NaN and
# end up in the output file); they are left unchanged and will be reported by the
# row-sum check unless their ilake fraction alone equals 1.
rescalable = other_slc_sum != 0

merged_geodata.loc[rescalable, other_slc_cols] = (
    merged_geodata.loc[rescalable, other_slc_cols]
    .div(other_slc_sum[rescalable], axis=0)
    .mul(1 - ilake_frac[rescalable], axis=0)
)


### Check that SLCs still sum to 1

In [18]:
# Verify, row by row, that the SLC fractions add up to 1
slc_span = slice(first_slc_index, last_slc_index + 1)

for subid, record in merged_geodata.iterrows():
    slc_total = record.iloc[slc_span].sum()

    # Rows within tolerance need no message
    if np.isclose(slc_total, 1, rtol=1e-6):
        continue

    print(f"Warning: Row {subid} does not sum to 1 (Sum: {slc_total})")

In [19]:
    # Reorder the columns so that 'hds_depth' sits at the position of the first 'SLC' column
    columns = merged_geodata.columns.tolist()
    columns.remove('hds_depth')
    columns.insert(first_slc_index, 'hds_depth')
    merged_geodata = merged_geodata[columns]

In [20]:
# Remove the two helper columns ('hds_frac' and '_count') in one step
merged_geodata = merged_geodata.drop(columns=['hds_frac', '_count'])

In [21]:
# Display the resulting table for a visual check before it is written to disk
merged_geodata

Unnamed: 0_level_0,maindown,area,longitude,latitude,elev_mean,slope_mean,rivlen,hds_depth,SLC_1,SLC_2,...,SLC_108,SLC_109,SLC_110,SLC_111,SLC_112,SLC_113,SLC_114,SLC_115,SLC_116,SLC_117
subid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
58675,-9999,2.730521e+08,-107.976305,48.849060,860.715027,0.00000,129339.991902,2.232598,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
58674,-9999,2.727878e+08,-110.181345,48.645243,851.569946,0.00000,105620.001701,0.509844,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
58673,-9999,2.876408e+08,-109.130976,49.315396,988.979126,0.00000,188640.001102,2.674205,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
58672,-9999,9.586554e+08,-108.892603,49.169295,948.367615,0.00000,295200.002203,1.646900,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
58671,-9999,3.860121e+08,-108.661660,49.257402,922.959106,0.00000,202460.006602,0.782555,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58185,58184,5.667200e+06,-112.839964,49.570476,909.549744,0.00163,1671.934918,0.000000,0.0,0.0,...,0.017857,0.000000,0.000000,0.000000,0.029762,0.000000,0.0,0.0,0.0,0.0
58231,58228,1.663663e+08,-113.103429,49.481403,1044.178101,0.00285,37256.337498,0.000000,0.0,0.0,...,0.012413,0.001052,0.002104,0.002104,0.003577,0.000000,0.0,0.0,0.0,0.0
58184,58183,1.750700e+07,-112.859951,49.577578,924.506165,0.00001,4020.941947,0.000000,0.0,0.0,...,0.008081,0.000000,0.000000,0.000000,0.050505,0.000000,0.0,0.0,0.0,0.0
58228,58183,2.611930e+07,-112.932045,49.556180,947.332703,0.00483,11789.842472,0.000000,0.0,0.0,...,0.009358,0.001337,0.000000,0.000000,0.022727,0.002674,0.0,0.0,0.0,0.0


In [22]:
# Write the finished table (a pandas DataFrame) as tab-separated text, index included
merged_geodata.to_csv(
    '../model/GeoData.txt',
    sep='\t',
    index=True,
)