In [1]:
import hydrant.gis.gis as gis
import pandas as pd
from typing import Dict, Union
from itertools import product


In [2]:
## Illustrative examples

In [3]:
# Toy inputs for gis.intersect_df: three small tables, each carrying its own
# ID column plus a handful of per-class value columns.
# 'B' in data1 and 'D' in data3 do not follow the class-column naming of the
# other columns in their table -- presumably included to exercise the report
# returned by intersect_df (TODO confirm against the gis module).
data1 = {
    'ID1': [1, 2, 3],
    'A_1': [0.3, 0, 0],
    'A_2': [0, 0, 0], 'A_3': [0, 0, 0], 'A_4': [0, 0, 0],  # all-zero classes
    'A_5': [0.2, 0, 1],
    'B': [0, 0, 0],
}
data2 = {'ID2': [1, 2, 3], 'B1': [1, 1, 1], 'B2': [1, 1, 1]}
data3 = {'ID3': [1, 2, 3], 'C1': [1, 1, 1], 'C2': [1, 1, 1], 'D': [1, 1, 1]}

# One DataFrame per table, in the same order as the dictionaries above.
df1, df2, df3 = (pd.DataFrame(table) for table in (data1, data2, data3))

# Per-frame settings, keyed 'df1', 'df2', ... in the order the frames are
# passed: the ID column, the prefix of the class columns, and the label used
# for that dataset in the combined column names.
mappings = {
    'df1': {'id': 'ID1', 'prefix': 'A', 'data_name': 'LULC'},
    'df2': {'id': 'ID2', 'prefix': 'B', 'data_name': 'S'},
    'df3': {'id': 'ID3', 'prefix': 'C', 'data_name': 'ASP'},
}

# In the recorded output only LULC_1 and LULC_5 combinations appear; the
# all-zero A_2..A_4 classes are absent with remove_zero_combinations=True.
result, report = gis.intersect_df(
    df1, df2, df3,
    df_mappings=mappings,
    remove_zero_combinations=True,
)

# Same text as two consecutive print() calls: result first, then the report.
print(result, report, sep='\n')

The indexes of all DataFrames are exactly the same with the same order.
     (LULC_1, S1, ASP1)  (LULC_1, S1, ASP2)  (LULC_1, S2, ASP1)  \
ID1                                                               
1                   0.3                 0.3                 0.3   
2                   0.0                 0.0                 0.0   
3                   0.0                 0.0                 0.0   

     (LULC_1, S2, ASP2)  (LULC_5, S1, ASP1)  (LULC_5, S1, ASP2)  \
ID1                                                               
1                   0.3                 0.2                 0.2   
2                   0.0                 0.0                 0.0   
3                   0.0                 1.0                 1.0   

     (LULC_5, S2, ASP1)  (LULC_5, S2, ASP2)  
ID1                                          
1                   0.2                 0.2  
2                   0.0                 0.0  
3                   1.0                 1.0  
     Combinations       B 

In [4]:
# Toy fraction table: one row per ID, one 'Frac_<n>' column per class.
# Row 10 sums to 1.1, row 20 sums to 1.0 and row 30 is all zeros, so the
# three rows exercise different paths of the 'normalize' action.
data = {
    'ID':     [10, 20, 30],
    'Frac_1': [0.0, 0.2, 0.0],
    'Frac_2': [0.5, 0.5, 0.0],
    'Frac_3': [0.6, 0.3, 0.0],
    'Frac_4': [0.0, 0.0, 0.0],
}
df = pd.DataFrame(data)

# Column roles handed to the helper: the ID column and the class-column prefix.
fraction_mapping = {'id': 'ID', 'prefix': 'Frac_'}

# Observations from the recorded output of this cell:
#   * row 10 is rescaled so its fractions sum to 1 (0.5/0.6 -> 0.4545/0.5455);
#   * row 20 is unchanged (its 0.2 entry equals minimum_value and is kept);
#   * the all-zero row 30 comes back with Frac_2 == 1.0;
#   * `df` itself prints with ID as its index after the call.
# NOTE(review): the last point suggests the helper modifies its input frame --
# confirm in the gis module before relying on `df` afterwards.
df_modified = gis.manipulating_fractions(
    df,
    df_mapping=fraction_mapping,
    action='normalize',
    minimum_value=0.20,
)

# Input frame first, processed frame second (same text as two print() calls).
print(df, df_modified, sep='\n')

    Frac_1  Frac_2  Frac_3  Frac_4
ID                                
10     0.0     0.5     0.6     0.0
20     0.2     0.5     0.3     0.0
30     0.0     0.0     0.0     0.0
    Frac_1    Frac_2    Frac_3  Frac_4
ID                                    
10     0.0  0.454545  0.545455     0.0
20     0.2  0.500000  0.300000     0.0
30     0.0  1.000000  0.000000     0.0


In [5]:
## Real case

In [6]:
# Load the real-case tables: land-cover statistics and soil-class statistics.
# Both are read from CSV files under ./data/gis/ (paths relative to the
# notebook's working directory).
land_cover = pd.read_csv('./data/gis/West_stats_NA_NALCMS_landcover_2020_30m.csv')
soil_type = pd.read_csv('./data/gis/West_stats_soil_classes1.csv')

# --- Disabled one-off preprocessing (kept for reference) ---------------------
# Expands the 'majority' soil class into one-hot 'frac_<n>' columns and writes
# the table back to the same CSV.
# NOTE(review): the frame displayed below carries 'Unnamed: 0.1' and
# 'Unnamed: 0' columns, which looks like the index was saved on earlier
# round-trips; if this is re-enabled, consider to_csv(..., index=False).
# # This part can be removed if fraction of each land cover is given
# fractions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
# # Create new columns based on majority fraction
# for frac in fractions:
#     col_name = f"frac_{frac}"
#     soil_type[col_name] = soil_type['majority'].apply(lambda x: 1 if x == frac else 0.0)

# soil_type.to_csv('./data/gis/West_stats_soil_classes1.csv')


# Bare expression: this is the only live statement after the loads, so the
# soil table is what the cell displays.
soil_type
# --- Disabled draft of the intersection call ----------------------------------
# # Usage example:
# result, report_df = gis.intersect_df(land_cover, soil_type, 
#                                      df_mappings={'df1': {'id': 'COMID', 'prefix':'Frac_' , 'data_name':'LULC'}, 
#                                                   'df2': {'id': 'COMID', 'prefix':'Frac_' , 'data_name':'Soil'}},
#                                      remove_zero_combinations = True)

# result, report_df

Unnamed: 0.1,Unnamed: 0,COMID,majority,frac_0,frac_1,frac_2,frac_3,frac_4,frac_5,frac_6,frac_7,frac_8,frac_9,frac_10,frac_11,frac_12
0,0,71022153,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,71022160,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,71022164,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,71022165,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,71022168,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
448,448,71039048,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
449,449,71039109,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
450,450,71039121,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
451,451,71039250,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# Real case: combine soil-class and land-cover fractions per COMID.
# The tables are re-read here so this cell starts from the files on disk and
# does not depend on whatever an earlier cell left in `soil_type`/`land_cover`.
land_cover = pd.read_csv('./data/gis/West_stats_NA_NALCMS_landcover_2020_30m.csv')
soil_type = pd.read_csv('./data/gis/West_stats_soil_classes1.csv')

# drop the 0 soil type which is mostly to represent water
soil_type = soil_type.drop(columns='frac_0')

# get the majority of the soil type for each subbasin
# NOTE(review): unlike the illustrative example, no 'id' key is passed in
# df_mapping here -- confirm that manipulating_fractions treats it as optional.
soil_type = gis.manipulating_fractions(
    soil_type,
    df_mapping={'prefix': 'frac_'},
    action='majority',
)

# smooth land cover and remove below 5% land cover and renormalize
land_cover = gis.manipulating_fractions(
    land_cover,
    df_mapping={'prefix': 'frac_'},
    action='normalize',
    minimum_value=0.05,
)

# call the function
# Fix: intersect_df is reached through the `gis` module alias. The notebook
# only does `import hydrant.gis.gis as gis`, so a bare `intersect_df` raises
# NameError on a fresh kernel.
result, report = gis.intersect_df(
    soil_type, land_cover,
    df_mappings={'df1': {'id': 'COMID', 'prefix': 'frac_', 'data_name': 'soil'},
                 'df2': {'id': 'COMID', 'prefix': 'frac_', 'data_name': 'LULC'}},
    remove_zero_combinations=True,
)

print(result)
print(report)

frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3
frac_3