In [3]:
## Tool developed by Satish Bastola, UNO based on the recently published dataset that analyzed 211 river basins from the Gulf Sate (Lousiana..) peak flod analysis

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
from scipy.stats import norm
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
from scipy.stats import genextreme as gev


# Load dataset
df = pd.read_csv("LApkflows.csv")

def estimate_flood_magnitude_ema(T, EMA_mean, EMA_std, EMA_skew):
    # Return periods and exceedance probabilities
    return_periods = T #np.array([2, 5, 10, 25, 50, 100, 200, 500])
    exceedance_probs = 1 / return_periods
    # Compute z-scores (standard normal deviates)
    z_scores = norm.ppf(1 - exceedance_probs)
    # Compute frequency factor K using Cornish-Fisher expansion
    G = EMA_skew
    K = (z_scores +(1/6)*(z_scores**2 - 1)*G +(1/24)*(z_scores**3 - 3*z_scores)*(G**2)-(1/36)*(2*z_scores**3 - 5*z_scores)*(G**3))
    # Compute log(Q_T)
    log_QT = EMA_mean + K * EMA_std
    # Convert to actual discharge values
    QT = 10 ** log_QT

    # Create DataFrame
#    flood_data = pd.DataFrame({"Return Period (years)": return_periods,"Flood Magnitude (Q_T)": QT})
    return QT

# Define features and response columns
features = ['LAT_CENT','LONG_CENT','DRNAREA','ELEV','BSHAPELFP','CSL1085LFP','PRECPRIS00']
response_columns = ['EMAmean','EMAstDev','EMAskew']

# Convert to numeric and drop rows with missing values
df[features + response_columns] = df[features + response_columns].apply(pd.to_numeric, errors='coerce')
df_cleaned = df.dropna(subset=features + response_columns)

# Split data
X = df_cleaned[features]
Y = df_cleaned[response_columns]
X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.4, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=42)

# Define models
models = {
    'Linear Regression': MultiOutputRegressor(LinearRegression()),
    'Random Forest': MultiOutputRegressor(RandomForestRegressor(random_state=42)),
    'Gradient Boosting': MultiOutputRegressor(GradientBoostingRegressor(random_state=42))
}

# Store results
results = []

# Train and evaluate each model
for model_name, model in models.items():
    model.fit(X_train, Y_train)

    for split_name, X_split, Y_actual in zip(['Training', 'Validation', 'Testing'], [X_train, X_val, X_test], [Y_train, Y_val, Y_test]):
        Y_pred = model.predict(X_split)

        for i in range(len(Y_actual)):
            results.append({
                'Model': model_name,
                'Split': split_name,
                'Sample Index': Y_actual.index[i],
                'Actual_EMAmean': Y_actual.iloc[i, 0],
                'Actual_EMAstDev': Y_actual.iloc[i, 1],
                'Actual_EMAskew': Y_actual.iloc[i, 2],
                'Predicted_EMAmean': Y_pred[i][0],
                'Predicted_EMAstDev': Y_pred[i][1],
                'Predicted_EMAskew': Y_pred[i][2],
                'Actual_FF2':estimate_flood_magnitude_ema(2,Y_actual.iloc[i, 0], Y_actual.iloc[i, 1], Y_actual.iloc[i, 2]),
                'Predicted_FF2':estimate_flood_magnitude_ema(2,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                'Actual_FF5':estimate_flood_magnitude_ema(5,Y_actual.iloc[i, 0], Y_actual.iloc[i, 1], Y_actual.iloc[i, 2]),
                'Predicted_FF5':estimate_flood_magnitude_ema(5,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                'Actual_FF10':estimate_flood_magnitude_ema(10,Y_actual.iloc[i, 0], Y_actual.iloc[i, 1], Y_actual.iloc[i, 2]),
                'Predicted_FF10':estimate_flood_magnitude_ema(10,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                'Actual_FF25':estimate_flood_magnitude_ema(25,Y_actual.iloc[i, 0], Y_actual.iloc[i, 1], Y_actual.iloc[i, 2]),
                'Predicted_FF25':estimate_flood_magnitude_ema(25,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                'Actual_FF50':estimate_flood_magnitude_ema(50,Y_actual.iloc[i, 0], Y_actual.iloc[i, 1], Y_actual.iloc[i, 2]),
                'Predicted_FF50':estimate_flood_magnitude_ema(50,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                'Actual_FF100':estimate_flood_magnitude_ema(100,Y_actual.iloc[i, 0], Y_actual.iloc[i, 1], Y_actual.iloc[i, 2]),
                'Predicted_FF100':estimate_flood_magnitude_ema(100,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                'Actual_FF200':estimate_flood_magnitude_ema(200,Y_actual.iloc[i, 0], Y_actual.iloc[i, 1], Y_actual.iloc[i, 2]),
                'Predicted_FF200':estimate_flood_magnitude_ema(200,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                'Actual_FF500':estimate_flood_magnitude_ema(500,Y_actual.iloc[i, 0], Y_actual.iloc[i, 1], Y_actual.iloc[i, 2]),
                'Predicted_FF500':estimate_flood_magnitude_ema(500,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2])              
                
                
            })

# Convert to DataFrame
results_df = pd.DataFrame(results)



# Loop over each row in the results DataFrame
# for index, row in results_df.iterrows():
#     print(f"Model: {row['Model']}")
#     print(f"Split: {row['Split']}")
#     print(f"Sample Index: {row['Sample Index']}")
#     print(f"Actual EMAmean: {row['Actual_EMAmean']:.3f}")
#     print(f"Actual EMAstDev: {row['Actual_EMAstDev']:.3f}")
#     print(f"Actual EMAskew: {row['Actual_EMAskew']:.3f}")
#     print(f"Predicted EMAmean: {row['Predicted_EMAmean']:.3f}")
#     print(f"Predicted EMAstDev: {row['Predicted_EMAstDev']:.3f}")
#     print(f"Predicted EMAskew: {row['Predicted_EMAskew']:.3f}")
#     print("-" * 50)
#     estimate_flood_magnitude_ema(row['Actual_EMAean'], row['Actual_EMAstDev'], row['EMAskew'])


# Display the first few rows
#print(results_df.head())

# Optional: Save to CSV
results_df.to_csv("all_predictions.csv", index=False)

# User supply input
# features = ['LAT_CENT','LONG_CENT','DRNAREA','ELEV','BSHAPELFP','CSL1085LFP','PRECPRIS00']
# response_columns = ['EMAmean','EMAstDev','EMAskew']


def estimate_flood_magnitude_ema(T,EMA_mean,EMA_std,EMA_skew):
    # Return periods and exceedance probabilities
    #return_periods = np.array([2, 5, 10, 25, 50, 100, 200, 500])
    exceedance_probs = 1 / T
    # Compute z-scores (standard normal deviates)
    z_scores = norm.ppf(1 - exceedance_probs)
    # Compute frequency factor K using Cornish-Fisher expansion
    G = EMA_skew
    K = (z_scores +(1/6)*(z_scores**2 - 1)*G +(1/24)*(z_scores**3 - 3*z_scores)*(G**2)-(1/36)*(2*z_scores**3 - 5*z_scores)*(G**3))
    # Compute log(Q_T)
    log_QT = EMA_mean + K * EMA_std
    # Convert to actual discharge values
    QT = 10 ** log_QT

    # Create DataFrame
    #flood_data = pd.DataFrame({"Return Period (years)": return_periods,"Flood Magnitude (Q_T)": QT})
    return QT

results1 = []
zx1=input("type the basin centroid LAT_CENT e.g., 31.193")
zx2=input("type the basin centroid LONG_CENT e.g., -89.693")
zx3=input("type the basin area in DRNAREAsq mile e.g., 0.97")
zx4=input("type the basin elev ELEV feet e.g., 353")
zx5=input("type the basin shapefactor BSHAPELFP e.g., 2.33")
zx6=input("type the basin CSL1085LFP feet/mile e.g., 68.3")
zx7=input("type the basin PRECIPS00 e.g., inches/yr 44.41")

# features = ['LAT_CENT','LONG_CENT','DRNAREA','ELEV','BSHAPELFP','CSL1085LFP','PRECPRIS00']

X_sample = pd.DataFrame([features], columns=['LAT_CENT','LONG_CENT','DRNAREA','ELEV','BSHAPELFP','CSL1085LFP','PRECPRIS00'])

#features = [31.193, -89.963, 0.97, 353, 2.33, 68.3, 44.41]

features = [zx1, zx2, zx3, zx4, zx5, zx6, zx7]
          
          
Xx = features
# Select 10 random samples for validation
X_sample[:] = Xx

# sample_indices = np.random.choice(X.index, size=1, replace=False)
# X_sample1 = X.loc[sample_indices]


#print(Xx)



for model_name, model in models.items():
    #print(model)
    Y_pred = model.predict(X_sample)
    for i in range(len(X_sample)):
        results1.append({'Predicted_FF2':estimate_flood_magnitude_ema(2,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                         'Predicted_FF5':estimate_flood_magnitude_ema(5,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                         'Predicted_FF10':estimate_flood_magnitude_ema(10,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                         'Predicted_FF25':estimate_flood_magnitude_ema(25,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                         'Predicted_FF50':estimate_flood_magnitude_ema(50,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                         'Predicted_FF100':estimate_flood_magnitude_ema(100,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                         'Predicted_FF200':estimate_flood_magnitude_ema(200,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2]),
                         'Predicted_FF500':estimate_flood_magnitude_ema(500,Y_pred[i][0], Y_pred[i][1], Y_pred[i][2])})
       

results_df1 = pd.DataFrame(results1)   
    
print(results_df1[::10])

type the basin centroid LAT_CENT e.g., 31.19331.193
type the basin centroid LONG_CENT e.g., -89.693-89.693
type the basin area in DRNAREAsq mile e.g., 0.970.97
type the basin elev ELEV feet e.g., 353353
type the basin shapefactor BSHAPELFP e.g., 2.332.33
type the basin CSL1085LFP feet/mile e.g., 68.368.3
type the basin PRECIPS00 e.g., inches/yr 44.4144.41
   Predicted_FF2  Predicted_FF5  Predicted_FF10  Predicted_FF25  \
0     104.457954     164.306827      213.061739       286.40509   

   Predicted_FF50  Predicted_FF100  Predicted_FF200  Predicted_FF500  
0      350.468774        423.40902       506.626985       635.021603  
