In [1]:
import pandas as pd
import numpy as np
import os
import sys

# Set paths
notebook_dir = os.getcwd()
src_path = os.path.abspath(os.path.join(notebook_dir, '..', 'src'))
if src_path not in sys.path:
    sys.path.append(src_path)

from utils import add_src_to_path
add_src_to_path()
from model import train_and_apply_with_tuning
from model import retrain_model_with_best_params
from data_split import split_time_series_by_river

# === File paths ===
notebook_dir = os.getcwd()
base_path = os.path.abspath(os.path.join(notebook_dir, '..'))
models_path = os.path.abspath(os.path.join(notebook_dir, '..', "results", "best_models_per_river.csv"))
training_data_path = os.path.abspath(os.path.join(notebook_dir, '..', "data", "Combined_FeatureSet_For_Model.csv"))
prediction_data_path = os.path.abspath(os.path.join(notebook_dir, '..', "data", "Samples_2024.csv"))

# === Load data ===
best_models_df = pd.read_csv(models_path)
df_entiredata = pd.read_csv(training_data_path)
df_prediction = pd.read_csv(prediction_data_path)


# === Result storage ===
results = []
retro_dfs = []  # to collect retrospective river-year prediction table

# === Loop through each river-model pair ===
for _, row in best_models_df.iterrows():
    river_name = row["River_Name"].replace("River_", "")
    river_system = row["System"]
    model_type = row["Model"].split(" - ")[0]
    features = row["Feature_Names"].split(", ")
    top_k_features = row["TopK_Features"]

    print(river_name)

    df_prediction["River_Name"] = df_prediction["River"]
    df_prediction_encoded = pd.get_dummies(df_prediction, columns=["River"], prefix="River")
    df_prediction_encoded = df_prediction_encoded[df_prediction_encoded["River_Name"] == river_name]
    combined_df = df_entiredata[df_entiredata["System"] == river_system]

    # Drop features that have missing values in 2024
    columns_to_drop = [
        'mean_temp_JunAug', 'mean_temp_MaySep', 'max_temp_JunAug',
        'mean_discharge_MarMay', 'max_discharge_AprSep'
    ]
    combined_df = combined_df.drop(columns=[col for col in columns_to_drop if col in combined_df.columns])

    if river_system == "Fraser River":
        columns_to_drop = [
            'total_spawners_y_minus_2', 'total_spawners_y_minus_3', 
            'total_spawners_y_minus_2_to_4'
        ]
        combined_df = combined_df.drop(columns=[col for col in columns_to_drop if col in combined_df.columns])

    # Setup special rules for river Ugashik
    if river_name == "Ugashik":
        apply_strict_dropna = False # Delets the spawner data for Bristol Bay
    else:
        apply_strict_dropna = True

    combined_df = combined_df[combined_df["System"] == river_system]

    # Feature engineering  
    if row["Additional_Features_Used"]:
        features_to_lag = [
            'Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3', 'AgeClass_0.4', 'AgeClass_0.5',
            'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5',
            'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4',
            'AgeClass_3.1', 'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4',
            'Total_Returns_NextYear', 'Pacea_ALPI_Anomaly', 'npi_mean_NovMar', 'oni_mean_DecFeb',
            'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep'
        ]
        for feat in features_to_lag:
            for lag in [1, 2, 3, 4, 5]:
                combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)

    if (apply_strict_dropna and river_system == "Bristal Bay") or river_system == "Columbia River":
        # If Columbia River or Bristol Bay, only keep lines with spawner data
        combined_df = combined_df.dropna(subset=['total_spawners_y_minus_2_to_4'])
    if row["Additional_Features_Used"]:
        combined_df = combined_df.dropna(subset=['AgeClass_0.2_Yminus5'])

    # If system is Bristol Bay, consider only years after 1995
    if river_system == "Bristol Bay":
        combined_df = combined_df[combined_df["Year"] >= 1995]

    # Drop features with any NaN values
    combined_df = combined_df.dropna(axis=1, how='any').dropna()
    df_prediction_encoded = df_prediction_encoded.dropna(axis=1, how='any').dropna()

    # Step 1: Tune model based on 80:20 split
    # Split for tuning
    train_df, test_df = split_time_series_by_river(
        combined_df,
        time_column="Year",
        group_columns=["System", "River"],
        test_fraction=0.2,
        gap_years=0
    )

    train_df["River_Name"] = train_df["River"]
    test_df["River_Name"] = test_df["River"]

    train_df_encoded = pd.get_dummies(train_df, columns=["River"], prefix="River")
    test_df_encoded = pd.get_dummies(test_df, columns=["River"], prefix="River")

    # Tune model
    result_dict = train_and_apply_with_tuning(model=model_type, train_df=train_df_encoded, test_df=test_df_encoded, topk_feat=top_k_features)
    best_params = result_dict["Best_Params"]
    selected_features = result_dict["Selected_Feature_Names"]


    # Step 2: Retrain on full training data with best params
    combined_df['River_Name'] = combined_df['River']
    combined_df_encoded = pd.get_dummies(combined_df, columns=["River"], prefix="River")

    predictions, retro_df_2020_2024 = retrain_model_with_best_params(
        model_type=model_type,
        best_params=best_params,
        full_train_df=combined_df_encoded,
        prediction_df=df_prediction_encoded,
        selected_features=selected_features,
        river_name = river_name
    )

    for i, pred in enumerate(predictions):
        results.append({
            "River_Name": river_name,
            "Model": model_type,
            "Prediction": pred
        })

    # Collect retrospective predictions (2019–2024)
    if not retro_df_2020_2024.empty:
        retro_df_2020_2024["River_Name"] = river_name
        retro_dfs.append(retro_df_2020_2024)

# === Save output ===
output_df = pd.DataFrame(results)
output_path = os.path.join(base_path, "results", "predictions_2024.csv")
output_df.to_csv(output_path, index=False)


all_retro_dfs = pd.concat(retro_dfs, ignore_index=True)
retro_output_path = os.path.join(base_path, "results", "retro_predictions_2019_2024.csv")
all_retro_dfs.to_csv(retro_output_path, index=False)


Alagnak
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.47
LR MSE: 10988541002089.13
LR MAPE: 32.59
Egegik


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.33
LR MSE: 14041324200705.45
LR MAPE: 39.73
Igushik
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
No parameter tuning for this model.
PR R2: -8.25
PR MSE: 192841633948745.72
PR MAPE: 76.79
Kvichak
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
No parameter tuning for this model.
PR R2: -8.25
PR MSE: 192841633948745.72
PR MAPE: 76.79
Naknek
Sel

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Nushagak
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3', 'AgeClass_2.1_Yminus5',
       'AgeClass_2.3_Yminus1', 'AgeClass_2.3_Yminus2',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'Total_Returns_NextYear_Yminus5', 'River_Igushik'],
      dtype='object')


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 2}
GBRT R2: 0.46
GBRT MSE: 11256462567514.42
GBRT MAPE: 40.35
Ugashik
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.47
LR MSE: 10988541002089.13
LR MAPE: 32.59
Bonneville Lock & Dam
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 's

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'Total_Returns_Yminus3',
       'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3', 'AgeClass_1.1_Yminus4',
       'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus2', 'AgeClass_2.2_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.51
LR MSE: 203668908566.62
LR MAPE: 913.48
Late Stuart


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.48
LR MSE: 215446418317.49
LR MAPE: 174.46
Quesnel
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.43
XGB MSE: 354220998416.79
XGB MAPE: 382.47
Raft


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'Total_Returns_Yminus3',
       'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3', 'AgeClass_1.1_Yminus4',
       'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus1', 'AgeClass_1.3_Yminus2',
       'AgeClass_2.1_Yminus4', 'AgeClass_2.2_Yminus1', 'AgeClass_2.2_Yminus2',
       'AgeClass_2.2_Yminus3', 'AgeClass_2.3_Yminus1', 'AgeClass_2.3_Yminus4',
       'AgeClass_2.3_Yminus5', 'Total_Returns_NextYear_Yminus4',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.44
GBRT MSE: 233007942018.01
GBRT MAPE: 477.22
Stellako


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.48
LR MSE: 215446418317.49
LR MAPE: 174.46
Wood


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=173)
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.50
RF MSE: 10391153756060.88
RF MAPE: 35.93
