Connected to Python 3.10.11

In [2]:
import datetime

now = datetime.datetime.now()
print("Current date and time:", now)


Current date and time: 2025-06-30 13:17:51.326162


### Load libraries

In [1]:
import sys
import os
import itertools
import matplotlib.pyplot as plt
import pandas as pd

# Set paths
notebook_dir = os.getcwd()
src_path = r"C:\Users\MuriloFarias\Desktop\NNS-JULIA\PredictSalmonRuns\src"
if src_path not in sys.path:
    sys.path.append(src_path)

from utils import add_src_to_path
add_src_to_path()

from data_split import split_time_series_by_river
from rf_model import train_and_apply_rf_with_tuning
from plot_predictions import plot_predictions_by_river
from plot_predictions import plot_actual_vs_predicted

from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error

# Experiment parameters
river_systems = ["Fraser River", "Bristol Bay", "Columbia River"]
top_k_options = [6, 10, 0]  # 0 means use all features
add_feat_options = [True, False]
arima_options = [True, False]

project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, 'data', 'Combined_FeatureSet_For_Model.csv')

model_list = ["RF", "GBRT", "XGB", "LR", "PR"]
all_experiments = []

for river_system, top_k_features, add_additional_features, use_arima_on_top in itertools.product(
    river_systems, top_k_options, add_feat_options, arima_options
):
    print(f"\n🧪 Running: {river_system} | TopK={top_k_features} | ExtraFeat={add_additional_features} | ARIMA={use_arima_on_top}")

    combined_df = pd.read_csv(data_path)

    columns_to_drop = [
    'mean_temp_JunAug', 'mean_temp_MaySep', 'max_temp_JunAug',
    'mean_discharge_MarMay', 'max_discharge_AprSep'
]
    combined_df = combined_df.drop(columns=[col for col in columns_to_drop if col in combined_df.columns])
    combined_df = combined_df[combined_df["System"] == river_system]

    if add_additional_features:
        features_to_lag = [
            'Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3', 'AgeClass_0.4', 'AgeClass_0.5',
            'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5',
            'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4',
            'AgeClass_3.1', 'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4',
            'Total_Returns_NextYear', 'Pacea_ALPI_Anomaly', 'npi_mean_NovMar', 'oni_mean_DecFeb',
            'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep'
        ]
        for feat in features_to_lag:
            for lag in [1, 2, 3, 4, 5]:
                combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)

    if river_system == "Bristol Bay":
        combined_df = combined_df[combined_df["Year"] >= 1995]

    if river_system in ["Columbia River", "Bristol Bay"] and add_additional_features:
        if 'total_spawners_y_minus_2_to_4' in combined_df.columns:
            combined_df = combined_df.dropna(subset=['total_spawners_y_minus_2_to_4'])
        if 'AgeClass_0.2_Yminus5' in combined_df.columns:
            combined_df = combined_df.dropna(subset=['AgeClass_0.2_Yminus5'])

    combined_df = combined_df.dropna(axis=1, how='any').dropna()

    train_df, test_df = split_time_series_by_river(
        combined_df,
        time_column="Year",
        group_columns=["System", "River"],
        test_fraction=0.2,
        gap_years=0
    )

    train_df["River_Name"] = train_df["River"]
    test_df["River_Name"] = test_df["River"]

    train_df_encoded = pd.get_dummies(train_df, columns=["River"], prefix="River")
    test_df_encoded = pd.get_dummies(test_df, columns=["River"], prefix="River")

    for model_name in model_list:
        try:
            results = train_and_apply_rf_with_tuning(
                model=model_name,
                train_df=train_df_encoded,
                test_df=test_df_encoded,
                topk_feat=top_k_features
            )

            # ✅ Apply ARIMA correction if selected
            if use_arima_on_top:
                residuals = results["Timeline_train"]["Actual"] - results["Timeline_train"]["Predicted"]
                residuals_series = pd.Series(residuals.values, index=results["Timeline_train"]["Year"])

                arima_model = ARIMA(residuals_series, order=(1, 0, 0))
                arima_fit = arima_model.fit()

                residual_forecast = arima_fit.forecast(steps=len(results["Timeline_test"]["Predicted"]))
                hybrid_pred = results["Timeline_test"]["Predicted"] + residual_forecast.values

                results["Timeline_test"]["Predicted"] = hybrid_pred

                # Optionally update metrics
                grouped = results["Timeline_test"].groupby("River_Name")
                metrics_by_river_test = grouped.apply(
                    lambda g: pd.Series({
                        "R2": r2_score(g["Actual"], g["Predicted"]),
                        "MSE": mean_squared_error(g["Actual"], g["Predicted"]),
                        "MAPE": mean_absolute_percentage_error(g["Actual"], g["Predicted"]) * 100
                    })
                ).reset_index()

                results["Metrics_by_River_Test"] = metrics_by_river_test

            # 📊 Prepare final output
            test_df_ = results['Metrics_by_River_Test'].copy()
            train_df_ = results['Metrics_by_River_Train'].copy()

            test_df_ = test_df_.rename(columns={"R2": "R2_Test", "MSE": "MSE_Test", "MAPE": "MAPE_Test"})
            train_df_ = train_df_.rename(columns={"R2": "R2_Train", "MSE": "MSE_Train", "MAPE": "MAPE_Train"})

            merged_df = pd.merge(test_df_, train_df_, on="River_Name", how="outer")
            merged_df.insert(0, "Model", model_name)

            river_system_lookup = pd.concat([train_df_encoded, test_df_encoded])["River_Name"].drop_duplicates()
            river_system_lookup = pd.merge(
                river_system_lookup.to_frame(),
                combined_df[["River", "System"]].drop_duplicates(),
                left_on="River_Name",
                right_on="River",
                how="left"
            ).drop(columns="River")

            merged_df = pd.merge(merged_df, river_system_lookup, on="River_Name", how="left")

            merged_df["Selected_System"] = river_system
            merged_df["TopK_Features"] = top_k_features
            merged_df["Additional_Features_Used"] = add_additional_features
            merged_df["ARIMA_Enabled"] = use_arima_on_top

            feature_names = results.get("Selected_Feature_Names", [])
            merged_df["Num_Features_Used"] = len(feature_names)
            merged_df["Feature_Names"] = ", ".join(feature_names)
            print(merged_df["Feature_Names"])

            all_experiments.append(merged_df)

        except Exception as e:
            print(f"❌ Error with model {model_name}: {e}")

# 📝 Save final results
final_df = pd.concat(all_experiments, ignore_index=True)
output_path = r"C:\Users\MuriloFarias\Desktop\NNS-JULIA\PredictSalmonRuns\murilo_salmon\julia_models\results.csv"
final_df.to_csv(output_path, index=False)

print(f"\n✅ All experiment results saved to:\n{output_path}")


🧪 Running: Fraser River | TopK=6 | ExtraFeat=True | ARIMA=True
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.41
RF MSE: 365025695484.21
RF MAPE: 477.37
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.21
GBRT MSE: 486627008236.27
GBRT MAPE: 442.80
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.43
XGB MSE: 354220998416.79
XGB MAPE: 382.47
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.34
LR MSE: 408886514095.51
LR MAPE: 1753.96
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Name

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

No parameter tuning for this model.
PR R2: 0.16
PR MSE: 518184352425.38
PR MAPE: 1269.42
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object

🧪 Running: Fraser River | TopK=6 | ExtraFeat=True | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['S

Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.41
RF MSE: 365025695484.21
RF MAPE: 477.37
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.21
GBRT MSE: 486627008236.27
GBRT MAPE: 442.80
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.43
XGB MSE: 354220998416.79
XGB MAPE: 382.47
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.34
LR MSE: 408886514095.51
LR MAPE: 1753.96
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Name

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.41
RF MSE: 365025695484.21
RF MAPE: 477.37
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.21
GBRT MSE: 486627008236.27
GBRT MAPE: 442.80
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.43
XGB MSE: 354220998416.79
XGB MAPE: 382.47
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.34
LR MSE: 408886514095.51
LR MAPE: 1753.96
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Name

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object

🧪 Running: Fraser River | TopK=6 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.41
RF MSE: 365025695484.21
RF MAPE: 477.37
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selec

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.21
GBRT MSE: 486627008236.27
GBRT MAPE: 442.80
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.43
XGB MSE: 354220998416.79
XGB MAPE: 382.47
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.34
LR MSE: 408886514095.51
LR MAPE: 1753.96
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Riv...
Name: Feature_Name

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  co

Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.49
RF MSE: 316008924972.28
RF MAPE: 558.93
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.23
GBRT MSE: 472923345222.29
GBRT MAPE: 682.68
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 100}
XGB R2: 0.12
XGB MSE: 541215021291.21
XGB MAPE: 608.66
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.32
LR MSE: 420987448442.98
LR MAPE: 1470.70
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age.

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

No parameter tuning for this model.
PR R2: 0.14
PR MSE: 529254599014.91
PR MAPE: 1375.06
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object

🧪 Running: Fraser River | TopK=10 | ExtraFeat=True | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[f

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.49
RF MSE: 316008924972.28
RF MAPE: 558.93
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.23
GBRT MSE: 472923345222.29
GBRT MAPE: 682.68
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 100}
XGB R2: 0.12
XGB MSE: 541215021291.21
XGB MAPE: 608.66
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.32
LR MSE: 420987448442.98
LR MAPE: 1470.70
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age.

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.49
RF MSE: 316008924972.28
RF MAPE: 558.93
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.23
GBRT MSE: 472923345222.29
GBRT MAPE: 682.68
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 100}
XGB R2: 0.12
XGB MSE: 541215021291.21
XGB MAPE: 608.66
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.32
LR MSE: 420987448442.98
LR MAPE: 1470.70
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age.

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object

🧪 Running: Fraser River | TopK=10 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.49
RF MSE: 316008924972.28
RF MAPE: 558.93
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, Ag

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.23
GBRT MSE: 472923345222.29
GBRT MAPE: 682.68
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 100}
XGB R2: 0.12
XGB MSE: 541215021291.21
XGB MAPE: 608.66
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.2', 'AgeClass_2.3', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.32
LR MSE: 420987448442.98
LR MAPE: 1470.70
0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age.

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


0    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
1    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
2    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
3    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
4    Total_Returns, AgeClass_0.2, AgeClass_1.1, Age...
Name: Feature_Names, dtype: object

🧪 Running: Fraser River | TopK=0 | ExtraFeat=True | ARIMA=True


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.31
RF MSE: 425685571270.28
RF MAPE: 648.27
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, 

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 10, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: -0.14
GBRT MSE: 705180141377.25
GBRT MAPE: 513.06
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'R

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 100}
XGB R2: -0.04
XGB MSE: 642809615700.83
XGB MAPE: 661.98
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'River_Quesnel', 'River_R

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
No parameter tuning for this model.
PR R2: -11.23
PR MSE: 7531212034288.35
PR MAPE: 184

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[f

Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.31
RF MSE: 425685571270.28
RF MAPE: 648.27
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, 

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 10, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: -0.14
GBRT MSE: 705180141377.25
GBRT MAPE: 513.06
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'R

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 100}
XGB R2: -0.04
XGB MSE: 642809615700.83
XGB MAPE: 661.98
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'River_Quesnel', 'River_R

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object

🧪 Running: Fraser River | TopK=0 | ExtraFeat=False | ARIMA=True
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
Best Parameters: {'max

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 10, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: -0.14
GBRT MSE: 705180141377.25
GBRT MAPE: 513.06
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'R

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 100}
XGB R2: -0.04
XGB MSE: 642809615700.83
XGB MAPE: 661.98
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'River_Quesnel', 'River_R

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
No parameter tuning for this model.
PR R2: -11.23
PR MSE: 7531212034288.35
PR MAPE: 184

  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.31
RF MSE: 425685571270.28
RF MAPE: 648.27
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, 

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 10, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: -0.14
GBRT MSE: 705180141377.25
GBRT MAPE: 513.06
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'R

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 100}
XGB R2: -0.04
XGB MSE: 642809615700.83
XGB MAPE: 661.98
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'npgo_mean_DecFeb', 'ao_mean_DecMar',
       'pdo_mean_DecMar', 'pdo_mean_MaySep', 'River_Chilko',
       'River_Late Stuart', 'River_Quesnel', 'River_R

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object

🧪 Running: Bristol Bay | TopK=6 | ExtraFeat=True | ARIMA=True
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.43
RF MSE: 11983926955142.46
RF MAPE: 45.60
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.22
GBRT MSE: 16301491194281.87
GBRT MAPE: 48.47
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.29
XGB MSE: 14877579935305.32
XGB MAPE: 41.67
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.33
LR MSE: 14041324200705.45
LR MAPE: 39.73
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
No parameter tuning for this model.
PR R2: -0.57
PR MSE: 32628024299379.47
PR MAPE: 48.97
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object

🧪 Running: Bristol Bay | TopK=6 | ExtraFeat=True | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[f

Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.43
RF MSE: 11983926955142.46
RF MAPE: 45.60
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.22
GBRT MSE: 16301491194281.87
GBRT MAPE: 48.47
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.29
XGB MSE: 14877579935305.32
XGB MAPE: 41.67
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.33
LR MSE: 14041324200705.45
LR MAPE: 39.73
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.43
RF MSE: 11801802727365.28
RF MAPE: 37.43
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.29
GBRT MSE: 14771322549615.14
GBRT MAPE: 41.67
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.33
XGB MSE: 13995675498925.39
XGB MAPE: 36.93
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.36
LR MSE: 13325235482348.05
LR MAPE: 35.71
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Ret

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

No parameter tuning for this model.
PR R2: -6.90
PR MSE: 164645367160079.03
PR MAPE: 70.84
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object

🧪 Running: Bristol Bay | TopK=6 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.43
RF MSE: 11801802727365.28
RF MAPE: 37.43
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.29
GBRT MSE: 14771322549615.14
GBRT MAPE: 41.67
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.33
XGB MSE: 13995675498925.39
XGB MAPE: 36.93
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.36
LR MSE: 13325235482348.05
LR MAPE: 35.71
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Ret

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  co

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.42
RF MSE: 11992003962054.81
RF MAPE: 43.70
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['To

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.18
GBRT MSE: 17063810749206.27
GBRT MAPE: 47.68
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.35
XGB MSE: 13561079802480.87
XGB MAPE: 40.21
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': 

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
No parameter tuning for this model.
PR R2: -0.97
PR MSE: 41113430949197.46
PR MAPE: 50.75
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Re

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)


Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.42
RF MSE: 11992003962054.81
RF MAPE: 43.70
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.18
GBRT MSE: 17063810749206.27
GBRT MAPE: 47.68
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.35
XGB MSE: 13561079802480.87
XGB MAPE: 40.21
0    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
1    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
2    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
3    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
4    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
5    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
6    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
7    Total_Returns, AgeClass_1.2, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': 

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.43
RF MSE: 11904713127055.96
RF MAPE: 37.64
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.25
GBRT MSE: 15669345731787.17
GBRT MAPE: 41.69
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.38
XGB MSE: 12901471808984.61
XGB MAPE: 34.89
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.40
LR MSE: 12591124782077.07
LR MAPE: 35.42
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Ag

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object

🧪 Running: Bristol Bay | TopK=10 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.43
RF MSE: 11904713127055.96
RF MAPE: 37.64
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.25
GBRT MSE: 15669345731787.17
GBRT MAPE: 41.69
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.38
XGB MSE: 12901471808984.61
XGB MAPE: 34.89
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
1    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
2    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
3    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
4    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
5    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
6    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
7    Total_Returns, AgeClass_1.2, AgeClass_2.1, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.40
LR MSE: 12591124782077.07
LR MAPE: 35.42
0    Total_Returns, AgeClass_1.2, AgeClass_2.1, Ag

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  co

Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=171)
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.51
RF MSE: 10137601457196.06
RF MAPE: 36.07
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7  

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.42
GBRT MSE: 12131406905769.47
GBRT MAPE: 38.37
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
      

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.44
XGB MSE: 11656596052484.40
XGB MAPE: 36.21
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'Ri

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=171)
No parameter tuning for this model.
PR R2: -368.38
PR MSE: 769959227999

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[f

Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=171)
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.51
RF MSE: 10137601457196.06
RF MAPE: 36.07
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7  

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.42
GBRT MSE: 12131406905769.47
GBRT MAPE: 38.37
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
      

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.44
XGB MSE: 11656596052484.40
XGB MAPE: 36.21
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'Ri

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=171)
No parameter tuning for this model.
PR R2: -368.38
PR MSE: 769959227999

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.53
RF MSE: 9898124765061.24
RF MAPE: 35.59
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_N

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.52
GBRT MSE: 10007389630618.96
GBRT MAPE: 38.66
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.32
XGB MSE: 14143272744660.39
XGB MAPE: 37.02
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_m

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'mei_mean_AprSep', 'npgo_mean_DecFeb',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sst_aprjul',
     

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.52
GBRT MSE: 10007389630618.96
GBRT MAPE: 38.66
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.32
XGB MSE: 14143272744660.39
XGB MAPE: 37.02
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_m

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


No parameter tuning for this model.
PR R2: -60226.59
PR MSE: 1255411029586062080.00
PR MAPE: 4837.56
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
1    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
2    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
3    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
4    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
5    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
6    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
7    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object

🧪 Running: Columbia River | TopK=6 | ExtraFeat=True | ARIMA=True


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].

Selected features:
Index(['AgeClass_1.1', 'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.56
RF MSE: 25500230635.73
RF MAPE: 74.16
0    AgeClass_1.1, Total_Returns_Yminus1, Total_Ret...
Name: Feature_Names, dtype: object
Selected features:
Index(['AgeClass_1.1', 'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.65
GBRT MSE: 20572675540.22
GBRT MAPE: 83.22
0    AgeClass_1.1, Total_Returns_Yminus1, Total_Ret...
Name: Feature_Names, dtype: object
Selected features:
Index(['AgeClass_1.1', 'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.43
XGB MSE: 33270690229.83
XGB MAPE: 107.57
0    AgeClass_1.1, Total_Returns_Yminus1, Total_Ret...
Name: Feature_Names, dtype: object
Selected features:
Index(['AgeClass_1.1', 'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.68
LR MSE: 18629172816.56
LR MAPE: 52.07
0    AgeClass_1.1, Total_Returns_Yminus1, Total_Ret...
Name: Feature_Names, dtype: object


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

Selected features:
Index(['AgeClass_1.1', 'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
No parameter tuning for this model.
PR R2: -23.00
PR MSE: 1403016677325.34
PR MAPE: 732.39
0    AgeClass_1.1, Total_Returns_Yminus1, Total_Ret...
Name: Feature_Names, dtype: object

🧪 Running: Columbia River | TopK=6 | ExtraFeat=True | ARIMA=False


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[f

Selected features:
Index(['AgeClass_1.1', 'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.56
RF MSE: 25500230635.73
RF MAPE: 74.16
0    AgeClass_1.1, Total_Returns_Yminus1, Total_Ret...
Name: Feature_Names, dtype: object
Selected features:
Index(['AgeClass_1.1', 'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.65
GBRT MSE: 20572675540.22
GBRT MAPE: 83.22
0    AgeClass_1.1, Total_Returns_Yminus1, Total_Ret...
Name: Feature_Names, dtype: object
Selected features:
Index(['AgeClass_1.1', 'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.43
XGB MSE: 33270690229.83
XGB MAPE: 107.57
0    AgeClass_1.1, Total_Returns_Yminus1, Total_Ret...
Name: Feature_Names, dtype: object
Selected features:
Index(['AgeClass_1.1', 'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.68
LR MSE: 18629172816.56
LR MAPE: 52.07
0    AgeClass_1.1, Total_Returns_Yminus1, Total_Ret...
Name: Feature_Names, dtype: object
Selected features:
Index(['AgeClass_1.1', 'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
No parameter tuning for this model.
PR R2: -23.00
PR MSE: 1403016677325.34
PR MAPE: 732.39
0    AgeClass_1.1, Total_Returns_Yminus1, Tota

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.53
RF MSE: 28050072933.34
RF MAPE: 68.28
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'pdo_mean_DecMar'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.67
GBRT MSE: 19274375121.08
GBRT MAPE: 75.96
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'pdo_mean_DecMar'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.17
XGB MSE: 48875059950.21
XGB MAPE: 86.78
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'pdo_mean_DecMar'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.61
LR MSE: 23076030814.49
LR MAPE: 69.17
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'pdo_mean_DecMar'],
      dtype='object')
No parameter tuning for this model.
PR R2: -14.97
PR MSE: 943102316523.88
PR MAPE: 231.78


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object

🧪 Running: Columbia River | TopK=6 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'pdo_mean_DecMar'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.53
RF MSE: 28050072933.34
RF MAPE: 68.28
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'pdo_mean_DecMar'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.67
GBRT MSE: 19274375121.08
GBRT MAPE: 75.96
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'pdo_mean_DecMar'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.17
XGB MSE: 48875059950.21
XGB MAPE: 86.78
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'pdo_mean_DecMar'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.61
LR MSE: 23076030814.49
LR MAPE: 69.17
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'pdo_mean_DecMar'],
      dtype='object')
No parameter tuning for this model.
PR R2: -14.97
PR MSE: 943102316523.88
PR MAPE: 231.78
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object

🧪 Running: Columbia River | TopK=10 | ExtraFeat=True | ARIMA=True


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  co

Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus3', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'pdo_mean_MaySep_Yminus1'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.58
RF MSE: 24300605208.85
RF MAPE: 68.25
0    Total_Returns, AgeClass_1.1, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus3', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'pdo_mean_MaySep_Yminus1'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.52
GBRT MSE: 27788111039.32
GBRT MAPE: 77.71
0    Total_Returns, AgeClass_1.1, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus3', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'pdo_mean_MaySep_Yminus1'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.46
XGB MSE: 31325117436.10
XGB MAPE: 100.14
0    Total_Returns, AgeClass_1.1, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus3', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'pdo_mean_MaySep_Yminus1'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.71
LR MSE: 17111132543.94
LR MAPE: 59.49
0    Total_Returns, AgeClass_1.1, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus3', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'T

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_1.1, Total_Returns_Ymi...
Name: Feature_Names, dtype: object

🧪 Running: Columbia River | TopK=10 | ExtraFeat=True | ARIMA=False


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus3', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'pdo_mean_MaySep_Yminus1'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.58
RF MSE: 24300605208.85
RF MAPE: 68.25
0    Total_Returns, AgeClass_1.1, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus3', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'pdo_mean_MaySep_Yminus1'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.52
GBRT MSE: 27788111039.32
GBRT MAPE: 77.71
0    Total_Returns, AgeClass_1.1, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus3', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'pdo_mean_MaySep_Yminus1'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.46
XGB MSE: 31325117436.10
XGB MAPE: 100.14
0    Total_Returns, AgeClass_1.1, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus3', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'pdo_mean_MaySep_Yminus1'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.71
LR MSE: 17111132543.94
LR MAPE: 59.49
0    Total_Returns, AgeClass_1.1, Total_Returns_Ymi...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus3', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'T

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'pdo_mean_DecMar', 'pdo_mean_MaySep'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.49
RF MSE: 30412409759.36
RF MAPE: 76.24
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'pdo_mean_DecMar', 'pdo_mean_MaySep'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.64
GBRT MSE: 21128903105.42
GBRT MAPE: 74.30
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'pdo_mean_DecMar', 'pdo_mean_MaySep'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.21
XGB MSE: 46839973199.55
XGB MAPE: 85.14
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'pdo_mean_DecMar', 'pdo_mean_MaySep'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.51
LR MSE: 28807172946.67
LR MAPE: 78.92
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'pdo_mean_DecMar', 'pdo_mean_MaySep'],
      dtype='object')
No parameter tuning for this model.
PR R2: -650.82
PR MSE: 38498797875827.23
PR MAPE: 1018.46


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object

🧪 Running: Columbia River | TopK=10 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'pdo_mean_DecMar', 'pdo_mean_MaySep'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.49
RF MSE: 30412409759.36
RF MAPE: 76.24
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'pdo_mean_DecMar', 'pdo_mean_MaySep'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.64
GBRT MSE: 21128903105.42
GBRT MAPE: 74.30
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'pdo_mean_DecMar', 'pdo_mean_MaySep'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.21
XGB MSE: 46839973199.55
XGB MAPE: 85.14
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'pdo_mean_DecMar', 'pdo_mean_MaySep'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.51
LR MSE: 28807172946.67
LR MAPE: 78.92
0    Total_Returns, AgeClass_1.1, AgeClass_1.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'pdo_mean_DecMar', 'pdo_mean_MaySep'],
      dtype='object')
No parameter tuning for this model.
PR R2: -650.82
PR MSE: 38498797875827.23
PR MAPE: 1018.46
0    T

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  co

Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=164)
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.44
RF MSE: 32809465469.62
RF MAPE: 65.08
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4'

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.39
GBRT MSE: 35844967546.23
GBRT MAPE: 100.21
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=164)


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 200}
XGB R2: 0.49
XGB MSE: 29852168495.54
XGB MAPE: 87.52
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=164)
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: -0.34
LR MSE: 78186465669.64
LR MAPE: 88.40


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=164)
No parameter tuning for this model.
PR R2: -60.07
PR MSE: 3569483396129.08
PR MAPE: 629.06


  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Y

0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object

🧪 Running: Columbia River | TopK=0 | ExtraFeat=True | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=164)
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.44
RF MSE: 32809465469.62
RF MAPE: 65.08
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns',

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.39
GBRT MSE: 35844967546.23
GBRT MAPE: 100.21
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=164)


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 200}
XGB R2: 0.49
XGB MSE: 29852168495.54
XGB MAPE: 87.52
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=164)
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: -0.34
LR MSE: 78186465669.64
LR MAPE: 88.40
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Inde

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object

🧪 Running: Columbia River | TopK=0 | ExtraFeat=False | ARIMA=True
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'mei_mean_AprSep', 'npgo_mean_DecFeb',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sst_aprjul',
       'sst_anom', 'River_Bonneville Lock & Dam'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.47
RF MSE: 31051702893.70
RF MAPE: 76.35
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selec

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.65
GBRT MSE: 20461356112.79
GBRT MAPE: 72.54
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'mei_mean_AprSep', 'npgo_mean_DecFeb',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sst_aprjul',
       'sst_anom', 'River_Bonneville Lock & Dam'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.21
XGB MSE: 46559114677.27
XGB MAPE: 85.72
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'mei_mean_AprSep', 'npgo_mean_DecFeb',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sst_aprjul',
       'sst_anom', 'River_Bonneville Lock & Dam'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.51
LR MSE: 29015283255.17
LR MAPE: 82.04
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names,

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  metrics_by_river_test = grouped.apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return g

0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object

🧪 Running: Columbia River | TopK=0 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'mei_mean_AprSep', 'npgo_mean_DecFeb',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sst_aprjul',
       'sst_anom', 'River_Bonneville Lock & Dam'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.47
RF MSE: 31051702893.70
RF MAPE: 76.35
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Sele

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.65
GBRT MSE: 20461356112.79
GBRT MAPE: 72.54
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'mei_mean_AprSep', 'npgo_mean_DecFeb',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sst_aprjul',
       'sst_anom', 'River_Bonneville Lock & Dam'],
      dtype='object')


  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.21
XGB MSE: 46559114677.27
XGB MAPE: 85.72
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names, dtype: object
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'npi_mean_NovMar',
       'oni_mean_DecFeb', 'mei_mean_AprSep', 'npgo_mean_DecFeb',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sst_aprjul',
       'sst_anom', 'River_Bonneville Lock & Dam'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.51
LR MSE: 29015283255.17
LR MAPE: 82.04
0    Total_Returns, AgeClass_0.1, AgeClass_0.2, Age...
Name: Feature_Names,

  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_test = results_df.groupby("System", group_keys=False).apply(
  river_metrics_test = results_df.groupby("River_Name", group_keys=False).apply(
  system_metrics_train = train_results_df.groupby("System", group_keys=False).apply(
  river_metrics_train = train_results_df.groupby("River_Name", group_keys=False).apply(
