Connected to Python 3.10.11

In [3]:
import sys
import os
import itertools
import matplotlib.pyplot as plt
import pandas as pd

# Set random seed for reproducibility
import random
random.seed(42)

# Set paths
notebook_dir = os.getcwd()
src_path = os.path.abspath(os.path.join(notebook_dir, '..', 'src'))
if src_path not in sys.path:
    sys.path.append(src_path)

from utils import add_src_to_path
add_src_to_path()

from data_split import split_time_series_by_river
from model import train_and_apply_with_tuning

from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error

# Experiment parameters
river_systems = ["Fraser River", "Bristol Bay", "Columbia River", "Bristol Bay - Ugashik"]
top_k_options = [6, 10, 20, 0]  # 0 means use all features
add_feat_options = [True, False]
arima_options = [True, False]

project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(project_root, 'data', 'Combined_FeatureSet_For_Model.csv')
output_path = os.path.abspath(os.path.join(notebook_dir, '..', 'results', 'Model_Performances.csv'))


model_list = ["RF", "GBRT", "XGB", "LR", "PR"]
all_experiments = []

for river_system, top_k_features, add_additional_features, use_arima_on_top in itertools.product(
    river_systems, top_k_options, add_feat_options, arima_options
):
    print(f"\n Running: {river_system} | TopK={top_k_features} | ExtraFeat={add_additional_features} | ARIMA={use_arima_on_top}")

    combined_df = pd.read_csv(data_path)

    # Drop features that have missing values in 2024
    columns_to_drop = [
        'mean_temp_JunAug', 'mean_temp_MaySep', 'max_temp_JunAug',
        'mean_discharge_MarMay', 'max_discharge_AprSep'
    ]
    combined_df = combined_df.drop(columns=[col for col in columns_to_drop if col in combined_df.columns])

    # Setup special rules for river Ugashik
    if river_system == "Bristol Bay - Ugashik":
        river_system = "Bristol Bay"
        apply_strict_dropna = False # Delets the spawner data for Bristol Bay
    else:
        apply_strict_dropna = True

    combined_df = combined_df[combined_df["System"] == river_system]

    # Feature engineering  
    if add_additional_features:
        features_to_lag = [
            'Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3', 'AgeClass_0.4', 'AgeClass_0.5',
            'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5',
            'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4',
            'AgeClass_3.1', 'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4',
            'Total_Returns_NextYear', 'Pacea_ALPI_Anomaly', 'npi_mean_NovMar', 'oni_mean_DecFeb',
            'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep'
        ]
        for feat in features_to_lag:
            for lag in [1, 2, 3, 4, 5]:
                combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)

    if (apply_strict_dropna and river_system == "Bristal Bay") or river_system == "Columbia River":
        # If Columbia River or Bristol Bay, only keep lines with spawner data
        combined_df = combined_df.dropna(subset=['total_spawners_y_minus_2_to_4'])
    if add_additional_features:
        combined_df = combined_df.dropna(subset=['AgeClass_0.2_Yminus5'])

    # If system is Bristol Bay, consider only years after 1995
    if river_system == "Bristol Bay":
        combined_df = combined_df[combined_df["Year"] >= 1995]

    # Drop features with any NaN values
    combined_df = combined_df.dropna(axis=1, how='any').dropna()

    train_df, test_df = split_time_series_by_river(
        combined_df,
        time_column="Year",
        group_columns=["System", "River"],
        test_fraction=0.2,
        gap_years=0
    )

    train_df["River_Name"] = train_df["River"]
    test_df["River_Name"] = test_df["River"]

    train_df_encoded = pd.get_dummies(train_df, columns=["River"], prefix="River")
    test_df_encoded = pd.get_dummies(test_df, columns=["River"], prefix="River")

    for model_name in model_list:
        try:
            results = train_and_apply_with_tuning(
                model=model_name,
                train_df=train_df_encoded,
                test_df=test_df_encoded,
                topk_feat=top_k_features
            )

            # Apply ARIMA correction if selected
            if use_arima_on_top:
                residuals = results["Timeline_train"]["Actual"] - results["Timeline_train"]["Predicted"]
                residuals_series = pd.Series(residuals.values, index=results["Timeline_train"]["Year"])

                arima_model = ARIMA(residuals_series, order=(1, 0, 0))
                arima_fit = arima_model.fit()

                residual_forecast = arima_fit.forecast(steps=len(results["Timeline_test"]["Predicted"]))
                hybrid_pred = results["Timeline_test"]["Predicted"] + residual_forecast.values

                results["Timeline_test"]["Predicted"] = hybrid_pred

                # Optionally update metrics
                r2 = r2_score(results["Timeline_test"]["Actual"], hybrid_pred)
                mse = mean_squared_error(results["Timeline_test"]["Actual"], hybrid_pred)
                mape = mean_absolute_percentage_error(results["Timeline_test"]["Actual"], hybrid_pred) * 100

                # Also update metrics per river (recalculate)
                grouped = results["Timeline_test"].groupby("River_Name")
                metrics_by_river_test = grouped.apply(
                    lambda g: pd.Series({
                        "R2": r2_score(g["Actual"], g["Predicted"]),
                        "MSE": mean_squared_error(g["Actual"], g["Predicted"]),
                        "MAPE": mean_absolute_percentage_error(g["Actual"], g["Predicted"]) * 100
                    })
                ).reset_index()

                results["Metrics_by_River_Test"] = metrics_by_river_test

            # Prepare final output
            test_df_ = results['Metrics_by_River_Test'].copy()
            train_df_ = results['Metrics_by_River_Train'].copy()

            test_df_ = test_df_.rename(columns={"R2": "R2_Test", "MSE": "MSE_Test", "MAPE": "MAPE_Test"})
            train_df_ = train_df_.rename(columns={"R2": "R2_Train", "MSE": "MSE_Train", "MAPE": "MAPE_Train"})

            merged_df = pd.merge(test_df_, train_df_, on="River_Name", how="outer")
            merged_df.insert(0, "Model", model_name)

            river_system_lookup = pd.concat([train_df_encoded, test_df_encoded])["River_Name"].drop_duplicates()
            river_system_lookup = pd.merge(
                river_system_lookup.to_frame(),
                combined_df[["River", "System"]].drop_duplicates(),
                left_on="River_Name",
                right_on="River",
                how="left"
            ).drop(columns="River")

            merged_df = pd.merge(merged_df, river_system_lookup, on="River_Name", how="left")

            merged_df["Selected_System"] = river_system
            merged_df["TopK_Features"] = top_k_features
            merged_df["Additional_Features_Used"] = add_additional_features
            merged_df["ARIMA_Enabled"] = use_arima_on_top

            feature_names = results.get("Selected_Feature_Names", [])
            merged_df["Num_Features_Used"] = len(feature_names)
            merged_df["Feature_Names"] = ", ".join(feature_names)

            all_experiments.append(merged_df)

        except Exception as e:
            print(f"Error with model {model_name}: {e}")

# Save final results
final_df = pd.concat(all_experiments, ignore_index=True)
final_df.to_csv(output_path, index=False)

print(f"\n All experiment results saved to:\n{output_path}")



 Running: Fraser River | TopK=6 | ExtraFeat=True | ARIMA=True
Selected features:
Index(['AgeClass_0.2', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.40
RF MSE: 248294181626.38
RF MAPE: 431.23
Selected features:
Index(['AgeClass_0.2', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.01
GBRT MSE: 409256660172.12
GBRT MAPE: 421.91
Selected features:
Index(['AgeClass_0.2', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.46
XGB MSE: 223853944345.08
XGB MAPE: 588.07
Selected features:
Index(['AgeClass_0.2', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.39
LR MSE: 250379838903.35
LR MAPE: 218.88
Selected features:
Index(['AgeClass_0.2', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


No parameter tuning for this model.
PR R2: 0.09
PR MSE: 377006134976.68
PR MAPE: 319.19

 Running: Fraser River | TopK=6 | ExtraFeat=True | ARIMA=False
Selected features:
Index(['AgeClass_0.2', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['Syste

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.40
RF MSE: 248294181626.38
RF MAPE: 431.23
Selected features:
Index(['AgeClass_0.2', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.01
GBRT MSE: 409256660172.12
GBRT MAPE: 421.91
Selected features:
Index(['AgeClass_0.2', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.46
XGB MSE: 223853944345.08
XGB MAPE: 588.07
Selected features:
Index(['AgeClass_0.2', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus4', 'Ag

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.32
GBRT MSE: 419782776855.52
GBRT MAPE: 204.18
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Raft'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.38
XGB MSE: 379612555077.48
XGB MAPE: 192.54
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.23
LR MSE: 472169513997.39
LR MAPE: 848.92


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Raft'],
      dtype='object')
No parameter tuning for this model.
PR R2: 0.45
PR MSE: 340320534871.20
PR MAPE: 300.01

 Running: Fraser River | TopK=6 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Raft'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.52
RF MSE: 293000955001.35
RF MAPE: 310.78
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Raft'],
      dtype='object')
Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.32
GBRT MSE: 419782776855.52
GBRT MAPE: 204.18
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Raft'],
      dtype='object')
Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.38
XGB MSE: 379612555077.48
XGB MAPE: 192.54
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.2

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.26
RF MSE: 306517049740.98
RF MAPE: 280.03
Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.11
GBRT MSE: 366499311632.35
GBRT MAPE: 461.58
Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: -0.07
XGB MSE: 442303160594.11
XGB MAPE: 354.18
Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.57
LR MSE: 177246359005.51
LR MAPE: 887.52


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
No parameter tuning for this model.
PR R2: -4.01
PR MSE: 2070990393724.16
PR MAPE: 394.24

 Running: Fraser River | TopK=10 | ExtraFeat=True | ARIMA=False


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['Syste

Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.26
RF MSE: 306517049740.98
RF MAPE: 280.03
Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.11
GBRT MSE: 366499311632.35
GBRT MAPE: 461.58
Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawner

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.49
GBRT MSE: 313353785012.94
GBRT MAPE: 268.24
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.3', 'pdo_mean_MaySep', 'total_spawners_y_minus_3',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.46
XGB MSE: 330474450641.54
XGB MAPE: 302.93
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.3', 'pdo_mean_MaySep', 'total_spawners_y_minus_3',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.23
LR MSE: 475569491258.60
LR MAPE: 1095.06


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.3', 'pdo_mean_MaySep', 'total_spawners_y_minus_3',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')
No parameter tuning for this model.
PR R2: -0.85
PR MSE: 1139354807580.59
PR MAPE: 365.61

 Running: Fraser River | TopK=10 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.3', 'pdo_mean_MaySep', 'total_spawners_y_minus_3',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.52
RF MSE: 297788636002.51
RF MAPE: 307.29
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.3', 'pdo_mean_MaySep', 'total_spawners_y_minus_3',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.49
GBRT MSE: 313353785012.94
GBRT MAPE: 268.24
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_2.3', 'pdo_mean_MaySep', 'total_spawners_y_minus_3',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.46
XGB MSE: 330474450641.54
XGB MAPE: 302.93
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_1.1', 'AgeClas

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus1',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus4', 'AgeClass_2.2_Yminus1',
       'AgeClass_2.2_Yminus2', 'AgeClass_2.2_Yminus3', 'AgeClass_2.3_Yminus4',
       'AgeClass_2.3_Yminus5', 'Total_Returns_NextYear_Yminus4',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.52
RF MSE: 197960205214.04
RF MAPE: 255.20
Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus1',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus4', 

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.45
GBRT MSE: 226393847468.38
GBRT MAPE: 312.79
Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus1',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus4', 'AgeClass_2.2_Yminus1',
       'AgeClass_2.2_Yminus2', 'AgeClass_2.2_Yminus3', 'AgeClass_2.3_Yminus4',
       'AgeClass_2.3_Yminus5', 'Total_Returns_NextYear_Yminus4',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.37
XGB MSE: 258341742279.70
XGB MAPE: 378.45
Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus1',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus4', 'AgeClass_2.2_Yminus1',
       'AgeClass_2.2_Yminus2', 'AgeClass_2.2_Yminus3', 'AgeClass_2.3_Yminus4',
       'AgeClass_2.3_Yminus5', 'Total_Returns_NextYear_Yminus4',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.30
LR MSE: 288781326928.09
LR MAPE: 1070.22


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus1',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus4', 'AgeClass_2.2_Yminus1',
       'AgeClass_2.2_Yminus2', 'AgeClass_2.2_Yminus3', 'AgeClass_2.3_Yminus4',
       'AgeClass_2.3_Yminus5', 'Total_Returns_NextYear_Yminus4',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')
No parameter tuning for this model.
PR R2: -57632.67
PR MSE: 23811174132810068.00
PR MAPE: 148225.96

 Running: Fraser River | TopK=20 | ExtraFeat=True | ARIMA=False


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['Syste

Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus1',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus4', 'AgeClass_2.2_Yminus1',
       'AgeClass_2.2_Yminus2', 'AgeClass_2.2_Yminus3', 'AgeClass_2.3_Yminus4',
       'AgeClass_2.3_Yminus5', 'Total_Returns_NextYear_Yminus4',
       'River_Chilko', 'River_Quesnel', 'River_Raft'],
      dtype='object')
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.52
RF MSE: 197960205214.04
RF MAPE: 255.20
Selected features:
Index(['AgeClass_0.2', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus3', 'AgeClass_0.2_Yminus4', 'AgeClass_0.3_Yminus3',
       'AgeClass_1.1_Yminus4', 'AgeClass_1.2_Yminus3', 'AgeClass_1.3_Yminus1',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus4', 

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.38
GBRT MSE: 384800394524.53
GBRT MAPE: 391.93
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_0.3', 'AgeClass_1.1',
       'AgeClass_1.2', 'AgeClass_1.3', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Pacea_ALPI_Anomaly', 'npi_mean_NovMar',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.44
XGB MSE: 344323114936.63
XGB MAPE: 345.19
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_0.3', 'AgeClass_1.1',
       'AgeClass_1.2', 'AgeClass_1.3', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Pacea_ALPI_Anomaly', 'npi_mean_NovMar',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.02
LR MSE: 605427343452.37
LR MAPE: 1930.86


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_0.3', 'AgeClass_1.1',
       'AgeClass_1.2', 'AgeClass_1.3', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Pacea_ALPI_Anomaly', 'npi_mean_NovMar',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
No parameter tuning for this model.
PR R2: -4767.13
PR MSE: 2937355705547959.00
PR MAPE: 5741.46

 Running: Fraser River | TopK=20 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_0.3', 'AgeClass_1.1',
       'AgeClass_1.2', 'AgeClass_1.3', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Pacea_ALPI_Anomaly', 'npi_mean_NovMar',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft'

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.39
RF MSE: 375218150351.80
RF MAPE: 359.47
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_0.3', 'AgeClass_1.1',
       'AgeClass_1.2', 'AgeClass_1.3', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Pacea_ALPI_Anomaly', 'npi_mean_NovMar',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.38
GBRT MSE: 384800394524.53
GBRT MAPE: 391.93
Selected features:
Index(['Total_Returns', 'AgeClass_0.2', 'AgeClass_0.3', 'AgeClass_1.1',
       'AgeClass_1.2', 'AgeClass_1.3', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Pacea_ALPI_Anomaly', 'npi_mean_NovMar',
       'ao_mean_DecMar

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.57
RF MSE: 178798349734.76
RF MAPE: 361.07
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object', length=169)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.63
GBRT MSE: 154021821376.32
GBRT MAPE: 340.00
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object', length=169)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.60
XGB MSE: 163485390217.08
XGB MAPE: 318.30
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object', length=169)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: -1.35
LR MSE: 969670115868.80
LR MAPE: 4577.48
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object', length=169)
No parameter tuning for this model.
PR R2: -8737.67
PR MSE: 3610355235619138.50
PR MAPE: 8341.44


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(



 Running: Fraser River | TopK=0 | ExtraFeat=True | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object', length=169)


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.57
RF MSE: 178798349734.76
RF MAPE: 361.07
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object', length=169)
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.63
GBRT MSE: 154021821376.32
GBRT MAPE: 340.00
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.38
GBRT MSE: 384170613432.15
GBRT MAPE: 401.30
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sss_mayaug',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.38
XGB MSE: 381279254204.08
XGB MAPE: 287.28
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sss_mayaug',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.01
LR MSE: 612895805056.97
LR MAPE: 2024.45


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sss_mayaug',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
No parameter tuning for this model.
PR R2: -14.19
PR MSE: 9356667616889.61
PR MAPE: 1582.11

 Running: Fraser River | TopK=0 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}
RF R2: 0.34
RF MSE: 404129978956.60
RF MAPE: 372.83
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'ao_mean_DecMar', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sss_mayaug',
       'total_spawners_y_minus_3', 'River_Chilko', 'River_Late Stuart',
       'River_Quesnel', 'River_Raft', 'River_Stellako'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.38
GBRT MSE: 384170613432.15
GBRT MAPE: 401.30
Selected featu

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.43
RF MSE: 11983926955142.46
RF MAPE: 45.60
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.22
GBRT MSE: 16301491194281.87
GBRT MAPE: 48.47
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.29
XGB MSE: 14877579935305.32
XGB MAPE: 41.67
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.33
LR MSE: 14041324200705.45
LR MAPE: 39.73


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
No parameter tuning for this model.
PR R2: -0.57
PR MSE: 32628024299379.47
PR MAPE: 48.97

 Running: Bristol Bay | TopK=6 | ExtraFeat=True | ARIMA=False


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.43
RF MSE: 11983926955142.46
RF MAPE: 45.60
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.22
GBRT MSE: 16301491194281.87
GBRT MAPE: 48.47
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: 

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.29
GBRT MSE: 14771322549615.14
GBRT MAPE: 41.67
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.33
XGB MSE: 13995675498925.39
XGB MAPE: 36.93
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.36
LR MSE: 13325235482348.05
LR MAPE: 35.71


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
No parameter tuning for this model.
PR R2: -6.90
PR MSE: 164645367160079.03
PR MAPE: 70.84

 Running: Bristol Bay | TopK=6 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.43
RF MSE: 11801802727365.28
RF MAPE: 37.43
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.29
GBRT MSE: 14771322549615.14
GBRT MAPE: 41.67
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.42
RF MSE: 11992003962054.81
RF MAPE: 43.70
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.18
GBRT MSE: 17063810749206.27
GBRT MAPE: 47.68
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.35
XGB MSE: 13561079802480.87
XGB MAPE: 40.21
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.35
LR MSE: 13538705124495.12
LR MAPE: 35.24


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
No parameter tuning for this model.
PR R2: -0.97
PR MSE: 41113430949197.46
PR MAPE: 50.75


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['Syste


 Running: Bristol Bay | TopK=10 | ExtraFeat=True | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.42
RF MSE: 11992003962054.81
RF MAPE: 43.70
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT 

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.25
GBRT MSE: 15669345731787.17
GBRT MAPE: 41.69
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.38
XGB MSE: 12901471808984.61
XGB MAPE: 34.89
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.40
LR MSE: 12591124782077.07
LR MAPE: 35.42


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
No parameter tuning for this model.
PR R2: -8.25
PR MSE: 192841633948745.72
PR MAPE: 76.79


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(



 Running: Bristol Bay | TopK=10 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.43
RF MSE: 11904713127055.96
RF MAPE: 37.64
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.25
GBRT MSE: 15669345731787.17
GBRT MAPE: 41.69
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
  

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.51
RF MSE: 10148772968241.17
RF MAPE: 37.90
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3', 'AgeClass_2.1_Yminus5',
       'AgeClass_2.3_Yminus1', 'AgeClass_2.3_Yminus2',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'Total_Returns_NextYear_Yminus5', 'River_Igushik'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 2}
GBRT R2: 0.46
GBRT MSE: 11256462567514.42
GBRT MAPE: 40.35
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3', 'AgeClass_2.1_Yminus5',
       'AgeClass_2.3_Yminus1', 'AgeClass_2.3_Yminus2',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'Total_Returns_NextYear_Yminus5', 'River_Igushik'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.40
XGB MSE: 12559668032664.97
XGB MAPE: 44.57
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3', 'AgeClass_2.1_Yminus5',
       'AgeClass_2.3_Yminus1', 'AgeClass_2.3_Yminus2',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'Total_Returns_NextYear_Yminus5', 'River_Igushik'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.23
LR MSE: 15988088564720.52
LR MAPE: 40.63


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3', 'AgeClass_2.1_Yminus5',
       'AgeClass_2.3_Yminus1', 'AgeClass_2.3_Yminus2',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'Total_Returns_NextYear_Yminus5', 'River_Igushik'],
      dtype='object')
No parameter tuning for this model.
PR R2: -1316.68
PR MSE: 27466265564984228.00
PR MAPE: 629.21

 Running: Bristol Bay | TopK=20 | ExtraFeat=True | ARIMA=False


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['Syste

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3', 'AgeClass_2.1_Yminus5',
       'AgeClass_2.3_Yminus1', 'AgeClass_2.3_Yminus2',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'Total_Returns_NextYear_Yminus5', 'River_Igushik'],
      dtype='object')
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.51
RF MSE: 10148772968241.17
RF MAPE: 37.90
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeCl

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.57
GBRT MSE: 8997591988215.72
GBRT MAPE: 42.52
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 100}
XGB R2: -0.07
XGB MSE: 22326986783182.40
XGB MAPE: 45.52
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.47
LR MSE: 10988541002089.13
LR MAPE: 32.59


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')
No parameter tuning for this model.
PR R2: -5238.21
PR MSE: 109208379337244672.00
PR MAPE: 656.48

 Running: Bristol Bay | TopK=20 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.51
RF MSE: 10203562908770.70
RF MAPE: 36.73
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.57
GBRT MSE: 8997591988215.72
GBRT MAPE: 42.52
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.50
RF MSE: 10391153756060.88
RF MAPE: 35.93
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=173)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 2}
GBRT R2: 0.48
GBRT MSE: 10904597678084.94
GBRT MAPE: 35.21
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=173)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.44
XGB MSE: 11656596052484.40
XGB MAPE: 36.21
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=173)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: -3.79
LR MSE: 99882653625661.83
LR MAPE: 101.01
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=173)
No parameter tuning for this model.
PR R2: -368.38
PR MSE: 7699592160198413.00
PR MAPE: 643.66

 Running: Bristol Bay | TopK=0 | ExtraFeat=True | ARIMA=False


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['Syste

Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=173)
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.50
RF MSE: 10391153756060.88
RF MAPE: 35.93
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'R

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.48
GBRT MSE: 10889800048912.36
GBRT MAPE: 39.86
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'River_Alagnak', 'River_Egegik', 'River_Igushik', 'River_Kvichak',
       'River_Naknek', 'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 100}
XGB R2: -0.17
XGB MSE: 24449092177096.29
XGB MAPE: 44.70
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'River_Alagnak', 'River_Egegik', 'River_Igushik', 'River_Kvichak',
       'River_Naknek', 'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.48
LR MSE: 10788925069424.01
LR MAPE: 43.97
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'River_Alagnak', 'River_Egegik', 'River_Igushik', 'River_Kvichak',
       'River_Naknek', 'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


No parameter tuning for this model.
PR R2: -32379.33
PR MSE: 674950157888640896.00
PR MAPE: 2322.82


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(



 Running: Bristol Bay | TopK=0 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'River_Alagnak', 'River_Egegik', 'River_Igushik', 'River_Kvichak',
       'River_Naknek', 'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.52
RF MSE: 10071513942524.17
RF MAPE: 34.63
Selected features:
Index(['Total_R

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['AgeClass_1.1', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.54
RF MSE: 26819602493.18
RF MAPE: 80.82
Selected features:
Index(['AgeClass_1.1', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 10, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.42
GBRT MSE: 33646434666.19
GBRT MAPE: 102.01
Selected features:
Index(['AgeClass_1.1', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.34
XGB MSE: 38841600696.14
XGB MAPE: 112.21
Selected features:
Index(['AgeClass_1.1', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.68
LR MSE: 18629172816.56
LR MAPE: 52.07


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = co

Selected features:
Index(['AgeClass_1.1', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
No parameter tuning for this model.
PR R2: -23.00
PR MSE: 1403016677237.08
PR MAPE: 732.39

 Running: Columbia River | TopK=6 | ExtraFeat=True | ARIMA=False


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['AgeClass_1.1', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.54
RF MSE: 26819602493.18
RF MAPE: 80.82
Selected features:
Index(['AgeClass_1.1', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 10, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.42
GBRT MSE: 33646434666.19
GBRT MAPE: 102.01
Selected features:
Index(['AgeClass_1.1', 'total_spawners_y_minus_3', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus3', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.37
GBRT MSE: 37014148390.33
GBRT MAPE: 94.78
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.47
XGB MSE: 31171025447.51
XGB MAPE: 88.47
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_4'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.65
LR MSE: 20206236067.33
LR MAPE: 46.59


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_4'],
      dtype='object')
No parameter tuning for this model.
PR R2: -127.49
PR MSE: 7510879312285.80
PR MAPE: 1074.39

 Running: Columbia River | TopK=6 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.56
RF MSE: 25608217692.28
RF MAPE: 72.08
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_4'],
      dtype='object')
Best Parameters: {'learning_rate': 0.1, 'max_depth': 5, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.37
GBRT MSE: 37014148390.33
GBRT

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.54
RF MSE: 26901827290.44
RF MAPE: 74.06


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.51
GBRT MSE: 28834923047.54
GBRT MAPE: 81.31
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.38
XGB MSE: 36283095488.36
XGB MAPE: 102.65
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.66
LR MSE: 19894683419.60
LR MAPE: 61.85
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
No parameter tuning for this model.
PR R2: -405.57
PR MSE: 23765667692082.89
PR MAPE: 2346.09


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)



 Running: Columbia River | TopK=10 | ExtraFeat=True | ARIMA=False


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.54
RF MSE: 26901827290.44
RF MAPE: 74.06
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'total_spawners_y_minus_3',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 1}
GBRT R2: 0.51
GBRT MSE: 28834923047.54
GBRT MAPE: 81.31
Selected feature

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.43
GBRT MSE: 33562209737.89
GBRT MAPE: 96.59
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'pdo_mean_DecMar',
       'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_2_to_4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.44
XGB MSE: 32482788895.31
XGB MAPE: 93.57
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'pdo_mean_DecMar',
       'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_2_to_4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.70
LR MSE: 17492947452.03
LR MAPE: 42.58
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'pdo_mean_DecMar',
       'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_2_to_4'],
      dtype='object')
No parameter tuning for this model.
PR R2: -245.37
PR MSE: 14400994766944.80
PR MAPE: 1716.25


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  return get_prediction_index(
  return get_prediction_index(



 Running: Columbia River | TopK=10 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'pdo_mean_DecMar',
       'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_2_to_4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.54
RF MSE: 26659990429.99
RF MAPE: 76.06
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'npi_mean_NovMar', 'pdo_mean_DecMar',
       'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_2_to_4'],
      dtype='object')
Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.43
GBRT MSE: 33562209737.89
GBRT MAPE: 96.59
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_4',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus1', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus2', 'AgeClass_2.1_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'npi_mean_NovMar_Yminus2',
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_MaySep_Yminus1',
       'pdo_mean_MaySep_Yminus2'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.58
RF MSE: 24539637449.28
RF MAPE: 59.61
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_4',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminu

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 10, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.30
GBRT MSE: 40649189677.63
GBRT MAPE: 93.01
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_4',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus1', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus2', 'AgeClass_2.1_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'npi_mean_NovMar_Yminus2',
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_MaySep_Yminus1',
       'pdo_mean_MaySep_Yminus2'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.46
XGB MSE: 31375517126.20
XGB MAPE: 92.99
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_4',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus1', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus2', 'AgeClass_2.1_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'npi_mean_NovMar_Yminus2',
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_MaySep_Yminus1',
       'pdo_mean_MaySep_Yminus2'],
      dtype='object')
Best Parameters: {'fit_intercept': False, 'positive': False}
LR R2: 0.41
LR MSE: 34771946834.53
LR MAPE: 79.82
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


No parameter tuning for this model.
PR R2: -82.03
PR MSE: 4853416907303.06
PR MAPE: 560.34

 Running: Columbia River | TopK=20 | ExtraFeat=True | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_4',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus1', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus2', 'AgeClass_2.1_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'npi_mean_NovMar_Yminus2',
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_MaySep_Yminus1',
       'pdo_mean_MaySep_Yminus2'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['Syste

Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.58
RF MSE: 24539637449.28
RF MAPE: 59.61
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2',
       'total_spawners_y_minus_3', 'total_spawners_y_minus_2_to_4',
       'Total_Returns_Yminus1', 'Total_Returns_Yminus3',
       'AgeClass_1.1_Yminus1', 'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3',
       'AgeClass_1.3_Yminus2', 'AgeClass_2.1_Yminus2', 'AgeClass_2.1_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4', 'npi_mean_NovMar_Yminus2',
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_MaySep_Yminus1',
       'pdo_mean_MaySep_Yminus2'],
      dtype='object')
Best Parameters: {'learning_rate': 0.1, 'max_depth': 10, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.30
GBRT MSE: 40649189677.63
GBRT MAPE: 93.01
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2'

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.39
GBRT MSE: 35434422793.22
GBRT MAPE: 101.44
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sst_aprjul',
       'sst_anom', 'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_4', 'total_spawners_y_minus_2_to_4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.43
XGB MSE: 33173819751.74
XGB MAPE: 96.73
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sst_aprjul',
       'sst_anom', 'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_4', 'total_spawners_y_minus_2_to_4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.70
LR MSE: 17476864478.66
LR MAPE: 38.67
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'pdo_mean_DecMar', 'pdo

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(



 Running: Columbia River | TopK=20 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'sst_aprjul',
       'sst_anom', 'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_4', 'total_spawners_y_minus_2_to_4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.56
RF MSE: 25842100376.31
RF MAPE: 78.52
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'pdo_mean_DecMar', 'pdo_mean_MaySep', 'ss

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=170)
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.42
RF MSE: 33730391834.25
RF MAPE: 67.31
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.54
GBRT MSE: 26853600008.01
GBRT MAPE: 85.94
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=170)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 200}
XGB R2: 0.44
XGB MSE: 32699364936.44
XGB MAPE: 96.84
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=170)
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: -0.32
LR MSE: 77340084349.90
LR MAPE: 87.91


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=170)
No parameter tuning for this model.
PR R2: -50.48
PR MSE: 3009079095673.46
PR MAPE: 589.57


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(



 Running: Columbia River | TopK=0 | ExtraFeat=True | ARIMA=False


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean_DecMar_Yminus4', 'pdo_mean_DecMar_Yminus5',
       'pdo_mean_MaySep_Yminus1', 'pdo_mean_MaySep_Yminus2',
       'pdo_mean_MaySep_Yminus3', 'pdo_mean_MaySep_Yminus4',
       'pdo_mean_MaySep_Yminus5', 'River_Bonneville Lock & Dam'],
      dtype='object', length=170)
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.42
RF MSE: 33730391834.25
RF MAPE: 67.31
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_DecMar_Yminus2', 'pdo_mean_DecMar_Yminus3',
       'pdo_mean

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.47
GBRT MSE: 30882767455.14
GBRT MAPE: 98.65
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_4', 'total_spawners_y_minus_2_to_4',
       'River_Bonneville Lock & Dam'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200}
XGB R2: 0.36
XGB MSE: 37631062294.44
XGB MAPE: 105.12
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_4', 'total_spawners_y_minus_2_to_4',
       'River_Bonneville Lock & Dam'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: -2.80
LR MSE: 222106014577.26
LR M

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_4', 'total_spawners_y_minus_2_to_4',
       'River_Bonneville Lock & Dam'],
      dtype='object')
No parameter tuning for this model.
PR R2: -307.10
PR MSE: 18009322110556.89
PR MAPE: 1338.87

 Running: Columbia River | TopK=0 | ExtraFeat=False | ARIMA=False


  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'total_spawners_y_minus_2', 'total_spawners_y_minus_3',
       'total_spawners_y_minus_4', 'total_spawners_y_minus_2_to_4',
       'River_Bonneville Lock & Dam'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.58
RF MSE: 24424859076.51
RF MAPE: 71.76
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'Ag

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.43
RF MSE: 11983926955142.46
RF MAPE: 45.60
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.22
GBRT MSE: 16301491194281.87
GBRT MAPE: 48.47
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.29
XGB MSE: 14877579935305.32
XGB MAPE: 41.67
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.33
LR MSE: 14041324200705.45
LR MAPE: 39.73


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
No parameter tuning for this model.
PR R2: -0.57
PR MSE: 32628024299379.47
PR MAPE: 48.97

 Running: Bristol Bay - Ugashik | TopK=6 | ExtraFeat=True | ARIMA=False


  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.43
RF MSE: 11983926955142.46
RF MAPE: 45.60
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.22
GBRT MSE: 16301491194281.87
GBRT MAPE: 48.47
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus4'],
      dtype='object')
Best Parameters: 

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.29
GBRT MSE: 14771322549615.14
GBRT MAPE: 41.67
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.33
XGB MSE: 13995675498925.39
XGB MAPE: 36.93
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.36
LR MSE: 13325235482348.05
LR MAPE: 35.71


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
No parameter tuning for this model.
PR R2: -6.90
PR MSE: 164645367160079.03
PR MAPE: 70.84

 Running: Bristol Bay - Ugashik | TopK=6 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.43
RF MSE: 11801802727365.28
RF MAPE: 37.43
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 200, 'min_samples_leaf': 2}
GBRT R2: 0.29
GBRT MSE: 14771322549615.14
GBRT MAPE: 41.67
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.33
XGB MSE: 13995675498925.39
XGB MAPE: 36.93
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'River_Igushik'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.36
LR MSE: 1332523548

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.42
RF MSE: 11992003962054.81
RF MAPE: 43.70
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.18
GBRT MSE: 17063810749206.27
GBRT MAPE: 47.68
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.35
XGB MSE: 13561079802480.87
XGB MAPE: 40.21
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.35
LR MSE: 13538705124495.12
LR MAPE: 35.24


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
No parameter tuning for this model.
PR R2: -0.97
PR MSE: 41113430949197.46
PR MAPE: 50.75

 Running: Bristol Bay - Ugashik | TopK=10 | ExtraFeat=True | ARIMA=False


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['Syste

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
RF R2: 0.42
RF MSE: 11992003962054.81
RF MAPE: 43.70
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'Total_Returns_Yminus1',
       'Total_Returns_Yminus2', 'Total_Returns_Yminus3',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'River_Igushik'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.18
GBRT MSE: 17063810749206.27
GBRT MAPE: 47.68
Selected f

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.25
GBRT MSE: 15669345731787.17
GBRT MAPE: 41.69
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.38
XGB MSE: 12901471808984.61
XGB MAPE: 34.89


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.40
LR MSE: 12591124782077.07
LR MAPE: 35.42


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
No parameter tuning for this model.
PR R2: -8.25
PR MSE: 192841633948745.72
PR MAPE: 76.79


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(



 Running: Bristol Bay - Ugashik | TopK=10 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.43
RF MSE: 11904713127055.96
RF MAPE: 37.64
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Igushik',
       'River_Kvichak', 'River_Nushagak'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.25
GBRT MSE: 15669345731787.17
GBRT MAPE: 41.69
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'AgeClass_3.2', 'River_Egegik', 'River_Ig

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3', 'AgeClass_2.1_Yminus5',
       'AgeClass_2.3_Yminus1', 'AgeClass_2.3_Yminus2',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'Total_Returns_NextYear_Yminus5', 'River_Igushik'],
      dtype='object')
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.51
RF MSE: 10148772968241.17
RF MAPE: 37.90
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeCl

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 2}
GBRT R2: 0.46
GBRT MSE: 11256462567514.42
GBRT MAPE: 40.35


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3', 'AgeClass_2.1_Yminus5',
       'AgeClass_2.3_Yminus1', 'AgeClass_2.3_Yminus2',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'Total_Returns_NextYear_Yminus5', 'River_Igushik'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.40
XGB MSE: 12559668032664.97
XGB MAPE: 44.57
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3', 'Age

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.23
LR MSE: 15988088564720.52
LR MAPE: 40.63


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3', 'AgeClass_2.1_Yminus5',
       'AgeClass_2.3_Yminus1', 'AgeClass_2.3_Yminus2',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'Total_Returns_NextYear_Yminus5', 'River_Igushik'],
      dtype='object')
No parameter tuning for this model.
PR R2: -1316.68
PR MSE: 27466265564984228.00
PR MAPE: 629.21

 Running: Bristol Bay - Ugashik | TopK=20 | ExtraFeat=True | ARIMA=False


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['Syste

Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeClass_1.2_Yminus3', 'AgeClass_2.1_Yminus5',
       'AgeClass_2.3_Yminus1', 'AgeClass_2.3_Yminus2',
       'Total_Returns_NextYear_Yminus1', 'Total_Returns_NextYear_Yminus2',
       'Total_Returns_NextYear_Yminus3', 'Total_Returns_NextYear_Yminus4',
       'Total_Returns_NextYear_Yminus5', 'River_Igushik'],
      dtype='object')
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
RF R2: 0.51
RF MSE: 10148772968241.17
RF MAPE: 37.90
Selected features:
Index(['Total_Returns', 'AgeClass_1.2', 'AgeClass_2.1', 'AgeClass_2.2',
       'AgeClass_2.3', 'Total_Returns_Yminus1', 'Total_Returns_Yminus2',
       'Total_Returns_Yminus3', 'Total_Returns_Yminus4',
       'AgeClass_1.2_Yminus1', 'AgeCl

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.57
GBRT MSE: 8997591988215.72
GBRT MAPE: 42.52
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 100}
XGB R2: -0.07
XGB MSE: 22326986783182.40
XGB MAPE: 45.52
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.47
LR MSE: 10988541002089.13
LR MAPE: 32.59
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


No parameter tuning for this model.
PR R2: -5238.21
PR MSE: 109208379337244672.00
PR MAPE: 656.48

 Running: Bristol Bay - Ugashik | TopK=20 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.51
RF MSE: 10203562908770.70
RF MAPE: 36.73
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak', 'River_Egegik',
       'River_Igushik', 'River_Kvichak', 'River_Nushagak', 'River_Wood'],
      dtype='object')
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.57
GBRT MSE: 8997591988215.72
GBRT MAPE: 42.52
Selected features:
Index(['Total_Returns', 'AgeClass_1.1', 'AgeClass_1.2', 'AgeClass_1.3',
       'AgeClass_1.5', 'AgeClass_2.1', 'AgeClass_2.2', 'AgeClass_2.3',
       'AgeClass_3.2', 'AgeClass_3.3', 'oni_mean_DecFeb', 'npgo_mean_DecFeb',
       'sst_aprjul', 'sst_anom', 'River_Alagnak

  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{lag}'] = combined_df.groupby(['System', 'River'])[feat].shift(lag)
  combined_df[f'{feat}_Yminus{

Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=173)
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.50
RF MSE: 10391153756060.88
RF MAPE: 35.93
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'R

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 2}
GBRT R2: 0.48
GBRT MSE: 10904597678084.94
GBRT MAPE: 35.21


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=173)
Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
XGB R2: 0.44
XGB MSE: 11656596052484.40
XGB MAPE: 36.21
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: -3.79
LR MSE: 99882653625661.83
LR MAPE: 101.01
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=173)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


No parameter tuning for this model.
PR R2: -368.38
PR MSE: 7699592160198413.00
PR MAPE: 643.66


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(



 Running: Bristol Bay - Ugashik | TopK=0 | ExtraFeat=True | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'River_Igushik', 'River_Kvichak', 'River_Naknek',
       'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object', length=173)
Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.50
RF MSE: 10391153756060.88
RF MAPE: 35.93
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4',
       ...
       'pdo_mean_MaySep_Yminus4', 'pdo_mean_MaySep_Yminus5', 'River_Alagnak',
       'River_Egegik', 'Riv

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'max_iter': 100, 'min_samples_leaf': 1}
GBRT R2: 0.48
GBRT MSE: 10889800048912.36
GBRT MAPE: 39.86
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'River_Alagnak', 'River_Egegik', 'River_Igushik', 'River_Kvichak',
       'River_Naknek', 'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 100}
XGB R2: -0.17
XGB MSE: 24449092177096.29
XGB MAPE: 44.70
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'River_Alagnak', 'River_Egegik', 'River_Igushik', 'River_Kvichak',
       'River_Naknek', 'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object')


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Best Parameters: {'fit_intercept': True, 'positive': False}
LR R2: 0.48
LR MSE: 10788925069424.01
LR MAPE: 43.97


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'River_Alagnak', 'River_Egegik', 'River_Igushik', 'River_Kvichak',
       'River_Naknek', 'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object')
No parameter tuning for this model.
PR R2: -32379.33
PR MSE: 674950157888640896.00
PR MAPE: 2322.82


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(



 Running: Bristol Bay - Ugashik | TopK=0 | ExtraFeat=False | ARIMA=False
Selected features:
Index(['Total_Returns', 'AgeClass_0.1', 'AgeClass_0.2', 'AgeClass_0.3',
       'AgeClass_0.4', 'AgeClass_0.5', 'AgeClass_1.1', 'AgeClass_1.2',
       'AgeClass_1.3', 'AgeClass_1.4', 'AgeClass_1.5', 'AgeClass_2.1',
       'AgeClass_2.2', 'AgeClass_2.3', 'AgeClass_2.4', 'AgeClass_3.1',
       'AgeClass_3.2', 'AgeClass_3.3', 'AgeClass_3.4', 'Pacea_ALPI_Anomaly',
       'npi_mean_NovMar', 'oni_mean_DecFeb', 'mei_mean_AprSep',
       'npgo_mean_DecFeb', 'ao_mean_DecMar', 'pdo_mean_DecMar',
       'pdo_mean_MaySep', 'sst_aprjul', 'sst_anom', 'sss_mayaug',
       'River_Alagnak', 'River_Egegik', 'River_Igushik', 'River_Kvichak',
       'River_Naknek', 'River_Nushagak', 'River_Ugashik', 'River_Wood'],
      dtype='object')
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
RF R2: 0.52
RF MSE: 10071513942524.17
RF MAPE: 34.63
Selected features:
Index