In [None]:
Tune balanced

In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Tune BalancedBagging + XGBoost hyperparameters with K-fold CV (PR-AUC):

- Loads the same balanced 10x parquet:
    cems_with_fraction_balanced_10x.parquet

- Creates burned label (fraction > 0.5).

- Reserves fixed 10% global test set (NOT used for tuning).
  Tuning is done only on the remaining 90% TrainVal data.

- Uses BalancedBaggingClassifier from imbalanced-learn with XGBClassifier
  as the base estimator.

- Runs K-fold CV on the TrainVal set for a small grid of params.
- Uses PR-AUC (average_precision_score) as the tuning metric.
- Saves best params to JSON so they can be reused later.
"""

import os
import json
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import average_precision_score

from imblearn.ensemble import BalancedBaggingClassifier
from xgboost import XGBClassifier

# ----------------- CONFIG -----------------
PARQUET_IN   = "/explore/nobackup/people/spotter5/clelland_fire_ml/ml_training/cems_with_fraction_balanced_10x.parquet"
RANDOM_STATE = 42
N_FOLDS      = 10

# Number of boosting rounds inside the XGB base estimator
XGB_NUM_BOOST_ROUNDS = 600

OUT_ROOT = "/explore/nobackup/people/spotter5/clelland_fire_ml/ml_training/neg_ratio_experiments_globaltest"
OUT_DIR  = os.path.join(OUT_ROOT, "option4_balanced_bagging_xgb_aucpr")
os.makedirs(OUT_DIR, exist_ok=True)

BEST_PARAMS_JSON = os.path.join(OUT_DIR, "tuned_balanced_bagging_xgb_params.json")

# ----------------- LOAD & PREP -----------------
print(f"Loading parquet: {PARQUET_IN}")
df = pd.read_parquet(PARQUET_IN)
if "fraction" not in df.columns:
    raise ValueError("Expected column 'fraction' in dataset.")

df["fraction"] = df["fraction"].astype("float32").clip(0, 1)
before = len(df)
df = df.replace([np.inf, -np.inf], np.nan).dropna(axis=0, how="any").copy()
print(f"Dropped {before - len(df):,} rows with NaNs/Â±inf; {len(df):,} remain.")

# Label: burned = 1 if fraction > 0.5, else 0
df["burned"] = (df["fraction"] > 0.5).astype(np.uint8)

print("\nClass counts (burned label):")
print(df["burned"].value_counts(dropna=False))
print(df["burned"].value_counts(normalize=True).mul(100))

# Same predictor selection as your main script
drop_cols = {"fraction", "burned", "bin", "year", "month", "latitude", "longitude"}
predictors = [c for c in df.columns if c not in drop_cols]

X_full = df[predictors].copy()
y_full = df["burned"].astype(np.uint8)

# Treat land cover as categorical if present
if "b1" in X_full.columns and not pd.api.types.is_categorical_dtype(X_full["b1"]):
    X_full["b1"] = X_full["b1"].astype("category")
    print("\nTreating 'b1' as pandas 'category'.")

# Coerce any non-numeric predictors (except categorical b1) to numeric
coerced = 0
for c in X_full.columns:
    if c == "b1" and pd.api.types.is_categorical_dtype(X_full[c]):
        continue
    if not np.issubdtype(X_full[c].dtype, np.number):
        X_full[c] = pd.to_numeric(X_full[c], errors="coerce")
        coerced += 1

if coerced:
    pre = len(X_full)
    num_cols = [
        c for c in X_full.columns
        if not (c == "b1" and pd.api.types.is_categorical_dtype(X_full["b1"]))
    ]
    mask = X_full[num_cols].notna().all(axis=1)
    if "b1" in X_full.columns and pd.api.types.is_categorical_dtype(X_full["b1"]):
        mask &= X_full["b1"].notna()
    X_full = X_full.loc[mask].copy()
    y_full = y_full.loc[X_full.index]
    print(f"Coerced {coerced} column(s); dropped {pre - len(X_full):,} rows after coercion.")

print(f"\nFinal tuning dataset size: {len(X_full):,} rows")
print(f"Number of predictors: {len(X_full.columns)}")

# ----------------- GLOBAL TEST SPLIT (reserved, not used for tuning) -----------------
idx_trainval, idx_test = train_test_split(
    X_full.index,
    test_size=0.10,
    random_state=RANDOM_STATE,
    stratify=y_full
)
X_tv, y_tv = X_full.loc[idx_trainval], y_full.loc[idx_trainval]
X_test_holdout = X_full.loc[idx_test]
y_test_holdout = y_full.loc[idx_test]

print(f"\nTrainVal size for tuning: {len(X_tv):,} rows")
print(f"Global test (held out, unused here): {len(X_test_holdout):,} rows")
print("Global test class distribution:")
print(y_test_holdout.value_counts())
print(y_test_holdout.value_counts(normalize=True).mul(100))

# ----------------- BASE XGB PARAMS -----------------
# These are params common across all configs; we will vary a few key ones.
base_xgb_params = dict(
    objective="binary:logistic",   # prob outputs for predict_proba
    eval_metric="aucpr",           # just for XGB internal metric
    tree_method="gpu_hist",        # change to "hist" if GPU not available
    predictor="gpu_predictor",
    random_state=RANDOM_STATE,
    n_estimators=XGB_NUM_BOOST_ROUNDS,
    n_jobs=-1,
    use_label_encoder=False,
)

# ----------------- PARAM GRID -----------------
# We vary both XGB hyperparams and BalancedBagging hyperparams.
param_grid = [
    dict(
        # BalancedBagging params
        bb_n_estimators=10,      # number of bags
        bb_max_samples=0.5,      # fraction of samples per bag

        # XGB params
        xgb_max_depth=4,
        xgb_learning_rate=0.05,
        xgb_subsample=0.8,
        xgb_colsample_bytree=0.8,
        xgb_reg_lambda=1.0,
    ),
    dict(
        bb_n_estimators=15,
        bb_max_samples=0.6,

        xgb_max_depth=5,
        xgb_learning_rate=0.05,
        xgb_subsample=0.8,
        xgb_colsample_bytree=0.8,
        xgb_reg_lambda=2.0,
    ),
    dict(
        bb_n_estimators=20,
        bb_max_samples=0.7,

        xgb_max_depth=6,
        xgb_learning_rate=0.03,
        xgb_subsample=0.7,
        xgb_colsample_bytree=0.7,
        xgb_reg_lambda=2.0,
    ),
]

skf = StratifiedKFold(
    n_splits=N_FOLDS,
    shuffle=True,
    random_state=RANDOM_STATE
)

best_params = None
best_score = -np.inf

print(f"\nStarting {N_FOLDS}-fold CV hyperparam search over {len(param_grid)} configs...")
for i, cfg in enumerate(param_grid, start=1):
    print("\n" + "=" * 80)
    print(f"Config {i}/{len(param_grid)}: {cfg}")
    fold_scores = []

    # Build base XGB estimator for this config
    xgb_params = base_xgb_params.copy()
    xgb_params.update(
        dict(
            max_depth=cfg["xgb_max_depth"],
            learning_rate=cfg["xgb_learning_rate"],
            subsample=cfg["xgb_subsample"],
            colsample_bytree=cfg["xgb_colsample_bytree"],
            reg_lambda=cfg["xgb_reg_lambda"],
        )
    )
    base_estimator = XGBClassifier(**xgb_params)

    for fold_idx, (tr_idx, va_idx) in enumerate(skf.split(X_tv, y_tv), start=1):
        X_tr, X_va = X_tv.iloc[tr_idx], X_tv.iloc[va_idx]
        y_tr, y_va = y_tv.iloc[tr_idx], y_tv.iloc[va_idx]

        clf = BalancedBaggingClassifier(
            estimator=base_estimator,
            n_estimators=cfg["bb_n_estimators"],
            max_samples=cfg["bb_max_samples"],
            sampling_strategy="auto",  # balance each bootstrap sample
            replacement=False,
            random_state=RANDOM_STATE,
            n_jobs=-1,
        )

        clf.fit(X_tr, y_tr)
        va_proba = clf.predict_proba(X_va)[:, 1]
        ap = average_precision_score(y_va, va_proba)
        fold_scores.append(ap)
        print(f"  Fold {fold_idx}: AUPRC={ap:.4f}")

    mean_ap = float(np.mean(fold_scores))
    print(f"Mean AUPRC for config {i}: {mean_ap:.4f}")

    if mean_ap > best_score:
        best_score = mean_ap
        best_params = {
            "bb_n_estimators": cfg["bb_n_estimators"],
            "bb_max_samples": cfg["bb_max_samples"],
            "xgb_params": xgb_params,
        }

print("\nBest config based on CV AUPRC:")
print(best_params)
print(f"Best mean AUPRC: {best_score:.4f}")

# Save best configuration to JSON
final_to_save = dict(
    random_state=RANDOM_STATE,
    n_folds=N_FOLDS,
    xgb_num_boost_rounds=XGB_NUM_BOOST_ROUNDS,
    best_mean_auprc=best_score,
    balanced_bagging_n_estimators=best_params["bb_n_estimators"],
    balanced_bagging_max_samples=best_params["bb_max_samples"],
    xgb_params=best_params["xgb_params"],
)

with open(BEST_PARAMS_JSON, "w") as f:
    json.dump(final_to_save, f, indent=2)

print(f"\nSaved tuned BalancedBagging+XGB params to: {BEST_PARAMS_JSON}")


Loading parquet: /explore/nobackup/people/spotter5/clelland_fire_ml/ml_training/cems_with_fraction_balanced_10x.parquet
Dropped 1,781,773 rows with NaNs/Â±inf; 2,569,673 remain.

Class counts (burned label):
0    2354966
1     214707
Name: burned, dtype: int64
0    91.644579
1     8.355421
Name: burned, dtype: float64

Treating 'b1' as pandas 'category'.

Final tuning dataset size: 2,569,673 rows
Number of predictors: 15

TrainVal size for tuning: 2,312,705 rows
Global test (held out, unused here): 256,968 rows
Global test class distribution:
0    235497
1     21471
Name: burned, dtype: int64
0    91.644485
1     8.355515
Name: burned, dtype: float64

Starting 10-fold CV hyperparam search over 3 configs...

Config 1/3: {'bb_n_estimators': 10, 'bb_max_samples': 0.5, 'xgb_max_depth': 4, 'xgb_learning_rate': 0.05, 'xgb_subsample': 0.8, 'xgb_colsample_bytree': 0.8, 'xgb_reg_lambda': 1.0}



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 1: AUPRC=0.8178



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 2: AUPRC=0.8218



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 3: AUPRC=0.8152



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 4: AUPRC=0.8167



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 5: AUPRC=0.8128



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 6: AUPRC=0.8188



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 7: AUPRC=0.8163



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 8: AUPRC=0.8197



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 9: AUPRC=0.8183



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 10: AUPRC=0.8180
Mean AUPRC for config 1: 0.8175

Config 2/3: {'bb_n_estimators': 15, 'bb_max_samples': 0.6, 'xgb_max_depth': 5, 'xgb_learning_rate': 0.05, 'xgb_subsample': 0.8, 'xgb_colsample_bytree': 0.8, 'xgb_reg_lambda': 2.0}



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 1: AUPRC=0.8408



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 2: AUPRC=0.8445



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 3: AUPRC=0.8385



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 4: AUPRC=0.8407



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 5: AUPRC=0.8358



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 6: AUPRC=0.8412



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 7: AUPRC=0.8399



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 8: AUPRC=0.8430



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 9: AUPRC=0.8424



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 10: AUPRC=0.8419
Mean AUPRC for config 2: 0.8409

Config 3/3: {'bb_n_estimators': 20, 'bb_max_samples': 0.7, 'xgb_max_depth': 6, 'xgb_learning_rate': 0.03, 'xgb_subsample': 0.7, 'xgb_colsample_bytree': 0.7, 'xgb_reg_lambda': 2.0}



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 1: AUPRC=0.8397



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 2: AUPRC=0.8438



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 3: AUPRC=0.8384



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 4: AUPRC=0.8395



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 5: AUPRC=0.8359



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 6: AUPRC=0.8413



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 7: AUPRC=0.8393



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 8: AUPRC=0.8425



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 9: AUPRC=0.8418



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor", "use_label_encoder" 

  Fold 10: AUPRC=0.8416
Mean AUPRC for config 3: 0.8404

Best config based on CV AUPRC:
{'bb_n_estimators': 15, 'bb_max_samples': 0.6, 'xgb_params': {'objective': 'binary:logistic', 'eval_metric': 'aucpr', 'tree_method': 'gpu_hist', 'predictor': 'gpu_predictor', 'random_state': 42, 'n_estimators': 600, 'n_jobs': -1, 'use_label_encoder': False, 'max_depth': 5, 'learning_rate': 0.05, 'subsample': 0.8, 'colsample_bytree': 0.8, 'reg_lambda': 2.0}}
Best mean AUPRC: 0.8409

Saved tuned BalancedBagging+XGB params to: /explore/nobackup/people/spotter5/clelland_fire_ml/ml_training/neg_ratio_experiments_globaltest/option4_balanced_bagging_xgb_aucpr/tuned_balanced_bagging_xgb_params.json


In [3]:
't'

't'