# Imports

In [2]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import make_scorer, fbeta_score

from skopt.space import Integer, Categorical

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import fbeta_score, roc_auc_score, f1_score
from sklearn.metrics import accuracy_score, precision_score, recall_score

from skopt import BayesSearchCV

from typing import Dict, List, Literal
from skopt.space import Real, Integer, Categorical

from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler, SMOTE
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings("ignore")

## Read dataset

In [22]:
data = pd.read_csv("../data/processed/20. FINAL_mean_delta_multi_output.csv")
print(data.shape)
data.head()

(6091, 210)


Unnamed: 0,Hba1c,Hba1c Time,Hba1c FM,Hba1c FM Time,BMI,BMI Time,Cancer,Cancer Time,Carotid Disease,Carotid Disease Time,...,y_stk_or_aemb_3_months,y_stk_or_aemb_6_months,y_stk_or_aemb_12_months,y_stk_or_aemb_24_months,y_stk_or_aemb,History of Vascular Disease,Antihypertensive Medication,Diabetes Mellitus,Diabetes Medication,Abnormal Kidney Function
0,6.26,-501,6.26,-501,20.7,-19,0,10000,0,10000,...,0,0,0,0,0,0,1,0,0,0
1,6.26,-501,6.26,-501,26.7,-780,1,-4846,0,10000,...,0,0,0,0,0,0,1,0,0,0
2,5.8,-287,6.3,-2701,31.1,-35,0,10000,0,10000,...,0,0,0,0,0,0,1,1,1,1
3,6.26,-501,6.26,-501,21.3,-207,0,10000,0,10000,...,0,0,0,0,0,1,1,0,0,1
4,5.9,-162,5.4,-5209,37.8,-554,1,-86,0,10000,...,0,0,0,0,0,1,1,1,1,0


## Drop columns

In [25]:
targets = [col for col in data.columns if col.startswith('y_')]
print(targets)

cols_to_drop = [col for col in data.columns if col.endswith('_t')]
data = data.drop(columns=cols_to_drop)

cols_to_drop = [col for col in data.columns if col.startswith('Δ')]
data = data.drop(columns=cols_to_drop)

cols_to_drop = [col for col in data.columns if "FM" in col]
data = data.drop(columns=cols_to_drop)

print(data.shape)
data.head()

['y_acs', 'y_aemb', 'y_cvdeath', 'y_death', 'y_hf', 'y_inp', 'y_stk', 'y_acs_1_month', 'y_acs_3_months', 'y_acs_6_months', 'y_acs_12_months', 'y_acs_24_months', 'y_aemb_1_month', 'y_aemb_3_months', 'y_aemb_6_months', 'y_aemb_12_months', 'y_aemb_24_months', 'y_cvdeath_1_month', 'y_cvdeath_3_months', 'y_cvdeath_6_months', 'y_cvdeath_12_months', 'y_cvdeath_24_months', 'y_death_1_month', 'y_death_3_months', 'y_death_6_months', 'y_death_12_months', 'y_death_24_months', 'y_hf_1_month', 'y_hf_3_months', 'y_hf_6_months', 'y_hf_12_months', 'y_hf_24_months', 'y_inp_1_month', 'y_inp_3_months', 'y_inp_6_months', 'y_inp_12_months', 'y_inp_24_months', 'y_stk_1_month', 'y_stk_3_months', 'y_stk_6_months', 'y_stk_12_months', 'y_stk_24_months', 'y_stk_or_aemb_1_month', 'y_stk_or_aemb_3_months', 'y_stk_or_aemb_6_months', 'y_stk_or_aemb_12_months', 'y_stk_or_aemb_24_months', 'y_stk_or_aemb']
(6091, 173)


Unnamed: 0,Hba1c,Hba1c Time,BMI,BMI Time,Cancer,Cancer Time,Carotid Disease,Carotid Disease Time,Coronary Disease,Coronary Disease Time,...,y_stk_or_aemb_3_months,y_stk_or_aemb_6_months,y_stk_or_aemb_12_months,y_stk_or_aemb_24_months,y_stk_or_aemb,History of Vascular Disease,Antihypertensive Medication,Diabetes Mellitus,Diabetes Medication,Abnormal Kidney Function
0,6.26,-501,20.7,-19,0,10000,0,10000,0,10000,...,0,0,0,0,0,0,1,0,0,0
1,6.26,-501,26.7,-780,1,-4846,0,10000,0,10000,...,0,0,0,0,0,0,1,0,0,0
2,5.8,-287,31.1,-35,0,10000,0,10000,0,10000,...,0,0,0,0,0,0,1,1,1,1
3,6.26,-501,21.3,-207,0,10000,0,10000,0,10000,...,0,0,0,0,0,1,1,0,0,1
4,5.9,-162,37.8,-554,1,-86,0,10000,1,0,...,0,0,0,0,0,1,1,1,1,0


## Models settings

In [5]:
# Define 5-fold stratified CV
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Create F2 scorer
f2_scorer = make_scorer(fbeta_score, beta=2)

# ===================== Naive Bayes =====================
search_space_nb = {
    "var_smoothing": (1e-12, 1e-1, "log-uniform")
}

nb_opt = BayesSearchCV(
    estimator=GaussianNB(),
    search_spaces=search_space_nb,
    scoring=f2_scorer,
    n_iter=30,
    cv=cv,
    n_jobs=-1,
    random_state=42
)

# ===================== Logistic Regression =====================
search_space_lr = {
    'C': (1e-4, 1e+3, 'log-uniform'),
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear']
}

lr_opt = BayesSearchCV(
    LogisticRegression(max_iter=1000, random_state=42),
    search_spaces=search_space_lr,
    scoring=f2_scorer,
    n_iter=30,
    cv=cv,
    n_jobs=-1,
    random_state=42
)

# ===================== Decision Tree =====================
search_space_dt = {
    "max_depth": Integer(3, 20),
    "min_samples_split": Integer(2, 20),
    "min_samples_leaf": Integer(1, 10),
    "criterion": Categorical(["gini", "entropy"])
}

dt_opt = BayesSearchCV(
    DecisionTreeClassifier(random_state=42),
    search_spaces=search_space_dt,
    n_iter=30,
    scoring=f2_scorer,
    cv=cv,
    n_jobs=-1,
    random_state=42,
    verbose=0
)

# ===================== Random Forest =====================
search_space_rf = {
    'n_estimators': (50, 300),
    'max_depth': (1, 50),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 20),
    'max_features': ['sqrt', 'log2', None],
    'bootstrap': [True, False]
}

rf_opt = BayesSearchCV(
    RandomForestClassifier(random_state=42),
    search_spaces=search_space_rf,
    scoring=f2_scorer,
    n_iter=30,
    cv=cv,
    n_jobs=-1,
    random_state=42
)

# ===================== XGBoost =====================
search_space_xgb = {
    'n_estimators': (10, 500),
    'max_depth': (1, 50),
    'learning_rate': (0.001, 0.1, 'uniform'),
    'subsample': (0.5, 1.0),
    'colsample_bytree': (0.5, 1.0),
    'gamma': (0, 5),
    'min_child_weight': (1, 20)
}

xgb_opt = BayesSearchCV(
    XGBClassifier(random_state=42),
    search_spaces=search_space_xgb,
    scoring=f2_scorer,
    n_iter=30,
    n_jobs=-1,
    random_state=42
)

# ===================== MLP ===============================

mlp_opt = MLPClassifier(max_iter=2000, solver="adam", activation="relu", hidden_layer_sizes=(200,100), random_state=42)

# ===================== Optimizer Map =====================
optimizer_map = {
    "nb": nb_opt,
    "lr": lr_opt,
    "dt": dt_opt,
    "rf": rf_opt,
    "xgb": xgb_opt,
    "mlp": mlp_opt
}


## Define function

In [11]:
SamplingName = Literal["baseline", "undersample", "oversample", "smote", "all"]
ModelName = Literal["nb", "lr", "xgb", "dt", "rf", "mlp"]


def _make_resampler(name: SamplingName, random_state: int, y_train: pd.Series = None):
    if name == "baseline":
        return None
    if name == "undersample":
        if y_train is None:
            raise ValueError("y_train must be provided for undersampling strategy")
        n_minority = y_train.sum()
        n_required = max(int(0.1 * len(y_train)), n_minority * 2)
        n_majority = n_required - n_minority
        n_majority = min(n_majority, (y_train == 0).sum())
        sampling_strategy = {0: n_majority, 1: n_minority}
        return RandomUnderSampler(sampling_strategy=sampling_strategy, random_state=random_state)
    if name == "oversample":
        return RandomOverSampler(random_state=random_state)
    if name == "smote":
        return SMOTE(random_state=random_state)
    raise ValueError(f"Unknown sampling technique: {name}")


def run_cv_with_sampling(
    X_full: pd.DataFrame,
    target_cols: List[str],
    target_name: str,
    optimizer_map: Dict[ModelName, object],
    model_name: ModelName = "nb",
    sampling: SamplingName = "all",
    n_splits: int = 5,
    random_state: int = 42,
    xgb_04 = False,
) -> Dict[str, dict]:
    if target_name not in target_cols:
        raise ValueError("target_name must be inside target_cols")
    missing_targets = [c for c in target_cols if c not in X_full.columns]
    if missing_targets:
        raise ValueError(f"Target columns not in X_full: {missing_targets}")

    y = X_full[target_name].copy()
    X = X_full.drop(columns=target_cols).copy()

    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    if sampling == "all":
        techniques = ["baseline", "undersample", "oversample", "smote"]
    else:
        techniques = [sampling]

    results: Dict[str, dict] = {}

    for tech in techniques:
        print(f"\n=== Technique: {tech.upper()} ===")

        fold_results: List[dict] = []
        best_params_per_fold: List[dict] = []

        for fold, (train_idx, val_idx) in enumerate(cv.split(X, y), start=1):
            X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
            y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

            if model_name in ["nb", "lr", "mlp"]:
                scaler = StandardScaler()
                X_train = scaler.fit_transform(X_train)
                X_val = scaler.transform(X_val)

            resampler = _make_resampler(tech, random_state, y_train)
            if resampler is not None:
                X_train_rs, y_train_rs = resampler.fit_resample(X_train, y_train)
            else:
                X_train_rs, y_train_rs = X_train, y_train

            opt = optimizer_map[model_name]
            opt.fit(X_train_rs, y_train_rs)

            if model_name == "mlp":
                best_model = opt
                best_params_per_fold = {}
            else:
                best_model = opt.best_estimator_
                best_params_per_fold.append(getattr(opt, "best_params_", {}))

            y_pred = best_model.predict(X_val)

            if hasattr(best_model, "predict_proba"):
                y_pred_proba = best_model.predict_proba(X_val)[:, 1]
            else:
                scores = best_model.decision_function(X_val)
                smin, smax = scores.min(), scores.max()
                y_pred_proba = (scores - smin) / (smax - smin + 1e-9)

            if model_name == "xgb" and xgb_04:
                y_pred = (y_pred_proba >= 0.4).astype(int)

            prec = precision_score(y_val, y_pred, zero_division=0)
            rec = recall_score(y_val, y_pred, zero_division=0)
            f1 = f1_score(y_val, y_pred, zero_division=0)
            fbeta2 = fbeta_score(y_val, y_pred, beta=2, zero_division=0)
            roc = roc_auc_score(y_val, y_pred_proba)

            fold_metrics = {
                "fold": fold,
                "accuracy": accuracy_score(y_val, y_pred),
                "precision": prec,
                "sensitivity": rec,
                "f1_score": f1,
                "fbeta_2": fbeta2,
                "roc_auc": roc,
                "NNS": (1 / prec) if prec > 0 else np.inf,
                "best_params": getattr(opt, "best_params_", {}),
            }
            fold_results.append(fold_metrics)

            print({k: (round(v, 3) if isinstance(v, float) else v) for k, v in fold_metrics.items()})

        metrics = [
            "accuracy",
            "precision",
            "sensitivity",
            "f1_score",
            "fbeta_2",
            "roc_auc",
            "NNS",
        ]
        mean_std = {
            m: (np.mean([fr[m] for fr in fold_results]), np.std([fr[m] for fr in fold_results]))
            for m in metrics
        }

        print("\nMean scores across folds (", tech, "):")
        for m in metrics:
            mu, sd = mean_std[m]
            print(f"{m}: {mu:.3f} \u00B1 {sd:.3f}")

        summary_df = pd.DataFrame(fold_results)
        results[tech] = {
            "fold_results": fold_results,
            "summary_metrics": summary_df,
            "best_params_per_fold": best_params_per_fold,
        }

    return results

## y_acs

In [13]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_acs_6_months",
    optimizer_map=optimizer_map,
    model_name="nb",
    sampling="all",
    n_splits=5,
    random_state=42,
)


=== Technique: BASELINE ===
{'fold': 1, 'accuracy': 0.804, 'precision': 0.033, 'sensitivity': 0.533, 'f1_score': 0.063, 'fbeta_2': 0.133, 'roc_auc': np.float64(0.755), 'NNS': 30.0, 'best_params': OrderedDict([('var_smoothing', 0.0866931096115264)])}
{'fold': 2, 'accuracy': 0.811, 'precision': 0.042, 'sensitivity': 0.714, 'f1_score': 0.08, 'fbeta_2': 0.171, 'roc_auc': np.float64(0.774), 'NNS': 23.6, 'best_params': OrderedDict([('var_smoothing', 0.08055775091161241)])}
{'fold': 3, 'accuracy': 0.81, 'precision': 0.046, 'sensitivity': 0.786, 'f1_score': 0.087, 'fbeta_2': 0.186, 'roc_auc': np.float64(0.791), 'NNS': 21.727, 'best_params': OrderedDict([('var_smoothing', 0.04435408749130663)])}
{'fold': 4, 'accuracy': 0.839, 'precision': 0.026, 'sensitivity': 0.357, 'f1_score': 0.049, 'fbeta_2': 0.101, 'roc_auc': np.float64(0.721), 'NNS': 38.4, 'best_params': OrderedDict([('var_smoothing', 0.1)])}
{'fold': 5, 'accuracy': 0.851, 'precision': 0.023, 'sensitivity': 0.267, 'f1_score': 0.042, 'fbe

In [14]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_acs_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.767, 'precision': 0.031, 'sensitivity': 0.6, 'f1_score': 0.06, 'fbeta_2': 0.13, 'roc_auc': np.float64(0.736), 'NNS': 31.889, 'best_params': OrderedDict([('C', 0.00023427253996830794), ('penalty', 'l2'), ('solver', 'liblinear')])}




{'fold': 2, 'accuracy': 0.795, 'precision': 0.035, 'sensitivity': 0.643, 'f1_score': 0.067, 'fbeta_2': 0.145, 'roc_auc': np.float64(0.767), 'NNS': 28.222, 'best_params': OrderedDict([('C', 0.0008193534843029969), ('penalty', 'l2'), ('solver', 'liblinear')])}




{'fold': 3, 'accuracy': 0.758, 'precision': 0.036, 'sensitivity': 0.786, 'f1_score': 0.069, 'fbeta_2': 0.153, 'roc_auc': np.float64(0.84), 'NNS': 27.545, 'best_params': OrderedDict([('C', 0.0001227775810803022), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.764, 'precision': 0.021, 'sensitivity': 0.429, 'f1_score': 0.04, 'fbeta_2': 0.088, 'roc_auc': np.float64(0.679), 'NNS': 47.5, 'best_params': OrderedDict([('C', 0.0007411220817870144), ('penalty', 'l2'), ('solver', 'liblinear')])}




{'fold': 5, 'accuracy': 0.822, 'precision': 0.019, 'sensitivity': 0.267, 'f1_score': 0.036, 'fbeta_2': 0.074, 'roc_auc': np.float64(0.678), 'NNS': 52.5, 'best_params': OrderedDict([('C', 0.0010930794995484406), ('penalty', 'l2'), ('solver', 'liblinear')])}

Mean scores across folds ( undersample ):
accuracy: 0.781 ± 0.024
precision: 0.029 ± 0.007
sensitivity: 0.545 ± 0.180
f1_score: 0.054 ± 0.014
fbeta_2: 0.118 ± 0.031
roc_auc: 0.740 ± 0.060
NNS: 37.531 ± 10.408


In [15]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_acs_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.855, 'precision': 0.029, 'sensitivity': 0.333, 'f1_score': 0.053, 'fbeta_2': 0.108, 'roc_auc': np.float64(0.53), 'NNS': 34.4, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.829, 'precision': 0.025, 'sensitivity': 0.357, 'f1_score': 0.046, 'fbeta_2': 0.096, 'roc_auc': np.float64(0.596), 'NNS': 40.8, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.827, 'precision': 0.038, 'sensitivity': 0.571, 'f1_score': 0.07, 'fbeta_2': 0.149, 'roc_auc': np.float64(0.602), 'NNS': 26.625, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.832, 'precision': 0.02, 'sensitivity': 0.286, 'f1_score': 0.038, 'fbeta_2': 0.078, 'roc_auc': np.float64(0.632), 'NNS': 49.75, 'best_params': OrderedDict([('C', 48.61518228524981), ('penalty', 'l1'), ('solver', 'libli

In [16]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_acs_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.852, 'precision': 0.028, 'sensitivity': 0.333, 'f1_score': 0.052, 'fbeta_2': 0.106, 'roc_auc': np.float64(0.518), 'NNS': 35.2, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.837, 'precision': 0.021, 'sensitivity': 0.286, 'f1_score': 0.039, 'fbeta_2': 0.081, 'roc_auc': np.float64(0.6), 'NNS': 48.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.831, 'precision': 0.034, 'sensitivity': 0.5, 'f1_score': 0.064, 'fbeta_2': 0.134, 'roc_auc': np.float64(0.615), 'NNS': 29.429, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.851, 'precision': 0.017, 'sensitivity': 0.214, 'f1_score': 0.032, 'fbeta_2': 0.065, 'roc_auc': np.float64(0.623), 'NNS': 58.0, 'best_params': OrderedDict([('C', 704.0066152316548), ('penalty', 'l2'), ('solver', 'liblinear')]

In [17]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_acs_6_months",
    optimizer_map=optimizer_map,
    model_name="dt",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.881, 'precision': 0.015, 'sensitivity': 0.133, 'f1_score': 0.027, 'fbeta_2': 0.052, 'roc_auc': np.float64(0.528), 'NNS': 67.0, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 11), ('min_samples_leaf', 3), ('min_samples_split', 2)])}
{'fold': 2, 'accuracy': 0.86, 'precision': 0.018, 'sensitivity': 0.214, 'f1_score': 0.034, 'fbeta_2': 0.068, 'roc_auc': np.float64(0.541), 'NNS': 54.333, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 13), ('min_samples_leaf', 1), ('min_samples_split', 2)])}
{'fold': 3, 'accuracy': 0.92, 'precision': 0.063, 'sensitivity': 0.429, 'f1_score': 0.11, 'fbeta_2': 0.199, 'roc_auc': np.float64(0.729), 'NNS': 15.833, 'best_params': OrderedDict([('criterion', 'gini'), ('max_depth', 12), ('min_samples_leaf', 3), ('min_samples_split', 2)])}
{'fold': 4, 'accuracy': 0.905, 'precision': 0.019, 'sensitivity': 0.143, 'f1_score': 0.033, 'fbeta_2': 0.062, 'roc_auc': np.float64(0

In [18]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_acs_6_months",
    optimizer_map=optimizer_map,
    model_name="rf",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.893, 'precision': 0.025, 'sensitivity': 0.2, 'f1_score': 0.044, 'fbeta_2': 0.083, 'roc_auc': np.float64(0.52), 'NNS': 40.333, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 29), ('max_features', None), ('min_samples_leaf', 1), ('min_samples_split', 2), ('n_estimators', 50)])}
{'fold': 2, 'accuracy': 0.943, 'precision': 0.033, 'sensitivity': 0.143, 'f1_score': 0.054, 'fbeta_2': 0.086, 'roc_auc': np.float64(0.55), 'NNS': 30.0, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 18), ('max_features', None), ('min_samples_leaf', 6), ('min_samples_split', 20), ('n_estimators', 300)])}
{'fold': 3, 'accuracy': 0.916, 'precision': 0.051, 'sensitivity': 0.357, 'f1_score': 0.089, 'fbeta_2': 0.162, 'roc_auc': np.float64(0.703), 'NNS': 19.6, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 46), ('max_features', None), ('min_samples_leaf', 3), ('min_samples_split', 3), ('n_estimators', 96)])}
{'fol

In [19]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_acs_6_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.979, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.664), 'NNS': inf, 'best_params': OrderedDict([('colsample_bytree', 0.5000145920428805), ('gamma', 2), ('learning_rate', 0.04857003917925159), ('max_depth', 32), ('min_child_weight', 3), ('n_estimators', 322), ('subsample', 0.8855840593769595)])}
{'fold': 2, 'accuracy': 0.973, 'precision': 0.12, 'sensitivity': 0.214, 'f1_score': 0.154, 'fbeta_2': 0.185, 'roc_auc': np.float64(0.764), 'NNS': 8.333, 'best_params': OrderedDict([('colsample_bytree', 0.5), ('gamma', 2), ('learning_rate', 0.1), ('max_depth', 50), ('min_child_weight', 1), ('n_estimators', 500), ('subsample', 0.8512477338086484)])}
{'fold': 3, 'accuracy': 0.952, 'precision': 0.091, 'sensitivity': 0.357, 'f1_score': 0.145, 'fbeta_2': 0.225, 'roc_auc': np.float64(0.801), 'NNS': 11.0, 'best_params': OrderedDict([('colsample_bytree', 0.641620761696812), ('gamma', 1), ('learning

In [20]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_acs_6_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
    xgb_04 = True,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.961, 'precision': 0.029, 'sensitivity': 0.067, 'f1_score': 0.041, 'fbeta_2': 0.053, 'roc_auc': np.float64(0.664), 'NNS': 34.0, 'best_params': OrderedDict([('colsample_bytree', 0.5000145920428805), ('gamma', 2), ('learning_rate', 0.04857003917925159), ('max_depth', 32), ('min_child_weight', 3), ('n_estimators', 322), ('subsample', 0.8855840593769595)])}
{'fold': 2, 'accuracy': 0.956, 'precision': 0.065, 'sensitivity': 0.214, 'f1_score': 0.1, 'fbeta_2': 0.147, 'roc_auc': np.float64(0.764), 'NNS': 15.333, 'best_params': OrderedDict([('colsample_bytree', 0.5), ('gamma', 2), ('learning_rate', 0.1), ('max_depth', 50), ('min_child_weight', 1), ('n_estimators', 500), ('subsample', 0.8512477338086484)])}
{'fold': 3, 'accuracy': 0.933, 'precision': 0.075, 'sensitivity': 0.429, 'f1_score': 0.128, 'fbeta_2': 0.221, 'roc_auc': np.float64(0.801), 'NNS': 13.333, 'best_params': OrderedDict([('colsample_bytree', 0.641620761696812), ('gamma', 1),

In [21]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_acs_6_months",
    optimizer_map=optimizer_map,
    model_name="mlp",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.943, 'precision': 0.018, 'sensitivity': 0.067, 'f1_score': 0.028, 'fbeta_2': 0.043, 'roc_auc': np.float64(0.691), 'NNS': 56.0, 'best_params': {}}
{'fold': 2, 'accuracy': 0.925, 'precision': 0.086, 'sensitivity': 0.571, 'f1_score': 0.15, 'fbeta_2': 0.268, 'roc_auc': np.float64(0.757), 'NNS': 11.625, 'best_params': {}}
{'fold': 3, 'accuracy': 0.958, 'precision': 0.089, 'sensitivity': 0.286, 'f1_score': 0.136, 'fbeta_2': 0.198, 'roc_auc': np.float64(0.732), 'NNS': 11.25, 'best_params': {}}
{'fold': 4, 'accuracy': 0.952, 'precision': 0.042, 'sensitivity': 0.143, 'f1_score': 0.065, 'fbeta_2': 0.096, 'roc_auc': np.float64(0.654), 'NNS': 24.0, 'best_params': {}}
{'fold': 5, 'accuracy': 0.949, 'precision': 0.039, 'sensitivity': 0.133, 'f1_score': 0.061, 'fbeta_2': 0.09, 'roc_auc': np.float64(0.624), 'NNS': 25.5, 'best_params': {}}

Mean scores across folds ( undersample ):
accuracy: 0.946 ± 0.011
precision: 0.055 ± 0.028
sensitivity: 0.

## y_cvdeath_6_months

In [22]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_cvdeath_6_months",
    optimizer_map=optimizer_map,
    model_name="nb",
    sampling="all",
    n_splits=5,
    random_state=42,
)


=== Technique: BASELINE ===
{'fold': 1, 'accuracy': 0.648, 'precision': 0.066, 'sensitivity': 0.644, 'f1_score': 0.119, 'fbeta_2': 0.233, 'roc_auc': np.float64(0.681), 'NNS': 15.241, 'best_params': OrderedDict([('var_smoothing', 0.1)])}
{'fold': 2, 'accuracy': 0.647, 'precision': 0.067, 'sensitivity': 0.682, 'f1_score': 0.122, 'fbeta_2': 0.241, 'roc_auc': np.float64(0.728), 'NNS': 14.867, 'best_params': OrderedDict([('var_smoothing', 0.012603669997988965)])}
{'fold': 3, 'accuracy': 0.663, 'precision': 0.074, 'sensitivity': 0.727, 'f1_score': 0.135, 'fbeta_2': 0.264, 'roc_auc': np.float64(0.756), 'NNS': 13.438, 'best_params': OrderedDict([('var_smoothing', 0.07737482748284942)])}
{'fold': 4, 'accuracy': 0.707, 'precision': 0.069, 'sensitivity': 0.556, 'f1_score': 0.123, 'fbeta_2': 0.231, 'roc_auc': np.float64(0.705), 'NNS': 14.48, 'best_params': OrderedDict([('var_smoothing', 0.1)])}
{'fold': 5, 'accuracy': 0.718, 'precision': 0.081, 'sensitivity': 0.644, 'f1_score': 0.144, 'fbeta_2': 

In [23]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_cvdeath_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.784, 'precision': 0.067, 'sensitivity': 0.378, 'f1_score': 0.114, 'fbeta_2': 0.197, 'roc_auc': np.float64(0.675), 'NNS': 14.824, 'best_params': OrderedDict([('C', 2.36945111449798), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.775, 'precision': 0.086, 'sensitivity': 0.545, 'f1_score': 0.149, 'fbeta_2': 0.264, 'roc_auc': np.float64(0.756), 'NNS': 11.583, 'best_params': OrderedDict([('C', 487.9169484427551), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.756, 'precision': 0.085, 'sensitivity': 0.591, 'f1_score': 0.149, 'fbeta_2': 0.27, 'roc_auc': np.float64(0.728), 'NNS': 11.731, 'best_params': OrderedDict([('C', 13.746626277890918), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.601, 'precision': 0.074, 'sensitivity': 0.844, 'f1_score': 0.135, 'fbeta_2': 0.273, 'roc_auc': np.float64(0.765), 'NNS': 13.605, 'best_params': OrderedDict([('C', 0.00010602594470

In [24]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_cvdeath_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.724, 'precision': 0.085, 'sensitivity': 0.667, 'f1_score': 0.152, 'fbeta_2': 0.282, 'roc_auc': np.float64(0.766), 'NNS': 11.7, 'best_params': OrderedDict([('C', 0.0014141605539367977), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.708, 'precision': 0.087, 'sensitivity': 0.75, 'f1_score': 0.156, 'fbeta_2': 0.298, 'roc_auc': np.float64(0.762), 'NNS': 11.455, 'best_params': OrderedDict([('C', 0.0010155147000807635), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.755, 'precision': 0.071, 'sensitivity': 0.477, 'f1_score': 0.124, 'fbeta_2': 0.222, 'roc_auc': np.float64(0.695), 'NNS': 14.095, 'best_params': OrderedDict([('C', 487.9169484427551), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.685, 'precision': 0.075, 'sensitivity': 0.667, 'f1_score': 0.135, 'fbeta_2': 0.259, 'roc_auc': np.float64(0.754), 'NNS': 13.3, 'best_params': OrderedDict([('C', 0.000766589717607

In [25]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_cvdeath_6_months",
    optimizer_map=optimizer_map,
    model_name="dt",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.724, 'precision': 0.044, 'sensitivity': 0.311, 'f1_score': 0.077, 'fbeta_2': 0.14, 'roc_auc': np.float64(0.647), 'NNS': 22.857, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 15), ('min_samples_leaf', 9), ('min_samples_split', 9)])}
{'fold': 2, 'accuracy': 0.804, 'precision': 0.051, 'sensitivity': 0.25, 'f1_score': 0.084, 'fbeta_2': 0.14, 'roc_auc': np.float64(0.599), 'NNS': 19.727, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 7), ('min_samples_leaf', 7), ('min_samples_split', 7)])}
{'fold': 3, 'accuracy': 0.624, 'precision': 0.063, 'sensitivity': 0.682, 'f1_score': 0.116, 'fbeta_2': 0.231, 'roc_auc': np.float64(0.703), 'NNS': 15.8, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 3), ('min_samples_leaf', 10), ('min_samples_split', 4)])}
{'fold': 4, 'accuracy': 0.7, 'precision': 0.048, 'sensitivity': 0.378, 'f1_score': 0.085, 'fbeta_2': 0.159, 'roc_auc': np.float64(0

In [26]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_cvdeath_6_months",
    optimizer_map=optimizer_map,
    model_name="rf",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.82, 'precision': 0.097, 'sensitivity': 0.467, 'f1_score': 0.16, 'fbeta_2': 0.264, 'roc_auc': np.float64(0.747), 'NNS': 10.333, 'best_params': OrderedDict([('bootstrap', True), ('max_depth', 40), ('max_features', None), ('min_samples_leaf', 1), ('min_samples_split', 20), ('n_estimators', 300)])}
{'fold': 2, 'accuracy': 0.835, 'precision': 0.097, 'sensitivity': 0.432, 'f1_score': 0.159, 'fbeta_2': 0.256, 'roc_auc': np.float64(0.765), 'NNS': 10.263, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 50), ('max_features', 'sqrt'), ('min_samples_leaf', 1), ('min_samples_split', 20), ('n_estimators', 50)])}
{'fold': 3, 'accuracy': 0.833, 'precision': 0.1, 'sensitivity': 0.455, 'f1_score': 0.164, 'fbeta_2': 0.266, 'roc_auc': np.float64(0.78), 'NNS': 10.0, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 47), ('max_features', 'sqrt'), ('min_samples_leaf', 5), ('min_samples_split', 16), ('n_estimators', 143)])

In [27]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_cvdeath_6_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.817, 'precision': 0.106, 'sensitivity': 0.533, 'f1_score': 0.177, 'fbeta_2': 0.296, 'roc_auc': np.float64(0.758), 'NNS': 9.417, 'best_params': OrderedDict([('colsample_bytree', 0.5044635727545541), ('gamma', 1), ('learning_rate', 0.08624177600092935), ('max_depth', 1), ('min_child_weight', 3), ('n_estimators', 366), ('subsample', 0.804729414109301)])}
{'fold': 2, 'accuracy': 0.819, 'precision': 0.11, 'sensitivity': 0.568, 'f1_score': 0.185, 'fbeta_2': 0.31, 'roc_auc': np.float64(0.765), 'NNS': 9.08, 'best_params': OrderedDict([('colsample_bytree', 0.8997767208035865), ('gamma', 2), ('learning_rate', 0.05313540347564156), ('max_depth', 36), ('min_child_weight', 18), ('n_estimators', 361), ('subsample', 0.712089036230341)])}
{'fold': 3, 'accuracy': 0.8, 'precision': 0.1, 'sensitivity': 0.568, 'f1_score': 0.171, 'fbeta_2': 0.294, 'roc_auc': np.float64(0.789), 'NNS': 9.96, 'best_params': OrderedDict([('colsample_bytree', 0.705051979

In [28]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_cvdeath_6_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
    xgb_04=True
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.751, 'precision': 0.089, 'sensitivity': 0.622, 'f1_score': 0.156, 'fbeta_2': 0.283, 'roc_auc': np.float64(0.758), 'NNS': 11.25, 'best_params': OrderedDict([('colsample_bytree', 0.5044635727545541), ('gamma', 1), ('learning_rate', 0.08624177600092935), ('max_depth', 1), ('min_child_weight', 3), ('n_estimators', 366), ('subsample', 0.804729414109301)])}
{'fold': 2, 'accuracy': 0.753, 'precision': 0.092, 'sensitivity': 0.659, 'f1_score': 0.162, 'fbeta_2': 0.295, 'roc_auc': np.float64(0.765), 'NNS': 10.862, 'best_params': OrderedDict([('colsample_bytree', 0.8997767208035865), ('gamma', 2), ('learning_rate', 0.05313540347564156), ('max_depth', 36), ('min_child_weight', 18), ('n_estimators', 361), ('subsample', 0.712089036230341)])}
{'fold': 3, 'accuracy': 0.727, 'precision': 0.089, 'sensitivity': 0.705, 'f1_score': 0.157, 'fbeta_2': 0.295, 'roc_auc': np.float64(0.789), 'NNS': 11.29, 'best_params': OrderedDict([('colsample_bytree', 0.

In [29]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_cvdeath_6_months",
    optimizer_map=optimizer_map,
    model_name="mlp",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.78, 'precision': 0.086, 'sensitivity': 0.511, 'f1_score': 0.146, 'fbeta_2': 0.256, 'roc_auc': np.float64(0.742), 'NNS': 11.696, 'best_params': {}}
{'fold': 2, 'accuracy': 0.779, 'precision': 0.082, 'sensitivity': 0.5, 'f1_score': 0.141, 'fbeta_2': 0.247, 'roc_auc': np.float64(0.708), 'NNS': 12.227, 'best_params': {}}
{'fold': 3, 'accuracy': 0.753, 'precision': 0.087, 'sensitivity': 0.614, 'f1_score': 0.152, 'fbeta_2': 0.277, 'roc_auc': np.float64(0.716), 'NNS': 11.519, 'best_params': {}}
{'fold': 4, 'accuracy': 0.784, 'precision': 0.087, 'sensitivity': 0.511, 'f1_score': 0.149, 'fbeta_2': 0.259, 'roc_auc': np.float64(0.718), 'NNS': 11.478, 'best_params': {}}
{'fold': 5, 'accuracy': 0.764, 'precision': 0.099, 'sensitivity': 0.667, 'f1_score': 0.173, 'fbeta_2': 0.311, 'roc_auc': np.float64(0.771), 'NNS': 10.067, 'best_params': {}}

Mean scores across folds ( undersample ):
accuracy: 0.772 ± 0.012
precision: 0.088 ± 0.006
sensitivi

## y_death_6_months

In [30]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_death_6_months",
    optimizer_map=optimizer_map,
    model_name="nb",
    sampling="all",
    n_splits=5,
    random_state=42,
)


=== Technique: BASELINE ===
{'fold': 1, 'accuracy': 0.642, 'precision': 0.1, 'sensitivity': 0.783, 'f1_score': 0.177, 'fbeta_2': 0.331, 'roc_auc': np.float64(0.718), 'NNS': 10.0, 'best_params': OrderedDict([('var_smoothing', 0.04224993273318442)])}
{'fold': 2, 'accuracy': 0.553, 'precision': 0.08, 'sensitivity': 0.78, 'f1_score': 0.144, 'fbeta_2': 0.283, 'roc_auc': np.float64(0.699), 'NNS': 12.565, 'best_params': OrderedDict([('var_smoothing', 0.04759974478726932)])}
{'fold': 3, 'accuracy': 0.606, 'precision': 0.09, 'sensitivity': 0.78, 'f1_score': 0.161, 'fbeta_2': 0.307, 'roc_auc': np.float64(0.721), 'NNS': 11.152, 'best_params': OrderedDict([('var_smoothing', 0.03237858380675109)])}
{'fold': 4, 'accuracy': 0.614, 'precision': 0.087, 'sensitivity': 0.729, 'f1_score': 0.155, 'fbeta_2': 0.293, 'roc_auc': np.float64(0.703), 'NNS': 11.558, 'best_params': OrderedDict([('var_smoothing', 0.1)])}
{'fold': 5, 'accuracy': 0.649, 'precision': 0.095, 'sensitivity': 0.717, 'f1_score': 0.167, 'fb

In [31]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_death_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.644, 'precision': 0.102, 'sensitivity': 0.8, 'f1_score': 0.181, 'fbeta_2': 0.338, 'roc_auc': np.float64(0.768), 'NNS': 9.792, 'best_params': OrderedDict([('C', 0.000384051362285679), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.654, 'precision': 0.108, 'sensitivity': 0.847, 'f1_score': 0.192, 'fbeta_2': 0.358, 'roc_auc': np.float64(0.805), 'NNS': 9.26, 'best_params': OrderedDict([('C', 0.0006050901737154604), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.64, 'precision': 0.101, 'sensitivity': 0.814, 'f1_score': 0.18, 'fbeta_2': 0.338, 'roc_auc': np.float64(0.785), 'NNS': 9.896, 'best_params': OrderedDict([('C', 0.00032832540439903956), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.7, 'precision': 0.102, 'sensitivity': 0.661, 'f1_score': 0.176, 'fbeta_2': 0.315, 'roc_auc': np.float64(0.751), 'NNS': 9.846, 'best_params': OrderedDict([('C', 0.001610784010

In [32]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_death_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.669, 'precision': 0.1, 'sensitivity': 0.717, 'f1_score': 0.176, 'fbeta_2': 0.321, 'roc_auc': np.float64(0.763), 'NNS': 9.977, 'best_params': OrderedDict([('C', 0.0008907248222384703), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.631, 'precision': 0.102, 'sensitivity': 0.847, 'f1_score': 0.182, 'fbeta_2': 0.344, 'roc_auc': np.float64(0.796), 'NNS': 9.8, 'best_params': OrderedDict([('C', 0.00036205203230287025), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.616, 'precision': 0.097, 'sensitivity': 0.831, 'f1_score': 0.173, 'fbeta_2': 0.33, 'roc_auc': np.float64(0.789), 'NNS': 10.347, 'best_params': OrderedDict([('C', 0.00027646865288649827), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.634, 'precision': 0.094, 'sensitivity': 0.763, 'f1_score': 0.168, 'fbeta_2': 0.316, 'roc_auc': np.float64(0.759), 'NNS': 10.6, 'best_params': OrderedDict([('C', 0.0003362594802

In [33]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_death_6_months",
    optimizer_map=optimizer_map,
    model_name="dt",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.588, 'precision': 0.083, 'sensitivity': 0.733, 'f1_score': 0.149, 'fbeta_2': 0.286, 'roc_auc': np.float64(0.635), 'NNS': 12.045, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 3), ('min_samples_leaf', 1), ('min_samples_split', 2)])}
{'fold': 2, 'accuracy': 0.659, 'precision': 0.084, 'sensitivity': 0.61, 'f1_score': 0.148, 'fbeta_2': 0.271, 'roc_auc': np.float64(0.679), 'NNS': 11.889, 'best_params': OrderedDict([('criterion', 'gini'), ('max_depth', 5), ('min_samples_leaf', 1), ('min_samples_split', 2)])}
{'fold': 3, 'accuracy': 0.617, 'precision': 0.095, 'sensitivity': 0.814, 'f1_score': 0.171, 'fbeta_2': 0.324, 'roc_auc': np.float64(0.735), 'NNS': 10.5, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 3), ('min_samples_leaf', 5), ('min_samples_split', 20)])}
{'fold': 4, 'accuracy': 0.628, 'precision': 0.084, 'sensitivity': 0.678, 'f1_score': 0.15, 'fbeta_2': 0.282, 'roc_auc': np.float64(0.

In [34]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_death_6_months",
    optimizer_map=optimizer_map,
    model_name="rf",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.675, 'precision': 0.096, 'sensitivity': 0.667, 'f1_score': 0.168, 'fbeta_2': 0.305, 'roc_auc': np.float64(0.756), 'NNS': 10.4, 'best_params': OrderedDict([('bootstrap', True), ('max_depth', 41), ('max_features', 'sqrt'), ('min_samples_leaf', 2), ('min_samples_split', 2), ('n_estimators', 50)])}
{'fold': 2, 'accuracy': 0.676, 'precision': 0.113, 'sensitivity': 0.831, 'f1_score': 0.199, 'fbeta_2': 0.366, 'roc_auc': np.float64(0.765), 'NNS': 8.857, 'best_params': OrderedDict([('bootstrap', True), ('max_depth', 41), ('max_features', None), ('min_samples_leaf', 6), ('min_samples_split', 12), ('n_estimators', 119)])}
{'fold': 3, 'accuracy': 0.712, 'precision': 0.129, 'sensitivity': 0.864, 'f1_score': 0.225, 'fbeta_2': 0.405, 'roc_auc': np.float64(0.831), 'NNS': 7.725, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 16), ('max_features', 'sqrt'), ('min_samples_leaf', 17), ('min_samples_split', 2), ('n_estimators', 293)]

In [35]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_death_6_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.687, 'precision': 0.1, 'sensitivity': 0.667, 'f1_score': 0.173, 'fbeta_2': 0.312, 'roc_auc': np.float64(0.764), 'NNS': 10.05, 'best_params': OrderedDict([('colsample_bytree', 0.7717015338451563), ('gamma', 5), ('learning_rate', 0.05015020365607496), ('max_depth', 42), ('min_child_weight', 7), ('n_estimators', 35), ('subsample', 0.785388901339449)])}
{'fold': 2, 'accuracy': 0.701, 'precision': 0.12, 'sensitivity': 0.814, 'f1_score': 0.209, 'fbeta_2': 0.377, 'roc_auc': np.float64(0.776), 'NNS': 8.354, 'best_params': OrderedDict([('colsample_bytree', 0.7447033912213523), ('gamma', 5), ('learning_rate', 0.015349547813956636), ('max_depth', 7), ('min_child_weight', 1), ('n_estimators', 160), ('subsample', 0.8956670454044815)])}
{'fold': 3, 'accuracy': 0.723, 'precision': 0.122, 'sensitivity': 0.763, 'f1_score': 0.211, 'fbeta_2': 0.373, 'roc_auc': np.float64(0.795), 'NNS': 8.178, 'best_params': OrderedDict([('colsample_bytree', 0.7054

In [7]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_death_6_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
    xgb_04=True
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.576, 'precision': 0.096, 'sensitivity': 0.9, 'f1_score': 0.173, 'fbeta_2': 0.335, 'roc_auc': np.float64(0.764), 'NNS': 10.463, 'best_params': OrderedDict([('colsample_bytree', 0.7717015338451563), ('gamma', 5), ('learning_rate', 0.05015020365607496), ('max_depth', 42), ('min_child_weight', 7), ('n_estimators', 35), ('subsample', 0.785388901339449)])}
{'fold': 2, 'accuracy': 0.587, 'precision': 0.093, 'sensitivity': 0.864, 'f1_score': 0.169, 'fbeta_2': 0.326, 'roc_auc': np.float64(0.776), 'NNS': 10.706, 'best_params': OrderedDict([('colsample_bytree', 0.7447033912213523), ('gamma', 5), ('learning_rate', 0.015349547813956636), ('max_depth', 7), ('min_child_weight', 1), ('n_estimators', 160), ('subsample', 0.8956670454044815)])}
{'fold': 3, 'accuracy': 0.609, 'precision': 0.103, 'sensitivity': 0.915, 'f1_score': 0.185, 'fbeta_2': 0.355, 'roc_auc': np.float64(0.795), 'NNS': 9.722, 'best_params': OrderedDict([('colsample_bytree', 0.7

In [37]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_death_6_months",
    optimizer_map=optimizer_map,
    model_name="mlp",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.69, 'precision': 0.102, 'sensitivity': 0.683, 'f1_score': 0.178, 'fbeta_2': 0.32, 'roc_auc': np.float64(0.726), 'NNS': 9.756, 'best_params': {}}
{'fold': 2, 'accuracy': 0.666, 'precision': 0.095, 'sensitivity': 0.695, 'f1_score': 0.168, 'fbeta_2': 0.308, 'roc_auc': np.float64(0.747), 'NNS': 10.488, 'best_params': {}}
{'fold': 3, 'accuracy': 0.693, 'precision': 0.097, 'sensitivity': 0.644, 'f1_score': 0.169, 'fbeta_2': 0.303, 'roc_auc': np.float64(0.734), 'NNS': 10.289, 'best_params': {}}
{'fold': 4, 'accuracy': 0.7, 'precision': 0.095, 'sensitivity': 0.61, 'f1_score': 0.165, 'fbeta_2': 0.293, 'roc_auc': np.float64(0.71), 'NNS': 10.5, 'best_params': {}}
{'fold': 5, 'accuracy': 0.718, 'precision': 0.106, 'sensitivity': 0.633, 'f1_score': 0.181, 'fbeta_2': 0.317, 'roc_auc': np.float64(0.755), 'NNS': 9.447, 'best_params': {}}

Mean scores across folds ( undersample ):
accuracy: 0.693 ± 0.017
precision: 0.099 ± 0.004
sensitivity: 0.6

## y_hf_6_months

In [38]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_hf_6_months",
    optimizer_map=optimizer_map,
    model_name="nb",
    sampling="all",
    n_splits=5,
    random_state=42,
)


=== Technique: BASELINE ===
{'fold': 1, 'accuracy': 0.573, 'precision': 0.184, 'sensitivity': 0.715, 'f1_score': 0.293, 'fbeta_2': 0.454, 'roc_auc': np.float64(0.69), 'NNS': 5.426, 'best_params': OrderedDict([('var_smoothing', 2.4505790678824128e-05)])}
{'fold': 2, 'accuracy': 0.736, 'precision': 0.25, 'sensitivity': 0.573, 'f1_score': 0.348, 'fbeta_2': 0.456, 'roc_auc': np.float64(0.74), 'NNS': 4.0, 'best_params': OrderedDict([('var_smoothing', 4.919822910583752e-06)])}
{'fold': 3, 'accuracy': 0.755, 'precision': 0.257, 'sensitivity': 0.517, 'f1_score': 0.343, 'fbeta_2': 0.43, 'roc_auc': np.float64(0.737), 'NNS': 3.897, 'best_params': OrderedDict([('var_smoothing', 4.496601749345354e-05)])}
{'fold': 4, 'accuracy': 0.733, 'precision': 0.236, 'sensitivity': 0.517, 'f1_score': 0.324, 'fbeta_2': 0.418, 'roc_auc': np.float64(0.714), 'NNS': 4.231, 'best_params': OrderedDict([('var_smoothing', 0.001626566827921021)])}
{'fold': 5, 'accuracy': 0.739, 'precision': 0.263, 'sensitivity': 0.616, 

In [39]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_hf_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.624, 'precision': 0.224, 'sensitivity': 0.828, 'f1_score': 0.353, 'fbeta_2': 0.538, 'roc_auc': np.float64(0.767), 'NNS': 4.456, 'best_params': OrderedDict([('C', 0.0025067628163442427), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.582, 'precision': 0.203, 'sensitivity': 0.82, 'f1_score': 0.326, 'fbeta_2': 0.51, 'roc_auc': np.float64(0.755), 'NNS': 4.919, 'best_params': OrderedDict([('C', 0.0022428956066463668), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.649, 'precision': 0.238, 'sensitivity': 0.834, 'f1_score': 0.371, 'fbeta_2': 0.556, 'roc_auc': np.float64(0.778), 'NNS': 4.198, 'best_params': OrderedDict([('C', 0.0029838305891824223), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.613, 'precision': 0.217, 'sensitivity': 0.815, 'f1_score': 0.343, 'fbeta_2': 0.526, 'roc_auc': np.float64(0.758), 'NNS': 4.602, 'best_params': OrderedDict([('C', 0.0024423

In [40]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_hf_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.633, 'precision': 0.219, 'sensitivity': 0.762, 'f1_score': 0.34, 'fbeta_2': 0.509, 'roc_auc': np.float64(0.762), 'NNS': 4.574, 'best_params': OrderedDict([('C', 0.00021904882514527536), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.655, 'precision': 0.231, 'sensitivity': 0.773, 'f1_score': 0.356, 'fbeta_2': 0.526, 'roc_auc': np.float64(0.78), 'NNS': 4.328, 'best_params': OrderedDict([('C', 0.0003449761473083623), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.693, 'precision': 0.259, 'sensitivity': 0.795, 'f1_score': 0.391, 'fbeta_2': 0.562, 'roc_auc': np.float64(0.801), 'NNS': 3.858, 'best_params': OrderedDict([('C', 0.00035788770853557964), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.621, 'precision': 0.22, 'sensitivity': 0.808, 'f1_score': 0.346, 'fbeta_2': 0.526, 'roc_auc': np.float64(0.731), 'NNS': 4.549, 'best_params': OrderedDict([('C', 0.00221397771

In [41]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_hf_6_months",
    optimizer_map=optimizer_map,
    model_name="dt",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.61, 'precision': 0.215, 'sensitivity': 0.808, 'f1_score': 0.339, 'fbeta_2': 0.52, 'roc_auc': np.float64(0.736), 'NNS': 4.656, 'best_params': OrderedDict([('criterion', 'gini'), ('max_depth', 3), ('min_samples_leaf', 10), ('min_samples_split', 20)])}
{'fold': 2, 'accuracy': 0.739, 'precision': 0.279, 'sensitivity': 0.707, 'f1_score': 0.4, 'fbeta_2': 0.541, 'roc_auc': np.float64(0.741), 'NNS': 3.585, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 3), ('min_samples_leaf', 10), ('min_samples_split', 2)])}
{'fold': 3, 'accuracy': 0.758, 'precision': 0.286, 'sensitivity': 0.636, 'f1_score': 0.394, 'fbeta_2': 0.511, 'roc_auc': np.float64(0.766), 'NNS': 3.5, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 3), ('min_samples_leaf', 3), ('min_samples_split', 7)])}
{'fold': 4, 'accuracy': 0.773, 'precision': 0.303, 'sensitivity': 0.636, 'f1_score': 0.41, 'fbeta_2': 0.521, 'roc_auc': np.float64(0.753)

In [42]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_hf_6_months",
    optimizer_map=optimizer_map,
    model_name="rf",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.633, 'precision': 0.226, 'sensitivity': 0.808, 'f1_score': 0.353, 'fbeta_2': 0.533, 'roc_auc': np.float64(0.755), 'NNS': 4.426, 'best_params': OrderedDict([('bootstrap', True), ('max_depth', 1), ('max_features', None), ('min_samples_leaf', 1), ('min_samples_split', 2), ('n_estimators', 50)])}
{'fold': 2, 'accuracy': 0.689, 'precision': 0.254, 'sensitivity': 0.787, 'f1_score': 0.384, 'fbeta_2': 0.554, 'roc_auc': np.float64(0.777), 'NNS': 3.941, 'best_params': OrderedDict([('bootstrap', True), ('max_depth', 50), ('max_features', 'sqrt'), ('min_samples_leaf', 1), ('min_samples_split', 16), ('n_estimators', 50)])}
{'fold': 3, 'accuracy': 0.733, 'precision': 0.293, 'sensitivity': 0.815, 'f1_score': 0.431, 'fbeta_2': 0.601, 'roc_auc': np.float64(0.816), 'NNS': 3.415, 'best_params': OrderedDict([('bootstrap', True), ('max_depth', 50), ('max_features', 'log2'), ('min_samples_leaf', 1), ('min_samples_split', 20), ('n_estimators', 300)])}

In [43]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_hf_6_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.72, 'precision': 0.274, 'sensitivity': 0.762, 'f1_score': 0.403, 'fbeta_2': 0.562, 'roc_auc': np.float64(0.793), 'NNS': 3.652, 'best_params': OrderedDict([('colsample_bytree', 0.5018151536273716), ('gamma', 4), ('learning_rate', 0.0743869916924347), ('max_depth', 13), ('min_child_weight', 12), ('n_estimators', 145), ('subsample', 0.6564196531173141)])}
{'fold': 2, 'accuracy': 0.707, 'precision': 0.26, 'sensitivity': 0.747, 'f1_score': 0.386, 'fbeta_2': 0.543, 'roc_auc': np.float64(0.785), 'NNS': 3.848, 'best_params': OrderedDict([('colsample_bytree', 1.0), ('gamma', 4), ('learning_rate', 0.07995166334101904), ('max_depth', 30), ('min_child_weight', 5), ('n_estimators', 122), ('subsample', 1.0)])}
{'fold': 3, 'accuracy': 0.752, 'precision': 0.307, 'sensitivity': 0.795, 'f1_score': 0.443, 'fbeta_2': 0.603, 'roc_auc': np.float64(0.823), 'NNS': 3.258, 'best_params': OrderedDict([('colsample_bytree', 0.5018151536273716), ('gamma', 4)

In [44]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_cvdeath_6_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
    xgb_04=True
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.751, 'precision': 0.089, 'sensitivity': 0.622, 'f1_score': 0.156, 'fbeta_2': 0.283, 'roc_auc': np.float64(0.758), 'NNS': 11.25, 'best_params': OrderedDict([('colsample_bytree', 0.5044635727545541), ('gamma', 1), ('learning_rate', 0.08624177600092935), ('max_depth', 1), ('min_child_weight', 3), ('n_estimators', 366), ('subsample', 0.804729414109301)])}
{'fold': 2, 'accuracy': 0.753, 'precision': 0.092, 'sensitivity': 0.659, 'f1_score': 0.162, 'fbeta_2': 0.295, 'roc_auc': np.float64(0.765), 'NNS': 10.862, 'best_params': OrderedDict([('colsample_bytree', 0.8997767208035865), ('gamma', 2), ('learning_rate', 0.05313540347564156), ('max_depth', 36), ('min_child_weight', 18), ('n_estimators', 361), ('subsample', 0.712089036230341)])}
{'fold': 3, 'accuracy': 0.727, 'precision': 0.089, 'sensitivity': 0.705, 'f1_score': 0.157, 'fbeta_2': 0.295, 'roc_auc': np.float64(0.789), 'NNS': 11.29, 'best_params': OrderedDict([('colsample_bytree', 0.

In [45]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_hf_6_months",
    optimizer_map=optimizer_map,
    model_name="mlp",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.669, 'precision': 0.216, 'sensitivity': 0.636, 'f1_score': 0.322, 'fbeta_2': 0.458, 'roc_auc': np.float64(0.702), 'NNS': 4.635, 'best_params': {}}
{'fold': 2, 'accuracy': 0.663, 'precision': 0.225, 'sensitivity': 0.707, 'f1_score': 0.341, 'fbeta_2': 0.494, 'roc_auc': np.float64(0.75), 'NNS': 4.453, 'best_params': {}}
{'fold': 3, 'accuracy': 0.661, 'precision': 0.221, 'sensitivity': 0.689, 'f1_score': 0.335, 'fbeta_2': 0.484, 'roc_auc': np.float64(0.728), 'NNS': 4.519, 'best_params': {}}
{'fold': 4, 'accuracy': 0.705, 'precision': 0.243, 'sensitivity': 0.649, 'f1_score': 0.353, 'fbeta_2': 0.486, 'roc_auc': np.float64(0.742), 'NNS': 4.122, 'best_params': {}}
{'fold': 5, 'accuracy': 0.647, 'precision': 0.212, 'sensitivity': 0.682, 'f1_score': 0.324, 'fbeta_2': 0.473, 'roc_auc': np.float64(0.718), 'NNS': 4.709, 'best_params': {}}

Mean scores across folds ( undersample ):
accuracy: 0.669 ± 0.019
precision: 0.223 ± 0.011
sensitivity:

## y_inp_6_months

In [46]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_inp_6_months",
    optimizer_map=optimizer_map,
    model_name="nb",
    sampling="all",
    n_splits=5,
    random_state=42,
)


=== Technique: BASELINE ===
{'fold': 1, 'accuracy': 0.642, 'precision': 0.301, 'sensitivity': 0.506, 'f1_score': 0.377, 'fbeta_2': 0.445, 'roc_auc': np.float64(0.654), 'NNS': 3.326, 'best_params': OrderedDict([('var_smoothing', 0.00011866105296908167)])}
{'fold': 2, 'accuracy': 0.634, 'precision': 0.315, 'sensitivity': 0.612, 'f1_score': 0.416, 'fbeta_2': 0.515, 'roc_auc': np.float64(0.673), 'NNS': 3.17, 'best_params': OrderedDict([('var_smoothing', 1.3455914729731753e-06)])}
{'fold': 3, 'accuracy': 0.645, 'precision': 0.31, 'sensitivity': 0.538, 'f1_score': 0.393, 'fbeta_2': 0.469, 'roc_auc': np.float64(0.64), 'NNS': 3.229, 'best_params': OrderedDict([('var_smoothing', 9.493661540765977e-07)])}
{'fold': 4, 'accuracy': 0.337, 'precision': 0.236, 'sensitivity': 0.942, 'f1_score': 0.378, 'fbeta_2': 0.59, 'roc_auc': np.float64(0.644), 'NNS': 4.237, 'best_params': OrderedDict([('var_smoothing', 7.819115625274379e-08)])}
{'fold': 5, 'accuracy': 0.666, 'precision': 0.339, 'sensitivity': 0.5

In [47]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_inp_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.615, 'precision': 0.306, 'sensitivity': 0.628, 'f1_score': 0.412, 'fbeta_2': 0.519, 'roc_auc': np.float64(0.68), 'NNS': 3.268, 'best_params': OrderedDict([('C', 0.0002866380225015022), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.616, 'precision': 0.323, 'sensitivity': 0.727, 'f1_score': 0.447, 'fbeta_2': 0.581, 'roc_auc': np.float64(0.695), 'NNS': 3.101, 'best_params': OrderedDict([('C', 0.008675914768992677), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.625, 'precision': 0.317, 'sensitivity': 0.658, 'f1_score': 0.428, 'fbeta_2': 0.541, 'roc_auc': np.float64(0.664), 'NNS': 3.152, 'best_params': OrderedDict([('C', 0.00023609669269262208), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.6, 'precision': 0.301, 'sensitivity': 0.658, 'f1_score': 0.413, 'fbeta_2': 0.531, 'roc_auc': np.float64(0.669), 'NNS': 3.327, 'best_params': OrderedDict([('C', 0.00784375

In [48]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_inp_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.613, 'precision': 0.309, 'sensitivity': 0.651, 'f1_score': 0.419, 'fbeta_2': 0.533, 'roc_auc': np.float64(0.68), 'NNS': 3.241, 'best_params': OrderedDict([('C', 0.013171572300487202), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.605, 'precision': 0.319, 'sensitivity': 0.746, 'f1_score': 0.446, 'fbeta_2': 0.588, 'roc_auc': np.float64(0.686), 'NNS': 3.139, 'best_params': OrderedDict([('C', 0.009957658611899817), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.599, 'precision': 0.297, 'sensitivity': 0.646, 'f1_score': 0.407, 'fbeta_2': 0.523, 'roc_auc': np.float64(0.659), 'NNS': 3.363, 'best_params': OrderedDict([('C', 0.00921343561095102), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.593, 'precision': 0.297, 'sensitivity': 0.665, 'f1_score': 0.411, 'fbeta_2': 0.533, 'roc_auc': np.float64(0.659), 'NNS': 3.364, 'best_params': OrderedDict([('C', 0.000775635405358

In [49]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_inp_6_months",
    optimizer_map=optimizer_map,
    model_name="dt",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.625, 'precision': 0.301, 'sensitivity': 0.567, 'f1_score': 0.393, 'fbeta_2': 0.482, 'roc_auc': np.float64(0.645), 'NNS': 3.324, 'best_params': OrderedDict([('criterion', 'gini'), ('max_depth', 6), ('min_samples_leaf', 10), ('min_samples_split', 20)])}
{'fold': 2, 'accuracy': 0.599, 'precision': 0.302, 'sensitivity': 0.669, 'f1_score': 0.416, 'fbeta_2': 0.538, 'roc_auc': np.float64(0.656), 'NNS': 3.31, 'best_params': OrderedDict([('criterion', 'gini'), ('max_depth', 4), ('min_samples_leaf', 5), ('min_samples_split', 19)])}
{'fold': 3, 'accuracy': 0.63, 'precision': 0.296, 'sensitivity': 0.531, 'f1_score': 0.38, 'fbeta_2': 0.458, 'roc_auc': np.float64(0.601), 'NNS': 3.384, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 5), ('min_samples_leaf', 10), ('min_samples_split', 3)])}
{'fold': 4, 'accuracy': 0.645, 'precision': 0.313, 'sensitivity': 0.554, 'f1_score': 0.4, 'fbeta_2': 0.48, 'roc_auc': np.float64(0.646),

In [50]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_inp_6_months",
    optimizer_map=optimizer_map,
    model_name="rf",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.533, 'precision': 0.287, 'sensitivity': 0.793, 'f1_score': 0.421, 'fbeta_2': 0.586, 'roc_auc': np.float64(0.67), 'NNS': 3.488, 'best_params': OrderedDict([('bootstrap', True), ('max_depth', 1), ('max_features', None), ('min_samples_leaf', 20), ('min_samples_split', 2), ('n_estimators', 300)])}
{'fold': 2, 'accuracy': 0.516, 'precision': 0.28, 'sensitivity': 0.808, 'f1_score': 0.416, 'fbeta_2': 0.587, 'roc_auc': np.float64(0.662), 'NNS': 3.571, 'best_params': OrderedDict([('bootstrap', True), ('max_depth', 1), ('max_features', None), ('min_samples_leaf', 20), ('min_samples_split', 2), ('n_estimators', 50)])}
{'fold': 3, 'accuracy': 0.493, 'precision': 0.268, 'sensitivity': 0.796, 'f1_score': 0.401, 'fbeta_2': 0.571, 'roc_auc': np.float64(0.603), 'NNS': 3.729, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 1), ('max_features', None), ('min_samples_leaf', 11), ('min_samples_split', 20), ('n_estimators', 50)])}
{'fo

In [51]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_inp_6_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.533, 'precision': 0.287, 'sensitivity': 0.793, 'f1_score': 0.421, 'fbeta_2': 0.586, 'roc_auc': np.float64(0.628), 'NNS': 3.488, 'best_params': OrderedDict([('colsample_bytree', 1.0), ('gamma', 0), ('learning_rate', 0.001), ('max_depth', 1), ('min_child_weight', 1), ('n_estimators', 10), ('subsample', 1.0)])}
{'fold': 2, 'accuracy': 0.529, 'precision': 0.286, 'sensitivity': 0.804, 'f1_score': 0.421, 'fbeta_2': 0.59, 'roc_auc': np.float64(0.64), 'NNS': 3.502, 'best_params': OrderedDict([('colsample_bytree', 0.6394677064262048), ('gamma', 0), ('learning_rate', 0.001), ('max_depth', 1), ('min_child_weight', 3), ('n_estimators', 10), ('subsample', 1.0)])}
{'fold': 3, 'accuracy': 0.493, 'precision': 0.268, 'sensitivity': 0.796, 'f1_score': 0.401, 'fbeta_2': 0.571, 'roc_auc': np.float64(0.603), 'NNS': 3.729, 'best_params': OrderedDict([('colsample_bytree', 1.0), ('gamma', 0), ('learning_rate', 0.001), ('max_depth', 1), ('min_child_weig

In [52]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_inp_6_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
    xgb_04=True
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.214, 'precision': 0.214, 'sensitivity': 1.0, 'f1_score': 0.353, 'fbeta_2': 0.577, 'roc_auc': np.float64(0.628), 'NNS': 4.67, 'best_params': OrderedDict([('colsample_bytree', 1.0), ('gamma', 0), ('learning_rate', 0.001), ('max_depth', 1), ('min_child_weight', 1), ('n_estimators', 10), ('subsample', 1.0)])}
{'fold': 2, 'accuracy': 0.213, 'precision': 0.213, 'sensitivity': 1.0, 'f1_score': 0.352, 'fbeta_2': 0.576, 'roc_auc': np.float64(0.64), 'NNS': 4.685, 'best_params': OrderedDict([('colsample_bytree', 0.6394677064262048), ('gamma', 0), ('learning_rate', 0.001), ('max_depth', 1), ('min_child_weight', 3), ('n_estimators', 10), ('subsample', 1.0)])}
{'fold': 3, 'accuracy': 0.213, 'precision': 0.213, 'sensitivity': 1.0, 'f1_score': 0.352, 'fbeta_2': 0.576, 'roc_auc': np.float64(0.603), 'NNS': 4.685, 'best_params': OrderedDict([('colsample_bytree', 1.0), ('gamma', 0), ('learning_rate', 0.001), ('max_depth', 1), ('min_child_weight', 2

In [53]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_inp_6_months",
    optimizer_map=optimizer_map,
    model_name="mlp",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.597, 'precision': 0.282, 'sensitivity': 0.571, 'f1_score': 0.378, 'fbeta_2': 0.474, 'roc_auc': np.float64(0.622), 'NNS': 3.544, 'best_params': {}}
{'fold': 2, 'accuracy': 0.577, 'precision': 0.271, 'sensitivity': 0.581, 'f1_score': 0.37, 'fbeta_2': 0.473, 'roc_auc': np.float64(0.623), 'NNS': 3.689, 'best_params': {}}
{'fold': 3, 'accuracy': 0.592, 'precision': 0.272, 'sensitivity': 0.542, 'f1_score': 0.362, 'fbeta_2': 0.452, 'roc_auc': np.float64(0.613), 'NNS': 3.681, 'best_params': {}}
{'fold': 4, 'accuracy': 0.589, 'precision': 0.28, 'sensitivity': 0.588, 'f1_score': 0.379, 'fbeta_2': 0.482, 'roc_auc': np.float64(0.617), 'NNS': 3.575, 'best_params': {}}
{'fold': 5, 'accuracy': 0.594, 'precision': 0.289, 'sensitivity': 0.609, 'f1_score': 0.392, 'fbeta_2': 0.498, 'roc_auc': np.float64(0.64), 'NNS': 3.465, 'best_params': {}}

Mean scores across folds ( undersample ):
accuracy: 0.590 ± 0.007
precision: 0.279 ± 0.007
sensitivity: 0

## y_stk_or_aemb

In [54]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb",
    optimizer_map=optimizer_map,
    model_name="nb",
    sampling="all",
    n_splits=5,
    random_state=42,
)


=== Technique: BASELINE ===
{'fold': 1, 'accuracy': 0.459, 'precision': 0.026, 'sensitivity': 0.607, 'f1_score': 0.049, 'fbeta_2': 0.109, 'roc_auc': np.float64(0.577), 'NNS': 39.118, 'best_params': OrderedDict([('var_smoothing', 0.038815006524022595)])}
{'fold': 2, 'accuracy': 0.745, 'precision': 0.033, 'sensitivity': 0.37, 'f1_score': 0.061, 'fbeta_2': 0.122, 'roc_auc': np.float64(0.652), 'NNS': 30.3, 'best_params': OrderedDict([('var_smoothing', 0.05596990799364125)])}
{'fold': 3, 'accuracy': 0.086, 'precision': 0.022, 'sensitivity': 0.926, 'f1_score': 0.043, 'fbeta_2': 0.1, 'roc_auc': np.float64(0.586), 'NNS': 45.44, 'best_params': OrderedDict([('var_smoothing', 1.4155480520859247e-05)])}
{'fold': 4, 'accuracy': 0.608, 'precision': 0.029, 'sensitivity': 0.519, 'f1_score': 0.055, 'fbeta_2': 0.119, 'roc_auc': np.float64(0.52), 'NNS': 34.214, 'best_params': OrderedDict([('var_smoothing', 0.020608473001654685)])}
{'fold': 5, 'accuracy': 0.47, 'precision': 0.02, 'sensitivity': 0.481, 'f

In [55]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.641, 'precision': 0.032, 'sensitivity': 0.5, 'f1_score': 0.06, 'fbeta_2': 0.127, 'roc_auc': np.float64(0.629), 'NNS': 31.286, 'best_params': OrderedDict([('C', 0.0010948964198825438), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.738, 'precision': 0.035, 'sensitivity': 0.407, 'f1_score': 0.065, 'fbeta_2': 0.13, 'roc_auc': np.float64(0.63), 'NNS': 28.545, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.739, 'precision': 0.047, 'sensitivity': 0.556, 'f1_score': 0.086, 'fbeta_2': 0.175, 'roc_auc': np.float64(0.62), 'NNS': 21.4, 'best_params': OrderedDict([('C', 585.813939175574), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.557, 'precision': 0.031, 'sensitivity': 0.63, 'f1_score': 0.059, 'fbeta_2': 0.13, 'roc_auc': np.float64(0.622), 'NNS': 32.176, 'best_params': OrderedDict([('C', 0.00010602594470834996), ('penalt

In [56]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.555, 'precision': 0.029, 'sensitivity': 0.571, 'f1_score': 0.056, 'fbeta_2': 0.121, 'roc_auc': np.float64(0.635), 'NNS': 34.188, 'best_params': OrderedDict([('C', 0.00021966117804062785), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.736, 'precision': 0.038, 'sensitivity': 0.444, 'f1_score': 0.069, 'fbeta_2': 0.141, 'roc_auc': np.float64(0.603), 'NNS': 26.583, 'best_params': OrderedDict([('C', 6.601210458703806), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.741, 'precision': 0.044, 'sensitivity': 0.519, 'f1_score': 0.082, 'fbeta_2': 0.165, 'roc_auc': np.float64(0.622), 'NNS': 22.571, 'best_params': OrderedDict([('C', 997.057119818347), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.718, 'precision': 0.032, 'sensitivity': 0.407, 'f1_score': 0.06, 'fbeta_2': 0.123, 'roc_auc': np.float64(0.588), 'NNS': 30.818, 'best_params': OrderedDict([('C', 5.640688776781903

In [57]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb",
    optimizer_map=optimizer_map,
    model_name="dt",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.765, 'precision': 0.022, 'sensitivity': 0.214, 'f1_score': 0.04, 'fbeta_2': 0.078, 'roc_auc': np.float64(0.493), 'NNS': 45.167, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 17), ('min_samples_leaf', 3), ('min_samples_split', 10)])}
{'fold': 2, 'accuracy': 0.747, 'precision': 0.024, 'sensitivity': 0.259, 'f1_score': 0.043, 'fbeta_2': 0.087, 'roc_auc': np.float64(0.514), 'NNS': 42.143, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 20), ('min_samples_leaf', 3), ('min_samples_split', 2)])}
{'fold': 3, 'accuracy': 0.846, 'precision': 0.018, 'sensitivity': 0.111, 'f1_score': 0.031, 'fbeta_2': 0.055, 'roc_auc': np.float64(0.536), 'NNS': 55.333, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 17), ('min_samples_leaf', 8), ('min_samples_split', 20)])}
{'fold': 4, 'accuracy': 0.862, 'precision': 0.033, 'sensitivity': 0.185, 'f1_score': 0.056, 'fbeta_2': 0.097, 'roc_auc': np.

In [58]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb",
    optimizer_map=optimizer_map,
    model_name="rf",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.82, 'precision': 0.015, 'sensitivity': 0.107, 'f1_score': 0.027, 'fbeta_2': 0.049, 'roc_auc': np.float64(0.452), 'NNS': 65.667, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 28), ('max_features', None), ('min_samples_leaf', 5), ('min_samples_split', 11), ('n_estimators', 102)])}
{'fold': 2, 'accuracy': 0.843, 'precision': 0.034, 'sensitivity': 0.222, 'f1_score': 0.059, 'fbeta_2': 0.106, 'roc_auc': np.float64(0.636), 'NNS': 29.333, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 50), ('max_features', None), ('min_samples_leaf', 9), ('min_samples_split', 20), ('n_estimators', 300)])}
{'fold': 3, 'accuracy': 0.769, 'precision': 0.019, 'sensitivity': 0.185, 'f1_score': 0.034, 'fbeta_2': 0.067, 'roc_auc': np.float64(0.466), 'NNS': 52.8, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 50), ('max_features', None), ('min_samples_leaf', 1), ('min_samples_split', 2), ('n_estimators', 300)]

In [59]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.938, 'precision': 0.039, 'sensitivity': 0.071, 'f1_score': 0.051, 'fbeta_2': 0.061, 'roc_auc': np.float64(0.611), 'NNS': 25.5, 'best_params': OrderedDict([('colsample_bytree', 0.9872019464571068), ('gamma', 0), ('learning_rate', 0.05813830530773891), ('max_depth', 40), ('min_child_weight', 2), ('n_estimators', 364), ('subsample', 0.5056730906001793)])}
{'fold': 2, 'accuracy': 0.877, 'precision': 0.023, 'sensitivity': 0.111, 'f1_score': 0.038, 'fbeta_2': 0.063, 'roc_auc': np.float64(0.591), 'NNS': 43.0, 'best_params': OrderedDict([('colsample_bytree', 1.0), ('gamma', 0), ('learning_rate', 0.1), ('max_depth', 1), ('min_child_weight', 1), ('n_estimators', 500), ('subsample', 0.5)])}
{'fold': 3, 'accuracy': 0.902, 'precision': 0.031, 'sensitivity': 0.111, 'f1_score': 0.048, 'fbeta_2': 0.073, 'roc_auc': np.float64(0.555), 'NNS': 32.667, 'best_params': OrderedDict([('colsample_bytree', 1.0), ('gamma', 0), ('learning_rate', 0.1), ('max

In [61]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
    xgb_04=True
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.915, 'precision': 0.037, 'sensitivity': 0.107, 'f1_score': 0.055, 'fbeta_2': 0.077, 'roc_auc': np.float64(0.611), 'NNS': 27.333, 'best_params': OrderedDict([('colsample_bytree', 0.9872019464571068), ('gamma', 0), ('learning_rate', 0.05813830530773891), ('max_depth', 40), ('min_child_weight', 2), ('n_estimators', 364), ('subsample', 0.5056730906001793)])}
{'fold': 2, 'accuracy': 0.835, 'precision': 0.017, 'sensitivity': 0.111, 'f1_score': 0.029, 'fbeta_2': 0.052, 'roc_auc': np.float64(0.591), 'NNS': 60.0, 'best_params': OrderedDict([('colsample_bytree', 1.0), ('gamma', 0), ('learning_rate', 0.1), ('max_depth', 1), ('min_child_weight', 1), ('n_estimators', 500), ('subsample', 0.5)])}
{'fold': 3, 'accuracy': 0.874, 'precision': 0.023, 'sensitivity': 0.111, 'f1_score': 0.038, 'fbeta_2': 0.062, 'roc_auc': np.float64(0.555), 'NNS': 44.0, 'best_params': OrderedDict([('colsample_bytree', 1.0), ('gamma', 0), ('learning_rate', 0.1), ('max

In [62]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb",
    optimizer_map=optimizer_map,
    model_name="mlp",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.872, 'precision': 0.043, 'sensitivity': 0.214, 'f1_score': 0.071, 'fbeta_2': 0.119, 'roc_auc': np.float64(0.577), 'NNS': 23.333, 'best_params': {}}
{'fold': 2, 'accuracy': 0.845, 'precision': 0.04, 'sensitivity': 0.259, 'f1_score': 0.069, 'fbeta_2': 0.123, 'roc_auc': np.float64(0.583), 'NNS': 25.143, 'best_params': {}}
{'fold': 3, 'accuracy': 0.86, 'precision': 0.056, 'sensitivity': 0.333, 'f1_score': 0.095, 'fbeta_2': 0.167, 'roc_auc': np.float64(0.685), 'NNS': 18.0, 'best_params': {}}
{'fold': 4, 'accuracy': 0.874, 'precision': 0.03, 'sensitivity': 0.148, 'f1_score': 0.049, 'fbeta_2': 0.082, 'roc_auc': np.float64(0.583), 'NNS': 33.75, 'best_params': {}}
{'fold': 5, 'accuracy': 0.859, 'precision': 0.032, 'sensitivity': 0.185, 'f1_score': 0.055, 'fbeta_2': 0.095, 'roc_auc': np.float64(0.56), 'NNS': 31.0, 'best_params': {}}

Mean scores across folds ( undersample ):
accuracy: 0.862 ± 0.010
precision: 0.040 ± 0.009
sensitivity: 0.

## y_stk_or_aemb 1 year

In [6]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="nb",
    sampling="all",
    n_splits=5,
    random_state=42,
)


=== Technique: BASELINE ===
{'fold': 1, 'accuracy': 0.257, 'precision': 0.011, 'sensitivity': 0.833, 'f1_score': 0.022, 'fbeta_2': 0.052, 'roc_auc': np.float64(0.563), 'NNS': 91.4, 'best_params': OrderedDict([('var_smoothing', 1.8820146565688435e-05)])}
{'fold': 2, 'accuracy': 0.444, 'precision': 0.01, 'sensitivity': 0.636, 'f1_score': 0.02, 'fbeta_2': 0.048, 'roc_auc': np.float64(0.474), 'NNS': 97.143, 'best_params': OrderedDict([('var_smoothing', 0.1)])}
{'fold': 3, 'accuracy': 0.234, 'precision': 0.011, 'sensitivity': 0.909, 'f1_score': 0.021, 'fbeta_2': 0.051, 'roc_auc': np.float64(0.68), 'NNS': 94.2, 'best_params': OrderedDict([('var_smoothing', 0.00018232396833126318)])}
{'fold': 4, 'accuracy': 0.291, 'precision': 0.013, 'sensitivity': 1.0, 'f1_score': 0.025, 'fbeta_2': 0.06, 'roc_auc': np.float64(0.625), 'NNS': 79.545, 'best_params': OrderedDict([('var_smoothing', 0.010089206463940134)])}
{'fold': 5, 'accuracy': 0.479, 'precision': 0.013, 'sensitivity': 0.667, 'f1_score': 0.025

In [20]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.868, 'precision': 0.019, 'sensitivity': 0.25, 'f1_score': 0.036, 'fbeta_2': 0.074, 'roc_auc': np.float64(0.532), 'NNS': 51.667, 'best_params': OrderedDict([('C', 21.154493605815308), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.874, 'precision': 0.02, 'sensitivity': 0.273, 'f1_score': 0.037, 'fbeta_2': 0.078, 'roc_auc': np.float64(0.642), 'NNS': 49.667, 'best_params': OrderedDict([('C', 0.00014412623375973608), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.853, 'precision': 0.028, 'sensitivity': 0.455, 'f1_score': 0.053, 'fbeta_2': 0.113, 'roc_auc': np.float64(0.783), 'NNS': 35.6, 'best_params': OrderedDict([('C', 72.73082081096716), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.792, 'precision': 0.004, 'sensitivity': 0.091, 'f1_score': 0.008, 'fbeta_2': 0.017, 'roc_auc': np.float64(0.593), 'NNS': 244.0, 'best_params': OrderedDict([('C', 1000.0), ('pe

In [21]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.874, 'precision': 0.007, 'sensitivity': 0.083, 'f1_score': 0.013, 'fbeta_2': 0.026, 'roc_auc': np.float64(0.459), 'NNS': 143.0, 'best_params': OrderedDict([('C', 997.7505599555375), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.861, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.487), 'NNS': inf, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.812, 'precision': 0.03, 'sensitivity': 0.636, 'f1_score': 0.058, 'fbeta_2': 0.127, 'roc_auc': np.float64(0.818), 'NNS': 33.143, 'best_params': OrderedDict([('C', 487.9169484427551), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.859, 'precision': 0.012, 'sensitivity': 0.182, 'f1_score': 0.023, 'fbeta_2': 0.048, 'roc_auc': np.float64(0.561), 'NNS': 82.5, 'best_params': OrderedDict([('C', 183.61505741204678), ('penalty', 'l2'), (

In [22]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.879, 'precision': 0.007, 'sensitivity': 0.083, 'f1_score': 0.013, 'fbeta_2': 0.027, 'roc_auc': np.float64(0.46), 'NNS': 137.0, 'best_params': OrderedDict([('C', 48.61518228524981), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.869, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.498), 'NNS': inf, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.815, 'precision': 0.031, 'sensitivity': 0.636, 'f1_score': 0.059, 'fbeta_2': 0.129, 'roc_auc': np.float64(0.823), 'NNS': 32.571, 'best_params': OrderedDict([('C', 487.9169484427551), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.866, 'precision': 0.006, 'sensitivity': 0.091, 'f1_score': 0.012, 'fbeta_2': 0.025, 'roc_auc': np.float64(0.558), 'NNS': 154.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'libli

In [8]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="dt",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.912, 'precision': 0.01, 'sensitivity': 0.083, 'f1_score': 0.018, 'fbeta_2': 0.034, 'roc_auc': np.float64(0.488), 'NNS': 97.0, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 20), ('min_samples_leaf', 3), ('min_samples_split', 2)])}
{'fold': 2, 'accuracy': 0.888, 'precision': 0.016, 'sensitivity': 0.182, 'f1_score': 0.029, 'fbeta_2': 0.058, 'roc_auc': np.float64(0.521), 'NNS': 64.5, 'best_params': OrderedDict([('criterion', 'gini'), ('max_depth', 20), ('min_samples_leaf', 3), ('min_samples_split', 2)])}
{'fold': 3, 'accuracy': 0.911, 'precision': 0.01, 'sensitivity': 0.091, 'f1_score': 0.018, 'fbeta_2': 0.035, 'roc_auc': np.float64(0.643), 'NNS': 99.0, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 12), ('min_samples_leaf', 1), ('min_samples_split', 2)])}
{'fold': 4, 'accuracy': 0.922, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.5), 'NNS':

In [9]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="rf",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.893, 'precision': 0.008, 'sensitivity': 0.083, 'f1_score': 0.015, 'fbeta_2': 0.03, 'roc_auc': np.float64(0.5), 'NNS': 120.0, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 39), ('max_features', None), ('min_samples_leaf', 3), ('min_samples_split', 2), ('n_estimators', 277)])}
{'fold': 2, 'accuracy': 0.88, 'precision': 0.014, 'sensitivity': 0.182, 'f1_score': 0.027, 'fbeta_2': 0.055, 'roc_auc': np.float64(0.507), 'NNS': 69.5, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 50), ('max_features', None), ('min_samples_leaf', 1), ('min_samples_split', 2), ('n_estimators', 267)])}
{'fold': 3, 'accuracy': 0.958, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.482), 'NNS': inf, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 33), ('max_features', None), ('min_samples_leaf', 11), ('min_samples_split', 8), ('n_estimators', 96)])}
{'fold': 4, 'a

In [10]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.985, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.538), 'NNS': inf, 'best_params': OrderedDict([('colsample_bytree', 0.7362724504252515), ('gamma', 0), ('learning_rate', 0.0998081395813055), ('max_depth', 39), ('min_child_weight', 4), ('n_estimators', 70), ('subsample', 0.8037844840874203)])}
{'fold': 2, 'accuracy': 0.984, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.493), 'NNS': inf, 'best_params': OrderedDict([('colsample_bytree', 0.5), ('gamma', 0), ('learning_rate', 0.1), ('max_depth', 50), ('min_child_weight', 1), ('n_estimators', 500), ('subsample', 0.5)])}
{'fold': 3, 'accuracy': 0.991, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.576), 'NNS': inf, 'best_params': OrderedDict([('colsample_bytree', 0.705051979426657), ('gamma', 4), ('learning_rate', 0.09335393188593556), ('max

In [7]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.99, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.479), 'NNS': inf, 'best_params': OrderedDict([('colsample_bytree', 0.5), ('gamma', 5), ('learning_rate', 0.001), ('max_depth', 50), ('min_child_weight', 1), ('n_estimators', 500), ('subsample', 1.0)])}
{'fold': 2, 'accuracy': 0.991, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.483), 'NNS': inf, 'best_params': OrderedDict([('colsample_bytree', 0.5000737302024599), ('gamma', 1), ('learning_rate', 0.04068730430995486), ('max_depth', 41), ('min_child_weight', 15), ('n_estimators', 282), ('subsample', 0.7088660593035934)])}
{'fold': 3, 'accuracy': 0.991, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.791), 'NNS': inf, 'best_params': OrderedDict([('colsample_bytree', 0.7348483379297668), ('gamma', 0), ('learning_rate', 0.02605140247230145), (

In [11]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="mlp",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.954, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.503), 'NNS': inf, 'best_params': {}}
{'fold': 2, 'accuracy': 0.961, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.617), 'NNS': inf, 'best_params': {}}
{'fold': 3, 'accuracy': 0.967, 'precision': 0.032, 'sensitivity': 0.091, 'f1_score': 0.048, 'fbeta_2': 0.067, 'roc_auc': np.float64(0.724), 'NNS': 31.0, 'best_params': {}}
{'fold': 4, 'accuracy': 0.961, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.597), 'NNS': inf, 'best_params': {}}
{'fold': 5, 'accuracy': 0.962, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.631), 'NNS': inf, 'best_params': {}}

Mean scores across folds ( undersample ):
accuracy: 0.961 ± 0.004
precision: 0.006 ± 0.013
sensitivity: 0.018 ± 0.036
f1_score: 0.010 ± 0.019
f

## y_stk_or_aemb_24_months

In [12]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="nb",
    sampling="all",
    n_splits=5,
    random_state=42,
)


=== Technique: BASELINE ===
{'fold': 1, 'accuracy': 0.272, 'precision': 0.016, 'sensitivity': 0.875, 'f1_score': 0.031, 'fbeta_2': 0.073, 'roc_auc': np.float64(0.655), 'NNS': 64.214, 'best_params': OrderedDict([('var_smoothing', 0.006871456952040522)])}
{'fold': 2, 'accuracy': 0.331, 'precision': 0.018, 'sensitivity': 0.938, 'f1_score': 0.036, 'fbeta_2': 0.084, 'roc_auc': np.float64(0.621), 'NNS': 55.267, 'best_params': OrderedDict([('var_smoothing', 0.013867891530433435)])}
{'fold': 3, 'accuracy': 0.551, 'precision': 0.015, 'sensitivity': 0.5, 'f1_score': 0.028, 'fbeta_2': 0.065, 'roc_auc': np.float64(0.561), 'NNS': 68.375, 'best_params': OrderedDict([('var_smoothing', 0.1)])}
{'fold': 4, 'accuracy': 0.365, 'precision': 0.015, 'sensitivity': 0.75, 'f1_score': 0.03, 'fbeta_2': 0.071, 'roc_auc': np.float64(0.62), 'NNS': 65.083, 'best_params': OrderedDict([('var_smoothing', 0.03597056775845164)])}
{'fold': 5, 'accuracy': 0.362, 'precision': 0.014, 'sensitivity': 0.688, 'f1_score': 0.028

In [17]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===




{'fold': 1, 'accuracy': 0.898, 'precision': 0.018, 'sensitivity': 0.125, 'f1_score': 0.031, 'fbeta_2': 0.057, 'roc_auc': np.float64(0.64), 'NNS': 56.0, 'best_params': OrderedDict([('C', 0.0001509933328040306), ('penalty', 'l2'), ('solver', 'liblinear')])}




{'fold': 2, 'accuracy': 0.846, 'precision': 0.022, 'sensitivity': 0.25, 'f1_score': 0.041, 'fbeta_2': 0.082, 'roc_auc': np.float64(0.581), 'NNS': 44.75, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}




{'fold': 3, 'accuracy': 0.75, 'precision': 0.01, 'sensitivity': 0.188, 'f1_score': 0.019, 'fbeta_2': 0.042, 'roc_auc': np.float64(0.414), 'NNS': 98.333, 'best_params': OrderedDict([('C', 0.00016572302705136722), ('penalty', 'l2'), ('solver', 'liblinear')])}




{'fold': 4, 'accuracy': 0.848, 'precision': 0.023, 'sensitivity': 0.25, 'f1_score': 0.041, 'fbeta_2': 0.083, 'roc_auc': np.float64(0.541), 'NNS': 44.25, 'best_params': OrderedDict([('C', 20.976281803377294), ('penalty', 'l1'), ('solver', 'liblinear')])}




{'fold': 5, 'accuracy': 0.856, 'precision': 0.024, 'sensitivity': 0.25, 'f1_score': 0.043, 'fbeta_2': 0.086, 'roc_auc': np.float64(0.593), 'NNS': 42.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l1'), ('solver', 'liblinear')])}

Mean scores across folds ( undersample ):
accuracy: 0.840 ± 0.049
precision: 0.019 ± 0.005
sensitivity: 0.212 ± 0.050
f1_score: 0.035 ± 0.009
fbeta_2: 0.070 ± 0.018
roc_auc: 0.554 ± 0.077
NNS: 57.067 ± 21.199


In [18]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.784, 'precision': 0.027, 'sensitivity': 0.438, 'f1_score': 0.051, 'fbeta_2': 0.108, 'roc_auc': np.float64(0.655), 'NNS': 37.286, 'best_params': OrderedDict([('C', 48.61518228524981), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.792, 'precision': 0.024, 'sensitivity': 0.375, 'f1_score': 0.045, 'fbeta_2': 0.096, 'roc_auc': np.float64(0.645), 'NNS': 41.5, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.787, 'precision': 0.02, 'sensitivity': 0.312, 'f1_score': 0.037, 'fbeta_2': 0.079, 'roc_auc': np.float64(0.646), 'NNS': 50.6, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.782, 'precision': 0.023, 'sensitivity': 0.375, 'f1_score': 0.043, 'fbeta_2': 0.092, 'roc_auc': np.float64(0.587), 'NNS': 43.5, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'libl

In [19]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.8, 'precision': 0.029, 'sensitivity': 0.438, 'f1_score': 0.054, 'fbeta_2': 0.114, 'roc_auc': np.float64(0.656), 'NNS': 34.571, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.815, 'precision': 0.027, 'sensitivity': 0.375, 'f1_score': 0.051, 'fbeta_2': 0.105, 'roc_auc': np.float64(0.616), 'NNS': 36.833, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.801, 'precision': 0.017, 'sensitivity': 0.25, 'f1_score': 0.032, 'fbeta_2': 0.067, 'roc_auc': np.float64(0.639), 'NNS': 58.5, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.787, 'precision': 0.024, 'sensitivity': 0.375, 'f1_score': 0.044, 'fbeta_2': 0.094, 'roc_auc': np.float64(0.607), 'NNS': 42.5, 'best_params': OrderedDict([('C', 346.6450210477977), ('penalty', 'l1'), ('solver', 'liblinear

In [13]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="dt",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.878, 'precision': 0.015, 'sensitivity': 0.125, 'f1_score': 0.026, 'fbeta_2': 0.05, 'roc_auc': np.float64(0.552), 'NNS': 68.5, 'best_params': OrderedDict([('criterion', 'gini'), ('max_depth', 15), ('min_samples_leaf', 1), ('min_samples_split', 9)])}
{'fold': 2, 'accuracy': 0.845, 'precision': 0.011, 'sensitivity': 0.125, 'f1_score': 0.021, 'fbeta_2': 0.041, 'roc_auc': np.float64(0.49), 'NNS': 88.5, 'best_params': OrderedDict([('criterion', 'entropy'), ('max_depth', 18), ('min_samples_leaf', 1), ('min_samples_split', 4)])}
{'fold': 3, 'accuracy': 0.872, 'precision': 0.014, 'sensitivity': 0.125, 'f1_score': 0.025, 'fbeta_2': 0.048, 'roc_auc': np.float64(0.485), 'NNS': 72.0, 'best_params': OrderedDict([('criterion', 'gini'), ('max_depth', 20), ('min_samples_leaf', 7), ('min_samples_split', 12)])}
{'fold': 4, 'accuracy': 0.858, 'precision': 0.012, 'sensitivity': 0.125, 'f1_score': 0.023, 'fbeta_2': 0.044, 'roc_auc': np.float64(0.539)

In [15]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="rf",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.886, 'precision': 0.008, 'sensitivity': 0.062, 'f1_score': 0.014, 'fbeta_2': 0.026, 'roc_auc': np.float64(0.478), 'NNS': 125.0, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 46), ('max_features', None), ('min_samples_leaf', 2), ('min_samples_split', 3), ('n_estimators', 120)])}
{'fold': 2, 'accuracy': 0.85, 'precision': 0.017, 'sensitivity': 0.188, 'f1_score': 0.032, 'fbeta_2': 0.063, 'roc_auc': np.float64(0.525), 'NNS': 57.667, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 28), ('max_features', None), ('min_samples_leaf', 1), ('min_samples_split', 2), ('n_estimators', 124)])}
{'fold': 3, 'accuracy': 0.872, 'precision': 0.014, 'sensitivity': 0.125, 'f1_score': 0.025, 'fbeta_2': 0.048, 'roc_auc': np.float64(0.499), 'NNS': 72.0, 'best_params': OrderedDict([('bootstrap', False), ('max_depth', 43), ('max_features', None), ('min_samples_leaf', 7), ('min_samples_split', 2), ('n_estimators', 50)])}
{

In [14]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="xgb",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.961, 'precision': 0.03, 'sensitivity': 0.062, 'f1_score': 0.041, 'fbeta_2': 0.052, 'roc_auc': np.float64(0.547), 'NNS': 33.0, 'best_params': OrderedDict([('colsample_bytree', 0.7010592292962871), ('gamma', 0), ('learning_rate', 0.08685023472134722), ('max_depth', 1), ('min_child_weight', 1), ('n_estimators', 500), ('subsample', 0.629825306996951)])}
{'fold': 2, 'accuracy': 0.968, 'precision': 0.04, 'sensitivity': 0.062, 'f1_score': 0.049, 'fbeta_2': 0.056, 'roc_auc': np.float64(0.567), 'NNS': 25.0, 'best_params': OrderedDict([('colsample_bytree', 0.6846992849299252), ('gamma', 1), ('learning_rate', 0.09184488163034371), ('max_depth', 16), ('min_child_weight', 8), ('n_estimators', 463), ('subsample', 0.525469823067751)])}
{'fold': 3, 'accuracy': 0.981, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.49), 'NNS': inf, 'best_params': OrderedDict([('colsample_bytree', 0.8067664661530178)

In [16]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="mlp",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.937, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.555), 'NNS': inf, 'best_params': {}}
{'fold': 2, 'accuracy': 0.931, 'precision': 0.028, 'sensitivity': 0.125, 'f1_score': 0.045, 'fbeta_2': 0.074, 'roc_auc': np.float64(0.58), 'NNS': 36.0, 'best_params': {}}
{'fold': 3, 'accuracy': 0.943, 'precision': 0.018, 'sensitivity': 0.062, 'f1_score': 0.028, 'fbeta_2': 0.042, 'roc_auc': np.float64(0.569), 'NNS': 55.0, 'best_params': {}}
{'fold': 4, 'accuracy': 0.942, 'precision': 0.018, 'sensitivity': 0.062, 'f1_score': 0.027, 'fbeta_2': 0.041, 'roc_auc': np.float64(0.511), 'NNS': 57.0, 'best_params': {}}
{'fold': 5, 'accuracy': 0.94, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.521), 'NNS': inf, 'best_params': {}}

Mean scores across folds ( undersample ):
accuracy: 0.939 ± 0.004
precision: 0.013 ± 0.011
sensitivity: 0.050 ± 0.047
f1_score:

## Others

In [23]:

search_space_lr = {
    'C': (1e-4, 1e+3, 'log-uniform'),
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear']
}

lr_opt = BayesSearchCV(
    LogisticRegression(max_iter=1000, random_state=42),
    search_spaces=search_space_lr,
    scoring="roc_auc",
    n_iter=30,
    cv=cv,
    n_jobs=-1,
    random_state=42
)



# ===================== Optimizer Map =====================
optimizer_map = {
    "nb": nb_opt,
    "lr": lr_opt,
    "dt": dt_opt,
    "rf": rf_opt,
    "xgb": xgb_opt,
    "mlp": mlp_opt
}


In [24]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.785, 'precision': 0.027, 'sensitivity': 0.438, 'f1_score': 0.051, 'fbeta_2': 0.108, 'roc_auc': np.float64(0.66), 'NNS': 37.143, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.792, 'precision': 0.024, 'sensitivity': 0.375, 'f1_score': 0.045, 'fbeta_2': 0.096, 'roc_auc': np.float64(0.645), 'NNS': 41.5, 'best_params': OrderedDict([('C', 733.7074991151121), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.787, 'precision': 0.02, 'sensitivity': 0.312, 'f1_score': 0.037, 'fbeta_2': 0.079, 'roc_auc': np.float64(0.646), 'NNS': 50.6, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.782, 'precision': 0.023, 'sensitivity': 0.375, 'f1_score': 0.043, 'fbeta_2': 0.092, 'roc_auc': np.float64(0.589), 'NNS': 43.5, 'best_params': OrderedDict([('C', 993.7924934184501), ('penalty', 'l2'), ('solv

In [25]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.8, 'precision': 0.029, 'sensitivity': 0.438, 'f1_score': 0.054, 'fbeta_2': 0.114, 'roc_auc': np.float64(0.656), 'NNS': 34.571, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.815, 'precision': 0.027, 'sensitivity': 0.375, 'f1_score': 0.051, 'fbeta_2': 0.105, 'roc_auc': np.float64(0.616), 'NNS': 36.833, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.801, 'precision': 0.017, 'sensitivity': 0.25, 'f1_score': 0.032, 'fbeta_2': 0.067, 'roc_auc': np.float64(0.639), 'NNS': 58.5, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.787, 'precision': 0.023, 'sensitivity': 0.375, 'f1_score': 0.044, 'fbeta_2': 0.094, 'roc_auc': np.float64(0.607), 'NNS': 42.667, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'f

In [26]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.87, 'precision': 0.007, 'sensitivity': 0.083, 'f1_score': 0.013, 'fbeta_2': 0.026, 'roc_auc': np.float64(0.458), 'NNS': 148.0, 'best_params': OrderedDict([('C', 12.904637420565189), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.861, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.487), 'NNS': inf, 'best_params': OrderedDict([('C', 487.9169484427551), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.812, 'precision': 0.03, 'sensitivity': 0.636, 'f1_score': 0.058, 'fbeta_2': 0.127, 'roc_auc': np.float64(0.818), 'NNS': 33.143, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.852, 'precision': 0.012, 'sensitivity': 0.182, 'f1_score': 0.022, 'fbeta_2': 0.046, 'roc_auc': np.float64(0.575), 'NNS': 86.5, 'best_params': OrderedDict([('C', 6.328480155938008), ('penalty', 'l2'), ('

In [27]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.879, 'precision': 0.007, 'sensitivity': 0.083, 'f1_score': 0.013, 'fbeta_2': 0.027, 'roc_auc': np.float64(0.46), 'NNS': 138.0, 'best_params': OrderedDict([('C', 18.012458430275323), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.865, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.511), 'NNS': inf, 'best_params': OrderedDict([('C', 2.0938739961404518), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.815, 'precision': 0.031, 'sensitivity': 0.636, 'f1_score': 0.059, 'fbeta_2': 0.129, 'roc_auc': np.float64(0.824), 'NNS': 32.571, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.863, 'precision': 0.006, 'sensitivity': 0.091, 'f1_score': 0.012, 'fbeta_2': 0.025, 'roc_auc': np.float64(0.559), 'NNS': 158.0, 'best_params': OrderedDict([('C', 13.746626277890918), ('penalty', 'l2'), ('s

## LR static all time ranges

In [None]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===




{'fold': 1, 'accuracy': 0.898, 'precision': 0.018, 'sensitivity': 0.125, 'f1_score': 0.031, 'fbeta_2': 0.057, 'roc_auc': np.float64(0.64), 'NNS': 56.0, 'best_params': OrderedDict([('C', 0.0001509933328040306), ('penalty', 'l2'), ('solver', 'liblinear')])}




{'fold': 2, 'accuracy': 0.846, 'precision': 0.022, 'sensitivity': 0.25, 'f1_score': 0.041, 'fbeta_2': 0.082, 'roc_auc': np.float64(0.581), 'NNS': 44.75, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}




{'fold': 3, 'accuracy': 0.75, 'precision': 0.01, 'sensitivity': 0.188, 'f1_score': 0.019, 'fbeta_2': 0.042, 'roc_auc': np.float64(0.414), 'NNS': 98.333, 'best_params': OrderedDict([('C', 0.00016572302705136722), ('penalty', 'l2'), ('solver', 'liblinear')])}




{'fold': 4, 'accuracy': 0.848, 'precision': 0.023, 'sensitivity': 0.25, 'f1_score': 0.041, 'fbeta_2': 0.083, 'roc_auc': np.float64(0.541), 'NNS': 44.25, 'best_params': OrderedDict([('C', 20.976281803377294), ('penalty', 'l1'), ('solver', 'liblinear')])}




{'fold': 5, 'accuracy': 0.856, 'precision': 0.024, 'sensitivity': 0.25, 'f1_score': 0.043, 'fbeta_2': 0.086, 'roc_auc': np.float64(0.593), 'NNS': 42.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l1'), ('solver', 'liblinear')])}

Mean scores across folds ( undersample ):
accuracy: 0.840 ± 0.049
precision: 0.019 ± 0.005
sensitivity: 0.212 ± 0.050
f1_score: 0.035 ± 0.009
fbeta_2: 0.070 ± 0.018
roc_auc: 0.554 ± 0.077
NNS: 57.067 ± 21.199


In [None]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.784, 'precision': 0.027, 'sensitivity': 0.438, 'f1_score': 0.051, 'fbeta_2': 0.108, 'roc_auc': np.float64(0.655), 'NNS': 37.286, 'best_params': OrderedDict([('C', 48.61518228524981), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.792, 'precision': 0.024, 'sensitivity': 0.375, 'f1_score': 0.045, 'fbeta_2': 0.096, 'roc_auc': np.float64(0.645), 'NNS': 41.5, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.787, 'precision': 0.02, 'sensitivity': 0.312, 'f1_score': 0.037, 'fbeta_2': 0.079, 'roc_auc': np.float64(0.646), 'NNS': 50.6, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.782, 'precision': 0.023, 'sensitivity': 0.375, 'f1_score': 0.043, 'fbeta_2': 0.092, 'roc_auc': np.float64(0.587), 'NNS': 43.5, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'libl

In [None]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_24_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.8, 'precision': 0.029, 'sensitivity': 0.438, 'f1_score': 0.054, 'fbeta_2': 0.114, 'roc_auc': np.float64(0.656), 'NNS': 34.571, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.815, 'precision': 0.027, 'sensitivity': 0.375, 'f1_score': 0.051, 'fbeta_2': 0.105, 'roc_auc': np.float64(0.616), 'NNS': 36.833, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.801, 'precision': 0.017, 'sensitivity': 0.25, 'f1_score': 0.032, 'fbeta_2': 0.067, 'roc_auc': np.float64(0.639), 'NNS': 58.5, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.787, 'precision': 0.024, 'sensitivity': 0.375, 'f1_score': 0.044, 'fbeta_2': 0.094, 'roc_auc': np.float64(0.607), 'NNS': 42.5, 'best_params': OrderedDict([('C', 346.6450210477977), ('penalty', 'l1'), ('solver', 'liblinear

In [None]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.868, 'precision': 0.019, 'sensitivity': 0.25, 'f1_score': 0.036, 'fbeta_2': 0.074, 'roc_auc': np.float64(0.532), 'NNS': 51.667, 'best_params': OrderedDict([('C', 21.154493605815308), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.874, 'precision': 0.02, 'sensitivity': 0.273, 'f1_score': 0.037, 'fbeta_2': 0.078, 'roc_auc': np.float64(0.642), 'NNS': 49.667, 'best_params': OrderedDict([('C', 0.00014412623375973608), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.853, 'precision': 0.028, 'sensitivity': 0.455, 'f1_score': 0.053, 'fbeta_2': 0.113, 'roc_auc': np.float64(0.783), 'NNS': 35.6, 'best_params': OrderedDict([('C', 72.73082081096716), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.792, 'precision': 0.004, 'sensitivity': 0.091, 'f1_score': 0.008, 'fbeta_2': 0.017, 'roc_auc': np.float64(0.593), 'NNS': 244.0, 'best_params': OrderedDict([('C', 1000.0), ('pe

In [None]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.874, 'precision': 0.007, 'sensitivity': 0.083, 'f1_score': 0.013, 'fbeta_2': 0.026, 'roc_auc': np.float64(0.459), 'NNS': 143.0, 'best_params': OrderedDict([('C', 997.7505599555375), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.861, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.487), 'NNS': inf, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.812, 'precision': 0.03, 'sensitivity': 0.636, 'f1_score': 0.058, 'fbeta_2': 0.127, 'roc_auc': np.float64(0.818), 'NNS': 33.143, 'best_params': OrderedDict([('C', 487.9169484427551), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.859, 'precision': 0.012, 'sensitivity': 0.182, 'f1_score': 0.023, 'fbeta_2': 0.048, 'roc_auc': np.float64(0.561), 'NNS': 82.5, 'best_params': OrderedDict([('C', 183.61505741204678), ('penalty', 'l2'), (

In [None]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_12_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.879, 'precision': 0.007, 'sensitivity': 0.083, 'f1_score': 0.013, 'fbeta_2': 0.027, 'roc_auc': np.float64(0.46), 'NNS': 137.0, 'best_params': OrderedDict([('C', 48.61518228524981), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.869, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.498), 'NNS': inf, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.815, 'precision': 0.031, 'sensitivity': 0.636, 'f1_score': 0.059, 'fbeta_2': 0.129, 'roc_auc': np.float64(0.823), 'NNS': 32.571, 'best_params': OrderedDict([('C', 487.9169484427551), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.866, 'precision': 0.006, 'sensitivity': 0.091, 'f1_score': 0.012, 'fbeta_2': 0.025, 'roc_auc': np.float64(0.558), 'NNS': 154.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'libli

In [16]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.863, 'precision': 0.012, 'sensitivity': 0.222, 'f1_score': 0.023, 'fbeta_2': 0.051, 'roc_auc': np.float64(0.49), 'NNS': 81.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}




{'fold': 2, 'accuracy': 0.951, 'precision': 0.019, 'sensitivity': 0.125, 'f1_score': 0.032, 'fbeta_2': 0.058, 'roc_auc': np.float64(0.659), 'NNS': 54.0, 'best_params': OrderedDict([('C', 2.0871588778809445), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.848, 'precision': 0.016, 'sensitivity': 0.375, 'f1_score': 0.031, 'fbeta_2': 0.07, 'roc_auc': np.float64(0.598), 'NNS': 61.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.876, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.435), 'NNS': inf, 'best_params': OrderedDict([('C', 26.796471464354358), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 5, 'accuracy': 0.905, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.531), 'NNS': inf, 'best_params': OrderedDict([('C', 17.941272093006813), ('penalty', 'l1'), ('solver', 'liblinear')])}

Mean scores ac

In [17]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.932, 'precision': 0.013, 'sensitivity': 0.111, 'f1_score': 0.024, 'fbeta_2': 0.045, 'roc_auc': np.float64(0.413), 'NNS': 76.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.875, 'precision': 0.014, 'sensitivity': 0.25, 'f1_score': 0.026, 'fbeta_2': 0.056, 'roc_auc': np.float64(0.66), 'NNS': 74.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.871, 'precision': 0.025, 'sensitivity': 0.5, 'f1_score': 0.048, 'fbeta_2': 0.106, 'roc_auc': np.float64(0.638), 'NNS': 39.25, 'best_params': OrderedDict([('C', 487.9169484427551), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.905, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.422), 'NNS': inf, 'best_params': OrderedDict([('C', 728.3661839189799), ('penalty', 'l2'), ('solver', 'liblin

In [18]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_6_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.934, 'precision': 0.014, 'sensitivity': 0.111, 'f1_score': 0.024, 'fbeta_2': 0.045, 'roc_auc': np.float64(0.414), 'NNS': 74.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.885, 'precision': 0.022, 'sensitivity': 0.375, 'f1_score': 0.041, 'fbeta_2': 0.088, 'roc_auc': np.float64(0.668), 'NNS': 46.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.878, 'precision': 0.027, 'sensitivity': 0.5, 'f1_score': 0.051, 'fbeta_2': 0.111, 'roc_auc': np.float64(0.636), 'NNS': 37.0, 'best_params': OrderedDict([('C', 997.057119818347), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.911, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.422), 'NNS': inf, 'best_params': OrderedDict([('C', 487.9169484427551), ('penalty', 'l2'), ('solver', 'liblinear')

In [19]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_3_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.919, 'precision': 0.01, 'sensitivity': 0.2, 'f1_score': 0.02, 'fbeta_2': 0.043, 'roc_auc': np.float64(0.674), 'NNS': 96.0, 'best_params': OrderedDict([('C', 7.4644557471169), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.894, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.579), 'NNS': inf, 'best_params': OrderedDict([('C', 72.73082081096716), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.931, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.331), 'NNS': inf, 'best_params': OrderedDict([('C', 0.00019988938406799298), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.918, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.389), 'NNS': inf, 'best_params': OrderedDict([('C', 487.9169484427551), ('penalty', 'l2'), ('solver', 

In [20]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_3_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===
{'fold': 1, 'accuracy': 0.96, 'precision': 0.022, 'sensitivity': 0.2, 'f1_score': 0.039, 'fbeta_2': 0.076, 'roc_auc': np.float64(0.753), 'NNS': 46.0, 'best_params': OrderedDict([('C', 48.61518228524981), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.961, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.386), 'NNS': inf, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.967, 'precision': 0.027, 'sensitivity': 0.2, 'f1_score': 0.048, 'fbeta_2': 0.088, 'roc_auc': np.float64(0.649), 'NNS': 37.0, 'best_params': OrderedDict([('C', 605.3287159080721), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.969, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.701), 'NNS': inf, 'best_params': OrderedDict([('C', 115.7202874441248), ('penalty', 'l1'), ('solver', 'liblin

In [21]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_3_months",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)


=== Technique: SMOTE ===
{'fold': 1, 'accuracy': 0.961, 'precision': 0.022, 'sensitivity': 0.2, 'f1_score': 0.04, 'fbeta_2': 0.077, 'roc_auc': np.float64(0.772), 'NNS': 45.0, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.962, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.375), 'NNS': inf, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.967, 'precision': 0.027, 'sensitivity': 0.2, 'f1_score': 0.048, 'fbeta_2': 0.088, 'roc_auc': np.float64(0.652), 'NNS': 37.0, 'best_params': OrderedDict([('C', 204.00182631718778), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.969, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.703), 'NNS': inf, 'best_params': OrderedDict([('C', 487.9169484427551), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold

In [26]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_1_month",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="undersample",
    n_splits=5,
    random_state=42,
)


=== Technique: UNDERSAMPLE ===
{'fold': 1, 'accuracy': 0.973, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.481), 'NNS': inf, 'best_params': OrderedDict([('C', 1000.0), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 2, 'accuracy': 0.984, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.514), 'NNS': inf, 'best_params': OrderedDict([('C', 0.003555261421323081), ('penalty', 'l2'), ('solver', 'liblinear')])}
{'fold': 3, 'accuracy': 0.959, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.629), 'NNS': inf, 'best_params': OrderedDict([('C', 48.61518228524981), ('penalty', 'l1'), ('solver', 'liblinear')])}
{'fold': 4, 'accuracy': 0.998, 'precision': 0.0, 'sensitivity': 0.0, 'f1_score': 0.0, 'fbeta_2': 0.0, 'roc_auc': np.float64(0.693), 'NNS': inf, 'best_params': OrderedDict([('C', 0.07425534359037013), ('penalty', 'l2'), ('solver', 'liblinear')])

In [27]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_1_month",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="oversample",
    n_splits=5,
    random_state=42,
)


=== Technique: OVERSAMPLE ===


KeyboardInterrupt: 

In [None]:
results = run_cv_with_sampling(
    X_full=data,
    target_cols=targets,
    target_name="y_stk_or_aemb_1_month",
    optimizer_map=optimizer_map,
    model_name="lr",
    sampling="smote",
    n_splits=5,
    random_state=42,
)