In [None]:
# Install or upgrade (-U) the third-party packages used below, quietly (-q).
!uv pip install lightgbm optuna scikit-learn pandas matplotlib seaborn IProgress jupyter ipywidgets -U -q

In [None]:
# Install the perpetual package from a locally built wheel.
# NOTE(review): the wheel name is tagged cp313 / win_amd64, so this presumably only works
# on that interpreter and platform - confirm before running elsewhere.
!uv pip install ../../target/wheels/perpetual-1.0.0-cp313-cp313-win_amd64.whl

In [None]:
import math
from functools import partial
from time import process_time, time

import numpy as np
import optuna
import pandas as pd
from lightgbm import LGBMClassifier
from perpetual import PerpetualBooster
from sklearn.metrics import log_loss
from sklearn.model_selection import KFold, cross_validate, train_test_split

In [None]:
# Do not truncate columns when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

In [None]:
# Read the dataset from a CSV file in the current working directory.
X = pd.read_csv("lead_scoring.csv")

In [None]:
# Remove the "Converted" column from X and keep it as the target y.
y = X.pop("Converted")

In [None]:
# Convert every object-dtype column to a pandas categorical.
object_cols = X.select_dtypes(include=["object"]).columns
X[object_cols] = X[object_cols].astype("category")

In [None]:
X.shape

In [None]:
y.shape

In [None]:
# Mean of the target values.
np.mean(y.values)

In [None]:
y.value_counts()

In [None]:
X.head()

In [None]:
# Shorten three long column names; X is modified in place.
X.rename(
    columns={
        "What is your current occupation": "Occupation",
        "Through Recommendations": "Recommendation",
        "A free copy of Mastering The Interview": "Free Copy",
    },
    inplace=True,
)

In [None]:
cols_to_drop = [
    "Prospect ID",
    "Lead Number",
    "How did you hear about X Education",
    "Lead Profile",
    "Lead Quality",
    "Asymmetrique Profile Score",
    "Asymmetrique Activity Score",
    "Asymmetrique Activity Index",
    "Asymmetrique Profile Index",
    "Tags",
    "Last Notable Activity",
]
cols_to_drop += [
    "I agree to pay the amount through cheque",
    "Get updates on DM Content",
    "Update me on Supply Chain Content",
    "Receive More Updates About Our Courses",
    "Magazine",
]
cols_to_drop += ["What matters most to you in choosing a course", "Country", "City"]
cols_to_drop += [
    "Do Not Call",
    "Search",
    "Newspaper",
    "Newspaper Article",
    "Digital Advertisement",
    "X Education Forums",
    "Free Copy",
    "Recommendation",
]

X.drop(columns=cols_to_drop, inplace=True)

In [None]:
X.shape

In [None]:
def prepare_data(seed):
    """Split the notebook-level ``X`` / ``y`` and bundle the metric settings.

    Parameters:
    - seed: random state passed to ``train_test_split``

    Returns:
    - a 7-tuple ``(X_train, X_test, y_train, y_test, scoring, metric_function,
      metric_name)``; 60% of the rows go to the test part (``test_size=0.6``)
    """
    split_parts = train_test_split(X, y, test_size=0.6, random_state=seed)
    # scoring string, metric callable and metric display name, in that order.
    metric_settings = ("neg_log_loss", log_loss, "log_loss")

    return (*split_parts, *metric_settings)


def objective_function(trial, seed, n_estimators, X_train, y_train, scoring, cv):
    """Optuna objective: cross-validate an LGBMClassifier with sampled settings.

    Parameters:
    - trial: Optuna trial used to sample the hyper-parameters
    - seed: value passed to the classifier as ``seed``
    - n_estimators: fixed number of estimators for the classifier
    - X_train, y_train: data handed to ``cross_validate``
    - scoring: scoring name handed to ``cross_validate``
    - cv: cross-validation splitter handed to ``cross_validate``

    Returns:
    - the negated mean of the cross-validation test scores

    Side effect: the fitted per-fold estimators are stored on the trial under
    the user attribute ``"models"``.
    """
    # Search ranges as (low, high). The three tables are walked in this order
    # so parameters are sampled in the same sequence as they are listed.
    log_float_ranges = {
        "learning_rate": (0.001, 0.5),
        "min_split_gain": (1e-6, 1.0),
        "reg_alpha": (1e-6, 1.0),
        "reg_lambda": (1e-6, 1.0),
    }
    float_ranges = {
        "colsample_bytree": (0.2, 1.0),
        "subsample": (0.2, 1.0),
    }
    int_ranges = {
        "subsample_freq": (1, 10),
        "max_depth": (3, 33),
        "num_leaves": (2, 1024),
        "min_child_samples": (1, 100),
    }

    params = {"seed": seed, "verbosity": -1, "n_estimators": n_estimators}
    for name, (low, high) in log_float_ranges.items():
        params[name] = trial.suggest_float(name, low, high, log=True)
    for name, (low, high) in float_ranges.items():
        params[name] = trial.suggest_float(name, low, high)
    for name, (low, high) in int_ranges.items():
        params[name] = trial.suggest_int(name, low, high)

    cv_results = cross_validate(
        LGBMClassifier(**params),
        X_train,
        y_train,
        cv=cv,
        scoring=scoring,
        return_train_score=True,
        return_estimator=True,
    )

    trial.set_user_attr("models", cv_results["estimator"])

    mean_test_score = np.mean(cv_results["test_score"])
    return -1 * mean_test_score

In [None]:
seed = 0
n_trials = 100
n_estimators = 100

# The same splitter is reused by later cells that iterate over cv.split(...).
cv = KFold(n_splits=5, shuffle=True, random_state=seed)

(
    X_train,
    X_test,
    y_train,
    y_test,
    scoring,
    metric_function,
    metric_name,
) = prepare_data(seed)

sampler = optuna.samplers.TPESampler(seed=seed)
study = optuna.create_study(direction="minimize", sampler=sampler)

# Bind everything except the trial so Optuna can call obj(trial).
obj = partial(
    objective_function,
    seed=seed,
    n_estimators=n_estimators,
    X_train=X_train,
    y_train=y_train,
    scoring=scoring,
    cv=cv,
)

# Measure both CPU time (process_time) and wall-clock time (time) of the search.
start = process_time()
tick = time()
study.optimize(obj, n_trials=n_trials)
stop = process_time()
tock = time()


print(f"seed: {seed}, cpu time: {stop - start}")
# The wall-clock start was captured before but never reported.
print(f"seed: {seed}, wall time: {tock - tick}")

In [None]:
study.best_trial.params

In [None]:
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from sklearn.calibration import CalibratedClassifierCV, CalibrationDisplay

params = study.best_trial.params
params["n_estimators"] = n_estimators
params["seed"] = seed
params["verbosity"] = -1
lgbm = LGBMClassifier(**params)
lgbm_isotonic = CalibratedClassifierCV(
    LGBMClassifier(**params), cv=cv, method="isotonic"
)
lgbm_sigmoid = CalibratedClassifierCV(LGBMClassifier(**params), cv=cv, method="sigmoid")

lgbm_models = [
    (lgbm, "LightGBM"),
    (lgbm_isotonic, "LightGBM + Isotonic"),
    (lgbm_sigmoid, "LightGBM + Sigmoid"),
]

In [None]:
n_bins = 10

In [None]:
from typing import Sequence, Tuple, Union

import numpy as np
from sklearn.calibration import calibration_curve


def expected_calibration_error(
    y_true: Union[np.ndarray, Sequence[int]],
    y_pred: Union[np.ndarray, Sequence[float]],
    n_bins: int = 10,
) -> Tuple[float, np.ndarray, np.ndarray, np.ndarray]:
    """Expected calibration error over ``n_bins`` equal-width bins on [0, 1].

    Parameters:
    - y_true: array-like of true labels (0 or 1)
    - y_pred: array-like of predicted probabilities in [0, 1]
    - n_bins: number of equal-width bins

    Returns:
    - ece: sum over non-empty bins of
      ``weight * |fraction of positives - mean predicted probability|``
    - prob_true: fraction of positives in each non-empty bin
    - prob_pred: mean predicted probability in each non-empty bin
    - weights: share of all samples that falls into each non-empty bin

    Raises:
    - ValueError: on empty input, mismatched shapes, or probabilities
      outside [0, 1]
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    n_samples = len(y_true)

    if n_samples == 0:
        raise ValueError("y_true and y_pred must not be empty")
    if y_true.shape != y_pred.shape:
        raise ValueError("y_true and y_pred must have the same shape")
    if y_pred.min() < 0.0 or y_pred.max() > 1.0:
        raise ValueError("y_pred must contain probabilities in [0, 1]")

    # All per-bin statistics come from ONE bin assignment so the weights can
    # never be paired with the wrong bins. This is the uniform binning rule of
    # sklearn's calibration_curve (a value on an interior edge goes to the
    # lower bin). np.histogram puts such values in the upper bin, which made
    # the previous histogram-based counts disagree with the bin means.
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    bin_ids = np.searchsorted(edges[1:-1], y_pred)

    counts = np.bincount(bin_ids, minlength=n_bins)
    sum_true = np.bincount(bin_ids, weights=y_true, minlength=n_bins)
    sum_pred = np.bincount(bin_ids, weights=y_pred, minlength=n_bins)

    occupied = counts > 0
    prob_true = sum_true[occupied] / counts[occupied]
    prob_pred = sum_pred[occupied] / counts[occupied]
    weights = counts[occupied] / n_samples

    ece = np.sum(weights * np.abs(prob_true - prob_pred))

    return ece, prob_true, prob_pred, weights

In [None]:
fig = plt.figure(figsize=(10, 10))
gs = GridSpec(4, 2)
colors = plt.get_cmap("Dark2")

# Top half of the grid: calibration curves of all models on one axis.
ax_calibration_curve = fig.add_subplot(gs[:2, :2])
calibration_displays = {}
for i, (clf, name) in enumerate(lgbm_models):
    # Fit on the training split, then plot calibration on the test split.
    clf.fit(X_train, y_train)
    y_pred = clf.predict_proba(X_test)[:, 1]
    # Named cal_display so IPython's built-in display() is not shadowed.
    cal_display = CalibrationDisplay.from_predictions(
        y_test,
        y_pred,
        n_bins=n_bins,
        name=name,
        ax=ax_calibration_curve,
        color=colors(i),
    )
    calibration_displays[name] = cal_display

ax_calibration_curve.grid()
# The plotted models are LightGBM variants, so the title names them.
ax_calibration_curve.set_title("Calibration plots (LightGBM)")

# Bottom half: one histogram of predicted probabilities per model.
grid_positions = [(2, 0), (2, 1), (3, 0), (3, 1)]
for i, (_, name) in enumerate(lgbm_models):
    row, col = grid_positions[i]
    ax = fig.add_subplot(gs[row, col])

    ax.hist(
        calibration_displays[name].y_prob,
        range=(0, 1),
        bins=n_bins,
        label=name,
        color=colors(i),
    )
    ax.set(title=name, xlabel="Mean predicted probability", ylabel="Count")

plt.tight_layout()
plt.show()

In [None]:
print(
    expected_calibration_error(
        y_test, lgbm_models[0][0].predict_proba(X_test)[:, 1], n_bins
    )
)
print(
    expected_calibration_error(
        y_test, lgbm_models[1][0].predict_proba(X_test)[:, 1], n_bins
    )
)
print(
    expected_calibration_error(
        y_test, lgbm_models[2][0].predict_proba(X_test)[:, 1], n_bins
    )
)

In [None]:
lgbm_cal_classifiers = lgbm_models[1][0].calibrated_classifiers_
print(type(lgbm_cal_classifiers[0]))
print(len(lgbm_cal_classifiers))
print([d for d in dir(lgbm_cal_classifiers[0]) if not d.startswith("__")])

In [None]:
def business_objective(
    ground_truth,
    predicted_probs,
    threshold,
    value_per_lead=100,
    cost_per_false_positive=20,
    verbose=False,
):
    """
    Business objective function to optimize lead scoring threshold.

    Parameters:
    - ground_truth: array-like of true labels (0 or 1)
    - predicted_probs: array-like of predicted probabilities (floats between 0 and 1)
    - threshold: float, decision threshold; a lead is pursued when its
      probability is >= threshold
    - value_per_lead: monetary value of a true positive lead
    - cost_per_false_positive: cost incurred for pursuing a false positive lead
    - verbose: when True, print the counts and amounts behind the result

    Returns:
    - net_gain: total business value (profit) from applying the threshold
    - margin: net_gain as a percentage of the gross value of the true
      positives; 0.0 when there are no true positives
    """
    predicted_labels = (np.asarray(predicted_probs) >= threshold).astype(int)
    ground_truth = np.asarray(ground_truth)

    true_positives = np.sum((predicted_labels == 1) & (ground_truth == 1))
    false_positives = np.sum((predicted_labels == 1) & (ground_truth == 0))

    gross_value = true_positives * value_per_lead
    pursuit_cost = false_positives * cost_per_false_positive
    net_gain = gross_value - pursuit_cost

    # Previously a constant 0 was returned here although callers print the
    # second value as "Margin: ...%". Guard the division for the no-TP case.
    margin = float(net_gain) / float(gross_value) * 100.0 if gross_value > 0 else 0.0

    if verbose:
        print(
            f"TP: {true_positives}, FP: {false_positives}, "
            f"gross value: {gross_value}, cost: {pursuit_cost}, "
            f"net gain: {net_gain}, margin: {margin:.2f}%"
        )

    return net_gain, margin

In [None]:
y_true_dummy = np.array(
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
)
y_pred_dummy = np.array(
    [
        0.1,
        0.4,
        0.3,
        0.8,
        0.7,
        0.1,
        0.2,
        0.9,
        0.3,
        0.1,
        0.6,
        0.4,
        0.4,
        0.4,
        0.4,
        0.4,
        0.4,
        0.4,
        0.4,
        0.4,
        0.6,
        0.4,
        0.4,
        0.4,
        0.4,
    ]
)
print(len(y_true_dummy))
print(len(y_pred_dummy))
print(y_true_dummy[y_pred_dummy < 0.5])
print(len(y_true_dummy[y_pred_dummy < 0.5]))
print(business_objective(y_true_dummy, y_pred_dummy, threshold=0.5))

In [None]:
print(
    business_objective(
        y_test.values, lgbm_models[0][0].predict_proba(X_test)[:, 1], threshold=0.5
    )
)

In [None]:
# The predictions do not depend on the threshold, so compute them once
# instead of once per threshold.
y_proba_isotonic = lgbm_models[1][0].predict_proba(X_test)[:, 1]

# Sweep thresholds 0.01 ... 0.99 and print the business objective for each.
for t in np.arange(0.01, 1.0, 0.01):
    profit, margin = business_objective(y_test.values, y_proba_isotonic, threshold=t)
    print(f"Threshold: {t:.2f}, Profit: {profit:.0f}, Margin: {margin:.2f}%")

In [None]:
from lightgbm import plot_importance

plot_importance(lgbm_models[0][0])

In [None]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

y_proba_lgbm = np.mean(
    [m.predict_proba(X_test)[:, 1] for m in lgbm_cal_classifiers], axis=0
)

print(accuracy_score(y_test, np.rint(y_proba_lgbm)))
print(f1_score(y_test, np.rint(y_proba_lgbm)))
print(roc_auc_score(y_test, y_proba_lgbm))

In [None]:
def get_leaf_nodes(perp: PerpetualBooster):
    """Collect the leaf nodes of every node list returned by ``perp.get_node_lists()``.

    Returns a list with one dict per node list, mapping ``node.num`` to the
    node for each node whose ``is_leaf`` is true.
    """
    leaves_per_tree = []
    for tree_nodes in perp.get_node_lists():
        leaves = {}
        for node in tree_nodes:
            if node.is_leaf:
                leaves[node.num] = node
        leaves_per_tree.append(leaves)
    return leaves_per_tree

In [None]:
def get_weights(leaf_nodes, pred_nodes):
    """Look up the leaf ``weights`` reached by each prediction and sort them.

    Parameters:
    - leaf_nodes: list of dicts (one per tree) mapping a node number to a
      node object with a ``weights`` attribute, as built by get_leaf_nodes
    - pred_nodes: per tree, one iterable of node numbers per sample
      (presumably the nodes visited by that sample - confirm against
      ``predict_nodes``)

    Returns:
    - array indexed as [tree, sample, weight], sorted ascending along the
      last axis
    """
    weights_per_tree = []
    for tree_idx, tree_nodes in enumerate(pred_nodes):
        tree_leaves = leaf_nodes[tree_idx]
        leaf_ids = tree_leaves.keys()

        weights_per_sample = []
        for nodes in tree_nodes:
            # Leaves among this sample's nodes; the first match is used.
            matches = [tree_leaves[node_id].weights for node_id in leaf_ids & set(nodes)]
            weights_per_sample.append(matches[0])
        weights_per_tree.append(weights_per_sample)

    pred_weights = np.array(weights_per_tree)
    # Sorting the freshly built array in place equals returning a sorted copy.
    pred_weights.sort(axis=2)
    return pred_weights

# Perpetual training

In [None]:
# One fitted PerpetualBooster per cross-validation fold.
perp_models = []

for i, (train, test) in enumerate(cv.split(X_train, y_train)):
    print(f"Fold {i}")
    X_train_cv, X_test_cv = X_train.iloc[train], X_train.iloc[test]
    y_train_cv, y_test_cv = y_train.iloc[train], y_train.iloc[test]
    # Fit on the training part of the fold only.
    perp = PerpetualBooster(budget=1.0, iteration_limit=10000)
    perp.fit(X_train_cv, y_train_cv)

    print(f"Number of trees: {perp.number_of_trees}")

    perp_models.append(perp)
# NOTE: after the loop `perp` is the model of the LAST fold; later cells use it directly.

In [None]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# Average the positive-class probabilities of the fold models on the test split.
y_proba_perp = np.mean([m.predict_proba(X_test)[:, 1] for m in perp_models], axis=0)

# Hard labels for accuracy / F1 are obtained by rounding the averaged probability.
print(accuracy_score(y_test, np.rint(y_proba_perp)))
print(f1_score(y_test, np.rint(y_proba_perp)))
print(roc_auc_score(y_test, y_proba_perp))

In [None]:
# Sorted leaf weights for the test split, using `perp` (the last fold's model).
# NOTE(review): the same array is recomputed in the next two cells as well.
pred_weights = get_weights(get_leaf_nodes(perp), perp.predict_nodes(X_test))

In [None]:
pred_weights = get_weights(get_leaf_nodes(perp), perp.predict_nodes(X_test))
# Lower bound: smallest weight per leaf, summed over axis 0, plus the base score ...
pred_lower = np.sum(np.min(pred_weights, axis=2), axis=0) + perp.base_score
# ... mapped through the logistic function.
pred_lower = 1.0 / (1.0 + np.exp(-pred_lower))
pred_lower.shape

In [None]:
pred_lower

In [None]:
pred_weights = get_weights(get_leaf_nodes(perp), perp.predict_nodes(X_test))
# Upper bound: same computation with the largest weight per leaf.
pred_upper = np.sum(np.max(pred_weights, axis=2), axis=0) + perp.base_score
pred_upper = 1.0 / (1.0 + np.exp(-pred_upper))
pred_upper.shape

In [None]:
pred_upper

In [None]:
import seaborn as sns

# Distribution of the width (upper - lower) of the per-sample interval.
sns.displot(pred_upper - pred_lower)

In [None]:
import matplotlib.pyplot as plt

# Lower vs. upper bound per sample.
plt.scatter(pred_lower, pred_upper, alpha=0.1)

In [None]:
# Widest interval over all samples.
max(pred_upper - pred_lower)

In [None]:
pred_weights.shape

In [None]:
n_simulations = 100

In [None]:
indices = np.random.randint(
    low=0, high=5, size=(pred_weights.shape[0], pred_weights.shape[1], n_simulations)
)
new_pred_weights = np.take_along_axis(pred_weights, indices, axis=2)
print(f"New array shape: {new_pred_weights.shape}")

In [None]:
# Sum the sampled weights over axis 0 and add the base score of `perp`.
new_pred_weights_sum = np.sum(new_pred_weights, axis=0) + perp.base_score
new_pred_weights_sum.shape

In [None]:
# Distribution of the simulated raw scores for the first sample.
sns.displot(new_pred_weights_sum[0])

In [None]:
# Map the raw scores through the logistic function.
new_pred_weights_sum_proba = 1.0 / (1.0 + np.exp(-new_pred_weights_sum))
new_pred_weights_sum_proba.shape

In [None]:
# Simulated probabilities of one sample.
# NOTE(review): 1100 is a hard-coded row index - requires at least 1101 test rows.
sns.displot(new_pred_weights_sum_proba[1100])

In [None]:
# Per-sample spread (max - min) of the simulated probabilities.
sns.displot(
    np.max(new_pred_weights_sum_proba, axis=1)
    - np.min(new_pred_weights_sum_proba, axis=1)
)

In [None]:
new_pred_weights_sum_proba.shape

In [None]:
def get_proba_sim(m_sim, X_sim, n_sim=100):
    """Simulate probabilities by randomly picking one stored weight per leaf.

    Parameters:
    - m_sim: fitted model providing ``predict_nodes``, ``get_node_lists`` and
      ``base_score``
    - X_sim: samples to predict
    - n_sim: number of simulations per sample

    Returns:
    - array with one row per sample and ``n_sim`` columns of probabilities

    Draws come from NumPy's global random state, so results are not
    reproducible unless the caller seeds it.
    """
    pred_weights_sim = get_weights(get_leaf_nodes(m_sim), m_sim.predict_nodes(X_sim))
    n_trees, n_samples, n_weights = pred_weights_sim.shape

    # Draw positions over the actual length of the weight axis instead of a
    # hard-coded 5, so the draw always matches the stored weights.
    indices_sim = np.random.randint(
        low=0,
        high=n_weights,
        size=(n_trees, n_samples, n_sim),
    )
    new_pred_weights_sim = np.take_along_axis(pred_weights_sim, indices_sim, axis=2)

    # Sum over axis 0, add the base score, then apply the logistic function.
    new_pred_weights_sum_sim = np.sum(new_pred_weights_sim, axis=0) + m_sim.base_score
    new_pred_weights_sum_proba_sim = 1.0 / (1.0 + np.exp(-new_pred_weights_sum_sim))

    return new_pred_weights_sum_proba_sim

In [None]:
def objective_t(trial):
    """Optuna objective: mean simulated profit over all CV folds for a threshold.

    Samples ``threshold`` in [0.0, 0.3]. For each fold, simulated
    probabilities are produced with the fold's model from ``perp_models`` on
    that fold's held-out rows, and the profit of every simulation is recorded.

    Returns:
    - mean profit over all folds and simulations
    """
    t = trial.suggest_float("threshold", 0.0, 0.3)

    profits = []
    for i, (_, test) in enumerate(cv.split(X_train, y_train)):
        X_test_cv = X_train.iloc[test]
        y_test_cv = y_train.iloc[test]

        y_pred_sim = get_proba_sim(perp_models[i], X_test_cv, n_simulations)

        profits_fold = []
        for j in range(n_simulations):
            profit, _margin = business_objective(
                y_test_cv.values, y_pred_sim[:, j], threshold=t
            )
            profits_fold.append(profit)

        # Must happen inside the fold loop. Previously this line was dedented,
        # so only the last fold's profits entered the objective.
        profits.append(profits_fold)

    return np.mean(np.array(profits).flatten())

In [None]:
# Search for the threshold that maximizes objective_t (10 trials).
sampler_t = optuna.samplers.TPESampler(seed=seed)
study_t = optuna.create_study(direction="maximize", sampler=sampler_t)
study_t.optimize(objective_t, n_trials=10)

In [None]:
study_t.best_trial.params["threshold"]

In [None]:
# Apply the best threshold to the test split, using the fold models' averaged probabilities.
y_proba = np.mean([m.predict_proba(X_test)[:, 1] for m in perp_models], axis=0)
profit, margin = business_objective(
    y_test.values, y_proba, threshold=study_t.best_trial.params["threshold"]
)
print(f"Profit: {profit}, Margin: {margin}%")

# Optimize threshold and weight index together

In [None]:
# Precompute, for each fold model, the sorted leaf weights of that fold's
# held-out rows so the objective below does not have to recompute them per trial.
model_pred_weights = []
for i, (train, test) in enumerate(cv.split(X_train, y_train)):
    X_train_cv, X_test_cv = X_train.iloc[train], X_train.iloc[test]
    y_train_cv, y_test_cv = y_train.iloc[train], y_train.iloc[test]
    pred_weights = get_weights(
        get_leaf_nodes(perp_models[i]), perp_models[i].predict_nodes(X_test_cv)
    )
    # Entries are appended in fold order, so index i matches perp_models[i].
    model_pred_weights.append(pred_weights)

In [None]:
def get_proba_from_weights(p_weights, w, b_score):
    """Probabilities from a fractional position ``w`` along the weight axis.

    The integer part of ``w`` selects a weight slot and the fractional part
    linearly interpolates towards the next slot. Interpolation is done on the
    summed scores (sum over axis 0 plus ``b_score``) before the logistic
    function is applied.

    Parameters:
    - p_weights: 3-D array as returned by get_weights; the last axis holds
      the per-leaf weights
    - w: float position in ``[0, p_weights.shape[2] - 1]``
    - b_score: base score added to the summed weights

    Returns:
    - 1-D array of probabilities, one per entry along axis 1

    Raises:
    - IndexError: if ``w`` lies outside ``[0, p_weights.shape[2] - 1]``
    """
    last_index = p_weights.shape[2] - 1
    if not 0.0 <= w <= last_index:
        raise IndexError(
            f"weight index {w} is outside the valid range [0, {last_index}]"
        )

    cal_weight, w_i_lower = math.modf(w)
    w_i_lower = int(w_i_lower)
    # At w == last_index the fractional part is 0, so the upper slot carries
    # no weight. Clamp it so the inclusive upper bound does not index past
    # the end of the axis.
    w_i_upper = min(w_i_lower + 1, last_index)

    weights_lower = np.sum(p_weights[:, :, w_i_lower], axis=0) + b_score
    weights_upper = np.sum(p_weights[:, :, w_i_upper], axis=0) + b_score

    weighted = weights_lower * (1 - cal_weight) + weights_upper * cal_weight

    y_proba = 1.0 / (1.0 + np.exp(-weighted))

    return y_proba

In [None]:
def objective_w(trial):
    """Optuna objective: mean profit over all CV folds for (threshold, weight_index).

    Samples ``threshold`` in [0.0, 0.3] and ``weight_index`` in [0.0, 4.0].
    For each fold, probabilities are derived from the precomputed
    ``model_pred_weights[i]`` and scored against that fold's held-out labels.

    Returns:
    - mean profit over the folds
    """
    t = trial.suggest_float("threshold", 0.0, 0.3)
    w = trial.suggest_float("weight_index", 0.0, 4.0)

    profits = []
    for i, (_, test) in enumerate(cv.split(X_train, y_train)):
        y_test_cv = y_train.iloc[test]

        y_proba = get_proba_from_weights(
            model_pred_weights[i], w, perp_models[i].base_score
        )

        profit, _margin = business_objective(
            y_test_cv.values, y_proba, threshold=t, verbose=False
        )

        # Must happen inside the fold loop. Previously this line was dedented,
        # so only the last fold's profit was averaged.
        profits.append(profit)

    return np.mean(profits)

In [None]:
# Jointly search threshold and weight index (1000 trials).
sampler_w = optuna.samplers.TPESampler(seed=seed)
study_w = optuna.create_study(direction="maximize", sampler=sampler_w)
study_w.optimize(objective_w, n_trials=1000)

In [None]:
study_w.best_trial.params

In [None]:
# Evaluate on the test split: per fold model, derive probabilities at the best
# weight index, then average over the models.
y_proba = np.mean(
    [
        get_proba_from_weights(
            get_weights(get_leaf_nodes(m), m.predict_nodes(X_test)),
            study_w.best_trial.params["weight_index"],
            m.base_score,
        )
        for m in perp_models
    ],
    axis=0,
)
# profit_w is reused later for the comparison against LightGBM.
profit_w, margin_w = business_objective(
    y_test.values,
    y_proba,
    threshold=study_w.best_trial.params["threshold"],
    verbose=False,
)
print(f"Profit: {profit_w}, Margin: {margin_w}%")

# Optimize business objective with calibrated LightGBM classifiers

In [None]:
best_threshold = 0
# Start below any achievable profit so the best threshold is recorded even
# when every mean profit is zero or negative.
best_profit = -np.inf

# Fold predictions do not depend on the threshold: compute them once instead
# of once per threshold.
fold_predictions = []
for i, (train, test) in enumerate(cv.split(X_train, y_train)):
    X_test_cv = X_train.iloc[test]
    y_test_cv = y_train.iloc[test]

    m = lgbm_cal_classifiers[i]
    fold_predictions.append((y_test_cv.values, m.predict_proba(X_test_cv)[:, 1]))

# Sweep thresholds 0.01 ... 0.49 and keep the one with the highest mean profit.
for t in np.arange(0.01, 0.5, 0.01):
    profits = []
    margins = []
    for y_true_fold, y_proba in fold_predictions:
        profit, margin = business_objective(
            y_true_fold, y_proba, threshold=t, verbose=False
        )
        profits.append(profit)
        margins.append(margin)

    if np.mean(profits) > best_profit:
        best_profit = np.mean(profits)
        best_threshold = t

    print(
        f"Threshold: {t:.3f}, Profit: {np.mean(profits):.0f}, Margin: {np.mean(margins):.2f}%"
    )

print(f"Best threshold: {best_threshold:.3f}, Best profit: {best_profit:.0f}")

In [None]:
# Evaluate the calibrated LightGBM classifiers on the test split at the best threshold.
y_proba = np.mean([m.predict_proba(X_test)[:, 1] for m in lgbm_cal_classifiers], axis=0)
profit_l, margin_l = business_objective(
    y_test.values, y_proba, threshold=best_threshold, verbose=False
)
print(f"Profit: {profit_l}, Margin: {margin_l}")

In [None]:
# Relative difference between profit_w and profit_l, in percent of |profit_l|.
print(((profit_w - profit_l) / abs(profit_l)) * 100)

# Optimize threshold, weight index, budget together

In [None]:
# Budgets tried for PerpetualBooster.
budget_list = [1.0, 1.5, 2.0]

In [None]:
# p_models[b][i]: model for budget index b and fold i.
# model_pred_weights[b][i]: sorted leaf weights of that model on the fold's held-out rows.
# NOTE: this rebinds model_pred_weights from a flat per-fold list to a nested
# per-budget list, so objective_w no longer works after this cell has run.
p_models = []
model_pred_weights = []
for budget in budget_list:
    cv_pred_weights = []
    p_models_cv = []
    for i, (train, test) in enumerate(cv.split(X_train, y_train)):
        X_train_cv, X_test_cv = X_train.iloc[train], X_train.iloc[test]
        y_train_cv, y_test_cv = y_train.iloc[train], y_train.iloc[test]
        m = PerpetualBooster(budget=budget, iteration_limit=10000)
        m.fit(X_train_cv, y_train_cv)
        pred_weights = get_weights(get_leaf_nodes(m), m.predict_nodes(X_test_cv))
        cv_pred_weights.append(pred_weights)
        p_models_cv.append(m)
        print(f"Budget: {budget}, Fold: {i}, Number of trees: {m.number_of_trees}")
    print()
    p_models.append(p_models_cv)
    model_pred_weights.append(cv_pred_weights)

In [None]:
# Compare tree counts between consecutive budgets, fold by fold.
budget_index_min = 0
for i, b in enumerate(budget_list):
    if i == 0:
        continue
    cv_models_prev = p_models[i - 1]
    cv_models = p_models[i]
    # Smallest per-fold change in tree count from the previous budget to this one.
    min_diff_n_trees = min(
        [
            cv_models[j].number_of_trees - cv_models_prev[j].number_of_trees
            for j in range(len(cv_models))
        ]
    )
    # If any fold has FEWER trees than at the previous budget, the lower bound
    # for the budget search moves up to this index.
    if min_diff_n_trees < 0:
        budget_index_min = i
    print(
        f"i: {i}, budget: {b}, previous budget n_trees: {[cv_models_prev[j].number_of_trees for j in range(len(cv_models_prev))]}, current budget n_trees: {[cv_models[j].number_of_trees for j in range(len(cv_models))]}, min_diff_n_trees: {min_diff_n_trees}"
    )

print(f"Minimum budget index without tree count regression: {budget_index_min}")

In [None]:
def objective_b(trial):
    """Optuna objective: mean profit over all CV folds for (threshold, weight_index, budget_index).

    Samples ``threshold`` in [0.0, 0.3], ``weight_index`` in [0.0, 4.0] and
    ``budget_index`` in [budget_index_min, len(budget_list) - 1]. For each
    fold, probabilities are derived from the precomputed
    ``model_pred_weights[b][i]`` and scored against that fold's held-out labels.

    Returns:
    - mean profit over the folds
    """
    t = trial.suggest_float("threshold", 0.0, 0.3)
    w = trial.suggest_float("weight_index", 0.0, 4.0)
    b = trial.suggest_int("budget_index", budget_index_min, len(budget_list) - 1)

    profits = []
    for i, (_, test) in enumerate(cv.split(X_train, y_train)):
        y_test_cv = y_train.iloc[test]

        y_proba = get_proba_from_weights(
            model_pred_weights[b][i], w, p_models[b][i].base_score
        )

        profit, _margin = business_objective(
            y_test_cv.values, y_proba, threshold=t, verbose=False
        )

        # Must happen inside the fold loop. Previously this line was dedented,
        # so only the last fold's profit was averaged.
        profits.append(profit)

    return np.mean(profits)

In [None]:
# Jointly search threshold, weight index and budget index (1000 trials).
sampler_b = optuna.samplers.TPESampler(seed=seed)
study_b = optuna.create_study(direction="maximize", sampler=sampler_b)
study_b.optimize(objective_b, n_trials=1000)

In [None]:
study_b.best_trial.params

In [None]:
# Budget value that corresponds to the best budget index.
budget_list[study_b.best_trial.params["budget_index"]]

In [None]:
# Evaluate on the test split with the fold models of the best budget:
# derive probabilities at the best weight index, then average over the models.
y_proba = np.mean(
    [
        get_proba_from_weights(
            get_weights(get_leaf_nodes(m), m.predict_nodes(X_test)),
            study_b.best_trial.params["weight_index"],
            m.base_score,
        )
        for m in p_models[study_b.best_trial.params["budget_index"]]
    ],
    axis=0,
)
profit_b, margin_b = business_objective(
    y_test.values,
    y_proba,
    threshold=study_b.best_trial.params["threshold"],
    verbose=False,
)
print(f"Profit: {profit_b}, Margin: {margin_b}%")

In [None]:
# Relative difference between profit_b and profit_l, in percent of |profit_l|.
print(((profit_b - profit_l) / abs(profit_l)) * 100)

In [None]:
# LightGBM n_estimators = 100
# 0.1 -> 1010250 -> -4.05428401259253
# 0.2 -> 989250  -> -2.7184548625882754
# 0.3 -> 987000  -> -8.580787883944526
# 0.5 -> 1135000 -> -4.479707308772228
# 1.0 -> 1046250 -> -0.22547434697524038
# 1.5 -> 1052000 -> 1.3060495192716752
# 2.0 -> 1073500 -> 1.9016421339232537
# 2.5 -> 1072500 -> -0.6041010805751723

In [None]:
# LightGBM n_estimators = 1000
# 2.0 -> 1074250 -> 1.131326776140825