# Training

## 0.Setup and data loading

In [1]:
import pandas as pd
import numpy as np
import os
import random

# ignore warnings
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split


import importlib
import train_models_util
import feature_importance
importlib.reload(train_models_util)
importlib.reload(feature_importance)
from train_models_util import (
    train_xgb_optuna, 
    train_catboost_optuna, 
    train_lgbm_optuna,
    train_best_base_models_from_mlflow,
    train_ensemble, 
    train_neural_network_ensemble,
    train_tree_optuna,
    minimize_eval_metric_with_threshold
)
from evaluate_models_util import evaluate_and_log
from feature_importance import select_important_features, get_top_features_shap

# Seed the RNGs this notebook drives directly so reruns are repeatable.
# A single constant keeps the three seeds from drifting apart.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
# NOTE: hash randomization of the *running* interpreter is fixed at startup,
# so this assignment only reaches subprocesses spawned from this kernel.
# Export PYTHONHASHSEED before launching Jupyter to affect the kernel itself.
os.environ['PYTHONHASHSEED'] = str(SEED)

In [None]:
# Experiment name passed as `experiment_name` to every evaluate_and_log call below.
# NOTE(review): the recorded output of the first evaluation cell mentions
# '..._v3', so the saved outputs appear to predate this value — rerun to confirm.
EXPERIMENT_NAME = "Fraud-detection-custom-thresholds_v4"

In [3]:
# Folder holding the processed CSVs; the trailing slash is part of the value,
# so file names are appended directly.
data_folder = 'ieee-fraud-detection-data/processed/'

# Feature frames for train/test and the label column ('isFraud') as a Series.
train = pd.read_csv(data_folder + 'train_processed_v2.csv')
test = pd.read_csv(data_folder + 'test_processed_v2.csv')
target = pd.read_csv(data_folder + 'target.csv').loc[:, 'isFraud']

In [4]:
# Turn every string-typed (object) column of `train` into a pandas categorical
# in both frames, with missing values mapped to an explicit 'missing' level.
cat_cols = np.array(train.select_dtypes(include=['object']).columns)
for c in cat_cols:
    # Assign the filled column back instead of `train[c].fillna(..., inplace=True)`:
    # the in-place form acts on a column selection and does not update the
    # parent frame once pandas Copy-on-Write is active.
    train[c] = train[c].fillna('missing')
    test[c] = test[c].fillna('missing')

    # Build ONE dtype from the levels seen in either frame so that a given
    # level maps to the same category code in train and test. Casting each
    # frame separately yields frame-specific codes whenever the level sets differ.
    shared_dtype = pd.api.types.CategoricalDtype(
        categories=pd.concat([train[c], test[c]], ignore_index=True)
        .astype('category')
        .cat.categories
    )
    train[c] = train[c].astype(shared_dtype)
    test[c] = test[c].astype(shared_dtype)

In [None]:
#THIS IS WRONG
# # get all fraud samples
# fraud_indices = target[target == 1].index

# # sample 3× non-fraud, without replacement
# non_fraud_indices = target[target == 0].sample(
#     n=len(fraud_indices) * 3,
#     random_state=42
# ).index

# # build tuning dataset
# X_tune = pd.concat([train.loc[fraud_indices], train.loc[non_fraud_indices]], axis=0)
# y_tune = pd.concat([target.loc[fraud_indices], target.loc[non_fraud_indices]], axis=0)

In [5]:
# Stratified, shuffled 80/20 hold-out split.
# NOTE(review): the chronological-split cell that follows rebinds
# X_train / X_valid / y_train / y_valid, so on a top-to-bottom run this
# split is discarded — keep only the one that is intended.
X_train, X_valid, y_train, y_valid = train_test_split(
    train,
    target,
    test_size=0.2,
    stratify=target,
    random_state=42,
)

In [None]:
# Split without shuffling, because the data have a time relation:
# the first 80% of rows train the models, the last 20% validate them.
# (Assumes the rows are already in time order — TODO confirm.)
n = len(train)
split_idx = int(n * 0.8)  # 4/5 split

X_train, X_valid = train.iloc[:split_idx], train.iloc[split_idx:]
y_train, y_valid = target.iloc[:split_idx], target.iloc[split_idx:]

In [None]:
# X_train.shape, X_valid.shape, X_tune.shape

In [6]:
import gc

# Drop the names that are no longer needed once the splits exist, then ask
# the collector to run. `cat_cols` and `c` are leftovers of the
# categorical-encoding loop.
del train, target
del cat_cols, c
gc.collect()

0

## 1. Full Features

### 1.1. XGBoost

In [7]:
# Optuna-tuned XGBoost on the full feature set.
# NOTE(review): X_train / y_train are passed twice; presumably the second
# pair is the tuning subset (cf. the commented-out X_tune cell) — confirm
# against train_xgb_optuna's signature.
(
    model_xgboost_full,
    best_params_xgboost_full,
    hist_df,
    plot_paths,
) = train_xgb_optuna(
    X_train, y_train,
    X_train, y_train,
    X_valid, y_valid,
    early_stopping_rounds=10,
)

[I 2025-12-03 13:01:06,000] A new study created in memory with name: xgboost_aucpr_optimization


[Optuna XGBoost Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-03 13:13:13,294] Trial 0 finished with value: 0.8034715417912341 and parameters: {'learning_rate': 0.06351859523582477, 'max_depth': 8, 'subsample': 0.7990546947720916, 'colsample_bytree': 0.9854291579747783, 'min_child_weight': 0.15636535087214387, 'gamma': 4.092943669386593}. Best is trial 0 with value: 0.8034715417912341.
[I 2025-12-03 13:27:04,273] Trial 1 finished with value: 0.7884827439603921 and parameters: {'learning_rate': 0.022237919610860628, 'max_depth': 11, 'subsample': 0.8237868227162372, 'colsample_bytree': 0.5710071333115424, 'min_child_weight': 0.0044403694771470645, 'gamma': 2.779188981370297}. Best is trial 0 with value: 0.8034715417912341.
[I 2025-12-03 13:47:04,012] Trial 2 finished with value: 0.8023025556503861 and parameters: {'learning_rate': 0.013218862325441748, 'max_depth': 15, 'subsample': 0.8369021010467927, 'colsample_bytree': 0.5487872129835882, 'min_child_weight': 0.07754257335544931, 'gamma': 1.370402483783415}. Best is trial 0 with value: 

[INFO] Best XGBoost params: {'learning_rate': 0.2602340610204441, 'max_depth': 13, 'subsample': 0.9725755739435947, 'colsample_bytree': 0.8017297782470512, 'min_child_weight': 0.019702510747035724, 'gamma': 0.019470106922508487}


In [8]:
# Evaluate full-feature XGBoost on the validation split and log the run,
# leaving prediction_threshold at evaluate_and_log's default.
evaluate_and_log(
    model_xgboost_full,
    X_valid,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="XGBoost_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)

2025/12/03 19:46:35 INFO mlflow.tracking.fluent: Experiment with name 'Fraud-detection-custom-thresholds_v3' does not exist. Creating a new experiment.


[INFO] Logged metrics: {'roc_auc': np.float64(0.9776125288426118), 'pr_auc': np.float64(0.9155361043373691), 'precision': 0.9602763385146805, 'recall': 0.8071618678925719, 'f1': 0.877086893650585, 'custom_loss': np.float64(0.6759745317844684)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9776125288426118),
 'pr_auc': np.float64(0.9155361043373691),
 'precision': 0.9602763385146805,
 'recall': 0.8071618678925719,
 'f1': 0.877086893650585,
 'custom_loss': np.float64(0.6759745317844684)}

In [9]:
# Same evaluation, with the decision threshold set to 0.25.
evaluate_and_log(
    model_xgboost_full,
    X_valid,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="XGBoost_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
    prediction_threshold=0.25,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.9776125288426118), 'pr_auc': np.float64(0.9155361043373691), 'precision': 0.9372464160129835, 'recall': 0.838374062424389, 'f1': 0.8850574712643678, 'custom_loss': np.float64(0.5675483455820097)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9776125288426118),
 'pr_auc': np.float64(0.9155361043373691),
 'precision': 0.9372464160129835,
 'recall': 0.838374062424389,
 'f1': 0.8850574712643678,
 'custom_loss': np.float64(0.5675483455820097)}

In [10]:
# Same evaluation, with the decision threshold set to 0.10.
evaluate_and_log(
    model_xgboost_full,
    X_valid,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="XGBoost_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
    prediction_threshold=0.10,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.9776125288426118), 'pr_auc': np.float64(0.9155361043373691), 'precision': 0.8937875751503006, 'recall': 0.8632954270505686, 'f1': 0.8782769230769231, 'custom_loss': np.float64(0.4819656585498019)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9776125288426118),
 'pr_auc': np.float64(0.9155361043373691),
 'precision': 0.8937875751503006,
 'recall': 0.8632954270505686,
 'f1': 0.8782769230769231,
 'custom_loss': np.float64(0.4819656585498019)}

In [11]:
# Search for the threshold that minimizes the evaluation metric, then log a
# run at that threshold.
# NOTE(review): the threshold is searched and scored on the same
# X_valid / y_valid, so the logged metrics are likely optimistic.
# NOTE(review): in the recorded output the returned minimum (~0.607) is
# higher than the custom_loss logged for the fixed 0.10 threshold (~0.482),
# so the search may be stopping at a local minimum — verify the optimizer.
threshOpt, minScore = minimize_eval_metric_with_threshold(
    model_xgboost_full, X_valid, y_valid
)
print(f"Optimal threshold: {threshOpt}, Minimum eval metric score: {minScore}")
evaluate_and_log(
    model_xgboost_full,
    X_valid,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="XGBoost_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
    prediction_threshold=threshOpt,
)

Optimal threshold: 9.81056746240938e-06, Minimum eval metric score: 0.6068598232126529
[INFO] Logged metrics: {'roc_auc': np.float64(0.9776125288426118), 'pr_auc': np.float64(0.9155361043373691), 'precision': 0.05716401643238409, 'recall': 0.9898378901524316, 'f1': 0.10808597207360732, 'custom_loss': np.float64(0.6068598232126529)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9776125288426118),
 'pr_auc': np.float64(0.9155361043373691),
 'precision': 0.05716401643238409,
 'recall': 0.9898378901524316,
 'f1': 0.10808597207360732,
 'custom_loss': np.float64(0.6068598232126529)}

### 1.2. LightGBM

In [12]:
# Optuna-tuned LightGBM on the full feature set.
# This rebinds hist_df / plot_paths, which the XGBoost cells above also use.
(
    model_lgbm_full,
    best_params_lgbm_full,
    hist_df,
    plot_paths,
) = train_lgbm_optuna(
    X_train, y_train,
    X_train, y_train,
    X_valid, y_valid,
)

[I 2025-12-03 19:48:19,884] A new study created in memory with name: lgbm_aucpr_optimization


[Optuna LightGBM Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-03 19:49:30,356] Trial 0 finished with value: 0.3319401356684155 and parameters: {'num_leaves': 93, 'max_depth': 17, 'learning_rate': 0.07093807431566478, 'feature_fraction': 0.6620839099624509, 'bagging_fraction': 0.9585739569476259, 'bagging_freq': 1, 'min_child_samples': 71, 'lambda_l1': 6.203166917167362e-07, 'lambda_l2': 1.0756584701882994}. Best is trial 0 with value: 0.3319401356684155.
[I 2025-12-03 19:50:50,754] Trial 1 finished with value: 0.5216533742761759 and parameters: {'num_leaves': 200, 'max_depth': 20, 'learning_rate': 0.02213942620784039, 'feature_fraction': 0.7084122784116766, 'bagging_fraction': 0.6510218495056791, 'bagging_freq': 10, 'min_child_samples': 85, 'lambda_l1': 2.1877029533874874, 'lambda_l2': 1.529226632695873e-06}. Best is trial 1 with value: 0.5216533742761759.
[I 2025-12-03 19:52:05,705] Trial 2 finished with value: 0.41466368682952776 and parameters: {'num_leaves': 169, 'max_depth': 18, 'learning_rate': 0.04532759025006359, 'feature_fract

[INFO] Best LightGBM params: {'num_leaves': 223, 'max_depth': 17, 'learning_rate': 0.15088674011896516, 'feature_fraction': 0.831066537674629, 'bagging_fraction': 0.8289232036486788, 'bagging_freq': 3, 'min_child_samples': 33, 'lambda_l1': 1.149923805979539e-08, 'lambda_l2': 9.19036820301362}


In [13]:
# Evaluate full-feature LightGBM on the validation split and log the run,
# leaving prediction_threshold at evaluate_and_log's default.
evaluate_and_log(
    model_lgbm_full,
    X_valid,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="LightGBM_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.9788393197514088), 'pr_auc': np.float64(0.9141876469570723), 'precision': 0.9267379679144385, 'recall': 0.8386160174207598, 'f1': 0.8804775816080275, 'custom_loss': np.float64(0.5670572696176381)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9788393197514088),
 'pr_auc': np.float64(0.9141876469570723),
 'precision': 0.9267379679144385,
 'recall': 0.8386160174207598,
 'f1': 0.8804775816080275,
 'custom_loss': np.float64(0.5670572696176381)}

In [14]:
# Same evaluation, with the decision threshold set to 0.25.
evaluate_and_log(
    model_lgbm_full,
    X_valid,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="LightGBM_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
    prediction_threshold=0.25,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.9788393197514088), 'pr_auc': np.float64(0.9141876469570723), 'precision': 0.8531567746512178, 'recall': 0.8729736269053956, 'f1': 0.8629514470222435, 'custom_loss': np.float64(0.4497663155755749)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9788393197514088),
 'pr_auc': np.float64(0.9141876469570723),
 'precision': 0.8531567746512178,
 'recall': 0.8729736269053956,
 'f1': 0.8629514470222435,
 'custom_loss': np.float64(0.4497663155755749)}

In [15]:
# Same evaluation, with the decision threshold set to 0.10.
evaluate_and_log(
    model_lgbm_full,
    X_valid,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="LightGBM_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
    prediction_threshold=0.10,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.9788393197514088), 'pr_auc': np.float64(0.9141876469570723), 'precision': 0.713874496257916, 'recall': 0.9000725864989112, 'f1': 0.7962328767123288, 'custom_loss': np.float64(0.36230399295559995)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9788393197514088),
 'pr_auc': np.float64(0.9141876469570723),
 'precision': 0.713874496257916,
 'recall': 0.9000725864989112,
 'f1': 0.7962328767123288,
 'custom_loss': np.float64(0.36230399295559995)}

In [16]:
# Search for the metric-minimizing threshold for full-feature LightGBM and
# log a run at that threshold.
# NOTE(review): in the recorded output the returned minimum (~0.452) is
# higher than the custom_loss logged for the fixed 0.10 threshold (~0.362),
# so the search may not be reaching the true minimum — verify the optimizer.
threshOpt, minScore = minimize_eval_metric_with_threshold(
    model_lgbm_full, X_valid, y_valid
)
print(f"Optimal threshold: {threshOpt}, Minimum eval metric score: {minScore}")
evaluate_and_log(
    model_lgbm_full, X_valid, y_valid,
    prediction_threshold=threshOpt,
    experiment_name=EXPERIMENT_NAME,
    run_name="LightGBM_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)

Optimal threshold: 0.00036546924429878806, Minimum eval metric score: 0.45168828529820165
[INFO] Logged metrics: {'roc_auc': np.float64(0.9788393197514088), 'pr_auc': np.float64(0.9141876469570723), 'precision': 0.08116863639090527, 'recall': 0.9820953302685701, 'f1': 0.14994458810491318, 'custom_loss': np.float64(0.45168828529820165)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9788393197514088),
 'pr_auc': np.float64(0.9141876469570723),
 'precision': 0.08116863639090527,
 'recall': 0.9820953302685701,
 'f1': 0.14994458810491318,
 'custom_loss': np.float64(0.45168828529820165)}

### 1.3. CatBoost

In [17]:
# Optuna-tuned CatBoost on the full feature set.
# This rebinds hist_df / plot_paths, which the earlier model cells also use.
(
    model_catboost_full,
    best_params_catboost_full,
    hist_df,
    plot_paths,
) = train_catboost_optuna(
    X_train, y_train,
    X_train, y_train,
    X_valid, y_valid,
    early_stopping_rounds=10,
)

[I 2025-12-03 21:09:49,180] A new study created in memory with name: catboost_aucpr_optimization


[Optuna CatBoost Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-03 21:25:32,012] Trial 0 finished with value: 0.6735109695200783 and parameters: {'learning_rate': 0.1352067011736096, 'depth': 6, 'l2_leaf_reg': 9.312047419398002, 'subsample': 0.6056104761067427, 'border_count': 89}. Best is trial 0 with value: 0.6735109695200783.
[I 2025-12-03 21:40:38,945] Trial 1 finished with value: 0.612243164834815 and parameters: {'learning_rate': 0.07433624068979262, 'depth': 5, 'l2_leaf_reg': 7.839162424805284, 'subsample': 0.93837684206264, 'border_count': 105}. Best is trial 0 with value: 0.6735109695200783.
[I 2025-12-03 21:55:44,569] Trial 2 finished with value: 0.609077497106695 and parameters: {'learning_rate': 0.07079394232890084, 'depth': 5, 'l2_leaf_reg': 5.904048410274738, 'subsample': 0.849773853043581, 'border_count': 117}. Best is trial 0 with value: 0.6735109695200783.
[I 2025-12-03 22:10:23,482] Trial 3 finished with value: 0.6877021688121393 and parameters: {'learning_rate': 0.23289563857859136, 'depth': 5, 'l2_leaf_reg': 2.2228360

[INFO] Best CatBoost params: {'learning_rate': 0.15013622966459636, 'depth': 10, 'l2_leaf_reg': 4.168068031663588, 'subsample': 0.8760510519082981, 'border_count': 89}


In [18]:
# Evaluate full-feature CatBoost on the validation split and log the run,
# leaving prediction_threshold at evaluate_and_log's default.
evaluate_and_log(
    model_catboost_full,
    X_valid,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="CATboost_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.9766901628549776), 'pr_auc': np.float64(0.879964761240323), 'precision': 0.6898192197906755, 'recall': 0.8770868618436971, 'f1': 0.7722624627183639, 'custom_loss': np.float64(0.4439157381379754)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9766901628549776),
 'pr_auc': np.float64(0.879964761240323),
 'precision': 0.6898192197906755,
 'recall': 0.8770868618436971,
 'f1': 0.7722624627183639,
 'custom_loss': np.float64(0.4439157381379754)}

In [19]:
# Same evaluation, with the decision threshold set to 0.25.
evaluate_and_log(
    model_catboost_full,
    X_valid,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="CATboost_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
    prediction_threshold=0.25,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.9766901628549776), 'pr_auc': np.float64(0.879964761240323), 'precision': 0.4281403665623603, 'recall': 0.9269295910960561, 'f1': 0.585735035547741, 'custom_loss': np.float64(0.29902292816743997)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9766901628549776),
 'pr_auc': np.float64(0.879964761240323),
 'precision': 0.4281403665623603,
 'recall': 0.9269295910960561,
 'f1': 0.585735035547741,
 'custom_loss': np.float64(0.29902292816743997)}

In [20]:
# Same evaluation, with the decision threshold set to 0.1.
evaluate_and_log(
    model_catboost_full,
    X_valid,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="CATboost_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
    prediction_threshold=0.1,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.9766901628549776), 'pr_auc': np.float64(0.879964761240323), 'precision': 0.21323887296522634, 'recall': 0.9540285506895717, 'f1': 0.3485678925035361, 'custom_loss': np.float64(0.2840451112541064)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9766901628549776),
 'pr_auc': np.float64(0.879964761240323),
 'precision': 0.21323887296522634,
 'recall': 0.9540285506895717,
 'f1': 0.3485678925035361,
 'custom_loss': np.float64(0.2840451112541064)}

In [21]:
# Search for the metric-minimizing threshold for full-feature CatBoost and
# log a run at that threshold.
# NOTE(review): in the recorded output the returned minimum (~0.340) is
# higher than the custom_loss logged for the fixed 0.1 threshold (~0.284),
# so the search may not be reaching the true minimum — verify the optimizer.
threshOpt, minScore = minimize_eval_metric_with_threshold(
    model_catboost_full, X_valid, y_valid
)
print(f"Optimal threshold: {threshOpt}, Minimum eval metric score: {minScore}")
evaluate_and_log(
    model_catboost_full, X_valid, y_valid,
    prediction_threshold=threshOpt,
    experiment_name=EXPERIMENT_NAME,
    run_name="CATboost_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)

Optimal threshold: 0.04881344988467075, Minimum eval metric score: 0.3403071764825414
[INFO] Logged metrics: {'roc_auc': np.float64(0.9766901628549776), 'pr_auc': np.float64(0.879964761240323), 'precision': 0.13187176016112523, 'recall': 0.9663682555044761, 'f1': 0.23207437536316095, 'custom_loss': np.float64(0.3403071764825414)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9766901628549776),
 'pr_auc': np.float64(0.879964761240323),
 'precision': 0.13187176016112523,
 'recall': 0.9663682555044761,
 'f1': 0.23207437536316095,
 'custom_loss': np.float64(0.3403071764825414)}

## 2. Reduced Features

In [22]:
# ---- 1. SHAP importance table for each full-feature model ----
# The "importance" column is renamed per model so the tables can sit side by side.
df_xgb = get_top_features_shap(model_xgboost_full, X_train, y_train)
df_xgb = df_xgb.rename(columns={"importance": "importance_xgb"})

df_lgbm = get_top_features_shap(model_lgbm_full, X_train, y_train)
df_lgbm = df_lgbm.rename(columns={"importance": "importance_lgbm"})

df_cat = get_top_features_shap(model_catboost_full, X_train, y_train)
df_cat = df_cat.rename(columns={"importance": "importance_cat"})

# ---- 2. Join the three tables on the feature name ----
df = df_xgb.merge(df_lgbm, on="feature").merge(df_cat, on="feature")

# ---- 3. Rank features within each model (1 = most important) ----
for model_tag in ("xgb", "lgbm", "cat"):
    df[f"rank_{model_tag}"] = df[f"importance_{model_tag}"].rank(
        method="min", ascending=False
    )

# ---- 4. Rank cutoff that corresponds to the top 30% of features ----
n_features = len(df)
top_30_cutoff = int(n_features * 0.30)

# ---- 5. Flag, per model, whether a feature falls inside that cutoff ----
for model_tag in ("xgb", "lgbm", "cat"):
    df[f"in_top_{model_tag}"] = df[f"rank_{model_tag}"] <= top_30_cutoff

# ---- 6. Rule: keep features that are top 30% for at least 2 models ----
df["top_count"] = (
    df[["in_top_xgb", "in_top_lgbm", "in_top_cat"]].astype(int).sum(axis=1)
)

df_selected = df[df["top_count"] >= 2]  # final selected features


In [23]:
# ---- 7. Sort by mean importance across the three models ----
# df_selected is a boolean-filtered slice of df; .assign() returns a new frame
# instead of writing a column onto that slice (chained assignment, whose
# warning is hidden by the notebook-wide warnings filter).
df_selected = df_selected.assign(
    importance_mean=df_selected[
        ["importance_xgb", "importance_lgbm", "importance_cat"]
    ].mean(axis=1)
).sort_values("importance_mean", ascending=False)

# ---- 8. Save to CSV ----
# data_folder already ends with '/', so no extra separator is inserted
# (the previous f"{data_folder}/..." produced a doubled slash).
df.to_csv(f"{data_folder}feature_importances_shap_fullmodels.csv", index=False)
df_selected.to_csv(f"{data_folder}selected_features_shap.csv", index=False)

# ---- 9. List of selected feature names, most important first ----
selected_feature_list = df_selected["feature"].tolist()

In [24]:
# The importance tables are no longer needed: the selection is kept in
# selected_feature_list and both tables were written to CSV.
del df_xgb, df_lgbm, df_cat
del df_selected, df

In [25]:
# X_reduced, feature_ranking = select_important_features(
#     train,
#     target,
#     top_n=100,                # or None to use cumulative 95%
#     use_shap=True,            # optional, slower but more accurate
#     remove_correlated=True,
#     correlation_threshold=0.95
# )

In [26]:
# Restrict both splits to the selected features, in selected_feature_list order.
X_train_reduced = X_train.loc[:, selected_feature_list]
X_valid_reduced = X_valid.loc[:, selected_feature_list]
# X_tune_reduced = X_tune[selected_feature_list]


In [28]:
# Sanity check: (rows, columns) of the reduced train and validation frames.
(X_train_reduced.shape, X_valid_reduced.shape)  # , X_tune_reduced.shape

((472432, 204), (118108, 204))

### 2.1. XGBoost 

In [29]:
# Optuna-tuned XGBoost on the reduced feature set (30 trials), followed by
# two logged evaluations: default threshold, then 0.25.
(
    model_xgboost_reduced,
    best_params_xgboost_reduced,
    hist_df,
    plot_paths,
) = train_xgb_optuna(
    X_train_reduced, y_train,
    X_train_reduced, y_train,
    X_valid_reduced, y_valid,
    n_trials=30,
)
evaluate_and_log(
    model_xgboost_reduced,
    X_valid_reduced,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="XGBoost_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)
evaluate_and_log(
    model_xgboost_reduced,
    X_valid_reduced,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="XGBoost_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
    prediction_threshold=0.25,
)

[I 2025-12-04 13:36:01,918] A new study created in memory with name: xgboost_aucpr_optimization


[Optuna XGBoost Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-04 13:42:13,065] Trial 0 finished with value: 0.8630463200053381 and parameters: {'learning_rate': 0.07176724745521722, 'max_depth': 14, 'subsample': 0.691345645851472, 'colsample_bytree': 0.6651480001823453, 'min_child_weight': 0.06023782199441705, 'gamma': 1.7370321473816035}. Best is trial 0 with value: 0.8630463200053381.
[I 2025-12-04 13:46:41,587] Trial 1 finished with value: 0.8599377017077033 and parameters: {'learning_rate': 0.1457272701871864, 'max_depth': 12, 'subsample': 0.9341473866854714, 'colsample_bytree': 0.5789044790956329, 'min_child_weight': 0.0433759404138579, 'gamma': 2.7320366584200495}. Best is trial 0 with value: 0.8630463200053381.
[I 2025-12-04 13:52:16,653] Trial 2 finished with value: 0.8735479778170822 and parameters: {'learning_rate': 0.1796242010672382, 'max_depth': 11, 'subsample': 0.8416375127681059, 'colsample_bytree': 0.9936583740935362, 'min_child_weight': 0.0439235111639909, 'gamma': 0.22590913518882538}. Best is trial 2 with value: 0.87

[INFO] Best XGBoost params: {'learning_rate': 0.22200082505495905, 'max_depth': 11, 'subsample': 0.964424583105341, 'colsample_bytree': 0.994828611125093, 'min_child_weight': 0.0955612051839127, 'gamma': 0.08080617635702997}
[INFO] Logged metrics: {'roc_auc': np.float64(0.9772594974500788), 'pr_auc': np.float64(0.9132602394981091), 'precision': 0.9518958687040181, 'recall': 0.8139366077909509, 'f1': 0.8775270640406939, 'custom_loss': np.float64(0.6525383547261828)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.
[INFO] Logged metrics: {'roc_auc': np.float64(0.9772594974500788), 'pr_auc': np.float64(0.9132602394981091), 'precision': 0.9196639537936466, 'recall': 0.8475683522864748, 'f1': 0.8821455552757492, 'custom_loss': np.float64(0.5360009482846209)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9772594974500788),
 'pr_auc': np.float64(0.9132602394981091),
 'precision': 0.9196639537936466,
 'recall': 0.8475683522864748,
 'f1': 0.8821455552757492,
 'custom_loss': np.float64(0.5360009482846209)}

In [30]:
# Search for the metric-minimizing threshold for reduced-feature XGBoost and
# log a run at that threshold.
# NOTE(review): the threshold is searched and scored on the same validation
# split, so the logged metrics are likely optimistic.
threshOpt, minScore = minimize_eval_metric_with_threshold(
    model_xgboost_reduced, X_valid_reduced, y_valid
)
print(f"Optimal threshold: {threshOpt}, Minimum eval metric score: {minScore}")
evaluate_and_log(
    model_xgboost_reduced, X_valid_reduced, y_valid,
    prediction_threshold=threshOpt,
    experiment_name=EXPERIMENT_NAME,
    run_name="XGBoost_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)

Optimal threshold: 8.940413681557402e-05, Minimum eval metric score: 0.4438056693873404
[INFO] Logged metrics: {'roc_auc': np.float64(0.9772594974500788), 'pr_auc': np.float64(0.9132602394981091), 'precision': 0.08599723139175806, 'recall': 0.9770142753447859, 'f1': 0.15808017538365174, 'custom_loss': np.float64(0.4438056693873404)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9772594974500788),
 'pr_auc': np.float64(0.9132602394981091),
 'precision': 0.08599723139175806,
 'recall': 0.9770142753447859,
 'f1': 0.15808017538365174,
 'custom_loss': np.float64(0.4438056693873404)}

### 2.2. LightGBM

In [31]:
# Optuna-tuned LightGBM on the reduced feature set (30 trials), followed by
# two logged evaluations: default threshold, then 0.25.
(
    model_lgmb_reduced,
    best_params_lgmb_reduced,
    hist_df,
    plot_paths,
) = train_lgbm_optuna(
    X_train_reduced, y_train,
    X_train_reduced, y_train,
    X_valid_reduced, y_valid,
    n_trials=30,
)
evaluate_and_log(
    model_lgmb_reduced,
    X_valid_reduced,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="LGBM_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)
evaluate_and_log(
    model_lgmb_reduced,
    X_valid_reduced,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="LGBM_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
    prediction_threshold=0.25,
)

[I 2025-12-04 16:14:51,420] A new study created in memory with name: lgbm_aucpr_optimization


[Optuna LightGBM Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-04 16:15:15,244] Trial 0 finished with value: 0.3649651158860292 and parameters: {'num_leaves': 154, 'max_depth': 10, 'learning_rate': 0.07818826768085109, 'feature_fraction': 0.9708317533394086, 'bagging_fraction': 0.9465268769671564, 'bagging_freq': 3, 'min_child_samples': 32, 'lambda_l1': 7.226811826299855e-05, 'lambda_l2': 2.530288493438748e-06}. Best is trial 0 with value: 0.3649651158860292.
[I 2025-12-04 16:15:40,221] Trial 1 finished with value: 0.4357502758764789 and parameters: {'num_leaves': 96, 'max_depth': 17, 'learning_rate': 0.030688501428436876, 'feature_fraction': 0.6525009460832843, 'bagging_fraction': 0.9802238149545179, 'bagging_freq': 1, 'min_child_samples': 65, 'lambda_l1': 3.548066991375513e-08, 'lambda_l2': 0.0049208112880928245}. Best is trial 1 with value: 0.4357502758764789.
[I 2025-12-04 16:15:57,970] Trial 2 finished with value: 0.2678817250144218 and parameters: {'num_leaves': 27, 'max_depth': 12, 'learning_rate': 0.08276828313328968, 'feature_f

[INFO] Best LightGBM params: {'num_leaves': 169, 'max_depth': 13, 'learning_rate': 0.16498113155739738, 'feature_fraction': 0.9586765310450978, 'bagging_fraction': 0.8218429541917868, 'bagging_freq': 6, 'min_child_samples': 81, 'lambda_l1': 0.00220500614939493, 'lambda_l2': 0.010291676687411806}
[INFO] Logged metrics: {'roc_auc': np.float64(0.9773062050072636), 'pr_auc': np.float64(0.9068656651169572), 'precision': 0.9211297628563816, 'recall': 0.8364384224534237, 'f1': 0.8767435962465128, 'custom_loss': np.float64(0.5748636840857521)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.
[INFO] Logged metrics: {'roc_auc': np.float64(0.9773062050072636), 'pr_auc': np.float64(0.9068656651169572), 'precision': 0.8433592828497287, 'recall': 0.8649891120251633, 'f1': 0.8540372670807453, 'custom_loss': np.float64(0.47807091814271685)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9773062050072636),
 'pr_auc': np.float64(0.9068656651169572),
 'precision': 0.8433592828497287,
 'recall': 0.8649891120251633,
 'f1': 0.8540372670807453,
 'custom_loss': np.float64(0.47807091814271685)}

In [32]:
# Search for the metric-minimizing threshold for reduced-feature LightGBM and
# log a run at that threshold.
threshOpt, minScore = minimize_eval_metric_with_threshold(
    model_lgmb_reduced, X_valid_reduced, y_valid
)
print(f"Optimal threshold: {threshOpt}, Minimum eval metric score: {minScore}")
evaluate_and_log(
    model_lgmb_reduced, X_valid_reduced, y_valid,
    prediction_threshold=threshOpt,
    experiment_name=EXPERIMENT_NAME,
    run_name="LGBM_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)

Optimal threshold: 0.002786915222931217, Minimum eval metric score: 0.30500897483659023
[INFO] Logged metrics: {'roc_auc': np.float64(0.9773062050072636), 'pr_auc': np.float64(0.9068656651169572), 'precision': 0.18891977379468994, 'recall': 0.953786595693201, 'f1': 0.31537261490459617, 'custom_loss': np.float64(0.30500897483659023)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9773062050072636),
 'pr_auc': np.float64(0.9068656651169572),
 'precision': 0.18891977379468994,
 'recall': 0.953786595693201,
 'f1': 0.31537261490459617,
 'custom_loss': np.float64(0.30500897483659023)}

### 2.3. CatBoost

In [33]:
# Optuna-tuned CatBoost on the reduced feature set (30 trials), followed by
# two logged evaluations: default threshold, then 0.25.
(
    model_catoost_reduced,
    best_params_catoost_reduced,
    hist_df,
    plot_paths,
) = train_catboost_optuna(
    X_train_reduced, y_train,
    X_train_reduced, y_train,
    X_valid_reduced, y_valid,
    n_trials=30,
)
evaluate_and_log(
    model_catoost_reduced,
    X_valid_reduced,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="CATBoost_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)
evaluate_and_log(
    model_catoost_reduced,
    X_valid_reduced,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="CATBoost_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
    prediction_threshold=0.25,
)

[I 2025-12-04 16:44:16,902] A new study created in memory with name: catboost_aucpr_optimization


[Optuna CatBoost Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-04 17:10:15,109] Trial 0 finished with value: 0.8391740681370675 and parameters: {'learning_rate': 0.16955881882276694, 'depth': 10, 'l2_leaf_reg': 1.8440763087524017, 'subsample': 0.8991191065727072, 'border_count': 100}. Best is trial 0 with value: 0.8391740681370675.
[I 2025-12-04 17:18:31,617] Trial 1 finished with value: 0.6422839073671373 and parameters: {'learning_rate': 0.11848791671646595, 'depth': 5, 'l2_leaf_reg': 6.8641793249297445, 'subsample': 0.6374770173673866, 'border_count': 45}. Best is trial 0 with value: 0.8391740681370675.
[I 2025-12-04 17:33:48,563] Trial 2 finished with value: 0.7409910860232771 and parameters: {'learning_rate': 0.09219575657073187, 'depth': 8, 'l2_leaf_reg': 1.5198720901163405, 'subsample': 0.8610808219451966, 'border_count': 90}. Best is trial 0 with value: 0.8391740681370675.
[I 2025-12-04 17:50:15,351] Trial 3 finished with value: 0.8062916699587177 and parameters: {'learning_rate': 0.17484878825430017, 'depth': 9, 'l2_leaf_reg': 

[INFO] Best CatBoost params: {'learning_rate': 0.28767273831881374, 'depth': 10, 'l2_leaf_reg': 9.933350162898945, 'subsample': 0.922826333178803, 'border_count': 125}
[INFO] Logged metrics: {'roc_auc': np.float64(0.9754692979595376), 'pr_auc': np.float64(0.8970774703292203), 'precision': 0.8177898797367824, 'recall': 0.8720058069199129, 'f1': 0.8440281030444965, 'custom_loss': np.float64(0.4546940088732347)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.
[INFO] Logged metrics: {'roc_auc': np.float64(0.9754692979595376), 'pr_auc': np.float64(0.8970774703292203), 'precision': 0.6309925725860904, 'recall': 0.9044277764335834, 'f1': 0.7433628318584071, 'custom_loss': np.float64(0.3529481491516239)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9754692979595376),
 'pr_auc': np.float64(0.8970774703292203),
 'precision': 0.6309925725860904,
 'recall': 0.9044277764335834,
 'f1': 0.7433628318584071,
 'custom_loss': np.float64(0.3529481491516239)}

In [34]:
# Search for the metric-minimizing threshold for reduced-feature CatBoost and
# log a run at that threshold.
threshOpt, minScore = minimize_eval_metric_with_threshold(
    model_catoost_reduced, X_valid_reduced, y_valid
)
print(f"Optimal threshold: {threshOpt}, Minimum eval metric score: {minScore}")
evaluate_and_log(
    model_catoost_reduced, X_valid_reduced, y_valid,
    prediction_threshold=threshOpt,
    experiment_name=EXPERIMENT_NAME,
    run_name="CATBoost_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)

Optimal threshold: 0.05759661424438101, Minimum eval metric score: 0.30367968300199816
[INFO] Logged metrics: {'roc_auc': np.float64(0.9754692979595376), 'pr_auc': np.float64(0.8970774703292203), 'precision': 0.29011697660467906, 'recall': 0.9361238809581418, 'f1': 0.4429560936516114, 'custom_loss': np.float64(0.30367968300199816)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9754692979595376),
 'pr_auc': np.float64(0.8970774703292203),
 'precision': 0.29011697660467906,
 'recall': 0.9361238809581418,
 'f1': 0.4429560936516114,
 'custom_loss': np.float64(0.30367968300199816)}

## 3. Ensemble

In [None]:
# Retrain the best base models recorded under EXPERIMENT_NAME.
# `train` and `target` were deleted in the clean-up cell after the split, so
# referencing them here raises NameError on a top-to-bottom run. Rebuild the
# full feature frame and label series from the two splits instead; the
# original index labels are preserved, so rows and labels stay aligned.
base_models, X_tr, X_va, y_tr, y_va, X_test, y_test, best_params_dict = train_best_base_models_from_mlflow(
    pd.concat([X_train, X_valid], axis=0),
    pd.concat([y_train, y_valid], axis=0),
    test_size=0.15,
    experiment_name=EXPERIMENT_NAME,
)

### 3.1. Logistic

In [35]:
# Meta-features for stacking: one column per reduced-feature base model,
# holding that model's positive-class probability.
# NOTE(review): X_meta is predicted on X_train_reduced, the same rows these
# base models were fitted on, so the meta-learner sees in-sample
# probabilities. Out-of-fold predictions are the usual remedy — confirm
# whether this is intended.
reduced_base_models = [model_xgboost_reduced, model_lgmb_reduced, model_catoost_reduced]

X_meta = pd.DataFrame(
    np.column_stack(
        [mdl.predict_proba(X_train_reduced)[:, 1] for mdl in reduced_base_models]
    )
)
X_meta_val = pd.DataFrame(
    np.column_stack(
        [mdl.predict_proba(X_valid_reduced)[:, 1] for mdl in reduced_base_models]
    )
)

In [36]:
# Fit the meta-learner on the stacked base-model probabilities.
# This rebinds hist_df / plot_paths, which the base-model cells also use.
# NOTE(review): every recorded Optuna trial reports value 1.0 while the
# reported validation PR-AUC is 0.9043, which suggests the objective is
# scored on in-sample data — TODO confirm inside train_ensemble.
(
    ensemble,
    best_params_log,
    hist_df,
    plot_paths,
) = train_ensemble(X_meta, y_train, X_meta_val, y_valid)

[I 2025-12-05 03:46:48,671] A new study created in memory with name: ensemble_meta_learner_pr_auc_optimization


[INFO] Generating meta-features...
[INFO] Optimizing meta-learner with Optuna...


[Optuna Ensemble Meta-Learner Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-05 03:47:04,967] Trial 0 finished with value: 1.0 and parameters: {'C': 0.04009477966365624, 'class_weight': None, 'solver': 'sag', 'penalty_sag': None}. Best is trial 0 with value: 1.0.
[I 2025-12-05 03:49:41,101] Trial 1 finished with value: 1.0 and parameters: {'C': 1.7519066126045513, 'class_weight': 'balanced', 'solver': 'saga', 'penalty_saga': 'l1'}. Best is trial 0 with value: 1.0.
[I 2025-12-05 03:49:42,281] Trial 2 finished with value: 1.0 and parameters: {'C': 0.04268818260828977, 'class_weight': 'balanced', 'solver': 'lbfgs', 'penalty_lbfgs': None}. Best is trial 0 with value: 1.0.
[I 2025-12-05 03:50:33,764] Trial 3 finished with value: 1.0 and parameters: {'C': 35.759967400668934, 'class_weight': None, 'solver': 'saga', 'penalty_saga': 'l2'}. Best is trial 0 with value: 1.0.
[I 2025-12-05 03:50:36,268] Trial 4 finished with value: 1.0 and parameters: {'C': 0.002622715531492202, 'class_weight': None, 'solver': 'sag', 'penalty_sag': 'l2'}. Best is trial 0 with val

[INFO] Best meta-learner params: {'C': 0.04009477966365624, 'class_weight': None, 'solver': 'sag', 'penalty_sag': None}
[INFO] Ensemble validation PR-AUC: 0.9043
[INFO] Ensemble validation ROC-AUC: 0.9785


In [37]:
# Evaluate the stacked meta-learner on the validation meta-features and log
# the run. hp_search_plots is not passed here, unlike the base-model cells.
evaluate_and_log(
    ensemble,
    X_meta_val,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="Ensemble_from_best_base_models",
    hp_search_history=hist_df,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.9784540736883788), 'pr_auc': np.float64(0.904260950338986), 'precision': 0.9380697050938338, 'recall': 0.846600532300992, 'f1': 0.8899910975454661, 'custom_loss': np.float64(0.5387526670504962)}
[WARN] SHAP skipped: Model type not yet supported by TreeExplainer: <class 'sklearn.linear_model._logistic.LogisticRegression'>




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9784540736883788),
 'pr_auc': np.float64(0.904260950338986),
 'precision': 0.9380697050938338,
 'recall': 0.846600532300992,
 'f1': 0.8899910975454661,
 'custom_loss': np.float64(0.5387526670504962)}

### 3.2. NN

In [None]:
# Neural-network ensemble on top of the base models returned by
# train_best_base_models_from_mlflow (30 trials, 100 epochs).
# Note that X_test / y_test are both inputs and rebound by the return value.
(
    nn_model,
    base_mdls,
    X_val,
    y_val,
    X_test,
    y_test,
    params,
    hist,
    plots,
) = train_neural_network_ensemble(
    base_models=base_models,
    X_tr=X_tr, y_tr=y_tr,
    X_va=X_va, y_va=y_va,
    X_test=X_test, y_test=y_test,
    n_trials=30,
    epochs=100,
)

## 4. create submission on test

In [None]:
import mlflow.pyfunc

# Artifact directory of the logged model used for the submission. The path is
# pinned to one specific local MLflow experiment/model id.
SUBMISSION_MODEL_URI = "mlruns/306298534761506969/models/m-13e4ef05880a4806b656985753497758/artifacts"

# Load the pyfunc wrapper, then unwrap it to the underlying estimator:
# the prediction cell below calls `predict_proba` on `model`.
pyfunc_wrapper = mlflow.pyfunc.load_model(SUBMISSION_MODEL_URI)
model = pyfunc_wrapper.get_raw_model()

In [None]:
# Feature columns fed to the loaded model, in the order they are passed.
# NOTE(review): presumably this must match the feature set/order the model was
# trained with — confirm against the logged model before editing the list.
expected_features = [
    "C1", "R_emaildomain", "C14", "P_emaildomain", "id_31", "uid_D3_mean",
    "DeviceInfo", "C13", "ProductCD_TransactionAmt_rel", "card1",
    "uid_M4_freq", "uid_C13_mean", "uid_C13_std", "uid_D2_mean",
    "uid_C13_rel", "uid_TransactionAmt_mean", "card2", "uid_C11_mean",
    "uid_C1_mean", "addr1", "uid_V_PCA_4_mean", "uid_D11_mean",
    "uid_V_PCA_13_mean", "uid_C2_mean", "uid_C6_mean", "C5", "D3",
    "uid_D2_std", "card6", "C8", "uid_D11_std", "M5", "uid_card2_mean",
    "uid_C14_mean", "card6_TransactionAmt_mean", "uid_dist1_mean", "C11",
    "hour_of_day", "uid_TransactionAmt_std", "uid_V_PCA_12_mean", "id_30",
    "uid_M5_freq", "uid_card5_mean", "uid_V_PCA_2_mean", "uid_V_PCA_9_std",
    "uid_V_PCA_5_mean", "uid_V_PCA_11_mean", "uid_D15_mean",
    "uid_V_PCA_3_std", "M4", "uid_V_PCA_9_mean", "uid_D10_mean",
    "uid_M6_freq", "uid_D5_mean", "C2", "uid_V_PCA_13_std",
    "uid_DecimalPlaces_mean", "card5", "uid_V_PCA_4_std", "D8",
    "uid_C9_mean", "uid_C1_std", "uid_D10_std", "uid_C14_rel", "M6",
    "uid_V_PCA_1_mean", "D2", "uid_P_emaildomain_freq", "uid_C5_mean",
    "uid_V_PCA_10_mean", "id_02", "uid_V_PCA_10_std", "uid_V_PCA_3_mean",
    "id_20", "M3", "V_PCA_2", "uid_V_PCA_6_mean", "uid_V_PCA_11_std",
    "uid_V_PCA_7_std", "card4", "uid_V_PCA_8_mean", "V_PCA_4",
    "card6_TransactionAmt_rel", "V_PCA_9", "uid_D4_mean", "uid_C6_std",
    "C6", "uid_hour_of_day_TransactionAmt_mean", "dist1",
    "TransactionAmt_Scaled", "uid_V_PCA_5_std", "uid_V_PCA_1_std", "D4",
    "uid_C9_std", "D15", "uid_C1_rel", "day_of_week", "uid_V_PCA_7_mean",
    "id_05", "uid_day_of_week_TransactionAmt_mean", "uid_D5_std",
    "uid_D3_std", "uid_id_02_mean", "uid_D4_std", "uid_DecimalPlaces_rel",
    "uid_V_PCA_12_std", "uid_V_PCA_2_std", "uid_C11_std", "D10",
    "uid_V_PCA_6_std", "uid_C6_rel", "uid_day_of_week_TransactionAmt_rel",
]

# Select (and order) the test columns exactly as listed above; a column missing
# from `test` raises KeyError here rather than at prediction time.
df_aligned = test[expected_features]

In [None]:
# Narrow `train` to the same feature columns (same order) as `df_aligned`.
# NOTE(review): this rebinds `train`, dropping every other column for the rest
# of the session.
train = train.loc[:, expected_features]

In [None]:
# Score the aligned test features with the loaded model.
# NOTE(review): presumably returns an (n_samples, n_classes) probability array
# as in the scikit-learn API — the next cell reads column 1 of it; confirm.
preds = model.predict_proba(df_aligned)

In [None]:
# Store the second probability column as the submission score.
# NOTE(review): assumes column 1 is the positive (fraud) class — confirm via
# the model's class ordering.
test["isFraud"] = preds[:, 1]

In [None]:
# Write the two submission columns next to the processed data, without the
# DataFrame index.
submission_path = data_folder + "submission.csv"
submission = test[['TransactionID', 'isFraud']]
submission.to_csv(submission_path, index=False)

In [None]:
# Reload `test` from disk, discarding the in-memory frame (including the
# `isFraud` column added above).
# NOTE(review): this reads 'test_processed.csv' from the working directory,
# whereas the setup cell reads f'{data_folder}test_processed_v2.csv' — confirm
# which file is intended before relying on this cell.
# The original used an f-string with no placeholders; a plain literal is the
# same path without the misleading prefix.
test = pd.read_csv('test_processed.csv')