# 0. Setup and data loading

In [69]:
import pandas as pd
import numpy as np
import os
import random

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

import importlib
import train_models_util
import feature_importance
import evaluate_models_util
importlib.reload(train_models_util)
importlib.reload(feature_importance)
importlib.reload(evaluate_models_util)
from train_models_util import (
    train_xgb_optuna, 
    train_catboost_optuna, 
    train_lgbm_optuna,
    plot_fp_fn_rate_over_time
)
from evaluate_models_util import evaluate_and_log
from feature_importance import get_top_features_shap

# Fix every RNG seed in one place so repeated runs draw the same random numbers.
SEED = 42

# NOTE(review): PYTHONHASHSEED is read when an interpreter starts, so setting it
# here is inherited by child processes but does not change hashing in this kernel.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)

In [58]:
# Experiment name passed as `experiment_name=` to every evaluate_and_log call below.
EXPERIMENT_NAME = "Fraud-detection-custom-thresholds_v7"

In [13]:
# Folder with the pre-processed files. It ends with a slash because the loading
# code appends the file name to it directly.
data_folder = 'ieee-fraud-detection-data/processed/'

# Read the processed training table into memory.
train_csv_path = data_folder + 'train_processed.csv'
data = pd.read_csv(train_csv_path)

In [15]:
# Remove TransactionDT and uid so they are not fed to the models as features.
# `columns=` already names the axis, so no separate `axis` argument is needed.
data = data.drop(columns=['TransactionDT', 'uid'])

In [16]:
# Convert every object-dtype column to pandas' categorical dtype, replacing
# missing values with an explicit 'missing' label first.
cat_cols = np.array(data.select_dtypes(include=['object']).columns)
for c in cat_cols:
    # Assign the filled column back rather than calling
    # `data[c].fillna(..., inplace=True)`: the in-place form is chained
    # assignment on a selected column, which pandas deprecates and which leaves
    # `data` unchanged under copy-on-write. fillna is a no-op when the column
    # has no nulls, so the separate null check is unnecessary.
    data[c] = data[c].fillna('missing').astype('category')

In [17]:
# Hold out the last 20% of rows without shuffling, since the data have a time relation.
# NOTE(review): this assumes the rows are already in chronological order;
# TransactionDT was dropped in an earlier cell, so confirm the CSV is time-sorted.
n = len(data)
split_idx = int(n * 0.8)
train, test = data.iloc[:split_idx], data.iloc[split_idx:]

In [18]:
# Release the full frame and the leftovers of the categorical loop, then force
# a garbage-collection pass (its return value is shown as the cell output).
import gc

del data
del cat_cols, c
gc.collect()

21950

## Some more features

In [None]:

# # 0. Sort train by uid + time to ensure correct ordering

# train = train.sort_values(["uid", "TransactionDT"])


# # 1. count_seen_before
# #    = how many times we saw this user before this row

# train["count_seen_before"] = train.groupby("uid").cumcount()
# # Example: 0,1,2,... for each user


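# # 2. cumulative_fraud_before
# #    = total fraud count for this user BEFORE this transaction
# #    NOTE(review): .shift(1) below is applied to the whole column rather than
# #    per uid, so each user's first row picks up the previous user's running
# #    total; `cumsum() - isFraud` per user would exclude the current row safely.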

# train["cumulative_fraud_before"] = (
#     train.groupby("uid")["isFraud"]
#          .cumsum()                # running fraud count
#          .shift(1, fill_value=0)  # shift so we don't include current fraud
# )


# # 3. fraud_percentage_before
# #    = cumulative_fraud_before / count_seen_before

# train["fraud_percentage_before"] = (
#     train["cumulative_fraud_before"] / 
#     train["count_seen_before"].replace(0, pd.NA)
# )

# # Fill NaN for first-time users (count_seen_before == 0 → pct = 0)
# train["fraud_percentage_before"] = train["fraud_percentage_before"].fillna(0)



# # 4. Build TRAIN HISTORY LOOKUP
# #    Used to compute safe features for validation/test.

# user_tx_count = train.groupby("uid")["count_seen_before"].max().to_dict()
# user_cum_fraud = train.groupby("uid")["cumulative_fraud_before"].max().to_dict()

# train = train.sort_values('TransactionDT')

Unnamed: 0,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,...,uid_P_emaildomain_3_freq,uid_R_emaildomain_1_ct,uid_R_emaildomain_1_freq,uid_R_emaildomain_2_ct,uid_R_emaildomain_2_freq,uid_R_emaildomain_3_ct,uid_R_emaildomain_3_freq,count_seen_before,cumulative_fraud_before,fraud_percentage_before
0,86400,68.50,W,13926,,150.0,discover,142.0,credit,315.0,...,,0.0,,0.0,,0.0,,0,0,0.0
1,86401,29.00,W,2755,404.0,150.0,mastercard,102.0,credit,325.0,...,,0.0,,0.0,,0.0,,0,0,0.0
2,86469,59.00,W,4663,490.0,150.0,visa,166.0,debit,330.0,...,,0.0,,0.0,,0.0,,0,0,0.0
3,86499,50.00,W,18132,567.0,150.0,mastercard,117.0,debit,476.0,...,,0.0,,0.0,,0.0,,0,0,0.0
4,86506,50.00,H,4497,514.0,150.0,mastercard,102.0,credit,420.0,...,,0.0,,0.0,,0.0,,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
354319,8745488,226.00,W,15497,490.0,150.0,visa,226.0,debit,299.0,...,,0.0,,0.0,,0.0,,10,0,0.0
354320,8745617,59.00,W,4141,404.0,150.0,mastercard,102.0,credit,143.0,...,,0.0,,0.0,,0.0,,7,0,0.0
354321,8745766,50.00,W,12538,543.0,150.0,mastercard,224.0,debit,327.0,...,,0.0,,0.0,,0.0,,4,0,0.0
354322,8745770,159.95,W,12219,,150.0,visa,166.0,debit,143.0,...,,0.0,,0.0,,0.0,,0,0,0.0


In [None]:
# # 5. Function to compute history-based features for val/test
# #    Uses ONLY training history → no leakage
# def get_user_history(uid):
#     # Number of previous transactions in TRAIN
#     prev_seen = user_tx_count.get(uid, 0)
    
#     # Number of previous frauds in TRAIN
#     prev_fraud = user_cum_fraud.get(uid, 0)
    
#     # Fraud percentage history
#     fraud_pct = 0 if prev_seen == 0 else prev_fraud / prev_seen
    
#     return pd.Series([prev_seen, prev_fraud, fraud_pct])

In [None]:
# # 6. Apply to VALIDATION
# valid[[
#     "count_seen_before", 
#     "cumulative_fraud_before",
#     "fraud_percentage_before"
# ]] = valid.apply(lambda row: get_user_history(row["uid"]), axis=1)

# # 7. Apply to TEST
# test[[
#     "count_seen_before", 
#     "cumulative_fraud_before",
#     "fraud_percentage_before"
# ]] = test.apply(lambda row: get_user_history(row["uid"]), axis=1)


## 1. Full Features

In [19]:
# Separate the isFraud target from the feature columns for both partitions.
y_train, y_test = train['isFraud'], test['isFraud']
X_train = train.drop(columns=['isFraud'])
X_test = test.drop(columns=['isFraud'])

# The combined frames are not needed for modelling.
# NOTE(review): the submission cells at the end of the notebook use `train` and
# `test` again, so they need those names re-created first.
del train, test

### 1.1. XGBoost

In [20]:
# Optuna search plus final fit of XGBoost on the full feature set.
# Unpacked as: model, best hyper-parameters, search history, search plot paths.
(
    model_xgboost_full,
    best_params_xgboost_full,
    hist_df,
    plot_paths,
) = train_xgb_optuna(X_train, y_train)

[I 2025-12-10 20:23:15,876] A new study created in memory with name: xgboost_aucpr_optimization


[Optuna XGBoost Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-10 20:34:12,636] Trial 0 finished with value: 0.5418969175306082 and parameters: {'learning_rate': 0.02008464906455329, 'max_depth': 7, 'subsample': 0.6200008408567017, 'colsample_bytree': 0.901637887545111, 'min_child_weight': 0.04523002904405033, 'gamma': 2.5843537499492313}. Best is trial 0 with value: 0.5418969175306082.
[I 2025-12-10 20:43:25,313] Trial 1 finished with value: 0.4960168035592985 and parameters: {'learning_rate': 0.02410028239617086, 'max_depth': 3, 'subsample': 0.8357876387438774, 'colsample_bytree': 0.5606294566704293, 'min_child_weight': 0.34658968730683914, 'gamma': 1.604750142653607}. Best is trial 0 with value: 0.5418969175306082.
[I 2025-12-10 20:53:16,103] Trial 2 finished with value: 0.5191261091250304 and parameters: {'learning_rate': 0.02635393575225079, 'max_depth': 4, 'subsample': 0.9533776467343262, 'colsample_bytree': 0.6757248726040863, 'min_child_weight': 0.001536459239612567, 'gamma': 2.05674534462281}. Best is trial 0 with value: 0.5418

[INFO] Best XGBoost params: {'learning_rate': 0.029742596648851545, 'max_depth': 8, 'subsample': 0.8469353033128059, 'colsample_bytree': 0.850902041776415, 'min_child_weight': 0.023357406099895485, 'gamma': 0.9451189694511664}
[0]	validation_0-aucpr:0.34212
[100]	validation_0-aucpr:0.54983
[200]	validation_0-aucpr:0.60059
[300]	validation_0-aucpr:0.61733
[400]	validation_0-aucpr:0.62851
[500]	validation_0-aucpr:0.63590
[600]	validation_0-aucpr:0.64362
[700]	validation_0-aucpr:0.64710
[800]	validation_0-aucpr:0.64898
[900]	validation_0-aucpr:0.64894
[934]	validation_0-aucpr:0.64942


In [74]:
# Score the full-feature XGBoost model on the hold-out set and log the run.
# NOTE(review): hist_df / plot_paths are reassigned by every training cell, so
# this call logs whichever search ran most recently; run it right after the
# full-feature XGBoost training cell.
evaluate_and_log(
    model_xgboost_full,
    X_test,
    y_test,
    experiment_name=EXPERIMENT_NAME,
    run_name="XGBoost_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)

2025/12/12 20:04:25 INFO mlflow.tracking.fluent: Experiment with name 'Fraud-detection-custom-thresholds_v7' does not exist. Creating a new experiment.


[INFO] Logged metrics: {'roc_auc': np.float64(0.8807625525871076), 'pr_auc': np.float64(0.5165338821447129), 'precision': 0.496701685805033, 'recall': 0.500246062992126, 'f1': 0.49846757386293983, 'custom_loss': np.float64(1.7370542215599283)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.8807625525871076),
 'pr_auc': np.float64(0.5165338821447129),
 'precision': 0.496701685805033,
 'recall': 0.500246062992126,
 'f1': 0.49846757386293983,
 'custom_loss': np.float64(1.7370542215599283)}

### 1.2. LightGBM

In [22]:
# Optuna search plus final fit of LightGBM on the full feature set.
# Unpacked as: model, best hyper-parameters, search history, search plot paths.
(
    model_lgbm_full,
    best_params_lgbm_full,
    hist_df,
    plot_paths,
) = train_lgbm_optuna(X_train, y_train)

[I 2025-12-11 01:50:33,421] A new study created in memory with name: lgbm_aucpr_optimization


[Optuna LightGBM Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-11 01:52:58,696] Trial 0 finished with value: 0.550249346495041 and parameters: {'num_leaves': 226, 'max_depth': 11, 'learning_rate': 0.01100451560301804, 'min_child_samples': 32, 'reg_alpha': 0.04727855753858912, 'reg_lambda': 0.4037233193169388, 'colsample_bytree': 0.8306871988562593, 'subsample': 0.9040011218465238}. Best is trial 0 with value: 0.550249346495041.
[I 2025-12-11 01:55:50,563] Trial 1 finished with value: 0.574129521373975 and parameters: {'num_leaves': 292, 'max_depth': 18, 'learning_rate': 0.017417947683587065, 'min_child_samples': 58, 'reg_alpha': 0.48422615012501025, 'reg_lambda': 0.08952761857940922, 'colsample_bytree': 0.9361185547324374, 'subsample': 0.8277761692794536}. Best is trial 1 with value: 0.574129521373975.
[I 2025-12-11 01:59:34,186] Trial 2 finished with value: 0.608777077613429 and parameters: {'num_leaves': 381, 'max_depth': 14, 'learning_rate': 0.023038604421880594, 'min_child_samples': 79, 'reg_alpha': 0.46461288277041835, 'reg_lambda'

[INFO] Best LightGBM params: {'num_leaves': 447, 'max_depth': 20, 'learning_rate': 0.024240466897141592, 'min_child_samples': 84, 'reg_alpha': 0.12397436518223405, 'reg_lambda': 0.18265070503065436, 'colsample_bytree': 0.8789896850185078, 'subsample': 0.9710956914825437}
[50]	valid_0's binary_logloss: 0.122919
[100]	valid_0's binary_logloss: 0.116016
[150]	valid_0's binary_logloss: 0.106728
[200]	valid_0's binary_logloss: 0.0997755
[250]	valid_0's binary_logloss: 0.0938943
[300]	valid_0's binary_logloss: 0.088788
[350]	valid_0's binary_logloss: 0.0842811
[400]	valid_0's binary_logloss: 0.080808
[450]	valid_0's binary_logloss: 0.078805
[500]	valid_0's binary_logloss: 0.078362
[550]	valid_0's binary_logloss: 0.0784578
[600]	valid_0's binary_logloss: 0.0791059


In [75]:
# Score the full-feature LightGBM model on the hold-out set and log the run.
# NOTE(review): hist_df / plot_paths hold the most recent search only; run this
# right after the full-feature LightGBM training cell.
evaluate_and_log(
    model_lgbm_full,
    X_test,
    y_test,
    experiment_name=EXPERIMENT_NAME,
    run_name="LightGBM_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.907969739611483), 'pr_auc': np.float64(0.5812353671424075), 'precision': 0.6744031830238727, 'recall': 0.500492125984252, 'f1': 0.5745762711864407, 'custom_loss': np.float64(1.7270802993870018)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.907969739611483),
 'pr_auc': np.float64(0.5812353671424075),
 'precision': 0.6744031830238727,
 'recall': 0.500492125984252,
 'f1': 0.5745762711864407,
 'custom_loss': np.float64(1.7270802993870018)}

### 1.3. CatBoost

In [24]:
# Optuna search plus final fit of CatBoost on the full feature set.
# Unpacked as: model, best hyper-parameters, search history, search plot paths.
(
    model_catboost_full,
    best_params_catboost_full,
    hist_df,
    plot_paths,
) = train_catboost_optuna(X_train, y_train)

[I 2025-12-11 04:16:55,284] A new study created in memory with name: catboost_aucpr_optimization


[Optuna CatBoost Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-11 04:31:24,201] Trial 0 finished with value: 0.5287053124530988 and parameters: {'learning_rate': 0.01900374826476142, 'depth': 6, 'l2_leaf_reg': 9.904870924520075, 'subsample': 0.7066417028196362, 'border_count': 111}. Best is trial 0 with value: 0.5287053124530988.
[I 2025-12-11 04:51:32,869] Trial 1 finished with value: 0.5421848547393376 and parameters: {'learning_rate': 0.01424323267912269, 'depth': 8, 'l2_leaf_reg': 9.912432061918471, 'subsample': 0.7766886006649043, 'border_count': 38}. Best is trial 1 with value: 0.5421848547393376.
[I 2025-12-11 05:08:38,826] Trial 2 finished with value: 0.5511386737465074 and parameters: {'learning_rate': 0.02567731813242579, 'depth': 7, 'l2_leaf_reg': 8.678681159414351, 'subsample': 0.727595773473682, 'border_count': 114}. Best is trial 2 with value: 0.5511386737465074.
[I 2025-12-11 05:26:51,595] Trial 3 finished with value: 0.5504744441433648 and parameters: {'learning_rate': 0.020145512675280988, 'depth': 7, 'l2_leaf_reg': 8.4

[INFO] Best CatBoost params: {'learning_rate': 0.026880459534529604, 'depth': 8, 'l2_leaf_reg': 7.314549860868977, 'subsample': 0.94911421218717, 'border_count': 108}
0:	learn: 0.8642463	test: 0.8767199	best: 0.8767199 (0)	total: 2.17s	remaining: 3h 43s
100:	learn: 0.9309533	test: 0.9256100	best: 0.9256100 (100)	total: 3m 46s	remaining: 3h 3m 24s
200:	learn: 0.9482742	test: 0.9362463	best: 0.9362463 (200)	total: 7m 20s	remaining: 2h 55m 24s
300:	learn: 0.9588261	test: 0.9425871	best: 0.9425871 (300)	total: 10m 54s	remaining: 2h 50m 17s
400:	learn: 0.9656646	test: 0.9464478	best: 0.9464478 (400)	total: 14m 25s	remaining: 2h 45m 29s
500:	learn: 0.9721868	test: 0.9503005	best: 0.9503005 (500)	total: 17m 59s	remaining: 2h 41m 30s
600:	learn: 0.9778644	test: 0.9519834	best: 0.9520888 (593)	total: 21m 32s	remaining: 2h 37m 37s
700:	learn: 0.9821839	test: 0.9535039	best: 0.9535196 (699)	total: 25m 5s	remaining: 2h 33m 54s
800:	learn: 0.9853483	test: 0.9542686	best: 0.9542689 (796)	total: 28m 

In [76]:
# Score the full-feature CatBoost model on the hold-out set and log the run.
# NOTE(review): hist_df / plot_paths hold the most recent search only; run this
# right after the full-feature CatBoost training cell.
evaluate_and_log(
    model_catboost_full,
    X_test,
    y_test,
    experiment_name=EXPERIMENT_NAME,
    run_name="CATboost_Optuna_fullfeatures",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.9163916125272273), 'pr_auc': np.float64(0.5282549675325425), 'precision': 0.2752046130952381, 'recall': 0.7281003937007874, 'f1': 0.3994330453563715, 'custom_loss': np.float64(1.001566362989806)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9163916125272273),
 'pr_auc': np.float64(0.5282549675325425),
 'precision': 0.2752046130952381,
 'recall': 0.7281003937007874,
 'f1': 0.3994330453563715,
 'custom_loss': np.float64(1.001566362989806)}

## 2. Reduced Features

In [26]:
# Short tags for the three full-feature models; they suffix every per-model column.
model_tags = ("xgb", "lgbm", "cat")

# ---- 1. SHAP importance table per model, importance column renamed per tag ----
df_xgb = get_top_features_shap(model_xgboost_full, X_train, y_train)
df_xgb = df_xgb.rename(columns={"importance": "importance_xgb"})

df_lgbm = get_top_features_shap(model_lgbm_full, X_train, y_train)
df_lgbm = df_lgbm.rename(columns={"importance": "importance_lgbm"})

df_cat = get_top_features_shap(model_catboost_full, X_train, y_train)
df_cat = df_cat.rename(columns={"importance": "importance_cat"})

# ---- 2. One row per feature, joined on the feature name ----
df = df_xgb.merge(df_lgbm, on="feature").merge(df_cat, on="feature")

# ---- 3. Per-model rank (1 = most important; ties share the lowest rank) ----
for tag in model_tags:
    df[f"rank_{tag}"] = df[f"importance_{tag}"].rank(method="min", ascending=False)

# ---- 4. Rank cutoff that marks the top 30% of features ----
n_features = len(df)
top_30_cutoff = int(n_features * 0.30)

# ---- 5. Flag, per model, whether the feature sits inside that top 30% ----
for tag in model_tags:
    df[f"in_top_{tag}"] = df[f"rank_{tag}"] <= top_30_cutoff

# ---- 6. Keep features that are in the top 30% for at least 2 models ----
df["top_count"] = sum(df[f"in_top_{tag}"].astype(int) for tag in model_tags)

df_selected = df[df["top_count"] >= 2]  # final selected features


In [27]:
# ---- 7. Add the mean importance across the three models and sort by it ----
# .assign() builds a new frame, so the column is not written onto the filtered
# slice of `df` (the pattern that triggers pandas' SettingWithCopyWarning).
df_selected = df_selected.assign(
    importance_mean=df_selected[
        ["importance_xgb", "importance_lgbm", "importance_cat"]
    ].mean(axis=1)
).sort_values("importance_mean", ascending=False)

# ---- 8. Save to CSV ----
# data_folder already ends with "/", so os.path.join avoids the doubled
# separator that f"{data_folder}/..." produced.
df.to_csv(os.path.join(data_folder, "feature_importances_shap_fullmodels.csv"), index=False)
df_selected.to_csv(os.path.join(data_folder, "selected_features_shap.csv"), index=False)

# ---- 9. List of selected feature names, most important first ----
selected_feature_list = df_selected["feature"].tolist()

In [28]:
# Drop the intermediate importance tables; the cells that follow only use selected_feature_list.
del df_xgb, df_lgbm, df_cat, df_selected, df

In [29]:
# Restrict both partitions to the selected features, in selected_feature_list order.
X_train_reduced, X_test_reduced = (
    frame[selected_feature_list] for frame in (X_train, X_test)
)

In [30]:
# Display (rows, columns) of both reduced frames; the column counts should match.
X_train_reduced.shape, X_test_reduced.shape

((472432, 221), (118108, 221))

### 2.1. XGBoost 

In [35]:
# Optuna search plus final fit of XGBoost on the reduced feature set.
# Unpacked as: model, best hyper-parameters, search history, search plot paths.
(
    model_xgboost_reduced,
    best_params_xgboost_reduced,
    hist_df,
    plot_paths,
) = train_xgb_optuna(X_train_reduced, y_train)

[I 2025-12-11 15:00:16,610] A new study created in memory with name: xgboost_aucpr_optimization


[Optuna XGBoost Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-11 15:03:28,039] Trial 0 finished with value: 0.5244601983248465 and parameters: {'learning_rate': 0.017994541804272207, 'max_depth': 7, 'subsample': 0.7337572314337377, 'colsample_bytree': 0.8417596492512893, 'min_child_weight': 0.41951568270366846, 'gamma': 1.8941924359571067}. Best is trial 0 with value: 0.5244601983248465.
[I 2025-12-11 15:06:21,471] Trial 1 finished with value: 0.48068645567072243 and parameters: {'learning_rate': 0.012252498129860038, 'max_depth': 4, 'subsample': 0.7689312832226267, 'colsample_bytree': 0.7834723911952979, 'min_child_weight': 0.0012403217423212734, 'gamma': 0.3563880447072021}. Best is trial 0 with value: 0.5244601983248465.
[I 2025-12-11 15:09:44,129] Trial 2 finished with value: 0.5633477566709145 and parameters: {'learning_rate': 0.024813671267022053, 'max_depth': 7, 'subsample': 0.7708397375547158, 'colsample_bytree': 0.5275431295827882, 'min_child_weight': 0.0028988818485861305, 'gamma': 0.3461782223692256}. Best is trial 2 with va

[INFO] Best XGBoost params: {'learning_rate': 0.02997644798218643, 'max_depth': 8, 'subsample': 0.9071771096154924, 'colsample_bytree': 0.651104590726605, 'min_child_weight': 0.007770956127743196, 'gamma': 2.8226394772597523}
[0]	validation_0-aucpr:0.32120
[100]	validation_0-aucpr:0.55662
[200]	validation_0-aucpr:0.59386
[300]	validation_0-aucpr:0.61023
[400]	validation_0-aucpr:0.62326
[500]	validation_0-aucpr:0.63135
[600]	validation_0-aucpr:0.63731
[700]	validation_0-aucpr:0.64283
[800]	validation_0-aucpr:0.64550
[900]	validation_0-aucpr:0.64896
[1000]	validation_0-aucpr:0.64942
[1100]	validation_0-aucpr:0.65250
[1200]	validation_0-aucpr:0.65463
[1300]	validation_0-aucpr:0.65691
[1400]	validation_0-aucpr:0.65816
[1500]	validation_0-aucpr:0.66057
[1600]	validation_0-aucpr:0.66260
[1700]	validation_0-aucpr:0.66406
[1800]	validation_0-aucpr:0.66533
[1900]	validation_0-aucpr:0.66765
[2000]	validation_0-aucpr:0.66857
[2100]	validation_0-aucpr:0.66955
[2200]	validation_0-aucpr:0.67014
[230



[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.8857356965863707),
 'pr_auc': np.float64(0.5368939873917031),
 'precision': 0.7011115685467271,
 'recall': 0.41904527559055116,
 'f1': 0.5245649160634529,
 'custom_loss': np.float64(2.0051647644528736)}

In [77]:
# Evaluate the reduced-feature XGBoost model three times on the hold-out set:
# with evaluate_and_log's default threshold, then with 0.10 and 0.02.
# NOTE(review): all three calls use the same run_name; whether the runs can be
# told apart afterwards depends on evaluate_and_log recording the threshold.
# NOTE(review): hist_df / plot_paths hold the most recent search only.
xgb_reduced_eval_kwargs = dict(
    experiment_name=EXPERIMENT_NAME,
    run_name="XGBoost_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)
evaluate_and_log(model_xgboost_reduced, X_test_reduced, y_test, **xgb_reduced_eval_kwargs)
evaluate_and_log(model_xgboost_reduced, X_test_reduced, y_test, prediction_threshold=0.10, **xgb_reduced_eval_kwargs)
evaluate_and_log(model_xgboost_reduced, X_test_reduced, y_test, prediction_threshold=0.02, **xgb_reduced_eval_kwargs)

[INFO] Logged metrics: {'roc_auc': np.float64(0.8857356965863707), 'pr_auc': np.float64(0.5368939873917031), 'precision': 0.7011115685467271, 'recall': 0.41904527559055116, 'f1': 0.5245649160634529, 'custom_loss': np.float64(2.0051647644528736)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.
[INFO] Logged metrics: {'roc_auc': np.float64(0.8857356965863707), 'pr_auc': np.float64(0.5368939873917031), 'precision': 0.374085033483881, 'recall': 0.5910433070866141, 'f1': 0.4581783500238436, 'custom_loss': np.float64(1.4412148203339317)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.
[INFO] Logged metrics: {'roc_auc': np.float64(0.8857356965863707), 'pr_auc': np.float64(0.5368939873917031), 'precision': 0.18553921568627452, 'recall': 0.7450787401574803, 'f1': 0.29709576138147564, 'custom_loss': np.float64(0.9897043384021404)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.8857356965863707),
 'pr_auc': np.float64(0.5368939873917031),
 'precision': 0.18553921568627452,
 'recall': 0.7450787401574803,
 'f1': 0.29709576138147564,
 'custom_loss': np.float64(0.9897043384021404)}

### 2.2. LightGBM

In [36]:
# Optuna search plus final fit of LightGBM on the reduced feature set.
# Unpacked as: model, best hyper-parameters, search history, search plot paths.
(
    model_lgmb_reduced,
    best_params_lgmb_reduced,
    hist_df,
    plot_paths,
) = train_lgbm_optuna(X_train_reduced, y_train)

[I 2025-12-11 16:50:07,622] A new study created in memory with name: lgbm_aucpr_optimization


[Optuna LightGBM Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-11 16:51:03,669] Trial 0 finished with value: 0.5634378125206297 and parameters: {'num_leaves': 264, 'max_depth': 16, 'learning_rate': 0.018293192691062665, 'min_child_samples': 30, 'reg_alpha': 0.48079858062459807, 'reg_lambda': 0.26484858878890344, 'colsample_bytree': 0.9232861666339274, 'subsample': 0.9422738362625773}. Best is trial 0 with value: 0.5634378125206297.
[I 2025-12-11 16:52:36,820] Trial 1 finished with value: 0.5991882894882278 and parameters: {'num_leaves': 332, 'max_depth': 19, 'learning_rate': 0.027520189729079186, 'min_child_samples': 17, 'reg_alpha': 0.46525859511456985, 'reg_lambda': 0.19942834493664757, 'colsample_bytree': 0.9874646372937979, 'subsample': 0.9001351298079069}. Best is trial 1 with value: 0.5991882894882278.
[I 2025-12-11 16:53:26,954] Trial 2 finished with value: 0.5470175631074912 and parameters: {'num_leaves': 189, 'max_depth': 14, 'learning_rate': 0.0205651128068471, 'min_child_samples': 37, 'reg_alpha': 0.35912106922397735, 'reg_la

[INFO] Best LightGBM params: {'num_leaves': 508, 'max_depth': 19, 'learning_rate': 0.019454075552920863, 'min_child_samples': 92, 'reg_alpha': 0.4092744648236445, 'reg_lambda': 0.05973388416757987, 'colsample_bytree': 0.8302640766705726, 'subsample': 0.8645191927839543}
[50]	valid_0's binary_logloss: 0.121277
[100]	valid_0's binary_logloss: 0.116532
[150]	valid_0's binary_logloss: 0.109386
[200]	valid_0's binary_logloss: 0.102873
[250]	valid_0's binary_logloss: 0.0981379
[300]	valid_0's binary_logloss: 0.0941949
[350]	valid_0's binary_logloss: 0.0897915
[400]	valid_0's binary_logloss: 0.0856726
[450]	valid_0's binary_logloss: 0.0822549
[500]	valid_0's binary_logloss: 0.0801475
[550]	valid_0's binary_logloss: 0.0786734
[600]	valid_0's binary_logloss: 0.0779737
[650]	valid_0's binary_logloss: 0.0779971
[700]	valid_0's binary_logloss: 0.0784638


In [78]:
# Evaluate the reduced-feature LightGBM model three times on the hold-out set:
# with evaluate_and_log's default threshold, then with 0.1 and 0.02.
# NOTE(review): all three calls use the same run_name; whether the runs can be
# told apart afterwards depends on evaluate_and_log recording the threshold.
# NOTE(review): hist_df / plot_paths hold the most recent search only.
lgbm_reduced_eval_kwargs = dict(
    experiment_name=EXPERIMENT_NAME,
    run_name="LGBM_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)
evaluate_and_log(model_lgmb_reduced, X_test_reduced, y_test, **lgbm_reduced_eval_kwargs)
evaluate_and_log(model_lgmb_reduced, X_test_reduced, y_test, prediction_threshold=0.1, **lgbm_reduced_eval_kwargs)
evaluate_and_log(model_lgmb_reduced, X_test_reduced, y_test, prediction_threshold=0.02, **lgbm_reduced_eval_kwargs)

[INFO] Logged metrics: {'roc_auc': np.float64(0.9105656023390061), 'pr_auc': np.float64(0.5839596356146002), 'precision': 0.6734492943879226, 'recall': 0.5049212598425197, 'f1': 0.5771340177190268, 'custom_loss': np.float64(1.7119500795881735)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.
[INFO] Logged metrics: {'roc_auc': np.float64(0.9105656023390061), 'pr_auc': np.float64(0.5839596356146002), 'precision': 0.32843313015417713, 'recall': 0.6761811023622047, 'f1': 0.4421205051886413, 'custom_loss': np.float64(1.1618095302604396)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.
[INFO] Logged metrics: {'roc_auc': np.float64(0.9105656023390061), 'pr_auc': np.float64(0.5839596356146002), 'precision': 0.14773780622379165, 'recall': 0.8235728346456693, 'f1': 0.25053332834312664, 'custom_loss': np.float64(0.7705489890608596)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.9105656023390061),
 'pr_auc': np.float64(0.5839596356146002),
 'precision': 0.14773780622379165,
 'recall': 0.8235728346456693,
 'f1': 0.25053332834312664,
 'custom_loss': np.float64(0.7705489890608596)}

### 2.3. CatBoost

In [42]:
# Optuna search plus final fit of CatBoost on the reduced feature set.
# Unpacked as: model, best hyper-parameters, search history, search plot paths.
(
    model_catoost_reduced,
    best_params_catoost_reduced,
    hist_df,
    plot_paths,
) = train_catboost_optuna(X_train_reduced, y_train)

[I 2025-12-12 09:20:56,825] A new study created in memory with name: catboost_aucpr_optimization


[Optuna CatBoost Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-12 09:27:27,488] Trial 0 finished with value: 0.512706420300562 and parameters: {'learning_rate': 0.017233734852484462, 'depth': 5, 'l2_leaf_reg': 2.787678698438606, 'subsample': 0.860353378482386, 'border_count': 47}. Best is trial 0 with value: 0.512706420300562.
[I 2025-12-12 09:33:12,557] Trial 1 finished with value: 0.4909319137723825 and parameters: {'learning_rate': 0.018500663338939592, 'depth': 4, 'l2_leaf_reg': 9.93770217164884, 'subsample': 0.7056786989839043, 'border_count': 56}. Best is trial 0 with value: 0.512706420300562.
[I 2025-12-12 09:41:20,444] Trial 2 finished with value: 0.5185466009753861 and parameters: {'learning_rate': 0.014317546114679659, 'depth': 6, 'l2_leaf_reg': 4.853926009751866, 'subsample': 0.767679794102856, 'border_count': 37}. Best is trial 2 with value: 0.5185466009753861.
[I 2025-12-12 09:48:34,531] Trial 3 finished with value: 0.5238781392220919 and parameters: {'learning_rate': 0.02190564522433922, 'depth': 5, 'l2_leaf_reg': 1.297810

[INFO] Best CatBoost params: {'learning_rate': 0.029941763415569072, 'depth': 8, 'l2_leaf_reg': 3.624977955604138, 'subsample': 0.997877946170313, 'border_count': 113}
0:	learn: 0.8638560	test: 0.8793277	best: 0.8793277 (0)	total: 1.06s	remaining: 1h 28m 3s
100:	learn: 0.9356353	test: 0.9279100	best: 0.9279100 (100)	total: 1m 50s	remaining: 1h 29m 39s
200:	learn: 0.9545904	test: 0.9391483	best: 0.9392168 (199)	total: 3m 40s	remaining: 1h 27m 39s
300:	learn: 0.9653813	test: 0.9451395	best: 0.9451395 (300)	total: 5m 28s	remaining: 1h 25m 21s
400:	learn: 0.9730892	test: 0.9485074	best: 0.9485074 (400)	total: 7m 16s	remaining: 1h 23m 22s
500:	learn: 0.9790718	test: 0.9509152	best: 0.9509160 (499)	total: 9m 4s	remaining: 1h 21m 31s
600:	learn: 0.9838914	test: 0.9533165	best: 0.9533165 (600)	total: 10m 53s	remaining: 1h 19m 41s
700:	learn: 0.9873886	test: 0.9542882	best: 0.9544693 (675)	total: 12m 41s	remaining: 1h 17m 50s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 0.



[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.912118695355391),
 'pr_auc': np.float64(0.5265946509511543),
 'precision': 0.2813680844879372,
 'recall': 0.7145669291338582,
 'f1': 0.40375391032325336,
 'custom_loss': np.float64(1.0449503843939445)}

In [79]:
# Evaluate the reduced-feature CatBoost model three times on the hold-out set:
# with evaluate_and_log's default threshold, then with 0.1 and 0.02.
# NOTE(review): all three calls use the same run_name; whether the runs can be
# told apart afterwards depends on evaluate_and_log recording the threshold.
# NOTE(review): hist_df / plot_paths hold the most recent search only.
cat_reduced_eval_kwargs = dict(
    experiment_name=EXPERIMENT_NAME,
    run_name="CATBoost_Optuna_Reduced",
    hp_search_history=hist_df,
    hp_search_plots=plot_paths,
)
evaluate_and_log(model_catoost_reduced, X_test_reduced, y_test, **cat_reduced_eval_kwargs)
evaluate_and_log(model_catoost_reduced, X_test_reduced, y_test, prediction_threshold=0.1, **cat_reduced_eval_kwargs)
evaluate_and_log(model_catoost_reduced, X_test_reduced, y_test, prediction_threshold=0.02, **cat_reduced_eval_kwargs)

[INFO] Logged metrics: {'roc_auc': np.float64(0.912118695355391), 'pr_auc': np.float64(0.5265946509511543), 'precision': 0.2813680844879372, 'recall': 0.7145669291338582, 'f1': 0.40375391032325336, 'custom_loss': np.float64(1.0449503843939445)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.
[INFO] Logged metrics: {'roc_auc': np.float64(0.912118695355391), 'pr_auc': np.float64(0.5265946509511543), 'precision': 0.06814946619217081, 'recall': 0.9424212598425197, 'f1': 0.12710739413248373, 'custom_loss': np.float64(0.6415314796626816)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.
[INFO] Logged metrics: {'roc_auc': np.float64(0.912118695355391), 'pr_auc': np.float64(0.5265946509511543), 'precision': 0.03768666443227945, 'recall': 0.9960629921259843, 'f1': 0.07262549786501131, 'custom_loss': np.float64(0.888712026281031)}
[INFO] Logged SHAP summary plot.




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.912118695355391),
 'pr_auc': np.float64(0.5265946509511543),
 'precision': 0.03768666443227945,
 'recall': 0.9960629921259843,
 'f1': 0.07262549786501131,
 'custom_loss': np.float64(0.888712026281031)}

## 3. Ensemble

In [23]:
# Build the meta-features: one column per reduced-feature base model
# (order: XGBoost, LightGBM, CatBoost).
# NOTE(review): X_valid_reduced is not created by any cell shown in this
# notebook; it must exist before this cell can run.
# NOTE(review): X_meta is predicted on X_train_reduced, the same frame the base
# models were trained with, so these are presumably in-sample predictions;
# confirm this is intended for the meta-learner.
base_models_reduced = [model_xgboost_reduced, model_lgmb_reduced, model_catoost_reduced]


def _stack_second_proba_column(features):
    """Return a DataFrame holding each base model's second predict_proba column."""
    per_model = [base.predict_proba(features)[:, 1] for base in base_models_reduced]
    return pd.DataFrame(np.column_stack(per_model))


X_meta = _stack_second_proba_column(X_train_reduced)
X_meta_val = _stack_second_proba_column(X_valid_reduced)

### 3.1. Logistic

In [24]:
# Tune and fit the meta-learner on the stacked base-model probabilities.
# NOTE(review): train_ensemble is not among the names imported at the top of
# this notebook, and y_valid is not defined in the cells shown; both must be in
# scope before this cell can run.
ensemble, best_params_log, hist_df, plot_paths = train_ensemble(X_meta, y_train, X_meta_val, y_valid)

[I 2025-12-07 01:53:06,192] A new study created in memory with name: ensemble_meta_learner_pr_auc_optimization


[INFO] Generating meta-features...
[INFO] Optimizing meta-learner with Optuna...


[Optuna Ensemble Meta-Learner Tuning]:   0%|          | 0/30 [00:00<?, ?trial/s]

[I 2025-12-07 01:53:07,195] Trial 0 finished with value: 1.0 and parameters: {'C': 0.046716728199023554, 'class_weight': 'balanced', 'solver': 'liblinear', 'penalty_liblinear': 'l2'}. Best is trial 0 with value: 1.0.
[I 2025-12-07 01:53:08,054] Trial 1 finished with value: 1.0 and parameters: {'C': 4.337626067012835, 'class_weight': None, 'solver': 'lbfgs', 'penalty_lbfgs': 'l2'}. Best is trial 0 with value: 1.0.
[I 2025-12-07 01:53:10,020] Trial 2 finished with value: 0.9999999702398065 and parameters: {'C': 0.0013868610323151099, 'class_weight': None, 'solver': 'sag', 'penalty_sag': 'l2'}. Best is trial 0 with value: 1.0.
[I 2025-12-07 01:53:11,438] Trial 3 finished with value: 1.0 and parameters: {'C': 0.07981434458108105, 'class_weight': 'balanced', 'solver': 'newton-cg', 'penalty_newton-cg': 'l2'}. Best is trial 0 with value: 1.0.
[I 2025-12-07 01:53:12,957] Trial 4 finished with value: 1.0 and parameters: {'C': 0.569471423184438, 'class_weight': 'balanced', 'solver': 'liblinear',

[INFO] Best meta-learner params: {'C': 0.046716728199023554, 'class_weight': 'balanced', 'solver': 'liblinear', 'penalty_liblinear': 'l2'}
[INFO] Ensemble validation PR-AUC: 0.6603
[INFO] Ensemble validation ROC-AUC: 0.9385


In [25]:
# Score the meta-learner on the validation meta-features and log the run
# (no search plots are passed for this model).
evaluate_and_log(
    ensemble,
    X_meta_val,
    y_valid,
    experiment_name=EXPERIMENT_NAME,
    run_name="Ensemble_from_best_base_models",
    hp_search_history=hist_df,
)

[INFO] Logged metrics: {'roc_auc': np.float64(0.938545198106298), 'pr_auc': np.float64(0.6602538927983681), 'precision': 0.77322206095791, 'recall': 0.5243602362204725, 'f1': 0.6249266862170088, 'custom_loss': np.float64(1.641929420530362)}
[WARN] SHAP skipped: Model type not yet supported by TreeExplainer: <class 'sklearn.linear_model._logistic.LogisticRegression'>




[INFO] Evaluation complete and logged.


{'roc_auc': np.float64(0.938545198106298),
 'pr_auc': np.float64(0.6602538927983681),
 'precision': 0.77322206095791,
 'recall': 0.5243602362204725,
 'f1': 0.6249266862170088,
 'custom_loss': np.float64(1.641929420530362)}

## 4. Create submission on the test set

In [None]:
import mlflow.pyfunc

# Location of the logged model artifacts to score with (hard-coded local MLflow path).
MODEL_ARTIFACT_PATH = "mlruns/306298534761506969/models/m-13e4ef05880a4806b656985753497758/artifacts"

# Load through the pyfunc wrapper, then unwrap to the underlying estimator so
# that predict_proba is available.
pyfunc_model = mlflow.pyfunc.load_model(MODEL_ARTIFACT_PATH)
model = pyfunc_model.get_raw_model()

In [None]:
# Feature columns, in order, that are selected from `test` and passed to the
# loaded model. NOTE(review): presumably this matches the feature list the
# logged model was trained with; verify against the model's signature.
expected_features = [
    'C1', 'R_emaildomain', 'C14', 'P_emaildomain', 'id_31', 'uid_D3_mean', 
    'DeviceInfo', 'C13', 'ProductCD_TransactionAmt_rel', 'card1', 
    'uid_M4_freq', 'uid_C13_mean', 'uid_C13_std', 'uid_D2_mean', 
    'uid_C13_rel', 'uid_TransactionAmt_mean', 'card2', 'uid_C11_mean', 
    'uid_C1_mean', 'addr1', 'uid_V_PCA_4_mean', 'uid_D11_mean', 
    'uid_V_PCA_13_mean', 'uid_C2_mean', 'uid_C6_mean', 'C5', 'D3', 
    'uid_D2_std', 'card6', 'C8', 'uid_D11_std', 'M5', 'uid_card2_mean', 
    'uid_C14_mean', 'card6_TransactionAmt_mean', 'uid_dist1_mean', 'C11', 
    'hour_of_day', 'uid_TransactionAmt_std', 'uid_V_PCA_12_mean', 'id_30', 
    'uid_M5_freq', 'uid_card5_mean', 'uid_V_PCA_2_mean', 'uid_V_PCA_9_std', 
    'uid_V_PCA_5_mean', 'uid_V_PCA_11_mean', 'uid_D15_mean', 
    'uid_V_PCA_3_std', 'M4', 'uid_V_PCA_9_mean', 'uid_D10_mean', 
    'uid_M6_freq', 'uid_D5_mean', 'C2', 'uid_V_PCA_13_std', 
    'uid_DecimalPlaces_mean', 'card5', 'uid_V_PCA_4_std', 'D8', 
    'uid_C9_mean', 'uid_C1_std', 'uid_D10_std', 'uid_C14_rel', 'M6', 
    'uid_V_PCA_1_mean', 'D2', 'uid_P_emaildomain_freq', 'uid_C5_mean', 
    'uid_V_PCA_10_mean', 'id_02', 'uid_V_PCA_10_std', 'uid_V_PCA_3_mean', 
    'id_20', 'M3', 'V_PCA_2', 'uid_V_PCA_6_mean', 'uid_V_PCA_11_std', 
    'uid_V_PCA_7_std', 'card4', 'uid_V_PCA_8_mean', 'V_PCA_4', 
    'card6_TransactionAmt_rel', 'V_PCA_9', 'uid_D4_mean', 'uid_C6_std', 
    'C6', 'uid_hour_of_day_TransactionAmt_mean', 'dist1', 
    'TransactionAmt_Scaled', 'uid_V_PCA_5_std', 'uid_V_PCA_1_std', 'D4', 
    'uid_C9_std', 'D15', 'uid_C1_rel', 'day_of_week', 'uid_V_PCA_7_mean', 
    'id_05', 'uid_day_of_week_TransactionAmt_mean', 'uid_D5_std', 
    'uid_D3_std', 'uid_id_02_mean', 'uid_D4_std', 'uid_DecimalPlaces_rel', 
    'uid_V_PCA_12_std', 'uid_V_PCA_2_std', 'uid_C11_std', 'D10', 
    'uid_V_PCA_6_std', 'uid_C6_rel', 'uid_day_of_week_TransactionAmt_rel'
]

# Select the expected columns, in list order, from the test frame.
# NOTE(review): `test` was removed by `del train, test` after the X/y split and
# is only re-read by the last cell of the notebook, so that cell has to run
# before this one.
df_aligned = test[expected_features]

In [None]:
# Restrict `train` to the expected feature columns.
# NOTE(review): `train` was removed by `del train, test` after the X/y split and
# the result is not used by any later cell shown; this cell looks like a leftover.
train = train[expected_features]

In [None]:
# Class probabilities for every aligned test row (column 1 is used for the submission).
preds = model.predict_proba(df_aligned)

In [None]:
# Store the second predict_proba column as the isFraud score
# (presumably the probability of the fraud class; confirm the model's class order).
test['isFraud']=preds[:,1]

In [None]:
# Write the two submission columns next to the processed data files.
submission = test[['TransactionID', 'isFraud']]
submission.to_csv(f"{data_folder}submission.csv", index=False)

In [None]:
# Read the processed test table from the current working directory.
# NOTE(review): the cells above that use `test` need this to run first.
# NOTE(review): the training file is read from data_folder; confirm whether this
# file is really meant to be read from the working directory instead.
test = pd.read_csv('test_processed.csv')