In [1]:
import pandas as pd
import numpy as np
import os
import sys
import joblib
import re
import gc
from pathlib import Path
import yaml
import warnings
warnings.filterwarnings('ignore')
sys.path.append(os.path.abspath('..'))
from sentinel.evaluation import SentinelEvaluator
from sentinel.inference import SentinelInference
from sentinel.modeling import SentinelTrainer
%load_ext autoreload
%autoreload 2

In [2]:
# Root directory that the data/ and models/ paths used by later cells are joined onto.
base_path = Path('../')

In [3]:
# # uncomment this cell and run it to download the dataset
# from scripts.download_data import download_dataset
# download_dataset(output_dir= base_path / 'data/raw', force=True)

#### **Train models**

In [4]:
# Load the raw training table from disk.
train_csv_path = base_path / 'data/raw/train_raw.csv'
df = pd.read_csv(train_csv_path)

In [None]:
# Run a garbage-collection pass before rendering the preview.
gc.collect()
# Transposed preview (one column per transaction). Uses 15 rows so the code
# agrees with the saved output (columns 0-14) and with the engineered-frame
# preview later in the notebook; the previous `head(12)` did not match it.
df.head(15).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
TransactionID,2987000,2987001,2987002,2987003,2987004,2987005,2987006,2987007,2987008,2987009,2987010,2987011,2987012,2987013,2987014
isFraud,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
TransactionDT,86400,86401,86469,86499,86506,86510,86522,86529,86535,86536,86549,86555,86564,86585,86596
TransactionAmt,68.5,29.0,59.0,50.0,50.0,49.0,159.0,422.5,15.0,117.0,75.887,16.495,50.0,40.0,10.5
ProductCD,W,W,W,W,H,W,W,W,H,W,C,C,W,W,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
id_36,,,,,F,,,,F,,F,F,,,
id_37,,,,,T,,,,F,,T,T,,,
id_38,,,,,T,,,,T,,T,T,,,
DeviceType,,,,,mobile,,,,mobile,,desktop,desktop,,,


In [6]:
# Single trainer instance reused by every preparation, training and model-selection cell below.
trainer = SentinelTrainer(base_path=base_path)

In [7]:
# Column names of the raw frame, captured before any preprocessing.
init_features = df.columns.tolist()

# Every column pandas loaded as object/category dtype is treated as categorical.
init_obj_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()

# Hand-picked categorical columns, followed by the dtype-detected ones.
cat_cols = [
    'ProductCD', 'card4', 'card6', 'DeviceType',
    'os_type', 'browser_type', 'device_vendor',
    'addr1', 'card1', 'card2',
    'P_emaildomain_vendor_id',
    'P_emaildomain_suffix_id',

    'device_info_combo', 'card_email_combo',
    'product_network_combo', 'card1_addr1_combo',
    'os_browser_combo'
] + init_obj_cols

# De-duplicate while keeping first-seen order. `list(set(...))` orders strings by
# their per-process randomized hash, so the column order silently changed between
# kernel sessions; `dict.fromkeys` gives the same unique set in a stable order.
cat_cols = list(dict.fromkeys(cat_cols))

In [8]:
# Settings forwarded as keyword arguments to the trainer's data-preparation step.
data_split_config = dict(
    train_size=0.85,
    nan_thresh=0.95,
    corr_thresh=0.97,
    cat_cols=cat_cols,
)
trainer.prepare_data(df, **data_split_config)


🚀 Starting Data Preparation (Train Split=0.85)
   Train size: 501,959 (85.0%)
   Test size:  88,581 (15.0%)
 
  ⚙️  Running SentinelPreprocessing...
--- Fitting Sentinel Preprocessor ---
Dropped 9 columns
--- Sentinel Preprocessor Fitted ---
--- Transforming 501959 rows ---
--- Transforming 88581 rows ---
   🛠️  Running SentinelFeatureEngineering...
--- Fitting Sentinel v7.0 on 501,959 rows ---
   > PCA Group_0: 11 cols -> 2 comps (97% var)
   > PCA Group_1: 23 cols -> 6 comps (97% var)
   > PCA Group_2: 18 cols -> 5 comps (97% var)
   > PCA Group_3: 22 cols -> 7 comps (97% var)
   > PCA Group_4: 20 cols -> 7 comps (97% var)
   > PCA Group_5: 43 cols -> 10 comps (97% var)
   > PCA Group_6: 29 cols -> 7 comps (97% var)
   > PCA Group_7: 31 cols -> 8 comps (97% var)
   > PCA Group_8: 19 cols -> 7 comps (97% var)
   > PCA Group_9: 46 cols -> 8 comps (97% var)
   > PCA Group_10: 16 cols -> 6 comps (97% var)
   > PCA Group_11: 32 cols -> 10 comps (97% var)
   > PCA Group_12: 11

In [9]:
# Bind the splits stored on the trainer to notebook-level names for inspection.
X_train_eng = trainer.X_train
y_train = trainer.y_train
X_test_eng = trainer.X_test
y_test = trainer.y_test

In [10]:
# Transposed preview of the first 15 rows of the training frame held by the trainer.
X_train_eng.head(15).T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
TransactionID,2987000,2987001,2987002,2987003,2987004,2987005,2987006,2987007,2987008,2987009,2987010,2987011,2987012,2987013,2987014
TransactionDT,86400,86401,86469,86499,86506,86510,86522,86529,86535,86536,86549,86555,86564,86585,86596
TransactionAmt,68.5,29.0,59.0,50.0,50.0,49.0,159.0,422.5,15.0,117.0,75.887001,16.495001,50.0,40.0,10.5
ProductCD,1,1,1,1,2,1,1,1,2,1,3,3,1,1,1
card1,13926,2755,4663,18132,4497,5937,12308,12695,2803,17399,16496,4461,3786,12866,11839
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PCA_Group_13_2,0.109226,0.109226,0.109226,0.109226,-0.816481,0.109226,0.109226,0.109226,-0.816481,0.109226,0.109226,0.109226,0.109226,0.109226,0.109226
PCA_Group_13_3,0.107902,0.107902,0.107902,0.107902,-0.839058,0.107902,0.107902,0.107902,-0.839058,0.107902,0.107902,0.107902,0.107902,0.107902,0.107902
PCA_Group_13_4,-0.093709,-0.093709,-0.093709,-0.093709,0.951159,-0.093709,-0.093709,-0.093709,0.951159,-0.093709,-0.093709,-0.093709,-0.093709,-0.093709,-0.093709
PCA_Group_13_5,0.022433,0.022433,0.022433,0.022433,-0.099798,0.022433,0.022433,0.022433,-0.099798,0.022433,0.022433,0.022433,0.022433,0.022433,0.022433


In [11]:
# Full list of columns present after data preparation, printed for reference
# when editing the drop list in the next cell.
all_cols = X_train_eng.columns.tolist()
print('All columns: ', all_cols)
# Trailing semicolon stops IPython from echoing gc.collect()'s integer return
# value (a stray `0`) as the cell's output.
gc.collect();

All columns:  ['TransactionID', 'TransactionDT', 'TransactionAmt', 'ProductCD', 'card1', 'card2', 'card3', 'card4', 'card5', 'card6', 'addr1', 'addr2', 'dist1', 'dist2', 'P_emaildomain', 'R_emaildomain', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'id_01', 'id_02', 'id_03', 'id_04', 'id_05', 'id_06', 'id_09', 'id_10', 'id_11', 'id_12', 'id_13', 'id_14', 'id_15', 'id_16', 'id_17', 'id_18', 'id_19', 'id_20', 'id_28', 'id_29', 'id_32', 'id_34', 'id_35', 'id_36', 'id_37', 'id_38', 'DeviceType', 'country', 'hour_of_day', 'day_of_week', 'day_of_month', 'month_year', 'P_emaildomain_length', 'P_emaildomain_has_digits', 'P_emaildomain_is_free', 'P_emaildomain_vendor_id', 'P_emaildomain_suffix_id', 'P_emaildomain_country_id', 'P_emaildomain_risk_score', 'R_emaildomain_length', 'R_emaildomain_has_digits',

0

In [13]:
# Raw D1..D15 columns that are actually present in the training frame.
candidate_d_cols = (f'D{idx}' for idx in range(1, 16))
D_cols = [name for name in candidate_d_cols if name in X_train_eng.columns]

# Columns grouped under "statistical" drops; spliced into the final list below.
statistical_drops = (
    ['C2', 'C3', 'C10', 'C11', 'C12']
    + ['UID_amt_sum_1h', 'UID_count_24h']
    + ['id_04', 'id_06', 'id_10', 'id_17', 'id_19', 'id_20']
    + ['TransactionAmt_log_z', 'screen_height', 'is_99_cents']
    + ['D9_norm', 'addr1_switch', 'Amt_div_card4_mean']
)

# Complete list of columns passed as `cols_to_drop` to the baseline training runs.
# The pieces are concatenated in the same order the original literal listed them.
features_to_drop_final = (
    ['TransactionID', 'TransactionDT', 'UID', 'UID_hash']
    + ['P_emaildomain', 'R_emaildomain', 'country']
    + ['card1', 'card2', 'card3', 'card5']
    + ['addr1', 'addr2']
    + D_cols
    + ['month_year', 'day_of_month', 'hour_of_day']
    + ['days_since_first_txn', 'UID_dt_diff']
    + statistical_drops
    + [
        'multi_entity_sharing',
        'device_vendor_degree',
        'unique_addrs_per_card',
        'unique_emails_per_card',
        'cards_per_device',
        'addr1_degree',
    ]
    + ['card1_addr1_combo', 'device_info_combo', 'D10_norm', 'D15_norm']
    + [
        'cents_value',
        'UID_velocity_1h',
        'UID_velocity_12h',
        'UID_count_1h',
        'UID_count_12h',
        'email_country_mismatch',
        'P_emaildomain_country_id',
        'R_emaildomain_country_id',
        'P_emaildomain_has_digits',
        'R_emaildomain_has_digits',
    ]
    + ['PCA_Group_0_1']
)

In [14]:
# Fixed settings per model family, keyed by the same short model names used in
# the training calls. They are merged over the tuned hyperparameters before each
# final fit. The 'cb' and 'xgb' entries request GPU execution
# (task_type='GPU' / device='cuda').
FIXED_PARAMS = {
    'lgb': dict(
        objective='binary',
        boosting_type='gbdt',
        metric='auc',
        n_jobs=-1,
        verbose=-1,
        random_state=42,
    ),
    'cb': dict(
        loss_function='Logloss',
        eval_metric='AUC',
        grow_policy='SymmetricTree',
        bootstrap_type='Bernoulli',
        thread_count=-1,
        task_type='GPU',
        verbose=0,
        random_state=42,
    ),
    'xgb': dict(
        objective='binary:logistic',
        booster='gbtree',
        eval_metric='auc',
        grow_policy='depthwise',
        device='cuda',
        tree_method='hist',
        early_stopping_rounds=50,
        n_jobs=-1,
        verbosity=0,
        random_state=42,
    ),
}

##### **Train LightGBM**

In [15]:
print("\n--- 1. Baseline Training for Feature Selection ---")

# Baseline LightGBM run with default params on all non-dropped columns; its
# per-fold importances are what the next cell ranks to pick the top features.
lgb_baseline_kwargs = dict(
    model_type='lgb',
    params=None,
    top_n_features=None,
    full_data=False,
    n_folds=6,
    cols_to_drop=features_to_drop_final,
    save_artifacts=False,
)
lgb_baseline_result = trainer.train_model(**lgb_baseline_kwargs)


--- 1. Baseline Training for Feature Selection ---

🚀 Training LGB Model...

🚀 Training Final LGB on 211 features | Full Data: False

🚀 Training LGB Model...
   -> Fold 1/6...
   --- Size: Train=71711, Val=71708 ---
[LightGBM] [Info] Number of positive: 1922, number of negative: 69789
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032870 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 39169
[LightGBM] [Info] Number of data points in the train set: 71711, number of used features: 211
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.026802 -> initscore=-3.592110
[LightGBM] [Info] Start training from score -3.592110
   -> Fold 2/6...
   --- Size: Train=143419, Val=71708 ---
[LightGBM] [Info] Number of positive: 3732, number of negative: 139687
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.058877 seconds.
You can set `force_col_wise=true` to remove t

In [17]:
# Rank features by their mean importance across the baseline rows, best first.
lgb_mean_importance = (
    lgb_baseline_result['importances']
    .groupby('feature')['importance']
    .mean()
    .sort_values(ascending=False)
)
lgb_top_features = lgb_mean_importance.index.tolist()

# Tune LightGBM on the 150 highest-ranked features.
lgb_best_params = trainer.optimize_hyperparameters(
    model_type='lgb',
    n_trials=20,
    n_folds=5,
    top_n_features=lgb_top_features[:150],
)


🔍 Optuna: Optimizing LGB on 150 features, (20 trials)...


[I 2026-01-27 23:30:39,330] A new study created in memory with name: no-name-53535de4-ff24-48e5-af95-b7a691f049f4
[I 2026-01-27 23:32:42,985] Trial 0 finished with value: 0.9337392818900874 and parameters: {'n_estimators': 2118, 'learning_rate': 0.009662375788586234, 'num_leaves': 134, 'max_depth': 9, 'subsample': 0.8873097738117048, 'colsample_bytree': 0.8863586374306982, 'reg_alpha': 0.04614015967044038, 'reg_lambda': 0.06395495235273566, 'min_child_samples': 23}. Best is trial 0 with value: 0.9337392818900874.
[I 2026-01-27 23:33:40,796] Trial 1 finished with value: 0.9328453865844626 and parameters: {'n_estimators': 3056, 'learning_rate': 0.15693254125681155, 'num_leaves': 227, 'max_depth': 9, 'subsample': 0.9958935902820945, 'colsample_bytree': 0.9501417805654493, 'reg_alpha': 1.4420985095947494e-08, 'reg_lambda': 1.5691560535327646e-05, 'min_child_samples': 79}. Best is trial 0 with value: 0.9337392818900874.
[I 2026-01-27 23:35:15,089] Trial 2 finished with value: 0.938095483287

   ✅ Best params: {'n_estimators': 1728, 'learning_rate': 0.0052086467147919365, 'num_leaves': 254, 'max_depth': 11, 'subsample': 0.7480403373947704, 'colsample_bytree': 0.5017667080472615, 'reg_alpha': 0.07922401383323642, 'reg_lambda': 1.3782534615346387e-05, 'min_child_samples': 40}


In [18]:
# Overlay the fixed LightGBM settings on the tuned ones (in place), then run the final fit.
lgb_best_params.update(FIXED_PARAMS['lgb'])
lgb_selected_features = lgb_top_features[:150]
lgb_report = trainer.final_train('lgb', lgb_best_params, lgb_selected_features)


🚀 Training LGB Model...

🏁 Final Test Results (LGB):
   Optimum Threshold: 0.3169
   AUC: 0.8862
   Precision: 49.35%
   Recall: 52.87%
   Net Profit Impact: $181,705.82
💾 Saved lgb model & 150 features.


##### **Train CatBoost**

In [19]:
print("\n--- 1. Baseline Training for Feature Selection ---")

# Baseline CatBoost run with default params on all non-dropped columns; its
# per-fold importances are what the next cell ranks to pick the top features.
cb_baseline_kwargs = dict(
    model_type='cb',
    params=None,
    top_n_features=None,
    full_data=False,
    n_folds=6,
    cols_to_drop=features_to_drop_final,
    save_artifacts=False,
)
cb_baseline_result = trainer.train_model(**cb_baseline_kwargs)


--- 1. Baseline Training for Feature Selection ---

🚀 Training CB Model...

🚀 Training Final CB on 211 features | Full Data: False

🚀 Training CB Model...
   -> Fold 1/6...
   --- Size: Train=71711, Val=71708 ---


Default metric period is 5 because AUC is/are not implemented for GPU


   -> Fold 2/6...
   --- Size: Train=143419, Val=71708 ---


Default metric period is 5 because AUC is/are not implemented for GPU


   -> Fold 3/6...
   --- Size: Train=215127, Val=71708 ---


Default metric period is 5 because AUC is/are not implemented for GPU


   -> Fold 4/6...
   --- Size: Train=286835, Val=71708 ---


Default metric period is 5 because AUC is/are not implemented for GPU


   -> Fold 5/6...
   --- Size: Train=358543, Val=71708 ---


Default metric period is 5 because AUC is/are not implemented for GPU


   -> Fold 6/6...
   --- Size: Train=430251, Val=71708 ---


Default metric period is 5 because AUC is/are not implemented for GPU



🏁 Final Test Set Evaluation:

🏁 Final Test Results (CB):
   Optimum Threshold: 0.3763
   AUC: 0.8620
   Precision: 44.46%
   Recall: 48.82%
   Net Profit Impact: $154,438.81


In [20]:
# Rank features by their mean importance across the baseline rows, best first.
cb_mean_importance = (
    cb_baseline_result['importances']
    .groupby('feature')['importance']
    .mean()
    .sort_values(ascending=False)
)
cb_top_features = cb_mean_importance.index.tolist()

# Tune CatBoost on the 150 highest-ranked features, then overlay the fixed settings in place.
cb_best_params = trainer.optimize_hyperparameters(
    model_type='cb',
    n_trials=20,
    n_folds=5,
    top_n_features=cb_top_features[:150],
)
cb_best_params.update(FIXED_PARAMS['cb'])


🔍 Optuna: Optimizing CB on 150 features, (20 trials)...


[I 2026-01-28 00:47:40,928] A new study created in memory with name: no-name-67b3a89a-db57-4fed-aa93-8681c75e104d
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
[I 2026-01-28 00:49:09,757] Trial 0 finished with value: 0.9454065801698119 and parameters: {'iterations': 2628, 'learning_rate': 0.15118510911583524, 'depth': 4, 'l2_leaf_reg': 7.024678290229622, 'subsample': 0.7309519457710606, 'random_strength': 5.559065905054413e-08}. Best is trial 0 with value: 0.9454065801698119.
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Default metric period is 5 because AUC is/are not implemented for GPU
Defau

   ✅ Best params: {'iterations': 3799, 'learning_rate': 0.006575600361961698, 'depth': 10, 'l2_leaf_reg': 28.72327658931732, 'subsample': 0.5560267687485813, 'random_strength': 0.03336729742516672}


In [21]:
# Final CatBoost fit on its 150 highest-ranked features with the merged parameters.
cb_selected_features = cb_top_features[:150]
cb_report = trainer.final_train('cb', cb_best_params, cb_selected_features)


🚀 Training CB Model...


Default metric period is 5 because AUC is/are not implemented for GPU



🏁 Final Test Results (CB):
   Optimum Threshold: 0.5247
   AUC: 0.8731
   Precision: 54.02%
   Recall: 48.17%
   Net Profit Impact: $170,147.86
💾 Saved cb model & 150 features.


##### **Train XGBoost**

In [22]:
print("\n--- 1. Baseline Training for Feature Selection ---")

# Baseline XGBoost run with default params on all non-dropped columns; its
# per-fold importances are what the next cell ranks to pick the top features.
xgb_baseline_kwargs = dict(
    model_type='xgb',
    params=None,
    top_n_features=None,
    full_data=False,
    n_folds=6,
    cols_to_drop=features_to_drop_final,
    save_artifacts=False,
)
xgb_baseline_result = trainer.train_model(**xgb_baseline_kwargs)


--- 1. Baseline Training for Feature Selection ---

🚀 Training XGB Model...

🚀 Training Final XGB on 211 features | Full Data: False

🚀 Training XGB Model...
   -> Fold 1/6...
   --- Size: Train=71711, Val=71708 ---
   -> Fold 2/6...
   --- Size: Train=143419, Val=71708 ---
   -> Fold 3/6...
   --- Size: Train=215127, Val=71708 ---
   -> Fold 4/6...
   --- Size: Train=286835, Val=71708 ---
   -> Fold 5/6...
   --- Size: Train=358543, Val=71708 ---
   -> Fold 6/6...
   --- Size: Train=430251, Val=71708 ---

🏁 Final Test Set Evaluation:

🏁 Final Test Results (XGB):
   Optimum Threshold: 0.0694
   AUC: 0.8709
   Precision: 47.21%
   Recall: 49.92%
   Net Profit Impact: $169,865.33


In [23]:
# Rank features by their mean importance across the baseline rows, best first.
xgb_mean_importance = (
    xgb_baseline_result['importances']
    .groupby('feature')['importance']
    .mean()
    .sort_values(ascending=False)
)
xgb_top_features = xgb_mean_importance.index.tolist()

# Tune XGBoost on the 150 highest-ranked features.
# NOTE(review): this search uses n_folds=3 while the LGB and CB searches use 5 — confirm that is intentional.
xgb_best_params = trainer.optimize_hyperparameters(
    model_type='xgb',
    n_trials=20,
    n_folds=3,
    top_n_features=xgb_top_features[:150],
)



🔍 Optuna: Optimizing XGB on 150 features, (20 trials)...


[I 2026-01-28 05:47:36,867] A new study created in memory with name: no-name-ba1da086-6461-4fa1-afe1-53d74d7e9f8d
[I 2026-01-28 05:53:54,923] Trial 0 finished with value: 0.9497161496428811 and parameters: {'n_estimators': 4563, 'learning_rate': 0.009343798676910084, 'max_depth': 10, 'min_child_weight': 2, 'subsample': 0.7905613458421644, 'colsample_bytree': 0.872652238330978, 'gamma': 2.381061331230639e-07, 'reg_alpha': 3.818134098819682, 'reg_lambda': 2.881266370086273e-07}. Best is trial 0 with value: 0.9497161496428811.
[I 2026-01-28 05:55:01,212] Trial 1 finished with value: 0.9443039482955862 and parameters: {'n_estimators': 3269, 'learning_rate': 0.0391546819064918, 'max_depth': 6, 'min_child_weight': 7, 'subsample': 0.9520801256095032, 'colsample_bytree': 0.8937036067706721, 'gamma': 1.9441503720234865e-06, 'reg_alpha': 0.5850772791985083, 'reg_lambda': 5.111143638939098}. Best is trial 0 with value: 0.9497161496428811.
[I 2026-01-28 06:00:56,379] Trial 2 finished with value: 0

   ✅ Best params: {'n_estimators': 4935, 'learning_rate': 0.01623652280283671, 'max_depth': 10, 'min_child_weight': 1, 'subsample': 0.7934008192507429, 'colsample_bytree': 0.9843954527397509, 'gamma': 0.004357648635267788, 'reg_alpha': 8.901631981922412, 'reg_lambda': 1.1377762447313336e-08}


In [24]:
# Overlay the fixed XGBoost settings on the tuned ones (in place).
xgb_best_params.update(FIXED_PARAMS['xgb'])
# The fixed settings include 'early_stopping_rounds'; it is removed again before the final fit.
# NOTE(review): presumably because final_train supplies no validation set — confirm.
xgb_best_params.pop('early_stopping_rounds')
xgb_selected_features = xgb_top_features[:150]
xgb_report = trainer.final_train('xgb', xgb_best_params, xgb_selected_features)


🚀 Training XGB Model...

🏁 Final Test Results (XGB):
   Optimum Threshold: 0.1090
   AUC: 0.9041
   Precision: 50.29%
   Recall: 59.65%
   Net Profit Impact: $224,249.90
💾 Saved xgb model & 150 features.


In [25]:
# Ask the trainer to pick the winning configuration; the returned summary is echoed as the cell output.
trainer.select_best_model()


🏆 Selecting Best Configuration based on Financial Impact...

   Evaluating 13 combinations using Cost Strategy...
   ⭐ New Leader: [lgb:1.0] Net Savings=$181,705.82
   ⭐ New Leader: [xgb:1.0] Net Savings=$224,249.90

🎉 WINNER: {'xgb': 1.0}
   AUC: 0.9041
   Net Savings: $224,249.90
   Optimal Threshold: 0.1090
💾 Saved production_config.json to ../models/prod_v1


{'weights': {'xgb': 1.0},
 'threshold': 0.10898989898989898,
 'profit': 224249.9,
 'auc': 0.9040543687007886}

#### **Post-processing & inference**

In [3]:
# Load the raw hold-out table used for the inference checks below.
test_csv_path = base_path / 'data/raw/test_raw.csv'
test_raw = pd.read_csv(test_csv_path)

In [4]:
# Inference wrapper pointed at the saved production artifacts directory.
prod_model_dir = base_path/'models/prod_v1'
inference = SentinelInference(model_dir=prod_model_dir)

2026-01-29 14:06:22,911 - INFO - Loading Sentinel artifacts from ../models/prod_v1...


2026-01-29 14:06:26,778 - INFO - Loaded xgb model.
2026-01-29 14:06:26,779 - INFO - ✅ Sentinel Inference initialized in 3.87s 


In [5]:
# Load the persisted preprocessing and feature-engineering objects directly.
# joblib.load unpickles arbitrary objects, so only point it at artifacts
# produced by this project.
preprocessor_path = base_path / 'models/prod_v1/sentinel_preprocessor.pkl'
engineer_path = base_path / 'models/prod_v1/sentinel_engineer.pkl'
processor = joblib.load(preprocessor_path)
engineer = joblib.load(engineer_path)

In [6]:
# Run the two loaded transformers by hand: preprocess first, then engineer features.
df_clean = processor.transform(test_raw)
df_features = engineer.transform(df_clean)

--- Transforming 88581 rows ---
--- Transforming 88,581 rows ---


In [7]:
# Score the raw hold-out frame through the inference wrapper.
# NOTE(review): test_raw still contains the `isFraud` label column (it is read from this
# same frame further down) — confirm the pipeline drops it before scoring.
pred = inference.predict(test_raw)

--- Transforming 88581 rows ---
--- Transforming 88,581 rows ---


In [8]:
# NOTE(review): `_get_feat4board` has a leading underscore, i.e. it looks like a private
# helper of SentinelInference — confirm it is meant to be called from the notebook.
feat4board = inference._get_feat4board()

In [11]:
# Inspect a single column of the returned frame; the saved output shows it contains NaN entries.
feat4board['UID_velocity_24h']

0             NaN
1             NaN
2             NaN
3        0.000000
4        0.000000
           ...   
88576    0.000000
88577         NaN
88578         NaN
88579    0.833333
88580    0.833333
Name: UID_velocity_24h, Length: 88581, dtype: float32

In [9]:
# Inputs for the evaluator: labels and amounts from the raw frame, scores from the prediction result.
y_true, amounts = test_raw['isFraud'], test_raw['TransactionAmt']
y_pred_prob = pred['probabilities']

In [8]:
# Evaluator built from the true labels, predicted probabilities and transaction amounts.
evaluator = SentinelEvaluator(y_true, y_pred_prob, amounts)

In [9]:
# Select a decision threshold with the evaluator's 'friction' strategy.
# NOTE(review): presumably max_fpr=0.02 caps the false-positive rate at 2% — confirm in SentinelEvaluator.
th = evaluator.find_best_threshold('friction', max_fpr=0.02)
th

0.1161

In [10]:
# Report business impact at the threshold selected by `find_best_threshold` in the
# previous cell. The earlier hand-copied constant (0.12) was a rounded version of
# that value (0.1161) and would go stale whenever the model or data changes.
report = evaluator.report_business_impact(th)

In [None]:
# Scratch reference cell: a bare list literal that is evaluated but never bound to a
# name, so no other cell can depend on it.
# NOTE(review): looks like a pasted list of model feature names (possibly the set saved
# with the production model) — confirm its origin, or assign it to a variable if needed.
["card_email_combo_fraud_rate", "PCA_Group_9_0", "C4", "C14", "C8", "card_email_combo", 
"PCA_Group_11_0", "C1", "C5", "PCA_Group_6_2", "M4", "PCA_Group_3_2", "R_emaildomain_is_free", "PCA_Group_6_1", 
"M5", "C7", "PCA_Group_13_0", "card1_freq_enc", "composite_risk_score", "PCA_Group_13_2", "PCA_Group_12_0", "C13", "C6", 
"PCA_Group_8_1", "PCA_Group_5_0", "PCA_Group_7_4", "PCA_Group_4_2", "device_info_combo_fraud_rate", "PCA_Group_1_2", 
"PCA_Group_13_4", "is_exact_dollars", "M6", "PCA_Group_8_5", "D2_norm", "PCA_Group_8_0", "PCA_Group_5_8",
 "os_browser_combo", "PCA_Group_6_6", "PCA_Group_5_4", "email_match_status", "id_12", "Amt_div_ProductCD_mean",
  "D8_norm", "PCA_Group_13_3", "PCA_Group_8_6", "R_emaildomain_length", "PCA_Group_2_2", "D1_norm", 
  "PCA_Group_12_4", "M2", "PCA_Group_9_4", "product_network_combo", "R_emaildomain_risk_score", "M3", 
  "ProductCD", "PCA_Group_4_1", "PCA_Group_13_1", "PCA_Group_3_0", "PCA_Group_12_7", "PCA_Group_2_0", 
  "addr1_fraud_rate", "PCA_Group_11_2", "PCA_Group_12_5", "PCA_Group_9_3", "PCA_Group_12_2", "id_09", 
  "PCA_Group_11_7", "PCA_Group_12_1", "PCA_Group_9_1", "PCA_Group_1_1", "PCA_Group_5_1", "UID_freq_enc", 
  "PCA_Group_11_4", "PCA_Group_10_4", "screen_area", "D4_norm", "C9", "PCA_Group_2_4", "Amt_div_addr1_mean",
   "PCA_Group_4_5", "P_emaildomain_suffix_id", "time_gap_anomaly", "PCA_Group_4_0", "PCA_Group_4_6", 
   "PCA_Group_3_1", "PCA_Group_3_3", "PCA_Group_1_3", "P_emaildomain_length", "PCA_Group_4_3", "PCA_Group_12_3", 
   "screen_aspect_ratio", "PCA_Group_5_7", "D6_norm", "R_emaildomain_suffix_id", "PCA_Group_11_5", 
   "PCA_Group_12_6", "PCA_Group_3_4", "PCA_Group_8_4", "PCA_Group_4_4", "TransactionAmt_decimal", "dist1", 
   "PCA_Group_3_6", "device_vendor", "PCA_Group_2_3", "Amt_div_P_emaildomain_vendor_id_mean", "id_18", 
   "PCA_Group_2_1", "os_type", "PCA_Group_10_3", "PCA_Group_5_2", "PCA_Group_10_0", "P_emaildomain_vendor_id", 
   "R_emaildomain_vendor_id", "PCA_Group_1_5", "PCA_Group_11_8", "PCA_Group_1_4", "txn_sequence", "PCA_Group_13_5", 
   "PCA_Group_3_5", "Amt_div_card1_mean", "id_36", "id_03", "D11_norm", "PCA_Group_11_3", "D12_norm", "D13_norm", 
   "PCA_Group_8_3", "D3_norm", "PCA_Group_9_6", "id_05", "card6", "addr1_freq_enc", "PCA_Group_5_6", "PCA_Group_1_0", 
   "D5_norm", "TransactionAmt_log", "PCA_Group_11_6", "id_01", "M9", "id_02", "PCA_Group_10_2", "PCA_Group_10_1", 
   "hour_of_day_fraud_rate", "PCA_Group_5_3", 
"PCA_Group_11_1", "TransactionAmt", "screen_width", "PCA_Group_5_9", "D7_norm", "PCA_Group_7_0"]