#### GOAL: Monitering Model Performance


In [137]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Define Random State
RANDOM_STATE = 42

#### Preprocessing all the data before spliting

In [138]:
df = pd.read_csv('data/fraud.csv')
df.drop(['prev_address_months_count', 'intended_balcon_amount', 'device_fraud_count'], axis=1, inplace=True)

In [139]:
df['month'].unique()

array([0, 1, 2, 3, 4, 5, 6, 7], dtype=int64)

#### Spliting the data by month

source: https://arxiv.org/pdf/2401.05240v2

According to the paper's methodology, we split by month:
  - Training: months 1, 2, 3, 4, 5
  - Validation: month 6
  - Test: months 7, 8
 - [Ref: Jesus et al., 2022 - "training set: month 1-5, validation set: month 6, testing set: months 7-8"]

In [140]:
# Split your dataset BEFORE fitting any preprocessing to avoid data leakage
from sklearn.model_selection import train_test_split


train_months = [1, 2, 3, 4, 5]
val_month = [6]
test_months = [7, 8]

df_train = df[df["month"].isin(train_months)]
df_val = df[df['month'].isin(val_month)]
df_test = df[df['month'].isin(test_months)]

# Print quick info for sanity check
print("Train set months:", df_train['month'].unique())
print("Val   set months:", df_val['month'].unique())
print("Test  set months:", df_test['month'].unique())

# Define target variable
target_variable = 'fraud_bool'

X_train = df_train.drop(columns=target_variable, axis=1)
X_val = df_val.drop(columns=target_variable, axis=1)
X_test = df_test.drop(columns=target_variable, axis=1)

y_train = df_train[target_variable]
y_val = df_val[target_variable]
y_test = df_test[target_variable]

print("Shapes:")
print(" X_train:", X_train.shape, " y_train:", y_train.shape)
print(" X_val:  ", X_val.shape,   " y_val:  ", y_val.shape)
print(" X_test: ", X_test.shape,  " y_test: ", y_test.shape)

Train set months: [1 2 3 4 5]
Val   set months: [6]
Test  set months: [7]
Shapes:
 X_train: (662549, 28)  y_train: (662549,)
 X_val:   (108168, 28)  y_val:   (108168,)
 X_test:  (96843, 28)  y_test:  (96843,)


## Feature preprocessing

In [141]:
# Define feature types

num_features = ['income', 'name_email_similarity', 'current_address_months_count', 'customer_age', 'days_since_request'
                    , 'zip_count_4w', 'velocity_6h', 'velocity_24h', 'velocity_4w', 'bank_branch_count_8w', 
                    'date_of_birth_distinct_emails_4w', 'credit_risk_score', 'bank_months_count', 'proposed_credit_limit',  'session_length_in_minutes',
                    'device_distinct_emails_8w', 'month']


cat_features = ['payment_type', 'employment_status', 'housing_status',
                         'source', 'device_os']

binary_features = [
    'email_is_free',
    'phone_home_valid',
    'phone_mobile_valid',
    'has_other_cards',
    'foreign_request',
    'keep_alive_session',
]

In [142]:
from sklearn.impute import SimpleImputer
# ------------------------------------------Numerical----------------------------------------
# a. Impute Numerical Features
# Median imputation is preferred when the distribution is skewed
num_imputer = SimpleImputer(strategy="median")

# Fit the imputer on the training data
num_imputer.fit(X_train[num_features])

# Transform on training, validation data and test data

# Transform numeric features on X_train 
X_train_numeric = pd.DataFrame(
    num_imputer.transform(X_train[num_features]),
    columns=num_features,
    index=X_train.index
)

# Transform numeric features on X_val 
X_val_numeric = pd.DataFrame(
    num_imputer.transform(X_val[num_features]),
    columns=num_features,
    index=X_val.index
)

# Transform numeric features on X_test 
X_test_numeric = pd.DataFrame(
    num_imputer.transform(X_test[num_features]),
    columns=num_features,
    index=X_test.index
)

# ------------------------------------------Categorical----------------------------------------
# b. Impute Categorical Features
# Initialize the imputer for categorical features
categorical_imputer = SimpleImputer(strategy='most_frequent')

# Fit the imputer on the training data
categorical_imputer.fit(X_train[cat_features])

# # Transform on training, validation data and test data
# Transform categorical features on X_train 
X_train_categorical = pd.DataFrame(
    categorical_imputer.transform(X_train[cat_features]),
    columns=cat_features,
    index=X_train.index
)

# Transform categorical features on X_val
X_val_categorical = pd.DataFrame(
    categorical_imputer.transform(X_val[cat_features]),
    columns=cat_features,
    index=X_val.index
)

# Transform categorical features on X_test
X_test_categorical = pd.DataFrame(
    categorical_imputer.transform(X_test[cat_features]),
    columns=cat_features,
    index=X_test.index
)

# ------------------------------------------Binary----------------------------------------
# c. Impute Binary Features

# Initialize the imputer for binary features
binary_imputer = SimpleImputer(strategy='most_frequent')

# Fit the imputer on the training data
binary_imputer.fit(X_train[binary_features])

# Transform on train, validation and test data
# Transform binary features on X_train
X_train_bin = pd.DataFrame(
    binary_imputer.transform(X_train[binary_features]),
    columns=binary_features,
    index=X_train.index
)

# Transform binary features on X_val
X_val_bin = pd.DataFrame(
    binary_imputer.transform(X_val[binary_features]),
    columns=binary_features,
    index=X_val.index
)

# Transform binary features on X_test
X_test_bin = pd.DataFrame(
    binary_imputer.transform(X_test[binary_features]),
    columns=binary_features,
    index=X_test.index
)


KeyboardInterrupt: 

In [49]:
# Combine numerical and categorical features and binary features
X_train_imputed = pd.concat([X_train_numeric, X_train_categorical, X_train_bin], axis=1)
X_val_imputed = pd.concat([X_val_numeric, X_val_categorical, X_val_bin], axis=1)
X_test_imputed = pd.concat([X_test_numeric, X_test_categorical, X_test_bin], axis=1)

## Data Scaling and Encoding

#### Every step is base on notebook 2

In [None]:
from sklearn.preprocessing import RobustScaler, OneHotEncoder
# ------------------------------------------Numerical----------------------------------------
# a. Scale Numerical Features
# Initialize the scaler
scaler = RobustScaler()

# Fit the scaler on the training data
scaler.fit(X_train_imputed[num_features])

# Transform on training, validating and testing data
X_train_scaled = pd.DataFrame(
    scaler.transform(X_train_imputed[num_features]),
    columns=num_features,
    index=X_train_imputed.index
)

X_val_scaled = pd.DataFrame(
    scaler.transform(X_val_imputed[num_features]),
    columns=num_features,
    index=X_val_imputed.index
)

X_test_scaled = pd.DataFrame(
    scaler.transform(X_test_imputed[num_features]),
    columns=num_features,
    index=X_test_imputed.index
)

# ------------------------------------------Categorical----------------------------------------
# b. Encode Categorical Features
# Initialize the encoder
encoder = OneHotEncoder(sparse=False)

# Fit the encoder on the training data
encoder.fit(X_train_imputed[cat_features])

# Transform on training, validating and testing data
X_train_encoded = pd.DataFrame(
    encoder.transform(X_train_imputed[cat_features]),
    columns=encoder.get_feature_names_out(cat_features),
    index=X_train_imputed.index
)

X_val_encoded = pd.DataFrame(
    encoder.transform(X_val_imputed[cat_features]),
    columns=encoder.get_feature_names_out(cat_features),
    index=X_val_imputed.index
)

X_test_encoded = pd.DataFrame(
    encoder.transform(X_test_imputed[cat_features]),
    columns=encoder.get_feature_names_out(cat_features),
    index=X_test_imputed.index
)



In [None]:
# Combine scaled numerical and encoded categorical features
X_train_preprocessed = pd.concat([X_train_scaled, X_train_encoded, X_train_bin], axis=1)
X_val_preprocessed = pd.concat([X_val_scaled, X_val_encoded, X_val_bin], axis=1)
X_test_preprocessed = pd.concat([X_test_scaled, X_test_encoded, X_test_bin], axis=1)

print("Preprocessed validation Data:")
X_val_preprocessed.shape

Preprocessed validation Data:


(108168, 49)

## Features selection with mutual information

#### Chose top 30 features

In [None]:
# a. Mutual information 
from sklearn.feature_selection import mutual_info_classif
#Prepare numerical features

mutual_info = mutual_info_classif(X_train_preprocessed, y_train)
mutual_info

mutual_info = pd.Series(mutual_info)
mutual_info.index = X_train_preprocessed.columns
mutual_info.sort_values(ascending=False)

# Choosing top 30 features
feature_to_drop = mutual_info.sort_values(ascending=False)[31:].index.tolist()

X_train_preprocessed.drop(columns=feature_to_drop, axis=1, inplace=True)
X_val_preprocessed.drop(columns=feature_to_drop, axis=1, inplace=True)
X_test_preprocessed.drop(columns=feature_to_drop, axis=1, inplace=True)
X_train_preprocessed.shape,X_val_preprocessed, X_test_preprocessed.shape

KeyboardInterrupt: 

## Model training, assesing performance

In [38]:
import numpy as np
import pandas as pd

# Model
from lightgbm import LGBMClassifier as lgb

# Scoring metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, confusion_matrix

# import for oversampling
from imblearn.over_sampling import SMOTE

In [67]:
import numpy as np
from sklearn.metrics import confusion_matrix, recall_score

def find_best_threshold_for_max_recall_at_fpr(
    val_probs, y_val, 
    target_fpr=0.05
):
    """
    Given validation set probabilities (val_probs) and ground truth (y_val),
    find the threshold that yields the highest recall subject to FPR <= target_fpr.
    
    Parameters
    ----------
    val_probs : np.ndarray
        Predicted probabilities for the positive class on the validation set.
    y_val : np.ndarray
        True labels (0 or 1) for the validation set.
    target_fpr : float
        The maximum allowed false positive rate.
        
    Returns
    -------
    best_threshold : float
        The threshold that yields the maximum recall while keeping FPR <= target_fpr.
    best_fpr : float
        The FPR at that threshold.
    best_recall : float
        The recall at that threshold.
    """
    
    # Sort the probabilities in ascending order
    sorted_thresholds = np.sort(val_probs)
    
    best_threshold = 0.0
    best_fpr = 1.0
    best_recall = 0.0
    
    # We'll try each threshold in ascending order.
    # For each threshold, we measure FPR and recall.
    for t in sorted_thresholds:
        preds = (val_probs >= t).astype(int)
        
        tn, fp, fn, tp = confusion_matrix(y_val, preds, labels=[0,1]).ravel()
        
        # Compute FPR = FP / (FP + TN)
        # Avoid division by zero if TN+FP=0
        denom = (fp + tn) if (fp+tn) else 1e-15
        current_fpr = fp / denom
        
        # Compute recall = TP / (TP + FN)
        # Avoid division by zero if TP+FN=0
        denom_pos = (tp + fn) if (tp+fn) else 1e-15
        current_recall = tp / denom_pos
        
        # We only consider thresholds where FPR <= target_fpr
        if current_fpr <= target_fpr:
            # Among those, pick the one that yields the highest recall
            if current_recall > best_recall:
                best_threshold = t
                best_fpr = current_fpr
                best_recall = current_recall
    
    return best_threshold, best_fpr, best_recall

#### Hyper parameter tunning

In [146]:
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from lightgbm import LGBMClassifier as lgb

#Grid SearchCV
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# Scoring
from sklearn.metrics import make_scorer, fbeta_score
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, roc_auc_score
from sklearn.model_selection import cross_val_score

#Bayesian Optimization
import optuna

# Calibration library
from sklearn.calibration import CalibratedClassifierCV

def run_experiment(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
    n_bootstraps=5,
    oversampling=False,
    random_seed=42,
    target_fpr=0.2  # Desired FPR for validation
):
    """
    Runs a training+evaluation experiment across multiple bootstraps, fixing
    a threshold that yields <= target FPR on the validation set, and then
    measuring precision, recall, and FPR on the test set at that threshold.
    
    Parameters:
    -----------
    X_train, y_train : Training features and labels (DataFrame or Series)
    X_val,   y_val   : Validation features and labels
    X_test,  y_test  : Test features and labels
    n_bootstraps     : Number of bootstrap iterations
    oversampling     : Whether to apply SMOTE oversampling in each bootstrap
    random_seed      : Random seed for reproducibility
    target_fpr       : Desired FPR (false positive rate) on the validation set
    
    Returns:
    --------
    precision_list, recall_list, fpr_list : Lists of precision, recall, and FPR across bootstraps
    """
    
    precision_list = []
    recall_list = []
    fpr_list = []
    decision_threshold = None  # We'll set this only in the first bootstrap
    
    np.random.seed(random_seed)  # Reproducibility
    
    best_params = {} # We'll set this only in the first bootstrap
    
    for i in range(n_bootstraps):
        print(f"\n=== Bootstrap Iteration #{i} (Oversampling={oversampling}) ===")
        
        # ---------------------------
        # 1) Bootstrapping
        # ---------------------------
        indices = np.random.choice(len(X_train), size=len(X_train), replace=True)
        X_train_boot = X_train.iloc[indices]
        y_train_boot = y_train.iloc[indices]

        
        # 
        #  ---------------------------
        # 2) (Optional) Oversampling
        # ---------------------------
        if oversampling:
            smote_obj = SMOTE(random_state=i)  # different random state each iteration
            X_train_boot, y_train_boot = smote_obj.fit_resample(X_train_boot, y_train_boot)
            
        # ---------------------------
        # 3) Hyper Parameter Tuning for first boostraps using validation set (month 6)
        # ---------------------------        
        if i == 0:
            def objective(trial):
                params = {
                    'objective': 'binary',
                    'metric': 'auc',
                    'boosting_type': 'gbdt',
                    'num_leaves': trial.suggest_int('num_leaves', 20, 100),
                    'max_depth': trial.suggest_int('max_depth', 3, 15),
                    'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2),
                    'subsample': trial.suggest_float('subsample', 0.7, 1.0),
                    'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
                    'lambda_l1': trial.suggest_float('lambda_l1', 0.0, 10.0),
                    'lambda_l2': trial.suggest_float('lambda_l2', 0.0, 10.0),
                    'scale_pos_weight': len(y_train[y_train == 0]) / len(y_train[y_train == 1])
                }
                
                # --- Classifier ---
                optuna_model = lgb(random_state = i)
                optuna_model.set_params(**params)
                #fit model
                optuna_model.fit(
                    X=X_train_boot,
                    y=y_train_boot,
                )
                
                # Predict on the validation set
                y_pred_proba = optuna_model.predict_proba(X_val)[:, 1]
                
                # Return the validation ROC_AUC score as the optimization objective
                score = roc_auc_score(y_val, y_pred_proba)
                
                
                return score.mean()

            study = optuna.create_study(direction='maximize')
            study.optimize(objective, n_trials=30)
                    
            best_params = study.best_params
        
        # ---------------------------
        # 4) Model Training with best param from first iterations
        # ---------------------------
        
        print(best_params)
        tuned_lgb = lgb(random_state=i)
        tuned_lgb.set_params(**best_params)
        tuned_lgb.fit(X_train_boot, y_train_boot)     
        
        # ---------------------------
        # 5) Calibration (Platt / sigmoid) on the 1st bootstrap with validation set set to define threshold. 
        # We then reapply the same threshold for the remaining bootstraps
        # ---------------------------
        if i == 0:
            # calibrated_clf = CalibratedClassifierCV(
            #     base_estimator=base_lgb, cv='prefit', method='sigmoid') #this code is wrong, base_estimator is no longer support!, takes 3 hours to debug
            calibrated_clf = CalibratedClassifierCV(
                estimator=tuned_lgb, cv="prefit", method="sigmoid"
            )
            calibrated_clf.fit(X_val, y_val)        
            
            val_probs = calibrated_clf.predict_proba(X_val)[:, 1]
            
            # Maximum recall score at desired FPR
            threshold, fpr, recall_ = find_best_threshold_for_max_recall_at_fpr(
                val_probs, 
                y_val, 
                target_fpr=target_fpr
            )
            
            decision_threshold = threshold
            print(f"Chosen decision threshold (FPR <= {target_fpr}) = {decision_threshold:.3f}")
            print(f"Resulting FPR on validation set: {fpr:.3f}")
            print(f"Resulting Recall on validation set: {recall_:.3f}")
        
        # ---------------------------
        # 6) Evaluate on Test set for Model #k > 0
        # ---------------------------       
        else:
            test_probs = tuned_lgb.predict_proba(X_test)[:, 1]
            test_preds = (test_probs > decision_threshold).astype(int)
            
            # Precision / Recall
            prec = precision_score(y_test, test_preds)
            rec = recall_score(y_test, test_preds)
            
            # FPR on Test
            tn_test, fp_test, fn_test, tp_test = confusion_matrix(y_test, test_preds, labels=[0,1]).ravel()
            denom_test = (tn_test + fp_test) if (tn_test + fp_test) else 1e-10
            test_fpr = fp_test / denom_test
            
            precision_list.append(prec)
            recall_list.append(rec)
            fpr_list.append(test_fpr)
            
            print(f"Test Precision @ threshold={decision_threshold:.3f}: {prec:.3f}")
            print(f"Test Recall    @ threshold={decision_threshold:.3f}: {rec:.3f}")
            print(f"Test FPR       @ threshold={decision_threshold:.3f}: {test_fpr:.3f}")
    
    return precision_list, recall_list, fpr_list


In [96]:
# C) Run Experiments (Oversample vs. No Oversample)
# ---------------------------------
print("=== EXPERIMENT 1: No Oversampling ===")
precision_no_os, recall_no_os, fpr_no_os = run_experiment(
    X_train_preprocessed, y_train, X_val_preprocessed, y_val, X_test_preprocessed, y_test,
    n_bootstraps=5,
    oversampling=False,
    random_seed=42,
    target_fpr = 0.2
)

print("\n=== EXPERIMENT 2: With Oversampling ===")
precision_os, recall_os, fpr_os = run_experiment(
    X_train_preprocessed, y_train, X_val_preprocessed, y_val, X_test_preprocessed, y_test,
    n_bootstraps=5,
    oversampling=True,
    random_seed=42,
    target_fpr= 0.2
)



=== EXPERIMENT 1: No Oversampling ===

=== Bootstrap Iteration #0 (Oversampling=False) ===
Iteration: 0  Class distribution: [655799   6750]


NotFittedError: Estimator not fitted, call fit before exploiting the model.

In [97]:
# ---------------------------
# D) Compare Results
# ---------------------------
print("\n=== COMPARISON OF RESULTS ===")

mean_prec_no_os = np.mean(precision_no_os)
std_prec_no_os = np.std(precision_no_os)
mean_rec_no_os = np.mean(recall_no_os)
std_rec_no_os = np.std(recall_no_os)
mean_fpr_no_os = np.mean(fpr_no_os)
std_fpr_no_os = np.std(fpr_no_os)

mean_prec_os = np.mean(precision_os)
std_prec_os = np.std(precision_os)
mean_rec_os = np.mean(recall_os)
std_rec_os = np.std(recall_os)
mean_fpr_os = np.mean(fpr_os)
std_fpr_os = np.std(fpr_os)

print(f"\nNo Oversampling => Precision: {mean_prec_no_os:.3f} ± {std_prec_no_os:.3f}, "
        f"Recall: {mean_rec_no_os:.3f} ± {std_rec_no_os:.3f}",
        f"FPR: {mean_fpr_no_os:.3f} ± {std_fpr_no_os:.3f}"
        )
print(f"Oversampling   => Precision: {mean_prec_os:.3f} ± {std_prec_os:.3f}, "
        f"Recall: {mean_rec_os:.3f} ± {std_rec_os:.3f}", 
        f"FPR: {mean_fpr_os:.3f} ± {std_fpr_os:.3f}"
        )


=== COMPARISON OF RESULTS ===

No Oversampling => Precision: nan ± nan, Recall: nan ± nan FPR: nan ± nan
Oversampling   => Precision: 0.066 ± 0.003, Recall: 0.773 ± 0.015 FPR: 0.165 ± 0.011


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


- There is not much different in performance of oversampling and no sampling

In [147]:
print("=== EXPERIMENT 3: No Oversampling with calibration ===")
precision_no_os, recall_no_os, fpr_no_os = run_experiment(
    X_train_preprocessed, y_train, X_val_preprocessed, y_val, X_test_preprocessed, y_test,
    n_bootstraps=5,
    oversampling=False,
    random_seed=42,
    target_fpr = 0.2
)

=== EXPERIMENT 3: No Oversampling with calibration ===

=== Bootstrap Iteration #0 (Oversampling=False) ===


[I 2025-01-13 15:28:27,190] A new study created in memory with name: no-name-89d1fe97-cd79-4f7f-90dd-2de0bfb31ea3
[I 2025-01-13 15:28:29,457] Trial 0 finished with value: 0.8679363368878728 and parameters: {'num_leaves': 80, 'max_depth': 3, 'learning_rate': 0.11539323601758196, 'subsample': 0.9797345919014896, 'colsample_bytree': 0.9011778952395055, 'lambda_l1': 7.647145012886648, 'lambda_l2': 2.60561135890534}. Best is trial 0 with value: 0.8679363368878728.
[I 2025-01-13 15:28:32,158] Trial 1 finished with value: 0.872041319339206 and parameters: {'num_leaves': 52, 'max_depth': 8, 'learning_rate': 0.04987591264014918, 'subsample': 0.7432374573533561, 'colsample_bytree': 0.8287650854960851, 'lambda_l1': 6.066580703406823, 'lambda_l2': 7.787688902390155}. Best is trial 1 with value: 0.872041319339206.
[I 2025-01-13 15:28:34,780] Trial 2 finished with value: 0.7586508917152586 and parameters: {'num_leaves': 80, 'max_depth': 12, 'learning_rate': 0.1982747479774949, 'subsample': 0.8332097

{'num_leaves': 56, 'max_depth': 11, 'learning_rate': 0.028264344750403514, 'subsample': 0.7827563033434293, 'colsample_bytree': 0.9996985528187515, 'lambda_l1': 2.8892887717835674, 'lambda_l2': 3.257513551038251}
Chosen decision threshold (FPR <= 0.2) = 0.010
Resulting FPR on validation set: 0.200
Resulting Recall on validation set: 0.807

=== Bootstrap Iteration #1 (Oversampling=False) ===
{'num_leaves': 56, 'max_depth': 11, 'learning_rate': 0.028264344750403514, 'subsample': 0.7827563033434293, 'colsample_bytree': 0.9996985528187515, 'lambda_l1': 2.8892887717835674, 'lambda_l2': 3.257513551038251}
Test Precision @ threshold=0.010: 0.064
Test Recall    @ threshold=0.010: 0.783
Test FPR       @ threshold=0.010: 0.172

=== Bootstrap Iteration #2 (Oversampling=False) ===
{'num_leaves': 56, 'max_depth': 11, 'learning_rate': 0.028264344750403514, 'subsample': 0.7827563033434293, 'colsample_bytree': 0.9996985528187515, 'lambda_l1': 2.8892887717835674, 'lambda_l2': 3.257513551038251}
Test Pr

In [148]:
mean_prec_no_os = np.mean(precision_no_os)
std_prec_no_os = np.std(precision_no_os)
mean_rec_no_os = np.mean(recall_no_os)
std_rec_no_os = np.std(recall_no_os)
mean_fpr_no_os = np.mean(fpr_no_os)
std_fpr_no_os = np.std(fpr_no_os)


print(f"\nNo Oversampling => Precision: {mean_prec_no_os:.3f} ± {std_prec_no_os:.3f}, "
        f"Recall: {mean_rec_no_os:.3f} ± {std_rec_no_os:.3f}",
        f"FPR: {mean_fpr_no_os:.3f} ± {std_fpr_no_os:.3f}"
        )


No Oversampling => Precision: 0.060 ± 0.003, Recall: 0.798 ± 0.010 FPR: 0.187 ± 0.011
