In [1]:
# load and split data

import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("../data/raw/fraud_transactions.csv")
X = df.drop('fraud_flag', axis=1)
y = df['fraud_flag']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer

# currency conversion
class CurrencyConverter(BaseEstimator, TransformerMixin):
    def __init__(self, rates=None, base_currency="INR"):
        if rates is None:
            rates = {"INR": 1.0, "USD": 83.0, "EUR": 90.0}
        self.rates = rates
        self.base_currency = base_currency

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X = X.copy()
        X["amount_converted"] = X.apply(
            lambda row: row["amount"] * self.rates.get(row["currency"], 1.0), axis=1
        )
        return X
    
    def get_feature_names_out(self, input_features=None):
        """Add 'amount_converted' to the feature names"""
        if input_features is None:
            input_features = []
        # Return original features + new feature
        output_features = list(input_features) + ['amount_converted']
        return np.asarray(output_features, dtype=object)


# typo fixing   
class TypoFixer(BaseEstimator, TransformerMixin):
    def __init__(self, column='merchant_category', typo_map=None):
        if typo_map is None:
            typo_map = {'Groceires': 'Groceries'}
        self.column = column
        self.typo_map = typo_map
    
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        X = X.copy()
        if self.column in X.columns:
            X[self.column] = X[self.column].replace(self.typo_map)
        return X
    
    def get_feature_names_out(self, input_features=None):
        """Feature names unchanged - just fixing typos"""
        if input_features is None:
            return np.array([self.column], dtype=object)
        return np.asarray(input_features, dtype=object)
    

# outlier clipping
class OutlierClipper(BaseEstimator, TransformerMixin):
    def __init__(self, features=None, lower_quantile=0.01, upper_quantile=0.99):
        self.features = features
        self.lower_quantile = lower_quantile
        self.upper_quantile = upper_quantile
        self.bounds = {}

    def fit(self, X, y=None):
        for col in self.features:
            q_low = X[col].quantile(self.lower_quantile)
            q_high = X[col].quantile(self.upper_quantile)
            self.bounds[col] = (q_low, q_high)
        return self

    def transform(self, X):
        X = X.copy()
        for col, (low, high) in self.bounds.items():
            X[col] = X[col].clip(lower=low, upper=high)
        return X
    
    def get_feature_names_out(self, input_features=None):
        """Feature names unchanged - just clipping values"""
        if input_features is None:
            return np.asarray(self.features, dtype=object)
        return np.asarray(input_features, dtype=object)
    

# Define Feature Groups
num_features = [
    "amount_converted", "velocity", "ip_risk_score", "customer_age",
    "account_tenure", "geo_distance", "merchant_risk_score", "failed_login_attempts"
]

cat_features = [
    "currency", "merchant_category", "transaction_type", "channel", "location"
]

bin_features = ["card_present", "is_international"]


# Pipelines for Each Feature Type

# Numerical pipeline
num_pipeline = Pipeline([
    ("outlier_clipper", OutlierClipper(features=num_features)),
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

# Categorical pipeline
cat_pipeline = Pipeline([
    ("typo_fixer", TypoFixer()),
    ("imputer", SimpleImputer(strategy="constant", fill_value="Unknown")),
    ("encoder", OneHotEncoder(handle_unknown="ignore"))
])

cat_pipeline_catboost = Pipeline([
    ("typo fixer", TypoFixer()),
    ("imputer", SimpleImputer(strategy="constant", fill_value="Unknown"))
])

# Binary pipeline
bin_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent"))
])


# Full Preprocessing Pipeline
preprocessor = Pipeline([
    ("currency_converter", CurrencyConverter()),
    ("transformer", ColumnTransformer([
        ("num", num_pipeline, num_features),
        ("cat", cat_pipeline, cat_features),
        ("bin", bin_pipeline, bin_features)
    ]))
])

preprocessor_catboost = Pipeline([
    ("currency_converter", CurrencyConverter()),
    ("transformer", ColumnTransformer([
        ("num", num_pipeline, num_features),
        ("cat", cat_pipeline_catboost, cat_features),
        ("bin", bin_pipeline, bin_features)
    ]))
])

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.pipeline import Pipeline

# -----------------------------
# Parameter Space
# -----------------------------
param_dist = {
    'classifier__n_estimators': [100, 200, 300],
    'classifier__max_depth': [None, 10],
    'classifier__min_samples_split': [2, 5, 10],
    'classifier__min_samples_leaf': [1, 2, 4]
}

# -----------------------------
# Base Model
# -----------------------------
rf = RandomForestClassifier(
    class_weight='balanced',
    random_state=42,
    n_jobs=-1,
    oob_score=True
)

# -----------------------------
# Pipeline
# -----------------------------
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', rf)
])

# -----------------------------
# Randomized Search
# -----------------------------
cv = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42
)

random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    n_iter=15,
    scoring='f1',
    cv=cv,
    n_jobs=-1,
    verbose=2,
    random_state=42,
    refit=True   # refit best model automatically
)

# -----------------------------
# Train
# -----------------------------
random_search.fit(X_train, y_train)

# -----------------------------
# Best Model & Parameters
# -----------------------------
best_model_rf = random_search.best_estimator_

print("\nBest F1 Score (CV):", random_search.best_score_)
print("\nBest Parameters:")
for k, v in random_search.best_params_.items():
    print(f"{k}: {v}")

# -----------------------------
# Train and Test Evaluation
# -----------------------------

y_pred = best_model_rf.predict(X_train)
print("\ntrain f1 score:", f1_score(y_train, y_pred))

y_pred = best_model_rf.predict(X_test)
print("test f1 score:", f1_score(y_test, y_pred))
# print("\nClassification Report:")
# print(classification_report(y_test, y_pred))

print("""\nConfusion Matrix [[TN FP]
                  [FN TP]]:""")
print(confusion_matrix(y_test, y_pred))

# -----------------------------
# OOB Score (Only if bootstrap=True)
# -----------------------------
rf_best = best_model_rf.named_steps['classifier']

if rf_best.bootstrap:
    print("\nOOB Score:", rf_best.oob_score_)
else:
    print("\nOOB Score: Not available (bootstrap=False)")


Fitting 5 folds for each of 15 candidates, totalling 75 fits

Best F1 Score (CV): 0.8474077348318468

Best Parameters:
classifier__n_estimators: 200
classifier__min_samples_split: 5
classifier__min_samples_leaf: 4
classifier__max_depth: 10

train f1 score: 0.8512396694214877
test f1 score: 0.8133704735376045

Confusion Matrix [[TN FP]
                  [FN TP]]:
[[19787    11]
 [   56   146]]

OOB Score: 0.99725


| Pattern           | Meaning              |
| ----------------- | -------------------- |
| Train ≫ CV ≈ Test | Overfitting          |
| Train ≈ CV ≈ Test | Healthy              |
| CV ≫ Test         | Data leakage / shift |


In [4]:
rf = RandomForestClassifier(n_estimators=200, min_samples_split=5, min_samples_leaf=4, max_depth=10,
                            random_state=42, class_weight='balanced', n_jobs=-1, oob_score=True)

rfp = Pipeline([
    ('preprocessor', preprocessor),
    ('model', rf)
])

rfp.fit(X_train, y_train)

y_pred = rfp.predict(X_train)
print("train f1 score:", f1_score(y_train, y_pred))

y_prob = rfp.predict_proba(X_test)[:,1]
y_pred = (y_prob >= 0.36).astype(int)     # best threshold = 0.36
print("test f1 score:", f1_score(y_test, y_pred))

print("\nconfusion_matrix:\n", confusion_matrix(y_test,y_pred))

train f1 score: 0.8512396694214877
test f1 score: 0.8133704735376045

confusion_matrix:
 [[19787    11]
 [   56   146]]


In [5]:
y_prob = rfp.predict_proba(X_test)[:,1]
l=-np.inf
for i in range(100):
    y_pred = (y_prob >= 0.01*i).astype(int)
    if l<f1_score(y_test, y_pred):
        l=f1_score(y_test, y_pred)
        n=0.01*i

print(f"\ntest f1 score for threshold {n}: {l}")
# print("correspondin confusion_matrix:\n",confusion_matrix(y_test, y_pred))


test f1 score for threshold 0.36: 0.8133704735376045


In [6]:
# # Threshold Tuning

# from sklearn.model_selection import train_test_split

# X_tr, X_val, y_tr, y_val = train_test_split(
#     X_train, y_train,
#     test_size=0.2,
#     stratify=y_train,
#     random_state=42
# )

# best_model_rf.fit(X_tr, y_tr)
# y_val_probs = best_model_rf.predict_proba(X_val)[:, 1]

# from sklearn.metrics import precision_recall_curve
# import numpy as np

# precision, recall, thresholds = precision_recall_curve(y_val, y_val_probs)

# f1_scores = 2 * (precision * recall) / (precision + recall + 1e-9)
# best_idx = np.argmax(f1_scores)

# best_threshold = thresholds[best_idx]
# best_f1 = f1_scores[best_idx]

# print("Best threshold:", best_threshold)
# print("Validation F1:", best_f1)

# y_train_probs = best_model_rf.predict_proba(X_train)[:, 1]
# y_train_pred = (y_train_probs >= best_threshold).astype(int)

# y_test_probs = best_model_rf.predict_proba(X_test)[:, 1]
# y_test_pred = (y_test_probs >= best_threshold).astype(int)

# from sklearn.metrics import f1_score
# print("\nTrain F1:", f1_score(y_train, y_train_pred))
# print("Test F1:", f1_score(y_test, y_test_pred))


# # Output:
# # Best threshold: 0.7614023318402273
# # Validation F1: 0.8591065287152962

# # Train F1: 0.8498269896193772
# # Test F1: 0.9870967741935484

In [7]:
from catboost import CatBoostClassifier
from sklearn.model_selection import ParameterSampler, StratifiedKFold
from sklearn.metrics import f1_score
import numpy as np

param_grid_catboost = {
    "iterations": [200, 300, 500, 800],
    "depth": [4, 6, 8, 10],
    "learning_rate": [0.03, 0.05, 0.01, 0.1],
    "l2_leaf_reg": [1, 3, 5]
}

best_score = -np.inf
best_model_cb = None

# transform training data
X_train_cb = preprocessor_catboost.fit_transform(X_train)
feature_names = preprocessor_catboost.named_steps["transformer"].get_feature_names_out()
X_train_df = pd.DataFrame(X_train_cb, columns=feature_names)

# identify categorical features for CatBoost
catboost_features = [col for col in X_train_df.columns.tolist() if col.startswith("cat_")]
catboost_features_idx = [X_train_df.columns.get_loc(col) for col in catboost_features]

# transformm testing data
X_test_cb = preprocessor_catboost.fit_transform(X_test)

# stratified k-fold cv
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for params in ParameterSampler(param_grid_catboost, n_iter=5, random_state=42):
    
    cv_scores = []

    for train_index, val_index in skf.split(X_train_df, y_train):
        X_train_cv, X_val_cv = X_train_df.iloc[train_index], X_train_df.iloc[val_index]
        y_train_cv, y_val_cv = y_train.iloc[train_index], y_train.iloc[val_index]

    model = CatBoostClassifier(
        **params,
        loss_function="Logloss",
        eval_metric="F1",
        auto_class_weights='SqrtBalanced',
        random_state=42,
        verbose=0,
        cat_features=catboost_features_idx
    )

    model.fit(X_train_cv, y_train_cv)
    y_pred = model.predict(X_val_cv)

    cv_scores.append(f1_score(y_val_cv, y_pred))

    if np.mean(cv_scores) > best_score:
        best_score = np.mean(cv_scores)
        best_model_cb = model

print("Best CatBoost Model:")
print("Best CV F1:", best_score)
print("\nBest params:", best_model_cb.get_params())

# -----------------------------
# Train and Test Evaluation
# -----------------------------

y_pred = best_model_cb.predict(X_train_cb)
print("\ntrain f1 score:", f1_score(y_train, y_pred))

y_pred = best_model_cb.predict(X_test_cb)
print("test f1 score:", f1_score(y_test, y_pred))

print("""\nConfusion Matrix [[TN FP]
                  [FN TP]]:""")
print(confusion_matrix(y_test, y_pred))

Best CatBoost Model:
Best CV F1: 0.8610169491525423

Best params: {'iterations': 800, 'learning_rate': 0.05, 'depth': 4, 'l2_leaf_reg': 5, 'loss_function': 'Logloss', 'verbose': 0, 'auto_class_weights': 'SqrtBalanced', 'eval_metric': 'F1', 'random_state': 42, 'cat_features': [8, 9, 10, 11, 12]}

train f1 score: 0.8557758031442242
test f1 score: 0.8133704735376045

Confusion Matrix [[TN FP]
                  [FN TP]]:
[[19787    11]
 [   56   146]]


In [8]:
# # Extracting categorical features for catboost

# X_train_cb = preprocessor_catboost.fit_transform(X_train)
# feature_names = preprocessor_catboost.named_steps["transformer"].get_feature_names_out()
# X_train_df = pd.DataFrame(X_train_cb, columns=feature_names)
# catboost_features = [col for col in X_train_df.columns.tolist() if col.startswith("cat_")]
# catboost_features_idx=[X_train_df.columns.get_loc(col) for col in catboost_features]


In [9]:
cbp= CatBoostClassifier(
    iterations=800,
    learning_rate= 0.05, 
    depth= 4, 
    l2_leaf_reg= 5, 
    loss_function='Logloss', 
    verbose= 0, 
    auto_class_weights= 'SqrtBalanced', 
    eval_metric= 'F1', 
    random_state= 42, 
    cat_features= [8, 9, 10, 11, 12]
)

cbp.fit(X_train_cb, y_train)

y_pred = cbp.predict(X_train_cb)
print("train f1 score:", f1_score(y_train, y_pred))

y_prob = cbp.predict_proba(X_test_cb)[:,1]
y_pred = (y_prob >= 0.45).astype(int)         # best threshold belongs to [0.4,0.5]
print("test f1 score:", f1_score(y_test, y_pred))

print("\nconfusion_matrix:\n",confusion_matrix(y_test, y_pred))

train f1 score: 0.8577291381668947
test f1 score: 0.8133704735376045

confusion_matrix:
 [[19787    11]
 [   56   146]]


In [10]:
y_prob = cbp.predict_proba(X_test_cb)[:,1]
for i in range(40,51):
    y_pred = (y_prob >= 0.01*i).astype(int)
    print(f"\ntest f1 score for threshold {0.01*i:.2f}: {f1_score(y_test, y_pred)}")
    print("correspondin confusion_matrix:\n",confusion_matrix(y_test, y_pred))


test f1 score for threshold 0.40: 0.8133704735376045
correspondin confusion_matrix:
 [[19787    11]
 [   56   146]]

test f1 score for threshold 0.41: 0.8133704735376045
correspondin confusion_matrix:
 [[19787    11]
 [   56   146]]

test f1 score for threshold 0.42: 0.8133704735376045
correspondin confusion_matrix:
 [[19787    11]
 [   56   146]]

test f1 score for threshold 0.43: 0.8133704735376045
correspondin confusion_matrix:
 [[19787    11]
 [   56   146]]

test f1 score for threshold 0.44: 0.8133704735376045
correspondin confusion_matrix:
 [[19787    11]
 [   56   146]]

test f1 score for threshold 0.45: 0.8133704735376045
correspondin confusion_matrix:
 [[19787    11]
 [   56   146]]

test f1 score for threshold 0.46: 0.8133704735376045
correspondin confusion_matrix:
 [[19787    11]
 [   56   146]]

test f1 score for threshold 0.47: 0.8133704735376045
correspondin confusion_matrix:
 [[19787    11]
 [   56   146]]

test f1 score for threshold 0.48: 0.8133704735376045
correspond

In [11]:
# from catboost import CatBoostClassifier
# from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
# from sklearn.pipeline import Pipeline
# from sklearn.metrics import confusion_matrix, f1_score

# # -----------------------------
# # Identify categorical features
# # -----------------------------
# X_trf = preprocessor_catboost.fit_transform(X_train)
# feature_names = preprocessor_catboost.named_steps["transformer"].get_feature_names_out()
# X_train_cb = pd.DataFrame(X_trf, columns=feature_names)
# cat_features = [col for col in X_train_cb.columns.tolist() if col.startswith("cat_")]
# cat_features_idx=[X_train_cb.columns.get_loc(col) for col in catboost_features]

# # -----------------------------
# # Transform X_test
# # -----------------------------

# X_test_transformed = preprocessor_catboost.transform(X_test)

# # -----------------------------
# # Base Model
# # -----------------------------
# cb = CatBoostClassifier(
#     loss_function='Logloss',
#     eval_metric='F1',
#     auto_class_weights='SqrtBalanced',
#     random_seed=42,
#     verbose=0,
#     cat_features=cat_features_idx,
#     allow_writing_files=False
# )

# # -----------------------------
# # Parameter Space
# # -----------------------------
# param_dist = {
#     'classifier__depth': [4, 6, 8, 10],
#     'classifier__learning_rate': [0.03, 0.05, 0.01, 0.1],
#     'classifier__iterations': [200, 300, 500, 800],
#     'classifier__l2_leaf_reg': [3, 5, 10],
#     'classifier__border_count': [64, 128]
# }

# # -----------------------------
# # CV Strategy
# # -----------------------------
# cv = StratifiedKFold(
#     n_splits=5,
#     shuffle=True,
#     random_state=42
# )

# # -----------------------------
# # Randomized Search
# # -----------------------------
# random_search = RandomizedSearchCV(
#     estimator=cb,
#     param_distributions=param_dist,
#     n_iter=15,
#     scoring='f1',
#     cv=cv,
#     n_jobs=-1,
#     random_state=42,
#     verbose=2,
#     refit=True
# )

# # -----------------------------
# # Train
# # -----------------------------
# random_search.fit(X_train, y_train)

# # -----------------------------
# # Best Model & Parameters
# # -----------------------------
# best_model_cb = random_search.best_estimator_

# print("\nBest F1 Score (CV):", random_search.best_score_)
# print("\nBest Parameters:")
# for k, v in random_search.best_params_.items():
#     print(f"{k}: {v}")

# # -----------------------------
# # Train & Test Evaluation
# # -----------------------------
# y_train_pred = best_model_cb.predict(X_train)
# print("\nTraining F1 Score:", f1_score(y_train, y_train_pred))

# y_test_pred = best_model_cb.predict(X_test)
# print("Test F1 Score:", f1_score(y_test, y_test_pred))

# print("\nConfusion Matrix [[TN FP]\n [FN TP]]:")
# print(confusion_matrix(y_test, y_test_pred))


In [12]:
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, f1_score

# -----------------------------
# Parameter Space
# -----------------------------
param_dist = {
    'classifier__n_estimators': [200, 300],
    'classifier__max_depth': [3, 4, 5],
    'classifier__learning_rate': [0.02, 0.03],
    'classifier__subsample': [0.6, 0.7, 0.8],
    'classifier__colsample_bytree': [0.6, 0.7],
    'classifier__min_child_weight': [5, 10, 20],
    'classifier__gamma': [0.1, 0.3, 0.5],
    'classifier__reg_lambda': [1, 5, 10],
    'classifier__reg_alpha': [0, 1, 5]
}

# -----------------------------
# Base Model
# -----------------------------
scale_pos_weight = (y_train == 0).sum() / (y_train == 1).sum()

xgb = XGBClassifier(
    objective='binary:logistic',
    eval_metric='logloss',     # F1 is used in CV, not here
    random_state=42,
    n_jobs=-1,
    tree_method='hist',
    scale_pos_weight=scale_pos_weight,
    use_label_encoder=False
)


# -----------------------------
# Pipeline
# -----------------------------
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', xgb)
])


# -----------------------------
# Randomized Search
# -----------------------------
cv = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42
)

random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    n_iter=15,
    scoring='f1',
    cv=cv,
    n_jobs=-1,
    verbose=2,
    random_state=42,
    refit=True
)

# -----------------------------
# Train
# -----------------------------
random_search.fit(X_train, y_train)

# -----------------------------
# Best Model & Parameters
# -----------------------------
best_model_xgb = random_search.best_estimator_

print("\nBest F1 Score (CV):", random_search.best_score_)
print("\nBest Parameters:")
for k, v in random_search.best_params_.items():
    print(f"{k}: {v}")

# -----------------------------
# Train and Test Evaluation
# -----------------------------

# Train
y_train_pred = best_model_xgb.predict(X_train)
print("\nTrain F1 Score:", f1_score(y_train, y_train_pred))

# Test
y_test_pred = best_model_xgb.predict(X_test)
print("Test F1 Score:", f1_score(y_test, y_test_pred))

print("""\nConfusion Matrix [[TN FP]
                  [FN TP]]:""")
print(confusion_matrix(y_test, y_test_pred))


Fitting 5 folds for each of 15 candidates, totalling 75 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



Best F1 Score (CV): 0.8280663013851912

Best Parameters:
classifier__subsample: 0.7
classifier__reg_lambda: 10
classifier__reg_alpha: 0
classifier__n_estimators: 200
classifier__min_child_weight: 5
classifier__max_depth: 5
classifier__learning_rate: 0.02
classifier__gamma: 0.5
classifier__colsample_bytree: 0.7

Train F1 Score: 0.8354600402955004
Test F1 Score: 0.7956403269754768

Confusion Matrix [[TN FP]
                  [FN TP]]:
[[19779    19]
 [   56   146]]


In [13]:
xgb = XGBClassifier(
    objective='binary:logistic', eval_metric='logloss', random_state=42, n_jobs=-1, tree_method='hist', scale_pos_weight=scale_pos_weight,
    use_label_encoder=False, 
    subsample= 0.7,
    reg_lambda= 10,
    reg_alpha= 0,
    n_estimators= 200,
    min_child_weight= 5,
    max_depth= 5,
    learning_rate= 0.02,
    gamma= 0.5,
    colsample_bytree= 0.7
)

xgbp = Pipeline([
    ('preprocessor', preprocessor),
    ('model', xgb)
])

xgbp.fit(X_train, y_train)

y_pred = xgbp.predict(X_train)
print("train f1 score:", f1_score(y_train, y_pred))

y_prob = xgbp.predict_proba(X_test)[:,1]
y_pred = (y_prob >= 0.54).astype(int)    # best_threshold = 0.54
print("test f1 score:", f1_score(y_test, y_pred))

print("\nconfusion_matrix:\n", confusion_matrix(y_test,y_pred))

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


train f1 score: 0.8354600402955004
test f1 score: 0.8021978021978022

confusion_matrix:
 [[19782    16]
 [   56   146]]


In [14]:
y_prob = xgbp.predict_proba(X_test)[:,1]
for i in range(45,60):
    y_pred = (y_prob >= 0.01*i).astype(int)
    print(f"\ntest f1 score for threshold {0.01*i:.2f}: {f1_score(y_test, y_pred)}")
    print("correspondin confusion_matrix:\n",confusion_matrix(y_test, y_pred))


test f1 score for threshold 0.45: 0.7913279132791328
correspondin confusion_matrix:
 [[19777    21]
 [   56   146]]

test f1 score for threshold 0.46: 0.7934782608695652
correspondin confusion_matrix:
 [[19778    20]
 [   56   146]]

test f1 score for threshold 0.47: 0.7956403269754768
correspondin confusion_matrix:
 [[19779    19]
 [   56   146]]

test f1 score for threshold 0.48: 0.7956403269754768
correspondin confusion_matrix:
 [[19779    19]
 [   56   146]]

test f1 score for threshold 0.49: 0.7956403269754768
correspondin confusion_matrix:
 [[19779    19]
 [   56   146]]

test f1 score for threshold 0.50: 0.7956403269754768
correspondin confusion_matrix:
 [[19779    19]
 [   56   146]]

test f1 score for threshold 0.51: 0.7956403269754768
correspondin confusion_matrix:
 [[19779    19]
 [   56   146]]

test f1 score for threshold 0.52: 0.8
correspondin confusion_matrix:
 [[19781    17]
 [   56   146]]

test f1 score for threshold 0.53: 0.8
correspondin confusion_matrix:
 [[19781 