# IEEE Fraud Detection Using Catboost
The model below is built with CatBoost.

In [1]:
import pandas as pd
import numpy as np

from catboost import CatBoostClassifier, Pool, cv
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score, f1_score, matthews_corrcoef
import seaborn as sns

import datetime, random, sys

import hyperopt
from numpy.random import RandomState

sys.path.append("../src/python")
from data.utils import get_lbo_pools, gen_seeds, get_catboost_pools

from data.preprocessor import *

In [2]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None
# IPython shell escape: enables the ipywidgets notebook extension.
# NOTE(review): this runs on every execution of the cell — presumably only needed once per environment.
!jupyter nbextension enable --py widgetsnbextension
# Directory handed to preprocess() below; relative to the notebook's working directory.
DATA_DIR='../data/raw'

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: OK


# Data

In [None]:
# Build the train and test frames from the files under DATA_DIR.
# `preprocess` is not defined in this notebook — presumably it comes from the
# star import of data.preprocessor; verify against that module.
df_train, df_test = preprocess(DATA_DIR)

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [None]:
# Sanity check: (rows, columns) of the preprocessed training frame.
df_train.shape

In [None]:
# Preview the first rows of the preprocessed training frame.
df_train.head()

## LBO

Now create the training and validation datasets using a "leave block out" (LBO) split.

In [None]:
# Split the training frame into fitting data (X, y) and hold-out data
# (X_valid, y_valid). NOTE(review): the exact block definition lives in
# data.utils.get_lbo_pools — confirm there how the hold-out block is chosen.
X, y, X_valid, y_valid = get_lbo_pools(df_train)
# Wrap both splits as the objects passed to CatBoost's fit() as training data
# and eval_set in the tuning cell below.
train_pool, validate_pool = get_catboost_pools(X, y, X_valid, y_valid)

First, let's check the target split.

# Hyper Param Tuning

In [None]:
def hyperopt_objective(params):
    """Train one CatBoost model for a hyperopt trial and return its loss.

    The model is fitted on the notebook-level ``train_pool`` with
    ``validate_pool`` as the evaluation set, then scored on the notebook-level
    ``X_valid`` / ``y_valid``.

    Parameters
    ----------
    params : dict
        One sample from the search space. Keys read here: ``'l2_leaf_reg'``,
        ``'learning_rate'``, ``'loss_function'``, ``'eval_metric'`` and
        ``'depth'``. ``l2_leaf_reg`` and ``depth`` are cast to ``int`` because
        they arrive as floats from the search space.

    Returns
    -------
    float
        ``1 - AUC`` on the validation data, since hyperopt minimises the
        objective.
    """
    print('Params: '+str(params))
    model = CatBoostClassifier(
        l2_leaf_reg=int(params['l2_leaf_reg']),
        learning_rate=params['learning_rate'],
        loss_function=str(params['loss_function']),
        iterations=500,
        eval_metric=str(params['eval_metric']),
        random_seed=42,
        logging_level='Silent',
        custom_metric=['F1','MCC'],
        use_best_model=True,
        # Overfitting detector: stop once eval_metric has not improved for
        # 40 consecutive iterations.
        od_type='Iter',
        od_wait=40,
        depth=int(params['depth'])
    )

    model.fit(
        train_pool,
        eval_set=validate_pool
    )

    # Hard class labels drive the threshold-dependent metrics.
    preds = model.predict(X_valid)
    # ROC AUC is a ranking metric and needs continuous scores; hard labels
    # would collapse the ROC curve to a single operating point. Column 1 is
    # the probability of the second class in model.classes_ (the positive
    # class for a 0/1 target).
    pos_proba = model.predict_proba(X_valid)[:, 1]

    acc_score = accuracy_score(y_valid, preds)
    auc_score = roc_auc_score(y_valid, pos_proba)
    f1 = f1_score(y_valid, preds)
    mcc = matthews_corrcoef(y_valid, preds)
    print("Accuracy score: %s, AUC: %s, F1: %s, MCC: %s" % (acc_score, auc_score,f1, mcc))

    return 1 - auc_score # as hyperopt minimises

In [None]:
# Search space sampled by hyperopt; the keys are exactly the ones read by
# hyperopt_objective.
params_space = {
    # Quantised log-uniform: round(exp(uniform(0, 2))), i.e. whole numbers
    # from 1 to 7 returned as floats.
    'l2_leaf_reg': hyperopt.hp.qloguniform('l2_leaf_reg', 0, 2, 1),
    'learning_rate': hyperopt.hp.uniform('learning_rate', 1e-3, 5e-1),
    'loss_function': hyperopt.hp.choice('loss_function',['CrossEntropy','Logloss']),
    'eval_metric': hyperopt.hp.choice('eval_metric',['MCC','AUC']),
    # Whole numbers from 4 to 10, returned as floats.
    'depth': hyperopt.hp.quniform('depth', 4,10,1)
}

# Records every evaluated configuration and its loss.
trials = hyperopt.Trials()

# NOTE(review): recent hyperopt releases expect a numpy Generator
# (np.random.default_rng) for rstate — confirm against the installed version.
best = hyperopt.fmin(
    hyperopt_objective,
    space=params_space,
    algo=hyperopt.tpe.suggest,
    max_evals=50,
    trials=trials,
    rstate=RandomState(123)
)

# fmin reports hp.choice parameters as indices into their option lists, so the
# raw result shows loss_function / eval_metric as 0 or 1.
print(best)
# Decode the indices back into the actual option values for readability.
print(hyperopt.space_eval(params_space, best))