In [1]:
import pandas as pd
import xgboost
from sklearn.model_selection import train_test_split
import random

In [2]:
# load the raw fraud dataset from the sibling project folder
df = pd.read_csv(
    filepath_or_buffer='./../creditRisk_creditCardFraud/data_fraude.csv'
)

In [3]:
# pre-process the loaded data
# rename() skips keys that are absent and drop(errors='ignore') tolerates an
# already-removed column, so this cell can be re-run without a KeyError even
# though it overwrites `df`.
df = (
    df.rename(columns={'0.1': 'Class'})
      .drop(columns='Unnamed: 0', errors='ignore')
)

# target column and the remaining feature columns
yvar = df['Class']
xvars = df.drop(columns='Class')

In [4]:
# split the independent variables (x) and the dependent variable (y)
# into train / test partitions (80% train, fixed random_state)
xtrain, xtest, ytrain, ytest = train_test_split(
    xvars,
    yvar,
    train_size=0.80,
    random_state=2,
)

In [33]:
# build the xgboost DMatrix objects for the train and test partitions
trainXGB = xgboost.DMatrix(xtrain, label=ytrain)
testXGB = xgboost.DMatrix(xtest, label=ytest)

In [93]:
def findXGB(trainXGB,
           testXGB,
           iters,
           seed=None):
    """Random-search XGBoost hyper-parameters with CV, then fit a final classifier.

    For each of ``iters`` iterations a parameter set is sampled at random and
    scored with 10-fold ``xgboost.cv`` (AUC) on ``trainXGB``. The parameter set
    with the highest mean test-fold AUC in the last row of the CV result is
    kept and used to fit an ``xgboost.XGBClassifier``.

    Parameters
    ----------
    trainXGB : xgboost.DMatrix
        Training data used for the cross-validated search.
    testXGB : xgboost.DMatrix
        Not read by this function; kept so existing calls keep working.
    iters : int
        Number of random parameter sets to evaluate. With ``iters <= 0`` no
        search runs and the final model keeps the XGBClassifier defaults.
    seed : int or None, optional
        When given, the parameter sampling uses a private ``random.Random(seed)``
        so the search is reproducible. When ``None`` (default) the module-level
        ``random`` state is used, as before.

    Returns
    -------
    xgboost.XGBClassifier
        The fitted final model.

    Notes
    -----
    The final fit reads the notebook-level ``xtrain``, ``ytrain``, ``xtest`` and
    ``ytest`` (the scikit-learn API needs the raw arrays rather than a DMatrix).
    Presumably these are the same splits that back ``trainXGB`` / ``testXGB``
    -- confirm before reusing this function elsewhere.
    """
    # Private generator only when a seed is requested; otherwise keep drawing
    # from the global `random` state so callers that seed it see no change.
    rng = random if seed is None else random.Random(seed)

    best_metric = float('-inf')
    best_params = {}

    for _ in range(iters):

        params = {
            'tree_method': 'exact',
            'booster': 'gbtree',  # 'gblinear'
            'eta': rng.uniform(0.01, 0.3),
            'max_depth': rng.randint(5, 14),
            'reg_lambda': rng.uniform(0.01, 0.4),
            'reg_alpha': rng.uniform(0.01, 0.4),
            'gamma': rng.randint(0, 20),
            'subsample': rng.uniform(0.5, 1),
            'colsample_bytree': rng.uniform(0.5, 1),
            'objective': 'binary:logistic',
            'eval_metric': 'auc'
        }

        xgb_cv = xgboost.cv(
            params=params,
            dtrain=trainXGB,
            nfold=10,
            metrics={'auc'},
            seed=0,
            callbacks=[xgboost.callback.EvaluationMonitor(show_stdv=True),
                       xgboost.callback.EarlyStopping(2)])

        # Select the score by column name instead of position so a change in
        # the column layout cannot silently pick the wrong metric.
        auc = xgb_cv['test-auc-mean'].iloc[-1]

        if auc > best_metric:
            best_metric = auc
            best_params = params

    final_model = xgboost.XGBClassifier(
        eval_metric='auc',
        early_stopping_rounds=2,
        n_estimators=1000000)

    final_model.set_params(**best_params)

    # Early stopping must watch data the model is NOT fitted on: monitoring the
    # training set lets AUC keep rising and the model over-fit until it saturates.
    # NOTE(review): this uses the held-out split for stopping, so it is no
    # longer an untouched test set; carve out a separate validation split if an
    # unbiased final estimate is needed.
    final_model.fit(
        X=xtrain,
        y=ytrain,
        eval_set=[(xtest, ytest)]
    )

    return final_model

In [94]:
# keep a handle on the fitted model so later cells can evaluate or reuse it
# instead of relying on the cell's Out[] entry
final_model = findXGB(trainXGB, testXGB, iters=10)
final_model

[0]	train-auc:0.96928+0.00685	test-auc:0.96678+0.01198
[1]	train-auc:0.97750+0.00433	test-auc:0.97291+0.01075
[2]	train-auc:0.98084+0.00506	test-auc:0.97578+0.01140
[3]	train-auc:0.98338+0.00439	test-auc:0.97876+0.00930
[4]	train-auc:0.98622+0.00192	test-auc:0.98023+0.01049
[5]	train-auc:0.98725+0.00136	test-auc:0.98089+0.01047
[6]	train-auc:0.98818+0.00147	test-auc:0.98153+0.01121
[7]	train-auc:0.98839+0.00139	test-auc:0.98175+0.01115
[8]	train-auc:0.98898+0.00154	test-auc:0.98279+0.01057
[9]	train-auc:0.98921+0.00141	test-auc:0.98328+0.00977
[0]	train-auc:0.98442+0.00124	test-auc:0.97315+0.01043
[1]	train-auc:0.99078+0.00179	test-auc:0.98223+0.00812
[2]	train-auc:0.99241+0.00130	test-auc:0.98585+0.00715
[3]	train-auc:0.99328+0.00133	test-auc:0.98799+0.00602
[4]	train-auc:0.99428+0.00152	test-auc:0.98917+0.00636
[5]	train-auc:0.99507+0.00172	test-auc:0.98907+0.00716
[6]	train-auc:0.99553+0.00172	test-auc:0.99011+0.00657
[7]	train-auc:0.99569+0.00182	test-auc:0.98999+0.00702
[8]	train-