In [1]:
import pandas as pd
import xgboost
from sklearn.model_selection import train_test_split
import random

In [2]:
# Load the raw fraud dataset from the neighbouring project folder.
df = pd.read_csv(
    filepath_or_buffer='./../creditRisk_creditCardFraud/data_fraude.csv',
)

In [3]:
# Pre-process the loaded data: rename the target column '0.1' to 'Class' and
# drop 'Unnamed: 0' (presumably an index column saved with the CSV - confirm).
# errors='ignore' keeps this cell re-runnable: `df` is rebound here, so on a
# second run 'Unnamed: 0' is already gone and a plain drop would raise KeyError.
df = (
    df
    .rename(columns={'0.1': 'Class'})
    .drop(columns='Unnamed: 0', errors='ignore')
)

# Dependent variable (target) and independent variables (every other column).
yvar = df.Class
xvars = df.drop(columns='Class')

In [4]:
# Split the independent variables (x) and the dependent variable (y) into
# train and test partitions: 80% goes to train, with a fixed random_state.
xtrain, xtest, ytrain, ytest = train_test_split(
    xvars,
    yvar,
    train_size=0.80,
    random_state=2,
)

In [33]:
# Build the xgboost DMatrix objects for the train and test partitions.
trainXGB = xgboost.DMatrix(xtrain, label=ytrain)
testXGB = xgboost.DMatrix(xtest, label=ytest)

In [93]:
def findXGB(trainXGB,
           testXGB,
           iters,
           seed=None):
    """Random-search XGBoost hyper-parameters with 10-fold CV, then fit a final model.

    For each of ``iters`` iterations a parameter set is drawn at random, scored
    with ``xgboost.cv`` on ``trainXGB`` (AUC, early stopping after 2 rounds
    without improvement), and the set with the highest mean test-fold AUC is kept.
    An ``XGBClassifier`` is then configured with the winning parameters and fitted.

    Parameters
    ----------
    trainXGB : xgboost.DMatrix
        Training data used for cross-validation.
    testXGB : xgboost.DMatrix
        Not read by this function; kept so existing calls keep working.
    iters : int
        Number of random parameter sets to evaluate. With ``iters == 0`` no
        candidate is scored and the final model keeps its constructor defaults.
    seed : int or None, optional
        When given, the parameter draws come from a private ``random.Random(seed)``
        so the search is repeatable. When ``None`` (default) the module-level
        ``random`` state is used, exactly as before.

    Returns
    -------
    xgboost.XGBClassifier
        The fitted final model.

    Notes
    -----
    The final fit reads the notebook-level ``xtrain``, ``ytrain``, ``xtest`` and
    ``ytest`` (the sklearn estimator is fitted on the raw frames rather than on
    the DMatrix arguments).
    """
    # Private generator only when a seed is requested; otherwise keep drawing
    # from the shared module-level state so unseeded behaviour is unchanged.
    rng = random if seed is None else random.Random(seed)

    best_metric = 0
    best_params = {}

    for _ in range(iters):

        params = {
            'tree_method': 'exact',
            'booster': 'gbtree',  # 'gblinear'
            'eta': rng.uniform(0.01, 0.3),
            'max_depth': rng.randint(5, 14),
            'reg_lambda': rng.uniform(0.01, 0.4),
            'reg_alpha': rng.uniform(0.01, 0.4),
            'gamma': rng.randint(0, 20),
            'subsample': rng.uniform(0.5, 1),
            'colsample_bytree': rng.uniform(0.5, 1),
            'objective': 'binary:logistic',
            'eval_metric': 'auc'
        }

        xgb_cv = xgboost.cv(
            params=params,
            dtrain=trainXGB,
            nfold=10,
            metrics={'auc'},
            seed=0,
            callbacks=[xgboost.callback.EvaluationMonitor(show_stdv=True),
                       xgboost.callback.EarlyStopping(2)])

        # Mean AUC over the held-out folds at the last kept round. Selected by
        # column name so the lookup does not depend on column order.
        auc = xgb_cv['test-auc-mean'].iloc[-1]

        if auc > best_metric:
            best_metric = auc
            # Copy so the stored winner is not an alias of the loop variable.
            best_params = dict(params)

    final_model = xgboost.XGBClassifier(
        eval_metric='auc',
        early_stopping_rounds=2,
        n_estimators=1000000)

    final_model.set_params(**best_params)

    # Early stopping must watch data the model is NOT being fitted on: monitoring
    # the training set makes the criterion track training AUC, which defeats the
    # purpose of the huge n_estimators cap.
    # NOTE(review): a dedicated validation split would be cleaner than reusing
    # the test partition for stopping.
    final_model.fit(
        X=xtrain,
        y=ytrain,
        eval_set=[(xtest, ytest)]
    )

    return final_model

In [None]:
# Run the search over 10 random candidates; the returned model is the cell output.
findXGB(trainXGB=trainXGB, testXGB=testXGB, iters=10)