In [2]:
import os
import sys
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, f1_score
from sklearn.metrics import make_scorer, precision_recall_curve, auc
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV

from bayes_opt import BayesianOptimization
#from sklearn.experimental import enable_halving_search_cv 
#from sklearn.model_selection import HalvingRandomSearchCV
#from sklearn.model_selection import HalvingGridSearchCV 
from collections import Counter
from imblearn.over_sampling import SMOTE

# Menu

<a name="navegacao"></a>

## 1) [Preparação dos dados](#parte1)
- 1.1 [Leitura base principal](#principal)
- 1.2 [Leitura base mes](#mes)
- 1.3 [Leitura base hora](#hora)
- 1.4 [Merge principal e base mensal](#merge1)
- 1.5 [Merge principal e base hora](#merge2)
- 1.6 [Confere marcação](#marcacao)


## 2) [Salvando as bases de treino](#parte2)
- 2.1 [Salvando base com histórico](#comhist)
- 2.2 [Salvando base sem histórico](#semhist)


<a name="principal"></a>

## 1.1) Leitura base principal


## Leitura dos dados

In [3]:
%%time
#df = pd.read_pickle("perfilpj.pkl")
df = pd.read_csv("BNDES_UNIFICADO.csv", converters={'CNPJ8': str, 'INTERMEDIARIA': str},
                 delimiter=";" , encoding='latin-1')


CPU times: total: 406 ms
Wall time: 391 ms


In [4]:
# Turn +/-inf into NaN, then drop every row that contains at least one NaN.
# Both calls modify df in place.
df.replace(to_replace=[np.inf, -np.inf], value=np.nan, inplace=True)
df.dropna(axis=0, how='any', inplace=True)

In [5]:
# Dimensions (rows, columns) of df at this point of the notebook.
df.shape

(125516, 27)

In [6]:
# Column labels of df.
df.columns

Index(['CNPJ8', 'ANO', 'EMPRESA_PUBLICA', 'GARANTIA', 'INDIRETA', 'INOVACAO',
       'INSTRUMENTO', 'UF', 'INTERMEDIARIA', 'CUSTO', 'MEDIA_JUROS',
       'PRAZO_AMORTIZACAO', 'TESOURO', 'PRAZO_CARENCIA', 'VALOR_CONTRATO',
       'VALOR_DESENBOLSO', 'Porte_Cliente', 'CAPITAL_SOCIAL', 'IDADE',
       'NATJUR', 'NCONTRATOS', 'NFILIAIS', 'PORTE_RECEITA', 'SITUACAO',
       'IDADE_SOCIOS', 'QTDSOCIOS', 'SOCIO_PJ'],
      dtype='object')

In [7]:
%%time
# Preview the first rows of df.
# Disabled alternative: describe() of the columns whose name matches 'spl_idade'.
#df.filter(regex='spl_idade', axis=1).describe()
df.head()

CPU times: total: 0 ns
Wall time: 0 ns


Unnamed: 0,CNPJ8,ANO,EMPRESA_PUBLICA,GARANTIA,INDIRETA,INOVACAO,INSTRUMENTO,UF,INTERMEDIARIA,CUSTO,...,CAPITAL_SOCIAL,IDADE,NATJUR,NCONTRATOS,NFILIAIS,PORTE_RECEITA,SITUACAO,IDADE_SOCIOS,QTDSOCIOS,SOCIO_PJ
0,0,2002,0,OUTROS,0,1,OUTROS,IE,0.0,TJLP,...,90000020000.0,37.0,2.0,15,5089,5.0,1,1.0,1,0
1,0,2003,0,OUTROS,0,0,OUTROS,IE,92816560.0,TJLP,...,90000020000.0,38.0,2.0,2,5123,5.0,1,1.0,1,0
2,0,2009,1,SEM GARANTIA,0,0,OUTROS,RJ,,TAXAFIXA,...,90000020000.0,44.0,2.0,5,6912,5.0,1,1.0,1,0
3,0,2010,1,SEM GARANTIA,0,0,OUTROS,RJ,,TAXAFIXA,...,90000020000.0,45.0,2.0,2,7002,5.0,1,1.0,1,0
4,0,2012,1,MISTA,0,0,OUTROS,IE,,TJLP,...,90000020000.0,47.0,2.0,1,7407,5.0,1,1.0,1,0


In [8]:
%%time
# dtype of every column of df.
df.dtypes

CPU times: total: 0 ns
Wall time: 0 ns


CNPJ8                 object
ANO                    int64
EMPRESA_PUBLICA        int64
GARANTIA              object
INDIRETA               int64
INOVACAO               int64
INSTRUMENTO           object
UF                    object
INTERMEDIARIA         object
CUSTO                 object
MEDIA_JUROS          float64
PRAZO_AMORTIZACAO    float64
TESOURO                int64
PRAZO_CARENCIA       float64
VALOR_CONTRATO       float64
VALOR_DESENBOLSO     float64
Porte_Cliente         object
CAPITAL_SOCIAL       float64
IDADE                float64
NATJUR               float64
NCONTRATOS             int64
NFILIAIS               int64
PORTE_RECEITA        float64
SITUACAO               int64
IDADE_SOCIOS         float64
QTDSOCIOS              int64
SOCIO_PJ               int64
dtype: object

In [9]:
# Row counts per PORTE_RECEITA value, split by the SITUACAO column.
pd.crosstab(index=df['PORTE_RECEITA'], columns=df['SITUACAO'])

SITUACAO,0,1
PORTE_RECEITA,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,72792,2639
3.0,22618,1864
5.0,21244,4359


In [10]:
%%time
#df[['SITUACAO','CUSTO']].groupby('FRAUDES').count()
pd.crosstab(df.CUSTO, df.SITUACAO )

CPU times: total: 31.2 ms
Wall time: 32 ms


SITUACAO,0,1
CUSTO,Unnamed: 1_level_1,Unnamed: 2_level_1
CDI,14,2
IPCA,48,8
OUTROS,248,119
SELIC,28761,1823
TAXAFIXA,30327,1840
TJLP,20437,2296
TLP,36819,2774


In [11]:
%%time
pd.crosstab(df.NATJUR, df.SITUACAO)

CPU times: total: 15.6 ms
Wall time: 24 ms


SITUACAO,0,1
NATJUR,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,184,49
2.0,116254,8786
3.0,187,21
4.0,29,6


In [12]:
# Row counts per EMPRESA_PUBLICA flag, split by the SITUACAO column.
pd.crosstab(index=df['EMPRESA_PUBLICA'], columns=df['SITUACAO'])

SITUACAO,0,1
EMPRESA_PUBLICA,Unnamed: 1_level_1,Unnamed: 2_level_1
0,116375,8770
1,279,92


In [13]:
# Row counts per INDIRETA flag, split by the SITUACAO column.
pd.crosstab(index=df['INDIRETA'], columns=df['SITUACAO'])

SITUACAO,0,1
INDIRETA,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2711,792
1,113943,8070


In [14]:
# Row counts per INOVACAO flag, split by the SITUACAO column.
pd.crosstab(index=df['INOVACAO'], columns=df['SITUACAO'])

SITUACAO,0,1
INOVACAO,Unnamed: 1_level_1,Unnamed: 2_level_1
0,116310,8730
1,344,132


In [15]:
# Row counts per TESOURO flag, split by the SITUACAO column.
pd.crosstab(index=df['TESOURO'], columns=df['SITUACAO'])

SITUACAO,0,1
TESOURO,Unnamed: 1_level_1,Unnamed: 2_level_1
0,110155,7886
1,6499,976


In [16]:
# Row counts per SOCIO_PJ flag, split by the SITUACAO column.
# NOTE(review): the recorded output has a single SOCIO_PJ value (0), so the
# column carries no information in this data set.
pd.crosstab(index=df['SOCIO_PJ'], columns=df['SITUACAO'])

SITUACAO,0,1
SOCIO_PJ,Unnamed: 1_level_1,Unnamed: 2_level_1
0,116654,8862


In [17]:
%%time
df.loc[ (df['CAPITAL_SOCIAL']<1)     ,'CAPITAL_SOCIAL']    = 1
df.loc[ (df['IDADE']<1)              ,'IDADE']             = 1
df.loc[ (df['NCONTRATOS']<1)         ,'NCONTRATOS']        = 1
df.loc[ (df['NFILIAIS']<1)           ,'NFILIAIS']          = 1
df.loc[ (df['IDADE_SOCIOS']<1)       ,'IDADE_SOCIOS']      = 1
df.loc[ (df['QTDSOCIOS']<1)          ,'QTDSOCIOS']         = 1
df.loc[ (df['MEDIA_JUROS']<1)        ,'MEDIA_JUROS']       = 1
df.loc[ (df['PRAZO_AMORTIZACAO']<1)  ,'PRAZO_AMORTIZACAO'] = 1
df.loc[ (df['PRAZO_CARENCIA']<1)     ,'PRAZO_CARENCIA']    = 1
df.loc[ (df['VALOR_CONTRATO']<1)     ,'VALOR_CONTRATO']    = 1
df.loc[ (df['VALOR_DESENBOLSO']<1)   ,'VALOR_DESENBOLSO']  = 1


CPU times: total: 31.2 ms
Wall time: 7.99 ms


In [18]:
%%time
df['enc_NATJUR']        = df.NATJUR.astype("category").cat.codes
df['enc_GARANTIA']      = df.GARANTIA.astype("category").cat.codes
df['enc_INSTRUMENTO']   = df.INSTRUMENTO.astype("category").cat.codes
df['enc_CUSTO']         = df.CUSTO.astype("category").cat.codes
df['enc_PORTE_CLIENTE'] = df.Porte_Cliente.astype("category").cat.codes
df['enc_PORTE_RECEITA'] = df.PORTE_RECEITA.astype("category").cat.codes
df['enc_SITUACAO']      = df.SITUACAO.astype("category").cat.codes
df['enc_UF']            = df.UF.astype("category").cat.codes

CPU times: total: 62.5 ms
Wall time: 56 ms


In [19]:
%%time
df['ln_capsoc']        = np.log(df['CAPITAL_SOCIAL']+1)
df['ln_idade']         = np.log(df['IDADE']+1)
df['ln_contratos']     = np.log(df['NCONTRATOS']+1)
df['ln_filiais']       = np.log(df['NFILIAIS']+1)
df['ln_sociosage']     = np.log(df['IDADE_SOCIOS']+1)
df['ln_qtdsocios']     = np.log(df['QTDSOCIOS']+1)
df['ln_juros']         = np.log(df['MEDIA_JUROS']+1)
df['ln_amortizacao']   = np.log(df['PRAZO_AMORTIZACAO']+1)
df['ln_carencia']      = np.log(df['PRAZO_CARENCIA']+1)
df['ln_vlrcontrato']   = np.log(df['VALOR_CONTRATO']+1)
df['ln_vlrdesembolso'] = np.log(df['VALOR_DESENBOLSO']+1)

CPU times: total: 31.2 ms
Wall time: 32 ms


## Modelo sem histórico foi treinado com as variáveis na seguinte ordem:
['faixa_hora', 'vl_medio_mes_atual', 'dif_vl_1', 'tres_prim_dig_codbarras', 'pagador_pf', 'dif_vl_4', 'dia_do_mes', 'qtd_operacoes_mes_corrente', 'vl_medio_dia_corrente', 'sec_dig', 'qtd_operacoes_dia_corrente', 'qtd_trn_60min', 'centavos', 'dia_da_semana']

In [20]:
%%time
files = df.columns
selected_files = files.str.contains('ln_|enc_|INDIRETA|EMPRESA_PUBLICA|INOVACAO|TESOURO|SOCIO_PJ')
atributes = files[selected_files]

CPU times: total: 15.6 ms
Wall time: 8.02 ms


In [21]:
# Display the selected feature column names.
atributes

Index(['EMPRESA_PUBLICA', 'INDIRETA', 'INOVACAO', 'TESOURO', 'SOCIO_PJ',
       'enc_NATJUR', 'enc_GARANTIA', 'enc_INSTRUMENTO', 'enc_CUSTO',
       'enc_PORTE_CLIENTE', 'enc_PORTE_RECEITA', 'enc_SITUACAO', 'enc_UF',
       'ln_capsoc', 'ln_idade', 'ln_contratos', 'ln_filiais', 'ln_sociosage',
       'ln_qtdsocios', 'ln_juros', 'ln_amortizacao', 'ln_carencia',
       'ln_vlrcontrato', 'ln_vlrdesembolso'],
      dtype='object')

In [22]:
# dtype of each selected feature column.
df[atributes].dtypes

EMPRESA_PUBLICA        int64
INDIRETA               int64
INOVACAO               int64
TESOURO                int64
SOCIO_PJ               int64
enc_NATJUR              int8
enc_GARANTIA            int8
enc_INSTRUMENTO         int8
enc_CUSTO               int8
enc_PORTE_CLIENTE       int8
enc_PORTE_RECEITA       int8
enc_SITUACAO            int8
enc_UF                  int8
ln_capsoc            float64
ln_idade             float64
ln_contratos         float64
ln_filiais           float64
ln_sociosage         float64
ln_qtdsocios         float64
ln_juros             float64
ln_amortizacao       float64
ln_carencia          float64
ln_vlrcontrato       float64
ln_vlrdesembolso     float64
dtype: object

## SMOTE: Synthetic Minority Oversampling Technique

In [23]:
# Label vector: the SITUACAO column.
y0 = df['SITUACAO']
# Feature frame restricted to the selected columns (X0 is reassigned to the
# scaled array in the following cell).
X0 = df[atributes]

In [24]:
# Disabled alternative: load a previously saved scaler instead of fitting one.
#import joblib
#scaler = joblib.load("scaler.saved")

from numpy import asarray  # NOTE(review): asarray is not used in the cells visible here
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
# Fit the scaler on all rows of the selected features and replace X0 with the
# scaled values.
# NOTE(review): the scaler is fitted before the train/test split, so held-out
# rows contribute to the fitted min/max; consider fitting on the training rows only.
X0 = scaler.fit_transform(df[atributes])

In [25]:
# Both splits use the same proportion and seed.
split_options = dict(test_size=0.5, random_state=123)

# First split: half of the rows for training, the other half held back.
x_train, x_test0, y_train, y_test0 = train_test_split(X0, y0, **split_options)

# Second split: the held-back half is divided evenly into a test set and an
# out-of-sample set.
# NOTE(review): neither split is stratified on the label.
x_test, x_out, y_test, y_out = train_test_split(x_test0, y_test0, **split_options)

In [27]:
%%time
from bayes_opt import BayesianOptimization
from sklearn.svm import SVC

lem_range = ['Newton','Gradient']
kr_range = ['uniform','poly','rbf','sigmoid','precomputed']

def gbm_xgb( learning_rate, max_depth, gamma, min_child_weight, subsample, 
             colsample_bytree, colsample_bynode, colsample_bylevel, eta,
             reg_alpha, reg_lambda, n_estimators, max_delta_step):
    model = xgb.XGBClassifier(
                  learning_rate      = learning_rate,
                  max_depth          = int(max_depth),
                  gamma              = gamma,
                  min_child_weight   = int(min_child_weight),
                  subsample          = subsample,
                  colsample_bytree   = int(colsample_bytree),
                  colsample_bynode   = int(colsample_bynode),
                  colsample_bylevel  = int(colsample_bylevel),
                  eta                = eta,
                  reg_alpha          = reg_alpha,
                  reg_lambda         = reg_lambda,
                  n_estimators       = int(n_estimators),
                  max_delta_step     = max_delta_step,
                  eval_metric        = 'Logloss' ,  random_state=666)
    
    model.fit(x_train,y_train, verbose=False)
    return roc_auc_score(x_test, )


params_xgb = {
    'learning_rate'           : (0.0001 ,0.666),
    'max_depth'               : (1.0000 ,12.00),
    'gamma'                   : (0.0001 ,120.0),
    'min_child_weight'        : (0.0001 ,20.00),
    'subsample'               : (0.0001 ,0.666),
    'colsample_bytree'        : (0.0001 ,0.999),
    'colsample_bynode'        : (0.0001 ,0.999),
    'colsample_bylevel'       : (0.0001 ,0.999),
    'eta'                     : (0.0001 ,1.000),
    'reg_alpha'               : (0.0001 ,12.00),
    'reg_lambda'              : (0.0001 ,12.00),
    'n_estimators'            : (1.0000 ,500.0),
    'max_delta_step'          : (0.0001 ,100.0),   
}

xgb0 = BayesianOptimization(f=gbm_xgb, pbounds=params_xgb, random_state=123) 
xgb0.maximize(init_points=30, n_iter=320, acq='ucb')

|   iter    |  target   | colsam... | colsam... | colsam... |    eta    |   gamma   | learni... | max_de... | max_depth | min_ch... | n_esti... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


Passing acquisition function parameters or gaussian process parameters to maximize
is no longer supported, and will cause an error in future releases. Instead,
please use the "set_gp_params" method to set the gp params, and pass an instance
 of bayes_opt.util.UtilityFunction using the acquisition_function argument



XGBoostError: [21:48:36] C:\buildkite-agent\builds\buildkite-windows-cpu-autoscaling-group-i-0fc7796c793e6356f-1\xgboost\xgboost-ci-windows\src\metric\metric.cc:49: Unknown metric function Logloss

In [62]:
# Best parameter set recorded by the svm0 optimizer.
# NOTE(review): svm0 is not defined in any cell visible here, so this cell
# depends on kernel state from another run and fails on a fresh kernel.
params_svm0 = svm0.max['params']
params_svm0

{'bagging_temperature': 69.6469489128676,
 'depth': 4.147532684454174,
 'eval_metric': 0.22692876841884668,
 'iterations': 110.71163904749535,
 'l2_leaf_reg': 8.633655690529778,
 'leaf_estimation_method': 0.4231641494784485,
 'learning_rate': 0.6531908797043154,
 'model_size_reg': 68.48300537551248,
 'rsm': 0.48050287639272815}

In [63]:
# Unpack the best parameter set of the svm0 optimizer into individual variables.
# Integer-valued settings are truncated with int(); leaf_estimation_method is
# looked up in lem_range by its truncated index.
best_params = svm0.max['params']

depth                  = int(best_params['depth'])
iterations             = int(best_params['iterations'])
l2_leaf_reg            = best_params['l2_leaf_reg']
leaf_estimation_method = lem_range[int(best_params['leaf_estimation_method'])]
learning_rate          = best_params['learning_rate']
model_size_reg         = int(best_params['model_size_reg'])
rsm                    = best_params['rsm']
bagging_temperature    = best_params['bagging_temperature']

print(
    '\n depth:', depth,
    '\n iterations:', iterations,
    '\n l2_leaf_reg:', l2_leaf_reg,
    '\n leaf_estimation_method:', leaf_estimation_method,
    '\n learning_rate:', learning_rate,
    '\n model_size_reg:', model_size_reg,
    '\n rsm:', rsm,
    '\n bagging_temperature:', bagging_temperature,
)


 depth: 4 
 iterations: 110 
 l2_leaf_reg: 8.633655690529778 
 leaf_estimation_method: Newton 
 learning_rate: 0.6531908797043154 
 model_size_reg: 68 
 rsm: 0.48050287639272815 
 bagging_temperature: 69.6469489128676


In [None]:
%%time
from catboost import Pool, CatBoost, CatBoostClassifier

cbbb= CatBoostClassifier( depth=depth, iterations=iterations, l2_leaf_reg=l2_leaf_reg, 
                           leaf_estimation_method=leaf_estimation_method, learning_rate=learning_rate, 
                           model_size_reg=model_size_reg, rsm=rsm, bagging_temperature =bagging_temperature ,  verbose=False)

cbbb.fit(x_train, y_train)

In [65]:
%%time
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
print("Accuracy:" ,accuracy_score(  y_train, cbbb.predict(x_train))) #Accuracy: 0.998776450266302
print("F1 score:" ,f1_score(        y_train, cbbb.predict(x_train))) #F1 score: 0.37065637065637064
print("Recall:"   ,recall_score(    y_train, cbbb.predict(x_train))) #Recall: 0.25
print("Precision:",precision_score( y_train, cbbb.predict(x_train))) #Precision: 0.7164179104477612

Accuracy: 0.9625158269780042
F1 score: 0.9481318502220594
Recall: 0.9173697074824568
Precision: 0.9810286677908938
Wall time: 205 ms


In [66]:
%%time
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
print("Accuracy:" ,accuracy_score(  y_test, cbbb.predict(x_test))) #Accuracy: 0.998776450266302
print("F1 score:" ,f1_score(        y_test, cbbb.predict(x_test))) #F1 score: 0.37065637065637064
print("Recall:"   ,recall_score(    y_test, cbbb.predict(x_test))) #Recall: 0.25
print("Precision:",precision_score( y_test, cbbb.predict(x_test))) #Precision: 0.7164179104477612

Accuracy: 0.9554770318021202
F1 score: 0.93890415387102
Recall: 0.9085079762277135
Precision: 0.9714046822742475
Wall time: 166 ms


In [67]:
%%time
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
print("Accuracy:" ,accuracy_score(  y_out, cbbb.predict(x_out))) #Accuracy: 0.998776450266302
print("F1 score:" ,f1_score(        y_out, cbbb.predict(x_out))) #F1 score: 0.37065637065637064
print("Recall:"   ,recall_score(    y_out, cbbb.predict(x_out))) #Recall: 0.25
print("Precision:",precision_score( y_out, cbbb.predict(x_out))) #Precision: 0.7164179104477612

Accuracy: 0.9527707437724515
F1 score: 0.9349342852506897
Recall: 0.9011573350015639
Precision: 0.971341874578557
Wall time: 96.1 ms


## Leitura dos dados originais

In [72]:
## Original data
# Disabled alternative: rescale df with the fitted scaler.
#X0_mm = scaler.transform(df[atributes])
# NOTE(review): df_all is not defined in any cell visible here, and its features
# are taken without scaler.transform while x_train came from the scaled X0.
# Confirm df_all is already on the scale the model was trained on.
X0_mm = df_all[atributes]

In [73]:
#y0 = df['fraude']
# Print the shape of the scoring matrix next to the shape of df.
print(X0_mm.shape, df.shape)

(44691, 21) (44691, 202)


In [74]:
%%time
#df_all['proba'] = cbbb.predict_proba(X0_mm)[:,1]
df_all['proba'] = cbbb.predict_proba(df_all[atributes])[:,1]

Wall time: 55.1 ms


In [75]:
# Number of scored rows for each value of FRAUDES.
df_all[['FRAUDES','proba']].groupby(['FRAUDES']).count()

Unnamed: 0_level_0,proba
FRAUDES,Unnamed: 1_level_1
0.0,42453
1.0,2238


In [76]:
%%time
# Summary statistics of the predicted probability for each value of FRAUDES.
df_all[['FRAUDES','proba']].groupby(['FRAUDES']).describe()

Wall time: 29.4 ms


Unnamed: 0_level_0,proba,proba,proba,proba,proba,proba,proba,proba
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
FRAUDES,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
0.0,42453.0,0.066089,0.104925,1.3e-05,0.012068,0.031692,0.07403,0.996629
1.0,2238.0,0.594339,0.414987,0.000937,0.101631,0.84128,0.986915,0.999958


In [129]:
%%time
df_all.loc[ df_all['proba']>=0.15 ,'fraude_pred'] = 1
df_all.loc[ df_all['proba'] <0.15 ,'fraude_pred'] = 0

Wall time: 5.98 ms


<a name="metricas"></a>


# 3) Métricas
  
- ir para [Menu Principal](#navegacao)

<a name="amostra"></a>


## 3.1) Métricas na Amostra
  
- ir para [Menu Principal](#navegacao)

In [130]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the FRAUDES labels against the thresholded predictions,
# computed over all rows of df_all.
actual_labels = df_all['FRAUDES']
predicted_labels = df_all['fraude_pred']
confusao_pop = confusion_matrix(actual_labels, predicted_labels)
print("Confusion matrix for test:\n%s" % confusao_pop )

Confusion matrix for test:
[[37964  4489]
 [  662  1576]]


In [131]:
%%time
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

print("Accuracy:" , accuracy_score(df_all['FRAUDES'], df_all['fraude_pred'])) 
print("F1 score:" , f1_score(df_all['FRAUDES'], df_all['fraude_pred'])) 
print("Recall:"   , recall_score(df_all['FRAUDES'], df_all['fraude_pred'])) 
print("Precision:", precision_score(df_all['FRAUDES'], df_all['fraude_pred'])) 

Accuracy: 0.8847418943411425
F1 score: 0.3796218234373118
Recall: 0.7042001787310098
Precision: 0.2598516075845012
Wall time: 73.8 ms


<a name="falsopos"></a>

## 3.2) Taxa de Falso Positivo
  
- ir para [Menu Principal](#navegacao)

In [132]:
%%time
tn, fp, fn, tp = confusao_pop.ravel()
print('FPR:', fp/(fp + tn))
print('TPR:', tp/(tp + fn))

#FPR: 0.10574046592702518
#TPR: 0.7042001787310098

FPR: 0.10574046592702518
TPR: 0.7042001787310098
Wall time: 985 µs


In [30]:
%%time
import xgboost as xgb
space = dict()
space['booster']           = ['gbtree','gblinear','dart']
space['colsample_bylevel'] = [row/50 for row in range(1,100,10)]
#space['colsample_bynode']  = [row/50 for row in range(1,100,10)]
#space['colsample_bytree']  = [row/50 for row in range(1,100,10)]
space['gamma']             = [row/30 for row in range(1,30,2)]
space['importance_type']   = ['gain']
space['learning_rate']     = [row/400 for row in range(1,200,10)]
space['max_delta_step']    = [row/50 for row in range(1,100,5)]
space['max_depth']         = [row for row in range(1,10,2)]
space['min_child_weight']  = [row/1000 for row in range(1,100,20)]
space['reg_lambda']        = [row/1000 for row in range(1,100,20)]
space['reg_alpha']         = [row/1000 for row in range(1,100,20)]
space['subsample']         = [row/200  for row in range(1,100,20)]
space['objective']         = ['reg:linear']

xgboost = xgb.XGBClassifier(n_estimators=100,verbosity=0)
xgboost1= RandomizedSearchCV(xgboost, space ,  cv=3, random_state=1, n_iter=300, n_jobs=20)

xgboost1.fit(x_train,y_train, verbose=False)

312 fits failed out of a total of 900.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
60 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\patrick.franco\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\patrick.franco\AppData\Local\Programs\Python\Python311\Lib\site-packages\xgboost\core.py", line 620, in inner_f
    return func(**kwargs)
           ^^^^^^^^^^^^^^
  File "c:\Users\patrick.franco\AppData\Local\Programs\Python\Python311\Lib\site-packages\xgboost\sklearn.py", line 1490, in fit
    self._Booster = train(
                    ^^^^^^
  File "c:\

CPU times: total: 1min 33s
Wall time: 18min 9s
