# Testing Code

In [25]:
import cudf
import numpy as np

In [26]:
bureau_balance = cudf.read_csv('data/bureau_balance.csv')
bureau = cudf.read_csv('data/bureau.csv')
cc_balance = cudf.read_csv('data/credit_card_balance.csv')
payments = cudf.read_csv('data/installments_payments.csv')
pc_balance = cudf.read_csv('data/POS_CASH_balance.csv')
prev = cudf.read_csv('data/previous_application.csv')
train = cudf.read_csv('data/application_train.csv')
test = cudf.read_csv('data/application_test.csv')

## EDA

In [27]:
payments.columns


Index(['SK_ID_PREV', 'SK_ID_CURR', 'NUM_INSTALMENT_VERSION',
       'NUM_INSTALMENT_NUMBER', 'DAYS_INSTALMENT', 'DAYS_ENTRY_PAYMENT',
       'AMT_INSTALMENT', 'AMT_PAYMENT'],
      dtype='object')

## Feature Engineering Part 1

In [28]:
bureau_balance.head()

Unnamed: 0,SK_ID_BUREAU,MONTHS_BALANCE,STATUS
0,5715448,0,C
1,5715448,-1,C
2,5715448,-2,C
3,5715448,-3,C
4,5715448,-4,C


In [29]:
bureau_balance.query("SK_ID_BUREAU==5715448").head(5)

Unnamed: 0,SK_ID_BUREAU,MONTHS_BALANCE,STATUS
0,5715448,0,C
1,5715448,-1,C
2,5715448,-2,C
3,5715448,-3,C
4,5715448,-4,C


In [30]:
# Lets setup the agg functions that we want to use for numerics
agg_func = ['mean', 'max', 'min', 'sum', 'std']

In [31]:
bureau_balance.dtypes

SK_ID_BUREAU       int64
MONTHS_BALANCE     int64
STATUS            object
dtype: object

In [32]:
avg_bbalance = bureau_balance.groupby('SK_ID_BUREAU') \
            .agg({"MONTHS_BALANCE": agg_func})

In [33]:
cc_balance.dtypes

SK_ID_PREV                      int64
SK_ID_CURR                      int64
MONTHS_BALANCE                  int64
AMT_BALANCE                   float64
AMT_CREDIT_LIMIT_ACTUAL         int64
AMT_DRAWINGS_ATM_CURRENT      float64
AMT_DRAWINGS_CURRENT          float64
AMT_DRAWINGS_OTHER_CURRENT    float64
AMT_DRAWINGS_POS_CURRENT      float64
AMT_INST_MIN_REGULARITY       float64
AMT_PAYMENT_CURRENT           float64
AMT_PAYMENT_TOTAL_CURRENT     float64
AMT_RECEIVABLE_PRINCIPAL      float64
AMT_RECIVABLE                 float64
AMT_TOTAL_RECEIVABLE          float64
CNT_DRAWINGS_ATM_CURRENT      float64
CNT_DRAWINGS_CURRENT            int64
CNT_DRAWINGS_OTHER_CURRENT    float64
CNT_DRAWINGS_POS_CURRENT      float64
CNT_INSTALMENT_MATURE_CUM     float64
NAME_CONTRACT_STATUS           object
SK_DPD                          int64
SK_DPD_DEF                      int64
dtype: object

In [34]:
cc_balance.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_DRAWINGS_ATM_CURRENT,AMT_DRAWINGS_CURRENT,AMT_DRAWINGS_OTHER_CURRENT,AMT_DRAWINGS_POS_CURRENT,AMT_INST_MIN_REGULARITY,...,AMT_RECIVABLE,AMT_TOTAL_RECEIVABLE,CNT_DRAWINGS_ATM_CURRENT,CNT_DRAWINGS_CURRENT,CNT_DRAWINGS_OTHER_CURRENT,CNT_DRAWINGS_POS_CURRENT,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,2562384,378907,-6,56.97,135000,0.0,877.5,0.0,877.5,1700.325,...,0.0,0.0,0.0,1,0.0,1.0,35.0,Active,0,0
1,2582071,363914,-1,63975.555,45000,2250.0,2250.0,0.0,0.0,2250.0,...,64875.555,64875.555,1.0,1,0.0,0.0,69.0,Active,0,0
2,1740877,371185,-7,31815.225,450000,0.0,0.0,0.0,0.0,2250.0,...,31460.085,31460.085,0.0,0,0.0,0.0,30.0,Active,0,0
3,1389973,337855,-4,236572.11,225000,2250.0,2250.0,0.0,0.0,11795.76,...,233048.97,233048.97,1.0,1,0.0,0.0,10.0,Active,0,0
4,1891521,126868,-1,453919.455,450000,0.0,11547.0,0.0,11547.0,22924.89,...,453919.455,453919.455,0.0,1,0.0,1.0,101.0,Active,0,0


In [35]:
sum_cc_balance = cc_balance.drop('SK_ID_PREV', axis=1).select_dtypes('number').groupby('SK_ID_CURR') \
                .agg(agg_func)
sum_cc_balance.head()

Unnamed: 0_level_0,MONTHS_BALANCE,MONTHS_BALANCE,MONTHS_BALANCE,MONTHS_BALANCE,MONTHS_BALANCE,AMT_BALANCE,AMT_BALANCE,AMT_BALANCE,AMT_BALANCE,AMT_BALANCE,...,SK_DPD,SK_DPD,SK_DPD,SK_DPD,SK_DPD,SK_DPD_DEF,SK_DPD_DEF,SK_DPD_DEF,SK_DPD_DEF,SK_DPD_DEF
Unnamed: 0_level_1,mean,max,min,sum,std,mean,max,min,sum,std,...,mean,max,min,sum,std,mean,max,min,sum,std
SK_ID_CURR,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
241216,-7.0,-1,-13,-91,3.89444,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0,0,0.0,0.0,0,0,0,0.0
200574,-5.0,-2,-8,-35,2.160247,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0,0,0.0,0.0,0,0,0,0.0
100116,-29.0,-2,-56,-1595,16.02082,178584.459545,238200.885,-633.735,9822145.275,77556.01974,...,0.509091,28,0,28,3.775519,0.0,0,0,0,0.0
101868,-14.5,-1,-28,-406,8.225975,52057.501071,232196.535,0.0,1457610.03,85218.701743,...,0.0,0,0,0,0.0,0.0,0,0,0,0.0
220536,-45.5,-1,-90,-4095,26.124701,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0,0,0.0,0.0,0,0,0,0.0


In [36]:
avg_bureau = bureau.merge(avg_bbalance, how='left', 
                          left_on='SK_ID_BUREAU', 
                          right_index=True)

In [37]:
avg_bureau.columns

Index([              'SK_ID_CURR',             'SK_ID_BUREAU',
                  'CREDIT_ACTIVE',          'CREDIT_CURRENCY',
                    'DAYS_CREDIT',       'CREDIT_DAY_OVERDUE',
            'DAYS_CREDIT_ENDDATE',        'DAYS_ENDDATE_FACT',
         'AMT_CREDIT_MAX_OVERDUE',       'CNT_CREDIT_PROLONG',
                 'AMT_CREDIT_SUM',      'AMT_CREDIT_SUM_DEBT',
           'AMT_CREDIT_SUM_LIMIT',   'AMT_CREDIT_SUM_OVERDUE',
                    'CREDIT_TYPE',       'DAYS_CREDIT_UPDATE',
                    'AMT_ANNUITY', ('MONTHS_BALANCE', 'mean'),
        ('MONTHS_BALANCE', 'max'),  ('MONTHS_BALANCE', 'min'),
        ('MONTHS_BALANCE', 'sum'),  ('MONTHS_BALANCE', 'std')],
      dtype='object')

In [38]:
payments.head(5)

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NUM_INSTALMENT_VERSION,NUM_INSTALMENT_NUMBER,DAYS_INSTALMENT,DAYS_ENTRY_PAYMENT,AMT_INSTALMENT,AMT_PAYMENT
0,1054186,161674,1.0,6,-1180.0,-1187.0,6948.36,6948.36
1,1330831,151639,0.0,34,-2156.0,-2156.0,1716.525,1716.525
2,2085231,193053,2.0,1,-63.0,-63.0,25425.0,25425.0
3,2452527,199697,1.0,3,-2418.0,-2426.0,24350.13,24350.13
4,2714724,167756,1.0,2,-1383.0,-1366.0,2165.04,2160.585


In [39]:
payments.columns

Index(['SK_ID_PREV', 'SK_ID_CURR', 'NUM_INSTALMENT_VERSION',
       'NUM_INSTALMENT_NUMBER', 'DAYS_INSTALMENT', 'DAYS_ENTRY_PAYMENT',
       'AMT_INSTALMENT', 'AMT_PAYMENT'],
      dtype='object')

In [40]:
#payments
sum_payments = payments.drop('SK_ID_PREV', axis=1)
sum_payments['PAYMENT_PERC'] = sum_payments.AMT_PAYMENT / sum_payments.AMT_INSTALMENT
sum_payments['PAYMENT_DIFF'] = sum_payments.AMT_INSTALMENT - sum_payments.AMT_PAYMENT
sum_payments['DPD'] = sum_payments.DAYS_ENTRY_PAYMENT - sum_payments.DAYS_INSTALMENT
sum_payments['DBD'] = sum_payments.DAYS_INSTALMENT - sum_payments.DAYS_ENTRY_PAYMENT
sum_payments['DPD'] = sum_payments['DPD']
sum_payments['DBD'] = sum_payments['DBD']

In [41]:
pc_balance.head(10)

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,CNT_INSTALMENT,CNT_INSTALMENT_FUTURE,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,1803195,182943,-31,48.0,45.0,Active,0,0
1,1715348,367990,-33,36.0,35.0,Active,0,0
2,1784872,397406,-32,12.0,9.0,Active,0,0
3,1903291,269225,-35,48.0,42.0,Active,0,0
4,2341044,334279,-35,36.0,35.0,Active,0,0
5,2207092,342166,-32,12.0,12.0,Active,0,0
6,1110516,204376,-38,48.0,43.0,Active,0,0
7,1387235,153211,-35,36.0,36.0,Active,0,0
8,1220500,112740,-31,12.0,12.0,Active,0,0
9,2371489,274851,-32,24.0,16.0,Active,0,0


In [42]:
#sum_pc_balance
sum_pc_balance = pc_balance.drop('SK_ID_PREV', axis=1).select_dtypes('number').groupby('SK_ID_CURR') \
            .agg(agg_func)
sum_pc_balance.head(5)    

Unnamed: 0_level_0,MONTHS_BALANCE,MONTHS_BALANCE,MONTHS_BALANCE,MONTHS_BALANCE,MONTHS_BALANCE,CNT_INSTALMENT,CNT_INSTALMENT,CNT_INSTALMENT,CNT_INSTALMENT,CNT_INSTALMENT,...,SK_DPD,SK_DPD,SK_DPD,SK_DPD,SK_DPD,SK_DPD_DEF,SK_DPD_DEF,SK_DPD_DEF,SK_DPD_DEF,SK_DPD_DEF
Unnamed: 0_level_1,mean,max,min,sum,std,mean,max,min,sum,std,...,mean,max,min,sum,std,mean,max,min,sum,std
SK_ID_CURR,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
429363,-33.270833,-2,-81,-1597,23.86731,14.916667,24.0,2.0,716.0,9.613451,...,0.020833,1,0,1,0.144338,0.020833,1,0,1,0.144338
347431,-11.5,-9,-14,-69,1.870829,5.833333,6.0,5.0,35.0,0.408248,...,0.0,0,0,0,0.0,0.0,0,0,0,0.0
428968,-73.105263,-1,-96,-1389,37.574954,10.842105,12.0,10.0,206.0,1.014515,...,1.052632,10,0,20,2.990238,1.052632,10,0,20,2.990238
379270,-6.791667,-2,-14,-163,3.635324,11.166667,12.0,8.0,268.0,1.167184,...,0.0,0,0,0,0.0,0.0,0,0,0,0.0
133128,-10.0,-2,-18,-170,5.049752,18.0,18.0,18.0,306.0,0.0,...,0.0,0,0,0,0.0,0.0,0,0,0,0.0


In [43]:
prev = prev.drop('SK_ID_PREV', axis=1)
prev.DAYS_FIRST_DRAWING = prev.DAYS_FIRST_DRAWING.map(lambda x: np.nan if x == 365243 else x)
prev.DAYS_FIRST_DUE = prev.DAYS_FIRST_DUE.map(lambda x: np.nan if x == 365243 else x)
prev.DAYS_LAST_DUE_1ST_VERSION = prev.DAYS_LAST_DUE_1ST_VERSION.map(lambda x: np.nan if x == 365243 else x)
prev.DAYS_LAST_DUE = prev.DAYS_LAST_DUE.map(lambda x: np.nan if x == 365243 else x)
prev.DAYS_TERMINATION = prev.DAYS_TERMINATION.map(lambda x: np.nan if x == 365243 else x)
prev.APP_CREDIT_PERC = prev.AMT_APPLICATION / prev.AMT_CREDIT

In [44]:
prev.dtypes

SK_ID_CURR                       int64
NAME_CONTRACT_TYPE              object
AMT_ANNUITY                    float64
AMT_APPLICATION                float64
AMT_CREDIT                     float64
AMT_DOWN_PAYMENT               float64
AMT_GOODS_PRICE                float64
WEEKDAY_APPR_PROCESS_START      object
HOUR_APPR_PROCESS_START          int64
FLAG_LAST_APPL_PER_CONTRACT     object
NFLAG_LAST_APPL_IN_DAY           int64
RATE_DOWN_PAYMENT              float64
RATE_INTEREST_PRIMARY          float64
RATE_INTEREST_PRIVILEGED       float64
NAME_CASH_LOAN_PURPOSE          object
NAME_CONTRACT_STATUS            object
DAYS_DECISION                    int64
NAME_PAYMENT_TYPE               object
CODE_REJECT_REASON              object
NAME_TYPE_SUITE                 object
NAME_CLIENT_TYPE                object
NAME_GOODS_CATEGORY             object
NAME_PORTFOLIO                  object
NAME_PRODUCT_TYPE               object
CHANNEL_TYPE                    object
SELLERPLACE_AREA         

In [45]:
sum_prev = prev.select_dtypes('number').groupby('SK_ID_CURR') \
            .agg(agg_func)
sum_prev.head(10)

Unnamed: 0_level_0,AMT_ANNUITY,AMT_ANNUITY,AMT_ANNUITY,AMT_ANNUITY,AMT_ANNUITY,AMT_APPLICATION,AMT_APPLICATION,AMT_APPLICATION,AMT_APPLICATION,AMT_APPLICATION,...,DAYS_TERMINATION,DAYS_TERMINATION,DAYS_TERMINATION,DAYS_TERMINATION,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL,NFLAG_INSURED_ON_APPROVAL,NFLAG_INSURED_ON_APPROVAL,NFLAG_INSURED_ON_APPROVAL,NFLAG_INSURED_ON_APPROVAL
Unnamed: 0_level_1,mean,max,min,sum,std,mean,max,min,sum,std,...,mean,max,min,sum,std,mean,max,min,sum,std
SK_ID_CURR,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
400998,12368.7,12368.7,12368.7,12368.7,,113760.0,113760.0,113760.0,113760.0,,...,-177.0,-177.0,-177.0,-177.0,,1.0,1.0,1.0,1.0,
168727,11065.242857,15928.785,3758.67,77456.7,4087.426934,54031.909091,123741.0,0.0,594351.0,49654.4741,...,-957.2,-456.0,-1597.0,-4786.0,466.3439718,0.4,1.0,0.0,2.0,0.547722558
149317,10225.26,10225.26,10225.26,10225.26,,76455.0,76455.0,76455.0,76455.0,,...,-800.0,-800.0,-800.0,-800.0,,0.0,0.0,0.0,0.0,
127515,14600.72,31093.695,5202.495,131406.48,11099.69071,136259.590909,675000.0,0.0,1498855.5,200534.7919,...,,-50.0,-1569.0,,,0.285714,1.0,0.0,2.0,0.487950036
206610,26384.16,65110.14,4265.91,79152.48,33650.67017,149379.228,630000.0,0.0,746896.14,270911.4652,...,-1037.0,-141.0,-1618.0,-3111.0,787.2744121,0.333333,1.0,0.0,1.0,0.577350269
103788,8771.0175,14065.605,4266.0,35084.07,4276.300708,66195.0,140670.0,14805.0,264780.0,53109.74911,...,-1591.75,-463.0,-2542.0,-6367.0,865.0288531,0.0,0.0,0.0,0.0,0.0
373222,6761.9925,7781.58,5742.405,13523.985,1441.914471,67005.0,80955.0,53055.0,134010.0,19728.2792,...,-1557.5,-1462.0,-1653.0,-3115.0,135.0573952,0.0,0.0,0.0,0.0,0.0
338692,3599.415,3599.415,3599.415,3599.415,,41530.5,41530.5,41530.5,41530.5,,...,-213.0,-213.0,-213.0,-213.0,,0.0,0.0,0.0,0.0,
438152,5597.60625,9000.0,3131.46,22390.425,2509.569443,21193.875,32571.0,0.0,84775.5,14466.89386,...,,-1491.0,-1491.0,,,0.333333,1.0,0.0,1.0,0.577350269
165357,7984.8,11420.82,4548.78,15969.6,4859.266085,57284.1,225000.0,0.0,286420.5,97455.29393,...,,-1.797693e+308,1.797693e+308,,,0.5,1.0,0.0,1.0,0.707106781


In [46]:
sum_cc_balance.columns

MultiIndex([(            'MONTHS_BALANCE', 'mean'),
            (            'MONTHS_BALANCE',  'max'),
            (            'MONTHS_BALANCE',  'min'),
            (            'MONTHS_BALANCE',  'sum'),
            (            'MONTHS_BALANCE',  'std'),
            (               'AMT_BALANCE', 'mean'),
            (               'AMT_BALANCE',  'max'),
            (               'AMT_BALANCE',  'min'),
            (               'AMT_BALANCE',  'sum'),
            (               'AMT_BALANCE',  'std'),
            (   'AMT_CREDIT_LIMIT_ACTUAL', 'mean'),
            (   'AMT_CREDIT_LIMIT_ACTUAL',  'max'),
            (   'AMT_CREDIT_LIMIT_ACTUAL',  'min'),
            (   'AMT_CREDIT_LIMIT_ACTUAL',  'sum'),
            (   'AMT_CREDIT_LIMIT_ACTUAL',  'std'),
            (  'AMT_DRAWINGS_ATM_CURRENT', 'mean'),
            (  'AMT_DRAWINGS_ATM_CURRENT',  'max'),
            (  'AMT_DRAWINGS_ATM_CURRENT',  'min'),
            (  'AMT_DRAWINGS_ATM_CURRENT',  'sum'),
            

In [70]:
# join the main frame
train_feat = train.drop('TARGET', axis=1) \
    .merge(avg_bureau, how='left', left_on='SK_ID_CURR', right_index=True) \
    .merge(sum_cc_balance, how='left', left_on='SK_ID_CURR_x', right_index=True) \
    .merge(sum_payments, how='left', left_on='SK_ID_CURR_x', right_index=True) \
    .merge(sum_pc_balance, how='left', left_on='SK_ID_CURR_x', right_index=True) \
    .merge(sum_prev, how='left', left_on='SK_ID_CURR', right_index=True)

#train_feat.head(10)
type(train_feat)

cudf.core.dataframe.DataFrame

In [71]:
train_feat.filter(regex=("SK_ID_CURR.*"))

AttributeError: 'DataFrame' object has no attribute 'filter'

In [66]:
# test the columns
listy = train_feat.columns
list_list = listy.to_list()
type(list_list)

list

In [65]:
import re
r = re.compile("SK_ID_CURR*")
list(filter(r.match, list_list))


TypeError: expected string or bytes-like object

## Charting

Still need to explore doing this properly

In [11]:
import cuxfilter

In [12]:
cux_df = cuxfilter.DataFrame.from_dataframe(avg_bureau)

In [13]:
bar_chart_1 = cuxfilter.charts.bar('CREDIT_DAY_OVERDUE',data_points=5)

In [14]:
d = cux_df.dashboard([bar_chart_1])

## Feature Engineering - Part 2 - Generating the frame for modelling

In [53]:
train_numeric = train.select_dtypes(exclude = ['object'])

In [54]:
df_train = train_numeric.drop('TARGET', axis=1)
df_target = train['TARGET']

In [37]:
df_train.head()

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,351000.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,1129500.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,135000.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,297000.0,...,0,0,0,0,,,,,,
4,100007,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,513000.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


# Building the Model

In [41]:
import xgboost as xgb

In [55]:
type(df_target)

cudf.core.series.Series

In [56]:
dtrain = xgb.DMatrix(df_train, label=df_target)

In [59]:
params = {
    'objective': 'binary:logistic',
    'max_depth': 3, 
    'learning_rate': 0.1,
    'subsample': 0.9,
    'colsample_bytree': 0.7,
    'tree_method': 'gpu_hist'
}

In [61]:
clf = xgb.train(params, dtrain, num_boost_round=1000)

