In [1]:
import pandas as pd
import numpy as np
import zipfile as zp
import warnings
# Notebook-wide configuration.
# Silence every warning category for the rest of the session.
warnings.simplefilter("ignore")
# Let pandas render up to 1000 columns when a frame is displayed.
pd.options.display.max_columns = 1000

In [2]:
# Load the train and test frames from the first member of each zip archive.
with zp.ZipFile('train_zsTMYVA.zip', 'r') as tr, zp.ZipFile('test_9tJUnaB.zip', 'r') as te:
    # Open each member in its own context manager: `pd.read_csv` does not close a
    # handle that the caller passed in, so without this the extracted-file handles
    # would stay open until garbage collection.
    with tr.open(tr.namelist()[0]) as train_csv, te.open(te.namelist()[0]) as test_csv:
        train = pd.read_csv(train_csv)
        test = pd.read_csv(test_csv)

## baseline with L1 preds

In [3]:
# Fraction of null entries in each column of `train`.
misCols = train.isnull().sum(axis=0) / train.shape[0]

# Columns whose null fraction reaches 1, i.e. no row carries a value.
toremove = [col for col, null_frac in misCols.items() if null_frac >= 1]

In [4]:
# Remove the all-null columns (collected in `toremove`) from both frames.
# The same list is applied to `test`, so every listed column must exist there too.
train = train.drop(columns=toremove)
test = test.drop(columns=toremove)

In [5]:
from sklearn.preprocessing import LabelEncoder

In [7]:
# Integer-encode every object-dtype column of `train`, leaving the ID and target alone.
# Each encoder is fitted on the train and test values together so both frames
# share one label -> integer mapping.
skip_cols = ('UCIC_ID', 'Responders')
object_cols = [name for name in train.select_dtypes(include=['object']).columns
               if name not in skip_cols]

for c in object_cols:
    print(c)
    # Values are handed over as plain Python lists, exactly as before.
    # NOTE(review): presumably this lets missing values be encoded alongside the
    # string labels -- confirm before switching to Series/ndarray input.
    train_values = list(train[c].values)
    test_values = list(test[c].values)
    lbl = LabelEncoder().fit(train_values + test_values)
    train[c] = lbl.transform(train_values)
    test[c] = lbl.transform(test_values)

MF_TAG_LIVE
OTHER_LOANS_TAG_LIVE
PL_TAG_LIVE
RD_TAG_LIVE
FD_TAG_LIVE
TL_TAG_LIVE
TWL_TAG_LIVE
lap_tag_live
Billpay_Active_PrevQ1_N
Billpay_Reg_ason_Prev1_N
Charges_cnt_PrevQ1_N
FRX_PrevQ1_N
RBI_Class_Audit
gender_bin
Req_Resolved_PrevQ1
Query_Resolved_PrevQ1
Complaint_Resolved_PrevQ1


## Add L1 preds

In [8]:
# Read the prediction files that get appended to train/test as extra columns.
train_pred, test_pred = (
    pd.read_csv(csv_path) for csv_path in ('train_pred.csv', 'test_pred.csv')
)

In [9]:
# Append the prediction columns to the feature frames, side by side.
# `pd.concat(axis=1)` aligns rows on the index, so a prediction file with a
# different row set would silently yield NaN-padded rows. Fail loudly instead.
for _label, _frame, _preds in (('train', train, train_pred), ('test', test, test_pred)):
    if not _frame.index.equals(_preds.index):
        raise ValueError(
            "%s features (%d rows) and %s predictions (%d rows) do not share the same index"
            % (_label, len(_frame), _label, len(_preds))
        )

# NOTE: re-running this cell appends the prediction columns a second time;
# re-run from the data-loading cell instead.
train = pd.concat([train, train_pred], axis=1)
test = pd.concat([test, test_pred], axis=1)

## Load decomposition features (PCA, TSVD, ICA, GRP, SRP)

In [20]:
# Load the pickled frames that carry the decomposition columns.
# NOTE(review): unpickling can execute arbitrary code -- only load files you produced yourself.
# NOTE(review): in the cells visible here these frames are only previewed, never
# merged into `train` / `test` -- confirm whether that is intended.
papa_train, papa_test = (
    pd.read_pickle(pkl_path) for pkl_path in ('papa_train.pkl', 'papa_test.pkl')
)

In [21]:
# Preview the first rows of the pickled feature frame (shown as the cell's output).
papa_train.head()

Unnamed: 0,AGRI_Closed_PrevQ1,AGRI_DATE,AGRI_PREM_CLOSED_PREVQ1,AGRI_TAG_LIVE,AL_CNC_Closed_PrevQ1,AL_CNC_DATE,AL_CNC_PREM_CLOSED_PREVQ1,AL_CNC_TAG_LIVE,AL_Closed_PrevQ1,AL_DATE,AL_PREM_CLOSED_PREVQ1,AL_TAG_LIVE,ATM_CW_Amt_prev1,ATM_CW_Amt_prev2,ATM_CW_Amt_prev3,ATM_CW_Amt_prev4,ATM_CW_Amt_prev5,ATM_CW_Amt_prev6,ATM_CW_Cnt_prev1,ATM_CW_Cnt_prev2,ATM_CW_Cnt_prev3,ATM_CW_Cnt_prev4,ATM_CW_Cnt_prev5,ATM_CW_Cnt_prev6,ATM_C_prev1,ATM_C_prev2,ATM_C_prev3,ATM_C_prev4,ATM_C_prev5,ATM_C_prev6,ATM_D_prev1,ATM_D_prev2,ATM_D_prev3,ATM_D_prev4,ATM_D_prev5,ATM_D_prev6,ATM_amt_prev1,ATM_amt_prev2,ATM_amt_prev3,ATM_amt_prev4,ATM_amt_prev5,ATM_amt_prev6,BAL_prev1,BAL_prev2,BAL_prev3,BAL_prev4,BAL_prev5,BAL_prev6,BL_Closed_PrevQ1,BL_DATE,BL_PREM_CLOSED_PREVQ1,BL_TAG_LIVE,BRANCH_C_prev1,BRANCH_C_prev2,BRANCH_C_prev3,BRANCH_C_prev4,BRANCH_C_prev5,BRANCH_C_prev6,BRANCH_D_prev1,BRANCH_D_prev2,BRANCH_D_prev3,BRANCH_D_prev4,BRANCH_D_prev5,BRANCH_D_prev6,BRN_CASH_Dep_Amt_prev1,BRN_CASH_Dep_Amt_prev2,BRN_CASH_Dep_Amt_prev3,BRN_CASH_Dep_Amt_prev4,BRN_CASH_Dep_Amt_prev5,BRN_CASH_Dep_Amt_prev6,BRN_CASH_Dep_Cnt_prev1,BRN_CASH_Dep_Cnt_prev2,BRN_CASH_Dep_Cnt_prev3,BRN_CASH_Dep_Cnt_prev4,BRN_CASH_Dep_Cnt_prev5,BRN_CASH_Dep_Cnt_prev6,BRN_CW_Amt_prev1,BRN_CW_Amt_prev2,BRN_CW_Amt_prev3,BRN_CW_Amt_prev4,BRN_CW_Amt_prev5,BRN_CW_Amt_prev6,BRN_CW_Cnt_prev1,BRN_CW_Cnt_prev2,BRN_CW_Cnt_prev3,BRN_CW_Cnt_prev4,BRN_CW_Cnt_prev5,BRN_CW_Cnt_prev6,Billpay_Active_PrevQ1,Billpay_Active_PrevQ1_N,Billpay_Reg_ason_Prev1,Billpay_Reg_ason_Prev1_N,CASH_WD_AMT_Last6,CASH_WD_CNT_Last6,CC_CLOSED_PREVQ1,CC_TAG_LIVE,CE_Closed_PrevQ1,CE_DATE,CE_PREM_CLOSED_PREVQ1,CE_TAG_LIVE,CNR_prev1,CNR_prev2,CNR_prev3,CNR_prev4,CNR_prev5,CNR_prev6,COUNT_ATM_C_prev1,COUNT_ATM_C_prev2,COUNT_ATM_C_prev3,COUNT_ATM_C_prev4,COUNT_ATM_C_prev5,COUNT_ATM_C_prev6,COUNT_ATM_D_prev1,COUNT_ATM_D_prev2,COUNT_ATM_D_prev3,COUNT_ATM_D_prev4,COUNT_ATM_D_prev5,COUNT_ATM_D_prev6,COUNT_BRANCH_C_prev1,COUNT_BRANCH_C_prev2,COUNT_BRANCH_C_prev3,COUNT_BRANCH_C_prev4
,COUNT_BRANCH_C_prev5,COUNT_BRANCH_C_prev6,COUNT_BRANCH_D_prev1,COUNT_BRANCH_D_prev2,COUNT_BRANCH_D_prev3,COUNT_BRANCH_D_prev4,COUNT_BRANCH_D_prev5,COUNT_BRANCH_D_prev6,COUNT_IB_C_prev1,COUNT_IB_C_prev2,COUNT_IB_C_prev3,COUNT_IB_C_prev4,COUNT_IB_C_prev5,COUNT_IB_C_prev6,COUNT_IB_D_prev1,COUNT_IB_D_prev2,COUNT_IB_D_prev3,COUNT_IB_D_prev4,COUNT_IB_D_prev5,COUNT_IB_D_prev6,COUNT_MB_C_prev1,COUNT_MB_C_prev2,COUNT_MB_C_prev3,COUNT_MB_C_prev4,COUNT_MB_C_prev5,COUNT_MB_C_prev6,COUNT_MB_D_prev1,COUNT_MB_D_prev2,COUNT_MB_D_prev3,COUNT_MB_D_prev4,COUNT_MB_D_prev5,COUNT_MB_D_prev6,COUNT_POS_C_prev1,COUNT_POS_C_prev2,COUNT_POS_C_prev3,COUNT_POS_C_prev4,COUNT_POS_C_prev5,COUNT_POS_C_prev6,COUNT_POS_D_prev1,COUNT_POS_D_prev2,COUNT_POS_D_prev3,COUNT_POS_D_prev4,COUNT_POS_D_prev5,COUNT_POS_D_prev6,CR_AMB_Drop_Build_1,CR_AMB_Drop_Build_2,CR_AMB_Drop_Build_3,CR_AMB_Drop_Build_4,CR_AMB_Drop_Build_5,CR_AMB_Prev1,CR_AMB_Prev2,CR_AMB_Prev3,CR_AMB_Prev4,CR_AMB_Prev5,CR_AMB_Prev6,CV_Closed_PrevQ1,CV_DATE,CV_PREM_CLOSED_PREVQ1,CV_TAG_LIVE,C_prev1,C_prev2,C_prev3,C_prev4,C_prev5,C_prev6,Charges_PrevQ1,Charges_cnt_PrevQ1,Complaint_Logged_PrevQ1,Complaint_Resolved_PrevQ1,DEMAT_CLOSED_PREV1YR,DEMAT_TAG_LIVE,D_prev1,D_prev2,D_prev3,D_prev4,D_prev5,D_prev6,Dmat_Investing_PrevQ1,Dmat_Investing_PrevQ2,EDU_DATE,EDU_TAG_LIVE,EFT_SELF_TRANSFER_PrevQ1,EMAIL_UNSUBSCRIBE,ENGAGEMENT_TAG_prev1,EOP_prev1,EOP_prev2,EOP_prev3,EOP_prev4,EOP_prev5,EOP_prev6,FD_AMOUNT_BOOK_PrevQ1,FD_AMOUNT_BOOK_PrevQ2,FD_CLOSED_PREVQ1,FD_PREM_CLOSED_PREVQ1,FD_TAG_LIVE,FRX_PrevQ1,FRX_PrevQ1_N,GL_Closed_PrevQ1,GL_DATE,GL_TAG_LIVE,HL_TAG_LIVE,IB_C_prev1,IB_C_prev2,IB_C_prev3,IB_C_prev4,IB_C_prev5,IB_C_prev6,IB_D_prev1,IB_D_prev2,IB_D_prev3,IB_D_prev4,IB_D_prev5,IB_D_prev6,INS_TAG_LIVE,I_AQB_PrevQ1,I_AQB_PrevQ2,I_CNR_PrevQ1,I_CNR_PrevQ2,I_CR_AQB_PrevQ1,I_CR_AQB_PrevQ2,I_NRV_PrevQ1,I_NRV_PrevQ2,LAP_DATE,LAS_DATE,MB_C_prev1,MB_C_prev2,MB_C_prev3,MB_C_prev4,MB_C_prev5,MB_C_prev6,MB_D_prev1,MB_D_prev2,MB_D_prev3,MB_D_prev4,MB_D_prev5,MB
_D_prev6,MF_TAG_LIVE,NO_OF_Accs,NO_OF_CHEQUE_BOUNCE_V1,NO_OF_COMPLAINTS,NO_OF_FD_BOOK_PrevQ1,NO_OF_FD_BOOK_PrevQ2,NO_OF_RD_BOOK_PrevQ1,NO_OF_RD_BOOK_PrevQ2,OTHER_LOANS_Closed_PrevQ1,OTHER_LOANS_DATE,OTHER_LOANS_PREM_CLOSED_PREVQ1,OTHER_LOANS_TAG_LIVE,PL_Closed_PrevQ1,PL_DATE,PL_PREM_CLOSED_PREVQ1,PL_TAG_LIVE,POS_C_prev1,POS_C_prev2,POS_C_prev3,POS_C_prev4,POS_C_prev5,POS_C_prev6,POS_D_prev1,POS_D_prev2,POS_D_prev3,POS_D_prev4,POS_D_prev5,POS_D_prev6,Percent_Change_in_Big_Expenses,Percent_Change_in_Credits,Percent_Change_in_FT_Bank,Percent_Change_in_FT_outside,Percent_Change_in_Self_Txn,Query_Logged_PrevQ1,Query_Resolved_PrevQ1,RD_AMOUNT_BOOK_PrevQ1,RD_AMOUNT_BOOK_PrevQ2,RD_CLOSED_PREVQ1,RD_PREM_CLOSED_PREVQ1,RD_TAG_LIVE,Recency_of_ATM_TXN,Recency_of_Activity,Recency_of_BRANCH_TXN,Recency_of_CR_TXN,Recency_of_DR_TXN,Recency_of_IB_TXN,Recency_of_MB_TXN,Recency_of_POS_TXN,Req_Logged_PrevQ1,Req_Resolved_PrevQ1,Responders,SEC_ACC_CLOSED_PREV1YR,SEC_ACC_TAG_LIVE,TL_Closed_PrevQ1,TL_DATE,TL_TAG_LIVE,TWL_Closed_PrevQ1,TWL_DATE,TWL_PREM_CLOSED_PREVQ1,TWL_TAG_LIVE,Total_Invest_in_MF_PrevQ1,Total_Invest_in_MF_PrevQ2,UCIC_ID,age,amb_ratio1,brn_code,count_C_prev1,count_C_prev2,count_C_prev3,count_C_prev4,count_C_prev5,count_C_prev6,count_D_prev1,count_D_prev2,count_D_prev3,count_D_prev4,count_D_prev5,count_D_prev6,count_No_of_MF_PrevQ1,count_No_of_MF_PrevQ2,custinit_CR_amt_prev1,custinit_CR_amt_prev2,custinit_CR_amt_prev3,custinit_CR_amt_prev4,custinit_CR_amt_prev5,custinit_CR_amt_prev6,custinit_CR_cnt_prev1,custinit_CR_cnt_prev2,custinit_CR_cnt_prev3,custinit_CR_cnt_prev4,custinit_CR_cnt_prev5,custinit_CR_cnt_prev6,custinit_DR_amt_prev1,custinit_DR_amt_prev2,custinit_DR_amt_prev3,custinit_DR_amt_prev4,custinit_DR_amt_prev5,custinit_DR_amt_prev6,custinit_DR_cnt_prev1,custinit_DR_cnt_prev2,custinit_DR_cnt_prev3,custinit_DR_cnt_prev4,custinit_DR_cnt_prev5,custinit_DR_cnt_prev6,dependents,eop_ratio1,eop_ratio2,eop_ratio3,eop_ratio4,eop_ratio5,gender_bin,lap_tag_live,net_prev1,net_p
rev2,net_prev3,net_prev4,net_prev5,net_prev6,new_city,percent_chng_bin,ratio_10c,ratio_10d,ratio_11c,ratio_11d,ratio_12c,ratio_12d,ratio_13c,ratio_13d,ratio_14c,ratio_14d,ratio_15c,ratio_15d,ratio_16c,ratio_16d,ratio_17c,ratio_17d,ratio_18c,ratio_18d,ratio_19c,ratio_19d,ratio_1c,ratio_1d,ratio_20c,ratio_20d,ratio_21c,ratio_21d,ratio_22c,ratio_22d,ratio_23c,ratio_23d,ratio_24c,ratio_24d,ratio_25c,ratio_25d,ratio_2c,ratio_2d,ratio_3c,ratio_3d,ratio_4c,ratio_4d,ratio_5c,ratio_5d,ratio_6c,ratio_6d,ratio_7c,ratio_7d,ratio_8c,ratio_8d,ratio_9c,ratio_9d,total_credit_amt,total_debit_amt,vintage,vintage_by_age,zip,1_Imperia,2_Preferred,3_Classic,HOUSEWIFE,INDIVIDUAL,NON_INDIVIDUA,RETIRED,SALARIED,SELF_EMPLOYED,STUDENT,HIGH,LOW,MEDIUM,B_1,C_2,D_3,METROPOLITAN,RURAL,SEMI-URBAN,URBAN,HIGHHIGH,HIGHLOW,HIGHMEDIUM,HIGHNO,LOWHIGH,LOWLOW,LOWMEDIUM,LOWNO,MEDIUMHIGH,MEDIUMLOW,MEDIUMMEDIUM,MEDIUMNO,nannan,teens,adults,oldies,more-oldies,deads,pca_1,tsvd_1,ica_1,grp_1,srp_1,pca_2,tsvd_2,ica_2,grp_2,srp_2,pca_3,tsvd_3,ica_3,grp_3,srp_3,pca_4,tsvd_4,ica_4,grp_4,srp_4,pca_5,tsvd_5,ica_5,grp_5,srp_5,pca_6,tsvd_6,ica_6,grp_6,srp_6,pca_7,tsvd_7,ica_7,grp_7,srp_7,pca_8,tsvd_8,ica_8,grp_8,srp_8,pca_9,tsvd_9,ica_9,grp_9,srp_9,pca_10,tsvd_10,ica_10,grp_10,srp_10
0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,5.0,-1.0,4.740363,-1.0,-1.0,-1.0,0.69897,-1.0,0.778151,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.000004,0.0,4.740371,0.0,-1.0,-1.0,5.000004,-1.0,4.740371,-1.0,6.381356,7.129935,7.304101,6.771956,7.092465,7.261741,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,3.89215,0.0,5.301032,0.0,0.0,4.698979,5.685743,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,5.190335,1.079181,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,5.333911,5.74497,5.704211,5.870767,5.521001,5.857962,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.778151,0.0,0.845098,0.0,0.0,0.0,0.0,0.0,0.30103,0.0,0.30103,0.0,0.0,0.30103,0.954243,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.252398,0.285693,0.296094,0.266753,0.389903,5.871546,5.974951,6.006186,6.016115,6.087607,5.924995,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,3.89215,0.0,3.484157,-1.0,-1.0,-1.0,-1.0,-1.0,5.301032,0.0,5.000004,4.698979,5.732395,0.0,0.0,0.0,-1.0,-1.0,0,-1.0,-1.0,6.886983,6.680407,7.496888,6.739462,7.554096,7.310881,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,5.954604,6.978052,4.987198,5.377996,5.954604,6.014618,5.954604,6.978052,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.795372,-1.0,3.051538,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,1.792392,1.462398,1.462398,2.170262,1.462398,-1.0,-1.0,-1.0,-1.0,-1.0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,170114,1.770852,1.171875,3.015779,0.0,0.0,0.0,0.0,0.30103,0.0,0.30103,0.0,0.778151,0.30103,1.176091,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,3.89215,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,5.301032,-1.0,5.000004,4.698979,5.685743,-1.0,0.0,-1.0,0.69897,0.0,0.90309,-1.0,0.0,
0.970005,1.088559,0.97858,1.096866,1.096866,0,-1.0,-5.301032,0.0,-5.000004,-4.698979,-1.840244,0.0,3.249317,0,-1.0,-1.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,0.0,0.826944,1.0,0.678975,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,3.89215,21.033435,3.289366,3.300839,5.749824,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,-1.647657,-1.647657,-0.00128,-1.165038,4.172873,-3.083875,-3.084549,0.001715,2.026382,-3.349261,5.542959,5.543638,0.001131,-3.780638,6.999472,2.53791,2.530457,-0.001053,-3.228808,-4.81261,-1.952057,-1.884181,0.000568,-4.002869,2.997099,-2.740514,-2.832889,0.000953,-6.293833,-3.38256,-0.841068,-1.065949,0.002417,-1.832567,-1.882848,-2.474672,-2.691724,0.001428,-1.552974,-5.908446,-0.113839,-0.568298,0.001027,2.348098,5.122291,-1.46304,-1.258798,0.001182,-0.546541,-5.128583
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,4.255273,4.39794,4.477121,3.69897,4.267172,4.278754,0.477121,0.60206,0.69897,0.0,0.60206,0.60206,0.0,0.0,0.0,0.0,0.0,0.0,4.255297,4.397957,4.477136,3.699057,4.267195,4.278776,4.255273,4.39794,4.477136,3.69897,4.267195,4.278754,6.986623,7.2791,7.392187,7.035459,7.235604,7.416854,-1.0,-1.0,-1.0,-1.0,3.505069,0.0,0.0,0.0,0.0,1.469822,4.146159,6.05535,4.477136,6.011928,3.653309,6.388039,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,1.0,5.062586,1.342423,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,5.366517,5.780934,5.711962,5.876603,5.53421,5.867469,0.0,0.0,0.0,0.0,0.0,0.0,0.60206,0.69897,0.778151,0.30103,0.69897,0.69897,0.477121,0.0,0.0,0.0,0.0,0.30103,0.477121,0.69897,0.778151,0.30103,0.477121,0.778151,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.477121,0.0,0.845098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.350536,0.335941,0.285109,0.298141,0.22971,6.904948,6.811005,6.743781,6.77623,6.782027,6.938725,-1.0,-1.0,-1.0,-1.0,3.505069,0.0,0.0,0.0,0.0,1.469822,3.312389,-1.0,-1.0,-1.0,-1.0,1.0,4.505164,6.064805,4.832515,6.014035,6.422908,6.391403,7.401788,7.49036,-1.0,-1.0,0,-1.0,-1.0,7.261503,7.055194,7.544259,7.077306,7.60682,7.446377,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.903144,0.0,6.419119,0.0,-1.0,6.824977,7.186865,5.085422,5.399438,6.824977,6.839088,7.722056,7.813778,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,2.416681,-1.0,3.051538,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,1.230449,1.230449,1.30103,1.30103,1.230449,1.778151,-1.0,2.274158,-1.0,-1.0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,7.318569,7.274717,488013,1.568202,1.053024,1.778151,0.477121,0.0,0.0,0.0,0.0,0.30103,0.778151,0.954243,1.113943
,0.477121,1.113943,1.0,1.447158,1.414973,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,4.255297,6.057257,4.579795,6.014035,6.422169,6.388039,0.477121,0.69897,0.845098,0.30103,1.0,0.69897,0.0,0.971589,1.038939,0.974634,1.047554,1.047554,1,-1.0,-1.000095,-6.064805,-4.832515,-6.014035,-6.422908,-4.921581,3.385015,0,-1.0,0.0,-1.0,0.615071,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.664371,-1.0,0.0,-1.0,0.99941,-1.0,0.0,0.0,0.944538,-1.0,0.0,0.0,0.669458,1.0,0.229969,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.778011,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.725161,-1.0,0.0,-1.0,0.0,-1.0,0.0,4.974891,33.049309,3.488833,3.226967,5.602076,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,9.897181,9.897181,0.002774,-0.502444,7.715646,0.063945,0.064251,0.001859,13.518547,7.911112,26.215285,26.215186,-0.002629,-1.234966,24.574522,3.087264,3.098627,-0.002569,22.654123,-4.027793,1.664549,1.538435,0.002288,-19.620915,5.5468,1.225555,1.335966,0.002299,-16.523385,4.682506,-7.662655,-7.470606,0.00404,-2.277443,-0.132546,-0.87369,-0.75804,0.009865,-5.187996,-18.963072,-0.966913,-1.134993,-0.004139,-2.453689,12.40159,4.736117,4.788461,0.001884,-28.458333,13.68236
2,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.004321,0.0,-1.0,-1.0,-1.0,-1.0,2.004321,-1.0,6.296731,7.105903,7.281765,6.688564,7.04907,7.241969,-1.0,-1.0,-1.0,-1.0,4.17612,0.0,0.0,5.471293,5.16197,0.0,6.217484,0.0,0.0,5.471441,5.230298,0.0,4.176091,-1.0,-1.0,5.471292,4.978637,-1.0,0.0,-1.0,-1.0,0.0,0.477121,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2.004321,0.30103,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,5.331817,5.743787,5.702507,5.869477,5.517684,5.85694,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.30103,0.0,0.30103,0.0,0.0,0.30103,0.69897,0.0,0.69897,0.0,0.0,0.30103,0.477121,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.390764,1.68186,0.311503,0.038993,0.425508,5.502323,5.338259,3.665621,3.644926,4.67199,4.450885,-1.0,-1.0,-1.0,-1.0,4.17612,0.0,0.0,5.471293,5.16197,0.0,3.484157,-1.0,-1.0,-1.0,-1.0,-1.0,6.217484,0.0,0.0,5.471441,5.230554,0.0,0.0,0.0,-1.0,-1.0,0,-1.0,-1.0,6.844176,6.740521,7.4837,6.651785,7.541132,7.293018,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,5.255636,6.929394,4.980388,5.374085,5.255636,4.424035,5.255636,6.929394,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,2.573767,-1.0,3.055378,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,2.100371,0.0,0.0,0.0,1.30103,-1.0,-1.0,-1.0,-1.0,-1.0,1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,435239,1.568202,1.318469,3.560863,0.30103,0.0,0.0,0.30103,0.69897,0.0,0.69897,0.0,0.0,0.30103,0.60206,0.0,-1.0,-1.0,4.176091,-1.0,-1.0,5.471292,4.978642,-1.0,0.0,-1.0,-1.0,0.0,0.477121,-1.0,6.217484,-1.0,-1.0,5.471441,5.230298,-1.0,0.60206,-1.0,-1.0,0.0,0.30103,-1.0,-1.0,0.
984855,1.093441,0.97189,1.101832,1.101832,1,-1.0,-2.041364,0.0,0.0,-0.000148,-0.068584,0.0,-1.0,0,-1.0,-1.0,0.0,0.0,1.0,0.999973,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.383195,1.0,0.986888,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,0.671674,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,9.33809,11.448038,2.757396,3.226967,5.865179,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,-0.524325,-0.524325,-0.001833,-0.175042,-2.097128,2.160778,2.15963,-0.001939,-1.402565,-4.371776,-4.66811,-4.666949,-0.002668,0.184817,-2.704633,3.32367,3.315003,-0.000133,-10.692204,3.353584,-3.507704,-3.388865,0.000845,-1.255332,2.117811,1.100857,0.932233,-0.001784,-7.290833,-10.609029,-0.070174,-0.331403,0.000586,-4.345897,4.83033,0.53114,0.325299,-0.000908,-6.714099,2.186028,5.046686,4.448431,0.002631,-6.83164,-2.817187,0.173977,0.514187,-0.000799,3.003399,-3.22291
3,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,6.401142,7.127097,7.300676,6.758407,7.079374,7.261705,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,5.334966,5.745031,5.704257,5.870704,5.52031,5.858117,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.30103,0.30103,0.30103,0.30103,0.305217,5.932536,5.932536,5.932536,5.932536,5.932536,5.924202,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,3.583595,0.477121,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,0,-1.0,-1.0,6.893257,6.672371,7.495672,6.727291,7.551644,7.310939,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,5.932535,6.969586,4.988176,5.377768,5.932535,5.929776,5.932535,6.969586,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,2.167317,2.167317,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,331646,1.748188,1.174807,3.273927,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.967956,1.087392,0.975923,1.095512,1.095512,1,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,3.115162,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-
1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,2.888179,3.294897,5.804419,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,-10.329894,-10.329894,0.00033,0.22647,2.027528,-0.287654,-0.287677,0.000115,-7.99564,0.619906,4.130493,4.130571,-0.000389,3.735775,-2.77148,1.943306,1.942099,0.002201,-1.379869,-7.041793,0.922657,0.930181,-0.000365,-2.404286,-1.555184,0.761748,0.736578,-5.2e-05,-7.18643,-6.373206,0.792611,0.785605,0.000214,-0.430981,-0.141098,1.297084,1.271018,0.00171,-4.540761,4.778988,-0.086129,-0.137414,0.000325,-1.456915,0.831447,0.616949,0.631813,0.001642,0.466489,4.872285
4,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,6.316957,7.099961,7.283045,6.694531,7.048414,7.242886,-1.0,-1.0,-1.0,-1.0,6.024179,4.474231,4.474231,4.953281,4.697238,4.902008,5.921687,4.895566,4.492495,4.502441,4.875067,4.477136,5.243038,-1.0,-1.0,4.477121,4.30103,4.69897,0.69897,-1.0,-1.0,0.0,0.0,0.30103,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,5.332282,5.743486,5.702585,5.869569,5.517677,5.856959,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.041393,0.30103,0.30103,0.60206,0.477121,0.60206,0.778151,0.60206,0.477121,0.477121,0.477121,0.477121,0.0,0.0,0.0,0.0,0.0,0.0,0.90309,0.30103,0.477121,0.30103,0.30103,0.90309,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.011604,0.237487,0.267026,0.530379,0.164913,5.615246,4.648139,4.786139,4.857037,4.478393,4.813853,-1.0,-1.0,-1.0,-1.0,6.024179,4.474231,4.474231,4.953281,4.697238,4.902008,3.484157,-1.0,-1.0,-1.0,-1.0,-1.0,5.921687,4.895566,4.492495,4.502441,4.875067,4.477136,0.0,0.0,-1.0,-1.0,0,-1.0,-1.0,6.854869,6.585357,7.484501,6.658297,7.541379,7.29369,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.237141,6.930882,4.980293,5.374197,5.237141,4.746062,5.237141,6.930882,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,2.699835,-1.0,3.05423,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.60206,0.477121,-1.0,-1.0,-1.0,-1.0,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,226900,1.556303,1.323409,2.155336,1.041393,0.30103,0.30103,0.60206,0.477121,0.60206,1.113943,0.69897,0.69897,0.60206,0.60206,1.0,-1.0,-1.0,5.365961,4.474216,4.474216,4.
953276,4.697238,4.69897,0.90309,0.0,0.0,0.477121,0.30103,0.30103,5.921687,4.895561,4.492495,4.502441,4.875067,4.477121,1.079181,0.60206,0.60206,0.477121,0.477121,0.954243,0.0,0.960683,1.091852,0.971324,1.100149,1.100149,0,-1.0,0.102492,-0.421335,-0.018264,0.45084,-0.177829,0.424873,3.385015,0,0.0,0.0,0.0,0.0,1.0,1.100132,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.963523,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.094898,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.017308,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.913935,0.0,0.0,0.0,0.0,29.046118,29.154446,3.211388,3.217783,5.602118,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,7.948912,7.948912,0.001285,2.899207,0.581553,7.656821,7.657112,-0.000364,-1.262787,-4.830787,-6.718377,-6.71859,-0.002552,5.092306,1.166669,1.146505,1.14372,-0.001712,-5.198949,9.195584,-5.297846,-5.340877,0.001123,3.054036,-5.091979,1.033529,1.049787,-0.002147,1.414892,0.896433,0.71927,0.807619,-0.001153,-10.299213,-1.657775,-0.023308,-0.005906,-0.001144,-2.326075,3.330459,1.478025,1.447945,0.003318,-2.139825,-2.423926,3.21767,3.274776,-0.002916,-0.842176,-7.854273


## set up model

In [11]:
import xgboost as xgb



In [12]:
# Model inputs: every column of `train` except the `UCIC_ID` column and the
# `Responders` target, kept in their original column order.
feature_names = [col for col in train.columns if col != 'UCIC_ID' and col != 'Responders']

In [13]:
# Target vector: the `Responders` column of `train` (also used to stratify the split below).
target = train['Responders']

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for validation, stratified on the target.
# `random_state` is pinned so that a fresh Restart & Run All reproduces the same
# split (and therefore the same early-stopping round and AUC); 2016 matches the
# `seed` used in the xgboost params.
# The full `train` frame is split here; the feature columns are selected later
# via `feature_names`.
X_train, X_valid, y_train, y_valid = train_test_split(
    train,
    target,
    test_size=0.3,
    stratify=target,
    random_state=2016,
)

In [15]:
## default xgboost settings
params = dict(
    objective="binary:logistic",
    booster="gbtree",
    nthread=4,
    eta=0.1,             # original note: 0.1
    max_depth=6,         # original note: 7
    subsample=1,
    colsample_bytree=1,
    # min_child_weight=2**3,   (disabled in the original)
    seed=2016,
    # tree_method="exact",     (disabled in the original)
    eval_metric="auc",
)

In [17]:
# Wrap the three splits in xgboost DMatrix objects, restricted to the feature columns.
# Train/valid carry their labels and state NaN as the missing marker explicitly.
dtrain = xgb.DMatrix(X_train[feature_names], label=y_train, missing=np.nan)
dvalid = xgb.DMatrix(X_valid[feature_names], label=y_valid, missing=np.nan)
# The test matrix has no label and leaves `missing` at the library default.
dtest = xgb.DMatrix(test[feature_names])

In [18]:
# Evaluation sets reported during training. Order matters: per the training log,
# the last entry ('valid') is the one used for early stopping.
watchlist = [(dtrain, 'train'),(dvalid, 'valid')]

In [19]:
# Train for at most 1000 rounds, logging train/valid AUC every 20 rounds and
# stopping once the validation AUC has not improved for 40 rounds.
clf1 = xgb.train(
    params,
    dtrain,
    num_boost_round=1000,
    evals=watchlist,
    early_stopping_rounds=40,
    maximize=True,      # AUC: larger is better
    verbose_eval=20,
)

[0]	train-auc:0.872152	valid-auc:0.868063
Multiple eval metrics have been passed: 'valid-auc' will be used for early stopping.

Will train until valid-auc hasn't improved in 40 rounds.
[20]	train-auc:0.877566	valid-auc:0.870015
[40]	train-auc:0.882359	valid-auc:0.869818
Stopping. Best iteration:
[19]	train-auc:0.877295	valid-auc:0.870067



In [None]:
# Score the test set using only the trees up to the best early-stopping round.
# `xgb.train` returns the booster from the LAST round trained (the best round
# plus the 40 patience rounds), so an unrestricted `predict` would also use the
# trees that were added after validation AUC stopped improving.
best_rounds = clf1.best_iteration + 1
try:
    preds1 = clf1.predict(dtest, iteration_range=(0, best_rounds))
except TypeError:
    # Older xgboost releases do not accept `iteration_range`; they limit the
    # number of trees through `ntree_limit` instead.
    preds1 = clf1.predict(dtest, ntree_limit=best_rounds)

In [None]:
# Start from the sample submission file and overwrite both of its columns with
# the test IDs and the model's predictions.
sub = pd.read_csv('sample_submission_fBo3EW5.csv').assign(
    UCIC_ID=test['UCIC_ID'],
    Responders=preds1,
)