In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Raise pandas' display limits to 40 rows / 40 columns for the previews in this notebook.
pd.set_option('display.max_rows',40)
pd.set_option('display.max_columns',40)

In [3]:
# Load the training data; the listed placeholder tokens are parsed as NaN.
train = pd.read_csv('Train_Dataset.csv', na_values=['$','#VALUE!','@','#','x','&','##','XNA'])

In [4]:
# Preview the first two rows of the training frame.
train.head(2)

Unnamed: 0,ID,Client_Income,Car_Owned,Bike_Owned,Active_Loan,House_Own,Child_Count,Credit_Amount,Loan_Annuity,Accompany_Client,Client_Income_Type,Client_Education,Client_Marital_Status,Client_Gender,Loan_Contract_Type,Client_Housing_Type,Population_Region_Relative,Age_Days,Employed_Days,Registration_Days,ID_Days,Own_House_Age,Mobile_Tag,Homephone_Tag,Workphone_Working,Client_Occupation,Client_Family_Members,Cleint_City_Rating,Application_Process_Day,Application_Process_Hour,Client_Permanent_Match_Tag,Client_Contact_Work_Tag,Type_Organization,Score_Source_1,Score_Source_2,Score_Source_3,Social_Circle_Default,Phone_Change,Credit_Bureau,Default
0,12142509,6750.0,0.0,0.0,1.0,0.0,0.0,61190.55,3416.85,Alone,Commercial,Secondary,M,Male,CL,Home,0.028663,13957.0,1062.0,6123.0,383.0,,1,1,0,Sales,2.0,2.0,6.0,17.0,Yes,Yes,Self-employed,0.568066,0.478787,,0.0186,63.0,,0
1,12138936,20250.0,1.0,0.0,1.0,,0.0,15282.0,1826.55,Alone,Service,Graduation,M,Male,CL,Home,0.008575,14162.0,4129.0,7833.0,21.0,0.0,1,0,1,,2.0,2.0,3.0,10.0,Yes,Yes,Government,0.56336,0.215068,,,,,0


In [5]:
def shape(x):
    """Print the number of rows and columns of ``x``.

    ``x`` must expose a two-element ``shape`` attribute (rows, columns).
    Nothing is returned; the summary is written to stdout.
    """
    (row, column) = x.shape
    message = f'The dataset has {row} rows and {column} columns.'
    print(message)

In [6]:
# Dimensions of the training frame as loaded.
shape(train)

The dataset has 121856 rows and 40 columns.


In [7]:
# Remove the 'ID' column. `columns=` replaces the positional axis argument,
# which pandas deprecated in 1.3 and removed in 2.0.
train = train.drop(columns='ID')

In [8]:
# Dimensions after removing 'ID'.
shape(train)

The dataset has 121856 rows and 39 columns.


In [9]:
# Percentage of missing values for the five columns with the most NaNs.
train.isna().sum().sort_values(ascending=False).head()/train.shape[0]*100

Own_House_Age            65.729221
Score_Source_1           56.488806
Social_Circle_Default    50.820641
Client_Occupation        34.003250
Score_Source_3           22.093290
dtype: float64

In [10]:
# Dropping the columns with more than 1/3rd missing values.
# `columns=` is used because the positional axis argument was removed in pandas 2.0.
train = train.drop(columns=['Own_House_Age','Score_Source_1','Social_Circle_Default','Client_Occupation'])

In [11]:
# Dimensions after dropping the sparsely populated columns.
shape(train)

The dataset has 121856 rows and 35 columns.


In [12]:
# Per-column NaN counts, largest first, keeping only columns that have at least one NaN.
missing_cols = (
    train.isna()
    .sum()
    .sort_values(ascending=False)
    .loc[lambda counts: counts != 0]
)
missing_cols

Score_Source_3                26922
Type_Organization             24694
Credit_Bureau                 18540
ID_Days                        5985
Score_Source_2                 5686
Population_Region_Relative     4868
Loan_Annuity                   4826
Client_Income_Type             3701
Client_Housing_Type            3687
Employed_Days                  3666
Phone_Change                   3664
Application_Process_Hour       3663
House_Own                      3661
Loan_Contract_Type             3651
Client_Education               3645
Child_Count                    3638
Credit_Amount                  3637
Active_Loan                    3635
Registration_Days              3631
Bike_Owned                     3624
Client_Income                  3622
Age_Days                       3617
Car_Owned                      3581
Client_Marital_Status          3473
Application_Process_Day        2428
Client_Gender                  2416
Client_Family_Members          2410
Cleint_City_Rating          

In [13]:
#Imputing the missing values:
def missingValImputation(df, col):
    """Fill the missing entries of ``df[col]`` in place.

    Object-dtype columns are filled with their most frequent value.
    Other columns are filled with the median when the absolute skew of
    the column exceeds 0.5, and with the mean otherwise.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; ``df[col]`` is reassigned.
    col : str
        Name of the column to impute.
    """
    series = df[col]
    if series.dtype == 'object':
        fill_value = series.mode()[0]
    elif abs(series.skew()) > 0.5:
        fill_value = series.median()
    else:
        fill_value = series.mean()
    df[col] = series.fillna(fill_value)

In [14]:
# Impute every column that reported at least one missing value.
for col in missing_cols.index:
    missingValImputation(train,col)

In [15]:
# Total number of NaNs left in the frame after imputation.
train.isna().sum().sum()

0

In [16]:
#capping the outliers
# Only non-object columns with more than 10 distinct values are processed.
# Fences are q1 - 1.5*IQR and q3 + 1.5*IQR; a value below the lower fence is
# replaced by the 1st percentile and a value above the upper fence by the
# 90th percentile (not by the fence itself).
for col in train.columns:
    if train[col].dtype != 'object' and train[col].nunique()>10:
        q1,q3,q90,q01 = train[col].quantile([0.25,0.75,0.90,0.01])
        iqr = q3-q1
        lowerLimit = q1 - (1.5*iqr)
        upperLimit = q3 + (1.5*iqr)
        train[col] = train[col].apply(lambda x: q01 if x<lowerLimit else q90 if x>upperLimit else x)

In [17]:
# Extra pass on Employed_Days only. The quantiles and fences are recomputed from
# the column's current values, i.e. after the generic capping loop has run.
q1,q3,q90,q01 = train['Employed_Days'].quantile([0.25,0.75,0.90,0.01])
iqr = q3-q1
lowerLimit = q1 - (1.5*iqr)
upperLimit = q3 + (1.5*iqr)

# Here the replacement for values above the upper fence is the 80th percentile (q90 is unused).
q80 = train['Employed_Days'].quantile(0.8)

train['Employed_Days'] = train['Employed_Days'].apply(lambda x: q01 if x<lowerLimit else q80 if x>upperLimit else x)

In [18]:
# Remove three more columns from the feature set. `columns=` replaces the
# positional axis argument, which pandas removed in 2.0.
train = train.drop(columns=['Loan_Annuity','Mobile_Tag','Child_Count'])

In [19]:
# Dimensions after removing Loan_Annuity, Mobile_Tag and Child_Count.
shape(train)

The dataset has 121856 rows and 32 columns.


In [20]:
# Collapse every organization label containing one of these keywords into the
# keyword itself (first match in the listed order wins); other labels are kept.
train['Type_Organization'] = train['Type_Organization'].apply(
    lambda x: next(
        (group for group in ('Business', 'Industry', 'Trade', 'Transport') if group in x),
        x,
    )
)

In [21]:
# Distinct organization labels remaining after the grouping step.
train['Type_Organization'].unique()

array(['Self-employed', 'Government', 'Business', 'Other', 'Industry',
       'Transport', 'Construction', 'Kindergarten', 'Trade',
       'Agriculture', 'Military', 'Medicine', 'Housing', 'Bank', 'School',
       'Postal', 'University', 'Restaurant', 'Electricity', 'Police',
       'Security Ministries', 'Services', 'Mobile', 'Hotel', 'Security',
       'Advertising', 'Cleaning', 'Realtor', 'Culture', 'Telecom',
       'Insurance', 'Emergency', 'Legal Services', 'Religion'],
      dtype=object)

In [22]:
def marital_status(x):
    """Encode a marital-status code as an integer.

    'W' -> 0, 'D' -> 1, 'S' -> 2; any other value -> 3.
    """
    for code, label in enumerate(('W', 'D', 'S')):
        if x == label:
            return code
    return 3
    
def gender(x):
    """Encode gender: 'Female' -> 0, any other value -> 1."""
    return 0 if x == 'Female' else 1
    
def loan_contract_type(x):
    """Encode the loan contract type: 'RL' -> 0, any other value -> 1."""
    is_rl = x == 'RL'
    return 0 if is_rl else 1
    

def acc_client(x):
    """Encode who accompanied the client.

    'Alone' -> 0, 'Relative' -> 1, any other value -> 2.
    """
    if x == 'Alone':
        return 0
    return 1 if x == 'Relative' else 2
    
def match_tag(x):
    """Encode a Yes/No tag: 'No' -> 0, any other value -> 1."""
    return 0 if x == 'No' else 1
    
def work_tag(x):
    """Encode a Yes/No tag: 'No' -> 0, any other value -> 1."""
    answered_no = x == 'No'
    if answered_no:
        return 0
    return 1
    
def org_type(x):
    """Encode an organization label as an integer code.

    The code is the position of the label in the ordered tuple below
    (0 for 'Self-employed' ... 32 for 'Religion'). Any value that is not
    in the tuple, e.g. 'Other', is encoded as 33.
    """
    labels = (
        'Self-employed', 'Government', 'Business', 'Industry', 'Transport',
        'Construction', 'Kindergarten', 'Trade', 'Agriculture', 'Military',
        'Medicine', 'Housing', 'Bank', 'School', 'Postal', 'University',
        'Restaurant', 'Electricity', 'Police', 'Security Ministries',
        'Services', 'Mobile', 'Hotel', 'Security', 'Advertising', 'Cleaning',
        'Realtor', 'Culture', 'Telecom', 'Insurance', 'Emergency',
        'Legal Services', 'Religion',
    )
    # Compared in order with ==, exactly like an if/elif chain.
    for code, label in enumerate(labels):
        if x == label:
            return code
    return 33
    
def income_type(x):
    """Encode the income type as an integer.

    'Service' -> 0, 'Commercial' -> 1, 'Retired' -> 2, 'Govt Job' -> 3;
    any other value -> 4.
    """
    for code, label in enumerate(('Service', 'Commercial', 'Retired', 'Govt Job')):
        if x == label:
            return code
    return 4
    
def education(x):
    """Encode education: 'Secondary' -> 0, 'Graduation' -> 1, any other value -> 2."""
    if x == 'Secondary':
        return 0
    return 1 if x == 'Graduation' else 2
    
def housing(x):
    """Encode housing type: 'Home' -> 0, any other value -> 1."""
    return 0 if x == 'Home' else 1
    
def child_count(x):
    """Bucket a child count: 0 and 1 are returned unchanged, anything else becomes 2."""
    return x if x in (0, 1) else 2
    
def family_members(x):
    """Bucket family size: 1, 2 and 3 are returned unchanged, anything else becomes 0."""
    return x if x in (1, 2, 3) else 0
    
def credit_bureau(x):
    """Bucket the credit-bureau value: 0 to 3 are returned unchanged, anything else becomes 4."""
    return x if x in (0, 1, 2, 3) else 4

In [23]:
# Encode a copy so that `train` keeps its string categories.
train_f = train.copy()

In [24]:
# Replace each categorical / bucketed column with the integer code from its encoder function.
train_f['Client_Housing_Type'] = train_f['Client_Housing_Type'].apply(housing)
train_f['Client_Education'] = train_f['Client_Education'].apply(education)
train_f['Client_Income_Type'] = train_f['Client_Income_Type'].apply(income_type)
train_f['Accompany_Client'] = train_f['Accompany_Client'].apply(acc_client)
train_f['Client_Family_Members'] = train_f['Client_Family_Members'].apply(family_members)
train_f['Credit_Bureau'] = train_f['Credit_Bureau'].apply(credit_bureau)
train_f['Client_Marital_Status'] = train_f['Client_Marital_Status'].apply(marital_status)
train_f['Client_Gender'] = train_f['Client_Gender'].apply(gender)
train_f['Loan_Contract_Type'] = train_f['Loan_Contract_Type'].apply(loan_contract_type)
train_f['Client_Permanent_Match_Tag'] = train_f['Client_Permanent_Match_Tag'].apply(match_tag)
train_f['Client_Contact_Work_Tag'] = train_f['Client_Contact_Work_Tag'].apply(work_tag)
train_f['Type_Organization'] = train_f['Type_Organization'].apply(org_type)
# Child_Count was dropped from `train` earlier, so its encoder stays disabled:
# train_f['Child_Count'] = train_f['Child_Count'].apply(child_count)

In [25]:
# Preview the encoded frame.
train_f.head(2)

Unnamed: 0,Client_Income,Car_Owned,Bike_Owned,Active_Loan,House_Own,Credit_Amount,Accompany_Client,Client_Income_Type,Client_Education,Client_Marital_Status,Client_Gender,Loan_Contract_Type,Client_Housing_Type,Population_Region_Relative,Age_Days,Employed_Days,Registration_Days,ID_Days,Homephone_Tag,Workphone_Working,Client_Family_Members,Cleint_City_Rating,Application_Process_Day,Application_Process_Hour,Client_Permanent_Match_Tag,Client_Contact_Work_Tag,Type_Organization,Score_Source_2,Score_Source_3,Phone_Change,Credit_Bureau,Default
0,6750.0,0.0,0.0,1.0,0.0,61190.55,0,1,0,3,1,1,0,0.028663,13957.0,1062.0,6123.0,383.0,1,0,2.0,2.0,6.0,17.0,1,1,0,0.478787,0.51118,63.0,1.0,0
1,20250.0,1.0,0.0,1.0,1.0,15282.0,0,0,1,3,1,1,0,0.008575,14162.0,4129.0,7833.0,21.0,0,1,2.0,2.0,3.0,10.0,1,1,1,0.215068,0.51118,755.0,1.0,0


In [26]:
# Encoding replaces columns in place, so the dimensions should match `train`.
shape(train_f)

The dataset has 121856 rows and 32 columns.


In [27]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import f1_score, classification_report

In [28]:
# Separate the features from the 'Default' target. `columns=` replaces the
# positional axis argument, which pandas removed in 2.0.
Xf = train_f.drop(columns='Default')
yf = train_f['Default']

In [29]:
# 70/30 hold-out split with a fixed seed.
# NOTE(review): the split is not stratified on yf; the class weights used by the
# models suggest an imbalanced target, so `stratify=yf` may be worth considering - confirm.
Xf_train, Xf_test, yf_train, yf_test = train_test_split(Xf, yf, test_size=0.3, random_state=42)

In [30]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only and reuse it for the hold-out split.
sc = StandardScaler()
Xsc_trainf = sc.fit_transform(Xf_train)
# Re-wrap the arrays as DataFrames to keep the column names; no index is passed,
# so the frames get a fresh default index rather than the one from the split.
Xsc_trainf = pd.DataFrame(Xsc_trainf, columns=Xf_train.columns)
Xsc_testf = sc.transform(Xf_test)
Xsc_testf = pd.DataFrame(Xsc_testf, columns=Xf_test.columns)

In [31]:
from sklearn.ensemble import ExtraTreesClassifier
from xgboost import XGBClassifier


In [114]:
# XGBoost model with hard-coded hyper-parameters; how they were chosen is not
# shown in this notebook.
xgb1 = XGBClassifier(max_depth=13, min_child_weight=7, n_estimators=167, scale_pos_weight=13,
                    random_state=7, reg_alpha=20)

In [34]:
# Train on the scaled training split.
xgb1.fit(Xsc_trainf,yf_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=13,
              min_child_weight=7, missing=nan, monotone_constraints='()',
              n_estimators=167, n_jobs=0, num_parallel_tree=1, random_state=7,
              reg_alpha=20, reg_lambda=1, scale_pos_weight=13, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [35]:
# F1 score of the XGBoost model on the scaled hold-out split.
f1_score(yf_test,xgb1.predict(Xsc_testf))

0.34659969088098913

---

In [89]:
from catboost import CatBoostClassifier

# CatBoost model with hard-coded hyper-parameters, trained on the scaled
# training split; `pred` holds its hold-out predictions for the scoring cell.
cb = CatBoostClassifier(random_state=7, class_weights=[1,7], max_depth=11, l2_leaf_reg=12)
cb.fit(Xsc_trainf,yf_train)
pred = cb.predict(Xsc_testf)

In [87]:
# F1 score of the CatBoost predictions on the hold-out split.
f1_score(yf_test, pred)

0.339356178608515

In [115]:
# (name, estimator) pairs for the voting ensemble.
estimators = [('xg',xgb1),('cb',cb)]

In [116]:
from sklearn.ensemble import VotingClassifier
# Soft-voting ensemble of the XGBoost and CatBoost models. VotingClassifier
# trains its own clones of the listed estimators, so it has to be fitted itself
# before predict() can be called. Fitting on the training split here lets the
# hold-out evaluation run on a fresh kernel (Restart & Run All).
vc = VotingClassifier(estimators=estimators,voting='soft')
vc.fit(Xsc_trainf,yf_train)

In [92]:
# F1 score of the voting ensemble on the scaled hold-out split (requires a fitted `vc`).
f1_score(yf_test,vc.predict(Xsc_testf))

0.3527397260273973

### Test Dataset

In [93]:
# Load the test data; the listed placeholder tokens are parsed as NaN.
# NOTE(review): this list contains 'Unknown', which the na_values used for
# Train_Dataset.csv does not - confirm the two files are meant to be parsed differently.
test = pd.read_csv('Test_Dataset.csv', na_values=['$','#VALUE!','@','#','x','&','Unknown','##','XNA'])

In [94]:
# Preview the first two rows of the test frame.
test.head(2)

Unnamed: 0,ID,Client_Income,Car_Owned,Bike_Owned,Active_Loan,House_Own,Child_Count,Credit_Amount,Loan_Annuity,Accompany_Client,Client_Income_Type,Client_Education,Client_Marital_Status,Client_Gender,Loan_Contract_Type,Client_Housing_Type,Population_Region_Relative,Age_Days,Employed_Days,Registration_Days,ID_Days,Own_House_Age,Mobile_Tag,Homephone_Tag,Workphone_Working,Client_Occupation,Client_Family_Members,Cleint_City_Rating,Application_Process_Day,Application_Process_Hour,Client_Permanent_Match_Tag,Client_Contact_Work_Tag,Type_Organization,Score_Source_1,Score_Source_2,Score_Source_3,Social_Circle_Default,Phone_Change,Credit_Bureau
0,12202227,11250.0,0.0,1.0,1.0,1.0,0.0,112500.0,4474.8,Relative,Service,,M,Male,CL,Home,0.019101,20063.0,2523.0,2318.0,3318.0,,1,0,0,,,2.0,4.0,16.0,Yes,Yes,Self-employed,,0.757151,0.629674,0.0515,716.0,2.0
1,12279381,13500.0,1.0,1.0,1.0,1.0,3.0,49752.0,3252.15,Alone,Service,Secondary,M,Female,CL,Home,0.0105,13200.0,551.0,6972.0,3865.0,6.0,1,1,0,Laborers,5.0,3.0,3.0,18.0,No,Yes,Self-employed,,0.342269,,,181.0,


In [95]:
# Dimensions of the test frame as loaded.
shape(test)

The dataset has 80900 rows and 39 columns.


In [96]:
# Drop the same sparsely populated columns that were removed from the training
# frame. `columns=` replaces the positional axis argument removed in pandas 2.0.
test = test.drop(columns=['Own_House_Age','Score_Source_1','Social_Circle_Default','Client_Occupation'])

In [97]:
# Drop the same three extra columns that were removed from the training frame.
# `columns=` replaces the positional axis argument removed in pandas 2.0.
test = test.drop(columns=['Child_Count','Loan_Annuity','Mobile_Tag'])

In [98]:
# Per-column NaN counts of the test frame, largest first, keeping only columns with at least one NaN.
missing_cols_test = (
    test.isna()
    .sum()
    .sort_values(ascending=False)
    .loc[lambda counts: counts != 0]
)

In [99]:
#Imputing the missing values:
def missingValImputation(df1, df2, col):
    """Fill the missing entries of ``df1[col]`` in place using statistics of ``df2[col]``.

    Object-dtype columns are filled with the most frequent value of
    ``df2[col]``. Other columns are filled with the median of ``df2[col]``
    when the absolute skew of ``df2[col]`` exceeds 0.5, and with its mean
    otherwise.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Frame to modify; ``df1[col]`` is reassigned.
    df2 : pandas.DataFrame
        Reference frame that supplies the fill value.
    col : str
        Name of a column present in both frames.
    """
    reference = df2[col]
    if df1[col].dtype == 'object':
        fill_value = reference.mode()[0]
    # The mean-vs-median choice is made from the reference column, the same
    # column the statistic is computed from, so the fill never depends on the
    # distribution of the frame being imputed.
    elif abs(reference.skew()) > 0.5:
        fill_value = reference.median()
    else:
        fill_value = reference.mean()
    df1[col] = df1[col].fillna(fill_value)

In [100]:
# Impute every test column that has missing values, taking the fill values from `train`.
for col in missing_cols_test.index:
    missingValImputation(test, train, col)

In [101]:
# Total number of NaNs left in the test frame after imputation.
test.isna().sum().sum()

0

In [102]:
#capping the outliers
# Columns are selected from `test` (non-object, more than 10 distinct values, not 'ID'),
# while the quantiles and fences come from `train` as it stands at this point.
# NOTE(review): `train` received an additional Employed_Days pass that maps values above
# the upper fence to the 0.80 quantile; no equivalent pass is applied to `test` here -
# confirm whether that difference is intended.
for col in test.columns:
    if test[col].dtype != 'object' and test[col].nunique()>10 and col != 'ID':
        q1,q3,q90,q01 = train[col].quantile([0.25,0.75,0.90,0.01])
        iqr = q3-q1
        lowerLimit = q1 - (1.5*iqr)
        upperLimit = q3 + (1.5*iqr)
        test[col] = test[col].apply(lambda x: q01 if x<lowerLimit else q90 if x>upperLimit else x)

In [103]:
# Collapse every organization label containing one of these keywords into the
# keyword itself (first match in the listed order wins); other labels are kept.
test['Type_Organization'] = test['Type_Organization'].apply(
    lambda x: next(
        (group for group in ('Business', 'Industry', 'Trade', 'Transport') if group in x),
        x,
    )
)

In [104]:
# Encode the test columns with the same encoder functions used for the training frame.
test['Client_Housing_Type'] = test['Client_Housing_Type'].apply(housing)
test['Client_Education'] = test['Client_Education'].apply(education)
test['Client_Income_Type'] = test['Client_Income_Type'].apply(income_type)
test['Accompany_Client'] = test['Accompany_Client'].apply(acc_client)
test['Client_Family_Members'] = test['Client_Family_Members'].apply(family_members)
test['Credit_Bureau'] = test['Credit_Bureau'].apply(credit_bureau)
test['Client_Marital_Status'] = test['Client_Marital_Status'].apply(marital_status)
test['Client_Gender'] = test['Client_Gender'].apply(gender)
test['Loan_Contract_Type'] = test['Loan_Contract_Type'].apply(loan_contract_type)
test['Client_Permanent_Match_Tag'] = test['Client_Permanent_Match_Tag'].apply(match_tag)
test['Client_Contact_Work_Tag'] = test['Client_Contact_Work_Tag'].apply(work_tag)
test['Type_Organization'] = test['Type_Organization'].apply(org_type)

In [105]:
# Keep the IDs for the submission file, then remove the column from the
# features. `columns=` replaces the positional axis argument removed in pandas 2.0.
ID = test['ID']
test = test.drop(columns='ID')

In [106]:
# Refit the scaler on the full training features (the earlier fit used only the training split).
Xsc = sc.fit_transform(Xf)
Xsc = pd.DataFrame(Xsc, columns=Xf.columns)

In [107]:
# Apply the scaler fitted on the full training features to the test features.
test_sc = sc.transform(test)
test_sc = pd.DataFrame(test_sc, columns=test.columns)

In [108]:
# Sanity check: the training feature matrix ...
shape(Xf)

The dataset has 121856 rows and 31 columns.


In [109]:
# ... and the scaled test matrix should have the same number of columns.
shape(test_sc)

The dataset has 80900 rows and 31 columns.


In [117]:
# Fit the ensemble on the full scaled training set before predicting the test set.
vc.fit(Xsc,yf)

0:	learn: 0.6867303	total: 313ms	remaining: 5m 12s
1:	learn: 0.6804422	total: 724ms	remaining: 6m 1s
2:	learn: 0.6748025	total: 996ms	remaining: 5m 30s
3:	learn: 0.6686998	total: 1.29s	remaining: 5m 21s
4:	learn: 0.6635200	total: 1.75s	remaining: 5m 48s
5:	learn: 0.6580206	total: 2.27s	remaining: 6m 16s
6:	learn: 0.6534755	total: 2.63s	remaining: 6m 13s
7:	learn: 0.6491273	total: 3.15s	remaining: 6m 30s
8:	learn: 0.6448245	total: 3.57s	remaining: 6m 33s
9:	learn: 0.6404339	total: 3.95s	remaining: 6m 31s
10:	learn: 0.6365044	total: 4.26s	remaining: 6m 23s
11:	learn: 0.6328789	total: 4.6s	remaining: 6m 18s
12:	learn: 0.6291159	total: 5.07s	remaining: 6m 24s
13:	learn: 0.6268284	total: 5.15s	remaining: 6m 2s
14:	learn: 0.6240566	total: 5.5s	remaining: 6m 1s
15:	learn: 0.6213971	total: 5.88s	remaining: 6m 1s
16:	learn: 0.6183712	total: 6.3s	remaining: 6m 4s
17:	learn: 0.6151607	total: 6.84s	remaining: 6m 13s
18:	learn: 0.6122421	total: 7.24s	remaining: 6m 13s
19:	learn: 0.6094200	total: 7.

159:	learn: 0.4807767	total: 53.2s	remaining: 4m 39s
160:	learn: 0.4802139	total: 53.5s	remaining: 4m 38s
161:	learn: 0.4796605	total: 53.7s	remaining: 4m 38s
162:	learn: 0.4789130	total: 54s	remaining: 4m 37s
163:	learn: 0.4784541	total: 54.3s	remaining: 4m 36s
164:	learn: 0.4777127	total: 54.6s	remaining: 4m 36s
165:	learn: 0.4769217	total: 54.9s	remaining: 4m 35s
166:	learn: 0.4762069	total: 55.2s	remaining: 4m 35s
167:	learn: 0.4755796	total: 55.4s	remaining: 4m 34s
168:	learn: 0.4750576	total: 55.7s	remaining: 4m 33s
169:	learn: 0.4746216	total: 56s	remaining: 4m 33s
170:	learn: 0.4739023	total: 56.3s	remaining: 4m 32s
171:	learn: 0.4731769	total: 56.6s	remaining: 4m 32s
172:	learn: 0.4724637	total: 57s	remaining: 4m 32s
173:	learn: 0.4719296	total: 57.3s	remaining: 4m 32s
174:	learn: 0.4715378	total: 57.6s	remaining: 4m 31s
175:	learn: 0.4708434	total: 57.8s	remaining: 4m 30s
176:	learn: 0.4703648	total: 58.2s	remaining: 4m 30s
177:	learn: 0.4700550	total: 58.5s	remaining: 4m 30s

313:	learn: 0.4055247	total: 1m 39s	remaining: 3m 38s
314:	learn: 0.4051902	total: 1m 40s	remaining: 3m 37s
315:	learn: 0.4047437	total: 1m 40s	remaining: 3m 37s
316:	learn: 0.4042498	total: 1m 40s	remaining: 3m 36s
317:	learn: 0.4037609	total: 1m 40s	remaining: 3m 36s
318:	learn: 0.4031901	total: 1m 41s	remaining: 3m 36s
319:	learn: 0.4028267	total: 1m 41s	remaining: 3m 36s
320:	learn: 0.4023554	total: 1m 42s	remaining: 3m 35s
321:	learn: 0.4018811	total: 1m 42s	remaining: 3m 35s
322:	learn: 0.4014302	total: 1m 42s	remaining: 3m 35s
323:	learn: 0.4009027	total: 1m 42s	remaining: 3m 34s
324:	learn: 0.4004907	total: 1m 43s	remaining: 3m 34s
325:	learn: 0.3996362	total: 1m 43s	remaining: 3m 33s
326:	learn: 0.3990835	total: 1m 43s	remaining: 3m 33s
327:	learn: 0.3987031	total: 1m 44s	remaining: 3m 33s
328:	learn: 0.3982563	total: 1m 44s	remaining: 3m 32s
329:	learn: 0.3977936	total: 1m 44s	remaining: 3m 32s
330:	learn: 0.3970728	total: 1m 45s	remaining: 3m 32s
331:	learn: 0.3965384	total:

467:	learn: 0.3432776	total: 2m 29s	remaining: 2m 49s
468:	learn: 0.3427817	total: 2m 29s	remaining: 2m 49s
469:	learn: 0.3425294	total: 2m 30s	remaining: 2m 49s
470:	learn: 0.3421248	total: 2m 30s	remaining: 2m 48s
471:	learn: 0.3417631	total: 2m 30s	remaining: 2m 48s
472:	learn: 0.3415375	total: 2m 30s	remaining: 2m 48s
473:	learn: 0.3412934	total: 2m 31s	remaining: 2m 47s
474:	learn: 0.3410017	total: 2m 31s	remaining: 2m 47s
475:	learn: 0.3405785	total: 2m 31s	remaining: 2m 47s
476:	learn: 0.3401459	total: 2m 32s	remaining: 2m 46s
477:	learn: 0.3398229	total: 2m 32s	remaining: 2m 46s
478:	learn: 0.3396219	total: 2m 32s	remaining: 2m 46s
479:	learn: 0.3393247	total: 2m 33s	remaining: 2m 46s
480:	learn: 0.3391039	total: 2m 33s	remaining: 2m 45s
481:	learn: 0.3387516	total: 2m 33s	remaining: 2m 45s
482:	learn: 0.3384440	total: 2m 34s	remaining: 2m 44s
483:	learn: 0.3382134	total: 2m 34s	remaining: 2m 44s
484:	learn: 0.3379092	total: 2m 34s	remaining: 2m 44s
485:	learn: 0.3375129	total:

621:	learn: 0.3010290	total: 3m 14s	remaining: 1m 58s
622:	learn: 0.3007725	total: 3m 15s	remaining: 1m 58s
623:	learn: 0.3004767	total: 3m 15s	remaining: 1m 57s
624:	learn: 0.3002397	total: 3m 15s	remaining: 1m 57s
625:	learn: 0.3000613	total: 3m 16s	remaining: 1m 57s
626:	learn: 0.2998629	total: 3m 16s	remaining: 1m 56s
627:	learn: 0.2995298	total: 3m 16s	remaining: 1m 56s
628:	learn: 0.2992139	total: 3m 16s	remaining: 1m 56s
629:	learn: 0.2989486	total: 3m 17s	remaining: 1m 55s
630:	learn: 0.2986606	total: 3m 17s	remaining: 1m 55s
631:	learn: 0.2982566	total: 3m 17s	remaining: 1m 55s
632:	learn: 0.2980490	total: 3m 18s	remaining: 1m 54s
633:	learn: 0.2977613	total: 3m 18s	remaining: 1m 54s
634:	learn: 0.2974932	total: 3m 18s	remaining: 1m 54s
635:	learn: 0.2972577	total: 3m 18s	remaining: 1m 53s
636:	learn: 0.2970002	total: 3m 19s	remaining: 1m 53s
637:	learn: 0.2967861	total: 3m 19s	remaining: 1m 53s
638:	learn: 0.2966044	total: 3m 19s	remaining: 1m 52s
639:	learn: 0.2962616	total:

773:	learn: 0.2661985	total: 3m 59s	remaining: 1m 9s
774:	learn: 0.2660106	total: 3m 59s	remaining: 1m 9s
775:	learn: 0.2658399	total: 3m 59s	remaining: 1m 9s
776:	learn: 0.2657032	total: 4m	remaining: 1m 8s
777:	learn: 0.2654583	total: 4m	remaining: 1m 8s
778:	learn: 0.2651667	total: 4m	remaining: 1m 8s
779:	learn: 0.2649926	total: 4m	remaining: 1m 7s
780:	learn: 0.2647808	total: 4m 1s	remaining: 1m 7s
781:	learn: 0.2645102	total: 4m 1s	remaining: 1m 7s
782:	learn: 0.2643162	total: 4m 1s	remaining: 1m 7s
783:	learn: 0.2641706	total: 4m 2s	remaining: 1m 6s
784:	learn: 0.2638309	total: 4m 2s	remaining: 1m 6s
785:	learn: 0.2636446	total: 4m 2s	remaining: 1m 6s
786:	learn: 0.2634219	total: 4m 2s	remaining: 1m 5s
787:	learn: 0.2632070	total: 4m 3s	remaining: 1m 5s
788:	learn: 0.2630952	total: 4m 3s	remaining: 1m 5s
789:	learn: 0.2628803	total: 4m 3s	remaining: 1m 4s
790:	learn: 0.2624486	total: 4m 4s	remaining: 1m 4s
791:	learn: 0.2623343	total: 4m 4s	remaining: 1m 4s
792:	learn: 0.2621276

930:	learn: 0.2348343	total: 4m 46s	remaining: 21.3s
931:	learn: 0.2346769	total: 4m 47s	remaining: 21s
932:	learn: 0.2345554	total: 4m 47s	remaining: 20.6s
933:	learn: 0.2342901	total: 4m 47s	remaining: 20.3s
934:	learn: 0.2340434	total: 4m 48s	remaining: 20s
935:	learn: 0.2338844	total: 4m 48s	remaining: 19.7s
936:	learn: 0.2336470	total: 4m 48s	remaining: 19.4s
937:	learn: 0.2334891	total: 4m 49s	remaining: 19.1s
938:	learn: 0.2333665	total: 4m 49s	remaining: 18.8s
939:	learn: 0.2332067	total: 4m 49s	remaining: 18.5s
940:	learn: 0.2330636	total: 4m 50s	remaining: 18.2s
941:	learn: 0.2329031	total: 4m 50s	remaining: 17.9s
942:	learn: 0.2326710	total: 4m 50s	remaining: 17.6s
943:	learn: 0.2325586	total: 4m 51s	remaining: 17.3s
944:	learn: 0.2324122	total: 4m 51s	remaining: 17s
945:	learn: 0.2322825	total: 4m 51s	remaining: 16.6s
946:	learn: 0.2320891	total: 4m 51s	remaining: 16.3s
947:	learn: 0.2317140	total: 4m 52s	remaining: 16s
948:	learn: 0.2314535	total: 4m 52s	remaining: 15.7s
9

VotingClassifier(estimators=[('xg',
                              XGBClassifier(base_score=None, booster=None,
                                            colsample_bylevel=None,
                                            colsample_bynode=None,
                                            colsample_bytree=None, gamma=None,
                                            gpu_id=None, importance_type='gain',
                                            interaction_constraints=None,
                                            learning_rate=None,
                                            max_delta_step=None, max_depth=13,
                                            min_child_weight=7, missing=nan,
                                            monotone_constraints=None,
                                            n_estimators=167, n_jobs=None,
                                            num_parallel_tree=None,
                                            random_state=7, reg_alpha=20,
            

In [118]:
# Predictions of the voting ensemble (despite the `xgb` in the variable name) for the scaled test set.
resxgb = vc.predict(test_sc)

In [119]:
# Pair each saved test ID with its predicted label.
submitxgb = pd.DataFrame({'ID':ID, 'Default':resxgb})

In [120]:
# Write the submission file without the DataFrame index.
submitxgb.to_csv('Submission23.csv', index=False)