In [None]:
!pip install catboost

In [None]:
!pip install xgboost

In [None]:
! pip install kaggle

In [None]:
!pip install lightgbm

In [1]:
#Importing machine learning algorithms
import lightgbm as lgb
import xgboost as xgb
import catboost as cb

#Importing other packages
import os
import timeit
import pandas as pd
import numpy as np

#Importing packages for machine learning operations
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier

In [44]:
#Reading dataset
data_df = pd.read_csv("../data/cat-in-the-dat/train.csv")

In [45]:
#Variable Description
def description(df):
    print(f"Dataset Shape: {df.shape}")
    summary = pd.DataFrame(df.dtypes,columns=['dtypes'])
    summary = summary.reset_index()
    summary['Name'] = summary['index']
    summary['Missing'] = df.isnull().sum().values
    summary['PercMissing'] = df.isnull().sum().values / df.isnull().count().values
    summary['Uniques'] = df.nunique().values
    summary['Data type'] = df.dtypes.values
    return summary
print('**Variable Description of  data:**')
description(data_df)

**Variable Description of  data:**
Dataset Shape: (300000, 25)


Unnamed: 0,index,dtypes,Name,Missing,PercMissing,Uniques,Data type
0,id,int64,id,0,0.0,300000,int64
1,bin_0,int64,bin_0,0,0.0,2,int64
2,bin_1,int64,bin_1,0,0.0,2,int64
3,bin_2,int64,bin_2,0,0.0,2,int64
4,bin_3,object,bin_3,0,0.0,2,object
5,bin_4,object,bin_4,0,0.0,2,object
6,nom_0,object,nom_0,0,0.0,3,object
7,nom_1,object,nom_1,0,0.0,6,object
8,nom_2,object,nom_2,0,0.0,6,object
9,nom_3,object,nom_3,0,0.0,6,object


In [46]:
data_df.drop("id", axis = 1, inplace = True)

#Change some features to categorical data type
for item in data_df.columns:
    data_df[item] = data_df[item].astype("category").cat.codes

In [47]:
description(data_df)

Dataset Shape: (300000, 24)


Unnamed: 0,index,dtypes,Name,Missing,PercMissing,Uniques,Data type
0,bin_0,int8,bin_0,0,0.0,2,int8
1,bin_1,int8,bin_1,0,0.0,2,int8
2,bin_2,int8,bin_2,0,0.0,2,int8
3,bin_3,int8,bin_3,0,0.0,2,int8
4,bin_4,int8,bin_4,0,0.0,2,int8
5,nom_0,int8,nom_0,0,0.0,3,int8
6,nom_1,int8,nom_1,0,0.0,6,int8
7,nom_2,int8,nom_2,0,0.0,6,int8
8,nom_3,int8,nom_3,0,0.0,6,int8
9,nom_4,int8,nom_4,0,0.0,4,int8


In [48]:
train, test = train_test_split(data_df, test_size = 0.2, random_state = 1985)

target_train = train['target']
train.drop(['target'], axis=1, inplace=True)

target_test = test['target']
test.drop(['target'], axis=1, inplace=True)

In [74]:
#Metric evaluation
def metrics(y_pred_test, run):
    score = roc_auc_score(target_test, y_pred_test)
    run['ROC AUC score'] = score

# Default settings

In [76]:
# Function to training model, log training and prediction time and tracking performance
run_all = list()

def run_model(model, description, key, cat_features=[]):
    run = dict()
    if key =='LGB':
        #Description
        run["Description"] = description

        #Training session
        start = timeit.default_timer()
        model.fit(train,target_train, categorical_feature=cat_features)
        stop = timeit.default_timer()
        run['Training time'] = stop - start

        #Prediction session
        start = timeit.default_timer()
        y_pred_test = model.predict(test)
        stop = timeit.default_timer()
        run['Prediction time'] = stop - start

        #Performance evaluation
        metrics(y_pred_test, run)

    elif key =='CAT':
        #Description
        run["Description"] = description

        #Training session
        start = timeit.default_timer()
        model.fit(train,target_train,
                  eval_set=(test, target_test),
                  cat_features=cat_features,
                  use_best_model=True)
        stop = timeit.default_timer()
        run['Training time'] = stop - start

        #Prediction session
        start = timeit.default_timer()
        y_pred_test = model.predict(test)
        stop = timeit.default_timer()
        run['Prediction time'] = stop - start

        #Performance evaluation
        metrics(y_pred_test, run)

    else:
        #Description
        run["Description"] = description

        #Training session
        start = timeit.default_timer()
        model.fit(train,target_train)
        stop = timeit.default_timer()
        run['Training time'] = stop - start

        #Prediction session
        start = timeit.default_timer()
        y_pred_test = model.predict(test)
        stop = timeit.default_timer()
        run['Prediction time'] = stop - start

        #Performance evaluation
        metrics(y_pred_test, run)

    run_all.append(run)

## LightGBM (Default)

In [77]:
# Default LightGBM without categorical features support
model_lgb_def = lgb.LGBMClassifier()
run_model(model_lgb_def,'Default LightGBM without categorical support', key='LGB')



In [78]:
#Default LightGBM with categorical feature support
model_lgb_cat_def = lgb.LGBMClassifier()
run_model(model_lgb_cat_def, 'Default LightGBM with categorical support',key='LGB', cat_features=list(train.columns))



## XGBoost

In [80]:
# Default XGBoost
model_xgb_def = xgb.XGBClassifier()
run_model(model_xgb_def, 'Default XGBoost', key='XGB')

## Catboost

In [81]:
#Without categorical encoding
model_cat_def = cb.CatBoostClassifier()
run_model(model_cat_def,'Default Catboost without categorical support', key='CAT')

Learning rate set to 0.122693
0:	learn: 0.6621832	test: 0.6618195	best: 0.6618195 (0)	total: 13.9ms	remaining: 13.9s
1:	learn: 0.6386658	test: 0.6380276	best: 0.6380276 (1)	total: 29ms	remaining: 14.5s
2:	learn: 0.6200143	test: 0.6191233	best: 0.6191233 (2)	total: 45.3ms	remaining: 15.1s
3:	learn: 0.6061726	test: 0.6051218	best: 0.6051218 (3)	total: 60.8ms	remaining: 15.2s
4:	learn: 0.5948793	test: 0.5936920	best: 0.5936920 (4)	total: 76.5ms	remaining: 15.2s
5:	learn: 0.5856141	test: 0.5843732	best: 0.5843732 (5)	total: 92.4ms	remaining: 15.3s
6:	learn: 0.5783646	test: 0.5769838	best: 0.5769838 (6)	total: 108ms	remaining: 15.3s
7:	learn: 0.5721656	test: 0.5707828	best: 0.5707828 (7)	total: 121ms	remaining: 15.1s
8:	learn: 0.5671025	test: 0.5656746	best: 0.5656746 (8)	total: 135ms	remaining: 14.9s
9:	learn: 0.5629473	test: 0.5613769	best: 0.5613769 (9)	total: 148ms	remaining: 14.7s
10:	learn: 0.5596481	test: 0.5580824	best: 0.5580824 (10)	total: 161ms	remaining: 14.4s
11:	learn: 0.55622

108:	learn: 0.5125383	test: 0.5135393	best: 0.5135393 (108)	total: 1.43s	remaining: 11.7s
109:	learn: 0.5124086	test: 0.5134315	best: 0.5134315 (109)	total: 1.45s	remaining: 11.7s
110:	learn: 0.5122657	test: 0.5132987	best: 0.5132987 (110)	total: 1.46s	remaining: 11.7s
111:	learn: 0.5121630	test: 0.5131821	best: 0.5131821 (111)	total: 1.48s	remaining: 11.7s
112:	learn: 0.5120434	test: 0.5130977	best: 0.5130977 (112)	total: 1.49s	remaining: 11.7s
113:	learn: 0.5119534	test: 0.5130644	best: 0.5130644 (113)	total: 1.5s	remaining: 11.7s
114:	learn: 0.5118587	test: 0.5129766	best: 0.5129766 (114)	total: 1.52s	remaining: 11.7s
115:	learn: 0.5117684	test: 0.5129066	best: 0.5129066 (115)	total: 1.53s	remaining: 11.7s
116:	learn: 0.5116865	test: 0.5128840	best: 0.5128840 (116)	total: 1.55s	remaining: 11.7s
117:	learn: 0.5116023	test: 0.5128618	best: 0.5128618 (117)	total: 1.56s	remaining: 11.7s
118:	learn: 0.5115067	test: 0.5128078	best: 0.5128078 (118)	total: 1.58s	remaining: 11.7s
119:	learn:

203:	learn: 0.5052006	test: 0.5093929	best: 0.5093929 (203)	total: 4.34s	remaining: 16.9s
204:	learn: 0.5051496	test: 0.5093969	best: 0.5093929 (203)	total: 4.36s	remaining: 16.9s
205:	learn: 0.5050995	test: 0.5094080	best: 0.5093929 (203)	total: 4.38s	remaining: 16.9s
206:	learn: 0.5050499	test: 0.5094087	best: 0.5093929 (203)	total: 4.39s	remaining: 16.8s
207:	learn: 0.5049289	test: 0.5093082	best: 0.5093082 (207)	total: 4.41s	remaining: 16.8s
208:	learn: 0.5048716	test: 0.5092804	best: 0.5092804 (208)	total: 4.42s	remaining: 16.7s
209:	learn: 0.5048289	test: 0.5092657	best: 0.5092657 (209)	total: 4.43s	remaining: 16.7s
210:	learn: 0.5047596	test: 0.5092010	best: 0.5092010 (210)	total: 4.45s	remaining: 16.6s
211:	learn: 0.5047049	test: 0.5092010	best: 0.5092010 (211)	total: 4.46s	remaining: 16.6s
212:	learn: 0.5046677	test: 0.5091936	best: 0.5091936 (212)	total: 4.47s	remaining: 16.5s
213:	learn: 0.5045747	test: 0.5091156	best: 0.5091156 (213)	total: 4.49s	remaining: 16.5s
214:	learn

296:	learn: 0.5001636	test: 0.5076809	best: 0.5076809 (296)	total: 7.77s	remaining: 18.4s
297:	learn: 0.5001202	test: 0.5076762	best: 0.5076762 (297)	total: 7.78s	remaining: 18.3s
298:	learn: 0.5000691	test: 0.5076317	best: 0.5076317 (298)	total: 7.8s	remaining: 18.3s
299:	learn: 0.5000201	test: 0.5076232	best: 0.5076232 (299)	total: 7.81s	remaining: 18.2s
300:	learn: 0.4999380	test: 0.5075688	best: 0.5075688 (300)	total: 7.83s	remaining: 18.2s
301:	learn: 0.4998983	test: 0.5075601	best: 0.5075601 (301)	total: 7.84s	remaining: 18.1s
302:	learn: 0.4998531	test: 0.5075481	best: 0.5075481 (302)	total: 7.86s	remaining: 18.1s
303:	learn: 0.4998017	test: 0.5075598	best: 0.5075481 (302)	total: 7.88s	remaining: 18s
304:	learn: 0.4997633	test: 0.5075692	best: 0.5075481 (302)	total: 7.89s	remaining: 18s
305:	learn: 0.4997127	test: 0.5075542	best: 0.5075481 (302)	total: 7.9s	remaining: 17.9s
306:	learn: 0.4996789	test: 0.5075559	best: 0.5075481 (302)	total: 7.92s	remaining: 17.9s
307:	learn: 0.49

401:	learn: 0.4952996	test: 0.5064958	best: 0.5064958 (401)	total: 9.25s	remaining: 13.8s
402:	learn: 0.4952632	test: 0.5064763	best: 0.5064763 (402)	total: 9.27s	remaining: 13.7s
403:	learn: 0.4952235	test: 0.5064623	best: 0.5064623 (403)	total: 9.28s	remaining: 13.7s
404:	learn: 0.4951878	test: 0.5064562	best: 0.5064562 (404)	total: 9.29s	remaining: 13.7s
405:	learn: 0.4951525	test: 0.5064552	best: 0.5064552 (405)	total: 9.31s	remaining: 13.6s
406:	learn: 0.4951110	test: 0.5064619	best: 0.5064552 (405)	total: 9.32s	remaining: 13.6s
407:	learn: 0.4950776	test: 0.5064657	best: 0.5064552 (405)	total: 9.34s	remaining: 13.5s
408:	learn: 0.4950368	test: 0.5064666	best: 0.5064552 (405)	total: 9.35s	remaining: 13.5s
409:	learn: 0.4949865	test: 0.5064501	best: 0.5064501 (409)	total: 9.36s	remaining: 13.5s
410:	learn: 0.4949470	test: 0.5064598	best: 0.5064501 (409)	total: 9.38s	remaining: 13.4s
411:	learn: 0.4949037	test: 0.5064699	best: 0.5064501 (409)	total: 9.39s	remaining: 13.4s
412:	learn

504:	learn: 0.4909023	test: 0.5054866	best: 0.5054866 (504)	total: 10.7s	remaining: 10.5s
505:	learn: 0.4908710	test: 0.5054734	best: 0.5054734 (505)	total: 10.7s	remaining: 10.5s
506:	learn: 0.4908318	test: 0.5054798	best: 0.5054734 (505)	total: 10.7s	remaining: 10.4s
507:	learn: 0.4907830	test: 0.5055016	best: 0.5054734 (505)	total: 10.8s	remaining: 10.4s
508:	learn: 0.4907423	test: 0.5054997	best: 0.5054734 (505)	total: 10.8s	remaining: 10.4s
509:	learn: 0.4907005	test: 0.5055008	best: 0.5054734 (505)	total: 10.8s	remaining: 10.4s
510:	learn: 0.4906567	test: 0.5055175	best: 0.5054734 (505)	total: 10.8s	remaining: 10.3s
511:	learn: 0.4906092	test: 0.5055291	best: 0.5054734 (505)	total: 10.8s	remaining: 10.3s
512:	learn: 0.4905718	test: 0.5055245	best: 0.5054734 (505)	total: 10.8s	remaining: 10.3s
513:	learn: 0.4905285	test: 0.5055263	best: 0.5054734 (505)	total: 10.8s	remaining: 10.3s
514:	learn: 0.4904979	test: 0.5055246	best: 0.5054734 (505)	total: 10.9s	remaining: 10.2s
515:	learn

604:	learn: 0.4869631	test: 0.5051836	best: 0.5051413 (587)	total: 12.2s	remaining: 7.94s
605:	learn: 0.4869378	test: 0.5051704	best: 0.5051413 (587)	total: 12.2s	remaining: 7.92s
606:	learn: 0.4869050	test: 0.5051490	best: 0.5051413 (587)	total: 12.2s	remaining: 7.89s
607:	learn: 0.4868710	test: 0.5051092	best: 0.5051092 (607)	total: 12.2s	remaining: 7.87s
608:	learn: 0.4868295	test: 0.5051260	best: 0.5051092 (607)	total: 12.2s	remaining: 7.85s
609:	learn: 0.4867865	test: 0.5051235	best: 0.5051092 (607)	total: 12.2s	remaining: 7.82s
610:	learn: 0.4867465	test: 0.5051327	best: 0.5051092 (607)	total: 12.2s	remaining: 7.8s
611:	learn: 0.4867078	test: 0.5051358	best: 0.5051092 (607)	total: 12.3s	remaining: 7.77s
612:	learn: 0.4866615	test: 0.5051451	best: 0.5051092 (607)	total: 12.3s	remaining: 7.75s
613:	learn: 0.4866267	test: 0.5051427	best: 0.5051092 (607)	total: 12.3s	remaining: 7.73s
614:	learn: 0.4865900	test: 0.5051436	best: 0.5051092 (607)	total: 12.3s	remaining: 7.7s
615:	learn: 

702:	learn: 0.4832579	test: 0.5048300	best: 0.5048300 (702)	total: 13.6s	remaining: 5.75s
703:	learn: 0.4832223	test: 0.5048358	best: 0.5048300 (702)	total: 13.6s	remaining: 5.73s
704:	learn: 0.4831905	test: 0.5048535	best: 0.5048300 (702)	total: 13.6s	remaining: 5.71s
705:	learn: 0.4831612	test: 0.5048647	best: 0.5048300 (702)	total: 13.7s	remaining: 5.68s
706:	learn: 0.4831240	test: 0.5048633	best: 0.5048300 (702)	total: 13.7s	remaining: 5.67s
707:	learn: 0.4830792	test: 0.5048433	best: 0.5048300 (702)	total: 13.7s	remaining: 5.64s
708:	learn: 0.4830467	test: 0.5048268	best: 0.5048268 (708)	total: 13.7s	remaining: 5.62s
709:	learn: 0.4830071	test: 0.5048055	best: 0.5048055 (709)	total: 13.7s	remaining: 5.6s
710:	learn: 0.4829648	test: 0.5048003	best: 0.5048003 (710)	total: 13.7s	remaining: 5.58s
711:	learn: 0.4829306	test: 0.5048065	best: 0.5048003 (710)	total: 13.7s	remaining: 5.56s
712:	learn: 0.4828968	test: 0.5048043	best: 0.5048003 (710)	total: 13.8s	remaining: 5.54s
713:	learn:

800:	learn: 0.4796537	test: 0.5048411	best: 0.5047811 (796)	total: 15.1s	remaining: 3.74s
801:	learn: 0.4796251	test: 0.5048398	best: 0.5047811 (796)	total: 15.1s	remaining: 3.72s
802:	learn: 0.4795828	test: 0.5048395	best: 0.5047811 (796)	total: 15.1s	remaining: 3.7s
803:	learn: 0.4795362	test: 0.5048575	best: 0.5047811 (796)	total: 15.1s	remaining: 3.68s
804:	learn: 0.4795110	test: 0.5048687	best: 0.5047811 (796)	total: 15.1s	remaining: 3.66s
805:	learn: 0.4794876	test: 0.5048641	best: 0.5047811 (796)	total: 15.1s	remaining: 3.64s
806:	learn: 0.4794427	test: 0.5048262	best: 0.5047811 (796)	total: 15.1s	remaining: 3.62s
807:	learn: 0.4794077	test: 0.5048272	best: 0.5047811 (796)	total: 15.2s	remaining: 3.6s
808:	learn: 0.4793631	test: 0.5048078	best: 0.5047811 (796)	total: 15.2s	remaining: 3.58s
809:	learn: 0.4793272	test: 0.5048287	best: 0.5047811 (796)	total: 15.2s	remaining: 3.56s
810:	learn: 0.4792953	test: 0.5048335	best: 0.5047811 (796)	total: 15.2s	remaining: 3.54s
811:	learn: 

898:	learn: 0.4761877	test: 0.5047722	best: 0.5047639 (897)	total: 16.5s	remaining: 1.85s
899:	learn: 0.4761508	test: 0.5047685	best: 0.5047639 (897)	total: 16.5s	remaining: 1.83s
900:	learn: 0.4761161	test: 0.5047759	best: 0.5047639 (897)	total: 16.5s	remaining: 1.82s
901:	learn: 0.4760767	test: 0.5047988	best: 0.5047639 (897)	total: 16.5s	remaining: 1.8s
902:	learn: 0.4760371	test: 0.5047844	best: 0.5047639 (897)	total: 16.6s	remaining: 1.78s
903:	learn: 0.4759968	test: 0.5047759	best: 0.5047639 (897)	total: 16.6s	remaining: 1.76s
904:	learn: 0.4759657	test: 0.5047725	best: 0.5047639 (897)	total: 16.6s	remaining: 1.74s
905:	learn: 0.4759339	test: 0.5047549	best: 0.5047549 (905)	total: 16.6s	remaining: 1.72s
906:	learn: 0.4759021	test: 0.5047625	best: 0.5047549 (905)	total: 16.6s	remaining: 1.7s
907:	learn: 0.4758636	test: 0.5047600	best: 0.5047549 (905)	total: 16.6s	remaining: 1.69s
908:	learn: 0.4758258	test: 0.5047527	best: 0.5047527 (908)	total: 16.7s	remaining: 1.67s
909:	learn: 

997:	learn: 0.4728158	test: 0.5047061	best: 0.5046509 (917)	total: 17.9s	remaining: 36ms
998:	learn: 0.4727755	test: 0.5047222	best: 0.5046509 (917)	total: 18s	remaining: 18ms
999:	learn: 0.4727424	test: 0.5047231	best: 0.5046509 (917)	total: 18s	remaining: 0us

bestTest = 0.5046509165
bestIteration = 917

Shrink model to first 918 iterations.


In [83]:
#With categorical encoding
model_cat_cat_def = cb.CatBoostClassifier()
cat_features_index = list(range(len(list(train.columns))))
run_model(model_cat_cat_def,'Default Catboost with categorical support','CAT', cat_features_index)

Learning rate set to 0.122693
0:	learn: 0.6607271	test: 0.6599062	best: 0.6599062 (0)	total: 121ms	remaining: 2m
1:	learn: 0.6365183	test: 0.6354440	best: 0.6354440 (1)	total: 248ms	remaining: 2m 3s
2:	learn: 0.6176734	test: 0.6158626	best: 0.6158626 (2)	total: 366ms	remaining: 2m 1s
3:	learn: 0.6033162	test: 0.6012285	best: 0.6012285 (3)	total: 503ms	remaining: 2m 5s
4:	learn: 0.5915416	test: 0.5891456	best: 0.5891456 (4)	total: 633ms	remaining: 2m 5s
5:	learn: 0.5820596	test: 0.5793396	best: 0.5793396 (5)	total: 751ms	remaining: 2m 4s
6:	learn: 0.5747575	test: 0.5716887	best: 0.5716887 (6)	total: 880ms	remaining: 2m 4s
7:	learn: 0.5683068	test: 0.5652111	best: 0.5652111 (7)	total: 1.06s	remaining: 2m 11s
8:	learn: 0.5628140	test: 0.5598064	best: 0.5598064 (8)	total: 1.2s	remaining: 2m 11s
9:	learn: 0.5581716	test: 0.5550195	best: 0.5550195 (9)	total: 1.31s	remaining: 2m 10s
10:	learn: 0.5542200	test: 0.5508065	best: 0.5508065 (10)	total: 1.44s	remaining: 2m 9s
11:	learn: 0.5510178	te

93:	learn: 0.4968345	test: 0.4912670	best: 0.4912670 (93)	total: 13s	remaining: 2m 5s
94:	learn: 0.4967068	test: 0.4911314	best: 0.4911314 (94)	total: 13.2s	remaining: 2m 5s
95:	learn: 0.4965956	test: 0.4910327	best: 0.4910327 (95)	total: 13.3s	remaining: 2m 5s
96:	learn: 0.4964877	test: 0.4909562	best: 0.4909562 (96)	total: 13.4s	remaining: 2m 5s
97:	learn: 0.4963659	test: 0.4908544	best: 0.4908544 (97)	total: 13.6s	remaining: 2m 5s
98:	learn: 0.4962610	test: 0.4907459	best: 0.4907459 (98)	total: 13.7s	remaining: 2m 4s
99:	learn: 0.4961466	test: 0.4906473	best: 0.4906473 (99)	total: 13.9s	remaining: 2m 4s
100:	learn: 0.4960272	test: 0.4905314	best: 0.4905314 (100)	total: 14s	remaining: 2m 4s
101:	learn: 0.4959510	test: 0.4904934	best: 0.4904934 (101)	total: 14.1s	remaining: 2m 4s
102:	learn: 0.4958338	test: 0.4903976	best: 0.4903976 (102)	total: 14.3s	remaining: 2m 4s
103:	learn: 0.4957389	test: 0.4903308	best: 0.4903308 (103)	total: 14.5s	remaining: 2m 4s
104:	learn: 0.4956502	test: 

186:	learn: 0.4911607	test: 0.4876113	best: 0.4876113 (186)	total: 27.5s	remaining: 1m 59s
187:	learn: 0.4911236	test: 0.4875932	best: 0.4875932 (187)	total: 27.7s	remaining: 1m 59s
188:	learn: 0.4910859	test: 0.4875820	best: 0.4875820 (188)	total: 27.8s	remaining: 1m 59s
189:	learn: 0.4910462	test: 0.4875717	best: 0.4875717 (189)	total: 28s	remaining: 1m 59s
190:	learn: 0.4910069	test: 0.4875547	best: 0.4875547 (190)	total: 28.1s	remaining: 1m 59s
191:	learn: 0.4909723	test: 0.4875588	best: 0.4875547 (190)	total: 28.3s	remaining: 1m 59s
192:	learn: 0.4909423	test: 0.4875395	best: 0.4875395 (192)	total: 28.4s	remaining: 1m 58s
193:	learn: 0.4909136	test: 0.4875362	best: 0.4875362 (193)	total: 28.6s	remaining: 1m 58s
194:	learn: 0.4908846	test: 0.4875339	best: 0.4875339 (194)	total: 28.8s	remaining: 1m 58s
195:	learn: 0.4908416	test: 0.4875170	best: 0.4875170 (195)	total: 29s	remaining: 1m 58s
196:	learn: 0.4908063	test: 0.4874932	best: 0.4874932 (196)	total: 29.1s	remaining: 1m 58s
197

277:	learn: 0.4886689	test: 0.4870048	best: 0.4870048 (277)	total: 42.4s	remaining: 1m 50s
278:	learn: 0.4886334	test: 0.4870122	best: 0.4870048 (277)	total: 42.6s	remaining: 1m 50s
279:	learn: 0.4886240	test: 0.4870131	best: 0.4870048 (277)	total: 42.7s	remaining: 1m 49s
280:	learn: 0.4885951	test: 0.4870040	best: 0.4870040 (280)	total: 42.9s	remaining: 1m 49s
281:	learn: 0.4885668	test: 0.4870057	best: 0.4870040 (280)	total: 43.1s	remaining: 1m 49s
282:	learn: 0.4885271	test: 0.4869961	best: 0.4869961 (282)	total: 43.2s	remaining: 1m 49s
283:	learn: 0.4884928	test: 0.4869855	best: 0.4869855 (283)	total: 43.4s	remaining: 1m 49s
284:	learn: 0.4884607	test: 0.4869844	best: 0.4869844 (284)	total: 43.5s	remaining: 1m 49s
285:	learn: 0.4884441	test: 0.4869757	best: 0.4869757 (285)	total: 43.7s	remaining: 1m 49s
286:	learn: 0.4884325	test: 0.4869703	best: 0.4869703 (286)	total: 43.9s	remaining: 1m 49s
287:	learn: 0.4883948	test: 0.4869718	best: 0.4869703 (286)	total: 44.1s	remaining: 1m 48s

368:	learn: 0.4866214	test: 0.4866395	best: 0.4866395 (368)	total: 58.2s	remaining: 1m 39s
369:	learn: 0.4865958	test: 0.4866383	best: 0.4866383 (369)	total: 58.4s	remaining: 1m 39s
370:	learn: 0.4865657	test: 0.4866415	best: 0.4866383 (369)	total: 58.6s	remaining: 1m 39s
371:	learn: 0.4865489	test: 0.4866426	best: 0.4866383 (369)	total: 58.8s	remaining: 1m 39s
372:	learn: 0.4865373	test: 0.4866431	best: 0.4866383 (369)	total: 58.9s	remaining: 1m 39s
373:	learn: 0.4865160	test: 0.4866428	best: 0.4866383 (369)	total: 59.1s	remaining: 1m 38s
374:	learn: 0.4864981	test: 0.4866370	best: 0.4866370 (374)	total: 59.2s	remaining: 1m 38s
375:	learn: 0.4864716	test: 0.4866407	best: 0.4866370 (374)	total: 59.5s	remaining: 1m 38s
376:	learn: 0.4864623	test: 0.4866397	best: 0.4866370 (374)	total: 59.6s	remaining: 1m 38s
377:	learn: 0.4864373	test: 0.4866332	best: 0.4866332 (377)	total: 59.8s	remaining: 1m 38s
378:	learn: 0.4864106	test: 0.4866295	best: 0.4866295 (378)	total: 60s	remaining: 1m 38s
3

459:	learn: 0.4847113	test: 0.4864709	best: 0.4864706 (458)	total: 1m 13s	remaining: 1m 26s
460:	learn: 0.4846866	test: 0.4864609	best: 0.4864609 (460)	total: 1m 13s	remaining: 1m 26s
461:	learn: 0.4846611	test: 0.4864552	best: 0.4864552 (461)	total: 1m 14s	remaining: 1m 26s
462:	learn: 0.4846402	test: 0.4864571	best: 0.4864552 (461)	total: 1m 14s	remaining: 1m 26s
463:	learn: 0.4846331	test: 0.4864585	best: 0.4864552 (461)	total: 1m 14s	remaining: 1m 25s
464:	learn: 0.4846001	test: 0.4864474	best: 0.4864474 (464)	total: 1m 14s	remaining: 1m 25s
465:	learn: 0.4845950	test: 0.4864453	best: 0.4864453 (465)	total: 1m 14s	remaining: 1m 25s
466:	learn: 0.4845711	test: 0.4864518	best: 0.4864453 (465)	total: 1m 14s	remaining: 1m 25s
467:	learn: 0.4845644	test: 0.4864518	best: 0.4864453 (465)	total: 1m 15s	remaining: 1m 25s
468:	learn: 0.4845194	test: 0.4864412	best: 0.4864412 (468)	total: 1m 15s	remaining: 1m 25s
469:	learn: 0.4844861	test: 0.4864352	best: 0.4864352 (469)	total: 1m 15s	remain

549:	learn: 0.4827981	test: 0.4864416	best: 0.4864248 (487)	total: 1m 29s	remaining: 1m 12s
550:	learn: 0.4827755	test: 0.4864424	best: 0.4864248 (487)	total: 1m 29s	remaining: 1m 12s
551:	learn: 0.4827347	test: 0.4864396	best: 0.4864248 (487)	total: 1m 29s	remaining: 1m 12s
552:	learn: 0.4827071	test: 0.4864349	best: 0.4864248 (487)	total: 1m 29s	remaining: 1m 12s
553:	learn: 0.4826877	test: 0.4864349	best: 0.4864248 (487)	total: 1m 29s	remaining: 1m 12s
554:	learn: 0.4826735	test: 0.4864346	best: 0.4864248 (487)	total: 1m 29s	remaining: 1m 12s
555:	learn: 0.4826400	test: 0.4864439	best: 0.4864248 (487)	total: 1m 30s	remaining: 1m 11s
556:	learn: 0.4826217	test: 0.4864455	best: 0.4864248 (487)	total: 1m 30s	remaining: 1m 11s
557:	learn: 0.4825945	test: 0.4864404	best: 0.4864248 (487)	total: 1m 30s	remaining: 1m 11s
558:	learn: 0.4825718	test: 0.4864382	best: 0.4864248 (487)	total: 1m 30s	remaining: 1m 11s
559:	learn: 0.4825599	test: 0.4864351	best: 0.4864248 (487)	total: 1m 30s	remain

641:	learn: 0.4807463	test: 0.4864142	best: 0.4864011 (635)	total: 1m 45s	remaining: 59s
642:	learn: 0.4807135	test: 0.4864136	best: 0.4864011 (635)	total: 1m 45s	remaining: 58.8s
643:	learn: 0.4807015	test: 0.4864078	best: 0.4864011 (635)	total: 1m 46s	remaining: 58.7s
644:	learn: 0.4806835	test: 0.4864084	best: 0.4864011 (635)	total: 1m 46s	remaining: 58.5s
645:	learn: 0.4806598	test: 0.4864034	best: 0.4864011 (635)	total: 1m 46s	remaining: 58.4s
646:	learn: 0.4806297	test: 0.4864133	best: 0.4864011 (635)	total: 1m 46s	remaining: 58.2s
647:	learn: 0.4806045	test: 0.4864139	best: 0.4864011 (635)	total: 1m 46s	remaining: 58s
648:	learn: 0.4805820	test: 0.4864221	best: 0.4864011 (635)	total: 1m 47s	remaining: 57.9s
649:	learn: 0.4805639	test: 0.4864158	best: 0.4864011 (635)	total: 1m 47s	remaining: 57.7s
650:	learn: 0.4805488	test: 0.4864136	best: 0.4864011 (635)	total: 1m 47s	remaining: 57.5s
651:	learn: 0.4805333	test: 0.4864114	best: 0.4864011 (635)	total: 1m 47s	remaining: 57.4s
652

733:	learn: 0.4788740	test: 0.4864131	best: 0.4863784 (666)	total: 2m 1s	remaining: 43.9s
734:	learn: 0.4788501	test: 0.4864105	best: 0.4863784 (666)	total: 2m 1s	remaining: 43.8s
735:	learn: 0.4788300	test: 0.4864109	best: 0.4863784 (666)	total: 2m 1s	remaining: 43.6s
736:	learn: 0.4788108	test: 0.4864130	best: 0.4863784 (666)	total: 2m 1s	remaining: 43.4s
737:	learn: 0.4787850	test: 0.4864179	best: 0.4863784 (666)	total: 2m 1s	remaining: 43.2s
738:	learn: 0.4787781	test: 0.4864178	best: 0.4863784 (666)	total: 2m 2s	remaining: 43.1s
739:	learn: 0.4787519	test: 0.4864186	best: 0.4863784 (666)	total: 2m 2s	remaining: 42.9s
740:	learn: 0.4787247	test: 0.4864155	best: 0.4863784 (666)	total: 2m 2s	remaining: 42.8s
741:	learn: 0.4787041	test: 0.4864103	best: 0.4863784 (666)	total: 2m 2s	remaining: 42.6s
742:	learn: 0.4786808	test: 0.4864122	best: 0.4863784 (666)	total: 2m 2s	remaining: 42.5s
743:	learn: 0.4786554	test: 0.4864143	best: 0.4863784 (666)	total: 2m 2s	remaining: 42.3s
744:	learn

824:	learn: 0.4769969	test: 0.4863918	best: 0.4863784 (666)	total: 2m 17s	remaining: 29.1s
825:	learn: 0.4769769	test: 0.4863936	best: 0.4863784 (666)	total: 2m 17s	remaining: 29s
826:	learn: 0.4769671	test: 0.4863872	best: 0.4863784 (666)	total: 2m 17s	remaining: 28.8s
827:	learn: 0.4769517	test: 0.4863972	best: 0.4863784 (666)	total: 2m 17s	remaining: 28.6s
828:	learn: 0.4769354	test: 0.4863960	best: 0.4863784 (666)	total: 2m 18s	remaining: 28.5s
829:	learn: 0.4769239	test: 0.4863960	best: 0.4863784 (666)	total: 2m 18s	remaining: 28.3s
830:	learn: 0.4769151	test: 0.4863947	best: 0.4863784 (666)	total: 2m 18s	remaining: 28.1s
831:	learn: 0.4768857	test: 0.4863896	best: 0.4863784 (666)	total: 2m 18s	remaining: 28s
832:	learn: 0.4768671	test: 0.4863928	best: 0.4863784 (666)	total: 2m 18s	remaining: 27.8s
833:	learn: 0.4768596	test: 0.4863979	best: 0.4863784 (666)	total: 2m 18s	remaining: 27.7s
834:	learn: 0.4768312	test: 0.4863997	best: 0.4863784 (666)	total: 2m 19s	remaining: 27.5s
835

915:	learn: 0.4753162	test: 0.4864227	best: 0.4863784 (666)	total: 2m 34s	remaining: 14.1s
916:	learn: 0.4752864	test: 0.4864295	best: 0.4863784 (666)	total: 2m 34s	remaining: 14s
917:	learn: 0.4752475	test: 0.4864389	best: 0.4863784 (666)	total: 2m 34s	remaining: 13.8s
918:	learn: 0.4752165	test: 0.4864416	best: 0.4863784 (666)	total: 2m 34s	remaining: 13.6s
919:	learn: 0.4751916	test: 0.4864471	best: 0.4863784 (666)	total: 2m 34s	remaining: 13.5s
920:	learn: 0.4751630	test: 0.4864589	best: 0.4863784 (666)	total: 2m 34s	remaining: 13.3s
921:	learn: 0.4751562	test: 0.4864587	best: 0.4863784 (666)	total: 2m 35s	remaining: 13.1s
922:	learn: 0.4751272	test: 0.4864750	best: 0.4863784 (666)	total: 2m 35s	remaining: 13s
923:	learn: 0.4751089	test: 0.4864798	best: 0.4863784 (666)	total: 2m 35s	remaining: 12.8s
924:	learn: 0.4750910	test: 0.4864825	best: 0.4863784 (666)	total: 2m 35s	remaining: 12.6s
925:	learn: 0.4750621	test: 0.4864747	best: 0.4863784 (666)	total: 2m 35s	remaining: 12.5s
926

## Random Forest

In [86]:
# Default XGBoost
model_rf_def = RandomForestClassifier(verbose = True, n_jobs = -1, random_state = 1985)
run_model(model_rf_def, 'Default Random Forest', key='RF')

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.1s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.3s finished


In [89]:
results_df = pd.DataFrame(run_all)
results_df

Unnamed: 0,Description,Training time,Prediction time,ROC AUC score
0,Default LightGBM without categorical support,1.026472,0.072304,0.635199
1,Default LightGBM with categorical support,2.491392,0.073969,0.644861
2,Default XGBoost,6.096581,0.017769,0.649817
3,Default Catboost without categorical support,18.193569,0.022442,0.655684
4,Default Catboost with categorical support,170.324903,0.296049,0.673017
5,Default Random Forest,5.173537,0.264041,0.600149


In [90]:
results_df.sort_values("ROC AUC score")

Unnamed: 0,Description,Training time,Prediction time,ROC AUC score
5,Default Random Forest,5.173537,0.264041,0.600149
0,Default LightGBM without categorical support,1.026472,0.072304,0.635199
1,Default LightGBM with categorical support,2.491392,0.073969,0.644861
2,Default XGBoost,6.096581,0.017769,0.649817
3,Default Catboost without categorical support,18.193569,0.022442,0.655684
4,Default Catboost with categorical support,170.324903,0.296049,0.673017
