In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.ensemble as ens
from sklearn.model_selection import train_test_split

In [2]:
train = pd.read_csv("Datasets/train_cleaned_data.csv")
test = pd.read_csv('Datasets/test_cleaned_data.csv')

In [3]:
train.dtypes

Unnamed: 0                int64
id                        int64
Gender                    int64
Age                       int64
Driving_License           int64
Region_Code             float64
Previously_Insured        int64
Vehicle_Age               int64
Vehicle_Damage            int64
Annual_Premium          float64
Policy_Sales_Channel    float64
Vintage                   int64
Response                  int64
dtype: object

In [4]:
## Converting float64 dtypes to int64 so the data can be compatable with the models
train['Policy_Sales_Channel'] = train['Policy_Sales_Channel'].astype('int64')
train['Annual_Premium'] = train['Annual_Premium'].astype('int64')
train['Region_Code'] = train['Region_Code'].astype('int64')
train.dtypes

Unnamed: 0              int64
id                      int64
Gender                  int64
Age                     int64
Driving_License         int64
Region_Code             int64
Previously_Insured      int64
Vehicle_Age             int64
Vehicle_Damage          int64
Annual_Premium          int64
Policy_Sales_Channel    int64
Vintage                 int64
Response                int64
dtype: object

In [5]:
## Extracting responses from the training dataset
responses = train['Response']

## Will be dropping id and responses from the training dataset
train.drop(['Response', 'id', 'Unnamed: 0'], axis=1, inplace=True)

In [6]:
## Splitting up the data using train_test_split with a shuffle added
x_train, y_train, x_test, y_test = train_test_split(train, responses, test_size=0.1, random_state=42)

In [7]:
print("x_train length: {}\nx_test length: {}\ny_train length: {}\ny_test length: {}".format(len(x_train), len(x_test), len(y_train), len(y_test)))

x_train length: 342998
x_test length: 342998
y_train length: 38111
y_test length: 38111


In [25]:
from sklearn.ensemble import GradientBoostingClassifier

clf = GradientBoostingClassifier(random_state=2, verbose=1, n_estimators=400)
clf.fit(x_train, x_test)

Iter       Train Loss   Remaining Time 
         1           0.7126            2.31m
         2           0.6884            2.24m
         3           0.6688            2.25m
         4           0.6526            2.21m
         5           0.6390            2.18m
         6           0.6273            2.15m
         7           0.6172            2.13m
         8           0.6086            2.13m
         9           0.6012            2.11m
        10           0.5945            2.10m
        20           0.5582            2.03m
        30           0.5457            1.98m
        40           0.5403            1.92m
        50           0.5376            1.88m
        60           0.5356            1.84m
        70           0.5342            1.78m
        80           0.5331            1.73m
        90           0.5322            1.68m
       100           0.5315            1.63m
       200           0.5279            1.09m
       300           0.5260           32.55s
       400     

GradientBoostingClassifier(n_estimators=400, random_state=2, verbose=1)

In [26]:
prediction = clf.predict(y_train[:200])
actual = y_test[:200]
print("{}\n{}".format(prediction, np.array(actual)))

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0
 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0
 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0]


In [27]:
from sklearn.metrics import roc_auc_score
predictions = clf.predict(y_train)
print("Score: {}".format(clf.score(y_train, y_test)))
roc_auc_score(predictions, np.array(y_test))

Score: 0.8747343286715121


0.7130274666529549

In [28]:
from sklearn.tree import DecisionTreeClassifier #DecisionTreeClassifier reduced accuracy, so wont use
clf_adaboost = AdaBoostClassifier(n_estimators=400, random_state=2)
clf_adaboost.fit(x_train, x_test)

AdaBoostClassifier(n_estimators=400)

In [29]:
predictions = clf_adaboost.predict(y_train)
print("Score: {}".format(clf_adaboost.score(y_train, y_test)))
roc_auc_score(predictions, np.array(y_test))

Score: 0.8744719372359686


0.6754409981372438

In [31]:
from sklearn.ensemble import RandomForestClassifier
cls_randomforest = RandomForestClassifier(n_estimators=400, criterion='gini', random_state=2, verbose=True)
cls_randomforest.fit(x_train, x_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 400 out of 400 | elapsed:  3.3min finished


RandomForestClassifier(n_estimators=400, random_state=2, verbose=True)

In [33]:
predictions = cls_randomforest.predict(y_train)
print("Score: {}".format(cls_randomforest.score(y_train, y_test)))
roc_auc_score(predictions, np.array(y_test))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 400 out of 400 | elapsed:    4.7s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
Score: 0.8641074755319986
[Parallel(n_jobs=1)]: Done 400 out of 400 | elapsed:    3.1s finished


0.6259202635157612

In [37]:
from sklearn.ensemble import BaggingClassifier
cls_bagging = BaggingClassifier(n_estimators=400, n_jobs=4, random_state=2, verbose=1)
cls_bagging.fit(x_train, x_test)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  2.6min remaining:  2.6min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  2.6min finished


BaggingClassifier(n_estimators=400, n_jobs=4, random_state=2, verbose=1)

In [38]:
predictions = cls_bagging.predict(y_train)
print("Score: {}".format(cls_bagging.score(y_train, y_test)))
roc_auc_score(predictions, np.array(y_test))

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    4.1s remaining:    4.1s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    5.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    5.8s remaining:    5.8s
Score: 0.8619821049040959
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    6.3s finished


0.6209097947203638

In [41]:
from sklearn.ensemble import ExtraTreesClassifier
cls_extra = ExtraTreesClassifier(n_estimators=400, criterion='gini', random_state=2, verbose=True)
cls_extra.fit(x_train, x_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 400 out of 400 | elapsed:  2.3min finished


ExtraTreesClassifier(n_estimators=400, random_state=2, verbose=True)

In [42]:
predictions = cls_extra.predict(y_train)
print("Score: {}".format(cls_extra.score(y_train, y_test)))
roc_auc_score(predictions, np.array(y_test))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 400 out of 400 | elapsed:   11.7s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
Score: 0.8598042559890845
[Parallel(n_jobs=1)]: Done 400 out of 400 | elapsed:    4.3s finished


0.6238235648978123

In [24]:
from catboost import CatBoostClassifier
categories =['Gender','Driving_License', 'Region_Code', 'Previously_Insured', 'Vehicle_Age', 'Vehicle_Damage','Policy_Sales_Channel']
cls_catboost = CatBoostClassifier()
cls_catboost.fit(x_train, x_test, cat_features=categories, eval_set=(y_train, y_test),verbose=1)

Learning rate set to 0.134007
0:	learn: 0.4849927	test: 0.4856354	best: 0.4856354 (0)	total: 94.9ms	remaining: 1m 34s
1:	learn: 0.3870166	test: 0.3880742	best: 0.3880742 (1)	total: 182ms	remaining: 1m 31s
2:	learn: 0.3396596	test: 0.3409384	best: 0.3409384 (2)	total: 271ms	remaining: 1m 30s
3:	learn: 0.3146431	test: 0.3160981	best: 0.3160981 (3)	total: 355ms	remaining: 1m 28s
4:	learn: 0.3004845	test: 0.3021597	best: 0.3021597 (4)	total: 427ms	remaining: 1m 24s
5:	learn: 0.2914089	test: 0.2932280	best: 0.2932280 (5)	total: 502ms	remaining: 1m 23s
6:	learn: 0.2860220	test: 0.2880633	best: 0.2880633 (6)	total: 571ms	remaining: 1m 20s
7:	learn: 0.2822834	test: 0.2845279	best: 0.2845279 (7)	total: 645ms	remaining: 1m 20s
8:	learn: 0.2786803	test: 0.2810333	best: 0.2810333 (8)	total: 727ms	remaining: 1m 20s
9:	learn: 0.2756743	test: 0.2781573	best: 0.2781573 (9)	total: 822ms	remaining: 1m 21s
10:	learn: 0.2735502	test: 0.2761082	best: 0.2761082 (10)	total: 898ms	remaining: 1m 20s
11:	learn:

94:	learn: 0.2627779	test: 0.2667613	best: 0.2667613 (94)	total: 7.01s	remaining: 1m 6s
95:	learn: 0.2627686	test: 0.2667613	best: 0.2667613 (94)	total: 7.07s	remaining: 1m 6s
96:	learn: 0.2627533	test: 0.2667597	best: 0.2667597 (96)	total: 7.14s	remaining: 1m 6s
97:	learn: 0.2627324	test: 0.2667536	best: 0.2667536 (97)	total: 7.21s	remaining: 1m 6s
98:	learn: 0.2627249	test: 0.2667528	best: 0.2667528 (98)	total: 7.28s	remaining: 1m 6s
99:	learn: 0.2627139	test: 0.2667719	best: 0.2667528 (98)	total: 7.36s	remaining: 1m 6s
100:	learn: 0.2627006	test: 0.2667714	best: 0.2667528 (98)	total: 7.45s	remaining: 1m 6s
101:	learn: 0.2626850	test: 0.2667772	best: 0.2667528 (98)	total: 7.54s	remaining: 1m 6s
102:	learn: 0.2626642	test: 0.2667680	best: 0.2667528 (98)	total: 7.65s	remaining: 1m 6s
103:	learn: 0.2626444	test: 0.2667637	best: 0.2667528 (98)	total: 7.76s	remaining: 1m 6s
104:	learn: 0.2626074	test: 0.2667564	best: 0.2667528 (98)	total: 7.88s	remaining: 1m 7s
105:	learn: 0.2625874	test:

187:	learn: 0.2613299	test: 0.2668468	best: 0.2667528 (98)	total: 14.7s	remaining: 1m 3s
188:	learn: 0.2613251	test: 0.2668456	best: 0.2667528 (98)	total: 14.8s	remaining: 1m 3s
189:	learn: 0.2613028	test: 0.2668324	best: 0.2667528 (98)	total: 14.9s	remaining: 1m 3s
190:	learn: 0.2613006	test: 0.2668305	best: 0.2667528 (98)	total: 15s	remaining: 1m 3s
191:	learn: 0.2612859	test: 0.2668323	best: 0.2667528 (98)	total: 15s	remaining: 1m 3s
192:	learn: 0.2612628	test: 0.2668389	best: 0.2667528 (98)	total: 15.1s	remaining: 1m 3s
193:	learn: 0.2612568	test: 0.2668379	best: 0.2667528 (98)	total: 15.2s	remaining: 1m 3s
194:	learn: 0.2612467	test: 0.2668528	best: 0.2667528 (98)	total: 15.3s	remaining: 1m 2s
195:	learn: 0.2612285	test: 0.2668573	best: 0.2667528 (98)	total: 15.3s	remaining: 1m 2s
196:	learn: 0.2612237	test: 0.2668563	best: 0.2667528 (98)	total: 15.4s	remaining: 1m 2s
197:	learn: 0.2612089	test: 0.2668526	best: 0.2667528 (98)	total: 15.5s	remaining: 1m 2s
198:	learn: 0.2611988	tes

282:	learn: 0.2602402	test: 0.2668441	best: 0.2667528 (98)	total: 22.4s	remaining: 56.7s
283:	learn: 0.2602291	test: 0.2668507	best: 0.2667528 (98)	total: 22.5s	remaining: 56.6s
284:	learn: 0.2602252	test: 0.2668513	best: 0.2667528 (98)	total: 22.5s	remaining: 56.5s
285:	learn: 0.2602238	test: 0.2668533	best: 0.2667528 (98)	total: 22.6s	remaining: 56.4s
286:	learn: 0.2602102	test: 0.2668554	best: 0.2667528 (98)	total: 22.7s	remaining: 56.3s
287:	learn: 0.2601918	test: 0.2668583	best: 0.2667528 (98)	total: 22.8s	remaining: 56.3s
288:	learn: 0.2601872	test: 0.2668629	best: 0.2667528 (98)	total: 22.8s	remaining: 56.2s
289:	learn: 0.2601810	test: 0.2668642	best: 0.2667528 (98)	total: 22.9s	remaining: 56.1s
290:	learn: 0.2601720	test: 0.2668667	best: 0.2667528 (98)	total: 23s	remaining: 56s
291:	learn: 0.2601702	test: 0.2668663	best: 0.2667528 (98)	total: 23s	remaining: 55.9s
292:	learn: 0.2601490	test: 0.2668659	best: 0.2667528 (98)	total: 23.1s	remaining: 55.8s
293:	learn: 0.2601417	test:

376:	learn: 0.2592868	test: 0.2668975	best: 0.2667528 (98)	total: 29.3s	remaining: 48.5s
377:	learn: 0.2592749	test: 0.2668988	best: 0.2667528 (98)	total: 29.4s	remaining: 48.4s
378:	learn: 0.2592601	test: 0.2668879	best: 0.2667528 (98)	total: 29.5s	remaining: 48.3s
379:	learn: 0.2592543	test: 0.2668864	best: 0.2667528 (98)	total: 29.6s	remaining: 48.2s
380:	learn: 0.2592370	test: 0.2668971	best: 0.2667528 (98)	total: 29.6s	remaining: 48.1s
381:	learn: 0.2592124	test: 0.2668804	best: 0.2667528 (98)	total: 29.7s	remaining: 48s
382:	learn: 0.2592043	test: 0.2668829	best: 0.2667528 (98)	total: 29.7s	remaining: 47.9s
383:	learn: 0.2592017	test: 0.2668817	best: 0.2667528 (98)	total: 29.8s	remaining: 47.8s
384:	learn: 0.2591983	test: 0.2668825	best: 0.2667528 (98)	total: 29.9s	remaining: 47.8s
385:	learn: 0.2591918	test: 0.2668825	best: 0.2667528 (98)	total: 30s	remaining: 47.7s
386:	learn: 0.2591869	test: 0.2668826	best: 0.2667528 (98)	total: 30.1s	remaining: 47.6s
387:	learn: 0.2591747	tes

469:	learn: 0.2583557	test: 0.2669323	best: 0.2667528 (98)	total: 37.1s	remaining: 41.8s
470:	learn: 0.2583537	test: 0.2669325	best: 0.2667528 (98)	total: 37.2s	remaining: 41.8s
471:	learn: 0.2583402	test: 0.2669339	best: 0.2667528 (98)	total: 37.3s	remaining: 41.7s
472:	learn: 0.2583196	test: 0.2669405	best: 0.2667528 (98)	total: 37.4s	remaining: 41.7s
473:	learn: 0.2582986	test: 0.2669492	best: 0.2667528 (98)	total: 37.5s	remaining: 41.6s
474:	learn: 0.2582881	test: 0.2669501	best: 0.2667528 (98)	total: 37.6s	remaining: 41.6s
475:	learn: 0.2582802	test: 0.2669499	best: 0.2667528 (98)	total: 37.7s	remaining: 41.5s
476:	learn: 0.2582719	test: 0.2669553	best: 0.2667528 (98)	total: 37.8s	remaining: 41.5s
477:	learn: 0.2582700	test: 0.2669589	best: 0.2667528 (98)	total: 37.9s	remaining: 41.4s
478:	learn: 0.2582470	test: 0.2669538	best: 0.2667528 (98)	total: 38s	remaining: 41.3s
479:	learn: 0.2582462	test: 0.2669543	best: 0.2667528 (98)	total: 38s	remaining: 41.2s
480:	learn: 0.2582314	tes

562:	learn: 0.2574867	test: 0.2670740	best: 0.2667528 (98)	total: 44.6s	remaining: 34.6s
563:	learn: 0.2574803	test: 0.2670764	best: 0.2667528 (98)	total: 44.7s	remaining: 34.5s
564:	learn: 0.2574734	test: 0.2670747	best: 0.2667528 (98)	total: 44.8s	remaining: 34.5s
565:	learn: 0.2574624	test: 0.2670775	best: 0.2667528 (98)	total: 44.8s	remaining: 34.4s
566:	learn: 0.2574512	test: 0.2670821	best: 0.2667528 (98)	total: 45s	remaining: 34.3s
567:	learn: 0.2574366	test: 0.2670850	best: 0.2667528 (98)	total: 45.1s	remaining: 34.3s
568:	learn: 0.2574308	test: 0.2670820	best: 0.2667528 (98)	total: 45.2s	remaining: 34.2s
569:	learn: 0.2574267	test: 0.2670821	best: 0.2667528 (98)	total: 45.3s	remaining: 34.2s
570:	learn: 0.2574208	test: 0.2670803	best: 0.2667528 (98)	total: 45.4s	remaining: 34.1s
571:	learn: 0.2574113	test: 0.2670817	best: 0.2667528 (98)	total: 45.5s	remaining: 34.1s
572:	learn: 0.2574059	test: 0.2670807	best: 0.2667528 (98)	total: 45.7s	remaining: 34s
573:	learn: 0.2573938	tes

655:	learn: 0.2566017	test: 0.2671929	best: 0.2667528 (98)	total: 52.4s	remaining: 27.5s
656:	learn: 0.2565882	test: 0.2671773	best: 0.2667528 (98)	total: 52.4s	remaining: 27.4s
657:	learn: 0.2565819	test: 0.2671787	best: 0.2667528 (98)	total: 52.5s	remaining: 27.3s
658:	learn: 0.2565691	test: 0.2671921	best: 0.2667528 (98)	total: 52.6s	remaining: 27.2s
659:	learn: 0.2565625	test: 0.2671911	best: 0.2667528 (98)	total: 52.7s	remaining: 27.1s
660:	learn: 0.2565587	test: 0.2671925	best: 0.2667528 (98)	total: 52.8s	remaining: 27.1s
661:	learn: 0.2565526	test: 0.2671881	best: 0.2667528 (98)	total: 52.8s	remaining: 27s
662:	learn: 0.2565468	test: 0.2671852	best: 0.2667528 (98)	total: 52.9s	remaining: 26.9s
663:	learn: 0.2565374	test: 0.2671865	best: 0.2667528 (98)	total: 53s	remaining: 26.8s
664:	learn: 0.2565288	test: 0.2671832	best: 0.2667528 (98)	total: 53.1s	remaining: 26.8s
665:	learn: 0.2565095	test: 0.2671870	best: 0.2667528 (98)	total: 53.2s	remaining: 26.7s
666:	learn: 0.2565052	tes

748:	learn: 0.2558159	test: 0.2672941	best: 0.2667528 (98)	total: 1m	remaining: 20.2s
749:	learn: 0.2558107	test: 0.2672932	best: 0.2667528 (98)	total: 1m	remaining: 20.1s
750:	learn: 0.2558020	test: 0.2672912	best: 0.2667528 (98)	total: 1m	remaining: 20s
751:	learn: 0.2557999	test: 0.2672915	best: 0.2667528 (98)	total: 1m	remaining: 19.9s
752:	learn: 0.2557834	test: 0.2672861	best: 0.2667528 (98)	total: 1m	remaining: 19.8s
753:	learn: 0.2557735	test: 0.2672994	best: 0.2667528 (98)	total: 1m	remaining: 19.8s
754:	learn: 0.2557700	test: 0.2672994	best: 0.2667528 (98)	total: 1m	remaining: 19.7s
755:	learn: 0.2557596	test: 0.2673009	best: 0.2667528 (98)	total: 1m	remaining: 19.6s
756:	learn: 0.2557518	test: 0.2673021	best: 0.2667528 (98)	total: 1m	remaining: 19.5s
757:	learn: 0.2557451	test: 0.2673035	best: 0.2667528 (98)	total: 1m	remaining: 19.4s
758:	learn: 0.2557353	test: 0.2672999	best: 0.2667528 (98)	total: 1m	remaining: 19.3s
759:	learn: 0.2557312	test: 0.2673000	best: 0.2667528 (9

841:	learn: 0.2550680	test: 0.2674163	best: 0.2667528 (98)	total: 1m 7s	remaining: 12.6s
842:	learn: 0.2550576	test: 0.2674091	best: 0.2667528 (98)	total: 1m 7s	remaining: 12.5s
843:	learn: 0.2550568	test: 0.2674089	best: 0.2667528 (98)	total: 1m 7s	remaining: 12.5s
844:	learn: 0.2550468	test: 0.2674103	best: 0.2667528 (98)	total: 1m 7s	remaining: 12.4s
845:	learn: 0.2550437	test: 0.2674129	best: 0.2667528 (98)	total: 1m 7s	remaining: 12.3s
846:	learn: 0.2550290	test: 0.2674365	best: 0.2667528 (98)	total: 1m 7s	remaining: 12.2s
847:	learn: 0.2550198	test: 0.2674389	best: 0.2667528 (98)	total: 1m 7s	remaining: 12.1s
848:	learn: 0.2550122	test: 0.2674405	best: 0.2667528 (98)	total: 1m 7s	remaining: 12.1s
849:	learn: 0.2550018	test: 0.2674436	best: 0.2667528 (98)	total: 1m 7s	remaining: 12s
850:	learn: 0.2549930	test: 0.2674423	best: 0.2667528 (98)	total: 1m 8s	remaining: 11.9s
851:	learn: 0.2549821	test: 0.2674452	best: 0.2667528 (98)	total: 1m 8s	remaining: 11.8s
852:	learn: 0.2549775	t

934:	learn: 0.2541554	test: 0.2674933	best: 0.2667528 (98)	total: 1m 15s	remaining: 5.22s
935:	learn: 0.2541428	test: 0.2675055	best: 0.2667528 (98)	total: 1m 15s	remaining: 5.14s
936:	learn: 0.2541375	test: 0.2675054	best: 0.2667528 (98)	total: 1m 15s	remaining: 5.06s
937:	learn: 0.2541313	test: 0.2674993	best: 0.2667528 (98)	total: 1m 15s	remaining: 4.98s
938:	learn: 0.2541234	test: 0.2675024	best: 0.2667528 (98)	total: 1m 15s	remaining: 4.9s
939:	learn: 0.2541050	test: 0.2675016	best: 0.2667528 (98)	total: 1m 15s	remaining: 4.82s
940:	learn: 0.2540992	test: 0.2675018	best: 0.2667528 (98)	total: 1m 15s	remaining: 4.74s
941:	learn: 0.2540920	test: 0.2674964	best: 0.2667528 (98)	total: 1m 15s	remaining: 4.67s
942:	learn: 0.2540847	test: 0.2675019	best: 0.2667528 (98)	total: 1m 15s	remaining: 4.59s
943:	learn: 0.2540767	test: 0.2675027	best: 0.2667528 (98)	total: 1m 15s	remaining: 4.51s
944:	learn: 0.2540739	test: 0.2674994	best: 0.2667528 (98)	total: 1m 16s	remaining: 4.43s
945:	learn:

<catboost.core.CatBoostClassifier at 0x7f83557f0790>

In [35]:
from sklearn.metrics import roc_auc_score

predictions = cls_catboost.predict(y_train)
print("Score: {}".format(cls_catboost.score(y_train, y_test)))
roc_auc_score(predictions, np.array(y_test))

Score: 0.8749180026763926


0.7417725912678196

In [33]:
probs = cls_catboost.predict_proba(y_train)[:, 1]
roc_auc_score(y_test, probs) ## This only gives probability of 1 or buying insurance

0.860665913738337