In [None]:
#Analysis of Mortgage Approvals from US Government Data


In [2]:
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, cv

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [3]:
#reading the datasets 
train = pd.read_csv("train_values.csv")
test = pd.read_csv("test_values.csv")

train_label = pd.read_csv("train_labels.csv")

In [4]:
#merging the contents of train_values and train_label datasets respectively

join_train = pd.merge(train, train_label, on='row_id')
join_train.head()

Unnamed: 0,row_id,loan_type,property_type,loan_purpose,occupancy,loan_amount,preapproval,msa_md,state_code,county_code,...,applicant_income,population,minority_population_pct,ffiecmedian_family_income,tract_to_msa_md_income_pct,number_of_owner-occupied_units,number_of_1_to_4_family_units,lender,co_applicant,accepted
0,0,3,1,1,1,70.0,3,18,37,246,...,24.0,6203.0,44.23,60588.0,50.933,716.0,2642.0,4536,False,1
1,1,1,1,3,1,178.0,3,369,52,299,...,57.0,5774.0,15.905,54821.0,100.0,1622.0,2108.0,2458,False,0
2,2,2,1,3,1,163.0,3,16,10,306,...,67.0,6094.0,61.27,67719.0,100.0,760.0,1048.0,5710,False,1
3,3,1,1,1,1,155.0,1,305,47,180,...,105.0,6667.0,6.246,78439.0,100.0,2025.0,2299.0,5888,True,1
4,4,1,1,1,1,305.0,3,24,37,20,...,71.0,6732.0,100.0,63075.0,82.2,1464.0,1847.0,289,False,1


In [5]:
#datacleaning

join_train.fillna(-999, inplace=True)
test.fillna(-999, inplace=True)

In [6]:
#extracting the ration of loan to income

join_train['loan_income_ratio'] = join_train['applicant_income']/join_train['loan_amount']
test['loan_income_ration'] = test['applicant_income']/test['loan_amount']

mean_acceptance_rate = pd.DataFrame(join_train.groupby(['lender'])['accepted'].mean())

msd_acceptance_rate = pd.DataFrame(join_train.groupby(['msa_md'])['accepted'].mean())


join_train = pd.merge(join_train, mean_acceptance_rate, how='left', on='lender')
test = pd.merge(test, mean_acceptance_rate, how='left', on='lender')


In [20]:
#declaration of features for join_train and test

join_train.columns = ['row_id', 'loan_type', 'property_type', 'loan_purpose','occupancy',
                     'loan_amount', 'preapproval', 'msa_md', 'state_code', 'country_code',
                      'applicant_ethnicity', 'applicant_race','applicant_sex',
                      'applicant_income', 'population', 'minority_population_pct',
                     
                     'ffiecmedia_family_income','tract_to_msa_md_income_pct',
                      'number_of_owner-occupied_units','number_of_1_to_4_family_units',
                      'lender','co_applicant','accepted','loan_income_ratio','loan_accepted_rate'
                     ]


test.columns = ['row_id','loan_type','property_type','loan_purpose','occupancy',
              'loan_amount','preapproval','msa_md','state_code','country_code',
               'applicant_ethnicity','applicant_race','applicant_sex',
               'applicant_income','population','minority_population_pct',
               'ffiecmedia_family_income','tract_to_msa_md_income_pct',
               'number_of_owner-occupied_units','number_of_1_to_4_family_units',
               'lender','co_applicant','loan_income_ratio','loan_accepted_rate'
              ]

In [8]:
#displaying the information of the features under join_train joi
join_train.info()

x = join_train.drop('accepted', axis=1)
y = join_train.accepted

x.dtypes

<class 'pandas.core.frame.DataFrame'>
Int64Index: 500000 entries, 0 to 499999
Data columns (total 25 columns):
row_id                            500000 non-null int64
loan_type                         500000 non-null int64
property_type                     500000 non-null int64
loan_purpose                      500000 non-null int64
occupancy                         500000 non-null int64
loan_amount                       500000 non-null float64
preapproval                       500000 non-null int64
msa_md                            500000 non-null int64
state_code                        500000 non-null int64
country_code                      500000 non-null int64
applicant_ethnicity               500000 non-null int64
applicant_race                    500000 non-null int64
applicant_sex                     500000 non-null int64
applicant_income                  500000 non-null float64
population                        500000 non-null float64
minority_population_pct           500000 no

row_id                              int64
loan_type                           int64
property_type                       int64
loan_purpose                        int64
occupancy                           int64
loan_amount                       float64
preapproval                         int64
msa_md                              int64
state_code                          int64
country_code                        int64
applicant_ethnicity                 int64
applicant_race                      int64
applicant_sex                       int64
applicant_income                  float64
population                        float64
minority_population_pct           float64
ffiecmedia_family_income          float64
tract_to_msa_md_income_pct        float64
number_of_owner-occupied_units    float64
number_of_1_to_4_family_units     float64
lender                              int64
co_applicant                         bool
loan_income_ratio                 float64
loan_accepted_rate                

In [9]:
cate_features_index = np.where(x.dtypes != float)[0]

In [10]:
#splitting the train and test variables
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.15)


In [11]:
#building the prediction model 
model = CatBoostClassifier(
    iterations=1500, 
    depth=10, learning_rate=0.01, eval_metric='Accuracy',
    use_best_model=True, random_seed=42 )

In [24]:
#fitting the model
model.fit(xtrain, ytrain, cat_features=cate_features_index, eval_set=(xtest, ytest))

          

0:	learn: 0.7082941	test: 0.7036667	best: 0.7036667 (0)	total: 1.14s	remaining: 28m 30s
1:	learn: 0.7129318	test: 0.7069467	best: 0.7069467 (1)	total: 2.1s	remaining: 26m 10s
2:	learn: 0.7139365	test: 0.7077600	best: 0.7077600 (2)	total: 2.98s	remaining: 24m 49s
3:	learn: 0.7134353	test: 0.7068533	best: 0.7077600 (2)	total: 3.89s	remaining: 24m 14s
4:	learn: 0.7143624	test: 0.7083600	best: 0.7083600 (4)	total: 4.86s	remaining: 24m 12s
5:	learn: 0.7156965	test: 0.7090267	best: 0.7090267 (5)	total: 5.92s	remaining: 24m 34s
6:	learn: 0.7150682	test: 0.7083600	best: 0.7090267 (5)	total: 7.12s	remaining: 25m 18s
7:	learn: 0.7151953	test: 0.7086000	best: 0.7090267 (5)	total: 8.22s	remaining: 25m 33s
8:	learn: 0.7147247	test: 0.7079467	best: 0.7090267 (5)	total: 9.29s	remaining: 25m 39s
9:	learn: 0.7157600	test: 0.7086000	best: 0.7090267 (5)	total: 10.4s	remaining: 25m 50s
10:	learn: 0.7154776	test: 0.7079067	best: 0.7090267 (5)	total: 11.4s	remaining: 25m 46s
11:	learn: 0.7152729	test: 0.707

92:	learn: 0.7211976	test: 0.7140667	best: 0.7141600 (91)	total: 1m 46s	remaining: 26m 47s
93:	learn: 0.7211765	test: 0.7141600	best: 0.7141600 (91)	total: 1m 47s	remaining: 26m 45s
94:	learn: 0.7212635	test: 0.7140533	best: 0.7141600 (91)	total: 1m 48s	remaining: 26m 45s
95:	learn: 0.7212871	test: 0.7141200	best: 0.7141600 (91)	total: 1m 49s	remaining: 26m 44s
96:	learn: 0.7213341	test: 0.7142667	best: 0.7142667 (96)	total: 1m 50s	remaining: 26m 42s
97:	learn: 0.7214753	test: 0.7143867	best: 0.7143867 (97)	total: 1m 51s	remaining: 26m 41s
98:	learn: 0.7214541	test: 0.7144000	best: 0.7144000 (98)	total: 1m 53s	remaining: 26m 39s
99:	learn: 0.7215812	test: 0.7146000	best: 0.7146000 (99)	total: 1m 54s	remaining: 26m 37s
100:	learn: 0.7216000	test: 0.7146267	best: 0.7146267 (100)	total: 1m 55s	remaining: 26m 35s
101:	learn: 0.7216541	test: 0.7147600	best: 0.7147600 (101)	total: 1m 56s	remaining: 26m 33s
102:	learn: 0.7218235	test: 0.7147600	best: 0.7147600 (101)	total: 1m 57s	remaining: 2

181:	learn: 0.7267765	test: 0.7199200	best: 0.7199333 (180)	total: 3m 23s	remaining: 24m 30s
182:	learn: 0.7268400	test: 0.7200133	best: 0.7200133 (182)	total: 3m 24s	remaining: 24m 28s
183:	learn: 0.7269341	test: 0.7199467	best: 0.7200133 (182)	total: 3m 25s	remaining: 24m 27s
184:	learn: 0.7269694	test: 0.7200267	best: 0.7200267 (184)	total: 3m 25s	remaining: 24m 23s
185:	learn: 0.7270682	test: 0.7201467	best: 0.7201467 (185)	total: 3m 27s	remaining: 24m 22s
186:	learn: 0.7271365	test: 0.7202533	best: 0.7202533 (186)	total: 3m 28s	remaining: 24m 21s
187:	learn: 0.7272188	test: 0.7202000	best: 0.7202533 (186)	total: 3m 29s	remaining: 24m 19s
188:	learn: 0.7272188	test: 0.7203200	best: 0.7203200 (188)	total: 3m 30s	remaining: 24m 18s
189:	learn: 0.7272212	test: 0.7202667	best: 0.7203200 (188)	total: 3m 31s	remaining: 24m 16s
190:	learn: 0.7273129	test: 0.7202667	best: 0.7203200 (188)	total: 3m 32s	remaining: 24m 14s
191:	learn: 0.7273718	test: 0.7202000	best: 0.7203200 (188)	total: 3m 

270:	learn: 0.7303059	test: 0.7231600	best: 0.7231600 (270)	total: 5m 4s	remaining: 23m 3s
271:	learn: 0.7303435	test: 0.7231600	best: 0.7231600 (270)	total: 5m 6s	remaining: 23m 1s
272:	learn: 0.7303459	test: 0.7231600	best: 0.7231600 (270)	total: 5m 7s	remaining: 23m
273:	learn: 0.7303412	test: 0.7232800	best: 0.7232800 (273)	total: 5m 8s	remaining: 23m
274:	learn: 0.7304376	test: 0.7232800	best: 0.7232800 (273)	total: 5m 9s	remaining: 22m 59s
275:	learn: 0.7304988	test: 0.7233600	best: 0.7233600 (275)	total: 5m 10s	remaining: 22m 58s
276:	learn: 0.7304565	test: 0.7233733	best: 0.7233733 (276)	total: 5m 12s	remaining: 22m 57s
277:	learn: 0.7304682	test: 0.7234800	best: 0.7234800 (277)	total: 5m 13s	remaining: 22m 57s
278:	learn: 0.7305365	test: 0.7233733	best: 0.7234800 (277)	total: 5m 14s	remaining: 22m 56s
279:	learn: 0.7305553	test: 0.7234400	best: 0.7234800 (277)	total: 5m 15s	remaining: 22m 55s
280:	learn: 0.7306118	test: 0.7234933	best: 0.7234933 (280)	total: 5m 16s	remaining: 

359:	learn: 0.7326612	test: 0.7254267	best: 0.7254267 (352)	total: 6m 44s	remaining: 21m 22s
360:	learn: 0.7326588	test: 0.7253867	best: 0.7254267 (352)	total: 6m 46s	remaining: 21m 21s
361:	learn: 0.7327082	test: 0.7254933	best: 0.7254933 (361)	total: 6m 47s	remaining: 21m 21s
362:	learn: 0.7326988	test: 0.7255200	best: 0.7255200 (362)	total: 6m 49s	remaining: 21m 21s
363:	learn: 0.7327482	test: 0.7256800	best: 0.7256800 (363)	total: 6m 50s	remaining: 21m 21s
364:	learn: 0.7327859	test: 0.7257067	best: 0.7257067 (364)	total: 6m 51s	remaining: 21m 20s
365:	learn: 0.7327976	test: 0.7258267	best: 0.7258267 (365)	total: 6m 52s	remaining: 21m 19s
366:	learn: 0.7328235	test: 0.7258133	best: 0.7258267 (365)	total: 6m 54s	remaining: 21m 18s
367:	learn: 0.7328212	test: 0.7257600	best: 0.7258267 (365)	total: 6m 55s	remaining: 21m 17s
368:	learn: 0.7328494	test: 0.7258133	best: 0.7258267 (365)	total: 6m 56s	remaining: 21m 17s
369:	learn: 0.7328659	test: 0.7258400	best: 0.7258400 (369)	total: 6m 

448:	learn: 0.7346894	test: 0.7267467	best: 0.7267867 (444)	total: 8m 28s	remaining: 19m 50s
449:	learn: 0.7347294	test: 0.7266933	best: 0.7267867 (444)	total: 8m 29s	remaining: 19m 48s
450:	learn: 0.7347482	test: 0.7266933	best: 0.7267867 (444)	total: 8m 30s	remaining: 19m 47s
451:	learn: 0.7347671	test: 0.7266933	best: 0.7267867 (444)	total: 8m 31s	remaining: 19m 45s
452:	learn: 0.7347624	test: 0.7266933	best: 0.7267867 (444)	total: 8m 32s	remaining: 19m 44s
453:	learn: 0.7347624	test: 0.7267733	best: 0.7267867 (444)	total: 8m 33s	remaining: 19m 43s
454:	learn: 0.7347741	test: 0.7267333	best: 0.7267867 (444)	total: 8m 34s	remaining: 19m 41s
455:	learn: 0.7348047	test: 0.7267467	best: 0.7267867 (444)	total: 8m 35s	remaining: 19m 40s
456:	learn: 0.7347671	test: 0.7267467	best: 0.7267867 (444)	total: 8m 36s	remaining: 19m 39s
457:	learn: 0.7348282	test: 0.7268800	best: 0.7268800 (457)	total: 8m 38s	remaining: 19m 38s
458:	learn: 0.7348565	test: 0.7269867	best: 0.7269867 (458)	total: 8m 

537:	learn: 0.7363788	test: 0.7277467	best: 0.7277467 (537)	total: 10m 12s	remaining: 18m 15s
538:	learn: 0.7363812	test: 0.7277200	best: 0.7277467 (537)	total: 10m 14s	remaining: 18m 15s
539:	learn: 0.7363906	test: 0.7277333	best: 0.7277467 (537)	total: 10m 15s	remaining: 18m 14s
540:	learn: 0.7364047	test: 0.7277067	best: 0.7277467 (537)	total: 10m 16s	remaining: 18m 13s
541:	learn: 0.7364282	test: 0.7277600	best: 0.7277600 (541)	total: 10m 18s	remaining: 18m 12s
542:	learn: 0.7364376	test: 0.7277867	best: 0.7277867 (542)	total: 10m 19s	remaining: 18m 11s
543:	learn: 0.7364682	test: 0.7278533	best: 0.7278533 (543)	total: 10m 20s	remaining: 18m 10s
544:	learn: 0.7364894	test: 0.7279067	best: 0.7279067 (544)	total: 10m 21s	remaining: 18m 9s
545:	learn: 0.7364918	test: 0.7279200	best: 0.7279200 (545)	total: 10m 22s	remaining: 18m 8s
546:	learn: 0.7364894	test: 0.7279067	best: 0.7279200 (545)	total: 10m 23s	remaining: 18m 7s
547:	learn: 0.7365176	test: 0.7279200	best: 0.7279200 (545)	tot

625:	learn: 0.7377976	test: 0.7289333	best: 0.7291200 (612)	total: 11m 52s	remaining: 16m 34s
626:	learn: 0.7378000	test: 0.7289067	best: 0.7291200 (612)	total: 11m 53s	remaining: 16m 33s
627:	learn: 0.7378165	test: 0.7289333	best: 0.7291200 (612)	total: 11m 55s	remaining: 16m 32s
628:	learn: 0.7378353	test: 0.7289333	best: 0.7291200 (612)	total: 11m 56s	remaining: 16m 31s
629:	learn: 0.7378424	test: 0.7289733	best: 0.7291200 (612)	total: 11m 57s	remaining: 16m 30s
630:	learn: 0.7378706	test: 0.7290400	best: 0.7291200 (612)	total: 11m 58s	remaining: 16m 29s
631:	learn: 0.7378471	test: 0.7290400	best: 0.7291200 (612)	total: 11m 59s	remaining: 16m 28s
632:	learn: 0.7378424	test: 0.7290000	best: 0.7291200 (612)	total: 12m 1s	remaining: 16m 27s
633:	learn: 0.7378706	test: 0.7290000	best: 0.7291200 (612)	total: 12m 2s	remaining: 16m 26s
634:	learn: 0.7379129	test: 0.7290133	best: 0.7291200 (612)	total: 12m 3s	remaining: 16m 25s
635:	learn: 0.7379529	test: 0.7290267	best: 0.7291200 (612)	tot

713:	learn: 0.7392306	test: 0.7294800	best: 0.7295600 (677)	total: 13m 33s	remaining: 14m 55s
714:	learn: 0.7392800	test: 0.7294533	best: 0.7295600 (677)	total: 13m 34s	remaining: 14m 54s
715:	learn: 0.7392988	test: 0.7294000	best: 0.7295600 (677)	total: 13m 35s	remaining: 14m 53s
716:	learn: 0.7393059	test: 0.7294000	best: 0.7295600 (677)	total: 13m 37s	remaining: 14m 52s
717:	learn: 0.7393200	test: 0.7293600	best: 0.7295600 (677)	total: 13m 38s	remaining: 14m 51s
718:	learn: 0.7393341	test: 0.7293867	best: 0.7295600 (677)	total: 13m 39s	remaining: 14m 49s
719:	learn: 0.7393224	test: 0.7294667	best: 0.7295600 (677)	total: 13m 40s	remaining: 14m 48s
720:	learn: 0.7393341	test: 0.7294533	best: 0.7295600 (677)	total: 13m 41s	remaining: 14m 47s
721:	learn: 0.7393812	test: 0.7294400	best: 0.7295600 (677)	total: 13m 42s	remaining: 14m 46s
722:	learn: 0.7394024	test: 0.7294267	best: 0.7295600 (677)	total: 13m 43s	remaining: 14m 45s
723:	learn: 0.7394282	test: 0.7294800	best: 0.7295600 (677)	

801:	learn: 0.7405718	test: 0.7301067	best: 0.7301600 (798)	total: 15m 18s	remaining: 13m 19s
802:	learn: 0.7405624	test: 0.7301867	best: 0.7301867 (802)	total: 15m 19s	remaining: 13m 18s
803:	learn: 0.7405953	test: 0.7302000	best: 0.7302000 (803)	total: 15m 20s	remaining: 13m 17s
804:	learn: 0.7405859	test: 0.7301067	best: 0.7302000 (803)	total: 15m 21s	remaining: 13m 15s
805:	learn: 0.7406424	test: 0.7301067	best: 0.7302000 (803)	total: 15m 23s	remaining: 13m 14s
806:	learn: 0.7406424	test: 0.7300800	best: 0.7302000 (803)	total: 15m 24s	remaining: 13m 13s
807:	learn: 0.7406447	test: 0.7301067	best: 0.7302000 (803)	total: 15m 25s	remaining: 13m 12s
808:	learn: 0.7406259	test: 0.7301467	best: 0.7302000 (803)	total: 15m 26s	remaining: 13m 11s
809:	learn: 0.7406588	test: 0.7302000	best: 0.7302000 (803)	total: 15m 27s	remaining: 13m 10s
810:	learn: 0.7406541	test: 0.7302267	best: 0.7302267 (810)	total: 15m 29s	remaining: 13m 9s
811:	learn: 0.7406659	test: 0.7302667	best: 0.7302667 (811)	t

889:	learn: 0.7417671	test: 0.7306267	best: 0.7306533 (888)	total: 16m 59s	remaining: 11m 38s
890:	learn: 0.7417788	test: 0.7306667	best: 0.7306667 (890)	total: 17m	remaining: 11m 37s
891:	learn: 0.7417882	test: 0.7306800	best: 0.7306800 (891)	total: 17m 2s	remaining: 11m 36s
892:	learn: 0.7417859	test: 0.7306933	best: 0.7306933 (892)	total: 17m 3s	remaining: 11m 35s
893:	learn: 0.7418024	test: 0.7307200	best: 0.7307200 (893)	total: 17m 4s	remaining: 11m 34s
894:	learn: 0.7418306	test: 0.7307067	best: 0.7307200 (893)	total: 17m 5s	remaining: 11m 33s
895:	learn: 0.7418424	test: 0.7306667	best: 0.7307200 (893)	total: 17m 7s	remaining: 11m 32s
896:	learn: 0.7418588	test: 0.7306800	best: 0.7307200 (893)	total: 17m 8s	remaining: 11m 31s
897:	learn: 0.7418447	test: 0.7306400	best: 0.7307200 (893)	total: 17m 9s	remaining: 11m 30s
898:	learn: 0.7418706	test: 0.7306933	best: 0.7307200 (893)	total: 17m 10s	remaining: 11m 29s
899:	learn: 0.7418824	test: 0.7307333	best: 0.7307333 (899)	total: 17m 

977:	learn: 0.7430047	test: 0.7312933	best: 0.7312933 (977)	total: 18m 38s	remaining: 9m 57s
978:	learn: 0.7430024	test: 0.7314000	best: 0.7314000 (978)	total: 18m 40s	remaining: 9m 56s
979:	learn: 0.7430141	test: 0.7313867	best: 0.7314000 (978)	total: 18m 41s	remaining: 9m 54s
980:	learn: 0.7430259	test: 0.7314000	best: 0.7314000 (978)	total: 18m 42s	remaining: 9m 53s
981:	learn: 0.7430753	test: 0.7314533	best: 0.7314533 (981)	total: 18m 43s	remaining: 9m 52s
982:	learn: 0.7430941	test: 0.7314533	best: 0.7314533 (981)	total: 18m 44s	remaining: 9m 51s
983:	learn: 0.7430729	test: 0.7314267	best: 0.7314533 (981)	total: 18m 46s	remaining: 9m 50s
984:	learn: 0.7430706	test: 0.7314667	best: 0.7314667 (984)	total: 18m 47s	remaining: 9m 49s
985:	learn: 0.7430824	test: 0.7314400	best: 0.7314667 (984)	total: 18m 48s	remaining: 9m 48s
986:	learn: 0.7430847	test: 0.7314400	best: 0.7314667 (984)	total: 18m 49s	remaining: 9m 47s
987:	learn: 0.7430706	test: 0.7313600	best: 0.7314667 (984)	total: 18m

1065:	learn: 0.7442988	test: 0.7313867	best: 0.7314667 (984)	total: 20m 22s	remaining: 8m 17s
1066:	learn: 0.7443224	test: 0.7314667	best: 0.7314667 (984)	total: 20m 24s	remaining: 8m 16s
1067:	learn: 0.7443647	test: 0.7314800	best: 0.7314800 (1067)	total: 20m 25s	remaining: 8m 15s
1068:	learn: 0.7443647	test: 0.7314667	best: 0.7314800 (1067)	total: 20m 26s	remaining: 8m 14s
1069:	learn: 0.7443718	test: 0.7314533	best: 0.7314800 (1067)	total: 20m 27s	remaining: 8m 13s
1070:	learn: 0.7443835	test: 0.7314267	best: 0.7314800 (1067)	total: 20m 28s	remaining: 8m 12s
1071:	learn: 0.7443906	test: 0.7314133	best: 0.7314800 (1067)	total: 20m 29s	remaining: 8m 11s
1072:	learn: 0.7443929	test: 0.7314400	best: 0.7314800 (1067)	total: 20m 31s	remaining: 8m 9s
1073:	learn: 0.7444047	test: 0.7314000	best: 0.7314800 (1067)	total: 20m 32s	remaining: 8m 8s
1074:	learn: 0.7444188	test: 0.7314800	best: 0.7314800 (1067)	total: 20m 33s	remaining: 8m 7s
1075:	learn: 0.7444259	test: 0.7315333	best: 0.7315333 

1152:	learn: 0.7453718	test: 0.7320667	best: 0.7321467 (1151)	total: 22m	remaining: 6m 37s
1153:	learn: 0.7453624	test: 0.7321067	best: 0.7321467 (1151)	total: 22m 1s	remaining: 6m 36s
1154:	learn: 0.7453718	test: 0.7320667	best: 0.7321467 (1151)	total: 22m 3s	remaining: 6m 35s
1155:	learn: 0.7453788	test: 0.7321333	best: 0.7321467 (1151)	total: 22m 4s	remaining: 6m 34s
1156:	learn: 0.7454094	test: 0.7321600	best: 0.7321600 (1156)	total: 22m 5s	remaining: 6m 32s
1157:	learn: 0.7454118	test: 0.7321333	best: 0.7321600 (1156)	total: 22m 6s	remaining: 6m 31s
1158:	learn: 0.7454047	test: 0.7321200	best: 0.7321600 (1156)	total: 22m 7s	remaining: 6m 30s
1159:	learn: 0.7454282	test: 0.7321333	best: 0.7321600 (1156)	total: 22m 8s	remaining: 6m 29s
1160:	learn: 0.7454329	test: 0.7321200	best: 0.7321600 (1156)	total: 22m 10s	remaining: 6m 28s
1161:	learn: 0.7454471	test: 0.7320400	best: 0.7321600 (1156)	total: 22m 11s	remaining: 6m 27s
1162:	learn: 0.7454400	test: 0.7321067	best: 0.7321600 (1156)

1239:	learn: 0.7464494	test: 0.7325467	best: 0.7325467 (1238)	total: 23m 38s	remaining: 4m 57s
1240:	learn: 0.7464400	test: 0.7324800	best: 0.7325467 (1238)	total: 23m 39s	remaining: 4m 56s
1241:	learn: 0.7464847	test: 0.7324533	best: 0.7325467 (1238)	total: 23m 40s	remaining: 4m 55s
1242:	learn: 0.7464918	test: 0.7325067	best: 0.7325467 (1238)	total: 23m 42s	remaining: 4m 54s
1243:	learn: 0.7464824	test: 0.7324800	best: 0.7325467 (1238)	total: 23m 43s	remaining: 4m 52s
1244:	learn: 0.7465153	test: 0.7324667	best: 0.7325467 (1238)	total: 23m 44s	remaining: 4m 51s
1245:	learn: 0.7465129	test: 0.7324400	best: 0.7325467 (1238)	total: 23m 45s	remaining: 4m 50s
1246:	learn: 0.7465200	test: 0.7324400	best: 0.7325467 (1238)	total: 23m 46s	remaining: 4m 49s
1247:	learn: 0.7465506	test: 0.7324533	best: 0.7325467 (1238)	total: 23m 47s	remaining: 4m 48s
1248:	learn: 0.7465318	test: 0.7324400	best: 0.7325467 (1238)	total: 23m 49s	remaining: 4m 47s
1249:	learn: 0.7465576	test: 0.7324667	best: 0.732

1326:	learn: 0.7475153	test: 0.7327067	best: 0.7327067 (1322)	total: 25m 17s	remaining: 3m 17s
1327:	learn: 0.7475059	test: 0.7326533	best: 0.7327067 (1322)	total: 25m 18s	remaining: 3m 16s
1328:	learn: 0.7475059	test: 0.7326667	best: 0.7327067 (1322)	total: 25m 20s	remaining: 3m 15s
1329:	learn: 0.7475035	test: 0.7326933	best: 0.7327067 (1322)	total: 25m 21s	remaining: 3m 14s
1330:	learn: 0.7475294	test: 0.7326800	best: 0.7327067 (1322)	total: 25m 22s	remaining: 3m 13s
1331:	learn: 0.7475435	test: 0.7326800	best: 0.7327067 (1322)	total: 25m 24s	remaining: 3m 12s
1332:	learn: 0.7475812	test: 0.7326933	best: 0.7327067 (1322)	total: 25m 25s	remaining: 3m 11s
1333:	learn: 0.7475694	test: 0.7326667	best: 0.7327067 (1322)	total: 25m 26s	remaining: 3m 9s
1334:	learn: 0.7475600	test: 0.7326400	best: 0.7327067 (1322)	total: 25m 27s	remaining: 3m 8s
1335:	learn: 0.7475835	test: 0.7326267	best: 0.7327067 (1322)	total: 25m 29s	remaining: 3m 7s
1336:	learn: 0.7475718	test: 0.7326267	best: 0.732706

1413:	learn: 0.7485388	test: 0.7329733	best: 0.7329733 (1407)	total: 26m 55s	remaining: 1m 38s
1414:	learn: 0.7485224	test: 0.7329867	best: 0.7329867 (1414)	total: 26m 56s	remaining: 1m 37s
1415:	learn: 0.7485812	test: 0.7330533	best: 0.7330533 (1415)	total: 26m 57s	remaining: 1m 35s
1416:	learn: 0.7485765	test: 0.7330933	best: 0.7330933 (1416)	total: 26m 58s	remaining: 1m 34s
1417:	learn: 0.7486000	test: 0.7330667	best: 0.7330933 (1416)	total: 27m	remaining: 1m 33s
1418:	learn: 0.7486282	test: 0.7331067	best: 0.7331067 (1418)	total: 27m 1s	remaining: 1m 32s
1419:	learn: 0.7486894	test: 0.7330133	best: 0.7331067 (1418)	total: 27m 2s	remaining: 1m 31s
1420:	learn: 0.7486894	test: 0.7330000	best: 0.7331067 (1418)	total: 27m 3s	remaining: 1m 30s
1421:	learn: 0.7487035	test: 0.7330267	best: 0.7331067 (1418)	total: 27m 4s	remaining: 1m 29s
1422:	learn: 0.7486847	test: 0.7330400	best: 0.7331067 (1418)	total: 27m 5s	remaining: 1m 27s
1423:	learn: 0.7487059	test: 0.7330000	best: 0.7331067 (141

<catboost.core.CatBoostClassifier at 0x1a2000e160>

In [25]:
#test accuracy
print('The test accuracy is :{:.6f}'.format(accuracy_score(ytest, model.predict(xtest))))

The test accuracy is :0.733400


In [26]:
#predicting the outcome of the loan application
pred = model.predict(test)
pred = pred.astype(np.int)

submission = pd.DataFrame({'row_id':test['row_id'],'accepted':pred})

In [27]:
#exporting the predicted outcome to csv format
submission.to_csv('Lars_Lemos_prediction_catboost_submission1.csv', index=False)