In [1]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
%matplotlib inline 

In [3]:
train = pd.read_csv('./input/train.csv')
test = pd.read_csv('./input/test.csv')

train.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [4]:
X_full = pd.concat([train.drop('Survived', axis = 1), test], axis = 0)

In [5]:
X_full.shape

(1309, 11)

In [6]:
X_full.drop('PassengerId', axis = 1, inplace=True)

In [7]:
X_full.isnull().sum()

Pclass         0
Name           0
Sex            0
Age          263
SibSp          0
Parch          0
Ticket         0
Fare           1
Cabin       1014
Embarked       2
dtype: int64

In [8]:
(X_full.Age.isnull() & X_full.Cabin.isnull()).sum()

240

In [9]:
train.Survived.mean()

0.3838383838383838

In [10]:
train.Cabin.notnull().mean()

0.22895622895622897

In [11]:
(train.Cabin.isnull() & (train.Survived == 0)).mean()

0.5398428731762065

In [12]:
selector = (train.Cabin.isnull() & train.Age.isnull())

train[selector].Survived.mean()

0.25949367088607594

In [15]:
selector = (train.Cabin.isnull())

train[selector].Survived.mean()

0.29985443959243085

In [16]:
X_full['Nulls'] = X_full.Cabin.isnull().astype('int') + X_full.Age.isnull().astype('int')

In [17]:
X_full['Nulls'] = X_full.Cabin.isnull().astype('int') + X_full.Age.isnull().astype('int')

In [18]:
X_full['Cabin_mapped'] = X_full['Cabin'].astype(str).str[0] # this captures the letter

# this transforms the letters into numbers
cabin_dict = {k:i for i, k in enumerate(X_full.Cabin_mapped.unique())} 
X_full.loc[:, 'Cabin_mapped'] = X_full.loc[:, 'Cabin_mapped'].map(cabin_dict)

In [19]:
cabin_dict

{'n': 0, 'C': 1, 'E': 2, 'G': 3, 'D': 4, 'A': 5, 'B': 6, 'F': 7, 'T': 8}

In [20]:
X_full.columns

Index(['Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare',
       'Cabin', 'Embarked', 'Nulls', 'Cabin_mapped'],
      dtype='object')

In [21]:
X_full.drop(['Age', 'Cabin'], inplace = True, axis = 1)

In [22]:
fare_mean = X_full[X_full.Pclass == 3].Fare.mean()

X_full['Fare'].fillna(fare_mean, inplace = True)

In [23]:
X_full.isnull().sum()

Pclass          0
Name            0
Sex             0
SibSp           0
Parch           0
Ticket          0
Fare            0
Embarked        2
Nulls           0
Cabin_mapped    0
dtype: int64

In [24]:
X_full[X_full.Embarked.isnull()]

Unnamed: 0,Pclass,Name,Sex,SibSp,Parch,Ticket,Fare,Embarked,Nulls,Cabin_mapped
61,1,"Icard, Miss. Amelie",female,0,0,113572,80.0,,0,6
829,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,0,0,113572,80.0,,0,6


In [25]:
X_full[X_full['Pclass'] == 1].Embarked.value_counts()

S    177
C    141
Q      3
Name: Embarked, dtype: int64

In [26]:
X_full['Embarked'].fillna('S', inplace = True)

In [27]:
X_full.isnull().sum()

Pclass          0
Name            0
Sex             0
SibSp           0
Parch           0
Ticket          0
Fare            0
Embarked        0
Nulls           0
Cabin_mapped    0
dtype: int64

In [28]:
X_full.drop(['Name', 'Ticket'], axis = 1, inplace = True)

In [29]:
X_full.dtypes

Pclass            int64
Sex              object
SibSp             int64
Parch             int64
Fare            float64
Embarked         object
Nulls             int64
Cabin_mapped      int64
dtype: object

In [30]:
X_dummies = pd.get_dummies(X_full, columns = ['Sex', 'Nulls', 'Cabin_mapped', 'Embarked'], drop_first= True)

In [31]:
X_dummies.dtypes

Pclass              int64
SibSp               int64
Parch               int64
Fare              float64
Sex_male            uint8
Nulls_1             uint8
Nulls_2             uint8
Cabin_mapped_1      uint8
Cabin_mapped_2      uint8
Cabin_mapped_3      uint8
Cabin_mapped_4      uint8
Cabin_mapped_5      uint8
Cabin_mapped_6      uint8
Cabin_mapped_7      uint8
Cabin_mapped_8      uint8
Embarked_Q          uint8
Embarked_S          uint8
dtype: object

In [32]:
X = X_dummies[:len(train)]; new_X = X_dummies[len(train):]
y = train.Survived

In [33]:
from sklearn.model_selection import train_test_split

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size = .3,
                                                    random_state = 5,
                                                   stratify = y)

In [35]:
from sklearn.ensemble import RandomForestClassifier

In [36]:
rf = RandomForestClassifier()

rf.fit(X_train, y_train)

rf.score(X_test, y_test)

0.7835820895522388

In [39]:
from xgboost import XGBClassifier

In [40]:
xgb = XGBClassifier()

In [41]:
xgb.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=0, num_parallel_tree=1,
              objective='binary:logistic', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [42]:
xgb.score(X_test, y_test)

0.8097014925373134

In [43]:
from sklearn.linear_model import LogisticRegression
lg = LogisticRegression()
lg.fit(X_train, y_train)
lg.score(X_test, y_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


0.8022388059701493

In [44]:
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV

# Create the parameter grid: gbm_param_grid 
gbm_param_grid = {
    'n_estimators': range(8, 20),
    'max_depth': range(6, 10),
    'learning_rate': [.4, .45, .5, .55, .6],
    'colsample_bytree': [.6, .7, .8, .9, 1]
}

# Instantiate the regressor: gbm
gbm = XGBClassifier(n_estimators=10)

# Perform random search: grid_mse
xgb_random = RandomizedSearchCV(param_distributions=gbm_param_grid, 
                                    estimator = gbm, scoring = "accuracy", 
                                    verbose = 1, n_iter = 50, cv = 4)


# Fit randomized_mse to the data
xgb_random.fit(X, y)

# Print the best parameters and lowest RMSE
print("Best parameters found: ", xgb_random.best_params_)
print("Best accuracy found: ", xgb_random.best_score_)

Fitting 4 folds for each of 50 candidates, totalling 200 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Best parameters found:  {'n_estimators': 13, 'max_depth': 6, 'learning_rate': 0.55, 'colsample_bytree': 0.8}
Best accuracy found:  0.8193299802044197


[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed:    4.8s finished


In [45]:
xgb_pred = xgb_random.predict(new_X)

In [46]:
submission = pd.concat([test.PassengerId, pd.DataFrame(xgb_pred)], axis = 'columns')

In [47]:
submission.columns = ["PassengerId", "Survived"]

In [48]:
submission.to_csv('titanic_submission.csv', header = True, index = False)

In [49]:
from sklearn.svm import LinearSVC

In [50]:
lsvc = LinearSVC()
lsvc.fit(X_train, y_train)
lsvc.score(X_test, y_test)



0.7798507462686567

In [51]:
from sklearn.neighbors import KNeighborsClassifier
kn = KNeighborsClassifier()
kn.fit(X_train, y_train)
kn.score(X_test, y_test)

0.7201492537313433

In [53]:
from sklearn.svm import SVR
svr = SVR()
svr.fit(X_train, y_train)
svr.score(X_test, y_test)

-0.04477286675995873

In [55]:
from sklearn.linear_model import SGDClassifier
sgd = SGDClassifier()
sgd.fit(X_train, y_train)
sgd.score(X_test, y_test)

0.7574626865671642

In [56]:
from catboost import CatBoostClassifier
cbclf = CatBoostClassifier()
cbclf.fit(X_train, y_train)
cbclf.score(X_test, y_test)

Learning rate set to 0.008417
0:	learn: 0.6880357	total: 64.3ms	remaining: 1m 4s
1:	learn: 0.6827759	total: 65.9ms	remaining: 32.9s
2:	learn: 0.6784079	total: 67ms	remaining: 22.3s
3:	learn: 0.6733926	total: 68.5ms	remaining: 17.1s
4:	learn: 0.6683542	total: 70.2ms	remaining: 14s
5:	learn: 0.6629126	total: 71.7ms	remaining: 11.9s
6:	learn: 0.6583111	total: 72.9ms	remaining: 10.3s
7:	learn: 0.6542769	total: 74.2ms	remaining: 9.2s
8:	learn: 0.6510797	total: 75.7ms	remaining: 8.34s
9:	learn: 0.6464565	total: 77.3ms	remaining: 7.65s
10:	learn: 0.6421438	total: 78.9ms	remaining: 7.09s
11:	learn: 0.6394135	total: 80ms	remaining: 6.58s
12:	learn: 0.6349275	total: 81.5ms	remaining: 6.19s
13:	learn: 0.6308747	total: 82.8ms	remaining: 5.83s
14:	learn: 0.6267077	total: 84.6ms	remaining: 5.55s
15:	learn: 0.6222991	total: 86.1ms	remaining: 5.29s
16:	learn: 0.6179706	total: 87.6ms	remaining: 5.06s
17:	learn: 0.6144178	total: 88.7ms	remaining: 4.84s
18:	learn: 0.6101947	total: 90.3ms	remaining: 4.66s

190:	learn: 0.4048234	total: 365ms	remaining: 1.55s
191:	learn: 0.4042983	total: 368ms	remaining: 1.55s
192:	learn: 0.4040460	total: 369ms	remaining: 1.54s
193:	learn: 0.4038714	total: 371ms	remaining: 1.54s
194:	learn: 0.4034078	total: 373ms	remaining: 1.54s
195:	learn: 0.4031874	total: 375ms	remaining: 1.54s
196:	learn: 0.4025775	total: 377ms	remaining: 1.53s
197:	learn: 0.4023785	total: 379ms	remaining: 1.53s
198:	learn: 0.4020650	total: 380ms	remaining: 1.53s
199:	learn: 0.4019018	total: 382ms	remaining: 1.53s
200:	learn: 0.4016184	total: 384ms	remaining: 1.53s
201:	learn: 0.4013417	total: 387ms	remaining: 1.53s
202:	learn: 0.4010034	total: 389ms	remaining: 1.52s
203:	learn: 0.4005399	total: 391ms	remaining: 1.52s
204:	learn: 0.4001555	total: 393ms	remaining: 1.52s
205:	learn: 0.3998707	total: 395ms	remaining: 1.52s
206:	learn: 0.3995900	total: 397ms	remaining: 1.52s
207:	learn: 0.3993077	total: 398ms	remaining: 1.52s
208:	learn: 0.3991486	total: 400ms	remaining: 1.51s
209:	learn: 

399:	learn: 0.3661950	total: 731ms	remaining: 1.1s
400:	learn: 0.3661307	total: 734ms	remaining: 1.1s
401:	learn: 0.3660414	total: 736ms	remaining: 1.09s
402:	learn: 0.3660161	total: 738ms	remaining: 1.09s
403:	learn: 0.3659065	total: 740ms	remaining: 1.09s
404:	learn: 0.3657198	total: 742ms	remaining: 1.09s
405:	learn: 0.3656960	total: 743ms	remaining: 1.09s
406:	learn: 0.3656358	total: 745ms	remaining: 1.08s
407:	learn: 0.3653978	total: 747ms	remaining: 1.08s
408:	learn: 0.3652215	total: 749ms	remaining: 1.08s
409:	learn: 0.3651606	total: 751ms	remaining: 1.08s
410:	learn: 0.3649369	total: 753ms	remaining: 1.08s
411:	learn: 0.3648647	total: 755ms	remaining: 1.08s
412:	learn: 0.3647496	total: 757ms	remaining: 1.07s
413:	learn: 0.3646387	total: 759ms	remaining: 1.07s
414:	learn: 0.3645070	total: 761ms	remaining: 1.07s
415:	learn: 0.3644062	total: 763ms	remaining: 1.07s
416:	learn: 0.3642491	total: 765ms	remaining: 1.07s
417:	learn: 0.3640983	total: 767ms	remaining: 1.07s
418:	learn: 0.

606:	learn: 0.3430575	total: 1.09s	remaining: 708ms
607:	learn: 0.3429984	total: 1.09s	remaining: 707ms
608:	learn: 0.3428672	total: 1.1s	remaining: 705ms
609:	learn: 0.3426975	total: 1.1s	remaining: 703ms
610:	learn: 0.3426638	total: 1.1s	remaining: 701ms
611:	learn: 0.3425113	total: 1.1s	remaining: 699ms
612:	learn: 0.3423977	total: 1.1s	remaining: 698ms
613:	learn: 0.3422390	total: 1.11s	remaining: 696ms
614:	learn: 0.3421446	total: 1.11s	remaining: 694ms
615:	learn: 0.3420760	total: 1.11s	remaining: 692ms
616:	learn: 0.3420063	total: 1.11s	remaining: 690ms
617:	learn: 0.3420001	total: 1.11s	remaining: 688ms
618:	learn: 0.3419274	total: 1.11s	remaining: 686ms
619:	learn: 0.3418692	total: 1.12s	remaining: 684ms
620:	learn: 0.3418066	total: 1.12s	remaining: 682ms
621:	learn: 0.3417375	total: 1.12s	remaining: 680ms
622:	learn: 0.3415955	total: 1.12s	remaining: 678ms
623:	learn: 0.3415227	total: 1.12s	remaining: 676ms
624:	learn: 0.3415188	total: 1.12s	remaining: 674ms
625:	learn: 0.341

814:	learn: 0.3232157	total: 1.46s	remaining: 332ms
815:	learn: 0.3231478	total: 1.46s	remaining: 330ms
816:	learn: 0.3229994	total: 1.46s	remaining: 328ms
817:	learn: 0.3228344	total: 1.47s	remaining: 326ms
818:	learn: 0.3227949	total: 1.47s	remaining: 325ms
819:	learn: 0.3227205	total: 1.47s	remaining: 323ms
820:	learn: 0.3226915	total: 1.47s	remaining: 321ms
821:	learn: 0.3224678	total: 1.48s	remaining: 320ms
822:	learn: 0.3222463	total: 1.48s	remaining: 318ms
823:	learn: 0.3221790	total: 1.48s	remaining: 316ms
824:	learn: 0.3220957	total: 1.48s	remaining: 314ms
825:	learn: 0.3220515	total: 1.48s	remaining: 313ms
826:	learn: 0.3219895	total: 1.49s	remaining: 311ms
827:	learn: 0.3219246	total: 1.49s	remaining: 309ms
828:	learn: 0.3218159	total: 1.49s	remaining: 307ms
829:	learn: 0.3217622	total: 1.49s	remaining: 305ms
830:	learn: 0.3216946	total: 1.49s	remaining: 303ms
831:	learn: 0.3215646	total: 1.49s	remaining: 302ms
832:	learn: 0.3214680	total: 1.5s	remaining: 300ms
833:	learn: 0

0.8097014925373134

In [57]:
cbclf.fit(X_train, y_train)

Learning rate set to 0.008417
0:	learn: 0.6880357	total: 1.78ms	remaining: 1.78s
1:	learn: 0.6827759	total: 3.83ms	remaining: 1.91s
2:	learn: 0.6784079	total: 5.06ms	remaining: 1.68s
3:	learn: 0.6733926	total: 7.25ms	remaining: 1.81s
4:	learn: 0.6683542	total: 9.05ms	remaining: 1.8s
5:	learn: 0.6629126	total: 10.7ms	remaining: 1.77s
6:	learn: 0.6583111	total: 12.2ms	remaining: 1.73s
7:	learn: 0.6542769	total: 13.8ms	remaining: 1.71s
8:	learn: 0.6510797	total: 15.6ms	remaining: 1.71s
9:	learn: 0.6464565	total: 17.3ms	remaining: 1.72s
10:	learn: 0.6421438	total: 19.3ms	remaining: 1.74s
11:	learn: 0.6394135	total: 20.5ms	remaining: 1.69s
12:	learn: 0.6349275	total: 22.5ms	remaining: 1.71s
13:	learn: 0.6308747	total: 24.2ms	remaining: 1.71s
14:	learn: 0.6267077	total: 26.1ms	remaining: 1.71s
15:	learn: 0.6222991	total: 27.8ms	remaining: 1.71s
16:	learn: 0.6179706	total: 29.8ms	remaining: 1.72s
17:	learn: 0.6144178	total: 31.1ms	remaining: 1.7s
18:	learn: 0.6101947	total: 33.3ms	remaining: 

195:	learn: 0.4031874	total: 359ms	remaining: 1.47s
196:	learn: 0.4025775	total: 361ms	remaining: 1.47s
197:	learn: 0.4023785	total: 363ms	remaining: 1.47s
198:	learn: 0.4020650	total: 365ms	remaining: 1.47s
199:	learn: 0.4019018	total: 367ms	remaining: 1.47s
200:	learn: 0.4016184	total: 369ms	remaining: 1.47s
201:	learn: 0.4013417	total: 371ms	remaining: 1.47s
202:	learn: 0.4010034	total: 373ms	remaining: 1.46s
203:	learn: 0.4005399	total: 375ms	remaining: 1.46s
204:	learn: 0.4001555	total: 377ms	remaining: 1.46s
205:	learn: 0.3998707	total: 380ms	remaining: 1.46s
206:	learn: 0.3995900	total: 382ms	remaining: 1.46s
207:	learn: 0.3993077	total: 384ms	remaining: 1.46s
208:	learn: 0.3991486	total: 386ms	remaining: 1.46s
209:	learn: 0.3989511	total: 388ms	remaining: 1.46s
210:	learn: 0.3987517	total: 390ms	remaining: 1.46s
211:	learn: 0.3985998	total: 393ms	remaining: 1.46s
212:	learn: 0.3984362	total: 395ms	remaining: 1.46s
213:	learn: 0.3981711	total: 397ms	remaining: 1.46s
214:	learn: 

386:	learn: 0.3680354	total: 721ms	remaining: 1.14s
387:	learn: 0.3678753	total: 724ms	remaining: 1.14s
388:	learn: 0.3677943	total: 726ms	remaining: 1.14s
389:	learn: 0.3676996	total: 728ms	remaining: 1.14s
390:	learn: 0.3675171	total: 730ms	remaining: 1.14s
391:	learn: 0.3671932	total: 732ms	remaining: 1.14s
392:	learn: 0.3670549	total: 734ms	remaining: 1.13s
393:	learn: 0.3668933	total: 736ms	remaining: 1.13s
394:	learn: 0.3668204	total: 738ms	remaining: 1.13s
395:	learn: 0.3667016	total: 740ms	remaining: 1.13s
396:	learn: 0.3665826	total: 742ms	remaining: 1.13s
397:	learn: 0.3665242	total: 744ms	remaining: 1.13s
398:	learn: 0.3663229	total: 747ms	remaining: 1.12s
399:	learn: 0.3661950	total: 749ms	remaining: 1.12s
400:	learn: 0.3661307	total: 751ms	remaining: 1.12s
401:	learn: 0.3660414	total: 753ms	remaining: 1.12s
402:	learn: 0.3660161	total: 756ms	remaining: 1.12s
403:	learn: 0.3659065	total: 760ms	remaining: 1.12s
404:	learn: 0.3657198	total: 762ms	remaining: 1.12s
405:	learn: 

564:	learn: 0.3473113	total: 1.08s	remaining: 834ms
565:	learn: 0.3471533	total: 1.08s	remaining: 832ms
566:	learn: 0.3470882	total: 1.09s	remaining: 831ms
567:	learn: 0.3469650	total: 1.09s	remaining: 829ms
568:	learn: 0.3469302	total: 1.09s	remaining: 827ms
569:	learn: 0.3468012	total: 1.09s	remaining: 826ms
570:	learn: 0.3466880	total: 1.1s	remaining: 824ms
571:	learn: 0.3466097	total: 1.1s	remaining: 822ms
572:	learn: 0.3466041	total: 1.1s	remaining: 820ms
573:	learn: 0.3465030	total: 1.1s	remaining: 818ms
574:	learn: 0.3463719	total: 1.1s	remaining: 817ms
575:	learn: 0.3462973	total: 1.11s	remaining: 815ms
576:	learn: 0.3462605	total: 1.11s	remaining: 813ms
577:	learn: 0.3461430	total: 1.11s	remaining: 812ms
578:	learn: 0.3459921	total: 1.11s	remaining: 810ms
579:	learn: 0.3459142	total: 1.11s	remaining: 808ms
580:	learn: 0.3458132	total: 1.12s	remaining: 806ms
581:	learn: 0.3457288	total: 1.12s	remaining: 804ms
582:	learn: 0.3455793	total: 1.12s	remaining: 802ms
583:	learn: 0.345

760:	learn: 0.3286472	total: 1.45s	remaining: 454ms
761:	learn: 0.3285813	total: 1.45s	remaining: 453ms
762:	learn: 0.3284914	total: 1.45s	remaining: 451ms
763:	learn: 0.3283847	total: 1.45s	remaining: 449ms
764:	learn: 0.3283270	total: 1.46s	remaining: 448ms
765:	learn: 0.3281927	total: 1.46s	remaining: 446ms
766:	learn: 0.3281030	total: 1.46s	remaining: 444ms
767:	learn: 0.3280395	total: 1.46s	remaining: 442ms
768:	learn: 0.3279712	total: 1.47s	remaining: 440ms
769:	learn: 0.3279279	total: 1.47s	remaining: 438ms
770:	learn: 0.3277860	total: 1.47s	remaining: 436ms
771:	learn: 0.3276086	total: 1.47s	remaining: 434ms
772:	learn: 0.3275215	total: 1.47s	remaining: 433ms
773:	learn: 0.3274672	total: 1.48s	remaining: 431ms
774:	learn: 0.3273009	total: 1.48s	remaining: 429ms
775:	learn: 0.3272529	total: 1.48s	remaining: 427ms
776:	learn: 0.3271249	total: 1.48s	remaining: 425ms
777:	learn: 0.3270624	total: 1.48s	remaining: 423ms
778:	learn: 0.3269651	total: 1.48s	remaining: 421ms
779:	learn: 

942:	learn: 0.3092153	total: 1.81s	remaining: 109ms
943:	learn: 0.3091404	total: 1.81s	remaining: 107ms
944:	learn: 0.3090356	total: 1.81s	remaining: 105ms
945:	learn: 0.3088965	total: 1.82s	remaining: 104ms
946:	learn: 0.3088666	total: 1.82s	remaining: 102ms
947:	learn: 0.3087222	total: 1.82s	remaining: 99.9ms
948:	learn: 0.3085925	total: 1.82s	remaining: 98ms
949:	learn: 0.3085246	total: 1.82s	remaining: 96.1ms
950:	learn: 0.3084190	total: 1.83s	remaining: 94.2ms
951:	learn: 0.3082724	total: 1.83s	remaining: 92.2ms
952:	learn: 0.3082370	total: 1.83s	remaining: 90.3ms
953:	learn: 0.3081734	total: 1.83s	remaining: 88.4ms
954:	learn: 0.3081012	total: 1.83s	remaining: 86.5ms
955:	learn: 0.3080248	total: 1.84s	remaining: 84.6ms
956:	learn: 0.3079668	total: 1.84s	remaining: 82.6ms
957:	learn: 0.3078737	total: 1.84s	remaining: 80.7ms
958:	learn: 0.3078065	total: 1.84s	remaining: 78.8ms
959:	learn: 0.3076300	total: 1.84s	remaining: 76.9ms
960:	learn: 0.3075764	total: 1.85s	remaining: 75ms
96

<catboost.core.CatBoostClassifier at 0x1a2c69d290>

In [58]:
cbclf_preds = cbclf.predict(new_X)

In [59]:
submission = pd.concat([test.PassengerId, pd.DataFrame(cbclf_preds)], axis = 'columns')

In [60]:
submission.to_csv('titanic_submission_cb.csv', header = True, index = False)

In [61]:
model = CatBoostClassifier(eval_metric='Accuracy',use_best_model=True,random_seed=42)

In [64]:
cate_features_index = np.where(X.dtypes != float)[0]

In [66]:
model.fit(X_train,y_train,cat_features=cate_features_index,eval_set=(X_test,y_test))

Learning rate set to 0.028193
0:	learn: 0.8041734	test: 0.7947761	best: 0.7947761 (0)	total: 6.79ms	remaining: 6.79s
1:	learn: 0.8009631	test: 0.7947761	best: 0.7947761 (0)	total: 12.4ms	remaining: 6.17s
2:	learn: 0.8121990	test: 0.8022388	best: 0.8022388 (2)	total: 17.1ms	remaining: 5.69s
3:	learn: 0.8154093	test: 0.7947761	best: 0.8022388 (2)	total: 21.8ms	remaining: 5.43s
4:	learn: 0.8073836	test: 0.8022388	best: 0.8022388 (2)	total: 26.8ms	remaining: 5.33s
5:	learn: 0.8154093	test: 0.7947761	best: 0.8022388 (2)	total: 31.5ms	remaining: 5.22s
6:	learn: 0.8138042	test: 0.7947761	best: 0.8022388 (2)	total: 36.9ms	remaining: 5.23s
7:	learn: 0.8138042	test: 0.7947761	best: 0.8022388 (2)	total: 42.7ms	remaining: 5.29s
8:	learn: 0.8138042	test: 0.7947761	best: 0.8022388 (2)	total: 48.3ms	remaining: 5.32s
9:	learn: 0.8138042	test: 0.7947761	best: 0.8022388 (2)	total: 51.6ms	remaining: 5.1s
10:	learn: 0.8138042	test: 0.8022388	best: 0.8022388 (2)	total: 56.5ms	remaining: 5.08s
11:	learn: 0.

116:	learn: 0.8539326	test: 0.7985075	best: 0.8059701 (21)	total: 572ms	remaining: 4.32s
117:	learn: 0.8555377	test: 0.7985075	best: 0.8059701 (21)	total: 579ms	remaining: 4.32s
118:	learn: 0.8555377	test: 0.7985075	best: 0.8059701 (21)	total: 586ms	remaining: 4.33s
119:	learn: 0.8555377	test: 0.7985075	best: 0.8059701 (21)	total: 590ms	remaining: 4.33s
120:	learn: 0.8555377	test: 0.7985075	best: 0.8059701 (21)	total: 595ms	remaining: 4.32s
121:	learn: 0.8555377	test: 0.7985075	best: 0.8059701 (21)	total: 598ms	remaining: 4.3s
122:	learn: 0.8539326	test: 0.7985075	best: 0.8059701 (21)	total: 602ms	remaining: 4.29s
123:	learn: 0.8571429	test: 0.7985075	best: 0.8059701 (21)	total: 608ms	remaining: 4.29s
124:	learn: 0.8571429	test: 0.7985075	best: 0.8059701 (21)	total: 613ms	remaining: 4.29s
125:	learn: 0.8571429	test: 0.7985075	best: 0.8059701 (21)	total: 616ms	remaining: 4.27s
126:	learn: 0.8555377	test: 0.7985075	best: 0.8059701 (21)	total: 621ms	remaining: 4.27s
127:	learn: 0.8571429	

234:	learn: 0.8796148	test: 0.8022388	best: 0.8059701 (21)	total: 1.16s	remaining: 3.76s
235:	learn: 0.8796148	test: 0.8022388	best: 0.8059701 (21)	total: 1.16s	remaining: 3.76s
236:	learn: 0.8796148	test: 0.8022388	best: 0.8059701 (21)	total: 1.17s	remaining: 3.75s
237:	learn: 0.8796148	test: 0.8022388	best: 0.8059701 (21)	total: 1.17s	remaining: 3.75s
238:	learn: 0.8796148	test: 0.8022388	best: 0.8059701 (21)	total: 1.18s	remaining: 3.75s
239:	learn: 0.8796148	test: 0.8022388	best: 0.8059701 (21)	total: 1.18s	remaining: 3.74s
240:	learn: 0.8812199	test: 0.8022388	best: 0.8059701 (21)	total: 1.19s	remaining: 3.73s
241:	learn: 0.8796148	test: 0.8022388	best: 0.8059701 (21)	total: 1.19s	remaining: 3.73s
242:	learn: 0.8812199	test: 0.8022388	best: 0.8059701 (21)	total: 1.2s	remaining: 3.73s
243:	learn: 0.8828250	test: 0.8022388	best: 0.8059701 (21)	total: 1.2s	remaining: 3.73s
244:	learn: 0.8828250	test: 0.8022388	best: 0.8059701 (21)	total: 1.21s	remaining: 3.73s
245:	learn: 0.8812199	t

334:	learn: 0.8988764	test: 0.8097015	best: 0.8097015 (316)	total: 1.74s	remaining: 3.46s
335:	learn: 0.8972713	test: 0.8097015	best: 0.8097015 (316)	total: 1.75s	remaining: 3.46s
336:	learn: 0.8972713	test: 0.8097015	best: 0.8097015 (316)	total: 1.75s	remaining: 3.45s
337:	learn: 0.8972713	test: 0.8097015	best: 0.8097015 (316)	total: 1.76s	remaining: 3.45s
338:	learn: 0.8988764	test: 0.8097015	best: 0.8097015 (316)	total: 1.77s	remaining: 3.45s
339:	learn: 0.9004815	test: 0.8097015	best: 0.8097015 (316)	total: 1.78s	remaining: 3.45s
340:	learn: 0.9004815	test: 0.8097015	best: 0.8097015 (316)	total: 1.79s	remaining: 3.45s
341:	learn: 0.9004815	test: 0.8097015	best: 0.8097015 (316)	total: 1.79s	remaining: 3.45s
342:	learn: 0.9004815	test: 0.8097015	best: 0.8097015 (316)	total: 1.8s	remaining: 3.45s
343:	learn: 0.9004815	test: 0.8097015	best: 0.8097015 (316)	total: 1.81s	remaining: 3.45s
344:	learn: 0.9004815	test: 0.8097015	best: 0.8097015 (316)	total: 1.81s	remaining: 3.44s
345:	learn:

456:	learn: 0.9133226	test: 0.8171642	best: 0.8171642 (401)	total: 2.53s	remaining: 3s
457:	learn: 0.9133226	test: 0.8171642	best: 0.8171642 (401)	total: 2.53s	remaining: 3s
458:	learn: 0.9133226	test: 0.8171642	best: 0.8171642 (401)	total: 2.54s	remaining: 3s
459:	learn: 0.9133226	test: 0.8171642	best: 0.8171642 (401)	total: 2.55s	remaining: 2.99s
460:	learn: 0.9133226	test: 0.8171642	best: 0.8171642 (401)	total: 2.56s	remaining: 2.99s
461:	learn: 0.9133226	test: 0.8171642	best: 0.8171642 (401)	total: 2.56s	remaining: 2.98s
462:	learn: 0.9133226	test: 0.8171642	best: 0.8171642 (401)	total: 2.57s	remaining: 2.98s
463:	learn: 0.9133226	test: 0.8171642	best: 0.8171642 (401)	total: 2.57s	remaining: 2.97s
464:	learn: 0.9133226	test: 0.8134328	best: 0.8171642 (401)	total: 2.58s	remaining: 2.97s
465:	learn: 0.9133226	test: 0.8134328	best: 0.8171642 (401)	total: 2.58s	remaining: 2.96s
466:	learn: 0.9133226	test: 0.8134328	best: 0.8171642 (401)	total: 2.59s	remaining: 2.96s
467:	learn: 0.91332

552:	learn: 0.9213483	test: 0.8134328	best: 0.8171642 (401)	total: 3.12s	remaining: 2.52s
553:	learn: 0.9213483	test: 0.8134328	best: 0.8171642 (401)	total: 3.12s	remaining: 2.51s
554:	learn: 0.9213483	test: 0.8134328	best: 0.8171642 (401)	total: 3.13s	remaining: 2.51s
555:	learn: 0.9213483	test: 0.8134328	best: 0.8171642 (401)	total: 3.14s	remaining: 2.5s
556:	learn: 0.9213483	test: 0.8134328	best: 0.8171642 (401)	total: 3.14s	remaining: 2.5s
557:	learn: 0.9213483	test: 0.8134328	best: 0.8171642 (401)	total: 3.15s	remaining: 2.49s
558:	learn: 0.9213483	test: 0.8134328	best: 0.8171642 (401)	total: 3.15s	remaining: 2.49s
559:	learn: 0.9213483	test: 0.8134328	best: 0.8171642 (401)	total: 3.16s	remaining: 2.48s
560:	learn: 0.9213483	test: 0.8134328	best: 0.8171642 (401)	total: 3.17s	remaining: 2.48s
561:	learn: 0.9197432	test: 0.8134328	best: 0.8171642 (401)	total: 3.17s	remaining: 2.47s
562:	learn: 0.9213483	test: 0.8134328	best: 0.8171642 (401)	total: 3.17s	remaining: 2.46s
563:	learn: 

675:	learn: 0.9245586	test: 0.8171642	best: 0.8171642 (401)	total: 3.89s	remaining: 1.87s
676:	learn: 0.9245586	test: 0.8171642	best: 0.8171642 (401)	total: 3.9s	remaining: 1.86s
677:	learn: 0.9245586	test: 0.8134328	best: 0.8171642 (401)	total: 3.91s	remaining: 1.86s
678:	learn: 0.9261637	test: 0.8134328	best: 0.8171642 (401)	total: 3.92s	remaining: 1.85s
679:	learn: 0.9261637	test: 0.8134328	best: 0.8171642 (401)	total: 3.92s	remaining: 1.84s
680:	learn: 0.9245586	test: 0.8134328	best: 0.8171642 (401)	total: 3.93s	remaining: 1.84s
681:	learn: 0.9245586	test: 0.8134328	best: 0.8171642 (401)	total: 3.93s	remaining: 1.83s
682:	learn: 0.9261637	test: 0.8134328	best: 0.8171642 (401)	total: 3.94s	remaining: 1.83s
683:	learn: 0.9245586	test: 0.8134328	best: 0.8171642 (401)	total: 3.94s	remaining: 1.82s
684:	learn: 0.9245586	test: 0.8134328	best: 0.8171642 (401)	total: 3.95s	remaining: 1.81s
685:	learn: 0.9245586	test: 0.8134328	best: 0.8171642 (401)	total: 3.96s	remaining: 1.81s
686:	learn:

769:	learn: 0.9293740	test: 0.8134328	best: 0.8171642 (401)	total: 4.48s	remaining: 1.34s
770:	learn: 0.9293740	test: 0.8134328	best: 0.8171642 (401)	total: 4.49s	remaining: 1.33s
771:	learn: 0.9293740	test: 0.8134328	best: 0.8171642 (401)	total: 4.49s	remaining: 1.33s
772:	learn: 0.9309791	test: 0.8134328	best: 0.8171642 (401)	total: 4.5s	remaining: 1.32s
773:	learn: 0.9309791	test: 0.8134328	best: 0.8171642 (401)	total: 4.5s	remaining: 1.31s
774:	learn: 0.9309791	test: 0.8134328	best: 0.8171642 (401)	total: 4.51s	remaining: 1.31s
775:	learn: 0.9309791	test: 0.8134328	best: 0.8171642 (401)	total: 4.52s	remaining: 1.3s
776:	learn: 0.9309791	test: 0.8134328	best: 0.8171642 (401)	total: 4.53s	remaining: 1.3s
777:	learn: 0.9293740	test: 0.8134328	best: 0.8171642 (401)	total: 4.53s	remaining: 1.29s
778:	learn: 0.9293740	test: 0.8134328	best: 0.8171642 (401)	total: 4.54s	remaining: 1.29s
779:	learn: 0.9293740	test: 0.8134328	best: 0.8171642 (401)	total: 4.54s	remaining: 1.28s
780:	learn: 0.

862:	learn: 0.9325843	test: 0.8097015	best: 0.8171642 (401)	total: 5.07s	remaining: 804ms
863:	learn: 0.9325843	test: 0.8097015	best: 0.8171642 (401)	total: 5.07s	remaining: 799ms
864:	learn: 0.9325843	test: 0.8097015	best: 0.8171642 (401)	total: 5.08s	remaining: 793ms
865:	learn: 0.9325843	test: 0.8097015	best: 0.8171642 (401)	total: 5.09s	remaining: 787ms
866:	learn: 0.9325843	test: 0.8097015	best: 0.8171642 (401)	total: 5.09s	remaining: 782ms
867:	learn: 0.9341894	test: 0.8097015	best: 0.8171642 (401)	total: 5.1s	remaining: 776ms
868:	learn: 0.9341894	test: 0.8097015	best: 0.8171642 (401)	total: 5.1s	remaining: 770ms
869:	learn: 0.9341894	test: 0.8097015	best: 0.8171642 (401)	total: 5.11s	remaining: 764ms
870:	learn: 0.9341894	test: 0.8097015	best: 0.8171642 (401)	total: 5.12s	remaining: 758ms
871:	learn: 0.9341894	test: 0.8097015	best: 0.8171642 (401)	total: 5.12s	remaining: 752ms
872:	learn: 0.9341894	test: 0.8097015	best: 0.8171642 (401)	total: 5.13s	remaining: 746ms
873:	learn: 

954:	learn: 0.9373997	test: 0.8097015	best: 0.8171642 (401)	total: 5.65s	remaining: 266ms
955:	learn: 0.9373997	test: 0.8097015	best: 0.8171642 (401)	total: 5.66s	remaining: 261ms
956:	learn: 0.9373997	test: 0.8097015	best: 0.8171642 (401)	total: 5.67s	remaining: 255ms
957:	learn: 0.9373997	test: 0.8097015	best: 0.8171642 (401)	total: 5.68s	remaining: 249ms
958:	learn: 0.9373997	test: 0.8097015	best: 0.8171642 (401)	total: 5.68s	remaining: 243ms
959:	learn: 0.9373997	test: 0.8097015	best: 0.8171642 (401)	total: 5.69s	remaining: 237ms
960:	learn: 0.9357945	test: 0.8097015	best: 0.8171642 (401)	total: 5.7s	remaining: 231ms
961:	learn: 0.9357945	test: 0.8097015	best: 0.8171642 (401)	total: 5.7s	remaining: 225ms
962:	learn: 0.9357945	test: 0.8097015	best: 0.8171642 (401)	total: 5.71s	remaining: 219ms
963:	learn: 0.9357945	test: 0.8097015	best: 0.8171642 (401)	total: 5.72s	remaining: 213ms
964:	learn: 0.9357945	test: 0.8097015	best: 0.8171642 (401)	total: 5.72s	remaining: 208ms
965:	learn: 

<catboost.core.CatBoostClassifier at 0x1a2c6e6c50>

In [68]:
from catboost import Pool, cv

In [72]:
cv_data = cv(Pool(X,y,cat_features=cate_features_index), model.get_params(),fold_count=10)

CatBoostError: catboost/private/libs/options/loss_description.cpp:331: loss [RMSE] is incompatible with metric [Accuracy] (no classification support)