In [1]:
import pandas as pd
import numpy as np

# Read the training and test CSV files from the current working directory.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# Preview the first rows of the training frame.
train_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [2]:
# Per-column count of missing entries in the training frame
# (stored in sum_missing; this cell does not display it).
sum_missing = train_df.isna().sum()

In [3]:
# Impute missing values: every missing entry, in every column (numeric and
# object alike), is replaced with the sentinel -999.
# fillna returns a new frame, so the names are rebound rather than mutating
# the frames in place with inplace=True.
train_df = train_df.fillna(-999)
test_df = test_df.fillna(-999)

In [4]:
# Target: the 'Survived' column. Features: every remaining column of train_df.
y = train_df['Survived']
X = train_df.drop(columns='Survived')

In [6]:
# Show the dtype of each feature column.
X.dtypes

PassengerId      int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [7]:
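# Superseded by the dtype-based column selection in the following cells; kept for reference only.
# Note: the np.float alias was removed in NumPy 1.24 - use float / np.float64 if this is revived.
#categorical_features_indices = np.where(X.dtypes != np.float)[0]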

In [5]:
# Read the dtypes once instead of rebuilding dict(X.dtypes) for every column.
column_dtypes = X.dtypes

# Numeric variables: columns whose dtype is one of the listed float/int types.
numeric_var = [col for col, dtype in column_dtypes.items()
               if dtype in ('float64', 'float32', 'int32', 'int64')]

# Categorical variables: columns stored with the object dtype.
cat_var = [col for col, dtype in column_dtypes.items() if dtype == 'object']

In [6]:
# Look up the positional indices of columns by label.
def column_index(df, query_cols):
    """Return the position of each requested column within ``df.columns``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are searched.
    query_cols : label or sequence of labels
        Column label(s) to locate.

    Returns
    -------
    numpy.ndarray of int (a NumPy integer when a single label is given)
        Positions in ``df.columns``, in the same order as ``query_cols``.

    Raises
    ------
    KeyError
        If any requested label is not a column of ``df``.
    """
    cols = np.asarray(df.columns, dtype=object)
    wanted = np.asarray(query_cols, dtype=object)

    # Binary-search the labels through a sorted view of the column names.
    sidx = np.argsort(cols)
    slots = np.searchsorted(cols, wanted, sorter=sidx)

    if cols.size == 0:
        if wanted.size:
            raise KeyError(
                f"columns not found in DataFrame: {list(np.atleast_1d(wanted))}"
            )
        return sidx[slots]

    # searchsorted yields an *insertion point*, not a match: a label sorting
    # after every column gives slot == len(cols), so clip before indexing and
    # then verify that the column found really carries the requested label.
    positions = sidx[np.minimum(slots, cols.size - 1)]
    found = np.asarray(cols[positions] == wanted)
    if not found.all():
        missing = np.atleast_1d(wanted)[~np.atleast_1d(found)]
        raise KeyError(f"columns not found in DataFrame: {list(missing)}")
    return positions

In [7]:
# Positional indices, within X, of the columns listed in cat_var.
cat_index = column_index(X, cat_var)

In [8]:
# Display the computed categorical column positions.
cat_index

array([ 2,  3,  7,  9, 10])

In [9]:
from sklearn.model_selection import train_test_split

# Split the labelled data: 75% for training, the remainder for validation.
# The fixed random_state keeps the split repeatable across runs.
X_train, X_validation, y_train, y_validation = train_test_split(
    X,
    y,
    train_size=0.75,
    random_state=42,
)

# The test features are the loaded test frame itself (same object, not a copy).
X_test = test_df



In [10]:
from catboost import CatBoostClassifier, Pool, cv
from sklearn.metrics import accuracy_score, roc_auc_score

In [17]:
# Build the classifier; AUC is passed as a custom_loss metric and every other
# hyperparameter is left at its CatBoost default.
model = CatBoostClassifier(custom_loss=['AUC'])

In [18]:
# Fit on the training split, treating the columns at cat_index as categorical.
# The validation split is passed as eval_set, with use_best_model and
# early stopping (5 rounds) enabled.
model.fit(
    X_train,
    y_train,
    cat_features=cat_index,
    eval_set=(X_validation, y_validation),
    use_best_model=True,
    early_stopping_rounds=5,
)

0:	learn: 0.6566433	test: 0.6555803	best: 0.6555803 (0)	total: 24.5ms	remaining: 24.5s
1:	learn: 0.6199772	test: 0.6188700	best: 0.6188700 (1)	total: 35.6ms	remaining: 17.8s
2:	learn: 0.5848711	test: 0.5869462	best: 0.5869462 (2)	total: 47.9ms	remaining: 15.9s
3:	learn: 0.5538386	test: 0.5569970	best: 0.5569970 (3)	total: 61.8ms	remaining: 15.4s
4:	learn: 0.5381961	test: 0.5412003	best: 0.5412003 (4)	total: 73.9ms	remaining: 14.7s
5:	learn: 0.5261420	test: 0.5295138	best: 0.5295138 (5)	total: 84.8ms	remaining: 14s
6:	learn: 0.5077056	test: 0.5119539	best: 0.5119539 (6)	total: 99.1ms	remaining: 14.1s
7:	learn: 0.4937319	test: 0.4995796	best: 0.4995796 (7)	total: 113ms	remaining: 14s
8:	learn: 0.4877487	test: 0.4938176	best: 0.4938176 (8)	total: 125ms	remaining: 13.7s
9:	learn: 0.4779730	test: 0.4851482	best: 0.4851482 (9)	total: 133ms	remaining: 13.2s
10:	learn: 0.4672601	test: 0.4696902	best: 0.4696902 (10)	total: 144ms	remaining: 13s
11:	learn: 0.4577228	test: 0.4634981	best: 0.463498

95:	learn: 0.3564335	test: 0.3879223	best: 0.3876937 (93)	total: 1.28s	remaining: 12.1s
96:	learn: 0.3564291	test: 0.3879982	best: 0.3876937 (93)	total: 1.3s	remaining: 12.1s
97:	learn: 0.3562385	test: 0.3877809	best: 0.3876937 (93)	total: 1.31s	remaining: 12.1s
98:	learn: 0.3560481	test: 0.3874292	best: 0.3874292 (98)	total: 1.33s	remaining: 12.1s
99:	learn: 0.3560159	test: 0.3874639	best: 0.3874292 (98)	total: 1.34s	remaining: 12s
100:	learn: 0.3549061	test: 0.3851124	best: 0.3851124 (100)	total: 1.35s	remaining: 12s
101:	learn: 0.3546800	test: 0.3852659	best: 0.3851124 (100)	total: 1.37s	remaining: 12s
102:	learn: 0.3545495	test: 0.3850604	best: 0.3850604 (102)	total: 1.38s	remaining: 12s
103:	learn: 0.3536462	test: 0.3846061	best: 0.3846061 (103)	total: 1.39s	remaining: 12s
104:	learn: 0.3528581	test: 0.3835896	best: 0.3835896 (104)	total: 1.41s	remaining: 12s
105:	learn: 0.3515447	test: 0.3835253	best: 0.3835253 (105)	total: 1.42s	remaining: 12s
106:	learn: 0.3496019	test: 0.38323

<catboost.core.CatBoostClassifier at 0x123e18ef0>

In [19]:
# Learning rate of the fitted model; none was passed to the constructor.
model.learning_rate_

0.07479199767