In [2]:
import csv
import numpy as np

with open('titanic-train.csv', 'rt') as f:
    data = list(csv.DictReader(f))
data[:1]

[{'Age': '22',
  'Cabin': '',
  'Embarked': 'S',
  'Fare': '7.25',
  'Name': 'Braund, Mr. Owen Harris',
  'Parch': '0',
  'PassengerId': '1',
  'Pclass': '3',
  'Sex': 'male',
  'SibSp': '1',
  'Survived': '0',
  'Ticket': 'A/5 21171'}]

In [3]:
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

_all_xs = [{k: v for k, v in row.items() if k != 'Survived'} for row in data]
_all_ys = np.array([int(row['Survived']) for row in data])

all_xs, all_ys = shuffle(_all_xs, _all_ys, random_state=0)
train_xs, valid_xs, train_ys, valid_ys = train_test_split(
    all_xs, all_ys, test_size=0.25, random_state=0)
print('{} items total, {:.1%} true'.format(len(all_xs), np.mean(all_ys)))

891 items total, 38.4% true


In [4]:
for x in all_xs:
    if x['Age']:
        x['Age'] = float(x['Age'])
    else:
        x.pop('Age')
    x['Fare'] = float(x['Fare'])
    x['SibSp'] = int(x['SibSp'])
    x['Parch'] = int(x['Parch'])

In [5]:
import warnings
# xgboost <= 0.6a2 shows a warning when used with scikit-learn 0.18+
warnings.filterwarnings('ignore', category=DeprecationWarning)
from xgboost import XGBClassifier
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score


clf = XGBClassifier()
vec = DictVectorizer()
pipeline = make_pipeline(vec, clf)

def evaluate(_clf):
    scores = cross_val_score(_clf, all_xs, all_ys, scoring='accuracy', cv=10)
    print('Accuracy: {:.3f} ± {:.3f}'.format(np.mean(scores), 2 * np.std(scores)))
    _clf.fit(train_xs, train_ys)  # so that parts of the original pipeline are fitted

evaluate(pipeline)

Accuracy: 0.823 ± 0.071


In [6]:
booster = clf.booster()
original_feature_names = booster.feature_names
booster.feature_names = vec.get_feature_names()
print(booster.get_dump()[0])
# recover original feature names
booster.feature_names = original_feature_names

0:[Sex=female<-9.53674e-07] yes=1,no=2,missing=1
	1:[Age<13] yes=3,no=4,missing=4
		3:[SibSp<2] yes=7,no=8,missing=7
			7:leaf=0.145455
			8:leaf=-0.125
		4:[Fare<26.2687] yes=9,no=10,missing=9
			9:leaf=-0.151515
			10:leaf=-0.0727273
	2:[Pclass=3<-9.53674e-07] yes=5,no=6,missing=5
		5:[Fare<12.175] yes=11,no=12,missing=12
			11:leaf=0.05
			12:leaf=0.175194
		6:[Fare<24.8083] yes=13,no=14,missing=14
			13:leaf=0.0365591
			14:leaf=-0.152



In [7]:
from eli5 import show_weights
show_weights(clf, vec=vec)

Weight,Feature
0.4278,Sex=female
0.1949,Pclass=3
0.0665,Embarked=S
0.051,Pclass=2
0.042,SibSp
0.0417,Cabin=
0.0385,Embarked=C
0.0358,Ticket=1601
0.0331,Age
0.0323,Fare


In [8]:
from eli5 import show_prediction
show_prediction(clf, valid_xs[1], vec=vec, show_feature_values=True)

Contribution?,Feature,Value
1.673,Sex=female,1.000
0.479,Embarked=S,Missing
0.07,Fare,7.879
-0.004,Cabin=,1.000
-0.006,Parch,0.000
-0.009,Pclass=2,Missing
-0.009,Ticket=1601,Missing
-0.012,Embarked=C,Missing
-0.071,SibSp,0.000
-0.073,Pclass=1,Missing


In [9]:
type(valid_xs[1])

dict

In [10]:
type(vec)

sklearn.feature_extraction.dict_vectorizer.DictVectorizer

In [11]:
import pandas as pd

data = pd.read_csv('titanic-train.csv')

In [14]:
data.loc[0]

PassengerId                          1
Survived                             0
Pclass                               3
Name           Braund, Mr. Owen Harris
Sex                               male
Age                                 22
SibSp                                1
Parch                                0
Ticket                       A/5 21171
Fare                              7.25
Cabin                              NaN
Embarked                             S
Name: 0, dtype: object

In [19]:
data.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [26]:
columns_list = list(data.columns)
print(columns_list)
columns_list.remove('Survived')
print(columns_list)

['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']
['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']


In [47]:
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

train_xs, valid_xs, train_ys, valid_ys = train_test_split(data[columns_list], data['Survived'], test_size=0.25, random_state=0)

In [48]:
import warnings
# xgboost <= 0.6a2 shows a warning when used with scikit-learn 0.18+
warnings.filterwarnings('ignore', category=DeprecationWarning)
from xgboost import XGBClassifier
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score


clf = XGBClassifier()
#vec = DictVectorizer()
pipeline = make_pipeline(clf)
dummies_train = pd.get_dummies(train_xs[['Sex','Ticket','Cabin','Embarked']])
train_xs.drop(['Name','Sex','Ticket','Cabin','Embarked'],inplace = True, axis = 1)
train_xs = pd.concat([train_xs,dummies_train],axis = 1)
clf.fit(train_xs,train_ys)

XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
       gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=100, nthread=-1,
       objective='binary:logistic', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [49]:
dummies_test = pd.get_dummies(valid_xs[['Sex','Ticket','Cabin','Embarked']])
valid_xs.drop(['Name','Sex','Ticket','Cabin','Embarked'],inplace = True,axis = 1)
valid_xs = pd.concat([valid_xs,dummies_test],axis = 1)

In [50]:
import eli5
eli5.show_prediction(clf,valid_xs.loc[1])

ValueError: feature_names mismatch: ['PassengerId', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Sex_female', 'Sex_male', 'Ticket_110152', 'Ticket_110413', 'Ticket_110465', 'Ticket_110564', 'Ticket_110813', 'Ticket_111240', 'Ticket_111320', 'Ticket_111361', 'Ticket_111369', 'Ticket_111426', 'Ticket_111427', 'Ticket_111428', 'Ticket_112050', 'Ticket_112052', 'Ticket_112059', 'Ticket_112277', 'Ticket_113028', 'Ticket_113050', 'Ticket_113051', 'Ticket_113055', 'Ticket_113056', 'Ticket_113059', 'Ticket_113501', 'Ticket_113503', 'Ticket_113505', 'Ticket_113510', 'Ticket_113572', 'Ticket_113760', 'Ticket_113767', 'Ticket_113773', 'Ticket_113776', 'Ticket_113781', 'Ticket_113783', 'Ticket_113784', 'Ticket_113786', 'Ticket_113787', 'Ticket_113788', 'Ticket_113789', 'Ticket_113792', 'Ticket_113796', 'Ticket_113798', 'Ticket_113803', 'Ticket_113804', 'Ticket_113806', 'Ticket_113807', 'Ticket_11668', 'Ticket_11751', 'Ticket_11752', 'Ticket_11753', 'Ticket_11755', 'Ticket_11765', 'Ticket_11767', 'Ticket_11769', 'Ticket_11813', 'Ticket_11967', 'Ticket_12233', 'Ticket_12460', 'Ticket_12749', 'Ticket_13049', 'Ticket_13502', 'Ticket_13507', 'Ticket_13509', 'Ticket_13568', 'Ticket_14311', 'Ticket_14312', 'Ticket_14313', 'Ticket_1601', 'Ticket_16966', 'Ticket_17421', 'Ticket_17453', 'Ticket_17463', 'Ticket_17464', 'Ticket_17466', 'Ticket_17474', 'Ticket_17764', 'Ticket_19877', 'Ticket_19928', 'Ticket_19943', 'Ticket_19950', 'Ticket_19952', 'Ticket_19996', 'Ticket_2003', 'Ticket_211536', 'Ticket_218629', 'Ticket_219533', 'Ticket_220367', 'Ticket_220845', 'Ticket_2223', 'Ticket_223596', 'Ticket_226593', 'Ticket_226875', 'Ticket_228414', 'Ticket_229236', 'Ticket_230080', 'Ticket_230136', 'Ticket_230433', 'Ticket_230434', 'Ticket_231919', 'Ticket_233639', 'Ticket_233866', 'Ticket_234360', 'Ticket_234604', 'Ticket_234818', 'Ticket_236171', 'Ticket_236852', 'Ticket_237442', 'Ticket_237565', 'Ticket_237671', 'Ticket_237736', 'Ticket_237798', 'Ticket_239853', 'Ticket_239854', 'Ticket_239855', 'Ticket_239865', 'Ticket_24160', 'Ticket_243847', 'Ticket_243880', 'Ticket_244252', 'Ticket_244270', 'Ticket_244278', 'Ticket_244310', 'Ticket_244358', 'Ticket_244361', 'Ticket_244367', 'Ticket_244373', 'Ticket_248698', 'Ticket_248706', 'Ticket_248727', 'Ticket_248731', 'Ticket_248733', 'Ticket_248738', 'Ticket_248747', 'Ticket_250643', 'Ticket_250644', 'Ticket_250646', 'Ticket_250647', 'Ticket_250648', 'Ticket_250649', 'Ticket_250652', 'Ticket_250653', 'Ticket_250655', 'Ticket_2620', 'Ticket_2623', 'Ticket_2627', 'Ticket_2628', 'Ticket_2629', 'Ticket_2631', 'Ticket_26360', 'Ticket_2647', 'Ticket_2649', 'Ticket_2651', 'Ticket_2653', 'Ticket_2659', 'Ticket_2661', 'Ticket_2662', 'Ticket_2664', 'Ticket_2665', 'Ticket_2666', 'Ticket_2668', 'Ticket_2671', 'Ticket_2674', 'Ticket_2677', 'Ticket_2680', 'Ticket_2683', 'Ticket_2685', 'Ticket_2686', 'Ticket_2689', 'Ticket_2690', 'Ticket_2691', 'Ticket_2693', 'Ticket_2694', 'Ticket_2697', 'Ticket_2699', 'Ticket_2700', 'Ticket_27042', 'Ticket_27267', 'Ticket_27849', 'Ticket_28134', 'Ticket_28206', 'Ticket_28213', 'Ticket_28220', 'Ticket_28403', 'Ticket_28424', 'Ticket_28425', 'Ticket_28551', 'Ticket_28664', 'Ticket_28665', 'Ticket_2908', 'Ticket_29104', 'Ticket_29105', 'Ticket_29106', 'Ticket_29108', 'Ticket_2926', 'Ticket_29750', 'Ticket_3101265', 'Ticket_3101267', 'Ticket_3101276', 'Ticket_3101277', 'Ticket_3101278', 'Ticket_3101281', 'Ticket_3101295', 'Ticket_3101296', 'Ticket_3101298', 'Ticket_31027', 'Ticket_312991', 'Ticket_312992', 'Ticket_312993', 'Ticket_315082', 'Ticket_315084', 'Ticket_315088', 'Ticket_315089', 'Ticket_315090', 'Ticket_315093', 'Ticket_315096', 'Ticket_315097', 'Ticket_315098', 'Ticket_315151', 'Ticket_315153', 'Ticket_323592', 'Ticket_324669', 'Ticket_330909', 'Ticket_330919', 'Ticket_330923', 'Ticket_330932', 'Ticket_330935', 'Ticket_330958', 'Ticket_330959', 'Ticket_330979', 'Ticket_330980', 'Ticket_334912', 'Ticket_335097', 'Ticket_335677', 'Ticket_33638', 'Ticket_336439', 'Ticket_3411', 'Ticket_341826', 'Ticket_34218', 'Ticket_343095', 'Ticket_343120', 'Ticket_343275', 'Ticket_343276', 'Ticket_345364', 'Ticket_345572', 'Ticket_345764', 'Ticket_345765', 'Ticket_345767', 'Ticket_345769', 'Ticket_345773', 'Ticket_345774', 'Ticket_345777', 'Ticket_345778', 'Ticket_345779', 'Ticket_345780', 'Ticket_345781', 'Ticket_345783', 'Ticket_347054', 'Ticket_347060', 'Ticket_347061', 'Ticket_347062', 'Ticket_347063', 'Ticket_347064', 'Ticket_347071', 'Ticket_347073', 'Ticket_347076', 'Ticket_347077', 'Ticket_347078', 'Ticket_347080', 'Ticket_347082', 'Ticket_347085', 'Ticket_347087', 'Ticket_347088', 'Ticket_347089', 'Ticket_3474', 'Ticket_347464', 'Ticket_347466', 'Ticket_347470', 'Ticket_347742', 'Ticket_348121', 'Ticket_349201', 'Ticket_349203', 'Ticket_349204', 'Ticket_349206', 'Ticket_349207', 'Ticket_349208', 'Ticket_349209', 'Ticket_349210', 'Ticket_349213', 'Ticket_349214', 'Ticket_349216', 'Ticket_349217', 'Ticket_349218', 'Ticket_349219', 'Ticket_349221', 'Ticket_349222', 'Ticket_349224', 'Ticket_349225', 'Ticket_349228', 'Ticket_349234', 'Ticket_349237', 'Ticket_349240', 'Ticket_349241', 'Ticket_349243', 'Ticket_349244', 'Ticket_349245', 'Ticket_349246', 'Ticket_349247', 'Ticket_349248', 'Ticket_349249', 'Ticket_349252', 'Ticket_349253', 'Ticket_349254', 'Ticket_349256', 'Ticket_349257', 'Ticket_349909', 'Ticket_349910', 'Ticket_349912', 'Ticket_350026', 'Ticket_350029', 'Ticket_350034', 'Ticket_350035', 'Ticket_350036', 'Ticket_350042', 'Ticket_350043', 'Ticket_350046', 'Ticket_350048', 'Ticket_350050', 'Ticket_350060', 'Ticket_350407', 'Ticket_350417', 'Ticket_35273', 'Ticket_35281', 'Ticket_35852', 'Ticket_358585', 'Ticket_36209', 'Ticket_362316', 'Ticket_363291', 'Ticket_363294', 'Ticket_363592', 'Ticket_364499', 'Ticket_364500', 'Ticket_364506', 'Ticket_364511', 'Ticket_364516', 'Ticket_364848', 'Ticket_364849', 'Ticket_364850', 'Ticket_365222', 'Ticket_36568', 'Ticket_367226', 'Ticket_367228', 'Ticket_367230', 'Ticket_367231', 'Ticket_367232', 'Ticket_367655', 'Ticket_368323', 'Ticket_36864', 'Ticket_36928', 'Ticket_36947', 'Ticket_36963', 'Ticket_36967', 'Ticket_370129', 'Ticket_370365', 'Ticket_370369', 'Ticket_370370', 'Ticket_370371', 'Ticket_370372', 'Ticket_370373', 'Ticket_370375', 'Ticket_370377', 'Ticket_371060', 'Ticket_371110', 'Ticket_371362', 'Ticket_372622', 'Ticket_373450', 'Ticket_374887', 'Ticket_374910', 'Ticket_376564', 'Ticket_376566', 'Ticket_382651', 'Ticket_382652', 'Ticket_383121', 'Ticket_384461', 'Ticket_386525', 'Ticket_392091', 'Ticket_392096', 'Ticket_394140', 'Ticket_4133', 'Ticket_4138', 'Ticket_4579', 'Ticket_54636', 'Ticket_5727', 'Ticket_65303', 'Ticket_65304', 'Ticket_65306', 'Ticket_6563', 'Ticket_695', 'Ticket_7534', 'Ticket_7545', 'Ticket_7552', 'Ticket_7598', 'Ticket_8475', 'Ticket_9234', 'Ticket_A./5. 3235', 'Ticket_A.5. 11206', 'Ticket_A.5. 18509', 'Ticket_A/4 48871', 'Ticket_A/4. 20589', 'Ticket_A/4. 39886', 'Ticket_A/5 21171', 'Ticket_A/5 21172', 'Ticket_A/5 21173', 'Ticket_A/5 21174', 'Ticket_A/5 2466', 'Ticket_A/5 2817', 'Ticket_A/5 3536', 'Ticket_A/5 3902', 'Ticket_A/5. 13032', 'Ticket_A/5. 2151', 'Ticket_A/5. 3336', 'Ticket_A/5. 3337', 'Ticket_A/5. 851', 'Ticket_A/S 2816', 'Ticket_A4. 54510', 'Ticket_C 17369', 'Ticket_C 4001', 'Ticket_C 7076', 'Ticket_C 7077', 'Ticket_C.A. 17248', 'Ticket_C.A. 2315', 'Ticket_C.A. 24579', 'Ticket_C.A. 2673', 'Ticket_C.A. 29178', 'Ticket_C.A. 29395', 'Ticket_C.A. 29566', 'Ticket_C.A. 31026', 'Ticket_C.A. 31921', 'Ticket_C.A. 33111', 'Ticket_C.A. 33112', 'Ticket_C.A. 33595', 'Ticket_C.A. 34651', 'Ticket_C.A. 37671', 'Ticket_C.A. 5547', 'Ticket_C.A. 6212', 'Ticket_CA 2144', 'Ticket_CA. 2314', 'Ticket_CA. 2343', 'Ticket_F.C.C. 13528', 'Ticket_F.C.C. 13529', 'Ticket_Fa 265302', 'Ticket_LINE', 'Ticket_P/PP 3381', 'Ticket_PC 17318', 'Ticket_PC 17473', 'Ticket_PC 17474', 'Ticket_PC 17475', 'Ticket_PC 17476', 'Ticket_PC 17477', 'Ticket_PC 17482', 'Ticket_PC 17483', 'Ticket_PC 17485', 'Ticket_PC 17558', 'Ticket_PC 17569', 'Ticket_PC 17572', 'Ticket_PC 17582', 'Ticket_PC 17585', 'Ticket_PC 17590', 'Ticket_PC 17592', 'Ticket_PC 17593', 'Ticket_PC 17595', 'Ticket_PC 17596', 'Ticket_PC 17597', 'Ticket_PC 17600', 'Ticket_PC 17604', 'Ticket_PC 17605', 'Ticket_PC 17609', 'Ticket_PC 17610', 'Ticket_PC 17611', 'Ticket_PC 17612', 'Ticket_PC 17754', 'Ticket_PC 17755', 'Ticket_PC 17756', 'Ticket_PC 17757', 'Ticket_PC 17758', 'Ticket_PC 17760', 'Ticket_PC 17761', 'Ticket_PP 4348', 'Ticket_PP 9549', 'Ticket_S.C./PARIS 2079', 'Ticket_S.O./P.P. 3', 'Ticket_S.O.C. 14879', 'Ticket_S.P. 3464', 'Ticket_SC/AH 29037', 'Ticket_SC/AH Basle 541', 'Ticket_SC/PARIS 2133', 'Ticket_SC/PARIS 2146', 'Ticket_SC/PARIS 2149', 'Ticket_SC/PARIS 2167', 'Ticket_SC/Paris 2123', 'Ticket_SC/Paris 2163', 'Ticket_SO/C 14885', 'Ticket_SOTON/O.Q. 3101305', 'Ticket_SOTON/O.Q. 3101306', 'Ticket_SOTON/O.Q. 3101307', 'Ticket_SOTON/O.Q. 3101310', 'Ticket_SOTON/O.Q. 392078', 'Ticket_SOTON/O2 3101272', 'Ticket_SOTON/O2 3101287', 'Ticket_SOTON/OQ 392076', 'Ticket_SOTON/OQ 392082', 'Ticket_SOTON/OQ 392086', 'Ticket_SOTON/OQ 392089', 'Ticket_SOTON/OQ 392090', 'Ticket_STON/O 2. 3101269', 'Ticket_STON/O 2. 3101273', 'Ticket_STON/O 2. 3101274', 'Ticket_STON/O 2. 3101275', 'Ticket_STON/O 2. 3101280', 'Ticket_STON/O 2. 3101285', 'Ticket_STON/O 2. 3101286', 'Ticket_STON/O 2. 3101288', 'Ticket_STON/O 2. 3101289', 'Ticket_STON/O 2. 3101292', 'Ticket_STON/O 2. 3101293', 'Ticket_STON/O 2. 3101294', 'Ticket_STON/O2. 3101271', 'Ticket_STON/O2. 3101279', 'Ticket_STON/O2. 3101282', 'Ticket_STON/O2. 3101283', 'Ticket_STON/O2. 3101290', 'Ticket_SW/PP 751', 'Ticket_W./C. 14258', 'Ticket_W./C. 14263', 'Ticket_W./C. 6607', 'Ticket_W./C. 6608', 'Ticket_W./C. 6609', 'Ticket_W.E.P. 5734', 'Ticket_W/C 14208', 'Ticket_WE/P 5735', 'Cabin_A10', 'Cabin_A14', 'Cabin_A16', 'Cabin_A19', 'Cabin_A20', 'Cabin_A23', 'Cabin_A24', 'Cabin_A31', 'Cabin_A32', 'Cabin_A34', 'Cabin_A36', 'Cabin_A5', 'Cabin_A6', 'Cabin_A7', 'Cabin_B101', 'Cabin_B18', 'Cabin_B19', 'Cabin_B20', 'Cabin_B22', 'Cabin_B28', 'Cabin_B3', 'Cabin_B35', 'Cabin_B38', 'Cabin_B39', 'Cabin_B4', 'Cabin_B49', 'Cabin_B5', 'Cabin_B51 B53 B55', 'Cabin_B58 B60', 'Cabin_B69', 'Cabin_B73', 'Cabin_B77', 'Cabin_B79', 'Cabin_B80', 'Cabin_B82 B84', 'Cabin_B86', 'Cabin_B94', 'Cabin_B96 B98', 'Cabin_C101', 'Cabin_C103', 'Cabin_C104', 'Cabin_C110', 'Cabin_C111', 'Cabin_C118', 'Cabin_C123', 'Cabin_C124', 'Cabin_C125', 'Cabin_C126', 'Cabin_C128', 'Cabin_C148', 'Cabin_C2', 'Cabin_C22 C26', 'Cabin_C23 C25 C27', 'Cabin_C30', 'Cabin_C32', 'Cabin_C45', 'Cabin_C46', 'Cabin_C49', 'Cabin_C50', 'Cabin_C52', 'Cabin_C65', 'Cabin_C68', 'Cabin_C70', 'Cabin_C78', 'Cabin_C82', 'Cabin_C86', 'Cabin_C90', 'Cabin_C91', 'Cabin_C92', 'Cabin_C93', 'Cabin_C95', 'Cabin_C99', 'Cabin_D', 'Cabin_D11', 'Cabin_D15', 'Cabin_D17', 'Cabin_D19', 'Cabin_D20', 'Cabin_D21', 'Cabin_D26', 'Cabin_D28', 'Cabin_D30', 'Cabin_D33', 'Cabin_D35', 'Cabin_D36', 'Cabin_D37', 'Cabin_D46', 'Cabin_D47', 'Cabin_D48', 'Cabin_D50', 'Cabin_D56', 'Cabin_D6', 'Cabin_D7', 'Cabin_E10', 'Cabin_E101', 'Cabin_E12', 'Cabin_E121', 'Cabin_E17', 'Cabin_E24', 'Cabin_E25', 'Cabin_E31', 'Cabin_E33', 'Cabin_E36', 'Cabin_E38', 'Cabin_E40', 'Cabin_E44', 'Cabin_E46', 'Cabin_E49', 'Cabin_E50', 'Cabin_E58', 'Cabin_E63', 'Cabin_E67', 'Cabin_E68', 'Cabin_E77', 'Cabin_E8', 'Cabin_F E69', 'Cabin_F G63', 'Cabin_F2', 'Cabin_F33', 'Cabin_F38', 'Cabin_F4', 'Cabin_G6', 'Cabin_T', 'Embarked_C', 'Embarked_Q', 'Embarked_S'] ['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27', 'f28', 'f29', 'f30', 'f31', 'f32', 'f33', 'f34', 'f35', 'f36', 'f37', 'f38', 'f39', 'f40', 'f41', 'f42', 'f43', 'f44', 'f45', 'f46', 'f47', 'f48', 'f49', 'f50', 'f51', 'f52', 'f53', 'f54', 'f55', 'f56', 'f57', 'f58', 'f59', 'f60', 'f61', 'f62', 'f63', 'f64', 'f65', 'f66', 'f67', 'f68', 'f69', 'f70', 'f71', 'f72', 'f73', 'f74', 'f75', 'f76', 'f77', 'f78', 'f79', 'f80', 'f81', 'f82', 'f83', 'f84', 'f85', 'f86', 'f87', 'f88', 'f89', 'f90', 'f91', 'f92', 'f93', 'f94', 'f95', 'f96', 'f97', 'f98', 'f99', 'f100', 'f101', 'f102', 'f103', 'f104', 'f105', 'f106', 'f107', 'f108', 'f109', 'f110', 'f111', 'f112', 'f113', 'f114', 'f115', 'f116', 'f117', 'f118', 'f119', 'f120', 'f121', 'f122', 'f123', 'f124', 'f125', 'f126', 'f127', 'f128', 'f129', 'f130', 'f131', 'f132', 'f133', 'f134', 'f135', 'f136', 'f137', 'f138', 'f139', 'f140', 'f141', 'f142', 'f143', 'f144', 'f145', 'f146', 'f147', 'f148', 'f149', 'f150', 'f151', 'f152', 'f153', 'f154', 'f155', 'f156', 'f157', 'f158', 'f159', 'f160', 'f161', 'f162', 'f163', 'f164', 'f165', 'f166', 'f167', 'f168', 'f169', 'f170', 'f171', 'f172', 'f173', 'f174', 'f175', 'f176', 'f177', 'f178', 'f179', 'f180', 'f181', 'f182', 'f183', 'f184', 'f185', 'f186', 'f187', 'f188', 'f189', 'f190', 'f191', 'f192', 'f193', 'f194', 'f195', 'f196', 'f197', 'f198', 'f199', 'f200', 'f201', 'f202', 'f203', 'f204', 'f205', 'f206', 'f207', 'f208', 'f209', 'f210', 'f211', 'f212', 'f213', 'f214', 'f215', 'f216', 'f217', 'f218', 'f219', 'f220', 'f221', 'f222', 'f223', 'f224', 'f225', 'f226', 'f227', 'f228', 'f229', 'f230', 'f231', 'f232', 'f233', 'f234', 'f235', 'f236', 'f237', 'f238', 'f239', 'f240', 'f241', 'f242', 'f243', 'f244', 'f245', 'f246', 'f247', 'f248', 'f249', 'f250', 'f251', 'f252', 'f253', 'f254', 'f255', 'f256', 'f257', 'f258', 'f259', 'f260', 'f261', 'f262']
expected Cabin_C110, Ticket_F.C.C. 13528, Ticket_2680, Ticket_315089, Ticket_343095, Ticket_PC 17585, Ticket_29104, Ticket_110413, Ticket_250653, Cabin_A6, Ticket_2627, Ticket_PC 17605, Ticket_345778, Ticket_2653, Ticket_PC 17485, Ticket_345774, Ticket_347464, Ticket_330979, Ticket_347064, Ticket_13502, Ticket_17463, Ticket_371362, Ticket_3101295, Cabin_B20, Cabin_A24, Ticket_CA. 2314, Ticket_371110, Cabin_C45, Ticket_234604, Ticket_315090, Ticket_113051, Cabin_D20, PassengerId, Ticket_13507, Ticket_1601, Ticket_349222, Ticket_STON/O2. 3101283, Ticket_65303, Ticket_28664, Ticket_345765, Ticket_376566, Ticket_113510, Ticket_113807, Ticket_5727, Ticket_W./C. 6608, Cabin_C32, Ticket_8475, Ticket_C.A. 29395, Ticket_226593, Ticket_2926, Ticket_368323, Ticket_19950, Ticket_111320, Ticket_343120, Ticket_SC/PARIS 2133, Ticket_315096, Ticket_370377, Ticket_7552, Ticket_343276, Ticket_364499, Ticket_36967, Cabin_C82, Ticket_PC 17597, Ticket_28551, Ticket_392091, Ticket_SC/PARIS 2146, Ticket_349228, Ticket_3101267, Ticket_2620, Ticket_374910, Ticket_STON/O2. 3101271, Ticket_2659, Ticket_113760, Ticket_SC/AH 29037, Ticket_315097, Ticket_371060, Ticket_367226, Ticket_9234, Ticket_347063, Ticket_226875, Ticket_P/PP 3381, Ticket_PC 17755, Cabin_E38, Ticket_2683, Ticket_C 4001, Ticket_W./C. 6607, Ticket_A/5 3902, Pclass, Ticket_113786, Ticket_PC 17593, Ticket_CA 2144, Cabin_F38, Ticket_218629, Cabin_C124, Cabin_D30, Ticket_244373, Ticket_2647, Ticket_A/5 21172, Ticket_250655, Ticket_243880, Cabin_G6, Cabin_A34, Ticket_14311, Cabin_B19, Ticket_S.C./PARIS 2079, Cabin_C46, Ticket_315082, Ticket_695, Cabin_D17, Cabin_D6, Cabin_B82 B84, Ticket_345780, Ticket_312991, Ticket_330919, Cabin_B94, Ticket_11753, Ticket_12749, Cabin_D56, Ticket_362316, Cabin_B5, Ticket_349209, Parch, Cabin_C125, Embarked_S, Ticket_347073, Cabin_C65, Ticket_2689, Cabin_C103, Cabin_E12, Ticket_237798, Ticket_345783, Cabin_B4, Ticket_239853, Ticket_PP 4348, Ticket_SOTON/O.Q. 3101306, Ticket_4579, Ticket_111427, Ticket_386525, Cabin_C99, Cabin_C49, Ticket_345777, Ticket_347054, Ticket_2223, Ticket_382651, Ticket_350026, Ticket_234818, Ticket_2631, Ticket_2691, Cabin_E46, Cabin_A5, Ticket_349245, Cabin_E36, Ticket_347742, Ticket_392096, Cabin_D26, Ticket_220367, Ticket_7545, Fare, Ticket_W.E.P. 5734, Ticket_248733, Ticket_112059, Ticket_239865, Cabin_A31, Ticket_345781, Ticket_341826, Ticket_349210, Ticket_113056, Ticket_17764, Ticket_PC 17758, Ticket_W./C. 14263, Ticket_STON/O 2. 3101288, Cabin_C68, Ticket_347089, Ticket_2649, Ticket_349234, Ticket_2671, Cabin_B38, Cabin_C52, Cabin_C148, Cabin_D, Ticket_2697, Ticket_PC 17476, Cabin_C101, Ticket_SOTON/O2 3101287, Ticket_19928, Ticket_A/5. 851, Cabin_T, Cabin_B73, Cabin_E68, Ticket_PC 17609, Ticket_349256, Ticket_CA. 2343, Ticket_16966, SibSp, Ticket_312993, Ticket_367228, Ticket_A/5 21173, Cabin_A7, Ticket_370372, Ticket_65306, Ticket_A./5. 3235, Ticket_11765, Ticket_229236, Ticket_3101298, Ticket_113789, Cabin_A16, Ticket_347080, Ticket_14312, Ticket_24160, Ticket_250652, Ticket_110564, Ticket_364849, Ticket_335097, Ticket_211536, Ticket_349252, Ticket_C.A. 24579, Ticket_364848, Ticket_349217, Ticket_2685, Ticket_349910, Ticket_S.P. 3464, Ticket_250644, Ticket_A/5. 3336, Ticket_S.O.C. 14879, Ticket_29106, Ticket_350407, Ticket_STON/O 2. 3101293, Ticket_349216, Cabin_E10, Ticket_PC 17474, Ticket_29108, Ticket_373450, Ticket_SOTON/OQ 392089, Ticket_STON/O 2. 3101292, Cabin_A10, Cabin_F G63, Ticket_2651, Ticket_347077, Cabin_F2, Ticket_363592, Ticket_349207, Cabin_B79, Ticket_A.5. 11206, Ticket_A/5. 13032, Cabin_A36, Ticket_A/5 2817, Cabin_A19, Ticket_PC 17590, Ticket_2693, Ticket_347088, Ticket_A/5. 2151, Ticket_233866, Ticket_2629, Ticket_113804, Ticket_345572, Ticket_28665, Ticket_345773, Ticket_SO/C 14885, Ticket_28134, Ticket_7534, Ticket_349240, Ticket_350060, Cabin_A20, Ticket_233639, Ticket_347466, Ticket_370370, Cabin_B58 B60, Ticket_113792, Ticket_239854, Ticket_35273, Ticket_PC 17475, Ticket_349247, Ticket_367230, Ticket_236171, Cabin_C92, Ticket_364516, Ticket_C.A. 5547, Ticket_2662, Ticket_A/4. 39886, Cabin_E17, Cabin_C2, Ticket_SC/AH Basle 541, Ticket_349221, Ticket_C 17369, Cabin_D11, Ticket_113501, Cabin_E77, Cabin_C22 C26, Ticket_315088, Ticket_A4. 54510, Ticket_345769, Embarked_C, Ticket_A/5 2466, Ticket_PC 17612, Ticket_SOTON/OQ 392082, Cabin_D7, Ticket_248747, Ticket_PP 9549, Ticket_219533, Ticket_PC 17756, Ticket_28220, Ticket_C.A. 29566, Ticket_14313, Ticket_248738, Ticket_383121, Ticket_250648, Ticket_PC 17582, Ticket_29750, Cabin_D50, Ticket_SC/Paris 2163, Cabin_B35, Ticket_29105, Ticket_250647, Ticket_110465, Ticket_W./C. 6609, Ticket_349203, Ticket_2003, Ticket_C.A. 31026, Cabin_B69, Ticket_11668, Ticket_250646, Ticket_382652, Ticket_350036, Ticket_363291, Ticket_17466, Ticket_113776, Ticket_345779, Cabin_B3, Ticket_230080, Ticket_350417, Ticket_363294, Ticket_250649, Ticket_C.A. 33112, Ticket_C.A. 33595, Ticket_STON/O 2. 3101274, Ticket_223596, Ticket_364511, Ticket_367232, Ticket_364850, Ticket_36963, Ticket_W./C. 14258, Ticket_244310, Ticket_113798, Ticket_345764, Ticket_334912, Ticket_2664, Ticket_17474, Cabin_B28, Ticket_250643, Ticket_112277, Cabin_D36, Cabin_A32, Ticket_STON/O2. 3101282, Ticket_347061, Ticket_2623, Ticket_349249, Cabin_C126, Ticket_PC 17757, Cabin_E44, Cabin_C128, Ticket_367231, Ticket_2674, Cabin_B77, Cabin_E25, Ticket_330909, Ticket_347470, Ticket_28206, Ticket_7598, Ticket_349208, Ticket_36209, Ticket_347071, Ticket_244270, Ticket_A/5 3536, Ticket_19996, Ticket_3101281, Cabin_E40, Ticket_113787, Ticket_11752, Ticket_C.A. 2673, Ticket_11751, Ticket_370365, Ticket_28425, Ticket_347076, Ticket_SOTON/O.Q. 3101305, Ticket_36864, Ticket_347062, Ticket_315098, Ticket_345767, Ticket_36568, Ticket_27849, Ticket_PC 17760, Ticket_349246, Ticket_112050, Ticket_C.A. 2315, Ticket_347060, Ticket_A/4 48871, Ticket_C.A. 31921, Ticket_A/5. 3337, Ticket_STON/O2. 3101290, Ticket_34218, Ticket_113783, Ticket_2700, Ticket_315153, Ticket_220845, Cabin_D21, Cabin_B96 B98, Ticket_19952, Ticket_237736, Ticket_33638, Ticket_370369, Ticket_113055, Ticket_234360, Ticket_2668, Ticket_248731, Cabin_C104, Ticket_350035, Ticket_17453, Cabin_E67, Ticket_113503, Ticket_349253, Cabin_D33, Ticket_35852, Ticket_330958, Ticket_350034, Ticket_113806, Ticket_F.C.C. 13529, Ticket_110152, Ticket_330932, Ticket_347087, Ticket_STON/O 2. 3101294, Ticket_112052, Ticket_113788, Ticket_248727, Ticket_324669, Ticket_244278, Ticket_PC 17610, Ticket_111240, Ticket_36947, Ticket_315093, Ticket_PC 17477, Ticket_244367, Ticket_230434, Cabin_D35, Ticket_370373, Cabin_B51 B53 B55, Ticket_S.O./P.P. 3, Ticket_330923, Ticket_SW/PP 751, Ticket_4138, Ticket_350042, Cabin_C91, Ticket_PC 17611, Ticket_28213, Ticket_65304, Ticket_2686, Ticket_31027, Ticket_364500, Ticket_SC/Paris 2123, Ticket_372622, Ticket_2694, Cabin_C50, Cabin_D28, Ticket_113028, Ticket_3101296, Cabin_E33, Ticket_2666, Ticket_SC/PARIS 2167, Cabin_D48, Ticket_349213, Cabin_B22, Ticket_349206, Ticket_358585, Ticket_PC 17569, Ticket_C 7076, Ticket_110813, Cabin_E49, Ticket_Fa 265302, Cabin_E58, Ticket_2690, Ticket_237442, Ticket_350029, Ticket_C.A. 33111, Ticket_315084, Ticket_3101277, Ticket_349218, Cabin_A14, Cabin_B86, Ticket_SC/PARIS 2149, Ticket_STON/O 2. 3101280, Cabin_B39, Cabin_D46, Cabin_B18, Cabin_C23 C25 C27, Cabin_D37, Ticket_231919, Ticket_C.A. 37671, Embarked_Q, Ticket_367655, Sex_female, Ticket_349219, Age, Ticket_11967, Ticket_3101265, Ticket_312992, Ticket_SOTON/O.Q. 392078, Ticket_376564, Ticket_SOTON/O.Q. 3101310, Cabin_C118, Cabin_C78, Ticket_347082, Ticket_17421, Cabin_E31, Ticket_230433, Cabin_D47, Cabin_A23, Ticket_13509, Ticket_SOTON/OQ 392086, Cabin_E101, Ticket_370371, Ticket_12460, Ticket_PC 17483, Ticket_C.A. 29178, Ticket_35281, Ticket_347085, Ticket_113767, Ticket_323592, Ticket_364506, Ticket_LINE, Ticket_343275, Ticket_336439, Ticket_349237, Ticket_345364, Ticket_C.A. 34651, Ticket_244358, Ticket_2677, Ticket_SOTON/O.Q. 3101307, Ticket_111426, Ticket_11755, Ticket_228414, Ticket_111361, Ticket_13568, Ticket_STON/O2. 3101279, Cabin_C30, Ticket_370129, Ticket_237565, Ticket_113796, Ticket_349243, Ticket_239855, Ticket_A/4. 20589, Ticket_113059, Cabin_E121, Cabin_D19, Ticket_PC 17595, Ticket_PC 17604, Ticket_WE/P 5735, Ticket_111369, Ticket_11813, Ticket_237671, Ticket_3101276, Cabin_E63, Ticket_384461, Cabin_D15, Cabin_F E69, Ticket_236852, Ticket_PC 17482, Ticket_365222, Ticket_315151, Ticket_350043, Ticket_A.5. 18509, Ticket_STON/O 2. 3101285, Ticket_PC 17754, Ticket_13049, Ticket_244252, Ticket_113781, Ticket_330980, Ticket_349214, Cabin_F4, Ticket_C.A. 17248, Ticket_A/5 21174, Ticket_330959, Cabin_B80, Ticket_PC 17473, Ticket_113572, Ticket_27042, Ticket_349248, Ticket_349224, Ticket_349257, Ticket_A/S 2816, Cabin_C70, Ticket_11769, Ticket_347078, Ticket_STON/O 2. 3101269, Ticket_36928, Ticket_113773, Ticket_3474, Ticket_PC 17761, Ticket_12233, Ticket_2665, Ticket_113505, Ticket_STON/O 2. 3101289, Ticket_STON/O 2. 3101273, Ticket_349204, Ticket_2628, Ticket_394140, Cabin_E8, Ticket_28424, Ticket_PC 17596, Ticket_2908, Cabin_C90, Ticket_54636, Ticket_28403, Cabin_C111, Ticket_248698, Ticket_113803, Cabin_B101, Ticket_349225, Ticket_348121, Ticket_W/C 14208, Ticket_SOTON/OQ 392076, Ticket_111428, Ticket_113050, Ticket_243847, Ticket_SOTON/O2 3101272, Ticket_370375, Ticket_17464, Ticket_19877, Ticket_STON/O 2. 3101275, Ticket_3411, Ticket_349244, Sex_male, Ticket_374887, Ticket_PC 17600, Cabin_C93, Ticket_STON/O 2. 3101286, Ticket_350048, Ticket_PC 17558, Cabin_C95, Cabin_C123, Ticket_11767, Cabin_C86, Ticket_6563, Cabin_E50, Ticket_C.A. 6212, Ticket_PC 17572, Ticket_PC 17592, Ticket_349912, Ticket_350050, Ticket_230136, Ticket_4133, Ticket_27267, Ticket_2661, Ticket_349201, Ticket_350046, Cabin_E24, Cabin_F33, Ticket_330935, Ticket_248706, Ticket_2699, Ticket_349909, Ticket_SOTON/OQ 392090, Ticket_349241, Ticket_349254, Ticket_C 7077, Cabin_B49, Ticket_3101278, Ticket_PC 17318, Ticket_26360, Ticket_113784, Ticket_A/5 21171, Ticket_19943, Ticket_335677, Ticket_244361 in input data
training data did not have the following fields: f129, f21, f86, f209, f83, f253, f85, f192, f4, f198, f6, f162, f103, f124, f152, f125, f139, f44, f89, f67, f231, f29, f134, f19, f160, f220, f251, f163, f96, f119, f91, f249, f170, f259, f113, f84, f222, f100, f168, f225, f150, f200, f50, f0, f3, f201, f95, f32, f99, f38, f56, f175, f14, f246, f250, f141, f148, f179, f188, f157, f60, f24, f27, f258, f232, f248, f244, f46, f5, f230, f92, f23, f122, f25, f221, f22, f87, f143, f54, f211, f202, f212, f172, f8, f208, f173, f80, f10, f39, f218, f195, f93, f238, f261, f2, f118, f213, f224, f55, f123, f26, f247, f1, f12, f115, f72, f146, f223, f109, f64, f127, f210, f53, f20, f149, f116, f101, f151, f161, f36, f184, f194, f15, f186, f111, f11, f131, f79, f126, f121, f154, f69, f159, f205, f234, f169, f196, f90, f105, f147, f165, f171, f187, f214, f138, f34, f254, f17, f217, f75, f233, f61, f76, f142, f73, f30, f82, f183, f257, f31, f133, f177, f98, f13, f59, f52, f176, f33, f18, f107, f117, f58, f135, f166, f216, f9, f47, f114, f226, f252, f110, f104, f49, f120, f167, f203, f45, f88, f262, f40, f43, f228, f63, f185, f189, f245, f156, f145, f180, f62, f16, f106, f207, f158, f227, f65, f215, f242, f48, f140, f197, f144, f236, f182, f28, f37, f164, f68, f130, f243, f206, f260, f237, f178, f240, f70, f199, f136, f77, f51, f74, f112, f204, f256, f41, f128, f153, f97, f78, f155, f174, f7, f190, f191, f94, f241, f181, f229, f108, f219, f193, f66, f102, f81, f57, f255, f42, f235, f239, f35, f132, f137, f71