In [1]:
import pandas as pd
import numpy as np

In [2]:
"""
Load data

Read the Titanic training and test CSV files into `df_train` and `df_test`,
then preview the first rows of the training frame.
"""
# Both files are parsed the same way, so read them in a single pass and
# unpack the two resulting frames in (train, test) order.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ('data/titanic/train.csv', 'data/titanic/test.csv')
)
df_train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
"""
Feature engineering

Builds the feature matrices `x_train` / `x_test` and the label vector
`y_train` from `df_train` / `df_test`.

Every encoder and the scaler are fitted on the training data only and then
re-applied to the test data, so both matrices share the same column layout
and the same scale.
"""
from sklearn import preprocessing
from keras.utils import to_categorical

le = preprocessing.LabelEncoder()
mms = preprocessing.MinMaxScaler()


def _one_hot_pair(column):
    """One-hot encode `column` of df_train and df_test with one shared mapping.

    The label encoder is fitted on the training column only; the test column
    is encoded with that same mapping. `LabelEncoder.transform` raises
    ValueError if the test column contains a value never seen in training,
    which is preferable to silently assigning it a different index.

    Returns a `(train_one_hot, test_one_hot)` tuple whose arrays have the same
    number of columns (one per category found in the training column).
    """
    train_codes = le.fit_transform(df_train[column])
    test_codes = le.transform(df_test[column])
    # Pin the width so the test matrix keeps every category column even if
    # some category does not occur in the test data.
    n_categories = len(le.classes_)
    return (
        to_categorical(train_codes, num_classes=n_categories),
        to_categorical(test_codes, num_classes=n_categories),
    )


# Handle missing
# errors='ignore' keeps this cell re-runnable: df_train is overwritten here,
# so on a second run the dropped columns are already gone.
df_train = df_train.drop(['Name', 'Ticket', 'Cabin'], axis=1, errors='ignore').dropna()
# Test rows cannot be dropped (every PassengerId needs a prediction), so gaps
# are forward-filled instead. `.ffill()` replaces the deprecated
# `fillna(method='ffill')` spelling.
df_test = df_test.ffill()

# Embarked
embarked_train, embarked_test = _one_hot_pair('Embarked')

# Fare
# NOTE(review): the fare arrays are built but not stacked into x_train/x_test
# below; adding them would change the feature width the model cell expects.
fare_train = df_train['Fare'].values.reshape(-1, 1)
fare_test = df_test['Fare'].values.reshape(-1, 1)

# Siblings
siblings_train = df_train['SibSp'].values.reshape(-1, 1)
siblings_test = df_test['SibSp'].values.reshape(-1, 1)

# Parents
parents_train = df_train['Parch'].values.reshape(-1, 1)
parents_test = df_test['Parch'].values.reshape(-1, 1)

# Sex
sex_train, sex_test = _one_hot_pair('Sex')

# Pclass
pclass_train, pclass_test = _one_hot_pair('Pclass')

# Age
age_train = df_train['Age'].values.reshape(-1, 1)
age_test = df_test['Age'].values.reshape(-1, 1)

# X and Y
# NOTE(review): `num` and `split` are not used by any visible cell; kept in
# case another cell relies on them.
num = len(df_train)
split = int(num * .75)

x_train = np.hstack([pclass_train, sex_train, age_train, siblings_train, parents_train, embarked_train])
x_train = mms.fit_transform(x_train)

y_train = df_train['Survived'].values

# X test
x_test = np.hstack([pclass_test, sex_test, age_test, siblings_test, parents_test, embarked_test])
# Reuse the min/max learned from the training matrix; refitting on the test
# matrix would put the two sets on different scales.
x_test = mms.transform(x_test)

assert x_train.shape[1] == x_test.shape[1], "train/test feature widths differ"
assert len(x_train) == len(y_train), "feature rows and labels are misaligned"

pd.DataFrame(x_train).head()


Using TensorFlow backend.


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.0,0.0,1.0,0.0,1.0,0.271174,0.2,0.0,0.0,0.0,1.0
1,1.0,0.0,0.0,1.0,0.0,0.472229,0.2,0.0,1.0,0.0,0.0
2,0.0,0.0,1.0,1.0,0.0,0.321438,0.0,0.0,0.0,0.0,1.0
3,1.0,0.0,0.0,1.0,0.0,0.434531,0.2,0.0,0.0,0.0,1.0
4,0.0,0.0,1.0,0.0,1.0,0.434531,0.0,0.0,0.0,0.0,1.0


In [4]:
"""
Define and train model

A small fully connected binary classifier: three hidden layers of 16 ReLU
units followed by a single sigmoid output, trained with SGD on binary
cross-entropy. 10% of the training data is held out for validation.
"""
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

# Take the input width from the feature matrix instead of hard-coding it, so
# the model stays in sync if the feature set changes.
n_features = x_train.shape[1]

model = Sequential()
# Only the first layer declares the input width; later layers infer their
# input size from the layer before them.
model.add(Dense(16, input_dim=n_features))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

epochs = 400
batch_size = 16

# Keep the returned History object so the per-epoch metrics can be inspected
# afterwards instead of being discarded.
history = model.fit(x_train, y_train, validation_split=.1, epochs=epochs, batch_size=batch_size, shuffle=True)

Train on 640 samples, validate on 72 samples
Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Ep

Epoch 132/400
Epoch 133/400
Epoch 134/400
Epoch 135/400
Epoch 136/400
Epoch 137/400
Epoch 138/400
Epoch 139/400
Epoch 140/400
Epoch 141/400
Epoch 142/400
Epoch 143/400
Epoch 144/400
Epoch 145/400
Epoch 146/400
Epoch 147/400
Epoch 148/400
Epoch 149/400
Epoch 150/400
Epoch 151/400
Epoch 152/400
Epoch 153/400
Epoch 154/400
Epoch 155/400
Epoch 156/400
Epoch 157/400
Epoch 158/400
Epoch 159/400
Epoch 160/400
Epoch 161/400
Epoch 162/400
Epoch 163/400
Epoch 164/400
Epoch 165/400
Epoch 166/400
Epoch 167/400
Epoch 168/400
Epoch 169/400
Epoch 170/400
Epoch 171/400
Epoch 172/400
Epoch 173/400
Epoch 174/400
Epoch 175/400
Epoch 176/400
Epoch 177/400
Epoch 178/400
Epoch 179/400
Epoch 180/400
Epoch 181/400
Epoch 182/400
Epoch 183/400
Epoch 184/400
Epoch 185/400
Epoch 186/400
Epoch 187/400
Epoch 188/400
Epoch 189/400
Epoch 190/400
Epoch 191/400
Epoch 192/400
Epoch 193/400
Epoch 194/400
Epoch 195/400
Epoch 196/400
Epoch 197/400
Epoch 198/400
Epoch 199/400
Epoch 200/400
Epoch 201/400
Epoch 202/400
Epoch 

Epoch 262/400
Epoch 263/400
Epoch 264/400
Epoch 265/400
Epoch 266/400
Epoch 267/400
Epoch 268/400
Epoch 269/400
Epoch 270/400
Epoch 271/400
Epoch 272/400
Epoch 273/400
Epoch 274/400
Epoch 275/400
Epoch 276/400
Epoch 277/400
Epoch 278/400
Epoch 279/400
Epoch 280/400
Epoch 281/400
Epoch 282/400
Epoch 283/400
Epoch 284/400
Epoch 285/400
Epoch 286/400
Epoch 287/400
Epoch 288/400
Epoch 289/400
Epoch 290/400
Epoch 291/400
Epoch 292/400
Epoch 293/400
Epoch 294/400
Epoch 295/400
Epoch 296/400
Epoch 297/400
Epoch 298/400
Epoch 299/400
Epoch 300/400
Epoch 301/400
Epoch 302/400
Epoch 303/400
Epoch 304/400
Epoch 305/400
Epoch 306/400
Epoch 307/400
Epoch 308/400
Epoch 309/400
Epoch 310/400
Epoch 311/400
Epoch 312/400
Epoch 313/400
Epoch 314/400
Epoch 315/400
Epoch 316/400
Epoch 317/400
Epoch 318/400
Epoch 319/400
Epoch 320/400
Epoch 321/400
Epoch 322/400
Epoch 323/400
Epoch 324/400
Epoch 325/400
Epoch 326/400
Epoch 327/400
Epoch 328/400
Epoch 329/400
Epoch 330/400
Epoch 331/400
Epoch 332/400
Epoch 

Epoch 392/400
Epoch 393/400
Epoch 394/400
Epoch 395/400
Epoch 396/400
Epoch 397/400
Epoch 398/400
Epoch 399/400
Epoch 400/400


<keras.callbacks.History at 0x11ecf9908>

In [69]:
"""
Create submission from model

Threshold the model's predictions at 0.5, pair each prediction with its
PassengerId, and write the result to a CSV file.
"""
# Model output for every test row; values above 0.5 are labelled 1.
probabilities = model.predict(x_test)
survived = (probabilities > 0.5).astype('int')

# Column vector of ids so it can sit next to the prediction column.
passenger_ids = df_test['PassengerId'].values.reshape(-1, 1)

print(survived.shape)
print(passenger_ids.shape)

submission = pd.DataFrame(
    np.concatenate([passenger_ids, survived], axis=1),
    columns=['PassengerId', 'Survived'],
)
submission.to_csv('submissions/titanic.csv', index=False)
submission.head()

(418, 1)
(418, 1)
