In [4]:
import pandas as pd
import numpy as np

In [5]:
"""
Load data
"""
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')
df_train.head()

# Handle missing
df_train = df_train.drop(['Name', 'Ticket', 'Cabin'], axis=1).dropna()
df_test = df_test.fillna(method='ffill')

In [6]:
"""
Feature engineering
"""
from sklearn import preprocessing
from keras.utils import to_categorical

le = preprocessing.LabelEncoder()
mms = preprocessing.MinMaxScaler()

# Embarked
embarked_train = to_categorical(le.fit_transform(df_train['Embarked']))
embarked_test = to_categorical(le.fit_transform(df_test['Embarked']))

# Fare
fare_train = df_train['Fare'].values.reshape(-1, 1)
fare_test = df_test['Fare'].values.reshape(-1, 1)

# Siblings
siblings_train = df_train['SibSp'].values.reshape(-1, 1)
siblings_test = df_test['SibSp'].values.reshape(-1, 1)

# Parents
parents_train = df_train['Parch'].values.reshape(-1, 1)
parents_test = df_test['Parch'].values.reshape(-1, 1)

# Sex
sex_train = to_categorical(le.fit_transform(df_train['Sex']))
sex_test = to_categorical(le.fit_transform(df_test['Sex']))

# Pclass
pclass_train = to_categorical(le.fit_transform(df_train['Pclass']))
pclass_test = to_categorical(le.fit_transform(df_test['Pclass']))

# Age
age_train = df_train['Age'].values.reshape(-1, 1)
age_test  = df_test['Age'].values.reshape(-1, 1)

# X and Y
num = len(df_train)
split = int(num * .75)

x_train = np.hstack([pclass_train, sex_train, age_train, siblings_train, parents_train, embarked_train])
x_train = mms.fit_transform(x_train)

y_train = df_train['Survived'].values

# X test
x_test = np.hstack([pclass_test,  sex_test,  age_test, siblings_test, parents_test, embarked_test])
x_test = mms.fit_transform(x_test)

print('Training data:')
[print(", ".join(map(lambda x: str(x), row))) for row in x_train[0:10,:]]

print('Test data:')
[print(", ".join(map(lambda x: str(x), row))) for row in x_test[0:10,:]]

print()

Training data:
0.0, 0.0, 1.0, 0.0, 1.0, 0.271173661724, 0.2, 0.0, 0.0, 0.0, 1.0
1.0, 0.0, 0.0, 1.0, 0.0, 0.472229203317, 0.2, 0.0, 1.0, 0.0, 0.0
0.0, 0.0, 1.0, 1.0, 0.0, 0.321437547122, 0.0, 0.0, 0.0, 0.0, 1.0
1.0, 0.0, 0.0, 1.0, 0.0, 0.434531289269, 0.2, 0.0, 0.0, 0.0, 1.0
0.0, 0.0, 1.0, 0.0, 1.0, 0.434531289269, 0.0, 0.0, 0.0, 0.0, 1.0
1.0, 0.0, 0.0, 0.0, 1.0, 0.673284744911, 0.0, 0.0, 0.0, 0.0, 1.0
0.0, 0.0, 1.0, 0.0, 1.0, 0.0198542347323, 0.6, 0.166666666667, 0.0, 0.0, 1.0
0.0, 0.0, 1.0, 1.0, 0.0, 0.334003518472, 0.0, 0.333333333333, 0.0, 0.0, 1.0
0.0, 1.0, 0.0, 1.0, 0.0, 0.170645890927, 0.2, 0.0, 1.0, 0.0, 0.0
0.0, 0.0, 1.0, 1.0, 0.0, 0.0449861774315, 0.2, 0.166666666667, 0.0, 0.0, 1.0
Test data:
0.0, 0.0, 1.0, 0.0, 1.0, 0.452723196624, 0.0, 0.0, 0.0, 1.0, 0.0
0.0, 0.0, 1.0, 1.0, 0.0, 0.617565607279, 0.125, 0.0, 0.0, 0.0, 1.0
0.0, 1.0, 0.0, 0.0, 1.0, 0.815376500066, 0.0, 0.0, 0.0, 1.0, 0.0
0.0, 0.0, 1.0, 0.0, 1.0, 0.353817750231, 0.0, 0.0, 0.0, 0.0, 1.0
0.0, 0.0, 1.0, 1.0, 0.0, 0.

In [11]:
"""
Define and train model
"""
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

model = Sequential()
model.add(Dense(16, input_dim=11))
model.add(Activation('relu'))
model.add(Dense(16, input_dim=11))
model.add(Activation('relu'))
model.add(Dense(16, input_dim=11))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(x_train, y_train, validation_split=.1, epochs=200, batch_size=16, shuffle=True)

Train on 640 samples, validate on 72 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200


Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200


Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200


Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<keras.callbacks.History at 0x11fee7e48>

In [8]:
"""
Create submission from model
"""
survived = (model.predict(x_test) > 0.5).astype('int')
passenger_ids = df_test['PassengerId'].values
passenger_ids = passenger_ids.reshape(len(passenger_ids), 1)

print(survived.shape)
print(passenger_ids.shape)

submission = pd.DataFrame(np.hstack([passenger_ids, survived]), columns=['PassengerId', 'Survived'])
submission.to_csv('submission.csv', index=False)
submission.head()

(418, 1)
(418, 1)


Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1


In [10]:
model.predict(x_test) > 0.5

array([[False],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [ True],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [False],
       [False],
       [ True],
       [False],
       [False],
       [