In [1]:
import numpy as np
import pandas as pd
from sklearn import cross_validation, preprocessing
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
from helpers import load_titanic

Using TensorFlow backend.


In [2]:
# define the neural net function
def neural_net(input_dim=7, hidden_units=128):
    """Build and compile a small feed-forward binary classifier.

    The network is two fully connected ReLU layers of equal width followed
    by a single sigmoid unit, compiled with binary cross-entropy loss, the
    'rmsprop' optimizer and accuracy as the reported metric.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features per sample. Defaults to 7, which matches
        the original hard-coded value.
    hidden_units : int, optional
        Width of each of the two hidden layers. Defaults to 128, which
        matches the original hard-coded value.

    Returns
    -------
    A freshly initialised, compiled ``Sequential`` model. A new model is
    created on every call, so callers can train independent copies.
    """
    # NOTE(review): `init=` is the Keras 1 keyword; newer Keras releases
    # appear to spell it `kernel_initializer` -- confirm against the
    # installed version before upgrading.
    model = Sequential()
    model.add(Dense(hidden_units, input_dim=input_dim, init='normal',
                    activation='relu'))
    model.add(Dense(hidden_units, init='normal', activation='relu'))
    # single sigmoid output unit -> one value in (0, 1) per sample
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model

In [3]:
# load the titanic data
# `load_titanic` is imported from the project-local `helpers` module; its
# return value is unpacked here as a (train, test) pair.
# NOTE(review): later cells pass the selected columns straight into
# StandardScaler, so presumably the helper already encodes 'Sex' and
# 'Embarked' numerically and fills missing values -- confirm in helpers.
train, test = load_titanic()

In [4]:
# columns used as model inputs; their order fixes the column order of X
features = [
    'Sex', 'Pclass', 'Age', 'Fare',
    'SibSp', 'Parch', 'Embarked',
]

# pull the feature matrix and the target vector out of the training frame
X, y = train[features].values, train['Survived'].values

In [5]:
# split into folds for cross validation
# A fixed random_state makes the shuffled fold assignment repeatable, so the
# same samples land in the same folds on every Restart & Run All. (Network
# weight initialisation and training are still stochastic.)
kfold = cross_validation.StratifiedKFold(y=y, n_folds=10, shuffle=True,
                                         random_state=0)
cvscores = []  # one held-out accuracy per fold

# loop through the folds
for train_ix, test_ix in kfold:

    # rescale the data: statistics are fitted on the training fold only and
    # then applied to the held-out fold, so no held-out information leaks in
    scaler = preprocessing.StandardScaler()
    Xtrain = scaler.fit_transform(X[train_ix])
    Xtest = scaler.transform(X[test_ix])

    # initialize network (a fresh, untrained model for every fold)
    model = neural_net()

    # fit and predict
    model.fit(Xtrain, y[train_ix], verbose=0)
    # the model is compiled with metrics=['accuracy'], so evaluate() yields
    # a (loss, accuracy) pair; only the accuracy is kept
    _, scores = model.evaluate(Xtest, y[test_ix], verbose=0)
    cvscores.append(scores)

# print average score
print(np.mean(cvscores))

0.830582793694


In [6]:
# final model: standardise with statistics from the full training set,
# apply the same transform to the held-out test features
scaler = preprocessing.StandardScaler()
Xtrain = scaler.fit_transform(X)
Xtest = scaler.transform(test[features].values)

# fresh network trained on every labelled sample
model = neural_net()
model.fit(Xtrain, y, verbose=0)

# raw sigmoid outputs for the test rows
ypred = model.predict(Xtest)

In [7]:
# threshold the first output column at 0.5 to get 0/1 class labels
test['Survived'] = np.where(ypred[:, 0] > 0.5, 1, 0)

# save the id/label pairs as the submission file
pred_file = '../data/nn.csv'
test.loc[:, ['PassengerId', 'Survived']].to_csv(pred_file, index=False)

Kaggle Score: 0.78947