In [None]:
import numpy
import pandas
import datetime

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers.core import Dropout
from keras.layers.normalization import BatchNormalization

In [None]:
############################################################
# SibSp -> one-hot encoding
# One-hot encode the SibSp column
############################################################
def get_dummies_sibSp(df_all, df, df_test):
    """One-hot encode the ``SibSp`` column of the training and test frames.

    The category list is taken from ``df_all`` so that both returned frames
    get exactly the same ``SibSp_<value>`` columns, including values that
    occur in only one of the two frames.

    Parameters
    ----------
    df_all : pandas.DataFrame
        Frame whose ``SibSp`` column supplies the full list of categories.
    df, df_test : pandas.DataFrame
        Frames to encode. Neither is modified; new frames are returned.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df, df_test)`` with ``SibSp`` replaced by its dummy columns.
    """
    # Sorted so the dummy columns appear in the same order on every run.
    # Missing values are dropped because pandas rejects null categories.
    categories = sorted(df_all['SibSp'].dropna().unique())

    # assign() builds new frames, leaving the caller's inputs untouched.
    df = df.assign(SibSp=pandas.Categorical(df['SibSp'], categories=categories))
    df_test = df_test.assign(SibSp=pandas.Categorical(df_test['SibSp'], categories=categories))

    df = pandas.get_dummies(df, columns=['SibSp'])
    df_test = pandas.get_dummies(df_test, columns=['SibSp'])

    return df, df_test

############################################################
# Parch -> one-hot encoding
# One-hot encode the Parch column
############################################################
def get_dummies_parch(df_all, df, df_test):
    """One-hot encode the ``Parch`` column of the training and test frames.

    The category list is taken from ``df_all`` so that both returned frames
    get exactly the same ``Parch_<value>`` columns, including values that
    occur in only one of the two frames.

    Parameters
    ----------
    df_all : pandas.DataFrame
        Frame whose ``Parch`` column supplies the full list of categories.
    df, df_test : pandas.DataFrame
        Frames to encode. Neither is modified; new frames are returned.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df, df_test)`` with ``Parch`` replaced by its dummy columns.
    """
    # Sorted so the dummy columns appear in the same order on every run.
    # Missing values are dropped because pandas rejects null categories.
    categories = sorted(df_all['Parch'].dropna().unique())

    # assign() builds new frames, leaving the caller's inputs untouched.
    df = df.assign(Parch=pandas.Categorical(df['Parch'], categories=categories))
    df_test = df_test.assign(Parch=pandas.Categorical(df_test['Parch'], categories=categories))

    df = pandas.get_dummies(df, columns=['Parch'])
    df_test = pandas.get_dummies(df_test, columns=['Parch'])

    return df, df_test

############################################################
# Ticket -> one-hot encoding
# One-hot encode the Ticket column (only tickets that occur more than once)
############################################################
def get_dummies_ticket(df_all, df, df_test):
    """One-hot encode the ``Ticket`` column, keeping only repeated tickets.

    Only ticket values that occur more than once in ``df_all`` become
    categories. Every other ticket is treated as missing by the categorical
    conversion and therefore ends up with all of its dummy columns at zero.

    Parameters
    ----------
    df_all : pandas.DataFrame
        Frame whose ``Ticket`` column is counted to pick the categories.
    df, df_test : pandas.DataFrame
        Frames to encode. Neither is modified; new frames are returned.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df, df_test)`` with ``Ticket`` replaced by ``Ticket_<value>``
        dummy columns, identical in name and order for both frames.
    """
    ticket_counts = df_all['Ticket'].value_counts()
    # Sorted so the dummy-column order is stable between runs; iterating a
    # set of strings is not, because string hashing is randomised per process.
    categories = sorted(ticket_counts[ticket_counts > 1].index)

    # assign() builds new frames, leaving the caller's inputs untouched.
    df = df.assign(Ticket=pandas.Categorical(df['Ticket'], categories=categories))
    df_test = df_test.assign(Ticket=pandas.Categorical(df_test['Ticket'], categories=categories))

    df = pandas.get_dummies(df, columns=['Ticket'])
    df_test = pandas.get_dummies(df_test, columns=['Ticket'])

    return df, df_test

############################################################
# Standardization
############################################################
def standardization(df, df_test):
    """Standardize the ``Pclass`` and ``Fare`` columns of both frames.

    A ``StandardScaler`` is fitted on the training frame only and then
    applied to the test frame, so the test data never influences the
    scaling statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Training frame containing ``Pclass`` and ``Fare``.
    df_test : pandas.DataFrame
        Test frame containing ``Pclass`` and ``Fare``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df, df_test)`` as new frames with the two columns replaced by
        their scaled values. The inputs are not modified.
    """
    scaled_columns = ['Pclass', 'Fare']
    scaler = StandardScaler()

    train_scaled = scaler.fit_transform(df[scaled_columns].values)
    test_scaled = scaler.transform(df_test[scaled_columns].values)

    # Work on copies so the caller's frames stay as they were.
    df = df.copy()
    df_test = df_test.copy()

    # Assign the raw arrays column by column: this is positional, so it does
    # not rely on the frames having a 0..n-1 index the way label-aligned
    # assignment from a freshly built DataFrame would.
    for position, column in enumerate(scaled_columns):
        df[column] = train_scaled[:, position]
        df_test[column] = test_scaled[:, position]

    return df, df_test

############################################################
# prepare Data
############################################################
def prepareData(train_path='train.csv', test_path='test.csv'):
    """Load the train/test CSV files and build model-ready feature frames.

    Pipeline, in order:

    1. read both CSV files and keep the columns used as features;
    2. drop training rows whose ``Fare`` is 5 or 0;
    3. label-encode ``Sex`` (encoder fitted on the training rows);
    4. one-hot encode ``SibSp``, ``Parch`` and ``Ticket`` using categories
       taken from train and test together;
    5. standardize ``Pclass`` and ``Fare``;
    6. replace any remaining missing ``Fare`` with 0.

    Parameters
    ----------
    train_path : str, optional
        Path of the training CSV (must contain ``Survived``).
    test_path : str, optional
        Path of the test CSV (must contain ``PassengerId``).

    Returns
    -------
    x : pandas.DataFrame
        Training features (everything except ``Survived``).
    y : pandas.DataFrame
        Single-column frame holding ``Survived``.
    df_test : pandas.DataFrame
        Test features, processed the same way as ``x``.
    df_test_index : pandas.DataFrame
        Single-column frame holding the test ``PassengerId`` values.
    """
    feature_columns = ['Pclass', 'Sex', 'SibSp', 'Parch', 'Ticket', 'Fare']

    ##############################
    # Load the training and test data
    ##############################
    df = pandas.read_csv(train_path)
    df_test = pandas.read_csv(test_path)

    # Train and test together define the one-hot categories, so both frames
    # end up with the same dummy columns.
    df_all = pandas.concat([df, df_test], sort=False)

    df_test_index = df_test[['PassengerId']]

    # Explicit copies: the column assignments below then act on independent
    # frames instead of on slices of the loaded data.
    df = df[['Survived'] + feature_columns].copy()
    df_test = df_test[feature_columns].copy()

    ##############################
    # Drop training rows with a fare of exactly 5 or 0
    # NOTE(review): presumably treated as outliers / placeholder fares - confirm.
    ##############################
    df = df[(df['Fare'] != 5) & (df['Fare'] != 0)].reset_index(drop=True)

    ##############################
    # Digitize labels: Sex
    ##############################
    encoder_sex = LabelEncoder()
    df['Sex'] = encoder_sex.fit_transform(df['Sex'].values)
    df_test['Sex'] = encoder_sex.transform(df_test['Sex'].values)

    ##############################
    # One-hot encoding: SibSp, Parch, Ticket
    ##############################
    df, df_test = get_dummies_sibSp(df_all, df, df_test)
    df, df_test = get_dummies_parch(df_all, df, df_test)
    df, df_test = get_dummies_ticket(df_all, df, df_test)

    ##############################
    # Standardization of Pclass and Fare
    ##############################
    df, df_test = standardization(df, df_test)

    ##############################
    # Fill missing fares
    # This runs after standardization on purpose: StandardScaler centres the
    # column on the training mean, so 0 stands for an average fare.
    ##############################
    df = df.fillna({'Fare': 0})
    df_test = df_test.fillna({'Fare': 0})

    ##############################
    # Split features and target
    ##############################
    x = df.drop(columns='Survived')
    y = df[['Survived']]

    return x, y, df_test, df_test_index

##############################
# Model -> 5-layer perceptron (input, three hidden layers, output)
##############################
def create_model_5dim_layer_perceptron(input_dim,
                                       activation="relu",
                                       optimizer="adam",
                                       out_dim=100,
                                       dropout=0.5):
    """Build and compile a binary classifier with three hidden stages.

    Each hidden stage is Dense -> BatchNormalization -> Activation -> Dropout.
    The output is a single unit with a sigmoid activation, and the model is
    compiled with binary cross-entropy loss and an accuracy metric.

    Parameters
    ----------
    input_dim : int
        Number of input features, passed to the first Dense layer.
    activation : str
        Activation used after every hidden Dense layer.
    optimizer : str
        Optimizer handed to ``model.compile``.
    out_dim : int
        Number of units in each hidden Dense layer.
    dropout : float
        Rate passed to every Dropout layer.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    hidden_stage_count = 3

    model = Sequential()

    for stage in range(hidden_stage_count):
        # Only the first Dense layer declares the input width.
        extra_dense_args = {'input_dim': input_dim} if stage == 0 else {}
        model.add(Dense(units=out_dim, **extra_dense_args))
        model.add(BatchNormalization())
        model.add(Activation(activation))
        model.add(Dropout(dropout))

    # Output layer: one sigmoid unit.
    model.add(Dense(units=1))
    model.add(Activation("sigmoid"))

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [None]:
# Build the feature frames, then size the network's input layer to match them.
x_train, y_train, x_test, y_test_index = prepareData()

model = create_model_5dim_layer_perceptron(
    x_train.shape[1],  # one input per feature column
    activation="relu",
    optimizer="adam",
    out_dim=702,
    dropout=0.5,
)
model.summary()

In [None]:
# Fit the network on the full training set
fit = model.fit(x_train, y_train, batch_size=16, epochs=25, verbose=2)

# Predicted probabilities for the test set, rounded to a 0/1 label
y_test_proba = model.predict(x_test)
y_test = numpy.round(y_test_proba).astype(int)

# Pair every PassengerId with its predicted label
survived_column = pandas.DataFrame({'Survived': y_test.ravel()})
df_output = pandas.concat([y_test_index, survived_column], axis=1)

# Save the submission as result.csv in the current directory
df_output.to_csv('result.csv', index=False)