In [1]:
import pandas as pd
import numpy as np
from tensorflow import keras
from keras import layers
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split

In [2]:
def od_ordinal(X, object_cols):
    """Ordinal-encode the given columns of a dataframe.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame holding the columns to encode. It is modified in place.
    object_cols : list of str
        Names of the columns to replace with their ordinal codes.

    Returns
    -------
    pandas.DataFrame
        The same ``X`` object, with ``object_cols`` overwritten by the
        codes produced by a freshly fitted ``OrdinalEncoder``.

    Notes
    -----
    A new encoder is fitted on every call, so codes produced by two
    separate calls (e.g. one per dataset) are not comparable.
    """
    # Nothing to encode; fitting an encoder on zero columns would raise.
    if len(object_cols) == 0:
        return X

    ordinal = OrdinalEncoder()
    # Assign the raw ndarray so values are written row-by-row positionally.
    # Wrapping it in a DataFrame would give it a fresh RangeIndex, and pandas
    # would then align on index labels, yielding NaN/misplaced rows whenever
    # X does not itself carry a default 0..n-1 index.
    X[object_cols] = ordinal.fit_transform(X[object_cols])
    return X

def od_imputer(*X_tuple, numerical_cols, categorical_cols):
    """Impute missing values in one or more dataframes.

    The imputers are fitted on the FIRST dataframe only (pass the training
    frame first) and then applied to every dataframe, so that later frames
    (e.g. the test set) are filled with the same statistics instead of
    statistics computed from their own rows.

    Parameters
    ----------
    *X_tuple : pandas.DataFrame
        Frames to impute. Each one is modified in place.
    numerical_cols : list of str
        Columns filled with the column mean (``SimpleImputer`` default).
    categorical_cols : list of str
        Columns filled with the most frequent value.

    Returns
    -------
    tuple of pandas.DataFrame
        The same frames that were passed in, in the same order.
    """
    if not X_tuple:
        return X_tuple

    reference = X_tuple[0]
    # Pair each column group with its imputer; empty groups are skipped
    # because SimpleImputer cannot be fitted on zero columns.
    plan = [
        (cols, imputer)
        for cols, imputer in (
            (numerical_cols, SimpleImputer()),
            (categorical_cols, SimpleImputer(strategy='most_frequent')),
        )
        if len(cols) > 0
    ]

    for cols, imputer in plan:
        imputer.fit(reference[cols])
        for x in X_tuple:
            x[cols] = imputer.transform(x[cols])

    return X_tuple

In [3]:
def normalize(x):
    """Trim punctuation from both ends of every space-separated token.

    The string is split on single spaces; commas, parentheses, square
    brackets, periods and quote characters are stripped from the start and
    end of each piece, and the pieces are joined back with single spaces.
    """
    strip_chars = ',()[].\'"'
    cleaned = []
    for token in x.split(' '):
        cleaned.append(token.strip(strip_chars))
    return ' '.join(cleaned)

def ticket_number(x):
    """Return the part of ``x`` after its last space (all of ``x`` if none)."""
    _head, _sep, tail = x.rpartition(' ')
    return tail

def ticket_object(x):
    """Return the prefix of a ticket string, or ``'NONE'`` if it has none.

    The ticket is split on single spaces. Everything except the last piece
    is treated as the prefix and its pieces are joined with underscores.
    A ticket made of a single piece has no prefix and yields ``'NONE'``.

    Parameters
    ----------
    x : str
        Raw ticket string.

    Returns
    -------
    str
        The underscore-joined prefix, or ``'NONE'``.
    """
    parts = x.split(' ')
    # Count tokens, not characters: the length of the string itself says
    # nothing about whether a prefix is present.
    if len(parts) == 1:
        return 'NONE'
    return '_'.join(parts[:-1])

In [4]:
# Load the raw data and separate the target from the features.
train_set = pd.read_csv('train.csv')

X = train_set.drop('Survived', axis=1)
y = train_set.Survived
X_test = pd.read_csv('test.csv')

# Apply the same text clean-up and ticket splitting to both frames.
for frame in (X, X_test):
    frame['Name'] = frame['Name'].apply(normalize)
    frame['Ticket_Number'] = frame['Ticket'].apply(ticket_number)
    # A ticket whose last token is the literal 'LINE' has no number.
    frame.loc[frame['Ticket_Number'] == 'LINE', 'Ticket_Number'] = np.nan
    frame['Ticket'] = frame['Ticket'].apply(ticket_object)

# Column groups are derived from the training frame, keeping column order
# (a set difference would give an arbitrary order).
object_cols = [col for col in X.columns if X[col].dtype == 'object']
numerical_cols = [col for col in X.columns if col not in object_cols]

# Fit ONE encoder on the training frame and reuse it for the test frame so a
# given category maps to the same code in both. Fitting a separate encoder
# per frame would assign unrelated codes to the same value.
# Categories that only occur in the test frame are encoded as -1.
ordinal = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
X[object_cols] = ordinal.fit_transform(X[object_cols])
X_test[object_cols] = ordinal.transform(X_test[object_cols])

In [5]:
# Impute both frames; the training frame is passed first.
X, X_test = od_imputer(
    X,
    X_test,
    numerical_cols=numerical_cols,
    categorical_cols=object_cols,
)

In [6]:
# Network hyper-parameters. The input width follows the feature frame so the
# model stays in sync if columns are added or removed upstream.
n_features = X.shape[1]
hidden_units = 512
dropout_rate = 0.5
n_hidden_blocks = 3

# Input normalisation, then n_hidden_blocks x (Dense -> BatchNorm -> Dropout),
# then a single sigmoid unit for the binary target.
model = keras.Sequential(
    [layers.BatchNormalization(input_shape=[n_features])]
    + [
        layer
        for _block in range(n_hidden_blocks)
        for layer in (
            layers.Dense(hidden_units, activation='relu'),
            layers.BatchNormalization(),
            layers.Dropout(rate=dropout_rate),
        )
    ]
    + [layers.Dense(1, activation='sigmoid')]
)

model.compile(loss=keras.losses.BinaryCrossentropy(),
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

In [8]:
# Train on the full training frame.
model.fit(X, y, epochs=70, batch_size=64)

# Turn the sigmoid outputs into 0/1 labels using a 0.5 threshold.
survival_prob = model.predict(X_test).squeeze()
yhat_test = np.where(survival_prob >= 0.5, 1, 0)

# Build the submission table and write it to disk without the index column.
passenger_ids = X_test.PassengerId.astype('int32')
output = pd.DataFrame({'PassengerId': passenger_ids, 'Survived': yhat_test})
output.to_csv('submission_nn.csv', index=False)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
