In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Report the installed TensorFlow version so runs can be compared later
print(f"TensorFlow version: {tf.__version__}")

In [None]:
# Lazily load the training and test CSVs, in that order; each file is only
# read from disk once the iterable is consumed
data = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

def clean_data(data_set):
    """Return a cleaned copy of a passenger data set.

    The input frame is left untouched, so the function can safely be called
    again on the same raw data.

    Steps applied to the copy:
      * the 'Name' and 'Ticket' columns are dropped,
      * 'Cabin' becomes 1 when a cabin value is present and 0 otherwise,
      * 'Pclass' is shifted down by one (e.g. 1..3 becomes 0..2),
      * missing values become '' in text columns and -1 in all other columns.

    Args:
        data_set: DataFrame holding at least the 'Name', 'Ticket', 'Cabin'
            and 'Pclass' columns.

    Returns:
        A new, cleaned DataFrame.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # drop() hands back a new frame; every assignment below replaces whole
    # columns of that new frame and never writes into the caller's data
    cleaned = data_set.drop(columns=['Name', 'Ticket'])

    # Set 'Cabin' to 1 if the passenger has a cabin, else 0
    cleaned['Cabin'] = cleaned['Cabin'].notnull().astype(int)

    # Shift 'Pclass' down by one without an in-place '-=' on the source column
    cleaned['Pclass'] = cleaned['Pclass'] - 1

    # Text columns are either plain object columns or pandas string columns;
    # 'Cabin' has already been converted to integers at this point
    is_text = cleaned.dtypes.map(
        lambda dtype: dtype == object or isinstance(dtype, pd.StringDtype)
    ).to_numpy(dtype=bool)
    text_columns = cleaned.columns[is_text]
    other_columns = cleaned.columns[~is_text]

    # Fill each group with a placeholder that fits its type: '' for text,
    # -1 for everything else (a string column cannot hold -1)
    if len(text_columns):
        cleaned[text_columns] = cleaned[text_columns].fillna('')
    if len(other_columns):
        cleaned[other_columns] = cleaned[other_columns].fillna(-1)
    return cleaned

# Clean both data sets (this is also the point where the CSVs are read)
data = tuple(map(clean_data, data))

# Hold out 20% of the training data for validation. The fixed random_state
# makes the split identical on every run, so validation scores stay
# comparable between runs.
train_df, val_df = train_test_split(data[0], test_size=0.2, random_state=42)

# Keep the (training, validation, test) order that later cells rely on
data = (train_df, val_df, data[1])

def create_dataset(df):
    """Build a batched tf.data.Dataset from a DataFrame.

    When the frame has a 'Survived' column it is used as the label: the
    dataset then yields (features, label) pairs and is shuffled with a
    buffer covering every row. Without that column (unlabeled test data)
    the dataset yields features only and keeps the row order.

    The input frame is not modified.

    Args:
        df: DataFrame of passenger features, optionally with 'Survived'.

    Returns:
        A tf.data.Dataset batched in groups of 32 rows.
    """
    # An explicit column check instead of try/except KeyError: a KeyError
    # raised while building the dataset must not be mistaken for
    # "this frame has no labels"
    if 'Survived' in df.columns:
        # Split features and labels without popping from the caller's frame
        features = df.drop(columns='Survived')
        labels = df['Survived']
        ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
        ds = ds.shuffle(buffer_size=len(features))
    else:
        # Test data is not labeled
        ds = tf.data.Dataset.from_tensor_slices(dict(df))
    return ds.batch(batch_size=32)

# Turn each dataframe (training, validation, test) into its own dataset
train, val, test = (create_dataset(frame) for frame in data)

In [None]:
# Describe the categorical inputs first; each one is one-hot encoded below

# 'Pclass' is an integer id with 3 possible values
pclass = tf.feature_column.categorical_column_with_identity(
    key='Pclass', num_buckets=3)

# 'Sex' is a string with a fixed vocabulary
sex = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Sex', vocabulary_list=['male', 'female'])

# 'Cabin' is an integer id with 2 possible values
cabin = tf.feature_column.categorical_column_with_identity(
    key='Cabin', num_buckets=2)

# 'Embarked' is a string; '' is part of the vocabulary
embarked = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Embarked', vocabulary_list=['', 'C', 'Q', 'S'])

# Feature columns for the model: the numeric columns come first ...
feature_columns = [
    tf.feature_column.numeric_column(name)
    for name in ['Age', 'SibSp', 'Parch', 'Fare']
]

# ... followed by the one-hot encoded categorical columns
feature_columns += [
    tf.feature_column.indicator_column(categorical)
    for categorical in (pclass, sex, cabin, embarked)
]

In [None]:
# Create the model: the feature columns feed three small hidden layers and a
# single sigmoid output unit
model = tf.keras.Sequential([
    tf.keras.layers.DenseFeatures(feature_columns),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Set up early stopping: stop once the validation loss has failed to improve
# by at least 0.01 for 5 epochs in a row. restore_best_weights rolls the
# model back to the best epoch seen; without it the model would keep the
# weights from the last, already worse, epochs.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0.01, patience=5,
    restore_best_weights=True)

# Train the model (the trailing ';' hides the returned History repr)
model.fit(train, validation_data=val, epochs=100,
          callbacks=[early_stop]);

In [None]:
# Predict on the test set and round the sigmoid outputs to 0/1 labels
probabilities = model.predict(test)
predictions = np.squeeze(np.round(probabilities)).astype(np.uint8)

# Pair every test passenger id with its predicted label and write the
# submission CSV without the dataframe index
submission = data[2][['PassengerId']].assign(Survived=predictions)
submission.to_csv("submission.csv", index=False)