In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input directory tree and print the full path of every file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for full_path in (os.path.join(dirname, filename) for filename in filenames):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the Titanic training and test CSV files into DataFrames
training_data = pd.read_csv('/kaggle/input/titanic/train.csv')
test_data = pd.read_csv('/kaggle/input/titanic/test.csv')

# Show the first training row, then the row counts: train, test, combined
print(training_data.iloc[0])
row_counts = (len(training_data), len(test_data))
for count in (*row_counts, sum(row_counts)):
    print(count)

In [None]:
# Combine train and test into one table so both get identical preprocessing.
# The 'is_training_data' flag lets the two sets be separated again afterwards.
training_data['is_training_data'] = 1
test_data['is_training_data'] = 0

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
# Row labels are kept as-is (no ignore_index), exactly as append did, so the
# labels of the two source frames are carried over unchanged.
all_data = pd.concat([training_data, test_data])
print(len(all_data))

In [None]:
# preprocess

# One-hot encode 'Embarked' into Embarked_<value> indicator columns.
# dtype is pinned to uint8 (the pre-2.0 pandas default): pandas >= 2.0 returns
# bool columns by default, and a frame mixing bool with numeric columns
# converts to an object array, which Keras cannot turn into a tensor.
embarked_df = all_data[['Embarked']]
dum_df = pd.get_dummies(embarked_df, columns=['Embarked'], dtype=np.uint8)

# Column-wise concat; dum_df carries the same row labels as all_data.
# NOTE: re-running this cell appends the indicator columns a second time.
all_data = pd.concat([all_data, dum_df], axis=1)
all_data.head()

In [None]:
# Replace 'Sex' with its integer category codes.
# Codes follow the sorted order of the distinct values (presumably
# female=0, male=1 for this dataset — verify against the raw column).
all_data['Sex'] = all_data['Sex'].astype('category').cat.codes
all_data.head()

In [None]:
# Fill missing ages with the mean age of the training set only.
# Assign the result back instead of calling fillna(inplace=True) on the
# selected column: the chained in-place form warns in pandas 2.x and does not
# update all_data under Copy-on-Write.
all_data['Age'] = all_data['Age'].fillna(training_data['Age'].mean())

In [None]:
# Columns removed before modelling: the helper flag added when the sets were
# combined, 'Embarked' (already one-hot encoded above), and the remaining
# columns that are not used as model features.
dropped_columns = ['is_training_data', 'PassengerId', 'Name', 'Ticket', 'Cabin', 'Embarked']

# Split the combined table back into its two source sets using the flag.
# drop() returns a new frame, so all_data itself is left untouched.
# reset_index(drop=True) replaces the original bare `.reindex()` calls, which
# took no arguments and discarded their result, so they had no effect.
training_data_processed = (
    all_data[all_data['is_training_data'] == 1]
    .drop(columns=dropped_columns)
    .reset_index(drop=True)
)
# NOTE(review): if test.csv has no 'Survived' column, this frame still carries
# an all-NaN 'Survived' column from the concat — confirm it is dropped before
# predicting.
final_test_data_processed = (
    all_data[all_data['is_training_data'] == 0]
    .drop(columns=dropped_columns)
    .reset_index(drop=True)
)

training_data_processed.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for the final evaluation.
# A fixed random_state makes the split identical on every Restart & Run All.
RANDOM_STATE = 42
XY_train, XY_test = train_test_split(
    training_data_processed,
    test_size=0.20,
    random_state=RANDOM_STATE,
)

def split_XY(XY_dataset: pd.DataFrame):
    """Separate a labelled table into features and target.

    Args:
        XY_dataset: DataFrame that contains a 'Survived' column.

    Returns:
        A ``(X, Y)`` tuple: ``X`` is a new DataFrame with every column except
        'Survived'; ``Y`` is a single-column DataFrame holding 'Survived'.
        The input frame is not modified.
    """
    features = XY_dataset.drop(columns='Survived')
    target = pd.DataFrame(XY_dataset['Survived'])
    return features, target

# Separate features (X) from the 'Survived' label (Y) for both partitions
(X_train, Y_train), (X_test, Y_test) = (
    split_XY(partition) for partition in (XY_train, XY_test)
)

In [None]:
# Preview the first rows of the training feature matrix
X_train.head(n=5)

In [None]:
import tensorflow as tf
from tensorflow import keras
import keras_tuner as kt

def build_model(hp):
    """Build and compile the binary classifier for one tuner trial.

    The network is five ReLU Dense layers that all share one tunable width,
    followed by a single sigmoid output unit.

    Args:
        hp: hyperparameter object supplied by the tuner. Two values are drawn
            from it: 'units' (5 to 30 in steps of 5) and 'learning_rate'
            (one of 1e-2, 1e-3, 1e-4, 1e-5).

    Returns:
        A compiled ``tf.keras.Sequential`` model using Adam, binary
        cross-entropy loss and the 'accuracy' metric.
    """
    n_hidden = 5
    hp_units = hp.Int('units', min_value=5, max_value=30, step=5)

    hidden_layers = [
        tf.keras.layers.Dense(hp_units, activation='relu')
        for _ in range(n_hidden)
    ]
    output_layer = tf.keras.layers.Dense(1, activation='sigmoid')
    model = tf.keras.Sequential(hidden_layers + [output_layer])

    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4, 1e-5])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
    )
    return model

In [None]:
print("Searching")

# Bayesian search over build_model's hyperparameters, ranked by validation
# accuracy; overwrite=True discards results from any earlier search.
tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    overwrite=True,
)

# Fit settings for every trial; 20% of X_train is held out as the validation
# data that produces 'val_accuracy'.
search_fit_kwargs = dict(epochs=500, batch_size=32, validation_split=0.2)
tuner.search(X_train, Y_train, **search_fit_kwargs)

In [None]:
# Take the top-ranked hyperparameter set from the search and print its values
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

for hp_name in ('units', 'learning_rate'):
    print(best_hps.get(hp_name))

In [None]:
# Build a fresh model from the best hyperparameters and train it, holding out
# 20% of the training rows for validation.
model = tuner.hypermodel.build(best_hps)
model.fit(
    X_train,
    Y_train,
    epochs=500,
    validation_split=0.2,
)

In [None]:
# Score the trained model on the hold-out split and display the result
result = model.evaluate(x=X_test, y=Y_test)
result