# Import Libraries

In [1]:
!pip install keras-tuner --upgrade --quiet

[K     |████████████████████████████████| 135 kB 16.3 MB/s 
[K     |████████████████████████████████| 1.6 MB 57.5 MB/s 
[?25h

In [2]:
import numpy as np
import os
import pandas as pd
from numpy.random import seed
seed(42)

import tensorflow
tensorflow.random.set_seed(42)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow import keras 
from tensorflow.keras import regularizers
from keras_tuner import HyperModel, RandomSearch

In [3]:
os.chdir("/content/drive/MyDrive/deep_learning/spaceship_titanic")
%ls

[0m[01;34mdata[0m/  kaggle.json  [01;34mnotebooks[0m/  [01;34mreports[0m/  test.npz


# Load Dataset

In [4]:
data = np.load("data/train.npz")
X = data["x"]
y = data["y"]
X.shape

(8693, 16)

# Split into train and validation

In [5]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.25, random_state=42)

print(f"Amount of Train Samples: {len(X_train)} - Amount of Validation Sample: {len(X_val)}")

Amount of Train Samples: 6519 - Amount of Validation Sample: 2174


# Standardize Dataset

In [6]:
# scaler = MinMaxScaler()
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_val = scaler.transform(X_val) 
# X_train = np.reshape(X_train, newshape=(X_train.shape[0], X_train.shape[1], 1))
# X_val = np.reshape(X_val, newshape=(X_val.shape[0], X_val.shape[1], 1))

# Build FeedFoward Neural Network Model applying Tuner

In [62]:
class BuildHyperModel(HyperModel):
    def __init__(self, n_units_list, n_names_list, shape):
        self.n_units_list = n_units_list
        self.n_names_list = n_names_list
        self.shape = shape

    def build(self, hp):
        input = keras.Input(shape=self.shape)

        for i, (n_units, n_names) in enumerate(zip(self.n_units_list, self.n_names_list)):
            if (i == 0):
                x = keras.layers.Dense(hp.Int(f"units_{i+1}", n_units, n_units*2, default=n_units), 
                                        activation=hp.Choice(
                                            f"dense_activation_{i+1}",
                                            values=["relu", "elu"],
                                            default="relu"), 
                                        name=n_names,
                                        kernel_regularizer=regularizers.L2(
                                            hp.Choice(
                                                f"l2_{i+1}",
                                                values=[1e-3, 1e-4],
                                                default=1e-4
                                            )
                                            ))(input)
                x = keras.layers.BatchNormalization()(x)
                x = keras.layers.Dropout(hp.Float(
                    f"dropout_{i+1}",
                    min_value=0.45,
                    max_value=0.5,
                    default=0.5,
                    step = 0.01
                ))(x)
            else:
                x = keras.layers.Dense(hp.Int(f"units_{i+1}", n_units, n_units*2, default=n_units), 
                                        activation=hp.Choice(
                                            f"dense_activation_{i+1}",
                                            values=["relu", "elu"],
                                            default="relu"), 
                                        name=n_names,
                                        kernel_regularizer=regularizers.L2(
                                            hp.Choice(
                                                f"l2_{i+1}",
                                                values=[1e-3, 1e-4],
                                                default=1e-4
                                            )
                                            ))(x)
                x = keras.layers.BatchNormalization()(x)
                x = keras.layers.Dropout(hp.Float(
                    f"dropout_{i+1}",
                    min_value=0.45,
                    max_value=0.5,
                    default=0.5,
                    step = 0.01
                ))(x)

        n_units = int(self.n_units_list[-1]//2)
        x = keras.layers.Dense(
            hp.Int(
                "units_last_hidden",
                n_units, n_units*2, default=n_units
                ),
            activation=hp.Choice(
                "dense_activation_last_hidden",
                values=["relu", "elu"],
                default="relu"
            ),
            kernel_regularizer=regularizers.L2(
                hp.Choice(
                    "l2_last_hidden",
                    values=[1e-3, 1e-4],
                    default=1e-4
                )
            ),
            name="last_hidden")(x)
        output = keras.layers.Dense(1, activation="sigmoid", name="output")(x)
        model = keras.Model(inputs=input, outputs=output)
        
        lr = hp.Choice(
            "learning_rate",
            values=[1e-3, 1e-4]
        )
        optimizer = keras.optimizers.Adam(learning_rate=lr)
        metrics = [
            keras.metrics.BinaryAccuracy(name="accuracy"),
            keras.metrics.AUC(),
        ]
        model.compile(loss=keras.losses.BinaryCrossentropy(from_logits = True), optimizer=optimizer, metrics=metrics)
        return model

In [63]:
n_units = [16, 32]
n_names = ["hidden_1"]
shape = X_train.shape[1:]
hm = BuildHyperModel(n_units, n_names, shape)

In [64]:
rs_tuner = RandomSearch(
    hm,
    objective="val_accuracy",
    seed=42,
    max_trials=10,
    executions_per_trial=3,
    overwrite=True,
    directory="reports",
    project_name="random_search"
)

In [65]:
rs_tuner.search_space_summary()

Search space summary
Default search space size: 8
units_1 (Int)
{'default': 16, 'conditions': [], 'min_value': 16, 'max_value': 32, 'step': 1, 'sampling': None}
dense_activation_1 (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'elu'], 'ordered': False}
l2_1 (Choice)
{'default': 0.0001, 'conditions': [], 'values': [0.001, 0.0001], 'ordered': True}
dropout_1 (Float)
{'default': 0.5, 'conditions': [], 'min_value': 0.45, 'max_value': 0.5, 'step': 0.01, 'sampling': None}
units_last_hidden (Int)
{'default': 16, 'conditions': [], 'min_value': 16, 'max_value': 32, 'step': 1, 'sampling': None}
dense_activation_last_hidden (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'elu'], 'ordered': False}
l2_last_hidden (Choice)
{'default': 0.0001, 'conditions': [], 'values': [0.001, 0.0001], 'ordered': True}
learning_rate (Choice)
{'default': 0.001, 'conditions': [], 'values': [0.001, 0.0001], 'ordered': True}


# Run Random Search tuner

In [66]:
callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_accuracy", mode="max", patience=8),
    keras.callbacks.ReduceLROnPlateau(monitor="val_accuracy", patience=3, mode="max",
                                      )
]

In [67]:
rs_tuner.search(X_train, y_train, epochs=200,
                validation_data=(X_val, y_val), verbose=1, callbacks=callbacks,
                batch_size=64)

Trial 10 Complete [00h 00m 48s]
val_accuracy: 0.7850352724393209

Best val_accuracy So Far: 0.7850352724393209
Total elapsed time: 00h 09m 19s


In [68]:
best_model = rs_tuner.get_best_models(num_models=1)[0]
best_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 16)]              0         
                                                                 
 hidden_1 (Dense)            (None, 20)                340       
                                                                 
 batch_normalization (BatchN  (None, 20)               80        
 ormalization)                                                   
                                                                 
 dropout (Dropout)           (None, 20)                0         
                                                                 
 last_hidden (Dense)         (None, 27)                567       
                                                                 
 output (Dense)              (None, 1)                 28        
                                                             

In [69]:
loss, accuracy, auc = best_model.evaluate(X_val, y_val)



# Save Best Model

In [70]:
best_model.save("reports/final_model.h5")

# Predict Submission Dataset

In [71]:
X_test = np.load("data/test.npz")["x"]

X_test.shape

(4277, 16)

In [72]:
y_pred = best_model.predict(X_test,)



In [73]:
np.unique(y_pred)

array([0.57035404, 0.57091355, 0.5715986 , ..., 0.96966994, 0.96967834,
       0.9696941 ], dtype=float32)

In [74]:
df = pd.read_csv("data/sample_submission.csv")
df.head()

Unnamed: 0,PassengerId,Transported
0,0013_01,False
1,0018_01,False
2,0019_01,False
3,0021_01,False
4,0023_01,False


In [75]:
def get_prediction(test_data):
    y_pred = best_model.predict(test_data)
    y_pred = [True if y > 0.5 else False for y in y_pred]
    df["Transported"] = y_pred

In [76]:
get_prediction(X_test)



In [77]:
np.unique(df["Transported"])

array([ True])

In [78]:
df.to_csv("reports/output.csv", index=False)