In [70]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and echo the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [72]:
# Load the training split; missing values in every column are replaced with 0.
data = pd.read_csv("/kaggle/input/spaceship-titanic/train.csv").fillna(0)

data.head()

data.shape

In [73]:
# TensorFlow supplies the variables, initializers, optimizer and metrics used below;
# matplotlib is used to plot the training cost.
import tensorflow as tf
import matplotlib.pyplot as plt

# Show the installed TensorFlow version.
tf.__version__

In [74]:
# Feature groups, by the treatment each one receives in preprocess().
numerical_columns = ["Age", "RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck"]
boolean_columns = ["CryoSleep", "VIP"]
categorical_columns = ["HomePlanet", "Destination"]

# Column order of the feature frame: numerical, then boolean, then categorical.
all_columns = [*numerical_columns, *boolean_columns, *categorical_columns]

X = data[all_columns]

# Target column cast to int.
Y = data["Transported"].astype(int)


In [75]:
def one_hot_encode(data, col, categories=None):
    """
        One-hot encode a single column of a DataFrame.

        Parameters:
            data: DataFrame holding the column.
            col: name of the column to encode.
            categories: optional sequence of category values; one output column
                is produced per value, in the given order. Pass the categories
                seen in the training data to give every split the same layout.
                When omitted, the distinct values of data[col] are used,
                ordered by their string form.

        Returns:
            DataFrame indexed like `data` with one 0/1 integer column per
            category, named after the category value.
    """
    if categories is None:
        # unique() yields values in order of first appearance, which changes
        # whenever the rows are shuffled or split. Ordering by string form
        # keeps the column layout stable and copes with mixed str/number values.
        categories = sorted(data[col].unique(), key=str)

    # Plain arrays are used so that no index alignment happens while building.
    encoded = {
        value: (data[col] == value).astype(int).to_numpy()
        for value in categories
    }

    return pd.DataFrame(encoded, index=data.index)

def shuffle_data(X, Y):
    """
        Shuffle X and Y with one shared random ordering of X's index labels.

        The ordering is drawn from NumPy's global random state, so calling
        np.random.seed beforehand makes it repeatable.

        Returns:
            (shuffled_X, shuffled_Y), both selected with .loc in the same order.
    """
    order = X.index.tolist()
    np.random.shuffle(order)

    return X.loc[order], Y.loc[order]

def train_dev_split(X, Y, train_fraction=0.7):
    """
        Split X and Y by position into a leading train part and a trailing dev part.

        Parameters:
            X, Y: pandas objects with the same number of rows, already shuffled
                if a random split is wanted.
            train_fraction: share of the rows (0..1) that goes to the train set.

        Returns:
            X_train, Y_train, X_dev, Y_dev

        Raises:
            ValueError: if train_fraction is outside [0, 1].
    """
    if not 0 <= train_fraction <= 1:
        raise ValueError("train_fraction must be between 0 and 1")

    m = X.shape[0]

    train_size = int(np.floor(m * train_fraction))

    X_train = X.iloc[:train_size]
    Y_train = Y.iloc[:train_size]

    # Dev takes every remaining row. Flooring both sizes separately could leave
    # the last row in neither set (e.g. m=5 gave 3 train + 1 dev).
    X_dev = X.iloc[train_size:]
    Y_dev = Y.iloc[train_size:]

    return X_train, Y_train, X_dev, Y_dev

def normalize_input(X, epsilon=1e-8, mean=None, variance=None):
    """
        Standardise X to zero mean and unit variance.

        For a DataFrame the statistics are computed per column. For any other
        input np.mean / np.var are applied with their default arguments.

        Parameters:
            X: DataFrame (or array-like) of numeric values.
            epsilon: small constant added to the variance to avoid division by zero.
            mean, variance: optional precomputed statistics (e.g. taken from the
                training set) to use instead of the ones computed from X.

        Returns:
            (X - mean) / sqrt(variance + epsilon)
    """
    is_frame = isinstance(X, pd.DataFrame)

    # np.mean(DataFrame) forwards axis=None, which pandas >= 2.0 reduces over
    # both axes; ask for the per-column reduction explicitly instead.
    if mean is None:
        mean = X.mean(axis=0) if is_frame else np.mean(X)
    if variance is None:
        # ddof=0 matches np.var's population variance
        variance = X.var(axis=0, ddof=0) if is_frame else np.var(X)

    return (X - mean) / np.sqrt(variance + epsilon)


In [76]:
def preprocess(X, Y):
    """
        Turn a raw feature frame (and optional labels) into model-ready arrays.

        Keeps the columns listed in all_columns, replaces every categorical
        column with its one-hot columns, casts the boolean columns to int and
        normalises the numerical columns.

        Returns:
            The feature values transposed (one row per feature column); when Y
            is not None, also Y's values reshaped to a single row.
    """
    features = X[all_columns]

    # Append the one-hot columns of each categorical feature ...
    for name in categorical_columns:
        features = pd.concat([features, one_hot_encode(features, name)], axis=1)

    # ... then discard the original categorical columns.
    features = features.drop(categorical_columns, axis=1)

    features[boolean_columns] = features[boolean_columns].astype(int)
    features[numerical_columns] = normalize_input(features[numerical_columns])

    feature_matrix = features.values.T
    if Y is None:
        return feature_matrix

    return feature_matrix, Y.values.reshape(1, -1)

In [81]:
# Shuffle once, then split by position into train and dev sets.
X, Y = shuffle_data(X, Y)
X_train, Y_train, X_dev, Y_dev = train_dev_split(X, Y)

print(X_train.head())
# From here on the four names are rebound to the arrays returned by preprocess(),
# so this cell cannot simply be re-run on its own results.
# NOTE(review): each split is one-hot encoded and normalised from its own rows only.
X_train, Y_train = preprocess(X_train, Y_train)
X_dev, Y_dev = preprocess(X_dev, Y_dev)

In [82]:
# Shape of the preprocessed training matrix (preprocess() returns X.values.T).
X_train.shape

In [83]:
def gen_random_parameters(n_x, layers):
    """
        Build the initial parameters for every layer of the network.

        Parameters:
            n_x: number of input features (input width of the first layer).
            layers: sequence of (units, activation) pairs, one per layer.

        Returns:
            A list with one dict per layer holding 'W', 'b' and 'activation'.
            W has shape (units, previous units) and b has shape (units, 1).
    """
    glorot = tf.initializers.GlorotNormal()
    parameters = []

    # width of whatever feeds the layer currently being built
    fan_in = n_x

    for index, (units, activation) in enumerate(layers):
        weight_shape = (units, fan_in)

        if activation == 'dropout':
            # 'dropout' layers get a NumPy-drawn W and a constant (non-Variable) zero bias
            W = tf.Variable(np.random.randn(*weight_shape) // 0.6, name=f"dropout{index+1}", dtype=tf.float32)
            b = tf.cast(tf.zeros(shape=(units, 1)), tf.float32)
        else:
            # Glorot-normal weights scaled down by 0.01; bias drawn from the same initializer
            W = tf.Variable(glorot(shape=weight_shape) * .01, name=f'W{index+1}', dtype=tf.float32)
            b = tf.Variable(glorot(shape=(units, 1)), name=f'b{index+1}', dtype=tf.float32)

        parameters.append({'W': W, 'b': b, 'activation': activation})
        fan_in = units

    return parameters

def forward_propagation_step(X, W, b, activation):
    """
        Single forward step: apply one layer to the previous activations.

        Computes Z = W @ X + b and applies the named activation.

        Parameters:
            X: activations of the previous layer (the input features for the
                first layer).
            W, b: weights and bias of this layer.
            activation: 'sigmoid', 'tanh' or 'softmax'; any other value uses ReLU.

        Returns:
            The layer's activations cast to float32.
    """
    Z = tf.matmul(W, X) + b

    if activation == 'sigmoid':
        A = tf.sigmoid(Z)
    elif activation == 'tanh':
        A = tf.tanh(Z)
    elif activation == 'softmax':
        # TF2 exposes softmax as tf.nn.softmax; there is no tf.softmax.
        # W @ X puts the layer's units on axis 0, so normalise across that axis.
        A = tf.nn.softmax(Z, axis=0)
    else:
        A = tf.nn.relu(Z)

    return tf.cast(A, tf.float32)

def forward_propagation(X, parameters):
    """
        Run the input through every layer in order and return the final activations.

        X is cast to float32 first; each entry of `parameters` is unpacked as the
        keyword arguments of forward_propagation_step.
    """
    activations = tf.cast(X, tf.float32)

    for layer in parameters:
        activations = forward_propagation_step(activations, **layer)

    return activations


def compute_cost(A, Y):
    """
        Binary cross-entropy between the labels Y and the predictions A.

        Y is cast to float32 before being handed to Keras.
    """
    labels = tf.cast(Y, tf.float32)

    return tf.keras.losses.binary_crossentropy(labels, A)


In [84]:

def run_epoch(X, Y, layers, parameters, optimizer, learning_rate=.01):
    """
        One training step over all of X: forward pass, gradients, optimizer update.

        `layers` and `learning_rate` are accepted but not used here; the step
        size is whatever `optimizer` was built with.

        Returns:
            The cost computed before the update is applied.
    """
    # Record the forward pass so the cost can be differentiated.
    with tf.GradientTape() as tape:
        cost = compute_cost(forward_propagation(X, parameters), Y)

    # Parameters of 'dropout' layers are left out of the update.
    # Variable order: every W first, then every b.
    trained = [param for param in parameters if param['activation'] != 'dropout']
    trainable_variables = [param['W'] for param in trained] + [param['b'] for param in trained]

    # Back propagation followed by the gradient-descent step.
    grads = tape.gradient(cost, trainable_variables)
    optimizer.apply_gradients(zip(grads, trainable_variables))

    return cost

In [85]:
def train(X, Y, batch_size=5000, learning_rate=.1, print_cost=True):
    """
        Train the fully connected network with Adam, using all of X at every step.

        Parameters:
            X: feature matrix; X.shape[0] is taken as the number of input features.
            Y: labels matching X.
            batch_size: despite the name, this is the number of training
                iterations; the loop runs batch_size + 1 times.
            learning_rate: learning rate given to the Adam optimizer.
            print_cost: print the cost every 1000 iterations.

        Returns:
            (parameters, costs): the trained per-layer parameters and the cost
            recorded at every iteration.
    """
    layers = [
        (16, 'relu'),
        (16, 'relu'),
        (8, 'relu'),
        (8, 'relu'),
        (16, 'relu'),
        (1, 'sigmoid') # Output layer with sigmoid activation function
    ]

    parameters = gen_random_parameters(X.shape[0], layers)

    X = tf.constant(X)
    Y = tf.constant(Y)

    optimizer = tf.optimizers.Adam(learning_rate)

    costs = []

    for i in range(batch_size+1):

        cost = run_epoch(X, Y, layers, parameters, optimizer, learning_rate)
        costs.append(cost)

        if print_cost and i % 1000 == 0:
            print(f"After {i}th epoch, cost is: {cost}")

    # update_state takes (y_true, y_pred). result() is a fraction in [0, 1],
    # so it is scaled before being printed with a percent sign.
    accuracy = tf.keras.metrics.Accuracy()
    accuracy.update_state(Y, predict(X, parameters))
    print(f"Accuracy is {accuracy.result().numpy() * 100:.2f}%")

    return parameters, costs


def decay_alpha(learning_rate, epoch_number, decay_rate=1e-8):
    """
        Inverse-time learning-rate decay.

        Returns learning_rate scaled by 1 / (1 + decay_rate * epoch_number).
    """
    scale = 1 / (1 + decay_rate * epoch_number)

    return scale * learning_rate

def predict(X, parameters):
    """
        Predict hard 0/1 labels by thresholding the network output at 0.5.

        Returns:
            float32 tensor shaped like the network output: 1.0 where the output
            is >= 0.5, otherwise 0.0.
    """
    A = forward_propagation(X, parameters)

    # Floor-dividing by 0.5 maps an output of exactly 1.0 to 2.0, which is not
    # a valid label; compare against the threshold instead.
    return tf.cast(A >= 0.5, tf.float32)

def test_accuracy(X, Y, parameters):
    """
        Fraction of examples in X whose predicted label equals the label in Y.

        Parameters:
            X: feature matrix accepted by predict().
            Y: true labels matching X.
            parameters: trained per-layer parameters.

        Returns:
            The accuracy as a NumPy scalar (a fraction, not a percentage).
    """
    accuracy = tf.keras.metrics.Accuracy()

    # Keras metrics take the true labels first, then the predictions.
    accuracy.update_state(Y, predict(X, parameters))

    return accuracy.result().numpy()


In [87]:
# Train on the preprocessed training set; train() returns the learned parameters
# and the list of recorded costs.
learning_rate = 0.1
parameters, costs = train(X_train, Y_train, learning_rate=learning_rate)

In [89]:
# Plot the first 5000 recorded costs (train() records batch_size + 1 of them).
plt.plot(list(range(0, 5000)), costs[0:5000])

In [None]:
# Scratch expression: 10 raised to the absolute value of four standard-normal draws.
# The value is only displayed, not assigned to any name.
(10**np.abs(-np.random.randn(4)))

In [90]:
# Accuracy of the trained parameters on the dev set.
test_accuracy(X_dev, Y_dev, parameters)

In [91]:
# Load the test split; as with the training data, missing values are replaced with 0.
test_data = pd.read_csv("/kaggle/input/spaceship-titanic/test.csv").fillna(0)

test_data.head()


In [92]:
# Shape of the training matrix, displayed again.
X_train.shape

In [93]:
# Y is None here, so preprocess() returns only the feature matrix.
# NOTE(review): the one-hot columns are derived from the values present in test.csv
# itself — confirm they match the column layout the network was trained on.
X_test = preprocess(test_data, None)

In [94]:
# Only the last expression of a cell is rendered; X_test.shape is evaluated but not shown.
X_test.shape
# The first layer's W is (units, input features), so its second dimension must
# equal X_test.shape[0] for the matmul in the forward pass to work.
parameters[0]['W'].shape

In [95]:
# Labels predicted for the test set with the trained parameters.
prediction = predict(X_test, parameters)

In [109]:
# Predictions as a boolean array with size-1 dimensions removed.
np.squeeze(prediction.numpy().astype(bool))

In [112]:
# Submission table: the PassengerId column of the test data next to the
# boolean prediction for each row.
submission = pd.DataFrame({
    'PassengerId': test_data.PassengerId, 
    'Transported': np.squeeze(prediction.numpy().astype(bool))
})

In [116]:
# With no path argument to_csv returns the CSV text as a string; the index is omitted.
out = submission.to_csv(index=False)

In [117]:
# Write the CSV text to the Kaggle working directory.
with open("/kaggle/working/output.csv", "w") as f:
    f.write(out)