<a href="https://colab.research.google.com/github/stanislavlia/DeepLearning.AI-specialization/blob/main/DeepNN_on_Titanic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [48]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


**Preprocessing of Titanic Dataset**

In [58]:
df = pd.read_csv("sample_data/train.csv")
#some data definetly useless for predicion such as ticket number, name
df = df.drop(["Ticket", "PassengerId", "Name", "Cabin"], axis=1)

df["Sex"] = df["Sex"].replace({"male" : 0, "female" : 1})

df["Embarked"] = df["Embarked"].replace({"S" : 0, "C" : 1, "Q": 2})

In [59]:
df["Age"] = df["Age"].fillna(df["Age"].mean())
df.dropna(inplace=True)


X = df.drop("Survived", axis=1).values
y = df["Survived"].values

In [60]:
#Scaling Features

std_scaler = StandardScaler()
X_scaled = std_scaler.fit_transform(X)

#Splitting into train/test set
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, train_size=0.75, random_state=1)

In [61]:
#transforming for shape of our implementation

X_train = X_train.T
X_test = X_test.T

y_train = y_train.reshape(-1, 1).T
y_test = y_test.reshape(-1, 1).T

In [62]:
#My implementation

class DeepNN():
    def __init__(self, X, Y, dims_of_layers, activations, alpha = 0.01):
        #dims_of_layers - list of number of units in each layer (first element - num of features in input)
        #activations - activation function applied to each layer

        #dims_of_layers[0] - n of features in input
        #activations[0] - activation for first hidden layer
        #we support only 3 activation funcs: linear, sigmoid, relu

        #X.shape should be (n_features, m_examples)
        #Y.shape should be (1, m_examples)
        self.X = X
        self.Y = Y

        self.m_examples = X.shape[1]

        self.dims_of_layers = dims_of_layers
        self.n_layers = len(activations)

        self.activations = activations
        self.params = dict()

        self.learning_history = []
        self.alpha = alpha

        #setting cache dicts for backpropogation

        self.cache = dict()

    def initialize_params(self):

        for i in range(1, len(self.activations) + 1):

            #setting parameters layer by layer
            self.params["W" + str(i)] = np.random.randn(self.dims_of_layers[i], self.dims_of_layers[i-1])
            self.params["b" + str(i)] = np.zeros((self.dims_of_layers[i], 1))



    def activation(self, Z, function="linear"):
        if function == "linear":
            return Z

        if function == "sigmoid":
            return 1 / (1 + np.exp(-Z))

        if function == "relu":
            return Z * (Z > 0)


    def deriv_activation(self, Z, function):
        if function == "linear":
            return 1.

        if function == "sigmoid":
            sigm_z = self.activation(Z, "sigmoid")

            return sigm_z *(1 - sigm_z)

        if function == "relu":
            return 1. * (Z > 0)



    def forward_propogation(self, X):

        #X.shape = (n_features, m_examples)

        A_prev = X
        self.cache["A0"]  = A_prev

        Z_current = np.dot(self.params["W1"], A_prev) + self.params["b1"]
        A_current = self.activation(Z_current, function=self.activations[0])

        self.cache["Z1"] = Z_current
        self.cache["A1"] = A_current

        for i in range(1, len(self.activations)):
            A_prev = A_current

            #A_prev - cache["A" + str(i)]
            Z_current = np.dot(self.params["W" + str(i+1)], A_prev) + self.params["b" + str(i+1)]
            A_current = self.activation(Z_current, function=self.activations[i])

            #keeping values in cache for backprop
            self.cache["Z" + str(i+1)] = Z_current
            self.cache["A" + str(i+1)] = A_current


        predictions = A_current

        return predictions

    def compute_cost(self, predictions, cost_function="cross_entropy"):
        #leave cost func as a parameter
        #so that we can use it futher not only for classification




        if cost_function == "cross_entropy":

            #lets cut off a  tiny constant to avoid log0 problem
            epsilon = 10 ** -15

            predictions = np.clip(predictions, epsilon, 1-epsilon)

            #BinaryCrossEntropy

            cost = (self.Y * np.log(predictions) +
                    (1 - self.Y) * np.log(1 - (predictions)) ) * (-1 / self.m_examples)

            cost = np.sum(cost, axis=1, keepdims=True) #sum up the columns

        #we will have more cost functions here later...

        return cost

    def deriv_of_cost(self, predictions, cost_function="cross_entropy"):

        if cost_function == "cross_entropy":

            #avoiding division by zero
            epsilon = 10 ** -15
            predictions = np.clip(predictions, epsilon, 1-epsilon)

            dAL = (predictions - self.Y) / (predictions * (1 - predictions))

        #we will have more cost functions here later...

        return dAL


    def back_propogation(self, predictions, cost_func="cross_entropy"):

        L = self.n_layers

        grads_cache = dict()

        for i in range(L, 0, -1):
            if i == L:
                dA_i = self.deriv_of_cost(predictions, cost_function=cost_func)
            else:

                dA_i = np.dot(self.params["W" + str(i+1)].T, grads_cache["dZ" + str(i+1)]) #ERROR MuSt be here
                #print("i =", i)
                # print("W=", self.params["W" + str(i+1)] )
                # print("dZ_next =", grads_cache["dZ" + str(i+1)])

                # print("W = ", self.params["W" + str(i+1)].T.shape)
                # print("W shape ", )
                # print("dZ shape" + str(i),  grads_cache["dZ" + str(i+1)].shape)
                #print("dA", dA_i)


            grads_cache["dA" + str(i)] = dA_i
            activation_i = self.activations[i-1]

            Z_i = self.cache["Z" + str(i)]
            A_prev = self.cache["A" + str(i-1)]
            W_i = self.params["W" + str(i)]

            #print("Activation = ", activation_i)
            dZ_i = dA_i * self.deriv_activation(Z_i, activation_i)

            #print("Sum of dZ_i", np.sum(dZ_i))

            #computing derivs for W, b
            dW_i = (1 / self.m_examples) * np.dot(dZ_i, A_prev.T)
            db_i = (1/ self.m_examples) * np.sum(dZ_i, axis=1, keepdims=True)


            #storing gradients
            grads_cache["dZ" + str(i)] = dZ_i
            grads_cache["dW" + str(i)] = dW_i
            grads_cache["db" + str(i)] = db_i

        return grads_cache

    def update_params(self, grads):

        for i in range(1, self.n_layers + 1):

            #updating by Gradient Descent
            self.params["W" + str(i)] -=  self.alpha * grads["dW" + str(i)]
            self.params["b" + str(i)] -= self.alpha * grads["db" + str(i)]


    def fit(self, epochs=100, cost_func="cross_entropy", debug=False):

        #fitting process

        #initialize random params

        history = []
        self.initialize_params()


        for epoch in range(0, epochs + 1):

            predictions = self.forward_propogation(self.X)

            #computing cost function
            cost = np.round(self.compute_cost(predictions, cost_func), 6)
            history.append(cost)


            if (epoch % max(1 , (epochs // 20)) == 0):
              print("Epoch #{},  {} == {}".format(epoch, cost_func, cost))

            #computing gradients
            grads = self.back_propogation(predictions, cost_func="cross_entropy")
            if debug == True:
              print(grads)

            #update params using Gradient Descent
            self.update_params(grads)

        self.history = history

**Building structure for Deep Neural Network**

In [63]:

n_units = [X_train.shape[0], 10, 5, 1]
activations = ["relu", "relu", "sigmoid"]



In [67]:
deepnn = DeepNN(X_train, y_train, n_units, activations, alpha = 0.01)
deepnn.fit(15000)

Epoch #0,  cross_entropy == [[9.272832]]
Epoch #750,  cross_entropy == [[0.551106]]
Epoch #1500,  cross_entropy == [[0.51706]]
Epoch #2250,  cross_entropy == [[0.501535]]
Epoch #3000,  cross_entropy == [[0.460147]]
Epoch #3750,  cross_entropy == [[0.438692]]
Epoch #4500,  cross_entropy == [[0.428698]]
Epoch #5250,  cross_entropy == [[0.421387]]
Epoch #6000,  cross_entropy == [[0.41542]]
Epoch #6750,  cross_entropy == [[0.410508]]
Epoch #7500,  cross_entropy == [[0.406717]]
Epoch #8250,  cross_entropy == [[0.403265]]
Epoch #9000,  cross_entropy == [[0.400219]]
Epoch #9750,  cross_entropy == [[0.397599]]
Epoch #10500,  cross_entropy == [[0.395319]]
Epoch #11250,  cross_entropy == [[0.39326]]
Epoch #12000,  cross_entropy == [[0.391495]]
Epoch #12750,  cross_entropy == [[0.389836]]
Epoch #13500,  cross_entropy == [[0.388119]]
Epoch #14250,  cross_entropy == [[0.386261]]
Epoch #15000,  cross_entropy == [[0.384499]]


In [68]:
predictions = np.round(deepnn.forward_propogation(X_test)).T

test_accuracy = accuracy_score(y_test.T, predictions)
test_accuracy

0.8071748878923767