In [83]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from gc_utils import dictionary_to_vector, vector_to_dictionary, gradients_to_vector

# Deep Neural Network

In [88]:
class DeepNeuralNetwork:
    
    def __init__(self, layer_dims, type='binary', h_func_type='relu', random_state=-1):
        
        self.layer_dims = layer_dims
        self.type = type
        self.h_func_type = h_func_type
        self.random_state = random_state
        self.functions = {'sigmoid': self.sigmoid,
                          'relu': self.relu,
                          'tanh': np.tanh, 
                          'binary': self.sigmoid, 
                          'multi': self.softmax}
        self.derivative = {'sigmoid': self.sigmoidDerivative, 
                           'relu': self.reluDerivative, 
                           'tanh': self.tanhDerivative}
        
    def parametersInitialization(self):
    
        if self.random_state != -1:
                np.random.seed(self.random_state)

        self.parameters = {}
        self.L = len(self.layer_dims)

        for l in range(1, self.L):

            self.parameters['W' + str(l)] = np.random.randn(self.layer_dims[l], self.layer_dims[l-1]) * 0.01
            self.parameters['b' + str(l)] = np.zeros((self.layer_dims[l], 1))

    def linear(self, A, W, b):
    
        return np.dot(W, A) + b
    
    def relu(self, Z):
    
        return np.maximum(0, Z)
    
    def sigmoid(self, Z):
        
        return (1 / (1 + np.exp(-Z)))
    
    def softmax(self, Z):
        
        return (np.exp(Z) / np.sum(np.exp(Z), axis=0))
    
    def sigmoidDerivative(self, Z):
        
        A = self.sigmoid(Z)
        
        return (A * (1 - A))
    
    def reluDerivative(self, Z):
        
        A = np.where(Z >= 0., 1., 0.)
            
        return A
    
    def tanhDerivative(self, Z):
        
        A = np.tanh(Z)
        
        return (1 - A**2)
    
    def forward(self, X):
        
        self.cache = {'A0': X}
        
        for l in range(1, self.L-1):

            Z = self.linear(self.cache['A' + str(l-1)], self.parameters['W' + str(l)], 
                            self.parameters['b' + str(l)])
            A = self.functions[self.h_func_type](Z)
            self.cache['Z' + str(l)] = Z
            self.cache['A' + str(l)] = A

        ZL = self.linear(self.cache['A' + str((self.L-1)-1)], self.parameters['W' + str(self.L-1)], 
                         self.parameters['b' + str(self.L-1)])
        AL = self.functions[self.type](ZL)
        self.cache['Z' + str(self.L-1)] = ZL
        self.cache['A' + str(self.L-1)] = AL

        return AL
    
    def binary_crossEntropy(self, AL, Y):
        
        loss = - (Y * np.log(AL) + (1 - Y) * np.log(1 - AL))
        
        cost = np.sum(loss) / self.m

        return cost
    
    def crossEntropy(self, AL, Y):
        
        loss =  - (np.sum(np.log(AL) * (Y), axis=0))
        
        cost = np.sum(loss) / self.m
        
        return cost
    
    def backward(self, AL, Y):
    
        self.grads = {}
        
        dZ = AL - Y
        
        self.grads['dW' + str(self.L-1)] = np.dot(dZ, self.cache['A' + str((self.L-1)-1)].T) / self.m
        self.grads['db' + str(self.L-1)] = np.sum(dZ, axis=1, keepdims=True) / self.m
        
        for l in reversed(range(1, self.L-1)):
            
            dZ = (np.dot(self.parameters['W' + str(l+1)].T, dZ)) * (self.derivative[self.h_func_type](
                self.cache['Z' + str(l)]))

            self.grads['dW' + str(l)] = np.dot(dZ, self.cache['A' + str(l-1)].T) / self.m
            self.grads['db' + str(l)] = np.sum(dZ, axis=1, keepdims=True) / self.m
            
            
    def updateparameters(self, alpha):
    
        for l in range(1, self.L):

            self.parameters['W' + str(l)] = self.parameters['W' + str(l)] - alpha * self.grads['dW' + str(l)]
            self.parameters['b' + str(l)] = self.parameters['b' + str(l)] - alpha * self.grads['db' + str(l)]
            
    def train(self, X, Y, alpha, epochs, print_cost=False):
        
        self.layer_dims.insert(0, X.shape[0])
        self.m = X.shape[1]
        
        self.parametersInitialization()
        
        for i in range(epochs):
        
            AL = self.forward(X)

            if self.type == 'multi':
                cost = self.crossEntropy(AL, Y)
            else:
                cost = self.binary_crossEntropy(AL, Y)

            self.backward(AL, Y)

            self.updateparameters(alpha)

            if print_cost and i % (epochs // 10) == 0:
                    print ("Cost after iteration %i : %f" %(i, cost))
                    
        return self.parameters
        
    def predict(self, X):
    
        AL = self.forward(X)
        
        if self.type == 'multi':
            Y_pred = AL.argmax(axis=0)
        else:
            Y_pred = np.where(AL > 0.5, 1., 0.)

        return Y_pred

# Load Dataset

In [89]:
def loadIrisBinary(path, size=0.2, random_state=0):
    
    df = pd.read_csv(path)
    df = df.sample(frac=1, random_state=random_state)
    df.Species.replace(('Iris-setosa', 'Iris-versicolor'), (0., 1.), inplace=True)
    
    X_train, X_val, Y_train, Y_val = train_test_split(df.drop(['Species'], axis=1), 
                                                      df.Species, 
                                                      test_size=size, 
                                                      random_state=random_state)
    X_train, X_val = X_train.values.T, X_val.values.T
    Y_train, Y_val = Y_train.values.reshape(1, -1), Y_val.values.reshape(1, -1)
    
    return X_train, Y_train, X_val, Y_val

In [90]:
def loadIrisMulti(path, size=0.2, random_state=0):

    df = pd.read_csv(path)
    df.Species.replace(('Iris-setosa', 'Iris-versicolor', 'Iris-virginica'), (0, 1, 2), inplace=True)
    df = df.sample(frac=1, random_state=random_state)

    X_train, X_val, Y_train, Y_val = train_test_split(df.drop(['Species'], axis=1), 
                                                          df.Species, 
                                                          test_size=size, 
                                                          random_state=random_state)
    
    X_train, X_val = X_train.values.T, X_val.values.T
    Y_train, Y_val = Y_train.values, Y_val.values
    Y_train = ((np.arange(np.max(Y_train) + 1) == Y_train[:, None]).astype(float)).T
    
    return X_train, Y_train, X_val, Y_val

# Training

### Iris Binary

In [92]:
X_train, Y_train, X_val, Y_val = loadIrisBinary('data/Iris_binary.csv', size=0.2)
layer_dims = [5,  1]
alpha = 0.1
epochs = 100
model = DeepNeuralNetwork(layer_dims, h_func_type='relu', random_state=0)
parameters = model.train(X_train, Y_train, alpha, epochs, print_cost=True)
Y_pred = model.predict(X_val)
print(classification_report(Y_pred.flatten(), Y_val.flatten()))

Cost after iteration 0 : 0.693380
Cost after iteration 10 : 0.692099
Cost after iteration 20 : 0.688690
Cost after iteration 30 : 0.676167
Cost after iteration 40 : 0.636707
Cost after iteration 50 : 0.541084
Cost after iteration 60 : 0.412634
Cost after iteration 70 : 0.281197
Cost after iteration 80 : 0.169535
Cost after iteration 90 : 0.105444
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00         9
         1.0       1.00      1.00      1.00        11

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



### Iris Multiclass

In [95]:
X_train, Y_train, X_val, Y_val = loadIrisMulti('data/Iris.csv')
layer_dims = [5, 3]
alpha = 0.1
epochs = 100
model = DeepNeuralNetwork(layer_dims, type='multi', h_func_type='relu', random_state=0)
parameters = model.train(X_train, Y_train, alpha, epochs, print_cost=True)
Y_pred = model.predict(X_val)
print(classification_report(Y_pred.flatten(), Y_val.flatten()))

Cost after iteration 0 : 1.098396
Cost after iteration 10 : 1.094394
Cost after iteration 20 : 1.074003
Cost after iteration 30 : 1.003746
Cost after iteration 40 : 0.865423
Cost after iteration 50 : 0.715127
Cost after iteration 60 : 0.566849
Cost after iteration 70 : 0.479411
Cost after iteration 80 : 0.421535
Cost after iteration 90 : 0.375075
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.88      1.00      0.93         7
           2       1.00      0.92      0.96        13

    accuracy                           0.97        30
   macro avg       0.96      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30

