In [None]:
# Import relevant packages. 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Deep Neural Network from Scratch

This notebook implements a deep neural network from scratch using only NumPy. It is largely based on the Deep Learning Specialization offered on Coursera, especially regarding the notation and the matrix definitions.

## DNN Construction

In this section the auxiliary functions and the main DNN class are defined.

In [None]:
def relu(Z):
    """Rectified linear unit, applied element-wise.

    Parameters
    ----------
    Z : array_like
        Pre-activation values of any shape.

    Returns
    -------
    ndarray
        ``max(0, z)`` for every element ``z`` of ``Z``; same shape as ``Z``.
    """
    # np.maximum clamps -inf to 0, whereas a mask-multiply would yield
    # 0 * -inf = nan and poison every later layer.
    return np.maximum(0, Z)

def sigmoid(Z):
    """Logistic activation with a small constant added to the denominator.

    Parameters
    ----------
    Z : ndarray
        Pre-activation values.

    Returns
    -------
    ndarray
        ``1 / (1 + exp(-Z) + 1e-8)`` evaluated element-wise.
    """
    # The 1e-8 term keeps the denominator above 1, so the result never reaches 1.
    denominator = 1 + np.exp(-Z) + 1e-8
    return 1 / denominator

def softmax(Z):
    """Column-wise softmax.

    Parameters
    ----------
    Z : ndarray
        Logits; the normalisation runs along axis 0, so each column is
        turned into a probability vector.

    Returns
    -------
    ndarray
        Array of the same shape as ``Z`` whose columns sum to 1.
    """
    # Subtracting the per-column maximum does not change the result
    # mathematically but keeps np.exp from overflowing to inf (inf/inf = nan).
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis=0, keepdims=True)
    
def one_hot_encoding(Y,n_classes=-1):
    """One-hot encode a row of class labels.

    Parameters
    ----------
    Y : ndarray
        Labels, broadcastable against a ``(K, 1)`` column (e.g. shape ``(1, m)``).
    n_classes : int, optional
        When positive, the encoding has exactly ``n_classes`` rows for the
        labels ``0 .. n_classes-1``, even if some of them do not occur in
        ``Y``. With the default ``-1`` the classes are the sorted unique
        values found in ``Y``.

    Returns
    -------
    ndarray
        Integer array with one row per class and a 1 where the label matches.
    """
    if n_classes is not None and n_classes > 0:
        # Fixed label set: keeps the row count stable when a class is absent.
        classes = np.arange(n_classes)
    else:
        classes = np.unique(Y)
    classes = np.reshape(classes, (-1, 1))
    # Broadcasting the (K, 1) class column against Y yields the (K, m) mask.
    Y_enc = np.equal(classes, Y) * 1
    return Y_enc

def one_hot_decoding(Y):
    """Collapse a column-wise one-hot (or score) matrix to a row of indices.

    Parameters
    ----------
    Y : array_like
        Matrix whose columns are one-hot vectors or per-class scores.

    Returns
    -------
    ndarray
        Shape ``(1, -1)`` array holding, per column, the row index of the
        largest entry.
    """
    winners = np.argmax(Y, axis=0)
    return np.reshape(winners, (1, -1))
    

In [None]:
def nn_layer_forward(X, W, b, activation = 'relu'):
    """Forward pass through one fully connected layer.

    Parameters
    ----------
    X : ndarray
        Input activations of the layer.
    W, b : ndarray
        Weight matrix and bias; the pre-activation is ``np.dot(W, X) + b``.
    activation : str
        ``'relu'``, ``'sigmoid'`` or ``'softmax'``. Any other value is
        treated as ``'sigmoid'``.

    Returns
    -------
    tuple
        ``(Z, A)``: the pre-activation and the activated output.
    """
    # The affine step does not depend on the activation, so compute it once.
    Z = np.dot(W, X) + b

    if activation == 'relu':
        A = relu(Z)
    elif activation == 'softmax':
        A = softmax(Z)
    else:
        # Covers 'sigmoid' as well as every unrecognised activation name.
        A = sigmoid(Z)

    return Z, A


def nn_layer_backward(W, b, Z, A_prev, A, dA, activation = 'relu'):
    """Backward pass through one fully connected layer.

    Parameters
    ----------
    W, b : ndarray
        Layer parameters (``b`` is accepted for symmetry; it is not needed
        to compute the gradients).
    Z : ndarray
        Pre-activation of the layer, one column per example.
    A_prev : ndarray
        Activations fed into the layer during the forward pass.
    A : ndarray
        Activations produced by the layer during the forward pass.
    dA : ndarray
        Upstream gradient with respect to ``A``. For ``'softmax'`` this is
        expected to be ``Y / (A + eps)`` as produced by
        ``DNN.calculate_cost``, so that ``A * (1 - dA)`` is approximately
        ``A - Y``.
    activation : str
        ``'relu'``, ``'sigmoid'`` or ``'softmax'``. Any other value is
        treated as ``'sigmoid'``, mirroring ``nn_layer_forward``.

    Returns
    -------
    tuple
        ``(dA_prev, dW, db)``: gradient passed to the previous layer and the
        gradients of the weights and bias averaged over the batch.
    """
    # Examples are stored column-wise, so the batch size is the number of
    # columns of Z. (Z.shape[0] is the layer width and would scale every
    # layer's gradient differently.) Learning rates tuned against the old
    # scaling may need to be raised.
    m = Z.shape[1]

    if activation == 'relu':
        gp = (0 < Z)*1
        dZ = dA*gp
    elif activation == 'softmax':
        dZ = A*(1-dA)
    else:
        # 'sigmoid' and the same fallback the forward pass uses.
        gp = A * (1 - A)
        dZ = dA*gp

    dW = np.dot(dZ,A_prev.T) / m
    db = np.sum(dZ,axis=1,keepdims=True)/m   # sum over examples, keep column shape
    dA_prev = np.dot(W.T,dZ)                 # matrix product, not element-wise
    return dA_prev, dW, db

In [None]:
def random_index_mini_batches(m,mini_batch_size = 64, seed = 0):
    """Split a random permutation of ``range(m)`` into mini-batch index arrays.

    Parameters
    ----------
    m : int
        Number of examples to shuffle.
    mini_batch_size : int
        Number of indices per full batch.
    seed : int
        Accepted but not used; the shuffle draws from NumPy's global
        random state.

    Returns
    -------
    list of ndarray
        Consecutive slices of the permutation. A final, shorter slice is
        appended when ``m`` is not a multiple of ``mini_batch_size``.
    """
    shuffled = np.random.permutation(m)

    ratio = m / mini_batch_size
    whole_batches = np.floor(ratio)

    batches = [
        shuffled[k * mini_batch_size:(k + 1) * mini_batch_size]
        for k in range(int(whole_batches))
    ]

    # Left-over examples that do not fill a complete batch.
    if whole_batches < ratio:
        batches.append(shuffled[len(batches) * mini_batch_size:])

    return batches

In [None]:
class DNN:
    """Fully connected feed-forward network trained by mini-batch gradient descent.

    Per-layer state is kept in lists indexed by layer number. Index 0 is the
    input layer: ``As[0]`` holds the current input batch, while the other
    index-0 slots stay ``None``. Examples are stored column-wise.
    """

    def __init__(self, layers_dim,activations):
        """Store the architecture and allocate the per-layer containers.

        Parameters
        ----------
        layers_dim : sequence of int
            Layer sizes, starting with the input dimension.
        activations : sequence
            Activation name per layer, aligned with ``layers_dim``; the
            entry at index 0 is never read.

        Raises
        ------
        ValueError
            If ``activations`` has fewer entries than ``layers_dim``.
        """
        self.layers_dim = list(layers_dim)
        self.activations = list(activations)
        if len(self.activations) < len(self.layers_dim):
            # Fail early instead of with an IndexError deep inside forward_prop.
            raise ValueError('activations must provide one entry per layer in layers_dim')
        self.n_layers = len(self.layers_dim)-1
        n_slots = self.n_layers+1          # slot 0 is the input layer
        self.Ws = [None]*n_slots
        self.bs = [None]*n_slots
        self.Zs = [None]*n_slots
        self.As = [None]*n_slots
        self.dAs = [None]*n_slots
        self.dWs = [None]*n_slots
        self.dbs = [None]*n_slots

    def initialize_parameters(self):
        """Draw small random weights and zero biases for layers 1..n_layers."""
        for l in range(1,self.n_layers+1):
            # NOTE(review): np.random.rand samples U[0, 1), so every initial
            # weight is non-negative; np.random.randn may have been intended.
            self.Ws[l] = np.random.rand(self.layers_dim[l],self.layers_dim[l-1])*0.01
            self.bs[l] = np.zeros((self.layers_dim[l],1))

    def forward_prop(self, X):
        """Run ``X`` through every layer, caching ``Zs``/``As``; return the output activations."""
        self.As[0] = X
        for l in range(1,self.n_layers+1):
            self.Zs[l],self.As[l] = nn_layer_forward(self.As[l-1], self.Ws[l], self.bs[l],self.activations[l])

        AL = self.As[self.n_layers]
        return AL

    def calculate_cost(self,Y):
        """Cross-entropy cost of the last forward pass against ``Y``.

        Also stores the gradient with respect to the output activations in
        ``dAs[n_layers]`` so that ``back_prop`` can start from it.

        Parameters
        ----------
        Y : ndarray
            Targets with one column per example, matching the shape of the
            cached output activations.

        Returns
        -------
        float
            Cost averaged over the examples (columns) of ``Y``.
        """
        # Average over examples (columns), not over output units (rows).
        m = Y.shape[1]
        AL = self.As[self.n_layers]

        if self.activations[self.n_layers] == 'softmax':
            L = -np.sum(np.sum(Y*np.log(AL+1e-8),axis=0,keepdims=True))/m
            # Sign and form are chosen to pair with the softmax branch of
            # nn_layer_backward, which computes A*(1-dA) ~= A - Y.
            dAL = Y/(AL+1e-8)
        else:
            L = - np.sum(Y*np.log(AL) + (1-Y)*np.log(1-AL))/m
            dAL = (-(Y/AL) + ((1-Y)/(1-AL)))

        self.dAs[self.n_layers] = dAL

        return L

    def back_prop(self):
        """Propagate ``dAs[n_layers]`` backwards, filling ``dAs``, ``dWs`` and ``dbs``."""
        for l in reversed(range(1,self.n_layers+1)):
            self.dAs[l-1], self.dWs[l], self.dbs[l] = nn_layer_backward(self.Ws[l], self.bs[l], self.Zs[l], self.As[l-1], self.As[l], self.dAs[l],  self.activations[l])

    def update_parameters(self, learning_rate = 0.001):
        """Take one gradient-descent step on every layer's weights and biases."""
        for l in range(1,self.n_layers+1):
            self.Ws[l] = self.Ws[l] - learning_rate*self.dWs[l]
            self.bs[l] = self.bs[l] - learning_rate*self.dbs[l]

    def train(self,X,Y,n_epochs=10000,learning_rate=0.0001,mini_batch_size = -1,verbose=1):
        """Train this network with mini-batch gradient descent.

        Parameters
        ----------
        X, Y : ndarray
            Inputs and targets, one column per example.
        n_epochs : int
            Number of passes over the data.
        learning_rate : float
            Step size passed to ``update_parameters``.
        mini_batch_size : int
            Examples per batch; ``-1`` uses the whole data set as one batch.
        verbose : bool or int
            When truthy, print the summed batch cost about ten times per run.
        """
        m = X.shape[1]
        if mini_batch_size == -1:
            mini_batch_size = m

        # Roughly ten progress lines per run; the interval is clamped to 1 so
        # that a small n_epochs cannot produce a modulo-by-zero.
        log_every = max(1, int(np.round(n_epochs*0.1)))

        for epoch in range(n_epochs):
            cost_total = 0
            indx_list = random_index_mini_batches(m,mini_batch_size=mini_batch_size)
            for batch_indx in indx_list:
                X_minibatch = X[:,batch_indx]
                Y_minibatch = Y[:,batch_indx]
                # Operate on this instance, not on a module-level `dnn`.
                self.forward_prop(X_minibatch)
                L = self.calculate_cost(Y_minibatch)
                self.back_prop()
                self.update_parameters(learning_rate)
                cost_total += L
            if verbose and epoch % log_every == 0:
                print('Epoch:'+ str(epoch),'-','Cost: ' + str(cost_total))

        
        

## Test on Synthetic Data

In this section 2 tests on synthetic data are presented.

### Test Case 1: Binary Classification

In [None]:
# Synthetic binary problem: m points drawn uniformly from the square
# [-50, 50) x [-50, 50); a point is labelled True when it lies below the
# curve y = 20*sin(x/10).
m = 1000
X = np.array(np.random.rand(2,m)-0.5)*100
# The radius-based labelling that used to precede this line was overwritten
# immediately, so only the sine-boundary labels are computed.
Y = np.reshape(20*np.sin(X[0,:]/10) > X[1,:],(1,-1))

plt.scatter(X[0,:],X[1,:],c=Y)
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Train Data')

In [None]:
# Binary classifier: input layer, two ReLU hidden layers (30 and 10 units)
# and a sigmoid output sized from Y.
layers_dim = (X.shape[0], 30, 10, Y.shape[0])
# Index 0 belongs to the input layer, which has no activation.
activations = (None, 'relu', 'relu', 'sigmoid')

dnn = DNN(layers_dim=layers_dim, activations=activations)
dnn.initialize_parameters()
dnn.train(X=X, Y=Y, n_epochs=100000)

In [None]:
# Draw fresh points from the same square and colour them by the rounded
# sigmoid output of the trained network.
X_test = (np.random.rand(2, m) - 0.5) * 100
Yhat = np.round(dnn.forward_prop(X_test))

plt.scatter(X_test[0, :], X_test[1, :], c=Yhat)
plt.xlabel('X')
plt.ylabel('Estimated Y')
plt.title('Result on Test Data')

### Test Case 2: Three Classes Classification

In [None]:
# Three-class problem on the same square: classes 0/1 are split by the curve
# y = 20*sin(x/10), and every point closer than 20 to the origin is class 2.
m = 1000
X = (np.random.rand(2, m) - 0.5) * 100
r = np.sqrt(np.square(X[0, :]) + np.square(X[1, :]))

# Multiplying the boolean mask by 1 makes it an integer array, so the label 2
# can be written into it.
Y = 1 * np.reshape(20*np.sin(X[0, :]/10) > X[1, :], (1, -1))
Y[0, r < 20] = 2

plt.scatter(X[0, :], X[1, :], c=Y)
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Train Data')

# From here on Y holds the one-hot matrix (one row per class).
Y = one_hot_encoding(Y)


In [None]:
# Multi-class classifier: two ReLU hidden layers (20 and 5 units) and a
# softmax output with one unit per row of the one-hot Y.
layers_dim = (X.shape[0], 20, 5, Y.shape[0])
# Index 0 belongs to the input layer, which has no activation.
activations = (None, 'relu', 'relu', 'softmax')

dnn = DNN(layers_dim=layers_dim, activations=activations)
dnn.initialize_parameters()
dnn.train(X=X, Y=Y, n_epochs=100000)



In [None]:
# Evaluate the three-class model on fresh points from the same square.
X_test = np.array(np.random.rand(2,m)-0.5)*100
AL_test = dnn.forward_prop(X_test)
# Decode with argmax on the raw probabilities. Rounding first would turn any
# column whose best probability is <= 0.5 into all zeros, which argmax then
# reports as class 0.
Yhat = one_hot_decoding(AL_test)
plt.scatter(X_test[0,:],X_test[1,:],c=Yhat)
plt.xlabel('X')
plt.ylabel('Estimated Y')
plt.title('Result on Test Data')

# True labels for the test points, built with the same rule as the training
# set, then one-hot encoded with one row per output unit so the cost is
# computed against targets of the right shape.
r_test = np.sqrt(np.square(X_test[0,:])+np.square(X_test[1,:]))
Y_test = np.reshape(20*np.sin(X_test[0,:]/10) > X_test[1,:],(1,-1))*1
Y_test[0,r_test < 20] = 2
Y_test_enc = np.equal(np.arange(AL_test.shape[0]).reshape((-1,1)),Y_test)*1

print(dnn.calculate_cost(Y_test_enc))



### Test Case 3: MNIST

In [None]:
# Load the MNIST training CSV. Column 0 is used as the label; the remaining
# columns become the features (presumably 0..255 pixel intensities, hence the
# division by 255 - the printed maximum lets you confirm the scaling).
train_data = pd.read_csv('../input/mnist-in-csv/mnist_train.csv')
np_data = np.array(train_data)

# Transpose so that every example is a column.
X_train = np_data[:, 1:].T / 255
Y_train = one_hot_encoding(np.reshape(np_data[:, 0], (1, -1)))

print(X_train.shape,Y_train.shape)
print(np.max(X_train))

In [None]:
# MNIST classifier: one ReLU hidden layer of 32 units and a softmax output
# with one unit per row of the one-hot Y_train.
layers_dim = (X_train.shape[0], 32, Y_train.shape[0])
# Index 0 belongs to the input layer, which has no activation.
activations = (None, 'relu', 'softmax')

dnn = DNN(layers_dim=layers_dim, activations=activations)
dnn.initialize_parameters()
dnn.train(X=X_train, Y=Y_train, n_epochs=100, learning_rate=0.01, mini_batch_size=32)

In [None]:
# Load the MNIST test CSV with the same layout as the training file:
# column 0 is the label, the remaining columns are the features.
test_data = pd.read_csv('../input/mnist-in-csv/mnist_test.csv')
np_data = np.array(test_data)
X_test = np_data[:, 1:].T / 255
Y_test = np.reshape(np_data[:, 0], (1, -1))

# Predicted class = row index of the largest softmax output per column.
Yhat = one_hot_decoding(dnn.forward_prop(X_test))

# NOTE(review): this is the fraction of correctly classified examples
# (overall accuracy), although the variable and the label call it precision.
precision = np.sum(Yhat == Y_test) / Y_test.shape[1]
print(np.sum(Yhat == Y_test),Y_test.shape[1])
print(Yhat.shape,Y_test.shape)
print('Precision: ',precision)

In [None]:
# Pick one test example uniformly at random. randint's upper bound is
# exclusive, so the index is always valid; rounding rand()*N could return N
# and run past the last column.
indx = np.random.randint(X_test.shape[1])

# Keep the example as a column vector so it can be fed to the network, and
# reshape a copy of it to a square image for display.
vect_img = np.reshape(X_test[:,indx],(-1,1))
dim = (int(np.sqrt(vect_img.shape[0])),int(np.sqrt(vect_img.shape[0])))
img = np.reshape(vect_img,dim)

plt.imshow(img,cmap='Greys')
Yhat = dnn.forward_prop(vect_img)
Yhat = one_hot_decoding(Yhat)
print('Predicted: ',Yhat[0])
print('True Label: ', Y_test[:,indx])