In [1]:
import numpy as np
import math

In [2]:
### Deep Neural Network ###

def _forward_pass(w, b, z, a, L):
    """Refresh z[1..L] and a[1..L] in place from a[0] using the current w and b."""
    # Hidden layers
    for i in range(1, L):
        z[i] = np.dot(w[i], a[i-1]) + b[i]
        a[i] = np.fmax(z[i], 0.0) # ReLU

    # Output layer
    z[L] = np.dot(w[L], a[L-1]) + b[L]
    # Clip before exponentiating: np.exp overflows float64 (RuntimeWarning) for
    # arguments above ~709. Inside [-500, 500] the result is unchanged.
    a[L] = 1/(1+np.exp(-np.clip(z[L], -500, 500))) # Sigmoid


def deep_nn_train(X, y, n_units = [3,2,1], alpha = 0.1, max_iterations = 1000, seed = None):
    """Train a fully connected network with full-batch gradient descent.

    Hidden layers use ReLU and the output layer uses a sigmoid. The output-layer
    error term is ``a[L] - y``, i.e. the cross-entropy gradient for a sigmoid output.
    Training stops early once the mean absolute change of the network output
    between two consecutive iterations drops below 1e-8.

    Parameters
    ----------
    X : ndarray of shape (n0, m)
        Inputs, one column per sample.
    y : array
        Targets; must broadcast against the (n_units[-1], m) network output.
    n_units : sequence of int
        Units per layer after the input layer; the last entry is the output
        layer. The sequence is not modified.
    alpha : float
        Learning rate.
    max_iterations : int
        Maximum number of parameter updates.
    seed : int or None
        If given, the initial weights are drawn from a private
        ``np.random.RandomState(seed)`` so that repeated calls return identical
        parameters. If None, the global NumPy random state is used.

    Returns
    -------
    (w, b) : tuple of two lists of length ``len(n_units) + 1``
        For i >= 1, ``w[i]`` has shape (n[i], n[i-1]) and ``b[i]`` has shape
        (n[i], 1). Index 0 of each list is an unused 0.0 placeholder so that
        list indices match layer numbers.

    Raises
    ------
    ValueError
        If ``n_units`` is empty.
    """
    if len(n_units) == 0:
        raise ValueError("n_units must contain at least the output layer size")

    # Extract the dimension
    (n0, m) = X.shape

    # Set the number of layers and number of units in each layer
    L = len(n_units) # L = number of hidden layers + 1
    n = [n0] + list(n_units) # [Input units, Hidden units, Output units]

    # A private generator keeps seeded runs independent of the global state
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Initialize parameters (small random weights, zero biases)
    w = list(np.zeros(L+1))
    b = list(np.zeros(L+1))
    for i in range(1, L+1):
        w[i] = rng.normal(size = (n[i], n[i-1]))*0.01
        b[i] = np.zeros((n[i], 1))

    # Per-layer pre-activations and activations; a[0] is the input layer
    z = [None]*(L+1)
    a = [None]*(L+1)
    a[0] = X
    _forward_pass(w, b, z, a, L)
    y_hat = a[L]

    # Gradient buffers; entries 1..L are overwritten on every iteration
    dz = [None]*(L+1)
    dw = [None]*(L+1)
    db = [None]*(L+1)

    k = 0
    while k < max_iterations:

        # Backward propagation
        dz[L] = a[L] - y
        dw[L] = np.dot(dz[L], a[L-1].T)/m
        db[L] = np.sum(dz[L], axis = 1, keepdims = True)/m

        for i in range(L-1, 0, -1):
            # (z[i] > 0) is the ReLU derivative
            dz[i] = np.multiply(np.dot(w[i+1].T, dz[i+1]), (z[i] > 0)*1)
            dw[i] = np.dot(dz[i], a[i-1].T)/m
            db[i] = np.sum(dz[i], axis = 1, keepdims = True)/m

        # Update parameters (only after every gradient has been computed)
        for i in range(1, L+1):
            w[i] -= dw[i] * alpha
            b[i] -= db[i] * alpha

        # Forward propagation with the updated parameters
        _forward_pass(w, b, z, a, L)

        # Early stop if convergence
        if np.mean(np.abs(y_hat - a[L])) < 1e-8:
            print('The number of iterations is ', k)
            break

        y_hat = a[L]
        k += 1

    if k == max_iterations:
        print('The number of iterations reach the max_steps', max_iterations)

    return (w, b)

In [3]:
def deep_nn_predict(X, w, b):
    """Run a forward pass and return the sigmoid output of the last layer.

    Parameters
    ----------
    X : ndarray of shape (n0, m)
        Inputs, one column per sample.
    w, b : lists of equal length
        Per-layer weights and biases in the layout returned by deep_nn_train:
        index 0 is an unused placeholder and indices 1..L hold the layers,
        where L = len(w) - 1.

    Returns
    -------
    ndarray
        Output-layer activations, each in [0, 1].
    """
    L = len(w) - 1

    # Only the current activation is needed, so earlier layers are not stored
    a = X

    # Hidden layers
    for i in range(1, L):
        a = np.fmax(np.dot(w[i], a) + b[i], 0.0) # ReLU

    # Output layer
    z_out = np.dot(w[L], a) + b[L]
    # Clip before exponentiating: np.exp overflows float64 (RuntimeWarning) for
    # arguments above ~709. Inside [-500, 500] the result is unchanged.
    return 1/(1+np.exp(-np.clip(z_out, -500, 500))) # Sigmoid

In [4]:
# Simulate data: labels come from a noiseless linear rule passed through a sigmoid

SEED = 0 # fixed seed so that Restart & Run All regenerates the same data
np.random.seed(SEED)

m_train = 7000 # the number of training samples
m_test = 3000 # the number of test samples
n = 1000  # the number of features
b = 0.5 # the intercept (this name is rebound by `w, b = deep_nn_train(...)` in the training cell)

# True coefficients: i/n*2 for i = 0..n-1, as a column vector
beta = (np.arange(n)/n*2).reshape((n,1))

# Features are standard normal, one column per sample
X_train = np.random.normal(size = (n,m_train))
X_test = np.random.normal(size = (n,m_test))

# Linear scores, shape (1, m)
z_train = np.dot(beta.T, X_train) + b
z_test = np.dot(beta.T, X_test) + b

# Label is 1 when the sigmoid of the score exceeds 0.5, otherwise 0
y_train = (1/(1+np.exp(-z_train)) > 0.5)*1
y_test = (1/(1+np.exp(-z_test)) > 0.5)*1

In [5]:
# Fit a network with one hidden layer of 4 units and a single output unit
w, b = deep_nn_train(
    X_train,
    y_train,
    n_units = [4,1],
    alpha = 0.02,
    max_iterations = 10000,
)
# Predicted probabilities for the held-out samples
pred = deep_nn_predict(X_test, w, b)
# Share of test samples whose thresholded prediction equals the label
np.mean(y_test == (pred > 0.5))

The number of iterations reach the max_steps 10000


0.9293333333333333