In [1]:
import numpy as np
import math

In [2]:
# Activation functions
def sigmoid(x):
    """Logistic sigmoid of a real scalar, together with its derivative.

    Parameters
    ----------
    x : int or float
        Point at which to evaluate (scalar only: ``math.exp`` is used).

    Returns
    -------
    tuple
        ``(value, grad)`` with ``value = 1/(1+exp(-x))`` and
        ``grad = value*(1-value)``.
    """
    # Evaluate exp() only at a non-positive argument: exp(-x) overflows
    # (OverflowError) once x drops below roughly -709, while exp(x) merely
    # underflows to 0.0 there. Both branches are algebraically identical.
    if x >= 0:
        value = 1/(1+math.exp(-x))
    else:
        e = math.exp(x)
        value = e/(1+e)
    grad = (1-value)*value
    return (value, grad)

def tanh(x):
    """Hyperbolic tangent of a real scalar, together with its derivative.

    Parameters
    ----------
    x : int or float
        Point at which to evaluate (scalar only: the ``math`` module is used).

    Returns
    -------
    tuple
        ``(value, grad)`` with ``value = tanh(x)`` and ``grad = 1 - value**2``.
    """
    # math.tanh saturates cleanly at +/-1, whereas forming the ratio of
    # exponentials by hand raises OverflowError once |x| exceeds roughly 709.
    value = math.tanh(x)
    grad = 1 - value**2
    return (value, grad)

def ReLU(x):
    """Rectified linear unit of a scalar, together with its derivative.

    Returns ``(value, grad)``: ``(x, 1)`` when ``x > 0``, otherwise ``(0, 0)``.
    """
    if x > 0:
        # Active region: the unit passes its input through with slope 1.
        return (x, 1)
    # Inactive region (x <= 0): output and slope are both zero.
    return (0, 0)

In [149]:
### Shallow Neural Network (1 hidden layer)
def _logistic(z):
    """Element-wise logistic sigmoid 1/(1+exp(-z)) of an array."""
    # np.exp(-z) overflows to inf for very negative z; 1/(1+inf) is still the
    # correct limit 0.0, so only the RuntimeWarning is silenced here.
    with np.errstate(over = 'ignore'):
        return 1/(1+np.exp(-z))


def _hidden_activation(z, activation_func):
    """Return (activation, derivative) of the named hidden non-linearity at z."""
    name = str(activation_func).lower()
    if name == 'relu':
        return np.fmax(z, np.zeros(z.shape)), (z > 0)*1
    if name == 'tanh':
        a = np.tanh(z)
        return a, 1 - a**2
    if name == 'sigmoid':
        a = _logistic(z)
        return a, a*(1 - a)
    raise ValueError(
        "activation_func must be 'ReLU', 'tanh' or 'sigmoid', got %r" % (activation_func,))


def _shallow_forward(X, w1, b1, w2, b2, activation_func):
    """Forward pass: hidden activation, its derivative, and the sigmoid output."""
    z1 = np.dot(w1, X) + b1
    a1, g1 = _hidden_activation(z1, activation_func)
    z2 = np.dot(w2, a1) + b2
    return a1, g1, _logistic(z2)


def shallow_nn_train(X, y, max_steps = 1000, learning_rate = 0.1, n_nodes = 4, activation_func = 'ReLU'):
    """Train a one-hidden-layer binary classifier with batch gradient descent.

    The network is ``X -> hidden layer (n_nodes units) -> 1 sigmoid output``.
    Weights are drawn from NumPy's global RNG (``np.random.normal``) and
    scaled by 0.01; biases start at zero.

    Parameters
    ----------
    X : ndarray, shape (n_features, n_samples)
        Inputs, one sample per column.
    y : ndarray, broadcastable to (1, n_samples)
        Binary targets.
    max_steps : int
        Maximum number of gradient-descent updates.
    learning_rate : float
        Step size applied to every parameter update.
    n_nodes : int
        Number of hidden units.
    activation_func : str
        Hidden-layer non-linearity: 'ReLU' (default), 'tanh' or 'sigmoid'
        (case-insensitive). Predictions must apply the same hidden activation
        that was used for training.

    Returns
    -------
    tuple
        ``(w1, b1, w2, b2)`` with shapes ``(n_nodes, n_features)``,
        ``(n_nodes, 1)``, ``(1, n_nodes)`` and ``(1, 1)``.

    Raises
    ------
    ValueError
        If ``activation_func`` is not one of the supported names.
    """
    # Extract the dimension
    (n, m) = X.shape

    # Set number of nodes for each layer
    n0 = n
    n1 = n_nodes
    n2 = 1

    # initialize parameters
    w1 = np.random.normal(size = (n1,n0))*0.01
    b1 = np.zeros((n1,1))
    w2 = np.random.normal(size = (n2,n1))*0.01
    b2 = np.zeros((n2,1))

    a1, g1, a2 = _shallow_forward(X, w1, b1, w2, b2, activation_func)
    y_hat = a2

    k = 0

    while k < max_steps:

        # Output layer: a2 - y is the cross-entropy gradient w.r.t. z2
        dz2 = a2 - y
        dw2 = np.dot(dz2, a1.T)/m
        db2 = np.sum(dz2, axis = 1, keepdims = True)/m

        # Hidden layer: back-propagate through w2 and the activation derivative
        dz1 = np.multiply(np.dot(w2.T, dz2), g1)
        dw1 = np.dot(dz1, X.T)/m
        db1 = np.sum(dz1, axis = 1, keepdims = True)/m

        w2 -= learning_rate * dw2
        b2 -= learning_rate * db2
        w1 -= learning_rate * dw1
        b1 -= learning_rate * db1

        a1, g1, a2 = _shallow_forward(X, w1, b1, w2, b2, activation_func)

        # Early stop once the outputs barely move between successive updates
        if np.mean(abs(y_hat - a2)) < 1e-6:
            print('The number of iterations is ', k)
            break

        y_hat = a2
        k += 1

    if k == max_steps:
        print('The number of iterations reach the max_steps', max_steps)

    return (w1, b1, w2, b2)

In [146]:
def shallow_nn_predict(X, w1, b1, w2, b2, activation_func = 'ReLU'):
    """Forward pass of the one-hidden-layer network; returns output probabilities.

    Parameters
    ----------
    X : ndarray, shape (n_features, n_samples)
        Inputs, one sample per column.
    w1, b1 : ndarray
        Hidden-layer weights and biases.
    w2, b2 : ndarray
        Output-layer weights and biases.
    activation_func : str
        Hidden-layer non-linearity: 'ReLU' (default), 'tanh' or 'sigmoid'
        (case-insensitive). It must match the activation the weights were
        trained with; the default reproduces the ReLU network.

    Returns
    -------
    ndarray
        Sigmoid outputs, one column per sample.

    Raises
    ------
    ValueError
        If ``activation_func`` is not one of the supported names.
    """
    z1 = np.dot(w1, X) + b1

    name = str(activation_func).lower()
    if name == 'relu':
        a1 = np.fmax(z1, np.zeros(z1.shape))
    elif name == 'tanh':
        a1 = np.tanh(z1)
    elif name == 'sigmoid':
        a1 = 1/(1+np.exp(-z1))
    else:
        raise ValueError(
            "activation_func must be 'ReLU', 'tanh' or 'sigmoid', got %r" % (activation_func,))

    z2 = np.dot(w2, a1) + b2
    a2 = 1/(1+np.exp(-z2)) # Sigmoid

    return a2

In [115]:
# simulate data

SEED = 0 # seeds NumPy's global RNG so the simulated data are identical on every run
np.random.seed(SEED)

m_train = 7000 # the number of samples
m_test = 3000
n = 1000  # the number of features
b = 0.5 # the intercept

# True coefficients: n evenly spaced values 0, 2/n, 4/n, ... stored as a column vector
beta = (np.arange(n)/n*2).reshape((n,1))

# Features are rows and samples are columns
X_train = np.random.normal(size = (n,m_train))
X_test = np.random.normal(size = (n,m_test))

# Linear score of every sample under the true model
z_train = np.dot(beta.T, X_train) + b 
z_test = np.dot(beta.T, X_test) + b 

# Label is 1 where the logistic probability exceeds 0.5, otherwise 0
y_train = (1/(1+np.exp(-z_train)) > 0.5)*1
y_test = (1/(1+np.exp(-z_test)) > 0.5)*1

In [117]:
# Shapes of the four arrays, in order: train inputs, train labels, test inputs, test labels
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

((1000, 7000), (1, 7000), (1000, 3000), (1, 3000))

In [152]:
# Fit the one-hidden-layer network on the training split, then score the held-out split
w1, b1, w2, b2 = shallow_nn_train(X_train, y_train, n_nodes = 10, learning_rate = 0.05, max_steps = 10000)
pred = shallow_nn_predict(X_test, w1, b1, w2, b2)

# Test accuracy: share of samples whose thresholded probability equals the label
((pred > 0.5) == y_test).mean()

The number of iterations is  4586
