#!/usr/bin/env python
# @Author: Omar U. Florez
# @Date: October 28, 2017
'''
Train a feed-forward neural network using only the NumPy math library. The code walks step by step
through the learning process of the network (forward pass, loss, backpropagation, weight updates).
'''
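
# Overview of the script below (a summary of the code that follows, not additional functionality):
#   - Data: 300 standard-normal 2-D points labelled with the XOR of the coordinate signs (one-hot, 2 classes).
#   - Model: input_dim -> hidden_dim -> output_dim fully connected layers with sigmoid activations;
#     predictions use a softmax over the outputs followed by argmax.
#   - Loss: mean squared error plus ridge (L2) regularization on both weight matrices.
#   - Optimization: plain gradient descent on a fixed minibatch, with gradients derived by hand.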
import numpy as np
import ipdb
from scratch_mlp import utils
utils.reset_folders()
def load_XOR_data(N=300):
    rng = np.random.RandomState(0)
    X = rng.randn(N, 2)
    y = np.array(np.logical_xor(X[:, 0] > 0, X[:, 1] > 0), dtype=int)
    y = np.expand_dims(y, 1)
    y_hot_encoded = []
    for x in y:
        if x == 0:
            y_hot_encoded.append([1, 0])
        else:
            y_hot_encoded.append([0, 1])
    return X, np.array(y_hot_encoded)
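
# The XOR data above: X holds N standard-normal 2-D points; a point belongs to class 1 when exactly
# one of its coordinates is positive (the XOR of the sign bits), and the labels are returned one-hot
# with shape (N, 2).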

def sigmoid(z, first_derivative=False):
    # When first_derivative=True, z is expected to already be a sigmoid activation,
    # so the derivative is computed as sigmoid(x)*(1 - sigmoid(x)) = z*(1 - z).
    if first_derivative:
        return z*(1.0-z)
    return 1.0/(1.0+np.exp(-z))

def tanh(z, first_derivative=False):
    # As with sigmoid, when first_derivative=True, z is expected to already be a tanh activation.
    # (This activation is provided for experimentation and is not used in run() below.)
    if first_derivative:
        return 1.0-z*z
    return (np.exp(z)-np.exp(-z))/(np.exp(z)+np.exp(-z))

def inference(data, weights):
    # Forward pass with a fixed set of weights: sigmoid hidden layer, softmax over the output
    # scores, then argmax to obtain the predicted class index for each row of data.
    h1 = sigmoid(np.matmul(data, weights[0]))
    logits = np.matmul(h1, weights[1])
    probs = np.exp(logits)/np.sum(np.exp(logits), axis=1, keepdims=True)
    return np.argmax(probs, axis=1)

def run():
    N = 50                              # minibatch size (use int(X.shape[0]) for full-batch training)
    X, y = load_XOR_data(N=300)
    input_dim = int(X.shape[1])
    hidden_dim = 10
    output_dim = 2
    num_epochs = 1000000
    learning_rate = 1e-3
    reg_coeff = 1e-6
    losses = []
    accuracies = []

    #---------------------------------------------------------------------------------------------------------------
    # Initialize weights uniformly in [-1, 1):
    np.random.seed(2017)
    w1 = 2.0*np.random.random((input_dim, hidden_dim))-1.0      #w1: (input_dim, hidden_dim)
    w2 = 2.0*np.random.random((hidden_dim, output_dim))-1.0     #w2: (hidden_dim, output_dim)
    # Calibrating variances with 1/sqrt(fan_in)
    w1 /= np.sqrt(input_dim)
    w2 /= np.sqrt(hidden_dim)

    for i in range(num_epochs):
        index = np.arange(X.shape[0])[:N]
        # To shuffle the minibatch indices: np.random.shuffle(index)

        #---------------------------------------------------------------------------------------------------------------
        # Forward step:
        h1 = sigmoid(np.matmul(X[index], w1))       #(N, hidden_dim)
        logits = sigmoid(np.matmul(h1, w2))         #(N, output_dim)
        # probs (softmax over the outputs) is computed for reference; the MSE loss below uses h2 directly.
        probs = np.exp(logits)/np.sum(np.exp(logits), axis=1, keepdims=True)
        h2 = logits

        #---------------------------------------------------------------------------------------------------------------
        # Definition of loss function: mean squared error plus ridge (L2) regularization
        # L = sum((y - h2)^2)/(2N) + reg_coeff*(||w1||^2 + ||w2||^2)/(2N)
        L = np.square(y[index]-h2).sum()/(2*N) + reg_coeff*(np.square(w1).sum()+np.square(w2).sum())/(2*N)
        losses.append([i, L])

        #---------------------------------------------------------------------------------------------------------------
        # Backward step: the error at layer l is the layer-above error propagated through W_{l+1}, times f'_l
        # dL/dw2 = dL/dh2 * dh2/dz2 * dz2/dw2
        dL_dh2 = -(y[index] - h2)                               #(N, output_dim)
        dh2_dz2 = sigmoid(h2, first_derivative=True)            #(N, output_dim)
        dz2_dw2 = h1                                            #(N, hidden_dim)
        # Gradient for w2: (hidden_dim, N) x ((N, output_dim)*(N, output_dim)); the ridge term contributes reg_coeff*w2
        dL_dw2 = dz2_dw2.T.dot(dL_dh2*dh2_dz2) + reg_coeff*w2

        # dL/dw1 = dL/dh1 * dh1/dz1 * dz1/dw1
        #   dL/dh1 = dL/dz2 * dz2/dh1
        #   dL/dz2 = dL/dh2 * dh2/dz2
        dL_dz2 = dL_dh2 * dh2_dz2                               #(N, output_dim)
        dz2_dh1 = w2                                            #z2 = h1*w2
        dL_dh1 = dL_dz2.dot(dz2_dh1.T)                          #(N, output_dim)x(output_dim, hidden_dim)=(N, hidden_dim)
        dh1_dz1 = sigmoid(h1, first_derivative=True)            #(N, hidden_dim)
        dz1_dw1 = X[index]                                      #(N, input_dim)
        # Gradient for w1: (input_dim, N) x ((N, hidden_dim)*(N, hidden_dim)); the ridge term contributes reg_coeff*w1
        dL_dw1 = dz1_dw1.T.dot(dL_dh1*dh1_dz1) + reg_coeff*w1

        # Gradient descent updates: w <- w - learning_rate * dL/dw
        w2 += -learning_rate*dL_dw2
        w1 += -learning_rate*dL_dw1

        # Track accuracy every epoch (use the commented condition to restrict this to every 1000 epochs):
        if True:    #(i+1)%1000==0:
            y_pred = inference(X, [w1, w2])
            y_actual = np.argmax(y, axis=1)
            accuracy = np.sum(np.equal(y_pred, y_actual))/len(y_actual)
            accuracies.append([i, accuracy])

            # Every 10000 epochs, log progress and save decision-boundary, loss, and accuracy plots:
            if (i+1) % 10000 == 0:
                print('Epoch %d\tLoss: %f Average L1 error: %f Accuracy: %f' % (i, L, np.mean(np.abs(dL_dh2)), accuracy))
                save_filepath = './scratch_mlp/plots/boundary/image_%d.png' % i
                text = 'Batch #: %d Accuracy: %.2f Loss value: %.2f' % (i, accuracy, L)
                utils.plot_decision_boundary(X, y_actual, lambda x: inference(x, [w1, w2]),
                                             save_filepath=save_filepath, text=text)
                save_filepath = './scratch_mlp/plots/loss/image_%d.png' % i
                utils.plot_function(losses, save_filepath=save_filepath, ylabel='Loss', title='Loss estimation')
                save_filepath = './scratch_mlp/plots/accuracy/image_%d.png' % i
                utils.plot_function(accuracies, save_filepath=save_filepath, ylabel='Accuracy', title='Accuracy estimation')
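
# Running this file trains the network and periodically writes decision-boundary, loss, and accuracy
# plots under ./scratch_mlp/plots/. With num_epochs = 1000000 a full run takes a while; lowering
# num_epochs should still be enough to watch the loss decrease and the decision boundary take shape.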

if __name__ == '__main__':
    run()