# A simple neural network
***

The simple neural network consists of **two input, two hidden, and two output neurons**:
![Alt text](drawing.svg)

The inputs **(x_1, x_2)** are data points drawn from two different Gaussians with mean and covariance **((1, 1), diag(1, 1)) and ((5, 5), diag(1, 1))**, respectively.
The output neurons **N_21 and N_22** determine whether a point is predicted to belong to class A or class B, depending on which neuron has the higher output. The loss function is the squared error of the predictions, where the desired output is (1, 0) or (0, 1), depending on the class.

In [1]:
import numpy as np
from matplotlib.pyplot import plot
%matplotlib inline

# Network weights: two stacked 2-by-3 matrices. W[0] maps the bias-augmented
# input to the hidden layer and W[1] maps the bias-augmented hidden activations
# to the output layer; the third column of each matrix multiplies the constant 1.
W = np.random.uniform(low=-0.1, high=0.1, size=(2, 2, 3))

# Learning rate used to scale every weight update.
n = 0.01

First, we define the necessary functions.

In [2]:
def activation(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    ``x`` is clipped to the range [-500, 500] before it is exponentiated, so
    ``np.exp`` never receives an argument whose magnitude exceeds 500.
    The callers in this file pass one scalar pre-activation at a time.
    """
    bounded = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator
def d_activation(x):
    """Return the derivative of the sigmoid ``activation`` evaluated at ``x``.

    Uses the identity ``s'(x) = s(x) * (1 - s(x))`` with ``s = activation``.
    The callers in this file pass one scalar pre-activation at a time.
    """
    sigmoid_value = activation(x)
    return sigmoid_value * (1 - sigmoid_value)
def loss_fkt(x, y):
    """Return the squared-error loss: the sum over all entries of ``(y - x)**2``.

    ``x`` holds the predictions and ``y`` the desired outputs; both must
    support ``y - x`` (NumPy arrays of matching shape, or plain numbers).
    """
    residual = y - x
    return np.square(residual).sum()
def d_loss(x, y):
    """Return the error term ``y - x`` for a single output neuron.

    ``x`` is that neuron's prediction and ``y`` its desired output; the
    contributions of all other neurons to the loss derivative are zero.

    NOTE(review): the derivative of ``(y - x)**2`` with respect to ``x`` is
    ``-2 * (y - x)``, so this value has the opposite sign (and omits the
    factor 2). ``update_weights`` subtracts the resulting ``dw`` and the
    validation code predicts label 1 when ``ac_2[0] < ac_2[1]``; these pieces
    depend on each other, so do not change the sign here in isolation.
    """
    error = y - x
    return error

Now we can define a forward and a backward pass through the network. The backward pass returns the weight changes that are calculated with the backpropagation algorithm.

In [3]:
def forward_pass(x):
    """Propagate one 2-D input point through both layers of the network.

    Only ``x[0]`` and ``x[1]`` are read; a constant 1 is appended as the bias
    input before multiplying with ``W[0]``.

    Returns a 4-tuple ``(sum_1, ac_1, sum_2, ac_2)``:
      * ``sum_1`` - the two hidden-layer pre-activations ``W[0] . [x, 1]``
      * ``ac_1``  - the two hidden activations followed by a constant 1 (bias)
      * ``sum_2`` - the two output-layer pre-activations ``W[1] . ac_1``
      * ``ac_2``  - the two output activations followed by a constant 1
    """
    inputs = np.array([x[0], x[1], 1])

    # Hidden layer.
    hidden_pre = W[0].dot(inputs)
    hidden_out = np.array([activation(hidden_pre[k]) for k in (0, 1)] + [1])

    # Output layer.
    output_pre = W[1].dot(hidden_out)
    output_out = np.array([activation(output_pre[k]) for k in (0, 1)] + [1])

    return hidden_pre, hidden_out, output_pre, output_out

In [4]:
def backward_pass(sum_1, ac_1, sum_2, ac_2, x):
    """Compute the backpropagated weight changes for one labelled sample.

    Parameters
    ----------
    sum_1, ac_1, sum_2, ac_2 :
        The values returned by ``forward_pass`` for this sample.
    x :
        The sample itself: ``x[0]``, ``x[1]`` are the coordinates and ``x[2]``
        is the label (1 or -1).

    Returns
    -------
    An array shaped like ``W`` (2, 2, 3): entry ``[0]`` holds the changes for
    the hidden-layer weights (rows N_11, N_12) and entry ``[1]`` the changes
    for the output-layer weights (rows N_21, N_22).

    NOTE(review): every term is built from ``d_loss``, which returns
    ``y - prediction``; see that function for the sign convention.
    """
    label = x[2]
    inputs = np.array([x[0], x[1], 1])

    # Desired outputs: N_21 should fire for label 1, N_22 for label -1.
    target_21 = 1 if label == 1 else 0
    target_22 = 1 if label == -1 else 0

    # Error signal at each output neuron (loss term times sigmoid slope).
    delta_21 = d_loss(ac_2[0], target_21) * d_activation(sum_2[0])
    delta_22 = d_loss(ac_2[1], target_22) * d_activation(sum_2[1])

    # Output layer: W[1][0][:] feeds N_21 and W[1][1][:] feeds N_22; both
    # receive the bias-augmented hidden activations as input.
    output_changes = [delta_21 * ac_1, delta_22 * ac_1]

    # Hidden layer: W[0][j][:] feeds hidden neuron j (N_11 for j=0, N_12 for
    # j=1). Each hidden neuron is reached over two paths, one through N_21 and
    # one through N_22, and the two contributions are summed.
    hidden_changes = []
    for j in (0, 1):
        slope = d_activation(sum_1[j])
        via_21 = delta_21 * W[1][0][j] * slope
        via_22 = delta_22 * W[1][1][j] * slope
        hidden_changes.append(via_21 * inputs + via_22 * inputs)

    # Full change matrix, laid out like W.
    return np.array([hidden_changes, output_changes])


In [5]:
def update_weights(dw):
    """Apply one update step to the global weights ``W`` in place.

    ``dw`` must be broadcastable to the shape of ``W``; it is scaled by the
    global learning rate ``n`` and subtracted from ``W``.
    """
    global W
    step = n * dw
    W -= step

Now we can produce some data. The data is drawn from two 2-D Gaussians with mean and covariance ((1, 1), diag(1, 1)) and ((5, 5), diag(1, 1)), respectively.

In [6]:
num_data_points = 1000

# Each row is (x_1, x_2, label). The first class is drawn from a unit-covariance
# Gaussian centred at (1, 1) and labelled +1; the second is centred at (5, 5)
# and labelled -1. (np.random.randn samples a standard normal, so the mean is
# set by the added offset.)
data_g1 = np.hstack((
    np.random.randn(num_data_points, 2) + 1,
    np.ones((num_data_points, 1)),
))
data_g2 = np.hstack((
    np.random.randn(num_data_points, 2) + 5,
    -1 * np.ones((num_data_points, 1)),
))

# Mix both classes so that the split below contains samples of each.
data = np.concatenate([data_g1, data_g2])
np.random.shuffle(data)

# Split data into training (first 75 %) and validation (remaining 25 %) set
ind = int(num_data_points*2*0.75)
data_train = data[:ind]
data_eval = data[ind:]

## Training
***
We can now run through all training data points 50 times, updating the weights after every single point. Because the update is performed online (one sample at a time), this implements the **stochastic gradient descent** algorithm.

In [7]:
# Online training: 50 passes over the training set, with one weight update
# after every individual sample.
num_epochs = 50
for epoch in range(num_epochs):
    for sample in data_train:
        # Only the two coordinates are fed forward; the label in sample[2]
        # is read by backward_pass.
        activations = forward_pass(sample[:2])
        update_weights(backward_pass(*activations, sample))

## Validation
***
Finally, we can check the results by measuring the classification accuracy on the validation set.

In [8]:
# Count how many validation samples receive the label stored in their third
# column.
# NOTE(review): label 1 is predicted when the FIRST output neuron has the
# LOWER activation, although backward_pass uses target 1 for that neuron on
# label-1 samples. This matches the sign convention of d_loss combined with
# the subtraction in update_weights; change these together, not in isolation.
correct_assignments = 0
for x in data_eval:
    outputs = forward_pass(x[:2])[3]
    if outputs[0] < outputs[1]:
        y_hat = 1
    else:
        y_hat = -1
    if y_hat == x[2]:
        correct_assignments += 1

# Fraction of correctly classified validation samples.
acc = float(correct_assignments) / len(data_eval)
print("accuracy", acc)

accuracy 1.0
