### Implementation of a 3-layer neural network (backpropagation)


In [1]:
import numpy as np

# sigmoid function
def nonlin(x,deriv=False):
    """Sigmoid activation, or its derivative written in terms of the sigmoid's output.

    Parameters
    ----------
    x : scalar or numpy array
        With ``deriv`` falsy: the pre-activation value(s) to squash.
        With ``deriv`` truthy: value(s) that are ALREADY sigmoid outputs
        (e.g. a layer's activation), not raw pre-activations.
    deriv : bool, optional
        When truthy, return ``x*(1-x)`` instead of the sigmoid of ``x``.

    Returns
    -------
    Same shape as ``x``: ``1/(1+exp(-x))``, or ``x*(1-x)`` when ``deriv`` is set.
    """
    # Plain truthiness test on the flag (PEP 8: no `== True` comparisons).
    if deriv:
        # sigma'(z) = sigma(z) * (1 - sigma(z)); the caller passes sigma(z) as x.
        return x*(1-x)

    return 1/(1+np.exp(-x))

def add_alpha(alpha=0):
    """Return the list of learning rates to train with.

    Parameters
    ----------
    alpha : int, optional
        Mode flag (not a learning rate itself): ``1`` selects the preset
        sweep; any other value prompts the user for a single learning rate.

    Returns
    -------
    list
        The seven preset rates ``[0.001, ..., 1000]``, or a one-element list
        holding the rate typed by the user, as a float.

    Raises
    ------
    ValueError
        If the typed value cannot be parsed as a number.
    """
    if alpha==1:
        return [0.001,0.01,0.1,1,10,100,1000]

    # Parse with float(), not int(): learning rates are usually fractional
    # (most of the preset sweep above is), and int() cannot accept "0.1".
    return [float(input('Enter value of alpha: '))]

# Ask whether to sweep the preset learning rates (answer 1) or to train with
# a single user-supplied rate (any other integer), and build the list to try.
alphas = add_alpha(
    alpha=int(input('Type 1 if you choose to vary alpha, and 0 otherwise: '))
)

# Training inputs: one sample per row, three features per sample.
# The third column is 1 for every sample.
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])

# Target outputs as a (4, 1) column, one row per sample of X.
# Each target is the XOR of that sample's first two features.
y = np.array([0, 1, 1, 0]).reshape(-1, 1)

# Train one fresh network per learning rate and record, for each rate, the
# lowest mean absolute error seen and the network output that produced it.
outputs_dic = {}
for alpha in alphas:
    print("\nTraining With Alpha:" + str(alpha))

    # Re-seed for every alpha so each run starts from the same initial
    # weights (reproducibility); only the learning rate differs between runs.
    np.random.seed(7)

    # randomly initialize our weights in [-1, 1), i.e. with mean 0
    syn0 = 2*np.random.random((3,4)) - 1   # layer 0 (3 inputs) -> layer 1 (4 units)
    syn1 = 2*np.random.random((4,1)) - 1   # layer 1 (4 units) -> layer 2 (1 output)

    lowest_error = np.inf
    best_output = None
    for j in range(60000):

        # Feed forward through layers 0, 1, and 2
        l0 = X
        l1 = nonlin(np.dot(l0,syn0))
        l2 = nonlin(np.dot(l1,syn1))

        # what is the error (computed once per iteration and reused below)
        l2_error = y - l2
        mean_error = np.mean(np.abs(l2_error))

        if (j % 10000) == 0:
            print("Error:" + str(mean_error))

        # Remember the output together with the error it produced, so the
        # stored 'output' really is the one belonging to 'lowest_error'.
        # l2 is a fresh array every iteration, so keeping a reference is safe.
        if mean_error < lowest_error:
            lowest_error = mean_error
            best_output = l2

        # in what direction is the target value? how much is the rate of change
        l2_delta = l2_error*nonlin(l2,deriv=True)

        # how much did each l1 value contribute to the l2 error (according to the weights)?
        # (uses syn1 as it was during the forward pass, i.e. before the update below)
        l1_error = l2_delta.dot(syn1.T)

        # in what direction is the target l1? how much is the rate of change
        l1_delta = l1_error * nonlin(l1,deriv=True)

        # weight updates scaled by the learning rate
        syn1 += alpha*l1.T.dot(l2_delta)
        syn0 += alpha*l0.T.dot(l1_delta)

    # best_output stays None only if no iteration produced a comparable error
    # (e.g. NaN); fall back to the last output so 'output' is always an array.
    outputs_dic[alpha] = {'lowest_error':lowest_error,
                          'output':l2 if best_output is None else best_output}

Type 1 if you choose to vary alpha, and 0 otherwise: 1

Training With Alpha:0.001
Error:0.501362713079
Error:0.501052143223
Error:0.500654849382
Error:0.500344578005
Error:0.500088769601
Error:0.499860497202

Training With Alpha:0.01
Error:0.501362713079
Error:0.498504050855
Error:0.482140628237
Error:0.357411251961
Error:0.155751852473
Error:0.0975414917236

Training With Alpha:0.1
Error:0.501362713079
Error:0.043186766858
Error:0.0255149221486
Error:0.0196268894186
Error:0.0164728501577
Error:0.0144439715495

Training With Alpha:1
Error:0.501362713079
Error:0.00976387487092
Error:0.00670766367985
Error:0.00540927072394
Error:0.00465006419062
Error:0.00413805323714

Training With Alpha:10
Error:0.501362713079
Error:0.00277576670044
Error:0.0019358032759
Error:0.00156993421403
Error:0.00135376282688
Error:0.00120710393942

Training With Alpha:100
Error:0.501362713079
Error:0.375281437996
Error:0.375193655723
Error:0.375156047863
Error:0.375133999436
Error:0.375119114658

Training With 

In [2]:
# inspect the lowest error and the stored network output for every alpha
outputs_dic

{0.001: {'lowest_error': 0.4996395524134753, 'output': array([[ 0.52036512],
         [ 0.50233518],
         [ 0.50060855],
         [ 0.48113682]])},
 0.01: {'lowest_error': 0.074212824492294854, 'output': array([[ 0.07957674],
         [ 0.92442411],
         [ 0.92746989],
         [ 0.06916856]])},
 0.1: {'lowest_error': 0.013002860881219845, 'output': array([[ 0.01392872],
         [ 0.98584881],
         [ 0.98835061],
         [ 0.01228214]])},
 1: {'lowest_error': 0.0037632434526633902, 'output': array([[ 0.00403242],
         [ 0.99581133],
         [ 0.99673274],
         [ 0.00356462]])},
 10: {'lowest_error': 0.0010993197180327052,
  'output': array([[  6.60032883e-04],
         [  9.98699810e-01],
         [  9.99103847e-01],
         [  1.54090335e-03]])},
 100: {'lowest_error': 0.37510821446469944,
  'output': array([[  4.32090186e-04],
         [  4.99999598e-01],
         [  4.99999634e-01],
         [  5.00000000e-01]])},
 1000: {'lowest_error': 0.5, 'output': array(

In [3]:
# alpha = 10 achieves the lowest error of all the learning rates tried
outputs_dic[10]['lowest_error']

0.0010993197180327052

In [4]:
# choosing the right learning rate is crucial in building a good model
# output when alpha = 0.001: the step is so small that after training the
# predictions are all still close to 0.5
outputs_dic[0.001]['output']

array([[ 0.52036512],
       [ 0.50233518],
       [ 0.50060855],
       [ 0.48113682]])

In [5]:
# output when alpha = 10: the predictions are close to the targets [0, 1, 1, 0]
outputs_dic[10]['output']

array([[  6.60032883e-04],
       [  9.98699810e-01],
       [  9.99103847e-01],
       [  1.54090335e-03]])

In [6]:
# output when alpha = 1000: every prediction is 1, so with a step this large
# the network did not learn the targets
outputs_dic[1000]['output']

array([[ 1.],
       [ 1.],
       [ 1.],
       [ 1.]])