In [21]:
import numpy as np
from scipy.special import expit as expit

# Generate starting values

In [46]:
lr = 0.1

## Input layer

In [60]:
input_layer = np.random.randint(2, size=(1,8))
input_layer

array([[1, 1, 0, 1, 1, 1, 0, 1]])

## Weights and biases

In [61]:
hidden_bias = np.random.uniform(size=(1, 3))
output_bias = np.random.uniform(size=(1, 8))
print(hidden_bias)
print()
print(output_bias)

[[0.2888758  0.14884696 0.20725632]]

[[0.66379342 0.51166311 0.6992155  0.14650619 0.61919299 0.46754989
  0.68069931 0.95591248]]


In [35]:
hidden_weight = np.random.randn(8, 3)*np.sqrt(2.0/8)
output_weight = np.random.randn(3, 8)*np.sqrt(2.0/8)
print(hidden_weight)
print()
print(output_weight)

[[-0.14596248  0.46984296 -0.20284101]
 [ 0.39457557  0.2383357  -0.30118454]
 [ 0.21543008 -0.09547247  0.44815435]
 [ 0.18373625 -0.06346439 -0.09628488]
 [ 0.10443775  0.97537049 -0.19977915]
 [-0.62747513 -0.07544811  0.45845421]
 [ 0.2087666  -1.30966475 -0.12424663]
 [-0.08091887 -0.01457799  0.7577325 ]]

[[-0.57340537  0.11797267 -0.72943096  0.08953936 -0.59328186  0.11522032
  -0.21102804 -0.25640616]
 [ 0.47972382 -0.74219694  0.11819049 -0.93784652  0.8747555  -0.10647782
   0.07074547 -0.48855532]
 [ 0.72074364 -0.62565008  0.15053445  0.44193169  0.60381459  0.01375663
  -0.00427383 -0.81879678]]


## Initialize layers

In [62]:
hidden_activation = np.ones(3)
output_activation = np.ones(8)

# Define activation functions

In [17]:
def activation(x, dx = False):
    '''
    Sigmiod based activation function - as discussed by Mike Keiser in class
    Input: value to be 'activated' can be int, float or array, boolean of if we want the derivative or not
    Output: sigmoid activation of input, derivative if requested
    using expit from scipy to prevent runtime overflow errors
    '''
    sig = expit(x + 1)
    if dx:
        return sig*(1-sig)
    else:
        return sig

# Forward Prop

## Hidden Layer

### Calculate 'z' (preactivation value)

In [63]:
input_layer

array([[1, 1, 0, 1, 1, 1, 0, 1]])

In [64]:
hidden_weight

array([[-0.14596248,  0.46984296, -0.20284101],
       [ 0.39478882,  0.23582777, -0.30064179],
       [ 0.21564332, -0.09798041,  0.4486971 ],
       [ 0.1839495 , -0.06597233, -0.09574213],
       [ 0.104651  ,  0.97286255, -0.1992364 ],
       [-0.62747513, -0.07544811,  0.45845421],
       [ 0.2087666 , -1.30966475, -0.12424663],
       [-0.08091887, -0.01457799,  0.7577325 ]])

In [65]:
hidden_bias

array([[0.2888758 , 0.14884696, 0.20725632]])

In [66]:
hidden_z = np.dot(input_layer, hidden_weight) + hidden_bias
hidden_z

array([[0.11790862, 1.67138181, 0.6249817 ]])

### Calculate activation

In [67]:
hidden_activation = activation(hidden_z, dx=False)
hidden_activation

array([[0.75360058, 0.93531668, 0.83548102]])

## Output Layer

### Calculate 'z' (preactivation value)

In [68]:
hidden_activation

array([[0.75360058, 0.93531668, 0.83548102]])

In [69]:
output_weight

array([[-0.59196054,  0.10018885, -0.72569459,  0.07080871, -0.61067354,
         0.09612946, -0.20848902, -0.25395206],
       [ 0.46978088, -0.75172655,  0.12019265, -0.9478835 ,  0.86543602,
        -0.11670782,  0.07210603, -0.48724027],
       [ 0.70269662, -0.64294688,  0.15416849,  0.423714  ,  0.58689919,
        -0.00481141, -0.00180434, -0.81640988]])

In [70]:
output_bias

array([[0.66379342, 0.51166311, 0.6992155 , 0.14650619, 0.61919299,
        0.46754989, 0.68069931, 0.95591248]])

In [71]:
output_z = np.dot(hidden_activation, output_weight) + output_bias
output_z

array([[ 1.24417519, -0.65310681,  0.39355468, -0.33269858,  1.45878894,
         0.4268145 ,  0.58951635, -0.37328485]])

### Calculate activation

In [72]:
output_activation = activation(output_z)
output_activation

array([[0.90414691, 0.58586398, 0.80115912, 0.66089864, 0.9212018 ,
        0.80640449, 0.83054805, 0.65174426]])

# Error Calculation and backprop

## Error and gradient

In [73]:
error = input_layer - output_activation
error

array([[ 0.09585309,  0.41413602, -0.80115912,  0.33910136,  0.0787982 ,
         0.19359551, -0.83054805,  0.34825574]])

In [74]:
slope_out = activation(output_activation, dx=True)
slope_out

array([[0.11283336, 0.14107796, 0.12162829, 0.13415602, 0.11141327,
        0.12117158, 0.11908122, 0.13499311]])

In [75]:
slope_hidden = activation(hidden_activation, dx = True)
slope_hidden

array([[0.12580975, 0.1102461 , 0.11865654]])

## Calculate weight + bias changes

In [76]:
delta_out = error * slope_out
delta_out

array([[ 0.01081543,  0.05842546, -0.09744362,  0.04549249,  0.00877917,
         0.02345827, -0.09890268,  0.04701213]])

In [77]:
delta_hidden = np.dot(delta_out,hidden_weight) * slope_hidden
delta_hidden

array([[-0.00370008,  0.01775197, -0.00129583]])

## Update weights and biases

In [78]:
output_weight += np.dot(hidden_activation.T,delta_out) * lr
output_weight

array([[-0.59114549,  0.1045918 , -0.73303795,  0.07423702, -0.61001194,
         0.09789728, -0.21594233, -0.25040922],
       [ 0.47079246, -0.74626192,  0.11107859, -0.94362851,  0.86625715,
        -0.11451373,  0.0628555 , -0.48284314],
       [ 0.70360023, -0.63806554,  0.14602726,  0.42751481,  0.58763268,
        -0.00285152, -0.01006747, -0.81248211]])

In [79]:
input_layer

array([[1, 1, 0, 1, 1, 1, 0, 1]])

In [80]:
delta_hidden

array([[-0.00370008,  0.01775197, -0.00129583]])

In [81]:
hidden_weight += np.dot(input_layer.T, delta_hidden) * lr
hidden_weight

array([[-0.14633249,  0.47161816, -0.20297059],
       [ 0.39441881,  0.23760296, -0.30077137],
       [ 0.21564332, -0.09798041,  0.4486971 ],
       [ 0.18357949, -0.06419714, -0.09587171],
       [ 0.10428099,  0.97463774, -0.19936598],
       [-0.62784514, -0.07367291,  0.45832463],
       [ 0.2087666 , -1.30966475, -0.12424663],
       [-0.08128888, -0.01280279,  0.75760292]])

In [82]:
output_bias += np.sum(delta_out, axis=0, keepdims=True) * lr
hidden_bias += np.sum(delta_hidden, axis=0, keepdims=True) * lr
print(output_bias)
print()
print(hidden_bias)

[[0.66487496 0.51750566 0.68947114 0.15105544 0.62007091 0.46989572
  0.67080904 0.96061369]]

[[0.28850579 0.15062215 0.20712673]]
