In [None]:
import numpy as np

def sigmoid(x):
  """Logistic function 1/(1+e^-x), evaluated without floating-point overflow.

  Args:
    x: a real scalar or a NumPy array.

  Returns:
    The sigmoid of x, computed elementwise.
  """
  # exp(-|x|) always lies in [0, 1], so it cannot overflow the way exp(-x)
  # does when x is a large negative number.
  z = np.exp(-np.abs(x))
  # x >= 0: 1/(1+e^-x).  x < 0: e^x/(1+e^x), the same value rewritten.
  return np.where(np.greater_equal(x, 0), 1.0, z)/(1.0 + z)

def f(a,b):
  """Target function for the neuron: the logical AND of a and b.

  Follows Python's `and` semantics: a is returned when it is falsy,
  otherwise b is returned.
  """
  return b if a else a

# Sanity checks: sigmoid at 0, then f over four input pairs (the AND table).
print(sigmoid(0))
for p, q in ((0, 0), (1, 1), (0, 1), (1, 0)):
  print(f(p, q))

0.5
0
1
0
0


In [None]:
import numpy as np

# Constant bias input. It is not used in this cell; the training cell
# below multiplies it by the bias weight.
b = 1

# Draw one random binary input pair and show the target f assigns to it.
x, y = (np.random.randint(2) for _ in range(2))
print(x, '---', y, '--->', f(x, y))

1 --- 0 ---> 0


In [None]:
# Start every weight at a random value in [-1, 1).
wx = np.random.random()*2-1
wy = np.random.random()*2-1
wb = np.random.random()*2-1

learning_rate = .01   # step size applied to each gradient
n_steps = 100000      # number of single-sample updates

for n in range(n_steps):
  # One random training sample per step.
  x = np.random.randint(2)
  y = np.random.randint(2)
  weighted_input = (x*wx)+(y*wy)+(b*wb)
  guess = sigmoid(weighted_input)
  error = (guess-f(x,y))**2   # squared error being minimised (not used below)

  ### application of the chain rule starts here ################################
  #
  # Note that derror/dguess is:  2 * (guess-f(x,y))
  #           dguess/dinput is:  sigmoid(input) * (1-sigmoid(input)),
  #                              i.e. guess * (1-guess), so guess is reused
  #           dinput/dweight is: x, y, b (for wx, wy, wb)
  #
  # Naming note: deds holds derror/dinput, and dwxde, dwyde, dwbde hold
  # derror/dwx, derror/dwy, derror/dwb respectively.
  #
  deds = 2*(guess-f(x,y))*(guess*(1-guess))
  dwxde = deds*x
  dwyde = deds*y
  dwbde = deds*b   # dinput/dwb is the bias input b, matching weighted_input
  ### and ends here ############################################################

  # Gradient descent: move each weight against its gradient.
  wx -= learning_rate*dwxde
  wy -= learning_rate*dwyde
  wb -= learning_rate*dwbde

print(wx, '---', wy, '---', wb)
# Evaluate the trained neuron on all four input pairs, using the same
# weighted-sum expression as the training loop.
for label, px, py in (
    ('input(1, 1): ', 1, 1),
    ('input(1, 0): ', 1, 0),
    ('input(0, 1): ', 0, 1),
    ('input(0, 0): ', 0, 0),
):
  print(label, sigmoid((px*wx)+(py*wy)+(b*wb)))

4.658907748390027 --- 4.641641051924831 --- -7.072499714352695
input(1, 1):  0.9027402026526113
input(1, 0):  0.08214209587888932
input(0, 1):  0.08084963448925586
input(0, 0):  0.0008473917416511116




---


**Linear Algebra**

Rewriting the example above in matrix form.

What is a more compact form for $(x \cdot w_x)+(y \cdot w_y)+(b \cdot w_b)$?


**Step 1.** 

Make a matrix for the inputs (including the bias, which is always equal to 1):

$X=\begin{bmatrix}x \\y\\1\end{bmatrix}$

In [None]:
# Input vector [x, y, 1]: two random bits followed by the constant bias input.
X = [np.random.randint(2) for _ in range(2)] + [1]
print(X)

[0, 1, 1]


**Step 2.**

Create a matrix of weights:

$w=\begin{bmatrix}w_x\\w_y\\w_b\end{bmatrix}$

In [None]:
# Take a closer look: NumPy applies arithmetic operators to each element
# individually, so this one-liner creates a 3-element array of random
# numbers from [-1, 1).
w = 2*np.random.random(3) - 1
print(w)

[ 0.26459259 -0.00682397  0.21357913]


**Step 3.**

Notice that the weighted sum of the inputs times the weights, $x\,w_x + y\,w_y + 1 \cdot w_b$, is exactly the dot product $X \cdot w$:

In [None]:
# Show that the weighted sum of inputs and weights is the dot product of
# X and w, then pass that sum through the sigmoid.
print('the dot product is exactly what I need to do...')
print('X =      ', X)
print('w =      ', w)
weighted_sum = np.dot(X, w)   # computed once, reused for both prints below
print('X dot w =', weighted_sum)
print('sigmoid =', sigmoid(weighted_sum))

the dot product is exactly what I need to do...
X =       [0, 1, 1]
w =       [ 0.26459259 -0.00682397  0.21357913]
X dot w = 0.20675515709730652
sigmoid = 0.5515054416202427


In [None]:
# Re-initialise the weight vector with random values in [-1, 1).
w = 2*np.random.random(3) - 1

for n in range(10000):
  # Random training sample [x, y, 1]; the third term is the bias input.
  X = [np.random.randint(2), np.random.randint(2), 1]
  guess = sigmoid(np.dot(X, w))
  # Chain rule: derror/dguess * dguess/dinput, reusing guess = sigmoid(input).
  deds = 2*(guess - f(X[0], X[1]))*(guess)*(1 - guess)
  # Scaling each input by deds gives the gradient for all three weights.
  dwde = np.multiply(deds, X)
  w -= 0.1*dwde

print(w)
# Evaluate the trained weights on all four input pairs (bias input is 1).
for label, probe in (
    ('input(1, 1): ', [1, 1, 1]),
    ('input(1, 0): ', [1, 0, 1]),
    ('input(0, 1): ', [0, 1, 1]),
    ('input(0, 0): ', [0, 0, 1]),
):
  print(label, sigmoid(np.dot(probe, w)))

[ 4.65173945  4.61889323 -7.04322552]
input(1, 1):  0.9026838266933053
input(1, 0):  0.08382423415293003
input(0, 1):  0.08133595733550404
input(0, 0):  0.0008725431528753509
