In [2]:
import numpy as np
import matplotlib.pyplot as plt

In [3]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), applied element-wise.

    Accepts a scalar or a NumPy array; an array result has the same shape
    as ``x``.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

In [4]:
# Step size intended for gradient descent.
# NOTE(review): nothing visible in this notebook reads this name
# (gradient_descent receives its rate through the `alpha` argument) --
# confirm it is still needed.
learning_rate = 0.1

In [71]:
# Worked example: one forward pass and an unfinished backward pass through a
# sigmoid network with 2 inputs, 2 hidden units and 1 output, printing every
# intermediate array along the way.

# Inputs, shape (2, 2). Rows are treated as samples: the bias is added below
# as an extra column and Y has one row per row of X.
X = np.array([[0.35, 0.9],
              [0.1, -0.7]])
print("X:", X)
print()

# Targets, shape (2, 1).
Y = np.array([[0.5], [0.35]])

print("Y:", Y)
print()

# Number of rows in X. NOTE(review): not used anywhere else in this cell.
m = X.shape[0]

# Hidden-layer weights, shape (3, 2). They are applied as h1 @ theta1, so
# row 0 multiplies the bias column of h1 (both bias weights are 0.0 here).
theta1 = np.array([[0.0,0.0],
                   [0.1, 0.4],
                   [0.8, 0.6]])

# Output-layer weights, shape (1, 3). They are applied as h2 @ theta2.T, so
# column 0 multiplies the bias column of h2.
theta2 = np.array([[0.0,0.3,0.9]])

#!Forward propagation
#layer 1
# Prepend a column of 1.0 (the bias input) -> shape (2, 3).
h1 = np.insert(X, 0, 1.0, axis=1)
print("h1:", h1,)
print()
# Hidden pre-activations, shape (2, 2).
z2 = np.matmul(h1, theta1)
print("z2:", z2)
print()
# Hidden activations.
a2 = sigmoid(z2)
print("a2:", a2)
print()
#layer 2
# Hidden activations with a leading bias column -> shape (2, 3).
h2 = np.insert(a2, 0, 1, axis=1)
print("h2:", h2)
print()
# Output pre-activations, shape (2, 1).
z3 = np.matmul(h2, theta2.T)
print("z3:", z3)
print()
#layer 3
# Network output, shape (2, 1).
h3 = sigmoid(z3)
print("h3:", h3)
print()

#!Back propagation
#layer3
# Raw output error (prediction minus target); the sigmoid derivative is not
# applied at this point.
delta3 = np.subtract(h3, Y)
print("d3:", delta3)
print()
#layer 2
# h2.T @ (delta3 * h3 * (1 - h3)) has shape (3, 1), i.e. the shape of theta2.T.
# NOTE(review): this looks like the squared-error gradient with respect to
# theta2 (summed over samples) rather than a layer-2 error term -- confirm the
# intended meaning of the name `delta2`. The leading `1 *` has no effect.
delta2 = 1 * np.matmul(h2.T,(delta3 * h3 * (1 - h3)))
print("d2:", delta2)
print()
# Output error scaled by the sigmoid derivative h3 * (1 - h3), shape (2, 1).
g3 = (delta3 * (h3 * (1 - h3)))
print("g3:", g3)
print()

#layer 1
print("theta2.T:", theta2.T)
print()
# NOTE(review): the hidden-layer step below is disabled. As written,
# np.matmul(g3, theta2.T) would multiply a (2, 1) array by a (3, 1) array,
# which is a shape mismatch -- presumably why it was commented out.
#delta1 = 1 * np.matmul(h1.T, (np.matmul(g3, theta2.T) * a2 * (1 - a2)))
#print("d1:", delta1)
print()


#theta2 = theta2[:,1:]
# Prepends a row holding 1.0 to g3, turning it into a (3, 1) column. This
# rebinds g3, so the value printed earlier is no longer available.
# NOTE(review): inserting a constant into an error vector has no obvious
# back-propagation meaning -- looks like an experiment; confirm or remove.
g3 = np.insert(g3, 0, 1.0, axis=0)
print("theta2:", theta2)
print()
print("g3:", g3)
print()

# (3, 1) @ (1, 3) -> (3, 3) outer product; row 0 is just theta2 because of the
# inserted 1.0.
print(np.matmul(g3, theta2))

#!Gradient descent
# NOTE(review): this section is empty -- no weight update is performed in
# this cell.

X: [[ 0.35  0.9 ]
 [ 0.1  -0.7 ]]

Y: [[0.5 ]
 [0.35]]

h1: [[ 1.    0.35  0.9 ]
 [ 1.    0.1  -0.7 ]]

z2: [[ 0.755  0.68 ]
 [-0.55  -0.38 ]]

a2: [[0.6802672  0.6637387 ]
 [0.36586441 0.4061269 ]]

h2: [[1.         0.6802672  0.6637387 ]
 [1.         0.36586441 0.4061269 ]]

z3: [[0.80144499]
 [0.47527353]]

h3: [[0.69028349]
 [0.61663117]]

d3: [[0.19028349]
 [0.26663117]]

d2: [[0.10371198]
 [0.05073478]
 [0.05260016]]

g3: [[0.04068113]
 [0.06303085]]

theta2.T: [[0. ]
 [0.3]
 [0.9]]


theta2: [[0.  0.3 0.9]]

g3: [[1.        ]
 [0.04068113]
 [0.06303085]]

[[0.         0.3        0.9       ]
 [0.         0.01220434 0.03661301]
 [0.         0.01890926 0.05672777]]


In [23]:
def sigmoid(x):
    """Element-wise logistic activation: maps ``x`` to 1 / (1 + exp(-x)).

    ``x`` may be a scalar or a NumPy array; arrays keep their shape.
    """
    # NOTE(review): an identical sigmoid is already defined in an earlier
    # cell of this notebook; this definition shadows it.
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_prime(x):
    """Derivative of the logistic function evaluated at ``x``.

    Uses the identity sigma'(x) = sigma(x) * (1 - sigma(x)); works
    element-wise on arrays.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

def forward_prop(X, theta1, theta2):
    """Run one forward pass through the two-layer sigmoid network.

    Args:
        X: 2-D array of inputs, one sample per row.
        theta1: hidden-layer weights; column 0 multiplies the bias unit.
        theta2: output-layer weights; column 0 multiplies the bias unit.

    Returns:
        Tuple ``(a1, z2, a2, z3, a3)``: the bias-augmented input, hidden
        pre-activations, bias-augmented hidden activations, output
        pre-activations and output activations.
    """
    def with_bias(layer):
        # Prepend a column of ones so column 0 of each weight matrix acts
        # as the bias term.
        return np.insert(layer, 0, 1, axis=1)

    a1 = with_bias(X)
    z2 = a1.dot(theta1.T)
    a2 = with_bias(sigmoid(z2))
    z3 = a2.dot(theta2.T)
    return a1, z2, a2, z3, sigmoid(z3)

def cost_function(X, Y, theta1, theta2):
    """Half mean squared error between the network output and ``Y``.

    Computes ``sum_i ||a3[i] - Y[i]||^2 / (2 * m)`` where ``a3`` is the
    output of ``forward_prop`` and ``m`` is the number of rows in ``X``.

    Args:
        X: 2-D array of inputs, one sample per row.
        Y: targets, indexed by sample along the first axis.
        theta1: hidden-layer weights.
        theta2: output-layer weights.

    Returns:
        The scalar cost.
    """
    _, _, _, _, predictions = forward_prop(X, theta1, theta2)
    n_samples = X.shape[0]
    # Accumulate sample by sample so each prediction row is compared only
    # with its own target row.
    squared_error_total = sum(
        np.sum((predictions[row] - Y[row]) ** 2) for row in range(n_samples)
    )
    return squared_error_total / (2 * n_samples)

def back_prop(X, Y, theta1, theta2):
    """Compute the back-propagated error terms of the hidden and output layers.

    Runs a forward pass and applies the chain rule for the squared-error
    cost used by ``cost_function``::

        delta3 = (a3 - Y) * sigmoid'(z3)
        delta2 = (delta3 . theta2[:, 1:]) * sigmoid'(z2)

    Layer sizes are taken from the arrays themselves, so any number of
    hidden or output units is supported.

    Args:
        X: 2-D array of inputs, one sample per row.
        Y: targets; must hold exactly as many values as the network output
            (for a single output unit both ``(m,)`` and ``(m, 1)`` work).
        theta1: hidden-layer weights; column 0 multiplies the bias unit.
        theta2: output-layer weights; column 0 multiplies the bias unit.

    Returns:
        Tuple ``(delta2, delta3)`` with shapes ``(m, hidden_units)`` and
        ``(m, output_units)``.

    Raises:
        ValueError: if ``Y`` cannot be reshaped to the network output shape.
    """
    _, z2, _, z3, a3 = forward_prop(X, theta1, theta2)
    # Give the targets the exact shape of the output so a flat (m,) vector
    # cannot broadcast against an (m, 1) output into an (m, m) matrix.
    targets = np.reshape(Y, a3.shape)
    delta3 = (a3 - targets) * sigmoid_prime(z3)
    # Skip column 0 of theta2: it weights the bias unit, which has no
    # pre-activation in z2, so keeping it would yield one error term too
    # many per sample and break the element-wise product below.
    delta2 = np.dot(delta3, theta2[:, 1:]) * sigmoid_prime(z2)
    return delta2, delta3

def gradient_descent(X, Y, theta1, theta2, alpha, num_iters):
    """Train both weight matrices with batch gradient descent.

    Each iteration computes the error terms with ``back_prop``, forms the
    averaged gradients

        grad(theta1) = delta2.T . a1 / m
        grad(theta2) = delta3.T . a2 / m

    takes one step of size ``alpha`` against them, and records the cost of
    the updated weights.

    Args:
        X: 2-D array of inputs, one sample per row.
        Y: targets, passed through to ``back_prop`` and ``cost_function``.
        theta1: initial hidden-layer weights; column 0 multiplies the bias.
        theta2: initial output-layer weights; column 0 multiplies the bias.
        alpha: learning rate (step size).
        num_iters: number of update steps to perform.

    Returns:
        Tuple ``(theta1, theta2, J_history)``. ``J_history`` has shape
        ``(num_iters, 1)`` and holds the cost after each update. The input
        weight arrays are not modified in place.
    """
    m = X.shape[0]
    J_history = np.zeros((num_iters, 1))
    for i in range(num_iters):
        # back_prop returns only the error terms, so fetch the layer inputs
        # separately. Each gradient must be taken against the bias-augmented
        # input of its own layer (a1 for theta1, a2 for theta2), not the raw
        # X, otherwise its shape cannot match the weight matrix.
        a1, _, a2, _, _ = forward_prop(X, theta1, theta2)
        delta2, delta3 = back_prop(X, Y, theta1, theta2)
        theta1 = theta1 - alpha * np.dot(delta2.T, a1) / m
        theta2 = theta2 - alpha * np.dot(delta3.T, a2) / m
        J_history[i] = cost_function(X, Y, theta1, theta2)
    return theta1, theta2, J_history

def predict(X, theta1, theta2):
    """Return the network's output-layer activations for the rows of ``X``.

    This is the last element (``a3``) of the tuple produced by
    ``forward_prop``; no thresholding is applied.
    """
    _a1, _z2, _a2, _z3, output = forward_prop(X, theta1, theta2)
    return output

