# Problem 4.7.1 
Consider binary classification using Logistic Regression and the following training data:

$X = \begin{bmatrix} 3 & 1 & -1 \\ 1 & -2 & 2 \end{bmatrix}, Y = \begin{bmatrix} 0 & 1 & 1 \end{bmatrix}$

Assume that 

$W = \begin{bmatrix} W_0 & W_1 \end{bmatrix} = \begin{bmatrix} 2 & -1 \end{bmatrix},      b=-1 $

In [None]:
import numpy as np

In [None]:
# Training set: each column of X is one example (2 features x 3 examples)
# and Y holds the matching binary labels.
X = np.array([
    [3, 1, -1],
    [1, -2, 2],
])
Y = np.array([[0, 1, 1]])
# Initial parameters: weight row vector W = [W_0, W_1] and bias b.
W = np.array([[2, -1]])
b = np.array([[-1]])
# Number of training examples (columns of X).
m = X.shape[1]

**(a)** Perform forward propagation using the above training data and parameters.

In [None]:
# Forward pass: linear scores Z = W.X + b (b broadcasts across the examples),
# then the logistic sigmoid turns every score into an activation A.
Z = W.dot(X) + b
print("Z: {}".format(Z))
A = 1.0 / (1.0 + np.exp(-Z))
print("A: {}".format(A))

**(b)**	Compute the value of the loss function (binary cross entropy) after forward propagation.

In [None]:
# Binary cross-entropy: sum the per-example log-likelihood terms, negate,
# and average over the m training examples.
log_likelihood = Y * np.log(A) + (1 - Y) * np.log(1 - A)
L = -np.sum(log_likelihood) / m
print("Loss: {}".format(L))

**(c)**	Perform back propagation for the above training data and parameter matrices to determine $\nabla_WL$ and $\nabla_bL$

In [None]:
# Back propagation by the chain rule: L -> A -> Z -> (W, b).
# All gradients are computed first, then reported in the original order.
grad_AL = (-1 / m) * (Y / A - (1 - Y) / (1 - A))   # dL/dA of the averaged loss
dAdZ = A - np.square(A)                            # sigmoid derivative A - A^2
grad_ZL = grad_AL * dAdZ                           # element-wise chain rule
grad_WL = grad_ZL.dot(X.T)                         # sums contributions over examples
grad_bL = grad_ZL.sum(axis=1, keepdims=True)       # keepdims keeps the 2-D shape of b
print("grad_AL: {}".format(grad_AL))
print("dAdZ: {}".format(dAdZ))
print("grad_ZL: {}".format(grad_ZL))
print("grad_WL: {}".format(grad_WL))
print("grad_bL: {}".format(grad_bL))

**(d)** Perform 1 epoch of training using Gradient Descent with learning rate of 0.1 and recompute the loss function with the updated W,b

In [None]:
# One gradient-descent step with learning rate alpha. W and b are rebound
# to new arrays here, so later cells see the epoch-1 parameters.
alpha = 0.1
W = W - alpha * grad_WL
b = b - alpha * grad_bL
print("update W and b")
print("W epoch 1: {}".format(W))
print("b epoch 1: {}".format(b))
# Repeat the forward pass with the updated parameters and re-evaluate the
# averaged binary cross-entropy loss.
Z = W.dot(X) + b
A = 1.0 / (1.0 + np.exp(-Z))
log_likelihood = Y * np.log(A) + (1 - Y) * np.log(1 - A)
L = -np.sum(log_likelihood) / m
print("Loss epoch 1: {}".format(L))

**(e)**	Compute the prediction based on input feature matrix X above and the updated W,b from (d)

In [None]:
# Prediction: forward pass with the current W and b, then round each
# activation to the nearest integer to obtain a 0/1 label.
Z = W.dot(X) + b
A = 1.0 / (1.0 + np.exp(-Z))
print("A: {}".format(A))
Y_pred = np.round(A)
print("predicted Y: {}".format(Y_pred))

**(f)**	Compute the accuracy of the prediction in (e) when compared against the actual Y specified above.

In [None]:
# Accuracy: fraction of examples whose predicted label matches the true
# label. The comparison uses a small tolerance because Y_pred is float.
is_correct = np.abs(Y - Y_pred) < 1e-7
accuracy = np.mean(is_correct)
print("accuracy: {}".format(accuracy))

**(g)** Compute the derivatives $\frac{\partial L}{\partial W_0}, \frac{\partial L}{\partial W_1}, \frac{\partial L}{\partial b}$ using the centred differences method with $\epsilon=0.1$

In [None]:
eps = 0.1

# estimated dLdW0: centred difference (L(W0+eps) - L(W0-eps)) / (2*eps),
# evaluated around the original parameters W = [2, -1], b = -1 so that the
# result is comparable with the analytic gradient from (c)
print("dLdW0 ****")
# (d) rebound b to its gradient-descent update; restore the original bias so
# that W0 is the only quantity being perturbed
b = np.array([[-1]])
W = np.array([[2+eps,-1]])
Z = np.dot(W,X) + b
A = 1/(1+np.exp(-Z))
print("A plus: {}".format(A))
Lossp = -np.sum(Y*np.log(A)+(1-Y)*np.log(1-A))/m
print("Loss plus: {}".format(Lossp))
W = np.array([[2-eps,-1]])
Z = np.dot(W,X) + b
A = 1/(1+np.exp(-Z))
print("A minus: {}".format(A))
# same sum/m form as Lossp so both losses are computed identically
Lossm = -np.sum(Y*np.log(A)+(1-Y)*np.log(1-A))/m
print("Loss minus: {}".format(Lossm))
dLdW0 = (Lossp - Lossm)/(2*eps)
print("Estimated dL/dW0: {}".format(dLdW0))

In [None]:
# estimated dLdW1: centred difference (L(W1+eps) - L(W1-eps)) / (2*eps),
# evaluated around the original parameters W = [2, -1], b = -1 so that the
# result is comparable with the analytic gradient from (c)
print("dLdW1 ****")
# (d) rebound b to its gradient-descent update; restore the original bias so
# that W1 is the only quantity being perturbed
b = np.array([[-1]])
W = np.array([[2,-1+eps]])
Z = np.dot(W,X) + b
A = 1/(1+np.exp(-Z))
print("A plus: {}".format(A))
Lossp = -np.mean(Y*np.log(A)+(1-Y)*np.log(1-A))
print("Loss plus: {}".format(Lossp))
W = np.array([[2,-1-eps]])
Z = np.dot(W,X) + b
A = 1/(1+np.exp(-Z))
print("A minus: {}".format(A))
Lossm = -np.mean(Y*np.log(A)+(1-Y)*np.log(1-A))
print("Loss minus: {}".format(Lossm))
dLdW1 = (Lossp - Lossm)/(2*eps)
print("Estimated dL/dW1: {}".format(dLdW1))

In [None]:
# Centred-difference estimate of dL/db: W is held at its original value
# while b is shifted by +eps and then by -eps.
print("dLdb ****")
W = np.array([[2, -1]])

# Loss at b + eps
b = np.array([[-1 + eps]])
Z = W.dot(X) + b
A = 1.0 / (1.0 + np.exp(-Z))
print("A plus: {}".format(A))
Lossp = -np.mean(Y * np.log(A) + (1 - Y) * np.log(1 - A))
print("Loss plus: {}".format(Lossp))

# Loss at b - eps
b = np.array([[-1 - eps]])
Z = W.dot(X) + b
A = 1.0 / (1.0 + np.exp(-Z))
print("A minus: {}".format(A))
Lossm = -np.mean(Y * np.log(A) + (1 - Y) * np.log(1 - A))
print("Loss minus: {}".format(Lossm))

dLdb = (Lossp - Lossm) / (2 * eps)
print("Estimated dL/db: {}".format(dLdb))

**(h)** Focusing on the derivative $\frac{\partial L}{\partial W_0}$, redo the calculation in (g) with $\epsilon=0.02, 0.01, 0.005$. Confirm that the error (absolute difference) in the approximate derivative when compared to the actual derivative computed in (c) decreases by a factor of 4 when $\epsilon$ is cut in half.

In [None]:
eps = 0.02

# Centred-difference estimate of dL/dW0 around the original parameters,
# compared against the analytic value grad_WL[0, 0].
print("dLdW0, eps = {}".format(eps))
b = np.array([[-1]])

# Loss at W0 + eps
W = np.array([[2 + eps, -1]])
Z = W.dot(X) + b
A = 1.0 / (1.0 + np.exp(-Z))
Lossp = -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m

# Loss at W0 - eps
W = np.array([[2 - eps, -1]])
Z = W.dot(X) + b
A = 1.0 / (1.0 + np.exp(-Z))
Lossm = -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m

dLdW0 = (Lossp - Lossm) / (2 * eps)
print("Estimated dL/dW0: {}".format(dLdW0))
error = np.abs(grad_WL[0, 0] - dLdW0)
print("Error: {}".format(error))

In [None]:
eps = 0.01

# Centred-difference estimate of dL/dW0 around the original parameters,
# compared against the analytic value grad_WL[0, 0].
print("dLdW0, eps = {}".format(eps))
b = np.array([[-1]])

# Loss at W0 + eps
W = np.array([[2 + eps, -1]])
Z = W.dot(X) + b
A = 1.0 / (1.0 + np.exp(-Z))
Lossp = -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m

# Loss at W0 - eps
W = np.array([[2 - eps, -1]])
Z = W.dot(X) + b
A = 1.0 / (1.0 + np.exp(-Z))
Lossm = -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m

dLdW0 = (Lossp - Lossm) / (2 * eps)
print("Estimated dL/dW0: {}".format(dLdW0))
error = np.abs(grad_WL[0, 0] - dLdW0)
print("Error: {}".format(error))

In [None]:
eps = 0.005

# Centred-difference estimate of dL/dW0 around the original parameters,
# compared against the analytic value grad_WL[0, 0].
print("dLdW0, eps = {}".format(eps))
b = np.array([[-1]])

# Loss at W0 + eps
W = np.array([[2 + eps, -1]])
Z = W.dot(X) + b
A = 1.0 / (1.0 + np.exp(-Z))
Lossp = -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m

# Loss at W0 - eps
W = np.array([[2 - eps, -1]])
Z = W.dot(X) + b
A = 1.0 / (1.0 + np.exp(-Z))
Lossm = -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m

dLdW0 = (Lossp - Lossm) / (2 * eps)
print("Estimated dL/dW0: {}".format(dLdW0))
error = np.abs(grad_WL[0, 0] - dLdW0)
print("Error: {}".format(error))

Notice that the error is 1.12e-5 when $\epsilon=0.02$ and error is 2.82e-6 when $\epsilon=0.01$. Hence error is roughly one fourth the value when $\epsilon$ drops by one half. Similarly, error is 2.82e-6 when $\epsilon=0.01$ and 7.04e-7 when $\epsilon=0.005$. Hence again, error is roughly one fourth the value when $\epsilon$ drops by one half.