# Problem 6.1.1 
The Jupyter notebook [IntroML/Examples/Chapter6/StochasticGradientDescent.ipynb](https://github.com/satishchandrareddy/IntroML/blob/master/Examples/Chapter6/StochasticGradientDescent.ipynb) performs stochastic gradient descent using the following split of X and Y:

$X = \begin{bmatrix} 1 & 2 \\ -2 & -5 \end{bmatrix}, Y = \begin{bmatrix} 0 & 1 \end{bmatrix}$

$X_{sample=0} = \begin{bmatrix} 1 \\ -2  \end{bmatrix}, Y_{sample=0} = \begin{bmatrix} 0 \end{bmatrix}, X_{sample=1} = \begin{bmatrix} 2 \\ -5  \end{bmatrix}, Y_{sample=1} = \begin{bmatrix} 1 \end{bmatrix}$

Redo the calculation by changing the order of the data samples:

$X_{sample=1} = \begin{bmatrix} 1 \\ -2  \end{bmatrix}, Y_{sample=1} = \begin{bmatrix} 0 \end{bmatrix}, X_{sample=0} = \begin{bmatrix} 2 \\ -5  \end{bmatrix}, Y_{sample=0} = \begin{bmatrix} 1 \end{bmatrix}$

Show that the order of the samples does indeed affect the final values of W and b after 1 epoch.

In [None]:
import numpy as np

In [None]:
# Training data: each column of X is one sample (two feature values per
# sample) and Y holds the matching 0/1 label for each column.
X = np.array([
    [1, 2],
    [-2, -5],
])
Y = np.array([[0, 1]])

# Step size used when W and b are updated from their gradients
alpha = 0.1

## Original order:

In [None]:
# Stochastic gradient descent for one epoch, visiting the samples in their
# original order: column 0 of X first, then column 1.  W and b are updated
# after every single sample, so the second sample is evaluated with the
# parameters produced by the first one.

# initial parameters
W = np.array([[0.1, 0.1]])
b = np.array([[0.2]])

# Epoch 1, Sample index=0
# Forward propagation: linear combination followed by the sigmoid
X0 = X[:, 0:1]  # the 0:1 slice keeps the sample as a 2-D column
Y0 = Y[:, 0:1]
Z0 = np.dot(W, X0) + b
A0 = 1/(1 + np.exp(-Z0))
print("Forward Propagation: Epoch 1, Sample index=0")
print("Z0: {}".format(Z0))
print("A0: {}".format(A0))

# Back Propagation
# Derivative of the binary cross-entropy loss with respect to A0; the
# trailing /1 is a no-op, presumably the number of samples in the batch.
grad_A_L0 = -(Y0/A0 - (1 - Y0)/(1 - A0))/1
# Derivative of the sigmoid: A*(1-A), written here as A - A^2
dA0dZ0 = A0 - np.square(A0)
# Chain rule: gradient of the loss with respect to Z0
grad_Z_L0 = grad_A_L0*dA0dZ0
grad_W_L = np.dot(grad_Z_L0, X0.T)
# keepdims=True keeps the bias gradient 2-D, like b
grad_b_L = np.sum(grad_Z_L0, keepdims=True)
print("Back Propagation: Sample index=0")
print("grad_A_L0: {}".format(grad_A_L0))
print("dA0dZ0: {}".format(dA0dZ0))
print("grad_Z_L0: {}".format(grad_Z_L0))
print("grad_W_L: {}".format(grad_W_L))
print("grad_b_L: {}".format(grad_b_L))

# Update W and b
W = W - alpha*grad_W_L
b = b - alpha*grad_b_L
print("W: {}".format(W))
print("b: {}".format(b))

# Epoch 1, Sample index=1
# Forward propagation, using the W and b just updated from sample 0
X1 = X[:, 1:2]
Y1 = Y[:, 1:2]
Z1 = np.dot(W, X1) + b
A1 = 1/(1 + np.exp(-Z1))
print("Forward Propagation: Epoch 1, Sample index=1")
print("Z1: {}".format(Z1))
print("A1: {}".format(A1))

# Back Propagation (same formulas as for sample 0)
grad_A_L1 = -(Y1/A1 - (1 - Y1)/(1 - A1))/1
dA1dZ1 = A1 - np.square(A1)
grad_Z_L1 = grad_A_L1*dA1dZ1
grad_W_L = np.dot(grad_Z_L1, X1.T)
grad_b_L = np.sum(grad_Z_L1, keepdims=True)
# Labels match the variable names and the sample-0 output above
print("Back Propagation: Sample index=1")
print("grad_A_L1: {}".format(grad_A_L1))
print("dA1dZ1: {}".format(dA1dZ1))
print("grad_Z_L1: {}".format(grad_Z_L1))
print("grad_W_L: {}".format(grad_W_L))
print("grad_b_L: {}".format(grad_b_L))

# Update W and b; these are the final values after 1 epoch
W = W - alpha*grad_W_L
b = b - alpha*grad_b_L
print("W: {}".format(W))
print("b: {}".format(b))

## Switched Order

In [None]:
# Stochastic gradient descent for one epoch with the processing order
# switched: column 1 of X is visited first, then column 0.  The samples keep
# their original index labels (X1/Y1 is still column 1); only the order in
# which they update W and b changes.

# initial parameters (same starting point as the original-order run)
W = np.array([[0.1, 0.1]])
b = np.array([[0.2]])

# Epoch 1, Sample index=1 (processed first in this run)
# Forward propagation: linear combination followed by the sigmoid
X1 = X[:, 1:2]  # the 1:2 slice keeps the sample as a 2-D column
Y1 = Y[:, 1:2]
Z1 = np.dot(W, X1) + b
A1 = 1/(1 + np.exp(-Z1))
print("Forward Propagation: Epoch 1, Sample index=1")
print("Z1: {}".format(Z1))
print("A1: {}".format(A1))

# Back Propagation
# Derivative of the binary cross-entropy loss with respect to A1; the
# trailing /1 is a no-op, presumably the number of samples in the batch.
grad_A_L1 = -(Y1/A1 - (1 - Y1)/(1 - A1))/1
# Derivative of the sigmoid: A*(1-A), written here as A - A^2
dA1dZ1 = A1 - np.square(A1)
# Chain rule: gradient of the loss with respect to Z1
grad_Z_L1 = grad_A_L1*dA1dZ1
grad_W_L = np.dot(grad_Z_L1, X1.T)
# keepdims=True keeps the bias gradient 2-D, like b
grad_b_L = np.sum(grad_Z_L1, keepdims=True)
# Labels match the variable names and the sample-0 output below
print("Back Propagation: Sample index=1")
print("grad_A_L1: {}".format(grad_A_L1))
print("dA1dZ1: {}".format(dA1dZ1))
print("grad_Z_L1: {}".format(grad_Z_L1))
print("grad_W_L: {}".format(grad_W_L))
print("grad_b_L: {}".format(grad_b_L))

# Update W and b
W = W - alpha*grad_W_L
b = b - alpha*grad_b_L
print("W: {}".format(W))
print("b: {}".format(b))

# Epoch 1, Sample index=0 (processed second in this run)
# Forward propagation, using the W and b just updated from sample 1
X0 = X[:, 0:1]
Y0 = Y[:, 0:1]
Z0 = np.dot(W, X0) + b
A0 = 1/(1 + np.exp(-Z0))
print("Forward Propagation: Epoch 1, Sample index=0")
print("Z0: {}".format(Z0))
print("A0: {}".format(A0))

# Back Propagation (same formulas as for sample 1)
grad_A_L0 = -(Y0/A0 - (1 - Y0)/(1 - A0))/1
dA0dZ0 = A0 - np.square(A0)
grad_Z_L0 = grad_A_L0*dA0dZ0
grad_W_L = np.dot(grad_Z_L0, X0.T)
grad_b_L = np.sum(grad_Z_L0, keepdims=True)
print("Back Propagation: Sample index=0")
print("grad_A_L0: {}".format(grad_A_L0))
print("dA0dZ0: {}".format(dA0dZ0))
print("grad_Z_L0: {}".format(grad_Z_L0))
print("grad_W_L: {}".format(grad_W_L))
print("grad_b_L: {}".format(grad_b_L))

# Update W and b; these are the final values after 1 epoch
W = W - alpha*grad_W_L
b = b - alpha*grad_b_L
print("W: {}".format(W))
print("b: {}".format(b))