In [66]:
import torch
import numpy as np

In [67]:
W = np.array([[2,1,2], [1,3,1], [7,4,7]], dtype=np.float32)
x = np.array([[1], [0], [-1]], dtype=np.float32)

print(f"W:\n {W}")
print(f"x:\n {x}")

W:
 [[2. 1. 2.]
 [1. 3. 1.]
 [7. 4. 7.]]
x:
 [[ 1.]
 [ 0.]
 [-1.]]


In [68]:
# Defining sigmoid and L2 norm functions
def sigmoid(X):
    return 1 / (1 + np.exp(X))

def l2_norm(Y):
    return np.linalg.norm(Y) ** 2

### Forward Propagation

In [69]:
Z = np.dot(W,x)
y = sigmoid(Z)
f = l2_norm(y)
print(f"Z:\n {Z}")
print(f"y:\n {y}")
print(f"f:\n {f}")

Z:
 [[0.]
 [0.]
 [0.]]
y:
 [[0.5]
 [0.5]
 [0.5]]
f:
 0.749999973077653


### Backpropagation

$\qquad \sigma'(x) = \sigma(x) (1 - \sigma(x))$

$\qquad (\lVert y \rVert_2^2)' = 2y$

In [70]:
df_df = 1.0

In [71]:
# For y
df_dy = 2 * y
df_dy = df_df * df_dy

print(f"df/dy:\n {df_dy}")

df/dy:
 [[1.]
 [1.]
 [1.]]


In [72]:
# For Z
dy_dZ = sigmoid(Z) * (1-sigmoid(Z))
df_dZ = df_dy * dy_dZ

print(f"df/dZ:\n {df_dZ}")

df/dZ:
 [[0.25]
 [0.25]
 [0.25]]


In [73]:
# For W and x
dZ_dW = x.T
dZ_dx = W.T

df_dW = np.dot(df_dZ, dZ_dW)
df_dx = np.dot(dZ_dx, df_dZ)

print(f"df/dW:\n {df_dW}")
print(f"df/dx:\n {df_dx}")

df/dW:
 [[ 0.25  0.   -0.25]
 [ 0.25  0.   -0.25]
 [ 0.25  0.   -0.25]]
df/dx:
 [[2.5]
 [2. ]
 [2.5]]


### Computation using PyTorch

In [74]:
W = np.array([[2,1,2], [1,3,1], [7,4,7]], dtype=np.float32)
x = np.array([[1], [0], [-1]], dtype=np.float32)

# Covert to tensors
W = torch.tensor(W, requires_grad=True)
x = torch.tensor(x, requires_grad=True)

In [75]:
# Compute forward propagation
Z = torch.matmul(W,x)
y = torch.sigmoid(Z)
f = torch.linalg.norm(y) ** 2

print(f"Z:\n {Z}")
print(f"y:\n {y}")
print(f"f:\n {f}")

Z:
 tensor([[0.],
        [0.],
        [0.]], grad_fn=<MmBackward0>)
y:
 tensor([[0.5000],
        [0.5000],
        [0.5000]], grad_fn=<SigmoidBackward0>)
f:
 0.75


In [76]:
# Save branch grad values
Z.retain_grad()
y.retain_grad()
f.retain_grad()

# Backpropo
f.backward()

In [77]:
print(f"df/df:\n {f.grad.detach().numpy()}\n")
print(f"df/dy:\n {y.grad.detach().numpy()}\n")
print(f"df/dZ:\n {Z.grad.detach().numpy()}\n")
print(f"df/dW:\n {W.grad.detach().numpy()}\n")
print(f"df/dx:\n {x.grad.detach().numpy()}")

df/df:
 1.0

df/dy:
 [[1.]
 [1.]
 [1.]]

df/dZ:
 [[0.25]
 [0.25]
 [0.25]]

df/dW:
 [[ 0.25  0.   -0.25]
 [ 0.25  0.   -0.25]
 [ 0.25  0.   -0.25]]

df/dx:
 [[2.5]
 [2. ]
 [2.5]]


Rochishnu Banerjee

banerjr2@miamioh.edu