Chain rule

In [3]:
# Toy example f(x, y, z) = (x + y) * z, differentiated by hand.
x, y, z = -2, 5, -4

# Forward pass: the sum gate feeds the product gate.
q = x + y
f = q * z

# Local derivatives of each gate.
dfdq = z            # d(q * z) / dq
dfdz = q            # d(q * z) / dz
dqdx = dqdy = 1.0   # d(x + y) / dx and d(x + y) / dy

print(f"dfdq = {dfdq}")

# Chain rule: upstream gradient df/dq times each local gradient of q.
dfdx, dfdy = dfdq * dqdx, dfdq * dqdy

print(f"dfdx = {dfdx}, dfdy = {dfdy}, dfdz = {dfdz}")

dfdq = -4
dfdx = -4.0, dfdy = -4.0, dfdz = 3


Sigmoid

In [4]:
import math

# One sigmoid neuron: w[0], w[1] weight the two inputs, w[2] is the bias.
w = [2, -3, -3]
x = [-1, -2]

# Forward pass: weighted sum of the inputs plus bias, then the sigmoid.
# zip() stops at the shorter list, so only w[0] and w[1] are paired with x.
dot = sum(wi * xi for wi, xi in zip(w, x)) + w[2]
f = 1.0 / (1.0 + math.exp(-dot))

# Backward pass: the sigmoid's derivative w.r.t. its input is f * (1 - f).
ddot = f * (1 - f)
dx = [wi * ddot for wi in w[:2]]              # d(dot)/dx_i = w_i
dw = [xi * ddot for xi in x] + [1.0 * ddot]   # d(dot)/dw_i = x_i, bias -> 1

print(f"dfdx: {dx}, dfdw: {dw}")

dfdx: [0.3932238664829637, -0.5898357997244456], dfdw: [-0.19661193324148185, -0.3932238664829637, 0.19661193324148185]


In [8]:
import math

x, y = 3, -4

# Forward pass for f(x, y) = (x + sigmoid(y)) / (sigmoid(x) + (x + y)**2),
# broken into named intermediates so each stage can be differentiated alone.
sigx = 1.0 / (1.0 + math.exp(-x))   # sigmoid(x)
sigy = 1.0 / (1.0 + math.exp(-y))   # sigmoid(y)
xpy = x + y
xpysqr = xpy ** 2

num = x + sigy                      # numerator
den = sigx + xpysqr                 # denominator

invden = 1.0 / den                  # reciprocal of the denominator
f = num * invden

print(f"sigx: {sigx}, sigy: {sigy}, x+y: {xpy}, (x+y)^2: {xpysqr}, num: {num}, den: {den}, invden: {invden}")
print(f"f: {f}")

sigx: 0.9525741268224334, sigy: 0.01798620996209156, x+y: -1, (x+y)^2: 1, num: 3.0179862099620918, den: 1.9525741268224333, invden: 0.5121444488396316
f: 1.5456448841066441


In [12]:
# Backward pass through the staged forward computation, walked in reverse.
# Reads num, invden, den, xpy, sigx and sigy produced by the forward cell.

# f = num * invden: each factor's gradient is the other factor.
dnum, dinvden = invden, num
print(f"dfdnum: {dnum}, dfdinvden: {dinvden}")

# invden = 1 / den: local gradient is -1 / den**2.
dden = dinvden * (-1.0 / (den ** 2))
print(f"dfdden: {dden}")

# den = sigx + xpysqr: addition passes the gradient through to both inputs.
dsigx = dxpysqr = 1.0 * dden
print(f"dfdsigx: {dsigx}, dfdxpysqr: {dxpysqr}")

# xpysqr = xpy ** 2: local gradient is 2 * xpy.
dxpy = dxpysqr * (2 * xpy)
print(f"dfdxpy: {dxpy}")

# xpy = x + y: first contribution to both dx and dy.
dx = dy = 1 * dxpy
print(f"dfdx: {1 * dxpy}, dfdy: {1 * dxpy}")

# sigx = sigmoid(x): x is used in several places, so gradients accumulate (+=).
dx += dsigx * (sigx * (1 - sigx))
print(f"dfdx: {(sigx * (1 - sigx)) * dsigx}")

# num = x + sigy: third contribution to dx, and the gradient reaching sigy.
dsigy = 1 * dnum
dx += 1 * dnum
print(f"dfdx: {1 * dnum}, dfdsigy: {dsigy}")

# sigy = sigmoid(y): second contribution to dy.
dy += dsigy * (sigy * (1 - sigy))
print(f"dfdy: {(sigy * (1 - sigy)) * dsigy}")

print(f"[final] dfdx: {dx}, dfdy: {dy}")

dfdnum: 0.5121444488396316, dfdinvden: 3.0179862099620918
dfdden: -0.7915934472725935
dfdsigx: -0.7915934472725935, dfdxpysqr: -0.7915934472725935
dfdxpy: 1.583186894545187
dfdx: 1.583186894545187, dfdy: 1.583186894545187
dfdx: -0.03576154781265359
dfdx: 0.5121444488396316, dfdsigy: 0.5121444488396316
dfdy: 0.009045856938622317
[final] dfdx: 2.0595697955721652, dfdy: 1.5922327514838093


In [16]:
import numpy as np

# Forward pass: D = W . X with randomly drawn operands.
W = np.random.randn(4, 2)
X = np.random.randn(2, 3)

D = np.dot(W, X)    # shape [4, 3]
print(f"W: {W}")
print(f"X: {X}")
print(f"D: {D}")

# Backward pass.
# Pretend the upstream gradient dL/dD was handed to us; it must match D's shape.
dD = np.random.rand(*D.shape)    # shape [4, 3]
print(f"dLdD: {dD}")

# Shape analysis picks the operand order: each gradient has its variable's shape.
dW = np.dot(dD, X.T)    # upstream times X transposed: [4, 3] . [3, 2] -> [4, 2]
dX = np.dot(W.T, dD)    # W transposed times upstream: [2, 4] . [4, 3] -> [2, 3]
print(f"dLdW: {dW}")
print(f"dLdX: {dX}")

W: [[-0.1429865   0.17558202]
 [ 1.38290122 -1.30285315]
 [ 0.33725225  0.95069444]
 [ 0.99845258  0.6948907 ]]
X: [[-0.8292188   0.6376559   0.20898543]
 [-0.42855198 -0.05425221 -0.29552626]]
D: [[ 0.04332107 -0.1007019  -0.08177119]
 [-0.58838739  0.95249778  0.67403352]
 [-0.68707789  0.16347362 -0.21047437]
 [-1.12573243  0.59896982  0.00330359]]
dLdD: [[0.94238681 0.24986601 0.54802526]
 [0.82881446 0.02856052 0.26148397]
 [0.27709616 0.04088727 0.53231392]
 [0.77266089 0.36214939 0.1633659 ]]
dLdW: [[-0.50758702 -0.57937337]
 [-0.61441041 -0.43401493]
 [-0.09245548 -0.27828107]
 [-0.37563715 -0.39905167]]
dLdX: [[ 1.8763365   0.37914722  0.62588346]
 [-0.11400872  0.29718736  0.3751375 ]]
