# 1. 수동 미분

- 직접 미적분을 통한 편도함수 유도
- 매우 번거롭고 실수 가능성 존재

# 2. 유한 차분 근사(Finite Difference Approximation)

- 함수 → 도함수 : 해당 포인트에서 함수의 기울기
- 다른 한 점이 해당 포인트에 무한히 가까워질 때, 두 점을 지나는 직선(할선) 기울기의 극한 = 접선의 기울기
- 매우 작은 수를 사용해 도함수를 구하는 수치적 근사 방법
- 뉴턴의 차분몫(Newton’s Difference Quotient) 방정식 사용: $f'(x) \approx \dfrac{f(x + \varepsilon) - f(x)}{\varepsilon}$ (ε은 매우 작은 수)

# 3. 전진 모드 자동 미분(Forward-Mode Autodiff)

- 입력에서 출력까지 계산 그래프를 따라 진행
- 리프 노드 편도함수 → 곱셈 노드 → 덧셈 노드
- 기호 미분(Symbolic Differentiation) : 전진 모드 자동 미분으로 다른 계산 그래프를 생성한 것

# 4. 후진 모드 자동 미분(Reverse-Mode Autodiff)

- 정방향으로 그래프 실행 → 각 노드 값 계산
- 역방향으로 실행 → 모든 편도함수 계산
- 미분 연쇄 법칙 사용

In [1]:
def f(x, y):
    """Return ``x * x * y + y + 2``, the example function used throughout."""
    x_squared = x * x
    return x_squared * y + y + 2

In [2]:
def df(x, y):
    """Return the hand-derived partial derivatives of ``x * x * y + y + 2``.

    Returns:
        A tuple ``(df/dx, df/dy)`` equal to ``(2 * x * y, x * x + 1)``.
    """
    df_dx = 2 * x * y
    df_dy = x * x + 1
    return df_dx, df_dy

In [3]:
df(3, 4)

(24, 10)

In [4]:
def d2f(x, y):
    """Return the hand-derived second-order partials of ``x * x * y + y + 2``.

    Returns:
        A tuple of two lists, ``([d2f/dxdx, d2f/dxdy], [d2f/dydx, d2f/dydy])``.
    """
    row_x = [2 * y, 2 * x]
    row_y = [2 * x, 0]
    return row_x, row_y

In [5]:
d2f(3, 4)

([8, 6], [6, 0])

In [6]:
def gradients(func, vars_list, eps=0.0001):
    """Approximate the gradient of ``func`` with forward finite differences.

    Each partial derivative is estimated with Newton's difference quotient
    ``(func(..., v_i + eps, ...) - func(..., v_i, ...)) / eps``.

    Args:
        func: Callable taking ``len(vars_list)`` positional numeric arguments.
        vars_list: Point at which to differentiate. Any iterable of numbers
            is accepted (list, tuple, ...); it is never modified.
        eps: Step added to one variable at a time. Must be non-zero, since
            the difference is divided by it.

    Returns:
        A list with one approximate partial derivative per variable, in the
        same order as ``vars_list``.
    """
    # Work on our own list so tuples and other iterables are accepted and the
    # caller's sequence is never touched.
    point = list(vars_list)
    base_func_eval = func(*point)
    partial_derivatives = []
    for idx, value in enumerate(point):
        tweaked_vars = list(point)
        tweaked_vars[idx] = value + eps  # nudge only the idx-th variable
        derivative = (func(*tweaked_vars) - base_func_eval) / eps
        partial_derivatives.append(derivative)
    return partial_derivatives

In [7]:
def df(x, y):
    """Return the finite-difference gradient of ``f`` at ``(x, y)`` as a list."""
    point = [x, y]
    return gradients(f, point)

In [8]:
df(3, 4)

[24.000400000048216, 10.000000000047748]

In [9]:
def dfdx(x, y):
    """Return the finite-difference estimate of df/dx at ``(x, y)``."""
    partials = gradients(f, [x, y])
    return partials[0]


def dfdy(x, y):
    """Return the finite-difference estimate of df/dy at ``(x, y)``."""
    partials = gradients(f, [x, y])
    return partials[1]


dfdx(3., 4.), dfdy(3., 4.)

(24.000400000048216, 10.000000000047748)

In [10]:
def d2f(x, y):
    """Approximate the Hessian of ``f`` at ``(x, y)`` with finite differences.

    The first-order estimators ``dfdx`` and ``dfdy`` are themselves
    differentiated numerically with ``gradients``.

    Args:
        x: First coordinate of the evaluation point.
        y: Second coordinate of the evaluation point.

    Returns:
        ``[[d2f/dxdx, d2f/dxdy], [d2f/dydx, d2f/dydy]]`` as nested lists of
        approximate values.
    """
    # Differentiate at the requested point; the previous version ignored its
    # arguments and always used the hard-coded point (3., 4.).
    point = [x, y]
    return [gradients(dfdx, point), gradients(dfdy, point)]

In [11]:
d2f(3, 4)

[[7.999999951380232, 6.000099261882497],
 [6.000099261882497, -1.4210854715202004e-06]]

In [12]:
class Const:
    """Leaf node of the expression graph that holds a fixed value."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return str(self.value)

    def evaluate(self):
        """Return the stored value."""
        return self.value


class Var:
    """Named leaf node whose ``value`` attribute can be reassigned."""

    def __init__(self, name, init_value=0):
        self.name = name
        self.value = init_value

    def __str__(self):
        return self.name

    def evaluate(self):
        """Return the value currently stored in the variable."""
        return self.value


class BinaryOperator:
    """Base class for graph nodes with two operands, stored as ``a`` and ``b``."""

    def __init__(self, a, b):
        self.a, self.b = a, b


class Add(BinaryOperator):
    """Graph node representing ``a + b``."""

    def __str__(self):
        template = "{} + {}"
        return template.format(self.a, self.b)

    def evaluate(self):
        """Evaluate both operands (left first) and return their sum."""
        left = self.a.evaluate()
        right = self.b.evaluate()
        return left + right


class Mul(BinaryOperator):
    """Graph node representing ``a * b``."""

    def __str__(self):
        template = "({}) * ({})"
        return template.format(self.a, self.b)

    def evaluate(self):
        """Evaluate both operands (left first) and return their product."""
        left = self.a.evaluate()
        right = self.b.evaluate()
        return left * right

In [13]:
# Build the expression graph for x*x*y + y + 2 out of node objects.
# NOTE: this rebinds the name `f`, which until now referred to the plain
# Python function f(x, y); the finite-difference helpers that look up the
# global `f` will see this Add node instead if they are called afterwards.
x = Var("x")
y = Var("y")
f = Add(Mul(Mul(x, x), y), Add(y, Const(2)))

In [14]:
# Assign concrete inputs to the leaf variables, then evaluate the whole graph.
x.value = 3
y.value = 4
f.evaluate()

42

In [15]:
from math import sin


def z(x):
    """Return ``sin(x ** 2)``."""
    squared = x ** 2
    return sin(squared)


gradients(z, [3])

[-5.46761419430053]

In [16]:
# Attach a `gradient(var)` method to each node class. Every rule returns a NEW
# expression graph representing d(node)/d(var), not a number.
# Constant rule: the derivative of a constant is 0.
Const.gradient = lambda self, var: Const(0)
# A variable's derivative is 1 only with respect to the very same Var object
# (identity check with `is`), otherwise 0.
Var.gradient = lambda self, var: Const(1) if self is var else Const(0)
# Sum rule: d(a + b) = da + db.
Add.gradient = lambda self, var: Add(self.a.gradient(var), self.b.gradient(var))
# Product rule: d(a * b) = a * db + da * b.
Mul.gradient = lambda self, var: Add(Mul(self.a, self.b.gradient(var)), Mul(self.a.gradient(var), self.b))

# Rebuild the graph for x*x*y + y + 2, this time with float-valued leaves.
x = Var(name="x", init_value=3.)
y = Var(name="y", init_value=4.)
f = Add(Mul(Mul(x, x), y), Add(y, Const(2)))

# First-order derivative graphs.
# NOTE: these rebind `dfdx` / `dfdy`, which earlier named the
# finite-difference helper functions.
dfdx = f.gradient(x)
dfdy = f.gradient(y)

In [17]:
dfdx.evaluate(), dfdy.evaluate()

(24.0, 10.0)

In [18]:
# Differentiate the first-derivative graphs once more to obtain the four
# second-order partial-derivative graphs.
d2fdxdx = dfdx.gradient(x)
d2fdxdy = dfdx.gradient(y)
d2fdydx = dfdy.gradient(x)
d2fdydy = dfdy.gradient(y)

In [19]:
[[d2fdxdx.evaluate(), d2fdxdy.evaluate()], [d2fdydx.evaluate(), d2fdydy.evaluate()]]

[[8.0, 6.0], [6.0, 0.0]]

In [20]:
class DualNumber:
    """Number of the form ``value + eps·ε`` supporting ``+`` and ``*``.

    Multiplication keeps only the terms ``value * value`` and the two cross
    terms, i.e. the ε·ε product is dropped.
    """

    def __init__(self, value=0.0, eps=0.0):
        self.value = value
        self.eps = eps

    @classmethod
    def to_dual(cls, n):
        """Return ``n`` unchanged if it has a ``value`` attribute, else wrap it."""
        return n if hasattr(n, "value") else cls(n)

    def __add__(self, b):
        other = self.to_dual(b)
        return DualNumber(self.value + other.value, self.eps + other.eps)

    def __radd__(self, a):
        # Reached for `plain_number + dual`: promote the left operand first.
        return self.to_dual(a).__add__(self)

    def __mul__(self, b):
        other = self.to_dual(b)
        real_part = self.value * other.value
        eps_part = self.eps * other.value + self.value * other.eps
        return DualNumber(real_part, eps_part)

    def __rmul__(self, a):
        # Reached for `plain_number * dual`: promote the left operand first.
        return self.to_dual(a).__mul__(self)

    def __str__(self):
        # The ε term is omitted when its coefficient is falsy (zero).
        if not self.eps:
            return "{:.1f}".format(self.value)
        return "{:.1f} + {:.1f}ε".format(self.value, self.eps)

    def __repr__(self):
        return str(self)

In [21]:
3 + DualNumber(3, 4)

6.0 + 4.0ε

In [22]:
DualNumber(3, 4) * DualNumber(5, 7)

15.0 + 41.0ε

In [23]:
# Feed dual numbers with a zero ε part through the existing graph; the result
# is again a DualNumber, printed without an ε term.
x.value = DualNumber(3.0)
y.value = DualNumber(4.0)

f.evaluate()

42.0

In [24]:
# Forward-mode pass 1: give x an ε part of 1 and y an ε part of 0, so the
# ε coefficient of the result is df/dx.
x.value = DualNumber(3.0, 1.0)
y.value = DualNumber(4.0)

# NOTE: rebinds `dfdx` to a plain number (it previously held a gradient graph).
dfdx = f.evaluate().eps

# Forward-mode pass 2: the roles are swapped, so the ε coefficient is df/dy.
# One full evaluation of the graph is needed per input variable.
x.value = DualNumber(3.0)
y.value = DualNumber(4.0, 1.0)

dfdy = f.evaluate().eps

In [25]:
dfdx

24.0

In [26]:
dfdy

10.0

In [27]:
class Const:
    """Constant leaf node for the reverse-mode (backpropagation) graph."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return str(self.value)

    def evaluate(self):
        """Return the stored value."""
        return self.value

    def backpropagate(self, gradient):
        """Discard the incoming gradient; a constant accumulates nothing."""
        return None

class Var:
    """Named leaf node that accumulates incoming gradients in ``gradient``."""

    def __init__(self, name, init_value=0):
        self.name = name
        self.value = init_value
        self.gradient = 0  # running sum of every gradient passed to backpropagate()

    def __str__(self):
        return self.name

    def evaluate(self):
        """Return the value currently stored in the variable."""
        return self.value

    def backpropagate(self, gradient):
        """Add ``gradient`` to the running total kept in ``self.gradient``."""
        self.gradient += gradient


class BinaryOperator:
    """Base class for graph nodes with two operands, stored as ``a`` and ``b``."""

    def __init__(self, a, b):
        self.a, self.b = a, b


class Add(BinaryOperator):
    """Graph node representing ``a + b`` with backpropagation support."""

    def __str__(self):
        template = "{} + {}"
        return template.format(self.a, self.b)

    def evaluate(self):
        """Compute the sum of both operands, cache it in ``self.value``, return it."""
        total = self.a.evaluate() + self.b.evaluate()
        self.value = total
        return total

    def backpropagate(self, gradient):
        """Pass ``gradient`` unchanged to both operands, ``a`` first."""
        for operand in (self.a, self.b):
            operand.backpropagate(gradient)


class Mul(BinaryOperator):
    """Graph node representing ``a * b`` with backpropagation support."""

    def __str__(self):
        template = "({}) * ({})"
        return template.format(self.a, self.b)

    def evaluate(self):
        """Compute the product of both operands, cache it in ``self.value``, return it."""
        product = self.a.evaluate() * self.b.evaluate()
        self.value = product
        return product

    def backpropagate(self, gradient):
        """Send each operand the gradient scaled by the other operand's ``value``.

        Reads the operands' ``value`` attributes, which ``Add``/``Mul`` nodes
        only set inside ``evaluate()``.
        """
        left, right = self.a, self.b
        left.backpropagate(gradient * right.value)
        right.backpropagate(gradient * left.value)

In [28]:
# Rebuild the graph for x*x*y + y + 2 from the backpropagation-capable classes.
x = Var("x", init_value=3)
y = Var("y", init_value=4)
f = Add(Mul(Mul(x, x), y), Add(y, Const(2)))

# Forward pass first: Add/Mul.evaluate() store self.value on each node, and
# Mul.backpropagate() reads those stored values afterwards.
result = f.evaluate()
# Backward pass seeded with 1.0; the partial derivatives accumulate in
# x.gradient and y.gradient.
f.backpropagate(1.0)

In [29]:
print(f)

((x) * (x)) * (y) + y + 2


In [30]:
result

42

In [31]:
x.gradient

24.0

In [32]:
y.gradient

10.0

In [33]:
import tensorflow as tf

x = tf.Variable(3.)
y = tf.Variable(4.)

# Compute f inside the tape context so the tape can differentiate it below.
with tf.GradientTape() as tape:
    f = x * x * y + y + 2

# One gradient per source variable, in the order [df/dx, df/dy]
# (stored under the name `jacobians`).
jacobians = tape.gradient(f, [x, y])
jacobians

[<tf.Tensor: shape=(), dtype=float32, numpy=24.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [34]:
x = tf.Variable(3.)
y = tf.Variable(4.)

# The same tape is queried three times below, hence persistent=True; it is
# released explicitly with `del tape` once the second derivatives are taken.
with tf.GradientTape(persistent=True) as tape:
    f = x * x * y + y + 2
    # The first-order gradients are taken INSIDE the context so that they can
    # be differentiated again afterwards.
    # NOTE(review): the TensorFlow docs describe nested tapes as the usual way
    # to get higher-order derivatives -- confirm before reusing this pattern.
    df_dx, df_dy = tape.gradient(f, [x, y])

# Second derivatives: differentiate each first-order gradient w.r.t. x and y.
d2f_d2x, d2f_dydx = tape.gradient(df_dx, [x, y])
d2f_dxdy, d2f_d2y = tape.gradient(df_dy, [x, y])
del tape

# df/dy = x*x + 1 does not involve y, so d2f_d2y comes back as None rather
# than a zero tensor (see the last entry of the displayed result).
# NOTE(review): tape.gradient has an `unconnected_gradients` argument that can
# request zeros instead -- confirm against the TensorFlow docs.
hessians = [[d2f_d2x, d2f_dydx], [d2f_dxdy, d2f_d2y]]
hessians



[[<tf.Tensor: shape=(), dtype=float32, numpy=8.0>,
  <tf.Tensor: shape=(), dtype=float32, numpy=6.0>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=6.0>, None]]