In [30]:
import numpy as np

In [58]:
# 기울기

def numerical_gradient_1d(f, x):
    """Estimate the gradient of ``f`` at the 1-D point ``x`` by central differences.

    Each coordinate ``x[idx]`` is temporarily overwritten with ``x[idx] + h``
    and ``x[idx] - h`` (``h = 1e-4``), ``f(x)`` is evaluated at both points,
    and ``(f(x + h) - f(x - h)) / (2 * h)`` is stored as the partial
    derivative. The perturbation is applied to ``x`` itself, not to a copy,
    so an ``f`` that reads the same array through another reference sees the
    perturbed values too. Every coordinate is restored before the next one
    is touched, even if ``f`` raises.

    Args:
        f: Callable invoked as ``f(x)``; expected to return a scalar.
        x: 1-D NumPy array with a floating-point dtype.

    Returns:
        Array with the same shape and dtype as ``x`` holding the estimated
        partial derivatives.

    Raises:
        TypeError: If ``x`` has an integer or boolean dtype. Writing
            ``value + h`` back into such an array truncates the perturbation,
            which would silently produce an all-zero gradient.
    """
    if x.dtype.kind in "biu":
        raise TypeError(
            "numerical gradient needs a floating-point array, got dtype "
            f"{x.dtype}; convert with x.astype(float) first"
        )

    h = 1e-4
    grad = np.zeros_like(x)

    for idx in range(x.size):
        tmp_val = x[idx]
        try:
            x[idx] = tmp_val + h
            fxh1 = f(x)

            x[idx] = tmp_val - h
            fxh2 = f(x)
        finally:
            # Always undo the perturbation so the caller's array is intact.
            x[idx] = tmp_val

        grad[idx] = (fxh1 - fxh2) / (2 * h)

    return grad

def numerical_gradient_2d(f, x):
    """Estimate numerical gradients for every 1-D lane of ``x``.

    A 1-D ``x`` is handed straight to ``numerical_gradient_1d``. For higher
    ranks, each 1-D slice along the last axis (each row of a matrix) is
    passed to ``numerical_gradient_1d`` separately, so ``f`` always receives
    a single 1-D lane. The slices are views, which means the in-place
    perturbations are visible through ``x`` itself while ``f`` is evaluated.

    Arrays with more than two dimensions are supported as well: all leading
    axes are treated as batch axes.

    Args:
        f: Callable forwarded to ``numerical_gradient_1d``.
        x: NumPy array with at least one dimension.

    Returns:
        Array with the same shape as ``x`` containing the estimated
        gradients.
    """
    if x.ndim == 1:
        return numerical_gradient_1d(f, x)

    grad = np.zeros_like(x)

    # Indexing with only the leading axes is basic indexing, so x[lane] is a
    # 1-D view into x, not a copy.
    for lane in np.ndindex(x.shape[:-1]):
        grad[lane] = numerical_gradient_1d(f, x[lane])

    return grad

In [51]:
# 경사 하강법 구현

def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimize ``f`` with plain gradient descent and return the final point.

    Starting from ``init_x``, the update ``x -= lr * grad`` is applied
    ``step_num`` times, where ``grad`` is the numerical gradient of ``f`` at
    the current ``x``.

    Args:
        f: Callable invoked as ``f(x)``; expected to return a scalar.
        init_x: Starting point (1-D array-like). It is not modified.
        lr: Learning rate, i.e. the step size multiplier.
        step_num: Number of update steps to perform.

    Returns:
        A new NumPy array holding the point reached after ``step_num`` steps.
    """
    # Copy so the in-place updates below do not overwrite the caller's array.
    x = np.array(init_x)
    if x.dtype.kind in "biu":
        # Integer/bool arrays cannot hold the fractional updates.
        x = x.astype(float)

    for _ in range(step_num):
        grad = numerical_gradient_1d(f, x)
        x -= lr * grad
    return x

In [52]:
# 경사법으로 f(x0, x1) = x0^2 + x1^2 의 최솟값을 구하기

def function_2(x):
    """Return ``x[0]**2 + x[1]**2``, the sum of squares of the first two entries of ``x``."""
    first, second = x[0], x[1]
    return first ** 2 + second ** 2

# Start from (-3, 4); with lr=0.1 and 100 steps the result lands at roughly
# (0, 0), the minimum of function_2.
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=0.1, step_num=100)

array([-6.11110793e-10,  8.14814391e-10])

In [53]:
# Learning rate too large (lr = 10.0): the updates overshoot and the result diverges.
init_x = np.array([-3.0, 4.0])
print(gradient_descent(function_2, init_x=init_x, lr=10.0, step_num=100))

# Learning rate too small (lr = 1e-10): x barely moves away from the starting point.
init_x = np.array([-3.0, 4.0])
print(gradient_descent(function_2, init_x=init_x, lr=1e-10, step_num=100))

[-2.58983747e+13 -1.29524862e+12]
[-2.99999994  3.99999992]


In [54]:
# 기울기를 구하는 코드

# 소프트맥스 함수
def softmax(x):
    """Return the softmax of ``x``, normalized over all of its elements.

    The maximum of ``x`` is subtracted before exponentiating. This does not
    change the result but keeps ``np.exp`` from overflowing on large inputs.
    """
    shifted = x - np.max(x)  # overflow guard
    exps = np.exp(shifted)
    return exps / np.sum(exps)

# CEE (Cross Entropy Error)
def cross_entropy_error(y, t):
    """Return the cross-entropy error ``-sum(t * log(y + 1e-7))``.

    Args:
        y: Predicted values (e.g. softmax output).
        t: Target values, multiplied elementwise with ``log(y + 1e-7)``.
    """
    eps = 1e-7  # keeps log() away from log(0) = -inf
    log_y = np.log(y + eps)
    return -np.sum(t * log_y)

class simpleNet:
    """Single-layer network: a 2x3 weight matrix ``W`` with a softmax cross-entropy loss."""

    def __init__(self):
        # 2x3 weights drawn uniformly from [0, 1) by np.random.rand.
        # NOTE(review): the previous comment said "initialize with a normal
        # distribution", which would be np.random.randn -- confirm which
        # initialization is intended.
        self.W = np.random.rand(2, 3)

    def predict(self, x):
        """Return the raw scores ``np.dot(x, W)`` for input ``x``."""
        return np.dot(x, self.W)

    def loss(self, x, t):
        """Return the cross-entropy error between ``softmax(predict(x))`` and target ``t``."""
        scores = self.predict(x)
        probs = softmax(scores)
        return cross_entropy_error(probs, t)

In [60]:
net = simpleNet()
print(net.W)  # weights

print('\n')

x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)
print(np.argmax(p))  # index of the largest score

print('\n')

t = np.array([0, 0, 1])  # ground-truth label
print(net.loss(x, t))


# Gradient of the loss with respect to the weights.
def f(W):
    """Return the loss of ``net`` on ``(x, t)``.

    The argument ``W`` is intentionally unused: the numerical-gradient
    routine perturbs ``net.W`` in place, and ``net.loss`` reads ``net.W``
    directly, so every call already sees the perturbed weights.
    """
    return net.loss(x, t)


dW = numerical_gradient_2d(f, net.W)
print(dW)

[[0.63505839 0.17986436 0.89338109]
 [0.78663061 0.43114324 0.66209967]]


[1.08900258 0.49594753 1.13191836]
2


0.9112431005523967
[[ 0.23108146  0.12770402 -0.35878548]
 [ 0.34662219  0.19155603 -0.53817821]]
