In [1]:
import numpy as np

In [2]:
# x0와 x1의 편미분을 동시에 구하고싶다면?
def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)  # x와 형상이 같은 배열을 생성
    
    for idx in range(x.size):
        tmp_val = x[idx]
        
        # f(x+h) 계산
        x[idx] = tmp_val + h 
        fxh1 = f(x)
        
        # f(x-h) 계산
        x[idx] = tmp_val - h 
        fxh2 = f(x)
        
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val
        
    return grad

In [3]:
def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T 

    x = x - np.max(x) # 오버플로 대책
    return np.exp(x) / np.sum(np.exp(x))

def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
        
    # 훈련 데이터가 원-핫 벡터라면 정답 레이블의 인덱스로 반환
    if t.size == y.size:
        t = t.argmax(axis=1)
             
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

In [4]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    x = init_x
    
    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr*grad
        
    return x

In [5]:
# 문제 : 경사법으로 f(x0,x1) = x0**2 + x1**2의 최솟값을 구하라
def function_2(x):
    return x[0]**2 + x[1]**2

init_x = np.array([-3.0, 4.0])

gradient_descent(function_2, init_x=init_x)

array([-0.39785867,  0.53047822])

### 신경망에서의 기울기

In [6]:
import sys, os
import numpy as np

In [7]:
class simpleNet:
    def __init__(self):
        self.W = np.random.randn(2,3)  # 정규분포로 초기화
        
    def predict(self, x):
        return np.dot(x, self.W)
    
    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, t)
        
        return loss

In [8]:
net = simpleNet()
net.W

array([[-0.00375559,  1.1677413 ,  0.67157966],
       [-0.14346331,  0.74801055, -0.40680585]])

In [9]:
x = np.array([0.6,0.9])
p = net.predict(x)
p

array([-0.13137033,  1.37385428,  0.03682253])

In [11]:
np.argmax(p)

1

In [12]:
t = np.array([0, 0, 1])
net.loss(x, t)

1.7321708203427038

In [14]:
def f(W):
    return net.loss(x, t)

In [15]:
dW = numerical_gradient(f, net.W)
dW

IndexError: index 2 is out of bounds for axis 0 with size 2