In [2]:
import numpy as np
import pickle
import os
from dataset.mnist import load_mnist
import matplotlib.pylab as plt

In [5]:
def identity_funtion(x):
    return x


def step_function(x):
    return np.array(x > 0, dtype=np.int)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def softmax(x):
    x = x - np.max(x)
    return np.exp(x) / np.sum(np.exp(x))

### Prediction

In [15]:
def get_data():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test


def init_network():
    with open("sample_weight.pkl", 'rb') as f:
        network = pickle.load(f)
    return network


def predict(network, x):
    w1, w2, w3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = np.dot(x, w1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, w2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, w3) + b3
    y = softmax(a3)

    return y


### Batch

In [7]:
a = np.array([
    [0.1, 0.8, 0.1],
    [0.3, 0.1, 0.8],
    [0.2, 0.5, 0.3],
    [0.8, 0.1, 0.1]
])

print (np.argmax(a, axis=1))

[1 2 1 0]


In [9]:
a = np.array([0, 2, 2, 0, 1])
b = np.array([1, 2, 0, 0, 1])

print (a==b)
print (np.sum(a==b))

[False  True False  True  True]
3


In [10]:
x, t = get_data()
network = init_network()

In [11]:
print (x.shape)

(10000, 784)


In [12]:
print (t.shape)

(10000,)


In [14]:
batch_size = 200
accuracy_cnt = 0

for i in range(0, len(x), batch_size):
    x_batch = x[i:i+batch_size]
    y_batch = predict(network, x_batch)
    p = np.argmax(y_batch, axis=1)
    accuracy_cnt += np.sum(p == t[i:i+batch_size])
    
print("AccuracY:" + str(float(accuracy_cnt) / len(x)))

AccuracY:0.9352


## 모델 학습

### 평균 제곱 오차(Mean Squared Error, MSE)

In [17]:
def mean_squared_error(y, t):
    return 0.5 * np.sum((y-t)**2)

In [18]:
y = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])
t = np.array([0, 0, 1 ,0 ,0, 0, 0, 0, 0, 0])

mean_squared_error(y, t)

0.09750000000000003

### 크로스 엔트로피 오차 (Cross Entropy Error, CEE)

In [27]:
def cross_entropy_error(y, t):
    delta = 1e-7
    return -np.sum(t * np.log(y + delta))

In [28]:
cross_entropy_error(y, t)

0.510825457099338

### 배치 버전

In [29]:
def cross_entropy_error(y, t):
    if y.ndim ==1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    
    batch_size = y.shape[0]
    delta = 1e-7
    return -np.sum(t * np.log(y + delta)) / batch_size

In [30]:
cross_entropy_error(y, t)

0.510825457099338

## (수치) 미분

## 우리가 알고 있는 방식

In [31]:
def numerical_diff(f, x):
    h = 10e-50
    return (f(x + h) - f(x)) / h

## 적당히 작은 h, 중심 차분 적용

In [32]:
def numerical_diff(f, x):
    h = 1e-4
    return (f(x + h) - f(x - h) / (2*h))

In [36]:
def functin_1(x):
    return 0.01 * x **2+ 0.1*x

def tangent_line(f, x):
    d = numerical_diff(f, x)
    print (d)
    y = f(x) - d*x
    return labmda t*d + y

''' 
'''

SyntaxError: invalid syntax (<ipython-input-36-63e3b06ffb40>, line 8)

### 편미분

In [37]:
def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)
    
    for idx in range(x.size):
        tmp_val = x[idx]
        
        x[idx] = tmp_val + h
        fxh1 = f(x)
        
        x[idx] = tmp_val - h
        fxh2 = f(x)
        
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val

    return grad

In [39]:
def function_2(x):
    if x.ndim == 1:
        return np.sum(x**2)
    else:
        return np.sum(x**2, axis=1) # f = x_1^2 + x_2^2 + ... + x_n^2

In [40]:
numerical_gradient(function_2, np.array([3.0, 4.0]))

array([6., 8.])