---

### 感知器公式

![](https://raw.githubusercontent.com/massquantity/DL_from_scratch_NOTE/master/pic/1.png)

<font size=2> 
    $b$ 是被称为偏置的参数,用于控制神经元被激活的容易程度;而 $w_1$ 和 $w_2$ 是表示各个信号的权重的参数,用于控制各个信号的重要性。</font>
<br><br><br>

### ReLU激活函数

In [1]:
def relu(x):
    """Apply the ReLU activation element-wise.

    Returns ``np.maximum(0, x)``: every entry of ``x`` below zero is replaced
    by zero and every other entry is kept.
    """
    # np.maximum compares element-wise; np.max would reduce to a single value.
    lower_bound = 0
    return np.maximum(lower_bound, x)

## 3层神经网络的实现

![](https://raw.githubusercontent.com/massquantity/DL_from_scratch_NOTE/master/pic/ch03_1.png)
![](https://raw.githubusercontent.com/massquantity/DL_from_scratch_NOTE/master/pic/ch03_2.png)
![](https://raw.githubusercontent.com/massquantity/DL_from_scratch_NOTE/master/pic/ch03_3.png)
![](https://raw.githubusercontent.com/massquantity/DL_from_scratch_NOTE/master/pic/ch03_4.png)
![](https://raw.githubusercontent.com/massquantity/DL_from_scratch_NOTE/master/pic/ch03_5.png)

In [None]:
import numpy as np

In [None]:
def init_network():
    """Create randomly initialised parameters for a 2 -> 3 -> 2 -> 2 network.

    Returns:
        dict: weight matrices ``W1`` (2, 3), ``W2`` (3, 2), ``W3`` (2, 2) and
        bias vectors ``b1`` (3,), ``b2`` (2,), ``b3`` (2,), all drawn with
        ``np.random.randn``.
    """
    network = dict()
    network['W1'] = np.random.randn(2, 3)
    network['b1'] = np.random.randn(3)
    # The first layer emits 3 values and b2 has 2 entries, so W2 must be
    # (3, 2); a (2, 3) matrix makes np.dot(a1, W2) fail with a shape error.
    network['W2'] = np.random.randn(3, 2)
    network['b2'] = np.random.randn(2)
    network['W3'] = np.random.randn(2, 2)
    network['b3'] = np.random.randn(2)
    return network

def forward(network, x):
    """Run a forward pass through the three-layer network.

    Args:
        network: mapping with the keys ``W1``/``W2``/``W3`` (weights) and
            ``b1``/``b2``/``b3`` (biases).
        x: input passed to the first ``np.dot``.

    Returns:
        The result of ``softmax`` applied to the third affine layer. The two
        hidden layers use ``sigmoid``; both helpers must be defined elsewhere.
    """
    # Look up every parameter first so a missing key fails before any math.
    weights = [network[key] for key in ('W1', 'W2', 'W3')]
    biases = [network[key] for key in ('b1', 'b2', 'b3')]

    signal = x
    # Hidden layers: affine transform followed by sigmoid.
    for weight, bias in zip(weights[:-1], biases[:-1]):
        signal = sigmoid(np.dot(signal, weight) + bias)

    # Output layer: affine transform followed by softmax.
    return softmax(np.dot(signal, weights[-1]) + biases[-1])

### 实现 softmax 函数时的注意事项
<br>

<font size=2> 
    一般的 `softmax` 实现在计算机的运算上有一定的缺陷,这个缺陷就是溢出问题。`softmax` 函数的实现中要进行指数函数的运算,但是此时指数函数的值很容易变得非常大。比如,$e^{10}$ 的值会超过 20000,$e^{100}$ 会变成一个后面有 40 多个 0 的超大值,$e^{1000}$ 的结果会返回一个表示无穷大的 `inf`。如果在这些超大值之间进行除法运算,结果会出现“不确定”(`nan`)的情况。由于分子和分母同时乘以常数 $e^{-c}$ 不会改变 `softmax` 的结果,所以通常先从输入中减去其最大值 $c = \max_k a_k$,再进行指数运算。
</font>

In [3]:
def softmax(a):
    """Numerically stable softmax along the last axis.

    The maximum is subtracted before exponentiating so that ``np.exp`` never
    receives a large positive value; this leaves the result unchanged.

    Args:
        a: scalar, 1-D score vector, or an array whose last axis holds the
            scores of one sample (e.g. a ``(batch, classes)`` matrix).

    Returns:
        Array of the same shape as ``a`` whose entries along the last axis
        are non-negative and sum to 1.
    """
    a = np.asarray(a)
    # Reduce per sample (last axis) instead of over the whole array, so a
    # batch is normalised row by row. A 0-d input has no axis to reduce.
    axis = -1 if a.ndim > 0 else None
    c = np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(a - c)
    return exp_a / np.sum(exp_a, axis=axis, keepdims=True)

### show image

In [11]:
# Append the parent directory (os.pardir, i.e. '..') to the module search
# path -- presumably so the `dataset` package imported later can be found.
import sys, os
sys.path.append(os.pardir)
sys.path  # bare expression: the notebook echoes the resulting search path

['',
 '/usr/lib/spark/spark-2.3.2-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip',
 '/usr/lib/spark/spark-2.3.2-bin-hadoop2.7/python',
 '/home/massquantity/Workspace/DL_from_scratch/NOTE',
 '/home/massquantity/.conda/envs/py35/lib/python35.zip',
 '/home/massquantity/.conda/envs/py35/lib/python3.5',
 '/home/massquantity/.conda/envs/py35/lib/python3.5/plat-linux',
 '/home/massquantity/.conda/envs/py35/lib/python3.5/lib-dynload',
 '/home/massquantity/.conda/envs/py35/lib/python3.5/site-packages',
 '/home/massquantity/.conda/envs/py35/lib/python3.5/site-packages/python_recsys-0.2-py3.5.egg',
 '/home/massquantity/.conda/envs/py35/lib/python3.5/site-packages/IPython/extensions',
 '/home/massquantity/.ipython',
 '..']

In [12]:
# NOTE(review): hard-coded absolute path from one machine; anyone else running
# this cell must point it at their own copy of the dataset directory.
sys.path.append("/home/massquantity/Workspace/DL_from_scratch/dataset")

In [14]:
from dataset.mnist import load_mnist
from PIL import Image

In [15]:
# Load the MNIST train/test splits as (inputs, labels) pairs. With
# flatten=True each sample is later reshaped back to 28x28 for display;
# normalize=False presumably keeps the raw pixel values -- TODO confirm
# against load_mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)

In [16]:
def img_show(img):
    """Display an array as an image.

    The input is cast with ``np.uint8``, wrapped by ``Image.fromarray`` and
    then shown through the resulting image's ``show()`` method.
    """
    pixels = np.uint8(img)
    Image.fromarray(pixels).show()

In [18]:
# Display the first training sample. `label` is read but not used below.
img = x_train[0].reshape(28, 28) # reshape the flattened sample back to 28x28
label = t_train[0]
img_show(img)

### Cross Entropy

$$
\begin{align}
& E = -\sum\limits_k t_k \log y_k \\
& E = -\frac{1}{N} \sum\limits_n \sum\limits_k t_{nk} \log y_{nk}  \qquad \text{(batch version)}
\end{align}
$$

In [10]:
def cross_entropy_error(y, t):
    """Mean cross-entropy error for one-hot targets.

    Computes ``-sum(t * log(y + 1e-7)) / batch_size``.

    Args:
        y: predicted probabilities, 1-D (single sample) or 2-D with one row
            per sample.
        t: one-hot targets with the same layout as ``y``.

    Returns:
        The cross-entropy summed over all entries and divided by the number
        of rows of ``y``.
    """
    if y.ndim == 1:
        # Promote a single sample to a batch of one row.
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    batch_size = y.shape[0]
    # Weight the log-probabilities by the targets t (not by y itself);
    # 1e-7 keeps log() away from log(0).
    return -np.sum(t * np.log(y + 1e-7)) / batch_size

In [11]:
def cross_entropy_error(y, t):
    """Mean cross-entropy error for label or one-hot targets.

    Args:
        y: predicted probabilities, 1-D (single sample) or 2-D with one row
            per sample.
        t: class-index labels (e.g. ``[1, 4, 8, ...]``) or one-hot targets
            with as many elements as ``y``.

    Returns:
        ``-sum(log(p + 1e-7)) / batch_size`` where ``p`` is, for every row of
        ``y``, the entry selected by that row's label.
    """
    if y.ndim == 1:
        # Promote a single sample to a batch of one row.
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    # Same element count as y means t is one-hot: reduce it to class indices.
    labels = t.argmax(axis=1) if t.size == y.size else t

    n_samples = y.shape[0]
    # Pick, for each row, the probability assigned to its target class.
    picked = y[np.arange(n_samples), labels]
    return -np.sum(np.log(picked + 1e-7)) / n_samples

### Gradient

In [12]:
def numerical_gradient_2d(f, X):
    """Central-difference gradient of ``f``, evaluated row by row.

    Args:
        f: function called with one row of ``X`` at a time.
        X: 2-D array; each row is perturbed in place and restored afterwards.

    Returns:
        Array shaped like ``X`` where entry ``[i, j]`` is
        ``(f(row_i + h*e_j) - f(row_i - h*e_j)) / (2*h)`` with ``h = 1e-4``.
    """
    grad = np.zeros_like(X)
    h = 1e-4
    for idx, x in enumerate(X):  # x is a view onto row idx of X
        for idx_2 in range(x.size):
            temp = x[idx_2]
            x[idx_2] = temp + h
            fxh1 = f(x)  # f(x + h)

            x[idx_2] = temp - h
            fxh2 = f(x)  # f(x - h)
            # Store the partial derivative in its own cell; assigning to
            # grad[idx] would overwrite the whole row on every iteration.
            grad[idx, idx_2] = (fxh1 - fxh2) / (2 * h)
            x[idx_2] = temp  # restore the original value
    return grad

In [20]:
def _numerical_gradient_1d(f, x):
    """Central-difference gradient of ``f`` at the 1-D point ``x``.

    Each coordinate of ``x`` is perturbed in place by ``+/- 1e-4`` and then
    restored, so ``x`` holds its original values on return.
    """
    step = 1e-4  # 0.0001
    partials = np.zeros_like(x)

    for pos in range(x.size):
        saved = x[pos]

        x[pos] = float(saved) + step
        upper = f(x)  # f(x + h)

        x[pos] = saved - step
        lower = f(x)  # f(x - h)

        partials[pos] = (upper - lower) / (2 * step)
        x[pos] = saved  # restore the original value

    return partials


def numerical_gradient_2d(f, X):
    """Numerical gradient of ``f`` for a 1-D point or a stack of points.

    A 1-D ``X`` is handled directly by ``_numerical_gradient_1d``. Otherwise
    the gradient is computed independently for every ``X[i]`` and written
    into an array shaped like ``X``.
    """
    if X.ndim != 1:
        grad = np.zeros_like(X)
        for row_idx in range(len(X)):
            # X[row_idx] is a view, so the in-place perturbation reaches X.
            grad[row_idx] = _numerical_gradient_1d(f, X[row_idx])
        return grad

    return _numerical_gradient_1d(f, X)

In [21]:
def function_2(x):
    """Return ``x[0] ** 2 + x[1] ** 2``, the sum of the first two squares."""
    first_sq = x[0] ** 2
    second_sq = x[1] ** 2
    return first_sq + second_sq

In [22]:
# Sample 2x2 input: two identical rows, each holding the point (3, 4).
aa = np.array([[3.0, 4.0], [3.0, 4.0]])
aa

array([[3., 4.],
       [3., 4.]])

In [23]:
# With a 2-D argument x[0] and x[1] are whole rows, so the squares are added
# element-wise across the two rows rather than within one point.
function_2(aa)

array([18., 32.])

In [24]:
# Each row is differentiated on its own: the gradient of x0^2 + x1^2 at
# (3, 4) is (6, 8).
numerical_gradient_2d(function_2, aa)

array([[6., 8.],
       [6., 8.]])