In [4]:
import numpy as np
from sklearn.datasets import fetch_openml

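# Dot product helper that also handles scipy sparse matrices correctly
# (for dense arrays it defers to NumPy's own product, so it is not faster)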
from sklearn.utils.extmath import safe_sparse_dot 

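# A neural network implemented in plain Python/NumPy for handwritten digit recognition
# Hidden layers use the ReLU activation; the output layer is a softmax classifier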

In [13]:
# Convert a target digit (0-9) into its one-hot encoding
def tran_y(y_true, n_classes=10):
    """Convert a single class label into a one-hot encoded vector.

    Parameters
    ----------
    y_true : int or str
        Class label; anything ``int()`` accepts (e.g. ``5`` or ``'5'``).
    n_classes : int, default=10
        Number of classes, i.e. the length of the returned vector.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_classes,)`` holding 1 at index ``y_true``
        and 0 everywhere else.

    Raises
    ------
    IndexError
        If the label is outside ``[0, n_classes)``.
    """
    label = int(y_true)
    # Without this check a negative label would wrap around through negative
    # indexing and silently mark the wrong class.
    if not 0 <= label < n_classes:
        raise IndexError(
            "label %d is out of range for %d classes" % (label, n_classes)
        )
    y_ohe = np.zeros(n_classes)
    y_ohe[label] = 1
    return y_ohe

# Fetch the 'mnist_784' dataset from OpenML (needs network access unless a
# local scikit-learn cache already holds it).
mnist = fetch_openml('mnist_784')

# Depending on the installed scikit-learn version, "data" may come back as a
# pandas DataFrame instead of an ndarray. The in-place activation helpers in
# this notebook need a floating-point ndarray (relu reads x.dtype and calls
# np.finfo on it), so coerce explicitly; this is a no-op when the data is
# already a float64 array.
X = np.asarray(mnist["data"], dtype=np.float64)
y = mnist["target"]

# One-hot encode every label: the result has one row per sample.
y = np.array([tran_y(i) for i in y])

[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [14]:
# Two hidden layers: 300 units in the first, 100 in the second
hidden_layer_sizes = [300, 100]

# Maximum number of training iterations
max_iter = 200

# Coefficient of the regularization term
alpha = 1e-4

# Learning rate
learning_rate = 1e-3

In [15]:
def log_loss(y_true, y_prob):
    """Cross-entropy between targets and predicted probabilities.

    Probabilities are clipped to ``[1e-10, 1 - 1e-10]`` before the logarithm
    is taken. Either argument given as a single column ``v`` is expanded to
    the two columns ``[1 - v, v]``.

    Parameters
    ----------
    y_true : numpy.ndarray, 2-D
        Target indicators, one row per sample.
    y_prob : numpy.ndarray, 2-D
        Predicted probabilities, one row per sample.

    Returns
    -------
    float
        ``-sum(y_true * log(y_prob))`` divided by the number of rows.
    """
    probs = np.clip(y_prob, 1e-10, 1-1e-10)
    targets = y_true

    # Single-column probabilities: build the complementary first column.
    if probs.shape[1] == 1:
        probs = np.append(1 - probs, probs, axis=1)

    # Same expansion for single-column targets.
    if targets.shape[1] == 1:
        targets = np.append(1 - targets, targets, axis=1)

    n_rows = probs.shape[0]
    weighted_log_sum = np.sum(targets * np.log(probs))
    return -weighted_log_sum / n_rows

In [16]:
def softmax(x):
    """Row-wise softmax, computed in place.

    The per-row maximum is subtracted before exponentiating so that large
    inputs do not overflow. The result is written back into ``x``.

    Parameters
    ----------
    x : numpy.ndarray
        Array that is normalised along axis 1; it is overwritten.

    Returns
    -------
    numpy.ndarray
        The same array object ``x``, now holding the softmax values.
    """
    row_peak = x.max(axis=1, keepdims=True)
    shifted = x - row_peak
    np.exp(shifted, out=x)
    row_total = x.sum(axis=1, keepdims=True)
    x /= row_total
    return x

In [17]:
def relu(x):
    """Apply ReLU(x) = max(0, x) element-wise, in place.

    Values are clipped to the range from 0 to the largest finite value of
    ``x``'s floating-point dtype, and the result overwrites ``x``.

    Parameters
    ----------
    x : numpy.ndarray
        Floating-point array; it is overwritten.

    Returns
    -------
    numpy.ndarray
        The same array object ``x``.
    """
    upper_bound = np.finfo(x.dtype).max
    np.clip(x, 0, upper_bound, out=x)
    return x

In [23]:
def relu_derivative(z,delta):
    """Apply the ReLU derivative to ``delta`` in place.

    Every entry of ``delta`` whose matching entry in ``z`` equals 0 is set
    to 0; all other entries are left untouched. Nothing is returned.

    Parameters
    ----------
    z : numpy.ndarray
        Values tested against 0 (presumably the post-ReLU activations of the
        layer; verify against the caller).
    delta : numpy.ndarray
        Array modified in place.
    """
    inactive = (z == 0)
    delta[inactive] = 0

1
[1, 0]


In [25]:
def gen_batches(n, bs):
    """
    Yield slices that split the index range [0, n) into consecutive batches.

    :param n: total number of samples
    :param bs: batch size
    :return: generator of slice objects; each of the first n // bs slices
             spans bs indices, and one final shorter slice covers whatever
             is left when n is not a multiple of bs
    """
    lo = 0
    full_batches_left = int(n // bs)
    while full_batches_left > 0:
        hi = lo + bs
        yield slice(lo, hi)
        lo = hi
        full_batches_left -= 1
    # Emit the remainder, if any samples are still uncovered.
    if lo < n:
        yield slice(lo, n)


In [27]:
n_samples, n_features = X.shape
n_outputs = y.shape[1]

# Mini-batch size: at most 200 samples, never more than the dataset holds.
batch_size = min(200, n_samples)
# Units per layer, input first and output last.
layer_units = ([n_features] + hidden_layer_sizes + [n_outputs])
n_layers = len(layer_units)

# Draw the initial parameters from a dedicated, seeded generator so that a
# fresh "Restart & Run All" starts from the same weights every time. A local
# generator is used to leave NumPy's global random state untouched.
init_rng = np.random.RandomState(42)

# Initialize the weights (w) and biases (b), one entry per pair of
# consecutive layers.
coefs_ = [] # weights
intercepts_ = [] # bias

# Numerator of the Glorot/Xavier uniform bound sqrt(6 / (fan_in + fan_out));
# it does not depend on the layer, so it is set once outside the loop.
factor = 6.
for i in range(n_layers-1):
    fan_in = layer_units[i]
    fan_out = layer_units[i+1]
    init_bound = np.sqrt(factor / (fan_in + fan_out))
    # Weights and biases are both sampled uniformly from
    # [-init_bound, init_bound).
    coef_init = init_rng.uniform(-init_bound, init_bound, (fan_in, fan_out))
    intercept_init = init_rng.uniform(-init_bound, init_bound, fan_out)
    coefs_.append(coef_init)
    intercepts_.append(intercept_init)
