# In[1]:
import numpy as np
from scipy.optimize import minimize
from scipy import stats

# In[2]:
def sigmoid(z):
    """Evaluate the logistic function 1 / (1 + e^-z).

    Args:
        z: Scalar or NumPy array; processed element-wise by ``np.exp``.
    Returns:
        The logistic function of ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

# In[3]:
def sigmoidDerivative(a):
    """Derivative of the sigmoid, expressed through the sigmoid's output.

    Args:
        a: Activation value(s), i.e. the result of applying the sigmoid.
    Returns:
        The element-wise product ``a * (1 - a)``.
    """
    complement = 1 - a
    return np.multiply(a, complement)

# In[4]:
def initTheta(len_in, len_out, epsilon):
    """Create the randomly initialised weight matrix for one layer transition.

    Args:
        len_in: Number of rows of the returned matrix.
        len_out: Number of columns of the returned matrix.
        epsilon: Half-width of the sampling interval.
    Returns:
        Array of shape (len_in, len_out) with entries drawn uniformly
        from [-epsilon, epsilon).
    """
    samples = np.random.rand(len_in, len_out)
    # Stretch [0, 1) to [0, 2*epsilon) and shift it to be centred on zero.
    return samples * 2 * epsilon - epsilon

def initThetas(hiddenNum, unitNum, inputSize, classNum, epsilon):
    """Build the randomly initialised weight matrices for the whole network.

    Args:
        hiddenNum: Number of hidden layers.
        unitNum: Number of units in each hidden layer.
        inputSize: Size of the input layer.
        classNum: Number of output classes.
        epsilon: Half-width of the random initialisation interval,
            forwarded to ``initTheta``.
    Returns:
        thetas: List with one weight matrix per pair of consecutive layers,
            each of shape (units in next layer, units in current layer + 1);
            the extra column is for the bias unit.
    """
    layerSizes = [inputSize] + [unitNum] * hiddenNum + [classNum]
    # Pair every layer with its successor; the last layer has no outgoing weights.
    return [
        initTheta(fanOut, fanIn + 1, epsilon)
        for fanIn, fanOut in zip(layerSizes[:-1], layerSizes[1:])
    ]

# In[6]:
def computeCost(Thetas, y, theLambda, X=None, a=None):
    """Compute the regularised cross-entropy cost of the network.

    Args:
        Thetas: Sequence of weight matrices; column 0 of each matrix holds
            the bias weights.
        y: Label matrix with one row per sample.
        theLambda: Regularisation strength.
        X: Samples; only used to run forward propagation when ``a`` is
            not supplied.
        a: Per-layer activations. ``a[-1]`` is the output layer and must
            have the same shape as ``y.T`` (one column per sample).
    Returns:
        J: The cost of the current predictions.
    Raises:
        ValueError: If neither ``X`` nor ``a`` is given.
    """
    if a is None:
        if X is None:
            raise ValueError("computeCost requires either X or a")
        a = fp(Thetas, X)
    m = y.shape[0]
    output = a[-1]
    # Cross-entropy is an element-wise product of labels and log-probabilities
    # summed over all samples and classes; a matrix product here would either
    # fail on shape or add unrelated cross terms.
    loss = -np.sum(
        np.multiply(y.T, np.log(output))
        + np.multiply((1 - y).T, np.log(1 - output))
    )
    # L2 penalty on the squared weights; bias column (index 0) is excluded.
    reg = sum(np.sum(np.square(Theta[:, 1:])) for Theta in Thetas)
    return (1.0 / m) * loss + (1.0 / (2 * m)) * theLambda * reg

# In[5]:
def fp(Thetas, X):
    """Run forward propagation through the network.

    Args:
        Thetas: Sequence of weight matrices, one per layer transition.
        X: Input samples, one row per sample.
    Returns:
        a: List of activations, one entry per layer, each with one column
            per sample. Every layer except the output layer has a row of
            ones prepended as the bias unit.
    """
    layerNum = len(Thetas) + 1
    # A real list is required: a Python 3 ``range`` object does not support
    # item assignment.
    a = [None] * layerNum
    for i in range(layerNum):
        if i == 0:
            # Input layer: samples become columns.
            a[i] = X.T
        else:
            z = Thetas[i - 1] @ a[i - 1]
            a[i] = sigmoid(z)
        # Every layer except the output layer gets a bias row.
        if i != layerNum - 1:
            a[i] = np.concatenate((np.ones((1, a[i].shape[1])), a[i]))
    return a

# In[8]:
def bp(Thetas, a, y, theLambda):
    """Run back-propagation and return the gradient of every weight matrix.

    Args:
        Thetas: Sequence of weight matrices; column 0 of each matrix holds
            the bias weights.
        a: Per-layer activations as produced by forward propagation
            (one column per sample, bias row on every non-output layer).
        y: Label matrix with one row per sample.
        theLambda: Regularisation strength.
    Returns:
        D: List of gradients, one per weight matrix and of the same shape.
    """
    m = y.shape[0]
    layerNum = len(Thetas) + 1
    # Per-layer error terms; index 0 (input layer) is never filled in.
    # A list is needed because a Python 3 ``range`` cannot be assigned to.
    d = [None] * layerNum
    for i in range(layerNum - 1, 0, -1):
        if i == layerNum - 1:
            # Output-layer error.
            d[i] = a[i] - y.T
        else:
            # Propagate the next layer's error back through the weights
            # (matrix product); the bias column/row takes no part in it.
            d[i] = np.multiply(
                Thetas[i][:, 1:].T @ d[i + 1],
                sigmoidDerivative(a[i][1:, :]),
            )
    D = []
    for i, Theta in enumerate(Thetas):
        # Error of the next layer times this layer's activations, summed
        # over all samples.
        delta = d[i + 1] @ a[i].T
        grad = np.zeros(Theta.shape)
        # Bias gradient: not regularised. Flattened so the assignment also
        # works when ``delta`` is a 2-D-only type such as ``np.matrix``.
        grad[:, 0] = (1.0 / m) * np.asarray(delta[:, 0]).ravel()
        # Weight gradient, including the regularisation term.
        grad[:, 1:] = (1.0 / m) * (delta[:, 1:] + theLambda * Theta[:, 1:])
        D.append(grad)
    return D