# 神经网络解决多分类问题

In [5]:
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from  scipy.optimize import minimize

In [29]:
# Load the dataset from the MATLAB file and pull out the arrays stored under
# the 'X' and 'y' keys ('y' is used further down as the class labels).
data=sio.loadmat('ex4data1.mat')
raw_X=data['X']
raw_y=data['y']
# Displayed as the cell output: the shape of the label array.
raw_y.shape

(5000, 1)

In [31]:
# Prepend a column of ones to raw_X (column index 0) to act as the bias input.
X=np.insert(raw_X,0,values=1,axis=1)
X.shape


(5000, 401)

### 1. 对 y 进行 one-hot 编码处理

In [33]:
def one_hot_encoder(raw_y, num_classes=10):
    """Convert 1-based integer class labels into one-hot rows.

    Args:
        raw_y: Array-like of integer labels, one sample per entry along the
            first axis (e.g. shape ``(m, 1)`` or ``(m,)``). Label ``k`` maps
            to column ``k - 1``.
        num_classes: Width of each one-hot row. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        Float array of shape ``(m, num_classes)`` with a 1 in the column
        selected by each label and 0 elsewhere. Empty input yields an array
        of shape ``(0, num_classes)``.
    """
    labels = np.asarray(raw_y, dtype=int)
    n_samples = len(labels)
    encoded = np.zeros((n_samples, num_classes))
    if n_samples:
        # One row index per sample, broadcast against that sample's label(s).
        rows = np.arange(n_samples).reshape(-1, 1)
        # Labels are 1-based, so shift by one to obtain the column index.
        encoded[rows, labels.reshape(n_samples, -1) - 1] = 1
    return encoded

In [35]:
# One-hot encode the labels; the cell output shows the resulting shape.
y=one_hot_encoder(raw_y)
y.shape

(5000, 10)

In [45]:
# Load the weight file and extract the two matrices stored under the
# 'Theta1' and 'Theta2' keys.
theta = sio.loadmat('ex4weights.mat')
theta.keys()
theta1,theta2=theta['Theta1'],theta['Theta2']
# Displayed as the cell output: the shapes of both weight matrices.
theta1.shape,theta2.shape

((25, 401), (10, 26))

### 2.序列化权重参数

In [50]:
def serialize(a,b):
    """Flatten two arrays and join them into a single 1-D vector.

    The elements of ``a`` come first, followed by the elements of ``b``,
    each in the order produced by ``ndarray.flatten()``.
    """
    flat_parts = (a.flatten(), b.flatten())
    return np.concatenate(flat_parts)

In [52]:
# Pack both weight matrices into one flat parameter vector.
theta_serialize=serialize(theta1,theta2)

In [54]:
# Displayed as the cell output: the length of the flat parameter vector.
theta_serialize.shape

(10285,)

### 3. 解序列化权重参数

In [57]:
def deserialize(theta_serialize, input_size=400, hidden_size=25, num_labels=10):
    """Split a flat parameter vector back into the two layer weight matrices.

    Args:
        theta_serialize: 1-D array holding the first-layer weights followed
            by the second-layer weights (the layout produced by flattening
            and concatenating the two matrices).
        input_size: Number of input features, excluding the bias column.
        hidden_size: Number of hidden units, excluding the bias unit.
        num_labels: Number of output units.

    Returns:
        Tuple ``(theta1, theta2)`` with shapes
        ``(hidden_size, input_size + 1)`` and ``(num_labels, hidden_size + 1)``.
        With the default sizes these are ``(25, 401)`` and ``(10, 26)``.

    Raises:
        ValueError: If a slice of the vector cannot be reshaped to the
            requested matrix shape (raised by ``reshape``).
    """
    # The first-layer block occupies the leading hidden_size * (input_size + 1)
    # entries; everything after it belongs to the second layer.
    split = hidden_size * (input_size + 1)
    theta1 = theta_serialize[:split].reshape(hidden_size, input_size + 1)
    theta2 = theta_serialize[split:].reshape(num_labels, hidden_size + 1)
    return theta1, theta2

In [84]:
# Unpack the flat vector again and display theta1's shape to confirm the
# serialize/deserialize round trip.
theta1,theta2=deserialize(theta_serialize)

theta1.shape

(25, 401)

### 4. 前向传播

In [62]:
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Evaluated in an overflow-free form: ``exp`` is only ever applied to a
    non-positive argument, so very negative inputs no longer trigger an
    overflow RuntimeWarning.

    Args:
        z: Real scalar or array-like.

    Returns:
        A float scalar for scalar input, otherwise a float array with the
        same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook formula; for z < 0 use the algebraically
    # equal form exp(z) / (1 + exp(z)).
    out = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return out[()]

In [76]:
def feed_forward(theta_serialize,X):
    """Run a forward pass through the two-layer network.

    Args:
        theta_serialize: Flat parameter vector, unpacked with ``deserialize``.
        X: Input matrix; it is multiplied directly by the transposed
            first-layer weights, so it must already include the bias column.

    Returns:
        Tuple ``(a1, z2, a2, z3, h)``: the input, the hidden pre-activation,
        the hidden activation with a leading column of ones inserted, the
        output pre-activation, and the sigmoid output.
    """
    w_hidden, w_output = deserialize(theta_serialize)
    hidden_in = X @ w_hidden.T
    # Insert the bias unit as column 0 of the hidden activation.
    hidden_act = np.insert(sigmoid(hidden_in), 0, values=1, axis=1)
    output_in = hidden_act @ w_output.T
    return X, hidden_in, hidden_act, output_in, sigmoid(output_in)

In [66]:
### 5. 不带正则化loss

In [117]:
def cost(theta_serialize,X,y):
    """Return the unregularized cross-entropy loss averaged over ``len(X)``.

    Args:
        theta_serialize: Flat parameter vector passed to ``feed_forward``.
        X: Input matrix passed to ``feed_forward``.
        y: Target matrix, combined element-wise with the network output.

    Returns:
        The scalar loss.
    """
    *_, prediction = feed_forward(theta_serialize, X)
    # Key fix: clip the predictions away from exactly 0 and 1 so that neither
    # logarithm below is evaluated at 0.
    prediction = np.clip(prediction, 1e-15, 1 - 1e-15)
    log_likelihood = y * np.log(prediction) + (1 - y) * np.log(1 - prediction)
    return -np.sum(log_likelihood) / len(X)

In [119]:
# Evaluate the unregularized loss on the loaded weights.
cost(theta_serialize,X,y)

0.2876291651613189

### 5-2 带正则化

In [89]:
def reg_cost(theta_serialize,X,y,lam):
    """Return the cross-entropy loss plus an L2 penalty on the weights.

    The penalty is computed from the weights encoded in ``theta_serialize``
    itself. It used to read the notebook-level ``theta1``/``theta2``
    variables, which made the penalty a constant unrelated to the parameters
    being evaluated.

    Args:
        theta_serialize: Flat parameter vector.
        X: Input matrix; ``len(X)`` is used as the sample count.
        y: Target matrix.
        lam: Regularization strength.

    Returns:
        The scalar regularized loss.
    """
    theta1, theta2 = deserialize(theta_serialize)
    # Column 0 of each matrix holds the bias weights, which are not penalized.
    sum1 = np.sum(np.square(theta1[:, 1:]))
    sum2 = np.sum(np.square(theta2[:, 1:]))
    reg = (sum1 + sum2) * lam / (2 * len(X))
    return reg + cost(theta_serialize, X, y)

In [91]:
# Evaluate the regularized loss on the loaded weights with lambda = 0.1.
lam=0.1
reg_cost(theta_serialize,X,y,lam)

0.2972432345542794

In [93]:
def sigmoid_gradient(z):
    """Return the derivative of the sigmoid, ``sigmoid(z) * (1 - sigmoid(z))``.

    Args:
        z: Scalar or array of pre-activation values.

    Returns:
        The element-wise derivative, with the same shape as ``sigmoid(z)``.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    activation = sigmoid(z)
    return activation * (1 - activation)

In [95]:
def gradient(theta_serialize,X,y):
    """Return the unregularized backpropagation gradient as a flat vector.

    Args:
        theta_serialize: Flat parameter vector.
        X: Input matrix; ``len(X)`` is used as the sample count.
        y: Target matrix, subtracted from the network output.

    Returns:
        1-D array holding the first-layer gradient followed by the
        second-layer gradient, packed with ``serialize``.
    """
    _, theta2 = deserialize(theta_serialize)
    a1, z2, a2, _, h = feed_forward(theta_serialize, X)
    n_samples = len(X)

    output_delta = h - y
    # Propagate the error back through the second layer; its bias column
    # (index 0) is dropped before multiplying.
    hidden_delta = (output_delta @ theta2[:, 1:]) * sigmoid_gradient(z2)

    grad_theta1 = (hidden_delta.T @ a1) / n_samples
    grad_theta2 = (output_delta.T @ a2) / n_samples
    return serialize(grad_theta1, grad_theta2)



### 6-1 带正则化的梯度

In [139]:
def reg_gradient(theta_serialize,X,y,lam):
    """Return the gradient of the L2-regularized loss as a flat vector.

    The regularization term is taken from the weights encoded in
    ``theta_serialize``. It used to read the notebook-level
    ``theta1``/``theta2`` variables, so the gradient did not correspond to
    the parameters being optimized.

    Args:
        theta_serialize: Flat parameter vector.
        X: Input matrix; ``len(X)`` is used as the sample count.
        y: Target matrix.
        lam: Regularization strength.

    Returns:
        1-D array with the same layout as ``theta_serialize``.
    """
    theta1, theta2 = deserialize(theta_serialize)
    D1, D2 = deserialize(gradient(theta_serialize, X, y))
    scale = lam / len(X)
    # Bias weights (column 0) are not regularized, so only the remaining
    # columns receive the penalty term.
    D1[:, 1:] = D1[:, 1:] + scale * theta1[:, 1:]
    D2[:, 1:] = D2[:, 1:] + scale * theta2[:, 1:]
    return serialize(D1, D2)

### 优化

In [143]:
from scipy.optimize import minimize
def nn_training(X,y,lamb):
    """Fit the network weights by minimizing the regularized loss with TNC.

    Args:
        X: Input matrix forwarded to ``reg_cost`` and ``reg_gradient``.
        y: Target matrix forwarded to ``reg_cost`` and ``reg_gradient``.
        lamb: Regularization strength forwarded to both callbacks.

    Returns:
        The result object returned by ``scipy.optimize.minimize``.
    """
    # 10285 = 25*401 + 10*26, the length of the flat vector that
    # deserialize() expects.
    n_params = 10285
    start = np.random.uniform(-0.5, 0.5, n_params)
    # For TNC, 'maxfun' caps the number of function evaluations.
    solver_options = {'maxfun': 300}
    return minimize(
        fun=reg_cost,
        x0=start,
        args=(X, y, lamb),
        method='TNC',
        jac=reg_gradient,
        options=solver_options,
    )
    
#res 对象的关键属性：

#res.x：优化后的参数向量（即训练好的权重）。

#res.fun：最终损失值。

#res.success：是否优化成功（布尔值）。

#res.message：优化状态的描述信息。

In [145]:
# Train from a random start with regularization strength 10.
lamb=10
res=nn_training(X,y,lamb)


In [146]:
# Flatten the labels to 1-D so they compare element-wise with the predictions.
raw_y=data['y'].reshape(5000,)
# Forward pass with the trained weights; only the output activations are needed.
_,_,_,_,h=feed_forward(res.x,X)
y_pred=np.argmax(h,axis=1)+1  # argmax is 0-based; add one to get the 1-based label
# Fraction of samples whose predicted label equals the true label.
acc=np.mean(y_pred==raw_y)
acc

0.8556