使用sigmoid函数作为激励函数,sigmoid函数的特点是S型

In [27]:
import numpy as np
def tanh(x):
    return np.tanh(x)
def tanh_deriv(x):
    return 1.0 - np.tanh(x)*np.tanh(x)
def logistic(x):
    return 1/(1+np.exp(-x))
def logistic_derivative(x):
    return logistic(x)*(1-logistic(x))

In [28]:
class NeuralNetwork:
    def __init__(self,layers,activation = 'tanh'):
        if activation == 'tanh':
            self.activation = tanh
            self.activation_deriv = tanh_deriv
        elif activation == 'logistic':
            self.activation = logistic
            self.activation_deriv = logistic_derivative
        
        self.weights = []
        """这里形成的最下面一行和最右边的一列其实是为bias准备的"""
        #以XOR为例
        #     * * *    *
        #     * * *    *
        #     % % %    %       (参数的形式)
        
        #     ￥ ￥ ￥         (输入值的形式，最后一个值是新加的，为1)
        #     ￥%+￥%+￥%      完成了Ij = ∑WijOi + θj
        #      并且最后一次的  ￥%+￥%+￥% 生成了新的￥值
        for i in range(1,len(layers)-1):
            self.weights.append((2*np.random.random((layers[i-1]+1,layers[i]+1))-1)*0.25)
            self.weights.append((2*np.random.random((layers[i]+1,layers[i+1]))-1)*0.25)
    
    def fit(self,X,y,learning_rate = 0.2,epochs = 10000):
        """X在下面的操作后会变成np.array以及在最后增加了一列"""
        X = np.atleast_2d(X)
        temp = np.ones([X.shape[0],X.shape[1]+1])
        temp[:,0:-1] = X
        X = temp
        y = np.array(y)
        for k in range(epochs):
            i = np.random.randint(X.shape[0])
            a = [X[i]]
            """a最初是随机选取的一行数据，把当做输入值，但是要使用激励函数"""
            """把每一层的计算结果都加载列表a的最后面，每一层的计算都要使用前一层的结果"""
            for L in range(len(self.weights)):
                a.append(self.activation(np.dot(a[L],self.weights[L])))
            """error是预测与标签的偏差"""
            error = y[i]-a[-1]
            """第一次是计算输出层的Err"""
            """输出层的公式：Errj = Oj(1-Oj)(Tj-Oj)"""
            """这里的error是(Tj-Oj),而self.activation是Oj(1-Oj)"""
            deltas = [error * self.activation_deriv(a[-1])] #For output layer, Err calculation (delta is updated error)
            
            #Staring backprobagation
            """隐藏层的公式:Errj = Oj(1-Oj)∑ErrK*weights(JK)"""
            for l in range(len(a) - 2, 0, -1): # we need to begin at the second to last layer 
                #Compute the updated error (i,e, deltas) for each node going from top layer to input layer 
                deltas.append(deltas[-1].dot(self.weights[l].T)*self.activation_deriv(a[l]))  
            deltas.reverse()#反转操作是必要的
            """正向对权重以及偏差进行更新"""
            for i in range(len(self.weights)):
                layer = np.atleast_2d(a[i])  
                delta = np.atleast_2d(deltas[i])  
                self.weights[i] += learning_rate * layer.T.dot(delta)
    def predict(self, x):         
        x = np.array(x)
        temp = np.ones(x.shape[0]+1)
        temp[0:-1] = x
        a = temp
        for l in range(0, len(self.weights)):
            a = self.activation(np.dot(a, self.weights[l]))
        return a

In [29]:
nn = NeuralNetwork([2,2,1], 'tanh') 

# 开始对神经网络进行训练

XOR

In [30]:
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([0,1,1,0])
nn.fit(X,y)

In [31]:
pre = []
for i in [[0, 0], [0, 1], [1, 0], [1,1]]:    
    a = nn.predict(i)
    if a<0.5:
        a=0
    else:
        a=1
    print(i,a)

[0, 0] 0
[0, 1] 1
[1, 0] 1
[1, 1] 0


手写数字识别

In [32]:
from sklearn.datasets import load_digits 
from sklearn.metrics import confusion_matrix, classification_report 
from sklearn.preprocessing import LabelBinarizer #对数据进行转化例如 2-> 00000010
from sklearn.cross_validation import train_test_split


In [33]:
digits = load_digits()
X = digits.data
y = digits.target
X -= X.min()
X /= X.max()

In [44]:
nn = NeuralNetwork([64,100,10],'logistic')
X_train,X_test,y_train,y_test = train_test_split(X,y)
labels_train = LabelBinarizer().fit_transform(y_train)
labels_test = LabelBinarizer().fit_transform(y_test)
print("start fitting")

start fitting


In [45]:
nn.fit(X_train,labels_train,3000)
predictions = []
for i in range(X_test.shape[0]):
    o = nn.predict(X_test[i])
    predictions.append(np.argmax(o))
print(confusion_matrix(y_test,predictions))
print(classification_report(y_test,predictions))

  import sys


[[53  0  0  0  0  0  0  0  0  0]
 [11 33  0  0  4  0  0  0  0  1]
 [ 1  1 32  0  0  0  0  1  0  0]
 [14  0  0 26  0  0  0  2  1  2]
 [ 0  0  0  0 48  0  0  0  0  0]
 [ 0  0  0  0  0 48  0  0  0  0]
 [ 0  2  0  0  1  0 34  0  0  0]
 [ 1  0  0  0  0  0  0 40  0  1]
 [ 6  3  0  0  5  2  0  0 29  0]
 [ 1  0  0  0  0  0  0  3  1 43]]
             precision    recall  f1-score   support

          0       0.61      1.00      0.76        53
          1       0.85      0.67      0.75        49
          2       1.00      0.91      0.96        35
          3       1.00      0.58      0.73        45
          4       0.83      1.00      0.91        48
          5       0.96      1.00      0.98        48
          6       1.00      0.92      0.96        37
          7       0.87      0.95      0.91        42
          8       0.94      0.64      0.76        45
          9       0.91      0.90      0.91        48

avg / total       0.89      0.86      0.86       450

