In [1]:
import numpy as np
import pandas as pd
import math
import random
from csv import reader
data = []
label_dict = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
with open("./iris.txt", 'r') as file:
    csv_reader = reader(file, delimiter=',')
    for row in csv_reader:
        row[0:4] = list(map(float, row[0:4]))
        row[4] = label_dict[row[4]]
        data.append(row)
data

[[5.1, 3.5, 1.4, 0.2, 0],
 [4.9, 3.0, 1.4, 0.2, 0],
 [4.7, 3.2, 1.3, 0.2, 0],
 [4.6, 3.1, 1.5, 0.2, 0],
 [5.0, 3.6, 1.4, 0.2, 0],
 [5.4, 3.9, 1.7, 0.4, 0],
 [4.6, 3.4, 1.4, 0.3, 0],
 [5.0, 3.4, 1.5, 0.2, 0],
 [4.4, 2.9, 1.4, 0.2, 0],
 [4.9, 3.1, 1.5, 0.1, 0],
 [5.4, 3.7, 1.5, 0.2, 0],
 [4.8, 3.4, 1.6, 0.2, 0],
 [4.8, 3.0, 1.4, 0.1, 0],
 [4.3, 3.0, 1.1, 0.1, 0],
 [5.8, 4.0, 1.2, 0.2, 0],
 [5.7, 4.4, 1.5, 0.4, 0],
 [5.4, 3.9, 1.3, 0.4, 0],
 [5.1, 3.5, 1.4, 0.3, 0],
 [5.7, 3.8, 1.7, 0.3, 0],
 [5.1, 3.8, 1.5, 0.3, 0],
 [5.4, 3.4, 1.7, 0.2, 0],
 [5.1, 3.7, 1.5, 0.4, 0],
 [4.6, 3.6, 1.0, 0.2, 0],
 [5.1, 3.3, 1.7, 0.5, 0],
 [4.8, 3.4, 1.9, 0.2, 0],
 [5.0, 3.0, 1.6, 0.2, 0],
 [5.0, 3.4, 1.6, 0.4, 0],
 [5.2, 3.5, 1.5, 0.2, 0],
 [5.2, 3.4, 1.4, 0.2, 0],
 [4.7, 3.2, 1.6, 0.2, 0],
 [4.8, 3.1, 1.6, 0.2, 0],
 [5.4, 3.4, 1.5, 0.4, 0],
 [5.2, 4.1, 1.5, 0.1, 0],
 [5.5, 4.2, 1.4, 0.2, 0],
 [4.9, 3.1, 1.5, 0.1, 0],
 [5.0, 3.2, 1.2, 0.2, 0],
 [5.5, 3.5, 1.3, 0.2, 0],
 [4.9, 3.1, 1.5, 0.1, 0],
 [4.4, 3.0, 

In [2]:
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))
def sigmoid_prime(z):
    return sigmoid(z)*(1-sigmoid(z))

In [3]:
class Network(object):

    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) 
                        for x, y in zip(sizes[:-1], sizes[1:])]
        
    def cost_derivative(self, output_activations, y):
        return (output_activations-y)
    #损失函数对输出层激活函数的值的导数
    #即为上述的用激活值减去实际值
    
    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
             a = sigmoid(np.dot(w, a)+b)#
        return a
    
    def evaluate(self, test_data):
        #返回神经网络对测试集样本分类正确的数量
        #test_result存储的是有神经网络预测出的类别与实际的类别
        test_results = [(np.argmax(self.feedforward(x)), y)
                 for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)
    
    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]#梯度矩阵的初始化
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x] # 存激活值a的列表
        zs = [] # 存权值输入z的列表
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y)*sigmoid_prime(zs[-1]) 
        #activations中的最后一层，即输出层的值-实际值 损失函数对输出层激活函数的值的导数
        #*输出层激活值对于权值输入的导数
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
  
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)#损失函数C_x的梯度
    
    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]#梯度矩阵的初始化
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x] # 存激活值a的列表
        zs = [] # 存权值输入z的列表
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y)*sigmoid_prime(zs[-1]) 
        #activations中的最后一层，即输出层的值-实际值 损失函数对输出层激活函数的值的导数
        #*输出层激活值对于权值输入的导数
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
  
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)#损失函数C_x的梯度
    
    def update_mini_batch(self, mini_batch, eta):
        #进行更新 mini_batch是(x,y)的一个列表 eta学习率
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        ######################################################
        for x,y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]
        
    def SGD(self, training_data, epochs, mini_batch_size, eta,test_data=None):
          #使用随机梯度下降法训练神经网络
        
        n_test = len(test_data)#测试数据test_data的长度
        n = len(training_data)#训练数据的长度
        for j in range(epochs):#学习次数
            random.shuffle(training_data)#序列的所有元素随机排序
            mini_batches = [training_data[k:k+mini_batch_size]for k in range(0, n, mini_batch_size)]#按块取步长训练数据进行参数更新
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test))#测试正确的次数/测试次数         
            else:
                print("Epoch {0} complete".format(j))

In [4]:
#参数设置
eta = 3.0 # 学习率
epochs = 30 # 迭代训练的次数
n_hidden = 10 # 隐藏层神经元个数
n_train_data = 130 # 训练集的大小（总共150条数据，训练集130条，验证集20条）

In [5]:
training_data = data[0:n_train_data]#训练集
test_data = data[n_train_data:]#验证集


In [2]:
sizes=[4,7,3]
net=Network(sizes)
biases = [np.random.randn(y, 1) for y in sizes[1:]]
weights = [np.random.randn(y, x) 
        for x, y in zip(sizes[:-1], sizes[1:])]
weights
#biases

NameError: name 'Network' is not defined

In [3]:
net.SGD(training_data,epochs,10,eta,test_data=test_data)

NameError: name 'net' is not defined