# 实现一层神经元数值的向前传播和梯度的反向传播算法

1. 输入参数：输入向量x，权重矩阵W，偏置向量b，激活函数f
2. 计算输出值：输出值y=f(Wx+b)
4. 计算梯度：
   下一层传回误差: R
   梯度dy/dW= x * (f'(Wx+b) * R)
   dy/db= f'(Wx+b)* R
   向前一层传播的误差: dy/dx= f'(Wx+b)* R * W

   

5. 激活函数f 取sigmoid函数 f(x)=1/(1+exp(-x))
   sigmoid函数的导数:f'(x)=sigmoid(x)(1-sigmoid(x))

In [33]:
import numpy as np

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        NumPy array with the same shape as ``z`` (0-d for scalar input).
    """
    z = np.asarray(z)
    # exp() is only ever applied to non-positive values here, so it cannot
    # overflow; the naive form overflows in exp(-z) for large negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

def sigmoid_derivative(z):
    """Return the derivative of the sigmoid at ``z``: ``s(z) * (1 - s(z))``.

    Args:
        z: Pre-activation value(s), in any form accepted by ``sigmoid``.

    Returns:
        The derivative evaluated element-wise, shaped like ``sigmoid(z)``.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    s = sigmoid(z)
    return s * (1 - s)
class DenseLayer:
    """Fully connected layer computing ``inputs @ weights + biases``.

    The activation function and its derivative are only stored on the layer;
    ``forward`` produces the pre-activation values and ``backward`` expects the
    gradient with respect to those pre-activation values, so the caller is
    responsible for applying ``activation`` / ``derivative``.
    """

    def __init__(self, input_size, output_size, activation=sigmoid, derivative=sigmoid_derivative):
        """Create a layer with standard-normal weights and zero biases.

        Args:
            input_size: Number of input features (rows of ``weights``).
            output_size: Number of units (columns of ``weights``).
            activation: Callable stored as ``self.activation``.
            derivative: Callable stored as ``self.derivative``.
        """
        self.weights = np.random.randn(input_size, output_size)
        self.biases = np.zeros(output_size)
        # Values cached by forward()/backward(); None until the first call.
        self.inputs = None
        self.output = None
        self.dinputs = None
        self.dweights = None
        self.dbiases = None
        self.activation = activation
        self.derivative = derivative

    def forward(self, inputs):
        """Compute and cache the pre-activation output for ``inputs``.

        Args:
            inputs: Batch of samples; expected to be 2-D with one row per
                sample and ``input_size`` columns.

        Returns:
            The pre-activation output, also stored in ``self.output``.
        """
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights) + self.biases
        return self.output

    def backward(self, dvalues):
        """Compute parameter and input gradients from the output gradient.

        Uses the inputs cached by the most recent ``forward`` call.

        Args:
            dvalues: Gradient with respect to ``self.output``, one row per
                sample.

        Returns:
            Gradient with respect to the layer inputs (also ``self.dinputs``).
        """
        self.dweights = np.dot(self.inputs.T, dvalues)
        # Biases are shared by all samples, so their gradient sums over rows.
        self.dbiases = np.sum(dvalues, axis=0)
        self.dinputs = np.dot(dvalues, self.weights.T)
        return self.dinputs

    def update(self, learning_rate, samples):
        """Apply one gradient-descent step using the sample-averaged gradients.

        Args:
            learning_rate: Step size.
            samples: Number of samples the stored gradients were summed over.

        Raises:
            ValueError: If ``samples`` is not positive; dividing by zero would
                otherwise silently fill the parameters with NaN/inf.
        """
        if samples <= 0:
            raise ValueError("samples must be a positive number")
        self.weights -= learning_rate * self.dweights / samples
        self.biases -= learning_rate * self.dbiases / samples


class LinearNetwork:
    """Sequential stack of layers trained with mini-batch gradient descent."""

    def __init__(self, activation=sigmoid, derivative=sigmoid_derivative, learning_rate=0.01):
        """Create an empty network.

        Args:
            activation: Callable stored as ``self.activation``.
            derivative: Callable stored as ``self.derivative``.
            learning_rate: Step size passed to every layer's ``update``.
        """
        self.activation = activation
        self.derivative = derivative
        self.layers = []
        self.learning_rate = learning_rate

    def add_layer(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def forward(self, inputs):
        """Run ``inputs`` through every layer and its activation, in order.

        Returns:
            The activated output of the last layer (``inputs`` unchanged if the
            network has no layers).
        """
        activated = inputs
        for layer in self.layers:
            layer.forward(activated)
            activated = layer.activation(layer.output)
        return activated

    def backward(self, dvalues):
        """Back-propagate ``dvalues`` through the layers in reverse order.

        Args:
            dvalues: Gradient with respect to the network's activated output.
        """
        grad = dvalues
        for layer in reversed(self.layers):
            # Chain rule through the activation, then through the affine map.
            grad = layer.derivative(layer.output) * grad
            grad = layer.backward(grad)

    def update(self, n_samples):
        """Apply one gradient step to every layer, averaging over ``n_samples``."""
        for layer in self.layers:
            layer.update(self.learning_rate, n_samples)

    def train(self, inputs, targets):
        """Perform one forward/backward/update step on a single batch.

        Args:
            inputs: 2-D batch of samples, one row per sample.
            targets: Desired outputs, compatible in shape with the network
                output.

        Returns:
            Mean squared error of the batch, measured before the update.
        """
        n_samples, _ = inputs.shape
        output = self.forward(inputs)
        # (output - targets) is the squared-error gradient up to a constant
        # factor; it is reused for the reported loss below.
        error = output - targets
        self.backward(error)
        self.update(n_samples)
        return np.mean(np.square(error))

    def fit(self, X, y, epochs=1000, batch_size=10):
        """Train for ``epochs`` passes over ``X`` in consecutive mini-batches.

        Every 100th epoch (starting with epoch 0) the loss of that epoch's last
        mini-batch is printed.

        Args:
            X: 2-D array of training samples.
            y: Training targets aligned with the rows of ``X``.
            epochs: Number of passes over the data.
            batch_size: Number of samples per mini-batch.

        Raises:
            ValueError: If ``X`` contains no samples.
        """
        n_total = X.shape[0]
        if n_total == 0:
            # Without any batch there is no loss to report.
            raise ValueError("X must contain at least one sample")
        for epoch in range(epochs):
            for start in range(0, n_total, batch_size):
                stop = start + batch_size
                loss = self.train(X[start:stop], y[start:stop])
            if epoch % 100 == 0:
                print(f"Epoch {epoch} loss: {loss}")

    def predict(self, X):
        """Return the network output for ``X`` (a plain forward pass)."""
        return self.forward(X)

In [34]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load the Iris dataset: feature matrix and integer class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for evaluation; the seed keeps the split fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# One-hot encode the labels. The encoder is fitted on the training labels only
# and then reused, unchanged, for the test labels.
encoder = OneHotEncoder(categories='auto')
y_train_onehot = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test_onehot = encoder.transform(y_test.reshape(-1, 1)).toarray()

In [9]:
# Sanity check: shapes of the training features and the one-hot test labels.
(X_train.shape, y_test_onehot.shape)

((105, 4), (45, 3))

In [35]:
# Assemble a two-layer network: 4 input features -> 20 hidden units -> 3 outputs.
model = LinearNetwork()
model.add_layer(DenseLayer(input_size=4, output_size=20))
model.add_layer(DenseLayer(input_size=20, output_size=3))

# Train against the one-hot training labels in mini-batches of 32 samples.
model.fit(X_train, y_train_onehot, epochs=10000, batch_size=32)

# The predicted class is the index of the largest output for each test sample.
y_pred_hot = model.predict(X_test)
y_pred = y_pred_hot.argmax(axis=1)


Epoch 0 loss: 0.5326681501727518
Epoch 100 loss: 0.2597462011984709
Epoch 200 loss: 0.19623773926570465
Epoch 300 loss: 0.17172748077485822
Epoch 400 loss: 0.1567052070800773
Epoch 500 loss: 0.14686236993718418
Epoch 600 loss: 0.13978974571300304
Epoch 700 loss: 0.13408096034022404
Epoch 800 loss: 0.1291628831700563
Epoch 900 loss: 0.12476031698621508
Epoch 1000 loss: 0.1207187946150598
Epoch 1100 loss: 0.11694243754687497
Epoch 1200 loss: 0.11336612981460917
Epoch 1300 loss: 0.10994272601053331
Epoch 1400 loss: 0.10663688063896151
Epoch 1500 loss: 0.10342154238931549
Epoch 1600 loss: 0.10027538198740617
Epoch 1700 loss: 0.09718036032635691
Epoch 1800 loss: 0.09411876678348631
Epoch 1900 loss: 0.09106843861099018
Epoch 2000 loss: 0.08799275975807024
Epoch 2100 loss: 0.08481558556811138
Epoch 2200 loss: 0.08135748358486322
Epoch 2300 loss: 0.07730016575700166
Epoch 2400 loss: 0.07298628152101345
Epoch 2500 loss: 0.0694864948567319
Epoch 2600 loss: 0.06666559526971871
Epoch 2700 loss: 0.

In [36]:
from sklearn.metrics import accuracy_score

# Re-run inference on the held-out set and pick the highest-scoring class per row.
y_pred_hot = model.predict(X_test)
y_pred = y_pred_hot.argmax(axis=1)

# Compare the predicted class indices with the true integer labels.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0
