# 实现单层神经元的前向传播（计算输出值）和反向传播（计算梯度）算法

1. 输入参数：输入向量x，权重矩阵W，偏置向量b，激活函数f
2. 计算输出值：输出值y=f(Wx+b)
3. 计算梯度：
   下一层传回的误差: R
   对权重的梯度: dy/dW = x * (f'(Wx+b) * R)
   对偏置的梯度: dy/db = f'(Wx+b) * R
   传回前一层的误差: dy/dx = (f'(Wx+b) * R) * W

   

4. 激活函数f 取sigmoid函数: f(x)=1/(1+exp(-x))
   sigmoid函数的导数: f'(x)=sigmoid(x)(1-sigmoid(x))

In [9]:
import numpy as np

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``, element-wise.

    The value is built from ``exp(-|z|)``, which never exceeds 1, so large
    negative inputs do not overflow ``np.exp`` the way the textbook formula
    does.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``z``,
        with every value in ``[0, 1]``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in (0, 1], so it cannot overflow
    # For z >= 0 this is exactly the textbook 1 / (1 + e^-z); for z < 0 the
    # algebraically equivalent e^z / (1 + e^z) is used instead.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()]  # unwrap 0-d results so scalar input yields a scalar

def sigmoid_derivative(z):
    """Return the derivative of the sigmoid evaluated at ``z``.

    Uses the identity ``f'(z) = f(z) * (1 - f(z))``.

    Args:
        z: Pre-activation value(s) at which to evaluate the derivative.

    Returns:
        The derivative, with the same shape as ``sigmoid(z)``.
    """
    s = sigmoid(z)  # evaluate once and reuse instead of calling sigmoid twice
    return s * (1 - s)
class DenseLayer:
    """Fully connected layer computing ``activation(inputs @ weights + biases)``.

    ``forward`` caches what ``backward`` needs, ``backward`` stores the
    gradients, and ``update`` applies one gradient-descent step.
    """

    def __init__(self, input_size, output_size, activation=sigmoid,
                 derivative=sigmoid_derivative):
        """Create a layer with standard-normal weights and zero biases.

        Args:
            input_size: Number of features in each input sample.
            output_size: Number of units (outputs) of the layer.
            activation: Callable applied to the pre-activation values.
            derivative: Callable returning the derivative of ``activation``
                evaluated at the pre-activation values.
        """
        # Weights are (input_size, output_size) so that a batch of row vectors
        # is transformed with ``inputs @ weights``.
        self.weights = np.random.randn(input_size, output_size)
        self.biases = np.zeros(output_size)
        self.inputs = None   # inputs cached by the latest forward() call
        self.output = None   # NOTE: holds the pre-activation, not the activated value
        self.dinputs = None  # gradient with respect to the inputs
        self.dweights = None  # gradient with respect to the weights
        self.dbiases = None  # gradient with respect to the biases
        self.activation = activation
        self.derivative = derivative

    def forward(self, inputs):
        """Compute the layer's activated output and cache values for backward().

        Args:
            inputs: Array-like of shape ``(n_samples, input_size)``, or a
                single sample of shape ``(input_size,)``.

        Returns:
            ``activation(inputs @ weights + biases)``.
        """
        inputs = np.asarray(inputs)
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights) + self.biases
        return self.activation(self.output)

    def backward(self, dvalues):
        """Back-propagate the error received from the next layer.

        Stores the weight and bias gradients (summed over the batch) in
        ``dweights`` / ``dbiases`` and the gradient with respect to the
        inputs in ``dinputs``.

        Args:
            dvalues: Error passed back by the next layer, shaped like the
                value returned by ``forward``.

        Returns:
            The gradient with respect to the inputs, shaped like the inputs
            given to ``forward``.

        Raises:
            RuntimeError: If ``forward`` has not been called yet.
        """
        if self.inputs is None or self.output is None:
            raise RuntimeError("backward() called before forward()")

        delta = self.derivative(self.output) * dvalues
        # Treat a single 1-D sample as a one-row batch so the matrix products
        # below are well defined; 2-D batches pass through unchanged.
        delta_rows = np.atleast_2d(delta)
        input_rows = np.atleast_2d(self.inputs)

        self.dweights = np.dot(input_rows.T, delta_rows)
        self.dbiases = np.sum(delta_rows, axis=0)
        dinputs = np.dot(delta_rows, self.weights.T)
        if np.ndim(self.inputs) == 1:
            dinputs = dinputs[0]  # hand back a vector for a vector input
        self.dinputs = dinputs
        return self.dinputs

    def update(self, learning_rate, samples):
        """Apply one gradient-descent step using the stored gradients.

        Args:
            learning_rate: Step size.
            samples: Number of samples the gradients were summed over; the
                step is divided by it.

        Raises:
            RuntimeError: If ``backward`` has not been called yet.
            ValueError: If ``samples`` is not positive.
        """
        if self.dweights is None or self.dbiases is None:
            raise RuntimeError("update() called before backward()")
        if samples <= 0:
            raise ValueError("samples must be a positive number")
        self.weights -= learning_rate * self.dweights / samples
        self.biases -= learning_rate * self.dbiases / samples


class LinearNetwork:
    """Sequential stack of layers trained with mini-batch gradient descent.

    Each layer must provide ``forward(inputs)``, ``backward(dvalues)`` and
    ``update(learning_rate, samples)``.
    """

    def __init__(self, learning_rate=0.01):
        """Create an empty network.

        Args:
            learning_rate: Step size handed to every layer's ``update``.
        """
        self.layers = []
        self.learning_rate = learning_rate

    def add_layer(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def forward(self, inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        activations = inputs
        for layer in self.layers:
            activations = layer.forward(activations)
        return activations

    def backward(self, dvalues):
        """Propagate ``dvalues`` through the layers from last to first.

        Returns:
            The value returned by the first layer's ``backward`` (``dvalues``
            itself when the network has no layers).
        """
        grad = dvalues
        for layer in reversed(self.layers):
            grad = layer.backward(grad)
        return grad

    def update(self, n_samples):
        """Ask every layer to apply its gradients, scaled by ``n_samples``."""
        for layer in self.layers:
            layer.update(self.learning_rate, n_samples)

    def train(self, inputs, targets):
        """Run one forward/backward/update step on a single batch.

        Args:
            inputs: 2-D array of shape ``(n_samples, n_features)``.
            targets: Array shaped like the network output.

        Returns:
            Mean squared error of the batch, measured before the update.
        """
        n_samples, _ = inputs.shape
        output = self.forward(inputs)
        error = output - targets  # also used as the gradient sent backward
        self.backward(error)
        self.update(n_samples)
        return np.mean(np.square(error))

    def fit(self, X, y, epochs=1000, batch_size=10):
        """Train for ``epochs`` passes over ``X`` in consecutive mini-batches.

        Every 10th epoch the loss of that epoch's last batch is printed.

        Args:
            X: 2-D array of training inputs.
            y: Training targets with one row per row of ``X``.
            epochs: Number of passes over the data.
            batch_size: Maximum number of samples per batch.

        Raises:
            ValueError: If ``X`` is empty, ``y`` has a different number of
                rows than ``X``, or ``batch_size`` is smaller than 1.
        """
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if len(y) != n_samples:
            raise ValueError("X and y must contain the same number of samples")
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        for epoch in range(epochs):
            for start in range(0, n_samples, batch_size):
                stop = start + batch_size
                loss = self.train(X[start:stop], y[start:stop])
            if epoch % 10 == 0:
                print(f"Epoch {epoch} loss: {loss}")

    def predict(self, X):
        """Return the network output for ``X`` (a plain forward pass)."""
        return self.forward(X)

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load the iris dataset: feature matrix and integer class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# One-hot encode the labels. The encoder is fitted on the training labels
# only and then reused for the test labels; the encoder expects a 2-D column,
# hence the reshape.
encoder = OneHotEncoder(categories='auto')
train_label_column = y_train.reshape(-1, 1)
test_label_column = y_test.reshape(-1, 1)
y_train_onehot = encoder.fit_transform(train_label_column).toarray()
y_test_onehot = encoder.transform(test_label_column).toarray()

In [3]:
# Display the shapes of the training features and the one-hot test labels.
X_train.shape,y_test_onehot.shape

((105, 4), (45, 3))

In [10]:
# Build a two-layer network: 4 input features -> 20 units -> 3 outputs.
model = LinearNetwork()
for layer in (
    DenseLayer(input_size=4, output_size=20),
    DenseLayer(input_size=20, output_size=3),
):
    model.add_layer(layer)

# Train against the one-hot encoded training labels.
model.fit(X_train, y_train_onehot, epochs=10000, batch_size=32)

# The predicted class is the index of the largest output for each test sample.
y_pred_hot = model.predict(X_test)
y_pred = y_pred_hot.argmax(axis=1)


Epoch 0 loss: 0.5722104851959021
Epoch 10 loss: 0.5502415732366718
Epoch 20 loss: 0.5232030822436671
Epoch 30 loss: 0.49177305931163184
Epoch 40 loss: 0.4554368846275462
Epoch 50 loss: 0.4178365156666135
Epoch 60 loss: 0.3860394757453747
Epoch 70 loss: 0.36170607158049833
Epoch 80 loss: 0.342990302665137
Epoch 90 loss: 0.328334233160836
Epoch 100 loss: 0.31656404821220846
Epoch 110 loss: 0.30645626180634233
Epoch 120 loss: 0.2967608363154492
Epoch 130 loss: 0.28633040980114655
Epoch 140 loss: 0.2741883915161884
Epoch 150 loss: 0.2591630705854579
Epoch 160 loss: 0.23854803890734164
Epoch 170 loss: 0.2079925304908389
Epoch 180 loss: 0.18148139620635517
Epoch 190 loss: 0.17589594339500023
Epoch 200 loss: 0.17450815332317596
Epoch 210 loss: 0.17313914320829915
Epoch 220 loss: 0.17172223895598648
Epoch 230 loss: 0.17033268570051702
Epoch 240 loss: 0.1689889432007235
Epoch 250 loss: 0.16768829610812439
Epoch 260 loss: 0.16642418553509908
Epoch 270 loss: 0.1651908707135091
Epoch 280 loss: 0.1

In [11]:
from sklearn.metrics import accuracy_score

# Turn the network outputs for the test set into class indices by taking the
# position of the largest output in each row.
y_pred_hot = model.predict(X_test)
y_pred = y_pred_hot.argmax(axis=1)

# Fraction of test samples whose predicted class matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0
