# 2.3神经元（下）
本节是神经元的最后一部分。我们会补充一些对上一节神经元的改进方案，同时介绍一些新的梯度下降迭代策略。相信细心的读者已经发现，使用上一节的迭代策略时，我们不一定能找到全局最优点，而可能会陷入局部最优点，因此我们会介绍新的梯度迭代策略来改进梯度下降的效果。<br>
同时读者也会发现在神经网络中，矩阵运算是如此普遍，但是上一节的Tensor类只支持传入整型或浮点型，我们会重写Tensor类，让其支持对矩阵的计算和反向传播。<br>
本节还会实现一个简单的模型可视化模块，读者可以通过这个模块知道自己构造的模型结构。

首先，我们将Tensor转换成numpy形式计算，来支持矩阵运算。其实矩阵求导和正常的求导没有本质区别：在上一节中我们设置了1×10个神经元作为输入层的下一层，然后输出层用了10×1的神经元，我们的计算结果实际上就是矩阵乘积的求导。可见在神经网络中，矩阵求导更多体现在对于复杂网络结构的简化表示，并没有改变原来的计算规则。若想了解详细的矩阵求导公式和推导过程，可参考这篇博客：https://zhuanlan.zhihu.com/p/273729929 <br>
为了方便我们使用，我们还添加了类方法(类似其他语言的静态方法)方便numpy和Tensor的互换

In [11]:
import numpy as np

def from_numpy(n):
    """Wrap a NumPy array (or array-like) in a trainable ``Tensor``.

    Module-level shortcut for ``Tensor.from_numpy``.  The previous body was
    a bare ``return`` and therefore always produced ``None``.

    Args:
        n: Array-like data to wrap.

    Returns:
        A new ``Tensor`` holding a copy of ``n``.
    """
    # ``Tensor`` is defined later in the module; the name is only resolved
    # when this function is called, so the forward reference is fine.
    return Tensor.from_numpy(n)

class Tensor:
    """NumPy-backed value with reverse-mode automatic differentiation.

    Every arithmetic operation returns a new ``Tensor`` that remembers its
    operands (``_prev``) and a closure (``_backward``) that pushes the
    result's gradient back into those operands.  Calling ``backward()`` on
    the final result walks that graph in reverse topological order.

    Notes:
        * ``*`` is a matrix product (``np.dot``), not an element-wise
          product.  If either operand is 0-d, ``np.dot`` degrades to a
          broadcast scalar multiplication, which is handled as well.
        * ``+``, ``-`` and ``/`` are element-wise and follow NumPy
          broadcasting; gradients are summed back to each operand's shape.

    Attributes:
        data: ``np.ndarray`` holding the value.
        grad: float ``np.ndarray`` with the same shape as ``data``.
        trainable: whether ``gradient_descent_opt`` may update ``data``.
        visited: nodes reached by the most recent ``backward()`` call.
    """

    def __init__(self, data, _prev=(), trainable=True):
        self.data = np.array(data)
        # Gradients are always float, even for integer data: an integer
        # buffer makes the in-place ``+=`` in the backward closures fail
        # with a casting error.
        self.grad = np.zeros_like(self.data, dtype=float)
        self._backward = lambda: None
        self._prev = set(_prev)
        self.trainable = trainable
        # Filled by backward(); starting empty makes gradient_descent_opt()
        # a no-op (instead of an AttributeError) before the first backward().
        self.visited = set()

    @classmethod
    def from_numpy(cls, array, trainable=True):
        """Alternate constructor: build a Tensor from a NumPy array."""
        return cls(array, trainable=trainable)

    def to_numpy(self):
        """Return the underlying ``np.ndarray`` (not a copy)."""
        return self.data

    @staticmethod
    def _wrap(value):
        """Return ``value`` as a Tensor; non-Tensors become constants."""
        if isinstance(value, Tensor):
            return value
        return Tensor(value, trainable=False)

    @staticmethod
    def _unbroadcast(grad, shape):
        """Sum ``grad`` over broadcast axes so it matches ``shape``."""
        grad = np.asarray(grad, dtype=float)
        if grad.ndim < len(shape):
            # Smaller than the target: ``+=`` will broadcast it up itself.
            return grad
        extra = grad.ndim - len(shape)
        if extra:
            # Leading axes that broadcasting prepended to the operand.
            grad = grad.sum(axis=tuple(range(extra)))
        for axis, size in enumerate(shape):
            # Axes of length 1 that were stretched by broadcasting.
            if size == 1 and grad.shape[axis] != 1:
                grad = grad.sum(axis=axis, keepdims=True)
        return grad

    def __add__(self, other):
        other = self._wrap(other)
        out = Tensor(self.data + other.data, (self, other))

        def _backward():
            self.grad += self._unbroadcast(out.grad, self.data.shape)
            other.grad += self._unbroadcast(out.grad, other.data.shape)
        out._backward = _backward
        return out

    def __radd__(self, other):
        return self.__add__(other)

    def __sub__(self, other):
        other = self._wrap(other)
        out = Tensor(self.data - other.data, (self, other))

        def _backward():
            self.grad += self._unbroadcast(out.grad, self.data.shape)
            other.grad -= self._unbroadcast(out.grad, other.data.shape)
        out._backward = _backward
        return out

    def __rsub__(self, other):
        # other - self == (-self) + other
        return (-self).__add__(other)

    def __neg__(self):
        neg_tensor = Tensor(-self.data, (self,))

        def _backward():
            self.grad -= neg_tensor.grad
        neg_tensor._backward = _backward
        return neg_tensor

    def __mul__(self, other):
        """Matrix product ``self @ other`` (scalar product if either is 0-d)."""
        other = self._wrap(other)
        out = Tensor(np.dot(self.data, other.data), (self, other))

        def _backward():
            a, b, g = self.data, other.data, out.grad
            if a.ndim == 0 or b.ndim == 0:
                # np.dot with a 0-d operand is a broadcast multiplication.
                self.grad += self._unbroadcast(g * b, a.shape)
                other.grad += self._unbroadcast(g * a, b.shape)
                return
            if a.ndim > 2 or b.ndim > 2:
                raise NotImplementedError(
                    "backward of * is only implemented for operands "
                    "with at most 2 dimensions"
                )
            # Treat 1-D operands the way np.dot does: a row vector on the
            # left, a column vector on the right.
            a2 = a.reshape(1, -1) if a.ndim == 1 else a
            b2 = b.reshape(-1, 1) if b.ndim == 1 else b
            g2 = np.reshape(g, (a2.shape[0], b2.shape[1]))
            # For C = A @ B:  dL/dA = G @ B^T  and  dL/dB = A^T @ G.
            self.grad += np.dot(g2, b2.T).reshape(a.shape)
            other.grad += np.dot(a2.T, g2).reshape(b.shape)
        out._backward = _backward
        return out

    def __rmul__(self, other):
        # The matrix product is not commutative, so keep the operand order.
        return self._wrap(other).__mul__(self)

    def __truediv__(self, other):
        other = self._wrap(other)
        out = Tensor(np.true_divide(self.data, other.data), (self, other))

        def _backward():
            d_self = np.true_divide(1, other.data) * out.grad
            d_other = (
                np.true_divide(self.data, np.square(other.data)) * out.grad
            )
            self.grad += self._unbroadcast(d_self, self.data.shape)
            other.grad -= self._unbroadcast(d_other, other.data.shape)
        out._backward = _backward
        return out

    def __rtruediv__(self, other):
        return self._wrap(other).__truediv__(self)

    def __pow__(self, power):
        """Element-wise power with a constant (non-Tensor) exponent."""
        out = Tensor(np.power(self.data, power), (self,))

        def _backward():
            self.grad += power * np.power(self.data, power - 1) * out.grad
        out._backward = _backward
        return out

    def tanh(self):
        t = np.tanh(self.data)
        out = Tensor(t, (self,))

        def _backward():
            # d tanh(x) / dx = 1 - tanh(x)^2
            self.grad += (1 - t ** 2) * out.grad
        out._backward = _backward
        return out

    def sigmoid(self):
        s = 1 / (1 + np.exp(-self.data))
        out = Tensor(s, (self,))

        def _backward():
            # d sigmoid(x) / dx = sigmoid(x) * (1 - sigmoid(x))
            self.grad += (s * (1 - s)) * out.grad
        out._backward = _backward
        return out

    def relu(self):
        x = self.data
        out = Tensor(np.maximum(0, x), (self,))

        def _backward():
            # Gradient passes through only where the input was positive.
            self.grad += (x > 0) * out.grad
        out._backward = _backward
        return out

    def gradient_descent_opt(self, learning_rate=0.001, grad_zero=True):
        """Apply one plain gradient-descent step to the last backward() graph.

        Args:
            learning_rate: Step size multiplied with each gradient.
            grad_zero: When true, reset the gradient of every visited node
                to zeros after the step.
        """
        for v in self.visited:
            # Only leaf nodes are parameters.  Results of operations also
            # default to trainable=True, and updating them would silently
            # alter already-computed values such as the loss.
            if v.trainable and not v._prev:
                if not np.issubdtype(v.data.dtype, np.inexact):
                    # Integer data cannot absorb a fractional update in place.
                    v.data = v.data.astype(float)
                v.data -= learning_rate * v.grad
            if grad_zero:
                # Keep grad an array of the right shape (not the int 0).
                v.grad = np.zeros_like(v.data, dtype=float)

    def backward(self):
        """Back-propagate from this tensor through the recorded graph.

        The gradient of this tensor is seeded with ones; every reachable
        node then accumulates into its own ``grad``.  The set of reached
        nodes is stored in ``self.visited`` for ``gradient_descent_opt``.
        """
        topo = []
        self.visited = set()

        def build_topo(v):
            if v not in self.visited:
                self.visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)

        build_topo(self)
        # Seed with an array of ones so the shape matches ``data``.
        self.grad = np.ones_like(self.data, dtype=float)
        for node in reversed(topo):
            node._backward()

    def __repr__(self):
        return f"Tensor(data={self.data}, trainable={self.trainable})"

我们来做个简单的测试

In [12]:
# Smoke test for the matrix-aware Tensor.
# Build a 3x1 column and a 1x3 row.
tensor1 = Tensor([[1], [3], [5]])
tensor2 = Tensor([[1, 3, 2]])

# `*` on Tensor is a matrix product, so (3x1) * (1x3) yields a 3x3 result.
result_mul = tensor1 * tensor2
print("\nTensor Multiplication:", result_mul.data, sep="\n")


Tensor Multiplication:
[[ 1  3  2]
 [ 3  9  6]
 [ 5 15 10]]


我们再用现在版本的神经元实现我们上一节二阶函数的模型。

In [None]:
def func(x):
    """Evaluate the target quadratic ``4*x*x - 5``.

    Works for anything that supports ``*`` and ``-`` with numbers, e.g.
    Python scalars or NumPy arrays (evaluated element-wise).
    """
    scaled = 4 * x
    return scaled * x - 5

import numpy as np
# Sample the target function at 30 evenly spaced points on [-10, 10].
x_values = np.linspace(-10, 10, num=30)
y_values = func(x_values)

# Draw 20 distinct sample positions at random; these form the training set.
random_indices = np.random.choice(len(x_values), size=20, replace=False)
train_x, train_y = x_values[random_indices], y_values[random_indices]

# Every position that was not drawn above is held out as the test set.
held_out = np.ones(len(x_values), dtype=bool)
held_out[random_indices] = False
test_x, test_y = x_values[held_out], y_values[held_out]

In [None]:
import numpy as np
import random
# Number of passes the training loop below will make.
epoch = 5000

# Hidden layer parameters: weights and bias, both of shape (1, 10).
w1 = Tensor.from_numpy(np.random.rand(1, 10), trainable=True)
b1 = Tensor.from_numpy(np.random.rand(1, 10), trainable=True)

# Output layer parameters: weights of shape (10, 1) and a scalar bias.
w2 = Tensor.from_numpy(np.random.rand(10, 1), trainable=True)
b2 = Tensor(random.random())
def forward(x):
    """Run the two-layer network on ``x`` and return the output Tensor.

    Computes ``relu(x * w1 + b1) * w2 + b2`` using the module-level
    parameters ``w1``, ``b1``, ``w2`` and ``b2``; ``*`` is the Tensor
    matrix product.
    """
    hidden = (x * w1 + b1).relu()
    return hidden * w2 + b2
# Training loop skeleton: iterates `epoch` times.
# NOTE(review): the body only resets `loss`; the forward pass, loss
# accumulation, backward() and parameter update appear to be missing.
for i in range(epoch):
    # `loss` is set back to 0 at the start of every iteration.
    loss = 0
    

本节所实现的神经元类，并不会是神经元的最终版本，在后续介绍深度学习的经典模型的章节，我们会继续补充神经元类