# 自动微分

In [11]:
from abc import abstractmethod, ABC
import numpy as np

## Foundation

### Tensor

In [12]:
class Tensor:
    """A NumPy array bundled with the bookkeeping needed for reverse-mode autodiff."""

    def __init__(self, data):
        """Wrap `data` (anything `np.array` accepts) as a graph node with no parents."""
        # Raw values, always stored as a NumPy array.
        self.data = np.array(data)
        # Accumulated gradient; starts as the scalar 0 and is rebound to an
        # array the first time something is added to it.
        self.grad = 0
        # Callback that pushes this tensor's gradient into its parents' `grad`.
        # Leaf tensors keep this no-op default.
        self.gradient_fn = lambda: None
        # Tensors that `backward()` continues into after this one.
        self.parents = set()

    def backward(self):
        """Run every reachable `gradient_fn` exactly once, consumers first.

        Nodes are processed in reverse topological order, so a tensor that is
        reachable through several paths only propagates its gradient after all
        of its consumers have contributed to it, and is never processed twice.
        """
        # Iterative post-order DFS: `order` lists each node after its parents.
        order = []
        seen = set()
        stack = [(self, False)]
        while stack:
            node, expanded = stack.pop()
            if expanded:
                order.append(node)
                continue
            if node in seen:
                continue
            seen.add(node)
            stack.append((node, True))
            stack.extend((parent, False) for parent in node.parents)

        # Reversing the post-order puts every node ahead of its parents.
        for node in reversed(order):
            if node.gradient_fn:
                node.gradient_fn()

    def size(self):
        """Return the length of the first axis, or 1 for 0-d (scalar) data."""
        # len() raises TypeError on a 0-d array, so scalars are handled explicitly.
        if self.data.ndim == 0:
            return 1
        return len(self.data)

    def __str__(self):
        """Return the string form of the underlying array."""
        return str(self.data)

### Base Layer

In [13]:
class Layer(ABC):
    """Abstract base for model layers; subclasses supply `forward`."""

    @abstractmethod
    def forward(self, x: Tensor):
        """Compute the layer output for the input tensor `x`."""

    def __call__(self, x: Tensor):
        """Make the layer callable: `layer(x)` is shorthand for `layer.forward(x)`."""
        output = self.forward(x)
        return output

    def __str__(self):
        """Default description: the empty string."""
        return ''

### Base Loss Function

In [14]:
class Loss(ABC):
    """Abstract base for loss functions; subclasses supply `loss`."""

    @abstractmethod
    def loss(self, p: Tensor, y: Tensor):
        """Compute the loss between prediction `p` and target `y`."""

    def __call__(self, p: Tensor, y: Tensor):
        """Make the object callable: `fn(p, y)` is shorthand for `fn.loss(p, y)`."""
        result = self.loss(p, y)
        return result

## Data

### Features and Label

In [15]:
# A single training example: two input values and one target value.
feature = Tensor(data=[28.1, 58.0])
label = Tensor(data=[165])

## Model

### Linear Layer

In [16]:
class Linear(Layer):
    """Fully connected layer computing `x @ weight.T + bias`."""

    def __init__(self, in_size, out_size):
        """Create the parameters.

        Args:
            in_size: number of input features.
            out_size: number of output features.
        """
        # Shape (out_size, in_size); every entry is 1 / in_size, so with the
        # zero bias the initial output is the mean of the inputs.
        self.weight = Tensor(np.ones((out_size, in_size)) / in_size)
        self.bias = Tensor(np.zeros(out_size))

    def forward(self, x: Tensor):
        """Apply the layer and register its backward step on the result.

        Args:
            x: a single sample of shape (in_size,) or a batch of shape
                (batch, in_size).

        Returns:
            A Tensor holding `x @ weight.T + bias`, whose `gradient_fn`
            accumulates into `weight.grad` and `bias.grad`.
        """
        p = Tensor(x.data @ self.weight.data.T + self.bias.data)

        def gradient_fn():
            # Promote both operands to (batch, features) so that a single
            # sample and a batch go through the same code path. broadcast_to
            # also covers the case where p.grad is still the initial scalar 0.
            upstream = np.atleast_2d(np.broadcast_to(p.grad, p.data.shape))
            inputs = np.atleast_2d(x.data)

            # dL/dW = upstream^T @ inputs, shape (out_size, in_size): an outer
            # product summed over the batch. An element-wise product is only
            # correct when out_size == 1 on a single sample.
            self.weight.grad += upstream.T @ inputs
            # dL/db sums over the batch axis only, keeping one entry per output.
            self.bias.grad += upstream.sum(axis=0)

        p.gradient_fn = gradient_fn
        # NOTE: x is not registered as a parent, so no gradient flows back
        # into the layer input.
        p.parents = {self.weight, self.bias}
        return p

    def __str__(self):
        """Return the current weight and bias values on separate lines."""
        return f'weight: {self.weight}\nbias: {self.bias}'

### MSE Loss Function

In [17]:
class MSELoss(Loss):
    """Mean squared error: the mean of `(y - p) ** 2` over all elements."""

    def loss(self, p: Tensor, y: Tensor):
        """Compute the MSE and register its backward step on the result.

        Args:
            p: predictions.
            y: targets.

        Returns:
            A scalar Tensor whose `gradient_fn` accumulates d(MSE)/dp into
            `p.grad`.
        """
        residual = y.data - p.data
        mse = Tensor(np.mean(np.square(residual)))

        def gradient_fn():
            # d/dp mean((y - p)^2) = -2 * (y - p) / N. The 1/N factor must
            # match the mean taken in the forward pass, otherwise the gradient
            # is N times too large whenever there is more than one element.
            # The loss is treated as the graph root: mse.grad is not consulted.
            p.grad += -2 * residual / np.size(residual)

        mse.gradient_fn = gradient_fn
        mse.parents = {p}
        return mse

## Testing

### Predicting

In [18]:
# Size the layer from the sample: one input per feature value, one output.
layer = Linear(in_size=feature.size(), out_size=1)

# Forward pass; calling the layer dispatches to Linear.forward.
prediction = layer(feature)

print(f'prediction: {prediction}')

prediction: [43.05]


### Calculating Loss

In [19]:
# Compare the prediction against the label with mean squared error.
loss = MSELoss()

error = loss(p=prediction, y=label)

print(f'error: {error}')

error: 14871.802500000002


### Calculating Gradient and Updating Layer Parameters

In [20]:
# Clear the parameter gradients before the backward pass accumulates into them.
# NOTE: prediction.grad is not reset here, so running this cell again without
# re-running the forward cells keeps adding to it.
layer.weight.grad = layer.bias.grad = 0

# Propagate gradients from the loss back to the layer parameters.
error.backward()

# Gradient-descent step with an implicit step size of 1.
layer.bias.data -= layer.bias.grad
layer.weight.data -= layer.weight.grad

print(layer)

weight: [[ 6854.09 14146.7 ]]
bias: [243.9]
