## 自动求导与计算图

In [None]:
# Compute y = w1*x^2 + w2*x + w3 and let autograd differentiate it.

import torch

# Only the three coefficients are created with requires_grad=True;
# x is an ordinary tensor that autograd does not track.
w1 = torch.tensor(1.0, requires_grad=True)
w2 = torch.tensor(2.0, requires_grad=True)
w3 = torch.tensor(3.0, requires_grad=True)
x = torch.tensor(4.0)

print(w1, w2, w3, x)

y = w1 * x.pow(2) + w2 * x + w3

# Populates .grad on every leaf tensor that requires a gradient.
y.backward()

print(w1.grad)  # dy/dw1 = x^2 = 16
print(w2.grad)  # dy/dw2 = x = 4
print(w3.grad)  # dy/dw3 = 1

print()

In [None]:
# y was built from tensors that require gradients, so it is tracked too.
print(y.requires_grad)

# Inside torch.no_grad() no graph is recorded, so a result computed from
# tracked tensors does not itself require a gradient.
with torch.no_grad():
    z = w1 * x + w2
    # Inspect z (the tensor produced inside the block); x never required a
    # gradient, so printing x would not demonstrate anything about no_grad.
    print(z.requires_grad) # False

# detach() returns a tensor that is cut off from the graph that produced y.
h = y.detach()
print(h.requires_grad) # False

## 神经网络工具箱 torch.nn

### Linear

In [None]:
import math
import torch.nn as nn
import torch.nn.functional as F

from torch import Tensor
# Linear
class Linear(nn.Module):
    """Fully connected layer computing ``F.linear(input, weight, bias)``.

    Args:
        in_features: size of the last dimension of the input.
        out_features: size of the last dimension of the output.
        bias: when False, the layer has no additive bias (``self.bias`` is None).
        device: forwarded to ``torch.empty`` when allocating the parameters.
        dtype: forwarded to ``torch.empty`` when allocating the parameters.
    """
    in_features: int
    out_features: int
    weight: Tensor
    bias: Tensor

    def __init__(self,
        in_features: int, out_features: int,
        bias: bool = True, device=None, dtype=None
    ) -> None:
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        # Stored as (out_features, in_features), the layout F.linear expects.
        self.weight = nn.Parameter(torch.empty((out_features, in_features), device=device, dtype=dtype))
        if bias:
            self.bias = nn.Parameter(torch.empty(out_features, device=device, dtype=dtype))
        else:
            # Register the missing bias explicitly so the module still exposes
            # a ``bias`` entry (set to None) among its parameters.
            self.register_parameter('bias', None)

        # torch.empty leaves the memory uninitialised; fill it in now.
        self.init_parameters()

    def init_parameters(self):
        """Initialise the weight (Kaiming uniform) and, if present, the bias."""
        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))

        # Only a bias that exists can be initialised. The previous check was
        # inverted: it skipped a real bias and called uniform_ on None.
        if self.bias is not None:
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            nn.init.uniform_(self.bias, -bound, bound)

    def forward(self, input: Tensor):
        """Apply the affine map to ``input`` and return the result."""
        return F.linear(input, self.weight, self.bias)

# Flatten

### Relu

In [None]:
class Relu(nn.Module):
    """Module wrapper around ``F.relu``.

    Args:
        inplace: forwarded to ``F.relu``; when True the input tensor is
            overwritten with the result.
    """
    inplace: bool

    def __init__(self, inplace: bool = False) -> None:
        super().__init__()
        self.inplace = inplace

    def forward(self, input: Tensor) -> Tensor:
        """Return ``input`` with the ReLU applied element-wise."""
        activated = F.relu(input, inplace=self.inplace)
        return activated

    def extra_repr(self) -> str:
        """Show ``inplace=True`` in the module repr only when it is enabled."""
        if self.inplace:
            return 'inplace=True'
        return ''

### 损失函数

In [None]:
from typing import Optional

class _Loss(nn.Module):
    """Base class for losses; stores the ``reduction`` mode as a string.

    Args:
        size_avg: legacy flag; if it or ``reduce`` is given, the pair is
            translated into a reduction string and ``reduction`` is ignored.
        reduce: legacy flag, see ``size_avg``.
        reduction: reduction mode used when no legacy flag is supplied.
    """
    reduction: str

    def __init__(self, size_avg=None, reduce=None, reduction: str = 'mean') -> None:
        super().__init__()
        legacy_requested = not (size_avg is None and reduce is None)
        # The legacy flags take precedence over ``reduction`` when present.
        self.reduction = (
            nn._reduction.legacy_get_string(size_avg, reduce)
            if legacy_requested
            else reduction
        )

class _WeightedLoss(_Loss):
    """Loss base class that additionally carries an optional ``weight`` tensor.

    Args:
        weight: optional tensor stored on the module under the name ``weight``.
        size_avg: forwarded to ``_Loss``.
        reduce: forwarded to ``_Loss``.
        reduction: forwarded to ``_Loss``.
    """
    weight: Optional[Tensor]

    def __init__(
        self,
        weight: Optional[Tensor] = None,
        size_avg=None,
        reduce=None,
        reduction: str = 'mean',
    ) -> None:
        super(_WeightedLoss, self).__init__(size_avg, reduce, reduction)
        # Registered as a buffer rather than a parameter; None is accepted.
        self.register_buffer('weight', weight)

### 交叉熵损失

In [None]:
class CrossEntropyLoss(_WeightedLoss):
    """Module wrapper around ``torch.nn.functional.cross_entropy``.

    Args:
        weight: optional tensor passed as ``weight`` to the functional call.
        size_average: legacy flag forwarded to the base class.
        ignore_index: passed as ``ignore_index`` to the functional call.
        reduce: legacy flag forwarded to the base class.
        reduction: reduction mode forwarded to the base class.
        label_smoothing: passed as ``label_smoothing`` to the functional call.
    """
    __constants__ = ['ignore_index', 'reduction', 'label_smoothing']
    ignore_index: int
    label_smoothing: float

    def __init__(
        self,
        weight: Optional[Tensor] = None,
        size_average=None,
        ignore_index: int = -100,
        reduce=None,
        reduction: str = 'mean',
        label_smoothing: float = 0.0,
    ) -> None:
        super().__init__(weight, size_average, reduce, reduction)
        self.label_smoothing = label_smoothing
        self.ignore_index = ignore_index

    def forward(self, input: Tensor, target: Tensor) -> Tensor:
        """Return the cross-entropy between ``input`` and ``target``."""
        options = dict(
            weight=self.weight,
            ignore_index=self.ignore_index,
            reduction=self.reduction,
            label_smoothing=self.label_smoothing,
        )
        return torch.nn.functional.cross_entropy(input, target, **options)

### SmoothL1Loss

In [None]:
class SmoothL1Loss(_Loss):
    """Smooth L1 loss: quadratic for small errors, linear for large ones.

    With ``d = |input - target|`` the element-wise loss is
    ``0.5 * d**2 / beta`` where ``d < beta`` and ``d - 0.5 * beta`` elsewhere.

    Args:
        size_avg: legacy flag forwarded to ``_Loss``.
        reduce: legacy flag forwarded to ``_Loss``.
        reduction: ``'none'`` returns the element-wise loss, ``'sum'`` its sum;
            any other value returns the mean.
        beta: threshold between the quadratic and the linear regime. With
            ``beta == 0`` the loss is the plain absolute error.
    """

    def __init__(self, size_avg=None, reduce=None, reduction: str = 'mean', beta: float = 1.0) -> None:
        super(SmoothL1Loss, self).__init__(size_avg=size_avg, reduce=reduce, reduction=reduction)
        self.beta = beta

    def forward(self, input: Tensor, target: Tensor) -> Tensor:
        """Return the (reduced) smooth L1 loss between ``input`` and ``target``."""
        diff = torch.abs(input - target)

        if self.beta == 0:
            # The quadratic branch would divide by zero; although torch.where
            # never selects it in this case, it would still poison gradients.
            loss = diff
        else:
            loss = torch.where(
                diff < self.beta,
                0.5 * (diff ** 2) / self.beta,
                diff - 0.5 * self.beta,
            )

        # 'none' must return the unreduced tensor instead of falling into mean.
        if self.reduction == 'none':
            return loss
        if self.reduction == 'sum':
            return torch.sum(loss)
        return torch.mean(loss)

# Sanity check: the hand-written loss and torch's built-in one are evaluated
# on the same random integer-valued inputs so their outputs can be compared.
x = torch.randint(5, (2, 1, 2, 2)).to(torch.float32)
y = torch.randint(5, (2, 1, 2, 2)).to(torch.float32)

crit = SmoothL1Loss(reduction='sum')
crit_t = nn.SmoothL1Loss(reduction='sum')

# First line: custom implementation; second line: torch reference.
print(crit(x, y), crit_t(x, y), sep='\n')



### 优化器 —— SGD

In [None]:
import torch
import torch.optim._functional as F
import torch.optim as optim

class SGD(optim.Optimizer):
    """Plain stochastic gradient descent (no momentum) with optional weight decay.

    Every call to :meth:`step` applies
    ``p <- p - lr * (grad + weight_decay * p)`` to each parameter that has a
    gradient, using the ``lr`` and ``weight_decay`` of its parameter group.

    Args:
        params: iterable of parameters or of parameter-group dicts.
        lr: learning rate.
        weight_decay: L2 penalty coefficient added to the gradient.
    """

    def __init__(self,
        params, lr: float = 0.001, weight_decay=0
    ) -> None:
        # momentum / dampening / nesterov are fixed to "off"; they are kept in
        # the defaults so that every parameter group carries these keys.
        default = dict(
            lr=lr, momentum=0, dampening=0,
            weight_decay=weight_decay, nesterov=False
        )
        super().__init__(params, default)

    def __setstate__(self, state: dict) -> None:
        """Restore pickled state, filling in ``nesterov`` if it is missing."""
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault('nesterov', False)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform one update of every parameter group.

        Args:
            closure: optional callable that re-evaluates the model and returns
                the loss; it is run with gradients enabled.

        Returns:
            The value returned by ``closure``, or None when none was given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            lr = group['lr']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad
                if weight_decay != 0:
                    # Out-of-place add so the stored gradient is not modified.
                    d_p = d_p.add(p, alpha=weight_decay)
                # The update is written out explicitly instead of going through
                # the private torch.optim._functional.sgd helper, whose
                # signature differs between PyTorch releases.
                p.add_(d_p, alpha=-lr)

        # Return only after ALL groups were processed; returning from inside
        # the loop would leave every group but the first one untouched.
        return loss

### 使用优化器

In [None]:
class MLP(nn.Module):
    """Three-layer perceptron: in_channels -> 300 -> 64 -> out_channels.

    Every linear layer, including the last one, is followed by a ReLU.
    NOTE(review): the final ReLU makes all outputs non-negative; confirm this
    is intended when the outputs are fed to a cross-entropy loss.

    Args:
        in_channels: number of input features.
        out_channels: number of output features.
    """

    def __init__(self, in_channels, out_channels) -> None:
        super().__init__()

        widths = [in_channels, 300, 64, out_channels]
        stages = []
        # Pair consecutive widths into (fan_in, fan_out) for each linear layer.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages.append(nn.Linear(fan_in, fan_out))
            stages.append(nn.ReLU())
        self.layer = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.layer(x)

In [None]:
# Model for flattened 28x28 inputs with 10 outputs, trained by the custom SGD.
mlp_model = MLP(28 * 28, 10)
optimizer = SGD(params=mlp_model.parameters(), lr=0.05)

# A single random batch of 10 samples together with fixed integer class labels.
data = torch.randn(10, 28 * 28)
label = torch.tensor([1, 0, 4, 7, 9, 3, 4, 5, 3, 6], dtype=torch.long)

criterion = CrossEntropyLoss()

for i in range(10):
    # Forward pass and loss on the same batch every iteration.
    output = mlp_model(data)
    loss = criterion(output, label)

    optimizer.zero_grad()  # clear the gradients left from the previous step
    loss.backward()        # back-propagate
    optimizer.step()       # update the parameters
    print(loss)

## 模型处理

In [None]:
import torch.nn as nn
import torchvision as TV

# Build a VGG16 model and inspect its two sub-module containers.
vgg16 = TV.models.vgg16()

# Printed one per line: number of entries in `features`, number of entries in
# `classifier`, and the last entry of `classifier`.
print(
    len(vgg16.features),
    len(vgg16.classifier),
    vgg16.classifier[-1],
    sep='\n',
)

### 数据可视化

In [None]:
import torch
import visdom

# Create the visdom client; everything below is sent through it.
vis = visdom.Visdom(env='first')

# Two text messages addressed to the same window; the second call passes
# append=True.
vis.text('Hello Visdom', win='text1')
vis.text('Hello Pytorch', win='text1', append=True)

# Plot y = -x^2 + 20x + 1, sending one point per call to the 'loss' window.
for x in range(20):
    vis.line(
        X=torch.FloatTensor([x]),
        Y=torch.FloatTensor([20 * x + 1 - x ** 2]),
        opts=dict(title='y = -x^2 + 20x + 1'),
        win='loss',
        update='append',
    )

# Send a random 3 x 255 x 255 float tensor with values in [1, 255) as an image.
vis.image(torch.randint(1, 255, (3, 255, 255)).float(), win='random image')