In [19]:
import os
import torch
from torch import nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

## 线性回归

In [4]:
class Linear(nn.Module):
    """Fully connected layer computing ``y = x @ w + b``.

    A subclass of ``nn.Module`` has to provide a constructor (``__init__``)
    and a forward pass (``forward``).

    Args:
        in_features: size of the last dimension of the input.
        out_features: size of the last dimension of the output.
    """

    def __init__(self, in_features, out_features):
        super().__init__()  # same as nn.Module.__init__(self); super() is the usual spelling
        # Tensors wrapped in nn.Parameter are registered as learnable parameters.
        self.w = nn.Parameter(torch.randn(in_features, out_features))
        self.b = nn.Parameter(torch.randn(out_features))

    def forward(self, x):
        """Apply the affine map to ``x``.

        Args:
            x: tensor whose last dimension has size ``in_features``; it may be
                1-D, 2-D, or carry extra leading batch dimensions.

        Returns:
            Tensor shaped like ``x`` with the last dimension replaced by
            ``out_features``.
        """
        # torch.matmul (i.e. x @ self.w) matches Tensor.mm for 2-D inputs and
        # additionally accepts 1-D and batched N-D inputs; the bias broadcasts
        # over every leading dimension.
        return torch.matmul(x, self.w) + self.b

In [5]:
# Build a 4 -> 3 layer and feed it a batch of two random samples.
layer = Linear(in_features=4, out_features=3)
input = torch.randn(2, 4)  # note: shadows the built-in input(); later cells reuse this name
output = layer(input)
output

tensor([[-3.6353, -0.5164, -0.9270],
        [-4.0045, -1.1728,  0.2221]], grad_fn=<AddBackward0>)

In [6]:
# named_parameters() yields (name, parameter) pairs; unpack each pair straight into print().
for entry in layer.named_parameters():
    print(*entry)

w Parameter containing:
tensor([[-0.3632, -0.5640,  0.7314],
        [-1.9605, -0.4759, -0.5635],
        [-2.2911,  0.3091, -0.2229],
        [ 0.5920,  0.1634, -0.3330]], requires_grad=True)
b Parameter containing:
tensor([ 1.6898, -0.7530, -0.2323], requires_grad=True)


## 设置运行设备device

In [10]:
# Pick the compute backend in order of preference: CUDA, then MPS (macOS), then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():  # training backend on macOS
    device = "mps"
else:
    device = "cpu"
print(f"The device is: {device}\n")

The device is: mps



将Module放在GPU上运行也十分简单，只需以下两步：

In [None]:

# model = model.to(device)  # step 1: move all parameters of the model to the device
# Tensor.to() / Tensor.cuda() return a copy instead of moving the tensor in place,
# so the result has to be assigned back. Using `device` rather than .cuda() keeps
# the cell runnable on machines without CUDA (e.g. MPS or CPU only).
input = input.to(device)  # step 2: place the input data on the same device


至于如何在多个GPU上并行计算，PyTorch也提供了两个函数，可实现简单高效的并行GPU计算。

In [21]:
#nn.parallel.data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None)
#nn.DataParallel(module, device_ids=None, output_device=None, dim=0)

## 网络结构的写法

### Sequential类型写法

In [14]:
from collections import OrderedDict

# Style 1: start from an empty container and register each layer under a name.
net1 = nn.Sequential()
for layer_name, layer_module in (
    ('cov1', nn.Conv2d(3, 3, 3)),
    ('bn', nn.BatchNorm2d(3)),
    ('relu', nn.ReLU()),
):
    net1.add_module(layer_name, layer_module)
print(net1)

# Style 2: pass the layers positionally; they are registered as '0', '1', '2'.
positional_layers = [nn.Conv2d(3, 3, 3), nn.BatchNorm2d(3), nn.ReLU()]
net2 = nn.Sequential(*positional_layers)
print(net2)

# Style 3: pass an OrderedDict so every layer keeps an explicit name.
named_layers = OrderedDict()
named_layers['conv1'] = nn.Conv2d(3, 3, 3)
named_layers['bn1'] = nn.BatchNorm2d(3)
named_layers['relu1'] = nn.ReLU()
net3 = nn.Sequential(named_layers)
print(net3)

Sequential(
  (cov1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (bn): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
)
Sequential(
  (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
)
Sequential(
  (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
)


可以根据名称或者位置索引取出网络中对应层的具体信息

In [16]:
# A layer can be fetched by its registered name (attribute access) or by position (indexing).
first_convs = (net1.cov1, net2[0], net3.conv1)
print(*first_convs)

Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1)) Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1)) Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))


## 初始化NN的Class

In [None]:
class Linear(nn.Module):
    """Fully connected layer computing ``y = x @ w + b``.

    A subclass of ``nn.Module`` has to provide a constructor (``__init__``)
    and a forward pass (``forward``).

    Args:
        in_features: size of the last dimension of the input.
        out_features: size of the last dimension of the output.
    """

    def __init__(self, in_features, out_features):
        super().__init__()  # same as nn.Module.__init__(self); super() is the usual spelling
        # Tensors wrapped in nn.Parameter are registered as learnable parameters.
        # A plain tensor such as torch.randn(in_features, out_features, requires_grad=True)
        # would not be registered by nn.Module and so would not be returned by parameters().
        self.w = nn.Parameter(torch.randn(in_features, out_features))
        self.b = nn.Parameter(torch.randn(out_features))

    def forward(self, x):
        """Apply the affine map to ``x``.

        Args:
            x: tensor whose last dimension has size ``in_features``; it may be
                1-D, 2-D, or carry extra leading batch dimensions.

        Returns:
            Tensor shaped like ``x`` with the last dimension replaced by
            ``out_features``.
        """
        # torch.matmul (i.e. x @ self.w) matches Tensor.mm for 2-D inputs and
        # additionally accepts 1-D and batched N-D inputs; the bias broadcasts
        # over every leading dimension.
        return torch.matmul(x, self.w) + self.b

In [8]:
class NeuralNetwork(nn.Module):
    """Three-layer MLP mapping 28*28 inputs to 10 logits.

    The input is flattened first, then passed through
    Linear(784, 512) -> ReLU -> Linear(512, 512) -> ReLU -> Linear(512, 10).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Assemble the layers first, then hand them to the container in order.
        stack = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*stack)

    def forward(self, x):
        """Return the raw (un-normalised) logits for ``x``."""
        return self.linear_relu_stack(self.flatten(x))

## 初始化网络并输出

In [11]:
# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


## 使用模型

In [None]:
shape = (1, 28, 28)  # the leading 1 is the batch size
x = torch.rand(shape, device=device)
logits = model(x)  # shape (1, 10)
pred_prob = F.softmax(logits, dim=1)
# argmax() without `dim` indexes the flattened tensor; it equals the predicted
# class here only because the batch holds a single sample.
print(torch.argmax(pred_prob))

tensor(5, device='mps:0')


## 查看模型参数

In [35]:
# Show the module hierarchy of the model.
print(f"Model structure: {model}\n\n")

# For every registered parameter print its name, its size, and its first two
# entries along dimension 0 (param[:2]).
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0217,  0.0247,  0.0014,  ..., -0.0141, -0.0031, -0.0160],
        [ 0.0101,  0.0317,  0.0267,  ...,  0.0045, -0.0024,  0.0339]],
       device='mps:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0228, -0.0219], device='mps:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0407,  0.0209,  0.0361,  ..., -0.0191,  0.0408, -0.0318],
        [-0.0344, -0.0054, -0.0049,  ...,  0.0129, -0.0122, -0.0123]],
       device='mps:0', grad_fn=<Slice

## 感知机实现

`感知机`由两个全连接层组成，采用`sigmoid`函数作为激活函数

In [8]:
class perception(nn.Module):
    """Perceptron with one hidden layer: Linear -> sigmoid -> Linear.

    Args:
        input_feature: number of input features.
        hidden_feature: number of units in the hidden layer.
        output_feature: number of output features.
    """

    def __init__(self, input_feature, hidden_feature, output_feature):
        super().__init__()
        self.layer1 = nn.Linear(input_feature, hidden_feature)
        self.layer2 = nn.Linear(hidden_feature, output_feature)

    def forward(self, x):
        """Return ``layer2(sigmoid(layer1(x)))``."""
        hidden = self.layer1(x)
        activated = torch.sigmoid(hidden)
        return self.layer2(activated)

In [10]:
# 初始化网络结构
# Build the network: 3 input features, 4 hidden units, 1 output.
perceptron = perception(input_feature=3, hidden_feature=4, output_feature=1)

# Show the name and shape of every learnable parameter.
for name, param in perceptron.named_parameters():
    print(name, param.shape)

layer1.weight torch.Size([4, 3])
layer1.bias torch.Size([4])
layer2.weight torch.Size([1, 4])
layer2.bias torch.Size([1])


## Functional API

`nn`中的大多数layer，在`functional`中都有一个与之相对应的函数。`nn.functional`中的函数和`nn.Module`的主要区别在于，用`nn.Module`实现的layers是一个特殊的类，都是由`class layer(nn.Module)`定义，会自动提取可学习的参数；而`nn.functional`中的函数更像是纯函数，由`def function(input)`定义。下面将举例说明`functional`的使用，并对比二者的不同之处。

In [22]:
class function(nn.Module):
    """Small conv net mixing ``nn.Module`` layers with ``nn.functional`` calls.

    Layers that own learnable parameters (convolutions, linear layers) are
    created as modules in ``__init__``; parameter-free operations (ReLU,
    max-pooling) are applied through ``F`` inside ``forward``.

    ``fc1`` expects 16 * 5 * 5 input features, which is what a 3x32x32 input
    produces after the two conv + pool stages.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def num_flat_features(self, x):
        """Return the number of features per sample in ``x``.

        This is the product of every dimension except the first (batch) one.
        ``forward`` relies on this helper; it was previously missing, so every
        forward pass raised AttributeError.
        """
        num_features = 1
        for dim_size in x.size()[1:]:
            num_features *= dim_size
        return num_features

    def forward(self, x):
        """Return class logits of shape (batch, 10) for an image batch ``x``."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # an int kernel size means a square window
        # Collapse channel and spatial dimensions so the linear layers can consume them.
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


如果模型有可学习的参数，最好用 `nn.Module`，否则既可以使用`nn.functional`也可以使用`nn.Module`

- 激活函数`（ReLU、sigmoid、tanh）`、池化`（MaxPool）`等层没有可学习参数，可以使用对应的functional函数代替
- 卷积、全连接等具有可学习参数的网络建议使用`nn.Module`。
- 虽然`dropout`操作也没有可学习参数，但建议还是使用`nn.Dropout`而不是`nn.functional.dropout`，因为dropout在训练和测试两个阶段的行为有所差别，使用`nn.Module`对象能够通过`model.eval()`操作加以区分。

## ResNet

每一个ResBlock的结构：(input -> Conv -> BN -> ReLU -> Conv -> BN) + input -> ReLU

In [24]:
class ResBlock(nn.Module):
    """Residual block: ReLU(main_path(x) + skip(x)).

    The main path is conv3x3 -> BN -> ReLU -> conv3x3 -> BN.

    Args:
        input_channel: channels of the incoming feature map.
        output_channel: channels produced by both convolutions.
        stride: stride of the first convolution (the second always uses 1).
        shortcut: optional module applied to ``x`` on the skip path; ``None``
            means the input is added unchanged.
    """

    def __init__(self, input_channel, output_channel, stride = 1, shortcut = None):
        super().__init__()
        main_path = [
            nn.Conv2d(input_channel, output_channel,
                      kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(output_channel),
            nn.ReLU(inplace=True),
            nn.Conv2d(output_channel, output_channel,
                      kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(output_channel),
        ]
        self.left = nn.Sequential(*main_path)
        self.right = shortcut

    def forward(self, x):
        """Run the main path, add the skip connection, and apply ReLU."""
        out = self.left(x)
        if self.right is None:
            skip = x
        else:
            skip = self.right(x)
        out += skip
        return F.relu(out)

相当于是res net中包含多个layer，而每一个layer中包含多个res block

In [35]:
class ResNet(nn.Module):
    """ResNet made of a stem, four stages of ``ResBlock`` (3, 4, 6, 3 blocks) and a linear head.

    Args:
        num_classes: size of the output of the final linear layer.
    """

    def __init__(self, num_classes = 1000):
        super().__init__()
        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max-pool.
        self.pre = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3, bias = False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(3, 2, 1)
        )

        # The first stage keeps 64 channels at stride 1, so it uses an identity skip.
        self.layer1 = self._make_layer(64, 64, 3, 1, is_shortcut=False)
        self.layer2 = self._make_layer(64, 128, 4, 2)
        self.layer3 = self._make_layer(128, 256, 6, 2)
        self.layer4 = self._make_layer(256, 512, 3, 2)

        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, inchannel, outchannel, block_num, stride, is_shortcut = True):
        """Build one stage consisting of ``block_num`` residual blocks.

        Args:
            inchannel: channels entering the stage.
            outchannel: channels produced by every block of the stage.
            block_num: number of residual blocks in the stage.
            stride: stride of the first block; the remaining blocks use 1.
            is_shortcut: when True the first block's skip path is a 1x1
                conv + BN projection so its shape matches the main path;
                when False the input is added unchanged.

        Returns:
            ``nn.Sequential`` holding the blocks in order.
        """
        if is_shortcut:
            shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, 1, stride, bias = False),
                nn.BatchNorm2d(outchannel)
            )
        else:
            shortcut = None
        layers = [ResBlock(inchannel, outchannel, stride, shortcut)]

        # The remaining blocks keep the channel count, so they need no projection.
        layers.extend(ResBlock(outchannel, outchannel) for _ in range(1, block_num))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for an image batch ``x``."""
        x = self.pre(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # Global average pooling down to 1x1. For the 7x7 map produced by a
        # 224x224 input this matches avg_pool2d(x, 7), and it also keeps the
        # 512-feature head valid for other input resolutions.
        x = F.adaptive_avg_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


In [34]:
# Smoke test: push one random 3x224x224 image through the network.
resnet = ResNet(num_classes=1000)
input = torch.randn(1, 3, 224, 224)
out = resnet(input)
print(out.size())

torch.Size([1, 1000])
