In [0]:
import  torch
from    torch import  nn
from    torch.nn import functional as F
from    torch.utils.data import DataLoader
from    torchvision import datasets
from    torchvision import transforms
from    torch import nn, optim

## Residual Block

In [0]:
class MyResnetBlk(nn.Module):
  def __init__(self, ch_in, ch_out, stride=1):
    super(MyResnetBlk, self).__init__()

    self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
    self.bn1 = nn.BatchNorm2d(ch_out)
    self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2d(ch_out)

    self.extra = nn.Sequential()
    if ch_out != ch_in:
      self.extra = nn.Sequential(
        nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
        nn.BatchNorm2d(ch_out)
      )

  def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out = self.extra(x) + out
    return out


In [4]:
layer = MyResnetBlk(3, 64)
output = layer(torch.autograd.Variable(torch.randn(256, 3, 23, 23)))
output.shape

torch.Size([256, 64, 23, 23])

## ResNet18

In [0]:
class MyResnet18(nn.Module):
  def __init__(self):
    super(MyResnet18, self).__init__()
    self.conv1 = nn.Sequential(
      nn.Conv2d(3, 64, kernel_size=3, stride=3, padding=1),
      # nn.BatchNorm2d(64, 128)
      nn.BatchNorm2d(64)
    )

    # follow 4 blocks
    # [b, 64, h, w] => [b, 128, h, w]
    # h,w是不断变化的，保证参数不会成倍增加
    self.blk1 = MyResnetBlk(64, 128, stride=2)
    self.blk2 = MyResnetBlk(128, 256, stride=2)
    self.blk3 = MyResnetBlk(256, 512, stride=2)
    self.blk4 = MyResnetBlk(512, 512, stride=2)

    self.outlayer = nn.Linear(512*1*1, 10)

  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = self.blk1(x)
    x = self.blk2(x)
    x = self.blk3(x)
    x = self.blk4(x)

    # print('after conv:', x.shape) # [b, 512, 2, 2]
    # [b, 512, 2, 2] => [b, 512, 1, 1]
    x = F.adaptive_avg_pool2d(x, [1,1])
    # print('after pool:', x.shape)
    x = x.view(x.size(0), -1)
    x = self.outlayer(x)
    return x

In [7]:
# 测试
blk = MyResnetBlk(64, 128, stride=2)
tmp = torch.randn(2, 64, 32, 32)
out = blk(tmp)
print(out.shape)

x = torch.randn(2, 3, 32, 32)
model = MyResnet18()
out = model(x)
print('resnet:', out.shape)

torch.Size([2, 128, 16, 16])
resnet: torch.Size([2, 10])


In [8]:
batchsz = 32

cifar_train = datasets.CIFAR10('cifar', True, transform=transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.299, 0.244, 0.225])
]), download=True)

cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)

cifar_test = datasets.CIFAR10('cifar', False, transform=transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.299, 0.244, 0.225])
]), download=True)
cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=True)

x, label = iter(cifar_train).next()
# x: torch.Size([32, 3, 32, 32]) label: torch.Size([32])
print('x:', x.shape, 'label:', label.shape)

device = torch.device('cuda')
model = MyResnet18().to(device)
print(model)

criteon = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(1000):
  model.train()
  for batchidx, (x, label) in enumerate(cifar_train):
    x, label = x.to(device), label.to(device)
    # [b, 3, 32, 32]

    # [b, 10]
    logits = model(x)
    loss = criteon(logits, label)

    # backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  print(epoch, loss.item())

  model.eval()
  with torch.no_grad():
    # test
    total_corrext = 0
    total_num = 0

    for x, label in cifar_test:
      x, label = x.to(device), label.to(device)
      # [b, 3, 32, 32]
      # [b, 10]
      logits = model(x)
      # [b]
      pred = logits.argmax(dim=1)
      total_corrext += torch.eq(pred, label).float().sum().item()
      total_num += x.size(0)

    acc = total_corrext / total_num
    print(epoch, acc)

0it [00:00, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to cifar/cifar-10-python.tar.gz


 99%|█████████▉| 168878080/170498071 [00:14<00:00, 14694244.56it/s]

Files already downloaded and verified
x: torch.Size([32, 3, 32, 32]) label: torch.Size([32])
MyResnet18(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (blk1): MyResnetBlk(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (extra): Sequential(
      (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (blk2): MyResnetBlk(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(256, eps=1e-05, momen

170500096it [00:30, 14694244.56it/s]                               

0 1.129532814025879
0 0.5425
1 1.1817609071731567
1 0.6407
2 1.3577247858047485
2 0.6917
3 0.9200694561004639
3 0.7084
4 0.5688472986221313
4 0.7233
5 0.6478909254074097
5 0.7208
6 0.40048155188560486
6 0.7207
7 0.24108511209487915
7 0.7192
8 0.4784318208694458
8 0.7218
9 0.31677746772766113
9 0.7221
10 0.11031816899776459
10 0.718
11 0.4551965594291687
11 0.713
12 0.06772845983505249
12 0.7146
13 0.1932656466960907
13 0.7163
14 0.08900994062423706
14 0.7061
15 0.03008049726486206
15 0.7157
16 0.26211586594581604
16 0.7128
17 0.0862932801246643
17 0.72
18 0.005097329616546631
18 0.7082
19 0.07478970289230347
19 0.7201
20 0.10317319631576538
20 0.7216
21 0.024396657943725586
21 0.7159
22 0.0773671567440033
22 0.7168
23 0.2266378402709961
23 0.7218
24 0.34860408306121826
24 0.715
25 0.5472322702407837
25 0.7157


KeyboardInterrupt: ignored

In [12]:
x = torch.randn(2,3,4)
x

tensor([[[ 2.0548, -0.5383, -0.2068, -0.2970],
         [ 1.6627, -0.5162, -0.4017,  0.4149],
         [-0.0595, -0.3123,  0.0828, -0.1331]],

        [[-0.5104,  0.0361, -0.1162, -0.0548],
         [-0.1232,  0.3358, -0.7191,  0.0463],
         [-0.2623, -0.7678, -0.0670,  2.5472]]])

In [13]:
x.transpose(0,1).shape

torch.Size([3, 2, 4])

In [15]:
x.transpose(1, 2).shape

torch.Size([2, 4, 3])

## nn.moudle

torch.nn的核心数据结构是Module，它是一个抽象概念，既可以表示神经网络中的某个层（layer），也可以表示一个包含很多层的神经网络。

在实际使用中，最常见的做法是继承nn.Module，撰写自己的网络/层。

自定义层Linear必须继承nn.Module，并且在其构造函数中需调用nn.Module的构造函数，即super(Linear, self).__init__() 或nn.Module.__init__(self)，推荐使用第一种用法。


在构造函数__init__中必须自己定义可学习的参数，并封装成Parameter，如在本例中我们把w和b封装成parameter。parameter是一种特殊的Variable，但其默认需要求导（requires_grad = True）。


forward函数实现前向传播过程，其输入可以是一个或多个variable，对x的任何操作也必须是variable支持的操作。


无需写反向传播函数，因其前向传播都是对variable进行操作，nn.Module能够利用autograd自动实现反向传播，这点比Function简单许多。


使用时，直观上可将layer看成数学概念中的函数，调用layer(input)即可得到input对应的结果。它等价于layers.__call__(input)，在__call__函数中，主要调用的是 layer.forward(x)，另外还对钩子做了一些处理。所以在实际使用中应尽量使用layer(x)而不是使用layer.forward(x)。


Module中的可学习参数可以通过named_parameters()或者parameters()返回迭代器，前者会给每个parameter都附上名字，使其更具有辨识度。

## 使用nn.Module的好处

1. 可以方便的使用nn.Module中定义好的层，比如nn.Linear,nn.BarchNorm2d,nn.Conv2d等

2. 可以使用nn.Sequential()容器。只有class才能写到Sequential里面，函数不能写。比如，nn.ReLU可以，但是F.relu不行！

3. 会对网络内部的参数进行有效的管理

4. 对内部的Module进行方便的管理

5. 方便地将成员转移到GPU

6. 方便的使用pytorch中的保存和加载功能

7. 切换train/test状态。比如：dropout和BatchNormalization在训练和测试阶段情况不同，可以使用eval()函数。

8. 可以实现自己的类。可以自定义打平的类。

In [54]:
m = nn.Conv2d(16, 33, 3, stride=2)
m.type

<bound method Module.type of Conv2d(16, 33, kernel_size=(3, 3), stride=(2, 2))>

In [55]:
# 2. 可以使用nn.Sequential()容器
net = nn.Sequential(
    nn.Linear(4, 2),
    nn.Linear(2, 2)
)
# net.parameters()
list(net.parameters())[0].shape

torch.Size([2, 4])

In [56]:
# 3. 会对网络内部的参数进行有效的管理
list(net.parameters())[3].shape

torch.Size([2])

In [57]:
list(net.named_parameters())

[('0.weight', Parameter containing:
  tensor([[-0.2216,  0.3746,  0.1378, -0.3413],
          [-0.2787,  0.3887,  0.3365, -0.1527]], requires_grad=True)),
 ('0.bias', Parameter containing:
  tensor([-0.1576,  0.1776], requires_grad=True)),
 ('1.weight', Parameter containing:
  tensor([[ 0.2206, -0.1033],
          [-0.0946,  0.5667]], requires_grad=True)),
 ('1.bias', Parameter containing:
  tensor([-0.2346, -0.2097], requires_grad=True))]

In [58]:
dict(net.named_parameters()).items()

dict_items([('0.weight', Parameter containing:
tensor([[-0.2216,  0.3746,  0.1378, -0.3413],
        [-0.2787,  0.3887,  0.3365, -0.1527]], requires_grad=True)), ('0.bias', Parameter containing:
tensor([-0.1576,  0.1776], requires_grad=True)), ('1.weight', Parameter containing:
tensor([[ 0.2206, -0.1033],
        [-0.0946,  0.5667]], requires_grad=True)), ('1.bias', Parameter containing:
tensor([-0.2346, -0.2097], requires_grad=True))])

In [59]:
optimizer = optim.SGD(net.parameters(), lr=1e-3)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.001
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [0]:
class BasicNet(nn.Module):
  def __init__(self):
    super(BasicNet, self).__init__()
    self.net = nn.Linear(4, 3)
  
  def forward(self, x):
    return self.net(x)

class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.net = nn.Sequential(
        BasicNet(),
        nn.ReLU(),
        nn.Linear(3, 2)
    )
  
  def forward(self, x):
    return self.net(x)

In [61]:
BasicNet().parameters

<bound method Module.parameters of BasicNet(
  (net): Linear(in_features=4, out_features=3, bias=True)
)>

In [73]:
net = Net()
list(net.named_parameters())

[('net.0.net.weight', Parameter containing:
  tensor([[-0.3147, -0.3827,  0.0918,  0.0372],
          [ 0.0098, -0.0452,  0.1944, -0.2419],
          [-0.3138,  0.4497, -0.1440,  0.0318]], requires_grad=True)),
 ('net.0.net.bias', Parameter containing:
  tensor([-0.2546, -0.4852, -0.4463], requires_grad=True)),
 ('net.2.weight', Parameter containing:
  tensor([[-0.0411,  0.4213, -0.5133],
          [ 0.5611, -0.0210,  0.1458]], requires_grad=True)),
 ('net.2.bias', Parameter containing:
  tensor([0.2343, 0.4968], requires_grad=True))]

In [74]:
# 4. 对内部的Module进行方便的管理
net.children

<bound method Module.children of Net(
  (net): Sequential(
    (0): BasicNet(
      (net): Linear(in_features=4, out_features=3, bias=True)
    )
    (1): ReLU()
    (2): Linear(in_features=3, out_features=2, bias=True)
  )
)>

In [75]:
# 5. 方便地将成员转移到GPU
device = torch.device('cuda')
net.to(device)

Net(
  (net): Sequential(
    (0): BasicNet(
      (net): Linear(in_features=4, out_features=3, bias=True)
    )
    (1): ReLU()
    (2): Linear(in_features=3, out_features=2, bias=True)
  )
)

In [0]:
# 6.方便的使用pytorch中的保存和加载功能（checkpoint）
# 保存
torch.save(net.state_dic(), 'ckpt.mdl')

# 加载
net.load_state_dict(torch.load('ckpt.mdl'))

In [0]:
# 8. 可以实现自己的类。可以自定义打平的类。
class Flatten(nn.Module):
  
  def __init__(self):
    super(Flatten, self).__init__()
  
  def forward(self, input):
    # 将第一个值保存，后边的直接打平
    return input.view(input.size(0), -1)

In [0]:
class TestNet(nn.Module):
  
  def __init__(self):
    super(TestNet, self).__init__()
    self.net = nn.Sequential(
        nn.Conv2d(1, 16, stride=1, padding=1),
        Flatten(),
        nn.Linear(1*14*14, 10)
    )
  
  def forward(self, x):
    return self.net(x)

In [0]:
class MyLinear(nn.Module):
  def __init__(self, input, output):
    super(MyLinear, self).__init__()
    # requires_grad = True
    self.w = nn.Parameter(torch.rand(input, output))
    self.b = nn.Parameter(torch.rand(output))
    
  def forward(self, x):
    x = x@self.w.t() + self.b
    return x

layer = MyLinear(4, 4)
input = torch.autograd.Variable(torch.randn(2, 4))
output = layer(input)
output.shape

torch.Size([2, 4])

## __call__函数

In [64]:
f = abs
f.__name__

'abs'

In [65]:
# 可调用对象
f(-123)

123

In [0]:
# 一个类的实例也可以变成可调用对象
class Person(object):
  def __init__(self, name, gender):
    self.name = name
    self.gender = gender

  def __call__(self, friend):
    print('My name is %s...' % self.name) 
    print('My friend is %s...' % friend)

In [67]:
callable(Person)

True

In [68]:
p = Person('Bob', 'male')
p('Tim')

My name is Bob...
My friend is Tim...


In [0]:
class Person2(object):
  def __init__(self, name, gender):
    self.name = name
    self.gender = gender

In [0]:
p2 = Person2('pop', 'fmale')
# p2()

In [71]:
callable(Person2)

True

In [72]:
callable(p2)

False