# 用不同的抽象层次工具构建network

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchviz

## I.tensor operation：在tensor包中
1. 直接以tensor为对象的操作，比如shape manipulate(eg:view)，math ops
2. 是最基础的运算方式，是构成function的基础。[可以理解成最简单的function]
3. 已经内建了forward和backward method。autograd根据tensor的requires_grad属性值来判断是否构建DAG，是否计算gradient并保存到tensor.grad
4. 可以将tensor ops处理成python函数，此时autograd也可以自动完成函数的DAG构建和backward propagation

**典型使用场景：**
1. 数据预处理
2. 自定义torch.autograd.Function时，用来实现函数运算
3. module forward method中可以将所需ops打包成python函数形式。通常用于ops不涉及learnable parameter的时候讲函数用于module的forward method中，功能和nn.functional函数一样

In [2]:
# 直接使用底层tensor ops

# 仅示例，实际不要这样使用！！！稍微复杂的网络都无法实现

x = torch.randn(3, requires_grad=True)
w = torch.ones(3) * 0.5
y = (w * x).sum()
y.backward()
print(x.grad, w.grad)

tensor([0.5000, 0.5000, 0.5000]) None


In [3]:
# 将tensor ops处理成python函数实现: affine -> relu -> affine -> softmax

# 仅示例，实际不要这样使用！！！

# 定义函数：
def log_softmax(x):
    return x - x.exp().sum(-1).log().unsqueeze(-1) # 'unsqueeze' is for broadcast   

def model(x, w1, w2):
    x = x @ w1.T
    x = torch.relu(x)
    x = x @ w2.T
    return log_softmax(x)

def negative_log_likelyhood(input, target):
    return -input[range(target.shape[0]), target].mean()
loss_func = negative_log_likelyhood

# 执行运算：
x = torch.randn(2, 3)
y = torch.tensor([2, 3])
w1 = torch.randn(2, 3, requires_grad=True)
w2 = torch.randn(4, 2, requires_grad=True)

scores = model(x, w1, w2)
loss = loss_func(scores, y)
loss.backward()

print(w1.grad)
print(w2.grad)

tensor([[ 0.0045, -0.0214,  0.1023],
        [ 0.0000,  0.0000,  0.0000]])
tensor([[ 0.0353,  0.0000],
        [ 0.0004,  0.0000],
        [-0.0453,  0.0000],
        [ 0.0096,  0.0000]])


## II. function：在torch.nn.functional包中
1. 定义了forward和backward method
2. 可以通过实例化torch.autograd.Function来自定义
3. autograd基于function构建graph中的Node，如果output tensor requires_grad，grad_fn指向backward method

**特征：**
1. 缺点：要手动管理learnable parameters：新建、初始化、赋值函数argument、用梯度做update
2. 当函数本身没有learnable parameter的时候，上述缺点就不存在，此时function和layer功能一样

**典型使用方式：**
1. <font color=green>**没有learnable parameter的function常用于module的forward method中做对应运算。如:F.relu** </font>
2. <font color=green>**自定义function如果有learnable parameter，通常打包成layers使用；如果没有也直接用于module的forward method。** </font>

<font color=red>注：通过实例化autograd.Function得到的function要apply后才能当做函数使用。</font>

In [4]:
# 用function实现: affine -> relu -> affine -> softmax

# 仅示例，实际不要这样使用！！！

# 网络结构复杂时，应该通过实例化nn.Module来自定义net，见下例

torch.manual_seed(2)
x = torch.randn(2, 3)
y = torch.tensor([2, 3])
w1 = torch.randn(2, 3, requires_grad=True)
w2 = torch.randn(4, 2, requires_grad=True)
scores = F.linear(F.relu(F.linear(x, w1)), w2)               # F.linear运算方式：x @ w.T
loss = F.cross_entropy(scores, y)
loss.backward()

print(x.grad)
print(w1.grad)
print(w2.grad)

None
tensor([[-0.2859,  0.2067,  0.0354],
        [-0.0313,  0.0271, -0.0164]])
tensor([[ 0.0249,  0.0534],
        [ 0.0355,  0.1165],
        [-0.0469,  0.5916],
        [-0.0135, -0.7615]])


In [5]:
# 用nn.Module封装nn.Functional实现simple net：affine -> relu -> affine -> softmax

# 仅示例，实际不要这样使用！！！

# 优点：nn.Module可以利用autograd system来自动管理learnable parameters
# 缺点：要手动向nn.Parameter登记nn.Functional中的参数，参数初始化和向函数argument传参也要手动
#      当网络复杂后，很难再手动管理参数

class Simple_net(nn.Module):
    def __init__(self):
        super().__init__()
        self.w1 = nn.Parameter(torch.randn(2, 3))
        self.w2 = nn.Parameter(torch.randn(4, 2))
    def forward(self, x):
        x = F.linear(x, w1)
        x = F.relu(x)
        x = F.linear(x, w2)
        return x

x = torch.ones(2, 3)
y = torch.tensor([2, 3])
model = Simple_net()
loss = F.cross_entropy(model(x), y)
loss.backward()

for param in model.named_parameters():
    print(param)

('w1', Parameter containing:
tensor([[-0.0591, -1.5653,  0.4258],
        [-1.4818, -0.4766,  0.2480]], requires_grad=True))
('w2', Parameter containing:
tensor([[ 0.1559, -0.1607],
        [ 0.4172,  1.0004],
        [ 0.6008,  0.1098],
        [-0.8411, -0.2908]], requires_grad=True))


## III. layer：在torch.nn包中
1. forward method中定义了layer运算用的functions，以及functions用到的learnable parameters
2. 可以通过实例化torch.nn.Module来自定义
3. autograd可以基于layer中的function构建Node；用backward method计算梯度，用optimizer自动更新parameters

In [6]:
# 用nn.Module示例封装layers实现simple net: affine -> relu -> affine -> softmax
# 优点：
#   1. nn.Module可以利用autograd system来自动管理learnable parameters
#   2. 基本不用手动管理参数：初始化、传参、参数更新都由autograd system自动完成

class Simple_net(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(3, 2, bias=False)
        self.linear2 = torch.nn.Linear(2, 4, bias=False)
        # 初始化: 使用in-place操作
        nn.init.kaiming_normal_(self.linear1.weight)
        nn.init.kaiming_normal_(self.linear2.weight)
    def forward(self, x):
        x = self.linear1(x)
        x = F.relu(x)        # 没有learnable parameter的layer则可以直接用nn.Funcitonal
        x = self.linear2(x)
        return x

x = torch.ones(2, 3)
y = torch.tensor([2, 3])
model = Simple_net()
loss = F.cross_entropy(model(x), y)
loss.backward()

for param in model.named_parameters():
    print(param)

('linear1.weight', Parameter containing:
tensor([[ 1.3269,  0.1434, -0.6918],
        [-0.6898, -0.6815, -0.5943]], requires_grad=True))
('linear2.weight', Parameter containing:
tensor([[-1.7490,  1.3883],
        [-0.1098,  1.2384],
        [ 0.5407,  0.5787],
        [-0.3134, -0.0843]], requires_grad=True))


## IV. 复杂网络：nn.Sequential, nn.ModuleList, nn.ModuleDict
**比较nn.Sequential和nn.ModuleList：** \
<font color=blue>**在nn.Sequential中：** </font>
1. 堆叠的nn.Module时相互关联的，上一的输出是下一个的输入。因此，定义nn.Sequential的时候要保证output size of a layer/block matches the input size of the following layer/block
2. object of type nn.Sequential有forward method。所以sequential是可执行的。整个sequence会构成一个功能模块，具体执行方式是按顺序执行其中的layers。比如sequential中放了conv2D，ReLU和Linearlayer的话，实际上会构成一个可运行的network: Conv2D -> ReLU -> Linear layer。output = Linear(relu(conv2D(x))). 
3. nn.Sequential可以用add_module() method来添加layer。

<font color=blue>**在ModulList中：** </font>  
1. 没有forward method，所以不能执行运算，list不构成一个可运行的Network。
2. list中存放的nn.Modules之间也没有前后关联。就和python的list相似，只是存的数据类型是nn.Modules。不用python list而用ModuleList的区别在于，直接用python list的话，list中的layer的parameter就无法自动计入model.parameters()中
3. ModuleList和list一样，用append() method来添加layer

In [7]:
## 用add_module() method来添加layer
class NeuralNet(nn.Module):
    def __init__(self, input_size, output_size, num_hidden_layers=3, hidden_layer_size=128):
        super(NeuralNet, self).__init__()
        self.deep_nn = nn.Sequential()
        for i in range(num_hidden_layers):
            self.deep_nn.add_module(f'ff{i}', nn.Linear(input_size, hidden_layer_size))
            self.deep_nn.add_module(f'activation{i}', nn.ReLU())
            input_size = hidden_layer_size
        self.deep_nn.add_module(f'classifier', nn.Linear(hidden_layer_size, output_size))

    def forward(self, inputs):
        tensor = self.deep_nn(inputs)
        return tensor

model = NeuralNet(16, 2)
print(model)

NeuralNet(
  (deep_nn): Sequential(
    (ff0): Linear(in_features=16, out_features=128, bias=True)
    (activation0): ReLU()
    (ff1): Linear(in_features=128, out_features=128, bias=True)
    (activation1): ReLU()
    (ff2): Linear(in_features=128, out_features=128, bias=True)
    (activation2): ReLU()
    (classifier): Linear(in_features=128, out_features=2, bias=True)
  )
)


## 1. nn.Sequential：用于堆叠layers/blocks

### i. 直接用layers
**缺点：**
1. layer多了之后model的结构性很差
2. 内部复用性和外部复用性都很差： \
(1) 如果要增加layer，就要修改__init__()和forward()，如果要加conv3和bn3，并不能复用前面的内容 \
(2) 其中模块也不能被其他model使用，比如其中的'conv-batchnorm-relu'block时很常见的block

In [8]:
class MyCNNClassifier(nn.Module):
    def __init__(self, in_c, n_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_c, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)

        self.fc1 = nn.Linear(64 * 28 * 28, 1024)
        self.fc2 = nn.Linear(1024, n_classes)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)

        x = x.view(x.size(0), -1) # flat
        
        x = self.fc1(x)
        x = F.sigmoid(x)
        x = self.fc2(x)
        
        return x

model = MyCNNClassifier(1, 10)
print(model)

MyCNNClassifier(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=50176, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=10, bias=True)
)


### ii. 用nn.Sequential()来stack layers
**优点：**结构比前面更清晰，可以模块化地管理每个Sequential中的参数等 \
**缺点：**内外部复用性还是差，所有模块仍然要在__init__中逐个定义，比如下面2个conv_block要写两次

In [9]:
class MyCNNClassifier(nn.Module):
    def __init__(self, in_c, n_classes):
        super().__init__()
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(in_c, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(64 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, n_classes)
        )

        
    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = x.view(x.size(0), -1) # flat
        x = self.decoder(x)
        return x
    
model = MyCNNClassifier(1, 10)
print(model)

MyCNNClassifier(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=50176, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


### iii. 用nn.Sequential()来stack layers，用function来wrap Sequential
**优点：** 结构同样清晰 \
**缺点：**内外部可复用

In [10]:
def conv_block(in_f, out_f, *args, **kwargs):
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        nn.ReLU()
    )

In [11]:
class MyCNNClassifier(nn.Module):
    def __init__(self, in_c, n_classes):
        super().__init__()
        self.conv_block1 = conv_block(in_c, 32, kernel_size=3, padding=1)
        self.conv_block2 = conv_block(32, 64, kernel_size=3, padding=1)
        self.decoder = nn.Sequential(
            nn.Linear(64 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, n_classes)
        )

    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = x.view(x.size(0), -1) # flat
        x = self.decoder(x)
        return x

model = MyCNNClassifier(1, 10)
print(model)

MyCNNClassifier(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=50176, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


### iv. 进一步结构化：用nn.Sequential()来stack layers，用function来wrap Sequential，再用Sequential来stack function
**优点：** 结构同样清晰 \
**缺点：** 内外部可复用

In [12]:
class MyCNNClassifier(nn.Module):
    def __init__(self, in_c, n_classes):
        super().__init__()
        self.encoder = nn.Sequential(
            conv_block(in_c, 32, kernel_size=3, padding=1),
            conv_block(32, 64, kernel_size=3, padding=1)
        )

        self.decoder = nn.Sequential(
            nn.Linear(64 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, n_classes)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), -1) # flat
        x = self.decoder(x)
        return x
model = MyCNNClassifier(1, 10)
print(model)

MyCNNClassifier(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (decoder): Sequential(
    (0): Linear(in_features=50176, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


### v.Dynamic Sequential：一次创建多个layers
1. 用list comprehension生成block list
2. 用list generator来作为nn.Sequential的argument，实现一次构建多个layers

In [13]:
class MyCNNClassifier(nn.Module):
    def __init__(self, in_c, n_classes):
        super().__init__()
        # number of hidden layers
        self.enc_sizes = [in_c, 32, 64]
        
        # 用list comprehension生成block list
        conv_blocks = [conv_block(in_f, out_f, kernel_size=3, padding=1) 
                       for in_f, out_f in zip(self.enc_sizes, self.enc_sizes[1:])]
        
        # 将list decompose后作nn.Sequential的argument，实现一次构建多个layers
        self.encoder = nn.Sequential(*conv_blocks)
        
        self.decoder = nn.Sequential(
            nn.Linear(64 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, n_classes)
        )
        
    def forward(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), -1) # flat
        x = self.decoder(x)
        return x
model = MyCNNClassifier(1, 10)
print(model)

MyCNNClassifier(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (decoder): Sequential(
    (0): Linear(in_features=50176, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)


### vi. 进一步分拆encoder和decoder
<font color=red>这里用python function来warp nn.Sequential，可以方便给Sequential中的layers传参。此时，返回值是nn.Sequential。</font>

In [14]:
# encoder module
def conv_block(in_f, out_f, *args, **kwargs):
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        nn.ReLU()
    )

class MyEncoder(nn.Module):
    def __init__(self, enc_sizes):
        super().__init__()
        self.conv_blocks = nn.Sequential(*[conv_block(in_f, out_f, kernel_size=3, padding=1) 
                       for in_f, out_f in zip(enc_sizes, enc_sizes[1:])])

        def forward(self, x):
            return self.conv_blocks(x)

# decoder module
def dec_block(in_f, out_f):
    return nn.Sequential(
        nn.Linear(in_f, out_f),
        nn.Sigmoid()
    )

class MyDecoder(nn.Module):
    def __init__(self, dec_sizes, n_classes):
        super().__init__()
        self.dec_blocks = nn.Sequential(*[dec_block(in_f, out_f) 
                       for in_f, out_f in zip(dec_sizes, dec_sizes[1:])])
        self.last = nn.Linear(dec_sizes[-1], n_classes)

    def forward(self, x):
        return self.dec_blocks()

In [15]:
class MyCNNClassifier(nn.Module):
    def __init__(self, in_c, enc_sizes, dec_sizes,  n_classes):
        super().__init__()
        # super parameter configuration
        self.enc_sizes = [in_c, *enc_sizes]
        self.dec_sizes = [self.enc_sizes[-1] * 28 * 28, *dec_sizes]
        # encoder and decoder
        self.encoder = MyEncoder(self.enc_sizes)
        self.decoder = MyDecoder(self.dec_sizes, n_classes)
        
    def forward(self, x):
        x = self.encoder(x)
        x = x.flatten(1) # flat
        x = self.decoder(x)
        return x
model = MyCNNClassifier(1, [32,64], [1024, 512], 10)
print(model)

MyCNNClassifier(
  (encoder): MyEncoder(
    (conv_blocks): Sequential(
      (0): Sequential(
        (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
      (1): Sequential(
        (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
    )
  )
  (decoder): MyDecoder(
    (dec_blocks): Sequential(
      (0): Sequential(
        (0): Linear(in_features=50176, out_features=1024, bias=True)
        (1): Sigmoid()
      )
      (1): Sequential(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): Sigmoid()
      )
    )
    (last): Linear(in_features=512, out_features=10, bias=True)
  )
)


## 2. nn.ModuleList：用于iterate module as a list

In [16]:
class MyModule(nn.Module):
    def __init__(self, sizes):
        super().__init__()
#         self.linears = nn.ModuleList([nn.Linear(in_f, out_f) for in_f, out_f in zip(sizes, sizes[1:])])
        self.linears = [nn.Linear(in_f, out_f) for in_f, out_f in zip(sizes, sizes[1:])]
        self.linears.append(nn.Linear(32, 10))  # 这里写死了参数，只是为了示例append()
    
    def forward(self, x):
        for layer in self.linears:   # iterate
            x = layer(x)
        return x
        
model = MyModule([1, 16, 32])

torch.manual_seed(2)
x = torch.rand(4,1)
y = model(x)
print(model)

# 注意：如果不用nn.ModuleList而用python list，无法自动登记参数
for name, param in model.named_parameters():
    print(name)

MyModule()


## 3. nn.ModuleDict：用于需要根据条件选择不同module的场景

In [17]:
def conv_block(in_f, out_f, activation='relu', *args, **kwargs):
    # dict用于实现：根据参数选择不同的layer作为activations
    activations = nn.ModuleDict([
                ['lrelu', nn.LeakyReLU()],
                ['relu', nn.ReLU()]
    ])
    
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        activations[activation]
    )

print(conv_block(1, 32,'lrelu', kernel_size=3, padding=1))
print(conv_block(1, 32,'relu', kernel_size=3, padding=1))

Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): LeakyReLU(negative_slope=0.01)
)
Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
)


## 一个综合案例

In [18]:
# 用python function warp nn.Sequential传参
def conv_block(in_f, out_f, activation='relu', *args, **kwargs):
    activations = nn.ModuleDict([
                ['lrelu', nn.LeakyReLU()],
                ['relu', nn.ReLU()]
    ])
    
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        activations[activation]
    )

class MyEncoder(nn.Module):
    def __init__(self, enc_sizes, *args, **kwargs):
        super().__init__()
        self.conv_blocks = nn.Sequential(*[conv_block(in_f, out_f, kernel_size=3, padding=1, *args, **kwargs) 
                       for in_f, out_f in zip(enc_sizes, enc_sizes[1:])])
        def forward(self, x):
            return self.conv_blocks(x)

        
# 用python function warp nn.Sequential传参
def dec_block(in_f, out_f):
    return nn.Sequential(
        nn.Linear(in_f, out_f),
        nn.Sigmoid()
    )

class MyDecoder(nn.Module):
    def __init__(self, dec_sizes, n_classes):
        super().__init__()
        self.dec_blocks = nn.Sequential(*[dec_block(in_f, out_f) 
                       for in_f, out_f in zip(dec_sizes, dec_sizes[1:])])
        self.last = nn.Linear(dec_sizes[-1], n_classes)

    def forward(self, x):
        return self.dec_blocks()
    
    
class MyCNNClassifier(nn.Module):
    def __init__(self, in_c, enc_sizes, dec_sizes,  n_classes, activation='relu'):
        super().__init__()
        self.enc_sizes = [in_c, *enc_sizes]
        self.dec_sizes = [32 * 28 * 28, *dec_sizes]

        self.encoder = MyEncoder(self.enc_sizes, activation=activation)
        self.decoder = MyDecoder(dec_sizes, n_classes)
        
    def forward(self, x):
        x = self.encoder(x)
        x = x.flatten(1) # flat
        x = self.decoder(x)
        return x

In [19]:
model = MyCNNClassifier(1, [32,64], [1024, 512], 10, activation='lrelu')
print(model)

MyCNNClassifier(
  (encoder): MyEncoder(
    (conv_blocks): Sequential(
      (0): Sequential(
        (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.01)
      )
      (1): Sequential(
        (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): LeakyReLU(negative_slope=0.01)
      )
    )
  )
  (decoder): MyDecoder(
    (dec_blocks): Sequential(
      (0): Sequential(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): Sigmoid()
      )
    )
    (last): Linear(in_features=512, out_features=10, bias=True)
  )
)
