In [2]:
import torch
from torch import nn,Tensor
from d2l import torch as d2l
from torch.nn import functional as f


In [8]:
# Two-layer perceptron built from stock layers:
# 20 input features -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(
  nn.Linear(20, 256),
  nn.ReLU(),
  nn.Linear(256, 10),
)
# Random mini-batch: 2 samples with 20 features each, drawn uniformly from [0, 1).
X = torch.rand(2, 20)
net(X)


tensor([[-0.0123, -0.0985,  0.0404, -0.0259, -0.0529, -0.2467,  0.1501, -0.0133,
          0.2082,  0.0641],
        [-0.0827, -0.2169,  0.1037, -0.0453, -0.0106, -0.1553,  0.0952, -0.0892,
          0.1040, -0.0121]], grad_fn=<AddmmBackward0>)

In [11]:
class MLP(nn.Module):
  """Multilayer perceptron with a single hidden layer.

  Maps 20 input features to 256 hidden units (ReLU) and then to 10 outputs.
  """

  def __init__(self) -> None:
    super().__init__()
    self.hidden = nn.Linear(20, 256)
    self.out = nn.Linear(256, 10)

  def forward(self, X):
    """Run the forward pass and return the output-layer result for ``X``."""
    # The functional ReLU is used here instead of an nn.ReLU module.
    hidden_activation = f.relu(self.hidden(X))
    return self.out(hidden_activation)

In [13]:
net = MLP()
# Calling the instance works because nn.Module defines __call__,
# which in turn runs the forward() method defined on the class.
net(X)

tensor([[-0.2782,  0.3424, -0.1423,  0.0821,  0.0352, -0.1648,  0.1165, -0.0476,
          0.0192,  0.0324],
        [-0.1214,  0.2150, -0.0512, -0.0778,  0.1232, -0.2033,  0.0953,  0.1267,
         -0.0337, -0.0377]], grad_fn=<AddmmBackward0>)

In [14]:
class MySequential(nn.Module):
  """Minimal container that applies its blocks one after another.

  Each positional argument is stored in ``self._modules`` under its
  position converted to a string ("0", "1", ...).
  """

  def __init__(self, *args) -> None:
    super().__init__()
    # Writing into _modules is what makes the blocks visible to nn.Module
    # (parameters(), state_dict(), ...).
    self._modules.update(
      (str(position), layer) for position, layer in enumerate(args)
    )

  def forward(self, X):
    """Feed ``X`` through every stored block in insertion order."""
    result = X
    for layer in self._modules.values():
      result = layer(result)
    return result

In [16]:
# Same 20 -> 256 -> 10 architecture as before, assembled with the custom container.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[-4.3664e-02, -1.2101e-01,  1.3052e-01, -3.2957e-02,  9.6022e-02,
          2.8757e-01,  6.5758e-02, -2.9576e-02, -6.9751e-02, -1.8845e-01],
        [-1.7885e-02, -1.1425e-01,  1.4706e-01, -1.7229e-01, -7.3928e-02,
          3.2578e-01, -9.9234e-05, -9.0535e-02, -2.9273e-02, -3.0240e-02]],
       grad_fn=<AddmmBackward0>)

In [20]:
class FixedHiddenMLP(nn.Module):
  """MLP combining a trainable linear layer with a constant random projection.

  The same ``nn.Linear(20, 20)`` layer is applied twice (its parameters are
  shared between both applications); in between, the activations are
  multiplied by ``rand_weight``, a random matrix that is never trained.
  """

  def __init__(self) -> None:
    super().__init__()
    # Constant weights: kept out of autograd (a buffer never requires grad) so
    # no gradient is computed or accumulated for them. Registering the tensor
    # as a buffer also makes it follow .to()/.cuda()/.double() together with
    # the rest of the module. persistent=False keeps it out of state_dict(),
    # so the checkpoint keys stay exactly ``linear.weight`` / ``linear.bias``.
    self.register_buffer("rand_weight", torch.rand((20, 20)), persistent=False)
    self.linear = nn.Linear(20,20)

  def forward(self,X:Tensor):
    """Compute the forward pass.

    Args:
      X: input tensor whose last dimension is 20 (required by ``self.linear``).

    Returns:
      A scalar tensor: the sum of all entries of the final activations.
    """
    X = self.linear(X)
    # Constant projection followed by ReLU; rand_weight receives no gradient.
    X = f.relu((X@self.rand_weight)+1)
    # Reuse the same linear layer: both applications share one set of parameters.
    X = self.linear(X)
    # Data-dependent control flow: halve until the L1 norm is at most 1.
    # NOTE: this loop does not terminate if X contains an infinite entry.
    while X.abs().sum() > 1:
      X/=2
    return X.sum()

In [25]:
# Instantiate the model with the constant random projection and run it on X;
# forward() ends with X.sum(), so the result is a single scalar tensor.
net = FixedHiddenMLP()
net(X)

tensor(-0.0638, grad_fn=<SumBackward0>)

In [28]:
class NestMLP(nn.Module):
  """Block that nests an ``nn.Sequential`` inside a custom module.

  The inner stack maps 20 -> 64 (ReLU) -> 32 features; a final linear
  layer then maps 32 -> 16.
  """

  def __init__(self) -> None:
    super().__init__()
    self.net = nn.Sequential(
      nn.Linear(20, 64),
      nn.ReLU(),
      nn.Linear(64, 32),
    )
    self.linear = nn.Linear(32, 16)

  def forward(self, X):
    """Apply the nested stack first, then the final linear layer."""
    nested_out = self.net(X)
    return self.linear(nested_out)
  
# Custom blocks and stock layers compose freely:
# NestMLP (20 -> 16), a plain linear layer (16 -> 20), then FixedHiddenMLP.
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(0.0290, grad_fn=<SumBackward0>)