In [1]:
import torch
from torch import nn
from torch.nn import functional as F

In [2]:
# Baseline model built from the stock `nn.Sequential` container:
# 20 input features -> 256 hidden units -> ReLU -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

# A mini-batch of 2 examples with 20 features each.
X = torch.rand(2, 20)
net(X)

tensor([[ 0.1183, -0.0829, -0.0707, -0.1012,  0.1154,  0.0752, -0.2587,  0.0850,
          0.0214,  0.0610],
        [ 0.0627, -0.0597, -0.0044, -0.0849, -0.0042,  0.0080, -0.2276, -0.0831,
          0.2140, -0.1782]], grad_fn=<AddmmBackward>)

# A Custom Block

In [3]:
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer.

    Maps 20 input features to 10 outputs through a 256-unit fully
    connected hidden layer followed by a ReLU activation.
    """

    def __init__(self):
        # Initialize the parent `nn.Module` before assigning any sub-layers.
        super().__init__()
        # Fully connected hidden layer: 20 inputs -> 256 hidden units.
        self.hidden = nn.Linear(20, 256)
        # Fully connected output layer: 256 hidden units -> 10 outputs.
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return the model output for the input batch `X`."""
        # ReLU is applied through its functional form from `nn.functional`
        # instead of being stored as a separate layer.
        hidden_activations = F.relu(self.hidden(X))
        return self.out(hidden_activations)

In [4]:
# Instantiate the custom `MLP` block and apply it to the batch `X`
# created earlier. The bare `net(X)` is the cell's last expression, so
# its value is what the notebook displays.
net = MLP()
net(X)

tensor([[ 0.0842,  0.0259,  0.2572,  0.3152,  0.0699, -0.1885,  0.0204, -0.2124,
         -0.0391, -0.1389],
        [ 0.1721,  0.0276,  0.1589,  0.1362,  0.2001, -0.2295,  0.0804, -0.0304,
         -0.0308, -0.1526]], grad_fn=<AddmmBackward>)

# The Sequential Block

In [5]:
# This class provides the same core functionality as the default `Sequential` class.
class MySequential(nn.Module):
    """Minimal container that applies its modules one after another.

    Args:
        *args: The modules to chain, in the order they should be applied.
            Each one is registered under its positional index as a string
            ("0", "1", ...).

    Raises:
        TypeError: If an argument is neither an `nn.Module` nor `None`
            (raised by `add_module`).
    """

    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            # Register through the public `add_module` API instead of
            # writing to the private `_modules` dict directly. The module
            # still ends up in `_modules` (an ordered mapping), but the
            # argument is validated first, so a non-module is rejected
            # here rather than being silently accepted.
            self.add_module(str(idx), module)

    def forward(self, X):
        """Feed `X` through every registered module in insertion order."""
        # `_modules` guarantees that members are traversed in the order
        # they were added. We iterate it directly (rather than via
        # `children()`) so that a module instance passed more than once
        # is also applied more than once.
        for block in self._modules.values():
            X = block(X)
        return X

In [6]:
# Rebuild the same 20 -> 256 -> ReLU -> 10 network, this time with the
# hand-written `MySequential` container, and apply it to `X`.
net = MySequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
net(X)

tensor([[ 0.0354,  0.1540,  0.0189,  0.1632, -0.0019,  0.2472, -0.0342, -0.0518,
          0.1734, -0.0192],
        [ 0.1419,  0.2769, -0.1476,  0.0830, -0.0679,  0.2109,  0.0627,  0.0056,
          0.0456,  0.0417]], grad_fn=<AddmmBackward>)

# Executing Code in the Forward Propagation Function

In [7]:
class FixedHiddenMLP(nn.Module):
    """MLP that mixes a trainable layer with a fixed random weight matrix.

    The forward pass applies one shared `Linear(20, 20)` layer twice, with
    a constant (non-trainable) 20x20 random projection and a ReLU in
    between, then rescales the result with plain Python control flow and
    returns its sum as a scalar tensor.
    """

    def __init__(self):
        super().__init__()
        # Random weights that do not compute gradients and therefore stay
        # constant during training. They are registered as a buffer rather
        # than stored as a plain tensor attribute, so that they are saved
        # in `state_dict()` and follow the module through `.to()`,
        # `.double()`, `.cuda()`, etc. A buffer is not returned by
        # `parameters()`, so an optimizer will not update it.
        self.register_buffer("rand_weight", torch.rand((20, 20)))
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor computed from the input batch `X`."""
        X = self.linear(X)
        # Use the constant weights, as well as the `relu` and `mm`
        # functions.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the fully-connected layer. This is equivalent to two
        # fully-connected layers sharing one set of parameters.
        X = self.linear(X)
        # Control flow: halve the activations until their total absolute
        # value is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
        
        

In [8]:
# Instantiate `FixedHiddenMLP` and apply it to the batch `X`. Its forward
# pass ends in `X.sum()`, so the displayed result is a single scalar
# tensor rather than one row per example.
net = FixedHiddenMLP()
net(X)

tensor(0.3160, grad_fn=<SumBackward0>)

In [10]:
class NestMLP(nn.Module):
    """Block that nests an `nn.Sequential` stack under a linear layer.

    The nested stack maps 20 -> 64 -> 32 features with a ReLU after each
    linear layer; the final linear layer maps 32 -> 16.
    """

    def __init__(self):
        super().__init__()
        # Inner stack, itself a block: two Linear + ReLU stages.
        stages = [
            nn.Linear(20, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
        ]
        self.net = nn.Sequential(*stages)
        # Final projection applied to the output of the inner stack.
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run `X` through the nested stack, then the final linear layer."""
        features = self.net(X)
        return self.linear(features)

# Blocks of different kinds can be mixed freely: `NestMLP` produces 16
# features, the linear layer widens them to the 20 features that
# `FixedHiddenMLP` expects, and `FixedHiddenMLP` reduces everything to a
# single scalar.
chimera = nn.Sequential(
    NestMLP(),
    nn.Linear(16, 20),
    FixedHiddenMLP(),
)
chimera(X)

tensor(0.0900, grad_fn=<SumBackward0>)