Model Construction

In [None]:
import torch
from torch import nn
from torch.nn import functional as F

# Baseline model assembled from stock layers: 20 inputs -> 256 hidden units (ReLU) -> 10 outputs.
net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

# Minibatch of 2 examples with 20 features each, i.e. a tensor of shape (2, 20)
# filled with uniform samples from [0, 1).
X = torch.rand(size=(2, 20))
net(X)

tensor([[-0.1225, -0.0259, -0.0305,  0.0563,  0.1315, -0.3018,  0.0380, -0.1728,
         -0.2033, -0.0803],
        [-0.1554,  0.1633,  0.0595,  0.0129,  0.2352, -0.1766,  0.2375, -0.1053,
         -0.2476, -0.0812]], grad_fn=<AddmmBackward0>)

In [2]:
class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer (20 -> 256 -> 10)."""

    def __init__(self):
        super().__init__()
        # Assigning a layer to an attribute registers it as a submodule,
        # so its parameters are tracked by this block.
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return the output-layer values for the input batch ``X``.

        ``X`` is passed through the hidden layer, a ReLU, and then the
        output layer.
        """
        activations = F.relu(self.hidden(X))
        return self.out(activations)

In [3]:
# Rebind `net` to an instance of the custom MLP block and evaluate it on X.
net = MLP()
net(X)

tensor([[-0.2100, -0.2094,  0.1223, -0.1160, -0.2329, -0.0951,  0.0486, -0.0184,
         -0.1236, -0.1002],
        [-0.1136, -0.2260,  0.0427, -0.1666, -0.2390,  0.0348,  0.2050,  0.0241,
         -0.0177,  0.0296]], grad_fn=<AddmmBackward0>)

In [None]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Args:
        *args: The blocks to chain, in the order they should be applied.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Submodule names must be strings: nn.Module builds dotted names
            # from them in parameters(), state_dict() and repr(). Keying by
            # position also lets the same block instance appear more than once.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed ``X`` through every registered block in insertion order."""
        for block in self._modules.values():
            X = block(X)  # the output of one block is the input of the next
        return X
    
# Same 20 -> 256 -> ReLU -> 10 architecture, chained by the hand-written container.
net = MySequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
net(X)

tensor([[ 0.3191,  0.0543, -0.0521, -0.0952, -0.0500,  0.0480, -0.3285,  0.1938,
          0.3010, -0.4181],
        [ 0.4607,  0.0551, -0.1860, -0.0851, -0.1009, -0.0006, -0.1181,  0.0261,
          0.0904, -0.3273]], grad_fn=<AddmmBackward0>)

In [5]:
class FixedHiddenMLP(nn.Module):
    """Block mixing a trainable layer, constant weights and Python control flow.

    The forward pass applies the same ``nn.Linear(20, 20)`` layer twice (so its
    parameters are shared), with a constant random projection and a ReLU in
    between, and returns a scalar.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights: never updated by an optimizer. Registered as
        # a buffer (not a plain attribute) so they follow the module through
        # .to()/.cuda()/.double() and are saved in state_dict().
        self.register_buffer("rand_weight", torch.rand((20, 20)))
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor computed from the input batch ``X``."""
        X = self.linear(X)
        # Constant projection followed by ReLU; the added 1 acts as a bias.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same layer: both applications share one set of parameters.
        X = self.linear(X)
        # Arbitrary control flow is allowed in forward(): halve the activations
        # until the sum of their absolute values is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
    
# Rebind `net` to a FixedHiddenMLP and evaluate it on X; forward() returns a
# single summed value rather than one row per example.
net = FixedHiddenMLP()
net(X)

tensor(0.3858, grad_fn=<SumBackward0>)

In [6]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` stack inside a custom module.

    Layer sizes: 20 -> 64 -> 32 (ReLU after each) in ``self.net``, then a
    final linear layer 32 -> 16 in ``self.linear``.
    """

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(20, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
        )
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested stack, then the final linear layer."""
        features = self.net(X)
        return self.linear(features)
    
# Custom blocks and stock layers compose freely: NestMLP maps 20 -> 16, the
# linear layer maps 16 -> 20, and FixedHiddenMLP reduces the result to a scalar.
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)

tensor(0.4110, grad_fn=<SumBackward0>)