In [1]:
import torch
from torch import nn
from d2l import torch as d2l

1. What happens if you specify the input dimensions to the first layer but not to subsequent layers? Do you get immediate initialization?

In [2]:
# Only the first layer is told its input size; the output layer is lazy, so
# its input size is left unspecified here.
net = nn.Sequential(
    nn.Linear(in_features=256, out_features=256),
    nn.ReLU(),
    nn.LazyLinear(out_features=10),
)



In [3]:
# Display the weight parameter of the first layer (index 0 of the Sequential).
net[0].weight

Parameter containing:
tensor([[-0.0482,  0.0490,  0.0299,  ...,  0.0534, -0.0034, -0.0056],
        [ 0.0389,  0.0393,  0.0510,  ...,  0.0519, -0.0513,  0.0535],
        [ 0.0436, -0.0408,  0.0004,  ..., -0.0299, -0.0455, -0.0050],
        ...,
        [ 0.0047, -0.0313, -0.0488,  ...,  0.0424, -0.0480,  0.0023],
        [-0.0140, -0.0353,  0.0509,  ...,  0.0350, -0.0618, -0.0092],
        [ 0.0470, -0.0333,  0.0573,  ..., -0.0036, -0.0026,  0.0185]],
       requires_grad=True)

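The first layer, whose input dimension is specified, gets initialized immediately. The subsequent `nn.LazyLinear` layer is not: its parameters stay uninitialized until the first forward pass reveals its input dimension.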

2. What happens if you specify mismatching dimensions?

The forward function cannot be executed because the dimensions of the input matrix and the weight matrix do not match.

In [4]:
# Feed a batch with 28*28 = 784 features into a network whose first layer was
# built with 256 input features. The resulting error is the point of this
# cell, so it is caught and printed rather than left to abort a full
# "Restart & Run All" of the notebook.
x = torch.randn(2, 28 * 28)
try:
    out = net(x)
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")
else:
    # No error was raised; show the result so the cell is never silent.
    print(out)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x784 and 256x256)

3. What would you need to do if you have input of varying dimensionality? Hint: look at the parameter tying.

In [9]:
class TyingParameterMLP(d2l.Classifier):
    """MLP that accepts inputs of varying width by tying first-layer weights.

    The flattened input is cut into fixed-width chunks along the feature
    axis. Every chunk goes through the same ``shared`` linear layer, and the
    per-chunk results are summed before the ReLU and the output layer.

    Args:
        num_outputs: Number of output features of the final linear layer.
        num_hiddens: Number of output features of the shared linear layer.
        lr: Learning rate; recorded by ``save_hyperparameters()`` and not
            used directly in this class.
        chunk_size: Width of each chunk, i.e. the input size of the shared
            layer. Defaults to 16.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """

    def __init__(self, num_outputs, num_hiddens, lr, chunk_size=16):
        super().__init__()
        self.save_hyperparameters()
        if chunk_size < 1:
            raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
        self.k1 = chunk_size
        self.flat = nn.Flatten()
        self.shared = nn.Linear(self.k1, num_hiddens)
        self.net = nn.Sequential(nn.ReLU(),
                                 nn.Linear(num_hiddens, num_outputs))

    def forward(self, X):
        """Apply the shared layer to every chunk of ``X`` and classify the sum.

        Args:
            X: Input tensor; it is flattened by ``nn.Flatten`` first.

        Returns:
            The output of ``self.net`` applied to the summed chunk features.
        """
        X = self.flat(X)
        r = X.shape[-1] % self.k1
        if r != 0:
            # Pad up to the next multiple of k1. That takes k1 - r zeros
            # (not r). new_zeros keeps the pad on X's device and dtype.
            pad = X.new_zeros(*X.shape[:-1], self.k1 - r)
            X = torch.cat((X, pad), dim=-1)
        n = X.shape[-1] // self.k1
        # View the features as n chunks of width k1. nn.Linear acts on the
        # last axis, so one call applies the tied weights to every chunk.
        chunks = X.reshape(*X.shape[:-1], n, self.k1)
        out1 = self.shared(chunks).sum(dim=-2)
        return self.net(out1)

In [10]:
# Build one model and run it on two inputs of different widths
# (28*28 and 32*32 features) to show that both are accepted.
hparams = dict(num_outputs=10, num_hiddens=8, lr=0.1)
model = TyingParameterMLP(**hparams)
x1 = torch.randn(1, 28 * 28)
x2 = torch.randn(1, 32 * 32)
for sample in (x1, x2):
    print(model(sample))

tensor([[ 6.9937, -2.7151, -0.5719,  1.1045,  0.6360, -3.0469,  5.8679, -0.4652,
         -7.3380,  4.4826]], grad_fn=<AddmmBackward0>)
tensor([[10.8664, -0.4606,  2.3964,  4.9256,  2.0284, -2.9672,  6.8204, -1.8947,
         -8.8334,  6.2374]], grad_fn=<AddmmBackward0>)
