In [1]:
import torch
from torch import nn

In [2]:
# A small MLP: hidden layer of 8 units, ReLU, then a single output unit.
# The Lazy* layers leave their input width unspecified at construction time.
net = nn.Sequential(
    nn.LazyLinear(8),
    nn.ReLU(),
    nn.LazyLinear(1),
)

# Minibatch of 2 examples with 4 features each, drawn uniformly from [0, 1).
X = torch.rand(2, 4)
# Run one forward pass and report the shape of the result.
net(X).size()



torch.Size([2, 1])

## 1. Parameter Access

In [3]:
# Inspect the parameters held by the module at index 2 of the Sequential
# (the final linear layer): a mapping from parameter name to tensor.
net[2].state_dict()

OrderedDict([('weight',
              tensor([[-0.2904,  0.0555,  0.2442,  0.1221, -0.1118,  0.3063, -0.0985,  0.0907]])),
             ('bias', tensor([-0.0885]))])

### 1.1 Targeted Parameters

In [4]:
# Show the class of the bias object alongside the raw tensor it wraps
# (accessed through `.data`).
type(net[2].bias), net[2].bias.data

(torch.nn.parameter.Parameter, tensor([-0.0885]))

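In addition to its value, each parameter also exposes its gradient. Because we have not invoked backpropagation for this network yet, the gradient is still in its initial state (`None`).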

In [5]:
# No backward pass has been run on this network, so the weight's gradient has
# not been populated. Use an identity test for the None singleton: `== None`
# would be routed through Tensor.__eq__ as soon as a gradient tensor exists.
net[2].weight.grad is None

True

### 1.2 All Parameters at Once

In [6]:
# Enumerate every parameter registered in the network, pairing its qualified
# name (layer index + attribute) with the shape of its tensor.
[
    (param_name, tensor.shape)
    for param_name, tensor in net.named_parameters()
]

[('0.weight', torch.Size([8, 4])),
 ('0.bias', torch.Size([8])),
 ('2.weight', torch.Size([1, 8])),
 ('2.bias', torch.Size([1]))]

## 2. Tied Parameters

In [7]:
# Bind the layer to be reused to a name, so that the very same module object
# can be inserted at more than one position in the network
shared = nn.LazyLinear(8)
net = nn.Sequential(
    nn.LazyLinear(8), nn.ReLU(),
    shared, nn.ReLU(),
    shared, nn.ReLU(),
    nn.LazyLinear(1),
)

# Run a forward pass before touching any weights
net(X)
# Compare the first weight row at the two positions that hold `shared`
print(net[2].weight.data[0] == net[4].weight.data[0])
# Overwrite a single entry through position 2 only ...
net[2].weight.data[0, 0] = 100
# ... and compare again: if the rows still agree, both positions are backed by
# one tensor rather than by two copies that merely started out equal
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])
