# Parameter management

In [17]:
import torch
from torch import nn

In [18]:
# A small MLP built from lazily-initialised linear layers: the input width of
# each LazyLinear is inferred from the first batch that passes through it.
net = nn.Sequential(
    nn.LazyLinear(8), nn.ReLU(),
    nn.LazyLinear(10), nn.ReLU(),
    nn.LazyLinear(1),
)
# Random batch of 3 samples with 3 features each.
X = torch.rand(size=(3, 3))
print(X)
# The forward pass materialises the lazy layers; display the output shape.
net(X).shape

tensor([[0.8798, 0.7266, 0.4198],
        [0.7841, 0.9321, 0.8293],
        [0.2777, 0.4361, 0.8149]])


torch.Size([3, 1])

In [19]:
# A Sequential container can be indexed like a list, from 0 up to n-1 (the last
# layer); activation modules such as ReLU occupy an index of their own.
# Indexing a layer gives access to all of its parameters through state_dict().
net[4].state_dict()

OrderedDict([('weight',
              tensor([[ 0.1875, -0.2367, -0.2676,  0.2621, -0.1305, -0.1872,  0.0413,  0.0446,
                       -0.3151,  0.3016]])),
             ('bias', tensor([0.2822]))])

In [20]:
# Individual parameters are exposed as attributes of the layer (`weight`, `bias`).
# The expression below shows the attribute's type next to the parameter itself.
# Alternative for the bias, reading its values through `.data`:
# type(net[2].bias), net[2].bias.data
type(net[2].weight), net[2].weight


(torch.nn.parameter.Parameter,
 Parameter containing:
 tensor([[ 0.0984, -0.0923, -0.1363,  0.1738,  0.2956, -0.0867,  0.0862,  0.2050],
         [-0.1523,  0.3298, -0.3459,  0.3285,  0.0314,  0.0276,  0.1756, -0.0098],
         [ 0.1732, -0.2964, -0.1219, -0.0433, -0.0764, -0.3146,  0.0256,  0.0288],
         [-0.3312,  0.1796,  0.0249,  0.1019, -0.2635, -0.2969, -0.2857, -0.0430],
         [ 0.1927,  0.1466,  0.1671,  0.3324,  0.0239,  0.0535, -0.2943, -0.0885],
         [-0.3306,  0.3133, -0.2776,  0.3406, -0.1077, -0.0114,  0.2980,  0.0090],
         [-0.1945,  0.2282,  0.2173,  0.3081, -0.0949,  0.3322,  0.1798,  0.1888],
         [-0.2451, -0.0597,  0.2713, -0.2248,  0.2964, -0.1583,  0.0893, -0.2207],
         [ 0.2466, -0.0163, -0.0373, -0.0967, -0.3260,  0.2582, -0.2347,  0.1938],
         [ 0.2043, -0.0547,  0.2155, -0.0607, -0.2758,  0.2062,  0.0637,  0.2381]],
        requires_grad=True))

In [21]:
# List every parameter of the network with its shape; each name is prefixed
# with the index of the layer that owns it.
[(param_name, tensor.shape) for param_name, tensor in net.named_parameters()]

[('0.weight', torch.Size([8, 3])),
 ('0.bias', torch.Size([8])),
 ('2.weight', torch.Size([10, 8])),
 ('2.bias', torch.Size([10])),
 ('4.weight', torch.Size([1, 10])),
 ('4.bias', torch.Size([1]))]

In [23]:
# Parameter sharing: placing the same module object at several positions of a
# network makes those positions use one and the same set of parameters.

# Keep the shared layer in a named variable so it can be inserted repeatedly.
shared = nn.LazyLinear(8)
net = nn.Sequential(
    nn.LazyLinear(8), nn.ReLU(),
    shared, nn.ReLU(),
    shared, nn.ReLU(),
    nn.LazyLinear(1),
)

# Forward pass so that the lazy layers create their weights.
net(X)
# Positions 2 and 4 both hold `shared`, so their weights compare equal ...
print(net[2].weight.data == net[4].weight.data)
# ... and a write through one position is visible through the other, which
# shows they are the same tensor rather than two tensors with equal values.
net[2].weight.data[0, 0] = 100
print(net[2].weight.data[0] == net[4].weight.data[0])


tensor([[True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True]])
tensor([True, True, True, True, True, True, True, True])
