# Parameter Initialization

Now that we can access parameters, let's initialise them properly. Torch will automatically initialise parameters randomly for us, but as we've already covered, this is often a suboptimal approach. PyTorch's nn.init module provides some of the common methods for us, but it's also possible to define our own.

In [1]:
import torch
from torch import nn

In [5]:
# Two linear layers with a ReLU in between; the lazy layers are given only
# their output sizes (8 and 1).
net = nn.Sequential(nn.LazyLinear(8), nn.ReLU(), nn.LazyLinear(1))

# Random input of shape (2, 4).
X = torch.rand(size=(2, 4))
# Run one forward pass and show the output shape (recorded below as [2, 1]).
net(X).shape

torch.Size([2, 1])

## Built-in Initialization

In [9]:
# Initialize by drawing weights from a Gaussian distribution with mean 0 and standard deviation 0.01, and set biases to 0

def init_normal(module):
    """Draw a linear layer's weights from a Gaussian and zero its biases.

    Weights are sampled with mean 0 and standard deviation 0.01. Only modules
    whose type is exactly ``nn.Linear`` are touched; every other module
    (subclasses included) is left as it is.
    """
    # Exact type match, not isinstance: subclasses of nn.Linear are skipped.
    if type(module) is not nn.Linear:
        return
    nn.init.normal_(module.weight, mean=0, std=0.01)
    nn.init.zeros_(module.bias)

# Run init_normal over the network's modules.
net.apply(init_normal)

# Show the first layer's weights and biases (the trailing expression is what
# the notebook displays).
net[0].weight.data, net[0].bias.data

(tensor([[ 0.0137,  0.0035, -0.0022,  0.0013],
         [-0.0093,  0.0110,  0.0143, -0.0109],
         [-0.0081, -0.0052, -0.0104,  0.0089],
         [-0.0131, -0.0089,  0.0087, -0.0139],
         [-0.0140,  0.0122,  0.0039,  0.0080],
         [ 0.0054, -0.0013, -0.0103,  0.0020],
         [-0.0196, -0.0025,  0.0020, -0.0069],
         [-0.0281, -0.0096, -0.0022, -0.0002]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [12]:
# We can also initialise all the weights to a constant (here 1), again with the biases set to 0

def init_constant(module):
    """Fill a linear layer's weights with 1 and its biases with 0.

    Modules whose type is not exactly ``nn.Linear`` are left unchanged.
    """
    # Exact type match, not isinstance: subclasses of nn.Linear are skipped.
    if type(module) is not nn.Linear:
        return
    nn.init.constant_(module.weight, 1)
    nn.init.zeros_(module.bias)

# Run init_constant over the network's modules.
net.apply(init_constant)

# Show the first layer's weights and biases (the trailing expression is what
# the notebook displays).
net[0].weight.data, net[0].bias.data

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [14]:
# We can also apply different initializers to different layers: here Xavier for the first layer and the constant 42 for the last

def init_xavier(module):
    """Apply Xavier-uniform initialization to a linear layer's weights.

    The bias is not touched. Modules whose type is not exactly ``nn.Linear``
    are left unchanged.
    """
    # Exact type match, not isinstance: subclasses of nn.Linear are skipped.
    if type(module) is not nn.Linear:
        return
    nn.init.xavier_uniform_(module.weight)

def init_42(module):
    """Fill a linear layer's weights with the constant 42.

    The bias is not touched. Modules whose type is not exactly ``nn.Linear``
    are left unchanged.
    """
    # Exact type match, not isinstance: subclasses of nn.Linear are skipped.
    if type(module) is not nn.Linear:
        return
    nn.init.constant_(module.weight, 42)
        
# Initialize each layer separately: Xavier for net[0], constant 42 for net[2].
net[0].apply(init_xavier)
net[2].apply(init_42)
# Print the first row of the first layer's weights and all of the last
# layer's weights.
print(net[0].weight.data[0])
print(net[2].weight.data)

tensor([-0.0445, -0.6304, -0.4451,  0.5981])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


### Custom Initialization

Sometimes we may wish to use initialisation methods that are not included in the machine learning framework; in that case, we can define our own:

In [23]:
def my_init(module):
    """Custom initializer for linear layers.

    Prints the name and shape of the layer's first parameter, draws the
    weights uniformly from [-10, 10], and then zeroes every weight whose
    absolute value is below 5. Modules whose type is not exactly
    ``nn.Linear`` are left unchanged.
    """
    # Exact type match, not isinstance: subclasses of nn.Linear are skipped.
    if type(module) is not nn.Linear:
        return

    # Report only the first (name, shape) pair of the layer's parameters.
    first_name, first_shape = [
        (name, param.shape) for name, param in module.named_parameters()
    ][0]
    print("Init", first_name, first_shape)

    nn.init.uniform_(module.weight, -10, 10)
    # Multiplying by the boolean mask keeps entries with |w| >= 5 and turns
    # the rest into zeros.
    keep_mask = module.weight.data.abs() >= 5
    module.weight.data *= keep_mask

In [24]:
# Run the custom initializer over the network's modules, then show the first
# two rows of the first layer's weights.
net.apply(my_init)
net[0].weight[:2]


Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


tensor([[ 8.7808, -0.0000, -0.0000,  0.0000],
        [ 0.0000, -6.2417,  0.0000, -0.0000]], grad_fn=<SliceBackward0>)

In [25]:
# Of course, if we really need to, setting the parameters directly is also an option.

# Add 1 to every weight of the first layer, in place.
net[0].weight.data[:] += 1
# Overwrite the single entry at row 0, column 0.
net[0].weight.data[0, 0] = 42
# Show the first row after both edits.
net[0].weight.data[0]

tensor([42.,  1.,  1.,  1.])