In [1]:
import torch
from torch import nn

In [2]:
# Out of the box, PyTorch fills weights and biases with uniform draws whose range
# depends on the layer's input and output dimensions; the nn.init module offers
# a collection of ready-made alternatives.
# The lazy layers only learn their input width on the first forward pass, so we
# push one batch through the network right away.
layers = [nn.LazyLinear(8), nn.ReLU(), nn.LazyLinear(1)]
net = nn.Sequential(*layers)
X = torch.rand(2, 4)
net(X).shape



torch.Size([2, 1])

In [3]:
# Start with the built-in initializers: every weight is drawn from a Gaussian
# with standard deviation 0.01 and every bias is reset to zero.
def init_normal(module):
    """Initialize a fully connected layer with small Gaussian weights.

    Meant to be passed to ``nn.Module.apply``, which calls it once for every
    submodule. Modules whose exact type is not ``nn.Linear`` are left untouched.

    Args:
        module: The submodule currently being visited.
    """
    # Exact type match rather than isinstance: nn.LazyLinear derives from
    # nn.Linear, and its parameters cannot be initialized before they have
    # been materialized by a forward pass.
    if type(module) is nn.Linear:
        nn.init.normal_(module.weight, mean=0, std=0.01)
        # A layer created with bias=False has no bias tensor to reset.
        if module.bias is not None:
            nn.init.zeros_(module.bias)


# Visit every submodule of the network and re-initialize it in place.
net.apply(init_normal)
# Inspect the first row of the first layer's weight together with its first bias entry.
first_linear = net[0]
first_linear.weight.data[0], first_linear.bias.data[0]

(tensor([ 0.0098, -0.0049,  0.0215, -0.0070]), tensor(0.))

In [4]:
# When no built-in initializer fits, we can write our own. Here every weight is drawn
# uniformly from [-10, 10] and then set to zero whenever its absolute value is below 5.
def my_init(module):
    """Custom initializer: uniform weights in [-10, 10], small magnitudes zeroed.

    Meant to be passed to ``nn.Module.apply``. For every module whose exact
    type is ``nn.Linear`` it prints the name and shape of the module's first
    registered parameter, redraws the weight uniformly from [-10, 10], and then
    zeroes every entry whose absolute value is below 5. The bias is not
    modified. Other module types are ignored.

    Args:
        module: The submodule currently being visited.
    """
    if type(module) is nn.Linear:
        # Only the first parameter is reported, so take it straight from the
        # generator instead of materializing the full list.
        first_name, first_param = next(module.named_parameters())
        print("Init", first_name, first_param.shape)
        nn.init.uniform_(module.weight, -10, 10)
        # Update in place under no_grad rather than through `.data`, so the
        # change stays visible to autograd's in-place modification checks.
        with torch.no_grad():
            # Multiplying by the boolean mask keeps entries with |w| >= 5 and
            # turns the rest into (signed) zeros.
            module.weight.mul_(module.weight.abs() >= 5)


# Run the custom initializer over every submodule of the network.
net.apply(my_init)
# Show the first two rows of the first layer's weight matrix.
hidden_layer = net[0]
hidden_layer.weight[:2]

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


tensor([[-0.0000, -8.4738,  0.0000, -5.2203],
        [-0.0000, -0.0000, -0.0000,  0.0000]], grad_fn=<SliceBackward0>)

In [5]:
# Parameters can also be edited by hand through their underlying tensor.
first_weight = net[0].weight.data
first_weight += 1        # shift every entry of the first layer's weight by one
first_weight[0, 0] = 42  # then overwrite a single entry
first_weight[0]

tensor([42.0000, -7.4738,  1.0000, -4.2203])