In [None]:
import torch 
import torch.nn as nn


In [2]:
# Pick the compute backend in priority order: CUDA first, then Apple's MPS,
# and finally plain CPU. The chained conditional is evaluated lazily, so the
# MPS probe only runs when CUDA is not available.
device = (
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

# Bare last expression so the notebook echoes the selected backend.
device

'cuda'

In [4]:

# Manual He (Kaiming) uniform initialization of a 40 -> 10 linear layer.
layer = nn.Linear(40, 10)

# nn.Linear draws its default weights from U(-1/sqrt(fan_in), 1/sqrt(fan_in)),
# so scaling them by sqrt(6) yields U(-sqrt(6/fan_in), sqrt(6/fan_in)), i.e.
# the He-uniform bound for ReLU. The in-place updates run under
# torch.no_grad() instead of going through `.data`, so they are not recorded
# in the autograd graph while still being visible to autograd's in-place
# version tracking.
with torch.no_grad():
    layer.weight.mul_(6 ** 0.5)
    layer.bias.zero_()

# Echo the zeroed bias as a plain tensor (detached view, shares storage).
layer.bias.detach()

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [6]:
# Same He-uniform scheme, this time through torch.nn.init: the weights are
# re-drawn in place using the fan-in mode and the ReLU gain (sqrt(2)).
nn.init.kaiming_uniform_(layer.weight, mode="fan_in", nonlinearity="relu")
# Last expression of the cell: zero the bias in place and echo the Parameter.
nn.init.zeros_(layer.bias)


Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [8]:
def use_he_init(module):
    """Apply He (Kaiming) uniform initialization to a linear layer, in place.

    Intended as a callback for ``nn.Module.apply``: every module that is not
    an ``nn.Linear`` is left untouched.

    Args:
        module: Any ``nn.Module``. For ``nn.Linear`` instances the weight is
            re-drawn with ``nn.init.kaiming_uniform_`` and the bias, when the
            layer has one, is set to zero.

    Returns:
        None. The module's parameters are modified in place.
    """
    if not isinstance(module, nn.Linear):
        return
    nn.init.kaiming_uniform_(module.weight)
    # Layers built with ``bias=False`` expose ``bias`` as None; passing None
    # to ``nn.init.zeros_`` would raise, so skip it.
    if module.bias is not None:
        nn.init.zeros_(module.bias)

# Small ReLU network (50 -> 40 -> 1), one layer per line for readability.
model = nn.Sequential(
    nn.Linear(in_features=50, out_features=40),
    nn.ReLU(),
    nn.Linear(in_features=40, out_features=1),
    nn.ReLU(),
)
# Module.apply calls the function on every submodule and on the container
# itself, then returns the container, so the cell echoes the model's repr.
model.apply(use_he_init)

Sequential(
  (0): Linear(in_features=50, out_features=40, bias=True)
  (1): ReLU()
  (2): Linear(in_features=40, out_features=1, bias=True)
  (3): ReLU()
)

In [10]:
# Negative-side slope, shared by the activation and by the initializer so the
# He gain matches the nonlinearity actually used.
alpha = 0.02
model = nn.Sequential(
    nn.Linear(in_features=50, out_features=40),
    nn.LeakyReLU(negative_slope=alpha),
)
# He-uniform init with the leaky-ReLU gain sqrt(2 / (1 + alpha**2)).
# Only the weight is re-drawn here; the bias keeps nn.Linear's default init.
# The call returns the weight Parameter, which the cell echoes.
nn.init.kaiming_uniform_(
    model[0].weight, a=alpha, mode="fan_in", nonlinearity="leaky_relu"
)

Parameter containing:
tensor([[-0.2324,  0.0322,  0.1200,  ...,  0.0033, -0.3104, -0.0831],
        [-0.2978, -0.2092, -0.2802,  ..., -0.1823, -0.1544, -0.2992],
        [-0.3073,  0.1527, -0.0099,  ...,  0.0193,  0.3244,  0.2923],
        ...,
        [-0.1450, -0.1077, -0.1284,  ...,  0.2165, -0.3219, -0.1727],
        [-0.0966, -0.0470,  0.0773,  ...,  0.0583,  0.0874,  0.2954],
        [-0.2337, -0.0107, -0.1989,  ...,  0.2556, -0.1681,  0.1206]],
       requires_grad=True)