# Learning Ways to Access and Initialize parameters

In [4]:
import torch
import torch.nn as nn

## Creating a MLP as a experimental nw

In [None]:
net = nn.Sequential()
net.add_module('Linear1', nn.Linear(256, 128, bias=True))
net.add_module('relu', nn.ReLU())
net.add_module('Output', nn.Linear(128, 10, bias=False))



## Function to initialize weight

In [None]:
def init_weights(m):
  if type(m) == nn.Linear:
    torch.nn.init.xavier_uniform_(m.weight)

In [None]:
net.apply(init_weights)

Sequential(
  (Linear1): Linear(in_features=256, out_features=128, bias=True)
  (relu): ReLU()
  (Output): Linear(in_features=128, out_features=10, bias=False)
)

In [None]:
x = torch.randn(1, 256)
net(x)

tensor([[ 0.9744,  0.2991,  0.2853,  1.5426,  1.0074,  1.3887, -3.0738,  1.7791,
         -0.8074,  0.6550]], grad_fn=<MmBackward>)

# Parameter Access

In [None]:
# accessing parameters layerswise by layer indexing
print(net[0].parameters)
print(net[1].parameters)
print(net[2].parameters)

<bound method Module.parameters of Linear(in_features=256, out_features=128, bias=True)>
<bound method Module.parameters of ReLU()>
<bound method Module.parameters of Linear(in_features=128, out_features=10, bias=False)>


In [None]:
# accessing targeted parameters
print(net[0].bias)
print(net[2].weight)

In [None]:
# accessing parameters with name of the layers
print(net.Linear1.weight)
print(net.Output.bias)

Parameter containing:
tensor([[-0.0718,  0.0055, -0.0962,  ...,  0.0004,  0.0776,  0.0320],
        [ 0.0582,  0.0014,  0.1235,  ...,  0.0137,  0.0655, -0.0800],
        [ 0.1208,  0.0362,  0.0236,  ...,  0.0054, -0.0544, -0.0760],
        ...,
        [ 0.0034,  0.0466,  0.0583,  ...,  0.0335,  0.0234,  0.1248],
        [-0.0514, -0.0778,  0.0663,  ...,  0.0990, -0.0481, -0.0346],
        [-0.0792, -0.0218, -0.0362,  ...,  0.1049,  0.0213,  0.1178]],
       requires_grad=True)
None


In [None]:
# all parameters at once in the form of state dict
print(net.state_dict)

<bound method Module.state_dict of Sequential(
  (Linear1): Linear(in_features=256, out_features=128, bias=True)
  (relu): ReLU()
  (Output): Linear(in_features=128, out_features=10, bias=False)
)>


# Parameter Initialization techniques

In [None]:
# initializazing parameters of single layer
l1 = nn.Linear(20, 256, bias=True)
nn.init.normal_(l1.weight, mean=0.0, std=0.01)

Parameter containing:
tensor([[-0.0090,  0.0048,  0.0175,  ...,  0.0139, -0.0056, -0.0217],
        [ 0.0175,  0.0006,  0.0016,  ..., -0.0042, -0.0011,  0.0024],
        [ 0.0064, -0.0064, -0.0164,  ..., -0.0059,  0.0165,  0.0210],
        ...,
        [-0.0117,  0.0132,  0.0046,  ...,  0.0142,  0.0079, -0.0113],
        [-0.0089, -0.0088,  0.0166,  ..., -0.0124,  0.0173,  0.0001],
        [ 0.0151, -0.0144, -0.0100,  ...,  0.0175,  0.0082,  0.0053]],
       requires_grad=True)

In [None]:
# using init weight
model = nn.Sequential(nn.Linear(10, 128, bias=False), nn.ReLU(), nn.Linear(128, 32, bias=False))
net.apply(init_weights)

Sequential(
  (Linear1): Linear(in_features=256, out_features=128, bias=True)
  (relu): ReLU()
  (Output): Linear(in_features=128, out_features=10, bias=False)
)

In [None]:
# initializing weigths with constants
def init_weight_constants(m):
  if type(m) == nn.Linear:
    torch.nn.init.constant_(m.weight, 1)

model = nn.Sequential(nn.Linear(10, 128, bias=False), nn.ReLU(), nn.Linear(128, 32, bias=False))
model.apply(init_weight_constants)
print(model[0].parameters)

<bound method Module.parameters of Linear(in_features=10, out_features=128, bias=False)>


# Sharing parameters with other layers

In [None]:
l1 = nn.Linear(5, 25, bias=False)
nn.init.constant_(l1.weight, 5) 


l2 = nn.Linear(5, 25, bias= False)
l2.parameters = l1.parameters

l1.parameters == l2.parameters

True

 # Deferred Initialization
 provision for the cases when in_features can't be determined beforehand

In [8]:
# creating a nw

def net(in_features, out_features):
  return nn.Sequential(nn.Linear(in_features, 256), 
         nn.ReLU(), nn.Linear(256, out_features))


In [9]:
net = net(10,10)

In [10]:
def init_weights(m):
  print("Init",m)

net.apply(init_weights)

Init Linear(in_features=10, out_features=256, bias=True)
Init ReLU()
Init Linear(in_features=256, out_features=10, bias=True)
Init Sequential(
  (0): Linear(in_features=10, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


Sequential(
  (0): Linear(in_features=10, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)

We can't set or get parametrs before first time forward method is involed as 
parameters get initialized only at that time. because input shape becomes know to the network when forward is invoked