# Layer Initialization

In [1]:
import torch
import torch.nn as nn

In [8]:
# Build a fully connected layer mapping 64 inputs to 128 outputs and
# look at the value range of its freshly initialised weight matrix.
layer = nn.Linear(in_features=64, out_features=128)
torch.min(layer.weight), torch.max(layer.weight)

(tensor(-0.1250, grad_fn=<MinBackward1>),
 tensor(0.1248, grad_fn=<MaxBackward1>))

In [15]:
# Overwrite the weights in place with samples drawn uniformly between
# 0 and 1, then re-check the smallest and largest value.
nn.init.uniform_(layer.weight, a=0.0, b=1.0)
torch.min(layer.weight), torch.max(layer.weight)

(tensor(7.9930e-05, grad_fn=<MinBackward1>),
 tensor(0.9999, grad_fn=<MaxBackward1>))

# Fine Tuning
- Train with a small learning rate.
- Freeze some layers of the network (we do not train them).
    - Usually freeze early layers, and train layers closer to the output layer.
    - Set `requires_grad` attribute of each parameter to `False`.

### Freeze layers in a Neural Network

In [16]:
# Random batch: 30 rows with 4 values each, drawn from a standard normal.
input_tensor = torch.randn(30, 4)
# Preview the first four rows only.
input_tensor[0:4]

tensor([[ 0.4773, -1.0382,  0.6629, -1.9118],
        [-0.8498, -0.3147,  1.8723,  0.2460],
        [-0.9433, -0.2662,  0.6920, -1.2071],
        [-0.1730, -1.7524, -1.3496,  0.3125]])

In [25]:
# Small feed-forward network: input width -> 10 -> 5 -> 5 -> 1, with a ReLU
# after every linear layer except the last. The input width is read from
# the last dimension of `input_tensor`.
model = nn.Sequential(
    nn.Linear(input_tensor.size(-1), 10),
    nn.ReLU(),
    nn.Linear(10, 5),
    nn.ReLU(),
    nn.Linear(5, 5),
    nn.ReLU(),
    nn.Linear(5, 1),
)

In [27]:
# Freeze the weight matrix of the first linear layer: parameters with
# `requires_grad = False` receive no gradient and are therefore not trained.
# Only "0.weight" is matched, so the bias of that layer stays trainable.
for name, parameter in model.named_parameters():
    if name == "0.weight":
        parameter.requires_grad = False
    # Report every parameter's status so the effect of the freeze is visible.
    print(name, parameter.requires_grad)

0.weight False
0.bias True
2.weight True
2.bias True
4.weight True
4.bias True
6.weight True
6.bias True


In [28]:
# List the trainable flag of each parameter, in registration order.
for param in model.parameters():
    is_trainable = param.requires_grad
    print(is_trainable)

False
True
True
True
True
True
True
True
