<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Defining-the-network" data-toc-modified-id="Defining-the-network-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Defining the network</a></span></li><li><span><a href="#Creating-an-object-of-MLP-class" data-toc-modified-id="Creating-an-object-of-MLP-class-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Creating an object of MLP class</a></span></li><li><span><a href="#Inputs-and-outputs" data-toc-modified-id="Inputs-and-outputs-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Inputs and outputs</a></span></li><li><span><a href="#Loss,-forward-and-backward-propagation" data-toc-modified-id="Loss,-forward-and-backward-propagation-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Loss, forward and backward propagation</a></span></li><li><span><a href="#Weights-and-biases" data-toc-modified-id="Weights-and-biases-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Weights and biases</a></span><ul class="toc-item"><li><span><a href="#Accessing-weights-and-biases" data-toc-modified-id="Accessing-weights-and-biases-5.1"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Accessing weights and biases</a></span></li><li><span><a href="#Accessing-gradients-of-weights-and-biases" data-toc-modified-id="Accessing-gradients-of-weights-and-biases-5.2"><span class="toc-item-num">5.2&nbsp;&nbsp;</span>Accessing gradients of weights and biases</a></span></li><li><span><a href="#Accessing-all-weights,-biases-and-their-gradients" data-toc-modified-id="Accessing-all-weights,-biases-and-their-gradients-5.3"><span class="toc-item-num">5.3&nbsp;&nbsp;</span>Accessing all weights, biases and their gradients</a></span><ul class="toc-item"><li><span><a href="#Method-1" data-toc-modified-id="Method-1-5.3.1"><span class="toc-item-num">5.3.1&nbsp;&nbsp;</span>Method 1</a></span></li><li><span><a href="#Method-2" data-toc-modified-id="Method-2-5.3.2"><span class="toc-item-num">5.3.2&nbsp;&nbsp;</span>Method 2</a></span></li><li><span><a 
href="#Method-3" data-toc-modified-id="Method-3-5.3.3"><span class="toc-item-num">5.3.3&nbsp;&nbsp;</span>Method 3</a></span></li></ul></li></ul></li><li><span><a href="#Accessing-modules-of-a-model" data-toc-modified-id="Accessing-modules-of-a-model-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Accessing modules of a model</a></span></li></ul></div>

# Accessing a Network's Weights and Biases

## Defining the network

In [1]:
import torch
import torch.nn as nn

class MLP(nn.Module):
    """Small multi-layer perceptron: Linear -> ReLU -> Linear -> sigmoid.

    Args:
        n_in: number of input features.
        n_hid: number of units in the hidden layer.
        n_out: number of output units.
    """

    def __init__(self, n_in, n_hid, n_out):
        super().__init__()
        # The attribute names double as the parameter-name prefixes
        # ("hid.weight", "out.bias", ...), so they are kept as-is.
        self.hid = nn.Linear(n_in, n_hid)
        self.out = nn.Linear(n_hid, n_out)

    def forward(self, x):
        """Run `x` through the hidden and output layers.

        Args:
            x: input tensor whose last dimension is `n_in`.

        Returns:
            Tensor with last dimension `n_out`; sigmoid keeps values in [0, 1].
        """
        hidden = torch.relu(self.hid(x))
        return torch.sigmoid(self.out(hidden))

## Creating an object of MLP class

In [2]:
# 10 input features -> 5 hidden units -> 1 output.
model = MLP(n_in=10, n_hid=5, n_out=1)

## Inputs and outputs

In [3]:
# Random stand-in data: 15 samples with 10 features each (the model's input
# width) and one target value per sample.
x = torch.randn((15, 10))
y = torch.randn((15, 1))

## Loss, forward and backward propagation

In [4]:
# One forward pass, a loss, and one backward pass so that the model's
# parameters have their .grad attributes populated for the cells below.
loss_fn = nn.MSELoss()
y_pred = model(x)
# nn.MSELoss is called as loss_fn(input, target): prediction first, ground
# truth second. MSE is symmetric, so the value is the same either way, but
# the conventional order stays correct if the loss is later replaced.
loss = loss_fn(y_pred, y)
loss.backward()

## Weights and biases

In [5]:
# Printing the model lists its registered sub-layers and their configuration.
print(model)

MLP(
  (hid): Linear(in_features=10, out_features=5, bias=True)
  (out): Linear(in_features=5, out_features=1, bias=True)
)


### Accessing weights and biases

In [6]:
# A layer's parameters are plain attributes: model.<layer>.weight / .bias.
# Show the shape first, then the values, for the hidden layer.
for param in (model.hid.weight, model.hid.bias):
    print(param.shape)
    print(param)

torch.Size([5, 10])
Parameter containing:
tensor([[ 0.1542,  0.2727, -0.2688, -0.2167,  0.2276,  0.1290,  0.1904, -0.3052,
         -0.0722, -0.2763],
        [-0.1476,  0.0665, -0.2478,  0.2180, -0.2690,  0.2435, -0.0228, -0.0390,
         -0.0955, -0.0616],
        [ 0.1143, -0.3107, -0.2268, -0.0037, -0.0183,  0.1669, -0.1527, -0.2287,
         -0.1954,  0.1867],
        [-0.0496,  0.0076, -0.2912,  0.2673,  0.1897, -0.2624,  0.3020, -0.1142,
         -0.1080, -0.1376],
        [-0.1354, -0.0784, -0.2497,  0.1875, -0.2576,  0.0784,  0.1197, -0.2452,
          0.2836,  0.2185]], requires_grad=True)
torch.Size([5])
Parameter containing:
tensor([-0.2451, -0.0112,  0.1131,  0.1883,  0.3110], requires_grad=True)


### Accessing gradients of weights and biases

In [7]:
# After loss.backward(), the gradient of each parameter is stored in its
# .grad attribute; print shape and values for the hidden layer.
for param in (model.hid.weight, model.hid.bias):
    print(param.grad.shape)
    print(param.grad)

torch.Size([5, 10])
tensor([[-0.0054, -0.0014, -0.0002,  0.0070,  0.0026,  0.0030, -0.0051, -0.0028,
          0.0030,  0.0057],
        [ 0.0227,  0.0608,  0.0720,  0.0658, -0.0163, -0.0060, -0.0196,  0.0342,
         -0.0030,  0.0742],
        [-0.0730,  0.0117,  0.0029,  0.0294,  0.0108, -0.0052, -0.0674,  0.0245,
         -0.0486, -0.0193],
        [-0.1260, -0.0177,  0.0129,  0.0881,  0.0334,  0.0195, -0.1037, -0.0140,
          0.0013,  0.0737],
        [-0.0258, -0.0262, -0.0252, -0.0205, -0.0004, -0.0073, -0.0047,  0.0022,
         -0.0232, -0.0152]])
torch.Size([5])
tensor([-0.0035,  0.0159,  0.0003, -0.0456, -0.0240])


### Accessing all weights, biases and their gradients

#### Method 1

In [18]:
# Method 1: walk the direct child layers and read .weight / .bias (and their
# .grad) from each one. Assumes every child exposes both attributes, which
# holds for the two nn.Linear layers of this model.
for child in model.children():
    print("Child: ", child)
    for heading, attr in (("Weights:", "weight"), ("Biases:", "bias")):
        # Look the attribute up lazily, just before it is printed.
        param = getattr(child, attr)
        print(heading)
        print("--------")
        print("Shape: ", param.shape)
        print(param)
        print("Gradients:")
        print("----------")
        print("Shape: ", param.grad.shape)
        print(param.grad)
    print()

Child:  Linear(in_features=10, out_features=5, bias=True)
Weights:
--------
Shape:  torch.Size([5, 10])
Parameter containing:
tensor([[ 0.1542,  0.2727, -0.2688, -0.2167,  0.2276,  0.1290,  0.1904, -0.3052,
         -0.0722, -0.2763],
        [-0.1476,  0.0665, -0.2478,  0.2180, -0.2690,  0.2435, -0.0228, -0.0390,
         -0.0955, -0.0616],
        [ 0.1143, -0.3107, -0.2268, -0.0037, -0.0183,  0.1669, -0.1527, -0.2287,
         -0.1954,  0.1867],
        [-0.0496,  0.0076, -0.2912,  0.2673,  0.1897, -0.2624,  0.3020, -0.1142,
         -0.1080, -0.1376],
        [-0.1354, -0.0784, -0.2497,  0.1875, -0.2576,  0.0784,  0.1197, -0.2452,
          0.2836,  0.2185]], requires_grad=True)
Gradients:
----------
Shape:  torch.Size([5, 10])
tensor([[-0.0054, -0.0014, -0.0002,  0.0070,  0.0026,  0.0030, -0.0051, -0.0028,
          0.0030,  0.0057],
        [ 0.0227,  0.0608,  0.0720,  0.0658, -0.0163, -0.0060, -0.0196,  0.0342,
         -0.0030,  0.0742],
        [-0.0730,  0.0117,  0.0029,  0.0

In [19]:
# Compact overview: only the weight and bias shapes of each child layer.
for child in model.children():
    for param in (child.weight, child.bias):
        print(param.shape)

torch.Size([5, 10])
torch.Size([5])
torch.Size([1, 5])
torch.Size([1])


#### Method 2

In [30]:
# Method 2: parameters() yields every weight and bias tensor of the model in
# turn (without names), so no knowledge of the layer structure is needed.
for param in model.parameters():
    print("Shape: ", param.shape)
    print("Weight/Bias: ", param)
    # Label spelling corrected (was "Gradiant").
    print("Gradient: ", param.grad)
    print()

Shape:  torch.Size([5, 10])
Weight/Bias:  Parameter containing:
tensor([[ 0.1542,  0.2727, -0.2688, -0.2167,  0.2276,  0.1290,  0.1904, -0.3052,
         -0.0722, -0.2763],
        [-0.1476,  0.0665, -0.2478,  0.2180, -0.2690,  0.2435, -0.0228, -0.0390,
         -0.0955, -0.0616],
        [ 0.1143, -0.3107, -0.2268, -0.0037, -0.0183,  0.1669, -0.1527, -0.2287,
         -0.1954,  0.1867],
        [-0.0496,  0.0076, -0.2912,  0.2673,  0.1897, -0.2624,  0.3020, -0.1142,
         -0.1080, -0.1376],
        [-0.1354, -0.0784, -0.2497,  0.1875, -0.2576,  0.0784,  0.1197, -0.2452,
          0.2836,  0.2185]], requires_grad=True)
Gradiant:  tensor([[-0.0054, -0.0014, -0.0002,  0.0070,  0.0026,  0.0030, -0.0051, -0.0028,
          0.0030,  0.0057],
        [ 0.0227,  0.0608,  0.0720,  0.0658, -0.0163, -0.0060, -0.0196,  0.0342,
         -0.0030,  0.0742],
        [-0.0730,  0.0117,  0.0029,  0.0294,  0.0108, -0.0052, -0.0674,  0.0245,
         -0.0486, -0.0193],
        [-0.1260, -0.0177,  0.01

#### Method 3

In [33]:
# Method 3: named_parameters() yields (name, parameter) pairs such as
# "hid.weight", so each tensor can be identified by name.
for name, param in model.named_parameters():
    grad = param.grad
    print(name, param)
    print(grad)

hid.weight Parameter containing:
tensor([[ 0.1542,  0.2727, -0.2688, -0.2167,  0.2276,  0.1290,  0.1904, -0.3052,
         -0.0722, -0.2763],
        [-0.1476,  0.0665, -0.2478,  0.2180, -0.2690,  0.2435, -0.0228, -0.0390,
         -0.0955, -0.0616],
        [ 0.1143, -0.3107, -0.2268, -0.0037, -0.0183,  0.1669, -0.1527, -0.2287,
         -0.1954,  0.1867],
        [-0.0496,  0.0076, -0.2912,  0.2673,  0.1897, -0.2624,  0.3020, -0.1142,
         -0.1080, -0.1376],
        [-0.1354, -0.0784, -0.2497,  0.1875, -0.2576,  0.0784,  0.1197, -0.2452,
          0.2836,  0.2185]], requires_grad=True)
tensor([[-0.0054, -0.0014, -0.0002,  0.0070,  0.0026,  0.0030, -0.0051, -0.0028,
          0.0030,  0.0057],
        [ 0.0227,  0.0608,  0.0720,  0.0658, -0.0163, -0.0060, -0.0196,  0.0342,
         -0.0030,  0.0742],
        [-0.0730,  0.0117,  0.0029,  0.0294,  0.0108, -0.0052, -0.0674,  0.0245,
         -0.0486, -0.0193],
        [-0.1260, -0.0177,  0.0129,  0.0881,  0.0334,  0.0195, -0.1037, -0

## Accessing modules of a model

In [9]:
# modules() yields the model itself first, followed by its sub-layers;
# print each one on its own line.
print(*model.modules(), sep="\n")

MLP(
  (hid): Linear(in_features=10, out_features=5, bias=True)
  (out): Linear(in_features=5, out_features=1, bias=True)
)
Linear(in_features=10, out_features=5, bias=True)
Linear(in_features=5, out_features=1, bias=True)
