In [1]:
from torch import nn
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Two affine layers stacked back to back (10 -> 5 -> 2), with no activation
# in between, wrapped in the stock Sequential container.
model = nn.Sequential(
    nn.Linear(in_features=10, out_features=5),
    nn.Linear(in_features=5, out_features=2),
)

In [3]:
type(model)

torch.nn.modules.container.Sequential

In [4]:
class MLP(nn.Module):
    """Two linear layers applied one after the other, with no activation.

    The layer widths default to the original 10 -> 5 -> 2 layout, so a bare
    ``MLP()`` builds the same network as before; pass other sizes to reuse the
    class for differently shaped inputs.
    """

    def __init__(self, in_features=10, hidden_features=5, out_features=2):
        """Create the two layers.

        Args:
            in_features: size of the last dimension of the input to ``L1``.
            hidden_features: output size of ``L1`` and input size of ``L2``.
            out_features: size of the last dimension produced by ``L2``.
        """
        super().__init__()
        # Attribute names (and creation order) are kept so parameter names
        # such as "L1.weight" stay the same.
        self.L1 = nn.Linear(in_features, hidden_features)
        self.L2 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Return ``L2(L1(x))``."""
        hidden = self.L1(x)
        return self.L2(hidden)

In [5]:
model = MLP()

In [6]:
def backward_hook(module, grad_input, grad_output):
    """Print what a module sees during the backward pass.

    Has the ``(module, grad_input, grad_output)`` signature expected by
    ``register_full_backward_hook``. It only reports: nothing is modified and
    ``None`` is returned.

    Args:
        module: the module the hook is attached to; printed via its repr.
        grad_input: printed as received from the caller.
        grad_output: printed as received from the caller.

    For modules that carry a non-``None`` ``weight``, the weight and its
    current ``.grad`` are printed as well. NOTE(review): ``.grad`` is whatever
    has been accumulated at the moment the hook fires; it may still be ``None``.
    """
    print(f"*** MODULE  : {module} ***")
    print(f'grad_input  : {grad_input}')
    print(f'grad_output : {grad_output}')
    # Some modules register ``weight`` as None (e.g. normalisation layers built
    # with affine=False). ``hasattr`` is true for those, and reading ``.grad``
    # on None would raise in the middle of backward, so test the value itself.
    weight = getattr(module, "weight", None)
    if weight is not None:
        print(f'weight val  : {weight}')
        print(f'weight grad  : {weight.grad}')
    print()

In [7]:
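# Disabled experiment (kept for reference): attach backward_hook to every
# submodule of `model` via apply() -- the double-commented line would hook only
# the top-level module -- then backpropagate the mean output of an all-ones
# (3, 10) batch.
# # model.register_full_backward_hook(backward_hook)
# model.apply(lambda module: module.register_full_backward_hook(backward_hook))
# x = torch.ones(size=(3, 10))
# out = model(x).mean()
# out.backward()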

In [8]:
# A single 5 -> 1 layer with hand-picked parameters so the gradients printed
# by the hook are easy to check by hand: every weight is 2 and the bias is 1.
linear = nn.Linear(in_features=5, out_features=1)
with torch.no_grad():
    linear.weight = nn.Parameter(2 * torch.ones_like(linear.weight))
    linear.bias = nn.Parameter(torch.ones_like(linear.bias))
# Last expression of the cell: the returned handle is what the cell displays.
linear.register_full_backward_hook(backward_hook)

<torch.utils.hooks.RemovableHandle at 0x7fc2185fb7f0>

In [9]:
linear.weight, linear.bias

(Parameter containing:
 tensor([[2., 2., 2., 2., 2.]], requires_grad=True),
 Parameter containing:
 tensor([1.], requires_grad=True))

In [10]:
out = linear(torch.ones(5, requires_grad=True))
print(out)

tensor([11.], grad_fn=<BackwardHookFunctionBackward>)


In [11]:
out.backward()

*** MODULE  : Linear(in_features=5, out_features=1, bias=True) ***
grad_input  : (tensor([2., 2., 2., 2., 2.]),)
grad_output : (tensor([1.]),)
weight val  : Parameter containing:
tensor([[2., 2., 2., 2., 2.]], requires_grad=True)
weight grad  : tensor([[1., 1., 1., 1., 1.]])



  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


In [12]:
class Mult(nn.Module):
    """Chain of three bias-free 1 -> 1 linear layers.

    Each layer holds a single scalar weight and no bias, so the output is the
    input multiplied by the three weights in turn.
    """

    def __init__(self):
        """Create the layers ``L1``, ``L2`` and ``L3`` in that order."""
        super().__init__()
        self.L1 = nn.Linear(in_features=1, out_features=1, bias=False)
        self.L2 = nn.Linear(in_features=1, out_features=1, bias=False)
        self.L3 = nn.Linear(in_features=1, out_features=1, bias=False)

    def forward(self, x):
        """Pass ``x`` through ``L1``, then ``L2``, then ``L3``."""
        after_first = self.L1(x)
        after_second = self.L2(after_first)
        return self.L3(after_second)

In [13]:
m = Mult()

In [14]:
# Give the Linear layers the weights 1, 2, 3 in definition order. The layers
# are selected explicitly and numbered from 1, instead of relying on the root
# module happening to occupy index 0 of m.modules().
linear_layers = [module for module in m.modules() if isinstance(module, nn.Linear)]
with torch.no_grad():
    for weight_value, layer in enumerate(linear_layers, start=1):
        print(layer)
        layer.weight = nn.Parameter(torch.ones_like(layer.weight) * weight_value)

# Hook every module, the container included. Module.apply() throws away the
# callback's return value, so register directly and keep the handles: calling
# handle.remove() on each one detaches the hooks again.
hook_handles = [
    module.register_full_backward_hook(backward_hook) for module in m.modules()
]
# Last expression of the cell: display the model, as m.apply(...) did.
m


Linear(in_features=1, out_features=1, bias=False)
Linear(in_features=1, out_features=1, bias=False)
Linear(in_features=1, out_features=1, bias=False)


Mult(
  (L1): Linear(in_features=1, out_features=1, bias=False)
  (L2): Linear(in_features=1, out_features=1, bias=False)
  (L3): Linear(in_features=1, out_features=1, bias=False)
)

In [15]:
x = torch.tensor([5.], requires_grad=True)
out = m(x)

In [16]:
out.backward()

*** MODULE  : Linear(in_features=1, out_features=1, bias=False) ***
grad_input  : (tensor([3.]),)
grad_output : (tensor([1.]),)
weight val  : Parameter containing:
tensor([[3.]], requires_grad=True)
weight grad  : tensor([[10.]])

*** MODULE  : Linear(in_features=1, out_features=1, bias=False) ***
grad_input  : (tensor([6.]),)
grad_output : (tensor([3.]),)
weight val  : Parameter containing:
tensor([[2.]], requires_grad=True)
weight grad  : tensor([[15.]])

*** MODULE  : Linear(in_features=1, out_features=1, bias=False) ***
grad_input  : (tensor([6.]),)
grad_output : (tensor([6.]),)
weight val  : Parameter containing:
tensor([[1.]], requires_grad=True)
weight grad  : tensor([[30.]])

*** MODULE  : Mult(
  (L1): Linear(in_features=1, out_features=1, bias=False)
  (L2): Linear(in_features=1, out_features=1, bias=False)
  (L3): Linear(in_features=1, out_features=1, bias=False)
) ***
grad_input  : (tensor([6.]),)
grad_output : (tensor([1.]),)



In [17]:
x.grad

tensor([6.])