In [1]:
import numpy as np
import torch
import torch.nn as nn

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class MyMean(nn.Module): 
    def forward(self, input):
        out = input/4
        # print('MyMean.input:', input.data)
        # print('MyMean.forward:', out)
        print('----------')
        return out
    
def tensor_hook(grad):
    print('tensor_hook.grad:', grad) # [8, 8, 8, 8] / 4
    print('----------')
    return grad

def first_hook(module, grad_input, grad_output):
    print('first_hook grad input:', grad_input)
    print('first_hook grad output:', grad_output)
    print('----------')
    return grad_input

class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.f1 = nn.Linear(4, 1, bias=True)    
        self.f2 = MyMean()
        self.weight_init()
        
    def forward(self, input):
        self.input = input
        output = self.f1(input)
        output = self.f2(output)      
        return output
    
    def weight_init(self):
        self.f1.weight.data.fill_(8.0)    # 这里设置Linear的权重为8
        self.f1.bias.data.fill_(2.0)      # 这里设置Linear的bias为2
        print('f1.weight:', self.f1.weight.data)
        print('f1.bias:', self.f1.bias.data)
        print("----------------")
        
    def my_hook(self, module, grad_input, grad_output):
        print('my_hook grad input:', grad_input)
        print('my_hook grad output:', grad_output)
        print('----------')
        # grad_input = grad_input[0]*self.input   # 这里把hook函数内对grad_input的操作进行了注释，
        # grad_input = tuple([grad_input])        # 返回的grad_input必须是tuple，所以我们进行了tuple包装。
        # print('now grad:', grad_input)        
        return grad_input
   

In [3]:
_input = np.array([1,2,3,4])

sum(_input * 8 + 2) / 4

22.0

In [4]:
net = MyNet()
net.cuda()

f1.weight: tensor([[8., 8., 8., 8.]])
f1.bias: tensor([2.])
----------------


MyNet(
  (f1): Linear(in_features=4, out_features=1, bias=True)
  (f2): MyMean()
)

In [16]:
dir(net)

['__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_buffers',
 '_forward_hooks',
 '_forward_pre_hooks',
 '_get_name',
 '_load_from_state_dict',
 '_load_state_dict_pre_hooks',
 '_modules',
 '_named_members',
 '_parameters',
 '_register_load_state_dict_pre_hook',
 '_register_state_dict_hook',
 '_replicate_for_data_parallel',
 '_save_to_state_dict',
 '_slow_forward',
 '_state_dict_hooks',
 '_version',
 'add_module',
 'apply',
 'buffers',
 'children',
 'cpu',
 'cuda',
 'double',
 'dump_patches',
 'eval',
 'extra_repr',
 'f1',
 'f2',
 'float',
 'forward',
 'half',
 'input',
 'load_state_dict',
 'modu

In [15]:
net.cpu()
it = next(net.parameters())
type(it), it.device, print(it) # dir(it)

Parameter containing:
tensor([[8., 8., 8., 8.]], requires_grad=True)


(torch.nn.parameter.Parameter, device(type='cpu'), None)

In [5]:
input = torch.tensor(_input, dtype=torch.float32, requires_grad=True).to(device)
net = MyNet()
print(net.f1)
print(net.f2)
print('------------')
net.to(device)
input.register_hook(tensor_hook) # input tensor must requires_grad=True
net.f1.register_backward_hook(first_hook) # grad_input[0] input tensor must requires_grad=True
# net.register_backward_hook(net.my_hook)
net.f2.register_backward_hook(net.my_hook)
result = net(input)
print('result =', result)
print('------------')
result.backward()

print('input.grad:', input.grad)
for param in net.parameters():
    print('{}:grad->{}'.format(param.data, param.grad.data))

f1.weight: tensor([[8., 8., 8., 8.]])
f1.bias: tensor([2.])
----------------
Linear(in_features=4, out_features=1, bias=True)
MyMean()
------------
----------
result = tensor([20.5000], device='cuda:0', grad_fn=<DivBackward0>)
------------
my_hook grad input: (tensor([0.2500], device='cuda:0'), None)
my_hook grad output: (tensor([1.], device='cuda:0'),)
----------
first_hook grad input: (tensor([0.2500], device='cuda:0'), tensor([0.2500], device='cuda:0'))
first_hook grad output: (tensor([0.2500], device='cuda:0'),)
----------
tensor_hook.grad: tensor([2., 2., 2., 2.], device='cuda:0')
----------
input.grad: None
tensor([[8., 8., 8., 8.]], device='cuda:0'):grad->tensor([[0.2500, 0.5000, 0.7500, 1.0000]], device='cuda:0')
tensor([2.], device='cuda:0'):grad->tensor([0.2500], device='cuda:0')


---------------

In [6]:
_input2 = np.array([[1,2,3,4], [1,1,1,1]])

In [7]:
input = torch.tensor(_input2, dtype=torch.float32, requires_grad=True).to(device)
net = MyNet()
print(net.f1)
print(net.f2)
print('------------')
net.to(device)
net.register_backward_hook(net.my_hook)
input.register_hook(tensor_hook)
result = net(input)
print('result =', result)
print('------------')
result.backward(torch.ones_like(result))

print('input.grad:', input.grad)
for param in net.parameters():
    print('{}:grad->{}'.format(param.data, param.grad.data))

f1.weight: tensor([[8., 8., 8., 8.]])
f1.bias: tensor([2.])
----------------
Linear(in_features=4, out_features=1, bias=True)
MyMean()
------------
----------
result = tensor([[20.5000],
        [ 8.5000]], device='cuda:0', grad_fn=<DivBackward0>)
------------
my_hook grad input: (tensor([[0.2500],
        [0.2500]], device='cuda:0'), None)
my_hook grad output: (tensor([[1.],
        [1.]], device='cuda:0'),)
----------
tensor_hook.grad: tensor([[2., 2., 2., 2.],
        [2., 2., 2., 2.]], device='cuda:0')
----------
input.grad: None
tensor([[8., 8., 8., 8.]], device='cuda:0'):grad->tensor([[0.5000, 0.7500, 1.0000, 1.2500]], device='cuda:0')
tensor([2.], device='cuda:0'):grad->tensor([0.5000], device='cuda:0')
