# Variable hook

In [3]:
import torch as t

# Build the leaf tensor straight from Python floats. Passing an existing
# tensor to t.tensor(...) copy-constructs it and emits a UserWarning.
v = t.tensor([0.0, 0.0, 0.0], requires_grad=True)
# The hook receives the gradient flowing into v and returns its replacement
# (here: the gradient doubled).
h = v.register_hook(lambda grad: grad * 2)
# v is not a scalar, so backward() needs an explicit gradient of matching
# shape; leaving it out raises a RuntimeError.
v.backward(gradient=t.tensor([1.0, 1.0, 1.0]))

print(v.grad)
h.remove()  # detach the hook so later backward passes are not affected


tensor([2., 2., 2.])


  This is separate from the ipykernel package so we can avoid doing imports until


# Module hook

<font face="楷体">There are two kinds of hooks for a module: the forward hook and the backward hook.</font>

In [19]:
# 1. forward_hook(hook)
import torch
from torch import nn

def for_hook(module, input, output):
    """Forward hook: remember the module and print every input and output value.

    The module is stored in the global ``mark_modu`` so it can be reused after
    the hook has run. The function returns nothing (None).
    """
    global mark_modu
    mark_modu = module
    # Walk the inputs first, then the outputs, printing one element per line.
    for label, values in (("input val:", input), ("output val:", output)):
        for item in values:
            print(label, item)

class Model(nn.Module):
    """Parameter-free module whose forward pass adds one to its input."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        incremented = x + 1
        return incremented

model = Model()
# Create the leaf tensor from a Python list; torch.tensor(torch.Tensor(...))
# copy-constructs from a tensor and emits a UserWarning.
x = torch.tensor([1.0], requires_grad=True)
handle = model.register_forward_hook(for_hook)  # returns a removable handle
print("model output is:", model(x))
handle.remove()  # remove the hook
# for_hook stored the module in the global mark_modu; with the hook removed,
# calling it again only prints the result.
print(mark_modu(2))

input val: tensor([1.], requires_grad=True)
output val: tensor(2., grad_fn=<SelectBackward>)
model output is: tensor([2.], grad_fn=<AddBackward0>)
3




In [30]:
# 2. backward hook
# register_backward_hook(hook): the hook returns a tuple of Tensors or None
# we can change the data of grad_input and return it

import torch
import torch.nn as nn

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

def tensor_hook(grad):
    """Tensor hook: print a marker and the incoming gradient, then return it unchanged."""
    for fields in (('tensor hook',), ('grad:', grad)):
        print(*fields)
    return grad

class MyMean(nn.Module):
    """Custom division module: returns its input divided by 4."""

    def forward(self, input):
        return input / 4
    
class MyNet(nn.Module):
    """Linear(4 -> 1) layer followed by MyMean, with constant parameters.

    The linear weights are all set to 8.0 and the bias to 2.0, so the
    gradients printed by the hooks are easy to check by hand.
    """

    def __init__(self):
        super().__init__()
        self.f1 = nn.Linear(4, 1, bias=True)
        self.f2 = MyMean()
        self.weight_init()

    def forward(self, input):
        """Apply f1, then f2, and return the result."""
        # Keep a reference to the most recent input on the instance.
        self.input = input
        output = self.f1(input)
        output = self.f2(output)
        return output

    def weight_init(self):
        """Fill the weights of f1 with 8.0 and its bias with 2.0."""
        # nn.init performs the in-place fill without recording it in autograd,
        # so there is no need to reach through the legacy ``.data`` attribute.
        nn.init.constant_(self.f1.weight, 8.0)
        nn.init.constant_(self.f1.bias, 2.0)

    def my_hook(self, module, grad_input, grad_output):
        """Backward hook: print both gradient tuples and return grad_input unchanged."""
        print("doing_my_hook")
        print('original grad', grad_input)
        print('original outgrad', grad_output)

        return grad_input
    
if __name__ == '__main__':
    # Create the input directly on the target device so it stays a leaf
    # tensor. Copying a leaf with .to(device) yields a non-leaf tensor on a
    # different device, and its .grad would never be populated.
    input = torch.tensor([1, 2, 3, 4], dtype=torch.float32,
                         device=device, requires_grad=True)
    # The parameters must live on the same device as the input.
    net = MyNet().to(device)
    # NOTE(review): register_backward_hook is the legacy module hook; it is
    # kept because this cell demonstrates the legacy grad_input behaviour.
    net.register_backward_hook(net.my_hook)

    input.register_hook(tensor_hook)
    result = net(input)
    print('result = ', result)
    result.backward()
    print('input.grad', input.grad)
    for param in net.parameters():
        print('{}:grad->{}'.format(param, param.grad))
# here y = w1*x1 + ... + w4*x4 + b
# z = y/4, so the grad_input is 0.25

result =  tensor([20.5000], grad_fn=<DivBackward0>)
doing_my_hook
original grad (tensor([0.2500]), None)
original outgrad (tensor([1.]),)
tensor hook
grad: tensor([2., 2., 2., 2.])
input.grad tensor([2., 2., 2., 2.])
Parameter containing:
tensor([[8., 8., 8., 8.]], requires_grad=True):grad->tensor([[0.2500, 0.5000, 0.7500, 1.0000]])
Parameter containing:
tensor([2.], requires_grad=True):grad->tensor([0.2500])


In [32]:
import torch
import torch.nn as nn
# Pick the first GPU if CUDA is usable; otherwise run on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
class MyMul(nn.Module):
    """Custom multiplication module: returns its input multiplied by 2."""

    def forward(self, input):
        return input * 2
class MyMean(nn.Module):
    """Custom division module: returns its input divided by 4."""

    def forward(self, input):
        return input / 4
def tensor_hook(grad):
    """Tensor hook: print a marker and the incoming gradient, then return it unchanged."""
    for fields in (('tensor hook',), ('grad:', grad)):
        print(*fields)
    return grad
class MyNet(nn.Module):
    """MyMean followed by a Linear(4 -> 1) layer, with constant parameters.

    The linear weights are all set to 8.0 and the bias to 2.0, so the
    gradients printed by the hooks are easy to check by hand.
    """

    def __init__(self):
        super().__init__()
        self.f1 = nn.Linear(4, 1, bias=True)
        self.f2 = MyMean()
        self.weight_init()

    def forward(self, input):
        """Apply f2 (division) first, then f1 (linear), and return the result."""
        # Keep a reference to the most recent input on the instance; the
        # optional gradient rewrite sketched in my_hook reads it.
        self.input = input
        output = self.f2(input)
        output = self.f1(output)
        return output

    def weight_init(self):
        """Fill the weights of f1 with 8.0 and its bias with 2.0."""
        # nn.init performs the in-place fill without recording it in autograd,
        # so there is no need to reach through the legacy ``.data`` attribute.
        nn.init.constant_(self.f1.weight, 8.0)   # Linear weight set to 8
        nn.init.constant_(self.f1.bias, 2.0)     # Linear bias set to 2

    def my_hook(self, module, grad_input, grad_output):
        """Backward hook: print both gradient tuples and return grad_input unchanged."""
        print('doing my_hook')
        print('original grad:', grad_input)
        print('original outgrad:', grad_output)
        # The rewrite of grad_input is intentionally disabled here. To enable it:
        #     grad_input = grad_input[0] * self.input
        #     grad_input = tuple([grad_input])   # the returned value must be a tuple
        #     print('now grad:', grad_input)
        return grad_input
if __name__ == '__main__':
    # Create the input directly on the target device so it stays a leaf
    # tensor. Copying a leaf with .to(device) yields a non-leaf tensor on a
    # different device, and its .grad would never be populated.
    input = torch.tensor([1, 2, 3, 4], dtype=torch.float32,
                         device=device, requires_grad=True)
    net = MyNet()
    net.to(device)
    # Both hooks must be registered before result = net(input) runs, because
    # the hooks are bound during the forward pass.
    # NOTE(review): register_backward_hook is the legacy module hook; it is
    # kept because this cell demonstrates the legacy grad_input behaviour.
    net.register_backward_hook(net.my_hook)
    input.register_hook(tensor_hook)
    result = net(input)
    print('result =', result)
    result.backward()
    print('input.grad:', input.grad)
    for param in net.parameters():
        print('{}:grad->{}'.format(param, param.grad))

# here z = y1*w1 + ... + y4*w4 + b; torch treats the formula as z = m + b, where m = y1*w1 + ... + y4*w4
# so the grad_input is (1, 1)

result = tensor([22.], grad_fn=<AddBackward0>)
doing my_hook
original grad: (tensor([1.]), tensor([1.]))
original outgrad: (tensor([1.]),)
tensor hook
grad: tensor([2., 2., 2., 2.])
input.grad: tensor([2., 2., 2., 2.])
Parameter containing:
tensor([[8., 8., 8., 8.]], requires_grad=True):grad->tensor([[0.2500, 0.5000, 0.7500, 1.0000]])
Parameter containing:
tensor([2.], requires_grad=True):grad->tensor([1.])


In [33]:
import torch
import torch.nn as nn
# Run on the first CUDA device when available, else on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
class MyMul(nn.Module):
    """Custom multiplication module: returns its input multiplied by 2."""

    def forward(self, input):
        return input * 2
class MyMean(nn.Module):
    """Custom division module: returns its input divided by 4."""

    def forward(self, input):
        return input / 4
def tensor_hook(grad):
    """Tensor hook: print a marker and the incoming gradient, then return it unchanged."""
    for fields in (('tensor hook',), ('grad:', grad)):
        print(*fields)
    return grad
class MyNet(nn.Module):
    """MyMean, then Linear(4 -> 1), then an inline division by 4.

    The linear weights are all set to 8.0 and the bias to 2.0, so the
    gradients printed by the hooks are easy to check by hand.
    """

    def __init__(self):
        super().__init__()
        self.f1 = nn.Linear(4, 1, bias=True)
        self.f2 = MyMean()
        self.weight_init()

    def forward(self, input):
        """Apply f2, then f1, divide the result by 4 and return it."""
        # Keep a reference to the most recent input on the instance; the
        # optional gradient rewrite sketched in my_hook reads it.
        self.input = input
        output = self.f2(input)
        # The trailing division is the last operation of this forward pass.
        output = self.f1(output) / 4
        return output

    def weight_init(self):
        """Fill the weights of f1 with 8.0 and its bias with 2.0."""
        # nn.init performs the in-place fill without recording it in autograd,
        # so there is no need to reach through the legacy ``.data`` attribute.
        nn.init.constant_(self.f1.weight, 8.0)   # Linear weight set to 8
        nn.init.constant_(self.f1.bias, 2.0)     # Linear bias set to 2

    def my_hook(self, module, grad_input, grad_output):
        """Backward hook: print both gradient tuples and return grad_input unchanged."""
        print('doing my_hook')
        print('original grad:', grad_input)
        print('original outgrad:', grad_output)
        # The rewrite of grad_input is intentionally disabled here. To enable it:
        #     grad_input = grad_input[0] * self.input
        #     grad_input = tuple([grad_input])   # the returned value must be a tuple
        #     print('now grad:', grad_input)
        return grad_input
if __name__ == '__main__':
    # Create the input directly on the target device so it stays a leaf
    # tensor. Copying a leaf with .to(device) yields a non-leaf tensor on a
    # different device, and its .grad would never be populated.
    input = torch.tensor([1, 2, 3, 4], dtype=torch.float32,
                         device=device, requires_grad=True)
    net = MyNet()
    net.to(device)
    # Both hooks must be registered before result = net(input) runs, because
    # the hooks are bound during the forward pass.
    # NOTE(review): register_backward_hook is the legacy module hook; it is
    # kept because this cell demonstrates the legacy grad_input behaviour.
    net.register_backward_hook(net.my_hook)
    input.register_hook(tensor_hook)
    result = net(input)
    print('result =', result)
    result.backward()
    print('input.grad:', input.grad)
    for param in net.parameters():
        print('{}:grad->{}'.format(param, param.grad))

# here z = (y1*w1 + ... + y4*w4 + b) / 4; torch treats the formula as z = y/4
# so the grad_input is 0.25

result = tensor([5.5000], grad_fn=<DivBackward0>)
doing my_hook
original grad: (tensor([0.2500]), None)
original outgrad: (tensor([1.]),)
tensor hook
grad: tensor([0.5000, 0.5000, 0.5000, 0.5000])
input.grad: tensor([0.5000, 0.5000, 0.5000, 0.5000])
Parameter containing:
tensor([[8., 8., 8., 8.]], requires_grad=True):grad->tensor([[0.0625, 0.1250, 0.1875, 0.2500]])
Parameter containing:
tensor([2.], requires_grad=True):grad->tensor([0.2500])


## Summary
<font face="楷体">The backward hook automatically computes grad_input for the last layer only, i.e. the last operation executed in the module's forward pass.</font>