In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

## 1.数值梯度检测

### 1.1 autograd.gradcheck

In [13]:
## 自定义sigmoid函数
class sig(torch.autograd.Function):
    @staticmethod
    def forward(ctx,x):
        ctx.save_for_backward(x)
        out=1/(1+torch.exp(-x))
        return out
    
    @staticmethod
    def backward(ctx,grad_output):
        x,=ctx.saved_tensors
        return x*(1-x)*grad_output

In [18]:
x=torch.arange(2.,4.,requires_grad=True)
z=sig.apply(x)
z

tensor([0.8808, 0.9526], grad_fn=<sigBackward>)

In [19]:
z.backward(torch.tensor([1.,0.]),retain_graph=True)
x.grad

tensor([-2., -0.])

In [20]:
x.grad.zero_()
z.backward(torch.tensor([0.,1.]),retain_graph=True)
x.grad

tensor([ 0., -6.])

In [22]:
##使用gradcheck进行检测
torch.autograd.gradcheck(sig.apply,(x,),eps=1e-03)

  'At least one of the inputs that requires gradient '


RuntimeError: Jacobian mismatch for output 0 with respect to input 0,
numerical:tensor([[0.1050, 0.0000],
        [0.0000, 0.0452]])
analytical:tensor([[-2., -0.],
        [-0., -6.]])


### 1.2 autograd.gradgradcheck

## 2. 分析器（Profiler）

autograd内部的分析器，可以分析模型内部不同计算操作的代价，不论是在CPU上，还是在GPU上.


为此同时实现了两种模型：使用profil只对CPU分析；使用emit_nvtx同时分析CPU和GPU.

### 2.1 profile

In [25]:
x=torch.randn((2,2),requires_grad=True)
with torch.autograd.profiler.profile() as prof:##注意：profile()是一个context manager类型，因此可以使用 with...as...
    y=(x**2).sum()
    y.backward()

In [26]:
print(prof)

------------------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------
Name                                               CPU time        CUDA time            Calls        CPU total       CUDA total
------------------------------------------  ---------------  ---------------  ---------------  ---------------  ---------------
pow                                                18.553us          0.000us                1         18.553us          0.000us
sum                                              1310.197us          0.000us                1       1310.197us          0.000us
_sum                                               72.636us          0.000us                1         72.636us          0.000us
struct torch::autograd::GraphRoot                   0.789us          0.000us                1          0.789us          0.000us
SumBackward0                                       35.528us          0.000us                1         35

### 2.2 emit_nvtx

## 3. 异常发现（Anomaly Detection）

### 3.1 detect_anomaly可以为autograd引擎开启异常发现，是一个上下文管理器.

In [45]:
##先看看tensor的clone方法
zc=x.clone()

In [46]:
zc

tensor([[-0.5219,  1.4823],
        [ 0.4141,  0.0188]], grad_fn=<CloneBackward>)

In [32]:
zc.requires_grad

True

In [47]:
zc.is_leaf

False

In [48]:
c=(zc*2).sum()
c.backward()
zc.grad

In [49]:
x.grad##对x的clone的梯度计算，最终会传递到x这里

tensor([[2., 2.],
        [2., 2.]])

In [50]:
class myFunc(torch.autograd.Function):
    @staticmethod
    def forward(ctx,x):
        return x.clone()
    
    @staticmethod
    def backward(ctx,grad_output):
        raise RuntimeError('some err in backward')
        return grad_output.clone()

In [52]:
def run_fn(x):
    out = myFunc.apply(x)
    return out.sum()

In [53]:
input = torch.randn((2,2),requires_grad=True)
out=run_fn(input)
out.backward()

RuntimeError: some err in backward

In [54]:
with torch.autograd.detect_anomaly():
    input = torch.randn((2,2),requires_grad=True)
    out=run_fn(input)
    out.backward()##由于使用了detect_anomaly，因此会打印出相关的过程

  File "D:\Anaconda3-4.4.0\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "D:\Anaconda3-4.4.0\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "D:\Anaconda3-4.4.0\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "D:\Anaconda3-4.4.0\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "D:\Anaconda3-4.4.0\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "D:\Anaconda3-4.4.0\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "D:\Anaconda3-4.4.0\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "D:\Anaconda3-4.4.0\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "D:\Anaconda3-4.4.0\lib\site-packages\zmq\eventloop\zmqstre

RuntimeError: some err in backward

### 3.2 set_detect_anomaly(mode)

既可以作为函数，也可以作为上下文管理器. 功能与detect_anomaly相同.

如果mode为True，则开启异常发现；否则关闭.