In [52]:
import torch

# 1. 当你设置`requires_grad=True`后，每次对tensor进行运算，PyTorch都会自动生成计算图

In [53]:
# Pass `requires_grad=True` to ask autograd to track this tensor: from now on,
# every operation applied to it is recorded in a computation graph.
x = torch.tensor(
    [1.0, 2.0, 3.0],
    requires_grad=True,
)
print(x)

tensor([1., 2., 3.], requires_grad=True)


In [54]:
# `y` is the result of an addition on a tracked tensor, so its repr shows
# grad_fn=<AddBackward0>.
offset = 2
y = x + offset
print(y)

tensor([3., 4., 5.], grad_fn=<AddBackward0>)


## computation graph for `y = x + 2` :

- 运算是一个node
- 数据也是一个node


<img src="./picut/computation_graph.png" height="400" width="400">

In [55]:
# The last operation applied is a multiplication, hence grad_fn=<MulBackward0>.
y_squared = y * y
z = y_squared * 2
print(z)

tensor([18., 32., 50.], grad_fn=<MulBackward0>)


## 1.2 计算gradient

- 当loss为标量时，`loss.backward()`
- 当loss不是标量时，`loss.backward(与loss维度相同的tensor)`
- `loss.backward()`：计算x.grad属性

### 1.2.1 计算gradient -- scalar

<img src="./picut/computation_graph_1.jpg" height="400" width="400">

In [56]:
# Reduce the vector to a scalar so that `backward()` can be called without
# arguments; the repr now shows grad_fn=<MeanBackward0>.
scalar_z = z.mean()
z = scalar_z
print(z)

tensor(33.3333, grad_fn=<MeanBackward0>)


In [57]:
# Back-propagate from the scalar z; this populates x.grad with dz/dx.
z.backward()
x_grad = x.grad
print(x_grad)

tensor([4.0000, 5.3333, 6.6667])


### 1.2.2 计算gradient -- vector

- dl / dx = Jacobian_Matrix * dl / dy
- we should specify dl/dy here (it is the tensor passed to `backward`)
<img src="./picut/jacobian_matrix.png" height="400" width="400">

In [59]:
# Rebuild the graph from scratch, this time leaving z as a vector:
#   x: leaf tensor created with requires_grad=True
#   y: intermediate result (x + 2)
#   z: non-scalar output (2 * y^2, element-wise)
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x + 2
y_squared = y * y
z = y_squared * 2

In [60]:
# z is not a scalar, so backward() must be given a tensor with the same shape
# as z; each entry of v scales the matching component's contribution to x.grad.
v = torch.tensor([1.0, 1.5, 1.5], dtype=torch.float32)
z.backward(gradient=v)
print(x.grad)

tensor([12., 24., 30.])


# 2. Three ways to stop tracking gradients
- `x.requires_grad_(False)`: 设置`tensor.requires_grad`属性
- `x.detach()`: 创造一个新的`tensor`（与原`tensor`共享数据内存），但`tensor.requires_grad=False`(从当前的计算图中分离出来)
- `with torch.no_grad()`: 在该代码块内进行的运算不会被记录到计算图中（运算结果的`requires_grad=False`）

## 2.1 `tensor.requires_grad_(False)`

In [62]:
# `requires_grad_` (note the trailing underscore) changes the flag in place:
# the second print shows the same values without `requires_grad=True`.
x = torch.randn(3, requires_grad=True)
print(x)
x.requires_grad_(requires_grad=False)
print(x)

tensor([-0.5389,  1.3927, -0.4943], requires_grad=True)
tensor([-0.5389,  1.3927, -0.4943])


## 2.2 `tensor.detach()`

In [70]:
# detach() returns a tensor holding the same values as x but with
# requires_grad=False, so its repr carries no requires_grad flag.
x = torch.randn(3, requires_grad=True)
y = x.detach()
print(x)
print(y)

tensor([ 1.9340,  0.0264, -0.2288], requires_grad=True)
tensor([ 1.9340,  0.0264, -0.2288])


## 2.3 `with torch.no_grad():`

In [71]:
# Baseline without no_grad: the addition is tracked, so y gets a grad_fn.
x = torch.randn(3, requires_grad=True)
print(x)
offset = 2
y = x + offset
print(y)

tensor([-1.2138,  0.1682, -0.7964], requires_grad=True)
tensor([0.7862, 2.1682, 1.2036], grad_fn=<AddBackward0>)


In [68]:
x = torch.randn(3, requires_grad=True)
print(x)
with torch.no_grad():
    # Same addition as before, but performed inside no_grad: the result is
    # printed without a grad_fn.
    y = x + 2
print(y)

tensor([-0.6641,  0.7710, -0.4929], requires_grad=True)
tensor([1.3359, 2.7710, 1.5071])


# 3. Before we do the next optimization step(SGD), we must empty the gradients


- `tensor.grad.zero_()`: reset the tensor's gradient to zero。 注意：必须在`loss.backward()`后调用，否则`tensor.grad`为`None`，调用会报错

```python
for epoch in range(100):
    # 进行100步优化
    loss.backward()
    print(weights.grad)
    ...
    weights.grad.zero_()
```

### In the following example:

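If we run a second iteration, `backward()` is called again; it adds the new
values to those already stored in the `grad` attribute instead of overwriting them,
so we need to reset the gradient with `weights.grad.zero_()` at the end of every iteration.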

In [100]:
# backward() adds into weights.grad, so the gradient is reset at the end of
# every pass; as a result each pass prints the same value.
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    scaled = weights * 3
    model_output = scaled.sum()
    model_output.backward()

    grad = weights.grad
    print(grad)
    # In-place reset of the tensor stored in weights.grad.
    grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


## 3.1 优化实例

In [107]:
# Minimal end-to-end optimization step: forward pass -> backward pass ->
# parameter update -> gradient reset.
weights = torch.ones(4, requires_grad=True)

# Create the optimizer and hand it the tensors it should update.
optimizer = torch.optim.SGD([weights], lr=0.01)

# Forward pass: compute a scalar loss from the weights.
loss = (weights * 3).sum()
# Backward pass: fills weights.grad. Without it the optimizer has no gradient
# to apply and step() would leave the weights unchanged.
loss.backward()

# Run one optimizer step (updates `weights` in place using weights.grad).
optimizer.step()
# Clear the gradients so that the next `loss.backward()` does not accumulate
# on top of the current ones.
optimizer.zero_grad()