In [1]:
import torch

In [4]:
# requires_grad=True is required so that gradients of functions of x can be
# computed later on.
x = torch.randn((3,), requires_grad=True)
print(x)

tensor([ 0.4144, -0.1820, -0.6953], requires_grad=True)


In [10]:
# Drop any gradient left on x by an earlier run: backward() accumulates into
# x.grad, so without this reset every re-run of the cell adds another copy of
# the gradient and the printed value is no longer d(z2)/dx.
x.grad = None

y = x + 2   # builds the computational graph; y gets grad_fn=AddBackward0
print(y)
z = y*y*2   # elementwise 2*y^2, grad_fn=MulBackward0
print(z)
z2 = z.mean()   # scalar result, grad_fn=MeanBackward0
print(z2)

z2.backward()   # computes d(z2)/dx = 4*(x + 2)/len(x) and stores it in x.grad
print(x.grad)   # gradient of z2 with respect to x

tensor([2.4144, 1.8180, 1.3047], grad_fn=<AddBackward0>)
tensor([11.6591,  6.6106,  3.4044], grad_fn=<MulBackward0>)
tensor(7.2247, grad_fn=<MeanBackward0>)
tensor([6.4385, 4.8481, 3.4792])


In [None]:
# Ways to stop gradient tracking.
# The two non-mutating options come first so that, on a first run, x still
# requires grad when they execute; the in-place flag flip on x comes last.
# (If x.requires_grad_(False) ran first, the other two would be demonstrated
# on a tensor that already has tracking switched off.)

# 1. x.detach()
y = x.detach()  # y holds the same values as x but has requires_grad=False

# 2. with torch.no_grad():
with torch.no_grad():
    y = x + 2   # results computed inside the block have requires_grad=False

# 3. x.requires_grad_(False)
# Trailing underscore: modifies x itself. The trailing ';' keeps the returned
# tensor from being echoed as the cell output.
x.requires_grad_(False);

In [14]:
# Dummy training example
weights = torch.ones(4, requires_grad=True)

# Each backward() call adds into weights.grad. Nothing clears it in this loop,
# so the printed gradient keeps growing from one epoch to the next.
for epoch in range(3):
    model_output = torch.sum(3 * weights)
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [16]:
# Dummy training example
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    model_output = torch.sum(3 * weights)
    model_output.backward()
    print(weights.grad)

    # Reset the gradient in place so the next epoch starts from zero instead
    # of adding to this epoch's value.
    weights.grad.zero_()


tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [None]:
# Dummy training example
# Same role as the manual loop with weights.grad.zero_(), but the optimizer
# owns the update step and the gradient reset.
weights = torch.ones(4, requires_grad=True)

# The optimizer expects an iterable of parameters (or param-group dicts);
# passing the bare tensor raises TypeError, hence the list.
optimizer = torch.optim.SGD([weights], lr=0.01)

for epoch in range(3):
    model_output = (weights*3).sum()

    model_output.backward()     # fills weights.grad

    print(weights.grad)

    optimizer.step()        # weights <- weights - lr * weights.grad
    optimizer.zero_grad()   # clear the gradient before the next epoch