<a href="https://colab.research.google.com/github/kiyong21c/pytorch_tutorial/blob/main/20220714_Autograd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# autograd

import torch
# The autograd package provides automatic differentiation 
# for all operations on Tensors

# Passing requires_grad=True asks autograd to record every operation
# that is applied to the tensor.
x = torch.randn(3, requires_grad=True)  # three random values, tracked by autograd
y = torch.add(x, 2)

# A tensor created directly by the user has no grad_fn. A tensor produced by
# an operation carries a grad_fn that references the Function which created it.
# The sample values below are taken from one recorded run; x is random, so
# the numbers differ on every execution.
print(x)  # e.g. tensor([-0.1176,  0.7011,  1.8788], requires_grad=True)
print(x.grad_fn)  # None: x was not produced by an operation

# y is the result of an addition on x, so it has a grad_fn describing
# the most recent operation that produced it.
print(y)  # e.g. tensor([1.8824, 2.7011, 3.8788], grad_fn=<AddBackward0>)
print(y.grad_fn)  # e.g. <AddBackward0 object at 0x...>

# Do more operations on y.
z = torch.mul(y * y, 3)
print(z)  # e.g. tensor([10.6302, 21.8885, 45.1350], grad_fn=<MulBackward0>): last op was a multiplication
z = torch.mean(z)  # reduce z to a scalar (the usual shape of a loss)
print(z)  # e.g. tensor(25.8846, grad_fn=<MeanBackward0>): last op was a mean

# Let's compute the gradients with backpropagation.
# Once the computation is finished, calling .backward() computes all the
# gradients automatically and accumulates them into the .grad attribute of
# the tracked tensor. .grad holds the partial derivative of the function
# with respect to that tensor.
z.backward()  # z is a scalar, so this equals z.backward(torch.tensor(1.))
print(x.grad)  # dz/dx = 2 * (x + 2), e.g. tensor([3.7648, 5.4023, 7.7576])

# Generally speaking, torch.autograd is an engine for computing vector-Jacobian products.
# It computes partial derivatives while applying the chain rule.

# -------------
# Model with non-scalar output:
# If a tensor is non-scalar (has more than one element), backward() needs a
# gradient argument: a tensor of matching shape that acts as the vector in
# the vector-Jacobian product.

x = torch.randn(3, requires_grad=True)

y = 2 * x  # y is produced by an operation on x
for _ in range(10):
    y = 2 * y  # each pass doubles y again, so overall y = 2048 * x

print(y)
print(y.shape)  # y is not a scalar: torch.Size([3])

# v plays the role of dLoss/dy, the gradient of some downstream loss with
# respect to the output y. Passing it to backward() lets each output element
# be weighted differently. The specific numbers are arbitrary; what matters
# is that v is multiplied in by the chain rule.
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
print(v.shape)  # torch.Size([3])
y.backward(gradient=v)
# x.grad = dLoss/dx = (dLoss/dy) * (dy/dx) = v * 2048
print(x.grad)  # tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])

# -------------
# Stop a tensor from tracking history:
# For example, during the training loop the weight update itself should not
# become part of the gradient computation. Three ways to achieve that:
# - x.requires_grad_(False)  (or pass requires_grad=True/False at creation)
# - x.detach()
# - wrap in 'with torch.no_grad():'

# .requires_grad_(...) changes an existing flag in-place.
a = torch.randn(2, 2)  # requires_grad not given, so it defaults to False
print(a.requires_grad)  # False
b = (a * 3) / (a - 1)  # computed from an untracked tensor
print(b.grad_fn)  # None: nothing was recorded
a.requires_grad_(True)  # from here on, operations on a are tracked
print(a.requires_grad)  # True
b = a * a
print(b.grad_fn)  # e.g. <MulBackward0 object at 0x...>: last op was a multiplication
b = b.sum()
print(b.grad_fn)  # e.g. <SumBackward0 object at 0x...>: last op was a sum

# .detach(): get a new Tensor with the same content but no gradient computation.
# The result shares its data with a; it is detached from the graph, not copied.
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)  # True
b = a.detach()
print(b.requires_grad)  # False

# wrap in 'with torch.no_grad():'
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)  # True
with torch.no_grad():
    # Use the tensor created for this demo (a), not a leftover variable from
    # an earlier section, so the example is self-contained.
    squared = a ** 2
    print(squared.requires_grad)  # False: nothing is tracked inside no_grad()

# -------------
# backward() accumulates the gradient for a tensor into its .grad attribute:
# each call adds (+=) to whatever is already stored there.
# !!! We need to be careful during optimization !!!
# Use .zero_() to empty the gradients before the next optimization step,
# otherwise the next backward() adds on top of the previous gradients.
weights = torch.ones(4, requires_grad=True)
learning_rate = 0.1

for epoch in range(3):
    # just a dummy example
    model_output = torch.sum(weights * 3)
    model_output.backward()

    print(weights.grad)  # tensor([3., 3., 3., 3.]) on every epoch

    # Optimize the model, i.e. adjust the weights. This is the job an
    # optimizer normally does; it must not be tracked by autograd.
    with torch.no_grad():
        weights.sub_(learning_rate * weights.grad)

    # this is important! It affects the final weights & output
    weights.grad.zero_()

print(weights)  # tensor([0.1000, 0.1000, 0.1000, 0.1000], requires_grad=True)
print(model_output)  # tensor(4.8000, grad_fn=<SumBackward0>)

# # Optimizer has zero_grad() method
# # optimizer = torch.optim.SGD([weights], lr=0.1) : pass model.parameters() in place of [weights]
# # During training:
# # optimizer.step() # update the weights: w = w - (lr * dLoss/dw)
# # optimizer.zero_grad() # reset dLoss/dw to 0

tensor([-0.1176,  0.7011,  1.8788], requires_grad=True)
None
tensor([1.8824, 2.7011, 3.8788], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7fa2c501cc10>
tensor([10.6302, 21.8885, 45.1350], grad_fn=<MulBackward0>)
tensor(25.8846, grad_fn=<MeanBackward0>)
tensor([3.7648, 5.4023, 7.7576])
tensor([1209.3574, -110.4234,  879.3964], grad_fn=<MulBackward0>)
torch.Size([3])
torch.Size([3])
tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])
False
None
True
<MulBackward0 object at 0x7fa2c501ccd0>
<SumBackward0 object at 0x7fa2c501ccd0>
True
False
True
False
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([0.1000, 0.1000, 0.1000, 0.1000], requires_grad=True)
tensor(4.8000, grad_fn=<SumBackward0>)
