# (목적)Gradient를 확인한다
## 전체 Gradient에 3을 곱한 것과 parameter별로 gradient에 3을 곱한 것이 차이가 나는지 확인

In [181]:
import torch
import torch.nn as nn

# Build a toy dataset: 100 random samples with 2 features and a binary label each.
X = torch.randn((100, 2))
y = torch.randint(low=0, high=2, size=(100,))

# Model definition
class ClassificationModel(nn.Module):
    """Single linear layer mapping 2 input features to 2 output scores."""

    def __init__(self):
        super().__init__()
        # The only layer of the model: 2 inputs -> 2 outputs.
        self.fc1 = nn.Linear(2, 2)

    def forward(self, x):
        """Return the output of the linear layer for ``x`` (no activation is applied)."""
        return self.fc1(x)

model = ClassificationModel()

# Define the cross-entropy and KL-divergence loss functions.
cross_entropy_loss = nn.CrossEntropyLoss()
kl_divergence_loss = nn.KLDivLoss(reduction='batchmean')

# Forward pass: `outputs` holds the raw scores produced by the linear layer.
outputs = model(X)

# Cross-entropy loss.
ce_loss = cross_entropy_loss(outputs, y)

# KL-divergence loss.
# nn.KLDivLoss expects log-probabilities as its input, so the raw scores are
# passed through log_softmax; torch.log on raw scores yields NaN for negatives.
p = torch.tensor([0.1, 0.2])  # example target; NOTE(review): does not sum to 1
kl_loss = kl_divergence_loss(torch.log_softmax(outputs, dim=1), p)
# NOTE(review): this overwrites the KL value computed above, so from here on
# kl_loss and ce_loss are the same tensor and their gradients are identical.
# It appears intentional (ce_loss + 2 * kl_loss then equals 3 * ce_loss) - confirm.
kl_loss = ce_loss

# Compute the gradients with torch.autograd.grad. retain_graph=True keeps the
# graph alive so that further gradients can be taken from the same losses.
ce_gradient = torch.autograd.grad(ce_loss, model.parameters(), retain_graph=True)
kl_gradient = torch.autograd.grad(kl_loss, model.parameters(), retain_graph=True)

# Print the gradients, one tensor per model parameter.
print("Cross Entropy Loss Gradient:")
for grad_param in ce_gradient:
    print(grad_param)

print("\nKL Divergence Loss Gradient:")
for grad_param in kl_gradient:
    print(grad_param)

Cross Entropy Loss Gradient:
tensor([[ 0.1462, -0.1110],
        [-0.1462,  0.1110]])
tensor([ 0.0093, -0.0093])

KL Divergence Loss Gradient:
tensor([[ 0.1462, -0.1110],
        [-0.1462,  0.1110]])
tensor([ 0.0093, -0.0093])


In [182]:
# Display the cross-entropy loss tensor.
ce_loss

tensor(0.7601, grad_fn=<NllLossBackward>)

In [183]:
# Display kl_loss; it was reassigned to ce_loss earlier, so the value is the same.
kl_loss

tensor(0.7601, grad_fn=<NllLossBackward>)

In [184]:
# Combined loss: cross-entropy plus the KL term weighted by 2.
total_loss = ce_loss + kl_loss * 2
total_loss

tensor(2.2804, grad_fn=<AddBackward0>)

In [185]:
# Gradient of the combined loss with respect to every model parameter.
# The graph is retained so that later cells can differentiate again.
ce_gradient = torch.autograd.grad(
    outputs=total_loss,
    inputs=model.parameters(),
    retain_graph=True,
)
ce_gradient

(tensor([[ 0.4385, -0.3331],
         [-0.4385,  0.3331]]),
 tensor([ 0.0278, -0.0278]))

# 1. retain_graph=True를 설정하면 gradient를 다시 구해도 값이 변하지 않는다

In [186]:
# Recompute the cross-entropy gradient from the retained graph and print
# one tensor per model parameter.
ce_gradient = torch.autograd.grad(
    outputs=ce_loss,
    inputs=model.parameters(),
    retain_graph=True,
)
for param_grad in ce_gradient:
    print(param_grad)

tensor([[ 0.1462, -0.1110],
        [-0.1462,  0.1110]])
tensor([ 0.0093, -0.0093])


# 2. 전체 gradient에 3을 곱한다
## 1) 안 된다 (tuple에 tensor(3)을 곱하면 값이 3배가 되는 것이 아니라 tuple이 3번 반복된다)

In [187]:
# Multiply the tuple returned by torch.autograd.grad by a scalar tensor.
# NOTE: this is not an element-wise scaling. As the recorded output shows, the
# pair of gradient tensors is printed three times with unchanged values.
ce_gradient =  torch.tensor(3) * torch.autograd.grad(ce_loss, model.parameters(), retain_graph=True)
for grad_param in ce_gradient:
    print(grad_param)

tensor([[ 0.1462, -0.1110],
        [-0.1462,  0.1110]])
tensor([ 0.0093, -0.0093])
tensor([[ 0.1462, -0.1110],
        [-0.1462,  0.1110]])
tensor([ 0.0093, -0.0093])
tensor([[ 0.1462, -0.1110],
        [-0.1462,  0.1110]])
tensor([ 0.0093, -0.0093])


## 2) 덧셈조차 하면 안 된다 (tuple끼리의 `+`는 원소별 합이 아니라 이어 붙이기다)

In [188]:
# "Add" the two gradient tuples.
# NOTE: `+` on tuples concatenates them. The recorded output holds 8 tensors
# (6 from ce_gradient, 2 from kl_gradient) rather than summed gradients.
total_gradient = ce_gradient + kl_gradient
total_gradient

(tensor([[ 0.1462, -0.1110],
         [-0.1462,  0.1110]]),
 tensor([ 0.0093, -0.0093]),
 tensor([[ 0.1462, -0.1110],
         [-0.1462,  0.1110]]),
 tensor([ 0.0093, -0.0093]),
 tensor([[ 0.1462, -0.1110],
         [-0.1462,  0.1110]]),
 tensor([ 0.0093, -0.0093]),
 tensor([[ 0.1462, -0.1110],
         [-0.1462,  0.1110]]),
 tensor([ 0.0093, -0.0093]))

## 3) loss에 3을 곱해줘야 내가 원하는 값을 얻을 수 있다

In [189]:
# Scale the loss by 3 before differentiating, then print the gradient of the
# scaled loss for each model parameter.
ce_gradient = torch.autograd.grad(
    outputs=torch.tensor(3) * ce_loss,
    inputs=model.parameters(),
    retain_graph=True,
)
for scaled_grad in ce_gradient:
    print(scaled_grad)

tensor([[ 0.4385, -0.3331],
        [-0.4385,  0.3331]])
tensor([ 0.0278, -0.0278])


# 3. 각 parameter 별로 3을 곱해준다

In [190]:
# Compute the cross-entropy gradient, then scale each parameter's gradient
# tensor by 3 individually when printing it.
print("Cross Entropy Loss Gradient:")
ce_gradient = torch.autograd.grad(
    outputs=ce_loss,
    inputs=model.parameters(),
    retain_graph=True,
)
for param_grad in ce_gradient:
    print(torch.tensor(3) * param_grad)

Cross Entropy Loss Gradient:
tensor([[ 0.4385, -0.3331],
        [-0.4385,  0.3331]])
tensor([ 0.0278, -0.0278])
