In [2]:
import torch
# Toy single-layer model: a 5-element input is mapped to 3 logits.
x = torch.ones(5)   # input vector
y = torch.zeros(3)  # target vector
# The parameters are created with requires_grad=True so that autograd
# tracks every operation applied to them.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
# Logits: vector-matrix product plus bias.
z = x @ w + b

In [3]:
# Binary cross-entropy between the logits z and the targets y,
# computed directly from the logits.
loss = torch.nn.functional.binary_cross_entropy_with_logits(input=z, target=y)

In [4]:
import torch.nn.functional as F

def binary_cross_entropy_with_logits_manual(logits, targets):
    """Mean binary cross-entropy computed directly from raw logits.

    Implements ``-(y * log(p) + (1 - y) * log(1 - p))`` with
    ``p = sigmoid(logits)``, averaged over all elements.

    The log-probabilities are obtained with ``logsigmoid`` rather than
    ``log(sigmoid(x))``: for large ``|x|`` the sigmoid saturates to exactly
    0 or 1, so the naive form evaluates ``log(0) = -inf`` and the loss
    becomes ``inf`` or ``nan``.

    Args:
        logits: Tensor of raw (pre-sigmoid) scores.
        targets: Tensor of target values, broadcastable with ``logits``.

    Returns:
        A scalar tensor holding the mean loss over all elements.
    """
    log_sigmoid = torch.nn.functional.logsigmoid

    # log(p) = logsigmoid(x)
    log_p = log_sigmoid(logits)
    # log(1 - p) = log(sigmoid(-x)) = logsigmoid(-x)
    log_one_minus_p = log_sigmoid(-logits)

    # Element-wise BCE: -(y * log(p) + (1 - y) * log(1 - p))
    loss = -(targets * log_p + (1 - targets) * log_one_minus_p)

    # Reduce to a single value by averaging over every element.
    return loss.mean()

# Sanity check on a small hand-written example.
logits = torch.tensor([0.0, 0.5, 1.0])   # predicted logits
targets = torch.tensor([0.0, 1.0, 0.0])  # ground-truth labels

# Evaluate the same inputs with the hand-written function and with
# PyTorch's built-in function so the two values can be compared.
manual_loss = binary_cross_entropy_with_logits_manual(logits, targets)
pytorch_loss = F.binary_cross_entropy_with_logits(logits, targets)

print(
    f"Manual loss: {manual_loss.item()}",
    f"PyTorch loss: {pytorch_loss.item()}",
    sep="\n",
)

Manual loss: 0.82682865858078
PyTorch loss: 0.82682865858078


In [5]:
# Show the grad_fn attribute that autograd attached to z and to loss.
print(
    f"Gradient function for z = {z.grad_fn}",
    f"Gradient function for loss = {loss.grad_fn}",
    sep="\n",
)

Gradient function for z = <AddBackward0 object at 0x7fc6fafdd1d0>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x7fc6fafdd190>


In [6]:
# Backpropagate from loss, then show the gradients stored on the
# parameters w and b.
loss.backward()
print(w.grad, b.grad, sep="\n")

tensor([[0.2889, 0.2940, 0.1640],
        [0.2889, 0.2940, 0.1640],
        [0.2889, 0.2940, 0.1640],
        [0.2889, 0.2940, 0.1640],
        [0.2889, 0.2940, 0.1640]])
tensor([0.2889, 0.2940, 0.1640])


In [7]:
# Computed normally, the result of operations on tracked parameters
# requires grad as well.
z = x @ w + b
print(z.requires_grad)

# The same computation inside torch.no_grad() yields a tensor that
# does not require grad.
with torch.no_grad():
    z = x @ w + b
print(z.requires_grad)

True
False


In [8]:
z = x @ w + b
# detach() returns a tensor that no longer requires grad.
z_det = z.detach()
print(z_det.requires_grad)

False


In [9]:
inp = torch.eye(4, 5, requires_grad=True)
out = torch.pow(inp + 1, 2).t()

# out is not a scalar, so backward() is given an explicit gradient of ones.
# retain_graph=True keeps the graph alive so backward() can be called again.
out.backward(gradient=torch.ones_like(out), retain_graph=True)
print(f"First call\n{inp.grad}")

# A second backward pass adds to the values already stored in inp.grad.
out.backward(gradient=torch.ones_like(out), retain_graph=True)
print(f"\nSecond call\n{inp.grad}")

# Zero the stored gradient in place before the next pass so the result
# matches the first call again.
inp.grad.zero_()
out.backward(gradient=torch.ones_like(out), retain_graph=True)
print(f"\nCall after zeroing gradients\n{inp.grad}")

First call
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])

Second call
tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.]])

Call after zeroing gradients
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])
