<a href="https://colab.research.google.com/github/rickiepark/the-lm-book/blob/main/auto_grad.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<div style="display: flex; justify-content: center;">
    <div style="background-color: #f4f6f7; padding: 15px; width: 80%;">
        <table style="width: 100%">
            <tr>
                <td style="vertical-align: middle;">
                    <span style="font-size: 14px;">
                        <a href="https://tensorflow.blog/the-lm-book" target="_blank" rel="noopener">&lt;대규모 언어 모델, 핵심만 빠르게!&gt;(인사이트, 2025)</a>의 주피터 노트북<br><br>
                        코드 저장소: <a href="https://github.com/rickiepark/the-lm-book" target="_blank" rel="noopener">https://github.com/rickiepark/the-lm-book</a>
                    </span>
                </td>
                <td style="vertical-align: middle;">
                    <a href="https://www.thelmbook.com" target="_blank" rel="noopener">
                        <img src="https://tensorflow.blog/wp-content/uploads/2025/10/cover-the-lm-book.jpg" width="80px" alt="대규모 언어 모델, 핵심만 빠르게!" border="1">
                    </a>
                </td>
            </tr>
        </table>
    </div>
</div>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Training features: 12 samples with 2 numeric features each, stored as
# float32 so they can be fed straight into nn.Linear.
inputs = torch.tensor(
    [
        [22, 25],
        [25, 35],
        [47, 80],
        [52, 95],
        [46, 82],
        [56, 90],
        [23, 27],
        [30, 50],
        [40, 60],
        [39, 57],
        [53, 95],
        [48, 88],
    ],
    dtype=torch.float32,
)

In [3]:
# Binary targets, one per sample. Each label is wrapped in its own row so the
# tensor is a (12, 1) column that lines up with the model's single output unit.
labels = torch.tensor(
    [[target] for target in (0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1)],
    dtype=torch.float32,
)

In [4]:
# Sanity check: expect (number of samples, number of features).
inputs.size()

torch.Size([12, 2])

In [5]:
# Sanity check: expect one target column per sample.
labels.size()

torch.Size([12, 1])

In [6]:
# Seed the global RNG right before construction: nn.Linear draws its initial
# weight and bias at random, so without this every "Restart & Run All" starts
# training from a different point and the results change between runs.
torch.manual_seed(42)

# Logistic regression: a single linear unit (2 features -> 1 value) followed
# by a sigmoid, so the model outputs a probability suitable for nn.BCELoss.
model = nn.Sequential(
    nn.Linear(2, 1),
    nn.Sigmoid()
)

In [7]:
# Display the module's repr to confirm the layer stack.
model

Sequential(
  (0): Linear(in_features=2, out_features=1, bias=True)
  (1): Sigmoid()
)

In [8]:
# Collect the (name, parameter) pairs of the first layer (the nn.Linear) into
# a list so individual entries can be indexed.
params = [(name, tensor) for name, tensor in model[0].named_parameters()]

In [9]:
# Shape of the first listed parameter tensor of the linear layer.
params[0][1].size()

torch.Size([1, 2])

In [10]:
# Binary cross-entropy computed on probabilities; it pairs with the Sigmoid
# at the end of the model.
criterion = nn.BCELoss()
# Plain stochastic gradient descent over every trainable parameter.
optimizer = optim.SGD(model.parameters(), lr=1e-3)

In [11]:
# Full-batch training: 500 parameter updates, each computed on all samples.
for step in range(500):
    optimizer.zero_grad()  # drop gradients left over from the previous update
    predictions = model(inputs)
    loss = criterion(predictions, labels)
    loss.backward()  # populate .grad on the model parameters
    optimizer.step()

In [12]:
# A single unseen sample with two features. Calling the model outside
# torch.no_grad() means the result still carries autograd history.
sample_1 = torch.tensor([10.0, 50.0])
model(sample_1)

tensor([1.0000], grad_fn=<SigmoidBackward0>)

In [13]:
# Second unseen sample, with the two feature values swapped.
sample_2 = torch.tensor([50.0, 10.0])
# Inference only, so skip recording the autograd graph.
with torch.no_grad():
    prob_2 = model(sample_2)
print(prob_2)

tensor([1.1240e-11])


In [14]:
# Seeding immediately before construction pins nn.Linear's random initial
# weights no matter how much randomness earlier cells consumed, so this
# experiment gives the same result on every "Restart & Run All".
torch.manual_seed(42)

# Same linear unit but with no Sigmoid: the model now outputs a raw logit.
model = nn.Sequential(
    nn.Linear(2, 1)
)

optimizer = optim.SGD(model.parameters(), lr=0.001)
# BCEWithLogitsLoss applies the sigmoid internally, which is why the model
# itself must not end with one.
criterion = nn.BCEWithLogitsLoss()

In [15]:
# Full-batch training: 500 parameter updates, each computed on all samples.
for step in range(500):
    optimizer.zero_grad()  # drop gradients left over from the previous update
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()  # populate .grad on the model parameters
    optimizer.step()

In [16]:
# Evaluate the current model on sample_1 without tracking gradients, then
# show the value it returns.
with torch.no_grad():
    logit = model(sample_1)
print(logit)

tensor([7.0200])


In [17]:
# Squash the model output through a sigmoid to read it as a probability.
logit.sigmoid()

tensor([0.9991])