## Adam

In [93]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# 가상 데이터셋 생성
num_samples = 1000
num_classes = 10
input_shape = (28, 28)

x_train = np.random.random((num_samples, *input_shape)).astype(np.float32)
y_train = np.random.randint(num_classes, size=num_samples).astype(np.int64)
x_test = np.random.random((num_samples // 5, *input_shape)).astype(np.float32)
y_test = np.random.randint(num_classes, size=num_samples // 5).astype(np.int64)


In [94]:
x_train.shape, y_train.shape

((1000, 28, 28), (1000,))

In [95]:
x_test.shape, y_test.shape

((200, 28, 28), (200,))

In [96]:
t = torch.tensor([1,2,3])
ut = t.unsqueeze(0)  # (1, 3)

In [97]:
ut

tensor([[1, 2, 3]])

In [98]:
t = torch.tensor([1,2,3])
ut = t.unsqueeze(1)  # (3, 1)

In [99]:
ut

tensor([[1],
        [2],
        [3]])

In [100]:
# 텐서로 변환
x_train = torch.tensor(x_train).unsqueeze(1)
y_train = torch.tensor(y_train)
x_test = torch.tensor(x_test).unsqueeze(1)
y_test = torch.tensor(y_test)

In [101]:
x_train.shape # [batch_size, channels, height, width]

torch.Size([1000, 1, 28, 28])

In [102]:
# RGB 컬러 이미지 (1000, 28, 28, 3)
# x_train = torch.tensor(x_train).permute(0, 3, 1, 2)
# 결과: (1000, 3, 28, 28)

In [103]:
# 데이터셋 및 데이터 로더 생성
train_dataset = TensorDataset(x_train, y_train)
test_dataset = TensorDataset(x_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [104]:
class SimpleNN(nn.Module):
    def __init__(self, num_classes=10):
        super(SimpleNN, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

model = SimpleNN(num_classes=num_classes)

In [105]:
# SGD 옵티마이저 선택 및 학습률 설정
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [106]:
# 손실 함수 정의
criterion = nn.CrossEntropyLoss()

---

## `criterion = nn.CrossEntropyLoss()`

### **Definition**

`nn.CrossEntropyLoss` is a **loss function** used for **multi-class classification** problems in PyTorch.
It combines two key operations into one:

$
\text{CrossEntropyLoss} = \text{LogSoftmax} + \text{Negative Log Likelihood (NLLLoss)}
$

So, you don’t need to apply `softmax` manually to your model’s outputs — `CrossEntropyLoss` handles that internally.

---

### **Mathematical Formula**

For a single input sample:

$$
\text{Loss} = -\log\left( \frac{e^{z_{y}}}{\sum_{j} e^{z_j}} \right)
$$

where:

* $z_j$ = raw (unnormalized) output score (logit) from the network for class ( j )
* $y$ = correct class index
* The numerator is the exponential of the true class logit.
* The denominator sums over all class logits, normalizing them via the softmax.

---

### **Intuition**

* Your model outputs a vector of raw scores, e.g. `[2.1, -1.3, 0.7]`.
* `CrossEntropyLoss` first applies **softmax** to turn these scores into probabilities.
* Then it computes how far the predicted distribution is from the **true label** (which is treated as a one-hot vector).

It penalizes the model when:

* The correct class gets **low probability**.
* Other classes get **high probability**.

---

### **Usage Example**

```python
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

# Example logits (2 samples, 3 classes each)
outputs = torch.tensor([[2.5, 0.3, -1.2],
                        [0.1, 2.0, 0.1]])  # shape: [batch_size, num_classes]

# True class indices
labels = torch.tensor([0, 1])  # shape: [batch_size]

loss = criterion(outputs, labels)
print(loss)
```

**Key points:**

* `outputs` = raw logits (no softmax applied)
* `labels` = integer indices (not one-hot vectors)
* The loss is **averaged** across the batch by default.

---


In [112]:
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

# Example logits (2 samples, 3 classes each)
outputs  = torch.tensor([[2.5, 0.3, -1.2],
                        [0.1, 2.0, 0.1]])  # shape: [batch_size, num_classes]

# True class indices
labels = torch.tensor([0, 1])  # shape: [batch_size]

loss = criterion(outputs , labels)
print(loss)

tensor(0.1944)


In [113]:
outputs.shape

torch.Size([2, 3])

In [114]:
# 모델 학습
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

Epoch 1, Loss: 2.286495864391327
Epoch 2, Loss: 2.2782506942749023
Epoch 3, Loss: 2.273959696292877
Epoch 4, Loss: 2.270854525268078
Epoch 5, Loss: 2.256908133625984
Epoch 6, Loss: 2.250021517276764
Epoch 7, Loss: 2.2568142488598824
Epoch 8, Loss: 2.222443498671055
Epoch 9, Loss: 2.2075348272919655
Epoch 10, Loss: 2.1897913962602615


In [115]:
# 모델 평가
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
loss = running_loss / len(train_loader)
print(f'Test Accuracy: {accuracy * 100:.2f}%')
print(f'Test Loss: {loss:.4f}')

Test Accuracy: 12.00%
Test Loss: 2.1898


In [116]:
# Adam 옵티마이저 사용 예시
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 모델 컴파일 및 학습
model.train()
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

# 모델 평가
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
loss = running_loss / len(train_loader)
print(f'Test Accuracy: {accuracy * 100:.2f}%')
print(f'Test Loss: {loss:.4f}')

Epoch 1, Loss: 2.1705394834280014
Epoch 2, Loss: 2.1494657322764397
Epoch 3, Loss: 2.132377963513136
Epoch 4, Loss: 2.110610753297806
Epoch 5, Loss: 2.103687711060047
Epoch 6, Loss: 2.087603021413088
Epoch 7, Loss: 2.0774229615926743
Epoch 8, Loss: 2.0598913617432117
Epoch 9, Loss: 2.052796706557274
Epoch 10, Loss: 2.0338753163814545
Test Accuracy: 8.00%
Test Loss: 2.0339
