In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter # TensorBoard 사용을 위해 임포트
                                                  # Summary 대문자 S크니까 -> 클래스다
import datetime
from tqdm import tqdm


# Device selection and data preparation (same setup as the previous example).
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Convert each image to a tensor, then normalize with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# FashionMNIST train/test splits, downloaded into ./data when missing.
training_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
validation_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Shuffle only the training data; validation order stays fixed.
training_loader = DataLoader(dataset=training_set, batch_size=64, shuffle=True)
validation_loader = DataLoader(dataset=validation_set, batch_size=64, shuffle=False)


class MLP(nn.Module):
    """Fully connected classifier: Flatten -> (Linear -> ReLU)* -> Linear.

    With the default arguments the network is 784 -> 512 -> 256 -> 10, and the
    parameter names in ``state_dict()`` are ``layers.0``, ``layers.2`` and
    ``layers.4``.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_sizes: Width of each hidden layer, in order. May be empty, in
            which case the model is a single linear layer.
        num_classes: Number of output scores produced per sample.
    """

    def __init__(self, in_features=28 * 28, hidden_sizes=(512, 256), num_classes=10):
        super().__init__()
        self.flatten = nn.Flatten()
        modules = []
        width = in_features
        # Each hidden layer is a Linear followed by a ReLU.
        for hidden in hidden_sizes:
            modules.append(nn.Linear(width, hidden))
            modules.append(nn.ReLU())
            width = hidden
        # Final projection to class scores; no activation is applied here.
        modules.append(nn.Linear(width, num_classes))
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Flatten every sample in ``x`` and return the per-class scores."""
        return self.layers(self.flatten(x))


# Build the network, then move its parameters to the selected device.
model = MLP()
model = model.to(device)
# Cross-entropy loss applied to the model outputs and integer class labels.
loss_fn = nn.CrossEntropyLoss()
# Adam optimizer over all model parameters.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


# 1. Create the SummaryWriter
# A timestamp in the directory name gives every experiment its own log directory.
run_started_at = datetime.datetime.now()
timestamp = run_started_at.strftime('%Y%m%d_%H%M%S')  # string formatted as YYYYMMDD_HHMMSS
# flush_secs=10 flushes pending events to disk every 10 seconds instead of the
# default 120, which is handy for watching training progress almost live.
# To view the logs with a short reload interval:
#   tensorboard --logdir=runs --reload_interval=5
writer = SummaryWriter(
    log_dir=f'runs/fashion_mnist_trainer_{timestamp}',
    flush_secs=10,
)

# Add the model graph to TensorBoard (one batch is passed through the model
# so the graph can be traced).
dummy_input, _ = next(iter(training_loader))
writer.add_graph(model, input_to_model=dummy_input.to(device))

# 2. Training and validation loop
num_epochs = 5
# try/finally guarantees the writer is closed (and pending events are flushed)
# even when training raises or is interrupted part-way through.
try:
    for epoch in range(num_epochs):
        # --- Training pass ---
        model.train()
        running_loss = 0.0
        seen = 0
        for inputs, labels in tqdm(training_loader, desc=f"Epoch {epoch+1} [Train]"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            # The loss is a per-batch mean (assuming loss_fn uses the default
            # 'mean' reduction). Weight it by the batch size so a smaller final
            # batch does not count as much as a full one.
            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            seen += batch_size
        avg_train_loss = running_loss / seen

        # --- Validation pass ---
        model.eval()
        running_vloss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for vinputs, vlabels in validation_loader:
                vinputs, vlabels = vinputs.to(device), vlabels.to(device)
                voutputs = model(vinputs)
                vloss = loss_fn(voutputs, vlabels)
                vbatch_size = vlabels.size(0)
                # Same sample-weighted accumulation as in the training pass.
                running_vloss += vloss.item() * vbatch_size
                # Predicted class = index of the largest output score.
                predicted = voutputs.argmax(dim=1)
                total += vbatch_size
                correct += (predicted == vlabels).sum().item()
        avg_val_loss = running_vloss / total
        accuracy = 100 * correct / total

        print(f"Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {accuracy:.2f}%")

        # 3. Log scalar values to TensorBoard
        # writer.add_scalars() draws several scalars on a single chart.
        writer.add_scalars('Training vs. Validation Loss',
                           {'Training': avg_train_loss, 'Validation': avg_val_loss},
                           epoch + 1)
        writer.add_scalar('Accuracy/validation', accuracy, epoch + 1)
finally:
    # 4. Close the writer
    writer.close()
print("\nFinished Training! Check TensorBoard by running: tensorboard --logdir=runs")

# apt-get update && apt-get install net-tools  : run this beforehand (provides netstat)
# netstat -tuln | grep 6006 : check whether port 6006 is currently in use


Epoch 1 [Train]: 100%|██████████| 938/938 [00:17<00:00, 52.25it/s]


Epoch 1 | Train Loss: 0.4841 | Val Loss: 0.4268 | Val Acc: 84.72%


Epoch 2 [Train]: 100%|██████████| 938/938 [00:17<00:00, 53.57it/s]


Epoch 2 | Train Loss: 0.3651 | Val Loss: 0.3814 | Val Acc: 85.86%


Epoch 3 [Train]: 100%|██████████| 938/938 [00:15<00:00, 61.05it/s]


Epoch 3 | Train Loss: 0.3255 | Val Loss: 0.3773 | Val Acc: 86.01%


Epoch 4 [Train]: 100%|██████████| 938/938 [00:17<00:00, 55.06it/s]


Epoch 4 | Train Loss: 0.2999 | Val Loss: 0.3597 | Val Acc: 87.02%


Epoch 5 [Train]: 100%|██████████| 938/938 [00:16<00:00, 57.77it/s]


Epoch 5 | Train Loss: 0.2812 | Val Loss: 0.3508 | Val Acc: 87.09%

Finished Training! Check TensorBoard by running: tensorboard --logdir=runs
