#1-1

MNIST 데이터셋을 사용하여 간단한 VAE를 구현한 코드입니다.

코드를 실행시키고, 주석을 달아주세요.

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torchvision.utils import save_image

In [2]:
# Hyperparameters
num_epochs = 50          # number of passes over the training set
batch_size = 100         # samples per mini-batch (also the number of images sampled per epoch)
learning_rate = 0.0002   # step size passed to the Adam optimizer
img_size = 28 * 28       # length of one flattened 28x28 image
latent_dim = 20          # size of the latent vector z
hidden_size1 = 256       # hidden-layer widths shared by the encoder and decoder
hidden_size2 = 512
hidden_size3 = 1024
dir_name = "VAE_results"  # directory that receives the generated sample images

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create the output directory for result images. exist_ok=True makes this a
# no-op when the directory already exists and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(dir_name, exist_ok=True)

# Preprocessing: convert each image to a tensor, then normalise it as
# (x - 0.5) / 0.5 using a single-channel mean and std of 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [3]:
# Load the MNIST training split from ../../data/ with the preprocessing
# pipeline applied to every image; download=True lets torchvision fetch the
# files when they are not present locally.
MNIST_dataset = datasets.MNIST(
    root='../../data/',
    train=True,
    transform=transform,
    download=True,
)

# Wrap the dataset in a DataLoader that yields shuffled mini-batches
data_loader = torch.utils.data.DataLoader(
    MNIST_dataset,
    batch_size=batch_size,
    shuffle=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ../../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 18050818.97it/s]


Extracting ../../data/MNIST/raw/train-images-idx3-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 508349.81it/s]


Extracting ../../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 4469674.37it/s]


Extracting ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 9948056.80it/s]

Extracting ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw






In [4]:
# Encoder half of the VAE
class Encoder(nn.Module):
    """Map a flattened image to the mean and log-variance of its latent code.

    Three LeakyReLU-activated linear layers take the input from ``img_size``
    features through ``hidden_size3``, ``hidden_size2`` and ``hidden_size1``
    features. Two parallel linear heads then produce the ``latent_dim``-sized
    mean and log-variance vectors.
    """

    def __init__(self):
        super().__init__()
        # Fully connected trunk; widths come from the module-level hyperparameters
        self.fc1 = nn.Linear(img_size, hidden_size3)
        self.fc2 = nn.Linear(hidden_size3, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, hidden_size1)
        # Separate output heads for the two latent statistics
        self.fc_mean = nn.Linear(hidden_size1, latent_dim)
        self.fc_logvar = nn.Linear(hidden_size1, latent_dim)
        # Shared activation with negative slope 0.2
        self.leaky_relu = nn.LeakyReLU(0.2)

    def forward(self, x):
        """Return ``(mean, logvar)`` for input ``x`` whose last dimension is ``img_size``."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.leaky_relu(layer(hidden))
        return self.fc_mean(hidden), self.fc_logvar(hidden)

# Decoder half of the VAE
class Decoder(nn.Module):
    """Map a latent vector back to a flattened image.

    Three ReLU-activated linear layers widen the input from ``latent_dim``
    through ``hidden_size1``, ``hidden_size2`` and ``hidden_size3`` features.
    A final linear layer followed by a sigmoid produces ``img_size`` outputs
    in the range [0, 1].
    """

    def __init__(self):
        super().__init__()
        # Fully connected stack; widths come from the module-level hyperparameters
        self.fc1 = nn.Linear(latent_dim, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)
        self.fc4 = nn.Linear(hidden_size3, img_size)
        self.relu = nn.ReLU()        # hidden-layer activation
        self.sigmoid = nn.Sigmoid()  # output activation

    def forward(self, x):
        """Return the reconstruction for input ``x`` whose last dimension is ``latent_dim``."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        return self.sigmoid(self.fc4(hidden))

# Instantiate the encoder and decoder (in that order) and move each to the selected device
encoder, decoder = Encoder().to(device), Decoder().to(device)

In [5]:
# Sampling from the latent space
def reparameterize(mean, logvar):
    """Sample ``z = mean + eps * std`` with ``eps`` drawn from a standard normal.

    ``std`` is obtained from the log-variance as ``exp(0.5 * logvar)``. The
    noise tensor is created with the same shape, dtype and device as ``std``,
    so the sample is a differentiable function of ``mean`` and ``logvar``.
    """
    sigma = (0.5 * logvar).exp()
    noise = torch.randn_like(sigma)
    return mean + noise * sigma

# VAE loss
def loss_function(recon_x, x, mean, logvar):
    """Return the summed VAE loss: reconstruction term plus KL term.

    The reconstruction term is the binary cross-entropy between ``recon_x``
    and ``x`` summed over all elements. The KL term is
    ``-0.5 * sum(1 + logvar - mean^2 - exp(logvar))``, the closed-form KL
    divergence between the encoded Gaussian and a standard normal. Both
    terms are sums, not means, so the result scales with the batch size.
    """
    recon_term = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    kl_inner = 1 + logvar - mean.pow(2) - logvar.exp()
    kl_term = -0.5 * kl_inner.sum()
    return recon_term + kl_term

# One Adam optimizer that updates the encoder and decoder parameters together
optimizer = optim.Adam(
    params=[*encoder.parameters(), *decoder.parameters()],
    lr=learning_rate,
)

In [6]:
for epoch in range(num_epochs):
    # Put both networks in training mode at the start of every epoch; the
    # sampling step at the end of the epoch switches the decoder to eval mode.
    encoder.train()
    decoder.train()
    train_loss = 0

    # Mini-batch training; the class labels are not used
    for images, _ in data_loader:
        images = images.view(-1, img_size).to(device)  # flatten each image to img_size values

        # Map values from the Normalize((0.5,), (0.5,)) range back to [0, 1],
        # which binary cross-entropy requires for its targets.
        images = (images + 1) / 2

        optimizer.zero_grad()

        mean, logvar = encoder(images)    # encode to latent mean and log-variance
        z = reparameterize(mean, logvar)  # sample the latent variable z
        recon_images = decoder(z)         # decode z into a reconstructed image

        loss = loss_function(recon_images, images, mean, logvar)  # summed loss for this batch
        loss.backward()                   # back-propagate to compute gradients
        train_loss += loss.item()
        optimizer.step()                  # update encoder and decoder parameters

    # The loss is summed per batch, so dividing by the dataset size gives the
    # average loss per sample for the epoch.
    avg_loss = train_loss / len(data_loader.dataset)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}')

    # Generate and save sample images from random latent vectors.
    # eval() is the standard inference idiom: it is a no-op for the current
    # Linear/ReLU/Sigmoid decoder, but keeps sampling correct if dropout or
    # batch-norm layers are added later.
    decoder.eval()
    with torch.no_grad():
        z = torch.randn(batch_size, latent_dim).to(device)
        sample = decoder(z).view(-1, 1, 28, 28)
        save_image(sample, os.path.join(dir_name, f'VAE_fake_image_{epoch + 1}.png'))

Epoch [1/50], Loss: 200.7084
Epoch [2/50], Loss: 151.4977
Epoch [3/50], Loss: 132.5315
Epoch [4/50], Loss: 125.6801
Epoch [5/50], Loss: 121.0569
Epoch [6/50], Loss: 117.1720
Epoch [7/50], Loss: 114.2857
Epoch [8/50], Loss: 111.9960
Epoch [9/50], Loss: 110.1114
Epoch [10/50], Loss: 108.6766
Epoch [11/50], Loss: 107.4454
Epoch [12/50], Loss: 106.3984
Epoch [13/50], Loss: 105.5095
Epoch [14/50], Loss: 104.7539
Epoch [15/50], Loss: 104.0979
Epoch [16/50], Loss: 103.4729
Epoch [17/50], Loss: 102.9730
Epoch [18/50], Loss: 102.4631
Epoch [19/50], Loss: 102.0132
Epoch [20/50], Loss: 101.6319
Epoch [21/50], Loss: 101.2538
Epoch [22/50], Loss: 100.9097
Epoch [23/50], Loss: 100.5563
Epoch [24/50], Loss: 100.2359
Epoch [25/50], Loss: 99.9689
Epoch [26/50], Loss: 99.6937
Epoch [27/50], Loss: 99.4005
Epoch [28/50], Loss: 99.2049
Epoch [29/50], Loss: 98.9711
Epoch [30/50], Loss: 98.7372
Epoch [31/50], Loss: 98.4926
Epoch [32/50], Loss: 98.3228
Epoch [33/50], Loss: 98.1624
Epoch [34/50], Loss: 97.9230

#1-2

아래 마크다운으로 VAE_fake_image_1.png와 VAE_fake_image_50.png를 함께 첨부해주세요.

![VAE_fake_image_1.png](VAE_fake_image_1.png)
![VAE_fake_image_50.png](VAE_fake_image_50.png)