#1-1

MNIST 데이터셋을 사용하여 간단한 VAE를 구현한 코드입니다.

코드를 실행시키고, 주석을 달아주세요.

In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torchvision.utils import save_image

In [3]:
# Training hyperparameters.
num_epochs = 50
batch_size = 100
learning_rate = 0.0002

# Model dimensions: flattened image size, latent vector size, and the widths
# of the three hidden layers shared by the encoder and decoder.
img_size = 28 * 28
latent_dim = 20
hidden_size1 = 256
hidden_size2 = 512
hidden_size3 = 1024

# Directory that receives the sample images saved after every epoch.
dir_name = "VAE_results"

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# exist_ok=True creates the directory only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(dir_name, exist_ok=True)

# Preprocessing pipeline: convert each image to a tensor, then normalize it
# with mean 0.5 and standard deviation 0.5 (single channel).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [4]:
# Download MNIST (download=True) and select its training split (train=True);
# every image goes through `transform` when it is loaded.
MNIST_dataset = datasets.MNIST(
    root='../../data/',
    train=True,
    download=True,
    transform=transform,
)

# Data loader that serves the dataset in shuffled mini-batches of `batch_size`.
data_loader = torch.utils.data.DataLoader(
    MNIST_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [5]:
# Encoder: compresses a flattened image into the parameters of a latent Gaussian.
class Encoder(nn.Module):
    """Fully connected encoder producing a latent mean and log-variance.

    The trunk is three linear layers with LeakyReLU(0.2) activations going
    ``img_size -> hidden_size3 -> hidden_size2 -> hidden_size1``. Two separate
    linear heads then map the trunk output to ``latent_dim`` values each.
    """

    def __init__(self):
        super().__init__()
        # Shared trunk.
        self.fc1 = nn.Linear(img_size, hidden_size3)
        self.fc2 = nn.Linear(hidden_size3, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, hidden_size1)
        # Output heads: one for the mean, one for the log-variance.
        self.fc_mean = nn.Linear(hidden_size1, latent_dim)
        self.fc_logvar = nn.Linear(hidden_size1, latent_dim)
        self.leaky_relu = nn.LeakyReLU(0.2)

    def forward(self, x):
        """Return ``(mean, logvar)`` for input ``x`` whose last dim is ``img_size``."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.leaky_relu(layer(hidden))
        return self.fc_mean(hidden), self.fc_logvar(hidden)

# Decoder: reconstructs a flattened image from a latent vector.
class Decoder(nn.Module):
    """Fully connected decoder mapping latent vectors back to image space.

    Three ReLU-activated linear layers widen the input
    ``latent_dim -> hidden_size1 -> hidden_size2 -> hidden_size3``; a final
    linear layer followed by a sigmoid yields ``img_size`` values in (0, 1).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(latent_dim, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)
        # Output projection back to the flattened image size.
        self.fc4 = nn.Linear(hidden_size3, img_size)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the reconstruction for latent input ``x`` (last dim ``latent_dim``)."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        # The sigmoid keeps every output strictly between 0 and 1.
        return self.sigmoid(self.fc4(hidden))

# Instantiate both networks (encoder first, then decoder) on the selected device.
encoder, decoder = Encoder().to(device), Decoder().to(device)

In [7]:
# Reparameterization trick used to sample from the latent distribution.
def reparameterize(mean, logvar):
    """Draw ``mean + eps * std`` with ``eps`` sampled from a standard normal.

    Args:
        mean: Tensor of latent means.
        logvar: Tensor of latent log-variances; ``std = exp(0.5 * logvar)``.

    Returns:
        A tensor computed as ``mean + eps * std``, where ``eps`` has the same
        shape, dtype and device as ``std``.
    """
    std = (0.5 * logvar).exp()
    noise = torch.randn_like(std)
    # Randomness lives only in `noise`, so gradients can flow through
    # `mean` and `std`.
    return mean + noise * std

# Loss: reconstruction error plus a KL-divergence regularizer.
def loss_function(recon_x, x, mean, logvar):
    """Return the summed VAE loss for a batch.

    Args:
        recon_x: Reconstructed images (decoder output), values in [0, 1].
        x: Target images, values in [0, 1].
        mean: Latent means from the encoder.
        logvar: Latent log-variances from the encoder.

    Returns:
        A scalar tensor: binary cross-entropy between ``recon_x`` and ``x``
        summed over all elements, plus
        ``-0.5 * sum(1 + logvar - mean^2 - exp(logvar))``.
    """
    reconstruction = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    kl_inner = 1 + logvar - mean.pow(2) - logvar.exp()
    divergence = -0.5 * torch.sum(kl_inner)
    return reconstruction + divergence

# A single Adam optimizer updates the encoder and decoder parameters jointly.
optimizer = optim.Adam(
    [*encoder.parameters(), *decoder.parameters()],
    lr=learning_rate,
)

# Model training follows below.

In [25]:
# Train the VAE for `num_epochs` epochs; after each epoch print the average
# loss and save a batch of images decoded from random latent vectors.
for epoch in range(num_epochs):
    # Put both networks in training mode at the start of every epoch.
    encoder.train()
    decoder.train()
    # Running sum of the (already batch-summed) losses for this epoch.
    train_loss = 0

    # Labels are not needed for the VAE objective, so they are discarded.
    for images, _ in data_loader:
        # Flatten each image to a vector of length img_size and move it to the device.
        images = images.view(-1, img_size).to(device)

        # Undo the Normalize((0.5,), (0.5,)) applied by `transform`:
        # (x + 1) / 2 inverts (x - 0.5) / 0.5. The targets of the binary
        # cross-entropy in loss_function must lie in [0, 1].
        images = (images + 1) / 2

        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass: encode, sample a latent vector, decode.
        mean, logvar = encoder(images)
        z = reparameterize(mean, logvar)
        recon_images = decoder(z)

        # Backward pass and parameter update.
        loss = loss_function(recon_images, images, mean, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

    # loss_function sums over the batch, so dividing the epoch total by the
    # number of dataset samples gives the average loss per image.
    avg_loss = train_loss / len(data_loader.dataset)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}')

    # Generate samples without tracking gradients.
    with torch.no_grad():
        # Draw batch_size latent vectors from a standard normal distribution.
        z = torch.randn(batch_size, latent_dim).to(device)
        # Reshape decoder output to (N, 1, 28, 28) image tensors.
        sample = decoder(z).view(-1, 1, 28, 28)
        # Save the generated images for this epoch under dir_name.
        save_image(sample, os.path.join(dir_name, f'VAE_fake_image_{epoch + 1}.png'))

Epoch [1/50], Loss: 201.3646
Epoch [2/50], Loss: 152.5816
Epoch [3/50], Loss: 131.1248
Epoch [4/50], Loss: 123.2612
Epoch [5/50], Loss: 119.1044
Epoch [6/50], Loss: 115.9980
Epoch [7/50], Loss: 113.6039
Epoch [8/50], Loss: 111.6008
Epoch [9/50], Loss: 109.8704
Epoch [10/50], Loss: 108.5191
Epoch [11/50], Loss: 107.3797
Epoch [12/50], Loss: 106.3341
Epoch [13/50], Loss: 105.4505
Epoch [14/50], Loss: 104.7048
Epoch [15/50], Loss: 104.1100
Epoch [16/50], Loss: 103.4981
Epoch [17/50], Loss: 102.9666
Epoch [18/50], Loss: 102.5491
Epoch [19/50], Loss: 102.0597
Epoch [20/50], Loss: 101.7146
Epoch [21/50], Loss: 101.3092
Epoch [22/50], Loss: 101.0245
Epoch [23/50], Loss: 100.6467
Epoch [24/50], Loss: 100.3358
Epoch [25/50], Loss: 100.0424
Epoch [26/50], Loss: 99.7306
Epoch [27/50], Loss: 99.4546
Epoch [28/50], Loss: 99.2158
Epoch [29/50], Loss: 98.9704
Epoch [30/50], Loss: 98.6962
Epoch [31/50], Loss: 98.4921
Epoch [32/50], Loss: 98.2668
Epoch [33/50], Loss: 98.0621
Epoch [34/50], Loss: 97.821

#1-2

아래 마크다운으로 VAE_fake_image_1.png와 VAE_fake_image_50.png를 함께 첨부해주세요.

![VAE_fake_image_1.png](attachment:d955acf0-a61a-4122-bb9b-239f47bff0ee.png)

![VAE_fake_image_50.png](attachment:e8788201-fd8a-49ed-b3a8-6cef6b5467fb.png)