#1-1

MNIST 데이터셋을 사용하여 간단한 VAE를 구현한 코드입니다.

코드를 실행시키고, 주석을 달아주세요.

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torchvision.utils import save_image

In [2]:
# Training hyperparameters.
num_epochs = 50
batch_size = 100
learning_rate = 0.0002  # step size handed to the Adam optimizer

# Model dimensions.
img_size = 28 * 28  # length of one flattened 28x28 image
latent_dim = 20  # size of the latent vector z
hidden_size1 = 256
hidden_size2 = 512
hidden_size3 = 1024

# Directory that receives the sample image written after each epoch.
dir_name = "VAE_results"

# Prefer the GPU when CUDA is available; otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# exist_ok=True makes this a no-op when the directory is already there,
# which avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(dir_name, exist_ok=True)

# Preprocessing applied to every image when it is read from the dataset.
transform = transforms.Compose(
    [
        # Convert the image to a tensor.
        transforms.ToTensor(),
        # Normalize the single channel as (x - 0.5) / 0.5.
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [3]:
# Load the MNIST training split, downloading it under ../../data/ if needed.
MNIST_dataset = datasets.MNIST(
    '../../data/', train=True, transform=transform, download=True
)

# Serve the dataset in shuffled mini-batches of `batch_size` images.
data_loader = torch.utils.data.DataLoader(
    MNIST_dataset, batch_size=batch_size, shuffle=True
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ../../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 49902910.04it/s]


Extracting ../../data/MNIST/raw/train-images-idx3-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 1568688.49it/s]

Extracting ../../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 2061823.91it/s]


Extracting ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 7375349.89it/s]

Extracting ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw






In [4]:
class Encoder(nn.Module):
    """Encoder half of the VAE: maps a flattened image to latent parameters.

    Three fully connected layers (img_size -> hidden_size3 -> hidden_size2 ->
    hidden_size1), each followed by LeakyReLU(0.2), feed two parallel linear
    heads of width latent_dim: one for the mean and one for the log-variance.
    """

    def __init__(self):
        super().__init__()
        # Layers are registered in the order they are applied in forward().
        self.fc1 = nn.Linear(img_size, hidden_size3)
        self.fc2 = nn.Linear(hidden_size3, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, hidden_size1)
        # Two heads read the same hidden features.
        self.fc_mean = nn.Linear(hidden_size1, latent_dim)
        self.fc_logvar = nn.Linear(hidden_size1, latent_dim)
        self.leaky_relu = nn.LeakyReLU(0.2)

    def forward(self, x):
        """Return ``(mean, logvar)`` computed from the input batch ``x``.

        ``x`` must have ``img_size`` features in its last dimension; both
        outputs have ``latent_dim`` features in their last dimension.
        """
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.leaky_relu(layer(hidden))
        return self.fc_mean(hidden), self.fc_logvar(hidden)


class Decoder(nn.Module):
    """Decoder half of the VAE: maps a latent vector back to a flattened image.

    Three fully connected layers (latent_dim -> hidden_size1 -> hidden_size2 ->
    hidden_size3), each followed by ReLU, lead into a final linear layer of
    width img_size whose output is passed through a sigmoid.
    """

    def __init__(self):
        super().__init__()
        # Layers are registered in the order they are applied in forward().
        self.fc1 = nn.Linear(latent_dim, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)
        self.fc4 = nn.Linear(hidden_size3, img_size)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Decode the latent batch ``x`` into values between 0 and 1.

        ``x`` must have ``latent_dim`` features in its last dimension; the
        result has ``img_size`` features in its last dimension.
        """
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        # The sigmoid keeps every output usable as a Bernoulli probability.
        return self.sigmoid(self.fc4(hidden))

# Build both networks (encoder first) and move them to the selected device.
encoder, decoder = Encoder().to(device), Decoder().to(device)

In [5]:
def reparameterize(mean, logvar):
    """Sample z = mean + sigma * noise with the reparameterization trick.

    Args:
        mean: Mean of the latent Gaussian.
        logvar: Log-variance of the latent Gaussian, same shape as ``mean``.

    Returns:
        A tensor with the shape of ``mean``. The randomness comes only from
        the noise term, so gradients can flow through ``mean`` and ``logvar``.
    """
    # sigma = exp(logvar / 2), because logvar = log(sigma^2).
    sigma = (0.5 * logvar).exp()
    # Standard-normal noise with the same shape, dtype and device as sigma.
    noise = torch.randn_like(sigma)
    return mean + sigma * noise

# VAE training loss.
def loss_function(recon_x, x, mean, logvar):
    """Return the summed reconstruction loss plus the KL divergence.

    Args:
        recon_x: Reconstructed images with values in [0, 1].
        x: Target images with values in [0, 1], same shape as ``recon_x``.
        mean: Mean of the approximate posterior.
        logvar: Log-variance of the approximate posterior.

    Returns:
        A scalar tensor. Both terms are summed over every element, so the
        value is a batch total rather than a per-sample average.
    """
    # Reconstruction term: binary cross-entropy between output and target.
    reconstruction = nn.functional.binary_cross_entropy(
        recon_x, x, reduction='sum'
    )
    # KL(N(mean, sigma^2) || N(0, 1)) in closed form.
    kl_terms = 1 + logvar - mean.pow(2) - logvar.exp()
    divergence = -0.5 * torch.sum(kl_terms)
    return reconstruction + divergence

# A single Adam optimizer updates the encoder and decoder parameters together.
optimizer = optim.Adam(
    [*encoder.parameters(), *decoder.parameters()],
    lr=learning_rate,
)

In [6]:
# Train the VAE for num_epochs epochs; after each epoch, print the average
# loss and save images decoded from random latent vectors.
for epoch in range(num_epochs):
    # Put both networks in training mode.
    encoder.train()
    decoder.train()
    # Running total of the summed batch losses for this epoch.
    train_loss = 0

    # Labels are not needed for the VAE, so they are discarded.
    for images, _ in data_loader:
        # Flatten each image to a vector of img_size values and move it to the device.
        images = images.view(-1, img_size).to(device)

        # Undo Normalize((0.5,), (0.5,)): map values from [-1, 1] back to
        # [0, 1], the range binary cross-entropy requires for its targets.
        images = (images + 1) / 2

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Encode to the mean and log-variance of the latent distribution.
        mean, logvar = encoder(images)
        # Sample z with the reparameterization trick.
        z = reparameterize(mean, logvar)
        # Reconstruct the images from z.
        recon_images = decoder(z)

        # Summed reconstruction loss plus KL divergence for this batch.
        loss = loss_function(recon_images, images, mean, logvar)
        # Backpropagate to compute gradients (no weights change here).
        loss.backward()
        # Accumulate the batch loss as a Python float.
        train_loss += loss.item()
        # Apply the gradient update to encoder and decoder weights.
        optimizer.step()

    # loss_function sums over the batch, so dividing the epoch total by the
    # number of images gives the average loss per image.
    avg_loss = train_loss / len(data_loader.dataset)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}')

    # Generate sample images with the current decoder and save them, once per epoch.
    # NOTE(review): the decoder is still in train() mode here. That looks
    # harmless for its Linear/ReLU/Sigmoid layers, but decoder.eval() would be
    # needed if dropout or batch norm were ever added.
    with torch.no_grad():
        # Draw batch_size latent vectors from a standard normal distribution.
        z = torch.randn(batch_size, latent_dim).to(device)
        # Decode and reshape to (N, 1, 28, 28) image tensors.
        sample = decoder(z).view(-1, 1, 28, 28)
        save_image(sample, os.path.join(dir_name, f'VAE_fake_image_{epoch + 1}.png'))

Epoch [1/50], Loss: 200.7115
Epoch [2/50], Loss: 148.1723
Epoch [3/50], Loss: 129.8604
Epoch [4/50], Loss: 123.2824
Epoch [5/50], Loss: 118.6656
Epoch [6/50], Loss: 115.1988
Epoch [7/50], Loss: 112.7707
Epoch [8/50], Loss: 111.0326
Epoch [9/50], Loss: 109.5454
Epoch [10/50], Loss: 108.2151
Epoch [11/50], Loss: 107.0967
Epoch [12/50], Loss: 106.1673
Epoch [13/50], Loss: 105.3089
Epoch [14/50], Loss: 104.5917
Epoch [15/50], Loss: 103.9370
Epoch [16/50], Loss: 103.3781
Epoch [17/50], Loss: 102.8628
Epoch [18/50], Loss: 102.3716
Epoch [19/50], Loss: 101.9947
Epoch [20/50], Loss: 101.5714
Epoch [21/50], Loss: 101.2032
Epoch [22/50], Loss: 100.8605
Epoch [23/50], Loss: 100.5626
Epoch [24/50], Loss: 100.2610
Epoch [25/50], Loss: 99.9616
Epoch [26/50], Loss: 99.7174
Epoch [27/50], Loss: 99.4626
Epoch [28/50], Loss: 99.2242
Epoch [29/50], Loss: 99.0057
Epoch [30/50], Loss: 98.7842
Epoch [31/50], Loss: 98.5809
Epoch [32/50], Loss: 98.3925
Epoch [33/50], Loss: 98.1605
Epoch [34/50], Loss: 97.9879

#1-2

아래 마크다운으로 VAE_fake_image_1.png와 VAE_fake_image_50.png를 함께 첨부해주세요.

![VAE_fake_image_1](VAE_fake_image_1.png)
![VAE_fake_image_50](VAE_fake_image_50.png)