## Standard-GAN with MNIST data

### Chuẩn bị dữ liệu

In [None]:
# import các thư viện cần thiết
import torch
from torch import nn, Tensor
import numpy as np
from torchvision.utils import save_image

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
# Side length (in pixels) every MNIST digit is resized to.
image_size = 64

# Preprocessing pipeline applied to every sample. It is bound to its own name
# so the imported `transforms` module is not shadowed by the pipeline object.
# ToTensor yields values in [0, 1]; Normalize with mean 0.5 / std 0.5 then
# computes (x - 0.5) / 0.5, i.e. the [-1, 1] range the generator's Tanh emits.
mnist_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
])

# MNIST training split, downloaded into ./data on first use.
dataset = datasets.MNIST(root='data', train=True, transform=mnist_transform, download=True)

100%|██████████| 9.91M/9.91M [00:14<00:00, 679kB/s] 
100%|██████████| 28.9k/28.9k [00:00<00:00, 119kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 826kB/s] 
100%|██████████| 4.54k/4.54k [00:00<00:00, 290kB/s]


In [36]:
# Number of images per training step.
batch_size = 32

# Shuffled loader over the MNIST dataset, fed by four worker processes.
data_loader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

In [37]:
# Pull a single batch from the loader as a sanity check.
first_batch = next(iter(data_loader))
images, labels = first_batch

# Report the shape of the image tensor in that batch.
print(f"Batch size: {images.shape}")


Batch size: torch.Size([32, 1, 64, 64])


### Xây dựng model

In [38]:
# Generator built as an MLP: the second-to-last layer has 1024 units and the
# last layer emits image_size * image_size values, which forward() reshapes
# into a single-channel image_size x image_size image.

class Generator(nn.Module):
    """MLP generator mapping latent vectors to single-channel square images.

    Args:
        noise: Unused by this class; kept only so existing call sites that
            pass it keep working.
        latent_dim: Size of each input latent vector.
        image_size: Side length (in pixels) of the generated square image.
    """

    def __init__(self, noise, latent_dim, image_size):
        super().__init__()
        self.latent_dim = latent_dim
        # Stored on the instance so forward() reshapes with the size this
        # network was built for rather than a module-level variable.
        self.image_size = image_size
        self.gen = nn.Sequential(
            nn.Linear(latent_dim, 256),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Linear(256, 512),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Linear(512, 1024),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Linear(1024, image_size * image_size),
            # Tanh bounds every output pixel to [-1, 1].
            nn.Tanh(),
        )

    def forward(self, z: Tensor) -> Tensor:
        """Generate a batch of images from latent vectors.

        Args:
            z: Latent batch of shape (batch, latent_dim).

        Returns:
            Tensor of shape (batch, 1, image_size, image_size).
        """
        flat = self.gen(z)
        # Unflatten each row into a one-channel square image.
        return flat.view(flat.size(0), 1, self.image_size, self.image_size)

In [39]:
class Discriminator(nn.Module):
    """MLP discriminator scoring flattened square images as real or fake.

    The network returns raw logits (there is no final sigmoid). It is meant to
    be paired with a logits-based loss such as ``nn.BCEWithLogitsLoss``, which
    applies the sigmoid internally; adding a sigmoid here as well would squash
    the score twice.

    Args:
        image_size: Side length (in pixels) of the square input images; the
            first layer expects image_size * image_size features per sample.
    """

    def __init__(self, image_size):
        super().__init__()
        self.disc = nn.Sequential(
            nn.Linear(image_size * image_size, 1024),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Linear(1024, 512),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),

            # Single real/fake logit per sample.
            nn.Linear(256, 1),
        )

    def forward(self, x: Tensor) -> Tensor:
        """Score a batch of images.

        Args:
            x: Image batch; everything after the first dimension is flattened
                and must total image_size * image_size values per sample.

        Returns:
            Tensor of shape (batch, 1) holding one raw logit per image.
        """
        flat = x.view(x.size(0), -1)
        return self.disc(flat)

### Training model

In [40]:
# Build both networks and move them onto the selected device.
latent_dim = 32

generator = Generator(noise=torch.randn, latent_dim=latent_dim, image_size=image_size)
generator = generator.to(device)

discriminator = Discriminator(image_size=image_size)
discriminator = discriminator.to(device)

In [41]:
from torch import optim
# One Adam optimizer per network, both with the same hyperparameters.
generator_optimal = optim.Adam(
    params=generator.parameters(),
    lr=2e-4,
    betas=(0.5, 0.999),
)
discriminator_optimal = optim.Adam(
    params=discriminator.parameters(),
    lr=2e-4,
    betas=(0.5, 0.999),
)

In [42]:
from torch.amp import GradScaler, autocast
scaler = GradScaler(device='cuda')

torch.backends.cudnn.benchmark = True

In [43]:
# Mixed-precision GAN training loop.

# Per-batch loss histories, accumulated across every epoch.
g_losses = []
d_losses = []
# BCEWithLogitsLoss applies a sigmoid internally.
# NOTE(review): this expects the discriminator to return raw logits - confirm
# its last layer is not already a Sigmoid, otherwise scores are squashed twice.
critertion = nn.BCEWithLogitsLoss()
image_size = 64
epochs = 100
# Autocast is only enabled when training on a CUDA device.
use_amp = device.type == 'cuda'
for epoch in range(epochs):
    # Running sums for this epoch only, so the reported figure is a per-epoch
    # mean rather than a mean over the whole history.
    epoch_g_loss = 0.0
    epoch_d_loss = 0.0
    num_batches = 0
    for images, _ in data_loader:
        images = images.to(device)
        batch = images.size(0)
        real_labels = torch.ones(batch, 1, device=device)
        fake_labels = torch.zeros(batch, 1, device=device)

        # ---- Train the discriminator ----
        noise = torch.randn(batch, latent_dim, device=device)
        # Detach so the discriminator's backward pass does not also
        # back-propagate through the generator.
        fake_images = generator(noise).detach()
        discriminator_optimal.zero_grad()
        with autocast(device_type=device.type, enabled=use_amp):
            real_outputs = discriminator(images)
            fake_outputs = discriminator(fake_images)

            d_loss_real = critertion(real_outputs, real_labels)
            d_loss_fake = critertion(fake_outputs, fake_labels)
            d_loss = (d_loss_real + d_loss_fake) / 2
        scaler.scale(d_loss).backward()
        scaler.step(discriminator_optimal)
        scaler.update()

        # ---- Train the generator ----
        noise = torch.randn(batch, latent_dim, device=device)
        generator_optimal.zero_grad()
        with autocast(device_type=device.type, enabled=use_amp):
            fake_images = generator(noise)
            outputs = discriminator(fake_images)
            # Score the fakes against the "real" labels: the generator is
            # rewarded when the discriminator takes its images for real ones.
            g_loss = critertion(outputs, real_labels)
        scaler.scale(g_loss).backward()
        scaler.step(generator_optimal)
        scaler.update()

        d_value = d_loss.item()
        g_value = g_loss.item()
        d_losses.append(d_value)
        g_losses.append(g_value)
        epoch_d_loss += d_value
        epoch_g_loss += g_value
        num_batches += 1

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    avg_g_loss = epoch_g_loss / max(num_batches, 1)
    avg_d_loss = epoch_d_loss / max(num_batches, 1)

    # Report every tenth epoch. The condition is keyed on the epoch counter,
    # not on the batch index left over from the inner loop.
    if epoch % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Generator Loss: {avg_g_loss:.4f}, Discriminator Loss: {avg_d_loss:.4f}")


KeyboardInterrupt: 

In [None]:
# Report whether CUDA is usable.
print(torch.cuda.is_available())
# The device queries below raise when no CUDA device exists, so they only run
# once availability has been confirmed.
if torch.cuda.is_available():
    print(torch.cuda.current_device())  # index of the currently selected GPU
    print(torch.cuda.get_device_name(0))  # name of GPU 0

True
0
NVIDIA RTX A500 Laptop GPU


In [None]:
# Full-precision (no AMP) GAN training loop. It relies on `critertion`,
# `epochs`, `g_losses` and `d_losses` having been defined by an earlier cell.
for epoch in range(epochs):
    for i, (images, _) in enumerate(data_loader):
        images = images.to(device)
        real_labels = torch.ones(images.size(0), 1).to(device)
        fake_labels = torch.zeros(images.size(0), 1).to(device)

        # ---- Train the discriminator ----
        noise = torch.randn(images.size(0), latent_dim).to(device)
        # Detach so this backward pass stops at the discriminator.
        fake_images = generator(noise).detach()
        real_outputs = discriminator(images)
        fake_outputs = discriminator(fake_images)

        d_loss_real = critertion(real_outputs, real_labels)
        d_loss_fake = critertion(fake_outputs, fake_labels)
        d_loss = d_loss_real + d_loss_fake

        discriminator.zero_grad()
        d_loss.backward()
        discriminator_optimal.step()

        # ---- Train the generator ----
        noise = torch.randn(images.size(0), latent_dim).to(device)
        fake_images = generator(noise)
        outputs = discriminator(fake_images)
        # Fakes are scored against the "real" labels.
        g_loss = critertion(outputs, real_labels)

        generator.zero_grad()
        g_loss.backward()
        generator_optimal.step()

        g_losses.append(g_loss.item())
        d_losses.append(d_loss.item())

        if i % 100 == 0:
            print(f"Epoch [{epoch}/{epochs}], Batch [{i}/{len(data_loader)}], d_loss: {d_loss.item()}, g_loss: {g_loss.item()}")

    # Save the last generated batch every tenth epoch.
    # NOTE(review): generator outputs come from a Tanh, so they span [-1, 1];
    # confirm whether save_image should be asked to rescale them.
    if epoch % 10 == 0:
        save_image(fake_images, f"gan_images_{epoch}.png")
# Replace the generator with a freshly initialised instance on the device.
# NOTE(review): an optimizer built from the previous instance's parameters
# does not track this new one - confirm the reset is intended and rebuild the
# optimizer before training again.
generator = Generator(noise=1, latent_dim=latent_dim, image_size=image_size).to(device)