In [16]:
import torch
from torchvision import datasets, transforms

def load_cifar10(batch_size=64, img_size=32):
    transform = transforms.Compose([
        transforms.Resize(img_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, drop_last=True)
    return trainloader

# Load CIFAR-10 dataset with drop_last=True
dataloader = load_cifar10()

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43098489.71it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


In [15]:
!rm -rf data

In [17]:
import torch.nn as nn
import torch.optim as optim
import numpy as np

class DCGAN_Generator(nn.Module):
    def __init__(self, latent_dim, img_shape):
        super(DCGAN_Generator, self).__init__()
        self.img_shape = img_shape
        self.model = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256, 0.8),
            nn.ReLU(inplace=True),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512, 0.8),
            nn.ReLU(inplace=True),
            nn.Linear(512, int(np.prod(img_shape))),
            nn.Tanh()
        )

    def forward(self, z):
        img = self.model(z)
        img = img.view(img.size(0), *self.img_shape)
        return img

class DCGAN_Discriminator(nn.Module):
    def __init__(self, img_shape):
        super(DCGAN_Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(int(np.prod(img_shape)), 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, img):
        img_flat = img.view(img.size(0), -1)
        validity = self.model(img_flat)
        return validity

latent_dim = 100
img_shape = (3, 32, 32)
dcgan_generator = DCGAN_Generator(latent_dim, img_shape)
dcgan_discriminator = DCGAN_Discriminator(img_shape)

optimizer_G = torch.optim.Adam(dcgan_generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_D = torch.optim.Adam(dcgan_discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
criterion = torch.nn.BCELoss()


In [21]:
from torchvision.utils import save_image
import time
training_times = []

def train_dcgan(generator, discriminator, optimizer_G, optimizer_D, dataloader, epochs):
    for epoch in range(epochs):
        start_time = time.time()
        for i, (imgs, _) in enumerate(dataloader):
            real_imgs = imgs.cuda()
            valid = torch.ones(imgs.size(0), 1).cuda()
            fake = torch.zeros(imgs.size(0), 1).cuda()

            optimizer_G.zero_grad()
            noise = torch.randn(imgs.size(0), 100).cuda()
            gen_imgs = generator(noise)

            # Debug: Print batch sizes
            # print(f'Real images batch size: {real_imgs.size(0)}')
            # print(f'Generated images batch size: {gen_imgs.size(0)}')

            g_loss = criterion(discriminator(gen_imgs), valid)
            g_loss.backward()
            optimizer_G.step()

            optimizer_D.zero_grad()
            real_loss = criterion(discriminator(real_imgs), valid)
            fake_loss = criterion(discriminator(gen_imgs.detach()), fake)
            d_loss = (real_loss + fake_loss) / 2
            d_loss.backward()
            optimizer_D.step()

        end_time = time.time()
        epoch_time = end_time - start_time
        training_times.append(epoch_time)

    return gen_imgs

total_training_time = sum(training_times)
avg_training_time_per_epoch = total_training_time / num_epochs

print(f"Total training time: {total_training_time} seconds")
print(f"Avg training time per epoch: {avg_training_time_per_epoch} seconds")

dcgan_generator.cuda()
dcgan_discriminator.cuda()
gen_imgs_dcgan = train_dcgan(dcgan_generator, dcgan_discriminator, optimizer_G, optimizer_D, dataloader, epochs=50)
save_image(gen_imgs_dcgan.data[:25], 'dcgan_generated.png', nrow=5, normalize=True)

In [None]:
import pandas as pd

# Summarize and visualize results
def summarize_results(fid_scores, psnr_scores, ssim_scores, training_times):
    results = {
        'Model': ['DCGAN', 'VAE', 'WGAN-GP'],
        'FID Score': fid_scores,
        'PSNR': psnr_scores,
        'SSIM': ssim_scores,
        'Training Time (s)': training_times
    }

    df_results = pd.DataFrame(results)
    print(df_results)

    df_results.plot(x='Model', y=['FID Score', 'PSNR', 'SSIM', 'Training Time (s)'], kind='bar', subplots=True, layout=(2, 2), figsize=(14, 10))
    plt.show()

# Collect scores
fid_scores = [fid_dcgan, fid_vae, fid_wgan_gp]
psnr_scores = [psnr_dcgan, psnr_vae, psnr_wgan_gp]
ssim_scores = [ssim_dcgan, ssim_vae, ssim_wgan_gp]
# Assume training times are recorded
training_times = [1000, 1200, 1100]

summarize_results(fid_scores, psnr_scores, ssim_scores, training_times)


In [12]:
print(len(real_imgs), len(gen_imgs_dcgan))

64 16


FID Score (Fréchet Inception Distance):

含义：FID Score 是一种衡量生成图像质量的指标，通过比较真实图像与生成图像在特征空间上的统计分布差异来评估。它利用了预训练的深度学习模型（通常是 Inception 网络）提取的特征向量，计算两个分布之间的 Fréchet 距离。
解释：FID Score 越低表示生成图像与真实图像的统计分布越接近，生成器的质量越高。

PSNR (Peak Signal-to-Noise Ratio):

含义：PSNR 是一种用于测量图像质量的传统指标，它评估了生成图像与真实图像之间的峰值信噪比。PSNR 越高表示两幅图像之间的结构相似度越高。
解释：PSNR 是在像素级别计算的，较高的 PSNR 值意味着生成图像的像素值与真实图像的像素值非常接近，即图像质量较高。

SSIM (Structural Similarity Index):

含义：SSIM 是另一种用于测量图像相似性的指标，它不仅考虑像素级别的差异，还考虑亮度、对比度和结构信息之间的相关性。
解释：SSIM 的值在 0 到 1 之间，越接近 1 表示生成图像与真实图像在结构上越相似，即生成图像的结构与真实图像更接近。

总结：
FID Score 衡量生成图像与真实图像在特征空间上的分布相似性。

PSNR 衡量生成图像与真实图像之间的像素级别相似性。

SSIM 衡量生成图像与真实图像之间的结构相似性。

In [22]:
import numpy as np
from scipy.linalg import sqrtm
from skimage.metrics import peak_signal_noise_ratio as psnr, structural_similarity as ssim

def calculate_fid(real_images, generated_images):
    mu_real, sigma_real = np.mean(real_images, axis=0), np.cov(real_images, rowvar=False)
    mu_gen, sigma_gen = np.mean(generated_images, axis=0), np.cov(generated_images, rowvar=False)
    ssdiff = np.sum((mu_real - mu_gen)**2.0)
    covmean = sqrtm(sigma_real.dot(sigma_gen))
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    fid = ssdiff + np.trace(sigma_real + sigma_gen - 2.0 * covmean)
    return fid

def calculate_psnr(real_images, generated_images):
    assert len(real_images) == len(generated_images), "Length of real_images and generated_images must be the same"

    psnr_values = [
        psnr(real_images[i].detach().cpu().numpy().transpose(1, 2, 0),
             generated_images[i].detach().cpu().numpy().transpose(1, 2, 0))
        for i in range(len(real_images))
    ]
    return np.mean(psnr_values)

def calculate_ssim(real_images, generated_images):
    ssim_values = [ssim(real_images[i].detach().cpu().numpy().transpose(1, 2, 0),
            generated_images[i].detach().cpu().numpy().transpose(1, 2, 0), multichannel=True) for i in range(len(real_images))]
    return np.mean(ssim_values)

# Prepare data for FID calculation
real_imgs, _ = next(iter(dataloader))
real_imgs_flat = real_imgs.view(real_imgs.size(0), -1).detach().numpy()
gen_imgs_dcgan_flat = gen_imgs_dcgan.view(gen_imgs_dcgan.size(0), -1).detach().cpu().numpy()
# gen_imgs_vae_flat = gen_imgs_vae.view(gen_imgs_vae.size(0), -1).detach().cpu().numpy()
# gen_imgs_wgan_gp_flat = gen_imgs_wgan_gp.view(gen_imgs_wgan_gp.size(0), -1).detach().cpu().numpy()

# Calculate FID
fid_dcgan = calculate_fid(real_imgs_flat, gen_imgs_dcgan_flat)
# fid_vae = calculate_fid(real_imgs_flat, gen_imgs_vae_flat)
# fid_wgan_gp = calculate_fid(real_imgs_flat, gen_imgs_wgan_gp_flat)

# Calculate PSNR and SSIM
psnr_dcgan = calculate_psnr(real_imgs, gen_imgs_dcgan)
ssim_dcgan = calculate_ssim(real_imgs, gen_imgs_dcgan)
# psnr_vae = calculate_psnr(real_imgs, gen_imgs_vae)
# ssim_vae = calculate_ssim(real_imgs, gen_imgs_vae)
# psnr_wgan_gp = calculate_psnr(real_imgs, gen_imgs_wgan_gp)
# ssim_wgan_gp = calculate_ssim(real_imgs, gen_imgs_wgan_gp)

# Print metrics
print(f'FID Score for DCGAN: {fid_dcgan}')
print(f'PSNR for DCGAN: {psnr_dcgan}')
print(f'SSIM for DCGAN: {ssim_dcgan}')
# print(f'FID Score for VAE: {fid_vae}')
# print(f'PSNR for VAE: {psnr_vae}')
# print(f'SSIM for VAE: {ssim_vae}')
# print(f'FID Score for WGAN-GP: {fid_wgan_gp}')
# print(f'PSNR for WGAN-GP: {psnr_wgan_gp}')
# print(f'SSIM for WGAN-GP: {ssim_wgan_gp}')


FID Score for DCGAN: 4.92525077454931e+114
PSNR for DCGAN: 9.971077916296816
SSIM for DCGAN: 0.009066807106137276


  ssim_values = [ssim(real_images[i].detach().cpu().numpy().transpose(1, 2, 0),
