In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torch.nn import utils
import matplotlib.pyplot as plt
# 指定資料集的路徑
dataset_path = './Images'

# 指定要選擇的資料夾名稱
selected_breeds = ['Samoyed', 'Siberian_husky', 'Maltese_dog', 'malamute', 'beagle','Eskimo_dog']

# 定義資料轉換
transform = transforms.Compose([
    transforms.Resize((333, 500)), 
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# 自定義資料集類別
class SelectedBreedDataset(ImageFolder):
    def __init__(self, root, transform=None, selected_breeds=None):
        super(SelectedBreedDataset, self).__init__(root, transform)
        if selected_breeds:
            self.samples = [
                (path, class_idx) for path, class_idx in self.samples
                if self.classes[class_idx] in selected_breeds
            ]
            self.targets = [class_idx for _, class_idx in self.samples]
            self.classes = selected_breeds

# 載入訓練資料集
train_dataset = SelectedBreedDataset(root=dataset_path, transform=transform, selected_breeds=selected_breeds)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)

# 獲取類別名稱
class_names = train_dataset.classes
print("類別名稱:", class_names)

# 檢查資料集的大小
print("訓練資料集大小:", len(train_dataset))

# 檢查資料載入器的批次大小
for images, labels in train_loader:
    print("訓練資料批次大小:", images.size())
    break

  from .autonotebook import tqdm as notebook_tqdm


類別名稱: ['Samoyed', 'Siberian_husky', 'Maltese_dog', 'malamute', 'beagle', 'Eskimo_dog']
訓練資料集大小: 1185
訓練資料批次大小: torch.Size([64, 3, 333, 500])


In [2]:
# 生成器網路
class Generator(nn.Module):
    def __init__(self, latent_dim=100, num_classes=10):
        super(Generator, self).__init__()
        self.latent_dim = latent_dim
        self.num_classes = num_classes
        self.label_emb = nn.Embedding(num_classes, latent_dim)

        self.model = nn.Sequential(
            nn.Linear(latent_dim * 2, 512 * 4 * 4),
            #nn.BatchNorm1d(512 * 4 * 4),
            nn.ReLU(inplace=True),
            nn.Unflatten(1, (512, 4, 4)),

            nn.ConvTranspose2d(512, 256, 4, stride=2, padding=1),
            #nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1),
            #nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1),
            #nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),

            nn.ConvTranspose2d(64, 32, 4, stride=2, padding=1),
            #nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),

            nn.ConvTranspose2d(32, 3, 4, stride=2, padding=1, output_padding=1),
            nn.Tanh()
        )

    def forward(self, z, labels):
        c = self.label_emb(labels)
        x = torch.cat([z, c], 1)
        print("Input shape before batch normalization:", x.shape)
        out = self.model(x)
        return out

In [3]:
class Discriminator(nn.Module):
    def __init__(self, num_classes=10,device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
        super(Discriminator, self).__init__()
        self.num_classes = num_classes
        self.device = device
        self.label_emb = nn.Embedding(num_classes, 512).to(self.device)
        self.conv1 = nn.Conv2d(3, 32, 4, stride=2, padding=1).to(self.device)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2, padding=1).to(self.device)
        self.conv3 = nn.Conv2d(64, 128, 4, stride=2, padding=1).to(self.device)
        self.conv4 = nn.Conv2d(128, 256, 4, stride=2, padding=1).to(self.device)
        self.conv5 = nn.Conv2d(256, 512, 4, stride=2, padding=1).to(self.device)
        self.model = nn.Sequential(
            self.conv1,
            nn.LeakyReLU(0.2, inplace=True),
            self.conv2,
            nn.LeakyReLU(0.2, inplace=True),
            self.conv3,
            nn.LeakyReLU(0.2, inplace=True),
            self.conv4,
            nn.LeakyReLU(0.2, inplace=True),
            self.conv5,
            nn.LeakyReLU(0.2, inplace=True),
            nn.Flatten(),
        ).to(self.device)
        
        self.fc = nn.Linear(512, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, img, labels):
        x = self.model(img)
        c = self.label_emb(labels)
        
        # 動態調整 self.out 層的輸入大小
        flattened_size = x.shape[1]
        self.out = nn.Sequential(
            nn.Linear(flattened_size + 512, 512),
            nn.LeakyReLU(0.2, inplace=True),
            self.fc,
            self.sigmoid
        ).to(self.device)
        
        x = torch.cat([x, c], 1)
        validity = self.out(x)
        return validity

In [4]:
from tqdm import tqdm
import os
# 訓練開始前建立目錄
os.makedirs("generated_images", exist_ok=True)
# 設定超參數
latent_dim = 100
lr = 0.0002
b1 = 0.5
b2 = 0.999
num_epochs = 200
batch_size = images.size()[0]

# 初始化生成器和判別器
generator = Generator(latent_dim,len(class_names))
discriminator = Discriminator(len(class_names))

# 設定設備
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
generator.to(device)
discriminator.to(device)

# 設定優化器
optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(b1, b2))
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(b1, b2))

# 定義損失函數
criterion = nn.BCELoss()

# 生成固定的雜訊和標籤,用於生成圖像
fixed_noise = torch.randn(100, latent_dim, device=device)
fixed_labels = torch.randint(0, len(class_names), (100,), device=device)

# 訓練迴圈
for epoch in tqdm(range(num_epochs), desc="Epochs"):
    for i, (imgs, labels) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=False)):
        print("Batch size:", imgs.shape[0])
        real_imgs = imgs.to(device)
        labels = labels.to(device)
        
        # 訓練判別器
        optimizer_D.zero_grad()
        
        # 真實圖像
        real_validity = discriminator(real_imgs, labels)
        d_real_loss = criterion(real_validity, torch.ones_like(real_validity))
        
        # 生成圖像
        z = torch.randn(batch_size, latent_dim, device=device)
        gen_labels = torch.randint(0, len(class_names), (batch_size,), device=device)
        gen_imgs = generator(z, gen_labels)
        fake_validity = discriminator(gen_imgs.detach(), gen_labels)
        d_fake_loss = criterion(fake_validity, torch.zeros_like(fake_validity))
        
        # 反向傳播與優化
        d_loss = (d_real_loss + d_fake_loss) / 2
        d_loss.backward()
        optimizer_D.step()
        
        # 訓練生成器
        optimizer_G.zero_grad()
        
        # 生成圖像
        fake_validity = discriminator(gen_imgs, gen_labels)
        g_loss = criterion(fake_validity, torch.ones_like(fake_validity))
        
        # 反向傳播與優化
        g_loss.backward()
        optimizer_G.step()
        
        # 打印訓練狀態
        if (i+1) % 100 == 0:
            print(f"[Epoch {epoch+1}/{num_epochs}] [Batch {i+1}/{len(train_loader)}] [D loss: {d_loss.item():.4f}] [G loss: {g_loss.item():.4f}]")
    
    # 在每個 epoch 結束時生成並儲存圖像
    with torch.no_grad():
        gen_imgs = generator(fixed_noise, fixed_labels).detach().cpu()
        img_grid = torchvision.utils.make_grid(gen_imgs, nrow=10, normalize=True)
        plt.figure(figsize=(10,10))
        plt.imshow(img_grid.permute(1, 2, 0))
        plt.axis('off')
        plt.title(f"Epoch {epoch+1}")
        plt.savefig(f"generated_images/epoch_{epoch+1}.png", bbox_inches='tight')
        plt.close()

Epochs:   0%|          | 0/200 [00:00<?, ?it/s]

Batch size: 64


Epochs:   0%|          | 0/200 [00:01<?, ?it/s]


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.