In [4]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
import os
from PIL import Image

# Hyperparameters
batch_size = 4  # 큰 이미지를 처리하므로 배치 크기를 줄였습니다.
lr = 0.0002
num_epochs = 5
img_size = 512  # 이미지 크기를 512로 설정
channels = 1  # 흑백 이미지일 경우 1, 컬러 이미지일 경우 3
img_shape = (channels, img_size, img_size)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data loading
transform = transforms.Compose(
    [
        transforms.Resize((img_size, img_size)),  # 이미지 크기를 512x512로 조정
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ]
)


# Custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, input_dir, gt_dir, transform=None):
        self.input_dir = input_dir
        self.gt_dir = gt_dir
        self.transform = transform
        self.input_images = sorted(os.listdir(input_dir))
        self.gt_images = sorted(os.listdir(gt_dir))

    def __len__(self):
        return len(self.input_images)

    def __getitem__(self, idx):
        input_image = Image.open(
            os.path.join(self.input_dir, self.input_images[idx])
        ).convert("L")
        gt_image = Image.open(os.path.join(self.gt_dir, self.gt_images[idx])).convert(
            "L"
        )

        if self.transform:
            input_image = self.transform(input_image)
            gt_image = self.transform(gt_image)

        return input_image, gt_image


# Directories
input_dir = "./train_input"
gt_dir = "./train_gt"

# Data loading
custom_dataset = CustomDataset(input_dir, gt_dir, transform=transform)
custom_dataloader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=True)


# Generator model
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()

        def down_block(
            in_channels, out_channels, kernel_size, stride, padding, normalize=True
        ):
            layers = [
                nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
            ]
            if normalize:
                layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return layers

        def up_block(in_channels, out_channels, kernel_size, stride, padding):
            layers = [
                nn.ConvTranspose2d(
                    in_channels, out_channels, kernel_size, stride, padding
                ),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
            return layers

        self.encoder = nn.Sequential(
            *down_block(
                channels, 64, 4, 2, 1, normalize=False
            ),  # [batch, 64, 256, 256]
            *down_block(64, 128, 4, 2, 1),  # [batch, 128, 128, 128]
            *down_block(128, 256, 4, 2, 1),  # [batch, 256, 64, 64]
            *down_block(256, 512, 4, 2, 1),  # [batch, 512, 32, 32]
            *down_block(512, 512, 4, 2, 1),  # [batch, 512, 16, 16]
            *down_block(512, 512, 4, 2, 1),  # [batch, 512, 8, 8]
            *down_block(512, 512, 4, 2, 1),  # [batch, 512, 4, 4]
            *down_block(512, 512, 4, 2, 1),  # [batch, 512, 2, 2]
            *down_block(512, 512, 4, 2, 1),  # [batch, 512, 1, 1]
        )

        self.decoder = nn.Sequential(
            *up_block(512, 512, 4, 2, 1),  # [batch, 512, 2, 2]
            *up_block(512, 512, 4, 2, 1),  # [batch, 512, 4, 4]
            *up_block(512, 512, 4, 2, 1),  # [batch, 512, 8, 8]
            *up_block(512, 512, 4, 2, 1),  # [batch, 512, 16, 16]
            *up_block(512, 512, 4, 2, 1),  # [batch, 512, 32, 32]
            *up_block(512, 256, 4, 2, 1),  # [batch, 256, 64, 64]
            *up_block(256, 128, 4, 2, 1),  # [batch, 128, 128, 128]
            *up_block(128, 64, 4, 2, 1),  # [batch, 64, 256, 256]
            nn.ConvTranspose2d(64, channels, 4, 2, 1),  # [batch, channels, 512, 512]
            nn.Tanh(),
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


# Discriminator model
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()

        def block(
            in_channels, out_channels, kernel_size, stride, padding, normalize=True
        ):
            layers = [
                nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
            ]
            if normalize:
                layers.append(nn.InstanceNorm2d(out_channels))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return layers

        self.model = nn.Sequential(
            *block(channels, 64, 4, 2, 1, normalize=False),  # [batch, 64, 256, 256]
            *block(64, 128, 4, 2, 1),  # [batch, 128, 128, 128]
            *block(128, 256, 4, 2, 1),  # [batch, 256, 64, 64]
            *block(256, 512, 4, 2, 1),  # [batch, 512, 32, 32]
            *block(512, 512, 4, 2, 1),  # [batch, 512, 16, 16]
            *block(512, 512, 4, 2, 1),  # [batch, 512, 8, 8]
            *block(512, 512, 4, 2, 1),  # [batch, 512, 4, 4]
            nn.Conv2d(512, 1, 4, 1, 0),  # [batch, 1, 1, 1]
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.model(x).view(-1, 1)


# 모델 초기화
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Loss function
criterion = nn.BCELoss()

# Optimizers
optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))

# Training loop
for epoch in range(num_epochs):
    for i, (input_imgs, gt_imgs) in enumerate(custom_dataloader):

        # Adversarial ground truths
        valid = torch.ones(input_imgs.size(0), 1, requires_grad=False).to(device)
        fake = torch.zeros(input_imgs.size(0), 1, requires_grad=False).to(device)

        # Configure input
        real_imgs = gt_imgs.to(device)
        input_imgs = input_imgs.to(device)

        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()

        # Generate a batch of images
        gen_imgs = generator(input_imgs)

        # Loss measures generator's ability to fool the discriminator
        pred_fake = discriminator(gen_imgs)
        g_loss = criterion(pred_fake, valid)

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()

        # Real images
        pred_real = discriminator(real_imgs)
        real_loss = criterion(pred_real, valid)

        # Fake images
        pred_fake = discriminator(gen_imgs.detach())
        fake_loss = criterion(pred_fake, fake)

        # Total loss
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        print(
            f"[Epoch {epoch+1}/{num_epochs}] [Batch {i+1}/{len(custom_dataloader)}] [D loss: {d_loss.item():.4f}] [G loss: {g_loss.item():.4f}]"
        )

[Epoch 1/5] [Batch 1/7401] [D loss: 0.7038] [G loss: 0.8415]
[Epoch 1/5] [Batch 2/7401] [D loss: 0.3768] [G loss: 2.6877]
[Epoch 1/5] [Batch 3/7401] [D loss: 1.4631] [G loss: 0.0704]
[Epoch 1/5] [Batch 4/7401] [D loss: 0.2719] [G loss: 14.4704]


KeyboardInterrupt: 

In [6]:
import torch.nn as nn

# Generator model
class Generator(nn.Module):
  def __init__(self):
    super(Generator, self).__init__()
    self.model = nn.Sequential(
      nn.Linear(100, 256),
      nn.LeakyReLU(0.2),
      nn.BatchNorm1d(256),
      nn.Linear(256, 512),
      nn.LeakyReLU(0.2),
      nn.BatchNorm1d(512),
      nn.Linear(512, 1024),
      nn.LeakyReLU(0.2),
      nn.BatchNorm1d(1024),
      nn.Linear(1024, 784),
      nn.Tanh()
    )

  def forward(self, x):
    return self.model(x)

# Example usage
generator = Generator()
print(generator)
# Discriminator model
class Discriminator(nn.Module):
  def __init__(self):
    super(Discriminator, self).__init__()
    self.model = nn.Sequential(
      nn.Linear(784, 1024),
      nn.LeakyReLU(0.2),
      nn.Dropout(0.3),
      nn.Linear(1024, 512),
      nn.LeakyReLU(0.2),
      nn.Dropout(0.3),
      nn.Linear(512, 256),
      nn.LeakyReLU(0.2),
      nn.Dropout(0.3),
      nn.Linear(256, 1),
      nn.Sigmoid()
    )

  def forward(self, x):
    return self.model(x)

# Example usage
discriminator = Discriminator()
print(discriminator)

Generator(
  (model): Sequential(
    (0): Linear(in_features=100, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.2)
    (2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Linear(in_features=256, out_features=512, bias=True)
    (4): LeakyReLU(negative_slope=0.2)
    (5): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Linear(in_features=512, out_features=1024, bias=True)
    (7): LeakyReLU(negative_slope=0.2)
    (8): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): Linear(in_features=1024, out_features=784, bias=True)
    (10): Tanh()
  )
)
Discriminator(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=1024, bias=True)
    (1): LeakyReLU(negative_slope=0.2)
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): LeakyReLU(negative_slope=0.2)
    (5): Dropout(p=0.3, 

In [None]:
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
import os
from PIL import Image

# Hyperparameters
batch_size = 64
lr = 0.0002
num_epochs = 5
img_shape = (1, 28, 28)  # 이미지 크기에 맞게 수정 필요

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data loading
transform = transforms.Compose(
    [
        transforms.Grayscale(),  # 이미지가 컬러인 경우 추가
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ]
)


# Custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, input_dir, gt_dir, transform=None):
        self.input_dir = input_dir
        self.gt_dir = gt_dir
        self.transform = transform
        self.input_images = sorted(os.listdir(input_dir))
        self.gt_images = sorted(os.listdir(gt_dir))

    def __len__(self):
        return len(self.input_images)

    def __getitem__(self, idx):
        input_image = Image.open(os.path.join(self.input_dir, self.input_images[idx]))
        gt_image = Image.open(os.path.join(self.gt_dir, self.gt_images[idx]))

        if self.transform:
            input_image = self.transform(input_image)
            gt_image = self.transform(gt_image)

        return input_image, gt_image


# Directories
input_dir = "./train_input"
gt_dir = "./train_gt"

# Data loading
custom_dataset = CustomDataset(input_dir, gt_dir, transform=transform)
custom_dataloader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=True)


# Generator model
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(1, 64, 4, 2, 1),
            nn.ReLU(inplace=True),
            # 추가 레이어들...
            nn.ConvTranspose2d(64, 1, 4, 2, 1),
            nn.Tanh(),
        )

    def forward(self, x):
        return self.main(x)


# Discriminator model
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(1, 64, 4, 2, 1),
            nn.LeakyReLU(0.2, inplace=True),
            # 추가 레이어들...
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.main(x)


# 모델 초기화
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Loss function
criterion = nn.BCELoss()

# Optimizers
optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))

# Training loop
for epoch in range(num_epochs):
    for i, (input_imgs, gt_imgs) in enumerate(custom_dataloader):

        # Adversarial ground truths
        valid = torch.ones((input_imgs.size(0), 1), requires_grad=False).to(device)
        fake = torch.zeros((input_imgs.size(0), 1), requires_grad=False).to(device)

        # Configure input
        real_imgs = gt_imgs.to(device)
        input_imgs = input_imgs.to(device)

        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()

        # Generate a batch of images
        gen_imgs = generator(input_imgs)

        # Loss measures generator's ability to fool the discriminator
        g_loss = criterion(discriminator(gen_imgs), valid)

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()

        # Measure discriminator's ability to classify real from generated samples
        real_loss = criterion(discriminator(real_imgs), valid)
        fake_loss = criterion(discriminator(gen_imgs.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        print(
            f"[Epoch {epoch}/{num_epochs}] [Batch {i}/{len(custom_dataloader)}] [D loss: {d_loss.item()}] [G loss: {g_loss.item()}]"
        )

RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x4194304 and 3136x1)

In [9]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os
from PIL import Image
import torch.nn as nn
import torch.optim as optim

# Hyperparameters
batch_size = 64
lr = 0.0002
num_epochs = 5
latent_dim = 100
img_shape = (1, 28, 28)

# Data loading
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
)

mnist = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
dataloader = DataLoader(mnist, batch_size=batch_size, shuffle=True)

# Loss function
criterion = nn.BCELoss()

# Optimizers (generator와 discriminator는 이전에 정의되어 있어야 합니다)
optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))

# Training loop
for epoch in range(num_epochs):
    for i, (imgs, _) in enumerate(dataloader):

        # Adversarial ground truths
        valid = torch.ones((imgs.size(0), 1), requires_grad=False).to(device)
        fake = torch.zeros((imgs.size(0), 1), requires_grad=False).to(device)

        # Configure input
        real_imgs = imgs.to(device)
        real_imgs = real_imgs.view(real_imgs.size(0), -1)

        # Generate a batch of images
        gen_imgs = generator(z)
        gen_imgs = gen_imgs.view(gen_imgs.size(0), -1)

        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()

        # Sample noise as generator input
        z = torch.randn((imgs.size(0), latent_dim)).to(device)

        # Generate a batch of images
        gen_imgs = generator(z)

        # Loss measures generator's ability to fool the discriminator
        g_loss = criterion(discriminator(gen_imgs), valid)

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()

        # Measure discriminator's ability to classify real from generated samples
        real_loss = criterion(discriminator(real_imgs), valid)
        fake_loss = criterion(discriminator(gen_imgs.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        print(
            f"[Epoch {epoch}/{num_epochs}] [Batch {i}/{len(dataloader)}] [D loss: {d_loss.item()}] [G loss: {g_loss.item()}]"
        )


# Custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, input_dir, gt_dir, transform=None):
        self.input_dir = input_dir
        self.gt_dir = gt_dir
        self.transform = transform
        self.input_images = sorted(os.listdir(input_dir))
        self.gt_images = sorted(os.listdir(gt_dir))

    def __len__(self):
        return len(self.input_images)

    def __getitem__(self, idx):
        input_image = Image.open(
            os.path.join(self.input_dir, self.input_images[idx])
        ).convert("L")
        gt_image = Image.open(os.path.join(self.gt_dir, self.gt_images[idx])).convert(
            "L"
        )

        if self.transform:
            input_image = self.transform(input_image)
            gt_image = self.transform(gt_image)

        return input_image, gt_image


# Directories
input_dir = "./train_input"
gt_dir = "./train_gt"

# Data loading
custom_dataset = CustomDataset(input_dir, gt_dir, transform=transform)
custom_dataloader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=True)

# Training loop for custom dataset
for epoch in range(num_epochs):
    for i, (input_imgs, gt_imgs) in enumerate(custom_dataloader):

        # Adversarial ground truths
        # Adversarial ground truths
        valid = torch.ones((input_imgs.size(0), 1), requires_grad=False).to(device)
        fake = torch.zeros((input_imgs.size(0), 1), requires_grad=False).to(device)

        # Sample noise as generator input
        z = torch.randn((input_imgs.size(0), latent_dim)).to(device)

        # Configure input
        real_imgs = gt_imgs.to(device)

        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()

        # Generate a batch of images
        gen_imgs = generator(z)

        # Loss measures generator's ability to fool the discriminator
        g_loss = criterion(discriminator(gen_imgs), valid)

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()

        # Measure discriminator's ability to classify real from generated samples
        real_loss = criterion(discriminator(real_imgs), valid)
        fake_loss = criterion(discriminator(gen_imgs.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        print(
            f"[Epoch {epoch}/{num_epochs}] [Batch {i}/{len(custom_dataloader)}] [D loss: {d_loss.item()}] [G loss: {g_loss.item()}]"
        )

[Epoch 0/5] [Batch 0/938] [D loss: 0.4207763671875] [G loss: 0.9376246333122253]
[Epoch 0/5] [Batch 1/938] [D loss: 1.0952956676483154] [G loss: 3.9182088375091553]
[Epoch 0/5] [Batch 2/938] [D loss: 0.4105992913246155] [G loss: 1.275123119354248]
[Epoch 0/5] [Batch 3/938] [D loss: 0.6782832145690918] [G loss: 0.3810550570487976]
[Epoch 0/5] [Batch 4/938] [D loss: 0.4412800073623657] [G loss: 0.9696307182312012]
[Epoch 0/5] [Batch 5/938] [D loss: 0.5038331151008606] [G loss: 1.936354398727417]
[Epoch 0/5] [Batch 6/938] [D loss: 0.4852534830570221] [G loss: 1.2654916048049927]
[Epoch 0/5] [Batch 7/938] [D loss: 0.5434651374816895] [G loss: 0.8343309760093689]
[Epoch 0/5] [Batch 8/938] [D loss: 0.4930841624736786] [G loss: 1.0141769647598267]
[Epoch 0/5] [Batch 9/938] [D loss: 0.48950207233428955] [G loss: 1.2621713876724243]
[Epoch 0/5] [Batch 10/938] [D loss: 0.5283129215240479] [G loss: 0.919470489025116]
[Epoch 0/5] [Batch 11/938] [D loss: 0.5259958505630493] [G loss: 1.0776848793029

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32768x512 and 784x1024)

In [None]:
import matplotlib.pyplot as plt
import torchvision.utils as vutils

# 잠재 공간에서 무작위 노이즈 생성
z = torch.randn(64, latent_dim).to(device)

# 생성자를 사용하여 이미지 생성
generator.eval()  # 평가 모드로 전환
with torch.no_grad():
    gen_imgs = generator(z)

# 이미지 형태 조정 및 정규화 해제
gen_imgs = gen_imgs.view(gen_imgs.size(0), *img_shape)
gen_imgs = gen_imgs * 0.5 + 0.5  # [-1,1] 범위를 [0,1]로 변환

# 이미지 시각화
grid = vutils.make_grid(gen_imgs.cpu(), nrow=8, normalize=True)
plt.figure(figsize=(8, 8))
plt.imshow(grid.permute(1, 2, 0))
plt.axis("off")
plt.show()