In [3]:
# !wget htts://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/facades.tar.gz -0 facades.tar.gz
# !tar -zxvf facades.tar.gz -C ./
#!tar -zxvf edges2shoes.tar.gz -C ./

In [4]:
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torch.nn as nn

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.utils import save_image

In [5]:
print("train dataset: ", len(next(os.walk('./facades/train/'))[2]))
print("val dataset: ", len(next(os.walk('./facades/val/'))[2]))
print("test dataset: ", len(next(os.walk('./facades/test/'))[2]))

train dataset:  400
val dataset:  100
test dataset:  106


In [6]:
class FacadeDataset(Dataset):
    def __init__(self, root, transforms_=None, mode="train"):
        self.transform = transforms_

        self.files = sorted(glob.glob(os.path.join(root, mode) + "/*.jpg"))

        if mode == "train":
            self.files.extend(sorted(glob.glob(os.path.join(root,"test") + "/*.jpg")))
    
    def __getitem__(self, index):
        img = Image.open(self.files[index % len(self.files)])
        w, h = img.size
        img_A = img.crop((0,0, w/2, h)) # 이미지의 왼쪽 절반
        img_B = img.crop((w/2, 0, w, h)) # 이미지의 오른쪽 절반

        # Data augmentation을 위한 좌우 반전 (horizontal filp)
        if np.random.random() < 0.5:
            img_A = Image.fromarray(np.array(img_A)[:, ::-1, :], "RGB")
            img_B = Image.fromarray(np.array(img_B)[:, ::-1, :], "RGB")

        img_A = self.transform(img_A)
        img_B = self.transform(img_B)

        return {"A": img_A, "B": img_B}
    
    def __len__(self):
        return len(self.files)
     

In [7]:
transforms_ = transforms.Compose([
    transforms.Resize((256, 256), Image.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = FacadeDataset("facades", transforms_=transforms_)
val_dataset = FacadeDataset("facades", transforms_=transforms_)

train_dataloader = DataLoader(train_dataset, batch_size=10, shuffle=True, num_workers=0)
val_dataloader = DataLoader(val_dataset, batch_size=10, shuffle=True, num_workers=0)

In [None]:
# U-Net 아키텍처의 Down Sampling 모듈
class UNetDown(nn.Module):
    def __init__(self, in_channels, out_channels, normalize=True, dropout=0.0):
        super(UNetDown, self).__init__()
        # W와 H가 2배씩 감소 (-> stride를 2로 놨기 때문)
        layers = [nn.Conv2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1, bias=False)]
        
        # 일반적으로 사용하는 BatchNorm 대신 InstanceNorm을 사용
        if normalize:
            layers.append(nn.InstanceNorm2d(out_channels))
        
        layers.append(nn.LeakyReLU(0.2))
        
        if dropout:
            layers.append(nn.Dropout(dropout))
        
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

class UNetUp(nn.Module):
    def __init__(self, in_channels, out_channels, dropout=0.0):
        super(UNetUp, self).__init__()
        # 너비와 높이가 2배씩 증가
        layers = [nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1, bias=False)]
        layers.append(nn.InstanceNorm2d(out_channels))
        layers.append(nn.ReLU(inplace=True))
        if dropout:
            layers.append(nn.Dropout(dropout))
        self.model = nn.Sequential(*layers)

    def forward(self, x, skip_input):
        x = self.model(x)
        x = torch.cat((x, skip_input), 1) # channel level에서 합치기(concatenation)

        return x

# Generator (U-Net)
class GeneratorUNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=3):
        super(GeneratorUNet, self).__init__()

        self.down1 = UNetDown(in_channels, 64, normalize=False)         # 출력: [64 x 128 x 128]
        self.down2 = UNetDown(64, 128)                                  # 출력: [128 x 64 x 64]
        self.down3 = UNetDown(128, 256)                                 # 출력: [256 x 32 x 32]
        self.down4 = UNetDown(256, 512, dropout=0.5)                    # 출력: [512 x 16 x 16]
        self.down5 = UNetDown(512, 512, dropout=0.5)                    # 출력: [512 x  8 x  8]
        self.down6 = UNetDown(512, 512, dropout=0.5)                    # 출력: [512 x  4 x  4]
        self.down7 = UNetDown(512, 512, dropout=0.5)                    # 출력: [512 x  2 x  2]
        self.down8 = UNetDown(512, 512, normalize=False, dropout=0.5)   # 출력: [512 x  1 x  1]

        # skip connection 사용 (출력 채널의 크기 X 2 == 다음 입력 채널의 크기)
        self.up1 = UNetUp(512, 512, dropout=0.5)                        # 출력: [1024 x 2 x 2]
        self.up2 = UNetUp(1024, 512, dropout=0.5)                       # 출력: [1024 x 4 x 4]
        self.up3 = UNetUp(1024, 512, dropout=0.5)                       # 출력: [1024 x 8 x 8]
        self.up4 = UNetUp(1024, 512, dropout=0.5)                       # 출력: [1024 x 16 x 16]
        self.up5 = UNetUp(1024, 256)                                    # 출력: [512 x 32 x 32]
        self.up6 = UNetUp(512, 128)                                     # 출력: [256 x 64 x 64]
        self.up7 = UNetUp(256, 64)                                      # 출력: [128  x 128 x 128]

        self.final = nn.Sequential(
            nn.Upsample(scale_factor=2),                                # 출력: [128 x 256 x 256]
            nn.ZeroPad2d((1, 0, 1, 0)),
            nn.Conv2d(128, out_channels, kernel_size=4, padding=1),     # 출력: [3 x 256 x 256]
            nn.Tanh(),
        )
    
    def forward(self, x):
        d1 = self.down1(x)
        d2 = self.down2(d1)
        d3 = self.down3(d2)
        d4 = self.down4(d3)
        d5 = self.down5(d4)
        d6 = self.down6(d5)
        d7 = self.down7(d6)
        d8 = self.down8(d7)
        u1 = self.up1(d8, d7)
        u2 = self.up2(u1, d6)
        u3 = self.up3(u2, d5)
        u4 = self.up4(u3, d4)
        u5 = self.up5(u4, d3)
        u6 = self.up6(u5, d2)
        u7 = self.up7(u6, d1)

        return self.final(u7)

class Discriminator(nn.Module):
    def __init__(self, in_channels=3):
        super(Discriminator, self).__init__()

        def discriminator_block(in_channels, out_channels, normalization=True):
            # 너비와 높이가 2배씩 감소
            layers = [nn.Conv2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1)]
            if normalization:
                layers.append(nn.InstanceNorm2d(out_channels))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return layers
        self.model = nn.Sequential(
            # 두 개의 이미지(실제/변환된 이미지, 조건 이미지)를 입력 받으므로 입력 채널의 크기는 2배
            *discriminator_block(in_channels * 2, 64, normalization=False),                     # 출력: [64 x 128 x 128]
            *discriminator_block(64, 128),                                                      # 출력: [128 x 64 x 64]
            *discriminator_block(128, 256),                                                     # 출력: [256 x 32 x 32]
            *discriminator_block(256, 512),                                                     # 출력: [512 x 16 x 16]
            nn.ZeroPad2d((1,0,1,0)),
            nn.Conv2d(512, 1, kernel_size=4, padding=1, bias=False)                             # 출력: [1 x 16 x 16]           
        )
    # img_A: 실제/변환된 이미지, img_B: 조건(condition)
    def forward(self, imgA, imgB):
        # 이미지 두 개를 채널 레벨에서 연결하여(concatenate) 입력 데이터 생성
        img_input = torch.cat((imgA, imgB), 1)
        return self.model(img_input)


In [9]:
def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)

generator = GeneratorUNet()
discriminator = Discriminator()

generator.cuda()
discriminator.cuda()

# 가중치(weights) 초기화
generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)

# 손실 함수(loss function)
criterion_GAN = torch.nn.MSELoss()
criterion_pixelwise = torch.nn.L1Loss()

criterion_GAN.cuda()
criterion_pixelwise.cuda()

# 학습률(learning rate) 설정
lr = 0.0002

# 생성자와 판별자를 위한 최적화 함수
optimizer_G = torch.optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))

In [10]:
import time

epochs = 200
sample_interval = 200 # 몇번의 배치마다 결과를 출력할 것인지 설정

# 변환된 이미지와 정답 이미지 사이의 L1 pixel-wise 손실 weight parameter
lambda_pixel = 100

start_time = time.time()

for epoch in range(epochs):
    for i, batch in enumerate(train_dataloader):
        real_A = batch["B"].cuda()
        real_B = batch["A"].cuda()

        # 진짜(real)이미지와 가짜(fake) 이미지에 대한 정답 레이블 생성 (너비와 높이를 16씩 나눈 크기)
        real = torch.cuda.FloatTensor(real_A.size(0), 1, 16, 16).fill_(1.0) # 진짜(real): 1
        fake = torch.cuda.FloatTensor(real_A.size(0), 1, 16, 16).fill_(0.0) # 가짜(fake): 0

        # 생성자(generator)를 학습
        optimizer_G.zero_grad()

        # 이미지 생성
        fake_B = generator(real_A)

        # 생성자(generator)의 손실(loss) 값 계산
        loss_GAN = criterion_GAN(discriminator(fake_B, real_A), real)

        # 픽셀 단위(pixel-wise) L1 손실 값 계산
        loss_pixel = criterion_pixelwise(fake_B, real_B) 

        # 최종적인 손실(loss)
        loss_G = loss_GAN + lambda_pixel * loss_pixel

        # 생성자(generator) 업데이트
        loss_G.backward()
        optimizer_G.step()

        # 판별자(discriminator)를 학습
        optimizer_D.zero_grad()

        # 판별자(discriminator)의 손실(loss) 값 계산
        loss_real = criterion_GAN(discriminator(real_B, real_A), real) # 조건(condition): real_A
        loss_fake = criterion_GAN(discriminator(fake_B.detach(), real_A), fake)
        loss_D = (loss_real + loss_fake) / 2

        # 판별자(discriminator) 업데이트
        loss_D.backward()
        optimizer_D.step()

        done = epoch * len(train_dataloader) + i
        if done % sample_interval == 0:
            imgs = next(iter(val_dataloader)) # 10개의 이미지를 추출해 생성
            real_A = imgs["B"].cuda()
            real_B = imgs["A"].cuda()
            fake_B = generator(real_A)
            # real_A: 조건(condition), fake_B: 변환된 이미지(translated image), real_B: 정답 이미지
            img_sample = torch.cat((real_A.data, fake_B.data, real_B.data), -2) # 높이(height)를 기준으로 이미지를 연결하기
            save_image(img_sample, f"{done}.png", nrow=5, normalize=True)

    # 하나의 epoch이 끝날 때마다 로그(log) 출력
    print(f"[Epoch {epoch}/{epochs}] [D loss: {loss_D.item():.6f}] [G loss: {loss_G.item():.6f}]")
     


  real = torch.cuda.FloatTensor(real_A.size(0), 1, 16, 16).fill_(1.0) # 진짜(real): 1


[Epoch 0/200] [D loss: 0.393183] [G loss: 37.645794]
[Epoch 1/200] [D loss: 0.269841] [G loss: 40.689911]
[Epoch 2/200] [D loss: 0.109619] [G loss: 42.786335]
[Epoch 3/200] [D loss: 0.178152] [G loss: 28.858334]
[Epoch 4/200] [D loss: 0.101030] [G loss: 40.407902]
[Epoch 5/200] [D loss: 0.074875] [G loss: 31.086159]
[Epoch 6/200] [D loss: 0.065683] [G loss: 32.500851]
[Epoch 7/200] [D loss: 0.081637] [G loss: 34.368633]
[Epoch 8/200] [D loss: 0.087581] [G loss: 32.751453]
[Epoch 9/200] [D loss: 0.054882] [G loss: 35.529858]
[Epoch 10/200] [D loss: 0.111606] [G loss: 34.530083]
[Epoch 11/200] [D loss: 0.054014] [G loss: 33.015911]
[Epoch 12/200] [D loss: 0.123397] [G loss: 33.169804]
[Epoch 13/200] [D loss: 0.030678] [G loss: 35.079857]
[Epoch 14/200] [D loss: 0.062913] [G loss: 38.068287]
[Epoch 15/200] [D loss: 0.119451] [G loss: 39.732803]
[Epoch 16/200] [D loss: 0.066986] [G loss: 31.126286]
[Epoch 17/200] [D loss: 0.084212] [G loss: 28.811249]
[Epoch 18/200] [D loss: 0.034983] [G l