In [7]:
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.utils as vutils
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms,models
from torchvision.transforms.functional import invert
from torchvision.utils import save_image

# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(device)

cuda


In [8]:
class StyleTransferDataset(Dataset):
    """Paired dataset of gothic-font images and handwriting images.

    The entries of each directory are listed, joined with the directory path
    and sorted; item ``i`` pairs the ``i``-th gothic path with the ``i``-th
    handwriting path.
    NOTE(review): this positional pairing presumably relies on both
    directories using matching file names -- confirm against the data layout.

    Args:
        gothic_dir: Directory containing the source (gothic) images.
        handwriting_dir: Directory containing the target (handwriting) images.
        transform: Optional callable applied to each PIL image of the pair.
    """

    def __init__(self, gothic_dir, handwriting_dir, transform=None):
        self.gothic_images = self._sorted_paths(gothic_dir)
        self.handwriting_images = self._sorted_paths(handwriting_dir)
        self.transform = transform

    @staticmethod
    def _sorted_paths(directory):
        """Return the sorted full paths of every entry in ``directory``."""
        return sorted(os.path.join(directory, name) for name in os.listdir(directory))

    @staticmethod
    def _load_rgb(path):
        """Open ``path`` and return an RGB copy, closing the file afterwards."""
        with Image.open(path) as img:
            return img.convert("RGB")

    def __len__(self):
        # Only indices that have BOTH a gothic and a handwriting file are valid;
        # using the shorter list avoids an IndexError when the directories
        # contain a different number of files.
        return min(len(self.gothic_images), len(self.handwriting_images))

    def __getitem__(self, idx):
        """Return the ``(gothic_image, handwriting_image)`` pair at ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            # Resolve negative indices against the paired length so both lists
            # are indexed at the same position even when their lengths differ.
            idx += size
        if not 0 <= idx < size:
            raise IndexError("StyleTransferDataset index out of range")

        gothic_image = self._load_rgb(self.gothic_images[idx])
        handwriting_image = self._load_rgb(self.handwriting_images[idx])

        if self.transform:
            gothic_image = self.transform(gothic_image)
            handwriting_image = self.transform(handwriting_image)

        return gothic_image, handwriting_image

# Preprocessing pipeline: resize every image to 256x256, then convert the
# PIL image to a tensor.
transform = transforms.Compose(
    [transforms.Resize((256, 256)), transforms.ToTensor()]
)

In [9]:
# Locations of the source (gothic) and target (handwriting) image folders
gothic_dir = '../data/FONT_NONE_CLASS_GODIC'
handwriting_dir = '../data/FONT_NONE_CLASS'

# Build the paired dataset and a sequential (unshuffled) loader over it
dataset = StyleTransferDataset(
    gothic_dir,
    handwriting_dir,
    transform=transform,
)
dataloader = DataLoader(dataset, shuffle=False, batch_size=32)

In [10]:
class Generator(nn.Module):
    """Image-to-image generator: frozen VGG19 encoder + trainable decoder.

    The encoder is the first 21 modules of the pretrained VGG19 feature
    extractor (the original author notes this is "up to Conv4_1"); its weights
    are frozen. The decoder maps the 512-channel feature map back to a
    3-channel image through three x2 bilinear upsampling stages (x8 overall)
    and ends in ``Tanh``, so outputs lie in [-1, 1].

    NOTE(review): elsewhere in this notebook images are loaded with
    ``ToTensor`` only, i.e. in [0, 1], while this network emits [-1, 1] --
    confirm the intended value range for the content loss.
    """

    def __init__(self):
        super().__init__()

        # torchvision >= 0.13 deprecates ``pretrained=True`` in favour of the
        # ``weights`` enum; fall back to the legacy flag on older versions.
        # IMAGENET1K_V1 is the checkpoint ``pretrained=True`` used to load.
        weights_enum = getattr(models, "VGG19_Weights", None)
        if weights_enum is not None:
            backbone = models.vgg19(weights=weights_enum.IMAGENET1K_V1)
        else:
            backbone = models.vgg19(pretrained=True)

        # VGG19 feature-extraction layers (first 21 modules of ``features``)
        self.vgg_features = backbone.features[:21]
        for param in self.vgg_features.parameters():
            param.requires_grad = False  # keep the VGG19 weights fixed

        # Additional trainable layers
        self.deconv_layers = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            nn.Conv2d(64, 3, kernel_size=3, padding=1),
            nn.Tanh()  # squashes the output into [-1, 1]
        )

    def forward(self, x):
        """Encode ``x`` with the frozen VGG19 layers, then decode to an image."""
        x = self.vgg_features(x)  # frozen VGG19 feature extraction
        x = self.deconv_layers(x)  # trainable decoder
        return x



class Discriminator(nn.Module):
    """Convolutional discriminator producing sigmoid scores.

    Four stride-2 4x4 convolutions (3 -> 64 -> 128 -> 256 -> 512 channels,
    batch-norm on all but the first) are followed by an unpadded 4x4
    convolution to a single channel and a sigmoid. The resulting score map is
    flattened to shape ``(-1, 1)``, so inputs larger than 64x64 yield several
    scores per image.
    """

    def __init__(self):
        super().__init__()

        # First stage has no batch normalisation.
        stages = [
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
        ]

        # Remaining down-sampling stages: conv -> batch-norm -> leaky ReLU.
        for in_channels, out_channels in ((64, 128), (128, 256), (256, 512)):
            stages.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1)
            )
            stages.append(nn.BatchNorm2d(out_channels))
            stages.append(nn.LeakyReLU(0.2, inplace=True))

        # Scoring head.
        stages.append(nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0))
        stages.append(nn.Sigmoid())

        self.model = nn.Sequential(*stages)

    def forward(self, x):
        """Return the flattened score map for ``x`` as a ``(-1, 1)`` tensor."""
        scores = self.model(x)
        return scores.view(-1, 1)

In [11]:

# Instantiate the generator and discriminator on the selected device.
# Using `.to(device)` instead of `.cuda()` honours the CPU fallback chosen
# when `device` was created, so the notebook also runs without a GPU.
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Loss functions
adversarial_loss = nn.BCELoss().to(device)
mse_loss = nn.MSELoss().to(device)

# Optimizers
optimizer_G = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))



In [12]:

def invert_colors(image):
    """Return ``1 - image`` (an intensity of 0 becomes 1 and vice versa)."""
    inverted = 1 - image
    return inverted

In [13]:
def show_generated_images(images, num_images=128):
    """Display up to ``num_images`` images as a single normalised grid.

    Args:
        images: Batch of image tensors accepted by ``vutils.make_grid``.
        num_images: Maximum number of images taken from the front of the batch.
    """
    plt.figure(figsize=(10, 10))
    plt.axis("off")
    plt.title("Generated Images")
    grid = vutils.make_grid(images[:num_images], padding=2, normalize=True)
    # Detach and move to the CPU so the grid can be converted to NumPy even if
    # it is still attached to autograd or lives on the GPU, then reorder
    # channels-first (C, H, W) to channels-last (H, W, C) for matplotlib.
    # `permute` avoids depending on a NumPy import from another cell.
    grid = grid.detach().cpu().permute(1, 2, 0)
    plt.imshow(grid.numpy())
    plt.show()

def save_generated_images(images, num_images, epoch, idx, output_dir='../data/RESULT_SAVE/'):
    """Save up to ``num_images`` images as one grid to ``<epoch>_<idx>.jpg``.

    Args:
        images: Batch of image tensors accepted by ``vutils.make_grid``.
        num_images: Maximum number of images taken from the front of the batch.
        epoch: Epoch number used in the file name.
        idx: Sample/step index used in the file name.
        output_dir: Directory the JPEG is written to; created if missing.
    """
    grid = vutils.make_grid(images[:num_images], padding=2, normalize=True)
    # Detach and move to the CPU so `.numpy()` works for tensors that are still
    # attached to autograd or live on the GPU, then reorder (C, H, W) to
    # (H, W, C) as expected by matplotlib.
    grid = grid.detach().cpu().permute(1, 2, 0)

    # `plt.imsave` does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)
    fname = os.path.join(output_dir, str(epoch) + '_' + str(idx) + '.jpg')

    # `imsave` writes the array directly; no figure is needed.
    plt.imsave(fname, grid.numpy())

In [None]:
# Training loop
num_epochs = 10
output_dir = 'output_images'
os.makedirs(output_dir, exist_ok=True)

# save_generated_images() writes its previews into this directory, so make
# sure it exists before the first save.
os.makedirs('../data/RESULT_SAVE/', exist_ok=True)

for epoch in range(num_epochs):
    # Visit the dataset in a fresh random order every epoch
    indices = np.random.permutation(len(dataset))

    for idx in indices:
        # One sample per step; add the batch dimension expected by the models.
        gothic, handwriting = dataset[idx]
        gothic = gothic.to(device).unsqueeze(0)
        handwriting = handwriting.to(device).unsqueeze(0)

        # ---------------------
        #  Train Generator
        # ---------------------
        optimizer_G.zero_grad()

        generated_images = generator(gothic)
        d_on_generated = discriminator(generated_images)

        # Build the targets from the discriminator output itself: this keeps
        # shape/dtype/device in sync without extra forward passes, which would
        # also update the discriminator's BatchNorm running statistics.
        valid = torch.ones_like(d_on_generated)
        fake = torch.zeros_like(d_on_generated)

        # Adversarial loss
        g_adv_loss = adversarial_loss(d_on_generated, valid)

        # Content loss
        content_loss = mse_loss(generated_images, handwriting)

        # Total loss
        g_loss = g_adv_loss + content_loss
        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()

        real_loss = adversarial_loss(discriminator(handwriting), valid)
        # detach() keeps the discriminator update from back-propagating into
        # the generator.
        fake_loss = adversarial_loss(discriminator(generated_images.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        # Output training stats. `idx` is the (shuffled) dataset index, not a
        # step counter, so the logged values do not increase monotonically.
        if idx % 50 == 0:
            print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f'
                  % (epoch, num_epochs, idx, len(dataset), d_loss.item(), g_loss.item()))
            # Preview the generator output (not the input image); no autograd
            # graph is needed for a snapshot.
            with torch.no_grad():
                fake_images = generator(gothic)
            save_generated_images(fake_images, 128, epoch=epoch, idx=idx)

print("Training completed.")

[0/10][3300/10000] Loss_D: 0.5713 Loss_G: 0.9666
[0/10][6150/10000] Loss_D: 0.5512 Loss_G: 1.1469
[0/10][50/10000] Loss_D: 0.1749 Loss_G: 2.1540
[0/10][350/10000] Loss_D: 0.0902 Loss_G: 3.8696
[0/10][9600/10000] Loss_D: 0.0751 Loss_G: 4.1107
[0/10][6450/10000] Loss_D: 0.2386 Loss_G: 2.5111
[0/10][4800/10000] Loss_D: 0.0012 Loss_G: 7.1000
[0/10][6900/10000] Loss_D: 0.0050 Loss_G: 5.8397
[0/10][2250/10000] Loss_D: 0.0133 Loss_G: 5.2150
[0/10][3600/10000] Loss_D: 0.0050 Loss_G: 5.0590
[0/10][3800/10000] Loss_D: 0.0009 Loss_G: 6.9889
[0/10][7600/10000] Loss_D: 0.0010 Loss_G: 7.1129
[0/10][6950/10000] Loss_D: 0.0006 Loss_G: 7.4407
[0/10][4250/10000] Loss_D: 0.0005 Loss_G: 8.0218
[0/10][4750/10000] Loss_D: 0.0004 Loss_G: 8.0099
[0/10][2550/10000] Loss_D: 0.0004 Loss_G: 8.2482
[0/10][7500/10000] Loss_D: 0.0004 Loss_G: 8.0707
[0/10][4600/10000] Loss_D: 0.0003 Loss_G: 8.2341
[0/10][1950/10000] Loss_D: 0.0003 Loss_G: 8.4232
[0/10][0/10000] Loss_D: 0.0004 Loss_G: 8.3467
[0/10][8200/10000] Loss_D: