# GlyphGAN
Deep convolutional GAN trained on glyphs



In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data
import torch.nn.functional as F
import os
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from PIL import Image
import imageio

## Download the dataset from Kaggle
Store your kaggle.json file in your Google Drive.

In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM so kaggle.json can be copied from it.
gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

Mounted at /content/gdrive


In [None]:
# Install the Kaggle command-line client (-q keeps pip's output short).
!pip install -q kaggle
# Create ~/.kaggle, the destination of the copy below (no error if it exists).
!mkdir -p ~/.kaggle
# Copy kaggle.json from the mounted Drive.
# Replace the first string with your file's location:
%cp "/content/gdrive/My Drive/kaggle.json" "/root/.kaggle/"
# List the directory to confirm the file was copied.
!ls ~/.kaggle
# Restrict the file to owner read/write only.
!chmod 600 /root/.kaggle/kaggle.json
# Download your dataset:
!kaggle datasets download font-book 

kaggle.json
Downloading font-book.zip to /content
 40% 9.00M/22.3M [00:00<00:00, 58.9MB/s]
100% 22.3M/22.3M [00:00<00:00, 88.6MB/s]


In [None]:
# Extract the downloaded archive into the current working directory.
!unzip font-book.zip

## Set up the model

In [None]:
class Discriminator(nn.Module):
    """Convolutional discriminator that scores images as real or generated.

    Six stride-2 convolutions each halve the spatial size, so the
    ``4 * 4 * 1024`` input of the final linear layer corresponds to
    3-channel 256x256 images (256 / 2**6 == 4).

    Args:
        alpha: Negative slope shared by every ``LeakyReLU`` activation.
    """

    def __init__(self, alpha=0.2):
        super().__init__()

        kernel_size = 4
        padding = 1
        stride = 2

        self.net = nn.Sequential(
            # The first convolution is not followed by batch normalisation.
            nn.Conv2d(3, 128, kernel_size, stride, padding),
            nn.LeakyReLU(alpha),
            nn.Conv2d(128, 256, kernel_size, stride, padding),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(alpha),
            nn.Conv2d(256, 512, kernel_size, stride, padding),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(alpha),
            nn.Conv2d(512, 512, kernel_size, stride, padding),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(alpha),
            nn.Conv2d(512, 512, kernel_size, stride, padding),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(alpha),
            nn.Conv2d(512, 1024, kernel_size, stride, padding),
            nn.BatchNorm2d(1024),
            nn.LeakyReLU(alpha),
        )
        self.output = nn.Linear(4 * 4 * 1024, 1)

    def forward(self, x):
        """Score a batch of images.

        Args:
            x: Image batch fed to the convolutional stack.

        Returns:
            A ``(batch, 1)`` tensor. In training mode these are raw logits
            (the training code applies BCE-with-logits); in eval mode they
            are sigmoid probabilities.
        """
        x = self.net(x)
        x = torch.reshape(x, (-1, 4 * 4 * 1024))
        x = self.output(x)

        if self.training:
            return x

        # torch.sigmoid replaces the deprecated F.sigmoid alias.
        return torch.sigmoid(x)

In [None]:
class Generator(nn.Module):
    """Transposed-convolution generator mapping latent vectors to images.

    A linear layer projects the latent vector to a 1024x4x4 feature map.
    Six stride-2 transposed convolutions then double the spatial size each
    time, ending in a 3-channel ``Tanh`` output.

    Args:
        input_size: Length of the latent vector.
        alpha: Negative slope shared by every ``LeakyReLU`` activation.
    """

    def __init__(self, input_size=200, alpha=0.2):
        super().__init__()

        # Geometry shared by all transposed convolutions.
        deconv_args = dict(kernel_size=4, stride=2, padding=1)
        # Channel widths from the projected feature map down to the last
        # hidden layer; the RGB output layer is appended separately.
        widths = (1024, 512, 512, 512, 256, 128)

        self.input = nn.Linear(input_size, 4 * 4 * widths[0])

        layers = [nn.BatchNorm2d(widths[0]), nn.LeakyReLU(alpha)]
        for c_in, c_out in zip(widths, widths[1:]):
            layers += [
                nn.ConvTranspose2d(c_in, c_out, **deconv_args),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(alpha),
            ]
        layers += [nn.ConvTranspose2d(widths[-1], 3, **deconv_args), nn.Tanh()]
        self.net = nn.Sequential(*layers)

    def forward(self, z):
        """Generate images from a batch of latent vectors ``z``."""
        feature_map = self.input(z).view(-1, 1024, 4, 4)
        return self.net(feature_map)

In [None]:
class ImageFolderEX(datasets.ImageFolder):
    """``ImageFolder`` variant that skips unreadable images and drops labels.

    ``__getitem__`` returns only the transformed image (no class label),
    rescaled with ``* 2 - 1``; this maps [0, 1] to [-1, 1], assuming the
    configured transform yields values in [0, 1].
    """

    def __getitem__(self, index):
        """Return the transformed image at ``index``.

        If that image cannot be loaded, the following images are tried in
        order (wrapping around to the start of the dataset) until one loads.

        Raises:
            IndexError: If ``index`` is outside the dataset.
            RuntimeError: If no image in the dataset can be loaded.
        """
        total = len(self.imgs)
        # Keep normal sequence semantics for out-of-range indices so that
        # plain iteration over the dataset still terminates.
        if not -total <= index < total:
            raise IndexError(f"index {index} out of range for {total} images")

        start = index % total
        last_error = None
        for offset in range(total):
            # Entries of ``self.imgs`` already contain the dataset root, so
            # the path is passed to the loader unchanged; joining the root
            # again would break relative roots.
            path, _ = self.imgs[(start + offset) % total]
            try:
                img = self.loader(path)
            except Exception as err:  # unreadable/corrupt file: try the next
                last_error = err
                continue
            return self.transform(img) * 2 - 1  # rescale 0 - 1 to -1 - 1

        raise RuntimeError("no image in the dataset could be loaded") from last_error
# Preprocessing applied to every training image.
trans = transforms.Compose([
    # Resize defaults to bilinear interpolation, which is what the integer
    # code 2 selected; relying on the default avoids torchvision's
    # deprecated integer interpolation codes.
    transforms.Resize((256, 256)),
    transforms.ToTensor(),  # implicitly normalizes the input to values between 0 - 1.
])

In [None]:
def train_dis(dis, gen, x):
    """Run one discriminator update on a real batch and a generated batch.

    The step is applied with the module-level ``optimizer_dis``.

    Args:
        dis: Discriminator; must return raw logits, since the loss is
            BCE-with-logits.
        gen: Generator that maps ``(batch, 200)`` latent vectors to images.
        x: Batch of real images.

    Returns:
        The scalar loss tensor (fake loss + real loss).
    """
    device = next(gen.parameters()).device

    # Size the fake batch from the real batch so predictions and labels
    # always have matching shapes.
    z = torch.tensor(np.random.normal(0, 1, (x.shape[0], 200)), dtype=torch.float32)
    x = x.to(device)
    z = z.to(device)

    dis.zero_grad()
    y_real_pred = dis(x)

    # Label noise: roughly 3% of the samples get their real/fake labels
    # swapped. A boolean mask flips exactly the drawn samples; integer
    # indices from np.argwhere would also hit row 0 whenever anything flips.
    flip = np.random.uniform(0, 1, y_real_pred.shape) < 0.03

    # Label smoothing: one random offset per batch (not per sample).
    ones = np.ones(y_real_pred.shape) + np.random.uniform(-0.1, 0.1)
    ones[flip] = 0

    zeros = np.zeros(y_real_pred.shape) + np.random.uniform(0, 0.2)
    zeros[flip] = 1

    ones = torch.from_numpy(ones).float().to(device)
    zeros = torch.from_numpy(zeros).float().to(device)

    loss_real = F.binary_cross_entropy_with_logits(y_real_pred, ones)

    # Detach so the discriminator loss does not backpropagate through the
    # generator; only the discriminator is updated here.
    generated = gen(z).detach()
    y_fake_pred = dis(generated)

    loss_fake = F.binary_cross_entropy_with_logits(y_fake_pred, zeros)
    loss = loss_fake + loss_real
    loss.backward()
    optimizer_dis.step()
    return loss

            
def train_gen(gen, batch_size):
    """Run one generator update against the module-level discriminator.

    A fresh batch of latent vectors is pushed through ``gen`` and scored by
    the global ``dis``; the generator is trained to make those scores look
    "real" (target 1). The step is applied with the global ``optimizer_gen``.

    Args:
        gen: Generator that maps ``(batch_size, 200)`` latents to images.
        batch_size: Number of latent vectors to sample.

    Returns:
        Tuple ``(loss, generated)``: the scalar loss tensor and the batch
        of generated images.
    """
    noise = np.random.normal(0, 1, (batch_size, 200))
    z = torch.tensor(noise, dtype=torch.float32)

    on_gpu = next(gen.parameters()).is_cuda
    if on_gpu:
        z = z.cuda()

    gen.zero_grad()
    generated = gen(z)
    y_fake = dis(generated)

    # All-ones target: the generator wants its fakes classified as real.
    target = torch.ones_like(y_fake)
    if on_gpu:
        target = target.cuda()

    loss = F.binary_cross_entropy_with_logits(y_fake, target)
    loss.backward()
    optimizer_gen.step()
    return loss, generated

## Train model

In [None]:
# Set this to your database's directory.
# Divide the data classes into their respective subdirectories.
# See the README for further information.
img_dir = "/root/a/"

In [None]:
def genTestImg():
    """Generate a batch from random latents and display its first image.

    Reads the module-level ``gen`` and ``batch_size``. The latents are moved
    to whatever device ``gen`` lives on, so the preview does not require a
    GPU on its own.
    """
    device = next(gen.parameters()).device
    x = torch.tensor(np.random.normal(0, 1, (batch_size, 200)), dtype=torch.float32)

    # Preview only: skip building an autograd graph for the whole batch.
    with torch.no_grad():
        img = gen(x.to(device))

    img = img.cpu().numpy()[0]
    img = np.transpose(img, (1, 2, 0))  # CHW -> HWC for matplotlib
    # Map the generator's Tanh range [-1, 1] onto 8-bit pixel values.
    img = np.uint8(np.interp(img, (-1, 1), (0, 255)))
    plt.imshow(img)
    plt.axis("off")
    plt.show()

In [None]:
# Build both networks on the GPU.
dis = Discriminator().cuda()
gen = Generator().cuda()

lr = 0.0002 # default 0.0002
beta_1 = 0.5 # default 0.5
beta_2 = 0.999 # default 0.999
optimizer_gen = torch.optim.Adam(gen.parameters(), lr, betas=(beta_1, beta_2))
optimizer_dis = torch.optim.Adam(dis.parameters(), lr, betas=(beta_1, beta_2))

epochs = 50 # default 30
batch_size = 64 # default 64
data = torch.utils.data.DataLoader(
    ImageFolderEX(img_dir, trans),
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,  # every batch then holds exactly batch_size images
    num_workers=2,
)

n = len(data)  # batches per epoch
for epoch in range(epochs):
    # c is the 1-based batch counter within the current epoch.
    for c, x in enumerate(data, start=1):
        # Alternate one discriminator step and one generator step per batch.
        loss_dis = train_dis(dis, gen, x)
        loss_gen, generated = train_gen(gen, batch_size)

        # Batches processed since the start of training.
        global_step = epoch * n + c

        print(f'{c} \t loss_dis: {loss_dis.item()} \t loss_gen: {loss_gen.item()} \t epoch: {epoch}, \t step: {c}/{n} \t global_step: {global_step}')
        genTestImg() # comment this out if you don't want to generate test imgs every iteration

## Run model

In [None]:
# Draw 8 independent batches of latent vectors, each (batch_size, 200).
# They stay on the CPU and are reused as the interpolation key points.
x1, x2, x3, x4, x5, x6, x7, x8 = (
    torch.tensor(np.random.normal(0, 1, (batch_size, 200)), dtype=torch.float32)
    for _ in range(8)
)

# Generate and display one image per latent batch (x1 is shown again at the end):
sett = [x1, x2, x3, x4, x5, x6, x7, x8, x1]
for i in sett:
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        img2 = gen(i.cuda())
    img2 = img2.cpu().numpy()[0]  # only the first image of the batch is shown
    img2 = np.transpose(img2, (1, 2, 0))  # CHW -> HWC for matplotlib
    img2 = np.uint8(np.interp(img2, (-1, 1), (0, 255)))
    plt.imshow(img2)
    plt.axis("off")
    plt.show()

## Post-processing
Renders a linearly interpolated latent-space video.

In [None]:
latentspace = []

# Choose the order in which you want the images to appear in the interpolation video.
sett = [x6, x3, x8, x2, x4, x5, x1, x7, x6]

# Generate the interpolated frames using linear interpolation.
# `frames` is the number of images rendered for each pair of consecutive tensors.
frames = 100
# Only the first `rows` latent vectors of each batch are interpolated; the
# remaining rows keep the values of the segment's starting batch. Only the
# first image of each generated batch is kept as a video frame.
rows = 8
for start, end in zip(sett, sett[1:]):
    # One vectorised ramp per segment, shape (frames, rows, latent_dim),
    # instead of one np.linspace call per scalar and per frame.
    ramp = np.linspace(start[:rows].numpy(), end[:rows].numpy(), frames)
    ramp = torch.from_numpy(ramp).to(start.dtype)
    for k in range(frames):
        latent = start.clone()
        latent[:rows] = ramp[k]
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            img2 = gen(latent.cuda())
        img2 = img2.cpu().numpy()[0]
        img2 = np.transpose(img2, (1, 2, 0))  # CHW -> HWC
        img2 = np.uint8(np.interp(img2, (-1, 1), (0, 255)))
        latentspace.append(img2)


In [None]:
# Write the frames collected in `latentspace` to a single video file.
video_path = "glyphgan-render.mp4"
video_fps = 30
imageio.mimwrite(video_path, latentspace, fps=video_fps)