In [1]:
import os
import pickle
import numpy as np
import pandas as pd
from collections import defaultdict
from tqdm import tqdm
from fontTools.ttLib import TTFont
from PIL import Image, ImageDraw, ImageFont

In [2]:
def get_existing_chars(filename):
    """Return the characters that a font file maps to glyphs.

    Every key of every subtable in the font's ``cmap`` table is collected and
    converted to a one-character string with ``chr``.

    Args:
        filename: Path of a font file that ``TTFont`` can open.

    Returns:
        list[str]: The unique characters, in arbitrary (set) order.
    """
    ttfont = TTFont(filename)
    try:
        codepoints = set()
        for table in ttfont['cmap'].tables:
            codepoints.update(table.cmap.keys())
        return [chr(codepoint) for codepoint in codepoints]
    finally:
        # Release whatever file handle the font reader may still hold; this
        # function is called once per font, so leaked handles would pile up.
        ttfont.close()

def get_image(font, char, size=128):
    """Render ``char`` with ``font`` and return it cropped and padded to a square.

    The text is drawn at (200, 200) on a 1000x1000 mode-"L" canvas filled with
    255. The canvas is cropped to the bounding box of all pixels that differ
    from 255, then the shorter axis is padded with 255 until width and height
    are equal.

    Args:
        font: Font object handed to ``ImageDraw.text``.
        char: Text to draw.
        size: Unused; kept so existing calls that pass it keep working.

    Returns:
        A PIL image holding the cropped, square-padded rendering.

    Raises:
        ValueError: If drawing changed no pixel, so there is nothing to crop.
    """
    canvas = Image.new('L', (1000, 1000), 255)
    ImageDraw.Draw(canvas).text((200, 200), char, font=font)

    # Invert so the background is 0 and every drawn pixel is non-zero.
    ink = 255 - np.array(canvas)
    inked_cols = ink.sum(0).nonzero()[0]
    inked_rows = ink.sum(1).nonzero()[0]
    if inked_cols.size == 0:
        # Previously this surfaced as an opaque "zero-size array" reduction
        # error from numpy; the exception type is unchanged.
        raise ValueError(f"nothing was drawn for {char!r}")

    wmin, wmax = inked_cols.min(), inked_cols.max()
    hmin, hmax = inked_rows.min(), inked_rows.max()

    # Crop to the bounding box and flip back to the 255 background.
    glyph = 255 - ink[hmin:hmax + 1, wmin:wmax + 1]

    # Positive: wider than tall (pad rows). Negative: taller than wide (pad columns).
    whdiff = int(wmax - wmin) - int(hmax - hmin)
    missing = abs(whdiff)
    # Split the padding so an odd difference still yields an exact square;
    # the extra pixel goes on the trailing side.
    lead, trail = missing // 2, missing - missing // 2
    if whdiff < 0:
        pad_width = ((0, 0), (lead, trail))
    else:
        pad_width = ((lead, trail), (0, 0))

    glyph = np.pad(glyph, pad_width, 'constant', constant_values=255)
    return Image.fromarray(glyph)

def get_all_korean():
    """Return every character from '가' (U+AC00) through '힣' (U+D7A3), in order.

    The earlier implementation walked the same range by incrementing the
    UTF-8 byte value of each character and retrying until the bytes decoded
    again. Every validly decodable value between the two end points is exactly
    one code point of this contiguous range, so iterating the code points
    directly yields the identical list without the decode-and-retry loop.

    Returns:
        list[str]: 11172 one-character strings in ascending code point order.
    """
    first_codepoint = 0xAC00  # '가'
    last_codepoint = 0xD7A3   # '힣' (UTF-8 bytes ED 9E A3 == 15572643)
    return [chr(codepoint) for codepoint in range(first_codepoint, last_codepoint + 1)]

In [3]:
# Candidate character list returned by get_all_korean(); the rendering loop
# further down intersects it with the characters each font provides.
ak = get_all_korean()

In [4]:
def resize_with_padding(img, ratio, new_size, padding_color=255):
    """Scale ``img`` by ``ratio`` and centre it on a square canvas.

    Args:
        img: PIL image to scale.
        ratio: Scale factor applied to both width and height.
        new_size: Side length, in pixels, of the square output canvas.
        padding_color: Fill value of the canvas around the scaled image.

    Returns:
        A new mode-"L" image of size ``(new_size, new_size)`` with the scaled
        image pasted in the centre.
    """
    # int() truncates, so a small dimension could collapse to 0, which
    # Image.resize rejects; keep at least one pixel along each axis.
    new_width = max(1, int(img.width * ratio))
    new_height = max(1, int(img.height * ratio))

    resized_img = img.resize((new_width, new_height), Image.BILINEAR)

    # Blank square canvas that receives the scaled image.
    padded_img = Image.new("L", (new_size, new_size), padding_color)

    # Top-left corner that centres the scaled image (floor division, so any
    # odd leftover pixel ends up on the right/bottom).
    left = (padded_img.width - resized_img.width) // 2
    top = (padded_img.height - resized_img.height) // 2
    box = (left, top, left + resized_img.width, top + resized_img.height)

    padded_img.paste(resized_img, box)
    return padded_img

In [5]:
# Directory the font files are read from, and directory the rendered images go to.
ttffd = "/home/jupyter/ai_font/data/train_ttfs"
pngfd = "/home/jupyter/ai_font/data/train/pngs"

# Make sure the output directory exists (no error if it is already there).
os.makedirs(pngfd, exist_ok=True)

# (font name, font path) pairs for every entry of the font directory, sorted so
# that positions in the list are the same on every run.
fontfiles = sorted(
    (entry.replace(".ttf", ""), f"{ttffd}/{entry}")
    for entry in os.listdir(ttffd)
)

In [None]:
font_size = 100   # size passed to ImageFont.truetype
shard_count = 4   # only fonts at list positions with i % shard_count == shard_index are handled here
shard_index = 1
canvas_size = 128  # side length of every saved image
glyph_extent = 127  # the largest rendering of a font is scaled to this many pixels
render_failures = {}  # fontname -> characters whose rendering raised

pbar = tqdm([f for i, f in enumerate(fontfiles) if i % shard_count == shard_index])
for fontname, filename in pbar:
    # A font counts as done as soon as any output file carries its name prefix.
    # NOTE(review): a run interrupted part-way through a font therefore leaves
    # that font incomplete, and it is skipped on the next run.
    fonts_in = {name.split("__")[0] for name in os.listdir(pngfd)}
    if fontname in fonts_in:
        continue

    font = ImageFont.truetype(filename, font_size)
    existing_chars = get_existing_chars(filename)
    # Sorted so characters are rendered and saved in the same order on every run.
    iter_chars = sorted(set(existing_chars).intersection(ak))

    img_dict = {}
    failed_chars = []
    for char in iter_chars:
        try:
            img_dict[char] = get_image(font, char)
        except Exception:
            # Best effort: a character that cannot be rendered is skipped, but
            # remembered. KeyboardInterrupt is no longer swallowed here.
            failed_chars.append(char)
    if failed_chars:
        render_failures[fontname] = failed_chars
    if not img_dict:
        continue

    # One scale factor per font, taken from its largest rendering, so relative
    # sizes between the characters of a font are preserved.
    max_size = max(max(glyph.size) for glyph in img_dict.values())
    ratio = glyph_extent / max_size

    for savecount, (char, glyph) in enumerate(img_dict.items(), start=1):
        img = resize_with_padding(glyph, ratio, canvas_size)
        imgpath = f"{pngfd}/{fontname}__{char}"
        with open(imgpath, "wb") as out_file:
            img.save(out_file, "PNG")
        pbar.set_postfix(n_files=f"{savecount}/{len(img_dict)}", font=fontname)

 55%|█████▌    | 57/103 [06:40<11:57, 15.59s/it, font=어비 유진남푠체 볼드, n_files=225/2449]  

In [13]:
# Debug inspection: `existing_chars` is whatever the rendering loop above last
# assigned, i.e. the cmap characters of the most recent font it opened.
existing_chars

['𝇚',
 '𐐓',
 '𐂮',
 '𒉕',
 '𐌽',
 '𒊗',
 '𝉄',
 '𝒳',
 '𒄖',
 '𐌰',
 '𒄄',
 '𐌌',
 '𒁗',
 '𐒘',
 '𝅱',
 '𝇅',
 '𐅵',
 '𝔑',
 '\U000e002b',
 '𐎪',
 '𐎿',
 '𝈋',
 '𐒇',
 '𐂵',
 '𒄔',
 '𝖁',
 '𐐜',
 '𒁵',
 '𝞲',
 '𝞄',
 '𝘁',
 '𐤖',
 '𝃂',
 '𒀽',
 '𒌰',
 '𝚀',
 '𝕙',
 '𐂄',
 '𒉐',
 '𒁻',
 '𝜌',
 '𝘞',
 '𐁗',
 '𝐁',
 '𝗃',
 '𝛿',
 '𒊎',
 '𒑢',
 '𒋊',
 '𝓊',
 '𐤒',
 '𝃔',
 '𐄀',
 '𐐁',
 '𝛣',
 '𝄹',
 '𐎚',
 '𝍡',
 '𐄋',
 '𐂙',
 '𝓔',
 '𒁊',
 '𒐷',
 '𝁓',
 '𝌅',
 '𝛀',
 '𝙎',
 '\U000e005e',
 '𒊾',
 '𝁧',
 '𒄥',
 '𐌗',
 '𒆄',
 '𒀚',
 '𝃪',
 '𝗦',
 '𝕵',
 '𝞣',
 '𝇛',
 '𐃆',
 '𝌮',
 '𝗠',
 '𝄿',
 '𝛴',
 '𝄣',
 '𝘓',
 '𒁐',
 '𝆚',
 '𐨝',
 '𝐥',
 '𝗒',
 '𒈿',
 '𒋑',
 '𝂕',
 '𝑘',
 '\U0001d506',
 '𝝾',
 '\U000e0001',
 '𐎝',
 '𐎠',
 '𝁝',
 '𝃰',
 '𝃴',
 '𐀩',
 '𝇉',
 '𐑎',
 '𝜈',
 '𒃌',
 '𝙄',
 '𝑤',
 '𝞱',
 '𝅊',
 '𒉉',
 '𒆼',
 '𒊶',
 '𝔃',
 '𒁑',
 '𐁐',
 '𒄠',
 '𐀣',
 '𝛽',
 '𐐧',
 '𝛺',
 '𝕆',
 '𐐢',
 '𐠯',
 '𝟤',
 '𐂑',
 '𝟜',
 '𒐍',
 '\U000e007b',
 '𝟂',
 '𒅓',
 '𐐷',
 '𐐞',
 '𒃑',
 '𝓓',
 '𐑽',
 '𝛗',
 '𝍰',
 '𝑪',
 '𒁶',
 '𝅿',
 '𐅑',
 '𝖔',
 '\U000e0078',
 '𒄑',
 '𒁩',
 '𒅠',
 '𝂰',
 '𝁼',
 '𝍂',
 '𝂺',
 '𝁜',
 '𝐣',
 '𒉰',
 '𐅛',
 '𝌐'