In [None]:
import os
import pygame
import time
import sys
import pandas as pd
import numpy as np
import shutil
from matplotlib import cm
from collections import Counter
from PIL import Image, ImageOps
import random

pygame 2.0.1 (SDL 2.0.14, Python 3.9.2)
Hello from the pygame community. https://www.pygame.org/contribute.html


# If converting OTF fonts to TTF, remove the `AnyConv.com__` prefix from the converted file names

In [None]:
# Folder that holds the input font files.
font_file = "testing_fonts"
# Folder that receives one sub-folder of rendered glyph images per font.
image_file = "testing_images"

In [None]:
# Strip the "AnyConv.com__" prefix left on font file names after conversion.
# The files are renamed inside the very directory that was listed, so the
# listed names always resolve (no machine-specific absolute path involved).
path = font_file
fonts = os.listdir(path)

for name in fonts:
    if "AnyConv.com__" in name:
        new_name = name.replace("AnyConv.com__", "")
        os.rename(os.path.join(path, name), os.path.join(path, new_name))

        print(name, "changed")


# Helper functions (from utils)

In [None]:
import os
import pygame
import time
import sys
import pandas as pd
import numpy as np
import shutil
from matplotlib import cm
from collections import Counter
from PIL import Image

# Initialise pygame before use; font2image() below relies on pygame.font.Font.
pygame.init()

def font2image(input_file, output_paths, characters, size, fill_color):
    """Render each character of a font to its own PNG, then post-process them.

    Parameters
    ----------
    input_file : str
        Path of the font file handed to ``pygame.font.Font``.
    output_paths : str
        Parent directory. Images go into a sub-directory named after the font
        file (the part of its base name before the first ``.``); the directory
        is created if it does not exist yet.
    characters : iterable of str
        Characters to render, one image per character.
    size : int
        Font size passed to pygame, also forwarded to ``process_image``.
    fill_color : bool
        Forwarded unchanged to ``process_image``.

    Notes
    -----
    Uppercase ASCII letters are saved as ``<letter>+.png`` instead of
    ``<letter>.png`` -- presumably so they cannot clash with the lowercase
    file on case-insensitive file systems (TODO confirm).
    After rendering, ``remove_duplicated_images`` and ``process_image`` are
    run on the output directory.
    """
    input_file_name = os.path.basename(input_file).split('.')[0]   # get output file_name
    output_path = os.path.join(output_paths, input_file_name)

    # exist_ok avoids the check-then-create race and also creates missing parents.
    os.makedirs(output_path, exist_ok=True)

    uppercase = frozenset(chr(i) for i in range(0x0041, 0x005A + 1))

    # The font does not change between characters, so load it once up front.
    font = pygame.font.Font(input_file, size)
    for word in characters:
        # Anti-aliased black text on a white background.
        rtext = font.render(word, True, (0, 0, 0), (255, 255, 255))

        # for uppercase letter
        file_stem = word + '+' if word in uppercase else word
        pygame.image.save(rtext, os.path.join(output_path, file_stem + ".png"))

    remove_duplicated_images(output_path)
    process_image(output_path, size, fill_color)

def remove_duplicated_images(path):
    """Repeatedly delete the largest group of same-sized files in ``path``.

    On every pass the files are grouped by byte size. When the most frequent
    size is shared by more than 10 files, all files of that size are deleted
    and the directory is scanned again; otherwise the function returns.
    If the directory is (or has become) empty, an error line is printed and
    the function returns.
    """
    while True:
        names = os.listdir(path)
        if not names:
            print('!!!!!!!!!!!!!!!!!!error:{}'.format(path))
            return

        # Measure every file once and remember the size next to its full path.
        size_by_file = {}
        for name in names:
            full_path = os.path.join(path, name)
            size_by_file[full_path] = os.path.getsize(full_path)

        dominant_size, occurrences = Counter(size_by_file.values()).most_common(1)[0]
        if occurrences <= 10:
            return

        # remove empty images
        for full_path, byte_size in size_by_file.items():
            if byte_size == dominant_size:
                os.remove(full_path)

def load_image(path):
    """Open the image at ``path`` and return it as a greyscale NumPy array.

    The image is converted to PIL mode ``'L'`` before it is turned into an
    array.
    """
    greyscale = Image.open(path).convert('L')
    return np.array(greyscale)

def cut_image(image):
    """Crop the all-white border off a 2-D greyscale array.

    A column (or row) counts as blank when its pixel sum equals 255 times its
    length. Blank columns are stripped from the left and right, blank rows
    from the top and bottom. If the whole image is blank, it is returned
    uncropped.

    Parameters
    ----------
    image : 2-D array
        Pixel values; white is expected to be 255.

    Returns
    -------
    2-D array
        A slice of ``image`` containing the non-blank region.
    """
    rows, cols = image.shape
    blank_col_total = 255 * rows
    blank_row_total = 255 * cols

    def count_blank(lines, blank_total):
        """Count the leading lines whose pixel sum equals ``blank_total``."""
        run = 0
        for line in lines:
            if line.sum() != blank_total:
                break
            run += 1
        return run

    by_column = image.T   # iterating the transpose walks the columns
    left = count_blank(by_column, blank_col_total)
    right = cols - count_blank(by_column[::-1], blank_col_total)
    upper = count_blank(image, blank_row_total)
    bottom = rows - count_blank(image[::-1], blank_row_total)

    # Nothing but white found: keep the full image instead of an empty slice.
    if left == cols or right == 0 or upper == rows or bottom == 0:
        return image[0:rows, 0:cols]
    return image[upper:bottom, left:right]

def resize_image(image_cut, size):
    """Convert an array to a PIL image and shrink it to fit within ``size``.

    The array is mapped through matplotlib's ``gray`` colormap, scaled by 255
    and cast to ``uint8`` before being wrapped in a PIL image.
    NOTE(review): ``cm.gray`` yields RGBA values, so the PIL image is
    presumably RGBA rather than mode 'L' -- confirm.

    If the longer side exceeds ``size`` the image is resized so that side
    equals ``size`` and the other side is scaled by the same ratio (at least
    1 pixel). Images that already fit are returned without resizing.
    """
    (h, w) = image_cut.shape
    picture = Image.fromarray(np.uint8(cm.gray(image_cut)*255))

    taller_than_wide = h > w
    if taller_than_wide:
        longest, shortest = h, w
    else:
        longest, shortest = w, h

    # Already small enough: never enlarge.
    if longest <= size:
        return picture

    ratio = longest/size
    scaled = max(int(shortest/ratio), 1)
    # PIL sizes are (width, height).
    target = (scaled, size) if taller_than_wide else (size, scaled)
    return picture.resize(target)

def pad_image(image_resized, size):
    """Paste ``image_resized`` centred onto a white ``size`` x ``size`` canvas.

    The canvas is a new PIL image of mode ``'L'`` filled with 255; the
    canvas is returned.
    """
    glyph_w, glyph_h = image_resized.size
    offset = (int((size-glyph_w)/2), int((size-glyph_h)/2))
    canvas = Image.new('L', (size, size), color=255)
    canvas.paste(image_resized, offset)
    return canvas

def color_text(image, color):
    """Colour the text of a greyscale image (originally written by himanshu).

    Uses ``ImageOps.colorize`` with ``color`` as the black point and
    ``"white"`` as the white point, and returns the colourised image.
    """
    tinted = ImageOps.colorize(image, black=color, white="white")
    return tinted

def get_random_color():
    """Return a random colour as an ``'rgb(r,g,b)'`` string.

    Each of the three channels is drawn with ``random.randint(0, 255)``.
    """
    channels = [str(random.randint(0, 255)) for _ in range(3)]
    return 'rgb(' + ','.join(channels) + ')'

def process_image(path, size, fill_color):
    """Crop, resize, pad and optionally tint every image in ``path``, in place.

    Each file is loaded with ``load_image``, passed through ``cut_image``,
    ``resize_image`` and ``pad_image``, and saved back to the same file path.
    When ``fill_color == True`` the result is also passed through
    ``color_text``.
    """
    names = os.listdir(path)

    # One colour is drawn per call, so every image in the folder shares it.
    tint = get_random_color()

    for name in names:
        target = os.path.join(path, name)
        glyph = pad_image(resize_image(cut_image(load_image(target)), size), size)
        if fill_color == True:
            glyph = color_text(glyph, tint)
        glyph.save(target)

def remove_empty_floder(path):
    """Delete every empty sub-directory directly inside ``path``.

    Entries that are not directories (for example a stray ``.DS_Store``
    file) are skipped. The name of each removed directory is printed,
    followed by a final ``done!`` line.
    """
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        # os.listdir() on a plain file raises NotADirectoryError, so only
        # directories are inspected.
        if not os.path.isdir(entry_path):
            continue
        if not os.listdir(entry_path):
            os.rmdir(entry_path)
            print(entry,' |removed')
    print("done!")

# Check whether the current font's image folder contains an image for each of the given characters
def check_image_exists(path, characters):
    """Print the characters that have no ``.png`` image inside ``path``.

    The expected file name is ``<char>.png``, or ``<char>+.png`` for
    uppercase ASCII letters. One ``no  <name>`` line is printed per missing
    image, followed by ``done!``.
    """
    uppercase = {chr(code) for code in range(0x0041, 0x005A + 1)}
    for char in characters:
        stem = char + '+' if char in uppercase else char
        if not os.path.exists(os.path.join(path, stem + '.png')):
            print('no ', stem)
    print('done!')

In [None]:
# Latin letters (upper- and lowercase) followed by the ten digits.
english = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890"

# 240 Japanese characters (kanji)
japanese = "一二三四五六七八九十百千上下左右中大小月日年早木林山川土空田天生花草虫犬人名女男子目耳口手足見音力気円入出立休先夕本文字学校村町森正水火玉王石竹糸貝車金雨赤青白数多少万半形太細広長点丸交光角計直線矢弱強高同親母父姉兄弟妹自友体毛頭顔首心時曜朝昼夜分週春夏秋冬今新古間方北南東西遠近前後内外場地国園谷野原里市京風雪雲池海岩星室戸家寺通門道話言答声聞語読書記紙画絵図工教晴思考知才理算作元食肉馬牛魚鳥羽鳴麦米茶色黄黒来行帰歩走止活店買売午汽弓回会組船明社切電毎合当台楽公引科歌刀番用何"

chinese = "的一是了我不人在他有这个上们来到时田大地为子中你说生国年川着就那和要她出也得里后自以会家可下而过天去能对小多然于心学么之都好看起发当没成只如事把还用第样道想作种开美总从无情己面最女但现前些所同日手又行意动方期它头经长儿回位分爱老因很给名法间斯知世什两次使身者被高已亲其进此话常与活正感见明问力理尔点文几定本公特做外孩相西果走将月十实向声车全信重三机工物气每并别真打太新比才便夫再书部水像眼等体却加电主界门利海受听表德少克代员许先口由死安写性马光白或住难望教命花结乐色更拉东神记处让母父应直字场平报友关放至张认接告入笑内英军候民岁往何度山觉路带万男边风解叫任金快原吃妈变通师立象数四失满战远格士音轻目条呢病始达深完今提求清王化空业思切怎非找片罗钱吗语元喜曾离飞科言干流欢约各即指合反题必该论交终林请医晚制球决传画保读运及则房早院量苦火布品近坐产答星精视五连司巴奇管类未朋且婚台夜青北队久乎越观落尽形影红爸百令周吧识步希亚术留市半热送兴造谈容极随演收首根讲整式取照办强石古华拿计您装似足双妻尼转诉米称丽客南领节衣站黑刻统断福城故历惊脸选包紧争另建维绝树系伤示愿持千史谁准联妇纪基买志静阿诗独复痛消社算义竟确酒需单治卡幸兰念举仅钟怕共毛句息功官待究跟穿室易游程号居考突皮哪费倒价图具刚脑永歌响商礼细专黄块脚味灵改据般破引食仍存众注笔甚某沉血备习校默务土微娘须试怀料调广苏显赛查密议底列富梦错座参八除跑亮假印设线温虽掉京初养香停际致阳纸李纳验助激够严证帝饭忘趣支春集丈木研班普导顿睡展跳获艺六波察群皇段急庭创区奥器谢弟店否害草排背止组州朝封睛板角况曲馆育忙质河续哥呼若推境遇雨标姐充围案伦护冷警贝著雪索剧啊船险烟依斗值帮汉慢佛肯闻唱沙局伯族低玩资屋击速顾泪洲团圣旁堂兵七露园牛哭旅街劳型烈姑陈莫鱼异抱宝权鲁简态级票怪寻杀律胜份汽右洋范床舞秘午登楼贵吸责例追较职属渐左录丝牙党继托赶章智冲叶胡吉卖坚喝肉遗救修松临藏担戏善卫药悲敢靠伊村戴词森耳差短祖云规窗散迷油旧适乡架恩投弹铁博雷府压超负勒杂醒洗采毫嘴毕九冰既状乱景席珍童顶派素脱农疑练野按犯拍征坏骨余承置彩灯巨琴免环姆暗换技翻束增忍餐洛塞缺忆判欧层付阵玛批岛项狗休懂武革良恶恋委拥娜妙探呀营退摇弄桌熟诺宣银势奖宫忽套康供优课鸟喊降夏困刘罪亡鞋健模败伴守挥鲜财孤枪禁恐伙杰迹妹遍盖副坦牌江顺秋萨菜划授归浪听凡预奶雄升编典袋莱含盛济蒙棋端腿招释介烧误"
korean = "ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄋᄌᄍᄎᄏᄐᄑ햬양약얀야앵액애앞앙압암알안악아어억언얼엄업엉에여역연열염엽영예용욕요왼외왜왕왈완와옹옴올온옥오우욱운울움웅워원월위유육윤율융윷잎잉입임일인익이의응읍음을은으"

# Sanity check: report how many characters each set contains.
print("English:", len(english), " | japanese:", len(japanese), " | chinese:", len(chinese), " | korean:", len(korean))


English: 62  | japanese: 240  | chinese: 994  | korean: 97


In [None]:
# Characters rendered for every font: English plus Japanese only
# (the chinese and korean sets are defined above but not included here).
common_list = english + japanese

In [None]:

# Render every font in `font_file` into `image_file` and count the outcomes.
# Hidden entries (e.g. macOS ".DS_Store") are not fonts, so they are skipped
# instead of being reported as failed conversions.
fonts = [name for name in os.listdir(font_file) if not name.startswith(".")]

done, failed = 0, 0

for font in fonts:
    try:
        font_path = os.path.join(font_file, font)
        font2image(font_path, image_file, common_list, 60, fill_color = False)
        done += 1
        print(font, "done")
    except Exception as error:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt still stops the run, and show why the font failed.
        failed += 1
        print(font, "failed......", repr(error))


print("----------------------------")

print("Successfully converted =", done)
print("Failed =", failed)

KajudenFont-Full-Regular.ttf done
FGNewKururin.otf done
MT_TARE.ttf done
.DS_Store failed......
mikiyu-mokomori-siro.ttf done
JiyunoTsubasa.ttf done
ArmedBanana.ttf done
UtsukushiFONT.otf done
Osaka.ttc done
yutaCo2_p_Regular_OT_100.otf done
yutapon_coding_080.ttc done
mikiyu-mokomori-b.ttf done
!!!!!!!!!!!!!!!!!!error:testing_images/NemukeMedium-0213
NemukeMedium-0213.otf done
gakuran_font.TTF done
amakara_sample.ttf done
Ounen-mouhitsu.otf done
----------------------------
Successfully converted = 15
Failed = 1
