In [1]:
font = "플레이브밤비"

## Download raw files

In [7]:
from google.cloud import storage
import os

def download_folder(bucket_name, source_folder_name, destination_folder_path):
    """
    Downloads a folder from a GCS bucket to a local directory.
    
    Args:
    - bucket_name (str): The name of the GCS bucket.
    - source_folder_name (str): The folder path in the GCS bucket.
    - destination_folder_path (str): The local path where the folder will be downloaded.
    """
    # Initialize the client
    client = storage.Client()

    # Get the bucket
    bucket = client.bucket(bucket_name)

    # List all blobs in the given folder
    blobs = bucket.list_blobs(prefix=source_folder_name)

    blobs = [b for b in blobs if b.name.endswith(".png")]
    for blob in blobs:a
        # Remove the folder prefix to construct the local file path
        relative_path = blob.name[len(source_folder_name):].lstrip('/')
        local_file_path = os.path.join(destination_folder_path, relative_path)

        # Ensure the local directory exists
        os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

        # Download the blob to the local file
        blob.download_to_filename(local_file_path)
        print(f"Downloaded {blob.name} to {local_file_path}")

# Example usage
download_folder(
    bucket_name='leo_font',
    source_folder_name=f'raw/{font}',
    destination_folder_path=f'/home/jupyter/ai_font/data/test/raw/{font}',
)


Downloaded raw/플레이브밤비/갊.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/갊.png
Downloaded raw/플레이브밤비/갸.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/갸.png
Downloaded raw/플레이브밤비/곁.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/곁.png
Downloaded raw/플레이브밤비/곬.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/곬.png
Downloaded raw/플레이브밤비/교.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/교.png
Downloaded raw/플레이브밤비/궤.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/궤.png
Downloaded raw/플레이브밤비/껴.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/껴.png
Downloaded raw/플레이브밤비/꽤.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/꽤.png
Downloaded raw/플레이브밤비/꾜.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/꾜.png
Downloaded raw/플레이브밤비/끝.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/끝.png
Downloaded raw/플레이브밤비/높.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/높.png
Downloaded raw/플레이브밤비/뉑.png to /home/jupyter/ai_font/data/test/raw/플레이브밤비/뉑.png
Down

## Filename Normalize

In [10]:
import unicodedata
from tqdm import tqdm

fd = f'/home/jupyter/ai_font/data/test/raw/{font}'
files = os.listdir(fd)

for f in tqdm(files):
    oenc = f.encode()
    n = unicodedata.normalize('NFC', f)
    nenc = n.encode()
    if oenc != nenc:
        print(n)
        os.rename(f"{fd}/{f}", f"{fd}/{n}")

100%|██████████| 132/132 [00:00<00:00, 793192.16it/s]


## Preprocess

In [13]:
import os
import time
import cv2
import numpy as np
from tqdm import tqdm
from PIL import Image

in_fd = f'/home/jupyter/ai_font/data/test/raw/{font}'
out_fd = f'/home/jupyter/ai_font/data/test/processed/{font}'

os.makedirs(out_fd, exist_ok=True)

def augmenting(img):
    img = np.array(img)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(2,2))

    blurred = cv2.GaussianBlur(img, (3, 3), 0)
    processed = cv2.morphologyEx(blurred, cv2.MORPH_CLOSE, kernel)
    closing = Image.fromarray(processed)
    # err = cv2.erode(processed, kernel)
    # erode = Image.fromarray(err)
    # dil = cv2.dilate(processed, kernel)
    # dilate = Image.fromarray(dil)
    return {
        "closing": closing,
        # "erode": erode,
        # "dilate":dilate,
    }

for f in os.listdir(f"{in_fd}"):
    if f.endswith(".png"):
        path = f"{in_fd}/{f}"
        img = Image.open(path).convert("L")
        closing = augmenting(img)['closing']
        closing.save(f"{out_fd}/{font}__closing__{f}")

## Testmapper create

In [21]:
import pandas as pd
def get_all_korean():

    def nextKorLetterFrom(letter):
        lastLetterInt = 15572643
        if not letter:
            return '가'
        a = letter
        b = a.encode('utf8')
        c = int(b.hex(), 16)

        if c == lastLetterInt:
            return False

        d = hex(c + 1)
        e = bytearray.fromhex(d[2:])

        flag = True
        while flag:
            try:
                r = e.decode('utf-8')
                flag = False
            except UnicodeDecodeError:
                c = c+1
                d = hex(c)
                e = bytearray.fromhex(d[2:])
        return e.decode()

    returns = []
    flag = True
    k = ''
    while flag:
        k = nextKorLetterFrom(k)
        if k is False:
            flag = False
        else:
            returns.append(k)
    return returns

def get_lexicon(letter):
    ch1 = (ord(letter) - ord('가'))//588
    ch2 = ((ord(letter) - ord('가')) - (588*ch1)) // 28
    ch3 = (ord(letter) - ord('가')) - (588*ch1) - 28*ch2
    return np.array([ch1, ch2, ch3])

ak = get_all_korean()
testfd = f'/home/jupyter/ai_font/data/test/processed/{font}'
lex_mapper = {}
for k in tqdm(ak):
    lex_mapper[k] = {
        'double': [],
        'single': [],
    }
    
i = 0
for k in tqdm(ak):
    lex_k = get_lexicon(k)
    i += 1
    for l in ak[i:]:
        if not k == l:
            lex_l = get_lexicon(l)
            if np.sum(lex_k == lex_l) == 2:
                lex_mapper[k]['double'].append(l)
                lex_mapper[l]['double'].append(k)
            elif np.sum(lex_k == lex_l) == 1:
                lex_mapper[k]['single'].append(l)
                lex_mapper[l]['single'].append(k)
                
testmap = {}
testmap[font] = {}
for k in ak:
    testmap[font][k] = {
        'double':set(),
        'single':set(),
    }
for f in tqdm(os.listdir(testfd)):
    if f.endswith(".png"):
        font, _, letter = f.split(".")[0].split("__")
        if letter in ak:
            for k in lex_mapper[letter]['double']:
                testmap[font][k]['double'].add(letter)
            for k in lex_mapper[letter]['single']:
                testmap[font][k]['single'].add(letter)
            
testdf = []
for l in ak:
    testdf.append({
        'font': font,
        'letter': l,
        "double": list(testmap[font][l]['double']),
        "single": list(testmap[font][l]['single']),
    })
        
pd.DataFrame().from_dict(testdf).to_pickle(f"/home/jupyter/ai_font/data/pickle/testmap_{font}.pickle")

100%|██████████| 11172/11172 [00:00<00:00, 100203.93it/s]
100%|██████████| 11172/11172 [10:36<00:00, 17.56it/s] 
100%|██████████| 132/132 [00:00<00:00, 2331.09it/s]


NameError: name 'pd' is not defined