# In [2]:
import os
import shutil
import random
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Directory that is searched (recursively) for the font files (Windows).
windows_fonts_dir = r"C:\Windows\Fonts"

# Fonts to render. The key is the label that ends up in the output file name,
# the value is the font file name looked up under windows_fonts_dir.
fonts = {
    "AvenirNextLTPro": "AvenirNextLTPro-Demi.ttf",
    "Microsoft YaHei": "msyh.ttf",
    "AgencyFB": "AgencyFB-Bold.ttf",
    "Bahnschrift": "bahnschrift.ttf",
    "Consolas": "consola.ttf",
    "CourierNew": "courbd.ttf",
    "SegoeUI": "seguisb.ttf",
    "GillSans": "gillsansmt.ttf",
    "Posterama": "Posterama.ttf",
    "Lucida Console": "lucon.TTF",
    "OCRB": "ocrb.ttf",
    "Aptos Display": "aptos-display-bold.ttf",
    "Franklin Gothic": "Franklin Gothic Medium Cond - Regular.ttf",
    "verdana": "verdana.ttf",
    "Biome": "Biome W04 Regular.ttf",
    "daytona": "daytona-regular.ttf",
}

# Morphological variants: name -> (operation, structuring element, iterations).
# The operation is "erode", "dilate" or "none"; "none" carries no kernel.
morph_kernels = {
    "very_thin": ("erode", np.ones((2, 2), dtype=np.uint8), 1),
    "thin": ("erode", np.ones((1, 2), dtype=np.uint8), 2),
    "slightly_thin": ("erode", np.ones((2, 1), dtype=np.uint8), 2),
    "normal": ("none", None, 0),
    "slightly_thick": ("dilate", np.ones((2, 1), dtype=np.uint8), 2),
    "thick": ("dilate", np.ones((1, 2), dtype=np.uint8), 2),
    "very_thick": ("dilate", np.ones((2, 2), dtype=np.uint8), 1),
}

# Settings
output_base = "synthetic_digits"
# Sub-directories of output_base: the unsplit pool plus the two split targets.
temp_dir, train_dir, test_dir = (
    os.path.join(output_base, subdir) for subdir in ("all", "train", "test")
)
img_size = (64, 64)  # canvas size passed to Image.new
font_size = 42
cijfers = "0123456789"  # the digit characters to render
split_ratio = 0.8  # 80% train, 20% test

# Step 1: render every digit in every font and every morphological variant and
# write the results to the temporary directory, one sub-directory per digit.

# Index the font directory once instead of walking it again for every font.
# Keys are lower-cased file names so the lookup is case-insensitive; values are
# the real on-disk paths, so the path handed to the font loader always carries
# the file's actual casing. The first match in walk order wins.
font_index = {}
for root, _, files in os.walk(windows_fonts_dir):
    for file_name in files:
        font_index.setdefault(file_name.lower(), os.path.join(root, file_name))

for font_name, font_file in fonts.items():
    font_path = font_index.get(font_file.lower())
    if font_path is None:
        print(f"Font niet gevonden: {font_file}")
        continue

    # Best effort: a font that cannot be loaded is reported and skipped so the
    # remaining fonts are still processed.
    try:
        font = ImageFont.truetype(font_path, font_size)
    except Exception as e:
        print(f"Fout bij laden van font {font_file}: {e}")
        continue

    for cijfer in cijfers:
        # Black 8-bit grayscale canvas; the digit is drawn in white (255).
        img = Image.new("L", img_size, color=0)
        draw = ImageDraw.Draw(img)

        # The glyph's bounding box generally does not start at the drawing
        # origin, so subtract its top-left offset to centre the visible pixels.
        bbox = draw.textbbox((0, 0), cijfer, font=font)
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
        offset_x = bbox[0]
        offset_y = bbox[1]

        pos_x = (img_size[0] - text_width) // 2 - offset_x
        pos_y = (img_size[1] - text_height) // 2 - offset_y

        draw.text((pos_x, pos_y), cijfer, fill=255, font=font)

        base_img = np.array(img)

        # The output directory depends only on the digit, so create it once
        # here rather than once per variant.
        label_dir = os.path.join(temp_dir, cijfer)
        os.makedirs(label_dir, exist_ok=True)

        for variant, (op_type, kernel, iters) in morph_kernels.items():
            if op_type == "erode":
                variant_img = cv2.erode(base_img, kernel, iterations=iters)
            elif op_type == "dilate":
                variant_img = cv2.dilate(base_img, kernel, iterations=iters)
            else:
                variant_img = base_img.copy()

            filename = f"{font_name}_{variant}.png"
            filepath = os.path.join(label_dir, filename)
            # cv2.imwrite signals failure through its return value instead of
            # raising, so check it to avoid silently missing images.
            if not cv2.imwrite(filepath, variant_img):
                print(f"Fout bij opslaan van {filepath}")

# Step 2: split the generated images per digit into train/test (80% / 20%).

# A dedicated, seeded generator combined with a sorted listing makes the split
# reproducible: re-running over the same images yields the same assignment
# instead of a fresh random one.
split_rng = random.Random(42)

for cijfer in cijfers:
    cijfer_dir = os.path.join(temp_dir, cijfer)
    if not os.path.isdir(cijfer_dir):
        continue

    # Only take the generated PNG files; stray entries such as Thumbs.db or
    # sub-directories must not end up in the data set (or break copyfile).
    images = sorted(
        name
        for name in os.listdir(cijfer_dir)
        if name.lower().endswith(".png")
        and os.path.isfile(os.path.join(cijfer_dir, name))
    )
    split_rng.shuffle(images)

    total = len(images)
    train_end = int(total * split_ratio)

    train_images = images[:train_end]
    test_images = images[train_end:]

    for subset_root, other_root, img_list in [
        (train_dir, test_dir, train_images),
        (test_dir, train_dir, test_images),
    ]:
        target_dir = os.path.join(subset_root, cijfer)
        other_dir = os.path.join(other_root, cijfer)
        os.makedirs(target_dir, exist_ok=True)

        for img_name in img_list:
            src = os.path.join(cijfer_dir, img_name)
            dst = os.path.join(target_dir, img_name)
            shutil.copyfile(src, dst)

            # A copy left in the opposite subset by an earlier run would put
            # the same image in both train and test; remove it.
            stale_copy = os.path.join(other_dir, img_name)
            if os.path.isfile(stale_copy):
                os.remove(stale_copy)

