In [None]:
import os
import unicodedata
import random
import math
from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageOps
import numpy as np
import csv
import shutil
# --- Paths ---------------------------------------------------------------
corpus_file = "corpus/ne.txt"   # UTF-8 text corpus, read line by line
fonts_dir = "fonts/static"      # directory scanned for .ttf / .otf files
output_images = "data/images"   # rendered images are written here
output_text = "data/"           # gt.txt is written here

# --- Generation parameters -----------------------------------------------
num_images = 10                 # how many images to render
num_words = 30                  # upper bound on corpus lines joined per image
image_size = (256, 64)          # (width, height) in pixels
font_size_range = (32, 48)      # NOTE(review): no visible cell reads this value

# Seed the generator once so that Restart Kernel -> Run All reproduces the
# same text selection, fonts, layout and augmentations.
random_seed = 42
random.seed(random_seed)

for _output_dir in (output_images, output_text):
    os.makedirs(_output_dir, exist_ok=True)


In [None]:
def apply_skew(img, max_angle=5, fillcolor="white"):
    """Apply a random horizontal shear to ``img``.

    Args:
        img: Image to shear; it is not modified.
        max_angle: The shear angle is drawn uniformly from
            ``[-max_angle, max_angle]`` degrees.
        fillcolor: Colour used for the areas that the shear exposes outside
            the source image. Defaults to white, the same fill
            ``apply_rotation`` uses, instead of the library default which
            leaves dark wedges on a white canvas.

    Returns:
        A new image with the same size as ``img``.
    """
    angle = random.uniform(-max_angle, max_angle)
    shear = math.tan(math.radians(angle))
    # Affine coefficients (a, b, c, d, e, f) map every output pixel (x, y) to
    # the source pixel (a*x + b*y + c, d*x + e*y + f). Only b differs from the
    # identity, so each row is shifted horizontally in proportion to its y.
    return img.transform(
        img.size,
        Image.AFFINE,
        (1, shear, 0, 0, 1, 0),
        resample=Image.BICUBIC,
        fillcolor=fillcolor,
    )

def apply_blur(img, max_radius=1.5):
    """Gaussian-blur ``img`` with a radius drawn uniformly from 0 to ``max_radius``.

    When the drawn radius is not positive the input object itself is
    returned untouched; otherwise the filtered image is returned.
    """
    sigma = random.uniform(0, max_radius)
    if not sigma > 0:
        # Nothing to blur: hand back the very same object.
        return img
    return img.filter(ImageFilter.GaussianBlur(radius=sigma))

def apply_rotation(img, max_angle=5):
    """Rotate ``img`` by an angle drawn uniformly from ``[-max_angle, max_angle]``.

    The rotation is requested with ``expand=True`` and a white fill, and the
    result of ``img.rotate`` is returned as is.
    """
    degrees = random.uniform(-max_angle, max_angle)
    rotate_options = {"expand": True, "fillcolor": "white"}
    return img.rotate(degrees, **rotate_options)

def change_text_color(draw, x, y, text, font, colors=["black"]):
    """Draw ``text`` at ``(x, y)`` using a colour picked at random from ``colors``.

    The drawing is done through ``draw.text``; nothing is returned.
    """
    position = (x, y)
    draw.text(position, text, font=font, fill=random.choice(colors))

def decenter_text(x, y, text_width, text_height, img_width, img_height, margin=10):
    """Choose a random position for a text box inside an image.

    Each coordinate is drawn with ``random.randint`` between ``margin`` and
    ``image extent - text extent - margin``; when that upper bound would fall
    below ``margin`` the coordinate is pinned to ``margin``. The incoming
    ``x`` and ``y`` values are not used.

    Returns:
        The ``(x, y)`` pair that was drawn.
    """
    # Work out both upper bounds first, then draw x before y.
    upper_x, upper_y = [
        max(canvas_extent - text_extent - margin, margin)
        for canvas_extent, text_extent in (
            (img_width, text_width),
            (img_height, text_height),
        )
    ]
    return random.randint(margin, upper_x), random.randint(margin, upper_y)

def adjust_char_spacing(draw, x, y, text, font, spacing_range=(0, 5), fill="black"):
    """Draw ``text`` one character at a time with a random gap after each.

    After every character the pen moves right by the width of that
    character's ``draw.textbbox`` plus a gap drawn with
    ``random.randint(*spacing_range)``.

    Returns:
        ``(x, y)`` where ``x`` is the pen position after the last character
        and ``y`` is unchanged.
    """
    # NOTE(review): every character is passed to draw.text on its own, so
    # anything that depends on neighbouring characters is presumably not
    # applied here - confirm this is acceptable for the target script.
    pen_x = x
    for glyph in text:
        origin = (pen_x, y)
        draw.text(origin, glyph, font=font, fill=fill)
        box = draw.textbbox(origin, glyph, font=font)
        glyph_width = box[2] - box[0]
        gap = random.randint(*spacing_range)
        pen_x += glyph_width + gap
    return pen_x, y

def center_and_resize_image(img, target_size=image_size, background="black"):
    """Fit ``img`` inside ``target_size`` and centre it on a solid canvas.

    An image that exceeds the target in either dimension is scaled down with
    its aspect ratio preserved; a smaller image keeps its size. The result is
    pasted in the middle of a new RGB canvas of exactly ``target_size``.

    Args:
        img: Source image. It is never modified: the down-scaling is done on
            a copy, because ``thumbnail`` resizes in place and would
            otherwise shrink the caller's object as a side effect.
        target_size: ``(width, height)`` of the returned image.
        background: Fill colour of the canvas around the pasted image.

    Returns:
        A new RGB image of size ``target_size``.
    """
    target_w, target_h = target_size

    fitted = img
    if img.width > target_w or img.height > target_h:
        fitted = img.copy()
        fitted.thumbnail((target_w, target_h), Image.LANCZOS)

    canvas = Image.new("RGB", (target_w, target_h), color=background)
    offset = (
        (target_w - fitted.width) // 2,
        (target_h - fitted.height) // 2,
    )
    canvas.paste(fitted, offset)
    return canvas


In [None]:
def draw_text(text, fontfile, output_path, base_image_size,
              font_size_bounds=(20, 48), final_size=(1156, 64), margin=10):
    """Render ``text`` to an image, augment it at random and save it.

    Steps:
      1. NFC-normalise the text and pick a random font size.
      2. Shrink the font, down to the minimum size, while the text plus
         margins is wider than the base canvas.
      3. Create a white canvas at least as large as the base size and large
         enough to hold the text plus margins in both dimensions.
      4. Draw the text in black at a random position inside the margins.
      5. Randomly apply skew (p=0.5), blur (p=0.5) and rotation (p=0.3).
      6. Pass the result through ``center_and_resize_image`` with
         ``final_size`` and save it to ``output_path``.

    Args:
        text: String to render.
        fontfile: Path of the font file handed to ``ImageFont.truetype``.
        output_path: Destination path of the saved image.
        base_image_size: ``(width, height)`` of the minimum drawing canvas.
        font_size_bounds: ``(min, max)`` font size; the initial size is drawn
            from this inclusive range and shrinking stops at ``min``.
        final_size: ``(width, height)`` handed to ``center_and_resize_image``.
        margin: Minimum gap, in pixels, kept between the text box and each
            canvas edge.
    """
    text = unicodedata.normalize("NFC", text)
    min_font_size, max_font_size = font_size_bounds
    base_width, base_height = base_image_size
    padding = 2 * margin

    # Scratch canvas that is only used to measure the text.
    measurer = ImageDraw.Draw(Image.new("RGB", base_image_size, color="white"))

    def _measure(size):
        """Return the font at ``size`` and the text's bounding box at the origin."""
        font = ImageFont.truetype(fontfile, size)
        return font, measurer.textbbox((0, 0), text, font=font)

    font_size = random.randint(min_font_size, max_font_size)
    pil_font, bbox = _measure(font_size)
    while (bbox[2] - bbox[0]) + padding > base_width and font_size > min_font_size:
        font_size -= 1
        pil_font, bbox = _measure(font_size)

    left, top, right, bottom = bbox
    text_width = math.ceil(right - left)
    text_height = math.ceil(bottom - top)

    # Grow the canvas in BOTH directions when the text still does not fit;
    # a fixed height would cut off tall text at the bottom edge.
    img_width = max(base_width, text_width + padding)
    img_height = max(base_height, text_height + padding)

    img = Image.new("RGB", (img_width, img_height), color="white")
    draw = ImageDraw.Draw(img)

    x, y = decenter_text(0, 0, text_width, text_height, img_width, img_height,
                         margin=margin)
    # The bounding box measured at the origin does not start at (0, 0), so
    # shift the drawing origin by its left/top offset. This makes the box
    # that was measured land exactly at (x, y).
    draw.text((x - left, y - top), text, font=pil_font, fill="black")

    if random.random() < 0.5:
        img = apply_skew(img)
    if random.random() < 0.5:
        img = apply_blur(img)
    if random.random() < 0.3:
        img = apply_rotation(img)

    img = center_and_resize_image(img, final_size)
    img.save(output_path)

def generate_labels_csv(base_dir):
    """Convert each split's ``gt.txt`` into a ``labels.csv`` file.

    For every split folder (``train``, ``val``, ``test``) inside ``base_dir``
    the file ``gt.txt`` is read and ``labels.csv`` is written next to it with
    the columns ``filename`` and ``words``. A split without ``gt.txt`` is
    reported and skipped.

    Each non-blank line of ``gt.txt`` is split at its first space into a file
    name and the text. A line that holds only a file name produces a row with
    an empty ``words`` value instead of aborting the whole conversion.

    Args:
        base_dir: Directory that contains the split folders.
    """
    for split in ("train", "val", "test"):
        split_dir = os.path.join(base_dir, split)
        gt_path = os.path.join(split_dir, "gt.txt")
        csv_path = os.path.join(split_dir, "labels.csv")

        if not os.path.exists(gt_path):
            print(f"⚠️ No gt.txt found for {split}")
            continue

        rows = []
        with open(gt_path, "r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                # partition() never raises: with no space the text is "".
                filename, _, text = line.partition(" ")
                rows.append((filename, text))

        # newline='' lets the csv module control line endings itself.
        with open(csv_path, "w", newline='', encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["filename", "words"])
            writer.writerows(rows)

        print(f"✅ Created {csv_path} with {len(rows)} entries")

In [None]:
gt_file_path = os.path.join(output_text, "gt.txt")

# Load the corpus, dropping blank lines.
with open(corpus_file, "r", encoding="utf-8") as f:
    lines = [line.strip() for line in f if line.strip()]
if not lines:
    raise ValueError(f"Corpus file {corpus_file!r} contains no non-empty lines")

# Sorted so the font picked for a given random state does not depend on the
# arbitrary order in which os.listdir returns directory entries.
font_files = sorted(
    os.path.join(fonts_dir, f) for f in os.listdir(fonts_dir)
    if f.lower().endswith((".ttf", ".otf"))
)
if not font_files:
    raise FileNotFoundError(f"No .ttf/.otf fonts found in {fonts_dir!r}")

counter = 0
with open(gt_file_path, "w", encoding="utf-8") as gt_file:
    for _ in range(num_images):
        num_lines = random.randint(1, num_words)
        selected_lines = random.choices(lines, k=num_lines)
        # draw_text renders the NFC form of the text, so the label is
        # normalised the same way to keep it identical to what is drawn.
        text = unicodedata.normalize("NFC", " ".join(selected_lines))

        font_file = random.choice(font_files)
        file_name = f"image_{counter:05d}.jpg"
        output_path = os.path.join(output_images, file_name)

        draw_text(text, font_file, output_path, image_size)
        # One record per line: "<file name> <text>".
        gt_file.write(f"{file_name} {text}\n")

        counter += 1

print(f"Generated {counter} images in {output_images}")
print(f"GT file saved at: {gt_file_path}")


In [None]:
# Bring every generated JPEG to image_size, overwriting the file in place.
for file in os.listdir(output_images):
    if file.endswith(".jpg"):
        img_path = os.path.join(output_images, file)
        # The context manager releases the source file handle before the same
        # path is overwritten; the resized result is a separate image object.
        with Image.open(img_path) as source_img:
            img = center_and_resize_image(source_img, image_size)
        img.save(img_path)

In [None]:
import os
import random
import shutil

dataset_dir = "data"
image_path = os.path.join(dataset_dir, "images")
output_base = "dataset/"
train_ratio, val_ratio, test_ratio = 0.6, 0.2, 0.2
# The test split receives whatever is left after train and val, so make sure
# the three configured ratios actually describe a full partition.
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, \
    "train/val/test ratios must sum to 1"

gt_path = os.path.join(dataset_dir, "gt.txt")

with open(gt_path, "r", encoding="utf-8") as f:
    entries = [line.strip() for line in f if line.strip()]

random.shuffle(entries)
total = len(entries)
train_end = int(total * train_ratio)
val_end = train_end + int(total * val_ratio)
train_entries = entries[:train_end]
val_entries = entries[train_end:val_end]
test_entries = entries[val_end:]

splits = {
    "train": train_entries,
    "val": val_entries,
    "test": test_entries
}

missing_images = []
for split_name, split_data in splits.items():
    split_dir = os.path.join(output_base, split_name)
    os.makedirs(split_dir, exist_ok=True)

    split_gt = os.path.join(split_dir, "gt.txt")
    with open(split_gt, "w", encoding="utf-8") as gt_file:
        for entry in split_data:
            # Each entry is "<file name> <text>"; the name is the first token.
            img_name = entry.split(" ", 1)[0]
            src_img = os.path.join(image_path, img_name)
            dst_img = os.path.join(split_dir, img_name)

            if not os.path.exists(src_img):
                # Leave the label out as well, so the split's gt.txt never
                # points at an image that was not copied.
                missing_images.append(src_img)
                print(f"⚠️ Missing image, entry skipped: {src_img}")
                continue

            print(src_img, "->", dst_img)
            shutil.copy(src_img, dst_img)
            gt_file.write(entry + "\n")

print(f"✅ Split complete! Results saved in '{output_base}'")
print(f"Train: {len(train_entries)}, Val: {len(val_entries)}, Test: {len(test_entries)}")
if missing_images:
    print(f"⚠️ Skipped {len(missing_images)} entries whose image file was missing")
generate_labels_csv(output_base)

