# In [5]:
import os
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
import random
import re

# ---------- CONFIGURATION ----------
# Root folder that receives one sub-folder per character plus labels.csv.
DATASET_DIR = "synthetic_braille_dataset"

# Dot geometry, all in pixels.
DOT_RADIUS = 5  # radius of a drawn dot
D = 10          # per-dot extent used in the cell-size formulas (2 * DOT_RADIUS)
H = 10          # horizontal gap between the two dot columns
V = 8           # vertical gap between consecutive dot rows
PAD = 5         # margin added on every side of the cell

# A cell is 2 dots wide and 3 dots tall, separated by the gaps above.
CELL_W = 2 * D + H          # 30 px
CELL_H = 3 * D + 2 * V      # 46 px

# Final image size: the cell plus padding on both sides.
IMG_W = PAD + CELL_W + PAD  # 40 px
IMG_H = PAD + CELL_H + PAD  # 56 px

# Number of augmented samples generated for each character.
VARIATIONS_PER_CHAR = 250

# Make sure the output folder exists before anything is written to it.
Path(DATASET_DIR).mkdir(parents=True, exist_ok=True)

# ---------- BRAILLE BIT UTILS ----------
def dots_to_bit(dots):
    """Pack braille dot numbers into an integer bitmask.

    Args:
        dots: Iterable of 1-based dot numbers.

    Returns:
        An int in which bit ``n - 1`` is set for every dot number ``n``
        in ``dots``; 0 when ``dots`` is empty. Repeated dot numbers have
        no additional effect.
    """
    mask = 0
    for shift in (dot - 1 for dot in dots):
        mask |= 1 << shift
    return mask

# ---------- TRANSLITERATION HELPER ----------
def safe_filename(text):
    """Return an ASCII-only name for ``text`` that is safe to use in paths.

    ``text`` is first looked up as a whole in a fixed Devanagari-to-Latin
    table. Whether or not a match is found, every character of the result
    that is not an ASCII letter, digit, ``_`` or ``-`` is then replaced
    with ``_``.

    Args:
        text: The label to convert.

    Returns:
        The transliterated and sanitised string.
    """
    translit = {
        # Vowel forms
        "अ": "a",
        "आ": "aa",
        "इ": "i",
        "ई": "ii",
        "उ": "u",
        "ऊ": "uu",
        "ए": "e",
        "ऐ": "ai",
        "ओ": "o",
        "औ": "au",
        "अं": "am",
        "अः": "ah",
        # Consonants (a trailing "2" keeps otherwise identical names apart)
        "क": "ka",
        "ख": "kha",
        "ग": "ga",
        "घ": "gha",
        "ङ": "nga",
        "च": "cha",
        "छ": "chha",
        "ज": "ja",
        "झ": "jha",
        "ञ": "nya",
        "ट": "ta",
        "ठ": "tha",
        "ड": "da",
        "ढ": "dha",
        "ण": "na",
        "त": "ta2",
        "थ": "tha2",
        "द": "da2",
        "ध": "dha2",
        "न": "na2",
        "प": "pa",
        "फ": "pha",
        "ब": "ba",
        "भ": "bha",
        "म": "ma",
        "य": "ya",
        "र": "ra",
        "ल": "la",
        "व": "va",
        "श": "sha",
        "ष": "shha",
        "स": "sa",
        "ह": "ha",
        # Conjuncts
        "क्ष": "ksha",
        "ज्ञ": "gya",
    }
    try:
        text = translit[text]
    except KeyError:
        # Not in the table: sanitise the input as given.
        pass
    return re.sub(r'[^a-zA-Z0-9_-]', '_', text)

# ---------- BRAILLE MAP ----------
# Lookup from a cell's bitmask (as produced by dots_to_bit) to the Devanagari
# character it represents. Entries are listed as (raised dot numbers, character)
# and keep their original order, so iteration order is unchanged.
braille_map = {
    dots_to_bit(dots): char
    for dots, char in (
        # Vowel forms
        ((1,), 'अ'),
        ((3, 4, 5), 'आ'),
        ((2, 4), 'इ'),
        ((3, 5), 'ई'),
        ((1, 3, 6), 'उ'),
        ((1, 2, 5, 6), 'ऊ'),
        ((1, 5), 'ए'),
        ((3, 4), 'ऐ'),
        ((1, 3, 5), 'ओ'),
        ((2, 4, 6), 'औ'),
        ((5, 6), 'अं'),
        ((6,), 'अः'),
        # Consonants
        ((1, 3), 'क'),
        ((4, 6), 'ख'),
        ((1, 2, 4, 5), 'ग'),
        ((1, 2, 6), 'घ'),
        ((3, 4, 6), 'ङ'),
        ((1, 4), 'च'),
        ((1, 6), 'छ'),
        ((2, 4, 5), 'ज'),
        ((3, 5, 6), 'झ'),
        ((2, 5), 'ञ'),
        ((2, 3, 4, 5, 6), 'ट'),
        ((2, 4, 5, 6), 'ठ'),
        ((1, 2, 4, 6), 'ड'),
        ((1, 2, 3, 4, 5, 6), 'ढ'),
        ((3, 4, 5, 6), 'ण'),
        ((2, 3, 4, 5), 'त'),
        ((1, 4, 5, 6), 'थ'),
        ((1, 4, 5), 'द'),
        ((2, 3, 4, 6), 'ध'),
        ((1, 3, 4, 5), 'न'),
        ((1, 2, 3, 4), 'प'),
        ((2, 3, 5), 'फ'),
        ((1, 2), 'ब'),
        ((4, 5), 'भ'),
        ((1, 3, 4), 'म'),
        ((1, 3, 4, 5, 6), 'य'),
        ((1, 2, 3, 5), 'र'),
        ((1, 2, 3), 'ल'),
        ((1, 2, 3, 6), 'व'),
        ((1, 4, 6), 'श'),
        ((1, 2, 3, 4, 6), 'ष'),
        ((2, 3, 4), 'स'),
        ((1, 2, 5), 'ह'),
        # Conjuncts
        ((1, 2, 3, 4, 5), 'क्ष'),
        ((1, 5, 6), 'ज्ञ'),
    )
}

# ---------- DRAW BRAILLE ----------
def draw_braille_cell(dots_bit):
    """Render one six-dot braille cell as a grayscale image.

    Dots are numbered 1-3 down the left column and 4-6 down the right
    column. All six positions are always drawn: raised dots are filled
    with intensity 70 and unraised positions with a faint 235, on a
    white (255) background.

    Args:
        dots_bit: Integer bitmask; bit ``n - 1`` set means dot ``n`` is
            raised.

    Returns:
        A ``uint8`` array of shape ``(IMG_H, IMG_W)``.
    """
    img = np.full((IMG_H, IMG_W), 255, np.uint8)

    # cv2.circle is positioned by its centre, so the first row/column of
    # centres must sit one radius inside the padded area. Without this
    # offset the top-left dot touches the image border and the cell is not
    # centred within the PAD margin assumed by IMG_W / IMG_H.
    first_x = PAD + DOT_RADIUS
    first_y = PAD + DOT_RADIUS
    col_step = D + H  # centre-to-centre distance between the two columns
    row_step = D + V  # centre-to-centre distance between consecutive rows

    for dot in range(1, 7):
        # Dots 1-3 fill column 0 top to bottom, dots 4-6 fill column 1.
        col, row = divmod(dot - 1, 3)
        center = (first_x + col * col_step, first_y + row * row_step)
        color = 70 if dots_bit & (1 << (dot - 1)) else 235
        cv2.circle(img, center, DOT_RADIUS, (color,), -1)

    return img

# ---------- AUGMENTATION ----------
def augment_image(img):
    """Return a randomly perturbed copy of a grayscale image.

    The following steps are applied in order:

    1. Contrast gain in [0.7, 1.3] and brightness shift in [-20, 20]
       (always).
    2. Gaussian blur with a 3x3 or 5x5 kernel (50% chance).
    3. Additive Gaussian noise with sigma drawn from 3..12 (70% chance).
    4. Rotation about the image centre by an angle in [-5, 5] degrees
       (50% chance).

    Randomness comes from both the ``random`` module and ``np.random``,
    so both must be seeded for reproducible output.

    Args:
        img: 2-D image array with values in the 0..255 range.

    Returns:
        A ``uint8`` array with the same shape as ``img``.
    """
    # Work in float so the gain/shift does not wrap around in uint8.
    img = img.astype(np.float32)

    # Brightness/contrast adjustment (gain is drawn first, then the shift).
    gain = random.uniform(0.7, 1.3)
    shift = random.uniform(-20, 20)
    img = np.clip(img * gain + shift, 0, 255).astype(np.uint8)

    # Apply Gaussian blur sometimes.
    if random.random() < 0.5:
        k = random.choice([3, 5])
        img = cv2.GaussianBlur(img, (k, k), 0)

    # Add sensor-like noise most of the time.
    if random.random() < 0.7:
        noise = np.random.normal(0, random.randint(3, 12), img.shape)
        img = np.clip(img + noise, 0, 255).astype(np.uint8)

    # Small random tilt; the other half of the samples stay upright.
    if random.random() < 0.5:
        (h, w) = img.shape[:2]
        center = (w // 2, h // 2)
        angle = random.uniform(-5, 5)  # degrees
        M = cv2.getRotationMatrix2D(center, angle, 1.0)

        # warpAffine fills uncovered pixels with a constant 0 by default,
        # which paints black wedges into the corners of a light image.
        # Replicating the edge pixels keeps the background continuous.
        img = cv2.warpAffine(
            img,
            M,
            (w, h),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_REPLICATE,
        )

    return img
# ---------- GENERATE SYNTHETIC DATA ----------
# For every braille pattern, write VARIATIONS_PER_CHAR augmented images into
# DATASET_DIR/<latin>/ and record one label row per image that was written.
rows = []
for bits, char in braille_map.items():
    latin = safe_filename(char)
    char_dir = Path(DATASET_DIR) / latin
    char_dir.mkdir(parents=True, exist_ok=True)

    for i in range(VARIATIONS_PER_CHAR):
        img = draw_braille_cell(bits)
        img = augment_image(img)
        fname = f"{latin}_{i:03d}.png"
        path = char_dir / fname

        # cv2.imwrite signals failure through its return value rather than an
        # exception; stop here so labels.csv never references a missing file.
        if not cv2.imwrite(str(path), img):
            raise OSError(f"Failed to write image: {path}")

        rows.append({
            "filename": str(path),
            "char": char,
            "latin": latin,
            "bits": bits
        })

# utf-8-sig writes a BOM at the start of the CSV, ahead of the Devanagari labels.
df = pd.DataFrame(rows)
df.to_csv(Path(DATASET_DIR) / "labels.csv", index=False, encoding="utf-8-sig")
print(f"✅ Generated {len(df)} images in '{DATASET_DIR}'")

# Output: ✅ Generated 11750 images in 'synthetic_braille_dataset'
