In [3]:
!pip install ultralytics

Defaulting to user installation because normal site-packages is not writeable
Collecting ultralytics
  Downloading ultralytics-8.3.127-py3-none-any.whl.metadata (37 kB)
Collecting py-cpuinfo (from ultralytics)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.127-py3-none-any.whl (1.0 MB)
   ---------------------------------------- 0.0/1.0 MB ? eta -:--:--
   ---------------------------------------- 1.0/1.0 MB 9.6 MB/s eta 0:00:00
Downloading ultralytics_thop-2.0.14-py3-none-any.whl (26 kB)
Downloading py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Installing collected packages: py-cpuinfo, ultralytics-thop, ultralytics
Successfully installed py-cpuinfo-9.0.0 ultralytics-8.3.127 ultralytics-thop-2.0.14



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: C:\Users\stras\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [1]:
import os
import random
import shutil
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt
from ultralytics import YOLO

In [7]:
# ✅ STEP 0: CONFIGURATION AND OUTPUT FOLDERS

# --- Tunable settings ---------------------------------------------------------
INPUT_DIR = "glyph_images"                    # folder scanned for single-glyph .png files
SYNTH_IMG_DIR = "synthetic_sentences"         # generated sentence images are written here
SYNTH_LBL_DIR = "synthetic_sentences_labels"  # one YOLO-format .txt per generated image
YOLO_DATA_DIR = "yolo_dataset"                # root of the images/ + labels/ training tree
CANVAS_SIZE = (1500, 150)                     # (width, height) of each sentence canvas in px
GLYPHS_PER_SENTENCE = 10
NUM_SENTENCES = 200

# --- Make sure every output folder exists before anything is written ----------
for _required_dir in (
    SYNTH_IMG_DIR,
    SYNTH_LBL_DIR,
    os.path.join(YOLO_DATA_DIR, "images", "train"),
    os.path.join(YOLO_DATA_DIR, "labels", "train"),
):
    os.makedirs(_required_dir, exist_ok=True)

# === STEP 1: SYNTHETIC SENTENCE GENERATION ===
RANDOM_SEED = 42       # fixed seed so a fresh kernel regenerates the same dataset
INK_THRESHOLD = 250    # grayscale values below this count as glyph ink
LEFT_MARGIN = 5        # x position (px) of the first glyph on the canvas
GLYPH_GAP = 15         # horizontal spacing (px) between consecutive glyphs
VERTICAL_PADDING = 10  # total px kept free above + below the tallest glyph


def _load_cropped_glyph(path, max_height):
    """Load one glyph as grayscale, crop it to its ink, and cap its height.

    Args:
        path: Path of the glyph image file.
        max_height: Tallest allowed result in pixels; taller crops are scaled
            down proportionally.

    Returns:
        The cropped (and possibly downscaled) PIL image, or None when the
        file contains no pixel darker than INK_THRESHOLD.
    """
    # The context manager releases the file handle; convert() loads the pixels
    # and returns an independent in-memory image.
    with Image.open(path) as src:
        glyph = src.convert('L')

    ink_mask = np.array(glyph) < INK_THRESHOLD
    if not ink_mask.any():
        return None

    # argwhere yields (row, col) pairs, i.e. (y, x); +1 makes the max exclusive.
    ink_coords = np.argwhere(ink_mask)
    y0, x0 = (int(v) for v in ink_coords.min(axis=0))
    y1, x1 = (int(v) for v in ink_coords.max(axis=0) + 1)
    cropped = glyph.crop((x0, y0, x1, y1))

    if cropped.height > max_height:
        ratio = max_height / cropped.height
        # max(1, ...) prevents a zero-width target size for very thin glyphs.
        new_size = (max(1, int(cropped.width * ratio)), max_height)
        cropped = cropped.resize(new_size, resample=Image.BILINEAR)
    return cropped


# The class id of a glyph is its rank in the sorted file list, so ids are stable
# across runs as long as the folder contents do not change.
glyph_files = sorted(f for f in os.listdir(INPUT_DIR) if f.endswith(".png"))[:GLYPHS_PER_SENTENCE]
if not glyph_files:
    raise FileNotFoundError(f"No .png glyph images found in '{INPUT_DIR}'")
glyph_classes = {f: idx for idx, f in enumerate(glyph_files)}

# Load and crop every glyph once instead of once per generated sentence.
# Blank glyphs (no ink) are left out and therefore never placed.
glyph_images = {}
for _name in glyph_files:
    _glyph = _load_cropped_glyph(os.path.join(INPUT_DIR, _name), CANVAS_SIZE[1] - VERTICAL_PADDING)
    if _glyph is not None:
        glyph_images[_name] = _glyph

random.seed(RANDOM_SEED)
canvas_w, canvas_h = CANVAS_SIZE
skipped_glyphs = 0

for i in range(NUM_SENTENCES):
    canvas = Image.new('L', CANVAS_SIZE, color=255)
    x_offset = LEFT_MARGIN
    bboxes = []

    # random.sample returns a shuffled copy, leaving glyph_files itself sorted.
    for glyph_name in random.sample(glyph_files, len(glyph_files)):
        cropped = glyph_images.get(glyph_name)
        if cropped is None:
            continue

        # A glyph that would cross the right edge gets clipped by paste() while
        # its label would still describe the full box (coordinates > 1.0), so
        # skip it rather than write an invalid annotation.
        if x_offset + cropped.width > canvas_w:
            skipped_glyphs += 1
            continue

        y_offset = (canvas_h - cropped.height) // 2  # centre vertically
        canvas.paste(cropped, (x_offset, y_offset))

        # YOLO label format: class x_center y_center width height, all normalised to [0, 1].
        bboxes.append((
            glyph_classes[glyph_name],
            (x_offset + cropped.width / 2) / canvas_w,
            (y_offset + cropped.height / 2) / canvas_h,
            cropped.width / canvas_w,
            cropped.height / canvas_h,
        ))

        x_offset += cropped.width + GLYPH_GAP

    img_path = os.path.join(SYNTH_IMG_DIR, f"sentence_{i}.png")
    canvas.convert("RGB").save(img_path)
    label_path = os.path.join(SYNTH_LBL_DIR, f"sentence_{i}.txt")
    with open(label_path, 'w') as f:
        for bbox in bboxes:
            f.write(f"{bbox[0]} {bbox[1]:.6f} {bbox[2]:.6f} {bbox[3]:.6f} {bbox[4]:.6f}\n")

if skipped_glyphs:
    print(f"⚠️ {skipped_glyphs} glyph placements skipped because they did not fit on the canvas.")
print("✅ Synthetic sentences generated.")

✅ Synthetic sentences generated.


In [8]:
import os
import shutil

YOLO_DATA_DIR = "yolo_dataset"
SENT_IMG_DIR = "synthetic_sentences"
SENT_LBL_DIR = "synthetic_sentences_labels"

# Create images/<split> and labels/<split> for both splits up front
# ("val" is only created here; nothing is copied into it yet).
for kind in ("images", "labels"):
    for split in ("train", "val"):
        os.makedirs(os.path.join(YOLO_DATA_DIR, kind, split), exist_ok=True)

# Each row: (source folder, dataset sub-folder, file extension to copy).
copy_plan = (
    (SENT_IMG_DIR, "images", ".png"),
    (SENT_LBL_DIR, "labels", ".txt"),
)
for source_dir, kind, extension in copy_plan:
    target_dir = os.path.join(YOLO_DATA_DIR, kind, "train")
    for entry in os.listdir(source_dir):
        if not entry.endswith(extension):
            continue
        shutil.copy(os.path.join(source_dir, entry), os.path.join(target_dir, entry))

print("✅ YOLO dataset folders populated.")


✅ YOLO dataset folders populated.


In [None]:
# The synthetic images and labels are copied into yolo_dataset/{images,labels}/train
# by the preceding cell, so that copy is not repeated here.

# ─── WRITE DATASET.YAML ───────────────────────────────────────────────────────
# NOTE(review): `val` points at the training images, so the validation metrics
# (and the `patience` early stopping that relies on them) are measured on data
# the model has already seen. Populate images/val + labels/val with held-out
# sentences and switch `val` to images/val for a meaningful score.
dataset_yaml_path = os.path.join(YOLO_DATA_DIR, "glyphs-seg.yaml")
yaml_txt = f"""
path: {os.path.abspath(YOLO_DATA_DIR)}
train: images/train
val: images/train

names:
"""
# One entry per class id actually assigned during sentence generation.
for cid in sorted(glyph_classes.values()):
    yaml_txt += f"  {cid}: Glyph_{cid}\n"

# Explicit UTF-8 so a dataset path with non-ASCII characters is written
# independently of the platform's default encoding.
with open(dataset_yaml_path, "w", encoding="utf-8") as f:
    f.write(yaml_txt)

# ─── TRAIN DETECTION MODEL ────────────────────────────────────────────────────
# yolov8n.pt is a detection checkpoint and the generated labels are plain
# bounding boxes, so this is a detection run (the recorded log writes to
# runs\detect). The former task='segment' argument did not change that and is
# dropped to avoid suggesting otherwise.
model = YOLO("yolov8n.pt")

model.train(
    data=dataset_yaml_path,
    epochs=50,
    imgsz=640,
    batch=8,
    patience=10,
)

# ─── SAVE THE TRAINED WEIGHTS ─────────────────────────────────────────────────
model_save_path = "models/glyph_detector.pt"
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

# Ultralytics numbers its run folders (train, train2, train3, ...), so a fixed
# "runs/detect/train" path can point at stale weights from an older run. Ask
# the trainer where this run stored its best checkpoint instead.
best_weights = str(model.trainer.best)

if os.path.exists(best_weights):
    shutil.copy(best_weights, model_save_path)
    print("✅ Done: synthetic data generation & detection training complete!")
    print(f"✅ Model saved to: {model_save_path}")
else:
    print("⚠️ Training might not have produced weight files yet.")
    print(f"Please check '{os.path.dirname(best_weights)}' directory after training completes.")

Ultralytics 8.3.127  Python-3.12.10 torch-2.5.1+cpu CPU (11th Gen Intel Core(TM) i7-1195G7 2.90GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=yolo_dataset\glyphs-seg.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=1, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train4, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=10, perspective=0.0, plots=

[34m[1mtrain: [0mScanning C:\Users\stras\Documents\GitHub\AI-project\yolo_dataset\labels\train.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 200/200 [00:00<?, ?it/s]

[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 3.60.5 MB/s, size: 44.4 KB)



[34m[1mval: [0mScanning C:\Users\stras\Documents\GitHub\AI-project\yolo_dataset\labels\train.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 200/200 [00:00<?, ?it/s]

Plotting labels to runs\detect\train4\labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\train4[0m
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/1         0G       2.05      4.534      1.419        178        640:  16%|█▌        | 4/25 [00:18<01:35,  4.56s/it]


KeyboardInterrupt: 

In [25]:
import cv2
import numpy as np
from PIL import Image

# ─── PARAMETERS ────────────────────────────────────────────────────────────────
# Image used for the visual check below.
# NOTE(review): this file sits in the training split, so the predictions show
# how well the model fits data it was trained on, not how it generalises.
TEST_IMG_PATH = "yolo_dataset/images/train/sentence_1.png"  # Path to a single test image

# ─── TEST PREDICTIONS ON SINGLE IMAGE ─────────────────────────────────────────
def test_model_on_image(img_path, model_path='models/glyph_detector.pt'):
    """Run the trained glyph detector on one image and show the annotated result inline.

    Args:
        img_path: Path of the image to run inference on.
        model_path: Path of the trained weights file. Defaults to the location
            the training cell copies the best checkpoint to.

    Returns:
        None. The annotated image is rendered as a matplotlib figure in the
        cell output.
    """
    # Force 3-channel RGB so grayscale/RGBA inputs are handled too, and release
    # the file handle as soon as the pixels are in memory.
    with Image.open(img_path) as src:
        img = src.convert("RGB")

    trained_model = YOLO(model_path)

    # Calling the model returns one result per input image; a single image was passed.
    result = trained_model(img)[0]

    boxes = result.boxes.xyxy.cpu().numpy()        # bounding boxes as (x1, y1, x2, y2)
    confidences = result.boxes.conf.cpu().numpy()  # confidence score per box
    class_ids = result.boxes.cls.cpu().numpy()     # predicted class id per box

    # np.array() makes a writable copy. It stays in RGB order because it is
    # displayed with matplotlib rather than OpenCV; green is (0, 255, 0) in both.
    annotated = np.array(img)
    color = (0, 255, 0)

    for box, conf, class_id in zip(boxes, confidences, class_ids):
        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)

        label = f"Glyph_{int(class_id)}: {conf:.2f}"
        # Put the label above the box when there is room; otherwise draw it just
        # inside the top edge so it is not cut off at the image border.
        text_y = y1 - 10 if y1 >= 20 else y1 + 15
        cv2.putText(annotated, label, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Render inline: cv2.imshow opens a native window, blocks the kernel until a
    # key is pressed, and is unavailable in headless environments.
    height, width = annotated.shape[:2]
    fig, ax = plt.subplots(figsize=(15, max(1.5, 15 * height / width)))
    ax.imshow(annotated)
    ax.set_title("Test Image with Predictions")
    ax.axis("off")
    plt.show()

# Run the detector on the configured sample image and display its predictions.
test_model_on_image(img_path=TEST_IMG_PATH)

print("✅ Done: Testing complete, results displayed!")



0: 64x640 1 Glyph_0, 1 Glyph_1, 1 Glyph_3, 1 Glyph_5, 1 Glyph_7, 1 Glyph_9, 34.6ms
Speed: 0.6ms preprocess, 34.6ms inference, 1.3ms postprocess per image at shape (1, 3, 64, 640)
✅ Done: Testing complete, results displayed!
