# In [1]:
import cv2
import numpy as np
import pytesseract
import pandas as pd
from pathlib import Path
from PIL import Image
import re

# Dataset locations: inputs and outputs all sit under one data root.
_DATA_ROOT = Path("G:/Sajal_Data/Obj_4_Code/Teacher_model_training/data")
INPUT_DIR = _DATA_ROOT / "images"  # folder with the input images
MASK_OUTPUT_DIR = _DATA_ROOT / "masks"  # where generated masks are written
CSV_PATH = _DATA_ROOT / "kamra_measurements.csv"  # table of parsed values

# Make sure the mask folder (and any missing parents) exists before the run.
MASK_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Collects one dict per processed image.
measurements = []

# Compiled matchers, one per measurement label. Each looks for the label,
# optional whitespace, an optional ":" or "=", more optional whitespace, and
# then captures a decimal number (digits, a dot, digits) as group 1.
_VALUE_AFTER_LABEL = r"\s*[:=]?\s*([0-9]+\.[0-9]+)"
patterns = {
    label: re.compile(label + _VALUE_AFTER_LABEL)
    for label in ("HC", "AC", "BPD", "FL")
}

# Process each image: write a filled mask of its yellow pixels and collect the
# measurement values that OCR finds in the image text.
for img_path in sorted(INPUT_DIR.glob("*.jpg")):
    image_name = img_path.name

    # cv2.imread reports failure by returning None instead of raising. Without
    # this guard a single unreadable/corrupt file would make cvtColor raise and
    # abort the whole batch before the CSV is written. Skipped files get
    # neither a mask nor a CSV row.
    image_bgr = cv2.imread(str(img_path))
    if image_bgr is None:
        print(f"Skipping unreadable image: {img_path}")
        continue

    # === Mask Extraction ===
    # Threshold in HSV using the bounds treated here as the yellow range.
    hsv = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2HSV)
    lower_yellow = np.array([20, 100, 100])
    upper_yellow = np.array([40, 255, 255])
    mask_yellow = cv2.inRange(hsv, lower_yellow, upper_yellow)

    # Keep only the outermost contours and fill their interiors, turning the
    # thresholded pixels into solid regions.
    contours, _ = cv2.findContours(mask_yellow, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    mask_filled = np.zeros_like(mask_yellow)
    cv2.drawContours(mask_filled, contours, -1, 255, thickness=cv2.FILLED)

    # Save the mask next to the others as "<image stem>_mask.png".
    mask_filename = MASK_OUTPUT_DIR / f"{img_path.stem}_mask.png"
    Image.fromarray(mask_filled).save(mask_filename)

    # === Measurement Extraction ===
    # OCR runs on an RGB copy because OpenCV loads images in BGR order.
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    text = pytesseract.image_to_string(image_rgb)

    # One row per image; a label with no match in the OCR text is stored as None.
    row = {"image": image_name}
    for key, pattern in patterns.items():
        match = pattern.search(text)
        row[key] = float(match.group(1)) if match else None
    measurements.append(row)

# Turn the collected rows into a table and write it to CSV_PATH without the
# index column.
df = pd.DataFrame(data=measurements)
df.to_csv(path_or_buf=CSV_PATH, index=False)

# Last expression of the notebook cell: echoes the output file name as a
# quick check.
CSV_PATH.name


'kamra_measurements.csv'