In [None]:
!pip install openai langchain langchain-community langchain-core PyPDF2 accelerate bitsandbytes llama_index huggingface_hub chromadb groq anthropic python-docx pypandoc markitdown pytesseract pdf2image python-docx pymupdf pillow pycryptodome==3.15.0



In [None]:
from google.colab import drive
# Mount Google Drive at /GD; the PDF and output paths configured below live under "/GD/My Drive/...".
drive.mount('/GD', force_remount=True)

Mounted at /GD


# Pypandoc + Test Card + Vector DB

In [None]:
import os
import re
import json
import tempfile
import subprocess
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed

# ---------- PDF & I/O ----------
from PyPDF2 import PdfReader
import pypandoc

# ---------- LLM (OpenAI via LangChain for your rules/plan/test-card) ----------
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

# ---------- Colab secret helper ----------
try:
    from google.colab import userdata  # type: ignore
except Exception:  # pragma: no cover
    userdata = None

# ---------- Vector DB + embeddings ----------
import chromadb
from sentence_transformers import SentenceTransformer

# ---------- Images & PDF visuals (PyMuPDF) ----------
import numpy as np
from PIL import Image

try:
    import fitz  # PyMuPDF
except Exception:
    fitz = None

# Optional OCR
try:
    import pytesseract  # type: ignore
except Exception:
    pytesseract = None

try:
    from pdf2image import convert_from_path  # type: ignore
except Exception:
    convert_from_path = None

# ---------- MarkItDown for image description (OpenAI multimodal) ----------
from openai import OpenAI
from markitdown import MarkItDown

# ---------- DOCX post-processing (borders) ----------
from docx import Document
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

# ============================ CONFIG ============================

# OpenAI key (colab or env)
import openai
# Use the Colab secret store when the `userdata` helper imported successfully; otherwise read the environment.
# NOTE(review): the secret name 'OpenaAIAPI' looks like a typo — confirm it matches the name stored in Colab.
openai.api_key = userdata.get('OpenaAIAPI') if userdata else os.environ.get("OPENAI_API_KEY", "")
# Mirror the key into the environment; OpenAI() in _init_markitdown is constructed without an explicit key
# (presumably it reads OPENAI_API_KEY — confirm against the installed openai client).
os.environ["OPENAI_API_KEY"] = openai.api_key or os.environ.get("OPENAI_API_KEY", "")

# Model for rules / pairwise / test-cards
llm_model = "gpt-4.1"

# PDF & output paths (from your original code)
pdf_path = "/GD/My Drive/JITC/military_standards_pdf/MIL-STD-188-203-1A_1995.pdf"
output_docx_path = "/GD/My Drive/JITC/military_standards_pdf/MIL-STD-188-Auto-TestPlan.docx"
output_markdown_path = output_docx_path.replace(".docx", ".md")  # save master .md too

# Vector DB locations derived from the same folder as the PDF
folder_path = os.path.dirname(pdf_path)
chroma_path = os.path.join(folder_path, "chroma_store")       # persistent chroma
collection_name = "military_standards"                        # you can rename

# Ingestion knobs (used as keyword defaults of preprocess_pdf_and_ingest_to_chroma)
EMBED_MODEL = "multi-qa-mpnet-base-dot-v1"   # name passed to SentenceTransformer(...)
MARKITDOWN_MODEL = "o4-mini"                 # llm_model handed to MarkItDown for image descriptions
DESCRIBE_IMAGES = True                       # run image description over extracted images
MAX_IMAGES_PER_PAGE = None                   # None = describe every image on a page
OCR_ENABLED = False                          # OCR a page only when its extracted text is empty
OCR_DPI = 220                                # dpi passed to pdf2image.convert_from_path for OCR
PAGE_RASTER_DPI = 144                        # dpi of the full-page raster saved for non-XObject visuals
PROBE_DPI = 72                               # dpi of the low-resolution "is there anything but text?" render
PROBE_NONWHITE_THRESHOLD = 0.01              # min fraction of non-white pixels (text masked) to count as visual
DETECT_INLINE_BLOCKS = True                  # look for image blocks in page.get_text("rawdict")
DETECT_DRAWINGS = True                       # look for vector drawings via page.get_drawings()
MIN_DRAWING_AREA = 2000.0                    # drawings whose rect area is below this are ignored
RASTERIZE_WHEN_NON_XOBJECT_VISUALS = True    # save a page raster when visuals exist but no XObject image does
IMAGE_OUT_DIRNAME = "_extracted_images"      # sub-folder (next to the PDF) receiving extracted images
BATCH_SIZE_EMBED = 32                        # documents per embedding call
BATCH_SIZE_UPSERT = 64                       # documents per Chroma upsert call

# Parallelism for rule extraction / test-cards / pairwise sections
RULES_MAX_WORKERS = 8
PAIRWISE_MAX_WORKERS = 4

# Pandoc raw OpenXML page break for DOCX (works in Word)
PAGE_BREAK_MD = "\n```{=openxml}\n<w:p><w:r><w:br w:type=\"page\"/></w:r></w:p>\n```\n"

# ============================ PANDOC ============================

def ensure_pandoc():
    """Make sure a pandoc binary is reachable through pypandoc.

    Asks pypandoc for the pandoc path; when that lookup raises ``OSError``
    a notice is printed and pypandoc's own downloader is invoked.
    """
    try:
        pypandoc.get_pandoc_path()
    except OSError:
        print("Pandoc not found by pypandoc. Attempting to download via pypandoc...")
        pypandoc.download_pandoc()

def _ensure_table_borders_docx(docx_path: str):
    """Rewrite ``docx_path`` in place so that every table has visible borders.

    For each table any existing ``w:tblBorders`` element is dropped and a new
    one is appended with single-line borders on all outer and inner edges.
    Doing this after conversion avoids relying on Pandoc's ``+attributes``
    extension or custom table styles.
    """
    edge_names = ('top', 'left', 'bottom', 'right', 'insideH', 'insideV')
    # Attribute order is kept stable: line style, width (w:sz is expressed in
    # eighths of a point, so '8' is 1 pt), spacing, colour.
    edge_attributes = (
        ('w:val', 'single'),
        ('w:sz', '8'),
        ('w:space', '0'),
        ('w:color', 'auto'),
    )

    document = Document(docx_path)
    for tbl in document.tables:
        properties = tbl._tbl.tblPr

        # Discard whatever border definition is already present.
        for stale in properties.findall(qn('w:tblBorders')):
            properties.remove(stale)

        # Build the replacement border set edge by edge.
        borders = OxmlElement('w:tblBorders')
        for edge in edge_names:
            line = OxmlElement(f'w:{edge}')
            for attr_name, attr_value in edge_attributes:
                line.set(qn(attr_name), attr_value)
            borders.append(line)
        properties.append(borders)

    document.save(docx_path)

def write_docx_with_pandoc(
    markdown_text: str,
    output_docx_path: str,
    reference_docx: Optional[str] = None,
    also_save_md: Optional[str] = None,
):
    """Convert Markdown to a DOCX file with Pandoc and give its tables borders.

    Args:
        markdown_text: Full Markdown document to convert.
        output_docx_path: Destination path of the generated ``.docx``.
        reference_docx: Optional Word template passed to Pandoc as
            ``--reference-doc``.
        also_save_md: Optional path where the raw Markdown is also written.

    The Markdown is handed to Pandoc through a temporary file, which is
    removed again once the conversion finished (successfully or not).
    Exceptions raised by pypandoc or python-docx propagate to the caller.
    """
    ensure_pandoc()

    if also_save_md:
        # A bare file name has no directory part; os.makedirs("") would raise.
        md_dir = os.path.dirname(also_save_md)
        if md_dir:
            os.makedirs(md_dir, exist_ok=True)
        with open(also_save_md, "w", encoding="utf-8") as f:
            f.write(markdown_text)

    # Helpful for debugging your environment (best effort only: pandoc may be
    # available to pypandoc without being on PATH).
    try:
        ver = subprocess.check_output(["pandoc", "-v"]).decode(errors="ignore").splitlines()[0]
        print(f"[pandoc] {ver}")
    except (OSError, subprocess.SubprocessError, IndexError):
        pass

    # delete=False so the file can be reopened by pandoc after we close it;
    # we therefore have to remove it ourselves below.
    with tempfile.NamedTemporaryFile(suffix=".md", delete=False, mode="w", encoding="utf-8") as tmp:
        tmp.write(markdown_text)
        tmp_md_path = tmp.name

    extra_args = ["--toc", "--toc-depth=3", "--number-sections"]
    if reference_docx:
        extra_args += ["--reference-doc", reference_docx]

    # NOTE: Removed +attributes to avoid the "Unknown extension: attributes" error on older pandoc.
    # Keep raw OpenXML + fenced_code_blocks so page breaks render correctly.
    from_format = (
        "markdown"
        "+raw_attribute"
        "+fenced_code_blocks"
        "+fenced_divs"
        "+pipe_tables"
        "+autolink_bare_uris"
    )

    try:
        pypandoc.convert_file(
            tmp_md_path,
            to="docx",
            outputfile=output_docx_path,
            extra_args=extra_args,
            format=from_format
        )
    finally:
        # Do not leave one temp file behind per run.
        try:
            os.remove(tmp_md_path)
        except OSError:
            pass
    print(f"Test plan saved to {output_docx_path} (from={from_format})")

    # Ensure all tables have borders (since custom-style wasn't possible without +attributes)
    _ensure_table_borders_docx(output_docx_path)

# ============================ INGEST (PDF → pages with text+image desc → Chroma) ============================

@dataclass
class PageRecord:
    """Per-page extraction result produced by preprocess_pdf_and_ingest_to_chroma."""
    page: int                       # 1-based page number (pages are enumerated with start=1)
    text: str                       # extracted (or OCR'd) page text; may be empty
    images: List[str]               # file paths of images saved for this page
    image_descriptions: List[str]   # one description (or failure message) per described image

def _rect_area(rect: "fitz.Rect") -> float:
    try:
        return rect.get_area()
    except Exception:
        return max(0.0, (rect.x1 - rect.x0) * (rect.y1 - rect.y0))

def _get_text_rects(page: "fitz.Page") -> List["fitz.Rect"]:
    rects: List["fitz.Rect"] = []
    try:
        for b in page.get_text("blocks") or []:
            # (x0,y0,x1,y1, text, block_no, block_type, block_flags)
            if len(b) >= 8 and b[6] == 0:
                rects.append(fitz.Rect(b[0], b[1], b[2], b[3]))
    except Exception:
        pass
    return rects

def _visual_probe_has_nontext(page: "fitz.Page", text_rects: List["fitz.Rect"], probe_dpi: int, nonwhite_threshold: float) -> bool:
    """Cheaply test whether ``page`` shows anything besides its text blocks.

    The page is rendered at ``probe_dpi``, every rectangle in ``text_rects``
    is painted white, and the share of remaining non-white pixels is compared
    with ``nonwhite_threshold``.

    Args:
        page: PyMuPDF page to render.
        text_rects: Text block rectangles in page coordinates (points).
        probe_dpi: Render resolution; 72 dpi corresponds to a scale of 1.0.
        nonwhite_threshold: Minimum fraction (0..1) of non-white pixels that
            counts as "has non-text visuals".

    Returns:
        True when the non-white fraction reaches the threshold, else False
        (also False for a zero-sized render).
    """
    scale = max(1e-6, probe_dpi / 72.0)
    pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)  # RGB8
    w, h = pix.width, pix.height
    if w == 0 or h == 0:
        return False
    # np.frombuffer over the immutable `samples` bytes yields a READ-ONLY view;
    # writing the mask into it raised ValueError, which the caller swallowed,
    # so the probe silently reported False on every page with text. Copy first.
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(h, w, pix.n).copy()

    # mask text rects to white
    for r in text_rects:
        x0 = max(0, int(r.x0 * scale))
        y0 = max(0, int(r.y0 * scale))
        x1 = min(w, int(r.x1 * scale))
        y1 = min(h, int(r.y1 * scale))
        if x1 > x0 and y1 > y0:
            img[y0:y1, x0:x1, :] = 255

    # A pixel counts as non-white when any channel is below 250.
    nonwhite = np.any(img < 250, axis=2)
    ratio = nonwhite.sum() / (w * h)
    return bool(ratio >= nonwhite_threshold)

def _init_markitdown(model_name: str) -> MarkItDown:
    """Create a MarkItDown converter backed by a default-configured OpenAI client.

    ``model_name`` is forwarded as MarkItDown's ``llm_model``.
    """
    return MarkItDown(llm_client=OpenAI(), llm_model=model_name)

def _describe_images_for_pages(pages: List[PageRecord], md: MarkItDown, max_images_per_page: Optional[int]) -> None:
    """Fill ``image_descriptions`` on every page record, in place.

    Each image path of a page (optionally only the first
    ``max_images_per_page``) is passed to ``md.convert``. The description is
    the result's ``text_content`` when truthy, otherwise ``str(result)``.
    A failing conversion does not abort the run; its slot receives an
    "Image description failed: ..." message instead.
    """

    def _describe(path: str) -> str:
        try:
            converted = md.convert(path)
            return getattr(converted, "text_content", None) or str(converted)
        except Exception as exc:
            return f"Image description failed: {exc}"

    for record in pages:
        selected = list(record.images)
        if max_images_per_page is not None:
            selected = selected[:max_images_per_page]
        record.image_descriptions = [_describe(path) for path in selected]

def preprocess_pdf_and_ingest_to_chroma(
    pdf_path: str,
    chroma_path: str,
    collection_name: str,
    *,
    describe_images: bool = DESCRIBE_IMAGES,
    max_images_per_page: Optional[int] = MAX_IMAGES_PER_PAGE,
    ocr_enabled: bool = OCR_ENABLED,
    ocr_dpi: int = OCR_DPI,
    page_raster_dpi: int = PAGE_RASTER_DPI,
    probe_dpi: int = PROBE_DPI,
    probe_nonwhite_threshold: float = PROBE_NONWHITE_THRESHOLD,
    detect_inline_blocks: bool = DETECT_INLINE_BLOCKS,
    detect_drawings: bool = DETECT_DRAWINGS,
    min_drawing_area: float = MIN_DRAWING_AREA,
    rasterize_when_non_xobject_visuals: bool = RASTERIZE_WHEN_NON_XOBJECT_VISUALS,
    image_out_dirname: str = IMAGE_OUT_DIRNAME,
    embed_model: str = EMBED_MODEL,
) -> List[PageRecord]:
    """Extract every page of a PDF and store the pages in a Chroma collection.

    Return list of PageRecord(page, text, images, image_descriptions) for this PDF,
    and store each page's (text + image descriptions) into a Chroma collection.

    Args:
        pdf_path: PDF to process.
        chroma_path: Directory of the persistent Chroma store.
        collection_name: Collection to create or reuse.
        describe_images: Describe extracted images through MarkItDown.
        max_images_per_page: Cap on described images per page (None = all).
        ocr_enabled: OCR a page when its extracted text is empty (needs
            pdf2image and pytesseract to be importable).
        ocr_dpi: Resolution used for OCR rendering.
        page_raster_dpi: Resolution of the saved full-page raster.
        probe_dpi: Resolution of the non-text probe render.
        probe_nonwhite_threshold: Non-white pixel fraction that makes the
            probe positive.
        detect_inline_blocks: Check ``rawdict`` blocks for image blocks.
        detect_drawings: Check vector drawings.
        min_drawing_area: Smallest drawing rect area that counts.
        rasterize_when_non_xobject_visuals: Save a page raster when visuals
            were detected but no XObject image was extracted.
        image_out_dirname: Folder name (next to the PDF) for saved images.
        embed_model: SentenceTransformer model name used for embeddings.

    Returns:
        One PageRecord per page, in page order.

    Per-page extraction steps are best effort: a failing image extraction,
    detection step or OCR call is skipped rather than aborting the run.
    """
    # Init Chroma and embedding model
    chroma_client = chromadb.PersistentClient(path=chroma_path)
    collection = chroma_client.get_or_create_collection(
        name=collection_name,
        metadata={"hnsw:space": "cosine"}
    )
    # Loaded exactly once and reused for every batch below.
    embedder = SentenceTransformer(embed_model)

    # Init MarkItDown up front (fails fast on a bad OpenAI setup), but only
    # when descriptions are actually requested.
    md = _init_markitdown(MARKITDOWN_MODEL) if describe_images else None

    # Extract pages with visuals/text via PyMuPDF if available
    image_root = os.path.join(os.path.dirname(pdf_path), image_out_dirname)
    os.makedirs(image_root, exist_ok=True)

    pages: List[PageRecord] = []

    if fitz is None:
        # Fallback: just extract text via PyPDF2 (no inline visuals) and no raster logic
        reader = PdfReader(pdf_path)
        for i, page in enumerate(reader.pages, start=1):
            text = page.extract_text() or ""
            # OCR fallback (optional)
            if ocr_enabled and not text.strip() and convert_from_path and pytesseract:
                try:
                    imgs = convert_from_path(pdf_path, dpi=ocr_dpi, first_page=i, last_page=i)
                    if imgs:
                        text = pytesseract.image_to_string(imgs[0]) or ""
                except Exception:
                    pass
            pages.append(PageRecord(page=i, text=text, images=[], image_descriptions=[]))
    else:
        # Rich path: detect XObjects / inline / drawings / probe and rasterize as needed
        doc = fitz.open(pdf_path)
        base = os.path.basename(pdf_path).replace(os.sep, "_")

        try:
            for i, page in enumerate(doc, start=1):
                text = page.get_text("text") or ""
                saved_images: List[str] = []
                text_rects = _get_text_rects(page)

                # XObject images
                try:
                    for img in page.get_images(full=True):
                        xref = img[0]
                        try:
                            info = doc.extract_image(xref)
                            ext = info.get("ext", "png")
                            out = os.path.join(image_root, f"{base}_page_{i}_xref_{xref}.{ext}")
                            with open(out, "wb") as f:
                                f.write(info["image"])
                            # normalize png mode
                            if ext.lower() == "png":
                                try:
                                    with Image.open(out) as im:
                                        if im.mode not in ("L", "RGB"):
                                            im = im.convert("L")
                                        im.save(out)
                                except Exception:
                                    pass
                            saved_images.append(out)
                        except Exception:
                            pass
                except Exception:
                    pass

                has_xobject = len(saved_images) > 0

                inline_present = False
                drawings_present = False
                probe_positive = False

                if not has_xobject:
                    # Inline image blocks
                    if detect_inline_blocks:
                        try:
                            raw = page.get_text("rawdict") or {}
                            blocks = raw.get("blocks", []) if isinstance(raw, dict) else []
                            for b in blocks:
                                if b.get("type") == 1:  # image block
                                    inline_present = True
                                    break
                        except Exception:
                            pass

                    # Vector drawings
                    if detect_drawings and not inline_present:
                        try:
                            drawings = page.get_drawings() or []
                            for d in drawings:
                                r = d.get("rect")
                                if isinstance(r, fitz.Rect) and _rect_area(r) >= min_drawing_area:
                                    drawings_present = True
                                    break
                        except Exception:
                            pass

                    # Only pay for the pixel probe when the cheaper checks found nothing.
                    if not inline_present and not drawings_present:
                        try:
                            probe_positive = _visual_probe_has_nontext(page, text_rects, probe_dpi, probe_nonwhite_threshold)
                        except Exception:
                            probe_positive = False
                    else:
                        probe_positive = True

                    # Full-page raster if visuals but no XObjects
                    if rasterize_when_non_xobject_visuals and probe_positive:
                        try:
                            scale = page_raster_dpi / 72.0
                            pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale))
                            out = os.path.join(image_root, f"{base}_page_{i}_raster.png")
                            pix.save(out)
                            saved_images.append(out)
                        except Exception:
                            pass

                # OCR fallback if text empty
                if ocr_enabled and not text.strip() and convert_from_path and pytesseract:
                    try:
                        imgs = convert_from_path(pdf_path, dpi=ocr_dpi, first_page=i, last_page=i)
                        if imgs:
                            text = pytesseract.image_to_string(imgs[0]) or text
                    except Exception:
                        pass

                pages.append(PageRecord(page=i, text=text, images=saved_images, image_descriptions=[]))
        finally:
            # Release the document even if a page unexpectedly blows up.
            doc.close()

    # Describe images if requested
    if describe_images:
        _describe_images_for_pages(pages, md, max_images_per_page)

    # Upsert into Chroma (page-level "text + image descriptions")
    ids, documents, metadatas = [], [], []
    fname = os.path.basename(pdf_path)

    def _combine(p: PageRecord) -> str:
        base = (p.text or "").strip()
        if p.image_descriptions:
            join = "\n".join([f"[Image {k+1}] {d}" for k, d in enumerate(p.image_descriptions)])
            return f"{base}\n{join}" if base else join
        return base

    for p in pages:
        combined = _combine(p)
        if not combined:
            # Nothing to index for a page without text and without descriptions.
            continue
        ids.append(f"{fname}_page_{p.page}")
        documents.append(combined)
        metadatas.append({
            "document_name": fname,
            "page": int(p.page),
            # Lists are JSON-encoded into strings before being stored as metadata.
            "image_files": json.dumps(p.images),
            "image_descriptions": json.dumps(p.image_descriptions),
            "source_path": pdf_path,
        })

    if documents:
        # embed + upsert
        vecs = []
        for i in range(0, len(documents), BATCH_SIZE_EMBED):
            chunk = documents[i:i+BATCH_SIZE_EMBED]
            # Reuse the model loaded above; the previous code rebuilt
            # SentenceTransformer(EMBED_MODEL) for every batch and thereby
            # also ignored the `embed_model` argument.
            embs = embedder.encode(chunk, show_progress_bar=False)
            vecs.extend([e.tolist() for e in embs])

        for i in range(0, len(documents), BATCH_SIZE_UPSERT):
            collection.upsert(
                ids=ids[i:i+BATCH_SIZE_UPSERT],
                documents=documents[i:i+BATCH_SIZE_UPSERT],
                metadatas=metadatas[i:i+BATCH_SIZE_UPSERT],
                embeddings=vecs[i:i+BATCH_SIZE_UPSERT],
            )

    return pages

# ============================ SECTION BUILD (from preprocessed pages) ============================

def extract_sections_from_pages(
    pages: List[PageRecord],
    start_page: int = 12,
    pages_per_section: int = 10
) -> Tuple[Dict[str, str], List[str]]:
    """Group preprocessed pages into fixed-size sections.

    Every page contributes its stripped text followed by one
    ``[Image k] <description>`` line per image description. Starting at
    ``start_page``, consecutive runs of ``pages_per_section`` pages are joined
    (empty pages skipped) under the title ``"Section k: Pages a-b"``.

    Returns:
        ``(sections, names)`` where ``sections`` maps title to section text
        and ``names`` lists the titles in creation order.
    """
    # Page number -> combined text (a later record with the same number wins).
    text_by_page: Dict[int, str] = {}
    for record in pages:
        combined = (record.text or "").strip()
        if record.image_descriptions:
            labelled = [
                f"[Image {position}] {description}"
                for position, description in enumerate(record.image_descriptions, start=1)
            ]
            combined = combined + "\n" + "\n".join(labelled)
        text_by_page[record.page] = combined.strip()

    last_page = max(text_by_page) if text_by_page else 0

    sections: Dict[str, str] = {}
    counter = 1
    for first in range(start_page, last_page + 1, pages_per_section):
        stop = min(first + pages_per_section, last_page + 1)
        if stop <= first:
            # Empty page window: no section is produced and no number consumed.
            continue
        page_texts = [text_by_page.get(number, "") for number in range(first, stop)]
        heading = f"Section {counter}: Pages {first}-{stop - 1}"
        sections[heading] = "\n".join(chunk for chunk in page_texts if chunk).strip()
        counter += 1

    return sections, list(sections)

# ============================ RULES / TEST CARDS / PAIRWISE ============================

def extract_rules_for_section(section_name, section_text):
    """Ask the chat model for the testable rules contained in one section.

    Args:
        section_name: Title of the section; embedded in the prompt and
            returned unchanged so callers can match results to sections.
        section_text: Section body that is analysed.

    Returns:
        ``(section_name, markdown)`` where ``markdown`` is the model's reply.
    """
    instructions = (
        "You are a MIL-STD-188 compliance and test planning expert.\n"
        "Analyze the following section of a military standard and extract EVERY possible testable rule, specification, constraint, or requirement. "
        "Rules MUST be extremely detailed, explicit, and step-by-step, and should include measurable criteria, acceptable ranges, and referenced figures or tables if mentioned. "
        "For ambiguous or implicit requirements, describe a specific test strategy.\n"
        "Generate a short, content-based TITLE for this section (do not use page numbers).\n"
    )
    output_layout = (
        "Organize your output as follows, using markdown headings and bolded text:\n\n"
        "## [Section Title]\n"
        "**Dependencies:**\n- List detailed dependencies as explicit tests, if any.\n\n"
        "**Conflicts:**\n- List detected or possible conflicts and provide recommendations or mitigation steps.\n\n"
        "**Test Rules:**\n1. (Very detailed, step-by-step numbered test rules)\n"
    )
    payload = f"\nSection Name: {section_name}\n\nSection Text:\n{section_text}\n\n"
    closing = (
        "---\n"
        "If you find truly nothing testable, reply: 'No testable rules in this section.'"
    )
    prompt = "".join((instructions, output_layout, payload, closing))

    chat = ChatOpenAI(model_name=llm_model, openai_api_key=openai.api_key)
    reply = chat([HumanMessage(content=prompt)])
    return section_name, reply.content

def synthesize_pairwise_test_plan(pair_name, rules_text_1, rules_text_2):
    """Merge the rule reports of two consecutive sections into one test plan.

    Args:
        pair_name: Key identifying the pair; returned unchanged.
        rules_text_1: Rules markdown of the first section.
        rules_text_2: Rules markdown of the second section.

    Returns:
        ``(pair_name, markdown)`` where ``markdown`` is the model's reply.
    """
    briefing = (
        "You are a senior QA documentation engineer.\n"
        "Given the DETAILED test rules and extracted section titles for two consecutive MIL-STD-188 sections, synthesize a single, logically organized, highly detailed test plan section.\n"
        "Combine rules, merge similar steps, cross-reference overlapping content, and call out dependencies or conflicts. "
        "Use a single, **content-based TITLE** for this combined section (not using page numbers). "
        "Keep bold markdown headings for 'Dependencies', 'Conflicts', and 'Test Rules'.\n"
        "Test rules must be extremely explicit, step-by-step, and cover ALL possible technical details and verification steps.\n"
        "Format the output using markdown.\n\n"
    )
    pieces = (
        briefing,
        "=== SECTION 1 ===\n",
        rules_text_1,
        "\n\n=== SECTION 2 ===\n",
        rules_text_2,
        "\n\n=== END ===\n\nOutput ONLY the combined test plan in the described format.",
    )
    prompt = "".join(pieces)

    chat = ChatOpenAI(model_name=llm_model, openai_api_key=openai.api_key)
    reply = chat([HumanMessage(content=prompt)])
    return pair_name, reply.content

def build_test_card_for_section(section_name, rules_markdown):
    """Turn one section's rules into a Markdown 'Test Card' pipe table.

    Args:
        section_name: Section title, embedded in the prompt.
        rules_markdown: Rules report (Markdown) the tests are derived from.

    Returns:
        The table as Markdown text. If the model reply contains no ``|`` at
        all, a one-row placeholder table is returned instead so the document
        build still gets a valid table.
    """
    # U+2610 BALLOT BOX, written as an escape: the literal glyph had been
    # corrupted into mojibake, so the model was told to emit garbage and the
    # fallback row printed it.
    checkbox = "\u2610"

    llm = ChatOpenAI(model_name=llm_model, openai_api_key=openai.api_key)
    prompt = (
        "You are a QA test documentation assistant.\n"
        "From the following section rules (Markdown), generate a single Markdown pipe table named 'Test Card' "
        "that lists one row per test. Do NOT include any text before or after the table.\n"
        "Requirements:\n"
        "- Columns: Test ID | Test Title | Procedures | Executed | Pass | Fail | Notes\n"
        "- 'Procedures' should be concise numbered steps separated by <br> (e.g., '1) ...<br>2) ...').\n"
        f"- Leave 'Executed', 'Pass', and 'Fail' empty with a checkbox symbol (use '{checkbox}'). Do NOT tick anything.\n"
        "- Derive Tests from the 'Test Rules' content. Use short, content-based titles.\n"
        "- Output ONLY the table in GitHub-style pipe-table format.\n\n"
        f"=== SECTION NAME ===\n{section_name}\n\n"
        f"=== SECTION RULES (MARKDOWN) ===\n{rules_markdown}\n\n"
        "=== END ==="
    )
    result = llm([HumanMessage(content=prompt)])
    table_md = result.content.strip()

    # Without a single pipe the reply cannot be a table: substitute a stub.
    if '|' not in table_md:
        header = "| Test ID | Test Title | Procedures | Executed | Pass | Fail | Notes |\n"
        sep    = "|---|---|---|---|---|---|---|\n"
        table_md = (
            header + sep +
            f"| 1 | (LLM failed to tabulate) | See rules above | {checkbox} | {checkbox} | {checkbox} | |\n"
        )
    return table_md

# ============================ MARKDOWN BUILD ============================

def _sanitize_markdown(md: str) -> str:
    md = md.replace("üîπ ", "- ").replace("‚Ä¢ ", "- ").replace("‚Äì ", "- ")
    md = re.sub(r'^(\s*)(\d+)\)\s+', r'\1\2. ', md, flags=re.MULTILINE)
    md = re.sub(r'\*\*\s+(.*?)\s+\*\*', r'**\1**', md)
    return md.strip() + "\n"

def build_master_markdown(section_names, section_rule_reports, section_test_cards, pairwise_test_plans) -> str:
    """Assemble the complete test-plan document as one Markdown string.

    After a title, a generator note and a page break, each section (in the
    order of ``section_names``) contributes, when present: its sanitized rules
    report, a "Test Card" heading with its table, and the sanitized pairwise
    plan stored under the same section name. Every section ends with a DOCX
    page break. All pieces are joined with newlines.
    """
    chunks = [
        "# MIL-STD-188 Automated Compliance Test Plan\n",
        "> Generated via automated extraction and synthesis pipeline (preprocessed with image understanding + vector DB).\n",
        PAGE_BREAK_MD,  # proper DOCX page break
    ]

    for name in section_names:
        rules = section_rule_reports.get(name, "").strip()
        if rules:
            chunks += [_sanitize_markdown(rules), "\n"]

        # Plain pipe table; its borders are added later in the DOCX post-process.
        card = section_test_cards.get(name, "").strip()
        if card:
            chunks += ["### Test Card\n\n", card.rstrip() + "\n\n"]

        if name in pairwise_test_plans:
            merged = pairwise_test_plans[name].strip()
            chunks += [_sanitize_markdown(merged), "\n"]

        # Real DOCX page break between sections.
        chunks.append(PAGE_BREAK_MD)

    return "\n".join(chunks)

# ============================ MAIN ============================

if __name__ == "__main__":
    # End-to-end driver: ingest the PDF, split it into sections, query the LLM
    # per section and per section pair, then render Markdown and DOCX.

    # A) PREPROCESS & INGEST: page-level (text + image descriptions) → Chroma
    pages = preprocess_pdf_and_ingest_to_chroma(
        pdf_path=pdf_path,
        chroma_path=chroma_path,
        collection_name=collection_name,
        describe_images=DESCRIBE_IMAGES,
        max_images_per_page=MAX_IMAGES_PER_PAGE,
        ocr_enabled=OCR_ENABLED,
        ocr_dpi=OCR_DPI,
        page_raster_dpi=PAGE_RASTER_DPI,
        probe_dpi=PROBE_DPI,
        probe_nonwhite_threshold=PROBE_NONWHITE_THRESHOLD,
        detect_inline_blocks=DETECT_INLINE_BLOCKS,
        detect_drawings=DETECT_DRAWINGS,
        min_drawing_area=MIN_DRAWING_AREA,
        rasterize_when_non_xobject_visuals=RASTERIZE_WHEN_NON_XOBJECT_VISUALS,
        image_out_dirname=IMAGE_OUT_DIRNAME,
        embed_model=EMBED_MODEL,
    )

    # B) Build sections FROM the preprocessed pages (this now includes image descriptions inline)
    sections, section_names = extract_sections_from_pages(
        pages,
        start_page=12,
        pages_per_section=10
    )

    # C) Extract detailed rules per section
    # Results arrive in completion order; they are keyed by section name, so
    # document order is restored later by iterating section_names.
    section_rule_results: Dict[str, str] = {}
    with ThreadPoolExecutor(max_workers=RULES_MAX_WORKERS) as executor:
        futures = [executor.submit(extract_rules_for_section, name, text) for name, text in sections.items()]
        for fut in as_completed(futures):
            # fut.result() re-raises any exception from the worker, aborting the run.
            section_name, rules_md = fut.result()
            section_rule_results[section_name] = rules_md
            print(f"\n--- {section_name} ---\n{rules_md}\n{'='*60}")

    # D) Build Test Card per section
    # Runs sequentially, one LLM call per section that produced rules.
    section_test_cards: Dict[str, str] = {}
    for sec in section_names:
        rules_md = section_rule_results.get(sec, "")
        if not rules_md:
            continue
        table_md = build_test_card_for_section(sec, rules_md)
        section_test_cards[sec] = table_md
        print(f"\n--- Test Card for {sec} ---\n{table_md}\n{'='*60}")

    # E) Pairwise combined test plans
    # Each plan combines section i with section i+1 and is stored under the
    # name of the FIRST section of the pair; the last section has no entry.
    pairwise_test_plans: Dict[str, str] = {}
    with ThreadPoolExecutor(max_workers=PAIRWISE_MAX_WORKERS) as executor:
        futures = []
        for i in range(len(section_names) - 1):
            s1 = section_names[i]
            s2 = section_names[i + 1]
            rules1 = section_rule_results.get(s1, "")
            rules2 = section_rule_results.get(s2, "")
            futures.append(executor.submit(synthesize_pairwise_test_plan, s1, rules1, rules2))
        for fut in as_completed(futures):
            pair_key, combined_md = fut.result()
            pairwise_test_plans[pair_key] = combined_md
            print(f"\n--- Pair: {pair_key} ---\n{combined_md}\n{'='*60}")

    # F) Build master Markdown and convert to DOCX
    master_md = build_master_markdown(section_names, section_rule_results, section_test_cards, pairwise_test_plans)
    write_docx_with_pandoc(
        master_md,
        output_docx_path=output_docx_path,
        reference_docx=None,        # optional: custom Word style template
        also_save_md=output_markdown_path
    )



--- Section 6: Pages 62-71 ---
## DTS-to-TDS Parallel Computer Interface: Input Requirements, Error Status, and Interrupt Handling

**Dependencies:**
- **MIL-STD-1397 Type A, Category I (NTDS SLOW) Interface Compliance:** All electrical, timing, and signal characteristics must comply with this referenced standard.
- **Tables and Figures:** Testing requires reference to TABLE VI (Error Status Summary), TABLE VII (Bit Assignments), TABLE VIII (Interrupt Codes), FIGURE 11 (Signal Flow Diagram), and cross-referencing with 5.2.4.1 and TABLE V for K0–K16 parity group definitions.
- **Timing Parameters:** Requirements for timing of EIR reset and data placement depend on specifics in section 30.3.

**Conflicts:**
- **Ambiguity in ‚ÄúAllotted Time‚Äù for EIR Reset:** The phrase "after waiting the allotted time as specified in 30.3" requires test planners to obtain precise timing limits from section 30.3. If not provided or ambiguous, the test must validate for all reasonable edge cases and r