# AI Engineering Drawing Inspector (Final Version)

A context-aware GD&T checker that uses:
- Part context from BOM structure
- RAG retrieval from ASME Y14.5 standard
- Qwen2-VL for visual inspection

## 1. Setup & Installation

In [None]:
# ============================================================
# CELL 1A: Install Dependencies
# ============================================================
# Install required packages
!pip install -q transformers accelerate
!pip install -q qwen-vl-utils
!pip install -q pdf2image
!pip install -q faiss-cpu sentence-transformers
!pip install -q bitsandbytes
!apt-get install -y poppler-utils > /dev/null 2>&1

# Production Pipeline Dependencies (OCR + High-Res Rendering)
!pip install -q pymupdf paddleocr paddlepaddle opencv-python-headless

print("‚úÖ All packages installed!")

In [None]:
# ============================================================
# CELL 1B: Import Libraries
# ============================================================
import os
import json
import re
import pickle
import torch
from pathlib import Path
from pdf2image import convert_from_path
from PIL import Image
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info

# Production Pipeline Imports
import fitz  # PyMuPDF
import numpy as np
from dataclasses import dataclass
from typing import List, Tuple, Dict, Any

# Report the runtime environment so GPU availability is visible before the
# model is loaded in the next section.
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only device index 0 is reported here.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2. Load Model (Qwen2-VL)

In [None]:
# ============================================================
# CELL 2: Load Qwen2-VL Model (4-bit Quantized)
# ============================================================
import torch
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig

# Hugging Face model identifier; also passed to AutoProcessor below so the
# processor and the weights always match.
MODEL_ID = "Qwen/Qwen2-VL-72B-Instruct"

print(f"Loading {MODEL_ID} in 4-bit (NF4)...")

# Define 4-bit configuration
# NF4 quantization with double quantization; compute runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

# Load Model with SDPA (Native Flash Attention for PyTorch 2.0+)
# `model` and `processor` are module-level globals read by query_model().
# NOTE(review): trust_remote_code=True permits running code shipped with the
# model repository; it may be unnecessary since the class is imported from
# transformers directly - confirm before removing.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=bnb_config,
    attn_implementation="sdpa",
    trust_remote_code=True
)

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

print("‚úÖ Qwen2-VL-72B (4-bit) Loaded Successfully!")
# get_memory_footprint() is divided by 1e9, so the figure is decimal gigabytes.
print(f"Memory Footprint: {model.get_memory_footprint() / 1e9:.2f} GB")

## 3. Load Context Databases

In [None]:
# ============================================================
# CELL 3A: Upload/Locate Configuration Files
# ============================================================
import os
import zipfile
from google.colab import files

# Expected config filenames
# JSON list of entries with 'file' and 'pn' keys (read by load_context_databases).
MAPPING_FILE = "400S_file_part_mapping.json"
# JSON object mapping an assembly name to a list of parts with 'pn' and 'desc' keys.
STRUCTURE_FILE = "400S_detailed_structure_fixed.json"
# Pickled RAG index; later cells read 'embedding' and 'path' from each item.
RAG_INDEX_FILE = "asme_visual_index.pkl"

print("="*60)
print("STEP 1: Upload Configuration Files")
print("="*60)

# Helper to locate a file (cwd or inside rag_data)
def locate_file(filename):
    """Return the absolute path of *filename*, or ``None`` when it is absent.

    The current working directory is checked first, then the ``rag_data``
    sub-directory of the current working directory.
    """
    search_order = (filename, os.path.join("rag_data", filename))
    for candidate in search_order:
        if os.path.exists(candidate):
            return os.path.abspath(candidate)
    return None

# Check for existing files first
# Each *_PATH is an absolute path, or None when locate_file() found nothing.
FILE_MAPPING_PATH = locate_file(MAPPING_FILE)
STRUCTURE_PATH = locate_file(STRUCTURE_FILE)
RAG_INDEX_PATH = locate_file(RAG_INDEX_FILE)

# Upload missing files
missing_files = []
if not FILE_MAPPING_PATH:
    missing_files.append(MAPPING_FILE)
if not STRUCTURE_PATH:
    missing_files.append(STRUCTURE_FILE)
if not RAG_INDEX_PATH:
    missing_files.append(RAG_INDEX_FILE)

if missing_files:
    print(f"\nMissing files: {', '.join(missing_files)}")
    print("\nPlease upload the required files (or a ZIP containing them):")
    uploaded = files.upload()

    # Only the first ZIP among the uploads is extracted (note the break);
    # its contents go into ./rag_data, the second place locate_file() looks.
    for filename in uploaded:
        if filename.lower().endswith('.zip'):
            print(f"\nExtracting {filename}...")
            with zipfile.ZipFile(filename, 'r') as zip_ref:
                zip_ref.extractall("rag_data")
            print("Extraction complete.")
            break

    # Re-resolve after the upload. The mapping and structure paths fall back to
    # a path in the working directory even if the file is still missing (the
    # status printout below reports that); the RAG index path stays None.
    FILE_MAPPING_PATH = locate_file(MAPPING_FILE) or os.path.abspath(MAPPING_FILE)
    STRUCTURE_PATH = locate_file(STRUCTURE_FILE) or os.path.abspath(STRUCTURE_FILE)
    RAG_INDEX_PATH = locate_file(RAG_INDEX_FILE)

# Set DATA_DIR
# DATA_DIR is the directory holding the mapping file; the RAG setup cell also
# looks for a rag_visual_db folder inside it.
# NOTE(review): FILE_MAPPING_PATH looks always truthy at this point (found, or
# replaced by the fallback above), so the else branch appears unreachable.
if FILE_MAPPING_PATH:
    DATA_DIR = os.path.dirname(FILE_MAPPING_PATH)
else:
    DATA_DIR = "/content"

# Print status
print("\n" + "="*60)
print("FILE STATUS:")
print("="*60)
print(f"File Mapping:  {'‚úÖ OK' if FILE_MAPPING_PATH and os.path.exists(FILE_MAPPING_PATH) else '‚ùå MISSING'}")
print(f"Structure:     {'‚úÖ OK' if STRUCTURE_PATH and os.path.exists(STRUCTURE_PATH) else '‚ùå MISSING'}")
print(f"RAG Index:     {'‚úÖ OK' if RAG_INDEX_PATH and os.path.exists(RAG_INDEX_PATH) else '‚ö†Ô∏è MISSING (optional)'}")
print(f"\nData directory: {DATA_DIR}")

In [None]:
# ============================================================
# CELL 3B: Load Part Context Databases
# ============================================================

def normalize_pn(pn):
    """Return the canonical lookup key for a part number.

    The value is converted to ``str``, every dash and whitespace character
    is removed, and the result is lower-cased.
    """
    as_text = str(pn)
    compact = re.sub(r'[-\s]', '', as_text)
    return compact.lower()


def load_context_databases():
    """
    Load the mapping and structure JSON files and build the lookup tables.

    Reads the module-level ``FILE_MAPPING_PATH`` and ``STRUCTURE_PATH``.

    Returns:
        tuple: ``(filename_to_pn, part_context_db)`` where

        1. filename_to_pn maps a drawing filename (as given, and with a
           ``.pdf`` / ``.PDF`` suffix appended) to its part number. Mapping
           entries whose ``pn`` is falsy are skipped.
        2. part_context_db maps both the normalized part number and the raw
           part number to one shared context dict with the keys ``pn``,
           ``description``, ``assembly``, ``siblings`` (display string,
           capped at 12 entries) and ``siblings_list`` (sibling part numbers).
           A part listed in several assemblies keeps the context of the
           last assembly processed.

    Raises:
        OSError: if either file cannot be opened.
        json.JSONDecodeError: if either file is not valid JSON.
        KeyError: if an entry lacks the ``file``/``pn``/``desc`` keys.
    """
    print("Loading file mapping...")
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(FILE_MAPPING_PATH, 'r', encoding='utf-8') as f:
        file_mapping_list = json.load(f)

    filename_to_pn = {}
    for entry in file_mapping_list:
        filename = entry['file']
        pn = entry['pn']
        if pn:
            filename_to_pn[filename] = pn
            filename_to_pn[filename + '.pdf'] = pn
            filename_to_pn[filename + '.PDF'] = pn

    print(f"  Loaded {len(file_mapping_list)} file mappings")

    print("Loading part structure...")
    with open(STRUCTURE_PATH, 'r', encoding='utf-8') as f:
        structure_data = json.load(f)

    print("Building part context database...")
    part_context_db = {}
    # Counted separately: the raw and normalized keys coincide for part
    # numbers such as "12345", so len(part_context_db) // 2 would undercount.
    unique_part_keys = set()

    for assembly_name, parts_list in structure_data.items():
        for part in parts_list:
            pn = part['pn']
            desc = part['desc']

            # Every other part of the same assembly is a potential mating part.
            siblings = [other for other in parts_list if other['pn'] != pn]
            siblings_pns = [other['pn'] for other in siblings]
            siblings_list = []
            for other in siblings:
                # Double quotes are swapped out so the description can be
                # embedded in prompt text without clashing quote marks.
                safe_desc = str(other['desc']).replace('"', "'")
                siblings_list.append(f"{other['pn']} ({safe_desc})")

            siblings_str = "; ".join(siblings_list[:12])
            if len(siblings_list) > 12:
                siblings_str += f"... and {len(siblings_list) - 12} more"

            lookup_key = normalize_pn(pn)
            unique_part_keys.add(lookup_key)

            part_context_db[lookup_key] = {
                'pn': pn,
                'description': desc,
                'assembly': assembly_name,
                'siblings': siblings_str,
                'siblings_list': siblings_pns
            }
            # Alias under the raw part number; both keys share one dict.
            part_context_db[pn] = part_context_db[lookup_key]

    print(f"  Built context for {len(unique_part_keys)} unique parts")

    return filename_to_pn, part_context_db


# Load databases
# Both results are module-level globals read by get_part_context().
filename_to_pn, part_context_db = load_context_databases()
print("\n‚úÖ Context databases loaded successfully!")

In [None]:
# ============================================================
# CELL 3C: Initialize OCR Engine (PaddleOCR)
# ============================================================
import sys
import subprocess

print("Loading OCR Engine...")

# Fix langchain dependency issue BEFORE importing PaddleOCR
# The pip return code and output are captured but never inspected, so a
# failed install only surfaces later as an import error.
print("  Installing langchain dependencies...")
subprocess.run([sys.executable, "-m", "pip", "install", "-q", "langchain", "langchain-community"], 
               capture_output=True)

# Stays None when initialization fails; the OCR helpers check for that and
# return an empty list.
ocr_engine = None

try:
    from paddleocr import PaddleOCR
    # NOTE(review): use_angle_cls / show_log are constructor arguments that may
    # depend on the installed PaddleOCR version - confirm against the pinned release.
    ocr_engine = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
    print("‚úÖ OCR Engine Ready!")

except RuntimeError as e:
    # A second initialization in the same session is reported with this
    # message; the only remedy offered is a runtime restart.
    if "PDX has already been initialized" in str(e):
        print("\n" + "="*60)
        print("üõë RUNTIME RESTART REQUIRED")
        print("="*60)
        print("The PaddleOCR library cannot be re-initialized in the same session.")
        print("Please go to the menu bar:")
        print("   Runtime > Restart session")
        print("Then run the cells again.")
    else:
        print(f"‚ùå Runtime Error: {e}")
except Exception as e:
    # Best effort: any other failure (e.g. import error) leaves ocr_engine as None.
    print(f"‚ùå Error: {e}")

def get_drawing_text_ocr(image_input):
    """
    OCR a drawing and return the distinct high-confidence strings, sorted.

    Uses the module-level ``ocr_engine``. Only detections whose confidence
    exceeds 0.85 are kept, each stripped of surrounding whitespace. An empty
    list is returned when the engine is missing, nothing is detected, or the
    OCR call raises (the error is printed as a warning).
    """
    if ocr_engine is None:
        print("‚ö†Ô∏è OCR Engine not initialized. Please restart runtime.")
        return []

    try:
        ocr_result = ocr_engine.ocr(image_input, cls=True)
        if not (ocr_result and ocr_result[0]):
            return []

        # Each detection is indexed as detection[1] == (text, confidence).
        confident_texts = {
            detection[1][0].strip()
            for detection in ocr_result[0]
            if detection[1][1] > 0.85
        }
        return sorted(confident_texts)
    except Exception as e:
        print(f"‚ö†Ô∏è OCR Warning: {e}")
        return []

In [None]:
# ============================================================
# CELL 3D: LangChain Dependencies (Backup Install)
# ============================================================
# This cell is a backup in case cell 3C didn't install langchain properly
!pip install -q langchain langchain-community
print("‚úÖ LangChain dependencies installed!")

In [None]:
# ============================================================
# CELL 3E: Load RAG Index & Visual Database
# ============================================================
from sentence_transformers import SentenceTransformer
from google.colab import files
import numpy as np
import zipfile
import shutil

# Initialize globals
# These are reset on every run of this cell and read by retrieve_asme_pages().
rag_data = []           # index items loaded from the pickle
rag_embeddings = None   # float32 array built from the items' 'embedding' values
rag_available = False   # set to True once the index has been loaded
RAG_IMAGE_DIR = None    # directory holding the ASME page images

print("="*60)
print("RAG SYSTEM SETUP")
print("="*60)

# STEP 1: Load CLIP Model
# search_model encodes the text query at retrieval time.
# NOTE(review): the pickled embeddings presumably come from the same CLIP
# model - confirm against the index builder.
print("\n[STEP 1/3] Loading CLIP model for semantic search...")
search_model = SentenceTransformer('clip-ViT-B-32')
print("  CLIP model loaded!")

# STEP 2: Load or Upload RAG Index (.pkl file)
print("\n[STEP 2/3] Loading RAG Index...")

index_loaded = False

# 'RAG_INDEX_PATH' in dir() guards against this cell being run before the
# cell that defines RAG_INDEX_PATH.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load index files that come from a trusted source.
if 'RAG_INDEX_PATH' in dir() and RAG_INDEX_PATH and os.path.exists(RAG_INDEX_PATH):
    print(f"  Found existing index: {RAG_INDEX_PATH}")
    with open(RAG_INDEX_PATH, 'rb') as f:
        rag_data = pickle.load(f)
    index_loaded = True
else:
    print("  No RAG index found.")
    print("\n  >> Please upload your RAG index file (asme_visual_index.pkl):")

    try:
        uploaded_index = files.upload()

        # The first uploaded file ending in .pkl is used; the rest are ignored.
        for filename in uploaded_index:
            if filename.endswith('.pkl'):
                RAG_INDEX_PATH = os.path.abspath(filename)
                with open(RAG_INDEX_PATH, 'rb') as f:
                    rag_data = pickle.load(f)
                index_loaded = True
                print(f"\n  Loaded index: {filename} ({len(rag_data)} entries)")
                break

        if not index_loaded:
            print("  WARNING: No .pkl file was uploaded!")
    except Exception as e:
        # Best effort: an upload or unpickling failure leaves index_loaded False.
        print(f"  Upload error: {e}")

# STEP 3: Load or Upload RAG Visual Database (images folder)
print("\n[STEP 3/3] Setting up RAG Visual Database (ASME page images)...")

# Candidate directories, checked in order; the first one that directly
# contains at least one image file wins.
existing_locations = [
    "/content/rag_visual_db",
    "/content/rag_data/rag_visual_db",
    "/content/data/rag_visual_db",
    "rag_visual_db",
    "rag_data/rag_visual_db",
]

# A rag_visual_db folder next to the mapping file takes priority when the
# configuration cell has been run.
if 'DATA_DIR' in dir() and DATA_DIR:
    existing_locations.insert(0, os.path.join(DATA_DIR, "rag_visual_db"))

found_images = False
for loc in existing_locations:
    if loc and os.path.exists(loc) and os.path.isdir(loc):
        # Only files directly inside the directory are counted (no recursion).
        img_count = len([f for f in os.listdir(loc) if f.lower().endswith(('.png', '.jpg', '.jpeg'))])
        if img_count > 0:
            RAG_IMAGE_DIR = os.path.abspath(loc)
            found_images = True
            print(f"  Found existing images: {RAG_IMAGE_DIR} ({img_count} files)")
            break

if not found_images:
    print("  No existing image database found.")
    print("\n  >> Please upload your RAG visual database as a ZIP file:")

    try:
        uploaded_zip = files.upload()

        # Only the first uploaded ZIP is processed.
        for filename in uploaded_zip:
            if filename.lower().endswith('.zip'):
                RAG_IMAGE_DIR = "/content/rag_visual_db"
                # Start from an empty directory: any previous contents are deleted.
                if os.path.exists(RAG_IMAGE_DIR):
                    shutil.rmtree(RAG_IMAGE_DIR)
                os.makedirs(RAG_IMAGE_DIR, exist_ok=True)

                print(f"\n  Extracting {filename}...")
                with zipfile.ZipFile(filename, 'r') as zf:
                    zf.extractall(RAG_IMAGE_DIR)

                # Collect images recursively, since the archive may nest them
                # inside sub-folders.
                all_images = []
                for root, dirs, fls in os.walk(RAG_IMAGE_DIR):
                    for f in fls:
                        if f.lower().endswith(('.png', '.jpg', '.jpeg')):
                            all_images.append(os.path.join(root, f))

                print(f"  Extracted {len(all_images)} images")

                # If every image sits under one nested folder, point
                # RAG_IMAGE_DIR at that folder. With a single image, commonpath
                # is the file itself, which the isdir() check rejects.
                if all_images:
                    common_dir = os.path.commonpath(all_images)
                    if os.path.isdir(common_dir) and common_dir != RAG_IMAGE_DIR:
                        RAG_IMAGE_DIR = common_dir

                # Set even when the archive contained no images.
                found_images = True
                break

        if not found_images:
            print("  WARNING: No ZIP file uploaded. RAG retrieval will not work.")
            RAG_IMAGE_DIR = "/content/rag_visual_db"

    except Exception as e:
        # Best effort: fall back to the default directory, which may not exist.
        print(f"  Upload error: {e}")
        RAG_IMAGE_DIR = "/content/rag_visual_db"

# FINALIZE: Build search index
print("\n" + "="*60)

# Readiness depends only on the index: rag_available becomes True even when no
# page images were found (the "Images exist" line below reports that part).
if index_loaded and isinstance(rag_data, list) and len(rag_data) > 0:
    print("Optimizing search index...")
    # Stack the per-item embeddings into one float32 matrix, one row per item;
    # retrieve_asme_pages() scores a query with a dot product against it.
    embeddings_list = [item['embedding'] for item in rag_data]
    rag_embeddings = np.array(embeddings_list).astype('float32')
    rag_available = True

    print("\n‚úÖ RAG SYSTEM STATUS: READY")
    print(f"  Index entries:    {len(rag_data)}")
    print(f"  Image directory:  {RAG_IMAGE_DIR}")
    print(f"  Images exist:     {os.path.exists(RAG_IMAGE_DIR) if RAG_IMAGE_DIR else False}")
else:
    print("‚ùå RAG SYSTEM STATUS: NOT READY")
    print("  Index not loaded. Please re-run this cell and upload the .pkl file.")

print("="*60)

## 4. Helper Functions

In [None]:
# ============================================================
# CELL 4A: Core Helper Functions
# ============================================================

def extract_filename_key(filepath):
    """Derive the mapping-lookup key from a drawing path.

    Takes the base name, drops the extension, removes a trailing "Paint"
    suffix (any letter case, with any preceding spaces/underscores), and
    strips surrounding whitespace.
    """
    stem, _extension = os.path.splitext(os.path.basename(filepath))
    without_paint = re.sub(
        r'[\s_]*(Paint|PAINT)$', '', stem, flags=re.IGNORECASE
    )
    return without_paint.strip()


def get_part_context(filepath):
    """Resolve a drawing path to ``(part_number, context_dict)``.

    The filename key is tried as-is, then with ``.pdf`` and ``.PDF``
    appended. The first candidate that is present in ``filename_to_pn`` and
    whose normalized part number is present in ``part_context_db`` wins.
    Returns ``(None, None)`` when no candidate resolves.
    """
    base_key = extract_filename_key(filepath)
    candidates = (base_key, base_key + '.pdf', base_key + '.PDF')

    for candidate in candidates:
        if candidate not in filename_to_pn:
            continue
        part_number = filename_to_pn[candidate]
        db_key = normalize_pn(part_number)
        if db_key in part_context_db:
            return part_number, part_context_db[db_key]

    return None, None


def build_context_string(pn, context):
    """Compose the assembly-context paragraph used in the inspection prompt.

    With ``context`` set to ``None`` a fixed "unknown part" sentence is
    returned. Otherwise the description, assembly, and sibling list are read
    from ``context`` (with placeholder defaults for missing keys) and
    formatted into four lines.
    """
    if context is None:
        return "CONTEXT: Unknown Part (General Syntax Check Only). No assembly context available."

    desc = context.get('description', 'Unknown')
    assembly = context.get('assembly', 'Unknown Assembly')
    siblings = context.get('siblings', 'None listed')

    prompt_lines = (
        f"CONTEXT: This is Part {pn} ({desc}).",
        f"It belongs to the {assembly}.",
        f"It must assemble with these mating parts: {siblings}.",
        f"CRITICAL: Check for mating tolerances suitable for a {desc}.",
    )
    return "\n".join(prompt_lines)


def pdf_to_image(pdf_path, dpi=150):
    """Rasterize page 1 of a PDF with pdf2image.

    Returns the PIL image for the first page, or ``None`` when the
    conversion yields no pages.
    """
    rendered_pages = convert_from_path(
        pdf_path, dpi=dpi, first_page=1, last_page=1
    )
    return rendered_pages[0] if rendered_pages else None


print("‚úÖ Core helper functions defined.")

In [None]:
# ============================================================
# CELL 4B: Model Query Function
# ============================================================

def query_model(messages, max_tokens=1024):
    """Run one greedy generation pass of Qwen2-VL over a chat transcript.

    ``messages`` is the chat-format list given to both the processor's chat
    template and ``process_vision_info``. At most ``max_tokens`` new tokens
    are generated. Returns the decoded reply with the prompt tokens removed
    and surrounding whitespace stripped.
    """
    prompt_text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)

    model_inputs = processor(
        text=[prompt_text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    model_inputs = model_inputs.to(model.device)

    with torch.no_grad():
        full_sequences = model.generate(
            **model_inputs, max_new_tokens=max_tokens, do_sample=False
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = model_inputs.input_ids.shape[1]
    new_token_ids = full_sequences[:, prompt_length:]

    decoded_batch = processor.batch_decode(
        new_token_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded_batch[0].strip()


print("‚úÖ Model query function defined.")

In [None]:
# ============================================================
# CELL 4C: RAG Retrieval Function
# ============================================================

def retrieve_asme_pages(keywords, top_k=2):
    """Retrieve the ASME standard page images that best match a text query.

    The query is encoded with the module-level ``search_model`` and scored
    against ``rag_embeddings`` with a dot product. The ``top_k`` highest
    scoring index entries are then resolved to image files under
    ``RAG_IMAGE_DIR``.

    Args:
        keywords: Query text (for example the extracted GD&T features).
        top_k: Maximum number of pages to return. Values <= 0 return ``[]``.

    Returns:
        list: RGB PIL images, best match first. Entries whose image file
        cannot be found or opened are skipped, so the list may be shorter
        than ``top_k``. An empty list is returned when the RAG system is not
        set up or when retrieval raises (the traceback is printed).
    """
    if not rag_available or rag_embeddings is None:
        print("  WARNING: RAG system not available for retrieval")
        return []

    if RAG_IMAGE_DIR is None:
        print("  WARNING: RAG_IMAGE_DIR not set. Run the RAG setup cell first.")
        return []

    # Guard: with top_k == 0 the slice [-0:] below would select EVERY entry.
    if top_k <= 0:
        return []

    try:
        query_vector = search_model.encode([keywords])
        scores = np.dot(query_vector, rag_embeddings.T).flatten()
        # argsort is ascending: take the last top_k and reverse for best-first.
        top_indices = np.argsort(scores)[-top_k:][::-1]

        retrieved_images = []
        print(f"  RAG Search: '{keywords[:50]}...'")

        for idx in top_indices:
            item = rag_data[idx]
            # Index paths may use Windows separators.
            rel_path = item['path'].replace('\\', '/')
            path_parts = rel_path.split('/')

            # Try the stored relative path, then the bare file name, then
            # "<parent folder>/<file name>", since the extracted layout may
            # differ from the one used when the index was built.
            candidates = [
                os.path.join(RAG_IMAGE_DIR, rel_path),
                os.path.join(RAG_IMAGE_DIR, os.path.basename(rel_path)),
            ]
            if len(path_parts) > 1:
                candidates.append(os.path.join(RAG_IMAGE_DIR, path_parts[-1]))
                if len(path_parts) > 2:
                    candidates.append(
                        os.path.join(RAG_IMAGE_DIR, path_parts[-2], path_parts[-1])
                    )
            # Drop duplicate candidates while keeping the probing order.
            paths_to_try = list(dict.fromkeys(candidates))

            print(f"    - {os.path.basename(rel_path)} (Score: {scores[idx]:.3f})")

            image_loaded = False
            for try_path in paths_to_try:
                if not os.path.exists(try_path):
                    continue
                try:
                    # convert() returns an independent image, so the file
                    # handle can be closed as soon as the block exits.
                    with Image.open(try_path) as source_image:
                        retrieved_images.append(source_image.convert('RGB'))
                    image_loaded = True
                    break
                except Exception as e:
                    print(f"      Error opening image: {e}")

            if not image_loaded:
                print(f"      Image not found in {RAG_IMAGE_DIR}")

        return retrieved_images

    except Exception as e:
        # Best effort: retrieval failures must not abort the inspection.
        print(f"  RAG retrieval error: {e}")
        import traceback
        traceback.print_exc()
        return []

print("‚úÖ RAG retrieval function defined.")

In [None]:
# ============================================================
# CELL 4D: Production Pipeline Helpers (OCR + Tiling)
# ============================================================
print("‚öôÔ∏è Initializing Production Pipeline...")

# Reuse OCR Engine from cell 3C if available
# 'ocr_engine' not in dir() covers the case where the earlier OCR cell was
# never run; `is None` covers the case where its initialization failed.
if 'ocr_engine' not in dir() or ocr_engine is None:
    print("  OCR Engine not found, initializing...")
    try:
        from paddleocr import PaddleOCR
        ocr_engine = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
        print("  OCR Engine initialized.")
    except Exception as e:
        # Best effort: without OCR, run_paddleocr() returns an empty list.
        print(f"  OCR Engine warning: {e}")
        print("  OCR will not be available. Vision-only mode will be used.")
        ocr_engine = None
else:
    print("  OCR Engine already loaded.")

def render_pdf_page(pdf_path: str, dpi: int = 300) -> Image.Image:
    """Render the first page of a PDF to an RGB PIL image using PyMuPDF.

    Args:
        pdf_path: Path of the PDF file to open.
        dpi: Target resolution; the page is scaled by ``dpi / 72``.

    Returns:
        The rendered page as an RGB ``PIL.Image.Image``, or ``None`` when
        the file cannot be opened or rendered (the error is printed).
    """
    doc = None
    try:
        doc = fitz.open(pdf_path)
        page = doc.load_page(0)
        # The zoom factor is relative to a 72-dpi baseline.
        zoom = dpi / 72.0
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
        return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    except Exception as e:
        print(f"‚ùå Rendering Error: {e}")
        return None
    finally:
        # Close the document on failure too, so a page that cannot be
        # loaded or rasterized does not leak the open file.
        if doc is not None:
            doc.close()

def run_paddleocr(img: Image.Image) -> List[str]:
    """OCR a PIL image and return the distinct detected strings, sorted.

    Uses the module-level ``ocr_engine``; returns ``[]`` when it is ``None``
    or nothing is detected. Detections with confidence above 0.6 are kept,
    the diameter-symbol sequence is rewritten to ``"DIA "``, and the text is
    stripped before de-duplication.
    """
    if ocr_engine is None:
        print("‚ö†Ô∏è OCR Engine not available")
        return []

    ocr_result = ocr_engine.ocr(np.array(img), cls=True)
    if not (ocr_result and ocr_result[0]):
        return []

    unique_texts = set()
    for detection in ocr_result[0]:
        # detection[1] unpacks to (text, confidence).
        text_content, confidence = detection[1]
        if confidence > 0.6:
            unique_texts.add(text_content.replace("√ò", "DIA ").strip())

    return sorted(unique_texts)

def make_overlapping_tiles(full_img: Image.Image) -> List[Tuple[str, Image.Image]]:
    """Cut an image into four overlapping corner tiles.

    Each tile spans half the image plus a margin equal to 15% of the shorter
    side, anchored to its corner. Returns ``(name, tile)`` pairs in the order
    Top-Left, Top-Right, Bottom-Left, Bottom-Right.
    """
    width, height = full_img.size
    overlap = int(min(width, height) * 0.15)

    # Extent of every tile along each axis: half the page plus the margin.
    span_w = width // 2 + overlap
    span_h = height // 2 + overlap

    # (start, end) ranges for the two columns and the two rows.
    left_cols, right_cols = (0, span_w), (width - span_w, width)
    top_rows, bottom_rows = (0, span_h), (height - span_h, height)

    layout = [
        ("Top-Left", left_cols, top_rows),
        ("Top-Right", right_cols, top_rows),
        ("Bottom-Left", left_cols, bottom_rows),
        ("Bottom-Right", right_cols, bottom_rows),
    ]
    return [
        (label, full_img.crop((cols[0], rows[0], cols[1], rows[1])))
        for label, cols, rows in layout
    ]

print("‚úÖ Production Pipeline Helpers Loaded.")

## 5. Main Inspection Function

In [None]:
# ============================================================
# CELL 5A: Main Inspection Function (RAG + OCR Hybrid)
# ============================================================
import numpy as np

def inspect_drawing_rag(drawing_path, verbose=True):
    """
    Main inspection function for engineering drawings.
    Uses Vision + OCR + RAG + Chain-of-Thought for comprehensive analysis.

    Pipeline:
      1. Resolve the part and its assembly context from the filename.
      2. Rasterize page 1 of the PDF and run OCR on it.
      3. Ask the model to extract features (first chat turn).
      4. Retrieve ASME reference pages, using the extraction as the query.
      5. Ask the model for a strict audit (second chat turn) and parse the
         verdict from its reply.

    Args:
        drawing_path: Path to the drawing PDF.
        verbose: When True, progress and the full audit text are printed.

    Returns:
        dict: Always contains 'result', 'part_number', 'description' and
        'details'. 'result' is 'PASS', 'FAIL', 'REVIEW' (verdict could not be
        parsed) or 'ERROR' (PDF could not be loaded). A completed run also
        includes 'assembly', 'mating_parts', 'gdt_symbols',
        'ocr_text_count' and 'asme_pages_used'; the early-exit dicts do not.
    """

    if verbose:
        print(f"\n{'='*60}")
        print(f"INSPECTING: {os.path.basename(drawing_path)}")
        print('='*60)

    # === PHASE 0: PRE-PROCESSING ===
    if verbose:
        print("\n[1/5] Identifying part...")

    pn, context = get_part_context(drawing_path)

    # Hard stop when the part cannot be identified: no model call is made.
    # NOTE(review): because of this early return, the "part not found" branch
    # and the `if context else None` guards further down look unreachable.
    if not context:
        return {
            'result': 'FAIL',
            'part_number': None,
            'description': None,
            'details': 'Identity Unknown - Context logic failed.'
        }

    context_str = build_context_string(pn, context)

    if pn:
        if verbose:
            print(f"  Part Number: {pn}")
            print(f"  Description: {context.get('description', 'N/A')}")
            print(f"  Assembly: {context.get('assembly', 'N/A')}")
            if context.get('siblings'):
                print(f"  Mating Parts: {context.get('siblings', 'N/A')}")
    else:
        if verbose:
            print("  Part not found in database - general inspection only")

    if verbose:
        print("\n[2/5] Loading drawing & running OCR scan...")

    # pdf_to_image() is called with its default DPI; a None result or an
    # exception both end the run with result 'ERROR'.
    try:
        drawing_image = pdf_to_image(drawing_path)
        if drawing_image is None:
            return {
                'result': 'ERROR',
                'part_number': pn,
                'description': context.get('description') if context else None,
                'details': 'Failed to convert PDF to image'
            }
        if verbose:
            print(f"  Drawing loaded: {drawing_image.size}")
    except Exception as e:
        return {
            'result': 'ERROR',
            'part_number': pn,
            'description': context.get('description') if context else None,
            'details': f'Error loading PDF: {str(e)}'
        }

    # OCR EXTRACTION
    # OCR is best effort: on failure the text block stays empty and the
    # vision-only extraction prompt is used instead.
    ocr_text_list = []
    ocr_text_block = ""

    try:
        ocr_input = np.array(drawing_image)
        ocr_text_list = get_drawing_text_ocr(ocr_input)
        ocr_text_block = "\n".join(ocr_text_list)

        if verbose:
            print(f"  OCR Found {len(ocr_text_list)} text elements: {ocr_text_list[:5]}...")
    except Exception as e:
        if verbose:
            print(f"  OCR Warning: {e} - Proceeding with vision-only mode")

    # === PHASE A: VISION + OCR EXTRACTION ===
    if verbose:
        print("\n[3/5] CoT Step 1: Extraction (Vision + OCR)...")

    # Two prompt variants: one that embeds the OCR text as supporting
    # evidence, and a vision-only one for when OCR produced nothing.
    if ocr_text_block:
        extraction_prompt = f"""You are an Expert Engineering Drawing Scanner.

I have run an automated OCR scan on this drawing. Here is the raw text found:
--- OCR DATA START ---
{ocr_text_block}
--- OCR DATA END ---

YOUR TASK:
Use the OCR Data to help you visually locate and confirm the features on the drawing image.
Extract the following strictly. If the OCR list contains the number, trust it.

1. **Thread Callouts** (e.g. 'M10x1.5', '1/4-20 UNC'). Look for these specifically in the OCR data.
2. **Bore/Hole Dimensions** (e.g. '√ò0.500', '√ò1.00').
3. **Material Note**.
4. **GD&T Symbols** (Vision only).

Output the clean list of features found."""
    else:
        extraction_prompt = """Scan this drawing and extract the exact text for:
1.  **Thread Callouts** (e.g., '1/4-20 UNC', 'M6x1.0').
2.  **Bore/Hole Dimensions** with tolerances (e.g., '√ò0.500 +0.001/-0.000').
3.  **Material Note**.
4.  **GD&T Symbols**.
List them exactly as written on the print. Do not analyze yet."""

    # First chat turn: the drawing image plus the extraction prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": drawing_image},
                {"type": "text", "text": extraction_prompt}
            ]
        }
    ]

    extraction_text = query_model(messages, max_tokens=512)
    # Keep the model's answer in the transcript so the audit turn sees it.
    messages.append(
        {"role": "assistant", "content": [{"type": "text", "text": extraction_text}]}
    )

    if verbose:
        print(f"  Extracted Features:\n{extraction_text[:300]}...")

    # === PHASE B: RAG RETRIEVAL ===
    if verbose:
        print("\n[4/5] Retrieving ASME reference pages...")

    asme_images = []

    if rag_available:
        if verbose:
            print("  Mode: Context-Aware Retrieval (using extracted features)")
        # The full extraction text is the query; a very short extraction
        # (under 20 characters) is replaced by a generic query.
        rag_query = extraction_text
        if len(rag_query) < 20:
            rag_query = "General Dimensioning Rules"
        asme_images = retrieve_asme_pages(rag_query, top_k=2)
    else:
        if verbose:
            print("  WARNING: RAG not available - proceeding without ASME references")

    if verbose:
        print(f"  Total ASME pages for audit: {len(asme_images)}")

    # === PHASE C: STRICT LOGIC COMPARATOR ===
    if verbose:
        print("\n[5/5] CoT Step 2: Strict Logic Audit...")

    # Only used in the returned dict; the prompt itself gets context_str.
    mating_parts_str = "None specified"
    if context and context.get('siblings'):
        mating_parts_str = context.get('siblings')

    audit_prompt = f"""You are a Strict Logic Comparator.
Compare the REQUIREMENTS (Context) vs ACTUALS (Extracted Data).

1. REQUIREMENTS (Mating Parts):
{context_str}

2. ACTUALS (Found on Drawing):
{extraction_text}

STRICT RULES:
- You must verify if the **specific dimension** required by the mating part exists in the ACTUALS.
- If Mating Part is 'Screw 3/4-16' and ACTUALS contains ONLY 'M10', '√ò0.50' -> **FAIL** (Mismatch).
- If Mating Part is 'Bearing √ò0.75' and ACTUALS contains '√ò1.75' -> **FAIL** (Mismatch).
- If the feature is NOT in the ACTUALS list, report: 'CANNOT VERIFY - Feature not found in OCR/Vision data'.

**OUTPUT STRUCTURE (Follow EXACTLY):**

**Line 1:** PASS or FAIL (Overall result)

**Then provide:**

1. **Tier 1 (General)**: State clearly if Material, Title Block, and General Tolerances are PRESENT or MISSING.

2. **Tier 2 (GD&T Syntax)**: Comments on symbol formatting.

3. **Tier 3 (Assembly Fit Analysis):**
   - 'Mating Part [PN] -> [PASS/FAIL]: [Evidence from ACTUALS list]'

4. **Citations:** Reference ASME images if applicable.

5. **Recommendations**"""

    # Second chat turn: retrieved ASME pages (if any) followed by the audit
    # prompt. The drawing image from the first turn remains in the transcript.
    content_2 = []
    for img in asme_images:
        content_2.append({"type": "image", "image": img})
    content_2.append({"type": "text", "text": audit_prompt})

    messages.append({"role": "user", "content": content_2})
    audit_response = query_model(messages, max_tokens=1500)

    # Parse result
    # The prompt asks for the verdict on line 1. A first line containing both
    # words counts as FAIL. If line 1 has neither, fall back to scanning the
    # whole reply for failure/pass markers; otherwise flag it for REVIEW.
    first_line = audit_response.split('\n')[0].strip().upper()
    if 'PASS' in first_line and 'FAIL' not in first_line:
        result = 'PASS'
    elif 'FAIL' in first_line:
        result = 'FAIL'
    else:
        response_upper = audit_response.upper()
        if 'TIER 1 FAILURE' in response_upper or 'TIER 2 FAILURE' in response_upper or 'TIER 3 FAILURE' in response_upper:
            result = 'FAIL'
        elif '**FAIL**' in response_upper:
            result = 'FAIL'
        elif '**PASS**' in response_upper:
            result = 'PASS'
        else:
            result = 'REVIEW'

    if verbose:
        print(f"\n{'='*60}")
        print(f"RESULT: {result}")
        print('='*60)
        print(audit_response)

    # 'gdt_symbols' holds the full extraction text, not only the GD&T symbols.
    return {
        'result': result,
        'part_number': pn,
        'description': context.get('description') if context else None,
        'assembly': context.get('assembly') if context else None,
        'mating_parts': mating_parts_str,
        'gdt_symbols': extraction_text,
        'ocr_text_count': len(ocr_text_list),
        'asme_pages_used': len(asme_images),
        'details': audit_response
    }

print("‚úÖ inspect_drawing_rag() function defined.")

In [None]:
# ============================================================
# CELL 5B: Production Inspection Function (High-Res Tiling)
# ============================================================

def inspect_drawing_production(pdf_path, context_str=None):
    """
    Inspect one drawing PDF using OCR evidence plus high-resolution tiles.

    Pipeline (all helpers are defined elsewhere in this notebook):
    1. ``render_pdf_page(pdf_path, dpi=300)`` renders the drawing image.
    2. ``run_paddleocr`` extracts text; only the first 80 entries are put
       into the prompt.
    3. ``make_overlapping_tiles`` yields ``(name, tile_image)`` pairs that
       are sent as zoomed views alongside the full image.
    4. The global ``processor`` / ``model`` pair generates the verdict.

    Args:
        pdf_path: Path of the PDF drawing to inspect.
        context_str: Mating-part requirements to check against. When None,
            it is taken from ``get_part_context(pdf_path)`` (the 'siblings'
            entry), or a generic fallback if the part is unknown.

    Returns:
        The model's response text, or the string
        "FAIL: Image Rendering Failed" if rendering returned nothing.
    """
    print(f"\n{'='*60}\nINSPECTING (Production): {pdf_path}\n{'='*60}")

    # Auto-generate context if not provided
    if context_str is None:
        pn, ctx = get_part_context(pdf_path)
        if ctx:
            context_str = ctx.get('siblings', 'No mating parts specified')
            print(f"  Part: {pn} ({ctx.get('description', 'N/A')})")
        else:
            context_str = "Unknown part - general inspection only"
            print("  Part not found in database")

    # --- Phase A: Perception ---
    print("[1/4] Rendering High-Res Image...")
    full_img = render_pdf_page(pdf_path, dpi=300)
    if not full_img:
        return "FAIL: Image Rendering Failed"
    print(f"  Image size: {full_img.size}")

    print("[2/4] Extracting Deterministic OCR Evidence...")
    ocr_texts = run_paddleocr(full_img)
    # Cap the evidence list so the prompt stays bounded on dense drawings.
    ocr_block = "\n".join([f"- {t}" for t in ocr_texts[:80]])
    print(f"  > OCR Found {len(ocr_texts)} text elements.")

    print("[3/4] Generating High-Res Tiles...")
    tiles = make_overlapping_tiles(full_img)
    print(f"  > Generated {len(tiles)} tiles")

    # --- Phase B: Reasoning ---
    print("[4/4] Running Strict Logic Inference (Qwen2-VL-72B)...")

    system_prompt = """
    You are a Senior Quality Control Engineer.
    
    **CORE PROTOCOL:**
    1. **OCR IS AUTHORITY:** The 'OCR EVIDENCE' list is the ground truth for text.
    2. **VISUAL VERIFICATION:** Use the 'TILES' to visually confirm geometry.
    3. **STRICT COMPARISON:** Compare the 'MATING HYPOTHESIS' against the 'OCR EVIDENCE'.
    
    **FAILURE RULES:**
    - If Hypothesis needs '3/4-16' and Evidence says 'M10', output **FAIL**.
    - If Evidence is missing for a specific mating part, output **CANNOT VERIFY**.
    - Do NOT hallucinate a fit. Mismatches must be flagged.
    """

    user_text = f"""
    **PART 1: OCR EVIDENCE (FACTS)**
    {ocr_block}

    **PART 2: MATING HYPOTHESIS (REQUIREMENTS)**
    {context_str}

    **TASK:**
    For each Mating Part in the Hypothesis:
    1. SEARCH the OCR list and Tiles for the matching feature.
    2. COMPARE the dimensions/threads strictly.
    3. REPORT: 'Mating Part [Name] -> [PASS/FAIL]: [Evidence]'
    """

    # Build the multi-image payload: full view first, then each labelled tile,
    # then the textual task. Image order here must match image_inputs below.
    content_payload = []
    content_payload.append({'type': 'image', 'image': full_img})
    content_payload.append({'type': 'text', 'text': "FULL DRAWING VIEW"})

    for name, tile in tiles:
        content_payload.append({'type': 'image', 'image': tile})
        content_payload.append({'type': 'text', 'text': f"ZOOMED TILE: {name}"})

    content_payload.append({'type': 'text', 'text': user_text})

    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': content_payload}
    ]

    # Inference
    text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs = [full_img] + [t[1] for t in tiles]

    inputs = processor(
        text=[text_input],
        images=image_inputs,
        return_tensors="pt",
        padding=True
    ).to(model.device)

    generated_ids = model.generate(**inputs, max_new_tokens=1000)

    # generate() returns prompt + completion. Drop the prompt tokens before
    # decoding instead of splitting the decoded text on "assistant\n", which
    # would cut the answer short whenever the model emits that text itself.
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = generated_ids[:, prompt_length:]
    response = processor.batch_decode(completion_ids, skip_special_tokens=True)[0]

    print(f"\n{'='*60}")
    print("RESULT:")
    print('='*60)
    print(response)

    return response

# Cell-completion marker (the check-mark emoji was stored mis-encoded as "‚úÖ").
print("✅ inspect_drawing_production() function defined.")

## 6. Batch Inspection Function

In [None]:
# ============================================================
# CELL 6: Batch Inspection Function
# ============================================================

def _iter_with_progress(items, desc):
    """Wrap *items* in a notebook progress bar, or return it as-is if tqdm is unusable."""
    try:
        from tqdm.notebook import tqdm
        return tqdm(items, desc=desc)
    except ImportError:
        # tqdm, or the widget backend it needs, is not installed: the batch
        # should still run, just without a progress bar.
        return items


def inspect_batch(drawing_folder, output_file="inspection_results.json", limit=None):
    """Inspect every PDF drawing under a folder and save the results as JSON.

    Args:
        drawing_folder: Root folder; searched recursively for ``*.pdf`` and
            ``*.PDF`` files.
        output_file: Path of the JSON file the result list is written to.
        limit: Maximum number of files to inspect. ``None`` (or 0) means no
            limit. Files are taken in sorted path order, so the same subset
            is chosen on every run.

    Returns:
        A list with one dict per file. Successful inspections carry the dict
        returned by ``inspect_drawing_rag`` plus a 'file' key; inspections
        that raised carry 'file', 'result' == 'ERROR' and 'details' (the
        exception text).
    """
    import glob

    matches = set()
    for pattern in ("**/*.pdf", "**/*.PDF"):
        matches.update(glob.glob(os.path.join(drawing_folder, pattern), recursive=True))
    # The set removes duplicates on case-insensitive filesystems; sorting makes
    # the order (and therefore the effect of `limit`) deterministic.
    pdf_files = sorted(matches)

    if limit:
        pdf_files = pdf_files[:limit]

    print(f"Found {len(pdf_files)} PDF files to inspect")

    results = []
    pass_count = 0
    fail_count = 0
    error_count = 0

    for pdf_path in _iter_with_progress(pdf_files, desc="Inspecting"):
        try:
            result = inspect_drawing_rag(pdf_path, verbose=False)
            result['file'] = os.path.basename(pdf_path)
            results.append(result)

            if result['result'] == 'PASS':
                pass_count += 1
            elif result['result'] == 'FAIL':
                fail_count += 1
            else:
                # Anything else (e.g. 'REVIEW') is reported with the errors.
                error_count += 1

        except Exception as e:
            # One bad drawing must not abort the batch; record it and go on.
            results.append({
                'file': os.path.basename(pdf_path),
                'result': 'ERROR',
                'details': str(e)
            })
            error_count += 1

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    total = len(results)

    def _percent(count):
        # Guard against an empty batch (no PDFs found) to avoid dividing by zero.
        return 100 * count / total if total else 0.0

    print(f"\n{'='*60}")
    print("BATCH INSPECTION SUMMARY")
    print('='*60)
    print(f"Total inspected: {total}")
    print(f"PASS: {pass_count} ({_percent(pass_count):.1f}%)")
    print(f"FAIL: {fail_count} ({_percent(fail_count):.1f}%)")
    print(f"ERROR/REVIEW: {error_count} ({_percent(error_count):.1f}%)")
    print(f"\nResults saved to: {output_file}")

    return results


# Cell-completion marker (the check-mark emoji was stored mis-encoded as "‚úÖ").
print("✅ Batch inspection function defined.")

## 7. Test the Inspector

In [None]:
# ============================================================
# CELL 7A: Single File Test (Upload & Inspect)
# ============================================================
from google.colab import files

# Ask for a drawing through the Colab upload widget, then run the RAG
# inspector on the first uploaded file with verbose output.
print("Upload a PDF drawing to inspect:")
uploaded = files.upload()

if not uploaded:
    print("No file uploaded.")
else:
    # `uploaded` is keyed by filename; only the first entry is inspected.
    test_drawing = next(iter(uploaded))
    print(f"Inspecting {test_drawing}...")
    result = inspect_drawing_rag(test_drawing, verbose=True)

In [None]:
# ============================================================
# CELL 7B: OCR Libraries Install (Backup)
# ============================================================
# Run this if OCR is not working properly
!pip install -q paddlepaddle
!pip install -q "paddleocr>=2.0.1"
!pip install -q opencv-python-headless

print("‚úÖ OCR Libraries Installed!")

In [None]:
# ============================================================
# CELL 7C: Verify RAG Database Location
# ============================================================
import os
import glob

# Diagnostic cell: show where the notebook expects the visual RAG database
# and how many PNG/JPG images it currently contains.
rag_db_path = os.path.join(DATA_DIR, "rag_visual_db")

print(f"Current DATA_DIR: {DATA_DIR}")
print(f"Code looks for RAG images at: {rag_db_path}")

# isdir rather than exists: a regular file at this path is not a usable
# database folder and must not be reported as one.
if os.path.isdir(rag_db_path):
    print("✅ Folder exists.")
    images = glob.glob(os.path.join(rag_db_path, "**", "*.png"), recursive=True)
    images += glob.glob(os.path.join(rag_db_path, "**", "*.jpg"), recursive=True)
    print(f"✅ Found {len(images)} images in database.")
else:
    print("❌ Folder NOT found.")

In [None]:
# ============================================================
# CELL 7D: Batch Test (Upload ZIP & Inspect All)
# ============================================================
from google.colab import files
import shutil
import zipfile
import glob

# Upload a ZIP of drawings, unpack it into a fresh folder, and run the batch
# inspector over every PDF it contains.
print("Upload a ZIP file containing PDF drawings for batch inspection:")
uploaded = files.upload()

if uploaded:
    # Use the first uploaded file whose name ends in .zip (case-insensitive).
    zip_filename = next((f for f in uploaded if f.lower().endswith('.zip')), None)

    if zip_filename:
        batch_dir = "batch_drawings"
        # Start from an empty folder so drawings from an earlier run are not
        # inspected again.
        if os.path.exists(batch_dir):
            shutil.rmtree(batch_dir)
        os.makedirs(batch_dir, exist_ok=True)

        print(f"Extracting {zip_filename} to {batch_dir}...")
        with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
            zip_ref.extractall(batch_dir)

        # Count with the same patterns inspect_batch() uses (.pdf and .PDF) so
        # the number reported here matches what is actually inspected.
        pdfs = sorted(set(
            glob.glob(os.path.join(batch_dir, "**/*.pdf"), recursive=True)
            + glob.glob(os.path.join(batch_dir, "**/*.PDF"), recursive=True)
        ))
        print(f"Found {len(pdfs)} PDFs in archive.")

        if pdfs:
            print("Running batch inspection...")
            results = inspect_batch(
                drawing_folder=batch_dir,
                output_file="inspection_results.json"
            )
        else:
            # Nothing to inspect: leave an empty result list for later cells.
            results = []
            print("No PDFs to inspect; skipping batch inspection.")
    else:
        print("No .zip file found in upload.")
else:
    print("No files uploaded.")

## 8. View Failed Inspections

In [None]:
# ============================================================
# CELL 8: View Failed Inspections
# ============================================================

def show_failures(results):
    """Print a summary of every inspection whose result is 'FAIL'.

    Args:
        results: Iterable of result dicts (as produced by ``inspect_batch``).
            Only entries with ``result == 'FAIL'`` are shown.

    For each failure the file name, part number, description, assembly,
    extracted GD&T text and the first 500 characters of the details are
    printed. Missing keys and keys stored as ``None`` are both shown as
    'N/A'.
    """
    max_detail_chars = 500

    def _shown(record, key, default='N/A'):
        # Result dicts store None for unknown description/assembly, so a plain
        # .get(key, default) would print "None" instead of the default.
        value = record.get(key)
        return default if value is None else value

    failures = [r for r in results if r.get('result') == 'FAIL']

    print(f"\nFAILED INSPECTIONS: {len(failures)}")
    print('='*60)

    for i, fail in enumerate(failures, 1):
        details = str(_shown(fail, 'details'))
        if len(details) > max_detail_chars:
            # Only mark the text as cut off when it really was truncated.
            details = details[:max_detail_chars] + "..."

        print(f"\n[{i}] {_shown(fail, 'file', 'Unknown')}")
        print(f"    Part: {_shown(fail, 'part_number')} - {_shown(fail, 'description')}")
        print(f"    Assembly: {_shown(fail, 'assembly')}")
        print(f"    GD&T Found: {_shown(fail, 'gdt_symbols')}")
        print(f"    Details: {details}")


# Usage:
# show_failures(results)
# Cell-completion marker (the check-mark emoji was stored mis-encoded as "‚úÖ").
print("✅ show_failures() function defined.")