# AI Engineering Drawing Inspector (Final Version)

A context-aware GD&T checker that uses:
- Part context from BOM structure
- RAG retrieval from the ASME Y14.5 standard
- Qwen2-VL for visual inspection

## 1. Setup & Installation

In [None]:
# ============================================================
# CELL 1A: Install Dependencies
# ============================================================
# NOTE: the lines starting with "!" are IPython shell escapes, so this cell
# only runs inside a notebook kernel (e.g. Colab), not as a plain Python script.
!pip install -q transformers accelerate
!pip install -q qwen-vl-utils
!pip install -q pdf2image
!pip install -q faiss-cpu sentence-transformers
!pip install -q bitsandbytes
# apt output (stdout and stderr) is discarded, so a failed install is silent here.
!apt-get install -y poppler-utils > /dev/null 2>&1

# Production Pipeline Dependencies
!pip install -q pymupdf opencv-python-headless

# Tesseract OCR (replaces PaddleOCR - more stable)
# NOTE(review): this call uses sudo while the poppler install above does not.
!sudo apt-get install -y tesseract-ocr > /dev/null 2>&1
!pip install -q pytesseract

# Printed unconditionally: it does not verify that the installs above succeeded.
print("‚úÖ All packages installed!")

In [None]:
# ============================================================
# CELL 1B: Import Libraries
# ============================================================
import os
import json
import re
import pickle
import torch
from pathlib import Path
from pdf2image import convert_from_path
from PIL import Image
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info

# Production Pipeline Imports
import fitz  # PyMuPDF
import numpy as np
from dataclasses import dataclass
from typing import List, Tuple, Dict, Any

# Tesseract OCR
import pytesseract

# Report the runtime environment: PyTorch version, CUDA availability and,
# when CUDA is present, the name of device 0.
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2. Load Model (Qwen2-VL)

In [None]:
# ============================================================
# CELL 2: Load Qwen2-VL Model (4-bit Quantized)
# ============================================================
import torch
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig

# Hugging Face model id loaded below; creates the module-level `model` and
# `processor` objects that the query/inspection cells rely on.
MODEL_ID = "Qwen/Qwen2-VL-72B-Instruct"

print(f"Loading {MODEL_ID} in 4-bit (NF4)...")

# 4-bit NF4 quantization with double quantization; matmuls are computed in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

# device_map="auto" lets accelerate decide the placement of the weights.
# NOTE(review): trust_remote_code=True permits executing code shipped with the
# model repository — confirm it is actually required for this model class.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=bnb_config,
    attn_implementation="sdpa",
    trust_remote_code=True
)

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

print("‚úÖ Qwen2-VL-72B (4-bit) Loaded Successfully!")
# get_memory_footprint() is reported in bytes; divided by 1e9 for display in GB.
print(f"Memory Footprint: {model.get_memory_footprint() / 1e9:.2f} GB")

## 3. Load Context Databases

In [None]:
# ============================================================
# CELL 3A: Upload Configuration Files
# ============================================================
import os
import zipfile
from google.colab import files

# File names of the three inputs this notebook needs. They are looked up in the
# working directory and in ./rag_data by locate_file() below.
MAPPING_FILE = "400S_file_part_mapping.json"          # drawing file name -> part number
STRUCTURE_FILE = "400S_detailed_structure_fixed.json"  # assembly -> list of parts
RAG_INDEX_FILE = "asme_visual_index.pkl"               # pickled RAG index entries

print("="*60)
print("STEP 1: Upload Configuration Files")
print("="*60)

def locate_file(filename):
    """Return the absolute path of *filename*, or None if it is not found.

    The current working directory is checked first, then ``rag_data/``
    (the folder uploaded ZIP archives are extracted into).
    """
    search_order = (filename, os.path.join("rag_data", filename))
    for candidate in search_order:
        if os.path.exists(candidate):
            return os.path.abspath(candidate)
    return None

# Resolve each required file; a value of None means "not found yet".
FILE_MAPPING_PATH = locate_file(MAPPING_FILE)
STRUCTURE_PATH = locate_file(STRUCTURE_FILE)
RAG_INDEX_PATH = locate_file(RAG_INDEX_FILE)

# Collect the names of everything that is still missing.
missing_files = []
if not FILE_MAPPING_PATH:
    missing_files.append(MAPPING_FILE)
if not STRUCTURE_PATH:
    missing_files.append(STRUCTURE_FILE)
if not RAG_INDEX_PATH:
    missing_files.append(RAG_INDEX_FILE)

if missing_files:
    print(f"\nMissing files: {', '.join(missing_files)}")
    print("\nPlease upload the required files (or a ZIP containing them):")
    uploaded = files.upload()

    # Only the first uploaded ZIP is extracted (note the break); its contents
    # go to ./rag_data, which locate_file() also searches.
    for filename in uploaded:
        if filename.lower().endswith('.zip'):
            print(f"\nExtracting {filename}...")
            with zipfile.ZipFile(filename, 'r') as zip_ref:
                zip_ref.extractall("rag_data")
            print("Extraction complete.")
            break

    # Re-resolve after the upload. The two JSON paths fall back to an absolute
    # path in the working directory even if the file does not exist there;
    # the RAG index path stays None when it is still missing.
    FILE_MAPPING_PATH = locate_file(MAPPING_FILE) or os.path.abspath(MAPPING_FILE)
    STRUCTURE_PATH = locate_file(STRUCTURE_FILE) or os.path.abspath(STRUCTURE_FILE)
    RAG_INDEX_PATH = locate_file(RAG_INDEX_FILE)

# DATA_DIR is the folder that holds the mapping file (used later to look for
# the rag_visual_db image folder).
if FILE_MAPPING_PATH:
    DATA_DIR = os.path.dirname(FILE_MAPPING_PATH)
else:
    DATA_DIR = "/content"

# Final status report; existence is re-checked because of the fallback paths above.
print("\n" + "="*60)
print("FILE STATUS:")
print("="*60)
print(f"File Mapping:  {'‚úÖ OK' if FILE_MAPPING_PATH and os.path.exists(FILE_MAPPING_PATH) else '‚ùå MISSING'}")
print(f"Structure:     {'‚úÖ OK' if STRUCTURE_PATH and os.path.exists(STRUCTURE_PATH) else '‚ùå MISSING'}")
print(f"RAG Index:     {'‚úÖ OK' if RAG_INDEX_PATH and os.path.exists(RAG_INDEX_PATH) else '‚ö†Ô∏è MISSING'}")
print(f"\nData directory: {DATA_DIR}")

In [None]:
# ============================================================
# CELL 3B: Load Part Context Databases
# ============================================================

def normalize_pn(pn):
    """Return *pn* as a lookup key: hyphens and whitespace removed, lower-cased.

    Non-string values are converted with ``str()`` first.
    """
    compact = re.sub(r'[-\s]', '', str(pn))
    return compact.lower()

def load_context_databases():
    """Load the file mapping and BOM structure and build the lookup tables.

    Reads two JSON files whose paths come from the module-level
    ``FILE_MAPPING_PATH`` and ``STRUCTURE_PATH``:

    * the mapping file: a list of entries with ``'file'`` and ``'pn'`` keys;
    * the structure file: a dict of assembly name -> list of parts, each part
      having ``'pn'`` and ``'desc'`` keys.

    Returns:
        tuple: ``(filename_to_pn, part_context_db)`` where

        * ``filename_to_pn`` maps a drawing file name (bare, ``.pdf`` and
          ``.PDF`` variants) to its part number;
        * ``part_context_db`` maps both the raw and the normalized part number
          to a dict with ``pn``, ``description``, ``assembly``, ``siblings``
          (display string, first 12 siblings) and ``siblings_list``.

    Raises:
        OSError: if either file cannot be opened.
        json.JSONDecodeError: if either file is not valid JSON.
        KeyError: if a structure entry lacks ``'pn'`` or ``'desc'``.
    """
    print("Loading file mapping...")
    with open(FILE_MAPPING_PATH, 'r', encoding='utf-8') as f:
        file_mapping_list = json.load(f)

    filename_to_pn = {}
    for entry in file_mapping_list:
        # Tolerate incomplete entries, matching the recovery cell's behavior,
        # instead of raising KeyError on a missing 'file' or 'pn'.
        filename = entry.get('file')
        pn = entry.get('pn')
        if not (filename and pn):
            continue
        for suffix in ('', '.pdf', '.PDF'):
            filename_to_pn[filename + suffix] = pn

    print(f"  Loaded {len(file_mapping_list)} file mappings")

    print("Loading part structure...")
    with open(STRUCTURE_PATH, 'r', encoding='utf-8') as f:
        structure_data = json.load(f)

    print("Building part context database...")
    part_context_db = {}
    # Tracked separately because a part number that already equals its
    # normalized form occupies one key, not two, so len(db) // 2 undercounts.
    unique_keys = set()

    for assembly_name, parts_list in structure_data.items():
        for part in parts_list:
            pn = part['pn']
            desc = part['desc']

            siblings_list = []
            siblings_pns = []
            for sibling in parts_list:
                if sibling['pn'] == pn:
                    continue
                # Double quotes are replaced so the description can be embedded
                # in a prompt without breaking quoting.
                safe_desc = str(sibling['desc']).replace('"', "'")
                siblings_list.append(f"{sibling['pn']} ({safe_desc})")
                siblings_pns.append(sibling['pn'])

            siblings_str = "; ".join(siblings_list[:12])
            if len(siblings_list) > 12:
                siblings_str += f"... and {len(siblings_list) - 12} more"

            lookup_key = normalize_pn(pn)
            unique_keys.add(lookup_key)

            # A part listed in several assemblies keeps the last one seen.
            record = {
                'pn': pn,
                'description': desc,
                'assembly': assembly_name,
                'siblings': siblings_str,
                'siblings_list': siblings_pns
            }
            part_context_db[lookup_key] = record
            part_context_db[pn] = record

    print(f"  Built context for {len(unique_keys)} unique parts")
    return filename_to_pn, part_context_db

filename_to_pn, part_context_db = load_context_databases()
print("\n‚úÖ Context databases loaded successfully!")

In [None]:
# ============================================================
# CELL 3C-PREP: Verify Tesseract Installation
# ============================================================
import shutil

# Check if Tesseract is installed (looks for the "tesseract" executable on PATH)
tesseract_path = shutil.which("tesseract")
if tesseract_path:
    print(f"‚úÖ Tesseract found: {tesseract_path}")
else:
    print("‚ö†Ô∏è Tesseract not found. Installing...")
    # IPython shell escape: only works inside a notebook kernel.
    !sudo apt-get install -y tesseract-ocr
    # Printed unconditionally; the install result is not re-checked.
    print("‚úÖ Tesseract installed!")

In [None]:
# ============================================================
# CELL 3C: Initialize Tesseract OCR
# ============================================================
import pytesseract
from PIL import Image
import re

print("Loading Tesseract OCR Engine...")

def get_drawing_text_ocr(image_input):
    """Run Tesseract OCR on a drawing and return the distinct text lines found.

    Tesseract is invoked with ``--psm 11`` (sparse text). Each output line is
    stripped; lines with fewer than two alphanumeric characters are dropped as
    noise; the diameter symbol (U+00D8 / U+00F8) is rewritten as ``"DIA "``.

    Args:
        image_input: PIL Image or numpy array.

    Returns:
        Sorted list of unique text strings. An empty list is returned when OCR
        fails for any reason (the error is printed, not raised).
    """
    try:
        # Convert numpy array to PIL Image if needed
        if isinstance(image_input, np.ndarray):
            img = Image.fromarray(image_input)
        else:
            img = image_input

        raw_text = pytesseract.image_to_string(img, config='--psm 11')

        text_set = set()
        for line in raw_text.split('\n'):
            cleaned = line.strip()

            # Skip empty lines and garbage (fewer than 2 alphanumeric chars).
            if sum(1 for c in cleaned if c.isalnum()) < 2:
                continue

            # Normalize the diameter symbol. Escapes are used so the match does
            # not depend on how this source file was encoded or re-encoded.
            cleaned = cleaned.replace("\u00d8", "DIA ").replace("\u00f8", "DIA ")

            text_set.add(cleaned)

        return sorted(text_set)

    except Exception as e:
        # Deliberate best-effort: OCR problems must not abort the inspection.
        print(f"‚ö†Ô∏è OCR Warning: {e}")
        return []

print("‚úÖ Tesseract OCR Engine Ready!")

In [None]:
# ============================================================
# CELL 3E: Load RAG Index & Visual Database
# ============================================================
import os
import pickle
from sentence_transformers import SentenceTransformer
import numpy as np

# Module-level RAG state, read later by retrieve_asme_pages():
rag_data = []            # index entries loaded from the pickle file
rag_embeddings = None    # float32 matrix built from the entries' embeddings
rag_available = False    # set True once the index is loaded and non-empty
RAG_IMAGE_DIR = None     # folder that holds the reference page images

print("="*60)
print("RAG SYSTEM SETUP")
print("="*60)

print("\n[STEP 1/3] Loading CLIP model...")
# Encoder used to embed the text query at retrieval time.
search_model = SentenceTransformer('clip-ViT-B-32')
print("  ‚úÖ CLIP model loaded!")

print("\n[STEP 2/3] Loading RAG Index...")
index_loaded = False

# Check multiple locations for the index file; a path resolved by an earlier
# cell (RAG_INDEX_PATH) takes priority when it exists.
index_locations = [
    "/content/asme_visual_index.pkl",
    "/content/rag_data/asme_visual_index.pkl",
    "asme_visual_index.pkl",
]
if 'RAG_INDEX_PATH' in dir() and RAG_INDEX_PATH:
    index_locations.insert(0, RAG_INDEX_PATH)

for idx_path in index_locations:
    if idx_path and os.path.exists(idx_path):
        print(f"  ‚úÖ Found: {idx_path}")
        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only load index files that come from a trusted source.
        with open(idx_path, 'rb') as f:
            rag_data = pickle.load(f)
        RAG_INDEX_PATH = idx_path
        index_loaded = True
        break

if not index_loaded:
    print("  ‚ùå No index found. Please upload asme_visual_index.pkl:")
    from google.colab import files
    try:
        uploaded = files.upload()
        for filename in uploaded:
            # Case-insensitive so "INDEX.PKL" is accepted as well.
            if filename.lower().endswith('.pkl'):
                # SECURITY: same pickle caveat as above applies to uploads.
                with open(filename, 'rb') as f:
                    rag_data = pickle.load(f)
                index_loaded = True
                break
    except Exception as e:
        # Still best-effort (RAG is optional), but report the reason instead
        # of swallowing it; a bare except would also trap KeyboardInterrupt.
        print(f"  RAG index upload failed: {e}")

print("\n[STEP 3/3] Looking for RAG Visual Database...")

# Check multiple locations for the image folder; the data directory resolved
# by an earlier cell (DATA_DIR) takes priority when it is defined.
image_locations = [
    "/content/rag_visual_db",
    "/content/rag_data/rag_visual_db",
    "rag_visual_db",
]
if 'DATA_DIR' in dir() and DATA_DIR:
    image_locations.insert(0, os.path.join(DATA_DIR, "rag_visual_db"))

found_images = False
for loc in image_locations:
    if loc and os.path.isdir(loc):
        # Only a folder that directly contains at least one image qualifies.
        img_files = [f for f in os.listdir(loc) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        if img_files:
            RAG_IMAGE_DIR = os.path.abspath(loc)
            found_images = True
            print(f"  ‚úÖ Found: {RAG_IMAGE_DIR} ({len(img_files)} images)")
            break

if not found_images:
    print("  ‚ùå No images found. Please upload rag_visual_db.zip:")
    from google.colab import files
    import shutil
    import zipfile
    try:
        uploaded = files.upload()
        for filename in uploaded:
            if filename.lower().endswith('.zip'):
                RAG_IMAGE_DIR = "/content/rag_visual_db"
                # Start from an empty folder so stale images cannot linger.
                if os.path.exists(RAG_IMAGE_DIR):
                    shutil.rmtree(RAG_IMAGE_DIR)
                os.makedirs(RAG_IMAGE_DIR, exist_ok=True)
                with zipfile.ZipFile(filename, 'r') as zf:
                    zf.extractall(RAG_IMAGE_DIR)
                found_images = True
                print(f"  ‚úÖ Extracted to {RAG_IMAGE_DIR}")
                break
    except Exception as e:
        # Report the failure instead of swallowing it; the default location is
        # kept as before so later cells still have a directory to probe.
        print(f"  RAG image upload failed: {e}")
        RAG_IMAGE_DIR = "/content/rag_visual_db"

# Build search index: stack every entry's 'embedding' into one float32 matrix
# (one row per index entry) that retrieve_asme_pages() scores queries against.
# NOTE(review): rag_available is set from the index alone; it does not depend
# on found_images / RAG_IMAGE_DIR being valid.
print("\n" + "="*60)
if index_loaded and len(rag_data) > 0:
    embeddings_list = [item['embedding'] for item in rag_data]
    rag_embeddings = np.array(embeddings_list).astype('float32')
    rag_available = True
    print("‚úÖ RAG SYSTEM: READY")
    print(f"  Index: {len(rag_data)} entries")
    print(f"  Images: {RAG_IMAGE_DIR}")
else:
    print("‚ùå RAG SYSTEM: NOT READY")
print("="*60)

## 4. Helper Functions

In [None]:
# ============================================================
# CELL 4A: Core Helper Functions
# ============================================================
import os
import re
from pdf2image import convert_from_path

def extract_filename_key(filepath):
    """Reduce a drawing path to the bare key used in the file mapping.

    Strips the directory and extension, then a trailing duplicate-download
    marker such as ``(1)``, then a trailing ``Paint`` suffix (any case) with
    its leading spaces/underscores, and finally surrounding whitespace.
    """
    stem, _extension = os.path.splitext(os.path.basename(filepath))
    # Remove (1), (2) etc
    stem = re.sub(r'\s*\(\d+\)$', '', stem)
    # Remove the paint-variant suffix
    stem = re.sub(r'[\s_]*(Paint|PAINT)$', '', stem, flags=re.IGNORECASE)
    return stem.strip()

def get_part_context(filepath):
    """Resolve a drawing path to ``(part_number, context_dict)``.

    The cleaned file name is tried as-is, then with ``.pdf`` and ``.PDF``
    appended, against ``filename_to_pn``. The first candidate whose part
    number is also present in ``part_context_db`` wins.

    Returns:
        ``(pn, context)`` on success, ``(None, None)`` when nothing matches.
    """
    base_key = extract_filename_key(filepath)

    for suffix in ('', '.pdf', '.PDF'):
        candidate = base_key + suffix
        if candidate not in filename_to_pn:
            continue
        pn = filename_to_pn[candidate]
        lookup_key = normalize_pn(pn)
        if lookup_key in part_context_db:
            return pn, part_context_db[lookup_key]

    return None, None

def build_context_string(pn, context):
    """Format the part context paragraph that is embedded in the audit prompt.

    Args:
        pn: Part number shown in the text.
        context: Dict with optional ``description``, ``assembly`` and
            ``siblings`` keys, or None for an unknown part.

    Returns:
        A four-line context string, or a one-line "unknown part" notice when
        *context* is None.
    """
    if context is None:
        return "CONTEXT: Unknown Part (General Syntax Check Only)."

    desc = context.get('description', 'Unknown')
    assembly = context.get('assembly', 'Unknown Assembly')
    siblings = context.get('siblings', 'None listed')

    parts = (
        f"CONTEXT: This is Part {pn} ({desc}).",
        f"It belongs to the {assembly}.",
        f"It must assemble with these mating parts: {siblings}.",
        f"CRITICAL: Check for mating tolerances suitable for a {desc}.",
    )
    return "\n".join(parts)

def pdf_to_image(pdf_path, dpi=150):
    """Rasterize page 1 of *pdf_path* at *dpi* and return it as a PIL Image.

    Returns None when the conversion yields no pages.
    """
    rendered = convert_from_path(pdf_path, dpi=dpi, first_page=1, last_page=1)
    if not rendered:
        return None
    return rendered[0]

print("‚úÖ Core helper functions defined.")

In [None]:
# ============================================================
# CELL 4B: Model Query Function
# ============================================================
import torch
from qwen_vl_utils import process_vision_info

def query_model(messages, max_tokens=1024):
    """Send a chat-style query to the loaded Qwen2-VL model and return its reply.

    Args:
        messages: Chat messages in the format accepted by
            ``processor.apply_chat_template`` and ``process_vision_info``
            (role/content dicts; image items are extracted from them).
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        The decoded reply with surrounding whitespace stripped.

    Raises:
        RuntimeError: if the module-level ``model`` or ``processor`` is missing.
    """
    if 'model' not in globals() or 'processor' not in globals():
        raise RuntimeError("‚ö†Ô∏è Model not loaded. Run Cell 2 first.")

    # Render the conversation to prompt text and pull the image/video inputs
    # out of the same message list.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)

    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt"
    ).to(model.device)

    # do_sample=False: decoding without sampling, so no gradient tracking or
    # randomness is involved.
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=max_tokens, do_sample=False)

    # generate() returns prompt + continuation; keep only the continuation.
    generated_ids = output_ids[:, inputs.input_ids.shape[1]:]
    response = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    return response.strip()

print("‚úÖ Model query function defined.")

In [None]:
# ============================================================
# CELL 4C: RAG Retrieval Function
# ============================================================

def retrieve_asme_pages(keywords, top_k=2):
    """Retrieve the reference page images that best match *keywords*.

    The query is embedded with ``search_model`` and scored against
    ``rag_embeddings`` by dot product; the ``top_k`` highest-scoring index
    entries are resolved to image files under ``RAG_IMAGE_DIR``.

    Args:
        keywords: Free-text query.
        top_k: Maximum number of pages to return; values <= 0 return nothing.

    Returns:
        List of RGB PIL Images, best match first. Entries whose image file is
        missing or unreadable are skipped. An empty list is returned when the
        RAG system is unavailable or any error occurs (errors are printed).
    """
    if not rag_available or rag_embeddings is None:
        print("  ‚ö†Ô∏è RAG system not available")
        return []

    if RAG_IMAGE_DIR is None:
        print("  ‚ö†Ô∏è RAG_IMAGE_DIR not set")
        return []

    # Guard explicitly: scores[-0:] would select *every* entry, not none.
    if top_k <= 0:
        return []

    try:
        query_vector = search_model.encode([keywords])
        scores = np.dot(query_vector, rag_embeddings.T).flatten()
        top_indices = np.argsort(scores)[-top_k:][::-1]

        retrieved_images = []
        print(f"  RAG Search: '{keywords[:50]}...'")

        for idx in top_indices:
            item = rag_data[idx]
            # Index paths may have been written on Windows.
            rel_path = item['path'].replace('\\', '/')
            base_name = os.path.basename(rel_path)

            # Try the stored relative path first, then just the file name.
            # dict.fromkeys removes duplicates while keeping the order, so an
            # unreadable file is not opened (and reported) more than once.
            paths_to_try = dict.fromkeys((
                os.path.join(RAG_IMAGE_DIR, rel_path),
                os.path.join(RAG_IMAGE_DIR, base_name),
            ))

            print(f"    - {base_name} (Score: {scores[idx]:.3f})")

            for try_path in paths_to_try:
                if not os.path.exists(try_path):
                    continue
                try:
                    # convert() returns an independent in-memory image, so the
                    # file handle can be closed as soon as the block exits.
                    with Image.open(try_path) as src:
                        retrieved_images.append(src.convert('RGB'))
                    break
                except Exception as e:
                    print(f"      Error: {e}")

        return retrieved_images

    except Exception as e:
        # Best-effort: retrieval problems must not abort the inspection.
        print(f"  RAG error: {e}")
        return []

print("‚úÖ RAG retrieval function defined.")

In [None]:
# ============================================================
# CELL 4D: Production Pipeline Helpers (Tesseract OCR + Tiling)
# ============================================================
import fitz  # PyMuPDF
from PIL import Image
import numpy as np
from typing import List, Tuple
import pytesseract

print("‚öôÔ∏è Initializing Production Pipeline...")

def render_pdf_page(pdf_path: str, dpi: int = 300) -> Image.Image:
    """Render the first page of a PDF to an RGB PIL Image using PyMuPDF.

    Args:
        pdf_path: Path of the PDF file.
        dpi: Target resolution; converted to a zoom factor relative to the
            PDF's native 72 units per inch.

    Returns:
        The rendered page, or None when opening or rendering fails (the error
        is printed rather than raised).
    """
    try:
        # The context manager closes the document even if rendering raises,
        # which the explicit close() at the end of the happy path did not.
        with fitz.open(pdf_path) as doc:
            page = doc.load_page(0)
            zoom = dpi / 72.0
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
            return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    except Exception as e:
        print(f"‚ùå Rendering Error: {e}")
        return None

def run_tesseract_ocr(img: Image.Image) -> List[str]:
    """Run Tesseract OCR on an image and return the distinct text lines found.

    Tesseract is invoked with ``--psm 11`` (sparse text). Each output line is
    stripped; lines with fewer than two alphanumeric characters are dropped as
    noise; the diameter symbol (U+00D8 / U+00F8) is rewritten as ``"DIA "``.

    Args:
        img: PIL Image.

    Returns:
        List of unique text strings, sorted alphabetically. An empty list is
        returned when OCR fails (the error is printed, not raised).
    """
    try:
        raw_text = pytesseract.image_to_string(img, config='--psm 11')

        texts = set()
        for line in raw_text.split('\n'):
            cleaned = line.strip()

            # Skip empty lines and garbage (fewer than 2 alphanumeric chars).
            if sum(1 for c in cleaned if c.isalnum()) < 2:
                continue

            # Normalize the diameter symbol. Escapes are used so the match does
            # not depend on how this source file was encoded or re-encoded.
            cleaned = cleaned.replace("\u00d8", "DIA ").replace("\u00f8", "DIA ")

            texts.add(cleaned)

        return sorted(texts)

    except Exception as e:
        # Deliberate best-effort: OCR problems must not abort the inspection.
        print(f"‚ö†Ô∏è Tesseract OCR Error: {e}")
        return []

def make_overlapping_tiles(full_img: Image.Image) -> List[Tuple[str, Image.Image]]:
    """Cut the image into four named corner tiles that overlap in the middle.

    Each tile spans half the image plus an overlap margin equal to 15% of the
    shorter side, anchored to its corner.

    Returns:
        ``(name, tile)`` pairs in the order Top-Left, Top-Right, Bottom-Left,
        Bottom-Right.
    """
    width, height = full_img.size
    margin = int(min(width, height) * 0.15)
    span_w = width // 2 + margin
    span_h = height // 2 + margin

    # Crop boxes are (left, upper, right, lower).
    regions = (
        ("Top-Left", (0, 0, span_w, span_h)),
        ("Top-Right", (width - span_w, 0, width, span_h)),
        ("Bottom-Left", (0, height - span_h, span_w, height)),
        ("Bottom-Right", (width - span_w, height - span_h, width, height)),
    )
    return [(label, full_img.crop(region)) for label, region in regions]

print("‚úÖ Production Pipeline Helpers Loaded (Tesseract OCR).")

## 5. Main Inspection Function

In [None]:
# ============================================================
# CELL 5A: Main Inspection Function (RAG + OCR Hybrid)
# ============================================================
import numpy as np
import os

def inspect_drawing_rag(drawing_path, verbose=True):
    """Main inspection function using Vision + OCR + RAG.

    Pipeline: identify the part from the file name, rasterize page 1 and run
    OCR, ask the model to extract callouts, retrieve reference pages using the
    extraction text, then ask the model to audit the extraction against the
    part context in the same conversation.

    Args:
        drawing_path: Path of the PDF drawing.
        verbose: When True, progress and the full audit text are printed.

    Returns:
        dict with at least ``result`` ('PASS', 'FAIL', 'REVIEW' or 'ERROR'),
        ``part_number`` and ``details``. The early-exit dicts (unknown part,
        PDF load failure) carry fewer keys than the full result.
    """

    if verbose:
        print(f"\n{'='*60}")
        print(f"INSPECTING: {os.path.basename(drawing_path)}")
        print('='*60)

    # Phase 0: Identify Part
    if verbose:
        print("\n[1/5] Identifying part...")

    pn, context = get_part_context(drawing_path)

    # An unidentified drawing is reported as FAIL without calling the model.
    if not context:
        if verbose:
            print(f"  ‚ùå Could not find context for '{drawing_path}'")
        return {'result': 'FAIL', 'part_number': None, 'description': None, 'details': 'Identity Unknown'}

    context_str = build_context_string(pn, context)

    if pn and verbose:
        print(f"  Part: {pn}")
        print(f"  Description: {context.get('description', 'N/A')}")
        print(f"  Assembly: {context.get('assembly', 'N/A')}")

    # Phase 1: Load Image + OCR
    if verbose:
        print("\n[2/5] Loading drawing & OCR scan...")

    try:
        drawing_image = pdf_to_image(drawing_path)
        if drawing_image is None:
            return {'result': 'ERROR', 'part_number': pn, 'details': 'Failed to load PDF'}
        if verbose:
            print(f"  Drawing loaded: {drawing_image.size}")
    except Exception as e:
        return {'result': 'ERROR', 'part_number': pn, 'details': f'PDF Error: {e}'}

    # OCR is optional evidence: on failure the text block stays empty and the
    # vision-only extraction prompt is used below.
    ocr_text_list = []
    ocr_text_block = ""
    try:
        ocr_input = np.array(drawing_image)
        ocr_text_list = get_drawing_text_ocr(ocr_input)
        ocr_text_block = "\n".join(ocr_text_list)
        if verbose:
            print(f"  OCR Found {len(ocr_text_list)} elements: {ocr_text_list[:5]}...")
    except Exception as e:
        if verbose:
            print(f"  OCR Warning: {e}")

    # Phase 2: Vision + OCR Extraction
    if verbose:
        print("\n[3/5] CoT Step 1: Extraction...")

    if ocr_text_block:
        extraction_prompt = f"""You are an Expert Engineering Drawing Scanner.

OCR Data found:
--- OCR DATA ---
{ocr_text_block}
--- END ---

Extract: 1. Thread Callouts 2. Bore/Hole Dimensions 3. Material Note 4. GD&T Symbols
Trust the OCR data. Output a clean list."""
    else:
        extraction_prompt = """Scan this drawing and extract:
1. Thread Callouts (e.g., '1/4-20 UNC')
2. Bore/Hole Dimensions with tolerances
3. Material Note
4. GD&T Symbols
List them exactly as written."""

    # Turn 1: drawing image + extraction prompt. The reply is appended as an
    # assistant turn so the audit below runs in the same conversation.
    messages = [{"role": "user", "content": [{"type": "image", "image": drawing_image}, {"type": "text", "text": extraction_prompt}]}]
    extraction_text = query_model(messages, max_tokens=512)
    messages.append({"role": "assistant", "content": [{"type": "text", "text": extraction_text}]})

    if verbose:
        print(f"  Extracted:\n{extraction_text[:300]}...")

    # Phase 3: RAG Retrieval
    if verbose:
        print("\n[4/5] Retrieving ASME references...")

    # The extraction text itself is the retrieval query.
    asme_images = []
    if rag_available:
        asme_images = retrieve_asme_pages(extraction_text, top_k=2)
    if verbose:
        print(f"  ASME pages: {len(asme_images)}")

    # Phase 4: Audit
    if verbose:
        print("\n[5/5] CoT Step 2: Audit...")

    mating_parts_str = context.get('siblings', 'None') if context else 'None'

    audit_prompt = f"""You are a Strict Logic Comparator.

REQUIREMENTS: {context_str}
ACTUALS: {extraction_text}

RULES:
- Verify dimensions match mating parts
- If Mating Part is '3/4-16' and ACTUALS shows 'M10' -> FAIL
- Missing features -> CANNOT VERIFY

OUTPUT: Line 1: PASS or FAIL
Then: Tier 1 (General), Tier 2 (GD&T), Tier 3 (Assembly Fit), Recommendations"""

    # Turn 2: retrieved reference pages (if any) followed by the audit prompt.
    content_2 = [{"type": "image", "image": img} for img in asme_images]
    content_2.append({"type": "text", "text": audit_prompt})
    messages.append({"role": "user", "content": content_2})

    audit_response = query_model(messages, max_tokens=1500)

    # Parse result: only the first line of the reply decides the verdict.
    # A line containing both words counts as FAIL; neither word means REVIEW.
    first_line = audit_response.split('\n')[0].upper()
    if 'PASS' in first_line and 'FAIL' not in first_line:
        result = 'PASS'
    elif 'FAIL' in first_line:
        result = 'FAIL'
    else:
        result = 'REVIEW'

    if verbose:
        print(f"\n{'='*60}\nRESULT: {result}\n{'='*60}")
        print(audit_response)

    return {
        'result': result, 'part_number': pn,
        'description': context.get('description') if context else None,
        'assembly': context.get('assembly') if context else None,
        'mating_parts': mating_parts_str,
        'ocr_text_count': len(ocr_text_list),
        'asme_pages_used': len(asme_images),
        'details': audit_response
    }

print("‚úÖ inspect_drawing_rag() defined.")

In [None]:
# ============================================================
# CELL 5B: Production Inspection (Strict Mismatch Mode - Optimized)
# ============================================================

def inspect_drawing_production(pdf_path, context_str=None, use_tiles=False):
    """Production inspection using Tesseract OCR + optional tiling.

    Uses STRICT MISMATCH MODE: the model is instructed to look for
    contradictions between the drawing and its mating parts.

    Args:
        pdf_path: Path to PDF file.
        context_str: Optional mating parts context. When empty or None it is
            fetched from the part database via the file name; only the first
            500 characters are put in the prompt.
        use_tiles: If True, four overlapping quadrant tiles are sent along
            with the full page (uses more memory). Default False.

    Returns:
        dict with ``response`` (model reply), ``part_number`` (None when
        *context_str* was supplied by the caller or the part is unknown),
        ``ocr_count`` and ``token_count``. If the page cannot be rendered,
        ``{"error": "FAIL: Rendering Failed"}`` is returned instead.
    """
    print(f"\n{'='*60}\nINSPECTING (Strict Mismatch Mode): {pdf_path}\n{'='*60}")

    # --- Auto-Context Logic ---
    pn = None
    ctx = None
    if not context_str:
        pn, ctx = get_part_context(pdf_path)
        if ctx:
            context_str = ctx.get('siblings', 'No mating parts listed')
            print(f"  Part: {pn} ({ctx.get('description', 'N/A')})")
            print(f"  Assembly: {ctx.get('assembly', 'N/A')}")
        else:
            context_str = "Unknown part - no mating context available"
            print("  ‚ö†Ô∏è Part not found in database")

    # --- Phase 1: Render High-Res Image ---
    print("\n[1/4] Rendering High-Res Image...")
    full_img = render_pdf_page(pdf_path, dpi=200)  # Reduced from 300 to 200 DPI
    # Explicit None check: do not rely on the truthiness of an image object.
    if full_img is None:
        return {"error": "FAIL: Rendering Failed"}
    print(f"  Size: {full_img.size}")

    # --- Phase 2: Tesseract OCR ---
    print("\n[2/4] Extracting Tesseract OCR Evidence...")
    ocr_texts = run_tesseract_ocr(full_img)
    # LIMIT to 30 elements to reduce token count
    ocr_limited = ocr_texts[:30]
    ocr_block = "\n".join(f"- {t}" for t in ocr_limited)
    print(f"  OCR Found {len(ocr_texts)} elements (using top 30)")
    if ocr_texts[:5]:
        print(f"  Preview: {ocr_texts[:5]}")

    # --- Phase 3: Optional Tiles ---
    tiles = []
    if use_tiles:
        print("\n[3/4] Generating Tiles...")
        tiles = make_overlapping_tiles(full_img)
        print(f"  Created {len(tiles)} tiles")
    else:
        print("\n[3/4] Skipping tiles (memory optimization)")

    # --- Phase 4: STRICT MISMATCH INFERENCE ---
    print("\n[4/4] Running STRICT MISMATCH Analysis...")

    # Compact system prompt
    system_prompt = """You are a STRICT MISMATCH DETECTOR. Find FAULTS, not compatibility.

RULES:
1. Imperial vs Metric = AUTOMATIC FAIL (e.g., 3/4-16 UNC vs M10x1.5)
2. Missing feature = FAIL
3. No assumptions - specs must match EXACTLY
4. Different thread pitch = FAIL"""

    # Compact user prompt
    user_text = f"""REQUIREMENTS (mating parts):
{context_str[:500]}

OCR EVIDENCE (from drawing):
{ocr_block}

TASK: Check if drawing specs match mating part requirements.

OUTPUT FORMAT:
**[Part Name]**
- Requirement: [needed spec]
- Drawing Shows: [OCR finding or NOT FOUND]
- Status: PASS/FAIL
- Reason: [brief]

FINAL VERDICT: PASS or FAIL
ISSUES: [list conflicts]"""

    # Build payload: full page first, then each tile followed by its label,
    # and the task text last. image_inputs must follow the same image order.
    content = [{'type': 'image', 'image': full_img}]
    image_inputs = [full_img]
    for name, tile in tiles:
        content.append({'type': 'image', 'image': tile})
        content.append({'type': 'text', 'text': f"TILE: {name}"})
        image_inputs.append(tile)
    content.append({'type': 'text', 'text': user_text})

    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': content}
    ]

    # Inference
    text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    inputs = processor(
        text=[text_input],
        images=image_inputs,
        return_tensors="pt",
        padding=True
    ).to(model.device)

    # Check token count
    token_count = inputs.input_ids.shape[1]
    print(f"  Token count: {token_count}")
    if token_count > 30000:
        print("  ‚ö†Ô∏è Warning: High token count, may cause issues")

    # Greedy decoding keeps the verdict reproducible, matching query_model().
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=800, do_sample=False)

    # generate() returns prompt + continuation. Slice the prompt off by token
    # position instead of splitting the decoded text on "assistant\n", which
    # truncates the answer whenever the reply itself contains that marker.
    generated_ids = output_ids[:, token_count:]
    response = processor.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )[0].strip()

    print(f"\n{'='*60}\nSTRICT MISMATCH RESULT:\n{'='*60}\n{response}")

    return {
        'response': response,
        'part_number': pn,
        'ocr_count': len(ocr_texts),
        'token_count': token_count
    }

print("‚úÖ inspect_drawing_production() defined (Optimized for memory).")

## 6. Batch Inspection Function

In [None]:
# ============================================================
# CELL 6: Batch Inspection Function
# ============================================================
import glob
import json

def inspect_batch(drawing_folder, output_file="inspection_results.json", limit=None):
    """Inspect all PDFs in a folder (recursively) and save the results as JSON.

    Args:
        drawing_folder: Root folder searched for ``*.pdf`` / ``*.PDF`` files.
        output_file: Path of the JSON report to write.
        limit: If truthy, only the first *limit* files (in sorted path order)
            are inspected.

    Returns:
        List of result dicts, one per file, each with a ``file`` key added.
        A file whose inspection raises is recorded with result 'ERROR'.
    """
    from tqdm.notebook import tqdm

    # Both patterns can match the same file on case-insensitive file systems,
    # hence the set. Sorting makes the order (and therefore which files
    # `limit` selects) reproducible between runs.
    matches = set()
    for pattern in ("**/*.pdf", "**/*.PDF"):
        matches.update(glob.glob(os.path.join(drawing_folder, pattern), recursive=True))
    pdf_files = sorted(matches)

    if limit:
        pdf_files = pdf_files[:limit]

    print(f"Found {len(pdf_files)} PDF files")

    results = []
    pass_count = fail_count = error_count = 0

    for pdf_path in tqdm(pdf_files, desc="Inspecting"):
        try:
            result = inspect_drawing_rag(pdf_path, verbose=False)
            result['file'] = os.path.basename(pdf_path)
            results.append(result)

            # Anything that is neither PASS nor FAIL (e.g. REVIEW, ERROR) is
            # tallied under the error counter.
            if result['result'] == 'PASS':
                pass_count += 1
            elif result['result'] == 'FAIL':
                fail_count += 1
            else:
                error_count += 1
        except Exception as e:
            # One bad drawing must not abort the whole batch.
            results.append({'file': os.path.basename(pdf_path), 'result': 'ERROR', 'details': str(e)})
            error_count += 1

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n{'='*60}\nSUMMARY\n{'='*60}")
    print(f"Total: {len(results)} | PASS: {pass_count} | FAIL: {fail_count} | ERROR: {error_count}")
    print(f"Saved to: {output_file}")
    return results

print("‚úÖ Batch inspection function defined.")

## 7. Test the Inspector

In [None]:
# ============================================================
# CELL 7A: Single File Test (STRICT MISMATCH MODE)
# ============================================================
from google.colab import files

# Prompt for a drawing; the upload result is keyed by the uploaded file names.
print("Upload a PDF drawing to inspect (Strict Mismatch Mode):")
uploaded = files.upload()

if not uploaded:
    print("No file uploaded.")
else:
    # Only the first uploaded file is inspected.
    test_drawing = next(iter(uploaded))
    print(f"\nRunning STRICT MISMATCH inspection on {test_drawing}...")
    # The production pipeline is the one that runs in Strict Mismatch Mode.
    result = inspect_drawing_production(test_drawing)

In [None]:
# ============================================================
# CELL 7B: Recovery - Reload Context (After Restart)
# ============================================================
import os, json, re

# Rebuild the two lookup tables only when they are missing from the notebook
# namespace (e.g. after a runtime restart); otherwise leave them untouched.
if 'filename_to_pn' not in globals() or 'part_context_db' not in globals():
    print("🔄 Reloading Context Databases...")

    MAPPING_FILE = "400S_file_part_mapping.json"
    STRUCTURE_FILE = "400S_detailed_structure_fixed.json"

    def locate_file(filename):
        """Return the absolute path of *filename* in the CWD or ./rag_data, else None."""
        for candidate in (filename, os.path.join("rag_data", filename)):
            if os.path.exists(candidate):
                return os.path.abspath(candidate)
        return None

    FILE_MAPPING_PATH = locate_file(MAPPING_FILE)
    STRUCTURE_PATH = locate_file(STRUCTURE_FILE)

    if not FILE_MAPPING_PATH or not STRUCTURE_PATH:
        print("❌ Config files not found. Please re-upload.")
    else:
        with open(FILE_MAPPING_PATH, 'r', encoding='utf-8') as f:
            file_mapping_list = json.load(f)

        # Map each drawing file name -- bare and with either PDF suffix -- to its part number.
        filename_to_pn = {}
        for entry in file_mapping_list:
            pn, fname = entry.get('pn'), entry.get('file')
            if pn and fname:
                filename_to_pn[fname] = pn
                filename_to_pn[fname + '.pdf'] = pn
                filename_to_pn[fname + '.PDF'] = pn

        with open(STRUCTURE_PATH, 'r', encoding='utf-8') as f:
            structure_data = json.load(f)

        part_context_db = {}

        def normalize_pn(pn):
            """Lower-case a part number and strip hyphens and whitespace."""
            return re.sub(r'[-\s]', '', str(pn)).lower()

        for assembly_name, parts_list in structure_data.items():
            for part in parts_list:
                pn, desc = part['pn'], part['desc']
                # Every other part of the same assembly, formatted as "pn (desc)"
                siblings = [f"{p['pn']} ({p['desc']})" for p in parts_list if p['pn'] != pn]
                key = normalize_pn(pn)
                ctx = {'pn': pn, 'description': desc, 'assembly': assembly_name, 'siblings': "; ".join(siblings[:12])}
                # Index the same context under both the normalized and the raw part number
                part_context_db[key] = ctx
                part_context_db[pn] = ctx

        # Count distinct part numbers rather than halving the key count: the raw
        # and normalized keys coincide for part numbers that are already
        # normalized, so len(part_context_db) // 2 would under-report.
        part_count = len({ctx['pn'] for ctx in part_context_db.values()})
        print(f"✅ Restored: {len(filename_to_pn)} mappings, {part_count} parts")
else:
    print("✅ Context already loaded.")

In [None]:
# ============================================================
# CELL 7C: OCR Libraries (Backup Install)
# ============================================================
!sudo apt-get install -y tesseract-ocr > /dev/null 2>&1
!pip install -q pytesseract
print("‚úÖ Tesseract OCR Libraries Installed!")

In [None]:
# ============================================================
# CELL 7D: Verify RAG Database
# ============================================================
import os, glob

# Keep an existing DATA_DIR; otherwise default to "/content".
DATA_DIR = globals().get('DATA_DIR', "/content")

rag_db_path = os.path.join(DATA_DIR, "rag_visual_db")
print(f"DATA_DIR: {DATA_DIR}")
print(f"RAG path: {rag_db_path}")

if not os.path.exists(rag_db_path):
    print("‚ùå Folder NOT found")
else:
    # Collect PNG files first, then JPG files, searching all sub-folders.
    images = [
        found
        for ext in ("png", "jpg")
        for found in glob.glob(os.path.join(rag_db_path, f"**/*.{ext}"), recursive=True)
    ]
    print(f"‚úÖ Found {len(images)} images")

In [None]:
# ============================================================
# CELL 7E: Batch Test (Upload ZIP)
# ============================================================
from google.colab import files
import shutil, zipfile, glob

# Upload a ZIP of drawings, unpack it into a clean folder and run the batch inspector.
print("Upload a ZIP file with PDF drawings:")
uploaded = files.upload()

if uploaded:
    # Pick the first uploaded file whose name ends in .zip (case-insensitive)
    zip_file = next((f for f in uploaded if f.lower().endswith('.zip')), None)
    if zip_file:
        batch_dir = "batch_drawings"
        # Start from an empty folder so PDFs from a previous batch are not inspected again
        if os.path.exists(batch_dir): shutil.rmtree(batch_dir)
        os.makedirs(batch_dir, exist_ok=True)

        print(f"Extracting {zip_file}...")
        try:
            with zipfile.ZipFile(zip_file, 'r') as zf:
                zf.extractall(batch_dir)
        except zipfile.BadZipFile as e:
            # Report a corrupt or mislabeled upload instead of dumping a traceback
            print(f"Could not read {zip_file} as a ZIP archive: {e}")
        else:
            # Count with the same two patterns inspect_batch() uses, so this
            # number agrees with the "Found N PDF files" line it prints.
            pdfs = sorted(
                set(glob.glob(os.path.join(batch_dir, "**/*.pdf"), recursive=True))
                | set(glob.glob(os.path.join(batch_dir, "**/*.PDF"), recursive=True))
            )
            print(f"Found {len(pdfs)} PDFs")

            results = inspect_batch(batch_dir, "inspection_results.json")
    else:
        print("No ZIP file found")
else:
    print("No files uploaded")

## 8. View Failed Inspections

In [None]:
# ============================================================
# CELL 8: View Failed Inspections
# ============================================================

def show_failures(results, max_chars=300):
    """Print a numbered summary of every inspection whose result is ``'FAIL'``.

    Args:
        results: Iterable of result dicts. Only entries whose ``'result'``
            value equals ``'FAIL'`` are shown.
        max_chars: Maximum number of characters of ``'details'`` to print;
            longer text is cut and marked with a trailing ``...``.

    Returns:
        None. The summary is written to standard output.
    """
    failures = [r for r in results if r.get('result') == 'FAIL']
    print(f"\nFAILED: {len(failures)}")
    print('='*60)

    for i, fail in enumerate(failures, 1):
        # 'details' may be missing, None, or a non-string value; normalize it
        # to text first so slicing can never raise.
        details = fail.get('details')
        text = 'N/A' if details is None else str(details)
        if len(text) > max_chars:
            # Add the ellipsis only when something was actually cut off
            text = text[:max_chars] + "..."

        print(f"\n[{i}] {fail.get('file', 'Unknown')}")
        print(f"    Part: {fail.get('part_number', 'N/A')} - {fail.get('description', 'N/A')}")
        print(f"    Details: {text}")

# Usage: show_failures(results)
print("‚úÖ show_failures() defined.")