# AI Drawing Inspector v4.0

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/skaumbdoallsaws-coder/AI-Drawing-Inspector/blob/main/notebooks/ai_inspector_v4.ipynb)

**Modular Type-Aware Architecture**

This notebook is a slim orchestrator (~80 lines). All logic lives in the `ai_inspector` package.

## Drawing Types Supported
- **MACHINED_PART** (71%): Holes, threads, GD&T, tolerances
- **SHEET_METAL** (10%): Bends, flat patterns
- **ASSEMBLY** (11%): BOM, balloons
- **WELDMENT** (4%): Weld symbols
- **CASTING** (2%): Critical dims
- **PURCHASED_PART** (2%): Manufacturer table
- **GEAR** (<1%): Gear data table

In [None]:
# Cell 1: Install Dependencies
# Every line starting with `!` runs as a shell command; `-q` keeps pip quiet.
!pip install -q pymupdf pillow openai
!pip install -q accelerate qwen-vl-utils bitsandbytes
# transformers is installed straight from the GitHub repository's default
# branch (no tag or commit is given), so the version is not pinned.
!pip install -q git+https://github.com/huggingface/transformers
!pip install -q json-repair

# Clone and install ai_inspector package
# stderr from `git clone` is discarded, and ANY clone failure (not only
# "directory already exists") falls through to the 'Repo exists' message.
!git clone https://github.com/skaumbdoallsaws-coder/AI-Drawing-Inspector.git /content/AI-tool 2>/dev/null || echo 'Repo exists'
# `%cd` changes the notebook's working directory, so the commands below run
# inside the checkout.
%cd /content/AI-tool
# Bring an already-existing checkout up to date.
!git pull
# Editable install of the checkout in the current directory.
!pip install -q -e .
print('Dependencies installed!')

In [None]:
# Cell 2: Imports and Configuration
import os
import json
from pathlib import Path
from datetime import datetime
from google.colab import files, userdata
from IPython.display import display, Markdown

# ai_inspector imports
from ai_inspector import classify_drawing, DrawingType, __version__
from ai_inspector.utils import render_pdf, SwJsonLibrary, extract_pdf_text
from ai_inspector.analyzers import (
    MachinedPartAnalyzerLazy, resolve_part_identity,
    SheetMetalAnalyzer, WeldmentAnalyzer, AssemblyAnalyzer,
    CastingAnalyzer, PurchasedPartAnalyzer, GearAnalyzer
)
from ai_inspector.extractors import load_ocr_model, load_qwen_model
from ai_inspector.report import generate_qc_report, save_qc_report

# Echo the package version so the session output records which build was imported.
print(f'ai_inspector v{__version__} loaded')

# Folder that the later cells write the report and JSON dumps into.
# Kept as a plain string because those cells join it with os.path.
OUTPUT_DIR = '/content/output'
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

In [None]:
# Cell 3: Load Models (takes ~2 min on T4)
# Both model/processor pairs are bound as notebook globals; the analysis cell
# passes them to the analyzer by these exact names.
# The Hugging Face token is read from the Colab secrets store and is handed
# only to the OCR loader below.
hf_token = userdata.get('HF_TOKEN')

print('Loading LightOnOCR-2...')
# The loader returns a (model, processor) pair.
ocr_model, ocr_processor = load_ocr_model(hf_token)

print('\nLoading Qwen2.5-VL-7B...')
# Unlike the OCR loader, this one is called without a token.
# NOTE(review): presumably the Qwen weights need no authentication - confirm.
qwen_model, qwen_processor = load_qwen_model()

print('\nModels ready!')

In [None]:
# Cell 4: Load SolidWorks Library
# Prompts for an upload and loads the first .zip archive into `sw_lib`.
# `sw_lib` is always created, so the later cells can run (without SolidWorks
# data) even when no archive was provided.
print('Upload sw_json_library.zip:')
uploaded = files.upload()

sw_lib = SwJsonLibrary()

# Match the extension case-insensitively (same rule the PDF upload cell uses)
# so an archive named e.g. "LIB.ZIP" is not silently ignored.
zip_name = next(
    (name for name in uploaded if name.lower().endswith('.zip')),
    None,
)
if zip_name is None:
    # Keep going with an empty library, but say why the count below is zero.
    print('WARNING: no .zip file was uploaded; the SolidWorks library is empty.')
else:
    sw_lib.load_from_zip(zip_name)

print(f'\nLoaded {len(sw_lib)} parts from SolidWorks library')

In [None]:
# Cell 5: Upload and Process Drawing
# Prompts for a PDF, renders it, extracts its text, classifies the drawing and
# tags every rendered page with the classification result.
# Globals set for later cells: DRAWING_PDF, artifacts, pdf_text, classification.
print('Upload your PDF drawing:')
uploaded = files.upload()

# Pick the first uploaded PDF. Fail loudly when there is none: otherwise
# DRAWING_PDF would be undefined on a first run, or - worse - still hold the
# file name from a previous run and the wrong drawing would be processed.
pdf_name = next(
    (name for name in uploaded if name.lower().endswith('.pdf')),
    None,
)
if pdf_name is None:
    raise ValueError('No PDF file was uploaded; upload a .pdf drawing and re-run this cell.')
DRAWING_PDF = pdf_name

print(f'\nProcessing: {DRAWING_PDF}')
print('='*50)

# 1. Render PDF
artifacts = render_pdf(DRAWING_PDF)

# 2. Extract text and classify
pdf_text = extract_pdf_text(DRAWING_PDF)
classification = classify_drawing(pdf_text)

print(f'\nDrawing Type: {classification.drawing_type.value}')
print(f'Confidence: {classification.confidence:.0%}')
print(f'Signals: {classification.signals}')
print(f'Use OCR: {classification.use_ocr}')
print(f'Use Qwen: {classification.use_qwen}')

# Mark pages: the classification is per drawing, so every page gets the same
# type and OCR flag.
for art in artifacts:
    art.drawing_type = classification.drawing_type.value
    art.needs_ocr = classification.use_ocr

# Display first page (guarded so an empty render does not raise IndexError).
if artifacts:
    display(artifacts[0].get_thumbnail(800))
else:
    print('WARNING: render_pdf produced no pages; nothing to preview.')

In [None]:
# Cell 6: Resolve Part Identity
# Determine which part the drawing describes, print the outcome, and fetch the
# matching SolidWorks library entry when one is reported as available.
identity = resolve_part_identity(DRAWING_PDF, artifacts, sw_lib)

print(f'Part Number: {identity.partNumber}')
print(f'Confidence: {identity.confidence:.0%}')
print(f'Source: {identity.source}')
sw_status = "Found" if identity.has_sw_data else "NOT FOUND"
print(f'SW Data: {sw_status}')

# Only query the library when the identity says SolidWorks data exists;
# otherwise later cells receive None for the entry.
if identity.has_sw_data:
    sw_entry = sw_lib.lookup(identity.partNumber)
else:
    sw_entry = None

In [None]:
# Cell 7: Run Type-Specific Analysis
# Builds the analyzer from the loaded models, runs it on the rendered pages,
# and prints the comparison summary.
# NOTE(review): the analyzer is always MachinedPartAnalyzerLazy, whatever type
# the classifier reported - confirm that this class dispatches by drawing type
# internally, since the banner below prints the classified type.
analyzer = MachinedPartAnalyzerLazy(
    ocr_model=ocr_model, ocr_processor=ocr_processor,
    qwen_model=qwen_model, qwen_processor=qwen_processor,
)

divider = '=' * 50
print(f'Running {classification.drawing_type.value} analysis...')
print(divider)

result = analyzer.analyze(artifacts=artifacts, identity=identity, sw_entry=sw_entry)

# Display comparison summary
comp = result.comparison
print(f'\n{divider}')
print('COMPARISON RESULTS')
print(divider)

# One output line per summary field, in display order: (label, summary key).
summary = comp["summary"]
summary_rows = (
    ('Requirements', 'totalRequirements'),
    ('Found', 'found'),
    ('Missing', 'missing'),
    ('Extra', 'extra'),
    ('Match Rate', 'matchRate'),
)
for label, key in summary_rows:
    print(f'{label}: {summary[key]}')

In [None]:
# Cell 8: Generate QC Report
# Builds the evidence and classification dictionaries from the analysis
# result, asks generate_qc_report for a report, saves it under OUTPUT_DIR and
# renders it inline. The cell is skipped when no OpenAI key is available.

# Colab's userdata.get raises instead of returning a falsy value when the
# secret is missing or the notebook has no access to it, so translate both
# cases into "no key" to make the skip branch below reachable.
skip_message = 'OPENAI_API_KEY not found in secrets. Skipping report.'
try:
    openai_key = userdata.get('OPENAI_API_KEY')
except userdata.SecretNotFoundError:
    openai_key = None
except userdata.NotebookAccessError:
    openai_key = None
    skip_message = 'OPENAI_API_KEY exists but notebook access is not enabled. Skipping report.'

if not openai_key:
    print(skip_message)
else:
    print('Generating QC Report with GPT-4o-mini...')

    # Title-block values come from the quality audit; look the section up once.
    title_block = result.quality_audit.get('titleBlockCompleteness', {})

    # Build evidence dict
    evidence = {
        'drawingInfo': {
            'partDescription': title_block.get('descriptionValue', ''),
            'material': title_block.get('materialValue', ''),
            # Notes are taken from the first feature only, when there is one.
            'notes': result.features[0].get('notes', []) if result.features else [],
        },
        # OCR callouts first, then one {raw, calloutType} entry per feature.
        'foundCallouts': result.ocr_callouts + [
            {'raw': f.get('callout', ''), 'calloutType': f.get('type', '')}
            for f in result.features
        ],
    }

    classification_info = {
        'overall_type': classification.drawing_type.value,
        'total_pages': len(artifacts),
        'pages_with_ocr': sum(1 for a in artifacts if a.needs_ocr),
        'ocr_skipped': not classification.use_ocr,
    }

    report = generate_qc_report(
        diff_result=result.comparison,
        evidence=evidence,
        # No SolidWorks entry means there is no reference data to pass along.
        sw_data=sw_entry.data if sw_entry else None,
        drawing_quality=result.quality_audit,
        bom_data=result.bom_data,
        mfg_notes=result.manufacturing_notes,
        classification_info=classification_info,
        api_key=openai_key,
    )

    # Save and display
    report_path = os.path.join(OUTPUT_DIR, 'QCReport.md')
    save_qc_report(report, identity.partNumber, report_path, DRAWING_PDF)
    print(f'\nSaved: {report_path}')

    display(Markdown(report))

In [None]:
# Cell 9: Save All Outputs
# Writes the analysis result and the comparison as JSON under OUTPUT_DIR, then
# triggers a browser download for every regular file in that folder.

# Save analysis result
result_path = os.path.join(OUTPUT_DIR, 'AnalysisResult.json')
with open(result_path, 'w', encoding='utf-8') as f:
    # default=str stringifies any value json cannot encode natively.
    json.dump(result.to_dict(), f, indent=2, default=str)
print(f'Saved: {result_path}')

# Save comparison (same fallback as above, so a non-JSON value in the
# comparison cannot abort the cell after the first file was already written).
diff_path = os.path.join(OUTPUT_DIR, 'DiffResult.json')
with open(diff_path, 'w', encoding='utf-8') as f:
    json.dump(result.comparison, f, indent=2, default=str)
print(f'Saved: {diff_path}')

# Download outputs in a stable order, skipping sub-directories, which cannot
# be downloaded as a file.
print('\nDownloading outputs...')
for filename in sorted(os.listdir(OUTPUT_DIR)):
    output_file = os.path.join(OUTPUT_DIR, filename)
    if os.path.isfile(output_file):
        files.download(output_file)