# Test Normalize + Validate: Unit Normalization (M7) + Validation (M8)

This notebook tests the normalization and validation stages:

- **M7 -- Unit Normalizer**: Detects drawing units (inch/mm) from title block text
  and normalizes all dimension values to inches. Uses three-level detection:
  drawing-level, callout-level, and dual-hypothesis.
- **M8 -- Validator**: Validates parsed callouts against the callout schema.
  Invalid callouts are never dropped -- they are returned with `calloutType` set to
  `Unknown` plus error metadata (e.g. `_invalid`, `_validation_error`).

**Runtime requirement:** CPU is sufficient (no GPU needed for this notebook),
but GPU runtime is fine if you plan to run other notebooks in the same session.

**No external models required.** These stages are pure Python logic.

In [None]:
# Cell 1: Install dependencies
# NOTE: GPU not required for this notebook, but set to GPU if running full suite.
%pip install pillow --quiet

# Clone the repo
!git clone https://github.com/skaumbdoallsaws-coder/AI-Drawing-Inspector.git /content/AI-Drawing-Inspector 2>/dev/null || \
    (cd /content/AI-Drawing-Inspector && git pull)

import sys
sys.path.insert(0, '/content/AI-Drawing-Inspector')

print('Dependencies installed.')

In [None]:
# Cell 2: Import modules
from ai_inspector.extractors.unit_normalizer import (
    detect_drawing_units,
    detect_callout_units,
    normalize_callout,
    normalize_callouts,
    MM_TO_INCH,
    DIMENSION_FIELDS,
    SKIP_FIELDS,
)
from ai_inspector.extractors.validator import (
    validate_callout,
    validate_and_repair_all,
)
from ai_inspector.schemas.callout_schema import (
    VALID_CALLOUT_TYPES,
    REQUIRED_FIELDS,
    get_required_fields,
)

# Echo the imported schema/normalizer constants so the run log records the
# configuration the cells below were executed against.
for _label, _value in (
    ('Valid callout types', VALID_CALLOUT_TYPES),
    ('Dimension fields (converted)', DIMENSION_FIELDS),
    ('Skip fields (not converted)', SKIP_FIELDS),
):
    print(f'{_label}: {_value}')
print('Imports OK.')

In [None]:
# Cell 3: Create sample parsed callout dicts (holes, fillets, chamfers with string values)
# Each dict mimics what the regex parser outputs BEFORE normalization, so the
# dimension values are still strings. Later cells refer to these by index.

sample_callouts = [
    # [0] Hole -- inch drawing, string diameter
    dict(
        calloutType='Hole',
        diameter='.500',
        depth='THRU',
        quantity=2,
        raw='2X \u2300.500 THRU',
    ),
    # [1] Hole -- metric values (will look like mm when dual-hypothesis runs)
    dict(
        calloutType='Hole',
        diameter='12.7',
        depth='25.4',
        quantity=1,
        raw='\u230012.7 DEEP 25.4',
    ),
    # [2] Fillet -- radius as string
    dict(
        calloutType='Fillet',
        radius='.125',
        raw='R.125 TYP.',
    ),
    # [3] Chamfer -- size as string, angle alongside it
    dict(
        calloutType='Chamfer',
        size='.045',
        angle='45',
        raw='.045 x 45\u00b0',
    ),
    # [4] Fillet -- mm radius with an explicit unit suffix (3.175 mm = 0.125 inches)
    dict(
        calloutType='Fillet',
        radius='3.175',
        raw='R3.175mm',
    ),
]

# Summarize what was built: index, type (padded to 15 columns) and raw text.
print(f'Created {len(sample_callouts)} sample callouts:')
for idx, callout in enumerate(sample_callouts):
    print(f'  [{idx}] {callout["calloutType"]:15s} raw="{callout["raw"]}"')

In [None]:
# Cell 4: Test detect_drawing_units() with sample title block text
# The samples cover explicit inch/mm statements plus inputs with no usable unit
# hint. Results are printed for inspection; nothing is asserted in this cell.
title_block_samples = [
    'UNLESS OTHERWISE SPECIFIED DIMENSIONS ARE IN INCHES',
    'ALL DIMENSIONS IN MILLIMETERS',
    'UNITS: INCH',
    'UNITS: MM',
    'THIRD ANGLE PROJECTION SCALE 1:1',  # No unit info
    'DIMENSIONS ARE IN METERS',           # Edge case: meters not mm
    '',                                    # Empty
]

print(f'{"Title Block Text":55s} | Detected Units')
print('-' * 80)
for text in title_block_samples:
    units = detect_drawing_units(text)
    print(f'{text:55s} | {units}')

# Also test callout-level unit detection
print('\n--- Callout-Level Unit Detection ---')
callout_texts = [
    '\u230012.7mm THRU',
    '\u2300.500" THRU',
    '\u2300.500 THRU',       # No unit hint
    'R3.175mm',
    'R.125 in.',
]
for text in callout_texts:
    units = detect_callout_units(text)
    # Quote the text first, then pad: padding inside the quotes would make the
    # output look as if the input carried trailing spaces. Width 32 = 30 + the
    # two quote characters, so the arrow column stays where it was.
    quoted = f'"{text}"'
    print(f'  {quoted:32s} -> {units}')

In [None]:
# Cell 5: Test normalize_callout() -- verify string->float conversion
print('=== Normalization Tests ===')
print()

# Readable aliases for the Cell 3 samples exercised below (same dict objects,
# not copies, so the "before" values printed are read from the samples).
hole_inch = sample_callouts[0]
hole_unlabeled = sample_callouts[1]
chamfer = sample_callouts[3]
fillet_mm = sample_callouts[4]

# Test 1: the drawing is explicitly declared to be in inches.
print('--- Test 1: Explicit inch drawing ---')
norm = normalize_callout(hole_inch, raw_text='2X \u2300.500 THRU', drawing_units='inch')
print(f'  diameter: "{hole_inch["diameter"]}" -> {norm.get("diameter")}')
print(f'  method: {norm.get("_normalization_method")}')
assert isinstance(norm.get('diameter'), float), 'Expected float after normalization'
diameter_error = abs(norm['diameter'] - 0.5)
assert diameter_error < 0.0001, f'Expected 0.5, got {norm["diameter"]}'
print('  PASS')

# Test 2: no drawing-level units; the callout text itself carries an "mm" suffix.
print('\n--- Test 2: Callout with mm hint ---')
norm2 = normalize_callout(fillet_mm, raw_text='R3.175mm', drawing_units=None)
print(f'  radius: "{fillet_mm["radius"]}" -> {norm2.get("radius")}')
print(f'  method: {norm2.get("_normalization_method")}')
expected_radius = 3.175 * MM_TO_INCH  # Should be ~0.125 inches
radius_error = abs(norm2['radius'] - expected_radius)
assert radius_error < 0.0001, \
    f'Expected {expected_radius:.4f}, got {norm2["radius"]}'
print(f'  PASS (3.175mm = {norm2["radius"]:.4f}in)')

# Test 3: no unit hint anywhere (large number suggests mm). Output is printed
# for inspection only; this case has no assertions.
print('\n--- Test 3: Dual hypothesis ---')
norm3 = normalize_callout(hole_unlabeled, raw_text='\u230012.7 DEEP 25.4', drawing_units=None)
print(f'  diameter: "{hole_unlabeled["diameter"]}" -> {norm3.get("diameter")}')
print(f'  depth: "{hole_unlabeled["depth"]}" -> {norm3.get("depth")}')
print(f'  method: {norm3.get("_normalization_method")}')

# Test 4: chamfer -- `size` must be converted while `angle` is left untouched.
print('\n--- Test 4: Chamfer size field ---')
norm4 = normalize_callout(chamfer, raw_text='.045 x 45\u00b0', drawing_units='inch')
print(f'  size: "{chamfer["size"]}" -> {norm4.get("size")}')
print(f'  angle: "{chamfer["angle"]}" -> {norm4.get("angle")} (should NOT convert)')
assert isinstance(norm4.get('size'), float), 'Chamfer size should be float'
assert isinstance(norm4.get('angle'), str), 'Angle should stay as string (in SKIP_FIELDS)'
print('  PASS')

In [None]:
# Cell 6: Test validate_callout() -- verify valid callouts pass, invalid become Unknown
# validate_callout() is unpacked here as a (callout, is_valid, error) triple.
# Every assert carries a message so a failure explains which expectation broke.
print('=== Validation Tests ===')
print()

# Valid callout
valid_callout = {
    'calloutType': 'Hole',
    'diameter': 0.5,
    'raw': '\u2300.500 THRU',
}
result, is_valid, error = validate_callout(valid_callout)
print(f'Test valid Hole:   is_valid={is_valid}, error={error}')
assert is_valid, f'Expected valid, got error: {error}'

# Missing required field (Hole needs diameter)
missing_field = {
    'calloutType': 'Hole',
    'raw': '\u2300??? THRU',
    # no diameter
}
result, is_valid, error = validate_callout(missing_field)
print(f'Test missing diameter: is_valid={is_valid}, error="{error}"')
assert not is_valid, 'Expected invalid'
# .get() keeps a missing key an assertion failure with a message, not a KeyError.
assert result.get('calloutType') == 'Unknown', 'Expected Unknown after repair'
assert result.get('_invalid') is True, 'Expected _invalid flag on the repaired callout'

# Invalid callout type
bad_type = {
    'calloutType': 'BogusType',
    'raw': 'some text',
}
result, is_valid, error = validate_callout(bad_type)
print(f'Test bad type: is_valid={is_valid}, error="{error}"')
assert not is_valid, 'Expected an unrecognized calloutType to be rejected'
assert result.get('_original_calloutType') == 'BogusType', \
    'Expected the rejected type to be preserved in _original_calloutType'

# Missing raw field
no_raw = {
    'calloutType': 'Hole',
    'diameter': 0.5,
}
result, is_valid, error = validate_callout(no_raw)
print(f'Test missing raw: is_valid={is_valid}, error="{error}"')
assert not is_valid, 'Expected a callout without a raw field to be rejected'

# Negative diameter -- reported for inspection only. Nothing is asserted here,
# so the output says so explicitly rather than implying it was verified.
negative = {
    'calloutType': 'Hole',
    'diameter': -0.5,
    'raw': '\u2300-.500',
}
result, is_valid, error = validate_callout(negative)
print(f'Test negative diameter: is_valid={is_valid}, error="{error}" (informational -- not asserted)')

print('\nAll validation tests passed.')

In [None]:
# Cell 7: Test validate_and_repair_all() -- batch validation with stats summary
# Normalizes the Cell 3 samples, appends deliberately bad callouts, validates
# the batch, prints the report, then asserts basic invariants on the result.
print('=== Batch Validation Test ===')
print()

# First normalize all sample callouts
raw_texts = [c.get('raw', '') for c in sample_callouts]
normalized = normalize_callouts(
    sample_callouts,
    raw_texts=raw_texts,
    title_block_text='UNLESS OTHERWISE SPECIFIED DIMENSIONS ARE IN INCHES',
)

# zip() below would silently truncate if the two lists ever differed in length.
assert len(normalized) == len(sample_callouts), \
    'normalize_callouts returned a different number of callouts than it was given'

# Add raw field to each (validator requires it).
# Loop names are chosen so they do not overwrite `norm` from the normalization cell.
for normalized_callout, original_callout in zip(normalized, sample_callouts):
    normalized_callout['raw'] = original_callout.get('raw', '')

# Add some intentionally bad callouts
test_batch = normalized + [
    {'calloutType': 'Unknown', 'raw': ''},           # Empty raw string
    {'calloutType': 'Hole', 'raw': 'something'},     # Missing diameter
    {'calloutType': 'FakeType', 'raw': 'fake text'}, # Bad type
]

repaired, stats = validate_and_repair_all(test_batch)

print(f'Batch size:  {stats["total"]}')
print(f'Valid:       {stats["valid"]}')
print(f'Invalid:     {stats["invalid"]}')
print()

if stats['errors']:
    print('Error breakdown:')
    # Most frequent error first.
    for err, count in sorted(stats['errors'].items(), key=lambda x: -x[1]):
        print(f'  [{count}x] {err}')

print('\nRepaired callouts:')
for i, c in enumerate(repaired):
    invalid_marker = ' [INVALID]' if c.get('_invalid') else ''
    print(f'  [{i}] {c.get("calloutType", "?"):15s} raw="{c.get("raw", "")[:30]}"{invalid_marker}')
    if c.get('_validation_error'):
        print(f'       error: {c["_validation_error"]}')

# Invariants, checked after the report so it is still visible on failure.
assert len(repaired) == len(test_batch), \
    'Validation must never drop callouts'
assert stats['total'] == len(test_batch), \
    f'Expected total={len(test_batch)}, got {stats["total"]}'
# The missing-diameter Hole and the FakeType entry are both known-bad inputs.
assert stats['invalid'] >= 2, \
    f'Expected at least 2 invalid callouts, got {stats["invalid"]}'
print('\nBatch invariants OK.')