# Image Processor Deep Dive

This notebook provides a comprehensive explanation of the Image Processor module, which handles image processing and OCR (Optical Character Recognition) tasks in our plumbing code project.

## Setup and Imports

In [None]:
import os
import cv2
import numpy as np
from PIL import Image
import pytesseract
import logging
from typing import Dict, List, Any, Optional, Tuple

# Add project root to Python path
# NOTE(review): this is an absolute path on one developer's machine; point it
# at your own checkout before running the notebook.
import sys
project_root = '/Users/aaronjpeters/PlumbingCodeAi/BuildingCodeai'
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

# Configure logging: INFO and above, timestamped "time - level - message" lines
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Import our processor
from main.utils.image_processor import ImageProcessor

## 1. Understanding Image Processing Pipeline

The Image Processor handles various image processing tasks including:
1. Image preprocessing
2. OCR text extraction
3. Image enhancement
4. Metadata extraction

Let's explore each component:

In [None]:
# Initialize the processor
# Notebook-level ImageProcessor instance, constructed with no arguments.
# NOTE(review): the demonstration helpers visible in this notebook call
# cv2 / pytesseract directly and process_complete_example() builds its own
# ImageProcessor, so this instance appears to be for interactive use only.
image_processor = ImageProcessor()

def demonstrate_preprocessing(image_path: str):
    """Load an image and return every intermediate preprocessing stage.

    Args:
        image_path: Path handed to ``cv2.imread``.

    Returns:
        ``None`` when ``cv2.imread`` cannot read the file. Otherwise a dict
        with four arrays, in this order:

        * ``'original'``  - the image exactly as loaded by ``cv2.imread``
        * ``'grayscale'`` - the BGR->gray conversion of ``'original'``
        * ``'binary'``    - ``'grayscale'`` thresholded with Otsu's method
        * ``'denoised'``  - ``'binary'`` passed through non-local-means
          denoising with OpenCV's default parameters
    """
    original = cv2.imread(image_path)
    if original is None:
        # imread signals an unreadable/missing file by returning None.
        return None

    grayscale = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU set, the threshold is chosen automatically; only the
    # thresholded image (second element of the returned pair) is kept.
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    binary = cv2.threshold(grayscale, 0, 255, otsu_flags)[1]

    stages = {
        'original': original,
        'grayscale': grayscale,
        'binary': binary,
    }
    stages['denoised'] = cv2.fastNlMeansDenoising(binary)
    return stages

# Example usage (requires `import matplotlib.pyplot as plt`):
# results = demonstrate_preprocessing('path/to/image.png')
# for name, img in results.items():
#     plt.figure(figsize=(10, 5))
#     plt.imshow(img, cmap='gray')
#     plt.title(name)
#     plt.show()

## 2. OCR Text Extraction

The processor uses Tesseract OCR to extract text from images:

In [None]:
def demonstrate_ocr(image_path: str) -> Dict[str, Any]:
    """Run Tesseract OCR on every preprocessing stage of an image.

    The image is first passed through ``demonstrate_preprocessing``; each
    stage it returns is then OCR'd separately so the extracted text can be
    compared across stages.

    Args:
        image_path: Path of the image to process.

    Returns:
        A dict mapping ``'<stage>_text'`` to the text Tesseract extracted
        from that stage. If the image cannot be loaded the result is
        ``{'error': 'Failed to load image'}``; if anything else goes wrong
        the result is ``{'error': <exception message>}``. The function does
        not raise.
    """
    try:
        # Process image
        preprocessed = demonstrate_preprocessing(image_path)
        if preprocessed is None:
            return {'error': 'Failed to load image'}

        # Extract text from different versions
        results = {}
        for name, img in preprocessed.items():
            # OpenCV keeps colour images in BGR order, whereas PIL (and
            # therefore pytesseract) interprets a 3-channel array as RGB.
            # Swap the channels so the colour stage is not OCR'd with red
            # and blue exchanged. Single-channel stages are left untouched.
            if img.ndim == 3 and img.shape[2] == 3:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # Convert to PIL Image
            pil_img = Image.fromarray(img)

            # Extract text
            results[f'{name}_text'] = pytesseract.image_to_string(pil_img)

        return results

    except Exception as e:
        # Deliberate best-effort boundary for the notebook: report the
        # failure to the caller as data instead of raising.
        return {'error': str(e)}

# Example usage:
# ocr_results = demonstrate_ocr('path/to/image.png')
# for version, text in ocr_results.items():
#     print(f"\n{version}:\n{text}")

## Base64 Image Encoding

The Image Processor converts images to base64 so that binary image data can be stored and transmitted as plain text (at the cost of roughly 33% more bytes than the raw file). This is particularly useful for:
- Embedding images in JSON
- Storing images in databases
- Transmitting images over HTTP

Let's explore the base64 encoding process:

In [None]:
import base64
from io import BytesIO

def demonstrate_base64_conversion(image_path: str) -> Dict[str, Any]:
    """Read a file and package its bytes as base64 plus basic metadata.

    The file is read as raw bytes; it is not decoded or validated as an
    image.

    Args:
        image_path: Path of the file to encode.

    Returns:
        On success, a dict with the keys:

        * ``file_name``   - base name of ``image_path``
        * ``file_size``   - size in bytes *before* base64 encoding
        * ``encoding``    - always ``'base64'``
        * ``mime_type``   - guessed from the file extension;
          ``'application/octet-stream'`` when the extension is unknown
        * ``base64_data`` - the file content as a base64 text string

        On any failure (e.g. the file cannot be opened),
        ``{'error': <exception message>}``. The function does not raise.
    """
    # Local import so this cell does not depend on an extra top-level import.
    import mimetypes

    try:
        # Read image
        with open(image_path, 'rb') as img_file:
            img_data = img_file.read()

        # Convert to base64
        base64_str = base64.b64encode(img_data).decode('utf-8')

        # Derive the MIME type from the extension instead of labelling every
        # file as PNG; fall back to the generic binary type when unknown.
        guessed_type, _ = mimetypes.guess_type(image_path)
        mime_type = guessed_type or 'application/octet-stream'

        # Create a sample metadata structure
        return {
            'file_name': os.path.basename(image_path),
            'file_size': len(img_data),
            'encoding': 'base64',
            'mime_type': mime_type,
            'base64_data': base64_str,
        }

    except Exception as e:
        # Deliberate best-effort boundary for the notebook: report the
        # failure to the caller as data instead of raising.
        return {'error': str(e)}

# Example usage (requires `import json`):
# metadata = demonstrate_base64_conversion('path/to/image.png')
# print(json.dumps(metadata, indent=2))

### Converting Back from Base64

We can also convert base64 strings back to images:

In [None]:
def base64_to_image(base64_str: str) -> Optional[np.ndarray]:
    """Convert a base64 string back into an OpenCV image array.

    Args:
        base64_str: Base64 text of an encoded image file (for example the
            ``base64_data`` value produced by
            ``demonstrate_base64_conversion``).

    Returns:
        The image decoded with ``cv2.IMREAD_COLOR``, or ``None`` when the
        input is empty, is not valid base64, or does not contain an image
        OpenCV can decode. Every failure is reported with a printed message;
        the function does not raise.
    """
    try:
        # Decode base64
        img_data = base64.b64decode(base64_str)
        if not img_data:
            # Nothing to decode; do not hand an empty buffer to OpenCV.
            print("Error converting base64 to image: no data after base64 decoding")
            return None

        # Convert to numpy array
        nparr = np.frombuffer(img_data, np.uint8)

        # Decode image
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img is None:
            # imdecode reports unrecognised data by returning None rather
            # than raising, so surface it like every other failure.
            print("Error converting base64 to image: data is not a decodable image")

        return img

    except Exception as e:
        print(f"Error converting base64 to image: {e}")
        return None

# Example usage (requires `import matplotlib.pyplot as plt`):
# metadata = demonstrate_base64_conversion('path/to/image.png')
# base64_str = metadata['base64_data']
# recovered_image = base64_to_image(base64_str)
# if recovered_image is not None:
#     plt.imshow(cv2.cvtColor(recovered_image, cv2.COLOR_BGR2RGB))
#     plt.title('Recovered from base64')
#     plt.show()

### Optimizing Base64 Conversion

Tips for efficient base64 handling:
1. **Size Optimization**
   - Compress images before encoding
   - Consider image quality vs size tradeoffs
   - Use appropriate image formats (PNG for text, JPEG for photos)

2. **Memory Management**
   - Process large images in chunks
   - Clean up temporary files
   - Use context managers for file handling

3. **Error Handling**
   - Validate base64 strings
   - Check for corruption
   - Handle decoding errors gracefully

## 3. Image Enhancement Techniques

Various enhancement techniques are used to improve OCR accuracy:

In [None]:
def demonstrate_enhancement(image: np.ndarray) -> Dict[str, np.ndarray]:
    """Produce three independently enhanced variants of an image.

    Args:
        image: Input image. It is converted with ``COLOR_BGR2LAB`` and passed
            to the colour denoiser, so it is treated as a BGR colour image.

    Returns:
        A dict with, in this order:

        * ``'contrast_enhanced'`` - CLAHE (clip limit 3.0, 8x8 tiles) applied
          to the lightness channel in LAB space, converted back to BGR
        * ``'sharpened'``         - ``image`` filtered with a 3x3 sharpening
          kernel
        * ``'denoised'``          - ``image`` after colour non-local-means
          denoising with OpenCV's default parameters

        Each variant is computed from the original ``image``, not from one
        another.
    """
    # Contrast: equalise only the lightness channel and keep the two colour
    # channels as they are.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    lab_equalized = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))
    contrast_enhanced = cv2.cvtColor(lab_equalized, cv2.COLOR_LAB2BGR)

    # Sharpening: centre weight 5 against the four direct neighbours at -1.
    sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened = cv2.filter2D(image, -1, sharpen_kernel)

    # Noise reduction on the untouched input.
    denoised = cv2.fastNlMeansDenoisingColored(image)

    return {
        'contrast_enhanced': contrast_enhanced,
        'sharpened': sharpened,
        'denoised': denoised,
    }

# Example usage (requires `import matplotlib.pyplot as plt`):
# image = cv2.imread('path/to/image.png')
# enhanced_results = demonstrate_enhancement(image)
# for name, img in enhanced_results.items():
#     plt.figure(figsize=(10, 5))
#     plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
#     plt.title(name)
#     plt.show()

## 4. Complete Processing Example

Let's put everything together with a complete example:

In [None]:
def process_complete_example(image_path: str) -> Dict[str, Any]:
    """Run one image through ``ImageProcessor.process_file`` and wrap the outcome.

    A fresh ``ImageProcessor`` is created on every call.

    Args:
        image_path: Path passed straight to ``process_file``.

    Returns:
        ``{'success': True, 'result': <process_file return value>}`` when
        processing completes, or ``{'success': False, 'error': <message>}``
        when constructing the processor or processing the file raises. The
        function itself does not raise.
    """
    try:
        outcome = ImageProcessor().process_file(image_path)
    except Exception as exc:
        # Report the failure as data so notebook cells can inspect it.
        return {'success': False, 'error': str(exc)}

    return {'success': True, 'result': outcome}

# Example usage (requires `import json`):
# result = process_complete_example('path/to/image.png')
# print(json.dumps(result, indent=2))

## 5. Best Practices and Tips

1. **Image Preparation**
   - Use high-resolution images
   - Ensure good lighting and contrast
   - Remove noise and artifacts

2. **OCR Optimization**
   - Preprocess images for better results
   - Use appropriate thresholding
   - Consider image orientation

3. **Performance**
   - Cache processed results
   - Use batch processing for multiple images
   - Optimize memory usage

4. **Error Handling**
   - Validate input images
   - Handle OCR failures gracefully
   - Provide meaningful error messages

5. **Testing**
   - Test with various image types
   - Verify OCR accuracy
   - Benchmark performance