# 13. Deployment Guide

Production deployment strategies including ONNX export, quantization, and containerization.

## Contents
1. [Setup](#1-setup)
2. [ONNX Export](#2-onnx-export)
3. [Model Quantization](#3-model-quantization)
4. [TorchScript Compilation](#4-torchscript-compilation)
5. [Docker Containerization](#5-docker-containerization)
6. [Performance Benchmarking](#6-performance-benchmarking)
7. [Deployment Configurations](#7-deployment-configurations)
8. [Monitoring & Logging](#8-monitoring--logging)

---

## 1. Setup

In [None]:
import sys
from pathlib import Path

# Add src to path
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root / 'src'))

import torch
import torch.nn as nn
import numpy as np
import json
import time
from typing import Dict, Tuple, Optional
import warnings
warnings.filterwarnings('ignore')

# Report interpreter / framework versions and the preferred accelerator
print(f"Python: {sys.version}")
print(f"PyTorch: {torch.__version__}")

# Preference order: Apple MPS, then CUDA, then plain CPU
if torch.backends.mps.is_available():
    _accelerator = 'MPS'
elif torch.cuda.is_available():
    _accelerator = 'CUDA'
else:
    _accelerator = 'CPU'
print(f"Device: {_accelerator}")

# ONNX tooling is optional; later cells consult ONNX_AVAILABLE before using it
try:
    import onnx
    import onnxruntime
except ImportError:
    ONNX_AVAILABLE = False
    print("ONNX not installed. Run: pip install onnx onnxruntime")
else:
    ONNX_AVAILABLE = True
    print(f"ONNX: {onnx.__version__}")
    print(f"ONNX Runtime: {onnxruntime.__version__}")

In [None]:
# Load model
from miracle.model.backbone import MMDTAELSTMBackbone
from miracle.model.multihead_lm import MultiHeadGCodeLM

# Paths (all resolved relative to the project root)
VOCAB_PATH = project_root / 'data' / 'gcode_vocab_v2.json'
CHECKPOINT_PATH = project_root / 'outputs' / 'final_model' / 'checkpoint_best.pt'
EXPORT_DIR = project_root / 'exports'
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

# Load vocabulary. JSON is UTF-8 by specification, so do not rely on the
# platform's default text encoding.
with open(VOCAB_PATH, encoding='utf-8') as f:
    vocab = json.load(f)

print(f"Vocabulary size: {len(vocab['token_to_id'])}")

# Load checkpoint
device = torch.device('cpu')  # Use CPU for export

if CHECKPOINT_PATH.exists():
    # SECURITY NOTE: weights_only=False unpickles arbitrary Python objects.
    # Only load checkpoints produced by this project / a trusted source.
    checkpoint = torch.load(CHECKPOINT_PATH, map_location=device, weights_only=False)
    config = checkpoint.get('config', {})
    print(f"Loaded checkpoint from epoch {checkpoint.get('epoch', 'unknown')}")
else:
    # Without a checkpoint the later cells still run, but on untrained weights;
    # say so explicitly so the exported artifacts are not mistaken for a real model.
    print(
        f"Checkpoint not found at {CHECKPOINT_PATH}; using default config "
        "(model weights will be randomly initialized)"
    )
    config = {'hidden_dim': 256, 'num_layers': 4, 'num_heads': 8, 'dropout': 0.1}

In [None]:
# Build the backbone and the multi-head LM on the export device
_d_model = config.get('hidden_dim', 256)
_fallback_head_sizes = {'type': 10, 'command': 50, 'param_type': 30, 'param_value': 100}

backbone = MMDTAELSTMBackbone(
    continuous_dim=155,
    categorical_dims=[10, 10, 50, 50],
    d_model=_d_model,
    num_layers=config.get('num_layers', 4),
    num_heads=config.get('num_heads', 8),
    dropout=0.0,  # no dropout at inference time
).to(device)

lm = MultiHeadGCodeLM(
    d_model=_d_model,
    vocab_sizes=vocab.get('head_vocab_sizes', _fallback_head_sizes),
).to(device)

# Restore trained weights when a checkpoint was found by the previous cell
if CHECKPOINT_PATH.exists():
    for _module, _state_key in ((backbone, 'backbone_state_dict'), (lm, 'lm_state_dict')):
        _module.load_state_dict(checkpoint[_state_key])
    print("Model weights loaded")

# Switch both modules to inference mode
for _module in (backbone, lm):
    _module.eval()

# Parameter count across both modules
total_params = sum(p.numel() for _module in (backbone, lm) for p in _module.parameters())
print(f"Total parameters: {total_params:,}")

## 2. ONNX Export

Export the model to ONNX format for cross-platform deployment.

In [None]:
class CombinedModel(nn.Module):
    """Chain the backbone and the LM heads so they can be exported as one module."""

    # Order in which the per-head logits are returned from forward()
    _HEAD_ORDER = ('type', 'command', 'param_type', 'param_value')

    def __init__(self, backbone, lm):
        super().__init__()
        self.backbone = backbone
        self.lm = lm

    def forward(self, continuous, categorical):
        """Run backbone then LM and return the four head outputs as a tuple.

        The LM returns a mapping keyed by head name; a plain tuple is returned
        instead for ONNX compatibility.
        """
        head_outputs = self.lm(self.backbone(continuous, categorical))
        return tuple(head_outputs[head] for head in self._HEAD_ORDER)

# Wrap backbone + LM into the single export module and put it in eval mode
combined_model = CombinedModel(backbone, lm)
combined_model.train(False)

print("Combined model created for export")

In [None]:
if not ONNX_AVAILABLE:
    print("ONNX export skipped - install onnx package")
else:
    # Example inputs used to drive the export
    batch_size, seq_length = 1, 64
    dummy_continuous = torch.randn(batch_size, seq_length, 155)
    dummy_categorical = torch.randint(0, 10, (batch_size, seq_length, 4))

    onnx_path = EXPORT_DIR / 'gcode_model.onnx'

    _input_names = ['continuous', 'categorical']
    _output_names = ['type_logits', 'command_logits', 'param_type_logits', 'param_value_logits']
    # Every input and output keeps its first two axes dynamic
    _batch_and_seq = {0: 'batch_size', 1: 'seq_length'}

    torch.onnx.export(
        combined_model,
        (dummy_continuous, dummy_categorical),
        str(onnx_path),
        input_names=_input_names,
        output_names=_output_names,
        dynamic_axes={_name: dict(_batch_and_seq) for _name in _input_names + _output_names},
        opset_version=14,
        do_constant_folding=True,
    )

    _onnx_size_mb = onnx_path.stat().st_size / 1024 / 1024
    print(f"ONNX model exported to: {onnx_path}")
    print(f"File size: {_onnx_size_mb:.2f} MB")

    # Reload the file and run the ONNX checker on it
    onnx_model = onnx.load(str(onnx_path))
    onnx.checker.check_model(onnx_model)
    print("ONNX model verification passed!")

In [None]:
# Run the exported graph in ONNX Runtime and compare it against PyTorch
if ONNX_AVAILABLE:
    import onnxruntime as ort

    _head_names = ['type', 'command', 'param_type', 'param_value']

    ort_session = ort.InferenceSession(str(onnx_path))

    # Random inputs with the dtypes the session is fed with
    test_continuous = np.random.randn(1, 64, 155).astype(np.float32)
    test_categorical = np.random.randint(0, 10, (1, 64, 4)).astype(np.int64)
    ort_inputs = dict(continuous=test_continuous, categorical=test_categorical)

    # Single timed ONNX Runtime call
    start = time.time()
    ort_outputs = ort_session.run(None, ort_inputs)
    onnx_time = (time.time() - start) * 1000

    print(f"ONNX inference time: {onnx_time:.2f} ms")
    print(f"Output shapes:")
    for name, _ort_out in zip(_head_names, ort_outputs):
        print(f"  {name}: {_ort_out.shape}")

    # Same inputs through the PyTorch module (tensor conversion is inside the timed region)
    with torch.no_grad():
        start = time.time()
        pt_outputs = combined_model(
            torch.tensor(test_continuous),
            torch.tensor(test_categorical),
        )
        pt_time = (time.time() - start) * 1000

    print(f"\nPyTorch inference time: {pt_time:.2f} ms")

    # Largest element-wise deviation per head
    for name, _ort_out, _pt_out in zip(_head_names, ort_outputs, pt_outputs):
        diff = np.abs(_ort_out - _pt_out.numpy()).max()
        print(f"  {name} max diff: {diff:.6f}")

## 3. Model Quantization

Reduce model size and improve inference speed with quantization.

In [None]:
# Post-training dynamic quantization
def quantize_model_dynamic(model):
    """Quantize the Linear and LSTM layers of ``model`` to int8.

    Returns whatever ``torch.quantization.quantize_dynamic`` produces for
    the given module.
    """
    target_layer_types = {nn.Linear, nn.LSTM}
    return torch.quantization.quantize_dynamic(
        model, target_layer_types, dtype=torch.qint8
    )

# Quantize the combined model and persist its state dict
quantized_model = quantize_model_dynamic(combined_model)
quantized_path = EXPORT_DIR / 'gcode_model_quantized.pt'
torch.save(quantized_model.state_dict(), quantized_path)

# Original size is estimated from parameter storage; quantized size is the file on disk
_param_bytes = 0
for _param in combined_model.parameters():
    _param_bytes += _param.numel() * _param.element_size()
original_size = _param_bytes / 1024 / 1024
quantized_size = quantized_path.stat().st_size / 1024 / 1024
_ratio = original_size / quantized_size

print(f"Original model size (estimated): {original_size:.2f} MB")
print(f"Quantized model size: {quantized_size:.2f} MB")
print(f"Compression ratio: {_ratio:.2f}x")

In [None]:
# Benchmark helper shared by all model variants
def benchmark_model(model, continuous, categorical, num_runs=100, warmup=10):
    """Measure the latency of ``model(continuous, categorical)``.

    The model is switched to eval mode, called ``warmup`` times without
    timing, then called ``num_runs`` times with each call timed
    individually. All calls run under ``torch.no_grad()``.

    Args:
        model: Callable module exposing ``eval()``.
        continuous: First positional input passed to the model.
        categorical: Second positional input passed to the model.
        num_runs: Number of timed calls; must be at least 1.
        warmup: Number of untimed calls made before measuring.

    Returns:
        Dict with ``mean_ms``, ``std_ms``, ``min_ms``, ``max_ms`` and
        ``p95_ms`` computed over the timed calls, in milliseconds.

    Raises:
        ValueError: If ``num_runs`` is smaller than 1.
    """
    # Fail fast with a clear message instead of a NumPy reduction error
    # on an empty timing list after the warmup has already run.
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")

    model.eval()

    times = []
    with torch.no_grad():
        # Warmup
        for _ in range(warmup):
            model(continuous, categorical)

        # Benchmark. perf_counter is monotonic and high-resolution, whereas
        # time.time() follows the wall clock and can be too coarse for
        # millisecond-scale calls.
        for _ in range(num_runs):
            start = time.perf_counter()
            model(continuous, categorical)
            times.append((time.perf_counter() - start) * 1000)

    return {
        'mean_ms': np.mean(times),
        'std_ms': np.std(times),
        'min_ms': np.min(times),
        'max_ms': np.max(times),
        'p95_ms': np.percentile(times, 95)
    }

# Inputs shared by the PyTorch-side benchmarks
test_continuous = torch.randn(1, 64, 155)
test_categorical = torch.randint(0, 10, (1, 64, 4))

# Benchmark the float model, then the quantized one
print("Benchmarking inference speed...")

print("\nOriginal Model:")
original_stats = benchmark_model(combined_model, test_continuous, test_categorical)
print("\n".join(f"  {key}: {value:.3f}" for key, value in original_stats.items()))

print("\nQuantized Model:")
quantized_stats = benchmark_model(quantized_model, test_continuous, test_categorical)
print("\n".join(f"  {key}: {value:.3f}" for key, value in quantized_stats.items()))

# Ratio of mean latencies (float / quantized)
speedup = original_stats['mean_ms'] / quantized_stats['mean_ms']
print(f"\nSpeedup: {speedup:.2f}x")

## 4. TorchScript Compilation

Compile to TorchScript for optimized inference and C++ deployment.

In [None]:
# Wrapper with explicit type annotations; this notebook exports it via torch.jit.trace
class ScriptableCombinedModel(nn.Module):
    """Backbone + LM wrapper whose forward signature is fully type-annotated."""

    def __init__(self, backbone, lm):
        super().__init__()
        self.backbone = backbone
        self.lm = lm

    def forward(self, continuous: torch.Tensor, categorical: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """Return the (type, command, param_type, param_value) head outputs."""
        features = self.backbone(continuous, categorical)
        head_outputs = self.lm(features)
        type_out = head_outputs['type']
        command_out = head_outputs['command']
        param_type_out = head_outputs['param_type']
        param_value_out = head_outputs['param_value']
        return (type_out, command_out, param_type_out, param_value_out)

# Instantiate the annotated wrapper in eval mode
scriptable_model = ScriptableCombinedModel(backbone, lm)
scriptable_model.train(False)

# Record a TorchScript trace using the benchmark inputs as examples
_example_inputs = (test_continuous, test_categorical)
traced_model = torch.jit.trace(scriptable_model, _example_inputs)

# Persist the traced module
traced_path = EXPORT_DIR / 'gcode_model_traced.pt'
traced_model.save(str(traced_path))

_traced_size_mb = traced_path.stat().st_size / 1024 / 1024
print(f"TorchScript model saved to: {traced_path}")
print(f"File size: {_traced_size_mb:.2f} MB")

In [None]:
# Reload the saved TorchScript file and check it against the eager module
loaded_traced = torch.jit.load(str(traced_path))
loaded_traced.eval()

with torch.no_grad():
    original_out = scriptable_model(test_continuous, test_categorical)
    traced_out = loaded_traced(test_continuous, test_categorical)

# Largest absolute deviation per output head
print("Output comparison (original vs traced):")
_head_names = ('type', 'command', 'param_type', 'param_value')
for name, _eager, _traced in zip(_head_names, original_out, traced_out):
    diff = torch.abs(_eager - _traced).max().item()
    print(f"  {name}: max diff = {diff:.8f}")

# Latency of the reloaded TorchScript module
print("\nTorchScript Model Benchmark:")
traced_stats = benchmark_model(loaded_traced, test_continuous, test_categorical)
print("\n".join(f"  {key}: {value:.3f}" for key, value in traced_stats.items()))

## 5. Docker Containerization

Create Docker configuration for production deployment.

In [None]:
# Generate Dockerfile.
# The image is built with the project root as build context (see the compose
# file and the printed instructions), so every COPY source is relative to the
# project root, including requirements-prod.txt, which this notebook writes
# into exports/. curl is installed explicitly because the slim base image does
# not ship it and the HEALTHCHECK below depends on it.
dockerfile_content = '''# G-Code Fingerprinting Model - Production Dockerfile
FROM python:3.9-slim

# Set working directory
WORKDIR /app

# Install system dependencies (curl is required by the HEALTHCHECK)
RUN apt-get update && apt-get install -y --no-install-recommends \\
    build-essential \\
    curl \\
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for layer caching (path relative to the project-root build context)
COPY exports/requirements-prod.txt .
RUN pip install --no-cache-dir -r requirements-prod.txt

# Copy application code
COPY src/ ./src/
COPY exports/ ./exports/
COPY data/gcode_vocab_v2.json ./data/

# Set environment variables
ENV PYTHONPATH=/app/src
ENV MODEL_PATH=/app/exports/gcode_model_traced.pt
ENV VOCAB_PATH=/app/data/gcode_vocab_v2.json
ENV PORT=8000

# Expose port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \\
    CMD curl -f http://localhost:8000/health || exit 1

# Run server
CMD ["uvicorn", "miracle.api.server:app", "--host", "0.0.0.0", "--port", "8000"]
'''

# Save Dockerfile
dockerfile_path = EXPORT_DIR / 'Dockerfile'
dockerfile_path.write_text(dockerfile_content)
print(f"Dockerfile saved to: {dockerfile_path}")

In [None]:
# Generate production requirements.
# prometheus-client and python-json-logger are listed because the metrics.py
# module and logging_config.json generated later in this notebook import
# `prometheus_client` and reference `pythonjsonlogger.jsonlogger.JsonFormatter`.
requirements_prod = '''# Production dependencies for G-code fingerprinting model
torch>=2.0.0
numpy>=1.24.0
fastapi>=0.100.0
uvicorn[standard]>=0.23.0
pydantic>=2.0.0
python-multipart>=0.0.6
prometheus-client>=0.17.0
python-json-logger>=2.0.0
'''

requirements_path = EXPORT_DIR / 'requirements-prod.txt'
requirements_path.write_text(requirements_prod)
print(f"Production requirements saved to: {requirements_path}")

In [None]:
# Compose file: API service built from the project root, plus an optional Redis cache
docker_compose = '''version: "3.8"

services:
  gcode-api:
    build:
      context: ..
      dockerfile: exports/Dockerfile
    ports:
      - "8000:8000"
    environment:
      - MODEL_PATH=/app/exports/gcode_model_traced.pt
      - VOCAB_PATH=/app/data/gcode_vocab_v2.json
      - LOG_LEVEL=info
    volumes:
      - ../logs:/app/logs
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 2G
        reservations:
          memory: 1G

  # Optional: Redis for caching
  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    restart: unless-stopped

volumes:
  redis_data:
'''

# Write it next to the Dockerfile in the export directory
compose_path = EXPORT_DIR.joinpath('docker-compose.yml')
compose_path.write_text(docker_compose)
print(f"Docker Compose saved to: {compose_path}")

In [None]:
# Step-by-step Docker usage notes, shown verbatim to the reader
_docker_instructions = """
=== Docker Deployment Instructions ===

1. Build the Docker image:
   cd exports
   docker build -t gcode-fingerprint:latest -f Dockerfile ..

2. Run with Docker:
   docker run -p 8000:8000 gcode-fingerprint:latest

3. Run with Docker Compose:
   docker-compose up -d

4. Test the API:
   curl http://localhost:8000/health

5. View logs:
   docker-compose logs -f gcode-api

6. Stop services:
   docker-compose down
"""
print(_docker_instructions)

## 6. Performance Benchmarking

Comprehensive performance comparison across deployment options.

In [None]:
import matplotlib.pyplot as plt

def comprehensive_benchmark(models_dict, test_inputs, num_runs=100):
    """Run ``benchmark_model`` on every entry of ``models_dict``.

    ``test_inputs`` is unpacked into the positional inputs of each model.
    Returns a dict mapping each model name to its benchmark statistics,
    in the iteration order of ``models_dict``.
    """
    def _measure(label, candidate):
        # Announce the model before its (potentially slow) benchmark starts
        print(f"Benchmarking {label}...")
        return benchmark_model(candidate, *test_inputs, num_runs=num_runs)

    return {label: _measure(label, candidate) for label, candidate in models_dict.items()}

# Model variants under comparison
models_to_test = {
    'PyTorch (float32)': combined_model,
    'Quantized (int8)': quantized_model,
    'TorchScript': loaded_traced,
}

# Benchmark every variant on the same inputs
_shared_inputs = (test_continuous, test_categorical)
benchmark_results = comprehensive_benchmark(models_to_test, _shared_inputs, num_runs=100)

# Tabulate mean / std / p95 per variant
_heavy_rule = "=" * 60
_light_rule = "-" * 60
print("\n" + _heavy_rule)
print("BENCHMARK RESULTS")
print(_heavy_rule)
print(f"{'Model':<25} {'Mean (ms)':<12} {'Std (ms)':<12} {'P95 (ms)':<12}")
print(_light_rule)
for name in benchmark_results:
    stats = benchmark_results[name]
    print(f"{name:<25} {stats['mean_ms']:<12.3f} {stats['std_ms']:<12.3f} {stats['p95_ms']:<12.3f}")

In [None]:
# Two side-by-side bar charts: mean latency (left) and derived throughput (right)
_bar_colors = ['steelblue', 'coral', 'forestgreen']

names = list(benchmark_results.keys())
means = [benchmark_results[n]['mean_ms'] for n in names]
stds = [benchmark_results[n]['std_ms'] for n in names]
# Throughput in samples per second, derived from the mean latency in ms
throughputs = [1000 / benchmark_results[n]['mean_ms'] for n in names]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
_latency_ax = axes[0]
_throughput_ax = axes[1]

# Latency panel, with the standard deviation as error bars
bars = _latency_ax.bar(names, means, yerr=stds, capsize=5, color=_bar_colors)
_latency_ax.set_ylabel('Inference Time (ms)')
_latency_ax.set_title('Inference Time by Model Type')
_latency_ax.tick_params(axis='x', rotation=15)

for bar, mean in zip(bars, means):
    _x_center = bar.get_x() + bar.get_width()/2
    _latency_ax.text(_x_center, bar.get_height() + 0.1,
                     f'{mean:.2f}', ha='center', va='bottom')

# Throughput panel
bars2 = _throughput_ax.bar(names, throughputs, color=_bar_colors)
_throughput_ax.set_ylabel('Throughput (samples/sec)')
_throughput_ax.set_title('Throughput by Model Type')
_throughput_ax.tick_params(axis='x', rotation=15)

for bar, tp in zip(bars2, throughputs):
    _x_center = bar.get_x() + bar.get_width()/2
    _throughput_ax.text(_x_center, bar.get_height() + 1,
                        f'{tp:.1f}', ha='center', va='bottom')

plt.tight_layout()
plt.savefig(EXPORT_DIR / 'benchmark_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

## 7. Deployment Configurations

Configuration templates for different deployment scenarios.

In [None]:
# Configuration templates, one per deployment scenario
deployment_configs = {
    'development': {
        'model_path': 'outputs/final_model/checkpoint_best.pt',
        'device': 'cpu',
        'batch_size': 1,
        'num_workers': 0,
        'log_level': 'DEBUG',
        'enable_profiling': True,
        'cache_predictions': False,
    },
    'production_cpu': {
        'model_path': 'exports/gcode_model_traced.pt',
        'device': 'cpu',
        'batch_size': 8,
        'num_workers': 4,
        'log_level': 'INFO',
        'enable_profiling': False,
        'cache_predictions': True,
        'cache_ttl_seconds': 300,
    },
    'production_gpu': {
        'model_path': 'exports/gcode_model_traced.pt',
        'device': 'cuda',
        'batch_size': 32,
        'num_workers': 4,
        'log_level': 'INFO',
        'enable_profiling': False,
        'cache_predictions': True,
        'cache_ttl_seconds': 300,
        'fp16_inference': True,
    },
    'edge_device': {
        'model_path': 'exports/gcode_model_quantized.pt',
        'device': 'cpu',
        'batch_size': 1,
        'num_workers': 1,
        'log_level': 'WARNING',
        'enable_profiling': False,
        'cache_predictions': False,
        'max_memory_mb': 512,
    }
}

# Save configurations
config_dir = EXPORT_DIR / 'configs'
config_dir.mkdir(exist_ok=True)

# The loop variable is deliberately NOT named `config`: that name already holds
# the model configuration loaded with the checkpoint, and overwriting it here
# would make a re-run of the model-initialization cell build the wrong model.
for env_name, env_config in deployment_configs.items():
    config_path = config_dir / f'{env_name}.json'
    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(env_config, f, indent=2)
    print(f"Saved: {config_path}")

print(f"\nConfiguration files saved to: {config_dir}")

In [None]:
# Human-readable summary of the four configuration profiles, shown verbatim
_configuration_guide = """
=== Deployment Configuration Guide ===

development:
  - Full debugging enabled
  - Single-threaded for easier debugging
  - Uses original PyTorch model

production_cpu:
  - TorchScript optimized model
  - Multi-worker data loading
  - Prediction caching enabled
  - Suitable for cloud CPU instances

production_gpu:
  - GPU acceleration with FP16
  - Large batch processing
  - Maximum throughput
  - Requires CUDA-capable GPU

edge_device:
  - Quantized model for minimal footprint
  - Memory-constrained operation
  - Suitable for embedded systems

Usage:
  python -m miracle.api.server --config exports/configs/production_cpu.json
"""
print(_configuration_guide)

## 8. Monitoring & Logging

Set up monitoring and logging for production deployment.

In [None]:
# Prometheus metrics template (written out as a standalone Python module).
# The middleware derives the `status` label from the actual outcome: an
# exception raised by the app, or a 5xx response status, is counted as
# "error". Metrics are recorded in a `finally` block so failed requests are
# measured too instead of being silently dropped.
prometheus_metrics = '''# Prometheus metrics for G-code fingerprinting service
from prometheus_client import Counter, Histogram, Gauge, generate_latest
import time

# Request counters
REQUESTS_TOTAL = Counter(
    'gcode_requests_total',
    'Total number of prediction requests',
    ['endpoint', 'status']
)

# Latency histograms
REQUEST_LATENCY = Histogram(
    'gcode_request_latency_seconds',
    'Request latency in seconds',
    ['endpoint'],
    buckets=[0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0]
)

INFERENCE_LATENCY = Histogram(
    'gcode_inference_latency_seconds',
    'Model inference latency in seconds',
    buckets=[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25]
)

# Model metrics
MODEL_LOADED = Gauge(
    'gcode_model_loaded',
    'Whether the model is loaded (1) or not (0)'
)

BATCH_SIZE = Histogram(
    'gcode_batch_size',
    'Batch sizes of inference requests',
    buckets=[1, 2, 4, 8, 16, 32, 64]
)

# Prediction confidence
PREDICTION_CONFIDENCE = Histogram(
    'gcode_prediction_confidence',
    'Confidence scores of predictions',
    ['head'],
    buckets=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]
)


class MetricsMiddleware:
    """ASGI middleware that records latency and outcome of HTTP requests."""

    def __init__(self, app):
        self.app = app

    async def __call__(self, scope, receive, send):
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        path = scope["path"]
        status = "success"
        start_time = time.perf_counter()

        async def send_wrapper(message):
            # Inspect the response status line to classify server errors
            nonlocal status
            if message["type"] == "http.response.start" and message["status"] >= 500:
                status = "error"
            await send(message)

        try:
            # Process request
            await self.app(scope, receive, send_wrapper)
        except Exception:
            status = "error"
            raise
        finally:
            # Record metrics for successful and failed requests alike
            duration = time.perf_counter() - start_time
            REQUEST_LATENCY.labels(endpoint=path).observe(duration)
            REQUESTS_TOTAL.labels(endpoint=path, status=status).inc()
'''

# Save metrics module
metrics_path = EXPORT_DIR / 'metrics.py'
metrics_path.write_text(prometheus_metrics)
print(f"Prometheus metrics module saved to: {metrics_path}")

In [None]:
# dictConfig-style logging setup: plain-text console output plus a rotating JSON log file
logging_config = '''{
  "version": 1,
  "disable_existing_loggers": false,
  "formatters": {
    "standard": {
      "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    },
    "json": {
      "()": "pythonjsonlogger.jsonlogger.JsonFormatter",
      "format": "%(asctime)s %(name)s %(levelname)s %(message)s"
    }
  },
  "handlers": {
    "console": {
      "class": "logging.StreamHandler",
      "level": "INFO",
      "formatter": "standard",
      "stream": "ext://sys.stdout"
    },
    "file": {
      "class": "logging.handlers.RotatingFileHandler",
      "level": "DEBUG",
      "formatter": "json",
      "filename": "logs/gcode_api.log",
      "maxBytes": 10485760,
      "backupCount": 5
    }
  },
  "loggers": {
    "miracle": {
      "level": "DEBUG",
      "handlers": ["console", "file"],
      "propagate": false
    },
    "uvicorn": {
      "level": "INFO",
      "handlers": ["console"],
      "propagate": false
    }
  },
  "root": {
    "level": "INFO",
    "handlers": ["console"]
  }
}'''

# Store the JSON text alongside the other exported artifacts
logging_config_path = EXPORT_DIR.joinpath('logging_config.json')
logging_config_path.write_text(logging_config)
print(f"Logging configuration saved to: {logging_config_path}")

In [None]:
# Summary of exported files: one line per file plus a grand total
print("\n" + "="*60)
print("EXPORT SUMMARY")
print("="*60)

# Walk the export tree once and reuse the listing for both the rows and the total
exported_files = sorted(p for p in EXPORT_DIR.rglob('*') if p.is_file())

total_size = 0
for file_path in exported_files:
    size_bytes = file_path.stat().st_size
    total_size += size_bytes
    # Path objects reject alignment format specs such as '<40' with a
    # TypeError, so the relative path is converted to str before formatting.
    rel_path = str(file_path.relative_to(EXPORT_DIR))
    print(f"{rel_path:<40} {size_bytes / 1024:>10.1f} KB")

print("-"*60)
print(f"{'Total':<40} {total_size/1024/1024:>10.2f} MB")

---

## Summary

This notebook covers production deployment:

1. **ONNX Export**: Cross-platform model format with runtime verification
2. **Quantization**: INT8 quantization for reduced size and faster inference
3. **TorchScript**: Optimized compilation for production
4. **Docker**: Containerization with health checks and compose files
5. **Benchmarking**: Performance comparison across deployment options
6. **Configurations**: Environment-specific deployment configs
7. **Monitoring**: Prometheus metrics and structured logging

---

**Navigation:**
← [Previous: 12_error_analysis](12_error_analysis.ipynb) |
[Next: 14_robustness_testing](14_robustness_testing.ipynb) →