# 04 - Model Export for Deployment

This notebook demonstrates how to export trained models for deployment.

## What you'll learn:
- How to export models to ONNX format
- How to export models to TorchScript format
- How to validate exported models
- How to run inference with exported models

In [None]:
import altair as alt
import numpy as np
from pathlib import Path

## Export Formats

Altair supports two export formats:

| Format | Extension | Use Case |
|--------|-----------|----------|
| **ONNX** | `.onnx` | Cross-platform (TensorRT, OpenVINO, ONNX Runtime) |
| **TorchScript** | `.pt` | PyTorch ecosystem (LibTorch C++, mobile) |

## Quick Export

The simplest way to export a model:

In [None]:
# Quick-export calls, shown the same way as the other examples in this
# notebook: the snippet is stored in a string and printed, so the cell
# renders output on "Run All" without executing the export itself.
# ("run_abc123" is presumably a placeholder run ID - substitute a real one.)
quick_export_code = """
# Export to ONNX
path = alt.export("run_abc123", "model.onnx")
print(f"Exported to: {path}")

# Export to TorchScript
path = alt.export("run_abc123", "model.pt", format="torchscript")
print(f"Exported to: {path}")
"""
print(quick_export_code)

## ONNX Export

ONNX is ideal for cross-platform deployment:

In [None]:
# Example `alt.export` calls for ONNX: a minimal call, then one that spells
# out every option. The snippet lives in a string and is only printed, so
# running this cell displays the code without performing an export.
onnx_export_code = """
# Basic ONNX export
path = alt.export("run_abc123", "model.onnx")

# With custom options
path = alt.export(
    "run_abc123",
    "model.onnx",
    format="onnx",
    input_shape=(1, 3, 512, 512),  # Batch, Channels, Height, Width
    opset_version=17,               # ONNX opset version
    dynamic_axes=True,              # Allow variable batch/spatial size
    simplify=True,                  # Simplify the graph (requires onnxsim)
    validate=True,                  # Validate output matches PyTorch
)
"""
print(onnx_export_code)

### Dynamic vs Fixed Input Size

By default, ONNX exports support dynamic input sizes:

In [None]:
# Contrasts two ONNX exports: one with dynamic_axes=True and one with a fixed
# input_shape plus dynamic_axes=False. Printed as text; nothing is executed.
dynamic_code = """
# Dynamic axes (default) - supports any input size
path = alt.export("run_abc123", "model_dynamic.onnx", dynamic_axes=True)

# Fixed input size - slightly faster but inflexible
path = alt.export(
    "run_abc123",
    "model_fixed.onnx",
    input_shape=(1, 3, 512, 512),
    dynamic_axes=False,
)
"""
print(dynamic_code)

## TorchScript Export

TorchScript is ideal for PyTorch ecosystem deployment:

In [None]:
# Minimal TorchScript export: the same `alt.export` call as for ONNX, but
# with format="torchscript" and a ".pt" output path. Printed, not executed.
ts_export_code = """
# Basic TorchScript export
path = alt.export("run_abc123", "model.pt", format="torchscript")

# The model is exported using tracing by default
# and optimized for inference
"""
print(ts_export_code)

## Using the ModelExporter Class

For more control, use the `ModelExporter` class directly:

In [None]:
# Lower-level export walkthrough using ModelExporter directly: load a run,
# rebuild the model, restore the checkpoint weights, then export to ONNX with
# explicit tensor names. The snippet is printed as text, not executed.
#
# The dynamic_axes keys use the same names as input_names / output_names
# ("image" / "segmentation"): ONNX export matches dynamic axes to tensors by
# name, so keys that do not match a declared name would not take effect.
exporter_code = """
from altair.export import ModelExporter
from altair.models import build_model
import torch

# Load run and build model
run = alt.load("run_abc123")
model = build_model(run.config["model"])

# Load weights
checkpoint = torch.load(run.best_checkpoint)
model.load_state_dict(checkpoint["model_state_dict"])

# Create exporter
exporter = ModelExporter(
    model=model,
    input_shape=(1, 3, 512, 512),
    device="cuda",
)

# Export to ONNX with custom settings
# (dynamic_axes keys must match the names in input_names / output_names)
result = exporter.to_onnx(
    "model.onnx",
    opset_version=17,
    input_names=["image"],
    output_names=["segmentation"],
    dynamic_axes={
        "image": {0: "batch", 2: "height", 3: "width"},
        "segmentation": {0: "batch", 2: "height", 3: "width"},
    },
    simplify=True,
    validate=True,
)

print(f"Exported to: {result.path}")
print(f"File size: {result.file_size_mb:.2f} MB")
print(f"Metadata: {result.metadata}")
"""
print(exporter_code)

## Export to Multiple Formats

In [None]:
# Shows ModelExporter.export_all writing several formats in one call and then
# iterating over the returned {format: result} mapping. Printed, not executed.
# Note: the snippet uses `model` without defining it, so it expects a model
# object to exist already (e.g. built as in the ModelExporter example).
multi_export_code = """
from altair.export import ModelExporter

exporter = ModelExporter(model, input_shape=(1, 3, 512, 512))

# Export to all formats at once
results = exporter.export_all(
    output_dir="exported_models/",
    name="segmentation_model",
    formats=["onnx", "torchscript"],
)

for fmt, result in results.items():
    print(f"{fmt}: {result.path} ({result.file_size_mb:.2f} MB)")
"""
print(multi_export_code)

## FP16 Export

Export in half precision for faster inference:

In [None]:
# Half-precision export: call exporter.to_half() before to_onnx(), then report
# the resulting file size. Printed as text, not executed.
# Note: the snippet uses `model` without defining it, so it expects a model
# object to exist already.
fp16_code = """
from altair.export import ModelExporter

exporter = ModelExporter(model, input_shape=(1, 3, 512, 512))

# Convert to FP16
exporter.to_half()

# Export
result = exporter.to_onnx("model_fp16.onnx")
print(f"FP16 model size: {result.file_size_mb:.2f} MB")
"""
print(fp16_code)

## Validating Exported Models

In [None]:
# Shows the two validation helpers, validate_onnx and validate_torchscript,
# and the result keys the snippet reads from them ('valid', 'inference_ok',
# 'output_shape', 'file_size_mb'). Printed as text, not executed.
validate_code = """
from altair.export import validate_onnx, validate_torchscript

# Validate ONNX model
results = validate_onnx("model.onnx", input_shape=(1, 3, 512, 512))
print(f"Valid: {results['valid']}")
print(f"Inference OK: {results['inference_ok']}")
print(f"Output shape: {results['output_shape']}")
print(f"File size: {results['file_size_mb']:.2f} MB")

# Validate TorchScript model
results = validate_torchscript("model.pt", input_shape=(1, 3, 512, 512))
print(f"Valid: {results['valid']}")
print(f"Inference OK: {results['inference_ok']}")
"""
print(validate_code)

## Inference with Exported Models

### ONNX Runtime

In [None]:
# ONNX Runtime inference example in two parts: first a smoke test that feeds a
# random float32 array through ONNXInferenceSession, then a real-image path
# that resizes/normalizes with albumentations, adds a batch dimension,
# converts to NumPy, and takes the argmax over axis 1 to get a class mask.
# Printed as text, not executed.
onnx_inference_code = """
from altair.export.exporter import ONNXInferenceSession
import numpy as np

# Create session
session = ONNXInferenceSession("model.onnx")

# Print model info
print(f"Input shape: {session.input_shape}")
print(f"Output shape: {session.output_shape}")

# Run inference
image = np.random.randn(1, 3, 512, 512).astype(np.float32)
output = session(image)

print(f"Output shape: {output.shape}")

# With actual image
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Preprocess
transform = A.Compose([
    A.Resize(512, 512),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

image = np.array(Image.open("test.png").convert("RGB"))
transformed = transform(image=image)
input_tensor = transformed["image"].unsqueeze(0).numpy()

# Inference
output = session(input_tensor)
mask = output.argmax(axis=1).squeeze()  # For multiclass
# mask = (output > 0.5).squeeze()  # For binary
"""
print(onnx_inference_code)

### TorchScript

In [None]:
# TorchScript inference example: open a TorchScriptInferenceSession on "cuda",
# run a random tensor through it, then move the session to "cpu".
# Printed as text, not executed.
ts_inference_code = """
from altair.export.exporter import TorchScriptInferenceSession
import torch

# Create session
session = TorchScriptInferenceSession("model.pt", device="cuda")

# Run inference
image = torch.randn(1, 3, 512, 512)
output = session(image)

print(f"Output shape: {output.shape}")

# Move to different device
session.to("cpu")
"""
print(ts_inference_code)

## Command Line Export

You can also export using the CLI:

In [None]:
# Shell command examples for the `altair export` CLI (format, input shape,
# dynamic axes, simplification and opset flags). The contents are shell
# commands rather than Python; they are only printed for copy/paste.
cli_code = """
# Basic ONNX export
altair export --run run_abc123 --output model.onnx

# TorchScript export
altair export --run run_abc123 --output model.pt --format torchscript

# With custom input shape
altair export --run run_abc123 --output model.onnx --input-shape 1,3,1024,1024

# Fixed input size (no dynamic axes)
altair export --run run_abc123 --output model.onnx --no-dynamic

# Skip simplification
altair export --run run_abc123 --output model.onnx --no-simplify

# Different opset version
altair export --run run_abc123 --output model.onnx --opset 14
"""
print(cli_code)

## Deployment Targets

### TensorRT (NVIDIA GPUs)

In [None]:
# Outline of the TensorRT route: export to ONNX from Python, then convert the
# ONNX file to an engine with the `trtexec` command shown in the comment.
# Printed as text, not executed.
tensorrt_code = """
# 1. Export to ONNX
alt.export("run_abc123", "model.onnx", opset_version=17)

# 2. Convert to TensorRT engine (using trtexec)
# trtexec --onnx=model.onnx --saveEngine=model.trt --fp16

# 3. Use TensorRT engine for inference
# (See TensorRT documentation)
"""
print(tensorrt_code)

### OpenVINO (Intel)

In [None]:
# Outline of the OpenVINO route: export to ONNX from Python, then convert the
# ONNX file with the command shown in the comment. Printed, not executed.
# NOTE(review): `mo` is OpenVINO's legacy Model Optimizer CLI; recent OpenVINO
# releases document `ovc` as the converter - confirm which OpenVINO version
# this tutorial targets before relying on the command below.
openvino_code = """
# 1. Export to ONNX
alt.export("run_abc123", "model.onnx")

# 2. Convert with OpenVINO Model Optimizer
# mo --input_model model.onnx --output_dir openvino_model

# 3. Use OpenVINO runtime for inference
# (See OpenVINO documentation)
"""
print(openvino_code)

### LibTorch C++

In [None]:
# C++ (LibTorch) example for loading the TorchScript file produced by the
# export and running one forward pass on a random tensor. The string holds
# C++ source, not Python; it is only printed for copy/paste.
libtorch_code = """
// 1. Export to TorchScript in Python
// alt.export("run_abc123", "model.pt", format="torchscript")

// 2. Load in C++
#include <torch/script.h>

int main() {
    // Load model
    torch::jit::script::Module model = torch::jit::load("model.pt");
    model.eval();
    
    // Create input
    torch::Tensor input = torch::randn({1, 3, 512, 512});
    
    // Run inference
    torch::Tensor output = model.forward({input}).toTensor();
    
    return 0;
}
"""
print(libtorch_code)

## Complete Export Pipeline

In [None]:
# End-to-end example: export one run to ONNX and TorchScript, validate the
# ONNX file, then run a dummy inference through ONNXInferenceSession.
# The snippet is printed as text, not executed.
#
# The literal is a raw string (r"""...""") on purpose: the embedded print()
# calls contain "\n", and in a normal string that escape would become a real
# line break, splitting the displayed string literals across two lines and
# making the printed snippet invalid Python when copied.
complete_pipeline = r"""
import altair as alt
from altair.export import validate_onnx
from pathlib import Path

# Configuration
run_id = "my_run_id"
output_dir = Path("deployed_models")
output_dir.mkdir(exist_ok=True)

# 1. Export to ONNX (dynamic input size)
print("Exporting to ONNX...")
onnx_path = alt.export(
    run_id,
    output_dir / "model.onnx",
    format="onnx",
    dynamic_axes=True,
    simplify=True,
    validate=True,
)
print(f"ONNX model: {onnx_path}")

# 2. Export to TorchScript
print("\nExporting to TorchScript...")
ts_path = alt.export(
    run_id,
    output_dir / "model.pt",
    format="torchscript",
    validate=True,
)
print(f"TorchScript model: {ts_path}")

# 3. Validate ONNX model
print("\nValidating ONNX model...")
results = validate_onnx(onnx_path)
print(f"Valid: {results['valid']}")
print(f"File size: {results['file_size_mb']:.2f} MB")

# 4. Test inference
print("\nTesting inference...")
from altair.export.exporter import ONNXInferenceSession
import numpy as np

session = ONNXInferenceSession(onnx_path)
dummy_input = np.random.randn(1, 3, 512, 512).astype(np.float32)
output = session(dummy_input)
print(f"Inference successful! Output shape: {output.shape}")

print("\nExport complete!")
"""
print(complete_pipeline)

## Next Steps

- **05_custom_config.ipynb**: Advanced configuration options