In [None]:
# Install required packages
%pip install tensorflow onnx onnxruntime

In [None]:
# Import libraries
import tensorflow as tf
import numpy as np
from pathlib import Path
import time

In [None]:
# Configuration: every model location is derived from one root directory.
MODELS_DIR = Path("../data/models")

# Source Keras models that the conversion helpers read.
FACE_MODEL_PATH = MODELS_DIR.joinpath("face_detection", "face_detector.h5")
SOUND_MODEL_PATH = MODELS_DIR.joinpath("sound_classification", "audio_classifier.h5")

# Destination directory for the converted models.
OUTPUT_DIR = MODELS_DIR.joinpath("optimized")

## TensorFlow Lite Conversion

In [None]:
def convert_to_tflite(model_path, output_path, quantize=True):
    """
    Convert a Keras model to TensorFlow Lite and report the size change.

    Args:
        model_path: Path to the Keras model (a single file such as ``.h5``,
            or a model directory).
        output_path: Path for the output TFLite model. Missing parent
            directories are created.
        quantize: Whether to apply the converter's default optimizations
            (dynamic range quantization).

    Returns:
        The serialized TFLite model exactly as returned by
        ``converter.convert()``.
    """
    model_file = Path(model_path)
    output_file = Path(output_path)

    # Load model
    model = tf.keras.models.load_model(model_path)

    # Create converter
    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    if quantize:
        # Default optimizations = dynamic range quantization. Full integer
        # quantization additionally needs a representative dataset; see
        # convert_to_int8.
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

    # Convert
    tflite_model = converter.convert()

    # Save. The output directory is not guaranteed to exist yet, so create it
    # rather than failing after the (slow) conversion has already finished.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'wb') as f:
        f.write(tflite_model)

    # Report sizes. For a model directory, stat() would only give the size of
    # the directory entry, so sum the files it contains instead.
    if model_file.is_dir():
        original_bytes = sum(
            entry.stat().st_size for entry in model_file.rglob("*") if entry.is_file()
        )
    else:
        original_bytes = model_file.stat().st_size

    original_size = original_bytes / 1024 / 1024
    optimized_size = len(tflite_model) / 1024 / 1024

    print(f"Original model size: {original_size:.2f} MB")
    print(f"Optimized model size: {optimized_size:.2f} MB")
    if original_size > 0:
        print(f"Reduction: {(1 - optimized_size/original_size)*100:.1f}%")
    else:
        # Avoid dividing by zero for an empty source.
        print("Reduction: n/a")

    return tflite_model

## Benchmark Inference Speed

In [None]:
def benchmark_tflite(tflite_model_path, input_shape, num_runs=100):
    """
    Benchmark TFLite model inference speed on random input.

    Args:
        tflite_model_path: Path to TFLite model
        input_shape: Shape of input tensor
        num_runs: Number of timed inference runs (must be >= 1). Ten untimed
            warmup runs are always executed first.

    Returns:
        Average time per inference in milliseconds.

    Raises:
        ValueError: If ``num_runs`` is less than 1.
    """
    if num_runs < 1:
        raise ValueError(f"num_runs must be a positive integer, got {num_runs}")

    # Load TFLite model
    interpreter = tf.lite.Interpreter(model_path=str(tflite_model_path))
    interpreter.allocate_tensors()

    input_info = interpreter.get_input_details()[0]
    input_index = input_info['index']
    input_dtype = input_info['dtype']

    # Create random input in the dtype the model expects. Fully quantized
    # models (e.g. those produced by convert_to_int8) take integer input and
    # reject float32 tensors.
    if np.issubdtype(input_dtype, np.integer):
        limits = np.iinfo(input_dtype)
        input_data = np.random.randint(
            limits.min, limits.max, size=input_shape, dtype=input_dtype
        )
    else:
        input_data = np.random.random(input_shape).astype(input_dtype)

    # Warmup
    for _ in range(10):
        interpreter.set_tensor(input_index, input_data)
        interpreter.invoke()

    # Benchmark. perf_counter is monotonic and high resolution, unlike
    # time.time(), which can jump with wall-clock adjustments.
    start_time = time.perf_counter()
    for _ in range(num_runs):
        interpreter.set_tensor(input_index, input_data)
        interpreter.invoke()
    end_time = time.perf_counter()

    avg_time = (end_time - start_time) / num_runs * 1000
    # Guard against a zero elapsed time on very coarse clocks.
    fps = 1000 / avg_time if avg_time > 0 else float("inf")

    print(f"Average inference time: {avg_time:.2f} ms")
    print(f"Throughput: {fps:.1f} FPS")

    return avg_time

## Full Integer Quantization

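For maximum performance on Raspberry Pi, use full integer quantization. This requires a representative dataset so the converter can calibrate the value ranges of the model's activations.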

In [None]:
def convert_to_int8(model_path, output_path, representative_data_gen):
    """
    Convert model to fully quantized INT8 TFLite.

    The converter is restricted to the INT8 builtin op set, and both the
    model input and output types are set to int8.

    Args:
        model_path: Path to Keras model
        output_path: Path for output TFLite model. Missing parent directories
            are created.
        representative_data_gen: Representative dataset, assigned as-is to
            ``converter.representative_dataset``. TFLite expects a callable
            that yields representative input samples (a generator function,
            not an already-started generator object).

    Returns:
        The serialized TFLite model exactly as returned by
        ``converter.convert()``.
    """
    model = tf.keras.models.load_model(model_path)
    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_data_gen
    # Restrict to INT8 builtin ops and use int8 tensors at the model boundary.
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8

    tflite_model = converter.convert()

    # The output directory is not guaranteed to exist yet, so create it rather
    # than failing after the conversion has already finished.
    output_file = Path(output_path)
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'wb') as f:
        f.write(tflite_model)

    print(f"INT8 model saved to: {output_path}")
    return tflite_model