# Convert models
This notebook converts the HDF5-serialized raw SLEAP models, first into TF `SavedModel` format, and then into TensorRT-optimized format at different precisions.

Before running this, see `download_data.ipynb`: this notebook expects each raw model at `data/{model}_sleap_model/best_model.h5`.

This is based on the [TensorRT example notebook here](https://github.com/NVIDIA/TensorRT/blob/master/quickstart/IntroNotebooks/2.%20Using%20the%20Tensorflow%20TensorRT%20Integration.ipynb).

See also: [TF-TRT integration](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-722/quick-start-guide/index.html#framework-integration)

In [1]:
# Model identifiers; each one is substituted into the data/{model}_* paths.
models = [
    "bu",
    "centroid",
    "td",
]
# Precisions to build TensorRT variants for. "INT8" is left out for now: it
# appears to be broken (calibration fails during conversion).
precisions = [
    "FP32",
    "FP16",
]

In [2]:
def convert_to_savedmodel(model):
    """Export the HDF5 Keras model for `model` as a TF SavedModel folder.

    Reads `data/{model}_sleap_model/best_model.h5` and writes
    `data/{model}_savedmodel`. Nothing is loaded or written when the
    destination path already exists.

    Args:
        model: Model identifier used to build both paths (e.g. "bu").
    """
    import os
    import tensorflow as tf

    src_h5 = f"data/{model}_sleap_model/best_model.h5"
    dst_dir = f"data/{model}_savedmodel"  # SavedModel proto folder

    if os.path.exists(dst_dir):
        # Destination is already present; skip the export.
        return

    # Load without compiling, then re-serialize in SavedModel format.
    keras_model = tf.keras.models.load_model(src_h5, compile=False)
    keras_model.save(dst_dir)
    print(f"Saved: {src_h5} -> {dst_dir}")

        
def convert_to_trt(model, precision):
    """Build a TensorRT-optimized copy of a SavedModel at one precision.

    Reads `data/{model}_savedmodel` and asks `trtutils.ModelOptimizer` to
    write its result to `data/{model}_trt_{precision}`.

    Based on:
    https://github.com/NVIDIA/TensorRT/blob/master/quickstart/IntroNotebooks/2.%20Using%20the%20Tensorflow%20TensorRT%20Integration.ipynb

    Args:
        model: Model identifier used to build both paths (e.g. "bu").
        precision: Precision name forwarded to the optimizer. "INT8"
            additionally sets calibration data before converting.

    Returns:
        True if the output path exists after conversion, False otherwise.
    """
    import os

    src_dir = f"data/{model}_savedmodel"  # SavedModel proto folder
    dst_dir = f"data/{model}_trt_{precision}"

    from trtutils import ModelOptimizer
    optimizer = ModelOptimizer(src_dir)
    print(f"Created ModelOptimizer with: {src_dir}")

    import numpy as np

    if precision == "INT8":
        import tensorflow as tf
        keras_model = tf.keras.models.load_model(src_dir)

        # Known issue, this path is not working:
        #   InternalError:  Failed to feed calibration data
        #   [[node TRTEngineOp_2_0 (defined at /mnt/helper.py:94) ]] [Op:__inference_pruned_20725]
        #   Function call stack: pruned
        n_calib = 32
        # All-zero batch shaped like the first model input, minus its
        # leading (batch) dimension.
        sample_shape = tuple(keras_model.inputs[0].shape[1:])
        calib_data = np.zeros((n_calib,) + sample_shape)
        print("Set calibration data:", calib_data.shape)
        optimizer.set_calibration_data(calib_data)

    # The returned object is not used; success is judged from disk below.
    _converted_model = optimizer.convert(dst_dir, precision=precision)

    succeeded = os.path.exists(dst_dir)
    if succeeded:
        print(f"Converted model: {dst_dir}")
    else:
        print(f"failed to convert model: {dst_dir}")
    return succeeded

In [3]:
# For every model: export it to SavedModel format, then build one
# TensorRT variant from that SavedModel per requested precision.
for model in models:
    convert_to_savedmodel(model)
    
    # The boolean returned by convert_to_trt is ignored here; the outcome is
    # only visible through the messages it prints.
    for precision in precisions:
        convert_to_trt(model, precision)

INFO:tensorflow:Assets written to: data/bu_savedmodel/assets
Saved: data/bu_sleap_model/best_model.h5 -> data/bu_savedmodel
Created ModelOptimizer with: data/bu_savedmodel
INFO:tensorflow:Linked TensorRT version: (7, 2, 2)
INFO:tensorflow:Loaded TensorRT version: (7, 2, 3)
INFO:tensorflow:Loaded TensorRT 7.2.3 and linked TensorFlow against TensorRT 7.2.2. This is supported because TensorRT  minor/patch upgrades are backward compatible
INFO:tensorflow:Could not find TRTEngineOp_0_3 in TF-TRT cache. This can happen if build() is not called, which means TensorRT engines will be built and cached at runtime.
INFO:tensorflow:Could not find TRTEngineOp_0_2 in TF-TRT cache. This can happen if build() is not called, which means TensorRT engines will be built and cached at runtime.
INFO:tensorflow:Could not find TRTEngineOp_0_1 in TF-TRT cache. This can happen if build() is not called, which means TensorRT engines will be built and cached at runtime.
INFO:tensorflow:Could not find TRTEngineOp_0_