https://docs.openvino.ai/latest/notebooks/301-tensorflow-training-openvino-pot-with-output.html

In [1]:
!pwd

/home/ubuntu/brats_2018_on_intel/notebooks


In [2]:
import copy
import os
import sys
import urllib
import time
from pathlib import Path

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

from addict import Dict

from openvino.tools.pot.api import Metric, DataLoader
from openvino.tools.pot.graph import load_model, save_model
from openvino.tools.pot.graph.model_utils import compress_model_weights
from openvino.tools.pot.engines.ie_engine import IEEngine
from openvino.tools.pot.pipeline.initializer import create_pipeline
from openvino.runtime import Core

sys.path.append("../utils")
# from notebook_utils import benchmark_model

import sys
sys.path.insert(0, "/home/ubuntu/brats_2018_on_intel/src/")

2022-09-08 23:17:27.087153: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-08 23:17:27.087185: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [3]:
path_to_add = "/home/ubuntu/miniconda3/envs/optimize_model/"
os.environ["PATH"] += os.pathsep + path_to_add

In [4]:
DATA_PATH = "/home/ubuntu/brats_2018_on_intel/data/processed/Task01_BrainTumour/"
DATA_DIR = "/home/ubuntu/brats_2018_on_intel/data/processed/"
DATASET = "Task01_BrainTumour/"

TRAIN_TEST_SPLIT = 0.80
VALIDATE_TEST_SPLIT = 0.50

BATCH_SIZE_TRAIN = 8
BATCH_SIZE_VALIDATE = 4
BATCH_SIZE_TEST = 1

TILE_HEIGHT = 144
TILE_WIDTH = 144
TILE_DEPTH = 144
NUMBER_INPUT_CHANNELS = 1

CROP_DIM = (TILE_HEIGHT,TILE_WIDTH,TILE_DEPTH,NUMBER_INPUT_CHANNELS)

NUMBER_OUTPUT_CLASSES = 1


MODEL_DIR = "/home/ubuntu/brats_2018_on_intel/models"
SAVED_MODEL_NAME = "3d_unet_decathlon"
SELECTED_MODEL_EPOCH = 27

FILTERS = 16
NUM_EPOCHS = 40

RANDOM_SEED = 64

OUTPUT_DIR = Path("/home/ubuntu/brats_2018_on_intel/models/openvino")
IR_MODEL_PRECISION = "FP32"

In [5]:
saved_model_path = Path(Path(MODEL_DIR) / SAVED_MODEL_NAME)

fp32_ir_name = Path(SAVED_MODEL_NAME + "_" + "tf" + "_" + "ov" + "_" + "fp32" + "_ir")
fp32_ir_path = Path(OUTPUT_DIR / fp32_ir_name)

pot_int8_ir_name = Path(SAVED_MODEL_NAME + "_" + "tf" + "_" + "ov" + "_" + "pot" + "_" + "int8" + "_ir")
pot_int8_ir_path = Path(OUTPUT_DIR / pot_int8_ir_name)

path_to_xml_file = f"{fp32_ir_path}.xml"
print(path_to_xml_file)
path_to_bin_file = f"{fp32_ir_path}.bin"
print(path_to_bin_file)

/home/ubuntu/brats_2018_on_intel/models/openvino/3d_unet_decathlon_tf_ov_fp32_ir.xml
/home/ubuntu/brats_2018_on_intel/models/openvino/3d_unet_decathlon_tf_ov_fp32_ir.bin


In [6]:
from data.dataloader import DatasetGenerator

In [7]:
brats_datafiles = DatasetGenerator(data_path=DATA_PATH, 
                                   train_test_split=TRAIN_TEST_SPLIT,
                                   validate_test_split=VALIDATE_TEST_SPLIT,
                                   batch_size_train=BATCH_SIZE_TRAIN,
                                   batch_size_validate=BATCH_SIZE_VALIDATE,
                                   batch_size_test=BATCH_SIZE_TEST,
                                   tile_height=TILE_HEIGHT, 
                                   tile_width=TILE_WIDTH, 
                                   tile_depth=TILE_DEPTH, 
                                   number_input_channels=NUMBER_INPUT_CHANNELS,
                                   number_output_classes=NUMBER_OUTPUT_CLASSES,
                                   random_seed=RANDOM_SEED)

2022-09-08 23:17:53.021306: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/ubuntu/miniconda3/envs/optimize_model/lib/python3.7/site-packages/cv2/../../lib64:
2022-09-08 23:17:53.021337: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2022-09-08 23:17:53.021353: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ip-10-1-35-172): /proc/driver/nvidia/version does not exist
2022-09-08 23:17:53.021652: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler 

In [8]:
class MyDataLoader(DataLoader):

    def __init__(self, config):

        super().__init__(config)

        """
        You can define this data loader to work with your custom dataset.
        In our case, we've already defined a TensorFlow `tf.data` object.
        We'll just pass that to the API's data loader and transpose the images and masks
        (OpenVINO assumes the data is channels first-- NCHWD)
        """

        self.items = np.arange(config["num_samples"])  # Just pass in how many samples you want to take
        self.dataset = config["test_dataset"]

        print("\nQuantizing FP32 OpenVINO model to INT8\n")

        print(f"Taking {len(self.items):,} random samples from the test dataset")

        self.batch_size = 1

    def set_subset(self, indices):
        self._subset = None

    @property
    def batch_num(self):
        return ceil(self.size / self.batch_size)

    @property
    def size(self):
        return self.items.shape[0]

    def __len__(self):
        return self.size

    def __getitem__(self, item):
        """
        """
        ds = self.dataset.take(1).as_numpy_iterator()  # Grab the next batch and take a single element (image/mask)
        for img, msk in ds:
            img = np.transpose(img, [0,4,1,2,3])  # OpenVINO expects the input to be channels first (NCHWD)
            msk = np.transpose(msk, [0,4,1,2,3])  # OpenVINO expects the label/output to be channels first (NCHWD)
        
        return (item, msk), img

In [9]:
class MyMetric(Metric):
    def __init__(self):
        super().__init__()
        self.name = "custom Metric - Dice score"
        self._values = []
        self.round = 1

    @property
    def value(self):
        """ Returns accuracy metric value for the last model output. """
        return {self.name: [self._values[-1]]}

    @property
    def avg_value(self):
        """ Returns accuracy metric value for all model outputs. """
        value = np.ravel(self._values).mean()
        print("Round #{}    Mean {} = {}".format(self.round, self.name, value))

        self.round += 1

        return {self.name: value}

    def update(self, outputs, labels):
        """ Updates prediction matches.
        Args:
            outputs: model output
            labels: annotations
        Put your post-processing code here.
        Put your custom metric code here.
        The metric gets appended to the list of metric values
        """

        def dice_score(pred, truth):
            """
            Sorensen Dice score
            Measure of the overlap between the prediction and ground truth masks
            """
            numerator = np.sum(np.round(pred) * truth) * 2.0
            denominator = np.sum(np.round(pred)) + np.sum(truth)

            return numerator / denominator


        metric = dice_score(labels[0], outputs[0])
        self._values.append(metric)

    def reset(self):
        """ Resets collected matches """
        self._values = []

    @property
    def higher_better(self):
        """Attribute whether the metric should be increased"""
        return True

    def get_attributes(self):
        return {self.name: {"direction": "higher-better", "type": ""}}

In [10]:
maximum_metric_drop = 0.05  # For accuracy-aware training. this defines how much the metric is allowed to change.
accuracy_aware_quantization=True


dataset_config = {
    "num_samples": 40,   # Get 40 samples
    "test_dataset": brats_datafiles.get_test()   # Pass our TensorFlow data loader to the API
}

# Model config specifies the model name and paths to model .xml and .bin file
model_config = Dict(
    {
        "model_name": pot_int8_ir_name,
        "model": path_to_xml_file,
        "weights": path_to_bin_file
    }
)


engine_config = Dict({
    "device": "CPU",
    "stat_requests_number": 4,
    "eval_requests_number": 4
})

default_quantization_algorithm = [
    {
        "name": "DefaultQuantization",
        "params": {
            "target_device": "CPU",
            "preset": "performance",
            #"stat_subset_size": 10
        }
    }
]

accuracy_aware_quantization_algorithm = [
    {
        "name": "AccuracyAwareQuantization", # compression algorithm name
        "params": {
            "target_device": "CPU",
            "preset": "performance",
            "stat_subset_size": 10,
            "metric_subset_ratio": 0.5, # A part of the validation set that is used to compare full-precision and quantized models
            "ranking_subset_size": 300, # A size of a subset which is used to rank layers by their contribution to the accuracy drop
            "max_iter_num": 10,    # Maximum number of iterations of the algorithm (maximum of layers that may be reverted back to full-precision)
            "maximal_drop": maximum_metric_drop,      # Maximum metric drop which has to be achieved after the quantization
            "drop_type": "absolute",    # Drop type of the accuracy metric: relative or absolute (default)
            "use_prev_if_drop_increase": True,     # Whether to use NN snapshot from the previous algorithm iteration in case if drop increases
            "base_algorithm": "DefaultQuantization" # Base algorithm that is used to quantize model at the beginning
        }
    }
]

class GraphAttrs(object):
    def __init__(self):
        self.keep_quantize_ops_in_IR = True
        self.keep_shape_ops = False
        self.data_type = "FP32"
        self.progress = False
        self.generate_experimental_IR_V10 = True
        self.blobs_as_inputs = True
        self.generate_deprecated_IR_V7 = False


In [11]:
model = load_model(model_config=model_config)

data_loader = MyDataLoader(dataset_config)

metric = MyMetric()


engine = IEEngine(config=engine_config, 
                  data_loader=data_loader, 
                  metric=metric)

if accuracy_aware_quantization:
    # https://docs.openvinotoolkit.org/latest/_compression_algorithms_quantization_accuracy_aware_README.html
    print("Accuracy-aware quantization method")
    pipeline = create_pipeline(accuracy_aware_quantization_algorithm, engine)
else:
    print("Default quantization method")
    pipeline = create_pipeline(default_quantization_algorithm, engine)


metric_results_FP32 = pipeline.evaluate(model)

compressed_model = pipeline.run(model=model)
#compress_model_weights(compressed_model)

save_model(model=compressed_model, 
           save_path=pot_int8_ir_path)

metric_results_INT8 = pipeline.evaluate(compressed_model)


print("\nFINAL RESULTS")

# print metric value
if metric_results_FP32:
    for name, value in metric_results_FP32.items():
        print(f"{name: <27s} FP32: {value}")

if metric_results_INT8:
    for name, value in metric_results_INT8.items():
        print(f"{name: <27s} INT8: {value}")


print(f"\nThe INT8 version of the model has been saved to the directory {pot_int8_ir_path}\n")


Quantizing FP32 OpenVINO model to INT8

Taking 40 random samples from the test dataset
Accuracy-aware quantization method


2022-09-08 23:18:00.379241: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2022-09-08 23:18:00.397044: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2899975000 Hz


Round #1    Mean custom Metric - Dice score = 6.014788227090376
Round #2    Mean custom Metric - Dice score = 5.485410646802075
Round #3    Mean custom Metric - Dice score = 5.219891631187945
Round #4    Mean custom Metric - Dice score = 19.096018631223835
Round #5    Mean custom Metric - Dice score = 6.595300398415693
Round #6    Mean custom Metric - Dice score = 5.844213650204565

FINAL RESULTS
custom Metric - Dice score  FP32: 6.014788227090376
custom Metric - Dice score  INT8: 5.844213650204565

The INT8 version of the model has been saved to the directory /home/ubuntu/brats_2018_on_intel/models/openvino/3d_unet_decathlon_tf_ov_pot_int8_ir



In [None]:
ie = Core()
model = ie.read_model(model=path_to_xml_file, weights=path_to_bin_file)
compiled_model = ie.compile_model(model=model, device_name="CPU")

del model

input_layer_name = next(iter(compiled_model.inputs))
output_layer_name = next(iter(compiled_model.outputs))

In [None]:
openvino_filename_int8 = os.path.join(int8_directory, openvino_modelname)
path_to_xml_file_int8 = f"{openvino_filename_int8}.xml"
path_to_bin_file_int8 = f"{openvino_filename_int8}.bin"

ie_int8 = Core()
model_int8 = ie.read_model(model=path_to_xml_file_int8, weights=path_to_bin_file_int8)
compiled_model_int8 = ie.compile_model(model=model_int8, device_name="CPU")

del model_int8

input_layer_name_int8 = next(iter(compiled_model_int8.inputs))
output_layer_name_int8 = next(iter(compiled_model_int8.outputs))

In [None]:
from model import dice_coef, soft_dice_coef, dice_loss
tf_model = tf.keras.models.load_model("/home/ubuntu/unet/3D/3d_unet_decathlon/3d_unet_decathlon.h5", 
                                      compile=True, 
                                      custom_objects={"dice_coef":dice_coef, "soft_dice_coef":soft_dice_coef, "dice_loss":dice_loss})

In [None]:
def plot_predictions(img_batch, msk_batch):
    for i in range(img_batch.shape[0]):
        img = img_batch[i:i+1,:,:,:,:]
        msk = msk_batch[i:i+1,:,:,:,:]    
    
        slicenum=np.argmax(np.sum(msk, axis=(1,2)))  # Find the slice with the largest tumor section

        plt.figure(figsize=(20,20))

        plt.subplot(1,5,1)
        plt.title("MRI", fontsize=20)
        plt.imshow(img[0,:,:,slicenum,0], cmap="gray")
        plt.subplot(1,5,2)
        plt.imshow(msk[0,:,:,slicenum,0], cmap="gray")
        plt.title("Ground truth", fontsize=20)

        
        """
        TensorFlow Model Prediction
        """
        start_time = time.time()
        prediction_tf = tf_model.predict(img)
        inference_time_tf = 1000.0*(time.time()-start_time)
        prediction_tf = tf.round(prediction_tf)
        dice_coef_tf = dice_coef(msk,prediction_tf)

        plt.subplot(1,5,3)
        plt.imshow(prediction_tf[0,:,:,slicenum,0], cmap="gray")
        plt.title(f"TensorFlow Prediction\nFP32\nDice = {dice_coef_tf:.4f}\n\nInference time\n{inference_time_tf:.4f} msecs", fontsize=20)

        
        
        """
        OpenVINO Model Prediction - FP32
        Note: OpenVINO assumes the input (and output) are organized as channels first (NCHWD)
        whereas TensorFlow assumes channels last (NHWDC). We'll use the NumPy transpose
        to change the order.
        """
        start_time = time.time()
        request = compiled_model.create_infer_request()
        request.infer(inputs={input_layer_name.any_name: img})
        prediction_ov = request.get_output_tensor(output_layer_name.index).data
        inference_time_ov = 1000.0*(time.time()-start_time)
        prediction_ov = tf.round(prediction_ov)
        dice_coef_ov = dice_coef(msk,prediction_ov)

        plt.subplot(1,5,4)
        plt.imshow(prediction_ov[0,:,:,slicenum,0], cmap="gray")
        plt.title(f"OpenVINO Prediction\nFP32\nDice = {dice_coef_ov:.4f}\n\nInference time\n{inference_time_ov:.4f} msecs", fontsize=20)

        
        """
        OpenVINO Model Prediction - INT8
        Note: OpenVINO assumes the input (and output) are organized as channels first (NCHWD)
        whereas TensorFlow assumes channels last (NHWDC). We'll use the NumPy transpose
        to change the order.
        """
        start_time = time.time()
        request_int8 = compiled_model_int8.create_infer_request()
        request_int8.infer(inputs={input_layer_name_int8.any_name: img})
        prediction_ov_int8 = request_int8.get_output_tensor(output_layer_name_int8.index).data
        inference_time_ov_int8 = 1000.0*(time.time()-start_time)
        prediction_ov_int8 = tf.round(prediction_ov_int8)
        dice_coef_ov_int8 = dice_coef(msk,prediction_ov_int8)

        plt.subplot(1,5,5)
        plt.imshow(prediction_ov_int8[0,:,:,slicenum,0], cmap="gray")
        plt.title(f"OpenVINO Prediction\nINT8\nDice = {dice_coef_ov_int8:.4f}\n\nInference time\n{inference_time_ov_int8:.4f} msecs", fontsize=20)


        
        import pickle

        data = {'img': img, 
                'msk': msk, 
                'prediction_ov_int8': prediction_ov_int8, 
                'dice_coef_ov_int8': dice_coef_ov_int8,
                'inference_time_ov_int8': inference_time_ov_int8,
                'prediction_ov': prediction_ov, 
                'dice_coef_ov': dice_coef_ov,
                'inference_time_ov': inference_time_ov,
                'prediction_tf': prediction_tf, 
                'dice_coef_tf': dice_coef_tf,
                'inference_time_tf': inference_time_tf
               }

        # data = [img, msk, prediction_ov, dice_coef_ov, prediction_tf, dice_coef_tf]

        with open('/home/ubuntu/unet/data/prediction_results.pkl', 'wb') as outfile:
            pickle.dump(data, outfile, pickle.HIGHEST_PROTOCOL)

        # with open('mat.pkl', 'rb') as infile:
        #     result = pickle.load(infile)

In [None]:
%matplotlib inline
ds = brats_datafiles.get_train().take(1).as_numpy_iterator()
for img, msk in ds:
    plot_predictions(img,msk)