In [None]:
import os
import sys
import platform
# Build a directory name from the lowercase platform.system() value
# (e.g. "../linux") and put it, followed by the parent directory, on the
# module search path.
system_type = platform.system()
path = '../' + system_type.lower()
if system_type == 'Windows':
    # Windows gets backslash separators instead of forward slashes.
    path = '\\'.join(path.split('/'))
sys.path.extend([path, ".."])

In [None]:
import ctypes
# Load the three shared objects from the current working directory, in this
# order, and keep the ctypes handles in module-level names.
# NOTE(review): presumably this preloads the libraries for the wildcard
# imports that follow - confirm it is still required.
calibrator, calibrator_acc, evaluator = map(
    ctypes.CDLL,
    ("./calibrator.so", "./calibrator_acc.so", "./evaluator.so"),
)

In [24]:
#import libraries 
from optimizer import *
from calibrator import *
from evaluator import *
import pickle

import numpy as np
import onnx
import onnxruntime as rt


In [None]:
# File name of the floating-point ONNX model; both calls below read it.
fp_model_file = "handrecognition_model.onnx"

# Parse the model into memory.
onnx_model = onnx.load(fp_model_file)

# Run the optimizer on the same file; the value it returns is later passed to
# onnx.load as the path of the optimized model.
optimized_model_path = optimize_fp_model(fp_model_file)

In [None]:
# Read the calibration inputs and labels from their pickle files.
# NOTE(review): despite the "test_" names these hold the contents of
# X_cal.pkl / y_cal.pkl; the evaluation cell further down reuses them.
# NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
with open('X_cal.pkl', 'rb') as images_file:
    test_images = pickle.load(images_file)
with open('y_cal.pkl', 'rb') as labels_file:
    test_labels = pickle.load(labels_file)

# Calibration subset: every 20th sample among the first 1800.
calib_dataset = test_images[:1800:20]
# Where the quantization table is written and later read back.
pickle_file_path = 'handrecognition_calib.pickle'

In [None]:
# Calibration: build the quantization table for the optimized model and export
# the quantized coefficients as C++ sources.
model_proto = onnx.load(optimized_model_path)
print('Generating the quantization table:')
calib = Calibrator('int16', 'per-tensor', 'minmax')

# For int8 conversion, use this calibrator instead.
# NOTE(review): presumably the Evaluator created afterwards must then use the
# same bit width and granularity - confirm against the toolkit documentation.
#calib = Calibrator('int8', 'per-channel', 'minmax')

calib.set_providers(['CPUExecutionProvider'])
# Obtain the quantization parameters. The table is written to pickle_file_path
# (not a repeated literal) so it cannot drift from the path the export call
# below reads.
calib.generate_quantization_table(model_proto, calib_dataset, pickle_file_path)
# Generate the coefficient files for esp32s3
calib.export_coefficient_to_cpp(model_proto, pickle_file_path, 'esp32s3', '.', 'handrecognition_coefficient', True)

In [25]:
    # Evaluate the performance of the int16 quantized model against the
    # floating-point model on the same batches.
    # NOTE(review): test_images / test_labels are whatever the data-loading cell
    # left in the kernel; in this notebook that cell reads X_cal.pkl / y_cal.pkl,
    # so this reports accuracy on the calibration data - confirm that is intended.
    print('Evaluating the performance on esp32s3:')
    eva = Evaluator('int16', 'per-tensor', 'esp32s3')
    eva.set_providers(['CPUExecutionProvider'])
    eva.generate_quantized_model(model_proto, pickle_file_path)

    output_names = [n.name for n in model_proto.graph.output]
    providers = ['CPUExecutionProvider']
    m = rt.InferenceSession(optimized_model_path, providers=providers)

    batch_size = 100
    # Only whole batches are evaluated; a trailing partial batch is skipped.
    batch_num = len(test_images) // batch_size
    # Number of samples actually scored. Accuracy must be divided by this, not
    # by len(test_images), or skipped samples would count as misclassified.
    evaluated = batch_num * batch_size
    res = 0
    fp_res = 0
    input_name = m.get_inputs()[0].name
    for i in range(batch_num):
        batch_images = test_images[i * batch_size:(i + 1) * batch_size]
        batch_labels = test_labels[i * batch_size:(i + 1) * batch_size]

        # int16 quantized model
        [outputs, _] = eva.evalute_quantized_model(batch_images, False)
        res = res + sum(np.argmax(outputs[0], axis=1) == batch_labels)

        # floating-point model
        fp_outputs = m.run(output_names, {input_name: batch_images.astype(np.float32)})
        fp_res = fp_res + sum(np.argmax(fp_outputs[0], axis=1) == batch_labels)

    if evaluated == 0:
        # Fewer samples than one batch: there is nothing to report.
        print('No full batch of %d samples available; nothing was evaluated.' % batch_size)
    else:
        print('accuracy of int16 model is: %f' % (res / evaluated))
        print('accuracy of fp32 model is: %f' % (fp_res / evaluated))

Evaluating the performance on esp32s3:
accuracy of int8 model is: 0.923148
accuracy of fp32 model is: 0.923148


In [40]:
import os
import sys
import platform
import ctypes
import pickle
import time
import numpy as np
import onnx
import onnxruntime as rt

# Determine the system type, derive the matching directory name from it
# (e.g. "../linux") and add that directory plus the parent directory to the
# module search path.
system_type = platform.system()
path = '../' + system_type.lower()
if system_type == 'Windows':
    # Windows gets backslash separators instead of forward slashes.
    path = '\\'.join(path.split('/'))
sys.path.extend([path, ".."])

# Load the shared libraries from the current working directory, in this
# order, keeping the ctypes handles in module-level names.
calibrator, calibrator_acc, evaluator = map(
    ctypes.CDLL,
    ("./calibrator.so", "./calibrator_acc.so", "./evaluator.so"),
)

# Import custom modules
from optimizer import *
from calibrator import *
from evaluator import *

# File name of the floating-point ONNX model; both calls below read it.
fp_model_file = "handrecognition_model.onnx"

# Load ONNX model
onnx_model = onnx.load(fp_model_file)

# Optimize ONNX model; the returned value is used below as the path of the
# optimized model.
optimized_model_path = optimize_fp_model(fp_model_file)

# Load calibration dataset (inputs and labels) from pickle files.
# NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
with open('X_cal.pkl', 'rb') as images_file:
    calib_images = pickle.load(images_file)
with open('y_cal.pkl', 'rb') as labels_file:
    calib_labels = pickle.load(labels_file)

# Prepare calibration subset: every 20th sample among the first 1800.
calib_dataset = calib_images[:1800:20]
# Where the quantization table is written and later read back.
pickle_file_path = 'handrecognition_calib.pickle'

# Calibration: build the int16 per-tensor quantization table for the optimized
# model and export the quantized coefficients as C++ sources.
model_proto = onnx.load(optimized_model_path)
print('Generating the quantization table:')
calib = Calibrator('int16', 'per-tensor', 'minmax')
calib.set_providers(['CPUExecutionProvider'])
# Write the table to pickle_file_path (not a repeated literal) so it cannot
# drift from the path the export call below reads.
calib.generate_quantization_table(model_proto, calib_dataset, pickle_file_path)
calib.export_coefficient_to_cpp(model_proto, pickle_file_path, 'esp32s3', '.', 'handrecognition_coefficient', True)

# Load test dataset (inputs and labels) used for the accuracy comparison.
# NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
with open('X_test.pkl', 'rb') as images_file:
    test_images = pickle.load(images_file)
with open('y_test.pkl', 'rb') as labels_file:
    test_labels = pickle.load(labels_file)

def evaluate_model_on_board(board_name, model_proto, pickle_file_path, test_images, test_labels,
                            quantization_type, fp_model_path=None, batch_size=100):
    """Compare a quantized model with its floating-point original and print the results.

    An ``Evaluator`` is built for ``board_name`` / ``quantization_type`` from the
    quantization table at ``pickle_file_path``; an onnxruntime session is built
    for the floating-point model. Both are run over the same batches, and the
    accuracy and average wall-clock time per sample of each are printed.
    The timings are taken in this process around the two inference calls, so
    they describe the machine running the notebook.

    Args:
        board_name: Target chip name handed to ``Evaluator``. Names other than
            'esp32s3', 'esp32' and 'esp32c3' only trigger a warning.
        model_proto: Loaded ONNX model; its graph outputs are requested from
            the floating-point session.
        pickle_file_path: Path of the quantization table given to
            ``generate_quantized_model``.
            NOTE(review): presumably the table must have been generated for the
            same ``quantization_type`` - confirm.
        test_images: Sliceable batch of inputs providing ``.astype``
            (e.g. a numpy array).
        test_labels: Sliceable labels compared element-wise with the argmax of
            the model output; assumes integer class ids - TODO confirm.
        quantization_type: Bit-width string handed to ``Evaluator``
            (the notebook uses 'int8' and 'int16').
        fp_model_path: Path of the floating-point model for onnxruntime. When
            ``None`` the module-level ``optimized_model_path`` is used, which
            is what the function always did before this parameter existed.
        batch_size: Samples per inference call. Only whole batches are
            evaluated; a trailing partial batch is skipped.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. A ``ValueError`` raised
            while evaluating is caught and reported with ``print`` instead.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    if board_name not in ['esp32s3', 'esp32', 'esp32c3']:
        print(f"Warning: Board {board_name} is not officially supported.")
    else:
        print(f'Evaluating the performance on {board_name} with {quantization_type} quantization:')

    if fp_model_path is None:
        # Backward-compatible fallback to the notebook-level variable.
        fp_model_path = optimized_model_path

    try:
        eva = Evaluator(quantization_type, 'per-tensor', board_name)
        eva.set_providers(['CPUExecutionProvider'])
        eva.generate_quantized_model(model_proto, pickle_file_path)

        output_names = [n.name for n in model_proto.graph.output]
        m = rt.InferenceSession(fp_model_path, providers=['CPUExecutionProvider'])
        input_name = m.get_inputs()[0].name

        batch_num = len(test_images) // batch_size
        # Samples actually scored. Accuracy and per-sample time are divided by
        # this, not by len(test_images), or skipped samples would count as
        # misclassified.
        evaluated = batch_num * batch_size
        if evaluated == 0:
            # Without this guard the averages below would divide by zero.
            print(f"Error evaluating on {board_name}: need at least {batch_size} samples "
                  f"for one batch, got {len(test_images)}")
            return

        res = 0
        fp_res = 0
        quantized_inference_time = 0.0
        fp32_inference_time = 0.0

        for i in range(batch_num):
            batch_images = test_images[i * batch_size:(i + 1) * batch_size]
            batch_labels = test_labels[i * batch_size:(i + 1) * batch_size]

            # Measure inference time for quantized model. perf_counter is the
            # monotonic, high-resolution clock meant for measuring intervals.
            start_time = time.perf_counter()
            [outputs, _] = eva.evalute_quantized_model(batch_images, False)
            quantized_inference_time += time.perf_counter() - start_time
            res = res + sum(np.argmax(outputs[0], axis=1) == batch_labels)

            # Measure inference time for floating-point model (the float32
            # conversion is inside the timed region, as it was before).
            start_time = time.perf_counter()
            fp_outputs = m.run(output_names, {input_name: batch_images.astype(np.float32)})
            fp32_inference_time += time.perf_counter() - start_time
            fp_res = fp_res + sum(np.argmax(fp_outputs[0], axis=1) == batch_labels)

        # Average inference times per sample, in microseconds
        avg_quantized_inference_time = 1000000 * quantized_inference_time / evaluated
        avg_fp32_inference_time = 1000000 * fp32_inference_time / evaluated

        print(f'Accuracy of {quantization_type} model on {board_name} is: {res / evaluated:.4f}')
        print(f'Accuracy of fp32 model on {board_name} is: {fp_res / evaluated:.4f}')
        print(f'Average inference time per sample of {quantization_type} model on {board_name} is: {avg_quantized_inference_time:.6f} us')
        print(f'Average inference time per sample of fp32 model on {board_name} is: {avg_fp32_inference_time:.6f} us')

    except ValueError as e:
        print(f"Error evaluating on {board_name}: {e}")

if __name__ == "__main__":
    # Specify supported boards
    esp32_boards = ['esp32s3']
    quantization_types = ['int8', 'int16']

    # The table at pickle_file_path was calibrated for int16 only. Build one
    # table per bit width so each evaluation reads parameters generated for
    # the same quantization type it evaluates.
    # NOTE(review): this assumes the table contents depend on the bit width -
    # confirm against the toolkit documentation.
    table_stem, table_ext = os.path.splitext(pickle_file_path)
    quant_table_paths = {}
    for quant_type in quantization_types:
        quant_table_path = f'{table_stem}_{quant_type}{table_ext}'
        quant_calib = Calibrator(quant_type, 'per-tensor', 'minmax')
        quant_calib.set_providers(['CPUExecutionProvider'])
        quant_calib.generate_quantization_table(model_proto, calib_dataset, quant_table_path)
        quant_table_paths[quant_type] = quant_table_path

    for board in esp32_boards:
        for quant_type in quantization_types:
            evaluate_model_on_board(board, model_proto, quant_table_paths[quant_type],
                                    test_images, test_labels, quant_type)


Generating the quantization table:
Converting coefficient to int16 per-tensor quantization for esp32s3
Exporting finish, the output files are: ./handrecognition_coefficient.cpp, ./handrecognition_coefficient.hpp

Quantized model info:
model input name: input, exponent: -7
Reshape layer name: sequential_1/conv2d_3/BiasAdd__38, output_exponent: -7
Conv layer name: sequential_1/conv2d_3/BiasAdd, output_exponent: -8
MaxPool layer name: sequential_1/max_pooling2d_3/MaxPool, output_exponent: -8
Conv layer name: sequential_1/conv2d_4/BiasAdd, output_exponent: -8
MaxPool layer name: sequential_1/max_pooling2d_4/MaxPool, output_exponent: -8
Conv layer name: sequential_1/conv2d_5/BiasAdd, output_exponent: -8
MaxPool layer name: sequential_1/max_pooling2d_5/MaxPool, output_exponent: -8
Transpose layer name: sequential_1/max_pooling2d_5/MaxPool__60, output_exponent: -8
Reshape layer name: sequential_1/flatten_1/Reshape, output_exponent: -8
Gemm layer name: fused_gemm_0, output_exponent: -8
Gemm la