In [1]:
import numpy as np
import cv2
import onnx
import onnxruntime as ort
import numpy as np
from PIL import Image
from pathlib import Path
import vai_q_onnx

import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.datasets import CIFAR10

import onnx
import onnxruntime as ort
from onnxruntime.quantization import CalibrationDataReader, QuantType, QuantFormat, CalibrationMethod, quantize_static

import os, shutil
from time import perf_counter

In [2]:
def unpickle(file):
    """Load and return the object stored in the pickle file at ``file``.

    The file is opened in binary mode and decoded with ``encoding='latin1'``.
    Only use this on trusted files: unpickling can execute arbitrary code.
    """
    import pickle

    with open(file, 'rb') as stream:
        return pickle.load(stream, encoding='latin1')

# Paths of the pickled CIFAR-10 test batch and of the label metadata file.
datafile = r'./data/cifar-10-batches-py/test_batch'
metafile = r'./data/cifar-10-batches-py/batches.meta'

# NOTE: despite its name, data_batch_1 holds the contents of `test_batch`;
# the name is kept because other cells may refer to it.
data_batch_1 = unpickle(datafile)
metadata = unpickle(metafile)

labels = data_batch_1['labels']
# Reshape the flat rows into 3x32x32 arrays (presumably channel, height,
# width -- confirm). Using -1 lets NumPy derive the number of images from the
# data instead of hard-coding 10000.
images = np.reshape(data_batch_1['data'], (-1, 3, 32, 32))

# Create the output directory if needed; exist_ok avoids the race between a
# separate existence check and the creation call.
dirname = 'images'
os.makedirs(dirname, exist_ok=True)

In [3]:
# Benchmark the quantized ONNX models on the Vitis AI execution provider:
# for each model, run every test image through it one at a time, accumulate
# the time spent inside session.run() and count correct top-1 predictions.
quant_dir = './models/quant'
# os.listdir() returns entries in arbitrary order; sorting makes the
# positional slice below select the same models on every run and platform.
models = sorted(os.listdir(quant_dir))

# Models before this index are skipped by the loop below.
# NOTE(review): presumably they were benchmarked in an earlier run -- confirm.
first_model_index = 21

# Loop-invariant execution-provider configuration.
providers = ['VitisAIExecutionProvider']
cache_dir = 'cache'
cache_key = 'modelcachekey'
provider_options = [{
    'config_file': 'vaip_config.json',
    'cacheDir': cache_dir,
    'cacheKey': cache_key,
}]

# Derive the sample count from the data instead of hard-coding 10000.
num_images = len(images)

ipu_time = []
for model_name in models[first_model_index:]:
    quantized_model_path = quant_dir + '/' + model_name
    # Pass the path as str: encoding it with 'unicode_escape' would corrupt
    # any path containing non-ASCII characters or backslashes.
    model = onnx.load(quantized_model_path)

    # All models share one cache key, so the cache directory is removed before
    # each session is created (presumably to avoid reusing artifacts compiled
    # for a previous model -- confirm).
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)

    session = ort.InferenceSession(model.SerializeToString(), providers=providers,
                                   provider_options=provider_options)

    # NOTE(review): the recorded output shows accuracy 0.0486 for all three
    # ResNet34 variants, which is below chance for a 10-class problem. The
    # preprocessing below (scaling to [0, 1] only) may not match what the
    # models were trained/calibrated with -- confirm.
    elapsed = 0
    correct = 0
    for i in range(num_images):
        # Scale pixel values to [0, 1] as float32 and add a batch axis of 1.
        image_array = np.array(images[i]).astype(np.float32) / 255
        input_data = np.expand_dims(image_array, axis=0)

        # Only the session.run() call is timed.
        ts = perf_counter()
        outputs = session.run(None, {'input': input_data})
        elapsed += perf_counter() - ts

        predicted_class = np.argmax(outputs[0])
        if predicted_class == labels[i]:
            correct += 1

    ipu_time.append(elapsed)
    print(f"Model: {model_name}, Accuracy: {correct / num_images}, Time: {elapsed}")

Model: ResNet34_3463_4.U8S8.onnx, Accuracy: 0.0486, Time: 15.356808400000071
Model: ResNet34_3463_6.U8S8.onnx, Accuracy: 0.0486, Time: 15.544282599999843
Model: ResNet34_3463_8.U8S8.onnx, Accuracy: 0.0486, Time: 15.096815899999996


In [5]:
# Hard-coded per-model IPU timings (one entry per model). The last three
# values match the "Time:" figures printed by the benchmark cell for the
# ResNet34 runs; the earlier ones presumably come from previous runs.
ipu_time = [
    8.269969299999346, 8.108792799999776, 8.287699399999454,
    7.7339816000000114, 7.837330499999981, 7.697161800000178,
    8.738617600000067, 8.903890100000169, 8.723434799999886,
    9.487967600000081, 9.639269599999949, 9.316346400000263,
    8.882118200000106, 9.143361500000019, 8.934766700000843,
    9.942197599999995, 10.221836500000052, 9.977555300000006,
    10.800581199999977, 10.82645130000012, 10.735090399999912,
    15.356808400000071, 15.544282599999843, 15.096815899999996,
]
print("IPU Time: ", ipu_time)

# Persist the timings as a NumPy array file.
ipu_time_np = np.asarray(ipu_time)
np.save("ipu_time.npy", ipu_time_np)

IPU Time:  [8.269969299999346, 8.108792799999776, 8.287699399999454, 7.7339816000000114, 7.837330499999981, 7.697161800000178, 8.738617600000067, 8.903890100000169, 8.723434799999886, 9.487967600000081, 9.639269599999949, 9.316346400000263, 8.882118200000106, 9.143361500000019, 8.934766700000843, 9.942197599999995, 10.221836500000052, 9.977555300000006, 10.800581199999977, 10.82645130000012, 10.735090399999912, 15.356808400000071, 15.544282599999843, 15.096815899999996]


In [6]:
# Load the CPU timing array stored in cpu_time.npy.
# NOTE(review): no visible cell writes this file; presumably it is produced by
# a separate CPU benchmark run -- confirm.
cpu_time = np.load("cpu_time.npy")

In [10]:
# Timing data for the CPU-vs-IPU comparison, exported to CSV below;
# matplotlib is imported here for plotting.
import csv
import os

import matplotlib.pyplot as plt
import numpy as np

# CPU time and IPU time data: hard-coded per-model timings, one entry per
# model (presumably seconds -- confirm for the CPU values).
cpu_time = [
    1.8717529, 2.5674117, 3.5455245,
    1.6064468, 2.2081593, 2.7521948,
    2.0935401, 3.0900235, 4.1593794,
    2.3619324, 3.2820707, 4.5173192,
    2.2513987, 3.257321, 4.216188,
    2.722746, 4.1889586, 5.6440594,
    3.6292706, 5.2090907, 7.4969121,
    6.3848804, 9.4051659, 13.6893274,
]
ipu_time = [
    8.269969299999346, 8.108792799999776, 8.287699399999454,
    7.7339816000000114, 7.837330499999981, 7.697161800000178,
    8.738617600000067, 8.903890100000169, 8.723434799999886,
    9.487967600000081, 9.639269599999949, 9.316346400000263,
    8.882118200000106, 9.143361500000019, 8.934766700000843,
    9.942197599999995, 10.221836500000052, 9.977555300000006,
    10.800581199999977, 10.82645130000012, 10.735090399999912,
    15.356808400000071, 15.544282599999843, 15.096815899999996,
]

quant_dir = './models/quant'
# os.listdir() returns entries in arbitrary order; sort so that each model
# name is paired with the same timing on every run and platform.
# NOTE(review): the timing lists are positional -- confirm they were recorded
# in this (sorted) model order.
models = sorted(os.listdir(quant_dir))

# Rows are paired by position, so a length mismatch would mislabel timings
# (and previously raised IndexError part-way through writing the file).
if not (len(models) == len(cpu_time) == len(ipu_time)):
    raise ValueError(
        f"Expected one timing per model: {len(models)} models, "
        f"{len(cpu_time)} CPU timings, {len(ipu_time)} IPU timings"
    )

# cpu_time to csv (the file also carries the IPU column).
# newline='' is required by the csv module; without it every row is followed
# by a blank line on Windows.
with open('cpu_time.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Model', 'CPU Time', 'IPU Time'])
    writer.writerows(zip(models, cpu_time, ipu_time))
