# Use nn-Meter for different model formats
This notebook shows examples of nn-Meter latency prediction for models in different formats: TensorFlow pb files, PyTorch models, ONNX files, and nn-Meter IR graphs.

In [1]:
# list all supported latency predictors
import nn_meter

predictors = nn_meter.list_latency_predictors()
for predictor_info in predictors:
    # each entry is indexed by 'name' and 'version'
    name = predictor_info['name']
    version = predictor_info['version']
    print(f"[Predictor] {name}: version={version}")

[Predictor] cortexA76cpu_tflite21: version=1.0
[Predictor] adreno640gpu_tflite21: version=1.0
[Predictor] adreno630gpu_tflite21: version=1.0
[Predictor] myriadvpu_openvino2019r2: version=1.0


In [2]:
# define basic information
# Import everything this cell uses here, so it runs on a fresh kernel
# (Restart Kernel -> Run All) without relying on imports from later cells.
import os
import warnings

# folder that holds the sample models; created up front so the following
# cells can place their per-format sub-folders inside it
__test_models_folder__ = '../data'
os.makedirs(__test_models_folder__, exist_ok=True)

# specify basic predictor
predictor_name = 'adreno640gpu_tflite21' # user can change text here to test other predictors
predictor_version = 1.0

# hide warnings so the cell outputs only show the prediction results
warnings.filterwarnings('ignore')

# Use nn-Meter for TensorFlow pb file

In [3]:
import os
from glob import glob
import nn_meter

# download data and unzip (skipped when the folder already exists)
ppath = os.path.join(__test_models_folder__, "pb_models")
if not os.path.isdir(ppath):
    # makedirs also creates the parent data folder when it is missing
    os.makedirs(ppath)
    url = "https://github.com/microsoft/nn-Meter/releases/download/v1.0-data/pb_models.zip"
    nn_meter.download_from_url(url, ppath)

# glob returns matches in arbitrary order; sort them so the report below is
# printed in the same order on every run
test_model_list = sorted(glob(ppath + "/*.pb"))

# load predictor
predictor = nn_meter.load_latency_predictor(predictor_name, predictor_version)

# predict latency; results are collected per file name
result = {}
for test_model in test_model_list:
    latency = predictor.predict(test_model, model_type="pb") # in unit of ms
    result[os.path.basename(test_model)] = latency
    print(f'[RESULT] predict latency for {test_model}: {latency} ms')
    

[RESULT] predict latency for ../data/pb_models/alexnet_0.pb: 13.124763483485053 ms
[RESULT] predict latency for ../data/pb_models/densenet_0.pb: 73.65728637938379 ms
[RESULT] predict latency for ../data/pb_models/googlenet_0.pb: 34.50815902636508 ms
[RESULT] predict latency for ../data/pb_models/mnasnet_0.pb: 13.72939336097471 ms
[RESULT] predict latency for ../data/pb_models/mobilenetv1_0.pb: 13.972147254154745 ms
[RESULT] predict latency for ../data/pb_models/mobilenetv2_0.pb: 10.15371207191722 ms
[RESULT] predict latency for ../data/pb_models/mobilenetv3large_0.pb: 9.989918007478076 ms
[RESULT] predict latency for ../data/pb_models/mobilenetv3small_0.pb: 4.489849402954042 ms
[RESULT] predict latency for ../data/pb_models/proxylessnas_0.pb: 12.509469696629518 ms
[RESULT] predict latency for ../data/pb_models/resnet18_0.pb: 39.32351677226427 ms
[RESULT] predict latency for ../data/pb_models/resnet34_0.pb: 74.8891391278198 ms
[RESULT] predict latency for ../data/pb_models/resnet50_0.pb

# Use nn-Meter for PyTorch model


In [18]:
import os
import torchvision.models as models
import nn_meter

# torchvision constructors to evaluate, keyed by the label used in the report
model_builders = {
    "resnet18": models.resnet18,
    "alexnet": models.alexnet,
    "vgg16": models.vgg16,
    "squeezenet": models.squeezenet1_0,
    "densenet161": models.densenet161,
    "inception_v3": models.inception_v3,
    "googlenet": models.googlenet,
    "shufflenet_v2": models.shufflenet_v2_x1_0,
    "mobilenet_v2": models.mobilenet_v2,
    "resnext50_32x4d": models.resnext50_32x4d,
    "wide_resnet50_2": models.wide_resnet50_2,
    "mnasnet": models.mnasnet1_0,
}

# instantiate every model with its default arguments, in the order listed above
torchvision_models = {label: build() for label, build in model_builders.items()}

# load predictor
predictor = nn_meter.load_latency_predictor(predictor_name, predictor_version)

# predict latency of each model for the same input shape
model_input_shape = (1, 3, 224, 224)
for model_name, torch_model in torchvision_models.items():
    latency = predictor.predict(torch_model, model_type="torch", input_shape=model_input_shape)
    print(f'[RESULT] predict latency for {model_name}: {latency} ms')

[RESULT] predict latency for resnet18: 39.32351677226426 ms
[RESULT] predict latency for alexnet: 13.126684104716283 ms
[RESULT] predict latency for vgg16: 219.2647723703139 ms
[RESULT] predict latency for squeezenet: 18.674223659837843 ms
[RESULT] predict latency for densenet161: 186.56037984132988 ms
[RESULT] predict latency for inception_v3: 127.98419924992326 ms
[RESULT] predict latency for googlenet: 32.758087458683384 ms
[RESULT] predict latency for shufflenet_v2: 5.423898780782251 ms
[RESULT] predict latency for mobilenet_v2: 9.920667346583885 ms
[RESULT] predict latency for resnext50_32x4d: 230.96098225315293 ms
[RESULT] predict latency for wide_resnet50_2: 230.96098225315293 ms
[RESULT] predict latency for mnasnet: 11.630591102084342 ms


# Use nn-Meter for ONNX File

In [21]:
import os
from glob import glob
import nn_meter

# download data and unzip (skipped when the folder already exists)
ppath = os.path.join(__test_models_folder__, "onnx_models")
if not os.path.isdir(ppath):
    # makedirs also creates the parent data folder when it is missing
    os.makedirs(ppath)
    url = "https://github.com/microsoft/nn-Meter/releases/download/v1.0-data/onnx_models.zip"
    nn_meter.download_from_url(url, ppath)

# glob returns matches in arbitrary order; sort them so the report below is
# printed in the same order on every run
test_model_list = sorted(glob(ppath + "/*.onnx"))

# load predictor
predictor = nn_meter.load_latency_predictor(predictor_name, predictor_version)

# predict latency; results are collected per file name
result = {}
for test_model in test_model_list:
    latency = predictor.predict(test_model, model_type="onnx") # in unit of ms
    result[os.path.basename(test_model)] = latency
    print(f'[RESULT] predict latency for {os.path.basename(test_model)}: {latency} ms')

[RESULT] predict latency for alexnet_0.onnx: 13.12668410471628 ms
[RESULT] predict latency for densenet_0.onnx: 186.5603798413299 ms
[RESULT] predict latency for googlenet_0.onnx: 32.758087458683384 ms
[RESULT] predict latency for mnasnet_0.onnx: 11.63059110208434 ms
[RESULT] predict latency for mobilenetv2_0.onnx: 9.920667346583883 ms
[RESULT] predict latency for mobilenetv3large_0.onnx: 12.548914975618422 ms
[RESULT] predict latency for mobilenetv3small_0.onnx: 6.705541180860482 ms
[RESULT] predict latency for resnet18_0.onnx: 39.32351677226426 ms
[RESULT] predict latency for shufflenetv2_0.onnx: 5.423898780782251 ms
[RESULT] predict latency for squeezenet_0.onnx: 18.674223659837843 ms
[RESULT] predict latency for vgg16_0.onnx: 219.26477237031392 ms


# Use nn-Meter for nn-Meter IR Graph

In [23]:
import os
from glob import glob
import nn_meter

# download data and unzip (skipped when the folder already exists)
ppath = os.path.join(__test_models_folder__, "nnmeter_ir_graphs")
if not os.path.isdir(ppath):
    # makedirs also creates the parent data folder when it is missing
    os.makedirs(ppath)
    url = "https://github.com/microsoft/nn-Meter/releases/download/v1.0-data/ir_graphs.zip"
    nn_meter.download_from_url(url, ppath)

# glob returns matches in arbitrary order; sort them so the report below is
# printed in the same order on every run
test_model_list = sorted(glob(ppath + "/*.json"))

# load predictor
predictor = nn_meter.load_latency_predictor(predictor_name, predictor_version)

# predict latency; results are collected per file name
result = {}
for test_model in test_model_list:
    latency = predictor.predict(test_model, model_type="nnmeter-ir") # in unit of ms
    result[os.path.basename(test_model)] = latency
    print(f'[RESULT] predict latency for {os.path.basename(test_model)}: {latency} ms')

[RESULT] predict latency for alexnet_0.json: 13.124763483485058 ms
[RESULT] predict latency for densenet_0.json: 73.65728637938379 ms
[RESULT] predict latency for googlenet_0.json: 34.508159026365064 ms
[RESULT] predict latency for mnasnet_0.json: 13.72939336097471 ms
[RESULT] predict latency for mobilenetv1_0.json: 13.972147254154745 ms
[RESULT] predict latency for mobilenetv2_0.json: 10.15371207191722 ms
[RESULT] predict latency for mobilenetv3large_0.json: 9.989918007478074 ms
[RESULT] predict latency for mobilenetv3small_0.json: 4.489849402954042 ms
[RESULT] predict latency for proxylessnas_0.json: 12.509469696629518 ms
[RESULT] predict latency for resnet18_0.json: 39.32351677226428 ms
[RESULT] predict latency for resnet34_0.json: 74.88913912781982 ms
[RESULT] predict latency for resnet50_0.json: 91.73126828870865 ms
[RESULT] predict latency for shufflenetv2_0.json: 5.423898780782249 ms
[RESULT] predict latency for squeezenet_0.json: 18.074222853615616 ms
[RESULT] predict latency f