# Example: using our predictors to predict model latency


For a given model, the first step is to identify and extract all the fused operators (kernels) within the model. We highly recommend using [nn-Meter](https://github.com/microsoft/nn-Meter/tree/main) for detecting kernel fusion rules. Once you have detected and identified these kernels, you can proceed to collect their names and configurations.

Here, we simply write a function to gather the kernels within the model.

In [5]:
from get_tf_kernel import get_kernel
# Path to the TFLite model whose kernels should be extracted
# (machine-specific example path -- replace with your own model file).
model_path = '/home/edge/chengquan/test_model.tflite'
# As the cell output shows, get_kernel returns a dict that maps each kernel
# name (e.g. 'conv-bn-relu', 'hswish') to a list of per-instance config lists.
config = get_kernel(model_path)
# Bare expression: the notebook displays the extracted configuration.
config

{'conv-bn-relu': [[192, 3, 16, 3, 2, 4.128768, 0.000448], [96, 16, 16, 1, 1, 2.506752, 0.000272], [96, 16, 48, 1, 1, 7.520256, 0.000816], [48, 48, 24, 1, 1, 2.709504, 0.001176], [48, 24, 72, 1, 1, 4.1472, 0.0018], [48, 72, 24, 1, 1, 4.036608, 0.001752], [48, 24, 144, 1, 1, 8.2944, 0.0036], [24, 144, 40, 1, 1, 3.3408, 0.0058], [24, 40, 160, 1, 1, 3.77856, 0.00656], [24, 160, 40, 1, 1, 3.70944, 0.00644], [24, 40, 120, 1, 1, 2.83392, 0.00492], [12, 120, 80, 1, 1, 1.39392, 0.00968], [12, 80, 480, 1, 1, 5.59872, 0.03888], [12, 480, 80, 1, 1, 5.54112, 0.03848], [12, 80, 480, 1, 1, 5.59872, 0.03888], [6, 480, 112, 1, 1, 1.939392, 0.053872], [6, 112, 672, 1, 1, 2.733696, 0.075936], [6, 672, 112, 1, 1, 2.713536, 0.075376], [6, 112, 672, 1, 1, 2.733696, 0.075936], [3, 672, 160, 1, 1, 0.96912, 0.10768], [3, 160, 960, 1, 1, 1.39104, 0.15456], [3, 960, 160, 1, 1, 1.38384, 0.15376], [3, 160, 960, 1, 1, 1.39104, 0.15456], [1, 960, 1280, 1, 1, 1.23008, 1.23008]], 'hswish': [[96, 16], [24, 120], [12, 1

{'conv-bn-relu': [[192, 3, 16, 3, 2, 4.128768, 0.000448],
  [96, 16, 16, 1, 1, 2.506752, 0.000272],
  [96, 16, 48, 1, 1, 7.520256, 0.000816],
  [48, 48, 24, 1, 1, 2.709504, 0.001176],
  [48, 24, 72, 1, 1, 4.1472, 0.0018],
  [48, 72, 24, 1, 1, 4.036608, 0.001752],
  [48, 24, 144, 1, 1, 8.2944, 0.0036],
  [24, 144, 40, 1, 1, 3.3408, 0.0058],
  [24, 40, 160, 1, 1, 3.77856, 0.00656],
  [24, 160, 40, 1, 1, 3.70944, 0.00644],
  [24, 40, 120, 1, 1, 2.83392, 0.00492],
  [12, 120, 80, 1, 1, 1.39392, 0.00968],
  [12, 80, 480, 1, 1, 5.59872, 0.03888],
  [12, 480, 80, 1, 1, 5.54112, 0.03848],
  [12, 80, 480, 1, 1, 5.59872, 0.03888],
  [6, 480, 112, 1, 1, 1.939392, 0.053872],
  [6, 112, 672, 1, 1, 2.733696, 0.075936],
  [6, 672, 112, 1, 1, 2.713536, 0.075376],
  [6, 112, 672, 1, 1, 2.733696, 0.075936],
  [3, 672, 160, 1, 1, 0.96912, 0.10768],
  [3, 160, 960, 1, 1, 1.39104, 0.15456],
  [3, 960, 160, 1, 1, 1.38384, 0.15376],
  [3, 160, 960, 1, 1, 1.39104, 0.15456],
  [1, 960, 1280, 1, 1, 1.23008, 1.2

In [2]:
import os
import sys
sys.path.append('..')
from predictor_builder.model import NeuralNetwork
import torch
def get_pred_latency(op_config, predictor_path):
    """Sum the predicted latency of every kernel instance in ``op_config``.

    For each kernel type with at least one configuration, the weights stored
    in ``<predictor_path>/<kernel>.pth`` are loaded into a ``NeuralNetwork``
    whose input width equals the length of the first configuration, and the
    network is evaluated once per configuration.

    Args:
        op_config: Mapping from kernel name to a sequence of configurations;
            each configuration is a sequence of numbers fed to the predictor.
        predictor_path: Directory containing one ``<kernel>.pth`` state-dict
            file per kernel name that has configurations.

    Returns:
        ``0`` when no kernel has any configuration; otherwise the accumulated
        predictor outputs as produced by ``Tensor.numpy()`` (the exact shape
        depends on the predictor's output layer -- TODO confirm).

    Raises:
        FileNotFoundError: Presumably raised by ``torch.load`` when a weight
            file for a non-empty kernel type is missing -- verify.
    """
    latency_sum = 0
    for kernel, configs in op_config.items():
        # Skip kernel types with no instances. A length check (rather than
        # ``== []``) also covers empty tuples and other empty sequences,
        # which previously fell through to ``configs[0]`` and crashed.
        if len(configs) == 0:
            continue

        weight_path = os.path.join(predictor_path, kernel + ".pth")
        model = NeuralNetwork(input_features=len(configs[0]))
        # NOTE: torch.load unpickles the file; only load predictor weights
        # obtained from a trusted source.
        model.load_state_dict(
            torch.load(weight_path, map_location=torch.device('cpu'))
        )
        model.eval()

        kernel_latency = 0
        # Pure inference: no autograd graph is needed for the forward passes.
        with torch.no_grad():
            for config in configs:
                features = torch.Tensor(config)
                kernel_latency = kernel_latency + model(features).cpu().detach().numpy()
        latency_sum = latency_sum + kernel_latency
    return latency_sum

  from .autonotebook import tqdm as notebook_tqdm


If you need our predictors, you can access them [here](https://huggingface.co/fcq/pred_lite/tree/main).

In [4]:
# Directory holding the predictor weight files; get_pred_latency looks for
# one '<kernel>.pth' file per kernel name (placeholder path -- replace it).
predictor_path = '/path/to/predictors/'
# Sum the predicted latency over every kernel configuration extracted above.
pred_latency = get_pred_latency(config,predictor_path) 
# Bare expression: the notebook displays the total predicted latency.
pred_latency

15.251722492277622