# Use LitePred to build your custom kernel latency predictor

### The first step is to find a device similar to your own; this improves the accuracy of your predictor and reduces the profiling cost

You need to measure the real kernel latency for a set of representative configurations. We recommend using [nn-Meter](https://github.com/microsoft/nn-Meter/tree/main) to test the fusion rules and profile the kernel data.

In [1]:
import pandas as pd
import numpy as np
 
# Columns used for the 'conv-bn-relu' kernel walkthrough. LATENCY is kept last
# because the following cell treats the final column as the prediction target
# and every column before it as a predictor input.
features = [
    'HW', 'CIN', 'COUT', 'KERNEL_SIZE', 'STRIDES',
    'FLOPS', 'PARAMS',
    'LATENCY',
]
# Load the latency measurements that feed the similar-device detection below;
# the path is relative to the notebook's working directory.
df = pd.read_csv('../similar_conv_config_pixel5_tf27gpu.csv')

In [2]:
# Keep only the selected columns, in the order given by `features`.
data = df[features]
# One plain Python list per row of the table.
profile_result = data.values.tolist()
# Peel the trailing value off every row: the leading values are the inputs (X)
# and the last value is the target (Y).
X = [inputs for *inputs, _ in profile_result]
Y = [latency for *_, latency in profile_result]

In [3]:
import torch
import torch.utils.data as Data
# Pair every input row with its target as float32 tensors so the samples can be
# handed to the detector as a single dataset object.
dataset = Data.TensorDataset(
    torch.tensor(X, dtype=torch.float32),
    torch.tensor(Y, dtype=torch.float32),
)

In [4]:
from detector import  Detector
# Directory holding the pool of pre-trained predictors. A later cell looks up
# `<pool_path>/<device>/<kernel_type>.pth` inside it.
# The path is relative to the notebook's working directory (the same convention
# as the CSV path used to load the profiling data) rather than an absolute path
# that only exists on one machine; adjust it if your checkout is laid out
# differently.
pool_path = '../predictors'
# Kernel whose predictor is being built in this walkthrough.
kernel_type = 'conv-bn-relu'
# The detector is given the predictor pool, the measured samples for the new
# device, and the kernel type.
sm_detector = Detector(pool_path=pool_path,
                       dataset=dataset,
                       kernel_type=kernel_type)

In [5]:
# Ask the detector for the similar device(s). The first entry of the result is
# used further down as a sub-directory name inside `pool_path`.
similar_device = sm_detector.get_similar_device()

### Then you can use the profiled data and the similar device's predictor weights to train your own predictor

In [6]:
import os
from model import NeuralNetwork
# Checkpoint location: <pool_path>/<first similar device>/<kernel_type>.pth
weight_path = os.path.join(
    pool_path,
    similar_device[0],
    f'{kernel_type}.pth',
)
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a source you trust.
weights = torch.load(weight_path)

In [10]:
def split_inputs_and_latency(table):
    """Split a table into model inputs and the target taken from the last column.

    Args:
        table: pandas DataFrame whose last column is the target and whose
            remaining columns are the inputs.

    Returns:
        A ``(inputs, targets)`` tuple: ``inputs`` is a list with one list per
        row (every column but the last) and ``targets`` is the list of
        last-column values, in row order.
    """
    # Convert to Python lists once and reuse the result for both halves.
    rows = table.values.tolist()
    return [row[:-1] for row in rows], [row[-1] for row in rows]


# Folder containing the training / evaluation CSV files. It is relative to the
# notebook's working directory (the same convention as the CSV loaded at the top
# of the notebook) instead of an absolute path that only exists on one machine;
# change this single constant if your data lives elsewhere.
DATA_DIR = '..'

# Column order matters: the last column (LATENCY) is the target.
features = ['HW', 'CIN', 'COUT', 'KERNEL_SIZE', 'STRIDES', 'FLOPS', 'PARAMS', 'LATENCY']
df_train = pd.read_csv(os.path.join(DATA_DIR, 'Data_conv-bn-relu.csv'))
df_eval = pd.read_csv(os.path.join(DATA_DIR, 'Data_conv-bn-relu_test.csv'))

data_train = df_train[features]
data_eval = df_eval[features]

X_train, Y_train = split_inputs_and_latency(data_train)
train_dataset = Data.TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(Y_train, dtype=torch.float32),
)

X_eval, Y_eval = split_inputs_and_latency(data_eval)
eval_dataset = Data.TensorDataset(
    torch.tensor(X_eval, dtype=torch.float32),
    torch.tensor(Y_eval, dtype=torch.float32),
)

In [16]:
from trainer import Trainer
# Configure training with the train/eval datasets built above, the kernel type,
# and the checkpoint loaded from the similar device's predictor.
trainer = Trainer(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    kernel_type=kernel_type,
    weights=weights,
    epochs=350,  # the recorded run of 350 epochs took about 21 minutes
)
# Run the training loop; progress and evaluation metrics are printed below.
trainer.train()

successfully load similar device weights!


100%|██████████| 350/350 [21:07<00:00,  3.62s/it]

mlp: rmse: 1.1580; rmspe: 3.6057; error: 0.0953; 5% accuracy: 0.9187; 10% accuracy: 0.9668; 15% accuracy: 0.9911.
Test Error: 
 10% Accuracy: 0.9668, Avg loss: 0.018653 






In [18]:
# Persist the trained predictor. No destination is passed here, so the trainer
# chooses it; the message printed below reports where the file was written.
trainer.save()

save model in f../predictors/conv-bn-relu.pth
