# 5. 加速推理

Treelite 是一个决策树模型推理工具，支持对 XGBoost、LightGBM、scikit-learn 等框架训练出的模型进行推理加速。

在使用 CPU 推理时，可以借助配套工具 TL2cgen 将模型编译成 .so 动态库，在指令层面对决策树的分支进行分支预测等优化，从而加快推理速度。

In [1]:
# !pip install treelite
# !pip install tl2cgen

In [2]:
# Data directory; the file names below are resolved against it with
# util.gen_abspath.
DIRECTORY = './data'

TEST_FILE = 'adult/adult.test'          # header-less CSV used for inference
MODEL_FILE = 'model_best.txt'           # LightGBM model file
KKV_FILE = 'label_encoder_deploy.json'  # handed to util.load_label_encoder
SO_FILE = 'predictor.so'                # compiled shared library

# Column names assigned to the test CSV, in column order.
COLS = [
    'age',
    'workclass',
    'fnlwgt',
    'education',
    'education-num',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
    'income',
]

# Target column: dropped from the features and turned into the label.
LABEL_COL = 'income'

In [3]:
# coding: utf-8
import time
import numpy as np
import treelite
import tl2cgen
import lightgbm as lgb

import util

## 一、导入模型和数据

导入 LightGBM 模型

In [4]:
# Build the path of the LightGBM model file with util.gen_abspath
model_path = util.gen_abspath(DIRECTORY, MODEL_FILE)

# Load the model file with LightGBM
bst = lgb.Booster(model_file=model_path)

# Load the same model file with Treelite
model = treelite.frontend.load_lightgbm_model(model_path)

In [5]:
# Build the path of the .so file
so_path = util.gen_abspath(DIRECTORY, SO_FILE)

# Compile the Treelite model into a .so file using the gcc toolchain
tl2cgen.export_lib(model, toolchain="gcc", libpath=so_path)

[23:29:58] /private/var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/tmpfk_zrtwk/libbuild/_deps/treelite-src/src/serializer.cc:202: The model you are loading originated from a newer Treelite version; some functionalities may be unavailable.
Currently running Treelite version 4.1.2
The model checkpoint was generated from Treelite version 4.2.1


[23:29:58] /Users/runner/work/tl2cgen/tl2cgen/src/compiler/ast/split.cc:30: Parallel compilation disabled; all member trees will be dumped to a single source file. This may increase compilation time and memory usage.


In [6]:
# Load the compiled .so file as a TL2cgen predictor
predictor = tl2cgen.Predictor(so_path)
# Bare expression so the notebook displays the predictor object
predictor

<tl2cgen.predictor.Predictor at 0x7fbae56fe8f0>

导入样本数据

In [7]:
# Read the test file as a header-less, comma-separated table and attach the
# column names from COLS
file_path = util.gen_abspath(DIRECTORY, TEST_FILE)
df = util.read_csv(file_path, sep=',', header=None)
df.columns=COLS

# Bare expression so the notebook displays the table
df

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K.
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K.
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K.
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K.
4,18,?,103497,Some-college,10,Never-married,?,Own-child,White,Female,0,0,30,United-States,<=50K.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16276,39,Private,215419,Bachelors,13,Divorced,Prof-specialty,Not-in-family,White,Female,0,0,36,United-States,<=50K.
16277,64,?,321403,HS-grad,9,Widowed,?,Other-relative,Black,Male,0,0,40,United-States,<=50K.
16278,38,Private,374983,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,50,United-States,<=50K.
16279,44,Private,83891,Bachelors,13,Divorced,Adm-clerical,Own-child,Asian-Pac-Islander,Male,5455,0,40,United-States,<=50K.


In [8]:
# Split the table into features and label
X = df.drop(LABEL_COL, axis=1)  # features
# Label is 0 for ' <=50K.' and 1 for every other value.
# NOTE(review): the comparison relies on the leading space in ' <=50K.' --
# confirm util.read_csv keeps it, otherwise every row is labelled 1.
y = df[LABEL_COL].apply(lambda e: 0 if e == ' <=50K.' else 1)  # label

# Handle the categorical features via util.load_label_encoder and the
# KKV_FILE JSON; the second return value is not needed here
kkv_path = util.gen_abspath(DIRECTORY, KKV_FILE)
X, _ = util.load_label_encoder(X, kkv_path)

Pandas Apply:   0%|          | 0/16281 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/16281 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/16281 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/16281 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/16281 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/16281 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/16281 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/16281 [00:00<?, ?it/s]

## 二、使用 Treelite 加速推理

In [9]:
# Prepare the data
X_arr = np.array(X)  # features as a NumPy array (input for treelite_inference)
X_arr_float64 = np.array(X.astype('float64'))  # features cast to float64 (input for the .so predictor)

### 2.1 推理速度

编写一个装饰器，用于计算函数运行时间

In [10]:
# Maps a decorated function's __name__ to the duration, in seconds, of its
# most recent call.
tdict = dict()


def timer(func):
    """Decorator that prints and records the run time of each call to func.

    Every call prints ``time taken: <seconds>`` and stores the elapsed
    seconds in ``tdict`` under ``func.__name__``, overwriting any earlier
    entry for that name.

    args:
    func : callable
        The function to time.

    return:
    callable
        A wrapper that forwards all arguments to func and returns its result.
    """
    import functools  # local import so this cell does not depend on another one

    @functools.wraps(func)  # keep func's __name__ / __doc__ on the wrapper
    def f(*args, **kwargs):
        # perf_counter is a monotonic, high-resolution clock; time.time() is
        # wall-clock time and can jump when the system clock is adjusted.
        before = time.perf_counter()
        rv = func(*args, **kwargs)
        elapsed = time.perf_counter() - before
        print(f'time taken: {elapsed:.5f}')
        tdict[func.__name__] = elapsed
        return rv
    return f

In [11]:
@timer
def lightgbm_inference(X):
    """Run inference with the LightGBM Booster ``bst`` and return its predictions."""
    return bst.predict(X)

# The elapsed time is stored in tdict['lightgbm_inference'] by @timer
r1 = lightgbm_inference(X)

time taken: 0.07800


In [12]:
@timer
def treelite_inference(X_arr):
    """Run inference on the Treelite model with treelite.gtil.predict (no compilation).

    The prediction output is flattened before being returned.
    """
    return treelite.gtil.predict(model, data=X_arr).flatten()

# The elapsed time is stored in tdict['treelite_inference'] by @timer
r2 = treelite_inference(X_arr)

time taken: 0.39537


In [13]:
@timer
def treelite_inference_so(X_arr_float64):
    """Run inference with the predictor loaded from the compiled .so file.

    The prediction output is flattened before being returned.
    """
    # Building the DMatrix happens inside the timed region
    dmat = tl2cgen.DMatrix(X_arr_float64)
    return predictor.predict(dmat).flatten()

# The elapsed time is stored in tdict['treelite_inference_so'] by @timer
r3 = treelite_inference_so(X_arr_float64)

time taken: 0.03864


In [14]:
# Report each Treelite timing as a multiple of the LightGBM timing recorded
# in tdict; a ratio below 1 means that path took less time than LightGBM.
print("结论：")
print(f"- Treelite 编译前推理耗时是 LightGBM 推理耗时的：{tdict['treelite_inference'] / tdict['lightgbm_inference']:.2f} 倍")
print(f"- Treelite 编译后推理耗时是 LightGBM 推理耗时的：{tdict['treelite_inference_so'] / tdict['lightgbm_inference']:.2f} 倍")

结论：
- Treelite 编译前推理耗时是 LightGBM 推理耗时的：5.07 倍
- Treelite 编译后推理耗时是 LightGBM 推理耗时的：0.50 倍


### 2.2 推理准确度

检查三种推理方式输出的结果是否相同

In [15]:
# Display the LightGBM predictions
r1

array([0.00624845, 0.39836881, 0.65383252, ..., 0.92038485, 0.38058301,
       0.91099488])

In [16]:
# Display the Treelite (uncompiled) predictions
r2

array([0.00624845, 0.39836881, 0.65383252, ..., 0.92038485, 0.38058301,
       0.91099488])

In [17]:
# Display the predictions of the compiled .so predictor
r3

array([0.00624845, 0.39836881, 0.65383252, ..., 0.92038485, 0.38058301,
       0.91099488])

In [18]:
print('r1 = r2?')
# Exact element-wise comparison of the LightGBM and Treelite predictions.
# np.array_equal does the check in one vectorized call, returns a plain bool,
# and yields False (instead of failing) when the shapes differ.
np.array_equal(r1, r2)

r1 = r2?


True

In [19]:
print('r1 = r3?')
# Exact element-wise comparison of the LightGBM and compiled-.so predictions.
# np.array_equal does the check in one vectorized call, returns a plain bool,
# and yields False (instead of failing) when the shapes differ.
np.array_equal(r1, r3)

r1 = r3?


True

### 2.3 通用函数

为方便复用代码，编写两个通用函数，分别用于：

 - 生成 .so 文件
 - 用 .so 文件推理

In [20]:
def gen_so_file(lgb_model_path, so_model_path, toolchain="gcc"):
    """Compile a LightGBM model into a .so file usable for inference.

    The model file is loaded with Treelite and compiled with
    tl2cgen.export_lib.

    args:
    lgb_model_path : str
        Path of the LightGBM model .txt file.
    so_model_path : str
        Path where the generated .so file is written.
    toolchain : str
        Compiler toolchain passed to tl2cgen.export_lib. Defaults to "gcc",
        which was previously hard-coded.
    """
    model = treelite.frontend.load_lightgbm_model(lgb_model_path)
    tl2cgen.export_lib(model, toolchain=toolchain, libpath=so_model_path)

def predict(X, so_model_path):
    """Run model inference with a compiled .so file.

    args:
    X : array-like (2D)
        Model features, e.g. a np.array, a DataFrame or a nested list.
    so_model_path : str
        Path of the .so file.

    return:
    np.array (1D)
        Flattened prediction output.
    """
    predictor = tl2cgen.Predictor(so_model_path)

    # np.asarray converts to float64 in a single step: no copy is made when X
    # is already a float64 array, and inputs without an .astype method (such
    # as nested lists) are accepted too.
    X_arr_float64 = np.asarray(X, dtype=np.float64)
    dmat = tl2cgen.DMatrix(X_arr_float64)
    return predictor.predict(dmat).flatten()