## Load Libraries

In [13]:
import json
from random import randint, sample
import pandas as pd
import numpy as np
import pickle
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score, confusion_matrix

import treelite

from scipy.io import loadmat
import lightgbm as lgb


# Notebook-wide display configuration.
# Seaborn "ticks" style for every figure drawn later on.
sns.set(style="ticks")
# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

# Let pandas render up to 150 rows before truncating a displayed frame.
pd.set_option('display.max_rows', 150)

## Load Data

In [14]:
# Read the "f" variable from the MATLAB feature file.
# NOTE(review): absolute path is machine-specific; adjust when running elsewhere.
mat = loadmat("/data/data/spo2/raw/spo2_feature.mat")["f"]

# Last column is used as the target; every other column is a feature.
X, y = mat[:, :-1], mat[:, -1]

# Wrap features and labels in a LightGBM dataset.
lgb_data = lgb.Dataset(data=X, label=y)

## Construct Model with All Data

In [15]:
def get_default_parameters():
    """Return a fresh dictionary of baseline LightGBM training parameters.

    The dictionary configures a 4-class ``multiclass`` GBDT objective that
    reports both ``multi_error`` and ``multi_logloss``. A new dictionary is
    built on every call, so callers may update the result freely.
    """
    task_settings = {
        "boosting_type": "gbdt",
        "objective": "multiclass",
        "num_class": 4,
        "metric": {"multi_error", "multi_logloss"},
    }
    tree_settings = {
        "num_leaves": 31,
        "learning_rate": 0.05,
    }
    sampling_settings = {
        "feature_fraction": 0.9,
        "bagging_fraction": 0.8,
        "bagging_freq": 5,
    }
    # Merge in the original key order: task, tree, sampling, then verbosity.
    return {**task_settings, **tree_settings, **sampling_settings, "verbose": 0}

def run(lgb_train, lgb_eval, params, X_test=None, y_test=None):
    """Train a LightGBM booster and optionally report its accuracy.

    Parameters
    ----------
    lgb_train : lgb.Dataset
        Training data handed to ``lgb.train``.
    lgb_eval : lgb.Dataset
        Passed as ``valid_sets``; its metrics drive early stopping.
    params : dict
        LightGBM parameters. A shallow copy is made before use, so the
        caller's dictionary is never modified. ``num_leaves`` is cast to
        ``int`` when present.
    X_test, y_test : array-like, optional
        Features and labels used for the accuracy report. When either one is
        omitted the prediction/accuracy step is skipped instead of failing.

    Returns
    -------
    The booster returned by ``lgb.train``.
    """
    # Work on a copy: the original implementation rewrote the caller's dict.
    params = dict(params)
    if "num_leaves" in params:
        # Presumably a hyper-parameter search may deliver this as a float,
        # while LightGBM expects an integer.
        params["num_leaves"] = int(params["num_leaves"])

    gbm = lgb.train(params, lgb_train, num_boost_round=20,
                    valid_sets=lgb_eval, early_stopping_rounds=5)

    # The evaluation inputs are optional; without them there is nothing to score.
    if X_test is None or y_test is None:
        return gbm

    # predict: one row of per-class scores per sample -> index of the best class
    class_scores = gbm.predict(X_test, num_iteration=gbm.best_iteration)
    y_pred = np.argmax(class_scores, axis=1)

    # eval
    acc = accuracy_score(y_test, y_pred)
    print("The accuracy of prediction is: ", acc)
    return gbm

# Overrides applied on top of the defaults.
# NOTE(review): these look like the result of a hyper-parameter search — confirm their origin.
received_params = {
    "num_leaves": 8,
    "learning_rate": 0.2,
    "bagging_fraction": 0.9636231730351655,
    "bagging_freq": 8
}
params = {**get_default_parameters(), **received_params}

# The same dataset serves as training set, validation set and accuracy check,
# so the reported scores are training scores.
gbm = run(lgb_data, lgb_data, params, X_test=X, y_test=y)

[1]	training's multi_logloss: 0.994843	training's multi_error: 0.315861
Training until validation scores don't improve for 5 rounds
[2]	training's multi_logloss: 0.920654	training's multi_error: 0.309986
[3]	training's multi_logloss: 0.865589	training's multi_error: 0.309233
[4]	training's multi_logloss: 0.822938	training's multi_error: 0.295677
[5]	training's multi_logloss: 0.789421	training's multi_error: 0.292815
[6]	training's multi_logloss: 0.76253	training's multi_error: 0.292665
[7]	training's multi_logloss: 0.742918	training's multi_error: 0.286188
[8]	training's multi_logloss: 0.724187	training's multi_error: 0.285435
[9]	training's multi_logloss: 0.709105	training's multi_error: 0.285435
[10]	training's multi_logloss: 0.697337	training's multi_error: 0.282573
[11]	training's multi_logloss: 0.684623	training's multi_error: 0.282121
[12]	training's multi_logloss: 0.673072	training's multi_error: 0.281669
[13]	training's multi_logloss: 0.662847	training's multi_error: 0.277
[14]

In [16]:
# Write the trained booster to ./model; treelite loads this file in a later cell.
gbm.save_model("./model")

<lightgbm.basic.Booster at 0x7fbb340ae7f0>

In [18]:
# Load the LightGBM model file from ./model into treelite.
model = treelite.Model.load('./model', 'lightgbm')

# Generate a C source package (recipe.json, header.h, main.c) for a unix/gcc
# build; 'quantize' is forwarded to the treelite compiler as a parameter.
model.export_srcpkg(
    platform='unix',
    toolchain='gcc',
    pkgpath='./mymodel.zip',
    libname='mymodel.so',
    verbose=True,
    params={'quantize': 1},
)

[20:40:46] /workspace/src/frontend/lightgbm.cc:544: model.num_tree = 80
[20:40:46] /workspace/src/compiler/ast_native.cc:44: Using ASTNativeCompiler
[20:40:46] /workspace/src/compiler/ast/split.cc:24: Parallel compilation disabled; all member trees will be dumped to a single source file. This may increase compilation time and memory usage.
[20:40:46] /workspace/src/c_api/c_api.cc:286: Code generation finished. Writing code to files...
[20:40:46] /workspace/src/c_api/c_api.cc:291: Writing file recipe.json...
[20:40:46] /workspace/src/c_api/c_api.cc:291: Writing file header.h...
[20:40:46] /workspace/src/c_api/c_api.cc:291: Writing file main.c...


## Compare with C Implementation

### Consistency of Features

### Consistency of Probs