In [15]:
import os

import lightgbm as lgb
import numpy as np
import pandas as pd
import ppmml
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris

# Train a LightGBM classifier on the iris dataset

In [16]:
# Load the iris feature matrix and label vector.
# return_X_y is passed by keyword: recent scikit-learn releases make it
# keyword-only, so the positional form load_iris(True) raises a TypeError.
X, y = load_iris(return_X_y=True)
# Keep the (X, y) tuple available under its original name.
iris_data = (X, y)
# Column names used for the DataFrame, the LightGBM model and the test CSV.
features = ['x1', 'x2', 'x3', 'x4']
df = pd.DataFrame(X, columns=features)

In [17]:
# Build the LightGBM multiclass classifier (10 boosting rounds, depth capped at 6).
gbm = lgb.LGBMClassifier(
    boosting_type='gbdt',
    num_leaves=31,
    max_depth=6,
    learning_rate=0.1,
    n_estimators=10,
    objective='multiclass',
)
# Always pass feature_name when fitting: per the original author's note,
# leaving it out can cause an exception when predicting with the exported PMML.
gbm.fit(X, y, eval_metric='logloss', feature_name=features)

LGBMClassifier(boosting_type='gbdt', colsample_bytree=1.0, learning_rate=0.1,
        max_bin=255, max_depth=6, min_child_samples=20,
        min_child_weight=0.001, min_split_gain=0.0, n_estimators=10,
        n_jobs=-1, num_leaves=31, objective='multiclass',
        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
        subsample=1.0, subsample_for_bin=200000, subsample_freq=1)

In [18]:
# Save the trained booster to a model file for the PMML converter.
gbm_model_output = "/tmp/pmml-models/light-gbm/classifier.txt"
# Test for the output *directory*, not the model file: checking the file would
# call os.makedirs on an already existing directory (and raise) whenever the
# directory is present but the model file is not.
gbm_model_dir = os.path.dirname(gbm_model_output)
if not os.path.isdir(gbm_model_dir):
    os.makedirs(gbm_model_dir)
gbm.booster_.save_model(gbm_model_output)

# Export PMML File

In [19]:
# Convert the saved LightGBM model file into a PMML file.
# model_type tells ppmml which converter to use for the input file.
pmml_output = "/tmp/pmml-models/light-gbm/classifier.pmml"
ppmml.to_pmml(gbm_model_output, pmml_output, model_type="lightgbm")

INFO: 17-12-28 16:24:38: base_converter.py:89 * 140735235661824 Starting to convert model file /tmp/pmml-models/light-gbm/classifier.txt to pmml file
INFO: 17-12-28 16:24:40: base_converter.py:96 * 140735235661824 Successfully generate pmml file: /tmp/pmml-models/light-gbm/classifier.pmml


# Predict with PMML File

In [20]:
# Write the feature frame to CSV as input for the PMML evaluator:
# keep the header row (x1..x4) and leave out the pandas index column.
test_data_input = "/tmp/pmml-models/light-gbm/test.csv"
df.to_csv(
    test_data_input,
    index=False,
    header=True,
)

In [21]:
# Score the test CSV with the exported PMML file; ppmml writes the
# predictions to data_output (see the evaluator log lines below).
data_output = "/tmp/pmml-models/light-gbm/predict.csv"
ppmml.predict(pmml_output, test_data_input, data_output)

INFO: 17-12-28 16:24:40: evaluator.py:62 * 140735235661824 Starting to make predictions of pmml file: /tmp/pmml-models/light-gbm/classifier.pmml, data_input: /tmp/pmml-models/light-gbm/test.csv, data_output: /tmp/pmml-models/light-gbm/predict.csv
INFO: 17-12-28 16:24:42: evaluator.py:80 * 140735235661824 Successfully generate predictions to path: /tmp/pmml-models/light-gbm/predict.csv


In [22]:
# Load the scored rows under their own name so that `df` keeps holding the
# input features; rebinding `df` here meant that re-running the test-data
# cell would export the predictions instead of the original features.
predictions = pd.read_csv(data_output)
# Preview the first rows: input columns, predicted target and class probabilities.
predictions.head(5)

Unnamed: 0,x1,x2,x3,x4,_target,probability(0),probability(1),probability(2)
0,5.1,3.5,1.4,0.2,0,0.718363,0.14068,0.140957
1,4.9,3.0,1.4,0.2,0,0.709745,0.141756,0.148499
2,4.7,3.2,1.3,0.2,0,0.709745,0.141756,0.148499
3,4.6,3.1,1.5,0.2,0,0.710448,0.140906,0.148646
4,5.0,3.6,1.4,0.2,0,0.725032,0.137349,0.137619
