In [25]:
import os
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
import xgboost

# Train an XGBoost classifier on the iris dataset

In [16]:
# Load iris as a (features, labels) pair. `return_X_y` is passed by keyword
# because recent scikit-learn releases no longer accept it positionally.
iris_data = load_iris(return_X_y=True)
X, y = iris_data
# Wrap the feature matrix in a DataFrame using the same four column names
# that are written to the feature-map file.
df = pd.DataFrame(X, columns=['x1', 'x2', 'x3', 'x4'])

In [17]:
def gen_fmap_file(features, fout):
    """Write a feature-map file with one line per entry of ``features``.

    Each line has the form ``<index><TAB><name><TAB>q``, where ``<index>`` is
    the zero-based position of the name in ``features``.

    Args:
        features: iterable of feature names.
        fout: path of the file to create; an existing file is overwritten.
            Missing parent directories are created.
    """
    parent_dir = os.path.dirname(fout)
    # The notebook only creates the output directory in a later cell, so make
    # sure it exists here; otherwise open() fails when run from a clean state.
    if parent_dir and not os.path.isdir(parent_dir):
        os.makedirs(parent_dir)
    # Plain write mode suffices: the handle is never read from.
    with open(fout, 'w') as f:
        for i, feature in enumerate(features):
            f.write("{0}\t{1}\tq\n".format(i, feature))
# Path of the feature-map file, then generate it for the four iris columns.
fmap_file = "/tmp/pmml-models/xgboost/classifier.fmap"
gen_fmap_file(
    features=['x1', 'x2', 'x3', 'x4'],
    fout=fmap_file,
)

In [18]:
# output xgboost model
classifier = xgboost.XGBClassifier(max_depth=6, learning_rate=0.1,
                                   n_estimators=10,
                                   silent=True, objective='"multi:softmax"')
%time classifier.fit(X, y)

CPU times: user 13.1 ms, sys: 16.1 ms, total: 29.2 ms
Wall time: 32.1 ms


XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
       gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=6,
       min_child_weight=1, missing=None, n_estimators=10, nthread=-1,
       objective='multi:softprob', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [26]:
# Persist the trained booster to disk, creating the target directory first
# when it is not there yet.
xgboost_model_dir = "/tmp/pmml-models/xgboost"
xgboost_model_path = "/tmp/pmml-models/xgboost/classifier.model"
if not os.path.exists(xgboost_model_dir):
    os.makedirs(xgboost_model_dir)
trained_booster = classifier._Booster
trained_booster.save_model(xgboost_model_path)

# Export Model as PMML File

In [20]:
import ppmml

In [21]:
# Convert the saved xgboost model, together with its feature map, to PMML.
pmml_output = "/tmp/pmml-models/xgboost/classifier.pmml"
ppmml.to_pmml(
    model_type="xgboost",
    model_input=xgboost_model_path,
    schema_input=fmap_file,
    pmml_output=pmml_output,
)

INFO: 17-12-28 16:10:20: base_converter.py:89 * 140735235661824 Starting to convert model file /tmp/pmml-models/xgboost/classifier.model to pmml file
INFO: 17-12-28 16:10:22: base_converter.py:96 * 140735235661824 Successfully generate pmml file: /tmp/pmml-models/xgboost/classifier.pmml


# Predict with the PMML file using a simple predict function

In [22]:
# Dump the feature frame to CSV (header row kept, index column dropped) to
# serve as the prediction input.
test_data_input = "/tmp/pmml-models/xgboost/test.csv"
df.to_csv(test_data_input, index=False, header=True)

In [23]:
# Run the exported PMML model over the CSV test data; the input is declared
# comma-separated and predictions go to predict_output.
predict_output = "/tmp/pmml-models/xgboost/predict.csv"
ppmml.predict(
    pmml_input=pmml_output,
    data_input=test_data_input,
    data_output=predict_output,
    options=dict(separator=","),
)

INFO: 17-12-28 16:10:22: evaluator.py:62 * 140735235661824 Starting to make predictions of pmml file: /tmp/pmml-models/xgboost/classifier.pmml, data_input: /tmp/pmml-models/xgboost/test.csv, data_output: /tmp/pmml-models/xgboost/predict.csv
INFO: 17-12-28 16:10:24: evaluator.py:80 * 140735235661824 Successfully generate predictions to path: /tmp/pmml-models/xgboost/predict.csv
