In [1]:
import shutil
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.externals import joblib
import ppmml

# Load data

In [2]:
# Load the iris dataset as a (features, target) pair.
# The flag is passed by keyword: newer scikit-learn releases make
# `return_X_y` keyword-only, while older releases accept the keyword too.
iris_data = load_iris(return_X_y=True)
X, y = iris_data

# Wrap the feature matrix in a DataFrame with short column names x1..x4;
# this frame is what later gets written out as the test CSV.
df = pd.DataFrame(X, columns=['x1', 'x2', 'x3', 'x4'])
df.head()

Unnamed: 0,x1,x2,x3,x4
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [3]:
import os

# Working directory for every artifact this notebook writes
# (pickled models, PMML files, test data and prediction CSVs).
base_path = "/tmp/pmml-models/sklearn/"

# Make sure the directory exists before writing into it, so the notebook
# survives a run on a fresh machine. The isdir guard (rather than
# exist_ok=True) keeps this working on Python 2 as well.
if not os.path.isdir(base_path):
    os.makedirs(base_path)

# Prepare test data: feature columns only, with a header row and no index column
test_data_input = base_path + "test.csv"
df.to_csv(test_data_input, header=True, index=False)

# KMeans Example

In [4]:
# output kmeans model
kmeans = KMeans(n_clusters=3, max_iter=100, random_state=42)
%time kmeans.fit(X)
kmeans_model_path = base_path + "kmeans.pkl.z"
joblib.dump(kmeans, kmeans_model_path, compress = 9)
# call ppmml to convert to pmml file
kmeans_pmml_output = base_path + "kmeans.pmml"

CPU times: user 30.6 ms, sys: 1.26 ms, total: 31.9 ms
Wall time: 45.5 ms


## ppmml.to_pmml

In [5]:
# Convert the pickled KMeans model into a PMML file
ppmml.to_pmml(
    model_input=kmeans_model_path,
    pmml_output=kmeans_pmml_output,
)

INFO: 17-12-28 15:05:36: base_converter.py:89 * 140735235661824 Starting to convert model file /tmp/pmml-models/sklearn/kmeans.pkl.z to pmml file
INFO: 17-12-28 15:05:39: base_converter.py:96 * 140735235661824 Successfully generate pmml file: /tmp/pmml-models/sklearn/kmeans.pmml


## Evaluate the pmml file

### ppmml.predict

In [6]:
# Score the test CSV with the KMeans PMML file; predictions are written to data_output
data_output = base_path + "kmeans_predict.csv"
ppmml.predict(
    kmeans_pmml_output,
    test_data_input,
    data_output,
)

INFO: 17-12-28 15:05:39: evaluator.py:62 * 140735235661824 Starting to make predictions of pmml file: /tmp/pmml-models/sklearn/kmeans.pmml, data_input: /tmp/pmml-models/sklearn/test.csv, data_output: /tmp/pmml-models/sklearn/kmeans_predict.csv
INFO: 17-12-28 15:05:41: evaluator.py:80 * 140735235661824 Successfully generate predictions to path: /tmp/pmml-models/sklearn/kmeans_predict.csv


# Decision Tree Example

In [7]:
# output decision tree model
from sklearn.tree import DecisionTreeClassifier
tree_clf = DecisionTreeClassifier(max_depth=6)
%time tree_clf.fit(X, y)
model_output = base_path + "decision_tree.pkl.z"
pmml_output = base_path + "decision_tree.pmml"
joblib.dump(tree_clf, model_output, compress = 9)
ppmml.to_pmml(model_input=model_output, pmml_output=pmml_output)

CPU times: user 2 ms, sys: 1.69 ms, total: 3.7 ms
Wall time: 2.8 ms
INFO: 17-12-28 15:05:41: base_converter.py:89 * 140735235661824 Starting to convert model file /tmp/pmml-models/sklearn/decision_tree.pkl.z to pmml file
INFO: 17-12-28 15:05:43: base_converter.py:96 * 140735235661824 Successfully generate pmml file: /tmp/pmml-models/sklearn/decision_tree.pmml


In [8]:
# Score the test CSV with the PMML file currently held in pmml_output
data_output = base_path + "decision_tree_predict.csv"
ppmml.predict(
    pmml_output,
    test_data_input,
    data_output,
)

INFO: 17-12-28 15:05:43: evaluator.py:62 * 140735235661824 Starting to make predictions of pmml file: /tmp/pmml-models/sklearn/decision_tree.pmml, data_input: /tmp/pmml-models/sklearn/test.csv, data_output: /tmp/pmml-models/sklearn/decision_tree_predict.csv
INFO: 17-12-28 15:05:44: evaluator.py:80 * 140735235661824 Successfully generate predictions to path: /tmp/pmml-models/sklearn/decision_tree_predict.csv


# Random Forest Example

In [9]:
# output random forest model
from sklearn.ensemble import RandomForestClassifier
tree_clf = RandomForestClassifier(max_depth=6)
%time tree_clf.fit(X, y)
model_output = base_path + "random_forest.pkl.z"
pmml_output = base_path + "random_forest.pmml"
joblib.dump(tree_clf, model_output, compress = 9)
ppmml.to_pmml(model_input=model_output, pmml_output=pmml_output)

CPU times: user 44 ms, sys: 3.93 ms, total: 47.9 ms
Wall time: 46.2 ms
INFO: 17-12-28 15:05:44: base_converter.py:89 * 140735235661824 Starting to convert model file /tmp/pmml-models/sklearn/random_forest.pkl.z to pmml file
INFO: 17-12-28 15:05:46: base_converter.py:96 * 140735235661824 Successfully generate pmml file: /tmp/pmml-models/sklearn/random_forest.pmml


In [10]:
# Score the test CSV with the PMML file currently held in pmml_output
data_output = base_path + "random_forest_predict.csv"
ppmml.predict(
    pmml_output,
    test_data_input,
    data_output,
)

INFO: 17-12-28 15:05:46: evaluator.py:62 * 140735235661824 Starting to make predictions of pmml file: /tmp/pmml-models/sklearn/random_forest.pmml, data_input: /tmp/pmml-models/sklearn/test.csv, data_output: /tmp/pmml-models/sklearn/random_forest_predict.csv
INFO: 17-12-28 15:05:48: evaluator.py:80 * 140735235661824 Successfully generate predictions to path: /tmp/pmml-models/sklearn/random_forest_predict.csv


# Logistic Regression Example

In [11]:
# output lr model
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(tol=1e-5)
%time lr.fit(X, y)
model_output = base_path + "logistic_regression.pkl.z"
pmml_output = base_path + "logistic_regression.pmml"
joblib.dump(tree_clf, model_output, compress = 9)
ppmml.to_pmml(model_input=model_output, pmml_output=pmml_output)

CPU times: user 88.8 ms, sys: 26.3 ms, total: 115 ms
Wall time: 172 ms
INFO: 17-12-28 15:05:49: base_converter.py:89 * 140735235661824 Starting to convert model file /tmp/pmml-models/sklearn/logistic_regression.pkl.z to pmml file
INFO: 17-12-28 15:05:51: base_converter.py:96 * 140735235661824 Successfully generate pmml file: /tmp/pmml-models/sklearn/logistic_regression.pmml


In [12]:
# Score the test CSV with the PMML file currently held in pmml_output
data_output = base_path + "logistic_regression_predict.csv"
ppmml.predict(
    pmml_output,
    test_data_input,
    data_output,
)

INFO: 17-12-28 15:05:51: evaluator.py:62 * 140735235661824 Starting to make predictions of pmml file: /tmp/pmml-models/sklearn/logistic_regression.pmml, data_input: /tmp/pmml-models/sklearn/test.csv, data_output: /tmp/pmml-models/sklearn/logistic_regression_predict.csv
INFO: 17-12-28 15:05:52: evaluator.py:80 * 140735235661824 Successfully generate predictions to path: /tmp/pmml-models/sklearn/logistic_regression_predict.csv


# Neural Network Example

In [13]:
# output NN model
from sklearn.neural_network import MLPClassifier
nn = MLPClassifier()
%time nn.fit(X, y)
model_output = base_path + "neural_network.pkl.z"
pmml_output = base_path + "neural_network.pmml"
joblib.dump(tree_clf, model_output, compress = 9)
ppmml.to_pmml(model_input=model_output, pmml_output=pmml_output)

CPU times: user 161 ms, sys: 18.3 ms, total: 179 ms
Wall time: 178 ms
INFO: 17-12-28 15:05:53: base_converter.py:89 * 140735235661824 Starting to convert model file /tmp/pmml-models/sklearn/neural_network.pkl.z to pmml file




INFO: 17-12-28 15:05:54: base_converter.py:96 * 140735235661824 Successfully generate pmml file: /tmp/pmml-models/sklearn/neural_network.pmml


In [14]:
# Score the test CSV with the PMML file currently held in pmml_output
data_output = base_path + "neural_network_predict.csv"
ppmml.predict(
    pmml_output,
    test_data_input,
    data_output,
)

INFO: 17-12-28 15:05:54: evaluator.py:62 * 140735235661824 Starting to make predictions of pmml file: /tmp/pmml-models/sklearn/neural_network.pmml, data_input: /tmp/pmml-models/sklearn/test.csv, data_output: /tmp/pmml-models/sklearn/neural_network_predict.csv
INFO: 17-12-28 15:05:56: evaluator.py:80 * 140735235661824 Successfully generate predictions to path: /tmp/pmml-models/sklearn/neural_network_predict.csv
