Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prroy/knnc #169

Merged
merged 32 commits into from Nov 14, 2018
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
461be7e
Initial changes
Aug 28, 2018
608db06
Merged PCA with SVD and added KNN shape calculators in Linear
Aug 30, 2018
d88db00
Merged KNN classifier and regressor files
Sep 3, 2018
0dc6855
Added Naive Bayes converters
Sep 4, 2018
809d7d5
Addressed PR comments about name changes, reverting to previous names
Sep 4, 2018
7de7729
Removing extra import from init
Sep 4, 2018
b4fbaea
Added LassoLars and Ridge converters
Sep 6, 2018
7e48202
Fixed issues with running test on NB models
Sep 10, 2018
8b27764
Added comments
Sep 12, 2018
4af0ef3
Merged LeastSquares with LinearRegressor, added computational graph t…
Sep 17, 2018
879cbc3
Fixed bug in KNN Classifier
Sep 17, 2018
5635546
Added calculation of probability to NB converter
Sep 20, 2018
61d081c
Added unit tests
Sep 21, 2018
50a929c
Addressed review comments
Sep 24, 2018
9c6ffb0
Removed duplicate function
Sep 24, 2018
c9afa36
Added comment to clarify two output paths
Sep 25, 2018
fb8ba5d
Removed non-ascii character
Sep 25, 2018
29c5d48
Fixed topk calculation error in KNNR
Oct 1, 2018
3b6cb85
Fixed additional cases in PCA
Oct 1, 2018
73b5ad6
Fixed cntk and scikit versions
Oct 3, 2018
6020782
Resolved merge conflicts
Oct 26, 2018
0c9199b
Merge remote-tracking branch 'upstream/master'
Oct 30, 2018
e91853d
Added KNN Classifier converter
Nov 7, 2018
74555a5
Merge remote-tracking branch 'upstream/master'
Nov 7, 2018
e7eb5f2
Fixed merge conflicts
Nov 7, 2018
c26d5e6
Added KNN classifier tests
Nov 8, 2018
b1cd82a
Fixed some NB issues
Nov 8, 2018
819b34d
Updated computation graph and added runtime check in unit test
Nov 9, 2018
944a4a6
Updated computation graphs and KNN Classifier unit test
Nov 9, 2018
dae7687
changed np to numpy
Nov 9, 2018
05c9cb8
Addressed review comments
Nov 13, 2018
fab0885
Addressed review comments
Nov 13, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion onnxmltools/convert/sklearn/_parse.py
Expand Up @@ -37,6 +37,7 @@
from sklearn.svm import SVC, SVR, NuSVC, NuSVR

# K-nearest neighbors
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor

# Naive Bayes
Expand All @@ -63,7 +64,7 @@
# one output for everything not in the list.
sklearn_classifier_list = [LogisticRegression, SGDClassifier, LinearSVC, SVC, NuSVC,
GradientBoostingClassifier, RandomForestClassifier, DecisionTreeClassifier,
ExtraTreesClassifier, BernoulliNB, MultinomialNB]
ExtraTreesClassifier, BernoulliNB, MultinomialNB, KNeighborsClassifier]

# Associate scikit-learn types with our operator names. If two scikit-learn models share a single name, it means they
# are equivalent in terms of conversion.
Expand Down Expand Up @@ -95,6 +96,7 @@
ExtraTreesRegressor: 'SklearnExtraTreesRegressor',
GradientBoostingClassifier: 'SklearnGradientBoostingClassifier',
GradientBoostingRegressor: 'SklearnGradientBoostingRegressor',
KNeighborsClassifier: 'SklearnKNeighborsClassifier',
KNeighborsRegressor: 'SklearnKNeighborsRegressor',
MultinomialNB: 'SklearnMultinomialNB',
BernoulliNB: 'SklearnBernoulliNB',
Expand Down
192 changes: 150 additions & 42 deletions onnxmltools/convert/sklearn/operator_converters/KNN.py
Expand Up @@ -5,10 +5,18 @@
# --------------------------------------------------------------------------

from ....proto import onnx_proto
from ...common._apply_operation import apply_abs, apply_mul, apply_reshape, apply_sub
from ...common._apply_operation import apply_abs, apply_mul, apply_reshape, apply_sub, apply_cast
prabhat00155 marked this conversation as resolved.
Show resolved Hide resolved
from ...common._registration import register_converter
from .OneHotEncoder import convert_sklearn_one_hot_encoder
from .._parse import sklearn_operator_name_map
import numpy as np
from sklearn.preprocessing import OneHotEncoder
prabhat00155 marked this conversation as resolved.
Show resolved Hide resolved

class Oper:
    """Minimal operator-like container used to invoke another converter directly.

    convert_sklearn_knn wraps a fitted OneHotEncoder in one of these and hands
    it to convert_sklearn_one_hot_encoder, so the attribute names below mirror
    the ones that converter is expected to read from an operator.
    NOTE(review): the exact attributes the one-hot-encoder converter consumes
    are not visible here -- confirm before renaming or removing any of them.

    :param model: the underlying (fitted) model; stored as ``raw_operator``
    :param inputs: the input passed through to the wrapped converter; stored
        unchanged as ``inputs``
    :param op_type: the operator type name; stored as ``type``
    """

    def __init__(self, model, inputs, op_type):
        self.raw_operator = model
        self.inputs = inputs
        self.type = op_type

def convert_sklearn_knn(scope, operator, container):
# Computational graph:
Expand All @@ -24,6 +32,7 @@ def convert_sklearn_knn(scope, operator, container):
# C: Number of classes
# input: test set input
# output: test set output
# output_prob: test set class probabilities
prabhat00155 marked this conversation as resolved.
Show resolved Hide resolved
#
# Graph:
#
Expand All @@ -50,45 +59,55 @@ def convert_sklearn_knn(scope, operator, container):
# V
# ARRAYFEATUREEXTRACTOR <- training_labels[M]
# |
# V (KNN Regressor)
# topk_labels[K] ------------------> REDUCEMEAN --> output[1]
# |
# /|\
# / | \(KNN Classifier)
# / | \
# / | \
# / | \__
# | | |
# V V V
# label0 -> EQUAL EQUAL ... EQUAL <- label(C-1)
# | | |
# V V V
# output_label_0[C] ... output_label_(C-1)[C]
# | | |
# V V V
# CAST CAST ... CAST
# | | |
# V V V
# output_cast_label_0[C] ... output_cast_label_(C-1)[C]
# | | |
# V V V
# REDUCESUM REDUCESUM ... REDUCESUM
# | | |
# V V V
# output_label_reduced_0[1] ... output_label_reduced_(C-1)[1]
# \ | /
# \____ | ____/
# \ | ___/
# \ | /
# \|/
# V
# CONCAT --> concat_labels[C]
# |
# V
# ARGMAX --> predicted_label[1]
# |
# V
# output[1] <--- ARRAYFEATUREEXTRACTOR <- classes[C]
# V (KNN Regressor)
# topk_labels[K] ----------------------------> REDUCEMEAN --> output[1]
prabhat00155 marked this conversation as resolved.
Show resolved Hide resolved
# | |___________________________________________
prabhat00155 marked this conversation as resolved.
Show resolved Hide resolved
# /|\ (probability calculation) |
# / | \(KNN Classifier) |
# / | \ V
# / | \ pred_label_shape[2] ----> RESHAPE
# / | \__ |
# | | | V
# V V V reshaped_pred_label[K, 1]
# label0 -> EQUAL EQUAL ... EQUAL <- label(C-1) |
# | | | |
# V V V |
# output_label_0[C] ... output_label_(C-1)[C] |
# | | | V
# V V V CAST
# CAST CAST ... CAST |
# | | | V
# V V V cast_pred_labels[K, 1]
# output_cast_label_0[C] ... output_cast_label_(C-1)[C] |
prabhat00155 marked this conversation as resolved.
Show resolved Hide resolved
# | | | |
# V V V |
# REDUCESUM REDUCESUM ... REDUCESUM |
# | | | |
# V V V |
# output_label_reduced_0[1] ... output_label_reduced_(C-1)[1] |
# \ | / |
# \____ | ____/ |
# \ | ___/ |
# \ | / |
# \|/ |
# V |
# CONCAT --> concat_labels[C] |
# | |
# V |
# ARGMAX --> predicted_label[1] |
# | |
# V |
# output[1] <--- ARRAYFEATUREEXTRACTOR <- classes[C] |
# |
# |
# |
# ohe_model --> ONEHOTENCODER <-------------------------------------------|
prabhat00155 marked this conversation as resolved.
Show resolved Hide resolved
# |
# V
# ohe_result[n_neighbors, C] -> REDUCEMEAN -> reduced_prob[1, C]
# |
# V
# output_probability[1, C] <- ZipMap

knn = operator.raw_operator
training_examples = knn._fit_X
Expand Down Expand Up @@ -133,12 +152,101 @@ def convert_sklearn_knn(scope, operator, container):
[topk_values_name, topk_indices_name], name=scope.get_unique_operator_name('TopK'), k=knn.n_neighbors)

if operator.type == 'SklearnKNeighborsClassifier':
raise NotImplementedError
prabhat00155 marked this conversation as resolved.
Show resolved Hide resolved
classes = knn.classes_
concat_labels_name = scope.get_unique_variable_name('concat_labels')
classes_name = scope.get_unique_variable_name('classes')
predicted_label_name = scope.get_unique_variable_name('predicted_label')
final_label_name = scope.get_unique_variable_name('final_label')
reshaped_final_label_name = scope.get_unique_variable_name('reshaped_final_label')

class_type = onnx_proto.TensorProto.STRING
labels_name = [None] * len(classes)
output_label_name = [None] * len(classes)
output_cast_label_name = [None] * len(classes)
output_label_reduced_name = [None] * len(classes)
zipmap_attrs = {'name': scope.get_unique_operator_name('ZipMap')}

if np.issubdtype(knn.classes_.dtype, np.floating):
class_type = onnx_proto.TensorProto.INT32
classes = np.array(list(map(lambda x: int(x), classes)))
zipmap_attrs['classlabels_int64s'] = classes
elif np.issubdtype(knn.classes_.dtype, np.signedinteger):
class_type = onnx_proto.TensorProto.INT32
zipmap_attrs['classlabels_int64s'] = classes
else:
zipmap_attrs['classlabels_strings'] = classes
prabhat00155 marked this conversation as resolved.
Show resolved Hide resolved
classes = np.array([s.encode('utf-8') for s in classes])

for i in range(len(classes)):
labels_name[i] = scope.get_unique_variable_name('class_labels_{}'.format(i))
container.add_initializer(labels_name[i], onnx_proto.TensorProto.INT32,
[], [i])
prabhat00155 marked this conversation as resolved.
Show resolved Hide resolved
output_label_name[i] = scope.get_unique_variable_name('output_label_{}'.format(i))
output_cast_label_name[i] = scope.get_unique_variable_name('output_cast_label_{}'.format(i))
output_label_reduced_name[i] = scope.get_unique_variable_name('output_label_reduced_{}'.format(i))

container.add_initializer(classes_name, class_type,
classes.shape, classes)
container.add_initializer(training_labels_name, onnx_proto.TensorProto.INT32,
training_labels.shape, training_labels)

container.add_node('ArrayFeatureExtractor', [training_labels_name, topk_indices_name], topk_labels_name,
name=scope.get_unique_operator_name('ArrayFeatureExtractor'), op_domain='ai.onnx.ml')
for i in range(len(classes)):
container.add_node('Equal', [labels_name[i], topk_labels_name],
output_label_name[i])
# Casting to Int32 instead of Int64 as ReduceSum doesn't seem to support Int64
apply_cast(scope, output_label_name[i], output_cast_label_name[i], container,
to=onnx_proto.TensorProto.INT32)
container.add_node('ReduceSum', output_cast_label_name[i],
output_label_reduced_name[i], axes=[1])

container.add_node('Concat', [s for s in output_label_reduced_name],
concat_labels_name, name=scope.get_unique_operator_name('Concat'), axis=0)
container.add_node('ArgMax', concat_labels_name,
predicted_label_name, name=scope.get_unique_operator_name('ArgMax'))
if class_type == onnx_proto.TensorProto.INT32:
container.add_node('ArrayFeatureExtractor', [classes_name, predicted_label_name], final_label_name,
name=scope.get_unique_operator_name('ArrayFeatureExtractor'), op_domain='ai.onnx.ml')
apply_reshape(scope, final_label_name, reshaped_final_label_name, container, desired_shape=[-1,])
apply_cast(scope, reshaped_final_label_name, operator.outputs[0].full_name, container,
to=onnx_proto.TensorProto.INT64)
else:
container.add_node('ArrayFeatureExtractor', [classes_name, predicted_label_name],
operator.outputs[0].full_name,
name=scope.get_unique_operator_name('ArrayFeatureExtractor2'), op_domain='ai.onnx.ml')

# Calculation of class probability
pred_label_shape = [-1, 1]

pred_label_shape_name = scope.get_unique_variable_name('pred_label_shape')
cast_pred_label_name = scope.get_unique_variable_name('cast_pred_label')
reshaped_pred_label_name = scope.get_unique_variable_name('reshaped_pred_label')
reduced_prob_name = scope.get_unique_variable_name('reduced_prob')

container.add_initializer(pred_label_shape_name, onnx_proto.TensorProto.INT64,
[len(pred_label_shape)], pred_label_shape)
ohe_model = OneHotEncoder(categorical_features='all').fit(training_labels.reshape((-1, 1)))

container.add_node('Reshape', [topk_labels_name, pred_label_shape_name],
reshaped_pred_label_name, name=scope.get_unique_operator_name('Reshape'))
apply_cast(scope, reshaped_pred_label_name, cast_pred_label_name, container, to=onnx_proto.TensorProto.INT64)

op_type = sklearn_operator_name_map[type(ohe_model)]
ohe_operator = Oper(ohe_model, cast_pred_label_name, op_type)
ohe_result_name = convert_sklearn_one_hot_encoder(scope, ohe_operator, container, flag=True)
prabhat00155 marked this conversation as resolved.
Show resolved Hide resolved

container.add_node('ReduceMean', ohe_result_name,
reduced_prob_name, name=scope.get_unique_operator_name('ReduceMean'), axes=[0])
container.add_node('ZipMap', reduced_prob_name, operator.outputs[1].full_name,
op_domain='ai.onnx.ml', **zipmap_attrs)
elif operator.type == 'SklearnKNeighborsRegressor':
container.add_node('ArrayFeatureExtractor', [training_labels_name, topk_indices_name],
topk_labels_name, name=scope.get_unique_operator_name('ArrayFeatureExtractor'), op_domain='ai.onnx.ml')
topk_labels_name, name=scope.get_unique_operator_name('ArrayFeatureExtractor'),
op_domain='ai.onnx.ml')
container.add_node('ReduceMean', topk_labels_name,
operator.output_full_names, name=scope.get_unique_operator_name('ReduceMean'))


# Both KNN operator names are handled by the same converter function;
# convert_sklearn_knn branches on operator.type to emit either the
# classifier or the regressor part of the graph.
register_converter('SklearnKNeighborsClassifier', convert_sklearn_knn)
register_converter('SklearnKNeighborsRegressor', convert_sklearn_knn)