Add GaussianMixture #169
@@ -0,0 +1,147 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

import numpy as np
from sklearn.mixture.gaussian_mixture import _compute_log_det_cholesky
from ..common._registration import register_converter
from ..algebra.onnx_ops import (
    OnnxAdd, OnnxSub, OnnxMul, OnnxGemm, OnnxReduceSumSquare,
    OnnxReduceLogSumExp, OnnxExp, OnnxArgMax, OnnxConcat
)

def convert_sklearn_gaussian_mixture(scope, operator, container):
    """
    Converter for *GaussianMixture*.
    Parameters which change the prediction function:

    * *covariance_type*
    """
    X = operator.inputs[0]
    out = operator.outputs
    op = operator.raw_operator
    n_features = X.type.shape[1]
    n_components = op.means_.shape[0]

    # All comments below come from scikit-learn code and tell
    # which functions are being onnxified.
    # def _estimate_weighted_log_prob(self, X):
    #     self._estimate_log_prob(X) + self._estimate_log_weights()
    log_weights = np.log(op.weights_)  # self._estimate_log_weights()

    # self._estimate_log_prob(X)

Reviewer: Commented code again?
Author: Same reason.

    log_det = _compute_log_det_cholesky(
        op.precisions_cholesky_, op.covariance_type, n_features)

    if op.covariance_type == 'full':
        # shape(op.means_) = (n_components, n_features)

Reviewer: I see a lot of commented code in this file, could you clean it up?
Author: I prefer to leave it; it is how it is implemented in scikit-learn. I can add a comment to specify it comes from sklearn.

        # shape(op.precisions_cholesky_) =
        #     (n_components, n_features, n_features)

        # log_prob = np.empty((n_samples, n_components))
        # for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)):
        #     y = np.dot(X, prec_chol) - np.dot(mu, prec_chol)
        #     log_prob[:, k] = np.sum(np.square(y), axis=1)

        ys = []
        for c in range(n_components):
            prec_chol = op.precisions_cholesky_[c, :, :]
            cst = - np.dot(op.means_[c, :], prec_chol)
            y = OnnxGemm(X, prec_chol, cst, alpha=1., beta=1.)
            y2s = OnnxReduceSumSquare(y, axes=[1])
            ys.append(y2s)
        log_prob = OnnxConcat(*ys, axis=1)

    elif op.covariance_type == 'tied':
        # shape(op.means_) = (n_components, n_features)
        # shape(op.precisions_cholesky_) =
        #     (n_features, n_features)

        # log_prob = np.empty((n_samples, n_components))
        # for k, mu in enumerate(means):
        #     y = np.dot(X, precisions_chol) - np.dot(mu, precisions_chol)
        #     log_prob[:, k] = np.sum(np.square(y), axis=1)

        precisions_chol = op.precisions_cholesky_
        ys = []
        for f in range(n_components):
            cst = - np.dot(op.means_[f, :], precisions_chol)
            y = OnnxGemm(X, precisions_chol, cst, alpha=1., beta=1.)
            y2s = OnnxReduceSumSquare(y, axes=[1])
            ys.append(y2s)
        log_prob = OnnxConcat(*ys, axis=1)

    elif op.covariance_type == 'diag':
        # shape(op.means_) = (n_components, n_features)
        # shape(op.precisions_cholesky_) =
        #     (n_components, n_features)

        # precisions = precisions_chol ** 2
        # log_prob = (np.sum((means ** 2 * precisions), 1) -
        #             2. * np.dot(X, (means * precisions).T) +
        #             np.dot(X ** 2, precisions.T))

        precisions = op.precisions_cholesky_ ** 2
        mp = np.sum((op.means_ ** 2 * precisions), 1)
        zeros = np.zeros((n_components, ))
        xmp = OnnxGemm(X, (op.means_ * precisions).T, zeros,
                       alpha=-2., beta=0.)
        term = OnnxGemm(OnnxMul(X, X), precisions.T, zeros, alpha=1., beta=0.)
        log_prob = OnnxAdd(OnnxAdd(mp, xmp), term)

    elif op.covariance_type == 'spherical':
        # shape(op.means_) = (n_components, n_features)
        # shape(op.precisions_cholesky_) = (n_components, )

        # precisions = precisions_chol ** 2
        # log_prob = (np.sum(means ** 2, 1) * precisions -
        #             2 * np.dot(X, means.T * precisions) +
        #             np.outer(row_norms(X, squared=True), precisions))

        zeros = np.zeros((n_components, ))
        precisions = op.precisions_cholesky_ ** 2
        normX = OnnxReduceSumSquare(X, axes=[1])
        outer = OnnxGemm(normX, precisions[np.newaxis, :], zeros,
                         alpha=1., beta=1.)
        xmp = OnnxGemm(X, (op.means_.T * precisions), zeros,
                       alpha=-2., beta=0.)
        mp = np.sum(op.means_ ** 2, 1) * precisions
        log_prob = OnnxAdd(mp, OnnxAdd(xmp, outer))
    else:
        raise RuntimeError("Unknown op.covariance_type='{}'. Upgrade "
                           "to a more recent version of sklearn-onnx "
                           "or raise an issue.".format(op.covariance_type))

    # -.5 * (cst + log_prob) + log_det
    cst = np.array([n_features * np.log(2 * np.pi)])
    add = OnnxAdd(cst, log_prob)
    mul = OnnxMul(add, np.array([-0.5]))
    if isinstance(log_det, float):
        log_det = np.array([log_det])
    weighted_log_prob = OnnxAdd(OnnxAdd(mul, log_det), log_weights)

    # labels
    labels = OnnxArgMax(weighted_log_prob, axis=1, output_names=out[:1])

    # def _estimate_log_prob_resp():
    #     np.exp(log_resp)
    #     weighted_log_prob = self._estimate_weighted_log_prob(X)
    #     log_prob_norm = logsumexp(weighted_log_prob, axis=1)
    #     with np.errstate(under='ignore'):
    #         log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]

    log_prob_norm = OnnxReduceLogSumExp(weighted_log_prob, axes=[1])
    log_resp = OnnxSub(weighted_log_prob, log_prob_norm)

    # probabilities
    probs = OnnxExp(log_resp, output_names=out[1:])

    # final
    labels.add_to(scope, container)
    probs.add_to(scope, container)


register_converter('SklearnGaussianMixture', convert_sklearn_gaussian_mixture)
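
For context, here is a minimal sketch (not part of this diff) of how the new converter could be exercised end to end. It assumes skl2onnx's public convert_sklearn entry point and an installed onnxruntime; the model name, the input name 'input' and the toy data are illustrative only.

import numpy as np
from sklearn.mixture import GaussianMixture
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt

# Fit a small mixture on random float data.
X = np.random.rand(100, 4).astype(np.float32)
gmm = GaussianMixture(n_components=3, covariance_type='full').fit(X)

# Convert; the graph exposes two outputs: labels and per-component probabilities.
onx = convert_sklearn(gmm, 'gaussian_mixture',
                      initial_types=[('input', FloatTensorType([100, 4]))])

sess = rt.InferenceSession(onx.SerializeToString())
labels, probs = sess.run(None, {'input': X})

# Expected to agree with scikit-learn up to float32 precision.
assert (labels.ravel() == gmm.predict(X)).all()
assert np.allclose(probs, gmm.predict_proba(X), atol=1e-4)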
@@ -0,0 +1,30 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

from ..common._registration import register_shape_calculator
from ..common.data_types import FloatTensorType, Int64TensorType
from ..common.utils import (
    check_input_and_output_numbers,
    check_input_and_output_types
)


def calculate_gaussian_mixture_output_shapes(operator):
    check_input_and_output_numbers(operator, input_count_range=1,
                                   output_count_range=2)
    check_input_and_output_types(operator, good_input_types=[FloatTensorType])

Reviewer: Why is int not allowed as an input type? Scikit allows int features.
Author: I hesitate. Statistically, it makes no sense to fit a gaussian mixture on integer data as it cannot be gaussian. I'll fix it.
(See the sketch after this file for a possible workaround with integer features.)


    if len(operator.inputs[0].type.shape) != 2:
        raise RuntimeError('Input must be a [N, C]-tensor')

    op = operator.raw_operator
    N = operator.inputs[0].type.shape[0]
    operator.outputs[0].type = Int64TensorType([N, 1])
    operator.outputs[1].type = FloatTensorType([N, op.n_components])


register_shape_calculator('SklearnGaussianMixture',
                          calculate_gaussian_mixture_output_shapes)
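
Following up on the review question above about integer inputs: since this shape calculator only accepts FloatTensorType, a user with integer features would currently have to cast them before conversion. A hedged sketch of that user-side workaround (variable names are illustrative, not part of this PR):

import numpy as np
from sklearn.mixture import GaussianMixture
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Integer features are accepted by scikit-learn itself ...
X_int = np.random.randint(0, 10, size=(50, 3))
gmm = GaussianMixture(n_components=2).fit(X_int)

# ... but the graph declared here only takes floats, so cast once
# before conversion and again at prediction time.
X_float = X_int.astype(np.float32)
onx = convert_sklearn(gmm, 'gaussian_mixture',
                      initial_types=[('input', FloatTensorType([50, 3]))])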
Reviewer: Why commented code?
Author: To remember where I found the implementation in scikit-learn.
Reviewer: I think it would be better to have comments instead. That would make it clear to anyone reading the code.
Author: I think I did (line 29). Does it need more?