skl2onnx/algebra/onnx_operator.py

# SPDX-License-Identifier: Apache-2.0

import warnings
from logging import getLogger
import numpy as np
from onnx import GraphProto
from onnx.helper import make_graph, make_model
from onnx.numpy_helper import from_array
from scipy.sparse import coo_matrix
from ..proto import TensorProto
from ..common.data_types import _guess_type_proto_str, _guess_type_proto_str_inv
from ..common._topology import (
    Variable,
    VariableStr,
    Scope,
    _update_domain_version,
    Operator,
    _get_main_opset_version,
    OPSET_TO_IR_VERSION,
)
from ..common._container import ModelComponentContainer
from ..common import utils
from ..common.data_types import guess_proto_type, _guess_numpy_type
from ..common._registration import _converter_pool, _shape_calculator_pool
from .._supported_operators import sklearn_operator_name_map
from ..proto import get_latest_tested_opset_version, onnx_proto
from ..helpers.onnx_helper import infer_outputs
from .graph_state import GraphState, GraphStateVar
from .type_helper import _guess_type


logger = getLogger("skl2onnx")


class OnnxOperatorItem:
    """
    Accessor to one of the output returned by a *OnnxOperator*.

    :param onx_op: OnnxOperator
    :param index: integer
    """

    def __init__(self, onx_op, index, op_version=None):
        if not isinstance(index, int):
            raise TypeError("index must be an integer.")
        self.onx_op = onx_op
        self.index = index
        self.op_version = op_version

    def __str__(self):
        """
        usual
        """
        return "%s[%d]" % (str(self.onx_op), self.index)

    def get_latest_tested_opset_version(self):
        """
        Returns ``get_latest_tested_opset_version()``
        of the wrapped *OnnxOperator* instance.
        """
        return self.onx_op.get_latest_tested_opset_version()

    def add_to(self, scope, container, operator=None, run_converters=False):
        """
        Adds outputs to the container if not already added,
        registered the outputs if the node is not final.

        :param scope: scope
        :param container: container
        :param operator: overwrite inputs
        :param run_converters: must be True if called from method `to_onnx`
        """
        self.onx_op.add_to(
            scope, container, operator=operator, run_converters=run_converters
        )

    def get_output_name(self, i=0):
        """
        Returns the output.
        """
        if i != 0:
            raise IndexError("Can only return the first item.")
        return self.onx_op.get_output_name(self.index)

    def get_output(self, i=0):
        """
        Returns the output.
        """
        if i != 0:
            raise IndexError("Can only return the first item.")
        return self.onx_op.get_output(self.index)

    @property
    def outputs(self):
        """
        Returns the outputs of the node.
        """
        if self.onx_op is None:
            raise RuntimeError(
                "self.onx_op cannot be None, type(self)={}".format(type(self))
            )
        if self.index is None:
            raise RuntimeError(
                "self.index cannot be None, type(self)={}".format(type(self))
            )
        outputs = self.onx_op.outputs
        if outputs is None:
            raise RuntimeError(
                "self.onx_op.outputs cannot be None, "
                "type(self)={}, type(self.onx_op)={}, "
                "type(self.onx_op.state)={}".format(
                    type(self), type(self.onx_op), type(self.onx_op.state)
                )
            )
        return outputs[self.index : self.index + 1]

    def get_output_type_inference(self, input_shapes=None):
        """
        Returns the inferred shape.
        """
        if self.onx_op is None:
            raise RuntimeError(
                "self.onx_op cannot be None, type(self)={}".format(type(self))
            )
        if self.index is None:
            raise RuntimeError(
                "self.index cannot be None, type(self)={}".format(type(self))
            )
        outputs = self.onx_op.get_output_type_inference(input_shapes)
        if outputs is None:
            raise RuntimeError(
                "self.onx_op.outputs cannot be None, "
                "type(self)={}, type(self.onx_op)={}, "
                "type(self.onx_op.state)={}".format(
                    type(self), type(self.onx_op), type(self.onx_op.state)
                )
            )
        return outputs[self.index : self.index + 1]


class OnnxOperator:
    """
    Ancestor to every *ONNX* operator exposed in
    :mod:`onnx_ops` and :mod:`onnx_ops_ml`. These files
    are automatically generated by unit test
    *test_onnx_operators_parse_spec*
    Every instance is supposed to be included in
    a graph as a node.

    :param inputs: list of inputs expected by the operator
    :param op_version: to select a specific version of the operator
    :param output_names: used defined names for the outputs
    :param domain: to overwrite the default domain
    :param global_context: operator *If* executes one subgraph
        whose nodes may use one existing output in the current
        context. If not used in the main graph, these operators
        are not linked to the output and cannot be retrieved.
        *global_context* is a dictionary mapped the subgraph input
        names to these operators.
    :param clear_subgraph_inputs: clears subgraphs outputs.
        Operator *If* does take subgraphs as attribute,
        there are subgraphs with no inputs and
        global variable as hidden inputs.
    :param kwargs: additional parameters of the operator

    .. versionchanged:: 1.10.1
        Parameter *global_context*, *clear_subgraph_inputs*
        were added.
    """

    class OnnxOperatorVariable(GraphStateVar):
        def __init__(self, index, name=None):
            self.index = index
            self.name = name

        def as_variable(self, scope):
            name = "ov%s" % self.name
            if hasattr(self, "variable_") and self.variable_.onnx_name == name:
                return self.variable_
            var = Variable(name, name, scope=scope, type=None)
            if scope is not None:
                scope.register_variable(var)
            self.variable_ = var
            return var

        def __repr__(self):
            return "OnnxOperatorVariable('%s')" % self.name

        def __iter__(self):
            yield self.name
            yield None

    class UnscopedVariable(GraphStateVar):
        def __init__(self, name):
            self.name = name

        def as_variable(self, scope):
            name = self.name
            if hasattr(self, "variable_") and self.variable_.onnx_name == name:
                return self.variable_
            if scope is not None:
                if name in scope.variables:
                    var = scope.variables[name]
                else:
                    onnx_name = scope.get_unique_variable_name(name)
                    var = Variable(name, onnx_name, scope=scope, type=None)
                    scope.register_variable(var)
                self.variable_ = var
            else:
                var = Variable(name, name, scope=scope, type=None)
            return var

        def __eq__(self, name):
            if isinstance(name, str):
                return name == self.name
            elif isinstance(name, OnnxOperator.UnscopedVariable):
                return self.name == name.name
            else:
                raise TypeError("Unsupported type for comparison {}".format(type(name)))

        def __repr__(self):
            return "UnscopedVariable('%s')" % self.name

        def __iter__(self):
            yield self.name
            yield None

    class ConstantVariable(GraphStateVar):
        def __init__(self, value):
            self.value = value

        def as_variable(self, scope):
            ha = utils.hash_array(self.value)
            name = "CST%s" % ha
            if hasattr(self, "variable_") and self.variable_.onnx_name == name:
                return self.variable_
            if scope is not None:
                var = scope.declare_local_variable(name, type=_guess_type(self.value))
            else:
                var = Variable(name, name, scope=scope, type=_guess_type(self.value))
            self.variable_ = var
            return var

        @property
        def ConstantValue(self):
            return self.value

        def __str__(self):
            return "Cst({})".format(self.value)

        def __iter__(self):
            yield "id%d" % id(self)
            yield _guess_type(self.value)

    def find_schema(self, op_version):
        """
        Checks if there is an existing schema for a
        specific version.

        :param op_version: requested version
        :return: schema
        """
        if not hasattr(self.__class__, "past_version"):
            raise RuntimeError(
                "Missing attribute 'past_version', there is "
                "no other available schema."
            )
        found = None
        for v in self.past_version.values():
            if v.since_version > op_version:
                continue
            if found is None or v.since_version > found.since_version:
                found = v
        if found is None:
            raise RuntimeError(
                "Operator '{}': requested version {} < "
                "{} schema version.".format(
                    self.__class__.__name__, op_version, self.since_version
                )
            )
        return found

    def __init__(
        self,
        *inputs,
        op_version=None,
        output_names=None,
        domain=None,
        global_context=None,
        clear_subgraph_inputs=False,
        **kwargs
    ):
        if output_names is None and self.__class__.__name__.startswith("OnnxScan"):
            raise NotImplementedError(
                "The class cannot infer the number of variables "
                "for node '{}' yet. output_names must be specified"
                ".".format(self.__class__.__name__)
            )
        if isinstance(output_names, (str, Variable)):
            output_names = [output_names]
            if isinstance(output_names[0], str):
                output_names[0] = VariableStr(output_names[0])
        elif isinstance(output_names, Operator):
            if len(output_names.outputs) == 0:
                raise ValueError(
                    "output_names cannot be empty (operator %r)." "" % output_names
                )
            output_names = output_names.outputs.copy()
        elif isinstance(output_names, Operator.OperatorList):
            if len(output_names) == 0:
                raise ValueError(
                    "output_names cannot be empty (operator %r)."
                    "" % self.__class__.__name__
                )
            output_names = output_names.copy()
        elif isinstance(output_names, list):
            if len(output_names) == 0:
                raise ValueError(
                    "output_names cannot be empty (operator %r)."
                    "" % self.__class__.__name__
                )
            output_names = output_names.copy()
            for i in range(len(output_names)):
                if isinstance(output_names[i], str):
                    output_names[i] = VariableStr(output_names[i])
        elif output_names is not None:
            raise TypeError(
                "output_names must be a string or a list not %r."
                "" % type(output_names)
            )

        if op_version is None:
            if domain == "":
                self.op_version = get_latest_tested_opset_version()
            else:
                self.op_version = None
        else:
            self.op_version = op_version
        self.since_version = self.__class__.since_version

        if self.op_version is not None and self.op_version < self.since_version:
            schema = self.find_schema(self.op_version)
            self.since_version = schema.since_version
            self.expected_inputs = schema.expected_inputs.copy()
            self.expected_outputs = schema.expected_outputs.copy()
            self.input_range = schema.input_range
            self.output_range = schema.output_range
        else:
            self.expected_inputs = (
                None
                if self.__class__.expected_inputs is None
                else self.__class__.expected_inputs.copy()
            )
            self.expected_outputs = (
                None
                if self.__class__.expected_outputs is None
                else self.__class__.expected_outputs.copy()
            )
            self.input_range = self.__class__.input_range
            self.output_range = self.__class__.output_range
            if self.__class__.__name__ not in {"OnnxScan", "OnnxLoop", "OnnxIf"}:
                # TODO: the minimum opset depends on embedded graph
                # by default, it takes the given op_version but the
                # optimal value could be lower.
                self.op_version = self.since_version
            if self.op_version is None:
                self.op_version = self.since_version

        if self.op_version is not None and self.op_version < self.since_version:
            raise RuntimeError(
                "Operator '{}': requested version {} < "
                "{} schema version.".format(
                    self.__class__.__name__, self.op_version, self.since_version
                )
            )

        self.state = None
        self.domain = domain
        self.kwargs = kwargs
        self.onnx_prefix_name = None

        # check inputs
        if len(inputs) == 0:
            if self.input_range[0] == self.input_range[1]:
                self.inputs = [
                    OnnxOperator.UnscopedVariable(_[0]) for _ in self.expected_inputs
                ]
            else:
                # The number of inputs may vary.
                self.inputs = None
        else:
            self.inputs = []
            for inp in inputs:
                if isinstance(inp, str):
                    self.inputs.append(OnnxOperator.UnscopedVariable(inp))
                elif isinstance(
                    inp, (OnnxOperator, Variable, OnnxOperatorItem, OnnxSubEstimator)
                ):
                    self.inputs.append(inp)
                elif isinstance(inp, tuple) and len(inp) == 2:
                    self.inputs.append(inp)
                elif isinstance(inp, (np.ndarray, coo_matrix)):
                    self.inputs.append(OnnxOperator.ConstantVariable(inp))
                elif isinstance(inp, TensorProto):
                    self.inputs.append(OnnxOperator.ConstantVariable(inp))
                elif isinstance(
                    inp,
                    (OnnxOperator.OnnxOperatorVariable, OnnxOperator.ConstantVariable),
                ):
                    self.inputs.append(inp)
                elif isinstance(
                    inp, (np.int64, np.float32, np.float64, np.bool_, np.int8, np.uint8)
                ):
                    self.inputs.append(OnnxOperator.ConstantVariable(inp))
                elif isinstance(inp, (float,)):
                    self.inputs.append(np.float64(inp))
                elif isinstance(inp, (int,)):
                    self.inputs.append(np.int64(inp))
                else:
                    raise TypeError(
                        "Unable to interpret the input name for type {} in "
                        "operator '{}' (value={}).".format(
                            type(inp), self.__class__.__name__, inp
                        )
                    )

        if self.inputs is not None:
            if (
                len(self.inputs) < self.input_range[0]
                or len(self.inputs) > self.input_range[1]
            ):
                raise RuntimeError(
                    "Operator '{}' expects a number of inputs "
                    "in [{}, {}] not {} (expected opset={}, "
                    "class opset={})".format(
                        self.operator_name,
                        *self.input_range,
                        len(self.inputs),
                        op_version,
                        self.op_version
                    )
                )
        # global context
        if global_context is None:
            self.global_context = None
        else:
            if not isinstance(global_context, dict):
                raise TypeError(
                    "global_context must be a dictionary not %r."
                    "" % type(global_context)
                )
            for k, v in global_context.items():
                if not isinstance(v, (OnnxOperator, OnnxOperatorItem)):
                    raise TypeError(
                        "Value %r in must be an OnnxOperator or an "
                        "OnnxOperatorItem not %r." % (k, type(v))
                    )
            self.global_context = global_context

        # check output
        self.output_names = output_names
        self.output_variables = None

        if self.output_names is not None:
            if len(self.output_names) == 0:
                raise ValueError(
                    "output_names can be None but cannot be empty for "
                    "operator %r." % self
                )
            if self.output_variables is None:
                self.output_variables = [None for o in self.output_names]
            for i in range(len(self.output_names)):
                name = self.output_names[i]
                if isinstance(name, Variable):
                    self.output_variables[i] = name
                else:
                    raise TypeError(
                        "output_names must be a list of strings "
                        "and element %r is %r (%r)" % (i, type(name), name)
                    )
            if all(map(lambda x: x is None, self.output_variables)):
                self.output_variables = None

        if self.output_names is not None and (
            self.expected_outputs is None
            or len(self.output_names) > len(self.expected_outputs)
        ):
            if self.expected_outputs is None:
                self.expected_outputs = []
            for i in range(len(self.expected_outputs), len(self.output_names)):
                self.expected_outputs.append((self.output_names[i], None))

        if self.expected_inputs is None or len(self.inputs) > len(self.expected_inputs):
            if self.expected_inputs is None:
                self.expected_inputs = []
            for i in range(len(self.expected_inputs), len(self.inputs)):
                inp = self.inputs[i]
                if isinstance(inp, GraphStateVar):
                    inp = tuple(inp)
                elif isinstance(inp, str):
                    inp = (inp, None)
                elif hasattr(inp, "add_to"):
                    # OnnxOperator
                    existing = set(_[0] for _ in self.expected_inputs)
                    i = 10
                    name = "input%d" % (10 + i)
                    while name in existing:
                        i += 1
                        name = "input%d" % (10 + i)
                    inp = (name, None)
                self.expected_inputs.append(inp)

        self.output_names_ = None
        self._post_process_attributes(clear_subgraph_inputs=clear_subgraph_inputs)
        logger.debug(
            "[Ops] +%s-%d (%s) id=%d",
            self.__class__.__name__,
            self.op_version,
            self.domain,
            id(self),
        )

    def _post_process_attributes(self, clear_subgraph_inputs=False):
        """
        Walks through attributes and replaces them by ONNX
        values.
        """
        # Looks into attributes if there is any tuple
        # (GraphProto, OnnxOperator). In that case, the function
        # replaces the tuple by the graph proto and keeps
        # in attributes graph_algebra the OnnxOperator
        # which is the source of it.
        updates = {}
        graph_algebra = {}
        for k, v in self.kwargs.items():
            if isinstance(v, tuple) and isinstance(v[0], GraphProto):
                updates[k] = v[0]
                graph_algebra[k] = v[1]
        if len(graph_algebra) > 0:
            self.kwargs.update(updates)
            self.graph_algebra = graph_algebra

        if clear_subgraph_inputs:
            for k, v in self.kwargs.items():
                if isinstance(v, GraphProto):
                    del v.input[:]

        if self.__class__.__name__ == "OnnxConstantOfShape":
            if "value" in self.kwargs:
                value = self.kwargs["value"]
                if isinstance(value, TensorProto):
                    return
                if isinstance(value, np.ndarray):
                    if value.shape == (1,):
                        val = value[0]
                    elif len(value.shape) == 0:
                        val = value
                    else:
                        raise RuntimeError(
                            "Unexpected shape %r for value, it must be "
                            "an array of one element." % value.shape
                        )
                    self.kwargs["value"] = from_array(
                        np.array([val], dtype=value.dtype)
                    )
                    return
                raise TypeError(
                    "Unexpected type %r for value. It should be an array "
                    "of one element." % type(value)
                )
            return

        if self.__class__.__name__ == "OnnxCast":
            if "to" in self.kwargs:
                value = self.kwargs["to"]
                if isinstance(value, int):
                    return
                to = guess_proto_type(_guess_numpy_type(value, None))
                self.kwargs["to"] = to
            return

    def __str__(self):
        """
        usual
        """
        return "{}({} in) -> {}".format(
            self.__class__.__name__,
            len(self.inputs) if self.inputs is not None else 0,
            (
                [str(o) for o in self.output_names]
                if self.output_names is not None
                else "?"
            ),
        )

    def set_onnx_name_prefix(self, onnx_prefix_name):
        """
        Provides a name to define a prefix in the onnx graph
        to avoid to get unreadable node names. The method
        does not overwrite an existing name, it propagates
        the prefix to inputs and stops the propagation
        if the prefix is already defined.
        """
        if self.onnx_prefix_name is None:
            self.onnx_prefix_name = onnx_prefix_name
            for inp in self.inputs:
                if hasattr(inp, "onnx_prefix_name"):
                    inp.set_onnx_name_prefix(onnx_prefix_name)
        return self

    @property
    def onnx_prefix(self):
        if self.onnx_prefix_name is None:
            name = self.__class__.__name__
            if name.startswith("Onnx"):
                name = name[4:]
            return name[:2]
        return self.onnx_prefix_name

    def __getitem__(self, index):
        """
        Returns an accessor to one of the output
        of this node.
        """
        return OnnxOperatorItem(self, index, self.op_version)

    def get_output_name(self, i, scope=None):
        "Returns name of output *i*."
        if self.state is not None:
            return self.state.computed_outputs_[i][0]
        if self.output_names_ is not None:
            return self.output_names_[i]
        self._set_output_names_(getattr(self, "scope", None) or scope, None)
        return self.output_names_[i]

    def get_output(self, i, scope=None):
        "Returns name of output *i*."
        if self.state is not None:
            return self.state.computed_outputs_[i]
        if self.output_names_ is not None:
            res = self.output_names_[i]
            if not isinstance(res, (tuple, Variable)):
                raise RuntimeError(
                    "Unable to retrieve output %r from %r." "" % (i, self)
                )
            return res

    def _set_output_names_(self, scope, operator):
        "Called by add_to."
        if operator is not None:
            self.operator_ = operator
        if self.output_names_ is not None:
            raise RuntimeError("output_names_ is already set.")
        elif self.output_variables is not None:
            outputs = [o.onnx_name for o in self.output_variables]
            self.output_names_ = outputs
        elif self.output_names:
            if not isinstance(self.output_names, (list, tuple)):
                louts = [self.output_names]
            else:
                louts = self.output_names
            if operator is not None and len(louts) != len(operator.outputs):
                raise RuntimeError(
                    "Output mismatch for '{}'\n{}\n{}".format(
                        type(operator.raw_operator), louts, operator.outputs
                    )
                )
            outputs = []
            for iname, name in enumerate(louts):
                if name is None:
                    raise AssertionError(
                        "Issue for operator '{}'.".format(type(operator.raw_operator))
                    )
                if name.startswith("u(") and name[-1] == ")":
                    name = scope.get_unique_variable_name(name[2:-1])
                elif operator is not None:
                    oout = operator.outputs[iname]
                    name = oout.onnx_name
                outputs.append(name)
            self.output_names_ = outputs
        elif self.expected_outputs is None:
            raise AttributeError(
                "expected_outputs is None for operator=%r, output_names=%r, "
                "output_variables=%r, operator=%r"
                % (self, self.output_names, self.output_variables, operator)
            )
        else:
            if scope is None:
                raise RuntimeError("scope must not be None.")
            outputs = []
            for name in self.expected_outputs:
                name = scope.get_unique_variable_name(self.onnx_prefix + "_" + name[0])
                outputs.append(name)
            self.output_names_ = outputs
        return outputs

    def _add_to_inputs(self, operator):
        inputs = []
        for input in self.inputs:
            if isinstance(input, OnnxOperator.OnnxOperatorVariable):
                if operator is None:
                    raise RuntimeError(
                        "A placeholder cannot be replaced "
                        "as an operator is not specified."
                    )
                if len(operator.inputs) == 0:
                    raise RuntimeError("No input variable in {}.".format(operator))
                # The inputs must be looked into the graph.
                for i in operator.inputs:
                    if i.onnx_name == input.name:
                        inputs.append(i)
                        break
                else:
                    vars = ", ".join(
                        map(lambda o: "'%s'" % o.onnx_name, operator.inputs)
                    )
                    raise RuntimeError(
                        "Unable to find variable " "{} in {}.".format(input, vars)
                    )
            else:
                inputs.append(input)
        return inputs

    def add_to(self, scope, container, operator=None, run_converters=False):
        """
        Adds outputs to the container if not already added,
        registered the outputs if the node is not final.

        :param scope: scope
        :param container: container
        :param operator: overwrite inputs
        :param run_converters: False by default, must be True if
            called from method `to_onnx`

        At this stage, inputs types are not necessarily known.
        """
        if self.state is None:
            if self.is_deprecated:
                raise RuntimeError(
                    "Node '{}' is deprecated. This API cannot deprecated "
                    "nodes.".format(self.__class__.__name__)
                )
            if self.op_version is not None and self.op_version < self.since_version:
                raise RuntimeError(
                    "Incompatible versions for node '{}'  op_version {} "
                    "< since_version {}.".format(
                        self.__class__.__name__, self.op_version, self.since_version
                    )
                )
            if self.kwargs.get("op_version", "") is None:
                kwargs = self.kwargs.copy()
                del kwargs["op_version"]
            else:
                kwargs = self.kwargs

            self._set_output_names_(scope, operator)
            domain = self.domain
            if domain is None:
                domain = self.__class__.domain
            inputs = self._add_to_inputs(operator)

            logger.debug("[Ops.add_to] state id=%d", id(self))
            self.state = GraphState(
                inputs,
                self.output_names_,
                self.operator_name,
                scope,
                container,
                None,
                op_version=self.op_version,
                op_domain=domain,
                onnx_prefix_name=self.onnx_prefix,
                expected_inputs=self.expected_inputs,
                expected_outputs=self.expected_outputs,
                input_range=self.input_range,
                output_range=self.output_range,
                operator=operator,
                run_converters=run_converters,
                **kwargs
            )
            self.state.run()
        self._verify_add_to_()

    def _verify_add_to_(self):
        if self.state is None:
            raise RuntimeError(
                "Graph was not produced for operator '{}': {}."
                "".format(self.__class__.__name__, self)
            )
        for i in self.inputs:
            if hasattr(i, "_verify_add_to_"):
                i._verify_add_to_()

    @property
    def outputs(self):
        """
        Returns the outputs of the node.
        """
        if self.state is None:
            raise RuntimeError("Method add_to was not called.")
        return self.state.outputs

    def get_output_type_inference(self, input_shapes=None):
        """
        Returns the expected output types in a list.
        """
        if self.state is not None and self.state.computed_outputs_ is not None:
            return self.state.computed_outputs_

        expected_inputs = (
            self.state.computed_inputs_
            if self.expected_inputs is None
            else self.expected_inputs
        )
        if expected_inputs is None:
            raise RuntimeError(
                "Attribute 'expected_inputs' is empty for %r, "
                "input_shapes=%r." % (self, input_shapes)
            )

        expected_outputs = (
            self.state.computed_outputs_
            if self.expected_outputs is None
            else self.expected_outputs
        )
        if expected_outputs is None:
            raise RuntimeError(
                "Attribute 'expected_outputs' is empty for %r, "
                "input_shapes=%r." % (self, input_shapes)
            )

        # Shape inference only work on a full graph.
        if input_shapes is None:
            input_shapes = self.inputs

        given = {}
        for i, inp in enumerate(input_shapes):
            if isinstance(inp, tuple):
                given[i] = inp[1]
            elif isinstance(inp, GraphStateVar):
                dt = inp.as_variable(scope=None)
                if dt.type is None:
                    continue
                given[i] = dt.type
        rev = {}
        for i, (name, v) in enumerate(expected_inputs):
            if v in rev:
                rev[v].append(i)
            else:
                rev[v] = [i]

        res = []
        for name, ct in expected_outputs:
            if isinstance(ct, str) and ct[0] in ("T", "V", "I"):
                if ct[0] not in rev or all(map(lambda k: k not in given, rev[ct])):
                    raise NotImplementedError(
                        "Unable to guess output type for (%r, %r) - "
                        "given=%r - rev=%r input_shapes=%r expected_inputs"
                        "=%r."
                        % (name, ct, given, rev, input_shapes, self.expected_inputs)
                    )
                found = False
                for ind in rev[ct]:
                    if ind in given:
                        res.append((name, given[ind]))
                        found = True
                        break
                if not found:
                    raise NotImplementedError(
                        "Unable to guess output type for (%r, %r) - "
                        "given=%r - rev=%r input_shapes=%r expected_inputs"
                        "=%r."
                        % (name, ct, given, rev, input_shapes, self.expected_inputs)
                    )
                continue
            if isinstance(ct, str):
                try:
                    dt = _guess_type_proto_str(ct, None)
                except NotImplementedError as e:
                    raise NotImplementedError(
                        "Unable to guess output type for (%r, %r) - "
                        "given=%r - rev=%r." % (name, ct, given, rev)
                    ) from e
                res.append((name, dt))
                continue
            try:
                dt = _guess_type_proto_str(_guess_type_proto_str_inv(ct), None)
            except NotImplementedError as e:
                raise NotImplementedError(
                    "Unable to guess output type for (%r, %r) - given=%r - "
                    "rev=%r." % (name, ct, given, rev)
                ) from e
            res.append((name, dt))

        return res

    def _clean_attributes(self, *args, recursive=True):
        """
        Removes attributes in this node and its parents.
        """
        for arg in args:
            if arg in ("state", "output_names_"):
                setattr(self, arg, None)
            elif hasattr(self, arg):
                delattr(self, arg)
        if recursive:
            for obj in self.inputs:
                if isinstance(obj, OnnxOperator):
                    obj._clean_attributes(*args, recursive=True)

    def to_onnx(
        self,
        inputs=None,
        outputs=None,
        other_outputs=None,
        target_opset=None,
        domain=None,
        verbose=0,
    ):
        """
        Converts this operator into an ONNX graph.

        :param inputs: specific inputs (as a dictionary) or
            default inputs if not specified
        :param outputs: specific outputs
        :param other_outputs: additional outputs to consider
            as graph outputs but not outputs of this particular
            node
        :param target_opset: dictionary with target opset per domain,
            None for the default one
        :param domain: domain of the operator
        :param verbose: prints information
        """
        if isinstance(target_opset, dict):
            dom = self.domain or ""
            target_opset = target_opset.get(dom, None)
        elif isinstance(target_opset, int):
            if self.domain not in ("", None):
                # The target_opset is for the domain ''
                # We ignore it.
                target_opset = None
        elif target_opset is not None:
            raise TypeError(
                "target_opset must be a dictionary {domain: "
                "target_opset} not %r for operator %r."
                % (target_opset, self.__class__.__name__)
            )
        if self.domain in ("", None) and target_opset == 1:
            raise RuntimeError("target_opset cannot be 1.")
        if (
            self.op_version is not None
            and target_opset is not None
            and self.op_version > target_opset
        ):
            raise RuntimeError(
                "target_opset={} is lower than the version={} requested "
                "for this node '{}'.".format(
                    target_opset, self.op_version, self.__class__.__name__
                )
            )
        if self.state is not None:
            # The conversion already happened and needs to be cleaned.
            self._clean_attributes("output_names_", "state")
        if inputs is None:
            raise NotImplementedError("inputs must be specified.")
        if isinstance(inputs, dict):
            inputs = [(k, v) for k, v in inputs.items()]
        new_inputs = []
        for obj in inputs:
            if isinstance(obj, Variable):
                new_inputs.append((obj.onnx_name, obj.type))
            elif isinstance(obj, tuple) and len(obj) == 2:
                ty = _guess_type(obj[1])
                new_inputs.append((obj[0], ty))
            else:
                raise TypeError(
                    "Inputs must be Variable or "
                    "tuple(name, type) not {}."
                    "".format(type(obj))
                )
        inputs = new_inputs
        for name, typ in inputs:
            if typ is None:
                raise RuntimeError(
                    "Type input '{}' for operator '{}' "
                    "is unknown. You should specify "
                    "input types.".format(name, self.__class__.__name__)
                )

        registered_models = dict(
            conv=_converter_pool,
            shape=_shape_calculator_pool,
            aliases=sklearn_operator_name_map,
        )

        target_opset = self.get_latest_tested_opset_version(target_opset)
        container = ModelComponentContainer(
            target_opset, registered_models=registered_models
        )

        model_name = self.__class__.__name__
        logger.debug("[Ops.to_onnx] %s id=%d", self.__class__.__name__, id(self))
        scope = Scope(
            model_name, target_opset=target_opset, registered_models=registered_models
        )
        for inp in inputs:
            var = Variable(inp[0], inp[0], scope=scope, type=inp[1])
            container.add_input(var)
            scope.register_variable(var)
        self.add_to(scope, container, run_converters=True)

        extra_outputs = []
        if other_outputs is not None:
            extra_outputs.extend(other_outputs)
        if self.global_context is not None:
            for name, var in self.global_context.items():
                if var.output_names is None:
                    # The variable name is likely to be different.
                    from .onnx_ops import OnnxIdentity

                    var2 = OnnxIdentity(
                        var, op_version=var.op_version, output_names=[name]
                    )
                else:
                    var2 = var
                extra_outputs.append(var2)
        for out in extra_outputs:
            if not hasattr(out, "add_to"):
                raise RuntimeError("Extra outputs must have method 'add_to'.")
            out.add_to(scope, container, run_converters=True)
        logger.debug(
            "[Ops.to_onnx] %s id=%d extra_outputs=%r",
            self.__class__.__name__,
            id(self),
            extra_outputs,
        )

        # infer shapes
        if outputs:
            if isinstance(outputs, dict):
                outputs = [(k, v) for k, v in outputs.items()]
            shapes = []
            for o in outputs:
                if isinstance(o, Variable):
                    shapes.append(o)
                elif isinstance(o, tuple):
                    if isinstance(o[1], np.ndarray):
                        type_shape = _guess_type(o[1])
                    else:
                        type_shape = o[1]
                    shapes.append(Variable(o[0], o[0], None, type_shape))
                else:
                    raise TypeError("Outputs must be Variable or " "tuple(name, type).")
            logger.debug(
                "[Ops.to_onnx] %s id=%d outputs=%r",
                self.__class__.__name__,
                id(self),
                outputs,
            )
        else:
            if verbose > 0:
                print("[op.to_onnx] infer outputs")
            shapes = infer_outputs(
                container,
                container.inputs,
                initializer=container.initializers,
                target_opset=target_opset,
            )
            if self.output_names:
                set_names = set(
                    v.onnx_name if hasattr(v, "onnx_name") else v
                    for v in self.output_names
                )
                shapes = [shape for shape in shapes if shape.onnx_name in set_names]

        logger.debug(
            "[Ops.to_onnx] %s id=%d shapes=%r",
            self.__class__.__name__,
            id(self),
            shapes,
        )
        if verbose > 0:
            print("[op.to_onnx] shapes=%r" % shapes)

        # add the output to the container
        for shape in shapes:
            container.add_output(shape)

        container.ensure_topological_order()
        if verbose >= 2:
            print("---NODES---")
            for node in container.nodes:
                print(
                    "  %s - %s: %r -> %r"
                    % (node.op_type, node.name, node.input, node.output)
                )

        # convert the graph
        graph = make_graph(
            container.nodes,
            model_name,
            container.inputs,
            container.outputs,
            container.initializers,
        )
        onnx_model = make_model(graph)

        # domains
        _update_domain_version(container, onnx_model)

        # metadata
        opv = min(target_opset, _get_main_opset_version(onnx_model) or target_opset)
        irv = OPSET_TO_IR_VERSION.get(opv, onnx_proto.IR_VERSION)
        onnx_model.ir_version = irv
        onnx_model.producer_name = utils.get_producer()
        onnx_model.producer_version = utils.get_producer_version()
        onnx_model.domain = utils.get_domain()
        onnx_model.model_version = utils.get_model_version()
        return onnx_model

    def enumerate_nodes(self):
        """
        Iterates on all nodes of the graph.
        """
        yield self
        for input in self.inputs:
            if isinstance(input, OnnxOperator):
                for i in input.enumerate_nodes():
                    yield i

    def enumerate_variables(self):
        """
        Iterates on all nodes of the graph to find variables.
        Returns an iterator `(node, i)` which means
        `node.inputs[i]` is a variable.
        """
        for node in self.enumerate_nodes():
            if self.inputs:
                for i, input in enumerate(self.inputs):
                    if isinstance(input, (OnnxOperator.UnscopedVariable, Variable)):
                        yield (node, i)

    def enumerate_initial_types(self):
        """
        Retrieves iniatial types of the implemented functions.
        It goes through the graph and returns the name and types
        of all variables not computed by an intemediate node.

        :return: list of `(name, type)`
        """
        for node, i in self.enumerate_variables():
            input = node.inputs[i]
            if isinstance(input, Variable):
                yield (input.onnx_name, input.type)
            elif isinstance(input, OnnxOperator.UnscopedVariable):
                name = input.name
                typ = node.expected_inputs[i]
                yield (name, typ)

    def get_latest_tested_opset_version(self, target_opset=None):
        """
        Returns *op_version*, or the max of all results
        returned by these method applied on every input,
        or ``get_latest_tested_opset_version()``.
        """
        if target_opset is not None:
            return target_opset
        return get_latest_tested_opset_version()


class OnnxSubEstimator(OnnxOperator):
    """
    This operator is used to call the converter of a model
    while converting another one.
    See :ref:`l-custom-parser-alternative`.
    """

    since_version = 1
    expected_inputs = None
    expected_outputs = None
    input_range = [1, 1e9]
    output_range = [1, 1e9]

    def __init__(
        self,
        skl_op,
        *inputs,
        op_version=None,
        output_names=None,
        domain=None,
        options=None,
        input_types=None,
        **kwargs
    ):
        OnnxOperator.__init__(
            self,
            *inputs,
            op_version=op_version,
            output_names=output_names,
            domain=domain,
            **kwargs
        )
        self.operator_instance = skl_op
        self.options = options
        if skl_op is None and input_types is not None:
            raise RuntimeError(
                "input_types is only used when a sub-operator is defined."
            )
        self.input_types = input_types

    def __repr__(self):
        return "%s(%r, %s, op_version=%r, output_names=%r)" % (
            self.__class__.__name__,
            self.operator_instance,
            ", ".join("%r" % i for i in self.inputs),
            self.op_version,
            self.output_names,
        )

    def add_to(self, scope, container, operator=None, run_converters=False):
        """
        Adds outputs to the container if not already added,
        registered the outputs if the node is not final.

        :param scope: scope
        :param container: container
        :param operator: overwrite inputs
        :param run_converters: must be True if called from method `to_onnx`
        """
        if self.state is None:
            if self.kwargs.get("op_version", "") is None:
                kwargs = self.kwargs.copy()
                del kwargs["op_version"]
            else:
                kwargs = self.kwargs

            if self.output_names_ is not None:
                pass
            elif operator is not None:
                self.output_names_ = operator.outputs
            elif self.output_names:
                if not isinstance(self.output_names, (list, tuple)):
                    louts = [self.output_names]
                else:
                    louts = self.output_names
                outputs = []
                for name in louts:
                    if (
                        isinstance(name, str)
                        and name.startswith("u(")
                        and name[-1] == ")"
                    ):
                        name = VariableStr(
                            scope.get_unique_variable_name(name[2:-1]), scope=scope
                        )
                    if (
                        isinstance(name, Variable)
                        and name.raw_name.startswith("u(")
                        and name.raw_name[-1] == ")"
                    ):
                        name = VariableStr(
                            scope.get_unique_variable_name(name.raw_name[2:-1]),
                            scope=scope,
                            type=name.type,
                        )
                    outputs.append(name)
                self.output_names_ = outputs
            else:
                self.output_names_ = None

            inputs = []
            for input in self.inputs:
                if isinstance(input, OnnxOperator.OnnxOperatorVariable):
                    if operator is None:
                        raise RuntimeError(
                            "A placeholder cannot be replaced "
                            "as an operator is not specified."
                        )
                    if len(operator.inputs) == 0:
                        raise RuntimeError("No input variable in {}.".format(operator))
                    # The inputs must be looked into the graph.
                    for i in operator.inputs:
                        if i.onnx_name == input.name:
                            inputs.append(i)
                            break
                    else:
                        vars = ", ".join(
                            map(lambda o: "'%s'" % o.onnx_name, operator.inputs)
                        )
                        raise RuntimeError(
                            "Unable to find variable " "{} in {}.".format(input, vars)
                        )
                elif isinstance(input, tuple) and len(input) == 2:
                    if scope is not None and input[0] in scope.variables:
                        var = scope.variables[input[0]]
                    else:
                        var = Variable(input[0], input[0], scope=scope, type=input[1])
                        if scope is not None:
                            scope.register_variable(var)
                    inputs.append(var)
                else:
                    inputs.append(input)

            logger.debug("[SubOps.add_to] state id=%d", id(self))
            self.state = GraphState(
                inputs,
                self.output_names_,
                self.operator_instance,
                scope,
                container,
                None,
                op_version=self.op_version,
                op_domain=None,
                onnx_prefix_name=self.onnx_prefix,
                options=self.options,
                run_converters=run_converters,
                input_types=self.input_types,
                **kwargs
            )
            self.state.run()


class WrappedModelAlias:
    def __init__(self, model, alias):
        self.model = model
        self.alias = alias


class OnnxSubOperator(OnnxSubEstimator):
    """
    This class is deprecated and will be removed in version 1.9.
    It should be replaced by :class:`OnnxSubEstimator
    <skl2onnx.algebra.onnx_operator.OnnxSubEstimator>`.
    """

    def __init__(self, *args, **kwargs):
        OnnxSubEstimator.__init__(self, *args, **kwargs)
        warnings.warn(
            (
                "Class OnnxSubOperator will be removed in 1.10. "
                "It should be replaced by OnnxSubEstimator."
            ),
            DeprecationWarning,
        )