diff --git a/src/DotNetBridge/RmlEnvironment.cs b/src/DotNetBridge/RmlEnvironment.cs index dc9ff045..e9893426 100644 --- a/src/DotNetBridge/RmlEnvironment.cs +++ b/src/DotNetBridge/RmlEnvironment.cs @@ -52,14 +52,9 @@ protected override IHost RegisterCore(HostEnvironmentBase source } public RmlEnvironment(Bridge.CheckCancelled checkDelegate, int? seed = null, bool verbose = false) - : this(RandomUtils.Create(seed), verbose) + : base(seed, verbose) { CheckCancelled = checkDelegate; - } - - public RmlEnvironment(Random rand, bool verbose = false) - : base(rand, verbose) - { CultureInfo.CurrentUICulture = CultureInfo.InvariantCulture; EnsureDispatcher(); } diff --git a/src/python/nimbusml.pyproj b/src/python/nimbusml.pyproj index 3ea8c640..1cf0f52f 100644 --- a/src/python/nimbusml.pyproj +++ b/src/python/nimbusml.pyproj @@ -401,6 +401,7 @@ + diff --git a/src/python/nimbusml/ensemble/lightgbmbinaryclassifier.py b/src/python/nimbusml/ensemble/lightgbmbinaryclassifier.py index c87bbbb0..125f536b 100644 --- a/src/python/nimbusml/ensemble/lightgbmbinaryclassifier.py +++ b/src/python/nimbusml/ensemble/lightgbmbinaryclassifier.py @@ -110,6 +110,9 @@ class LightGbmBinaryClassifier( :param handle_missing_value: Enable special handling of missing value or not. + :param use_zero_as_missing_value: Enable usage of zero (0) as missing + value. + :param minimum_example_count_per_group: Minimum number of instances per categorical group. @@ -165,6 +168,7 @@ def __init__( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -219,6 +223,7 @@ def __init__( batch_size=batch_size, use_categorical_split=use_categorical_split, handle_missing_value=handle_missing_value, + use_zero_as_missing_value=use_zero_as_missing_value, minimum_example_count_per_group=minimum_example_count_per_group, maximum_categorical_split_point_count=maximum_categorical_split_point_count, categorical_smoothing=categorical_smoothing, diff --git a/src/python/nimbusml/ensemble/lightgbmclassifier.py b/src/python/nimbusml/ensemble/lightgbmclassifier.py index a6951be2..d3d000e3 100644 --- a/src/python/nimbusml/ensemble/lightgbmclassifier.py +++ b/src/python/nimbusml/ensemble/lightgbmclassifier.py @@ -105,6 +105,9 @@ class LightGbmClassifier(core, BasePredictor, ClassifierMixin): :param handle_missing_value: Enable special handling of missing value or not. + :param use_zero_as_missing_value: Enable usage of zero (0) as missing + value. + :param minimum_example_count_per_group: Minimum number of instances per categorical group. @@ -160,6 +163,7 @@ def __init__( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -214,6 +218,7 @@ def __init__( batch_size=batch_size, use_categorical_split=use_categorical_split, handle_missing_value=handle_missing_value, + use_zero_as_missing_value=use_zero_as_missing_value, minimum_example_count_per_group=minimum_example_count_per_group, maximum_categorical_split_point_count=maximum_categorical_split_point_count, categorical_smoothing=categorical_smoothing, diff --git a/src/python/nimbusml/ensemble/lightgbmranker.py b/src/python/nimbusml/ensemble/lightgbmranker.py index fb96f5cd..61bcbd90 100644 --- a/src/python/nimbusml/ensemble/lightgbmranker.py +++ b/src/python/nimbusml/ensemble/lightgbmranker.py @@ -105,6 +105,9 @@ class LightGbmRanker(core, BasePredictor, ClassifierMixin): :param handle_missing_value: Enable special handling of missing value or not. + :param use_zero_as_missing_value: Enable usage of zero (0) as missing + value. + :param minimum_example_count_per_group: Minimum number of instances per categorical group. @@ -159,6 +162,7 @@ def __init__( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -212,6 +216,7 @@ def __init__( batch_size=batch_size, use_categorical_split=use_categorical_split, handle_missing_value=handle_missing_value, + use_zero_as_missing_value=use_zero_as_missing_value, minimum_example_count_per_group=minimum_example_count_per_group, maximum_categorical_split_point_count=maximum_categorical_split_point_count, categorical_smoothing=categorical_smoothing, diff --git a/src/python/nimbusml/ensemble/lightgbmregressor.py b/src/python/nimbusml/ensemble/lightgbmregressor.py index 0d0a69ae..89258a7f 100644 --- a/src/python/nimbusml/ensemble/lightgbmregressor.py +++ b/src/python/nimbusml/ensemble/lightgbmregressor.py @@ -98,6 +98,9 @@ class LightGbmRegressor(core, BasePredictor, RegressorMixin): :param handle_missing_value: Enable special handling of missing value or not. + :param use_zero_as_missing_value: Enable usage of zero (0) as missing + value. + :param minimum_example_count_per_group: Minimum number of instances per categorical group. @@ -150,6 +153,7 @@ def __init__( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -201,6 +205,7 @@ def __init__( batch_size=batch_size, use_categorical_split=use_categorical_split, handle_missing_value=handle_missing_value, + use_zero_as_missing_value=use_zero_as_missing_value, minimum_example_count_per_group=minimum_example_count_per_group, maximum_categorical_split_point_count=maximum_categorical_split_point_count, categorical_smoothing=categorical_smoothing, diff --git a/src/python/nimbusml/internal/core/ensemble/lightgbmbinaryclassifier.py b/src/python/nimbusml/internal/core/ensemble/lightgbmbinaryclassifier.py index 2bf8468b..1ae0934d 100644 --- a/src/python/nimbusml/internal/core/ensemble/lightgbmbinaryclassifier.py +++ b/src/python/nimbusml/internal/core/ensemble/lightgbmbinaryclassifier.py @@ -99,6 +99,9 @@ class LightGbmBinaryClassifier( :param handle_missing_value: Enable special handling of missing value or not. + :param use_zero_as_missing_value: Enable usage of zero (0) as missing + value. + :param minimum_example_count_per_group: Minimum number of instances per categorical group. @@ -154,6 +157,7 @@ def __init__( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -183,6 +187,7 @@ def __init__( self.batch_size = batch_size self.use_categorical_split = use_categorical_split self.handle_missing_value = handle_missing_value + self.use_zero_as_missing_value = use_zero_as_missing_value self.minimum_example_count_per_group = minimum_example_count_per_group self.maximum_categorical_split_point_count = maximum_categorical_split_point_count self.categorical_smoothing = categorical_smoothing @@ -220,6 +225,7 @@ def _get_node(self, **all_args): batch_size=self.batch_size, use_categorical_split=self.use_categorical_split, handle_missing_value=self.handle_missing_value, + use_zero_as_missing_value=self.use_zero_as_missing_value, minimum_example_count_per_group=self.minimum_example_count_per_group, maximum_categorical_split_point_count=self.maximum_categorical_split_point_count, categorical_smoothing=self.categorical_smoothing, diff --git a/src/python/nimbusml/internal/core/ensemble/lightgbmclassifier.py b/src/python/nimbusml/internal/core/ensemble/lightgbmclassifier.py index 5feace13..7bb5466a 100644 --- a/src/python/nimbusml/internal/core/ensemble/lightgbmclassifier.py +++ b/src/python/nimbusml/internal/core/ensemble/lightgbmclassifier.py @@ -97,6 +97,9 @@ class LightGbmClassifier( :param handle_missing_value: Enable special handling of missing value or not. + :param use_zero_as_missing_value: Enable usage of zero (0) as missing + value. + :param minimum_example_count_per_group: Minimum number of instances per categorical group. @@ -152,6 +155,7 @@ def __init__( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -181,6 +185,7 @@ def __init__( self.batch_size = batch_size self.use_categorical_split = use_categorical_split self.handle_missing_value = handle_missing_value + self.use_zero_as_missing_value = use_zero_as_missing_value self.minimum_example_count_per_group = minimum_example_count_per_group self.maximum_categorical_split_point_count = maximum_categorical_split_point_count self.categorical_smoothing = categorical_smoothing @@ -218,6 +223,7 @@ def _get_node(self, **all_args): batch_size=self.batch_size, use_categorical_split=self.use_categorical_split, handle_missing_value=self.handle_missing_value, + use_zero_as_missing_value=self.use_zero_as_missing_value, minimum_example_count_per_group=self.minimum_example_count_per_group, maximum_categorical_split_point_count=self.maximum_categorical_split_point_count, categorical_smoothing=self.categorical_smoothing, diff --git a/src/python/nimbusml/internal/core/ensemble/lightgbmranker.py b/src/python/nimbusml/internal/core/ensemble/lightgbmranker.py index 6c06148d..c3394cf4 100644 --- a/src/python/nimbusml/internal/core/ensemble/lightgbmranker.py +++ b/src/python/nimbusml/internal/core/ensemble/lightgbmranker.py @@ -95,6 +95,9 @@ class LightGbmRanker(BasePipelineItem, DefaultSignatureWithRoles): :param handle_missing_value: Enable special handling of missing value or not. + :param use_zero_as_missing_value: Enable usage of zero (0) as missing + value. + :param minimum_example_count_per_group: Minimum number of instances per categorical group. @@ -149,6 +152,7 @@ def __init__( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -176,6 +180,7 @@ def __init__( self.batch_size = batch_size self.use_categorical_split = use_categorical_split self.handle_missing_value = handle_missing_value + self.use_zero_as_missing_value = use_zero_as_missing_value self.minimum_example_count_per_group = minimum_example_count_per_group self.maximum_categorical_split_point_count = maximum_categorical_split_point_count self.categorical_smoothing = categorical_smoothing @@ -212,6 +217,7 @@ def _get_node(self, **all_args): batch_size=self.batch_size, use_categorical_split=self.use_categorical_split, handle_missing_value=self.handle_missing_value, + use_zero_as_missing_value=self.use_zero_as_missing_value, minimum_example_count_per_group=self.minimum_example_count_per_group, maximum_categorical_split_point_count=self.maximum_categorical_split_point_count, categorical_smoothing=self.categorical_smoothing, diff --git a/src/python/nimbusml/internal/core/ensemble/lightgbmregressor.py b/src/python/nimbusml/internal/core/ensemble/lightgbmregressor.py index 20fe5e57..b4cb7b5e 100644 --- a/src/python/nimbusml/internal/core/ensemble/lightgbmregressor.py +++ b/src/python/nimbusml/internal/core/ensemble/lightgbmregressor.py @@ -90,6 +90,9 @@ class LightGbmRegressor( :param handle_missing_value: Enable special handling of missing value or not. + :param use_zero_as_missing_value: Enable usage of zero (0) as missing + value. + :param minimum_example_count_per_group: Minimum number of instances per categorical group. @@ -142,6 +145,7 @@ def __init__( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -168,6 +172,7 @@ def __init__( self.batch_size = batch_size self.use_categorical_split = use_categorical_split self.handle_missing_value = handle_missing_value + self.use_zero_as_missing_value = use_zero_as_missing_value self.minimum_example_count_per_group = minimum_example_count_per_group self.maximum_categorical_split_point_count = maximum_categorical_split_point_count self.categorical_smoothing = categorical_smoothing @@ -202,6 +207,7 @@ def _get_node(self, **all_args): batch_size=self.batch_size, use_categorical_split=self.use_categorical_split, handle_missing_value=self.handle_missing_value, + use_zero_as_missing_value=self.use_zero_as_missing_value, minimum_example_count_per_group=self.minimum_example_count_per_group, maximum_categorical_split_point_count=self.maximum_categorical_split_point_count, categorical_smoothing=self.categorical_smoothing, diff --git a/src/python/nimbusml/internal/entrypoints/trainers_lightgbmbinaryclassifier.py b/src/python/nimbusml/internal/entrypoints/trainers_lightgbmbinaryclassifier.py index 5a54c69f..4ae20be2 100644 --- a/src/python/nimbusml/internal/entrypoints/trainers_lightgbmbinaryclassifier.py +++ b/src/python/nimbusml/internal/entrypoints/trainers_lightgbmbinaryclassifier.py @@ -35,6 +35,7 @@ def trainers_lightgbmbinaryclassifier( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -88,6 +89,8 @@ def trainers_lightgbmbinaryclassifier( (inputs). :param handle_missing_value: Enable special handling of missing value or not. (inputs). + :param use_zero_as_missing_value: Enable usage of zero (0) as + missing value. (inputs). :param minimum_example_count_per_group: Minimum number of instances per categorical group. (inputs). :param maximum_categorical_split_point_count: Max number of @@ -243,6 +246,11 @@ def trainers_lightgbmbinaryclassifier( obj=handle_missing_value, none_acceptable=True, is_of_type=bool) + if use_zero_as_missing_value is not None: + inputs['UseZeroAsMissingValue'] = try_set( + obj=use_zero_as_missing_value, + none_acceptable=True, + is_of_type=bool) if minimum_example_count_per_group is not None: inputs['MinimumExampleCountPerGroup'] = try_set( obj=minimum_example_count_per_group, diff --git a/src/python/nimbusml/internal/entrypoints/trainers_lightgbmclassifier.py b/src/python/nimbusml/internal/entrypoints/trainers_lightgbmclassifier.py index 28f13e0a..d78f2b48 100644 --- a/src/python/nimbusml/internal/entrypoints/trainers_lightgbmclassifier.py +++ b/src/python/nimbusml/internal/entrypoints/trainers_lightgbmclassifier.py @@ -35,6 +35,7 @@ def trainers_lightgbmclassifier( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -86,6 +87,8 @@ def trainers_lightgbmclassifier( (inputs). :param handle_missing_value: Enable special handling of missing value or not. (inputs). + :param use_zero_as_missing_value: Enable usage of zero (0) as + missing value. (inputs). :param minimum_example_count_per_group: Minimum number of instances per categorical group. (inputs). :param maximum_categorical_split_point_count: Max number of @@ -240,6 +243,11 @@ def trainers_lightgbmclassifier( obj=handle_missing_value, none_acceptable=True, is_of_type=bool) + if use_zero_as_missing_value is not None: + inputs['UseZeroAsMissingValue'] = try_set( + obj=use_zero_as_missing_value, + none_acceptable=True, + is_of_type=bool) if minimum_example_count_per_group is not None: inputs['MinimumExampleCountPerGroup'] = try_set( obj=minimum_example_count_per_group, diff --git a/src/python/nimbusml/internal/entrypoints/trainers_lightgbmranker.py b/src/python/nimbusml/internal/entrypoints/trainers_lightgbmranker.py index 5a3a44fd..0c2e9e0a 100644 --- a/src/python/nimbusml/internal/entrypoints/trainers_lightgbmranker.py +++ b/src/python/nimbusml/internal/entrypoints/trainers_lightgbmranker.py @@ -34,6 +34,7 @@ def trainers_lightgbmranker( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -83,6 +84,8 @@ def trainers_lightgbmranker( (inputs). :param handle_missing_value: Enable special handling of missing value or not. (inputs). + :param use_zero_as_missing_value: Enable usage of zero (0) as + missing value. (inputs). :param minimum_example_count_per_group: Minimum number of instances per categorical group. (inputs). :param maximum_categorical_split_point_count: Max number of @@ -232,6 +235,11 @@ def trainers_lightgbmranker( obj=handle_missing_value, none_acceptable=True, is_of_type=bool) + if use_zero_as_missing_value is not None: + inputs['UseZeroAsMissingValue'] = try_set( + obj=use_zero_as_missing_value, + none_acceptable=True, + is_of_type=bool) if minimum_example_count_per_group is not None: inputs['MinimumExampleCountPerGroup'] = try_set( obj=minimum_example_count_per_group, diff --git a/src/python/nimbusml/internal/entrypoints/trainers_lightgbmregressor.py b/src/python/nimbusml/internal/entrypoints/trainers_lightgbmregressor.py index 32260ebe..9fbf3e69 100644 --- a/src/python/nimbusml/internal/entrypoints/trainers_lightgbmregressor.py +++ b/src/python/nimbusml/internal/entrypoints/trainers_lightgbmregressor.py @@ -32,6 +32,7 @@ def trainers_lightgbmregressor( batch_size=1048576, use_categorical_split=None, handle_missing_value=True, + use_zero_as_missing_value=False, minimum_example_count_per_group=100, maximum_categorical_split_point_count=32, categorical_smoothing=10.0, @@ -78,6 +79,8 @@ def trainers_lightgbmregressor( (inputs). :param handle_missing_value: Enable special handling of missing value or not. (inputs). + :param use_zero_as_missing_value: Enable usage of zero (0) as + missing value. (inputs). :param minimum_example_count_per_group: Minimum number of instances per categorical group. (inputs). :param maximum_categorical_split_point_count: Max number of @@ -218,6 +221,11 @@ def trainers_lightgbmregressor( obj=handle_missing_value, none_acceptable=True, is_of_type=bool) + if use_zero_as_missing_value is not None: + inputs['UseZeroAsMissingValue'] = try_set( + obj=use_zero_as_missing_value, + none_acceptable=True, + is_of_type=bool) if minimum_example_count_per_group is not None: inputs['MinimumExampleCountPerGroup'] = try_set( obj=minimum_example_count_per_group, diff --git a/src/python/nimbusml/internal/entrypoints/trainers_localdeepsvmbinaryclassifier.py b/src/python/nimbusml/internal/entrypoints/trainers_localdeepsvmbinaryclassifier.py new file mode 100644 index 00000000..0b2c5984 --- /dev/null +++ b/src/python/nimbusml/internal/entrypoints/trainers_localdeepsvmbinaryclassifier.py @@ -0,0 +1,175 @@ +# - Generated by tools/entrypoint_compiler.py: do not edit by hand +""" +Trainers.LocalDeepSvmBinaryClassifier +""" + +import numbers + +from ..utils.entrypoints import EntryPoint +from ..utils.utils import try_set, unlist + + +def trainers_localdeepsvmbinaryclassifier( + training_data, + predictor_model=None, + feature_column_name='Features', + label_column_name='Label', + example_weight_column_name=None, + normalize_features='Auto', + caching='Auto', + tree_depth=3, + lambda_w=0.1, + lambda_theta=0.01, + lambda_thetaprime=0.01, + sigma=1.0, + number_of_iterations=15000, + use_bias=True, + calibrator=None, + max_calibration_examples=1000000, + cache=True, + **params): + """ + **Description** + LD-SVM learns a binary, non-linear SVM classifier with a kernel that + is specifically designed to reduce prediction time. LD-SVM + learns decision boundaries that are locally linear. + + :param training_data: The data to be used for training (inputs). + :param feature_column_name: Column to use for features (inputs). + :param label_column_name: Column to use for labels (inputs). + :param example_weight_column_name: Column to use for example + weight (inputs). + :param normalize_features: Normalize option for the feature + column (inputs). + :param caching: Whether trainer should cache input training data + (inputs). + :param tree_depth: Depth of Local Deep SVM tree (inputs). + :param lambda_w: Regularizer for classifier parameter W (inputs). + :param lambda_theta: Regularizer for kernel parameter Theta + (inputs). + :param lambda_thetaprime: Regularizer for kernel parameter + Thetaprime (inputs). + :param sigma: Parameter for sigmoid sharpness (inputs). + :param number_of_iterations: Number of iterations (inputs). + :param use_bias: No bias (inputs). + :param calibrator: The calibrator kind to apply to the predictor. + Specify null for no calibration (inputs). + :param max_calibration_examples: The maximum number of examples + to use when training the calibrator (inputs). + :param cache: Whether to cache the data before the first + iteration (inputs). + :param predictor_model: The trained model (outputs). + """ + + entrypoint_name = 'Trainers.LocalDeepSvmBinaryClassifier' + inputs = {} + outputs = {} + + if training_data is not None: + inputs['TrainingData'] = try_set( + obj=training_data, + none_acceptable=False, + is_of_type=str) + if feature_column_name is not None: + inputs['FeatureColumnName'] = try_set( + obj=feature_column_name, + none_acceptable=True, + is_of_type=str, + is_column=True) + if label_column_name is not None: + inputs['LabelColumnName'] = try_set( + obj=label_column_name, + none_acceptable=True, + is_of_type=str, + is_column=True) + if example_weight_column_name is not None: + inputs['ExampleWeightColumnName'] = try_set( + obj=example_weight_column_name, + none_acceptable=True, + is_of_type=str, + is_column=True) + if normalize_features is not None: + inputs['NormalizeFeatures'] = try_set( + obj=normalize_features, + none_acceptable=True, + is_of_type=str, + values=[ + 'No', + 'Warn', + 'Auto', + 'Yes']) + if caching is not None: + inputs['Caching'] = try_set( + obj=caching, + none_acceptable=True, + is_of_type=str, + values=[ + 'Auto', + 'Memory', + 'None']) + if tree_depth is not None: + inputs['TreeDepth'] = try_set( + obj=tree_depth, + none_acceptable=True, + is_of_type=numbers.Real) + if lambda_w is not None: + inputs['LambdaW'] = try_set( + obj=lambda_w, + none_acceptable=True, + is_of_type=numbers.Real) + if lambda_theta is not None: + inputs['LambdaTheta'] = try_set( + obj=lambda_theta, + none_acceptable=True, + is_of_type=numbers.Real) + if lambda_thetaprime is not None: + inputs['LambdaThetaprime'] = try_set( + obj=lambda_thetaprime, + none_acceptable=True, + is_of_type=numbers.Real) + if sigma is not None: + inputs['Sigma'] = try_set( + obj=sigma, + none_acceptable=True, + is_of_type=numbers.Real) + if number_of_iterations is not None: + inputs['NumberOfIterations'] = try_set( + obj=number_of_iterations, + none_acceptable=True, + is_of_type=numbers.Real) + if use_bias is not None: + inputs['UseBias'] = try_set( + obj=use_bias, + none_acceptable=True, + is_of_type=bool) + if calibrator is not None: + inputs['Calibrator'] = try_set( + obj=calibrator, + none_acceptable=True, + is_of_type=dict) + if max_calibration_examples is not None: + inputs['MaxCalibrationExamples'] = try_set( + obj=max_calibration_examples, + none_acceptable=True, + is_of_type=numbers.Real) + if cache is not None: + inputs['Cache'] = try_set( + obj=cache, + none_acceptable=True, + is_of_type=bool) + if predictor_model is not None: + outputs['PredictorModel'] = try_set( + obj=predictor_model, none_acceptable=False, is_of_type=str) + + input_variables = { + x for x in unlist(inputs.values()) + if isinstance(x, str) and x.startswith("$")} + output_variables = { + x for x in unlist(outputs.values()) + if isinstance(x, str) and x.startswith("$")} + + entrypoint = EntryPoint( + name=entrypoint_name, inputs=inputs, outputs=outputs, + input_variables=input_variables, + output_variables=output_variables) + return entrypoint diff --git a/src/python/nimbusml/internal/utils/dataframes.py b/src/python/nimbusml/internal/utils/dataframes.py index 17572ad1..8e3665bc 100644 --- a/src/python/nimbusml/internal/utils/dataframes.py +++ b/src/python/nimbusml/internal/utils/dataframes.py @@ -63,7 +63,7 @@ def resolve_dataframe(dataframe): ret[name_i] = serie.values if infered_dtype == 'floating' or \ infered_dtype == 'mixed-integer-float': - s = serie.itemsize + s = serie.dtype.itemsize if s == 8: ret[str(i)] = serie.values.astype( np.float64, copy=False) @@ -77,7 +77,7 @@ def resolve_dataframe(dataframe): [_global_dtype_to_char_dict[ np.dtype(np.float32)]]) elif infered_dtype == 'integer': - s = serie.itemsize + s = serie.dtype.itemsize if s == 8: ret[str(i)] = serie.values.astype( np.int64, copy=False) diff --git a/src/python/nimbusml/tests/pipeline/test_uci_adult.py b/src/python/nimbusml/tests/pipeline/test_uci_adult.py index 00ae1728..2f6055de 100644 --- a/src/python/nimbusml/tests/pipeline/test_uci_adult.py +++ b/src/python/nimbusml/tests/pipeline/test_uci_adult.py @@ -36,7 +36,7 @@ def check_accuracy(test_file, label_column, predictions, threshold, sep=','): (test, label) = get_X_y(test_file, label_column, sep=sep) accuracy = np.mean(label[label_column].values == - predictions.ix[:, 'PredictedLabel'].values) + predictions['PredictedLabel'].values) assert_greater( accuracy, threshold, diff --git a/src/python/nimbusml/tests/test_syntax_learner.py b/src/python/nimbusml/tests/test_syntax_learner.py index 2c649304..edc05372 100644 --- a/src/python/nimbusml/tests/test_syntax_learner.py +++ b/src/python/nimbusml/tests/test_syntax_learner.py @@ -458,7 +458,7 @@ def test_syntax_slots_wo_pipeline(self): if spl[0] == 'age': ages.append(l2) X_xf1.columns = pandas.MultiIndex( - levels=levels, labels=labels, names=names) + levels=levels, codes=labels, names=names) print(X_xf1.head(n=2).T) col_ages = [('age', a) for a in ages] diff --git a/src/python/tools/manifest.json b/src/python/tools/manifest.json index 491ac877..8d14276b 100644 --- a/src/python/tools/manifest.json +++ b/src/python/tools/manifest.json @@ -11743,6 +11743,9 @@ "Name": "HandleMissingValue", "Type": "Bool", "Desc": "Enable special handling of missing value or not.", + "Aliases": [ + "hmv" + ], "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -11755,6 +11758,25 @@ ] } }, + { + "Name": "UseZeroAsMissingValue", + "Type": "Bool", + "Desc": "Enable usage of zero (0) as missing value.", + "Aliases": [ + "uzam" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false, + "SweepRange": { + "RangeType": "Discrete", + "Values": [ + true, + false + ] + } + }, { "Name": "MinimumExampleCountPerGroup", "Type": "Int", @@ -12240,6 +12262,9 @@ "Name": "HandleMissingValue", "Type": "Bool", "Desc": "Enable special handling of missing value or not.", + "Aliases": [ + "hmv" + ], "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -12252,6 +12277,25 @@ ] } }, + { + "Name": "UseZeroAsMissingValue", + "Type": "Bool", + "Desc": "Enable usage of zero (0) as missing value.", + "Aliases": [ + "uzam" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false, + "SweepRange": { + "RangeType": "Discrete", + "Values": [ + true, + false + ] + } + }, { "Name": "MinimumExampleCountPerGroup", "Type": "Int", @@ -12737,6 +12781,9 @@ "Name": "HandleMissingValue", "Type": "Bool", "Desc": "Enable special handling of missing value or not.", + "Aliases": [ + "hmv" + ], "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -12749,6 +12796,25 @@ ] } }, + { + "Name": "UseZeroAsMissingValue", + "Type": "Bool", + "Desc": "Enable usage of zero (0) as missing value.", + "Aliases": [ + "uzam" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false, + "SweepRange": { + "RangeType": "Discrete", + "Values": [ + true, + false + ] + } + }, { "Name": "MinimumExampleCountPerGroup", "Type": "Int", @@ -13195,6 +13261,9 @@ "Name": "HandleMissingValue", "Type": "Bool", "Desc": "Enable special handling of missing value or not.", + "Aliases": [ + "hmv" + ], "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -13207,6 +13276,25 @@ ] } }, + { + "Name": "UseZeroAsMissingValue", + "Type": "Bool", + "Desc": "Enable usage of zero (0) as missing value.", + "Aliases": [ + "uzam" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": false, + "SweepRange": { + "RangeType": "Discrete", + "Values": [ + true, + false + ] + } + }, { "Name": "MinimumExampleCountPerGroup", "Type": "Int", @@ -13613,6 +13701,288 @@ "ITrainerOutput" ] }, + { + "Name": "Trainers.LocalDeepSvmBinaryClassifier", + "Desc": "LD-SVM learns a binary, non-linear SVM classifier with a kernel that is specifically designed to reduce prediction time. LD-SVM learns decision boundaries that are locally linear.", + "FriendlyName": "Local Deep SVM (LDSVM)", + "ShortName": "LDSVM", + "Inputs": [ + { + "Name": "TrainingData", + "Type": "DataView", + "Desc": "The data to be used for training", + "Aliases": [ + "data" + ], + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "FeatureColumnName", + "Type": "String", + "Desc": "Column to use for features", + "Aliases": [ + "feat" + ], + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": "Features" + }, + { + "Name": "LabelColumnName", + "Type": "String", + "Desc": "Column to use for labels", + "Aliases": [ + "lab" + ], + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": "Label" + }, + { + "Name": "ExampleWeightColumnName", + "Type": "String", + "Desc": "Column to use for example weight", + "Aliases": [ + "weight" + ], + "Required": false, + "SortOrder": 4.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "NormalizeFeatures", + "Type": { + "Kind": "Enum", + "Values": [ + "No", + "Warn", + "Auto", + "Yes" + ] + }, + "Desc": "Normalize option for the feature column", + "Aliases": [ + "norm" + ], + "Required": false, + "SortOrder": 5.0, + "IsNullable": false, + "Default": "Auto" + }, + { + "Name": "Caching", + "Type": { + "Kind": "Enum", + "Values": [ + "Auto", + "Memory", + "None" + ] + }, + "Desc": "Whether trainer should cache input training data", + "Aliases": [ + "cache" + ], + "Required": false, + "SortOrder": 6.0, + "IsNullable": false, + "Default": "Auto" + }, + { + "Name": "TreeDepth", + "Type": "Int", + "Desc": "Depth of Local Deep SVM tree", + "Aliases": [ + "depth" + ], + "Required": false, + "SortOrder": 50.0, + "IsNullable": false, + "Default": 3, + "SweepRange": { + "RangeType": "Discrete", + "Values": [ + 1, + 3, + 5, + 7 + ] + } + }, + { + "Name": "LambdaW", + "Type": "Float", + "Desc": "Regularizer for classifier parameter W", + "Aliases": [ + "lw" + ], + "Required": false, + "SortOrder": 50.0, + "IsNullable": false, + "Default": 0.1, + "SweepRange": { + "RangeType": "Discrete", + "Values": [ + 0.1, + 0.01, + 0.001 + ] + } + }, + { + "Name": "LambdaTheta", + "Type": "Float", + "Desc": "Regularizer for kernel parameter Theta", + "Aliases": [ + "lt" + ], + "Required": false, + "SortOrder": 50.0, + "IsNullable": false, + "Default": 0.01, + "SweepRange": { + "RangeType": "Discrete", + "Values": [ + 0.1, + 0.01, + 0.001 + ] + } + }, + { + "Name": "LambdaThetaprime", + "Type": "Float", + "Desc": "Regularizer for kernel parameter Thetaprime", + "Aliases": [ + "lp" + ], + "Required": false, + "SortOrder": 50.0, + "IsNullable": false, + "Default": 0.01, + "SweepRange": { + "RangeType": "Discrete", + "Values": [ + 0.1, + 0.01, + 0.001 + ] + } + }, + { + "Name": "Sigma", + "Type": "Float", + "Desc": "Parameter for sigmoid sharpness", + "Aliases": [ + "s" + ], + "Required": false, + "SortOrder": 50.0, + "IsNullable": false, + "Default": 1.0, + "SweepRange": { + "RangeType": "Discrete", + "Values": [ + 1.0, + 0.1, + 0.01 + ] + } + }, + { + "Name": "NumberOfIterations", + "Type": "Int", + "Desc": "Number of iterations", + "Aliases": [ + "iter", + "NumIterations" + ], + "Required": false, + "SortOrder": 50.0, + "IsNullable": false, + "Default": 15000, + "SweepRange": { + "RangeType": "Discrete", + "Values": [ + 10000, + 15000 + ] + } + }, + { + "Name": "UseBias", + "Type": "Bool", + "Desc": "No bias", + "Aliases": [ + "bias" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true, + "SweepRange": { + "RangeType": "Discrete", + "Values": [ + false, + true + ] + } + }, + { + "Name": "Calibrator", + "Type": { + "Kind": "Component", + "ComponentKind": "CalibratorTrainer" + }, + "Desc": "The calibrator kind to apply to the predictor. Specify null for no calibration", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": { + "Name": "PlattCalibrator" + } + }, + { + "Name": "MaxCalibrationExamples", + "Type": "Int", + "Desc": "The maximum number of examples to use when training the calibrator", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": 1000000 + }, + { + "Name": "Cache", + "Type": "Bool", + "Desc": "Whether to cache the data before the first iteration", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + } + ], + "Outputs": [ + { + "Name": "PredictorModel", + "Type": "PredictorModel", + "Desc": "The trained model" + } + ], + "InputKind": [ + "ITrainerInputWithWeight", + "ITrainerInputWithLabel", + "ITrainerInput" + ], + "OutputKind": [ + "IBinaryClassificationOutput", + "ITrainerOutput" + ] + }, { "Name": "Trainers.LogisticRegressionBinaryClassifier", "Desc": "Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistical function.",