From 49c93d6f6ce4e5e022d7a25887d81cad07d36988 Mon Sep 17 00:00:00 2001 From: "pieths.dev@gmail.com" Date: Fri, 15 Nov 2019 10:30:14 -0800 Subject: [PATCH 1/4] Generate PrefixColumnConcatenator with entry point compiler instead of manually. --- .../docstrings/PrefixColumnConcatenator.txt | 16 +++++++++ .../schema/prefixcolumnconcatenator.py | 19 ++++------ .../schema/prefixcolumnconcatenator.py | 36 ++++--------------- src/python/tools/manifest_diff.json | 6 ++++ 4 files changed, 35 insertions(+), 42 deletions(-) create mode 100644 src/python/docs/docstrings/PrefixColumnConcatenator.txt diff --git a/src/python/docs/docstrings/PrefixColumnConcatenator.txt b/src/python/docs/docstrings/PrefixColumnConcatenator.txt new file mode 100644 index 00000000..d5c806b3 --- /dev/null +++ b/src/python/docs/docstrings/PrefixColumnConcatenator.txt @@ -0,0 +1,16 @@ + """ + + Concatenates one or more columns of the same item type. + + .. seealso:: + :py:class:`ColumnDropper + `, + :py:class:`ColumnSelector + `. + + .. index:: transform, schema + + Example: + .. literalinclude:: /../nimbusml/examples/PrefixColumnConcatenator.py + :language: python + """ diff --git a/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py b/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py index d202e947..7dfc2713 100644 --- a/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py +++ b/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py @@ -2,6 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------------------------- +# - Generated by tools/entrypoint_compiler.py: do not edit by hand """ PrefixColumnConcatenator """ @@ -15,19 +16,12 @@ from ...base_pipeline_item import BasePipelineItem, DefaultSignature -class PrefixColumnConcatenator(BasePipelineItem, DefaultSignature): +class PrefixColumnConcatenator( + BasePipelineItem, + DefaultSignature): """ - Combines several columns into a single vector-valued column by prefix - - .. remarks:: - ``PrefixColumnConcatenator`` creates a single vector-valued column from - multiple - columns. It can be performed on data before training a model. The - concatenation - can significantly speed up the processing of data when the number of - columns - is as large as hundreds to thousands. + Concatenates one or more columns of the same item type. :param params: Additional arguments sent to compute engine. @@ -82,8 +76,7 @@ def _get_node(self, **all_args): # validate output if output_columns is None: - raise ValueError( - "'None' output passed when it cannot be none.") + output_columns = input_columns if not isinstance(output_columns, list): raise ValueError( diff --git a/src/python/nimbusml/preprocessing/schema/prefixcolumnconcatenator.py b/src/python/nimbusml/preprocessing/schema/prefixcolumnconcatenator.py index 9a3aa443..c44bdc4c 100644 --- a/src/python/nimbusml/preprocessing/schema/prefixcolumnconcatenator.py +++ b/src/python/nimbusml/preprocessing/schema/prefixcolumnconcatenator.py @@ -2,6 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------------------------- +# - Generated by tools/entrypoint_compiler.py: do not edit by hand """ PrefixColumnConcatenator """ @@ -17,38 +18,15 @@ from ...internal.utils.utils import trace -class PrefixColumnConcatenator(core, BaseTransform, TransformerMixin): +class PrefixColumnConcatenator( + core, + BaseTransform, + TransformerMixin): """ - Combines several columns into a single vector-valued column by prefix. + Concatenates one or more columns of the same item type. - .. remarks:: - ``PrefixColumnConcatenator`` creates a single vector-valued column from - multiple - columns. It can be performed on data before training a model. The - concatenation - can significantly speed up the processing of data when the number of - columns - is as large as hundreds to thousands. - - :param columns: a dictionary of key-value pairs, where key is the output - column name and value is a list of input column names. - - * Only one key-value pair is allowed. - * Input column type: numeric or string. - * Output column type: - `Vector Type `_. - - The << operator can be used to set this value (see - `Column Operator `_) - - For example - * ColumnConcatenator(columns={'features': ['age', 'parity', - 'induced']}) - * ColumnConcatenator() << {'features': ['age', 'parity', - 'induced']}) - - For more details see `Columns `_. + :param columns: see `Columns `_. :param params: Additional arguments sent to compute engine. 
diff --git a/src/python/tools/manifest_diff.json b/src/python/tools/manifest_diff.json index 68ab2fa5..5b320960 100644 --- a/src/python/tools/manifest_diff.json +++ b/src/python/tools/manifest_diff.json @@ -785,6 +785,12 @@ "NewName": "TypeConverter", "Module": "preprocessing.schema", "Type": "Transform" + }, + { + "Name": "Transforms.PrefixColumnConcatenator", + "NewName": "PrefixColumnConcatenator", + "Module": "preprocessing.schema", + "Type": "Transform" } ], "Components": [ From 216136f964c0fa01df272230ef84e9c8a173f826 Mon Sep 17 00:00:00 2001 From: "pieths.dev@gmail.com" Date: Fri, 15 Nov 2019 11:00:54 -0800 Subject: [PATCH 2/4] Update PrefixColumnConcatenator docs. --- .../docstrings/PrefixColumnConcatenator.txt | 30 ++++++++++++++++++- .../schema/prefixcolumnconcatenator.py | 11 ++++++- .../schema/prefixcolumnconcatenator.py | 30 +++++++++++++++++-- 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/src/python/docs/docstrings/PrefixColumnConcatenator.txt b/src/python/docs/docstrings/PrefixColumnConcatenator.txt index d5c806b3..aac3d116 100644 --- a/src/python/docs/docstrings/PrefixColumnConcatenator.txt +++ b/src/python/docs/docstrings/PrefixColumnConcatenator.txt @@ -1,6 +1,34 @@ """ - Concatenates one or more columns of the same item type. + Combines several columns into a single vector-valued column by prefix. + + .. remarks:: + ``PrefixColumnConcatenator`` creates a single vector-valued column from + multiple + columns. It can be performed on data before training a model. The + concatenation + can significantly speed up the processing of data when the number of + columns + is as large as hundreds to thousands. + + :param columns: a dictionary of key-value pairs, where key is the output + column name and value is a list of input column names. + + * Only one key-value pair is allowed. + * Input column type: numeric or string. + * Output column type: + `Vector Type `_. 
+ + The << operator can be used to set this value (see + `Column Operator `_) + + For example + * ColumnConcatenator(columns={'features': ['age', 'parity', + 'induced']}) + * ColumnConcatenator() << {'features': ['age', 'parity', + 'induced']} + + For more details see `Columns `_. .. seealso:: :py:class:`ColumnDropper diff --git a/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py b/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py index 7dfc2713..3db30bb9 100644 --- a/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py +++ b/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py @@ -21,7 +21,16 @@ class PrefixColumnConcatenator( DefaultSignature): """ - Concatenates one or more columns of the same item type. + Combines several columns into a single vector-valued column by prefix. + + .. remarks:: + ``PrefixColumnConcatenator`` creates a single vector-valued column from + multiple + columns. It can be performed on data before training a model. The + concatenation + can significantly speed up the processing of data when the number of + columns + is as large as hundreds to thousands. :param params: Additional arguments sent to compute engine. diff --git a/src/python/nimbusml/preprocessing/schema/prefixcolumnconcatenator.py b/src/python/nimbusml/preprocessing/schema/prefixcolumnconcatenator.py index c44bdc4c..6e0662e1 100644 --- a/src/python/nimbusml/preprocessing/schema/prefixcolumnconcatenator.py +++ b/src/python/nimbusml/preprocessing/schema/prefixcolumnconcatenator.py @@ -24,9 +24,35 @@ class PrefixColumnConcatenator( TransformerMixin): """ - Concatenates one or more columns of the same item type. + Combines several columns into a single vector-valued column by prefix. - :param columns: see `Columns `_. + .. remarks:: + ``PrefixColumnConcatenator`` creates a single vector-valued column from + multiple + columns. 
It can be performed on data before training a model. The + concatenation + can significantly speed up the processing of data when the number of + columns + is as large as hundreds to thousands. + + :param columns: a dictionary of key-value pairs, where key is the output + column name and value is a list of input column names. + + * Only one key-value pair is allowed. + * Input column type: numeric or string. + * Output column type: + `Vector Type `_. + + The << operator can be used to set this value (see + `Column Operator `_) + + For example + * ColumnConcatenator(columns={'features': ['age', 'parity', + 'induced']}) + * ColumnConcatenator() << {'features': ['age', 'parity', + 'induced']} + + For more details see `Columns `_. :param params: Additional arguments sent to compute engine. From 72f99c0b04d6afc00c35c658803500cc5d9b3bd5 Mon Sep 17 00:00:00 2001 From: "pieths.dev@gmail.com" Date: Fri, 15 Nov 2019 16:00:15 -0800 Subject: [PATCH 3/4] Add whitespace change to restart CI run. The Mac build never started. --- src/python/tools/manifest_diff.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/tools/manifest_diff.json b/src/python/tools/manifest_diff.json index 5b320960..a8cd262e 100644 --- a/src/python/tools/manifest_diff.json +++ b/src/python/tools/manifest_diff.json @@ -790,7 +790,7 @@ "Name": "Transforms.PrefixColumnConcatenator", "NewName": "PrefixColumnConcatenator", "Module": "preprocessing.schema", - "Type": "Transform" + "Type": "Transform" } ], "Components": [ From 8afb27968c6eb9e1c3714561dedb64fb9e34e672 Mon Sep 17 00:00:00 2001 From: "pieths.dev@gmail.com" Date: Mon, 18 Nov 2019 10:15:04 -0800 Subject: [PATCH 4/4] Fix incorrect check in PrefixColumnConcatenator core file. 
--- .../core/preprocessing/schema/prefixcolumnconcatenator.py | 3 ++- src/python/tools/code_fixer.py | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py b/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py index 3db30bb9..003e909f 100644 --- a/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py +++ b/src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py @@ -85,7 +85,8 @@ def _get_node(self, **all_args): # validate output if output_columns is None: - output_columns = input_columns + raise ValueError( + "'None' output passed when it cannot be none.") if not isinstance(output_columns, list): raise ValueError( diff --git a/src/python/tools/code_fixer.py b/src/python/tools/code_fixer.py index 21b6d1f4..3aa233ac 100644 --- a/src/python/tools/code_fixer.py +++ b/src/python/tools/code_fixer.py @@ -247,12 +247,18 @@ def fix_code(class_name, filename): all_args['output_for_sub_graph'] = {'Model' : \ all_args['predictor_model']}""" +prefixcolumnconcatenator_1 = "output_columns = input_columns" +prefixcolumnconcatenator_1_correct = """raise ValueError( + "'None' output passed when it cannot be none.")""" + signature_fixes_core = { 'NGramFeaturizer': (textTransform_1, textTransform_1_correct), 'ColumnConcatenator': [(concatColumns_1, concatColumns_1_correct)], 'ColumnSelector': [(columnselector_1, columnselector_1_correct)], 'OneVsRestClassifier': [ (onevsrestclassifier_1, onevsrestclassifier_1_correct)], + 'PrefixColumnConcatenator': (prefixcolumnconcatenator_1, + prefixcolumnconcatenator_1_correct) }