From 8353ceb09b70d5adaa3360593a83e6eb2e6a9360 Mon Sep 17 00:00:00 2001 From: Konstantin Slavnov Date: Wed, 31 Oct 2018 14:41:22 +0300 Subject: [PATCH] Delete hiding sourced-engine usage from algorithms Signed-off-by: Konstantin Slavnov --- sourced/ml/__init__.py | 2 +- sourced/ml/__main__.py | 3 ++- sourced/ml/algorithms/uast_ids_to_bag.py | 4 ++-- sourced/ml/cmd/args.py | 2 +- sourced/ml/cmd/repos2bow.py | 3 +-- sourced/ml/extractors/literals.py | 4 ++-- sourced/ml/tests/__init__.py | 2 +- sourced/ml/tests/test_basic_transformers.py | 3 ++- sourced/ml/tests/test_df_util.py | 2 +- sourced/ml/tests/test_quant_util.py | 2 +- sourced/ml/transformers/basic.py | 4 ++-- sourced/ml/transformers/content2ids.py | 2 +- sourced/ml/transformers/coocc.py | 3 ++- sourced/ml/transformers/moder.py | 18 +++++++++++------- sourced/ml/transformers/uast2bag_features.py | 2 +- sourced/ml/transformers/uast2quant.py | 2 +- sourced/ml/utils/__init__.py | 5 ----- sourced/ml/utils/engine.py | 2 +- 18 files changed, 33 insertions(+), 32 deletions(-) diff --git a/sourced/ml/__init__.py b/sourced/ml/__init__.py index 5a331cd3..951bee81 100644 --- a/sourced/ml/__init__.py +++ b/sourced/ml/__init__.py @@ -5,4 +5,4 @@ except ImportError: pass -__version__ = 0, 6, 3 +__version__ = 0, 6, 4 diff --git a/sourced/ml/__main__.py b/sourced/ml/__main__.py index 2fe8e9b4..18d6bfe2 100644 --- a/sourced/ml/__main__.py +++ b/sourced/ml/__main__.py @@ -10,7 +10,8 @@ from sourced.ml import cmd from sourced.ml.cmd import args from sourced.ml.cmd.run_swivel import mirror_tf_args -from sourced.ml.utils import install_bigartm, add_spark_args +from sourced.ml.utils import install_bigartm +from sourced.ml.utils.spark import add_spark_args def get_parser() -> argparse.ArgumentParser: diff --git a/sourced/ml/algorithms/uast_ids_to_bag.py b/sourced/ml/algorithms/uast_ids_to_bag.py index 19150506..427b1ab0 100644 --- a/sourced/ml/algorithms/uast_ids_to_bag.py +++ b/sourced/ml/algorithms/uast_ids_to_bag.py @@ -4,7 +4,7 @@ from sourced.ml.algorithms import TokenParser, NoopTokenParser from sourced.ml.algorithms.uast_to_bag import Uast2BagBase -from sourced.ml.utils import IDENTIFIER +from sourced.ml.utils import bblfsh_roles def uast2sequence(root): @@ -99,7 +99,7 @@ def __call__(self, uast): :param uast: The UAST root node. :return: bag """ - nodes = [node for node in uast2sequence(uast) if IDENTIFIER in node.roles] + nodes = [node for node in uast2sequence(uast) if bblfsh_roles.IDENTIFIER in node.roles] bag = defaultdict(int) for node in nodes: for sub in self._token_parser.process_token(node.token): diff --git a/sourced/ml/cmd/args.py b/sourced/ml/cmd/args.py index 82eb60d5..03c8a6c1 100644 --- a/sourced/ml/cmd/args.py +++ b/sourced/ml/cmd/args.py @@ -7,7 +7,7 @@ from sourced.ml import extractors from sourced.ml.transformers import BOWWriter, Moder -from sourced.ml.utils import add_engine_args +from sourced.ml.utils.engine import add_engine_args class ArgumentDefaultsHelpFormatterNoNone(argparse.ArgumentDefaultsHelpFormatter): diff --git a/sourced/ml/cmd/repos2bow.py b/sourced/ml/cmd/repos2bow.py index d5ae322f..07102d71 100644 --- a/sourced/ml/cmd/repos2bow.py +++ b/sourced/ml/cmd/repos2bow.py @@ -5,8 +5,7 @@ from sourced.ml.transformers import UastDeserializer, BagFeatures2TermFreq, Uast2BagFeatures, \ TFIDF, Cacher, Indexer, UastRow2Document, BOWWriter, Moder, create_uast_source, \ Repartitioner, PartitionSelector, Transformer, Distinct, Collector, FieldsSelector -from sourced.ml.utils import EngineConstants -from sourced.ml.utils.engine import pipeline_graph, pause +from sourced.ml.utils.engine import EngineConstants, pipeline_graph, pause from sourced.ml.utils.docfreq import create_or_load_ordered_df from sourced.ml.utils.quant import create_or_apply_quant from sourced.ml.models import DocumentFrequencies diff --git a/sourced/ml/extractors/literals.py b/sourced/ml/extractors/literals.py index f98804bd..a9679b4f 100644 --- a/sourced/ml/extractors/literals.py +++ b/sourced/ml/extractors/literals.py @@ -4,7 +4,7 @@ from sourced.ml.algorithms import UastIds2Bag, uast2sequence from sourced.ml.extractors import BagsExtractor, register_extractor -from sourced.ml.utils import LITERAL +from sourced.ml.utils import bblfsh_roles class HashedTokenParser: @@ -39,7 +39,7 @@ def __call__(self, uast): :param uast: The UAST root node. :return: bag """ - nodes = [node for node in uast2sequence(uast) if LITERAL in node.roles] + nodes = [node for node in uast2sequence(uast) if bblfsh_roles.LITERAL in node.roles] bag = defaultdict(int) for node in nodes: for sub in self._token_parser.process_token(node.token): diff --git a/sourced/ml/tests/__init__.py b/sourced/ml/tests/__init__.py index ec5fb0f9..b63fdb56 100644 --- a/sourced/ml/tests/__init__.py +++ b/sourced/ml/tests/__init__.py @@ -3,7 +3,7 @@ from modelforge.logs import setup_logging -from sourced.ml.utils import create_spark +from sourced.ml.utils.spark import create_spark from sourced.ml.utils.engine import get_engine_package, get_bblfsh_dependency, \ get_engine_version diff --git a/sourced/ml/tests/test_basic_transformers.py b/sourced/ml/tests/test_basic_transformers.py index 68022766..f8e13038 100644 --- a/sourced/ml/tests/test_basic_transformers.py +++ b/sourced/ml/tests/test_basic_transformers.py @@ -8,7 +8,8 @@ from pyspark import StorageLevel from pyspark.sql import Row -from sourced.ml.utils import create_engine, SparkDefault +from sourced.ml.utils.engine import create_engine +from sourced.ml.utils.spark import SparkDefault from sourced.ml.transformers import ParquetSaver, ParquetLoader, Collector, First, \ Identity, FieldsSelector, Repartitioner, DzhigurdaFiles, CsvSaver, Rower, \ PartitionSelector, Sampler, Distinct, Cacher, Ignition, HeadFiles, LanguageSelector, \ diff --git a/sourced/ml/tests/test_df_util.py b/sourced/ml/tests/test_df_util.py index e99758e3..b4c8d240 100644 --- a/sourced/ml/tests/test_df_util.py +++ b/sourced/ml/tests/test_df_util.py @@ -5,7 +5,7 @@ import unittest from sourced.ml.utils.docfreq import create_or_load_ordered_df -from sourced.ml.utils import create_spark +from sourced.ml.utils.spark import create_spark from sourced.ml.transformers import ParquetLoader, UastDeserializer, UastRow2Document, Counter, \ Uast2BagFeatures, Moder from sourced.ml.extractors import IdentifiersBagExtractor diff --git a/sourced/ml/tests/test_quant_util.py b/sourced/ml/tests/test_quant_util.py index 6e518234..4ce30706 100644 --- a/sourced/ml/tests/test_quant_util.py +++ b/sourced/ml/tests/test_quant_util.py @@ -7,7 +7,7 @@ from sourced.ml.extractors import ChildrenBagExtractor from sourced.ml.models import QuantizationLevels from sourced.ml.utils.quant import create_or_apply_quant -from sourced.ml.utils import create_spark +from sourced.ml.utils.spark import create_spark import sourced.ml.tests.models as paths diff --git a/sourced/ml/transformers/basic.py b/sourced/ml/transformers/basic.py index ad056e0e..2eac0fc2 100644 --- a/sourced/ml/transformers/basic.py +++ b/sourced/ml/transformers/basic.py @@ -9,8 +9,8 @@ from sourced.ml.extractors.helpers import filter_kwargs from sourced.ml.transformers.transformer import Transformer from sourced.ml.transformers.uast2bag_features import Uast2BagFeatures -from sourced.ml.utils import EngineConstants, get_spark_memory_config, create_engine, \ - create_spark, SparkDefault +from sourced.ml.utils.engine import EngineConstants, create_engine +from sourced.ml.utils.spark import get_spark_memory_config, create_spark, SparkDefault class Repartitioner(Transformer): diff --git a/sourced/ml/transformers/content2ids.py b/sourced/ml/transformers/content2ids.py index eb5b1fca..5c5ed0ea 100644 --- a/sourced/ml/transformers/content2ids.py +++ b/sourced/ml/transformers/content2ids.py @@ -11,7 +11,7 @@ from sourced.ml.algorithms import TokenParser from sourced.ml.transformers import Transformer -from sourced.ml.utils import EngineConstants +from sourced.ml.utils.engine import EngineConstants class ContentToIdentifiers(Transformer): diff --git a/sourced/ml/transformers/coocc.py b/sourced/ml/transformers/coocc.py index 423d78a5..b6da1e08 100644 --- a/sourced/ml/transformers/coocc.py +++ b/sourced/ml/transformers/coocc.py @@ -8,7 +8,8 @@ from sourced.ml.models import Cooccurrences, OrderedDocumentFrequencies from sourced.ml.transformers import Transformer -from sourced.ml.utils import bblfsh_roles, EngineConstants +from sourced.ml.utils import bblfsh_roles +from sourced.ml.utils.engine import EngineConstants class CooccModelSaver(Transformer): diff --git a/sourced/ml/transformers/moder.py b/sourced/ml/transformers/moder.py index ed01e7cc..d2f4a477 100644 --- a/sourced/ml/transformers/moder.py +++ b/sourced/ml/transformers/moder.py @@ -7,7 +7,8 @@ from sourced.ml.algorithms.uast_ids_to_bag import uast2sequence from sourced.ml.transformers import Transformer -from sourced.ml.utils import EngineConstants, FUNCTION, DECLARATION, NAME, IDENTIFIER +from sourced.ml.utils.engine import EngineConstants +from sourced.ml.utils import bblfsh_roles class Moder(Transformer): @@ -87,8 +88,8 @@ def extract_functions_from_uast(self, uast: Node): allfuncs = list(self.filter_uast(uast, self.FUNC_XPATH)) else: node_seq = uast2sequence(uast) - allfuncs = [node for node in node_seq if FUNCTION in node.roles and - DECLARATION in node.roles] + allfuncs = [node for node in node_seq if bblfsh_roles.FUNCTION in node.roles and + bblfsh_roles.DECLARATION in node.roles] internal = set() for func in allfuncs: if id(func) in internal: @@ -97,8 +98,9 @@ def extract_functions_from_uast(self, uast: Node): if self.USE_XPATH: sub_seq = self.filter_uast(func, self.FUNC_XPATH) else: - sub_seq = [node for node in uast2sequence(func) if FUNCTION in node.roles and - DECLARATION in node.roles] + sub_seq = [node for node in uast2sequence(func) if + bblfsh_roles.FUNCTION in node.roles and + bblfsh_roles.DECLARATION in node.roles] for sub in sub_seq: if sub != func: @@ -108,8 +110,10 @@ def extract_functions_from_uast(self, uast: Node): if self.USE_XPATH: f_seq = self.filter_uast(f, self.FUNC_NAME_XPATH) else: - f_seq = [node for node in uast2sequence(f) if FUNCTION in node.roles and - IDENTIFIER in node.roles and NAME in node.roles] + f_seq = [node for node in uast2sequence(f) if + bblfsh_roles.FUNCTION in node.roles and + bblfsh_roles.IDENTIFIER in node.roles and + bblfsh_roles.NAME in node.roles] name = "+".join(n.token for n in f_seq) if name: yield f, name diff --git a/sourced/ml/transformers/uast2bag_features.py b/sourced/ml/transformers/uast2bag_features.py index 4070151e..034f90c2 100644 --- a/sourced/ml/transformers/uast2bag_features.py +++ b/sourced/ml/transformers/uast2bag_features.py @@ -2,7 +2,7 @@ from sourced.ml.extractors import Extractor from sourced.ml.transformers.transformer import Transformer -from sourced.ml.utils import EngineConstants +from sourced.ml.utils.engine import EngineConstants class UastRow2Document(Transformer): diff --git a/sourced/ml/transformers/uast2quant.py b/sourced/ml/transformers/uast2quant.py index 1742644e..8967b355 100644 --- a/sourced/ml/transformers/uast2quant.py +++ b/sourced/ml/transformers/uast2quant.py @@ -3,7 +3,7 @@ from sourced.ml.extractors import BagsExtractor from sourced.ml.transformers.transformer import Transformer -from sourced.ml.utils import EngineConstants +from sourced.ml.utils.engine import EngineConstants class Uast2Quant(Transformer): diff --git a/sourced/ml/utils/__init__.py b/sourced/ml/utils/__init__.py index 6293e460..f779fe34 100644 --- a/sourced/ml/utils/__init__.py +++ b/sourced/ml/utils/__init__.py @@ -1,8 +1,3 @@ # flake8: noqa from sourced.ml.utils.bigartm import install_bigartm -from sourced.ml.utils.bblfsh_roles import IDENTIFIER, QUALIFIED, LITERAL, FUNCTION, DECLARATION, \ - NAME -from sourced.ml.utils.spark import add_spark_args, create_spark, get_spark_memory_config, \ - SparkDefault -from sourced.ml.utils.engine import add_engine_args, create_engine, EngineConstants, EngineDefault from sourced.ml.utils.pickleable_logger import PickleableLogger diff --git a/sourced/ml/utils/engine.py b/sourced/ml/utils/engine.py index 12f28883..8eb6d2f3 100644 --- a/sourced/ml/utils/engine.py +++ b/sourced/ml/utils/engine.py @@ -3,7 +3,7 @@ import requests from pkg_resources import get_distribution, DistributionNotFound from sourced.engine import Engine -from sourced.ml.utils import add_spark_args, create_spark, SparkDefault +from sourced.ml.utils.spark import add_spark_args, create_spark, SparkDefault def get_engine_version():