Skip to content
This repository has been archived by the owner on May 22, 2019. It is now read-only.

Commit

Permalink
Merge pull request #344 from zurk/master
Browse files: browse the repository at this point in the history
Remove hidden sourced-engine usage from algorithms
  • Loading branch information
vmarkovtsev committed Oct 31, 2018
2 parents 1bc0359 + 8353ceb commit 0c41c7f
Show file tree
Hide file tree
Showing 18 changed files with 33 additions and 32 deletions.
2 changes: 1 addition & 1 deletion sourced/ml/__init__.py
Expand Up @@ -5,4 +5,4 @@
except ImportError:
pass

__version__ = 0, 6, 3
__version__ = 0, 6, 4
3 changes: 2 additions & 1 deletion sourced/ml/__main__.py
Expand Up @@ -10,7 +10,8 @@
from sourced.ml import cmd
from sourced.ml.cmd import args
from sourced.ml.cmd.run_swivel import mirror_tf_args
from sourced.ml.utils import install_bigartm, add_spark_args
from sourced.ml.utils import install_bigartm
from sourced.ml.utils.spark import add_spark_args


def get_parser() -> argparse.ArgumentParser:
Expand Down
4 changes: 2 additions & 2 deletions sourced/ml/algorithms/uast_ids_to_bag.py
Expand Up @@ -4,7 +4,7 @@

from sourced.ml.algorithms import TokenParser, NoopTokenParser
from sourced.ml.algorithms.uast_to_bag import Uast2BagBase
from sourced.ml.utils import IDENTIFIER
from sourced.ml.utils import bblfsh_roles


def uast2sequence(root):
Expand Down Expand Up @@ -99,7 +99,7 @@ def __call__(self, uast):
:param uast: The UAST root node.
:return: bag
"""
nodes = [node for node in uast2sequence(uast) if IDENTIFIER in node.roles]
nodes = [node for node in uast2sequence(uast) if bblfsh_roles.IDENTIFIER in node.roles]
bag = defaultdict(int)
for node in nodes:
for sub in self._token_parser.process_token(node.token):
Expand Down
2 changes: 1 addition & 1 deletion sourced/ml/cmd/args.py
Expand Up @@ -7,7 +7,7 @@

from sourced.ml import extractors
from sourced.ml.transformers import BOWWriter, Moder
from sourced.ml.utils import add_engine_args
from sourced.ml.utils.engine import add_engine_args


class ArgumentDefaultsHelpFormatterNoNone(argparse.ArgumentDefaultsHelpFormatter):
Expand Down
3 changes: 1 addition & 2 deletions sourced/ml/cmd/repos2bow.py
Expand Up @@ -5,8 +5,7 @@
from sourced.ml.transformers import UastDeserializer, BagFeatures2TermFreq, Uast2BagFeatures, \
TFIDF, Cacher, Indexer, UastRow2Document, BOWWriter, Moder, create_uast_source, \
Repartitioner, PartitionSelector, Transformer, Distinct, Collector, FieldsSelector
from sourced.ml.utils import EngineConstants
from sourced.ml.utils.engine import pipeline_graph, pause
from sourced.ml.utils.engine import EngineConstants, pipeline_graph, pause
from sourced.ml.utils.docfreq import create_or_load_ordered_df
from sourced.ml.utils.quant import create_or_apply_quant
from sourced.ml.models import DocumentFrequencies
Expand Down
4 changes: 2 additions & 2 deletions sourced/ml/extractors/literals.py
Expand Up @@ -4,7 +4,7 @@

from sourced.ml.algorithms import UastIds2Bag, uast2sequence
from sourced.ml.extractors import BagsExtractor, register_extractor
from sourced.ml.utils import LITERAL
from sourced.ml.utils import bblfsh_roles


class HashedTokenParser:
Expand Down Expand Up @@ -39,7 +39,7 @@ def __call__(self, uast):
:param uast: The UAST root node.
:return: bag
"""
nodes = [node for node in uast2sequence(uast) if LITERAL in node.roles]
nodes = [node for node in uast2sequence(uast) if bblfsh_roles.LITERAL in node.roles]
bag = defaultdict(int)
for node in nodes:
for sub in self._token_parser.process_token(node.token):
Expand Down
2 changes: 1 addition & 1 deletion sourced/ml/tests/__init__.py
Expand Up @@ -3,7 +3,7 @@

from modelforge.logs import setup_logging

from sourced.ml.utils import create_spark
from sourced.ml.utils.spark import create_spark
from sourced.ml.utils.engine import get_engine_package, get_bblfsh_dependency, \
get_engine_version

Expand Down
3 changes: 2 additions & 1 deletion sourced/ml/tests/test_basic_transformers.py
Expand Up @@ -8,7 +8,8 @@
from pyspark import StorageLevel
from pyspark.sql import Row

from sourced.ml.utils import create_engine, SparkDefault
from sourced.ml.utils.engine import create_engine
from sourced.ml.utils.spark import SparkDefault
from sourced.ml.transformers import ParquetSaver, ParquetLoader, Collector, First, \
Identity, FieldsSelector, Repartitioner, DzhigurdaFiles, CsvSaver, Rower, \
PartitionSelector, Sampler, Distinct, Cacher, Ignition, HeadFiles, LanguageSelector, \
Expand Down
2 changes: 1 addition & 1 deletion sourced/ml/tests/test_df_util.py
Expand Up @@ -5,7 +5,7 @@
import unittest

from sourced.ml.utils.docfreq import create_or_load_ordered_df
from sourced.ml.utils import create_spark
from sourced.ml.utils.spark import create_spark
from sourced.ml.transformers import ParquetLoader, UastDeserializer, UastRow2Document, Counter, \
Uast2BagFeatures, Moder
from sourced.ml.extractors import IdentifiersBagExtractor
Expand Down
2 changes: 1 addition & 1 deletion sourced/ml/tests/test_quant_util.py
Expand Up @@ -7,7 +7,7 @@
from sourced.ml.extractors import ChildrenBagExtractor
from sourced.ml.models import QuantizationLevels
from sourced.ml.utils.quant import create_or_apply_quant
from sourced.ml.utils import create_spark
from sourced.ml.utils.spark import create_spark

import sourced.ml.tests.models as paths

Expand Down
4 changes: 2 additions & 2 deletions sourced/ml/transformers/basic.py
Expand Up @@ -9,8 +9,8 @@
from sourced.ml.extractors.helpers import filter_kwargs
from sourced.ml.transformers.transformer import Transformer
from sourced.ml.transformers.uast2bag_features import Uast2BagFeatures
from sourced.ml.utils import EngineConstants, get_spark_memory_config, create_engine, \
create_spark, SparkDefault
from sourced.ml.utils.engine import EngineConstants, create_engine
from sourced.ml.utils.spark import get_spark_memory_config, create_spark, SparkDefault


class Repartitioner(Transformer):
Expand Down
2 changes: 1 addition & 1 deletion sourced/ml/transformers/content2ids.py
Expand Up @@ -11,7 +11,7 @@

from sourced.ml.algorithms import TokenParser
from sourced.ml.transformers import Transformer
from sourced.ml.utils import EngineConstants
from sourced.ml.utils.engine import EngineConstants


class ContentToIdentifiers(Transformer):
Expand Down
3 changes: 2 additions & 1 deletion sourced/ml/transformers/coocc.py
Expand Up @@ -8,7 +8,8 @@

from sourced.ml.models import Cooccurrences, OrderedDocumentFrequencies
from sourced.ml.transformers import Transformer
from sourced.ml.utils import bblfsh_roles, EngineConstants
from sourced.ml.utils import bblfsh_roles
from sourced.ml.utils.engine import EngineConstants


class CooccModelSaver(Transformer):
Expand Down
18 changes: 11 additions & 7 deletions sourced/ml/transformers/moder.py
Expand Up @@ -7,7 +7,8 @@

from sourced.ml.algorithms.uast_ids_to_bag import uast2sequence
from sourced.ml.transformers import Transformer
from sourced.ml.utils import EngineConstants, FUNCTION, DECLARATION, NAME, IDENTIFIER
from sourced.ml.utils.engine import EngineConstants
from sourced.ml.utils import bblfsh_roles


class Moder(Transformer):
Expand Down Expand Up @@ -87,8 +88,8 @@ def extract_functions_from_uast(self, uast: Node):
allfuncs = list(self.filter_uast(uast, self.FUNC_XPATH))
else:
node_seq = uast2sequence(uast)
allfuncs = [node for node in node_seq if FUNCTION in node.roles and
DECLARATION in node.roles]
allfuncs = [node for node in node_seq if bblfsh_roles.FUNCTION in node.roles and
bblfsh_roles.DECLARATION in node.roles]
internal = set()
for func in allfuncs:
if id(func) in internal:
Expand All @@ -97,8 +98,9 @@ def extract_functions_from_uast(self, uast: Node):
if self.USE_XPATH:
sub_seq = self.filter_uast(func, self.FUNC_XPATH)
else:
sub_seq = [node for node in uast2sequence(func) if FUNCTION in node.roles and
DECLARATION in node.roles]
sub_seq = [node for node in uast2sequence(func) if
bblfsh_roles.FUNCTION in node.roles and
bblfsh_roles.DECLARATION in node.roles]

for sub in sub_seq:
if sub != func:
Expand All @@ -108,8 +110,10 @@ def extract_functions_from_uast(self, uast: Node):
if self.USE_XPATH:
f_seq = self.filter_uast(f, self.FUNC_NAME_XPATH)
else:
f_seq = [node for node in uast2sequence(f) if FUNCTION in node.roles and
IDENTIFIER in node.roles and NAME in node.roles]
f_seq = [node for node in uast2sequence(f) if
bblfsh_roles.FUNCTION in node.roles and
bblfsh_roles.IDENTIFIER in node.roles and
bblfsh_roles.NAME in node.roles]
name = "+".join(n.token for n in f_seq)
if name:
yield f, name
2 changes: 1 addition & 1 deletion sourced/ml/transformers/uast2bag_features.py
Expand Up @@ -2,7 +2,7 @@

from sourced.ml.extractors import Extractor
from sourced.ml.transformers.transformer import Transformer
from sourced.ml.utils import EngineConstants
from sourced.ml.utils.engine import EngineConstants


class UastRow2Document(Transformer):
Expand Down
2 changes: 1 addition & 1 deletion sourced/ml/transformers/uast2quant.py
Expand Up @@ -3,7 +3,7 @@

from sourced.ml.extractors import BagsExtractor
from sourced.ml.transformers.transformer import Transformer
from sourced.ml.utils import EngineConstants
from sourced.ml.utils.engine import EngineConstants


class Uast2Quant(Transformer):
Expand Down
5 changes: 0 additions & 5 deletions sourced/ml/utils/__init__.py
@@ -1,8 +1,3 @@
# flake8: noqa
from sourced.ml.utils.bigartm import install_bigartm
from sourced.ml.utils.bblfsh_roles import IDENTIFIER, QUALIFIED, LITERAL, FUNCTION, DECLARATION, \
NAME
from sourced.ml.utils.spark import add_spark_args, create_spark, get_spark_memory_config, \
SparkDefault
from sourced.ml.utils.engine import add_engine_args, create_engine, EngineConstants, EngineDefault
from sourced.ml.utils.pickleable_logger import PickleableLogger
2 changes: 1 addition & 1 deletion sourced/ml/utils/engine.py
Expand Up @@ -3,7 +3,7 @@
import requests
from pkg_resources import get_distribution, DistributionNotFound
from sourced.engine import Engine
from sourced.ml.utils import add_spark_args, create_spark, SparkDefault
from sourced.ml.utils.spark import add_spark_args, create_spark, SparkDefault


def get_engine_version():
Expand Down

0 comments on commit 0c41c7f

Please sign in to comment.