Skip to content

Commit

Permalink
Pipeline generalization. Related to #154 (#236 pre refactoring), #224
Browse files Browse the repository at this point in the history
…proceeds.
  • Loading branch information
nicolay-r committed Dec 20, 2021
1 parent 566a08f commit f8aa3a3
Show file tree
Hide file tree
Showing 16 changed files with 46 additions and 33 deletions.
1 change: 1 addition & 0 deletions arekit/common/data/input/providers/opinions.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def __iter_linked_text_opins(news_opins_for_extraction_func, parse_news_func,
parsed_news = parse_news_func(doc_id)

linked_text_opinion_lists = OpinionProvider.__iter_linked_text_opinion_lists(
# TODO. To be refactored.
text_opinion_pairs_provider=TextOpinionPairsProvider(
parsed_news=parsed_news,
value_to_group_id_func=value_to_group_id_func),
Expand Down
4 changes: 2 additions & 2 deletions arekit/common/news/entities_grouping.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from arekit.common.entities.base import Entity
from arekit.common.text.pipeline_ctx import PipelineContext
from arekit.common.text.pipeline_item import TextParserPipelineItem
from arekit.common.pipeline.context import PipelineContext
from arekit.common.pipeline.item import TextParserPipelineItem


class EntitiesGroupingPipelineItem(TextParserPipelineItem):
Expand Down
4 changes: 2 additions & 2 deletions arekit/common/news/objects_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from arekit.common.bound import Bound
from arekit.common.text.pipeline_ctx import PipelineContext
from arekit.common.text.pipeline_item import TextParserPipelineItem
from arekit.common.pipeline.context import PipelineContext
from arekit.common.pipeline.item import TextParserPipelineItem


class SentenceObjectsParserPipelineItem(TextParserPipelineItem):
Expand Down
4 changes: 2 additions & 2 deletions arekit/common/news/parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from arekit.common.news.base import News
from arekit.common.news.parsed.base import ParsedNews
from arekit.common.pipeline.context import PipelineContext
from arekit.common.text.parser import BaseTextParser
from arekit.common.text.pipeline_ctx import PipelineContext


class NewsParser(object):
Expand All @@ -11,7 +11,7 @@ def parse(news, text_parser):
assert(isinstance(news, News))
assert(isinstance(text_parser, BaseTextParser))

parsed_sentences = [text_parser.parse(NewsParser.__create_pipeline_ctx(news, sent_ind))
parsed_sentences = [text_parser.run(NewsParser.__create_pipeline_ctx(news, sent_ind))
for sent_ind in range(news.SentencesCount)]

return ParsedNews(doc_id=news.ID,
Expand Down
Empty file.
16 changes: 16 additions & 0 deletions arekit/common/pipeline/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from arekit.common.pipeline.context import PipelineContext
from arekit.common.pipeline.item import TextParserPipelineItem


class BasePipeline(object):
    """ Ordered list of pipeline items which are applied,
        one after another, to a single pipeline context.
    """

    def __init__(self, pipeline):
        """ pipeline: list
                items to be applied on `run`; None entries are allowed
                and are skipped when the pipeline is executed.
        """
        assert(isinstance(pipeline, list))
        self.__pipeline = pipeline

    def run(self, pipeline_ctx):
        """ Applies every non-None item of the pipeline to the
            given context, keeping the order of the original list.

            pipeline_ctx: PipelineContext
                context instance handed over to every item.
        """
        assert(isinstance(pipeline_ctx, PipelineContext))

        for stage in self.__pipeline:
            # Empty slots are tolerated and just ignored.
            if stage is None:
                continue
            assert(isinstance(stage, TextParserPipelineItem))
            stage.apply(pipeline_ctx)
File renamed without changes.
10 changes: 10 additions & 0 deletions arekit/common/pipeline/item.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from arekit.common.pipeline.context import PipelineContext


class TextParserPipelineItem(object):
    """ Single pipeline item that might be instantiated and embedded into pipeline.
        This base class declares the interface only: `apply` has to be
        overridden by a concrete item.
    """

    def apply(self, pipeline_ctx):
        """ Processes the given context.

            pipeline_ctx: PipelineContext
                context the item is expected to work with.

            Raises NotImplementedError unless overridden.
        """
        assert(isinstance(pipeline_ctx, PipelineContext))
        raise NotImplementedError()
18 changes: 4 additions & 14 deletions arekit/common/text/parser.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,9 @@
from arekit.common.pipeline.base import BasePipeline
from arekit.common.text.parsed import BaseParsedText
from arekit.common.text.pipeline_ctx import PipelineContext
from arekit.common.text.pipeline_item import TextParserPipelineItem


class BaseTextParser(object):

def __init__(self, pipeline):
assert(isinstance(pipeline, list))
self.__pipeline = pipeline

def parse(self, pipeline_ctx):
assert(isinstance(pipeline_ctx, PipelineContext))

for item in filter(lambda itm: itm is not None, self.__pipeline):
assert(isinstance(item, TextParserPipelineItem))
item.apply(pipeline_ctx)
class BaseTextParser(BasePipeline):
    """ Pipeline which, once all of its items have been applied,
        wraps the outcome into a BaseParsedText instance.
    """

    def run(self, pipeline_ctx):
        """ Runs the underlying pipeline over `pipeline_ctx` and returns
            BaseParsedText built from the value the context provides
            under the "src" key.
        """
        super(BaseTextParser, self).run(pipeline_ctx)
        # Read the value only after every item has been applied.
        terms = pipeline_ctx.provide("src")
        return BaseParsedText(terms=terms)
4 changes: 0 additions & 4 deletions arekit/common/text/pipeline_item.py

This file was deleted.

2 changes: 1 addition & 1 deletion arekit/contrib/experiment_rusentrel/common.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from arekit.common.entities.base import Entity
from arekit.common.experiment.api.ctx_serialization import SerializationData
from arekit.common.news.entities_grouping import EntitiesGroupingPipelineItem
from arekit.common.pipeline.item import TextParserPipelineItem
from arekit.common.synonyms import SynonymsCollection
from arekit.common.text.parser import BaseTextParser
from arekit.common.text.pipeline_item import TextParserPipelineItem
from arekit.processing.text.pipeline_frames_lemmatized import LemmasBasedFrameVariantsParser
from arekit.processing.text.pipeline_tokenizer import DefaultTextTokenizer

Expand Down
4 changes: 2 additions & 2 deletions arekit/processing/text/pipeline_frames.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from arekit.common.frames.text_variant import TextFrameVariant
from arekit.common.frames.variants.collection import FrameVariantsCollection
from arekit.common.text.pipeline_ctx import PipelineContext
from arekit.common.text.pipeline_item import TextParserPipelineItem
from arekit.common.pipeline.context import PipelineContext
from arekit.common.pipeline.item import TextParserPipelineItem
from arekit.processing.languages.mods import BaseLanguageMods
from arekit.processing.languages.ru.mods import RussianLanguageMods

Expand Down
2 changes: 1 addition & 1 deletion arekit/processing/text/pipeline_frames_lemmatized.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from arekit.common.text.pipeline_ctx import PipelineContext
from arekit.common.pipeline.context import PipelineContext
from arekit.common.text.stemmer import Stemmer
from arekit.processing.languages.ru.mods import RussianLanguageMods
from arekit.processing.text.pipeline_frames import FrameVariantsParser
Expand Down
4 changes: 2 additions & 2 deletions arekit/processing/text/pipeline_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging

from arekit.common.text.pipeline_ctx import PipelineContext
from arekit.common.text.pipeline_item import TextParserPipelineItem
from arekit.common.pipeline.context import PipelineContext
from arekit.common.pipeline.item import TextParserPipelineItem
from arekit.common.utils import split_by_whitespaces
from arekit.processing.text.tokens import Tokens
from arekit.processing.text.token import Token
Expand Down
4 changes: 2 additions & 2 deletions examples/network/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from arekit.common.folding.nofold import NoFolding
from arekit.common.frames.variants.collection import FrameVariantsCollection
from arekit.common.opinions.collection import OpinionCollection
from arekit.common.text.pipeline_ctx import PipelineContext
from arekit.common.text.pipeline_item import TextParserPipelineItem
from arekit.common.pipeline.context import PipelineContext
from arekit.common.pipeline.item import TextParserPipelineItem
from arekit.common.utils import split_by_whitespaces
from arekit.contrib.experiment_rusentrel.connotations.provider import RuSentiFramesConnotationProvider
from arekit.contrib.experiment_rusentrel.entities.str_simple_fmt import StringEntitiesSimpleFormatter
Expand Down
2 changes: 1 addition & 1 deletion tests/processing/test_frames_annotation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from arekit.common.frames.text_variant import TextFrameVariant
from arekit.common.text.pipeline_ctx import PipelineContext
from arekit.common.pipeline.context import PipelineContext
from arekit.processing.lemmatization.mystem import MystemWrapper
from arekit.processing.text.pipeline_frames_lemmatized import LemmasBasedFrameVariantsParser
from examples.repository import create_frame_variants_collection
Expand Down

0 comments on commit f8aa3a3

Please sign in to comment.