Skip to content

Commit

Permalink
#431 , #432 and #433 -- done.
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Jan 2, 2023
1 parent 704fd46 commit 5fa3249
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 12 deletions.
7 changes: 3 additions & 4 deletions arekit/common/news/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,18 @@ def parse(news, text_parser, parent_ppl_ctx=None):
assert(isinstance(parent_ppl_ctx, PipelineContext) or parent_ppl_ctx is None)

parsed_sentences = [text_parser.run(input_data=NewsParser.__get_sent(news, sent_ind).Text,
params_dict=NewsParser.__create_ppl_params(
news=news, sent_ind=sent_ind, parent_ppl_ctx=parent_ppl_ctx))
params_dict=NewsParser.__create_ppl_params(news=news, sent_ind=sent_ind),
parent_ctx=parent_ppl_ctx)
for sent_ind in range(news.SentencesCount)]

return ParsedNews(doc_id=news.ID,
parsed_sentences=parsed_sentences)

@staticmethod
def __create_ppl_params(news, sent_ind, parent_ppl_ctx):
def __create_ppl_params(news, sent_ind):
assert(isinstance(news, News))
return {
"s_ind": sent_ind, # sentence index. (as Metadata)
"doc_id": news.ID, # document index. (as Metadata)
"sentence": NewsParser.__get_sent(news, sent_ind), # Required for special sources.
"parent_ctx": parent_ppl_ctx # Parent pipeline context.
}
5 changes: 3 additions & 2 deletions arekit/common/pipeline/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ def __init__(self, pipeline):
assert(isinstance(pipeline, list))
self.__pipeline = pipeline

def run(self, input_data, params_dict=None):
def run(self, input_data, params_dict=None, parent_ctx=None):
assert(isinstance(params_dict, dict) or params_dict is None)

pipeline_ctx = PipelineContext(params_dict if params_dict is not None else dict())
pipeline_ctx = PipelineContext(d=params_dict if params_dict is not None else dict(),
parent_ctx=parent_ctx)

for item in filter(lambda itm: itm is not None, self.__pipeline):
assert(isinstance(item, BasePipelineItem))
Expand Down
7 changes: 6 additions & 1 deletion arekit/common/pipeline/context.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from arekit.common.pipeline.conts import PARENT_CTX


class PipelineContext(object):
""" Context of parameters utilized in pipeline
"""

def __init__(self, d):
def __init__(self, d, parent_ctx=None):
assert(isinstance(d, dict))
assert(isinstance(parent_ctx, PipelineContext) or parent_ctx is None)
assert(PARENT_CTX not in d)
self._d = d

def __provide(self, param):
Expand Down
2 changes: 2 additions & 0 deletions arekit/common/pipeline/conts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Reference to the parent pipeline context.
PARENT_CTX = "parent_ctx"
5 changes: 3 additions & 2 deletions arekit/common/text/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

class BaseTextParser(BasePipeline):

def run(self, input_data, params_dict=None):
def run(self, input_data, params_dict=None, parent_ctx=None):
output_data = super(BaseTextParser, self).run(input_data=input_data,
params_dict=params_dict)
params_dict=params_dict,
parent_ctx=parent_ctx)

return BaseParsedText(terms=output_data)
7 changes: 4 additions & 3 deletions arekit/contrib/utils/pipelines/items/to_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __init__(self, opinions_io, create_opinion_collection_func,
self.__create_opinion_collection_func = create_opinion_collection_func
self.__opinion_collection_writer = opinion_collection_writer

def __convert(self, data_folding, output_storage, target_func, data_type):
def __convert(self, data_folding, output_storage, target_func, data_type, pipeline_ctx):
""" From `output_storage` to `target` conversion.
output_storage: BaseRowsStorage
target_func: func(doc_id) -- considered to provide a target for the particular document.
Expand Down Expand Up @@ -76,7 +76,7 @@ def __convert(self, data_folding, output_storage, target_func, data_type):
input_data = set(output_storage.iter_column_values(column_name=const.DOC_ID))

# iterate over the result.
for _ in pipeline.run(input_data):
for _ in pipeline.run(input_data, parent_ctx=pipeline_ctx):
pass

def _iter_output_and_target_pairs(self, iter_index, data_type):
Expand All @@ -97,4 +97,5 @@ def apply_core(self, input_data, pipeline_ctx):
self.__convert(output_storage=output_storage,
target_func=target,
data_type=data_type,
data_folding=data_folding)
data_folding=data_folding,
pipeline_ctx=pipeline_ctx)

0 comments on commit 5fa3249

Please sign in to comment.