Skip to content

Commit

Permalink
#510 flag has been brought on top of the pipeline function, so metarows could be enabled or disabled
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Sep 17, 2023
1 parent eb52e3c commit 1c8bfd6
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions arekit/contrib/utils/pipelines/text_opinion/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,17 @@
from arekit.contrib.utils.pipelines.text_opinion.filters.limitation import FrameworkLimitationsTextOpinionFilter


def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func, text_opinion_filters):
def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
text_opinion_filters, use_meta):
""" use_meta: bool
this is mainly for tqdm and other console parameters to stay up-to-date
with the state in the case we do not have that much output results
across multiple amount of documents.
"""
assert(isinstance(annotators, list))
assert(isinstance(parsed_doc, ParsedDocument))
assert(isinstance(text_opinion_filters, list))
assert(isinstance(use_meta, bool))

def __to_id(text_opinion):
return "{}_{}".format(text_opinion.SourceId, text_opinion.TargetId)
Expand Down Expand Up @@ -53,15 +60,17 @@ def __to_id(text_opinion):
yield text_opinion_linkage

# This is the case to consider the end of the document.
yield MetaEmptyLinkedDataWrapper(doc_id=parsed_doc.RelatedDocID)
if use_meta:
yield MetaEmptyLinkedDataWrapper(doc_id=parsed_doc.RelatedDocID)


def text_opinion_extraction_pipeline(text_parser, get_doc_by_id_func, annotators, entity_index_func,
text_opinion_filters=None):
text_opinion_filters=None, use_meta_between_docs=True):
assert(isinstance(text_parser, BaseTextParser))
assert(callable(get_doc_by_id_func))
assert(isinstance(annotators, list))
assert(isinstance(text_opinion_filters, list) or text_opinion_filters is None)
assert(isinstance(use_meta_between_docs, bool))

extra_filters = [] if text_opinion_filters is None else text_opinion_filters
actual_text_opinion_filters = [FrameworkLimitationsTextOpinionFilter()] + extra_filters
Expand All @@ -77,7 +86,7 @@ def text_opinion_extraction_pipeline(text_parser, get_doc_by_id_func, annotators
# (parsed_doc) -> (text_opinions)
MapPipelineItem(map_func=lambda parsed_doc: __iter_text_opinion_linkages(
annotators=annotators, parsed_doc=parsed_doc, entity_index_func=entity_index_func,
text_opinion_filters=actual_text_opinion_filters)),
text_opinion_filters=actual_text_opinion_filters, use_meta=use_meta_between_docs)),

# linkages[] -> linkages
FlattenIterPipelineItem()
Expand Down

0 comments on commit 1c8bfd6

Please sign in to comment.