Skip to content

Commit

Permalink
#471 variables renaming.
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Jun 20, 2023
1 parent 286fcf7 commit 6072696
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,21 @@ def create_text_opinion_extraction_pipeline(text_parser,
assert(version in [RuAttitudesVersions.V20Large, RuAttitudesVersions.V20Base,
RuAttitudesVersions.V20BaseNeut, RuAttitudesVersions.V20LargeNeut])

doc_ops = RuAttitudesDocumentProvider(version=version,
keep_doc_ids_only=False,
doc_id_func=lambda doc_id: doc_id,
limit=limit)
doc_provider = RuAttitudesDocumentProvider(version=version,
keep_doc_ids_only=False,
doc_id_func=lambda doc_id: doc_id,
limit=limit)

pipeline = text_opinion_extraction_pipeline(
annotators=[
PredefinedTextOpinionAnnotator(doc_ops=doc_ops, label_formatter=RuAttitudesLabelFormatter(label_scaler))
PredefinedTextOpinionAnnotator(doc_provider=doc_provider,
label_formatter=RuAttitudesLabelFormatter(label_scaler))
],
text_opinion_filters=[
EntityBasedTextOpinionFilter(entity_filter=entity_filter),
DistanceLimitedTextOpinionFilter(terms_per_context)
],
get_doc_by_id_func=doc_ops.by_id,
get_doc_by_id_func=doc_provider.by_id,
text_parser=text_parser)

return pipeline
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def create_text_opinion_extraction_pipeline(rusentrel_version,
stemmer=MystemWrapper(),
is_read_only=False)

doc_ops = RuSentrelDocumentProvider(version=rusentrel_version, synonyms=synonyms)
doc_provider = RuSentrelDocumentProvider(version=rusentrel_version, synonyms=synonyms)

pipeline = text_opinion_extraction_pipeline(
annotators=[
Expand All @@ -55,7 +55,7 @@ def create_text_opinion_extraction_pipeline(rusentrel_version,
EntityBasedTextOpinionFilter(entity_filter=entity_filter),
DistanceLimitedTextOpinionFilter(terms_per_context)
],
get_doc_by_id_func=doc_ops.by_id,
get_doc_by_id_func=doc_provider.by_id,
text_parser=text_parser)

return pipeline
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def create_text_opinion_extraction_pipeline(sentinerel_version,
text_parser,
label_formatter=SentiNERELSentimentLabelFormatter(),
terms_per_context=50,
doc_ops=None,
doc_provider=None,
dist_in_sentences=0,
docs_limit=None,
entity_filter=None):
Expand All @@ -40,7 +40,7 @@ def create_text_opinion_extraction_pipeline(sentinerel_version,
sentinerel_version: enum
Version of the SentiNEREL collection.
text_parser: Defines how we process the text.
doc_ops: DocumentProvider or None
doc_provider: DocumentProvider or None
In case of None we consider the default initialization.
label_formatter:
Formatter for labels which allows to: limit set of labels, and perform its conversion from
Expand All @@ -52,16 +52,16 @@ def create_text_opinion_extraction_pipeline(sentinerel_version,
pipelines per every type.
"""
assert(isinstance(sentinerel_version, SentiNerelVersions))
assert(isinstance(doc_ops, DocumentProvider) or doc_ops is None)
assert(isinstance(doc_provider, DocumentProvider) or doc_provider is None)

data_folding = None

if doc_ops is None:
if doc_provider is None:
# Default Initialization.
filenames_by_ids, data_folding = SentiNerelIOUtils.read_dataset_split(version=sentinerel_version,
docs_limit=docs_limit)
doc_ops = SentiNERELDocProvider(filename_by_id=filenames_by_ids,
version=sentinerel_version)
doc_provider = SentiNERELDocProvider(filename_by_id=filenames_by_ids,
version=sentinerel_version)

train_neut_annot = create_nolabel_text_opinion_annotator(terms_per_context=terms_per_context,
dist_in_sents=dist_in_sentences)
Expand All @@ -74,28 +74,28 @@ def create_text_opinion_extraction_pipeline(sentinerel_version,
DistanceLimitedTextOpinionFilter(terms_per_context)
]

predefined_annot = PredefinedTextOpinionAnnotator(doc_ops, label_formatter)
predefined_annot = PredefinedTextOpinionAnnotator(doc_provider, label_formatter)

pipelines = {
DataType.Train: create_train_pipeline(text_parser=text_parser,
doc_ops=doc_ops,
doc_provider=doc_provider,
annotators=[
predefined_annot,
train_neut_annot
],
text_opinion_filters=text_opinion_filters),
DataType.Test: create_test_pipeline(text_parser=text_parser,
doc_ops=doc_ops,
doc_provider=doc_provider,
annotators=[
test_neut_annot
],
text_opinion_filters=text_opinion_filters),
DataType.Etalon: create_etalon_pipeline(text_parser=text_parser,
doc_ops=doc_ops,
doc_provider=doc_provider,
predefined_annot=predefined_annot,
text_opinion_filters=text_opinion_filters),
DataType.Dev: create_etalon_with_no_label_pipeline(text_parser=text_parser,
doc_ops=doc_ops,
doc_provider=doc_provider,
annotators=[
predefined_annot,
train_neut_annot
Expand Down Expand Up @@ -142,47 +142,47 @@ def create_nolabel_text_opinion_annotator(terms_per_context, dist_in_sents=0, sy
error_on_synonym_end_missed=False))


def create_train_pipeline(text_parser, doc_ops, annotators, text_opinion_filters):
def create_train_pipeline(text_parser, doc_provider, annotators, text_opinion_filters):
""" Train pipeline is based on the predefined annotations and
automatic annotations of other pairs with a NoLabel.
"""
return text_opinion_extraction_pipeline(
get_doc_by_id_func=doc_ops.by_id,
get_doc_by_id_func=doc_provider.by_id,
text_parser=text_parser,
annotators=annotators,
text_opinion_filters=text_opinion_filters)


def create_test_pipeline(text_parser, doc_ops, annotators, text_opinion_filters):
def create_test_pipeline(text_parser, doc_provider, annotators, text_opinion_filters):
""" This is a pipeline for TEST data annotation.
We perform annotation of the attitudes.
"""
assert(isinstance(text_parser, BaseTextParser))
assert(isinstance(annotators, list))
assert(isinstance(doc_ops, DocumentProvider))
assert(isinstance(doc_provider, DocumentProvider))

return text_opinion_extraction_pipeline(
annotators=annotators,
text_parser=text_parser,
get_doc_by_id_func=doc_ops.by_id,
get_doc_by_id_func=doc_provider.by_id,
text_opinion_filters=text_opinion_filters)


def create_etalon_pipeline(text_parser, doc_ops, predefined_annot, text_opinion_filters):
def create_etalon_pipeline(text_parser, doc_provider, predefined_annot, text_opinion_filters):
""" We adopt exactly the same pipeline as for training data,
but we do not perform "NoLabel" annotation.
(we are interested only in sentiment attitudes).
"""
return create_train_pipeline(text_parser=text_parser,
doc_ops=doc_ops,
doc_provider=doc_provider,
annotators=[predefined_annot],
text_opinion_filters=text_opinion_filters)


def create_etalon_with_no_label_pipeline(annotators, text_parser, doc_ops, text_opinion_filters):
def create_etalon_with_no_label_pipeline(annotators, text_parser, doc_provider, text_opinion_filters):
""" We adopt exactly the same pipeline as for training data.
"""
return create_train_pipeline(text_parser=text_parser,
doc_ops=doc_ops,
doc_provider=doc_provider,
annotators=annotators,
text_opinion_filters=text_opinion_filters)
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class PredefinedTextOpinionAnnotator(BaseOpinionAnnotator):
It converts the pre-annotated Relations from BRAT-documents to TextOpinions
"""

def __init__(self, doc_ops, label_formatter, keep_any_type=False, entity_index_func=None):
def __init__(self, doc_provider, label_formatter, keep_any_type=False, entity_index_func=None):
"""
get_doc_func:
func(doc_id)
Expand All @@ -29,12 +29,12 @@ def __init__(self, doc_ops, label_formatter, keep_any_type=False, entity_index_f
entity_index_func: is a way of how we provide an external entity ID
func(entity) -> ID
"""
assert(isinstance(doc_ops, DocumentProvider))
assert(isinstance(doc_provider, DocumentProvider))
assert(isinstance(label_formatter, StringLabelsFormatter))
assert(callable(entity_index_func) or entity_index_func is None)
super(PredefinedTextOpinionAnnotator, self).__init__()

self.__doc_ops = doc_ops
self.__doc_provider = doc_provider
self.__label_formatter = label_formatter
self.__keep_any_type = keep_any_type
self.__entity_index_func = (lambda brat_entity: brat_entity.ID) if \
Expand Down Expand Up @@ -66,7 +66,7 @@ def _annot_collection_core(self, parsed_doc):
EntityServiceProvider(self.__entity_index_func)
])
esp = pns.get_provider(EntityServiceProvider.NAME)
doc = self.__doc_ops.by_id(parsed_doc.RelatedDocID)
doc = self.__doc_provider.by_id(parsed_doc.RelatedDocID)

for brat_relation in doc.Relations:

Expand Down
6 changes: 3 additions & 3 deletions tests/contrib/utils/test_csv_stream_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,19 @@ def __launch(self, writer, target_extention):
# Declaring pipeline related context parameters.
#####
no_folding = NoFolding(doc_ids=[0, 1], supported_data_type=DataType.Train)
doc_ops = FooDocumentProvider()
doc_provider = FooDocumentProvider()
text_parser = BaseTextParser(pipeline=[BratTextEntitiesParser(), DefaultTextTokenizer(keep_tokens=True)])
train_pipeline = text_opinion_extraction_pipeline(
annotators=[
PredefinedTextOpinionAnnotator(
doc_ops,
doc_provider,
label_formatter=CustomLabelsFormatter(pos_label_type=Positive,
neg_label_type=Negative))
],
text_opinion_filters=[
DistanceLimitedTextOpinionFilter(terms_per_context=50)
],
get_doc_by_id_func=doc_ops.by_id,
get_doc_by_id_func=doc_provider.by_id,
text_parser=text_parser)
#####

Expand Down
4 changes: 2 additions & 2 deletions tests/tutorials/test_tutorial_data_foldings.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ def test(self):

splitter_simple = SimpleCrossValidationSplitter(shuffle=True, seed=1)

doc_ops = FooDocumentProvider()
doc_provider = FooDocumentProvider()
doc_ids = list(range(2))

splitter_statistical = StatBasedCrossValidationSplitter(
docs_stat=SentenceBasedDocumentStatGenerator(doc_reader_func=doc_ops.by_id),
docs_stat=SentenceBasedDocumentStatGenerator(doc_reader_func=doc_provider.by_id),
doc_ids=doc_ids)

cv_folding = TwoClassCVFolding(
Expand Down
6 changes: 3 additions & 3 deletions tests/tutorials/test_tutorial_pipeline_sampling_bert.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,19 +105,19 @@ def test(self):
# Declaring pipeline related context parameters.
#####
no_folding = NoFolding(doc_ids=[0, 1], supported_data_type=DataType.Train)
doc_ops = FooDocumentProvider()
doc_provider = FooDocumentProvider()
text_parser = BaseTextParser(pipeline=[BratTextEntitiesParser(), DefaultTextTokenizer(keep_tokens=True)])
train_pipeline = text_opinion_extraction_pipeline(
annotators=[
PredefinedTextOpinionAnnotator(
doc_ops,
doc_provider,
label_formatter=CustomLabelsFormatter(pos_label_type=Positive,
neg_label_type=Negative))
],
text_opinion_filters=[
DistanceLimitedTextOpinionFilter(terms_per_context=50)
],
get_doc_by_id_func=doc_ops.by_id,
get_doc_by_id_func=doc_provider.by_id,
text_parser=text_parser)
#####

Expand Down
6 changes: 3 additions & 3 deletions tests/tutorials/test_tutorial_pipeline_sampling_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def test(self):
# Declaring pipeline related context parameters.
#####
no_folding = NoFolding(doc_ids=[0, 1], supported_data_type=DataType.Train)
doc_ops = FooDocumentProvider()
doc_provider = FooDocumentProvider()
text_parser = BaseTextParser(pipeline=[
BratTextEntitiesParser(),
DefaultTextTokenizer(keep_tokens=True),
Expand All @@ -115,13 +115,13 @@ def test(self):
train_pipeline = text_opinion_extraction_pipeline(
annotators=[
PredefinedTextOpinionAnnotator(
doc_ops,
doc_provider,
label_formatter=CustomLabelsFormatter(pos_label_type=Positive, neg_label_type=Negative))
],
text_opinion_filters=[
DistanceLimitedTextOpinionFilter(terms_per_context=50)
],
get_doc_by_id_func=doc_ops.by_id,
get_doc_by_id_func=doc_provider.by_id,
text_parser=text_parser)
#####

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ def __init__(self, pos_label_type, neg_label_type):
class TestTextOpinionAnnotation(unittest.TestCase):

def test(self):
doc_ops = FooDocumentProvider()
doc_provider = FooDocumentProvider()
predefined_annotator = PredefinedTextOpinionAnnotator(
doc_ops=doc_ops,
doc_provider=doc_provider,
label_formatter=CustomLabelsFormatter(pos_label_type=PositiveLabel,
neg_label_type=NegativeLabel))

Expand Down Expand Up @@ -76,7 +76,7 @@ def test(self):
text_opinion_filters=[
DistanceLimitedTextOpinionFilter(terms_per_context=50)
],
get_doc_by_id_func=doc_ops.by_id,
get_doc_by_id_func=doc_provider.by_id,
text_parser=text_parser)

# Running the pipeline.
Expand Down

0 comments on commit 6072696

Please sign in to comment.