-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
170 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
from arekit.common.labels.base import Label | ||
|
||
|
||
class OpinionBelongsTo(Label):
    """ Marker label for the relation type written as "OPINION_BELONGS_TO". """
|
||
|
||
class OpinionRelatesTo(Label):
    """ Marker label for the relation type written as "OPINION_RELATES_TO". """
|
||
|
||
class NegEffectFrom(Label):
    """ Marker label for the relation type written as "NEG_EFFECT_FROM". """
|
||
|
||
class NegStateFrom(Label):
    """ Marker label for the relation type written as "NEG_STATE_FROM". """
|
||
|
||
class PosEffectFrom(Label):
    """ Marker label for the relation type written as "POS_EFFECT_FROM". """
|
||
|
||
class PosAuthorFrom(Label):
    """ Marker label for the relation type written as "POS_AUTHOR_FROM". """
|
||
|
||
class NegAuthorFrom(Label):
    """ Marker label for the relation type written as "NEG_AUTHOR_FROM". """
|
||
|
||
class PosStateFrom(Label):
    """ Marker label for the relation type written as "POS_STATE_FROM". """
|
||
|
||
class NegativeTo(Label):
    """ Marker label for the relation type written as "NEGATIVE_TO". """
|
||
|
||
class PositiveTo(Label):
    """ Marker label for the relation type written as "POSITIVE_TO". """
|
||
|
||
class AlternativeName(Label):
    """ Marker label for the relation type written as "ALTERNATIVE_NAME". """
|
||
|
||
class StateBelongsTo(Label):
    """ Marker label for the relation type written as "STATE_BELONGS_TO". """
|
||
|
||
class OriginsFrom(Label):
    """ Marker label for the relation type written as "ORIGINS_FROM". """
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from arekit.common.experiment.api.ops_doc import DocumentOperations | ||
from arekit.contrib.source.nerel.io_utils import NerelVersions | ||
from arekit.contrib.source.nerel.reader import NerelDocReader | ||
|
||
|
||
class NERELDocOperation(DocumentOperations):
    """ Document operations backed by `NerelDocReader`: resolves a document
        identifier to its filename and reads the document with the reader.

        NOTE(review): the previous docstring described this class as the reader
        for the RuSentNE competition 2023 collection
        (github: https://github.com/dialogue-evaluation/RuSentNE-evaluation);
        confirm whether that reference is intended for the NEREL collection.
    """

    def __init__(self, filename_by_id, version):
        """ filename_by_id: dict
                Dictionary of {id: filename}, where
                - id: int
                - filename: str
            version: NerelVersions
                Specify the appropriate version of the NEREL collection.
        """
        assert isinstance(filename_by_id, dict)
        assert isinstance(version, NerelVersions)
        super(NERELDocOperation, self).__init__()
        # The reader is bound to a single collection version for its lifetime.
        self.__doc_reader = NerelDocReader(version)
        self.__version = version
        self.__filename_by_id = filename_by_id

    def by_id(self, doc_id):
        """ Read and return the document registered under `doc_id`.
            A `doc_id` missing from the {id: filename} dictionary raises KeyError.
        """
        filename = self.__filename_by_id[doc_id]
        return self.__doc_reader.read_document(doc_id=doc_id, filename=filename)
58 changes: 58 additions & 0 deletions
58
arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
from arekit.common.experiment.api.ops_doc import DocumentOperations | ||
from arekit.common.experiment.data_type import DataType | ||
from arekit.contrib.source.nerel.io_utils import NerelIOUtils, NerelVersions | ||
from arekit.contrib.utils.pipelines.sources.nerel.doc_ops import NERELDocOperation | ||
from arekit.contrib.utils.pipelines.sources.nerel.labels_fmt import NerelAnyLabelFormatter | ||
from arekit.contrib.utils.pipelines.text_opinion.annot.predefined import PredefinedTextOpinionAnnotator | ||
from arekit.contrib.utils.pipelines.text_opinion.extraction import text_opinion_extraction_pipeline | ||
from arekit.contrib.utils.pipelines.text_opinion.filters.distance_based import DistanceLimitedTextOpinionFilter | ||
from arekit.contrib.utils.pipelines.text_opinion.filters.entity_based import EntityBasedTextOpinionFilter | ||
|
||
|
||
def create_text_relation_extraction_pipeline(sentinerel_version,
                                             text_parser,
                                             label_formatter=None,
                                             terms_per_context=50,
                                             doc_ops=None,
                                             docs_limit=None,
                                             entity_filter=None):
    """ Compose text-opinion extraction pipelines for the Train, Test and Dev
        data types, annotated from the predefined relations of the documents.

        sentinerel_version: NerelVersions
            Version of the collection to work with.
        text_parser:
            Text parser, forwarded as-is to every extraction pipeline.
        label_formatter:
            String-to-label formatter for the predefined annotator;
            when None, a new `NerelAnyLabelFormatter` is created for this call.
        terms_per_context: int
            Limit passed to `DistanceLimitedTextOpinionFilter`.
        doc_ops: DocumentOperations or None
            Documents provider; when None, the default dataset split is read
            and a `NERELDocOperation` is created from it.
        docs_limit:
            Forwarded to `NerelIOUtils.read_dataset_split`;
            only used when `doc_ops` is None.
        entity_filter:
            Forwarded to `EntityBasedTextOpinionFilter`.

        returns:
            `(pipelines, data_folding)` when `doc_ops` was None (i.e. the default
            data folding has been set up here), otherwise only `pipelines`;
            `pipelines` is a dict of {DataType: pipeline}.
    """
    assert(isinstance(sentinerel_version, NerelVersions))
    assert(isinstance(doc_ops, DocumentOperations) or doc_ops is None)

    # Instantiated per call: a default written in the signature would be
    # created once at import time and shared between all the calls.
    if label_formatter is None:
        label_formatter = NerelAnyLabelFormatter()

    data_folding = None

    if doc_ops is None:
        # Default Initialization.
        filenames_by_ids, data_folding = NerelIOUtils.read_dataset_split(version=sentinerel_version,
                                                                         docs_limit=docs_limit)
        doc_ops = NERELDocOperation(filename_by_id=filenames_by_ids,
                                    version=sentinerel_version)

    text_opinion_filters = [
        EntityBasedTextOpinionFilter(entity_filter=entity_filter),
        DistanceLimitedTextOpinionFilter(terms_per_context)
    ]

    predefined_annot = PredefinedTextOpinionAnnotator(doc_ops, label_formatter)

    # Every data type gets its own pipeline instance of the same configuration.
    pipelines = {
        data_type: text_opinion_extraction_pipeline(text_parser=text_parser,
                                                    get_doc_by_id_func=doc_ops.by_id,
                                                    annotators=[predefined_annot],
                                                    text_opinion_filters=text_opinion_filters)
        for data_type in (DataType.Train, DataType.Test, DataType.Dev)
    }

    # The default data folding is created here, so the caller has no other way
    # to obtain it: return it along with the pipelines.
    if data_folding is not None:
        return pipelines, data_folding

    return pipelines
25 changes: 25 additions & 0 deletions
25
arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from arekit.common.labels.str_fmt import StringLabelsFormatter | ||
from arekit.contrib.source.nerel import labels | ||
|
||
|
||
class NerelAnyLabelFormatter(StringLabelsFormatter):
    """ String formatter that maps every relation type string listed below
        onto the related label class of the `labels` module.
    """

    def __init__(self):
        # (relation type string, label class) pairs.
        relation_types = (
            ("OPINION_BELONGS_TO", labels.OpinionBelongsTo),
            ("OPINION_RELATES_TO", labels.OpinionRelatesTo),
            ("NEG_EFFECT_FROM", labels.NegEffectFrom),
            ("POS_EFFECT_FROM", labels.PosEffectFrom),
            ("NEG_STATE_FROM", labels.NegStateFrom),
            ("POS_STATE_FROM", labels.PosStateFrom),
            ("NEGATIVE_TO", labels.NegativeTo),
            ("POSITIVE_TO", labels.PositiveTo),
            ("STATE_BELONGS_TO", labels.StateBelongsTo),
            ("POS_AUTHOR_FROM", labels.PosAuthorFrom),
            ("NEG_AUTHOR_FROM", labels.NegAuthorFrom),
            ("ALTERNATIVE_NAME", labels.AlternativeName),
            ("ORIGINS_FROM", labels.OriginsFrom),
        )

        super(NerelAnyLabelFormatter, self).__init__(stol=dict(relation_types))