-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
90 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from arekit.contrib.source.brat.entities.entity import BratEntity | ||
|
||
|
||
class BratCompoundEntity(BratEntity):
    """ Brat entity that additionally keeps a list of nested (child) entities
        and an optional reference to a root compound entity.
    """

    def __init__(self, id_in_doc, value, e_type, root, entities, index_begin, index_end, group_index=None):
        """ Create a compound entity.

            id_in_doc, value, e_type, index_begin, index_end, group_index:
                forwarded unchanged to the BratEntity initializer.
            root: BratCompoundEntity or None
                stored as is and exposed through the `Root` property.
            entities: list
                child entities, exposed through `iter_childs`.
        """
        assert root is None or isinstance(root, BratCompoundEntity)
        assert isinstance(entities, list)

        super(BratCompoundEntity, self).__init__(
            id_in_doc=id_in_doc,
            value=value,
            e_type=e_type,
            index_begin=index_begin,
            index_end=index_end,
            group_index=group_index)

        self.__root = root
        self.__entities = entities

    @property
    def Root(self):
        """ The root passed at construction time (may be None). """
        return self.__root

    def iter_childs(self):
        """ Return a fresh iterator over the child entities list. """
        return iter(self.__entities)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import unittest | ||
|
||
from arekit.common.bound import Bound | ||
from arekit.common.entities.collection import EntityCollection | ||
from arekit.common.synonyms.grouping import SynonymsCollectionValuesGroupingProviders | ||
from arekit.contrib.source.brat.entities.entity import BratEntity | ||
from arekit.contrib.source.brat.sentences_reader import BratDocumentSentencesReader | ||
from arekit.contrib.utils.processing.lemmatization.mystem import MystemWrapper | ||
from arekit.contrib.utils.synonyms.stemmer_based import StemmerBasedSynonymCollection | ||
|
||
|
||
class TestCompoundEntites(unittest.TestCase):
    """ Reads a single sentence whose entity annotations overlap and prints
        the sentence-local bounds reported for every entity.
    """

    text = "мама мыла раму"

    # Three word-level entities plus one entity ("мама мыла") whose span
    # covers the first two, i.e. the annotations overlap.
    entities = [
        BratEntity(id_in_doc="T1", e_type="PERSON", index_begin=0, index_end=4, value="мама"),
        BratEntity(id_in_doc="T2", e_type="VERB", index_begin=5, index_end=9, value="мыла"),
        BratEntity(id_in_doc="T3", e_type="OBJECT", index_begin=10, index_end=14, value="раму"),
        # Uses its own id: previously this entry reused "T3", colliding with
        # the "раму" entity above.
        BratEntity(id_in_doc="T4", e_type="ACTION", index_begin=0, index_end=9, value="мама мыла"),
    ]

    def test(self):
        """ Build sentences from the fixture and check the type of every bound. """
        # A single sentence that spans the whole text.
        s_data = [
            {"text": self.text, "ind_begin": 0, "ind_end": len(self.text)}
        ]

        # Starts empty and is not read-only, so values missing from it
        # can be registered by the grouping provider below.
        synonyms = StemmerBasedSynonymCollection(
            iter_group_values_lists=[], stemmer=MystemWrapper(), is_read_only=False, debug=False)

        collection = EntityCollection(
            self.entities,
            value_to_group_id_func=lambda value:
            SynonymsCollectionValuesGroupingProviders.provide_existed_or_register_missed_value(synonyms, value))

        sentences = BratDocumentSentencesReader.from_sentences_data(entities=collection,
                                                                    sentences_data=s_data)

        for sentence in sentences:
            for e, b in sentence.iter_entity_with_local_bounds():
                # unittest assertion: not stripped under `python -O` and
                # reports the offending object on failure.
                self.assertIsInstance(b, Bound)
                print(e.Value, b.Position, b.Position + b.Length)
|
||
|
||
# Run the tests of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()