Skip to content

Commit

Permalink
#495 experimental extension
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Aug 18, 2023
1 parent 2a7fa1b commit 248f781
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 11 deletions.
12 changes: 11 additions & 1 deletion arekit/common/entities/base.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
class Entity(object):

def __init__(self, value, e_type, display_value=None, group_index=None):
def __init__(self, value, e_type, childs=None, display_value=None, group_index=None):
assert(isinstance(value, str) and len(value) > 0)
assert(isinstance(e_type, str) or e_type is None)
assert(isinstance(display_value, str) or display_value is None)
assert(isinstance(group_index, int) or group_index is None)
assert(isinstance(childs, list) or childs is None)
self.__value = value.lower()
self.__type = e_type
self.__display_value = display_value
self.__group_index = group_index
self.__childs = childs

@property
def GroupIndex(self):
Expand Down Expand Up @@ -40,3 +42,11 @@ def set_group_index(self, value):
assert(isinstance(value, int) and value >= -1)
assert(self.__group_index is None)
self.__group_index = value

def iter_childs(self):
    """ Iterate over the child entities attached to this entity.

        Yields every item of the `childs` list passed to the constructor,
        in order. When no children were provided (`childs` is None) the
        iteration is empty.
    """
    # The `yield from` below already makes this function a generator, so a
    # plain `return` is enough to produce an empty iteration.
    if self.__childs is None:
        return
    yield from self.__childs

3 changes: 2 additions & 1 deletion arekit/common/news/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@

class DocumentEntity(Entity):

def __init__(self, value, display_value, e_type, id_in_doc, group_index):
def __init__(self, value, display_value, e_type, childs, id_in_doc, group_index):
""" id_in_doc: Id, utilized witin the internal services
"""
super(DocumentEntity, self).__init__(value=value,
e_type=e_type,
display_value=display_value,
childs=childs,
group_index=group_index)
self.__id = id_in_doc

Expand Down
33 changes: 31 additions & 2 deletions arekit/common/news/parsed/providers/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from collections import Counter

from arekit.common.entities.base import Entity
from arekit.common.news.entity import DocumentEntity
from arekit.common.news.parsed.base import ParsedNews
Expand Down Expand Up @@ -25,18 +27,45 @@ def init_parsed_news(self, parsed_news):
self._doc_entities = []
self.__entity_map.clear()

for index, entity in enumerate(parsed_news.iter_entities()):
c = Counter()
for _, entity in enumerate(parsed_news.iter_entities()):
assert(isinstance(entity, Entity))

# Register children.
doc_childs = {}
for ce in entity.iter_childs():
de = DocumentEntity(id_in_doc=c["entities"],
value=ce.Value,
e_type=ce.Type,
childs=None,
display_value=ce.DisplayValue,
group_index=ce.GroupIndex)
c["entities"] += 1
doc_childs[ce] = de

doc_entity = DocumentEntity(id_in_doc=index,
self._doc_entities.append(de)

# Register Root node.
doc_entity = DocumentEntity(id_in_doc=c["entities"],
value=entity.Value,
e_type=entity.Type,
childs=list(doc_childs.values()) if len(doc_childs) > 0 else None,
display_value=entity.DisplayValue,
group_index=entity.GroupIndex)
c["entities"] += 1

self._doc_entities.append(doc_entity)

if self.__entity_index_func is not None:
# For root node.
assert(self.__entity_index_func(entity) not in self.__entity_map)
self.__entity_map[self.__entity_index_func(entity)] = doc_entity
# For children.
for ce, de in doc_childs.items():
assert(self.__entity_index_func(ce) not in self.__entity_map)
self.__entity_map[self.__entity_index_func(ce)] = de

#print("Document entites registred:", len(self._doc_entities))

def get_document_entity(self, entity):
""" Maps entity to the related one with DocumentEntity type
Expand Down
11 changes: 10 additions & 1 deletion arekit/common/news/parsed/providers/entity_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def init_parsed_news(self, parsed_news):
super(EntityServiceProvider, self).init_parsed_news(parsed_news)
assert(isinstance(parsed_news, ParsedNews))
self.__iter_raw_terms_func = lambda: parsed_news.iter_terms(filter_func=None, term_only=False)
self.__init_entity_positions()
self.__entity_positions = self.__calculate_entity_positions()

# region public 'extract' methods

Expand Down Expand Up @@ -158,6 +158,15 @@ def __calculate_entity_positions(self):
for s_ind, t_ind_in_sent, term in self.__iter_raw_terms_func():

if isinstance(term, Entity):

# First childs
for _ in term.iter_childs():
position = TermPosition(term_ind_in_doc=t_ind_in_doc,
term_ind_in_sent=t_ind_in_sent,
s_ind=s_ind)
positions.append(position)

# Then actual root.
position = TermPosition(term_ind_in_doc=t_ind_in_doc,
term_ind_in_sent=t_ind_in_sent,
s_ind=s_ind)
Expand Down
5 changes: 1 addition & 4 deletions arekit/contrib/source/brat/entities/compound.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ class BratCompoundEntity(BratEntity):

def __init__(self, id_in_doc, value, e_type, root, entities, index_begin, index_end,
display_value=None, group_index=None):
assert(isinstance(entities, list))
assert(isinstance(root, BratCompoundEntity) or root is None)
super(BratCompoundEntity, self).__init__(value=value, e_type=e_type,
childs=entities,
id_in_doc=id_in_doc,
index_begin=index_begin,
index_end=index_end,
Expand All @@ -28,6 +28,3 @@ def from_list(cls, root, childs):
@property
def Root(self):
return self.__root

def iter_childs(self):
return iter(self.__entities)
5 changes: 3 additions & 2 deletions arekit/contrib/source/brat/entities/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ class BratEntity(Entity):
Provides bounds, i.e. char indices in related sentence.
"""

def __init__(self, id_in_doc, e_type, index_begin, index_end, value, display_value=None, group_index=None):
def __init__(self, id_in_doc, e_type, index_begin, index_end, value,
childs=None, display_value=None, group_index=None):
""" index_begin: int
- char index (in case of string type of `text`)
- term index (in case of list type of `text`)
Expand All @@ -17,7 +18,7 @@ def __init__(self, id_in_doc, e_type, index_begin, index_end, value, display_val
assert(isinstance(e_type, str))
assert(isinstance(index_begin, int))
assert(isinstance(index_end, int))
super(BratEntity, self).__init__(value=value, e_type=e_type,
super(BratEntity, self).__init__(value=value, e_type=e_type, childs=childs,
display_value=display_value, group_index=group_index)

self.__e_type = e_type
Expand Down

0 comments on commit 248f781

Please sign in to comment.