-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
33 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,45 @@ | ||
from arekit.common.entities.base import Entity | ||
from arekit.common.news.entity import DocumentEntity | ||
from arekit.common.news.parsed.base import ParsedNews | ||
|
||
|
||
class BaseParsedNewsServiceProvider(object):
    """ Base service provider built on top of a ParsedNews instance.

        Wraps every entity of the most recently initialized ParsedNews
        into a DocumentEntity and keeps these wrappers both in document
        order and in a lookup table keyed by the id that the externally
        supplied entity_index_func returns for the source entity.
    """

    def __init__(self, entity_index_func):
        """ Outside entity indexing function
            entity_index_func: provides id for a given entity, i.e.
                func(entity) -> int (id)
                The returned id is used as a dictionary key.
        """
        assert(callable(entity_index_func))
        # Stays None until init_parsed_news is called.
        self._doc_entities = None
        # entity id (as given by entity_index_func) -> DocumentEntity
        self.__entity_map = {}
        self.__entity_index_func = entity_index_func

    @property
    def Name(self):
        """ Not implemented here; raises NotImplementedError.
        """
        raise NotImplementedError()

    def init_parsed_news(self, parsed_news):
        """ Rebuilds the document entities from the given parsed news.

            parsed_news: ParsedNews
                every entity yielded by parsed_news.iter_entities() is
                wrapped into a DocumentEntity whose id_in_doc is the
                position of the entity in the iteration order.

            Any state left from a previous call is discarded. When
            entity_index_func returns the same id for several entities,
            the lookup table keeps the wrapper of the last one.
        """
        assert(isinstance(parsed_news, ParsedNews))

        self._doc_entities = []
        self.__entity_map.clear()

        # Single pass: the same wrapper object goes into both the ordered
        # list and the id-based lookup table.
        for index, entity in enumerate(parsed_news.iter_entities()):

            doc_entity = DocumentEntity(id_in_doc=index,
                                        value=entity.Value,
                                        e_type=entity.Type,
                                        group_index=entity.GroupIndex)

            self._doc_entities.append(doc_entity)
            self.__entity_map[self.__entity_index_func(entity)] = doc_entity

    def get_document_entity(self, entity):
        """ Maps entity to the related one with DocumentEntity type

            entity: Entity
            returns: the DocumentEntity registered under the id of the
                given entity; a KeyError is raised when that id has not
                been registered by init_parsed_news.
        """
        assert(isinstance(entity, Entity))
        return self.__entity_map[self.__entity_index_func(entity)]

    def contains_entity(self, entity):
        """ Checks whether the id of the given entity has been registered
            by the last init_parsed_news call.

            returns: bool
        """
        return self.__entity_index_func(entity) in self.__entity_map