-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #126 from nicolay-r/0.21.0-rc
0.21.0 rc
- Loading branch information
Showing 157 changed files with 791 additions and 742 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import logging | ||
|
||
from arekit.common.experiment.formats.documents import DocumentOperations | ||
from arekit.common.experiment.formats.opinions import OpinionOperations | ||
from arekit.common.news.parsed.base import ParsedNews | ||
from arekit.common.utils import progress_bar_iter | ||
|
||
logger = logging.getLogger(__name__) | ||
logging.basicConfig(level=logging.INFO) | ||
|
||
|
||
class BaseAnnotator(object):
    """
    Performs annotation for a particular data_type
    using OpinOps and DocOps API.

    This base class only drives the process: it selects documents, calls
    `_annot_collection_core` for each parsed document and saves the result
    through `opin_ops`. `LabelsCount` and `_annot_collection_core` raise
    NotImplementedError here and have to be provided by subclasses.
    """

    def __init__(self):
        logger.info("Init annotator: [{}]".format(self.__class__))

    @property
    def LabelsCount(self):
        """ Not implemented in the base class; raises NotImplementedError.
        """
        raise NotImplementedError()

    # region private methods

    def __iter_annotated_collections(self, data_type, filter_func, doc_ops, opin_ops):
        """ Generator over pairs (RelatedNewsID, annotated collection).

            data_type: passed through to `_annot_collection_core` and used
                in log/progress messages.
            filter_func: predicate applied to every id provided by
                `doc_ops.iter_doc_ids_to_annotate()`; only ids for which
                it is truthy are annotated.
            doc_ops: DocumentOperations instance.
            opin_ops: OpinionOperations instance.

            Nothing is yielded (and an info message is logged) when no
            document id passes the filter.
        """
        assert(isinstance(doc_ops, DocumentOperations))
        assert(isinstance(opin_ops, OpinionOperations))

        # The result must be a real list: in Python 3 `filter` returns a lazy
        # iterator which does not support len(), so the emptiness check below
        # would fail with TypeError without the explicit list(...) call.
        docs_to_annot_list = list(filter(filter_func,
                                         doc_ops.iter_doc_ids_to_annotate()))

        if not docs_to_annot_list:
            logger.info("[{}]: Nothing to annotate".format(data_type))
            return

        logged_parsed_news_iter = progress_bar_iter(
            iterable=doc_ops.iter_parsed_news(docs_to_annot_list),
            desc="Annotating parsed news [{}]".format(data_type))

        for parsed_news in logged_parsed_news_iter:
            assert(isinstance(parsed_news, ParsedNews))
            collection = self._annot_collection_core(parsed_news=parsed_news,
                                                     data_type=data_type,
                                                     doc_ops=doc_ops,
                                                     opin_ops=opin_ops)
            yield parsed_news.RelatedNewsID, collection

    # endregion

    def _annot_collection_core(self, parsed_news, data_type, doc_ops, opin_ops):
        """ Annotation of a single parsed document; its result is what gets
            saved by `serialize_missed_collections`.
            Not implemented in the base class; raises NotImplementedError.
        """
        raise NotImplementedError

    # region public methods

    def serialize_missed_collections(self, data_type, doc_ops, opin_ops):
        """ Annotates and saves collections for those documents, for which
            `opin_ops.try_read_annotated_opinion_collection` returns None
            for the given data_type.

            data_type: forwarded to all the `opin_ops` calls and to the
                annotation itself.
            doc_ops: DocumentOperations instance (checked when the
                annotation actually starts).
            opin_ops: OpinionOperations instance, utilized both for reading
                the existed collections and for saving the new ones.
        """
        assert(isinstance(opin_ops, OpinionOperations))

        def is_missed(doc_id):
            # A document is considered as missed when there is
            # no annotated collection that could be read for it.
            return opin_ops.try_read_annotated_opinion_collection(
                doc_id=doc_id, data_type=data_type) is None

        annot_it = self.__iter_annotated_collections(
            data_type,
            is_missed,
            doc_ops=doc_ops,
            opin_ops=opin_ops)

        for doc_id, collection in annot_it:
            opin_ops.save_annotated_opinion_collection(collection=collection,
                                                       doc_id=doc_id,
                                                       data_type=data_type)

    # endregion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
class BaseAnnotationAlgorithm(object):
    """ Base class of an annotation algorithm.
        Declares the `iter_opinions` method which has to be
        implemented by a particular algorithm.
    """

    def iter_opinions(self, parsed_news, entities_collection, existed_opinions=None):
        """ Expected to provide opinions for the given document
            (presumably as an iterable, judging by the method name).

            parsed_news: document to be annotated.
            entities_collection: entities related to the document.
            existed_opinions: optional, None by default.

            Not implemented in the base class: raises NotImplementedError
            instead of silently returning None, so that a missing override
            is reported at the call itself and not later, when the result
            is iterated.
        """
        raise NotImplementedError()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.