-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
- Loading branch information
Showing
21 changed files
with
214 additions
and
239 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from arekit.common.entities.base import Entity | ||
from arekit.common.labels.base import Label | ||
from arekit.common.news.parsed.base import ParsedNews | ||
|
||
|
||
class BasePairProvider(object):
    """ Base provider of (source, target) entity pairs for a parsed document.

    Subclasses decide what a "pair" actually is by implementing
    `_create_pair`.
    """

    def __init__(self, parsed_news):
        """ parsed_news: ParsedNews
                document whose entities are used to compose pairs.
        """
        assert(isinstance(parsed_news, ParsedNews))
        # Materialize the entities: they are traversed in a nested loop
        # (as both sources and targets) and may be traversed on every call
        # of `iter_from_all`; a one-shot iterator would be exhausted after
        # the very first inner pass.
        self.__entities = list(parsed_news.iter_entities())

    def _create_pair(self, source_entity, target_entity, label):
        """ Compose a pair instance for the given entities and label.
            Has to be implemented by a subclass.
        """
        raise NotImplementedError()

    # region private methods

    def _iter_from_entities(self, source_entities, target_entities, label, filter_func=None):
        """ Iterate pairs for every (source, target) combination.

            source_entities: iterable of Entity
            target_entities: iterable of Entity
                traversed once per source entity, hence it has to
                support repeated iteration.
            label: Label
                label assigned to every created pair.
            filter_func: callable or None
                called as filter_func(source_entity, target_entity);
                the pair is skipped when the result is falsy.
                None keeps every combination.
        """
        assert(isinstance(label, Label))
        assert(callable(filter_func) or filter_func is None)

        for source_entity in source_entities:
            for target_entity in target_entities:
                assert (isinstance(source_entity, Entity))
                assert (isinstance(target_entity, Entity))

                # The predicate has to be *called*: a callable object itself
                # is always truthy, so testing it directly filters nothing.
                if filter_func is not None and not filter_func(source_entity, target_entity):
                    continue

                yield self._create_pair(source_entity=source_entity,
                                        target_entity=target_entity,
                                        label=label)

    # endregion

    def iter_from_all(self, label, filter_func):
        """ Iterate pairs composed from all the document entities,
            each one considered both as a source and as a target.

            label: Label
                label assigned to every created pair.
            filter_func: callable or None
                see `_iter_from_entities`.
        """
        assert(isinstance(label, Label))

        return self._iter_from_entities(source_entities=self.__entities,
                                        target_entities=self.__entities,
                                        label=label,
                                        filter_func=filter_func)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
from arekit.common.entities.base import Entity | ||
from arekit.common.news.parsed.providers.base_pairs import BasePairProvider | ||
from arekit.common.opinions.base import Opinion | ||
|
||
|
||
class OpinionPairsProvider(BasePairProvider):
    """ Pair provider whose pairs are Opinion instances. """

    def _create_pair(self, source_entity, target_entity, label):
        """ Compose an Opinion from the values of the given entities,
            with the label used as the opinion sentiment.
        """
        for entity in (source_entity, target_entity):
            assert(isinstance(entity, Entity))

        source_value = source_entity.Value
        target_value = target_entity.Value

        return Opinion(source_value=source_value,
                       target_value=target_value,
                       sentiment=label)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import logging | ||
|
||
from arekit.common.entities.base import Entity | ||
from arekit.common.entities.collection import EntityCollection | ||
from arekit.common.news.parsed.providers.base_pairs import BasePairProvider | ||
from arekit.common.opinions.base import Opinion | ||
from arekit.common.text_opinions.base import TextOpinion | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class TextOpinionPairsProvider(BasePairProvider):
    """ Document-related text opinion provider: its pairs are
        TextOpinion instances bound to the document of the parsed news.
    """

    def __init__(self, parsed_news, value_to_group_id_func):
        """ parsed_news: ParsedNews
                document to take the entities and the document id from.
            value_to_group_id_func:
                passed as is to the EntityCollection built over
                the document entities.
        """
        super(TextOpinionPairsProvider, self).__init__(parsed_news)

        self.__doc_id = parsed_news.RelatedDocID
        self.__value_to_group_id_func = value_to_group_id_func

        # Collection over the document entities, utilized to look up
        # the entities that correspond to the opinion ends.
        doc_entities = list(parsed_news.iter_entities())
        self.__entities_collection = EntityCollection(
            entities=doc_entities,
            value_to_group_id_func=self.__value_to_group_id_func)

    def _create_pair(self, source_entity, target_entity, label):
        """ Compose a TextOpinion that refers to the entities by their
            document ids; owner and text opinion id are left unset (None).
        """
        for entity in (source_entity, target_entity):
            assert(isinstance(entity, Entity))

        return TextOpinion(doc_id=self.__doc_id,
                           source_id=source_entity.IdInDocument,
                           target_id=target_entity.IdInDocument,
                           label=label,
                           owner=None,
                           text_opinion_id=None)

    def iter_from_opinion(self, opinion, debug=False):
        """ Provides text-level opinions for a document-level opinion
            (Opinion class instance), using the related entity collection
            of the document.

            opinion: Opinion
                its source/target values are looked up (by synonyms) in
                the entity collection; its sentiment labels every pair.
            debug: bool
                when set, a missing source/target lookup is logged.

            Yields nothing when either end of the opinion has
            no entities in the collection.
        """
        assert(isinstance(opinion, Opinion))

        collection = self.__entities_collection
        synonyms_key = collection.KeyType.BY_SYNONYMS
        found_sources = collection.try_get_entities(opinion.SourceValue, group_key=synonyms_key)
        found_targets = collection.try_get_entities(opinion.TargetValue, group_key=synonyms_key)

        # Guard clauses: a plain `return` ends the generator with no items.
        if found_sources is None:
            if debug:
                logger.info("Appropriate entity for '{}'->'...' has not been found".format(
                    opinion.SourceValue))
            return

        if found_targets is None:
            if debug:
                logger.info("Appropriate entity for '...'->'{}' has not been found".format(
                    opinion.TargetValue))
            return

        for text_opinion in self._iter_from_entities(source_entities=found_sources,
                                                     target_entities=found_targets,
                                                     label=opinion.Sentiment):
            yield text_opinion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.