Skip to content

Commit

Permalink
#224. Closed. #231 Related refactoring. Leads to #232.
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Dec 18, 2021
1 parent 3d67076 commit 7544fe9
Show file tree
Hide file tree
Showing 21 changed files with 214 additions and 239 deletions.
38 changes: 19 additions & 19 deletions arekit/common/data/input/providers/opinions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from arekit.common.data.input.sample import InputSampleBase
from arekit.common.linkage.text_opinions import TextOpinionsLinkage
from arekit.common.news.base import News
from arekit.common.news.parsed.providers.text_opinion_pairs import TextOpinionPairsProvider


class OpinionProvider(object):
Expand All @@ -16,17 +16,18 @@ def __init__(self, text_opinions_linkages_it_func):

# region private methods

# TODO. #224 no need news func.
@staticmethod
def __iter_linked_text_opinion_lists(news, iter_opins_for_extraction, filter_text_opinion_func):
assert (isinstance(news, News))
def __iter_linked_text_opinion_lists(
text_opinion_pairs_provider,
iter_opins_for_extraction,
filter_text_opinion_func):

assert (isinstance(text_opinion_pairs_provider, TextOpinionPairsProvider))
assert (isinstance(iter_opins_for_extraction, collections.Iterable))
assert (callable(filter_text_opinion_func))

for opinion in iter_opins_for_extraction:
linked_text_opinions = news.extract_text_opinions_linkages(opinion)
assert (linked_text_opinions, TextOpinionsLinkage)

linked_text_opinions = TextOpinionsLinkage(text_opinion_pairs_provider.iter_from_opinion(opinion))
filtered_text_opinions = list(filter(filter_text_opinion_func, linked_text_opinions))

if len(filtered_text_opinions) == 0:
Expand All @@ -35,29 +36,31 @@ def __iter_linked_text_opinion_lists(news, iter_opins_for_extraction, filter_tex
yield filtered_text_opinions

@staticmethod
def __iter_linked_text_opins(read_news_func, news_opins_for_extraction_func,
parse_news_func, terms_per_context, doc_ids_it):
def __iter_linked_text_opins(news_opins_for_extraction_func, parse_news_func,
value_to_group_id_func, terms_per_context, doc_ids_it):
"""
Extracting text-level opinions based on doc-level opinions in documents,
obtained by information in experiment.
NOTE:
1. Assumes to provide the same label (doc level opinion) onto related text-level opinions.
"""
# TODO. #224 no need news func.
assert(callable(read_news_func))
assert(callable(parse_news_func))
assert(callable(value_to_group_id_func))
assert(isinstance(doc_ids_it, collections.Iterable))

curr_id = 0

value_to_group_id_func = None

for doc_id in doc_ids_it:

parsed_news = parse_news_func(doc_id)

linked_text_opinion_lists = OpinionProvider.__iter_linked_text_opinion_lists(
# TODO. #224 no need news.
news=read_news_func(parsed_news.RelatedDocID),
text_opinion_pairs_provider=TextOpinionPairsProvider(
parsed_news=parsed_news,
value_to_group_id_func=value_to_group_id_func),
iter_opins_for_extraction=news_opins_for_extraction_func(doc_id=parsed_news.RelatedDocID),
filter_text_opinion_func=lambda text_opinion: InputSampleBase.check_ability_to_create_sample(
parsed_news=parsed_news,
Expand All @@ -75,20 +78,17 @@ def __iter_linked_text_opins(read_news_func, news_opins_for_extraction_func,

# endregion

# TODO. #224 no need news func.
@classmethod
def create(cls, read_news_func, iter_news_opins_for_extraction,
def create(cls, iter_news_opins_for_extraction, value_to_group_id_func,
parse_news_func, terms_per_context):
# TODO. #224 no need news func.
assert(callable(read_news_func))
assert(callable(iter_news_opins_for_extraction))
assert(callable(value_to_group_id_func))
assert(isinstance(terms_per_context, int))
assert(callable(parse_news_func))

def it_func(doc_ids_it):
return cls.__iter_linked_text_opins(
# TODO. #224 no need news func.
read_news_func=lambda doc_id: read_news_func(doc_id),
value_to_group_id_func=value_to_group_id_func,
news_opins_for_extraction_func=lambda doc_id: iter_news_opins_for_extraction(doc_id=doc_id),
terms_per_context=terms_per_context,
doc_ids_it=doc_ids_it,
Expand Down
50 changes: 12 additions & 38 deletions arekit/common/experiment/annot/single_label.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from arekit.common.experiment.annot.base_annot import BaseAnnotationAlgorithm
from arekit.common.labels.base import Label
from arekit.common.news.parsed.base import ParsedNews
from arekit.common.news.parsed.providers.opinion_pairs import OpinionPairsProvider
from arekit.common.opinions.base import Opinion
from arekit.common.dataset.text_opinions.enums import DistanceType
from arekit.common.dataset.text_opinions.helper import TextOpinionHelper
Expand Down Expand Up @@ -40,25 +41,6 @@ def __is_ignored_entity_value(self, entity_value):
assert(isinstance(entity_value, str))
return entity_value in self.__ignored_entity_values

def __iter_opinions_between_entities(self, relevant_pairs, entities_list):
assert(isinstance(entities_list, list))

for e1 in entities_list:
assert(isinstance(e1, Entity))

for e2 in entities_list:
assert(isinstance(e2, Entity))

key = self.__create_key_by_entity_pair(e1=e1, e2=e2)
if key not in relevant_pairs:
continue

opinion = Opinion(source_value=e1.Value,
target_value=e2.Value,
sentiment=self.__label_instance)

yield opinion

def __try_create_pair_key(self, parsed_news, e1, e2, existed_opinions):
assert(isinstance(e1, Entity))
assert(isinstance(e2, Entity))
Expand Down Expand Up @@ -99,25 +81,17 @@ def __try_create_pair_key(self, parsed_news, e1, e2, existed_opinions):
def iter_opinions(self, parsed_news, existed_opinions=None):
assert(isinstance(parsed_news, ParsedNews))

parsed_news.iter_terms()
relevant_pairs = {}

cached_entities_list = list(parsed_news.iter_entities())

for e1 in cached_entities_list:
assert(isinstance(e1, Entity))

for e2 in cached_entities_list:
assert(isinstance(e2, Entity))

key = self.__try_create_pair_key(parsed_news=parsed_news,
e1=e1, e2=e2,
existed_opinions=existed_opinions)
def __filter_pair_func(e1, e2):
key = self.__try_create_pair_key(
parsed_news=parsed_news,
e1=e1, e2=e2,
existed_opinions=existed_opinions)

if key is None:
continue
return key is not None

relevant_pairs[key] = 0
# Init opinion provider.
opinions_provider = OpinionPairsProvider(parsed_news=parsed_news)

return self.__iter_opinions_between_entities(relevant_pairs=relevant_pairs,
entities_list=cached_entities_list)
return opinions_provider.iter_from_all(
label=self.__label_instance,
filter_func=__filter_pair_func)
7 changes: 0 additions & 7 deletions arekit/common/news/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,3 @@ def iter_sentences(self):

def get_sentence(self, s_ind):
return self._sentences[s_ind]

def extract_text_opinions_linkages(self, opinion):
"""
opinions: iterable Opinion
is an iterable opinions that should be used to find a related text_opinion entries.
"""
raise NotImplementedError()
6 changes: 4 additions & 2 deletions arekit/common/news/parsed/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ def __is_entity(term):
def __init_entity_positions(self):
self.__entity_positions = self.__calculate_entity_positions()

def __iter_entities(self):
return self.__iter_all_raw_terms(term_only=True, term_check=lambda term: self.__is_entity(term))

def __calculate_entity_positions(self):
positions = {}
t_ind_in_doc = 0
Expand Down Expand Up @@ -129,7 +132,7 @@ def iter_terms(self, term_check=None):
yield term

def iter_entities(self):
return self.__iter_all_raw_terms(term_only=True, term_check=lambda term: self.__is_entity(term))
return self.__iter_entities()

def iter_sentence_terms(self, sentence_index, return_id, term_check=None):
assert(isinstance(sentence_index, int))
Expand All @@ -144,7 +147,6 @@ def iter_sentence_terms(self, sentence_index, return_id, term_check=None):
yield ind_in_sent, term
else:
yield term

# endregion

def __iter__(self):
Expand Down
41 changes: 41 additions & 0 deletions arekit/common/news/parsed/providers/base_pairs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from arekit.common.entities.base import Entity
from arekit.common.labels.base import Label
from arekit.common.news.parsed.base import ParsedNews


class BasePairProvider(object):
    """ Base provider that enumerates (source, target) entity pairs of a
        parsed news document; subclasses decide the concrete pair type.
    """

    def __init__(self, parsed_news):
        assert(isinstance(parsed_news, ParsedNews))
        # BUGFIX: materialize the entities once. iter_entities() may return a
        # one-shot generator, while iter_from_all() iterates the same sequence
        # in a *nested* loop (source and target) — a generator would be
        # exhausted after the first pass, silently yielding no pairs.
        self.__entities = list(parsed_news.iter_entities())

    def _create_pair(self, source_entity, target_entity, label):
        """ Build a single pair instance for (source, target) with `label`.
            Implemented by subclasses (e.g. Opinion or TextOpinion pairs).
        """
        raise NotImplementedError()

    # region private methods

    def _iter_from_entities(self, source_entities, target_entities, label, filter_func=None):
        """ Yields a pair for every (source, target) combination that passes
            `filter_func` (a predicate over the two entities, or None).
        """
        assert(isinstance(label, Label))
        assert(callable(filter_func) or filter_func is None)

        # The inner loop is replayed once per source entity, so the target
        # sequence must be re-iterable; materialize defensively.
        target_entities = list(target_entities)

        for source_entity in source_entities:
            for target_entity in target_entities:
                assert (isinstance(source_entity, Entity))
                assert (isinstance(target_entity, Entity))

                # BUGFIX: the predicate was never invoked before
                # (`not filter_func` only tested the callable's truthiness,
                # which is always False for a callable). Call it per pair.
                if filter_func is not None and not filter_func(source_entity, target_entity):
                    continue

                yield self._create_pair(source_entity=source_entity,
                                        target_entity=target_entity,
                                        label=label)

    # endregion

    def iter_from_all(self, label, filter_func):
        """ Yields pairs over the full cross-product of document entities,
            filtered by `filter_func`.
        """
        assert(isinstance(label, Label))

        return self._iter_from_entities(source_entities=self.__entities,
                                        target_entities=self.__entities,
                                        label=label,
                                        filter_func=filter_func)
14 changes: 14 additions & 0 deletions arekit/common/news/parsed/providers/opinion_pairs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from arekit.common.entities.base import Entity
from arekit.common.news.parsed.providers.base_pairs import BasePairProvider
from arekit.common.opinions.base import Opinion


class OpinionPairsProvider(BasePairProvider):
    """ Provides document-level opinions (Opinion instances) built from
        entity pairs of a parsed news document.
    """

    def _create_pair(self, source_entity, target_entity, label):
        # Both endpoints of the pair are expected to be entities.
        for entity in (source_entity, target_entity):
            assert(isinstance(entity, Entity))

        return Opinion(source_value=source_entity.Value,
                       target_value=target_entity.Value,
                       sentiment=label)
67 changes: 67 additions & 0 deletions arekit/common/news/parsed/providers/text_opinion_pairs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import logging

from arekit.common.entities.base import Entity
from arekit.common.entities.collection import EntityCollection
from arekit.common.news.parsed.providers.base_pairs import BasePairProvider
from arekit.common.opinions.base import Opinion
from arekit.common.text_opinions.base import TextOpinion

logger = logging.getLogger(__name__)


class TextOpinionPairsProvider(BasePairProvider):
    """ Document Related text opinion provider.

        Expands document-level opinions (Opinion) into text-level opinions
        (TextOpinion) for a particular parsed document, matching opinion
        source/target values against the document's entity collection.
    """

    def __init__(self, parsed_news, value_to_group_id_func):
        super(TextOpinionPairsProvider, self).__init__(parsed_news)

        self.__doc_id = parsed_news.RelatedDocID
        self.__value_to_group_id_func = value_to_group_id_func

        # Entities are grouped (e.g. by synonym group id) so they can later be
        # looked up by opinion source/target value.
        self.__entities_collection = EntityCollection(
            entities=list(parsed_news.iter_entities()),
            value_to_group_id_func=self.__value_to_group_id_func)

    def _create_pair(self, source_entity, target_entity, label):
        """ Builds a TextOpinion that references entities by their in-document ids. """
        assert(isinstance(source_entity, Entity))
        assert(isinstance(target_entity, Entity))

        return TextOpinion(doc_id=self.__doc_id,
                           source_id=source_entity.IdInDocument,
                           target_id=target_entity.IdInDocument,
                           label=label,
                           owner=None,
                           text_opinion_id=None)

    def iter_from_opinion(self, opinion, debug=False):
        """ Provides text-level opinion extraction by document-level opinions
            (Opinion class instances), for a particular document (doc_id),
            with the related entity collection.

            opinion: Opinion
                the document-level opinion to expand.
            debug: bool
                when True, log opinion endpoints with no matching entities.
        """
        assert(isinstance(opinion, Opinion))

        key = self.__entities_collection.KeyType.BY_SYNONYMS
        source_entities = self.__entities_collection.try_get_entities(opinion.SourceValue, group_key=key)
        target_entities = self.__entities_collection.try_get_entities(opinion.TargetValue, group_key=key)

        # BUGFIX: removed the unreachable `yield` that followed each bare
        # `return`; this function is already a generator via the trailing
        # `yield pair` loop, so a bare `return` alone stops iteration.
        if source_entities is None:
            if debug:
                logger.info("Appropriate entity for '{}'->'...' has not been found".format(
                    opinion.SourceValue))
            return

        if target_entities is None:
            if debug:
                logger.info("Appropriate entity for '...'->'{}' has not been found".format(
                    opinion.TargetValue))
            return

        pairs_it = self._iter_from_entities(source_entities=source_entities,
                                            target_entities=target_entities,
                                            label=opinion.Sentiment)

        for pair in pairs_it:
            yield pair
3 changes: 1 addition & 2 deletions arekit/contrib/bert/run_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ def __handle_iteration(self, data_type):

# Create opinion provider
opinion_provider = OpinionProvider.create(
# TODO. #224 no need news.
read_news_func=lambda doc_id: self._experiment.DocumentOperations.get_doc(doc_id),
value_to_group_id_func=None,
parse_news_func=lambda doc_id: self._experiment.DocumentOperations.parse_doc(doc_id),
iter_news_opins_for_extraction=lambda doc_id:
self._experiment.OpinionOperations.iter_opinions_for_extraction(doc_id=doc_id, data_type=data_type),
Expand Down
2 changes: 1 addition & 1 deletion arekit/contrib/experiment_rusentrel/exp_sl/documents.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from arekit.common.experiment.api.enums import BaseDocumentTag
from arekit.common.experiment.api.ops_doc import DocumentOperations
from arekit.contrib.source.rusentrel.io_utils import RuSentRelVersions
from arekit.contrib.source.rusentrel.news.base import RuSentRelNews
from arekit.contrib.source.rusentrel.news_reader import RuSentRelNews


class RuSentrelDocumentOperations(DocumentOperations):
Expand Down
Loading

0 comments on commit 7544fe9

Please sign in to comment.