Skip to content

Commit

Permalink
#332 refactoring. Done
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Jun 13, 2022
1 parent eb4eb35 commit 2581363
Show file tree
Hide file tree
Showing 13 changed files with 111 additions and 91 deletions.
10 changes: 7 additions & 3 deletions arekit/contrib/experiment_rusentrel/exp_ds/opinions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from arekit.common.experiment.api.ops_opin import OpinionOperations
from arekit.common.experiment.data_type import DataType
from arekit.contrib.experiment_rusentrel.labels.scalers.ruattitudes import ExperimentRuAttitudesLabelConverter
from arekit.contrib.source.ruattitudes.news.helper import RuAttitudesNewsHelper


Expand All @@ -10,14 +11,17 @@ def __init__(self, ru_attitudes):
super(RuAttitudesOpinionOperations, self).__init__()

self.__ru_attitudes = ru_attitudes
self.__label_scaler = ExperimentRuAttitudesLabelConverter()

# region private methods

def __get_opinion_list_in_doc(self, doc_id, opinion_check=lambda _: True):
news = self.__ru_attitudes[doc_id]
return [opinion
for opinion, _ in RuAttitudesNewsHelper.iter_opinions_with_related_sentences(news)
if opinion_check(opinion)]

data_it = RuAttitudesNewsHelper.iter_opinions_with_related_sentences(
news=news, label_scaler=self.__label_scaler)

return [opinion for opinion, _ in data_it if opinion_check(opinion)]

# endregion

Expand Down
2 changes: 0 additions & 2 deletions arekit/contrib/experiment_rusentrel/exp_ds/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging

from arekit.common.utils import progress_bar_iter
from arekit.contrib.experiment_rusentrel.labels.scalers.ruattitudes import ExperimentRuAttitudesLabelConverter
from arekit.contrib.source.ruattitudes.collection import RuAttitudesCollection
from arekit.contrib.source.ruattitudes.io_utils import RuAttitudesVersions
from arekit.contrib.source.ruattitudes.news.base import RuAttitudesNews
Expand All @@ -20,7 +19,6 @@ def read_ruattitudes_in_memory(version, keep_doc_ids_only, doc_id_func):

it = RuAttitudesCollection.iter_news(version=version,
get_news_index_func=doc_id_func,
label_convereter=ExperimentRuAttitudesLabelConverter(),
return_inds_only=keep_doc_ids_only)

it_formatted_and_logged = progress_bar_iter(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from arekit.contrib.experiment_rusentrel.labels.types import ExperimentPositiveLabel, ExperimentNegativeLabel, \
ExperimentNeutralLabel
from arekit.contrib.source.ruattitudes.labels_scaler import RuAttitudesLabelConverter
from arekit.contrib.source.ruattitudes.labels_scaler import RuAttitudesLabelScaler


class ExperimentRuAttitudesLabelConverter(RuAttitudesLabelConverter):
class ExperimentRuAttitudesLabelConverter(RuAttitudesLabelScaler):

@classmethod
def _neutral_label_instance(cls):
Expand Down
13 changes: 5 additions & 8 deletions arekit/contrib/source/ruattitudes/collection.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,23 @@
from arekit.contrib.source.ruattitudes.io_utils import RuAttitudesVersions, RuAttitudesIOUtils
from arekit.contrib.source.ruattitudes.labels_scaler import RuAttitudesLabelConverter
from arekit.contrib.source.ruattitudes.labels_scaler import RuAttitudesLabelScaler
from arekit.contrib.source.ruattitudes.reader import RuAttitudesFormatReader


class RuAttitudesCollection(object):

@staticmethod
def __get_reading_handler(input_file, read_inds_only, get_news_inds_func, label_converter):
def __get_reading_handler(input_file, read_inds_only, get_news_inds_func):
assert(isinstance(read_inds_only, bool))

if read_inds_only:
return RuAttitudesFormatReader.iter_news_inds(input_file=input_file,
get_news_index_func=get_news_inds_func)
else:
return RuAttitudesFormatReader.iter_news(
input_file=input_file,
label_converter=RuAttitudesLabelConverter() if label_converter is None else label_converter,
get_news_index_func=get_news_inds_func)
return RuAttitudesFormatReader.iter_news(input_file=input_file,
get_news_index_func=get_news_inds_func)

@staticmethod
def iter_news(version, get_news_index_func, return_inds_only, label_convereter=None):
def iter_news(version, get_news_index_func, return_inds_only):
"""
RuAttitudes collection reader from zip archive
"""
Expand All @@ -32,7 +30,6 @@ def iter_news(version, get_news_index_func, return_inds_only, label_convereter=N
process_func=lambda input_filepath: RuAttitudesCollection.__get_reading_handler(
input_file=input_filepath,
read_inds_only=return_inds_only,
label_converter=label_convereter,
get_news_inds_func=get_news_index_func),
version=version)

Expand Down
4 changes: 0 additions & 4 deletions arekit/contrib/source/ruattitudes/conts.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
from arekit.common.labels.base import NoLabel
from arekit.contrib.source.common.labels import PositiveLabel, NegativeLabel

# Default label scales.
NEU_INT_VALUE = 0
POS_INT_VALUE = 1
NEG_INT_VALUE = -1

# Default label instances.
RU_ATTITUDES_NEU_LABEL = NoLabel()
Expand Down
28 changes: 17 additions & 11 deletions arekit/contrib/source/ruattitudes/labels_scaler.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,26 @@
from collections import OrderedDict

from arekit.common.labels.scaler.base import BaseLabelScaler
from arekit.contrib.source.ruattitudes.conts import \
NEG_INT_VALUE, POS_INT_VALUE, \
RU_ATTITUDES_POS_LABEL, RU_ATTITUDES_NEG_LABEL, NEU_INT_VALUE, RU_ATTITUDES_NEU_LABEL
RU_ATTITUDES_POS_LABEL, RU_ATTITUDES_NEG_LABEL, RU_ATTITUDES_NEU_LABEL


class RuAttitudesLabelConverter:
class RuAttitudesLabelScaler(BaseLabelScaler):

def __init__(self):

self.__int_to_label_dict = {
NEU_INT_VALUE: self._neutral_label_instance(),
POS_INT_VALUE: self._positive_label_instance(),
NEG_INT_VALUE: self._negative_label_instance()
}
self.__int_to_label_dict = OrderedDict([
(self._neutral_label_instance(), 0),
(self._positive_label_instance(), 1),
(self._negative_label_instance(), -1)])

self.__uint_to_label_dict = OrderedDict([
(self._neutral_label_instance(), 0),
(self._positive_label_instance(), 1),
(self._negative_label_instance(), 2)])

super(RuAttitudesLabelScaler, self).__init__(int_dict=self.__int_to_label_dict,
uint_dict=self.__uint_to_label_dict)

@classmethod
def _neutral_label_instance(cls):
Expand All @@ -24,6 +33,3 @@ def _positive_label_instance(cls):
@classmethod
def _negative_label_instance(cls):
return RU_ATTITUDES_NEG_LABEL

def int_to_label(self, int_value):
return self.__int_to_label_dict[int_value]
17 changes: 12 additions & 5 deletions arekit/contrib/source/ruattitudes/news/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from arekit.common.labels.scaler.base import BaseLabelScaler
from arekit.common.linkage.text_opinions import TextOpinionsLinkage
from arekit.common.news.base import News
from arekit.common.opinions.base import Opinion
from arekit.contrib.source.ruattitudes.news.opin_converter import RuAttitudesSentenceOpinionConverter
from arekit.contrib.source.ruattitudes.sentence.base import RuAttitudesSentence


Expand Down Expand Up @@ -50,28 +52,33 @@ def get_objects_declared_before(self, sentence_index):

# region base News

def extract_text_opinions_linkages(self, opinion):
def extract_text_opinions_linkages(self, opinion, label_scaler):
"""
Note: Complexity is O(N)
"""
assert(isinstance(opinion, Opinion))
return TextOpinionsLinkage(self.__iter_all_text_opinions_in_sentences(opinion=opinion))
assert(isinstance(label_scaler, BaseLabelScaler))

return TextOpinionsLinkage(self.__iter_all_text_opinions_in_sentences(
opinion=opinion, label_scaler=label_scaler))

# endregion

# region Private methods

def __iter_all_text_opinions_in_sentences(self, opinion):
def __iter_all_text_opinions_in_sentences(self, opinion, label_scaler):
for sentence in self.iter_sentences():
assert(isinstance(sentence, RuAttitudesSentence))

sentence_opin = sentence.find_sentence_opin_by_key(key=opinion.Tag)
if sentence_opin is None:
continue

yield sentence_opin.to_text_opinion(
yield RuAttitudesSentenceOpinionConverter.to_text_opinion(
sentence_opinion=sentence_opin,
doc_id=sentence.Owner.ID,
end_to_doc_id_func=lambda sent_level_id: sentence.get_doc_level_text_object_id(sent_level_id),
text_opinion_id=None)
text_opinion_id=None,
label_scaler=label_scaler)

# endregion
16 changes: 11 additions & 5 deletions arekit/contrib/source/ruattitudes/news/helper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from arekit.common.labels.scaler.base import BaseLabelScaler
from arekit.contrib.source.ruattitudes.news.base import RuAttitudesNews
from arekit.contrib.source.ruattitudes.news.opin_converter import RuAttitudesSentenceOpinionConverter
from arekit.contrib.source.ruattitudes.sentence.base import RuAttitudesSentence
from arekit.contrib.source.ruattitudes.sentence.opinion import SentenceOpinion

Expand All @@ -12,8 +14,9 @@ def build_opinion_dict(news):
return RuAttitudesNewsHelper.__build_opinion_dict(news)

@staticmethod
def iter_opinions_with_related_sentences(news):
def iter_opinions_with_related_sentences(news, label_scaler):
assert(isinstance(news, RuAttitudesNews))
assert(isinstance(label_scaler, BaseLabelScaler))

doc_opinions = RuAttitudesNewsHelper.build_opinion_dict(news=news)
assert(isinstance(doc_opinions, dict))
Expand All @@ -22,7 +25,8 @@ def iter_opinions_with_related_sentences(news):

opinion, related_sentences = RuAttitudesNewsHelper.__extract_opinion_with_related_sentences(
news=news,
sentence_opin_tag=sentence_opin_tag)
sentence_opin_tag=sentence_opin_tag,
label_scaler=label_scaler)

if opinion is None:
continue
Expand All @@ -34,7 +38,7 @@ def iter_opinions_with_related_sentences(news):
# region private methods

@staticmethod
def __extract_opinion_with_related_sentences(news, sentence_opin_tag):
def __extract_opinion_with_related_sentences(news, sentence_opin_tag, label_scaler):
opinion = None
related_sentences = []

Expand All @@ -54,9 +58,11 @@ def __extract_opinion_with_related_sentences(news, sentence_opin_tag):

source, target = sentence.get_objects(sentence_opin)

opinion = sentence_opin.to_opinion(
opinion = RuAttitudesSentenceOpinionConverter.to_opinion(
sentence_opinion=sentence_opin,
source_value=source.Value,
target_value=target.Value)
target_value=target.Value,
label_scaler=label_scaler)

return opinion, related_sentences

Expand Down
41 changes: 41 additions & 0 deletions arekit/contrib/source/ruattitudes/news/opin_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from arekit.common.labels.scaler.base import BaseLabelScaler
from arekit.common.opinions.base import Opinion
from arekit.common.text_opinions.base import TextOpinion
from arekit.contrib.source.ruattitudes.sentence.opinion import SentenceOpinion


class RuAttitudesSentenceOpinionConverter:
    """
    Static helpers that turn a RuAttitudes SentenceOpinion into
    either a TextOpinion or an Opinion, resolving the stored label
    value through the provided label scaler.
    """

    @staticmethod
    def to_text_opinion(sentence_opinion, doc_id, end_to_doc_id_func, text_opinion_id, label_scaler):
        """
        Build a TextOpinion from the given sentence opinion.

        sentence_opinion: SentenceOpinion
            provides SourceID, TargetID and Label.
        doc_id:
            forwarded as is into the resulting TextOpinion.
        end_to_doc_id_func: callable
            applied to SourceID and then to TargetID of the sentence
            opinion; results become source_id / target_id.
        text_opinion_id:
            forwarded as is into the resulting TextOpinion.
        label_scaler: BaseLabelScaler
            its int_to_label is applied to sentence_opinion.Label.
        """
        assert isinstance(sentence_opinion, SentenceOpinion)
        assert isinstance(label_scaler, BaseLabelScaler)

        # Order of calls matches the original keyword order: source, target, label.
        doc_level_source_id = end_to_doc_id_func(sentence_opinion.SourceID)
        doc_level_target_id = end_to_doc_id_func(sentence_opinion.TargetID)
        converted_label = label_scaler.int_to_label(sentence_opinion.Label)

        return TextOpinion(doc_id=doc_id,
                           text_opinion_id=text_opinion_id,
                           source_id=doc_level_source_id,
                           target_id=doc_level_target_id,
                           owner=None,
                           label=converted_label)

    @staticmethod
    def to_opinion(sentence_opinion, source_value, target_value, label_scaler):
        """
        Build an Opinion (not bound to the text) from the given
        sentence opinion.

        sentence_opinion: SentenceOpinion
            provides Label and Tag.
        source_value, target_value:
            forwarded as is into the resulting Opinion.
        label_scaler: BaseLabelScaler
            its int_to_label is applied to sentence_opinion.Label
            in order to obtain the sentiment.
        """
        assert isinstance(sentence_opinion, SentenceOpinion)
        assert isinstance(label_scaler, BaseLabelScaler)

        sentiment = label_scaler.int_to_label(sentence_opinion.Label)

        result = Opinion(source_value=source_value,
                         target_value=target_value,
                         sentiment=sentiment)

        # The tag of the sentence opinion is kept in the result, so the
        # related sentence opinion could be found back by the opinion.
        result.set_tag(sentence_opinion.Tag)

        return result
18 changes: 6 additions & 12 deletions arekit/contrib/source/ruattitudes/reader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from arekit.common.utils import split_by_whitespaces
from arekit.contrib.source.ruattitudes.labels_scaler import RuAttitudesLabelConverter
from arekit.contrib.source.ruattitudes.labels_scaler import RuAttitudesLabelScaler
from arekit.contrib.source.ruattitudes.news.base import RuAttitudesNews
from arekit.contrib.source.ruattitudes.sentence.base import RuAttitudesSentence
from arekit.contrib.source.ruattitudes.sentence.opinion import SentenceOpinion
Expand Down Expand Up @@ -52,11 +52,8 @@ def iter_news_inds(input_file, get_news_index_func):
local_index=local_news_ind)

@staticmethod
def iter_news(input_file, get_news_index_func, label_converter):
def iter_news(input_file, get_news_index_func):
assert(callable(get_news_index_func))
# TODO. #322 -- remove label converter and adopt the latter only when we need to perform
# TODO. #322 a conversion towards the opinion (text_opinion).
assert(isinstance(label_converter, RuAttitudesLabelConverter))

reset = False
title = None
Expand All @@ -79,8 +76,7 @@ def iter_news(input_file, get_news_index_func, label_converter):
objects_list.append(object)

if RuAttitudesFormatReader.OPINION_KEY in line:
sentence_opin = RuAttitudesFormatReader.__parse_sentence_opin(
line=line, label_converter=label_converter)
sentence_opin = RuAttitudesFormatReader.__parse_sentence_opin(line)
opinions_list.append(sentence_opin)

if RuAttitudesFormatReader.FRAMEVAR_TITLE in line:
Expand Down Expand Up @@ -175,14 +171,12 @@ def __parse_sentence(line, is_title):
return text.strip()

@staticmethod
def __parse_sentence_opin(line, label_converter):
assert(isinstance(label_converter, RuAttitudesLabelConverter))

def __parse_sentence_opin(line):
line = line[len(RuAttitudesFormatReader.OPINION_KEY):]

s_from = line.index('b:(')
s_to = line.index(')', s_from)
label = label_converter.int_to_label(int(line[s_from + 3:s_to]))
label = int(line[s_from + 3:s_to])

o_from = line.index('oi:[')
o_to = line.index(']', o_from)
Expand All @@ -197,7 +191,7 @@ def __parse_sentence_opin(line, label_converter):

sentence_opin = SentenceOpinion(source_id=source_object_id_in_sentence,
target_id=target_object_id_in_sentence,
label=label,
label_int=label,
tag=opninion_key)

return sentence_opin
Expand Down
Loading

0 comments on commit 2581363

Please sign in to comment.