
Merge pull request #126 from nicolay-r/0.21.0-rc
0.21.0 rc
nicolay-r authored Jun 14, 2021
2 parents 615af0f + aaa8fd3 commit b3b098b
Showing 157 changed files with 791 additions and 742 deletions.
37 changes: 7 additions & 30 deletions README.md
@@ -5,12 +5,10 @@
</p>

**AREkit** (Attitude and Relation Extraction Toolkit) -- is a python toolkit, devoted to
-**sentiment attitude extraction** task. Please proceed to [[ABOUT.md]](ABOUT.md) for more details.
+document level Attitude and Relation Extraction for text objects with objects-synonymy support.

## Dependencies

List of the toolset dependencies is as follows:

* python == 2.7 (No doubts it will be updated to 3.4+)

* pymystem3 == 0.1.9
@@ -30,30 +28,9 @@ Then install dependencies as follows:
pip install -r dependencies.txt
```

-## References
-
-### Applications
-
-> TODO. Provide the list of all the applications, which are independed from the particular conferences.
-### Papers
-
-* Attention-Based Neural Networks for Sentiment Attitude Extraction using Distant Supervision
-  [[ACM-DOI]](https://doi.org/10.1145/3405962.3405985)
-    * Rusnachenko Nicolay, Loukachevitch Natalia
-    * WIMS-2020
-* Studying Attention Models in Sentiment Attitude Extraction Task
-  [[Springer]](https://doi.org/10.1007/978-3-030-51310-8_15) /
-  [[arXiv:2006.11605]](https://arxiv.org/abs/2006.11605)
-    * Rusnachenko Nicolay, Loukachevitch Natalia
-    * NLDB-2020
-* Distant Supervision for Sentiment Attitude Extraction
-  [[paper-ranlp-proceeding]](http://lml.bas.bg/ranlp2019/proceedings-ranlp-2019.pdf),
-  [[poster]](docs/ranlp_2019_poster_portrait.pdf)
-    * Rusnachenko Nikolay, Loukachevitch Natalia, Tutubalina Elena
-    * RANLP-2019
-* Neural Network Approach for Extracting Aggregated Opinions from Analytical Articles
-  [[paper]](https://link.springer.com/chapter/10.1007/978-3-030-23584-0_10)
-  [[code]](https://github.com/nicolay-r/sentiment-pcnn/tree/ccis-2019)
-    * Nicolay Rusnachenko, Natalia Loukachevitch
-    * TSD-2018
+## Framework Applications
+
+* Neural Networks for attitude extraction
+  [[code]](https://github.com/nicolay-r/neural-networks-for-attitude-extraction)
+* Input Formatter for BERT-based models
+  [[code]](https://github.com/nicolay-r/bert-utils-for-attitude-extraction)
18 changes: 16 additions & 2 deletions common/evaluation/evaluators/base.py
@@ -81,7 +81,17 @@ def _create_eval_result(self):

# region protected methods

-    def _calc_diff(self, etalon_opins, test_opins):
+    def _check_is_supported(self, label, is_label_supported):
+        if label is None:
+            return True
+
+        if not is_label_supported(label):
+            raise Exception(u"Label \"{label}\" is not supported by {e}".format(
+                label=label_to_str(label),
+                e=type(self).__name__))
+
+    def _calc_diff(self, etalon_opins, test_opins, is_label_supported):
+        assert(callable(is_label_supported))

        it = self.__iter_diff_core(etalon_opins=etalon_opins,
                                   test_opins=test_opins)
@@ -91,6 +101,9 @@ def _calc_diff(self, etalon_opins, test_opins):
for args in it:
opin, etalon_label, result_label = args

+            self._check_is_supported(label=etalon_label, is_label_supported=is_label_supported)
+            self._check_is_supported(label=result_label, is_label_supported=is_label_supported)

row = [opin.SourceValue.encode('utf-8'),
opin.TargetValue.encode('utf-8'),
None if etalon_label is None else label_to_str(etalon_label),
@@ -119,7 +132,8 @@ def evaluate(self, cmp_pairs):
for cmp_pair in cmp_pairs:
assert(isinstance(cmp_pair, OpinionCollectionsToCompare))
cmp_table = self._calc_diff(etalon_opins=cmp_pair.EtalonOpinionCollection,
-                                        test_opins=cmp_pair.TestOpinionCollection)
+                                        test_opins=cmp_pair.TestOpinionCollection,
+                                        is_label_supported=result.is_label_supported)

result.reg_doc(cmp_pair=cmp_pair, cmp_table=cmp_table)

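For readers skimming the diff: the net effect of the evaluator change is that any etalon or result label outside the evaluation result's supported set now fails fast instead of silently entering the comparison table. A minimal standalone sketch of that behaviour (the simplified label classes and names below are assumptions for illustration, not AREkit code):

```python
# Simplified stand-ins for AREkit label classes (illustration only).
class PositiveLabel(object):
    pass


class NegativeLabel(object):
    pass


def check_is_supported(label, is_label_supported):
    # Mirrors the logic of BaseEvaluator._check_is_supported above:
    # a missing (None) label is always fine, any other label must be
    # accepted by the predicate supplied by the evaluation result.
    if label is None:
        return True
    if not is_label_supported(label):
        raise Exception("Label \"{label}\" is not supported".format(label=label))
    return True


is_label_supported = lambda label: isinstance(label, (PositiveLabel, NegativeLabel))

check_is_supported(PositiveLabel(), is_label_supported)   # ok
check_is_supported(None, is_label_supported)              # ok, None is allowed
# check_is_supported("neutral", is_label_supported)       # would raise Exception
```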
9 changes: 7 additions & 2 deletions common/evaluation/results/base.py
@@ -6,9 +6,11 @@

class BaseEvalResult(object):

-    def __init__(self):
+    def __init__(self, supported_labels):
+        assert(isinstance(supported_labels, set))
        self._cmp_tables = {}
        self._total_result = OrderedDict()
+        self.__supported_labels = supported_labels

# region properties

@@ -25,6 +27,9 @@ def calculate(self):

# endregion

+    def is_label_supported(self, label):
+        return label in self.__supported_labels

def get_result_by_metric(self, metric_name):
assert(isinstance(metric_name, unicode))
return self._total_result[metric_name]
@@ -34,7 +39,7 @@ def iter_total_by_param_results(self):
return self._total_result.iteritems()

    def iter_dataframe_cmp_tables(self):
-        yield self._cmp_tables.iteritems()
+        return self._cmp_tables.iteritems()

def reg_doc(self, cmp_pair, cmp_table):
""" Registering cmp_table.
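Taken together with the evaluator change, a concrete evaluation result is now expected to declare its label set up front. A hedged sketch of the wiring (the subclass name and the use of plain strings instead of Label instances are assumptions made to keep the example self-contained):

```python
from collections import OrderedDict


class BaseEvalResult(object):
    # Condensed from the diff above; only the supported-labels plumbing is kept.

    def __init__(self, supported_labels):
        assert(isinstance(supported_labels, set))
        self._cmp_tables = {}
        self._total_result = OrderedDict()
        self.__supported_labels = supported_labels

    def is_label_supported(self, label):
        return label in self.__supported_labels


class TwoClassEvalResult(BaseEvalResult):
    # Hypothetical subclass; real AREkit results would pass a set of Label instances.
    def __init__(self):
        super(TwoClassEvalResult, self).__init__(supported_labels={u"pos", u"neg"})


result = TwoClassEvalResult()
print(result.is_label_supported(u"pos"))   # True
print(result.is_label_supported(u"neu"))   # False -> the evaluator would raise
```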
File renamed without changes.
72 changes: 72 additions & 0 deletions common/experiment/annot/base.py
@@ -0,0 +1,72 @@
import logging

from arekit.common.experiment.formats.documents import DocumentOperations
from arekit.common.experiment.formats.opinions import OpinionOperations
from arekit.common.news.parsed.base import ParsedNews
from arekit.common.utils import progress_bar_iter

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


class BaseAnnotator(object):
    """
    Performs annotation for a particular data_type
    using OpinOps and DocOps API.
    """

    def __init__(self):
        logger.info("Init annotator: [{}]".format(self.__class__))

    @property
    def LabelsCount(self):
        raise NotImplementedError()

    # region private methods

    def __iter_annotated_collections(self, data_type, filter_func, doc_ops, opin_ops):
        assert(isinstance(doc_ops, DocumentOperations))
        assert(isinstance(opin_ops, OpinionOperations))

        docs_to_annot_list = filter(filter_func,
                                    doc_ops.iter_doc_ids_to_annotate())

        if len(docs_to_annot_list) == 0:
            logger.info("[{}]: Nothing to annotate".format(data_type))
            return

        logged_parsed_news_iter = progress_bar_iter(
            iterable=doc_ops.iter_parsed_news(docs_to_annot_list),
            desc="Annotating parsed news [{}]".format(data_type))

        for parsed_news in logged_parsed_news_iter:
            assert(isinstance(parsed_news, ParsedNews))
            yield parsed_news.RelatedNewsID, \
                  self._annot_collection_core(parsed_news=parsed_news, data_type=data_type,
                                              doc_ops=doc_ops, opin_ops=opin_ops)

    # endregion

    def _annot_collection_core(self, parsed_news, data_type, doc_ops, opin_ops):
        raise NotImplementedError

    # region public methods

    def serialize_missed_collections(self, data_type, doc_ops, opin_ops):
        assert(isinstance(opin_ops, OpinionOperations))

        filter_func = lambda doc_id: opin_ops.try_read_annotated_opinion_collection(
            doc_id=doc_id, data_type=data_type) is None

        annot_it = self.__iter_annotated_collections(
            data_type,
            filter_func,
            doc_ops=doc_ops,
            opin_ops=opin_ops)

        for doc_id, collection in annot_it:
            opin_ops.save_annotated_opinion_collection(collection=collection,
                                                       doc_id=doc_id,
                                                       data_type=data_type)

    # endregion
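The class above only fixes the orchestration (filtering already-annotated documents, iterating parsed news, saving collections); concrete annotators supply the two missing pieces. A hedged sketch of that contract (the subclass name is hypothetical, and the collection construction is experiment-specific, so it is left unimplemented):

```python
from arekit.common.experiment.annot.base import BaseAnnotator


class TwoScaleAnnotator(BaseAnnotator):
    """ Hypothetical annotator, shown only to illustrate the extension points. """

    @property
    def LabelsCount(self):
        return 2

    def _annot_collection_core(self, parsed_news, data_type, doc_ops, opin_ops):
        # Expected to build and return the annotated opinion collection for
        # parsed_news; in practice this delegates to an annotation algorithm
        # such as the one in base_annot.py / the default algorithm below.
        raise NotImplementedError()
```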
4 changes: 4 additions & 0 deletions common/experiment/annot/base_annot.py
@@ -0,0 +1,4 @@
class BaseAnnotationAlgorithm(object):

    def iter_opinions(self, parsed_news, entities_collection, existed_opinions=None):
        pass
@@ -1,29 +1,33 @@
from arekit.common.entities.base import Entity
from arekit.common.entities.collection import EntityCollection
-from arekit.common.experiment.neutral.algo.base import BaseNeutralAnnotationAlgorithm
-from arekit.common.labels.base import NeutralLabel
+from arekit.common.experiment.annot.base_annot import BaseAnnotationAlgorithm
+from arekit.common.labels.base import NoLabel, Label
from arekit.common.news.parsed.base import ParsedNews
from arekit.common.opinions.base import Opinion
from arekit.common.dataset.text_opinions.enums import DistanceType
from arekit.common.dataset.text_opinions.helper import TextOpinionHelper


-class DefaultNeutralAnnotationAlgorithm(BaseNeutralAnnotationAlgorithm):
+class DefaultSingleLabelAnnotationAlgorithm(BaseAnnotationAlgorithm):
    """
    Neutral annotation algorithm which assumes to compose pairs
    within a sentence which are not a part of sentiment.
    """

-    def __init__(self, dist_in_terms_bound, ignored_entity_values=None):
+    def __init__(self, dist_in_terms_bound, label_instance, dist_in_sents=0, ignored_entity_values=None):
"""
dist_in_terms_bound: int
max allowed distance in term (less than passed value)
"""
assert(isinstance(ignored_entity_values, list) or ignored_entity_values is None)
assert(isinstance(dist_in_terms_bound, int) or dist_in_terms_bound is None)
+        assert(isinstance(label_instance, Label))
+        assert(isinstance(dist_in_sents, int))

self.__ignored_entity_values = [] if ignored_entity_values is None else ignored_entity_values
self.__dist_in_terms_bound = dist_in_terms_bound
+        self.__dist_in_sents = dist_in_sents
+        self.__label_instance = label_instance

# region private methods

@@ -37,7 +41,7 @@ def __is_ignored_entity_value(self, entity_value):
assert(isinstance(entity_value, unicode))
return entity_value in self.__ignored_entity_values

-    def __iter_opinions_between_entties(self, relevant_pairs, entities_collection):
+    def __iter_opinions_between_entities(self, relevant_pairs, entities_collection):
assert(isinstance(entities_collection, EntityCollection))

for e1 in entities_collection:
@@ -52,11 +56,11 @@ def __iter_opinions_between_entties(self, relevant_pairs, entities_collection):

opinion = Opinion(source_value=e1.Value,
target_value=e2.Value,
-                                  sentiment=NeutralLabel())
+                                  sentiment=self.__label_instance)

yield opinion

-    def __try_create_pair_key(self, parsed_news, e1, e2, sentiment_opinions):
+    def __try_create_pair_key(self, parsed_news, e1, e2, existed_opinions):
assert(isinstance(e1, Entity))
assert(isinstance(e2, Entity))

Expand All @@ -72,7 +76,7 @@ def __try_create_pair_key(self, parsed_news, e1, e2, sentiment_opinions):
e1=e1, e2=e2,
distance_type=DistanceType.InSentences)

-        if s_dist > 0:
+        if s_dist > self.__dist_in_sents:
return

t_dist = TextOpinionHelper.calc_dist_between_entities(parsed_news=parsed_news,
Expand All @@ -82,18 +86,18 @@ def __try_create_pair_key(self, parsed_news, e1, e2, sentiment_opinions):
if self.__dist_in_terms_bound is not None and t_dist > self.__dist_in_terms_bound:
return

-        if sentiment_opinions is not None:
+        if existed_opinions is not None:
            o = Opinion(source_value=e1.Value,
                        target_value=e2.Value,
-                        sentiment=NeutralLabel())
-            if sentiment_opinions.has_synonymous_opinion(opinion=o):
+                        sentiment=self.__label_instance)
+            if existed_opinions.has_synonymous_opinion(opinion=o):
                return

return self.__create_key_by_entity_pair(e1=e1, e2=e2)

# endregion

-    def iter_neutral_opinions(self, parsed_news, entities_collection, sentiment_opinions=None):
+    def iter_opinions(self, parsed_news, entities_collection, existed_opinions=None):
assert(isinstance(parsed_news, ParsedNews))
assert(isinstance(entities_collection, EntityCollection))

@@ -107,12 +111,12 @@ def iter_neutral_opinions(self, parsed_news, entities_collection, sentiment_opin

key = self.__try_create_pair_key(parsed_news=parsed_news,
e1=e1, e2=e2,
-                                             sentiment_opinions=sentiment_opinions)
+                                             existed_opinions=existed_opinions)

if key is None:
continue

relevant_pairs[key] = 0

-        return self.__iter_opinions_between_entties(relevant_pairs=relevant_pairs,
-                                                    entities_collection=entities_collection)
+        return self.__iter_opinions_between_entities(relevant_pairs=relevant_pairs,
+                                                      entities_collection=entities_collection)
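With the rename, the former neutral annotator becomes a generic single-label algorithm: the label is injected, and the sentence-distance bound is now configurable. A hedged usage sketch (the module path of the renamed file is not visible in this excerpt, and parsed_news / entities_collection / sentiment_opins are assumed to be prepared by the experiment pipeline):

```python
from arekit.common.labels.base import NoLabel
# from <renamed module> import DefaultSingleLabelAnnotationAlgorithm   # path not shown in this diff

algo = DefaultSingleLabelAnnotationAlgorithm(
    dist_in_terms_bound=10,        # pairs farther apart than 10 terms are skipped
    label_instance=NoLabel(),      # every produced Opinion carries this label
    dist_in_sents=0,               # 0 keeps the previous behaviour: same-sentence pairs only
    ignored_entity_values=None)

for opinion in algo.iter_opinions(parsed_news=parsed_news,
                                  entities_collection=entities_collection,
                                  existed_opinions=sentiment_opins):
    print(opinion.SourceValue, opinion.TargetValue)
```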
16 changes: 5 additions & 11 deletions common/experiment/data/base.py
@@ -1,4 +1,3 @@
-from arekit.common.labels.scaler import BaseLabelScaler
from arekit.common.model.model_io import BaseModelIO


@@ -8,19 +7,10 @@ class DataIO(object):
(data-serialization, training, etc.).
"""

-    def __init__(self, labels_scaler, stemmer):
-        assert(isinstance(labels_scaler, BaseLabelScaler))
-        self.__labels_scale = labels_scaler
+    def __init__(self, stemmer):
        self.__stemmer = stemmer
        self.__model_io = None

-    @property
-    def LabelsScaler(self):
-        """ Declares the amount of labels utilized in experiment. The latter
-        is necessary for conversions from int (uint) to Labels and vice versa.
-        """
-        return self.__labels_scale

@property
def ModelIO(self):
""" Provides model paths for the resources utilized during training process.
Expand All @@ -29,6 +19,10 @@ def ModelIO(self):
"""
return self.__model_io

+    @property
+    def LabelsCount(self):
+        raise NotImplementedError()

@property
def Stemmer(self):
return self.__stemmer
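Label scaling has moved out of DataIO entirely; an experiment's data class now only reports how many labels it works with, via the new LabelsCount property. A hedged sketch of a concrete data class under the new API (the subclass name and label count are assumptions; the import path mirrors the file location, as with the other imports in this commit):

```python
from arekit.common.experiment.data.base import DataIO


class ThreeScaleExperimentData(DataIO):
    """ Hypothetical experiment data class illustrating the LabelsCount contract. """

    def __init__(self, stemmer):
        super(ThreeScaleExperimentData, self).__init__(stemmer=stemmer)

    @property
    def LabelsCount(self):
        return 3   # e.g. positive / negative / neutral
```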