Skip to content

Commit

Permalink
#250 done
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Jun 3, 2022
1 parent 7cf1ab9 commit 458062b
Show file tree
Hide file tree
Showing 11 changed files with 50 additions and 30 deletions.
2 changes: 1 addition & 1 deletion arekit/common/experiment/api/ctx_serialization.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from arekit.common.experiment.annot.base import BaseAnnotator
from arekit.common.experiment.api.ctx_base import ExperimentContext
from arekit.common.labels.scaler.base import BaseLabelScaler
from arekit.common.opinions.annot.base import BaseAnnotator


class ExperimentSerializationContext(ExperimentContext):
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from arekit.common.experiment.annot.algo.base import BaseAnnotationAlgorithm
from arekit.common.labels.provider.base import BasePairLabelProvider
from arekit.common.news.entity import DocumentEntity
from arekit.common.news.parsed.base import ParsedNews
from arekit.common.news.parsed.providers.entity_service import EntityServiceProvider, DistanceType
from arekit.common.news.parsed.providers.opinion_pairs import OpinionPairsProvider
from arekit.common.opinions.annot.algo.base import BaseAnnotationAlgorithm
from arekit.common.opinions.base import Opinion


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ class BaseAnnotator(object):
def __init__(self):
logger.info("Init annotator: [{}]".format(self.__class__))

def _annot_collection_core(self, parsed_news, data_type, opin_ops):
def _annot_collection_core(self, parsed_news, data_type):
raise NotImplementedError

# region public methods

def annotate_collection(self, data_type, parsed_news, opin_ops):
def annotate_collection(self, data_type, parsed_news):
return parsed_news.RelatedDocID, \
self._annot_collection_core(parsed_news=parsed_news, data_type=data_type, opin_ops=opin_ops)
self._annot_collection_core(parsed_news=parsed_news, data_type=data_type)

# endregion
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import logging

from arekit.common.experiment.annot.algo.base import BaseAnnotationAlgorithm
from arekit.common.experiment.annot.base import BaseAnnotator
from arekit.common.experiment.api.ops_opin import OpinionOperations
from arekit.common.experiment.data_type import DataType
from arekit.common.news.parsed.base import ParsedNews
from arekit.common.opinions.annot.algo.base import BaseAnnotationAlgorithm
from arekit.common.opinions.annot.base import BaseAnnotator
from arekit.common.opinions.collection import OpinionCollection

logger = logging.getLogger(__name__)
Expand All @@ -15,25 +14,34 @@ class DefaultAnnotator(BaseAnnotator):
""" Algorithm-based annotator
"""

def __init__(self, annot_algo):
super(DefaultAnnotator, self).__init__()
def __init__(self, annot_algo, create_empty_collection_func, get_doc_etalon_opins_func):
"""
create_empty_collection_func:
function that creates an empty opinion collection
get_doc_etalon_opins_func:
function that provides etalon opinions for a document
"""
assert(isinstance(annot_algo, BaseAnnotationAlgorithm))
assert(callable(get_doc_etalon_opins_func))
super(DefaultAnnotator, self).__init__()

self.__annot_algo = annot_algo
self.__get_doc_etalon_opins_func = get_doc_etalon_opins_func
self.__create_empty_collection_func = create_empty_collection_func

# region private methods

def _annot_collection_core(self, parsed_news, data_type, opin_ops):
def _annot_collection_core(self, parsed_news, data_type):
assert(isinstance(parsed_news, ParsedNews))
assert(isinstance(data_type, DataType))
assert(isinstance(opin_ops, OpinionOperations))

opinions = opin_ops.get_etalon_opinion_collection(doc_id=parsed_news.RelatedDocID)
opinions = self.__get_doc_etalon_opins_func(parsed_news.RelatedDocID)

annotated_opins_it = self.__annot_algo.iter_opinions(
parsed_news=parsed_news,
existed_opinions=opinions if data_type == DataType.Train else None)

collection = opin_ops.create_opinion_collection(None)
collection = self.__create_empty_collection_func()
assert(isinstance(collection, OpinionCollection))

# Filling. Keep all the opinions, without duplicates.
Expand Down
2 changes: 1 addition & 1 deletion arekit/contrib/experiment_rusentrel/annot/algo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from arekit.common.experiment.annot.algo.pair_based import PairBasedAnnotationAlgorithm
from arekit.common.labels.provider.constant import ConstantLabelProvider
from arekit.common.opinions.annot.algo.pair_based import PairBasedAnnotationAlgorithm
from arekit.contrib.experiment_rusentrel.labels.types import ExperimentNeutralLabel


Expand Down
11 changes: 7 additions & 4 deletions arekit/contrib/experiment_rusentrel/annot/factory.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
from arekit.common.experiment.annot.default import DefaultAnnotator
from arekit.common.opinions.annot.default import DefaultAnnotator
from arekit.contrib.experiment_rusentrel.annot.two_scale import TwoScaleTaskAnnotator


class ExperimentAnnotatorFactory:

@staticmethod
def create(labels_count, create_algo):
def create(labels_count, create_algo, create_empty_collection_func, get_doc_etalon_opins_func):
assert(isinstance(labels_count, int))
assert(callable(create_algo))

if labels_count == 2:
return TwoScaleTaskAnnotator()
return TwoScaleTaskAnnotator(create_empty_collection_func=create_empty_collection_func,
get_doc_etalon_opins_func=get_doc_etalon_opins_func)
else:
return DefaultAnnotator(create_algo())
return DefaultAnnotator(create_algo(),
create_empty_collection_func=create_empty_collection_func,
get_doc_etalon_opins_func=get_doc_etalon_opins_func)
29 changes: 19 additions & 10 deletions arekit/contrib/experiment_rusentrel/annot/two_scale.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import logging

from arekit.common.experiment.annot.base import BaseAnnotator
from arekit.common.news.parsed.base import ParsedNews
from arekit.common.opinions.annot.base import BaseAnnotator
from arekit.common.opinions.base import Opinion
from arekit.common.opinions.collection import OpinionCollection
from arekit.common.experiment.data_type import DataType
from arekit.contrib.experiment_rusentrel.labels.types import ExperimentNeutralLabel

Expand All @@ -14,24 +13,34 @@ class TwoScaleTaskAnnotator(BaseAnnotator):
""" For two scale classification task.
"""

def __init__(self):
def __init__(self, create_empty_collection_func, get_doc_etalon_opins_func):
"""
create_empty_collection_func:
function that creates an empty opinion collection
get_doc_etalon_opins_func:
obtains opinion collection by a given document id
"""
assert(callable(create_empty_collection_func))
assert(callable(get_doc_etalon_opins_func))
super(TwoScaleTaskAnnotator, self).__init__()

self.__create_empty_collection_func = create_empty_collection_func
self.__get_doc_etalon_opins_func = get_doc_etalon_opins_func

# region private methods

def _annot_collection_core(self, parsed_news, data_type, opin_ops):
def _annot_collection_core(self, parsed_news, data_type):
assert(isinstance(parsed_news, ParsedNews))
assert(isinstance(data_type, DataType))

doc_id = parsed_news.RelatedDocID
neut_collection = opin_ops.create_opinion_collection()
assert(isinstance(neut_collection, OpinionCollection))
neut_collection = self.__create_empty_collection_func()

# We copy all the opinions from the etalon collection
# into the neutral one with replaced sentiment values,
# as we treat such opinions as neutral ones, since only NeutralLabels
# could be cast into a correct string.
for opinion in opin_ops.get_etalon_opinion_collection(doc_id):
for opinion in self.__get_doc_etalon_opins_func(doc_id):
neut_collection.add_opinion(Opinion(source_value=opinion.SourceValue,
target_value=opinion.TargetValue,
sentiment=ExperimentNeutralLabel()))
Expand All @@ -42,13 +51,13 @@ def _annot_collection_core(self, parsed_news, data_type, opin_ops):

# region public methods

def annotate_collection(self, data_type, parsed_news, opin_ops):
def annotate_collection(self, data_type, parsed_news):

if data_type == DataType.Train:
# Return empty collection.
return opin_ops.create_opinion_collection()
return self.__create_empty_collection_func()

super(TwoScaleTaskAnnotator, self).annotate_collection(
data_type, parsed_news=parsed_news, opin_ops=opin_ops)
data_type=data_type, parsed_news=parsed_news)

# endregion
2 changes: 1 addition & 1 deletion arekit/contrib/utils/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from arekit.common.data.input.sample import InputSampleBase
from arekit.common.experiment.annot.base import BaseAnnotator
from arekit.common.linkage.text_opinions import TextOpinionsLinkage
from arekit.common.news.parsed.providers.entity_service import EntityServiceProvider
from arekit.common.news.parsed.providers.text_opinion_pairs import TextOpinionPairsProvider
from arekit.common.news.parsed.service import ParsedNewsService
from arekit.common.opinions.annot.base import BaseAnnotator
from arekit.common.opinions.base import Opinion
from arekit.common.pipeline.item_map import MapPipelineItem
from arekit.common.pipeline.items.flatten import FlattenIterPipelineItem
Expand Down

0 comments on commit 458062b

Please sign in to comment.