Skip to content

Commit

Permalink
Merge pull request #89 from nicolay-r/0.20.5-rc
Browse files Browse the repository at this point in the history
0.20.5 rc
  • Loading branch information
nicolay-r authored Mar 11, 2021
2 parents d1a39ab + 8b331b0 commit ac07e88
Show file tree
Hide file tree
Showing 16 changed files with 235 additions and 88 deletions.
20 changes: 20 additions & 0 deletions common/evaluation/results/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,26 @@ def calc_precision(result_answers, answer_exist):
return 0.0 if answer_exist else 1.0


def calc_precision_micro(get_result_by_label_func, labels):
    """Compute micro-averaged precision over the given labels.

    For every label the per-label result is fetched once via
    ``get_result_by_label_func``. The numerator is the total number of rows
    kept by ``filter_comparison_true()``; the denominator is the total number
    of result rows (taken here as TP + FP).

    :param get_result_by_label_func: callable ``label -> result``; each result
        must support ``len()`` and ``filter_comparison_true()``.
    :param labels: list of labels to aggregate over.
    :return: float precision; ``0.0`` when there are no result rows at all
        (previously this case raised ``ZeroDivisionError``).
    """
    assert(callable(get_result_by_label_func))
    assert(isinstance(labels, list))

    results = [get_result_by_label_func(label) for label in labels]
    tp_sum = sum(len(res.filter_comparison_true()) for res in results)
    tp_fp_sum = sum(len(res) for res in results)

    # Nothing was predicted for any label: the ratio is undefined, report 0.0
    # instead of crashing the whole evaluation.
    if tp_fp_sum == 0:
        return 0.0

    return float(tp_sum) / tp_fp_sum


def calc_recall_micro(get_origin_answers_by_label_func,
                      get_result_answers_by_label_func,
                      labels):
    """Compute micro-averaged recall over the given labels.

    The numerator is the total number of result rows kept by
    ``filter_comparison_true()``; the denominator is the total number of
    original answers across all labels (taken here as TP + FN).

    :param get_origin_answers_by_label_func: callable ``label -> sized
        collection`` of original answers.
    :param get_result_answers_by_label_func: callable ``label -> result``;
        each result must support ``filter_comparison_true()``.
    :param labels: labels to aggregate over; iterated twice, so it must be a
        re-iterable collection.
    :return: float recall; ``0.0`` when there are no original answers at all
        (previously this case raised ``ZeroDivisionError``).
    """
    assert(callable(get_origin_answers_by_label_func))
    assert(callable(get_result_answers_by_label_func))

    results = [get_result_answers_by_label_func(label) for label in labels]
    tp_sum = sum(len(res.filter_comparison_true()) for res in results)
    tp_fn_sum = sum(len(get_origin_answers_by_label_func(label)) for label in labels)

    # No original answers for any label: the ratio is undefined, report 0.0
    # instead of crashing the whole evaluation.
    if tp_fn_sum == 0:
        return 0.0

    return float(tp_sum) / tp_fn_sum


def calc_prec_and_recall(cmp_table,
label,
opinions_exist):
Expand Down
81 changes: 53 additions & 28 deletions common/evaluation/results/three_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from arekit.common.evaluation.results import metrics
from arekit.common.evaluation.results.base import BaseEvalResult
from arekit.common.evaluation.results.utils import calc_f1_3c, calc_f1_single_class
from arekit.common.evaluation.results.metrics import calc_precision_micro, calc_recall_micro
from arekit.common.evaluation.results.utils import calc_f1_3c_macro, calc_f1_single_class
from arekit.common.labels.base import NegativeLabel, PositiveLabel, NeutralLabel, Label
from arekit.common.opinions.collection import OpinionCollection

Expand All @@ -18,9 +19,12 @@ class ThreeClassEvalResult(BaseEvalResult):
C_POS_RECALL = u'pos_recall'
C_NEG_RECALL = u'neg_recall'
C_NEU_RECALL = u'neu_recall'
C_PREC_MICRO = u'prec_micro'
C_RECALL_MICRO = u'recall_micro'
C_F1_POS = u'f1_pos'
C_F1_NEG = u'f1_neg'
C_F1_NEU = u'f1_neu'
C_F1_MICRO = u'f1_micro'

def __init__(self):
super(ThreeClassEvalResult, self).__init__()
Expand Down Expand Up @@ -72,8 +76,8 @@ def reg_doc(self, cmp_pair, cmp_table):
opinions_exist=has_neu)

# Add document results.
f1 = calc_f1_3c(pos_prec=pos_prec, neg_prec=neg_prec, neu_prec=neu_prec,
pos_recall=pos_recall, neg_recall=neg_recall, neu_recall=neu_recall)
f1 = calc_f1_3c_macro(pos_prec=pos_prec, neg_prec=neg_prec, neu_prec=neu_prec,
pos_recall=pos_recall, neg_recall=neg_recall, neu_recall=neu_recall)

# Filling results.
doc_id = cmp_pair.DocumentID
Expand All @@ -85,40 +89,61 @@ def reg_doc(self, cmp_pair, cmp_table):
self.__doc_results[doc_id][self.C_POS_RECALL] = pos_recall
self.__doc_results[doc_id][self.C_NEG_RECALL] = neg_recall
self.__doc_results[doc_id][self.C_NEU_RECALL] = neu_recall
self.__doc_results[doc_id][self.C_PREC_MICRO] = calc_precision_micro(
get_result_by_label_func=cmp_table.filter_result_column_by_label,
labels=[self.__pos_label, self.__neg_label, self.__neu_label])
self.__doc_results[doc_id][self.C_RECALL_MICRO] = calc_recall_micro(
get_origin_answers_by_label_func=cmp_table.filter_original_column_by_label,
get_result_answers_by_label_func=cmp_table.filter_result_column_by_label,
labels=[self.__pos_label, self.__neg_label, self.__neu_label])

def calculate(self):
pos_prec, neg_prec, neu_prec, pos_recall, neg_recall, neu_recall = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
pos_prec_macro = 0.0
neg_prec_macro = 0.0
neu_prec_macro = 0.0
pos_recall_macro = 0.0
neg_recall_macro = 0.0
neu_recall_macro = 0.0
prec_micro_macro = 0.0
recall_micro_macro = 0.0

for info in self.__doc_results.itervalues():
pos_prec += info[self.C_POS_PREC]
neg_prec += info[self.C_NEG_PREC]
neu_prec += info[self.C_NEU_PREC]
pos_recall += info[self.C_POS_RECALL]
neg_recall += info[self.C_NEG_RECALL]
neu_recall += info[self.C_NEU_RECALL]
pos_prec_macro += info[self.C_POS_PREC]
neg_prec_macro += info[self.C_NEG_PREC]
neu_prec_macro += info[self.C_NEU_PREC]
pos_recall_macro += info[self.C_POS_RECALL]
neg_recall_macro += info[self.C_NEG_RECALL]
neu_recall_macro += info[self.C_NEU_RECALL]
prec_micro_macro += info[self.C_PREC_MICRO]
recall_micro_macro += info[self.C_RECALL_MICRO]

if len(self.__doc_results) > 0:
pos_prec /= len(self.__doc_results)
neg_prec /= len(self.__doc_results)
neu_prec /= len(self.__doc_results)
pos_recall /= len(self.__doc_results)
neg_recall /= len(self.__doc_results)
neu_recall /= len(self.__doc_results)

f1 = calc_f1_3c(pos_prec=pos_prec, neg_prec=neg_prec, neu_prec=neu_prec,
pos_recall=pos_recall, neg_recall=neg_recall, neu_recall=neu_recall)
pos_prec_macro /= len(self.__doc_results)
neg_prec_macro /= len(self.__doc_results)
neu_prec_macro /= len(self.__doc_results)
pos_recall_macro /= len(self.__doc_results)
neg_recall_macro /= len(self.__doc_results)
neu_recall_macro /= len(self.__doc_results)
prec_micro_macro /= len(self.__doc_results)
recall_micro_macro /= len(self.__doc_results)

f1 = calc_f1_3c_macro(pos_prec=pos_prec_macro, neg_prec=neg_prec_macro, neu_prec=neu_prec_macro,
pos_recall=pos_recall_macro, neg_recall=neg_recall_macro, neu_recall=neu_recall_macro)

# Filling total result.
self._total_result[self.C_F1] = f1
self._total_result[self.C_F1_POS] = calc_f1_single_class(prec=pos_prec, recall=pos_recall)
self._total_result[self.C_F1_NEG] = calc_f1_single_class(prec=neg_prec, recall=neg_recall)
self._total_result[self.C_F1_NEU] = calc_f1_single_class(prec=neu_prec, recall=neu_recall)
self._total_result[self.C_POS_PREC] = pos_prec
self._total_result[self.C_NEG_PREC] = neg_prec
self._total_result[self.C_NEU_PREC] = neu_prec
self._total_result[self.C_POS_RECALL] = pos_recall
self._total_result[self.C_NEG_RECALL] = neg_recall
self._total_result[self.C_NEU_RECALL] = neu_recall
self._total_result[self.C_F1_POS] = calc_f1_single_class(prec=pos_prec_macro, recall=pos_recall_macro)
self._total_result[self.C_F1_NEG] = calc_f1_single_class(prec=neg_prec_macro, recall=neg_recall_macro)
self._total_result[self.C_F1_NEU] = calc_f1_single_class(prec=neu_prec_macro, recall=neu_recall_macro)
self._total_result[self.C_POS_PREC] = pos_prec_macro
self._total_result[self.C_NEG_PREC] = neg_prec_macro
self._total_result[self.C_NEU_PREC] = neu_prec_macro
self._total_result[self.C_POS_RECALL] = pos_recall_macro
self._total_result[self.C_NEG_RECALL] = neg_recall_macro
self._total_result[self.C_NEU_RECALL] = neu_recall_macro
self._total_result[self.C_PREC_MICRO] = prec_micro_macro
self._total_result[self.C_RECALL_MICRO] = recall_micro_macro
self._total_result[self.C_F1_MICRO] = calc_f1_single_class(prec=prec_micro_macro, recall=recall_micro_macro)

def iter_document_results(self):
return self.__doc_results.iteritems()
48 changes: 24 additions & 24 deletions common/evaluation/results/two_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from arekit.common.evaluation.results import metrics
from arekit.common.evaluation.results.base import BaseEvalResult
from arekit.common.evaluation.results.utils import calc_f1_single_class, calc_f1
from arekit.common.evaluation.results.utils import calc_f1_single_class, calc_f1_macro
from arekit.common.labels.base import PositiveLabel, NegativeLabel, Label
from arekit.common.opinions.collection import OpinionCollection

Expand Down Expand Up @@ -53,10 +53,10 @@ def reg_doc(self, cmp_pair, cmp_table):
opinions_exist=has_neg)

# Add document results.
f1 = calc_f1(pos_prec=pos_prec,
neg_prec=neg_prec,
pos_recall=pos_recall,
neg_recall=neg_recall)
f1 = calc_f1_macro(pos_prec=pos_prec,
neg_prec=neg_prec,
pos_recall=pos_recall,
neg_recall=neg_recall)

# Filling results.
doc_id = cmp_pair.DocumentID
Expand All @@ -68,33 +68,33 @@ def reg_doc(self, cmp_pair, cmp_table):
self.__doc_results[doc_id][self.C_NEG_RECALL] = neg_recall

def calculate(self):
pos_prec, neg_prec, pos_recall, neg_recall = (0.0, 0.0, 0.0, 0.0)
pos_prec_macro, neg_prec_macro, pos_recall_macro, neg_recall_macro = (0.0, 0.0, 0.0, 0.0)

for info in self.__doc_results.itervalues():
pos_prec += info[self.C_POS_PREC]
neg_prec += info[self.C_NEG_PREC]
pos_recall += info[self.C_POS_RECALL]
neg_recall += info[self.C_NEG_RECALL]
pos_prec_macro += info[self.C_POS_PREC]
neg_prec_macro += info[self.C_NEG_PREC]
pos_recall_macro += info[self.C_POS_RECALL]
neg_recall_macro += info[self.C_NEG_RECALL]

if len(self.__doc_results) > 0:
pos_prec /= len(self.__doc_results)
neg_prec /= len(self.__doc_results)
pos_recall /= len(self.__doc_results)
neg_recall /= len(self.__doc_results)
pos_prec_macro /= len(self.__doc_results)
neg_prec_macro /= len(self.__doc_results)
pos_recall_macro /= len(self.__doc_results)
neg_recall_macro /= len(self.__doc_results)

f1 = calc_f1(pos_prec=pos_prec,
neg_prec=neg_prec,
pos_recall=pos_recall,
neg_recall=neg_recall)
f1 = calc_f1_macro(pos_prec=pos_prec_macro,
neg_prec=neg_prec_macro,
pos_recall=pos_recall_macro,
neg_recall=neg_recall_macro)

# Filling total result.
self._total_result[self.C_F1] = f1
self._total_result[self.C_F1_POS] = calc_f1_single_class(prec=pos_prec, recall=pos_recall)
self._total_result[self.C_F1_NEG] = calc_f1_single_class(prec=neg_prec, recall=neg_recall)
self._total_result[self.C_POS_PREC] = pos_prec
self._total_result[self.C_NEG_PREC] = neg_prec
self._total_result[self.C_POS_RECALL] = pos_recall
self._total_result[self.C_NEG_RECALL] = neg_recall
self._total_result[self.C_F1_POS] = calc_f1_single_class(prec=pos_prec_macro, recall=pos_recall_macro)
self._total_result[self.C_F1_NEG] = calc_f1_single_class(prec=neg_prec_macro, recall=neg_recall_macro)
self._total_result[self.C_POS_PREC] = pos_prec_macro
self._total_result[self.C_NEG_PREC] = neg_prec_macro
self._total_result[self.C_POS_RECALL] = pos_recall_macro
self._total_result[self.C_NEG_RECALL] = neg_recall_macro

def iter_document_results(self):
return self.__doc_results.iteritems()
18 changes: 9 additions & 9 deletions common/evaluation/results/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ def calc_f1_single_class(prec, recall):
return 0


def calc_f1(pos_prec, neg_prec, pos_recall, neg_recall):
    """Return the mean of the positive-class and negative-class F1 scores.

    Each per-class score is obtained from ``calc_f1_single_class``.
    """
    per_class = [
        calc_f1_single_class(prec=pos_prec, recall=pos_recall),
        calc_f1_single_class(prec=neg_prec, recall=neg_recall),
    ]
    return sum(per_class) * 1.0 / 2
def calc_f1_macro(pos_prec, neg_prec, pos_recall, neg_recall):
    """Return the macro F1: the mean of the positive and negative class F1.

    Each per-class score is obtained from ``calc_f1_single_class``.
    """
    per_class = [
        calc_f1_single_class(prec=pos_prec, recall=pos_recall),
        calc_f1_single_class(prec=neg_prec, recall=neg_recall),
    ]
    return sum(per_class) * 1.0 / 2


def calc_f1_3c(pos_prec, neg_prec, neu_prec, pos_recall, neg_recall, neu_recall):
    """Return the mean of the positive, negative and neutral class F1 scores.

    Each per-class score is obtained from ``calc_f1_single_class``.
    """
    per_class = [
        calc_f1_single_class(prec=pos_prec, recall=pos_recall),
        calc_f1_single_class(prec=neg_prec, recall=neg_recall),
        calc_f1_single_class(prec=neu_prec, recall=neu_recall),
    ]
    return sum(per_class) * 1.0 / 3
def calc_f1_3c_macro(pos_prec, neg_prec, neu_prec, pos_recall, neg_recall, neu_recall):
    """Return the three-class macro F1: the mean of the per-class F1 scores.

    Each per-class score (positive, negative, neutral) is obtained from
    ``calc_f1_single_class``.
    """
    per_class = [
        calc_f1_single_class(prec=pos_prec, recall=pos_recall),
        calc_f1_single_class(prec=neg_prec, recall=neg_recall),
        calc_f1_single_class(prec=neu_prec, recall=neu_recall),
    ]
    return sum(per_class) * 1.0 / 3
11 changes: 11 additions & 0 deletions common/experiment/formats/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def __init__(self, exp_data, experiment_io, opin_ops, doc_ops, name, extra_name_
assert(isinstance(doc_ops, DocumentOperations))
assert(isinstance(name, unicode))
assert(isinstance(extra_name_suffix, unicode))

self.__experiment_data = exp_data
self.__experiment_io = experiment_io
self.__opin_operations = opin_ops
Expand Down Expand Up @@ -62,6 +63,16 @@ def DocumentOperations(self):

# endregion

def _init_log_flag(self, do_log):
assert(isinstance(do_log, bool))
self._do_log = do_log

def log_info(self, message, forced=False):
    """Emit ``message`` through the module logger at INFO level.

    The message is written when ``self._do_log`` is set, or when ``forced``
    is true; otherwise the call does nothing.

    :param message: unicode text to log.
    :param forced: when true, log even if ``self._do_log`` is off.
    """
    assert isinstance(message, unicode)
    if self._do_log or forced:
        logger.info(message)

def entity_to_group(self, entity):
""" This function provides provides integer group for a particular entity
This grouping method assumes to be implmented in a nested experiment.
Expand Down
2 changes: 1 addition & 1 deletion common/experiment/io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_target_dir(self):
return join_dir_with_subfolder_name(subfolder_name=self.__get_experiment_folder_name(),
dir=self.get_experiment_sources_dir())

def get_experiment_folder(self):
def get_experiment_folder_name(self):
    """Public accessor for the result of the private
    ``__get_experiment_folder_name`` helper.
    """
    folder_name = self.__get_experiment_folder_name()
    return folder_name

# region protected methods
Expand Down
10 changes: 9 additions & 1 deletion contrib/bert/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
# AREbert input TSV formatters
# ARElm 0.20.5

<p align="center">
<img src="logo.png"/>
</p>

This contributed project provides functionality related to the language-model aspects
of the sentiment attitude extraction task.

3 changes: 3 additions & 0 deletions contrib/bert/callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ def set_iter_index(self, it_index):
def set_log_dir(self, target_dir):
    """Abstract hook; always raises ``NotImplementedError`` here.

    NOTE(review): presumably implementations remember ``target_dir`` as the
    place to write logs -- confirm against concrete callbacks.
    """
    raise NotImplementedError()

def check_log_exists(self):
    """Abstract hook; always raises ``NotImplementedError`` here.

    NOTE(review): presumably implementations report whether a log has already
    been written -- confirm against concrete callbacks.
    """
    raise NotImplementedError()

def write_results(self, result, data_type, epoch_index):
    """Abstract hook; always raises ``NotImplementedError`` here.

    NOTE(review): presumably implementations persist ``result`` for the given
    ``data_type`` and ``epoch_index`` -- confirm against concrete callbacks.
    """
    raise NotImplementedError()

Expand Down
Binary file added contrib/bert/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions contrib/bert/output/eval_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,8 @@ class EvalHelper(object):
""" Specific provide for results evaluation.
"""

def get_results_dir(self, target_dir):
    """Abstract hook; always raises ``NotImplementedError`` here.

    NOTE(review): presumably implementations derive the results directory
    from ``target_dir`` -- confirm against concrete helpers.
    """
    raise NotImplementedError()

def get_results_filename(self, iter_index, epoch_index):
    """Abstract hook; always raises ``NotImplementedError`` here.

    NOTE(review): presumably implementations build a results file name from
    ``iter_index`` and ``epoch_index`` -- confirm against concrete helpers.
    """
    raise NotImplementedError()
Loading

0 comments on commit ac07e88

Please sign in to comment.