Skip to content

Commit

Permalink
#172 Updated, #240
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Dec 24, 2021
1 parent da585af commit 309691f
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 29 deletions.
29 changes: 8 additions & 21 deletions arekit/common/data/views/ouput_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,6 @@


class BaseOutputView(BaseStorageView):
""" Results output represents a table, which stored in pandas dataframe.
This dataframe is expected to provide the following columns:
- id -- a row identifier, which is compatible with row_inds in serialized opinions.
- doc_id -- the id to which the output corresponds.
- labels -- uint labels (amount of columns depends on the scaler)
"""

def __init__(self, ids_provider, storage):
assert(isinstance(ids_provider, BaseIDProvider))
Expand All @@ -41,16 +35,15 @@ def __iter_doc_opinion_ids(self, doc_df):
return [self._ids_provider.parse_opinion_in_opinion_id(row_id)
for row_id in doc_df[const.ID]]

def __iter_doc_ids(self):
return set(self._storage.iter_column_values(column_name=const.DOC_ID))
def __iter_opinions_by_linkages(self, linkages_df, opinions_view):
for df_linkage in linkages_df:
assert (isinstance(df_linkage, pd.DataFrame))
yield self._iter_by_opinions(linked_df=df_linkage, opinions_view=opinions_view)

# endregion

# region protected methods

def _get_column_header(self):
raise NotImplementedError()

def _iter_by_opinions(self, linked_df, opinions_view):
raise NotImplementedError()

Expand All @@ -71,23 +64,17 @@ def _compose_opinion_by_opinion_id(self, sample_id, opinions_view, calc_label_fu
# region public methods

def iter_doc_ids(self):
return self.__iter_doc_ids()
return set(self._storage.iter_column_values(column_name=const.DOC_ID))

def iter_opinion_linkages(self, doc_id, opinions_view):
assert(isinstance(opinions_view, BaseOpinionStorageView))
doc_df = self._storage.find_by_value(column_name=const.DOC_ID, value=doc_id)

doc_opin_ids = self.__iter_doc_opinion_ids(doc_df)
doc_opin_id_patterns = self.__iter_id_patterns(doc_opin_ids)
linkages_df = self.__iter_opinion_linkages_df(doc_df=doc_df,
row_ids=doc_opin_id_patterns)

for df_linkage in linkages_df:
assert (isinstance(df_linkage, pd.DataFrame))

opinions_iter = self._iter_by_opinions(linked_df=df_linkage,
opinions_view=opinions_view)
linkages_df = self.__iter_opinion_linkages_df(doc_df=doc_df, row_ids=doc_opin_id_patterns)
opinions_iter = self.__iter_opinions_by_linkages(linkages_df, opinions_view=opinions_view)

yield OpinionsLinkage(linked_data=opinions_iter)
return map(lambda opinions: OpinionsLinkage(opinions), opinions_iter)

# endregion
10 changes: 5 additions & 5 deletions arekit/common/data/views/output_multiple.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@ def __init__(self, labels_scaler, storage):

# region private methods

def __get_column_header(self):
return [str(self.__labels_scaler.label_to_uint(label))
for label in self.__labels_scaler.ordered_suppoted_labels()]

def __calculate_label(self, row):
"""
Calculates the label from a single row (probabilities for each class)
"""
labels_prob = [row[label] for label in self._get_column_header()]
labels_prob = [row[label] for label in self.__get_column_header()]
return self.__labels_scaler.uint_to_label(value=np.argmax(labels_prob))

# endregion

# region protected methods

def _get_column_header(self):
return [str(self.__labels_scaler.label_to_uint(label))
for label in self.__labels_scaler.ordered_suppoted_labels()]

def _iter_by_opinions(self, linked_df, opinions_view):
assert(isinstance(linked_df, pd.DataFrame))
assert(isinstance(opinions_view, BaseOpinionStorageView))
Expand Down
3 changes: 0 additions & 3 deletions arekit/contrib/bert/views/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,6 @@ def __iter_linked_opinion_indices(self, linked_df):

# region protected methods

def _get_column_header(self):
return [BertBinaryOutputView.NO, BertBinaryOutputView.YES]

def _iter_by_opinions(self, linked_df, opinions_view):
assert(isinstance(linked_df, pd.DataFrame))
assert(isinstance(opinions_view, BaseOpinionStorageView))
Expand Down

0 comments on commit 309691f

Please sign in to comment.