Skip to content

Commit

Permalink
#376 refactoring, removed id-dependent parsing methods
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Jul 10, 2023
1 parent a62a025 commit b3e62ba
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 34 deletions.
4 changes: 4 additions & 0 deletions arekit/common/data/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
TEXT = 'text_a'
LABEL = 'label'

# Opinion identifier that is global across the sampled data.
OPINION_ID = 'opinion_id'
OPINION_LINKAGE_ID = 'linkage_id'

# Fields holding the ends of an attitude (string values).
SOURCE = 'source'
TARGET = 'target'
Expand Down
4 changes: 4 additions & 0 deletions arekit/common/data/input/providers/rows/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ def __assign_value(column, value):
index_in_linked=index_in_linked,
label_scaler=self._label_provider.LabelScaler)

row[const.OPINION_ID] = text_opinion_linkage.First.TextOpinionID

row[const.OPINION_LINKAGE_ID] = index_in_linked

row[const.DOC_ID] = text_opinion_linkage.First.DocID

row[const.SENT_IND] = sentence_ind
Expand Down
23 changes: 0 additions & 23 deletions arekit/common/data/row_ids/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,26 +54,3 @@ def create_pattern(id_value, p_type):
def convert_sample_id_to_opinion_id(sample_id):
    """Swap the index tail of a sample id for the zero-index suffix.

    Everything from the first occurrence of the leading character of
    ``BaseIDProvider.INDEX`` onwards is dropped, and
    ``BaseIDProvider.INDEX.format(0)`` is appended in its place.

    :param sample_id: str, identifier of a sample row.
    :return: str, the rebuilt identifier.
    :raises ValueError: propagated from ``str.index`` when the leading
        character of ``BaseIDProvider.INDEX`` is absent from ``sample_id``.
    """
    assert isinstance(sample_id, str)
    index_marker = BaseIDProvider.INDEX[0]
    prefix = sample_id[:sample_id.index(index_marker)]
    return prefix + BaseIDProvider.INDEX.format(0)

# region 'parse' methods

@staticmethod
def _parse(row_id, pattern):
    """Read the integer that follows the pattern marker inside ``row_id``.

    The number starts right after the first occurrence of ``pattern[0]`` and
    ends just before the next ``BaseIDProvider.SEPARATOR``.

    :param row_id: identifier to read from (indexed and sliced like a str).
    :param pattern: str, only its first character is used as the marker.
    :return: int, the parsed value.
    :raises ValueError: when the marker or the separator is missing, or the
        text between them is not an integer literal.
    """
    assert isinstance(pattern, str)

    start = row_id.index(pattern[0]) + 1
    stop = row_id.index(BaseIDProvider.SEPARATOR, start, len(row_id))

    return int(row_id[start:stop])

@staticmethod
def parse_opinion_in_opinion_id(opinion_id):
    """Parse an opinion id string with the ``BaseIDProvider.OPINION`` pattern.

    :param opinion_id: str, identifier of an opinion row.
    :return: whatever ``BaseIDProvider._parse`` returns for that pattern.
    """
    assert isinstance(opinion_id, str)
    opinion_pattern = BaseIDProvider.OPINION
    return BaseIDProvider._parse(opinion_id, opinion_pattern)

@staticmethod
def parse_opinion_in_sample_id(sample_id):
    """Parse a sample id string with the ``BaseIDProvider.OPINION`` pattern.

    :param sample_id: str, identifier of a sample row.
    :return: whatever ``BaseIDProvider._parse`` returns for that pattern.
    """
    assert isinstance(sample_id, str)
    opinion_pattern = BaseIDProvider.OPINION
    return BaseIDProvider._parse(sample_id, opinion_pattern)

# endregion
4 changes: 1 addition & 3 deletions arekit/common/data/views/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@ def iter_from_storage(self, storage):

linked = []
current_opinion_id = undefined
for row_index, sample_id in enumerate(storage.iter_column_values(const.ID)):
sample_id = str(sample_id)
opinion_id = self.__row_ids_provider.parse_opinion_in_sample_id(sample_id)
for row_index, opinion_id in enumerate(storage.iter_column_values(const.OPINION_ID)):
if current_opinion_id != undefined:
if opinion_id != current_opinion_id:
yield linked
Expand Down
12 changes: 11 additions & 1 deletion arekit/contrib/networks/input/rows_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def __handle_text(value):
const.S_IND: lambda value: int(value),
const.T_IND: lambda value: int(value),
const.SENT_IND: lambda value: int(value),
const.OPINION_ID: lambda value: int(value),
const.OPINION_LINKAGE_ID: lambda value: int(value),
const.ENTITY_VALUES: lambda value: __process_values_list(value),
const.ENTITY_TYPES: lambda value: __process_values_list(value),
const.ENTITIES: lambda value: __process_indices_list(value),
Expand Down Expand Up @@ -82,7 +84,15 @@ def __value_or_none(self, key):
@property
def SampleID(self):
    """Value stored under ``const.ID`` in the row parameters."""
    key = const.ID
    return self.__params[key]


@property
def OpinionID(self):
    """Value stored under ``const.OPINION_ID`` in the row parameters."""
    key = const.OPINION_ID
    return self.__params[key]

@property
def OpinionLinkageID(self):
    """Value stored under ``const.OPINION_LINKAGE_ID`` in the row parameters."""
    key = const.OPINION_LINKAGE_ID
    return self.__params[key]

@property
def Terms(self):
    """Value stored under ``const.TEXT`` in the row parameters."""
    key = const.TEXT
    return self.__params[key]
Expand Down
9 changes: 2 additions & 7 deletions arekit/contrib/utils/data/views/linkages/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ def __init__(self, ids_provider, storage):

# region private methods

def __iter_doc_opinion_ids(self, row_ids):
    """Lazily yield the parsed opinion id for every row id.

    Each element of ``row_ids`` is passed, in order, to the ids provider's
    ``parse_opinion_in_opinion_id`` and the result is yielded.

    :param row_ids: iterable of row identifiers.
    """
    yield from (
        self._ids_provider.parse_opinion_in_opinion_id(single_row_id)
        for single_row_id in row_ids
    )

def __iter_opinions_by_linkages(self, linkages_df, opinions_view):
for df_linkage in linkages_df:
assert (isinstance(df_linkage, pd.DataFrame))
Expand All @@ -42,9 +38,8 @@ def _iter_by_opinions(self, linked_df, opinions_view):

def iter_opinion_linkages(self, doc_id, opinions_view):
assert(isinstance(opinions_view, BaseOpinionStorageView))
doc_df = self._storage.find_by_value(column_name=const.DOC_ID, value=doc_id)
row_ids = [row_id for row_id in doc_df[const.ID]] # TODO. Adopt storage.
doc_opin_ids = self.__iter_doc_opinion_ids(row_ids=row_ids)
doc_df = self._storage.find_by_value(column_name=const.OPINION_ID, value=doc_id)
doc_opin_ids = [opinion_id for opinion_id in doc_df[const.OPINION_ID]]

doc_opin_id_patterns = map(
lambda opinion_id: self._ids_provider.create_pattern(id_value=opinion_id, p_type=BaseIDProvider.OPINION),
Expand Down

0 comments on commit b3e62ba

Please sign in to comment.