
Commit

#534 fix
nicolay-r committed Nov 5, 2023
1 parent a9bbbfe commit d98d205
Showing 16 changed files with 68 additions and 29 deletions.
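
Most of the diff below applies one mechanical fix: the deprecated collections.Iterable alias, removed in Python 3.10, is replaced with Iterable imported from collections.abc, and collections.Counter() becomes a direct Counter import where a counter is used. A new test for nested entities is also added. A minimal sketch of the pattern, with a hypothetical require_iterable helper used for illustration only:

# Old style, broken on Python 3.10+: the ABC aliases were removed from collections.
#   import collections
#   assert(isinstance(values, collections.Iterable))

# New style, as applied throughout this commit (available since Python 3.3):
from collections.abc import Iterable


def require_iterable(values):
    # Mirrors the assert-based argument checks used across the arekit codebase.
    assert(isinstance(values, Iterable))
    return values


require_iterable([1, 2, 3])   # passes
# require_iterable(42)        # would raise AssertionError (an int is not iterable)
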
4 changes: 2 additions & 2 deletions arekit/common/context/terms_mapper.py
@@ -1,4 +1,4 @@
-import collections
+from collections.abc import Iterable

from arekit.common.context.token import Token
from arekit.common.entities.base import Entity
@@ -10,7 +10,7 @@ class TextTermsMapper(object):
def iter_mapped(self, terms):
""" Performs mapping operation of each terms in a sequence
"""
-assert(isinstance(terms, collections.Iterable))
+assert(isinstance(terms, Iterable))

self._before_mapping()

7 changes: 4 additions & 3 deletions arekit/common/data/input/providers/rows/base.py
@@ -1,4 +1,5 @@
-import collections
+from collections import Counter
+from collections.abc import Iterable
import logging

from arekit.common.data.input.providers.contents import ContentsProvider
@@ -39,9 +40,9 @@ def __iter_rows(self, linked_data, idle_mode):

def iter_by_rows(self, contents_provider, doc_ids_iter, idle_mode):
assert(isinstance(contents_provider, ContentsProvider))
-assert(isinstance(doc_ids_iter, collections.Iterable))
+assert(isinstance(doc_ids_iter, Iterable))

-self.__rows_counter = collections.Counter()
+self.__rows_counter = Counter()

for linked_data in contents_provider.from_doc_ids(doc_ids=doc_ids_iter, idle_mode=idle_mode):
assert(isinstance(linked_data, LinkedDataWrapper))
4 changes: 2 additions & 2 deletions arekit/common/docs/parsed/base.py
@@ -1,4 +1,4 @@
-import collections
+from collections.abc import Iterable

from arekit.common.entities.base import Entity
from arekit.common.text.enums import TermFormat
@@ -25,7 +25,7 @@ def __init__(self, doc_id, parsed_sentences):
parsed_sentences: iterable of ParsedSentence type
NOTE: Considered sentences with labeled Entities in it!
"""
-assert(isinstance(parsed_sentences, collections.Iterable))
+assert(isinstance(parsed_sentences, Iterable))

self.__doc_id = doc_id
self.__parsed_sentences = list(parsed_sentences)
4 changes: 2 additions & 2 deletions arekit/common/frames/variants/collection.py
@@ -1,4 +1,4 @@
-import collections
+from collections.abc import Iterable
from arekit.common.frames.variants.base import FrameVariant


@@ -23,7 +23,7 @@ def __register_frame(frames_dict, frames_list, id):
# region public methods

def fill_from_iterable(self, variants_with_id, overwrite_existed_variant, raise_error_on_existed_variant):
-assert(isinstance(variants_with_id, collections.Iterable))
+assert(isinstance(variants_with_id, Iterable))
assert(isinstance(overwrite_existed_variant, bool))
assert(isinstance(raise_error_on_existed_variant, bool))
assert(len(self.__variants) == 0)
4 changes: 2 additions & 2 deletions arekit/common/linkage/base.py
@@ -1,10 +1,10 @@
-import collections
+from collections.abc import Iterable


class LinkedDataWrapper(object):

def __init__(self, linked_data):
-assert(isinstance(linked_data, collections.Iterable))
+assert(isinstance(linked_data, Iterable))
self.__linked_data = list(linked_data)
self.__tag = None

4 changes: 2 additions & 2 deletions arekit/common/opinions/collection.py
@@ -1,4 +1,4 @@
-import collections
+from collections.abc import Iterable

from arekit.common import log_utils
from arekit.common.labels.base import Label
@@ -23,7 +23,7 @@ def __init__(self, synonyms,
raise_exception_on_duplicates: bool
denotes whether there is a need to fire exception for duplicates in opinions list.
"""
-assert(isinstance(opinions, collections.Iterable) or isinstance(opinions, type(None)))
+assert(isinstance(opinions, Iterable) or isinstance(opinions, type(None)))
assert(isinstance(synonyms, SynonymsCollection))
assert(isinstance(error_on_duplicates, bool))
assert(isinstance(error_on_synonym_end_missed, bool))
4 changes: 2 additions & 2 deletions arekit/common/synonyms/base.py
@@ -1,4 +1,4 @@
-import collections
+from collections.abc import Iterable

from arekit.common import log_utils

@@ -12,7 +12,7 @@ def __init__(self, iter_group_values_lists=None, is_read_only=True, debug=False)
debug: bool
utilized for logging the salient information during usage.
"""
-assert(isinstance(iter_group_values_lists, collections.Iterable) or iter_group_values_lists is None)
+assert(isinstance(iter_group_values_lists, Iterable) or iter_group_values_lists is None)
assert(isinstance(is_read_only, bool))
assert(isinstance(debug, bool))

4 changes: 2 additions & 2 deletions arekit/common/text/partitioning/str.py
@@ -1,4 +1,4 @@
-import collections
+from collections.abc import Iterable

from arekit.common.bound import Bound
from arekit.common.text.partitioning.base import BasePartitioning
@@ -11,7 +11,7 @@ class StringPartitioning(BasePartitioning):

def provide(self, text, parts_it):
assert(isinstance(text, str))
-assert(isinstance(parts_it, collections.Iterable))
+assert(isinstance(parts_it, Iterable))

start = 0
parts = []
4 changes: 2 additions & 2 deletions arekit/common/text/partitioning/terms.py
@@ -1,4 +1,4 @@
-import collections
+from collections.abc import Iterable

from arekit.common.bound import Bound
from arekit.common.text.partitioning.base import BasePartitioning
@@ -11,7 +11,7 @@ class TermsPartitioning(BasePartitioning):

def provide(self, text, parts_it):
assert(isinstance(text, list))
-assert(isinstance(parts_it, collections.Iterable))
+assert(isinstance(parts_it, Iterable))

start = 0
parts = []
6 changes: 3 additions & 3 deletions arekit/contrib/networks/embedding.py
@@ -1,4 +1,4 @@
-import collections
+from collections.abc import Iterable
import numpy as np


@@ -31,7 +31,7 @@ def VocabularySize(self):

@classmethod
def from_word_embedding_pairs_iter(cls, word_embedding_pairs):
-assert(isinstance(word_embedding_pairs, collections.Iterable))
+assert(isinstance(word_embedding_pairs, Iterable))

matrix = []
words = []
@@ -51,7 +51,7 @@ def from_word_embedding_pairs_iter(cls, word_embedding_pairs):

@classmethod
def from_list_with_embedding_func(cls, words_iter, embedding_func):
-assert(isinstance(words_iter, collections.Iterable))
+assert(isinstance(words_iter, Iterable))
assert(callable(embedding_func))

matrix = []
4 changes: 2 additions & 2 deletions arekit/contrib/utils/io_utils/utils.py
@@ -1,4 +1,4 @@
-import collections
+from collections.abc import Iterable
import logging
from os.path import join, exists

@@ -25,7 +25,7 @@ def filename_template(data_type):


def check_targets_existence(targets):
-assert (isinstance(targets, collections.Iterable))
+assert (isinstance(targets, Iterable))

result = True
for filepath in targets:
6 changes: 3 additions & 3 deletions arekit/contrib/utils/pipelines/opinion_collections.py
@@ -1,4 +1,4 @@
-import collections
+from collections.abc import Iterable

from arekit.common.labels.scaler.base import BaseLabelScaler
from arekit.common.linkage.base import LinkedDataWrapper
@@ -18,7 +18,7 @@ def __create_labeled_opinion(item, label):


def __linkages_to_opinions(linkages_iter, labels_helper, label_calc_mode):
-assert(isinstance(linkages_iter, collections.Iterable))
+assert(isinstance(linkages_iter, Iterable))

for linkage in linkages_iter:
assert(isinstance(linkage, LinkedDataWrapper))
@@ -31,7 +31,7 @@ def __linkages_to_opinions(linkages_iter, labels_helper, label_calc_mode):


def __fill_opinion_collection(opinions_iter, collection, supported_labels):
-assert(isinstance(opinions_iter, collections.Iterable))
+assert(isinstance(opinions_iter, Iterable))
assert(isinstance(collection, OpinionCollection))
assert(isinstance(supported_labels, set) or supported_labels is None)

5 changes: 3 additions & 2 deletions arekit/contrib/utils/serializer.py
@@ -1,6 +1,7 @@
-import collections
import logging

+from collections.abc import Iterable

from arekit.common.data.input.providers.columns.sample import SampleColumnsProvider
from arekit.common.data.input.providers.rows.base import BaseRowProvider
from arekit.common.data.input.repositories.base import BaseInputRepository
@@ -28,7 +29,7 @@ def create_samples_repo(keep_labels, rows_provider, storage):
@staticmethod
def fill_and_write(pipeline, repo, target, writer, doc_ids_iter, desc=""):
assert(isinstance(pipeline, BasePipeline))
-assert(isinstance(doc_ids_iter, collections.Iterable))
+assert(isinstance(doc_ids_iter, Iterable))
assert(isinstance(repo, BaseInputRepository))

doc_ids = list(doc_ids_iter)
29 changes: 29 additions & 0 deletions tests/text/test_nested_entities.py
@@ -0,0 +1,29 @@
import unittest

from arekit.common.text.parsed import BaseParsedText
from arekit.common.text.parser import BaseTextParser
from arekit.contrib.utils.pipelines.items.text.entities_default import TextEntitiesParser


class TestNestedEntities(unittest.TestCase):

def test(self):
s = """24 марта президент [США] [Джо-Байден] провел переговоры с
лидерами стран [Евросоюза] в [Брюсселе] , вызвав внимание рынка и предположения о
том, что [Америке] удалось уговорить [ЕС] совместно бойкотировать российские нефть
и газ. [[Европейский]-[Союз]] крайне зависим от [России] в плане поставок нефти и
газа."""

tep = TextEntitiesParser()

text_parser = BaseTextParser(pipeline=[
TextEntitiesParser(),
])

parsed_text = text_parser.run(s.split())
assert(isinstance(parsed_text, BaseParsedText))
print(parsed_text._terms)


if __name__ == '__main__':
unittest.main()
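
The sample in the new test is Russian news text with entities marked in square brackets (США = USA, Джо-Байден = Joe Biden, Евросоюза = the European Union, Брюсселе = Brussels, России = Russia); the nested case is the bracketed pair inside [[Европейский]-[Союз]] (European Union). The test only asserts the type of the parsed result and prints its terms; a hedged sketch of a further inspection, relying on the same private _terms attribute the test already prints:

# Hypothetical follow-up check, not part of the commit: list the term types
# produced by TextEntitiesParser for the sample, using the private _terms
# attribute exactly as the test above does.
print([type(term).__name__ for term in parsed_text._terms])
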
4 changes: 4 additions & 0 deletions tests/tutorials/test_tutorial_pipeline_sampling_bert.py
@@ -130,3 +130,7 @@ def test(self):
source = join(self.__output_dir, "sample-train-0" + writer.extension())
storage = reader.read(source)
self.assertEqual(20, len(storage), "Amount of rows is non equal!")


if __name__ == '__main__':
unittest.main()
4 changes: 4 additions & 0 deletions tests/tutorials/test_tutorial_pipeline_sampling_network.py
@@ -134,3 +134,7 @@ def test(self):
source = join(self.__output_dir, "sample-train-0" + writer.extension())
storage = reader.read(source)
self.assertEqual(20, len(storage), "Amount of rows is non equal!")


if __name__ == '__main__':
unittest.main()
