Skip to content

Commit

Permalink
#215 Done.
Browse files (browse the repository at this point in the history)
  • Loading branch information
nicolay-r committed Nov 29, 2021
1 parent c1c9ea4 commit c199921
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 15 deletions.
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@
from arekit.common.languages.ru.mods import RussianLanguageMods
from arekit.common.text_frame_variant import TextFrameVariant
from arekit.common.languages.mods import BaseLanguageMods
from arekit.processing.text.enums import TermFormat
from arekit.processing.text.parsed import ParsedText


class FrameVariantsSearcher(object):
class FrameVariantsAnnotationHelper(object):

# region private methods

Expand All @@ -24,14 +22,14 @@ def __get_preposition(terms, index):
# endregion

@staticmethod
def iter_frames_from_parsed_text(frame_variants, parsed_text, locale_mods=RussianLanguageMods):
def iter_frames_from_lemmas(frame_variants, lemmas, locale_mods=RussianLanguageMods):
"""
Considered to perform frames annotation across lemmatized terms.
"""
assert(isinstance(frame_variants, FrameVariantsCollection))
assert(isinstance(parsed_text, ParsedText))
assert(isinstance(lemmas, list))
assert(issubclass(locale_mods, BaseLanguageMods))

lemmas = [locale_mods.replace_specific_word_chars(lemma) if isinstance(lemma, str) else lemma
for lemma in parsed_text.iter_terms(term_format=TermFormat.Lemma)]

start_ind = 0
last_ind = 0
max_variant_len = max([len(variant) for _, variant in frame_variants.iter_variants()])
Expand All @@ -43,7 +41,7 @@ def iter_frames_from_parsed_text(frame_variants, parsed_text, locale_mods=Russia
if not(last_ind < len(lemmas)):
continue

is_all_words_within = FrameVariantsSearcher.__check_all_words_within(
is_all_words_within = FrameVariantsAnnotationHelper.__check_all_words_within(
terms=lemmas,
start_index=start_ind,
last_index=last_ind)
Expand All @@ -56,8 +54,8 @@ def iter_frames_from_parsed_text(frame_variants, parsed_text, locale_mods=Russia
if not frame_variants.has_variant(ctx_value):
continue

prep_term = FrameVariantsSearcher.__get_preposition(terms=lemmas,
index=start_ind)
prep_term = FrameVariantsAnnotationHelper.__get_preposition(terms=lemmas,
index=start_ind)

yield TextFrameVariant(
variant=frame_variants.get_variant_by_value(ctx_value),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from arekit.common.languages.ru.mods import RussianLanguageMods
from arekit.common.text_frame_variant import TextFrameVariant
from arekit.common.languages.mods import BaseLanguageMods
from arekit.common.frame_variants.search import FrameVariantsSearcher
from arekit.processing.frames.annot import FrameVariantsAnnotationHelper
from arekit.processing.text.enums import TermFormat
from arekit.processing.text.parsed import ParsedText

Expand Down Expand Up @@ -41,9 +41,12 @@ def parse_frames_in_parsed_text(frame_variants_collection, parsed_text, locale_m
assert(isinstance(parsed_text, ParsedText))
assert(issubclass(locale_mods, BaseLanguageMods))

frame_variants_iter = FrameVariantsSearcher.iter_frames_from_parsed_text(
lemmas = [locale_mods.replace_specific_word_chars(lemma) if isinstance(lemma, str) else lemma
for lemma in parsed_text.iter_terms(term_format=TermFormat.Lemma)]

frame_variants_iter = FrameVariantsAnnotationHelper.iter_frames_from_lemmas(
frame_variants=frame_variants_collection,
parsed_text=parsed_text,
lemmas=lemmas,
locale_mods=locale_mods)

if frame_variants_iter is None:
Expand Down
2 changes: 1 addition & 1 deletion arekit/processing/text/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

from arekit.common.entities.base import Entity
from arekit.common.frame_variants.collection import FrameVariantsCollection
from arekit.common.frame_variants.parse import FrameVariantsParser
from arekit.common.news.base import News
from arekit.common.news.parse_options import NewsParseOptions
from arekit.common.news.parsed.base import ParsedNews
from arekit.common.utils import split_by_whitespaces
from arekit.processing.frames.parser import FrameVariantsParser
from arekit.processing.text.parsed import ParsedText
from arekit.processing.text.tokens import Tokens
from arekit.processing.text.token import Token
Expand Down

0 comments on commit c199921

Please sign in to comment.