Skip to content

Commit

Permalink
Refactoring #178
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Aug 24, 2021
1 parent 272fbdf commit 5142044
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 18 deletions.
26 changes: 14 additions & 12 deletions arekit/common/news/base.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
from arekit.common.news.objects_parser import BaseObjectsParser


class News(object):

def __init__(self, news_id, sentences, entities_parser):
def __init__(self, news_id, sentences):
assert(isinstance(news_id, int))
assert(isinstance(sentences, list))
assert(isinstance(entities_parser, BaseObjectsParser))
self.__news_id = news_id
self.__entities_parser = entities_parser

self.__news_id = news_id
self._sentences = sentences

# region properties
Expand All @@ -29,13 +24,10 @@ def SentencesCount(self):

# endregion

def parse_sentence(self, sent_ind):
def sentence_to_terms_list(self, sent_ind):
assert(isinstance(sent_ind, int))
sentence = self._sentences[sent_ind]
return self.__entities_parser.parse(sentence)

def get_entities_collection(self):
raise NotImplementedError("Document does not support entities collection generation.")
return self._sentence_to_terms_list_core(sentence)

def iter_sentences(self, return_text):
"""
Expand All @@ -56,3 +48,13 @@ def extract_linked_text_opinions(self, opinion):
is an iterable opinions that should be used to find a related text_opinion entries.
"""
raise NotImplementedError()

def get_entities_collection(self):
# Always raises in this class: no entities collection is produced here.
# NOTE(review): presumably concrete document types override this to
# return their own entities collection -- confirm against subclasses.
raise NotImplementedError("Document does not support entities collection generation.")

@staticmethod
def _sentence_to_terms_list_core(sentence):
"""
Apply the processing pipeline to a single sentence.

sentence: the item taken from the stored sentences list by
sentence_to_terms_list, which delegates to this method.

This version always raises NotImplementedError.
NOTE(review): presumably subclasses override it to return the
terms of the sentence -- confirm against the concrete news classes.
"""
raise NotImplementedError()
6 changes: 6 additions & 0 deletions arekit/common/news/objects_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,9 @@ def parse(self, sentence):
entries.extend(last_part)

return entries

def __enter__(self):
# Context-manager entry: hand back this same object, so that
# `with <parser> as p:` binds `p` to the parser itself.
return self

def __exit__(self, exc_type, exc_val, exc_tb):
# Context-manager exit: nothing is released here. The implicit None
# return is falsy, so an exception raised inside the `with` body is
# not suppressed and propagates to the caller.
pass
9 changes: 6 additions & 3 deletions arekit/contrib/source/rusentrel/news/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ def __init__(self, doc_id, sentences, entities):
assert(isinstance(sentences, list))
assert(isinstance(entities, RuSentRelDocumentEntityCollection))

super(RuSentRelNews, self).__init__(news_id=doc_id,
sentences=sentences,
entities_parser=RuSentRelTextEntitiesParser())
super(RuSentRelNews, self).__init__(news_id=doc_id, sentences=sentences)

self.__entities = entities

Expand Down Expand Up @@ -142,4 +140,9 @@ def extract_linked_text_opinions(self, opinion):
def get_entities_collection(self):
return self.__entities

@staticmethod
def _sentence_to_terms_list_core(sentence):
# Build a RuSentRelTextEntitiesParser for this call only, use it as a
# context manager, and return the result of its parse() for `sentence`.
with RuSentRelTextEntitiesParser() as parser:
return parser.parse(sentence)

# endregion
2 changes: 1 addition & 1 deletion arekit/processing/text/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def __parse_sentence(news, sent_ind, parse_options):

if parse_options.ParseEntities:
# Providing a modified list with parsed unicode terms.
terms_list = news.parse_sentence(sent_ind)
terms_list = news.sentence_to_terms_list(sent_ind)
return TextParser.__parse_string_list(terms_iter=terms_list,
skip_term=lambda term: isinstance(term, Entity),
# TODO: Declare Stemmer within derived parse options.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_ruattitudes_news_text_parsing(self):

# Parse single sentence.
assert(isinstance(news, RuAttitudesNews))
parsed_text = news.parse_sentence(0)
parsed_text = news.sentence_to_terms_list(0)
self.__print_parsed_text(parsed_text)

# Parse news via external parser.
Expand All @@ -46,7 +46,7 @@ def test_rusentrel_news_text_parsing(self):
version=version)

assert(isinstance(news, RuSentRelNews))
parsed_text = news.parse_sentence(8)
parsed_text = news.sentence_to_terms_list(8)
self.__print_parsed_text(parsed_text)

# Parse news via external parser.
Expand Down

0 comments on commit 5142044

Please sign in to comment.