-
Notifications
You must be signed in to change notification settings - Fork 2
/
utils_comments.py
35 lines (25 loc) · 1.05 KB
/
utils_comments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from utils_ceb import CEBApi
from utils_gd import GuttenbergDialogApi
def iter_text_comments(speakers, book_path_func):
    """Yield comments that mention one of the requested speakers.

    For ``k`` in 0, 1, 2 the comments returned by
    ``GuttenbergDialogApi.filter_comment_with_speaker_at_k`` are scanned
    term by term (whitespace split). The first term that is recognised by
    ``GuttenbergDialogApi.is_character`` and whose
    ``CEBApi.speaker_variant_to_speaker`` value belongs to ``speakers``
    produces one result, after which the scan moves to the next comment.

    :param speakers: set of speaker identifiers to look for.
    :param book_path_func: callable forwarded unchanged to
        ``filter_comment_with_speaker_at_k``.
    :return: generator of ``(comment, [term])`` pairs, where ``term`` is the
        matched term exactly as it appears in the comment.
    """
    assert isinstance(speakers, set)
    assert callable(book_path_func)

    dialog_api = GuttenbergDialogApi()
    for position in range(3):
        # NOTE(review): presumably ``k`` is the speaker position inside the
        # comment -- confirm against GuttenbergDialogApi.
        per_book = dialog_api.filter_comment_with_speaker_at_k(
            book_path_func=book_path_func, k=position)
        for _book_id, book_comments in per_book:
            for text in book_comments:
                # Look for the first term that refers to a requested speaker.
                for word in text.split():
                    if not GuttenbergDialogApi.is_character(word):
                        continue
                    if CEBApi.speaker_variant_to_speaker(word) not in speakers:
                        continue
                    yield text, [word]
                    # At most one result per comment.
                    break
def mask_text_entities(text, mask_template="_"):
    """Return ``text`` with character tokens replaced by ``mask_template``.

    The text is split on single spaces, every token for which
    ``GuttenbergDialogApi.has_character`` is truthy is swapped for
    ``mask_template``, and the tokens are joined back with single spaces.
    Because the split uses an explicit ``' '`` separator, runs of spaces in
    the input survive the round trip.

    :param text: input string to mask.
    :param mask_template: replacement written in place of each masked token.
    :return: the masked string.
    """
    dialog_api = GuttenbergDialogApi()
    masked_tokens = [
        mask_template if dialog_api.has_character(token) else token
        for token in text.split(' ')
    ]
    return " ".join(masked_tokens)