-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
split_text.py
44 lines (35 loc) · 1.28 KB
/
split_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env python3
""" Splits a book of the bible according to the story json object """
import re
def gen_book(bookfile):
    r"""
    Read an opened SFM file into a list of chapters, each a list of verses.

    A line beginning with the chapter marker `\c` opens a new, empty chapter.
    A line beginning with the verse marker `\v` is appended to the most
    recently opened chapter, with its leading `\v <number> ` prefix removed
    and surrounding whitespace stripped. All other lines are ignored.

    Markers that merely share the prefix (e.g. `\cl`, `\cp`, `\cd`, `\ca`,
    `\va`, `\vp`) are NOT treated as chapter or verse starts.

    The result is indexed by list position, `book[chapter][verse]`, where
    both positions are 0-based (the first chapter read is `book[0]`).

    Args:
        bookfile: an iterable of text lines, such as an opened file.

    Returns:
        A list of chapters, each a list of verse strings.

    Raises:
        IndexError: if a verse line appears before any chapter line.
    """
    # The negative lookahead rejects longer markers such as `\cl` or `\vp`,
    # which would otherwise create spurious chapters/verses and shift every
    # later index.
    chapter_marker = re.compile(r"\\c(?![a-z])")
    verse_marker = re.compile(r"\\v(?![a-z])")
    verse_prefix = re.compile(r"\\v [0-9]+ ")

    book = []
    for line in bookfile:
        if chapter_marker.match(line):
            book.append([])
        elif verse_marker.match(line):
            book[-1].append(verse_prefix.sub("", line).strip())
    return book
def flatten(double_list, start_pair, end_pair):
    """
    Flatten a slice of a list of lists into a single new list.

    Positions are plain 0-based list indexes. `start_pair` is
    `(outer_index, inner_index)` of the first item to include; `end_pair` is
    `(outer_index, inner_index)` of the position just past the last item
    (the end is exclusive, as with ordinary slicing). Only the first two
    entries of each pair are read.

    Args:
        double_list: a list of lists.
        start_pair: indexable pair giving the inclusive start position.
        end_pair: indexable pair giving the exclusive end position.

    Returns:
        A new flat list of the items between the two positions.

    Raises:
        IndexError: if an outer index is out of range for `double_list`.
    """
    start_outer, start_inner = start_pair[0], start_pair[1]
    end_outer, end_inner = end_pair[0], end_pair[1]

    # When both positions fall in the same inner list, a single slice is the
    # answer. Concatenating "tail from start" and "head up to end" would
    # return the wrong items (and duplicate some of them).
    if start_outer == end_outer:
        return double_list[start_outer][start_inner:end_inner]

    # Whole inner lists strictly between the start and end lists.
    middle = [
        item
        for inner in double_list[start_outer + 1 : end_outer]
        for item in inner
    ]
    return (
        double_list[start_outer][start_inner:]
        + middle
        + double_list[end_outer][:end_inner]
    )
def split_texts(story, book):
    """
    Return a list of individual page texts for a story.

    For every entry in `story["pages"]`, the page's `"ref_start"` and
    `"ref_end"` strings are split on each non-digit character and the pieces
    converted to integers. The resulting positions are handed to `flatten`
    together with `book`, and the returned verses are concatenated with no
    separator between them.

    Args:
        story: the JSON story object; must have a `"pages"` list whose items
            carry `"ref_start"` and `"ref_end"` strings.
        book: the book in the form `List[chapter][verse]`.

    Returns:
        A list with one string per page, in page order.

    Raises:
        ValueError: if a reference contains an empty piece after splitting
            (for example two adjacent non-digit characters).
    """

    def _to_position(reference):
        # Every single non-digit character acts as a separator.
        return [int(piece) for piece in re.split(r"[^0-9]", reference)]

    def _page_text(page):
        first = _to_position(page["ref_start"])
        last = _to_position(page["ref_end"])
        return "".join(flatten(book, first, last))

    return [_page_text(page) for page in story["pages"]]