Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 110 lines (88 sloc) 4.36 KB
#!/usr/bin/env python3
import concurrent.futures
import argparse
import os
import fnmatch
import regex
from bs4 import BeautifulSoup
import se
def is_positive_integer(value: str) -> int:
    """
    Helper function for argparse.

    Convert value to an int and return it if it is strictly greater than zero.

    Raise argparse.ArgumentTypeError if value is not a positive integer. This
    covers both values that parse but are <= 0 and values that cannot be
    parsed as an integer at all, so argparse shows the same readable message
    in every case instead of a generic one that exposes this function's name.
    """
    message = "{} is not a positive integer".format(value)

    try:
        int_value = int(value)
    except ValueError as error:
        raise argparse.ArgumentTypeError(message) from error

    if int_value <= 0:
        raise argparse.ArgumentTypeError(message)

    return int_value
def main():
    """
    Command-line entry point.

    Shift the number of the target endnote, and of every endnote after it, up
    or down by one. endnotes.xhtml is rewritten first; every other .xhtml file
    under the source directory is then handed to process_endnotes_in_file()
    in a process pool so the matching noterefs are renumbered too.

    Exits with status 1 if the endnotes file cannot be opened, read or written.
    """
    parser = argparse.ArgumentParser(description="Increment or decrement the specified endnote and all following endnotes by 1.")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--increment", "-i", action="store_true", help="increment the target endnote number and all following endnotes")
    group.add_argument("--decrement", "-d", action="store_true", help="decrement the target endnote number and all following endnotes")
    parser.add_argument("target_endnote_number", metavar="ENDNOTE-NUMBER", type=is_positive_integer, help="the endnote number to start reordering at")
    parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    source_directory = os.path.join(os.path.abspath(args.directory), "src")
    # Already an int: argparse ran it through is_positive_integer().
    target_endnote_number = args.target_endnote_number
    step = 1 if args.increment else -1

    endnotes_filename = os.path.join(source_directory, "epub", "text", "endnotes.xhtml")

    def shift_endnote_link(match):
        """
        Return the matched in-note link, renumbered if it points at a moved endnote.
        """
        link_number = int(match.group(1))

        # Only touch links whose visible number agrees with their href, and
        # only those that point at an endnote that is actually being moved.
        if match.group(1) != match.group(3) or link_number < target_endnote_number:
            return match.group(0)

        return "href=\"#note-{0}\"{1} {0}</a>".format(link_number + step, match.group(2))

    # Only I/O failures are reported as a problem with the endnotes file;
    # anything else is a programming error and should surface as such.
    try:
        with open(endnotes_filename, "r+", encoding="utf-8") as file:
            xhtml = file.read()

            # First get the highest numbered endnote
            soup = BeautifulSoup(xhtml, "lxml")
            endnote_count = len(soup.select("li[id^=note-]"))

            # Walk away from the gap so that a number is never rewritten onto
            # one that has not been moved yet: downwards from the last note
            # when incrementing, upwards from the target when decrementing.
            if args.increment:
                note_range = range(endnote_count, target_endnote_number - 1, -1)
            else:
                note_range = range(target_endnote_number, endnote_count + 1, 1)

            for endnote_number in note_range:
                xhtml = xhtml.replace("id=\"note-{}\"".format(endnote_number), "id=\"note-{}\"".format(endnote_number + step), 1)
                xhtml = xhtml.replace("#noteref-{}\"".format(endnote_number), "#noteref-{}\"".format(endnote_number + step), 1)

            # There may be some links within the notes that refer to other endnotes.
            # These potentially need incrementing / decrementing too. This code assumes
            # a link that looks something like <a href="#note-1">note 1</a>.
            # The substitution is done in a single pass so that a link that has
            # just been renumbered cannot be matched and shifted a second time.
            xhtml = regex.sub(r"href=\"#note-(\d+)\"(.*?) (\d+)</a>", shift_endnote_link, xhtml)

            file.seek(0)
            file.write(xhtml)
            file.truncate()
    except OSError:
        se.print_error("Couldn’t open endnotes file: {}".format(endnotes_filename))
        raise SystemExit(1)

    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = []

        for root, _, filenames in os.walk(source_directory):
            for filename in fnmatch.filter(filenames, "*.xhtml"):
                # Skip endnotes.xhtml since we already processed it
                if filename == "endnotes.xhtml":
                    continue

                futures.append(executor.submit(process_endnotes_in_file, filename, root, note_range, step))

        # result() re-raises anything a worker raised; without this a file
        # that failed to update would be skipped silently.
        for future in concurrent.futures.as_completed(futures):
            future.result()
def process_endnotes_in_file(filename: str, root: str, note_range: range, step: int):
    """
    Renumber the endnote references (noterefs) in a single XHTML file, in place.

    filename: name of the file to process, relative to root.
    root: directory containing the file.
    note_range: the endnote numbers to shift, in the order they must be
        processed so that a renumbered note never collides with one that has
        not been moved yet.
    step: amount added to each endnote number (1 or -1).

    For each number, the noteref's id attribute, the first link to the
    endnote (href ending in #note-N) and the noteref's visible link text are
    updated. The file is only rewritten if something changed.
    """
    with open(os.path.join(root, filename), "r+", encoding="utf-8") as file:
        xhtml = file.read()
        processed_xhtml = xhtml
        noteref_renumbered = False

        for endnote_number in note_range:
            new_number = endnote_number + step
            old_id = "id=\"noteref-{}\"".format(endnote_number)
            id_position = processed_xhtml.find(old_id)

            if id_position != -1:
                # Rewrite the id and the link text only from the noteref's id
                # onwards, so an unrelated earlier link whose text happens to
                # be the same number (e.g. a page reference) is left alone.
                before_noteref = processed_xhtml[:id_position]
                from_noteref = processed_xhtml[id_position:]
                from_noteref = from_noteref.replace(old_id, "id=\"noteref-{}\"".format(new_number), 1)
                from_noteref = from_noteref.replace(">{}</a>".format(endnote_number), ">{}</a>".format(new_number), 1)
                processed_xhtml = before_noteref + from_noteref
                noteref_renumbered = True
            elif noteref_renumbered:
                # If we’ve already changed some notes and can’t find the next then we don’t need to continue searching
                break

            # The href may come before the id inside the tag, so it is replaced
            # against the whole document rather than from the id onwards.
            processed_xhtml = processed_xhtml.replace("#note-{}\"".format(endnote_number), "#note-{}\"".format(new_number), 1)

        if processed_xhtml != xhtml:
            file.seek(0)
            file.write(processed_xhtml)
            file.truncate()
# Only run the command-line interface when this file is executed as a script.
# The guard also keeps worker processes started by ProcessPoolExecutor from
# re-running main() if they import this module.
if __name__ == "__main__":
    main()