Code refactoring

standardebooks · May 25, 2018 · 5aabfdc
1 parent 7e9ec5b
commit 5aabfdc
Show file tree

Hide file tree

Showing 6 changed files with 50 additions and 54 deletions.
diff --git a/build b/build
@@ -283,13 +283,13 @@ def main():
 
 		# Clean up old output files if any
 		for kindle_thumbnail in glob.glob(os.path.join(output_directory, "thumbnail_*_EBOK_portrait.jpg")):
-			se.epub.quiet_remove(kindle_thumbnail)
-		se.epub.quiet_remove(os.path.join(output_directory, "cover.jpg"))
-		se.epub.quiet_remove(os.path.join(output_directory, "cover-thumbnail.jpg"))
-		se.epub.quiet_remove(os.path.join(output_directory, epub_output_filename))
-		se.epub.quiet_remove(os.path.join(output_directory, epub3_output_filename))
-		se.epub.quiet_remove(os.path.join(output_directory, kobo_output_filename))
-		se.epub.quiet_remove(os.path.join(output_directory, kindle_output_filename))
+			se.quiet_remove(kindle_thumbnail)
+		se.quiet_remove(os.path.join(output_directory, "cover.jpg"))
+		se.quiet_remove(os.path.join(output_directory, "cover-thumbnail.jpg"))
+		se.quiet_remove(os.path.join(output_directory, epub_output_filename))
+		se.quiet_remove(os.path.join(output_directory, epub3_output_filename))
+		se.quiet_remove(os.path.join(output_directory, kobo_output_filename))
+		se.quiet_remove(os.path.join(output_directory, kindle_output_filename))
 
 		# Are we including proofreading CSS?
 		if args.proof:

diff --git a/create-draft b/create-draft
@@ -12,7 +12,6 @@ import regex
 from ftfy import fix_text
 import se
 import se.formatting
-import se.epub
 from bs4 import BeautifulSoup
 
 
@@ -401,7 +400,7 @@ def main():
 
 		try:
 			fixed_pg_ebook_html = fix_text(pg_ebook_html, uncurl_quotes=False)
-			pg_ebook_html = se.epub.strip_bom(fixed_pg_ebook_html)
+			pg_ebook_html = se.strip_bom(fixed_pg_ebook_html)
 		except Exception as ex:
 			se.print_error("Couldn't determine text encoding of Project Gutenberg HTML file. Error: {}".format(ex))
 			exit(1)

diff --git a/se/__init__.py b/se/__init__.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 import sys
+import os
 from typing import Union
 from textwrap import wrap
 from termcolor import colored
@@ -97,6 +98,32 @@ def replace_in_file(absolute_path: str, search: Union[str, list], replace: Union
 			file.write(processed_data)
 			file.truncate()
 
+def strip_bom(string: str) -> str:
+	"""
+	Remove a leading Unicode Byte Order Mark from a string, if one is present.
+
+	INPUTS
+	string: A Unicode string
+
+	OUTPUTS
+	The input string without its leading Byte Order Mark; returned unchanged if it doesn't start with one
+	"""
+
+	if string.startswith(UNICODE_BOM):
+		string = string[1:] # Drops exactly one character; assumes UNICODE_BOM is a single code point -- TODO confirm
+
+	return string
+
+def quiet_remove(absolute_path: str) -> None:
+	"""
+	Helper function to delete a file, silently ignoring any error raised by the deletion (for example, if the file doesn't exist).
+	"""
+
+	try:
+		os.remove(absolute_path)
+	except Exception: # Best-effort: every Exception is swallowed here, not only a missing file
+		pass
+
 def print_error(message: str, verbose: bool = False) -> None:
 	"""
 	Helper function to print a colored error message to the console.

diff --git a/se/easy_xml.py b/se/easy_xml.py
@@ -17,13 +17,15 @@ class EasyXmlTree:
 	def __init__(self, xhtml_string: str):
 		# We have to remove the default namespace declaration from our document, otherwise
 		# xpath won't find anything at all.  See http://stackoverflow.com/questions/297239/why-doesnt-xpath-work-when-processing-an-xhtml-document-with-lxml-in-python
+
 		self.__xhtml_string = xhtml_string#.replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", "")
 		self.etree = etree.fromstring(str.encode(self.__xhtml_string))
 
 	def css_select(self, selector: str) -> list:
 		"""
 		Shortcut to select elements based on CSS selector.
 		"""
+
 		return self.xpath(cssselect.CSSSelector(selector, translator="html", namespaces=se.XHTML_NAMESPACES).path)
 
 	def xpath(self, selector: str) -> list:

diff --git a/se/epub.py b/se/epub.py
@@ -9,46 +9,14 @@
 from lxml import etree
 
 
-def strip_bom(string: str) -> str:
-	"""
-	Remove the Unicode Byte Order Mark from a string.
-
-	INPUTS
-	string: A Unicode string
-
-	OUTPUTS
-	The input string with the Byte Order Mark removed
-	"""
-
-	if string.startswith(se.UNICODE_BOM):
-		string = string[1:]
-
-	return string
-
-def quiet_remove(absolute_path: str) -> None:
-	"""
-	Delete a file without throwing an exception if the file doesn't exist.
-
-	INPUTS
-	absolute_path: A filename
-
-	OUTPUTS
-	None
-	"""
-
-	try:
-		os.remove(absolute_path)
-	except Exception:
-		pass
-
-def convert_toc_to_ncx(epub_root_directory: str, toc_filename: str, xsl_filename: str) -> se.easy_xml.EasyXmlTree:
+def convert_toc_to_ncx(epub_root_absolute_path: str, toc_filename: str, xsl_filename: str) -> se.easy_xml.EasyXmlTree:
 	"""
 	Take an epub3 HTML5 ToC file and convert it to an epub2 NCX file. NCX output is written to the same directory as the ToC file, in a file named "toc.ncx".
 
 	epub structure must be in the SE format.
 
 	INPUTS
-	epub_root_directory: The root directory of an unzipped epub
+	epub_root_absolute_path: The root directory of an unzipped epub
 	toc_filename: The filename of the ToC file
 	xsl_filename: The filename for the XSL file used to perform the transformation
 
@@ -57,13 +25,13 @@ def convert_toc_to_ncx(epub_root_directory: str, toc_filename: str, xsl_filename
 	"""
 
 	# Use an XSLT transform to generate the NCX
-	with open(os.path.join(epub_root_directory, "epub", toc_filename), "r", encoding="utf-8") as file:
+	with open(os.path.join(epub_root_absolute_path, "epub", toc_filename), "r", encoding="utf-8") as file:
 		toc_tree = se.easy_xml.EasyXmlTree(file.read())
 
 	transform = etree.XSLT(etree.parse(xsl_filename))
-	ncx_tree = transform(toc_tree.etree, cwd="'{}{}'".format(epub_root_directory, os.path.sep))
+	ncx_tree = transform(toc_tree.etree, cwd="'{}{}'".format(epub_root_absolute_path, os.path.sep))
 
-	with open(os.path.join(epub_root_directory, "epub", "toc.ncx"), "w", encoding="utf-8") as file:
+	with open(os.path.join(epub_root_absolute_path, "epub", "toc.ncx"), "w", encoding="utf-8") as file:
 		ncx_xhtml = etree.tostring(ncx_tree, encoding="unicode", pretty_print=True, with_tail=False)
 		ncx_xhtml = regex.sub(r" xml:lang=\"\?\?\"", "", ncx_xhtml)
 
@@ -79,12 +47,12 @@ def convert_toc_to_ncx(epub_root_directory: str, toc_filename: str, xsl_filename
 
 	return toc_tree
 
-def write_epub(epub_root_directory: str, output_absolute_path: str) -> None:
+def write_epub(epub_root_absolute_path: str, output_absolute_path: str) -> None:
 	"""
 	Given a root directory, compress it into a final epub file.
 
 	INPUTS
-	epub_root_directory: The root directory of an unzipped epub
+	epub_root_absolute_path: The root directory of an unzipped epub
 	output_absolute_path: The filename of the output file
 
 	OUTPUTS
@@ -93,10 +61,10 @@ def write_epub(epub_root_directory: str, output_absolute_path: str) -> None:
 
 	# We can't enable global compression here because according to the spec, the `mimetype` file must be uncompressed.  The rest of the files, however, can be compressed.
 	with zipfile.ZipFile(output_absolute_path, mode="w") as epub:
-		epub.write(os.path.join(epub_root_directory, "mimetype"), "mimetype")
-		epub.write(os.path.join(epub_root_directory, "META-INF", "container.xml"), "META-INF/container.xml", compress_type=zipfile.ZIP_DEFLATED)
+		epub.write(os.path.join(epub_root_absolute_path, "mimetype"), "mimetype")
+		epub.write(os.path.join(epub_root_absolute_path, "META-INF", "container.xml"), "META-INF/container.xml", compress_type=zipfile.ZIP_DEFLATED)
 
-		for root, _, files in os.walk(epub_root_directory):
+		for root, _, files in os.walk(epub_root_absolute_path):
 			for file in files:
 				if file != "mimetype" and file != "container.xml":
-					epub.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), epub_root_directory), compress_type=zipfile.ZIP_DEFLATED)
+					epub.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), epub_root_absolute_path), compress_type=zipfile.ZIP_DEFLATED)
diff --git a/split-file b/split-file
@@ -3,7 +3,7 @@
 import argparse
 import os
 import regex
-import se.epub
+import se
 
 def output(chapter_number: int, header_xhtml: str, chapter_xhtml: str) -> None:
 	"""
@@ -20,7 +20,7 @@ def main():
 	args = parser.parse_args()
 
 	with open(args.filename, "r", encoding="utf-8") as file:
-		xhtml = se.epub.strip_bom(file.read())
+		xhtml = se.strip_bom(file.read())
 
 	with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), "templates", "header.xhtml"), "r", encoding="utf-8") as file:
 		header_xhtml = file.read()