Skip to content

Commit

Permalink
Code refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
acabal committed May 25, 2018
1 parent 7e9ec5b commit 5aabfdc
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 54 deletions.
14 changes: 7 additions & 7 deletions build
Expand Up @@ -283,13 +283,13 @@ def main():

# Clean up old output files if any
for kindle_thumbnail in glob.glob(os.path.join(output_directory, "thumbnail_*_EBOK_portrait.jpg")):
se.epub.quiet_remove(kindle_thumbnail)
se.epub.quiet_remove(os.path.join(output_directory, "cover.jpg"))
se.epub.quiet_remove(os.path.join(output_directory, "cover-thumbnail.jpg"))
se.epub.quiet_remove(os.path.join(output_directory, epub_output_filename))
se.epub.quiet_remove(os.path.join(output_directory, epub3_output_filename))
se.epub.quiet_remove(os.path.join(output_directory, kobo_output_filename))
se.epub.quiet_remove(os.path.join(output_directory, kindle_output_filename))
se.quiet_remove(kindle_thumbnail)
se.quiet_remove(os.path.join(output_directory, "cover.jpg"))
se.quiet_remove(os.path.join(output_directory, "cover-thumbnail.jpg"))
se.quiet_remove(os.path.join(output_directory, epub_output_filename))
se.quiet_remove(os.path.join(output_directory, epub3_output_filename))
se.quiet_remove(os.path.join(output_directory, kobo_output_filename))
se.quiet_remove(os.path.join(output_directory, kindle_output_filename))

# Are we including proofreading CSS?
if args.proof:
Expand Down
3 changes: 1 addition & 2 deletions create-draft
Expand Up @@ -12,7 +12,6 @@ import regex
from ftfy import fix_text
import se
import se.formatting
import se.epub
from bs4 import BeautifulSoup


Expand Down Expand Up @@ -401,7 +400,7 @@ def main():

try:
fixed_pg_ebook_html = fix_text(pg_ebook_html, uncurl_quotes=False)
pg_ebook_html = se.epub.strip_bom(fixed_pg_ebook_html)
pg_ebook_html = se.strip_bom(fixed_pg_ebook_html)
except Exception as ex:
se.print_error("Couldn't determine text encoding of Project Gutenberg HTML file. Error: {}".format(ex))
exit(1)
Expand Down
27 changes: 27 additions & 0 deletions se/__init__.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3

import sys
import os
from typing import Union
from textwrap import wrap
from termcolor import colored
Expand Down Expand Up @@ -97,6 +98,32 @@ def replace_in_file(absolute_path: str, search: Union[str, list], replace: Union
file.write(processed_data)
file.truncate()

def strip_bom(string: str) -> str:
    """
    Return a string without its leading Unicode Byte Order Mark, if it has one.

    INPUTS
    string: A Unicode string

    OUTPUTS
    The input string minus its first character when it starts with UNICODE_BOM; otherwise the input string unchanged
    """

    # Only a BOM at the very start of the string is dropped; anything else is left untouched.
    return string[1:] if string.startswith(UNICODE_BOM) else string

def quiet_remove(absolute_path: str) -> None:
    """
    Helper function to delete a file without throwing an exception if the file
    doesn't exist or otherwise can't be removed by the operating system.

    INPUTS
    absolute_path: The path of the file to delete

    OUTPUTS
    None
    """

    try:
        os.remove(absolute_path)
    except OSError:
        # Best-effort cleanup: a missing file, a directory, or a permissions
        # problem are all reported by os.remove() as OSError subclasses and are
        # deliberately ignored. Anything else (e.g. a TypeError caused by passing
        # a non-path argument) is a programming error and is allowed to surface.
        pass

def print_error(message: str, verbose: bool = False) -> None:
"""
Helper function to print a colored error message to the console.
Expand Down
2 changes: 2 additions & 0 deletions se/easy_xml.py
Expand Up @@ -17,13 +17,15 @@ class EasyXmlTree:
def __init__(self, xhtml_string: str):
    """
    Store the XHTML source string and parse it into an lxml element tree.

    INPUTS
    xhtml_string: The XHTML document source, as a string

    The parsed tree is exposed as `self.etree`.
    """

    # We have to remove the default namespace declaration from our document, otherwise
    # xpath won't find anything at all. See http://stackoverflow.com/questions/297239/why-doesnt-xpath-work-when-processing-an-xhtml-document-with-lxml-in-python
    # NOTE(review): the .replace() call that would strip the namespace is commented out
    # below, so the string is currently stored and parsed unchanged - confirm whether
    # the comment above is still accurate.

    self.__xhtml_string = xhtml_string#.replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", "")
    # etree.fromstring() is given bytes here; str.encode() uses UTF-8 by default.
    self.etree = etree.fromstring(str.encode(self.__xhtml_string))

def css_select(self, selector: str) -> list:
    """
    Select elements using a CSS selector instead of an xpath expression.

    The selector is translated to its xpath form and passed to self.xpath().
    """

    compiled_selector = cssselect.CSSSelector(selector, translator="html", namespaces=se.XHTML_NAMESPACES)
    xpath_expression = compiled_selector.path

    return self.xpath(xpath_expression)

def xpath(self, selector: str) -> list:
Expand Down
54 changes: 11 additions & 43 deletions se/epub.py
Expand Up @@ -9,46 +9,14 @@
from lxml import etree


def strip_bom(string: str) -> str:
"""
Remove the Unicode Byte Order Mark from a string.
INPUTS
string: A Unicode string
OUTPUTS
The input string with the Byte Order Mark removed
"""

if string.startswith(se.UNICODE_BOM):
string = string[1:]

return string

def quiet_remove(absolute_path: str) -> None:
"""
Delete a file without throwing an exception if the file doesn't exist.
INPUTS
absolute_path: A filename
OUTPUTS
None
"""

try:
os.remove(absolute_path)
except Exception:
pass

def convert_toc_to_ncx(epub_root_directory: str, toc_filename: str, xsl_filename: str) -> se.easy_xml.EasyXmlTree:
def convert_toc_to_ncx(epub_root_absolute_path: str, toc_filename: str, xsl_filename: str) -> se.easy_xml.EasyXmlTree:
"""
Take an epub3 HTML5 ToC file and convert it to an epub2 NCX file. NCX output is written to the same directory as the ToC file, in a file named "toc.ncx".
epub structure must be in the SE format.
INPUTS
epub_root_directory: The root directory of an unzipped epub
epub_root_absolute_path: The root directory of an unzipped epub
toc_filename: The filename of the ToC file
xsl_filename: The filename for the XSL file used to perform the transformation
Expand All @@ -57,13 +25,13 @@ def convert_toc_to_ncx(epub_root_directory: str, toc_filename: str, xsl_filename
"""

# Use an XSLT transform to generate the NCX
with open(os.path.join(epub_root_directory, "epub", toc_filename), "r", encoding="utf-8") as file:
with open(os.path.join(epub_root_absolute_path, "epub", toc_filename), "r", encoding="utf-8") as file:
toc_tree = se.easy_xml.EasyXmlTree(file.read())

transform = etree.XSLT(etree.parse(xsl_filename))
ncx_tree = transform(toc_tree.etree, cwd="'{}{}'".format(epub_root_directory, os.path.sep))
ncx_tree = transform(toc_tree.etree, cwd="'{}{}'".format(epub_root_absolute_path, os.path.sep))

with open(os.path.join(epub_root_directory, "epub", "toc.ncx"), "w", encoding="utf-8") as file:
with open(os.path.join(epub_root_absolute_path, "epub", "toc.ncx"), "w", encoding="utf-8") as file:
ncx_xhtml = etree.tostring(ncx_tree, encoding="unicode", pretty_print=True, with_tail=False)
ncx_xhtml = regex.sub(r" xml:lang=\"\?\?\"", "", ncx_xhtml)

Expand All @@ -79,12 +47,12 @@ def convert_toc_to_ncx(epub_root_directory: str, toc_filename: str, xsl_filename

return toc_tree

def write_epub(epub_root_directory: str, output_absolute_path: str) -> None:
def write_epub(epub_root_absolute_path: str, output_absolute_path: str) -> None:
"""
Given a root directory, compress it into a final epub file.
INPUTS
epub_root_directory: The root directory of an unzipped epub
epub_root_absolute_path: The root directory of an unzipped epub
output_absolute_path: The filename of the output file
OUTPUTS
Expand All @@ -93,10 +61,10 @@ def write_epub(epub_root_directory: str, output_absolute_path: str) -> None:

# We can't enable global compression here because according to the spec, the `mimetype` file must be uncompressed. The rest of the files, however, can be compressed.
with zipfile.ZipFile(output_absolute_path, mode="w") as epub:
epub.write(os.path.join(epub_root_directory, "mimetype"), "mimetype")
epub.write(os.path.join(epub_root_directory, "META-INF", "container.xml"), "META-INF/container.xml", compress_type=zipfile.ZIP_DEFLATED)
epub.write(os.path.join(epub_root_absolute_path, "mimetype"), "mimetype")
epub.write(os.path.join(epub_root_absolute_path, "META-INF", "container.xml"), "META-INF/container.xml", compress_type=zipfile.ZIP_DEFLATED)

for root, _, files in os.walk(epub_root_directory):
for root, _, files in os.walk(epub_root_absolute_path):
for file in files:
if file != "mimetype" and file != "container.xml":
epub.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), epub_root_directory), compress_type=zipfile.ZIP_DEFLATED)
epub.write(os.path.join(root, file), os.path.relpath(os.path.join(root, file), epub_root_absolute_path), compress_type=zipfile.ZIP_DEFLATED)
4 changes: 2 additions & 2 deletions split-file
Expand Up @@ -3,7 +3,7 @@
import argparse
import os
import regex
import se.epub
import se

def output(chapter_number: int, header_xhtml: str, chapter_xhtml: str) -> None:
"""
Expand All @@ -20,7 +20,7 @@ def main():
args = parser.parse_args()

with open(args.filename, "r", encoding="utf-8") as file:
xhtml = se.epub.strip_bom(file.read())
xhtml = se.strip_bom(file.read())

with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), "templates", "header.xhtml"), "r", encoding="utf-8") as file:
header_xhtml = file.read()
Expand Down

0 comments on commit 5aabfdc

Please sign in to comment.