# Build Small Wonders Ebook

This was a proof-of-concept notebook to develop the ebook generation script. The real action now is in `build_ebook.py`.

### Setup

In [1]:
import datetime
import re
import uuid
from collections import OrderedDict
from itertools import pairwise
from pathlib import Path

from ebooklib import epub
from markdown_it import MarkdownIt
from PIL import Image

In [2]:
# Shared Markdown renderer: CommonMark preset with the typographer option on,
# plus the two typographic rules switched on explicitly.
typographer_rules = ["replacements", "smartquotes"]
md = MarkdownIt("commonmark", {"typographer": True})
md.enable(typographer_rules)

markdown_it.main.MarkdownIt()

In [3]:
# Front matter: source files and the titles they are listed under (same order).
front_matter = ['0a-about.md', '0b-cover-artist.md', '0c-keyhole.md']
front_matter_titles = ["Title Page & Copyright", "About the Cover Artist", "Thru the Keyhole"]
front_matter_paths = list(map(Path, front_matter))

# Pieces are numbered 1-9 and cycle story -> poem -> reprint. Piece n lives in
# "<n>a-<type>.md" and its author bio in "<n>b-author.md".
content_types = ('story', 'poem', 'reprint')
piece_numbers = range(1, 10)
pieces = [
    f"{num}a-{content_types[(num - 1) % len(content_types)]}.md"
    for num in piece_numbers
]
author_bios = [f"{num}b-author.md" for num in piece_numbers]
piece_paths = list(map(Path, pieces))
author_bio_paths = list(map(Path, author_bios))

# Supporting text files
editors_path = Path("editors.txt")
description_path = Path("description.html")
stylesheet_path = Path("stylesheet.css")

# Artwork
images_path = Path("images")
cover_path = images_path.joinpath("cover.jpg")

In [4]:
# Subject keywords, each written to the book as a separate DC "subject" entry.
# Values are plain text, not markup: the XML writer escapes special characters
# on output, so a pre-escaped "&amp;" would show up literally in readers.
magazine_subjects = [
    'magazine',
    'science fiction',
    'fantasy',
    'science fiction magazine',
    'Science Fiction - Short Stories',
    'Science Fiction - Poetry',
    'Science Fiction & Fantasy',
    'short fiction',
    'short stories',
    'poetry',
]

In [5]:
# TODO: check that every input file exists and that their modification times are roughly the same (within a day)

### Prep

In [6]:
# Get titles and authors
#
# Each piece is expected to contain a "# Title" heading and a "## by Author"
# heading. The first of each is used. Problems are collected for every file
# and reported together so they can all be fixed in one pass.
titles = []
authors = []
errs = []
for fp in piece_paths:
    content = md.parse(fp.read_text(encoding='utf-8'))
    title = None
    author = None
    # Walk the tokens in (current, next) pairs: a heading's opening token is
    # immediately followed by the inline token that carries its text.
    for cur_token, next_token in pairwise(content):
        if cur_token.type != "heading_open":
            continue
        if cur_token.markup == "#" and title is None:
            title = next_token.content
        elif cur_token.markup == "##" and author is None:
            # Anchored so only a leading "by "/"By " byline prefix is removed;
            # unanchored, it would also eat "by " inside a name such as
            # "Abby Smith".
            author = re.sub(r"^[Bb]y +", "", next_token.content)
    file_errs = []
    if title is None:
        file_errs.append("No title found. Are you missing a # Markdown heading?")
    if author is None:
        file_errs.append("No author found. Are you missing a ## Markdown heading?")
    if file_errs:
        err_desc = " ".join(file_errs)
        errs.append(f"{fp}: {err_desc}")
    else:
        titles.append(title)
        authors.append(author)

# Fail loudly: titles/authors must stay aligned one-to-one with piece_paths.
if errs:
    raise RuntimeError("Issues finding titles/authors.\n  "+"\n  ".join(errs))

In [7]:
# Get editors
# One editor per line. Blank lines (including the one produced by a trailing
# newline) are skipped so no empty creator entry ends up in the metadata.
# Read as UTF-8 like the other input files -- assumes editors.txt is saved
# that way (TODO confirm).
editors = [
    line.strip()
    for line in editors_path.read_text(encoding='utf-8').splitlines()
    if line.strip()
]

In [8]:
# Scrape the issue number from the first front-matter file, which is expected
# to contain text like "Issue 4". Raw string so that \d reaches the regex
# engine instead of being treated as a (deprecated) string escape.
m = re.search(r"Issue +(\d+)", front_matter_paths[0].read_text(encoding='utf-8'))
if m is None:
    raise RuntimeError(f"Couldn't find issue number in {front_matter_paths[0]}")
issue_num = int(m.group(1))
# Echo the raw match so the notebook cell shows what was scraped
m.group(1)

'4'

### Support Functions

In [9]:
# Adapted from the ebooklib class so I could tweak it -- the existing class lower-cases all
# xml attributes, which wrecks the viewBox attribute
class EpubCoverHtml(epub.EpubHtml):
    """Cover page that shows the cover image inside a full-page SVG.

    The page markup is built by hand and stored as bytes; ``get_content``
    hands those bytes back unchanged so the mixed-case ``viewBox`` attribute
    is kept as written.

    :Args:
      - uid: item id of the cover page
      - file_name: file name of the cover page inside the epub
      - image_name: file name (inside the epub) of the image the page displays
      - title: title of the cover page
    """

    def __init__(self, uid='cover', file_name='cover.xhtml', image_name='', title='Cover'):
        super().__init__(uid=uid, file_name=file_name, title=title)

        self.image_name = image_name
        # NOTE(review): presumably makes ebooklib treat the page as non-linear
        # in the spine -- confirm against ebooklib.
        self.is_linear = False

    def is_chapter(self):
        """Report that the cover page is not a chapter."""
        return False

    def set_content(self, cover_path):
        """Build and store the cover markup for the image file at *cover_path*."""
        self.content = self.get_cover_html_content(cover_path)

    def get_content(self):
        """Return the stored markup exactly as built (no post-processing)."""
        return self.content

    def get_cover_html_content(self, cover_path: Path) -> bytes:
        """Return the cover page XHTML, encoded as bytes.

        The image file at *cover_path* is opened only to read its pixel size,
        which becomes the SVG ``viewBox`` so the image scales to the page
        while keeping its aspect ratio.
        """
        # Local import: only needed here, and keeps the class self-contained.
        from html import escape

        with Image.open(cover_path) as img:
            cover_width, cover_height = img.size

        # The name lands inside a double-quoted attribute, so escape it.
        image_href = escape(self.image_name, quote=True)

        # The document declares SVG 1.1, where <image> needs explicit
        # width/height and the link goes in xlink:href (the xlink namespace is
        # declared on <svg>). CSS braces are doubled because this is an
        # f-string.
        return f'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en" xml:lang="en">
 <head>
  <style>
    body {{ margin: 0em; padding: 0em; }}
    img {{ max-width: 100%; max-height: 100%; }}
  </style>
 </head>
 <body>
   <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
   height="100%" width="100%" viewBox="0 0 {cover_width} {cover_height}" preserveAspectRatio="xMidYMid meet" version="1.1">
     <image width="{cover_width}" height="{cover_height}" xlink:href="{image_href}" alt="Cover art"/></svg>
 </body>
</html>'''.encode()

    def __str__(self):
        return f'<EpubCoverHtml:{self.id}:{self.file_name}>'

# Adapted from the ebooklib function so I can tweak it
def set_cover(book, file_name: str, cover_path: Path, create_page=True, title="Cover"):
    """
    Add the cover image to the book and, optionally, a cover page showing it.

    :Args:
      - book: epub object to add the cover to
      - file_name: file name of the cover image inside the epub; the cover page links to it
      - cover_path: Path to the image file on disk
      - create_page: Whether to also create the cover page (optional). Default value is True.
      - title: Title for the cover page (only used when the page is created).
    """

    cover_path = Path(cover_path)
    image_bytes = cover_path.read_bytes()

    # The image itself
    cover_image = epub.EpubCover(file_name=file_name)
    cover_image.content = image_bytes
    book.add_item(cover_image)

    # The page that displays it
    if create_page:
        cover_page = EpubCoverHtml(title=title, file_name="cover.xhtml", image_name=file_name)
        cover_page.set_content(cover_path)
        book.add_item(cover_page)

    # <meta name="cover" content="cover-img"/>; 'cover-img' is presumably the
    # id ebooklib gives the EpubCover item by default -- confirm.
    cover_meta = OrderedDict([('name', 'cover'), ('content', 'cover-img')])
    book.add_metadata(None, 'meta', '', cover_meta)

### Make the Ebook

In [10]:
# Create ebook with metadata
book = epub.EpubBook()
book.set_identifier(str(uuid.uuid4()))  # fresh random identifier on every build
book.set_title(f"Small Wonders Issue {issue_num}")
book.set_language("en")

# Dublin Core entries, listed in the order they are added to the book:
# editors are recorded as creators, piece authors as contributors.
dc_entries = [('publisher', 'Small Wonders LLC')]
dc_entries += [('creator', editor_name) for editor_name in editors]
dc_entries += [('contributor', author_name) for author_name in authors]
dc_entries.append(('date', datetime.date.today().isoformat()))
dc_entries.append(('description', description_path.read_text(encoding='utf-8')))
dc_entries += [('subject', subject_name) for subject_name in magazine_subjects]

for dc_name, dc_value in dc_entries:
    book.add_metadata('DC', dc_name, dc_value)

In [11]:
# Stylesheets
# A single stylesheet, stored in the epub as styles/stylesheet.css.
stylesheet_text = stylesheet_path.read_text(encoding='utf-8')
css = epub.EpubItem(
    uid="base_stylesheet",
    file_name="styles/stylesheet.css",
    media_type="text/css",
    content=stylesheet_text,
)
book.add_item(css)

<ebooklib.epub.EpubItem at 0x266c49a3690>

In [12]:
# Cover
# The image is stored in the epub as cover.jpg; the cover page takes the same
# title as the book.
cover_title = f"Small Wonders Issue {issue_num}"
set_cover(book, "cover.jpg", cover_path, title=cover_title)

In [13]:
# Running state for the build: the counter used to number the bodyNN.xhtml
# files, and the list of documents created so far.
file_num = 0
ebook_chs = []

In [14]:
# Navigation: the NCX goes straight into the book, while the nav document is
# put at the front of the document list so it precedes everything added later.
book.add_item(epub.EpubNcx())
nav = epub.EpubNav()
ebook_chs.insert(0, nav)

In [15]:
# Front matter
# One page per front-matter file: the rendered Markdown wrapped in a
# "frontmatter" div, numbered with the shared file counter.
for src_path, page_title in zip(front_matter_paths, front_matter_titles):
    body_html = md.render(src_path.read_text(encoding='utf-8'))
    page = epub.EpubHtml(title=page_title, file_name=f"body{file_num:02}.xhtml", lang="en")
    page.set_content(f'<div class="frontmatter">{body_html}</div>')
    ebook_chs.append(page)
    file_num += 1

In [16]:
# Content
# One page per piece: the piece inside <div class="piece">, then a copyright
# line inside <div class="endmatter">, then the author's photo and bio.
current_year = datetime.datetime.now().year
for piece_path, bio_path, title, author in zip(piece_paths, author_bio_paths, titles, authors):
    content = '<div class="piece">'
    if "poem" in str(piece_path):
        # Poems are laid out by hand, one <div> per source line, so line
        # breaks and blank lines are kept. The text is not run through
        # Markdown and is inserted as-is.
        lines = piece_path.read_text(encoding="utf-8").splitlines()
        in_content = False  # becomes True at the first non-blank, non-heading line
        for line in lines:
            if not in_content and not line.strip():
                # Skip blank lines before the body starts
                continue
            if in_content or not line.startswith("#"):
                in_content = True
                content += f'<div class="poem">{line}&nbsp;</div>\n'
            elif line.startswith("##"):
                content += f'<h2>{line[2:]}</h2>\n\n'
            elif line.startswith("#"):
                content += f'<h1>{line[1:]}</h1>\n\n'
    else:
        content += md.render(piece_path.read_text(encoding='utf-8'))
    if "reprint" not in str(piece_path):
        # Add the end div and copyright statement
        content += f'</div>\n\n<div class="endmatter">\n<p>Copyright © {current_year} by {author}</p>\n</div>\n\n'
    else:
        # Add the end div before the already-given copyright statement
        copyright_pos = content.find("<p>Copyright ©")
        if copyright_pos == -1:
            print(f"Warning: Couldn't find copyright statement in {piece_path}")
            # Still close the piece div so the markup stays balanced and the
            # author info below does not end up inside it.
            content += '</div>\n\n'
        else:
            content = (
                content[:copyright_pos]
                + '</div>\n<div class="endmatter">\n'
                + content[copyright_pos:]
                + "</div>\n\n"
            )

    # Add Author info
    # The photo is looked up by the first character of the piece's file name,
    # e.g. images/1-author.jpg for 1a-story.md.
    image_path = images_path / f"{piece_path.stem[0]}-author.jpg"
    # Store the photo under its own .jpg name (not the piece's .md name), and
    # prefix the uid because an XML id may not start with a digit.
    image = epub.EpubImage(
        uid=f"img-{image_path.stem}",
        file_name=image_path.name,
        media_type='image/jpeg',
        content=image_path.read_bytes(),
    )
    book.add_item(image)
    content += f'<p class="author-pic"><img class="author" src="{image_path.name}" alt="{author}"/></p>\n\n'
    content += md.render(bio_path.read_text(encoding='utf-8'))

    ch = epub.EpubHtml(title=title, file_name=f"body{file_num:02}.xhtml", lang="en")
    ch.set_content(content)
    ebook_chs.append(ch)
    file_num += 1

In [17]:
# Spine
# Link the stylesheet into every document and register each one with the
# book; the spine lists them in the order they were created.
for chapter in ebook_chs:
    chapter.add_item(css)
    book.add_item(chapter)
book.spine = tuple(ebook_chs)

In [18]:
# Table of Contents
# Every document except the nav page itself: it is created without a title,
# so listing it would add an entry with no label (assumes ebooklib's default
# nav title is empty -- confirm). The nav page still sits first in the
# document list used for the spine.
book.toc = tuple(doc for doc in ebook_chs if not isinstance(doc, epub.EpubNav))

In [19]:
# Write the assembled book to test.epub (path is relative to the working directory).
epub.write_epub("test.epub", book)