Permalink
Browse files

Start refactoring generic "build a book" stuff out

  • Loading branch information...
palfrey committed Sep 30, 2012
1 parent f014ed5 commit f35f7b767347f9f0743355103b49058147eec4b5
Showing with 67 additions and 42 deletions.
  1. +10 −42 books.py
  2. +57 −0 common.py
View
@@ -3,13 +3,12 @@
from google.protobuf import text_format
from blog_pb2 import All
from re import compile, DOTALL, MULTILINE
-from os import mkdir, system
-from os.path import exists, join
-from hashlib import md5
+from os import system
+from os.path import exists
from codecs import open
-from sys import argv
from urlparse import urljoin
from optparse import OptionParser
+from common import generatePage, tocStart, tocEnd
c = Cache()
@@ -51,16 +50,7 @@
index = 1
while page!=None:
folder = "%s #%02d"%(s.description, index)
- if not exists(folder):
- mkdir(folder)
- toc = open(join(folder, "toc.html"), "wb", "utf-8")
- toc.write("""<html xmlns="http://www.w3.org/1999/xhtml">
- <head>
- <title>%s</title>
- </head>
- <body class="vcenter">
- <div style="display:none">
-""" % folder)
+ toc = tocStart(folder)  # tocStart() requires the book folder it creates toc.html in
titlePattern = compile(s.titlePattern, DOTALL | MULTILINE)
contentPattern = compile(s.contentPattern, DOTALL | MULTILINE)
nextPattern = compile(s.nextPattern, DOTALL | MULTILINE)
@@ -82,43 +72,21 @@
age = 3600
else:
break
- fname = md5(page).hexdigest() + ".html"
- fpath = join(folder, fname)
-
title = titlePattern.search(data)
assert title != None, page
title = title.groups()[0]
-
- toc.write("\t\t\t<a title=\"%s\" href=\"%s\" />\n"%(title, fname))
- if not exists(fpath):
- newitems = True
- content = contentPattern.search(data)
- assert content != None, page
- content = content.groups()[0]
-
- open (fpath, "wb", "utf-8").write(u"""<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
- <head>
- <style type="text/css" title="override_css">
- @page {padding: 0pt; margin:0pt}
- </style>
- <title>%s</title>
- </head>
- <body>
- <h1>%s</h1>
- %s
- </body>
- </html>"""%(title, title, content))
- if link != None:
+ content = contentPattern.search(data)
+ assert content != None, page
+ content = content.groups()[0]
+ # generatePage() takes the page URL first; it is hashed to name the page file.
+ newitems = generatePage(page, title, content, folder, toc) or newitems
+ if link is not None:
link = link.groups()[0]
newpage = urljoin(page, link)
if page == None or newpage == page:
page = None
break
page = newpage
- toc.write("""\t\t</div>
- </body>
-</html>""")
- toc.close()
+ tocEnd(toc)
if newitems or not exists(folder + ".mobi"):
cmd = "rm -f book.zip && zip -j book.zip %s/* && ebook-convert book.zip \"%s.mobi\" --output-profile kindle --margin-top 0 --margin-bottom 0 --margin-left 0 --authors=\"%s\" --input-encoding=utf-8" %(folder.replace(" ", "\\ "), folder, s.author)
print cmd
View
@@ -0,0 +1,57 @@
+from codecs import open
+from os import mkdir
+from os.path import join, exists
+
+try:
+ import hashlib
+except ImportError: # python < 2.5
+ import md5
+ hashlib = None
+
+def hexdigest_md5(data):
+    """Return the hexadecimal MD5 digest of ``data``.
+
+    Unicode text is encoded as UTF-8 before hashing; byte strings are hashed
+    unchanged, so digests of plain ASCII input are the same either way.
+    Uses ``hashlib`` when it was importable and the legacy ``md5`` module
+    otherwise.
+    """
+    if isinstance(data, type(u"")):
+        # The hash constructors need bytes; text that cannot be converted
+        # implicitly (e.g. non-ASCII characters in a URL) would make them fail.
+        data = data.encode("utf-8")
+    if hashlib:
+        return hashlib.md5(data).hexdigest()
+    return md5.new(data).hexdigest()
+
+def generatePage(page, title, content, folder, toc):
+    """Add one page to the book, writing its HTML file if it is missing.
+
+    page    -- value hashed (MD5) to derive the page's file name
+    title   -- page title; used for the TOC entry, <title> and <h1>
+    content -- markup placed in the page body after the heading
+    folder  -- directory the page file is stored in
+    toc     -- open, writable TOC file; it receives an entry on every call
+
+    Returns True when a new page file was written and False when a file
+    with that name already existed (the existing file is left untouched).
+    """
+    fname = hexdigest_md5(page) + ".html"
+    fpath = join(folder, fname)
+    # The TOC entry is written even for pages that are already on disk.
+    toc.write("\t\t\t<a title=\"%s\" href=\"%s\" />\n" % (title, fname))
+    if exists(fpath):
+        return False
+    out = open(fpath, "wb", "utf-8")
+    try:
+        out.write(u"""<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+ <head>
+ <style type="text/css" title="override_css">
+ @page {padding: 0pt; margin:0pt}
+ </style>
+ <title>%s</title>
+ </head>
+ <body>
+ <h1>%s</h1>
+ %s
+ </body>
+ </html>""" % (title, title, content))
+    finally:
+        # Close explicitly instead of relying on garbage collection, so the
+        # page is flushed to disk before the folder is zipped and converted.
+        out.close()
+    return True
+
+
+def tocStart(folder):
+    """Create ``folder`` if needed and begin its ``toc.html`` file.
+
+    The TOC header is written with ``folder`` as the document title.
+    Returns the still-open file so entries can be appended to it.
+    """
+    folderMissing = not exists(folder)
+    if folderMissing:
+        mkdir(folder)
+    tocPath = join(folder, "toc.html")
+    handle = open(tocPath, "wb", "utf-8")
+    header = """<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>%s</title>
+ </head>
+ <body class="vcenter">
+ <div style="display:none">
+""" % folder
+    handle.write(header)
+    return handle
+
+
+def tocEnd(toc):
+    """Write the closing TOC markup to ``toc`` and close the file.
+
+    toc -- the open TOC file; it is closed even if the final write fails,
+           so the handle is never left dangling.
+    """
+    try:
+        toc.write("""\t\t</div>
+ </body>
+</html>""")
+    finally:
+        toc.close()

0 comments on commit f35f7b7

Please sign in to comment.