Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Martin Budden
authored and
Martin Budden
committed
Mar 14, 2010
0 parents
commit 882c817
Showing
24 changed files
with
1,480 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
""" | ||
epub | ||
""" | ||
|
||
# Version string for this code; nothing in the visible source reads it.
__version__ = '0.01.0'
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
#!/bin/env python | ||
#coding=utf-8 | ||
# file: epub.py | ||
|
||
|
||
import re | ||
import sys | ||
import os.path | ||
from jinja2 import Environment, PackageLoader | ||
|
||
class epub(object):
    """Write the unpacked file tree of an EPUB book from Jinja2 templates.

    Templates are loaded from the ``templates`` directory of the ``epub``
    package. Sections are plain dicts; the methods of this class read their
    ``class``, ``file``, ``id``, ``title`` and ``text`` keys, and the whole
    section list is also handed to the ``content.opf`` and ``toc.ncx``
    templates. This class only writes files; it does not build the zip
    archive.
    """

    # Stylesheet templates rendered into OPS/css and listed in content.opf.
    _STYLESHEETS = ("page.css", "titlepage.css", "about.css", "main.css")

    def __init__(self, bookdir):
        """Create the output directory and set empty metadata defaults.

        bookdir -- directory of the unpacked book, joined onto ``os.curdir``;
                   created when it does not exist yet.
        """
        self.env = Environment(loader=PackageLoader('epub', 'templates'))
        self.sections = []
        self.bookdir = self._ensureDir(os.path.join(os.curdir, bookdir))
        # Also assigned in writeOps; set here so the individual write*
        # methods can be called on their own without an AttributeError.
        self.opsdir = os.path.join(self.bookdir, "OPS")
        self.stylesheets = list(self._STYLESHEETS)
        self.opffilename = "content.opf"
        self.ncxfilename = "toc.ncx"
        self.title = ""
        self.uuid = ""
        self.author = ""
        self.author_as = ""
        self.depth = "1"
        self.description = ""
        self.source = ""
        self.publisher = ""
        self.publication = ""
        self.published = ""
        self.rights = ""
        self.language = "en"
        self.chapterTranslation = "Chapter"
        # Extension appended to the file name of every section document.
        self.xmlExt = ".html"

    def _ensureDir(self, path):
        """Create ``path`` (with missing parents) if needed and return it."""
        if not os.path.isdir(path):
            os.makedirs(path)
        return path

    def set(self, title, author, author_as, published, source):
        """Store the basic book metadata passed to the templates."""
        self.title = title
        self.author = author
        self.author_as = author_as
        self.published = published
        self.source = source

    def setUuid(self, uuid):
        """Store the unique identifier rendered into the OPF and NCX files."""
        self.uuid = uuid

    def addSection(self, section):
        """Append one section dict to the book, keeping insertion order."""
        self.sections.append(section)

    def writeFile(self, dir, filename, content):
        """Write ``content`` to ``filename`` inside ``dir``.

        Text is encoded as UTF-8 and byte strings are written unchanged. The
        file is opened in binary mode, so newlines are not translated. ``dir``
        is created when it does not exist.
        """
        self._ensureDir(dir)
        if not isinstance(content, bytes):
            # Template output is text; encode explicitly instead of relying
            # on the interpreter's default codec.
            content = content.encode("utf-8")
        with open(os.path.join(dir, filename), "wb") as handle:
            handle.write(content)

    def writeMimeType(self):
        """Write the ``mimetype`` file into the book directory."""
        self.writeFile(self.bookdir, "mimetype", "application/epub+zip")

    def writeMetaInf(self):
        """Render ``META-INF/container.xml``, which names the OPF file."""
        self.metainfdir = self._ensureDir(os.path.join(self.bookdir, "META-INF"))
        template = self.env.get_template("container.xml")
        rendered = template.render({'file': self.opffilename})
        # The container file name is fixed by the EPUB container format; it
        # must not follow xmlExt, which is the section-document extension.
        self.writeFile(self.metainfdir, "container.xml", rendered)

    def writeStylesheets(self):
        """Render every stylesheet template into ``OPS/css``."""
        self.cssdir = self._ensureDir(os.path.join(self.opsdir, "css"))
        for name in self.stylesheets:
            template = self.env.get_template(name)
            self.writeFile(self.cssdir, name, template.render())

    def writeImages(self):
        """Create the (empty) ``OPS/images`` directory."""
        self.imagedir = self._ensureDir(os.path.join(self.opsdir, "images"))

    def writeOpf(self):
        """Render the Open Packaging Format file into the OPS directory."""
        template = self.env.get_template("content.opf")
        rendered = template.render({
            'title': self.title,
            'uuid': self.uuid,
            'language': self.language,
            'author': self.author,
            'author_as': self.author_as,
            'description': self.description,
            'publisher': self.publisher,
            'source': self.source,
            'published': self.published,
            'rights': self.rights,
            'stylesheets': self.stylesheets,
            'sections': self.sections,
        })
        self.writeFile(self.opsdir, self.opffilename, rendered)

    def writeNcx(self):
        """Render the NCX (navigation) file into the OPS directory."""
        template = self.env.get_template("toc.ncx")
        rendered = template.render({
            'uid': self.uuid,
            'depth': self.depth,
            'title': self.title,
            # The NCX template receives the sort-form author name.
            'author': self.author_as,
            'sections': self.sections,
        })
        self.writeFile(self.opsdir, self.ncxfilename, rendered)

    def writeTitle(self, filename):
        """Render the title page to ``filename`` (extension included)."""
        template = self.env.get_template("titlepage.xml")
        rendered = template.render({
            'title': self.title,
            'css': "titlepage.css",
            'author': self.author,
            'published': self.published,
            'source': self.source,
        })
        self.writeFile(self.opsdir, filename, rendered)

    def writeChapter(self, section):
        """Render one section dict with the ``main.xml`` template.

        The output file is named ``section['file'] + xmlExt``.
        """
        # chapterTranslation is not passed to the template yet.
        template = self.env.get_template("main.xml")
        rendered = template.render({
            'css': "main.css",
            'id': section['id'],
            'class': section['class'],
            'title': section['title'],
            'text': section['text'],
        })
        self.writeFile(self.opsdir, section['file'] + self.xmlExt, rendered)

    def writeContent(self):
        """Write every section: class ``"title"`` as title page, rest as chapters."""
        for section in self.sections:
            if section['class'] == "title":
                self.writeTitle(section['file'] + self.xmlExt)
            else:
                self.writeChapter(section)

    def writeOps(self):
        """Write the OPS directory: OPF, NCX, stylesheets and all sections.

        ``writeImages`` is currently not part of the run.
        """
        self.opsdir = self._ensureDir(os.path.join(self.bookdir, "OPS"))
        self.stylesheets = list(self._STYLESHEETS)
        self.writeOpf()
        self.writeNcx()
        self.writeStylesheets()
        self.writeContent()

    def writeEpub(self):
        """Write the complete unpacked book: mimetype, META-INF and OPS."""
        self.writeMimeType()
        self.writeMetaInf()
        self.writeOps()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#!/bin/env python | ||
#coding=utf-8 | ||
# file: getbook.py | ||
|
||
import os | ||
import os.path | ||
|
||
from optparse import OptionParser | ||
import uuid | ||
import zipfile | ||
|
||
from epub import epub | ||
from mediawikibook import get_wikisource_work | ||
|
||
def zipBook(title):
    """Pack the unpacked book directory ``title`` into ``title + ".epub"``.

    Entry names are relative to the book directory. Files whose name starts
    with "." are left out. The ``mimetype`` file, when present, is written as
    the first entry; the remaining entries follow in sorted walk order. The
    working directory of the process is not changed.

    Raises OSError when ``title`` is not an existing directory.
    """
    if not os.path.isdir(title):
        # Fail before an empty archive is left behind.
        raise OSError("book directory not found: %r" % (title,))
    archive = zipfile.ZipFile(title + ".epub", "w")
    try:
        mimetype = os.path.join(title, "mimetype")
        if os.path.isfile(mimetype):
            # EPUB readers expect the mimetype entry at the start of the zip.
            archive.write(mimetype, "mimetype")
        for root, dirs, files in os.walk(title):
            dirs.sort()  # deterministic traversal order
            for fileName in sorted(files):
                if fileName.startswith("."):
                    continue
                path = os.path.join(root, fileName)
                arcname = os.path.relpath(path, title)
                if arcname == "mimetype":
                    continue  # already written first
                archive.write(path, arcname)
    finally:
        archive.close()
|
||
def main():
    """Fetch one work from en.wikisource.org and package it as an EPUB.

    The work title is the first positional command-line argument and
    defaults to "Great Expectations". The title is also used as the name of
    the unpacked book directory and of the resulting ``.epub`` file.
    """
    parser = OptionParser()
    parser.add_option("-d", "--date", dest="date", help="published DATE", metavar="DATE")
    # --date is accepted on the command line, but nothing reads its value yet.
    (options, args) = parser.parse_args()

    title = args[0] if args else "Great Expectations"
    book = epub(title)
    get_wikisource_work(book, "en.wikisource.org", title)
    book.setUuid(uuid.uuid4())
    book.writeEpub()
    zipBook(title)
|
||
# Script entry point. There is no __name__ guard, so the call also runs when this file is imported.
main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
#!/bin/env python | ||
#coding=utf-8 | ||
# file: getshake.py | ||
|
||
import sys | ||
import os | ||
import os.path | ||
import re | ||
import sys | ||
import time | ||
import uuid | ||
from optparse import OptionParser | ||
import httplib | ||
import zipfile | ||
|
||
from ebook import ebook | ||
from xml.sax import make_parser, SAXException | ||
import xml.sax.handler | ||
|
||
class MyXMLHandler(xml.sax.handler.ContentHandler):
    """SAX handler that splits a play's XML into chapters of an ``ebook``.

    Every element except ``PLAY`` is copied into the chapter text as a bare
    ``<NAME>`` / ``</NAME>`` pair (attributes are not copied). A chapter is
    added to the book each time a ``SCENE``, ``FM`` or ``PERSONAE`` element
    closes. Text and tags are accumulated as UTF-8 encoded strings.
    """

    def __init__(self):
        """Set up empty parsing state; call ``setBook`` before parsing."""
        xml.sax.handler.ContentHandler.__init__(self)
        self.stack = []
        self.elementIndex = 0
        # Parsing stops with a SAXException after this many start tags
        # (a value of 0 or less disables the limit).
        self.limit = 100000
        self.inElement = ""
        self.text = ""
        self.chapterText = ""
        # Counters are reset by setBook; they are created here as well so the
        # SAX callbacks do not fail on a missing attribute.
        self.count = 1
        self.playorder = 1
        self.act = 0
        self.scene = 0
        self.book = None

    def setBook(self, filename, uid):
        """Create the target ``ebook`` and add its title page.

        filename -- accepted but not used; title and author are fixed to
                    "Hamlet" by William Shakespeare.
        uid      -- identifier passed through to ``ebook.set``.
        """
        self.count = 1
        self.playorder = 1
        self.act = 0
        self.scene = 0
        author = "William Shakespeare"
        title = "Hamlet"
        author_as = "Shakespeare, William"
        published = ""
        bookdir = title

        self.book = ebook(bookdir)
        self.book.set(uid, title, author, author_as, published, "Testsource")
        self.book.addChapter({'class': "title", 'type': "cover", 'id': "level1-title", 'playorder': "1", 'title': "Title", 'file': "titlepage", 'text': title})

    def startElement(self, name, attributes):
        """Track act/scene numbers and append the opening tag to the text."""
        self.elementIndex += 1
        if self.elementIndex > self.limit and self.limit > 0:
            raise SAXException('Reached limit count')  # stop parsing
        self.stack.append(name)
        self.inElement = name
        if name == "ACT":
            self.act += 1
            self.scene = 0  # scene numbering restarts in every act
        if name == "SCENE":
            self.scene += 1
        if name in ("SCENE", "FM", "PERSONAE"):
            print("MMM %s" % name)
        tag = ("<" + name + ">").encode("utf-8")
        if name != "PLAY":
            self.text += tag

    def characters(self, data):
        """Append character data, stripped and with newlines removed."""
        # NOTE(review): each chunk is stripped separately; if the parser
        # delivers one text run in several chunks, whitespace at the chunk
        # boundaries is lost - confirm this is acceptable.
        text = data.encode('utf-8').strip()
        self.text += text.replace("\n", "")

    def endElement(self, name):
        """Append the closing tag; on a chapter element, add the chapter."""
        self.stack.pop()
        tag = ("</" + name + ">\n").encode("utf-8")
        if name != "PLAY":
            self.text += tag
        self.chapterText += self.text
        if name in ("SCENE", "FM", "PERSONAE"):
            print("nn %s" % name)
            xmlid = "act" + str(self.act) + "-scene" + str(self.scene)
            if name == "FM" or name == "PERSONAE":
                # Front matter and cast list are identified by element name.
                xmlid = name
            print(self.chapterText)
            xmlid = xmlid.encode("utf-8")
            self.chapterTitle = "test chapter title"
            self.book.addChapter({'class': "chapter", 'type': "text", 'id': xmlid, 'playorder': str(self.playorder), 'count': str(self.count), 'title': self.chapterTitle, 'file': "main" + str(self.count), 'text': self.chapterText})
            self.chapterText = ""
            self.count += 1
            self.playorder += 1
        self.text = ""
|
||
def parseXML(filename, uid):
    """Parse ``filename`` with ``MyXMLHandler`` and return the resulting book.

    A ``SAXException`` (raised by the handler's element limit, or by the
    parser itself) does not abort the run: the file name and the exception
    text are printed and whatever was collected so far is returned.
    """
    handler = MyXMLHandler()
    handler.setBook(filename, uid)
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    try:
        parser.parse(filename)
    except SAXException as err:
        print("caught")
        print(filename)
        # Show the reason, so a malformed file can be told apart from
        # reaching the element limit.
        print(err)
    return handler.book
|
||
def getXMLBook(filename, uid):
    """Return the book built from the XML file ``filename``.

    Thin wrapper that forwards both arguments to ``parseXML``.
    """
    book = parseXML(filename, uid)
    return book
|
||
|
||
def zipBook(title):
    """Pack the unpacked book directory ``title`` into ``title + ".epub"``.

    Entry names are relative to the book directory. Files whose name starts
    with "." are left out. The ``mimetype`` file, when present, is written as
    the first entry; the remaining entries follow in sorted walk order. The
    working directory of the process is not changed.

    Raises OSError when ``title`` is not an existing directory.
    """
    if not os.path.isdir(title):
        # Fail before an empty archive is left behind.
        raise OSError("book directory not found: %r" % (title,))
    archive = zipfile.ZipFile(title + ".epub", "w")
    try:
        mimetype = os.path.join(title, "mimetype")
        if os.path.isfile(mimetype):
            # EPUB readers expect the mimetype entry at the start of the zip.
            archive.write(mimetype, "mimetype")
        for root, dirs, files in os.walk(title):
            dirs.sort()  # deterministic traversal order
            for fileName in sorted(files):
                if fileName.startswith("."):
                    continue
                path = os.path.join(root, fileName)
                arcname = os.path.relpath(path, title)
                if arcname == "mimetype":
                    continue  # already written first
                archive.write(path, arcname)
    finally:
        archive.close()
|
||
def main():
    """Convert ``hamlet.xml`` into an EPUB named after the book's title.

    Command-line options are parsed (so ``--help`` and option errors work),
    but neither the ``--date`` value nor positional arguments are used yet.
    """
    parser = OptionParser()
    parser.add_option("-d", "--date", dest="date", help="published DATE", metavar="DATE")
    (options, args) = parser.parse_args()

    uid = uuid.uuid4()
    filename = "hamlet.xml"
    book = getXMLBook(filename, uid)
    book.writeBook()
    zipBook(book.title)
|
||
# Script entry point. There is no __name__ guard, so the call also runs when this file is imported.
main()
Oops, something went wrong.