Skip to content

Commit

Permalink
support for DRM-d books
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Peng committed Nov 26, 2010
1 parent 6b1b51c commit 77fe0a2
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 50 deletions.
83 changes: 60 additions & 23 deletions Kindelabra.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,16 @@
import gtk
import kindle

VERSION = '0.1'
FILTER = ['pdf', 'mobi', 'prc', 'txt', 'tpz', 'azw', 'manga']
VERSION = '0.2'

class KindleUI:
'''Interface for manipulating a Kindle collection JSON file
'''
def __init__(self):
self.root = os.getcwd()
self.filemodel = gtk.TreeStore(str, str)
self.filemodel = gtk.TreeStore(str, str, bool)
self.fileview = self.get_view('Files', self.filemodel, 'fileview')
self.colmodel = gtk.TreeStore(str, str)
self.colmodel = gtk.TreeStore(str, str, str)
self.colview = self.get_view('Collections', self.colmodel, 'colview')

self.window = gtk.Window(gtk.WINDOW_TOPLEVEL)
Expand Down Expand Up @@ -112,14 +111,30 @@ def load(self, widget):
else:
self.status("Kindle files not found")

def get_collections(self, colmodel):
def get_collections(self):
for collection in self.db:
citer = colmodel.append(None, [collection, ""])
citer = self.colmodel.append(None, [collection, "", ""])
for namehash in self.db[collection]['items']:
namehash = str(namehash.lstrip("*"))
if re.match('\*[\w]', namehash):
namehash = str(namehash.lstrip("*"))
asin = re.match('\#(\w+)\^\w{4}', namehash)
if asin:
asin = asin.group(1)
book = self.kindle.searchAsin(asin)
namehash = book.hash
if namehash in self.kindle.files:
filename = os.path.basename(self.kindle.files[namehash])
fiter = colmodel.append(citer, [filename, namehash])
if self.kindle.files[namehash].title:
filename = self.kindle.files[namehash].title
else:
filename = os.path.basename(self.kindle.files[namehash].path)
if self.kindle.files[namehash].asin:
asin = self.kindle.files[namehash].asin
else:
asin = ""
fiter = self.colmodel.append(citer, [filename, namehash, asin])
#if asin != "":
#else:
#for row in self.filemodel

def add_collection(self, widget):
(dialog, input_box) = self.collection_prompt("Add Collection", "New Collection name:")
Expand All @@ -131,7 +146,7 @@ def add_collection(self, widget):
if colname == "":
return
if not colname in self.db:
coliter = self.colmodel.append(None, [colname, ""])
coliter = self.colmodel.append(None, [colname, "", ""])
treesel = self.colview.get_selection()
treesel.unselect_all()
treesel.select_iter(coliter)
Expand Down Expand Up @@ -219,6 +234,16 @@ def get_path_value(self, model, row):
piter = model[path].iter
return model.get(piter, 0, 1)

def get_colpath_value(self, model, row):
    '''Return columns 0, 1 and 2 of a collection-model row

    row is either a gtk.TreeRowReference or a tree path tuple; any other
    type yields None. Callers unpack the result as
    (filename, filehash, asin).
    '''
    if isinstance(row, tuple):
        treepath = row
    elif isinstance(row, gtk.TreeRowReference):
        treepath = row.get_path()
    else:
        return None
    return model.get(model[treepath].iter, 0, 1, 2)

def get_hashes(self, filestore, filerows):
filehashes = list()
for row in filerows:
Expand Down Expand Up @@ -269,11 +294,15 @@ def add_file(self, widget):
for colpath, colname in targetcols:
colname = unicode(colname)
if colname in self.db:
if not self.db.in_collection(colname, filehash):
colstore.append(colstore[colpath].iter, [filename, filehash])
self.db.add_filehash(colname, filehash)
else:
self.status("%s is already in collection %s" % (filename, colname))
try:
asin = self.kindle.files[filehash].asin
if not self.db.in_collection(colname, asin):
colstore.append(colstore[colpath].iter, [filename, filehash, asin])
self.db.add_asin(colname, self.kindle.files[filehash].asin, self.kindle.files[filehash].type)
except TypeError:
if not self.db.in_collection(colname, filehash):
colstore.append(colstore[colpath].iter, [filename, filehash, ""])
self.db.add_filehash(colname, filehash)
else:
self.status("No such collection:" + colname)
self.colview.expand_all()
Expand All @@ -288,10 +317,16 @@ def del_file(self, widget):
for row in range(len(ref)):
gtkrow = ref[row]
path = gtkrow.get_path()
(filename, filehash) = self.get_path_value(colstore, gtkrow)
collection = unicode(self.get_path_value(colstore, (path[0], ))[0])
jsonhash = '*' + filehash
if self.db[collection].has_hash(filehash):
(filename, filehash, asin) = self.get_colpath_value(colstore, gtkrow)
collection = unicode(self.get_colpath_value(colstore, (path[0], ))[0])
if asin != '':
book = self.kindle.searchAsin(asin)
asin = "#%s^%s" % (book.asin, book.type)
if self.db[collection].has_hash(asin):
self.db[collection]['items'].remove(asin)
colstore.remove(colstore[path].iter)
elif self.db[collection].has_hash(filehash):
jsonhash = '*' + filehash
self.db[collection]['items'].remove(jsonhash)
colstore.remove(colstore[path].iter)
else:
Expand All @@ -316,7 +351,7 @@ def get_view(self, title, model, name):
def revert(self, widget):
self.db = kindle.CollectionDB(self.colfile)
self.colmodel.clear()
self.get_collections(self.colmodel)
self.get_collections()
self.colview.expand_all()
self.status("Kindle collections reloaded")

Expand Down Expand Up @@ -344,10 +379,12 @@ def get_files(self, filemodel, tree, piter=None, path=""):
for node in tree:
if node == 'files':
for filename in tree['files']:
filehash = self.kindle.get_hash('/mnt/us' + '/'.join([path, filename]))
filemodel.append(piter, [filename, filehash])
filehash = kindle.get_hash('/mnt/us' + '/'.join([path, filename]))
if filehash in self.kindle.files and self.kindle.files[filehash].title:
filename = self.kindle.files[filehash].title
filemodel.append(piter, [filename, filehash, False])
else:
niter = filemodel.append(piter, [node, ""])
niter = filemodel.append(piter, [node, "", False])
self.get_files(filemodel, tree[node], niter, '/'.join([path,node]))

def refresh(self, widget):
Expand Down
50 changes: 50 additions & 0 deletions ebook.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python
#author:Richard Peng
#project:Kindelabra
#website:http://www.richardpeng.com/projects/kindelabra/
#repository:https://github.com/richardpeng/Kindelabra
#license:Creative Commons GNU GPL v2
# (http://creativecommons.org/licenses/GPL/2.0/)

import struct

class Sectionizer:
    '''Read the section table of a BOOKMOBI / TEXtREAd database file

    The file is kept open after construction so that sections can be
    loaded on demand; call close() once no more sections are needed.
    '''
    def __init__(self, filename, perm):
        '''Open filename with mode perm and parse its section offsets

        Raises ValueError when the 8-byte identifier at offset 0x3C is
        neither BOOKMOBI nor TEXtREAd, or when the file is too short to
        hold the header and the section table it announces.
        '''
        # open() replaces the deprecated file() builtin
        self.f = open(filename, perm)
        try:
            header = self.f.read(78)
            self.ident = header[0x3C:0x3C+8]
            if len(header) < 78 or self.ident not in (b'BOOKMOBI', b'TEXtREAd'):
                raise ValueError('invalid file format')
            num_sections, = struct.unpack_from('>H', header, 76)
            table = self.f.read(num_sections*8)
            if len(table) < num_sections*8:
                raise ValueError('invalid file format')
            # Every table entry is two big-endian 32-bit words; only the
            # first of each pair (the section's start offset) is kept.
            # The trailing sentinel lets loadSection() read the last
            # section through to end of file.
            offsets = struct.unpack_from('>%dL' % (num_sections*2), table, 0)[::2]
            self.sections = offsets + (0xfffffff, )
        except Exception:
            # Don't leak the handle when the file is rejected
            self.f.close()
            raise

    def loadSection(self, section):
        '''Return the raw bytes of section number section'''
        before, after = self.sections[section:section+2]
        self.f.seek(before)
        return self.f.read(after - before)

    def close(self):
        '''Close the underlying file'''
        self.f.close()

class Mobi:
    '''Title and EXTH metadata read from section 0 of a Mobipocket file

    Attributes:
    title -- the book title as a byte string; empty when section 0
             carries no MOBI header or the header is too short to hold
             the title fields
    exth  -- dict mapping each EXTH record type (int) to its raw data;
             empty when the file has no EXTH block
    '''
    def __init__(self, filename):
        '''Parse filename; raises ValueError if Sectionizer rejects it'''
        sections = Sectionizer(filename, 'rb')
        try:
            header = sections.loadSection(0)
        finally:
            # Only section 0 is needed, so release the handle right away
            sections.f.close()
        self.title = b''
        self.exth = dict()

        # Sectionizer also accepts TEXtREAd files, whose section 0 has no
        # MOBI header at offset 16; there is nothing more to read then.
        if len(header) < 24 or header[16:20] != b'MOBI':
            return
        len_mobi = struct.unpack_from('>L', header, 20)[0] + 16
        mobi_raw = header[:len_mobi]
        # The title offset/length pair sits at bytes 84..91 of the header
        if len(mobi_raw) >= 92:
            titleoffset, titlelen = struct.unpack_from('>LL', mobi_raw, 84)
            self.title = header[titleoffset:titleoffset+titlelen]

        # The EXTH block is optional; verify its magic before trusting
        # the length word that follows it.
        exth_head = header[len_mobi:len_mobi+12]
        if len(exth_head) < 12 or exth_head[:4] != b'EXTH':
            return
        len_exth, = struct.unpack_from('>L', exth_head, 4)
        exth_records = header[len_mobi:len_mobi+len_exth][12:]
        pos = 0
        while len(exth_records) - pos > 8:
            rectype, reclen = struct.unpack_from('>LL', exth_records, pos)
            if reclen < 8:
                # A record cannot be shorter than its own 8-byte header;
                # stop instead of looping forever on corrupt data.
                break
            self.exth[rectype] = exth_records[pos+8:pos+reclen]
            pos += reclen

def main():
    '''Print EXTH record 113 of book.azw when the file has one'''
    metadata = Mobi('book.azw').exth
    if 113 in metadata:
        print(metadata[113])


if __name__ == "__main__":
    main()
78 changes: 51 additions & 27 deletions kindle.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import json
import sys

import ebook

KINDLEROOT = '/mnt/us'
FILTER = ['pdf', 'mobi', 'prc', 'txt', 'tpz', 'azw', 'manga']
FOLDERS = ['documents', 'pictures']
Expand Down Expand Up @@ -49,14 +51,6 @@ def toKindleDb(self):
tmpjson[tmpkey] = tmpvalue
return tmpjson

# Returns a list of collection names containing a given filehash
def search(self, filehash):
cols = list()
for collection in self:
if self[collection].has_hash(filehash):
cols.append(collection)
return cols

def in_collection(self, collection, filehash):
if self[collection].has_hash(filehash):
return True
Expand All @@ -67,6 +61,29 @@ def add_filehash(self, collection, filehash):
filehash = '*'+filehash
self[collection]['items'].append(filehash)

def add_asin(self, collection, asin, booktype):
    '''Append a book to collection, identified by ASIN and book type

    The entry is stored in the form "#<asin>^<booktype>".
    '''
    self[collection]['items'].append("#%s^%s" % (asin, booktype))

class Ebook(object):
    '''A book file on the Kindle together with the metadata read from it

    Attributes:
    path  -- the file's path on the Kindle filesystem (get_kindle_path)
    hash  -- get_hash() of that path
    meta  -- the parsed ebook.Mobi object, or None
    title -- title from the Mobipocket header, replaced by EXTH record
             503 when that record is present; None when unavailable
    asin  -- EXTH record 113, or None
    type  -- EXTH record 501, or None
    '''
    def __init__(self, path):
        # Local import: struct is needed only for the error type below
        import struct

        self.path = get_kindle_path(path)
        self.hash = get_hash(self.path)
        self.title = None
        self.meta = None
        self.asin = None
        self.type = None
        ext = os.path.splitext(path)[1][1:]
        if ext not in ['mobi', 'azw', 'prc']:
            return
        try:
            self.meta = ebook.Mobi(path)
        except (ValueError, IOError, struct.error):
            # An unreadable or non-Mobipocket file must not abort the
            # scan of the whole library; keep it listed without metadata.
            return
        self.title = self.meta.title
        exth = self.meta.exth
        self.asin = exth.get(113)
        self.type = exth.get(501)
        self.title = exth.get(503, self.title)

class Kindle:
'''Access a Kindle filesystem
'''
Expand All @@ -82,19 +99,26 @@ def init_data(self):

for path in self.files:
regex = re.compile(r'.*?/(%s)' % '|'.join(FOLDERS))
self.get_filenodes(self.filetree, re.sub(regex, r'\1', self.files[path]).split('/'))
self.get_filenodes(self.filetree, re.sub(regex, r'\1', self.files[path].path).split('/'))

def load_folder(self, path):
sys.stdout.write("Loading " + path)
for root, dirs, files in os.walk(os.path.join(self.root, path)):
for filename in files:
if os.path.splitext(filename)[1][1:] in FILTER:
kindlepath = self.get_kindle_path(root, filename)
filehash = self.get_hash(kindlepath)
self.files[filehash] = kindlepath
fullpath = os.path.abspath(os.path.join(root, filename))
book = Ebook(fullpath)
self.files[book.hash] = book
sys.stdout.write(".")
sys.stdout.write("\n")

def searchAsin(self, asin):
    '''Returns the Ebook whose ASIN equals asin

    Returns None when asin is empty or no loaded file carries it.
    '''
    if not asin:
        return None
    for book in self.files.values():
        # Compare against the requested ASIN; merely testing that the
        # book has one would return an arbitrary DRM-d book.
        if book.asin == asin:
            return book
    return None

# Adds files to the dictionary: tree
def get_filenodes(self, tree, nodes):
if len(nodes) > 1:
Expand All @@ -106,24 +130,24 @@ def get_filenodes(self, tree, nodes):
tree['files'] = list()
tree['files'].append(nodes[0])

# Returns a full path on the kindle filesystem
def get_kindle_path(self, folder, filename):
return '/'.join([KINDLEROOT, re.sub(r'.*(documents|pictures)', r'\1', folder), filename]).replace('\\', '/')

# Returns a SHA-1 hash
def get_hash(self, path):
path = unicode(path).encode('utf-8')
return hashlib.sha1(path).hexdigest()

# Reports whether self.root contains both a 'documents' and a 'system' folder
def is_connected(self):
    has_documents = os.path.exists(os.path.join(self.root, 'documents'))
    has_system = os.path.exists(os.path.join(self.root, 'system'))
    return has_documents and has_system

def searchTitle(self, title):
matches = list()
for filehash in self.files:
if re.search(title, self.files[filehash], re.IGNORECASE):
matches.append((filehash, self.files[filehash]))
return matches
# Maps a local file path to the matching path under KINDLEROOT:
# everything before the last 'documents' or 'pictures' in the folder
# part is dropped and backslashes are converted to forward slashes.
def get_kindle_path(path):
    folder, filename = os.path.split(os.path.normpath(path))
    relative = re.sub(r'.*(documents|pictures)', r'\1', folder)
    joined = '/'.join([KINDLEROOT, relative, filename])
    return joined.replace('\\', '/')

# Returns the SHA-1 hex digest of a path
def get_hash(path):
    '''Return the SHA-1 hex digest of path

    Text is encoded as UTF-8 before hashing. A byte string is hashed as
    it is: decoding it first with the default codec would raise
    UnicodeDecodeError for any non-ASCII file name.
    '''
    if not isinstance(path, bytes):
        path = path.encode('utf-8')
    return hashlib.sha1(path).hexdigest()

if __name__ == "__main__":
    # Manual smoke test: load the Kindle tree found under ./Kindle
    device = Kindle("Kindle")
    device.init_data()

0 comments on commit 77fe0a2

Please sign in to comment.