support for DRM-d books

richardpeng · Nov 26, 2010 · 77fe0a2 · 77fe0a2
1 parent 6b1b51c
commit 77fe0a2
Show file tree

Hide file tree

Showing 3 changed files with 161 additions and 50 deletions.
diff --git a/Kindelabra.py b/Kindelabra.py
@@ -14,17 +14,16 @@
 import gtk
 import kindle
 
-VERSION = '0.1'
-FILTER = ['pdf', 'mobi', 'prc', 'txt', 'tpz', 'azw', 'manga']
+VERSION = '0.2'
 
 class KindleUI:
     '''Interface for manipulating a Kindle collection JSON file
     '''
     def __init__(self):
         self.root = os.getcwd()
-        self.filemodel = gtk.TreeStore(str, str)
+        self.filemodel = gtk.TreeStore(str, str, bool)
         self.fileview = self.get_view('Files', self.filemodel, 'fileview')
-        self.colmodel = gtk.TreeStore(str, str)
+        self.colmodel = gtk.TreeStore(str, str, str)
         self.colview = self.get_view('Collections', self.colmodel, 'colview')
 
         self.window = gtk.Window(gtk.WINDOW_TOPLEVEL)
@@ -112,14 +111,30 @@ def load(self, widget):
             else:
                 self.status("Kindle files not found")
 
-    def get_collections(self, colmodel):
+    def get_collections(self):
+        '''Fill self.colmodel with one row per collection in self.db and,
+        beneath it, one child row [title or filename, hash, asin] for every
+        item that matches a file loaded in self.kindle.files.
+
+        Items are stored either as '*<hash>' or as '#<ASIN>^<type>'; items
+        that cannot be resolved to a loaded file are left out of the view.
+        '''
         for collection in self.db:
-            citer = colmodel.append(None, [collection, ""])
+            citer = self.colmodel.append(None, [collection, "", ""])
             for namehash in self.db[collection]['items']:
-                namehash = str(namehash.lstrip("*"))
+                if re.match(r'\*[\w]', namehash):
+                    namehash = str(namehash.lstrip("*"))
+                asin = re.match(r'\#(\w+)\^\w{4}', namehash)
+                if asin:
+                    asin = asin.group(1)
+                    book = self.kindle.searchAsin(asin)
+                    # searchAsin can return None; leave namehash untouched
+                    # in that case so the lookup below skips the item
+                    # instead of failing on book.hash
+                    if book is not None:
+                        namehash = book.hash
                 if namehash in self.kindle.files:
-                    filename = os.path.basename(self.kindle.files[namehash])
-                    fiter = colmodel.append(citer, [filename, namehash])
+                    if self.kindle.files[namehash].title:
+                        filename = self.kindle.files[namehash].title
+                    else:
+                        filename = os.path.basename(self.kindle.files[namehash].path)
+                    if self.kindle.files[namehash].asin:
+                        asin = self.kindle.files[namehash].asin
+                    else:
+                        asin = ""
+                    fiter = self.colmodel.append(citer, [filename, namehash, asin])
 
     def add_collection(self, widget):
         (dialog, input_box) = self.collection_prompt("Add Collection", "New Collection name:")
@@ -131,7 +146,7 @@ def add_collection(self, widget):
         if colname == "":
             return
         if not colname in self.db:
-            coliter = self.colmodel.append(None, [colname, ""])
+            coliter = self.colmodel.append(None, [colname, "", ""])
             treesel = self.colview.get_selection()
             treesel.unselect_all()
             treesel.select_iter(coliter)
@@ -219,6 +234,16 @@ def get_path_value(self, model, row):
         piter = model[path].iter
         return model.get(piter, 0, 1)
 
+    def get_colpath_value(self, model, row):
+        '''Return columns 0, 1 and 2 of the model row addressed by row.
+
+        row may be a gtk.TreeRowReference or a tree path tuple; any other
+        type yields None.
+        '''
+        if isinstance(row, gtk.TreeRowReference):
+            # Resolve the reference to the path it currently points at
+            row = row.get_path()
+        elif not isinstance(row, tuple):
+            return None
+        return model.get(model[row].iter, 0, 1, 2)
+
     def get_hashes(self, filestore, filerows):
         filehashes = list()
         for row in filerows:
@@ -269,11 +294,15 @@ def add_file(self, widget):
             for colpath, colname in targetcols:
                 colname = unicode(colname)
                 if colname in self.db:
-                    if not self.db.in_collection(colname, filehash):
-                        colstore.append(colstore[colpath].iter, [filename, filehash])
-                        self.db.add_filehash(colname, filehash)
-                    else:
-                        self.status("%s is already in collection %s" % (filename, colname))
+                    try:
+                        asin = self.kindle.files[filehash].asin
+                        if not self.db.in_collection(colname, asin):
+                            colstore.append(colstore[colpath].iter, [filename, filehash, asin])
+                            self.db.add_asin(colname, self.kindle.files[filehash].asin, self.kindle.files[filehash].type)
+                    except TypeError:
+                        if not self.db.in_collection(colname, filehash):
+                            colstore.append(colstore[colpath].iter, [filename, filehash, ""])
+                            self.db.add_filehash(colname, filehash)
                 else:
                     self.status("No such collection:" + colname)
         self.colview.expand_all()
@@ -288,10 +317,16 @@ def del_file(self, widget):
         for row in range(len(ref)):
             gtkrow = ref[row]
             path = gtkrow.get_path()
-            (filename, filehash) = self.get_path_value(colstore, gtkrow)
-            collection = unicode(self.get_path_value(colstore, (path[0], ))[0])
-            jsonhash = '*' + filehash
-            if self.db[collection].has_hash(filehash):
+            (filename, filehash, asin) = self.get_colpath_value(colstore, gtkrow)
+            collection = unicode(self.get_colpath_value(colstore, (path[0], ))[0])
+            if asin != '':
+                book = self.kindle.searchAsin(asin)
+                asin = "#%s^%s" % (book.asin, book.type)
+                if self.db[collection].has_hash(asin):
+                    self.db[collection]['items'].remove(asin)
+                    colstore.remove(colstore[path].iter)
+            elif self.db[collection].has_hash(filehash):
+                jsonhash = '*' + filehash
                 self.db[collection]['items'].remove(jsonhash)
                 colstore.remove(colstore[path].iter)
             else:
@@ -316,7 +351,7 @@ def get_view(self, title, model, name):
     def revert(self, widget):
         self.db = kindle.CollectionDB(self.colfile)
         self.colmodel.clear()
-        self.get_collections(self.colmodel)
+        self.get_collections()
         self.colview.expand_all()
         self.status("Kindle collections reloaded")
 
@@ -344,10 +379,12 @@ def get_files(self, filemodel, tree, piter=None, path=""):
         for node in tree:
             if node == 'files':
                 for filename in tree['files']:
-                    filehash = self.kindle.get_hash('/mnt/us' + '/'.join([path, filename]))
-                    filemodel.append(piter, [filename, filehash])
+                    filehash = kindle.get_hash('/mnt/us' + '/'.join([path, filename]))
+                    if filehash in self.kindle.files and self.kindle.files[filehash].title:
+                        filename = self.kindle.files[filehash].title
+                    filemodel.append(piter, [filename, filehash, False])
             else:
-                niter = filemodel.append(piter, [node, ""])
+                niter = filemodel.append(piter, [node, "", False])
                 self.get_files(filemodel, tree[node], niter, '/'.join([path,node]))
 
     def refresh(self, widget):

diff --git a/ebook.py b/ebook.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+#author:Richard Peng
+#project:Kindelabra
+#website:http://www.richardpeng.com/projects/kindelabra/
+#repository:https://github.com/richardpeng/Kindelabra
+#license:Creative Commons GNU GPL v2
+# (http://creativecommons.org/licenses/GPL/2.0/)
+
+import struct
+
+class Sectionizer:
+    '''Random access to the records ("sections") of a Palm database file.
+
+    filename -- path of the file to open
+    perm     -- mode string handed to open(); binary mode is expected
+
+    Raises ValueError if the file is neither a BOOKMOBI nor a TEXtREAd
+    database. The file stays open in self.f so loadSection() can seek in
+    it; call close() when finished.
+    '''
+    def __init__(self, filename, perm):
+        self.f = open(filename, perm)
+        try:
+            header = self.f.read(78)
+            # The 8-byte identifier sits at offset 0x3C of the header
+            self.ident = header[0x3C:0x3C+8]
+            if self.ident != b'BOOKMOBI' and self.ident != b'TEXtREAd':
+                raise ValueError('invalid file format')
+            # Big-endian section count at offset 76, then 8 bytes per section
+            num_sections, = struct.unpack_from('>H', header, 76)
+            sections = self.f.read(num_sections*8)
+            # Keep every other long (the start offsets) and append a
+            # sentinel so the last section reads through to end of file
+            self.sections = struct.unpack_from('>%dL' % (num_sections*2), sections, 0)[::2] + (0xfffffff, )
+        except Exception:
+            # Do not leak the handle when the file turns out to be unusable
+            self.f.close()
+            raise
+
+    def close(self):
+        '''Close the underlying file.'''
+        self.f.close()
+
+    def loadSection(self, section):
+        '''Return the raw bytes of section number section.'''
+        before, after = self.sections[section:section+2]
+        self.f.seek(before)
+        return self.f.read(after - before)
+
+class Mobi:
+    '''Title and EXTH metadata read from section 0 of a Mobipocket file.
+
+    Attributes:
+    title -- the title bytes located via the offset/length pair stored at
+             offset 84 of section 0; empty when there is no MOBI header
+    exth  -- dict mapping EXTH record type (int) to the record's raw data;
+             empty when the file carries no EXTH block
+
+    Raises ValueError (from Sectionizer) for unrecognised files and
+    struct.error when section 0 is too short to parse.
+    '''
+    def __init__(self, filename):
+        self.title = b''
+        self.exth = dict()
+        sections = Sectionizer(filename, 'rb')
+        try:
+            header = sections.loadSection(0)
+        finally:
+            # Only section 0 is needed, so release the file right away
+            sections.f.close()
+        if header[16:20] != b'MOBI':
+            # No MOBI header after the first 16 bytes (Sectionizer also
+            # accepts TEXtREAd files): nothing further to parse
+            return
+        len_mobi = struct.unpack_from('>L', header, 20)[0] + 16
+        mobi_raw = header[:len_mobi]
+        titleoffset, titlelen = struct.unpack_from('>LL', mobi_raw, 84)
+        self.title = header[titleoffset:titleoffset+titlelen]
+        if header[len_mobi:len_mobi+4] != b'EXTH':
+            # No EXTH block follows the MOBI header
+            return
+        len_exth, = struct.unpack_from('>L', header, len_mobi+4)
+        # Skip the 12-byte EXTH preamble; what remains are the records
+        exth_records = header[len_mobi:len_mobi+len_exth][12:]
+        while len(exth_records) > 8:
+            rectype, reclen = struct.unpack_from('>LL', exth_records)
+            if reclen < 8:
+                # A record shorter than its own 8-byte header would never
+                # advance the cursor; treat the rest as corrupt
+                break
+            self.exth[rectype] = exth_records[8:reclen]
+            exth_records = exth_records[reclen:]
+
+def main():
+    '''Print EXTH record 113 of a book, if the book has one.
+
+    The file is taken from the first command-line argument and defaults
+    to 'book.azw' in the current directory.
+    '''
+    import sys
+    filename = sys.argv[1] if len(sys.argv) > 1 else 'book.azw'
+    m = Mobi(filename)
+    if 113 in m.exth:
+        print(m.exth[113])
+
+if __name__ == "__main__":
+    main()
diff --git a/kindle.py b/kindle.py
@@ -12,6 +12,8 @@
 import json
 import sys
 
+import ebook
+
 KINDLEROOT = '/mnt/us'
 FILTER = ['pdf', 'mobi', 'prc', 'txt', 'tpz', 'azw', 'manga']
 FOLDERS = ['documents', 'pictures']
@@ -49,14 +51,6 @@ def toKindleDb(self):
             tmpjson[tmpkey] = tmpvalue
         return tmpjson
 
-    # Returns a list of collection names containing a given filehash
-    def search(self, filehash):
-        cols = list()
-        for collection in self:
-            if self[collection].has_hash(filehash):
-                cols.append(collection)
-        return cols
-
     def in_collection(self, collection, filehash):
         if self[collection].has_hash(filehash):
             return True
@@ -67,6 +61,29 @@ def add_filehash(self, collection, filehash):
         filehash = '*'+filehash
         self[collection]['items'].append(filehash)
 
+    def add_asin(self, collection, asin, booktype):
+        '''Append the item key '#<asin>^<booktype>' to collection's items.
+        '''
+        self[collection]['items'].append("#%s^%s" % (asin, booktype))
+
+class Ebook():
+    '''Metadata for one book file.
+
+    path -- location of the file on the local filesystem
+
+    Attributes:
+    path  -- the file's path as returned by get_kindle_path()
+    hash  -- get_hash() of that path
+    meta  -- ebook.Mobi instance for mobi/azw/prc files that could be
+             parsed, otherwise None
+    title -- EXTH record 503 if present, else the Mobi title, else None
+    asin  -- EXTH record 113 if present, else None
+    type  -- EXTH record 501 if present, else None
+    '''
+    def __init__(self, path):
+        import struct
+        self.path = get_kindle_path(path)
+        self.hash = get_hash(self.path)
+        self.title = None
+        self.meta = None
+        self.asin = None
+        self.type = None
+        ext = os.path.splitext(path)[1][1:]
+        if ext in ['mobi', 'azw', 'prc']:
+            try:
+                self.meta = ebook.Mobi(path)
+            except (ValueError, struct.error):
+                # The file is not something the Mobi parser understands (or
+                # is truncated). Keep the defaults so that one bad file
+                # does not abort loading every other book.
+                return
+            self.title = self.meta.title
+            if 113 in self.meta.exth:
+                self.asin = self.meta.exth[113]
+            if 501 in self.meta.exth:
+                self.type = self.meta.exth[501]
+            if 503 in self.meta.exth:
+                self.title = self.meta.exth[503]
+
 class Kindle:
     '''Access a Kindle filesystem
     '''
@@ -82,19 +99,26 @@ def init_data(self):
 
             for path in self.files:
                 regex = re.compile(r'.*?/(%s)' % '|'.join(FOLDERS))
-                self.get_filenodes(self.filetree, re.sub(regex, r'\1', self.files[path]).split('/'))
+                self.get_filenodes(self.filetree, re.sub(regex, r'\1', self.files[path].path).split('/'))
 
     def load_folder(self, path):
+        '''Walk self.root/path and store an Ebook in self.files, keyed by
+        its hash, for every file whose extension is listed in FILTER.
+        Writes one progress dot to stdout per book.
+        '''
         sys.stdout.write("Loading " + path)
         for root, dirs, files in os.walk(os.path.join(self.root, path)):
             for filename in files:
                 if os.path.splitext(filename)[1][1:] in FILTER:
-                    kindlepath = self.get_kindle_path(root, filename)
-                    filehash = self.get_hash(kindlepath)
-                    self.files[filehash] = kindlepath
+                    fullpath = os.path.abspath(os.path.join(root, filename))
+                    book = Ebook(fullpath)
+                    self.files[book.hash] = book
                     sys.stdout.write(".")
         sys.stdout.write("\n")
 
+    def searchAsin(self, asin):
+        '''Returns the Ebook in self.files whose asin equals asin, or None
+        when asin is empty or no loaded book carries it.
+        '''
+        if not asin:
+            # Books without metadata have asin None; never match on that
+            return None
+        for filehash in self.files:
+            # Compare against the requested ASIN rather than returning the
+            # first book that merely has one
+            if self.files[filehash].asin == asin:
+                return self.files[filehash]
+        return None
+
     # Adds files to the dictionary: tree
     def get_filenodes(self, tree, nodes):
         if len(nodes) > 1:
@@ -106,24 +130,24 @@ def get_filenodes(self, tree, nodes):
                 tree['files'] = list()
             tree['files'].append(nodes[0])
 
-    # Returns a full path on the kindle filesystem
-    def get_kindle_path(self, folder, filename):
-        return '/'.join([KINDLEROOT, re.sub(r'.*(documents|pictures)', r'\1', folder), filename]).replace('\\', '/')
-
-    # Returns a SHA-1 hash
-    def get_hash(self, path):
-        path = unicode(path).encode('utf-8')
-        return hashlib.sha1(path).hexdigest()
-
     # Checks if the specified folder is a Kindle filestructure
     def is_connected(self):
         docs = os.path.exists(os.path.join(self.root, 'documents'))
         sys = os.path.exists(os.path.join(self.root, 'system'))
         return docs and sys
 
-    def searchTitle(self, title):
-        matches = list()
-        for filehash in self.files:
-            if re.search(title, self.files[filehash], re.IGNORECASE):
-                matches.append((filehash, self.files[filehash]))
-        return matches
+# Maps a local file path to the path used on the kindle filesystem
+def get_kindle_path(path):
+    folder, filename = os.path.split(os.path.normpath(path))
+    # Drop everything in front of the last 'documents' or 'pictures'
+    relfolder = re.sub(r'.*(documents|pictures)', r'\1', folder)
+    kindlepath = '/'.join([KINDLEROOT, relfolder, filename])
+    # normpath may have produced backslashes; the result always uses '/'
+    return kindlepath.replace('\\', '/')
+
+# Computes the SHA-1 hex digest of path, encoded as UTF-8
+def get_hash(path):
+    encoded = unicode(path).encode('utf-8')
+    digest = hashlib.sha1(encoded)
+    return digest.hexdigest()
+
+# Manual check when run as a script: build a Kindle for "Kindle"
+# (presumably the root folder -- confirm against Kindle.__init__) and load it
+if __name__ == "__main__":
+    k = Kindle("Kindle")
+    k.init_data()