Skip to content

Commit

Permalink
Edit Book: Add a 'Links' report to the Reports tool
Browse files Browse the repository at this point in the history
  • Loading branch information
kovidgoyal committed Jan 24, 2015
1 parent 0aa5efc commit 2495790
Show file tree
Hide file tree
Showing 3 changed files with 252 additions and 33 deletions.
88 changes: 86 additions & 2 deletions src/calibre/ebooks/oeb/polish/report.py
Expand Up @@ -11,7 +11,7 @@
from itertools import chain

from calibre import prepare_string_for_xml, force_unicode
from calibre.ebooks.oeb.base import XPath
from calibre.ebooks.oeb.base import XPath, xml2text
from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES, OEB_FONTS
from calibre.ebooks.oeb.polish.css import build_selector, PSEUDO_PAT, MIN_SPACE_RE
from calibre.ebooks.oeb.polish.spell import get_all_words
Expand Down Expand Up @@ -95,6 +95,90 @@ def images_data(container, book_locale):
posixpath.basename(name), len(image_data), *safe_img_data(container, name, mt)))
return tuple(image_data)

def description_for_anchor(elem):
    """Return a short (at most 30 character) description for ``elem``.

    Candidates are tried in order: the ``title`` attribute, the element's own
    leading text, the leading text of its first child and, for elements with
    only a few descendants, the output of ``xml2text(elem)``. The first
    candidate that is long enough after stripping whitespace wins.

    :param elem: Element exposing ``get()``, ``text``, ``len()``, indexing
        and ``iterdescendants()``.
    :return: The stripped, truncated description, or None if no candidate
        qualified.
    """
    def usable(candidate, min_len=4):
        # None for missing, empty or too-short text; otherwise the stripped
        # text cut down to 30 characters.
        if not candidate:
            return None
        stripped = candidate.strip()
        if len(stripped) < min_len:
            return None
        return stripped[:30]

    summary = usable(elem.get('title'))
    if summary is None:
        summary = usable(elem.text)
    if summary is None and len(elem) > 0:
        summary = usable(elem[0].text)
    if summary is not None:
        return summary

    # Get full text for tags that have only a few descendants: give up as
    # soon as a seventh descendant element is seen.
    descendant_count = 0
    for _ignored in elem.iterdescendants('*'):
        descendant_count += 1
        if descendant_count > 6:
            return None
    # Any non-empty text is acceptable for the full-text fallback
    return usable(xml2text(elem), min_len=1)

def create_anchor_map(root, pat, name):
    """Map every anchor defined in a document to its location and description.

    :param root: Parsed document tree; it is only handed to ``pat``.
    :param pat: Callable returning the candidate anchor elements of ``root``.
    :param name: Name of the file being scanned, stored in each LinkLocation.
    :return: dict mapping anchor -> ``(LinkLocation, description)``. When the
        same anchor is defined more than once, the first definition wins.
    """
    ans = {}
    for elem in pat(root):
        # An element can carry both an id and a name with different values.
        # Either one is a valid link target, so register both instead of
        # only the first non-empty attribute.
        anchors = [a for a in (elem.get('id'), elem.get('name')) if a and a not in ans]
        if not anchors:
            continue
        desc = description_for_anchor(elem)
        for anchor in anchors:
            if anchor not in ans:  # guards against id == name on one element
                ans[anchor] = (LinkLocation(name, elem.sourceline, anchor), desc)
    return ans

# Destination anchor of a link: fragment id, where it is defined, and a short
# description of the element defining it.
Anchor = namedtuple('Anchor', 'id location text')
# Raw record type; build instances through Link() so that ``ok`` is derived.
L = namedtuple('Link', 'location text is_external href path_ok anchor_ok anchor ok')

def Link(location, text, is_external, href, path_ok, anchor_ok, anchor):
    """Build a link record, deriving its overall ``ok`` status.

    ``ok`` is None for external links; for all other links it is
    ``path_ok and anchor_ok``.
    """
    overall = None if is_external else (path_ok and anchor_ok)
    return L(
        location=location, text=text, is_external=is_external, href=href,
        path_ok=path_ok, anchor_ok=anchor_ok, anchor=anchor, ok=overall)

def links_data(container, book_locale):
    """Yield a Link record for every ``<a href>`` in the book's documents.

    Works in two passes: first every document is scanned to collect its
    anchors and its links, then each link is resolved against the collected
    anchors. The second pass needs the anchors of all documents, which is why
    nothing is yielded during the first pass.

    :param container: Book container; its ``mime_map`` and ``parsed()`` are used.
    :param book_locale: Unused here.
    :return: Generator of Link records.
    """
    anchor_map = {}
    links = []
    anchor_pat = XPath('//*[@id or @name]')
    link_pat = XPath('//h:a[@href]')
    for name, mt in container.mime_map.iteritems():
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)
        anchor_map[name] = create_anchor_map(root, anchor_pat, name)
        for a in link_pat(root):
            href = a.get('href')
            text = description_for_anchor(a)
            # Build the location before branching: it is needed for empty
            # hrefs as well. Previously it was only assigned in the non-empty
            # branch, so an empty href either raised an unbound-local error
            # or silently reused the location of the preceding link.
            location = LinkLocation(name, a.sourceline, href)
            if href:
                base, frag = href.partition('#')[0::2]
                if frag and not base:
                    # Pure fragment link: points into the current file
                    dest = name
                else:
                    dest = safe_href_to_name(container, href, name)
                links.append((base, frag, dest, location, text))
            else:
                links.append(('', '', None, location, text))

    for base, frag, dest, location, text in links:
        if dest is None:
            # No destination name: reported as an external (unverified) link
            yield Link(location, text, True, base, True, True, Anchor(frag, None, None))
            continue
        if dest not in anchor_map:
            # Destination is not one of the scanned documents
            yield Link(location, text, False, dest, False, False, Anchor(frag, None, None))
            continue
        loc = LinkLocation(dest, None, None)
        if not frag:
            yield Link(location, text, False, dest, True, True, Anchor(None, loc, None))
            continue
        anchor = anchor_map[dest].get(frag)
        if anchor is None:
            # File exists but the fragment is not defined in it
            yield Link(location, text, False, dest, True, False, Anchor(frag, loc, None))
        else:
            yield Link(location, text, False, dest, True, True, Anchor(frag, *anchor))

Word = namedtuple('Word', 'id word locale usage')

def words_data(container, book_locale):
Expand Down Expand Up @@ -235,7 +319,7 @@ def matches_for_selector(selector, root):
def gather_data(container, book_locale):
timing = {}
data = {}
for x in 'files chars images words css'.split():
for x in 'files chars images links words css'.split():
st = time.time()
data[x] = globals()[x + '_data'](container, book_locale)
if isinstance(data[x], types.GeneratorType):
Expand Down
29 changes: 1 addition & 28 deletions src/calibre/gui2/tweak_book/completion/basic.py
Expand Up @@ -12,9 +12,9 @@
from PyQt5.Qt import QObject, pyqtSignal, Qt

from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.base import xml2text
from calibre.ebooks.oeb.polish.container import OEB_STYLES, OEB_FONTS, name_to_href
from calibre.ebooks.oeb.polish.parsing import parse
from calibre.ebooks.oeb.polish.report import description_for_anchor
from calibre.gui2 import is_gui_thread
from calibre.gui2.tweak_book import current_container, editors
from calibre.gui2.tweak_book.completion.utils import control, data, DataError
Expand Down Expand Up @@ -91,33 +91,6 @@ def complete_names(names_data, data_conn):
descriptions = {href:d(name) for name, href in nmap.iteritems()}
return items, descriptions, {}


def description_for_anchor(elem):
    """Return a short (at most 30 character) description for ``elem``.

    The ``title`` attribute, the element's leading text and the leading text
    of its first child are tried in that order. If none is long enough and
    the element has only a few descendants, ``xml2text(elem)`` is used.

    :return: The stripped, truncated description, or None if nothing suitable
        was found.
    """
    def shorten(raw, min_len=4):
        # Strip and truncate; None when the text is missing or too short
        if raw:
            raw = raw.strip()
            if len(raw) >= min_len:
                return raw[:30]
        return None

    def quick_candidates():
        # Lazily produced so later sources are only read when needed
        yield elem.get('title')
        yield elem.text
        if len(elem) > 0:
            yield elem[0].text

    for raw in quick_candidates():
        desc = shorten(raw)
        if desc is not None:
            return desc

    # Get full text for tags that have only a few descendants
    few_descendants = True
    for position, _node in enumerate(elem.iterdescendants('*')):
        if position > 5:
            few_descendants = False
            break
    if few_descendants:
        return shorten(xml2text(elem), min_len=1)
    return None

def create_anchor_map(root):
ans = {}
for elem in root.xpath('//*[@id or @name]'):
Expand Down
168 changes: 165 additions & 3 deletions src/calibre/gui2/tweak_book/reports.py
Expand Up @@ -6,7 +6,7 @@
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'

import time
import time, textwrap, os
from threading import Thread
from future_builtins import map
from operator import itemgetter
Expand All @@ -22,12 +22,12 @@
QListWidgetItem, QLineEdit, QStackedWidget, QSplitter, QByteArray, QPixmap,
QStyledItemDelegate, QModelIndex, QRect, QStyle, QPalette, QTimer, QMenu,
QAbstractItemModel, QTreeView, QFont, QRadioButton, QHBoxLayout,
QFontDatabase, QComboBox)
QFontDatabase, QComboBox, QUrl, QWebView)

from calibre import human_readable, fit_image
from calibre.constants import DEBUG
from calibre.ebooks.oeb.polish.report import gather_data, CSSEntry, CSSFileMatch, MatchLocation
from calibre.gui2 import error_dialog, question_dialog, choose_save_file
from calibre.gui2 import error_dialog, question_dialog, choose_save_file, open_url
from calibre.gui2.tweak_book import current_container, tprefs, dictionaries
from calibre.gui2.tweak_book.widgets import Dialog
from calibre.gui2.progress_indicator import ProgressIndicator
Expand Down Expand Up @@ -112,6 +112,7 @@ class FilesView(QTableView):

double_clicked = pyqtSignal(object)
delete_requested = pyqtSignal(object, object)
current_changed = pyqtSignal(object, object)
DELETE_POSSIBLE = True

def __init__(self, model, parent=None):
Expand All @@ -126,6 +127,10 @@ def __init__(self, model, parent=None):
self.setContextMenuPolicy(Qt.CustomContextMenu)
self.customContextMenuRequested.connect(self.show_context_menu)

def currentChanged(self, current, previous):
    # Run the base class handling first
    QTableView.currentChanged(self, current, previous)
    # Re-emit the change with both indexes mapped through the proxy, so
    # listeners receive source-model indexes.
    to_source = self.proxy.mapToSource
    self.current_changed.emit(to_source(current), to_source(previous))

def customize_context_menu(self, menu, selected_locations, current_location):
    """Do nothing; all three arguments are ignored.

    NOTE(review): presumably a hook for subclasses or instances that want to
    add their own entries to ``menu`` -- confirm against the callers.
    """
    return None

Expand Down Expand Up @@ -479,6 +484,159 @@ def save(self):
self.files.save_table('image-files-table')
# }}}

# Links {{{

class LinksModel(FileCollection):
    """Table model for the Links report: one row per link, six columns."""

    COLUMN_HEADERS = [_('OK'), _('Source'), _('Source text'), _('Target'), _('Anchor'), _('Target text')]

    def __init__(self, parent=None):
        FileCollection.__init__(self, parent)
        # Number of links whose ``ok`` field is exactly False
        self.num_bad = 0

    def __call__(self, data):
        """Replace the model contents with ``data['links']``.

        The links collection must support ``len()`` and repeated iteration.
        """
        self.beginResetModel()
        self.links = self.files = data['links']
        self.total_size = len(self.links)
        # ``ok`` is tri-state; only an explicit False counts as bad
        self.num_bad = len([link for link in self.links if link.ok is False])
        psk = numeric_sort_key

        def row_sort_key(link):
            # One entry per column, in COLUMN_HEADERS order
            anchor = link.anchor
            return (
                link.ok,
                psk(link.location.name),
                psk(link.text or ''),
                psk(link.href or ''),
                psk(anchor.id or ''),
                psk(anchor.text or ''),
            )

        self.sort_keys = tuple([row_sort_key(link) for link in self.links])
        self.endResetModel()

    def data(self, index, role=Qt.DisplayRole):
        """Return the value for ``index`` under ``role``, or None.

        SORT_ROLE yields the precomputed sort key, DisplayRole the cell text,
        ToolTipRole a tooltip for columns 0, 2 and 5, and UserRole the link
        record of the row. Out-of-range rows yield None.
        """
        if role == SORT_ROLE:
            try:
                return self.sort_keys[index.row()][index.column()]
            except IndexError:
                return None

        if role == Qt.DisplayRole:
            column = index.column()
            try:
                link = self.links[index.row()]
            except IndexError:
                return None
            if column == 0:
                return {True:'✓ ', False:'✗'}.get(link.ok)
            if column == 1:
                return link.location.name
            if column == 2:
                return link.text
            if column == 3:
                return link.href
            if column in (4, 5):
                anchor = link.anchor
                return anchor.id if column == 4 else anchor.text
            return None

        if role == Qt.ToolTipRole:
            column = index.column()
            try:
                link = self.links[index.row()]
            except IndexError:
                return None
            if column == 0:
                return {True:_('The link destination exists'), False:_('The link destination does not exist')}.get(
                    link.ok, _('The link destination could not be verified'))
            if column == 2:
                long_text = link.text
            elif column == 5:
                long_text = link.anchor.text
            else:
                return None
            # Wrap long descriptions so the tooltip stays readable
            return textwrap.fill(long_text) if long_text else None

        if role == Qt.UserRole:
            try:
                return self.links[index.row()]
            except IndexError:
                return None

        return None

class WebView(QWebView):
    """QWebView that reports a fixed preferred size of 600x200."""

    def sizeHint(self):
        preferred_width, preferred_height = 600, 200
        return QSize(preferred_width, preferred_height)

class LinksWidget(QWidget):
    """Links report page.

    A filter box sits above a vertical splitter that holds the table of links
    and a web view previewing the destination of the current link.
    """

    def __init__(self, parent=None):
        QWidget.__init__(self, parent)
        self.l = l = QVBoxLayout(self)

        self.filter_edit = e = QLineEdit(self)
        l.addWidget(e)
        self.splitter = s = QSplitter(Qt.Vertical, self)
        l.addWidget(s)
        e.setPlaceholderText(_('Filter'))
        self.model = m = LinksModel(self)
        self.links = f = FilesView(m, self)
        f.DELETE_POSSIBLE = False
        self.to_csv = f.to_csv
        f.double_clicked.connect(self.double_clicked)
        e.textChanged.connect(f.proxy.filter_text)
        s.addWidget(f)
        self.links.restore_table('links-table', sort_column=1)
        self.view = WebView(self)
        s.addWidget(self.view)
        # True while __call__ is repopulating the table; current_changed()
        # does nothing during that time.
        self.ignore_current_change = False
        # URL currently shown in the preview, None when showing a message
        self.current_url = None
        f.current_changed.connect(self.current_changed)
        try:
            s.restoreState(read_state('links-view-splitter'))
        except TypeError:
            # Best effort. Presumably read_state() returned something that
            # restoreState() cannot accept, e.g. nothing saved yet -- confirm.
            pass
        s.setCollapsible(0, False)
        s.setCollapsible(1, True)
        s.setStretchFactor(0, 10)

    @staticmethod
    def _external_href(link):
        """Return the href of an external link with its fragment re-attached,
        or None when the link has no href."""
        if not link.href:
            return None
        frag = ('#' + link.anchor.id) if link.anchor.id else ''
        return link.href + frag

    def __call__(self, data):
        """Load fresh report ``data`` and reset the filter and the preview."""
        self.ignore_current_change = True
        try:
            self.model(data)
            self.filter_edit.clear()
            self.links.resize_rows()
            self.view.setHtml('<p>'+_(
                'Click entries above to see their destination here'))
            # The preview no longer shows any URL. Without this reset,
            # selecting a link with the same URL as before the refresh would
            # be treated as "already shown" and never loaded.
            self.current_url = None
        finally:
            # Always re-enable previews, even if the refresh failed
            self.ignore_current_change = False

    def current_changed(self, current, previous):
        """Show the destination of the newly current link in the preview."""
        if self.ignore_current_change:
            return
        link = current.data(Qt.UserRole)
        if link is None:
            return
        url = None
        if link.is_external:
            href = self._external_href(link)
            if href:
                url = QUrl(href)
        elif link.anchor.location:
            path = current_container().name_to_abspath(link.anchor.location.name)
            if path and os.path.exists(path):
                url = QUrl.fromLocalFile(path)
                if link.anchor.id:
                    url.setFragment(link.anchor.id)
        if url is None:
            self.view.setHtml('<p>' + _('No destination found for this link'))
            self.current_url = url
        elif url != self.current_url:
            # Avoid reloading the page when the destination is unchanged
            self.current_url = url
            self.view.setUrl(url)

    def double_clicked(self, index):
        """Jump to the source of the link (first three columns) or to its
        destination (remaining columns)."""
        link = index.data(Qt.UserRole)
        if link is None:
            return
        if index.column() < 3:
            # Jump to source
            jump_to_location(link.location)
        else:
            # Jump to destination
            if link.is_external:
                # Open the same URL the preview shows, fragment included
                href = self._external_href(link)
                if href:
                    open_url(href)
            elif link.anchor.location:
                jump_to_location(link.anchor.location)

    def save(self):
        """Persist the table layout and the splitter position."""
        self.links.save_table('links-table')
        save_state('links-view-splitter', bytearray(self.splitter.saveState()))
# }}}

# Words {{{

class WordsModel(FileCollection):
Expand Down Expand Up @@ -952,6 +1110,10 @@ def __init__(self, parent=None):
s.addWidget(c)
QListWidgetItem(_('Characters'), r)

self.links = li = LinksWidget(self)
s.addWidget(li)
QListWidgetItem(_('Links'), r)

self.splitter.setStretchFactor(1, 500)
try:
self.splitter.restoreState(read_state('splitter-state'))
Expand Down

0 comments on commit 2495790

Please sign in to comment.