Skip to content

Commit

Permalink
DOCX Input: Add support for cross references. Cross references that a…
Browse files Browse the repository at this point in the history
…re used to insert hyperlinks are now converted into hyperlinks in the output document. Features to insert paragraph/page numbers are not supported.
  • Loading branch information
kovidgoyal committed May 8, 2014
1 parent d3c67c7 commit 24299b1
Showing 1 changed file with 36 additions and 15 deletions.
51 changes: 36 additions & 15 deletions src/calibre/ebooks/docx/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ def parse(raw, log=None):
' f:entry-type g:page-range-separator h:heading k:crossref-separator'
' l:page-number-separator p:letter-range s:sequence-name r:run-together y:yomi z:langcode')

# Instruction parser for REF cross-reference fields. The spec string is a
# space-separated list of ``letter:name`` pairs; presumably each letter is a
# field switch and the name is the key it is reported under (parser() is
# defined outside this view -- confirm there). The module-level name matters:
# Fields.__call__ looks parsers up via globals()['parse_%s' % f].
parse_ref = parser('ref',
'd:separator f:footnote h:hyperlink n:number p:position r:relative-number t:suppress w:number-full-context')

# Instruction parser for NOTEREF fields (same spec format as above, with a
# smaller set of letters).
parse_noteref = parser('noteref',
'f:footnote h:hyperlink p:position')


class Fields(object):

def __init__(self):
Expand Down Expand Up @@ -117,7 +124,7 @@ def __call__(self, doc, log):
if stack:
stack[-1].contents.append(elem)

field_types = ('hyperlink', 'xe', 'index')
field_types = ('hyperlink', 'xe', 'index', 'ref', 'noteref')
parsers = {x.upper():getattr(self, 'parse_'+x) for x in field_types}
field_parsers = {f.upper():globals()['parse_%s' % f] for f in field_types}

Expand All @@ -131,28 +138,42 @@ def __call__(self, doc, log):
if func is not None:
func(field, field_parsers[field.name], log)

def get_runs(self, field):
    '''
    Split the run elements found in ``field.contents`` into per-paragraph
    groups.

    An element whose tag ends with ``}p`` closes the current group, an
    element whose tag ends with ``}r`` is added to the current group and
    every other element is ignored. Empty groups are never returned.

    :param field: object with a ``contents`` sequence of elements, each
                  exposing a string ``tag`` attribute
    :return: list of non-empty lists of run elements, in document order
    '''
    # Runs are grouped per paragraph because a single <a> wrapper can only
    # enclose spans that live in the same paragraph.
    groups = [[]]
    for elem in field.contents:
        tag = elem.tag
        if tag.endswith('}p'):
            # Paragraph boundary: open a new group, but only if the
            # current one actually collected something.
            if groups[-1]:
                groups.append([])
        elif tag.endswith('}r'):
            groups[-1].append(elem)
    if not groups[-1]:
        # Drop the trailing placeholder group when nothing was added to it
        groups.pop()
    return groups

def parse_hyperlink(self, field, parse_func, log):
    '''
    Parse a HYPERLINK field and record the runs it should wrap.

    :param field: the field object; ``field.instructions`` is handed to
                  ``parse_func`` and ``field.contents`` is grouped into
                  runs by :meth:`get_runs`
    :param parse_func: callable ``(instructions, log)`` returning a
                       dict-like description of the hyperlink (falsy when
                       there is nothing to record)
    :param log: logger forwarded to ``parse_func``

    For every per-paragraph group of runs, a ``(hl, runs)`` tuple is
    appended to ``self.hyperlink_fields``. All tuples produced by one
    field share the same ``hl`` object.
    '''
    hl = parse_func(field.instructions, log)
    if hl:
        # A target key that is present but has no value is replaced with
        # '_blank'.
        if 'target' in hl and hl['target'] is None:
            hl['target'] = '_blank'
        # Run grouping lives in get_runs() so that all field types that
        # produce hyperlinks share a single implementation.
        for runs in self.get_runs(field):
            self.hyperlink_fields.append((hl, runs))

def parse_ref(self, field, parse_func, log):
    '''
    Parse a REF/NOTEREF field and, when it is used as a hyperlink, record
    the runs it should wrap.

    :param field: the field object; ``field.instructions`` is handed to
                  ``parse_func``, ``field.name`` is used in the warning
    :param parse_func: callable ``(instructions, log)`` returning a dict
    :param log: logger; forwarded to ``parse_func`` and used to report
                unsupported fields

    If the parsed dict has a non-None value under the ``None`` key
    (presumably the referenced bookmark name -- confirm against the
    parser) and contains the ``'hyperlink'`` key, a
    ``({'anchor': dest}, runs)`` tuple is appended to
    ``self.hyperlink_fields`` for every group returned by
    :meth:`get_runs`. Any other reference field is ignored with a warning.
    '''
    ref = parse_func(field.instructions, log)
    dest = ref.get(None, None)
    if dest is not None and 'hyperlink' in ref:
        for runs in self.get_runs(field):
            self.hyperlink_fields.append(({'anchor':dest}, runs))
    else:
        # Report through the logger that was passed in rather than through
        # an instance attribute, so this branch does not depend on a
        # ``self.log`` having been set.
        log.warn('Unsupported reference field (%s), ignoring: %r' % (field.name, ref))

# NOTEREF fields are handled exactly like REF fields
parse_noteref = parse_ref

def parse_xe(self, field, parse_func, log):
# Parse XE fields
if None in (field.start, field.end):
Expand Down

0 comments on commit 24299b1

Please sign in to comment.