Skip to content

Commit

Permalink
Parse PsfontsMap entries on demand.
Browse files Browse the repository at this point in the history
See the previous commit for a description of pdftex.map.  The vast majority
of entries (tens of thousands) in pdftex.map actually end up being
unused, so parsing them is just wasted work.  This patch takes advantage of
the fact that we can quickly recover the TeX font name from a pdftex.map
entry (it's just the first word), so we can very quickly build a
mapping of TeX font names to unparsed pdftex.map entries, and then parse
only the few entries that we actually need, on demand.  This speeds up e.g.
```
python -c 'from pylab import *; rcParams["text.usetex"] = True; plot(); savefig("/tmp/test.pdf")'
```
by ~700ms (~20%) on the matplotlib macos.
  • Loading branch information
anntzer committed Feb 19, 2021
1 parent 6e7ed9d commit ba7f9fd
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 64 deletions.
121 changes: 60 additions & 61 deletions lib/matplotlib/dviread.py
Expand Up @@ -838,24 +838,30 @@ class PsfontsMap:
{'slant': 0.16700000000000001}
>>> entry.filename
"""
__slots__ = ('_font', '_filename')
__slots__ = ('_filename', '_unparsed', '_parsed')

# Create a filename -> PsfontsMap cache, so that calling
# `PsfontsMap(filename)` with the same filename a second time immediately
# returns the same object.
@lru_cache()
def __new__(cls, filename):
self = object.__new__(cls)
self._font = {}
self._filename = os.fsdecode(filename)
# Some TeX distributions have enormous pdftex.map files which would
# take hundreds of milliseconds to parse, but it is easy enough to just
# store the unparsed lines (keyed by the first word, which is the
# texname) and parse them on-demand.
with open(filename, 'rb') as file:
self._parse(file)
self._unparsed = {line.split(b' ', 1)[0]: line for line in file}
self._parsed = {}
return self

def __getitem__(self, texname):
assert isinstance(texname, bytes)
if texname in self._unparsed:
self._parse_and_cache_line(self._unparsed.pop(texname))
try:
result = self._font[texname]
return self._parsed[texname]
except KeyError:
fmt = ('A PostScript file for the font whose TeX name is "{0}" '
'could not be found in the file "{1}". The dviread module '
Expand All @@ -864,21 +870,14 @@ def __getitem__(self, texname):
'This problem can often be solved by installing '
'a suitable PostScript font package in your (TeX) '
'package manager.')
msg = fmt.format(texname.decode('ascii'), self._filename)
msg = textwrap.fill(msg, break_on_hyphens=False,
break_long_words=False)
_log.info(msg)
_log.info(textwrap.fill(
fmt.format(texname.decode('ascii'), self._filename),
break_on_hyphens=False, break_long_words=False))
raise
fn, enc = result.filename, result.encoding
if fn is not None and not fn.startswith(b'/'):
fn = find_tex_file(fn)
if enc is not None and not enc.startswith(b'/'):
enc = find_tex_file(result.encoding)
return result._replace(filename=fn, encoding=enc)

def _parse(self, file):

def _parse_and_cache_line(self, line):
"""
Parse the font mapping file.
Parse a line in the font mapping file.
The format is, AFAIK: texname fontname [effects and filenames]
Effects are PostScript snippets like ".177 SlantFont",
Expand All @@ -898,50 +897,50 @@ def _parse(self, file):
# http://tex.stackexchange.com/questions/10826/
# http://article.gmane.org/gmane.comp.tex.pdftex/4914

word_re = re.compile(br'"([^"]*)(?:"|$)|(\S+)')
for line in file:
if not line or line.startswith((b" ", b"%", b"*", b";", b"#")):
continue
tfmname = basename = special = encodingfile = fontfile = None
matches = word_re.finditer(line)
for match in matches:
quoted, unquoted = match.groups()
if unquoted:
if unquoted.startswith(b"<<"): # font
fontfile = unquoted[2:]
elif unquoted.startswith(b"<["): # encoding
encodingfile = unquoted[2:]
elif unquoted.startswith(b"<"): # font or encoding
if unquoted == b"<":
word = next(filter(None, next(matches).groups()))
if unquoted.endswith(b".enc"):
encodingfile = word
else:
fontfile = word
else:
if unquoted.endswith(b".enc"):
encodingfile = unquoted[1:]
else:
fontfile = unquoted[1:]
elif tfmname is None:
tfmname = unquoted
elif basename is None:
basename = unquoted
elif quoted:
special = quoted
if basename is None:
basename = tfmname
effects = {}
if special:
words = reversed(special.split())
for word in words:
if word == b"SlantFont":
effects["slant"] = float(next(words))
elif word == b"ExtendFont":
effects["extend"] = float(next(words))
self._font[tfmname] = PsFont(
texname=tfmname, psname=basename, effects=effects,
encoding=encodingfile, filename=fontfile)
if not line or line.startswith((b" ", b"%", b"*", b";", b"#")):
return
tfmname = basename = special = encodingfile = fontfile = None
matches = re.finditer(br'"([^"]*)(?:"|$)|(\S+)', line)
for match in matches:
quoted, unquoted = match.groups()
if unquoted:
if unquoted.startswith(b"<<"): # font
fontfile = unquoted[2:]
elif unquoted.startswith(b"<["): # encoding
encodingfile = unquoted[2:]
elif unquoted.startswith(b"<"): # font or encoding
word = (
# <foo => foo
unquoted[1:]
# < by itself => read the next word
or next(filter(None, next(matches).groups())))
if word.endswith(b".enc"):
encodingfile = word
else:
fontfile = word
elif tfmname is None:
tfmname = unquoted
elif basename is None:
basename = unquoted
elif quoted:
special = quoted
if basename is None:
basename = tfmname
effects = {}
if special:
words = reversed(special.split())
for word in words:
if word == b"SlantFont":
effects["slant"] = float(next(words))
elif word == b"ExtendFont":
effects["extend"] = float(next(words))
if encodingfile is not None and not encodingfile.startswith(b"/"):
encodingfile = find_tex_file(encodingfile)
if fontfile is not None and not fontfile.startswith(b"/"):
fontfile = find_tex_file(fontfile)
self._parsed[tfmname] = PsFont(
texname=tfmname, psname=basename, effects=effects,
encoding=encodingfile, filename=fontfile)


@_api.deprecated("3.3")
Expand Down
6 changes: 3 additions & 3 deletions lib/matplotlib/tests/baseline_images/dviread/test.map
@@ -1,10 +1,10 @@
% used by test_dviread.py
TeXfont1 PSfont1 <font1.pfb <font1.enc
TeXfont2 PSfont2 <font2.enc <font2.pfa
TeXfont3 PSfont3 "1.23 UnknownEffect" <[enc3.foo <font3.pfa
TeXfont3 PSfont3 "1.23 UnknownEffect" <[enc3.foo < font3.pfa
TeXfont4 PSfont4 "-0.1 SlantFont 2.2 ExtendFont" <font4.enc <font4.pfa
TeXfont5 PSfont5 <encoding1.enc <encoding2.enc <font5.pfb
TeXfont6 PSfont6
TeXfont7 PSfont7 <font7.enc
TeXfont8 PSfont8 <font8.pfb
TeXfont7 PSfont7 < font7.enc
TeXfont8 PSfont8 <<font8.pfb
TeXfont9 </absolute/font9.pfb

0 comments on commit ba7f9fd

Please sign in to comment.