lib/matplotlib/afm.py

"""
This is a python interface to Adobe Font Metrics Files.  Although a
number of other python implementations exist (and may be more complete
than mine) I decided not to go with them because either they were
either

  1) copyrighted or used a non-BSD compatible license

  2) had too many dependencies and I wanted a free standing lib

  3) Did more than I needed and it was easier to write my own than
     figure out how to just get what I needed from theirs

It is pretty easy to use, and requires only built-in python libs::

    >>> from afm import AFM
    >>> fh = open('ptmr8a.afm')
    >>> afm = AFM(fh)
    >>> afm.string_width_height('What the heck?')
    (6220.0, 683)
    >>> afm.get_fontname()
    'Times-Roman'
    >>> afm.get_kern_dist('A', 'f')
    0
    >>> afm.get_kern_dist('A', 'y')
    -92.0
    >>> afm.get_bbox_char('!')
    [130, -9, 238, 676]
    >>> afm.get_bbox_font()
    [-168, -218, 1000, 898]


AUTHOR:
  John D. Hunter <jdh2358@gmail.com>
"""

from __future__ import print_function

import sys
import os
import re
from _mathtext_data import uni2type1

#Convert string the a python type

# some afm files have floats where we are expecting ints -- there is
# probably a better way to handle this (support floats, round rather
# than truncate).  But I don't know what the best approach is now and
# this change to _to_int should at least prevent mpl from crashing on
# these JDH (2009-11-06)


def _to_int(x):
    return int(float(x))

_to_float = float
if sys.version_info[0] >= 3:
    def _to_str(x):
        return x.decode('utf8')
else:
    _to_str = str


def _to_list_of_ints(s):
    s = s.replace(b',', b' ')
    return [_to_int(val) for val in s.split()]


def _to_list_of_floats(s):
    return [_to_float(val) for val in s.split()]


def _to_bool(s):
    if s.lower().strip() in (b'false', b'0', b'no'):
        return False
    else:
        return True


def _sanity_check(fh):
    """
    Check if the file at least looks like AFM.
    If not, raise :exc:`RuntimeError`.
    """

    # Remember the file position in case the caller wants to
    # do something else with the file.
    pos = fh.tell()
    try:
        line = fh.readline()
    finally:
        fh.seek(pos, 0)

    # AFM spec, Section 4: The StartFontMetrics keyword [followed by a
    # version number] must be the first line in the file, and the
    # EndFontMetrics keyword must be the last non-empty line in the
    # file. We just check the first line.
    if not line.startswith(b'StartFontMetrics'):
        raise RuntimeError('Not an AFM file')


def _parse_header(fh):
    """
    Reads the font metrics header (up to the char metrics) and returns
    a dictionary mapping *key* to *val*.  *val* will be converted to the
    appropriate python type as necessary; e.g.:

        * 'False'->False
        * '0'->0
        * '-168 -218 1000 898'-> [-168, -218, 1000, 898]

    Dictionary keys are

      StartFontMetrics, FontName, FullName, FamilyName, Weight,
      ItalicAngle, IsFixedPitch, FontBBox, UnderlinePosition,
      UnderlineThickness, Version, Notice, EncodingScheme, CapHeight,
      XHeight, Ascender, Descender, StartCharMetrics

    """
    headerConverters = {
        b'StartFontMetrics': _to_float,
        b'FontName': _to_str,
        b'FullName': _to_str,
        b'FamilyName': _to_str,
        b'Weight': _to_str,
        b'ItalicAngle': _to_float,
        b'IsFixedPitch': _to_bool,
        b'FontBBox': _to_list_of_ints,
        b'UnderlinePosition': _to_int,
        b'UnderlineThickness': _to_int,
        b'Version': _to_str,
        b'Notice': _to_str,
        b'EncodingScheme': _to_str,
        b'CapHeight': _to_float,  # Is the second version a mistake, or
        b'Capheight': _to_float,  # do some AFM files contain 'Capheight'? -JKS
        b'XHeight': _to_float,
        b'Ascender': _to_float,
        b'Descender': _to_float,
        b'StdHW': _to_float,
        b'StdVW': _to_float,
        b'StartCharMetrics': _to_int,
        b'CharacterSet': _to_str,
        b'Characters': _to_int,
        }

    d = {}
    while 1:
        line = fh.readline()
        if not line:
            break
        line = line.rstrip()
        if line.startswith(b'Comment'):
            continue
        lst = line.split(b' ', 1)
        #print '%-s\t%-d line :: %-s' % ( fh.name, len(lst), line )
        key = lst[0]
        if len(lst) == 2:
            val = lst[1]
        else:
            val = b''
        #key, val = line.split(' ', 1)
        try:
            d[key] = headerConverters[key](val)
        except ValueError:
            print('Value error parsing header in AFM:',
                  key, val, file=sys.stderr)
            continue
        except KeyError:
            print('Found an unknown keyword in AFM header (was %s)' % key,
                file=sys.stderr)
            continue
        if key == b'StartCharMetrics':
            return d
    raise RuntimeError('Bad parse')


def _parse_char_metrics(fh):
    """
    Return a character metric dictionary.  Keys are the ASCII num of
    the character, values are a (*wx*, *name*, *bbox*) tuple, where
    *wx* is the character width, *name* is the postscript language
    name, and *bbox* is a (*llx*, *lly*, *urx*, *ury*) tuple.

    This function is incomplete per the standard, but thus far parses
    all the sample afm files tried.
    """

    ascii_d = {}
    name_d = {}
    while 1:
        line = fh.readline()
        if not line:
            break
        line = line.rstrip()
        if line.startswith(b'EndCharMetrics'):
            return ascii_d, name_d
        vals = line.split(b';')[:4]
        if len(vals) != 4:
            raise RuntimeError('Bad char metrics line: %s' % line)
        num = _to_int(vals[0].split()[1])
        wx = _to_float(vals[1].split()[1])
        name = vals[2].split()[1]
        name = name.decode('ascii')
        bbox = _to_list_of_floats(vals[3][2:])
        bbox = map(int, bbox)
        # Workaround: If the character name is 'Euro', give it the
        # corresponding character code, according to WinAnsiEncoding (see PDF
        # Reference).
        if name == 'Euro':
            num = 128
        if num != -1:
            ascii_d[num] = (wx, name, bbox)
        name_d[name] = (wx, bbox)
    raise RuntimeError('Bad parse')


def _parse_kern_pairs(fh):
    """
    Return a kern pairs dictionary; keys are (*char1*, *char2*) tuples and
    values are the kern pair value.  For example, a kern pairs line like
    ``KPX A y -50``

    will be represented as::

      d[ ('A', 'y') ] = -50

    """

    line = fh.readline()
    if not line.startswith(b'StartKernPairs'):
        raise RuntimeError('Bad start of kern pairs data: %s' % line)

    d = {}
    while 1:
        line = fh.readline()
        if not line:
            break
        line = line.rstrip()
        if len(line) == 0:
            continue
        if line.startswith(b'EndKernPairs'):
            fh.readline()  # EndKernData
            return d
        vals = line.split()
        if len(vals) != 4 or vals[0] != b'KPX':
            raise RuntimeError('Bad kern pairs line: %s' % line)
        c1, c2, val = _to_str(vals[1]), _to_str(vals[2]), _to_float(vals[3])
        d[(c1, c2)] = val
    raise RuntimeError('Bad kern pairs parse')


def _parse_composites(fh):
    """
    Return a composites dictionary.  Keys are the names of the
    composites.  Values are a num parts list of composite information,
    with each element being a (*name*, *dx*, *dy*) tuple.  Thus a
    composites line reading:

      CC Aacute 2 ; PCC A 0 0 ; PCC acute 160 170 ;

    will be represented as::

      d['Aacute'] = [ ('A', 0, 0), ('acute', 160, 170) ]

    """
    d = {}
    while 1:
        line = fh.readline()
        if not line:
            break
        line = line.rstrip()
        if len(line) == 0:
            continue
        if line.startswith(b'EndComposites'):
            return d
        vals = line.split(b';')
        cc = vals[0].split()
        name, numParts = cc[1], _to_int(cc[2])
        pccParts = []
        for s in vals[1:-1]:
            pcc = s.split()
            name, dx, dy = pcc[1], _to_float(pcc[2]), _to_float(pcc[3])
            pccParts.append((name, dx, dy))
        d[name] = pccParts

    raise RuntimeError('Bad composites parse')


def _parse_optional(fh):
    """
    Parse the optional fields for kern pair data and composites

    return value is a (*kernDict*, *compositeDict*) which are the
    return values from :func:`_parse_kern_pairs`, and
    :func:`_parse_composites` if the data exists, or empty dicts
    otherwise
    """
    optional = {
        b'StartKernData': _parse_kern_pairs,
        b'StartComposites':  _parse_composites,
        }

    d = {b'StartKernData': {}, b'StartComposites': {}}
    while 1:
        line = fh.readline()
        if not line:
            break
        line = line.rstrip()
        if len(line) == 0:
            continue
        key = line.split()[0]

        if key in optional:
            d[key] = optional[key](fh)

    l = (d[b'StartKernData'], d[b'StartComposites'])
    return l


def parse_afm(fh):
    """
    Parse the Adobe Font Metics file in file handle *fh*. Return value
    is a (*dhead*, *dcmetrics*, *dkernpairs*, *dcomposite*) tuple where
    *dhead* is a :func:`_parse_header` dict, *dcmetrics* is a
    :func:`_parse_composites` dict, *dkernpairs* is a
    :func:`_parse_kern_pairs` dict (possibly {}), and *dcomposite* is a
    :func:`_parse_composites` dict (possibly {})
    """
    _sanity_check(fh)
    dhead = _parse_header(fh)
    dcmetrics_ascii, dcmetrics_name = _parse_char_metrics(fh)
    doptional = _parse_optional(fh)
    return dhead, dcmetrics_ascii, dcmetrics_name, doptional[0], doptional[1]


class AFM(object):

    def __init__(self, fh):
        """
        Parse the AFM file in file object *fh*
        """
        (dhead, dcmetrics_ascii, dcmetrics_name, dkernpairs, dcomposite) = \
            parse_afm(fh)
        self._header = dhead
        self._kern = dkernpairs
        self._metrics = dcmetrics_ascii
        self._metrics_by_name = dcmetrics_name
        self._composite = dcomposite

    def get_bbox_char(self, c, isord=False):
        if not isord:
            c = ord(c)
        wx, name, bbox = self._metrics[c]
        return bbox

    def string_width_height(self, s):
        """
        Return the string width (including kerning) and string height
        as a (*w*, *h*) tuple.
        """
        if not len(s):
            return 0, 0
        totalw = 0
        namelast = None
        miny = 1e9
        maxy = 0
        for c in s:
            if c == '\n':
                continue
            wx, name, bbox = self._metrics[ord(c)]
            l, b, w, h = bbox

            # find the width with kerning
            try:
                kp = self._kern[(namelast, name)]
            except KeyError:
                kp = 0
            totalw += wx + kp

            # find the max y
            thismax = b + h
            if thismax > maxy:
                maxy = thismax

            # find the min y
            thismin = b
            if thismin < miny:
                miny = thismin
            namelast = name

        return totalw, maxy - miny

    def get_str_bbox_and_descent(self, s):
        """
        Return the string bounding box
        """
        if not len(s):
            return 0, 0, 0, 0
        totalw = 0
        namelast = None
        miny = 1e9
        maxy = 0
        left = 0
        if not isinstance(s, unicode):
            s = s.decode('ascii')
        for c in s:
            if c == '\n':
                continue
            name = uni2type1.get(ord(c), 'question')
            try:
                wx, bbox = self._metrics_by_name[name]
            except KeyError:
                name = 'question'
                wx, bbox = self._metrics_by_name[name]
            l, b, w, h = bbox
            if l < left:
                left = l
            # find the width with kerning
            try:
                kp = self._kern[(namelast, name)]
            except KeyError:
                kp = 0
            totalw += wx + kp

            # find the max y
            thismax = b + h
            if thismax > maxy:
                maxy = thismax

            # find the min y
            thismin = b
            if thismin < miny:
                miny = thismin
            namelast = name

        return left, miny, totalw, maxy - miny, -miny

    def get_str_bbox(self, s):
        """
        Return the string bounding box
        """
        return self.get_str_bbox_and_descent(s)[:4]

    def get_name_char(self, c, isord=False):
        """
        Get the name of the character, ie, ';' is 'semicolon'
        """
        if not isord:
            c = ord(c)
        wx, name, bbox = self._metrics[c]
        return name

    def get_width_char(self, c, isord=False):
        """
        Get the width of the character from the character metric WX
        field
        """
        if not isord:
            c = ord(c)
        wx, name, bbox = self._metrics[c]
        return wx

    def get_width_from_char_name(self, name):
        """
        Get the width of the character from a type1 character name
        """
        wx, bbox = self._metrics_by_name[name]
        return wx

    def get_height_char(self, c, isord=False):
        """
        Get the height of character *c* from the bounding box.  This
        is the ink height (space is 0)
        """
        if not isord:
            c = ord(c)
        wx, name, bbox = self._metrics[c]
        return bbox[-1]

    def get_kern_dist(self, c1, c2):
        """
        Return the kerning pair distance (possibly 0) for chars *c1*
        and *c2*
        """
        name1, name2 = self.get_name_char(c1), self.get_name_char(c2)
        return self.get_kern_dist_from_name(name1, name2)

    def get_kern_dist_from_name(self, name1, name2):
        """
        Return the kerning pair distance (possibly 0) for chars
        *name1* and *name2*
        """
        try:
            return self._kern[(name1, name2)]
        except:
            return 0

    def get_fontname(self):
        "Return the font name, e.g., 'Times-Roman'"
        return self._header[b'FontName']

    def get_fullname(self):
        "Return the font full name, e.g., 'Times-Roman'"
        name = self._header.get(b'FullName')
        if name is None:  # use FontName as a substitute
            name = self._header[b'FontName']
        return name

    def get_familyname(self):
        "Return the font family name, e.g., 'Times'"
        name = self._header.get(b'FamilyName')
        if name is not None:
            return name

        # FamilyName not specified so we'll make a guess
        name = self.get_fullname()
        extras = br'(?i)([ -](regular|plain|italic|oblique|bold|semibold|light|ultralight|extra|condensed))+$'
        return re.sub(extras, '', name)

    def get_weight(self):
        "Return the font weight, e.g., 'Bold' or 'Roman'"
        return self._header[b'Weight']

    def get_angle(self):
        "Return the fontangle as float"
        return self._header[b'ItalicAngle']

    def get_capheight(self):
        "Return the cap height as float"
        return self._header[b'CapHeight']

    def get_xheight(self):
        "Return the xheight as float"
        return self._header[b'XHeight']

    def get_underline_thickness(self):
        "Return the underline thickness as float"
        return self._header[b'UnderlineThickness']

    def get_horizontal_stem_width(self):
        """
        Return the standard horizontal stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdHW', None)

    def get_vertical_stem_width(self):
        """
        Return the standard vertical stem width as float, or *None* if
        not specified in AFM file.
        """
        return self._header.get(b'StdVW', None)


if __name__ == '__main__':
    #pathname = '/usr/local/lib/R/afm/'
    pathname = '/usr/local/share/fonts/afms/adobe'

    for fname in os.listdir(pathname):
        with open(os.path.join(pathname, fname)) as fh:
            afm = AFM(fh)
            w, h = afm.string_width_height('John Hunter is the Man!')