lib/prawn/fonts/ttf.rb

# frozen_string_literal: true

# prawn/font/ttf.rb : Implements AFM font support for Prawn
#
# Copyright May 2008, Gregory Brown / James Healy / Jamis Buck
# All Rights Reserved.
#
# This is free software. Please see the LICENSE and COPYING files for details.

require 'ttfunk'
require 'ttfunk/subset_collection'
require_relative 'to_unicode_cmap'

module Prawn
  module Fonts
    # TrueType font.
    #
    # @note You shouldn't use this class directly.
    class TTF < Font
      # TrueType font error.
      class Error < StandardError
        # @private
        DEFAULT_MESSAGE = 'TTF font error'

        # @private
        MESSAGE_WITH_FONT = 'TTF font error in font %<font>s'

        def initialize(message = DEFAULT_MESSAGE, font: nil)
          if font && message == DEFAULT_MESSAGE
            super(format(MESSAGE_WITH_FONT, font: font))
          else
            super(message)
          end
        end
      end

      # Signals absence of a Unicode character map in the font.
      class NoUnicodeCMap < Error
        # @private
        DEFAULT_MESSAGE = 'No unicode cmap found in font'

        # @private
        MESSAGE_WITH_FONT = 'No unicode cmap found in font %<font>s'
      end

      # Signals absense of a PostScript font name.
      class NoPostscriptName < Error
        # @private
        DEFAULT_MESSAGE = 'Can not detect a postscript name'

        # @private
        MESSAGE_WITH_FONT = 'Can not detect a postscript name in font %<font>s'
      end

      # TTFunk font.
      # @return [TTFunk::File]
      attr_reader :ttf
      attr_reader :subsets

      # Does this font support Unicode?
      #
      # @return [true]
      def unicode?
        true
      end

      # An adapter for subset collection to represent a full font.
      #
      # @private
      class FullFontSubsetsCollection
        FULL_FONT = Object.new.tap do |obj|
          obj.singleton_class.define_method(:inspect) do
            super().insert(-2, ' FULL_FONT')
          end
        end.freeze

        def initialize(original)
          @original = original

          (@cmap ||= original.cmap.unicode.first) || raise(NoUnicodeCMap.new(font: name))

          @code_space_size =
            case cmap.code_map.keys.max
            when 0..0xff then 1
            when 0x100..0xffff then 2
            when 0x10000..0xffffff then 3
            else
              4
            end

          # Codespaces are not sequentional, they're ranges in
          # a multi-dimentional space. Each byte is considered separately. So we
          # have to maximally extend the lower two bytes in order to allow for
          # continuos Unicode mapping.
          # We only keep the highest byte because Unicode only goes to 1FFFFF
          # and fonts usually cover even less of the space. We don't want to
          # list all those unmapped charac codes here.
          @code_space_max = cmap.code_map.keys.max | ('ff' * (code_space_size - 1)).to_i(16)
        end

        # Encode characters.
        #
        # @return [Array<Array(FULL_FONT, String)>]
        def encode(characters)
          [
            [
              FULL_FONT,
              characters.map { |c|
                check_bounds!(c)
                [cmap[c]].pack('n')
              }.join(''),
            ],
          ]
        end

        private

        attr_reader :cmap
        attr_reader :code_space_size
        attr_reader :code_space_max

        def check_bounds!(num)
          if num > code_space_max
            raise Error, "CID (#{num}) exceedes code space size"
          end
        end
      end

      # @param document [Prawn::Document]
      # @param name [String] font file path
      # @param options [Hash]
      # @option options :family [String]
      # @option options :style [Symbol]
      def initialize(document, name, options = {})
        super

        @ttf = read_ttf_file
        @subsets =
          if full_font_embedding
            FullFontSubsetsCollection.new(@ttf)
          else
            TTFunk::SubsetCollection.new(@ttf)
          end
        @italic_angle = nil

        @attributes = {}
        @bounding_boxes = {}
        @char_widths = {}
        @has_kerning_data = @ttf.kerning.exists? && @ttf.kerning.tables.any?

        @ascender = Integer(@ttf.ascent * scale_factor)
        @descender = Integer(@ttf.descent * scale_factor)
        @line_gap = Integer(@ttf.line_gap * scale_factor)
      end

      # Compute width of a string at the specified size, optionally with kerning
      # applied.
      #
      # @param string [String] *must* be encoded as UTF-8
      # @param options [Hash{Symbol => any}]
      # @option options :size [Number]
      # @option options :kerning [Boolean] (false)
      # @return [Number]
      def compute_width_of(string, options = {})
        scale = (options[:size] || size) / 1000.0
        if options[:kerning]
          kern(string).reduce(0) { |s, r|
            if r.is_a?(Numeric)
              s - r
            else
              r.reduce(s) { |a, e| a + character_width_by_code(e) }
            end
          } * scale
        else
          string.codepoints.reduce(0) { |s, r|
            s + character_width_by_code(r)
          } * scale
        end
      end

      # The font bbox.
      #
      # @return [Array(Number, Number, Number, Number)]
      def bbox
        @bbox ||= @ttf.bbox.map { |i| Integer(i * scale_factor) }
      end

      # Does this font contain kerning data.
      #
      # @return [Boolean]
      def has_kerning_data? # rubocop: disable Naming/PredicateName
        @has_kerning_data
      end

      # Perform any changes to the string that need to happen before it is
      # rendered to the canvas. Returns an array of subset "chunks", where the
      # even-numbered indices are the font subset number, and the following
      # entry element is either a string or an array (for kerned text).
      #
      # @param text [String] must be in UTF-8 encoding
      # @param options [Hash{Symbol => any}]
      # @option options :kerning [Boolean]
      # @return [Array<Array(0, (String, Array)>]
      def encode_text(text, options = {})
        text = text.chomp

        if options[:kerning]
          last_subset = nil
          kern(text).reduce([]) do |result, element|
            if element.is_a?(Numeric)
              unless result.last[1].is_a?(Array)
                result.last[1] = [result.last[1]]
              end
              result.last[1] << element
              result
            else
              encoded = @subsets.encode(element)

              if encoded.first[0] == last_subset
                result.last[1] << encoded.first[1]
                encoded.shift
              end

              if encoded.any?
                last_subset = encoded.last[0]
                result + encoded
              else
                result
              end
            end
          end
        else
          @subsets.encode(text.unpack('U*'))
        end
      end

      # Base name of the font.
      #
      # @return [String]
      def basename
        @basename ||= @ttf.name.postscript_name
      end

      # @devnote not sure how to compute this for true-type fonts...
      #
      # @private
      # @return [Number]
      def stem_v
        0
      end

      # @private
      # @return [Number]
      def italic_angle
        return @italic_angle if @italic_angle

        if @ttf.postscript.exists?
          raw = @ttf.postscript.italic_angle
          hi = raw >> 16
          low = raw & 0xFF
          hi = -((hi ^ 0xFFFF) + 1) if hi & 0x8000 != 0
          @italic_angle = Float("#{hi}.#{low}")
        else
          @italic_angle = 0
        end

        @italic_angle
      end

      # @private
      # @return [Number]
      def cap_height
        @cap_height ||=
          begin
            height = (@ttf.os2.exists? && @ttf.os2.cap_height) || 0
            height.zero? ? @ascender : height
          end
      end

      # @private
      # @return [number]
      def x_height
        # FIXME: seems like if os2 table doesn't exist, we could
        # just find the height of the lower-case 'x' glyph?
        (@ttf.os2.exists? && @ttf.os2.x_height) || 0
      end

      # @private
      # @return [Number]
      def family_class
        @family_class ||= ((@ttf.os2.exists? && @ttf.os2.family_class) || 0) >> 8
      end

      # @private
      # @return [Boolean]
      def serif?
        @serif ||= [1, 2, 3, 4, 5, 7].include?(family_class)
      end

      # @private
      # @return [Boolean]
      def script?
        @script ||= family_class == 10
      end

      # @private
      # @return [Integer]
      def pdf_flags
        @pdf_flags ||=
          begin
            flags = 0
            flags |= 0x0001 if @ttf.postscript.fixed_pitch?
            flags |= 0x0002 if serif?
            flags |= 0x0008 if script?
            flags |= 0x0040 if italic_angle != 0
            # Assume the font contains at least some non-latin characters
            flags | 0x0004
          end
      end

      # Normlize text to a compatible encoding.
      #
      # @param text [String]
      # @return [String]
      def normalize_encoding(text)
        text.encode(::Encoding::UTF_8)
      rescue StandardError
        raise Prawn::Errors::IncompatibleStringEncoding,
          "Encoding #{text.encoding} can not be transparently converted to UTF-8. " \
            'Please ensure the encoding of the string you are attempting to use is set correctly'
      end

      # Encode text to UTF-8.
      #
      # @param text [String]
      # @return [String]
      def to_utf8(text)
        text.encode('UTF-8')
      end

      # Does this font has a glyph for the character?
      #
      # @param char [String]
      # @return [Boolean]
      def glyph_present?(char)
        code = char.codepoints.first
        cmap[code].positive?
      end

      # Returns the number of characters in `str` (a UTF-8-encoded string).
      #
      # @param str [String]
      # @return [Integer]
      def character_count(str)
        str.length
      end

      private

      def cmap
        (@cmap ||= @ttf.cmap.unicode.first) || raise(NoUnicodeCMap.new(font: name))
      end

      # +string+ must be UTF8-encoded.
      #
      # Returns an array. If an element is a numeric, it represents the
      # kern amount to inject at that position. Otherwise, the element
      # is an array of UTF-16 characters.
      def kern(string)
        a = []

        string.each_codepoint do |r|
          if a.empty?
            a << [r]
          elsif (kern = kern_pairs_table[[cmap[a.last.last], cmap[r]]])
            kern *= scale_factor
            a << -kern << [r]
          else
            a.last << r
          end
        end

        a
      end

      def kern_pairs_table
        @kern_pairs_table ||=
          if has_kerning_data?
            @ttf.kerning.tables.first.pairs
          else
            {}
          end
      end

      def hmtx
        @hmtx ||= @ttf.horizontal_metrics
      end

      def character_width_by_code(code)
        return 0 unless cmap[code]

        # Some TTF fonts have nonzero widths for \n (UTF-8 / ASCII code: 10).
        # Patch around this as we'll never be drawing a newline with a width.
        return 0.0 if code == 10

        @char_widths[code] ||= Integer(hmtx.widths[cmap[code]] * scale_factor)
      end

      def scale_factor
        @scale_factor ||= 1000.0 / @ttf.header.units_per_em
      end

      def register(subset)
        temp_name = @ttf.name.postscript_name.delete("\0").to_sym
        ref = @document.ref!(Type: :Font, BaseFont: temp_name)

        # Embed the font metrics in the document after everything has been
        # drawn, just before the document is emitted.
        @document.renderer.before_render { |_doc| embed(ref, subset) }

        ref
      end

      def embed(reference, subset)
        if full_font_embedding
          embed_full_font(reference)
        else
          embed_subset(reference, subset)
        end
      end

      def embed_subset(reference, subset)
        font = TTFunk::File.new(@subsets[subset].encode)
        unicode_mapping = @subsets[subset].to_unicode_map
        embed_simple_font(reference, font, unicode_mapping)
      end

      def embed_simple_font(reference, font, unicode_mapping)
        if font_type(font) == :unknown
          raise Error, %(Simple font embedding is not uspported for font "#{font.name}.")
        end

        true_type = font_type(font) == :true_type
        open_type = font_type(font) == :open_type

        # empirically, it looks like Adobe Reader will not display fonts
        # if their font name is more than 33 bytes long. Strange. But true.
        basename = font.name.postscript_name[0, 33].delete("\0")

        raise NoPostscriptName.new(font: font) if basename.nil?

        fontfile = @document.ref!({})
        fontfile.data[:Length1] = font.contents.size
        fontfile.stream << font.contents.string
        fontfile.stream.compress! if @document.compression_enabled?

        descriptor = @document.ref!(
          Type: :FontDescriptor,
          FontName: basename.to_sym,
          FontBBox: bbox,
          Flags: pdf_flags,
          StemV: stem_v,
          ItalicAngle: italic_angle,
          Ascent: @ascender,
          Descent: @descender,
          CapHeight: cap_height,
          XHeight: x_height,
        )

        first_char, last_char = unicode_mapping.keys.minmax
        hmtx = font.horizontal_metrics
        widths =
          (first_char..last_char).map { |code|
            if unicode_mapping.key?(code)
              gid = font.cmap.tables.first.code_map[code]
              Integer(hmtx.widths[gid] * scale_factor)
            else
              # These characters are not in the document so we don't ever use
              # these values but we need to encode them so let's use as little
              # sapce as possible.
              0
            end
          }

        # It would be nice to have Encoding set for the macroman subsets,
        # and only do a ToUnicode cmap for non-encoded unicode subsets.
        # However, apparently Adobe Reader won't render MacRoman encoded
        # subsets if original font contains unicode characters. (It has to
        # be some flag or something that ttfunk is simply copying over...
        # but I can't figure out which flag that is.)
        #
        # For now, it's simplest to just create a unicode cmap for every font.
        # It offends my inner purist, but it'll do.

        to_unicode = @document.ref!({})
        to_unicode << ToUnicodeCMap.new(unicode_mapping).generate
        to_unicode.stream.compress! if @document.compression_enabled?

        reference.data.update(
          BaseFont: basename.to_sym,
          FontDescriptor: descriptor,
          FirstChar: first_char,
          LastChar: last_char,
          Widths: @document.ref!(widths),
          ToUnicode: to_unicode,
        )

        if true_type
          reference.data.update(Subtype: :TrueType)
          descriptor.data.update(FontFile2: fontfile)
        elsif open_type
          @document.renderer.min_version(1.6)
          reference.data.update(Subtype: :Type1)
          descriptor.data.update(FontFile3: fontfile)
          fontfile.data.update(Subtype: :OpenType)
        end
      end

      def embed_full_font(reference)
        embed_composite_font(reference, @ttf)
      end

      def embed_composite_font(reference, font)
        if font_type(font) == :unknown
          raise Error, %(Composite font embedding is not uspported for font "#{font.name}.")
        end

        true_type = font_type(font) == :true_type
        open_type = font_type(font) == :open_type

        fontfile = @document.ref!({})
        fontfile.data[:Length1] = font.contents.size if true_type
        fontfile.data[:Subtype] = :CIDFontType0C if open_type
        fontfile.stream << font.contents.string
        fontfile.stream.compress! if @document.compression_enabled?

        # empirically, it looks like Adobe Reader will not display fonts
        # if their font name is more than 33 bytes long. Strange. But true.
        basename = font.name.postscript_name[0, 33].delete("\0")

        descriptor = @document.ref!(
          Type: :FontDescriptor,
          FontName: basename.to_sym,
          FontBBox: bbox,
          Flags: pdf_flags,
          StemV: stem_v,
          ItalicAngle: italic_angle,
          Ascent: @ascender,
          Descent: @descender,
          CapHeight: cap_height,
          XHeight: x_height,
        )
        descriptor.data[:FontFile2] = fontfile if true_type
        descriptor.data[:FontFile3] = fontfile if open_type

        to_unicode = @document.ref!({})
        to_unicode << ToUnicodeCMap.new(
          font.cmap.unicode.first
          .code_map
          .reject { |cid, gid| gid.zero? || (0xd800..0xdfff).cover?(cid) }
          .invert
          .sort.to_h,
          2, # Identity-H is a 2-byte encoding
        ).generate
        to_unicode.stream.compress! if @document.compression_enabled?

        widths =
          font.horizontal_metrics.widths.map { |w| (w * scale_factor).round }

        child_font = @document.ref!(
          Type: :Font,
          BaseFont: basename.to_sym,
          CIDSystemInfo: {
            Registry: 'Adobe',
            Ordering: 'Identity',
            Supplement: 0,
          },
          FontDescriptor: descriptor,
          W: [0, widths],
        )
        if true_type
          child_font.data.update(
            Subtype: :CIDFontType2,
            CIDToGIDMap: :Identity,
          )
        end
        if open_type
          child_font.data[:Subtype] = :CIDFontType0
        end

        reference.data.update(
          Subtype: :Type0,
          BaseFont: basename.to_sym,
          Encoding: :'Identity-H',
          DescendantFonts: [child_font],
          ToUnicode: to_unicode,
        )
      end

      def font_type(font)
        if font.directory.tables.key?('glyf')
          :true_type
        elsif font.directory.tables.key?('CFF ')
          :open_type
        else
          :unknown
        end
      end

      def read_ttf_file
        TTFunk::File.open(@name)
      end
    end
  end
end