Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change text box to return remaining text as UTF-8, improve Win1252 handling internally, raise errors or warnings rather than silently replacing invalid glyphs #793

Closed
wants to merge 7 commits into from
29 changes: 0 additions & 29 deletions data/encodings/win_ansi.txt

This file was deleted.

1 change: 0 additions & 1 deletion lib/prawn.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def configuration(*args)
require_relative "prawn/security"
require_relative "prawn/document"
require_relative "prawn/font"
require_relative "prawn/encoding"
require_relative "prawn/measurements"
require_relative "prawn/repeater"
require_relative "prawn/outline"
Expand Down
2 changes: 1 addition & 1 deletion lib/prawn/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class Document

VALID_OPTIONS = [:page_size, :page_layout, :margin, :left_margin,
:right_margin, :top_margin, :bottom_margin, :skip_page_creation,
:compress, :skip_encoding, :background, :info,
:compress, :background, :info,
:text_formatter, :print_scaling]

# Any module added to this array will be included into instances of
Expand Down
33 changes: 0 additions & 33 deletions lib/prawn/encoding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,39 +82,6 @@ class WinAnsi #:nodoc:
oslash ugrave uacute ucircumflex
udieresis yacute thorn ydieresis
]

def initialize
  # Location of the WinAnsi mapping table shipped in Prawn's data directory.
  @mapping_file = "#{Prawn::DATADIR}/encodings/win_ansi.txt"

  # The mapping is stored on the class, so it only needs to be loaded while
  # that shared table is still empty.
  return unless self.class.mapping.empty?

  load_mapping
end

# Converts a Unicode codepoint into a valid WinAnsi single byte character.
#
# If there is no WinAnsi equivalent for a character, a _ will be substituted.
#
def [](codepoint)
  if codepoint > 255
    # Only a handful of codepoints above 255 have equivalents in WinAnsi;
    # they are looked up in the class-level mapping. Anything without an
    # entry is replaced with 95 (an underscore).
    self.class.mapping[codepoint] || 95
  else
    # Codepoints up to and including 255 are returned unchanged as the
    # single byte value.
    codepoint
  end
end

def self.mapping
  # Lazily create the table on first access and hand back the same Hash
  # object on every later call.
  return @mapping if @mapping

  @mapping = {}
end

private

def load_mapping
  # Read the mapping file line by line. Each line that contains a
  # "<single byte>;<four hex digit codepoint>" pair adds one entry to the
  # class-level table, keyed by codepoint. Lines without such a pair are
  # ignored.
  File.open(@mapping_file, "r:BINARY") do |file|
    file.each do |line|
      found = line.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
      next unless found

      byte_hex = found[1]
      codepoint_hex = found[2]
      self.class.mapping["0x#{codepoint_hex}".hex] = "0x#{byte_hex}".hex
    end
  end
end
end
end
end
31 changes: 20 additions & 11 deletions lib/prawn/font/afm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,20 @@
#
# This is free software. Please see the LICENSE and COPYING files for details.

require_relative '../../prawn/encoding'
require_relative "../encoding"

module Prawn
class Font

# @private

class AFM < Font
# Class-level flag: when true, the internationalization warning emitted for
# non-ASCII text drawn with a non-unicode font is suppressed.
class << self
  attr_reader :hide_m17n_warning
  attr_writer :hide_m17n_warning
end

# Warnings are shown by default.
@hide_m17n_warning = false

BUILT_INS = %w[ Courier Helvetica Times-Roman Symbol ZapfDingbats
Courier-Bold Courier-Oblique Courier-BoldOblique
Times-Bold Times-Italic Times-BoldItalic
Expand Down Expand Up @@ -44,7 +50,6 @@ def initialize(document, name, options={}) #:nodoc:

super

@@winansi ||= Prawn::Encoding::WinAnsi.new # parse data/encodings/win_ansi.txt once only
@@font_data ||= SynchronizedCache.new # parse each ATM font file once only

file_name = @name.dup
Expand Down Expand Up @@ -94,11 +99,17 @@ def has_kerning_data?
# is replaced with a string in WinAnsi encoding.
#
def normalize_encoding(text)
  # Transcode to Windows-1252, the single-byte character set used for the
  # PDF built-in fonts. String#encode raises when the input contains bytes
  # that are invalid in its source encoding, or characters that have no
  # Windows-1252 equivalent.
  text.encode("windows-1252")
rescue ::Encoding::InvalidByteSequenceError,
       ::Encoding::UndefinedConversionError
  # Surface both failures as Prawn's own error type instead of silently
  # substituting a replacement glyph. The leading :: keeps the constant
  # lookup on Ruby's core Encoding rather than Prawn::Encoding.
  raise Prawn::Errors::IncompatibleStringEncoding,
        "Your document includes text that's not compatible with the Windows-1252 character set.\n" \
        "If you need full UTF-8 support, use TTF fonts instead of PDF's built-in fonts.\n"
end

def to_utf8(text)
  # String#encode transcodes from the string's current encoding and returns
  # a new string; the argument itself is left untouched.
  utf8_text = text.encode("UTF-8")
  utf8_text
end

# Returns the number of characters in +str+ (a WinAnsi-encoded string).
Expand All @@ -124,11 +135,9 @@ def encode_text(text, options={})
end

def glyph_present?(char)
  # A glyph counts as present when the character can be normalized for
  # this font. normalize_encoding signals an unsupported character by
  # raising, so a single call is enough; no placeholder comparison is
  # needed.
  !!normalize_encoding(char)
rescue Prawn::Errors::IncompatibleStringEncoding
  false
end

private
Expand Down
4 changes: 4 additions & 0 deletions lib/prawn/font/ttf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,10 @@ def normalize_encoding(text)
end
end

def to_utf8(text)
  # Returns a UTF-8 transcoded copy of +text+; the argument is not modified.
  converted = text.encode("UTF-8")
  converted
end

def glyph_present?(char)
code = char.codepoints.first
cmap[code] > 0
Expand Down
15 changes: 10 additions & 5 deletions lib/prawn/text.rb
Original file line number Diff line number Diff line change
Expand Up @@ -199,17 +199,13 @@ def formatted_text(array, options={})

if @indent_paragraphs
self.text_formatter.array_paragraphs(array).each do |paragraph|
options[:skip_encoding] = false
remaining_text = draw_indented_formatted_line(paragraph, options)
options[:skip_encoding] = true

if @no_text_printed
# unless this paragraph was an empty line
unless @all_text_printed
@bounding_box.move_past_bottom
options[:skip_encoding] = false
remaining_text = draw_indented_formatted_line(paragraph, options)
options[:skip_encoding] = true
end
end

Expand All @@ -218,7 +214,6 @@ def formatted_text(array, options={})
end
else
remaining_text = fill_formatted_text_box(array, options)
options[:skip_encoding] = true
draw_remaining_formatted_text_on_new_pages(remaining_text, options)
end
end
Expand Down Expand Up @@ -292,6 +287,16 @@ def draw_text(text, options)
# should already be set
#
def draw_text!(text, options)
  # No warning is needed when the font is unicode-capable, when the warning
  # has been switched off on the font's class, or when the text is pure
  # ASCII. The checks short-circuit in that order, so the class-level flag
  # is only consulted for non-unicode fonts.
  warning_unnecessary = font.unicode? ||
                        font.class.hide_m17n_warning ||
                        text.ascii_only?

  unless warning_unnecessary
    message = [
      "PDF's built-in fonts have very limited support for ",
      "internationalized text.\nIf you need full UTF-8 support, ",
      "consider using a TTF font instead.\n\nTo disable this ",
      "warning, add the following line to your code:\n",
      "Prawn::Font::AFM.hide_m17n_warning = true\n"
    ].join
    warn message

    # Flip the flag so the warning is printed only once.
    font.class.hide_m17n_warning = true
  end

  # Resolve the :at position and hand the text over for drawing.
  abs_x, abs_y = map_to_absolute(options[:at])
  add_text_content(text, abs_x, abs_y, options)
end
Expand Down
7 changes: 0 additions & 7 deletions lib/prawn/text/box.rb
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ module Text
# document.default_leading]
# <tt>:single_line</tt>::
# <tt>boolean</tt>. If true, then only the first line will be drawn [false]
# <tt>:skip_encoding</tt>::
# <tt>boolean</tt> [false]
# <tt>:overflow</tt>::
# <tt>:truncate</tt>, <tt>:shrink_to_fit</tt>, or <tt>:expand</tt>
# This controls the behavior when the amount of text
Expand All @@ -99,11 +97,6 @@ module Text
#
# Returns any text that did not print under the current settings.
#
# NOTE: if an AFM font is used, then the returned text is encoded in
# WinAnsi. Subsequent calls to text_box that pass this returned text back
# into text box must include a :skip_encoding => true option. This is
# unnecessary when using TTF fonts because those operate on UTF-8 encoding.
#
# == Exceptions
#
# Raises <tt>Prawn::Errors::CannotFit</tt> if not wide enough to print
Expand Down
12 changes: 4 additions & 8 deletions lib/prawn/text/formatted/box.rb
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ def initialize(formatted_text, options={})
@rotate = options[:rotate] || 0
@rotate_around = options[:rotate_around] || :upper_left
@single_line = options[:single_line]
@skip_encoding = options[:skip_encoding] || @document.skip_encoding
@draw_text_callback = options[:draw_text_callback]

# if the text rendering mode is :unknown, force it back to :fill
Expand Down Expand Up @@ -229,7 +228,9 @@ def render(flags={})
end
end

unprinted_text
unprinted_text.map do |e|
e.merge(:text => @document.font.to_utf8(e[:text]))
end
end

# The width available at this point in the box
Expand Down Expand Up @@ -335,7 +336,6 @@ def valid_options
:disable_wrap_by_char,
:leading, :character_spacing,
:mode, :single_line,
:skip_encoding,
:document,
:direction,
:fallback_fonts,
Expand All @@ -345,11 +345,7 @@ def valid_options
private

def normalized_text(flags)
if @skip_encoding
text = original_text
else
text = normalize_encoding
end
text = normalize_encoding

text.each { |t| t.delete(:color) } if flags[:dry_run]

Expand Down
1 change: 1 addition & 0 deletions lib/prawn/text/formatted/line_wrap.rb
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def scan_pattern
"[#{whitespace}]+|" +
"#{hyphen}+[^#{break_chars}]*|" +
"#{soft_hyphen}"

Regexp.new(pattern)
end

Expand Down
5 changes: 1 addition & 4 deletions manual/cover.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,8 @@
], :at => [170, cursor - 160])

if Dir.exist?("#{Prawn::BASEDIR}/.git")
#long git commit hash
#commit = `git show --pretty=%H`
#short git commit hash
commit = `git show --pretty=%h`
git_commit = "git commit: #{commit}"
git_commit = "git commit: #{commit.lines.first}"
else
git_commit = ""
end
Expand Down
1 change: 1 addition & 0 deletions manual/example_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
require "prawn/manual_builder"

Prawn::ManualBuilder.manual_dir = File.dirname(__FILE__)
Prawn::Font::AFM.hide_m17n_warning = true
2 changes: 1 addition & 1 deletion manual/text/text.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
s.example "registering_families"
end

p.section "M17n" do |s|
p.section "Multilingualization" do |s|
s.example "utf8"
s.example "line_wrapping"
s.example "right_to_left_text"
Expand Down
8 changes: 4 additions & 4 deletions manual/text/utf8.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
text "€", :size => 32
move_down 20

text "Seems ok. Now let's try something more complex:"
text "ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει."
text "This works, because € is one of the few "+
"non-ASCII glyphs supported in PDF built-in fonts."

move_down 20

text "Looks like the current font (#{font.inspect}) doesn't support those."
text "Let's try them with another font."
text "For full internationalized text support, we need to use TTF fonts:"
move_down 20

font("#{Prawn::DATADIR}/fonts/DejaVuSans.ttf") do
Expand Down
3 changes: 2 additions & 1 deletion manual/text/win_ansi_charset.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@
when :center then offset = (total_width - width)/2
end

text_box(field, :at => [dx + offset, y], :skip_encoding => true)
text_box(field.force_encoding("windows-1252").encode("UTF-8"),
:at => [dx + offset, y])
end

dx += total_width
Expand Down
Loading