Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
279 lines (235 sloc) 8.11 KB
# encoding: utf-8
# frozen_string_literal: true
module Mail
class Ruby19
# Charset encoder that applies exactly the encoding that
# Mail::Ruby19.pick_encoding resolves for the given charset, with no
# additional aliasing of its own.
class StrictCharsetEncoder
  # Interprets +string+ as text in +charset+.
  #
  # UTF-7 input is decoded through Mail::Ruby19.decode_utf7; every other
  # charset is applied with force_encoding, which re-tags (mutates) the
  # string that was passed in.
  def encode(string, charset)
    # Regexp#=== is what `case/when` uses, so non-String charsets are
    # handled exactly as before.
    return Mail::Ruby19.decode_utf7(string) if /utf-?7/i === charset

    string.force_encoding(Mail::Ruby19.pick_encoding(charset))
  end
end
# Charset encoder that substitutes a stand-in charset for the special
# cases listed in #pick_encoding before asking Mail::Ruby19 for the Ruby
# encoding.
class BestEffortCharsetEncoder
  # Interprets +string+ as text in +charset+.
  #
  # UTF-7 input is decoded through Mail::Ruby19.decode_utf7; every other
  # charset is applied with force_encoding, which re-tags (mutates) the
  # string that was passed in.
  def encode(string, charset)
    case charset
    when /utf-?7/i
      Mail::Ruby19.decode_utf7(string)
    else
      string.force_encoding(pick_encoding(charset))
    end
  end

  private

  # Replaces the charsets handled below with a stand-in name, then defers
  # to Mail::Ruby19.pick_encoding for the actual lookup.
  def pick_encoding(charset)
    charset = case charset
      # Charset names are case-insensitive and this one is usually written
      # in upper case (ANSI_X3.110-1983), so match regardless of case.
      when /ansi_x3.110-1983/i
        'ISO-8859-1'
      when /Windows-?1258/i # Windows-1258 is similar to 1252
        "Windows-1252"
      else
        charset
      end
    Mail::Ruby19.pick_encoding(charset)
  end
end
# Class-level reader/writer for the object that applies a charset to a
# string. The decoding helpers in this class call its #encode(string, charset).
class << self
attr_accessor :charset_encoder
end
# BestEffortCharsetEncoder is installed as the default; assign a different
# object (e.g. StrictCharsetEncoder.new) to change the behavior.
self.charset_encoder = BestEffortCharsetEncoder.new
# Backslash-escapes every parenthesis in +str+ that is not already escaped.
# A negative look-behind (available since Ruby 1.9.1) skips parentheses
# that are directly preceded by a backslash.
def Ruby19.escape_paren( str )
  str.gsub(/(?<!\\)([\(\)])/) { |bare_paren| '\\' + bare_paren }
end
# Wraps +str+ in a single pair of parentheses: it is first passed through
# ::Mail::Utilities.unparen, then any unescaped parentheses left inside
# are escaped.
def Ruby19.paren( str )
  inner = escape_paren( ::Mail::Utilities.unparen( str ) )
  '(' + inner + ')'
end
# Backslash-escapes every angle bracket in +str+ that is not already
# preceded by a backslash.
def Ruby19.escape_bracket( str )
  str.gsub(/(?<!\\)([\<\>])/) { |bare_bracket| '\\' + bare_bracket }
end
# Wraps +str+ in a single pair of angle brackets: it is first passed
# through ::Mail::Utilities.unbracket, then any unescaped brackets left
# inside are escaped.
def Ruby19.bracket( str )
  inner = escape_bracket( ::Mail::Utilities.unbracket( str ) )
  '<' + inner + '>'
end
# Decodes a Base64 string.
#
# Input that does not end in "=" and whose length is not a multiple of
# four is right-padded with "=" up to the next multiple of four before
# being unpacked.
def Ruby19.decode_base64(str)
  already_aligned = str.end_with?("=") || (str.length % 4).zero?
  # (len + 3) & ~3 rounds the length up to the next multiple of four.
  padded = already_aligned ? str : str.ljust((str.length + 3) & ~3, "=")
  padded.unpack('m').first
end
# Base64-encodes +str+ using Array#pack with the 'm' directive.
def Ruby19.encode_base64(str)
  [str].pack('m')
end
# Returns whether +klass+ defines the constant named +string+. The +false+
# argument tells const_defined? not to consult ancestors.
def Ruby19.has_constant?(klass, string)
  klass.const_defined?(string, false)
end
# Returns the constant named +string+ as resolved by +klass+.const_get
# (called with its default lookup rules).
def Ruby19.get_constant(klass, string)
  klass.const_get(string)
end
# Converts +str+ from the charset +from_encoding+ to +to_encoding+
# (UTF-8 by default) and returns the converted copy; +str+ itself is
# not modified because a duplicate is handed to the charset encoder.
#
# Characters that are undefined or invalid in the target are replaced:
# dropped when the target is UTF-8, substituted with "?" otherwise.
def Ruby19.transcode_charset(str, from_encoding, to_encoding = Encoding::UTF_8)
  # On Rubies older than 1.9.3 the target is stringified before lookup
  # (presumably because Encoding.find required a name there).
  lookup_key = RUBY_VERSION < '1.9.3' ? to_encoding.to_s : to_encoding
  target = Encoding.find(lookup_key)

  substitute =
    if target == Encoding::UTF_8
      ''
    else
      '?'
    end

  tagged = charset_encoder.encode(str.dup, from_encoding)
  tagged.encode(target, :undef => :replace, :invalid => :replace, :replace => substitute)
end
# From Ruby stdlib Net::IMAP
#
# Encodes +string+ in the "&...-" modified UTF-7 form: a literal "&"
# becomes "&-", and each run of characters outside \x20-\x7e is converted
# to UTF-16BE, Base64-encoded without padding, has "/" swapped for ","
# and is wrapped in "&" ... "-". The result is tagged as ASCII-8BIT.
def Ruby19.encode_utf7(string)
  encoded = string.gsub(/(&)|[^\x20-\x7e]+/) do
    found = Regexp.last_match
    if found[1]
      "&-"
    else
      base64 = [found[0].encode(Encoding::UTF_16BE)].pack("m0")
      "&" + base64.delete("=").tr("/", ",") + "-"
    end
  end
  encoded.force_encoding(Encoding::ASCII_8BIT)
end
# Reverses encode_utf7: "&-" becomes a literal "&", and every "&...-"
# section has "," swapped back to "/", is Base64-decoded and converted
# from UTF-16BE to UTF-8. Text outside such sections is left as is.
def Ruby19.decode_utf7(utf7)
  utf7.gsub(/&([^-]+)?-/n) do
    shifted = Regexp.last_match(1)
    next "&" unless shifted

    # Extra "=" padding lets unpack accept Base64 that was stored unpadded.
    utf16 = (shifted.tr(",", "/") + "===").unpack("m")[0]
    utf16.encode(Encoding::UTF_8, Encoding::UTF_16BE)
  end
end
# Returns a two-element array: +str+ Base64-encoded, and the name of
# +str+'s own encoding.
#
# NOTE: the +encoding+ argument is accepted but never used; the returned
# name always comes from +str+.encoding.
def Ruby19.b_value_encode(str, encoding = nil)
  charset_name = str.encoding.to_s
  [Ruby19.encode_base64(str), charset_name]
end
# Decodes a "=?charset?B?payload?=" encoded word into scrubbed UTF-8.
#
# When +str+ contains such a word, the payload is Base64-decoded and
# handed to the charset encoder together with the captured charset;
# otherwise +str+ is transcoded as it is. If one of the rescued
# conversion errors occurs, a warning is emitted and the current value
# is returned as a copy that is merely tagged as UTF-8.
def Ruby19.b_value_decode(str)
  if (encoded_word = str.match(/\=\?(.+)?\?[Bb]\?(.*)\?\=/m))
    charset, payload = encoded_word.captures
    # Two separate assignments: the rescue below falls back to whatever
    # +str+ holds at the moment of failure.
    str = Ruby19.decode_base64(payload)
    str = charset_encoder.encode(str, charset)
  end
  transcode_to_scrubbed_utf8(str)
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError, Encoding::InvalidByteSequenceError => error
  warn "Encoding conversion failed #{error}"
  str.dup.force_encoding(Encoding::UTF_8)
end
# Returns a two-element array: +str+ encoded by
# Encodings::QuotedPrintable, and the name of +str+'s own encoding.
#
# NOTE: the +encoding+ argument is accepted but never used; the returned
# name always comes from +str+.encoding.
def Ruby19.q_value_encode(str, encoding = nil)
  charset_name = str.encoding.to_s
  [Encodings::QuotedPrintable.encode(str), charset_name]
end
# Decodes a "=?charset?Q?payload?=" encoded word into scrubbed UTF-8.
#
# When +str+ contains such a word, underscores in the payload are turned
# into "=20", the result is quoted-printable decoded and handed to the
# charset encoder with the captured charset; otherwise +str+ is
# transcoded as it is.
#
# If one of the rescued conversion errors occurs, a warning is emitted
# and the current value is returned as a copy merely tagged as UTF-8.
# Encoding::InvalidByteSequenceError is rescued as well (as in
# b_value_decode): the UTF-7 path converts from UTF-16BE and can raise it
# on malformed input.
def Ruby19.q_value_decode(str)
  match = str.match(/\=\?(.+)?\?[Qq]\?(.*)\?\=/m)
  if match
    charset = match[1]
    string = match[2].gsub(/_/, '=20')
    # Remove trailing = if it exists in a Q encoding
    string = string.sub(/\=$/, '')
    str = Encodings::QuotedPrintable.decode(string)
    str = charset_encoder.encode(str, charset)
    # We assume that binary strings hold utf-8 directly to work around
    # jruby/jruby#829 which subtly changes String#encode semantics.
    str.force_encoding(Encoding::UTF_8) if str.encoding == Encoding::ASCII_8BIT
  end
  transcode_to_scrubbed_utf8(str)
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError, Encoding::InvalidByteSequenceError
  warn "Encoding conversion failed #{$!}"
  str.dup.force_encoding(Encoding::UTF_8)
end
# Decodes a percent-escaped parameter value into scrubbed UTF-8.
#
# +str+ is unescaped with uri_parser, interpreted in +encoding+ by the
# charset encoder when an encoding is given, and then transcoded.
#
# If one of the rescued conversion errors occurs, a warning is emitted
# and the current value is returned as a copy merely tagged as UTF-8.
# Encoding::InvalidByteSequenceError is rescued as well (as in
# b_value_decode): the UTF-7 path converts from UTF-16BE and can raise it
# on malformed input.
def Ruby19.param_decode(str, encoding)
  str = uri_parser.unescape(str)
  str = charset_encoder.encode(str, encoding) if encoding
  transcode_to_scrubbed_utf8(str)
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError, Encoding::InvalidByteSequenceError
  warn "Encoding conversion failed #{$!}"
  str.dup.force_encoding(Encoding::UTF_8)
end
# Builds an extended parameter value of the form
# <charset>'<language>'<escaped text>, where the charset is the
# lower-cased name of +str+'s encoding, the language comes from
# Configuration.instance.param_encode_language and the text is escaped
# with uri_parser.
def Ruby19.param_encode(str)
  charset_name = str.encoding.to_s.downcase
  language_tag = Configuration.instance.param_encode_language
  "#{charset_name}'#{language_tag}'#{uri_parser.escape(str)}"
end
# Returns the URI parser that param_decode and param_encode use for
# unescaping and escaping parameter values.
def Ruby19.uri_parser
URI::DEFAULT_PARSER
end
# Pick a Ruby encoding corresponding to the message charset. Most
# charsets have a Ruby encoding, but some need manual aliasing here.
#
# The charset is stringified and matched in lower case. The branches
# below yield either an encoding name or an Encoding constant; names that
# match no branch are passed through unchanged. The result is then handed
# to convert_to_encoding, so an Encoding object is always returned.
#
# The order of the +when+ branches matters: the ISO patterns overlap and
# the first one that matches wins.
#
# TODO: add this as a test somewhere:
# Encoding.list.map { |e| [e.to_s.upcase == pick_encoding(e.to_s.downcase.gsub("-", "")), e.to_s] }.select {|a,b| !b}
# Encoding.list.map { |e| [e.to_s == pick_encoding(e.to_s), e.to_s] }.select {|a,b| !b}
def Ruby19.pick_encoding(charset)
charset = charset.to_s
encoding = case charset.downcase
# ISO-8859-8-I etc. http://en.wikipedia.org/wiki/ISO-8859-8-I
# (an optional "-i" suffix is dropped)
when /^iso[-_]?8859-(\d+)(-i)?$/
"ISO-8859-#{$1}"
# ISO-8859-15, ISO-2022-JP and alike
when /^iso[-_]?(\d{4})-?(\w{1,2})$/
"ISO-#{$1}-#{$2}"
# "ISO-2022-JP-KDDI" and alike
when /^iso[-_]?(\d{4})-?(\w{1,2})-?(\w*)$/
"ISO-#{$1}-#{$2}-#{$3}"
# UTF-8, UTF-32BE and alike
# (a bare UTF-16 or UTF-32 gets "BE" appended)
when /^utf[\-_]?(\d{1,2})?(\w{1,2})$/
"UTF-#{$1}#{$2}".gsub(/\A(UTF-(?:16|32))\z/, '\\1BE')
# Windows-1252 and alike
when /^windows-?(.*)$/
"Windows-#{$1}"
when '8bit'
Encoding::ASCII_8BIT
# alternatives/misspellings of us-ascii seen in the wild
when /^iso[-_]?646(-us)?$/, 'us=ascii'
Encoding::ASCII
# Microsoft-specific alias for MACROMAN
when 'macintosh'
Encoding::MACROMAN
# Microsoft-specific alias for CP949 (Korean)
when 'ks_c_5601-1987'
Encoding::CP949
# Wrongly written Shift_JIS (Japanese)
when 'shift-jis'
Encoding::Shift_JIS
# GB2312 (Chinese charset) is a subset of GB18030 (its replacement)
when 'gb2312'
Encoding::GB18030
when 'cp-850'
Encoding::CP850
when 'latin2'
Encoding::ISO_8859_2
else
# No alias applies: keep the name as it was given (original case).
charset
end
convert_to_encoding(encoding)
end
# Byte-oriented slice of +str+; +args+ are forwarded as the slice
# arguments. Which implementation is defined is decided once, when this
# file is loaded, depending on whether String#byteslice is available.
if "string".respond_to?(:byteslice)
  # Native String#byteslice is available: delegate to it.
  def Ruby19.string_byteslice(str, *args)
    str.byteslice(*args)
  end
else
  # Fallback: slice the array of byte values, re-pack it and restore the
  # encoding of the original string.
  def Ruby19.string_byteslice(str, *args)
    byte_values = str.unpack('C*')
    byte_values.slice(*args).pack('C*').force_encoding(str.encoding)
  end
end
class << self
  private

  # Resolves +encoding+ to an Encoding object. Encoding instances are
  # returned untouched; anything else is looked up with Encoding.find,
  # and names Ruby does not recognize fall back to Encoding::BINARY.
  def convert_to_encoding(encoding)
    return encoding if encoding.is_a?(Encoding)

    Encoding.find(encoding)
  rescue ArgumentError
    Encoding::BINARY
  end

  # Returns +str+ converted to UTF-8 with undefined and invalid
  # characters removed. If the converted string still fails
  # valid_encoding?, it is round-tripped through UTF-16LE so that the
  # remaining invalid bytes are dropped as well.
  def transcode_to_scrubbed_utf8(str)
    utf8 = str.encode(Encoding::UTF_8, :undef => :replace, :invalid => :replace, :replace => "")
    return utf8 if utf8.valid_encoding?

    utf16 = utf8.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "")
    utf16.encode(Encoding::UTF_8)
  end
end
end
end
You can’t perform that action at this time.