Skip to content
Browse files

Pulling Active Support multibyte requirements over for Ruby 1.8.7

  • Loading branch information...
1 parent 3c4c2c8 commit 6eb4c44a15eb1707dde60959e85e5c536ef136e2 @mikel committed
View
1 Gemfile
@@ -1,6 +1,5 @@
source :rubygems
-gem "activesupport", ">= 2.3.6"
gem "tlsmail" if RUBY_VERSION <= '1.8.6'
gem "mime-types", "~> 1.16"
gem "treetop", "~> 1.4.8"
View
16 lib/mail.rb
@@ -26,13 +26,19 @@ module Mail # :doc:
require 'mail/version'
- require 'mail/core_extensions/nil'
- require 'mail/core_extensions/string'
+ # Only load our own core extensions if ActiveSupport is not already loaded
+ unless defined?(ActiveSupport)
+ require 'mail/core_extensions/nil'
+ require 'mail/core_extensions/string'
+ require 'mail/core_extensions/string/access'
+ require 'mail/core_extensions/string/multibyte'
+ require 'mail/core_extensions/object'
+ require 'mail/multibyte'
+ require 'mail/indifferent_hash'
+ end
+
require 'mail/core_extensions/shellwords' unless String.new.respond_to?(:shellescape)
require 'mail/core_extensions/smtp' if RUBY_VERSION < '1.9.3'
- require 'mail/core_extensions/object'
-
- require 'mail/indifferent_hash'
require 'mail/patterns'
require 'mail/utilities'
View
98 lib/mail/core_extensions/string/access.rb
@@ -0,0 +1,98 @@
+
+class String
+ unless '1.9'.respond_to?(:force_encoding)
+ # Returns the character at the +position+ treating the string as an array (where 0 is the first character).
+ #
+ # Examples:
+ # "hello".at(0) # => "h"
+ # "hello".at(4) # => "o"
+ # "hello".at(10) # => ERROR if < 1.9, nil in 1.9
+ def at(position)
+ mb_chars[position, 1].to_s
+ end
+
+ # Returns the remainder of the string starting at +position+, treating the string as an array (where 0 is the first character).
+ #
+ # Examples:
+ # "hello".from(0) # => "hello"
+ # "hello".from(2) # => "llo"
+ # "hello".from(10) # => "" if < 1.9, nil in 1.9
+ def from(position)
+ mb_chars[position..-1].to_s
+ end
+
+ # Returns the beginning of the string up to the +position+ treating the string as an array (where 0 is the first character).
+ #
+ # Examples:
+ # "hello".to(0) # => "h"
+ # "hello".to(2) # => "hel"
+ # "hello".to(10) # => "hello"
+ def to(position)
+ mb_chars[0..position].to_s
+ end
+
+ # Returns the first character of the string or the first +limit+ characters.
+ #
+ # Examples:
+ # "hello".first # => "h"
+ # "hello".first(2) # => "he"
+ # "hello".first(10) # => "hello"
+ def first(limit = 1)
+ if limit == 0
+ ''
+ elsif limit >= size
+ self
+ else
+ mb_chars[0...limit].to_s
+ end
+ end
+
+ # Returns the last character of the string or the last +limit+ characters.
+ #
+ # Examples:
+ # "hello".last # => "o"
+ # "hello".last(2) # => "lo"
+ # "hello".last(10) # => "hello"
+ def last(limit = 1)
+ if limit == 0
+ ''
+ elsif limit >= size
+ self
+ else
+ mb_chars[(-limit)..-1].to_s
+ end
+ end
+ else
+ def at(position)
+ self[position]
+ end
+
+ def from(position)
+ self[position..-1]
+ end
+
+ def to(position)
+ self[0..position]
+ end
+
+ def first(limit = 1)
+ if limit == 0
+ ''
+ elsif limit >= size
+ self
+ else
+ to(limit - 1)
+ end
+ end
+
+ def last(limit = 1)
+ if limit == 0
+ ''
+ elsif limit >= size
+ self
+ else
+ from(-limit)
+ end
+ end
+ end
+end
View
72 lib/mail/core_extensions/string/multibyte.rb
@@ -0,0 +1,72 @@
+# encoding: utf-8
+require 'mail/multibyte'
+
+class String
+ if RUBY_VERSION >= "1.9"
+ # == Multibyte proxy
+ #
+ # +mb_chars+ is a multibyte safe proxy for string methods.
+ #
+ # In Ruby 1.8 and older it creates and returns an instance of the Mail::Multibyte::Chars class which
+ # encapsulates the original string. Unicode-safe versions of all the String methods are defined on this proxy
+ # class. If the proxy class doesn't respond to a certain method, the call is forwarded to the encapsulated string.
+ #
+ # name = 'Claus Müller'
+ # name.reverse # => "rell??M sualC"
+ # name.length # => 13
+ #
+ # name.mb_chars.reverse.to_s # => "rellüM sualC"
+ # name.mb_chars.length # => 12
+ #
+ # In Ruby 1.9 and newer +mb_chars+ returns +self+ because String is (mostly) encoding aware. This means that
+ # it becomes easy to run one version of your code on multiple Ruby versions.
+ #
+ # == Method chaining
+ #
+ # All the methods on the Chars proxy which normally return a string will return a Chars object. This allows
+ # method chaining on the result of any of these methods.
+ #
+ # name.mb_chars.reverse.length # => 12
+ #
+ # == Interoperability and configuration
+ #
+ # The Chars object tries to be as interchangeable with String objects as possible: sorting and comparing between
+ # String and Chars work as expected. The bang! methods change the internal string representation in the Chars
+ # object. Interoperability problems can be resolved easily with a +to_s+ call.
+ #
+ # For more information about the methods defined on the Chars proxy see Mail::Multibyte::Chars. For
+ # information about how to change the default Multibyte behaviour see Mail::Multibyte.
+ def mb_chars
+ if Mail::Multibyte.proxy_class.consumes?(self)
+ Mail::Multibyte.proxy_class.new(self)
+ else
+ self
+ end
+ end
+
+ def is_utf8? #:nodoc
+ case encoding
+ when Encoding::UTF_8
+ valid_encoding?
+ when Encoding::ASCII_8BIT, Encoding::US_ASCII
+ dup.force_encoding(Encoding::UTF_8).valid_encoding?
+ else
+ false
+ end
+ end
+ else
+ def mb_chars
+ if Mail::Multibyte.proxy_class.wants?(self)
+ Mail::Multibyte.proxy_class.new(self)
+ else
+ self
+ end
+ end
+
+ # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
+ # them), returns false otherwise.
+ def is_utf8?
+ Mail::Multibyte::Chars.consumes?(self)
+ end
+ end
+end
View
136 lib/mail/indifferent_hash.rb
@@ -1,26 +1,142 @@
module Mail
# Sort of like ActiveSupport HashWithIndifferentAccess, but lighter
class IndifferentHash < Hash
- def initialize(other=nil)
- if other.is_a?(Hash)
- self.default = other.default
- self.update(other)
+
+ def initialize(constructor = {})
+ if constructor.is_a?(Hash)
+ super()
+ update(constructor)
+ else
+ super(constructor)
+ end
+ end
+
+ def default(key = nil)
+ if key.is_a?(Symbol) && include?(key = key.to_s)
+ self[key]
else
super
end
end
- def [](key_name)
- super(key_name.to_sym)
+ def self.new_from_hash_copying_default(hash)
+ IndifferentHash.new(hash).tap do |new_hash|
+ new_hash.default = hash.default
+ end
end
- def []=(k, v)
- super(k.to_sym, v)
+ alias_method :regular_writer, :[]= unless method_defined?(:regular_writer)
+ alias_method :regular_update, :update unless method_defined?(:regular_update)
+
+ # Assigns a new value to the hash:
+ #
+ # hash = Mail::IndifferentHash.new
+ # hash[:key] = "value"
+ #
+ def []=(key, value)
+ regular_writer(convert_key(key), convert_value(value))
end
+ alias_method :store, :[]=
+
+ # Updates the receiver in place with the keys and values from the other hash:
+ #
+ # hash_1 = Mail::IndifferentHash.new
+ # hash_1[:key] = "value"
+ #
+ # hash_2 = Mail::IndifferentHash.new
+ # hash_2[:key] = "New Value!"
+ #
+ # hash_1.update(hash_2) # => {"key"=>"New Value!"}
+ #
def update(other_hash)
- super(other_hash.inject({}) {|c, (k, v)| c[k.to_sym] = v; c})
+ other_hash.each_pair { |key, value| regular_writer(convert_key(key), convert_value(value)) }
+ self
end
- alias merge! update
+
+ alias_method :merge!, :update
+
+ # Checks the hash for a key matching the argument passed in:
+ #
+ # hash = Mail::IndifferentHash.new
+ # hash["key"] = "value"
+ # hash.key? :key # => true
+ # hash.key? "key" # => true
+ #
+ def key?(key)
+ super(convert_key(key))
+ end
+
+ alias_method :include?, :key?
+ alias_method :has_key?, :key?
+ alias_method :member?, :key?
+
+ # Fetches the value for the specified key via Hash#fetch, converting a Symbol key to a String first
+ def fetch(key, *extras)
+ super(convert_key(key), *extras)
+ end
+
+ # Returns an array of the values at the specified indices:
+ #
+ # hash = Mail::IndifferentHash.new
+ # hash[:a] = "x"
+ # hash[:b] = "y"
+ # hash.values_at("a", "b") # => ["x", "y"]
+ #
+ def values_at(*indices)
+ indices.collect {|key| self[convert_key(key)]}
+ end
+
+ # Returns a new IndifferentHash built from this hash's keys and values.
+ def dup
+ IndifferentHash.new(self)
+ end
+
+ # Merges the receiver and the specified hash together, giving precedence to the values from the specified hash.
+ # Returns a new hash; the receiver is not modified.
+ def merge(hash)
+ self.dup.update(hash)
+ end
+
+ # Performs the opposite of merge, with the keys and values from the first hash taking precedence over the second.
+ # This overridden definition prevents returning a regular hash if reverse_merge is called on an IndifferentHash.
+ def reverse_merge(other_hash)
+ super self.class.new_from_hash_copying_default(other_hash)
+ end
+
+ def reverse_merge!(other_hash)
+ replace(reverse_merge( other_hash ))
+ end
+
+ # Removes a specified key from the hash.
+ def delete(key)
+ super(convert_key(key))
+ end
+
+ def stringify_keys!; self end
+ def stringify_keys; dup end
+ def symbolize_keys; to_hash.symbolize_keys end
+ def to_options!; self end
+
+ def to_hash
+ Hash.new(default).merge!(self)
+ end
+
+ protected
+
+ def convert_key(key)
+ key.kind_of?(Symbol) ? key.to_s : key
+ end
+
+ def convert_value(value)
+ if value.class == Hash
+ self.class.new_from_hash_copying_default(value)
+ elsif value.is_a?(Array)
+ value.dup.replace(value.map { |e| convert_value(e) })
+ else
+ value
+ end
+ end
+
end
end
View
42 lib/mail/multibyte.rb
@@ -0,0 +1,42 @@
+# encoding: utf-8
+module Mail #:nodoc:
+ module Multibyte
+ require 'mail/multibyte/exceptions'
+ require 'mail/multibyte/chars'
+ require 'mail/multibyte/unicode'
+
+ # The proxy class returned when calling mb_chars. You can use this accessor to configure your own proxy
+ # class so you can support other encodings. See the Mail::Multibyte::Chars implementation for
+ # an example of how to do this.
+ #
+ # Example:
+ # Mail::Multibyte.proxy_class = CharsForUTF32
+ def self.proxy_class=(klass)
+ @proxy_class = klass
+ end
+
+ # Returns the current proxy class
+ def self.proxy_class
+ @proxy_class ||= Mail::Multibyte::Chars
+ end
+
+ # Regular expressions that describe valid byte sequences for a character
+ VALID_CHARACTER = {
+ # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
+ 'UTF-8' => /\A(?:
+ [\x00-\x7f] |
+ [\xc2-\xdf] [\x80-\xbf] |
+ \xe0 [\xa0-\xbf] [\x80-\xbf] |
+ [\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
+ \xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
+ [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
+ \xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
+ # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
+ 'Shift_JIS' => /\A(?:
+ [\x00-\x7e\xa1-\xdf] |
+ [\x81-\x9f\xe0-\xef] [\x40-\x7e\x80-\x9e\x9f-\xfc])\z /xn
+ }
+ end
+end
+
+require 'mail/multibyte/utils'
View
474 lib/mail/multibyte/chars.rb
@@ -0,0 +1,474 @@
+# encoding: utf-8
+
+module Mail #:nodoc:
+ module Multibyte #:nodoc:
+ # Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive
+ # knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
+ # encoding safe manner. All the normal String methods are also implemented on the proxy.
+ #
+ # String methods are proxied through the Chars object, and can be accessed through the +mb_chars+ method. Methods
+ # which would normally return a String object now return a Chars object so methods can be chained.
+ #
+ # "The Perfect String ".mb_chars.downcase.strip.normalize # => "the perfect string"
+ #
+ # Chars objects are perfectly interchangeable with String objects as long as no explicit class checks are made.
+ # If certain methods do explicitly check the class, call +to_s+ before you pass chars objects to them.
+ #
+ # bad.explicit_checking_method "T".mb_chars.downcase.to_s
+ #
+ # The default Chars implementation assumes that the encoding of the string is UTF-8, if you want to handle different
+ # encodings you can write your own multibyte string handler and configure it through
+ # Mail::Multibyte.proxy_class.
+ #
+ # class CharsForUTF32
+ # def size
+ # @wrapped_string.size / 4
+ # end
+ #
+ # def self.consumes?(string)
+ # string.length % 4 == 0
+ # end
+ # end
+ #
+ # Mail::Multibyte.proxy_class = CharsForUTF32
+ class Chars
+ attr_reader :wrapped_string
+ alias to_s wrapped_string
+ alias to_str wrapped_string
+
+ if RUBY_VERSION >= "1.9"
+ # Creates a new Chars instance by wrapping _string_.
+ def initialize(string)
+ @wrapped_string = string
+ @wrapped_string.force_encoding(Encoding::UTF_8) unless @wrapped_string.frozen?
+ end
+ else
+ def initialize(string) #:nodoc:
+ @wrapped_string = string
+ end
+ end
+
+ # Forward all undefined methods to the wrapped string.
+ def method_missing(method, *args, &block)
+ if method.to_s =~ /!$/
+ @wrapped_string.__send__(method, *args, &block)
+ self
+ else
+ result = @wrapped_string.__send__(method, *args, &block)
+ result.kind_of?(String) ? chars(result) : result
+ end
+ end
+
+ # Returns +true+ if _obj_ responds to the given method. Private methods are included in the search
+ # only if the optional second parameter evaluates to +true+.
+ def respond_to?(method, include_private=false)
+ super || @wrapped_string.respond_to?(method, include_private) || false
+ end
+
+ # Enable more predictable duck-typing on String-like classes. See Object#acts_like?.
+ def acts_like_string?
+ true
+ end
+
+ # Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise.
+ def self.consumes?(string)
+ # Unpack is a little bit faster than regular expressions.
+ string.unpack('U*')
+ true
+ rescue ArgumentError
+ false
+ end
+
+ include Comparable
+
+ # Returns -1, 0, or 1, depending on whether the Chars object is to be sorted before,
+ # equal or after the object on the right side of the operation. It accepts any object
+ # that implements +to_s+:
+ #
+ # 'é'.mb_chars <=> 'ü'.mb_chars # => -1
+ #
+ # See <tt>String#<=></tt> for more details.
+ def <=>(other)
+ @wrapped_string <=> other.to_s
+ end
+
+ if RUBY_VERSION < "1.9"
+ # Returns +true+ if the Chars class can and should act as a proxy for the string _string_. Returns
+ # +false+ otherwise.
+ def self.wants?(string)
+ $KCODE == 'UTF8' && consumes?(string)
+ end
+
+ # Returns a new Chars object containing the _other_ object concatenated to the string.
+ #
+ # Example:
+ # ('Café'.mb_chars + ' périferôl').to_s # => "Café périferôl"
+ def +(other)
+ chars(@wrapped_string + other)
+ end
+
+ # Like <tt>String#=~</tt> only it returns the character offset (in codepoints) instead of the byte offset.
+ #
+ # Example:
+ # 'Café périferôl'.mb_chars =~ /ô/ # => 12
+ def =~(other)
+ translate_offset(@wrapped_string =~ other)
+ end
+
+ # Inserts the passed string at specified codepoint offsets.
+ #
+ # Example:
+ # 'Café'.mb_chars.insert(4, ' périferôl').to_s # => "Café périferôl"
+ def insert(offset, fragment)
+ unpacked = Unicode.u_unpack(@wrapped_string)
+ unless offset > unpacked.length
+ @wrapped_string.replace(
+ Unicode.u_unpack(@wrapped_string).insert(offset, *Unicode.u_unpack(fragment)).pack('U*')
+ )
+ else
+ raise IndexError, "index #{offset} out of string"
+ end
+ self
+ end
+
+ # Returns +true+ if contained string contains _other_. Returns +false+ otherwise.
+ #
+ # Example:
+ # 'Café'.mb_chars.include?('é') # => true
+ def include?(other)
+ # We have to redefine this method because Enumerable defines it.
+ @wrapped_string.include?(other)
+ end
+
+ # Returns the position of _needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found.
+ #
+ # Example:
+ # 'Café périferôl'.mb_chars.index('ô') # => 12
+ # 'Café périferôl'.mb_chars.index(/\w/u) # => 0
+ def index(needle, offset=0)
+ wrapped_offset = first(offset).wrapped_string.length
+ index = @wrapped_string.index(needle, wrapped_offset)
+ index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
+ end
+
+ # Returns the position of _needle_ in the string, counting in
+ # codepoints, searching backward from _offset_ or the end of the
+ # string. Returns +nil+ if _needle_ isn't found.
+ #
+ # Example:
+ # 'Café périferôl'.mb_chars.rindex('é') # => 6
+ # 'Café périferôl'.mb_chars.rindex(/\w/u) # => 13
+ def rindex(needle, offset=nil)
+ offset ||= length
+ wrapped_offset = first(offset).wrapped_string.length
+ index = @wrapped_string.rindex(needle, wrapped_offset)
+ index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
+ end
+
+ # Returns the number of codepoints in the string
+ def size
+ Unicode.u_unpack(@wrapped_string).size
+ end
+ alias_method :length, :size
+
+ # Strips entire range of Unicode whitespace from the right of the string.
+ def rstrip
+ chars(@wrapped_string.gsub(Unicode::TRAILERS_PAT, ''))
+ end
+
+ # Strips entire range of Unicode whitespace from the left of the string.
+ def lstrip
+ chars(@wrapped_string.gsub(Unicode::LEADERS_PAT, ''))
+ end
+
+ # Strips entire range of Unicode whitespace from the right and left of the string.
+ def strip
+ rstrip.lstrip
+ end
+
+ # Returns the codepoint of the first character in the string.
+ #
+ # Example:
+ # 'こんにちは'.mb_chars.ord # => 12371
+ def ord
+ Unicode.u_unpack(@wrapped_string)[0]
+ end
+
+ # Works just like <tt>String#rjust</tt>, only integer specifies characters instead of bytes.
+ #
+ # Example:
+ #
+ # "¾ cup".mb_chars.rjust(8).to_s
+ # # => " ¾ cup"
+ #
+ # "¾ cup".mb_chars.rjust(8, " ").to_s # Use non-breaking whitespace
+ # # => "   ¾ cup"
+ def rjust(integer, padstr=' ')
+ justify(integer, :right, padstr)
+ end
+
+ # Works just like <tt>String#ljust</tt>, only integer specifies characters instead of bytes.
+ #
+ # Example:
+ #
+ # "¾ cup".mb_chars.ljust(8).to_s
+ # # => "¾ cup "
+ #
+ # "¾ cup".mb_chars.ljust(8, " ").to_s # Use non-breaking whitespace
+ # # => "¾ cup   "
+ def ljust(integer, padstr=' ')
+ justify(integer, :left, padstr)
+ end
+
+ # Works just like <tt>String#center</tt>, only integer specifies characters instead of bytes.
+ #
+ # Example:
+ #
+ # "¾ cup".mb_chars.center(8).to_s
+ # # => " ¾ cup "
+ #
+ # "¾ cup".mb_chars.center(8, " ").to_s # Use non-breaking whitespace
+ # # => " ¾ cup  "
+ def center(integer, padstr=' ')
+ justify(integer, :center, padstr)
+ end
+
+ else
+ def =~(other)
+ @wrapped_string =~ other
+ end
+ end
+
+ # Works just like <tt>String#split</tt>, with the exception that the items in the resulting list are Chars
+ # instances instead of String. This makes chaining methods easier.
+ #
+ # Example:
+ # 'Café périferôl'.mb_chars.split(/é/).map { |part| part.upcase.to_s } # => ["CAF", " P", "RIFERÔL"]
+ def split(*args)
+ @wrapped_string.split(*args).map { |i| i.mb_chars }
+ end
+
+ # Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
+ #
+ # Example:
+ #
+ # s = "Müller"
+ # s.mb_chars[2] = "e" # Replace character with offset 2
+ # s
+ # # => "Müeler"
+ #
+ # s = "Müller"
+ # s.mb_chars[1, 2] = "ö" # Replace 2 characters at character offset 1
+ # s
+ # # => "Möler"
+ def []=(*args)
+ replace_by = args.pop
+ # Indexed replace with regular expressions already works
+ if args.first.is_a?(Regexp)
+ @wrapped_string[*args] = replace_by
+ else
+ result = Unicode.u_unpack(@wrapped_string)
+ if args[0].is_a?(Fixnum)
+ raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
+ min = args[0]
+ max = args[1].nil? ? min : (min + args[1] - 1)
+ range = Range.new(min, max)
+ replace_by = [replace_by].pack('U') if replace_by.is_a?(Fixnum)
+ elsif args.first.is_a?(Range)
+ raise RangeError, "#{args[0]} out of range" if args[0].min >= result.length
+ range = args[0]
+ else
+ needle = args[0].to_s
+ min = index(needle)
+ max = min + Unicode.u_unpack(needle).length - 1
+ range = Range.new(min, max)
+ end
+ result[range] = Unicode.u_unpack(replace_by)
+ @wrapped_string.replace(result.pack('U*'))
+ end
+ end
+
+ # Reverses all characters in the string.
+ #
+ # Example:
+ # 'Café'.mb_chars.reverse.to_s # => 'éfaC'
+ def reverse
+ chars(Unicode.g_unpack(@wrapped_string).reverse.flatten.pack('U*'))
+ end
+
+ # Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
+ # character.
+ #
+ # Example:
+ # 'こんにちは'.mb_chars.slice(2..3).to_s # => "にち"
+ def slice(*args)
+ if args.size > 2
+ raise ArgumentError, "wrong number of arguments (#{args.size} for 1)" # Do as if we were native
+ elsif (args.size == 2 && !(args.first.is_a?(Numeric) || args.first.is_a?(Regexp)))
+ raise TypeError, "cannot convert #{args.first.class} into Integer" # Do as if we were native
+ elsif (args.size == 2 && !args[1].is_a?(Numeric))
+ raise TypeError, "cannot convert #{args[1].class} into Integer" # Do as if we were native
+ elsif args[0].kind_of? Range
+ cps = Unicode.u_unpack(@wrapped_string).slice(*args)
+ result = cps.nil? ? nil : cps.pack('U*')
+ elsif args[0].kind_of? Regexp
+ result = @wrapped_string.slice(*args)
+ elsif args.size == 1 && args[0].kind_of?(Numeric)
+ character = Unicode.u_unpack(@wrapped_string)[args[0]]
+ result = character && [character].pack('U')
+ else
+ cps = Unicode.u_unpack(@wrapped_string).slice(*args)
+ result = cps && cps.pack('U*')
+ end
+ result && chars(result)
+ end
+ alias_method :[], :slice
+
+ # Limit the byte size of the string to a number of bytes without breaking characters. Usable
+ # when the storage for a string is limited for some reason.
+ #
+ # Example:
+ # s = 'こんにちは'
+ # s.mb_chars.limit(7) # => "こん"
+ def limit(limit)
+ slice(0...translate_offset(limit))
+ end
+
+ # Convert characters in the string to uppercase.
+ #
+ # Example:
+ # 'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
+ def upcase
+ chars(Unicode.apply_mapping @wrapped_string, :uppercase_mapping)
+ end
+
+ # Convert characters in the string to lowercase.
+ #
+ # Example:
+ # 'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
+ def downcase
+ chars(Unicode.apply_mapping @wrapped_string, :lowercase_mapping)
+ end
+
+ # Converts the first character to uppercase and the remainder to lowercase.
+ #
+ # Example:
+ # 'über'.mb_chars.capitalize.to_s # => "Über"
+ def capitalize
+ (slice(0) || chars('')).upcase + (slice(1..-1) || chars('')).downcase
+ end
+
+ # Capitalizes the first letter of every word, when possible.
+ #
+ # Example:
+ # "ÉL QUE SE ENTERÓ".mb_chars.titleize # => "Él Que Se Enteró"
+ # "日本語".mb_chars.titleize # => "日本語"
+ def titleize
+ chars(downcase.to_s.gsub(/\b('?[\S])/u) { Unicode.apply_mapping $1, :uppercase_mapping })
+ end
+ alias_method :titlecase, :titleize
+
+ # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
+ # passing strings to databases and validations.
+ #
+ # * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
+ # <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
+ # Mail::Multibyte::Unicode.default_normalization_form
+ def normalize(form = nil)
+ chars(Unicode.normalize(@wrapped_string, form))
+ end
+
+ # Performs canonical decomposition on all the characters.
+ #
+ # Example:
+ # 'é'.length # => 2
+ # 'é'.mb_chars.decompose.to_s.length # => 3
+ def decompose
+ chars(Unicode.decompose_codepoints(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*'))
+ end
+
+ # Performs composition on all the characters.
+ #
+ # Example:
+ # 'é'.length # => 3
+ # 'é'.mb_chars.compose.to_s.length # => 2
+ def compose
+ chars(Unicode.compose_codepoints(Unicode.u_unpack(@wrapped_string)).pack('U*'))
+ end
+
+ # Returns the number of grapheme clusters in the string.
+ #
+ # Example:
+ # 'क्षि'.mb_chars.length # => 4
+ # 'क्षि'.mb_chars.g_length # => 3
+ def g_length
+ Unicode.g_unpack(@wrapped_string).length
+ end
+
+ # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
+ #
+ # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
+ def tidy_bytes(force = false)
+ chars(Unicode.tidy_bytes(@wrapped_string, force))
+ end
+
+ %w(capitalize downcase lstrip reverse rstrip slice strip tidy_bytes upcase).each do |method|
+ # Only define a corresponding bang method for methods defined in the proxy; on 1.9 the proxy will
+ # exclude lstrip!, rstrip! and strip! because they already work as expected on multibyte strings.
+ if public_method_defined?(method)
+ define_method("#{method}!") do |*args|
+ @wrapped_string = send(args.nil? ? method : method, *args).to_s
+ self
+ end
+ end
+ end
+
+ protected
+
+ def translate_offset(byte_offset) #:nodoc:
+ return nil if byte_offset.nil?
+ return 0 if @wrapped_string == ''
+
+ if @wrapped_string.respond_to?(:force_encoding)
+ @wrapped_string = @wrapped_string.dup.force_encoding(Encoding::ASCII_8BIT)
+ end
+
+ begin
+ @wrapped_string[0...byte_offset].unpack('U*').length
+ rescue ArgumentError => e
+ byte_offset -= 1
+ retry
+ end
+ end
+
+ def justify(integer, way, padstr=' ') #:nodoc:
+ raise ArgumentError, "zero width padding" if padstr.length == 0
+ padsize = integer - size
+ padsize = padsize > 0 ? padsize : 0
+ case way
+ when :right
+ result = @wrapped_string.dup.insert(0, padding(padsize, padstr))
+ when :left
+ result = @wrapped_string.dup.insert(-1, padding(padsize, padstr))
+ when :center
+ lpad = padding((padsize / 2.0).floor, padstr)
+ rpad = padding((padsize / 2.0).ceil, padstr)
+ result = @wrapped_string.dup.insert(0, lpad).insert(-1, rpad)
+ end
+ chars(result)
+ end
+
+ def padding(padsize, padstr=' ') #:nodoc:
+ if padsize != 0
+ chars(padstr * ((padsize / Unicode.u_unpack(padstr).size) + 1)).slice(0, padsize)
+ else
+ ''
+ end
+ end
+
+ def chars(string) #:nodoc:
+ self.class.new(string)
+ end
+ end
+ end
+end
View
8 lib/mail/multibyte/exceptions.rb
@@ -0,0 +1,8 @@
+# encoding: utf-8
+
+module Mail #:nodoc:
+ module Multibyte #:nodoc:
+ # Raised when a problem with the encoding was found.
+ class EncodingError < StandardError; end
+ end
+end
View
392 lib/mail/multibyte/unicode.rb
@@ -0,0 +1,392 @@
+module Mail
+ module Multibyte
+ module Unicode
+
+ extend self
+
+ # A list of all available normalization forms. See http://www.unicode.org/reports/tr15/tr15-29.html for more
+ # information about normalization.
+ NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
+
+ # The Unicode version that is supported by the implementation
+ UNICODE_VERSION = '5.2.0'
+
+ # The default normalization used for operations that require normalization. It can be set to any of the
+ # normalizations in NORMALIZATION_FORMS.
+ #
+ # Example:
+ # Mail::Multibyte::Unicode.default_normalization_form = :c
+ attr_accessor :default_normalization_form
+ # Initial value, stored as an instance variable on the module itself; #normalize falls back to it
+ # when no form is passed.
+ @default_normalization_form = :kc
+
+ # Hangul character boundaries and properties
+ # *BASE values are the first codepoint of each range; *COUNT values are the range sizes.
+ # HANGUL_NCOUNT = 21 * 28 = 588 and HANGUL_SCOUNT = 19 * 588 = 11172.
+ # HANGUL_SLAST is one past the last syllable; decompose_codepoints uses it as an exclusive bound.
+ HANGUL_SBASE = 0xAC00
+ HANGUL_LBASE = 0x1100
+ HANGUL_VBASE = 0x1161
+ HANGUL_TBASE = 0x11A7
+ HANGUL_LCOUNT = 19
+ HANGUL_VCOUNT = 21
+ HANGUL_TCOUNT = 28
+ HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT
+ HANGUL_SCOUNT = 11172
+ HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT
+ HANGUL_JAMO_FIRST = 0x1100
+ HANGUL_JAMO_LAST = 0x11FF
+
+ # All the unicode whitespace
+ # Ranges are expanded with to_a and the list is flattened, so the constant is a flat, frozen Array of Integers.
+ WHITESPACE = [
+ (0x0009..0x000D).to_a, # White_Space # Cc [5] <control-0009>..<control-000D>
+ 0x0020, # White_Space # Zs SPACE
+ 0x0085, # White_Space # Cc <control-0085>
+ 0x00A0, # White_Space # Zs NO-BREAK SPACE
+ 0x1680, # White_Space # Zs OGHAM SPACE MARK
+ 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
+ (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
+ 0x2028, # White_Space # Zl LINE SEPARATOR
+ 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
+ 0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
+ 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
+ 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
+ ].flatten.freeze
+
+ # BOM (byte order mark) can also be seen as whitespace, it's a non-rendering character used to distinguish
+ # between little and big endian. This is not an issue in utf-8, so it must be ignored.
+ # 65279 is 0xFEFF in decimal.
+ LEADERS_AND_TRAILERS = WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM
+
+ # Returns a regular expression pattern that matches the passed Unicode codepoints
+ def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
+   # Encode every codepoint as its own UTF-8 string, then join them as regexp alternatives.
+   alternatives = array_of_codepoints.map { |codepoint| [codepoint].pack('U*') }
+   alternatives.join('|')
+ end
+ TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
+ LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
+
+ # Unpack the string at codepoints boundaries. Raises an EncodingError when the encoding of the string isn't
+ # valid UTF-8.
+ #
+ # Example:
+ # Unicode.u_unpack('Café') # => [67, 97, 102, 233]
+ def u_unpack(string)
+   string.unpack('U*')
+ rescue ArgumentError
+   # unpack signals invalid UTF-8 with ArgumentError; re-raise it as an encoding problem.
+   raise EncodingError, 'malformed UTF-8 character'
+ end
+
+ # Detect whether the codepoint is in a certain character class. Returns +true+ when it's in the specified
+ # character class and +false+ otherwise. Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
+ # <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
+ #
+ # Primarily used by the grapheme cluster support.
+ def in_char_class?(codepoint, classes)
+   # The first class whose boundary entry matches the codepoint via ===, or nil when none does.
+   matching_class = classes.detect { |char_class| database.boundary[char_class] === codepoint }
+   !!matching_class
+ end
+
+ # Unpack the string at grapheme boundaries. Returns a list of character lists.
+ #
+ # Example:
+ # Unicode.g_unpack('क्षि') # => [[2325, 2381], [2359], [2367]]
+ # Unicode.g_unpack('Café') # => [[67], [97], [102], [233]]
+ def g_unpack(string)
+   codepoints = u_unpack(string)
+   clusters = []
+   cluster_start = 0
+   index = 0
+   total = codepoints.length
+   while index < total
+     index += 1
+     previous = codepoints[index - 1]
+     current = codepoints[index]
+
+     # True when no grapheme break may occur between +previous+ and +current+.
+     joins_previous =
+       # CR X LF
+       (previous == database.boundary[:cr] && current == database.boundary[:lf]) ||
+       # L X (L|V|LV|LVT)
+       ((database.boundary[:l] === previous) && in_char_class?(current, [:l,:v,:lv,:lvt])) ||
+       # (LV|V) X (V|T)
+       (in_char_class?(previous, [:lv,:v]) && in_char_class?(current, [:v,:t])) ||
+       # (LVT|T) X (T)
+       (in_char_class?(previous, [:lvt,:t]) && (database.boundary[:t] === current)) ||
+       # X Extend
+       (database.boundary[:extend] === current)
+     next if joins_previous
+
+     # Break found: everything since the last break forms one cluster.
+     clusters << codepoints[cluster_start..(index - 1)]
+     cluster_start = index
+   end
+   clusters
+ end
+
+ # Reverse operation of g_unpack.
+ #
+ # Example:
+ # Unicode.g_pack(Unicode.g_unpack('क्षि')) # => 'क्षि'
+ def g_pack(unpacked)
+   # Collapse the per-grapheme lists into one flat list of codepoints before encoding.
+   codepoints = unpacked.flatten
+   codepoints.pack('U*')
+ end
+
+ # Re-order codepoints so the string becomes canonical.
+ # Swaps adjacent codepoints until no pair remains in which the first has a higher combining class than
+ # the second and the second's combining class is non-zero. Modifies +codepoints+ in place and returns it.
+ def reorder_characters(codepoints)
+ length = codepoints.length- 1
+ pos = 0
+ while pos < length do
+ cp1, cp2 = database.codepoints[codepoints[pos]], database.codepoints[codepoints[pos+1]]
+ if (cp1.combining_class > cp2.combining_class) && (cp2.combining_class > 0)
+ codepoints[pos..pos+1] = cp2.code, cp1.code
+ # After a swap, step back one slot so the moved codepoint is compared with its new left
+ # neighbour; at index 0 there is nothing to the left, so move forward instead.
+ pos += (pos > 0 ? -1 : 1)
+ else
+ pos += 1
+ end
+ end
+ codepoints
+ end
+
+ # Decompose composed characters to the decomposed form.
+ # Returns a new Array in which every decomposable codepoint of +codepoints+ is replaced by its decomposition.
+ #
+ # +type+ selects the decomposition: mappings that carry a decomp_type are only applied when +type+ is
+ # :compatability (spelled this way; it must match the symbol passed by #normalize). Mappings are
+ # expanded recursively.
+ def decompose_codepoints(type, codepoints)
+ codepoints.inject([]) do |decomposed, cp|
+ # if it's a hangul syllable starter character
+ if HANGUL_SBASE <= cp and cp < HANGUL_SLAST
+ sindex = cp - HANGUL_SBASE
+ ncp = [] # new codepoints
+ ncp << HANGUL_LBASE + sindex / HANGUL_NCOUNT
+ ncp << HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
+ tindex = sindex % HANGUL_TCOUNT
+ # a trailing consonant is only emitted when the syllable has one (tindex 0 means none)
+ ncp << (HANGUL_TBASE + tindex) unless tindex == 0
+ decomposed.concat ncp
+ # if the codepoint is decomposable with the current decomposition type
+ elsif (ncp = database.codepoints[cp].decomp_mapping) and (!database.codepoints[cp].decomp_type || type == :compatability)
+ # recurse on a copy so the mapping stored in the database is not handed to the callee
+ decomposed.concat decompose_codepoints(type, ncp.dup)
+ else
+ decomposed << cp
+ end
+ end
+ end
+
+ # Compose decomposed characters to the composed form.
+ # Walks +codepoints+ once, merging a starter with following codepoints where a composition exists.
+ # Modifies the Array in place and returns it.
+ #
+ # Hangul jamo are combined arithmetically from the HANGUL_* constants; all other compositions are
+ # looked up in database.composition_map.
+ def compose_codepoints(codepoints)
+ pos = 0
+ eoa = codepoints.length - 1
+ starter_pos = 0
+ starter_char = codepoints[0]
+ previous_combining_class = -1
+ while pos < eoa
+ pos += 1
+ lindex = starter_char - HANGUL_LBASE
+ # -- Hangul
+ if 0 <= lindex and lindex < HANGUL_LCOUNT
+ # The rescue modifier yields -1 when the lookup runs past the end of the array (nil - Integer raises).
+ vindex = codepoints[starter_pos+1] - HANGUL_VBASE rescue vindex = -1
+ if 0 <= vindex and vindex < HANGUL_VCOUNT
+ tindex = codepoints[starter_pos+2] - HANGUL_TBASE rescue tindex = -1
+ if 0 <= tindex and tindex < HANGUL_TCOUNT
+ # leading + vowel + trailing: three codepoints collapse into one, so the array shrinks by two
+ j = starter_pos + 2
+ eoa -= 2
+ else
+ # leading + vowel only: two codepoints collapse into one
+ tindex = 0
+ j = starter_pos + 1
+ eoa -= 1
+ end
+ # replace the jamo run with the single precomposed syllable
+ codepoints[starter_pos..j] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE
+ end
+ starter_pos += 1
+ starter_char = codepoints[starter_pos]
+ # -- Other characters
+ else
+ current_char = codepoints[pos]
+ current = database.codepoints[current_char]
+ # Composition is only attempted when the combining class rises above the previous one.
+ if current.combining_class > previous_combining_class
+ if ref = database.composition_map[starter_char]
+ composition = ref[current_char]
+ else
+ composition = nil
+ end
+ unless composition.nil?
+ # Merge: the starter becomes the composed codepoint and the current codepoint is removed,
+ # so both the end marker and the position move back by one.
+ codepoints[starter_pos] = composition
+ starter_char = composition
+ codepoints.delete_at pos
+ eoa -= 1
+ pos -= 1
+ previous_combining_class = -1
+ else
+ previous_combining_class = current.combining_class
+ end
+ else
+ previous_combining_class = current.combining_class
+ end
+ # A codepoint with combining class 0 becomes the new starter.
+ if current.combining_class == 0
+ starter_pos = pos
+ starter_char = codepoints[pos]
+ end
+ end
+ end
+ codepoints
+ end
+
+ # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
+ #
+ # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
+ # * <tt>string</tt> - The String whose bytes are inspected.
+ # * <tt>force</tt> - When true, every byte is passed through tidy_byte without looking at its context.
+ #
+ # Returns a new String built from the cleaned bytes.
+ def tidy_bytes(string, force = false)
+ if force
+ return string.unpack("C*").map do |b|
+ tidy_byte(b)
+ end.flatten.compact.pack("C*").unpack("U*").pack("U*")
+ end
+
+ bytes = string.unpack("C*")
+ # number of continuation bytes still owed to the most recent leading byte
+ conts_expected = 0
+ # index of the most recent leading byte
+ last_lead = 0
+
+ bytes.each_index do |i|
+
+ byte = bytes[i]
+ is_cont = byte > 127 && byte < 192
+ is_lead = byte > 191 && byte < 245
+ is_unused = byte > 240
+ is_restricted = byte > 244
+ # NOTE(review): bytes 241..244 satisfy both is_lead and is_unused; the check below runs first,
+ # so they are always cleaned -- confirm this is intended.
+
+ # Impossible or highly unlikely byte? Clean it.
+ if is_unused || is_restricted
+ bytes[i] = tidy_byte(byte)
+ elsif is_cont
+ # Not expecting continuation byte? Clean up. Otherwise, now expect one less.
+ conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
+ else
+ if conts_expected > 0
+ # Expected continuation, but got ASCII or leading? Clean backwards up to
+ # the leading byte.
+ (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
+ conts_expected = 0
+ end
+ if is_lead
+ # Final byte is leading? Clean it.
+ if i == bytes.length - 1
+ bytes[i] = tidy_byte(bytes.last)
+ else
+ # Valid leading byte? Expect continuations determined by position of
+ # first zero bit, with max of 3.
+ conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
+ last_lead = i
+ end
+ end
+ end
+ end
+ # tidy_byte returns arrays of bytes, so the list is flattened before being packed again.
+ bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
+ end
+
+ # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
+ # passing strings to databases and validations.
+ #
+ # * <tt>string</tt> - The string to perform normalization on.
+ # * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
+ # <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
+ # Mail::Multibyte::Unicode.default_normalization_form
+ def normalize(string, form=nil)
+   form ||= @default_normalization_form
+   # See http://www.unicode.org/reports/tr15, Table 1
+   codepoints = u_unpack(string)
+
+   normalized =
+     case form
+     when :d then reorder_characters(decompose_codepoints(:canonical, codepoints))
+     when :c then compose_codepoints(reorder_characters(decompose_codepoints(:canonical, codepoints)))
+     when :kd then reorder_characters(decompose_codepoints(:compatability, codepoints))
+     when :kc then compose_codepoints(reorder_characters(decompose_codepoints(:compatability, codepoints)))
+     else
+       # Unknown form: report the error from the caller's point of view.
+       raise ArgumentError, "#{form} is not a valid normalization variant", caller
+     end
+   normalized.pack('U*')
+ end
+
+ # Replaces each codepoint of +string+ with the value of its +mapping+ attribute in the database when
+ # that value is a positive number; other codepoints are kept unchanged. Returns a new String.
+ def apply_mapping(string, mapping) #:nodoc:
+   mapped_codepoints = u_unpack(string).map do |codepoint|
+     entry = database.codepoints[codepoint]
+     replacement = entry && entry.send(mapping)
+     (replacement && replacement > 0) ? replacement : codepoint
+   end
+   mapped_codepoints.pack('U*')
+ end
+
+ # Holds data about a codepoint in the Unicode database
+ class Codepoint
+ # code             - the codepoint number (written back by reorder_characters when it swaps entries)
+ # combining_class  - compared by reorder_characters and compose_codepoints
+ # decomp_type      - when set, decompose_codepoints applies the mapping only for :compatability
+ # decomp_mapping   - list of codepoints this codepoint decomposes into, or nil
+ # uppercase_mapping, lowercase_mapping - presumably the attributes read through apply_mapping; verify against callers
+ attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
+ end
+
+ # Holds static data from the Unicode database
+ class UnicodeDatabase
+   # Names of the tables held by the database; each gets a writer and a lazy-loading reader.
+   ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
+
+   attr_writer(*ATTRIBUTES)
+
+   # Starts with empty tables; the real data is read from disk by #load on first access.
+   def initialize
+     @codepoints = Hash.new(Codepoint.new)
+     @composition_exclusion = []
+     @composition_map = {}
+     @boundary = {}
+     @cp1252 = {}
+   end
+
+   # Lazy load the Unicode database so it's only loaded when it's actually used
+   ATTRIBUTES.each do |attr_name|
+     class_eval(<<-EOS, __FILE__, __LINE__ + 1)
+       def #{attr_name}     # def codepoints
+         load               #   load
+         @#{attr_name}      #   @codepoints
+       end                  # end
+     EOS
+   end
+
+   # Loads the Unicode database and returns all the internal objects of UnicodeDatabase.
+   #
+   # Raises IOError when the data file cannot be read or unmarshalled.
+   def load
+     begin
+       @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
+     rescue StandardError => e
+       # Only ordinary failures are translated; signals, exits and memory errors propagate untouched.
+       raise IOError, "Couldn't load the Unicode tables for UTF8Handler (#{e.message}), Mail::Multibyte is unusable"
+     end
+
+     # Redefine the === method so we can write shorter rules for grapheme cluster breaks
+     @boundary.each do |k,_|
+       @boundary[k].instance_eval do
+         def ===(other)
+           detect { |i| i === other } ? true : false
+         end
+       end if @boundary[k].kind_of?(Array)
+     end
+
+     # define attr_reader methods for the instance variables
+     # (on this instance's singleton class, so later reads bypass the lazy-loading readers)
+     class << self
+       attr_reader(*ATTRIBUTES)
+     end
+   end
+
+   # Returns the directory in which the data files are stored
+   def self.dirname
+     File.dirname(__FILE__) + '/../values/'
+   end
+
+   # Returns the filename for the data file for this version
+   def self.filename
+     File.expand_path File.join(dirname, "unicode_tables.dat")
+   end
+ end
+
+ private
+
+ # Maps one byte to the list of bytes that replaces it in the tidied output.
+ def tidy_byte(byte)
+   # 192 and above: two bytes, lead 195 followed by the byte shifted down by 64.
+   return [195, byte - 64] unless byte < 192
+   # 160..191: two bytes, lead 194 followed by the byte itself.
+   return [194, byte] unless byte < 160
+
+   # Below 160: use the cp1252 table entry when there is one, otherwise the byte value itself,
+   # and return its UTF-8 bytes.
+   [database.cp1252[byte] || byte].pack("U").unpack("C*")
+ end
+
+ # Returns the UnicodeDatabase instance, creating it on first use and reusing it afterwards.
+ def database
+ @database ||= UnicodeDatabase.new
+ end
+
+ end
+ end
+end
View
60 lib/mail/multibyte/utils.rb
@@ -0,0 +1,60 @@
+# encoding: utf-8
+
+module Mail #:nodoc:
+ module Multibyte #:nodoc:
+ # Each helper below has two definitions; the one that gets defined is chosen once, when this file is
+ # loaded, by probing what the running interpreter supports.
+ if Kernel.const_defined?(:Encoding)
+ # Returns a regular expression that matches valid characters in the current encoding
+ def self.valid_character
+ VALID_CHARACTER[Encoding.default_external.to_s]
+ end
+ else
+ # Without an Encoding constant the encoding is taken from $KCODE; returns nil for any value
+ # other than 'UTF8' or 'SJIS'.
+ def self.valid_character
+ case $KCODE
+ when 'UTF8'
+ VALID_CHARACTER['UTF-8']
+ when 'SJIS'
+ VALID_CHARACTER['Shift_JIS']
+ end
+ end
+ end
+
+ if 'string'.respond_to?(:valid_encoding?)
+ # Verifies the encoding of a string
+ def self.verify(string)
+ string.valid_encoding?
+ end
+ else
+ # Fallback: checks every character against valid_character; returns true when no expression
+ # is available for the current $KCODE.
+ def self.verify(string)
+ if expression = valid_character
+ # Splits the string on character boundaries, which are determined based on $KCODE.
+ string.split(//).all? { |c| expression =~ c }
+ else
+ true
+ end
+ end
+ end
+
+ # Verifies the encoding of the string and raises an exception when it's not valid
+ def self.verify!(string)
+ raise EncodingError.new("Found characters with invalid encoding") unless verify(string)
+ end
+
+ if 'string'.respond_to?(:force_encoding)
+ # Removes all invalid characters from the string.
+ #
+ # Note: this method is a no-op in Ruby 1.9
+ def self.clean(string)
+ string
+ end
+ else
+ # Fallback: keeps only the characters that match valid_character; returns the string unchanged
+ # when no expression is available for the current $KCODE.
+ def self.clean(string)
+ if expression = valid_character
+ # Splits the string on character boundaries, which are determined based on $KCODE.
+ string.split(//).grep(expression).join
+ else
+ string
+ end
+ end
+ end
+ end
+end
View
30 lib/mail/version_specific/ruby_1_8.rb
@@ -1,9 +1,5 @@
# encoding: utf-8
-# For multibyte strings in Ruby 1.8
-require 'active_support'
-require 'active_support/core_ext/string'
-
module Mail
class Ruby18
require 'base64'
@@ -19,13 +15,13 @@ def Ruby18.escape_paren( str )
re = /([\(\)])/ # Only match unescaped parens
str.gsub(re) { |s| '\\' + s }
end
-
+
def Ruby18.paren( str )
str = $1 if str =~ /^\((.*)?\)$/
str = escape_paren( str )
'(' + str + ')'
end
-
+
def Ruby18.escape_bracket( str )
re = /\\\>/
str = str.gsub(re) { |s| '>'}
@@ -34,36 +30,36 @@ def Ruby18.escape_bracket( str )
re = /([\<\>])/ # Only match unescaped parens
str.gsub(re) { |s| '\\' + s }
end
-
+
def Ruby18.bracket( str )
str = $1 if str =~ /^\<(.*)?\>$/
str = escape_bracket( str )
'<' + str + '>'
end
-
+
def Ruby18.decode_base64(str)
Base64.decode64(str) if str
end
-
+
def Ruby18.encode_base64(str)
Base64.encode64(str)
end
-
+
def Ruby18.has_constant?(klass, string)
klass.constants.include?( string )
end
-
+
def Ruby18.get_constant(klass, string)
klass.const_get( string )
end
-
+
def Ruby18.b_value_encode(str, encoding)
# Ruby 1.8 requires an encoding to work
raise ArgumentError, "Must supply an encoding" if encoding.nil?
encoding = encoding.to_s.upcase.gsub('_', '-')
[Encodings::Base64.encode(str), encoding]
end
-
+
def Ruby18.b_value_decode(str)
match = str.match(/\=\?(.+)?\?[Bb]\?(.+)?\?\=/m)
if match
@@ -72,14 +68,14 @@ def Ruby18.b_value_decode(str)
end
str
end
-
+
def Ruby18.q_value_encode(str, encoding)
# Ruby 1.8 requires an encoding to work
raise ArgumentError, "Must supply an encoding" if encoding.nil?
encoding = encoding.to_s.upcase.gsub('_', '-')
[Encodings::QuotedPrintable.encode(str), encoding]
end
-
+
def Ruby18.q_value_decode(str)
match = str.match(/\=\?(.+)?\?[Qq]\?(.+)?\?\=/m)
if match
@@ -88,11 +84,11 @@ def Ruby18.q_value_decode(str)
end
str
end
-
+
def Ruby18.param_decode(str, encoding)
URI.unescape(str)
end
-
+
def Ruby18.param_encode(str)
encoding = $KCODE.to_s.downcase
language = Configuration.instance.param_encode_language
View
9 lib/mail/version_specific/ruby_1_9.rb
@@ -1,14 +1,5 @@
# encoding: utf-8
-unless ''.respond_to?(:mb_chars)
- class String
- # Compatability with ActiveSupport, which returns self in 1.9
- def mb_chars
- self
- end
- end
-end
-
module Mail
class Ruby19
View
4 spec/mail/fields/common/parameter_hash_spec.rb
@@ -5,8 +5,8 @@
it "should return the values in the hash" do
hash = Mail::ParameterHash.new
hash.merge!({'value1' => 'one', 'value2' => 'two'})
- hash.keys.should include(:value1)
- hash.keys.should include(:value2)
+ hash.keys.should include("value1")
+ hash.keys.should include("value2")
hash.values.should include('one')
hash.values.should include('two')
end
View
4 spec/mail/fields/content_disposition_field_spec.rb
@@ -53,12 +53,12 @@
c.decoded.should == 'attachment'
end
end
-
+
describe "instance methods" do
it "should give it's disposition type" do
c = Mail::ContentDispositionField.new('Content-Disposition: attachment; filename=File')
c.disposition_type.should == 'attachment'
- c.parameters.should == {:filename => 'File'}
+ c.parameters.should == {"filename" => 'File'}
end
# see spec/fixtures/trec_2005_corpus/missing_content_disposition.eml
View
60 spec/mail/fields/content_type_field_spec.rb
@@ -147,17 +147,17 @@
it "should return a parameter as a hash" do
c = Mail::ContentTypeField.new('text/plain; charset=US-ASCII')
- c.parameters.should == {:charset => 'US-ASCII'}
+ c.parameters.should == {"charset" => 'US-ASCII'}
end
it "should return multiple parameters as a hash" do
c = Mail::ContentTypeField.new('text/plain; charset=US-ASCII; format=flowed')
- c.parameters.should == {:charset => 'US-ASCII', :format => 'flowed'}
+ c.parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'}
end
it "should return boundry parameters" do
c = Mail::ContentTypeField.new('multipart/mixed; boundary=Apple-Mail-13-196941151')
- c.parameters.should == {:boundary => 'Apple-Mail-13-196941151'}
+ c.parameters.should == {"boundary" => 'Apple-Mail-13-196941151'}
end
it "should be indifferent with the access" do
@@ -197,7 +197,7 @@
c.content_type.should == 'application/octet-stream'
c.main_type.should == 'application'
c.sub_type.should == 'octet-stream'
- c.parameters.should == {:'name*' => "iso-2022-jp'ja'01%20Quien%20Te%20Dij%8aat.%20Pitbull.mp3"}
+ c.parameters.should == {'name*' => "iso-2022-jp'ja'01%20Quien%20Te%20Dij%8aat.%20Pitbull.mp3"}
end
it "should handle 'application/pdf;'" do
@@ -215,7 +215,7 @@
c.content_type.should == 'application/pdf'
c.main_type.should == 'application'
c.sub_type.should == 'pdf'
- c.parameters.should == {:name => "broken.pdf"}
+ c.parameters.should == {"name" => "broken.pdf"}
end
it "should handle 'application/pkcs7-signature;'" do
@@ -233,7 +233,7 @@
c.content_type.should == 'application/pkcs7-signature'
c.main_type.should == 'application'
c.sub_type.should == 'pkcs7-signature'
- c.parameters.should == {:name => "smime.p7s"}
+ c.parameters.should == {"name" => "smime.p7s"}
end
it "should handle 'application/x-gzip; NAME=blah.gz'" do
@@ -242,7 +242,7 @@
c.content_type.should == 'application/x-gzip'
c.main_type.should == 'application'
c.sub_type.should == 'x-gzip'
- c.parameters.should == {:NAME => "blah.gz"}
+ c.parameters.should == {"NAME" => "blah.gz"}
end
it "should handle 'image/jpeg'" do
@@ -314,7 +314,7 @@
c.content_type.should == 'multipart/alternative'
c.main_type.should == 'multipart'
c.sub_type.should == 'alternative'
- c.parameters.should == {:boundary =>"----=_NextPart_000_0093_01C81419.EB75E850"}
+ c.parameters.should == {"boundary" =>"----=_NextPart_000_0093_01C81419.EB75E850"}
end
it "should handle 'multipart/alternative; boundary=----=_NextPart_000_0093_01C81419.EB75E850'" do
@@ -323,7 +323,7 @@
c.content_type.should == 'multipart/alternative'
c.main_type.should == 'multipart'
c.sub_type.should == 'alternative'
- c.parameters.should == {:boundary =>"----=_NextPart_000_0093_01C81419.EB75E850"}
+ c.parameters.should == {"boundary" =>"----=_NextPart_000_0093_01C81419.EB75E850"}
end
it "should handle 'Multipart/Alternative;boundary=MuLtIpArT_BoUnDaRy'" do
@@ -332,7 +332,7 @@
c.content_type.should == 'multipart/alternative'
c.main_type.should == 'multipart'
c.sub_type.should == 'alternative'
- c.parameters.should == {:boundary =>"MuLtIpArT_BoUnDaRy"}
+ c.parameters.should == {"boundary" =>"MuLtIpArT_BoUnDaRy"}
end
it "should handle 'Multipart/Alternative;boundary=MuLtIpArT_BoUnDaRy'" do
@@ -341,7 +341,7 @@
c.content_type.should == 'multipart/alternative'
c.main_type.should == 'multipart'
c.sub_type.should == 'alternative'
- c.parameters.should == {:boundary =>"MuLtIpArT_BoUnDaRy"}
+ c.parameters.should == {"boundary" =>"MuLtIpArT_BoUnDaRy"}
end
it "should handle 'multipart/mixed'" do
@@ -368,7 +368,7 @@
c.content_type.should == 'multipart/mixed'
c.main_type.should == 'multipart'
c.sub_type.should == 'mixed'
- c.parameters.should == {:boundary => "Apple-Mail-13-196941151"}
+ c.parameters.should == {"boundary" => "Apple-Mail-13-196941151"}
end
it "should handle 'multipart/mixed; boundary=mimepart_427e4cb4ca329_133ae40413c81ef'" do
@@ -377,7 +377,7 @@
c.content_type.should == 'multipart/mixed'
c.main_type.should == 'multipart'
c.sub_type.should == 'mixed'
- c.parameters.should == {:boundary => "mimepart_427e4cb4ca329_133ae40413c81ef"}
+ c.parameters.should == {"boundary" => "mimepart_427e4cb4ca329_133ae40413c81ef"}
end
it "should handle 'multipart/report; report-type=delivery-status;'" do
@@ -386,7 +386,7 @@
c.content_type.should == 'multipart/report'
c.main_type.should == 'multipart'
c.sub_type.should == 'report'
- c.parameters.should == {:"report-type" => "delivery-status"}
+ c.parameters.should == {"report-type" => "delivery-status"}
end
it "should handle 'multipart/signed;'" do
@@ -422,7 +422,7 @@
c.content_type.should == 'text/html'
c.main_type.should == 'text'
c.sub_type.should == 'html'
- c.parameters.should == {:charset => 'iso-8859-1'}
+ c.parameters.should == {"charset" => 'iso-8859-1'}
end
it "should handle 'TEXT/PLAIN; charset=ISO-8859-1;'" do
@@ -431,7 +431,7 @@
c.content_type.should == 'text/plain'
c.main_type.should == 'text'
c.sub_type.should == 'plain'
- c.parameters.should == {:charset => 'ISO-8859-1'}
+ c.parameters.should == {"charset" => 'ISO-8859-1'}
end
it "should handle 'text/plain'" do
@@ -458,7 +458,7 @@
c.content_type.should == 'text/plain'
c.main_type.should == 'text'
c.sub_type.should == 'plain'
- c.parameters.should == {:charset => 'ISO-8859-1'}
+ c.parameters.should == {"charset" => 'ISO-8859-1'}
end
it "should handle 'text/plain; charset=ISO-8859-1;'" do
@@ -467,7 +467,7 @@
c.content_type.should == 'text/plain'
c.main_type.should == 'text'
c.sub_type.should == 'plain'
- c.parameters.should == {:charset => 'ISO-8859-1', :format => 'flowed'}
+ c.parameters.should == {"charset" => 'ISO-8859-1', "format" => 'flowed'}
end
it "should handle 'text/plain; charset=us-ascii;'" do
@@ -476,7 +476,7 @@
c.content_type.should == 'text/plain'
c.main_type.should == 'text'
c.sub_type.should == 'plain'
- c.parameters.should == {:charset => 'us-ascii'}
+ c.parameters.should == {"charset" => 'us-ascii'}
end
it "should handle 'text/plain; charset=US-ASCII; format=flowed'" do
@@ -485,7 +485,7 @@
c.content_type.should == 'text/plain'
c.main_type.should == 'text'
c.sub_type.should == 'plain'
- c.parameters.should == {:charset => 'US-ASCII', :format => 'flowed'}
+ c.parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'}
end
it "should handle 'text/plain; charset=US-ASCII; format=flowed'" do
@@ -494,7 +494,7 @@
c.content_type.should == 'text/plain'
c.main_type.should == 'text'
c.sub_type.should == 'plain'
- c.parameters.should == {:charset => 'US-ASCII', :format => 'flowed'}
+ c.parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'}
end
it "should handle 'text/plain; charset=utf-8'" do
@@ -503,7 +503,7 @@
c.content_type.should == 'text/plain'
c.main_type.should == 'text'
c.sub_type.should == 'plain'
- c.parameters.should == {:charset => 'utf-8'}
+ c.parameters.should == {"charset" => 'utf-8'}
end
it "should handle 'text/plain; charset=utf-8'" do
@@ -512,7 +512,7 @@
c.content_type.should == 'text/plain'
c.main_type.should == 'text'
c.sub_type.should == 'plain'
- c.parameters.should == {:charset => 'X-UNKNOWN'}
+ c.parameters.should == {"charset" => 'X-UNKNOWN'}
end
it "should handle 'text/x-ruby-script;'" do
@@ -530,7 +530,7 @@
c.content_type.should == 'text/x-ruby-script'
c.main_type.should == 'text'
c.sub_type.should == 'x-ruby-script'
- c.parameters.should == {:name => 'hello.rb'}
+ c.parameters.should == {"name" => 'hello.rb'}
end
it "should handle 'multipart/mixed; boundary=\"=_NextPart_Lycos_15031600484464_ID\"" do
@@ -539,7 +539,7 @@
c.content_type.should == 'multipart/mixed'
c.main_type.should == 'multipart'
c.sub_type.should == 'mixed'
- c.parameters.should == {:boundary => '=_NextPart_Lycos_15031600484464_ID'}
+ c.parameters.should == {"boundary" => '=_NextPart_Lycos_15031600484464_ID'}
end
it "should handle 'multipart/alternative; boundary=----=_=NextPart_000_0093_01C81419.EB75E850" do
@@ -548,7 +548,7 @@
c.content_type.should == 'multipart/alternative'
c.main_type.should == 'multipart'
c.sub_type.should == 'alternative'
- c.parameters.should == {:boundary => '----=_=NextPart_000_0093_01C81419.EB75E850'}
+ c.parameters.should == {"boundary" => '----=_=NextPart_000_0093_01C81419.EB75E850'}
end
it "should handle 'multipart/alternative; boundary=\"----=_=NextPart_000_0093_01C81419.EB75E850\"" do
@@ -557,7 +557,7 @@
c.content_type.should == 'multipart/alternative'
c.main_type.should == 'multipart'
c.sub_type.should == 'alternative'
- c.parameters.should == {:boundary => '----=_=NextPart_000_0093_01C81419.EB75E850'}
+ c.parameters.should == {"boundary" => '----=_=NextPart_000_0093_01C81419.EB75E850'}
end
it "should handle 'multipart/related;boundary=1_4626B816_9F1690;Type=\"application/smil\";Start=\"<mms.smil.txt>\"'" do
@@ -566,7 +566,7 @@
c.content_type.should == 'multipart/related'
c.main_type.should == 'multipart'
c.sub_type.should == 'related'
- c.parameters.should == {:boundary => '1_4626B816_9F1690', :Type => 'application/smil', :Start => '<mms.smil.txt>'}
+ c.parameters.should == {"boundary" => '1_4626B816_9F1690', "Type" => 'application/smil', "Start" => '<mms.smil.txt>'}
end
it "should handle 'IMAGE/JPEG; name=\"IM 006.jpg\"'" do
@@ -575,7 +575,7 @@
c.content_type.should == 'image/jpeg'
c.main_type.should == 'image'
c.sub_type.should == 'jpeg'
- c.parameters.should == {:name => "IM 006.jpg"}
+ c.parameters.should == {"name" => "IM 006.jpg"}
end
end
@@ -623,7 +623,7 @@
result = %Q{Content-Type: application/octet-stream;\r\n\sfilename*=sjis'jp'01%20Quien%20Te%20Dij%91at.%20Pitbull.mp3\r\n}
end
c.filename = string
- c.parameters.should == {:filename => string}
+ c.parameters.should == {"filename" => string}
c.encoded.should == result
$KCODE = @original if RUBY_VERSION < '1.9'
end
View
7 spec/mail/message_spec.rb
@@ -1065,19 +1065,19 @@ def basic_email
mail = Mail.new
mail.content_type = ["text", "plain", { :charset => 'US-ASCII' }]
mail[:content_type].encoded.should == %Q[Content-Type: text/plain;\r\n\scharset=US-ASCII\r\n]
- mail.content_type_parameters.should == {:charset => "US-ASCII"}
+ mail.content_type_parameters.should == {"charset" => "US-ASCII"}
end
it "should be able to set a content type with an array and hash with a non-usascii field" do
mail = Mail.new
mail.content_type = ["text", "plain", { :charset => 'UTF-8' }]
mail[:content_type].encoded.should == %Q[Content-Type: text/plain;\r\n\scharset=UTF-8\r\n]
- mail.content_type_parameters.should == {:charset => "UTF-8"}
+ mail.content_type_parameters.should == {"charset" => "UTF-8"}
end
it "should allow us to specify a content type in a block" do
mail = Mail.new { content_type ["text", "plain", { "charset" => "UTF-8" }] }
- mail.content_type_parameters.should == {:charset => "UTF-8"}
+ mail.content_type_parameters.should == {"charset" => "UTF-8"}
end
end
@@ -1494,6 +1494,7 @@ def self.delivering_email(mail)
it "shouldn't die with an invalid Content-Type header" do
mail = Mail.new('Content-Type: invalid/invalid; charset="iso-8859-1"')
+ mail.attachment?
doing { mail.attachment? }.should_not raise_error
end
View
4 spec/mail/mime_messages_spec.rb
@@ -53,7 +53,7 @@
it "should return the content-type parameters" do
mail = Mail.new("Content-Type: text/plain; charset=US-ASCII; format=flowed")
- mail.content_type_parameters.should == {:charset => 'US-ASCII', :format => 'flowed'}
+ mail.content_type_parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'}
end
it "should recognize a multipart email" do
@@ -455,7 +455,7 @@
m.parts.first[:content_type].content_type.should == 'image/png'
m.parts.last[:content_type].content_type.should == 'text/plain'
end
-
+
it "should allow you to add a body as text part if you have added a file and not truncate after newlines - issue 208" do
m = Mail.new do
from 'mikel@from.lindsaar.net'

0 comments on commit 6eb4c44

Please sign in to comment.
Something went wrong with that request. Please try again.