Permalink
Browse files

Ripped out and replaced the entire encoding / decoding suite of tools…

…. :encoded anything gets you the item ready to put into an email. :decoded anything gets you the item's value decoded and ready to view... there, that's simple, isn't it? All specs passing, warnings in 1.9.1 on regexp encoding problems...
  • Loading branch information...
1 parent 9c085f1 commit e5840b7cc80725790ae9822dfe4e56c80e24df59 @mikel committed Nov 4, 2009
Showing with 1,149 additions and 243 deletions.
  1. +4 −0 CHANGELOG.rdoc
  2. +23 −0 README.rdoc
  3. +4 −0 lib/mail/configuration.rb
  4. +11 −0 lib/mail/elements/address.rb
  5. +94 −18 lib/mail/encodings/encodings.rb
  6. +1 −1 lib/mail/encodings/quoted_printable.rb
  7. +9 −0 lib/mail/fields/bcc_field.rb
  8. +9 −0 lib/mail/fields/cc_field.rb
  9. +25 −1 lib/mail/fields/common/common_address.rb
  10. +8 −5 lib/mail/fields/common/common_date.rb
  11. +1 −70 lib/mail/fields/common/common_field.rb
  12. +10 −0 lib/mail/fields/common/common_message_id.rb
  13. +10 −0 lib/mail/fields/common/parameter_hash.rb
  14. +11 −1 lib/mail/fields/content_disposition_field.rb
  15. +10 −0 lib/mail/fields/content_id_field.rb
  16. +11 −1 lib/mail/fields/content_location_field.rb
  17. +11 −1 lib/mail/fields/content_transfer_encoding_field.rb
  18. +12 −2 lib/mail/fields/content_type_field.rb
  19. +9 −0 lib/mail/fields/date_field.rb
  20. +9 −0 lib/mail/fields/from_field.rb
  21. +9 −0 lib/mail/fields/in_reply_to_field.rb
  22. +9 −0 lib/mail/fields/keywords_field.rb
  23. +9 −0 lib/mail/fields/message_id_field.rb
  24. +9 −0 lib/mail/fields/mime_version_field.rb
  25. +9 −0 lib/mail/fields/received_field.rb
  26. +9 −0 lib/mail/fields/references_field.rb
  27. +9 −0 lib/mail/fields/reply_to_field.rb
  28. +9 −0 lib/mail/fields/resent_bcc_field.rb
  29. +9 −0 lib/mail/fields/resent_cc_field.rb
  30. +9 −0 lib/mail/fields/resent_date_field.rb
  31. +9 −0 lib/mail/fields/resent_from_field.rb
  32. +9 −0 lib/mail/fields/resent_message_id_field.rb
  33. +9 −0 lib/mail/fields/resent_sender_field.rb
  34. +9 −0 lib/mail/fields/resent_to_field.rb
  35. +9 −0 lib/mail/fields/return_path_field.rb
  36. +9 −0 lib/mail/fields/sender_field.rb
  37. +9 −0 lib/mail/fields/to_field.rb
  38. +82 −0 lib/mail/fields/unstructured_field.rb
  39. +1 −1 lib/mail/header.rb
  40. +11 −9 lib/mail/message.rb
  41. +7 −1 lib/mail/utilities.rb
  42. +26 −2 lib/mail/version_specific/ruby_1_8.rb
  43. +27 −1 lib/mail/version_specific/ruby_1_9.rb
  44. +2 −0 lib/tasks/corpus.rake
  45. +33 −0 spec/fixtures/emails/error_emails/header_fields_with_empty_values.eml
  46. +28 −0 spec/mail/elements/address_spec.rb
  47. +130 −17 spec/mail/encodings/encodings_spec.rb
  48. +6 −1 spec/mail/fields/bcc_field_spec.rb
  49. +6 −1 spec/mail/fields/cc_field_spec.rb
  50. +40 −0 spec/mail/fields/common/common_address_spec.rb
  51. +25 −0 spec/mail/fields/common/common_date_spec.rb
  52. +36 −73 spec/mail/fields/common/common_field_spec.rb
  53. +29 −0 spec/mail/fields/common/common_message_id_spec.rb
  54. +8 −0 spec/mail/fields/common/parameter_hash_spec.rb
  55. +10 −0 spec/mail/fields/content_disposition_field_spec.rb
  56. +10 −0 spec/mail/fields/content_id_field_spec.rb
  57. +10 −0 spec/mail/fields/content_location_field_spec.rb
  58. +10 −0 spec/mail/fields/content_transfer_encoding_field_spec.rb
  59. +29 −10 spec/mail/fields/content_type_field_spec.rb
  60. +11 −0 spec/mail/fields/date_field_spec.rb
  61. +11 −1 spec/mail/fields/from_field_spec.rb
  62. +21 −0 spec/mail/fields/in_reply_to_field_spec.rb
  63. +12 −0 spec/mail/fields/keywords_field_spec.rb
  64. +5 −0 spec/mail/fields/message_id_field_spec.rb
  65. +10 −0 spec/mail/fields/mime_version_field_spec.rb
  66. +10 −0 spec/mail/fields/received_field_spec.rb
  67. +1 −1 spec/mail/fields/reply_to_field_spec.rb
  68. +1 −1 spec/mail/fields/resent_bcc_field_spec.rb
  69. +1 −1 spec/mail/fields/resent_cc_field_spec.rb
  70. +1 −1 spec/mail/fields/resent_from_field_spec.rb
  71. +1 −1 spec/mail/fields/resent_to_field_spec.rb
  72. +15 −7 spec/mail/fields/structured_field_spec.rb
  73. +6 −1 spec/mail/fields/to_field_spec.rb
  74. +27 −3 spec/mail/fields/unstructured_field_spec.rb
  75. +2 −2 spec/mail/header_spec.rb
  76. +10 −5 spec/mail/message_spec.rb
  77. +3 −3 spec/mail/part_spec.rb
View
@@ -1,3 +1,7 @@
+== Wed Nov 4 12:54:43 UTC 2009 Mikel Lindsaar <raasdnil@gmail.com>
+
+* Renamed Mail::Message.encode! to Mail::Message.ready_to_send!, deprecated :encode!
+
== Tue Nov 3 00:59:45 UTC 2009 Mikel Lindsaar <raasdnil@gmail.com>
* Tested mail against entire Enron set (2.3gb) and the Trec 2005 set (0.5gb), ~ half a million emails without crashing
View
@@ -91,6 +91,29 @@ If you want to install mail manually, you can download the gem from github and d
# gem install mail-1.0.0.gem
+== Encodings
+
+If you didn't know, handling encodings in emails is not as straightforward as you
+would hope.
+
+I have tried to simplify it some:
+
+1. All objects that can render into an email, have an :encoded method. Encoded will
+ return the object as a complete string ready to send in the mail system, that is,
+ it will include the header field and value and CRLF at the end and wrapped as
+ needed.
+2. All objects that can render into an email, have a :decoded method. Decoded will
+ return the object's "value" only as a string. This means it will not include
+ the header fields (like 'To:' or 'Subject:').
+3. By default, calling :to_s on an object will call its encoded method, that is, make
+ it ready to send in an email.
+4. Structured fields that have parameter values that can be encoded (e.g. Content-Type) will
+ provide decoded parameter values when you call the parameter names as methods against
+ the object.
+5. Structured fields that have parameter values that can be encoded (e.g. Content-Type) will
+ provide encoded parameter values when you call the parameter names through the
+ object.parameters['<parameter_name>'] method call.
+
== Contributing
Please do! Contributing is easy in Mail:
@@ -85,6 +85,10 @@ def tls?
@tls || false
end
+ def param_encode_language(value = nil)
+ value ? @encode_language = value : @encode_language ||= 'en'
+ end
+
end
end
@@ -82,6 +82,7 @@ def address=(value)
def display_name
parse unless @parsed
@display_name ||= get_display_name
+ Encodings.decode_encode(@display_name, @output_type) if @display_name
end
# Provides a way to assign a display name to an already made Mail::Address object.
@@ -154,6 +155,16 @@ def inspect
parse unless @parsed
"#<#{self.class}:#{self.object_id} Address: |#{to_s}| >"
end
+
+ def encoded
+ @output_type = :encode
+ format
+ end
+
+ def decoded
+ @output_type = :decode
+ format
+ end
private
@@ -2,6 +2,8 @@
module Mail
module Encodings
+ include Mail::Patterns
+
# Is the encoding we want defined?
#
# Example:
@@ -25,6 +27,72 @@ def Encodings.get_encoding( str )
RubyVer.get_constant(Mail::Encodings, string)
end
+ # Encodes a parameter value using URI Escaping, note the language field 'en' can
+ # be set using Mail::Configuration, like so:
+ #
+ # Mail.defaults do
+ # param_encode_language 'jp'
+ # end
+ #
+ # The character set used for encoding will either be the value of $KCODE for
+ # Ruby < 1.9 or the encoding on the string passed in.
+ #
+ # Example:
+ #
+ # Mail::Encodings.param_encode("This is fun") #=> "us-ascii'en'This%20is%20fun"
+ def Encodings.param_encode(str)
+ RubyVer.param_encode(str)
+ end
+
+ # Decodes a parameter value using URI Escaping.
+ #
+ # Example:
+ #
+ # Mail::Encodings.param_decode("This%20is%20fun", 'us-ascii') #=> "This is fun"
+ #
+ # str = Mail::Encodings.param_decode("This%20is%20fun", 'iso-8859-1')
+ # str.encoding #=> 'ISO-8859-1' ## Only on Ruby 1.9
+ # str #=> "This is fun"
+ def Encodings.param_decode(str, encoding)
+ RubyVer.param_decode(str, encoding)
+ end
+
+ # Decodes or encodes a string as needed for either Base64 or QP encoding types in
+ # the "=?<encoding>?[QB]?<string>?=" format.
+ #
+ # The output type needs to be :decode to decode the input string or :encode to
+ # encode the input string. The character set used for encoding will either be
+ # the value of $KCODE for Ruby < 1.9 or the encoding on the string passed in.
+ #
+ # On encoding, will only send out Base64 encoded strings.
+ def Encodings.decode_encode(str, output_type)
+ case
+ when output_type == :decode
+ Encodings.value_decode(str)
+ else
+ if str.ascii_only?
+ str
+ else
+ Encodings.b_value_encode(str, find_encoding(str))
+ end
+ end
+ end
+
+ # Decodes a given string as Base64 or Quoted Printable, depending on what
+ # type it is.
+ #
+ #
+ def Encodings.value_decode(str)
+ case
+ when str =~ /\=\?.+?\?B\?.+?\?\=/
+ Encodings.b_value_decode(str)
+ when str =~ /\=\?.+?\?Q\?.+?\?\=/
+ Encodings.q_value_decode(str)
+ else
+ str
+ end
+ end
+
# Encode a string with Base64 Encoding and returns it ready to be inserted
# as a value for a field, that is, in the =?<charset>?B?<string>?= format
#
@@ -33,13 +101,25 @@ def Encodings.get_encoding( str )
# Encodings.b_value_encode('This is あ string', 'UTF-8')
# #=> "=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?="
def Encodings.b_value_encode(str, encoding = nil)
- return str if str.ascii_only?
string, encoding = RubyVer.b_value_encode(str, encoding)
string.split("\n").map do |str|
"=?#{encoding}?B?#{str.chomp}?="
end.join(" ")
end
+ # Decodes a Base64 string from the "=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?=" format
+ #
+ # Example:
+ #
+ # Encodings.b_value_decode("=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?=")
+ # #=> 'This is あ string'
+ def Encodings.b_value_decode(str)
+ string = str.split(Mail::Patterns::WSP)
+ string.flatten.map do |s|
+ RubyVer.b_value_decode(s)
+ end.join('')
+ end
+
# Encode a string with Quoted-Printable Encoding and returns it ready to be inserted
# as a value for a field, that is, in the =?<charset>?Q?<string>?= format
#
@@ -48,31 +128,27 @@ def Encodings.b_value_encode(str, encoding = nil)
# Encodings.q_value_encode('This is あ string', 'UTF-8')
# #=> "=?UTF-8?Q?This_is_=E3=81=82_string?="
def Encodings.q_value_encode(str, encoding = nil)
- return str if str.ascii_only?
string, encoding = RubyVer.q_value_encode(str, encoding)
"=?#{encoding}?Q?#{string.chomp}?="
end
- # Decodes a parameter value using URI Escaping.
+ # Decodes a Quoted-Printable string from the "=?UTF-8?Q?This_is_=E3=81=82_string?=" format
#
# Example:
#
- # Mail::Encodings.param_decode("This%20is%20fun", 'us-ascii') #=> "This is fun"
- #
- # str = Mail::Encodings.param_decode("This%20is%20fun", 'iso-8559-1')
- # str.encoding #=> 'ISO-8859-1' ## Only on Ruby 1.9
- # str #=> "This is fun"
- def Encodings.param_decode(str, encoding)
- RubyVer.param_decode(str, encoding)
+ # Encodings.q_value_decode("=?UTF-8?Q?This_is_=E3=81=82_string?=")
+ # #=> 'This is あ string'
+ def Encodings.q_value_decode(str)
+   # Each whitespace separated chunk is handed to the version specific
+   # decoder on its own; the results are glued together with nothing between
+   words = str.split(Mail::Patterns::WSP)
+   decoded_words = words.map { |word| RubyVer.q_value_decode(word) }
+   decoded_words.join('')
end
-
- # Encodes a parameter value using URI Escaping.
- #
- # Example:
- #
- # Mail::Encodings.param_encode("This is fun") #=> "This%20is%20fun"
- def Encodings.param_encode(str)
- URI.escape(str)
+
+ private
+
+ def Encodings.find_encoding(str)
+ RUBY_VERSION >= '1.9' ? str.encoding : $KCODE
end
end
@@ -5,7 +5,7 @@ class QuotedPrintable
# Decode the string from Quoted-Printable
def self.decode(str)
- str.unpack("M*").first
+ # An unescaped '_' stands for a space in RFC 2047 "Q" encoded text, so it
+ # has to be translated before the =XX escapes are expanded; translating
+ # afterwards would also turn literal underscores sent as =5F into spaces.
+ str.gsub('_', ' ').unpack("M*").first
end
def self.encode(str)
@@ -31,10 +31,19 @@ class BccField < StructuredField
include Mail::CommonAddress
FIELD_NAME = 'bcc'
+ CAPITALIZED_FIELD = 'Bcc'
def initialize(*args)
super(FIELD_NAME, strip_field(FIELD_NAME, args.last))
end
+ def encoded
+ do_encode(CAPITALIZED_FIELD)
+ end
+
+ def decoded
+ do_decode
+ end
+
end
end
@@ -31,10 +31,19 @@ class CcField < StructuredField
include Mail::CommonAddress
FIELD_NAME = 'cc'
+ CAPITALIZED_FIELD = 'Cc'
def initialize(*args)
super(FIELD_NAME, strip_field(FIELD_NAME, args.last))
end
+ def encoded
+ do_encode(CAPITALIZED_FIELD)
+ end
+
+ def decoded
+ do_decode
+ end
+
end
end
@@ -33,14 +33,38 @@ def groups
end
@groups
end
+
+ # Returns the addresses that are part of groups
+ def group_addresses
+ groups.map { |k,v| v.map { |a| a.format } }.flatten
+ end
# Returns the name of all the groups in a string
def group_names # :nodoc:
tree.group_names
end
-
+
private
+ def do_encode(field_name)
+ return '' unless value
+ address_array = tree.addresses.reject { |a| group_addresses.include?(a.encoded) }.compact.map { |a| a.encoded }
+ address_text = address_array.join(", \r\n\t")
+ group_array = groups.map { |k,v| "#{k}: #{v.map { |a| a.encoded }.join(", \r\n\t")};" }
+ group_text = group_array.join(" \r\n\t")
+ return_array = [address_text, group_text].reject { |a| a.blank? }
+ "#{field_name}: #{return_array.join(", \r\n\t")}\r\n"
+ end
+
+ def do_decode
+ address_array = tree.addresses.reject { |a| group_addresses.include?(a.decoded) }.map { |a| a.decoded }
+ address_text = address_array.join(", ")
+ group_array = groups.map { |k,v| "#{k}: #{v.map { |a| a.decoded }.join(", ")};" }
+ group_text = group_array.join(" ")
+ return_array = [address_text, group_text].reject { |a| a.blank? }
+ return_array.join(", ")
+ end
+
# Returns the syntax tree of the Addresses
def tree # :nodoc:
@tree ||= AddressList.new(value)
@@ -2,10 +2,6 @@
module Mail
module CommonDate # :nodoc:
- module ClassMethods # :nodoc:
-
- end
-
module InstanceMethods # :doc:
# Returns a date time object of the parsed date
@@ -14,6 +10,14 @@ def date_time
end
private
+
+ def do_encode(field_name)
+ "#{field_name}: #{value}\r\n"
+ end
+
+ def do_decode
+ "#{value}"
+ end
def element
@element ||= Mail::DateTimeElement.new(value)
@@ -27,7 +31,6 @@ def tree
end
def self.included(receiver) # :nodoc:
- receiver.extend ClassMethods
receiver.send :include, InstanceMethods
end
Oops, something went wrong.

0 comments on commit e5840b7

Please sign in to comment.