Permalink
Browse files

Changing encoding strategy from breaking on word boundaries, to spaces, also clean up of charset assignment
  • Loading branch information...
1 parent 74d6fcd commit b36bb737f6a35b2ac3da36538e13a6b7d6d0b937 @mikel committed Apr 4, 2010
Showing with 45 additions and 32 deletions.
  1. +8 −0 CHANGELOG.rdoc
  2. +5 −6 lib/mail/encodings.rb
  3. +8 −3 lib/mail/header.rb
  4. +15 −14 lib/mail/message.rb
  5. +2 −2 spec/mail/encodings_spec.rb
  6. +7 −7 spec/mail/message_spec.rb
View
8 CHANGELOG.rdoc
@@ -1,3 +1,11 @@
+== Sun Apr 4 06:41:46 UTC 2010 Mikel Lindsaar <raasdnil@gmail.com>
+
+* Created non-ascii header auto encoding for address fields and unstructured fields
+* Changed default behaviour of mail, if you specify a charset, it will use that charset
+ regardless of what is in the body. Previously, if the body was all US-ASCII, it would
+ set the charset to US-ASCII in preference.
+*
+
== Mon 29 Mar 2010 07:04:34 UTC Mikel Lindsaar <raasdnil@gmail.com>
* Version bump to 2.1.5.3
View
11 lib/mail/encodings.rb
@@ -171,19 +171,18 @@ def Encodings.encode_non_usascii(address, charset)
# Encode any non usascii strings embedded inside of quotes
address.gsub!(/(".*?[^#{us_ascii}].+?")/) { |s| Encodings.b_value_encode(unquote(s), charset) }
# Then loop through all remaining items and encode as needed
- tokens = address.split(/\b/)
+ tokens = address.split(/\s/)
tokens.enum_with_index.map do |word, i|
if word.ascii_only?
word
else
- previous_space = tokens[i-1] && match = tokens[i-1].match(/^(\s+)$/)
- previous_non_ascii = tokens[i-2] && !tokens[i-2].ascii_only?
- if previous_space && previous_non_ascii
- word = match[1] + word
+ previous_non_ascii = tokens[i-1] && !tokens[i-1].ascii_only?
+ if previous_non_ascii
+ word = " #{word}"
end
Encodings.b_value_encode(word, charset)
end
- end.join
+ end.join(' ')
end
# Encode a string with Base64 Encoding and returns it ready to be inserted
View
11 lib/mail/header.rb
@@ -33,8 +33,9 @@ class Header
# no automatic processing of that field will happen. If you find one of
# these cases, please make a patch and send it in, or at the least, send
# me the example so we can fix it.
- def initialize(header_text = nil)
+ def initialize(header_text = nil, charset = nil)
@errors = []
+ @charset = charset
self.raw_source = header_text.to_crlf
split_header if header_text
end
@@ -74,7 +75,7 @@ def fields=(unfolded_fields)
@fields = Mail::FieldList.new
unfolded_fields.each do |field|
- field = Field.new(field)
+ field = Field.new(field, nil, charset)
field.errors.each { |error| self.errors << error }
selected = select_field_for(field.name)
@@ -163,10 +164,14 @@ def charset
if self[:content_type] && self[:content_type].parameters
self[:content_type].parameters[:charset]
else
- nil
+ @charset
end
end
+ def charset=(val)
+ @charset = val
+ end
+
LIMITED_FIELDS = %w[ date from sender reply-to to cc bcc
message-id in-reply-to references subject
return-path content-type mime-version
View
29 lib/mail/message.rb
@@ -100,7 +100,9 @@ def initialize(*args, &block)
@text_part = nil
@html_part = nil
@errors = nil
-
+ @header = nil
+ @charset = 'UTF-8'
+
@perform_deliveries = true
@raise_delivery_errors = true
@@ -353,7 +355,7 @@ def envelope_date
# mail.header = 'To: mikel@test.lindsaar.net\r\nFrom: Bob@bob.com'
# mail.header #=> <#Mail::Header
def header=(value)
- @header = Mail::Header.new(value)
+ @header = Mail::Header.new(value, charset)
end
# Returns the header object of the message object. Or, if passed
@@ -805,7 +807,7 @@ def resent_cc( val = nil )
# mail.resent_cc = 'Mikel <mikel@test.lindsaar.net>'
# mail.resent_cc #=> ['mikel@test.lindsaar.net']
# mail.resent_cc = 'Mikel <mikel@test.lindsaar.net>, ada@test.lindsaar.net'
- # mail.resent_cc #=> ['mikel@test.lindsaar.net', 'ada@test.lindsaar.net']
+ # mail.resent_cc #=> ['mikel@test.lindsaar.net', 'ada@test.lindsaar.net']
def resent_cc=( val )
header[:resent_cc] = val
end
@@ -1271,13 +1273,13 @@ def has_date?
def has_mime_version?
header.has_mime_version?
end
-
+
def has_content_type?
!!header[:content_type]
end
def has_charset?
- !!charset
+ !!(header[:content_type] && header[:content_type].parameters['charset'])
end
def has_content_transfer_encoding?
@@ -1330,9 +1332,7 @@ def add_content_type
#
# Otherwise raises a warning
def add_charset
- if body.only_us_ascii? and !body.empty?
- header[:content_type].parameters['charset'] = 'US-ASCII'
- elsif !body.empty?
+ if !body.empty?
warning = "Non US-ASCII detected and no charset defined.\nDefaulting to UTF-8, set your own if this is incorrect.\n"
STDERR.puts(warning)
header[:content_type].parameters['charset'] = 'UTF-8'
@@ -1374,17 +1374,18 @@ def message_content_type
# Returns the character set defined in the content type field
def charset
- content_type ? content_type_parameters['charset'] : nil
+ if @header
+ content_type ? content_type_parameters['charset'] : @charset
+ else
+ @charset
+ end
end
# Sets the charset to the supplied value. Will set the content type to text/plain if
# it does not already exist
def charset=(value)
- if content_type
- content_type_parameters['charset'] = value
- else
- self.content_type ['text', 'plain', {'charset' => value}]
- end
+ @charset = value
+ @header.charset = value
end
# Returns the main content type
View
4 spec/mail/encodings_spec.rb
@@ -218,7 +218,7 @@
mail = Mail.new
mail.subject = original
mail[:subject].decoded.should == original
- mail[:subject].encoded.should == result
+ mail[:subject].encoded.gsub("UTF-8", "UTF8").should == result
end
it "should round trip a complex string properly" do
@@ -230,7 +230,7 @@
mail = Mail.new
mail.subject = original
mail[:subject].decoded.should == original
- mail[:subject].encoded.should == result
+ mail[:subject].encoded.gsub("UTF-8", "UTF8").should == result
mail = Mail.new(mail.encoded)
mail[:subject].decoded.should == original
mail[:subject].encoded.gsub("UTF-8", "UTF8").should == result
View
14 spec/mail/message_spec.rb
@@ -133,13 +133,13 @@ def basic_email
it "should give the header class the header to parse" do
header = Mail::Header.new("To: mikel\r\nFrom: bob\r\nSubject: Hello!")
- Mail::Header.should_receive(:new).with("To: mikel\r\nFrom: bob\r\nSubject: Hello!").and_return(header)
+ Mail::Header.should_receive(:new).with("To: mikel\r\nFrom: bob\r\nSubject: Hello!", 'UTF-8').and_return(header)
mail = Mail::Message.new(basic_email)
end
it "should give the header class the header to parse even if there is no body" do
header = Mail::Header.new("To: mikel\r\nFrom: bob\r\nSubject: Hello!")
- Mail::Header.should_receive(:new).with("To: mikel\r\nFrom: bob\r\nSubject: Hello!").and_return(header)
+ Mail::Header.should_receive(:new).with("To: mikel\r\nFrom: bob\r\nSubject: Hello!", 'UTF-8').and_return(header)
mail = Mail::Message.new("To: mikel\r\nFrom: bob\r\nSubject: Hello!")
end
@@ -157,15 +157,15 @@ def basic_email
it "should give the header the part before the line without spaces and the body the part without" do
header = Mail::Header.new("To: mikel")
body = Mail::Body.new("G'Day!")
- Mail::Header.should_receive(:new).with("To: mikel").and_return(header)
+ Mail::Header.should_receive(:new).with("To: mikel", 'UTF-8').and_return(header)
Mail::Body.should_receive(:new).with("G'Day!").and_return(body)
mail = Mail::Message.new("To: mikel\r\n\r\nG'Day!")
end
it "should give allow for whitespace on the gap line between header and body" do
header = Mail::Header.new("To: mikel")
body = Mail::Body.new("G'Day!")
- Mail::Header.should_receive(:new).with("To: mikel").and_return(header)
+ Mail::Header.should_receive(:new).with("To: mikel", 'UTF-8').and_return(header)
Mail::Body.should_receive(:new).with("G'Day!").and_return(body)
mail = Mail::Message.new("To: mikel\r\n \r\nG'Day!")
end
@@ -945,12 +945,12 @@ def basic_email
mail.should_not be_has_charset
end
- it "should not raise a warning if there is no charset defined and only US-ASCII chars" do
+ it "should raise a warning if there is no charset defined and only US-ASCII chars" do
body = "This is plain text US-ASCII"
mail = Mail.new
mail.body = body
- STDERR.should_not_receive(:puts)
- mail.to_s
+ STDERR.should_receive(:puts)
+ mail.to_s
end
it "should set the content type to text/plain; charset=us-ascii" do

0 comments on commit b36bb73

Please sign in to comment.