Skip to content

Commit

Permalink
When FormatOptions.AllowMixedHeaderCharsets is disabled, always use user-specified charset
Browse files Browse the repository at this point in the history

Previously this could/would still use us-ascii and/or iso-8859-1 if
the entire header could fit within one of those charsets.

Fixes issue #493
  • Loading branch information
jstedfast committed Jul 6, 2019
1 parent b50fc8e commit 3ad8de7
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 39 deletions.
6 changes: 3 additions & 3 deletions MimeKit/FormatOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ internal IMimeFilter CreateNewLineFilter (bool ensureNewLine = false)
/// Gets or sets whether the formatter should allow mixed charsets in the headers.
/// </summary>
/// <remarks>
/// <para>When this option is enabled, the MIME formatter will try to use US-ASCII and/or
/// ISO-8859-1 to encode headers when appropriate rather than being forced to use the
/// <para>When this option is enabled, the MIME formatter will try to use us-ascii and/or
/// iso-8859-1 to encode headers when appropriate rather than being forced to use the
/// specified charset for all encoded-word tokens in order to maximize readability.</para>
/// <para>Unfortunately, mail clients like Outlook and Thunderbird do not treat
/// encoded-word tokens individually and assume that all tokens are encoded using the
Expand All @@ -234,7 +234,7 @@ internal IMimeFilter CreateNewLineFilter (bool ensureNewLine = false)
/// <a href="https://bugzilla.mozilla.org/show_bug.cgi?id=317263">
/// https://bugzilla.mozilla.org/show_bug.cgi?id=317263</a>.</para>
/// </remarks>
/// <value><c>true</c> if the formatter should be allowed to use ISO-8859-1 when encoding headers; otherwise, <c>false</c>.</value>
/// <value><c>true</c> if the formatter should be allowed to use us-ascii and/or iso-8859-1 when encoding headers; otherwise, <c>false</c>.</value>
public bool AllowMixedHeaderCharsets {
get { return allowMixedHeaderCharsets; }
set {
Expand Down
57 changes: 25 additions & 32 deletions MimeKit/Utils/Rfc2047.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1045,11 +1045,17 @@ enum WordType {
EncodedWord
}

// Identifies which charset a Word will be encoded with when it is emitted as an
// encoded-word token.
//
// The declaration order matters: callers combine two encodings by casting to int
// and taking Math.Max, so members must stay ordered Ascii < Latin1 < UserSpecified.
enum WordEncoding {
// The word can be encoded using us-ascii.
Ascii,
// The word can be encoded using iso-8859-1.
Latin1,
// The word must be encoded using the charset passed in by the caller.
UserSpecified
}

class Word {
public WordType Type;
public int StartIndex;
public int CharCount;
public int Encoding; // 0 => ASCII, 1 => iso-8859-1, 2 => custom
public WordEncoding Encoding;
public int ByteCount;
public int EncodeCount;
public int QuotedPairs;
Expand Down Expand Up @@ -1111,10 +1117,10 @@ static bool ExceedsMaxLineLength (FormatOptions options, Encoding charset, Word
switch (word.Type) {
case WordType.EncodedWord:
switch (word.Encoding) {
case 1:
case WordEncoding.Latin1:
length = EstimateEncodedWordLength ("iso-8859-1", word.ByteCount, word.EncodeCount);
break;
case 0:
case WordEncoding.Ascii:
length = EstimateEncodedWordLength ("us-ascii", word.ByteCount, word.EncodeCount);
break;
default:
Expand Down Expand Up @@ -1159,7 +1165,7 @@ static IList<Word> GetRfc822Words (FormatOptions options, Encoding charset, stri

if (c < 127) {
if (IsCtrl (c)) {
word.Encoding = Math.Max (word.Encoding, 1);
word.Encoding = (WordEncoding) Math.Max ((int) word.Encoding, (int) WordEncoding.Latin1);
word.Type = WordType.EncodedWord;
word.EncodeCount++;
} else if (phrase && !IsAtom (c)) {
Expand All @@ -1176,7 +1182,7 @@ static IList<Word> GetRfc822Words (FormatOptions options, Encoding charset, stri
nchars = 1;
} else if (c < 256) {
// iso-8859-1
word.Encoding = Math.Max (word.Encoding, 1);
word.Encoding = (WordEncoding) Math.Max ((int) word.Encoding, (int) WordEncoding.Latin1);
word.Type = WordType.EncodedWord;
word.EncodeCount++;
word.ByteCount++;
Expand All @@ -1198,11 +1204,11 @@ static IList<Word> GetRfc822Words (FormatOptions options, Encoding charset, stri
n = 3;
}

word.Encoding = WordEncoding.UserSpecified;
word.Type = WordType.EncodedWord;
word.CharCount += nchars;
word.EncodeCount += n;
word.ByteCount += n;
word.Encoding = 2;
}

if (ExceedsMaxLineLength (options, charset, word)) {
Expand Down Expand Up @@ -1288,11 +1294,11 @@ static bool ShouldMergeWords (FormatOptions options, Encoding charset, IList<Wor
if (next.Type == WordType.QuotedString)
return false;

switch (Math.Max (word.Encoding, next.Encoding)) {
case 1:
switch ((WordEncoding) Math.Max ((int) word.Encoding, (int) next.Encoding)) {
case WordEncoding.Latin1:
length = EstimateEncodedWordLength ("iso-8859-1", length, encoded);
break;
case 0:
case WordEncoding.Ascii:
length = EstimateEncodedWordLength ("us-ascii", length, encoded);
break;
default:
Expand All @@ -1311,8 +1317,9 @@ static IList<Word> Merge (FormatOptions options, Encoding charset, IList<Word> w
if (words.Count < 2)
return words;

int lwspCount, encoding, encoded, quoted, byteCount, length;
int lwspCount, encoded, quoted, byteCount, length;
var merged = new List<Word> ();
WordEncoding encoding;
Word word, next;

word = words[0];
Expand All @@ -1323,18 +1330,18 @@ static IList<Word> Merge (FormatOptions options, Encoding charset, IList<Word> w
next = words[i];

if (word.Type != WordType.Atom && word.Type == next.Type) {
encoding = (WordEncoding) Math.Max ((int) word.Encoding, (int) next.Encoding);
lwspCount = next.StartIndex - (word.StartIndex + word.CharCount);
byteCount = word.ByteCount + lwspCount + next.ByteCount;
encoding = Math.Max (word.Encoding, next.Encoding);
encoded = word.EncodeCount + next.EncodeCount;
quoted = word.QuotedPairs + next.QuotedPairs;

if (word.Type == WordType.EncodedWord) {
switch (encoding) {
case 1:
case WordEncoding.Latin1:
length = EstimateEncodedWordLength ("iso-8859-1", byteCount, encoded);
break;
case 0:
case WordEncoding.Ascii:
length = EstimateEncodedWordLength ("us-ascii", byteCount, encoded);
break;
default:
Expand Down Expand Up @@ -1376,7 +1383,7 @@ static IList<Word> Merge (FormatOptions options, Encoding charset, IList<Word> w
word.Type = (WordType) Math.Max ((int) word.Type, (int) next.Type);
word.CharCount = (next.StartIndex + next.CharCount) - word.StartIndex;
word.ByteCount = word.ByteCount + lwspCount + next.ByteCount;
word.Encoding = Math.Max (word.Encoding, next.Encoding);
word.Encoding = (WordEncoding) Math.Max ((int) word.Encoding, (int) next.Encoding);
word.EncodeCount = word.EncodeCount + next.EncodeCount;
word.QuotedPairs = word.QuotedPairs + next.QuotedPairs;
} else {
Expand All @@ -1398,23 +1405,9 @@ static byte[] Encode (FormatOptions options, Encoding charset, string text, bool
byte[] encoded;

if (!options.AllowMixedHeaderCharsets) {
int maxEncoding = 0;

for (int i = 0; i < words.Count; i++) {
if (words[i].Type != WordType.EncodedWord || words[i].Encoding == maxEncoding)
continue;

if (words[i].Encoding > maxEncoding) {
maxEncoding = words[i].Encoding;
for (int j = 0; j < i; j++) {
if (words[j].Type != WordType.EncodedWord)
continue;

words[j].Encoding = maxEncoding;
}
} else {
words[i].Encoding = maxEncoding;
}
if (words[i].Type == WordType.EncodedWord)
words[i].Encoding = WordEncoding.UserSpecified;
}
}

Expand Down Expand Up @@ -1447,10 +1440,10 @@ static byte[] Encode (FormatOptions options, Encoding charset, string text, bool
}

switch (word.Encoding) {
case 0: // us-ascii
case WordEncoding.Ascii:
AppendEncodedWord (str, Encoding.ASCII, text, start, length, mode);
break;
case 1: // iso-8859-1
case WordEncoding.Latin1:
AppendEncodedWord (str, CharsetUtils.Latin1, text, start, length, mode);
break;
default: // custom charset
Expand Down
24 changes: 20 additions & 4 deletions UnitTests/InternetAddressListTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,8 @@ public void TestMailboxWithDotsInTheName ()
[Test]
public void TestMailboxWith8bitName ()
{
const string encoded = "Patrik =?iso-8859-1?b?RqVkbHRzdHKldm0=?= <paf@nada.kth.se>";
//const string encoded = "Patrik =?iso-8859-1?b?RqVkbHRzdHKldm0=?= <paf@nada.kth.se>";
const string encoded = "Patrik =?utf-8?b?RsKlZGx0c3RywqV2bQ==?= <paf@nada.kth.se>";
const string text = "Patrik F¥dltstr¥vm <paf@nada.kth.se>";
var expected = new InternetAddressList ();

Expand Down Expand Up @@ -504,10 +505,14 @@ public void TestEncodingMailboxWithReallyLongWord ()
const string expected = "=?us-ascii?q?reeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaallllllllllll?=\n =?us-ascii?q?llllllllllllllllllllllllllllllllllllllllllly?= long word\n\t<really.long.word@example.com>";
const string name = "reeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaallllllllllllllllllllllllllllllllllllllllllllllllllllllly long word";
var mailbox = new MailboxAddress (name, "really.long.word@example.com");
var options = FormatOptions.Default.Clone ();
var list = new InternetAddressList ();
list.Add (mailbox);

var actual = list.ToString (UnixFormatOptions, true);
options.NewLineFormat = NewLineFormat.Unix;
options.AllowMixedHeaderCharsets = true;

var actual = list.ToString (options, true);

Assert.AreEqual (expected, actual, "Encoding really long mailbox did not match expected result: {0}", expected);
Assert.IsTrue (InternetAddressList.TryParse (actual, out list), "Failed to parse really long mailbox");
Expand Down Expand Up @@ -569,17 +574,19 @@ public void TestEncodingSimpleAddressList ()
public void TestEncodingLongNameMixedQuotingAndEncoding ()
{
const string name = "Dr. xxxxxxxxxx xxxxx | xxxxxx.xxxxxxx für xxxxxxxxxxxxx xxxx";
const string encodedName = "\"Dr. xxxxxxxxxx xxxxx | xxxxxx.xxxxxxx\" =?iso-8859-1?b?Zvxy?= xxxxxxxxxxxxx xxxx";
const string encodedNameLatin1 = "\"Dr. xxxxxxxxxx xxxxx | xxxxxx.xxxxxxx\" =?iso-8859-1?b?Zvxy?= xxxxxxxxxxxxx xxxx";
const string encodedNameUnicode = "\"Dr. xxxxxxxxxx xxxxx | xxxxxx.xxxxxxx\" =?utf-8?b?ZsO8cg==?= xxxxxxxxxxxxx xxxx";
const string encodedMailbox = "\"Dr. xxxxxxxxxx xxxxx | xxxxxx.xxxxxxx\" =?iso-8859-1?b?Zvxy?= xxxxxxxxxxxxx\n xxxx <x.xxxxx@xxxxxxx-xxxxxx.xx>";
const string address = "x.xxxxx@xxxxxxx-xxxxxx.xx";
var options = FormatOptions.Default.Clone ();

options.NewLineFormat = NewLineFormat.Unix;
options.AllowMixedHeaderCharsets = true;

var buffer = Rfc2047.EncodePhrase (options, Encoding.UTF8, name);
var result = Encoding.UTF8.GetString (buffer);

Assert.AreEqual (encodedName, result);
Assert.AreEqual (encodedNameLatin1, result);

var mailbox = new MailboxAddress (name, address);
var list = new InternetAddressList ();
Expand All @@ -589,6 +596,15 @@ public void TestEncodingLongNameMixedQuotingAndEncoding ()
result = list.ToString (options, true);

Assert.AreEqual (encodedMailbox, result);

// Now disable smart encoding

options.AllowMixedHeaderCharsets = false;

buffer = Rfc2047.EncodePhrase (options, Encoding.UTF8, name);
result = Encoding.UTF8.GetString (buffer);

Assert.AreEqual (encodedNameUnicode, result);
}

[Test]
Expand Down

0 comments on commit 3ad8de7

Please sign in to comment.