Skip to content

Commit

Permalink
Encode Subject header correctly per RFC 2047
Browse files Browse the repository at this point in the history
There are some differences between RFC 2047, which should be
used to encode header values, and RFC 2045, which is used for the body:
1) Use CRLF+SPACE for soft line break.
2) SPACE and TAB should always be encoded.
3) Long text should be split into multiple encoded words.

Did manual test with FAR file manager w/ Observer plugin and
7-Zip w/ eDecoder plugin.

Bug: 794835
Change-Id: I5b87b7392d2208dd58bf512c7ee59c87bc32a85a
Reviewed-on: https://chromium-review.googlesource.com/835009
Reviewed-by: Xianzhu Wang <wangxianzhu@chromium.org>
Reviewed-by: Łukasz Anforowicz <lukasza@chromium.org>
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Commit-Queue: Jian Li <jianli@chromium.org>
Cr-Commit-Position: refs/heads/master@{#530371}
  • Loading branch information
jianli-chromium authored and Commit Bot committed Jan 19, 2018
1 parent 2ba21bc commit a97e4b2
Show file tree
Hide file tree
Showing 5 changed files with 223 additions and 30 deletions.
79 changes: 74 additions & 5 deletions third_party/WebKit/Source/core/frame/MHTMLTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,13 +157,29 @@ class MHTMLTest : public ::testing::Test {
LineReader line_reader(
std::string(mhtml_data->data(), mhtml_data->length()));
std::string line;
while (line_reader.GetNextLine(&line) && line.length()) {
line_reader.GetNextLine(&line);
while (line.length()) {
// Peek next line to see if it starts with soft line break. If yes, append
// to current line.
std::string next_line;
while (true) {
line_reader.GetNextLine(&next_line);
if (next_line.length() > 1 &&
(next_line[0] == ' ' || next_line[0] == '\t')) {
line += &(next_line.at(1));
continue;
}
break;
}

std::string::size_type pos = line.find(':');
if (pos == std::string::npos)
continue;
std::string key = line.substr(0, pos);
std::string value = line.substr(pos + 2);
mhtml_headers.emplace(key, value);

line = next_line;
}
return mhtml_headers;
}
Expand Down Expand Up @@ -251,14 +267,16 @@ TEST_F(MHTMLTest, TestMHTMLHeadersWithTitleContainingAllPrintableCharacters) {

EXPECT_EQ("<Saved by Blink>", mhtml_headers["From"]);
EXPECT_FALSE(mhtml_headers["Date"].empty());
EXPECT_EQ("multipart/related;", mhtml_headers["Content-Type"]);
EXPECT_EQ(
"multipart/related;type=\"text/html\";boundary=\"boundary-example\"",
mhtml_headers["Content-Type"]);
EXPECT_EQ("abc", mhtml_headers["Subject"]);
EXPECT_EQ(kURL, mhtml_headers["Snapshot-Content-Location"]);
}

TEST_F(MHTMLTest, TestMHTMLHeadersWithTitleContainingNonPrintableCharacters) {
const char kURL[] = "http://www.example.com/";
const char kTitle[] = u8"abc=\u261D\U0001F3FB";
const char kTitle[] = "abc \t=\xe2\x98\x9d\xf0\x9f\x8f\xbb";
AddTestResources();
scoped_refptr<RawData> data =
Serialize(ToKURL(kURL), String::FromUTF8(kTitle), "text/html",
Expand All @@ -268,12 +286,43 @@ TEST_F(MHTMLTest, TestMHTMLHeadersWithTitleContainingNonPrintableCharacters) {

EXPECT_EQ("<Saved by Blink>", mhtml_headers["From"]);
EXPECT_FALSE(mhtml_headers["Date"].empty());
EXPECT_EQ("multipart/related;", mhtml_headers["Content-Type"]);
EXPECT_EQ("=?utf-8?Q?abc=3D=E2=98=9D=F0=9F=8F=BB?=",
EXPECT_EQ(
"multipart/related;type=\"text/html\";boundary=\"boundary-example\"",
mhtml_headers["Content-Type"]);
EXPECT_EQ("=?utf-8?Q?abc=20=09=3D=E2=98=9D=F0=9F=8F=BB?=",
mhtml_headers["Subject"]);
EXPECT_EQ(kURL, mhtml_headers["Snapshot-Content-Location"]);
}

// Serializes a page whose title is long and contains whitespace, '=' and
// non-ASCII characters, then checks the top-level MHTML headers. The Subject
// value is expected to come back as several "=?utf-8?Q?...?=" pieces joined
// together by the header extraction helper.
TEST_F(MHTMLTest,
       TestMHTMLHeadersWithLongTitleContainingNonPrintableCharacters) {
  const char kURL[] = "http://www.example.com/";
  // 100 ASCII digits followed by space, tab, '=' and two multi-byte UTF-8
  // sequences.
  const char kTitle[] =
      "01234567890123456789012345678901234567890123456789"
      "01234567890123456789012345678901234567890123456789"
      " \t=\xe2\x98\x9d\xf0\x9f\x8f\xbb";
  const char kExpectedContentType[] =
      "multipart/related;type=\"text/html\";boundary=\"boundary-example\"";
  const char kExpectedSubject[] =
      "=?utf-8?Q?012345678901234567890123456789"
      "012345678901234567890123456789012?="
      "=?utf-8?Q?345678901234567890123456789"
      "0123456789=20=09=3D=E2=98=9D=F0=9F?="
      "=?utf-8?Q?=8F=BB?=";

  AddTestResources();
  scoped_refptr<RawData> serialized =
      Serialize(ToKURL(kURL), String::FromUTF8(kTitle), "text/html",
                MHTMLArchive::kUseDefaultEncoding);

  std::map<std::string, std::string> headers = ExtractMHTMLHeaders(serialized);

  EXPECT_EQ("<Saved by Blink>", headers["From"]);
  EXPECT_FALSE(headers["Date"].empty());
  EXPECT_EQ(kExpectedContentType, headers["Content-Type"]);
  EXPECT_EQ(kExpectedSubject, headers["Subject"]);
  EXPECT_EQ(kURL, headers["Snapshot-Content-Location"]);
}

TEST_F(MHTMLTest, TestMHTMLEncoding) {
const char kURL[] = "http://www.example.com";
AddTestResources();
Expand Down Expand Up @@ -466,4 +515,24 @@ TEST_F(MHTMLTest, FormControlElements) {
EXPECT_FALSE(document->getElementById("fm")->IsDisabledFormControl());
}

// Loads the "soft_line_break.mht" archive through a mocked URL and checks
// that both its headers and its body survive soft line breaks.
TEST_F(MHTMLTest, LoadMHTMLContainingSoftLineBreaks) {
  const char kURL[] = "http://www.example.com";
  const char kLongElementId[] =
      "AVeryLongID012345678901234567890123456789012345678901234567890End";

  // Register the mocked archive and load it in the top frame.
  RegisterMockedURLLoad(kURL, "soft_line_break.mht");
  LoadURLInTopFrame(ToKURL(kURL));
  ASSERT_TRUE(GetPage());
  LocalFrame* main_frame = ToLocalFrame(GetPage()->MainFrame());
  ASSERT_TRUE(main_frame);

  // Header lines separated by soft line breaks should be concatenated without
  // any problem, so a document must be available.
  Document* loaded_document = main_frame->GetDocument();
  ASSERT_TRUE(loaded_document);

  // Body lines separated by soft line breaks should be concatenated without
  // any problem, so the element with the long id must be found.
  EXPECT_TRUE(loaded_document->getElementById(kLongElementId));
}

} // namespace blink
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
From: <Saved by Blink>
Subject: =?utf-8?Q?012345678901234567890123456789012345678901234567890
1234567890123456789012345678901234567890123456789
=3D=E2=98=9D=F0=9F=8F=BB?=
Date: Thu, 4 Oct 2017 21:18:18 -0000
MIME-Version: 1.0
Content-Type: multipart/related;
type="text/html";
boundary="----MultipartBoundary--e77OylKXx1PBMEF67x53AwnQLf4DUmwdt037X9MjPK----"

------MultipartBoundary--e77OylKXx1PBMEF67x53AwnQLf4DUmwdt037X9MjPK----
Content-Type: text/html
Content-ID: <frame-31894-fca076b5-329b-490d-a9ee-6974bf0c4bcd@mhtml.blink>
Content-Transfer-Encoding: quoted-printable
Content-Location: http://localhost/soft_line_break.html

<html>
<head><meta http-equiv=3D"Content-Type" content=3D"text/html; charset=
=3DUTF-8">
</head>
<body>
<div id=3D"AVeryLongID01234567890123456789012345678901234567890123456=
7890End">
</div>
</body>
</html>
------MultipartBoundary--e77OylKXx1PBMEF67x53AwnQLf4DUmwdt037X9MjPK------
85 changes: 78 additions & 7 deletions third_party/WebKit/Source/platform/mhtml/MHTMLArchive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,78 @@

namespace blink {

const char* const kQuotedPrintable = "quoted-printable";
const char* const kBase64 = "base64";
const char* const kBinary = "binary";
namespace {

// Maximum number of characters allowed on one encoded output line.
const size_t kMaximumLineLength = 76;
const char kCrlfLineEnding[] = "\r\n";

// Markers wrapped around each piece of header text that is encoded as UTF-8
// with the "Q" (quoted-printable) encoding. The lengths are derived from the
// literals themselves (minus the terminating NUL) so that they cannot get out
// of sync if a literal is ever edited.
const char kRFC2047EncodingPrefix[] = "=?utf-8?Q?";
const size_t kRFC2047EncodingPrefixLength = sizeof(kRFC2047EncodingPrefix) - 1;
const char kRFC2047EncodingSuffix[] = "?=";
const size_t kRFC2047EncodingSuffixLength = sizeof(kRFC2047EncodingSuffix) - 1;

// Content-Transfer-Encoding names.
const char kQuotedPrintable[] = "quoted-printable";
const char kBase64[] = "base64";
const char kBinary[] = "binary";

}  // namespace

// Quoted-printable encoding policy for body content, per RFC 2045.
class QuotedPrintableEncodeBodyDelegate : public QuotedPrintableEncodeDelegate {
 public:
  QuotedPrintableEncodeBodyDelegate() = default;
  ~QuotedPrintableEncodeBodyDelegate() override = default;

  // Body lines get the whole line-length budget.
  size_t GetMaxLineLengthForEncodedContent() const override {
    return kMaximumLineLength;
  }

  // In a body, space and tab are encoded only when they end a line.
  bool ShouldEncodeWhiteSpaceCharacters(bool end_of_line) const override {
    return end_of_line;
  }

  // Body lines have no prefix, so there is nothing to emit here.
  void DidStartLine(Vector<char>& out) override {}

  // Terminates every line except the last one with a soft line break, i.e.
  // '=' followed by CRLF.
  void DidFinishLine(bool last_line, Vector<char>& out) override {
    if (last_line)
      return;
    out.push_back('=');
    out.Append(kCrlfLineEnding, strlen(kCrlfLineEnding));
  }
};

// Quoted-printable encoding policy for header values, per RFC 2047.
class QuotedPrintableEncodeHeaderDelegate
    : public QuotedPrintableEncodeDelegate {
 public:
  QuotedPrintableEncodeHeaderDelegate() = default;
  ~QuotedPrintableEncodeHeaderDelegate() override = default;

  // Each line is wrapped in the RFC 2047 prefix and suffix, so their lengths
  // are taken out of the line-length budget.
  size_t GetMaxLineLengthForEncodedContent() const override {
    const size_t wrapper_length =
        kRFC2047EncodingPrefixLength + kRFC2047EncodingSuffixLength;
    return kMaximumLineLength - wrapper_length;
  }

  // In a header, space and tab are encoded wherever they appear.
  bool ShouldEncodeWhiteSpaceCharacters(bool end_of_line) const override {
    return true;
  }

  // Opens the line with the RFC 2047 prefix.
  void DidStartLine(Vector<char>& out) override {
    out.Append(kRFC2047EncodingPrefix, kRFC2047EncodingPrefixLength);
  }

  // Closes the line with the RFC 2047 suffix. Every line except the last one
  // is then followed by the header soft line break: CRLF plus one space.
  void DidFinishLine(bool last_line, Vector<char>& out) override {
    out.Append(kRFC2047EncodingSuffix, kRFC2047EncodingSuffixLength);
    if (last_line)
      return;
    out.Append(kCrlfLineEnding, strlen(kCrlfLineEnding));
    out.push_back(' ');
  }
};

static String ConvertToPrintableCharacters(const String& text) {
// If the text contains all printable ASCII characters, no need for encoding.
Expand All @@ -70,9 +139,11 @@ static String ConvertToPrintableCharacters(const String& text) {
// where, "utf-8" is the chosen charset to represent the text and "Q" is the
// Quoted-Printable format to convert to 7-bit printable ASCII characters.
CString utf8_text = text.Utf8();
QuotedPrintableEncodeHeaderDelegate header_delegate;
Vector<char> encoded_text;
QuotedPrintableEncode(utf8_text.data(), utf8_text.length(), encoded_text);
return "=?utf-8?Q?" + String(encoded_text.data(), encoded_text.size()) + "?=";
QuotedPrintableEncode(utf8_text.data(), utf8_text.length(), &header_delegate,
encoded_text);
return String(encoded_text.data(), encoded_text.size());
}

MHTMLArchive::MHTMLArchive() = default;
Expand Down Expand Up @@ -245,15 +316,15 @@ void MHTMLArchive::GenerateMHTMLPart(const String& boundary,
size_t data_length = flat_data.size();
Vector<char> encoded_data;
if (!strcmp(content_encoding, kQuotedPrintable)) {
QuotedPrintableEncode(data, data_length, encoded_data);
QuotedPrintableEncodeBodyDelegate body_delegate;
QuotedPrintableEncode(data, data_length, &body_delegate, encoded_data);
output_buffer.Append(encoded_data.data(), encoded_data.size());
output_buffer.Append("\r\n", 2u);
} else {
DCHECK(!strcmp(content_encoding, kBase64));
// We are not specifying insertLFs = true below as it would cut the lines
// with LFs and MHTML requires CRLFs.
Base64Encode(data, data_length, encoded_data);
const size_t kMaximumLineLength = 76;
size_t index = 0;
size_t encoded_data_length = encoded_data.size();
do {
Expand Down
28 changes: 13 additions & 15 deletions third_party/WebKit/Source/platform/text/QuotedPrintable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@

namespace blink {

static const size_t kMaximumLineLength = 76;

static const char kCrlfLineEnding[] = "\r\n";

static size_t LengthOfLineEndingAtIndex(const char* input,
Expand All @@ -54,15 +52,13 @@ static size_t LengthOfLineEndingAtIndex(const char* input,
return 0;
}

void QuotedPrintableEncode(const Vector<char>& in, Vector<char>& out) {
QuotedPrintableEncode(in.data(), in.size(), out);
}

void QuotedPrintableEncode(const char* input,
size_t input_length,
QuotedPrintableEncodeDelegate* delegate,
Vector<char>& out) {
out.clear();
out.ReserveCapacity(input_length);
delegate->DidStartLine(out);
size_t current_line_length = 0;
for (size_t i = 0; i < input_length; ++i) {
bool is_last_character = (i == input_length - 1);
Expand All @@ -74,13 +70,14 @@ void QuotedPrintableEncode(const char* input,
current_character != '\t')
requires_encoding = true;

// Space and tab characters have to be encoded if they appear at the end of
// a line.
// Decide if space and tab characters need to be encoded.
if (!requires_encoding &&
(current_character == '\t' || current_character == ' ') &&
(is_last_character ||
LengthOfLineEndingAtIndex(input, input_length, i + 1)))
requires_encoding = true;
(current_character == '\t' || current_character == ' ')) {
bool end_of_line = is_last_character ||
LengthOfLineEndingAtIndex(input, input_length, i + 1);
requires_encoding =
delegate->ShouldEncodeWhiteSpaceCharacters(end_of_line);
}

// End of line should be converted to CR-LF sequences.
if (!is_last_character) {
Expand All @@ -103,10 +100,10 @@ void QuotedPrintableEncode(const char* input,

// Insert a soft line break if necessary.
if (current_line_length + length_of_encoded_character >
kMaximumLineLength) {
out.push_back('=');
out.Append(kCrlfLineEnding, strlen(kCrlfLineEnding));
delegate->GetMaxLineLengthForEncodedContent()) {
delegate->DidFinishLine(false /*last_line*/, out);
current_line_length = 0;
delegate->DidStartLine(out);
}

// Finally, insert the actual character(s).
Expand All @@ -120,6 +117,7 @@ void QuotedPrintableEncode(const char* input,
current_line_length++;
}
}
delegate->DidFinishLine(true /*last_line*/, out);
}

void QuotedPrintableDecode(const Vector<char>& in, Vector<char>& out) {
Expand Down
34 changes: 31 additions & 3 deletions third_party/WebKit/Source/platform/text/QuotedPrintable.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,38 @@

namespace blink {

PLATFORM_EXPORT void QuotedPrintableEncode(const Vector<char>&, Vector<char>&);
PLATFORM_EXPORT void QuotedPrintableEncode(const char*, size_t, Vector<char>&);
// Delegate for controlling the behavior of quoted-printable encoding. The
// original characters may be encoded a bit differently depending on where
// they live, header or body. For example, "=CRLF" should be used to break
// a long line in a body while "CRLF+SPACE" should be used to break a long
// line in a header.
//
// QuotedPrintableEncode() calls DidStartLine() before it writes any encoded
// content and again after each soft line break it inserts. It calls
// DidFinishLine(false, ...) at each soft line break and
// DidFinishLine(true, ...) once after the last character.
class PLATFORM_EXPORT QuotedPrintableEncodeDelegate {
public:
QuotedPrintableEncodeDelegate() = default;
virtual ~QuotedPrintableEncodeDelegate() = default;

// Returns the maximum number of characters allowed for an encoded line,
// excluding any prefix and soft line break. The encoder inserts a soft line
// break when adding the next encoded character would exceed this value.
virtual size_t GetMaxLineLengthForEncodedContent() const = 0;

// Returns true if a space or tab character needs to be encoded.
// |end_of_line| is true when the character is the last one of the input or
// is immediately followed by a line ending.
virtual bool ShouldEncodeWhiteSpaceCharacters(bool end_of_line) const = 0;

// Called when an encoded line starts. The delegate can take this chance to
// append any prefix to |out|.
virtual void DidStartLine(Vector<char>& out) = 0;

// Called when an encoded line ends. The delegate can take this chance to
// append any suffix to |out|. If |last_line| is false, a soft line break
// should also be appended after the suffix.
virtual void DidFinishLine(bool last_line, Vector<char>& out) = 0;
};

// Quoted-printable encodes the given bytes (pointer plus length) into the
// output vector. The output vector is cleared first, so any previous content
// is discarded. The delegate is dereferenced unconditionally and therefore
// must not be null; it decides the maximum line length, whether space and tab
// are encoded, and what is emitted at the start and end of each encoded line.
PLATFORM_EXPORT void QuotedPrintableEncode(const char*,
size_t,
QuotedPrintableEncodeDelegate*,
Vector<char>&);

PLATFORM_EXPORT void QuotedPrintableDecode(const Vector<char>&, Vector<char>&);
PLATFORM_EXPORT void QuotedPrintableDecode(const char*, size_t, Vector<char>&);

} // namespace blink
Expand Down

0 comments on commit a97e4b2

Please sign in to comment.