Skip to content

Commit

Permalink
Update dangling markup mitigations.
Browse files Browse the repository at this point in the history
Still behind a flag, just updating the checks to look for both `\n` and
`<` rather than just the former. This is in line with the patches up at
whatwg/url#284 and
whatwg/fetch#519.

Intent to Remove: https://groups.google.com/a/chromium.org/d/msg/blink-dev/KaA_YNOlTPk/VmmoV88xBgAJ.

Bug: 680970
Change-Id: Ifda61a0afe1f0e97620acef7dc54b005c6f74840
Reviewed-on: https://chromium-review.googlesource.com/514024
Commit-Queue: Mike West <mkwst@chromium.org>
Reviewed-by: Jochen Eisinger <jochen@chromium.org>
Cr-Original-Commit-Position: refs/heads/master@{#474341}
Cr-Mirrored-From: https://chromium.googlesource.com/chromium/src
Cr-Mirrored-Commit: 9e5ae901660de47ef1b844c6113eae91b5ae8e9e
  • Loading branch information
mikewest authored and Commit Bot committed May 24, 2017
1 parent c0cb55b commit 6b19814
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 48 deletions.
18 changes: 16 additions & 2 deletions gurl_unittest.cc
Expand Up @@ -645,12 +645,26 @@ TEST(GURLTest, Newlines) {
// Constructor.
GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n ");
EXPECT_EQ("http://www.google.com/asdf", url_1.spec());
EXPECT_TRUE(url_1.parsed_for_possibly_invalid_spec().whitespace_removed);
EXPECT_FALSE(
url_1.parsed_for_possibly_invalid_spec().potentially_dangling_markup);

// Relative path resolver.
GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
EXPECT_EQ("http://www.google.com/foo", url_2.spec());
EXPECT_TRUE(url_2.parsed_for_possibly_invalid_spec().whitespace_removed);
EXPECT_FALSE(
url_2.parsed_for_possibly_invalid_spec().potentially_dangling_markup);

// Constructor.
GURL url_3(" \t ht\ntp://\twww.goo\rgle.com/as\ndf< \n ");
EXPECT_EQ("http://www.google.com/asdf%3C", url_3.spec());
EXPECT_TRUE(
url_3.parsed_for_possibly_invalid_spec().potentially_dangling_markup);

// Relative path resolver.
GURL url_4 = url_1.Resolve(" \n /fo\to<\r ");
EXPECT_EQ("http://www.google.com/foo%3C", url_4.spec());
EXPECT_TRUE(
url_4.parsed_for_possibly_invalid_spec().potentially_dangling_markup);

// Note that newlines are NOT stripped from ReplaceComponents.
}
Expand Down
6 changes: 3 additions & 3 deletions third_party/mozilla/url_parse.cc
Expand Up @@ -690,7 +690,7 @@ bool DoExtractQueryKeyValue(const CHAR* spec,

} // namespace

Parsed::Parsed() : whitespace_removed(false), inner_parsed_(NULL) {}
Parsed::Parsed() : potentially_dangling_markup(false), inner_parsed_(NULL) {}

Parsed::Parsed(const Parsed& other)
: scheme(other.scheme),
Expand All @@ -701,7 +701,7 @@ Parsed::Parsed(const Parsed& other)
path(other.path),
query(other.query),
ref(other.ref),
whitespace_removed(other.whitespace_removed),
potentially_dangling_markup(other.potentially_dangling_markup),
inner_parsed_(NULL) {
if (other.inner_parsed_)
set_inner_parsed(*other.inner_parsed_);
Expand All @@ -717,7 +717,7 @@ Parsed& Parsed::operator=(const Parsed& other) {
path = other.path;
query = other.query;
ref = other.ref;
whitespace_removed = other.whitespace_removed;
potentially_dangling_markup = other.potentially_dangling_markup;
if (other.inner_parsed_)
set_inner_parsed(*other.inner_parsed_);
else
Expand Down
8 changes: 6 additions & 2 deletions third_party/mozilla/url_parse.h
Expand Up @@ -177,8 +177,12 @@ struct URL_EXPORT Parsed {
// the string with the scheme stripped off.
Component GetContent() const;

// True if whitespace was removed from the URL during parsing.
bool whitespace_removed;
// True if the URL's source contained a raw `<` character, and whitespace was
// removed from the URL during parsing
//
// TODO(mkwst): Link this to something in a spec if
// https://github.com/whatwg/url/pull/284 lands.
bool potentially_dangling_markup;

// This is used for nested URL types, currently only filesystem. If you
// parse a filesystem URL, the resulting Parsed will have a nested
Expand Down
13 changes: 10 additions & 3 deletions url_canon.h
Expand Up @@ -231,14 +231,21 @@ class URL_EXPORT CharsetConverter {
//
// Therefore, callers should not use the buffer, since it may actually be empty,
// use the computed pointer and |*output_len| instead.
URL_EXPORT const char* RemoveURLWhitespace(const char* input, int input_len,
//
// If |input| contained both removable whitespace and a raw `<` character,
// |potentially_dangling_markup| will be set to `true`. Otherwise, it will be
// left untouched.
URL_EXPORT const char* RemoveURLWhitespace(const char* input,
int input_len,
CanonOutputT<char>* buffer,
int* output_len);
int* output_len,
bool* potentially_dangling_markup);
URL_EXPORT const base::char16* RemoveURLWhitespace(
const base::char16* input,
int input_len,
CanonOutputT<base::char16>* buffer,
int* output_len);
int* output_len,
bool* potentially_dangling_markup);

// IDN ------------------------------------------------------------------------

Expand Down
28 changes: 19 additions & 9 deletions url_canon_etc.cc
Expand Up @@ -22,10 +22,12 @@ inline bool IsRemovableURLWhitespace(int ch) {
// Backend for RemoveURLWhitespace (see declaration in url_canon.h).
// It sucks that we have to do this, since this takes about 13% of the total URL
// canonicalization time.
template<typename CHAR>
const CHAR* DoRemoveURLWhitespace(const CHAR* input, int input_len,
template <typename CHAR>
const CHAR* DoRemoveURLWhitespace(const CHAR* input,
int input_len,
CanonOutputT<CHAR>* buffer,
int* output_len) {
int* output_len,
bool* potentially_dangling_markup) {
// Fast verification that there's nothing that needs removal. This is the 99%
// case, so we want it to be fast and don't care about impacting the speed
// when we do find whitespace.
Expand All @@ -46,8 +48,11 @@ const CHAR* DoRemoveURLWhitespace(const CHAR* input, int input_len,

// Remove the whitespace into the new buffer and return it.
for (int i = 0; i < input_len; i++) {
if (!IsRemovableURLWhitespace(input[i]))
if (!IsRemovableURLWhitespace(input[i])) {
if (potentially_dangling_markup && input[i] == 0x3C)
*potentially_dangling_markup = true;
buffer->push_back(input[i]);
}
}
*output_len = buffer->length();
return buffer->data();
Expand Down Expand Up @@ -274,17 +279,22 @@ void DoCanonicalizeRef(const CHAR* spec,

} // namespace

const char* RemoveURLWhitespace(const char* input, int input_len,
const char* RemoveURLWhitespace(const char* input,
int input_len,
CanonOutputT<char>* buffer,
int* output_len) {
return DoRemoveURLWhitespace(input, input_len, buffer, output_len);
int* output_len,
bool* potentially_dangling_markup) {
return DoRemoveURLWhitespace(input, input_len, buffer, output_len,
potentially_dangling_markup);
}

const base::char16* RemoveURLWhitespace(const base::char16* input,
int input_len,
CanonOutputT<base::char16>* buffer,
int* output_len) {
return DoRemoveURLWhitespace(input, input_len, buffer, output_len);
int* output_len,
bool* potentially_dangling_markup) {
return DoRemoveURLWhitespace(input, input_len, buffer, output_len,
potentially_dangling_markup);
}

char CanonicalSchemeChar(base::char16 ch) {
Expand Down
8 changes: 4 additions & 4 deletions url_canon_relative.cc
Expand Up @@ -443,11 +443,11 @@ bool DoResolveRelativeURL(const char* base_url,
Parsed* out_parsed) {
// |base_parsed| is the starting point for our output. Since we may have
// removed whitespace from |relative_url| before entering this method, we'll
// carry over the |whitespace_removed| flag.
bool whitespace_removed = out_parsed->whitespace_removed;
// carry over the |potentially_dangling_markup| flag.
bool potentially_dangling_markup = out_parsed->potentially_dangling_markup;
*out_parsed = base_parsed;
if (whitespace_removed)
out_parsed->whitespace_removed = true;
if (potentially_dangling_markup)
out_parsed->potentially_dangling_markup = true;

// Sanity check: the input should have a host or we'll break badly below.
// We can only resolve relative URLs with base URLs that have hosts and
Expand Down
19 changes: 7 additions & 12 deletions url_util.cc
Expand Up @@ -185,8 +185,8 @@ bool DoFindAndCompareScheme(const CHAR* str,
// This matches the canonicalization done in DoCanonicalize function.
RawCanonOutputT<CHAR> whitespace_buffer;
int spec_len;
const CHAR* spec = RemoveURLWhitespace(str, str_len,
&whitespace_buffer, &spec_len);
const CHAR* spec =
RemoveURLWhitespace(str, str_len, &whitespace_buffer, &spec_len, nullptr);

Component our_scheme;
if (!ExtractScheme(spec, spec_len, &our_scheme)) {
Expand Down Expand Up @@ -214,11 +214,8 @@ bool DoCanonicalize(const CHAR* spec,
// Possibly this will result in copying to the new buffer.
RawCanonOutputT<CHAR> whitespace_buffer;
if (whitespace_policy == REMOVE_WHITESPACE) {
int original_len = spec_len;
spec =
RemoveURLWhitespace(spec, original_len, &whitespace_buffer, &spec_len);
if (spec_len != original_len)
output_parsed->whitespace_removed = true;
spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len,
&output_parsed->potentially_dangling_markup);
}

Parsed parsed_input;
Expand Down Expand Up @@ -296,11 +293,9 @@ bool DoResolveRelative(const char* base_spec,
// copying to the new buffer.
RawCanonOutputT<CHAR> whitespace_buffer;
int relative_length;
const CHAR* relative = RemoveURLWhitespace(in_relative, in_relative_length,
&whitespace_buffer,
&relative_length);
if (in_relative_length != relative_length)
output_parsed->whitespace_removed = true;
const CHAR* relative = RemoveURLWhitespace(
in_relative, in_relative_length, &whitespace_buffer, &relative_length,
&output_parsed->potentially_dangling_markup);

bool base_is_authority_based = false;
bool base_is_hierarchical = false;
Expand Down
36 changes: 23 additions & 13 deletions url_util_unittest.cc
Expand Up @@ -374,23 +374,32 @@ TEST(URLUtilTest, TestNoRefComponent) {
EXPECT_FALSE(resolved_parsed.ref.is_valid());
}

TEST(URLUtilTest, RelativeWhitespaceRemoved) {
TEST(URLUtilTest, PotentiallyDanglingMarkup) {
struct ResolveRelativeCase {
const char* base;
const char* rel;
bool whitespace_removed;
bool potentially_dangling_markup;
const char* out;
} cases[] = {
{"https://example.com/", "/path", false, "https://example.com/path"},
{"https://example.com/", "\n/path", true, "https://example.com/path"},
{"https://example.com/", "\r/path", true, "https://example.com/path"},
{"https://example.com/", "\t/path", true, "https://example.com/path"},
{"https://example.com/", "/pa\nth", true, "https://example.com/path"},
{"https://example.com/", "/pa\rth", true, "https://example.com/path"},
{"https://example.com/", "/pa\tth", true, "https://example.com/path"},
{"https://example.com/", "/path\n", true, "https://example.com/path"},
{"https://example.com/", "/path\r", true, "https://example.com/path"},
{"https://example.com/", "/path\r", true, "https://example.com/path"},
{"https://example.com/", "/path<", false, "https://example.com/path%3C"},
{"https://example.com/", "\n/path<", true, "https://example.com/path%3C"},
{"https://example.com/", "\r/path<", true, "https://example.com/path%3C"},
{"https://example.com/", "\t/path<", true, "https://example.com/path%3C"},
{"https://example.com/", "/pa\nth<", true, "https://example.com/path%3C"},
{"https://example.com/", "/pa\rth<", true, "https://example.com/path%3C"},
{"https://example.com/", "/pa\tth<", true, "https://example.com/path%3C"},
{"https://example.com/", "/path\n<", true, "https://example.com/path%3C"},
{"https://example.com/", "/path\r<", true, "https://example.com/path%3C"},
{"https://example.com/", "/path\r<", true, "https://example.com/path%3C"},
{"https://example.com/", "\n/<path", true, "https://example.com/%3Cpath"},
{"https://example.com/", "\r/<path", true, "https://example.com/%3Cpath"},
{"https://example.com/", "\t/<path", true, "https://example.com/%3Cpath"},
{"https://example.com/", "/<pa\nth", true, "https://example.com/%3Cpath"},
{"https://example.com/", "/<pa\rth", true, "https://example.com/%3Cpath"},
{"https://example.com/", "/<pa\tth", true, "https://example.com/%3Cpath"},
{"https://example.com/", "/<path\n", true, "https://example.com/%3Cpath"},
{"https://example.com/", "/<path\r", true, "https://example.com/%3Cpath"},
{"https://example.com/", "/<path\r", true, "https://example.com/%3Cpath"},
};

for (const auto& test : cases) {
Expand All @@ -407,7 +416,8 @@ TEST(URLUtilTest, RelativeWhitespaceRemoved) {
ASSERT_TRUE(valid);
output.Complete();

EXPECT_EQ(test.whitespace_removed, resolved_parsed.whitespace_removed);
EXPECT_EQ(test.potentially_dangling_markup,
resolved_parsed.potentially_dangling_markup);
EXPECT_EQ(test.out, resolved);
}
}
Expand Down

0 comments on commit 6b19814

Please sign in to comment.