Skip to content

Commit

Permalink
Only skip certain hidden elements at MHTML serialization
Browse files Browse the repository at this point in the history
1) Elements with hidden attributes
2) Hidden form elements

We orginally tried to act more aggresively by skipping all possible
hidden elements. But this does break certain web sites that uses
nth CSS selectors.

BUG=697970
TEST=tests updated
TBR=svaldez@chromium.org

Review-Url: https://codereview.chromium.org/2819723003
Cr-Commit-Position: refs/heads/master@{#464856}
  • Loading branch information
jianli-chromium authored and Commit bot committed Apr 15, 2017
1 parent fb546e9 commit 2ddf990
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 26 deletions.
6 changes: 3 additions & 3 deletions chrome/browser/download/save_page_browsertest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1304,9 +1304,9 @@ IN_PROC_BROWSER_TEST_P(SavePageOriginalVsSavedComparisonTest, Style) {

GURL url(embedded_test_server()->GetURL("a.com", "/save_page/style.htm"));

// The original page has 7 iframes. 2 of them are both hidden and affecting
// no layout. So these two are excluded from the saved page.
TestOriginalVsSavedPage(save_page_type, url, 7, 5, expected_substrings);
// The original page has 7 iframes. One of them that contains hidden attribute
// will be excluded from the saved page.
TestOriginalVsSavedPage(save_page_type, url, 7, 6, expected_substrings);
}

// Test for saving a page with broken subresources:
Expand Down
23 changes: 8 additions & 15 deletions third_party/WebKit/Source/web/WebFrameSerializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "public/web/WebFrameSerializer.h"

#include "core/HTMLNames.h"
#include "core/InputTypeNames.h"
#include "core/dom/Document.h"
#include "core/dom/Element.h"
#include "core/frame/Frame.h"
Expand Down Expand Up @@ -124,21 +125,13 @@ bool MHTMLFrameSerializerDelegate::ShouldIgnoreElement(const Element& element) {

bool MHTMLFrameSerializerDelegate::ShouldIgnoreHiddenElement(
const Element& element) {
// Do not include elements that are are set to hidden without affecting layout
// by the page. For those elements that are hidden by default, they will not
// be excluded:
// 1) All elements that are head or part of head, including head, meta, style,
// link and etc.
// 2) Some specific elements in body: meta, style, datalist, option and etc.
if (element.GetLayoutObject())
return false;
if (isHTMLHeadElement(element) || isHTMLMetaElement(element) ||
isHTMLStyleElement(element) || isHTMLDataListElement(element) ||
isHTMLOptionElement(element) || isHTMLLinkElement(element)) {
return false;
}
Element* parent = element.parentElement();
return parent && !isHTMLHeadElement(parent);
// Do not include the element that is marked with hidden attribute.
if (element.FastHasAttribute(HTMLNames::hiddenAttr))
return true;

// Do not include the hidden form element.
return isHTMLInputElement(element) &&
toHTMLInputElement(&element)->type() == InputTypeNames::hidden;
}

bool MHTMLFrameSerializerDelegate::ShouldIgnoreMetaElement(
Expand Down
13 changes: 6 additions & 7 deletions third_party/WebKit/Source/web/tests/WebFrameSerializerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,16 +273,19 @@ TEST_F(WebFrameSerializerSanitizationTest, RemoveHiddenElements) {
String mhtml =
GenerateMHTMLParts("http://www.test.com", "hidden_elements.html");

// These hidden elements that do not affect layout should be removed.
EXPECT_EQ(WTF::kNotFound, mhtml.Find("<h1"));
// The element with hidden attribute should be removed.
EXPECT_EQ(WTF::kNotFound, mhtml.Find("<p id=3D\"hidden_id\""));

// The hidden form element should be removed.
EXPECT_EQ(WTF::kNotFound, mhtml.Find("<input type=3D\"hidden\""));

// These default-hidden elements should not be removed.
// All other hidden elements should not be removed.
EXPECT_NE(WTF::kNotFound, mhtml.Find("<html"));
EXPECT_NE(WTF::kNotFound, mhtml.Find("<head"));
EXPECT_NE(WTF::kNotFound, mhtml.Find("<style"));
EXPECT_NE(WTF::kNotFound, mhtml.Find("<title"));
EXPECT_NE(WTF::kNotFound, mhtml.Find("<h1"));
EXPECT_NE(WTF::kNotFound, mhtml.Find("<h2"));
EXPECT_NE(WTF::kNotFound, mhtml.Find("<datalist"));
EXPECT_NE(WTF::kNotFound, mhtml.Find("<option"));
// One for meta in head and another for meta in body.
Expand All @@ -292,11 +295,7 @@ TEST_F(WebFrameSerializerSanitizationTest, RemoveHiddenElements) {
// One for link in head and another for link in body.
EXPECT_EQ(2, MatchSubstring(mhtml, "<link", 5));

// These hidden elements that affect layout should remain intact.
EXPECT_NE(WTF::kNotFound, mhtml.Find("<h2"));

// These visible elements should remain intact.
EXPECT_NE(WTF::kNotFound, mhtml.Find("<h2"));
EXPECT_NE(WTF::kNotFound, mhtml.Find("<p id=3D\"visible_id\""));
EXPECT_NE(WTF::kNotFound, mhtml.Find("<form"));
EXPECT_NE(WTF::kNotFound, mhtml.Find("<input type=3D\"text\""));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<meta http-equiv="Content-Security-Policy" content="child-src 'self'">
</head>
<body>
<select name="choices" style="display:none;">
<select name="choices" hidden>
<option value="c1">c1</option>
<option value="c2">c2</option>
</select>
Expand Down

0 comments on commit 2ddf990

Please sign in to comment.