Skip to content

Commit

Permalink
Auto merge of #9248 - askalski:issue9185, r=<try>
Browse files Browse the repository at this point in the history
Issue9185

I have to write tests yet, but I wanted to see full Travis CI output.

Fixes #9185

<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/9248)
<!-- Reviewable:end -->
  • Loading branch information
bors-servo committed Feb 19, 2016
2 parents 361bb24 + 784f01e commit a8bbba2
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 6 deletions.
81 changes: 75 additions & 6 deletions components/script/dom/htmlscriptelement.rs
Expand Up @@ -70,7 +70,7 @@ pub struct HTMLScriptElement {

#[ignore_heap_size_of = "Defined in rust-encoding"]
/// https://html.spec.whatwg.org/multipage/#concept-script-encoding
block_character_encoding: DOMRefCell<EncodingRef>,
block_character_encoding: DOMRefCell<Option<EncodingRef>>,
}

impl HTMLScriptElement {
Expand All @@ -85,7 +85,7 @@ impl HTMLScriptElement {
ready_to_be_parser_executed: Cell::new(false),
parser_document: JS::from_ref(document),
load: DOMRefCell::new(None),
block_character_encoding: DOMRefCell::new(UTF_8 as EncodingRef),
block_character_encoding: DOMRefCell::new(None),
}
}

Expand Down Expand Up @@ -247,7 +247,7 @@ impl HTMLScriptElement {
// Step 13.
if let Some(ref charset) = element.get_attribute(&ns!(), &atom!("charset")) {
if let Some(encodingRef) = encoding_from_whatwg_label(&charset.Value()) {
*self.block_character_encoding.borrow_mut() = encodingRef;
*self.block_character_encoding.borrow_mut() = Some(encodingRef);
}
}

Expand Down Expand Up @@ -390,10 +390,79 @@ impl HTMLScriptElement {

// Step 2.b.1.a.
ScriptOrigin::External(Ok((metadata, bytes))) => {
debug!("loading external script, url = {}", metadata.final_url);
// TODO(#9185): implement encoding determination.
(DOMString::from(UTF_8.decode(&*bytes, DecoderTrap::Replace).unwrap()),
true,
metadata.final_url)

// Step 1.
// TODO: If the resource's Content Type metadata, if any,
// specifies a character encoding, and the user agent supports
// that encoding, then let character encoding be that encoding,
// and jump to the bottom step in this series of steps.

let encoding_after_step1: Option<EncodingRef> = match metadata.charset {
Some(encoding) => match encoding_from_whatwg_label(&encoding) {
Some(enc_ref) => Some(enc_ref),
None => {
debug!("error loading script, unknown encoding {} found in ContentType metadata", encoding);
None}
},
None => None
};

if encoding_after_step1.is_some() {
debug!("charset set after step1")
};

// Step 2.
// TODO: If the algorithm above set the script block's
// character encoding, then let character encoding be that
// encoding, and jump to the bottom step in this series of
// steps.

let encoding_after_step2: Option<EncodingRef> = match encoding_after_step1 {
Some(enc_ref) => Some(enc_ref),
None => *self.block_character_encoding.borrow()
};

if encoding_after_step1.is_none() && encoding_after_step2.is_some() {
debug!("charset set after step2");
};

// Step 3.
// TODO: Let character encoding be the script block's fallback
// character encoding.

let encoding_after_step3: Option<EncodingRef> = match encoding_after_step2 {
Some(enc_ref) => Some(enc_ref),
None => {
let fallback_charset = (*self.parser_document).Charset();
match encoding_from_whatwg_label(&fallback_charset) {
Some(enc_ref) => Some(enc_ref),
None => {
debug!("error loading script, unknown encoding {} given as block's\
fallback charactr encoding (self.parser_document.Charset())", fallback_charset);
None}
}
},
};

if encoding_after_step2.is_none() && encoding_after_step3.is_some() {
debug!("charset set after step3");
};

// Step 4.
// TODO: Otherwise, decode the file to Unicode, using character
// encoding as the fallback encoding.

let final_encoding = encoding_after_step3.unwrap_or(UTF_8 as EncodingRef);

if encoding_after_step3.is_none() {
debug!("charset to UTF-8 as fallback");
};

(DOMString::from(final_encoding.decode(&*bytes, DecoderTrap::Replace).unwrap()),
true,
metadata.final_url)
},

// Step 2.b.1.c.
Expand Down
8 changes: 8 additions & 0 deletions tests/wpt/metadata/MANIFEST.json
Expand Up @@ -19095,6 +19095,14 @@
"path": "html/semantics/scripting-1/the-script-element/script-before-after-events.html",
"url": "/html/semantics/scripting-1/the-script-element/script-before-after-events.html"
},
{
"path": "html/semantics/scripting-1/the-script-element/script-charset-01.html",
"url": "/html/semantics/scripting-1/the-script-element/script-charset-01.html"
},
{
"path": "html/semantics/scripting-1/the-script-element/script-charset-02.html",
"url": "/html/semantics/scripting-1/the-script-element/script-charset-02.html"
},
{
"path": "html/semantics/scripting-1/the-script-element/script-for-event-xhtml.xhtml",
"url": "/html/semantics/scripting-1/the-script-element/script-for-event-xhtml.xhtml"
Expand Down
@@ -0,0 +1,5 @@
// Test fixture: installs window.getSomeString(), which returns a fixed
// five-letter Polish string used by the script charset tests.
(function() {
    // Five Polish letters, similar to "scazz".
    // NOTE(review): this copy appears to be the UTF-8 variant of the fixture
    // (the literal reads correctly as UTF-8) - confirm against the file name.
    var polishLetters = "śćążź";

    window.getSomeString = function() {
        return polishLetters;
    };
})();
@@ -0,0 +1,5 @@
// Test fixture: installs window.getSomeString(), which returns a fixed string literal.
// NOTE(review): presumably this is the windows-1250 variant of the fixture
// (external-script-windows1250.js in the tests); the literal below looks garbled
// whenever its bytes are displayed in another encoding. Do not "correct" it -
// confirm the on-disk encoding before touching this file.
(function() {
window.getSomeString = function() {
return "œæ¹¿Ÿ"; // five Polish letters (similar to "scazz") that decode correctly only as windows-1250
};
})();
@@ -0,0 +1,89 @@
<!DOCTYPE html>
<head>
<meta charset="utf-8">
<title>Script @type: unknown parameters</title>
<link rel="author" title="askalski" href="github.com/askalski">
<link rel="help" href="https://html.spec.whatwg.org/multipage/#scriptingLanguages">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<div id="log"></div>

<!-- "Step1" tests -->
<!-- charset is set incorrectly via Content Type "text/javascript;charset=utf-8" in response
which has priority before a correct setting in "charset" attribute of script tag.
-->
<script type="text/javascript"
src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript%3Bcharset=utf-8" charset="windows-1250">
</script>
<script>
test(function() {
//these strings should not match, since the file charset is set incorrectly
assert_not_equals(window.getSomeString(), "śćążź");
});
</script>
<!-- charset is set correctly via Content Type "text/javascript;charset=windows-1250" in response
which has priority over an incorrect setting in "charset" attribute of script tag.
-->

<script type="text/javascript"
src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript%3Bcharset=windows-1250" charset="utf-8">
</script>
<script>
//the charset is set correctly via Content Type "text/javascript;charset=windows-1250" in response
test(function() {
assert_equals(window.getSomeString(), "śćążź");
});
</script>

<!-- end of step1 tests, now step2 tests -->
<!-- in this case, the response's Content Type does not bring charset information.
Second step takes block character encoding if available.-->
<script type="text/javascript"
src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript" charset="utf-8">
</script>
<script>
test(function() {
//these strings should not match, since the file charset is set incorrectly in the "charset" attribute of <script> above
assert_not_equals(window.getSomeString(), "śćążź");
});
</script>
<!-- the response's Content Type carries no charset information, so the
correct setting in "charset" attribute of script tag is used (step 2).
-->

<script type="text/javascript"
src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript" charset="windows-1250">
</script>
<script>
//the charset is set correctly via content attribute in <script> above
test(function() {
assert_equals(window.getSomeString(), "śćążź");
});
</script>

<!-- end of step2 tests, now step3 tests -->
<!-- in this case, neither response's Content Type nor charset attribute bring correct charset information.
Third step takes this document's character encoding (declared correctly as UTF-8).-->
<script type="text/javascript"
src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript">
</script>
<script>
test(function() {
//these strings should not match, since the tested file is in windows-1250, and document is utf-8
assert_not_equals(window.getSomeString(), "śćążź");
});
</script>

<script type="text/javascript"
src="serve-with-content-type.py?fn=external-script-utf8.js&ct=text/javascript">
</script>
<script>
//these strings should match, both document and tested file are utf-8
test(function() {
assert_equals(window.getSomeString(), "śćążź");
});
</script>

<!-- the last portion of tests (step4) is in file script-charset-02.html -->

</head>
@@ -0,0 +1,40 @@
<!DOCTYPE html>
<head>
<!-- TODO:
askalski: while this test passes, it does not test anything now.
It should test, whether with no document.charset set in any way, the
external scripts will get decoded using utf-8 as fallback character encoding.
It seems like utf-8 is also a fallback encoding to html (my guess), so
the part of the code I was attempting to test is never reached.
-->
<title>Script @type: unknown parameters</title>
<link rel="author" title="askalski" href="github.com/askalski">
<link rel="help" href="https://html.spec.whatwg.org/multipage/#scriptingLanguages">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<div id="log"></div>

<!-- test of step4, which is taking utf-8 as fallback -->
<!-- in this case, neither response's Content Type nor charset attribute bring correct charset information.
Furthermore, document's encoding is not set.-->
<script type="text/javascript"
src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript">
</script>
<script>
test(function() {
//these strings should not match, since the tested file is in windows-1250, and fallback is defined as utf-8
assert_not_equals(window.getSomeString().length, 5);
});
</script>

<script type="text/javascript"
src="serve-with-content-type.py?fn=external-script-utf8.js&ct=text/javascript">
</script>
<script>
//these strings should match, since fallback utf-8 is the correct setting.
test(function() {
assert_equals(window.getSomeString().length, 5);
});
</script>

</head>
@@ -0,0 +1,18 @@
import os

def main(request, response):
    """Serve a file from this handler's directory with a caller-chosen Content-Type.

    Query parameters (read via ``request.GET.first``):
      fn -- name of the file to serve, joined onto this script's directory.
      ct -- value to send in the ``Content-Type`` response header.

    On success the raw file bytes become ``response.content``. If either
    parameter is missing or the file cannot be read, a 400 error is set on
    the response instead.
    """
    directory = os.path.dirname(__file__)

    try:
        file_name = request.GET.first("fn")
        content_type = request.GET.first("ct")
        # NOTE(review): "fn" is joined without validation, so an absolute path
        # or "../" escapes this directory. Acceptable only for a local test
        # server; confirm before reusing this handler elsewhere.
        # The context manager closes the file handle even if read() fails.
        with open(os.path.join(directory, file_name), "rb") as script_file:
            content = script_file.read()
    except (KeyError, IOError, OSError):
        # Only the expected failures map to 400: a missing query parameter or
        # an unreadable file. Anything else is a real bug and should surface
        # instead of being masked as a client error.
        response.set_error(400, "Not enough parameters or file not found")
        return

    response.headers.set("Content-Type", content_type)
    response.content = content

#raise Exception("e " + content_type)

#return [("Content-Type", content_type)], content

0 comments on commit a8bbba2

Please sign in to comment.