Auto merge of #9248 - askalski:issue9185, r=<try>

Issue9185 I still have to write tests, but I wanted to see the full Travis CI output. Fixes #9185  [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/9248)
servo · Feb 19, 2016 · a8bbba2 · a8bbba2
2 parents 361bb24 + 784f01e
commit a8bbba2
Show file tree

Hide file tree

Showing 7 changed files with 240 additions and 6 deletions.
diff --git a/components/script/dom/htmlscriptelement.rs b/components/script/dom/htmlscriptelement.rs
@@ -70,7 +70,7 @@ pub struct HTMLScriptElement {
 
     #[ignore_heap_size_of = "Defined in rust-encoding"]
     /// https://html.spec.whatwg.org/multipage/#concept-script-encoding
-    block_character_encoding: DOMRefCell<EncodingRef>,
+    block_character_encoding: DOMRefCell<Option<EncodingRef>>,
 }
 
 impl HTMLScriptElement {
@@ -85,7 +85,7 @@ impl HTMLScriptElement {
             ready_to_be_parser_executed: Cell::new(false),
             parser_document: JS::from_ref(document),
             load: DOMRefCell::new(None),
-            block_character_encoding: DOMRefCell::new(UTF_8 as EncodingRef),
+            block_character_encoding: DOMRefCell::new(None),
         }
     }
 
@@ -247,7 +247,7 @@ impl HTMLScriptElement {
         // Step 13.
         if let Some(ref charset) = element.get_attribute(&ns!(), &atom!("charset")) {
             if let Some(encodingRef) = encoding_from_whatwg_label(&charset.Value()) {
-                *self.block_character_encoding.borrow_mut() = encodingRef;
+                *self.block_character_encoding.borrow_mut() = Some(encodingRef);
             }
         }
 
@@ -390,10 +390,79 @@ impl HTMLScriptElement {
 
             // Step 2.b.1.a.
             ScriptOrigin::External(Ok((metadata, bytes))) => {
+                debug!("loading external script, url = {}", metadata.final_url);
                 // TODO(#9185): implement encoding determination.
-                (DOMString::from(UTF_8.decode(&*bytes, DecoderTrap::Replace).unwrap()),
-                 true,
-                 metadata.final_url)
+
+                // Step 1.
+                // TODO: If the resource's Content Type metadata, if any,
+                // specifies a character encoding, and the user agent supports
+                // that encoding, then let character encoding be that encoding,
+                // and jump to the bottom step in this series of steps.
+
+                let encoding_after_step1: Option<EncodingRef> = match metadata.charset {
+                    Some(encoding) => match encoding_from_whatwg_label(&encoding) {
+                        Some(enc_ref) => Some(enc_ref),
+                        None => {
+                            debug!("error loading script, unknown encoding {} found in ContentType metadata", encoding);
+                            None}
+                        },
+                    None => None
+                };
+
+                if encoding_after_step1.is_some() {
+                    debug!("charset set after step1")
+                };
+
+                // Step 2.
+                // TODO: If the algorithm above set the script block's
+                // character encoding, then let character encoding be that
+                // encoding, and jump to the bottom step in this series of
+                // steps.
+
+                let encoding_after_step2: Option<EncodingRef> = match encoding_after_step1 {
+                    Some(enc_ref) => Some(enc_ref),
+                    None => *self.block_character_encoding.borrow()
+                };
+
+                if encoding_after_step1.is_none() && encoding_after_step2.is_some() {
+                    debug!("charset set after step2");
+                };
+
+                // Step 3.
+                // TODO: Let character encoding be the script block's fallback
+                // character encoding.
+
+                let encoding_after_step3: Option<EncodingRef> = match encoding_after_step2 {
+                    Some(enc_ref) => Some(enc_ref),
+                    None => {
+                        let fallback_charset = (*self.parser_document).Charset();
+                        match encoding_from_whatwg_label(&fallback_charset) {
+                            Some(enc_ref) => Some(enc_ref),
+                            None => {
+                                debug!("error loading script, unknown encoding {} given as block's\
+                                    fallback charactr encoding (self.parser_document.Charset())", fallback_charset);
+                                None}
+                            }
+                        },
+                };
+
+                if encoding_after_step2.is_none() && encoding_after_step3.is_some() {
+                    debug!("charset set after step3");
+                };
+
+                // Step 4.
+                // TODO: Otherwise, decode the file to Unicode, using character
+                // encoding as the fallback encoding.
+
+                let final_encoding = encoding_after_step3.unwrap_or(UTF_8 as EncodingRef);
+
+                if encoding_after_step3.is_none() {
+                    debug!("charset to UTF-8 as fallback");
+                };
+
+                (DOMString::from(final_encoding.decode(&*bytes, DecoderTrap::Replace).unwrap()),
+                    true,
+                    metadata.final_url)
             },
 
             // Step 2.b.1.c.

diff --git a/tests/wpt/metadata/MANIFEST.json b/tests/wpt/metadata/MANIFEST.json
@@ -19095,6 +19095,14 @@
         "path": "html/semantics/scripting-1/the-script-element/script-before-after-events.html",
         "url": "/html/semantics/scripting-1/the-script-element/script-before-after-events.html"
       },
+      {
+        "path": "html/semantics/scripting-1/the-script-element/script-charset-01.html",
+        "url": "/html/semantics/scripting-1/the-script-element/script-charset-01.html"
+      },
+      {
+        "path": "html/semantics/scripting-1/the-script-element/script-charset-02.html",
+        "url": "/html/semantics/scripting-1/the-script-element/script-charset-02.html"
+      },
       {
         "path": "html/semantics/scripting-1/the-script-element/script-for-event-xhtml.xhtml",
         "url": "/html/semantics/scripting-1/the-script-element/script-for-event-xhtml.xhtml"

diff --git a/.../web-platform-tests/html/semantics/scripting-1/the-script-element/external-script-utf8.js b/.../web-platform-tests/html/semantics/scripting-1/the-script-element/external-script-utf8.js
@@ -0,0 +1,5 @@
+(function() {
+  window.getSomeString = function() {
+    return "śćążź"; //<- these are five Polish letters, similar to scazz. This file is UTF-8 encoded, so they can be read correctly only with UTF-8 encoding.
+  };
+})();
diff --git a/...atform-tests/html/semantics/scripting-1/the-script-element/external-script-windows1250.js b/...atform-tests/html/semantics/scripting-1/the-script-element/external-script-windows1250.js
@@ -0,0 +1,5 @@
+(function() {
+  window.getSomeString = function() {
+    return "œæ¹¿Ÿ"; //<- these are five Polish letters, similar to scazz. It can be read correctly only with windows 1250 encoding.
+  };
+})();
diff --git a/...t/web-platform-tests/html/semantics/scripting-1/the-script-element/script-charset-01.html b/...t/web-platform-tests/html/semantics/scripting-1/the-script-element/script-charset-01.html
@@ -0,0 +1,89 @@
+<!DOCTYPE html>
+<head>
+  <meta charset="utf-8">
+  <title>Script @type: unknown parameters</title>
+  <link rel="author" title="askalski" href="github.com/askalski">
+  <link rel="help" href="https://html.spec.whatwg.org/multipage/#scriptingLanguages">
+  <script src="/resources/testharness.js"></script>
+  <script src="/resources/testharnessreport.js"></script>
+  <div id="log"></div>
+
+  <!-- "Step1" tests -->
+  <!-- charset is set incorrectly via Content Type "text/javascript;charset=utf-8" in response
+      which takes priority over the correct setting in the "charset" attribute of the script tag.
+   -->
+  <script type="text/javascript"
+    src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript%3Bcharset=utf-8" charset="windows-1250">
+  </script>
+  <script>
+  test(function() {
+    //these strings should not match, since the file charset is set incorrectly
+    assert_not_equals(window.getSomeString(), "śćążź");
+  });
+  </script>
+  <!-- charset is set correctly via Content Type "text/javascript;charset=windows-1250" in response
+      which takes priority over the incorrect setting in the "charset" attribute of the script tag.
+   -->
+
+  <script type="text/javascript"
+    src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript%3Bcharset=windows-1250" charset="utf-8">
+  </script>
+  <script>
+  //the charset is set correctly via Content Type "text/javascript;charset=windows-1250" in response
+  test(function() {
+    assert_equals(window.getSomeString(), "śćążź");
+  });
+  </script>
+
+  <!-- end of step1 tests, now step2 tests -->
+  <!-- in this case, the response's Content Type does not bring charset information.
+  Second step takes block character encoding if available.-->
+  <script type="text/javascript"
+    src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript" charset="utf-8">
+  </script>
+  <script>
+  test(function() {
+    //these strings should not match, since the file charset is set incorrectly in the "charset" attribute of <script> above
+    assert_not_equals(window.getSomeString(), "śćążź");
+  });
+  </script>
+  <!-- charset is set correctly via the "charset" attribute of the script tag;
+      the response's Content Type does not carry any charset information.
+   -->
+
+  <script type="text/javascript"
+    src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript" charset="windows-1250">
+  </script>
+  <script>
+  //the charset is set correctly via content attribute in <script> above
+  test(function() {
+    assert_equals(window.getSomeString(), "śćążź");
+  });
+  </script>
+
+  <!-- end of step2 tests, now step3 tests -->
+  <!-- in this case, neither response's Content Type nor charset attribute bring correct charset information.
+  Third step takes this document's character encoding (declared correctly as UTF-8).-->
+  <script type="text/javascript"
+    src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript">
+  </script>
+  <script>
+  test(function() {
+    //these strings should not match, since the tested file is in windows-1250, and document is utf-8
+    assert_not_equals(window.getSomeString(), "śćążź");
+  });
+  </script>
+
+  <script type="text/javascript"
+    src="serve-with-content-type.py?fn=external-script-utf8.js&ct=text/javascript">
+  </script>
+  <script>
+  //these strings should match, both document and tested file are utf-8
+  test(function() {
+    assert_equals(window.getSomeString(), "śćążź");
+  });
+  </script>
+
+  <!-- the last portion of tests (step4) is in file script-charset-02.html -->
+
+</head>
diff --git a/...t/web-platform-tests/html/semantics/scripting-1/the-script-element/script-charset-02.html b/...t/web-platform-tests/html/semantics/scripting-1/the-script-element/script-charset-02.html
@@ -0,0 +1,40 @@
+<!DOCTYPE html>
+<head>
+  <!-- TODO:
+  askalski: while this test passes, it does not test anything now.
+  It should test whether, with no document.charset set in any way, the
+  external scripts will get decoded using utf-8 as fallback character encoding.
+  It seems like utf-8 is also a fallback encoding for html (my guess), so
+  the part of the code I was attempting to test is never reached.
+  -->
+  <title>Script @type: unknown parameters</title>
+  <link rel="author" title="askalski" href="github.com/askalski">
+  <link rel="help" href="https://html.spec.whatwg.org/multipage/#scriptingLanguages">
+  <script src="/resources/testharness.js"></script>
+  <script src="/resources/testharnessreport.js"></script>
+  <div id="log"></div>
+
+  <!-- test of step4, which is taking utf-8 as fallback -->
+  <!-- in this case, neither response's Content Type nor charset attribute bring correct charset information.
+  Furthermore, document's encoding is not set.-->
+  <script type="text/javascript"
+    src="serve-with-content-type.py?fn=external-script-windows1250.js&ct=text/javascript">
+  </script>
+  <script>
+  test(function() {
+    //these strings should not match, since the tested file is in windows-1250, and fallback is defined as utf-8
+    assert_not_equals(window.getSomeString().length, 5);
+  });
+  </script>
+
+  <script type="text/javascript"
+    src="serve-with-content-type.py?fn=external-script-utf8.js&ct=text/javascript">
+  </script>
+  <script>
+  //these strings should match, since fallback utf-8 is the correct setting.
+  test(function() {
+    assert_equals(window.getSomeString().length, 5);
+  });
+  </script>
+
+</head>
diff --git a/...b-platform-tests/html/semantics/scripting-1/the-script-element/serve-with-content-type.py b/...b-platform-tests/html/semantics/scripting-1/the-script-element/serve-with-content-type.py
@@ -0,0 +1,18 @@
+import os
+
+def main(request, response):
+    directory = os.path.dirname(__file__)
+
+    try:
+        file_name = request.GET.first("fn")
+        content_type = request.GET.first("ct")
+        content = open(os.path.join(directory, file_name), "rb").read()
+
+        response.headers.set("Content-Type", content_type)
+        response.content = content
+    except:
+        response.set_error(400, "Not enough parameters or file not found")
+
+    #raise Exception("e " + content_type)
+
+    #return [("Content-Type", content_type)], content