Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement "extract mime type" algorithm #23566

Open
jdm opened this issue Jun 14, 2019 · 1 comment
Open

Implement "extract mime type" algorithm #23566

jdm opened this issue Jun 14, 2019 · 1 comment

Comments

@jdm
Copy link
Member

@jdm jdm commented Jun 14, 2019

This change never merged in #22767, because it triggered unexpected test failures on Linux that nobody got around to investigating.

From 420a8996810b8d212352a523edb115cb097dd82d Mon Sep 17 00:00:00 2001
From: Anthony Ramine <n.oxyde@gmail.com>
Date: Sun, 27 Jan 2019 10:43:20 +0100
Subject: [PATCH] Fix the implementation of the "extract MIME type"

---
 components/net/fetch/methods.rs               | 18 +++--
 components/net_traits/lib.rs                  | 65 ++++++++++++++++++-
 components/script/dom/headers.rs              | 10 +--
 .../fetch/content-type/script.window.js.ini   | 18 -----
 4 files changed, 75 insertions(+), 36 deletions(-)

diff --git a/components/net/fetch/methods.rs b/components/net/fetch/methods.rs
index 4022ca6b9bb9..1e0affafc8a3 100644
--- a/components/net/fetch/methods.rs
+++ b/components/net/fetch/methods.rs
@@ -23,8 +23,9 @@ use net_traits::filemanager_thread::RelativePos;
 use net_traits::request::{CredentialsMode, Destination, Referrer, Request, RequestMode};
 use net_traits::request::{Origin, ResponseTainting, Window};
 use net_traits::response::{Response, ResponseBody, ResponseType};
-use net_traits::ResourceAttribute;
-use net_traits::{FetchTaskTarget, NetworkError, ReferrerPolicy, ResourceFetchTiming};
+use net_traits::{
+    self, FetchTaskTarget, NetworkError, ReferrerPolicy, ResourceAttribute, ResourceFetchTiming,
+};
 use servo_url::ServoUrl;
 use std::borrow::Cow;
 use std::fs::File;
@@ -801,7 +802,7 @@ pub fn should_be_blocked_due_to_nosniff(
 
     // Step 4
     // Note: an invalid MIME type will produce a `None`.
-    let content_type_header = response_headers.typed_get::<ContentType>();
+    let content_type_header = net_traits::extract_mime_type(&response_headers);
 
     /// <https://html.spec.whatwg.org/multipage/#scriptingLanguages>
     #[inline]
@@ -832,14 +833,11 @@ pub fn should_be_blocked_due_to_nosniff(
 
     match content_type_header {
         // Step 6
-        Some(ref ct) if destination.is_script_like() => {
-            !is_javascript_mime_type(&ct.clone().into())
-        },
+        Some(ref ct) if destination.is_script_like() => !is_javascript_mime_type(ct),
 
         // Step 7
         Some(ref ct) if destination == Destination::Style => {
-            let m: mime::Mime = ct.clone().into();
-            m.type_() != mime::TEXT && m.subtype() != mime::CSS
+            ct.type_() != mime::TEXT && ct.subtype() != mime::CSS
         },
 
         None if destination == Destination::Style || destination.is_script_like() => true,
@@ -854,8 +852,8 @@ fn should_be_blocked_due_to_mime_type(
     response_headers: &HeaderMap,
 ) -> bool {
     // Step 1
-    let mime_type: mime::Mime = match response_headers.typed_get::<ContentType>() {
-        Some(header) => header.into(),
+    let mime_type = match net_traits::extract_mime_type(response_headers) {
+        Some(header) => header,
         None => return false,
     };
 
diff --git a/components/net_traits/lib.rs b/components/net_traits/lib.rs
index 6a38b0a8cc9e..63af82c078e4 100644
--- a/components/net_traits/lib.rs
+++ b/components/net_traits/lib.rs
@@ -22,9 +22,8 @@ use crate::request::{Request, RequestInit};
 use crate::response::{HttpsState, Response, ResponseInit};
 use crate::storage_thread::StorageThreadMsg;
 use cookie::Cookie;
-use headers_core::HeaderMapExt;
-use headers_ext::{ContentType, ReferrerPolicy as ReferrerPolicyHeader};
-use http::{Error as HttpError, HeaderMap};
+use headers::{ContentType, HeaderMapExt, ReferrerPolicy as ReferrerPolicyHeader};
+use http::{header, Error as HttpError, HeaderMap};
 use hyper::Error as HyperError;
 use hyper::StatusCode;
 use hyper_serde::Serde;
@@ -660,3 +659,63 @@ pub fn http_percent_encode(bytes: &[u8]) -> String {
 
     url::percent_encoding::percent_encode(bytes, HTTP_VALUE).to_string()
 }
+
+// https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
+pub fn extract_mime_type(headers: &HeaderMap) -> Option<Mime> {
+    // Steps 1-2.
+    // NOTE: We can't own a MIME type essence or a charset param, so we just
+    // compare the essences between the temporary and current MIME type in
+    // the loop.
+
+    // Step 3.
+    let mut mime_type = None::<Mime>;
+
+    // Step 4.
+    let values = headers.get_all(header::CONTENT_TYPE);
+
+    // Step 5.
+    // NOTE: Unapplicable, we use an iterator.
+
+    // Step 6.
+    for value in values {
+        // Steps 6.1-6.2.
+        let value = match value.to_str() {
+            Ok(value) => value,
+            Err(_) => continue,
+        };
+        let temporary_mime_type = value.parse().ok().filter(|mime_type: &Mime| {
+            (mime_type.type_(), mime_type.subtype()) != (mime::STAR, mime::STAR)
+        });
+        let mut temporary = match temporary_mime_type {
+            Some(mime_type) => mime_type,
+            None => continue,
+        };
+
+        // If we haven't extracted a MIME type yet, there is nothing
+        // to compare essence with.
+        if let Some(old) = mime_type.take() {
+            // Step 6.4.
+            // NOTE: There is nothing more to do if the essences don't match.
+
+            // Step 6.5.
+            if (temporary.type_(), temporary.subtype()) == (old.type_(), old.subtype()) {
+                if temporary.get_param(mime::CHARSET).is_none() {
+                    // FIXME(nox): We shouldn't have to check whether the former
+                    // MIME type had a charset param all the time.
+                    if let Some(charset) = old.get_param(mime::CHARSET) {
+                        // FIXME(nox): https://github.com/hyperium/mime/issues/109
+                        temporary = format!("{}; charset={}", temporary, charset)
+                            .parse()
+                            .unwrap();
+                    }
+                }
+            }
+        }
+
+        // Step 3.
+        mime_type = Some(temporary);
+    }
+
+    // Steps 7-8.
+    mime_type
+}
diff --git a/components/script/dom/headers.rs b/components/script/dom/headers.rs
index 98b24dd19792..e55678df56c0 100644
--- a/components/script/dom/headers.rs
+++ b/components/script/dom/headers.rs
@@ -13,8 +13,9 @@ use crate::dom::bindings::root::DomRoot;
 use crate::dom::bindings::str::{is_token, ByteString};
 use crate::dom::globalscope::GlobalScope;
 use dom_struct::dom_struct;
-use http::header::{self, HeaderMap as HyperHeaders, HeaderName, HeaderValue};
+use http::header::{HeaderMap as HyperHeaders, HeaderName, HeaderValue};
 use mime::{self, Mime};
+use net_traits;
 use std::cell::Cell;
 use std::result::Result;
 use std::str::{self, FromStr};
@@ -260,11 +261,10 @@ impl Headers {
     }
 
     // https://fetch.spec.whatwg.org/#concept-header-extract-mime-type
+    // FIXME(nox): We should probably just keep the `Option<Mime>` value here.
     pub fn extract_mime_type(&self) -> Vec<u8> {
-        self.header_list
-            .borrow()
-            .get(header::CONTENT_TYPE)
-            .map_or(vec![], |v| v.as_bytes().to_owned())
+        net_traits::extract_mime_type(&self.header_list.borrow())
+            .map_or(vec![], |mime_type| mime_type.to_string().into())
     }
 
     pub fn sort_header_list(&self) -> Vec<(String, String)> {
diff --git a/tests/wpt/metadata/fetch/content-type/script.window.js.ini b/tests/wpt/metadata/fetch/content-type/script.window.js.ini
index 1cac78a466b8..6d6c4a9be243 100644
--- a/tests/wpt/metadata/fetch/content-type/script.window.js.ini
+++ b/tests/wpt/metadata/fetch/content-type/script.window.js.ini
@@ -1,7 +1,4 @@
 [script.window.html]
-  [separate text/javascript;charset=windows-1252 x/x text/javascript]
-    expected: FAIL
-
   [combined text/javascript;" x/x]
     expected: FAIL
 
@@ -11,9 +8,6 @@
   [combined text/javascript;charset=windows-1252;" \\" x/x]
     expected: FAIL
 
-  [separate x/x text/javascript]
-    expected: FAIL
-
   [combined x/x;" x/y;\\" text/javascript;charset=windows-1252;" text/javascript]
     expected: FAIL
 
@@ -35,12 +29,6 @@
   [combined text/javascript;charset=windows-1252  text/javascript]
     expected: FAIL
 
-  [separate x/x;charset=windows-1252 text/javascript]
-    expected: FAIL
-
-  [separate text/javascript error]
-    expected: FAIL
-
   [separate text/javascript; charset=windows-1252 text/javascript]
     expected: FAIL
 
@@ -50,9 +38,6 @@
   [separate text/javascript;charset=windows-1252  text/javascript]
     expected: FAIL
 
-  [separate x/x;" x/y;\\" text/javascript;charset=windows-1252;" text/javascript]
-    expected: FAIL
-
   [separate text/javascript;";charset=windows-1252]
     expected: FAIL
 
@@ -62,9 +47,6 @@
   [separate text/javascript;" x/x]
     expected: FAIL
 
-  [separate text/javascript ]
-    expected: FAIL
-
   [combined text/javascript ]
     expected: FAIL
 
@pshaughn
Copy link
Member

@pshaughn pshaughn commented Nov 29, 2019

A relevant note for this issue: Some web platform tests expect to see some MIME-related strings verbatim, not canonicalized. hyperium/mime#116 is the example that I've been seeing most often as I go through MIME-related WPT failures.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Linked pull requests

Successfully merging a pull request may close this issue.

None yet
2 participants
You can’t perform that action at this time.