Skip to content

Commit

Permalink
Issue #7382 Use descriptive enums instead of booleans for MIMEClassif…
Browse files Browse the repository at this point in the history
…ier::classifer
  • Loading branch information
leaexplores committed Sep 12, 2015
1 parent 815e981 commit 8525495
Show file tree
Hide file tree
Showing 4 changed files with 189 additions and 50 deletions.
117 changes: 82 additions & 35 deletions components/net/mime_classifier.rs
Expand Up @@ -6,45 +6,58 @@ use std::borrow::ToOwned;

pub struct MIMEClassifier {
image_classifier: GroupedClassifier,
audio_video_classifer: GroupedClassifier,
audio_video_classifier: GroupedClassifier,
scriptable_classifier: GroupedClassifier,
plaintext_classifier: GroupedClassifier,
archive_classifer: GroupedClassifier,
archive_classifier: GroupedClassifier,
binary_or_plaintext: BinaryOrPlaintextClassifier,
feeds_classifier: FeedsClassifier
}

/// Coarse category of a supplied MIME type, as computed by
/// `MIMEClassifier::get_media_type`.
///
/// `classify` uses the category to pick the sniffing strategy when sniffing
/// is allowed and the Apache-bug check is off.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MediaType {
    /// An XML type; `classify` keeps the supplied type unchanged.
    Xml,
    /// `text/html`; `classify` tries the feeds classifier first.
    Html,
    /// `audio/*`, `video/*` or `application/ogg`.
    AudioVideo,
    /// `image/*`.
    Image,
}

/// Whether the "check for Apache bug" step of MIME sniffing applies.
///
/// When this is `ON` (and sniffing is not disabled), `classify` falls back to
/// distinguishing text from binary data instead of trusting the supplied type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ApacheBugFlag {
    /// The supplied Content-Type matched one of the known Apache defaults.
    ON,
    /// No Apache-bug handling is required.
    OFF,
}

/// Whether content sniffing has been disabled for a response.
///
/// With `ON`, `classify` returns a supplied (non-unknown) type unchanged, and
/// `sniff_unknown_type` skips the scriptable classifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NoSniffFlag {
    /// Sniffing is disabled (e.g. the response asked for `nosniff`).
    ON,
    /// Sniffing is allowed.
    OFF,
}

impl MIMEClassifier {
//Performs MIME Type Sniffing Algorithm (section 7)
pub fn classify(&self,
no_sniff: bool,
check_for_apache_bug: bool,
no_sniff_flag: NoSniffFlag,
apache_bug_flag: ApacheBugFlag,
supplied_type: &Option<(String, String)>,
data: &[u8]) -> Option<(String, String)> {

match *supplied_type {
None => self.sniff_unknown_type(!no_sniff, data),
None => self.sniff_unknown_type(no_sniff_flag, data),
Some((ref media_type, ref media_subtype)) => {
match (&**media_type, &**media_subtype) {
("unknown", "unknown") |
("application", "unknown") |
("*", "*") => self.sniff_unknown_type(!no_sniff, data),
_ => {
if no_sniff {
supplied_type.clone()
} else if check_for_apache_bug {
self.sniff_text_or_data(data)
} else if MIMEClassifier::is_xml(media_type, media_subtype) {
supplied_type.clone()
} else if MIMEClassifier::is_html(media_type, media_subtype) {
//Implied in section 7.3, but flow is not clear
self.feeds_classifier.classify(data).or(supplied_type.clone())
} else {
match (&**media_type, &**media_subtype) {
("image", _) => self.image_classifier.classify(data),
("audio", _) | ("video", _) | ("application", "ogg") =>
self.audio_video_classifer.classify(data),
_ => None
if MIMEClassifier::is_explicit_unknown(media_type, media_subtype) {
self.sniff_unknown_type(no_sniff_flag, data)
} else {
match no_sniff_flag {
NoSniffFlag::ON => supplied_type.clone(),
NoSniffFlag::OFF => match apache_bug_flag {
ApacheBugFlag::ON => self.sniff_text_or_data(data),
ApacheBugFlag::OFF => match MIMEClassifier::get_media_type(media_type,
media_subtype) {
Some(MediaType::Xml) => supplied_type.clone(),
Some(MediaType::Html) =>
//Implied in section 7.3, but flow is not clear
self.feeds_classifier.classify(data).or(supplied_type.clone()),
Some(MediaType::Image) => self.image_classifier.classify(data),
Some(MediaType::AudioVideo) => self.audio_video_classifier.classify(data),
None => None
}.or(supplied_type.clone())
}
}
Expand All @@ -56,25 +69,25 @@ impl MIMEClassifier {
pub fn new() -> MIMEClassifier {
MIMEClassifier {
image_classifier: GroupedClassifier::image_classifer(),
audio_video_classifer: GroupedClassifier::audio_video_classifer(),
audio_video_classifier: GroupedClassifier::audio_video_classifier(),
scriptable_classifier: GroupedClassifier::scriptable_classifier(),
plaintext_classifier: GroupedClassifier::plaintext_classifier(),
archive_classifer: GroupedClassifier::archive_classifier(),
archive_classifier: GroupedClassifier::archive_classifier(),
binary_or_plaintext: BinaryOrPlaintextClassifier,
feeds_classifier: FeedsClassifier
}
}

//some sort of iterator over the classifiers might be better?
fn sniff_unknown_type(&self, sniff_scriptable: bool, data: &[u8]) ->
fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) ->
Option<(String, String)> {
if sniff_scriptable {
self.scriptable_classifier.classify(data)
} else {
None
match no_sniff_flag {
NoSniffFlag::OFF => self.scriptable_classifier.classify(data),
_ => None
}.or_else(|| self.plaintext_classifier.classify(data))
.or_else(|| self.image_classifier.classify(data))
.or_else(|| self.audio_video_classifer.classify(data))
.or_else(|| self.archive_classifer.classify(data))
.or_else(|| self.audio_video_classifier.classify(data))
.or_else(|| self.archive_classifier.classify(data))
.or_else(|| self.binary_or_plaintext.classify(data))
}

Expand All @@ -93,6 +106,40 @@ impl MIMEClassifier {
// True only for the exact (case-sensitive) pair `text` / `html`.
fn is_html(tp: &str, sub_tp: &str) -> bool {
    match (tp, sub_tp) {
        ("text", "html") => true,
        _ => false,
    }
}

// True when the top-level type is exactly `image`; the subtype is irrelevant.
fn is_image(tp: &str) -> bool {
    match tp {
        "image" => true,
        _ => false,
    }
}

// True for any `audio/*` or `video/*` type, and for `application/ogg`.
fn is_audio_video(tp: &str, sub_tp: &str) -> bool {
    match (tp, sub_tp) {
        ("audio", _) | ("video", _) | ("application", "ogg") => true,
        _ => false,
    }
}

// True for the placeholder types that mean "the server did not really say":
// `unknown/unknown`, `application/unknown` and `*/*`.
fn is_explicit_unknown(tp: &str, sub_tp: &str) -> bool {
    let unknown_subtype = sub_tp == "unknown" && (tp == "unknown" || tp == "application");
    let wildcard = tp == "*" && sub_tp == "*";
    unknown_subtype || wildcard
}

// Maps a supplied type/subtype pair onto the coarse `MediaType` category used
// by `classify`, or `None` when the pair falls in none of the categories.
//
// The checks run in a fixed order (XML, HTML, image, audio/video), so a pair
// that satisfied more than one predicate would get the first match.
//
// Takes `&str` rather than `&String`: callers holding a `&String` still work
// through deref coercion, and string literals can now be passed directly.
fn get_media_type(media_type: &str,
                  media_subtype: &str) -> Option<MediaType> {
    if MIMEClassifier::is_xml(media_type, media_subtype) {
        Some(MediaType::Xml)
    } else if MIMEClassifier::is_html(media_type, media_subtype) {
        Some(MediaType::Html)
    } else if MIMEClassifier::is_image(media_type) {
        Some(MediaType::Image)
    } else if MIMEClassifier::is_audio_video(media_type, media_subtype) {
        Some(MediaType::AudioVideo)
    } else {
        None
    }
}
}

pub fn as_string_option(tup: Option<(&'static str, &'static str)>) -> Option<(String, String)> {
Expand Down Expand Up @@ -265,7 +312,7 @@ impl GroupedClassifier {
]
}
}
fn audio_video_classifer() -> GroupedClassifier {
fn audio_video_classifier() -> GroupedClassifier {
GroupedClassifier {
byte_matchers: vec![
box ByteMatcher::video_webm(),
Expand Down
33 changes: 22 additions & 11 deletions components/net/resource_task.rs
Expand Up @@ -10,8 +10,7 @@ use cookie_storage::CookieStorage;
use data_loader;
use file_loader;
use http_loader::{self, create_http_connector, Connector};
use mime_classifier::MIMEClassifier;

use mime_classifier::{ApacheBugFlag, MIMEClassifier, NoSniffFlag};
use net_traits::ProgressMsg::Done;
use net_traits::{ControlMsg, LoadData, LoadResponse, LoadConsumer, CookieSource};
use net_traits::{Metadata, ProgressMsg, ResourceTask, AsyncResponseTarget, ResponseAction};
Expand All @@ -29,7 +28,9 @@ use ipc_channel::ipc::{self, IpcReceiver, IpcSender};

use std::borrow::ToOwned;
use std::boxed::FnBox;
use std::sync::Arc;

use std::sync::{Arc, Mutex};

use std::sync::mpsc::{channel, Sender};

pub enum ProgressSender {
Expand Down Expand Up @@ -72,29 +73,28 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
-> Result<ProgressSender, ()> {
if opts::get().sniff_mime_types {
// TODO: should be calculated in the resource loader, from pull request #4094
let mut nosniff = false;
let mut check_for_apache_bug = false;
let mut no_sniff = NoSniffFlag::OFF;
let mut check_for_apache_bug = ApacheBugFlag::OFF;

if let Some(ref headers) = metadata.headers {
if let Some(ref raw_content_type) = headers.get_raw("content-type") {
if raw_content_type.len() > 0 {
let ref last_raw_content_type = raw_content_type[raw_content_type.len() - 1];
check_for_apache_bug = last_raw_content_type == b"text/plain"
|| last_raw_content_type == b"text/plain; charset=ISO-8859-1"
|| last_raw_content_type == b"text/plain; charset=iso-8859-1"
|| last_raw_content_type == b"text/plain; charset=UTF-8";
check_for_apache_bug = apache_bug_predicate(last_raw_content_type)
}
}
if let Some(ref raw_content_type_options) = headers.get_raw("X-content-type-options") {
nosniff = raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff");
if raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff") {
no_sniff = NoSniffFlag::ON
}
}
}

let supplied_type =
metadata.content_type.map(|ContentType(Mime(toplevel, sublevel, _))| {
(format!("{}", toplevel), format!("{}", sublevel))
});
metadata.content_type = classifier.classify(nosniff, check_for_apache_bug, &supplied_type,
metadata.content_type = classifier.classify(no_sniff, check_for_apache_bug, &supplied_type,
&partial_body).map(|(toplevel, sublevel)| {
let mime_tp: TopLevel = toplevel.parse().unwrap();
let mime_sb: SubLevel = sublevel.parse().unwrap();
Expand All @@ -106,6 +106,17 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
start_sending_opt(start_chan, metadata)
}

// Decides whether the raw Content-Type header value is one of the exact byte
// strings that trigger the "check for Apache bug" handling. The comparison is
// byte-for-byte: no trimming and no case folding beyond the listed spellings.
fn apache_bug_predicate(last_raw_content_type: &[u8]) -> ApacheBugFlag {
    let affected_values: [&[u8]; 4] = [
        b"text/plain",
        b"text/plain; charset=ISO-8859-1",
        b"text/plain; charset=iso-8859-1",
        b"text/plain; charset=UTF-8",
    ];

    if affected_values.iter().any(|value| *value == last_raw_content_type) {
        ApacheBugFlag::ON
    } else {
        ApacheBugFlag::OFF
    }
}

/// For use by loaders in responding to a Load message.
pub fn start_sending_opt(start_chan: LoadConsumer, metadata: Metadata) -> Result<ProgressSender, ()> {
match start_chan {
Expand Down
89 changes: 85 additions & 4 deletions tests/unit/net/mime_classifier.rs
Expand Up @@ -3,7 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use net::mime_classifier::as_string_option;
use net::mime_classifier::{Mp4Matcher, MIMEClassifier};
use net::mime_classifier::{Mp4Matcher, MIMEClassifier, ApacheBugFlag, NoSniffFlag};
use std::env;
use std::fs::File;
use std::io::{self, Read};
Expand Down Expand Up @@ -37,8 +37,12 @@ fn test_sniff_mp4_matcher() {
}

#[cfg(test)]
fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string: &str,
supplied_type: Option<(&'static str, &'static str)>) {
fn test_sniff_with_flags(filename_orig: &path::Path,
type_string: &str,
subtype_string: &str,
supplied_type: Option<(&'static str, &'static str)>,
no_sniff_flag: NoSniffFlag,
apache_bug_flag: ApacheBugFlag) {
let current_working_directory = env::current_dir().unwrap();
println!("The current directory is {}", current_working_directory.display());

Expand All @@ -51,7 +55,7 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string

match read_result {
Ok(data) => {
match classifier.classify(false, false, &as_string_option(supplied_type), &data) {
match classifier.classify(no_sniff_flag, apache_bug_flag, &as_string_option(supplied_type), &data) {
Some((parsed_type, parsed_subtp)) => {
if (&parsed_type[..] != type_string) ||
(&parsed_subtp[..] != subtype_string) {
Expand All @@ -69,6 +73,17 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string
}
}

// Convenience wrapper around `test_sniff_with_flags` for the common case:
// forwards every argument unchanged and runs the check with both flags off
// (`NoSniffFlag::OFF`, `ApacheBugFlag::OFF`).
#[cfg(test)]
fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string: &str,
supplied_type: Option<(&'static str, &'static str)>) {
test_sniff_with_flags(filename_orig,
type_string,
subtype_string,
supplied_type,
NoSniffFlag::OFF,
ApacheBugFlag::OFF)
}

#[cfg(test)]
fn test_sniff_classification(file: &str, type_string: &str, subtype_string: &str,
supplied_type: Option<(&'static str, &'static str)>) {
Expand Down Expand Up @@ -448,3 +463,69 @@ fn test_sniff_rss_feed() {
fn test_sniff_atom_feed() {
test_sniff_full(&PathBuf::from("text/xml/feed.atom"), "application", "atom+xml", Some(("text", "html")));
}

// With no supplied type and both flags off, the `unknown/binary_file` fixture
// is expected to be classified as application/octet-stream.
#[test]
fn test_sniff_binary_file() {
test_sniff_full(&PathBuf::from("unknown/binary_file"), "application", "octet-stream", None);
}

// With sniffing disabled, the supplied text/html type must come back
// unchanged even though the fixture is an Atom feed (the same fixture is
// expected to sniff as application/atom+xml when the flag is off).
#[test]
fn test_sniff_atom_feed_with_no_sniff_flag_on() {
test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"),
"text",
"html",
Some(("text", "html")),
NoSniffFlag::ON,
ApacheBugFlag::OFF);
}

// With both flags on, the no-sniff flag is expected to win: the supplied
// text/html type comes back unchanged and no Apache-bug text/binary check
// alters the result.
#[test]
fn test_sniff_with_no_sniff_flag_on_and_apache_flag_on() {
test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"),
"text",
"html",
Some(("text", "html")),
NoSniffFlag::ON,
ApacheBugFlag::ON);
}

// Expects the `utf8bom.txt` fixture to classify as text/plain with the
// Apache-bug flag on and no supplied type.
// NOTE(review): `supplied_type` is `None`, and the `classify` in this commit
// sends `None` straight to `sniff_unknown_type` without looking at the Apache
// flag, so this may not exercise the Apache-bug path. Confirm, and consider
// supplying `Some(("text", "plain"))`.
#[test]
fn test_sniff_utf_8_bom_with_apache_flag_on() {
test_sniff_with_flags(&PathBuf::from("text/plain/utf8bom.txt"),
"text",
"plain",
None,
NoSniffFlag::OFF,
ApacheBugFlag::ON);
}

// Expects the `utf16bebom.txt` fixture to classify as text/plain with the
// Apache-bug flag on and no supplied type.
// NOTE(review): `supplied_type` is `None`, and the `classify` in this commit
// sends `None` straight to `sniff_unknown_type` without looking at the Apache
// flag, so this may not exercise the Apache-bug path. Confirm, and consider
// supplying `Some(("text", "plain"))`.
#[test]
fn test_sniff_utf_16be_bom_with_apache_flag_on() {
test_sniff_with_flags(&PathBuf::from("text/plain/utf16bebom.txt"),
"text",
"plain",
None,
NoSniffFlag::OFF,
ApacheBugFlag::ON);
}

// Expects the `utf16lebom.txt` fixture to classify as text/plain with the
// Apache-bug flag on and no supplied type.
// NOTE(review): `supplied_type` is `None`, and the `classify` in this commit
// sends `None` straight to `sniff_unknown_type` without looking at the Apache
// flag, so this may not exercise the Apache-bug path. Confirm, and consider
// supplying `Some(("text", "plain"))`.
#[test]
fn test_sniff_utf_16le_bom_with_apache_flag_on() {
test_sniff_with_flags(&PathBuf::from("text/plain/utf16lebom.txt"),
"text",
"plain",
None,
NoSniffFlag::OFF,
ApacheBugFlag::ON);
}

// Expects the `unknown/binary_file` fixture to classify as
// application/octet-stream with the Apache-bug flag on and no supplied type.
// NOTE(review): `supplied_type` is `None`, and the `classify` in this commit
// sends `None` straight to `sniff_unknown_type` without looking at the Apache
// flag, so this may not exercise the Apache-bug path. Confirm, and consider
// supplying `Some(("text", "plain"))`.
#[test]
fn test_sniff_octet_stream_apache_flag_on() {
test_sniff_with_flags(&PathBuf::from("unknown/binary_file"),
"application",
"octet-stream",
None,
NoSniffFlag::OFF,
ApacheBugFlag::ON);
}

Binary file not shown.

0 comments on commit 8525495

Please sign in to comment.