Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement MIME sniffing #5005

Merged
merged 6 commits into from Apr 7, 2015
Merged
Changes from 1 commit
Commits
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.

Always

Just for now

Enable optional mime sniffing, and integrate it with the file loader.

  • Loading branch information
jdm committed Apr 7, 2015
commit a3201bc1ac0c29e904c14fd19fffb323f1e28b40
@@ -4,6 +4,7 @@

use net_traits::{LoadData, Metadata};
use net_traits::ProgressMsg::Done;
use mime_classifier::MIMEClassifier;
use resource_task::start_sending;
use file_loader;

@@ -13,8 +14,9 @@ use util::resource_files::resources_dir_path;

use std::borrow::IntoCow;
use std::fs::PathExt;
use std::sync::Arc;

pub fn factory(mut load_data: LoadData) {
pub fn factory(mut load_data: LoadData, classifier: Arc<MIMEClassifier>) {
match load_data.url.non_relative_scheme_data().unwrap() {
"blank" => {
let start_chan = load_data.consumer;
@@ -42,5 +44,5 @@ pub fn factory(mut load_data: LoadData) {
return
}
};
file_loader::factory(load_data)
file_loader::factory(load_data, classifier)
}
@@ -4,22 +4,24 @@

use net_traits::{LoadData, Metadata};
use net_traits::ProgressMsg::{Payload, Done};
use mime_classifier::MIMEClassifier;
use resource_task::start_sending;

use rustc_serialize::base64::FromBase64;

use hyper::mime::Mime;
use std::sync::Arc;
use url::{percent_decode, SchemeData};

pub fn factory(load_data: LoadData) {
pub fn factory(load_data: LoadData, classifier: Arc<MIMEClassifier>) {
// NB: we don't spawn a new task.
// Hypothesis: data URLs are too small for parallel base64 etc. to be worth it.
// Should be tested at some point.
// Left in separate function to allow easy moving to a task, if desired.
load(load_data)
load(load_data, classifier)
}

fn load(load_data: LoadData) {
fn load(load_data: LoadData, _classifier: Arc<MIMEClassifier>) {
let start_chan = load_data.consumer;
let url = load_data.url;
assert!(&*url.scheme == "data");
@@ -4,52 +4,78 @@

use net_traits::{LoadData, Metadata, ProgressMsg};
use net_traits::ProgressMsg::{Payload, Done};
use resource_task::start_sending;
use mime_classifier::MIMEClassifier;
use resource_task::{start_sending, start_sending_sniffed};

use std::borrow::ToOwned;
use std::io;
use std::fs::File;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::mpsc::Sender;
use util::task::spawn_named;

/// Size, in bytes, of the buffer allocated for each individual read of a file.
static READ_SIZE: usize = 8192;

/// Outcome of a single successful `read_block` call.
enum ReadStatus {
/// The read returned a non-zero number of bytes; holds exactly those bytes.
Partial(Vec<u8>),
/// The read returned zero bytes.
EOF,
}

/// Performs one `read` on `reader` into a freshly allocated `READ_SIZE`-byte buffer.
///
/// Returns `ReadStatus::EOF` when the read yields zero bytes, and
/// `ReadStatus::Partial` carrying only the bytes actually read otherwise.
/// A failed read is reported as the error's description string.
fn read_block(reader: &mut io::Read) -> Result<ReadStatus, String> {
    let mut chunk = vec![0; READ_SIZE];
    let filled = match reader.read(chunk.as_mut_slice()) {
        Ok(len) => len,
        Err(err) => return Err(err.description().to_string()),
    };
    if filled == 0 {
        return Ok(ReadStatus::EOF);
    }
    // Drop the unused tail so the caller only sees bytes that were read.
    chunk.truncate(filled);
    Ok(ReadStatus::Partial(chunk))
}

fn read_all(reader: &mut io::Read, progress_chan: &Sender<ProgressMsg>)
-> Result<(), String> {
-> Result<(), String> {
loop {
let mut buf = vec![0; READ_SIZE];
match reader.read(buf.as_mut_slice()) {
Ok(0) => return Ok(()),
Ok(n) => {
buf.truncate(n);
progress_chan.send(Payload(buf)).unwrap();
},
Err(e) => return Err(e.description().to_string()),
match try!(read_block(reader)) {
ReadStatus::Partial(buf) => progress_chan.send(Payload(buf)).unwrap(),
ReadStatus::EOF => return Ok(()),
}
}
}

pub fn factory(load_data: LoadData) {
pub fn factory(load_data: LoadData, classifier: Arc<MIMEClassifier>) {
let url = load_data.url;
let start_chan = load_data.consumer;
assert!(&*url.scheme == "file");
let progress_chan = start_sending(start_chan, Metadata::default(url.clone()));
spawn_named("file_loader".to_owned(), move || {
let metadata = Metadata::default(url.clone());
let file_path: Result<PathBuf, ()> = url.to_file_path();
match file_path {
Ok(file_path) => {
match File::open(&file_path) {
Ok(ref mut reader) => {
let res = read_all(reader, &progress_chan);
let res = read_block(reader);
let (res, progress_chan) = match res {
Ok(ReadStatus::Partial(buf)) => {
let progress_chan = start_sending_sniffed(start_chan, metadata,
classifier, &buf);
progress_chan.send(Payload(buf)).unwrap();
(read_all(reader, &progress_chan), progress_chan)
}
Ok(ReadStatus::EOF) | Err(_) =>
(res.map(|_| ()), start_sending(start_chan, metadata)),
};
progress_chan.send(Done(res)).unwrap();
}
Err(e) => {
let progress_chan = start_sending(start_chan, metadata);
progress_chan.send(Done(Err(e.description().to_string()))).unwrap();
}
}
}
Err(_) => {
let progress_chan = start_sending(start_chan, metadata);
progress_chan.send(Done(Err(url.to_string()))).unwrap();
}
}
@@ -5,6 +5,7 @@
use net_traits::{ControlMsg, CookieSource, LoadData, LoadResponse, Metadata};
use net_traits::ProgressMsg;
use net_traits::ProgressMsg::{Payload, Done};
use mime_classifier::MIMEClassifier;
use resource_task::start_sending_opt;

use log;
@@ -21,6 +22,7 @@ use hyper::status::{StatusCode, StatusClass};
use std::error::Error;
use openssl::ssl::{SslContext, SslVerifyMode};
use std::io::{self, Read, Write};
use std::sync::Arc;
use std::sync::mpsc::{Sender, channel};
use std::thunk::Invoke;
use util::task::spawn_named;
@@ -31,9 +33,9 @@ use url::{Url, UrlParser};
use std::borrow::ToOwned;

pub fn factory(cookies_chan: Sender<ControlMsg>)
-> Box<Invoke<(LoadData,)> + Send> {
box move |(load_data,)| {
spawn_named("http_loader".to_owned(), move || load(load_data, cookies_chan))
-> Box<Invoke<(LoadData, Arc<MIMEClassifier>)> + Send> {
box move |(load_data, classifier)| {
spawn_named("http_loader".to_owned(), move || load(load_data, classifier, cookies_chan))
}
}

@@ -47,7 +49,7 @@ fn send_error(url: Url, err: String, start_chan: Sender<LoadResponse>) {
};
}

fn load(mut load_data: LoadData, cookies_chan: Sender<ControlMsg>) {
fn load(mut load_data: LoadData, classifier: Arc<MIMEClassifier>, cookies_chan: Sender<ControlMsg>) {
// FIXME: At the time of writing this FIXME, servo didn't have any central
// location for configuration. If you're reading this and such a
// repository DOES exist, please update this constant to use it.
@@ -122,7 +124,7 @@ reason: \"certificate verify failed\" }]";
let mut image = resources_dir_path();
image.push("badcert.html");
let load_data = LoadData::new(Url::from_file_path(&*image).unwrap(), start_chan);
file_loader::factory(load_data);
file_loader::factory(load_data, classifier);
return;
},
Err(e) => {
@@ -315,13 +315,13 @@ impl MIMEChecker for BinaryOrPlaintextClassifier {
}
}
struct GroupedClassifier {
byte_matchers: Vec<Box<MIMEChecker + Send>>,
byte_matchers: Vec<Box<MIMEChecker + Send + Sync>>,
}
impl GroupedClassifier {
fn image_classifer() -> GroupedClassifier {
GroupedClassifier {
byte_matchers: vec![
box ByteMatcher::image_x_icon() as Box<MIMEChecker+Send>,
box ByteMatcher::image_x_icon(),
box ByteMatcher::image_x_icon_cursor(),
box ByteMatcher::image_bmp(),
box ByteMatcher::image_gif89a(),
@@ -335,7 +335,7 @@ impl GroupedClassifier {
fn audio_video_classifer() -> GroupedClassifier {
GroupedClassifier{
byte_matchers: vec![
box ByteMatcher::video_webm() as Box<MIMEChecker+Send>,
box ByteMatcher::video_webm(),
box ByteMatcher::audio_basic(),
box ByteMatcher::audio_aiff(),
box ByteMatcher::audio_mpeg(),
@@ -350,7 +350,7 @@ impl GroupedClassifier {
fn scriptable_classifier() -> GroupedClassifier {
GroupedClassifier{
byte_matchers: vec![
box ByteMatcher::text_html_doctype() as Box<MIMEChecker+Send>,
box ByteMatcher::text_html_doctype(),
box ByteMatcher::text_html_page(),
box ByteMatcher::text_html_head(),
box ByteMatcher::text_html_script(),
@@ -376,7 +376,7 @@ impl GroupedClassifier {
fn plaintext_classifier() -> GroupedClassifier {
GroupedClassifier{
byte_matchers: vec![
box ByteMatcher::text_plain_utf_8_bom() as Box<MIMEChecker+Send>,
box ByteMatcher::text_plain_utf_8_bom(),
box ByteMatcher::text_plain_utf_16le_bom(),
box ByteMatcher::text_plain_utf_16be_bom(),
box ByteMatcher::application_postscript()
@@ -386,7 +386,7 @@ impl GroupedClassifier {
fn archive_classifier() -> GroupedClassifier {
GroupedClassifier {
byte_matchers: vec![
box ByteMatcher::application_x_gzip() as Box<MIMEChecker+Send>,
box ByteMatcher::application_x_gzip(),
box ByteMatcher::application_zip(),
box ByteMatcher::application_x_rar_compressed()
]
@@ -398,7 +398,7 @@ impl GroupedClassifier {
fn font_classifier() -> GroupedClassifier {
GroupedClassifier {
byte_matchers: vec![
box ByteMatcher::application_font_woff() as Box<MIMEChecker+Send>,
box ByteMatcher::application_font_woff(),
box ByteMatcher::true_type_collection(),
box ByteMatcher::open_type(),
box ByteMatcher::true_type(),
@@ -10,10 +10,12 @@ use file_loader;
use http_loader;
use cookie_storage::CookieStorage;
use cookie;
use mime_classifier::MIMEClassifier;

use net_traits::{ControlMsg, LoadData, LoadResponse};
use net_traits::{Metadata, ProgressMsg, ResourceTask};
use net_traits::ProgressMsg::Done;
use util::opts;
use util::task::spawn_named;

use hyper::header::UserAgent;
@@ -27,6 +29,7 @@ use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufReader, Read};
use std::sync::Arc;
use std::sync::mpsc::{channel, Receiver, Sender};
use std::thunk::Invoke;

@@ -61,6 +64,30 @@ pub fn start_sending(start_chan: Sender<LoadResponse>, metadata: Metadata) -> Se
start_sending_opt(start_chan, metadata).ok().unwrap()
}

/// For use by loaders in responding to a Load message that allows content sniffing.
///
/// Thin wrapper around `start_sending_sniffed_opt` that hands back the progress
/// sender directly.
///
/// # Panics
///
/// Panics if `start_sending_sniffed_opt` returns `Err`.
pub fn start_sending_sniffed(start_chan: Sender<LoadResponse>, metadata: Metadata,
                             classifier: Arc<MIMEClassifier>, partial_body: &Vec<u8>)
                             -> Sender<ProgressMsg> {
    let outcome = start_sending_sniffed_opt(start_chan, metadata, classifier, partial_body);
    // A failure to start sending is treated as fatal for this entry point.
    outcome.ok().unwrap()
}

/// For use by loaders in responding to a Load message that allows content sniffing.
///
/// When the `sniff_mime_types` option is set, `metadata.content_type` is replaced
/// with the classifier's result for `partial_body` before the metadata is passed
/// on; otherwise the metadata is forwarded unchanged. The return value is whatever
/// `start_sending_opt` produces.
pub fn start_sending_sniffed_opt(start_chan: Sender<LoadResponse>, mut metadata: Metadata,
                                 classifier: Arc<MIMEClassifier>, partial_body: &Vec<u8>)
                                 -> Result<Sender<ProgressMsg>, ()> {
    let sniffing_enabled = opts::get().sniff_mime_types;
    if sniffing_enabled {
        // TODO: should be calculated in the resource loader, from pull request #4094
        let (nosniff, check_for_apache_bug) = (false, false);

        let sniffed_type = classifier.classify(nosniff, check_for_apache_bug,
                                               &metadata.content_type, &partial_body);
        metadata.content_type = sniffed_type;
    }

    start_sending_opt(start_chan, metadata)
}

/// For use by loaders in responding to a Load message.
pub fn start_sending_opt(start_chan: Sender<LoadResponse>, metadata: Metadata) -> Result<Sender<ProgressMsg>, ()> {
let (progress_chan, progress_port) = channel();
@@ -123,6 +150,7 @@ struct ResourceManager {
user_agent: Option<String>,
cookie_storage: CookieStorage,
resource_task: Sender<ControlMsg>,
mime_classifier: Arc<MIMEClassifier>,
}

impl ResourceManager {
@@ -133,6 +161,7 @@ impl ResourceManager {
user_agent: user_agent,
cookie_storage: CookieStorage::new(),
resource_task: resource_task,
mime_classifier: Arc::new(MIMEClassifier::new()),
}
}
}
@@ -174,10 +203,10 @@ impl ResourceManager {

self.user_agent.as_ref().map(|ua| load_data.headers.set(UserAgent(ua.clone())));

fn from_factory(factory: fn(LoadData,))
-> Box<Invoke<(LoadData,)> + Send> {
box move |(load_data,)| {
factory(load_data)
fn from_factory(factory: fn(LoadData, Arc<MIMEClassifier>))
-> Box<Invoke<(LoadData, Arc<MIMEClassifier>)> + Send> {
box move |(load_data, classifier)| {
factory(load_data, classifier)
}
}

@@ -195,7 +224,7 @@ impl ResourceManager {
};
debug!("resource_task: loading url: {}", load_data.url.serialize());

loader.invoke((load_data,));
loader.invoke((load_data, self.mime_classifier.clone()));
}
}

ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.