Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement a basic HTTP memory cache #4117

Closed
wants to merge 15 commits into from
Closed
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.

Always

Just for now

Large diffs are not rendered by default.

@@ -2,28 +2,66 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use http_cache::{MemoryCache, Uncacheable, CachedContentPending, NewCacheEntry, Revalidate};
use http_cache::{CachedPendingResource, UncachedPendingResource, ResourceResponseTarget};
use http_cache::{UncachedInProgressResource, CachedInProgressResource, ResourceProgressTarget};
use http_cache::{ExpiryDate, Etag};
use resource_task::{Metadata, Payload, Done, LoadResponse, LoadData, start_sending_opt};

use log;
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
use http::client::{RequestWriter, NetworkStream};
use http::headers::HeaderEnum;
use http::status::NotModified;
use std::io::Reader;
use servo_util::task::spawn_named;
use url::Url;

pub fn factory(load_data: LoadData, start_chan: Sender<LoadResponse>) {
spawn_named("http_loader", proc() load(load_data, start_chan))
// FIXME: it would be nice to reduce the number of procs here, but it's hard to make a
//        consistent interface with the other loaders that don't need the cache.
/// Builds the HTTP loader entry point, bound to the shared memory cache.
///
/// The returned proc accepts a request (`load_data`) and its response channel
/// (`start_chan`), and hands both, together with a clone of the cache handle, to
/// `load` through `spawn_named` under the name "http_loader".
pub fn factory<'a>(cache: Arc<Mutex<MemoryCache>>)
                   -> proc(load_data: LoadData, start_chan: Sender<LoadResponse>): 'a {
    proc(load_data: LoadData, start_chan: Sender<LoadResponse>) {
        // The cache handle is cloned only when the inner proc actually runs.
        let http_job = proc() load(load_data, start_chan, cache.clone());
        spawn_named("http_loader", http_job)
    }
}

This comment has been minimized.

Copy link
@pcwalton

pcwalton Dec 9, 2014

Contributor

So many procs :( Can we refactor this in the future (if not now, leave a FIXME)?


/// Delivers the response metadata to the request's target and moves that target
/// into its "in progress" form.
///
/// * Cache-backed request: the metadata is handed to the cache through
///   `process_metadata`, and the same key/cache pair is returned wrapped in
///   `CachedInProgressResource`. This path always yields `Ok`.
/// * Uncached request: the metadata is sent with `start_sending_opt`. On success
///   the channel it returns is wrapped in `UncachedInProgressResource`; on failure
///   the error is passed through unchanged.
fn start_sending_http_opt(start_chan: ResourceResponseTarget, metadata: Metadata)
                          -> Result<ResourceProgressTarget, ()> {
    match start_chan {
        UncachedPendingResource(sender) => {
            match start_sending_opt(sender, metadata) {
                Ok(progress_chan) => Ok(UncachedInProgressResource(progress_chan)),
                Err(e) => Err(e),
            }
        }
        CachedPendingResource(key, cache) => {
            {
                // The guard lives in its own scope so the lock is released
                // before `cache` is moved into the returned value.
                let mut locked = cache.lock();
                locked.process_metadata(&key, metadata);
            }
            Ok(CachedInProgressResource(key, cache))
        }
    }
}

fn send_error(url: Url, err: String, start_chan: Sender<LoadResponse>) {
/// Reports a failed load straight to the requester's channel, bypassing the cache.
///
/// Starts the response with the default metadata for `url`; if that succeeds,
/// `Done(Err(err))` is sent on the resulting progress channel. If the response
/// could not be started, the error is dropped silently.
pub fn send_error_direct(url: Url, err: String, start_chan: Sender<LoadResponse>) {
    let started = start_sending_opt(start_chan, Metadata::default(url));
    match started {
        Ok(progress_chan) => progress_chan.send(Done(Err(err))),
        // Nothing more can be done if the response could not even be started.
        Err(_) => {}
    }
}

fn load(load_data: LoadData, start_chan: Sender<LoadResponse>) {
/// Reports a load failure to whichever target the request is bound to.
///
/// For a cache-backed request, the cache is locked and the entry for `key` is
/// doomed with `err` through `doom_request`. For an uncached request, the error is
/// forwarded to a clone of the requester's channel through `send_error_direct`.
fn send_error(url: Url, err: String, start_chan: &ResourceResponseTarget) {
    match *start_chan {
        UncachedPendingResource(ref sender) => {
            send_error_direct(url, err, sender.clone())
        }
        CachedPendingResource(ref key, ref cache) => {
            let mut locked = cache.lock();
            locked.doom_request(key, err);
        }
    }
}

fn load(mut load_data: LoadData, start_chan: Sender<LoadResponse>, cache: Arc<Mutex<MemoryCache>>) {
// FIXME: At the time of writing this FIXME, servo didn't have any central
// location for configuration. If you're reading this and such a
// repository DOES exist, please update this constant to use it.
@@ -32,17 +70,53 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>) {
let mut url = load_data.url.clone();
let mut redirected_to = HashSet::new();

debug!("checking cache for {}", url);
let cache_result = {
let mut cache = cache.lock();
cache.process_pending_request(&load_data, start_chan.clone())
};

let revalidating = match cache_result {
Revalidate(ref _key, ExpiryDate(ref last_fetched)) => {
load_data.headers.if_modified_since = Some(last_fetched.clone());
true
}

Revalidate(ref _key, Etag(ref etag)) => {
load_data.headers.if_none_match = Some(etag.opaque_tag.clone());
true
}

_ => false
};

let start_chan = match cache_result {
Uncacheable(reason) => {
debug!("request for {} can't be cached: {}", url, reason);
UncachedPendingResource(start_chan)
}
CachedContentPending => return,
NewCacheEntry(key) => {
debug!("new cache entry for {}", url);
CachedPendingResource(key, cache)
}
Revalidate(key, _) => {
debug!("revalidating {}", url);
CachedPendingResource(key, cache)
}
};

// Loop to handle redirects.
loop {
iters = iters + 1;

if iters > max_redirects {
send_error(url, "too many redirects".to_string(), start_chan);
send_error(url, "too many redirects".to_string(), &start_chan);
return;
}

if redirected_to.contains(&url) {
send_error(url, "redirect loop".to_string(), start_chan);
send_error(url, "redirect loop".to_string(), &start_chan);
return;
}

@@ -52,7 +126,7 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>) {
"http" | "https" => {}
_ => {
let s = format!("{:s} request, but we don't support that scheme", url.scheme);
send_error(url, s, start_chan);
send_error(url, s, &start_chan);
return;
}
}
@@ -63,7 +137,7 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>) {
let mut writer = match request {
Ok(w) => box w,
Err(e) => {
send_error(url, e.desc.to_string(), start_chan);
send_error(url, e.desc.to_string(), &start_chan);
return;
}
};
@@ -81,7 +155,7 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>) {
writer.headers.content_length = Some(data.len());
match writer.write(data.as_slice()) {
Err(e) => {
send_error(url, e.desc.to_string(), start_chan);
send_error(url, e.desc.to_string(), &start_chan);
return;
}
_ => {}
@@ -92,7 +166,7 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>) {
let mut response = match writer.read_response() {
Ok(r) => r,
Err((_, e)) => {
send_error(url, e.desc.to_string(), start_chan);
send_error(url, e.desc.to_string(), &start_chan);
return;
}
};
@@ -105,6 +179,21 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>) {
}
}

if revalidating {
let (key, cache) = match start_chan {
CachedPendingResource(ref key, ref cache) => (key, cache),
UncachedPendingResource(..) => unreachable!(),
};

let mut cache = cache.lock();
if response.status == NotModified && revalidating {
cache.process_not_modified(key, &response.headers);
return;
}

cache.process_revalidation_failed(key);
}

if 3 == (response.status.code() / 100) {
match response.headers.location {
Some(new_url) => {
@@ -113,7 +202,7 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>) {
Some(ref c) => {
if c.preflight {
// The preflight lied
send_error(url, "Preflight fetch inconsistent with main fetch".to_string(), start_chan);
send_error(url, "Preflight fetch inconsistent with main fetch".to_string(), &start_chan);
return;
} else {
// XXXManishearth There are some CORS-related steps here,
@@ -135,7 +224,7 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>) {
metadata.headers = Some(response.headers.clone());
metadata.status = response.status.clone();

let progress_chan = match start_sending_opt(start_chan, metadata) {
let progress_chan = match start_sending_http_opt(start_chan, metadata) {
Ok(p) => p,
_ => return
};
@@ -146,15 +235,31 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>) {
match response.read(buf.as_mut_slice()) {
Ok(len) => {
unsafe { buf.set_len(len); }
if progress_chan.send_opt(Payload(buf)).is_err() {
// The send errors when the receiver is out of scope,
// which will happen if the fetch has timed out (or has been aborted)
// so we don't need to continue with the loading of the file here.
return;
match progress_chan {
CachedInProgressResource(ref key, ref cache) => {
let mut cache = cache.lock();
cache.process_payload(key, buf);
}
UncachedInProgressResource(ref progress_chan) => {
if progress_chan.send_opt(Payload(buf)).is_err() {
// The send errors when the receiver is out of scope,
// which will happen if the fetch has timed out (or has been aborted)
// so we don't need to continue with the loading of the file here.
return;
}
}
}
}
Err(_) => {
let _ = progress_chan.send_opt(Done(Ok(())));
match progress_chan {
CachedInProgressResource(ref key, ref cache) => {
let mut cache = cache.lock();
cache.process_done(key);
}
UncachedInProgressResource(ref progress_chan) => {
let _ = progress_chan.send_opt(Done(Ok(())));
}
}
break;
}
}
@@ -2,7 +2,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#![feature(default_type_params, globs, phase)]
#![feature(default_type_params, globs, phase, slicing_syntax)]

#![deny(unused_imports)]
#![deny(unused_variables)]
@@ -32,6 +32,7 @@ pub mod image {

pub mod about_loader;
pub mod file_loader;
pub mod http_cache;
pub mod http_loader;
pub mod data_loader;
pub mod image_cache_task;
@@ -7,10 +7,12 @@
use about_loader;
use data_loader;
use file_loader;
use http_cache::MemoryCache;
use http_loader;
use sniffer_task;

use std::comm::{channel, Receiver, Sender};
use std::sync::{Arc, Mutex};
use http::headers::content_type::MediaType;
use http::headers::response::HeaderCollection as ResponseHeaderCollection;
use http::headers::request::HeaderCollection as RequestHeaderCollection;
@@ -58,6 +60,7 @@ pub struct ResourceCORSData {
}

/// Metadata about a loaded resource, such as is obtained from HTTP headers.
#[deriving(Clone)]
pub struct Metadata {
/// Final URL after redirects.
pub final_url: Url,
@@ -176,13 +179,15 @@ pub fn new_resource_task(user_agent: Option<String>) -> ResourceTask {
/// State owned by the resource manager.
struct ResourceManager {
/// Receiving end of the channel that carries `ControlMsg`s from clients.
from_client: Receiver<ControlMsg>,
/// Optional user-agent string supplied at construction.
/// NOTE(review): presumably attached to outgoing requests; the use site is not visible here.
user_agent: Option<String>,
/// Shared in-memory HTTP cache; a clone of this handle is passed to
/// `http_loader::factory` when an "http"/"https" load is dispatched.
memory_cache: Arc<Mutex<MemoryCache>>,
}

impl ResourceManager {
    /// Creates a manager that stores the given control-message receiver and
    /// user-agent, and starts with a freshly constructed `MemoryCache` wrapped in
    /// `Arc<Mutex<_>>`.
    fn new(from_client: Receiver<ControlMsg>, user_agent: Option<String>) -> ResourceManager {
        let memory_cache = Arc::new(Mutex::new(MemoryCache::new()));
        ResourceManager {
            from_client: from_client,
            user_agent: user_agent,
            memory_cache: memory_cache,
        }
    }
}
@@ -212,11 +217,18 @@ impl ResourceManager {

let sniffer_task = sniffer_task::new_sniffer_task(start_chan.clone());

/// Adapts a plain loader function to the proc-based loader interface, so that it
/// has the same type as the proc returned by the HTTP loader's factory.
///
/// The returned proc forwards its two arguments to `factory` unchanged.
fn from_factory<'a>(factory: fn(LoadData, Sender<LoadResponse>))
                    -> proc(LoadData, Sender<LoadResponse>): 'a {
    proc(request: LoadData, response_chan: Sender<LoadResponse>) {
        factory(request, response_chan)
    }
}

let loader = match load_data.url.scheme.as_slice() {
"file" => file_loader::factory,
"http" | "https" => http_loader::factory,
"data" => data_loader::factory,
"about" => about_loader::factory,
"file" => from_factory(file_loader::factory),
"http" | "https" => http_loader::factory(self.memory_cache.clone()),
"data" => from_factory(data_loader::factory),
"about" => from_factory(about_loader::factory),
_ => {
debug!("resource_task: no loader for scheme {:s}", load_data.url.scheme);
start_sending(start_chan, Metadata::default(load_data.url))
@@ -26,12 +26,12 @@ use servo_net::resource_task::{Load, LoadData, Payload, Done, ResourceTask};
use servo_msg::constellation_msg::LoadData as MsgLoadData;
use servo_util::task_state;
use servo_util::task_state::IN_HTML_PARSER;
use servo_util::time::parse_http_timestamp;
use std::ascii::AsciiExt;
use std::comm::channel;
use std::str::MaybeOwned;
use url::Url;
use http::headers::HeaderEnum;
use time;
use html5ever::Attribute;
use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText};
use string_cache::QualName;
@@ -44,25 +44,9 @@ pub enum HTMLInput {
// Parses an RFC 2616 compliant date/time string, and returns a localized
// date/time string in a format suitable for document.lastModified.
fn parse_last_modified(timestamp: &str) -> String {
let format = "%m/%d/%Y %H:%M:%S";

// RFC 822, updated by RFC 1123
match time::strptime(timestamp, "%a, %d %b %Y %T %Z") {
Ok(t) => return t.to_local().strftime(format).unwrap(),
Err(_) => ()
}

// RFC 850, obsoleted by RFC 1036
match time::strptime(timestamp, "%A, %d-%b-%y %T %Z") {
Ok(t) => return t.to_local().strftime(format).unwrap(),
Err(_) => ()
}

// ANSI C's asctime() format
match time::strptime(timestamp, "%c") {
Ok(t) => t.to_local().strftime(format).unwrap(),
Err(_) => String::from_str("")
}
parse_http_timestamp(timestamp).map(|t| {
t.to_local().strftime("%m/%d/%Y %H:%M:%S").unwrap()
}).unwrap_or(String::new())
}

trait SinkHelpers {
@@ -10,7 +10,7 @@ use std::f64;
use std::io::timer::sleep;
use std::iter::AdditiveIterator;
use std::time::duration::Duration;
use std_time::precise_time_ns;
use std_time::{Tm, precise_time_ns, strptime};
use task::{spawn_named};
use url::Url;

@@ -277,3 +277,21 @@ pub fn time<T>(msg: &str, callback: || -> T) -> T{
}
return val;
}

/// Parses an RFC 2616 compliant date/time string.
///
/// The formats are tried in this order, and the first one that parses wins:
///
/// 1. RFC 822, updated by RFC 1123 (`%a, %d %b %Y %T %Z`)
/// 2. RFC 850, obsoleted by RFC 1036 (`%A, %d-%b-%y %T %Z`)
/// 3. ANSI C's asctime() format (`%c`)
///
/// Returns `None` when the string matches none of them.
pub fn parse_http_timestamp(timestamp: &str) -> Option<Tm> {
    strptime(timestamp, "%a, %d %b %Y %T %Z").ok()
        .or_else(|| strptime(timestamp, "%A, %d-%b-%y %T %Z").ok())
        .or_else(|| strptime(timestamp, "%c").ok())
}
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.