Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement a basic HTTP memory cache #4117

Closed
wants to merge 15 commits into from
Closed
Changes from 1 commit
Commits
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.

Always

Just for now

Evict cache entries that have passed their expiry date instead of matching them.
  • Loading branch information
jdm committed Nov 27, 2014
commit 5d9560744791f34a6e2ae350bbad42cfa0510e1d
@@ -5,13 +5,21 @@
use http_loader::send_error_direct;
use resource_task::{Metadata, ProgressMsg, LoadResponse, LoadData, Payload, Done, start_sending_opt};

use servo_util::time::parse_http_timestamp;

use http::headers::HeaderEnum;
use http::method::Get;
use http::status::Ok as StatusOk;

use std::collections::HashMap;
use std::comm::Sender;
use std::iter::Map;
use std::num::FromStrRadix;
use std::str::CharSplits;
use std::sync::{Arc, Mutex};
use std::time::duration::{MAX, Duration};
use time;
use time::Timespec;
use url::Url;

//TODO: Store an Arc<Vec<u8>> instead?
@@ -54,17 +62,20 @@ struct PendingResource {
metadata: Option<Metadata>,
body: Vec<u8>,
consumers: PendingConsumers,
expires: Duration,
doomed: bool,
}

pub struct CachedResource {
pub metadata: Metadata,
pub body: Vec<u8>
struct CachedResource {
metadata: Metadata,
body: Vec<u8>,
expires: Duration,
}

// In-memory store of HTTP responses, keyed by `CacheKey`.
pub struct MemoryCache {
// Entries whose metadata and body are fully stored (`CachedResource`).
complete_entries: HashMap<CacheKey, CachedResource>,
// Entries that are still being filled in (`PendingResource`).
pending_entries: HashMap<CacheKey, PendingResource>,
// Timestamp captured when the cache is constructed (see `new`).
// NOTE(review): each entry's `expires` duration appears to be measured
// from this instant rather than from when the response arrived — confirm.
base_time: Timespec,
}

pub enum ResourceResponseTarget {
@@ -83,6 +94,11 @@ pub enum CacheOperationResult {
NewCacheEntry(CacheKey),
}

// Splits a comma-separated header value into its individual tokens.
// Returns a lazy iterator; every yielded token has had `trim()` applied,
// so whitespace surrounding the commas is not part of the token.
fn split_header(header: &str) -> Map<&str, &str, CharSplits<char>> {
header.split(',')
.map(|v| v.trim())
}

fn response_is_cacheable(metadata: &Metadata) -> bool {
if metadata.status != StatusOk {
return false;
@@ -93,9 +109,7 @@ fn response_is_cacheable(metadata: &Metadata) -> bool {
}

fn any_token_matches(header: &str, tokens: &[&str]) -> bool {
header.split(',')
.map(|v| v.trim())
.any(|token| tokens.iter().any(|&s| s == token))
split_header(header).any(|token| tokens.iter().any(|&s| s == token))
}

let headers = metadata.headers.as_ref().unwrap();
@@ -120,11 +134,34 @@ fn response_is_cacheable(metadata: &Metadata) -> bool {
return true;
}

// Derives an expiry `Duration` for a response from its headers.
//
// Order of precedence:
//   1. a `max-age=<n>` token in the `cache_control` header, giving
//      `Duration::seconds(n)`;
//   2. otherwise the `expires` header, parsed with `parse_http_timestamp`;
//   3. otherwise (no headers, or neither of the above produced a value) `MAX`.
fn get_response_expiry(metadata: &Metadata) -> Duration {
metadata.headers.as_ref().and_then(|headers| {
headers.cache_control.as_ref().and_then(|cache_control| {
for token in split_header(cache_control[]) {
// Each token is `name` or `name=value`; `split` always yields at
// least one piece, so the `unwrap` on the name cannot fail.
let mut parts = token.split('=');
if parts.next().unwrap() == "max-age" {
// The first `max-age` token decides the result: a missing or
// non-decimal value produces `None` here, which falls through to
// the `expires` fallback below.
return parts.next()
.and_then(|val| FromStrRadix::from_str_radix(val, 10))
.map(|secs| Duration::seconds(secs));
}
}
None
}).or_else(|| {
headers.expires.as_ref().and_then(|expires| {
parse_http_timestamp(expires[]).map(|t| {
// NOTE(review): this wraps the timestamp's own `sec` value in a
// Duration, i.e. it looks like an absolute time, whereas `max-age`
// above is a relative number of seconds — confirm that callers
// treat both the same way intentionally.
Duration::seconds(t.to_timespec().sec)
})
})
})
}).unwrap_or(MAX)
}

impl MemoryCache {
// Creates an empty cache: no complete entries, no pending entries, and
// `base_time` set to the current time at the moment of construction.
pub fn new() -> MemoryCache {
MemoryCache {
complete_entries: HashMap::new(),
pending_entries: HashMap::new(),
base_time: time::now().to_timespec(),
}
}

@@ -164,6 +201,7 @@ impl MemoryCache {
resource.doomed = true;
}

resource.expires = get_response_expiry(&metadata);
resource.metadata = Some(metadata);
resource.consumers = AwaitingBody(chans);
}
@@ -203,6 +241,7 @@ impl MemoryCache {
let complete = CachedResource {
metadata: resource.metadata.unwrap(),
body: resource.body,
expires: resource.expires,
};
self.complete_entries.insert(key.clone(), complete);
}
@@ -214,9 +253,22 @@ impl MemoryCache {
}

let key = CacheKey::new(load_data.clone());
if self.complete_entries.contains_key(&key) {
self.send_complete_entry(key, start_chan);
return CachedContentPending;
let expired = self.complete_entries.get(&key).map(|resource| {
self.base_time + resource.expires >= time::now().to_timespec()
});

match expired {
Some(true) => {
info!("evicting existing entry for {}", load_data.url);
self.complete_entries.remove(&key);
}

Some(false) => {
self.send_complete_entry(key, start_chan);
return CachedContentPending;
}

None => ()
}

let new_entry = match self.pending_entries.get(&key) {
@@ -239,6 +291,7 @@ impl MemoryCache {
metadata: None,
body: vec!(),
consumers: AwaitingHeaders(vec!(start_chan)),
expires: MAX,
doomed: false,
};
info!("creating cache entry for {}", key.url);
@@ -26,12 +26,12 @@ use servo_net::resource_task::{Load, LoadData, Payload, Done, ResourceTask};
use servo_msg::constellation_msg::LoadData as MsgLoadData;
use servo_util::task_state;
use servo_util::task_state::IN_HTML_PARSER;
use servo_util::time::parse_http_timestamp;
use std::ascii::AsciiExt;
use std::comm::channel;
use std::str::MaybeOwned;
use url::Url;
use http::headers::HeaderEnum;
use time;
use html5ever::Attribute;
use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText};
use string_cache::QualName;
@@ -44,25 +44,9 @@ pub enum HTMLInput {
// Parses an RFC 2616 compliant date/time string, and returns a localized
// date/time string in a format suitable for document.lastModified.
fn parse_last_modified(timestamp: &str) -> String {
let format = "%m/%d/%Y %H:%M:%S";

// RFC 822, updated by RFC 1123
match time::strptime(timestamp, "%a, %d %b %Y %T %Z") {
Ok(t) => return t.to_local().strftime(format).unwrap(),
Err(_) => ()
}

// RFC 850, obsoleted by RFC 1036
match time::strptime(timestamp, "%A, %d-%b-%y %T %Z") {
Ok(t) => return t.to_local().strftime(format).unwrap(),
Err(_) => ()
}

// ANSI C's asctime() format
match time::strptime(timestamp, "%c") {
Ok(t) => t.to_local().strftime(format).unwrap(),
Err(_) => String::from_str("")
}
parse_http_timestamp(timestamp).map(|t| {
t.to_local().strftime("%m/%d/%Y %H:%M:%S").unwrap()
}).unwrap_or(String::new())
}

trait SinkHelpers {
@@ -10,7 +10,7 @@ use std::f64;
use std::io::timer::sleep;
use std::iter::AdditiveIterator;
use std::time::duration::Duration;
use std_time::precise_time_ns;
use std_time::{Tm, precise_time_ns, strptime};
use task::{spawn_named};
use url::Url;

@@ -277,3 +277,24 @@ pub fn time<T>(msg: &str, callback: || -> T) -> T{
}
return val;
}

// Parses an RFC 2616 compliant date/time string
pub fn parse_http_timestamp(timestamp: &str) -> Option<Tm> {
// RFC 822, updated by RFC 1123
match strptime(timestamp, "%a, %d %b %Y %T %Z") {
Ok(t) => return Some(t),
Err(_) => ()
}

// RFC 850, obsoleted by RFC 1036
match strptime(timestamp, "%A, %d-%b-%y %T %Z") {
Ok(t) => return Some(t),
Err(_) => ()
}

// ANSI C's asctime() format
match strptime(timestamp, "%c") {
Ok(t) => Some(t),
Err(_) => None,
}

This comment has been minimized.

Copy link
@pcwalton

pcwalton Dec 9, 2014

Contributor

IIRC you can just write strptime(timestamp, "%c").ok()

}
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.