Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement a basic HTTP memory cache #4117

Closed
wants to merge 15 commits into from
Closed
Changes from 1 commit
Commits
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.

Always

Just for now

Revalidate expired cache entries instead of unconditionally evicting …

…them.
  • Loading branch information
jdm committed Nov 27, 2014
commit 1e70aa84fa6d2d43a10a98ac48289233effe05f4
@@ -11,28 +11,32 @@ use resource_task::{Metadata, ProgressMsg, LoadResponse, LoadData, Payload, Done
use servo_util::time::parse_http_timestamp;

use http::headers::HeaderEnum;
use http::headers::response::HeaderCollection as ResponseHeaderCollection;
use http::method::Get;
use http::status::Ok as StatusOk;

use std::collections::HashMap;
use std::comm::Sender;
use std::iter::Map;
use std::num::FromStrRadix;
use std::num::{Bounded, FromStrRadix};
use std::str::CharSplits;
use std::sync::{Arc, Mutex};
use std::time::duration::{MAX, Duration};
use time;
use time::Timespec;
use time::{Tm, Timespec};
use url::Url;

//TODO: Store an Arc<Vec<u8>> instead?
//TODO: Cache non-GET requests?
//TODO: Cache HEAD requests
//TODO: Doom responses with network errors
//TODO: Send Err responses for doomed entries
//TODO: Enable forced eviction of a request instead of retrieving the cached response
//TODO: Evict items based on expiration time
//TODO: Use If-Modified-Since, Etag, etc.
//TODO: Doom incomplete entries
//TODO: Cache-Control: must-revalidate
//TODO: Last-Modified
//TODO: Range requests
//TODO: Revalidation rules for query strings
//TODO: Vary

This comment has been minimized.

Copy link
@pcwalton

pcwalton Dec 9, 2014

Contributor

Vary?


/// The key used to differentiate requests in the cache.
#[deriving(Clone, Hash, PartialEq, Eq)]
@@ -116,6 +120,16 @@ pub enum CacheOperationResult {
CachedContentPending,
/// The request is not present in the cache but will be cached with the given key.
NewCacheEntry(CacheKey),
/// The request is in the cache but requires revalidation.
Revalidate(CacheKey, RevalidationMethod),
}

/// The means by which to revalidate stale cached content.
pub enum RevalidationMethod {
/// Revalidate by date: sent to the server as an If-Modified-Since header,
/// built from the entry's stored expiry time (derived from its
/// Cache-Control/Expires data when the response was cached).
ExpiryDate(Tm),
/// Revalidate by entity tag: sent to the server as an If-None-Match header,
/// using the Etag value stored with the cached response.
Etag(String),
}

/// Tokenize a header value.
@@ -160,27 +174,39 @@ fn response_is_cacheable(metadata: &Metadata) -> bool {
return true;
}

/// Determine the expiry date of the given response.
fn get_response_expiry(metadata: &Metadata) -> Duration {
metadata.headers.as_ref().and_then(|headers| {
headers.cache_control.as_ref().and_then(|cache_control| {
for token in split_header(cache_control[]) {
let mut parts = token.split('=');
if parts.next().unwrap() == "max-age" {
return parts.next()
.and_then(|val| FromStrRadix::from_str_radix(val, 10))
.map(|secs| Duration::seconds(secs));
}
/// Determine the expiry date of the given response headers.

This comment has been minimized.

Copy link
@pcwalton

pcwalton Dec 9, 2014

Contributor

Can you add "returns a far future date if the response headers do not expire" for clarity?

fn get_response_expiry_from_headers(headers: &ResponseHeaderCollection) -> Duration {
headers.cache_control.as_ref().and_then(|cache_control| {
for token in split_header(cache_control[]) {
let mut parts = token.split('=');
if parts.next().unwrap() == "max-age" {

This comment has been minimized.

Copy link
@pcwalton

pcwalton Dec 9, 2014

Contributor

Can you write parts.next().map(|x| *x) == Some("max-age") or something to avoid unwrapping?

return parts.next()
.and_then(|val| FromStrRadix::from_str_radix(val, 10))
.map(|secs| Duration::seconds(secs));
}
None
}).or_else(|| {
headers.expires.as_ref().and_then(|expires| {
parse_http_timestamp(expires[]).map(|t| {
Duration::seconds(t.to_timespec().sec)
})
}
None
}).or_else(|| {
headers.expires.as_ref().and_then(|expires| {
parse_http_timestamp(expires[]).map(|t| {
// store the period of time from now until expiry
let desired = t.to_timespec();
let current = time::now().to_timespec();
if desired > current {
desired - current
} else {
Bounded::min_value()

This comment has been minimized.

Copy link
@pcwalton

pcwalton Dec 9, 2014

Contributor

Might be clearer as just current, but this is fine.

}
})
})
}).unwrap_or(MAX)
}).unwrap_or(Bounded::max_value())
}

/// Determine the expiry date of the given response.

This comment has been minimized.

Copy link
@pcwalton

pcwalton Dec 9, 2014

Contributor

As above for this comment.

/// Determine the expiry duration of the given response. Returns a far-future
/// duration (the maximum `Duration` value) when the response carries no
/// headers at all, i.e. such a response never expires on its own.
fn get_response_expiry(metadata: &Metadata) -> Duration {
    match metadata.headers {
        Some(ref headers) => get_response_expiry_from_headers(headers),
        None => Bounded::max_value(),
    }
}

impl MemoryCache {
@@ -197,6 +223,11 @@ impl MemoryCache {
/// an error message or a final body payload. The cache entry is immediately removed.
pub fn doom_request(&mut self, key: &CacheKey, err: String) {
info!("dooming entry for {}", key.url);
match self.complete_entries.remove(key) {
Some(_) => return,
None => (),
}

let resource = self.pending_entries.remove(key).unwrap();
match resource.consumers {
AwaitingHeaders(ref consumers) => {
@@ -212,6 +243,14 @@ impl MemoryCache {
}
}

/// Handle a 304 (Not Modified) response to a revalidation request. Refreshes
/// the cached entry's expiry deadline from the revalidation response's headers
/// (Cache-Control max-age / Expires) without touching the stored body.
///
/// Panics if `key` has no corresponding complete cache entry; callers must
/// only invoke this for keys previously returned in a `Revalidate` result.
pub fn process_not_modified(&mut self, key: &CacheKey, headers: &ResponseHeaderCollection) {
info!("updating metadata for {}", key.url);
// NOTE(review): the unwrap assumes the entry is still in complete_entries —
// confirm it cannot be doomed/evicted between revalidation start and here.
let resource = self.complete_entries.get_mut(key).unwrap();
resource.expires = get_response_expiry_from_headers(headers);
}

/// Handle the initial response metadata for an incomplete cached request.
/// If the response should not be cached, the entry will be doomed and any
/// subsequent requests will not see the cached request. All waiting consumers
@@ -303,17 +342,16 @@ impl MemoryCache {
}

let key = CacheKey::new(load_data.clone());
let expired = self.complete_entries.get(&key).map(|resource| {
self.base_time + resource.expires >= time::now().to_timespec()
});

match expired {
Some(true) => {
info!("evicting existing entry for {}", load_data.url);
self.complete_entries.remove(&key);
}
match self.complete_entries.get(&key) {
Some(resource) => {
if self.base_time + resource.expires >= time::now().to_timespec() {
return Revalidate(key, ExpiryDate(time::at(self.base_time + resource.expires)));
}

//TODO: Revalidate if Etag present
//TODO: Revalidate if must-revalidate
//TODO: Revalidate once per session for response with no explicit expiry

Some(false) => {
self.send_complete_entry(key, start_chan);
return CachedContentPending;
}
@@ -2,16 +2,18 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use http_cache::{MemoryCache, Uncacheable, CachedContentPending, NewCacheEntry};
use http_cache::{MemoryCache, Uncacheable, CachedContentPending, NewCacheEntry, Revalidate};
use http_cache::{CachedPendingResource, UncachedPendingResource, ResourceResponseTarget};
use http_cache::{UncachedInProgressResource, CachedInProgressResource, ResourceProgressTarget};
use http_cache::{ExpiryDate, Etag};
use resource_task::{Metadata, Payload, Done, LoadResponse, LoadData, start_sending_opt};

use log;
use std::collections::HashSet;
use std::sync::{Arc, Mutex};
use http::client::{RequestWriter, NetworkStream};
use http::headers::HeaderEnum;
use http::status::NotModified;
use std::io::Reader;
use servo_util::task::spawn_named;
use url::Url;
@@ -57,7 +59,7 @@ fn send_error(url: Url, err: String, start_chan: &ResourceResponseTarget) {
}
}

fn load(load_data: LoadData, start_chan: Sender<LoadResponse>, cache: Arc<Mutex<MemoryCache>>) {
fn load(mut load_data: LoadData, start_chan: Sender<LoadResponse>, cache: Arc<Mutex<MemoryCache>>) {
// FIXME: At the time of writing this FIXME, servo didn't have any central
// location for configuration. If you're reading this and such a
// repository DOES exist, please update this constant to use it.
@@ -72,6 +74,20 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>, cache: Arc<Mutex<
cache.process_pending_request(&load_data, start_chan.clone())
};

let revalidating = match cache_result {
Revalidate(ref _key, ExpiryDate(ref last_fetched)) => {
load_data.headers.if_modified_since = Some(last_fetched.clone());
true
}

Revalidate(ref _key, Etag(ref etag)) => {
load_data.headers.if_none_match = Some(etag.clone());
true
}

_ => false
};

let start_chan = match cache_result {
Uncacheable(reason) => {
info!("request for {} can't be cached: {}", url, reason);
@@ -82,6 +98,10 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>, cache: Arc<Mutex<
info!("new cache entry for {}", url);
CachedPendingResource(key, cache)
}
Revalidate(key, _) => {
info!("revalidating {}", url);
CachedPendingResource(key, cache)
}
};

// Loop to handle redirects.
@@ -157,6 +177,21 @@ fn load(load_data: LoadData, start_chan: Sender<LoadResponse>, cache: Arc<Mutex<
}
}

if revalidating {
let (key, cache) = match start_chan {
CachedPendingResource(ref key, ref cache) => (key, cache),
UncachedPendingResource(..) => unreachable!(),
};

let mut cache = cache.lock();
if response.status == NotModified && revalidating {
cache.process_not_modified(key, &response.headers);
return;
}

cache.doom_request(key, "cache entry expired".to_string());
}

if 3 == (response.status.code() / 100) {
match response.headers.location {
Some(new_url) => {
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.