From f3418c0d6667d45b48ac29f95ce251e1e9e18392 Mon Sep 17 00:00:00 2001 From: Dale Wijnand Date: Mon, 31 Dec 2018 11:30:00 +0000 Subject: [PATCH 1/7] Add the URI query string hash to the cache path This avoids two URIs with only query parameter differences (like pages) mapping to the same file. --- .travis.yml | 1 + src/http_cache.rs | 69 ++++++++++++++++++++++++++++++++++++----------- src/lib.rs | 3 ++- 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5d7638db..17a68fb3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,7 @@ script: - travis_wait cargo test - cargo run --example rate_limit - cargo build --features httpcache --example conditional_requests +- cargo test --features httpcache --doc http_cache # Cache `cargo install`ed tools, but don't cache the project's `target` # directory (which ends up over-caching and filling all disk space!) diff --git a/src/http_cache.rs b/src/http_cache.rs index e34d8326..0c593521 100644 --- a/src/http_cache.rs +++ b/src/http_cache.rs @@ -1,9 +1,11 @@ //! Implements Conditional Requests use std; +use std::collections::hash_map::DefaultHasher; use std::ffi::OsStr; use std::fmt::Debug; use std::fs; +use std::hash::{Hash, Hasher}; use std::io; use std::path::{Path, PathBuf}; @@ -61,6 +63,7 @@ pub struct FileBasedCache { impl HttpCache for FileBasedCache { fn cache_body_and_etag(&self, uri: &str, body: &[u8], etag: &[u8]) -> Result<()> { let mut path = cache_path(&self.root, &uri, "json"); + trace!("caching body at path: {}", path.display()); if let Some(parent) = path.parent() { fs::create_dir_all(parent)?; } @@ -79,32 +82,47 @@ impl HttpCache for FileBasedCache { } } -/// cache_path("https://api.github.com/users/dwijnand/repos", "json") ==> -/// ~/.hubcaps/cache/v1/https/api.github.com/users/dwijnand/repos.json -fn cache_path>(dir: &Path, uri: &str, extension: S) -> PathBuf { +/// Construct the cache path for the given URI and extension, from an initial directory. +/// +/// # Examples +/// +/// ``` +/// # use std::path::PathBuf; +/// # use hubcaps::http_cache::cache_path; +/// assert_eq!( +/// cache_path(&PathBuf::from("/home/.hubcaps/cache"), "https://api.github.com/users/dwijnand/repos", "json"), +/// PathBuf::from("/home/.hubcaps/cache/v1/https/api.github.com/users/dwijnand/repos.json"), +/// ); +/// assert_eq!( +/// cache_path(&PathBuf::from("/home/.hubcaps/cache"), "https://api.github.com/users/dwijnand/repos?page=2", "json"), +/// PathBuf::from("/home/.hubcaps/cache/v1/https/api.github.com/users/dwijnand/repos/6dd58bde8abb0869.json"), +/// ); +/// assert_eq!( +/// cache_path(&PathBuf::from("/home/.hubcaps/cache"), "https://api.github.com/users/dwijnand/repos?page=2&per_page=5", "json"), +/// PathBuf::from("/home/.hubcaps/cache/v1/https/api.github.com/users/dwijnand/repos/d862dcd2d85cebca.json"), +/// ); +/// ``` +#[doc(hidden)] // public for doc testing only +pub fn cache_path>(dir: &Path, uri: &str, extension: S) -> PathBuf { let uri = uri.parse::().expect("Expected a URI"); let mut path = dir.to_path_buf(); path.push("v1"); - path.push(uri.scheme_part().expect("Expected a URI scheme").as_ref()); // https - path.push( - uri.authority_part() - .expect("Expected a URI authority") - .as_ref(), - ); // api.github.com - path.push( - Path::new(uri.path()) // /users/dwijnand/repos - .strip_prefix("/") - .expect("Expected URI path to start with /"), - ); - path.set_extension(extension); + path.push(uri.scheme_part().expect("no URI scheme").as_str()); // https + path.push(uri.authority_part().expect("no URI authority").as_str()); // api.github.com + path.push(Path::new(&uri.path()[1..])); // users/dwijnand/repos + if let Some(query) = uri.query() { + path.push(hash1(query, DefaultHasher::new())); // fa269019d5035d5f + } + path.set_extension(extension); // .json path } fn read_to_string>(path: P) -> Result { + trace!("reading path: {}", path.as_ref().display()); fs::read_to_string(path).map_err(Error::from) } -fn no_read>>(error: E) -> Result { +fn no_read>>(error: E) -> Result { Err(Error::from(io::Error::new(io::ErrorKind::NotFound, error))) } @@ -124,3 +142,22 @@ where Box::new(self.clone()) } } + +fn hash1(x: A, mut hasher: H) -> String { + x.hash(&mut hasher); + u64_to_padded_hex(hasher.finish()) +} + +/// Construct a 0-padded hex string from a u64. +/// +/// # Examples +/// +/// ``` +/// # use hubcaps::http_cache::u64_to_padded_hex; +/// assert_eq!(u64_to_padded_hex(0), "0000000000000000"); +/// assert_eq!(u64_to_padded_hex(u64::max_value()), "ffffffffffffffff"); +/// ``` +#[doc(hidden)] // public for doc testing only +pub fn u64_to_padded_hex(x: u64) -> String { + format!("{:016x}", x) +} diff --git a/src/lib.rs b/src/lib.rs index 3eb2b81f..ddb1933a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -126,8 +126,9 @@ use mime::Mime; use serde::de::DeserializeOwned; use url::Url; +#[doc(hidden)] // public for doc testing only #[cfg(feature = "httpcache")] -mod http_cache; +pub mod http_cache; #[macro_use] mod macros; // expose json! macro to child modules pub mod activity; From 3a17a95aa7c679289d5431ac9c27b0bba5982a54 Mon Sep 17 00:00:00 2001 From: Dale Wijnand Date: Mon, 31 Dec 2018 11:32:00 +0000 Subject: [PATCH 2/7] Partially expose FileBasedCache for integration testing --- src/http_cache.rs | 9 ++++++++- src/lib.rs | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/http_cache.rs b/src/http_cache.rs index 0c593521..92914e43 100644 --- a/src/http_cache.rs +++ b/src/http_cache.rs @@ -30,7 +30,7 @@ impl HttpCache { pub fn in_home_dir() -> BoxedHttpCache { let mut dir = dirs::home_dir().expect("Expected a home dir"); dir.push(".hubcaps/cache"); - Box::new(FileBasedCache { root: dir }) + Box::new(FileBasedCache::new(dir)) } } @@ -60,6 +60,13 @@ pub struct FileBasedCache { root: PathBuf, } +impl FileBasedCache { + #[doc(hidden)] // public for integration testing only + pub fn new>(root: P) -> FileBasedCache { + FileBasedCache { root: root.into() } + } +} + impl HttpCache for FileBasedCache { fn cache_body_and_etag(&self, uri: &str, body: &[u8], etag: &[u8]) -> Result<()> { let mut path = cache_path(&self.root, &uri, "json"); diff --git a/src/lib.rs b/src/lib.rs index ddb1933a..c269cc11 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -126,7 +126,7 @@ use mime::Mime; use serde::de::DeserializeOwned; use url::Url; -#[doc(hidden)] // public for doc testing only +#[doc(hidden)] // public for doc testing and integration testing only #[cfg(feature = "httpcache")] pub mod http_cache; #[macro_use] From a7e64c2655bf486643f91e46ba2f45134dcdac6d Mon Sep 17 00:00:00 2001 From: Dale Wijnand Date: Mon, 31 Dec 2018 11:33:09 +0000 Subject: [PATCH 3/7] Add a testkit for HTTP cache integration tests. Derived from cargo's. --- tests/testkit/mod.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tests/testkit/mod.rs diff --git a/tests/testkit/mod.rs b/tests/testkit/mod.rs new file mode 100644 index 00000000..5c1b2fc5 --- /dev/null +++ b/tests/testkit/mod.rs @@ -0,0 +1,46 @@ +use std::cell::Cell; +use std::env; +use std::fs; +use std::path::PathBuf; +use std::sync::atomic::{AtomicUsize, Ordering, ATOMIC_USIZE_INIT}; + +fn global_test_root() -> PathBuf { + let mut path = env::current_exe().unwrap(); + path.pop(); // chop off exe name + path.pop(); // chop off 'debug' + + // If `cargo test` is run manually then our path looks like + // `target/debug/foo`, in which case our `path` is already pointing at + // `target`. If, however, `cargo test --target $target` is used then the + // output is `target/$target/debug/foo`, so our path is pointing at + // `target/$target`. Here we conditionally pop the `$target` name. + if path.file_name().and_then(|s| s.to_str()) != Some("target") { + path.pop(); + } + + path.join("int-test") +} + +fn test_root() -> PathBuf { + let root = global_test_root(); + + static NEXT_TEST_NUM: AtomicUsize = ATOMIC_USIZE_INIT; + thread_local!(static TEST_NUM: usize = NEXT_TEST_NUM.fetch_add(1, Ordering::SeqCst)); + let root = root.join(&TEST_NUM.with(|my_id| format!("t{}", my_id))); + + thread_local!(static TEST_ROOT_INIT: Cell = Cell::new(false)); + TEST_ROOT_INIT.with(|i| { + if i.get() { + return; + } + i.set(true); + fs::remove_dir_all(&root).expect("removing root"); + debug!("deleted root {}", root.display()); + }); + + root +} + +pub fn test_home() -> PathBuf { + test_root().join("home") +} From 16aba3d3e53c555c8722dc3bd27be9e05703ddc4 Mon Sep 17 00:00:00 2001 From: Dale Wijnand Date: Mon, 31 Dec 2018 11:35:52 +0000 Subject: [PATCH 4/7] Fix & test conditional requests work with pages --- .travis.yml | 1 + src/http_cache.rs | 35 +++++++++++-- src/lib.rs | 22 +++++++-- tests/conditional_requests.rs | 93 +++++++++++++++++++++++++++++++++++ 4 files changed, 145 insertions(+), 6 deletions(-) create mode 100644 tests/conditional_requests.rs diff --git a/.travis.yml b/.travis.yml index 17a68fb3..a4779067 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,6 +21,7 @@ script: - cargo run --example rate_limit - cargo build --features httpcache --example conditional_requests - cargo test --features httpcache --doc http_cache +- cargo test --features httpcache --test conditional_requests # Cache `cargo install`ed tools, but don't cache the project's `target` # directory (which ends up over-caching and filling all disk space!) diff --git a/src/http_cache.rs b/src/http_cache.rs index 92914e43..aa4144b4 100644 --- a/src/http_cache.rs +++ b/src/http_cache.rs @@ -17,9 +17,16 @@ use {Error, Result}; pub type BoxedHttpCache = Box; pub trait HttpCache: HttpCacheClone + Debug { - fn cache_body_and_etag(&self, uri: &str, body: &[u8], etag: &[u8]) -> Result<()>; + fn cache_response( + &self, + uri: &str, + body: &[u8], + etag: &[u8], + next_link: &Option, + ) -> Result<()>; fn lookup_etag(&self, uri: &str) -> Result; fn lookup_body(&self, uri: &str) -> Result; + fn lookup_next_link(&self, uri: &str) -> Result>; } impl HttpCache { @@ -44,7 +51,7 @@ impl Clone for BoxedHttpCache { pub struct NoCache; impl HttpCache for NoCache { - fn cache_body_and_etag(&self, _: &str, _: &[u8], _: &[u8]) -> Result<()> { + fn cache_response(&self, _: &str, _: &[u8], _: &[u8], _: &Option) -> Result<()> { Ok(()) } fn lookup_etag(&self, _uri: &str) -> Result { @@ -53,6 +60,9 @@ impl HttpCache for NoCache { fn lookup_body(&self, _uri: &str) -> Result { no_read("No body cached") } + fn lookup_next_link(&self, _uri: &str) -> Result> { + no_read("No next link cached") + } } #[derive(Clone, Debug)] @@ -68,7 +78,13 @@ impl FileBasedCache { } impl HttpCache for FileBasedCache { - fn cache_body_and_etag(&self, uri: &str, body: &[u8], etag: &[u8]) -> Result<()> { + fn cache_response( + &self, + uri: &str, + body: &[u8], + etag: &[u8], + next_link: &Option, + ) -> Result<()> { let mut path = cache_path(&self.root, &uri, "json"); trace!("caching body at path: {}", path.display()); if let Some(parent) = path.parent() { @@ -77,6 +93,10 @@ impl HttpCache for FileBasedCache { fs::write(&path, body)?; path.set_extension("etag"); fs::write(&path, etag)?; + if let Some(next_link) = next_link { + path.set_extension("next_link"); + fs::write(&path, next_link)?; + } Ok(()) } @@ -87,6 +107,15 @@ impl HttpCache for FileBasedCache { fn lookup_body(&self, uri: &str) -> Result { read_to_string(cache_path(&self.root, uri, "json")) } + + fn lookup_next_link(&self, uri: &str) -> Result> { + let path = cache_path(&self.root, uri, "next_link"); + if path.exists() { + Ok(Some(read_to_string(path)?)) + } else { + Ok(None) + } + } } /// Construct the cache path for the given URI and extension, from an initial directory. diff --git a/src/lib.rs b/src/lib.rs index c269cc11..96ddf468 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -121,6 +121,8 @@ use hyper::header::{ACCEPT, AUTHORIZATION, ETAG, LINK, LOCATION, USER_AGENT}; use hyper::{Body, Client, Method, Request, StatusCode, Uri}; #[cfg(feature = "tls")] use hyper_tls::HttpsConnector; +#[cfg(feature = "httpcache")] +use hyperx::header::LinkValue; use hyperx::header::{qitem, Link, RelationType}; use mime::Mime; use serde::de::DeserializeOwned; @@ -799,10 +801,12 @@ where #[cfg(feature = "httpcache")] { if let Some(etag) = etag { - if let Err(e) = instance2.http_cache.cache_body_and_etag( + let next_link = link.as_ref().and_then(|l| next_link(&l)); + if let Err(e) = instance2.http_cache.cache_response( &uri3, &response_body, &etag, + &next_link, ) { // failing to cache isn't fatal, so just log & swallow the error debug!("Failed to cache body & etag: {}", e); @@ -823,8 +827,20 @@ where .map_err(Error::from) .and_then(|body| { serde_json::from_str::(&body) - .map(|out| (link, out)) - .map_err(|error| ErrorKind::Codec(error).into()) + .map_err(Error::from) + .and_then(|out| { + let link = match link { + Some(link) => Ok(Some(link)), + None => instance2 + .http_cache + .lookup_next_link(&uri3) + .map(|next_link| next_link.map(|next| { + let next = LinkValue::new(next).push_rel(RelationType::Next); + Link::new(vec![next]) + })) + }; + link.map(|link| (link, out)) + }) }) } #[cfg(not(feature = "httpcache"))] diff --git a/tests/conditional_requests.rs b/tests/conditional_requests.rs new file mode 100644 index 00000000..43e47a28 --- /dev/null +++ b/tests/conditional_requests.rs @@ -0,0 +1,93 @@ +extern crate env_logger; +extern crate futures; +extern crate hubcaps; +extern crate hyper; +extern crate hyper_tls; +#[cfg(feature = "httpcache")] +#[macro_use] +extern crate log; +extern crate tokio; + +#[cfg(feature = "httpcache")] +use std::env; + +#[cfg(feature = "httpcache")] +use futures::{future, Stream}; +#[cfg(feature = "httpcache")] +use hyper::Client; +#[cfg(feature = "httpcache")] +use hyper_tls::HttpsConnector; +#[cfg(feature = "httpcache")] +use tokio::runtime::Runtime; + +#[cfg(feature = "httpcache")] +use hubcaps::http_cache::FileBasedCache; +#[cfg(feature = "httpcache")] +use hubcaps::repositories::UserRepoListOptions; +#[cfg(feature = "httpcache")] +use hubcaps::{Credentials, Error, Github, Result}; + +#[cfg(feature = "httpcache")] +mod testkit; + +#[test] +#[cfg(feature = "httpcache")] +fn compare_counts() -> Result<()> { + env_logger::init(); + + let mut rt = Runtime::new()?; + + let agent = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")); + let credentials = env::var("GITHUB_TOKEN").ok().map(Credentials::Token); + let owner = "octocat"; + let per_page = 5; + let repo_list_options = UserRepoListOptions::builder().per_page(per_page).build(); + + info!("first get the total count of repos, without caching"); + + let github = Github::new(agent, credentials.clone()); + let repos = github.user_repos(owner).iter(&repo_list_options); + let total_count = rt.block_on(repos.fold(0, |acc, _repo| future::ok::<_, Error>(acc + 1)))?; + + // octocat current has 8 repos, so we set per_page to 5 to get 2 pages + // but if octocat ends up having less than 5 repos, it'll be just one page + // and therefore nullify this test, so we sanity check + assert!( + total_count > per_page, + "test sanity check failed, total_count: {}, per_page: {}", + total_count, + per_page, + ); + + info!("then get the total count with a cache"); + + let host = "https://api.github.com"; + let client = Client::builder().build(HttpsConnector::new(4).unwrap()); + let cache_path = testkit::test_home().join(".hubcaps/cache"); + let http_cache = Box::new(FileBasedCache::new(cache_path)); + let github: Github<_> = Github::custom(host, agent, credentials, client, http_cache); + + info!("first populate the cache"); + + let repos = github.user_repos(owner).iter(&repo_list_options); + let count1 = rt.block_on(repos.fold(0, |acc, _repo| future::ok::<_, Error>(acc + 1)))?; + let status1 = rt.block_on(github.rate_limit().get())?; + + info!("then retrieve via the cache"); + + let repos = github.user_repos(owner).iter(&repo_list_options); + let count2 = rt.block_on(repos.fold(0, |acc, _repo| future::ok::<_, Error>(acc + 1)))?; + let status2 = rt.block_on(github.rate_limit().get())?; + + info!("and compare the counts"); + + assert_eq!(count1, count2); + + info!("and while we're at it, compare that caching mitigates rate limiting"); + + let rem1 = status1.resources.core.remaining; + let rem2 = status2.resources.core.remaining; + assert_eq!(rem1, rem2); + + Ok(()) +} From c9a971ddf1323a460120d0929416494ee5639606 Mon Sep 17 00:00:00 2001 From: Dale Wijnand Date: Wed, 2 Jan 2019 12:07:01 +0000 Subject: [PATCH 5/7] Make testkit init cleanly --- tests/testkit/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/testkit/mod.rs b/tests/testkit/mod.rs index 5c1b2fc5..b694d894 100644 --- a/tests/testkit/mod.rs +++ b/tests/testkit/mod.rs @@ -34,8 +34,10 @@ fn test_root() -> PathBuf { return; } i.set(true); - fs::remove_dir_all(&root).expect("removing root"); - debug!("deleted root {}", root.display()); + if root.exists() { + fs::remove_dir_all(&root).expect("removing root"); + debug!("deleted root {}", root.display()); + } }); root From 2f65e1f52d66af98a45bf6e15dea1f823640299f Mon Sep 17 00:00:00 2001 From: Dale Wijnand Date: Wed, 2 Jan 2019 15:00:46 +0000 Subject: [PATCH 6/7] Switch new tests/conditional_requests.rs to pretty_env_logger --- tests/conditional_requests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/conditional_requests.rs b/tests/conditional_requests.rs index 43e47a28..d54737f0 100644 --- a/tests/conditional_requests.rs +++ b/tests/conditional_requests.rs @@ -1,4 +1,4 @@ -extern crate env_logger; +extern crate pretty_env_logger; extern crate futures; extern crate hubcaps; extern crate hyper; @@ -33,7 +33,7 @@ mod testkit; #[test] #[cfg(feature = "httpcache")] fn compare_counts() -> Result<()> { - env_logger::init(); + pretty_env_logger::init(); let mut rt = Runtime::new()?; From d2867eb9c8ce0137a1bcb5f431126680f22ccf9a Mon Sep 17 00:00:00 2001 From: Dale Wijnand Date: Wed, 2 Jan 2019 18:00:51 +0000 Subject: [PATCH 7/7] Skip conditional_requests test in CI :-/ --- tests/conditional_requests.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/conditional_requests.rs b/tests/conditional_requests.rs index d54737f0..16ff6d0c 100644 --- a/tests/conditional_requests.rs +++ b/tests/conditional_requests.rs @@ -38,7 +38,17 @@ fn compare_counts() -> Result<()> { let mut rt = Runtime::new()?; let agent = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")); - let credentials = env::var("GITHUB_TOKEN").ok().map(Credentials::Token); + let credentials = match env::var("GITHUB_TOKEN").ok() { + Some(token) => Some(Credentials::Token(token)), + None => { + if env::var("CI") == Ok(String::from("true")) { + println!("No GITHUB_TOKEN env var in CI, skipping test"); + return Ok(()); + } else { + None + } + } + }; let owner = "octocat"; let per_page = 5; let repo_list_options = UserRepoListOptions::builder().per_page(per_page).build();