diff --git a/Cargo.lock b/Cargo.lock index 19acb2bc6..1733c0ad7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -413,7 +413,7 @@ dependencies = [ "time 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", "tokio 0.1.22 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", - "url 1.7.2 (registry+https://github.com/rust-lang/crates.io-index)", + "url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "zstd 0.5.2+zstd.1.4.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -2591,6 +2591,7 @@ dependencies = [ "percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "pin-project-lite 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.53 (registry+https://github.com/rust-lang/crates.io-index)", "serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "tokio 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)", "tokio-tls 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2891,6 +2892,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -3766,6 +3768,7 @@ dependencies = [ "idna 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index e0fde10a7..f99451767 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,13 +13,15 @@ log = "0.4" regex = "1" structopt = "0.3" crates-index-diff = "7" -reqwest = { version = "0.10.6", features = ["blocking"] } # TODO: Remove blocking when async is ready -semver = "0.9" +reqwest = { version = "0.10.6", features = ["blocking", "json"] } # TODO: Remove blocking when async is ready +semver = { version = "0.9", features = ["serde"] } slug = "=0.1.1" env_logger = "0.7" r2d2 = "0.8" r2d2_postgres = "0.14" -url = "1.4" +# iron needs url@1, but it reexports it as iron::url, so we can start using +# url@2 for other usecases +url = { version = "2.1.1", features = ["serde"] } badge = { path = "src/web/badge" } failure = "0.1.3" comrak = { version = "0.3", default-features = false } @@ -39,6 +41,7 @@ rustwide = "0.7.1" mime_guess = "2" dotenv = "0.15" zstd = "0.5" +git2 = { version = "0.13.6", default-features = false } # Data serialization and deserialization serde = { version = "1.0", features = ["derive"] } diff --git a/src/db/add_package.rs b/src/db/add_package.rs index 82b861eae..985dcf265 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -32,7 +32,7 @@ pub(crate) fn add_package_into_database( default_target: &str, source_files: Option, doc_targets: Vec, - cratesio_data: &RegistryCrateData, + registry_data: &RegistryCrateData, has_docs: bool, has_examples: bool, compression_algorithms: std::collections::HashSet, @@ -87,10 +87,10 @@ pub(crate) fn add_package_into_database( &[ &crate_id, &metadata_pkg.version, - &cratesio_data.release_time.naive_utc(), + ®istry_data.release_time.naive_utc(), &serde_json::to_value(&dependencies)?, &metadata_pkg.package_name(), - &cratesio_data.yanked, + ®istry_data.yanked, &res.successful, &has_docs, &false, // TODO: Add test status somehow @@ -103,7 +103,7 @@ pub(crate) fn add_package_into_database( &serde_json::to_value(&metadata_pkg.authors)?, &serde_json::to_value(&metadata_pkg.keywords)?, &has_examples, - &cratesio_data.downloads, + ®istry_data.downloads, &source_files, &serde_json::to_value(&doc_targets)?, &is_library, @@ -117,7 +117,7 @@ pub(crate) fn add_package_into_database( add_keywords_into_database(&conn, &metadata_pkg, release_id)?; add_authors_into_database(&conn, &metadata_pkg, release_id)?; - add_owners_into_database(&conn, &cratesio_data.owners, crate_id)?; + add_owners_into_database(&conn, ®istry_data.owners, crate_id)?; add_compression_into_database(&conn, compression_algorithms.into_iter(), release_id)?; // Update the crates table with the new release diff --git a/src/docbuilder/mod.rs b/src/docbuilder/mod.rs index 0a0dac3d8..2b48f585e 100644 --- a/src/docbuilder/mod.rs +++ b/src/docbuilder/mod.rs @@ -11,6 +11,7 @@ pub(crate) use self::rustwide_builder::BuildResult; pub use self::rustwide_builder::RustwideBuilder; use crate::error::Result; +use crate::index::Index; use crate::DocBuilderOptions; use log::debug; use std::collections::BTreeSet; @@ -22,14 +23,17 @@ use std::path::PathBuf; /// chroot based documentation builder pub struct DocBuilder { options: DocBuilderOptions, + index: Index, cache: BTreeSet, db_cache: BTreeSet, } impl DocBuilder { pub fn new(options: DocBuilderOptions) -> DocBuilder { + let index = Index::new(&options.registry_index_path).expect("valid index"); DocBuilder { options, + index, cache: BTreeSet::new(), db_cache: BTreeSet::new(), } diff --git a/src/docbuilder/queue.rs b/src/docbuilder/queue.rs index b6f4b4f06..dd161c5f8 100644 --- a/src/docbuilder/queue.rs +++ b/src/docbuilder/queue.rs @@ -4,7 +4,7 @@ use super::{DocBuilder, RustwideBuilder}; use crate::db::connect_db; use crate::error::Result; use crate::utils::{add_crate_to_queue, get_crate_priority}; -use crates_index_diff::{ChangeKind, Index}; +use crates_index_diff::ChangeKind; use log::{debug, error}; impl DocBuilder { @@ -12,8 +12,7 @@ impl DocBuilder { /// Returns the number of crates added pub fn get_new_crates(&mut self) -> Result { let conn = connect_db()?; - let index = Index::from_path_or_cloned(&self.options.registry_index_path)?; - let (mut changes, oid) = index.peek_changes()?; + let (mut changes, oid) = self.index.diff().peek_changes()?; let mut crates_added = 0; // I believe this will fix ordering of queue if we get more than one crate from changes @@ -59,7 +58,7 @@ impl DocBuilder { } } - index.set_last_seen_reference(oid)?; + self.index.diff().set_last_seen_reference(oid)?; Ok(crates_added) } diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 32dcf72b2..92ab54b67 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -5,7 +5,6 @@ use crate::db::file::add_path_into_database; use crate::db::{add_build_into_database, add_package_into_database, connect_db}; use crate::docbuilder::{crates::crates_from_path, Limits}; use crate::error::Result; -use crate::index::api::RegistryCrateData; use crate::storage::CompressionAlgorithms; use crate::utils::{copy_doc_dir, parse_rustc_version, CargoMetadata}; use failure::ResultExt; @@ -397,7 +396,7 @@ impl RustwideBuilder { &res.target, files_list, successful_targets, - &RegistryCrateData::get_from_network(res.cargo_metadata.root())?, + &doc_builder.index.api().get_crate_data(name, version), has_docs, has_examples, algs, diff --git a/src/index/api.rs b/src/index/api.rs index b00bb9bad..d770e9f24 100644 --- a/src/index/api.rs +++ b/src/index/api.rs @@ -1,10 +1,12 @@ -use crate::{error::Result, utils::MetadataPackage}; use chrono::{DateTime, Utc}; use failure::err_msg; +use log::warn; use reqwest::header::{HeaderValue, ACCEPT, USER_AGENT}; use semver::Version; -use serde_json::Value; -use std::io::Read; +use serde::Deserialize; +use url::Url; + +use crate::error::Result; const APP_USER_AGENT: &str = concat!( env!("CARGO_PKG_NAME"), @@ -12,6 +14,11 @@ const APP_USER_AGENT: &str = concat!( include_str!(concat!(env!("OUT_DIR"), "/git_version")) ); +pub(crate) struct Api { + api_base: Option, + client: reqwest::blocking::Client, +} + pub(crate) struct RegistryCrateData { pub(crate) release_time: DateTime, pub(crate) yanked: bool, @@ -26,147 +33,132 @@ pub(crate) struct CrateOwner { pub(crate) name: String, } -impl RegistryCrateData { - pub(crate) fn get_from_network(pkg: &MetadataPackage) -> Result { - let (release_time, yanked, downloads) = get_release_time_yanked_downloads(pkg)?; - let owners = get_owners(pkg)?; +impl Api { + pub(super) fn new(api_base: Option) -> Result { + let headers = vec![ + (USER_AGENT, HeaderValue::from_static(APP_USER_AGENT)), + (ACCEPT, HeaderValue::from_static("application/json")), + ] + .into_iter() + .collect(); + + let client = reqwest::blocking::Client::builder() + .default_headers(headers) + .build()?; + + Ok(Self { api_base, client }) + } - Ok(Self { + fn api_base(&self) -> Result { + self.api_base + .clone() + .ok_or_else(|| err_msg("index is missing an api base url")) + } + + pub(crate) fn get_crate_data(&self, name: &str, version: &str) -> RegistryCrateData { + let (release_time, yanked, downloads) = self + .get_release_time_yanked_downloads(name, version) + .unwrap_or_else(|err| { + warn!("Failed to get crate data for {}-{}: {}", name, version, err); + (Utc::now(), false, 0) + }); + + let owners = self.get_owners(name).unwrap_or_else(|err| { + warn!("Failed to get owners for {}-{}: {}", name, version, err); + Vec::new() + }); + + RegistryCrateData { release_time, yanked, downloads, owners, - }) + } } -} -fn client() -> Result { - let headers = vec![ - (USER_AGENT, HeaderValue::from_static(APP_USER_AGENT)), - (ACCEPT, HeaderValue::from_static("application/json")), - ] - .into_iter() - .collect(); + /// Get release_time, yanked and downloads from the registry's API + fn get_release_time_yanked_downloads( + &self, + name: &str, + version: &str, + ) -> Result<(DateTime, bool, i32)> { + let url = { + let mut url = self.api_base()?; + url.path_segments_mut() + .map_err(|()| err_msg("Invalid API url"))? + .extend(&["api", "v1", "crates", name, "versions"]); + url + }; + + #[derive(Deserialize)] + struct Response { + versions: Vec, + } + + #[derive(Deserialize)] + struct VersionData { + num: Version, + #[serde(default = "Utc::now")] + created_at: DateTime, + #[serde(default)] + yanked: bool, + #[serde(default)] + downloads: i32, + } - let client = reqwest::blocking::Client::builder() - .default_headers(headers) - .build()?; + let response: Response = self.client.get(url).send()?.error_for_status()?.json()?; - Ok(client) -} + let version = Version::parse(version)?; + let version = response + .versions + .into_iter() + .find(|data| data.num == version) + .ok_or_else(|| err_msg("Could not find version in response"))?; -/// Get release_time, yanked and downloads from the registry's API -fn get_release_time_yanked_downloads(pkg: &MetadataPackage) -> Result<(DateTime, bool, i32)> { - let url = format!("https://crates.io/api/v1/crates/{}/versions", pkg.name); - // FIXME: There is probably better way to do this - // and so many unwraps... - let mut res = client()?.get(&url).send()?; - - let mut body = String::new(); - res.read_to_string(&mut body)?; - - let json: Value = serde_json::from_str(&body)?; - let versions = json - .as_object() - .and_then(|o| o.get("versions")) - .and_then(|v| v.as_array()) - .ok_or_else(|| err_msg("Not a JSON object"))?; - - let (mut release_time, mut yanked, mut downloads) = (None, None, None); - - for version in versions { - let version = version - .as_object() - .ok_or_else(|| err_msg("Not a JSON object"))?; - let version_num = version - .get("num") - .and_then(|v| v.as_str()) - .ok_or_else(|| err_msg("Not a JSON object"))?; - - if Version::parse(version_num)?.to_string() == pkg.version { - let release_time_raw = version - .get("created_at") - .and_then(|c| c.as_str()) - .ok_or_else(|| err_msg("Not a JSON object"))?; - - release_time = Some( - DateTime::parse_from_str(release_time_raw, "%Y-%m-%dT%H:%M:%S%.f%:z")? - .with_timezone(&Utc), - ); - - yanked = Some( - version - .get("yanked") - .and_then(|c| c.as_bool()) - .ok_or_else(|| err_msg("Not a JSON object"))?, - ); - - downloads = Some( - version - .get("downloads") - .and_then(|c| c.as_i64()) - .ok_or_else(|| err_msg("Not a JSON object"))? as i32, - ); - - break; - } + Ok((version.created_at, version.yanked, version.downloads)) } - Ok(( - release_time.unwrap_or_else(Utc::now), - yanked.unwrap_or(false), - downloads.unwrap_or(0), - )) -} + /// Fetch owners from the registry's API + fn get_owners(&self, name: &str) -> Result> { + let url = { + let mut url = self.api_base()?; + url.path_segments_mut() + .map_err(|()| err_msg("Invalid API url"))? + .extend(&["api", "v1", "crates", name, "owners"]); + url + }; + + #[derive(Deserialize)] + struct Response { + users: Vec, + } -/// Fetch owners from the registry's API -fn get_owners(pkg: &MetadataPackage) -> Result> { - // owners available in: https://crates.io/api/v1/crates/rand/owners - let owners_url = format!("https://crates.io/api/v1/crates/{}/owners", pkg.name); - let mut res = client()?.get(&owners_url[..]).send()?; - // FIXME: There is probably better way to do this - // and so many unwraps... - let mut body = String::new(); - res.read_to_string(&mut body).unwrap(); - let json: Value = serde_json::from_str(&body[..])?; - - let owners = json - .as_object() - .and_then(|j| j.get("users")) - .and_then(|j| j.as_array()); - - let result = if let Some(owners) = owners { - owners - .iter() - .filter_map(|owner| { - fn extract<'a>(owner: &'a Value, field: &str) -> &'a str { - owner - .as_object() - .and_then(|o| o.get(field)) - .and_then(|o| o.as_str()) - .unwrap_or_default() - } - - let avatar = extract(owner, "avatar"); - let email = extract(owner, "email"); - let login = extract(owner, "login"); - let name = extract(owner, "name"); - - if login.is_empty() { - return None; - } - - Some(CrateOwner { - avatar: avatar.to_string(), - email: email.to_string(), - login: login.to_string(), - name: name.to_string(), - }) + #[derive(Deserialize)] + struct OwnerData { + #[serde(default)] + avatar: String, + #[serde(default)] + email: String, + #[serde(default)] + login: String, + #[serde(default)] + name: String, + } + + let response: Response = self.client.get(url).send()?.error_for_status()?.json()?; + + let result = response + .users + .into_iter() + .filter(|data| !data.login.is_empty()) + .map(|data| CrateOwner { + avatar: data.avatar, + email: data.email, + login: data.login, + name: data.name, }) - .collect() - } else { - Vec::new() - }; + .collect(); - Ok(result) + Ok(result) + } } diff --git a/src/index/mod.rs b/src/index/mod.rs index 65635341c..87359b0c4 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -1 +1,63 @@ +use std::path::{Path, PathBuf}; + +use url::Url; + +use self::api::Api; +use crate::error::Result; + pub(crate) mod api; + +pub(crate) struct Index { + diff: crates_index_diff::Index, + path: PathBuf, + api: Api, +} + +#[derive(serde::Deserialize, Clone)] +#[serde(rename_all = "kebab-case")] +struct IndexConfig { + dl: String, + #[serde(default)] + api: Option, + #[serde(default)] + allowed_registries: Vec, +} + +/// Inspects the given repository to find the config as specified in [RFC 2141][], assumes that the +/// repository has a remote called `origin` and that the branch `master` exists on it. +/// +/// [RFC 2141]: https://rust-lang.github.io/rfcs/2141-alternative-registries.html +fn load_config(repo: &git2::Repository) -> Result { + let tree = repo + .find_commit(repo.refname_to_id("refs/remotes/origin/master")?)? + .tree()?; + let file = tree + .get_name("config.json") + .ok_or_else(|| failure::format_err!("registry index missing config"))?; + let config = serde_json::from_slice(repo.find_blob(file.id())?.content())?; + Ok(config) +} + +impl Index { + pub(crate) fn new(path: impl AsRef) -> Result { + let path = path.as_ref().to_owned(); + let diff = crates_index_diff::Index::from_path_or_cloned(&path)?; + let config = load_config(diff.repository())?; + let api = Api::new(config.api)?; + Ok(Self { diff, path, api }) + } + + pub(crate) fn diff(&self) -> &crates_index_diff::Index { + &self.diff + } + + pub(crate) fn api(&self) -> &Api { + &self.api + } +} + +impl Clone for Index { + fn clone(&self) -> Self { + Self::new(&self.path).expect("we already loaded this registry successfully once") + } +} diff --git a/src/web/mod.rs b/src/web/mod.rs index 66c5a5188..73136fe6b 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -279,7 +279,7 @@ impl MatchSemver { /// been matched exactly, or if there has been a "correction" in the name that matched instead. fn match_version(conn: &Connection, name: &str, version: Option<&str>) -> Option { // version is an Option<&str> from router::Router::get, need to decode first - use url::percent_encoding::percent_decode; + use iron::url::percent_encoding::percent_decode; let req_version = version .and_then(|v| percent_decode(v.as_bytes()).decode_utf8().ok()) diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 36302e2d0..f1f15082e 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -57,7 +57,7 @@ pub struct RustLangRedirector { impl RustLangRedirector { pub fn new(target: &'static str) -> Self { - let url = url::Url::parse("https://doc.rust-lang.org/stable/") + let url = iron::url::Url::parse("https://doc.rust-lang.org/stable/") .expect("failed to parse rust-lang.org base URL") .join(target) .expect("failed to append crate name to rust-lang.org base URL"); @@ -75,7 +75,7 @@ impl iron::Handler for RustLangRedirector { /// Handler called for `/:crate` and `/:crate/:version` URLs. Automatically redirects to the docs /// or crate details page based on whether the given crate version was successfully built. pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult { - use url::percent_encoding::percent_decode; + use iron::url::percent_encoding::percent_decode; fn redirect_to_doc( req: &Request, @@ -520,7 +520,7 @@ pub fn badge_handler(req: &mut Request) -> IronResult { Some(MatchSemver::Semver((version, _))) => { let base_url = format!("{}/{}/badge.svg", redirect_base(req), name); - let url = ctry!(url::Url::parse_with_params( + let url = ctry!(iron::url::Url::parse_with_params( &base_url, &[("version", version)] ));