From 9fd86583d6610d0af54e912f5f5643e2803eea5d Mon Sep 17 00:00:00 2001 From: Jakub Labor Date: Tue, 22 Oct 2024 09:15:22 -0400 Subject: [PATCH 1/3] Use RegistryApi in releases --- src/registry_api.rs | 89 +++++++++++++++++++++++++++++- src/utils/mod.rs | 6 --- src/web/mod.rs | 1 + src/web/releases.rs | 128 ++++++++++---------------------------------- 4 files changed, 118 insertions(+), 106 deletions(-) diff --git a/src/registry_api.rs b/src/registry_api.rs index 5d5ada084..74ee753e9 100644 --- a/src/registry_api.rs +++ b/src/registry_api.rs @@ -1,5 +1,5 @@ use crate::{error::Result, utils::retry_async}; -use anyhow::{anyhow, Context}; +use anyhow::{anyhow, bail, Context}; use chrono::{DateTime, Utc}; use reqwest::header::{HeaderValue, ACCEPT, USER_AGENT}; use semver::Version; @@ -69,6 +69,26 @@ impl fmt::Display for OwnerKind { } } +#[derive(Deserialize, Debug)] + +pub(crate) struct SearchCrate { + pub(crate) name: String, +} + +#[derive(Deserialize, Debug)] + +pub(crate) struct SearchMeta { + pub(crate) next_page: Option, + pub(crate) prev_page: Option, +} + +#[derive(Deserialize, Debug)] +pub(crate) struct Search { + pub(crate) crates: Vec, + pub(crate) meta: SearchMeta, + pub(crate) executed_query: Option, +} + impl RegistryApi { pub fn new(api_base: Url, max_retries: u32) -> Result { let headers = vec![ @@ -227,4 +247,71 @@ impl RegistryApi { Ok(result) } + + /// Fetch crates from the registry's API + pub(crate) async fn get_crates(&self, query: Option<&str>) -> Result { + #[derive(Deserialize, Debug)] + struct SearchError { + detail: String, + } + + #[derive(Deserialize, Debug)] + struct SearchResponse { + crates: Option>, + meta: Option, + errors: Option>, + } + + let url = { + let mut url = self.api_base.clone(); + url.path_segments_mut() + .map_err(|()| anyhow!("Invalid API url"))? + .extend(&["api", "v1", "crates"]); + url.set_query(query); + url + }; + + // Extract the query from the query args + let executed_query = url.query_pairs().find_map(|(key, value)| { + if key == "q" { + Some(value.to_string()) + } else { + None + } + }); + + let response: SearchResponse = retry_async( + || async { + Ok(self + .client + .get(url.clone()) + .send() + .await? + .error_for_status()?) + }, + self.max_retries, + ) + .await? + .json() + .await?; + + if let Some(errors) = response.errors { + let messages: Vec<_> = errors.into_iter().map(|e| e.detail).collect(); + bail!("got error from crates.io: {}", messages.join("\n")); + } + + let Some(crates) = response.crates else { + bail!("missing releases in crates.io response"); + }; + + let Some(meta) = response.meta else { + bail!("missing metadata in crates.io response"); + }; + + Ok(Search { + crates, + meta, + executed_query, + }) + } } diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 34352a483..acbeebaf9 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -31,12 +31,6 @@ pub(crate) mod sized_buffer; use std::{future::Future, thread, time::Duration}; -pub(crate) const APP_USER_AGENT: &str = concat!( - env!("CARGO_PKG_NAME"), - " ", - include_str!(concat!(env!("OUT_DIR"), "/git_version")) -); - pub(crate) fn report_error(err: &anyhow::Error) { // Debug-format for anyhow errors includes context & backtrace if std::env::var("SENTRY_DSN").is_ok() { diff --git a/src/web/mod.rs b/src/web/mod.rs index 113cdb5d5..8ce5832ce 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -424,6 +424,7 @@ async fn apply_middleware( .layer(Extension(context.service_metrics()?)) .layer(Extension(context.instance_metrics()?)) .layer(Extension(context.config()?)) + .layer(Extension(context.registry_api()?)) .layer(Extension(async_storage)) .layer(option_layer(template_data.map(Extension))) .layer(middleware::from_fn(csp::csp_middleware)) diff --git a/src/web/releases.rs b/src/web/releases.rs index 8419afeb8..560b38302 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -3,7 +3,7 @@ use crate::{ build_queue::{QueuedCrate, REBUILD_PRIORITY}, cdn, impl_axum_webpage, - utils::{report_error, retry_async}, + utils::report_error, web::{ axum_parse_uri_with_params, axum_redirect, encode_url_path, error::{AxumNope, AxumResult}, @@ -12,9 +12,9 @@ use crate::{ page::templates::{filters, RenderRegular, RenderSolid}, ReqVersion, }, - AsyncBuildQueue, Config, InstanceMetrics, + AsyncBuildQueue, Config, InstanceMetrics, RegistryApi, }; -use anyhow::{anyhow, bail, Context as _, Result}; +use anyhow::{anyhow, Context as _, Result}; use axum::{ extract::{Extension, Query}, response::{IntoResponse, Response as AxumResponse}, @@ -23,14 +23,13 @@ use base64::{engine::general_purpose::STANDARD as b64, Engine}; use chrono::{DateTime, Utc}; use futures_util::stream::TryStreamExt; use itertools::Itertools; -use once_cell::sync::Lazy; use rinja::Template; use serde::{Deserialize, Serialize}; use sqlx::Row; use std::collections::{BTreeMap, HashMap, HashSet}; use std::str; use std::sync::Arc; -use tracing::{debug, warn}; +use tracing::warn; use url::form_urlencoded; use super::cache::CachePolicy; @@ -143,85 +142,14 @@ struct SearchResult { /// This delegates to the crates.io search API. async fn get_search_results( conn: &mut sqlx::PgConnection, - config: &Config, - query_params: &str, + registry: &RegistryApi, + query_params: Option<&str>, ) -> Result { - #[derive(Deserialize)] - struct CratesIoError { - detail: String, - } - #[derive(Deserialize)] - struct CratesIoSearchResult { - crates: Option>, - meta: Option, - errors: Option>, - } - #[derive(Deserialize, Debug)] - struct CratesIoCrate { - name: String, - } - #[derive(Deserialize, Debug)] - struct CratesIoMeta { - next_page: Option, - prev_page: Option, - } - - use crate::utils::APP_USER_AGENT; - use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, USER_AGENT}; - use reqwest::Client as HttpClient; - - static HTTP_CLIENT: Lazy = Lazy::new(|| { - let mut headers = HeaderMap::new(); - headers.insert(USER_AGENT, HeaderValue::from_static(APP_USER_AGENT)); - headers.insert(ACCEPT, HeaderValue::from_static("application/json")); - HttpClient::builder() - .default_headers(headers) - .build() - .unwrap() - }); - - let url = config - .registry_api_host - .join(&format!("api/v1/crates{query_params}"))?; - debug!("fetching search results from {}", url); - - // extract the query from the query args. - // This is easier because the query might have been encoded in the bash64-encoded - // paginate parameter. - let executed_query = url.query_pairs().find_map(|(key, value)| { - if key == "q" { - Some(value.to_string()) - } else { - None - } - }); - - let response: CratesIoSearchResult = retry_async( - || async { - Ok(HTTP_CLIENT - .get(url.clone()) - .send() - .await? - .error_for_status()?) - }, - config.crates_io_api_call_retries, - ) - .await? - .json() - .await?; - - if let Some(errors) = response.errors { - let messages: Vec<_> = errors.into_iter().map(|e| e.detail).collect(); - bail!("got error from crates.io: {}", messages.join("\n")); - } - - let Some(crates) = response.crates else { - bail!("missing releases in crates.io response"); - }; - - let Some(meta) = response.meta else { - bail!("missing metadata in crates.io response"); - }; + let crate::registry_api::Search { + crates, + meta, + executed_query, + } = registry.get_crates(query_params).await?; let names = Arc::new( crates @@ -574,6 +502,7 @@ impl_axum_webpage! { pub(crate) async fn search_handler( mut conn: DbConnection, Extension(config): Extension>, + Extension(registry): Extension>, Extension(metrics): Extension>, Query(mut params): Query>, ) -> AxumResult { @@ -646,20 +575,21 @@ pub(crate) async fn search_handler( AxumNope::NoResults })?; let query_params = String::from_utf8_lossy(&decoded); - - if !query_params.starts_with('?') { - // sometimes we see plain bytes being passed to `paginate`. - // In these cases we just return `NoResults` and don't call - // the crates.io API. - // The whole point of the `paginate` design is that we don't - // know anything about the pagination args and crates.io can - // change them as they wish, so we cannot do any more checks here. - warn!( - "didn't get query args in `paginate` arguments for search: \"{}\"", - query_params - ); - return Err(AxumNope::NoResults); - } + let query_params = match query_params.strip_prefix('?') { + Some(query_params) => query_params, + None => { + // sometimes we see plain bytes being passed to `paginate`. + // In these cases we just return `NoResults` and don't call + // the crates.io API. + // The whole point of the `paginate` design is that we don't + // know anything about the pagination args and crates.io can + // change them as they wish, so we cannot do any more checks here. + warn!( + "didn't get query args in `paginate` arguments for search: \"{query_params}\"" + ); + return Err(AxumNope::NoResults); + } + }; let mut p = form_urlencoded::parse(query_params.as_bytes()); if let Some(v) = p.find_map(|(k, v)| { @@ -672,7 +602,7 @@ pub(crate) async fn search_handler( sort_by = v; }; - get_search_results(&mut conn, &config, &query_params).await? + get_search_results(&mut conn, ®istry, Some(query_params)).await? } else if !query.is_empty() { let query_params: String = form_urlencoded::Serializer::new(String::new()) .append_pair("q", &query) @@ -680,7 +610,7 @@ pub(crate) async fn search_handler( .append_pair("per_page", &RELEASES_IN_RELEASES.to_string()) .finish(); - get_search_results(&mut conn, &config, &format!("?{}", &query_params)).await? + get_search_results(&mut conn, ®istry, Some(&query_params)).await? } else { return Err(AxumNope::NoResults); }; From 64a3096338fce7c19265acd4592e8b50fac0f77b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=81abor?= Date: Wed, 23 Oct 2024 23:12:36 -0400 Subject: [PATCH 2/3] Remove optional query params --- src/registry_api.rs | 4 ++-- src/web/releases.rs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/registry_api.rs b/src/registry_api.rs index 74ee753e9..5d65a052e 100644 --- a/src/registry_api.rs +++ b/src/registry_api.rs @@ -249,7 +249,7 @@ impl RegistryApi { } /// Fetch crates from the registry's API - pub(crate) async fn get_crates(&self, query: Option<&str>) -> Result { + pub(crate) async fn search(&self, query_params: &str) -> Result { #[derive(Deserialize, Debug)] struct SearchError { detail: String, @@ -267,7 +267,7 @@ impl RegistryApi { url.path_segments_mut() .map_err(|()| anyhow!("Invalid API url"))? .extend(&["api", "v1", "crates"]); - url.set_query(query); + url.set_query(Some(query_params)); url }; diff --git a/src/web/releases.rs b/src/web/releases.rs index 560b38302..f37b2de40 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -143,13 +143,13 @@ struct SearchResult { async fn get_search_results( conn: &mut sqlx::PgConnection, registry: &RegistryApi, - query_params: Option<&str>, + query_params: &str, ) -> Result { let crate::registry_api::Search { crates, meta, executed_query, - } = registry.get_crates(query_params).await?; + } = registry.search(query_params).await?; let names = Arc::new( crates @@ -602,7 +602,7 @@ pub(crate) async fn search_handler( sort_by = v; }; - get_search_results(&mut conn, ®istry, Some(query_params)).await? + get_search_results(&mut conn, ®istry, query_params).await? } else if !query.is_empty() { let query_params: String = form_urlencoded::Serializer::new(String::new()) .append_pair("q", &query) @@ -610,7 +610,7 @@ pub(crate) async fn search_handler( .append_pair("per_page", &RELEASES_IN_RELEASES.to_string()) .finish(); - get_search_results(&mut conn, ®istry, Some(&query_params)).await? + get_search_results(&mut conn, ®istry, &query_params).await? } else { return Err(AxumNope::NoResults); }; From 43445bf6fb6bd58d27d356049f17334a9ade6776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20=C5=81abor?= Date: Thu, 24 Oct 2024 00:42:22 -0400 Subject: [PATCH 3/3] Extract executed_query before search --- src/registry_api.rs | 16 +----------- src/web/releases.rs | 63 ++++++++++++++++----------------------------- 2 files changed, 23 insertions(+), 56 deletions(-) diff --git a/src/registry_api.rs b/src/registry_api.rs index 5d65a052e..9e55898fe 100644 --- a/src/registry_api.rs +++ b/src/registry_api.rs @@ -86,7 +86,6 @@ pub(crate) struct SearchMeta { pub(crate) struct Search { pub(crate) crates: Vec, pub(crate) meta: SearchMeta, - pub(crate) executed_query: Option, } impl RegistryApi { @@ -271,15 +270,6 @@ impl RegistryApi { url }; - // Extract the query from the query args - let executed_query = url.query_pairs().find_map(|(key, value)| { - if key == "q" { - Some(value.to_string()) - } else { - None - } - }); - let response: SearchResponse = retry_async( || async { Ok(self @@ -308,10 +298,6 @@ impl RegistryApi { bail!("missing metadata in crates.io response"); }; - Ok(Search { - crates, - meta, - executed_query, - }) + Ok(Search { crates, meta }) } } diff --git a/src/web/releases.rs b/src/web/releases.rs index f37b2de40..e9c332169 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -132,7 +132,6 @@ pub(crate) async fn get_releases( struct SearchResult { pub results: Vec, - pub executed_query: Option, pub prev_page: Option, pub next_page: Option, } @@ -145,11 +144,7 @@ async fn get_search_results( registry: &RegistryApi, query_params: &str, ) -> Result { - let crate::registry_api::Search { - crates, - meta, - executed_query, - } = registry.search(query_params).await?; + let crate::registry_api::Search { crates, meta } = registry.search(query_params).await?; let names = Arc::new( crates @@ -220,7 +215,6 @@ async fn get_search_results( .filter_map(|name| crates.get(name)) .cloned() .collect(), - executed_query, prev_page: meta.prev_page, next_page: meta.next_page, }) @@ -506,7 +500,7 @@ pub(crate) async fn search_handler( Extension(metrics): Extension>, Query(mut params): Query>, ) -> AxumResult { - let query = params + let mut query = params .get("query") .map(|q| q.to_string()) .unwrap_or_else(|| "".to_string()); @@ -568,39 +562,28 @@ pub(crate) async fn search_handler( let search_result = if let Some(paginate) = params.get("paginate") { let decoded = b64.decode(paginate.as_bytes()).map_err(|e| { - warn!( - "error when decoding pagination base64 string \"{}\": {:?}", - paginate, e - ); + warn!("error when decoding pagination base64 string \"{paginate}\": {e:?}"); AxumNope::NoResults })?; let query_params = String::from_utf8_lossy(&decoded); - let query_params = match query_params.strip_prefix('?') { - Some(query_params) => query_params, - None => { - // sometimes we see plain bytes being passed to `paginate`. - // In these cases we just return `NoResults` and don't call - // the crates.io API. - // The whole point of the `paginate` design is that we don't - // know anything about the pagination args and crates.io can - // change them as they wish, so we cannot do any more checks here. - warn!( - "didn't get query args in `paginate` arguments for search: \"{query_params}\"" - ); - return Err(AxumNope::NoResults); - } - }; + let query_params = query_params.strip_prefix('?').ok_or_else(|| { + // sometimes we see plain bytes being passed to `paginate`. + // In these cases we just return `NoResults` and don't call + // the crates.io API. + // The whole point of the `paginate` design is that we don't + // know anything about the pagination args and crates.io can + // change them as they wish, so we cannot do any more checks here. + warn!("didn't get query args in `paginate` arguments for search: \"{query_params}\""); + AxumNope::NoResults + })?; - let mut p = form_urlencoded::parse(query_params.as_bytes()); - if let Some(v) = p.find_map(|(k, v)| { - if &k == "sort" { - Some(v.to_string()) - } else { - None + for (k, v) in form_urlencoded::parse(query_params.as_bytes()) { + match &*k { + "q" => query = v.to_string(), + "sort" => sort_by = v.to_string(), + _ => {} } - }) { - sort_by = v; - }; + } get_search_results(&mut conn, ®istry, query_params).await? } else if !query.is_empty() { @@ -615,18 +598,16 @@ pub(crate) async fn search_handler( return Err(AxumNope::NoResults); }; - let executed_query = search_result.executed_query.unwrap_or_default(); - let title = if search_result.results.is_empty() { - format!("No results found for '{executed_query}'") + format!("No results found for '{query}'") } else { - format!("Search results for '{executed_query}'") + format!("Search results for '{query}'") }; Ok(Search { title, releases: search_result.results, - search_query: Some(executed_query), + search_query: Some(query), search_sort_by: Some(sort_by), next_page_link: search_result .next_page