From b402c58934d7369f6e53aedc605881b63053359f Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 13 Mar 2025 10:21:40 +0100 Subject: [PATCH 1/4] Make `semver_ord()` SQL fn usable in `diesel` queries --- crates/crates_io_diesel_helpers/src/fns.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/crates_io_diesel_helpers/src/fns.rs b/crates/crates_io_diesel_helpers/src/fns.rs index ae137d8ecb8..ace831c8ffd 100644 --- a/crates/crates_io_diesel_helpers/src/fns.rs +++ b/crates/crates_io_diesel_helpers/src/fns.rs @@ -14,3 +14,4 @@ define_sql_function!(fn floor(x: Double) -> Integer); define_sql_function!(fn greatest(x: T, y: T) -> T); define_sql_function!(fn least(x: T, y: T) -> T); define_sql_function!(fn split_part(string: Text, delimiter: Text, n: Integer) -> Text); +define_sql_function!(fn semver_ord(num: Text) -> Nullable); From 859488be2a42fb62a18c20a1a61fb5b1dc429697 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 13 Mar 2025 10:23:09 +0100 Subject: [PATCH 2/4] pagination: Clone values between variant structs This allows us to use `String` in seek variants. The `.clone()` call isn't ideal, and shouldn't be necessary, but rewriting the `seek!()` macro was out of scope for this effort. --- src/controllers/helpers/pagination.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/controllers/helpers/pagination.rs b/src/controllers/helpers/pagination.rs index 5ce90c882e8..094a613bff9 100644 --- a/src/controllers/helpers/pagination.rs +++ b/src/controllers/helpers/pagination.rs @@ -461,7 +461,7 @@ macro_rules! seek { impl From<[<$variant Helper>]> for $variant { fn from(helper: [<$variant Helper>]) -> Self { let [<$variant Helper>]($($field,)*) = helper; - Self { $($field,)* } + Self { $($field: $field.clone(),)* } } } @@ -470,7 +470,7 @@ macro_rules! seek { where S: serde::Serializer, { - let helper = [<$variant Helper>]($(self.$field,)*); + let helper = [<$variant Helper>]($(self.$field.clone(),)*); serde::Serialize::serialize(&helper, serializer) } } From 7a9364d1775d992f51456f8d9c9f68437a4f0f5f Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 13 Mar 2025 10:24:00 +0100 Subject: [PATCH 3/4] controllers/krate/versions: Use `semver_ord` column to sort by semver in the database --- src/controllers/krate/versions.rs | 170 +++++++++-------------- src/tests/routes/crates/versions/list.rs | 4 +- 2 files changed, 69 insertions(+), 105 deletions(-) diff --git a/src/controllers/krate/versions.rs b/src/controllers/krate/versions.rs index 5b665e5d71c..23a16cf133a 100644 --- a/src/controllers/krate/versions.rs +++ b/src/controllers/krate/versions.rs @@ -14,6 +14,7 @@ use crate::views::EncodableVersion; use axum::Json; use axum::extract::FromRequestParts; use axum_extra::extract::Query; +use crates_io_diesel_helpers::semver_ord; use diesel::dsl::not; use diesel::prelude::*; use diesel_async::{AsyncPgConnection, RunQueryDsl}; @@ -234,9 +235,6 @@ async fn list_by_date( /// Seek-based pagination of versions by semver /// -/// Unfortunately, Heroku Postgres has no support for the semver PG extension. -/// Therefore, we need to perform both sorting and pagination manually on the server. -/// /// # Panics /// /// This function will panic if `option` is built with `enable_pages` set to true. @@ -249,131 +247,93 @@ async fn list_by_semver( ) -> AppResult { use seek::*; - let include = params.include()?; - let mut query = versions::table - .filter(versions::crate_id.eq(crate_id)) - .into_boxed(); - - if !params.nums.is_empty() { - query = query.filter(versions::num.eq_any(params.nums.iter().map(|s| s.as_str()))); - } - - let (data, total, release_tracks) = if let Some(options) = options { - // Since versions will only increase in the future and both sorting and pagination need to - // happen on the app server, implementing it with fetching only the data needed for sorting - // and pagination, then making another query for the data to respond with, would minimize - // payload and memory usage. This way, we can utilize the sorted map and enrich it later - // without sorting twice. - // Sorting by semver but opted for id as the seek key because num can be quite lengthy, - // while id values are significantly smaller. + let make_base_query = || { + let mut query = versions::table + .filter(versions::crate_id.eq(crate_id)) + .left_outer_join(users::table) + .select(<(Version, Option)>::as_select()) + .into_boxed(); - let mut sorted_versions = IndexMap::new(); + if !params.nums.is_empty() { + query = query.filter(versions::num.eq_any(params.nums.iter().map(|s| s.as_str()))); + } query - .select((versions::id, versions::num, versions::yanked)) - .load_stream::<(i32, String, bool)>(conn) - .await? - .try_for_each(|(id, num, yanked)| { - let semver = semver::Version::parse(&num).ok(); - sorted_versions.insert(id, (semver, yanked, None)); - future::ready(Ok(())) - }) - .await?; + }; - sorted_versions - .sort_unstable_by(|_, (semver_a, _, _), _, (semver_b, _, _)| semver_b.cmp(semver_a)); + let mut query = make_base_query(); + if let Some(options) = options { assert!( !matches!(&options.page, Page::Numeric(_)), "?page= is not supported" ); - - let release_tracks = include.release_tracks.then(|| { - ReleaseTracks::from_sorted_semver_iter( - sorted_versions - .values() - .filter(|(_, yanked, _)| !yanked) - .filter_map(|(semver, _, _)| semver.as_ref()), + if let Some(SeekPayload::Semver(Semver { num, id })) = Seek::Semver.after(&options.page)? { + query = query.filter( + versions::semver_ord + .eq(semver_ord(num.clone())) + .and(versions::id.lt(id)) + .or(versions::semver_ord.lt(semver_ord(num))), ) - }); - - let mut idx = Some(0); - if let Some(SeekPayload::Semver(Semver { id })) = Seek::Semver.after(&options.page)? { - idx = sorted_versions - .get_index_of(&id) - .filter(|i| i + 1 < sorted_versions.len()) - .map(|i| i + 1); } - if let Some(start) = idx { - let end = (start + options.per_page as usize).min(sorted_versions.len()); - let ids = sorted_versions[start..end] - .keys() - .cloned() - .collect::>(); + query = query.limit(options.per_page); + } + + query = query.order((versions::semver_ord.desc(), versions::id.desc())); + + let data: Vec<(Version, Option)> = query.load(conn).await?; + let mut next_page = None; + if let Some(options) = options { + next_page = next_seek_params(&data, options, |last| Seek::Semver.to_payload(last))? + .map(|p| req.query_with_params(p)); + }; + + let release_tracks = if params.include()?.release_tracks { + let mut sorted_versions = IndexSet::new(); + if options.is_some() { versions::table .filter(versions::crate_id.eq(crate_id)) - .left_outer_join(users::table) - .select(<(Version, Option)>::as_select()) - .filter(versions::id.eq_any(ids)) - .load_stream::<(Version, Option)>(conn) + .filter(not(versions::yanked)) + .select(versions::num) + .load_stream::(conn) .await? - .try_for_each(|row| { - // The versions are already sorted, and we only need to enrich the fetched rows into them. - // Therefore, other values can now be safely ignored. - sorted_versions - .entry(row.0.id) - .and_modify(|entry| *entry = (None, false, Some(row))); - + .try_for_each(|num| { + if let Ok(semver) = semver::Version::parse(&num) { + sorted_versions.insert(semver); + }; future::ready(Ok(())) }) .await?; - - let len = sorted_versions.len(); - ( - sorted_versions - .into_values() - .filter_map(|(_, _, v)| v) - .collect(), - len, - release_tracks, - ) } else { - (vec![], 0, release_tracks) + sorted_versions = data + .iter() + .flat_map(|(version, _)| { + (!version.yanked) + .then_some(version) + .and_then(|v| semver::Version::parse(&v.num).ok()) + }) + .collect(); } + + sorted_versions.sort_unstable_by(|a, b| b.cmp(a)); + Some(ReleaseTracks::from_sorted_semver_iter( + sorted_versions.iter(), + )) } else { - let mut data = IndexMap::new(); - query - .left_outer_join(users::table) - .select(<(Version, Option)>::as_select()) - .load_stream::<(Version, Option)>(conn) - .await? - .try_for_each(|row| { - if let Ok(semver) = semver::Version::parse(&row.0.num) { - data.insert(semver, row); - }; - future::ready(Ok(())) - }) - .await?; - data.sort_unstable_by(|a, _, b, _| b.cmp(a)); - let total = data.len(); - let release_tracks = include.release_tracks.then(|| { - ReleaseTracks::from_sorted_semver_iter( - data.iter() - .flat_map(|(semver, (version, _))| (!version.yanked).then_some(semver)), - ) - }); - (data.into_values().collect(), total, release_tracks) + None }; - let mut next_page = None; - if let Some(options) = options { - next_page = next_seek_params(&data, options, |last| Seek::Semver.to_payload(last))? - .map(|p| req.query_with_params(p)) + // Since the total count is retrieved through an additional query, to maintain consistency + // with other pagination methods, we only make a count query while data is not empty. + let total = if !data.is_empty() { + make_base_query().count().get_result(conn).await? + } else { + 0 }; Ok(PaginatedVersionsAndPublishers { data, meta: ResponseMeta { - total: total as i64, + total, next_page, release_tracks, }, @@ -392,6 +352,7 @@ mod seek { seek!( pub enum Seek { Semver { + num: String, id: i32, }, Date { @@ -406,7 +367,10 @@ mod seek { pub(crate) fn to_payload(&self, record: &(Version, Option)) -> SeekPayload { let (Version { id, created_at, .. }, _) = *record; match *self { - Seek::Semver => SeekPayload::Semver(Semver { id }), + Seek::Semver => SeekPayload::Semver(Semver { + num: record.0.num.clone(), + id, + }), Seek::Date => SeekPayload::Date(Date { created_at, id }), } } diff --git a/src/tests/routes/crates/versions/list.rs b/src/tests/routes/crates/versions/list.rs index 6657145015c..0d36a4d0382 100644 --- a/src/tests/routes/crates/versions/list.rs +++ b/src/tests/routes/crates/versions/list.rs @@ -291,9 +291,9 @@ async fn test_seek_based_pagination_semver_sorting() -> anyhow::Result<()> { assert_eq!(json.meta.total as usize, expects.len()); assert_eq!(json.meta.release_tracks, None); - // A decodable seek value, MTAwCg (100), but doesn't actually exist + // A decodable seek value, WyIwLjAuMCIsMTAwXQ (["0.0.0",100]), but doesn't actually exist let json: VersionList = anon - .get_with_query(url, "per_page=10&sort=semver&seek=MTAwCg") + .get_with_query(url, "per_page=10&sort=semver&seek=WyIwLjAuMCIsMTAwXQ") .await .good(); assert_eq!(json.versions.len(), 0); From 9d65ca381ff396131c825ef51cc19f8e6d7149b0 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 13 Mar 2025 10:33:23 +0100 Subject: [PATCH 4/4] controllers/krate/versions: Merge `list_by_date()` and `list_by_semver()` fns --- src/controllers/krate/versions.rs | 158 +++++++----------------------- 1 file changed, 34 insertions(+), 124 deletions(-) diff --git a/src/controllers/krate/versions.rs b/src/controllers/krate/versions.rs index 23a16cf133a..96f6d2cc7db 100644 --- a/src/controllers/krate/versions.rs +++ b/src/controllers/krate/versions.rs @@ -99,13 +99,8 @@ pub async fn list_versions( None => None, }; - // Sort by semver by default - let versions_and_publishers = match ¶ms.sort.as_ref().map(|s| s.to_lowercase()).as_deref() { - Some("date") => { - list_by_date(crate_id, pagination.as_ref(), ¶ms, &req, &mut conn).await? - } - _ => list_by_semver(crate_id, pagination.as_ref(), ¶ms, &req, &mut conn).await?, - }; + let versions_and_publishers = + list(crate_id, pagination.as_ref(), ¶ms, &req, &mut conn).await?; let versions = versions_and_publishers .data @@ -126,12 +121,12 @@ pub async fn list_versions( })) } -/// Seek-based pagination of versions by date +/// Seek-based pagination of versions /// /// # Panics /// -/// This function will panic if `option` is built with `enable_pages` set to true. -async fn list_by_date( +/// This function will panic if `options` is built with `enable_pages` set to true. +async fn list( crate_id: i32, options: Option<&PaginationOptions>, params: &ListQueryParams, @@ -140,113 +135,11 @@ async fn list_by_date( ) -> AppResult { use seek::*; - let make_base_query = || { - let mut query = versions::table - .filter(versions::crate_id.eq(crate_id)) - .left_outer_join(users::table) - .select(<(Version, Option)>::as_select()) - .into_boxed(); - - if !params.nums.is_empty() { - query = query.filter(versions::num.eq_any(params.nums.iter().map(|s| s.as_str()))); - } - query + let seek = match ¶ms.sort.as_ref().map(|s| s.to_lowercase()).as_deref() { + Some("date") => Seek::Date, + _ => Seek::Semver, }; - let mut query = make_base_query(); - - if let Some(options) = options { - assert!( - !matches!(&options.page, Page::Numeric(_)), - "?page= is not supported" - ); - if let Some(SeekPayload::Date(Date { created_at, id })) = Seek::Date.after(&options.page)? { - query = query.filter( - versions::created_at - .eq(created_at) - .and(versions::id.lt(id)) - .or(versions::created_at.lt(created_at)), - ) - } - query = query.limit(options.per_page); - } - - query = query.order((versions::created_at.desc(), versions::id.desc())); - - let data: Vec<(Version, Option)> = query.load(conn).await?; - let mut next_page = None; - if let Some(options) = options { - next_page = next_seek_params(&data, options, |last| Seek::Date.to_payload(last))? - .map(|p| req.query_with_params(p)); - }; - - let release_tracks = if params.include()?.release_tracks { - let mut sorted_versions = IndexSet::new(); - if options.is_some() { - versions::table - .filter(versions::crate_id.eq(crate_id)) - .filter(not(versions::yanked)) - .select(versions::num) - .load_stream::(conn) - .await? - .try_for_each(|num| { - if let Ok(semver) = semver::Version::parse(&num) { - sorted_versions.insert(semver); - }; - future::ready(Ok(())) - }) - .await?; - } else { - sorted_versions = data - .iter() - .flat_map(|(version, _)| { - (!version.yanked) - .then_some(version) - .and_then(|v| semver::Version::parse(&v.num).ok()) - }) - .collect(); - } - - sorted_versions.sort_unstable_by(|a, b| b.cmp(a)); - Some(ReleaseTracks::from_sorted_semver_iter( - sorted_versions.iter(), - )) - } else { - None - }; - - // Since the total count is retrieved through an additional query, to maintain consistency - // with other pagination methods, we only make a count query while data is not empty. - let total = if !data.is_empty() { - make_base_query().count().get_result(conn).await? - } else { - 0 - }; - - Ok(PaginatedVersionsAndPublishers { - data, - meta: ResponseMeta { - total, - next_page, - release_tracks, - }, - }) -} - -/// Seek-based pagination of versions by semver -/// -/// # Panics -/// -/// This function will panic if `option` is built with `enable_pages` set to true. -async fn list_by_semver( - crate_id: i32, - options: Option<&PaginationOptions>, - params: &ListQueryParams, - req: &Parts, - conn: &mut AsyncPgConnection, -) -> AppResult { - use seek::*; - let make_base_query = || { let mut query = versions::table .filter(versions::crate_id.eq(crate_id)) @@ -267,23 +160,40 @@ async fn list_by_semver( !matches!(&options.page, Page::Numeric(_)), "?page= is not supported" ); - if let Some(SeekPayload::Semver(Semver { num, id })) = Seek::Semver.after(&options.page)? { - query = query.filter( - versions::semver_ord - .eq(semver_ord(num.clone())) - .and(versions::id.lt(id)) - .or(versions::semver_ord.lt(semver_ord(num))), - ) + + match seek.after(&options.page)? { + Some(SeekPayload::Date(Date { created_at, id })) => { + query = query.filter( + versions::created_at + .eq(created_at) + .and(versions::id.lt(id)) + .or(versions::created_at.lt(created_at)), + ) + } + Some(SeekPayload::Semver(Semver { num, id })) => { + query = query.filter( + versions::semver_ord + .eq(semver_ord(num.clone())) + .and(versions::id.lt(id)) + .or(versions::semver_ord.lt(semver_ord(num))), + ) + } + None => {} } + query = query.limit(options.per_page); } - query = query.order((versions::semver_ord.desc(), versions::id.desc())); + if seek == Seek::Date { + query = query.order((versions::created_at.desc(), versions::id.desc())); + } else { + query = query.order((versions::semver_ord.desc(), versions::id.desc())); + } let data: Vec<(Version, Option)> = query.load(conn).await?; let mut next_page = None; if let Some(options) = options { - next_page = next_seek_params(&data, options, |last| Seek::Semver.to_payload(last))? + next_page = next_seek_params(&data, options, |last| seek.to_payload(last))? .map(|p| req.query_with_params(p)); };