diff --git a/crates/crates_io_diesel_helpers/src/fns.rs b/crates/crates_io_diesel_helpers/src/fns.rs index ae137d8ecb8..ace831c8ffd 100644 --- a/crates/crates_io_diesel_helpers/src/fns.rs +++ b/crates/crates_io_diesel_helpers/src/fns.rs @@ -14,3 +14,4 @@ define_sql_function!(fn floor(x: Double) -> Integer); define_sql_function!(fn greatest(x: T, y: T) -> T); define_sql_function!(fn least(x: T, y: T) -> T); define_sql_function!(fn split_part(string: Text, delimiter: Text, n: Integer) -> Text); +define_sql_function!(fn semver_ord(num: Text) -> Nullable); diff --git a/src/controllers/helpers/pagination.rs b/src/controllers/helpers/pagination.rs index 5ce90c882e8..094a613bff9 100644 --- a/src/controllers/helpers/pagination.rs +++ b/src/controllers/helpers/pagination.rs @@ -461,7 +461,7 @@ macro_rules! seek { impl From<[<$variant Helper>]> for $variant { fn from(helper: [<$variant Helper>]) -> Self { let [<$variant Helper>]($($field,)*) = helper; - Self { $($field,)* } + Self { $($field: $field.clone(),)* } } } @@ -470,7 +470,7 @@ macro_rules! seek { where S: serde::Serializer, { - let helper = [<$variant Helper>]($(self.$field,)*); + let helper = [<$variant Helper>]($(self.$field.clone(),)*); serde::Serialize::serialize(&helper, serializer) } } diff --git a/src/controllers/krate/versions.rs b/src/controllers/krate/versions.rs index 5b665e5d71c..96f6d2cc7db 100644 --- a/src/controllers/krate/versions.rs +++ b/src/controllers/krate/versions.rs @@ -14,6 +14,7 @@ use crate::views::EncodableVersion; use axum::Json; use axum::extract::FromRequestParts; use axum_extra::extract::Query; +use crates_io_diesel_helpers::semver_ord; use diesel::dsl::not; use diesel::prelude::*; use diesel_async::{AsyncPgConnection, RunQueryDsl}; @@ -98,13 +99,8 @@ pub async fn list_versions( None => None, }; - // Sort by semver by default - let versions_and_publishers = match ¶ms.sort.as_ref().map(|s| s.to_lowercase()).as_deref() { - Some("date") => { - list_by_date(crate_id, pagination.as_ref(), ¶ms, &req, &mut conn).await? - } - _ => list_by_semver(crate_id, pagination.as_ref(), ¶ms, &req, &mut conn).await?, - }; + let versions_and_publishers = + list(crate_id, pagination.as_ref(), ¶ms, &req, &mut conn).await?; let versions = versions_and_publishers .data @@ -125,12 +121,12 @@ pub async fn list_versions( })) } -/// Seek-based pagination of versions by date +/// Seek-based pagination of versions /// /// # Panics /// -/// This function will panic if `option` is built with `enable_pages` set to true. -async fn list_by_date( +/// This function will panic if `options` is built with `enable_pages` set to true. +async fn list( crate_id: i32, options: Option<&PaginationOptions>, params: &ListQueryParams, @@ -139,6 +135,11 @@ async fn list_by_date( ) -> AppResult { use seek::*; + let seek = match ¶ms.sort.as_ref().map(|s| s.to_lowercase()).as_deref() { + Some("date") => Seek::Date, + _ => Seek::Semver, + }; + let make_base_query = || { let mut query = versions::table .filter(versions::crate_id.eq(crate_id)) @@ -159,23 +160,40 @@ async fn list_by_date( !matches!(&options.page, Page::Numeric(_)), "?page= is not supported" ); - if let Some(SeekPayload::Date(Date { created_at, id })) = Seek::Date.after(&options.page)? { - query = query.filter( - versions::created_at - .eq(created_at) - .and(versions::id.lt(id)) - .or(versions::created_at.lt(created_at)), - ) + + match seek.after(&options.page)? { + Some(SeekPayload::Date(Date { created_at, id })) => { + query = query.filter( + versions::created_at + .eq(created_at) + .and(versions::id.lt(id)) + .or(versions::created_at.lt(created_at)), + ) + } + Some(SeekPayload::Semver(Semver { num, id })) => { + query = query.filter( + versions::semver_ord + .eq(semver_ord(num.clone())) + .and(versions::id.lt(id)) + .or(versions::semver_ord.lt(semver_ord(num))), + ) + } + None => {} } + query = query.limit(options.per_page); } - query = query.order((versions::created_at.desc(), versions::id.desc())); + if seek == Seek::Date { + query = query.order((versions::created_at.desc(), versions::id.desc())); + } else { + query = query.order((versions::semver_ord.desc(), versions::id.desc())); + } let data: Vec<(Version, Option)> = query.load(conn).await?; let mut next_page = None; if let Some(options) = options { - next_page = next_seek_params(&data, options, |last| Seek::Date.to_payload(last))? + next_page = next_seek_params(&data, options, |last| seek.to_payload(last))? .map(|p| req.query_with_params(p)); }; @@ -232,154 +250,6 @@ async fn list_by_date( }) } -/// Seek-based pagination of versions by semver -/// -/// Unfortunately, Heroku Postgres has no support for the semver PG extension. -/// Therefore, we need to perform both sorting and pagination manually on the server. -/// -/// # Panics -/// -/// This function will panic if `option` is built with `enable_pages` set to true. -async fn list_by_semver( - crate_id: i32, - options: Option<&PaginationOptions>, - params: &ListQueryParams, - req: &Parts, - conn: &mut AsyncPgConnection, -) -> AppResult { - use seek::*; - - let include = params.include()?; - let mut query = versions::table - .filter(versions::crate_id.eq(crate_id)) - .into_boxed(); - - if !params.nums.is_empty() { - query = query.filter(versions::num.eq_any(params.nums.iter().map(|s| s.as_str()))); - } - - let (data, total, release_tracks) = if let Some(options) = options { - // Since versions will only increase in the future and both sorting and pagination need to - // happen on the app server, implementing it with fetching only the data needed for sorting - // and pagination, then making another query for the data to respond with, would minimize - // payload and memory usage. This way, we can utilize the sorted map and enrich it later - // without sorting twice. - // Sorting by semver but opted for id as the seek key because num can be quite lengthy, - // while id values are significantly smaller. - - let mut sorted_versions = IndexMap::new(); - query - .select((versions::id, versions::num, versions::yanked)) - .load_stream::<(i32, String, bool)>(conn) - .await? - .try_for_each(|(id, num, yanked)| { - let semver = semver::Version::parse(&num).ok(); - sorted_versions.insert(id, (semver, yanked, None)); - future::ready(Ok(())) - }) - .await?; - - sorted_versions - .sort_unstable_by(|_, (semver_a, _, _), _, (semver_b, _, _)| semver_b.cmp(semver_a)); - - assert!( - !matches!(&options.page, Page::Numeric(_)), - "?page= is not supported" - ); - - let release_tracks = include.release_tracks.then(|| { - ReleaseTracks::from_sorted_semver_iter( - sorted_versions - .values() - .filter(|(_, yanked, _)| !yanked) - .filter_map(|(semver, _, _)| semver.as_ref()), - ) - }); - - let mut idx = Some(0); - if let Some(SeekPayload::Semver(Semver { id })) = Seek::Semver.after(&options.page)? { - idx = sorted_versions - .get_index_of(&id) - .filter(|i| i + 1 < sorted_versions.len()) - .map(|i| i + 1); - } - if let Some(start) = idx { - let end = (start + options.per_page as usize).min(sorted_versions.len()); - let ids = sorted_versions[start..end] - .keys() - .cloned() - .collect::>(); - versions::table - .filter(versions::crate_id.eq(crate_id)) - .left_outer_join(users::table) - .select(<(Version, Option)>::as_select()) - .filter(versions::id.eq_any(ids)) - .load_stream::<(Version, Option)>(conn) - .await? - .try_for_each(|row| { - // The versions are already sorted, and we only need to enrich the fetched rows into them. - // Therefore, other values can now be safely ignored. - sorted_versions - .entry(row.0.id) - .and_modify(|entry| *entry = (None, false, Some(row))); - - future::ready(Ok(())) - }) - .await?; - - let len = sorted_versions.len(); - ( - sorted_versions - .into_values() - .filter_map(|(_, _, v)| v) - .collect(), - len, - release_tracks, - ) - } else { - (vec![], 0, release_tracks) - } - } else { - let mut data = IndexMap::new(); - query - .left_outer_join(users::table) - .select(<(Version, Option)>::as_select()) - .load_stream::<(Version, Option)>(conn) - .await? - .try_for_each(|row| { - if let Ok(semver) = semver::Version::parse(&row.0.num) { - data.insert(semver, row); - }; - future::ready(Ok(())) - }) - .await?; - data.sort_unstable_by(|a, _, b, _| b.cmp(a)); - let total = data.len(); - let release_tracks = include.release_tracks.then(|| { - ReleaseTracks::from_sorted_semver_iter( - data.iter() - .flat_map(|(semver, (version, _))| (!version.yanked).then_some(semver)), - ) - }); - (data.into_values().collect(), total, release_tracks) - }; - - let mut next_page = None; - if let Some(options) = options { - next_page = next_seek_params(&data, options, |last| Seek::Semver.to_payload(last))? - .map(|p| req.query_with_params(p)) - }; - - Ok(PaginatedVersionsAndPublishers { - data, - meta: ResponseMeta { - total: total as i64, - next_page, - release_tracks, - }, - }) -} - mod seek { use crate::controllers::helpers::pagination::seek; use crate::models::{User, Version}; @@ -392,6 +262,7 @@ mod seek { seek!( pub enum Seek { Semver { + num: String, id: i32, }, Date { @@ -406,7 +277,10 @@ mod seek { pub(crate) fn to_payload(&self, record: &(Version, Option)) -> SeekPayload { let (Version { id, created_at, .. }, _) = *record; match *self { - Seek::Semver => SeekPayload::Semver(Semver { id }), + Seek::Semver => SeekPayload::Semver(Semver { + num: record.0.num.clone(), + id, + }), Seek::Date => SeekPayload::Date(Date { created_at, id }), } } diff --git a/src/tests/routes/crates/versions/list.rs b/src/tests/routes/crates/versions/list.rs index 6657145015c..0d36a4d0382 100644 --- a/src/tests/routes/crates/versions/list.rs +++ b/src/tests/routes/crates/versions/list.rs @@ -291,9 +291,9 @@ async fn test_seek_based_pagination_semver_sorting() -> anyhow::Result<()> { assert_eq!(json.meta.total as usize, expects.len()); assert_eq!(json.meta.release_tracks, None); - // A decodable seek value, MTAwCg (100), but doesn't actually exist + // A decodable seek value, WyIwLjAuMCIsMTAwXQ (["0.0.0",100]), but doesn't actually exist let json: VersionList = anon - .get_with_query(url, "per_page=10&sort=semver&seek=MTAwCg") + .get_with_query(url, "per_page=10&sort=semver&seek=WyIwLjAuMCIsMTAwXQ") .await .good(); assert_eq!(json.versions.len(), 0);