Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/crates_io_diesel_helpers/src/fns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ define_sql_function!(fn floor(x: Double) -> Integer);
define_sql_function!(fn greatest<T: SingleValue>(x: T, y: T) -> T);
define_sql_function!(fn least<T: SingleValue>(x: T, y: T) -> T);
define_sql_function!(fn split_part(string: Text, delimiter: Text, n: Integer) -> Text);
define_sql_function!(fn semver_ord(num: Text) -> Nullable<Jsonb>);
4 changes: 2 additions & 2 deletions src/controllers/helpers/pagination.rs
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ macro_rules! seek {
impl From<[<$variant Helper>]> for $variant {
fn from(helper: [<$variant Helper>]) -> Self {
let [<$variant Helper>]($($field,)*) = helper;
Self { $($field,)* }
Self { $($field: $field.clone(),)* }
}
}

Expand All @@ -470,7 +470,7 @@ macro_rules! seek {
where
S: serde::Serializer,
{
let helper = [<$variant Helper>]($(self.$field,)*);
let helper = [<$variant Helper>]($(self.$field.clone(),)*);
serde::Serialize::serialize(&helper, serializer)
}
}
Expand Down
210 changes: 42 additions & 168 deletions src/controllers/krate/versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::views::EncodableVersion;
use axum::Json;
use axum::extract::FromRequestParts;
use axum_extra::extract::Query;
use crates_io_diesel_helpers::semver_ord;
use diesel::dsl::not;
use diesel::prelude::*;
use diesel_async::{AsyncPgConnection, RunQueryDsl};
Expand Down Expand Up @@ -98,13 +99,8 @@ pub async fn list_versions(
None => None,
};

// Sort by semver by default
let versions_and_publishers = match &params.sort.as_ref().map(|s| s.to_lowercase()).as_deref() {
Some("date") => {
list_by_date(crate_id, pagination.as_ref(), &params, &req, &mut conn).await?
}
_ => list_by_semver(crate_id, pagination.as_ref(), &params, &req, &mut conn).await?,
};
let versions_and_publishers =
list(crate_id, pagination.as_ref(), &params, &req, &mut conn).await?;

let versions = versions_and_publishers
.data
Expand All @@ -125,12 +121,12 @@ pub async fn list_versions(
}))
}

/// Seek-based pagination of versions by date
/// Seek-based pagination of versions
///
/// # Panics
///
/// This function will panic if `option` is built with `enable_pages` set to true.
async fn list_by_date(
/// This function will panic if `options` is built with `enable_pages` set to true.
async fn list(
crate_id: i32,
options: Option<&PaginationOptions>,
params: &ListQueryParams,
Expand All @@ -139,6 +135,11 @@ async fn list_by_date(
) -> AppResult<PaginatedVersionsAndPublishers> {
use seek::*;

let seek = match &params.sort.as_ref().map(|s| s.to_lowercase()).as_deref() {
Some("date") => Seek::Date,
_ => Seek::Semver,
};

let make_base_query = || {
let mut query = versions::table
.filter(versions::crate_id.eq(crate_id))
Expand All @@ -159,23 +160,40 @@ async fn list_by_date(
!matches!(&options.page, Page::Numeric(_)),
"?page= is not supported"
);
if let Some(SeekPayload::Date(Date { created_at, id })) = Seek::Date.after(&options.page)? {
query = query.filter(
versions::created_at
.eq(created_at)
.and(versions::id.lt(id))
.or(versions::created_at.lt(created_at)),
)

match seek.after(&options.page)? {
Some(SeekPayload::Date(Date { created_at, id })) => {
query = query.filter(
versions::created_at
.eq(created_at)
.and(versions::id.lt(id))
.or(versions::created_at.lt(created_at)),
)
}
Some(SeekPayload::Semver(Semver { num, id })) => {
query = query.filter(
versions::semver_ord
.eq(semver_ord(num.clone()))
.and(versions::id.lt(id))
.or(versions::semver_ord.lt(semver_ord(num))),
)
}
None => {}
}

query = query.limit(options.per_page);
}

query = query.order((versions::created_at.desc(), versions::id.desc()));
if seek == Seek::Date {
query = query.order((versions::created_at.desc(), versions::id.desc()));
} else {
query = query.order((versions::semver_ord.desc(), versions::id.desc()));
}

let data: Vec<(Version, Option<User>)> = query.load(conn).await?;
let mut next_page = None;
if let Some(options) = options {
next_page = next_seek_params(&data, options, |last| Seek::Date.to_payload(last))?
next_page = next_seek_params(&data, options, |last| seek.to_payload(last))?
.map(|p| req.query_with_params(p));
};

Expand Down Expand Up @@ -232,154 +250,6 @@ async fn list_by_date(
})
}

/// Seek-based pagination of versions by semver
///
/// Unfortunately, Heroku Postgres has no support for the semver PG extension.
/// Therefore, we need to perform both sorting and pagination manually on the server.
///
/// # Panics
///
/// This function will panic if `option` is built with `enable_pages` set to true.
async fn list_by_semver(
crate_id: i32,
options: Option<&PaginationOptions>,
params: &ListQueryParams,
req: &Parts,
conn: &mut AsyncPgConnection,
) -> AppResult<PaginatedVersionsAndPublishers> {
use seek::*;

let include = params.include()?;
let mut query = versions::table
.filter(versions::crate_id.eq(crate_id))
.into_boxed();

if !params.nums.is_empty() {
query = query.filter(versions::num.eq_any(params.nums.iter().map(|s| s.as_str())));
}

let (data, total, release_tracks) = if let Some(options) = options {
// Since versions will only increase in the future and both sorting and pagination need to
// happen on the app server, implementing it with fetching only the data needed for sorting
// and pagination, then making another query for the data to respond with, would minimize
// payload and memory usage. This way, we can utilize the sorted map and enrich it later
// without sorting twice.
// Sorting by semver but opted for id as the seek key because num can be quite lengthy,
// while id values are significantly smaller.

let mut sorted_versions = IndexMap::new();
query
.select((versions::id, versions::num, versions::yanked))
.load_stream::<(i32, String, bool)>(conn)
.await?
.try_for_each(|(id, num, yanked)| {
let semver = semver::Version::parse(&num).ok();
sorted_versions.insert(id, (semver, yanked, None));
future::ready(Ok(()))
})
.await?;

sorted_versions
.sort_unstable_by(|_, (semver_a, _, _), _, (semver_b, _, _)| semver_b.cmp(semver_a));

assert!(
!matches!(&options.page, Page::Numeric(_)),
"?page= is not supported"
);

let release_tracks = include.release_tracks.then(|| {
ReleaseTracks::from_sorted_semver_iter(
sorted_versions
.values()
.filter(|(_, yanked, _)| !yanked)
.filter_map(|(semver, _, _)| semver.as_ref()),
)
});

let mut idx = Some(0);
if let Some(SeekPayload::Semver(Semver { id })) = Seek::Semver.after(&options.page)? {
idx = sorted_versions
.get_index_of(&id)
.filter(|i| i + 1 < sorted_versions.len())
.map(|i| i + 1);
}
if let Some(start) = idx {
let end = (start + options.per_page as usize).min(sorted_versions.len());
let ids = sorted_versions[start..end]
.keys()
.cloned()
.collect::<Vec<_>>();
versions::table
.filter(versions::crate_id.eq(crate_id))
.left_outer_join(users::table)
.select(<(Version, Option<User>)>::as_select())
.filter(versions::id.eq_any(ids))
.load_stream::<(Version, Option<User>)>(conn)
.await?
.try_for_each(|row| {
// The versions are already sorted, and we only need to enrich the fetched rows into them.
// Therefore, other values can now be safely ignored.
sorted_versions
.entry(row.0.id)
.and_modify(|entry| *entry = (None, false, Some(row)));

future::ready(Ok(()))
})
.await?;

let len = sorted_versions.len();
(
sorted_versions
.into_values()
.filter_map(|(_, _, v)| v)
.collect(),
len,
release_tracks,
)
} else {
(vec![], 0, release_tracks)
}
} else {
let mut data = IndexMap::new();
query
.left_outer_join(users::table)
.select(<(Version, Option<User>)>::as_select())
.load_stream::<(Version, Option<User>)>(conn)
.await?
.try_for_each(|row| {
if let Ok(semver) = semver::Version::parse(&row.0.num) {
data.insert(semver, row);
};
future::ready(Ok(()))
})
.await?;
data.sort_unstable_by(|a, _, b, _| b.cmp(a));
let total = data.len();
let release_tracks = include.release_tracks.then(|| {
ReleaseTracks::from_sorted_semver_iter(
data.iter()
.flat_map(|(semver, (version, _))| (!version.yanked).then_some(semver)),
)
});
(data.into_values().collect(), total, release_tracks)
};

let mut next_page = None;
if let Some(options) = options {
next_page = next_seek_params(&data, options, |last| Seek::Semver.to_payload(last))?
.map(|p| req.query_with_params(p))
};

Ok(PaginatedVersionsAndPublishers {
data,
meta: ResponseMeta {
total: total as i64,
next_page,
release_tracks,
},
})
}

mod seek {
use crate::controllers::helpers::pagination::seek;
use crate::models::{User, Version};
Expand All @@ -392,6 +262,7 @@ mod seek {
seek!(
pub enum Seek {
Semver {
num: String,
id: i32,
},
Date {
Expand All @@ -406,7 +277,10 @@ mod seek {
pub(crate) fn to_payload(&self, record: &(Version, Option<User>)) -> SeekPayload {
let (Version { id, created_at, .. }, _) = *record;
match *self {
Seek::Semver => SeekPayload::Semver(Semver { id }),
Seek::Semver => SeekPayload::Semver(Semver {
num: record.0.num.clone(),
id,
}),
Seek::Date => SeekPayload::Date(Date { created_at, id }),
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/tests/routes/crates/versions/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,9 +291,9 @@ async fn test_seek_based_pagination_semver_sorting() -> anyhow::Result<()> {
assert_eq!(json.meta.total as usize, expects.len());
assert_eq!(json.meta.release_tracks, None);

// A decodable seek value, MTAwCg (100), but doesn't actually exist
// A decodable seek value, WyIwLjAuMCIsMTAwXQ (["0.0.0",100]), but doesn't actually exist
let json: VersionList = anon
.get_with_query(url, "per_page=10&sort=semver&seek=MTAwCg")
.get_with_query(url, "per_page=10&sort=semver&seek=WyIwLjAuMCIsMTAwXQ")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with the old link the API will now return a "400 Bad Request" response

.await
.good();
assert_eq!(json.versions.len(), 0);
Expand Down