-
Notifications
You must be signed in to change notification settings - Fork 676
Add semver_ord(num)
SQL function and version.semver_ord
column
#10763
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b692584
7012704
e3ebad5
7438bdf
2f6d121
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
use crates_io_test_db::TestDatabase; | ||
use diesel::prelude::*; | ||
use diesel::sql_types::{Nullable, Text}; | ||
use diesel_async::RunQueryDsl; | ||
use std::fmt::Debug; | ||
|
||
#[tokio::test] | ||
async fn test_jsonb_output() { | ||
let test_db = TestDatabase::new(); | ||
let mut conn = test_db.async_connect().await; | ||
|
||
let mut check = async |num| { | ||
let query = format!("select semver_ord('{num}') as output"); | ||
|
||
#[derive(QueryableByName)] | ||
struct Row { | ||
#[diesel(sql_type = Nullable<Text>)] | ||
output: Option<String>, | ||
} | ||
|
||
diesel::sql_query(query) | ||
.get_result::<Row>(&mut conn) | ||
.await | ||
.unwrap() | ||
.output | ||
.unwrap_or_default() | ||
}; | ||
|
||
insta::assert_snapshot!(check("0.0.0").await, @r#"[0, 0, 0, {}]"#); | ||
insta::assert_snapshot!(check("1.0.0-alpha.1").await, @r#"[1, 0, 0, [true, "alpha", false, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, ""]]"#); | ||
|
||
// see https://crates.io/crates/cursed-trying-to-break-cargo/1.0.0-0.HDTV-BluRay.1020p.YTSUB.L33TRip.mkv – thanks @Gankra! | ||
insta::assert_snapshot!(check("1.0.0-0.HDTV-BluRay.1020p.YTSUB.L33TRip.mkv").await, @r#"[1, 0, 0, [false, 0, true, "HDTV-BluRay", true, "1020p", true, "YTSUB", true, "L33TRip", true, "mkv", null, null, null, null, null, null, null, null, ""]]"#); | ||
|
||
// Invalid version string | ||
insta::assert_snapshot!(check("foo").await, @""); | ||
|
||
// Version string with a lot of prerelease identifiers | ||
insta::assert_snapshot!(check("1.2.3-1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.end").await, @r#"[1, 2, 3, [false, 1, false, 2, false, 3, false, 4, false, 5, false, 6, false, 7, false, 8, false, 9, false, 10, "11.12.13.14.15.16.17.end"]]"#); | ||
} | ||
|
||
/// This test checks that the `semver_ord` function orders versions correctly. | ||
/// | ||
/// The test data is a list of versions in a random order. The versions are then | ||
/// ordered by the `semver_ord` function and the result is compared to the | ||
/// expected order (see <https://semver.org/#spec-item-11>). | ||
/// | ||
/// The test data was imported from <https://github.com/dtolnay/semver/blob/1.0.26/tests/test_version.rs#L223-L242>. | ||
#[tokio::test] | ||
async fn test_spec_order() { | ||
let test_db = TestDatabase::new(); | ||
let mut conn = test_db.async_connect().await; | ||
|
||
#[derive(QueryableByName)] | ||
struct Row { | ||
#[diesel(sql_type = Text)] | ||
num: String, | ||
} | ||
|
||
impl Debug for Row { | ||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
f.write_str(&self.num) | ||
} | ||
} | ||
|
||
let mut check = async |order| { | ||
let query = format!( | ||
r#" | ||
with nums as ( | ||
select unnest(array[ | ||
'1.0.0-beta', | ||
'1.0.0-alpha', | ||
'1.0.0-rc.1', | ||
'1.0.0', | ||
'1.0.0-beta.2', | ||
'1.0.0-alpha.1', | ||
'1.0.0-alpha.beta', | ||
'1.0.0-beta.11' | ||
]) as num | ||
) | ||
select num | ||
from nums | ||
order by semver_ord(num) {order}; | ||
"# | ||
); | ||
|
||
diesel::sql_query(query) | ||
.load::<Row>(&mut conn) | ||
.await | ||
.unwrap() | ||
}; | ||
|
||
insta::assert_debug_snapshot!(check("asc").await, @r" | ||
[ | ||
1.0.0-alpha, | ||
1.0.0-alpha.1, | ||
1.0.0-alpha.beta, | ||
1.0.0-beta, | ||
1.0.0-beta.2, | ||
1.0.0-beta.11, | ||
1.0.0-rc.1, | ||
1.0.0, | ||
] | ||
"); | ||
|
||
insta::assert_debug_snapshot!(check("desc").await, @r" | ||
[ | ||
1.0.0, | ||
1.0.0-rc.1, | ||
1.0.0-beta.11, | ||
1.0.0-beta.2, | ||
1.0.0-beta, | ||
1.0.0-alpha.beta, | ||
1.0.0-alpha.1, | ||
1.0.0-alpha, | ||
] | ||
"); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
drop trigger trigger_set_semver_ord on versions; | ||
drop function set_semver_ord(); | ||
alter table versions drop column semver_ord; | ||
drop function semver_ord; |
eth3lbert marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
-- Add `semver_ord(num)` function to convert a semver string into a JSONB array for version comparison purposes. | ||
|
||
create or replace function semver_ord(num varchar) returns jsonb as $$ | ||
declare | ||
-- We need to ensure that the prerelease array has the same length for all | ||
-- versions since shorter arrays have lower precedence in JSONB. We store | ||
-- the first 10 parts of the prerelease string as pairs of booleans and | ||
-- numbers or text values, and then a final text item for the remaining | ||
-- parts. | ||
max_prerelease_parts constant int := 10; | ||
|
||
-- We ignore the "build metadata" part of the semver string, since it has | ||
-- no impact on the version ordering. | ||
match_result text[] := regexp_match(num, '^(\d+).(\d+).(\d+)(?:-([0-9A-Za-z\-.]+))?'); | ||
|
||
prerelease jsonb; | ||
prerelease_parts text[]; | ||
prerelease_part text; | ||
i int := 0; | ||
begin | ||
if match_result is null then | ||
return null; | ||
end if; | ||
|
||
if match_result[4] is null then | ||
-- A JSONB object has higher precedence than an array, and versions with | ||
-- prerelease specifiers should have lower precedence than those without. | ||
Comment on lines
+26
to
+27
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I went to check that we're not relying on any undocumented behaviour, and found this gem:
Little did they know! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep, see #10763 (comment) :) |
||
prerelease := json_build_object(); | ||
else | ||
prerelease := to_jsonb(array_fill(NULL::bool, ARRAY[max_prerelease_parts * 2 + 1])); | ||
|
||
-- Split prerelease string by `.` and "append" items to | ||
-- the `prerelease` array. | ||
prerelease_parts := string_to_array(match_result[4], '.'); | ||
|
||
foreach prerelease_part in array prerelease_parts[1:max_prerelease_parts + 1] | ||
loop | ||
-- Parse parts as numbers if they consist of only digits. | ||
if regexp_like(prerelease_part, '^\d+$') then | ||
-- In JSONB a number has higher precedence than a string but in | ||
-- semver it is the other way around, so we use true/false to | ||
-- work around this. | ||
Comment on lines
+40
to
+42
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here are some references to facilitate the review. The ordering for jsonb, as excerpted from the official doc1, is as follows:
The SemVer's pre-release ordering, as excerpted from the spec2, is as follows:
Footnotes |
||
prerelease := jsonb_set(prerelease, array[i::text], to_jsonb(false)); | ||
prerelease := jsonb_set(prerelease, array[(i + 1)::text], to_jsonb(prerelease_part::numeric)); | ||
else | ||
prerelease := jsonb_set(prerelease, array[i::text], to_jsonb(true)); | ||
prerelease := jsonb_set(prerelease, array[(i + 1)::text], to_jsonb(prerelease_part)); | ||
end if; | ||
|
||
-- Exit the loop if we have reached the maximum number of parts. | ||
i := i + 2; | ||
exit when i >= max_prerelease_parts * 2; | ||
eth3lbert marked this conversation as resolved.
Show resolved
Hide resolved
|
||
end loop; | ||
|
||
prerelease := jsonb_set(prerelease, array[(max_prerelease_parts * 2)::text], to_jsonb(array_to_string(prerelease_parts[max_prerelease_parts + 1:], '.'))); | ||
end if; | ||
|
||
-- Return an array with the major, minor, patch, and prerelease parts. | ||
return json_build_array( | ||
match_result[1]::numeric, | ||
match_result[2]::numeric, | ||
match_result[3]::numeric, | ||
prerelease | ||
); | ||
end; | ||
$$ language plpgsql immutable; | ||
|
||
comment on function semver_ord is 'Converts a semver string into a JSONB array for version comparison purposes. The array has the following format: [major, minor, patch, prerelease] and when used for sorting follow the precedence rules defined in the semver specification (https://semver.org/#spec-item-11).'; | ||
|
||
|
||
-- Add corresponding column to the `versions` table. | ||
|
||
alter table versions | ||
add semver_ord jsonb; | ||
|
||
comment on column versions.semver_ord is 'JSONB representation of the version number for sorting purposes.'; | ||
|
||
|
||
-- Create a trigger to set the `semver_ord` column when inserting a new version. | ||
-- Ideally, we would use a generated column for this, but introducing such a | ||
-- column would require a full table rewrite, which is not feasible for large | ||
-- tables. | ||
|
||
create or replace function set_semver_ord() returns trigger as $$ | ||
begin | ||
new.semver_ord := semver_ord(new.num); | ||
return new; | ||
end | ||
$$ language plpgsql; | ||
|
||
create or replace trigger trigger_set_semver_ord | ||
before insert on versions | ||
for each row | ||
execute procedure set_semver_ord(); | ||
|
||
|
||
-- Populate the `semver_ord` column for existing versions. | ||
-- This query should be run manually in small batches to avoid locking the | ||
-- table for too long. | ||
|
||
-- with versions_to_update as ( | ||
-- select id, num | ||
-- from versions | ||
-- where semver_ord = 'null'::jsonb | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just noticed that I didn't update this correctly. this line should now be |
||
-- limit 1000 | ||
-- ) | ||
-- update versions | ||
-- set semver_ord = semver_ord(num) | ||
-- where id in (select id from versions_to_update); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I love that this is represented as-is. 😆