From 6776c264845979fff80a5d215699868bab6b156d Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 16 Mar 2025 16:16:44 +0300 Subject: [PATCH 1/5] fix ranges logic --- .../src/worktable/generator/queries/type.rs | 24 +++++--- .../worktable/generator/table/index_fns.rs | 7 +-- .../generator/table/select_executor.rs | 57 ++++++++++++------- src/persistence/manager.rs | 9 +-- tests/persistence/mod.rs | 4 +- tests/persistence/read.rs | 4 +- tests/persistence/sync/mod.rs | 18 ++---- 7 files changed, 68 insertions(+), 55 deletions(-) diff --git a/codegen/src/worktable/generator/queries/type.rs b/codegen/src/worktable/generator/queries/type.rs index ce099160..70f72918 100644 --- a/codegen/src/worktable/generator/queries/type.rs +++ b/codegen/src/worktable/generator/queries/type.rs @@ -1,3 +1,5 @@ +use std::collections::HashSet; + use proc_macro2::{Ident, Span, TokenStream}; use quote::quote; @@ -8,20 +10,26 @@ impl Generator { pub fn gen_available_types_def(&mut self) -> syn::Result { let name_generator = WorktableNameGenerator::from_table_name(self.name.to_string()); let avt_type_ident = name_generator.get_available_type_ident(); + let mut types_set = HashSet::new(); let rows: Vec<_> = self .columns .indexes .iter() .filter_map(|(_, idx)| self.columns.columns_map.get(&idx.field)) - .map(|s| { - let type_ident = Ident::new(s.to_string().as_str(), Span::mixed_site()); - let type_upper = - Ident::new(s.to_string().to_uppercase().as_str(), Span::mixed_site()); - Some(quote! { - #[from] - #type_upper(#type_ident), - }) + .filter_map(|s| { + if types_set.contains(&s.to_string()) { + None + } else { + types_set.insert(s.to_string()); + let type_ident = Ident::new(s.to_string().as_str(), Span::mixed_site()); + let type_upper = + Ident::new(s.to_string().to_uppercase().as_str(), Span::mixed_site()); + Some(quote! { + #[from] + #type_upper(#type_ident), + }) + } }) .collect(); diff --git a/codegen/src/worktable/generator/table/index_fns.rs b/codegen/src/worktable/generator/table/index_fns.rs index bd805d80..51e48a35 100644 --- a/codegen/src/worktable/generator/table/index_fns.rs +++ b/codegen/src/worktable/generator/table/index_fns.rs @@ -75,13 +75,12 @@ impl Generator { Ok(quote! { pub fn #fn_name(&self, by: #type_) -> SelectQueryBuilder<#row_ident, impl DoubleEndedIterator + '_, #column_range_type> { - let rows: Vec<#row_ident> = self.0.indexes.#field_ident + let rows = self.0.indexes.#field_ident .get(&by) .into_iter() - .filter_map(|(_, link)| self.0.data.select(*link).ok()) - .collect(); + .filter_map(|(_, link)| self.0.data.select(*link).ok()); - SelectQueryBuilder::new(rows.into_iter()) + SelectQueryBuilder::new(rows) } }) } diff --git a/codegen/src/worktable/generator/table/select_executor.rs b/codegen/src/worktable/generator/table/select_executor.rs index 8aa04918..a4f88eac 100644 --- a/codegen/src/worktable/generator/table/select_executor.rs +++ b/codegen/src/worktable/generator/table/select_executor.rs @@ -9,6 +9,8 @@ use crate::worktable::generator::Generator; use quote::ToTokens; use syn::Type; +const RANGE_VARIANTS: &[&str] = &["", "Inclusive", "From", "To", "ToInclusive"]; + fn is_numeric_type(ty: &Type) -> bool { matches!( ty.to_token_stream().to_string().as_str(), @@ -41,35 +43,48 @@ impl Generator { .collect(); let column_range_variants = unique_types.iter().map(|type_name| { - let variant_ident = Ident::new( - &type_name.to_string().to_case(Case::Pascal), - Span::call_site(), - ); let ty_ident = Ident::new(&type_name.to_string(), Span::call_site()); + let variants: Vec<_> = RANGE_VARIANTS + .into_iter() + .map(|v| { + let variant_ident = Ident::new( + &format!("{}{}", type_name.to_string().to_case(Case::Pascal), v), + Span::call_site(), + ); + let range_ident = Ident::new(&format!("Range{}", v), Span::call_site()); + quote! { + #variant_ident(std::ops::#range_ident<#ty_ident>), + } + }) + .collect(); + quote! { - #variant_ident(std::ops::RangeInclusive<#ty_ident>), + #(#variants)* } }); let from_impls = unique_types.iter().map(|type_name| { - let variant_ident = Ident::new( - &type_name.to_string().to_case(Case::Pascal), - Span::call_site(), - ); - let type_ident = Ident::new(&type_name.to_string(), Span::call_site()); + let ty_ident = Ident::new(&type_name.to_string(), Span::call_site()); + let variants: Vec<_> = RANGE_VARIANTS + .into_iter() + .map(|v| { + let variant_ident = Ident::new( + &format!("{}{}", type_name.to_string().to_case(Case::Pascal), v), + Span::call_site(), + ); + let range_ident = Ident::new(&format!("Range{}", v), Span::call_site()); + quote! { + impl From> for #column_range_type { + fn from(range: std::ops::#range_ident<#ty_ident>) -> Self { + Self::#variant_ident(range) + } + } + } + }) + .collect(); quote! { - impl From> for #column_range_type { - fn from(range: std::ops::RangeInclusive<#type_ident>) -> Self { - Self::#variant_ident(range) - } - } - impl From> for #column_range_type { - fn from(range: std::ops::Range<#type_ident>) -> Self { - let end = range.end.saturating_sub(1); - Self::#variant_ident(range.start..=end) - } - } + #(#variants)* } }); diff --git a/src/persistence/manager.rs b/src/persistence/manager.rs index 62f15b81..e59e0e0e 100644 --- a/src/persistence/manager.rs +++ b/src/persistence/manager.rs @@ -5,13 +5,10 @@ pub struct PersistenceConfig { } impl PersistenceConfig { - pub fn new, S2: Into>( - config_path: S1, - table_files_dir: S2, - ) -> eyre::Result { - Ok(Self { + pub fn new, S2: Into>(config_path: S1, table_files_dir: S2) -> Self { + Self { config_path: config_path.into(), tables_path: table_files_dir.into(), - }) + } } } diff --git a/tests/persistence/mod.rs b/tests/persistence/mod.rs index 0ac8270f..bae28abd 100644 --- a/tests/persistence/mod.rs +++ b/tests/persistence/mod.rs @@ -48,7 +48,7 @@ worktable!( pub const TEST_ROW_COUNT: usize = 100; pub async fn get_empty_test_wt() -> TestPersistWorkTable { - let config = PersistenceConfig::new("tests/data", "tests/data").unwrap(); + let config = PersistenceConfig::new("tests/data", "tests/data"); TestPersistWorkTable::new(config).await.unwrap() } @@ -64,7 +64,7 @@ pub async fn get_test_wt() -> TestPersistWorkTable { } pub async fn get_test_wt_without_secondary_indexes() -> TestWithoutSecondaryIndexesWorkTable { - let config = PersistenceConfig::new("tests/data", "tests/data").unwrap(); + let config = PersistenceConfig::new("tests/data", "tests/data"); let table = TestWithoutSecondaryIndexesWorkTable::new(config) .await .unwrap(); diff --git a/tests/persistence/read.rs b/tests/persistence/read.rs index c8fcb4d6..9249fae4 100644 --- a/tests/persistence/read.rs +++ b/tests/persistence/read.rs @@ -125,7 +125,7 @@ async fn test_data_parse() { #[tokio::test] async fn test_space_parse() { - let config = PersistenceConfig::new("tests/data/expected", "tests/data/expected").unwrap(); + let config = PersistenceConfig::new("tests/data/expected", "tests/data/expected"); let table = TestPersistWorkTable::load_from_file(config).await.unwrap(); let expected = get_test_wt().await; @@ -139,7 +139,7 @@ async fn test_space_parse() { async fn test_space_parse_no_file() { remove_dir_if_exists("tests/non-existent".to_string()).await; - let config = PersistenceConfig::new("tests/non-existent", "tests/non-existent").unwrap(); + let config = PersistenceConfig::new("tests/non-existent", "tests/non-existent"); let table = TestPersistWorkTable::load_from_file(config).await.unwrap(); let expected = get_empty_test_wt().await; assert_eq!( diff --git a/tests/persistence/sync/mod.rs b/tests/persistence/sync/mod.rs index 016f548a..d2ee6271 100644 --- a/tests/persistence/sync/mod.rs +++ b/tests/persistence/sync/mod.rs @@ -4,8 +4,7 @@ use worktable::prelude::{PersistenceConfig, PrimaryKeyGeneratorState}; #[test] fn test_space_insert_sync() { - let config = - PersistenceConfig::new("tests/data/sync/insert", "tests/data/sync/insert").unwrap(); + let config = PersistenceConfig::new("tests/data/sync/insert", "tests/data/sync/insert"); let runtime = tokio::runtime::Builder::new_multi_thread() .worker_threads(2) @@ -40,8 +39,7 @@ fn test_space_insert_sync() { #[test] fn test_space_insert_many_sync() { let config = - PersistenceConfig::new("tests/data/sync/insert_many", "tests/data/sync/insert_many") - .unwrap(); + PersistenceConfig::new("tests/data/sync/insert_many", "tests/data/sync/insert_many"); let runtime = tokio::runtime::Builder::new_multi_thread() .worker_threads(2) @@ -86,8 +84,7 @@ fn test_space_insert_many_sync() { #[test] fn test_space_update_full_sync() { let config = - PersistenceConfig::new("tests/data/sync/update_full", "tests/data/sync/update_full") - .unwrap(); + PersistenceConfig::new("tests/data/sync/update_full", "tests/data/sync/update_full"); let runtime = tokio::runtime::Builder::new_multi_thread() .worker_threads(2) @@ -132,8 +129,7 @@ fn test_space_update_query_sync() { let config = PersistenceConfig::new( "tests/data/sync/update_query", "tests/data/sync/update_query", - ) - .unwrap(); + ); let runtime = tokio::runtime::Builder::new_multi_thread() .worker_threads(2) @@ -172,8 +168,7 @@ fn test_space_update_query_sync() { #[test] fn test_space_delete_sync() { - let config = - PersistenceConfig::new("tests/data/sync/delete", "tests/data/sync/delete").unwrap(); + let config = PersistenceConfig::new("tests/data/sync/delete", "tests/data/sync/delete"); let runtime = tokio::runtime::Builder::new_multi_thread() .worker_threads(2) @@ -211,8 +206,7 @@ fn test_space_delete_query_sync() { let config = PersistenceConfig::new( "tests/data/sync/delete_query", "tests/data/sync/delete_query", - ) - .unwrap(); + ); let runtime = tokio::runtime::Builder::new_multi_thread() .worker_threads(2) From d76bba27a068377d4638db429d865ac535b4114d Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 19 Mar 2025 22:34:58 +0300 Subject: [PATCH 2/5] variants fix --- .../generator/table/select_executor.rs | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/codegen/src/worktable/generator/table/select_executor.rs b/codegen/src/worktable/generator/table/select_executor.rs index 314d2c44..ea8206fe 100644 --- a/codegen/src/worktable/generator/table/select_executor.rs +++ b/codegen/src/worktable/generator/table/select_executor.rs @@ -127,15 +127,26 @@ impl Generator { is_numeric_type(&syn::parse_str::(&ty.to_token_stream().to_string()).unwrap()) }) .map(|(column, ty)| { - let col_lit = Literal::string(column.to_string().as_str()); - let col_ident = Ident::new(&column.to_string(), Span::call_site()); - let variant_ident = - Ident::new(&ty.to_string().to_case(Case::Pascal), Span::call_site()); + let variants: Vec<_> = RANGE_VARIANTS + .into_iter() + .map(|v| { + let col_lit = Literal::string(column.to_string().as_str()); + let col_ident = Ident::new(&column.to_string(), Span::call_site()); + let variant_ident = Ident::new( + &format!("{}{}", ty.to_string().to_case(Case::Pascal), v), + Span::call_site(), + ); + quote! { + (#col_lit, #column_range_type::#variant_ident(range)) => { + Box::new(iter.filter(move |row| range.contains(&row.#col_ident))) + as Box> + }, + } + }) + .collect(); + quote! { - (#col_lit, #column_range_type::#variant_ident(range)) => { - Box::new(iter.filter(move |row| range.contains(&row.#col_ident))) - as Box> - }, + #(#variants)* } }); From 147ffbab7c0c164216d46aa4af56b1b920e61002 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 19 Mar 2025 22:39:18 +0300 Subject: [PATCH 3/5] clippy --- codegen/src/worktable/generator/table/select_executor.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/codegen/src/worktable/generator/table/select_executor.rs b/codegen/src/worktable/generator/table/select_executor.rs index ea8206fe..288f60b8 100644 --- a/codegen/src/worktable/generator/table/select_executor.rs +++ b/codegen/src/worktable/generator/table/select_executor.rs @@ -45,7 +45,7 @@ impl Generator { let column_range_variants = unique_types.iter().map(|type_name| { let ty_ident = Ident::new(&type_name.to_string(), Span::call_site()); let variants: Vec<_> = RANGE_VARIANTS - .into_iter() + .iter() .map(|v| { let variant_ident = Ident::new( &format!("{}{}", type_name.to_string().to_case(Case::Pascal), v), @@ -66,7 +66,7 @@ impl Generator { let from_impls = unique_types.iter().map(|type_name| { let ty_ident = Ident::new(&type_name.to_string(), Span::call_site()); let variants: Vec<_> = RANGE_VARIANTS - .into_iter() + .iter() .map(|v| { let variant_ident = Ident::new( &format!("{}{}", type_name.to_string().to_case(Case::Pascal), v), @@ -128,7 +128,7 @@ impl Generator { }) .map(|(column, ty)| { let variants: Vec<_> = RANGE_VARIANTS - .into_iter() + .iter() .map(|v| { let col_lit = Literal::string(column.to_string().as_str()); let col_ident = Ident::new(&column.to_string(), Span::call_site()); From 84424571cc03b6e0222e3cec05e5722f2f5a8dbd Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 19 Mar 2025 23:54:11 +0300 Subject: [PATCH 4/5] fix re-read table bug --- Cargo.toml | 4 +- src/persistence/space/data.rs | 9 ++- tests/persistence/sync/mod.rs | 2 + tests/persistence/sync/string_re_read.rs | 79 ++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 tests/persistence/sync/string_re_read.rs diff --git a/Cargo.toml b/Cargo.toml index 81a648af..9dbb0ed8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,9 +25,9 @@ lockfree = { version = "0.5.1" } worktable_codegen = { path = "codegen", version = "0.5.1" } futures = "0.3.30" uuid = { version = "1.10.0", features = ["v4"] } -data_bucket = "0.2.1" +#data_bucket = "0.2.1" # data_bucket = { git = "https://github.com/pathscale/DataBucket", branch = "main" } -# data_bucket = { path = "../DataBucket", version = "0.2.0" } +data_bucket = { path = "../DataBucket", version = "0.2.1" } performance_measurement_codegen = { path = "performance_measurement/codegen", version = "0.1.0", optional = true } performance_measurement = { path = "performance_measurement", version = "0.1.0", optional = true } indexset = { version = "0.11.2", features = ["concurrent", "cdc", "multimap"] } diff --git a/src/persistence/space/data.rs b/src/persistence/space/data.rs index 9f4a3a90..b1e0bc0f 100644 --- a/src/persistence/space/data.rs +++ b/src/persistence/space/data.rs @@ -7,8 +7,8 @@ use crate::persistence::SpaceDataOps; use crate::prelude::WT_DATA_EXTENSION; use convert_case::{Case, Casing}; use data_bucket::{ - parse_page, persist_page, update_at, DataPage, GeneralHeader, GeneralPage, Link, PageType, - Persistable, SizeMeasurable, SpaceInfoPage, GENERAL_HEADER_SIZE, + parse_general_header_by_index, parse_page, persist_page, update_at, DataPage, GeneralHeader, + GeneralPage, Link, PageType, Persistable, SizeMeasurable, SpaceInfoPage, GENERAL_HEADER_SIZE, }; use rkyv::api::high::HighDeserializer; use rkyv::rancor::Strategy; @@ -75,12 +75,14 @@ where let info = parse_page::<_, DATA_LENGTH>(&mut data_file, 0).await?; let file_length = data_file.metadata().await?.len(); let page_id = file_length / (DATA_LENGTH as u64 + GENERAL_HEADER_SIZE as u64); + let last_page_header = + parse_general_header_by_index(&mut data_file, page_id as u32).await?; Ok(Self { data_file, info, last_page_id: page_id as u32, - current_data_length: 0, + current_data_length: last_page_header.data_length, }) } @@ -117,6 +119,7 @@ where } self.current_data_length += link.length; self.update_data_length().await?; + println!("{:?}", self.current_data_length); update_at::<{ DATA_LENGTH }>(&mut self.data_file, link, bytes).await } diff --git a/tests/persistence/sync/mod.rs b/tests/persistence/sync/mod.rs index d2ee6271..ccc96f31 100644 --- a/tests/persistence/sync/mod.rs +++ b/tests/persistence/sync/mod.rs @@ -2,6 +2,8 @@ use crate::persistence::{AnotherByIdQuery, TestPersistRow, TestPersistWorkTable} use crate::remove_dir_if_exists; use worktable::prelude::{PersistenceConfig, PrimaryKeyGeneratorState}; +mod string_re_read; + #[test] fn test_space_insert_sync() { let config = PersistenceConfig::new("tests/data/sync/insert", "tests/data/sync/insert"); diff --git a/tests/persistence/sync/string_re_read.rs b/tests/persistence/sync/string_re_read.rs new file mode 100644 index 00000000..a959f097 --- /dev/null +++ b/tests/persistence/sync/string_re_read.rs @@ -0,0 +1,79 @@ +use crate::remove_dir_if_exists; + +use worktable::prelude::*; +use worktable_codegen::worktable; + +worktable!( + name: StringReRead, + persist: true, + columns: { + id: u64 primary_key autoincrement, + first: String, + second: String, + third: String, + last: String, + }, +); + +#[test] +fn test_key() { + let config = PersistenceConfig::new("tests/data/key", "tests/data/key"); + + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .enable_io() + .enable_time() + .build() + .unwrap(); + + runtime.block_on(async { + remove_dir_if_exists("tests/data/key".to_string()).await; + + { + let table = StringReReadWorkTable::load_from_file(config.clone()) + .await + .unwrap(); + table + .insert(StringReReadRow { + first: "first".to_string(), + id: table.get_next_pk().into(), + third: "third".to_string(), + second: "second".to_string(), + last: "_________________________last_____________________".to_string(), + }) + .unwrap(); + table + .insert(StringReReadRow { + first: "first_again".to_string(), + id: table.get_next_pk().into(), + third: "third_again".to_string(), + second: "second_again".to_string(), + last: "_________________________last_____________________".to_string(), + }) + .unwrap(); + + table.wait_for_ops().await + } + { + let table = StringReReadWorkTable::load_from_file(config.clone()) + .await + .unwrap(); + table + .insert(StringReReadRow { + first: "first_last".to_string(), + id: table.get_next_pk().into(), + third: "third_last".to_string(), + second: "second_last".to_string(), + last: "_________________________last_____________________".to_string(), + }) + .unwrap(); + table.wait_for_ops().await + } + { + let table = StringReReadWorkTable::load_from_file(config.clone()) + .await + .unwrap(); + assert_eq!(table.select_all().execute().unwrap().len(), 3); + } + }) +} From 133a0e90ba3d1e736d86a05e147fb93f22224934 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Wed, 19 Mar 2025 23:58:02 +0300 Subject: [PATCH 5/5] bucket version --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9dbb0ed8..65e2c90e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,8 +26,8 @@ worktable_codegen = { path = "codegen", version = "0.5.1" } futures = "0.3.30" uuid = { version = "1.10.0", features = ["v4"] } #data_bucket = "0.2.1" -# data_bucket = { git = "https://github.com/pathscale/DataBucket", branch = "main" } -data_bucket = { path = "../DataBucket", version = "0.2.1" } +data_bucket = { git = "https://github.com/pathscale/DataBucket", branch = "main" } +# data_bucket = { path = "../DataBucket", version = "0.2.1" } performance_measurement_codegen = { path = "performance_measurement/codegen", version = "0.1.0", optional = true } performance_measurement = { path = "performance_measurement", version = "0.1.0", optional = true } indexset = { version = "0.11.2", features = ["concurrent", "cdc", "multimap"] }