From 1c4b050a6f0bda9d282b179cc5c0a08afaca46a6 Mon Sep 17 00:00:00 2001 From: Kira Sotnikov Date: Mon, 31 Mar 2025 18:45:41 +0300 Subject: [PATCH 01/11] Add SystemInfo for WT and indecies --- codegen/src/heap_size/mod.rs | 74 ++++++ codegen/src/lib.rs | 8 + codegen/src/worktable/generator/index.rs | 4 +- codegen/src/worktable/generator/row.rs | 2 +- .../src/worktable/generator/table/impls.rs | 10 + examples/Cargo.toml | 2 +- examples/src/main.rs | 40 ++- src/lib.rs | 3 +- src/table/mod.rs | 1 + src/table/system_info.rs | 232 ++++++++++++++++++ 10 files changed, 360 insertions(+), 16 deletions(-) create mode 100644 codegen/src/heap_size/mod.rs create mode 100644 src/table/system_info.rs diff --git a/codegen/src/heap_size/mod.rs b/codegen/src/heap_size/mod.rs new file mode 100644 index 00000000..ded7d9d7 --- /dev/null +++ b/codegen/src/heap_size/mod.rs @@ -0,0 +1,74 @@ +use proc_macro2::TokenStream; +use quote::quote; +use syn::{Data, DeriveInput, Fields, Result, Type}; + +pub fn expand(input: proc_macro2::TokenStream) -> Result { + let input: DeriveInput = syn::parse2(input)?; + let name = &input.ident; + + let body = match &input.data { + Data::Struct(data_struct) => { + let fields = match &data_struct.fields { + Fields::Named(fields_named) => fields_named.named.iter().collect::>(), + Fields::Unnamed(fields_unnamed) => { + fields_unnamed.unnamed.iter().collect::>() + } + Fields::Unit => vec![], + }; + + if fields.is_empty() { + quote! { 0 } + } else if fields.iter().all(|f| is_copy_primitive(&f.ty)) { + quote! { std::mem::size_of::() } + } else { + let field_sizes = fields.iter().enumerate().map(|(i, f)| { + let accessor = match &f.ident { + Some(ident) => quote! { self.#ident }, + None => { + let index = syn::Index::from(i); + quote! { self.#index } + } + }; + quote! { size += #accessor.heap_size(); } + }); + + quote! { + let mut size = 0; + #(#field_sizes)* + size + } + } + } + _ => { + return Err(syn::Error::new_spanned( + name, + "#[derive(HeapSize)] only supports structs", + )); + } + }; + + let t = quote! { + impl HeapSize for #name { + fn heap_size(&self) -> usize { + #body + } + } + }; + println!("{}", t); + Ok(t) +} + +fn is_copy_primitive(ty: &Type) -> bool { + matches!( + ty, + Type::Path(type_path) + if type_path.qself.is_none() && + type_path.path.segments.len() == 1 && + matches!( + type_path.path.segments[0].ident.to_string().as_str(), + "u8" | "u16" | "u32" | "u64" | "usize" | + "i8" | "i16" | "i32" | "i64" | "isize" | + "bool" | "char" | "f64" | "f32" + ) + ) +} diff --git a/codegen/src/lib.rs b/codegen/src/lib.rs index c9725b65..2211eb8e 100644 --- a/codegen/src/lib.rs +++ b/codegen/src/lib.rs @@ -1,3 +1,4 @@ +mod heap_size; mod name_generator; mod persist_index; mod persist_table; @@ -26,3 +27,10 @@ pub fn persist_table(input: TokenStream) -> TokenStream { .unwrap_or_else(|e| e.to_compile_error()) .into() } + +#[proc_macro_derive(HeapSize)] +pub fn heap_size(input: TokenStream) -> TokenStream { + heap_size::expand(input.into()) + .unwrap_or_else(|e| e.to_compile_error()) + .into() +} diff --git a/codegen/src/worktable/generator/index.rs b/codegen/src/worktable/generator/index.rs index 30e7d394..99e10ad2 100644 --- a/codegen/src/worktable/generator/index.rs +++ b/codegen/src/worktable/generator/index.rs @@ -47,11 +47,11 @@ impl Generator { let derive = if self.is_persist { quote! { - #[derive(Debug, Default, PersistIndex)] + #[derive(Debug, HeapSize, Default, PersistIndex)] } } else { quote! { - #[derive(Debug, Default)] + #[derive(Debug, HeapSize, Default)] } }; diff --git a/codegen/src/worktable/generator/row.rs b/codegen/src/worktable/generator/row.rs index 5be4d8db..163a0802 100644 --- a/codegen/src/worktable/generator/row.rs +++ b/codegen/src/worktable/generator/row.rs @@ -96,7 +96,7 @@ impl Generator { .collect(); quote! { - #[derive(rkyv::Archive, Debug, rkyv::Deserialize, Clone, rkyv::Serialize, PartialEq)] + #[derive(rkyv::Archive, Debug, rkyv::Deserialize, Clone, rkyv::Serialize, PartialEq, HeapSize)] #[rkyv(derive(Debug))] #[repr(C)] pub struct #ident { diff --git a/codegen/src/worktable/generator/table/impls.rs b/codegen/src/worktable/generator/table/impls.rs index 33178593..29c20d19 100644 --- a/codegen/src/worktable/generator/table/impls.rs +++ b/codegen/src/worktable/generator/table/impls.rs @@ -19,6 +19,7 @@ impl Generator { let iter_with_fn = self.gen_table_iter_with_fn(); let iter_with_async_fn = self.gen_table_iter_with_async_fn(); let count_fn = self.gen_table_count_fn(); + let system_info_fn = self.gen_system_info_fn(); quote! { impl #ident { @@ -31,6 +32,7 @@ impl Generator { #get_next_fn #iter_with_fn #iter_with_async_fn + #system_info_fn } } } @@ -222,4 +224,12 @@ impl Generator { } } } + + fn gen_system_info_fn(&self) -> TokenStream { + quote! { + pub fn system_info(&self) -> SystemInfo { + self.0.system_info() + } + } + } } diff --git a/examples/Cargo.toml b/examples/Cargo.toml index f6b0a5fa..22e2b318 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -13,4 +13,4 @@ eyre = "0.6.12" futures = "0.3.30" async-std = "1.10" either = "1.15.0" - +ordered-float = "5.0.0" diff --git a/examples/src/main.rs b/examples/src/main.rs index b88e1bff..944d1d17 100644 --- a/examples/src/main.rs +++ b/examples/src/main.rs @@ -9,9 +9,9 @@ fn main() { columns: { id: u64 primary_key autoincrement, val: i64, - test: u8, + test: i32, attr: String, - attr2: i16, + attr2: i32, attr_float: f64, attr_string: String, @@ -40,24 +40,38 @@ fn main() { // WT rows (has prefix My because of table name) let row = MyRow { val: 777, - attr: "Attribute1".to_string(), + attr: "Attribute0".to_string(), attr2: 345, test: 1, id: 0, attr_float: 100.0.into(), - attr_string: "String_attr".to_string(), + attr_string: "String_attr0".to_string(), }; + for i in 2..100000 as i64 { + let row = MyRow { + val: 777, + attr: format!("Attribute{}", i), + attr2: 345 + i as i32, + test: i as i32, + id: i as u64, + attr_float: (100.0 + i as f64).into(), + attr_string: format!("String_attr{}", i), + }; + + my_table.insert(row).unwrap(); // или ? если ты внутри Result + } + // insert let pk: MyPrimaryKey = my_table.insert(row).expect("primary key"); // Select ALL records from WT let select_all = my_table.select_all().execute(); - println!("Select All {:?}", select_all); + //println!("Select All {:?}", select_all); // Select All records with attribute TEST let select_all = my_table.select_all().execute(); - println!("Select All {:?}", select_all); + //println!("Select All {:?}", select_all); // Select by Idx let select_by_attr = my_table @@ -65,20 +79,24 @@ fn main() { .execute() .unwrap(); - for row in select_by_attr { - println!("Select by idx, row {:?}", row); - } + //for row in select_by_attr { + // println!("Select by idx, row {:?}", row); + //} // Update Value query let update = my_table.update_val_by_id(ValByIdQuery { val: 1337 }, pk.clone()); let _ = block_on(update); let select_all = my_table.select_all().execute(); - println!("Select after update val {:?}", select_all); + //println!("Select after update val {:?}", select_all); let delete = my_table.delete(pk); let _ = block_on(delete); let select_all = my_table.select_all().execute(); - println!("Select after delete {:?}", select_all); + //println!("Select after delete {:?}", select_all); + + let info = my_table.system_info(); + + println!("{}", info.pretty()); } diff --git a/src/lib.rs b/src/lib.rs index 7c427346..ccca2724 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,7 @@ pub mod prelude { }; pub use crate::primary_key::{PrimaryKeyGenerator, PrimaryKeyGeneratorState, TablePrimaryKey}; pub use crate::table::select::{Order, QueryParams, SelectQueryBuilder, SelectQueryExecutor}; + pub use crate::table::system_info::{HeapSize, SystemInfo}; pub use crate::util::{OrderedF32Def, OrderedF64Def}; pub use crate::{ lock::Lock, Difference, IndexMap, IndexMultiMap, TableIndex, TableIndexCdc, TableRow, @@ -45,7 +46,7 @@ pub mod prelude { pub use derive_more::{From, Into}; pub use lockfree::set::Set as LockFreeSet; - pub use worktable_codegen::{PersistIndex, PersistTable}; + pub use worktable_codegen::{HeapSize, PersistIndex, PersistTable}; pub const WT_INDEX_EXTENSION: &str = ".wt.idx"; pub const WT_DATA_EXTENSION: &str = ".wt.data"; diff --git a/src/table/mod.rs b/src/table/mod.rs index 2cc6ae5b..8784b973 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -1,4 +1,5 @@ pub mod select; +pub mod system_info; use std::marker::PhantomData; diff --git a/src/table/system_info.rs b/src/table/system_info.rs new file mode 100644 index 00000000..1853d7c8 --- /dev/null +++ b/src/table/system_info.rs @@ -0,0 +1,232 @@ +use data_bucket::Link; +use std::collections::HashMap; +use std::rc::Rc; +use std::sync::Arc; + +use crate::in_memory::{RowWrapper, StorableRow}; +use crate::{IndexMap, IndexMultiMap, WorkTable}; + +#[derive(Debug)] +pub struct SystemInfo { + pub table_name: &'static str, + pub page_count: usize, + pub row_count: usize, + pub empty_slots: u64, + pub memory_usage_bytes: u64, + pub idx_size: usize, +} + +pub trait HeapSize { + fn heap_size(&self) -> usize; +} + +impl HeapSize for Option { + fn heap_size(&self) -> usize { + self.as_ref().map_or(0, |v| v.heap_size()) + } +} + +impl HeapSize for Vec { + fn heap_size(&self) -> usize { + self.capacity() * std::mem::size_of::() + + self.iter().map(|v| v.heap_size()).sum::() + } +} + +impl HeapSize for String { + fn heap_size(&self) -> usize { + self.capacity() + } +} + +impl HeapSize for IndexMap +where + T: Ord + Clone + 'static + HeapSize + std::marker::Send, + V: Ord + Clone + 'static + HeapSize + std::marker::Send, +{ + fn heap_size(&self) -> usize { + let mut size = std::mem::size_of_val(self); + + for (k, v) in self.iter() { + size += k.heap_size(); + size += v.heap_size(); + } + + size + } +} + +impl HeapSize for IndexMultiMap +where + T: Ord + Clone + 'static + HeapSize + std::marker::Send, + V: Ord + Clone + 'static + HeapSize + std::marker::Send, +{ + fn heap_size(&self) -> usize { + let mut size = std::mem::size_of_val(self); + + for (k, v) in self.iter() { + size += k.heap_size(); + size += v.heap_size(); + } + + size + } +} + +impl HeapSize for Link { + fn heap_size(&self) -> usize { + std::mem::size_of_val(self) + } +} + +impl HeapSize for Box { + fn heap_size(&self) -> usize { + std::mem::size_of::() + (**self).heap_size() + } +} + +impl HeapSize for Arc { + fn heap_size(&self) -> usize { + std::mem::size_of::() + (**self).heap_size() + } +} + +impl HeapSize for Rc { + fn heap_size(&self) -> usize { + std::mem::size_of::() + (**self).heap_size() + } +} + +impl HeapSize for HashMap { + fn heap_size(&self) -> usize { + let mut size = 0; + for (k, v) in self.iter() { + size += k.heap_size(); + size += v.heap_size(); + } + size + } +} + +impl HeapSize for u8 { + fn heap_size(&self) -> usize { + 0 + } +} +impl HeapSize for u16 { + fn heap_size(&self) -> usize { + 0 + } +} +impl HeapSize for u32 { + fn heap_size(&self) -> usize { + 0 + } +} +impl HeapSize for i32 { + fn heap_size(&self) -> usize { + 0 + } +} +impl HeapSize for u64 { + fn heap_size(&self) -> usize { + 0 + } +} +impl HeapSize for i64 { + fn heap_size(&self) -> usize { + 0 + } +} +impl HeapSize for f64 { + fn heap_size(&self) -> usize { + 0 + } +} +impl HeapSize for f32 { + fn heap_size(&self) -> usize { + 0 + } +} +impl HeapSize for usize { + fn heap_size(&self) -> usize { + 0 + } +} +impl HeapSize for isize { + fn heap_size(&self) -> usize { + 0 + } +} +impl HeapSize for bool { + fn heap_size(&self) -> usize { + 0 + } +} +impl HeapSize for char { + fn heap_size(&self) -> usize { + 0 + } +} + +impl< + Row, + PrimaryKey, + AvailableTypes, + SecondaryIndexes: HeapSize, + LockType, + PkGen, + const DATA_LENGTH: usize, + > WorkTable +where + PrimaryKey: Clone + Ord + Send + 'static + std::hash::Hash, + Row: StorableRow, + ::WrappedRow: RowWrapper, +{ + pub fn system_info(&self) -> SystemInfo { + let page_count = self.data.get_page_count(); + let row_count = self.pk_map.len(); + + let empty_links = self.data.get_empty_links().len(); + + let bytes = self.data.get_bytes(); + + let memory_usage_bytes = bytes + .iter() + .map(|(_buf, free_offset)| *free_offset as u64) + .sum(); + + let idx_size = self.indexes.heap_size(); + + SystemInfo { + table_name: self.table_name, + page_count, + row_count, + empty_slots: empty_links as u64, + memory_usage_bytes, + idx_size, + } + } +} + +impl SystemInfo { + pub fn pretty(&self) -> String { + let mem_kb = self.memory_usage_bytes as f64 / 1024.0; + let idx_kb = self.idx_size as f64 / 1024.0; + + format!( + "\ +|| Table: {}\n\ +|| Rows: {} ({} pages, {} empty slots)\n\ +|| Memory: {:.2} KB (data) + {:.2} KB (indexes)\n\ +|| Total: {:.2} KB", + self.table_name, + self.row_count, + self.page_count, + self.empty_slots, + mem_kb, + idx_kb, + mem_kb + idx_kb, + ) + } +} From e079186f69039f4ba1cd29c7e422153de5eae863 Mon Sep 17 00:00:00 2001 From: Maxim Volkov <37216852+Handy-caT@users.noreply.github.com> Date: Tue, 1 Apr 2025 18:23:44 +0300 Subject: [PATCH 02/11] Update to new `DataBucket` version (#68) --- src/lib.rs | 10 ++++----- src/persistence/space/index/mod.rs | 4 +++- .../space/index/table_of_contents.rs | 22 +++++-------------- src/persistence/space/index/util.rs | 2 +- 4 files changed, 15 insertions(+), 23 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ccca2724..523cbf21 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,11 +37,11 @@ pub mod prelude { TableSecondaryIndex, TableSecondaryIndexCdc, WorkTable, WorkTableError, }; pub use data_bucket::{ - align, get_index_page_size_from_data_length, map_data_pages_to_general, - map_index_pages_to_general, parse_data_page, parse_page, persist_page, seek_to_page_start, - update_at, DataPage, GeneralHeader, GeneralPage, IndexPage, Interval, Link, PageType, - Persistable, PersistableIndex, SizeMeasurable, SizeMeasure, SpaceInfoPage, - TableOfContentsPage, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, + align, get_index_page_size_from_data_length, map_data_pages_to_general, parse_data_page, + parse_page, persist_page, seek_to_page_start, update_at, DataPage, GeneralHeader, + GeneralPage, IndexPage, Interval, Link, PageType, Persistable, PersistableIndex, + SizeMeasurable, SizeMeasure, SpaceInfoPage, TableOfContentsPage, DATA_VERSION, + GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, }; pub use derive_more::{From, Into}; diff --git a/src/persistence/space/index/mod.rs b/src/persistence/space/index/mod.rs index e0b74c34..79d88d52 100644 --- a/src/persistence/space/index/mod.rs +++ b/src/persistence/space/index/mod.rs @@ -33,7 +33,7 @@ pub use table_of_contents::IndexTableOfContents; pub use util::map_index_pages_to_toc_and_general; #[derive(Debug)] -pub struct SpaceIndex { +pub struct SpaceIndex { space_id: SpaceId, table_of_contents: IndexTableOfContents, next_page_id: Arc, @@ -108,6 +108,8 @@ where >, { let size = get_index_page_size_from_data_length::(DATA_LENGTH as usize); + println!("Length {}", DATA_LENGTH); + println!("Size {}", size); let mut page = IndexPage::new(node_id.key.clone(), size); page.current_index = 1; page.current_length = 1; diff --git a/src/persistence/space/index/table_of_contents.rs b/src/persistence/space/index/table_of_contents.rs index 47c50415..43121838 100644 --- a/src/persistence/space/index/table_of_contents.rs +++ b/src/persistence/space/index/table_of_contents.rs @@ -16,7 +16,7 @@ use rkyv::{rancor, Archive, Deserialize, Serialize}; use tokio::fs::File; #[derive(Debug)] -pub struct IndexTableOfContents { +pub struct IndexTableOfContents { current_page: usize, next_page_id: Arc, pub pages: Vec>>, @@ -24,7 +24,7 @@ pub struct IndexTableOfContents { impl IndexTableOfContents where - T: SizeMeasurable, + T: SizeMeasurable + Ord + Eq, { pub fn new(space_id: SpaceId, next_page_id: Arc) -> Self { let page_id = next_page_id.fetch_add(1, Ordering::Relaxed); @@ -40,10 +40,7 @@ where } } - pub fn get(&self, node_id: &T) -> Option - where - T: Ord + Eq, - { + pub fn get(&self, node_id: &T) -> Option { for page in &self.pages { if page.inner.contains(node_id) { return Some( @@ -63,7 +60,7 @@ where pub fn insert(&mut self, node_id: T, page_id: PageId) where - T: Clone + Ord + Eq + SizeMeasurable, + T: Clone + SizeMeasurable, { let next_page_id = self.next_page_id.clone(); @@ -95,7 +92,7 @@ where pub fn remove(&mut self, node_id: &T) where - T: Clone + Ord + Eq + SizeMeasurable, + T: Clone + SizeMeasurable, { let mut removed = false; let mut i = 0; @@ -117,10 +114,7 @@ where self.pages.iter().flat_map(|v| v.inner.iter()) } - pub fn update_key(&mut self, old_key: &T, new_key: T) - where - T: Ord + Eq, - { + pub fn update_key(&mut self, old_key: &T, new_key: T) { let page = self.get_current_page_mut(); page.inner.update_key(old_key, new_key); } @@ -133,8 +127,6 @@ where pub async fn persist(&mut self, file: &mut File) -> eyre::Result<()> where T: Archive - + Ord - + Eq + Clone + SizeMeasurable + for<'a> Serialize< @@ -157,8 +149,6 @@ where ) -> eyre::Result where T: Archive - + Ord - + Eq + Clone + SizeMeasurable + for<'a> Serialize< diff --git a/src/persistence/space/index/util.rs b/src/persistence/space/index/util.rs index 0b07e92c..c5fcf09f 100644 --- a/src/persistence/space/index/util.rs +++ b/src/persistence/space/index/util.rs @@ -10,7 +10,7 @@ pub fn map_index_pages_to_toc_and_general( Vec>>, ) where - T: Clone + Ord + Eq + SizeMeasurable, + T: Clone + Default + Ord + Eq + SizeMeasurable, { let mut general_index_pages = vec![]; let next_page_id = Arc::new(AtomicU32::new(1)); From 1cf96b24069a9417f108e2e2e8ff83d42aa2aac0 Mon Sep 17 00:00:00 2001 From: Kira Sotnikov Date: Wed, 2 Apr 2025 18:31:01 +0300 Subject: [PATCH 03/11] wip --- Cargo.toml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 499072a6..0d315bf1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,11 +26,12 @@ worktable_codegen = { path = "codegen", version = "0.5.1" } futures = "0.3.30" uuid = { version = "1.10.0", features = ["v4"] } #data_bucket = "0.2.1" -data_bucket = { git = "https://github.com/pathscale/DataBucket", branch = "main" } -# data_bucket = { path = "../DataBucket", version = "0.2.1" } +# data_bucket = { git = "https://github.com/pathscale/DataBucket", branch = "main" } +data_bucket = { path = "../DataBucket", version = "0.2.1" } performance_measurement_codegen = { path = "performance_measurement/codegen", version = "0.1.0", optional = true } performance_measurement = { path = "performance_measurement", version = "0.1.0", optional = true } -indexset = { version = "0.11.2", features = ["concurrent", "cdc", "multimap"] } +#indexset = { version = "0.11.2", features = ["concurrent", "cdc", "multimap"] } +indexset = { path = "../indexset", features = ["concurrent", "cdc", "multimap"] } convert_case = "0.6.0" ordered-float = "5.0.0" serde = { version = "1.0.215", features = ["derive"] } From 266b54dfa0370a3251197dc059d41b0df4ab2396 Mon Sep 17 00:00:00 2001 From: Kira Sotnikov Date: Fri, 4 Apr 2025 18:29:21 +0300 Subject: [PATCH 04/11] MemStat for WT --- Cargo.toml | 7 +- codegen/src/lib.rs | 8 +- codegen/src/{heap_size => mem_stat}/mod.rs | 74 +++--- codegen/src/worktable/generator/index.rs | 49 +++- codegen/src/worktable/generator/row.rs | 2 +- examples/Cargo.toml | 3 + examples/src/main.rs | 29 +-- src/index/table_secondary_index.rs | 7 + src/lib.rs | 6 +- src/mem_stat/mod.rs | 257 ++++++++++++++++++++ src/table/system_info.rs | 260 +++++++-------------- 11 files changed, 480 insertions(+), 222 deletions(-) rename codegen/src/{heap_size => mem_stat}/mod.rs (56%) create mode 100644 src/mem_stat/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 0d315bf1..216d7c92 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,12 +26,13 @@ worktable_codegen = { path = "codegen", version = "0.5.1" } futures = "0.3.30" uuid = { version = "1.10.0", features = ["v4"] } #data_bucket = "0.2.1" -# data_bucket = { git = "https://github.com/pathscale/DataBucket", branch = "main" } -data_bucket = { path = "../DataBucket", version = "0.2.1" } +data_bucket = { git = "https://github.com/pathscale/DataBucket", rev = "6c8470d" } +# data_bucket = { path = "../DataBucket", version = "0.2.1" } performance_measurement_codegen = { path = "performance_measurement/codegen", version = "0.1.0", optional = true } performance_measurement = { path = "performance_measurement", version = "0.1.0", optional = true } -#indexset = { version = "0.11.2", features = ["concurrent", "cdc", "multimap"] } +# indexset = { version = "0.11.2", features = ["concurrent", "cdc", "multimap"] } indexset = { path = "../indexset", features = ["concurrent", "cdc", "multimap"] } convert_case = "0.6.0" ordered-float = "5.0.0" serde = { version = "1.0.215", features = ["derive"] } +prettytable-rs = "^0.10" diff --git a/codegen/src/lib.rs b/codegen/src/lib.rs index 2211eb8e..18fbc413 100644 --- a/codegen/src/lib.rs +++ b/codegen/src/lib.rs @@ -1,4 +1,4 @@ -mod heap_size; +mod mem_stat; mod name_generator; mod persist_index; mod persist_table; @@ -28,9 +28,9 @@ pub fn persist_table(input: TokenStream) -> TokenStream { .into() } -#[proc_macro_derive(HeapSize)] -pub fn heap_size(input: TokenStream) -> TokenStream { - heap_size::expand(input.into()) +#[proc_macro_derive(MemStat)] +pub fn mem_stat(input: TokenStream) -> TokenStream { + mem_stat::expand(input.into()) .unwrap_or_else(|e| e.to_compile_error()) .into() } diff --git a/codegen/src/heap_size/mod.rs b/codegen/src/mem_stat/mod.rs similarity index 56% rename from codegen/src/heap_size/mod.rs rename to codegen/src/mem_stat/mod.rs index ded7d9d7..f6c5376f 100644 --- a/codegen/src/heap_size/mod.rs +++ b/codegen/src/mem_stat/mod.rs @@ -2,24 +2,39 @@ use proc_macro2::TokenStream; use quote::quote; use syn::{Data, DeriveInput, Fields, Result, Type}; -pub fn expand(input: proc_macro2::TokenStream) -> Result { - let input: DeriveInput = syn::parse2(input)?; - let name = &input.ident; +fn gen_heap_size_body(data: &Data) -> Result { + gen_mem_fn_body( + data, + quote! { heap_size() }, + quote! { std::mem::size_of::() }, + ) +} + +fn gen_used_size_body(data: &Data) -> Result { + gen_mem_fn_body( + data, + quote! { used_size() }, + quote! { std::mem::size_of::() }, + ) +} - let body = match &input.data { +fn gen_mem_fn_body( + data: &Data, + method: TokenStream, + default_for_copy: TokenStream, +) -> Result { + match data { Data::Struct(data_struct) => { let fields = match &data_struct.fields { - Fields::Named(fields_named) => fields_named.named.iter().collect::>(), - Fields::Unnamed(fields_unnamed) => { - fields_unnamed.unnamed.iter().collect::>() - } + Fields::Named(named) => named.named.iter().collect::>(), + Fields::Unnamed(unnamed) => unnamed.unnamed.iter().collect::>(), Fields::Unit => vec![], }; if fields.is_empty() { - quote! { 0 } + Ok(quote! { 0 }) } else if fields.iter().all(|f| is_copy_primitive(&f.ty)) { - quote! { std::mem::size_of::() } + Ok(default_for_copy) } else { let field_sizes = fields.iter().enumerate().map(|(i, f)| { let accessor = match &f.ident { @@ -29,33 +44,40 @@ pub fn expand(input: proc_macro2::TokenStream) -> Result { quote! { self.#index } } }; - quote! { size += #accessor.heap_size(); } + quote! { size += #accessor.#method; } }); - quote! { + Ok(quote! { let mut size = 0; #(#field_sizes)* size - } + }) } } - _ => { - return Err(syn::Error::new_spanned( - name, - "#[derive(HeapSize)] only supports structs", - )); - } - }; + _ => Err(syn::Error::new_spanned( + method, + "#[derive(MemStat)] only supports structs", + )), + } +} + +pub fn expand(input: proc_macro2::TokenStream) -> Result { + let input: DeriveInput = syn::parse2(input)?; + let name = &input.ident; - let t = quote! { - impl HeapSize for #name { + let heap = gen_heap_size_body(&input.data)?; + let used = gen_used_size_body(&input.data)?; + + Ok(quote! { + impl MemStat for #name { fn heap_size(&self) -> usize { - #body + #heap + } + fn used_size(&self) -> usize { + #used } } - }; - println!("{}", t); - Ok(t) + }) } fn is_copy_primitive(ty: &Type) -> bool { diff --git a/codegen/src/worktable/generator/index.rs b/codegen/src/worktable/generator/index.rs index 99e10ad2..902ce851 100644 --- a/codegen/src/worktable/generator/index.rs +++ b/codegen/src/worktable/generator/index.rs @@ -25,7 +25,7 @@ impl Generator { /// Generates table's secondary index struct definition. It has fields with index names and types varying on index /// uniqueness. For unique index it's `TreeIndex>>`. - /// Index also derives `PersistIndex` macro. + /// Index also derives `PersistIndex` and `MemStat` macro. fn gen_type_def(&mut self) -> TokenStream { let name_generator = WorktableNameGenerator::from_table_name(self.name.to_string()); let ident = name_generator.get_index_type_ident(); @@ -47,11 +47,11 @@ impl Generator { let derive = if self.is_persist { quote! { - #[derive(Debug, HeapSize, Default, PersistIndex)] + #[derive(Debug, MemStat, Default, PersistIndex)] } } else { quote! { - #[derive(Debug, HeapSize, Default)] + #[derive(Debug, MemStat, Default)] } }; @@ -73,12 +73,14 @@ impl Generator { let save_row_fn = self.gen_save_row_index_fn(); let delete_row_fn = self.gen_delete_row_index_fn(); let process_difference_fn = self.gen_process_difference_index_fn(); + let info_fn = self.gen_index_info_fn(); quote! { impl TableSecondaryIndex<#row_type_ident, #avt_type_ident> for #index_type_ident { #save_row_fn #delete_row_fn #process_difference_fn + #info_fn } } } @@ -357,6 +359,47 @@ impl Generator { } } } + + fn gen_index_info_fn(&self) -> TokenStream { + let rows = self.columns.indexes.iter().map(|(_, idx)| { + let index_field_name = &idx.name; + let index_name_str = index_field_name.to_string(); + + if idx.is_unique { + quote! { + + info.push(IndexInfo { + name: #index_name_str.to_string(), + index_type: IndexKind::Unique, + key_count: self.#index_field_name.len(), + capacity: self.#index_field_name.capacity(), + heap_size: self.#index_field_name.heap_size(), + used_size: self.#index_field_name.used_size(), + + }); + } + } else { + quote! { + info.push(IndexInfo { + name: #index_name_str.to_string(), + index_type: IndexKind::NonUnique, + key_count: self.#index_field_name.len(), + capacity: self.#index_field_name.capacity(), + heap_size: self.#index_field_name.heap_size(), + used_size: self.#index_field_name.used_size(), + }); + } + } + }); + + quote! { + fn index_info(&self) -> Vec { + let mut info = Vec::new(); + #(#rows)* + info + } + } + } } // TODO: tests... diff --git a/codegen/src/worktable/generator/row.rs b/codegen/src/worktable/generator/row.rs index 163a0802..db3b779c 100644 --- a/codegen/src/worktable/generator/row.rs +++ b/codegen/src/worktable/generator/row.rs @@ -96,7 +96,7 @@ impl Generator { .collect(); quote! { - #[derive(rkyv::Archive, Debug, rkyv::Deserialize, Clone, rkyv::Serialize, PartialEq, HeapSize)] + #[derive(rkyv::Archive, Debug, rkyv::Deserialize, Clone, rkyv::Serialize, PartialEq, MemStat)] #[rkyv(derive(Debug))] #[repr(C)] pub struct #ident { diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 22e2b318..58178f1f 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -14,3 +14,6 @@ futures = "0.3.30" async-std = "1.10" either = "1.15.0" ordered-float = "5.0.0" +indexset = {path = "../../indexset"} +tokio = { version = "1", features = ["full"] } + diff --git a/examples/src/main.rs b/examples/src/main.rs index 944d1d17..f46d5d3a 100644 --- a/examples/src/main.rs +++ b/examples/src/main.rs @@ -2,10 +2,12 @@ use futures::executor::block_on; use worktable::prelude::*; use worktable::worktable; -fn main() { +#[tokio::main] +async fn main() { // describe WorkTable worktable!( name: My, + persist: true, columns: { id: u64 primary_key autoincrement, val: i64, @@ -18,7 +20,7 @@ fn main() { }, indexes: { idx1: attr, - idx2: attr2, + idx2: attr2 unique, idx3: attr_string, }, queries: { @@ -35,7 +37,8 @@ fn main() { ); // Init Worktable - let my_table = MyWorkTable::default(); + let config = PersistenceConfig::new("data", "data"); + let my_table = MyWorkTable::new(config).await.unwrap(); // WT rows (has prefix My because of table name) let row = MyRow { @@ -48,7 +51,7 @@ fn main() { attr_string: "String_attr0".to_string(), }; - for i in 2..100000 as i64 { + for i in 2..1000000 as i64 { let row = MyRow { val: 777, attr: format!("Attribute{}", i), @@ -66,18 +69,18 @@ fn main() { let pk: MyPrimaryKey = my_table.insert(row).expect("primary key"); // Select ALL records from WT - let select_all = my_table.select_all().execute(); + let _select_all = my_table.select_all().execute(); //println!("Select All {:?}", select_all); // Select All records with attribute TEST - let select_all = my_table.select_all().execute(); + let _select_all = my_table.select_all().execute(); //println!("Select All {:?}", select_all); // Select by Idx - let select_by_attr = my_table - .select_by_attr("Attribute1".to_string()) - .execute() - .unwrap(); + //let _select_by_attr = my_table + // .select_by_attr("Attribute1".to_string()) + // .execute() + //r .unwrap(); //for row in select_by_attr { // println!("Select by idx, row {:?}", row); @@ -87,16 +90,16 @@ fn main() { let update = my_table.update_val_by_id(ValByIdQuery { val: 1337 }, pk.clone()); let _ = block_on(update); - let select_all = my_table.select_all().execute(); + let _select_all = my_table.select_all().execute(); //println!("Select after update val {:?}", select_all); let delete = my_table.delete(pk); let _ = block_on(delete); - let select_all = my_table.select_all().execute(); + let _select_all = my_table.select_all().execute(); //println!("Select after delete {:?}", select_all); let info = my_table.system_info(); - println!("{}", info.pretty()); + println!("{info}"); } diff --git a/src/index/table_secondary_index.rs b/src/index/table_secondary_index.rs index 5f5a4571..c197f652 100644 --- a/src/index/table_secondary_index.rs +++ b/src/index/table_secondary_index.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use data_bucket::Link; +use crate::system_info::IndexInfo; use crate::Difference; use crate::WorkTableError; @@ -15,6 +16,8 @@ pub trait TableSecondaryIndex { link: Link, differences: HashMap<&str, Difference>, ) -> Result<(), WorkTableError>; + + fn index_info(&self) -> Vec; } pub trait TableSecondaryIndexCdc { @@ -46,4 +49,8 @@ where ) -> Result<(), WorkTableError> { Ok(()) } + + fn index_info(&self) -> Vec { + vec![] + } } diff --git a/src/lib.rs b/src/lib.rs index 523cbf21..75f7ed16 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ mod primary_key; mod row; mod table; pub use data_bucket; +mod mem_stat; mod persistence; mod util; @@ -22,6 +23,7 @@ pub use worktable_codegen::worktable; pub mod prelude { pub use crate::in_memory::{Data, DataPages, RowWrapper, StorableRow}; pub use crate::lock::LockMap; + pub use crate::mem_stat::MemStat; pub use crate::persistence::{ map_index_pages_to_toc_and_general, DeleteOperation, IndexTableOfContents, InsertOperation, Operation, PersistenceConfig, PersistenceEngine, PersistenceEngineOps, PersistenceTask, @@ -30,7 +32,7 @@ pub mod prelude { }; pub use crate::primary_key::{PrimaryKeyGenerator, PrimaryKeyGeneratorState, TablePrimaryKey}; pub use crate::table::select::{Order, QueryParams, SelectQueryBuilder, SelectQueryExecutor}; - pub use crate::table::system_info::{HeapSize, SystemInfo}; + pub use crate::table::system_info::{IndexInfo, IndexKind, SystemInfo}; pub use crate::util::{OrderedF32Def, OrderedF64Def}; pub use crate::{ lock::Lock, Difference, IndexMap, IndexMultiMap, TableIndex, TableIndexCdc, TableRow, @@ -46,7 +48,7 @@ pub mod prelude { pub use derive_more::{From, Into}; pub use lockfree::set::Set as LockFreeSet; - pub use worktable_codegen::{HeapSize, PersistIndex, PersistTable}; + pub use worktable_codegen::{MemStat, PersistIndex, PersistTable}; pub const WT_INDEX_EXTENSION: &str = ".wt.idx"; pub const WT_DATA_EXTENSION: &str = ".wt.data"; diff --git a/src/mem_stat/mod.rs b/src/mem_stat/mod.rs new file mode 100644 index 00000000..4edc6253 --- /dev/null +++ b/src/mem_stat/mod.rs @@ -0,0 +1,257 @@ +use crate::IndexMap; +use crate::IndexMultiMap; +use data_bucket::Link; +use std::collections::HashMap; +use std::rc::Rc; +use std::sync::Arc; + +pub trait MemStat { + fn heap_size(&self) -> usize; + fn used_size(&self) -> usize; +} + +impl MemStat for Option { + fn heap_size(&self) -> usize { + self.as_ref().map_or(0, |v| v.heap_size()) + } + fn used_size(&self) -> usize { + self.as_ref().map_or(0, |v| v.used_size()) + } +} + +impl MemStat for Vec { + fn heap_size(&self) -> usize { + self.capacity() * std::mem::size_of::() + + self.iter().map(|v| v.heap_size()).sum::() + } + fn used_size(&self) -> usize { + self.len() * std::mem::size_of::() + self.iter().map(|v| v.used_size()).sum::() + } +} + +impl MemStat for String { + fn heap_size(&self) -> usize { + self.capacity() + } + fn used_size(&self) -> usize { + self.len() + } +} + +impl MemStat for IndexMap +where + K: Ord + Clone + 'static + MemStat + Send, + V: Clone + 'static + MemStat + Send, +{ + fn heap_size(&self) -> usize { + let slot_size = std::mem::size_of::>(); + let base_heap = self.capacity() * slot_size; + + let kv_heap: usize = self + .iter() + .map(|(k, v)| k.heap_size() + v.heap_size()) + .sum(); + + base_heap + kv_heap + } + + fn used_size(&self) -> usize { + let pair_size = std::mem::size_of::>(); + let base = self.len() * pair_size; + + let used: usize = self + .iter() + .map(|(k, v)| k.used_size() + v.used_size()) + .sum(); + + base + used + } +} + +impl MemStat for IndexMultiMap +where + K: Ord + Clone + 'static + MemStat + Send, + V: Ord + Clone + 'static + MemStat + Send, +{ + fn heap_size(&self) -> usize { + let slot_size = std::mem::size_of::>(); + let base_heap = self.capacity() * slot_size; + + let kv_heap: usize = self + .iter() + .map(|(k, v)| k.heap_size() + v.heap_size()) + .sum(); + + base_heap + kv_heap + } + + fn used_size(&self) -> usize { + let pair_size = std::mem::size_of::>(); + let base = self.len() * pair_size; + + let used: usize = self + .iter() + .map(|(k, v)| k.used_size() + v.used_size()) + .sum(); + + base + used + } +} + +impl MemStat for Box { + fn heap_size(&self) -> usize { + std::mem::size_of::() + (**self).heap_size() + } + fn used_size(&self) -> usize { + std::mem::size_of::() + (**self).used_size() + } +} + +impl MemStat for Arc { + fn heap_size(&self) -> usize { + std::mem::size_of::() + (**self).heap_size() + } + fn used_size(&self) -> usize { + std::mem::size_of::() + (**self).used_size() + } +} + +impl MemStat for Rc { + fn heap_size(&self) -> usize { + std::mem::size_of::() + (**self).heap_size() + } + fn used_size(&self) -> usize { + std::mem::size_of::() + (**self).used_size() + } +} + +impl MemStat for HashMap { + fn heap_size(&self) -> usize { + let bucket_size = size_of::<(K, V)>(); + let base_heap = self.capacity() * bucket_size; + + let kv_heap: usize = self + .iter() + .map(|(k, v)| k.heap_size() + v.heap_size()) + .sum(); + + base_heap + kv_heap + } + fn used_size(&self) -> usize { + let bucket_size = size_of::<(K, V)>(); + let base_used = self.len() * bucket_size; + + let kv_used: usize = self + .iter() + .map(|(k, v)| k.used_size() + v.used_size()) + .sum(); + + base_used + kv_used + } +} + +impl MemStat for u8 { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} +impl MemStat for u16 { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} +impl MemStat for u32 { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} +impl MemStat for i32 { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} +impl MemStat for u64 { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} +impl MemStat for i64 { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} +impl MemStat for f64 { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} +impl MemStat for f32 { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} +impl MemStat for usize { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} +impl MemStat for isize { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} +impl MemStat for bool { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} +impl MemStat for char { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} + +impl MemStat for Link { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } +} diff --git a/src/table/system_info.rs b/src/table/system_info.rs index 1853d7c8..17a9fc85 100644 --- a/src/table/system_info.rs +++ b/src/table/system_info.rs @@ -1,10 +1,10 @@ -use data_bucket::Link; -use std::collections::HashMap; -use std::rc::Rc; -use std::sync::Arc; +use std::fmt::{self, Display, Formatter}; + +use prettytable::{format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR, row, Table}; use crate::in_memory::{RowWrapper, StorableRow}; -use crate::{IndexMap, IndexMultiMap, WorkTable}; +use crate::mem_stat::MemStat; +use crate::{TableSecondaryIndex, WorkTable}; #[derive(Debug)] pub struct SystemInfo { @@ -14,158 +14,31 @@ pub struct SystemInfo { pub empty_slots: u64, pub memory_usage_bytes: u64, pub idx_size: usize, + pub indexes_info: Vec, } -pub trait HeapSize { - fn heap_size(&self) -> usize; -} - -impl HeapSize for Option { - fn heap_size(&self) -> usize { - self.as_ref().map_or(0, |v| v.heap_size()) - } -} - -impl HeapSize for Vec { - fn heap_size(&self) -> usize { - self.capacity() * std::mem::size_of::() - + self.iter().map(|v| v.heap_size()).sum::() - } -} - -impl HeapSize for String { - fn heap_size(&self) -> usize { - self.capacity() - } -} - -impl HeapSize for IndexMap -where - T: Ord + Clone + 'static + HeapSize + std::marker::Send, - V: Ord + Clone + 'static + HeapSize + std::marker::Send, -{ - fn heap_size(&self) -> usize { - let mut size = std::mem::size_of_val(self); - - for (k, v) in self.iter() { - size += k.heap_size(); - size += v.heap_size(); - } - - size - } -} - -impl HeapSize for IndexMultiMap -where - T: Ord + Clone + 'static + HeapSize + std::marker::Send, - V: Ord + Clone + 'static + HeapSize + std::marker::Send, -{ - fn heap_size(&self) -> usize { - let mut size = std::mem::size_of_val(self); - - for (k, v) in self.iter() { - size += k.heap_size(); - size += v.heap_size(); - } - - size - } -} - -impl HeapSize for Link { - fn heap_size(&self) -> usize { - std::mem::size_of_val(self) - } -} - -impl HeapSize for Box { - fn heap_size(&self) -> usize { - std::mem::size_of::() + (**self).heap_size() - } -} - -impl HeapSize for Arc { - fn heap_size(&self) -> usize { - std::mem::size_of::() + (**self).heap_size() - } +#[derive(Debug)] +pub struct IndexInfo { + pub name: String, + pub index_type: IndexKind, + pub key_count: usize, + pub capacity: usize, + pub heap_size: usize, + pub used_size: usize, } -impl HeapSize for Rc { - fn heap_size(&self) -> usize { - std::mem::size_of::() + (**self).heap_size() - } +#[derive(Debug)] +pub enum IndexKind { + Unique, + NonUnique, } -impl HeapSize for HashMap { - fn heap_size(&self) -> usize { - let mut size = 0; - for (k, v) in self.iter() { - size += k.heap_size(); - size += v.heap_size(); +impl Display for IndexKind { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Self::Unique => write!(f, "unique"), + Self::NonUnique => write!(f, "non unique"), } - size - } -} - -impl HeapSize for u8 { - fn heap_size(&self) -> usize { - 0 - } -} -impl HeapSize for u16 { - fn heap_size(&self) -> usize { - 0 - } -} -impl HeapSize for u32 { - fn heap_size(&self) -> usize { - 0 - } -} -impl HeapSize for i32 { - fn heap_size(&self) -> usize { - 0 - } -} -impl HeapSize for u64 { - fn heap_size(&self) -> usize { - 0 - } -} -impl HeapSize for i64 { - fn heap_size(&self) -> usize { - 0 - } -} -impl HeapSize for f64 { - fn heap_size(&self) -> usize { - 0 - } -} -impl HeapSize for f32 { - fn heap_size(&self) -> usize { - 0 - } -} -impl HeapSize for usize { - fn heap_size(&self) -> usize { - 0 - } -} -impl HeapSize for isize { - fn heap_size(&self) -> usize { - 0 - } -} -impl HeapSize for bool { - fn heap_size(&self) -> usize { - 0 - } -} -impl HeapSize for char { - fn heap_size(&self) -> usize { - 0 } } @@ -173,7 +46,7 @@ impl< Row, PrimaryKey, AvailableTypes, - SecondaryIndexes: HeapSize, + SecondaryIndexes: MemStat + TableSecondaryIndex, LockType, PkGen, const DATA_LENGTH: usize, @@ -205,28 +78,75 @@ where empty_slots: empty_links as u64, memory_usage_bytes, idx_size, + indexes_info: self.indexes.index_info(), } } } -impl SystemInfo { - pub fn pretty(&self) -> String { - let mem_kb = self.memory_usage_bytes as f64 / 1024.0; - let idx_kb = self.idx_size as f64 / 1024.0; - - format!( - "\ -|| Table: {}\n\ -|| Rows: {} ({} pages, {} empty slots)\n\ -|| Memory: {:.2} KB (data) + {:.2} KB (indexes)\n\ -|| Total: {:.2} KB", - self.table_name, - self.row_count, - self.page_count, - self.empty_slots, - mem_kb, - idx_kb, - mem_kb + idx_kb, - ) +impl Display for SystemInfo { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let mem_fmt = fmt_bytes(self.memory_usage_bytes as usize); + let idx_fmt = fmt_bytes(self.idx_size); + let total_fmt = fmt_bytes(self.memory_usage_bytes as usize + self.idx_size); + + writeln!(f, "┌──────────────────────────────┐")?; + writeln!(f, " \t Table Name: {:<5}", self.table_name)?; + writeln!(f, "└──────────────────────────────┘")?; + writeln!( + f, + "Rows: {} Pages: {} Empty slots: {}", + self.row_count, self.page_count, self.empty_slots + )?; + writeln!( + f, + "Allocated Memory: {} (data) + {} (indexes) = {} total\n", + mem_fmt, idx_fmt, total_fmt + )?; + + let mut table = Table::new(); + table.set_format(*FORMAT_NO_BORDER_LINE_SEPARATOR); + table.add_row(row!["Index", "Type", "Keys", "Capacity", "Heap", "Used"]); + + for idx in &self.indexes_info { + table.add_row(row![ + idx.name, + idx.index_type.to_string(), + idx.key_count, + idx.capacity, + fmt_bytes(idx.heap_size), + fmt_bytes(idx.used_size), + ]); + } + + let mut buffer = Vec::new(); + table.print(&mut buffer).unwrap(); + let table_str = String::from_utf8(buffer).unwrap(); + writeln!(f, "{}", table_str.trim_end())?; + + Ok(()) + } +} + +fn fmt_bytes(bytes: usize) -> String { + const KB: f64 = 1024.0; + const MB: f64 = 1024.0 * KB; + const GB: f64 = 1024.0 * MB; + + let b = bytes as f64; + + let (value, unit) = if b >= GB { + (b / GB, "GB") + } else if b >= MB { + (b / MB, "MB") + } else if b >= KB { + (b / KB, "KB") + } else { + return format!("{} B", bytes); + }; + + if (value.fract() * 100.0).round() == 0.0 { + format!("{:.0} {}", value, unit) + } else { + format!("{:.2} {}", value, unit) } } From a17896dee2a424bd94bf0dfa19d920b8956a7f28 Mon Sep 17 00:00:00 2001 From: Kira Sotnikov Date: Fri, 4 Apr 2025 18:38:20 +0300 Subject: [PATCH 05/11] fix comment --- examples/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/main.rs b/examples/src/main.rs index f46d5d3a..12fcd92e 100644 --- a/examples/src/main.rs +++ b/examples/src/main.rs @@ -62,7 +62,7 @@ async fn main() { attr_string: format!("String_attr{}", i), }; - my_table.insert(row).unwrap(); // или ? если ты внутри Result + my_table.insert(row).unwrap(); } // insert From e9daa250a25538ee09a40675cc046a265c3c6d44 Mon Sep 17 00:00:00 2001 From: Kira Sotnikov Date: Fri, 4 Apr 2025 20:38:36 +0300 Subject: [PATCH 06/11] Add node_count --- codegen/src/worktable/generator/index.rs | 3 +++ src/table/system_info.rs | 12 +++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/codegen/src/worktable/generator/index.rs b/codegen/src/worktable/generator/index.rs index 902ce851..0165a9b1 100644 --- a/codegen/src/worktable/generator/index.rs +++ b/codegen/src/worktable/generator/index.rs @@ -375,6 +375,8 @@ impl Generator { capacity: self.#index_field_name.capacity(), heap_size: self.#index_field_name.heap_size(), used_size: self.#index_field_name.used_size(), + node_count: self.#index_field_name.node_count(), + }); } @@ -387,6 +389,7 @@ impl Generator { capacity: self.#index_field_name.capacity(), heap_size: self.#index_field_name.heap_size(), used_size: self.#index_field_name.used_size(), + node_count: self.#index_field_name.node_count(), }); } } diff --git a/src/table/system_info.rs b/src/table/system_info.rs index 17a9fc85..618fc868 100644 --- a/src/table/system_info.rs +++ b/src/table/system_info.rs @@ -25,6 +25,7 @@ pub struct IndexInfo { pub capacity: usize, pub heap_size: usize, pub used_size: usize, + pub node_count: usize, } #[derive(Debug)] @@ -105,7 +106,15 @@ impl Display for SystemInfo { let mut table = Table::new(); table.set_format(*FORMAT_NO_BORDER_LINE_SEPARATOR); - table.add_row(row!["Index", "Type", "Keys", "Capacity", "Heap", "Used"]); + table.add_row(row![ + "Index", + "Type", + "Keys", + "Capacity", + "Node Count", + "Heap", + "Used" + ]); for idx in &self.indexes_info { table.add_row(row![ @@ -113,6 +122,7 @@ impl Display for SystemInfo { idx.index_type.to_string(), idx.key_count, idx.capacity, + idx.node_count, fmt_bytes(idx.heap_size), fmt_bytes(idx.used_size), ]); From c588ab50dcf1ac558ea2e47a3f0afdbecc97243c Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sat, 5 Apr 2025 13:50:08 +0300 Subject: [PATCH 07/11] corrections --- Cargo.toml | 4 +- src/mem_stat/mod.rs | 127 +++++++++-------------------------- tests/worktable/custom_pk.rs | 1 + tests/worktable/with_enum.rs | 2 +- 4 files changed, 37 insertions(+), 97 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 216d7c92..0dd162bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,8 +30,8 @@ data_bucket = { git = "https://github.com/pathscale/DataBucket", rev = "6c8470d" # data_bucket = { path = "../DataBucket", version = "0.2.1" } performance_measurement_codegen = { path = "performance_measurement/codegen", version = "0.1.0", optional = true } performance_measurement = { path = "performance_measurement", version = "0.1.0", optional = true } -# indexset = { version = "0.11.2", features = ["concurrent", "cdc", "multimap"] } -indexset = { path = "../indexset", features = ["concurrent", "cdc", "multimap"] } +indexset = { version = "0.11.3", features = ["concurrent", "cdc", "multimap"] } +# indexset = { path = "../indexset", features = ["concurrent", "cdc", "multimap"] } convert_case = "0.6.0" ordered-float = "5.0.0" serde = { version = "1.0.215", features = ["derive"] } diff --git a/src/mem_stat/mod.rs b/src/mem_stat/mod.rs index 4edc6253..86b8a36b 100644 --- a/src/mem_stat/mod.rs +++ b/src/mem_stat/mod.rs @@ -1,10 +1,14 @@ -use crate::IndexMap; -use crate::IndexMultiMap; -use data_bucket::Link; use std::collections::HashMap; use std::rc::Rc; use std::sync::Arc; +use data_bucket::Link; +use ordered_float::OrderedFloat; +use uuid::Uuid; + +use crate::IndexMap; +use crate::IndexMultiMap; + pub trait MemStat { fn heap_size(&self) -> usize; fn used_size(&self) -> usize; @@ -150,108 +154,43 @@ impl MemStat for HashMap { } } -impl MemStat for u8 { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } -} -impl MemStat for u16 { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } -} -impl MemStat for u32 { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } -} -impl MemStat for i32 { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } -} -impl MemStat for u64 { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } -} -impl MemStat for i64 { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } -} -impl MemStat for f64 { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } -} -impl MemStat for f32 { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } -} -impl MemStat for usize { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } -} -impl MemStat for isize { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } -} -impl MemStat for bool { +impl MemStat for OrderedFloat +where + T: MemStat, +{ fn heap_size(&self) -> usize { - 0 + self.0.heap_size() } + fn used_size(&self) -> usize { - 0 + self.0.used_size() } } -impl MemStat for char { + +impl MemStat for [u8] { fn heap_size(&self) -> usize { 0 } + fn used_size(&self) -> usize { 0 } } -impl MemStat for Link { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } +macro_rules! zero_mem_stat_impl { + ($($t:ident),+) => { + $( + impl MemStat for $t { + fn heap_size(&self) -> usize { + 0 + } + fn used_size(&self) -> usize { + 0 + } + } + )+ + }; } + +zero_mem_stat_impl!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize, f32, f64, bool); +zero_mem_stat_impl!(Link, Uuid); diff --git a/tests/worktable/custom_pk.rs b/tests/worktable/custom_pk.rs index 0cf95274..66fa5fe4 100644 --- a/tests/worktable/custom_pk.rs +++ b/tests/worktable/custom_pk.rs @@ -18,6 +18,7 @@ use worktable::worktable; Ord, Serialize, SizeMeasure, + MemStat, )] #[rkyv(compare(PartialEq), derive(Debug, PartialOrd, PartialEq, Eq, Ord))] struct CustomId(u64); diff --git a/tests/worktable/with_enum.rs b/tests/worktable/with_enum.rs index 94bdcf21..95c4db0f 100644 --- a/tests/worktable/with_enum.rs +++ b/tests/worktable/with_enum.rs @@ -2,7 +2,7 @@ use rkyv::{Archive, Deserialize, Serialize}; use worktable::prelude::*; use worktable::worktable; -#[derive(Archive, Clone, Copy, Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[derive(Archive, Clone, Copy, Debug, Deserialize, Serialize, PartialEq, PartialOrd, MemStat)] #[rkyv(compare(PartialEq), derive(Debug))] pub enum SomeEnum { First, From 6f1ac2b1a7faf49a913a7d2abf0a28e784eb83cf Mon Sep 17 00:00:00 2001 From: Kira Sotnikov Date: Sat, 5 Apr 2025 20:12:12 +0300 Subject: [PATCH 08/11] Add more primitives, fix for enums --- Cargo.toml | 6 ++--- codegen/src/mem_stat/mod.rs | 54 ++++++++++++++++++++++++++++++++++++- examples/Cargo.toml | 2 +- src/mem_stat/primitives.rs | 46 +++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 6 deletions(-) create mode 100644 src/mem_stat/primitives.rs diff --git a/Cargo.toml b/Cargo.toml index 0dd162bb..372ff8a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,13 +25,11 @@ lockfree = { version = "0.5.1" } worktable_codegen = { path = "codegen", version = "0.5.1" } futures = "0.3.30" uuid = { version = "1.10.0", features = ["v4"] } -#data_bucket = "0.2.1" -data_bucket = { git = "https://github.com/pathscale/DataBucket", rev = "6c8470d" } -# data_bucket = { path = "../DataBucket", version = "0.2.1" } +data_bucket = { git = "https://github.com/pathscale/DataBucket", rev = "3eb4fc2" } +#data_bucket = { path = "../DataBucket", version = "0.2.1" } performance_measurement_codegen = { path = "performance_measurement/codegen", version = "0.1.0", optional = true } performance_measurement = { path = "performance_measurement", version = "0.1.0", optional = true } indexset = { version = "0.11.3", features = ["concurrent", "cdc", "multimap"] } -# indexset = { path = "../indexset", features = ["concurrent", "cdc", "multimap"] } convert_case = "0.6.0" ordered-float = "5.0.0" serde = { version = "1.0.215", features = ["derive"] } diff --git a/codegen/src/mem_stat/mod.rs b/codegen/src/mem_stat/mod.rs index f6c5376f..267d437f 100644 --- a/codegen/src/mem_stat/mod.rs +++ b/codegen/src/mem_stat/mod.rs @@ -54,9 +54,61 @@ fn gen_mem_fn_body( }) } } + + Data::Enum(enum_data) => { + let arms = enum_data.variants.iter().map(|variant| { + let name = &variant.ident; + match &variant.fields { + Fields::Unit => { + quote! { + Self::#name => 0, + } + } + Fields::Unnamed(fields) => { + let bindings: Vec<_> = (0..fields.unnamed.len()) + .map(|i| syn::Ident::new(&format!("f{}", i), variant.ident.span())) + .collect(); + + let calls = bindings + .iter() + .map(|b| quote! { #b.#method }) + .collect::>(); + quote! { + Self::#name(#(#bindings),*) => { + 0 #(+ #calls)* + }, + } + } + Fields::Named(fields) => { + let bindings: Vec<_> = fields + .named + .iter() + .map(|f| f.ident.as_ref().unwrap()) + .collect(); + + let calls = bindings + .iter() + .map(|b| quote! { #b.#method }) + .collect::>(); + quote! { + Self::#name { #(#bindings),* } => { + 0 #(+ #calls)* + }, + } + } + } + }); + + Ok(quote! { + match self { + #(#arms)* + } + }) + } + _ => Err(syn::Error::new_spanned( method, - "#[derive(MemStat)] only supports structs", + "#[derive(MemStat)] only supports structs and enums", )), } } diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 58178f1f..2ec33f39 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -14,6 +14,6 @@ futures = "0.3.30" async-std = "1.10" either = "1.15.0" ordered-float = "5.0.0" -indexset = {path = "../../indexset"} +indexset = { version = "0.11.3", features = ["concurrent", "cdc", "multimap"] } tokio = { version = "1", features = ["full"] } diff --git a/src/mem_stat/primitives.rs b/src/mem_stat/primitives.rs new file mode 100644 index 00000000..68b9f975 --- /dev/null +++ b/src/mem_stat/primitives.rs @@ -0,0 +1,46 @@ +use super::MemStat; + +macro_rules! impl_memstat_zero { + ($($t:ty),*) => { + $( + impl MemStat for $t { + fn heap_size(&self) -> usize { 0 } + fn used_size(&self) -> usize { 0 } + } + )* + }; +} + +impl_memstat_zero!( + u8, + i8, + u16, + i16, + u32, + i32, + u64, + i64, + usize, + isize, + f32, + f64, + bool, + char, + u128, + i128, + std::num::NonZeroU8, + std::num::NonZeroU16, + std::num::NonZeroU32, + std::num::NonZeroU64, + std::num::NonZeroU128, + std::num::NonZeroUsize, + std::num::NonZeroI8, + std::num::NonZeroI16, + std::num::NonZeroI32, + std::num::NonZeroI64, + std::num::NonZeroI128, + std::num::NonZeroIsize, + std::time::Duration, + std::time::SystemTime, + std::time::Instant +); From 1a559036d6a5f260dcd19729ae6e7a17d3ac9811 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 6 Apr 2025 12:04:35 +0300 Subject: [PATCH 09/11] clippy --- codegen/src/worktable/generator/index.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codegen/src/worktable/generator/index.rs b/codegen/src/worktable/generator/index.rs index 0165a9b1..54e9bdf2 100644 --- a/codegen/src/worktable/generator/index.rs +++ b/codegen/src/worktable/generator/index.rs @@ -361,7 +361,7 @@ impl Generator { } fn gen_index_info_fn(&self) -> TokenStream { - let rows = self.columns.indexes.iter().map(|(_, idx)| { + let rows = self.columns.indexes.values().map(|idx| { let index_field_name = &idx.name; let index_name_str = index_field_name.to_string(); From 31ca7acb623ce1e5ace955c79dc4f5d684999aea Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 6 Apr 2025 12:06:16 +0300 Subject: [PATCH 10/11] clippy --- examples/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/main.rs b/examples/src/main.rs index 12fcd92e..63b95647 100644 --- a/examples/src/main.rs +++ b/examples/src/main.rs @@ -51,7 +51,7 @@ async fn main() { attr_string: "String_attr0".to_string(), }; - for i in 2..1000000 as i64 { + for i in 2..1000000_i64 { let row = MyRow { val: 777, attr: format!("Attribute{}", i), From 3ff4c15cb6c6cf6957da0234178d23bd051e0585 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Sun, 6 Apr 2025 12:13:37 +0300 Subject: [PATCH 11/11] fix primitives module --- src/mem_stat/mod.rs | 32 ++++---------------------------- src/mem_stat/primitives.rs | 1 + 2 files changed, 5 insertions(+), 28 deletions(-) diff --git a/src/mem_stat/mod.rs b/src/mem_stat/mod.rs index 86b8a36b..3bbe0542 100644 --- a/src/mem_stat/mod.rs +++ b/src/mem_stat/mod.rs @@ -1,3 +1,5 @@ +mod primitives; + use std::collections::HashMap; use std::rc::Rc; use std::sync::Arc; @@ -6,8 +8,8 @@ use data_bucket::Link; use ordered_float::OrderedFloat; use uuid::Uuid; -use crate::IndexMap; use crate::IndexMultiMap; +use crate::{impl_memstat_zero, IndexMap}; pub trait MemStat { fn heap_size(&self) -> usize; @@ -167,30 +169,4 @@ where } } -impl MemStat for [u8] { - fn heap_size(&self) -> usize { - 0 - } - - fn used_size(&self) -> usize { - 0 - } -} - -macro_rules! zero_mem_stat_impl { - ($($t:ident),+) => { - $( - impl MemStat for $t { - fn heap_size(&self) -> usize { - 0 - } - fn used_size(&self) -> usize { - 0 - } - } - )+ - }; -} - -zero_mem_stat_impl!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize, f32, f64, bool); -zero_mem_stat_impl!(Link, Uuid); +impl_memstat_zero!(Link, Uuid, [u8]); diff --git a/src/mem_stat/primitives.rs b/src/mem_stat/primitives.rs index 68b9f975..6b7aaf6f 100644 --- a/src/mem_stat/primitives.rs +++ b/src/mem_stat/primitives.rs @@ -1,5 +1,6 @@ use super::MemStat; +#[macro_export] macro_rules! impl_memstat_zero { ($($t:ty),*) => { $(