diff --git a/Cargo.toml b/Cargo.toml index 499072a6..372ff8a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,12 +25,12 @@ lockfree = { version = "0.5.1" } worktable_codegen = { path = "codegen", version = "0.5.1" } futures = "0.3.30" uuid = { version = "1.10.0", features = ["v4"] } -#data_bucket = "0.2.1" -data_bucket = { git = "https://github.com/pathscale/DataBucket", branch = "main" } -# data_bucket = { path = "../DataBucket", version = "0.2.1" } +data_bucket = { git = "https://github.com/pathscale/DataBucket", rev = "3eb4fc2" } +#data_bucket = { path = "../DataBucket", version = "0.2.1" } performance_measurement_codegen = { path = "performance_measurement/codegen", version = "0.1.0", optional = true } performance_measurement = { path = "performance_measurement", version = "0.1.0", optional = true } -indexset = { version = "0.11.2", features = ["concurrent", "cdc", "multimap"] } +indexset = { version = "0.11.3", features = ["concurrent", "cdc", "multimap"] } convert_case = "0.6.0" ordered-float = "5.0.0" serde = { version = "1.0.215", features = ["derive"] } +prettytable-rs = "^0.10" diff --git a/codegen/src/lib.rs b/codegen/src/lib.rs index c9725b65..18fbc413 100644 --- a/codegen/src/lib.rs +++ b/codegen/src/lib.rs @@ -1,3 +1,4 @@ +mod mem_stat; mod name_generator; mod persist_index; mod persist_table; @@ -26,3 +27,10 @@ pub fn persist_table(input: TokenStream) -> TokenStream { .unwrap_or_else(|e| e.to_compile_error()) .into() } + +#[proc_macro_derive(MemStat)] +pub fn mem_stat(input: TokenStream) -> TokenStream { + mem_stat::expand(input.into()) + .unwrap_or_else(|e| e.to_compile_error()) + .into() +} diff --git a/codegen/src/mem_stat/mod.rs b/codegen/src/mem_stat/mod.rs new file mode 100644 index 00000000..267d437f --- /dev/null +++ b/codegen/src/mem_stat/mod.rs @@ -0,0 +1,148 @@ +use proc_macro2::TokenStream; +use quote::quote; +use syn::{Data, DeriveInput, Fields, Result, Type}; + +fn gen_heap_size_body(data: &Data) -> Result { + gen_mem_fn_body( + data, + quote! { heap_size() }, + quote! { std::mem::size_of::() }, + ) +} + +fn gen_used_size_body(data: &Data) -> Result { + gen_mem_fn_body( + data, + quote! { used_size() }, + quote! { std::mem::size_of::() }, + ) +} + +fn gen_mem_fn_body( + data: &Data, + method: TokenStream, + default_for_copy: TokenStream, +) -> Result { + match data { + Data::Struct(data_struct) => { + let fields = match &data_struct.fields { + Fields::Named(named) => named.named.iter().collect::>(), + Fields::Unnamed(unnamed) => unnamed.unnamed.iter().collect::>(), + Fields::Unit => vec![], + }; + + if fields.is_empty() { + Ok(quote! { 0 }) + } else if fields.iter().all(|f| is_copy_primitive(&f.ty)) { + Ok(default_for_copy) + } else { + let field_sizes = fields.iter().enumerate().map(|(i, f)| { + let accessor = match &f.ident { + Some(ident) => quote! { self.#ident }, + None => { + let index = syn::Index::from(i); + quote! { self.#index } + } + }; + quote! { size += #accessor.#method; } + }); + + Ok(quote! { + let mut size = 0; + #(#field_sizes)* + size + }) + } + } + + Data::Enum(enum_data) => { + let arms = enum_data.variants.iter().map(|variant| { + let name = &variant.ident; + match &variant.fields { + Fields::Unit => { + quote! { + Self::#name => 0, + } + } + Fields::Unnamed(fields) => { + let bindings: Vec<_> = (0..fields.unnamed.len()) + .map(|i| syn::Ident::new(&format!("f{}", i), variant.ident.span())) + .collect(); + + let calls = bindings + .iter() + .map(|b| quote! { #b.#method }) + .collect::>(); + quote! { + Self::#name(#(#bindings),*) => { + 0 #(+ #calls)* + }, + } + } + Fields::Named(fields) => { + let bindings: Vec<_> = fields + .named + .iter() + .map(|f| f.ident.as_ref().unwrap()) + .collect(); + + let calls = bindings + .iter() + .map(|b| quote! { #b.#method }) + .collect::>(); + quote! { + Self::#name { #(#bindings),* } => { + 0 #(+ #calls)* + }, + } + } + } + }); + + Ok(quote! { + match self { + #(#arms)* + } + }) + } + + _ => Err(syn::Error::new_spanned( + method, + "#[derive(MemStat)] only supports structs and enums", + )), + } +} + +pub fn expand(input: proc_macro2::TokenStream) -> Result { + let input: DeriveInput = syn::parse2(input)?; + let name = &input.ident; + + let heap = gen_heap_size_body(&input.data)?; + let used = gen_used_size_body(&input.data)?; + + Ok(quote! { + impl MemStat for #name { + fn heap_size(&self) -> usize { + #heap + } + fn used_size(&self) -> usize { + #used + } + } + }) +} + +fn is_copy_primitive(ty: &Type) -> bool { + matches!( + ty, + Type::Path(type_path) + if type_path.qself.is_none() && + type_path.path.segments.len() == 1 && + matches!( + type_path.path.segments[0].ident.to_string().as_str(), + "u8" | "u16" | "u32" | "u64" | "usize" | + "i8" | "i16" | "i32" | "i64" | "isize" | + "bool" | "char" | "f64" | "f32" + ) + ) +} diff --git a/codegen/src/worktable/generator/index.rs b/codegen/src/worktable/generator/index.rs index 30e7d394..54e9bdf2 100644 --- a/codegen/src/worktable/generator/index.rs +++ b/codegen/src/worktable/generator/index.rs @@ -25,7 +25,7 @@ impl Generator { /// Generates table's secondary index struct definition. It has fields with index names and types varying on index /// uniqueness. For unique index it's `TreeIndex>>`. - /// Index also derives `PersistIndex` macro. + /// Index also derives `PersistIndex` and `MemStat` macro. fn gen_type_def(&mut self) -> TokenStream { let name_generator = WorktableNameGenerator::from_table_name(self.name.to_string()); let ident = name_generator.get_index_type_ident(); @@ -47,11 +47,11 @@ impl Generator { let derive = if self.is_persist { quote! { - #[derive(Debug, Default, PersistIndex)] + #[derive(Debug, MemStat, Default, PersistIndex)] } } else { quote! { - #[derive(Debug, Default)] + #[derive(Debug, MemStat, Default)] } }; @@ -73,12 +73,14 @@ impl Generator { let save_row_fn = self.gen_save_row_index_fn(); let delete_row_fn = self.gen_delete_row_index_fn(); let process_difference_fn = self.gen_process_difference_index_fn(); + let info_fn = self.gen_index_info_fn(); quote! { impl TableSecondaryIndex<#row_type_ident, #avt_type_ident> for #index_type_ident { #save_row_fn #delete_row_fn #process_difference_fn + #info_fn } } } @@ -357,6 +359,50 @@ impl Generator { } } } + + fn gen_index_info_fn(&self) -> TokenStream { + let rows = self.columns.indexes.values().map(|idx| { + let index_field_name = &idx.name; + let index_name_str = index_field_name.to_string(); + + if idx.is_unique { + quote! { + + info.push(IndexInfo { + name: #index_name_str.to_string(), + index_type: IndexKind::Unique, + key_count: self.#index_field_name.len(), + capacity: self.#index_field_name.capacity(), + heap_size: self.#index_field_name.heap_size(), + used_size: self.#index_field_name.used_size(), + node_count: self.#index_field_name.node_count(), + + + }); + } + } else { + quote! { + info.push(IndexInfo { + name: #index_name_str.to_string(), + index_type: IndexKind::NonUnique, + key_count: self.#index_field_name.len(), + capacity: self.#index_field_name.capacity(), + heap_size: self.#index_field_name.heap_size(), + used_size: self.#index_field_name.used_size(), + node_count: self.#index_field_name.node_count(), + }); + } + } + }); + + quote! { + fn index_info(&self) -> Vec { + let mut info = Vec::new(); + #(#rows)* + info + } + } + } } // TODO: tests... diff --git a/codegen/src/worktable/generator/row.rs b/codegen/src/worktable/generator/row.rs index 5be4d8db..db3b779c 100644 --- a/codegen/src/worktable/generator/row.rs +++ b/codegen/src/worktable/generator/row.rs @@ -96,7 +96,7 @@ impl Generator { .collect(); quote! { - #[derive(rkyv::Archive, Debug, rkyv::Deserialize, Clone, rkyv::Serialize, PartialEq)] + #[derive(rkyv::Archive, Debug, rkyv::Deserialize, Clone, rkyv::Serialize, PartialEq, MemStat)] #[rkyv(derive(Debug))] #[repr(C)] pub struct #ident { diff --git a/codegen/src/worktable/generator/table/impls.rs b/codegen/src/worktable/generator/table/impls.rs index 33178593..29c20d19 100644 --- a/codegen/src/worktable/generator/table/impls.rs +++ b/codegen/src/worktable/generator/table/impls.rs @@ -19,6 +19,7 @@ impl Generator { let iter_with_fn = self.gen_table_iter_with_fn(); let iter_with_async_fn = self.gen_table_iter_with_async_fn(); let count_fn = self.gen_table_count_fn(); + let system_info_fn = self.gen_system_info_fn(); quote! { impl #ident { @@ -31,6 +32,7 @@ impl Generator { #get_next_fn #iter_with_fn #iter_with_async_fn + #system_info_fn } } } @@ -222,4 +224,12 @@ impl Generator { } } } + + fn gen_system_info_fn(&self) -> TokenStream { + quote! { + pub fn system_info(&self) -> SystemInfo { + self.0.system_info() + } + } + } } diff --git a/examples/Cargo.toml b/examples/Cargo.toml index f6b0a5fa..2ec33f39 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -13,4 +13,7 @@ eyre = "0.6.12" futures = "0.3.30" async-std = "1.10" either = "1.15.0" +ordered-float = "5.0.0" +indexset = { version = "0.11.3", features = ["concurrent", "cdc", "multimap"] } +tokio = { version = "1", features = ["full"] } diff --git a/examples/src/main.rs b/examples/src/main.rs index b88e1bff..63b95647 100644 --- a/examples/src/main.rs +++ b/examples/src/main.rs @@ -2,23 +2,25 @@ use futures::executor::block_on; use worktable::prelude::*; use worktable::worktable; -fn main() { +#[tokio::main] +async fn main() { // describe WorkTable worktable!( name: My, + persist: true, columns: { id: u64 primary_key autoincrement, val: i64, - test: u8, + test: i32, attr: String, - attr2: i16, + attr2: i32, attr_float: f64, attr_string: String, }, indexes: { idx1: attr, - idx2: attr2, + idx2: attr2 unique, idx3: attr_string, }, queries: { @@ -35,50 +37,69 @@ fn main() { ); // Init Worktable - let my_table = MyWorkTable::default(); + let config = PersistenceConfig::new("data", "data"); + let my_table = MyWorkTable::new(config).await.unwrap(); // WT rows (has prefix My because of table name) let row = MyRow { val: 777, - attr: "Attribute1".to_string(), + attr: "Attribute0".to_string(), attr2: 345, test: 1, id: 0, attr_float: 100.0.into(), - attr_string: "String_attr".to_string(), + attr_string: "String_attr0".to_string(), }; + for i in 2..1000000_i64 { + let row = MyRow { + val: 777, + attr: format!("Attribute{}", i), + attr2: 345 + i as i32, + test: i as i32, + id: i as u64, + attr_float: (100.0 + i as f64).into(), + attr_string: format!("String_attr{}", i), + }; + + my_table.insert(row).unwrap(); + } + // insert let pk: MyPrimaryKey = my_table.insert(row).expect("primary key"); // Select ALL records from WT - let select_all = my_table.select_all().execute(); - println!("Select All {:?}", select_all); + let _select_all = my_table.select_all().execute(); + //println!("Select All {:?}", select_all); // Select All records with attribute TEST - let select_all = my_table.select_all().execute(); - println!("Select All {:?}", select_all); + let _select_all = my_table.select_all().execute(); + //println!("Select All {:?}", select_all); // Select by Idx - let select_by_attr = my_table - .select_by_attr("Attribute1".to_string()) - .execute() - .unwrap(); + //let _select_by_attr = my_table + // .select_by_attr("Attribute1".to_string()) + // .execute() + //r .unwrap(); - for row in select_by_attr { - println!("Select by idx, row {:?}", row); - } + //for row in select_by_attr { + // println!("Select by idx, row {:?}", row); + //} // Update Value query let update = my_table.update_val_by_id(ValByIdQuery { val: 1337 }, pk.clone()); let _ = block_on(update); - let select_all = my_table.select_all().execute(); - println!("Select after update val {:?}", select_all); + let _select_all = my_table.select_all().execute(); + //println!("Select after update val {:?}", select_all); let delete = my_table.delete(pk); let _ = block_on(delete); - let select_all = my_table.select_all().execute(); - println!("Select after delete {:?}", select_all); + let _select_all = my_table.select_all().execute(); + //println!("Select after delete {:?}", select_all); + + let info = my_table.system_info(); + + println!("{info}"); } diff --git a/src/index/table_secondary_index.rs b/src/index/table_secondary_index.rs index 5f5a4571..c197f652 100644 --- a/src/index/table_secondary_index.rs +++ b/src/index/table_secondary_index.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use data_bucket::Link; +use crate::system_info::IndexInfo; use crate::Difference; use crate::WorkTableError; @@ -15,6 +16,8 @@ pub trait TableSecondaryIndex { link: Link, differences: HashMap<&str, Difference>, ) -> Result<(), WorkTableError>; + + fn index_info(&self) -> Vec; } pub trait TableSecondaryIndexCdc { @@ -46,4 +49,8 @@ where ) -> Result<(), WorkTableError> { Ok(()) } + + fn index_info(&self) -> Vec { + vec![] + } } diff --git a/src/lib.rs b/src/lib.rs index 7c427346..75f7ed16 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ mod primary_key; mod row; mod table; pub use data_bucket; +mod mem_stat; mod persistence; mod util; @@ -22,6 +23,7 @@ pub use worktable_codegen::worktable; pub mod prelude { pub use crate::in_memory::{Data, DataPages, RowWrapper, StorableRow}; pub use crate::lock::LockMap; + pub use crate::mem_stat::MemStat; pub use crate::persistence::{ map_index_pages_to_toc_and_general, DeleteOperation, IndexTableOfContents, InsertOperation, Operation, PersistenceConfig, PersistenceEngine, PersistenceEngineOps, PersistenceTask, @@ -30,22 +32,23 @@ pub mod prelude { }; pub use crate::primary_key::{PrimaryKeyGenerator, PrimaryKeyGeneratorState, TablePrimaryKey}; pub use crate::table::select::{Order, QueryParams, SelectQueryBuilder, SelectQueryExecutor}; + pub use crate::table::system_info::{IndexInfo, IndexKind, SystemInfo}; pub use crate::util::{OrderedF32Def, OrderedF64Def}; pub use crate::{ lock::Lock, Difference, IndexMap, IndexMultiMap, TableIndex, TableIndexCdc, TableRow, TableSecondaryIndex, TableSecondaryIndexCdc, WorkTable, WorkTableError, }; pub use data_bucket::{ - align, get_index_page_size_from_data_length, map_data_pages_to_general, - map_index_pages_to_general, parse_data_page, parse_page, persist_page, seek_to_page_start, - update_at, DataPage, GeneralHeader, GeneralPage, IndexPage, Interval, Link, PageType, - Persistable, PersistableIndex, SizeMeasurable, SizeMeasure, SpaceInfoPage, - TableOfContentsPage, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, + align, get_index_page_size_from_data_length, map_data_pages_to_general, parse_data_page, + parse_page, persist_page, seek_to_page_start, update_at, DataPage, GeneralHeader, + GeneralPage, IndexPage, Interval, Link, PageType, Persistable, PersistableIndex, + SizeMeasurable, SizeMeasure, SpaceInfoPage, TableOfContentsPage, DATA_VERSION, + GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, }; pub use derive_more::{From, Into}; pub use lockfree::set::Set as LockFreeSet; - pub use worktable_codegen::{PersistIndex, PersistTable}; + pub use worktable_codegen::{MemStat, PersistIndex, PersistTable}; pub const WT_INDEX_EXTENSION: &str = ".wt.idx"; pub const WT_DATA_EXTENSION: &str = ".wt.data"; diff --git a/src/mem_stat/mod.rs b/src/mem_stat/mod.rs new file mode 100644 index 00000000..3bbe0542 --- /dev/null +++ b/src/mem_stat/mod.rs @@ -0,0 +1,172 @@ +mod primitives; + +use std::collections::HashMap; +use std::rc::Rc; +use std::sync::Arc; + +use data_bucket::Link; +use ordered_float::OrderedFloat; +use uuid::Uuid; + +use crate::IndexMultiMap; +use crate::{impl_memstat_zero, IndexMap}; + +pub trait MemStat { + fn heap_size(&self) -> usize; + fn used_size(&self) -> usize; +} + +impl MemStat for Option { + fn heap_size(&self) -> usize { + self.as_ref().map_or(0, |v| v.heap_size()) + } + fn used_size(&self) -> usize { + self.as_ref().map_or(0, |v| v.used_size()) + } +} + +impl MemStat for Vec { + fn heap_size(&self) -> usize { + self.capacity() * std::mem::size_of::() + + self.iter().map(|v| v.heap_size()).sum::() + } + fn used_size(&self) -> usize { + self.len() * std::mem::size_of::() + self.iter().map(|v| v.used_size()).sum::() + } +} + +impl MemStat for String { + fn heap_size(&self) -> usize { + self.capacity() + } + fn used_size(&self) -> usize { + self.len() + } +} + +impl MemStat for IndexMap +where + K: Ord + Clone + 'static + MemStat + Send, + V: Clone + 'static + MemStat + Send, +{ + fn heap_size(&self) -> usize { + let slot_size = std::mem::size_of::>(); + let base_heap = self.capacity() * slot_size; + + let kv_heap: usize = self + .iter() + .map(|(k, v)| k.heap_size() + v.heap_size()) + .sum(); + + base_heap + kv_heap + } + + fn used_size(&self) -> usize { + let pair_size = std::mem::size_of::>(); + let base = self.len() * pair_size; + + let used: usize = self + .iter() + .map(|(k, v)| k.used_size() + v.used_size()) + .sum(); + + base + used + } +} + +impl MemStat for IndexMultiMap +where + K: Ord + Clone + 'static + MemStat + Send, + V: Ord + Clone + 'static + MemStat + Send, +{ + fn heap_size(&self) -> usize { + let slot_size = std::mem::size_of::>(); + let base_heap = self.capacity() * slot_size; + + let kv_heap: usize = self + .iter() + .map(|(k, v)| k.heap_size() + v.heap_size()) + .sum(); + + base_heap + kv_heap + } + + fn used_size(&self) -> usize { + let pair_size = std::mem::size_of::>(); + let base = self.len() * pair_size; + + let used: usize = self + .iter() + .map(|(k, v)| k.used_size() + v.used_size()) + .sum(); + + base + used + } +} + +impl MemStat for Box { + fn heap_size(&self) -> usize { + std::mem::size_of::() + (**self).heap_size() + } + fn used_size(&self) -> usize { + std::mem::size_of::() + (**self).used_size() + } +} + +impl MemStat for Arc { + fn heap_size(&self) -> usize { + std::mem::size_of::() + (**self).heap_size() + } + fn used_size(&self) -> usize { + std::mem::size_of::() + (**self).used_size() + } +} + +impl MemStat for Rc { + fn heap_size(&self) -> usize { + std::mem::size_of::() + (**self).heap_size() + } + fn used_size(&self) -> usize { + std::mem::size_of::() + (**self).used_size() + } +} + +impl MemStat for HashMap { + fn heap_size(&self) -> usize { + let bucket_size = size_of::<(K, V)>(); + let base_heap = self.capacity() * bucket_size; + + let kv_heap: usize = self + .iter() + .map(|(k, v)| k.heap_size() + v.heap_size()) + .sum(); + + base_heap + kv_heap + } + fn used_size(&self) -> usize { + let bucket_size = size_of::<(K, V)>(); + let base_used = self.len() * bucket_size; + + let kv_used: usize = self + .iter() + .map(|(k, v)| k.used_size() + v.used_size()) + .sum(); + + base_used + kv_used + } +} + +impl MemStat for OrderedFloat +where + T: MemStat, +{ + fn heap_size(&self) -> usize { + self.0.heap_size() + } + + fn used_size(&self) -> usize { + self.0.used_size() + } +} + +impl_memstat_zero!(Link, Uuid, [u8]); diff --git a/src/mem_stat/primitives.rs b/src/mem_stat/primitives.rs new file mode 100644 index 00000000..6b7aaf6f --- /dev/null +++ b/src/mem_stat/primitives.rs @@ -0,0 +1,47 @@ +use super::MemStat; + +#[macro_export] +macro_rules! impl_memstat_zero { + ($($t:ty),*) => { + $( + impl MemStat for $t { + fn heap_size(&self) -> usize { 0 } + fn used_size(&self) -> usize { 0 } + } + )* + }; +} + +impl_memstat_zero!( + u8, + i8, + u16, + i16, + u32, + i32, + u64, + i64, + usize, + isize, + f32, + f64, + bool, + char, + u128, + i128, + std::num::NonZeroU8, + std::num::NonZeroU16, + std::num::NonZeroU32, + std::num::NonZeroU64, + std::num::NonZeroU128, + std::num::NonZeroUsize, + std::num::NonZeroI8, + std::num::NonZeroI16, + std::num::NonZeroI32, + std::num::NonZeroI64, + std::num::NonZeroI128, + std::num::NonZeroIsize, + std::time::Duration, + std::time::SystemTime, + std::time::Instant +); diff --git a/src/persistence/space/index/mod.rs b/src/persistence/space/index/mod.rs index e0b74c34..79d88d52 100644 --- a/src/persistence/space/index/mod.rs +++ b/src/persistence/space/index/mod.rs @@ -33,7 +33,7 @@ pub use table_of_contents::IndexTableOfContents; pub use util::map_index_pages_to_toc_and_general; #[derive(Debug)] -pub struct SpaceIndex { +pub struct SpaceIndex { space_id: SpaceId, table_of_contents: IndexTableOfContents, next_page_id: Arc, @@ -108,6 +108,8 @@ where >, { let size = get_index_page_size_from_data_length::(DATA_LENGTH as usize); + println!("Length {}", DATA_LENGTH); + println!("Size {}", size); let mut page = IndexPage::new(node_id.key.clone(), size); page.current_index = 1; page.current_length = 1; diff --git a/src/persistence/space/index/table_of_contents.rs b/src/persistence/space/index/table_of_contents.rs index 47c50415..43121838 100644 --- a/src/persistence/space/index/table_of_contents.rs +++ b/src/persistence/space/index/table_of_contents.rs @@ -16,7 +16,7 @@ use rkyv::{rancor, Archive, Deserialize, Serialize}; use tokio::fs::File; #[derive(Debug)] -pub struct IndexTableOfContents { +pub struct IndexTableOfContents { current_page: usize, next_page_id: Arc, pub pages: Vec>>, @@ -24,7 +24,7 @@ pub struct IndexTableOfContents { impl IndexTableOfContents where - T: SizeMeasurable, + T: SizeMeasurable + Ord + Eq, { pub fn new(space_id: SpaceId, next_page_id: Arc) -> Self { let page_id = next_page_id.fetch_add(1, Ordering::Relaxed); @@ -40,10 +40,7 @@ where } } - pub fn get(&self, node_id: &T) -> Option - where - T: Ord + Eq, - { + pub fn get(&self, node_id: &T) -> Option { for page in &self.pages { if page.inner.contains(node_id) { return Some( @@ -63,7 +60,7 @@ where pub fn insert(&mut self, node_id: T, page_id: PageId) where - T: Clone + Ord + Eq + SizeMeasurable, + T: Clone + SizeMeasurable, { let next_page_id = self.next_page_id.clone(); @@ -95,7 +92,7 @@ where pub fn remove(&mut self, node_id: &T) where - T: Clone + Ord + Eq + SizeMeasurable, + T: Clone + SizeMeasurable, { let mut removed = false; let mut i = 0; @@ -117,10 +114,7 @@ where self.pages.iter().flat_map(|v| v.inner.iter()) } - pub fn update_key(&mut self, old_key: &T, new_key: T) - where - T: Ord + Eq, - { + pub fn update_key(&mut self, old_key: &T, new_key: T) { let page = self.get_current_page_mut(); page.inner.update_key(old_key, new_key); } @@ -133,8 +127,6 @@ where pub async fn persist(&mut self, file: &mut File) -> eyre::Result<()> where T: Archive - + Ord - + Eq + Clone + SizeMeasurable + for<'a> Serialize< @@ -157,8 +149,6 @@ where ) -> eyre::Result where T: Archive - + Ord - + Eq + Clone + SizeMeasurable + for<'a> Serialize< diff --git a/src/persistence/space/index/util.rs b/src/persistence/space/index/util.rs index 0b07e92c..c5fcf09f 100644 --- a/src/persistence/space/index/util.rs +++ b/src/persistence/space/index/util.rs @@ -10,7 +10,7 @@ pub fn map_index_pages_to_toc_and_general( Vec>>, ) where - T: Clone + Ord + Eq + SizeMeasurable, + T: Clone + Default + Ord + Eq + SizeMeasurable, { let mut general_index_pages = vec![]; let next_page_id = Arc::new(AtomicU32::new(1)); diff --git a/src/table/mod.rs b/src/table/mod.rs index 2cc6ae5b..8784b973 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -1,4 +1,5 @@ pub mod select; +pub mod system_info; use std::marker::PhantomData; diff --git a/src/table/system_info.rs b/src/table/system_info.rs new file mode 100644 index 00000000..618fc868 --- /dev/null +++ b/src/table/system_info.rs @@ -0,0 +1,162 @@ +use std::fmt::{self, Display, Formatter}; + +use prettytable::{format::consts::FORMAT_NO_BORDER_LINE_SEPARATOR, row, Table}; + +use crate::in_memory::{RowWrapper, StorableRow}; +use crate::mem_stat::MemStat; +use crate::{TableSecondaryIndex, WorkTable}; + +#[derive(Debug)] +pub struct SystemInfo { + pub table_name: &'static str, + pub page_count: usize, + pub row_count: usize, + pub empty_slots: u64, + pub memory_usage_bytes: u64, + pub idx_size: usize, + pub indexes_info: Vec, +} + +#[derive(Debug)] +pub struct IndexInfo { + pub name: String, + pub index_type: IndexKind, + pub key_count: usize, + pub capacity: usize, + pub heap_size: usize, + pub used_size: usize, + pub node_count: usize, +} + +#[derive(Debug)] +pub enum IndexKind { + Unique, + NonUnique, +} + +impl Display for IndexKind { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Self::Unique => write!(f, "unique"), + Self::NonUnique => write!(f, "non unique"), + } + } +} + +impl< + Row, + PrimaryKey, + AvailableTypes, + SecondaryIndexes: MemStat + TableSecondaryIndex, + LockType, + PkGen, + const DATA_LENGTH: usize, + > WorkTable +where + PrimaryKey: Clone + Ord + Send + 'static + std::hash::Hash, + Row: StorableRow, + ::WrappedRow: RowWrapper, +{ + pub fn system_info(&self) -> SystemInfo { + let page_count = self.data.get_page_count(); + let row_count = self.pk_map.len(); + + let empty_links = self.data.get_empty_links().len(); + + let bytes = self.data.get_bytes(); + + let memory_usage_bytes = bytes + .iter() + .map(|(_buf, free_offset)| *free_offset as u64) + .sum(); + + let idx_size = self.indexes.heap_size(); + + SystemInfo { + table_name: self.table_name, + page_count, + row_count, + empty_slots: empty_links as u64, + memory_usage_bytes, + idx_size, + indexes_info: self.indexes.index_info(), + } + } +} + +impl Display for SystemInfo { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let mem_fmt = fmt_bytes(self.memory_usage_bytes as usize); + let idx_fmt = fmt_bytes(self.idx_size); + let total_fmt = fmt_bytes(self.memory_usage_bytes as usize + self.idx_size); + + writeln!(f, "┌──────────────────────────────┐")?; + writeln!(f, " \t Table Name: {:<5}", self.table_name)?; + writeln!(f, "└──────────────────────────────┘")?; + writeln!( + f, + "Rows: {} Pages: {} Empty slots: {}", + self.row_count, self.page_count, self.empty_slots + )?; + writeln!( + f, + "Allocated Memory: {} (data) + {} (indexes) = {} total\n", + mem_fmt, idx_fmt, total_fmt + )?; + + let mut table = Table::new(); + table.set_format(*FORMAT_NO_BORDER_LINE_SEPARATOR); + table.add_row(row![ + "Index", + "Type", + "Keys", + "Capacity", + "Node Count", + "Heap", + "Used" + ]); + + for idx in &self.indexes_info { + table.add_row(row![ + idx.name, + idx.index_type.to_string(), + idx.key_count, + idx.capacity, + idx.node_count, + fmt_bytes(idx.heap_size), + fmt_bytes(idx.used_size), + ]); + } + + let mut buffer = Vec::new(); + table.print(&mut buffer).unwrap(); + let table_str = String::from_utf8(buffer).unwrap(); + writeln!(f, "{}", table_str.trim_end())?; + + Ok(()) + } +} + +fn fmt_bytes(bytes: usize) -> String { + const KB: f64 = 1024.0; + const MB: f64 = 1024.0 * KB; + const GB: f64 = 1024.0 * MB; + + let b = bytes as f64; + + let (value, unit) = if b >= GB { + (b / GB, "GB") + } else if b >= MB { + (b / MB, "MB") + } else if b >= KB { + (b / KB, "KB") + } else { + return format!("{} B", bytes); + }; + + if (value.fract() * 100.0).round() == 0.0 { + format!("{:.0} {}", value, unit) + } else { + format!("{:.2} {}", value, unit) + } +} diff --git a/tests/worktable/custom_pk.rs b/tests/worktable/custom_pk.rs index 0cf95274..66fa5fe4 100644 --- a/tests/worktable/custom_pk.rs +++ b/tests/worktable/custom_pk.rs @@ -18,6 +18,7 @@ use worktable::worktable; Ord, Serialize, SizeMeasure, + MemStat, )] #[rkyv(compare(PartialEq), derive(Debug, PartialOrd, PartialEq, Eq, Ord))] struct CustomId(u64); diff --git a/tests/worktable/with_enum.rs b/tests/worktable/with_enum.rs index 94bdcf21..95c4db0f 100644 --- a/tests/worktable/with_enum.rs +++ b/tests/worktable/with_enum.rs @@ -2,7 +2,7 @@ use rkyv::{Archive, Deserialize, Serialize}; use worktable::prelude::*; use worktable::worktable; -#[derive(Archive, Clone, Copy, Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[derive(Archive, Clone, Copy, Debug, Deserialize, Serialize, PartialEq, PartialOrd, MemStat)] #[rkyv(compare(PartialEq), derive(Debug))] pub enum SomeEnum { First,