Skip to content

Commit

Permalink
feat(storage): add char encoding (#116)
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Chi <iskyzh@gmail.com>
  • Loading branch information
skyzh committed Nov 8, 2021
1 parent 777ed99 commit 76ff402
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/storage/secondary/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
//!
//! [`Block`] is the minimum managing unit in the storage engine.

mod char_block_builder;
mod primitive_block_builder;
mod primitive_block_iterator;
mod primitive_nullable_block_builder;
mod varchar_block_builder;
pub use char_block_builder::*;
pub use primitive_block_builder::*;
pub use primitive_block_iterator::*;
pub use primitive_nullable_block_builder::*;
pub use varchar_block_builder::*;

use bytes::{Buf, BufMut, Bytes};
use risinglight_proto::rowset::block_checksum::ChecksumType;
Expand Down
71 changes: 71 additions & 0 deletions src/storage/secondary/block/char_block_builder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
use super::BlockBuilder;
use crate::array::UTF8Array;

/// Block builder for fixed-width `char` columns.
///
/// Each appended value takes up exactly `char_width` bytes of the block: the
/// UTF-8 bytes of the value followed by zero bytes up to the column width.
pub struct PlainCharBlockBuilder {
    /// Encoded bytes of all values appended so far.
    data: Vec<u8>,
    /// Number of bytes every value occupies in the block.
    char_width: usize,
    /// Block size (in bytes) the builder aims for; also the initial capacity of `data`.
    target_size: usize,
}

impl PlainCharBlockBuilder {
    /// Creates an empty builder for values of `char_width` bytes, with a buffer
    /// pre-allocated to hold `target_size` bytes.
    #[allow(dead_code)]
    pub fn new(target_size: usize, char_width: usize) -> Self {
        Self {
            data: Vec::with_capacity(target_size),
            char_width,
            target_size,
        }
    }
}

impl BlockBuilder<UTF8Array> for PlainCharBlockBuilder {
    /// Appends one value, padding it on the right with zero bytes so that it
    /// fills exactly `char_width` bytes.
    ///
    /// # Panics
    ///
    /// Panics if `item` is `None`, or if its UTF-8 encoding is longer than
    /// `char_width` bytes.
    fn append(&mut self, item: Option<&str>) {
        let bytes = item
            .expect("nullable item found in non-nullable block builder")
            .as_bytes();
        assert!(
            bytes.len() <= self.char_width,
            "item length {} > char width {}",
            bytes.len(),
            self.char_width
        );
        // The slot for this value ends `char_width` bytes after the current end.
        let slot_end = self.data.len() + self.char_width;
        self.data.extend_from_slice(bytes);
        // Fill whatever remains of the slot with zero bytes.
        self.data.resize(slot_end, 0);
    }

    /// Returns the number of bytes encoded so far.
    fn estimated_size(&self) -> usize {
        self.data.len()
    }

    /// Returns `true` when the block already holds data and one more
    /// fixed-width value would push it past `target_size`.
    ///
    /// The next item itself is not inspected: every value has the same width.
    fn should_finish(&self, _next_item: &Option<&str>) -> bool {
        if self.data.is_empty() {
            // An empty block always accepts at least one value.
            return false;
        }
        self.data.len() + self.char_width > self.target_size
    }

    /// Consumes the builder and returns the encoded block bytes.
    fn finish(self) -> Vec<u8> {
        let Self { data, .. } = self;
        data
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a block of three values in a 40-byte-wide column and checks the
    /// size accounting, the finish decision, and the encoded bytes.
    #[test]
    fn test_build_char() {
        let values = ["233", "2333", "23333"];
        let mut builder = PlainCharBlockBuilder::new(128, 40);
        for value in values.iter() {
            builder.append(Some(value));
        }
        assert_eq!(builder.estimated_size(), 120);
        // 120 bytes used + 40 for another value exceeds the 128-byte target.
        assert!(builder.should_finish(&Some("2333333")));

        // Every value must occupy one 40-byte slot: its bytes, then zero padding.
        let encoded = builder.finish();
        assert_eq!(encoded.len(), 120);
        for (slot, value) in encoded.chunks_exact(40).zip(values.iter()) {
            let (text, padding) = slot.split_at(value.len());
            assert_eq!(text, value.as_bytes());
            assert!(padding.iter().all(|&byte| byte == 0));
        }
    }
}
72 changes: 72 additions & 0 deletions src/storage/secondary/block/varchar_block_builder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
use bytes::BufMut;

use crate::array::UTF8Array;

use super::BlockBuilder;

/// Block builder for variable-length strings.
///
/// The finished block stores one `u32` offset per value, followed by the
/// concatenated bytes of all values:
/// ```plain
/// | offset (u32) | offset | offset | data | data | data |
/// ```
pub struct PlainVarcharBlockBuilder {
    /// Concatenated UTF-8 bytes of all values appended so far.
    data: Vec<u8>,
    /// For each appended value, the length of `data` right after it was added.
    offsets: Vec<u32>,
    /// Block size (in bytes) the builder aims for; also the initial capacity of `data`.
    target_size: usize,
}

impl PlainVarcharBlockBuilder {
    /// Creates an empty builder whose data buffer is pre-allocated to hold
    /// `target_size` bytes.
    #[allow(dead_code)]
    pub fn new(target_size: usize) -> Self {
        Self {
            data: Vec::with_capacity(target_size),
            offsets: Vec::new(),
            target_size,
        }
    }
}

impl BlockBuilder<UTF8Array> for PlainVarcharBlockBuilder {
    /// Appends one value and records the offset at which it ends.
    ///
    /// # Panics
    ///
    /// Panics if `item` is `None`, or if the accumulated data would grow past
    /// what a `u32` offset can address.
    fn append(&mut self, item: Option<&str>) {
        let item = item.expect("nullable item found in non-nullable block builder");
        let end = self.data.len() + item.len();
        // Offsets are stored as `u32`. A bare `as` cast would silently wrap for
        // oversized data and corrupt the block, so fail loudly before mutating.
        assert!(
            end <= u32::MAX as usize,
            "block data length {} does not fit in a u32 offset",
            end
        );
        self.data.extend_from_slice(item.as_bytes());
        self.offsets.push(end as u32);
    }

    /// Returns the encoded size so far: data bytes plus one `u32` per offset.
    fn estimated_size(&self) -> usize {
        self.data.len() + self.offsets.len() * std::mem::size_of::<u32>()
    }

    /// Returns `true` when the block already holds data and adding `next_item`
    /// (its bytes plus one more offset) would push it past `target_size`.
    fn should_finish(&self, next_item: &Option<&str>) -> bool {
        !self.data.is_empty()
            && self.estimated_size()
                + next_item.map_or(0, str::len)
                + std::mem::size_of::<u32>()
                > self.target_size
    }

    /// Consumes the builder and returns the encoded block: all offsets as
    /// little-endian `u32`s, followed by the concatenated value bytes.
    fn finish(self) -> Vec<u8> {
        // The final length is known exactly, so allocate once.
        let mut encoded_data = Vec::with_capacity(
            self.offsets.len() * std::mem::size_of::<u32>() + self.data.len(),
        );
        for offset in self.offsets {
            encoded_data.put_u32_le(offset);
        }
        encoded_data.extend(self.data);
        encoded_data
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a block of three strings and checks the size accounting, the
    /// finish decision, and the encoded offsets-then-data layout.
    #[test]
    fn test_build_str() {
        let mut builder = PlainVarcharBlockBuilder::new(128);
        builder.append(Some("233"));
        builder.append(Some("23333"));
        builder.append(Some("2333333"));
        // 15 data bytes plus three 4-byte offsets.
        assert_eq!(builder.estimated_size(), 15 + 4 * 3);
        assert!(!builder.should_finish(&Some("23333333")));

        // Expected layout: end offsets 3, 8, 15 as little-endian u32s, then the data.
        let mut expected = Vec::new();
        for end in [3u32, 8, 15].iter() {
            expected.extend_from_slice(&end.to_le_bytes());
        }
        expected.extend_from_slice(b"233233332333333");
        assert_eq!(builder.finish(), expected);
    }
}

0 comments on commit 76ff402

Please sign in to comment.