Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add columnar format compatibility tests #2433

Merged
merged 2 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added columnar/compat_tests_data/v1.columnar
Binary file not shown.
13 changes: 13 additions & 0 deletions columnar/src/columnar/format_version.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use core::fmt;
use std::fmt::{Display, Formatter};

use crate::InvalidData;

/// Size in bytes of the version footer: the length of `MAGIC_BYTES` plus the size of a `u32`.
// NOTE(review): the `u32` is presumably the serialized `Version` — confirm against the writer.
pub const VERSION_FOOTER_NUM_BYTES: usize = MAGIC_BYTES.len() + std::mem::size_of::<u32>();
Expand All @@ -20,12 +23,22 @@ pub fn parse_footer(footer_bytes: [u8; VERSION_FOOTER_NUM_BYTES]) -> Result<Vers
Version::try_from_bytes(footer_bytes[0..4].try_into().unwrap())
}

/// The current columnar format version (`Version::V1`).
pub const CURRENT_VERSION: Version = Version::V1;

/// Version of the columnar format.
///
/// `#[repr(u32)]` makes every variant a `u32` whose value is the explicit
/// discriminant written next to it.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[repr(u32)]
pub enum Version {
    /// Format version 1, with discriminant `1`.
    V1 = 1,
}

/// Renders a version as its lowercase tag: `Version::V1` is written as `v1`.
impl Display for Version {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Exhaustive on purpose: a newly added variant must be given a tag here.
        let tag = match *self {
            Version::V1 => "v1",
        };
        f.write_str(tag)
    }
}

impl Version {
fn to_bytes(self) -> [u8; 4] {
(self as u32).to_le_bytes()
Expand Down
1 change: 1 addition & 0 deletions columnar/src/columnar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mod reader;
mod writer;

pub use column_type::{ColumnType, HasAssociatedColumnType};
pub use format_version::{Version, CURRENT_VERSION};
#[cfg(test)]
pub(crate) use merge::ColumnTypeCategory;
pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
Expand Down
84 changes: 84 additions & 0 deletions columnar/src/compat_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use crate::{Column, ColumnarReader, DynamicColumn, CURRENT_VERSION};

const NUM_DOCS: u32 = u16::MAX as u32;

/// Builds a columnar with four numerical columns and returns its serialized bytes.
///
/// For every doc id `i` in `0..num_docs`, the value `i` is recorded:
/// - in `sparse` only when `i` is a multiple of 100,
/// - in `dense` only when `i` is even,
/// - in `full` once,
/// - in `multi` twice.
///
/// Panics if serialization fails.
fn generate_columnar(num_docs: u32) -> Vec<u8> {
    use crate::ColumnarWriter;

    let mut writer = ColumnarWriter::default();

    for doc in 0..num_docs {
        let value = u64::from(doc);
        if doc % 100 == 0 {
            writer.record_numerical(doc, "sparse", value);
        }
        if doc % 2 == 0 {
            writer.record_numerical(doc, "dense", value);
        }
        writer.record_numerical(doc, "full", value);
        // Record the same value twice so that each doc carries two values in `multi`.
        for _ in 0..2 {
            writer.record_numerical(doc, "multi", value);
        }
    }

    let mut serialized: Vec<u8> = Vec::new();
    writer.serialize(num_docs, None, &mut serialized).unwrap();
    serialized
}

/// Writes the columnar fixture for [`CURRENT_VERSION`] to disk.
///
/// Ignored by default. Un-ignore it, or run it explicitly with
/// `cargo test create_format -- --ignored`, to (re)generate the fixture file.
/// An existing fixture for the current version is overwritten.
#[test]
#[ignore = "writes the columnar fixture for the current version to disk"]
fn create_format() {
    create_format_for_version(&CURRENT_VERSION.to_string());
}

/// Generates a columnar with `NUM_DOCS` docs and writes it to the fixture path of `version`.
///
/// Panics if the file cannot be written.
fn create_format_for_version(version: &str) {
    let bytes = generate_columnar(NUM_DOCS);
    let target = path_for_version(version);
    std::fs::write(target, bytes).unwrap();
}

/// Returns the relative path of the fixture file for `version`,
/// i.e. `./compat_tests_data/<version>.columnar`.
fn path_for_version(version: &str) -> String {
    ["./compat_tests_data/", version, ".columnar"].concat()
}

/// Runs the format checks against the `v1` fixture file.
#[test]
fn test_format_v1() {
    test_format(&path_for_version("v1"));
}

/// Opens the columnar file at `path` and checks the first value of selected docs in the
/// `full`, `multi`, `sparse` and `dense` columns.
///
/// The expectations match the pattern recorded by `generate_columnar(NUM_DOCS)`.
/// Panics if the file cannot be read or opened, or if an expectation does not hold.
fn test_format(path: &str) {
    let bytes = std::fs::read(path).unwrap();
    let reader = ColumnarReader::open(bytes).unwrap();

    let last_doc = NUM_DOCS - 1;
    let last_value = NUM_DOCS as u64 - 1;

    let full = open_column(&reader, "full");
    assert_eq!(full.first(0).unwrap(), 0);
    assert_eq!(full.first(last_doc).unwrap(), last_value);

    let multi = open_column(&reader, "multi");
    assert_eq!(multi.first(0).unwrap(), 0);
    assert_eq!(multi.first(last_doc).unwrap(), last_value);

    // `sparse` only holds docs that are multiples of 100: the last doc is not one, 65000 is.
    let sparse = open_column(&reader, "sparse");
    assert_eq!(sparse.first(0).unwrap(), 0);
    assert_eq!(sparse.first(last_doc), None);
    assert_eq!(sparse.first(65000), Some(65000));

    // `dense` only holds even docs: the last doc is even, the one before it is odd.
    let dense = open_column(&reader, "dense");
    assert_eq!(dense.first(0).unwrap(), 0);
    assert_eq!(dense.first(last_doc).unwrap(), last_value);
    assert_eq!(dense.first(NUM_DOCS - 2), None);
}

/// Opens the first column stored under `name` and returns it as a `Column<u64>`.
///
/// The opened column is coerced to `NumericalType::U64` before being unwrapped.
///
/// # Panics
///
/// Panics, naming the column where possible, if the columns cannot be read, if no column
/// exists under `name`, if the column cannot be opened or coerced, or if the coerced
/// column is not the `U64` variant.
fn open_column(reader: &ColumnarReader, name: &str) -> Column<u64> {
    let handles = reader
        .read_columns(name)
        .expect("failed to read the column handles");
    // `first()` instead of `[0]` so that a missing column is reported by name.
    let handle = handles
        .first()
        .unwrap_or_else(|| panic!("no column named {name:?} in the columnar"));
    let column = handle
        .open()
        .expect("failed to open the column")
        .coerce_numerical(crate::NumericalType::U64)
        .expect("failed to coerce the column to u64");
    let DynamicColumn::U64(column) = column else {
        panic!("column {name:?} is not a u64 column after coercion");
    };
    column
}
5 changes: 4 additions & 1 deletion columnar/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ pub use column_values::{
};
pub use columnar::{
merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder,
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION,
};
use sstable::VoidSSTable;
pub use value::{NumericalType, NumericalValue};
Expand Down Expand Up @@ -131,3 +131,6 @@ impl Cardinality {

#[cfg(test)]
mod tests;

#[cfg(test)]
mod compat_tests;
Loading