Skip to content

Commit

Permalink
add columnar format compatibility tests
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Jun 13, 2024
1 parent e90e7a2 commit 833db54
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 1 deletion.
Binary file added columnar/compat_tests_data/v1.columnar
Binary file not shown.
13 changes: 13 additions & 0 deletions columnar/src/columnar/format_version.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
use core::fmt;
use std::fmt::{Display, Formatter};

use crate::InvalidData;

/// Size in bytes of the version footer: the magic bytes plus one `u32`
/// (presumably the serialized version number — confirm against the footer writer).
pub const VERSION_FOOTER_NUM_BYTES: usize = MAGIC_BYTES.len() + std::mem::size_of::<u32>();

/// The format version this crate treats as current; at present `Version::V1`.
pub const CURRENT_VERSION: Version = Version::V1;

/// The file ends with these 4 bytes, which serve as a loose marker
/// that this is indeed a columnar file.
const MAGIC_BYTES: [u8; 4] = [2, 113, 119, 66];
Expand All @@ -26,6 +31,14 @@ pub enum Version {
V1 = 1u32,
}

impl Display for Version {
    /// Writes the short lowercase label of the version (`v1` for [`Version::V1`]).
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Exhaustive on purpose: adding a variant must force a new label here.
        let label = match self {
            Version::V1 => "v1",
        };
        f.write_str(label)
    }
}

impl Version {
fn to_bytes(self) -> [u8; 4] {
(self as u32).to_le_bytes()
Expand Down
1 change: 1 addition & 0 deletions columnar/src/columnar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mod reader;
mod writer;

pub use column_type::{ColumnType, HasAssociatedColumnType};
pub use format_version::{Version, CURRENT_VERSION};
#[cfg(test)]
pub(crate) use merge::ColumnTypeCategory;
pub use merge::{merge_columnar, MergeRowOrder, ShuffleMergeOrder, StackMergeOrder};
Expand Down
84 changes: 84 additions & 0 deletions columnar/src/compat_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use crate::{Column, ColumnarReader, DynamicColumn, CURRENT_VERSION};

const NUM_DOCS: u32 = u16::MAX as u32;

/// Builds a columnar with `num_docs` rows and returns its serialized bytes.
///
/// Four numerical columns are recorded, each storing the row id as a `u64`:
/// - `"sparse"`: one value for every row id divisible by 100,
/// - `"dense"`: one value for every even row id,
/// - `"full"`: one value for every row,
/// - `"multi"`: the same value recorded twice for every row.
///
/// # Panics
/// Panics if serialization fails.
fn generate_columnar(num_docs: u32) -> Vec<u8> {
    use crate::ColumnarWriter;

    let mut writer = ColumnarWriter::default();

    (0..num_docs).for_each(|doc| {
        let value = u64::from(doc);
        if doc % 100 == 0 {
            writer.record_numerical(doc, "sparse", value);
        }
        if doc % 2 == 0 {
            writer.record_numerical(doc, "dense", value);
        }
        writer.record_numerical(doc, "full", value);
        // Two records for the same row give "multi" two values per row.
        for _ in 0..2 {
            writer.record_numerical(doc, "multi", value);
        }
    });

    let mut serialized = Vec::<u8>::new();
    writer.serialize(num_docs, None, &mut serialized).unwrap();
    serialized
}

/// Writes the columnar fixture for `CURRENT_VERSION` to disk.
///
/// Ignored by default; unignore it to regenerate the fixture file.
#[test]
#[ignore]
fn create_format() {
    let version_label = CURRENT_VERSION.to_string();
    create_format_for_version(&version_label);
}

/// Generates a columnar with `NUM_DOCS` rows and writes it to the fixture path
/// derived from `version`.
///
/// # Panics
/// Panics if the file cannot be written.
fn create_format_for_version(version: &str) {
    let destination = path_for_version(version);
    let bytes = generate_columnar(NUM_DOCS);
    std::fs::write(destination, bytes).unwrap();
}

/// Returns the relative path of the fixture file for `version`,
/// i.e. `./compat_tests_data/<version>.columnar`.
fn path_for_version(version: &str) -> String {
    ["./compat_tests_data/", version, ".columnar"].concat()
}

/// Checks that the stored `v1` fixture is still readable by the current code.
#[test]
fn test_format_v1() {
    test_format(&path_for_version("v1"));
}

/// Reads the columnar fixture at `path` and checks that the `"full"`, `"multi"`,
/// `"sparse"` and `"dense"` columns decode to the values written by
/// `generate_columnar` (each present value equals its row id).
///
/// # Panics
/// Panics, naming `path`, if the fixture cannot be read or opened, and on any
/// value mismatch.
fn test_format(path: &str) {
    // A missing fixture or wrong working directory is the most likely failure,
    // so make sure the offending path shows up in the panic message.
    let file_content = std::fs::read(path)
        .unwrap_or_else(|err| panic!("failed to read columnar fixture {path:?}: {err:?}"));
    let reader = ColumnarReader::open(file_content)
        .unwrap_or_else(|err| panic!("failed to open columnar fixture {path:?}: {err:?}"));

    let last_doc = NUM_DOCS - 1;
    let last_value = u64::from(last_doc);

    // One value per row.
    let column = open_column(&reader, "full");
    assert_eq!(column.first(0), Some(0));
    assert_eq!(column.first(last_doc), Some(last_value));

    // Two values per row; only the first one is checked here.
    let column = open_column(&reader, "multi");
    assert_eq!(column.first(0), Some(0));
    assert_eq!(column.first(last_doc), Some(last_value));

    // Values only on row ids divisible by 100.
    let column = open_column(&reader, "sparse");
    assert_eq!(column.first(0), Some(0));
    assert_eq!(column.first(last_doc), None);
    assert_eq!(column.first(65000), Some(65000));

    // Values only on even row ids; the last row id (NUM_DOCS - 1) is even.
    let column = open_column(&reader, "dense");
    assert_eq!(column.first(0), Some(0));
    assert_eq!(column.first(last_doc), Some(last_value));
    assert_eq!(column.first(NUM_DOCS - 2), None);
}

/// Opens the first column called `name` in `reader` and returns it as a
/// `Column<u64>`, coercing the numerical type to `u64`.
///
/// # Panics
/// Panics with a message naming the column if it cannot be read, does not exist,
/// cannot be opened, or cannot be coerced to `u64`.
fn open_column(reader: &ColumnarReader, name: &str) -> Column<u64> {
    let handles = reader
        .read_columns(name)
        .unwrap_or_else(|err| panic!("failed to read columns named {name:?}: {err:?}"));
    // Several columns may share a name; only the first is used.
    let handle = handles
        .first()
        .unwrap_or_else(|| panic!("columnar has no column named {name:?}"));
    let column = handle
        .open()
        .unwrap_or_else(|err| panic!("failed to open column {name:?}: {err:?}"))
        .coerce_numerical(crate::NumericalType::U64)
        .unwrap_or_else(|| panic!("column {name:?} cannot be coerced to u64"));
    let DynamicColumn::U64(column) = column else {
        panic!("column {name:?} is not a u64 column after coercion");
    };
    column
}
5 changes: 4 additions & 1 deletion columnar/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ pub use column_values::{
};
pub use columnar::{
merge_columnar, ColumnType, ColumnarReader, ColumnarWriter, HasAssociatedColumnType,
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder,
MergeRowOrder, ShuffleMergeOrder, StackMergeOrder, Version, CURRENT_VERSION,
};
use sstable::VoidSSTable;
pub use value::{NumericalType, NumericalValue};
Expand Down Expand Up @@ -131,3 +131,6 @@ impl Cardinality {

#[cfg(test)]
mod tests;

#[cfg(test)]
mod compat_tests;

0 comments on commit 833db54

Please sign in to comment.