Skip to content

Commit

Permalink
More efficient serialization for bitmap segments (#3492)
Browse files Browse the repository at this point in the history
* More efficient serialization for bitmap segments

* Rename a const

* Correctly count number of chunks in a segment

* Enum for BitmapBlock (de)ser mode

* Add more segments in test

* Fix duplicate function
  • Loading branch information
jaspervdm committed Nov 24, 2020
1 parent b3938de commit 055b684
Show file tree
Hide file tree
Showing 3 changed files with 442 additions and 1 deletion.
306 changes: 305 additions & 1 deletion chain/src/txhashset/bitmap_accumulator.rs
Expand Up @@ -12,16 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::cmp::min;
use std::convert::TryFrom;
use std::time::Instant;

use bit_vec::BitVec;
use croaring::Bitmap;

use crate::core::core::hash::{DefaultHashable, Hash};
use crate::core::core::pmmr::segment::{Segment, SegmentIdentifier, SegmentProof};
use crate::core::core::pmmr::{self, ReadablePMMR, ReadonlyPMMR, VecBackend, PMMR};
use crate::core::ser::{self, PMMRable, Readable, Reader, Writeable, Writer};
use crate::error::{Error, ErrorKind};
use enum_primitive::FromPrimitive;

/// The "bitmap accumulator" allows us to commit to a specific bitmap by splitting it into
/// fragments and inserting these fragments into an MMR to produce an overall root hash.
Expand Down Expand Up @@ -187,7 +190,7 @@ impl BitmapAccumulator {

/// A bitmap "chunk" representing 1024 contiguous bits of the overall bitmap.
/// The first 1024 bits belong in one chunk. The next 1024 bits in the next chunk, etc.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BitmapChunk(BitVec);

impl BitmapChunk {
Expand Down Expand Up @@ -242,3 +245,304 @@ impl Readable for BitmapChunk {
Ok(BitmapChunk::new())
}
}

/// A `Segment` of the bitmap MMR with an occupancy-aware, more compact
/// (de)serialization of its leaf data (see `BitmapBlock`).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BitmapSegment {
// Position/height of this segment within the overall MMR.
identifier: SegmentIdentifier,
// Leaf chunks regrouped into 2^16-bit blocks; only the final block may be short.
blocks: Vec<BitmapBlock>,
// Proof for this segment (see `SegmentProof`) — serialized after the blocks.
proof: SegmentProof,
}

impl Writeable for BitmapSegment {
/// Wire format: identifier, u16 block count, each block in order, then the proof.
fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
Writeable::write(&self.identifier, writer)?;
// NOTE(review): `as u16` silently truncates above 65535 blocks — presumably
// segment heights are bounded elsewhere so this cannot occur; confirm.
writer.write_u16(self.blocks.len() as u16)?;
for block in &self.blocks {
Writeable::write(block, writer)?;
}
Writeable::write(&self.proof, writer)?;
Ok(())
}
}

impl Readable for BitmapSegment {
/// Inverse of `Writeable::write`: identifier, u16 block count, blocks, proof.
fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
let identifier: SegmentIdentifier = Readable::read(reader)?;

// Count comes off the wire as a u16, so the pre-allocation below is
// bounded (<= 65535 blocks) even for malicious input.
let n_blocks = reader.read_u16()? as usize;
let mut blocks = Vec::<BitmapBlock>::with_capacity(n_blocks);
for _ in 0..n_blocks {
blocks.push(Readable::read(reader)?);
}
let proof = Readable::read(reader)?;

Ok(Self {
identifier,
blocks,
proof,
})
}
}

// TODO: this can be sped up with some `unsafe` code
impl From<Segment<BitmapChunk>> for BitmapSegment {
	/// Regroup the segment's 1024-bit chunks into 2^16-bit blocks.
	/// Only the final block may hold fewer than `BitmapBlock::NCHUNKS` chunks.
	fn from(segment: Segment<BitmapChunk>) -> Self {
		let (identifier, _, _, _, leaf_data, proof) = segment.parts();

		// Allocate one all-zero block per group of NCHUNKS chunks, sizing the
		// final block to exactly the chunks that remain.
		let total_chunks = leaf_data.len();
		let n_blocks = (total_chunks + BitmapBlock::NCHUNKS - 1) / BitmapBlock::NCHUNKS;
		let mut blocks = Vec::with_capacity(n_blocks);
		let mut remaining = total_chunks;
		while remaining > 0 {
			let take = min(BitmapBlock::NCHUNKS, remaining);
			blocks.push(BitmapBlock::new(take));
			remaining -= take;
		}

		// Copy every set bit of every chunk into its slot within the blocks.
		for (chunk_idx, chunk) in leaf_data.into_iter().enumerate() {
			assert_eq!(chunk.0.len(), BitmapChunk::LEN_BITS);
			let bits = &mut blocks[chunk_idx / BitmapBlock::NCHUNKS].inner;
			let base = (chunk_idx % BitmapBlock::NCHUNKS) * BitmapChunk::LEN_BITS;
			for (bit, is_set) in chunk.0.iter().enumerate() {
				if is_set {
					bits.set(base + bit, true);
				}
			}
		}

		Self {
			identifier,
			blocks,
			proof,
		}
	}
}

// TODO: this can be sped up with some `unsafe` code
impl From<BitmapSegment> for Segment<BitmapChunk> {
	/// Expand the packed blocks back into per-chunk `BitVec`s and rebuild the
	/// generic segment, recomputing the leaf positions from the identifier.
	fn from(segment: BitmapSegment) -> Self {
		let BitmapSegment {
			identifier,
			blocks,
			proof,
		} = segment;

		// Count the number of chunks taking into account that the final block
		// might be smaller than a full block.
		let n_blocks = blocks.len();
		let n_chunks = n_blocks.saturating_sub(1) * BitmapBlock::NCHUNKS
			+ blocks.last().map(|b| b.n_chunks()).unwrap_or(0);
		let mut leaf_pos = Vec::with_capacity(n_chunks);
		let mut chunks = Vec::with_capacity(n_chunks);
		// First insertion (leaf) index covered by this segment: a segment of
		// height h spans 2^h leaves; insertion indices are 1-based here.
		let offset = (1 << identifier.height) * identifier.idx + 1;
		for i in 0..(n_chunks as u64) {
			leaf_pos.push(pmmr::insertion_to_pmmr_index(offset + i));
			chunks.push(BitmapChunk::new());
		}

		for (block_idx, block) in blocks.into_iter().enumerate() {
			// Only the final block may be short (the chunk count above relies
			// on exactly that). Asserting NBITS unconditionally, as before,
			// would panic on a legitimate short final block, e.g. one produced
			// by `BitmapBlock::read` or by the inverse conversion.
			if block_idx + 1 < n_blocks {
				assert_eq!(block.inner.len(), BitmapBlock::NBITS as usize);
			} else {
				assert!(block.inner.len() <= BitmapBlock::NBITS as usize);
			}
			let offset = block_idx * BitmapBlock::NCHUNKS;
			for (i, _) in block.inner.iter().enumerate().filter(|&(_, v)| v) {
				chunks
					.get_mut(offset + i / BitmapChunk::LEN_BITS)
					.unwrap()
					.0
					.set(i % BitmapChunk::LEN_BITS, true);
			}
		}

		// Hash data is not stored for bitmap segments; pass empty vectors.
		Segment::from_parts(identifier, Vec::new(), Vec::new(), leaf_pos, chunks, proof)
	}
}

/// A block of 2^16 bits that provides an efficient (de)serialization
/// depending on the bitmap occupancy.
#[derive(Clone, Debug, PartialEq, Eq)]
struct BitmapBlock {
// Backing bits; length is a multiple of `BitmapChunk::LEN_BITS` and at most
// `NBITS`. Only a segment's final block may be shorter than `NBITS`.
inner: BitVec,
}

impl BitmapBlock {
/// Maximum number of bits in a block
const NBITS: u32 = 1 << 16;
/// Maximum number of chunks in a block
const NCHUNKS: usize = Self::NBITS as usize / BitmapChunk::LEN_BITS;

fn new(n_chunks: usize) -> Self {
assert!(n_chunks <= BitmapBlock::NCHUNKS);
Self {
inner: BitVec::from_elem(n_chunks * BitmapChunk::LEN_BITS, false),
}
}

fn n_chunks(&self) -> usize {
let length = self.inner.len();
assert_eq!(length % BitmapChunk::LEN_BITS, 0);
let n_chunks = length / BitmapChunk::LEN_BITS;
assert!(n_chunks <= BitmapBlock::NCHUNKS);
n_chunks
}
}

impl Writeable for BitmapBlock {
	/// Serialize the block, choosing the most compact of three encodings
	/// (positive indices, negative indices, raw bytes) based on occupancy.
	/// Wire format: u8 chunk count, u8 mode byte, then the mode-specific payload.
	fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
		let length = self.inner.len();
		assert!(length <= Self::NBITS as usize);
		assert_eq!(length % BitmapChunk::LEN_BITS, 0);
		// Chunk count fits a u8: at most NBITS / LEN_BITS = 64 chunks.
		writer.write_u8((length / BitmapChunk::LEN_BITS) as u8)?;

		let count_pos = self.inner.iter().filter(|&v| v).count() as u32;
		// Count unset bits against the *actual* length. The previous
		// `NBITS - count_pos` overcounted for a short final block: negative
		// mode then wrote an inflated count followed by fewer indices, and raw
		// mode tripped a byte-length assertion sized for a full block.
		let count_neg = length as u32 - count_pos;
		let threshold = Self::NBITS / 16;
		if count_pos < threshold {
			// Sparse: write indices of the set bits.
			Writeable::write(&BitmapBlockSerialization::Positive, writer)?;
			writer.write_u16(count_pos as u16)?;
			for (i, _) in self.inner.iter().enumerate().filter(|&(_, v)| v) {
				writer.write_u16(i as u16)?;
			}
		} else if count_neg < threshold {
			// Dense: write indices of the unset bits.
			Writeable::write(&BitmapBlockSerialization::Negative, writer)?;
			writer.write_u16(count_neg as u16)?;
			for (i, _) in self.inner.iter().enumerate().filter(|&(_, v)| !v) {
				writer.write_u16(i as u16)?;
			}
		} else {
			// Mixed occupancy: write the bitmap verbatim.
			Writeable::write(&BitmapBlockSerialization::Raw, writer)?;
			let bytes = self.inner.to_bytes();
			// LEN_BITS is a multiple of 8, so length / 8 is exact.
			assert_eq!(bytes.len(), length / 8);
			writer.write_fixed_bytes(&bytes)?;
		}

		Ok(())
	}
}

impl Readable for BitmapBlock {
fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
let n_chunks = reader.read_u8()?;
if n_chunks as usize > BitmapBlock::NCHUNKS {
return Err(ser::Error::TooLargeReadErr);
}
let n_bits = n_chunks as usize * BitmapChunk::LEN_BITS;

let mode = Readable::read(reader)?;
let inner = match mode {
BitmapBlockSerialization::Raw => {
// Raw bytes
let bytes = reader.read_fixed_bytes(n_bits / 8)?;
BitVec::from_bytes(&bytes)
}
BitmapBlockSerialization::Positive => {
// Positive indices
let mut inner = BitVec::from_elem(n_bits, false);
let n = reader.read_u16()?;
for _ in 0..n {
inner.set(reader.read_u16()? as usize, true);
}
inner
}
BitmapBlockSerialization::Negative => {
// Negative indices
let mut inner = BitVec::from_elem(n_bits, true);
let n = reader.read_u16()?;
for _ in 0..n {
inner.set(reader.read_u16()? as usize, false);
}
inner
}
};

Ok(BitmapBlock { inner })
}
}

// Serialization mode of a `BitmapBlock`, stored on the wire as one byte.
// `enum_from_primitive!` derives `FromPrimitive` so the byte can be decoded
// back into a variant; unknown bytes are rejected as corrupt data in `read`.
enum_from_primitive! {
#[derive(Debug, Clone, Copy, PartialEq)]
#[repr(u8)]
enum BitmapBlockSerialization {
Raw = 0,
Positive = 1,
Negative = 2,
}
}

impl Writeable for BitmapBlockSerialization {
	/// Write the mode discriminant as a single byte.
	fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ser::Error> {
		let byte = *self as u8;
		writer.write_u8(byte)
	}
}

impl Readable for BitmapBlockSerialization {
fn read<R: Reader>(reader: &mut R) -> Result<Self, ser::Error> {
Self::from_u8(reader.read_u8()?).ok_or(ser::Error::CorruptedData)
}
}

#[cfg(test)]
mod tests {
	use super::*;
	use crate::core::ser::{BinReader, BinWriter, ProtocolVersion, Readable, Writeable};
	use byteorder::ReadBytesExt;
	use grin_util::secp::rand::Rng;
	use rand::thread_rng;
	use std::io::Cursor;

	/// Serialize a randomly filled full block, check the chosen encoding byte
	/// and the total serialized length, then deserialize and require an exact
	/// round trip.
	fn test_roundtrip(entries: usize, inverse: bool, encoding: u8, length: usize) {
		let mut rng = thread_rng();
		let mut block = BitmapBlock::new(64);
		if inverse {
			block.inner.negate();
		}

		// Flip `entries` bits in random spots
		let mut count = 0;
		while count < entries {
			let idx = rng.gen_range(0, BitmapBlock::NBITS as usize);
			if block.inner.get(idx).unwrap() == inverse {
				count += 1;
				block.inner.set(idx, !inverse);
			}
		}

		// Serialize
		let mut cursor = Cursor::new(Vec::<u8>::new());
		let mut writer = BinWriter::new(&mut cursor, ProtocolVersion(1));
		Writeable::write(&block, &mut writer).unwrap();

		// Check encoding type (byte 1, right after the chunk-count byte) and length
		cursor.set_position(1);
		assert_eq!(cursor.read_u8().unwrap(), encoding);
		let actual_length = cursor.get_ref().len();
		assert_eq!(actual_length, length);
		// No encoding should ever exceed the raw representation
		assert!(actual_length <= 2 + BitmapBlock::NBITS as usize / 8);

		// Deserialize
		cursor.set_position(0);
		let mut reader = BinReader::new(&mut cursor, ProtocolVersion(1));
		let block2: BitmapBlock = Readable::read(&mut reader).unwrap();
		assert_eq!(block, block2);
	}

	/// Mid-occupancy blocks use the raw encoding (mode 0).
	#[test]
	fn block_ser_roundtrip() {
		let threshold = BitmapBlock::NBITS as usize / 16;
		let entries = thread_rng().gen_range(threshold, 4 * threshold);
		test_roundtrip(entries, false, 0, 2 + BitmapBlock::NBITS as usize / 8);
		test_roundtrip(entries, true, 0, 2 + BitmapBlock::NBITS as usize / 8);
	}

	/// Sparse blocks use the positive-index encoding (mode 1).
	#[test]
	fn sparse_block_ser_roundtrip() {
		let entries = thread_rng().gen_range(1024, BitmapBlock::NBITS as usize / 16);
		test_roundtrip(entries, false, 1, 4 + 2 * entries);
	}

	/// Nearly full blocks use the negative-index encoding (mode 2).
	/// Renamed from `abdundant_block_ser_roundtrip` (typo).
	#[test]
	fn abundant_block_ser_roundtrip() {
		let entries = thread_rng().gen_range(1024, BitmapBlock::NBITS as usize / 16);
		test_roundtrip(entries, true, 2, 4 + 2 * entries);
	}
}

0 comments on commit 055b684

Please sign in to comment.