From fee9d283d476449d367fcdfd866c6b72ebf8d6ac Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 23 Nov 2022 12:03:52 +0100 Subject: [PATCH 01/99] starting work on typed dictionaries --- src/structure/mod.rs | 1 + src/structure/pfc.rs | 3 +- src/structure/tfc.rs | 133 +++++++++++++++++++++++++++++++++++++++++ src/structure/vbyte.rs | 64 ++++++++++++++++++-- 4 files changed, 194 insertions(+), 7 deletions(-) create mode 100644 src/structure/tfc.rs diff --git a/src/structure/mod.rs b/src/structure/mod.rs index 08353122..78906419 100644 --- a/src/structure/mod.rs +++ b/src/structure/mod.rs @@ -9,6 +9,7 @@ pub mod bititer; pub mod logarray; //pub mod mapped_dict; pub mod pfc; +pub mod tfc; pub mod util; pub mod vbyte; pub mod wavelettree; diff --git a/src/structure/pfc.rs b/src/structure/pfc.rs index d49ee008..892f4bae 100644 --- a/src/structure/pfc.rs +++ b/src/structure/pfc.rs @@ -805,8 +805,7 @@ impl Decoder for PfcDecoder { false => { // This is in the middle of some block. we expect a vbyte followed by some 0-delimited cstring let last = self.last.as_ref().unwrap(); - let (prefix_len, vbyte_len) = vbyte::decode(&bytes).expect("expected vbyte"); - bytes.advance(vbyte_len); + let (prefix_len, vbyte_len) = vbyte::decode_buf(bytes).expect("expected vbyte"); let b = bytes.split_to(pos - vbyte_len); bytes.advance(1); let mut full = BytesMut::with_capacity(prefix_len as usize + b.len()); diff --git a/src/structure/tfc.rs b/src/structure/tfc.rs new file mode 100644 index 00000000..8f934246 --- /dev/null +++ b/src/structure/tfc.rs @@ -0,0 +1,133 @@ +use bytes::{Bytes, Buf, BytesMut, BufMut}; + +use crate::structure::{vbyte::{self,encode_array}, util::find_common_prefix}; + +const BLOCK_SIZE: usize = 8; + +pub struct TfcBlock { + data: Bytes +} + +#[derive(Debug, PartialEq)] +pub struct TfcBlockHeader { + size: u8, + sizes: [u64;BLOCK_SIZE], + shareds: [u64;BLOCK_SIZE-1] +} + +#[derive(Debug)] +pub enum TfcError { + InvalidCoding, + NotEnoughData, +} + +impl From for TfcError { + fn from(e: vbyte::DecodeError) -> Self { + match e { + vbyte::DecodeError::UnexpectedEndOfBuffer => Self::NotEnoughData, + _ => Self::InvalidCoding + } + } +} + +impl TfcBlockHeader { + fn parse(buf: &mut B) -> Result { + let size = buf.get_u8(); + let mut sizes = [0;BLOCK_SIZE]; + let mut shareds = [0;BLOCK_SIZE-1]; + + + let (first_size, _) = vbyte::decode_buf(buf)?; + sizes[0] = first_size; + + for i in 0..(size-1) as usize { + let (shared, _) = vbyte::decode_buf(buf)?; + let (size, _) = vbyte::decode_buf(buf)?; + + sizes[i+1] = size; + shareds[i] = shared; + } + + Ok(Self { + size, + sizes, + shareds + }) + } +} + +pub struct TfcBlockBuilder { +} + +fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { + let slices_len = slices.len(); + debug_assert!(slices_len <= BLOCK_SIZE && slices_len != 0); + buf.put_u8(slices_len as u8); + + let first = slices[0]; + let (vbyte, vbyte_len) = encode_array(first.len() as u64); + buf.put_slice(&vbyte[..vbyte_len]); + + let mut last = first; + + let mut suffixes: Vec<&[u8]> = Vec::with_capacity(slices.len()); + suffixes.push(last); + for i in 1..slices.len() { + let cur = slices[i]; + let common_prefix = find_common_prefix(last, cur); + let (vbyte, vbyte_len) = encode_array(common_prefix as u64); + buf.put_slice(&vbyte[..vbyte_len]); + + let suffix_len = cur.len() - common_prefix; + let (vbyte, vbyte_len) = encode_array(suffix_len as u64); + buf.put_slice(&vbyte[..vbyte_len]); + suffixes.push(&cur[common_prefix..]); + last = cur; + } + + for suffix in suffixes { + 
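+        // the first element of `suffixes` is the complete head string; the
+        // rest have had their prefix shared with the previous entry stripped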
buf.put_slice(suffix); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use bytes::Buf; + #[test] + fn blah() { + let slice = b"asdfasfd"; + let mut argh = slice as &[u8]; + let first = argh.get_u8(); + let second = argh.get_u8(); + + panic!("{} {} {:?}", first, second, argh); + } + + #[test] + fn build_and_parse_block() { + let strings: [&[u8];5] = [ + b"aaaaaa", + b"aabb", + b"cccc", + b"cdef", + b"cdff" + ]; + + let mut buf = BytesMut::new(); + build_block_unchecked(&mut buf, &strings); + let mut bytes: Bytes = buf.freeze(); + + let header = TfcBlockHeader::parse(&mut bytes).unwrap(); + + let expected = TfcBlockHeader { + size: 5, + sizes: [6, 2, 4, 3, 2, 0, 0, 0], + shareds: [2, 0, 1, 2, 0, 0, 0] + }; + + assert_eq!(expected, header); + + assert_eq!(b"aaaaaabbccccdefff", &bytes[..]); + } +} diff --git a/src/structure/vbyte.rs b/src/structure/vbyte.rs index fe261403..5ba74e5f 100644 --- a/src/structure/vbyte.rs +++ b/src/structure/vbyte.rs @@ -17,6 +17,10 @@ use futures::io; use tokio::io::{AsyncWrite, AsyncWriteExt}; +use std::io::Write; + +use bytes::Buf; + /// The maximum number of bytes required for any `u64` in a variable-byte encoding. pub const MAX_ENCODING_LEN: usize = 10; @@ -74,19 +78,40 @@ fn max_byte_too_large(shift: u32, byte: u8) -> bool { /// This function expects the encoded value to start at the beginning of the slice; and the slice /// must be large enough to include all of the encoded bytes of one value. Decoding stops at the /// end of the encoded value, so it doesn't matter if the slice is longer. -pub fn decode(buf: &[u8]) -> Result<(u64, usize), DecodeError> { +pub fn decode(mut buf: &[u8]) -> Result<(u64, usize), DecodeError> { + decode_buf(&mut buf) +} + +/// Decodes a `u64` from a variable-byte-encoded slice. +/// +/// On success, this function returns `Ok` with the decoded value and encoding length. Otherwise, +/// the slice data is invalid, and the function returns `Err` with the corresponding `DecodeError` +/// giving the reason. +/// +/// This function expects the encoded value to start at the beginning of the slice; and the slice +/// must be large enough to include all of the encoded bytes of one value. Decoding stops at the +/// end of the encoded value, so it doesn't matter if the slice is longer. +pub fn decode_buf(buf: &mut B) -> Result<(u64, usize), DecodeError> { // This will be the decoded result. let mut num: u64 = 0; // This is how many bits we shift `num` by on each iteration in increments of 7. let mut shift: u32 = 0; // Loop through each 8-bit byte value with its index. - for (i, &b) in buf.iter().enumerate() { + let mut count = 0; + loop { + if !buf.has_remaining() { + return Err(DecodeError::UnexpectedEndOfBuffer); + } + + let b = buf.get_u8(); + count += 1; + if is_last_encoded_byte(b) { return if max_byte_too_large(shift, b) { Err(DecodeError::EncodedValueTooLarge) } else { // Return the result (clearing the msb) and the encoding length. - Ok((num | ((clear_msb(b) as u64) << shift), i + 1)) + Ok((num | ((clear_msb(b) as u64) << shift), count)) }; } // This is not the last byte. Update the result. @@ -100,8 +125,6 @@ pub fn decode(buf: &[u8]) -> Result<(u64, usize), DecodeError> { return Err(DecodeError::UnexpectedEncodingLen); } } - // We have reached the end of the buffer without encountering the last encoded byte. - Err(DecodeError::UnexpectedEndOfBuffer) } /// Returns `true` if more than 7 bits remain to be encoded. 
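
A quick round-trip sketch of how the new `decode_buf` relates to `decode` (not part of the
patch; it assumes only that the `bytes` crate implements `Buf` for `&[u8]`, which it does):

    let (buf, len) = vbyte::encode_array(624485);
    assert_eq!((624485, len), vbyte::decode(&buf[..len]).unwrap());

    // decode_buf consumes from any Buf, advancing it past the encoded value
    let mut cursor: &[u8] = &buf[..len];
    let (val, _) = vbyte::decode_buf(&mut cursor).unwrap();
    assert_eq!(624485, val);
    assert!(cursor.is_empty());
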
@@ -163,6 +186,37 @@ pub fn encode_vec(num: u64) -> Vec<u8> {
     vec
 }
 
+/// Encodes a `u64` with a variable-byte encoding in an array.
+///
+/// The array is always length 10. Additionally, the actual size of the vbyte is returned.
+pub fn encode_array(num: u64) -> ([u8;10],usize) {
+    // Use a fixed-size array large enough for any encoded `u64`.
+    let mut buf = [0;10];
+    // Safety: `buf` is `MAX_ENCODING_LEN` bytes, enough for the encoded bytes of any `num`.
+    let size = unsafe { encode_unchecked(&mut buf, num) };
+    (buf, size)
+}
+
+/*
+pub fn encode_into_writer<W: Write>(writer: &mut W, mut num: u64) -> std::io::Result<usize> {
+    let mut i = 0;
+    // Loop through all 7-bit strings of the number.
+    while more_than_7bits_remain(num) {
+        // This is not the last encoded byte.
+        let b = clear_msb(num as u8);
+        writer.write_u8(b)?;
+        // Get the next 7 bits.
+        num >>= 7;
+        i+=1;
+    }
+    // This is the last encoded byte.
+    let b = set_msb(num as u8);
+    // Return the encoding length.
+    writer.write_u8(b)?;
+    Ok(i + 1)
+}
+*/
+
 /// Encodes a `u64` with a variable-byte encoding in a `Vec` and writes that `Vec` to the
 /// destination `dest` in a future.
 pub async fn write_async<A: AsyncWrite + Unpin>(dest: &mut A, num: u64) -> io::Result<usize>

From fde3b53c8b28e3f0bfb240000e9106a41784eb91 Mon Sep 17 00:00:00 2001
From: Matthijs van Otterdijk
Date: Wed, 23 Nov 2022 16:22:24 +0100
Subject: [PATCH 02/99] tfc block entry retrieval

---
 src/structure/tfc.rs   | 204 ++++++++++++++++++++++++++++++++---------
 src/structure/vbyte.rs |   8 +-
 2 files changed, 164 insertions(+), 48 deletions(-)

diff --git a/src/structure/tfc.rs b/src/structure/tfc.rs
index 8f934246..36c00148 100644
--- a/src/structure/tfc.rs
+++ b/src/structure/tfc.rs
@@ -1,65 +1,151 @@
-use bytes::{Bytes, Buf, BytesMut, BufMut};
+use bytes::{Buf, BufMut, Bytes, BytesMut};
 
-use crate::structure::{vbyte::{self,encode_array}, util::find_common_prefix};
+use crate::structure::{
+    util::find_common_prefix,
+    vbyte::{self, encode_array},
+};
 
 const BLOCK_SIZE: usize = 8;
 
-pub struct TfcBlock {
-    data: Bytes
-}
-
-#[derive(Debug, PartialEq)]
-pub struct TfcBlockHeader {
-    size: u8,
-    sizes: [u64;BLOCK_SIZE],
-    shareds: [u64;BLOCK_SIZE-1]
-}
-
 #[derive(Debug)]
 pub enum TfcError {
     InvalidCoding,
     NotEnoughData,
 }
 
+#[derive(Debug, PartialEq)]
+pub struct TfcBlockHeader {
+    num_entries: u8,
+    buffer_length: usize,
+    sizes: [usize; BLOCK_SIZE],
+    shareds: [usize; BLOCK_SIZE - 1],
+}
+
 impl From<vbyte::DecodeError> for TfcError {
     fn from(e: vbyte::DecodeError) -> Self {
         match e {
             vbyte::DecodeError::UnexpectedEndOfBuffer => Self::NotEnoughData,
-            _ => Self::InvalidCoding
+            _ => Self::InvalidCoding,
         }
     }
 }
 
 impl TfcBlockHeader {
-    fn parse<B:Buf>(buf: &mut B) -> Result<Self,TfcError> {
-        let size = buf.get_u8();
-        let mut sizes = [0;BLOCK_SIZE];
-        let mut shareds = [0;BLOCK_SIZE-1];
-
+    fn parse<B: Buf>(buf: &mut B) -> Result<Self, TfcError> {
+        let num_entries = buf.get_u8();
+        let mut sizes = [0_usize; BLOCK_SIZE];
+        let mut shareds = [0_usize; BLOCK_SIZE - 1];
 
         let (first_size, _) = vbyte::decode_buf(buf)?;
-        sizes[0] = first_size;
+        sizes[0] = first_size as usize;
 
-        for i in 0..(size-1) as usize {
+        for i in 0..(num_entries - 1) as usize {
             let (shared, _) = vbyte::decode_buf(buf)?;
             let (size, _) = vbyte::decode_buf(buf)?;
 
-            sizes[i+1] = size;
-            shareds[i] = shared;
+            sizes[i + 1] = size as usize;
+            shareds[i] = shared as usize;
         }
 
+        let buffer_length = sizes.iter().sum();
+
         Ok(Self {
-            size,
+            num_entries,
+            buffer_length,
             sizes,
-            shareds
+            shareds,
         })
     }
 }
 
-pub struct TfcBlockBuilder {
+#[derive(Debug)]
+pub struct TfcEntry<'a>(Vec<&'a [u8]>);
+
+impl<'a> TfcEntry<'a> {
+    fn as_vec(&self) -> Vec<u8> {
+        let mut v =
Vec::with_capacity(self.0.iter().map(|s| s.len()).sum()); + + for slice in self.0.iter() { + v.extend_from_slice(slice); + } + + v + } } -fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { +pub struct TfcBlock { + header: TfcBlockHeader, + data: Bytes, +} + +impl TfcBlock { + pub fn parse(bytes: &mut Bytes) -> Result { + let header = TfcBlockHeader::parse(bytes)?; + if bytes.remaining() < header.buffer_length { + return Err(TfcError::NotEnoughData); + } + + let data = bytes.split_to(header.buffer_length); + + Ok(Self { header, data }) + } + + pub fn is_incomplete(&self) -> bool { + self.header.num_entries != BLOCK_SIZE as u8 + } + + pub fn entry(&self, index: usize) -> TfcEntry { + if index == 0 { + return TfcEntry(vec![&self.data[..self.header.sizes[0]]]); + } + + let mut v = Vec::with_capacity(7); + let mut last = self.header.shareds[index - 1]; + if last != 0 { + v.push(last); + } + if last != 0 { + for i in (0..index - 1).rev() { + let shared = self.header.shareds[i]; + if shared == 0 { + break; + } + + if shared < last { + v.push(shared); + last = shared; + } else { + v.push(last); + } + } + } + + let start = index - v.len(); + + let mut taken = 0; + let mut slices = Vec::with_capacity(v.len() + 1); + + let mut offset = self.header.sizes.iter().take(start).sum(); + for (ix, shared) in v.iter().rev().enumerate() { + let have_to_take = shared - taken; + let cur_offset = offset; + offset += self.header.sizes[start + ix]; + if have_to_take == 0 { + continue; + } + let slice = &self.data[cur_offset..cur_offset + have_to_take]; + slices.push(slice); + taken += have_to_take; + } + + let suffix_size = self.header.sizes[index]; + slices.push(&self.data[offset..offset + suffix_size]); + + TfcEntry(slices) + } +} + +fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { let slices_len = slices.len(); debug_assert!(slices_len <= BLOCK_SIZE && slices_len != 0); buf.put_u8(slices_len as u8); @@ -77,7 +163,7 @@ fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { let common_prefix = find_common_prefix(last, cur); let (vbyte, vbyte_len) = encode_array(common_prefix as u64); buf.put_slice(&vbyte[..vbyte_len]); - + let suffix_len = cur.len() - common_prefix; let (vbyte, vbyte_len) = encode_array(suffix_len as u64); buf.put_slice(&vbyte[..vbyte_len]); @@ -104,30 +190,60 @@ mod tests { panic!("{} {} {:?}", first, second, argh); } - #[test] - fn build_and_parse_block() { - let strings: [&[u8];5] = [ - b"aaaaaa", - b"aabb", - b"cccc", - b"cdef", - b"cdff" - ]; - + fn build_incomplete_block(strings: &[&[u8]]) -> TfcBlock { let mut buf = BytesMut::new(); build_block_unchecked(&mut buf, &strings); + let mut bytes: Bytes = buf.freeze(); - let header = TfcBlockHeader::parse(&mut bytes).unwrap(); + TfcBlock::parse(&mut bytes).unwrap() + } + + #[test] + fn build_and_parse_block() { + let strings: [&[u8]; 5] = [b"aaaaaa", b"aabb", b"cccc", b"cdef", b"cdff"]; + + let block = build_incomplete_block(&strings); - let expected = TfcBlockHeader { - size: 5, + let expected_header = TfcBlockHeader { + num_entries: 5, + buffer_length: 17, sizes: [6, 2, 4, 3, 2, 0, 0, 0], - shareds: [2, 0, 1, 2, 0, 0, 0] + shareds: [2, 0, 1, 2, 0, 0, 0], }; - assert_eq!(expected, header); + assert_eq!(expected_header, block.header); + + let expected_bytes = b"aaaaaabbccccdefff"; + assert_eq!(expected_bytes, &block.data[..]); + } + + #[test] + fn entry_in_block() { + let strings: [&[u8]; 5] = [b"aaaaaa", b"aabb", b"cccc", b"cdef", b"cdff"]; + let block = build_incomplete_block(&strings); + + for (ix, string) in 
strings.iter().enumerate() {
+            assert_eq!(*string, &block.entry(ix).as_vec()[..]);
+        }
+    }
+
+    #[test]
+    fn entry_in_complete_block() {
+        let strings: [&[u8]; 8] = [
+            b"aaaaaa",
+            b"aabb",
+            b"cccc",
+            b"cdef",
+            b"cdff",
+            b"cdffasdf",
+            b"cdffeeee",
+            b"ceeeeeeeeeeeeeee",
+        ];
+        let block = build_incomplete_block(&strings);
+
+        for (ix, string) in strings.iter().enumerate() {
+            assert_eq!(*string, &block.entry(ix).as_vec()[..]);
+        }
+    }
 }
diff --git a/src/structure/vbyte.rs b/src/structure/vbyte.rs
index 5ba74e5f..9ad5bce5 100644
--- a/src/structure/vbyte.rs
+++ b/src/structure/vbyte.rs
@@ -91,7 +91,7 @@ pub fn decode(mut buf: &[u8]) -> Result<(u64, usize), DecodeError> {
 /// This function expects the encoded value to start at the beginning of the slice; and the slice
 /// must be large enough to include all of the encoded bytes of one value. Decoding stops at the
 /// end of the encoded value, so it doesn't matter if the slice is longer.
-pub fn decode_buf<B:Buf>(buf: &mut B) -> Result<(u64, usize), DecodeError> {
+pub fn decode_buf<B: Buf>(buf: &mut B) -> Result<(u64, usize), DecodeError> {
     // This will be the decoded result.
     let mut num: u64 = 0;
     // This is how many bits we shift `num` by on each iteration in increments of 7.
@@ -105,7 +105,7 @@ pub fn decode_buf<B:Buf>(buf: &mut B) -> Result<(u64, usize), DecodeError> {
 
         let b = buf.get_u8();
         count += 1;
-        
+
         if is_last_encoded_byte(b) {
@@ -189,9 +189,9 @@ pub fn encode_vec(num: u64) -> Vec<u8> {
 /// Encodes a `u64` with a variable-byte encoding in an array.
 ///
 /// The array is always length 10. Additionally, the actual size of the vbyte is returned.
-pub fn encode_array(num: u64) -> ([u8;10],usize) {
+pub fn encode_array(num: u64) -> ([u8; 10], usize) {
     // Use a fixed-size array large enough for any encoded `u64`.
-    let mut buf = [0;10];
+    let mut buf = [0; 10];
     // Safety: `buf` is `MAX_ENCODING_LEN` bytes, enough for the encoded bytes of any `num`.
let size = unsafe { encode_unchecked(&mut buf, num) }; (buf, size) From c54eddca2a2702d2a534cdf1acb0e02a151919c9 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 23 Nov 2022 17:13:44 +0100 Subject: [PATCH 03/99] buf implementation for tfc dict entry --- src/structure/tfc.rs | 159 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 158 insertions(+), 1 deletion(-) diff --git a/src/structure/tfc.rs b/src/structure/tfc.rs index 36c00148..4f874888 100644 --- a/src/structure/tfc.rs +++ b/src/structure/tfc.rs @@ -58,7 +58,7 @@ impl TfcBlockHeader { } } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct TfcEntry<'a>(Vec<&'a [u8]>); impl<'a> TfcEntry<'a> { @@ -71,6 +71,118 @@ impl<'a> TfcEntry<'a> { v } + + fn as_buf(&self) -> TfcEntryBuf { + TfcEntryBuf { + entry: self, + slice_ix: 0, + pos_in_slice: 0 + } + } + + fn into_buf(self) -> OwnedTfcEntryBuf<'a> { + OwnedTfcEntryBuf { + entry: self, + slice_ix: 0, + pos_in_slice: 0 + } + } + + fn len(&self) -> usize { + self.0.iter().map(|s|s.len()).sum() + } +} + +pub struct TfcEntryBuf<'a>{ + entry: &'a TfcEntry<'a>, + slice_ix: usize, + pos_in_slice: usize +} + +fn calculate_remaining<'a>(entry: &TfcEntry<'a>, slice_ix: usize, pos_in_slice: usize) -> usize { + let total: usize = entry.0.iter().skip(slice_ix).map(|s|s.len()).sum(); + total - pos_in_slice +} + +fn calculate_chunk<'a>(entry: &'a TfcEntry<'a>, slice_ix: usize, pos_in_slice: usize) -> &[u8] { + if slice_ix >= entry.0.len() { + &[] + } + else { + let slice = entry.0[slice_ix]; + &slice[pos_in_slice..] + } +} + +fn calculate_advance<'a>(entry: &'a TfcEntry<'a>, slice_ix: &mut usize, pos_in_slice: &mut usize, mut cnt: usize) { + if *slice_ix < entry.0.len() { + let slice = entry.0[*slice_ix]; + let remaining_in_slice = slice.len() - *pos_in_slice; + + if remaining_in_slice > cnt { + // we remain in the slice we're at. 
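+            // the next chunk() call will then return the rest of this slice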
+ *pos_in_slice += cnt; + } + else { + // we are starting at the next slice + cnt -= remaining_in_slice; + *slice_ix += 1; + + loop { + if entry.0.len() >= *slice_ix { + // past the end + *pos_in_slice = 0; + break; + } + + let slice_len = entry.0[*slice_ix].len(); + + if cnt < slice_len { + // this is our slice + *pos_in_slice = cnt; + break; + } + + // not our slice, so advance to next + cnt -= entry.0.len(); + *slice_ix += 1; + } + } + } +} + +impl<'a> Buf for TfcEntryBuf<'a> { + fn remaining(&self) -> usize { + calculate_remaining(self.entry, self.slice_ix, self.pos_in_slice) + } + + fn chunk(&self) -> &[u8] { + calculate_chunk(self.entry, self.slice_ix, self.pos_in_slice) + } + + fn advance(&mut self, cnt: usize) { + calculate_advance(self.entry, &mut self.slice_ix, &mut self.pos_in_slice, cnt) + } +} + +pub struct OwnedTfcEntryBuf<'a>{ + entry: TfcEntry<'a>, + slice_ix: usize, + pos_in_slice: usize +} + +impl<'a> Buf for OwnedTfcEntryBuf<'a> { + fn remaining(&self) -> usize { + calculate_remaining(&self.entry, self.slice_ix, self.pos_in_slice) + } + + fn chunk(&self) -> &[u8] { + calculate_chunk(&self.entry, self.slice_ix, self.pos_in_slice) + } + + fn advance(&mut self, cnt: usize) { + calculate_advance(&self.entry, &mut self.slice_ix, &mut self.pos_in_slice, cnt) + } } pub struct TfcBlock { @@ -246,4 +358,49 @@ mod tests { assert_eq!(*string, &block.entry(ix).as_vec()[..]); } } + + #[test] + fn entry_buf_in_complete_block() { + let strings: [&[u8]; 8] = [ + b"aaaaaa", + b"aabb", + b"cccc", + b"cdef", + b"cdff", + b"cdffasdf", + b"cdffeeee", + b"ceeeeeeeeeeeeeee", + ]; + let block = build_incomplete_block(&strings); + + for (ix, string) in strings.iter().enumerate() { + let entry = block.entry(ix); + let mut buf = entry.as_buf(); + let len = buf.remaining(); + let bytes = buf.copy_to_bytes(len); + assert_eq!(*string, &bytes[..]); + } + } + + #[test] + fn entry_owned_buf_in_complete_block() { + let strings: [&[u8]; 8] = [ + b"aaaaaa", + b"aabb", + b"cccc", + b"cdef", + b"cdff", + b"cdffasdf", + b"cdffeeee", + b"ceeeeeeeeeeeeeee", + ]; + let block = build_incomplete_block(&strings); + + for (ix, string) in strings.iter().enumerate() { + let mut buf = block.entry(ix).into_buf(); + let len = buf.remaining(); + let bytes = buf.copy_to_bytes(len); + assert_eq!(*string, &bytes[..]); + } + } } From 1da44ba44533fd68b1634725cde6bd4f22a9ea54 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 23 Nov 2022 17:14:45 +0100 Subject: [PATCH 04/99] remove dummy test --- src/structure/tfc.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/structure/tfc.rs b/src/structure/tfc.rs index 4f874888..33be3baf 100644 --- a/src/structure/tfc.rs +++ b/src/structure/tfc.rs @@ -292,15 +292,6 @@ fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { mod tests { use super::*; use bytes::Buf; - #[test] - fn blah() { - let slice = b"asdfasfd"; - let mut argh = slice as &[u8]; - let first = argh.get_u8(); - let second = argh.get_u8(); - - panic!("{} {} {:?}", first, second, argh); - } fn build_incomplete_block(strings: &[&[u8]]) -> TfcBlock { let mut buf = BytesMut::new(); From 6ad263ee06dcc97ef41cae88d626df3c88529a35 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 23 Nov 2022 18:19:01 +0100 Subject: [PATCH 05/99] replicate all pfc comparison logic --- src/structure/tfc.rs | 255 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 220 insertions(+), 35 deletions(-) diff --git a/src/structure/tfc.rs b/src/structure/tfc.rs index 33be3baf..24b7f9ba 100644 --- 
a/src/structure/tfc.rs +++ b/src/structure/tfc.rs @@ -1,3 +1,6 @@ +use std::cmp::Ordering; +use std::hash::{Hash, Hasher}; + use bytes::{Buf, BufMut, Bytes, BytesMut}; use crate::structure::{ @@ -59,11 +62,34 @@ impl TfcBlockHeader { } #[derive(Clone, Debug)] -pub struct TfcEntry<'a>(Vec<&'a [u8]>); +pub struct TfcDictEntry(Vec); + +impl TfcDictEntry { + pub fn new(parts: Vec) -> Self { + Self(parts) + } + + pub fn new_optimized(parts: Vec) -> Self { + let mut entry = Self::new(parts); + entry.optimize(); + + entry + } -impl<'a> TfcEntry<'a> { - fn as_vec(&self) -> Vec { - let mut v = Vec::with_capacity(self.0.iter().map(|s| s.len()).sum()); + fn to_bytes(&self) -> Bytes { + if self.0.len() == 1 { + self.0[0].clone() + } else { + let mut buf = BytesMut::with_capacity(self.len()); + for slice in self.0.iter() { + buf.extend_from_slice(&slice[..]); + } + + buf.freeze() + } + } + fn to_vec(&self) -> Vec { + let mut v = Vec::with_capacity(self.len()); for slice in self.0.iter() { v.extend_from_slice(slice); @@ -76,58 +102,216 @@ impl<'a> TfcEntry<'a> { TfcEntryBuf { entry: self, slice_ix: 0, - pos_in_slice: 0 + pos_in_slice: 0, } } - fn into_buf(self) -> OwnedTfcEntryBuf<'a> { + fn into_buf(self) -> OwnedTfcEntryBuf { OwnedTfcEntryBuf { entry: self, slice_ix: 0, - pos_in_slice: 0 + pos_in_slice: 0, } } fn len(&self) -> usize { - self.0.iter().map(|s|s.len()).sum() + self.0.iter().map(|s| s.len()).sum() + } + + /// optimize size + /// + /// For short strings, a list of pointers may be much less + /// efficient than a copy of the string. This will copy the + /// underlying string if that is the case. + pub fn optimize(&mut self) { + let overhead_size = std::mem::size_of::() * self.0.len(); + + if std::mem::size_of::() + self.len() < overhead_size { + let mut bytes = BytesMut::with_capacity(self.len()); + for part in self.0.iter() { + bytes.extend(part); + } + + self.0 = vec![bytes.freeze()]; + } + } + + pub fn buf_eq(&self, mut b: B) -> bool { + if self.len() != b.remaining() { + false + } else if self.len() == 0 { + true + } else { + let mut it = self.0.iter(); + let mut part = it.next().unwrap(); + loop { + let slice = b.chunk(); + + match part.len().cmp(&slice.len()) { + Ordering::Less => { + if part.as_ref() != &slice[..part.len()] { + return false; + } + } + Ordering::Equal => { + if part != slice { + return false; + } + + assert!(it.next().is_none()); + return true; + } + Ordering::Greater => { + panic!("This should never happen because it'd mean our entry is larger than the buffer passed in, but we already checked to make sure that is not the case"); + } + } + + b.advance(part.len()); + part = it.next().unwrap(); + } + } + } +} + +impl PartialEq for TfcDictEntry { + fn eq(&self, other: &Self) -> bool { + // unequal length, so can't be equal + if self.len() != other.len() { + return false; + } + + self.cmp(other) == Ordering::Equal + } +} + +impl Eq for TfcDictEntry {} + +impl Hash for TfcDictEntry { + fn hash(&self, state: &mut H) { + for part in self.0.iter() { + state.write(part); + } + } +} + +impl Ord for TfcDictEntry { + fn cmp(&self, other: &Self) -> Ordering { + // both are empty, so equal + if self.len() == 0 && other.len() == 0 { + return Ordering::Equal; + } + + let mut it1 = self.0.iter(); + let mut it2 = other.0.iter(); + let mut part1 = it1.next().unwrap().clone(); + let mut part2 = it2.next().unwrap().clone(); + + loop { + match part1.len().cmp(&part2.len()) { + Ordering::Equal => { + match part1.cmp(&part2) { + Ordering::Less => return Ordering::Less, + Ordering::Greater => 
return Ordering::Greater,
+                        Ordering::Equal => {}
+                    }
+
+                    let p1_next = it1.next();
+                    let p2_next = it2.next();
+
+                    if let (Some(p1), Some(p2)) = (p1_next, p2_next) {
+                        part1 = p1.clone();
+                        part2 = p2.clone();
+                    } else if p1_next.is_none() && p2_next.is_none() {
+                        // done! everything has been compared equally and nothing remains.
+                        return Ordering::Equal;
+                    } else if p1_next.is_none() {
+                        // the left side is a prefix of the right side
+
+                        return Ordering::Less;
+                    } else {
+                        return Ordering::Greater;
+                    }
+                }
+                Ordering::Less => {
+                    let part2_slice = part2.slice(0..part1.len());
+                    match part1.cmp(&part2_slice) {
+                        Ordering::Less => return Ordering::Less,
+                        Ordering::Greater => return Ordering::Greater,
+                        Ordering::Equal => {}
+                    }
+
+                    part2 = part2.slice(part1.len()..);
+                    let part1_option = it1.next();
+                    if part1_option.is_none() {
+                        return Ordering::Less;
+                    }
+                    part1 = part1_option.unwrap().clone();
+                }
+                Ordering::Greater => {
+                    let part1_slice = part1.slice(0..part2.len());
+                    match part1_slice.cmp(&part2) {
+                        Ordering::Less => return Ordering::Less,
+                        Ordering::Greater => return Ordering::Greater,
+                        Ordering::Equal => {}
+                    }
+
+                    part1 = part1.slice(part2.len()..);
+                    let part2_option = it2.next();
+                    if part2_option.is_none() {
+                        return Ordering::Greater;
+                    }
+                    part2 = part2_option.unwrap().clone();
+                }
+            }
+        }
+    }
+}
 
-pub struct TfcEntryBuf<'a>{
-    entry: &'a TfcEntry<'a>,
+impl PartialOrd for TfcDictEntry {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+#[derive(Clone)]
+pub struct TfcEntryBuf<'a> {
+    entry: &'a TfcDictEntry,
     slice_ix: usize,
-    pos_in_slice: usize
+    pos_in_slice: usize,
 }
 
-fn calculate_remaining<'a>(entry: &TfcEntry<'a>, slice_ix: usize, pos_in_slice: usize) -> usize {
-    let total: usize = entry.0.iter().skip(slice_ix).map(|s|s.len()).sum();
+fn calculate_remaining<'a>(entry: &TfcDictEntry, slice_ix: usize, pos_in_slice: usize) -> usize {
+    let total: usize = entry.0.iter().skip(slice_ix).map(|s| s.len()).sum();
     total - pos_in_slice
 }
 
-fn calculate_chunk<'a>(entry: &'a TfcEntry<'a>, slice_ix: usize, pos_in_slice: usize) -> &[u8] {
+fn calculate_chunk<'a>(entry: &'a TfcDictEntry, slice_ix: usize, pos_in_slice: usize) -> &[u8] {
     if slice_ix >= entry.0.len() {
         &[]
-    }
-    else {
-        let slice = entry.0[slice_ix];
+    } else {
+        let slice = &entry.0[slice_ix];
         &slice[pos_in_slice..]
     }
 }
 
-fn calculate_advance<'a>(entry: &'a TfcEntry<'a>, slice_ix: &mut usize, pos_in_slice: &mut usize, mut cnt: usize) {
+fn calculate_advance<'a>(
+    entry: &'a TfcDictEntry,
+    slice_ix: &mut usize,
+    pos_in_slice: &mut usize,
+    mut cnt: usize,
+) {
     if *slice_ix < entry.0.len() {
-        let slice = entry.0[*slice_ix];
+        let slice = &entry.0[*slice_ix];
        let remaining_in_slice = slice.len() - *pos_in_slice;
 
         if remaining_in_slice > cnt {
-            // we remain in the slice we're at. 
+            // we remain in the slice we're at.
*pos_in_slice += cnt; - } - else { + } else { // we are starting at the next slice cnt -= remaining_in_slice; *slice_ix += 1; - + loop { if entry.0.len() >= *slice_ix { // past the end @@ -165,13 +349,13 @@ impl<'a> Buf for TfcEntryBuf<'a> { } } -pub struct OwnedTfcEntryBuf<'a>{ - entry: TfcEntry<'a>, +pub struct OwnedTfcEntryBuf { + entry: TfcDictEntry, slice_ix: usize, - pos_in_slice: usize + pos_in_slice: usize, } -impl<'a> Buf for OwnedTfcEntryBuf<'a> { +impl Buf for OwnedTfcEntryBuf { fn remaining(&self) -> usize { calculate_remaining(&self.entry, self.slice_ix, self.pos_in_slice) } @@ -206,9 +390,10 @@ impl TfcBlock { self.header.num_entries != BLOCK_SIZE as u8 } - pub fn entry(&self, index: usize) -> TfcEntry { + pub fn entry(&self, index: usize) -> TfcDictEntry { if index == 0 { - return TfcEntry(vec![&self.data[..self.header.sizes[0]]]); + let b = self.data.slice(..self.header.sizes[0]); + return TfcDictEntry(vec![b]); } let mut v = Vec::with_capacity(7); @@ -237,7 +422,7 @@ impl TfcBlock { let mut taken = 0; let mut slices = Vec::with_capacity(v.len() + 1); - let mut offset = self.header.sizes.iter().take(start).sum(); + let mut offset: usize = self.header.sizes.iter().take(start).sum(); for (ix, shared) in v.iter().rev().enumerate() { let have_to_take = shared - taken; let cur_offset = offset; @@ -245,15 +430,15 @@ impl TfcBlock { if have_to_take == 0 { continue; } - let slice = &self.data[cur_offset..cur_offset + have_to_take]; + let slice = self.data.slice(cur_offset..cur_offset + have_to_take); slices.push(slice); taken += have_to_take; } let suffix_size = self.header.sizes[index]; - slices.push(&self.data[offset..offset + suffix_size]); + slices.push(self.data.slice(offset..offset + suffix_size)); - TfcEntry(slices) + TfcDictEntry(slices) } } @@ -327,7 +512,7 @@ mod tests { let block = build_incomplete_block(&strings); for (ix, string) in strings.iter().enumerate() { - assert_eq!(*string, &block.entry(ix).as_vec()[..]); + assert_eq!(*string, &block.entry(ix).to_vec()[..]); } } @@ -346,7 +531,7 @@ mod tests { let block = build_incomplete_block(&strings); for (ix, string) in strings.iter().enumerate() { - assert_eq!(*string, &block.entry(ix).as_vec()[..]); + assert_eq!(*string, &block.entry(ix).to_vec()[..]); } } From e97e15797d3a477176750223c4ab13e889a7a68e Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 23 Nov 2022 18:20:26 +0100 Subject: [PATCH 06/99] also optimize dict entries --- src/structure/tfc.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/structure/tfc.rs b/src/structure/tfc.rs index 24b7f9ba..cf8e518d 100644 --- a/src/structure/tfc.rs +++ b/src/structure/tfc.rs @@ -393,7 +393,7 @@ impl TfcBlock { pub fn entry(&self, index: usize) -> TfcDictEntry { if index == 0 { let b = self.data.slice(..self.header.sizes[0]); - return TfcDictEntry(vec![b]); + return TfcDictEntry::new(vec![b]); } let mut v = Vec::with_capacity(7); @@ -438,7 +438,7 @@ impl TfcBlock { let suffix_size = self.header.sizes[index]; slices.push(self.data.slice(offset..offset + suffix_size)); - TfcDictEntry(slices) + TfcDictEntry::new_optimized(slices) } } From 72653d8f25a90d73b095a750a276518275c18997 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Thu, 24 Nov 2022 10:37:52 +0100 Subject: [PATCH 07/99] move tfc head to the start of the block --- src/structure/tfc.rs | 64 +++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/src/structure/tfc.rs b/src/structure/tfc.rs index cf8e518d..583e4bdc 
100644 --- a/src/structure/tfc.rs +++ b/src/structure/tfc.rs @@ -18,9 +18,10 @@ pub enum TfcError { #[derive(Debug, PartialEq)] pub struct TfcBlockHeader { + head: Bytes, num_entries: u8, buffer_length: usize, - sizes: [usize; BLOCK_SIZE], + sizes: [usize; BLOCK_SIZE - 1], shareds: [usize; BLOCK_SIZE - 1], } @@ -34,25 +35,27 @@ impl From for TfcError { } impl TfcBlockHeader { - fn parse(buf: &mut B) -> Result { - let num_entries = buf.get_u8(); - let mut sizes = [0_usize; BLOCK_SIZE]; + fn parse(buf: &mut Bytes) -> Result { + let mut sizes = [0_usize; BLOCK_SIZE - 1]; let mut shareds = [0_usize; BLOCK_SIZE - 1]; - let (first_size, _) = vbyte::decode_buf(buf)?; - sizes[0] = first_size as usize; + + let head = buf.split_to(first_size as usize); + + let num_entries = buf.get_u8(); for i in 0..(num_entries - 1) as usize { let (shared, _) = vbyte::decode_buf(buf)?; let (size, _) = vbyte::decode_buf(buf)?; - sizes[i + 1] = size as usize; + sizes[i] = size as usize; shareds[i] = shared as usize; } let buffer_length = sizes.iter().sum(); Ok(Self { + head, num_entries, buffer_length, sizes, @@ -392,8 +395,7 @@ impl TfcBlock { pub fn entry(&self, index: usize) -> TfcDictEntry { if index == 0 { - let b = self.data.slice(..self.header.sizes[0]); - return TfcDictEntry::new(vec![b]); + return TfcDictEntry::new(vec![self.header.head.clone()]); } let mut v = Vec::with_capacity(7); @@ -417,25 +419,45 @@ impl TfcBlock { } } + let start = index - v.len(); let mut taken = 0; let mut slices = Vec::with_capacity(v.len() + 1); - let mut offset: usize = self.header.sizes.iter().take(start).sum(); + let mut offset: usize; + if start == 0 { + offset = 0; + } + else { + offset = self.header.sizes.iter().take(start - 1).sum(); + } for (ix, shared) in v.iter().rev().enumerate() { let have_to_take = shared - taken; let cur_offset = offset; - offset += self.header.sizes[start + ix]; + + if !(ix == 0 && start == 0) { + // the head slice does not contribute to the offset + offset += self.header.sizes[start + ix - 1]; + } + if have_to_take == 0 { continue; } - let slice = self.data.slice(cur_offset..cur_offset + have_to_take); + + let slice; + if ix == 0 && start == 0 { + // the slice has to come out of the header + slice = self.header.head.slice(..have_to_take); + } + else { + slice = self.data.slice(cur_offset..cur_offset + have_to_take); + } slices.push(slice); taken += have_to_take; } - let suffix_size = self.header.sizes[index]; + let suffix_size = self.header.sizes[index-1]; slices.push(self.data.slice(offset..offset + suffix_size)); TfcDictEntry::new_optimized(slices) @@ -445,11 +467,15 @@ impl TfcBlock { fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { let slices_len = slices.len(); debug_assert!(slices_len <= BLOCK_SIZE && slices_len != 0); - buf.put_u8(slices_len as u8); let first = slices[0]; let (vbyte, vbyte_len) = encode_array(first.len() as u64); + + // write the head first buf.put_slice(&vbyte[..vbyte_len]); + buf.put_slice(slices[0]); + + buf.put_u8(slices_len as u8); let mut last = first; @@ -468,7 +494,8 @@ fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { last = cur; } - for suffix in suffixes { + // write the rest of the slices + for suffix in suffixes.into_iter().skip(1) { buf.put_slice(suffix); } } @@ -494,15 +521,16 @@ mod tests { let block = build_incomplete_block(&strings); let expected_header = TfcBlockHeader { + head: Bytes::copy_from_slice(b"aaaaaa"), num_entries: 5, - buffer_length: 17, - sizes: [6, 2, 4, 3, 2, 0, 0, 0], + buffer_length: 11, + sizes: [2, 4, 3, 2, 0, 0, 0], 
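+            // how many prefix bytes each entry shares with the entry before it
+            // (e.g. b"aabb" shares 2 bytes with the head b"aaaaaa")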
shareds: [2, 0, 1, 2, 0, 0, 0], }; assert_eq!(expected_header, block.header); - let expected_bytes = b"aaaaaabbccccdefff"; + let expected_bytes = b"bbccccdefff"; assert_eq!(expected_bytes, &block.data[..]); } From 169448c6ce05d350f9cdebb0c1b62e8f42467bfe Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Thu, 24 Nov 2022 12:16:00 +0100 Subject: [PATCH 08/99] lookup slice in tfc block --- src/structure/tfc.rs | 163 ++++++++++++++++++++++++++++++++++++++---- src/structure/util.rs | 15 ++++ 2 files changed, 165 insertions(+), 13 deletions(-) diff --git a/src/structure/tfc.rs b/src/structure/tfc.rs index 583e4bdc..647edc95 100644 --- a/src/structure/tfc.rs +++ b/src/structure/tfc.rs @@ -8,6 +8,8 @@ use crate::structure::{ vbyte::{self, encode_array}, }; +use super::util::find_common_prefix_ord; + const BLOCK_SIZE: usize = 8; #[derive(Debug)] @@ -419,7 +421,6 @@ impl TfcBlock { } } - let start = index - v.len(); let mut taken = 0; @@ -428,8 +429,7 @@ impl TfcBlock { let mut offset: usize; if start == 0 { offset = 0; - } - else { + } else { offset = self.header.sizes.iter().take(start - 1).sum(); } for (ix, shared) in v.iter().rev().enumerate() { @@ -449,19 +449,74 @@ impl TfcBlock { if ix == 0 && start == 0 { // the slice has to come out of the header slice = self.header.head.slice(..have_to_take); - } - else { + } else { slice = self.data.slice(cur_offset..cur_offset + have_to_take); } slices.push(slice); taken += have_to_take; } - let suffix_size = self.header.sizes[index-1]; + let suffix_size = self.header.sizes[index - 1]; slices.push(self.data.slice(offset..offset + suffix_size)); TfcDictEntry::new_optimized(slices) } + + fn suffixes<'a>(&'a self) -> impl Iterator + 'a { + let head = Some(self.header.head.clone()); + let mut offset = 0; + let tail = self.header.sizes.iter().map(move |s| { + let slice = self.data.slice(offset..*s + offset); + offset += s; + + slice + }); + + head.into_iter().chain(tail) + } + + pub fn id(&self, slice: &[u8]) -> IdLookupResult { + let (mut common_prefix, ordering) = find_common_prefix_ord(slice, &self.header.head); + match ordering { + Ordering::Equal => return IdLookupResult::Found(0), + Ordering::Less => return IdLookupResult::NotFound, + // We have to traverse the block + Ordering::Greater => {} + } + + for (ix, (shared, suffix)) in self + .header + .shareds + .iter() + .zip(self.suffixes().skip(1)) + .enumerate() + { + if *shared < common_prefix { + return IdLookupResult::Closest(ix as u64); + } else if *shared > common_prefix { + continue; + } + + let (new_common_prefix, ordering) = + find_common_prefix_ord(&slice[common_prefix..], &suffix[..]); + match ordering { + Ordering::Equal => return IdLookupResult::Found(ix as u64 + 1), + Ordering::Less => return IdLookupResult::Closest(ix as u64), + Ordering::Greater => { + common_prefix += new_common_prefix; + } + } + } + + IdLookupResult::Closest(self.header.num_entries as u64 - 1) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum IdLookupResult { + Found(u64), + Closest(u64), + NotFound, } fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { @@ -500,16 +555,25 @@ fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { } } +pub fn block_head(mut block: Bytes) -> Result { + let (size, _) = vbyte::decode_buf(&mut block)?; + Ok(block.split_to(size as usize)) +} + #[cfg(test)] mod tests { use super::*; use bytes::Buf; - fn build_incomplete_block(strings: &[&[u8]]) -> TfcBlock { + fn build_block_bytes(strings: &[&[u8]]) -> Bytes { let mut buf = BytesMut::new(); 
build_block_unchecked(&mut buf, &strings); - let mut bytes: Bytes = buf.freeze(); + buf.freeze() + } + + fn build_block(strings: &[&[u8]]) -> TfcBlock { + let mut bytes = build_block_bytes(strings); TfcBlock::parse(&mut bytes).unwrap() } @@ -518,7 +582,7 @@ mod tests { fn build_and_parse_block() { let strings: [&[u8]; 5] = [b"aaaaaa", b"aabb", b"cccc", b"cdef", b"cdff"]; - let block = build_incomplete_block(&strings); + let block = build_block(&strings); let expected_header = TfcBlockHeader { head: Bytes::copy_from_slice(b"aaaaaa"), @@ -537,7 +601,7 @@ mod tests { #[test] fn entry_in_block() { let strings: [&[u8]; 5] = [b"aaaaaa", b"aabb", b"cccc", b"cdef", b"cdff"]; - let block = build_incomplete_block(&strings); + let block = build_block(&strings); for (ix, string) in strings.iter().enumerate() { assert_eq!(*string, &block.entry(ix).to_vec()[..]); @@ -556,7 +620,7 @@ mod tests { b"cdffeeee", b"ceeeeeeeeeeeeeee", ]; - let block = build_incomplete_block(&strings); + let block = build_block(&strings); for (ix, string) in strings.iter().enumerate() { assert_eq!(*string, &block.entry(ix).to_vec()[..]); @@ -575,7 +639,7 @@ mod tests { b"cdffeeee", b"ceeeeeeeeeeeeeee", ]; - let block = build_incomplete_block(&strings); + let block = build_block(&strings); for (ix, string) in strings.iter().enumerate() { let entry = block.entry(ix); @@ -598,7 +662,7 @@ mod tests { b"cdffeeee", b"ceeeeeeeeeeeeeee", ]; - let block = build_incomplete_block(&strings); + let block = build_block(&strings); for (ix, string) in strings.iter().enumerate() { let mut buf = block.entry(ix).into_buf(); @@ -607,4 +671,77 @@ mod tests { assert_eq!(*string, &bytes[..]); } } + + #[test] + fn head_from_complete_block() { + let strings: [&[u8]; 8] = [ + b"aaaaaa", + b"aabb", + b"cccc", + b"cdef", + b"cdff", + b"cdffasdf", + b"cdffeeee", + b"ceeeeeeeeeeeeeee", + ]; + let block = build_block_bytes(&strings); + let head = block_head(block).unwrap(); + + assert_eq!(b"aaaaaa", &head[..]); + } + + #[test] + fn slices_iter() { + let strings: [&[u8]; 8] = [ + b"aaaaaa", + b"aabb", + b"cccc", + b"cdef", + b"cdff", + b"cdffasdf", + b"cdffeeee", + b"ceeeeeeeeeeeeeee", + ]; + let block = build_block(&strings); + + let expected_slices: Vec<&[u8]> = vec![ + b"aaaaaa", + b"bb", + b"cccc", + b"def", + b"ff", + b"asdf", + b"eeee", + b"eeeeeeeeeeeeeee", + ]; + + let expected_bytes: Vec<_> = expected_slices + .into_iter() + .map(|b| Bytes::from(b)) + .collect(); + + let actual: Vec<_> = block.suffixes().collect(); + + assert_eq!(expected_bytes, actual); + } + + #[test] + fn block_id_lookup() { + let strings: [&[u8]; 8] = [ + b"aaaaaa", + b"aabb", + b"cccc", + b"cdef", + b"cdff", + b"cdffasdf", + b"cdffeeee", + b"ceeeeeeeeeeeeeee", + ]; + let block = build_block(&strings); + + for (ix, string) in strings.iter().enumerate() { + let index = block.id(string); + assert_eq!(IdLookupResult::Found(ix as u64), index); + } + } } diff --git a/src/structure/util.rs b/src/structure/util.rs index 2b674275..6bb0f721 100644 --- a/src/structure/util.rs +++ b/src/structure/util.rs @@ -1,6 +1,7 @@ use futures::io::Result; use futures::stream::{Peekable, Stream, StreamExt}; use futures::task::{Context, Poll}; +use std::cmp::Ordering; use std::marker::Unpin; use std::pin::Pin; use tokio::io::{AsyncWrite, AsyncWriteExt}; @@ -18,6 +19,20 @@ pub fn find_common_prefix(b1: &[u8], b2: &[u8]) -> usize { common } +pub fn find_common_prefix_ord(b1: &[u8], b2: &[u8]) -> (usize, Ordering) { + let common_prefix = find_common_prefix(b1, b2); + + if common_prefix == b1.len() && 
b1.len() == b2.len() { + (common_prefix, Ordering::Equal) + } else if b1.len() == common_prefix { + (common_prefix, Ordering::Less) + } else if b2.len() == common_prefix { + (common_prefix, Ordering::Greater) + } else { + (common_prefix, b1[common_prefix].cmp(&b2[common_prefix])) + } +} + pub async fn write_nul_terminated_bytes( w: &mut W, bytes: &[u8], From 1c8c4a2f78fa35456ae2749412e4dfbd8e25646f Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Thu, 24 Nov 2022 12:18:33 +0100 Subject: [PATCH 09/99] test for close matches in tfc --- src/structure/tfc.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/structure/tfc.rs b/src/structure/tfc.rs index 647edc95..28073028 100644 --- a/src/structure/tfc.rs +++ b/src/structure/tfc.rs @@ -744,4 +744,32 @@ mod tests { assert_eq!(IdLookupResult::Found(ix as u64), index); } } + + #[test] + fn block_id_lookup_nonmatches() { + let strings: [&[u8]; 8] = [ + b"aaaaaa", + b"aabb", + b"cccc", + b"cdef", + b"cdff", + b"cdffasdf", + b"cdffeeee", + b"ceeeeeeeeeeeeeee", + ]; + let block = build_block(&strings); + + assert_eq!(IdLookupResult::NotFound, + block.id(b"aa")); + + assert_eq!(IdLookupResult::Closest(0), + block.id(b"aaab")); + + assert_eq!(IdLookupResult::Closest(1), + block.id(b"aabba")); + + assert_eq!(IdLookupResult::Closest(7), + block.id(b"f")); + + } } From 080764339e604654f1e944f924d42834fd8a5623 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Thu, 24 Nov 2022 14:05:33 +0100 Subject: [PATCH 10/99] implement tfcdict --- Cargo.toml | 1 + src/structure/logarray.rs | 100 ++++++++++++++++++- src/structure/{tfc.rs => tfc/block.rs} | 31 ++++-- src/structure/tfc/dict.rs | 131 +++++++++++++++++++++++++ src/structure/tfc/mod.rs | 2 + 5 files changed, 254 insertions(+), 11 deletions(-) rename src/structure/{tfc.rs => tfc/block.rs} (97%) create mode 100644 src/structure/tfc/dict.rs create mode 100644 src/structure/tfc/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 75c68d60..dd626ad9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ flate2 = "1.0" rayon = "1.4" thiserror = "1.0" async-trait = "0.1" +itertools = "0.10" [dev-dependencies] tempfile = "3.1" diff --git a/src/structure/logarray.rs b/src/structure/logarray.rs index 2bd6b627..48d8a15e 100644 --- a/src/structure/logarray.rs +++ b/src/structure/logarray.rs @@ -52,7 +52,7 @@ use super::util; use crate::storage::*; use byteorder::{BigEndian, ByteOrder}; -use bytes::{Bytes, BytesMut}; +use bytes::{Bytes, BytesMut, BufMut}; use futures::stream::{Stream, StreamExt}; use std::{cmp::Ordering, convert::TryFrom, error, fmt, io}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; @@ -311,6 +311,104 @@ impl LogArray { } } +/// write a logarray directly to an AsyncWrite +pub struct LogArrayBufBuilder<'a, B: BufMut> { + /// Destination of the log array data + buf: &'a mut B, + /// Bit width of an element + width: u8, + /// Storage for the next word to be written to the buffer + current: u64, + /// Bit offset in `current` for the msb of the next encoded element + offset: u8, + /// Number of elements written to the buffer + count: u32, +} + +impl<'a, B: BufMut> LogArrayBufBuilder<'a, B> { + pub fn new(buf: &'a mut B, width: u8) -> Self { + Self { + buf, + width, + // Zero is needed for bitwise OR-ing new values. + current: 0, + // Start at the beginning of `current`. + offset: 0, + // No elements have been written. 
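+            // (finalize() later writes this count into the trailing control word)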
+ count: 0, + } + } + + pub fn count(&self) -> u32 { + self.count + } + + pub fn push(&mut self, val: u64) { + // This is the minimum number of leading zeros that a decoded value should have. + let leading_zeros = 64 - self.width; + + // If `val` does not fit in the `width`, return an error. + if val.leading_zeros() < u32::from(leading_zeros) { + panic!("expected value ({}) to fit in {} bits", val, self.width); + } + + // Otherwise, push `val` onto the log array. + // Advance the element count since we know we're going to write `val`. + self.count += 1; + + // Write the first part of `val` to `current`, putting the msb of `val` at the `offset` + // bit. This may be either the upper bits of `val` only or all of it. We check later. + self.current |= val << leading_zeros >> self.offset; + + // Increment `offset` past `val`. + self.offset += self.width; + + // Check if the new `offset` is larger than 64. + if self.offset >= 64 { + // We have filled `current`, so write it to the destination. + //util::write_u64(&mut self.file, self.current).await?; + self.buf.put_u64(self.current); + // Wrap the offset with the word size. + self.offset -= 64; + + // Initialize the new `current`. + self.current = if self.offset == 0 { + // Zero is needed for bitwise OR-ing new values. + 0 + } else { + // This is the second part of `val`: the lower bits. + val << 64 - self.offset + }; + } + } + + pub fn push_vec(&mut self, vals: Vec) { + for val in vals { + self.push(val); + } + } + + fn finalize_data(&mut self) { + if u64::from(self.count) * u64::from(self.width) & 0b11_1111 != 0 { + self.buf.put_u64(self.current); + } + } + + pub fn finalize(mut self) { + let len = self.count; + let width = self.width; + + // Write the final data word. + self.finalize_data(); + + // Write the control word. 
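+        // (layout: 4-byte big-endian element count, 1-byte bit width, 3 zero bytes)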
+ let mut buf = [0; 8]; + BigEndian::write_u32(&mut buf, len); + buf[4] = width; + self.buf.put_slice(&buf); + } +} + /// write a logarray directly to an AsyncWrite pub struct LogArrayFileBuilder { /// Destination of the log array data diff --git a/src/structure/tfc.rs b/src/structure/tfc/block.rs similarity index 97% rename from src/structure/tfc.rs rename to src/structure/tfc/block.rs index 28073028..77d51b0e 100644 --- a/src/structure/tfc.rs +++ b/src/structure/tfc/block.rs @@ -4,13 +4,11 @@ use std::hash::{Hash, Hasher}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use crate::structure::{ - util::find_common_prefix, + util::{find_common_prefix, find_common_prefix_ord}, vbyte::{self, encode_array}, }; -use super::util::find_common_prefix_ord; - -const BLOCK_SIZE: usize = 8; +pub const BLOCK_SIZE: usize = 8; #[derive(Debug)] pub enum TfcError { @@ -81,7 +79,7 @@ impl TfcDictEntry { entry } - fn to_bytes(&self) -> Bytes { + pub fn to_bytes(&self) -> Bytes { if self.0.len() == 1 { self.0[0].clone() } else { @@ -93,7 +91,7 @@ impl TfcDictEntry { buf.freeze() } } - fn to_vec(&self) -> Vec { + pub fn to_vec(&self) -> Vec { let mut v = Vec::with_capacity(self.len()); for slice in self.0.iter() { @@ -103,7 +101,7 @@ impl TfcDictEntry { v } - fn as_buf(&self) -> TfcEntryBuf { + pub fn as_buf(&self) -> TfcEntryBuf { TfcEntryBuf { entry: self, slice_ix: 0, @@ -111,7 +109,7 @@ impl TfcDictEntry { } } - fn into_buf(self) -> OwnedTfcEntryBuf { + pub fn into_buf(self) -> OwnedTfcEntryBuf { OwnedTfcEntryBuf { entry: self, slice_ix: 0, @@ -119,7 +117,7 @@ impl TfcDictEntry { } } - fn len(&self) -> usize { + pub fn len(&self) -> usize { self.0.iter().map(|s| s.len()).sum() } @@ -391,6 +389,10 @@ impl TfcBlock { Ok(Self { header, data }) } + pub fn num_entries(&self) -> u8 { + self.header.num_entries + } + pub fn is_incomplete(&self) -> bool { self.header.num_entries != BLOCK_SIZE as u8 } @@ -519,7 +521,8 @@ pub enum IdLookupResult { NotFound, } -fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { +pub(crate) fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) -> usize { + let mut size = 0; let slices_len = slices.len(); debug_assert!(slices_len <= BLOCK_SIZE && slices_len != 0); @@ -529,8 +532,10 @@ fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { // write the head first buf.put_slice(&vbyte[..vbyte_len]); buf.put_slice(slices[0]); + size += vbyte_len + slices[0].len(); buf.put_u8(slices_len as u8); + size += 1; let mut last = first; @@ -541,10 +546,13 @@ fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { let common_prefix = find_common_prefix(last, cur); let (vbyte, vbyte_len) = encode_array(common_prefix as u64); buf.put_slice(&vbyte[..vbyte_len]); + size += vbyte_len; let suffix_len = cur.len() - common_prefix; let (vbyte, vbyte_len) = encode_array(suffix_len as u64); buf.put_slice(&vbyte[..vbyte_len]); + size += vbyte_len; + suffixes.push(&cur[common_prefix..]); last = cur; } @@ -552,7 +560,10 @@ fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) { // write the rest of the slices for suffix in suffixes.into_iter().skip(1) { buf.put_slice(suffix); + size += suffix.len(); } + + size } pub fn block_head(mut block: Bytes) -> Result { diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs new file mode 100644 index 00000000..1936a3e8 --- /dev/null +++ b/src/structure/tfc/dict.rs @@ -0,0 +1,131 @@ +use itertools::Itertools; +use bytes::{BufMut, Bytes}; +use crate::structure::{util::calculate_width, LogArrayBufBuilder, LogArray}; + +use super::block::*; + +fn 
build_dict_unchecked<'a, B1:BufMut, B2:BufMut,I:Iterator>(array_buf: &mut B1, data_buf: &mut B2, iter: I) { + let chunk_iter = iter.chunks(BLOCK_SIZE); + let mut offsets = Vec::new(); + + let mut offset = 0; + for chunk in &chunk_iter { + let slices: Vec<&[u8]> = chunk.collect(); + let size = build_block_unchecked(data_buf, &slices); + offset += size; + offsets.push(offset as u64); + } + + offsets.pop(); + + let largest_element = offsets.last().cloned().unwrap_or(0); + let width = calculate_width(largest_element); + let mut array_builder = LogArrayBufBuilder::new(array_buf, width); + + array_builder.push_vec(offsets); + array_builder.finalize(); +} + +pub struct TfcDict { + offsets: LogArray, + data: Bytes +} + +impl TfcDict { + pub fn from_parts(offsets: Bytes, data: Bytes) -> Self { + let offsets = LogArray::parse(offsets).unwrap(); + Self { + offsets, data + } + } + + pub fn block_bytes(&self, block_index:usize) -> Bytes { + let offset: usize; + if block_index == 0 { + offset = 0; + } + else { + offset = self.offsets.entry(block_index-1) as usize; + } + + let block_bytes; + if block_index == self.offsets.len() { + block_bytes = self.data.slice(offset..); + } + else { + let end = self.offsets.entry(block_index) as usize; + block_bytes = self.data.slice(offset..end); + } + + block_bytes + } + + pub fn block(&self, block_index: usize) -> TfcBlock { + let mut block_bytes = self.block_bytes(block_index); + TfcBlock::parse(&mut block_bytes).unwrap() + } + + pub fn block_head(&self, block_index: usize) -> Bytes { + let block_bytes = self.block_bytes(block_index); + block_head(block_bytes).unwrap() + } + + pub fn num_blocks(&self) -> usize { + self.offsets.len() + 1 + } + + pub fn entry(&self, index: u64) -> TfcDictEntry { + let block = self.block((index / 8) as usize); + block.entry((index % 8) as usize) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use bytes::BytesMut; + + #[test] + fn build_dict_of_two_blocks() { + let strings: Vec<&[u8]> = vec![ + b"aaaaaaaa", + b"bbbbbbbb", + b"bbbcccdaaaa", + b"f", + b"fafasdfas", + b"gafovp", + b"gdfasfa", + b"gdfbbbbbb", + + b"hello", + b"iguana", + b"illusion", + b"illustrated", + b"jetengine", + b"jetplane", + + ]; + + let mut array_buf = BytesMut::new(); + let mut data_buf = BytesMut::new(); + build_dict_unchecked(&mut array_buf, &mut data_buf, strings.clone().into_iter()); + + let array_bytes = array_buf.freeze(); + let data_bytes = data_buf.freeze(); + let dict =TfcDict::from_parts(array_bytes, data_bytes); + + assert_eq!(2, dict.num_blocks()); + assert_eq!(b"aaaaaaaa", &dict.block_head(0)[..]); + assert_eq!(b"hello", &dict.block_head(1)[..]); + + let block0 = dict.block(0); + let block1 = dict.block(1); + + assert_eq!(8, block0.num_entries()); + assert_eq!(6, block1.num_entries()); + + for (ix, s) in strings.into_iter().enumerate() { + assert_eq!(s, &dict.entry(ix as u64).to_bytes()[..]); + } + } +} diff --git a/src/structure/tfc/mod.rs b/src/structure/tfc/mod.rs new file mode 100644 index 00000000..01c22bfa --- /dev/null +++ b/src/structure/tfc/mod.rs @@ -0,0 +1,2 @@ +pub mod block; +pub mod dict; From dfa4834157f2c615c6c8a63e7ef2f4bcc46f7a9b Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Thu, 24 Nov 2022 14:44:26 +0100 Subject: [PATCH 11/99] look up id of entry in tfcdict --- src/structure/logarray.rs | 2 +- src/structure/tfc/block.rs | 38 +++++++--- src/structure/tfc/dict.rs | 140 ++++++++++++++++++++++++++++++++----- src/structure/vbyte.rs | 2 - 4 files changed, 153 insertions(+), 29 deletions(-) diff --git 
a/src/structure/logarray.rs b/src/structure/logarray.rs index 48d8a15e..629dafa7 100644 --- a/src/structure/logarray.rs +++ b/src/structure/logarray.rs @@ -52,7 +52,7 @@ use super::util; use crate::storage::*; use byteorder::{BigEndian, ByteOrder}; -use bytes::{Bytes, BytesMut, BufMut}; +use bytes::{BufMut, Bytes, BytesMut}; use futures::stream::{Stream, StreamExt}; use std::{cmp::Ordering, convert::TryFrom, error, fmt, io}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 77d51b0e..4b5fc07b 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -521,6 +521,29 @@ pub enum IdLookupResult { NotFound, } +impl IdLookupResult { + pub fn offset(self, offset: u64) -> Self { + match self { + Self::Found(i) => Self::Found(i + offset), + Self::Closest(i) => Self::Closest(i + offset), + Self::NotFound => Self::NotFound, + } + } + + pub fn default(self, previous_block_num: usize, mut previous_block: Bytes) -> Self { + match self { + Self::NotFound => { + // we should move num elements to start of block so we don't hae to parse a full header + let previous_header = TfcBlockHeader::parse(&mut previous_block).unwrap(); + let id = previous_header.num_entries as u64 - 1 + previous_block_num as u64; + + Self::Closest(id) + } + _ => self, + } + } +} + pub(crate) fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) -> usize { let mut size = 0; let slices_len = slices.len(); @@ -552,7 +575,7 @@ pub(crate) fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) -> let (vbyte, vbyte_len) = encode_array(suffix_len as u64); buf.put_slice(&vbyte[..vbyte_len]); size += vbyte_len; - + suffixes.push(&cur[common_prefix..]); last = cur; } @@ -770,17 +793,12 @@ mod tests { ]; let block = build_block(&strings); - assert_eq!(IdLookupResult::NotFound, - block.id(b"aa")); - - assert_eq!(IdLookupResult::Closest(0), - block.id(b"aaab")); + assert_eq!(IdLookupResult::NotFound, block.id(b"aa")); - assert_eq!(IdLookupResult::Closest(1), - block.id(b"aabba")); + assert_eq!(IdLookupResult::Closest(0), block.id(b"aaab")); - assert_eq!(IdLookupResult::Closest(7), - block.id(b"f")); + assert_eq!(IdLookupResult::Closest(1), block.id(b"aabba")); + assert_eq!(IdLookupResult::Closest(7), block.id(b"f")); } } diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 1936a3e8..53d7cef0 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -1,10 +1,16 @@ -use itertools::Itertools; +use std::cmp::Ordering; + +use crate::structure::{util::calculate_width, LogArray, LogArrayBufBuilder}; use bytes::{BufMut, Bytes}; -use crate::structure::{util::calculate_width, LogArrayBufBuilder, LogArray}; +use itertools::Itertools; use super::block::*; -fn build_dict_unchecked<'a, B1:BufMut, B2:BufMut,I:Iterator>(array_buf: &mut B1, data_buf: &mut B2, iter: I) { +fn build_dict_unchecked<'a, B1: BufMut, B2: BufMut, I: Iterator>( + array_buf: &mut B1, + data_buf: &mut B2, + iter: I, +) { let chunk_iter = iter.chunks(BLOCK_SIZE); let mut offsets = Vec::new(); @@ -28,31 +34,27 @@ fn build_dict_unchecked<'a, B1:BufMut, B2:BufMut,I:Iterator>(arra pub struct TfcDict { offsets: LogArray, - data: Bytes + data: Bytes, } impl TfcDict { pub fn from_parts(offsets: Bytes, data: Bytes) -> Self { let offsets = LogArray::parse(offsets).unwrap(); - Self { - offsets, data - } + Self { offsets, data } } - pub fn block_bytes(&self, block_index:usize) -> Bytes { + pub fn block_bytes(&self, block_index: usize) -> Bytes { let offset: usize; if 
block_index == 0 { offset = 0; - } - else { - offset = self.offsets.entry(block_index-1) as usize; + } else { + offset = self.offsets.entry(block_index - 1) as usize; } let block_bytes; if block_index == self.offsets.len() { block_bytes = self.data.slice(offset..); - } - else { + } else { let end = self.offsets.entry(block_index) as usize; block_bytes = self.data.slice(offset..end); } @@ -78,6 +80,48 @@ impl TfcDict { let block = self.block((index / 8) as usize); block.entry((index % 8) as usize) } + + pub fn id(&self, slice: &[u8]) -> IdLookupResult { + // let's binary search + let mut min = 0; + let mut max = self.offsets.len(); + let mut mid: usize; + + while min <= max { + mid = (min + max) / 2; + + let head_slice = self.block_head(mid); + + match slice.cmp(&head_slice[..]) { + Ordering::Less => { + if mid == 0 { + // we checked the first block and determined that the string should be in the previous block, if it exists. + // but since this is the first block, the string doesn't exist. + return IdLookupResult::NotFound; + } + max = mid - 1; + } + Ordering::Greater => min = mid + 1, + Ordering::Equal => return IdLookupResult::Found((mid * BLOCK_SIZE) as u64), // what luck! turns out the string we were looking for was the block head + } + } + + let found = max; + + // we found the block the string should be part of. + let block = self.block(found); + let block_id = block.id(slice); + let result = block_id.offset((found * BLOCK_SIZE) as u64); + if found != 0 { + // the default value will fill in the last index of the + // previous block if the entry was not found in the + // current block. This is only possible if the block as + // not the very first one. + result.default(found - 1, self.block_bytes(found - 1)) + } else { + result + } + } } #[cfg(test)] @@ -96,14 +140,12 @@ mod tests { b"gafovp", b"gdfasfa", b"gdfbbbbbb", - b"hello", b"iguana", b"illusion", b"illustrated", b"jetengine", b"jetplane", - ]; let mut array_buf = BytesMut::new(); @@ -112,7 +154,7 @@ mod tests { let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); - let dict =TfcDict::from_parts(array_bytes, data_bytes); + let dict = TfcDict::from_parts(array_bytes, data_bytes); assert_eq!(2, dict.num_blocks()); assert_eq!(b"aaaaaaaa", &dict.block_head(0)[..]); @@ -128,4 +170,70 @@ mod tests { assert_eq!(s, &dict.entry(ix as u64).to_bytes()[..]); } } + + #[test] + fn lookup_entries_by_slice() { + let strings: Vec<&[u8]> = vec![ + b"aaaaaaaa", + b"bbbbbbbb", + b"bbbcccdaaaa", + b"f", + b"fafasdfas", + b"gafovp", + b"gdfasfa", + b"gdfbbbbbb", + b"hello", + b"iguana", + b"illusion", + b"illustrated", + b"jetengine", + b"jetplane", + ]; + + let mut array_buf = BytesMut::new(); + let mut data_buf = BytesMut::new(); + build_dict_unchecked(&mut array_buf, &mut data_buf, strings.clone().into_iter()); + + let array_bytes = array_buf.freeze(); + let data_bytes = data_buf.freeze(); + let dict = TfcDict::from_parts(array_bytes, data_bytes); + + for (ix, s) in strings.into_iter().enumerate() { + assert_eq!(IdLookupResult::Found(ix as u64), dict.id(s)); + } + } + + #[test] + fn lookup_nonmatching_entries_by_slice() { + let strings: Vec<&[u8]> = vec![ + b"aaaaaaaa", + b"bbbbbbbb", + b"bbbcccdaaaa", + b"f", + b"fafasdfas", + b"gafovp", + b"gdfasfa", + b"gdfbbbbbb", + b"hello", + b"iguana", + b"illusion", + b"illustrated", + b"jetengine", + b"jetplane", + ]; + + let mut array_buf = BytesMut::new(); + let mut data_buf = BytesMut::new(); + build_dict_unchecked(&mut array_buf, &mut data_buf, strings.clone().into_iter()); + + 
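// freeze the builders into immutable Bytes so the dict can parse them +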
let array_bytes = array_buf.freeze(); + let data_bytes = data_buf.freeze(); + let dict = TfcDict::from_parts(array_bytes, data_bytes); + + assert_eq!(IdLookupResult::NotFound, dict.id(b"a")); + assert_eq!(IdLookupResult::Closest(0), dict.id(b"ab")); + assert_eq!(IdLookupResult::Closest(7), dict.id(b"hallo")); + assert_eq!(IdLookupResult::Closest(8), dict.id(b"hello!")); + assert_eq!(IdLookupResult::Closest(13), dict.id(b"zebra")); + } } diff --git a/src/structure/vbyte.rs b/src/structure/vbyte.rs index 9ad5bce5..a2396d8d 100644 --- a/src/structure/vbyte.rs +++ b/src/structure/vbyte.rs @@ -17,8 +17,6 @@ use futures::io; use tokio::io::{AsyncWrite, AsyncWriteExt}; -use std::io::Write; - use bytes::Buf; /// The maximum number of bytes required for any `u64` in a variable-byte encoding. From de19515c3f65e037e0fe999b6f63a8383a665103 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Thu, 24 Nov 2022 15:05:58 +0100 Subject: [PATCH 12/99] move block size to start for easier search --- src/structure/tfc/block.rs | 22 +++++++++------------- src/structure/tfc/dict.rs | 18 +++++++++++++++--- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 4b5fc07b..6f1dd5d2 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -36,14 +36,14 @@ impl From for TfcError { impl TfcBlockHeader { fn parse(buf: &mut Bytes) -> Result { + let num_entries = buf.get_u8(); + let mut sizes = [0_usize; BLOCK_SIZE - 1]; let mut shareds = [0_usize; BLOCK_SIZE - 1]; let (first_size, _) = vbyte::decode_buf(buf)?; let head = buf.split_to(first_size as usize); - let num_entries = buf.get_u8(); - for i in 0..(num_entries - 1) as usize { let (shared, _) = vbyte::decode_buf(buf)?; let (size, _) = vbyte::decode_buf(buf)?; @@ -530,14 +530,10 @@ impl IdLookupResult { } } - pub fn default(self, previous_block_num: usize, mut previous_block: Bytes) -> Self { + pub fn default(self, default: u64) -> Self { match self { Self::NotFound => { - // we should move num elements to start of block so we don't hae to parse a full header - let previous_header = TfcBlockHeader::parse(&mut previous_block).unwrap(); - let id = previous_header.num_entries as u64 - 1 + previous_block_num as u64; - - Self::Closest(id) + Self::Closest(default) } _ => self, } @@ -549,6 +545,9 @@ pub(crate) fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) -> let slices_len = slices.len(); debug_assert!(slices_len <= BLOCK_SIZE && slices_len != 0); + buf.put_u8(slices_len as u8); + size += 1; + let first = slices[0]; let (vbyte, vbyte_len) = encode_array(first.len() as u64); @@ -557,13 +556,9 @@ pub(crate) fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) -> buf.put_slice(slices[0]); size += vbyte_len + slices[0].len(); - buf.put_u8(slices_len as u8); - size += 1; - let mut last = first; let mut suffixes: Vec<&[u8]> = Vec::with_capacity(slices.len()); - suffixes.push(last); for i in 1..slices.len() { let cur = slices[i]; let common_prefix = find_common_prefix(last, cur); @@ -581,7 +576,7 @@ pub(crate) fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) -> } // write the rest of the slices - for suffix in suffixes.into_iter().skip(1) { + for suffix in suffixes.into_iter() { buf.put_slice(suffix); size += suffix.len(); } @@ -590,6 +585,7 @@ pub(crate) fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) -> } pub fn block_head(mut block: Bytes) -> Result { + block.advance(1); let (size, _) = vbyte::decode_buf(&mut block)?; Ok(block.split_to(size as usize)) } 
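To make the layout this patch settles on concrete: a block now starts with its entry count, then the vbyte-encoded length of the head entry, the head itself, one (shared, suffix-length) vbyte pair per remaining entry, and finally the concatenated suffixes. The sketch below is illustrative only and is not part of any patch; it assumes the vbyte coding from src/structure/vbyte.rs, where a value under 128 is a single byte with its most significant bit set.

    fn main() {
        // Two entries, "hello" and "help", sharing the 3-byte prefix "hel".
        let block: Vec<u8> = vec![
            2,           // entry count, now the very first byte of the block
            0b1000_0101, // vbyte(5): length of the head entry "hello"
            b'h', b'e', b'l', b'l', b'o', // the head entry, stored in full
            0b1000_0011, // vbyte(3): bytes shared with the previous entry
            0b1000_0001, // vbyte(1): length of this entry's suffix
            b'p',        // the suffix; "hel" + "p" reconstructs "help"
        ];

        // block_head above skips a single byte (the count) to reach the head,
        // and reading a block's entry count is now a one-byte fetch.
        let head_len = (block[1] & 0x7f) as usize;
        assert_eq!(b"hello", &block[2..2 + head_len]);

        // The second entry is rebuilt from the shared prefix plus its suffix.
        let mut entry = block[2..2 + 3].to_vec();
        entry.push(block[block.len() - 1]);
        assert_eq!(b"help".to_vec(), entry);
    }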
diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 53d7cef0..0221095f 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -43,7 +43,7 @@ impl TfcDict { Self { offsets, data } } - pub fn block_bytes(&self, block_index: usize) -> Bytes { + fn block_offset(&self, block_index: usize) -> usize { let offset: usize; if block_index == 0 { offset = 0; @@ -51,6 +51,11 @@ impl TfcDict { offset = self.offsets.entry(block_index - 1) as usize; } + offset + } + + pub fn block_bytes(&self, block_index: usize) -> Bytes { + let offset = self.block_offset(block_index); let block_bytes; if block_index == self.offsets.len() { block_bytes = self.data.slice(offset..); @@ -72,6 +77,12 @@ impl TfcDict { block_head(block_bytes).unwrap() } + pub fn block_num_elements(&self, block_index: usize) -> u8 { + let offset = self.block_offset(block_index); + + self.data[offset] + } + pub fn num_blocks(&self) -> usize { self.offsets.len() + 1 } @@ -111,13 +122,14 @@ impl TfcDict { // we found the block the string should be part of. let block = self.block(found); let block_id = block.id(slice); - let result = block_id.offset((found * BLOCK_SIZE) as u64); + let offset = (found * BLOCK_SIZE) as u64; + let result = block_id.offset(offset); if found != 0 { // the default value will fill in the last index of the // previous block if the entry was not found in the // current block. This is only possible if the block as // not the very first one. - result.default(found - 1, self.block_bytes(found - 1)) + result.default(self.block_num_elements(found-1) as u64 + offset - 1) } else { result } From 07fe95ea380e60762282efd89c88c22e7a41360e Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Thu, 24 Nov 2022 15:37:39 +0100 Subject: [PATCH 13/99] renamed TfcDict and related types to SizedDict and related --- src/structure/tfc/block.rs | 76 +++++++++++++++++++------------------- src/structure/tfc/dict.rs | 16 ++++---- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 6f1dd5d2..10adee23 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -11,13 +11,13 @@ use crate::structure::{ pub const BLOCK_SIZE: usize = 8; #[derive(Debug)] -pub enum TfcError { +pub enum SizedDictError { InvalidCoding, NotEnoughData, } #[derive(Debug, PartialEq)] -pub struct TfcBlockHeader { +pub struct SizedBlockHeader { head: Bytes, num_entries: u8, buffer_length: usize, @@ -25,7 +25,7 @@ pub struct TfcBlockHeader { shareds: [usize; BLOCK_SIZE - 1], } -impl From for TfcError { +impl From for SizedDictError { fn from(e: vbyte::DecodeError) -> Self { match e { vbyte::DecodeError::UnexpectedEndOfBuffer => Self::NotEnoughData, @@ -34,8 +34,8 @@ impl From for TfcError { } } -impl TfcBlockHeader { - fn parse(buf: &mut Bytes) -> Result { +impl SizedBlockHeader { + fn parse(buf: &mut Bytes) -> Result { let num_entries = buf.get_u8(); let mut sizes = [0_usize; BLOCK_SIZE - 1]; @@ -65,9 +65,9 @@ impl TfcBlockHeader { } #[derive(Clone, Debug)] -pub struct TfcDictEntry(Vec); +pub struct SizedDictEntry(Vec); -impl TfcDictEntry { +impl SizedDictEntry { pub fn new(parts: Vec) -> Self { Self(parts) } @@ -101,16 +101,16 @@ impl TfcDictEntry { v } - pub fn as_buf(&self) -> TfcEntryBuf { - TfcEntryBuf { + pub fn as_buf(&self) -> SizedDictEntryBuf { + SizedDictEntryBuf { entry: self, slice_ix: 0, pos_in_slice: 0, } } - pub fn into_buf(self) -> OwnedTfcEntryBuf { - OwnedTfcEntryBuf { + pub fn into_buf(self) -> OwnedSizedDictEntryBuf { + 
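// like as_buf, but consuming the entry so the returned reader owns its data +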
OwnedSizedDictEntryBuf { entry: self, slice_ix: 0, pos_in_slice: 0, @@ -176,7 +176,7 @@ impl TfcDictEntry { } } -impl PartialEq for TfcDictEntry { +impl PartialEq for SizedDictEntry { fn eq(&self, other: &Self) -> bool { // unequal length, so can't be equal if self.len() != other.len() { @@ -187,9 +187,9 @@ impl PartialEq for TfcDictEntry { } } -impl Eq for TfcDictEntry {} +impl Eq for SizedDictEntry {} -impl Hash for TfcDictEntry { +impl Hash for SizedDictEntry { fn hash(&self, state: &mut H) { for part in self.0.iter() { state.write(part); @@ -197,7 +197,7 @@ impl Hash for TfcDictEntry { } } -impl Ord for TfcDictEntry { +impl Ord for SizedDictEntry { fn cmp(&self, other: &Self) -> Ordering { // both are empty, so equal if self.len() == 0 && other.len() == 0 { @@ -270,25 +270,25 @@ impl Ord for TfcDictEntry { } } -impl PartialOrd for TfcDictEntry { +impl PartialOrd for SizedDictEntry { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } #[derive(Clone)] -pub struct TfcEntryBuf<'a> { - entry: &'a TfcDictEntry, +pub struct SizedDictEntryBuf<'a> { + entry: &'a SizedDictEntry, slice_ix: usize, pos_in_slice: usize, } -fn calculate_remaining<'a>(entry: &TfcDictEntry, slice_ix: usize, pos_in_slice: usize) -> usize { +fn calculate_remaining<'a>(entry: &SizedDictEntry, slice_ix: usize, pos_in_slice: usize) -> usize { let total: usize = entry.0.iter().skip(slice_ix).map(|s| s.len()).sum(); total - pos_in_slice } -fn calculate_chunk<'a>(entry: &'a TfcDictEntry, slice_ix: usize, pos_in_slice: usize) -> &[u8] { +fn calculate_chunk<'a>(entry: &'a SizedDictEntry, slice_ix: usize, pos_in_slice: usize) -> &[u8] { if slice_ix >= entry.0.len() { &[] } else { @@ -298,7 +298,7 @@ fn calculate_chunk<'a>(entry: &'a TfcDictEntry, slice_ix: usize, pos_in_slice: u } fn calculate_advance<'a>( - entry: &'a TfcDictEntry, + entry: &'a SizedDictEntry, slice_ix: &mut usize, pos_in_slice: &mut usize, mut cnt: usize, @@ -338,7 +338,7 @@ fn calculate_advance<'a>( } } -impl<'a> Buf for TfcEntryBuf<'a> { +impl<'a> Buf for SizedDictEntryBuf<'a> { fn remaining(&self) -> usize { calculate_remaining(self.entry, self.slice_ix, self.pos_in_slice) } @@ -352,13 +352,13 @@ impl<'a> Buf for TfcEntryBuf<'a> { } } -pub struct OwnedTfcEntryBuf { - entry: TfcDictEntry, +pub struct OwnedSizedDictEntryBuf { + entry: SizedDictEntry, slice_ix: usize, pos_in_slice: usize, } -impl Buf for OwnedTfcEntryBuf { +impl Buf for OwnedSizedDictEntryBuf { fn remaining(&self) -> usize { calculate_remaining(&self.entry, self.slice_ix, self.pos_in_slice) } @@ -372,16 +372,16 @@ impl Buf for OwnedTfcEntryBuf { } } -pub struct TfcBlock { - header: TfcBlockHeader, +pub struct SizedDictBlock { + header: SizedBlockHeader, data: Bytes, } -impl TfcBlock { - pub fn parse(bytes: &mut Bytes) -> Result { - let header = TfcBlockHeader::parse(bytes)?; +impl SizedDictBlock { + pub fn parse(bytes: &mut Bytes) -> Result { + let header = SizedBlockHeader::parse(bytes)?; if bytes.remaining() < header.buffer_length { - return Err(TfcError::NotEnoughData); + return Err(SizedDictError::NotEnoughData); } let data = bytes.split_to(header.buffer_length); @@ -397,9 +397,9 @@ impl TfcBlock { self.header.num_entries != BLOCK_SIZE as u8 } - pub fn entry(&self, index: usize) -> TfcDictEntry { + pub fn entry(&self, index: usize) -> SizedDictEntry { if index == 0 { - return TfcDictEntry::new(vec![self.header.head.clone()]); + return SizedDictEntry::new(vec![self.header.head.clone()]); } let mut v = Vec::with_capacity(7); @@ -461,7 +461,7 @@ impl TfcBlock { let 
suffix_size = self.header.sizes[index - 1]; slices.push(self.data.slice(offset..offset + suffix_size)); - TfcDictEntry::new_optimized(slices) + SizedDictEntry::new_optimized(slices) } fn suffixes<'a>(&'a self) -> impl Iterator + 'a { @@ -584,7 +584,7 @@ pub(crate) fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) -> size } -pub fn block_head(mut block: Bytes) -> Result { +pub fn block_head(mut block: Bytes) -> Result { block.advance(1); let (size, _) = vbyte::decode_buf(&mut block)?; Ok(block.split_to(size as usize)) @@ -602,10 +602,10 @@ mod tests { buf.freeze() } - fn build_block(strings: &[&[u8]]) -> TfcBlock { + fn build_block(strings: &[&[u8]]) -> SizedDictBlock { let mut bytes = build_block_bytes(strings); - TfcBlock::parse(&mut bytes).unwrap() + SizedDictBlock::parse(&mut bytes).unwrap() } #[test] @@ -614,7 +614,7 @@ mod tests { let block = build_block(&strings); - let expected_header = TfcBlockHeader { + let expected_header = SizedBlockHeader { head: Bytes::copy_from_slice(b"aaaaaa"), num_entries: 5, buffer_length: 11, diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 0221095f..0aef6b89 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -32,12 +32,12 @@ fn build_dict_unchecked<'a, B1: BufMut, B2: BufMut, I: Iterator array_builder.finalize(); } -pub struct TfcDict { +pub struct SizedDict { offsets: LogArray, data: Bytes, } -impl TfcDict { +impl SizedDict { pub fn from_parts(offsets: Bytes, data: Bytes) -> Self { let offsets = LogArray::parse(offsets).unwrap(); Self { offsets, data } @@ -67,9 +67,9 @@ impl TfcDict { block_bytes } - pub fn block(&self, block_index: usize) -> TfcBlock { + pub fn block(&self, block_index: usize) -> SizedDictBlock { let mut block_bytes = self.block_bytes(block_index); - TfcBlock::parse(&mut block_bytes).unwrap() + SizedDictBlock::parse(&mut block_bytes).unwrap() } pub fn block_head(&self, block_index: usize) -> Bytes { @@ -87,7 +87,7 @@ impl TfcDict { self.offsets.len() + 1 } - pub fn entry(&self, index: u64) -> TfcDictEntry { + pub fn entry(&self, index: u64) -> SizedDictEntry { let block = self.block((index / 8) as usize); block.entry((index % 8) as usize) } @@ -166,7 +166,7 @@ mod tests { let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); - let dict = TfcDict::from_parts(array_bytes, data_bytes); + let dict = SizedDict::from_parts(array_bytes, data_bytes); assert_eq!(2, dict.num_blocks()); assert_eq!(b"aaaaaaaa", &dict.block_head(0)[..]); @@ -208,7 +208,7 @@ mod tests { let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); - let dict = TfcDict::from_parts(array_bytes, data_bytes); + let dict = SizedDict::from_parts(array_bytes, data_bytes); for (ix, s) in strings.into_iter().enumerate() { assert_eq!(IdLookupResult::Found(ix as u64), dict.id(s)); @@ -240,7 +240,7 @@ mod tests { let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); - let dict = TfcDict::from_parts(array_bytes, data_bytes); + let dict = SizedDict::from_parts(array_bytes, data_bytes); assert_eq!(IdLookupResult::NotFound, dict.id(b"a")); assert_eq!(IdLookupResult::Closest(0), dict.id(b"ab")); From 1124d43823341268a81b7fef7930e593a7a65e25 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Thu, 24 Nov 2022 16:42:51 +0100 Subject: [PATCH 14/99] typed dictionary segments --- src/structure/tfc/block.rs | 4 +- src/structure/tfc/dict.rs | 9 ++- src/structure/tfc/mod.rs | 1 + src/structure/tfc/typed.rs | 156 +++++++++++++++++++++++++++++++++++++ 4 files changed, 163 
insertions(+), 7 deletions(-) create mode 100644 src/structure/tfc/typed.rs diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 10adee23..0d204c4f 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -532,9 +532,7 @@ impl IdLookupResult { pub fn default(self, default: u64) -> Self { match self { - Self::NotFound => { - Self::Closest(default) - } + Self::NotFound => Self::Closest(default), _ => self, } } diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 0aef6b89..e9d3c3e3 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -6,7 +6,7 @@ use itertools::Itertools; use super::block::*; -fn build_dict_unchecked<'a, B1: BufMut, B2: BufMut, I: Iterator>( +pub fn build_dict_unchecked, I: Iterator>( array_buf: &mut B1, data_buf: &mut B2, iter: I, @@ -16,8 +16,9 @@ fn build_dict_unchecked<'a, B1: BufMut, B2: BufMut, I: Iterator let mut offset = 0; for chunk in &chunk_iter { - let slices: Vec<&[u8]> = chunk.collect(); - let size = build_block_unchecked(data_buf, &slices); + let slices: Vec = chunk.collect(); + let borrows: Vec<&[u8]> = slices.iter().map(|s| s.as_ref()).collect(); + let size = build_block_unchecked(data_buf, &borrows); offset += size; offsets.push(offset as u64); } @@ -129,7 +130,7 @@ impl SizedDict { // previous block if the entry was not found in the // current block. This is only possible if the block as // not the very first one. - result.default(self.block_num_elements(found-1) as u64 + offset - 1) + result.default(self.block_num_elements(found - 1) as u64 + offset - 1) } else { result } diff --git a/src/structure/tfc/mod.rs b/src/structure/tfc/mod.rs index 01c22bfa..5813c5f4 100644 --- a/src/structure/tfc/mod.rs +++ b/src/structure/tfc/mod.rs @@ -1,2 +1,3 @@ pub mod block; pub mod dict; +pub mod typed; diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs new file mode 100644 index 00000000..ed83b7f6 --- /dev/null +++ b/src/structure/tfc/typed.rs @@ -0,0 +1,156 @@ +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use bytes::{Buf, BufMut, Bytes, BytesMut}; +use std::marker::PhantomData; + +use crate::structure::MonotonicLogArray; + +use super::{ + block::IdLookupResult, + dict::{build_dict_unchecked, SizedDict}, +}; + +pub struct TypedDict { + types_present: MonotonicLogArray, + type_offsets: Option, + data: Bytes, +} + +pub struct TypedDictSegment { + dict: SizedDict, + _x: PhantomData, +} + +impl TypedDictSegment { + pub fn from_parts(offsets: Bytes, data: Bytes) -> Self { + let dict = SizedDict::from_parts(offsets, data); + Self { + dict, + _x: Default::default(), + } + } + + pub fn get(&self, index: u64) -> T { + let entry = self.dict.entry(index); + T::from_lexical(entry.into_buf()) + } + + pub fn id(&self, val: &T) -> IdLookupResult { + let slice = val.to_lexical(); + self.dict.id(&slice[..]) + } +} + +pub enum Datatype { + String, + UInt64, +} + +pub trait TdbDataType { + fn datatype() -> Datatype; + + fn to_lexical(&self) -> Bytes; + + fn from_lexical(b: B) -> Self; +} + +impl TdbDataType for String { + fn datatype() -> Datatype { + Datatype::String + } + + fn to_lexical(&self) -> Bytes { + Bytes::copy_from_slice(self.as_bytes()) + } + + fn from_lexical(mut b: B) -> Self { + let mut vec = vec![0; b.remaining()]; + b.copy_to_slice(&mut vec); + String::from_utf8(vec).unwrap() + } +} + +impl TdbDataType for u64 { + fn datatype() -> Datatype { + Datatype::UInt64 + } + + fn to_lexical(&self) -> Bytes { + let mut buf = BytesMut::new().writer(); + 
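// big-endian, so the encoded bytes sort in the same order as the values +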
buf.write_u64::(*self).unwrap(); + + buf.into_inner().freeze() + } + + fn from_lexical(b: B) -> Self { + b.reader().read_u64::().unwrap() + } +} + +pub fn build_segment>( + array_buf: &mut B1, + data_buf: &mut B2, + iter: I, +) { + let slices = iter.map(|val| val.to_lexical()); + + build_dict_unchecked(array_buf, data_buf, slices); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn build_and_parse_string_dictionary() { + let strings: Vec<_> = [ + "aaaaaaaa", + "bbbbbbbb", + "bbbcccdaaaa", + "f", + "fafasdfas", + "gafovp", + "gdfasfa", + "gdfbbbbbb", + "hello", + "iguana", + "illusion", + "illustrated", + "jetengine", + "jetplane", + ] + .iter() + .map(|s| s.to_string()) + .collect(); + + let mut offsets = BytesMut::new(); + let mut data = BytesMut::new(); + + build_segment(&mut offsets, &mut data, strings.clone().into_iter()); + + let segment = TypedDictSegment::from_parts(offsets.freeze(), data.freeze()); + + for (ix, s) in strings.into_iter().enumerate() { + assert_eq!(IdLookupResult::Found(ix as u64), segment.id(&s)); + assert_eq!(s, segment.get(ix as u64)); + } + } + + #[test] + fn build_and_parse_u64_dictionary() { + let nums: Vec<_> = vec![ + 2, 5, 42, 2324, 256463, 256464, 1234567, 803050303, 999999999, 9999999999, + ]; + + let mut offsets = BytesMut::new(); + let mut data = BytesMut::new(); + + build_segment(&mut offsets, &mut data, nums.clone().into_iter()); + + let segment = TypedDictSegment::from_parts(offsets.freeze(), data.freeze()); + + for (ix, s) in nums.into_iter().enumerate() { + assert_eq!(IdLookupResult::Found(ix as u64), segment.id(&s)); + assert_eq!(s, segment.get(ix as u64)); + } + } +} From a2df5f37144b9f39aa3c4d1e2ce11b8e9fb3d3e7 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Fri, 25 Nov 2022 15:23:09 +0100 Subject: [PATCH 15/99] Add decimals and bigints --- Cargo.toml | 1 + src/structure/tfc/mod.rs | 2 + src/structure/tfc/typed.rs | 236 ++++++++++++++++++++++++++++++++++++- 3 files changed, 236 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dd626ad9..ccc95d7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ rayon = "1.4" thiserror = "1.0" async-trait = "0.1" itertools = "0.10" +rug = "1.16" [dev-dependencies] tempfile = "3.1" diff --git a/src/structure/tfc/mod.rs b/src/structure/tfc/mod.rs index 5813c5f4..4b0b89cd 100644 --- a/src/structure/tfc/mod.rs +++ b/src/structure/tfc/mod.rs @@ -1,3 +1,5 @@ pub mod block; +pub mod decimal; pub mod dict; +pub mod integer; pub mod typed; diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index ed83b7f6..40146dd6 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -1,12 +1,14 @@ +use crate::structure::MonotonicLogArray; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; +use rug::Integer; use std::marker::PhantomData; -use crate::structure::MonotonicLogArray; - use super::{ block::IdLookupResult, + decimal::{decimal_to_storage, storage_to_decimal}, dict::{build_dict_unchecked, SizedDict}, + integer::{bigint_to_storage, storage_to_bigint}, }; pub struct TypedDict { @@ -43,6 +45,13 @@ impl TypedDictSegment { pub enum Datatype { String, UInt64, + UInt32, + Int64, + Int32, + Float32, + Float64, + Decimal, + BigInt, } pub trait TdbDataType { @@ -86,6 +95,135 @@ impl TdbDataType for u64 { } } +const I64_BYTE_MASK: u64 = 0b1000_0000 << (7 * 8); +impl TdbDataType for i64 { + fn datatype() -> Datatype { + Datatype::Int64 + } + + fn to_lexical(&self) -> Bytes { + let sign_flip = 
I64_BYTE_MASK ^ (*self as u64); + let mut buf = BytesMut::new().writer(); + buf.write_u64::(sign_flip).unwrap(); + buf.into_inner().freeze() + } + + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u64::().unwrap(); + (I64_BYTE_MASK ^ i) as i64 + } +} + +const I32_BYTE_MASK: u32 = 0b1000_0000 << (3 * 8); +impl TdbDataType for i32 { + fn datatype() -> Datatype { + Datatype::Int32 + } + + fn to_lexical(&self) -> Bytes { + let sign_flip = I32_BYTE_MASK ^ (*self as u32); + let mut buf = BytesMut::new().writer(); + buf.write_u32::(sign_flip).unwrap(); + buf.into_inner().freeze() + } + + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u32::().unwrap(); + (I32_BYTE_MASK ^ i) as i32 + } +} + +const F32_SIGN_MASK: u32 = 0x8000_0000; +const F32_COMPLEMENT: u32 = 0xffff_ffff; +impl TdbDataType for f32 { + fn datatype() -> Datatype { + Datatype::Float32 + } + + fn to_lexical(&self) -> Bytes { + let f = *self; + let g: u32; + if f.signum() == -1.0 { + g = f.to_bits() ^ F32_COMPLEMENT; + } else { + g = f.to_bits() ^ F32_SIGN_MASK; + }; + let mut buf = BytesMut::new().writer(); + buf.write_u32::(g).unwrap(); + buf.into_inner().freeze() + } + + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u32::().unwrap(); + if i & F32_SIGN_MASK > 0 { + f32::from_bits(i ^ F32_SIGN_MASK) + } else { + f32::from_bits(i ^ F32_COMPLEMENT) + } + } +} + +const F64_SIGN_MASK: u64 = 0x8000_0000_0000_0000; +const F64_COMPLEMENT: u64 = 0xffff_ffff_ffff_ffff; +impl TdbDataType for f64 { + fn datatype() -> Datatype { + Datatype::Float64 + } + + fn to_lexical(&self) -> Bytes { + let f = *self; + let g: u64; + if f.signum() == -1.0 { + g = f.to_bits() ^ F64_COMPLEMENT; + } else { + g = f.to_bits() ^ F64_SIGN_MASK; + }; + let mut buf = BytesMut::new().writer(); + buf.write_u64::(g).unwrap(); + buf.into_inner().freeze() + } + + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u64::().unwrap(); + if i & F64_SIGN_MASK > 0 { + f64::from_bits(i ^ F64_SIGN_MASK) + } else { + f64::from_bits(i ^ F64_COMPLEMENT) + } + } +} + +impl TdbDataType for Integer { + fn datatype() -> Datatype { + Datatype::Float64 + } + + fn to_lexical(&self) -> Bytes { + Bytes::from(bigint_to_storage(self.clone())) + } + + fn from_lexical(mut b: B) -> Self { + storage_to_bigint(&mut b) + } +} + +#[derive(PartialEq, Debug)] +pub struct Decimal(String); + +impl TdbDataType for Decimal { + fn datatype() -> Datatype { + Datatype::Decimal + } + + fn to_lexical(&self) -> Bytes { + Bytes::from(decimal_to_storage(&self.0)) + } + + fn from_lexical(mut b: B) -> Self { + Decimal(storage_to_decimal(&mut b)) + } +} + pub fn build_segment>( array_buf: &mut B1, data_buf: &mut B2, @@ -137,7 +275,7 @@ mod tests { #[test] fn build_and_parse_u64_dictionary() { - let nums: Vec<_> = vec![ + let nums: Vec = vec![ 2, 5, 42, 2324, 256463, 256464, 1234567, 803050303, 999999999, 9999999999, ]; @@ -153,4 +291,96 @@ mod tests { assert_eq!(s, segment.get(ix as u64)); } } + + use std::fmt::Debug; + + fn cycle(d: D) + where + D: TdbDataType + PartialEq + Debug, + { + let j = D::from_lexical(d.to_lexical()); + assert_eq!(d, j) + } + + #[test] + fn cycle_i64() { + cycle(-1_i64); + cycle(-23423423_i64); + cycle(0_i64); + cycle(i64::MAX); + cycle(i64::MIN); + cycle(324323_i64); + } + + #[test] + fn cycle_i32() { + cycle(-1_i32); + cycle(-23423423_i32); + cycle(0_i32); + cycle(i32::MAX); + cycle(i32::MIN); + cycle(324323_i32); + } + + #[test] + fn cycle_f32() { + cycle(-1_f32); + cycle(-23423423_f32); + cycle(0_f32); + cycle(324323_f32); + cycle(324323.2343_f32); + 
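// the same magnitude negated, which takes the complement branch +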
cycle(-324323.2343_f32); + cycle(f32::MAX); + cycle(f32::MIN); + cycle(f32::NEG_INFINITY); + cycle(f32::INFINITY); + + let j = f32::from_lexical(f32::NAN.to_lexical()); + assert!(j.is_nan()) + } + + #[test] + fn cycle_f64() { + cycle(-1_f64); + cycle(-23423423_f64); + cycle(0_f64); + cycle(-0_f64); + cycle(324323_f64); + cycle(324323.2343_f64); + cycle(-324323.2343_f64); + cycle(f64::MAX); + cycle(f64::MIN); + cycle(f64::NEG_INFINITY); + cycle(f64::INFINITY); + + let j = f64::from_lexical(f64::NAN.to_lexical()); + assert!(j.is_nan()) + } + + fn int(s: &str) -> Integer { + s.parse::().unwrap() + } + + #[test] + fn cycle_integer() { + cycle(int("-1")); + cycle(int("-12342343")); + cycle(int("0")); + cycle(int("234239847938724")); + cycle(int("983423984793872423423423432312698")); + cycle(int("-983423984793872423423423432312698")); + } + + #[test] + fn cycle_decimal() { + cycle(Decimal("-1".to_string())); + cycle(Decimal("-12342343".to_string())); + cycle(Decimal("0".to_string())); + cycle(Decimal("-0.1".to_string())); + cycle(Decimal("-0.0".to_string())); + cycle(Decimal("-0.1239343".to_string())); + cycle(Decimal("234239847938724.23423421".to_string())); + cycle(Decimal("983423984793872423423423432312698".to_string())); + cycle(Decimal("-983423984793872423423423432312698".to_string())); + } } From 1b917104ac6937a6e0329695e54d2f13bd89f9f2 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Fri, 25 Nov 2022 15:43:04 +0100 Subject: [PATCH 16/99] Implementation of decimal and integer --- src/structure/tfc/decimal.rs | 119 ++++++++++++++++++++++++++++++ src/structure/tfc/integer.rs | 121 +++++++++++++++++++++++++++++++ 2 files changed, 240 insertions(+) create mode 100644 src/structure/tfc/decimal.rs create mode 100644 src/structure/tfc/integer.rs diff --git a/src/structure/tfc/decimal.rs b/src/structure/tfc/decimal.rs new file mode 100644 index 00000000..7fa0143d --- /dev/null +++ b/src/structure/tfc/decimal.rs @@ -0,0 +1,119 @@ +use bytes::Buf; +use rug::Integer; + +use crate::structure::tfc::integer; + +use super::integer::{bigint_to_storage, storage_to_bigint_and_sign, NEGATIVE_ZERO}; + +fn encode_fraction(fraction: Option<&str>) -> Vec { + if let Some(f) = fraction { + if f.is_empty() { + return vec![0x00]; // a "false zero" so we don't represent it at all. + } + let len = f.len(); + let size = len / 2 + usize::from(len % 2 != 0); + let mut bcd = Vec::with_capacity(size); + for i in 0..size { + let last = if i * 2 + 2 > len { + i * 2 + 1 + } else { + i * 2 + 2 + }; + let two = &f[2 * i..last]; + let mut this_int = centary_decimal_encode(two); + this_int <<= 1; + if i != size - 1 { + this_int |= 1 // add continuation bit.
+ } + bcd.push(this_int) + } + bcd + } else { + vec![0x00] // a "false zero" so we don't represent no fraction as a fraction + } +} + +fn centary_decimal_encode(s: &str) -> u8 { + if s.len() == 1 { + let i = s.parse::().unwrap(); + i * 11 + 1 + } else { + let i = s.parse::().unwrap(); + let o = i / 10 + 1; + i + o + 1 + } +} + +fn centary_decimal_decode(i: u8) -> String { + let j = i - 1; + if j % 11 == 0 { + let num = j / 11; + format!("{num:}") + } else { + let d = j / 11; + let num = j - d - 1; + format!("{num:02}") + } +} + +fn decode_fraction(fraction_buf: &mut B, is_pos: bool) -> String { + let mut first_byte = fraction_buf.chunk()[0]; + if !is_pos { + first_byte = !first_byte; + } + if first_byte == 0x00 { + "".to_string() + } else { + let mut s = String::new(); + while fraction_buf.has_remaining() { + let mut byte = fraction_buf.get_u8(); + if !is_pos { + byte = !byte; + } + let num = byte >> 1; + let res = centary_decimal_decode(num); + s.push_str(&res); + if res.len() == 1 || byte & 1 == 0 { + break; + } + } + s + } +} + +pub fn decimal_to_storage(decimal: &str) -> Vec { + let mut parts = decimal.split('.'); + let bigint = parts.next().unwrap_or(decimal); + let fraction = parts.next(); + let integer_part = bigint.parse::().unwrap(); + let is_neg = decimal.starts_with('-'); + let prefix = bigint_to_storage(integer_part.clone()); + let mut prefix = if integer_part == 0 && is_neg { + vec![NEGATIVE_ZERO] // negative zero + } else { + prefix + }; + let suffix = if is_neg { + let mut suffix = encode_fraction(fraction); + for i in 0..suffix.len() { + suffix[i] = !suffix[i] + } + suffix + } else { + encode_fraction(fraction) + }; + prefix.extend(suffix); + prefix +} + +pub fn storage_to_decimal(bytes: &mut B) -> String { + let (int, is_pos) = storage_to_bigint_and_sign(bytes); + let fraction = decode_fraction(bytes, is_pos); + let decimal = if fraction.is_empty() { + format!("{int:}") + } else { + let sign = if int == 0 && !is_pos { "-" } else { "" }; + format!("{sign:}{int:}.{fraction:}") + }; + decimal +} diff --git a/src/structure/tfc/integer.rs b/src/structure/tfc/integer.rs new file mode 100644 index 00000000..25617add --- /dev/null +++ b/src/structure/tfc/integer.rs @@ -0,0 +1,121 @@ +use bytes::Buf; +use rug::Integer; + +const TERMINAL: u8 = 0; +const FIRST_SIGN: u8 = 0b1000_0000u8; +const FIRST_TERMINAL: u8 = 0b0000_0000u8; +const CONTINUATION: u8 = 0b1000_0000u8; +const FIRST_CONTINUATION: u8 = 0b0100_0000u8; +const BASE_MASK: u8 = !CONTINUATION; +const FIRST_MASK: u8 = !(FIRST_SIGN | FIRST_CONTINUATION); +const FIRST_MAX: u8 = FIRST_CONTINUATION; +pub const NEGATIVE_ZERO: u8 = 0b0111_1111; + +// Leave in reverse order for the convenience of the caller +fn size_encode(size: u32) -> Vec { + if size == 0 { + return vec![NEGATIVE_ZERO]; // just the positive sign bit (allows negative zero) + } + let mut remainder = size; + let mut v = vec![]; + let mut last = true; + while remainder > 0 { + if remainder >= CONTINUATION as u32 { + let continued = if last { TERMINAL } else { CONTINUATION }; + let byte = continued | ((remainder & BASE_MASK as u32) as u8); + v.push(byte); + } else if remainder >= FIRST_MAX as u32 { + // special case where we fit in 7 bits but not 6 + // and we need a zero padded initial byte. 
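+ // (that padded first byte carries only the sign and continuation flags)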
+ let continued = if last { TERMINAL } else { CONTINUATION }; + let byte = continued | ((remainder & BASE_MASK as u32) as u8); + v.push(byte); + let byte = FIRST_SIGN | FIRST_CONTINUATION; + v.push(byte) + } else { + let continued = if last { + FIRST_TERMINAL + } else { + FIRST_CONTINUATION + }; + let byte = FIRST_SIGN | continued | ((remainder & FIRST_MASK as u32) as u8); + v.push(byte) + } + remainder >>= 7; + last = false; + } + v +} + +fn size_decode(v: &mut B) -> (bool, u32, usize) { + let mut size: u32 = 0; + let mut sign = true; + let mut i = 0; + while v.has_remaining() { + let vi = v.get_u8(); + if i == 0 { + sign = vi & FIRST_SIGN != 0; + let vi = if sign { vi } else { !vi }; + let val = (vi & FIRST_MASK) as u32; + if vi & FIRST_CONTINUATION == 0 { + return (sign, val, i + 1); + } else { + size += val + } + } else { + let vi = if sign { vi } else { !vi }; + let val = (vi & BASE_MASK) as u32; + if vi & CONTINUATION == 0 { + return (sign, size + val, i + 1); + } else { + size += val + } + } + size <<= 7; + i += 1; + } + (sign, size, i) +} + +pub fn bigint_to_storage(bigint: Integer) -> Vec { + let is_neg = bigint < 0; + let mut int = bigint.abs(); + let size = int.significant_bits() + 1; + let num_bytes = (size / 8) + u32::from(size % 8 != 0); + let size_bytes = size_encode(num_bytes); + let mut number_vec = Vec::with_capacity(size_bytes.len() + num_bytes as usize + 1); + for _ in 0..num_bytes { + let byte = int.to_u8_wrapping(); + number_vec.push(byte); + int >>= 8; + } + number_vec.extend(size_bytes); + if is_neg { + for i in 0..number_vec.len() { + number_vec[i] = !number_vec[i] + } + } + number_vec.reverse(); + number_vec +} + +pub fn storage_to_bigint_and_sign(bytes: &mut B) -> (Integer, bool) { + let (is_pos, size, _) = size_decode(bytes); + let mut int = Integer::new(); + if size == 0 { + return (int, is_pos); + } + for _ in 0..size { + int <<= 8; + let b = bytes.get_u8(); + int += if is_pos { b } else { !b }; + } + if !is_pos { + int = -int; + } + (int, is_pos) +} + +pub fn storage_to_bigint(bytes: &mut B) -> Integer { + storage_to_bigint_and_sign(bytes).0 +} From b1ddd01966097774d037e38302de31145461a22d Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Fri, 25 Nov 2022 15:46:37 +0100 Subject: [PATCH 17/99] only enable rug features we need --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ccc95d7c..96e14b42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ rayon = "1.4" thiserror = "1.0" async-trait = "0.1" itertools = "0.10" -rug = "1.16" +rug = {version="1.16", default-features=false, features=["integer","rational"]} [dev-dependencies] tempfile = "3.1" From 699bfbc197577c53667111483d41873e3c88b9f5 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Fri, 25 Nov 2022 16:10:49 +0100 Subject: [PATCH 18/99] refactor segment building to use one continuous offset array --- src/structure/tfc/dict.rs | 25 ++++++++++++++++--------- src/structure/tfc/typed.rs | 24 ++++++++++++++++++------ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index e9d3c3e3..7f0eb358 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -6,13 +6,12 @@ use itertools::Itertools; use super::block::*; -pub fn build_dict_unchecked, I: Iterator>( - array_buf: &mut B1, - data_buf: &mut B2, +pub fn build_dict_unchecked, I: Iterator>( + offsets: &mut Vec, + data_buf: &mut B, iter: I, ) { let chunk_iter = iter.chunks(BLOCK_SIZE); - let mut 
offsets = Vec::new(); let mut offset = 0; for chunk in &chunk_iter { @@ -22,12 +21,14 @@ pub fn build_dict_unchecked, I: Iterator< offset += size; offsets.push(offset as u64); } - +} +pub fn build_offset_logarray(buf: &mut B, mut offsets: Vec) { + // the last offset doesn't matter as it's implied by the total size offsets.pop(); let largest_element = offsets.last().cloned().unwrap_or(0); let width = calculate_width(largest_element); - let mut array_builder = LogArrayBufBuilder::new(array_buf, width); + let mut array_builder = LogArrayBufBuilder::new(buf, width); array_builder.push_vec(offsets); array_builder.finalize(); @@ -142,6 +143,12 @@ mod tests { use super::*; use bytes::BytesMut; + fn build_dict_and_offsets, I: Iterator>(array_buf: &mut B1, data_buf: &mut B2, vals: I) { + let mut offsets = Vec::new(); + build_dict_unchecked(&mut offsets, data_buf, vals); + build_offset_logarray(array_buf, offsets); + } + #[test] fn build_dict_of_two_blocks() { let strings: Vec<&[u8]> = vec![ @@ -163,7 +170,7 @@ mod tests { let mut array_buf = BytesMut::new(); let mut data_buf = BytesMut::new(); - build_dict_unchecked(&mut array_buf, &mut data_buf, strings.clone().into_iter()); + build_dict_and_offsets(&mut array_buf, &mut data_buf, strings.clone().into_iter()); let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); @@ -205,7 +212,7 @@ mod tests { let mut array_buf = BytesMut::new(); let mut data_buf = BytesMut::new(); - build_dict_unchecked(&mut array_buf, &mut data_buf, strings.clone().into_iter()); + build_dict_and_offsets(&mut array_buf, &mut data_buf, strings.clone().into_iter()); let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); @@ -237,7 +244,7 @@ mod tests { let mut array_buf = BytesMut::new(); let mut data_buf = BytesMut::new(); - build_dict_unchecked(&mut array_buf, &mut data_buf, strings.clone().into_iter()); + build_dict_and_offsets(&mut array_buf, &mut data_buf, strings.clone().into_iter()); let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 40146dd6..2e823eb9 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -224,20 +224,32 @@ impl TdbDataType for Decimal { } } -pub fn build_segment>( - array_buf: &mut B1, - data_buf: &mut B2, +pub fn build_segment>( + offsets: &mut Vec, + data_buf: &mut B, iter: I, ) { let slices = iter.map(|val| val.to_lexical()); - build_dict_unchecked(array_buf, data_buf, slices); + build_dict_unchecked(offsets, data_buf, slices); } #[cfg(test)] mod tests { + use crate::structure::tfc::dict::build_offset_logarray; + use super::*; + fn build_segment_and_offsets>( + array_buf: &mut B1, + data_buf: &mut B2, + iter: I, + ) { + let mut offsets = Vec::new(); + build_segment(&mut offsets, data_buf, iter); + build_offset_logarray(array_buf, offsets); + } + #[test] fn build_and_parse_string_dictionary() { let strings: Vec<_> = [ @@ -263,7 +275,7 @@ mod tests { let mut offsets = BytesMut::new(); let mut data = BytesMut::new(); - build_segment(&mut offsets, &mut data, strings.clone().into_iter()); + build_segment_and_offsets(&mut offsets, &mut data, strings.clone().into_iter()); let segment = TypedDictSegment::from_parts(offsets.freeze(), data.freeze()); @@ -282,7 +294,7 @@ mod tests { let mut offsets = BytesMut::new(); let mut data = BytesMut::new(); - build_segment(&mut offsets, &mut data, nums.clone().into_iter()); + build_segment_and_offsets(&mut offsets, &mut data, nums.clone().into_iter()); let segment = 
TypedDictSegment::from_parts(offsets.freeze(), data.freeze()); From ab5e12f8ca1ed0f7f9a998690f1050a6bc362de0 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Fri, 25 Nov 2022 17:05:02 +0100 Subject: [PATCH 19/99] write multiple segments into one go --- src/structure/tfc/typed.rs | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 2e823eb9..6e86f5fa 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -1,13 +1,14 @@ -use crate::structure::MonotonicLogArray; +use crate::structure::{MonotonicLogArray, util::calculate_width, LogArrayBufBuilder}; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use rug::Integer; use std::marker::PhantomData; +use itertools::*; use super::{ block::IdLookupResult, decimal::{decimal_to_storage, storage_to_decimal}, - dict::{build_dict_unchecked, SizedDict}, + dict::{build_dict_unchecked, SizedDict, build_offset_logarray}, integer::{bigint_to_storage, storage_to_bigint}, }; @@ -234,6 +236,34 @@ pub fn build_segment>( build_dict_unchecked(offsets, data_buf, slices); } +pub fn build_multiple_segments, I: Iterator>(used_types: &mut B1, type_offsets: &mut B2, block_offsets: &mut B3, data: &mut B4, iter: I) { + let mut types: Vec<(Datatype, u64)> = Vec::new(); + let mut offsets = Vec::with_capacity(iter.size_hint().0); + for (key, group) in iter.group_by(|v|v.0).into_iter() { + let start_offset = offsets.len(); + types.push((key, start_offset as u64)); + build_dict_unchecked(&mut offsets, data, group.map(|v|v.1)); + } + offsets.pop(); + build_offset_logarray(block_offsets, offsets); + + let largest = types.last().unwrap(); + + let types_width = calculate_width(largest.0 as u64); + let type_offsets_width = calculate_width(largest.1); + + let mut types_builder = LogArrayBufBuilder::new(used_types, types_width); + let mut type_offsets_builder = LogArrayBufBuilder::new(type_offsets, type_offsets_width); + + for (t,o) in types { + types_builder.push(t as u64); + type_offsets_builder.push(o); + } + + types_builder.finalize(); + type_offsets_builder.finalize(); +} + #[cfg(test)] mod tests { use crate::structure::tfc::dict::build_offset_logarray; From 71df537d3a2b10706c1bcef87de5a12d134e4dc1 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Sat, 26 Nov 2022 11:05:16 +0100 Subject: [PATCH 20/99] Working test, added start offset parameter, Need to fix offsets --- src/structure/tfc/dict.rs | 15 ++-- src/structure/tfc/typed.rs | 154 +++++++++++++++++++++++++++++-------- 2 files changed, 133 insertions(+), 36 deletions(-) diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 7f0eb358..2f3c0a6e 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -7,19 +7,20 @@ use itertools::Itertools; use super::block::*; pub fn build_dict_unchecked, I: Iterator>( + start_offset: u64, offsets: &mut Vec, data_buf: &mut B, iter: I, ) { let chunk_iter = iter.chunks(BLOCK_SIZE); - let mut offset = 0; + let mut offset = start_offset; for chunk in &chunk_iter { let slices: Vec = chunk.collect(); let borrows: Vec<&[u8]> = slices.iter().map(|s| s.as_ref()).collect(); let size = build_block_unchecked(data_buf, &borrows); - offset += size; - offsets.push(offset as u64); + offset += size as
u64; + offsets.push(offset); } } pub fn build_offset_logarray(buf: &mut B, mut offsets: Vec) { @@ -143,9 +144,13 @@ mod tests { use super::*; use bytes::BytesMut; - fn build_dict_and_offsets, I: Iterator>(array_buf: &mut B1, data_buf: &mut B2, vals: I) { + fn build_dict_and_offsets, I: Iterator>( + array_buf: &mut B1, + data_buf: &mut B2, + vals: I, + ) { let mut offsets = Vec::new(); - build_dict_unchecked(&mut offsets, data_buf, vals); + build_dict_unchecked(0, &mut offsets, data_buf, vals); build_offset_logarray(array_buf, offsets); } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 6e86f5fa..a1d9e2d5 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -1,14 +1,14 @@ -use crate::structure::{MonotonicLogArray, util::calculate_width, LogArrayBufBuilder}; +use crate::structure::{util::calculate_width, LogArrayBufBuilder, MonotonicLogArray}; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; +use itertools::*; use rug::Integer; use std::marker::PhantomData; -use itertools::*; use super::{ block::IdLookupResult, decimal::{decimal_to_storage, storage_to_decimal}, - dict::{build_dict_unchecked, SizedDict, build_offset_logarray}, + dict::{build_dict_unchecked, build_offset_logarray, SizedDict}, integer::{bigint_to_storage, storage_to_bigint}, }; @@ -17,6 +17,17 @@ pub struct TypedDict { type_offsets: Option, data: Bytes, } +/* +impl TypedDict { + pub fn id(&self, slice: &[u8], dt: Datatype) -> IdLookupResult { + if let Some(i) = self.types_present.index_of(dt as u64) { + let offset = types_offsets[i]; + + } else { + IdLookupResult::NotFound + } + } +}*/ pub struct TypedDictSegment { dict: SizedDict, @@ -46,10 +57,10 @@ impl TypedDictSegment { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Datatype { String = 0, - UInt64, UInt32, - Int64, Int32, + UInt64, + Int64, Float32, Float64, Decimal, @@ -80,58 +91,75 @@ impl TdbDataType for String { } } -impl TdbDataType for u64 { +impl TdbDataType for u32 { fn datatype() -> Datatype { Datatype::UInt64 } fn to_lexical(&self) -> Bytes { let mut buf = BytesMut::new().writer(); - buf.write_u64::(*self).unwrap(); + buf.write_u32::(*self).unwrap(); buf.into_inner().freeze() } fn from_lexical(b: B) -> Self { - b.reader().read_u64::().unwrap() + b.reader().read_u32::().unwrap() } } -const I64_BYTE_MASK: u64 = 0b1000_0000 << (7 * 8); -impl TdbDataType for i64 { +const I32_BYTE_MASK: u32 = 0b1000_0000 << (3 * 8); +impl TdbDataType for i32 { fn datatype() -> Datatype { - Datatype::Int64 + Datatype::Int32 } fn to_lexical(&self) -> Bytes { - let sign_flip = I64_BYTE_MASK ^ (*self as u64); + let sign_flip = I32_BYTE_MASK ^ (*self as u32); let mut buf = BytesMut::new().writer(); - buf.write_u64::(sign_flip).unwrap(); + buf.write_u32::(sign_flip).unwrap(); buf.into_inner().freeze() } fn from_lexical(b: B) -> Self { - let i = b.reader().read_u64::().unwrap(); - (I64_BYTE_MASK ^ i) as i64 + let i = b.reader().read_u32::().unwrap(); + (I32_BYTE_MASK ^ i) as i32 } } -const I32_BYTE_MASK: u32 = 0b1000_0000 << (3 * 8); -impl TdbDataType for i32 { +impl TdbDataType for u64 { fn datatype() -> Datatype { - Datatype::Int32 + Datatype::UInt64 } fn to_lexical(&self) -> Bytes { - let sign_flip = I32_BYTE_MASK ^ (*self as u32); let mut buf = BytesMut::new().writer(); - buf.write_u32::(sign_flip).unwrap(); + buf.write_u64::(*self).unwrap(); + buf.into_inner().freeze() } fn from_lexical(b: B) -> Self { - let i = b.reader().read_u32::().unwrap(); - (I32_BYTE_MASK ^ 
i) as i32 + b.reader().read_u64::().unwrap() + } +} + +const I64_BYTE_MASK: u64 = 0b1000_0000 << (7 * 8); +impl TdbDataType for i64 { + fn datatype() -> Datatype { + Datatype::Int64 + } + + fn to_lexical(&self) -> Bytes { + let sign_flip = I64_BYTE_MASK ^ (*self as u64); + let mut buf = BytesMut::new().writer(); + buf.write_u64::(sign_flip).unwrap(); + buf.into_inner().freeze() + } + + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u64::().unwrap(); + (I64_BYTE_MASK ^ i) as i64 } } @@ -233,20 +261,33 @@ pub fn build_segment>( ) { let slices = iter.map(|val| val.to_lexical()); - build_dict_unchecked(offsets, data_buf, slices); + build_dict_unchecked(0, offsets, data_buf, slices); } -pub fn build_multiple_segments, I: Iterator>(used_types: &mut B1, type_offsets: &mut B2, block_offsets: &mut B3, data: &mut B4, iter: I) { +pub fn build_multiple_segments< + B1: BufMut, + B2: BufMut, + B3: BufMut, + B4: BufMut, + R: AsRef<[u8]>, + I: Iterator, +>( + used_types: &mut B1, + type_offsets: &mut B2, + block_offsets: &mut B3, + data: &mut B4, + iter: I, +) { let mut types: Vec<(Datatype, u64)> = Vec::new(); let mut offsets = Vec::with_capacity(iter.size_hint().0); - for (key, group) in iter.group_by(|v|v.0).into_iter() { - let start_offset = offsets.len(); + for (key, group) in iter.group_by(|v| v.0).into_iter() { + let start_offset = offsets.last().map(|t| *t).unwrap_or(0_u64); types.push((key, start_offset as u64)); - build_dict_unchecked(&mut offsets, data, group.map(|v|v.1)); + build_dict_unchecked(start_offset, &mut offsets, data, group.map(|v| v.1)); } offsets.pop(); build_offset_logarray(block_offsets, offsets); - + eprintln!("types: {types:?}"); let largest = types.last().unwrap(); let types_width = calculate_width(largest.0 as u64); @@ -254,8 +295,8 @@ pub fn build_multiple_segments(t: T) -> (Datatype, Bytes) { + (T::datatype(), t.to_lexical()) + } + + #[test] + fn test_multi_segment() { + let mut vec: Vec<(Datatype, Bytes)> = vec![ + make_entry(Decimal("-1".to_string())), + make_entry("asdf".to_string()), + make_entry(Decimal("-12342343.2348973".to_string())), + make_entry("Batty".to_string()), + make_entry("Batman".to_string()), + make_entry(-3_i64), + make_entry(Decimal("2348973".to_string())), + make_entry(4.389832_f32), + make_entry("apple".to_string()), + make_entry(23434.389832_f32), + make_entry("apply".to_string()), + make_entry(-500_i32), + make_entry(20_u32), + ]; + vec.sort(); + let mut used_types = Vec::new(); + let mut type_offsets = Vec::new(); + let mut block_offsets = Vec::new(); + let mut data = BytesMut::new(); + build_multiple_segments( + &mut used_types, + &mut type_offsets, + &mut block_offsets, + &mut data, + vec.clone().into_iter(), + ); + eprintln!("used_types : {used_types:?}"); + eprintln!("type_offsets : {type_offsets:?}"); + eprintln!("block_offsets : {block_offsets:?}"); + eprintln!("data : {data:?}"); + + let used_types_vec: Vec = LogArray::parse(Bytes::from(used_types)) + .unwrap() + .iter() + .collect(); + + let expected_types_vec: Vec = vec.iter().map(|x| x.0 as u64).dedup().collect(); + assert_eq!(used_types_vec, expected_types_vec); + + eprintln!("expected_types_vec: {expected_types_vec:?}"); + + panic!(); + } } From 46b9a284b1aaa376e4daaa10cc51e9b7aff65cf2 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Sat, 26 Nov 2022 11:09:37 +0100 Subject: [PATCH 21/99] fix offsets --- src/structure/tfc/typed.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 
a1d9e2d5..ab48c678 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -282,7 +282,8 @@ pub fn build_multiple_segments< let mut offsets = Vec::with_capacity(iter.size_hint().0); for (key, group) in iter.group_by(|v| v.0).into_iter() { let start_offset = offsets.last().map(|t| *t).unwrap_or(0_u64); - types.push((key, start_offset as u64)); + let start_type_offset = offsets.len(); + types.push((key, start_type_offset as u64)); build_dict_unchecked(start_offset, &mut offsets, data, group.map(|v| v.1)); } offsets.pop(); From 1b8393090956a3e85ba5faba46e5a8bbba5ce14b Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Sat, 26 Nov 2022 17:15:10 +0100 Subject: [PATCH 22/99] typed dict retrieval --- Cargo.toml | 2 + src/structure/logarray.rs | 24 ++++ src/structure/tfc/block.rs | 1 + src/structure/tfc/dict.rs | 67 +++++---- src/structure/tfc/typed.rs | 280 +++++++++++++++++++++++++++++-------- 5 files changed, 287 insertions(+), 87 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 96e14b42..51034a67 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,8 @@ thiserror = "1.0" async-trait = "0.1" itertools = "0.10" rug = {version="1.16", default-features=false, features=["integer","rational"]} +num-derive = "0.3" +num-traits = "0.2" [dev-dependencies] tempfile = "3.1" diff --git a/src/structure/logarray.rs b/src/structure/logarray.rs index 629dafa7..bb0d99c5 100644 --- a/src/structure/logarray.rs +++ b/src/structure/logarray.rs @@ -58,6 +58,8 @@ use std::{cmp::Ordering, convert::TryFrom, error, fmt, io}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio_util::codec::{Decoder, FramedRead}; +use itertools::Itertools; + // Static assertion: We expect the system architecture bus width to be >= 32 bits. If it is not, // the following line will cause a compiler error. (Ignore the unrelated error message itself.) const _: usize = 0 - !(std::mem::size_of::() >= 32 >> 3) as usize; @@ -86,6 +88,12 @@ pub struct LogArray { input_buf: Bytes, } +impl std::fmt::Debug for LogArray { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "LogArray([{}])", self.iter().format(", ")) + } +} + /// An error that occurred during a log array operation. 
#[derive(Debug, PartialEq)] pub enum LogArrayError { @@ -681,6 +689,12 @@ pub async fn logarray_stream_entries( #[derive(Clone)] pub struct MonotonicLogArray(LogArray); +impl std::fmt::Debug for MonotonicLogArray { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "MonotonicLogArray([{}])", self.iter().format(", ")) + } +} + impl MonotonicLogArray { pub fn from_logarray(logarray: LogArray) -> MonotonicLogArray { if cfg!(debug_assertions) { @@ -702,6 +716,12 @@ impl MonotonicLogArray { MonotonicLogArray(logarray) } + pub fn parse(bytes: Bytes) -> Result { + let logarray = LogArray::parse(bytes)?; + + Ok(Self::from_logarray(logarray)) + } + pub fn len(&self) -> usize { self.0.len() } @@ -750,6 +770,10 @@ impl MonotonicLogArray { (min + max) / 2 + 1 } + + pub fn slice(&self, offset: usize, len: usize) -> MonotonicLogArray { + Self(self.0.slice(offset, len)) + } } impl From for MonotonicLogArray { diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 0d204c4f..7335091a 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -372,6 +372,7 @@ impl Buf for OwnedSizedDictEntryBuf { } } +#[derive(Debug)] pub struct SizedDictBlock { header: SizedBlockHeader, data: Bytes, diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 2f3c0a6e..7a85cddc 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; -use crate::structure::{util::calculate_width, LogArray, LogArrayBufBuilder}; +use crate::structure::{util::calculate_width, LogArrayBufBuilder, MonotonicLogArray}; use bytes::{BufMut, Bytes}; use itertools::Itertools; @@ -35,15 +35,21 @@ pub fn build_offset_logarray(buf: &mut B, mut offsets: Vec) { array_builder.finalize(); } +#[derive(Debug)] pub struct SizedDict { - offsets: LogArray, - data: Bytes, + offsets: MonotonicLogArray, + pub(crate) data: Bytes, + dict_offset: u64, } impl SizedDict { - pub fn from_parts(offsets: Bytes, data: Bytes) -> Self { - let offsets = LogArray::parse(offsets).unwrap(); - Self { offsets, data } + pub fn parse(offsets: Bytes, data: Bytes, dict_offset: u64) -> Self { + let offsets = MonotonicLogArray::parse(offsets).unwrap(); + Self::from_parts(offsets, data, dict_offset) + } + + pub fn from_parts(offsets: MonotonicLogArray, data: Bytes, dict_offset: u64) -> Self { + Self { offsets, data, dict_offset } } fn block_offset(&self, block_index: usize) -> usize { @@ -51,21 +57,26 @@ impl SizedDict { if block_index == 0 { offset = 0; } else { - offset = self.offsets.entry(block_index - 1) as usize; + offset = (self.offsets.entry(block_index - 1) - self.dict_offset) as usize; } offset } pub fn block_bytes(&self, block_index: usize) -> Bytes { + dbg!(block_index); let offset = self.block_offset(block_index); let block_bytes; - if block_index == self.offsets.len() { + dbg!(block_index); + dbg!(self.offsets.len()); + //if block_index == self.offsets.len() { + dbg!(offset..); block_bytes = self.data.slice(offset..); - } else { - let end = self.offsets.entry(block_index) as usize; - block_bytes = self.data.slice(offset..end); - } + //} else { + // let end = self.block_offset(block_index+1); + // dbg!(offset..end); + // block_bytes = self.data.slice(offset..end); + //} block_bytes } @@ -91,8 +102,8 @@ impl SizedDict { } pub fn entry(&self, index: u64) -> SizedDictEntry { - let block = self.block((index / 8) as usize); - block.entry((index % 8) as usize) + let block = self.block(((index - 1) / 8) as usize); + block.entry(((index-1) % 8) as 
usize) } pub fn id(&self, slice: &[u8]) -> IdLookupResult { @@ -116,7 +127,7 @@ impl SizedDict { max = mid - 1; } Ordering::Greater => min = mid + 1, - Ordering::Equal => return IdLookupResult::Found((mid * BLOCK_SIZE) as u64), // what luck! turns out the string we were looking for was the block head + Ordering::Equal => return IdLookupResult::Found((mid * BLOCK_SIZE + 1) as u64), // what luck! turns out the string we were looking for was the block head } } @@ -125,8 +136,9 @@ impl SizedDict { // we found the block the string should be part of. let block = self.block(found); let block_id = block.id(slice); - let offset = (found * BLOCK_SIZE) as u64; - let result = block_id.offset(offset); + let offset = (found * BLOCK_SIZE) as u64 + 1; + let result = block_id.offset(offset).default(offset-1); + /* if found != 0 { // the default value will fill in the last index of the // previous block if the entry was not found in the @@ -136,6 +148,9 @@ impl SizedDict { } else { result } + */ + + result } } @@ -179,7 +194,7 @@ mod tests { let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); - let dict = SizedDict::from_parts(array_bytes, data_bytes); + let dict = SizedDict::parse(array_bytes, data_bytes, 0); assert_eq!(2, dict.num_blocks()); assert_eq!(b"aaaaaaaa", &dict.block_head(0)[..]); @@ -192,7 +207,7 @@ mod tests { assert_eq!(6, block1.num_entries()); for (ix, s) in strings.into_iter().enumerate() { - assert_eq!(s, &dict.entry(ix as u64).to_bytes()[..]); + assert_eq!(s, &dict.entry((ix+1) as u64).to_bytes()[..]); } } @@ -221,10 +236,10 @@ mod tests { let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); - let dict = SizedDict::from_parts(array_bytes, data_bytes); + let dict = SizedDict::parse(array_bytes, data_bytes, 0); for (ix, s) in strings.into_iter().enumerate() { - assert_eq!(IdLookupResult::Found(ix as u64), dict.id(s)); + assert_eq!(IdLookupResult::Found((ix+1) as u64), dict.id(s)); } } @@ -253,12 +268,12 @@ mod tests { let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); - let dict = SizedDict::from_parts(array_bytes, data_bytes); + let dict = SizedDict::parse(array_bytes, data_bytes, 0); assert_eq!(IdLookupResult::NotFound, dict.id(b"a")); - assert_eq!(IdLookupResult::Closest(0), dict.id(b"ab")); - assert_eq!(IdLookupResult::Closest(7), dict.id(b"hallo")); - assert_eq!(IdLookupResult::Closest(8), dict.id(b"hello!")); - assert_eq!(IdLookupResult::Closest(13), dict.id(b"zebra")); + assert_eq!(IdLookupResult::Closest(1), dict.id(b"ab")); + assert_eq!(IdLookupResult::Closest(8), dict.id(b"hallo")); + assert_eq!(IdLookupResult::Closest(9), dict.id(b"hello!")); + assert_eq!(IdLookupResult::Closest(14), dict.id(b"zebra")); } } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index ab48c678..74aab1f7 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -1,33 +1,171 @@ -use crate::structure::{util::calculate_width, LogArrayBufBuilder, MonotonicLogArray}; +use crate::structure::{util::calculate_width, LogArrayBufBuilder, MonotonicLogArray, tfc::block::BLOCK_SIZE}; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use itertools::*; +use num_derive::FromPrimitive; +use num_traits::FromPrimitive; use rug::Integer; use std::marker::PhantomData; use super::{ - block::IdLookupResult, + block::{IdLookupResult, SizedDictEntry}, decimal::{decimal_to_storage, storage_to_decimal}, dict::{build_dict_unchecked, build_offset_logarray, SizedDict}, 
integer::{bigint_to_storage, storage_to_bigint}, }; +#[derive(Debug)] pub struct TypedDict { types_present: MonotonicLogArray, - type_offsets: Option, + type_offsets: MonotonicLogArray, + block_offsets: MonotonicLogArray, + type_id_offsets: Vec, data: Bytes, } -/* + impl TypedDict { - pub fn id(&self, slice: &[u8], dt: Datatype) -> IdLookupResult { + pub fn from_parts(types_present: Bytes, type_offsets: Bytes, block_offsets: Bytes, data: Bytes) -> Self { + let types_present = MonotonicLogArray::parse(types_present).unwrap(); + let type_offsets = MonotonicLogArray::parse(type_offsets).unwrap(); + let block_offsets = MonotonicLogArray::parse(block_offsets).unwrap(); + + let mut tally: u64 = 0; + let mut type_id_offsets = Vec::with_capacity(types_present.len()-1); + dbg!(&type_offsets); + for type_offset in type_offsets.iter() { + let last_block_len; + if type_offset == 0 { + last_block_len = data[0]; + } + else { + let last_block_offset_of_previous_type = block_offsets.entry(type_offset as usize - 1); + dbg!(last_block_offset_of_previous_type); + last_block_len = data[last_block_offset_of_previous_type as usize]; + } + let gap = BLOCK_SIZE as u8 - last_block_len; + dbg!(gap); + tally += gap as u64; + dbg!(tally); + type_id_offsets.push((type_offset + 1)*8 - tally); + } + + dbg!(&type_id_offsets); + + Self { + types_present, + type_offsets, + block_offsets, + type_id_offsets, + data, + } + } + + pub fn id(&self, v:&T) -> IdLookupResult { + let (datatype, bytes) = v.make_entry(); + + self.id_slice(datatype, bytes.as_ref()) + } + + pub fn get(&self, id:u64) -> T { + let (datatype, slice) = self.entry(id); + datatype.cast(slice.into_buf()) + } + + fn inner_type_segment(&self, i: usize) -> (SizedDict, u64) { + dbg!(i); + let type_offset; + let block_offset; + let id_offset; + if i == 0 { + type_offset = 0; + block_offset = 0; + id_offset = 0; + } + else { + type_offset = self.type_offsets.entry(i-1) as usize; + id_offset = self.type_id_offsets[type_offset]; + block_offset = self.block_offsets.entry(type_offset as usize) as usize; + } + dbg!(type_offset); + dbg!(block_offset); + + let len; + if i == self.types_present.len()-1 { + eprintln!("last type"); + if i == 0 { + len = self.block_offsets.len() - type_offset; + } + else { + len = self.block_offsets.len() - type_offset - 1; + } + } + else { + let next_offset = self.type_offsets.entry(i) as usize; + if i == 0 { + len = next_offset - type_offset; + } + else { + len = next_offset - type_offset - 1; + } + + } + dbg!(len); + dbg!(self.data.len()); + + let logarray_slice = self.block_offsets.slice(type_offset+1, len); + let data_slice = self.data.slice(block_offset..); + dbg!(data_slice.len()); + + (SizedDict::from_parts(logarray_slice, data_slice, type_offset as u64), id_offset as u64) + } + + pub fn type_segment(&self, dt: Datatype) -> Option<(SizedDict, u64)> { if let Some(i) = self.types_present.index_of(dt as u64) { - let offset = types_offsets[i]; + Some(self.inner_type_segment(i)) + } else { + None + } + } + + pub fn id_slice(&self, dt: Datatype, slice: &[u8]) -> IdLookupResult { + if let Some((dict, offset)) = self.type_segment(dt) { + dbg!(&dict.data); + let result = dict.id(slice) + .offset(offset); + if offset != 0 { + result.default(offset) + } + else { + result + } } else { IdLookupResult::NotFound } } -}*/ + + fn type_index_for_id(&self, id: u64) -> usize { + for (ix, offset) in self.type_id_offsets.iter().enumerate() { + if *offset > (id-1) { + return ix; + } + } + + self.type_id_offsets.len() + } + + fn type_for_type_index(&self, 
type_index: usize) -> Datatype { + FromPrimitive::from_u64(self.types_present.entry(type_index)).unwrap() + } + + pub fn entry(&self, id: u64) -> (Datatype, SizedDictEntry) { + let type_index = self.type_index_for_id(id); + + let (dict, offset) = self.inner_type_segment(type_index); + let dt = self.type_for_type_index(type_index); + (dt, dict.entry(id - offset)) + } +} pub struct TypedDictSegment { dict: SizedDict, @@ -35,8 +173,8 @@ pub struct TypedDictSegment { } impl TypedDictSegment { - pub fn from_parts(offsets: Bytes, data: Bytes) -> Self { - let dict = SizedDict::from_parts(offsets, data); + pub fn parse(offsets: Bytes, data: Bytes, dict_offset: u64) -> Self { + let dict = SizedDict::parse(offsets, data, dict_offset); Self { dict, _x: Default::default(), @@ -54,7 +192,7 @@ impl TypedDictSegment { } } -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, FromPrimitive)] pub enum Datatype { String = 0, UInt32, @@ -67,12 +205,26 @@ pub enum Datatype { BigInt, } +impl Datatype { + pub fn cast(self, b: B) -> T { + if T::datatype() != self { + panic!("not the right datatype"); + } + + T::from_lexical(b) + } +} + pub trait TdbDataType { fn datatype() -> Datatype; fn to_lexical(&self) -> Bytes; fn from_lexical(b: B) -> Self; + + fn make_entry(&self) -> (Datatype, Bytes) { + (Self::datatype(), self.to_lexical()) + } } impl TdbDataType for String { @@ -93,7 +245,7 @@ impl TdbDataType for String { impl TdbDataType for u32 { fn datatype() -> Datatype { - Datatype::UInt64 + Datatype::UInt32 } fn to_lexical(&self) -> Bytes { @@ -272,34 +424,40 @@ pub fn build_multiple_segments< R: AsRef<[u8]>, I: Iterator, >( - used_types: &mut B1, - type_offsets: &mut B2, - block_offsets: &mut B3, - data: &mut B4, + used_types_buf: &mut B1, + type_offsets_buf: &mut B2, + block_offsets_buf: &mut B3, + data_buf: &mut B4, iter: I, ) { - let mut types: Vec<(Datatype, u64)> = Vec::new(); + let mut types: Vec = Vec::new(); + let mut type_offsets: Vec = Vec::new(); let mut offsets = Vec::with_capacity(iter.size_hint().0); for (key, group) in iter.group_by(|v| v.0).into_iter() { let start_offset = offsets.last().map(|t| *t).unwrap_or(0_u64); let start_type_offset = offsets.len(); - types.push((key, start_type_offset as u64)); - build_dict_unchecked(start_offset, &mut offsets, data, group.map(|v| v.1)); + types.push(key); + type_offsets.push(start_type_offset as u64); + build_dict_unchecked(start_offset, &mut offsets, data_buf, group.map(|v| v.1)); } - offsets.pop(); - build_offset_logarray(block_offsets, offsets); + + build_offset_logarray(block_offsets_buf, offsets); eprintln!("types: {types:?}"); - let largest = types.last().unwrap(); + let largest_type = types.last().unwrap(); + let largest_type_offset = type_offsets.last().unwrap(); - let types_width = calculate_width(largest.0 as u64); - let type_offsets_width = calculate_width(largest.1); + let types_width = calculate_width(*largest_type as u64); + let type_offsets_width = calculate_width(*largest_type_offset); - let mut types_builder = LogArrayBufBuilder::new(used_types, types_width); - let mut type_offsets_builder = LogArrayBufBuilder::new(type_offsets, type_offsets_width); + let mut types_builder = LogArrayBufBuilder::new(used_types_buf, types_width); + let mut type_offsets_builder = LogArrayBufBuilder::new(type_offsets_buf, type_offsets_width); - for (t, o) in types { + for t in types { types_builder.push(t as u64); - type_offsets_builder.push(o); + } + + for o in 
type_offsets.into_iter().skip(1) { + type_offsets_builder.push(o - 1); } types_builder.finalize(); @@ -308,7 +466,7 @@ pub fn build_multiple_segments< #[cfg(test)] mod tests { - use crate::structure::{tfc::dict::build_offset_logarray, LogArray}; + use crate::structure::{tfc::dict::build_offset_logarray}; use super::*; @@ -349,11 +507,11 @@ mod tests { build_segment_and_offsets(&mut offsets, &mut data, strings.clone().into_iter()); - let segment = TypedDictSegment::from_parts(offsets.freeze(), data.freeze()); + let segment = TypedDictSegment::parse(offsets.freeze(), data.freeze(), 0); for (ix, s) in strings.into_iter().enumerate() { - assert_eq!(IdLookupResult::Found(ix as u64), segment.id(&s)); - assert_eq!(s, segment.get(ix as u64)); + assert_eq!(IdLookupResult::Found((ix+1) as u64), segment.id(&s)); + assert_eq!(s, segment.get((ix+1) as u64)); } } @@ -368,11 +526,11 @@ mod tests { build_segment_and_offsets(&mut offsets, &mut data, nums.clone().into_iter()); - let segment = TypedDictSegment::from_parts(offsets.freeze(), data.freeze()); + let segment = TypedDictSegment::parse(offsets.freeze(), data.freeze(), 0); for (ix, s) in nums.into_iter().enumerate() { - assert_eq!(IdLookupResult::Found(ix as u64), segment.id(&s)); - assert_eq!(s, segment.get(ix as u64)); + assert_eq!(IdLookupResult::Found((ix+1) as u64), segment.id(&s)); + assert_eq!(s, segment.get((ix+1) as u64)); } } @@ -468,31 +626,27 @@ mod tests { cycle(Decimal("-983423984793872423423423432312698".to_string())); } - fn make_entry(t: T) -> (Datatype, Bytes) { - (T::datatype(), t.to_lexical()) - } - #[test] fn test_multi_segment() { let mut vec: Vec<(Datatype, Bytes)> = vec![ - make_entry(Decimal("-1".to_string())), - make_entry("asdf".to_string()), - make_entry(Decimal("-12342343.2348973".to_string())), - make_entry("Batty".to_string()), - make_entry("Batman".to_string()), - make_entry(-3_i64), - make_entry(Decimal("2348973".to_string())), - make_entry(4.389832_f32), - make_entry("apple".to_string()), - make_entry(23434.389832_f32), - make_entry("apply".to_string()), - make_entry(-500_i32), - make_entry(20_u32), + Decimal("-1".to_string()).make_entry(), + "asdf".to_string().make_entry(), + Decimal("-12342343.2348973".to_string()).make_entry(), + "Batty".to_string().make_entry(), + "Batman".to_string().make_entry(), + (-3_i64).make_entry(), + Decimal("2348973".to_string()).make_entry(), + 4.389832_f32.make_entry(), + "apple".to_string().make_entry(), + 23434.389832_f32.make_entry(), + "apply".to_string().make_entry(), + (-500_i32).make_entry(), + 20_u32.make_entry(), ]; vec.sort(); - let mut used_types = Vec::new(); - let mut type_offsets = Vec::new(); - let mut block_offsets = Vec::new(); + let mut used_types = BytesMut::new(); + let mut type_offsets = BytesMut::new(); + let mut block_offsets = BytesMut::new(); let mut data = BytesMut::new(); build_multiple_segments( &mut used_types, @@ -506,15 +660,19 @@ mod tests { eprintln!("block_offsets : {block_offsets:?}"); eprintln!("data : {data:?}"); - let used_types_vec: Vec = LogArray::parse(Bytes::from(used_types)) - .unwrap() - .iter() - .collect(); + let dict = TypedDict::from_parts(used_types.freeze(), type_offsets.freeze(), block_offsets.freeze(), data.freeze()); + eprintln!("{dict:?}"); + + let id = dict.id(&"Batty".to_string()); + assert_eq!(IdLookupResult::Found(2), id); + assert_eq!(IdLookupResult::Found(6), dict.id(&20_u32)); + assert_eq!(IdLookupResult::Found(7), dict.id(&(-500_i32))); - let expected_types_vec: Vec = vec.iter().map(|x| x.0 as u64).dedup().collect(); - 
assert_eq!(used_types_vec, expected_types_vec); + for i in 1..vec.len()+1 { + eprintln!("!!!!!!!!!!!! {i} {:?}", dict.entry(i as u64)); + } - eprintln!("expected_types_vec: {expected_types_vec:?}"); + assert_eq!(Decimal("-12342343.2348973".to_string()), dict.get(11)); panic!(); } From 593d227ce5181437c54b492669670680c0115c4b Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Sat, 26 Nov 2022 23:05:56 +0100 Subject: [PATCH 23/99] Two significant changes and some formatting. typed.rs L93: don't use type_offset but index. typed.rs L123 use block_offsets in from_parts. --- src/structure/tfc/dict.rs | 18 +++-- src/structure/tfc/typed.rs | 151 +++++++++++++++++++++++++++---------- 2 files changed, 124 insertions(+), 45 deletions(-) diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 7a85cddc..8283bfc4 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -49,7 +49,11 @@ impl SizedDict { } pub fn from_parts(offsets: MonotonicLogArray, data: Bytes, dict_offset: u64) -> Self { - Self { offsets, data, dict_offset } + Self { + offsets, + data, + dict_offset, + } } fn block_offset(&self, block_index: usize) -> usize { @@ -70,8 +74,8 @@ impl SizedDict { dbg!(block_index); dbg!(self.offsets.len()); //if block_index == self.offsets.len() { - dbg!(offset..); - block_bytes = self.data.slice(offset..); + dbg!(offset..); + block_bytes = dbg!(self.data.slice(offset..)); //} else { // let end = self.block_offset(block_index+1); // dbg!(offset..end); @@ -103,7 +107,7 @@ impl SizedDict { pub fn entry(&self, index: u64) -> SizedDictEntry { let block = self.block(((index - 1) / 8) as usize); - block.entry(((index-1) % 8) as usize) + block.entry(((index - 1) % 8) as usize) } pub fn id(&self, slice: &[u8]) -> IdLookupResult { @@ -137,7 +141,7 @@ impl SizedDict { let block = self.block(found); let block_id = block.id(slice); let offset = (found * BLOCK_SIZE) as u64 + 1; - let result = block_id.offset(offset).default(offset-1); + let result = block_id.offset(offset).default(offset - 1); /* if found != 0 { // the default value will fill in the last index of the @@ -207,7 +211,7 @@ mod tests { assert_eq!(6, block1.num_entries()); for (ix, s) in strings.into_iter().enumerate() { - assert_eq!(s, &dict.entry((ix+1) as u64).to_bytes()[..]); + assert_eq!(s, &dict.entry((ix + 1) as u64).to_bytes()[..]); } } @@ -239,7 +243,7 @@ mod tests { let dict = SizedDict::parse(array_bytes, data_bytes, 0); for (ix, s) in strings.into_iter().enumerate() { - assert_eq!(IdLookupResult::Found((ix+1) as u64), dict.id(s)); + assert_eq!(IdLookupResult::Found((ix + 1) as u64), dict.id(s)); } } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 74aab1f7..75e82104 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -1,4 +1,6 @@ -use crate::structure::{util::calculate_width, LogArrayBufBuilder, MonotonicLogArray, tfc::block::BLOCK_SIZE}; +use crate::structure::{ + tfc::block::BLOCK_SIZE, util::calculate_width, LogArrayBufBuilder, MonotonicLogArray, +}; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use itertools::*; @@ -24,21 +26,26 @@ pub struct TypedDict { } impl TypedDict { - pub fn from_parts(types_present: Bytes, type_offsets: Bytes, block_offsets: Bytes, data: Bytes) -> Self { + pub fn from_parts( + types_present: Bytes, + type_offsets: Bytes, + block_offsets: Bytes, + data: Bytes, + ) -> Self { let types_present = MonotonicLogArray::parse(types_present).unwrap(); let type_offsets = 
MonotonicLogArray::parse(type_offsets).unwrap(); let block_offsets = MonotonicLogArray::parse(block_offsets).unwrap(); let mut tally: u64 = 0; - let mut type_id_offsets = Vec::with_capacity(types_present.len()-1); + let mut type_id_offsets = Vec::with_capacity(types_present.len() - 1); dbg!(&type_offsets); for type_offset in type_offsets.iter() { let last_block_len; if type_offset == 0 { last_block_len = data[0]; - } - else { - let last_block_offset_of_previous_type = block_offsets.entry(type_offset as usize - 1); + } else { + let last_block_offset_of_previous_type = + block_offsets.entry(type_offset as usize - 1); dbg!(last_block_offset_of_previous_type); last_block_len = data[last_block_offset_of_previous_type as usize]; } @@ -46,7 +53,7 @@ impl TypedDict { dbg!(gap); tally += gap as u64; dbg!(tally); - type_id_offsets.push((type_offset + 1)*8 - tally); + type_id_offsets.push((type_offset + 1) * 8 - tally); } dbg!(&type_id_offsets); @@ -60,13 +67,14 @@ impl TypedDict { } } - pub fn id(&self, v:&T) -> IdLookupResult { + pub fn id(&self, v: &T) -> IdLookupResult { let (datatype, bytes) = v.make_entry(); self.id_slice(datatype, bytes.as_ref()) } - pub fn get(&self, id:u64) -> T { + pub fn get(&self, id: u64) -> T { + eprintln!("id: {id}"); let (datatype, slice) = self.entry(id); datatype.cast(slice.into_buf()) } @@ -80,43 +88,41 @@ impl TypedDict { type_offset = 0; block_offset = 0; id_offset = 0; - } - else { - type_offset = self.type_offsets.entry(i-1) as usize; - id_offset = self.type_id_offsets[type_offset]; + } else { + type_offset = self.type_offsets.entry(i - 1) as usize; + id_offset = dbg!(self.type_id_offsets[i - 1]); block_offset = self.block_offsets.entry(type_offset as usize) as usize; } dbg!(type_offset); dbg!(block_offset); let len; - if i == self.types_present.len()-1 { + if i == self.types_present.len() - 1 { eprintln!("last type"); if i == 0 { len = self.block_offsets.len() - type_offset; - } - else { + } else { len = self.block_offsets.len() - type_offset - 1; } - } - else { + } else { let next_offset = self.type_offsets.entry(i) as usize; if i == 0 { len = next_offset - type_offset; - } - else { + } else { len = next_offset - type_offset - 1; } - } dbg!(len); dbg!(self.data.len()); - let logarray_slice = self.block_offsets.slice(type_offset+1, len); + let logarray_slice = self.block_offsets.slice(type_offset + 1, len); let data_slice = self.data.slice(block_offset..); dbg!(data_slice.len()); - (SizedDict::from_parts(logarray_slice, data_slice, type_offset as u64), id_offset as u64) + ( + SizedDict::from_parts(logarray_slice, data_slice, block_offset as u64), + id_offset as u64, + ) } pub fn type_segment(&self, dt: Datatype) -> Option<(SizedDict, u64)> { @@ -130,13 +136,11 @@ impl TypedDict { pub fn id_slice(&self, dt: Datatype, slice: &[u8]) -> IdLookupResult { if let Some((dict, offset)) = self.type_segment(dt) { dbg!(&dict.data); - let result = dict.id(slice) - .offset(offset); + let result = dict.id(slice).offset(offset); if offset != 0 { result.default(offset) - } - else { + } else { result } } else { @@ -146,7 +150,7 @@ impl TypedDict { fn type_index_for_id(&self, id: u64) -> usize { for (ix, offset) in self.type_id_offsets.iter().enumerate() { - if *offset > (id-1) { + if *offset > (id - 1) { return ix; } } @@ -159,10 +163,11 @@ impl TypedDict { } pub fn entry(&self, id: u64) -> (Datatype, SizedDictEntry) { + eprintln!("entry(id): {id}"); let type_index = self.type_index_for_id(id); - let (dict, offset) = self.inner_type_segment(type_index); - let dt = 
self.type_for_type_index(type_index); + let (dict, offset) = dbg!(self.inner_type_segment(type_index)); + let dt = dbg!(self.type_for_type_index(type_index)); (dt, dict.entry(id - offset)) } } @@ -206,7 +211,7 @@ pub enum Datatype { } impl Datatype { - pub fn cast(self, b: B) -> T { + pub fn cast(self, b: B) -> T { if T::datatype() != self { panic!("not the right datatype"); } @@ -466,7 +471,7 @@ pub fn build_multiple_segments< #[cfg(test)] mod tests { - use crate::structure::{tfc::dict::build_offset_logarray}; + use crate::structure::tfc::dict::build_offset_logarray; use super::*; @@ -510,8 +515,8 @@ mod tests { let segment = TypedDictSegment::parse(offsets.freeze(), data.freeze(), 0); for (ix, s) in strings.into_iter().enumerate() { - assert_eq!(IdLookupResult::Found((ix+1) as u64), segment.id(&s)); - assert_eq!(s, segment.get((ix+1) as u64)); + assert_eq!(IdLookupResult::Found((ix + 1) as u64), segment.id(&s)); + assert_eq!(s, segment.get((ix + 1) as u64)); } } @@ -529,8 +534,8 @@ mod tests { let segment = TypedDictSegment::parse(offsets.freeze(), data.freeze(), 0); for (ix, s) in nums.into_iter().enumerate() { - assert_eq!(IdLookupResult::Found((ix+1) as u64), segment.id(&s)); - assert_eq!(s, segment.get((ix+1) as u64)); + assert_eq!(IdLookupResult::Found((ix + 1) as u64), segment.id(&s)); + assert_eq!(s, segment.get((ix + 1) as u64)); } } @@ -660,7 +665,12 @@ mod tests { eprintln!("block_offsets : {block_offsets:?}"); eprintln!("data : {data:?}"); - let dict = TypedDict::from_parts(used_types.freeze(), type_offsets.freeze(), block_offsets.freeze(), data.freeze()); + let dict = TypedDict::from_parts( + used_types.freeze(), + type_offsets.freeze(), + block_offsets.freeze(), + data.freeze(), + ); eprintln!("{dict:?}"); let id = dict.id(&"Batty".to_string()); @@ -668,7 +678,7 @@ mod tests { assert_eq!(IdLookupResult::Found(6), dict.id(&20_u32)); assert_eq!(IdLookupResult::Found(7), dict.id(&(-500_i32))); - for i in 1..vec.len()+1 { + for i in 1..vec.len() + 1 { eprintln!("!!!!!!!!!!!! 
{i} {:?}", dict.entry(i as u64)); } @@ -676,4 +686,69 @@ mod tests { panic!(); } + + #[test] + fn test_full_blocks() { + let mut vec: Vec<(Datatype, Bytes)> = vec![ + "fdsa".to_string().make_entry(), + "a".to_string().make_entry(), + "bc".to_string().make_entry(), + "bcd".to_string().make_entry(), + "z".to_string().make_entry(), + "Batty".to_string().make_entry(), + "Batman".to_string().make_entry(), + "apple".to_string().make_entry(), + (-500_i32).make_entry(), + 20_u32.make_entry(), + 22_u32.make_entry(), + 23_u32.make_entry(), + 24_u32.make_entry(), + 25_u32.make_entry(), + 26_u32.make_entry(), + 27_u32.make_entry(), + 28_u32.make_entry(), + 3000_u32.make_entry(), + (-3_i64).make_entry(), + Decimal("-12342343.2348973".to_string()).make_entry(), + Decimal("234.8973".to_string()).make_entry(), + Decimal("0.2348973".to_string()).make_entry(), + Decimal("23423423.8973".to_string()).make_entry(), + Decimal("3.3".to_string()).make_entry(), + Decimal("0.001".to_string()).make_entry(), + Decimal("-0.001".to_string()).make_entry(), + Decimal("2".to_string()).make_entry(), + Decimal("0".to_string()).make_entry(), + 4.389832_f32.make_entry(), + 23434.389832_f32.make_entry(), + ]; + vec.sort(); + let mut used_types = BytesMut::new(); + let mut type_offsets = BytesMut::new(); + let mut block_offsets = BytesMut::new(); + let mut data = BytesMut::new(); + build_multiple_segments( + &mut used_types, + &mut type_offsets, + &mut block_offsets, + &mut data, + vec.clone().into_iter(), + ); + + let dict = TypedDict::from_parts( + used_types.freeze(), + type_offsets.freeze(), + block_offsets.freeze(), + data.freeze(), + ); + eprintln!("{dict:?}"); + + for i in 1..vec.len() + 1 { + eprintln!("!!!!!!!!!!!! {i} {:?}", dict.entry(i as u64)); + } + + assert_eq!("Batman".to_string(), dict.get::(1)); + assert_eq!("fdsa".to_string(), dict.get::(7)); + assert_eq!(26_u32, dict.get::(14)); + assert_eq!(Decimal("234.8973".to_string()), dict.get(29)); + } } From 4ab91b38f489c88742caf286af43bb3287def117 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Sat, 26 Nov 2022 23:35:45 +0100 Subject: [PATCH 24/99] Remove dbgs and add better tests --- src/structure/tfc/block.rs | 2 +- src/structure/tfc/dict.rs | 12 +--------- src/structure/tfc/typed.rs | 47 ++++++++++++++++++-------------------- 3 files changed, 24 insertions(+), 37 deletions(-) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 7335091a..caba2706 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -65,7 +65,7 @@ impl SizedBlockHeader { } #[derive(Clone, Debug)] -pub struct SizedDictEntry(Vec); +pub struct SizedDictEntry(pub Vec); impl SizedDictEntry { pub fn new(parts: Vec) -> Self { diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 8283bfc4..78626bff 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -68,19 +68,9 @@ impl SizedDict { } pub fn block_bytes(&self, block_index: usize) -> Bytes { - dbg!(block_index); let offset = self.block_offset(block_index); let block_bytes; - dbg!(block_index); - dbg!(self.offsets.len()); - //if block_index == self.offsets.len() { - dbg!(offset..); - block_bytes = dbg!(self.data.slice(offset..)); - //} else { - // let end = self.block_offset(block_index+1); - // dbg!(offset..end); - // block_bytes = self.data.slice(offset..end); - //} + block_bytes = self.data.slice(offset..); block_bytes } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 75e82104..1303c5e4 100644 --- a/src/structure/tfc/typed.rs +++ 
b/src/structure/tfc/typed.rs @@ -38,7 +38,6 @@ impl TypedDict { let mut tally: u64 = 0; let mut type_id_offsets = Vec::with_capacity(types_present.len() - 1); - dbg!(&type_offsets); for type_offset in type_offsets.iter() { let last_block_len; if type_offset == 0 { @@ -46,18 +45,13 @@ impl TypedDict { } else { let last_block_offset_of_previous_type = block_offsets.entry(type_offset as usize - 1); - dbg!(last_block_offset_of_previous_type); last_block_len = data[last_block_offset_of_previous_type as usize]; } let gap = BLOCK_SIZE as u8 - last_block_len; - dbg!(gap); tally += gap as u64; - dbg!(tally); type_id_offsets.push((type_offset + 1) * 8 - tally); } - dbg!(&type_id_offsets); - Self { types_present, type_offsets, @@ -74,13 +68,11 @@ impl TypedDict { } pub fn get(&self, id: u64) -> T { - eprintln!("id: {id}"); let (datatype, slice) = self.entry(id); datatype.cast(slice.into_buf()) } fn inner_type_segment(&self, i: usize) -> (SizedDict, u64) { - dbg!(i); let type_offset; let block_offset; let id_offset; @@ -90,15 +82,12 @@ impl TypedDict { id_offset = 0; } else { type_offset = self.type_offsets.entry(i - 1) as usize; - id_offset = dbg!(self.type_id_offsets[i - 1]); + id_offset = self.type_id_offsets[i - 1]; block_offset = self.block_offsets.entry(type_offset as usize) as usize; } - dbg!(type_offset); - dbg!(block_offset); let len; if i == self.types_present.len() - 1 { - eprintln!("last type"); if i == 0 { len = self.block_offsets.len() - type_offset; } else { @@ -112,12 +101,9 @@ impl TypedDict { len = next_offset - type_offset - 1; } } - dbg!(len); - dbg!(self.data.len()); let logarray_slice = self.block_offsets.slice(type_offset + 1, len); let data_slice = self.data.slice(block_offset..); - dbg!(data_slice.len()); ( SizedDict::from_parts(logarray_slice, data_slice, block_offset as u64), @@ -135,7 +121,6 @@ impl TypedDict { pub fn id_slice(&self, dt: Datatype, slice: &[u8]) -> IdLookupResult { if let Some((dict, offset)) = self.type_segment(dt) { - dbg!(&dict.data); let result = dict.id(slice).offset(offset); if offset != 0 { @@ -163,11 +148,10 @@ impl TypedDict { } pub fn entry(&self, id: u64) -> (Datatype, SizedDictEntry) { - eprintln!("entry(id): {id}"); let type_index = self.type_index_for_id(id); - let (dict, offset) = dbg!(self.inner_type_segment(type_index)); - let dt = dbg!(self.type_for_type_index(type_index)); + let (dict, offset) = self.inner_type_segment(type_index); + let dt = self.type_for_type_index(type_index); (dt, dict.entry(id - offset)) } } @@ -671,7 +655,6 @@ mod tests { block_offsets.freeze(), data.freeze(), ); - eprintln!("{dict:?}"); let id = dict.id(&"Batty".to_string()); assert_eq!(IdLookupResult::Found(2), id); @@ -679,12 +662,11 @@ mod tests { assert_eq!(IdLookupResult::Found(7), dict.id(&(-500_i32))); for i in 1..vec.len() + 1 { - eprintln!("!!!!!!!!!!!! {i} {:?}", dict.entry(i as u64)); + let (t, s) = dict.entry(i as u64); + assert_eq!(vec[i - 1], (t, s.0.into_iter().flatten().collect())); } assert_eq!(Decimal("-12342343.2348973".to_string()), dict.get(11)); - - panic!(); } #[test] @@ -740,15 +722,30 @@ mod tests { block_offsets.freeze(), data.freeze(), ); - eprintln!("{dict:?}"); for i in 1..vec.len() + 1 { - eprintln!("!!!!!!!!!!!! 
{i} {:?}", dict.entry(i as u64)); + let (t, s) = dict.entry(i as u64); + assert_eq!(vec[i - 1], (t, s.0.into_iter().flatten().collect())); } assert_eq!("Batman".to_string(), dict.get::(1)); assert_eq!("fdsa".to_string(), dict.get::(7)); assert_eq!(26_u32, dict.get::(14)); assert_eq!(Decimal("234.8973".to_string()), dict.get(29)); + + assert_eq!(IdLookupResult::NotFound, dict.id(&"AAAA".to_string())); + assert_eq!(IdLookupResult::Closest(2), dict.id(&"Baz".to_string())); + assert_eq!(IdLookupResult::Found(17), dict.id(&3000_u32)); + assert_eq!( + IdLookupResult::Found(23), + dict.id(&Decimal("-0.001".to_string())) + ); + assert_eq!( + IdLookupResult::Closest(23), + dict.id(&Decimal("-0.0001".to_string())) + ); + assert_eq!(IdLookupResult::Found(16), dict.id(&28_u32)); + assert_eq!(IdLookupResult::Closest(16), dict.id(&29_u32)); + assert_eq!(IdLookupResult::Closest(17), dict.id(&3001_u32)); } } From d2959e6dae6a2c2d0f8674daa4bedc64181151bb Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Sat, 26 Nov 2022 23:43:55 +0100 Subject: [PATCH 25/99] Fix bigint naming, add tests --- src/structure/tfc/typed.rs | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 1303c5e4..2228a380 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -366,7 +366,7 @@ impl TdbDataType for f64 { impl TdbDataType for Integer { fn datatype() -> Datatype { - Datatype::Float64 + Datatype::BigInt } fn to_lexical(&self) -> Bytes { @@ -644,10 +644,6 @@ mod tests { &mut data, vec.clone().into_iter(), ); - eprintln!("used_types : {used_types:?}"); - eprintln!("type_offsets : {type_offsets:?}"); - eprintln!("block_offsets : {block_offsets:?}"); - eprintln!("data : {data:?}"); let dict = TypedDict::from_parts( used_types.freeze(), @@ -702,6 +698,7 @@ mod tests { Decimal("0".to_string()).make_entry(), 4.389832_f32.make_entry(), 23434.389832_f32.make_entry(), + int("239487329872343987").make_entry(), ]; vec.sort(); let mut used_types = BytesMut::new(); @@ -735,7 +732,9 @@ mod tests { assert_eq!(IdLookupResult::NotFound, dict.id(&"AAAA".to_string())); assert_eq!(IdLookupResult::Closest(2), dict.id(&"Baz".to_string())); + assert_eq!(IdLookupResult::Found(17), dict.id(&3000_u32)); + assert_eq!( IdLookupResult::Found(23), dict.id(&Decimal("-0.001".to_string())) @@ -744,8 +743,21 @@ mod tests { IdLookupResult::Closest(23), dict.id(&Decimal("-0.0001".to_string())) ); + assert_eq!(IdLookupResult::Found(16), dict.id(&28_u32)); assert_eq!(IdLookupResult::Closest(16), dict.id(&29_u32)); assert_eq!(IdLookupResult::Closest(17), dict.id(&3001_u32)); + + assert_eq!(IdLookupResult::Closest(17), dict.id(&3001_u32)); + + assert_eq!(IdLookupResult::Closest(30), dict.id(&int("0"))); + assert_eq!( + IdLookupResult::Found(31), + dict.id(&int("239487329872343987")) + ); + assert_eq!( + IdLookupResult::Closest(31), + dict.id(&int("99999999999999999999999999")) + ); } } From 8dfa62b91b91afec4dcd11bfd435f658a3a0f982 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Mon, 28 Nov 2022 14:05:05 +0100 Subject: [PATCH 26/99] make versions of the bitindex generator that work with bufs --- src/structure/bitarray.rs | 56 ++++++++++++++++++++++++++------------- src/structure/bitindex.rs | 48 +++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 18 deletions(-) diff --git a/src/structure/bitarray.rs b/src/structure/bitarray.rs index a46cdfdd..43ea1dba 100644 --- a/src/structure/bitarray.rs +++ 
b/src/structure/bitarray.rs @@ -34,7 +34,7 @@ use super::util; use crate::storage::*; use crate::structure::bititer::BitIter; use byteorder::{BigEndian, ByteOrder}; -use bytes::{Bytes, BytesMut}; +use bytes::{Buf, Bytes, BytesMut}; use futures::io; use futures::stream::{Stream, StreamExt, TryStreamExt}; use std::{convert::TryFrom, error, fmt}; @@ -291,24 +291,25 @@ impl Decoder for BitArrayBlockDecoder { /// Decode the next block of the bit array. fn decode(&mut self, bytes: &mut BytesMut) -> Result, io::Error> { - // If there isn't a full word available in the buffer, stop. - if bytes.len() < 8 { - return Ok(None); - } + Ok(decode_next_bitarray_block(bytes, &mut self.readahead)) + } +} - // Read the next word. If `self.readahead` was `Some`, return that value; otherwise, - // recurse to read a second word and then return the first word. - // - // This trick means that we don't return the last word in the buffer, which is the control - // word. The consequence is that we read an extra word at the beginning of the decoding - // process. - match self - .readahead - .replace(BigEndian::read_u64(&bytes.split_to(8))) - { - Some(word) => Ok(Some(word)), - None => self.decode(bytes), - } +fn decode_next_bitarray_block(bytes: &mut B, readahead: &mut Option) -> Option { + // If there isn't a full word available in the buffer, stop. + if bytes.remaining() < 8 { + return None; + } + + // Read the next word. If `readahead` was `Some`, return that value; otherwise, + // recurse to read a second word and then return the first word. + // + // This trick means that we don't return the last word in the buffer, which is the control + // word. The consequence is that we read an extra word at the beginning of the decoding + // process. + match readahead.replace(bytes.get_u64()) { + Some(word) => Some(word), + None => decode_next_bitarray_block(bytes, readahead), } } @@ -316,6 +317,25 @@ pub fn bitarray_stream_blocks(r: R) -> FramedRead(b: &mut B) -> BitArrayBlockIterator { + BitArrayBlockIterator { + buf: b, + readahead: None, + } +} + +pub struct BitArrayBlockIterator<'a, B: Buf> { + buf: &'a mut B, + readahead: Option, +} + +impl<'a, B: Buf> Iterator for BitArrayBlockIterator<'a, B> { + type Item = u64; + fn next(&mut self) -> Option { + decode_next_bitarray_block(self.buf, &mut self.readahead) + } +} + /// Read the length (number of bits) from a `FileLoad`. pub(crate) async fn bitarray_len_from_file(f: F) -> io::Result { BitArrayError::validate_input_buf_size(f.size().await?)?; diff --git a/src/structure/bitindex.rs b/src/structure/bitindex.rs index 097d36ab..76b4c117 100644 --- a/src/structure/bitindex.rs +++ b/src/structure/bitindex.rs @@ -1,6 +1,9 @@ //! Logic for building and using an index over a bitarray which provides rank and select. 
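The `decode_next_bitarray_block` function above captures the codec's readahead trick in a `Buf`-generic form: hold one word back, and only emit it once a successor has been read, so the trailing control word of the bitarray is never yielded as data. A minimal self-contained sketch of the same pattern, with illustrative names that are not part of this crate's API:

    use bytes::Buf;

    /// Yield every big-endian u64 in `buf` except the last one, by keeping
    /// one word of readahead and only emitting a word once its successor
    /// has been read.
    fn words_except_last<B: Buf>(buf: &mut B) -> Vec<u64> {
        let mut readahead: Option<u64> = None;
        let mut words = Vec::new();
        while buf.remaining() >= 8 {
            // `replace` hands back the previously buffered word, if any.
            if let Some(prev) = readahead.replace(buf.get_u64()) {
                words.push(prev);
            }
        }
        // `readahead` still holds the final (control) word; it is dropped here.
        words
    }

    fn main() {
        let data: Vec<u8> = [1u64, 2, 3].iter().flat_map(|w| w.to_be_bytes()).collect();
        assert_eq!(words_except_last(&mut &data[..]), vec![1, 2]);
    }
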
use byteorder::{BigEndian, ByteOrder}; +use bytes::Buf; +use bytes::BufMut; use bytes::Bytes; +use itertools::Itertools; use super::bitarray::*; use super::logarray::*; @@ -434,6 +437,51 @@ pub async fn build_bitindex< Ok(()) } +pub fn build_bitindex_from_block_iter<'a, I: Iterator, B1: BufMut, B2: BufMut>( + blocks_iter: &'a mut I, + blocks: &mut B1, + sblocks: &mut B2, +) { + // the following widths are unoptimized, but should always be large enough + let mut blocks_builder = + LogArrayBufBuilder::new(blocks, 64 - (SBLOCK_SIZE * 64).leading_zeros() as u8); + let mut sblocks_builder = LogArrayBufBuilder::new(sblocks, 64); + + // we chunk block_stream into blocks of SBLOCK size for further processing + let mut sblock_rank = 0; + let chunks = blocks_iter.chunks(SBLOCK_SIZE); + let mut iter = chunks.into_iter(); + while let Some(chunk) = iter.next() { + let chunk: Vec<_> = chunk.collect(); + let mut block_ranks = Vec::with_capacity(chunk.len()); + for num in chunk { + block_ranks.push(num.count_ones() as u64); + } + + let mut sblock_subrank = block_ranks.iter().sum(); + sblock_rank += sblock_subrank; + + for block_rank in block_ranks { + blocks_builder.push(sblock_subrank); + sblock_subrank -= block_rank; + } + + sblocks_builder.push(sblock_rank); + } + + blocks_builder.finalize(); + sblocks_builder.finalize(); +} + +pub fn build_bitindex_from_buf( + bitarray: &mut B1, + blocks: &mut B2, + sblocks: &mut B3, +) { + let mut iter = bitarray_iter_blocks(bitarray); + build_bitindex_from_block_iter(&mut iter, blocks, sblocks) +} + #[cfg(test)] mod tests { use super::*; From 4e61d03965670b40c4c1a289b7f50e8f1b6b7912 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Mon, 28 Nov 2022 14:33:44 +0100 Subject: [PATCH 27/99] precalculate typed dict len --- src/structure/logarray.rs | 2 +- src/structure/tfc/typed.rs | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/structure/logarray.rs b/src/structure/logarray.rs index bb0d99c5..895999fa 100644 --- a/src/structure/logarray.rs +++ b/src/structure/logarray.rs @@ -716,7 +716,7 @@ impl MonotonicLogArray { MonotonicLogArray(logarray) } - pub fn parse(bytes: Bytes) -> Result { + pub fn parse(bytes: Bytes) -> Result { let logarray = LogArray::parse(bytes)?; Ok(Self::from_logarray(logarray)) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 2228a380..b05f31e0 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -22,6 +22,7 @@ pub struct TypedDict { type_offsets: MonotonicLogArray, block_offsets: MonotonicLogArray, type_id_offsets: Vec, + num_entries: usize, data: Bytes, } @@ -52,11 +53,16 @@ impl TypedDict { type_id_offsets.push((type_offset + 1) * 8 - tally); } + let last_gap = + BLOCK_SIZE - data[block_offsets.entry(block_offsets.len() - 1) as usize] as usize; + let num_entries = (block_offsets.len() + 1) * BLOCK_SIZE - tally as usize - last_gap; + Self { types_present, type_offsets, block_offsets, type_id_offsets, + num_entries, data, } } @@ -154,6 +160,10 @@ impl TypedDict { let dt = self.type_for_type_index(type_index); (dt, dict.entry(id - offset)) } + + pub fn num_entries(&self) -> usize { + self.num_entries + } } pub struct TypedDictSegment { @@ -652,6 +662,8 @@ mod tests { data.freeze(), ); + assert_eq!(13, dict.num_entries()); + let id = dict.id(&"Batty".to_string()); assert_eq!(IdLookupResult::Found(2), id); assert_eq!(IdLookupResult::Found(6), dict.id(&20_u32)); @@ -720,6 +732,8 @@ mod tests { data.freeze(), ); + assert_eq!(31, dict.num_entries()); + for i in 
1..vec.len() + 1 { let (t, s) = dict.entry(i as u64); assert_eq!(vec[i - 1], (t, s.0.into_iter().flatten().collect())); From 75675ebf456c1088dd71232f5f5a946f4e4c2020 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Mon, 28 Nov 2022 15:26:41 +0100 Subject: [PATCH 28/99] block and dict iterators --- src/structure/tfc/block.rs | 93 +++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index caba2706..184d7ed1 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -16,7 +16,7 @@ pub enum SizedDictError { NotEnoughData, } -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub struct SizedBlockHeader { head: Bytes, num_entries: u8, @@ -513,6 +513,56 @@ impl SizedDictBlock { IdLookupResult::Closest(self.header.num_entries as u64 - 1) } + + pub fn iter<'a>(&'a self) -> SizedDictBlockIterator<'a> { + SizedDictBlockIterator { + header: &self.header, + data: self.data.clone(), + ix: 0, + last: None, + } + } +} + +pub struct SizedDictBlockIterator<'a> { + header: &'a SizedBlockHeader, + data: Bytes, + ix: usize, + last: Option>, +} + +impl<'a> Iterator for SizedDictBlockIterator<'a> { + type Item = SizedDictEntry; + + fn next(&mut self) -> Option { + if let Some(last) = self.last.as_mut() { + if self.ix >= self.header.num_entries as usize - 1 { + return None; + } + let size = self.header.sizes[self.ix]; + let mut shared = self.header.shareds[self.ix]; + for rope_index in 0..last.len() { + let x = &mut last[rope_index]; + if x.len() < shared { + shared -= x.len(); + continue; + } + + x.truncate(shared); + last.truncate(rope_index + 1); + break; + } + + last.push(self.data.split_to(size)); + self.ix += 1; + + Some(SizedDictEntry::new(last.clone())) + } else { + let result = vec![self.header.head.clone()]; + self.last = Some(result.clone()); + Some(SizedDictEntry::new(result)) + } + } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -796,4 +846,45 @@ mod tests { assert_eq!(IdLookupResult::Closest(7), block.id(b"f")); } + + #[test] + fn enumerate_complete_block() { + let strings: [&[u8]; 8] = [ + b"aaaaaa", + b"aabb", + b"cccc", + b"cdef", + b"cdff", + b"cdffasdf", + b"cdffeeee", + b"ceeeeeeeeeeeeeee", + ]; + let block = build_block(&strings); + + let result: Vec = block.iter().map(|e| e.to_bytes()).collect(); + assert_eq!( + strings + .iter() + .cloned() + .map(Bytes::from_static) + .collect::>(), + result + ); + } + + #[test] + fn enumerate_incomplete_block() { + let strings: [&[u8]; 6] = [b"aaaaaa", b"aabb", b"cccc", b"cdef", b"cdff", b"cdffasdf"]; + let block = build_block(&strings); + + let result: Vec = block.iter().map(|e| e.to_bytes()).collect(); + assert_eq!( + strings + .iter() + .cloned() + .map(Bytes::from_static) + .collect::>(), + result + ); + } } From b342ae8a15a990bfd17542a325497d4697e99103 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Mon, 28 Nov 2022 16:19:10 +0100 Subject: [PATCH 29/99] full iterator over the entire typed dict --- src/structure/tfc/block.rs | 25 +++++-- src/structure/tfc/dict.rs | 82 ++++++++++++++++++++++- src/structure/tfc/typed.rs | 129 ++++++++++++++++++++++++++++++++++++- 3 files changed, 225 insertions(+), 11 deletions(-) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 184d7ed1..0d25e8c1 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::cmp::Ordering; use std::hash::{Hash, Hasher}; @@ -514,9 +515,18 @@ impl 
SizedDictBlock { IdLookupResult::Closest(self.header.num_entries as u64 - 1) } - pub fn iter<'a>(&'a self) -> SizedDictBlockIterator<'a> { - SizedDictBlockIterator { - header: &self.header, + pub fn iter<'a>(&'a self) -> SizedBlockIterator<'a> { + SizedBlockIterator { + header: Cow::Borrowed(&self.header), + data: self.data.clone(), + ix: 0, + last: None, + } + } + + pub fn into_iter(self) -> OwnedSizedBlockIterator { + SizedBlockIterator { + header: Cow::Owned(self.header), data: self.data.clone(), ix: 0, last: None, @@ -524,14 +534,17 @@ impl SizedDictBlock { } } -pub struct SizedDictBlockIterator<'a> { - header: &'a SizedBlockHeader, +type OwnedSizedBlockIterator = SizedBlockIterator<'static>; + +#[derive(Clone)] +pub struct SizedBlockIterator<'a> { + header: Cow<'a, SizedBlockHeader>, data: Bytes, ix: usize, last: Option>, } -impl<'a> Iterator for SizedDictBlockIterator<'a> { +impl<'a> Iterator for SizedBlockIterator<'a> { type Item = SizedDictEntry; fn next(&mut self) -> Option { diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 78626bff..046c8a35 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -1,4 +1,4 @@ -use std::cmp::Ordering; +use std::{cmp::Ordering, borrow::Cow}; use crate::structure::{util::calculate_width, LogArrayBufBuilder, MonotonicLogArray}; use bytes::{BufMut, Bytes}; @@ -35,7 +35,7 @@ pub fn build_offset_logarray(buf: &mut B, mut offsets: Vec) { array_builder.finalize(); } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct SizedDict { offsets: MonotonicLogArray, pub(crate) data: Bytes, @@ -146,7 +146,85 @@ impl SizedDict { result } + + pub fn block_iter<'a>(&'a self) -> SizedDictBlockIterator<'a> { + SizedDictBlockIterator { + dict: Cow::Borrowed(self), + index: 0 + } + } + + pub fn into_block_iter(self) -> OwnedSizedDictBlockIterator { + SizedDictBlockIterator { + dict: Cow::Owned(self), + index: 0 + } + } + + pub fn iter<'a>(&'a self) -> impl Iterator+'a+Clone { + self.block_iter() + .flat_map(|b|b.into_iter()) + } + + pub fn into_iter(self) -> impl Iterator+Clone { + self.into_block_iter() + .flat_map(|b|b.into_iter()) + } +} + +type OwnedSizedDictBlockIterator = SizedDictBlockIterator<'static>; + +#[derive(Clone)] +pub struct SizedDictBlockIterator<'a> { + dict: Cow<'a, SizedDict>, + index: usize, +} + +impl<'a> Iterator for SizedDictBlockIterator<'a> { + type Item = SizedDictBlock; + + fn next(&mut self) -> Option { + if self.index >= self.dict.num_blocks() { + return None; + } + + let block = self.dict.block(self.index); + self.index += 1; + + Some(block) + } +} + +/* +pub struct SizedDictIterator<'a> { + dict: SizedDictBlockIterator<'a>, + block: Option>, +} + +impl<'a> Iterator for SizedDictIterator<'a> { + type Item = SizedDictEntry; + + fn next(&mut self) -> Option { + if let Some(entry) = self.block.as_ref().and_then(|b|b.next()) { + Some(entry) + } + else { + let next_block = self.dict.next(); + if next_block.is_none() { + return None; + } + let next_block = next_block.unwrap(); + let next_block_iter = next_block.iter(); + + let result = next_block_iter.next(); + + self.block = Some(next_block_iter); + + result + } + } } +*/ #[cfg(test)] mod tests { diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index b05f31e0..9aa328df 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -7,16 +7,16 @@ use itertools::*; use num_derive::FromPrimitive; use num_traits::FromPrimitive; use rug::Integer; -use std::marker::PhantomData; +use std::{marker::PhantomData, borrow::Cow}; use 
super::{ - block::{IdLookupResult, SizedDictEntry}, + block::{IdLookupResult, SizedDictEntry, SizedDictBlock}, decimal::{decimal_to_storage, storage_to_decimal}, dict::{build_dict_unchecked, build_offset_logarray, SizedDict}, integer::{bigint_to_storage, storage_to_bigint}, }; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct TypedDict { types_present: MonotonicLogArray, type_offsets: MonotonicLogArray, @@ -164,8 +164,71 @@ impl TypedDict { pub fn num_entries(&self) -> usize { self.num_entries } + + pub fn segment_iter<'a>(&'a self) -> DictSegmentIterator<'a> { + DictSegmentIterator { + dict: Cow::Borrowed(&self), + type_index: 0 + } + } + + pub fn into_segment_iter(self) -> OwnedDictSegmentIterator { + DictSegmentIterator { + dict: Cow::Owned(self), + type_index: 0 + } + } + + pub fn block_iter<'a>(&'a self) -> impl Iterator+'a+Clone { + self.segment_iter() + .flat_map(|(datatype, segment)| segment.into_block_iter() + .map(move |block| (datatype, block))) + } + + pub fn into_block_iter(self) -> impl Iterator+Clone { + self.into_segment_iter() + .flat_map(|(datatype, segment)| segment.into_block_iter() + .map(move |block| (datatype, block))) + } + + pub fn iter<'a>(&'a self) -> impl Iterator+'a+Clone { + self.block_iter() + .flat_map(|(datatype, segment)| segment.into_iter() + .map(move |entry| (datatype, entry))) + } + + pub fn into_iter(self) -> impl Iterator+Clone { + self.into_block_iter() + .flat_map(|(datatype, segment)| segment.into_iter() + .map(move |entry| (datatype, entry))) + } +} + +type OwnedDictSegmentIterator = DictSegmentIterator<'static>; + +#[derive(Clone)] +pub struct DictSegmentIterator<'a> { + dict: Cow<'a, TypedDict>, + type_index: usize, } +impl<'a> Iterator for DictSegmentIterator<'a> { + type Item = (Datatype, SizedDict); + + fn next(&mut self) -> Option<(Datatype, SizedDict)> { + if self.type_index >= self.dict.types_present.len() { + return None; + } + + let (segment, _) = self.dict.inner_type_segment(self.type_index); + let datatype = self.dict.type_for_type_index(self.type_index); + self.type_index += 1; + + Some((datatype, segment)) + } +} + + pub struct TypedDictSegment { dict: SizedDict, _x: PhantomData, @@ -774,4 +837,64 @@ mod tests { dict.id(&int("99999999999999999999999999")) ); } + + #[test] + fn iterate_full_blocks() { + let mut vec: Vec<(Datatype, Bytes)> = vec![ + "fdsa".to_string().make_entry(), + "a".to_string().make_entry(), + "bc".to_string().make_entry(), + "bcd".to_string().make_entry(), + "z".to_string().make_entry(), + "Batty".to_string().make_entry(), + "Batman".to_string().make_entry(), + "apple".to_string().make_entry(), + (-500_i32).make_entry(), + 20_u32.make_entry(), + 22_u32.make_entry(), + 23_u32.make_entry(), + 24_u32.make_entry(), + 25_u32.make_entry(), + 26_u32.make_entry(), + 27_u32.make_entry(), + 28_u32.make_entry(), + 3000_u32.make_entry(), + (-3_i64).make_entry(), + Decimal("-12342343.2348973".to_string()).make_entry(), + Decimal("234.8973".to_string()).make_entry(), + Decimal("0.2348973".to_string()).make_entry(), + Decimal("23423423.8973".to_string()).make_entry(), + Decimal("3.3".to_string()).make_entry(), + Decimal("0.001".to_string()).make_entry(), + Decimal("-0.001".to_string()).make_entry(), + Decimal("2".to_string()).make_entry(), + Decimal("0".to_string()).make_entry(), + 4.389832_f32.make_entry(), + 23434.389832_f32.make_entry(), + int("239487329872343987").make_entry(), + ]; + vec.sort(); + let mut used_types = BytesMut::new(); + let mut type_offsets = BytesMut::new(); + let mut block_offsets = BytesMut::new(); 
+ let mut data = BytesMut::new(); + build_multiple_segments( + &mut used_types, + &mut type_offsets, + &mut block_offsets, + &mut data, + vec.clone().into_iter(), + ); + + let dict = TypedDict::from_parts( + used_types.freeze(), + type_offsets.freeze(), + block_offsets.freeze(), + data.freeze(), + ); + + let actual: Vec<_> = dict.iter().map(|(dt,e)|(dt, e.to_bytes())).collect(); + + assert_eq!(vec, actual); + } } From 73cf21414539965140c1917bc9051611328a07ca Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Mon, 28 Nov 2022 17:10:55 +0100 Subject: [PATCH 30/99] prereserve a vector with the right size on block iteration --- src/structure/tfc/block.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 0d25e8c1..b566de0c 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -571,8 +571,10 @@ impl<'a> Iterator for SizedBlockIterator<'a> { Some(SizedDictEntry::new(last.clone())) } else { - let result = vec![self.header.head.clone()]; - self.last = Some(result.clone()); + let mut last = Vec::with_capacity(BLOCK_SIZE); + last.push(self.header.head.clone()); + let result = last.clone(); + self.last = Some(last); Some(SizedDictEntry::new(result)) } } From 1185e233cb32eca2f7931d88189db5f906ee45cb Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Mon, 28 Nov 2022 17:32:07 +0100 Subject: [PATCH 31/99] refactor entry buf code to reuse code smarter --- src/structure/tfc/block.rs | 139 ++++++++++++++++--------------------- 1 file changed, 60 insertions(+), 79 deletions(-) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index b566de0c..5b35f58b 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -104,15 +104,15 @@ impl SizedDictEntry { pub fn as_buf(&self) -> SizedDictEntryBuf { SizedDictEntryBuf { - entry: self, + entry: Cow::Borrowed(self), slice_ix: 0, pos_in_slice: 0, } } pub fn into_buf(self) -> OwnedSizedDictEntryBuf { - OwnedSizedDictEntryBuf { - entry: self, + SizedDictEntryBuf { + entry: Cow::Owned(self), slice_ix: 0, pos_in_slice: 0, } @@ -279,100 +279,81 @@ impl PartialOrd for SizedDictEntry { #[derive(Clone)] pub struct SizedDictEntryBuf<'a> { - entry: &'a SizedDictEntry, + entry: Cow<'a, SizedDictEntry>, slice_ix: usize, pos_in_slice: usize, } -fn calculate_remaining<'a>(entry: &SizedDictEntry, slice_ix: usize, pos_in_slice: usize) -> usize { - let total: usize = entry.0.iter().skip(slice_ix).map(|s| s.len()).sum(); - total - pos_in_slice -} - -fn calculate_chunk<'a>(entry: &'a SizedDictEntry, slice_ix: usize, pos_in_slice: usize) -> &[u8] { - if slice_ix >= entry.0.len() { - &[] - } else { - let slice = &entry.0[slice_ix]; - &slice[pos_in_slice..] - } -} - -fn calculate_advance<'a>( - entry: &'a SizedDictEntry, - slice_ix: &mut usize, - pos_in_slice: &mut usize, - mut cnt: usize, -) { - if *slice_ix < entry.0.len() { - let slice = &entry.0[*slice_ix]; - let remaining_in_slice = slice.len() - *pos_in_slice; - - if remaining_in_slice > cnt { - // we remain in the slice we're at. 
- *pos_in_slice += cnt; - } else { - // we are starting at the next slice - cnt -= remaining_in_slice; - *slice_ix += 1; - - loop { - if entry.0.len() >= *slice_ix { - // past the end - *pos_in_slice = 0; - break; - } - - let slice_len = entry.0[*slice_ix].len(); - - if cnt < slice_len { - // this is our slice - *pos_in_slice = cnt; - break; - } - - // not our slice, so advance to next - cnt -= entry.0.len(); - *slice_ix += 1; - } - } - } -} - impl<'a> Buf for SizedDictEntryBuf<'a> { fn remaining(&self) -> usize { - calculate_remaining(self.entry, self.slice_ix, self.pos_in_slice) + { + let pos_in_slice = self.pos_in_slice; + let total: usize = self + .entry + .0 + .iter() + .skip(self.slice_ix) + .map(|s| s.len()) + .sum(); + total - pos_in_slice + } } fn chunk(&self) -> &[u8] { - calculate_chunk(self.entry, self.slice_ix, self.pos_in_slice) + { + let pos_in_slice = self.pos_in_slice; + if self.slice_ix >= self.entry.0.len() { + &[] + } else { + let slice = &self.entry.0[self.slice_ix]; + &slice[pos_in_slice..] + } + } } fn advance(&mut self, cnt: usize) { - calculate_advance(self.entry, &mut self.slice_ix, &mut self.pos_in_slice, cnt) - } -} - -pub struct OwnedSizedDictEntryBuf { - entry: SizedDictEntry, - slice_ix: usize, - pos_in_slice: usize, -} + { + let pos_in_slice: &mut usize = &mut self.pos_in_slice; + let mut cnt = cnt; + if self.slice_ix < self.entry.0.len() { + let slice = &self.entry.0[self.slice_ix]; + let remaining_in_slice = slice.len() - *pos_in_slice; + + if remaining_in_slice > cnt { + // we remain in the slice we're at. + *pos_in_slice += cnt; + } else { + // we are starting at the next slice + cnt -= remaining_in_slice; + self.slice_ix += 1; + + loop { + if self.entry.0.len() >= self.slice_ix { + // past the end + *pos_in_slice = 0; + break; + } -impl Buf for OwnedSizedDictEntryBuf { - fn remaining(&self) -> usize { - calculate_remaining(&self.entry, self.slice_ix, self.pos_in_slice) - } + let slice_len = self.entry.0[self.slice_ix].len(); - fn chunk(&self) -> &[u8] { - calculate_chunk(&self.entry, self.slice_ix, self.pos_in_slice) - } + if cnt < slice_len { + // this is our slice + *pos_in_slice = cnt; + break; + } - fn advance(&mut self, cnt: usize) { - calculate_advance(&self.entry, &mut self.slice_ix, &mut self.pos_in_slice, cnt) + // not our slice, so advance to next + cnt -= self.entry.0.len(); + self.slice_ix += 1; + } + } + } + } } } +type OwnedSizedDictEntryBuf = SizedDictEntryBuf<'static>; + #[derive(Debug)] pub struct SizedDictBlock { header: SizedBlockHeader, From e23b065c7f696c0c8c3e9dad7a853ccb8ce7c800 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Mon, 28 Nov 2022 17:32:16 +0100 Subject: [PATCH 32/99] reformat --- src/structure/tfc/dict.rs | 16 +++++++------- src/structure/tfc/typed.rs | 43 ++++++++++++++++++++------------------ 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 046c8a35..c47b65f9 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -1,4 +1,4 @@ -use std::{cmp::Ordering, borrow::Cow}; +use std::{borrow::Cow, cmp::Ordering}; use crate::structure::{util::calculate_width, LogArrayBufBuilder, MonotonicLogArray}; use bytes::{BufMut, Bytes}; @@ -150,25 +150,23 @@ impl SizedDict { pub fn block_iter<'a>(&'a self) -> SizedDictBlockIterator<'a> { SizedDictBlockIterator { dict: Cow::Borrowed(self), - index: 0 + index: 0, } } pub fn into_block_iter(self) -> OwnedSizedDictBlockIterator { SizedDictBlockIterator { dict: Cow::Owned(self), - index: 
0 + index: 0, } } - pub fn iter<'a>(&'a self) -> impl Iterator+'a+Clone { - self.block_iter() - .flat_map(|b|b.into_iter()) + pub fn iter<'a>(&'a self) -> impl Iterator + 'a + Clone { + self.block_iter().flat_map(|b| b.into_iter()) } - pub fn into_iter(self) -> impl Iterator+Clone { - self.into_block_iter() - .flat_map(|b|b.into_iter()) + pub fn into_iter(self) -> impl Iterator + Clone { + self.into_block_iter().flat_map(|b| b.into_iter()) } } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 9aa328df..e00f8a29 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -7,10 +7,10 @@ use itertools::*; use num_derive::FromPrimitive; use num_traits::FromPrimitive; use rug::Integer; -use std::{marker::PhantomData, borrow::Cow}; +use std::{borrow::Cow, marker::PhantomData}; use super::{ - block::{IdLookupResult, SizedDictEntry, SizedDictBlock}, + block::{IdLookupResult, SizedDictBlock, SizedDictEntry}, decimal::{decimal_to_storage, storage_to_decimal}, dict::{build_dict_unchecked, build_offset_logarray, SizedDict}, integer::{bigint_to_storage, storage_to_bigint}, @@ -168,39 +168,43 @@ impl TypedDict { pub fn segment_iter<'a>(&'a self) -> DictSegmentIterator<'a> { DictSegmentIterator { dict: Cow::Borrowed(&self), - type_index: 0 + type_index: 0, } } pub fn into_segment_iter(self) -> OwnedDictSegmentIterator { DictSegmentIterator { dict: Cow::Owned(self), - type_index: 0 + type_index: 0, } } - pub fn block_iter<'a>(&'a self) -> impl Iterator+'a+Clone { - self.segment_iter() - .flat_map(|(datatype, segment)| segment.into_block_iter() - .map(move |block| (datatype, block))) + pub fn block_iter<'a>( + &'a self, + ) -> impl Iterator + 'a + Clone { + self.segment_iter().flat_map(|(datatype, segment)| { + segment + .into_block_iter() + .map(move |block| (datatype, block)) + }) } - pub fn into_block_iter(self) -> impl Iterator+Clone { - self.into_segment_iter() - .flat_map(|(datatype, segment)| segment.into_block_iter() - .map(move |block| (datatype, block))) + pub fn into_block_iter(self) -> impl Iterator + Clone { + self.into_segment_iter().flat_map(|(datatype, segment)| { + segment + .into_block_iter() + .map(move |block| (datatype, block)) + }) } - pub fn iter<'a>(&'a self) -> impl Iterator+'a+Clone { + pub fn iter<'a>(&'a self) -> impl Iterator + 'a + Clone { self.block_iter() - .flat_map(|(datatype, segment)| segment.into_iter() - .map(move |entry| (datatype, entry))) + .flat_map(|(datatype, segment)| segment.into_iter().map(move |entry| (datatype, entry))) } - pub fn into_iter(self) -> impl Iterator+Clone { + pub fn into_iter(self) -> impl Iterator + Clone { self.into_block_iter() - .flat_map(|(datatype, segment)| segment.into_iter() - .map(move |entry| (datatype, entry))) + .flat_map(|(datatype, segment)| segment.into_iter().map(move |entry| (datatype, entry))) } } @@ -228,7 +232,6 @@ impl<'a> Iterator for DictSegmentIterator<'a> { } } - pub struct TypedDictSegment { dict: SizedDict, _x: PhantomData, @@ -893,7 +896,7 @@ mod tests { data.freeze(), ); - let actual: Vec<_> = dict.iter().map(|(dt,e)|(dt, e.to_bytes())).collect(); + let actual: Vec<_> = dict.iter().map(|(dt, e)| (dt, e.to_bytes())).collect(); assert_eq!(vec, actual); } From 6411aa382c815b9086440b7eb82e557a1a3a64f8 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Tue, 29 Nov 2022 14:23:59 +0100 Subject: [PATCH 33/99] work --- src/layer/id_map.rs | 57 +++++++++++++++++++++------------- src/layer/internal/base.rs | 6 ++-- src/layer/internal/child.rs | 6 ++-- src/layer/internal/mod.rs 
| 6 ++-- src/storage/cache.rs | 8 ++--- src/storage/consts.rs | 10 ++++-- src/storage/delta.rs | 12 ++++---- src/storage/file.rs | 32 +++++++++++++++++++ src/storage/layer.rs | 34 ++++++++++++-------- src/structure/mod.rs | 1 + src/structure/tfc/block.rs | 2 +- src/structure/tfc/decimal.rs | 2 -- src/structure/tfc/dict.rs | 7 +++++ src/structure/tfc/file.rs | 60 ++++++++++++++++++++++++++++++++++++ src/structure/tfc/mod.rs | 4 +++ src/structure/tfc/typed.rs | 15 +++++++++ 16 files changed, 203 insertions(+), 59 deletions(-) create mode 100644 src/structure/tfc/file.rs diff --git a/src/layer/id_map.rs b/src/layer/id_map.rs index 43994103..345e5f19 100644 --- a/src/layer/id_map.rs +++ b/src/layer/id_map.rs @@ -75,9 +75,9 @@ pub async fn memory_construct_idmaps_upto( } pub async fn construct_idmaps_from_structures( - node_dicts: &[PfcDict], - predicate_dicts: &[PfcDict], - value_dicts: &[PfcDict], + node_dicts: Vec, + predicate_dicts: Vec, + value_dicts: Vec, node_value_idmaps: &[IdMap], predicate_idmaps: &[IdMap], idmap_files: IdMapFiles, @@ -86,49 +86,54 @@ pub async fn construct_idmaps_from_structures debug_assert!(node_dicts.len() == value_dicts.len()); debug_assert!(node_dicts.len() == node_value_idmaps.len()); debug_assert!(node_dicts.len() == predicate_idmaps.len()); + let len = node_dicts.len(); - let mut node_iters = Vec::with_capacity(node_dicts.len()); + let mut node_iters = Vec::with_capacity(len); let mut node_offset = 0; - for (ix, dict) in node_dicts.iter().enumerate() { + let node_entries_len: Vec<_> = node_dicts.iter().map(|d|d.num_entries()).collect(); + for (ix, dict) in node_dicts.into_iter().enumerate() { let idmap = node_value_idmaps[ix].clone(); + let num_entries = dict.num_entries(); node_iters.push( - dict.entries() + dict.into_iter() .enumerate() .map(move |(i, e)| (idmap.inner_to_outer(i as u64) + node_offset as u64, e)), ); - node_offset += dict.len() + value_dicts[ix].len(); + node_offset += num_entries + value_dicts[ix].num_entries(); } - let mut value_iters = Vec::with_capacity(node_dicts.len()); + let mut value_iters = Vec::with_capacity(len); let mut value_offset = 0; - for (ix, dict) in value_dicts.iter().enumerate() { + for (ix, dict) in value_dicts.into_iter().enumerate() { let idmap = node_value_idmaps[ix].clone(); - let node_count = node_dicts[ix].len(); - value_iters.push(dict.entries().enumerate().map(move |(i, e)| { + let node_count = node_entries_len[ix]; + let num_entries = dict.num_entries(); + value_iters.push(dict.into_iter().enumerate().map(move |(i, e)| { ( idmap.inner_to_outer(i as u64 + node_count as u64) + value_offset as u64, e, ) })); - value_offset += node_count + dict.len(); + value_offset += node_count + num_entries; } - let mut predicate_iters = Vec::with_capacity(node_dicts.len()); + let mut predicate_iters = Vec::with_capacity(len); let mut predicate_offset = 0; - for (ix, dict) in predicate_dicts.iter().enumerate() { + for (ix, dict) in predicate_dicts.into_iter().enumerate() { let idmap = predicate_idmaps[ix].clone(); + let num_entries = dict.num_entries(); predicate_iters.push( - dict.entries() + dict.into_iter() .enumerate() .map(move |(i, e)| (idmap.inner_to_outer(i as u64) + predicate_offset as u64, e)), ); - predicate_offset += dict.len(); + predicate_offset += num_entries; } - let entry_comparator = |vals: &[Option<&(u64, PfcDictEntry)>]| { + let entry_comparator = |vals: &[Option<&(u64, SizedDictEntry)>]| { vals.iter() .enumerate() .filter(|(_, x)| x.is_some()) @@ -136,8 +141,16 @@ pub async fn 
construct_idmaps_from_structures .map(|x| x.0) }; - let sorted_node_iter = sorted_iterator(node_iters, entry_comparator); - let sorted_value_iter = sorted_iterator(value_iters, entry_comparator); + let typed_entry_comparator = |vals: &[Option<&(u64, (Datatype, SizedDictEntry))>]| { + vals.iter() + .enumerate() + .filter(|(_, x)| x.is_some()) + .min_by(|(_, x), (_, y)| x.unwrap().1.cmp(&y.unwrap().1)) + .map(|x| x.0) + }; + + let sorted_node_iter = sorted_iterator(node_iters, entry_comparator).map(|(i,s)|(i, (Datatype::String, s))); + let sorted_value_iter = sorted_iterator(value_iters, typed_entry_comparator); let sorted_node_value_iter = sorted_node_iter.chain(sorted_value_iter).map(|(id, _)| id); let sorted_predicate_iter = sorted_iterator(predicate_iters, entry_comparator).map(|(id, _)| id); @@ -193,9 +206,9 @@ async fn construct_idmaps_from_layers( .collect(); construct_idmaps_from_structures( - &node_dicts, - &predicate_dicts, - &value_dicts, + node_dicts, + predicate_dicts, + value_dicts, &node_value_idmaps, &predicate_idmaps, idmap_files, diff --git a/src/layer/internal/base.rs b/src/layer/internal/base.rs index 1c37c107..1a18df01 100644 --- a/src/layer/internal/base.rs +++ b/src/layer/internal/base.rs @@ -22,9 +22,9 @@ use std::pin::Pin; #[derive(Clone)] pub struct BaseLayer { pub(super) name: [u32; 5], - pub(super) node_dictionary: PfcDict, - pub(super) predicate_dictionary: PfcDict, - pub(super) value_dictionary: PfcDict, + pub(super) node_dictionary: StringDict, + pub(super) predicate_dictionary: StringDict, + pub(super) value_dictionary: TypedDict, pub(super) node_value_idmap: IdMap, pub(super) predicate_idmap: IdMap, diff --git a/src/layer/internal/child.rs b/src/layer/internal/child.rs index 4c911747..c688e66b 100644 --- a/src/layer/internal/child.rs +++ b/src/layer/internal/child.rs @@ -25,9 +25,9 @@ pub struct ChildLayer { pub(super) name: [u32; 5], pub(super) parent: Arc, - pub(super) node_dictionary: PfcDict, - pub(super) predicate_dictionary: PfcDict, - pub(super) value_dictionary: PfcDict, + pub(super) node_dictionary: StringDict, + pub(super) predicate_dictionary: StringDict, + pub(super) value_dictionary: TypedDict, pub(super) node_value_idmap: IdMap, pub(super) predicate_idmap: IdMap, diff --git a/src/layer/internal/mod.rs b/src/layer/internal/mod.rs index 246af53f..169f6462 100644 --- a/src/layer/internal/mod.rs +++ b/src/layer/internal/mod.rs @@ -64,7 +64,7 @@ impl InternalLayer { count } - pub fn node_dictionary(&self) -> &PfcDict { + pub fn node_dictionary(&self) -> &StringDict { match self { Base(base) => &base.node_dictionary, Child(child) => &child.node_dictionary, @@ -72,7 +72,7 @@ impl InternalLayer { } } - pub fn predicate_dictionary(&self) -> &PfcDict { + pub fn predicate_dictionary(&self) -> &StringDict { match self { Base(base) => &base.predicate_dictionary, Child(child) => &child.predicate_dictionary, @@ -80,7 +80,7 @@ impl InternalLayer { } } - pub fn value_dictionary(&self) -> &PfcDict { + pub fn value_dictionary(&self) -> &TypedDict { match self { Base(base) => &base.value_dictionary, Child(child) => &child.value_dictionary, diff --git a/src/storage/cache.rs b/src/storage/cache.rs index 01f96db8..2d00302b 100644 --- a/src/storage/cache.rs +++ b/src/storage/cache.rs @@ -1,6 +1,6 @@ use super::layer::*; use crate::layer::*; -use crate::structure::PfcDict; +use crate::structure::{StringDict, TypedDict}; use async_trait::async_trait; use std::collections::HashMap; use std::io; @@ -135,7 +135,7 @@ impl LayerStore for CachedLayerStore { } } - async fn 
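The offset bookkeeping in the idmap construction above assigns every merged dictionary a contiguous id range: within a layer, node ids come first, then value ids, and each successive layer starts where the previous one ended. A small self-contained model of that mapping (names hypothetical; `inner_to_outer` is reduced to a plain permutation table):

fn outer_id(inner: usize, permutation: &[u64], layer_offset: u64) -> u64 {
    // permutation[i] is the sorted position of inner id i within its layer
    permutation[inner] + layer_offset
}

fn main() {
    let permutation = vec![2, 0, 1];
    let layer_offset = 10; // ids consumed by earlier layers' nodes and values
    assert_eq!(outer_id(0, &permutation, layer_offset), 12);
    assert_eq!(outer_id(1, &permutation, layer_offset), 10);
}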
get_node_dictionary(&self, name: [u32; 5]) -> io::Result> { + async fn get_node_dictionary(&self, name: [u32; 5]) -> io::Result> { // is layer in cache? if so, we can use the cached version if let Some(layer) = self.cache.get_layer_from_cache(name) { // unless it is a rollup @@ -147,7 +147,7 @@ impl LayerStore for CachedLayerStore { self.inner.get_node_dictionary(name).await } - async fn get_predicate_dictionary(&self, name: [u32; 5]) -> io::Result> { + async fn get_predicate_dictionary(&self, name: [u32; 5]) -> io::Result> { // is layer in cache? if so, we can use the cached version if let Some(layer) = self.cache.get_layer_from_cache(name) { // unless it is a rollup @@ -159,7 +159,7 @@ impl LayerStore for CachedLayerStore { self.inner.get_predicate_dictionary(name).await } - async fn get_value_dictionary(&self, name: [u32; 5]) -> io::Result> { + async fn get_value_dictionary(&self, name: [u32; 5]) -> io::Result> { // is layer in cache? if so, we can use the cached version if let Some(layer) = self.cache.get_layer_from_cache(name) { // unless it is a rollup diff --git a/src/storage/consts.rs b/src/storage/consts.rs index 61e1da41..de296ebb 100644 --- a/src/storage/consts.rs +++ b/src/storage/consts.rs @@ -5,6 +5,8 @@ pub struct Filenames { pub predicate_dictionary_blocks: &'static str, pub predicate_dictionary_offsets: &'static str, + pub value_dictionary_types_present: &'static str, + pub value_dictionary_type_offsets: &'static str, pub value_dictionary_blocks: &'static str, pub value_dictionary_offsets: &'static str, @@ -87,13 +89,15 @@ pub struct Filenames { } pub const FILENAMES: Filenames = Filenames { - node_dictionary_blocks: "node_dictionary_blocks.pfc", + node_dictionary_blocks: "node_dictionary_blocks.tfc", node_dictionary_offsets: "node_dictionary_offsets.logarray", - predicate_dictionary_blocks: "predicate_dictionary_blocks.pfc", + predicate_dictionary_blocks: "predicate_dictionary_blocks.tfc", predicate_dictionary_offsets: "predicate_dictionary_offsets.logarray", - value_dictionary_blocks: "value_dictionary_blocks.pfc", + value_dictionary_types_present: "value_dictionary_types.logarray", + value_dictionary_type_offsets: "value_dictionary_type_offsets.logarray", + value_dictionary_blocks: "value_dictionary_blocks.tfc", value_dictionary_offsets: "value_dictionary_offsets.logarray", node_value_idmap_bits: "node_value_idmap_bits.bitarray", diff --git a/src/storage/delta.rs b/src/storage/delta.rs index 87bf5f14..ad6e49ae 100644 --- a/src/storage/delta.rs +++ b/src/storage/delta.rs @@ -48,7 +48,7 @@ async fn get_node_dicts_from_disk( store: &S, name: [u32; 5], upto: [u32; 5], -) -> io::Result> { +) -> io::Result> { let mut result = Vec::new(); walk_backwards_from_disk_upto!(store, name, upto, current, { let dict = store @@ -67,7 +67,7 @@ async fn get_predicate_dicts_from_disk( store: &S, name: [u32; 5], upto: [u32; 5], -) -> io::Result> { +) -> io::Result> { let mut result = Vec::new(); walk_backwards_from_disk_upto!(store, name, upto, current, { let dict = store @@ -86,7 +86,7 @@ async fn get_value_dicts_from_disk( store: &S, name: [u32; 5], upto: [u32; 5], -) -> io::Result> { +) -> io::Result> { let mut result = Vec::new(); walk_backwards_from_disk_upto!(store, name, upto, current, { let dict = store @@ -208,9 +208,9 @@ async fn dictionary_rollup_upto ChildLayerFiles { } } +#[derive(Clone)] +pub struct TypedDictionaryMaps { + pub types_present_map: Bytes, + pub type_offsets_map: Bytes, + pub blocks_map: Bytes, + pub offsets_map: Bytes, +} + +#[derive(Clone)] +pub struct 
TypedDictionaryFiles { + pub types_present_file: F, + pub type_offsets_file: F, + pub blocks_file: F, + pub offsets_file: F, +} + +impl TypedDictionaryFiles { + pub async fn map_all(&self) -> io::Result { + let types_present_map = self.types_present_file.map().await?; + let type_offsets_map = self.type_offsets_file.map().await?; + let blocks_map = self.blocks_file.map().await?; + let offsets_map = self.offsets_file.map().await?; + + Ok(TypedDictionaryMaps { + types_present_map, + type_offsets_map, + blocks_map, + offsets_map, + }) + } +} + #[derive(Clone)] pub struct DictionaryMaps { pub blocks_map: Bytes, diff --git a/src/storage/layer.rs b/src/storage/layer.rs index db6806b7..bd17c109 100644 --- a/src/storage/layer.rs +++ b/src/storage/layer.rs @@ -10,10 +10,12 @@ use crate::layer::{ OptInternalLayerTriplePredicateIterator, OptInternalLayerTripleSubjectIterator, RollupLayer, SimpleLayerBuilder, }; +use crate::structure::StringDict; +use crate::structure::TypedDict; use crate::structure::bitarray::bitarray_len_from_file; use crate::structure::logarray::logarray_file_get_length_and_width; use crate::structure::{ - dict_file_get_count, util, AdjacencyList, BitIndex, LogArray, MonotonicLogArray, PfcDict, + dict_file_get_count, util, AdjacencyList, BitIndex, LogArray, MonotonicLogArray, WaveletTree, }; @@ -75,11 +77,11 @@ pub trait LayerStore: 'static + Packable + Send + Sync { async fn get_layer_parent_name(&self, name: [u32; 5]) -> io::Result>; - async fn get_node_dictionary(&self, name: [u32; 5]) -> io::Result>; + async fn get_node_dictionary(&self, name: [u32; 5]) -> io::Result>; - async fn get_predicate_dictionary(&self, name: [u32; 5]) -> io::Result>; + async fn get_predicate_dictionary(&self, name: [u32; 5]) -> io::Result>; - async fn get_value_dictionary(&self, name: [u32; 5]) -> io::Result>; + async fn get_value_dictionary(&self, name: [u32; 5]) -> io::Result>; async fn get_node_count(&self, name: [u32; 5]) -> io::Result>; @@ -738,9 +740,15 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { async fn value_dictionary_files( &self, layer: [u32; 5], - ) -> io::Result> { + ) -> io::Result> { // does layer exist? if self.directory_exists(layer).await? { + let types_present_file = self + .get_file(layer, FILENAMES.value_dictionary_types_present) + .await?; + let type_offsets_file = self + .get_file(layer, FILENAMES.value_dictionary_type_offsets) + .await?; let blocks_file = self .get_file(layer, FILENAMES.value_dictionary_blocks) .await?; @@ -748,7 +756,9 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { .get_file(layer, FILENAMES.value_dictionary_offsets) .await?; - Ok(DictionaryFiles { + Ok(TypedDictionaryFiles { + types_present_file, + type_offsets_file, blocks_file, offsets_file, }) @@ -1546,34 +1556,34 @@ impl io::Result> { + async fn get_node_dictionary(&self, name: [u32; 5]) -> io::Result> { if self.directory_exists(name).await? { let files = self.node_dictionary_files(name).await?; let maps = files.map_all().await?; - Ok(Some(PfcDict::parse(maps.blocks_map, maps.offsets_map)?)) + Ok(Some(StringDict::parse(maps.blocks_map, maps.offsets_map)?)) } else { Ok(None) } } - async fn get_predicate_dictionary(&self, name: [u32; 5]) -> io::Result> { + async fn get_predicate_dictionary(&self, name: [u32; 5]) -> io::Result> { if self.directory_exists(name).await? 
{ let files = self.predicate_dictionary_files(name).await?; let maps = files.map_all().await?; - Ok(Some(PfcDict::parse(maps.blocks_map, maps.offsets_map)?)) + Ok(Some(StringDict::parse(maps.blocks_map, maps.offsets_map)?)) } else { Ok(None) } } - async fn get_value_dictionary(&self, name: [u32; 5]) -> io::Result> { + async fn get_value_dictionary(&self, name: [u32; 5]) -> io::Result> { if self.directory_exists(name).await? { let files = self.value_dictionary_files(name).await?; let maps = files.map_all().await?; - Ok(Some(PfcDict::parse(maps.blocks_map, maps.offsets_map)?)) + Ok(Some(TypedDict::from_parts(maps.blocks_map, maps.offsets_map)?)) } else { Ok(None) } diff --git a/src/structure/mod.rs b/src/structure/mod.rs index 78906419..b4ada408 100644 --- a/src/structure/mod.rs +++ b/src/structure/mod.rs @@ -19,4 +19,5 @@ pub use bitarray::*; pub use bitindex::*; pub use logarray::*; pub use pfc::*; +pub use tfc::*; pub use wavelettree::*; diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 5b35f58b..fbc63d9d 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -352,7 +352,7 @@ impl<'a> Buf for SizedDictEntryBuf<'a> { } } -type OwnedSizedDictEntryBuf = SizedDictEntryBuf<'static>; +pub type OwnedSizedDictEntryBuf = SizedDictEntryBuf<'static>; #[derive(Debug)] pub struct SizedDictBlock { diff --git a/src/structure/tfc/decimal.rs b/src/structure/tfc/decimal.rs index 7fa0143d..b7acaf6c 100644 --- a/src/structure/tfc/decimal.rs +++ b/src/structure/tfc/decimal.rs @@ -1,8 +1,6 @@ use bytes::Buf; use rug::Integer; -use crate::structure::tfc::integer; - use super::integer::{bigint_to_storage, storage_to_bigint_and_sign, NEGATIVE_ZERO}; fn encode_fraction(fraction: Option<&str>) -> Vec { diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index c47b65f9..301c393b 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -168,6 +168,13 @@ impl SizedDict { pub fn into_iter(self) -> impl Iterator + Clone { self.into_block_iter().flat_map(|b| b.into_iter()) } + + pub fn num_entries(&self) -> usize { + let num_blocks = self.num_blocks(); + let last_block_size = self.block_num_elements(num_blocks - 1); + + (num_blocks-1) * BLOCK_SIZE + last_block_size as usize + } } type OwnedSizedDictBlockIterator = SizedDictBlockIterator<'static>; diff --git a/src/structure/tfc/file.rs b/src/structure/tfc/file.rs new file mode 100644 index 00000000..15a75c2d --- /dev/null +++ b/src/structure/tfc/file.rs @@ -0,0 +1,60 @@ +use bytes::BytesMut; + +use crate::{storage::*, structure::util::sorted_iterator}; + +use super::{*, dict::{build_dict_unchecked, build_offset_logarray}}; + +pub struct StringDictFileBuilder { + /// the file that this builder writes the pfc blocks to + blocks_file: W, + /// the file that this builder writes the block offsets to + block_offsets_file: W, + + strings: Vec, +} + +impl StringDictFileBuilder { + pub fn new(blocks_file: W, block_offsets_file: W) -> Self { + Self { + blocks_file, + block_offsets_file, + strings: Vec::new() + } + } +} + +pub async fn merge_string_dictionaries< + 'a, + F: 'static + FileLoad + FileStore, + I: Iterator, +>( + dictionaries: I, + dict_files: DictionaryFiles, +) -> io::Result<()> { + let iterators: Vec<_> = dictionaries.map(|d| d.iter()).collect(); + + let pick_fn = |vals: &[Option<&SizedDictEntry>]| { + vals.iter() + .enumerate() + .filter(|(_, v)| v.is_some()) + .min_by(|(_, x), (_, y)| x.cmp(y)) + .map(|(ix, _)| ix) + }; + + let sorted_iterator = sorted_iterator(iterators, pick_fn); + + let 
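The `num_entries` calculation above relies on the invariant that every block except the last one is full. A worked example of that arithmetic under the crate's `BLOCK_SIZE` of 8 (a sketch, not the crate's API):

const BLOCK_SIZE: usize = 8;

// Mirrors the shape of SizedDict::num_entries: all blocks but the last
// hold exactly BLOCK_SIZE entries.
fn num_entries(num_blocks: usize, last_block_size: usize) -> usize {
    (num_blocks - 1) * BLOCK_SIZE + last_block_size
}

fn main() {
    // three blocks, the last holding 5 entries: 2 * 8 + 5 = 21
    assert_eq!(num_entries(3, 5), 21);
}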
blocks_file_writer = dict_files.blocks_file.open_write().await?; + let offsets_file_writer = dict_files.offsets_file.open_write().await?; + + let mut offsets = Vec::new(); + let mut offsets_buf = BytesMut::new(); + let mut data_buf = BytesMut::new(); + build_dict_unchecked(0, &mut offsets, &mut data_buf, sorted_iterator); + build_offset_logarray(&mut offsets_buf, offsets); + + + + + builder.add_all_entries(sorted_iterator).await?; + builder.finalize().await +} diff --git a/src/structure/tfc/mod.rs b/src/structure/tfc/mod.rs index 4b0b89cd..d6297508 100644 --- a/src/structure/tfc/mod.rs +++ b/src/structure/tfc/mod.rs @@ -3,3 +3,7 @@ pub mod decimal; pub mod dict; pub mod integer; pub mod typed; +pub mod file; + +pub use typed::*; +pub use block::{SizedDictEntry, SizedDictEntryBuf, OwnedSizedDictEntryBuf}; diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index e00f8a29..8f87bce1 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -232,6 +232,7 @@ impl<'a> Iterator for DictSegmentIterator<'a> { } } +#[derive(Clone)] pub struct TypedDictSegment { dict: SizedDict, _x: PhantomData, @@ -255,8 +256,22 @@ impl TypedDictSegment { let slice = val.to_lexical(); self.dict.id(&slice[..]) } + + pub fn num_entries(&self) -> usize { + self.dict.num_entries() + } + + pub fn iter<'a>(&'a self) -> impl Iterator+'a+Clone { + self.dict.iter() + } + + pub fn into_iter(self) -> impl Iterator+Clone { + self.dict.into_iter() + } } +pub type StringDict = TypedDictSegment; + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, FromPrimitive)] pub enum Datatype { String = 0, From e25f46b09a936065071199a0d1a3a649954a4e39 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Tue, 29 Nov 2022 15:49:57 +0100 Subject: [PATCH 34/99] more work --- src/storage/delta.rs | 20 ++++++----- src/storage/file.rs | 8 ++--- src/structure/tfc/file.rs | 72 ++++++++++++++++++++++++++++++++++----- src/structure/util.rs | 12 ++++--- 4 files changed, 86 insertions(+), 26 deletions(-) diff --git a/src/storage/delta.rs b/src/storage/delta.rs index ad6e49ae..e93e80e1 100644 --- a/src/storage/delta.rs +++ b/src/storage/delta.rs @@ -1,5 +1,7 @@ use std::io; +use tfc::file::{merge_string_dictionaries, merge_typed_dictionaries}; + use crate::layer::builder::{build_indexes, TripleFileBuilder}; use crate::layer::*; use crate::storage::*; @@ -199,13 +201,13 @@ async fn dictionary_rollup_upto( .into_iter() .map(|l| l.value_dictionary()); - merge_dictionaries(node_dicts, files.node_dictionary_files.clone()).await?; - merge_dictionaries(predicate_dicts, files.predicate_dictionary_files.clone()).await?; - merge_dictionaries(value_dicts, files.value_dictionary_files.clone()).await?; + merge_string_dictionaries(node_dicts, files.node_dictionary_files.clone()).await?; + merge_string_dictionaries(predicate_dicts, files.predicate_dictionary_files.clone()).await?; + merge_typed_dictionaries(value_dicts, files.value_dictionary_files.clone()).await?; memory_construct_idmaps(layer, files.id_map_files.clone()).await } @@ -260,9 +262,9 @@ async fn memory_dictionary_rollup_upto( .into_iter() .map(|l| l.value_dictionary()); - merge_dictionaries(node_dicts, files.node_dictionary_files.clone()).await?; - merge_dictionaries(predicate_dicts, files.predicate_dictionary_files.clone()).await?; - merge_dictionaries(value_dicts, files.value_dictionary_files.clone()).await?; + merge_string_dictionaries(node_dicts, files.node_dictionary_files.clone()).await?; + merge_string_dictionaries(predicate_dicts, 
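The dictionary merge above leans on a pick function applied to the peeked head of each input iterator. A self-contained sketch of that selection step, simplified to plain integers (the real code compares `SizedDictEntry` values, but the shape is the same):

// Given the current head of every input, return the index of the smallest one.
fn pick_min<T: Ord>(heads: &[Option<&T>]) -> Option<usize> {
    heads
        .iter()
        .enumerate()
        .filter(|(_, head)| head.is_some())
        .min_by(|(_, x), (_, y)| x.cmp(y))
        .map(|(ix, _)| ix)
}

fn main() {
    let a = 3;
    let b = 1;
    let heads = [Some(&a), None, Some(&b)];
    assert_eq!(pick_min(&heads), Some(2)); // input 2 holds the smallest head
}

Because exhausted inputs are filtered out before comparison, the merge naturally drops inputs as they run dry while the remaining ones keep contributing in sorted order.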
files.predicate_dictionary_files.clone()).await?; + merge_typed_dictionaries(value_dicts, files.value_dictionary_files.clone()).await?; memory_construct_idmaps_upto(layer, upto, files.id_map_files.clone()).await } diff --git a/src/storage/file.rs b/src/storage/file.rs index dd077cd6..6b04a923 100644 --- a/src/storage/file.rs +++ b/src/storage/file.rs @@ -91,7 +91,7 @@ impl LayerFiles { pub struct BaseLayerFiles { pub node_dictionary_files: DictionaryFiles, pub predicate_dictionary_files: DictionaryFiles, - pub value_dictionary_files: DictionaryFiles, + pub value_dictionary_files: TypedDictionaryFiles, pub id_map_files: IdMapFiles, @@ -110,7 +110,7 @@ pub struct BaseLayerFiles { pub struct BaseLayerMaps { pub node_dictionary_maps: DictionaryMaps, pub predicate_dictionary_maps: DictionaryMaps, - pub value_dictionary_maps: DictionaryMaps, + pub value_dictionary_maps: TypedDictionaryMaps, pub id_map_maps: IdMapMaps, @@ -165,7 +165,7 @@ impl BaseLayerFiles { pub struct ChildLayerFiles { pub node_dictionary_files: DictionaryFiles, pub predicate_dictionary_files: DictionaryFiles, - pub value_dictionary_files: DictionaryFiles, + pub value_dictionary_files: TypedDictionaryFiles, pub id_map_files: IdMapFiles, @@ -189,7 +189,7 @@ pub struct ChildLayerFiles StringDictFileBuilder { } pub async fn merge_string_dictionaries< - 'a, + 'a, F: 'static + FileLoad + FileStore, - I: Iterator, + I: Iterator+'a, >( dictionaries: I, dict_files: DictionaryFiles, ) -> io::Result<()> { - let iterators: Vec<_> = dictionaries.map(|d| d.iter()).collect(); + let iterators: Vec<_> = dictionaries.map(|d|d.iter()).collect(); let pick_fn = |vals: &[Option<&SizedDictEntry>]| { vals.iter() @@ -41,10 +43,10 @@ pub async fn merge_string_dictionaries< .map(|(ix, _)| ix) }; - let sorted_iterator = sorted_iterator(iterators, pick_fn); + let sorted_iterator = sorted_iterator(iterators, pick_fn).map(|elt|elt.to_bytes()); - let blocks_file_writer = dict_files.blocks_file.open_write().await?; - let offsets_file_writer = dict_files.offsets_file.open_write().await?; + let mut blocks_file_writer = dict_files.blocks_file.open_write().await?; + let mut offsets_file_writer = dict_files.offsets_file.open_write().await?; let mut offsets = Vec::new(); let mut offsets_buf = BytesMut::new(); @@ -53,8 +55,62 @@ pub async fn merge_string_dictionaries< build_offset_logarray(&mut offsets_buf, offsets); + blocks_file_writer.write_all(data_buf.as_ref()).await?; + blocks_file_writer.flush().await?; + blocks_file_writer.sync_all().await?; + offsets_file_writer.write_all(offsets_buf.as_ref()).await?; + offsets_file_writer.flush().await?; + offsets_file_writer.sync_all().await?; + Ok(()) +} + +pub async fn merge_typed_dictionaries< + 'a, + F: 'static + FileLoad + FileStore, + I: Iterator+'a, +>( + dictionaries: I, + dict_files: TypedDictionaryFiles, +) -> io::Result<()> { + let iterators: Vec<_> = dictionaries.map(|d|d.iter()).collect(); + + let pick_fn = |vals: &[Option<&(Datatype, SizedDictEntry)>]| { + vals.iter() + .enumerate() + .filter(|(_, v)| v.is_some()) + .min_by(|(_, x), (_, y)| x.cmp(y)) + .map(|(ix, _)| ix) + }; + + let sorted_iterator = sorted_iterator(iterators, pick_fn).map(|(dt, elt)|(dt,elt.to_bytes())); + + let mut types_present_file_writer = dict_files.types_present_file.open_write().await?; + let mut type_offsets_file_writer = dict_files.type_offsets_file.open_write().await?; + let mut blocks_file_writer = dict_files.blocks_file.open_write().await?; + let mut offsets_file_writer = dict_files.offsets_file.open_write().await?; + + let 
mut types_present_buf = BytesMut::new(); + let mut type_offsets_buf = BytesMut::new(); + let mut offsets_buf = BytesMut::new(); + let mut data_buf = BytesMut::new(); + build_multiple_segments(&mut types_present_buf, &mut type_offsets_buf, &mut offsets_buf, &mut data_buf, sorted_iterator); + + types_present_file_writer.write_all(types_present_buf.as_ref()).await?; + types_present_file_writer.flush().await?; + types_present_file_writer.sync_all().await?; + + type_offsets_file_writer.write_all(type_offsets_buf.as_ref()).await?; + type_offsets_file_writer.flush().await?; + type_offsets_file_writer.sync_all().await?; + + blocks_file_writer.write_all(data_buf.as_ref()).await?; + blocks_file_writer.flush().await?; + blocks_file_writer.sync_all().await?; + + offsets_file_writer.write_all(offsets_buf.as_ref()).await?; + offsets_file_writer.flush().await?; + offsets_file_writer.sync_all().await?; - builder.add_all_entries(sorted_iterator).await?; - builder.finalize().await + Ok(()) } diff --git a/src/structure/util.rs b/src/structure/util.rs index 6bb0f721..d47f86d9 100644 --- a/src/structure/util.rs +++ b/src/structure/util.rs @@ -123,7 +123,7 @@ pub fn sorted_stream< struct SortedIterator< T, - I: 'static + Iterator + Send, + I: Iterator + Send, F: 'static + Fn(&[Option<&T>]) -> Option, > { iters: Vec>, @@ -131,8 +131,9 @@ struct SortedIterator< } impl< + 'a, T, - I: 'static + Iterator + Send, + I: 'a + Iterator + Send, F: 'static + Fn(&[Option<&T>]) -> Option, > Iterator for SortedIterator { @@ -154,13 +155,14 @@ impl< } pub fn sorted_iterator< - T, - I: 'static + Iterator + Send, + 'a, + T: 'a, + I: 'a + Iterator + Send, F: 'static + Fn(&[Option<&T>]) -> Option, >( iters: Vec, pick_fn: F, -) -> impl Iterator { +) -> impl Iterator+'a { let peekable_iters = iters .into_iter() .map(std::iter::Iterator::peekable) From 1982937408b3237a3d65215fdd50db2c7a5cb7f7 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Tue, 29 Nov 2022 16:31:53 +0100 Subject: [PATCH 35/99] some builder logic around sizeddict --- src/structure/logarray.rs | 47 ++++++++++++++++- src/structure/tfc/dict.rs | 103 +++++++++++++++++++++++++++++++++++++- 2 files changed, 148 insertions(+), 2 deletions(-) diff --git a/src/structure/logarray.rs b/src/structure/logarray.rs index 895999fa..2fa7479e 100644 --- a/src/structure/logarray.rs +++ b/src/structure/logarray.rs @@ -49,7 +49,7 @@ //! //! 
* length: the number of elements in the log array
 
-use super::util;
+use super::util::{self, calculate_width};
 use crate::storage::*;
 use byteorder::{BigEndian, ByteOrder};
 use bytes::{BufMut, Bytes, BytesMut};
@@ -417,6 +417,51 @@ impl<'a, B: BufMut> LogArrayBufBuilder<'a, B> {
     }
 }
 
+pub struct LateLogArrayBufBuilder<'a, B: BufMut> {
+    /// Destination of the log array data
+    buf: &'a mut B,
+    vals: Vec<u64>,
+    width: u8
+}
+
+impl<'a, B: BufMut> LateLogArrayBufBuilder<'a, B> {
+    pub fn new(buf: &'a mut B) -> Self {
+        Self {
+            buf,
+            vals: Vec::new(),
+            width: 0
+        }
+    }
+
+    pub fn count(&self) -> u32 {
+        self.vals.len() as u32
+    }
+
+    pub fn push(&mut self, val: u64) {
+        self.vals.push(val);
+        let width = calculate_width(val);
+        if self.width < width {
+            self.width = width;
+        }
+    }
+
+    pub fn push_vec(&mut self, vals: Vec<u64>) {
+        for val in vals {
+            self.push(val)
+        }
+    }
+
+    pub fn pop(&mut self) -> Option<u64> {
+        self.vals.pop()
+    }
+
+    pub fn finalize(self) {
+        let mut builder = LogArrayBufBuilder::new(self.buf, self.width);
+        builder.push_vec(self.vals);
+        builder.finalize();
+    }
+}
+
 /// write a logarray directly to an AsyncWrite
 pub struct LogArrayFileBuilder {
     /// Destination of the log array data
diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs
index c47b65f9..46abdce6 100644
--- a/src/structure/tfc/dict.rs
+++ b/src/structure/tfc/dict.rs
@@ -1,6 +1,6 @@
 use std::{borrow::Cow, cmp::Ordering};
 
-use crate::structure::{util::calculate_width, LogArrayBufBuilder, MonotonicLogArray};
+use crate::structure::{util::calculate_width, LogArrayBufBuilder, MonotonicLogArray, LateLogArrayBufBuilder};
 use bytes::{BufMut, Bytes};
 use itertools::Itertools;
 
@@ -23,6 +23,58 @@ pub fn build_dict_unchecked<B: BufMut, R: AsRef<[u8]>, I: Iterator<Item = R>>(
         offsets.push(offset);
     }
 }
+
+struct SizedDictBufBuilder<'a, B1:BufMut, B2:BufMut> {
+    block_offset: u64,
+    id_offset: u64,
+    offsets: LateLogArrayBufBuilder<'a, B2>,
+    data_buf: &'a mut B1,
+    current_block: Vec<Bytes>,
+}
+
+impl<'a, B1:BufMut, B2:BufMut> SizedDictBufBuilder<'a, B1, B2> {
+    pub fn new(block_offset: u64, id_offset: u64, offsets: LateLogArrayBufBuilder<'a, B2>, data_buf: &'a mut B1) -> Self {
+        Self {
+            block_offset,
+            id_offset, offsets, data_buf,
+            current_block: Vec::with_capacity(8)
+        }
+    }
+
+    pub fn add(&mut self, value: Bytes) -> u64 {
+        self.current_block.push(value);
+        self.id_offset += 1;
+        if self.current_block.len() == BLOCK_SIZE {
+            let current_block: Vec<&[u8]> = self.current_block.iter().map(|e|e.as_ref()).collect();
+            let size = build_block_unchecked(self.data_buf, &current_block);
+            self.block_offset += size as u64;
+            self.offsets.push(self.block_offset);
+
+            self.current_block.truncate(0);
+        }
+
+        self.id_offset
+    }
+
+    pub fn add_entry(&mut self, e: &SizedDictEntry) -> u64 {
+        self.add(e.to_bytes())
+    }
+
+    pub fn add_all<I: Iterator<Item = Bytes>>(&mut self, it: I) -> Vec<u64> {
+        it.map(|val| self.add(val)).collect()
+    }
+
+    pub fn finalize(mut self) -> LateLogArrayBufBuilder<'a, B2> {
+        if self.current_block.len() > 0 {
+            let current_block: Vec<&[u8]> = self.current_block.iter().map(|e|e.as_ref()).collect();
+            let size = build_block_unchecked(self.data_buf, &current_block);
+            self.offsets.push(self.block_offset + size as u64);
+        }
+
+        self.offsets
+    }
+}
+
 pub fn build_offset_logarray<B: BufMut>(buf: &mut B, mut offsets: Vec<u64>) {
     // the last offset doesn't matter as it's implied by the total size
     offsets.pop();
@@ -281,6 +333,55 @@ mod tests {
         }
     }
 
+    #[test]
+    fn build_dict_of_two_blocks_with_builder() {
+        let strings: Vec<&[u8]> = vec![
+            b"aaaaaaaa",
+            b"bbbbbbbb",
+            b"bbbcccdaaaa",
+            b"f",
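The point of the late builder added above is that the bit width is only fixed at finalize time, once every value has been seen. A sketch of that width computation (assuming, as `calculate_width` does elsewhere in this crate, that even the value 0 occupies one bit):

fn required_width(vals: &[u64]) -> u8 {
    vals.iter()
        .map(|&v| (64 - v.leading_zeros()).max(1) as u8)
        .max()
        .unwrap_or(1)
}

fn main() {
    // 1 needs 1 bit, 5 needs 3 bits, 100 needs 7 bits,
    // so the whole array must be stored at 7 bits per value.
    assert_eq!(required_width(&[1, 5, 100]), 7);
}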
b"fafasdfas", + b"gafovp", + b"gdfasfa", + b"gdfbbbbbb", + b"hello", + b"iguana", + b"illusion", + b"illustrated", + b"jetengine", + b"jetplane", + ]; + + let mut array_buf = BytesMut::new(); + let mut data_buf = BytesMut::new(); + + let logarray_builder = LateLogArrayBufBuilder::new(&mut array_buf); + + let mut builder = SizedDictBufBuilder::new(0, 0, logarray_builder, &mut data_buf); + builder.add_all(strings.clone().into_iter().map(|v|Bytes::from_static(v))); + let mut logarray_builder = builder.finalize(); + logarray_builder.pop(); + logarray_builder.finalize(); + + let array_bytes = array_buf.freeze(); + let data_bytes = data_buf.freeze(); + let dict = SizedDict::parse(array_bytes, data_bytes, 0); + + assert_eq!(2, dict.num_blocks()); + assert_eq!(b"aaaaaaaa", &dict.block_head(0)[..]); + assert_eq!(b"hello", &dict.block_head(1)[..]); + + let block0 = dict.block(0); + let block1 = dict.block(1); + + assert_eq!(8, block0.num_entries()); + assert_eq!(6, block1.num_entries()); + + for (ix, s) in strings.into_iter().enumerate() { + assert_eq!(s, &dict.entry((ix + 1) as u64).to_bytes()[..]); + } + } + #[test] fn lookup_entries_by_slice() { let strings: Vec<&[u8]> = vec![ From 26d655ed7626e9580f78396589a000d959533e8d Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Tue, 29 Nov 2022 20:58:43 +0100 Subject: [PATCH 36/99] Adding builder, doesn't work because of buf borrow --- src/structure/logarray.rs | 4 ++ src/structure/tfc/dict.rs | 21 ++++-- src/structure/tfc/typed.rs | 142 ++++++++++++++++++++++++++++++++++++- 3 files changed, 159 insertions(+), 8 deletions(-) diff --git a/src/structure/logarray.rs b/src/structure/logarray.rs index 2fa7479e..6fa51562 100644 --- a/src/structure/logarray.rs +++ b/src/structure/logarray.rs @@ -451,6 +451,10 @@ impl<'a, B: BufMut> LateLogArrayBufBuilder<'a, B> { } } + pub fn last(&mut self) -> Option { + self.vals.last().copied() + } + pub fn pop(&mut self) -> Option { self.vals.pop() } diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 46abdce6..12c608ca 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -24,16 +24,16 @@ pub fn build_dict_unchecked, I: Iterator>( } } -struct SizedDictBufBuilder<'a, B1:BufMut, B2:BufMut> { +pub struct SizedDictBufBuilder<'a, B1:BufMut, B2:BufMut> { block_offset: u64, id_offset: u64, - offsets: LateLogArrayBufBuilder<'a, B2>, - data_buf: &'a mut B1, + offsets: LateLogArrayBufBuilder<'a, B1>, + data_buf: &'a mut B2, current_block: Vec, } impl<'a, B1:BufMut, B2:BufMut> SizedDictBufBuilder<'a, B1, B2> { - pub fn new(block_offset: u64, id_offset: u64, offsets: LateLogArrayBufBuilder<'a, B2>, data_buf: &'a mut B1) -> Self { + pub fn new(block_offset: u64, id_offset: u64, offsets: LateLogArrayBufBuilder<'a, B1>, data_buf: &'a mut B2) -> Self { Self { block_offset, id_offset, offsets, data_buf, @@ -41,6 +41,14 @@ impl<'a, B1:BufMut, B2:BufMut> SizedDictBufBuilder<'a, B1, B2> { } } + pub fn id_offset(&self) -> u64{ + self.id_offset + } + + pub fn block_offset(&self) -> u64{ + self.block_offset + } + pub fn add(&mut self, value: Bytes) -> u64 { self.current_block.push(value); self.id_offset += 1; @@ -64,11 +72,12 @@ impl<'a, B1:BufMut, B2:BufMut> SizedDictBufBuilder<'a, B1, B2> { it.map(|val| self.add(val)).collect() } - pub fn finalize(mut self) -> LateLogArrayBufBuilder<'a, B2> { + pub fn finalize(mut self) -> LateLogArrayBufBuilder<'a, B1> { if self.current_block.len() > 0 { let current_block: Vec<&[u8]> = self.current_block.iter().map(|e|e.as_ref()).collect(); let size = 
build_block_unchecked(self.data_buf, &current_block);
-            self.offsets.push(self.block_offset + size as u64);
+            self.block_offset += size as u64;
+            self.offsets.push(self.block_offset);
         }
 
         self.offsets
diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs
index e00f8a29..b904f931 100644
--- a/src/structure/tfc/typed.rs
+++ b/src/structure/tfc/typed.rs
@@ -1,5 +1,6 @@
 use crate::structure::{
-    tfc::block::BLOCK_SIZE, util::calculate_width, LogArrayBufBuilder, MonotonicLogArray,
+    tfc::block::BLOCK_SIZE, util::calculate_width, LateLogArrayBufBuilder, LogArrayBufBuilder,
+    MonotonicLogArray,
 };
 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
@@ -12,7 +13,7 @@ use std::{borrow::Cow, marker::PhantomData};
 use super::{
     block::{IdLookupResult, SizedDictBlock, SizedDictEntry},
     decimal::{decimal_to_storage, storage_to_decimal},
-    dict::{build_dict_unchecked, build_offset_logarray, SizedDict},
+    dict::{build_dict_unchecked, build_offset_logarray, SizedDict, SizedDictBufBuilder},
     integer::{bigint_to_storage, storage_to_bigint},
 };
 
@@ -529,6 +530,79 @@ pub fn build_multiple_segments<
     type_offsets_builder.finalize();
 }
 
+struct TypedDictBufBuilder<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> {
+    types_present_builder: LateLogArrayBufBuilder<'a, B1>,
+    type_offsets_builder: LateLogArrayBufBuilder<'a, B2>,
+    sized_dict_buf_builder: SizedDictBufBuilder<'a, B3, B4>,
+    data_buf: &'a mut B4,
+    current_datatype: Option<Datatype>,
+}
+
+impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, B1, B2, B3, B4> {
+    pub fn new(
+        used_types: &'a mut B1,
+        type_offsets: &'a mut B2,
+        block_offsets: &'a mut B3,
+        data_buf: &'a mut B4,
+    ) -> Self {
+        let types_present_builder = LateLogArrayBufBuilder::new(used_types);
+        let type_offsets_builder = LateLogArrayBufBuilder::new(type_offsets);
+        let block_offset_builder = LateLogArrayBufBuilder::new(block_offsets);
+        let sized_dict_buf_builder = SizedDictBufBuilder::new(0, 0, block_offset_builder, data_buf);
+        Self {
+            types_present_builder,
+            type_offsets_builder,
+            data_buf,
+            sized_dict_buf_builder,
+            current_datatype: None,
+        }
+    }
+
+    pub fn add(&mut self, dt: Datatype, value: Bytes) -> u64 {
+        if self.current_datatype == None {
+            self.current_datatype = Some(dt);
+            self.types_present_builder.push(dt as u64);
+        }
+
+        if self.current_datatype != Some(dt) {
+            let id_offset = self.sized_dict_buf_builder.id_offset();
+            let block_offset = self.sized_dict_buf_builder.block_offset();
+            let block_offset_builder = self.sized_dict_buf_builder.finalize();
+
+            self.types_present_builder.push(dt as u64);
+            self.type_offsets_builder.push(block_offset + 1);
+            self.sized_dict_buf_builder = SizedDictBufBuilder::new(
+                id_offset,
+                block_offset,
+                block_offset_builder,
+                self.data_buf,
+            );
+        }
+
+        self.sized_dict_buf_builder.add(value)
+    }
+
+    pub fn add_entry(&mut self, dt: Datatype, e: &SizedDictEntry) -> u64 {
+        self.add(dt, e.to_bytes())
+    }
+
+    pub fn add_all<I: Iterator<Item = (Datatype, Bytes)>>(&mut self, it: I) -> Vec<u64> {
+        it.map(|(dt, val)| self.add(dt, val)).collect()
+    }
+
+    pub fn finalize(mut self) {
+        if self.current_datatype == None {
+            panic!("There was nothing added to this dictionary!");
+        }
+        let block_offset_builder = self.sized_dict_buf_builder.finalize();
+        block_offset_builder.pop();
+        block_offset_builder.finalize();
+
+        self.types_present_builder.finalize();
+        self.type_offsets_builder.finalize();
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use crate::structure::tfc::dict::build_offset_logarray;
@@ -900,4 +974,68 @@ mod tests {
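What `add` above maintains can be modeled without the buffer machinery: one entry in `types_present` per run of equal datatypes, plus the position at which each run begins. A self-contained model with datatypes and offsets reduced to plain numbers (the patch's actual encoding differs in detail — the first type's offset is implicit and stored offsets are shifted by one — but the run structure is the same):

// For a sorted (datatype, block_offset) sequence, record each datatype once
// together with the offset at which its run of blocks starts.
fn type_runs(entries: &[(u64, u64)]) -> (Vec<u64>, Vec<u64>) {
    let mut types_present = Vec::new();
    let mut type_offsets = Vec::new();
    for &(dt, offset) in entries {
        if types_present.last() != Some(&dt) {
            types_present.push(dt);
            type_offsets.push(offset);
        }
    }
    (types_present, type_offsets)
}

fn main() {
    // three string blocks (type 0), then two integer blocks (type 1)
    let entries = [(0, 0), (0, 1), (0, 2), (1, 3), (1, 4)];
    assert_eq!(type_runs(&entries), (vec![0, 1], vec![0, 3]));
}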
assert_eq!(vec, actual); } + + fn test_incremental_builder() { + let mut vec: Vec<(Datatype, Bytes)> = vec![ + "fdsa".to_string().make_entry(), + "a".to_string().make_entry(), + "bc".to_string().make_entry(), + "bcd".to_string().make_entry(), + "z".to_string().make_entry(), + "Batty".to_string().make_entry(), + "Batman".to_string().make_entry(), + "apple".to_string().make_entry(), + (-500_i32).make_entry(), + 20_u32.make_entry(), + 22_u32.make_entry(), + 23_u32.make_entry(), + 24_u32.make_entry(), + 25_u32.make_entry(), + 26_u32.make_entry(), + 27_u32.make_entry(), + 28_u32.make_entry(), + 3000_u32.make_entry(), + (-3_i64).make_entry(), + Decimal("-12342343.2348973".to_string()).make_entry(), + Decimal("234.8973".to_string()).make_entry(), + Decimal("0.2348973".to_string()).make_entry(), + Decimal("23423423.8973".to_string()).make_entry(), + Decimal("3.3".to_string()).make_entry(), + Decimal("0.001".to_string()).make_entry(), + Decimal("-0.001".to_string()).make_entry(), + Decimal("2".to_string()).make_entry(), + Decimal("0".to_string()).make_entry(), + 4.389832_f32.make_entry(), + 23434.389832_f32.make_entry(), + int("239487329872343987").make_entry(), + ]; + vec.sort(); + + let mut used_types_buf = BytesMut::new(); + let mut type_offsets_buf = BytesMut::new(); + let mut block_offsets_buf = BytesMut::new(); + let mut data_buf = BytesMut::new(); + + let typed_builder = TypedDictBufBuilder::new( + &mut used_types_buf, + &mut type_offsets_buf, + &mut block_offsets_buf, + &mut data_buf, + ); + + vec.into_iter() + .map(|(dt, entry)| typed_builder.add(dt, entry)); + + typed_builder.finalize(); + + let used_types = used_types_buf.freeze(); + let type_offsets = type_offsets_buf.freeze(); + let block_offsets = block_offsets_buf.freeze(); + let data = data_buf.freeze(); + + let dict = TypedDict::from_parts(used_types, type_offsets, block_offsets, data); + + let res = dict.entry(0); + eprintln!("res: {res:?}"); + } } From b6f8df81979d3f3c571f7d27ae6a6c7bb703f4b8 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Tue, 29 Nov 2022 21:20:02 +0100 Subject: [PATCH 37/99] Annoyed about a move --- src/structure/tfc/dict.rs | 47 +++++++++++++++++++++++--------------- src/structure/tfc/typed.rs | 23 ++++++++----------- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 12c608ca..a102318d 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -1,6 +1,8 @@ use std::{borrow::Cow, cmp::Ordering}; -use crate::structure::{util::calculate_width, LogArrayBufBuilder, MonotonicLogArray, LateLogArrayBufBuilder}; +use crate::structure::{ + util::calculate_width, LateLogArrayBufBuilder, LogArrayBufBuilder, MonotonicLogArray, +}; use bytes::{BufMut, Bytes}; use itertools::Itertools; @@ -24,28 +26,35 @@ pub fn build_dict_unchecked, I: Iterator>( } } -pub struct SizedDictBufBuilder<'a, B1:BufMut, B2:BufMut> { +pub struct SizedDictBufBuilder<'a, B1: BufMut, B2: BufMut> { block_offset: u64, id_offset: u64, offsets: LateLogArrayBufBuilder<'a, B1>, - data_buf: &'a mut B2, + data_buf: B2, current_block: Vec, } -impl<'a, B1:BufMut, B2:BufMut> SizedDictBufBuilder<'a, B1, B2> { - pub fn new(block_offset: u64, id_offset: u64, offsets: LateLogArrayBufBuilder<'a, B1>, data_buf: &'a mut B2) -> Self { +impl<'a, B1: BufMut, B2: BufMut> SizedDictBufBuilder<'a, B1, B2> { + pub fn new( + block_offset: u64, + id_offset: u64, + offsets: LateLogArrayBufBuilder<'a, B1>, + data_buf: B2, + ) -> Self { Self { block_offset, - id_offset, offsets, 
data_buf,
-            current_block: Vec::with_capacity(8)
+            id_offset,
+            offsets,
+            data_buf,
+            current_block: Vec::with_capacity(8),
         }
     }
 
-    pub fn id_offset(&self) -> u64{
+    pub fn id_offset(&self) -> u64 {
         self.id_offset
     }
 
-    pub fn block_offset(&self) -> u64{
+    pub fn block_offset(&self) -> u64 {
         self.block_offset
     }
 
     pub fn add(&mut self, value: Bytes) -> u64 {
         self.current_block.push(value);
         self.id_offset += 1;
         if self.current_block.len() == BLOCK_SIZE {
-            let current_block: Vec<&[u8]> = self.current_block.iter().map(|e|e.as_ref()).collect();
-            let size = build_block_unchecked(self.data_buf, &current_block);
+            let current_block: Vec<&[u8]> = self.current_block.iter().map(|e| e.as_ref()).collect();
+            let size = build_block_unchecked(&mut self.data_buf, &current_block);
             self.block_offset += size as u64;
             self.offsets.push(self.block_offset);
 
@@ -81,15 +90,20 @@ impl<'a, B1: BufMut, B2: BufMut> SizedDictBufBuilder<'a, B1, B2> {
         it.map(|val| self.add(val)).collect()
     }
 
-    pub fn finalize(mut self) -> LateLogArrayBufBuilder<'a, B1> {
+    pub fn finalize(mut self) -> (LateLogArrayBufBuilder<'a, B1>, B2) {
         if self.current_block.len() > 0 {
-            let current_block: Vec<&[u8]> = self.current_block.iter().map(|e|e.as_ref()).collect();
-            let size = build_block_unchecked(self.data_buf, &current_block);
+            let current_block: Vec<&[u8]> = self.current_block.iter().map(|e| e.as_ref()).collect();
+            let size = build_block_unchecked(&mut self.data_buf, &current_block);
             self.block_offset += size as u64;
             self.offsets.push(self.block_offset);
         }
 
-        self.offsets
+        (self.offsets, self.data_buf)
     }
 }
 
@@ -362,13 +371,13 @@ mod tests {
         ];
 
         let mut array_buf = BytesMut::new();
-        let mut data_buf = BytesMut::new();
+        let data_buf = BytesMut::new();
 
         let logarray_builder = LateLogArrayBufBuilder::new(&mut array_buf);
 
-        let mut builder = SizedDictBufBuilder::new(0, 0, logarray_builder, &mut data_buf);
+        let mut builder = SizedDictBufBuilder::new(0, 0, logarray_builder, data_buf);
         builder.add_all(strings.clone().into_iter().map(|v| Bytes::from_static(v)));
-        let mut logarray_builder = builder.finalize();
+        let (mut logarray_builder, data_buf) = builder.finalize();
         logarray_builder.pop();
         logarray_builder.finalize();
 
diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs
index b904f931..fc0607e3 100644
--- a/src/structure/tfc/typed.rs
+++ b/src/structure/tfc/typed.rs
@@ -534,7 +534,6 @@ struct TypedDictBufBuilder<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> {
     types_present_builder: LateLogArrayBufBuilder<'a, B1>,
     type_offsets_builder: LateLogArrayBufBuilder<'a, B2>,
     sized_dict_buf_builder: SizedDictBufBuilder<'a, B3, B4>,
-    data_buf: &'a mut B4,
     current_datatype: Option<Datatype>,
 }
 
@@ -543,7 +542,7 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a,
         used_types: &'a mut B1,
         type_offsets: &'a mut B2,
         block_offsets: &'a mut B3,
-        data_buf: &'a mut B4,
+        data_buf: B4,
     ) -> Self {
         let types_present_builder = LateLogArrayBufBuilder::new(used_types);
         let type_offsets_builder = LateLogArrayBufBuilder::new(type_offsets);
@@ -552,7 +551,6 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a,
         Self {
             types_present_builder,
             type_offsets_builder,
-            data_buf,
             sized_dict_buf_builder,
             current_datatype: None,
         }
     }
@@ -567,16 +565,12 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a,
         if self.current_datatype != Some(dt) {
             let id_offset =
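The fix in this commit is an ownership change: the data buffer now moves into the builder by value and is handed back from `finalize`, instead of being mutably borrowed for the builder's whole lifetime. The shape of that API reduced to a toy (all names illustrative):

struct Builder<B> {
    buf: B, // owned, not borrowed
}

impl<B: Extend<u8>> Builder<B> {
    fn push(&mut self, byte: u8) {
        self.buf.extend(std::iter::once(byte));
    }
    fn finalize(self) -> B {
        self.buf // the buffer is returned to the caller
    }
}

fn main() {
    let mut b = Builder { buf: Vec::new() };
    b.push(1);
    let buf = b.finalize(); // caller regains ownership and can keep writing
    assert_eq!(buf, vec![1]);
}

Owning the buffer means the builder can be stored in another struct without a lifetime parameter tying it to the caller's stack frame, which is exactly the borrow problem the next commits wrestle with.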
self.sized_dict_buf_builder.id_offset(); let block_offset = self.sized_dict_buf_builder.block_offset(); - let block_offset_builder = self.sized_dict_buf_builder.finalize(); + let (block_offset_builder, data_buf) = self.sized_dict_buf_builder.finalize(); self.types_present_builder.push(dt as u64); self.type_offsets_builder.push(block_offset + 1); - self.sized_dict_buf_builder = SizedDictBufBuilder::new( - id_offset, - block_offset, - block_offset_builder, - self.data_buf, - ); + self.sized_dict_buf_builder = + SizedDictBufBuilder::new(id_offset, block_offset, block_offset_builder, data_buf); } self.sized_dict_buf_builder.add(value) @@ -590,16 +584,17 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, it.map(|(dt, val)| self.add(dt, val)).collect() } - pub fn finalize(mut self) { + pub fn finalize(self) -> B4 { if self.current_datatype == None { panic!("There was nothing added to this dictionary!"); } - let block_offset_builder = self.sized_dict_buf_builder.finalize(); + let (mut block_offset_builder, mut data_buf) = self.sized_dict_buf_builder.finalize(); block_offset_builder.pop(); block_offset_builder.finalize(); self.types_present_builder.finalize(); self.type_offsets_builder.finalize(); + data_buf } } @@ -1020,13 +1015,13 @@ mod tests { &mut used_types_buf, &mut type_offsets_buf, &mut block_offsets_buf, - &mut data_buf, + data_buf, ); vec.into_iter() .map(|(dt, entry)| typed_builder.add(dt, entry)); - typed_builder.finalize(); + let data_buf = typed_builder.finalize(); let used_types = used_types_buf.freeze(); let type_offsets = type_offsets_buf.freeze(); From 265121574e9b1a763d2bc45a55c276ace7d22a4f Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Tue, 29 Nov 2022 23:54:37 +0100 Subject: [PATCH 38/99] Fix borrow issues by adding option --- src/structure/tfc/dict.rs | 11 +++++--- src/structure/tfc/typed.rs | 54 +++++++++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index a102318d..d9c981c1 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -81,7 +81,7 @@ impl<'a, B1: BufMut, B2: BufMut> SizedDictBufBuilder<'a, B1, B2> { it.map(|val| self.add(val)).collect() } - pub fn finalize(mut self) -> (LateLogArrayBufBuilder<'a, B1>, B2) { + pub fn finalize(mut self) -> (LateLogArrayBufBuilder<'a, B1>, B2, u64, u64) { if self.current_block.len() > 0 { let current_block: Vec<&[u8]> = self.current_block.iter().map(|e| e.as_ref()).collect(); let size = build_block_unchecked(&mut self.data_buf, ¤t_block); @@ -89,7 +89,12 @@ impl<'a, B1: BufMut, B2: BufMut> SizedDictBufBuilder<'a, B1, B2> { self.offsets.push(self.block_offset); } - (self.offsets, self.data_buf) + ( + self.offsets, + self.data_buf, + self.block_offset, + self.id_offset, + ) } } @@ -377,7 +382,7 @@ mod tests { let mut builder = SizedDictBufBuilder::new(0, 0, logarray_builder, data_buf); builder.add_all(strings.clone().into_iter().map(|v| Bytes::from_static(v))); - let (mut logarray_builder, data_buf) = builder.finalize(); + let (mut logarray_builder, data_buf, _, _) = builder.finalize(); logarray_builder.pop(); logarray_builder.finalize(); diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index fc0607e3..ba57540a 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -533,7 +533,7 @@ pub fn build_multiple_segments< struct TypedDictBufBuilder<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> { types_present_builder: 
LateLogArrayBufBuilder<'a, B1>, type_offsets_builder: LateLogArrayBufBuilder<'a, B2>, - sized_dict_buf_builder: SizedDictBufBuilder<'a, B3, B4>, + sized_dict_buf_builder: Option>, current_datatype: Option, } @@ -547,7 +547,12 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, let types_present_builder = LateLogArrayBufBuilder::new(used_types); let type_offsets_builder = LateLogArrayBufBuilder::new(type_offsets); let block_offset_builder = LateLogArrayBufBuilder::new(block_offsets); - let sized_dict_buf_builder = SizedDictBufBuilder::new(0, 0, block_offset_builder, data_buf); + let sized_dict_buf_builder = Some(SizedDictBufBuilder::new( + 0, + 0, + block_offset_builder, + data_buf, + )); Self { types_present_builder, type_offsets_builder, @@ -558,22 +563,30 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, pub fn add(&mut self, dt: Datatype, value: Bytes) -> u64 { if self.current_datatype == None { - self.current_datatype = Some(dt); + self.current_datatype = dbg!(Some(dt)); self.types_present_builder.push(dt as u64); } if self.current_datatype != Some(dt) { - let id_offset = self.sized_dict_buf_builder.id_offset(); - let block_offset = self.sized_dict_buf_builder.block_offset(); - let (block_offset_builder, data_buf) = self.sized_dict_buf_builder.finalize(); - + let (block_offset_builder, data_buf, block_offset, id_offset) = + self.sized_dict_buf_builder.take().unwrap().finalize(); + dbg!(dt); + dbg!(id_offset); + dbg!(block_offset); self.types_present_builder.push(dt as u64); self.type_offsets_builder.push(block_offset + 1); - self.sized_dict_buf_builder = - SizedDictBufBuilder::new(id_offset, block_offset, block_offset_builder, data_buf); + self.sized_dict_buf_builder = Some(SizedDictBufBuilder::new( + block_offset, + id_offset, + block_offset_builder, + data_buf, + )); } - self.sized_dict_buf_builder.add(value) + self.sized_dict_buf_builder + .as_mut() + .map(|s| s.add(value)) + .unwrap() } pub fn add_entry(&mut self, dt: Datatype, e: &SizedDictEntry) -> u64 { @@ -588,7 +601,8 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, if self.current_datatype == None { panic!("There was nothing added to this dictionary!"); } - let (mut block_offset_builder, mut data_buf) = self.sized_dict_buf_builder.finalize(); + let (mut block_offset_builder, data_buf, _, _) = + self.sized_dict_buf_builder.unwrap().finalize(); block_offset_builder.pop(); block_offset_builder.finalize(); @@ -970,6 +984,7 @@ mod tests { assert_eq!(vec, actual); } + #[test] fn test_incremental_builder() { let mut vec: Vec<(Datatype, Bytes)> = vec![ "fdsa".to_string().make_entry(), @@ -1009,17 +1024,22 @@ mod tests { let mut used_types_buf = BytesMut::new(); let mut type_offsets_buf = BytesMut::new(); let mut block_offsets_buf = BytesMut::new(); - let mut data_buf = BytesMut::new(); + let data_buf = BytesMut::new(); - let typed_builder = TypedDictBufBuilder::new( + let mut typed_builder = TypedDictBufBuilder::new( &mut used_types_buf, &mut type_offsets_buf, &mut block_offsets_buf, data_buf, ); - vec.into_iter() - .map(|(dt, entry)| typed_builder.add(dt, entry)); + let results: Vec = vec + .into_iter() + .map(|(dt, entry)| { + eprintln!("dt: {dt:?}"); + dbg!(typed_builder.add(dt, entry)) + }) + .collect(); let data_buf = typed_builder.finalize(); @@ -1032,5 +1052,9 @@ mod tests { let res = dict.entry(0); eprintln!("res: {res:?}"); + + let res = dict.entry(1); + eprintln!("res: {res:?}"); + panic!(); } } From 
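Wrapping the inner builder in `Option` enables the standard take-and-replace move used above: `finalize` consumes the builder by value, which a plain field never allows behind `&mut self`. A minimal sketch of the pattern (the `Vec<u8>` stands in for the by-value inner builder):

struct Outer {
    inner: Option<Vec<u8>>,
}

impl Outer {
    fn rotate(&mut self) -> usize {
        let inner = self.inner.take().unwrap(); // move the builder out
        let len = inner.len(); // consume it by value (finalize, here: len)
        self.inner = Some(Vec::new()); // install a fresh one
        len
    }
}

fn main() {
    let mut outer = Outer { inner: Some(vec![1, 2, 3]) };
    assert_eq!(outer.rotate(), 3);
    assert_eq!(outer.inner.as_ref().unwrap().len(), 0);
}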
102900c76dc6ad9745d1264f61b942b9d4f99676 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Wed, 30 Nov 2022 01:13:18 +0100 Subject: [PATCH 39/99] Almost working --- src/structure/logarray.rs | 3 ++- src/structure/tfc/typed.rs | 11 +++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/structure/logarray.rs b/src/structure/logarray.rs index 6fa51562..18725890 100644 --- a/src/structure/logarray.rs +++ b/src/structure/logarray.rs @@ -420,7 +420,8 @@ impl<'a, B: BufMut> LogArrayBufBuilder<'a, B> { pub struct LateLogArrayBufBuilder<'a, B: BufMut> { /// Destination of the log array data buf: &'a mut B, - vals: Vec, + /// NOTE: remove pub + pub vals: Vec, width: u8 } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index ba57540a..f273f1e7 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -45,6 +45,7 @@ impl TypedDict { if type_offset == 0 { last_block_len = data[0]; } else { + eprintln!("type_offset: {type_offset}"); let last_block_offset_of_previous_type = block_offsets.entry(type_offset as usize - 1); last_block_len = data[last_block_offset_of_previous_type as usize]; @@ -573,14 +574,19 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, dbg!(dt); dbg!(id_offset); dbg!(block_offset); + + dbg!(&self.types_present_builder.vals); + dbg!(&self.type_offsets_builder.vals); self.types_present_builder.push(dt as u64); - self.type_offsets_builder.push(block_offset + 1); + self.type_offsets_builder + .push(block_offset_builder.count() as u64 - 1); self.sized_dict_buf_builder = Some(SizedDictBufBuilder::new( block_offset, id_offset, block_offset_builder, data_buf, )); + self.current_datatype = Some(dt); } self.sized_dict_buf_builder @@ -608,6 +614,7 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, self.types_present_builder.finalize(); self.type_offsets_builder.finalize(); + data_buf } } @@ -1049,7 +1056,7 @@ mod tests { let data = data_buf.freeze(); let dict = TypedDict::from_parts(used_types, type_offsets, block_offsets, data); - + eprintln!("dict: {dict:?}"); let res = dict.entry(0); eprintln!("res: {res:?}"); From 724d668913b6ed3b5b14d569c73824bac1703c3d Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Wed, 30 Nov 2022 01:17:50 +0100 Subject: [PATCH 40/99] Working --- src/structure/tfc/typed.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index f273f1e7..6600819b 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -1056,12 +1056,20 @@ mod tests { let data = data_buf.freeze(); let dict = TypedDict::from_parts(used_types, type_offsets, block_offsets, data); - eprintln!("dict: {dict:?}"); - let res = dict.entry(0); - eprintln!("res: {res:?}"); - - let res = dict.entry(1); - eprintln!("res: {res:?}"); + assert_eq!( + dict.entry(1), + ( + Datatype::String, + SizedDictEntry(vec![Bytes::from_static(b"Batman")]) + ) + ); + assert_eq!( + dict.entry(2), + ( + Datatype::String, + SizedDictEntry(vec![Bytes::from_static(b"Batty")]) + ) + ); panic!(); } } From 51744838ca0b70ce34f74788f913cac1f917fad2 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Wed, 30 Nov 2022 09:04:36 +0100 Subject: [PATCH 41/99] Remove debug prints --- src/structure/tfc/typed.rs | 39 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 
6600819b..a9877433 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -45,7 +45,6 @@ impl TypedDict { if type_offset == 0 { last_block_len = data[0]; } else { - eprintln!("type_offset: {type_offset}"); let last_block_offset_of_previous_type = block_offsets.entry(type_offset as usize - 1); last_block_len = data[last_block_offset_of_previous_type as usize]; @@ -509,7 +508,6 @@ pub fn build_multiple_segments< } build_offset_logarray(block_offsets_buf, offsets); - eprintln!("types: {types:?}"); let largest_type = types.last().unwrap(); let largest_type_offset = type_offsets.last().unwrap(); @@ -564,19 +562,13 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, pub fn add(&mut self, dt: Datatype, value: Bytes) -> u64 { if self.current_datatype == None { - self.current_datatype = dbg!(Some(dt)); + self.current_datatype = Some(dt); self.types_present_builder.push(dt as u64); } if self.current_datatype != Some(dt) { let (block_offset_builder, data_buf, block_offset, id_offset) = self.sized_dict_buf_builder.take().unwrap().finalize(); - dbg!(dt); - dbg!(id_offset); - dbg!(block_offset); - - dbg!(&self.types_present_builder.vals); - dbg!(&self.type_offsets_builder.vals); self.types_present_builder.push(dt as u64); self.type_offsets_builder .push(block_offset_builder.count() as u64 - 1); @@ -991,6 +983,10 @@ mod tests { assert_eq!(vec, actual); } + fn convert_entry(e: (Datatype, SizedDictEntry)) -> (Datatype, Bytes) { + (e.0, e.1.to_bytes()) + } + #[test] fn test_incremental_builder() { let mut vec: Vec<(Datatype, Bytes)> = vec![ @@ -1041,11 +1037,9 @@ mod tests { ); let results: Vec = vec + .clone() .into_iter() - .map(|(dt, entry)| { - eprintln!("dt: {dt:?}"); - dbg!(typed_builder.add(dt, entry)) - }) + .map(|(dt, entry)| typed_builder.add(dt, entry)) .collect(); let data_buf = typed_builder.finalize(); @@ -1056,20 +1050,9 @@ mod tests { let data = data_buf.freeze(); let dict = TypedDict::from_parts(used_types, type_offsets, block_offsets, data); - assert_eq!( - dict.entry(1), - ( - Datatype::String, - SizedDictEntry(vec![Bytes::from_static(b"Batman")]) - ) - ); - assert_eq!( - dict.entry(2), - ( - Datatype::String, - SizedDictEntry(vec![Bytes::from_static(b"Batty")]) - ) - ); - panic!(); + + for i in 0..vec.len() { + assert_eq!(vec[i], convert_entry(dict.entry(i as u64 + 1))) + } } } From df22829b4cf4bb5364b9a0bbe5b9bcc097221cfc Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Wed, 30 Nov 2022 11:15:22 +0100 Subject: [PATCH 42/99] Adding suffixless blocks --- src/structure/tfc/block.rs | 95 ++++++++++++++++++++++++++++++++------ src/structure/tfc/dict.rs | 14 ++++-- src/structure/tfc/typed.rs | 59 +++++++++++++++++++---- 3 files changed, 139 insertions(+), 29 deletions(-) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 5b35f58b..9b6b337f 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -37,8 +37,8 @@ impl From for SizedDictError { impl SizedBlockHeader { fn parse(buf: &mut Bytes) -> Result { - let num_entries = buf.get_u8(); - + let cw = buf.get_u8(); + let (record_size, num_entries) = parse_block_control_word(cw); let mut sizes = [0_usize; BLOCK_SIZE - 1]; let mut shareds = [0_usize; BLOCK_SIZE - 1]; let (first_size, _) = vbyte::decode_buf(buf)?; @@ -47,8 +47,12 @@ impl SizedBlockHeader { for i in 0..(num_entries - 1) as usize { let (shared, _) = vbyte::decode_buf(buf)?; - let (size, _) = vbyte::decode_buf(buf)?; - + let size = if record_size == None { + let (size, _) = 
vbyte::decode_buf(buf)?; + size + } else { + record_size.unwrap() as u64 - shared + }; sizes[i] = size as usize; shareds[i] = shared as usize; } @@ -533,7 +537,7 @@ impl<'a> Iterator for SizedBlockIterator<'a> { if self.ix >= self.header.num_entries as usize - 1 { return None; } - let size = self.header.sizes[self.ix]; + let size = dbg!(self.header.sizes[self.ix]); let mut shared = self.header.shareds[self.ix]; for rope_index in 0..last.len() { let x = &mut last[rope_index]; @@ -585,12 +589,60 @@ impl IdLookupResult { } } -pub(crate) fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) -> usize { +pub fn parse_block_control_records(cw: u8) -> u8 { + parse_block_control_word(cw).1 +} + +pub fn parse_block_control_word(cw: u8) -> (Option, u8) { + let records = (cw & ((1 << 3) - 1)) + 1; + let record_size = record_size_decoding(cw); + (record_size, records) +} + +// None => 0 +// Some(1) => 1 +// Some(2) => 2 +// +// Some(4) => 3 +// Some(8) => 4 + +// MSB = 1, fake ids block. +// +// id => byte = id - id_offset +// two more bits + +fn record_size_decoding(enc: u8) -> Option { + match enc >> 3 { + 0 => None, + 3 => Some(4), + 4 => Some(8), + _ => panic!("Ok, this is not known"), + } +} + +fn record_size_encoding(record_size: Option) -> u8 { + match record_size { + None => 0, + Some(4) => 3 << 3, + Some(8) => 4 << 3, + _ => panic!("This is really bad!"), + } +} + +fn create_block_control_word(record_size: Option, records: u8) -> u8 { + records - 1 + record_size_encoding(record_size) +} + +pub(crate) fn build_block_unchecked( + record_size: Option, + buf: &mut B, + slices: &[&[u8]], +) -> usize { let mut size = 0; let slices_len = slices.len(); debug_assert!(slices_len <= BLOCK_SIZE && slices_len != 0); - - buf.put_u8(slices_len as u8); + let cw = dbg!(create_block_control_word(record_size, slices_len as u8)); + buf.put_u8(cw as u8); size += 1; let first = slices[0]; @@ -611,11 +663,14 @@ pub(crate) fn build_block_unchecked(buf: &mut B, slices: &[&[u8]]) -> buf.put_slice(&vbyte[..vbyte_len]); size += vbyte_len; - let suffix_len = cur.len() - common_prefix; - let (vbyte, vbyte_len) = encode_array(suffix_len as u64); - buf.put_slice(&vbyte[..vbyte_len]); - size += vbyte_len; - + if record_size == None { + let suffix_len = cur.len() - common_prefix; + let (vbyte, vbyte_len) = encode_array(suffix_len as u64); + buf.put_slice(&vbyte[..vbyte_len]); + size += vbyte_len; + } else { + eprintln!("Fixed width: {record_size:?}"); + } suffixes.push(&cur[common_prefix..]); last = cur; } @@ -642,7 +697,7 @@ mod tests { fn build_block_bytes(strings: &[&[u8]]) -> Bytes { let mut buf = BytesMut::new(); - build_block_unchecked(&mut buf, &strings); + build_block_unchecked(None, &mut buf, &strings); buf.freeze() } @@ -883,4 +938,16 @@ mod tests { result ); } + + #[test] + fn control_word_round_trip() { + let cw = create_block_control_word(None, 3); + assert_eq!(parse_block_control_word(cw), (None, 3)); + + let cw = create_block_control_word(Some(8), 5); + assert_eq!(parse_block_control_word(cw), (Some(8), 5)); + + let cw = create_block_control_word(Some(12), 6); + assert_eq!(parse_block_control_word(cw), (Some(12), 6)) + } } diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index d9c981c1..557641e7 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -9,6 +9,7 @@ use itertools::Itertools; use super::block::*; pub fn build_dict_unchecked, I: Iterator>( + record_size: Option, start_offset: u64, offsets: &mut Vec, data_buf: &mut B, @@ -20,13 +21,14 @@ pub fn 
build_dict_unchecked<B: BufMut, R: AsRef<[u8]>, I: Iterator<Item = R>>( for chunk in &chunk_iter { let slices: Vec<R> = chunk.collect(); let borrows: Vec<&[u8]> = slices.iter().map(|s| s.as_ref()).collect(); - let size = build_block_unchecked(data_buf, &borrows); + let size = build_block_unchecked(record_size, data_buf, &borrows); offset += size as u64; offsets.push(offset); } } pub struct SizedDictBufBuilder<'a, B1: BufMut, B2: BufMut> { + pub(crate) record_size: Option<u8>, block_offset: u64, id_offset: u64, offsets: LateLogArrayBufBuilder<'a, B1>, @@ -36,12 +38,14 @@ pub struct SizedDictBufBuilder<'a, B1: BufMut, B2: BufMut> { impl<'a, B1: BufMut, B2: BufMut> SizedDictBufBuilder<'a, B1, B2> { pub fn new( + record_size: Option<u8>, block_offset: u64, id_offset: u64, offsets: LateLogArrayBufBuilder<'a, B1>, data_buf: B2, ) -> Self { Self { + record_size, block_offset, id_offset, offsets, @@ -63,7 +67,7 @@ impl<'a, B1: BufMut, B2: BufMut> SizedDictBufBuilder<'a, B1, B2> { self.id_offset += 1; if self.current_block.len() == BLOCK_SIZE { let current_block: Vec<&[u8]> = self.current_block.iter().map(|e| e.as_ref()).collect(); - let size = build_block_unchecked(&mut self.data_buf, &current_block); + let size = build_block_unchecked(self.record_size, &mut self.data_buf, &current_block); self.block_offset += size as u64; self.offsets.push(self.block_offset); @@ -84,7 +88,7 @@ impl<'a, B1: BufMut, B2: BufMut> SizedDictBufBuilder<'a, B1, B2> { pub fn finalize(mut self) -> (LateLogArrayBufBuilder<'a, B1>, B2, u64, u64) { if self.current_block.len() > 0 { let current_block: Vec<&[u8]> = self.current_block.iter().map(|e| e.as_ref()).collect(); - let size = build_block_unchecked(&mut self.data_buf, &current_block); + let size = build_block_unchecked(self.record_size, &mut self.data_buf, &current_block); self.block_offset += size as u64; self.offsets.push(self.block_offset); } @@ -310,7 +314,7 @@ mod tests { vals: I, ) { let mut offsets = Vec::new(); - build_dict_unchecked(0, &mut offsets, data_buf, vals); + build_dict_unchecked(None, 0, &mut offsets, data_buf, vals); build_offset_logarray(array_buf, offsets); } @@ -380,7 +384,7 @@ mod tests { let logarray_builder = LateLogArrayBufBuilder::new(&mut array_buf); - let mut builder = SizedDictBufBuilder::new(0, 0, logarray_builder, data_buf); + let mut builder = SizedDictBufBuilder::new(None, 0, 0, logarray_builder, data_buf); builder.add_all(strings.clone().into_iter().map(|v| Bytes::from_static(v))); let (mut logarray_builder, data_buf, _, _) = builder.finalize(); logarray_builder.pop(); diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index a9877433..edd699b0 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -1,6 +1,7 @@ use crate::structure::{ - tfc::block::BLOCK_SIZE, util::calculate_width, LateLogArrayBufBuilder, LogArrayBufBuilder, - MonotonicLogArray, + tfc::block::{parse_block_control_records, BLOCK_SIZE}, + util::calculate_width, + LateLogArrayBufBuilder, LogArrayBufBuilder, MonotonicLogArray, }; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; @@ -43,12 +44,14 @@ impl TypedDict { for type_offset in type_offsets.iter() { let last_block_len; if type_offset == 0 { - last_block_len = data[0]; + last_block_len = parse_block_control_records(data[0]); } else { let last_block_offset_of_previous_type = block_offsets.entry(type_offset as usize - 1); - last_block_len = data[last_block_offset_of_previous_type as usize]; + last_block_len = parse_block_control_records(data[last_block_offset_of_previous_type as usize]); } + 
eprintln!("last_block_len: {last_block_len}"); let gap = BLOCK_SIZE as u8 - last_block_len; tally += gap as u64; type_id_offsets.push((type_offset + 1) * 8 - tally); @@ -279,6 +282,20 @@ impl Datatype { T::from_lexical(b) } + + pub fn record_size(&self) -> Option { + match self { + Datatype::String => None, + Datatype::UInt32 => Some(4), + Datatype::Int32 => Some(4), + Datatype::UInt64 => Some(8), + Datatype::Int64 => Some(8), + Datatype::Float32 => Some(4), + Datatype::Float64 => Some(8), + Datatype::Decimal => None, + Datatype::BigInt => None, + } + } } pub trait TdbDataType { @@ -473,13 +490,13 @@ impl TdbDataType for Decimal { } pub fn build_segment>( + record_size: Option, offsets: &mut Vec, data_buf: &mut B, iter: I, ) { let slices = iter.map(|val| val.to_lexical()); - - build_dict_unchecked(0, offsets, data_buf, slices); + build_dict_unchecked(record_size, 0, offsets, data_buf, slices); } pub fn build_multiple_segments< @@ -504,7 +521,13 @@ pub fn build_multiple_segments< let start_type_offset = offsets.len(); types.push(key); type_offsets.push(start_type_offset as u64); - build_dict_unchecked(start_offset, &mut offsets, data_buf, group.map(|v| v.1)); + build_dict_unchecked( + key.record_size(), + start_offset, + &mut offsets, + data_buf, + group.map(|v| v.1), + ); } build_offset_logarray(block_offsets_buf, offsets); @@ -547,6 +570,7 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, let type_offsets_builder = LateLogArrayBufBuilder::new(type_offsets); let block_offset_builder = LateLogArrayBufBuilder::new(block_offsets); let sized_dict_buf_builder = Some(SizedDictBufBuilder::new( + None, 0, 0, block_offset_builder, @@ -564,6 +588,9 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, if self.current_datatype == None { self.current_datatype = Some(dt); self.types_present_builder.push(dt as u64); + self.sized_dict_buf_builder + .as_mut() + .map(|b| b.record_size = dt.record_size()); } if self.current_datatype != Some(dt) { @@ -573,6 +600,7 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, self.type_offsets_builder .push(block_offset_builder.count() as u64 - 1); self.sized_dict_buf_builder = Some(SizedDictBufBuilder::new( + dt.record_size(), block_offset, id_offset, block_offset_builder, @@ -618,12 +646,13 @@ mod tests { use super::*; fn build_segment_and_offsets>( + dt: Datatype, array_buf: &mut B1, data_buf: &mut B2, iter: I, ) { let mut offsets = Vec::new(); - build_segment(&mut offsets, data_buf, iter); + build_segment(dt.record_size(), &mut offsets, data_buf, iter); build_offset_logarray(array_buf, offsets); } @@ -652,7 +681,12 @@ mod tests { let mut offsets = BytesMut::new(); let mut data = BytesMut::new(); - build_segment_and_offsets(&mut offsets, &mut data, strings.clone().into_iter()); + build_segment_and_offsets( + Datatype::String, + &mut offsets, + &mut data, + strings.clone().into_iter(), + ); let segment = TypedDictSegment::parse(offsets.freeze(), data.freeze(), 0); @@ -671,7 +705,12 @@ mod tests { let mut offsets = BytesMut::new(); let mut data = BytesMut::new(); - build_segment_and_offsets(&mut offsets, &mut data, nums.clone().into_iter()); + build_segment_and_offsets( + Datatype::UInt64, + &mut offsets, + &mut data, + nums.clone().into_iter(), + ); let segment = TypedDictSegment::parse(offsets.freeze(), data.freeze(), 0); From 8ed589f12e74419051831dfe06f825d7cdf52d9b Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Wed, 30 Nov 2022 11:20:20 +0100 Subject: [PATCH 
43/99] Remove extraneous --- src/structure/tfc/block.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 9b6b337f..3800b43b 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -599,18 +599,6 @@ pub fn parse_block_control_word(cw: u8) -> (Option, u8) { (record_size, records) } -// None => 0 -// Some(1) => 1 -// Some(2) => 2 -// -// Some(4) => 3 -// Some(8) => 4 - -// MSB = 1, fake ids block. -// -// id => byte = id - id_offset -// two more bits - fn record_size_decoding(enc: u8) -> Option { match enc >> 3 { 0 => None, From c209e25d4573a01a639f92b307ef553c44ed83cd Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 30 Nov 2022 12:17:54 +0100 Subject: [PATCH 44/99] refactor progress --- src/layer/builder.rs | 92 ++++++++++++++++++++----------------- src/layer/internal/base.rs | 65 +++++++++++++------------- src/layer/internal/child.rs | 65 +++++++++++++------------- src/layer/internal/mod.rs | 13 +++--- src/storage/cache.rs | 6 +-- src/storage/delta.rs | 2 +- src/storage/file.rs | 40 +++++++++++++++- src/structure/logarray.rs | 14 +++--- src/structure/tfc/dict.rs | 10 ++-- src/structure/tfc/file.rs | 2 +- src/structure/tfc/typed.rs | 26 +++++------ 11 files changed, 193 insertions(+), 142 deletions(-) diff --git a/src/layer/builder.rs b/src/layer/builder.rs index 5c78c91f..822a3bc4 100644 --- a/src/layer/builder.rs +++ b/src/layer/builder.rs @@ -1,39 +1,41 @@ use std::io; +use bytes::{BytesMut, Bytes}; use futures::stream::TryStreamExt; use rayon::prelude::*; +use tfc::dict::SizedDictBufBuilder; use super::layer::*; use crate::storage::*; use crate::structure::util; use crate::structure::*; -pub struct DictionarySetFileBuilder { - node_dictionary_builder: PfcDictFileBuilder, - predicate_dictionary_builder: PfcDictFileBuilder, - value_dictionary_builder: PfcDictFileBuilder, +pub struct DictionarySetFileBuilder { + node_files: DictionaryFiles, + predicate_files: DictionaryFiles, + value_files: TypedDictionaryFiles, + node_dictionary_builder: SizedDictBufBuilder, + predicate_dictionary_builder: SizedDictBufBuilder, + value_dictionary_builder: TypedDictBufBuilder, } impl DictionarySetFileBuilder { pub async fn from_files( node_files: DictionaryFiles, predicate_files: DictionaryFiles, - value_files: DictionaryFiles, + value_files: TypedDictionaryFiles, ) -> io::Result { - let node_dictionary_builder = PfcDictFileBuilder::new( - node_files.blocks_file.open_write().await?, - node_files.offsets_file.open_write().await?, - ); - let predicate_dictionary_builder = PfcDictFileBuilder::new( - predicate_files.blocks_file.open_write().await?, - predicate_files.offsets_file.open_write().await?, - ); - let value_dictionary_builder = PfcDictFileBuilder::new( - value_files.blocks_file.open_write().await?, - value_files.offsets_file.open_write().await?, - ); + let node_dictionary_builder = SizedDictBufBuilder::new(None, 0, 0, LateLogArrayBufBuilder::new(BytesMut::new()), BytesMut::new()); + let predicate_dictionary_builder = SizedDictBufBuilder::new(None, 0, 0, LateLogArrayBufBuilder::new(BytesMut::new()), BytesMut::new()); + let value_dictionary_builder = TypedDictBufBuilder::new(BytesMut::new(), + BytesMut::new(), + BytesMut::new(), + BytesMut::new()); Ok(Self { + node_files, + predicate_files, + value_files, node_dictionary_builder, predicate_dictionary_builder, value_dictionary_builder, @@ -43,91 +45,99 @@ impl DictionarySetFileBuilder { /// Add a node string. 
/// /// Panics if the given node string is not a lexical successor of the previous node string. - pub async fn add_node(&mut self, node: &str) -> io::Result { - let id = self.node_dictionary_builder.add(node).await?; + pub fn add_node(&mut self, node: &str) -> u64 { + let id = self.node_dictionary_builder.add(Bytes::copy_from_slice(node.as_bytes())); - Ok(id) + id } /// Add a predicate string. /// /// Panics if the given predicate string is not a lexical successor of the previous node string. - pub async fn add_predicate(&mut self, predicate: &str) -> io::Result { - let id = self.predicate_dictionary_builder.add(predicate).await?; + pub fn add_predicate(&mut self, predicate: &str) -> u64 { + let id = self.predicate_dictionary_builder.add(Bytes::copy_from_slice(predicate.as_bytes())); - Ok(id) + id } /// Add a value string. /// /// Panics if the given value string is not a lexical successor of the previous value string. - pub async fn add_value(&mut self, value: &str) -> io::Result { - let id = self.value_dictionary_builder.add(value).await?; + pub fn add_value(&mut self, value: &str) -> u64 { + let id = self.value_dictionary_builder.add(Datatype::String, + Bytes::copy_from_slice(value.as_bytes())); - Ok(id) + id } /// Add nodes from an iterable. /// /// Panics if the nodes are not in lexical order, or if previous added nodes are a lexical succesor of any of these nodes. - pub async fn add_nodes + Unpin + Send + Sync>( + pub fn add_nodes + Unpin + Send + Sync>( &mut self, nodes: I, - ) -> io::Result> + ) -> Vec where ::IntoIter: Unpin + Send + Sync, { let mut ids = Vec::new(); for node in nodes { - let id = self.add_node(&node).await?; + let id = self.add_node(&node); ids.push(id); } - Ok(ids) + ids } /// Add predicates from an iterable. /// /// Panics if the predicates are not in lexical order, or if previous added predicates are a lexical succesor of any of these predicates. - pub async fn add_predicates + Unpin + Send + Sync>( + pub fn add_predicates + Unpin + Send + Sync>( &mut self, predicates: I, - ) -> io::Result> + ) -> Vec where ::IntoIter: Unpin + Send + Sync, { let mut ids = Vec::new(); for predicate in predicates { - let id = self.add_predicate(&predicate).await?; + let id = self.add_predicate(&predicate); ids.push(id); } - Ok(ids) + ids } /// Add values from an iterable. /// /// Panics if the values are not in lexical order, or if previous added values are a lexical succesor of any of these values. 
- pub async fn add_values + Unpin + Send + Sync>( + pub fn add_values + Unpin + Send + Sync>( &mut self, values: I, - ) -> io::Result> + ) -> Vec where ::IntoIter: Unpin + Send + Sync, { let mut ids = Vec::new(); for value in values { - let id = self.add_value(&value).await?; + let id = self.add_value(&value); ids.push(id); } - Ok(ids) + ids } pub async fn finalize(self) -> io::Result<()> { - self.node_dictionary_builder.finalize().await?; - self.predicate_dictionary_builder.finalize().await?; - self.value_dictionary_builder.finalize().await?; + let (node_offsets_builder, mut node_data_buf, _, _) = self.node_dictionary_builder.finalize(); + let mut node_offsets_buf = node_offsets_builder.finalize(); + let (predicate_offsets_builder, mut predicate_data_buf, _, _) = self.predicate_dictionary_builder.finalize(); + let mut predicate_offsets_buf = predicate_offsets_builder.finalize(); + let (mut value_types_present_buf, mut value_type_offsets_buf, mut value_offsets_buf, mut value_data_buf) = self.value_dictionary_builder.finalize(); + + self.node_files.write_all_from_bufs(&mut node_data_buf, &mut node_offsets_buf).await?; + self.predicate_files.write_all_from_bufs(&mut predicate_data_buf, &mut predicate_offsets_buf).await?; + + self.value_files.write_all_from_bufs(&mut value_types_present_buf, &mut value_type_offsets_buf, &mut value_offsets_buf, &mut value_data_buf).await?; Ok(()) } diff --git a/src/layer/internal/base.rs b/src/layer/internal/base.rs index 1a18df01..5d81317a 100644 --- a/src/layer/internal/base.rs +++ b/src/layer/internal/base.rs @@ -49,27 +49,28 @@ impl BaseLayer { } pub fn load(name: [u32; 5], maps: BaseLayerMaps) -> InternalLayer { - let node_dictionary = PfcDict::parse( + let node_dictionary = TypedDictSegment::parse( maps.node_dictionary_maps.blocks_map, maps.node_dictionary_maps.offsets_map, - ) - .unwrap(); - let predicate_dictionary = PfcDict::parse( + 0, + ); + let predicate_dictionary = TypedDictSegment::parse( maps.predicate_dictionary_maps.blocks_map, maps.predicate_dictionary_maps.offsets_map, - ) - .unwrap(); - let value_dictionary = PfcDict::parse( + 0, + ); + let value_dictionary = TypedDict::from_parts( + maps.value_dictionary_maps.types_present_map, + maps.value_dictionary_maps.type_offsets_map, maps.value_dictionary_maps.blocks_map, maps.value_dictionary_maps.offsets_map, - ) - .unwrap(); + ); let node_value_idmap = match maps.id_map_maps.node_value_idmap_maps { None => IdMap::default(), Some(maps) => IdMap::from_maps( maps, - util::calculate_width((node_dictionary.len() + value_dictionary.len()) as u64), + util::calculate_width((node_dictionary.num_entries() + value_dictionary.num_entries()) as u64), ), }; @@ -77,7 +78,7 @@ impl BaseLayer { None => IdMap::default(), Some(map) => IdMap::from_maps( map, - util::calculate_width(predicate_dictionary.len() as u64), + util::calculate_width(predicate_dictionary.num_entries() as u64), ), }; @@ -170,76 +171,76 @@ impl BaseLayerFileBuilder { /// Add a node string. /// /// Panics if the given node string is not a lexical successor of the previous node string. - pub async fn add_node(&mut self, node: &str) -> io::Result { - let id = self.builder.add_node(node).await?; + pub fn add_node(&mut self, node: &str) -> u64 { + let id = self.builder.add_node(node); - Ok(id) + id } /// Add a predicate string. /// /// Panics if the given predicate string is not a lexical successor of the previous node string. 
- pub async fn add_predicate(&mut self, predicate: &str) -> io::Result { - let id = self.builder.add_predicate(predicate).await?; + pub fn add_predicate(&mut self, predicate: &str) -> u64 { + let id = self.builder.add_predicate(predicate); - Ok(id) + id } /// Add a value string. /// /// Panics if the given value string is not a lexical successor of the previous value string. - pub async fn add_value(&mut self, value: &str) -> io::Result { - let id = self.builder.add_value(value).await?; + pub fn add_value(&mut self, value: &str) -> u64 { + let id = self.builder.add_value(value); - Ok(id) + id } /// Add nodes from an iterable. /// /// Panics if the nodes are not in lexical order, or if previous added nodes are a lexical succesor of any of these nodes. - pub async fn add_nodes + Send>( + pub fn add_nodes + Send>( &mut self, nodes: I, - ) -> io::Result> + ) -> Vec where ::IntoIter: Unpin + Send + Sync, I: Unpin + Sync, { - let ids = self.builder.add_nodes(nodes).await?; + let ids = self.builder.add_nodes(nodes); - Ok(ids) + ids } /// Add predicates from an iterable. /// /// Panics if the predicates are not in lexical order, or if previous added predicates are a lexical succesor of any of these predicates. - pub async fn add_predicates + Send>( + pub fn add_predicates + Send>( &mut self, predicates: I, - ) -> io::Result> + ) -> Vec where ::IntoIter: Unpin + Send + Sync, I: Unpin + Sync, { - let ids = self.builder.add_predicates(predicates).await?; + let ids = self.builder.add_predicates(predicates); - Ok(ids) + ids } /// Add values from an iterable. /// /// Panics if the values are not in lexical order, or if previous added values are a lexical succesor of any of these values. - pub async fn add_values + Send>( + pub fn add_values + Send>( &mut self, values: I, - ) -> io::Result> + ) -> Vec where ::IntoIter: Unpin + Send + Sync, I: Unpin + Sync, { - let ids = self.builder.add_values(values).await?; + let ids = self.builder.add_values(values); - Ok(ids) + ids } /// Turn this builder into a phase 2 builder that will take triple data. 
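[A quick orientation sketch of the calling convention this refactor is moving toward: every add_* call is now synchronous and only appends to in-memory BytesMut buffers, and the single await point that touches the filesystem is finalize. This is illustration only, assuming the DictionarySetFileBuilder API exactly as changed above; the F: 'static + FileLoad + FileStore bound and the example URIs are assumptions, not part of this patch.

    // Sketch, not part of the patch. Imports follow the surrounding module
    // (std::io, crate::storage::*); `F` stands for any file backend.
    async fn build_dictionaries<F: 'static + FileLoad + FileStore>(
        node_files: DictionaryFiles<F>,
        predicate_files: DictionaryFiles<F>,
        value_files: TypedDictionaryFiles<F>,
    ) -> io::Result<()> {
        let mut builder =
            DictionarySetFileBuilder::from_files(node_files, predicate_files, value_files)
                .await?;
        // Ids come back immediately; no per-entry awaits or file writes.
        let _node = builder.add_node("http://example.org/node");
        let _predicate = builder.add_predicate("http://example.org/predicate");
        let _value = builder.add_value("some value");
        // All accumulated buffers are flushed to the files in one step.
        builder.finalize().await
    }
]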
diff --git a/src/layer/internal/child.rs b/src/layer/internal/child.rs index c688e66b..effd0fc1 100644 --- a/src/layer/internal/child.rs +++ b/src/layer/internal/child.rs @@ -62,21 +62,22 @@ impl ChildLayer { } pub fn load(name: [u32; 5], parent: Arc, maps: ChildLayerMaps) -> InternalLayer { - let node_dictionary = PfcDict::parse( + let node_dictionary = TypedDictSegment::parse( maps.node_dictionary_maps.blocks_map, maps.node_dictionary_maps.offsets_map, - ) - .unwrap(); - let predicate_dictionary = PfcDict::parse( + 0 + ); + let predicate_dictionary = TypedDictSegment::parse( maps.predicate_dictionary_maps.blocks_map, maps.predicate_dictionary_maps.offsets_map, - ) - .unwrap(); - let value_dictionary = PfcDict::parse( + 0, + ); + let value_dictionary = TypedDict::from_parts( + maps.value_dictionary_maps.types_present_map, + maps.value_dictionary_maps.type_offsets_map, maps.value_dictionary_maps.blocks_map, maps.value_dictionary_maps.offsets_map, - ) - .unwrap(); + ); let parent_node_value_count = parent.node_and_value_count(); let parent_predicate_count = parent.predicate_count(); @@ -85,7 +86,7 @@ impl ChildLayer { None => IdMap::default(), Some(maps) => IdMap::from_maps( maps, - util::calculate_width((node_dictionary.len() + value_dictionary.len()) as u64), + util::calculate_width((node_dictionary.num_entries() + value_dictionary.num_entries()) as u64), ), }; @@ -93,7 +94,7 @@ impl ChildLayer { None => IdMap::default(), Some(map) => IdMap::from_maps( map, - util::calculate_width(predicate_dictionary.len() as u64), + util::calculate_width(predicate_dictionary.num_entries() as u64), ), }; @@ -233,10 +234,10 @@ impl ChildLayerFileBuil /// Does nothing if the node already exists in the parent, and /// panics if the given node string is not a lexical successor of /// the previous node string. - pub async fn add_node(&mut self, node: &str) -> io::Result { + pub fn add_node(&mut self, node: &str) -> u64 { match self.parent.subject_id(node) { - None => self.builder.add_node(node).await, - Some(id) => Ok(id), + None => self.builder.add_node(node), + Some(id) => id, } } @@ -245,10 +246,10 @@ impl ChildLayerFileBuil /// Does nothing if the predicate already exists in the paretn, and /// panics if the given predicate string is not a lexical successor of /// the previous predicate string. - pub async fn add_predicate(&mut self, predicate: &str) -> io::Result { + pub fn add_predicate(&mut self, predicate: &str) -> u64 { match self.parent.predicate_id(predicate) { - None => self.builder.add_predicate(predicate).await, - Some(id) => Ok(id), + None => self.builder.add_predicate(predicate), + Some(id) => id, } } @@ -257,10 +258,10 @@ impl ChildLayerFileBuil /// Does nothing if the value already exists in the paretn, and /// panics if the given value string is not a lexical successor of /// the previous value string. - pub async fn add_value(&mut self, value: &str) -> io::Result { + pub fn add_value(&mut self, value: &str) -> u64 { match self.parent.object_value_id(value) { - None => self.builder.add_value(value).await, - Some(id) => Ok(id), + None => self.builder.add_value(value), + Some(id) => id, } } @@ -270,21 +271,21 @@ impl ChildLayerFileBuil /// added nodes are a lexical succesor of any of these /// nodes. Skips any nodes that are already part of the base /// layer. 
- pub async fn add_nodes + Send>( + pub fn add_nodes + Send>( &mut self, nodes: I, - ) -> io::Result> + ) -> Vec where ::IntoIter: Send, { // TODO bulk check node existence let mut result = Vec::new(); for node in nodes { - let id = self.add_node(&node).await?; + let id = self.add_node(&node); result.push(id); } - Ok(result) + result } /// Add predicates from an iterable. @@ -293,21 +294,21 @@ impl ChildLayerFileBuil /// previous added predicates are a lexical succesor of any of /// these predicates. Skips any predicates that are already part /// of the base layer. - pub async fn add_predicates + Send>( + pub fn add_predicates + Send>( &mut self, predicates: I, - ) -> io::Result> + ) -> Vec where ::IntoIter: Send, { // TODO bulk check predicate existence let mut result = Vec::new(); for predicate in predicates { - let id = self.add_predicate(&predicate).await?; + let id = self.add_predicate(&predicate); result.push(id); } - Ok(result) + result } /// Add values from an iterable. @@ -316,21 +317,21 @@ impl ChildLayerFileBuil /// added values are a lexical succesor of any of these /// values. Skips any nodes that are already part of the base /// layer. - pub async fn add_values + Send>( + pub fn add_values + Send>( &mut self, values: I, - ) -> io::Result> + ) -> Vec where ::IntoIter: Send, { // TODO bulk check predicate existence let mut result = Vec::new(); for value in values { - let id = self.add_value(&value).await?; + let id = self.add_value(&value); result.push(id); } - Ok(result) + result } /// Turn this builder into a phase 2 builder that will take triple data. diff --git a/src/layer/internal/mod.rs b/src/layer/internal/mod.rs index 169f6462..ca07b5c0 100644 --- a/src/layer/internal/mod.rs +++ b/src/layer/internal/mod.rs @@ -27,6 +27,7 @@ pub enum InternalLayer { } use InternalLayer::*; +use tfc::block::IdLookupResult; impl InternalLayer { pub fn name(&self) -> [u32; 5] { @@ -221,14 +222,14 @@ impl InternalLayer { } pub fn predicate_dict_len(&self) -> usize { - self.predicate_dictionary().len() + self.predicate_dictionary().num_entries() } - pub fn predicate_dict_id(&self, predicate: &str) -> Option { + pub fn predicate_dict_id(&self, predicate: &str) -> IdLookupResult { self.predicate_dictionary().id(predicate) } - pub fn node_dict_id(&self, subject: &str) -> Option { + pub fn node_dict_id(&self, subject: &str) -> IdLookupResult { self.node_dictionary().id(subject) } @@ -237,15 +238,15 @@ impl InternalLayer { } pub fn node_dict_len(&self) -> usize { - self.node_dictionary().len() + self.node_dictionary().num_entries() } - pub fn value_dict_id(&self, value: &str) -> Option { + pub fn value_dict_id(&self, value: &str) -> IdLookupResult { self.value_dictionary().id(value) } pub fn value_dict_len(&self) -> usize { - self.value_dictionary().len() + self.value_dictionary().num_entries() } pub fn value_dict_get(&self, id: usize) -> Option { diff --git a/src/storage/cache.rs b/src/storage/cache.rs index 2d00302b..abb5bca0 100644 --- a/src/storage/cache.rs +++ b/src/storage/cache.rs @@ -176,7 +176,7 @@ impl LayerStore for CachedLayerStore { if let Some(layer) = self.cache.get_layer_from_cache(name) { // unless it is a rollup if !layer.is_rollup() { - return Ok(Some(layer.node_dictionary().len() as u64)); + return Ok(Some(layer.node_dictionary().num_entries() as u64)); } } @@ -188,7 +188,7 @@ impl LayerStore for CachedLayerStore { if let Some(layer) = self.cache.get_layer_from_cache(name) { // unless it is a rollup if !layer.is_rollup() { - return Ok(Some(layer.predicate_dictionary().len() as 
u64)); + return Ok(Some(layer.predicate_dictionary().num_entries() as u64)); } } @@ -200,7 +200,7 @@ impl LayerStore for CachedLayerStore { if let Some(layer) = self.cache.get_layer_from_cache(name) { // unless it is a rollup if !layer.is_rollup() { - return Ok(Some(layer.value_dictionary().len() as u64)); + return Ok(Some(layer.value_dictionary().num_entries() as u64)); } } diff --git a/src/storage/delta.rs b/src/storage/delta.rs index e93e80e1..16fe6353 100644 --- a/src/storage/delta.rs +++ b/src/storage/delta.rs @@ -207,7 +207,7 @@ async fn dictionary_rollup_upto TypedDictionaryFiles { offsets_map, }) } + + pub async fn write_all_from_bufs(&self, types_present_buf: &mut B1, type_offsets_buf: &mut B2, blocks_buf: &mut B3, offsets_buf: &mut B4) -> io::Result<()> { + let mut types_present_writer = self.types_present_file.open_write().await?; + let mut type_offsets_writer = self.type_offsets_file.open_write().await?; + let mut blocks_writer = self.blocks_file.open_write().await?; + let mut offsets_writer = self.offsets_file.open_write().await?; + + types_present_writer.write_all_buf(types_present_buf).await?; + type_offsets_writer.write_all_buf(type_offsets_buf).await?; + blocks_writer.write_all_buf(blocks_buf).await?; + offsets_writer.write_all_buf(offsets_buf).await?; + + blocks_writer.flush(); + blocks_writer.sync_all(); + + offsets_writer.flush(); + offsets_writer.sync_all(); + + Ok(()) + } } #[derive(Clone)] @@ -315,6 +335,22 @@ impl DictionaryFiles { offsets_map, }) } + + pub async fn write_all_from_bufs(&self, blocks_buf: &mut B1, offsets_buf: &mut B2) -> io::Result<()> { + let mut blocks_writer = self.blocks_file.open_write().await?; + let mut offsets_writer = self.offsets_file.open_write().await?; + + blocks_writer.write_all_buf(blocks_buf).await?; + offsets_writer.write_all_buf(offsets_buf).await?; + + blocks_writer.flush(); + blocks_writer.sync_all(); + + offsets_writer.flush(); + offsets_writer.sync_all(); + + Ok(()) + } } #[derive(Clone)] diff --git a/src/structure/logarray.rs b/src/structure/logarray.rs index 18725890..83330736 100644 --- a/src/structure/logarray.rs +++ b/src/structure/logarray.rs @@ -417,16 +417,16 @@ impl<'a, B: BufMut> LogArrayBufBuilder<'a, B> { } } -pub struct LateLogArrayBufBuilder<'a, B: BufMut> { +pub struct LateLogArrayBufBuilder { /// Destination of the log array data - buf: &'a mut B, + buf: B, /// NOTE: remove pub pub vals: Vec, width: u8 } -impl<'a, B: BufMut> LateLogArrayBufBuilder<'a, B> { - pub fn new(buf: &'a mut B) -> Self { +impl LateLogArrayBufBuilder { + pub fn new(buf: B) -> Self { Self { buf, vals: Vec::new(), @@ -460,10 +460,12 @@ impl<'a, B: BufMut> LateLogArrayBufBuilder<'a, B> { self.vals.pop() } - pub fn finalize(self) { - let mut builder = LogArrayBufBuilder::new(self.buf, self.width); + pub fn finalize(self) -> B { + let mut builder = LogArrayBufBuilder::new(&mut self.buf, self.width); builder.push_vec(self.vals); builder.finalize(); + + self.buf } } diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 63d8286b..2ae3b585 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -27,21 +27,21 @@ pub fn build_dict_unchecked, I: Iterator>( } } -pub struct SizedDictBufBuilder<'a, B1: BufMut, B2: BufMut> { +pub struct SizedDictBufBuilder { pub(crate) record_size: Option, block_offset: u64, id_offset: u64, - offsets: LateLogArrayBufBuilder<'a, B1>, + offsets: LateLogArrayBufBuilder, data_buf: B2, current_block: Vec, } -impl<'a, B1: BufMut, B2: BufMut> SizedDictBufBuilder<'a, B1, B2> { +impl 
<B1: BufMut, B2: BufMut> SizedDictBufBuilder<B1, B2> { pub fn new( record_size: Option<u8>, block_offset: u64, id_offset: u64, - offsets: LateLogArrayBufBuilder<'a, B1>, + offsets: LateLogArrayBufBuilder<B1>, data_buf: B2, ) -> Self { Self { @@ -85,7 +85,7 @@ impl<'a, B1: BufMut, B2: BufMut> SizedDictBufBuilder<'a, B1, B2> { it.map(|val| self.add(val)).collect() } - pub fn finalize(mut self) -> (LateLogArrayBufBuilder<'a, B1>, B2, u64, u64) { + pub fn finalize(mut self) -> (LateLogArrayBufBuilder<B1>, B2, u64, u64) { if self.current_block.len() > 0 { let current_block: Vec<&[u8]> = self.current_block.iter().map(|e| e.as_ref()).collect(); let size = build_block_unchecked(self.record_size, &mut self.data_buf, &current_block); diff --git a/src/structure/tfc/file.rs b/src/structure/tfc/file.rs index e9b206cd..93ea3185 100644 --- a/src/structure/tfc/file.rs +++ b/src/structure/tfc/file.rs @@ -51,7 +51,7 @@ pub async fn merge_string_dictionaries< let mut offsets = Vec::new(); let mut offsets_buf = BytesMut::new(); let mut data_buf = BytesMut::new(); - build_dict_unchecked(0, &mut offsets, &mut data_buf, sorted_iterator); + build_dict_unchecked(None, 0, &mut offsets, &mut data_buf, sorted_iterator); build_offset_logarray(&mut offsets_buf, offsets); diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 13949f55..20f3f926 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -567,18 +567,18 @@ pub fn build_multiple_segments< type_offsets_builder.finalize(); } -struct TypedDictBufBuilder<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> { - types_present_builder: LateLogArrayBufBuilder<'a, B1>, - type_offsets_builder: LateLogArrayBufBuilder<'a, B2>, - sized_dict_buf_builder: Option<SizedDictBufBuilder<'a, B3, B4>>, +pub struct TypedDictBufBuilder<B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> { + types_present_builder: LateLogArrayBufBuilder<B1>, + type_offsets_builder: LateLogArrayBufBuilder<B2>, + sized_dict_buf_builder: Option<SizedDictBufBuilder<B3, B4>>, current_datatype: Option<Datatype>, } -impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, B1, B2, B3, B4> { +impl<B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<B1, B2, B3, B4> { pub fn new( - used_types: &'a mut B1, - type_offsets: &'a mut B2, - block_offsets: &'a mut B3, + used_types: B1, + type_offsets: B2, + block_offsets: B3, data_buf: B4, ) -> Self { let types_present_builder = LateLogArrayBufBuilder::new(used_types); @@ -638,19 +638,19 @@ impl<'a, B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<'a, it.map(|(dt, val)| self.add(dt, val)).collect() } - pub fn finalize(self) -> B4 { + pub fn finalize(self) -> (B1, B2, B3, B4) { if self.current_datatype == None { panic!("There was nothing added to this dictionary!"); } let (mut block_offset_builder, data_buf, _, _) = self.sized_dict_buf_builder.unwrap().finalize(); block_offset_builder.pop(); - block_offset_builder.finalize(); + let block_offsets_buf = block_offset_builder.finalize(); - self.types_present_builder.finalize(); - self.type_offsets_builder.finalize(); + let types_present_buf = self.types_present_builder.finalize(); + let type_offsets_buf = self.type_offsets_builder.finalize(); - data_buf + (types_present_buf, type_offsets_buf, block_offsets_buf, data_buf) } } From a3768a3169a69300e59cdec8bf46fccff2e65263 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Wed, 30 Nov 2022 15:38:44 +0100 Subject: [PATCH 45/99] adding parse of control word logic --- src/structure/tfc/block.rs | 15 ++++++++++++--- src/structure/tfc/typed.rs | 6 ++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 3800b43b..2b7bb71a 100644 --- 
a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -613,7 +613,10 @@ fn record_size_encoding(record_size: Option) -> u8 { None => 0, Some(4) => 3 << 3, Some(8) => 4 << 3, - _ => panic!("This is really bad!"), + _ => { + dbg!(record_size); + panic!("This is really bad!") + } } } @@ -929,13 +932,19 @@ mod tests { #[test] fn control_word_round_trip() { + let cw = create_block_control_word(None, 1); + assert_eq!(parse_block_control_word(cw), (None, 1)); + + let cw = create_block_control_word(None, 8); + assert_eq!(parse_block_control_word(cw), (None, 8)); + let cw = create_block_control_word(None, 3); assert_eq!(parse_block_control_word(cw), (None, 3)); let cw = create_block_control_word(Some(8), 5); assert_eq!(parse_block_control_word(cw), (Some(8), 5)); - let cw = create_block_control_word(Some(12), 6); - assert_eq!(parse_block_control_word(cw), (Some(12), 6)) + let cw = create_block_control_word(Some(4), 6); + assert_eq!(parse_block_control_word(cw), (Some(4), 6)) } } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index edd699b0..18d09b61 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -57,8 +57,10 @@ impl TypedDict { type_id_offsets.push((type_offset + 1) * 8 - tally); } - let last_gap = - BLOCK_SIZE - data[block_offsets.entry(block_offsets.len() - 1) as usize] as usize; + let last_gap = BLOCK_SIZE + - parse_block_control_records( + data[block_offsets.entry(block_offsets.len() - 1) as usize], + ) as usize; let num_entries = (block_offsets.len() + 1) * BLOCK_SIZE - tally as usize - last_gap; Self { From 25d7d21bbbcf0f7fccd654e586cce7d713f81ea6 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 30 Nov 2022 15:40:27 +0100 Subject: [PATCH 46/99] make lexical conversion more generic --- src/structure/tfc/typed.rs | 357 ++++++++++++++++++++----------------- 1 file changed, 191 insertions(+), 166 deletions(-) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 18d09b61..6f0affad 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -73,8 +73,8 @@ impl TypedDict { } } - pub fn id(&self, v: &T) -> IdLookupResult { - let (datatype, bytes) = v.make_entry(); + pub fn id>(&self, v: &Q) -> IdLookupResult { + let (datatype, bytes) = T::make_entry(v); self.id_slice(datatype, bytes.as_ref()) } @@ -257,8 +257,8 @@ impl TypedDictSegment { T::from_lexical(entry.into_buf()) } - pub fn id(&self, val: &T) -> IdLookupResult { - let slice = val.to_lexical(); + pub fn id>(&self, val: &Q) -> IdLookupResult { + let slice = T::to_lexical(val); self.dict.id(&slice[..]) } } @@ -302,13 +302,26 @@ impl Datatype { pub trait TdbDataType { fn datatype() -> Datatype; + fn from_lexical(b: B) -> Self; - fn to_lexical(&self) -> Bytes; + fn to_lexical(val: &T) -> Bytes + where T: ToLexical + ?Sized { + val.to_lexical() + } - fn from_lexical(b: B) -> Self; + fn make_entry(val: &T) -> (Datatype, Bytes) + where T: ToLexical + ?Sized{ + (Self::datatype(), val.to_lexical()) + } +} + +pub trait ToLexical { + fn to_lexical(&self) -> Bytes; +} - fn make_entry(&self) -> (Datatype, Bytes) { - (Self::datatype(), self.to_lexical()) +impl> ToLexical for T { + fn to_lexical(&self) -> Bytes { + Bytes::copy_from_slice(self.as_ref().as_bytes()) } } @@ -317,10 +330,6 @@ impl TdbDataType for String { Datatype::String } - fn to_lexical(&self) -> Bytes { - Bytes::copy_from_slice(self.as_bytes()) - } - fn from_lexical(mut b: B) -> Self { let mut vec = vec![0; b.remaining()]; b.copy_to_slice(&mut vec); @@ -333,16 +342,18 @@ impl 
TdbDataType for u32 { Datatype::UInt32 } + fn from_lexical(b: B) -> Self { + b.reader().read_u32::().unwrap() + } +} + +impl ToLexical for u32 { fn to_lexical(&self) -> Bytes { let mut buf = BytesMut::new().writer(); buf.write_u32::(*self).unwrap(); buf.into_inner().freeze() } - - fn from_lexical(b: B) -> Self { - b.reader().read_u32::().unwrap() - } } const I32_BYTE_MASK: u32 = 0b1000_0000 << (3 * 8); @@ -351,17 +362,19 @@ impl TdbDataType for i32 { Datatype::Int32 } + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u32::().unwrap(); + (I32_BYTE_MASK ^ i) as i32 + } +} + +impl ToLexical for i32 { fn to_lexical(&self) -> Bytes { let sign_flip = I32_BYTE_MASK ^ (*self as u32); let mut buf = BytesMut::new().writer(); buf.write_u32::(sign_flip).unwrap(); buf.into_inner().freeze() } - - fn from_lexical(b: B) -> Self { - let i = b.reader().read_u32::().unwrap(); - (I32_BYTE_MASK ^ i) as i32 - } } impl TdbDataType for u64 { @@ -369,16 +382,18 @@ impl TdbDataType for u64 { Datatype::UInt64 } + fn from_lexical(b: B) -> Self { + b.reader().read_u64::().unwrap() + } +} + +impl ToLexical for u64 { fn to_lexical(&self) -> Bytes { let mut buf = BytesMut::new().writer(); buf.write_u64::(*self).unwrap(); buf.into_inner().freeze() } - - fn from_lexical(b: B) -> Self { - b.reader().read_u64::().unwrap() - } } const I64_BYTE_MASK: u64 = 0b1000_0000 << (7 * 8); @@ -387,17 +402,19 @@ impl TdbDataType for i64 { Datatype::Int64 } + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u64::().unwrap(); + (I64_BYTE_MASK ^ i) as i64 + } +} + +impl ToLexical for i64 { fn to_lexical(&self) -> Bytes { let sign_flip = I64_BYTE_MASK ^ (*self as u64); let mut buf = BytesMut::new().writer(); buf.write_u64::(sign_flip).unwrap(); buf.into_inner().freeze() } - - fn from_lexical(b: B) -> Self { - let i = b.reader().read_u64::().unwrap(); - (I64_BYTE_MASK ^ i) as i64 - } } const F32_SIGN_MASK: u32 = 0x8000_0000; @@ -407,6 +424,17 @@ impl TdbDataType for f32 { Datatype::Float32 } + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u32::().unwrap(); + if i & F32_SIGN_MASK > 0 { + f32::from_bits(i ^ F32_SIGN_MASK) + } else { + f32::from_bits(i ^ F32_COMPLEMENT) + } + } +} + +impl ToLexical for f32 { fn to_lexical(&self) -> Bytes { let f = *self; let g: u32; @@ -419,15 +447,6 @@ impl TdbDataType for f32 { buf.write_u32::(g).unwrap(); buf.into_inner().freeze() } - - fn from_lexical(b: B) -> Self { - let i = b.reader().read_u32::().unwrap(); - if i & F32_SIGN_MASK > 0 { - f32::from_bits(i ^ F32_SIGN_MASK) - } else { - f32::from_bits(i ^ F32_COMPLEMENT) - } - } } const F64_SIGN_MASK: u64 = 0x8000_0000_0000_0000; @@ -437,6 +456,17 @@ impl TdbDataType for f64 { Datatype::Float64 } + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u64::().unwrap(); + if i & F64_SIGN_MASK > 0 { + f64::from_bits(i ^ F64_SIGN_MASK) + } else { + f64::from_bits(i ^ F64_COMPLEMENT) + } + } +} + +impl ToLexical for f64 { fn to_lexical(&self) -> Bytes { let f = *self; let g: u64; @@ -449,15 +479,6 @@ impl TdbDataType for f64 { buf.write_u64::(g).unwrap(); buf.into_inner().freeze() } - - fn from_lexical(b: B) -> Self { - let i = b.reader().read_u64::().unwrap(); - if i & F64_SIGN_MASK > 0 { - f64::from_bits(i ^ F64_SIGN_MASK) - } else { - f64::from_bits(i ^ F64_COMPLEMENT) - } - } } impl TdbDataType for Integer { @@ -465,15 +486,17 @@ impl TdbDataType for Integer { Datatype::BigInt } - fn to_lexical(&self) -> Bytes { - Bytes::from(bigint_to_storage(self.clone())) - } - fn from_lexical(mut b: B) -> Self { storage_to_bigint(&mut 
b) } } +impl ToLexical for Integer { + fn to_lexical(&self) -> Bytes { + Bytes::from(bigint_to_storage(self.clone())) + } +} + #[derive(PartialEq, Debug)] pub struct Decimal(String); @@ -482,16 +505,18 @@ impl TdbDataType for Decimal { Datatype::Decimal } - fn to_lexical(&self) -> Bytes { - Bytes::from(decimal_to_storage(&self.0)) - } - fn from_lexical(mut b: B) -> Self { Decimal(storage_to_decimal(&mut b)) } } -pub fn build_segment>( +impl ToLexical for Decimal { + fn to_lexical(&self) -> Bytes { + Bytes::from(decimal_to_storage(&self.0)) + } +} + +pub fn build_segment, I: Iterator>( record_size: Option, offsets: &mut Vec, data_buf: &mut B, @@ -647,7 +672,7 @@ mod tests { use super::*; - fn build_segment_and_offsets>( + fn build_segment_and_offsets, I: Iterator>( dt: Datatype, array_buf: &mut B1, data_buf: &mut B2, @@ -726,9 +751,9 @@ mod tests { fn cycle(d: D) where - D: TdbDataType + PartialEq + Debug, + D: TdbDataType + PartialEq + Debug + ToLexical, { - let j = D::from_lexical(d.to_lexical()); + let j = D::from_lexical(::to_lexical(&d)); assert_eq!(d, j) } @@ -817,19 +842,19 @@ mod tests { #[test] fn test_multi_segment() { let mut vec: Vec<(Datatype, Bytes)> = vec![ - Decimal("-1".to_string()).make_entry(), - "asdf".to_string().make_entry(), - Decimal("-12342343.2348973".to_string()).make_entry(), - "Batty".to_string().make_entry(), - "Batman".to_string().make_entry(), - (-3_i64).make_entry(), - Decimal("2348973".to_string()).make_entry(), - 4.389832_f32.make_entry(), - "apple".to_string().make_entry(), - 23434.389832_f32.make_entry(), - "apply".to_string().make_entry(), - (-500_i32).make_entry(), - 20_u32.make_entry(), + Decimal::make_entry(&Decimal("-1".to_string())), + String::make_entry(&"asdf"), + Decimal::make_entry(&Decimal("-12342343.2348973".to_string())), + String::make_entry(&"Batty"), + String::make_entry(&"Batman"), + i64::make_entry(&-3_i64), + Decimal::make_entry(&Decimal("2348973".to_string())), + f32::make_entry(&4.389832_f32), + String::make_entry(&"apple"), + f32::make_entry(&23434.389832_f32), + String::make_entry(&"apply"), + i32::make_entry(&-500_i32), + u32::make_entry(&20_u32), ]; vec.sort(); let mut used_types = BytesMut::new(); @@ -869,37 +894,37 @@ mod tests { #[test] fn test_full_blocks() { let mut vec: Vec<(Datatype, Bytes)> = vec![ - "fdsa".to_string().make_entry(), - "a".to_string().make_entry(), - "bc".to_string().make_entry(), - "bcd".to_string().make_entry(), - "z".to_string().make_entry(), - "Batty".to_string().make_entry(), - "Batman".to_string().make_entry(), - "apple".to_string().make_entry(), - (-500_i32).make_entry(), - 20_u32.make_entry(), - 22_u32.make_entry(), - 23_u32.make_entry(), - 24_u32.make_entry(), - 25_u32.make_entry(), - 26_u32.make_entry(), - 27_u32.make_entry(), - 28_u32.make_entry(), - 3000_u32.make_entry(), - (-3_i64).make_entry(), - Decimal("-12342343.2348973".to_string()).make_entry(), - Decimal("234.8973".to_string()).make_entry(), - Decimal("0.2348973".to_string()).make_entry(), - Decimal("23423423.8973".to_string()).make_entry(), - Decimal("3.3".to_string()).make_entry(), - Decimal("0.001".to_string()).make_entry(), - Decimal("-0.001".to_string()).make_entry(), - Decimal("2".to_string()).make_entry(), - Decimal("0".to_string()).make_entry(), - 4.389832_f32.make_entry(), - 23434.389832_f32.make_entry(), - int("239487329872343987").make_entry(), + String::make_entry(&"fdsa"), + String::make_entry(&"a"), + String::make_entry(&"bc"), + String::make_entry(&"bcd"), + String::make_entry(&"z"), + String::make_entry(&"Batty"), + 
String::make_entry(&"Batman"), + String::make_entry(&"apple"), + i32::make_entry(&-500_i32), + u32::make_entry(&20_u32), + u32::make_entry(&22_u32), + u32::make_entry(&23_u32), + u32::make_entry(&24_u32), + u32::make_entry(&25_u32), + u32::make_entry(&26_u32), + u32::make_entry(&27_u32), + u32::make_entry(&28_u32), + u32::make_entry(&3000_u32), + i64::make_entry(&-3_i64), + Decimal::make_entry(&Decimal("-12342343.2348973".to_string())), + Decimal::make_entry(&Decimal("234.8973".to_string())), + Decimal::make_entry(&Decimal("0.2348973".to_string())), + Decimal::make_entry(&Decimal("23423423.8973".to_string())), + Decimal::make_entry(&Decimal("3.3".to_string())), + Decimal::make_entry(&Decimal("0.001".to_string())), + Decimal::make_entry(&Decimal("-0.001".to_string())), + Decimal::make_entry(&Decimal("2".to_string())), + Decimal::make_entry(&Decimal("0".to_string())), + f32::make_entry(&4.389832_f32), + f32::make_entry(&23434.389832_f32), + Integer::make_entry(&int("239487329872343987")), ]; vec.sort(); let mut used_types = BytesMut::new(); @@ -967,37 +992,37 @@ mod tests { #[test] fn iterate_full_blocks() { let mut vec: Vec<(Datatype, Bytes)> = vec![ - "fdsa".to_string().make_entry(), - "a".to_string().make_entry(), - "bc".to_string().make_entry(), - "bcd".to_string().make_entry(), - "z".to_string().make_entry(), - "Batty".to_string().make_entry(), - "Batman".to_string().make_entry(), - "apple".to_string().make_entry(), - (-500_i32).make_entry(), - 20_u32.make_entry(), - 22_u32.make_entry(), - 23_u32.make_entry(), - 24_u32.make_entry(), - 25_u32.make_entry(), - 26_u32.make_entry(), - 27_u32.make_entry(), - 28_u32.make_entry(), - 3000_u32.make_entry(), - (-3_i64).make_entry(), - Decimal("-12342343.2348973".to_string()).make_entry(), - Decimal("234.8973".to_string()).make_entry(), - Decimal("0.2348973".to_string()).make_entry(), - Decimal("23423423.8973".to_string()).make_entry(), - Decimal("3.3".to_string()).make_entry(), - Decimal("0.001".to_string()).make_entry(), - Decimal("-0.001".to_string()).make_entry(), - Decimal("2".to_string()).make_entry(), - Decimal("0".to_string()).make_entry(), - 4.389832_f32.make_entry(), - 23434.389832_f32.make_entry(), - int("239487329872343987").make_entry(), + String::make_entry(&"fdsa"), + String::make_entry(&"a"), + String::make_entry(&"bc"), + String::make_entry(&"bcd"), + String::make_entry(&"z"), + String::make_entry(&"Batty"), + String::make_entry(&"Batman"), + String::make_entry(&"apple"), + i32::make_entry(&-500_i32), + u32::make_entry(&20_u32), + u32::make_entry(&22_u32), + u32::make_entry(&23_u32), + u32::make_entry(&24_u32), + u32::make_entry(&25_u32), + u32::make_entry(&26_u32), + u32::make_entry(&27_u32), + u32::make_entry(&28_u32), + u32::make_entry(&3000_u32), + i64::make_entry(&-3_i64), + Decimal::make_entry(&Decimal("-12342343.2348973".to_string())), + Decimal::make_entry(&Decimal("234.8973".to_string())), + Decimal::make_entry(&Decimal("0.2348973".to_string())), + Decimal::make_entry(&Decimal("23423423.8973".to_string())), + Decimal::make_entry(&Decimal("3.3".to_string())), + Decimal::make_entry(&Decimal("0.001".to_string())), + Decimal::make_entry(&Decimal("-0.001".to_string())), + Decimal::make_entry(&Decimal("2".to_string())), + Decimal::make_entry(&Decimal("0".to_string())), + f32::make_entry(&4.389832_f32), + f32::make_entry(&23434.389832_f32), + Integer::make_entry(&int("239487329872343987")), ]; vec.sort(); let mut used_types = BytesMut::new(); @@ -1031,37 +1056,37 @@ mod tests { #[test] fn test_incremental_builder() { let mut vec: 
Vec<(Datatype, Bytes)> = vec![ - "fdsa".to_string().make_entry(), - "a".to_string().make_entry(), - "bc".to_string().make_entry(), - "bcd".to_string().make_entry(), - "z".to_string().make_entry(), - "Batty".to_string().make_entry(), - "Batman".to_string().make_entry(), - "apple".to_string().make_entry(), - (-500_i32).make_entry(), - 20_u32.make_entry(), - 22_u32.make_entry(), - 23_u32.make_entry(), - 24_u32.make_entry(), - 25_u32.make_entry(), - 26_u32.make_entry(), - 27_u32.make_entry(), - 28_u32.make_entry(), - 3000_u32.make_entry(), - (-3_i64).make_entry(), - Decimal("-12342343.2348973".to_string()).make_entry(), - Decimal("234.8973".to_string()).make_entry(), - Decimal("0.2348973".to_string()).make_entry(), - Decimal("23423423.8973".to_string()).make_entry(), - Decimal("3.3".to_string()).make_entry(), - Decimal("0.001".to_string()).make_entry(), - Decimal("-0.001".to_string()).make_entry(), - Decimal("2".to_string()).make_entry(), - Decimal("0".to_string()).make_entry(), - 4.389832_f32.make_entry(), - 23434.389832_f32.make_entry(), - int("239487329872343987").make_entry(), + String::make_entry(&"fdsa"), + String::make_entry(&"a"), + String::make_entry(&"bc"), + String::make_entry(&"bcd"), + String::make_entry(&"z"), + String::make_entry(&"Batty"), + String::make_entry(&"Batman"), + String::make_entry(&"apple"), + i32::make_entry(&-500_i32), + u32::make_entry(&20_u32), + u32::make_entry(&22_u32), + u32::make_entry(&23_u32), + u32::make_entry(&24_u32), + u32::make_entry(&25_u32), + u32::make_entry(&26_u32), + u32::make_entry(&27_u32), + u32::make_entry(&28_u32), + u32::make_entry(&3000_u32), + i64::make_entry(&-3_i64), + Decimal::make_entry(&Decimal("-12342343.2348973".to_string())), + Decimal::make_entry(&Decimal("234.8973".to_string())), + Decimal::make_entry(&Decimal("0.2348973".to_string())), + Decimal::make_entry(&Decimal("23423423.8973".to_string())), + Decimal::make_entry(&Decimal("3.3".to_string())), + Decimal::make_entry(&Decimal("0.001".to_string())), + Decimal::make_entry(&Decimal("-0.001".to_string())), + Decimal::make_entry(&Decimal("2".to_string())), + Decimal::make_entry(&Decimal("0".to_string())), + f32::make_entry(&4.389832_f32), + f32::make_entry(&23434.389832_f32), + Integer::make_entry(&int("239487329872343987")), ]; vec.sort(); From 46d62b170df64c2d2cda41e4fc51c056bb22f3d2 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 30 Nov 2022 16:03:28 +0100 Subject: [PATCH 47/99] back to something that compiles --- src/layer/internal/mod.rs | 19 +++-- src/layer/simple_builder.rs | 14 ++-- src/storage/file.rs | 2 +- src/storage/layer.rs | 152 +++++++++++++++++++----------------- src/structure/logarray.rs | 2 +- src/structure/tfc/block.rs | 15 ++++ src/structure/tfc/dict.rs | 7 +- src/structure/tfc/typed.rs | 19 +++-- 8 files changed, 131 insertions(+), 99 deletions(-) diff --git a/src/layer/internal/mod.rs b/src/layer/internal/mod.rs index ca07b5c0..52aab0a9 100644 --- a/src/layer/internal/mod.rs +++ b/src/layer/internal/mod.rs @@ -226,11 +226,11 @@ impl InternalLayer { } pub fn predicate_dict_id(&self, predicate: &str) -> IdLookupResult { - self.predicate_dictionary().id(predicate) + self.predicate_dictionary().id(&predicate) } pub fn node_dict_id(&self, subject: &str) -> IdLookupResult { - self.node_dictionary().id(subject) + self.node_dictionary().id(&subject) } pub fn node_dict_get(&self, id: usize) -> Option { @@ -242,7 +242,7 @@ impl InternalLayer { } pub fn value_dict_id(&self, value: &str) -> IdLookupResult { - self.value_dictionary().id(value) + 
self.value_dictionary().id(&value) } pub fn value_dict_len(&self) -> usize { @@ -542,12 +542,12 @@ impl Layer for InternalLayer { fn node_and_value_count(&self) -> usize { self.parent_node_value_count() - + self.node_dictionary().len() - + self.value_dictionary().len() + + self.node_dictionary().num_entries() + + self.value_dictionary().num_entries() } fn predicate_count(&self) -> usize { - self.parent_predicate_count() + self.predicate_dictionary().len() + self.parent_predicate_count() + self.predicate_dictionary().num_entries() } fn subject_id<'a>(&'a self, subject: &str) -> Option { @@ -555,6 +555,7 @@ impl Layer for InternalLayer { ( layer .node_dict_id(subject) + .into_option() .map(|id| layer.node_value_id_map().inner_to_outer(id)), layer.immediate_parent(), ) @@ -572,6 +573,7 @@ impl Layer for InternalLayer { ( layer .predicate_dict_id(predicate) + .into_option() .map(|id| layer.predicate_id_map().inner_to_outer(id)), layer.immediate_parent(), ) @@ -589,6 +591,7 @@ impl Layer for InternalLayer { ( layer .node_dict_id(object) + .into_option() .map(|id| layer.node_value_id_map().inner_to_outer(id)), layer.immediate_parent(), ) @@ -604,7 +607,9 @@ impl Layer for InternalLayer { fn object_value_id<'a>(&'a self, object: &str) -> Option { let to_result = |layer: &'a InternalLayer| { ( - layer.value_dict_id(object).map(|i| { + layer.value_dict_id(object) + .into_option() + .map(|i| { layer .node_value_id_map() .inner_to_outer(i + layer.node_dict_len() as u64) diff --git a/src/layer/simple_builder.rs b/src/layer/simple_builder.rs index 8a9672ad..8eaaf4fb 100644 --- a/src/layer/simple_builder.rs +++ b/src/layer/simple_builder.rs @@ -193,11 +193,10 @@ impl LayerBuilder for SimpleLayerBuil let mut builder = ChildLayerFileBuilder::from_files(parent.clone(), &files).await?; - let node_ids = builder.add_nodes(unresolved_nodes.clone()).await?; + let node_ids = builder.add_nodes(unresolved_nodes.clone()); let predicate_ids = builder - .add_predicates(unresolved_predicates.clone()) - .await?; - let value_ids = builder.add_values(unresolved_values.clone()).await?; + .add_predicates(unresolved_predicates.clone()); + let value_ids = builder.add_values(unresolved_values.clone()); let mut builder = builder.into_phase2().await?; @@ -240,11 +239,10 @@ impl LayerBuilder for SimpleLayerBuil let files = files.into_base(); let mut builder = BaseLayerFileBuilder::from_files(&files).await?; - let node_ids = builder.add_nodes(unresolved_nodes.clone()).await?; + let node_ids = builder.add_nodes(unresolved_nodes.clone()); let predicate_ids = builder - .add_predicates(unresolved_predicates.clone()) - .await?; - let value_ids = builder.add_values(unresolved_values.clone()).await?; + .add_predicates(unresolved_predicates.clone()); + let value_ids = builder.add_values(unresolved_values.clone()); let mut builder = builder.into_phase2().await?; diff --git a/src/storage/file.rs b/src/storage/file.rs index 7f6be220..02ce1104 100644 --- a/src/storage/file.rs +++ b/src/storage/file.rs @@ -65,7 +65,7 @@ impl LayerFiles { } } - pub fn value_dictionary_files(&self) -> &DictionaryFiles { + pub fn value_dictionary_files(&self) -> &TypedDictionaryFiles { match self { Self::Base(b) => &b.value_dictionary_files, Self::Child(c) => &c.value_dictionary_files, diff --git a/src/storage/layer.rs b/src/storage/layer.rs index bd17c109..29af5973 100644 --- a/src/storage/layer.rs +++ b/src/storage/layer.rs @@ -365,6 +365,8 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { FILENAMES.node_dictionary_offsets, 
FILENAMES.predicate_dictionary_blocks, FILENAMES.predicate_dictionary_offsets, + FILENAMES.value_dictionary_types_present, + FILENAMES.value_dictionary_type_offsets, FILENAMES.value_dictionary_blocks, FILENAMES.value_dictionary_offsets, FILENAMES.node_value_idmap_bits, @@ -407,55 +409,57 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { blocks_file: files[2].clone(), offsets_file: files[3].clone(), }, - value_dictionary_files: DictionaryFiles { - blocks_file: files[4].clone(), - offsets_file: files[5].clone(), + value_dictionary_files: TypedDictionaryFiles { + types_present_file: files[4].clone(), + type_offsets_file: files[5].clone(), + blocks_file: files[6].clone(), + offsets_file: files[7].clone(), }, id_map_files: IdMapFiles { node_value_idmap_files: BitIndexFiles { - bits_file: files[6].clone(), - blocks_file: files[7].clone(), - sblocks_file: files[8].clone(), + bits_file: files[8].clone(), + blocks_file: files[9].clone(), + sblocks_file: files[10].clone(), }, predicate_idmap_files: BitIndexFiles { - bits_file: files[9].clone(), - blocks_file: files[10].clone(), - sblocks_file: files[11].clone(), + bits_file: files[11].clone(), + blocks_file: files[12].clone(), + sblocks_file: files[13].clone(), }, }, - subjects_file: files[12].clone(), - objects_file: files[13].clone(), + subjects_file: files[14].clone(), + objects_file: files[15].clone(), s_p_adjacency_list_files: AdjacencyListFiles { bitindex_files: BitIndexFiles { - bits_file: files[14].clone(), - blocks_file: files[15].clone(), - sblocks_file: files[16].clone(), + bits_file: files[16].clone(), + blocks_file: files[17].clone(), + sblocks_file: files[18].clone(), }, - nums_file: files[17].clone(), + nums_file: files[19].clone(), }, sp_o_adjacency_list_files: AdjacencyListFiles { bitindex_files: BitIndexFiles { - bits_file: files[18].clone(), - blocks_file: files[19].clone(), - sblocks_file: files[20].clone(), + bits_file: files[20].clone(), + blocks_file: files[21].clone(), + sblocks_file: files[22].clone(), }, - nums_file: files[21].clone(), + nums_file: files[23].clone(), }, o_ps_adjacency_list_files: AdjacencyListFiles { bitindex_files: BitIndexFiles { - bits_file: files[22].clone(), - blocks_file: files[23].clone(), - sblocks_file: files[24].clone(), + bits_file: files[24].clone(), + blocks_file: files[25].clone(), + sblocks_file: files[26].clone(), }, - nums_file: files[25].clone(), + nums_file: files[27].clone(), }, predicate_wavelet_tree_files: BitIndexFiles { - bits_file: files[26].clone(), - blocks_file: files[27].clone(), - sblocks_file: files[28].clone(), + bits_file: files[28].clone(), + blocks_file: files[29].clone(), + sblocks_file: files[30].clone(), }, }) } @@ -466,6 +470,8 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { FILENAMES.node_dictionary_offsets, FILENAMES.predicate_dictionary_blocks, FILENAMES.predicate_dictionary_offsets, + FILENAMES.value_dictionary_types_present, + FILENAMES.value_dictionary_type_offsets, FILENAMES.value_dictionary_blocks, FILENAMES.value_dictionary_offsets, FILENAMES.node_value_idmap_bits, @@ -524,86 +530,88 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { blocks_file: files[2].clone(), offsets_file: files[3].clone(), }, - value_dictionary_files: DictionaryFiles { - blocks_file: files[4].clone(), - offsets_file: files[5].clone(), + value_dictionary_files: TypedDictionaryFiles { + types_present_file: files[4].clone(), + type_offsets_file: files[5].clone(), + blocks_file: files[6].clone(), + offsets_file: files[7].clone(), }, 
id_map_files: IdMapFiles { node_value_idmap_files: BitIndexFiles { - bits_file: files[6].clone(), - blocks_file: files[7].clone(), - sblocks_file: files[8].clone(), + bits_file: files[8].clone(), + blocks_file: files[9].clone(), + sblocks_file: files[10].clone(), }, predicate_idmap_files: BitIndexFiles { - bits_file: files[9].clone(), - blocks_file: files[10].clone(), - sblocks_file: files[11].clone(), + bits_file: files[11].clone(), + blocks_file: files[12].clone(), + sblocks_file: files[13].clone(), }, }, - pos_subjects_file: files[12].clone(), - pos_objects_file: files[13].clone(), - neg_subjects_file: files[14].clone(), - neg_objects_file: files[15].clone(), + pos_subjects_file: files[14].clone(), + pos_objects_file: files[15].clone(), + neg_subjects_file: files[16].clone(), + neg_objects_file: files[17].clone(), pos_s_p_adjacency_list_files: AdjacencyListFiles { bitindex_files: BitIndexFiles { - bits_file: files[16].clone(), - blocks_file: files[17].clone(), - sblocks_file: files[18].clone(), + bits_file: files[18].clone(), + blocks_file: files[19].clone(), + sblocks_file: files[20].clone(), }, - nums_file: files[19].clone(), + nums_file: files[21].clone(), }, pos_sp_o_adjacency_list_files: AdjacencyListFiles { bitindex_files: BitIndexFiles { - bits_file: files[20].clone(), - blocks_file: files[21].clone(), - sblocks_file: files[22].clone(), + bits_file: files[22].clone(), + blocks_file: files[23].clone(), + sblocks_file: files[24].clone(), }, - nums_file: files[23].clone(), + nums_file: files[25].clone(), }, pos_o_ps_adjacency_list_files: AdjacencyListFiles { bitindex_files: BitIndexFiles { - bits_file: files[24].clone(), - blocks_file: files[25].clone(), - sblocks_file: files[26].clone(), + bits_file: files[26].clone(), + blocks_file: files[27].clone(), + sblocks_file: files[28].clone(), }, - nums_file: files[27].clone(), + nums_file: files[29].clone(), }, neg_s_p_adjacency_list_files: AdjacencyListFiles { bitindex_files: BitIndexFiles { - bits_file: files[28].clone(), - blocks_file: files[29].clone(), - sblocks_file: files[30].clone(), + bits_file: files[30].clone(), + blocks_file: files[31].clone(), + sblocks_file: files[32].clone(), }, - nums_file: files[31].clone(), + nums_file: files[33].clone(), }, neg_sp_o_adjacency_list_files: AdjacencyListFiles { bitindex_files: BitIndexFiles { - bits_file: files[32].clone(), - blocks_file: files[33].clone(), - sblocks_file: files[34].clone(), + bits_file: files[34].clone(), + blocks_file: files[35].clone(), + sblocks_file: files[36].clone(), }, - nums_file: files[35].clone(), + nums_file: files[37].clone(), }, neg_o_ps_adjacency_list_files: AdjacencyListFiles { bitindex_files: BitIndexFiles { - bits_file: files[36].clone(), - blocks_file: files[37].clone(), - sblocks_file: files[38].clone(), + bits_file: files[38].clone(), + blocks_file: files[39].clone(), + sblocks_file: files[40].clone(), }, - nums_file: files[39].clone(), + nums_file: files[41].clone(), }, pos_predicate_wavelet_tree_files: BitIndexFiles { - bits_file: files[40].clone(), - blocks_file: files[41].clone(), - sblocks_file: files[42].clone(), + bits_file: files[42].clone(), + blocks_file: files[43].clone(), + sblocks_file: files[44].clone(), }, neg_predicate_wavelet_tree_files: BitIndexFiles { - bits_file: files[43].clone(), - blocks_file: files[44].clone(), - sblocks_file: files[45].clone(), + bits_file: files[45].clone(), + blocks_file: files[46].clone(), + sblocks_file: files[47].clone(), }, }) } @@ -1561,7 +1569,7 @@ impl LateLogArrayBufBuilder { self.vals.pop() } - pub 
fn finalize(self) -> B {
+    pub fn finalize(mut self) -> B {
         let mut builder = LogArrayBufBuilder::new(&mut self.buf, self.width);
         builder.push_vec(self.vals);
         builder.finalize();
diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs
index a45365a2..07077ac4 100644
--- a/src/structure/tfc/block.rs
+++ b/src/structure/tfc/block.rs
@@ -587,6 +587,21 @@ impl IdLookupResult {
             _ => self,
         }
     }
+
+    pub fn map<F: Fn(u64) -> u64>(self, f: F) -> Self {
+        match self {
+            Self::Found(i) => Self::Found(f(i)),
+            Self::Closest(i) => Self::Closest(f(i)),
+            Self::NotFound => Self::NotFound
+        }
+    }
+
+    pub fn into_option(self) -> Option<u64> {
+        match self {
+            Self::Found(i) => Some(i),
+            _ => None
+        }
+    }
 }
 
 pub fn parse_block_control_records(cw: u8) -> u8 {
diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs
index 2ae3b585..9e2707ff 100644
--- a/src/structure/tfc/dict.rs
+++ b/src/structure/tfc/dict.rs
@@ -174,9 +174,12 @@ impl SizedDict {
         self.offsets.len() + 1
     }
 
-    pub fn entry(&self, index: u64) -> SizedDictEntry {
+    pub fn entry(&self, index: usize) -> Option<SizedDictEntry> {
+        if index > self.num_entries() {
+            return None;
+        }
         let block = self.block(((index - 1) / 8) as usize);
-        block.entry(((index - 1) % 8) as usize)
+        Some(block.entry(((index - 1) % 8) as usize))
     }
 
     pub fn id(&self, slice: &[u8]) -> IdLookupResult {
diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs
index a9b9fc8c..1bdbdc58 100644
--- a/src/structure/tfc/typed.rs
+++ b/src/structure/tfc/typed.rs
@@ -79,9 +79,9 @@ impl TypedDict {
         self.id_slice(datatype, bytes.as_ref())
     }
 
-    pub fn get<T: TdbDataType>(&self, id: u64) -> T {
-        let (datatype, slice) = self.entry(id);
-        datatype.cast(slice.into_buf())
+    pub fn get<T: TdbDataType>(&self, id: usize) -> Option<T> {
+        let result = self.entry(id);
+        result.map(|(datatype, slice)| datatype.cast(slice.into_buf()))
     }
 
     fn inner_type_segment(&self, i: usize) -> (SizedDict, u64) {
@@ -159,12 +159,15 @@ impl TypedDict {
         FromPrimitive::from_u64(self.types_present.entry(type_index)).unwrap()
     }
 
-    pub fn entry(&self, id: u64) -> (Datatype, SizedDictEntry) {
-        let type_index = self.type_index_for_id(id);
+    pub fn entry(&self, id: usize) -> Option<(Datatype, SizedDictEntry)> {
+        if id > self.num_entries() {
+            return None;
+        }
+        let type_index = self.type_index_for_id(id as u64);
         let (dict, offset) = self.inner_type_segment(type_index);
         let dt = self.type_for_type_index(type_index);
 
-        (dt, dict.entry(id - offset))
+        dict.entry(id - offset as usize).map(|e| (dt, e))
     }
 
     pub fn num_entries(&self) -> usize {
@@ -253,9 +256,9 @@ impl<T: TdbDataType> TypedDictSegment<T> {
         }
     }
 
-    pub fn get(&self, index: u64) -> T {
+    pub fn get(&self, index: usize) -> Option<T> {
         let entry = self.dict.entry(index);
-        T::from_lexical(entry.into_buf())
+        entry.map(|e|T::from_lexical(e.into_buf()))
     }
 
     pub fn id<Q: ToLexical<T>>(&self, val: &Q) -> IdLookupResult {

From a09a67e856aa3d05d3fed1b3f00f631336cdc45d Mon Sep 17 00:00:00 2001
From: Gavin Mendel-Gleason
Date: Wed, 30 Nov 2022 16:51:48 +0100
Subject: [PATCH 48/99] Builds

---
 src/layer/internal/base.rs             | 16 ++---
 src/layer/internal/child.rs            | 18 +++---
 src/layer/internal/mod.rs              | 21 ++----
 src/layer/internal/object_iterator.rs  | 15 +----
 src/layer/internal/subject_iterator.rs | 60 ++++--------------
 src/storage/memory.rs                  |  8 ++-
 src/structure/tfc/dict.rs              | 10 +--
 src/structure/tfc/typed.rs             | 88 +++++++++++++++-----------
 8 files changed, 100 insertions(+), 136 deletions(-)

diff --git a/src/layer/internal/base.rs b/src/layer/internal/base.rs
index 5d81317a..14bea2cc 100644
--- a/src/layer/internal/base.rs
+++ b/src/layer/internal/base.rs
@@
-70,7 +70,9 @@ impl BaseLayer { None => IdMap::default(), Some(maps) => IdMap::from_maps( maps, - util::calculate_width((node_dictionary.num_entries() + value_dictionary.num_entries()) as u64), + util::calculate_width( + (node_dictionary.num_entries() + value_dictionary.num_entries()) as u64, + ), ), }; @@ -452,15 +454,9 @@ pub mod tests { let mut builder = BaseLayerFileBuilder::from_files(&base_layer_files).await?; - builder - .add_nodes(nodes.into_iter().map(|s| s.to_string())) - .await?; - builder - .add_predicates(predicates.into_iter().map(|s| s.to_string())) - .await?; - builder - .add_values(values.into_iter().map(|s| s.to_string())) - .await?; + builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); + builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| s.to_string())); let mut builder = builder.into_phase2().await?; diff --git a/src/layer/internal/child.rs b/src/layer/internal/child.rs index effd0fc1..978fe965 100644 --- a/src/layer/internal/child.rs +++ b/src/layer/internal/child.rs @@ -65,7 +65,7 @@ impl ChildLayer { let node_dictionary = TypedDictSegment::parse( maps.node_dictionary_maps.blocks_map, maps.node_dictionary_maps.offsets_map, - 0 + 0, ); let predicate_dictionary = TypedDictSegment::parse( maps.predicate_dictionary_maps.blocks_map, @@ -86,7 +86,9 @@ impl ChildLayer { None => IdMap::default(), Some(maps) => IdMap::from_maps( maps, - util::calculate_width((node_dictionary.num_entries() + value_dictionary.num_entries()) as u64), + util::calculate_width( + (node_dictionary.num_entries() + value_dictionary.num_entries()) as u64, + ), ), }; @@ -944,9 +946,9 @@ pub mod tests { let mut b = ChildLayerFileBuilder::from_files(parent.clone(), &child_files) .await .unwrap(); - b.add_node("foo").await.unwrap(); - b.add_predicate("bar").await.unwrap(); - b.add_value("baz").await.unwrap(); + b.add_node("foo"); + b.add_predicate("bar"); + b.add_value("baz"); let b = b.into_phase2().await.unwrap(); b.finalize().await.unwrap(); @@ -982,9 +984,9 @@ pub mod tests { let mut b = ChildLayerFileBuilder::from_files(parent.clone(), &child_files) .await .unwrap(); - b.add_node("foo").await.unwrap(); - b.add_predicate("bar").await.unwrap(); - b.add_value("baz").await.unwrap(); + b.add_node("foo"); + b.add_predicate("bar"); + b.add_value("baz"); let b = b.into_phase2().await.unwrap(); b.finalize().await.unwrap(); diff --git a/src/layer/internal/mod.rs b/src/layer/internal/mod.rs index 52aab0a9..650de6a2 100644 --- a/src/layer/internal/mod.rs +++ b/src/layer/internal/mod.rs @@ -26,8 +26,8 @@ pub enum InternalLayer { Rollup(RollupLayer), } -use InternalLayer::*; use tfc::block::IdLookupResult; +use InternalLayer::*; impl InternalLayer { pub fn name(&self) -> [u32; 5] { @@ -607,9 +607,7 @@ impl Layer for InternalLayer { fn object_value_id<'a>(&'a self, object: &str) -> Option { let to_result = |layer: &'a InternalLayer| { ( - layer.value_dict_id(object) - .into_option() - .map(|i| { + layer.value_dict_id(object).into_option().map(|i| { layer .node_value_id_map() .inner_to_outer(i + layer.node_dict_len() as u64) @@ -1109,18 +1107,9 @@ mod tests { let values = vec!["chicken", "cow", "dog", "pig", "zebra"]; let mut builder = BaseLayerFileBuilder::from_files(&files).await.unwrap(); - builder - .add_nodes(nodes.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_predicates(predicates.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_values(values.into_iter().map(|s| s.to_string())) - 
.await - .unwrap(); + builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); + builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| s.to_string())); let mut builder = builder.into_phase2().await.unwrap(); builder.add_triple(3, 3, 3).await.unwrap(); builder.finalize().await.unwrap(); diff --git a/src/layer/internal/object_iterator.rs b/src/layer/internal/object_iterator.rs index 1d3b9753..85a1fd98 100644 --- a/src/layer/internal/object_iterator.rs +++ b/src/layer/internal/object_iterator.rs @@ -242,18 +242,9 @@ mod tests { .await .unwrap(); - builder - .add_nodes(nodes.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_predicates(predicates.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_values(values.into_iter().map(|s| s.to_string())) - .await - .unwrap(); + builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); + builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| s.to_string())); let mut builder = builder.into_phase2().await.unwrap(); builder.add_triple(1, 1, 2).await.unwrap(); diff --git a/src/layer/internal/subject_iterator.rs b/src/layer/internal/subject_iterator.rs index a02be96d..db54274f 100644 --- a/src/layer/internal/subject_iterator.rs +++ b/src/layer/internal/subject_iterator.rs @@ -494,18 +494,9 @@ mod tests { let predicates = vec!["abcde", "fghij", "klmno", "lll"]; let values = vec!["chicken", "cow", "dog", "pig", "zebra"]; - builder - .add_nodes(nodes.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_predicates(predicates.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_values(values.into_iter().map(|s| s.to_string())) - .await - .unwrap(); + builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); + builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| s.to_string())); let mut builder = builder.into_phase2().await.unwrap(); builder.add_triple(1, 1, 1).await.unwrap(); builder.add_triple(3, 2, 5).await.unwrap(); @@ -536,18 +527,9 @@ mod tests { let predicates = vec!["abcde", "fghij", "klmno", "lll"]; let values = vec!["chicken", "cow", "dog", "pig", "zebra"]; - builder - .add_nodes(nodes.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_predicates(predicates.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_values(values.into_iter().map(|s| s.to_string())) - .await - .unwrap(); + builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); + builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| s.to_string())); let mut builder = builder.into_phase2().await.unwrap(); builder.add_triple(1, 1, 1).await.unwrap(); builder.add_triple(3, 2, 5).await.unwrap(); @@ -621,18 +603,9 @@ mod tests { let predicates = vec!["abcde", "fghij", "klmno", "lll"]; let values = vec!["chicken", "cow", "dog", "pig", "zebra"]; - builder - .add_nodes(nodes.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_predicates(predicates.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_values(values.into_iter().map(|s| s.to_string())) - .await - .unwrap(); + builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); + builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| 
s.to_string())); let mut builder = builder.into_phase2().await.unwrap(); builder.add_triple(3, 2, 5).await.unwrap(); builder.add_triple(3, 3, 5).await.unwrap(); @@ -663,18 +636,9 @@ mod tests { let predicates = vec!["abcde", "fghij", "klmno", "lll", "xyz", "yyy"]; let values = vec!["chicken", "cow", "dog", "pig", "zebra"]; - builder - .add_nodes(nodes.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_predicates(predicates.into_iter().map(|s| s.to_string())) - .await - .unwrap(); - builder - .add_values(values.into_iter().map(|s| s.to_string())) - .await - .unwrap(); + builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); + builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| s.to_string())); let mut builder = builder.into_phase2().await.unwrap(); builder.add_triple(1, 1, 1).await.unwrap(); builder.add_triple(3, 2, 4).await.unwrap(); diff --git a/src/storage/memory.rs b/src/storage/memory.rs index 4d4d2354..0928e6b9 100644 --- a/src/storage/memory.rs +++ b/src/storage/memory.rs @@ -326,7 +326,9 @@ pub fn base_layer_memory_files() -> BaseLayerFiles { blocks_file: MemoryBackedStore::new(), offsets_file: MemoryBackedStore::new(), }, - value_dictionary_files: DictionaryFiles { + value_dictionary_files: TypedDictionaryFiles { + types_present_file: MemoryBackedStore::new(), + type_offsets_file: MemoryBackedStore::new(), blocks_file: MemoryBackedStore::new(), offsets_file: MemoryBackedStore::new(), }, @@ -390,7 +392,9 @@ pub fn child_layer_memory_files() -> ChildLayerFiles { blocks_file: MemoryBackedStore::new(), offsets_file: MemoryBackedStore::new(), }, - value_dictionary_files: DictionaryFiles { + value_dictionary_files: TypedDictionaryFiles { + types_present_file: MemoryBackedStore::new(), + type_offsets_file: MemoryBackedStore::new(), blocks_file: MemoryBackedStore::new(), offsets_file: MemoryBackedStore::new(), }, diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 9e2707ff..4ea1b6c9 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -167,7 +167,7 @@ impl SizedDict { pub fn block_num_elements(&self, block_index: usize) -> u8 { let offset = self.block_offset(block_index); - self.data[offset] + parse_block_control_records(self.data[offset]) } pub fn num_blocks(&self) -> usize { @@ -176,6 +176,8 @@ impl SizedDict { pub fn entry(&self, index: usize) -> Option { if index > self.num_entries() { + dbg!(index); + dbg!(self.num_entries()); return None; } let block = self.block(((index - 1) / 8) as usize); @@ -255,7 +257,7 @@ impl SizedDict { let num_blocks = self.num_blocks(); let last_block_size = self.block_num_elements(num_blocks - 1); - (num_blocks-1) * BLOCK_SIZE + last_block_size as usize + (num_blocks - 1) * BLOCK_SIZE + last_block_size as usize } } @@ -366,7 +368,7 @@ mod tests { assert_eq!(6, block1.num_entries()); for (ix, s) in strings.into_iter().enumerate() { - assert_eq!(s, &dict.entry((ix + 1) as u64).to_bytes()[..]); + assert_eq!(s, &dict.entry(ix + 1).unwrap().to_bytes()[..]); } } @@ -415,7 +417,7 @@ mod tests { assert_eq!(6, block1.num_entries()); for (ix, s) in strings.into_iter().enumerate() { - assert_eq!(s, &dict.entry((ix + 1) as u64).to_bytes()[..]); + assert_eq!(s, &dict.entry(ix + 1).unwrap().to_bytes()[..]); } } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 1bdbdc58..850d3d6a 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -161,13 +161,16 @@ impl TypedDict { pub fn entry(&self, 
id: usize) -> Option<(Datatype, SizedDictEntry)> {
         if id > self.num_entries() {
+            dbg!(self.num_entries());
             return None;
         }
         let type_index = self.type_index_for_id(id as u64);
         let (dict, offset) = self.inner_type_segment(type_index);
+        dbg!(offset);
+        dbg!(type_index);
         let dt = self.type_for_type_index(type_index);
 
-        dict.entry(id - offset as usize).map(|e| (dt, e))
+        dbg!(dict.entry(id - offset as usize).map(|e| (dt, e)))
     }
 
     pub fn num_entries(&self) -> usize {
@@ -258,7 +261,7 @@ impl<T: TdbDataType> TypedDictSegment<T> {
     pub fn get(&self, index: usize) -> Option<T> {
         let entry = self.dict.entry(index);
-        entry.map(|e|T::from_lexical(e.into_buf()))
+        entry.map(|e| T::from_lexical(e.into_buf()))
     }
 
     pub fn id<Q: ToLexical<T>>(&self, val: &Q) -> IdLookupResult {
@@ -270,11 +273,11 @@ impl<T: TdbDataType> TypedDictSegment<T> {
         self.dict.num_entries()
     }
 
-    pub fn iter<'a>(&'a self) -> impl Iterator<Item = SizedDictEntry>+'a+Clone {
+    pub fn iter<'a>(&'a self) -> impl Iterator<Item = SizedDictEntry> + 'a + Clone {
         self.dict.iter()
     }
 
-    pub fn into_iter(self) -> impl Iterator<Item = SizedDictEntry>+Clone {
+    pub fn into_iter(self) -> impl Iterator<Item = SizedDictEntry> + Clone {
         self.dict.into_iter()
     }
 }
@@ -323,21 +326,25 @@ pub trait TdbDataType {
     fn from_lexical<B: Buf>(b: B) -> Self;
 
     fn to_lexical<T>(val: &T) -> Bytes
-    where T: ToLexical + ?Sized {
+    where
+        T: ToLexical<Self> + ?Sized,
+    {
         val.to_lexical()
     }
 
     fn make_entry<T>(val: &T) -> (Datatype, Bytes)
-    where T: ToLexical + ?Sized{
+    where
+        T: ToLexical<Self> + ?Sized,
+    {
         (Self::datatype(), val.to_lexical())
     }
 }
 
-pub trait ToLexical {
+pub trait ToLexical<T> {
     fn to_lexical(&self) -> Bytes;
 }
 
-impl<T: AsRef<str>> ToLexical for T {
+impl<T: AsRef<str>> ToLexical<String> for T {
     fn to_lexical(&self) -> Bytes {
         Bytes::copy_from_slice(self.as_ref().as_bytes())
     }
@@ -605,12 +612,7 @@ pub struct TypedDictBufBuilder<B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> {
 }
 
 impl<B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<B1, B2, B3, B4> {
-    pub fn new(
-        used_types: B1,
-        type_offsets: B2,
-        block_offsets: B3,
-        data_buf: B4,
-    ) -> Self {
+    pub fn new(used_types: B1, type_offsets: B2, block_offsets: B3, data_buf: B4) -> Self {
         let types_present_builder = LateLogArrayBufBuilder::new(used_types);
         let type_offsets_builder = LateLogArrayBufBuilder::new(type_offsets);
         let block_offset_builder = LateLogArrayBufBuilder::new(block_offsets);
@@ -680,7 +682,12 @@ impl<B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<B1, B2, B3, B4>
-    fn build_segment_and_offsets<B1: BufMut, B2: BufMut, T: TdbDataType, Q: ToLexical<T>, I: Iterator<Item = Q>>(
+    fn build_segment_and_offsets<
+        B1: BufMut,
+        B2: BufMut,
+        T: TdbDataType,
+        Q: ToLexical<T>,
+        I: Iterator<Item = Q>,
+    >(
         dt: Datatype,
         array_buf: &mut B1,
         data_buf: &mut B2,
@@ -737,7 +750,7 @@ mod tests {
 
         for (ix, s) in strings.into_iter().enumerate() {
             assert_eq!(IdLookupResult::Found((ix + 1) as u64), segment.id(&s));
-            assert_eq!(s, segment.get((ix + 1) as u64));
+            assert_eq!(s, segment.get(ix + 1).unwrap());
         }
     }
 
@@ -761,7 +774,7 @@ mod tests {
 
         for (ix, s) in nums.into_iter().enumerate() {
             assert_eq!(IdLookupResult::Found((ix + 1) as u64), segment.id(&s));
-            assert_eq!(s, segment.get((ix + 1) as u64));
+            assert_eq!(s, segment.get(ix + 1).unwrap());
         }
     }
 
@@ -902,11 +915,14 @@ mod tests {
         assert_eq!(IdLookupResult::Found(7), dict.id(&(-500_i32)));
 
         for i in 1..vec.len() + 1 {
-            let (t, s) = dict.entry(i as u64);
+            let (t, s) = dict.entry(i).unwrap();
             assert_eq!(vec[i - 1], (t, s.0.into_iter().flatten().collect()));
         }
 
-        assert_eq!(Decimal("-12342343.2348973".to_string()), dict.get(11));
+        assert_eq!(
+            Decimal("-12342343.2348973".to_string()),
+            dict.get(11).unwrap()
+        );
     }
 
     #[test]
@@ -967,14 +983,14 @@ mod tests {
         assert_eq!(31, dict.num_entries());
 
         for i in 1..vec.len() + 1 {
-            let (t, s) = dict.entry(i as u64);
+            let (t, s) = dict.entry(i).unwrap();
             assert_eq!(vec[i - 1], (t, s.0.into_iter().flatten().collect()));
         }
 
-        assert_eq!("Batman".to_string(), dict.get::<String>(1));
-        assert_eq!("fdsa".to_string(), dict.get::<String>(7));
-        assert_eq!(26_u32, dict.get::<u32>(14));
-        assert_eq!(Decimal("234.8973".to_string()), dict.get(29));
+        assert_eq!("Batman".to_string(), dict.get::<String>(1).unwrap());
+        assert_eq!("fdsa".to_string(), dict.get::<String>(7).unwrap());
+        assert_eq!(26_u32, dict.get::<u32>(14).unwrap());
+        assert_eq!(Decimal("234.8973".to_string()), dict.get(29).unwrap());
 
         assert_eq!(IdLookupResult::NotFound, dict.id(&"AAAA".to_string()));
         assert_eq!(IdLookupResult::Closest(2), dict.id(&"Baz".to_string()));
@@ -1114,9 +1130,9 @@ mod tests {
         let data_buf = BytesMut::new();
 
         let mut typed_builder = TypedDictBufBuilder::new(
-            &mut used_types_buf,
-            &mut type_offsets_buf,
-            &mut block_offsets_buf,
+            used_types_buf,
+            type_offsets_buf,
+            block_offsets_buf,
             data_buf,
         );
 
@@ -1126,17 +1142,17 @@ mod tests {
             .map(|(dt, entry)| typed_builder.add(dt, entry))
             .collect();
 
-        let data_buf = typed_builder.finalize();
-
-        let used_types = used_types_buf.freeze();
-        let type_offsets = type_offsets_buf.freeze();
-        let block_offsets = block_offsets_buf.freeze();
-        let data = data_buf.freeze();
+        let (used_types, type_offsets, block_offsets, data) = typed_builder.finalize();
 
-        let dict = TypedDict::from_parts(used_types, type_offsets, block_offsets, data);
+        let dict = TypedDict::from_parts(
+            used_types.freeze(),
+            type_offsets.freeze(),
+            block_offsets.freeze(),
+            data.freeze(),
+        );
 
         for i in 0..vec.len() {
-            assert_eq!(vec[i], convert_entry(dict.entry(i as u64 + 1)))
+            assert_eq!(vec[i], convert_entry(dict.entry(i + 1).unwrap()))
        }
    }
}

From c802006c2de65dc7b202e6095e99c7bab523e0c4 Mon Sep 17 00:00:00 2001
From: Gavin Mendel-Gleason
Date: Wed, 30 Nov 2022 16:58:12 +0100
Subject: [PATCH 49/99] No warnings

---
 src/storage/file.rs        | 42 ++++++++++++++++++--------
 src/structure/tfc/file.rs  | 61 +++++++++++++++++---------------------
 src/structure/tfc/typed.rs |  8 ++---
 3 files changed, 61 insertions(+), 50 deletions(-)

diff --git a/src/storage/file.rs b/src/storage/file.rs
index 02ce1104..29e2c337 100644
--- a/src/storage/file.rs
+++ b/src/storage/file.rs
@@ -2,7 +2,7 @@
 
 use std::io;
 
-use bytes::{Bytes, Buf};
+use bytes::{Buf, Bytes};
 use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
 
 use async_trait::async_trait;
@@ -291,22 +291,36 @@ impl<F: 'static + FileLoad + FileStore> TypedDictionaryFiles<F> {
         })
     }
 
-    pub async fn write_all_from_bufs<B1: Buf, B2: Buf, B3: Buf, B4: Buf>(&self, types_present_buf: &mut B1, type_offsets_buf: &mut B2, blocks_buf: &mut B3, offsets_buf: &mut B4) -> io::Result<()> {
+    pub async fn write_all_from_bufs<B1: Buf, B2: Buf, B3: Buf, B4: Buf>(
+        &self,
+        types_present_buf: &mut B1,
+        type_offsets_buf: &mut B2,
+        blocks_buf: &mut B3,
+        offsets_buf: &mut B4,
+    ) -> io::Result<()> {
         let mut types_present_writer = self.types_present_file.open_write().await?;
         let mut type_offsets_writer = self.type_offsets_file.open_write().await?;
         let mut blocks_writer = self.blocks_file.open_write().await?;
         let mut offsets_writer = self.offsets_file.open_write().await?;
 
-        types_present_writer.write_all_buf(types_present_buf).await?;
+        types_present_writer
+            .write_all_buf(types_present_buf)
+            .await?;
         type_offsets_writer.write_all_buf(type_offsets_buf).await?;
         blocks_writer.write_all_buf(blocks_buf).await?;
         offsets_writer.write_all_buf(offsets_buf).await?;
 
-        blocks_writer.flush();
-        blocks_writer.sync_all();
+        types_present_writer.flush().await?;
+        types_present_writer.sync_all().await?;
 
-        offsets_writer.flush();
-        offsets_writer.sync_all();
+        type_offsets_writer.flush().await?;
+        type_offsets_writer.sync_all().await?;
+
+        blocks_writer.flush().await?;
+        blocks_writer.sync_all().await?;
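+        // Note: flush() and sync_all() return futures that do nothing
+        // until awaited. The un-awaited calls removed above were the
+        // unused-future warnings this commit clears, and they also meant
+        // the dictionary buffers were never guaranteed to reach disk.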
+ offsets_writer.flush().await?; + offsets_writer.sync_all().await?; Ok(()) } @@ -336,18 +350,22 @@ impl DictionaryFiles { }) } - pub async fn write_all_from_bufs(&self, blocks_buf: &mut B1, offsets_buf: &mut B2) -> io::Result<()> { + pub async fn write_all_from_bufs( + &self, + blocks_buf: &mut B1, + offsets_buf: &mut B2, + ) -> io::Result<()> { let mut blocks_writer = self.blocks_file.open_write().await?; let mut offsets_writer = self.offsets_file.open_write().await?; blocks_writer.write_all_buf(blocks_buf).await?; offsets_writer.write_all_buf(offsets_buf).await?; - blocks_writer.flush(); - blocks_writer.sync_all(); + blocks_writer.flush().await?; + blocks_writer.sync_all().await?; - offsets_writer.flush(); - offsets_writer.sync_all(); + offsets_writer.flush().await?; + offsets_writer.sync_all().await?; Ok(()) } diff --git a/src/structure/tfc/file.rs b/src/structure/tfc/file.rs index 93ea3185..9b2e5528 100644 --- a/src/structure/tfc/file.rs +++ b/src/structure/tfc/file.rs @@ -1,39 +1,23 @@ use bytes::BytesMut; -use tokio::io::AsyncWriteExt; use std::io; +use tokio::io::AsyncWriteExt; use crate::{storage::*, structure::util::sorted_iterator}; -use super::{*, dict::{build_dict_unchecked, build_offset_logarray}}; - -pub struct StringDictFileBuilder { - /// the file that this builder writes the pfc blocks to - blocks_file: W, - /// the file that this builder writes the block offsets to - block_offsets_file: W, - - strings: Vec, -} - -impl StringDictFileBuilder { - pub fn new(blocks_file: W, block_offsets_file: W) -> Self { - Self { - blocks_file, - block_offsets_file, - strings: Vec::new() - } - } -} +use super::{ + dict::{build_dict_unchecked, build_offset_logarray}, + *, +}; pub async fn merge_string_dictionaries< - 'a, + 'a, F: 'static + FileLoad + FileStore, - I: Iterator+'a, + I: Iterator + 'a, >( dictionaries: I, dict_files: DictionaryFiles, ) -> io::Result<()> { - let iterators: Vec<_> = dictionaries.map(|d|d.iter()).collect(); + let iterators: Vec<_> = dictionaries.map(|d| d.iter()).collect(); let pick_fn = |vals: &[Option<&SizedDictEntry>]| { vals.iter() @@ -43,7 +27,7 @@ pub async fn merge_string_dictionaries< .map(|(ix, _)| ix) }; - let sorted_iterator = sorted_iterator(iterators, pick_fn).map(|elt|elt.to_bytes()); + let sorted_iterator = sorted_iterator(iterators, pick_fn).map(|elt| elt.to_bytes()); let mut blocks_file_writer = dict_files.blocks_file.open_write().await?; let mut offsets_file_writer = dict_files.offsets_file.open_write().await?; @@ -54,7 +38,6 @@ pub async fn merge_string_dictionaries< build_dict_unchecked(None, 0, &mut offsets, &mut data_buf, sorted_iterator); build_offset_logarray(&mut offsets_buf, offsets); - blocks_file_writer.write_all(data_buf.as_ref()).await?; blocks_file_writer.flush().await?; blocks_file_writer.sync_all().await?; @@ -66,14 +49,14 @@ pub async fn merge_string_dictionaries< } pub async fn merge_typed_dictionaries< - 'a, + 'a, F: 'static + FileLoad + FileStore, - I: Iterator+'a, + I: Iterator + 'a, >( dictionaries: I, dict_files: TypedDictionaryFiles, ) -> io::Result<()> { - let iterators: Vec<_> = dictionaries.map(|d|d.iter()).collect(); + let iterators: Vec<_> = dictionaries.map(|d| d.iter()).collect(); let pick_fn = |vals: &[Option<&(Datatype, SizedDictEntry)>]| { vals.iter() @@ -83,7 +66,7 @@ pub async fn merge_typed_dictionaries< .map(|(ix, _)| ix) }; - let sorted_iterator = sorted_iterator(iterators, pick_fn).map(|(dt, elt)|(dt,elt.to_bytes())); + let sorted_iterator = sorted_iterator(iterators, pick_fn).map(|(dt, elt)| (dt, 
elt.to_bytes())); let mut types_present_file_writer = dict_files.types_present_file.open_write().await?; let mut type_offsets_file_writer = dict_files.type_offsets_file.open_write().await?; @@ -94,13 +77,23 @@ pub async fn merge_typed_dictionaries< let mut type_offsets_buf = BytesMut::new(); let mut offsets_buf = BytesMut::new(); let mut data_buf = BytesMut::new(); - build_multiple_segments(&mut types_present_buf, &mut type_offsets_buf, &mut offsets_buf, &mut data_buf, sorted_iterator); - - types_present_file_writer.write_all(types_present_buf.as_ref()).await?; + build_multiple_segments( + &mut types_present_buf, + &mut type_offsets_buf, + &mut offsets_buf, + &mut data_buf, + sorted_iterator, + ); + + types_present_file_writer + .write_all(types_present_buf.as_ref()) + .await?; types_present_file_writer.flush().await?; types_present_file_writer.sync_all().await?; - type_offsets_file_writer.write_all(type_offsets_buf.as_ref()).await?; + type_offsets_file_writer + .write_all(type_offsets_buf.as_ref()) + .await?; type_offsets_file_writer.flush().await?; type_offsets_file_writer.sync_all().await?; diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 850d3d6a..5377e7d0 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -1124,9 +1124,9 @@ mod tests { ]; vec.sort(); - let mut used_types_buf = BytesMut::new(); - let mut type_offsets_buf = BytesMut::new(); - let mut block_offsets_buf = BytesMut::new(); + let used_types_buf = BytesMut::new(); + let type_offsets_buf = BytesMut::new(); + let block_offsets_buf = BytesMut::new(); let data_buf = BytesMut::new(); let mut typed_builder = TypedDictBufBuilder::new( @@ -1136,7 +1136,7 @@ mod tests { data_buf, ); - let results: Vec = vec + let _results: Vec = vec .clone() .into_iter() .map(|(dt, entry)| typed_builder.add(dt, entry)) From 4837d73186c5ab9a203ac1dbcd936ef1d75f9d19 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Thu, 1 Dec 2022 15:49:45 +0100 Subject: [PATCH 50/99] WIP: To avoid data loss, checking in this debugging code --- src/layer/simple_builder.rs | 7 ++--- src/storage/layer.rs | 28 ++++++++++++----- src/structure/logarray.rs | 31 ++++++++++++++----- src/structure/tfc/typed.rs | 60 +++++++++++++++++++++++++++++++------ src/structure/util.rs | 19 ++++++------ 5 files changed, 108 insertions(+), 37 deletions(-) diff --git a/src/layer/simple_builder.rs b/src/layer/simple_builder.rs index 8eaaf4fb..8ae41b05 100644 --- a/src/layer/simple_builder.rs +++ b/src/layer/simple_builder.rs @@ -62,6 +62,7 @@ pub struct SimpleLayerBuilder { impl SimpleLayerBuilder { /// Construct a layer builder for a base layer pub fn new(name: [u32; 5], files: BaseLayerFiles) -> Self { + eprintln!("Trying to make a new layer file"); Self { name, parent: None, @@ -194,8 +195,7 @@ impl LayerBuilder for SimpleLayerBuil ChildLayerFileBuilder::from_files(parent.clone(), &files).await?; let node_ids = builder.add_nodes(unresolved_nodes.clone()); - let predicate_ids = builder - .add_predicates(unresolved_predicates.clone()); + let predicate_ids = builder.add_predicates(unresolved_predicates.clone()); let value_ids = builder.add_values(unresolved_values.clone()); let mut builder = builder.into_phase2().await?; @@ -240,8 +240,7 @@ impl LayerBuilder for SimpleLayerBuil let mut builder = BaseLayerFileBuilder::from_files(&files).await?; let node_ids = builder.add_nodes(unresolved_nodes.clone()); - let predicate_ids = builder - .add_predicates(unresolved_predicates.clone()); + let predicate_ids = 
builder.add_predicates(unresolved_predicates.clone()); let value_ids = builder.add_values(unresolved_values.clone()); let mut builder = builder.into_phase2().await?; diff --git a/src/storage/layer.rs b/src/storage/layer.rs index 29af5973..bac60e72 100644 --- a/src/storage/layer.rs +++ b/src/storage/layer.rs @@ -10,13 +10,12 @@ use crate::layer::{ OptInternalLayerTriplePredicateIterator, OptInternalLayerTripleSubjectIterator, RollupLayer, SimpleLayerBuilder, }; -use crate::structure::StringDict; -use crate::structure::TypedDict; use crate::structure::bitarray::bitarray_len_from_file; use crate::structure::logarray::logarray_file_get_length_and_width; +use crate::structure::StringDict; +use crate::structure::TypedDict; use crate::structure::{ - dict_file_get_count, util, AdjacencyList, BitIndex, LogArray, MonotonicLogArray, - WaveletTree, + dict_file_get_count, util, AdjacencyList, BitIndex, LogArray, MonotonicLogArray, WaveletTree, }; use std::convert::TryInto; @@ -1569,7 +1568,11 @@ impl, )> { let mut builder = store.create_base_layer().await?; - let name = builder.name(); + let name = dbg!(builder.name()); for t in BASE_TRIPLES.iter() { builder.add_string_triple(t.clone()); } diff --git a/src/structure/logarray.rs b/src/structure/logarray.rs index d9ba9ca9..d71cde75 100644 --- a/src/structure/logarray.rs +++ b/src/structure/logarray.rs @@ -296,6 +296,8 @@ impl LogArray { /// /// Panics if `index` + `length` is >= the length of the log array. pub fn slice(&self, offset: usize, len: usize) -> LogArray { + dbg!(len); + dbg!(offset); let offset = u32::try_from(offset) .unwrap_or_else(|_| panic!("expected 32-bit slice offset ({})", offset)); let len = @@ -352,8 +354,10 @@ impl<'a, B: BufMut> LogArrayBufBuilder<'a, B> { } pub fn push(&mut self, val: u64) { + eprintln!("push"); + dbg!(val); // This is the minimum number of leading zeros that a decoded value should have. - let leading_zeros = 64 - self.width; + let leading_zeros = u64::BITS - self.width as u32; // If `val` does not fit in the `width`, return an error. if val.leading_zeros() < u32::from(leading_zeros) { @@ -404,7 +408,7 @@ impl<'a, B: BufMut> LogArrayBufBuilder<'a, B> { pub fn finalize(mut self) { let len = self.count; - let width = self.width; + let width = dbg!(self.width); // Write the final data word. 
self.finalize_data(); @@ -422,7 +426,7 @@ pub struct LateLogArrayBufBuilder { buf: B, /// NOTE: remove pub pub vals: Vec, - width: u8 + width: u8, } impl LateLogArrayBufBuilder { @@ -430,7 +434,7 @@ impl LateLogArrayBufBuilder { Self { buf, vals: Vec::new(), - width: 0 + width: 0, } } @@ -440,7 +444,7 @@ impl LateLogArrayBufBuilder { pub fn push(&mut self, val: u64) { self.vals.push(val); - let width = calculate_width(val); + let width = dbg!(calculate_width(val)); if self.width < width { self.width = width; } @@ -461,10 +465,13 @@ impl LateLogArrayBufBuilder { } pub fn finalize(mut self) -> B { + /*if self.width == 0 { + self.width = 1 + }*/ let mut builder = LogArrayBufBuilder::new(&mut self.buf, self.width); - builder.push_vec(self.vals); + builder.push_vec(dbg!(self.vals)); builder.finalize(); - + eprintln!("Finalized logarray"); self.buf } } @@ -930,6 +937,16 @@ mod tests { assert!(MonotonicLogArray::from_logarray(logarray).is_empty()); } + #[test] + pub fn late_logarray_just_zero() { + let buf = BytesMut::new(); + let mut builder = LateLogArrayBufBuilder::new(buf); + builder.push(0); + let logarray_buf = builder.finalize().freeze(); + let logarray = LogArray::parse(logarray_buf).unwrap(); + assert_eq!(logarray.entry(0_usize), 0_u64); + } + #[tokio::test] #[should_panic(expected = "expected value (8) to fit in 3 bits")] async fn log_array_file_builder_panic() { diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 5377e7d0..52893a3b 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -56,11 +56,15 @@ impl TypedDict { tally += gap as u64; type_id_offsets.push((type_offset + 1) * 8 - tally); } - - let last_gap = BLOCK_SIZE - - parse_block_control_records( - data[block_offsets.entry(block_offsets.len() - 1) as usize], - ) as usize; + dbg!(block_offsets.len()); + let last_gap = if block_offsets.len() == 0 { + 1 + } else { + BLOCK_SIZE + - parse_block_control_records( + data[block_offsets.entry(block_offsets.len() - 1) as usize], + ) as usize + }; let num_entries = (block_offsets.len() + 1) * BLOCK_SIZE - tally as usize - last_gap; Self { @@ -88,6 +92,7 @@ impl TypedDict { let type_offset; let block_offset; let id_offset; + dbg!(i); if i == 0 { type_offset = 0; block_offset = 0; @@ -113,7 +118,7 @@ impl TypedDict { len = next_offset - type_offset - 1; } } - + dbg!(type_offset + 1); let logarray_slice = self.block_offsets.slice(type_offset + 1, len); let data_slice = self.data.slice(block_offset..); @@ -286,7 +291,7 @@ pub type StringDict = TypedDictSegment; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, FromPrimitive)] pub enum Datatype { - String = 0, + String = 1, UInt32, Int32, UInt64, @@ -663,6 +668,7 @@ impl TypedDictBufBuilder u64 { + eprintln!("Adding entry: {dt:?},{e:?}"); self.add(dt, e.to_bytes()) } @@ -671,17 +677,21 @@ impl TypedDictBufBuilder (B1, B2, B3, B4) { + eprintln!("Finalizing now"); if self.current_datatype == None { panic!("There was nothing added to this dictionary!"); } let (mut block_offset_builder, data_buf, _, _) = self.sized_dict_buf_builder.unwrap().finalize(); + eprintln!("a"); block_offset_builder.pop(); let block_offsets_buf = block_offset_builder.finalize(); - + eprintln!("b"); + dbg!(&self.types_present_builder.vals); let types_present_buf = self.types_present_builder.finalize(); + eprintln!("c"); let type_offsets_buf = self.type_offsets_builder.finalize(); - + eprintln!("Finalized..."); ( types_present_buf, type_offsets_buf, @@ -1087,6 +1097,38 @@ mod tests { (e.0, e.1.to_bytes()) } + #[test] + 
fn test_one_string() { + let vec: Vec<(Datatype, Bytes)> = vec![String::make_entry(&"fdsa")]; + let used_types_buf = BytesMut::new(); + let type_offsets_buf = BytesMut::new(); + let block_offsets_buf = BytesMut::new(); + let data_buf = BytesMut::new(); + + let mut typed_builder = TypedDictBufBuilder::new( + used_types_buf, + type_offsets_buf, + block_offsets_buf, + data_buf, + ); + + let _results: Vec = vec + .clone() + .into_iter() + .map(|(dt, entry)| typed_builder.add(dt, entry)) + .collect(); + + let (used_types, type_offsets, block_offsets, data) = typed_builder.finalize(); + + let dict = TypedDict::from_parts( + used_types.freeze(), + type_offsets.freeze(), + block_offsets.freeze(), + data.freeze(), + ); + assert_eq!(vec[0], convert_entry(dict.entry(1).unwrap())) + } + #[test] fn test_incremental_builder() { let mut vec: Vec<(Datatype, Bytes)> = vec![ diff --git a/src/structure/util.rs b/src/structure/util.rs index d47f86d9..f349bb91 100644 --- a/src/structure/util.rs +++ b/src/structure/util.rs @@ -130,12 +130,8 @@ struct SortedIterator< pick_fn: F, } -impl< - 'a, - T, - I: 'a + Iterator + Send, - F: 'static + Fn(&[Option<&T>]) -> Option, - > Iterator for SortedIterator +impl<'a, T, I: 'a + Iterator + Send, F: 'static + Fn(&[Option<&T>]) -> Option> + Iterator for SortedIterator { type Item = T; @@ -155,14 +151,14 @@ impl< } pub fn sorted_iterator< - 'a, + 'a, T: 'a, I: 'a + Iterator + Send, F: 'static + Fn(&[Option<&T>]) -> Option, >( iters: Vec, pick_fn: F, -) -> impl Iterator+'a { +) -> impl Iterator + 'a { let peekable_iters = iters .into_iter() .map(std::iter::Iterator::peekable) @@ -187,7 +183,12 @@ pub fn assert_poll_next>(stream: Pin<&mut S>, cx: &mut Co } pub fn calculate_width(size: u64) -> u8 { - ((size + 1) as f32).log2().ceil() as u8 + let mut msb = u64::BITS - size.leading_zeros(); + // zero is a degenerate case, but needs to be represented with one bit. + if msb == 0 { + msb = 1 + }; + msb as u8 } #[cfg(test)] From 2fbd8602894477433aabb365d279f925355f1f49 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Thu, 1 Dec 2022 17:27:20 +0100 Subject: [PATCH 51/99] Some debugging code. 
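Instruments the offset lookups in `SizedDict::block_offset` and
`TypedDict::inner_type_segment` with `dbg!` while chasing the block
offset arithmetic. For orientation, a rough sketch of the mapping
being traced (illustrative only; `locate` is not a real function in
this crate):

    // Entry ids are 1-based, and a block holds up to BLOCK_SIZE (8)
    // entries, so an id resolves to a block index and a slot within it:
    fn locate(index: usize) -> (usize, usize) {
        ((index - 1) / 8, (index - 1) % 8)
    }

    // Block 0 starts at data offset 0; block i starts at
    // offsets.entry(i - 1) - dict_offset, which is what block_offset
    // computes and what the dbg! output should confirm.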
--- src/structure/tfc/dict.rs | 2 +- src/structure/tfc/typed.rs | 22 ++++++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 4ea1b6c9..1ac85dff 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -140,7 +140,7 @@ impl SizedDict { if block_index == 0 { offset = 0; } else { - offset = (self.offsets.entry(block_index - 1) - self.dict_offset) as usize; + offset = (dbg!(self.offsets.entry(block_index - 1)) - dbg!(self.dict_offset)) as usize; } offset diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 52893a3b..eff19d1c 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -98,27 +98,30 @@ impl TypedDict { block_offset = 0; id_offset = 0; } else { - type_offset = self.type_offsets.entry(i - 1) as usize; - id_offset = self.type_id_offsets[i - 1]; - block_offset = self.block_offsets.entry(type_offset as usize) as usize; + type_offset = dbg!(self.type_offsets.entry(i - 1) as usize); + id_offset = dbg!(self.type_id_offsets[i - 1]); + block_offset = dbg!(self.block_offsets.entry(type_offset as usize) as usize); } let len; - if i == self.types_present.len() - 1 { + dbg!(&self.types_present); + dbg!(&self.type_id_offsets); + dbg!(&self.block_offsets); + if dbg!(i == dbg!(self.types_present.len()) - 1) { if i == 0 { len = self.block_offsets.len() - type_offset; } else { len = self.block_offsets.len() - type_offset - 1; } } else { - let next_offset = self.type_offsets.entry(i) as usize; + let next_offset = dbg!(self.type_offsets.entry(i) as usize); if i == 0 { len = next_offset - type_offset; } else { - len = next_offset - type_offset - 1; + len = dbg!(next_offset - type_offset - 1); } } - dbg!(type_offset + 1); + dbg!(type_offset); let logarray_slice = self.block_offsets.slice(type_offset + 1, len); let data_slice = self.data.slice(block_offset..); @@ -240,7 +243,6 @@ impl<'a> Iterator for DictSegmentIterator<'a> { if self.type_index >= self.dict.types_present.len() { return None; } - let (segment, _) = self.dict.inner_type_segment(self.type_index); let datatype = self.dict.type_for_type_index(self.type_index); self.type_index += 1; @@ -291,12 +293,12 @@ pub type StringDict = TypedDictSegment; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, FromPrimitive)] pub enum Datatype { - String = 1, + String = 0, UInt32, Int32, + Float32, UInt64, Int64, - Float32, Float64, Decimal, BigInt, From 2c113eb2005858aebd7cbf014b3e478d0c7157a5 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Thu, 1 Dec 2022 20:22:57 +0100 Subject: [PATCH 52/99] Add condition for empty slice --- src/structure/tfc/typed.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index eff19d1c..86c6ee6c 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -122,7 +122,13 @@ impl TypedDict { } } dbg!(type_offset); - let logarray_slice = self.block_offsets.slice(type_offset + 1, len); + let logarray_slice; + if len == 0 { + // any slice will do + logarray_slice = self.block_offsets.slice(0, 0); + } else { + logarray_slice = self.block_offsets.slice(type_offset + 1, len); + } let data_slice = self.data.slice(block_offset..); ( From 2b07fa6594696404a1d2f76da603b84cd90df499 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Fri, 2 Dec 2022 17:53:46 +0100 Subject: [PATCH 53/99] Adding fixes for phase2 --- src/layer/internal/base.rs | 26 ++++++++--- 
src/structure/logarray.rs | 9 ++-- src/structure/tfc/block.rs | 10 ++--- src/structure/tfc/dict.rs | 4 +- src/structure/tfc/typed.rs | 89 +++++++++++++++++++++++++++----------- 5 files changed, 92 insertions(+), 46 deletions(-) diff --git a/src/layer/internal/base.rs b/src/layer/internal/base.rs index 14bea2cc..6d8e223a 100644 --- a/src/layer/internal/base.rs +++ b/src/layer/internal/base.rs @@ -256,17 +256,29 @@ impl BaseLayerFileBuilder { let predicate_dict_blocks_map = files.predicate_dictionary_files.blocks_file.map().await?; let predicate_dict_offsets_map = files.predicate_dictionary_files.offsets_file.map().await?; + let value_dict_types_present_map = files + .value_dictionary_files + .types_present_file + .map() + .await?; + let value_dict_type_offsets_map = + files.value_dictionary_files.type_offsets_file.map().await?; let value_dict_blocks_map = files.value_dictionary_files.blocks_file.map().await?; let value_dict_offsets_map = files.value_dictionary_files.offsets_file.map().await?; - let node_dict = PfcDict::parse(node_dict_blocks_map, node_dict_offsets_map)?; - let pred_dict = PfcDict::parse(predicate_dict_blocks_map, predicate_dict_offsets_map)?; - let val_dict = PfcDict::parse(value_dict_blocks_map, value_dict_offsets_map)?; + let node_dict = StringDict::parse(node_dict_blocks_map, node_dict_offsets_map, 0); + let pred_dict = StringDict::parse(predicate_dict_blocks_map, predicate_dict_offsets_map, 0); + let val_dict = TypedDict::from_parts( + value_dict_types_present_map, + value_dict_type_offsets_map, + value_dict_blocks_map, + value_dict_offsets_map, + ); // TODO: it is a bit silly to parse the dictionaries just for this. surely we can get the counts in an easier way? - let num_nodes = node_dict.len(); - let num_predicates = pred_dict.len(); - let num_values = val_dict.len(); + let num_nodes = node_dict.num_entries(); + let num_predicates = pred_dict.num_entries(); + let num_values = val_dict.num_entries(); BaseLayerFileBuilderPhase2::new(files, num_nodes, num_predicates, num_values).await } @@ -605,7 +617,7 @@ pub mod tests { let builder = builder.into_phase2().await.unwrap(); builder.finalize().await.unwrap(); - + eprintln!("Here"); let layer = BaseLayer::load_from_files([1, 2, 3, 4, 5], &base_layer_files) .await .unwrap(); diff --git a/src/structure/logarray.rs b/src/structure/logarray.rs index d71cde75..6429e459 100644 --- a/src/structure/logarray.rs +++ b/src/structure/logarray.rs @@ -354,8 +354,6 @@ impl<'a, B: BufMut> LogArrayBufBuilder<'a, B> { } pub fn push(&mut self, val: u64) { - eprintln!("push"); - dbg!(val); // This is the minimum number of leading zeros that a decoded value should have. let leading_zeros = u64::BITS - self.width as u32; @@ -408,7 +406,7 @@ impl<'a, B: BufMut> LogArrayBufBuilder<'a, B> { pub fn finalize(mut self) { let len = self.count; - let width = dbg!(self.width); + let width = self.width; // Write the final data word. 
self.finalize_data(); @@ -444,7 +442,7 @@ impl LateLogArrayBufBuilder { pub fn push(&mut self, val: u64) { self.vals.push(val); - let width = dbg!(calculate_width(val)); + let width = calculate_width(val); if self.width < width { self.width = width; } @@ -469,9 +467,8 @@ impl LateLogArrayBufBuilder { self.width = 1 }*/ let mut builder = LogArrayBufBuilder::new(&mut self.buf, self.width); - builder.push_vec(dbg!(self.vals)); + builder.push_vec(self.vals); builder.finalize(); - eprintln!("Finalized logarray"); self.buf } } diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 07077ac4..7672a985 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -537,7 +537,7 @@ impl<'a> Iterator for SizedBlockIterator<'a> { if self.ix >= self.header.num_entries as usize - 1 { return None; } - let size = dbg!(self.header.sizes[self.ix]); + let size = self.header.sizes[self.ix]; let mut shared = self.header.shareds[self.ix]; for rope_index in 0..last.len() { let x = &mut last[rope_index]; @@ -588,18 +588,18 @@ impl IdLookupResult { } } - pub fn mapu64>(self, f: F) -> Self { + pub fn map u64>(self, f: F) -> Self { match self { Self::Found(i) => Self::Found(f(i)), Self::Closest(i) => Self::Closest(f(i)), - Self::NotFound => Self::NotFound + Self::NotFound => Self::NotFound, } } pub fn into_option(self) -> Option { match self { Self::Found(i) => Some(i), - _ => None + _ => None, } } } @@ -647,7 +647,7 @@ pub(crate) fn build_block_unchecked( let mut size = 0; let slices_len = slices.len(); debug_assert!(slices_len <= BLOCK_SIZE && slices_len != 0); - let cw = dbg!(create_block_control_word(record_size, slices_len as u8)); + let cw = create_block_control_word(record_size, slices_len as u8); buf.put_u8(cw as u8); size += 1; diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 1ac85dff..542722e2 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -140,7 +140,7 @@ impl SizedDict { if block_index == 0 { offset = 0; } else { - offset = (dbg!(self.offsets.entry(block_index - 1)) - dbg!(self.dict_offset)) as usize; + offset = (self.offsets.entry(block_index - 1) - self.dict_offset) as usize; } offset @@ -176,8 +176,6 @@ impl SizedDict { pub fn entry(&self, index: usize) -> Option { if index > self.num_entries() { - dbg!(index); - dbg!(self.num_entries()); return None; } let block = self.block(((index - 1) / 8) as usize); diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 86c6ee6c..40c3bfae 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -51,12 +51,12 @@ impl TypedDict { last_block_len = parse_block_control_records(data[last_block_offset_of_previous_type as usize]); } - eprintln!("last_block_len: {last_block_len}"); + let gap = BLOCK_SIZE as u8 - last_block_len; tally += gap as u64; type_id_offsets.push((type_offset + 1) * 8 - tally); } - dbg!(block_offsets.len()); + let last_gap = if block_offsets.len() == 0 { 1 } else { @@ -92,36 +92,33 @@ impl TypedDict { let type_offset; let block_offset; let id_offset; - dbg!(i); + if i == 0 { type_offset = 0; block_offset = 0; id_offset = 0; } else { - type_offset = dbg!(self.type_offsets.entry(i - 1) as usize); - id_offset = dbg!(self.type_id_offsets[i - 1]); - block_offset = dbg!(self.block_offsets.entry(type_offset as usize) as usize); + type_offset = self.type_offsets.entry(i - 1) as usize; + id_offset = self.type_id_offsets[i - 1]; + block_offset = self.block_offsets.entry(type_offset as usize) as usize; } let len; - 
dbg!(&self.types_present); - dbg!(&self.type_id_offsets); - dbg!(&self.block_offsets); - if dbg!(i == dbg!(self.types_present.len()) - 1) { + if i == self.types_present.len() - 1 { if i == 0 { len = self.block_offsets.len() - type_offset; } else { len = self.block_offsets.len() - type_offset - 1; } } else { - let next_offset = dbg!(self.type_offsets.entry(i) as usize); + let next_offset = self.type_offsets.entry(i) as usize; if i == 0 { len = next_offset - type_offset; } else { - len = dbg!(next_offset - type_offset - 1); + len = next_offset - type_offset - 1; } } - dbg!(type_offset); + let logarray_slice; if len == 0 { // any slice will do @@ -175,16 +172,13 @@ impl TypedDict { pub fn entry(&self, id: usize) -> Option<(Datatype, SizedDictEntry)> { if id > self.num_entries() { - dbg!(self.num_entries()); return None; } let type_index = self.type_index_for_id(id as u64); let (dict, offset) = self.inner_type_segment(type_index); - dbg!(offset); - dbg!(type_index); let dt = self.type_for_type_index(type_index); - dbg!(dict.entry(id - offset as usize).map(|e| (dt, e))) + dict.entry(id - offset as usize).map(|e| (dt, e)) } pub fn num_entries(&self) -> usize { @@ -265,6 +259,10 @@ pub struct TypedDictSegment { impl TypedDictSegment { pub fn parse(offsets: Bytes, data: Bytes, dict_offset: u64) -> Self { + let offsets2 = offsets.clone(); + let data2 = data.clone(); + dbg!(offsets2); + dbg!(data2); let dict = SizedDict::parse(offsets, data, dict_offset); Self { dict, @@ -676,7 +674,6 @@ impl TypedDictBufBuilder u64 { - eprintln!("Adding entry: {dt:?},{e:?}"); self.add(dt, e.to_bytes()) } @@ -685,21 +682,18 @@ impl TypedDictBufBuilder (B1, B2, B3, B4) { - eprintln!("Finalizing now"); + /* if self.current_datatype == None { panic!("There was nothing added to this dictionary!"); - } + }*/ let (mut block_offset_builder, data_buf, _, _) = self.sized_dict_buf_builder.unwrap().finalize(); - eprintln!("a"); + block_offset_builder.pop(); let block_offsets_buf = block_offset_builder.finalize(); - eprintln!("b"); - dbg!(&self.types_present_builder.vals); let types_present_buf = self.types_present_builder.finalize(); - eprintln!("c"); let type_offsets_buf = self.type_offsets_builder.finalize(); - eprintln!("Finalized..."); + ( types_present_buf, type_offsets_buf, @@ -1205,4 +1199,49 @@ mod tests { assert_eq!(vec[i], convert_entry(dict.entry(i + 1).unwrap())) } } + + #[test] + fn test_incremental_builder_small_dicts() { + let mut vec: Vec<(Datatype, Bytes)> = vec![ + String::make_entry(&"fdsa"), + i32::make_entry(&-500_i32), + u32::make_entry(&20_u32), + i64::make_entry(&-3_i64), + Decimal::make_entry(&Decimal("-12342343.2348973".to_string())), + f32::make_entry(&23434.389832_f32), + Integer::make_entry(&int("239487329872343987")), + ]; + vec.sort(); + + let used_types_buf = BytesMut::new(); + let type_offsets_buf = BytesMut::new(); + let block_offsets_buf = BytesMut::new(); + let data_buf = BytesMut::new(); + + let mut typed_builder = TypedDictBufBuilder::new( + used_types_buf, + type_offsets_buf, + block_offsets_buf, + data_buf, + ); + + let _results: Vec = vec + .clone() + .into_iter() + .map(|(dt, entry)| typed_builder.add(dt, entry)) + .collect(); + + let (used_types, type_offsets, block_offsets, data) = typed_builder.finalize(); + + let dict = TypedDict::from_parts( + used_types.freeze(), + type_offsets.freeze(), + block_offsets.freeze(), + data.freeze(), + ); + + for i in 0..vec.len() { + assert_eq!(vec[i], convert_entry(dict.entry(i + 1).unwrap())) + } + } } From 55f20d5e98c3595630a9b7015b1e31a516db7016 
Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Sat, 3 Dec 2022 00:45:43 +0100 Subject: [PATCH 54/99] 190 passing --- src/layer/builder.rs | 77 ++++++++++++++++++++++++++++--------- src/layer/internal/base.rs | 16 ++++---- src/layer/internal/child.rs | 42 ++++++++++++-------- src/storage/consts.rs | 4 +- src/storage/file.rs | 28 +++++++------- src/storage/layer.rs | 36 ++++++++--------- src/storage/memory.rs | 10 ++--- src/structure/tfc/block.rs | 2 +- src/structure/tfc/dict.rs | 24 +++++++++--- src/structure/tfc/file.rs | 15 ++++---- src/structure/tfc/typed.rs | 41 ++++++++++++++------ 11 files changed, 191 insertions(+), 104 deletions(-) diff --git a/src/layer/builder.rs b/src/layer/builder.rs index 822a3bc4..55df9d00 100644 --- a/src/layer/builder.rs +++ b/src/layer/builder.rs @@ -1,6 +1,6 @@ use std::io; -use bytes::{BytesMut, Bytes}; +use bytes::{Bytes, BytesMut}; use futures::stream::TryStreamExt; use rayon::prelude::*; use tfc::dict::SizedDictBufBuilder; @@ -25,12 +25,26 @@ impl DictionarySetFileBuilder { predicate_files: DictionaryFiles, value_files: TypedDictionaryFiles, ) -> io::Result { - let node_dictionary_builder = SizedDictBufBuilder::new(None, 0, 0, LateLogArrayBufBuilder::new(BytesMut::new()), BytesMut::new()); - let predicate_dictionary_builder = SizedDictBufBuilder::new(None, 0, 0, LateLogArrayBufBuilder::new(BytesMut::new()), BytesMut::new()); - let value_dictionary_builder = TypedDictBufBuilder::new(BytesMut::new(), - BytesMut::new(), - BytesMut::new(), - BytesMut::new()); + let node_dictionary_builder = SizedDictBufBuilder::new( + None, + 0, + 0, + LateLogArrayBufBuilder::new(BytesMut::new()), + BytesMut::new(), + ); + let predicate_dictionary_builder = SizedDictBufBuilder::new( + None, + 0, + 0, + LateLogArrayBufBuilder::new(BytesMut::new()), + BytesMut::new(), + ); + let value_dictionary_builder = TypedDictBufBuilder::new( + BytesMut::new(), + BytesMut::new(), + BytesMut::new(), + BytesMut::new(), + ); Ok(Self { node_files, @@ -46,7 +60,9 @@ impl DictionarySetFileBuilder { /// /// Panics if the given node string is not a lexical successor of the previous node string. pub fn add_node(&mut self, node: &str) -> u64 { - let id = self.node_dictionary_builder.add(Bytes::copy_from_slice(node.as_bytes())); + let id = self + .node_dictionary_builder + .add(Bytes::copy_from_slice(node.as_bytes())); id } @@ -55,7 +71,9 @@ impl DictionarySetFileBuilder { /// /// Panics if the given predicate string is not a lexical successor of the previous node string. pub fn add_predicate(&mut self, predicate: &str) -> u64 { - let id = self.predicate_dictionary_builder.add(Bytes::copy_from_slice(predicate.as_bytes())); + let id = self + .predicate_dictionary_builder + .add(Bytes::copy_from_slice(predicate.as_bytes())); id } @@ -64,8 +82,9 @@ impl DictionarySetFileBuilder { /// /// Panics if the given value string is not a lexical successor of the previous value string. 
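// Aside: a usage sketch of the typed builder constructed above. The four
// BytesMut buffers collect types-present, type-offsets, block-offsets and
// block data, in that order; ids appear to be handed out densely starting
// from 1, which is an inference from the round-trip tests in this series,
// not a documented guarantee.
use bytes::{Bytes, BytesMut};

fn first_value_id() -> u64 {
    let mut values = TypedDictBufBuilder::new(
        BytesMut::new(),
        BytesMut::new(),
        BytesMut::new(),
        BytesMut::new(),
    );
    values.add(Datatype::String, Bytes::copy_from_slice(b"quack"))
}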
pub fn add_value(&mut self, value: &str) -> u64 { - let id = self.value_dictionary_builder.add(Datatype::String, - Bytes::copy_from_slice(value.as_bytes())); + let id = self + .value_dictionary_builder + .add(Datatype::String, Bytes::copy_from_slice(value.as_bytes())); id } @@ -128,16 +147,38 @@ impl DictionarySetFileBuilder { } pub async fn finalize(self) -> io::Result<()> { - let (node_offsets_builder, mut node_data_buf, _, _) = self.node_dictionary_builder.finalize(); + let (mut node_offsets_builder, mut node_data_buf, _, _) = + self.node_dictionary_builder.finalize(); + // last offset is useless + node_offsets_builder.pop(); let mut node_offsets_buf = node_offsets_builder.finalize(); - let (predicate_offsets_builder, mut predicate_data_buf, _, _) = self.predicate_dictionary_builder.finalize(); + let (mut predicate_offsets_builder, mut predicate_data_buf, _, _) = + self.predicate_dictionary_builder.finalize(); + // last offset is useless + predicate_offsets_builder.pop(); let mut predicate_offsets_buf = predicate_offsets_builder.finalize(); - let (mut value_types_present_buf, mut value_type_offsets_buf, mut value_offsets_buf, mut value_data_buf) = self.value_dictionary_builder.finalize(); - - self.node_files.write_all_from_bufs(&mut node_data_buf, &mut node_offsets_buf).await?; - self.predicate_files.write_all_from_bufs(&mut predicate_data_buf, &mut predicate_offsets_buf).await?; + let ( + mut value_types_present_buf, + mut value_type_offsets_buf, + mut value_offsets_buf, + mut value_data_buf, + ) = self.value_dictionary_builder.finalize(); + + self.node_files + .write_all_from_bufs(&mut node_data_buf, &mut node_offsets_buf) + .await?; + self.predicate_files + .write_all_from_bufs(&mut predicate_data_buf, &mut predicate_offsets_buf) + .await?; - self.value_files.write_all_from_bufs(&mut value_types_present_buf, &mut value_type_offsets_buf, &mut value_offsets_buf, &mut value_data_buf).await?; + self.value_files + .write_all_from_bufs( + &mut value_types_present_buf, + &mut value_type_offsets_buf, + &mut value_offsets_buf, + &mut value_data_buf, + ) + .await?; Ok(()) } diff --git a/src/layer/internal/base.rs b/src/layer/internal/base.rs index 6d8e223a..4d6cbe69 100644 --- a/src/layer/internal/base.rs +++ b/src/layer/internal/base.rs @@ -49,21 +49,21 @@ impl BaseLayer { } pub fn load(name: [u32; 5], maps: BaseLayerMaps) -> InternalLayer { - let node_dictionary = TypedDictSegment::parse( - maps.node_dictionary_maps.blocks_map, + let node_dictionary = StringDict::parse( maps.node_dictionary_maps.offsets_map, + maps.node_dictionary_maps.blocks_map, 0, ); - let predicate_dictionary = TypedDictSegment::parse( - maps.predicate_dictionary_maps.blocks_map, + let predicate_dictionary = StringDict::parse( maps.predicate_dictionary_maps.offsets_map, + maps.predicate_dictionary_maps.blocks_map, 0, ); let value_dictionary = TypedDict::from_parts( maps.value_dictionary_maps.types_present_map, maps.value_dictionary_maps.type_offsets_map, - maps.value_dictionary_maps.blocks_map, maps.value_dictionary_maps.offsets_map, + maps.value_dictionary_maps.blocks_map, ); let node_value_idmap = match maps.id_map_maps.node_value_idmap_maps { @@ -266,13 +266,13 @@ impl BaseLayerFileBuilder { let value_dict_blocks_map = files.value_dictionary_files.blocks_file.map().await?; let value_dict_offsets_map = files.value_dictionary_files.offsets_file.map().await?; - let node_dict = StringDict::parse(node_dict_blocks_map, node_dict_offsets_map, 0); - let pred_dict = StringDict::parse(predicate_dict_blocks_map, 
predicate_dict_offsets_map, 0); + let node_dict = StringDict::parse(node_dict_offsets_map, node_dict_blocks_map, 0); + let pred_dict = StringDict::parse(predicate_dict_offsets_map, predicate_dict_blocks_map, 0); let val_dict = TypedDict::from_parts( value_dict_types_present_map, value_dict_type_offsets_map, - value_dict_blocks_map, value_dict_offsets_map, + value_dict_blocks_map, ); // TODO: it is a bit silly to parse the dictionaries just for this. surely we can get the counts in an easier way? diff --git a/src/layer/internal/child.rs b/src/layer/internal/child.rs index 978fe965..09744582 100644 --- a/src/layer/internal/child.rs +++ b/src/layer/internal/child.rs @@ -62,21 +62,21 @@ impl ChildLayer { } pub fn load(name: [u32; 5], parent: Arc, maps: ChildLayerMaps) -> InternalLayer { - let node_dictionary = TypedDictSegment::parse( - maps.node_dictionary_maps.blocks_map, + let node_dictionary = StringDict::parse( maps.node_dictionary_maps.offsets_map, + maps.node_dictionary_maps.blocks_map, 0, ); - let predicate_dictionary = TypedDictSegment::parse( - maps.predicate_dictionary_maps.blocks_map, + let predicate_dictionary = StringDict::parse( maps.predicate_dictionary_maps.offsets_map, + maps.predicate_dictionary_maps.blocks_map, 0, ); let value_dictionary = TypedDict::from_parts( maps.value_dictionary_maps.types_present_map, maps.value_dictionary_maps.type_offsets_map, - maps.value_dictionary_maps.blocks_map, maps.value_dictionary_maps.offsets_map, + maps.value_dictionary_maps.blocks_map, ); let parent_node_value_count = parent.node_and_value_count(); @@ -345,23 +345,35 @@ impl ChildLayerFileBuil } = self; builder.finalize().await?; - - let node_dict_blocks_map = files.node_dictionary_files.blocks_file.map().await?; + eprintln!("Into phase2"); let node_dict_offsets_map = files.node_dictionary_files.offsets_file.map().await?; - let predicate_dict_blocks_map = files.predicate_dictionary_files.blocks_file.map().await?; + let node_dict_blocks_map = files.node_dictionary_files.blocks_file.map().await?; let predicate_dict_offsets_map = files.predicate_dictionary_files.offsets_file.map().await?; - let value_dict_blocks_map = files.value_dictionary_files.blocks_file.map().await?; + let predicate_dict_blocks_map = files.predicate_dictionary_files.blocks_file.map().await?; + let value_dict_types_present_map = files + .value_dictionary_files + .types_present_file + .map() + .await?; + let value_dict_type_offsets_map = + files.value_dictionary_files.type_offsets_file.map().await?; let value_dict_offsets_map = files.value_dictionary_files.offsets_file.map().await?; + let value_dict_blocks_map = files.value_dictionary_files.blocks_file.map().await?; - let node_dict = PfcDict::parse(node_dict_blocks_map, node_dict_offsets_map)?; - let pred_dict = PfcDict::parse(predicate_dict_blocks_map, predicate_dict_offsets_map)?; - let val_dict = PfcDict::parse(value_dict_blocks_map, value_dict_offsets_map)?; + let node_dict = StringDict::parse(node_dict_offsets_map, node_dict_blocks_map, 0); + let pred_dict = StringDict::parse(predicate_dict_offsets_map, predicate_dict_blocks_map, 0); + let val_dict = TypedDict::from_parts( + value_dict_types_present_map, + value_dict_type_offsets_map, + value_dict_offsets_map, + value_dict_blocks_map, + ); // TODO: it is a bit silly to parse the dictionaries just for this. surely we can get the counts in an easier way? 
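// Aside: what the TODO above costs today, in numbers. num_entries() is
// reconstructed from block headers inside TypedDict::from_parts, so counting
// currently requires a parse. A worked instance of that arithmetic, with
// BLOCK_SIZE = 8: type A holds 3 entries (one block, gap 8 - 3 = 5) and
// type B holds 10 entries (blocks of 8 and 2 records, final gap 8 - 2 = 6).
// block_offsets then records 2 boundaries, tally = 5, B's ids start at
// type_id_offset = (0 + 1) * 8 - 5 = 3, and the total comes out right:
#[test]
fn worked_entry_count() {
    let (boundaries, tally, last_gap) = (2usize, 5usize, 6usize);
    assert_eq!((boundaries + 1) * BLOCK_SIZE - tally - last_gap, 3 + 10);
}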
- let num_nodes = node_dict.len(); - let num_predicates = pred_dict.len(); - let num_values = val_dict.len(); + let num_nodes = node_dict.num_entries(); + let num_predicates = pred_dict.num_entries(); + let num_values = val_dict.num_entries(); ChildLayerFileBuilderPhase2::new(parent, files, num_nodes, num_predicates, num_values).await } diff --git a/src/storage/consts.rs b/src/storage/consts.rs index de296ebb..6731d9f8 100644 --- a/src/storage/consts.rs +++ b/src/storage/consts.rs @@ -186,11 +186,13 @@ pub const FILENAMES: Filenames = Filenames { rollup: "rollup.hex", }; -pub const SHARED_REQUIRED_FILES: [&'static str; 6] = [ +pub const SHARED_REQUIRED_FILES: [&'static str; 8] = [ FILENAMES.node_dictionary_blocks, FILENAMES.node_dictionary_offsets, FILENAMES.predicate_dictionary_blocks, FILENAMES.predicate_dictionary_offsets, + FILENAMES.value_dictionary_types_present, + FILENAMES.value_dictionary_type_offsets, FILENAMES.value_dictionary_blocks, FILENAMES.value_dictionary_offsets, ]; diff --git a/src/storage/file.rs b/src/storage/file.rs index 29e2c337..89556ee0 100644 --- a/src/storage/file.rs +++ b/src/storage/file.rs @@ -280,14 +280,14 @@ impl TypedDictionaryFiles { pub async fn map_all(&self) -> io::Result { let types_present_map = self.types_present_file.map().await?; let type_offsets_map = self.type_offsets_file.map().await?; - let blocks_map = self.blocks_file.map().await?; let offsets_map = self.offsets_file.map().await?; + let blocks_map = self.blocks_file.map().await?; Ok(TypedDictionaryMaps { types_present_map, type_offsets_map, - blocks_map, offsets_map, + blocks_map, }) } @@ -295,20 +295,20 @@ impl TypedDictionaryFiles { &self, types_present_buf: &mut B1, type_offsets_buf: &mut B2, - blocks_buf: &mut B3, - offsets_buf: &mut B4, + offsets_buf: &mut B3, + blocks_buf: &mut B4, ) -> io::Result<()> { let mut types_present_writer = self.types_present_file.open_write().await?; let mut type_offsets_writer = self.type_offsets_file.open_write().await?; - let mut blocks_writer = self.blocks_file.open_write().await?; let mut offsets_writer = self.offsets_file.open_write().await?; + let mut blocks_writer = self.blocks_file.open_write().await?; types_present_writer .write_all_buf(types_present_buf) .await?; type_offsets_writer.write_all_buf(type_offsets_buf).await?; - blocks_writer.write_all_buf(blocks_buf).await?; offsets_writer.write_all_buf(offsets_buf).await?; + blocks_writer.write_all_buf(blocks_buf).await?; types_present_writer.flush().await?; types_present_writer.sync_all().await?; @@ -316,12 +316,12 @@ impl TypedDictionaryFiles { type_offsets_writer.flush().await?; type_offsets_writer.sync_all().await?; - blocks_writer.flush().await?; - blocks_writer.sync_all().await?; - offsets_writer.flush().await?; offsets_writer.sync_all().await?; + blocks_writer.flush().await?; + blocks_writer.sync_all().await?; + Ok(()) } } @@ -345,8 +345,8 @@ impl DictionaryFiles { let offsets_map = self.offsets_file.map().await?; Ok(DictionaryMaps { - blocks_map, offsets_map, + blocks_map, }) } @@ -355,18 +355,18 @@ impl DictionaryFiles { blocks_buf: &mut B1, offsets_buf: &mut B2, ) -> io::Result<()> { - let mut blocks_writer = self.blocks_file.open_write().await?; let mut offsets_writer = self.offsets_file.open_write().await?; + let mut blocks_writer = self.blocks_file.open_write().await?; blocks_writer.write_all_buf(blocks_buf).await?; offsets_writer.write_all_buf(offsets_buf).await?; - blocks_writer.flush().await?; - blocks_writer.sync_all().await?; - offsets_writer.flush().await?; 
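// Aside: the producing side of this method, matching the call site in
// layer/builder.rs in this same patch. Trait bounds on `F` are elided here
// for brevity; the tuple comes out of TypedDictBufBuilder::finalize() in
// exactly the order the parameters expect.
async fn persist_value_dict<F>(
    files: &TypedDictionaryFiles<F>,
    builder: TypedDictBufBuilder<BytesMut, BytesMut, BytesMut, BytesMut>,
) -> io::Result<()> {
    let (mut types_present, mut type_offsets, mut offsets, mut data) = builder.finalize();
    files
        .write_all_from_bufs(&mut types_present, &mut type_offsets, &mut offsets, &mut data)
        .await
}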
offsets_writer.sync_all().await?; + blocks_writer.flush().await?; + blocks_writer.sync_all().await?; + Ok(()) } } diff --git a/src/storage/layer.rs b/src/storage/layer.rs index bac60e72..44b290e7 100644 --- a/src/storage/layer.rs +++ b/src/storage/layer.rs @@ -366,8 +366,8 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { FILENAMES.predicate_dictionary_offsets, FILENAMES.value_dictionary_types_present, FILENAMES.value_dictionary_type_offsets, - FILENAMES.value_dictionary_blocks, FILENAMES.value_dictionary_offsets, + FILENAMES.value_dictionary_blocks, FILENAMES.node_value_idmap_bits, FILENAMES.node_value_idmap_bit_index_blocks, FILENAMES.node_value_idmap_bit_index_sblocks, @@ -411,8 +411,8 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { value_dictionary_files: TypedDictionaryFiles { types_present_file: files[4].clone(), type_offsets_file: files[5].clone(), - blocks_file: files[6].clone(), - offsets_file: files[7].clone(), + offsets_file: files[6].clone(), + blocks_file: files[7].clone(), }, id_map_files: IdMapFiles { @@ -471,8 +471,8 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { FILENAMES.predicate_dictionary_offsets, FILENAMES.value_dictionary_types_present, FILENAMES.value_dictionary_type_offsets, - FILENAMES.value_dictionary_blocks, FILENAMES.value_dictionary_offsets, + FILENAMES.value_dictionary_blocks, FILENAMES.node_value_idmap_bits, FILENAMES.node_value_idmap_bit_index_blocks, FILENAMES.node_value_idmap_bit_index_sblocks, @@ -532,8 +532,8 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { value_dictionary_files: TypedDictionaryFiles { types_present_file: files[4].clone(), type_offsets_file: files[5].clone(), - blocks_file: files[6].clone(), - offsets_file: files[7].clone(), + offsets_file: files[6].clone(), + blocks_file: files[7].clone(), }, id_map_files: IdMapFiles { @@ -706,12 +706,12 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { ) -> io::Result> { // does layer exist? if self.directory_exists(layer).await? { - let blocks_file = self - .get_file(layer, FILENAMES.node_dictionary_blocks) - .await?; let offsets_file = self .get_file(layer, FILENAMES.node_dictionary_offsets) .await?; + let blocks_file = self + .get_file(layer, FILENAMES.node_dictionary_blocks) + .await?; Ok(DictionaryFiles { blocks_file, @@ -728,12 +728,12 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { ) -> io::Result> { // does layer exist? if self.directory_exists(layer).await? 
{ - let blocks_file = self - .get_file(layer, FILENAMES.predicate_dictionary_blocks) - .await?; let offsets_file = self .get_file(layer, FILENAMES.predicate_dictionary_offsets) .await?; + let blocks_file = self + .get_file(layer, FILENAMES.predicate_dictionary_blocks) + .await?; Ok(DictionaryFiles { blocks_file, @@ -756,12 +756,12 @@ pub trait PersistentLayerStore: 'static + Send + Sync + Clone { let type_offsets_file = self .get_file(layer, FILENAMES.value_dictionary_type_offsets) .await?; - let blocks_file = self - .get_file(layer, FILENAMES.value_dictionary_blocks) - .await?; let offsets_file = self .get_file(layer, FILENAMES.value_dictionary_offsets) .await?; + let blocks_file = self + .get_file(layer, FILENAMES.value_dictionary_blocks) + .await?; Ok(TypedDictionaryFiles { types_present_file, @@ -1569,8 +1569,8 @@ impl u8 { - parse_block_control_word(cw).1 + dbg!(parse_block_control_word(cw).1) } pub fn parse_block_control_word(cw: u8) -> (Option, u8) { diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 542722e2..c5ab4e29 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -44,6 +44,7 @@ impl SizedDictBufBuilder { offsets: LateLogArrayBufBuilder, data_buf: B2, ) -> Self { + dbg!(block_offset); Self { record_size, block_offset, @@ -86,9 +87,13 @@ impl SizedDictBufBuilder { } pub fn finalize(mut self) -> (LateLogArrayBufBuilder, B2, u64, u64) { - if self.current_block.len() > 0 { + if dbg!(self.current_block.len()) > 0 { let current_block: Vec<&[u8]> = self.current_block.iter().map(|e| e.as_ref()).collect(); - let size = build_block_unchecked(self.record_size, &mut self.data_buf, ¤t_block); + let size = dbg!(build_block_unchecked( + self.record_size, + &mut self.data_buf, + ¤t_block + )); self.block_offset += size as u64; self.offsets.push(self.block_offset); } @@ -103,13 +108,14 @@ impl SizedDictBufBuilder { } pub fn build_offset_logarray(buf: &mut B, mut offsets: Vec) { + dbg!(&offsets); // the last offset doesn't matter as it's implied by the total size offsets.pop(); let largest_element = offsets.last().cloned().unwrap_or(0); let width = calculate_width(largest_element); let mut array_builder = LogArrayBufBuilder::new(buf, width); - + dbg!(&offsets); array_builder.push_vec(offsets); array_builder.finalize(); } @@ -123,7 +129,11 @@ pub struct SizedDict { impl SizedDict { pub fn parse(offsets: Bytes, data: Bytes, dict_offset: u64) -> Self { + dbg!(&offsets); + dbg!(&data); + dbg!(dict_offset); let offsets = MonotonicLogArray::parse(offsets).unwrap(); + dbg!(&offsets); Self::from_parts(offsets, data, dict_offset) } @@ -136,6 +146,7 @@ impl SizedDict { } fn block_offset(&self, block_index: usize) -> usize { + dbg!(block_index); let offset: usize; if block_index == 0 { offset = 0; @@ -165,12 +176,15 @@ impl SizedDict { } pub fn block_num_elements(&self, block_index: usize) -> u8 { + eprintln!("offset: {block_index}"); let offset = self.block_offset(block_index); - + eprintln!("offset: {offset}"); parse_block_control_records(self.data[offset]) } pub fn num_blocks(&self) -> usize { + dbg!(&self.offsets); + dbg!(&self.data); self.offsets.len() + 1 } @@ -252,7 +266,7 @@ impl SizedDict { } pub fn num_entries(&self) -> usize { - let num_blocks = self.num_blocks(); + let num_blocks = dbg!(self.num_blocks()); let last_block_size = self.block_num_elements(num_blocks - 1); (num_blocks - 1) * BLOCK_SIZE + last_block_size as usize diff --git a/src/structure/tfc/file.rs b/src/structure/tfc/file.rs index 9b2e5528..ed28fce4 100644 --- 
a/src/structure/tfc/file.rs +++ b/src/structure/tfc/file.rs @@ -38,13 +38,14 @@ pub async fn merge_string_dictionaries< build_dict_unchecked(None, 0, &mut offsets, &mut data_buf, sorted_iterator); build_offset_logarray(&mut offsets_buf, offsets); - blocks_file_writer.write_all(data_buf.as_ref()).await?; - blocks_file_writer.flush().await?; - blocks_file_writer.sync_all().await?; offsets_file_writer.write_all(offsets_buf.as_ref()).await?; offsets_file_writer.flush().await?; offsets_file_writer.sync_all().await?; + blocks_file_writer.write_all(data_buf.as_ref()).await?; + blocks_file_writer.flush().await?; + blocks_file_writer.sync_all().await?; + Ok(()) } @@ -97,13 +98,13 @@ pub async fn merge_typed_dictionaries< type_offsets_file_writer.flush().await?; type_offsets_file_writer.sync_all().await?; - blocks_file_writer.write_all(data_buf.as_ref()).await?; - blocks_file_writer.flush().await?; - blocks_file_writer.sync_all().await?; - offsets_file_writer.write_all(offsets_buf.as_ref()).await?; offsets_file_writer.flush().await?; offsets_file_writer.sync_all().await?; + blocks_file_writer.write_all(data_buf.as_ref()).await?; + blocks_file_writer.flush().await?; + blocks_file_writer.sync_all().await?; + Ok(()) } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 40c3bfae..758d32aa 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -35,10 +35,20 @@ impl TypedDict { block_offsets: Bytes, data: Bytes, ) -> Self { + let types_present2 = types_present.clone(); + dbg!(types_present2); + let type_offsets2 = type_offsets.clone(); + dbg!(type_offsets2); + let block_offsets2 = block_offsets.clone(); + dbg!(block_offsets2); + let data2 = data.clone(); + dbg!(data2); let types_present = MonotonicLogArray::parse(types_present).unwrap(); let type_offsets = MonotonicLogArray::parse(type_offsets).unwrap(); let block_offsets = MonotonicLogArray::parse(block_offsets).unwrap(); - + dbg!(&types_present); + dbg!(&type_offsets); + dbg!(&block_offsets); let mut tally: u64 = 0; let mut type_id_offsets = Vec::with_capacity(types_present.len() - 1); for type_offset in type_offsets.iter() { @@ -61,20 +71,26 @@ impl TypedDict { 1 } else { BLOCK_SIZE - - parse_block_control_records( + - dbg!(parse_block_control_records( data[block_offsets.entry(block_offsets.len() - 1) as usize], - ) as usize + ) as usize) }; - let num_entries = (block_offsets.len() + 1) * BLOCK_SIZE - tally as usize - last_gap; - - Self { + dbg!(last_gap); + dbg!((block_offsets.len() + 1) * BLOCK_SIZE - tally as usize); + let num_entries = if block_offsets.len() == 0 { + parse_block_control_records(data[0]) as usize + } else { + (block_offsets.len() + 1) * BLOCK_SIZE - tally as usize - last_gap + }; + dbg!(num_entries); + dbg!(Self { types_present, type_offsets, block_offsets, type_id_offsets, num_entries, data, - } + }) } pub fn id>(&self, v: &Q) -> IdLookupResult { @@ -92,7 +108,7 @@ impl TypedDict { let type_offset; let block_offset; let id_offset; - + dbg!(i); if i == 0 { type_offset = 0; block_offset = 0; @@ -102,7 +118,7 @@ impl TypedDict { id_offset = self.type_id_offsets[i - 1]; block_offset = self.block_offsets.entry(type_offset as usize) as usize; } - + dbg!(block_offset); let len; if i == self.types_present.len() - 1 { if i == 0 { @@ -263,7 +279,9 @@ impl TypedDictSegment { let data2 = data.clone(); dbg!(offsets2); dbg!(data2); + dbg!(dict_offset); let dict = SizedDict::parse(offsets, data, dict_offset); + dbg!(&dict); Self { dict, _x: Default::default(), @@ -689,11 +707,10 @@ impl 
TypedDictBufBuilder Date: Sun, 4 Dec 2022 19:05:16 +0100 Subject: [PATCH 55/99] only 90 failing --- src/layer/internal/mod.rs | 19 +++++++++++-------- src/layer/layer.rs | 1 + src/structure/tfc/block.rs | 12 +++++++----- src/structure/tfc/dict.rs | 17 +++++++++++++---- src/structure/tfc/typed.rs | 18 +++++++++++++++--- 5 files changed, 47 insertions(+), 20 deletions(-) diff --git a/src/layer/internal/mod.rs b/src/layer/internal/mod.rs index 650de6a2..510f4f03 100644 --- a/src/layer/internal/mod.rs +++ b/src/layer/internal/mod.rs @@ -551,6 +551,7 @@ impl Layer for InternalLayer { } fn subject_id<'a>(&'a self, subject: &str) -> Option { + eprintln!("In subject_id"); let to_result = |layer: &'a InternalLayer| { ( layer @@ -565,10 +566,12 @@ impl Layer for InternalLayer { result = to_result(layer); } let (id_option, parent_option) = result; - id_option.map(|id| 1 + id + parent_option.map_or(0, |p| p.node_and_value_count() as u64)) + eprintln!("id_option: {id_option:?}"); + id_option.map(|id| id + parent_option.map_or(0, |p| p.node_and_value_count() as u64)) } fn predicate_id<'a>(&'a self, predicate: &str) -> Option { + eprintln!("In predicate id"); let to_result = |layer: &'a InternalLayer| { ( layer @@ -583,7 +586,7 @@ impl Layer for InternalLayer { result = to_result(layer); } let (id_option, parent_option) = result; - id_option.map(|id| 1 + id + parent_option.map_or(0, |p| p.predicate_count() as u64)) + id_option.map(|id| id + parent_option.map_or(0, |p| p.predicate_count() as u64)) } fn object_node_id<'a>(&'a self, object: &str) -> Option { @@ -601,7 +604,7 @@ impl Layer for InternalLayer { result = to_result(layer); } let (id_option, parent_option) = result; - id_option.map(|id| 1 + id + parent_option.map_or(0, |p| p.node_and_value_count() as u64)) + id_option.map(|id| id + parent_option.map_or(0, |p| p.node_and_value_count() as u64)) } fn object_value_id<'a>(&'a self, object: &str) -> Option { @@ -620,14 +623,14 @@ impl Layer for InternalLayer { result = to_result(layer); } let (id_option, parent_option) = result; - id_option.map(|id| 1 + id + parent_option.map_or(0, |p| p.node_and_value_count() as u64)) + id_option.map(|id| id + parent_option.map_or(0, |p| p.node_and_value_count() as u64)) } fn id_subject(&self, id: u64) -> Option { if id == 0 { return None; } - let mut corrected_id = id - 1; + let mut corrected_id = id; let mut current_option: Option<&InternalLayer> = Some(self); let mut parent_count = self.node_and_value_count() as u64; while let Some(current_layer) = current_option { @@ -663,7 +666,7 @@ impl Layer for InternalLayer { let mut current_option: Option<&InternalLayer> = Some(self); let mut parent_count = self.predicate_count() as u64; while let Some(current_layer) = current_option { - let mut corrected_id = id - 1; + let mut corrected_id = id; if let Some(parent) = current_layer.immediate_parent() { parent_count -= current_layer.predicate_dict_len() as u64; if corrected_id >= parent_count as u64 { @@ -691,7 +694,7 @@ impl Layer for InternalLayer { if id == 0 { return None; } - let mut corrected_id = id - 1; + let mut corrected_id = id; let mut current_option: Option<&InternalLayer> = Some(self); let mut parent_count = self.node_and_value_count() as u64; while let Some(current_layer) = current_option { @@ -734,7 +737,7 @@ impl Layer for InternalLayer { return None; } - let mut corrected_id = id - 1; + let mut corrected_id = id; let mut current_option: Option<&InternalLayer> = Some(self); let mut parent_count = self.node_and_value_count() as u64; while let 
Some(current_layer) = current_option { diff --git a/src/layer/layer.rs b/src/layer/layer.rs index 27680d31..ca88a338 100644 --- a/src/layer/layer.rs +++ b/src/layer/layer.rs @@ -79,6 +79,7 @@ pub trait Layer: Send + Sync { /// Returns true if the given triple exists, and false otherwise. fn string_triple_exists(&self, triple: &StringTriple) -> bool { + eprintln!("I am here"); self.string_triple_to_id(triple) .map(|t| self.id_triple_exists(t)) .unwrap_or(false) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 02bee893..2569ae51 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -38,6 +38,7 @@ impl From for SizedDictError { impl SizedBlockHeader { fn parse(buf: &mut Bytes) -> Result { let cw = buf.get_u8(); + dbg!(&buf); let (record_size, num_entries) = parse_block_control_word(cw); let mut sizes = [0_usize; BLOCK_SIZE - 1]; let mut shareds = [0_usize; BLOCK_SIZE - 1]; @@ -59,13 +60,13 @@ impl SizedBlockHeader { let buffer_length = sizes.iter().sum(); - Ok(Self { + Ok(dbg!(Self { head, num_entries, buffer_length, sizes, shareds, - }) + })) } } @@ -373,7 +374,7 @@ impl SizedDictBlock { let data = bytes.split_to(header.buffer_length); - Ok(Self { header, data }) + Ok(dbg!(Self { header, data })) } pub fn num_entries(&self) -> u8 { @@ -385,6 +386,7 @@ impl SizedDictBlock { } pub fn entry(&self, index: usize) -> SizedDictEntry { + dbg!(index); if index == 0 { return SizedDictEntry::new(vec![self.header.head.clone()]); } @@ -448,7 +450,7 @@ impl SizedDictBlock { let suffix_size = self.header.sizes[index - 1]; slices.push(self.data.slice(offset..offset + suffix_size)); - SizedDictEntry::new_optimized(slices) + dbg!(SizedDictEntry::new_optimized(slices)) } fn suffixes<'a>(&'a self) -> impl Iterator + 'a { @@ -489,7 +491,7 @@ impl SizedDictBlock { let (new_common_prefix, ordering) = find_common_prefix_ord(&slice[common_prefix..], &suffix[..]); match ordering { - Ordering::Equal => return IdLookupResult::Found(ix as u64 + 1), + Ordering::Equal => return dbg!(IdLookupResult::Found(ix as u64 + 1)), Ordering::Less => return IdLookupResult::Closest(ix as u64), Ordering::Greater => { common_prefix += new_common_prefix; diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index c5ab4e29..6c682358 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -160,7 +160,7 @@ impl SizedDict { pub fn block_bytes(&self, block_index: usize) -> Bytes { let offset = self.block_offset(block_index); let block_bytes; - block_bytes = self.data.slice(offset..); + block_bytes = dbg!(self.data.slice(offset..)); block_bytes } @@ -176,10 +176,17 @@ impl SizedDict { } pub fn block_num_elements(&self, block_index: usize) -> u8 { - eprintln!("offset: {block_index}"); - let offset = self.block_offset(block_index); + eprintln!("block_index: {block_index}"); + let offset = dbg!(self.block_offset(block_index)); eprintln!("offset: {offset}"); - parse_block_control_records(self.data[offset]) + + dbg!(&self.data); + if dbg!(self.data.len()) == 0 { + eprintln!("size is zero"); + 0 + } else { + dbg!(parse_block_control_records(dbg!(self.data[offset]))) + } } pub fn num_blocks(&self) -> usize { @@ -189,6 +196,7 @@ impl SizedDict { } pub fn entry(&self, index: usize) -> Option { + dbg!(index); if index > self.num_entries() { return None; } @@ -197,6 +205,7 @@ impl SizedDict { } pub fn id(&self, slice: &[u8]) -> IdLookupResult { + dbg!(slice); // let's binary search let mut min = 0; let mut max = self.offsets.len(); diff --git a/src/structure/tfc/typed.rs 
b/src/structure/tfc/typed.rs index 758d32aa..50fa3600 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -49,17 +49,28 @@ impl TypedDict { dbg!(&types_present); dbg!(&type_offsets); dbg!(&block_offsets); + if types_present.len() == 0 { + return Self { + types_present, + type_offsets, + block_offsets, + type_id_offsets: Vec::new(), + num_entries: 0, + data, + }; + } let mut tally: u64 = 0; let mut type_id_offsets = Vec::with_capacity(types_present.len() - 1); for type_offset in type_offsets.iter() { let last_block_len; if type_offset == 0 { - last_block_len = parse_block_control_records(data[0]); + last_block_len = dbg!(parse_block_control_records(data[0])); } else { let last_block_offset_of_previous_type = block_offsets.entry(type_offset as usize - 1); - last_block_len = - parse_block_control_records(data[last_block_offset_of_previous_type as usize]); + last_block_len = dbg!(parse_block_control_records( + data[last_block_offset_of_previous_type as usize] + )); } let gap = BLOCK_SIZE as u8 - last_block_len; @@ -295,6 +306,7 @@ impl TypedDictSegment { pub fn id>(&self, val: &Q) -> IdLookupResult { let slice = T::to_lexical(val); + dbg!(&slice); self.dict.id(&slice[..]) } From a51feede40b5518b3e1549fcd63acdf9bd17b23d Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Mon, 5 Dec 2022 00:00:14 +0100 Subject: [PATCH 56/99] Fewer debug prints --- src/storage/layer.rs | 2 +- src/structure/logarray.rs | 2 -- src/structure/tfc/block.rs | 18 +++++++----------- src/structure/tfc/dict.rs | 36 ++++++++---------------------------- src/structure/tfc/typed.rs | 37 ++++++++++++------------------------- 5 files changed, 28 insertions(+), 67 deletions(-) diff --git a/src/storage/layer.rs b/src/storage/layer.rs index 44b290e7..d628d39e 100644 --- a/src/storage/layer.rs +++ b/src/storage/layer.rs @@ -2286,7 +2286,7 @@ mod tests { HashMap, )> { let mut builder = store.create_base_layer().await?; - let name = dbg!(builder.name()); + let name = builder.name(); for t in BASE_TRIPLES.iter() { builder.add_string_triple(t.clone()); } diff --git a/src/structure/logarray.rs b/src/structure/logarray.rs index 6429e459..2f980868 100644 --- a/src/structure/logarray.rs +++ b/src/structure/logarray.rs @@ -296,8 +296,6 @@ impl LogArray { /// /// Panics if `index` + `length` is >= the length of the log array. 
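// Aside: the documented bound in numbers. With `arr` a hypothetical
// 10-element LogArray, slice(2, 5) yields a view of elements 2..7 and is
// safely in bounds, while slice(8, 4) panics because 8 + 4 runs past the end.
fn middle_window(arr: &LogArray) -> LogArray {
    arr.slice(2, 5)
}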
pub fn slice(&self, offset: usize, len: usize) -> LogArray { - dbg!(len); - dbg!(offset); let offset = u32::try_from(offset) .unwrap_or_else(|_| panic!("expected 32-bit slice offset ({})", offset)); let len = diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index 2569ae51..f4332f2d 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -38,7 +38,7 @@ impl From for SizedDictError { impl SizedBlockHeader { fn parse(buf: &mut Bytes) -> Result { let cw = buf.get_u8(); - dbg!(&buf); + let (record_size, num_entries) = parse_block_control_word(cw); let mut sizes = [0_usize; BLOCK_SIZE - 1]; let mut shareds = [0_usize; BLOCK_SIZE - 1]; @@ -60,13 +60,13 @@ impl SizedBlockHeader { let buffer_length = sizes.iter().sum(); - Ok(dbg!(Self { + Ok(Self { head, num_entries, buffer_length, sizes, shareds, - })) + }) } } @@ -374,7 +374,7 @@ impl SizedDictBlock { let data = bytes.split_to(header.buffer_length); - Ok(dbg!(Self { header, data })) + Ok(Self { header, data }) } pub fn num_entries(&self) -> u8 { @@ -386,7 +386,6 @@ impl SizedDictBlock { } pub fn entry(&self, index: usize) -> SizedDictEntry { - dbg!(index); if index == 0 { return SizedDictEntry::new(vec![self.header.head.clone()]); } @@ -450,7 +449,7 @@ impl SizedDictBlock { let suffix_size = self.header.sizes[index - 1]; slices.push(self.data.slice(offset..offset + suffix_size)); - dbg!(SizedDictEntry::new_optimized(slices)) + SizedDictEntry::new_optimized(slices) } fn suffixes<'a>(&'a self) -> impl Iterator + 'a { @@ -491,7 +490,7 @@ impl SizedDictBlock { let (new_common_prefix, ordering) = find_common_prefix_ord(&slice[common_prefix..], &suffix[..]); match ordering { - Ordering::Equal => return dbg!(IdLookupResult::Found(ix as u64 + 1)), + Ordering::Equal => return IdLookupResult::Found(ix as u64 + 1), Ordering::Less => return IdLookupResult::Closest(ix as u64), Ordering::Greater => { common_prefix += new_common_prefix; @@ -607,7 +606,7 @@ impl IdLookupResult { } pub fn parse_block_control_records(cw: u8) -> u8 { - dbg!(parse_block_control_word(cw).1) + parse_block_control_word(cw).1 } pub fn parse_block_control_word(cw: u8) -> (Option, u8) { @@ -631,7 +630,6 @@ fn record_size_encoding(record_size: Option) -> u8 { Some(4) => 3 << 3, Some(8) => 4 << 3, _ => { - dbg!(record_size); panic!("This is really bad!") } } @@ -676,8 +674,6 @@ pub(crate) fn build_block_unchecked( let (vbyte, vbyte_len) = encode_array(suffix_len as u64); buf.put_slice(&vbyte[..vbyte_len]); size += vbyte_len; - } else { - eprintln!("Fixed width: {record_size:?}"); } suffixes.push(&cur[common_prefix..]); last = cur; diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 6c682358..e49af940 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -44,7 +44,6 @@ impl SizedDictBufBuilder { offsets: LateLogArrayBufBuilder, data_buf: B2, ) -> Self { - dbg!(block_offset); Self { record_size, block_offset, @@ -87,13 +86,9 @@ impl SizedDictBufBuilder { } pub fn finalize(mut self) -> (LateLogArrayBufBuilder, B2, u64, u64) { - if dbg!(self.current_block.len()) > 0 { + if self.current_block.len() > 0 { let current_block: Vec<&[u8]> = self.current_block.iter().map(|e| e.as_ref()).collect(); - let size = dbg!(build_block_unchecked( - self.record_size, - &mut self.data_buf, - ¤t_block - )); + let size = build_block_unchecked(self.record_size, &mut self.data_buf, ¤t_block); self.block_offset += size as u64; self.offsets.push(self.block_offset); } @@ -108,14 +103,13 @@ impl SizedDictBufBuilder { } pub fn 
build_offset_logarray(buf: &mut B, mut offsets: Vec) { - dbg!(&offsets); // the last offset doesn't matter as it's implied by the total size offsets.pop(); let largest_element = offsets.last().cloned().unwrap_or(0); let width = calculate_width(largest_element); let mut array_builder = LogArrayBufBuilder::new(buf, width); - dbg!(&offsets); + array_builder.push_vec(offsets); array_builder.finalize(); } @@ -129,11 +123,7 @@ pub struct SizedDict { impl SizedDict { pub fn parse(offsets: Bytes, data: Bytes, dict_offset: u64) -> Self { - dbg!(&offsets); - dbg!(&data); - dbg!(dict_offset); let offsets = MonotonicLogArray::parse(offsets).unwrap(); - dbg!(&offsets); Self::from_parts(offsets, data, dict_offset) } @@ -146,7 +136,6 @@ impl SizedDict { } fn block_offset(&self, block_index: usize) -> usize { - dbg!(block_index); let offset: usize; if block_index == 0 { offset = 0; @@ -160,7 +149,7 @@ impl SizedDict { pub fn block_bytes(&self, block_index: usize) -> Bytes { let offset = self.block_offset(block_index); let block_bytes; - block_bytes = dbg!(self.data.slice(offset..)); + block_bytes = self.data.slice(offset..); block_bytes } @@ -176,27 +165,19 @@ impl SizedDict { } pub fn block_num_elements(&self, block_index: usize) -> u8 { - eprintln!("block_index: {block_index}"); - let offset = dbg!(self.block_offset(block_index)); - eprintln!("offset: {offset}"); - - dbg!(&self.data); - if dbg!(self.data.len()) == 0 { - eprintln!("size is zero"); + let offset = self.block_offset(block_index); + if self.data.len() == 0 { 0 } else { - dbg!(parse_block_control_records(dbg!(self.data[offset]))) + parse_block_control_records(self.data[offset]) } } pub fn num_blocks(&self) -> usize { - dbg!(&self.offsets); - dbg!(&self.data); self.offsets.len() + 1 } pub fn entry(&self, index: usize) -> Option { - dbg!(index); if index > self.num_entries() { return None; } @@ -205,7 +186,6 @@ impl SizedDict { } pub fn id(&self, slice: &[u8]) -> IdLookupResult { - dbg!(slice); // let's binary search let mut min = 0; let mut max = self.offsets.len(); @@ -275,7 +255,7 @@ impl SizedDict { } pub fn num_entries(&self) -> usize { - let num_blocks = dbg!(self.num_blocks()); + let num_blocks = self.num_blocks(); let last_block_size = self.block_num_elements(num_blocks - 1); (num_blocks - 1) * BLOCK_SIZE + last_block_size as usize diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 50fa3600..e5aa46a7 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -36,19 +36,13 @@ impl TypedDict { data: Bytes, ) -> Self { let types_present2 = types_present.clone(); - dbg!(types_present2); let type_offsets2 = type_offsets.clone(); - dbg!(type_offsets2); let block_offsets2 = block_offsets.clone(); - dbg!(block_offsets2); let data2 = data.clone(); - dbg!(data2); + let types_present = MonotonicLogArray::parse(types_present).unwrap(); let type_offsets = MonotonicLogArray::parse(type_offsets).unwrap(); let block_offsets = MonotonicLogArray::parse(block_offsets).unwrap(); - dbg!(&types_present); - dbg!(&type_offsets); - dbg!(&block_offsets); if types_present.len() == 0 { return Self { types_present, @@ -64,13 +58,12 @@ impl TypedDict { for type_offset in type_offsets.iter() { let last_block_len; if type_offset == 0 { - last_block_len = dbg!(parse_block_control_records(data[0])); + last_block_len = parse_block_control_records(data[0]); } else { let last_block_offset_of_previous_type = block_offsets.entry(type_offset as usize - 1); - last_block_len = dbg!(parse_block_control_records( - 
data[last_block_offset_of_previous_type as usize] - )); + last_block_len = + parse_block_control_records(data[last_block_offset_of_previous_type as usize]); } let gap = BLOCK_SIZE as u8 - last_block_len; @@ -82,26 +75,24 @@ impl TypedDict { 1 } else { BLOCK_SIZE - - dbg!(parse_block_control_records( + - parse_block_control_records( data[block_offsets.entry(block_offsets.len() - 1) as usize], - ) as usize) + ) as usize }; - dbg!(last_gap); - dbg!((block_offsets.len() + 1) * BLOCK_SIZE - tally as usize); let num_entries = if block_offsets.len() == 0 { parse_block_control_records(data[0]) as usize } else { (block_offsets.len() + 1) * BLOCK_SIZE - tally as usize - last_gap }; - dbg!(num_entries); - dbg!(Self { + + Self { types_present, type_offsets, block_offsets, type_id_offsets, num_entries, data, - }) + } } pub fn id>(&self, v: &Q) -> IdLookupResult { @@ -119,7 +110,7 @@ impl TypedDict { let type_offset; let block_offset; let id_offset; - dbg!(i); + if i == 0 { type_offset = 0; block_offset = 0; @@ -129,7 +120,7 @@ impl TypedDict { id_offset = self.type_id_offsets[i - 1]; block_offset = self.block_offsets.entry(type_offset as usize) as usize; } - dbg!(block_offset); + let len; if i == self.types_present.len() - 1 { if i == 0 { @@ -288,11 +279,8 @@ impl TypedDictSegment { pub fn parse(offsets: Bytes, data: Bytes, dict_offset: u64) -> Self { let offsets2 = offsets.clone(); let data2 = data.clone(); - dbg!(offsets2); - dbg!(data2); - dbg!(dict_offset); let dict = SizedDict::parse(offsets, data, dict_offset); - dbg!(&dict); + Self { dict, _x: Default::default(), @@ -306,7 +294,6 @@ impl TypedDictSegment { pub fn id>(&self, val: &Q) -> IdLookupResult { let slice = T::to_lexical(val); - dbg!(&slice); self.dict.id(&slice[..]) } From 48750c25972798c2da8d14754e21de258da24ae1 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Tue, 6 Dec 2022 10:02:06 +0100 Subject: [PATCH 57/99] Ready for comparison to refactor branch --- src/layer/internal/base.rs | 4 ++-- src/layer/internal/child.rs | 2 +- src/layer/internal/mod.rs | 11 ++++------- src/layer/simple_builder.rs | 1 - src/storage/file.rs | 4 ++-- src/storage/memory.rs | 8 +++----- src/structure/tfc/dict.rs | 28 ++++++++++++++++------------ src/structure/tfc/typed.rs | 15 ++++++++------- 8 files changed, 36 insertions(+), 37 deletions(-) diff --git a/src/layer/internal/base.rs b/src/layer/internal/base.rs index 4d6cbe69..43f63206 100644 --- a/src/layer/internal/base.rs +++ b/src/layer/internal/base.rs @@ -56,7 +56,7 @@ impl BaseLayer { ); let predicate_dictionary = StringDict::parse( maps.predicate_dictionary_maps.offsets_map, - maps.predicate_dictionary_maps.blocks_map, + dbg!(maps.predicate_dictionary_maps.blocks_map), 0, ); let value_dictionary = TypedDict::from_parts( @@ -617,7 +617,7 @@ pub mod tests { let builder = builder.into_phase2().await.unwrap(); builder.finalize().await.unwrap(); - eprintln!("Here"); + let layer = BaseLayer::load_from_files([1, 2, 3, 4, 5], &base_layer_files) .await .unwrap(); diff --git a/src/layer/internal/child.rs b/src/layer/internal/child.rs index 09744582..edc5e521 100644 --- a/src/layer/internal/child.rs +++ b/src/layer/internal/child.rs @@ -345,7 +345,7 @@ impl ChildLayerFileBuil } = self; builder.finalize().await?; - eprintln!("Into phase2"); + let node_dict_offsets_map = files.node_dictionary_files.offsets_file.map().await?; let node_dict_blocks_map = files.node_dictionary_files.blocks_file.map().await?; let predicate_dict_offsets_map = diff --git a/src/layer/internal/mod.rs b/src/layer/internal/mod.rs 
index 510f4f03..9b6f3061 100644 --- a/src/layer/internal/mod.rs +++ b/src/layer/internal/mod.rs @@ -234,7 +234,7 @@ impl InternalLayer { } pub fn node_dict_get(&self, id: usize) -> Option { - self.node_dictionary().get(id) + dbg!(self.node_dictionary().get(id)) } pub fn node_dict_len(&self) -> usize { @@ -551,7 +551,6 @@ impl Layer for InternalLayer { } fn subject_id<'a>(&'a self, subject: &str) -> Option { - eprintln!("In subject_id"); let to_result = |layer: &'a InternalLayer| { ( layer @@ -566,12 +565,10 @@ impl Layer for InternalLayer { result = to_result(layer); } let (id_option, parent_option) = result; - eprintln!("id_option: {id_option:?}"); id_option.map(|id| id + parent_option.map_or(0, |p| p.node_and_value_count() as u64)) } fn predicate_id<'a>(&'a self, predicate: &str) -> Option { - eprintln!("In predicate id"); let to_result = |layer: &'a InternalLayer| { ( layer @@ -712,11 +709,11 @@ impl Layer for InternalLayer { } } - corrected_id = current_layer + corrected_id = dbg!(current_layer .node_value_id_map() - .outer_to_inner(corrected_id); + .outer_to_inner(corrected_id)); - if corrected_id >= current_layer.node_dict_len() as u64 { + if corrected_id > dbg!(current_layer.node_dict_len()) as u64 { // object, if it exists, must be a value corrected_id -= current_layer.node_dict_len() as u64; return current_layer diff --git a/src/layer/simple_builder.rs b/src/layer/simple_builder.rs index 8ae41b05..7f47c03d 100644 --- a/src/layer/simple_builder.rs +++ b/src/layer/simple_builder.rs @@ -62,7 +62,6 @@ pub struct SimpleLayerBuilder { impl SimpleLayerBuilder { /// Construct a layer builder for a base layer pub fn new(name: [u32; 5], files: BaseLayerFiles) -> Self { - eprintln!("Trying to make a new layer file"); Self { name, parent: None, diff --git a/src/storage/file.rs b/src/storage/file.rs index 89556ee0..78e85dee 100644 --- a/src/storage/file.rs +++ b/src/storage/file.rs @@ -341,8 +341,8 @@ pub struct DictionaryFiles { impl DictionaryFiles { pub async fn map_all(&self) -> io::Result { - let blocks_map = self.blocks_file.map().await?; let offsets_map = self.offsets_file.map().await?; + let blocks_map = self.blocks_file.map().await?; Ok(DictionaryMaps { offsets_map, @@ -358,8 +358,8 @@ impl DictionaryFiles { let mut offsets_writer = self.offsets_file.open_write().await?; let mut blocks_writer = self.blocks_file.open_write().await?; - blocks_writer.write_all_buf(blocks_buf).await?; offsets_writer.write_all_buf(offsets_buf).await?; + blocks_writer.write_all_buf(blocks_buf).await?; offsets_writer.flush().await?; offsets_writer.sync_all().await?; diff --git a/src/storage/memory.rs b/src/storage/memory.rs index 4febd9ec..f277314f 100644 --- a/src/storage/memory.rs +++ b/src/storage/memory.rs @@ -525,17 +525,15 @@ mod tests { builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); builder.commit_boxed().await.unwrap(); - eprintln!("Here1"); builder = store.create_child_layer(base_name).await.unwrap(); let child_name = builder.name(); - eprintln!("Here2"); + builder.remove_string_triple(StringTriple::new_value("duck", "says", "quack")); builder.add_string_triple(StringTriple::new_node("cow", "likes", "pig")); - eprintln!("Here3"); + builder.commit_boxed().await.unwrap(); - eprintln!("Here4"); let layer = store.get_layer(child_name).await.unwrap().unwrap(); - eprintln!("Here5"); + assert!(layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); assert!(layer.string_triple_exists(&StringTriple::new_value("pig", "says", "oink"))); 
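// Aside: the id arithmetic this series converges on, worked through once.
// External ids are 1-based: with parent_count = 10 nodes and values
// inherited from the parent chain, ids 1..=10 resolve in the parent and
// id 11 is the first one minted locally, hence `corrected_id > parent_count`
// rather than the earlier `>=` (which would misfile the parent's last id
// as local now that ids start at 1).
fn minted_locally(corrected_id: u64, parent_count: u64) -> bool {
    corrected_id > parent_count
}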
assert!(layer.string_triple_exists(&StringTriple::new_node("cow", "likes", "pig"))); diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index e49af940..42fea0a0 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -128,11 +128,12 @@ impl SizedDict { } pub fn from_parts(offsets: MonotonicLogArray, data: Bytes, dict_offset: u64) -> Self { - Self { + dbg!(&data); + dbg!(Self { offsets, data, dict_offset, - } + }) } fn block_offset(&self, block_index: usize) -> usize { @@ -147,9 +148,9 @@ impl SizedDict { } pub fn block_bytes(&self, block_index: usize) -> Bytes { - let offset = self.block_offset(block_index); + let offset = dbg!(self.block_offset(block_index)); let block_bytes; - block_bytes = self.data.slice(offset..); + block_bytes = dbg!(self.data.slice(offset..)); block_bytes } @@ -178,6 +179,7 @@ impl SizedDict { } pub fn entry(&self, index: usize) -> Option { + dbg!(index); if index > self.num_entries() { return None; } @@ -190,11 +192,11 @@ impl SizedDict { let mut min = 0; let mut max = self.offsets.len(); let mut mid: usize; - + dbg!(&self); while min <= max { mid = (min + max) / 2; - - let head_slice = self.block_head(mid); + dbg!(mid); + let head_slice = dbg!(self.block_head(mid)); match slice.cmp(&head_slice[..]) { Ordering::Less => { @@ -203,20 +205,22 @@ impl SizedDict { // but since this is the first block, the string doesn't exist. return IdLookupResult::NotFound; } - max = mid - 1; + max = dbg!(mid - 1); } - Ordering::Greater => min = mid + 1, - Ordering::Equal => return IdLookupResult::Found((mid * BLOCK_SIZE + 1) as u64), // what luck! turns out the string we were looking for was the block head + Ordering::Greater => min = dbg!(mid + 1), + Ordering::Equal => { + return IdLookupResult::Found(dbg!((mid * BLOCK_SIZE + 1)) as u64) + } // what luck! turns out the string we were looking for was the block head } } let found = max; // we found the block the string should be part of. 
- let block = self.block(found); + let block = dbg!(self.block(found)); let block_id = block.id(slice); let offset = (found * BLOCK_SIZE) as u64 + 1; - let result = block_id.offset(offset).default(offset - 1); + let result = block_id.offset(offset).default(dbg!(offset - 1)); /* if found != 0 { // the default value will fill in the last index of the diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index e5aa46a7..dc34579d 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -35,6 +35,7 @@ impl TypedDict { block_offsets: Bytes, data: Bytes, ) -> Self { + dbg!(&data); let types_present2 = types_present.clone(); let type_offsets2 = type_offsets.clone(); let block_offsets2 = block_offsets.clone(); @@ -85,14 +86,14 @@ impl TypedDict { (block_offsets.len() + 1) * BLOCK_SIZE - tally as usize - last_gap }; - Self { + dbg!(Self { types_present, type_offsets, block_offsets, type_id_offsets, num_entries, data, - } + }) } pub fn id>(&self, v: &Q) -> IdLookupResult { @@ -102,7 +103,7 @@ impl TypedDict { } pub fn get(&self, id: usize) -> Option { - let result = self.entry(id); + let result = self.entry(dbg!(id)); result.map(|(datatype, slice)| datatype.cast(slice.into_buf())) } @@ -277,8 +278,7 @@ pub struct TypedDictSegment { impl TypedDictSegment { pub fn parse(offsets: Bytes, data: Bytes, dict_offset: u64) -> Self { - let offsets2 = offsets.clone(); - let data2 = data.clone(); + dbg!(&data); let dict = SizedDict::parse(offsets, data, dict_offset); Self { @@ -288,12 +288,13 @@ impl TypedDictSegment { } pub fn get(&self, index: usize) -> Option { - let entry = self.dict.entry(index); + let entry = self.dict.entry(dbg!(index)); entry.map(|e| T::from_lexical(e.into_buf())) } pub fn id>(&self, val: &Q) -> IdLookupResult { - let slice = T::to_lexical(val); + dbg!(&self.dict); + let slice = dbg!(T::to_lexical(val)); self.dict.id(&slice[..]) } From 49b6c97484f51ecba839f823bd09e0110665822d Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Tue, 6 Dec 2022 11:12:17 +0100 Subject: [PATCH 58/99] make lower level empty dicts work --- src/structure/tfc/dict.rs | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 42fea0a0..8d79e7f3 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -148,6 +148,9 @@ impl SizedDict { } pub fn block_bytes(&self, block_index: usize) -> Bytes { + if self.data.is_empty() { + panic!("empty dictionary has no block"); + } let offset = dbg!(self.block_offset(block_index)); let block_bytes; block_bytes = dbg!(self.data.slice(offset..)); @@ -166,16 +169,21 @@ impl SizedDict { } pub fn block_num_elements(&self, block_index: usize) -> u8 { - let offset = self.block_offset(block_index); - if self.data.len() == 0 { + if self.data.is_empty() { 0 } else { + let offset = self.block_offset(block_index); parse_block_control_records(self.data[offset]) } } pub fn num_blocks(&self) -> usize { - self.offsets.len() + 1 + if self.data.is_empty() { + 0 + } + else { + self.offsets.len() + 1 + } } pub fn entry(&self, index: usize) -> Option { @@ -193,6 +201,9 @@ impl SizedDict { let mut max = self.offsets.len(); let mut mid: usize; dbg!(&self); + if self.is_empty() { + return IdLookupResult::NotFound; + } while min <= max { mid = (min + max) / 2; dbg!(mid); @@ -260,9 +271,18 @@ impl SizedDict { pub fn num_entries(&self) -> usize { let num_blocks = self.num_blocks(); - let last_block_size = self.block_num_elements(num_blocks - 1); + if 
num_blocks == 0 { + 0 + } + else { + let last_block_size = self.block_num_elements(num_blocks - 1); + + (num_blocks - 1) * BLOCK_SIZE + last_block_size as usize + } + } - (num_blocks - 1) * BLOCK_SIZE + last_block_size as usize + pub fn is_empty(&self) -> bool { + self.data.is_empty() } } From af9abb4c0f363612c5c38b7934620f5954b19663 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Tue, 6 Dec 2022 11:25:27 +0100 Subject: [PATCH 59/99] fix id correction when looking up in parent layers --- src/layer/internal/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/layer/internal/mod.rs b/src/layer/internal/mod.rs index 9b6f3061..df16010d 100644 --- a/src/layer/internal/mod.rs +++ b/src/layer/internal/mod.rs @@ -635,7 +635,7 @@ impl Layer for InternalLayer { parent_count = parent_count - current_layer.node_dict_len() as u64 - current_layer.value_dict_len() as u64; - if corrected_id >= parent_count as u64 { + if corrected_id > parent_count as u64 { // subject, if it exists, is in this layer corrected_id -= parent_count; } else { @@ -666,7 +666,7 @@ impl Layer for InternalLayer { let mut corrected_id = id; if let Some(parent) = current_layer.immediate_parent() { parent_count -= current_layer.predicate_dict_len() as u64; - if corrected_id >= parent_count as u64 { + if corrected_id > parent_count as u64 { // subject, if it exists, is in this layer corrected_id -= parent_count; } else { @@ -700,7 +700,7 @@ impl Layer for InternalLayer { - current_layer.node_dict_len() as u64 - current_layer.value_dict_len() as u64; - if corrected_id >= parent_count { + if corrected_id > parent_count { // object, if it exists, is in this layer corrected_id -= parent_count; } else { From 3380e78ddc11a8ceab9dddebcd860da9267d93b0 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Tue, 6 Dec 2022 14:48:14 +0100 Subject: [PATCH 60/99] fix id mapping for new id offset --- src/layer/id_map.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/layer/id_map.rs b/src/layer/id_map.rs index 345e5f19..e01f3331 100644 --- a/src/layer/id_map.rs +++ b/src/layer/id_map.rs @@ -32,10 +32,10 @@ impl IdMap { self.id_wtree .as_ref() .and_then(|wtree| { - if id >= wtree.len() as u64 { + if id > wtree.len() as u64 { None } else { - Some(wtree.lookup_one(id).unwrap()) + Some(wtree.lookup_one(id-1).unwrap() + 1) } }) .unwrap_or(id) @@ -45,10 +45,11 @@ impl IdMap { self.id_wtree .as_ref() .and_then(|wtree| { - if id >= wtree.len() as u64 { + if id > wtree.len() as u64 { None } else { - Some(wtree.decode_one(id.try_into().unwrap())) + let id:usize = id.try_into().unwrap(); + Some(wtree.decode_one(id - 1) + 1) } }) .unwrap_or(id) @@ -97,7 +98,7 @@ pub async fn construct_idmaps_from_structures node_iters.push( dict.into_iter() .enumerate() - .map(move |(i, e)| (idmap.inner_to_outer(i as u64) + node_offset as u64, e)), + .map(move |(i, e)| (idmap.inner_to_outer(i as u64 + 1) + node_offset as u64, e)), ); node_offset += num_entries + value_dicts[ix].num_entries(); @@ -111,7 +112,7 @@ pub async fn construct_idmaps_from_structures let num_entries = dict.num_entries(); value_iters.push(dict.into_iter().enumerate().map(move |(i, e)| { ( - idmap.inner_to_outer(i as u64 + node_count as u64) + value_offset as u64, + idmap.inner_to_outer(i as u64 + node_count as u64 + 1) + value_offset as u64, e, ) })); @@ -127,7 +128,7 @@ pub async fn construct_idmaps_from_structures predicate_iters.push( dict.into_iter() .enumerate() - .map(move |(i, e)| (idmap.inner_to_outer(i as 
u64) + predicate_offset as u64, e)), + .map(move |(i, e)| (idmap.inner_to_outer(i as u64 + 1) + predicate_offset as u64, e)), ); predicate_offset += num_entries; @@ -151,9 +152,9 @@ pub async fn construct_idmaps_from_structures let sorted_node_iter = sorted_iterator(node_iters, entry_comparator).map(|(i,s)|(i, (Datatype::String, s))); let sorted_value_iter = sorted_iterator(value_iters, typed_entry_comparator); - let sorted_node_value_iter = sorted_node_iter.chain(sorted_value_iter).map(|(id, _)| id); + let sorted_node_value_iter = sorted_node_iter.chain(sorted_value_iter).map(|(id, _)| id - 1); let sorted_predicate_iter = - sorted_iterator(predicate_iters, entry_comparator).map(|(id, _)| id); + sorted_iterator(predicate_iters, entry_comparator).map(|(id, _)| id - 1); let node_value_width = util::calculate_width(node_offset as u64); let node_value_build_task = tokio::spawn(build_wavelet_tree_from_iter( From 14b71a0bf3f12c953bb714d6349c1459da0b90bf Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Tue, 6 Dec 2022 14:48:23 +0100 Subject: [PATCH 61/99] remove some debug expressions --- src/structure/tfc/dict.rs | 5 ++--- src/structure/tfc/typed.rs | 6 ++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 8d79e7f3..0766de56 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -128,12 +128,11 @@ impl SizedDict { } pub fn from_parts(offsets: MonotonicLogArray, data: Bytes, dict_offset: u64) -> Self { - dbg!(&data); - dbg!(Self { + Self { offsets, data, dict_offset, - }) + } } fn block_offset(&self, block_index: usize) -> usize { diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index dc34579d..465c10a9 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -35,7 +35,6 @@ impl TypedDict { block_offsets: Bytes, data: Bytes, ) -> Self { - dbg!(&data); let types_present2 = types_present.clone(); let type_offsets2 = type_offsets.clone(); let block_offsets2 = block_offsets.clone(); @@ -86,14 +85,14 @@ impl TypedDict { (block_offsets.len() + 1) * BLOCK_SIZE - tally as usize - last_gap }; - dbg!(Self { + Self { types_present, type_offsets, block_offsets, type_id_offsets, num_entries, data, - }) + } } pub fn id>(&self, v: &Q) -> IdLookupResult { @@ -278,7 +277,6 @@ pub struct TypedDictSegment { impl TypedDictSegment { pub fn parse(offsets: Bytes, data: Bytes, dict_offset: u64) -> Self { - dbg!(&data); let dict = SizedDict::parse(offsets, data, dict_offset); Self { From 7c3b34031cc10a6a60be1b555ea2035bc62322db Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Tue, 6 Dec 2022 16:03:02 +0100 Subject: [PATCH 62/99] removed loads of dbg! 
invocations, and started string dict logic --- src/layer/builder.rs | 29 ++++---------- src/layer/internal/base.rs | 8 ++-- src/layer/internal/child.rs | 6 +-- src/layer/internal/mod.rs | 8 ++-- src/storage/layer.rs | 13 ++++--- src/structure/mod.rs | 3 +- src/structure/tfc/dict.rs | 19 ++++----- src/structure/tfc/typed.rs | 77 ++++++++++++++++++++++++++++++++++--- 8 files changed, 105 insertions(+), 58 deletions(-) diff --git a/src/layer/builder.rs b/src/layer/builder.rs index 55df9d00..d8eeb111 100644 --- a/src/layer/builder.rs +++ b/src/layer/builder.rs @@ -3,7 +3,6 @@ use std::io; use bytes::{Bytes, BytesMut}; use futures::stream::TryStreamExt; use rayon::prelude::*; -use tfc::dict::SizedDictBufBuilder; use super::layer::*; use crate::storage::*; @@ -14,8 +13,8 @@ pub struct DictionarySetFileBuilder { node_files: DictionaryFiles, predicate_files: DictionaryFiles, value_files: TypedDictionaryFiles, - node_dictionary_builder: SizedDictBufBuilder, - predicate_dictionary_builder: SizedDictBufBuilder, + node_dictionary_builder: StringDictBufBuilder, + predicate_dictionary_builder: StringDictBufBuilder, value_dictionary_builder: TypedDictBufBuilder, } @@ -25,18 +24,12 @@ impl DictionarySetFileBuilder { predicate_files: DictionaryFiles, value_files: TypedDictionaryFiles, ) -> io::Result { - let node_dictionary_builder = SizedDictBufBuilder::new( - None, - 0, - 0, - LateLogArrayBufBuilder::new(BytesMut::new()), + let node_dictionary_builder = StringDictBufBuilder::new( + BytesMut::new(), BytesMut::new(), ); - let predicate_dictionary_builder = SizedDictBufBuilder::new( - None, - 0, - 0, - LateLogArrayBufBuilder::new(BytesMut::new()), + let predicate_dictionary_builder = StringDictBufBuilder::new( + BytesMut::new(), BytesMut::new(), ); let value_dictionary_builder = TypedDictBufBuilder::new( @@ -147,16 +140,10 @@ impl DictionarySetFileBuilder { } pub async fn finalize(self) -> io::Result<()> { - let (mut node_offsets_builder, mut node_data_buf, _, _) = + let (mut node_offsets_buf, mut node_data_buf) = self.node_dictionary_builder.finalize(); - // last offset is useless - node_offsets_builder.pop(); - let mut node_offsets_buf = node_offsets_builder.finalize(); - let (mut predicate_offsets_builder, mut predicate_data_buf, _, _) = + let (mut predicate_offsets_buf, mut predicate_data_buf) = self.predicate_dictionary_builder.finalize(); - // last offset is useless - predicate_offsets_builder.pop(); - let mut predicate_offsets_buf = predicate_offsets_builder.finalize(); let ( mut value_types_present_buf, mut value_type_offsets_buf, diff --git a/src/layer/internal/base.rs b/src/layer/internal/base.rs index 43f63206..4b11fd06 100644 --- a/src/layer/internal/base.rs +++ b/src/layer/internal/base.rs @@ -52,12 +52,10 @@ impl BaseLayer { let node_dictionary = StringDict::parse( maps.node_dictionary_maps.offsets_map, maps.node_dictionary_maps.blocks_map, - 0, ); let predicate_dictionary = StringDict::parse( maps.predicate_dictionary_maps.offsets_map, - dbg!(maps.predicate_dictionary_maps.blocks_map), - 0, + maps.predicate_dictionary_maps.blocks_map, ); let value_dictionary = TypedDict::from_parts( maps.value_dictionary_maps.types_present_map, @@ -266,8 +264,8 @@ impl BaseLayerFileBuilder { let value_dict_blocks_map = files.value_dictionary_files.blocks_file.map().await?; let value_dict_offsets_map = files.value_dictionary_files.offsets_file.map().await?; - let node_dict = StringDict::parse(node_dict_offsets_map, node_dict_blocks_map, 0); - let pred_dict = StringDict::parse(predicate_dict_offsets_map, 
predicate_dict_blocks_map, 0); + let node_dict = StringDict::parse(node_dict_offsets_map, node_dict_blocks_map); + let pred_dict = StringDict::parse(predicate_dict_offsets_map, predicate_dict_blocks_map); let val_dict = TypedDict::from_parts( value_dict_types_present_map, value_dict_type_offsets_map, diff --git a/src/layer/internal/child.rs b/src/layer/internal/child.rs index edc5e521..9690a297 100644 --- a/src/layer/internal/child.rs +++ b/src/layer/internal/child.rs @@ -65,12 +65,10 @@ impl ChildLayer { let node_dictionary = StringDict::parse( maps.node_dictionary_maps.offsets_map, maps.node_dictionary_maps.blocks_map, - 0, ); let predicate_dictionary = StringDict::parse( maps.predicate_dictionary_maps.offsets_map, maps.predicate_dictionary_maps.blocks_map, - 0, ); let value_dictionary = TypedDict::from_parts( maps.value_dictionary_maps.types_present_map, @@ -361,8 +359,8 @@ impl ChildLayerFileBuil let value_dict_offsets_map = files.value_dictionary_files.offsets_file.map().await?; let value_dict_blocks_map = files.value_dictionary_files.blocks_file.map().await?; - let node_dict = StringDict::parse(node_dict_offsets_map, node_dict_blocks_map, 0); - let pred_dict = StringDict::parse(predicate_dict_offsets_map, predicate_dict_blocks_map, 0); + let node_dict = StringDict::parse(node_dict_offsets_map, node_dict_blocks_map); + let pred_dict = StringDict::parse(predicate_dict_offsets_map, predicate_dict_blocks_map); let val_dict = TypedDict::from_parts( value_dict_types_present_map, value_dict_type_offsets_map, diff --git a/src/layer/internal/mod.rs b/src/layer/internal/mod.rs index df16010d..a3ed3905 100644 --- a/src/layer/internal/mod.rs +++ b/src/layer/internal/mod.rs @@ -234,7 +234,7 @@ impl InternalLayer { } pub fn node_dict_get(&self, id: usize) -> Option { - dbg!(self.node_dictionary().get(id)) + self.node_dictionary().get(id) } pub fn node_dict_len(&self) -> usize { @@ -709,11 +709,11 @@ impl Layer for InternalLayer { } } - corrected_id = dbg!(current_layer + corrected_id = current_layer .node_value_id_map() - .outer_to_inner(corrected_id)); + .outer_to_inner(corrected_id); - if corrected_id > dbg!(current_layer.node_dict_len()) as u64 { + if corrected_id > current_layer.node_dict_len() as u64 { // object, if it exists, must be a value corrected_id -= current_layer.node_dict_len() as u64; return current_layer diff --git a/src/storage/layer.rs b/src/storage/layer.rs index d628d39e..30575ec7 100644 --- a/src/storage/layer.rs +++ b/src/storage/layer.rs @@ -15,7 +15,7 @@ use crate::structure::logarray::logarray_file_get_length_and_width; use crate::structure::StringDict; use crate::structure::TypedDict; use crate::structure::{ - dict_file_get_count, util, AdjacencyList, BitIndex, LogArray, MonotonicLogArray, WaveletTree, + util, AdjacencyList, BitIndex, LogArray, MonotonicLogArray, WaveletTree, }; use std::convert::TryInto; @@ -1571,7 +1571,6 @@ impl io::Result> { if self.directory_exists(name).await? { let file = self.node_dictionary_files(name).await?.blocks_file; - Ok(Some(dict_file_get_count(file).await?)) + panic!(); + //Ok(Some(dict_file_get_count(file).await?)) } else { Ok(None) } @@ -1621,7 +1620,8 @@ impl io::Result> { if self.directory_exists(name).await? { let file = self.predicate_dictionary_files(name).await?.blocks_file; - Ok(Some(dict_file_get_count(file).await?)) + panic!(); + //Ok(Some(dict_file_get_count(file).await?)) } else { Ok(None) } @@ -1630,7 +1630,8 @@ impl io::Result> { if self.directory_exists(name).await? 
{ let file = self.value_dictionary_files(name).await?.blocks_file; - Ok(Some(dict_file_get_count(file).await?)) + panic!(); + //Ok(Some(dict_file_get_count(file).await?)) } else { Ok(None) } diff --git a/src/structure/mod.rs b/src/structure/mod.rs index b4ada408..e5147270 100644 --- a/src/structure/mod.rs +++ b/src/structure/mod.rs @@ -8,7 +8,7 @@ pub mod bitindex; pub mod bititer; pub mod logarray; //pub mod mapped_dict; -pub mod pfc; +//pub mod pfc; pub mod tfc; pub mod util; pub mod vbyte; @@ -18,6 +18,5 @@ pub use adjacencylist::*; pub use bitarray::*; pub use bitindex::*; pub use logarray::*; -pub use pfc::*; pub use tfc::*; pub use wavelettree::*; diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 0766de56..cac4dca7 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -150,9 +150,9 @@ impl SizedDict { if self.data.is_empty() { panic!("empty dictionary has no block"); } - let offset = dbg!(self.block_offset(block_index)); + let offset = self.block_offset(block_index); let block_bytes; - block_bytes = dbg!(self.data.slice(offset..)); + block_bytes = self.data.slice(offset..); block_bytes } @@ -186,7 +186,6 @@ impl SizedDict { } pub fn entry(&self, index: usize) -> Option { - dbg!(index); if index > self.num_entries() { return None; } @@ -199,14 +198,12 @@ impl SizedDict { let mut min = 0; let mut max = self.offsets.len(); let mut mid: usize; - dbg!(&self); if self.is_empty() { return IdLookupResult::NotFound; } while min <= max { mid = (min + max) / 2; - dbg!(mid); - let head_slice = dbg!(self.block_head(mid)); + let head_slice = self.block_head(mid); match slice.cmp(&head_slice[..]) { Ordering::Less => { @@ -215,11 +212,11 @@ impl SizedDict { // but since this is the first block, the string doesn't exist. return IdLookupResult::NotFound; } - max = dbg!(mid - 1); + max = mid - 1; } - Ordering::Greater => min = dbg!(mid + 1), + Ordering::Greater => min = mid + 1, Ordering::Equal => { - return IdLookupResult::Found(dbg!((mid * BLOCK_SIZE + 1)) as u64) + return IdLookupResult::Found((mid * BLOCK_SIZE + 1) as u64) } // what luck! turns out the string we were looking for was the block head } } @@ -227,10 +224,10 @@ impl SizedDict { let found = max; // we found the block the string should be part of. 
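// The two-phase lookup being cleaned up here: a binary search over block
// heads picks block `found`, whose first entry has global id
// found * BLOCK_SIZE + 1; a hit inside the block is shifted by that offset,
// and a miss falls back to the last id of the previous block. A model of
// the result combinators (our own enum, assuming IdLookupResult behaves as
// its call sites here suggest):
enum Lookup {
    Found(u64),
    Closest(u64),
    NotFound,
}
impl Lookup {
    fn offset(self, o: u64) -> Lookup {
        match self {
            Lookup::Found(i) => Lookup::Found(i + o),
            Lookup::Closest(i) => Lookup::Closest(i + o),
            Lookup::NotFound => Lookup::NotFound,
        }
    }
    fn default(self, d: u64) -> Lookup {
        match self {
            Lookup::NotFound => Lookup::Closest(d),
            x => x,
        }
    }
}
// For BLOCK_SIZE = 8 and found = 2: ids 17..=24 live in this block, and a
// miss becomes Closest(16), the last entry of the preceding block.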
- let block = dbg!(self.block(found)); + let block = self.block(found); let block_id = block.id(slice); let offset = (found * BLOCK_SIZE) as u64 + 1; - let result = block_id.offset(offset).default(dbg!(offset - 1)); + let result = block_id.offset(offset).default(offset - 1); /* if found != 0 { // the default value will fill in the last index of the diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 465c10a9..b07dab59 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -102,7 +102,7 @@ impl TypedDict { } pub fn get(&self, id: usize) -> Option { - let result = self.entry(dbg!(id)); + let result = self.entry(id); result.map(|(datatype, slice)| datatype.cast(slice.into_buf())) } @@ -286,13 +286,12 @@ impl TypedDictSegment { } pub fn get(&self, index: usize) -> Option { - let entry = self.dict.entry(dbg!(index)); + let entry = self.dict.entry(index); entry.map(|e| T::from_lexical(e.into_buf())) } pub fn id>(&self, val: &Q) -> IdLookupResult { - dbg!(&self.dict); - let slice = dbg!(T::to_lexical(val)); + let slice = T::to_lexical(val); self.dict.id(&slice[..]) } @@ -309,7 +308,75 @@ impl TypedDictSegment { } } -pub type StringDict = TypedDictSegment; +#[derive(Clone)] +pub struct StringDict(TypedDictSegment); + +impl StringDict { + pub fn parse(offsets: Bytes, data: Bytes) -> Self { + Self(TypedDictSegment::parse(offsets, data.slice(..data.len()), 0)) + } + + pub fn get(&self, index: usize) -> Option { + self.0.get(index) + } + + pub fn id>(&self, val: &Q) -> IdLookupResult { + self.0.id(val) + } + + pub fn num_entries(&self) -> usize { + self.0.num_entries() + } + + pub fn iter<'a>(&'a self) -> impl Iterator + 'a + Clone { + self.0.iter() + } + + pub fn into_iter(self) -> impl Iterator + Clone { + self.0.into_iter() + } +} + +pub struct StringDictBufBuilder(SizedDictBufBuilder); + +impl StringDictBufBuilder { + pub fn new( + offsets_buf: B1, + data_buf: B2, + ) -> Self { + let offsets = LateLogArrayBufBuilder::new(offsets_buf); + Self(SizedDictBufBuilder::new(None, 0, 0, offsets, data_buf)) + } + + pub fn id_offset(&self) -> u64 { + self.0.id_offset() + } + + pub fn block_offset(&self) -> u64 { + self.0.block_offset() + } + + pub fn add(&mut self, value: Bytes) -> u64 { + self.0.add(value) + } + + pub fn add_entry(&mut self, e: &SizedDictEntry) -> u64 { + self.0.add_entry(e) + } + + pub fn add_all>(&mut self, it: I) -> Vec { + self.0.add_all(it) + } + + pub fn finalize(self) -> (B1, B2) { + let (mut offsets_array, mut data_buf, _block_offset, id_offset) = self.0.finalize(); + offsets_array.pop(); + let offsets_buf = offsets_array.finalize(); + //data_buf.put_u64(id_offset); + + (offsets_buf, data_buf) + } +} #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, FromPrimitive)] pub enum Datatype { From 737742359f0a93f0ade79383ffe2a9b4d6e904ab Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Tue, 6 Dec 2022 16:51:36 +0100 Subject: [PATCH 63/99] fixed all tests --- src/storage/layer.rs | 10 ++- src/structure/tfc/dict.rs | 45 +++++--------- src/structure/tfc/file.rs | 39 ++++++------ src/structure/tfc/mod.rs | 1 + src/structure/tfc/typed.rs | 122 +++++++++++-------------------------- 5 files changed, 75 insertions(+), 142 deletions(-) diff --git a/src/storage/layer.rs b/src/storage/layer.rs index 30575ec7..2f63fc6d 100644 --- a/src/storage/layer.rs +++ b/src/storage/layer.rs @@ -11,6 +11,7 @@ use crate::layer::{ SimpleLayerBuilder, }; use crate::structure::bitarray::bitarray_len_from_file; +use crate::structure::dict_file_get_count; 
use crate::structure::logarray::logarray_file_get_length_and_width; use crate::structure::StringDict; use crate::structure::TypedDict; @@ -1610,8 +1611,7 @@ impl io::Result> { if self.directory_exists(name).await? { let file = self.node_dictionary_files(name).await?.blocks_file; - panic!(); - //Ok(Some(dict_file_get_count(file).await?)) + Ok(Some(dict_file_get_count(file).await?)) } else { Ok(None) } @@ -1620,8 +1620,7 @@ impl io::Result> { if self.directory_exists(name).await? { let file = self.predicate_dictionary_files(name).await?.blocks_file; - panic!(); - //Ok(Some(dict_file_get_count(file).await?)) + Ok(Some(dict_file_get_count(file).await?)) } else { Ok(None) } @@ -1630,8 +1629,7 @@ impl io::Result> { if self.directory_exists(name).await? { let file = self.value_dictionary_files(name).await?.blocks_file; - panic!(); - //Ok(Some(dict_file_get_count(file).await?)) + Ok(Some(dict_file_get_count(file).await?)) } else { Ok(None) } diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index cac4dca7..16118164 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -4,29 +4,9 @@ use crate::structure::{ util::calculate_width, LateLogArrayBufBuilder, LogArrayBufBuilder, MonotonicLogArray, }; use bytes::{BufMut, Bytes}; -use itertools::Itertools; use super::block::*; -pub fn build_dict_unchecked, I: Iterator>( - record_size: Option, - start_offset: u64, - offsets: &mut Vec, - data_buf: &mut B, - iter: I, -) { - let chunk_iter = iter.chunks(BLOCK_SIZE); - - let mut offset = start_offset; - for chunk in &chunk_iter { - let slices: Vec = chunk.collect(); - let borrows: Vec<&[u8]> = slices.iter().map(|s| s.as_ref()).collect(); - let size = build_block_unchecked(record_size, data_buf, &borrows); - offset += size as u64; - offsets.push(offset); - } -} - pub struct SizedDictBufBuilder { pub(crate) record_size: Option, block_offset: u64, @@ -341,14 +321,19 @@ mod tests { use super::*; use bytes::BytesMut; - fn build_dict_and_offsets, I: Iterator>( - array_buf: &mut B1, - data_buf: &mut B2, + fn build_dict_and_offsets>( + array_buf: B1, + data_buf: B2, vals: I, - ) { - let mut offsets = Vec::new(); - build_dict_unchecked(None, 0, &mut offsets, data_buf, vals); - build_offset_logarray(array_buf, offsets); + ) -> (B1, B2) { + let offsets = LateLogArrayBufBuilder::new(array_buf); + let mut builder = SizedDictBufBuilder::new(None, 0, 0, offsets, data_buf); + builder.add_all(vals); + let (mut array, data_buf, _, _) = builder.finalize(); + array.pop(); + let array_buf = array.finalize(); + + (array_buf, data_buf) } #[test] @@ -372,7 +357,7 @@ mod tests { let mut array_buf = BytesMut::new(); let mut data_buf = BytesMut::new(); - build_dict_and_offsets(&mut array_buf, &mut data_buf, strings.clone().into_iter()); + build_dict_and_offsets(&mut array_buf, &mut data_buf, strings.clone().into_iter().map(|s|Bytes::from(s))); let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); @@ -463,7 +448,7 @@ mod tests { let mut array_buf = BytesMut::new(); let mut data_buf = BytesMut::new(); - build_dict_and_offsets(&mut array_buf, &mut data_buf, strings.clone().into_iter()); + build_dict_and_offsets(&mut array_buf, &mut data_buf, strings.clone().into_iter().map(Bytes::from)); let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); @@ -495,7 +480,7 @@ mod tests { let mut array_buf = BytesMut::new(); let mut data_buf = BytesMut::new(); - build_dict_and_offsets(&mut array_buf, &mut data_buf, strings.clone().into_iter()); + build_dict_and_offsets(&mut 
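// Why the helper above pops before finalizing: SizedDictBufBuilder records
// an end offset after every block, including the last one, and that final
// offset is never the start of another block ("last offset is useless", as
// an earlier patch put it). Sketch of the bookkeeping, with made-up numbers:
fn kept_offsets(mut recorded: Vec<u64>) -> Vec<u64> {
    recorded.pop(); // the offset after the final block is useless for lookup
    recorded
}
// Two blocks with encoded sizes 20 and 13 record [20, 33]; only [20] is
// kept, since block 0 implicitly starts at 0 and block 1 at 20.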
array_buf, &mut data_buf, strings.clone().into_iter().map(Bytes::from)); let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); diff --git a/src/structure/tfc/file.rs b/src/structure/tfc/file.rs index ed28fce4..645c85b6 100644 --- a/src/structure/tfc/file.rs +++ b/src/structure/tfc/file.rs @@ -1,13 +1,11 @@ +use byteorder::{BigEndian, ByteOrder}; use bytes::BytesMut; use std::io; -use tokio::io::AsyncWriteExt; +use tokio::io::{AsyncWriteExt, AsyncReadExt}; use crate::{storage::*, structure::util::sorted_iterator}; -use super::{ - dict::{build_dict_unchecked, build_offset_logarray}, - *, -}; +use super::*; pub async fn merge_string_dictionaries< 'a, @@ -32,11 +30,9 @@ pub async fn merge_string_dictionaries< let mut blocks_file_writer = dict_files.blocks_file.open_write().await?; let mut offsets_file_writer = dict_files.offsets_file.open_write().await?; - let mut offsets = Vec::new(); - let mut offsets_buf = BytesMut::new(); - let mut data_buf = BytesMut::new(); - build_dict_unchecked(None, 0, &mut offsets, &mut data_buf, sorted_iterator); - build_offset_logarray(&mut offsets_buf, offsets); + let mut builder = StringDictBufBuilder::new(BytesMut::new(), BytesMut::new()); + builder.add_all(sorted_iterator); + let (offsets_buf, data_buf) = builder.finalize(); offsets_file_writer.write_all(offsets_buf.as_ref()).await?; offsets_file_writer.flush().await?; @@ -74,17 +70,9 @@ pub async fn merge_typed_dictionaries< let mut blocks_file_writer = dict_files.blocks_file.open_write().await?; let mut offsets_file_writer = dict_files.offsets_file.open_write().await?; - let mut types_present_buf = BytesMut::new(); - let mut type_offsets_buf = BytesMut::new(); - let mut offsets_buf = BytesMut::new(); - let mut data_buf = BytesMut::new(); - build_multiple_segments( - &mut types_present_buf, - &mut type_offsets_buf, - &mut offsets_buf, - &mut data_buf, - sorted_iterator, - ); + let mut builder = TypedDictBufBuilder::new(BytesMut::new(), BytesMut::new(), BytesMut::new(), BytesMut::new()); + builder.add_all(sorted_iterator); + let (types_present_buf, type_offsets_buf, offsets_buf, data_buf) = builder.finalize(); types_present_file_writer .write_all(types_present_buf.as_ref()) @@ -108,3 +96,12 @@ pub async fn merge_typed_dictionaries< Ok(()) } + +pub async fn dict_file_get_count(file: F) -> io::Result { + let mut result = vec![0; 8]; + file.open_read_from(file.size().await? - 8) + .await? 
+ .read_exact(&mut result) + .await?; + Ok(BigEndian::read_u64(&result)) +} diff --git a/src/structure/tfc/mod.rs b/src/structure/tfc/mod.rs index d6297508..665e65f0 100644 --- a/src/structure/tfc/mod.rs +++ b/src/structure/tfc/mod.rs @@ -7,3 +7,4 @@ pub mod file; pub use typed::*; pub use block::{SizedDictEntry, SizedDictEntryBuf, OwnedSizedDictEntryBuf}; +pub use file::*; diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index b07dab59..5ee0a0d6 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -1,11 +1,9 @@ use crate::structure::{ tfc::block::{parse_block_control_records, BLOCK_SIZE}, - util::calculate_width, - LateLogArrayBufBuilder, LogArrayBufBuilder, MonotonicLogArray, + LateLogArrayBufBuilder, MonotonicLogArray, }; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; -use itertools::*; use num_derive::FromPrimitive; use num_traits::FromPrimitive; use rug::Integer; @@ -14,7 +12,7 @@ use std::{borrow::Cow, marker::PhantomData}; use super::{ block::{IdLookupResult, SizedDictBlock, SizedDictEntry}, decimal::{decimal_to_storage, storage_to_decimal}, - dict::{build_dict_unchecked, build_offset_logarray, SizedDict, SizedDictBufBuilder}, + dict::{SizedDict, SizedDictBufBuilder}, integer::{bigint_to_storage, storage_to_bigint}, }; @@ -35,11 +33,6 @@ impl TypedDict { block_offsets: Bytes, data: Bytes, ) -> Self { - let types_present2 = types_present.clone(); - let type_offsets2 = type_offsets.clone(); - let block_offsets2 = block_offsets.clone(); - let data2 = data.clone(); - let types_present = MonotonicLogArray::parse(types_present).unwrap(); let type_offsets = MonotonicLogArray::parse(type_offsets).unwrap(); let block_offsets = MonotonicLogArray::parse(block_offsets).unwrap(); @@ -50,7 +43,7 @@ impl TypedDict { block_offsets, type_id_offsets: Vec::new(), num_entries: 0, - data, + data: data.slice(..data.len()-8), }; } let mut tally: u64 = 0; @@ -91,7 +84,7 @@ impl TypedDict { block_offsets, type_id_offsets, num_entries, - data, + data: data.slice(..data.len()-8), } } @@ -313,7 +306,7 @@ pub struct StringDict(TypedDictSegment); impl StringDict { pub fn parse(offsets: Bytes, data: Bytes) -> Self { - Self(TypedDictSegment::parse(offsets, data.slice(..data.len()), 0)) + Self(TypedDictSegment::parse(offsets, data.slice(..data.len()-8), 0)) } pub fn get(&self, index: usize) -> Option { @@ -372,7 +365,7 @@ impl StringDictBufBuilder { let (mut offsets_array, mut data_buf, _block_offset, id_offset) = self.0.finalize(); offsets_array.pop(); let offsets_buf = offsets_array.finalize(); - //data_buf.put_u64(id_offset); + data_buf.put_u64(id_offset); (offsets_buf, data_buf) } @@ -635,69 +628,6 @@ impl ToLexical for Decimal { } } -pub fn build_segment, I: Iterator>( - record_size: Option, - offsets: &mut Vec, - data_buf: &mut B, - iter: I, -) { - let slices = iter.map(|val| val.to_lexical()); - build_dict_unchecked(record_size, 0, offsets, data_buf, slices); -} - -pub fn build_multiple_segments< - B1: BufMut, - B2: BufMut, - B3: BufMut, - B4: BufMut, - R: AsRef<[u8]>, - I: Iterator, ->( - used_types_buf: &mut B1, - type_offsets_buf: &mut B2, - block_offsets_buf: &mut B3, - data_buf: &mut B4, - iter: I, -) { - let mut types: Vec = Vec::new(); - let mut type_offsets: Vec = Vec::new(); - let mut offsets = Vec::with_capacity(iter.size_hint().0); - for (key, group) in iter.group_by(|v| v.0).into_iter() { - let start_offset = offsets.last().map(|t| *t).unwrap_or(0_u64); - let start_type_offset = offsets.len(); - 
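// The convention wired through these patches: the builder appends its final
// id offset (the entry count) to the data file as a big-endian u64, parsers
// slice those 8 bytes off (data.slice(..data.len() - 8)), and
// dict_file_get_count above reads them back. A minimal round-trip of just
// that trailer (a sketch using the bytes and byteorder crates; the helper
// names are ours):
use byteorder::{BigEndian, ByteOrder};
use bytes::{BufMut, Bytes, BytesMut};

fn append_count(mut data: BytesMut, count: u64) -> Bytes {
    data.put_u64(count); // BufMut::put_u64 writes big-endian
    data.freeze()
}

fn split_count(data: &Bytes) -> (Bytes, u64) {
    let payload = data.slice(..data.len() - 8);
    let count = BigEndian::read_u64(&data[data.len() - 8..]);
    (payload, count)
}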
types.push(key); - type_offsets.push(start_type_offset as u64); - build_dict_unchecked( - key.record_size(), - start_offset, - &mut offsets, - data_buf, - group.map(|v| v.1), - ); - } - - build_offset_logarray(block_offsets_buf, offsets); - let largest_type = types.last().unwrap(); - let largest_type_offset = type_offsets.last().unwrap(); - - let types_width = calculate_width(*largest_type as u64); - let type_offsets_width = calculate_width(*largest_type_offset); - - let mut types_builder = LogArrayBufBuilder::new(used_types_buf, types_width); - let mut type_offsets_builder = LogArrayBufBuilder::new(type_offsets_buf, type_offsets_width); - - for t in types { - types_builder.push(t as u64); - } - - for o in type_offsets.into_iter().skip(1) { - type_offsets_builder.push(o - 1); - } - - types_builder.finalize(); - type_offsets_builder.finalize(); -} - pub struct TypedDictBufBuilder { types_present_builder: LateLogArrayBufBuilder, type_offsets_builder: LateLogArrayBufBuilder, @@ -769,13 +699,14 @@ impl TypedDictBufBuilder TypedDictBufBuilder, + >( + used_types_buf: &mut B1, + type_offsets_buf: &mut B2, + block_offsets_buf: &mut B3, + data_buf: &mut B4, + iter: I, + ) { + let mut builder = TypedDictBufBuilder::new(used_types_buf, type_offsets_buf, block_offsets_buf, data_buf); + builder.add_all(iter); + builder.finalize(); + } + fn build_segment_and_offsets< B1: BufMut, @@ -799,13 +746,18 @@ mod tests { I: Iterator, >( dt: Datatype, - array_buf: &mut B1, - data_buf: &mut B2, + array_buf: B1, + data_buf: B2, iter: I, - ) { - let mut offsets = Vec::new(); - build_segment(dt.record_size(), &mut offsets, data_buf, iter); - build_offset_logarray(array_buf, offsets); + ) -> (B1, B2) { + let offsets = LateLogArrayBufBuilder::new(array_buf); + let mut builder = SizedDictBufBuilder::new(dt.record_size(), 0, 0, offsets, data_buf); + builder.add_all(iter.map(|v|v.to_lexical())); + let (mut offsets_array, data_buf, _, _) = builder.finalize(); + offsets_array.pop(); + let offsets_buf = offsets_array.finalize(); + + (offsets_buf, data_buf) } #[test] From b017c5e6b2c79f35c87907c50e9894d8e13ab835 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Tue, 6 Dec 2022 17:34:41 +0100 Subject: [PATCH 64/99] Adding multiblock logic --- src/structure/tfc/typed.rs | 146 ++++++++++++++++++++++++++++++++++--- 1 file changed, 134 insertions(+), 12 deletions(-) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 5ee0a0d6..39b62a67 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -43,7 +43,7 @@ impl TypedDict { block_offsets, type_id_offsets: Vec::new(), num_entries: 0, - data: data.slice(..data.len()-8), + data: data.slice(..data.len() - 8), }; } let mut tally: u64 = 0; @@ -84,7 +84,7 @@ impl TypedDict { block_offsets, type_id_offsets, num_entries, - data: data.slice(..data.len()-8), + data: data.slice(..data.len() - 8), } } @@ -134,6 +134,8 @@ impl TypedDict { if len == 0 { // any slice will do logarray_slice = self.block_offsets.slice(0, 0); + } else if i == 0 { + logarray_slice = self.block_offsets.slice(type_offset, len); } else { logarray_slice = self.block_offsets.slice(type_offset + 1, len); } @@ -306,7 +308,11 @@ pub struct StringDict(TypedDictSegment); impl StringDict { pub fn parse(offsets: Bytes, data: Bytes) -> Self { - Self(TypedDictSegment::parse(offsets, data.slice(..data.len()-8), 0)) + Self(TypedDictSegment::parse( + offsets, + data.slice(..data.len() - 8), + 0, + )) } pub fn get(&self, index: usize) -> Option { @@ -333,10 +339,7 @@ impl StringDict { 
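// Usage sketch of the StringDict pair introduced here (signatures as in
// these patches; the strings are made up, and ids are assumed 1-based as
// in the tests below):
use bytes::{Bytes, BytesMut};

fn string_dict_roundtrip() {
    let mut builder = StringDictBufBuilder::new(BytesMut::new(), BytesMut::new());
    assert_eq!(1, builder.add(Bytes::from("aaaaaa")));
    assert_eq!(2, builder.add(Bytes::from("aabb")));
    let (offsets, data) = builder.finalize();

    let dict = StringDict::parse(offsets.freeze(), data.freeze());
    assert_eq!(2, dict.num_entries());
    assert_eq!(Some("aaaaaa".to_string()), dict.get(1));
}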
pub struct StringDictBufBuilder(SizedDictBufBuilder); impl StringDictBufBuilder { - pub fn new( - offsets_buf: B1, - data_buf: B2, - ) -> Self { + pub fn new(offsets_buf: B1, data_buf: B2) -> Self { let offsets = LateLogArrayBufBuilder::new(offsets_buf); Self(SizedDictBufBuilder::new(None, 0, 0, offsets, data_buf)) } @@ -720,24 +723,28 @@ impl TypedDictBufBuilder, - >( + >( used_types_buf: &mut B1, type_offsets_buf: &mut B2, block_offsets_buf: &mut B3, data_buf: &mut B4, iter: I, ) { - let mut builder = TypedDictBufBuilder::new(used_types_buf, type_offsets_buf, block_offsets_buf, data_buf); + let mut builder = TypedDictBufBuilder::new( + used_types_buf, + type_offsets_buf, + block_offsets_buf, + data_buf, + ); builder.add_all(iter); builder.finalize(); } - fn build_segment_and_offsets< B1: BufMut, B2: BufMut, @@ -752,7 +759,7 @@ mod tests { ) -> (B1, B2) { let offsets = LateLogArrayBufBuilder::new(array_buf); let mut builder = SizedDictBufBuilder::new(dt.record_size(), 0, 0, offsets, data_buf); - builder.add_all(iter.map(|v|v.to_lexical())); + builder.add_all(iter.map(|v| v.to_lexical())); let (mut offsets_array, data_buf, _, _) = builder.finalize(); offsets_array.pop(); let offsets_buf = offsets_array.finalize(); @@ -1278,4 +1285,119 @@ mod tests { assert_eq!(vec[i], convert_entry(dict.entry(i + 1).unwrap())) } } + + #[test] + fn test_two_blocks() { + let mut vec: Vec<(Datatype, Bytes)> = vec![ + String::make_entry(&"fdsa"), + String::make_entry(&"a"), + String::make_entry(&"bc"), + String::make_entry(&"bcd"), + String::make_entry(&"z"), + String::make_entry(&"Batty"), + String::make_entry(&"Batman"), + String::make_entry(&"apple"), + String::make_entry(&"donkey"), + ]; + vec.sort(); + + let mut typed_builder = TypedDictBufBuilder::new( + BytesMut::new(), + BytesMut::new(), + BytesMut::new(), + BytesMut::new(), + ); + + let _results: Vec = vec + .clone() + .into_iter() + .map(|(dt, entry)| typed_builder.add(dt, entry)) + .collect(); + + let (used_types, type_offsets, block_offsets, data) = typed_builder.finalize(); + + let dict = TypedDict::from_parts( + used_types.freeze(), + type_offsets.freeze(), + block_offsets.freeze(), + data.freeze(), + ); + + for i in 0..vec.len() { + assert_eq!(vec[i], convert_entry(dict.entry(i + 1).unwrap())) + } + } + + #[test] + fn test_three_blocks() { + let mut vec: Vec<(Datatype, Bytes)> = vec![ + String::make_entry(&"fdsa"), + String::make_entry(&"a"), + String::make_entry(&"bc"), + String::make_entry(&"bcd"), + String::make_entry(&"z"), + String::make_entry(&"Batty"), + String::make_entry(&"Batman"), + String::make_entry(&"apple"), + String::make_entry(&"donkey"), + String::make_entry(&"pickle"), + String::make_entry(&"Pacify"), + String::make_entry(&"Buckle"), + String::make_entry(&"possibilities"), + String::make_entry(&"suspicious"), + String::make_entry(&"babble"), + String::make_entry(&"reformat"), + String::make_entry(&"refactor"), + String::make_entry(&"prereserve"), + String::make_entry(&"full"), + String::make_entry(&"block"), + String::make_entry(&"precalculate"), + String::make_entry(&"make"), + String::make_entry(&"Fix"), + String::make_entry(&"Remove"), + String::make_entry(&"Two"), + String::make_entry(&"typed"), + String::make_entry(&"fix"), + String::make_entry(&"Working"), + String::make_entry(&"write"), + String::make_entry(&"refactor"), + String::make_entry(&"only"), + String::make_entry(&"Implementation"), + String::make_entry(&"Add"), + String::make_entry(&"typed"), + String::make_entry(&"renamed"), + String::make_entry(&"move"), + 
String::make_entry(&"look"), + String::make_entry(&"implement"), + String::make_entry(&"test"), + String::make_entry(&"lookup"), + ]; + vec.sort(); + + let mut typed_builder = TypedDictBufBuilder::new( + BytesMut::new(), + BytesMut::new(), + BytesMut::new(), + BytesMut::new(), + ); + + let _results: Vec = vec + .clone() + .into_iter() + .map(|(dt, entry)| typed_builder.add(dt, entry)) + .collect(); + + let (used_types, type_offsets, block_offsets, data) = typed_builder.finalize(); + + let dict = TypedDict::from_parts( + used_types.freeze(), + type_offsets.freeze(), + block_offsets.freeze(), + data.freeze(), + ); + + for i in 0..vec.len() { + assert_eq!(vec[i], convert_entry(dict.entry(i + 1).unwrap())) + } + } } From 29c0640994561994509b21809825ad9c65d954b7 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Wed, 7 Dec 2022 09:39:26 +0100 Subject: [PATCH 65/99] Moving types into their own file --- src/structure/tfc/datatypes.rs | 264 ++++++++++++++++++++++++++++++++ src/structure/tfc/integer.rs | 4 +- src/structure/tfc/mod.rs | 8 +- src/structure/tfc/typed.rs | 265 +-------------------------------- 4 files changed, 273 insertions(+), 268 deletions(-) create mode 100644 src/structure/tfc/datatypes.rs diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs new file mode 100644 index 00000000..32389c14 --- /dev/null +++ b/src/structure/tfc/datatypes.rs @@ -0,0 +1,264 @@ +use super::{ + decimal::{decimal_to_storage, storage_to_decimal}, + integer::{bigint_to_storage, storage_to_bigint}, +}; +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use bytes::{Buf, BufMut, Bytes, BytesMut}; +use num_derive::FromPrimitive; +use rug::Integer; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, FromPrimitive)] +pub enum Datatype { + String = 0, + UInt32, + Int32, + Float32, + UInt64, + Int64, + Float64, + Decimal, + BigInt, +} + +impl Datatype { + pub fn cast(self, b: B) -> T { + if T::datatype() != self { + panic!("not the right datatype"); + } + + T::from_lexical(b) + } + + pub fn record_size(&self) -> Option { + match self { + Datatype::String => None, + Datatype::UInt32 => Some(4), + Datatype::Int32 => Some(4), + Datatype::UInt64 => Some(8), + Datatype::Int64 => Some(8), + Datatype::Float32 => Some(4), + Datatype::Float64 => Some(8), + Datatype::Decimal => None, + Datatype::BigInt => None, + } + } +} + +pub trait TdbDataType { + fn datatype() -> Datatype; + fn from_lexical(b: B) -> Self; + + fn to_lexical(val: &T) -> Bytes + where + T: ToLexical + ?Sized, + { + val.to_lexical() + } + + fn make_entry(val: &T) -> (Datatype, Bytes) + where + T: ToLexical + ?Sized, + { + (Self::datatype(), val.to_lexical()) + } +} + +pub trait ToLexical { + fn to_lexical(&self) -> Bytes; +} + +impl> ToLexical for T { + fn to_lexical(&self) -> Bytes { + Bytes::copy_from_slice(self.as_ref().as_bytes()) + } +} + +impl TdbDataType for String { + fn datatype() -> Datatype { + Datatype::String + } + + fn from_lexical(mut b: B) -> Self { + let mut vec = vec![0; b.remaining()]; + b.copy_to_slice(&mut vec); + String::from_utf8(vec).unwrap() + } +} + +impl TdbDataType for u32 { + fn datatype() -> Datatype { + Datatype::UInt32 + } + + fn from_lexical(b: B) -> Self { + b.reader().read_u32::().unwrap() + } +} + +impl ToLexical for u32 { + fn to_lexical(&self) -> Bytes { + let mut buf = BytesMut::new().writer(); + buf.write_u32::(*self).unwrap(); + + buf.into_inner().freeze() + } +} + +const I32_BYTE_MASK: u32 = 0b1000_0000 << (3 * 8); +impl TdbDataType for i32 { + fn datatype() -> 
Datatype { + Datatype::Int32 + } + + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u32::().unwrap(); + (I32_BYTE_MASK ^ i) as i32 + } +} + +impl ToLexical for i32 { + fn to_lexical(&self) -> Bytes { + let sign_flip = I32_BYTE_MASK ^ (*self as u32); + let mut buf = BytesMut::new().writer(); + buf.write_u32::(sign_flip).unwrap(); + buf.into_inner().freeze() + } +} + +impl TdbDataType for u64 { + fn datatype() -> Datatype { + Datatype::UInt64 + } + + fn from_lexical(b: B) -> Self { + b.reader().read_u64::().unwrap() + } +} + +impl ToLexical for u64 { + fn to_lexical(&self) -> Bytes { + let mut buf = BytesMut::new().writer(); + buf.write_u64::(*self).unwrap(); + + buf.into_inner().freeze() + } +} + +const I64_BYTE_MASK: u64 = 0b1000_0000 << (7 * 8); +impl TdbDataType for i64 { + fn datatype() -> Datatype { + Datatype::Int64 + } + + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u64::().unwrap(); + (I64_BYTE_MASK ^ i) as i64 + } +} + +impl ToLexical for i64 { + fn to_lexical(&self) -> Bytes { + let sign_flip = I64_BYTE_MASK ^ (*self as u64); + let mut buf = BytesMut::new().writer(); + buf.write_u64::(sign_flip).unwrap(); + buf.into_inner().freeze() + } +} + +const F32_SIGN_MASK: u32 = 0x8000_0000; +const F32_COMPLEMENT: u32 = 0xffff_ffff; +impl TdbDataType for f32 { + fn datatype() -> Datatype { + Datatype::Float32 + } + + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u32::().unwrap(); + if i & F32_SIGN_MASK > 0 { + f32::from_bits(i ^ F32_SIGN_MASK) + } else { + f32::from_bits(i ^ F32_COMPLEMENT) + } + } +} + +impl ToLexical for f32 { + fn to_lexical(&self) -> Bytes { + let f = *self; + let g: u32 = if f.signum() == -1.0 { + f.to_bits() ^ F32_COMPLEMENT + } else { + f.to_bits() ^ F32_SIGN_MASK + }; + let mut buf = BytesMut::new().writer(); + buf.write_u32::(g).unwrap(); + buf.into_inner().freeze() + } +} + +const F64_SIGN_MASK: u64 = 0x8000_0000_0000_0000; +const F64_COMPLEMENT: u64 = 0xffff_ffff_ffff_ffff; +impl TdbDataType for f64 { + fn datatype() -> Datatype { + Datatype::Float64 + } + + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u64::().unwrap(); + if i & F64_SIGN_MASK > 0 { + f64::from_bits(i ^ F64_SIGN_MASK) + } else { + f64::from_bits(i ^ F64_COMPLEMENT) + } + } +} + +impl ToLexical for f64 { + fn to_lexical(&self) -> Bytes { + let f = *self; + let g: u64; + if f.signum() == -1.0 { + g = f.to_bits() ^ F64_COMPLEMENT; + } else { + g = f.to_bits() ^ F64_SIGN_MASK; + }; + let mut buf = BytesMut::new().writer(); + buf.write_u64::(g).unwrap(); + buf.into_inner().freeze() + } +} + +impl TdbDataType for Integer { + fn datatype() -> Datatype { + Datatype::BigInt + } + + fn from_lexical(mut b: B) -> Self { + storage_to_bigint(&mut b) + } +} + +impl ToLexical for Integer { + fn to_lexical(&self) -> Bytes { + Bytes::from(bigint_to_storage(self.clone())) + } +} + +#[derive(PartialEq, Debug)] +pub struct Decimal(String); + +impl TdbDataType for Decimal { + fn datatype() -> Datatype { + Datatype::Decimal + } + + fn from_lexical(mut b: B) -> Self { + Decimal(storage_to_decimal(&mut b)) + } +} + +impl ToLexical for Decimal { + fn to_lexical(&self) -> Bytes { + Bytes::from(decimal_to_storage(&self.0)) + } +} diff --git a/src/structure/tfc/integer.rs b/src/structure/tfc/integer.rs index 25617add..f841b897 100644 --- a/src/structure/tfc/integer.rs +++ b/src/structure/tfc/integer.rs @@ -91,8 +91,8 @@ pub fn bigint_to_storage(bigint: Integer) -> Vec { } number_vec.extend(size_bytes); if is_neg { - for i in 0..number_vec.len() { - number_vec[i] = 
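// Context for the complement being rewritten here: for a negative bigint
// the encoder flips every byte of the stored form, presumably so that more
// negative values sort below less negative ones in plain byte order; the
// patch itself only swaps index-based access for iter_mut, the
// clippy-friendly form. The same idiom in isolation:
fn complement_in_place(bytes: &mut [u8]) {
    for b in bytes.iter_mut() {
        *b = !*b;
    }
}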
!number_vec[i] + for e in number_vec.iter_mut() { + *e = !*e; } } number_vec.reverse(); diff --git a/src/structure/tfc/mod.rs b/src/structure/tfc/mod.rs index 665e65f0..2c2f120c 100644 --- a/src/structure/tfc/mod.rs +++ b/src/structure/tfc/mod.rs @@ -1,10 +1,12 @@ pub mod block; +pub mod datatypes; pub mod decimal; pub mod dict; +pub mod file; pub mod integer; pub mod typed; -pub mod file; -pub use typed::*; -pub use block::{SizedDictEntry, SizedDictEntryBuf, OwnedSizedDictEntryBuf}; +pub use block::{OwnedSizedDictEntryBuf, SizedDictEntry, SizedDictEntryBuf}; +pub use datatypes::*; pub use file::*; +pub use typed::*; diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 39b62a67..443ef34a 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -2,18 +2,14 @@ use crate::structure::{ tfc::block::{parse_block_control_records, BLOCK_SIZE}, LateLogArrayBufBuilder, MonotonicLogArray, }; -use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; -use bytes::{Buf, BufMut, Bytes, BytesMut}; -use num_derive::FromPrimitive; +use bytes::{BufMut, Bytes}; use num_traits::FromPrimitive; -use rug::Integer; use std::{borrow::Cow, marker::PhantomData}; use super::{ block::{IdLookupResult, SizedDictBlock, SizedDictEntry}, - decimal::{decimal_to_storage, storage_to_decimal}, dict::{SizedDict, SizedDictBufBuilder}, - integer::{bigint_to_storage, storage_to_bigint}, + Datatype, TdbDataType, ToLexical, }; #[derive(Clone, Debug)] @@ -374,263 +370,6 @@ impl StringDictBufBuilder { } } -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, FromPrimitive)] -pub enum Datatype { - String = 0, - UInt32, - Int32, - Float32, - UInt64, - Int64, - Float64, - Decimal, - BigInt, -} - -impl Datatype { - pub fn cast(self, b: B) -> T { - if T::datatype() != self { - panic!("not the right datatype"); - } - - T::from_lexical(b) - } - - pub fn record_size(&self) -> Option { - match self { - Datatype::String => None, - Datatype::UInt32 => Some(4), - Datatype::Int32 => Some(4), - Datatype::UInt64 => Some(8), - Datatype::Int64 => Some(8), - Datatype::Float32 => Some(4), - Datatype::Float64 => Some(8), - Datatype::Decimal => None, - Datatype::BigInt => None, - } - } -} - -pub trait TdbDataType { - fn datatype() -> Datatype; - fn from_lexical(b: B) -> Self; - - fn to_lexical(val: &T) -> Bytes - where - T: ToLexical + ?Sized, - { - val.to_lexical() - } - - fn make_entry(val: &T) -> (Datatype, Bytes) - where - T: ToLexical + ?Sized, - { - (Self::datatype(), val.to_lexical()) - } -} - -pub trait ToLexical { - fn to_lexical(&self) -> Bytes; -} - -impl> ToLexical for T { - fn to_lexical(&self) -> Bytes { - Bytes::copy_from_slice(self.as_ref().as_bytes()) - } -} - -impl TdbDataType for String { - fn datatype() -> Datatype { - Datatype::String - } - - fn from_lexical(mut b: B) -> Self { - let mut vec = vec![0; b.remaining()]; - b.copy_to_slice(&mut vec); - String::from_utf8(vec).unwrap() - } -} - -impl TdbDataType for u32 { - fn datatype() -> Datatype { - Datatype::UInt32 - } - - fn from_lexical(b: B) -> Self { - b.reader().read_u32::().unwrap() - } -} - -impl ToLexical for u32 { - fn to_lexical(&self) -> Bytes { - let mut buf = BytesMut::new().writer(); - buf.write_u32::(*self).unwrap(); - - buf.into_inner().freeze() - } -} - -const I32_BYTE_MASK: u32 = 0b1000_0000 << (3 * 8); -impl TdbDataType for i32 { - fn datatype() -> Datatype { - Datatype::Int32 - } - - fn from_lexical(b: B) -> Self { - let i = b.reader().read_u32::().unwrap(); - (I32_BYTE_MASK ^ i) as i32 - } -} - -impl ToLexical for i32 { 
- fn to_lexical(&self) -> Bytes { - let sign_flip = I32_BYTE_MASK ^ (*self as u32); - let mut buf = BytesMut::new().writer(); - buf.write_u32::(sign_flip).unwrap(); - buf.into_inner().freeze() - } -} - -impl TdbDataType for u64 { - fn datatype() -> Datatype { - Datatype::UInt64 - } - - fn from_lexical(b: B) -> Self { - b.reader().read_u64::().unwrap() - } -} - -impl ToLexical for u64 { - fn to_lexical(&self) -> Bytes { - let mut buf = BytesMut::new().writer(); - buf.write_u64::(*self).unwrap(); - - buf.into_inner().freeze() - } -} - -const I64_BYTE_MASK: u64 = 0b1000_0000 << (7 * 8); -impl TdbDataType for i64 { - fn datatype() -> Datatype { - Datatype::Int64 - } - - fn from_lexical(b: B) -> Self { - let i = b.reader().read_u64::().unwrap(); - (I64_BYTE_MASK ^ i) as i64 - } -} - -impl ToLexical for i64 { - fn to_lexical(&self) -> Bytes { - let sign_flip = I64_BYTE_MASK ^ (*self as u64); - let mut buf = BytesMut::new().writer(); - buf.write_u64::(sign_flip).unwrap(); - buf.into_inner().freeze() - } -} - -const F32_SIGN_MASK: u32 = 0x8000_0000; -const F32_COMPLEMENT: u32 = 0xffff_ffff; -impl TdbDataType for f32 { - fn datatype() -> Datatype { - Datatype::Float32 - } - - fn from_lexical(b: B) -> Self { - let i = b.reader().read_u32::().unwrap(); - if i & F32_SIGN_MASK > 0 { - f32::from_bits(i ^ F32_SIGN_MASK) - } else { - f32::from_bits(i ^ F32_COMPLEMENT) - } - } -} - -impl ToLexical for f32 { - fn to_lexical(&self) -> Bytes { - let f = *self; - let g: u32; - if f.signum() == -1.0 { - g = f.to_bits() ^ F32_COMPLEMENT; - } else { - g = f.to_bits() ^ F32_SIGN_MASK; - }; - let mut buf = BytesMut::new().writer(); - buf.write_u32::(g).unwrap(); - buf.into_inner().freeze() - } -} - -const F64_SIGN_MASK: u64 = 0x8000_0000_0000_0000; -const F64_COMPLEMENT: u64 = 0xffff_ffff_ffff_ffff; -impl TdbDataType for f64 { - fn datatype() -> Datatype { - Datatype::Float64 - } - - fn from_lexical(b: B) -> Self { - let i = b.reader().read_u64::().unwrap(); - if i & F64_SIGN_MASK > 0 { - f64::from_bits(i ^ F64_SIGN_MASK) - } else { - f64::from_bits(i ^ F64_COMPLEMENT) - } - } -} - -impl ToLexical for f64 { - fn to_lexical(&self) -> Bytes { - let f = *self; - let g: u64; - if f.signum() == -1.0 { - g = f.to_bits() ^ F64_COMPLEMENT; - } else { - g = f.to_bits() ^ F64_SIGN_MASK; - }; - let mut buf = BytesMut::new().writer(); - buf.write_u64::(g).unwrap(); - buf.into_inner().freeze() - } -} - -impl TdbDataType for Integer { - fn datatype() -> Datatype { - Datatype::BigInt - } - - fn from_lexical(mut b: B) -> Self { - storage_to_bigint(&mut b) - } -} - -impl ToLexical for Integer { - fn to_lexical(&self) -> Bytes { - Bytes::from(bigint_to_storage(self.clone())) - } -} - -#[derive(PartialEq, Debug)] -pub struct Decimal(String); - -impl TdbDataType for Decimal { - fn datatype() -> Datatype { - Datatype::Decimal - } - - fn from_lexical(mut b: B) -> Self { - Decimal(storage_to_decimal(&mut b)) - } -} - -impl ToLexical for Decimal { - fn to_lexical(&self) -> Bytes { - Bytes::from(decimal_to_storage(&self.0)) - } -} - pub struct TypedDictBufBuilder { types_present_builder: LateLogArrayBufBuilder, type_offsets_builder: LateLogArrayBufBuilder, From f4e84072197e9321f7b45d28ffc3c28a2f7af730 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Wed, 7 Dec 2022 09:46:22 +0100 Subject: [PATCH 66/99] Make tests pass (imports) --- src/structure/tfc/datatypes.rs | 2 +- src/structure/tfc/typed.rs | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs 
index 32389c14..a1ae6b7f 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -245,7 +245,7 @@ impl ToLexical for Integer { } #[derive(PartialEq, Debug)] -pub struct Decimal(String); +pub struct Decimal(pub String); impl TdbDataType for Decimal { fn datatype() -> Datatype { diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 443ef34a..c33a4676 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -460,6 +460,11 @@ impl TypedDictBufBuilder Date: Wed, 7 Dec 2022 10:26:09 +0100 Subject: [PATCH 67/99] Satisfy linter --- src/structure/tfc/datatypes.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index a1ae6b7f..1505030e 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -216,11 +216,10 @@ impl TdbDataType for f64 { impl ToLexical for f64 { fn to_lexical(&self) -> Bytes { let f = *self; - let g: u64; - if f.signum() == -1.0 { - g = f.to_bits() ^ F64_COMPLEMENT; + let g: u64 = if f.signum() == -1.0 { + f.to_bits() ^ F64_COMPLEMENT } else { - g = f.to_bits() ^ F64_SIGN_MASK; + f.to_bits() ^ F64_SIGN_MASK }; let mut buf = BytesMut::new().writer(); buf.write_u64::(g).unwrap(); From 641b55cc08b6616033466cce63cd1633136f0028 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 7 Dec 2022 11:16:54 +0100 Subject: [PATCH 68/99] Made SizedDictEntry slightly more efficient for single byte structs --- src/structure/tfc/block.rs | 114 ++++++++++++++++++++++++++----------- src/structure/tfc/typed.rs | 4 +- 2 files changed, 84 insertions(+), 34 deletions(-) diff --git a/src/structure/tfc/block.rs b/src/structure/tfc/block.rs index f4332f2d..7dc8847c 100644 --- a/src/structure/tfc/block.rs +++ b/src/structure/tfc/block.rs @@ -3,6 +3,7 @@ use std::cmp::Ordering; use std::hash::{Hash, Hasher}; use bytes::{Buf, BufMut, Bytes, BytesMut}; +use itertools::Either; use crate::structure::{ util::{find_common_prefix, find_common_prefix_ord}, @@ -71,11 +72,26 @@ impl SizedBlockHeader { } #[derive(Clone, Debug)] -pub struct SizedDictEntry(pub Vec); +pub enum SizedDictEntry { + Single(Bytes), + Rope(Vec) +} + +impl From for SizedDictEntry { + fn from(val: Bytes) -> Self { + Self::Single(val) + } +} + +impl From> for SizedDictEntry { + fn from(val: Vec) -> Self { + Self::Rope(val) + } +} impl SizedDictEntry { pub fn new(parts: Vec) -> Self { - Self(parts) + Self::Rope(parts) } pub fn new_optimized(parts: Vec) -> Self { @@ -86,21 +102,41 @@ impl SizedDictEntry { } pub fn to_bytes(&self) -> Bytes { - if self.0.len() == 1 { - self.0[0].clone() - } else { - let mut buf = BytesMut::with_capacity(self.len()); - for slice in self.0.iter() { - buf.extend_from_slice(&slice[..]); + match self { + Self::Single(b) => b.clone(), + Self::Rope(v) => { + if v.len() == 1 { + v[0].clone() + } else { + let mut buf = BytesMut::with_capacity(self.len()); + for slice in v.iter() { + buf.extend_from_slice(&slice[..]); + } + + buf.freeze() + } } + } + } - buf.freeze() + pub fn chunks(&self) -> impl Iterator { + match self { + Self::Single(b) => Either::Left(std::iter::once(b)), + Self::Rope(v) => Either::Right(v.iter()) + } + } + + pub fn into_chunks(self) -> impl Iterator { + match self { + Self::Single(b) => Either::Left(std::iter::once(b)), + Self::Rope(v) => Either::Right(v.into_iter()) } } + pub fn to_vec(&self) -> Vec { let mut v = Vec::with_capacity(self.len()); - for slice in self.0.iter() { + for slice in self.chunks() { 
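// chunks() above leans on itertools::Either, which implements Iterator
// whenever both of its sides do, so a single method can return either a
// one-item iterator or a Vec iterator without boxing. The pattern in
// isolation (our own toy function, not part of the patch):
use itertools::Either;

fn one_or_many<'a>(single: Option<&'a u32>, many: &'a [u32]) -> impl Iterator<Item = &'a u32> {
    match single {
        Some(v) => Either::Left(std::iter::once(v)),
        None => Either::Right(many.iter()),
    }
}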
v.extend_from_slice(slice); } @@ -124,7 +160,14 @@ impl SizedDictEntry { } pub fn len(&self) -> usize { - self.0.iter().map(|s| s.len()).sum() + self.chunks().map(|s| s.len()).sum() + } + + fn rope_len(&self) -> usize { + match self { + Self::Single(_) => 1, + Self::Rope(v) => v.len() + } } /// optimize size @@ -133,15 +176,15 @@ impl SizedDictEntry { /// efficient than a copy of the string. This will copy the /// underlying string if that is the case. pub fn optimize(&mut self) { - let overhead_size = std::mem::size_of::() * self.0.len(); + let overhead_size = std::mem::size_of::() * self.rope_len(); if std::mem::size_of::() + self.len() < overhead_size { let mut bytes = BytesMut::with_capacity(self.len()); - for part in self.0.iter() { + for part in self.chunks() { bytes.extend(part); } - self.0 = vec![bytes.freeze()]; + *self = Self::Single(bytes.freeze()); } } @@ -151,7 +194,7 @@ impl SizedDictEntry { } else if self.len() == 0 { true } else { - let mut it = self.0.iter(); + let mut it = self.chunks(); let mut part = it.next().unwrap(); loop { let slice = b.chunk(); @@ -197,7 +240,7 @@ impl Eq for SizedDictEntry {} impl Hash for SizedDictEntry { fn hash(&self, state: &mut H) { - for part in self.0.iter() { + for part in self.chunks() { state.write(part); } } @@ -210,8 +253,8 @@ impl Ord for SizedDictEntry { return Ordering::Equal; } - let mut it1 = self.0.iter(); - let mut it2 = other.0.iter(); + let mut it1 = self.chunks(); + let mut it2 = other.chunks(); let mut part1 = it1.next().unwrap().clone(); let mut part2 = it2.next().unwrap().clone(); @@ -289,14 +332,22 @@ pub struct SizedDictEntryBuf<'a> { pos_in_slice: usize, } +impl<'a> SizedDictEntryBuf<'a> { + fn current_slice(&self) -> &Bytes { + match self.entry.as_ref() { + SizedDictEntry::Single(b) => &b, + SizedDictEntry::Rope(v) => &v[self.slice_ix] + } + } +} + impl<'a> Buf for SizedDictEntryBuf<'a> { fn remaining(&self) -> usize { { let pos_in_slice = self.pos_in_slice; let total: usize = self .entry - .0 - .iter() + .chunks() .skip(self.slice_ix) .map(|s| s.len()) .sum(); @@ -307,10 +358,10 @@ impl<'a> Buf for SizedDictEntryBuf<'a> { fn chunk(&self) -> &[u8] { { let pos_in_slice = self.pos_in_slice; - if self.slice_ix >= self.entry.0.len() { + if self.slice_ix >= self.entry.rope_len() { &[] } else { - let slice = &self.entry.0[self.slice_ix]; + let slice = self.current_slice(); &slice[pos_in_slice..] } } @@ -318,37 +369,36 @@ impl<'a> Buf for SizedDictEntryBuf<'a> { fn advance(&mut self, cnt: usize) { { - let pos_in_slice: &mut usize = &mut self.pos_in_slice; let mut cnt = cnt; - if self.slice_ix < self.entry.0.len() { - let slice = &self.entry.0[self.slice_ix]; - let remaining_in_slice = slice.len() - *pos_in_slice; + if self.slice_ix < self.entry.rope_len() { + let slice = self.current_slice(); + let remaining_in_slice = slice.len() - self.pos_in_slice; if remaining_in_slice > cnt { // we remain in the slice we're at. 
- *pos_in_slice += cnt; + self.pos_in_slice += cnt; } else { // we are starting at the next slice cnt -= remaining_in_slice; self.slice_ix += 1; loop { - if self.entry.0.len() >= self.slice_ix { + if self.entry.rope_len() >= self.slice_ix { // past the end - *pos_in_slice = 0; + self.pos_in_slice = 0; break; } - let slice_len = self.entry.0[self.slice_ix].len(); + let slice_len = self.current_slice().len(); if cnt < slice_len { // this is our slice - *pos_in_slice = cnt; + self.pos_in_slice = cnt; break; } // not our slice, so advance to next - cnt -= self.entry.0.len(); + cnt -= self.entry.rope_len(); self.slice_ix += 1; } } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index c33a4676..c0b8a663 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -713,7 +713,7 @@ mod tests { for i in 1..vec.len() + 1 { let (t, s) = dict.entry(i).unwrap(); - assert_eq!(vec[i - 1], (t, s.0.into_iter().flatten().collect())); + assert_eq!(vec[i - 1], (t, s.into_chunks().flatten().collect())); } assert_eq!( @@ -781,7 +781,7 @@ mod tests { for i in 1..vec.len() + 1 { let (t, s) = dict.entry(i).unwrap(); - assert_eq!(vec[i - 1], (t, s.0.into_iter().flatten().collect())); + assert_eq!(vec[i - 1], (t, s.into_chunks().flatten().collect())); } assert_eq!("Batman".to_string(), dict.get::(1).unwrap()); From bcca7cfd4917c3e833731ccd6cb0a00d6d769234 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 7 Dec 2022 12:27:13 +0100 Subject: [PATCH 69/99] implement TypedDictEntry --- src/layer/builder.rs | 2 +- src/layer/id_map.rs | 4 +- src/structure/tfc/datatypes.rs | 8 +-- src/structure/tfc/file.rs | 4 +- src/structure/tfc/typed.rs | 123 +++++++++++++++++++-------------- 5 files changed, 79 insertions(+), 62 deletions(-) diff --git a/src/layer/builder.rs b/src/layer/builder.rs index d8eeb111..a2e753c2 100644 --- a/src/layer/builder.rs +++ b/src/layer/builder.rs @@ -77,7 +77,7 @@ impl DictionarySetFileBuilder { pub fn add_value(&mut self, value: &str) -> u64 { let id = self .value_dictionary_builder - .add(Datatype::String, Bytes::copy_from_slice(value.as_bytes())); + .add(TypedDictEntry::new(Datatype::String, Bytes::copy_from_slice(value.as_bytes()).into())); id } diff --git a/src/layer/id_map.rs b/src/layer/id_map.rs index e01f3331..6857a11d 100644 --- a/src/layer/id_map.rs +++ b/src/layer/id_map.rs @@ -142,7 +142,7 @@ pub async fn construct_idmaps_from_structures .map(|x| x.0) }; - let typed_entry_comparator = |vals: &[Option<&(u64, (Datatype, SizedDictEntry))>]| { + let typed_entry_comparator = |vals: &[Option<&(u64, TypedDictEntry)>]| { vals.iter() .enumerate() .filter(|(_, x)| x.is_some()) @@ -150,7 +150,7 @@ pub async fn construct_idmaps_from_structures .map(|x| x.0) }; - let sorted_node_iter = sorted_iterator(node_iters, entry_comparator).map(|(i,s)|(i, (Datatype::String, s))); + let sorted_node_iter = sorted_iterator(node_iters, entry_comparator).map(|(i,s)|(i, TypedDictEntry::new(Datatype::String, s))); let sorted_value_iter = sorted_iterator(value_iters, typed_entry_comparator); let sorted_node_value_iter = sorted_node_iter.chain(sorted_value_iter).map(|(id, _)| id - 1); let sorted_predicate_iter = diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 1505030e..63705feb 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -1,13 +1,13 @@ use super::{ decimal::{decimal_to_storage, storage_to_decimal}, - integer::{bigint_to_storage, storage_to_bigint}, + integer::{bigint_to_storage, 
storage_to_bigint}, TypedDictEntry, }; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use num_derive::FromPrimitive; use rug::Integer; -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, FromPrimitive)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, FromPrimitive, Hash)] pub enum Datatype { String = 0, UInt32, @@ -55,11 +55,11 @@ pub trait TdbDataType { val.to_lexical() } - fn make_entry(val: &T) -> (Datatype, Bytes) + fn make_entry(val: &T) -> TypedDictEntry where T: ToLexical + ?Sized, { - (Self::datatype(), val.to_lexical()) + TypedDictEntry::new(Self::datatype(), val.to_lexical().into()) } } diff --git a/src/structure/tfc/file.rs b/src/structure/tfc/file.rs index 645c85b6..316076cb 100644 --- a/src/structure/tfc/file.rs +++ b/src/structure/tfc/file.rs @@ -55,7 +55,7 @@ pub async fn merge_typed_dictionaries< ) -> io::Result<()> { let iterators: Vec<_> = dictionaries.map(|d| d.iter()).collect(); - let pick_fn = |vals: &[Option<&(Datatype, SizedDictEntry)>]| { + let pick_fn = |vals: &[Option<&TypedDictEntry>]| { vals.iter() .enumerate() .filter(|(_, v)| v.is_some()) @@ -63,7 +63,7 @@ pub async fn merge_typed_dictionaries< .map(|(ix, _)| ix) }; - let sorted_iterator = sorted_iterator(iterators, pick_fn).map(|(dt, elt)| (dt, elt.to_bytes())); + let sorted_iterator = sorted_iterator(iterators, pick_fn); let mut types_present_file_writer = dict_files.types_present_file.open_write().await?; let mut type_offsets_file_writer = dict_files.type_offsets_file.open_write().await?; diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index c0b8a663..142c7b53 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -9,9 +9,34 @@ use std::{borrow::Cow, marker::PhantomData}; use super::{ block::{IdLookupResult, SizedDictBlock, SizedDictEntry}, dict::{SizedDict, SizedDictBufBuilder}, - Datatype, TdbDataType, ToLexical, + Datatype, TdbDataType, ToLexical, SizedDictEntryBuf, OwnedSizedDictEntryBuf, }; +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct TypedDictEntry { + datatype: Datatype, + entry: SizedDictEntry +} + +impl TypedDictEntry { + pub fn new(datatype: Datatype, entry: SizedDictEntry) -> Self { + Self { + datatype, entry + } + } + pub fn to_bytes(&self) -> Bytes { + self.entry.to_bytes() + } + + pub fn as_buf(&self) -> SizedDictEntryBuf { + self.entry.as_buf() + } + + pub fn into_buf(self) -> OwnedSizedDictEntryBuf { + self.entry.into_buf() + } +} + #[derive(Clone, Debug)] pub struct TypedDict { types_present: MonotonicLogArray, @@ -85,14 +110,14 @@ impl TypedDict { } pub fn id>(&self, v: &Q) -> IdLookupResult { - let (datatype, bytes) = T::make_entry(v); + let entry = T::make_entry(v); - self.id_slice(datatype, bytes.as_ref()) + self.id_slice(entry.datatype, &entry.to_bytes()) } pub fn get(&self, id: usize) -> Option { let result = self.entry(id); - result.map(|(datatype, slice)| datatype.cast(slice.into_buf())) + result.map(|entry| entry.datatype.cast(entry.into_buf())) } fn inner_type_segment(&self, i: usize) -> (SizedDict, u64) { @@ -179,7 +204,7 @@ impl TypedDict { FromPrimitive::from_u64(self.types_present.entry(type_index)).unwrap() } - pub fn entry(&self, id: usize) -> Option<(Datatype, SizedDictEntry)> { + pub fn entry(&self, id: usize) -> Option { if id > self.num_entries() { return None; } @@ -187,7 +212,7 @@ impl TypedDict { let (dict, offset) = self.inner_type_segment(type_index); let dt = self.type_for_type_index(type_index); - dict.entry(id - 
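// TypedDictEntry, introduced in this patch, carries the Datatype tag along
// with the payload, so lookups like the one below can rebuild a typed value
// from one object. Constructing an entry by hand versus via the trait
// helper (both forms appear in this patch; the value is made up):
fn typed_entry_example() {
    let by_hand = TypedDictEntry::new(Datatype::UInt32, 42_u32.to_lexical().into());
    let by_trait = u32::make_entry(&42_u32);
    // both hold Datatype::UInt32 plus the 4-byte big-endian key for 42
    assert_eq!(by_hand, by_trait);
}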
offset as usize).map(|e| (dt, e)) + dict.entry(id - offset as usize).map(|e| TypedDictEntry::new(dt, e)) } pub fn num_entries(&self) -> usize { @@ -226,14 +251,14 @@ impl TypedDict { }) } - pub fn iter<'a>(&'a self) -> impl Iterator + 'a + Clone { + pub fn iter<'a>(&'a self) -> impl Iterator + 'a + Clone { self.block_iter() - .flat_map(|(datatype, segment)| segment.into_iter().map(move |entry| (datatype, entry))) + .flat_map(|(datatype, segment)| segment.into_iter().map(move |entry| TypedDictEntry::new(datatype, entry))) } - pub fn into_iter(self) -> impl Iterator + Clone { + pub fn into_iter(self) -> impl Iterator + Clone { self.into_block_iter() - .flat_map(|(datatype, segment)| segment.into_iter().map(move |entry| (datatype, entry))) + .flat_map(|(datatype, segment)| segment.into_iter().map(move |entry| TypedDictEntry::new(datatype, entry))) } } @@ -397,43 +422,39 @@ impl TypedDictBufBuilder u64 { + pub fn add(&mut self, value: TypedDictEntry) -> u64 { if self.current_datatype == None { - self.current_datatype = Some(dt); - self.types_present_builder.push(dt as u64); + self.current_datatype = Some(value.datatype); + self.types_present_builder.push(value.datatype as u64); self.sized_dict_buf_builder .as_mut() - .map(|b| b.record_size = dt.record_size()); + .map(|b| b.record_size = value.datatype.record_size()); } - if self.current_datatype != Some(dt) { + if self.current_datatype != Some(value.datatype) { let (block_offset_builder, data_buf, block_offset, id_offset) = self.sized_dict_buf_builder.take().unwrap().finalize(); - self.types_present_builder.push(dt as u64); + self.types_present_builder.push(value.datatype as u64); self.type_offsets_builder .push(block_offset_builder.count() as u64 - 1); self.sized_dict_buf_builder = Some(SizedDictBufBuilder::new( - dt.record_size(), + value.datatype.record_size(), block_offset, id_offset, block_offset_builder, data_buf, )); - self.current_datatype = Some(dt); + self.current_datatype = Some(value.datatype); } self.sized_dict_buf_builder .as_mut() - .map(|s| s.add(value)) + .map(|s| s.add(value.entry.to_bytes())) .unwrap() } - pub fn add_entry(&mut self, dt: Datatype, e: &SizedDictEntry) -> u64 { - self.add(dt, e.to_bytes()) - } - - pub fn add_all>(&mut self, it: I) -> Vec { - it.map(|(dt, val)| self.add(dt, val)).collect() + pub fn add_all>(&mut self, it: I) -> Vec { + it.map(|val| self.add(val)).collect() } pub fn finalize(self) -> (B1, B2, B3, B4) { @@ -471,7 +492,7 @@ mod tests { B2: BufMut, B3: BufMut, B4: BufMut, - I: Iterator, + I: Iterator, >( used_types_buf: &mut B1, type_offsets_buf: &mut B2, @@ -669,7 +690,7 @@ mod tests { #[test] fn test_multi_segment() { - let mut vec: Vec<(Datatype, Bytes)> = vec![ + let mut vec: Vec = vec![ Decimal::make_entry(&Decimal("-1".to_string())), String::make_entry(&"asdf"), Decimal::make_entry(&Decimal("-12342343.2348973".to_string())), @@ -712,8 +733,8 @@ mod tests { assert_eq!(IdLookupResult::Found(7), dict.id(&(-500_i32))); for i in 1..vec.len() + 1 { - let (t, s) = dict.entry(i).unwrap(); - assert_eq!(vec[i - 1], (t, s.into_chunks().flatten().collect())); + let entry = dict.entry(i).unwrap(); + assert_eq!(vec[i - 1], entry); } assert_eq!( @@ -724,7 +745,7 @@ mod tests { #[test] fn test_full_blocks() { - let mut vec: Vec<(Datatype, Bytes)> = vec![ + let mut vec: Vec = vec![ String::make_entry(&"fdsa"), String::make_entry(&"a"), String::make_entry(&"bc"), @@ -780,8 +801,8 @@ mod tests { assert_eq!(31, dict.num_entries()); for i in 1..vec.len() + 1 { - let (t, s) = dict.entry(i).unwrap(); - 
assert_eq!(vec[i - 1], (t, s.into_chunks().flatten().collect())); + let entry = dict.entry(i).unwrap(); + assert_eq!(vec[i - 1], entry); } assert_eq!("Batman".to_string(), dict.get::(1).unwrap()); @@ -822,7 +843,7 @@ mod tests { #[test] fn iterate_full_blocks() { - let mut vec: Vec<(Datatype, Bytes)> = vec![ + let mut vec: Vec = vec![ String::make_entry(&"fdsa"), String::make_entry(&"a"), String::make_entry(&"bc"), @@ -875,18 +896,14 @@ mod tests { data.freeze(), ); - let actual: Vec<_> = dict.iter().map(|(dt, e)| (dt, e.to_bytes())).collect(); + let actual: Vec<_> = dict.iter().collect(); assert_eq!(vec, actual); } - fn convert_entry(e: (Datatype, SizedDictEntry)) -> (Datatype, Bytes) { - (e.0, e.1.to_bytes()) - } - #[test] fn test_one_string() { - let vec: Vec<(Datatype, Bytes)> = vec![String::make_entry(&"fdsa")]; + let vec: Vec = vec![String::make_entry(&"fdsa")]; let used_types_buf = BytesMut::new(); let type_offsets_buf = BytesMut::new(); let block_offsets_buf = BytesMut::new(); @@ -902,7 +919,7 @@ mod tests { let _results: Vec = vec .clone() .into_iter() - .map(|(dt, entry)| typed_builder.add(dt, entry)) + .map(|entry| typed_builder.add(entry)) .collect(); let (used_types, type_offsets, block_offsets, data) = typed_builder.finalize(); @@ -913,12 +930,12 @@ mod tests { block_offsets.freeze(), data.freeze(), ); - assert_eq!(vec[0], convert_entry(dict.entry(1).unwrap())) + assert_eq!(vec[0], dict.entry(1).unwrap()) } #[test] fn test_incremental_builder() { - let mut vec: Vec<(Datatype, Bytes)> = vec![ + let mut vec: Vec = vec![ String::make_entry(&"fdsa"), String::make_entry(&"a"), String::make_entry(&"bc"), @@ -968,7 +985,7 @@ mod tests { let _results: Vec = vec .clone() .into_iter() - .map(|(dt, entry)| typed_builder.add(dt, entry)) + .map(|entry| typed_builder.add(entry)) .collect(); let (used_types, type_offsets, block_offsets, data) = typed_builder.finalize(); @@ -981,13 +998,13 @@ mod tests { ); for i in 0..vec.len() { - assert_eq!(vec[i], convert_entry(dict.entry(i + 1).unwrap())) + assert_eq!(vec[i], dict.entry(i + 1).unwrap()) } } #[test] fn test_incremental_builder_small_dicts() { - let mut vec: Vec<(Datatype, Bytes)> = vec![ + let mut vec: Vec = vec![ String::make_entry(&"fdsa"), i32::make_entry(&-500_i32), u32::make_entry(&20_u32), @@ -1013,7 +1030,7 @@ mod tests { let _results: Vec = vec .clone() .into_iter() - .map(|(dt, entry)| typed_builder.add(dt, entry)) + .map(|entry| typed_builder.add(entry)) .collect(); let (used_types, type_offsets, block_offsets, data) = typed_builder.finalize(); @@ -1026,13 +1043,13 @@ mod tests { ); for i in 0..vec.len() { - assert_eq!(vec[i], convert_entry(dict.entry(i + 1).unwrap())) + assert_eq!(vec[i], dict.entry(i + 1).unwrap()) } } #[test] fn test_two_blocks() { - let mut vec: Vec<(Datatype, Bytes)> = vec![ + let mut vec: Vec = vec![ String::make_entry(&"fdsa"), String::make_entry(&"a"), String::make_entry(&"bc"), @@ -1055,7 +1072,7 @@ mod tests { let _results: Vec = vec .clone() .into_iter() - .map(|(dt, entry)| typed_builder.add(dt, entry)) + .map(|entry| typed_builder.add(entry)) .collect(); let (used_types, type_offsets, block_offsets, data) = typed_builder.finalize(); @@ -1068,13 +1085,13 @@ mod tests { ); for i in 0..vec.len() { - assert_eq!(vec[i], convert_entry(dict.entry(i + 1).unwrap())) + assert_eq!(vec[i], dict.entry(i + 1).unwrap()) } } #[test] fn test_three_blocks() { - let mut vec: Vec<(Datatype, Bytes)> = vec![ + let mut vec: Vec = vec![ String::make_entry(&"fdsa"), String::make_entry(&"a"), String::make_entry(&"bc"), @@ 
-1128,7 +1145,7 @@ mod tests { let _results: Vec = vec .clone() .into_iter() - .map(|(dt, entry)| typed_builder.add(dt, entry)) + .map(|entry| typed_builder.add(entry)) .collect(); let (used_types, type_offsets, block_offsets, data) = typed_builder.finalize(); @@ -1141,7 +1158,7 @@ mod tests { ); for i in 0..vec.len() { - assert_eq!(vec[i], convert_entry(dict.entry(i + 1).unwrap())) + assert_eq!(vec[i], dict.entry(i + 1).unwrap()) } } } From bdd4bec9364a642a82e6a1ad7a1943ddffe07e66 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 7 Dec 2022 12:27:33 +0100 Subject: [PATCH 70/99] reformat everything --- src/layer/builder.rs | 21 ++++++++------------- src/layer/id_map.rs | 24 ++++++++++++++---------- src/storage/layer.rs | 14 +++----------- src/structure/tfc/block.rs | 14 +++++++------- src/structure/tfc/datatypes.rs | 3 ++- src/structure/tfc/dict.rs | 28 ++++++++++++++++++---------- src/structure/tfc/file.rs | 9 +++++++-- src/structure/tfc/typed.rs | 25 +++++++++++++++---------- 8 files changed, 74 insertions(+), 64 deletions(-) diff --git a/src/layer/builder.rs b/src/layer/builder.rs index a2e753c2..68adcbe4 100644 --- a/src/layer/builder.rs +++ b/src/layer/builder.rs @@ -24,14 +24,9 @@ impl DictionarySetFileBuilder { predicate_files: DictionaryFiles, value_files: TypedDictionaryFiles, ) -> io::Result { - let node_dictionary_builder = StringDictBufBuilder::new( - BytesMut::new(), - BytesMut::new(), - ); - let predicate_dictionary_builder = StringDictBufBuilder::new( - BytesMut::new(), - BytesMut::new(), - ); + let node_dictionary_builder = StringDictBufBuilder::new(BytesMut::new(), BytesMut::new()); + let predicate_dictionary_builder = + StringDictBufBuilder::new(BytesMut::new(), BytesMut::new()); let value_dictionary_builder = TypedDictBufBuilder::new( BytesMut::new(), BytesMut::new(), @@ -75,9 +70,10 @@ impl DictionarySetFileBuilder { /// /// Panics if the given value string is not a lexical successor of the previous value string. 
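    // Sketch of the entry construction this method performs internally,
    // assuming the TypedDictEntry/Datatype API introduced in the previous
    // patch (String::make_entry(&value) builds the same entry):
    //
    //     let entry = TypedDictEntry::new(
    //         Datatype::String,
    //         Bytes::copy_from_slice(value.as_bytes()).into(),
    //     );
    //     let id = self.value_dictionary_builder.add(entry);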
pub fn add_value(&mut self, value: &str) -> u64 { - let id = self - .value_dictionary_builder - .add(TypedDictEntry::new(Datatype::String, Bytes::copy_from_slice(value.as_bytes()).into())); + let id = self.value_dictionary_builder.add(TypedDictEntry::new( + Datatype::String, + Bytes::copy_from_slice(value.as_bytes()).into(), + )); id } @@ -140,8 +136,7 @@ impl DictionarySetFileBuilder { } pub async fn finalize(self) -> io::Result<()> { - let (mut node_offsets_buf, mut node_data_buf) = - self.node_dictionary_builder.finalize(); + let (mut node_offsets_buf, mut node_data_buf) = self.node_dictionary_builder.finalize(); let (mut predicate_offsets_buf, mut predicate_data_buf) = self.predicate_dictionary_builder.finalize(); let ( diff --git a/src/layer/id_map.rs b/src/layer/id_map.rs index 6857a11d..832972df 100644 --- a/src/layer/id_map.rs +++ b/src/layer/id_map.rs @@ -35,7 +35,7 @@ impl IdMap { if id > wtree.len() as u64 { None } else { - Some(wtree.lookup_one(id-1).unwrap() + 1) + Some(wtree.lookup_one(id - 1).unwrap() + 1) } }) .unwrap_or(id) @@ -48,7 +48,7 @@ impl IdMap { if id > wtree.len() as u64 { None } else { - let id:usize = id.try_into().unwrap(); + let id: usize = id.try_into().unwrap(); Some(wtree.decode_one(id - 1) + 1) } }) @@ -91,7 +91,7 @@ pub async fn construct_idmaps_from_structures let mut node_iters = Vec::with_capacity(len); let mut node_offset = 0; - let node_entries_len: Vec<_> = node_dicts.iter().map(|d|d.num_entries()).collect(); + let node_entries_len: Vec<_> = node_dicts.iter().map(|d| d.num_entries()).collect(); for (ix, dict) in node_dicts.into_iter().enumerate() { let idmap = node_value_idmaps[ix].clone(); let num_entries = dict.num_entries(); @@ -125,11 +125,12 @@ pub async fn construct_idmaps_from_structures for (ix, dict) in predicate_dicts.into_iter().enumerate() { let idmap = predicate_idmaps[ix].clone(); let num_entries = dict.num_entries(); - predicate_iters.push( - dict.into_iter() - .enumerate() - .map(move |(i, e)| (idmap.inner_to_outer(i as u64 + 1) + predicate_offset as u64, e)), - ); + predicate_iters.push(dict.into_iter().enumerate().map(move |(i, e)| { + ( + idmap.inner_to_outer(i as u64 + 1) + predicate_offset as u64, + e, + ) + })); predicate_offset += num_entries; } @@ -150,9 +151,12 @@ pub async fn construct_idmaps_from_structures .map(|x| x.0) }; - let sorted_node_iter = sorted_iterator(node_iters, entry_comparator).map(|(i,s)|(i, TypedDictEntry::new(Datatype::String, s))); + let sorted_node_iter = sorted_iterator(node_iters, entry_comparator) + .map(|(i, s)| (i, TypedDictEntry::new(Datatype::String, s))); let sorted_value_iter = sorted_iterator(value_iters, typed_entry_comparator); - let sorted_node_value_iter = sorted_node_iter.chain(sorted_value_iter).map(|(id, _)| id - 1); + let sorted_node_value_iter = sorted_node_iter + .chain(sorted_value_iter) + .map(|(id, _)| id - 1); let sorted_predicate_iter = sorted_iterator(predicate_iters, entry_comparator).map(|(id, _)| id - 1); diff --git a/src/storage/layer.rs b/src/storage/layer.rs index 2f63fc6d..323e6732 100644 --- a/src/storage/layer.rs +++ b/src/storage/layer.rs @@ -15,9 +15,7 @@ use crate::structure::dict_file_get_count; use crate::structure::logarray::logarray_file_get_length_and_width; use crate::structure::StringDict; use crate::structure::TypedDict; -use crate::structure::{ - util, AdjacencyList, BitIndex, LogArray, MonotonicLogArray, WaveletTree, -}; +use crate::structure::{util, AdjacencyList, BitIndex, LogArray, MonotonicLogArray, WaveletTree}; use std::convert::TryInto; use 
std::io; @@ -1569,10 +1567,7 @@ impl) + Rope(Vec), } impl From for SizedDictEntry { @@ -119,17 +119,17 @@ impl SizedDictEntry { } } - pub fn chunks(&self) -> impl Iterator { + pub fn chunks(&self) -> impl Iterator { match self { Self::Single(b) => Either::Left(std::iter::once(b)), - Self::Rope(v) => Either::Right(v.iter()) + Self::Rope(v) => Either::Right(v.iter()), } } - pub fn into_chunks(self) -> impl Iterator { + pub fn into_chunks(self) -> impl Iterator { match self { Self::Single(b) => Either::Left(std::iter::once(b)), - Self::Rope(v) => Either::Right(v.into_iter()) + Self::Rope(v) => Either::Right(v.into_iter()), } } @@ -166,7 +166,7 @@ impl SizedDictEntry { fn rope_len(&self) -> usize { match self { Self::Single(_) => 1, - Self::Rope(v) => v.len() + Self::Rope(v) => v.len(), } } @@ -336,7 +336,7 @@ impl<'a> SizedDictEntryBuf<'a> { fn current_slice(&self) -> &Bytes { match self.entry.as_ref() { SizedDictEntry::Single(b) => &b, - SizedDictEntry::Rope(v) => &v[self.slice_ix] + SizedDictEntry::Rope(v) => &v[self.slice_ix], } } } diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 63705feb..44f790c1 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -1,6 +1,7 @@ use super::{ decimal::{decimal_to_storage, storage_to_decimal}, - integer::{bigint_to_storage, storage_to_bigint}, TypedDictEntry, + integer::{bigint_to_storage, storage_to_bigint}, + TypedDictEntry, }; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; diff --git a/src/structure/tfc/dict.rs b/src/structure/tfc/dict.rs index 16118164..ce4deee1 100644 --- a/src/structure/tfc/dict.rs +++ b/src/structure/tfc/dict.rs @@ -159,8 +159,7 @@ impl SizedDict { pub fn num_blocks(&self) -> usize { if self.data.is_empty() { 0 - } - else { + } else { self.offsets.len() + 1 } } @@ -195,9 +194,7 @@ impl SizedDict { max = mid - 1; } Ordering::Greater => min = mid + 1, - Ordering::Equal => { - return IdLookupResult::Found((mid * BLOCK_SIZE + 1) as u64) - } // what luck! turns out the string we were looking for was the block head + Ordering::Equal => return IdLookupResult::Found((mid * BLOCK_SIZE + 1) as u64), // what luck! 
turns out the string we were looking for was the block head } } @@ -249,8 +246,7 @@ impl SizedDict { let num_blocks = self.num_blocks(); if num_blocks == 0 { 0 - } - else { + } else { let last_block_size = self.block_num_elements(num_blocks - 1); (num_blocks - 1) * BLOCK_SIZE + last_block_size as usize @@ -357,7 +353,11 @@ mod tests { let mut array_buf = BytesMut::new(); let mut data_buf = BytesMut::new(); - build_dict_and_offsets(&mut array_buf, &mut data_buf, strings.clone().into_iter().map(|s|Bytes::from(s))); + build_dict_and_offsets( + &mut array_buf, + &mut data_buf, + strings.clone().into_iter().map(|s| Bytes::from(s)), + ); let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); @@ -448,7 +448,11 @@ mod tests { let mut array_buf = BytesMut::new(); let mut data_buf = BytesMut::new(); - build_dict_and_offsets(&mut array_buf, &mut data_buf, strings.clone().into_iter().map(Bytes::from)); + build_dict_and_offsets( + &mut array_buf, + &mut data_buf, + strings.clone().into_iter().map(Bytes::from), + ); let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); @@ -480,7 +484,11 @@ mod tests { let mut array_buf = BytesMut::new(); let mut data_buf = BytesMut::new(); - build_dict_and_offsets(&mut array_buf, &mut data_buf, strings.clone().into_iter().map(Bytes::from)); + build_dict_and_offsets( + &mut array_buf, + &mut data_buf, + strings.clone().into_iter().map(Bytes::from), + ); let array_bytes = array_buf.freeze(); let data_bytes = data_buf.freeze(); diff --git a/src/structure/tfc/file.rs b/src/structure/tfc/file.rs index 316076cb..12e9cad7 100644 --- a/src/structure/tfc/file.rs +++ b/src/structure/tfc/file.rs @@ -1,7 +1,7 @@ use byteorder::{BigEndian, ByteOrder}; use bytes::BytesMut; use std::io; -use tokio::io::{AsyncWriteExt, AsyncReadExt}; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; use crate::{storage::*, structure::util::sorted_iterator}; @@ -70,7 +70,12 @@ pub async fn merge_typed_dictionaries< let mut blocks_file_writer = dict_files.blocks_file.open_write().await?; let mut offsets_file_writer = dict_files.offsets_file.open_write().await?; - let mut builder = TypedDictBufBuilder::new(BytesMut::new(), BytesMut::new(), BytesMut::new(), BytesMut::new()); + let mut builder = TypedDictBufBuilder::new( + BytesMut::new(), + BytesMut::new(), + BytesMut::new(), + BytesMut::new(), + ); builder.add_all(sorted_iterator); let (types_present_buf, type_offsets_buf, offsets_buf, data_buf) = builder.finalize(); diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 142c7b53..cb999636 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -9,20 +9,18 @@ use std::{borrow::Cow, marker::PhantomData}; use super::{ block::{IdLookupResult, SizedDictBlock, SizedDictEntry}, dict::{SizedDict, SizedDictBufBuilder}, - Datatype, TdbDataType, ToLexical, SizedDictEntryBuf, OwnedSizedDictEntryBuf, + Datatype, OwnedSizedDictEntryBuf, SizedDictEntryBuf, TdbDataType, ToLexical, }; #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct TypedDictEntry { datatype: Datatype, - entry: SizedDictEntry + entry: SizedDictEntry, } impl TypedDictEntry { pub fn new(datatype: Datatype, entry: SizedDictEntry) -> Self { - Self { - datatype, entry - } + Self { datatype, entry } } pub fn to_bytes(&self) -> Bytes { self.entry.to_bytes() @@ -212,7 +210,8 @@ impl TypedDict { let (dict, offset) = self.inner_type_segment(type_index); let dt = self.type_for_type_index(type_index); - dict.entry(id - offset as usize).map(|e| TypedDictEntry::new(dt, 
e)) + dict.entry(id - offset as usize) + .map(|e| TypedDictEntry::new(dt, e)) } pub fn num_entries(&self) -> usize { @@ -252,13 +251,19 @@ impl TypedDict { } pub fn iter<'a>(&'a self) -> impl Iterator + 'a + Clone { - self.block_iter() - .flat_map(|(datatype, segment)| segment.into_iter().map(move |entry| TypedDictEntry::new(datatype, entry))) + self.block_iter().flat_map(|(datatype, segment)| { + segment + .into_iter() + .map(move |entry| TypedDictEntry::new(datatype, entry)) + }) } pub fn into_iter(self) -> impl Iterator + Clone { - self.into_block_iter() - .flat_map(|(datatype, segment)| segment.into_iter().map(move |entry| TypedDictEntry::new(datatype, entry))) + self.into_block_iter().flat_map(|(datatype, segment)| { + segment + .into_iter() + .map(move |entry| TypedDictEntry::new(datatype, entry)) + }) } } From a0e9546a99edf786623f3a4cc0365be1cc592cb9 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 7 Dec 2022 13:26:18 +0100 Subject: [PATCH 71/99] split from_lexical to its own trait --- src/structure/tfc/datatypes.rs | 35 +++++++++++++++++++++++++++------- src/structure/tfc/typed.rs | 1 + 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 44f790c1..90dff6a9 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -45,9 +45,8 @@ impl Datatype { } } -pub trait TdbDataType { +pub trait TdbDataType: FromLexical { fn datatype() -> Datatype; - fn from_lexical(b: B) -> Self; fn to_lexical(val: &T) -> Bytes where @@ -68,17 +67,17 @@ pub trait ToLexical { fn to_lexical(&self) -> Bytes; } +pub trait FromLexical { + fn from_lexical(b: B) -> Self; +} + impl> ToLexical for T { fn to_lexical(&self) -> Bytes { Bytes::copy_from_slice(self.as_ref().as_bytes()) } } -impl TdbDataType for String { - fn datatype() -> Datatype { - Datatype::String - } - +impl FromLexical for String { fn from_lexical(mut b: B) -> Self { let mut vec = vec![0; b.remaining()]; b.copy_to_slice(&mut vec); @@ -86,11 +85,19 @@ impl TdbDataType for String { } } +impl TdbDataType for String { + fn datatype() -> Datatype { + Datatype::String + } +} + impl TdbDataType for u32 { fn datatype() -> Datatype { Datatype::UInt32 } +} +impl FromLexical for u32 { fn from_lexical(b: B) -> Self { b.reader().read_u32::().unwrap() } @@ -110,7 +117,9 @@ impl TdbDataType for i32 { fn datatype() -> Datatype { Datatype::Int32 } +} +impl FromLexical for i32 { fn from_lexical(b: B) -> Self { let i = b.reader().read_u32::().unwrap(); (I32_BYTE_MASK ^ i) as i32 @@ -130,7 +139,9 @@ impl TdbDataType for u64 { fn datatype() -> Datatype { Datatype::UInt64 } +} +impl FromLexical for u64 { fn from_lexical(b: B) -> Self { b.reader().read_u64::().unwrap() } @@ -150,7 +161,9 @@ impl TdbDataType for i64 { fn datatype() -> Datatype { Datatype::Int64 } +} +impl FromLexical for i64 { fn from_lexical(b: B) -> Self { let i = b.reader().read_u64::().unwrap(); (I64_BYTE_MASK ^ i) as i64 @@ -172,7 +185,9 @@ impl TdbDataType for f32 { fn datatype() -> Datatype { Datatype::Float32 } +} +impl FromLexical for f32 { fn from_lexical(b: B) -> Self { let i = b.reader().read_u32::().unwrap(); if i & F32_SIGN_MASK > 0 { @@ -203,7 +218,9 @@ impl TdbDataType for f64 { fn datatype() -> Datatype { Datatype::Float64 } +} +impl FromLexical for f64 { fn from_lexical(b: B) -> Self { let i = b.reader().read_u64::().unwrap(); if i & F64_SIGN_MASK > 0 { @@ -232,7 +249,9 @@ impl TdbDataType for Integer { fn datatype() -> Datatype { Datatype::BigInt } +} +impl FromLexical 
for Integer { fn from_lexical(mut b: B) -> Self { storage_to_bigint(&mut b) } @@ -251,7 +270,9 @@ impl TdbDataType for Decimal { fn datatype() -> Datatype { Datatype::Decimal } +} +impl FromLexical for Decimal { fn from_lexical(mut b: B) -> Self { Decimal(storage_to_decimal(&mut b)) } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index cb999636..d9e1097c 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -489,6 +489,7 @@ mod tests { use bytes::BytesMut; use rug::Integer; + use super::super::datatypes::FromLexical; use crate::structure::Decimal; use super::*; From 6147b2b35da30f651a13889c236b845ca21530ae Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 7 Dec 2022 13:32:51 +0100 Subject: [PATCH 72/99] put trait bound on TdbDataType ensuring FromLexical and ToLexical are implemented --- src/structure/tfc/datatypes.rs | 16 ++++++++-------- src/structure/tfc/typed.rs | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 90dff6a9..d12e4e7b 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -45,16 +45,9 @@ impl Datatype { } } -pub trait TdbDataType: FromLexical { +pub trait TdbDataType: FromLexical + ToLexical { fn datatype() -> Datatype; - fn to_lexical(val: &T) -> Bytes - where - T: ToLexical + ?Sized, - { - val.to_lexical() - } - fn make_entry(val: &T) -> TypedDictEntry where T: ToLexical + ?Sized, @@ -257,6 +250,13 @@ impl FromLexical for Integer { } } +impl FromLexical for String { + fn from_lexical(mut b: B) -> Self { + // TODO make this better + storage_to_bigint(&mut b).to_string() + } +} + impl ToLexical for Integer { fn to_lexical(&self) -> Bytes { Bytes::from(bigint_to_storage(self.clone())) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index d9e1097c..a3fa1aed 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -312,7 +312,7 @@ impl TypedDictSegment { } pub fn id>(&self, val: &Q) -> IdLookupResult { - let slice = T::to_lexical(val); + let slice = val.to_lexical(); self.dict.id(&slice[..]) } @@ -608,7 +608,7 @@ mod tests { where D: TdbDataType + PartialEq + Debug + ToLexical, { - let j = D::from_lexical(::to_lexical(&d)); + let j = D::from_lexical(d.to_lexical()); assert_eq!(d, j) } From 4e2b9e8b0788b218862d773b6906174ef4c0ffeb Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 7 Dec 2022 13:39:26 +0100 Subject: [PATCH 73/99] remove debug print --- src/layer/layer.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/layer/layer.rs b/src/layer/layer.rs index ca88a338..27680d31 100644 --- a/src/layer/layer.rs +++ b/src/layer/layer.rs @@ -79,7 +79,6 @@ pub trait Layer: Send + Sync { /// Returns true if the given triple exists, and false otherwise. 
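    // Round-trip sketch for the FromLexical/ToLexical split above, mirroring
    // the round-trip test in typed.rs (assumes a type implementing both
    // traits, such as i32):
    //
    //     let bytes = (-500_i32).to_lexical();
    //     assert_eq!(-500_i32, i32::from_lexical(bytes));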
fn string_triple_exists(&self, triple: &StringTriple) -> bool { - eprintln!("I am here"); self.string_triple_to_id(triple) .map(|t| self.id_triple_exists(t)) .unwrap_or(false) From 534068aa42c6f2b9f01b9a0e9cd8dee0d2abc954 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 7 Dec 2022 14:45:55 +0100 Subject: [PATCH 74/99] change interface to allow adding of arbitrary values, not just strings --- benches/bench.rs | 4 +- benches/builder/data.rs | 8 +- benches/builder/main.rs | 8 +- examples/print_graph.rs | 5 +- examples/write_to_graph.rs | 12 +- src/layer/builder.rs | 11 +- src/layer/internal/base.rs | 15 +- src/layer/internal/child.rs | 30 ++-- src/layer/internal/mod.rs | 22 +-- src/layer/internal/object_iterator.rs | 36 ++--- src/layer/internal/predicate_iterator.rs | 38 ++--- src/layer/internal/subject_iterator.rs | 117 ++++++++-------- src/layer/layer.rs | 94 ++++++++----- src/layer/simple_builder.rs | 170 ++++++++++++++--------- src/lib.rs | 2 +- src/storage/cache.rs | 26 ++-- src/storage/delta.rs | 66 ++++----- src/storage/directory.rs | 67 +++++---- src/storage/layer.rs | 70 +++++----- src/storage/memory.rs | 18 +-- src/storage/pack.rs | 12 +- src/store/mod.rs | 90 ++++++------ src/store/sync.rs | 51 ++++--- src/structure/tfc/typed.rs | 5 + 24 files changed, 535 insertions(+), 442 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index 274b2618..e5522071 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -3,7 +3,7 @@ extern crate test; use tempfile::tempdir; use terminus_store; -use terminus_store::layer::StringTriple; +use terminus_store::layer::ValueTriple; use test::Bencher; #[bench] @@ -14,7 +14,7 @@ fn bench_add_string_triple(b: &mut Bencher) { let mut count = 1; b.iter(|| { layer_builder - .add_string_triple(StringTriple::new_value( + .add_value_triple(ValueTriple::new_string_value( &count.to_string(), &count.to_string(), &count.to_string(), diff --git a/benches/builder/data.rs b/benches/builder/data.rs index fd3285e6..506209c7 100644 --- a/benches/builder/data.rs +++ b/benches/builder/data.rs @@ -1,7 +1,7 @@ use rand::distributions::Alphanumeric; use rand::prelude::*; use std::iter; -use terminus_store::layer::StringTriple; +use terminus_store::layer::ValueTriple; fn random_string(rand: &mut R, len_min: usize, len_max: usize) -> String { let len: usize = rand.gen_range(len_min..len_max); @@ -50,19 +50,19 @@ impl TestData { } } - pub fn random_triple(&mut self) -> StringTriple { + pub fn random_triple(&mut self) -> ValueTriple { let subject_ix = self.rand.gen_range(0..self.nodes.len()); let predicate_ix = self.rand.gen_range(0..self.predicates.len()); if self.rand.gen() { let object_ix = self.rand.gen_range(0..self.nodes.len()); - StringTriple::new_node( + ValueTriple::new_node( &self.nodes[subject_ix], &self.predicates[predicate_ix], &self.nodes[object_ix], ) } else { let object_ix = self.rand.gen_range(0..self.values.len()); - StringTriple::new_value( + ValueTriple::new_string_value( &self.nodes[subject_ix], &self.predicates[predicate_ix], &self.values[object_ix], diff --git a/benches/builder/main.rs b/benches/builder/main.rs index 83943370..9557c93b 100644 --- a/benches/builder/main.rs +++ b/benches/builder/main.rs @@ -38,7 +38,7 @@ fn build_base_layer_1000(b: &mut Bencher) { let builder = store.create_base_layer().unwrap(); for triple in triples.iter() { - builder.add_string_triple(triple.clone()).unwrap(); + builder.add_value_triple(triple.clone()).unwrap(); } let _base_layer = builder.commit().unwrap(); @@ -78,7 +78,7 @@ fn 
build_nonempty_child_layer_on_empty_base_layer(b: &mut Bencher) { let builder = base_layer.open_write().unwrap(); for triple in triples.iter() { - builder.add_string_triple(triple.clone()).unwrap(); + builder.add_value_triple(triple.clone()).unwrap(); } builder.commit().unwrap(); @@ -97,7 +97,7 @@ fn build_nonempty_child_layer_on_nonempty_base_layer(b: &mut Bencher) { let builder = store.create_base_layer().unwrap(); for _ in 0..1000 { - builder.add_string_triple(data.random_triple()).unwrap(); + builder.add_value_triple(data.random_triple()).unwrap(); } let base_layer = builder.commit().unwrap(); @@ -110,7 +110,7 @@ fn build_nonempty_child_layer_on_nonempty_base_layer(b: &mut Bencher) { let builder = base_layer.open_write().unwrap(); for triple in triples.iter() { - builder.add_string_triple(triple.clone()).unwrap(); + builder.add_value_triple(triple.clone()).unwrap(); } builder.commit().unwrap(); diff --git a/examples/print_graph.rs b/examples/print_graph.rs index 55776a1c..85513e91 100644 --- a/examples/print_graph.rs +++ b/examples/print_graph.rs @@ -1,6 +1,7 @@ use std::env; use std::io; +use terminus_store::structure::TdbDataType; use terminus_store::*; use tokio; @@ -21,7 +22,7 @@ async fn print_graph(store_path: &str, graph: &str) -> io::Result<()> { .expect("expected id triple to be mapable to string"); println!( - "{}, {}, {} {}", + "{}, {}, {} {:?}", triple.subject, triple.predicate, match triple.object { @@ -29,7 +30,7 @@ async fn print_graph(store_path: &str, graph: &str) -> io::Result<()> { ObjectType::Value(_) => "value", }, match triple.object { - ObjectType::Node(n) => n, + ObjectType::Node(n) => String::make_entry(&n), ObjectType::Value(v) => v, } ); diff --git a/examples/write_to_graph.rs b/examples/write_to_graph.rs index 90bcfde3..5a67ceb6 100644 --- a/examples/write_to_graph.rs +++ b/examples/write_to_graph.rs @@ -7,8 +7,8 @@ use tokio; use tokio::io::{self, AsyncBufReadExt}; enum Command { - Add(StringTriple), - Remove(StringTriple), + Add(ValueTriple), + Remove(ValueTriple), } async fn parse_command(s: &str) -> io::Result { @@ -25,8 +25,8 @@ async fn parse_command(s: &str) -> io::Result { let object = &matches[5]; let triple = match object_type_name { - "node" => StringTriple::new_node(subject, predicate, object), - "value" => StringTriple::new_value(subject, predicate, object), + "node" => ValueTriple::new_node(subject, predicate, object), + "value" => ValueTriple::new_string_value(subject, predicate, object), _ => { return Err(io::Error::new( io::ErrorKind::InvalidData, @@ -84,8 +84,8 @@ async fn process_commands(store_path: &str, graph: &str) -> io::Result<()> { // Since no io is happening, adding triples to the builder is // not a future. match command { - Command::Add(triple) => builder.add_string_triple(triple)?, - Command::Remove(triple) => builder.remove_string_triple(triple)?, + Command::Add(triple) => builder.add_value_triple(triple)?, + Command::Remove(triple) => builder.remove_value_triple(triple)?, } } diff --git a/src/layer/builder.rs b/src/layer/builder.rs index 68adcbe4..d8a18d73 100644 --- a/src/layer/builder.rs +++ b/src/layer/builder.rs @@ -69,11 +69,8 @@ impl DictionarySetFileBuilder { /// Add a value string. /// /// Panics if the given value string is not a lexical successor of the previous value string. 
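    // After this interface change, callers can store any TdbDataType, not
    // just strings; a sketch using the make_entry helper from datatypes.rs:
    //
    //     builder.add_value(String::make_entry(&"moo"));
    //     builder.add_value(u32::make_entry(&42_u32));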
- pub fn add_value(&mut self, value: &str) -> u64 { - let id = self.value_dictionary_builder.add(TypedDictEntry::new( - Datatype::String, - Bytes::copy_from_slice(value.as_bytes()).into(), - )); + pub fn add_value(&mut self, value: TypedDictEntry) -> u64 { + let id = self.value_dictionary_builder.add(value); id } @@ -119,7 +116,7 @@ impl DictionarySetFileBuilder { /// Add values from an iterable. /// /// Panics if the values are not in lexical order, or if previous added values are a lexical succesor of any of these values. - pub fn add_values + Unpin + Send + Sync>( + pub fn add_values + Unpin + Send + Sync>( &mut self, values: I, ) -> Vec @@ -128,7 +125,7 @@ impl DictionarySetFileBuilder { { let mut ids = Vec::new(); for value in values { - let id = self.add_value(&value); + let id = self.add_value(value); ids.push(id); } diff --git a/src/layer/internal/base.rs b/src/layer/internal/base.rs index 4b11fd06..eb7654cd 100644 --- a/src/layer/internal/base.rs +++ b/src/layer/internal/base.rs @@ -189,7 +189,7 @@ impl BaseLayerFileBuilder { /// Add a value string. /// /// Panics if the given value string is not a lexical successor of the previous value string. - pub fn add_value(&mut self, value: &str) -> u64 { + pub fn add_value(&mut self, value: TypedDictEntry) -> u64 { let id = self.builder.add_value(value); id @@ -230,7 +230,7 @@ impl BaseLayerFileBuilder { /// Add values from an iterable. /// /// Panics if the values are not in lexical order, or if previous added values are a lexical succesor of any of these values. - pub fn add_values + Send>( + pub fn add_values + Send>( &mut self, values: I, ) -> Vec @@ -466,7 +466,7 @@ pub mod tests { builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); - builder.add_values(values.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| String::make_entry(&s))); let mut builder = builder.into_phase2().await?; @@ -513,7 +513,12 @@ pub mod tests { assert_eq!(3, base_layer.subject_id("bbbbb").unwrap()); assert_eq!(2, base_layer.predicate_id("fghij").unwrap()); assert_eq!(1, base_layer.object_node_id("aaaaa").unwrap()); - assert_eq!(6, base_layer.object_value_id("chicken").unwrap()); + assert_eq!( + 6, + base_layer + .object_value_id(&String::make_entry(&"chicken")) + .unwrap() + ); assert_eq!("bbbbb", base_layer.id_subject(3).unwrap()); assert_eq!("fghij", base_layer.id_predicate(2).unwrap()); @@ -522,7 +527,7 @@ pub mod tests { base_layer.id_object(1).unwrap() ); assert_eq!( - ObjectType::Value("chicken".to_string()), + ObjectType::Value(String::make_entry(&"chicken")), base_layer.id_object(6).unwrap() ); } diff --git a/src/layer/internal/child.rs b/src/layer/internal/child.rs index 9690a297..37a368d7 100644 --- a/src/layer/internal/child.rs +++ b/src/layer/internal/child.rs @@ -258,8 +258,8 @@ impl ChildLayerFileBuil /// Does nothing if the value already exists in the paretn, and /// panics if the given value string is not a lexical successor of /// the previous value string. - pub fn add_value(&mut self, value: &str) -> u64 { - match self.parent.object_value_id(value) { + pub fn add_value(&mut self, value: TypedDictEntry) -> u64 { + match self.parent.object_value_id(&value) { None => self.builder.add_value(value), Some(id) => id, } @@ -317,7 +317,7 @@ impl ChildLayerFileBuil /// added values are a lexical succesor of any of these /// values. Skips any nodes that are already part of the base /// layer. 
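    // Sketch of the parent dedup behaviour of add_value above: re-adding a
    // value the parent layer already stores returns the parent's existing id
    // and writes nothing new into this child dictionary:
    //
    //     let id = child_builder.add_value(String::make_entry(&"baz"));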
- pub fn add_values + Send>( + pub fn add_values + Send>( &mut self, values: I, ) -> Vec @@ -327,7 +327,7 @@ impl ChildLayerFileBuil // TODO bulk check predicate existence let mut result = Vec::new(); for value in values { - let id = self.add_value(&value); + let id = self.add_value(value); result.push(id); } @@ -958,7 +958,7 @@ pub mod tests { .unwrap(); b.add_node("foo"); b.add_predicate("bar"); - b.add_value("baz"); + b.add_value(String::make_entry(&"baz")); let b = b.into_phase2().await.unwrap(); b.finalize().await.unwrap(); @@ -970,7 +970,12 @@ pub mod tests { assert_eq!(3, child_layer.subject_id("bbbbb").unwrap()); assert_eq!(2, child_layer.predicate_id("fghij").unwrap()); assert_eq!(1, child_layer.object_node_id("aaaaa").unwrap()); - assert_eq!(6, child_layer.object_value_id("chicken").unwrap()); + assert_eq!( + 6, + child_layer + .object_value_id(&String::make_entry(&"chicken")) + .unwrap() + ); assert_eq!("bbbbb", child_layer.id_subject(3).unwrap()); assert_eq!("fghij", child_layer.id_predicate(2).unwrap()); @@ -979,7 +984,7 @@ pub mod tests { child_layer.id_object(1).unwrap() ); assert_eq!( - ObjectType::Value("chicken".to_string()), + ObjectType::Value(String::make_entry(&"chicken")), child_layer.id_object(6).unwrap() ); } @@ -996,7 +1001,7 @@ pub mod tests { .unwrap(); b.add_node("foo"); b.add_predicate("bar"); - b.add_value("baz"); + b.add_value(String::make_entry(&"baz")); let b = b.into_phase2().await.unwrap(); b.finalize().await.unwrap(); @@ -1008,7 +1013,12 @@ pub mod tests { assert_eq!(11, child_layer.subject_id("foo").unwrap()); assert_eq!(5, child_layer.predicate_id("bar").unwrap()); assert_eq!(11, child_layer.object_node_id("foo").unwrap()); - assert_eq!(12, child_layer.object_value_id("baz").unwrap()); + assert_eq!( + 12, + child_layer + .object_value_id(&String::make_entry(&"baz")) + .unwrap() + ); assert_eq!("foo", child_layer.id_subject(11).unwrap()); assert_eq!("bar", child_layer.id_predicate(5).unwrap()); @@ -1017,7 +1027,7 @@ pub mod tests { child_layer.id_object(11).unwrap() ); assert_eq!( - ObjectType::Value("baz".to_string()), + ObjectType::Value(String::make_entry(&"baz")), child_layer.id_object(12).unwrap() ); } diff --git a/src/layer/internal/mod.rs b/src/layer/internal/mod.rs index a3ed3905..cd8d3455 100644 --- a/src/layer/internal/mod.rs +++ b/src/layer/internal/mod.rs @@ -241,16 +241,16 @@ impl InternalLayer { self.node_dictionary().num_entries() } - pub fn value_dict_id(&self, value: &str) -> IdLookupResult { - self.value_dictionary().id(&value) + pub fn value_dict_id(&self, value: &TypedDictEntry) -> IdLookupResult { + self.value_dictionary().id_entry(value) } pub fn value_dict_len(&self) -> usize { self.value_dictionary().num_entries() } - pub fn value_dict_get(&self, id: usize) -> Option { - self.value_dictionary().get(id) + pub fn value_dict_get(&self, id: usize) -> Option { + self.value_dictionary().entry(id) } pub fn internal_triple_addition_exists( @@ -604,7 +604,7 @@ impl Layer for InternalLayer { id_option.map(|id| id + parent_option.map_or(0, |p| p.node_and_value_count() as u64)) } - fn object_value_id<'a>(&'a self, object: &str) -> Option { + fn object_value_id<'a>(&'a self, object: &TypedDictEntry) -> Option { let to_result = |layer: &'a InternalLayer| { ( layer.value_dict_id(object).into_option().map(|i| { @@ -1057,13 +1057,13 @@ mod tests { let builder = store.create_base_layer().unwrap(); builder - .add_string_triple(StringTriple::new_value("cow", "says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) 
.unwrap(); builder - .add_string_triple(StringTriple::new_node("cow", "likes", "duck")) + .add_value_triple(ValueTriple::new_node("cow", "likes", "duck")) .unwrap(); builder - .add_string_triple(StringTriple::new_value("duck", "says", "quack")) + .add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")) .unwrap(); builder.commit().unwrap() @@ -1085,10 +1085,10 @@ mod tests { let builder = base_layer.open_write().unwrap(); builder - .remove_string_triple(StringTriple::new_value("cow", "says", "moo")) + .remove_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); builder - .add_string_triple(StringTriple::new_value("horse", "says", "neigh")) + .add_value_triple(ValueTriple::new_string_value("horse", "says", "neigh")) .unwrap(); let layer = builder.commit().unwrap(); @@ -1109,7 +1109,7 @@ mod tests { let mut builder = BaseLayerFileBuilder::from_files(&files).await.unwrap(); builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); - builder.add_values(values.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| String::make_entry(&s))); let mut builder = builder.into_phase2().await.unwrap(); builder.add_triple(3, 3, 3).await.unwrap(); builder.finalize().await.unwrap(); diff --git a/src/layer/internal/object_iterator.rs b/src/layer/internal/object_iterator.rs index 85a1fd98..aa2faf9e 100644 --- a/src/layer/internal/object_iterator.rs +++ b/src/layer/internal/object_iterator.rs @@ -244,7 +244,7 @@ mod tests { builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); - builder.add_values(values.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| String::make_entry(&s))); let mut builder = builder.into_phase2().await.unwrap(); builder.add_triple(1, 1, 2).await.unwrap(); @@ -409,40 +409,40 @@ mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "duck")); - builder.add_string_triple(StringTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "duck")); + builder.add_value_triple(ValueTriple::new_node("duck", "hates", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(base_name).await.unwrap(); let child1_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("horse", "says", "neigh")); - builder.add_string_triple(StringTriple::new_node("horse", "likes", "horse")); - builder.add_string_triple(StringTriple::new_node("horse", "likes", "cow")); + builder.add_value_triple(ValueTriple::new_string_value("horse", "says", "neigh")); + builder.add_value_triple(ValueTriple::new_node("horse", "likes", "horse")); + builder.add_value_triple(ValueTriple::new_node("horse", "likes", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child1_name).await.unwrap(); let child2_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "hates", "cow")); - 
builder.add_string_triple(StringTriple::new_node("duck", "likes", "cow")); + builder.remove_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child2_name).await.unwrap(); let child3_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "likes", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "hates", "cow")); + builder.remove_value_triple(ValueTriple::new_node("duck", "likes", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "hates", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child3_name).await.unwrap(); let child4_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "hates", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "cow")); - builder.add_string_triple(StringTriple::new_node("field", "contains", "cow")); + builder.remove_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "cow")); + builder.add_value_triple(ValueTriple::new_node("field", "contains", "cow")); builder.commit_boxed().await.unwrap(); let layer = store.get_layer(child4_name).await.unwrap().unwrap(); @@ -454,9 +454,9 @@ mod tests { .collect(); let expected = vec![ - StringTriple::new_node("duck", "likes", "cow"), - StringTriple::new_node("horse", "likes", "cow"), - StringTriple::new_node("field", "contains", "cow"), + ValueTriple::new_node("duck", "likes", "cow"), + ValueTriple::new_node("horse", "likes", "cow"), + ValueTriple::new_node("field", "contains", "cow"), ]; assert_eq!(expected, triples); diff --git a/src/layer/internal/predicate_iterator.rs b/src/layer/internal/predicate_iterator.rs index ca980e5a..35a54a8b 100644 --- a/src/layer/internal/predicate_iterator.rs +++ b/src/layer/internal/predicate_iterator.rs @@ -257,38 +257,38 @@ mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "duck")); - builder.add_string_triple(StringTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "duck")); + builder.add_value_triple(ValueTriple::new_node("duck", "hates", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(base_name).await.unwrap(); let child1_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("horse", "says", "neigh")); - builder.add_string_triple(StringTriple::new_node("horse", "likes", "horse")); + builder.add_value_triple(ValueTriple::new_string_value("horse", "says", "neigh")); + builder.add_value_triple(ValueTriple::new_node("horse", "likes", "horse")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child1_name).await.unwrap(); let child2_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "hates", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "cow")); + 
builder.remove_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child2_name).await.unwrap(); let child3_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "likes", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "hates", "cow")); + builder.remove_value_triple(ValueTriple::new_node("duck", "likes", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "hates", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child3_name).await.unwrap(); let child4_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "hates", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "cow")); + builder.remove_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "cow")); builder.commit_boxed().await.unwrap(); let layer = store.get_layer(child4_name).await.unwrap().unwrap(); @@ -300,9 +300,9 @@ mod tests { .collect(); let expected = vec![ - StringTriple::new_node("cow", "likes", "duck"), - StringTriple::new_node("duck", "likes", "cow"), - StringTriple::new_node("horse", "likes", "horse"), + ValueTriple::new_node("cow", "likes", "duck"), + ValueTriple::new_node("duck", "likes", "cow"), + ValueTriple::new_node("horse", "likes", "horse"), ]; assert_eq!(expected, triples); @@ -314,8 +314,8 @@ mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_node("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_node("cow", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_node("cow", "says", "quack")); builder.commit_boxed().await.unwrap(); let layer = store.get_layer(base_name).await.unwrap().unwrap(); @@ -326,8 +326,8 @@ mod tests { .collect(); let expected = vec![ - StringTriple::new_node("cow", "says", "moo"), - StringTriple::new_node("cow", "says", "quack"), + ValueTriple::new_node("cow", "says", "moo"), + ValueTriple::new_node("cow", "says", "quack"), ]; assert_eq!(expected, triples); diff --git a/src/layer/internal/subject_iterator.rs b/src/layer/internal/subject_iterator.rs index db54274f..5c356b50 100644 --- a/src/layer/internal/subject_iterator.rs +++ b/src/layer/internal/subject_iterator.rs @@ -455,6 +455,7 @@ mod tests { use crate::layer::base::tests::*; use crate::layer::child::tests::*; use crate::layer::*; + use crate::structure::TdbDataType; use std::sync::Arc; @@ -496,7 +497,7 @@ mod tests { builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); - builder.add_values(values.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| String::make_entry(&s))); let mut builder = builder.into_phase2().await.unwrap(); builder.add_triple(1, 1, 1).await.unwrap(); builder.add_triple(3, 2, 5).await.unwrap(); @@ -529,7 +530,7 @@ mod tests { builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); - builder.add_values(values.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| String::make_entry(&s))); let mut builder = 
builder.into_phase2().await.unwrap(); builder.add_triple(1, 1, 1).await.unwrap(); builder.add_triple(3, 2, 5).await.unwrap(); @@ -605,7 +606,7 @@ mod tests { builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); - builder.add_values(values.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| String::make_entry(&s))); let mut builder = builder.into_phase2().await.unwrap(); builder.add_triple(3, 2, 5).await.unwrap(); builder.add_triple(3, 3, 5).await.unwrap(); @@ -638,7 +639,7 @@ mod tests { builder.add_nodes(nodes.into_iter().map(|s| s.to_string())); builder.add_predicates(predicates.into_iter().map(|s| s.to_string())); - builder.add_values(values.into_iter().map(|s| s.to_string())); + builder.add_values(values.into_iter().map(|s| String::make_entry(&s))); let mut builder = builder.into_phase2().await.unwrap(); builder.add_triple(1, 1, 1).await.unwrap(); builder.add_triple(3, 2, 4).await.unwrap(); @@ -870,38 +871,38 @@ mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "duck")); - builder.add_string_triple(StringTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "duck")); + builder.add_value_triple(ValueTriple::new_node("duck", "hates", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(base_name).await.unwrap(); let child1_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("horse", "says", "neigh")); - builder.add_string_triple(StringTriple::new_node("horse", "likes", "horse")); + builder.add_value_triple(ValueTriple::new_string_value("horse", "says", "neigh")); + builder.add_value_triple(ValueTriple::new_node("horse", "likes", "horse")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child1_name).await.unwrap(); let child2_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "hates", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "cow")); + builder.remove_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child2_name).await.unwrap(); let child3_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "likes", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "hates", "cow")); + builder.remove_value_triple(ValueTriple::new_node("duck", "likes", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "hates", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child3_name).await.unwrap(); let child4_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "hates", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "cow")); + builder.remove_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", 
"likes", "cow")); builder.commit_boxed().await.unwrap(); let layer = store.get_layer(child4_name).await.unwrap().unwrap(); @@ -913,8 +914,8 @@ mod tests { .collect(); let expected = vec![ - StringTriple::new_node("duck", "likes", "cow"), - StringTriple::new_value("duck", "says", "quack"), + ValueTriple::new_node("duck", "likes", "cow"), + ValueTriple::new_string_value("duck", "says", "quack"), ]; assert_eq!(expected, triples); @@ -926,42 +927,42 @@ mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "duck")); - builder.add_string_triple(StringTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "duck")); + builder.add_value_triple(ValueTriple::new_node("duck", "hates", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(base_name).await.unwrap(); let child1_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("horse", "says", "neigh")); - builder.add_string_triple(StringTriple::new_node("horse", "likes", "horse")); + builder.add_value_triple(ValueTriple::new_string_value("horse", "says", "neigh")); + builder.add_value_triple(ValueTriple::new_node("horse", "likes", "horse")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child1_name).await.unwrap(); let child2_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "hates", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "horse")); + builder.remove_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "horse")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child2_name).await.unwrap(); let child3_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "likes", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "hates", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "pig")); + builder.remove_value_triple(ValueTriple::new_node("duck", "likes", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "pig")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child3_name).await.unwrap(); let child4_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "hates", "cow")); - builder.remove_string_triple(StringTriple::new_node("duck", "likes", "horse")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "rabbit")); + builder.remove_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.remove_value_triple(ValueTriple::new_node("duck", "likes", "horse")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "cow")); + 
builder.add_value_triple(ValueTriple::new_node("duck", "likes", "rabbit")); builder.commit_boxed().await.unwrap(); let layer = store.get_layer(child4_name).await.unwrap().unwrap(); @@ -974,9 +975,9 @@ mod tests { .collect(); let expected = vec![ - StringTriple::new_node("duck", "likes", "cow"), - StringTriple::new_node("duck", "likes", "pig"), - StringTriple::new_node("duck", "likes", "rabbit"), + ValueTriple::new_node("duck", "likes", "cow"), + ValueTriple::new_node("duck", "likes", "pig"), + ValueTriple::new_node("duck", "likes", "rabbit"), ]; assert_eq!(expected, triples); @@ -987,38 +988,38 @@ mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("sheep", "says", "baa")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "duck")); - builder.add_string_triple(StringTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("sheep", "says", "baa")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "duck")); + builder.add_value_triple(ValueTriple::new_node("duck", "hates", "cow")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(base_name).await.unwrap(); let child1_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("horse", "says", "woof")); - builder.add_string_triple(StringTriple::new_node("horse", "likes", "horse")); + builder.add_value_triple(ValueTriple::new_string_value("horse", "says", "woof")); + builder.add_value_triple(ValueTriple::new_node("horse", "likes", "horse")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child1_name).await.unwrap(); let child2_name = builder.name(); - builder.remove_string_triple(StringTriple::new_value("horse", "says", "woof")); - builder.remove_string_triple(StringTriple::new_value("sheep", "says", "baa")); + builder.remove_value_triple(ValueTriple::new_string_value("horse", "says", "woof")); + builder.remove_value_triple(ValueTriple::new_string_value("sheep", "says", "baa")); - builder.add_string_triple(StringTriple::new_value("horse", "says", "quack")); - builder.add_string_triple(StringTriple::new_value("rabbit", "says", "sniff")); + builder.add_value_triple(ValueTriple::new_string_value("horse", "says", "quack")); + builder.add_value_triple(ValueTriple::new_string_value("rabbit", "says", "sniff")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(child2_name).await.unwrap(); let child3_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "hates", "cow")); - builder.remove_string_triple(StringTriple::new_value("horse", "says", "quack")); + builder.remove_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.remove_value_triple(ValueTriple::new_string_value("horse", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "cow")); - builder.add_string_triple(StringTriple::new_value("horse", "says", "neigh")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "cow")); + builder.add_value_triple(ValueTriple::new_string_value("horse", "says", "neigh")); 
builder.commit_boxed().await.unwrap(); ( @@ -1050,15 +1051,15 @@ mod tests { .collect(); let expected_additions = vec![ - StringTriple::new_node("duck", "likes", "cow"), - StringTriple::new_value("horse", "says", "neigh"), - StringTriple::new_value("rabbit", "says", "sniff"), + ValueTriple::new_node("duck", "likes", "cow"), + ValueTriple::new_string_value("horse", "says", "neigh"), + ValueTriple::new_string_value("rabbit", "says", "sniff"), ]; let expected_removals = vec![ - StringTriple::new_node("duck", "hates", "cow"), - StringTriple::new_value("sheep", "says", "baa"), - StringTriple::new_value("horse", "says", "woof"), + ValueTriple::new_node("duck", "hates", "cow"), + ValueTriple::new_string_value("sheep", "says", "baa"), + ValueTriple::new_string_value("horse", "says", "woof"), ]; assert_eq!(expected_additions, additions); diff --git a/src/layer/layer.rs b/src/layer/layer.rs index 27680d31..fa6fe21b 100644 --- a/src/layer/layer.rs +++ b/src/layer/layer.rs @@ -2,6 +2,8 @@ use std::collections::HashMap; use std::hash::Hash; +use crate::structure::{TdbDataType, TypedDictEntry}; + /// A layer containing dictionary entries and triples. /// /// A layer can be queried. To answer queries, layers will check their @@ -26,7 +28,7 @@ pub trait Layer: Send + Sync { /// The numerical id of a node object, or None if the node object cannot be found. fn object_node_id(&self, object: &str) -> Option; /// The numerical id of a value object, or None if the value object cannot be found. - fn object_value_id(&self, object: &str) -> Option; + fn object_value_id(&self, object: &TypedDictEntry) -> Option; /// The subject corresponding to a numerical id, or None if it cannot be found. fn id_subject(&self, id: u64) -> Option; @@ -44,7 +46,7 @@ pub trait Layer: Send + Sync { } /// The object value corresponding to a numerical id, or None if it cannot be found. Panics if the object is actually a node. - fn id_object_value(&self, id: u64) -> Option { + fn id_object_value(&self, id: u64) -> Option { self.id_object(id).map(|o| { o.value() .expect("Expected ObjectType to be value but got a node") @@ -78,8 +80,8 @@ pub trait Layer: Send + Sync { } /// Returns true if the given triple exists, and false otherwise. - fn string_triple_exists(&self, triple: &StringTriple) -> bool { - self.string_triple_to_id(triple) + fn value_triple_exists(&self, triple: &ValueTriple) -> bool { + self.value_triple_to_id(triple) .map(|t| self.id_triple_exists(t)) .unwrap_or(false) } @@ -91,8 +93,8 @@ pub trait Layer: Send + Sync { fn triples_sp(&self, subject: u64, predicate: u64) -> Box + Send>; - /// Convert a `StringTriple` to an `IdTriple`, returning None if any of the strings in the triple could not be resolved. - fn string_triple_to_id(&self, triple: &StringTriple) -> Option { + /// Convert a `ValueTriple` to an `IdTriple`, returning None if any of the strings in the triple could not be resolved. + fn value_triple_to_id(&self, triple: &ValueTriple) -> Option { self.subject_id(&triple.subject).and_then(|subject| { self.predicate_id(&triple.predicate).and_then(|predicate| { match &triple.object { @@ -113,7 +115,7 @@ pub trait Layer: Send + Sync { fn triples_o(&self, object: u64) -> Box + Send>; /// Convert all known strings in the given string triple to ids. 
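Both directions of value lookup on the Layer trait now go through typed dictionary entries. A sketch of the round trip, assuming (as the imports in this file and the test code below suggest) that make_entry comes from the TdbDataType trait; layer is any Layer implementation obtained from a store:

    use crate::structure::{TdbDataType, TypedDictEntry};

    // Forward: build the typed entry first, then resolve it to an id.
    let entry = String::make_entry(&"moo");
    let id = layer.object_value_id(&entry);

    // Backward: an id now resolves to a TypedDictEntry, not a String.
    let back: Option<TypedDictEntry> = id.and_then(|id| layer.id_object_value(id));

    // Existence checks use the renamed helper together with the string
    // convenience constructor.
    assert!(layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")));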
- fn string_triple_to_partially_resolved(&self, triple: StringTriple) -> PartiallyResolvedTriple { + fn value_triple_to_partially_resolved(&self, triple: ValueTriple) -> PartiallyResolvedTriple { PartiallyResolvedTriple { subject: self .subject_id(&triple.subject) @@ -137,10 +139,10 @@ pub trait Layer: Send + Sync { } /// Convert an id triple to the corresponding string version, returning None if any of those ids could not be converted. - fn id_triple_to_string(&self, triple: &IdTriple) -> Option { + fn id_triple_to_string(&self, triple: &IdTriple) -> Option { self.id_subject(triple.subject).and_then(|subject| { self.id_predicate(triple.predicate).and_then(|predicate| { - self.id_object(triple.object).map(|object| StringTriple { + self.id_object(triple.object).map(|object| ValueTriple { subject, predicate, object, @@ -199,18 +201,18 @@ impl IdTriple { /// A triple stored as strings. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct StringTriple { +pub struct ValueTriple { pub subject: String, pub predicate: String, pub object: ObjectType, } -impl StringTriple { +impl ValueTriple { /// Construct a triple with a node object. /// /// Nodes may appear in both the subject and object position. - pub fn new_node(subject: &str, predicate: &str, object: &str) -> StringTriple { - StringTriple { + pub fn new_node(subject: &str, predicate: &str, object: &str) -> ValueTriple { + ValueTriple { subject: subject.to_owned(), predicate: predicate.to_owned(), object: ObjectType::Node(object.to_owned()), @@ -220,11 +222,22 @@ impl StringTriple { /// Construct a triple with a value object. /// /// Values may only appear in the object position. - pub fn new_value(subject: &str, predicate: &str, object: &str) -> StringTriple { - StringTriple { + pub fn new_value(subject: &str, predicate: &str, object: TypedDictEntry) -> ValueTriple { + ValueTriple { + subject: subject.to_owned(), + predicate: predicate.to_owned(), + object: ObjectType::Value(object), + } + } + + /// Construct a triple with a string value object. + /// + /// Values may only appear in the object position. 
+ pub fn new_string_value(subject: &str, predicate: &str, object: &str) -> ValueTriple { + ValueTriple { subject: subject.to_owned(), predicate: predicate.to_owned(), - object: ObjectType::Value(object.to_owned()), + object: ObjectType::Value(String::make_entry(&object)), } } @@ -293,7 +306,7 @@ impl PartiallyResolvedTriple { &self, node_map: &HashMap, predicate_map: &HashMap, - value_map: &HashMap, + value_map: &HashMap, ) -> Option { let subject = match self.subject.as_ref() { PossiblyResolved::Unresolved(s) => *node_map.get(s)?, @@ -356,7 +369,7 @@ impl PartiallyResolvedTriple { #[derive(Debug, Clone, PartialOrd, PartialEq, Eq, Ord, Hash)] pub enum ObjectType { Node(String), - Value(String), + Value(TypedDictEntry), } impl ObjectType { @@ -374,17 +387,17 @@ impl ObjectType { } } - pub fn value(self) -> Option { + pub fn value(self) -> Option { match self { ObjectType::Node(_) => None, - ObjectType::Value(s) => Some(s), + ObjectType::Value(v) => Some(v), } } - pub fn value_ref(&self) -> Option<&str> { + pub fn value_ref(&self) -> Option<&TypedDictEntry> { match self { ObjectType::Node(_) => None, - ObjectType::Value(s) => Some(s), + ObjectType::Value(v) => Some(v), } } } @@ -405,8 +418,8 @@ mod tests { let files = base_layer_files(); let mut builder = SimpleLayerBuilder::new([1, 2, 3, 4, 5], files.clone()); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("cow", "says", "sniff")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "sniff")); builder.commit().await.unwrap(); @@ -420,7 +433,7 @@ mod tests { let files = child_layer_files(); let mut builder = SimpleLayerBuilder::from_parent([5, 4, 3, 2, 1], base.clone(), files.clone()); - builder.remove_string_triple(StringTriple::new_value("cow", "says", "moo")); + builder.remove_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); builder.commit().await.unwrap(); let child: Arc = Arc::new( @@ -452,7 +465,7 @@ mod tests { .collect(); assert_eq!( - vec![StringTriple::new_value("cow", "says", "sniff")], + vec![ValueTriple::new_string_value("cow", "says", "sniff")], triples ); } @@ -462,7 +475,7 @@ mod tests { let files = base_layer_files(); let mut builder = SimpleLayerBuilder::new([1, 2, 3, 4, 5], files.clone()); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); builder.commit().await.unwrap(); @@ -476,7 +489,7 @@ mod tests { let files = child_layer_files(); let mut builder = SimpleLayerBuilder::from_parent([5, 4, 3, 2, 1], base.clone(), files.clone()); - builder.remove_string_triple(StringTriple::new_value("cow", "says", "moo")); + builder.remove_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); builder.commit().await.unwrap(); let child: Arc = Arc::new( @@ -489,7 +502,7 @@ mod tests { let files = child_layer_files(); let mut builder = SimpleLayerBuilder::from_parent([5, 4, 3, 2, 2], child.clone(), files.clone()); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); builder.commit().await.unwrap(); let child: Arc = Arc::new( @@ -504,7 +517,10 @@ mod tests { .map(|t| child.id_triple_to_string(&t).unwrap()) .collect(); - assert_eq!(vec![StringTriple::new_value("cow", "says", "moo")], triples); + assert_eq!( + 
vec![ValueTriple::new_string_value("cow", "says", "moo")], + triples + ); } #[tokio::test] @@ -512,8 +528,8 @@ mod tests { let files = base_layer_files(); let mut builder = SimpleLayerBuilder::new([1, 2, 3, 4, 5], files.clone()); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "neigh")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "neigh")); builder.commit().await.unwrap(); @@ -527,7 +543,7 @@ mod tests { let files = child_layer_files(); let mut builder = SimpleLayerBuilder::from_parent([5, 4, 3, 2, 1], base.clone(), files.clone()); - builder.remove_string_triple(StringTriple::new_value("duck", "says", "neigh")); + builder.remove_value_triple(ValueTriple::new_string_value("duck", "says", "neigh")); builder.commit().await.unwrap(); let child: Arc = Arc::new( @@ -540,7 +556,7 @@ mod tests { let files = child_layer_files(); let mut builder = SimpleLayerBuilder::from_parent([5, 4, 3, 2, 2], child.clone(), files.clone()); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); builder.commit().await.unwrap(); let child: Arc = Arc::new( @@ -566,7 +582,13 @@ mod tests { .unwrap(); let triple_2 = child.id_triple_to_string(&id_triple_2).unwrap(); - assert_eq!(StringTriple::new_value("cow", "says", "moo"), triple_1); - assert_eq!(StringTriple::new_value("duck", "says", "quack"), triple_2); + assert_eq!( + ValueTriple::new_string_value("cow", "says", "moo"), + triple_1 + ); + assert_eq!( + ValueTriple::new_string_value("duck", "says", "quack"), + triple_2 + ); } } diff --git a/src/layer/simple_builder.rs b/src/layer/simple_builder.rs index 7f47c03d..90c4a23e 100644 --- a/src/layer/simple_builder.rs +++ b/src/layer/simple_builder.rs @@ -12,6 +12,7 @@ use super::internal::*; use super::layer::*; use crate::storage::*; +use crate::structure::TypedDictEntry; use std::collections::{HashMap, HashSet}; use std::io; use std::pin::Pin; @@ -31,11 +32,11 @@ pub trait LayerBuilder: Send + Sync { /// Return the parent if it exists fn parent(&self) -> Option>; /// Add a string triple - fn add_string_triple(&mut self, triple: StringTriple); + fn add_value_triple(&mut self, triple: ValueTriple); /// Add an id triple fn add_id_triple(&mut self, triple: IdTriple); /// Remove a string triple - fn remove_string_triple(&mut self, triple: StringTriple); + fn remove_value_triple(&mut self, triple: ValueTriple); /// Remove an id triple fn remove_id_triple(&mut self, triple: IdTriple); /// Commit the layer to storage @@ -53,9 +54,9 @@ pub struct SimpleLayerBuilder { name: [u32; 5], parent: Option>, files: LayerFiles, - additions: Vec, + additions: Vec, id_additions: Vec, - removals: Vec, + removals: Vec, id_removals: Vec, } @@ -96,7 +97,7 @@ impl LayerBuilder for SimpleLayerBuil self.parent.clone() } - fn add_string_triple(&mut self, triple: StringTriple) { + fn add_value_triple(&mut self, triple: ValueTriple) { self.additions.push(triple); } @@ -104,7 +105,7 @@ impl LayerBuilder for SimpleLayerBuil self.id_additions.push(triple); } - fn remove_string_triple(&mut self, triple: StringTriple) { + fn remove_value_triple(&mut self, triple: ValueTriple) { self.removals.push(triple); } @@ -132,7 +133,7 @@ impl LayerBuilder for SimpleLayerBuil .collect(), Some(parent) => additions .into_par_iter() - .map(move |triple| 
parent.string_triple_to_partially_resolved(triple)) + .map(move |triple| parent.value_triple_to_partially_resolved(triple)) .collect(), }; @@ -150,7 +151,7 @@ impl LayerBuilder for SimpleLayerBuil .collect(), Some(parent) => removals .into_par_iter() - .map(move |triple| parent.string_triple_to_partially_resolved(triple)) + .map(move |triple| parent.value_triple_to_partially_resolved(triple)) .collect(), }; @@ -318,7 +319,7 @@ fn zero_equivalents( fn collect_unresolved_strings( triples: &[PartiallyResolvedTriple], -) -> (Vec, Vec, Vec) { +) -> (Vec, Vec, Vec) { let (unresolved_nodes, (unresolved_predicates, unresolved_values)) = rayon::join( || { let unresolved_nodes_set: HashSet<_> = triples @@ -394,6 +395,7 @@ mod tests { use super::*; use crate::layer::internal::InternalLayer; use crate::storage::memory::*; + use crate::structure::TdbDataType; fn new_base_files() -> BaseLayerFiles { // TODO inline @@ -410,9 +412,9 @@ mod tests { let files = new_base_files(); let mut builder = SimpleLayerBuilder::new(name, files.clone()); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("pig", "says", "oink")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); builder.commit().await.unwrap(); @@ -424,9 +426,9 @@ mod tests { async fn simple_base_layer_construction() { let layer = example_base_layer().await; - assert!(layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - assert!(layer.string_triple_exists(&StringTriple::new_value("pig", "says", "oink"))); - assert!(layer.string_triple_exists(&StringTriple::new_value("duck", "says", "quack"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("pig", "says", "oink"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("duck", "says", "quack"))); } #[tokio::test] @@ -436,9 +438,9 @@ mod tests { let name = [0, 0, 0, 0, 0]; let mut builder = SimpleLayerBuilder::from_parent(name, base_layer.clone(), files.clone()); - builder.add_string_triple(StringTriple::new_value("horse", "says", "neigh")); - builder.add_string_triple(StringTriple::new_node("horse", "likes", "cow")); - builder.remove_string_triple(StringTriple::new_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_string_value("horse", "says", "neigh")); + builder.add_value_triple(ValueTriple::new_node("horse", "likes", "cow")); + builder.remove_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); let child_layer = Arc::new( async { @@ -450,15 +452,17 @@ mod tests { .unwrap(), ); + assert!(child_layer + .value_triple_exists(&ValueTriple::new_string_value("horse", "says", "neigh"))); + assert!(child_layer.value_triple_exists(&ValueTriple::new_node("horse", "likes", "cow"))); assert!( - child_layer.string_triple_exists(&StringTriple::new_value("horse", "says", "neigh")) + child_layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")) ); - assert!(child_layer.string_triple_exists(&StringTriple::new_node("horse", "likes", "cow"))); - assert!(child_layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - 
assert!(child_layer.string_triple_exists(&StringTriple::new_value("pig", "says", "oink"))); assert!( - !child_layer.string_triple_exists(&StringTriple::new_value("duck", "says", "quack")) + child_layer.value_triple_exists(&ValueTriple::new_string_value("pig", "says", "oink")) ); + assert!(!child_layer + .value_triple_exists(&ValueTriple::new_string_value("duck", "says", "quack"))); } #[tokio::test] @@ -469,9 +473,9 @@ mod tests { let mut builder = SimpleLayerBuilder::from_parent(name2, base_layer.clone(), files2.clone()); - builder.add_string_triple(StringTriple::new_value("horse", "says", "neigh")); - builder.add_string_triple(StringTriple::new_node("horse", "likes", "cow")); - builder.remove_string_triple(StringTriple::new_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_string_value("horse", "says", "neigh")); + builder.add_value_triple(ValueTriple::new_node("horse", "likes", "cow")); + builder.remove_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); builder.commit().await.unwrap(); let layer2: Arc = Arc::new( @@ -484,9 +488,9 @@ mod tests { let name3 = [0, 0, 0, 0, 1]; let files3 = new_child_files(); builder = SimpleLayerBuilder::from_parent(name3, layer2.clone(), files3.clone()); - builder.remove_string_triple(StringTriple::new_node("horse", "likes", "cow")); - builder.add_string_triple(StringTriple::new_node("horse", "likes", "pig")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); + builder.remove_value_triple(ValueTriple::new_node("horse", "likes", "cow")); + builder.add_value_triple(ValueTriple::new_node("horse", "likes", "pig")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); builder.commit().await.unwrap(); let layer3: Arc = Arc::new( @@ -499,8 +503,8 @@ mod tests { let name4 = [0, 0, 0, 0, 1]; let files4 = new_child_files(); builder = SimpleLayerBuilder::from_parent(name4, layer3.clone(), files4.clone()); - builder.remove_string_triple(StringTriple::new_value("pig", "says", "oink")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "horse")); + builder.remove_value_triple(ValueTriple::new_string_value("pig", "says", "oink")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "horse")); builder.commit().await.unwrap(); let layer4: Arc = Arc::new( ChildLayer::load_from_files(name4, layer3, &files4) @@ -509,14 +513,16 @@ mod tests { .into(), ); - assert!(layer4.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - assert!(layer4.string_triple_exists(&StringTriple::new_value("duck", "says", "quack"))); - assert!(layer4.string_triple_exists(&StringTriple::new_value("horse", "says", "neigh"))); - assert!(layer4.string_triple_exists(&StringTriple::new_node("horse", "likes", "pig"))); - assert!(layer4.string_triple_exists(&StringTriple::new_node("cow", "likes", "horse"))); + assert!(layer4.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo"))); + assert!(layer4.value_triple_exists(&ValueTriple::new_string_value("duck", "says", "quack"))); + assert!( + layer4.value_triple_exists(&ValueTriple::new_string_value("horse", "says", "neigh")) + ); + assert!(layer4.value_triple_exists(&ValueTriple::new_node("horse", "likes", "pig"))); + assert!(layer4.value_triple_exists(&ValueTriple::new_node("cow", "likes", "horse"))); - assert!(!layer4.string_triple_exists(&StringTriple::new_value("pig", "says", "oink"))); - assert!(!layer4.string_triple_exists(&StringTriple::new_node("horse", "likes", "cow"))); + 
assert!(!layer4.value_triple_exists(&ValueTriple::new_string_value("pig", "says", "oink"))); + assert!(!layer4.value_triple_exists(&ValueTriple::new_node("horse", "likes", "cow"))); } #[tokio::test] @@ -525,8 +531,8 @@ mod tests { let name = [0, 0, 0, 0, 0]; let mut builder = SimpleLayerBuilder::new(name, files.clone()); - builder.remove_string_triple(StringTriple::new_value("crow", "says", "caw")); - builder.add_string_triple(StringTriple::new_value("crow", "says", "caw")); + builder.remove_value_triple(ValueTriple::new_string_value("crow", "says", "caw")); + builder.add_value_triple(ValueTriple::new_string_value("crow", "says", "caw")); builder.commit().await.unwrap(); let base_layer: Arc = Arc::new( @@ -536,7 +542,9 @@ mod tests { .into(), ); - assert!(!base_layer.string_triple_exists(&StringTriple::new_value("crow", "says", "caw"))); + assert!( + !base_layer.value_triple_exists(&ValueTriple::new_string_value("crow", "says", "caw")) + ); } #[tokio::test] @@ -545,8 +553,8 @@ mod tests { let name = [0, 0, 0, 0, 0]; let mut builder = SimpleLayerBuilder::new(name, files.clone()); - builder.add_string_triple(StringTriple::new_value("crow", "says", "caw")); - builder.remove_string_triple(StringTriple::new_value("crow", "says", "caw")); + builder.add_value_triple(ValueTriple::new_string_value("crow", "says", "caw")); + builder.remove_value_triple(ValueTriple::new_string_value("crow", "says", "caw")); builder.commit().await.unwrap(); let base_layer: Arc = Arc::new( @@ -556,7 +564,9 @@ mod tests { .into(), ); - assert!(!base_layer.string_triple_exists(&StringTriple::new_value("crow", "says", "caw"))); + assert!( + !base_layer.value_triple_exists(&ValueTriple::new_string_value("crow", "says", "caw")) + ); } #[tokio::test] @@ -566,8 +576,8 @@ mod tests { let name = [0, 0, 0, 0, 0]; let mut builder = SimpleLayerBuilder::from_parent(name, base_layer.clone(), files.clone()); - builder.remove_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); + builder.remove_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); builder.commit().await.unwrap(); let child_layer: Arc = Arc::new( @@ -577,7 +587,9 @@ mod tests { .into(), ); - assert!(child_layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); + assert!( + child_layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")) + ); } #[tokio::test] @@ -587,8 +599,8 @@ mod tests { let name = [0, 0, 0, 0, 0]; let mut builder = SimpleLayerBuilder::from_parent(name, base_layer.clone(), files.clone()); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.remove_string_triple(StringTriple::new_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.remove_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); builder.commit().await.unwrap(); let child_layer: Arc = Arc::new( @@ -598,7 +610,9 @@ mod tests { .into(), ); - assert!(child_layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); + assert!( + child_layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")) + ); } #[tokio::test] @@ -608,8 +622,8 @@ mod tests { let name = [0, 0, 0, 0, 0]; let mut builder = SimpleLayerBuilder::from_parent(name, base_layer.clone(), files.clone()); - builder.remove_string_triple(StringTriple::new_value("crow", 
"says", "caw")); - builder.add_string_triple(StringTriple::new_value("crow", "says", "caw")); + builder.remove_value_triple(ValueTriple::new_string_value("crow", "says", "caw")); + builder.add_value_triple(ValueTriple::new_string_value("crow", "says", "caw")); builder.commit().await.unwrap(); let child_layer: Arc = Arc::new( @@ -619,7 +633,9 @@ mod tests { .into(), ); - assert!(!child_layer.string_triple_exists(&StringTriple::new_value("crow", "says", "caw"))); + assert!( + !child_layer.value_triple_exists(&ValueTriple::new_string_value("crow", "says", "caw")) + ); } #[tokio::test] @@ -629,8 +645,8 @@ mod tests { let name = [0, 0, 0, 0, 0]; let mut builder = SimpleLayerBuilder::from_parent(name, base_layer.clone(), files.clone()); - builder.add_string_triple(StringTriple::new_value("crow", "says", "caw")); - builder.remove_string_triple(StringTriple::new_value("crow", "says", "caw")); + builder.add_value_triple(ValueTriple::new_string_value("crow", "says", "caw")); + builder.remove_value_triple(ValueTriple::new_string_value("crow", "says", "caw")); builder.commit().await.unwrap(); let child_layer: Arc = Arc::new( @@ -640,7 +656,9 @@ mod tests { .into(), ); - assert!(!child_layer.string_triple_exists(&StringTriple::new_value("crow", "says", "caw"))); + assert!( + !child_layer.value_triple_exists(&ValueTriple::new_string_value("crow", "says", "caw")) + ); } #[tokio::test] @@ -650,10 +668,12 @@ mod tests { let name = [0, 0, 0, 0, 0]; let node_id = base_layer.subject_id("cow").unwrap(); let predicate_id = base_layer.predicate_id("says").unwrap(); - let value_id = base_layer.object_value_id("moo").unwrap(); + let value_id = base_layer + .object_value_id(&String::make_entry(&"moo")) + .unwrap(); let mut builder = SimpleLayerBuilder::from_parent(name, base_layer.clone(), files.clone()); - builder.remove_string_triple(StringTriple::new_value("cow", "says", "moo")); + builder.remove_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); builder.add_id_triple(IdTriple::new(node_id, predicate_id, value_id)); builder.commit().await.unwrap(); @@ -664,7 +684,9 @@ mod tests { .into(), ); - assert!(child_layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); + assert!( + child_layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")) + ); } #[tokio::test] @@ -674,11 +696,13 @@ mod tests { let name = [0, 0, 0, 0, 0]; let node_id = base_layer.subject_id("cow").unwrap(); let predicate_id = base_layer.predicate_id("says").unwrap(); - let value_id = base_layer.object_value_id("moo").unwrap(); + let value_id = base_layer + .object_value_id(&String::make_entry(&"moo")) + .unwrap(); let mut builder = SimpleLayerBuilder::from_parent(name, base_layer.clone(), files.clone()); builder.remove_id_triple(IdTriple::new(node_id, predicate_id, value_id)); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); builder.commit().await.unwrap(); let child_layer: Arc = Arc::new( @@ -688,7 +712,9 @@ mod tests { .into(), ); - assert!(child_layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); + assert!( + child_layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")) + ); } #[tokio::test] @@ -698,10 +724,12 @@ mod tests { let name = [0, 0, 0, 0, 0]; let node_id = base_layer.subject_id("cow").unwrap(); let predicate_id = base_layer.predicate_id("says").unwrap(); - let value_id = base_layer.object_value_id("moo").unwrap(); + let 
value_id = base_layer + .object_value_id(&String::make_entry(&"moo")) + .unwrap(); let mut builder = SimpleLayerBuilder::from_parent(name, base_layer.clone(), files.clone()); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); builder.remove_id_triple(IdTriple::new(node_id, predicate_id, value_id)); builder.commit().await.unwrap(); @@ -712,7 +740,9 @@ mod tests { .into(), ); - assert!(child_layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); + assert!( + child_layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")) + ); } #[tokio::test] @@ -722,11 +752,13 @@ mod tests { let name = [0, 0, 0, 0, 0]; let node_id = base_layer.subject_id("cow").unwrap(); let predicate_id = base_layer.predicate_id("says").unwrap(); - let value_id = base_layer.object_value_id("moo").unwrap(); + let value_id = base_layer + .object_value_id(&String::make_entry(&"moo")) + .unwrap(); let mut builder = SimpleLayerBuilder::from_parent(name, base_layer.clone(), files.clone()); builder.add_id_triple(IdTriple::new(node_id, predicate_id, value_id)); - builder.remove_string_triple(StringTriple::new_value("cow", "says", "moo")); + builder.remove_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); builder.commit().await.unwrap(); let child_layer: Arc = Arc::new( @@ -736,6 +768,8 @@ mod tests { .into(), ); - assert!(child_layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); + assert!( + child_layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")) + ); } } diff --git a/src/lib.rs b/src/lib.rs index ec2f0544..3e434f3c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,6 +37,6 @@ pub mod storage; pub mod store; pub mod structure; -pub use layer::{IdTriple, Layer, ObjectType, StringTriple}; +pub use layer::{IdTriple, Layer, ObjectType, ValueTriple}; pub use store::sync::{open_sync_directory_store, open_sync_memory_store}; pub use store::{open_directory_store, open_memory_store}; diff --git a/src/storage/cache.rs b/src/storage/cache.rs index abb5bca0..0a1762f1 100644 --- a/src/storage/cache.rs +++ b/src/storage/cache.rs @@ -557,17 +557,17 @@ pub mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("pig", "says", "oink")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(base_name).await.unwrap(); let child_name = builder.name(); - builder.remove_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "pig")); + builder.remove_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "pig")); builder.commit_boxed().await.unwrap(); @@ -591,17 +591,17 @@ pub mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - 
builder.add_string_triple(StringTriple::new_value("pig", "says", "oink")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(base_name).await.unwrap(); let child_name = builder.name(); - builder.remove_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "pig")); + builder.remove_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "pig")); builder.commit_boxed().await.unwrap(); @@ -621,9 +621,9 @@ pub mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("pig", "says", "oink")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); builder.commit_boxed().await.unwrap(); diff --git a/src/storage/delta.rs b/src/storage/delta.rs index 16fe6353..fb291899 100644 --- a/src/storage/delta.rs +++ b/src/storage/delta.rs @@ -445,30 +445,30 @@ mod tests { ) -> io::Result<(Arc, Arc, Arc)> { let mut builder = store.create_base_layer().await?; let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "duck")); - builder.add_string_triple(StringTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "duck")); + builder.add_value_triple(ValueTriple::new_node("duck", "hates", "cow")); builder.commit_boxed().await?; let base_layer = store.get_layer(base_name).await?.unwrap(); builder = store.create_child_layer(base_name).await?; let child1_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "hates", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "cow")); - builder.add_string_triple(StringTriple::new_value("horse", "says", "neigh")); - builder.add_string_triple(StringTriple::new_node("pig", "likes", "pig")); + builder.remove_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "cow")); + builder.add_value_triple(ValueTriple::new_string_value("horse", "says", "neigh")); + builder.add_value_triple(ValueTriple::new_node("pig", "likes", "pig")); builder.commit_boxed().await?; let child1_layer = store.get_layer(child1_name).await?.unwrap(); builder = store.create_child_layer(child1_name).await?; let child2_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("pig", "likes", "pig")); - 
builder.add_string_triple(StringTriple::new_value("pig", "says", "oink")); - builder.add_string_triple(StringTriple::new_value("sheep", "says", "baah")); - builder.add_string_triple(StringTriple::new_value("pig", "likes", "sheep")); + builder.remove_value_triple(ValueTriple::new_node("pig", "likes", "pig")); + builder.add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")); + builder.add_value_triple(ValueTriple::new_string_value("sheep", "says", "baah")); + builder.add_value_triple(ValueTriple::new_string_value("pig", "likes", "sheep")); builder.commit_boxed().await?; let child2_layer = store.get_layer(child2_name).await?.unwrap(); @@ -505,7 +505,7 @@ mod tests { ); for t in expected { - assert!(delta_layer.string_triple_exists(&t)); + assert!(delta_layer.value_triple_exists(&t)); } } @@ -541,7 +541,7 @@ mod tests { ); for t in expected { - assert!(delta_layer.string_triple_exists(&t)); + assert!(delta_layer.value_triple_exists(&t)); } let change_expected: Vec<_> = @@ -604,50 +604,50 @@ mod tests { ); for t in expected { - assert!(delta_layer2.string_triple_exists(&t)); + assert!(delta_layer2.value_triple_exists(&t)); } } async fn create_layer_stack(store: &S) -> Vec<[u32; 5]> { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("a", "a", "a")); - builder.add_string_triple(StringTriple::new_value("a", "b", "c")); + builder.add_value_triple(ValueTriple::new_string_value("a", "a", "a")); + builder.add_value_triple(ValueTriple::new_string_value("a", "b", "c")); builder.commit_boxed().await.unwrap(); let mut builder = store.create_child_layer(base_name).await.unwrap(); let child1_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("a", "a", "b")); - builder.add_string_triple(StringTriple::new_value("a", "b", "a")); - builder.add_string_triple(StringTriple::new_value("b", "a", "a")); - builder.add_string_triple(StringTriple::new_value("d", "d", "d")); - builder.remove_string_triple(StringTriple::new_value("a", "a", "a")); + builder.add_value_triple(ValueTriple::new_string_value("a", "a", "b")); + builder.add_value_triple(ValueTriple::new_string_value("a", "b", "a")); + builder.add_value_triple(ValueTriple::new_string_value("b", "a", "a")); + builder.add_value_triple(ValueTriple::new_string_value("d", "d", "d")); + builder.remove_value_triple(ValueTriple::new_string_value("a", "a", "a")); builder.commit_boxed().await.unwrap(); let mut builder = store.create_child_layer(child1_name).await.unwrap(); - builder.add_string_triple(StringTriple::new_value("a", "b", "b")); - builder.add_string_triple(StringTriple::new_value("b", "a", "b")); - builder.add_string_triple(StringTriple::new_value("e", "e", "e")); - builder.remove_string_triple(StringTriple::new_value("a", "a", "b")); + builder.add_value_triple(ValueTriple::new_string_value("a", "b", "b")); + builder.add_value_triple(ValueTriple::new_string_value("b", "a", "b")); + builder.add_value_triple(ValueTriple::new_string_value("e", "e", "e")); + builder.remove_value_triple(ValueTriple::new_string_value("a", "a", "b")); let child2_name = builder.name(); builder.commit_boxed().await.unwrap(); let mut builder = store.create_child_layer(child2_name).await.unwrap(); - builder.add_string_triple(StringTriple::new_value("a", "a", "b")); - builder.add_string_triple(StringTriple::new_value("b", "b", "a")); - builder.add_string_triple(StringTriple::new_value("f", "f", "f")); + 
builder.add_value_triple(ValueTriple::new_string_value("a", "a", "b")); + builder.add_value_triple(ValueTriple::new_string_value("b", "b", "a")); + builder.add_value_triple(ValueTriple::new_string_value("f", "f", "f")); let child3_name = builder.name(); builder.commit_boxed().await.unwrap(); let mut builder = store.create_child_layer(child3_name).await.unwrap(); - builder.add_string_triple(StringTriple::new_value("b", "b", "c")); - builder.add_string_triple(StringTriple::new_value("g", "g", "g")); + builder.add_value_triple(ValueTriple::new_string_value("b", "b", "c")); + builder.add_value_triple(ValueTriple::new_string_value("g", "g", "g")); let child4_name = builder.name(); builder.commit_boxed().await.unwrap(); let mut builder = store.create_child_layer(child4_name).await.unwrap(); - builder.add_string_triple(StringTriple::new_value("c", "a", "b")); - builder.add_string_triple(StringTriple::new_value("h", "h", "h")); + builder.add_value_triple(ValueTriple::new_string_value("c", "a", "b")); + builder.add_value_triple(ValueTriple::new_string_value("h", "h", "h")); let child5_name = builder.name(); builder.commit_boxed().await.unwrap(); diff --git a/src/storage/directory.rs b/src/storage/directory.rs index a5f4f27e..2466d691 100644 --- a/src/storage/directory.rs +++ b/src/storage/directory.rs @@ -441,17 +441,17 @@ mod tests { let mut builder = store.create_base_layer().await?; let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("pig", "says", "oink")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); builder.commit_boxed().await?; let mut builder = store.create_child_layer(base_name).await?; let child_name = builder.name(); - builder.remove_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "pig")); + builder.remove_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "pig")); builder.commit_boxed().await?; @@ -461,10 +461,10 @@ mod tests { .unwrap() .unwrap(); - assert!(layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - assert!(layer.string_triple_exists(&StringTriple::new_value("pig", "says", "oink"))); - assert!(layer.string_triple_exists(&StringTriple::new_node("cow", "likes", "pig"))); - assert!(!layer.string_triple_exists(&StringTriple::new_value("duck", "says", "quack"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("pig", "says", "oink"))); + assert!(layer.value_triple_exists(&ValueTriple::new_node("cow", "likes", "pig"))); + assert!(!layer.value_triple_exists(&ValueTriple::new_string_value("duck", "says", "quack"))); } #[tokio::test] @@ -551,17 +551,17 @@ mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("pig", "says", "oink")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); + 
builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); builder.commit_boxed().await.unwrap(); let mut builder = store.create_child_layer(base_name).await.unwrap(); let child_name = builder.name(); - builder.remove_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "pig")); + builder.remove_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "pig")); builder.commit_boxed().await.unwrap(); @@ -575,12 +575,15 @@ mod tests { _ => panic!("not a rollup"), } - assert!(rolled_layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - assert!(rolled_layer.string_triple_exists(&StringTriple::new_value("pig", "says", "oink"))); - assert!(rolled_layer.string_triple_exists(&StringTriple::new_node("cow", "likes", "pig"))); assert!( - !rolled_layer.string_triple_exists(&StringTriple::new_value("duck", "says", "quack")) + rolled_layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")) ); + assert!( + rolled_layer.value_triple_exists(&ValueTriple::new_string_value("pig", "says", "oink")) + ); + assert!(rolled_layer.value_triple_exists(&ValueTriple::new_node("cow", "likes", "pig"))); + assert!(!rolled_layer + .value_triple_exists(&ValueTriple::new_string_value("duck", "says", "quack"))); } #[tokio::test] @@ -591,25 +594,25 @@ mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("pig", "says", "oink")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); builder.commit_boxed().await.unwrap(); let mut builder = store.create_child_layer(base_name).await.unwrap(); let child_name = builder.name(); - builder.remove_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "pig")); + builder.remove_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "pig")); builder.commit_boxed().await.unwrap(); let mut builder = store.create_child_layer(child_name).await.unwrap(); let child_name = builder.name(); - builder.remove_string_triple(StringTriple::new_value("cow", "likes", "pig")); - builder.add_string_triple(StringTriple::new_node("cow", "hates", "pig")); + builder.remove_value_triple(ValueTriple::new_string_value("cow", "likes", "pig")); + builder.add_value_triple(ValueTriple::new_node("cow", "hates", "pig")); builder.commit_boxed().await.unwrap(); @@ -627,13 +630,17 @@ mod tests { _ => panic!("not a rollup"), } - assert!(rolled_layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - assert!(rolled_layer.string_triple_exists(&StringTriple::new_value("pig", "says", "oink"))); - assert!(rolled_layer.string_triple_exists(&StringTriple::new_node("cow", "hates", "pig"))); - 
assert!(!rolled_layer.string_triple_exists(&StringTriple::new_value("cow", "likes", "pig"))); assert!( - !rolled_layer.string_triple_exists(&StringTriple::new_value("duck", "says", "quack")) + rolled_layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")) + ); + assert!( + rolled_layer.value_triple_exists(&ValueTriple::new_string_value("pig", "says", "oink")) ); + assert!(rolled_layer.value_triple_exists(&ValueTriple::new_node("cow", "hates", "pig"))); + assert!(!rolled_layer + .value_triple_exists(&ValueTriple::new_string_value("cow", "likes", "pig"))); + assert!(!rolled_layer + .value_triple_exists(&ValueTriple::new_string_value("duck", "says", "quack"))); } #[tokio::test] diff --git a/src/storage/layer.rs b/src/storage/layer.rs index 323e6732..53337ea8 100644 --- a/src/storage/layer.rs +++ b/src/storage/layer.rs @@ -2228,7 +2228,7 @@ pub(crate) async fn file_triple_layer_count( #[cfg(test)] mod tests { use super::*; - use crate::layer::{Layer, ObjectType, StringTriple}; + use crate::layer::{Layer, ObjectType, ValueTriple}; use crate::storage::directory::DirectoryLayerStore; use crate::storage::memory::MemoryLayerStore; use std::collections::HashMap; @@ -2238,33 +2238,33 @@ mod tests { // They test functionality that should really work for both lazy_static! { - static ref BASE_TRIPLES: Vec = vec![ - StringTriple::new_value("cow", "says", "moo"), - StringTriple::new_value("cow", "says", "mooo"), - StringTriple::new_node("cow", "likes", "duck"), - StringTriple::new_node("cow", "likes", "pig"), - StringTriple::new_value("cow", "name", "clarabelle"), - StringTriple::new_value("pig", "says", "oink"), - StringTriple::new_node("pig", "hates", "cow"), - StringTriple::new_value("duck", "says", "quack"), - StringTriple::new_node("duck", "hates", "cow"), - StringTriple::new_node("duck", "hates", "pig"), - StringTriple::new_value("duck", "name", "donald"), + static ref BASE_TRIPLES: Vec = vec![ + ValueTriple::new_string_value("cow", "says", "moo"), + ValueTriple::new_string_value("cow", "says", "mooo"), + ValueTriple::new_node("cow", "likes", "duck"), + ValueTriple::new_node("cow", "likes", "pig"), + ValueTriple::new_string_value("cow", "name", "clarabelle"), + ValueTriple::new_string_value("pig", "says", "oink"), + ValueTriple::new_node("pig", "hates", "cow"), + ValueTriple::new_string_value("duck", "says", "quack"), + ValueTriple::new_node("duck", "hates", "cow"), + ValueTriple::new_node("duck", "hates", "pig"), + ValueTriple::new_string_value("duck", "name", "donald"), ]; - static ref CHILD_ADDITION_TRIPLES: Vec = vec![ - StringTriple::new_value("cow", "says", "moooo"), - StringTriple::new_value("cow", "says", "mooooo"), - StringTriple::new_node("cow", "likes", "horse"), - StringTriple::new_node("pig", "likes", "platypus"), - StringTriple::new_node("duck", "hates", "platypus"), + static ref CHILD_ADDITION_TRIPLES: Vec = vec![ + ValueTriple::new_string_value("cow", "says", "moooo"), + ValueTriple::new_string_value("cow", "says", "mooooo"), + ValueTriple::new_node("cow", "likes", "horse"), + ValueTriple::new_node("pig", "likes", "platypus"), + ValueTriple::new_node("duck", "hates", "platypus"), ]; - static ref CHILD_REMOVAL_TRIPLES: Vec = vec![ - StringTriple::new_value("cow", "says", "mooo"), - StringTriple::new_value("cow", "name", "clarabelle"), - StringTriple::new_node("pig", "hates", "cow"), - StringTriple::new_node("duck", "hates", "cow"), - StringTriple::new_node("duck", "hates", "pig"), - StringTriple::new_value("duck", "name", "donald"), + static ref 
CHILD_REMOVAL_TRIPLES: Vec = vec![ + ValueTriple::new_string_value("cow", "says", "mooo"), + ValueTriple::new_string_value("cow", "name", "clarabelle"), + ValueTriple::new_node("pig", "hates", "cow"), + ValueTriple::new_node("duck", "hates", "cow"), + ValueTriple::new_node("duck", "hates", "pig"), + ValueTriple::new_string_value("duck", "name", "donald"), ]; } @@ -2274,19 +2274,19 @@ mod tests { ) -> io::Result<( [u32; 5], Option>, - HashMap, + HashMap, )> { let mut builder = store.create_base_layer().await?; let name = builder.name(); for t in BASE_TRIPLES.iter() { - builder.add_string_triple(t.clone()); + builder.add_value_triple(t.clone()); } builder.commit_boxed().await?; let layer = store.get_layer(name).await?.unwrap(); let mut contents = HashMap::with_capacity(BASE_TRIPLES.len()); for t in BASE_TRIPLES.iter() { - let t_id = layer.string_triple_to_id(t).unwrap(); + let t_id = layer.value_triple_to_id(t).unwrap(); contents.insert(t.clone(), t_id); } @@ -2304,30 +2304,30 @@ mod tests { ) -> io::Result<( [u32; 5], Option>, - HashMap, - HashMap, + HashMap, + HashMap, )> { let (base_name, _base_layer, _) = example_base_layer(store, false).await?; let mut builder = store.create_child_layer(base_name).await?; let name = builder.name(); for t in CHILD_ADDITION_TRIPLES.iter() { - builder.add_string_triple(t.clone()); + builder.add_value_triple(t.clone()); } for t in CHILD_REMOVAL_TRIPLES.iter() { - builder.remove_string_triple(t.clone()); + builder.remove_value_triple(t.clone()); } builder.commit_boxed().await?; let layer = store.get_layer(name).await?.unwrap(); let mut add_contents = HashMap::with_capacity(BASE_TRIPLES.len()); for t in CHILD_ADDITION_TRIPLES.iter() { - let t_id = layer.string_triple_to_id(t).unwrap(); + let t_id = layer.value_triple_to_id(t).unwrap(); add_contents.insert(t.clone(), t_id); } let mut remove_contents = HashMap::with_capacity(BASE_TRIPLES.len()); for t in CHILD_REMOVAL_TRIPLES.iter() { - let t_id = layer.string_triple_to_id(t).unwrap(); + let t_id = layer.value_triple_to_id(t).unwrap(); remove_contents.insert(t.clone(), t_id); } diff --git a/src/storage/memory.rs b/src/storage/memory.rs index f277314f..26150a5a 100644 --- a/src/storage/memory.rs +++ b/src/storage/memory.rs @@ -520,24 +520,24 @@ mod tests { let mut builder = store.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_value("cow", "says", "moo")); - builder.add_string_triple(StringTriple::new_value("pig", "says", "oink")); - builder.add_string_triple(StringTriple::new_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")); + builder.add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")); + builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); builder.commit_boxed().await.unwrap(); builder = store.create_child_layer(base_name).await.unwrap(); let child_name = builder.name(); - builder.remove_string_triple(StringTriple::new_value("duck", "says", "quack")); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "pig")); + builder.remove_value_triple(ValueTriple::new_string_value("duck", "says", "quack")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "pig")); builder.commit_boxed().await.unwrap(); let layer = store.get_layer(child_name).await.unwrap().unwrap(); - assert!(layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - 
assert!(layer.string_triple_exists(&StringTriple::new_value("pig", "says", "oink"))); - assert!(layer.string_triple_exists(&StringTriple::new_node("cow", "likes", "pig"))); - assert!(!layer.string_triple_exists(&StringTriple::new_value("duck", "says", "quack"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("pig", "says", "oink"))); + assert!(layer.value_triple_exists(&ValueTriple::new_node("cow", "likes", "pig"))); + assert!(!layer.value_triple_exists(&ValueTriple::new_string_value("duck", "says", "quack"))); } #[tokio::test] diff --git a/src/storage/pack.rs b/src/storage/pack.rs index c598a88b..44a634f1 100644 --- a/src/storage/pack.rs +++ b/src/storage/pack.rs @@ -335,16 +335,16 @@ mod tests { let mut builder = store1.create_base_layer().await.unwrap(); let base_name = builder.name(); - builder.add_string_triple(StringTriple::new_node("cow", "likes", "duck")); - builder.add_string_triple(StringTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_node("cow", "likes", "duck")); + builder.add_value_triple(ValueTriple::new_node("duck", "hates", "cow")); builder.commit_boxed().await.unwrap(); let mut builder = store1.create_child_layer(base_name).await.unwrap(); let child_name = builder.name(); - builder.remove_string_triple(StringTriple::new_node("duck", "hates", "cow")); - builder.add_string_triple(StringTriple::new_node("duck", "likes", "cow")); + builder.remove_value_triple(ValueTriple::new_node("duck", "hates", "cow")); + builder.add_value_triple(ValueTriple::new_node("duck", "likes", "cow")); builder.commit_boxed().await.unwrap(); @@ -369,8 +369,8 @@ mod tests { .collect(); assert_eq!( vec![ - StringTriple::new_node("cow", "likes", "duck"), - StringTriple::new_node("duck", "likes", "cow") + ValueTriple::new_node("cow", "likes", "duck"), + ValueTriple::new_node("duck", "likes", "cow") ], triples ); diff --git a/src/store/mod.rs b/src/store/mod.rs index 0a587b1c..2e28ca82 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -6,10 +6,11 @@ pub mod sync; use std::path::PathBuf; use std::sync::{Arc, RwLock}; -use crate::layer::{IdTriple, Layer, LayerBuilder, LayerCounts, ObjectType, StringTriple}; +use crate::layer::{IdTriple, Layer, LayerBuilder, LayerCounts, ObjectType, ValueTriple}; use crate::storage::directory::{DirectoryLabelStore, DirectoryLayerStore}; use crate::storage::memory::{MemoryLabelStore, MemoryLayerStore}; use crate::storage::{CachedLayerStore, LabelStore, LayerStore, LockingHashMapLayerCache}; +use crate::structure::TypedDictEntry; use std::io; @@ -90,8 +91,8 @@ impl StoreLayerBuilder { } /// Add a string triple. - pub fn add_string_triple(&self, triple: StringTriple) -> Result<(), io::Error> { - self.with_builder(move |b| b.add_string_triple(triple)) + pub fn add_value_triple(&self, triple: ValueTriple) -> Result<(), io::Error> { + self.with_builder(move |b| b.add_value_triple(triple)) } /// Add an id triple. @@ -100,8 +101,8 @@ impl StoreLayerBuilder { } /// Remove a string triple. - pub fn remove_string_triple(&self, triple: StringTriple) -> Result<(), io::Error> { - self.with_builder(move |b| b.remove_string_triple(triple)) + pub fn remove_value_triple(&self, triple: ValueTriple) -> Result<(), io::Error> { + self.with_builder(move |b| b.remove_value_triple(triple)) } /// Remove an id triple. 
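At the store level the builder keeps the same commit flow, only with the renamed entry points. A sketch of the typical round trip, mirroring the tests further down:

    // Inside an async context, as in the tests below.
    let store = open_memory_store();

    let builder = store.create_base_layer().await.unwrap();
    builder
        .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo"))
        .unwrap();
    let layer = builder.commit().await.unwrap();

    assert!(layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")));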
@@ -165,14 +166,14 @@ impl StoreLayerBuilder { triple_additions.par_bridge().for_each(|t| { delta .id_triple_to_string(&t) - .map(|st| self.add_string_triple(st)); + .map(|st| self.add_value_triple(st)); }); }, move || { triple_removals.par_bridge().for_each(|t| { delta .id_triple_to_string(&t) - .map(|st| self.remove_string_triple(st)); + .map(|st| self.remove_value_triple(st)); }) }, ); @@ -189,8 +190,8 @@ impl StoreLayerBuilder { if let Some(this) = self.parent() { this.triples().par_bridge().for_each(|t| { if let Some(st) = this.id_triple_to_string(&t) { - if !other.string_triple_exists(&st) { - self.remove_string_triple(st).unwrap() + if !other.value_triple_exists(&st) { + self.remove_value_triple(st).unwrap() } } }) @@ -200,11 +201,11 @@ impl StoreLayerBuilder { other.triples().par_bridge().for_each(|t| { if let Some(st) = other.id_triple_to_string(&t) { if let Some(this) = self.parent() { - if !this.string_triple_exists(&st) { - self.add_string_triple(st).unwrap() + if !this.value_triple_exists(&st) { + self.add_value_triple(st).unwrap() } } else { - self.add_string_triple(st).unwrap() + self.add_value_triple(st).unwrap() }; } }) @@ -278,7 +279,7 @@ impl StoreLayer { let new_builder = self.store.create_base_layer().await?; self.triples().par_bridge().for_each(|t| { let st = self.id_triple_to_string(&t).unwrap(); - new_builder.add_string_triple(st).unwrap() + new_builder.add_value_triple(st).unwrap() }); new_builder.commit().await @@ -577,7 +578,7 @@ impl Layer for StoreLayer { self.layer.object_node_id(object) } - fn object_value_id(&self, object: &str) -> Option { + fn object_value_id(&self, object: &TypedDictEntry) -> Option { self.layer.object_value_id(object) } @@ -892,7 +893,7 @@ mod tests { let mut builder = store.create_base_layer().await.unwrap(); builder - .add_string_triple(StringTriple::new_value("cow", "says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); let layer = builder.commit().await.unwrap(); @@ -900,7 +901,7 @@ mod tests { builder = layer.open_write().await.unwrap(); builder - .add_string_triple(StringTriple::new_value("pig", "says", "oink")) + .add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")) .unwrap(); let layer2 = builder.commit().await.unwrap(); @@ -910,8 +911,8 @@ mod tests { let layer = database.head().await.unwrap().unwrap(); assert_eq!(layer2_name, layer.name()); - assert!(layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - assert!(layer.string_triple_exists(&StringTriple::new_value("pig", "says", "oink"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("pig", "says", "oink"))); } #[tokio::test] @@ -934,7 +935,7 @@ mod tests { let store = open_memory_store(); let builder = store.create_base_layer().await.unwrap(); builder - .add_string_triple(StringTriple::new_value("cow", "says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); let layer = builder.commit().await.unwrap(); @@ -943,7 +944,7 @@ mod tests { let layer2 = store.get_layer_from_id(id).await.unwrap().unwrap(); - assert!(layer2.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); + assert!(layer2.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo"))); } #[tokio::test] @@ -952,7 +953,7 @@ mod tests { let builder = store.create_base_layer().await.unwrap(); builder - .add_string_triple(StringTriple::new_value("cow", 
"says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); assert!(!builder.committed()); @@ -969,7 +970,7 @@ mod tests { let builder1 = store.create_base_layer().await.unwrap(); builder1 - .add_string_triple(StringTriple::new_value("cow", "says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); let layer1 = builder1.commit().await.unwrap(); @@ -978,7 +979,7 @@ mod tests { let builder2 = store.create_base_layer().await.unwrap(); builder2 - .add_string_triple(StringTriple::new_value("duck", "says", "quack")) + .add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")) .unwrap(); let layer2 = builder2.commit().await.unwrap(); @@ -987,8 +988,12 @@ mod tests { let new_layer = database.head().await.unwrap().unwrap(); - assert!(new_layer.string_triple_exists(&StringTriple::new_value("duck", "says", "quack"))); - assert!(!new_layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); + assert!( + new_layer.value_triple_exists(&ValueTriple::new_string_value("duck", "says", "quack")) + ); + assert!( + !new_layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")) + ); } #[tokio::test] @@ -996,7 +1001,7 @@ mod tests { let store = open_memory_store(); let builder = store.create_base_layer().await.unwrap(); builder - .add_string_triple(StringTriple::new_value("cow", "says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); let layer = builder.commit().await.unwrap(); @@ -1004,15 +1009,15 @@ mod tests { let builder2 = layer.open_write().await.unwrap(); builder2 - .add_string_triple(StringTriple::new_value("dog", "says", "woof")) + .add_value_triple(ValueTriple::new_string_value("dog", "says", "woof")) .unwrap(); let layer2 = builder2.commit().await.unwrap(); let new = layer2.squash().await.unwrap(); - assert!(new.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - assert!(new.string_triple_exists(&StringTriple::new_value("dog", "says", "woof"))); + assert!(new.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo"))); + assert!(new.value_triple_exists(&ValueTriple::new_string_value("dog", "says", "woof"))); assert!(new.parent().await.unwrap().is_none()); } @@ -1022,7 +1027,7 @@ mod tests { let builder = store.create_base_layer().await.unwrap(); builder - .add_string_triple(StringTriple::new_value("cow", "says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); let layer = builder.commit().await.unwrap(); @@ -1030,7 +1035,7 @@ mod tests { let builder2 = layer.open_write().await.unwrap(); builder2 - .add_string_triple(StringTriple::new_value("dog", "says", "woof")) + .add_value_triple(ValueTriple::new_string_value("dog", "says", "woof")) .unwrap(); let layer2 = builder2.commit().await.unwrap(); @@ -1038,10 +1043,10 @@ mod tests { let delta_builder_1 = store.create_base_layer().await.unwrap(); delta_builder_1 - .add_string_triple(StringTriple::new_value("dog", "says", "woof")) + .add_value_triple(ValueTriple::new_string_value("dog", "says", "woof")) .unwrap(); delta_builder_1 - .add_string_triple(StringTriple::new_value("cat", "says", "meow")) + .add_value_triple(ValueTriple::new_string_value("cat", "says", "meow")) .unwrap(); let delta_1 = delta_builder_1.commit().await.unwrap(); @@ -1049,10 +1054,10 @@ mod tests { let delta_builder_2 = delta_1.open_write().await.unwrap(); delta_builder_2 - .add_string_triple(StringTriple::new_value("crow", "says", 
"caw")) + .add_value_triple(ValueTriple::new_string_value("crow", "says", "caw")) .unwrap(); delta_builder_2 - .remove_string_triple(StringTriple::new_value("cat", "says", "meow")) + .remove_value_triple(ValueTriple::new_string_value("cat", "says", "meow")) .unwrap(); let delta = delta_builder_2.commit().await.unwrap(); @@ -1063,10 +1068,17 @@ mod tests { let rebase_layer = rebase_builder.commit().await.unwrap(); - assert!(rebase_layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - assert!(rebase_layer.string_triple_exists(&StringTriple::new_value("crow", "says", "caw"))); - assert!(rebase_layer.string_triple_exists(&StringTriple::new_value("dog", "says", "woof"))); - assert!(!rebase_layer.string_triple_exists(&StringTriple::new_value("cat", "says", "meow"))); + assert!( + rebase_layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo")) + ); + assert!( + rebase_layer.value_triple_exists(&ValueTriple::new_string_value("crow", "says", "caw")) + ); + assert!( + rebase_layer.value_triple_exists(&ValueTriple::new_string_value("dog", "says", "woof")) + ); + assert!(!rebase_layer + .value_triple_exists(&ValueTriple::new_string_value("cat", "says", "meow"))); } async fn cached_layer_name_does_not_change_after_rollup(store: Store) { diff --git a/src/store/sync.rs b/src/store/sync.rs index ca1dc04f..b304299d 100644 --- a/src/store/sync.rs +++ b/src/store/sync.rs @@ -11,10 +11,11 @@ use tokio::runtime::Runtime; use std::io; use std::path::PathBuf; -use crate::layer::{IdTriple, Layer, LayerCounts, ObjectType, StringTriple}; +use crate::layer::{IdTriple, Layer, LayerCounts, ObjectType, ValueTriple}; use crate::store::{ open_directory_store, open_memory_store, NamedGraph, Store, StoreLayer, StoreLayerBuilder, }; +use crate::structure::TypedDictEntry; lazy_static! { static ref RUNTIME: Runtime = Runtime::new().unwrap(); @@ -54,8 +55,8 @@ impl SyncStoreLayerBuilder { } /// Add a string triple. - pub fn add_string_triple(&self, triple: StringTriple) -> Result<(), io::Error> { - self.inner.add_string_triple(triple) + pub fn add_value_triple(&self, triple: ValueTriple) -> Result<(), io::Error> { + self.inner.add_value_triple(triple) } /// Add an id triple. @@ -64,8 +65,8 @@ impl SyncStoreLayerBuilder { } /// Remove a string triple. - pub fn remove_string_triple(&self, triple: StringTriple) -> Result<(), io::Error> { - self.inner.remove_string_triple(triple) + pub fn remove_value_triple(&self, triple: ValueTriple) -> Result<(), io::Error> { + self.inner.remove_value_triple(triple) } /// Remove an id triple. 
@@ -376,7 +377,7 @@ impl Layer for SyncStoreLayer { self.inner.object_node_id(object) } - fn object_value_id(&self, object: &str) -> Option { + fn object_value_id(&self, object: &TypedDictEntry) -> Option { self.inner.object_value_id(object) } @@ -603,7 +604,7 @@ mod tests { let mut builder = store.create_base_layer().unwrap(); builder - .add_string_triple(StringTriple::new_value("cow", "says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); let layer = builder.commit().unwrap(); @@ -611,7 +612,7 @@ mod tests { builder = layer.open_write().unwrap(); builder - .add_string_triple(StringTriple::new_value("pig", "says", "oink")) + .add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")) .unwrap(); let layer2 = builder.commit().unwrap(); @@ -621,8 +622,8 @@ mod tests { let layer = database.head().unwrap().unwrap(); assert_eq!(layer2_name, layer.name()); - assert!(layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - assert!(layer.string_triple_exists(&StringTriple::new_value("pig", "says", "oink"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("pig", "says", "oink"))); } #[test] @@ -636,7 +637,7 @@ mod tests { let mut builder = store.create_base_layer().unwrap(); builder - .add_string_triple(StringTriple::new_value("cow", "says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); let layer = builder.commit().unwrap(); @@ -644,7 +645,7 @@ mod tests { builder = layer.open_write().unwrap(); builder - .add_string_triple(StringTriple::new_value("pig", "says", "oink")) + .add_value_triple(ValueTriple::new_string_value("pig", "says", "oink")) .unwrap(); let layer2 = builder.commit().unwrap(); @@ -654,8 +655,8 @@ mod tests { let layer = database.head().unwrap().unwrap(); assert_eq!(layer2_name, layer.name()); - assert!(layer.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); - assert!(layer.string_triple_exists(&StringTriple::new_value("pig", "says", "oink"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo"))); + assert!(layer.value_triple_exists(&ValueTriple::new_string_value("pig", "says", "oink"))); } #[test] @@ -663,7 +664,7 @@ mod tests { let store = open_sync_memory_store(); let builder = store.create_base_layer().unwrap(); builder - .add_string_triple(StringTriple::new_value("cow", "says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); let layer = builder.commit().unwrap(); @@ -671,7 +672,7 @@ mod tests { let id = layer.name(); let layer2 = store.get_layer_from_id(id).unwrap().unwrap(); - assert!(layer2.string_triple_exists(&StringTriple::new_value("cow", "says", "moo"))); + assert!(layer2.value_triple_exists(&ValueTriple::new_string_value("cow", "says", "moo"))); } #[test] @@ -679,7 +680,7 @@ mod tests { let store = open_sync_memory_store(); let builder = store.create_base_layer().unwrap(); builder - .add_string_triple(StringTriple::new_value("cow", "says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); assert!(!builder.committed()); @@ -700,19 +701,19 @@ mod tests { let builder1 = store1.create_base_layer().unwrap(); builder1 - .add_string_triple(StringTriple::new_value("cow", "says", "moo")) + .add_value_triple(ValueTriple::new_string_value("cow", "says", "moo")) .unwrap(); let layer1 = 
builder1.commit().unwrap(); let builder2 = store1.create_base_layer().unwrap(); builder2 - .add_string_triple(StringTriple::new_value("duck", "says", "quack")) + .add_value_triple(ValueTriple::new_string_value("duck", "says", "quack")) .unwrap(); let layer2 = builder2.commit().unwrap(); let builder3 = layer2.open_write().unwrap(); builder3 - .add_string_triple(StringTriple::new_value("horse", "says", "neigh")) + .add_value_triple(ValueTriple::new_string_value("horse", "says", "neigh")) .unwrap(); let layer3 = builder3.commit().unwrap(); @@ -733,11 +734,9 @@ mod tests { .unwrap(); let result_layer = store2.get_layer_from_id(layer3.name()).unwrap().unwrap(); - assert!( - result_layer.string_triple_exists(&StringTriple::new_value("duck", "says", "quack")) - ); - assert!( - result_layer.string_triple_exists(&StringTriple::new_value("horse", "says", "neigh")) - ); + assert!(result_layer + .value_triple_exists(&ValueTriple::new_string_value("duck", "says", "quack"))); + assert!(result_layer + .value_triple_exists(&ValueTriple::new_string_value("horse", "says", "neigh"))); } } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index a3fa1aed..a344177e 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -110,6 +110,10 @@ impl TypedDict { pub fn id>(&self, v: &Q) -> IdLookupResult { let entry = T::make_entry(v); + self.id_entry(&entry) + } + + pub fn id_entry(&self, entry: &TypedDictEntry) -> IdLookupResult { self.id_slice(entry.datatype, &entry.to_bytes()) } @@ -174,6 +178,7 @@ impl TypedDict { } } + // TOOD: would be nice if this worked on a buf instead of a slice pub fn id_slice(&self, dt: Datatype, slice: &[u8]) -> IdLookupResult { if let Some((dict, offset)) = self.type_segment(dt) { let result = dict.id(slice).offset(offset); From f741afef329b80c27707d168004ca04dbbeaa2f6 Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Wed, 7 Dec 2022 15:06:35 +0100 Subject: [PATCH 75/99] value extraction from typed dict + test --- src/layer/layer.rs | 65 ++++++++++++++++++++++++++++++++++++++ src/structure/tfc/typed.rs | 8 +++-- 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/src/layer/layer.rs b/src/layer/layer.rs index fa6fe21b..51652384 100644 --- a/src/layer/layer.rs +++ b/src/layer/layer.rs @@ -591,4 +591,69 @@ mod tests { triple_2 ); } + + #[tokio::test] + async fn find_nonstring_triples() { + let files = base_layer_files(); + let mut builder = SimpleLayerBuilder::new([1, 2, 3, 4, 5], files.clone()); + + builder.add_value_triple(ValueTriple::new_value( + "duck", + "num_feet", + u32::make_entry(&2), + )); + builder.add_value_triple(ValueTriple::new_value( + "cow", + "num_feet", + u32::make_entry(&4), + )); + builder.add_value_triple(ValueTriple::new_value( + "disabled_cow", + "num_feet", + u32::make_entry(&3), + )); + builder.add_value_triple(ValueTriple::new_value( + "duck", + "swims", + String::make_entry(&"true"), + )); + builder.add_value_triple(ValueTriple::new_value( + "cow", + "swims", + String::make_entry(&"false"), + )); + builder.add_value_triple(ValueTriple::new_value( + "disabled_cow", + "swims", + String::make_entry(&"false"), + )); + + builder.commit().await.unwrap(); + + let base: Arc = Arc::new( + BaseLayer::load_from_files([1, 2, 3, 4, 5], &files) + .await + .unwrap() + .into(), + ); + + let mut results: Vec<_> = base + .triples_p(base.predicate_id("num_feet").unwrap()) + .map(|t| { + ( + base.id_subject(t.subject).unwrap(), + base.id_object_value(t.object).unwrap().as_val::(), + ) + }) + .collect(); + results.sort(); + 
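+        // as_val checks that the entry's stored datatype matches the requested
+        // one and then decodes the lexical bytes back into a native u32.
+        // Sorting by subject keeps the assertion below independent of the
+        // order in which triples_p yields matches.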
+ let expected = vec![ + ("cow".to_owned(), 4), + ("disabled_cow".to_owned(), 3), + ("duck".to_owned(), 2), + ]; + + assert_eq!(expected, results); + } } diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index a344177e..101978d4 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -9,7 +9,7 @@ use std::{borrow::Cow, marker::PhantomData}; use super::{ block::{IdLookupResult, SizedDictBlock, SizedDictEntry}, dict::{SizedDict, SizedDictBufBuilder}, - Datatype, OwnedSizedDictEntryBuf, SizedDictEntryBuf, TdbDataType, ToLexical, + Datatype, FromLexical, OwnedSizedDictEntryBuf, SizedDictEntryBuf, TdbDataType, ToLexical, }; #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -33,6 +33,11 @@ impl TypedDictEntry { pub fn into_buf(self) -> OwnedSizedDictEntryBuf { self.entry.into_buf() } + + pub fn as_val>(&self) -> T { + assert_eq!(Q::datatype(), self.datatype); + T::from_lexical(self.entry.as_buf()) + } } #[derive(Clone, Debug)] @@ -494,7 +499,6 @@ mod tests { use bytes::BytesMut; use rug::Integer; - use super::super::datatypes::FromLexical; use crate::structure::Decimal; use super::*; From 9a310036e405faa212fbe3ffddb334101ea71c5b Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Wed, 7 Dec 2022 16:14:06 +0100 Subject: [PATCH 76/99] Adding datatype accessor function --- src/structure/tfc/typed.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 101978d4..778c8976 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -38,6 +38,10 @@ impl TypedDictEntry { assert_eq!(Q::datatype(), self.datatype); T::from_lexical(self.entry.as_buf()) } + + pub fn datatype(&self) -> Datatype { + self.datatype + } } #[derive(Clone, Debug)] From 8d0b943697e33a5b8531b5871d4e9f6b8aa05bf2 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Wed, 7 Dec 2022 22:46:30 +0100 Subject: [PATCH 77/99] Some more data types --- src/structure/tfc/datatypes.rs | 68 ++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index d12e4e7b..da76f494 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -19,6 +19,8 @@ pub enum Datatype { Float64, Decimal, BigInt, + Boolean, + Token, } impl Datatype { @@ -32,6 +34,7 @@ impl Datatype { pub fn record_size(&self) -> Option { match self { + Datatype::Boolean => Some(4), // this is huge Datatype::String => None, Datatype::UInt32 => Some(4), Datatype::Int32 => Some(4), @@ -278,8 +281,73 @@ impl FromLexical for Decimal { } } +impl FromLexical for String { + fn from_lexical(b: B) -> Self { + // TODO make this better + Decimal::from_lexical(b).0 + } +} + impl ToLexical for Decimal { fn to_lexical(&self) -> Bytes { Bytes::from(decimal_to_storage(&self.0)) } } + +impl TdbDataType for bool { + fn datatype() -> Datatype { + Datatype::Boolean + } +} + +impl FromLexical for bool { + fn from_lexical(mut b: B) -> Self { + let num = b.get_u8(); + if num == 0 { + false + } else { + true + } + } +} + +impl ToLexical for bool { + fn to_lexical(&self) -> Bytes { + if *self { + vec![1].into() + } else { + vec![0].into() + } + } +} + +#[derive(PartialEq, Debug)] +pub struct Token(pub String); + +impl TdbDataType for Token { + fn datatype() -> Datatype { + Datatype::Token + } +} + +impl ToLexical for Token { + fn to_lexical(&self) -> Bytes { + Bytes::copy_from_slice(self.0.as_ref().as_bytes()) + } +} + +impl FromLexical for Token { + fn 
from_lexical(mut b: B) -> Self { + let mut vec = vec![0; b.remaining()]; + b.copy_to_slice(&mut vec); + Token(String::from_utf8(vec).unwrap()) + } +} + +impl FromLexical for String { + fn from_lexical(mut b: B) -> Self { + let mut vec = vec![0; b.remaining()]; + b.copy_to_slice(&mut vec); + String::from_utf8(vec).unwrap() + } +} From 81d2333c077b224dc718860c521dc8355609bb87 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Wed, 7 Dec 2022 23:14:02 +0100 Subject: [PATCH 78/99] Accidentally broke build briefly --- src/structure/tfc/datatypes.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index da76f494..44fbde60 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -44,6 +44,7 @@ impl Datatype { Datatype::Float64 => Some(8), Datatype::Decimal => None, Datatype::BigInt => None, + Datatype::Token => None, } } } @@ -324,6 +325,12 @@ impl ToLexical for bool { #[derive(PartialEq, Debug)] pub struct Token(pub String); +impl AsRef for Token { + fn as_ref(&self) -> &str { + &self.0 + } +} + impl TdbDataType for Token { fn datatype() -> Datatype { Datatype::Token @@ -332,7 +339,7 @@ impl TdbDataType for Token { impl ToLexical for Token { fn to_lexical(&self) -> Bytes { - Bytes::copy_from_slice(self.0.as_ref().as_bytes()) + Bytes::copy_from_slice(self.as_ref().as_bytes()) } } From d27ad4ed0eeaa96efe6424b6cab1dfa4109d4188 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Thu, 8 Dec 2022 08:45:22 +0100 Subject: [PATCH 79/99] Remove length test --- src/structure/tfc/typed.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 778c8976..d334d64a 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -64,7 +64,7 @@ impl TypedDict { let types_present = MonotonicLogArray::parse(types_present).unwrap(); let type_offsets = MonotonicLogArray::parse(type_offsets).unwrap(); let block_offsets = MonotonicLogArray::parse(block_offsets).unwrap(); - if types_present.len() == 0 { + if types_present.is_empty() { return Self { types_present, type_offsets, From c860a0a658391d5d690d5c88dc793cc01fe0010f Mon Sep 17 00:00:00 2001 From: Matthijs van Otterdijk Date: Thu, 8 Dec 2022 12:25:10 +0100 Subject: [PATCH 80/99] add langstring support --- src/structure/tfc/datatypes.rs | 72 ++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 44fbde60..55f7c2c2 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -21,6 +21,7 @@ pub enum Datatype { BigInt, Boolean, Token, + LangString, } impl Datatype { @@ -45,6 +46,7 @@ impl Datatype { Datatype::Decimal => None, Datatype::BigInt => None, Datatype::Token => None, + Datatype::LangString => None, } } } @@ -322,39 +324,49 @@ impl ToLexical for bool { } } -#[derive(PartialEq, Debug)] -pub struct Token(pub String); +macro_rules! 
stringy_type { + ($ty:ident) => { + stringy_type!($ty, $ty); + }; + ($ty:ident, $datatype:ident) => { + #[derive(PartialEq, Debug)] + pub struct $ty(String); -impl AsRef for Token { - fn as_ref(&self) -> &str { - &self.0 - } -} + impl AsRef for $ty { + fn as_ref(&self) -> &str { + &self.0 + } + } -impl TdbDataType for Token { - fn datatype() -> Datatype { - Datatype::Token - } -} + impl TdbDataType for $ty { + fn datatype() -> Datatype { + Datatype::$datatype + } + } -impl ToLexical for Token { - fn to_lexical(&self) -> Bytes { - Bytes::copy_from_slice(self.as_ref().as_bytes()) - } -} + impl> ToLexical<$ty> for T { + fn to_lexical(&self) -> Bytes { + Bytes::copy_from_slice(self.as_ref().as_bytes()) + } + } -impl FromLexical for Token { - fn from_lexical(mut b: B) -> Self { - let mut vec = vec![0; b.remaining()]; - b.copy_to_slice(&mut vec); - Token(String::from_utf8(vec).unwrap()) - } -} + impl FromLexical<$ty> for $ty { + fn from_lexical(mut b: B) -> Self { + let mut vec = vec![0; b.remaining()]; + b.copy_to_slice(&mut vec); + $ty(String::from_utf8(vec).unwrap()) + } + } -impl FromLexical for String { - fn from_lexical(mut b: B) -> Self { - let mut vec = vec![0; b.remaining()]; - b.copy_to_slice(&mut vec); - String::from_utf8(vec).unwrap() - } + impl FromLexical<$ty> for String { + fn from_lexical(mut b: B) -> Self { + let mut vec = vec![0; b.remaining()]; + b.copy_to_slice(&mut vec); + String::from_utf8(vec).unwrap() + } + } + }; } + +stringy_type!(Token); +stringy_type!(LangString); From d34778661af614a6f2a246085f7aa63f7155040d Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Thu, 8 Dec 2022 13:54:06 +0100 Subject: [PATCH 81/99] Extending types --- src/structure/tfc/datatypes.rs | 72 +++++++++++++++++++++++++++++++++- src/structure/tfc/typed.rs | 39 +++++++++++------- 2 files changed, 95 insertions(+), 16 deletions(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 55f7c2c2..0f87af13 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -20,8 +20,16 @@ pub enum Datatype { Decimal, BigInt, Boolean, - Token, LangString, + DateTime, + Date, + AnyURI, + Language, + NormalizedString, + Token, + NMToken, + Name, + NCName, } impl Datatype { @@ -47,6 +55,7 @@ impl Datatype { Datatype::BigInt => None, Datatype::Token => None, Datatype::LangString => None, + _ => None, } } } @@ -291,6 +300,16 @@ impl FromLexical for String { } } +/* +impl FromLexical for f64 { + fn from_lexical(b: B) -> Self { + let s = Decimal::from_lexical(b).0; + s.parse::() + .expect("Too much precision for cast from decimal to f64") + } +} +*/ + impl ToLexical for Decimal { fn to_lexical(&self) -> Bytes { Bytes::from(decimal_to_storage(&self.0)) @@ -368,5 +387,54 @@ macro_rules! stringy_type { }; } -stringy_type!(Token); +/* +macro_rules! 
biginty_type { + ($ty:ident) => { + biginty_type!($ty, $ty); + }; + ($ty:ident, $datatype:ident) => { + #[derive(PartialEq, Debug)] + pub struct $ty(Integer); + + impl TdbDataType for $ty { + fn datatype() -> Datatype { + Datatype::$datatype + } + } + + impl FromLexical<$ty> for $ty { + fn from_lexical(mut b: B) -> Self { + $ty(storage_to_bigint(&mut b).to_string()) + } + } + + impl FromLexical<$ty> for String { + fn from_lexical(mut b: B) -> Self { + $ty(storage_to_bigint(&mut b).to_string()) + } + } + + impl ToLexical<$ty> for $ty { + fn to_lexical(&self) -> Bytes { + Bytes::from(bigint_to_storage(self.0.clone())) + } + } + }; +} +*/ + stringy_type!(LangString); +stringy_type!(NCName); +stringy_type!(Name); +stringy_type!(Token); +stringy_type!(NMToken); +stringy_type!(NormalizedString); +stringy_type!(Language); +stringy_type!(AnyURI); + +/* +biginty_type!(PositiveInteger); +biginty_type!(NonNegativeInteger); +biginty_type!(NegativeInteger); +biginty_type!(NonPositiveInteger); +*/ diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index d334d64a..5a5a27fd 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -77,22 +77,20 @@ impl TypedDict { let mut tally: u64 = 0; let mut type_id_offsets = Vec::with_capacity(types_present.len() - 1); for type_offset in type_offsets.iter() { - let last_block_len; - if type_offset == 0 { - last_block_len = parse_block_control_records(data[0]); + let last_block_len = if type_offset == 0 { + parse_block_control_records(data[0]) } else { let last_block_offset_of_previous_type = block_offsets.entry(type_offset as usize - 1); - last_block_len = - parse_block_control_records(data[last_block_offset_of_previous_type as usize]); - } + parse_block_control_records(data[last_block_offset_of_previous_type as usize]) + }; let gap = BLOCK_SIZE as u8 - last_block_len; tally += gap as u64; type_id_offsets.push((type_offset + 1) * 8 - tally); } - let last_gap = if block_offsets.len() == 0 { + let last_gap = if block_offsets.is_empty() { 1 } else { BLOCK_SIZE @@ -100,7 +98,7 @@ impl TypedDict { data[block_offsets.entry(block_offsets.len() - 1) as usize], ) as usize }; - let num_entries = if block_offsets.len() == 0 { + let num_entries = if block_offsets.is_empty() { parse_block_control_records(data[0]) as usize } else { (block_offsets.len() + 1) * BLOCK_SIZE - tally as usize - last_gap @@ -442,7 +440,7 @@ impl TypedDictBufBuilder u64 { - if self.current_datatype == None { + if self.current_datatype.is_none() { self.current_datatype = Some(value.datatype); self.types_present_builder.push(value.datatype as u64); self.sized_dict_buf_builder @@ -576,14 +574,21 @@ mod tests { let mut offsets = BytesMut::new(); let mut data = BytesMut::new(); - build_segment_and_offsets( + build_segment_and_offsets::< + &mut bytes::BytesMut, + &mut bytes::BytesMut, + String, + String, + std::vec::IntoIter, + >( Datatype::String, &mut offsets, &mut data, strings.clone().into_iter(), ); - let segment = TypedDictSegment::parse(offsets.freeze(), data.freeze(), 0); + let segment: TypedDictSegment = + TypedDictSegment::parse(offsets.freeze(), data.freeze(), 0); for (ix, s) in strings.into_iter().enumerate() { assert_eq!(IdLookupResult::Found((ix + 1) as u64), segment.id(&s)); @@ -746,7 +751,7 @@ mod tests { assert_eq!(13, dict.num_entries()); - let id = dict.id(&"Batty".to_string()); + let id = dict.id::(&"Batty".to_string()); assert_eq!(IdLookupResult::Found(2), id); assert_eq!(IdLookupResult::Found(6), dict.id(&20_u32)); assert_eq!(IdLookupResult::Found(7), 
dict.id(&(-500_i32))); @@ -829,8 +834,14 @@ mod tests { assert_eq!(26_u32, dict.get::(14).unwrap()); assert_eq!(Decimal("234.8973".to_string()), dict.get(29).unwrap()); - assert_eq!(IdLookupResult::NotFound, dict.id(&"AAAA".to_string())); - assert_eq!(IdLookupResult::Closest(2), dict.id(&"Baz".to_string())); + assert_eq!( + IdLookupResult::NotFound, + dict.id::(&"AAAA".to_string()) + ); + assert_eq!( + IdLookupResult::Closest(2), + dict.id::(&"Baz".to_string()) + ); assert_eq!(IdLookupResult::Found(17), dict.id(&3000_u32)); From 5e6bb66e1c29db7919cbc14afce193709bce08ec Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Thu, 8 Dec 2022 16:19:45 +0100 Subject: [PATCH 82/99] Adding date times. --- Cargo.toml | 1 + src/structure/tfc/datatypes.rs | 21 ++++++++++++++ src/structure/tfc/datetime.rs | 53 ++++++++++++++++++++++++++++++++++ src/structure/tfc/decimal.rs | 36 ++++++++++++++--------- src/structure/tfc/mod.rs | 1 + 5 files changed, 98 insertions(+), 14 deletions(-) create mode 100644 src/structure/tfc/datetime.rs diff --git a/Cargo.toml b/Cargo.toml index 51034a67..86f79137 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ itertools = "0.10" rug = {version="1.16", default-features=false, features=["integer","rational"]} num-derive = "0.3" num-traits = "0.2" +chrono = "0.4" [dev-dependencies] tempfile = "3.1" diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 0f87af13..4d1a6137 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -5,6 +5,7 @@ use super::{ }; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; +use chrono::{DateTime, Utc}; use num_derive::FromPrimitive; use rug::Integer; @@ -438,3 +439,23 @@ biginty_type!(NonNegativeInteger); biginty_type!(NegativeInteger); biginty_type!(NonPositiveInteger); */ + +/* +impl TdbDataType for DateTime { + fn datatype() -> Datatype { + Datatype::DateTime + } +} + +impl ToLexical> for DateTime { + fn to_lexical(&self) -> Bytes { + Bytes::from(datetime_to_storage(&self)) + } +} + +impl FromLexical> for DateTime { + fn from_lexical(mut b: B) -> Self { + todo!() + } +} +*/ diff --git a/src/structure/tfc/datetime.rs b/src/structure/tfc/datetime.rs new file mode 100644 index 00000000..1ca6459c --- /dev/null +++ b/src/structure/tfc/datetime.rs @@ -0,0 +1,53 @@ +use chrono::{DateTime, Utc}; +use rug::Integer; + +use super::decimal::integer_and_fraction_to_storage; + +fn datetime_to_parts(datetime: &DateTime) -> (bool, Integer, u32) { + let mut seconds = Integer::from(datetime.timestamp()); + let is_neg = seconds < 0; + let mut nanos = datetime.timestamp_subsec_nanos(); + if is_neg { + if nanos != 0 { + seconds += 1; + } + nanos = 1_000_000_000 - nanos; + } + (is_neg, seconds, nanos) +} + +fn datetime_to_storage(datetime: &DateTime) -> Vec { + let (is_neg, seconds, nanos) = datetime_to_parts(datetime); + let fraction = if nanos == 0 { + None + } else if nanos % 1_000_000 == 0 { + Some(format!("{:02}", nanos / 1_000_000)) + } else if nanos % 1_000 == 0 { + Some(format!("{:05}", nanos / 1_000)) + } else { + Some(format!("{:08}", nanos)) + }; + integer_and_fraction_to_storage(is_neg, seconds, fraction.as_ref().map(|b| b.as_ref())) +} + +/* +fn storage_to_datetime(bytes: &mut B) -> DateTime { + let (int, is_pos) = storage_to_bigint_and_sign(bytes); + let fraction = u32::parse(decode_fraction(bytes, is_pos)); + Utc.timestamp(int) + .opt_with_nanoseconds(fraction) +} +*/ + +#[cfg(test)] +mod tests { + use chrono::TimeZone; + + use 
super::*; + + #[test] + fn a_few_nanos_before_epoch() { + let dt = Utc.timestamp_opt(-1, 234).unwrap(); + let result = dbg!(datetime_to_parts(&dt)); + assert_eq!((true, Integer::from(0), 999999766_u32), result) + } +} diff --git a/src/structure/tfc/decimal.rs b/src/structure/tfc/decimal.rs index b7acaf6c..a973577b 100644 --- a/src/structure/tfc/decimal.rs +++ b/src/structure/tfc/decimal.rs @@ -85,8 +85,28 @@ pub fn decimal_to_storage(decimal: &str) -> Vec { let fraction = parts.next(); let integer_part = bigint.parse::().unwrap(); let is_neg = decimal.starts_with('-'); - let prefix = bigint_to_storage(integer_part.clone()); - let mut prefix = if integer_part == 0 && is_neg { + integer_and_fraction_to_storage(is_neg, integer_part, fraction) +} + +pub fn storage_to_decimal(bytes: &mut B) -> String { + let (int, is_pos) = storage_to_bigint_and_sign(bytes); + let fraction = decode_fraction(bytes, is_pos); + let decimal = if fraction.is_empty() { + format!("{int:}") + } else { + let sign = if int == 0 && !is_pos { "-" } else { "" }; + format!("{sign:}{int:}.{fraction:}") + }; + decimal +} + +pub fn integer_and_fraction_to_storage( + is_neg: bool, + integer: Integer, + fraction: Option<&str>, +) -> Vec { + let prefix = bigint_to_storage(integer.clone()); + let mut prefix = if integer == 0 && is_neg { vec![NEGATIVE_ZERO] // negative zero } else { prefix @@ -103,15 +123,3 @@ pub fn decimal_to_storage(decimal: &str) -> Vec { prefix.extend(suffix); prefix } - -pub fn storage_to_decimal(bytes: &mut B) -> String { - let (int, is_pos) = storage_to_bigint_and_sign(bytes); - let fraction = decode_fraction(bytes, is_pos); - let decimal = if fraction.is_empty() { - format!("{int:}") - } else { - let sign = if int == 0 && !is_pos { "-" } else { "" }; - format!("{sign:}{int:}.{fraction:}") - }; - decimal -} diff --git a/src/structure/tfc/mod.rs b/src/structure/tfc/mod.rs index 2c2f120c..c3e07550 100644 --- a/src/structure/tfc/mod.rs +++ b/src/structure/tfc/mod.rs @@ -1,5 +1,6 @@ pub mod block; pub mod datatypes; +pub mod datetime; pub mod decimal; pub mod dict; pub mod file; From d4cef311c93629e439c82ddcfb31074b3c47e0fa Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Fri, 9 Dec 2022 10:21:17 +0100 Subject: [PATCH 83/99] DateTimes (with very little testing) --- src/structure/tfc/datatypes.rs | 15 +++++----- src/structure/tfc/datetime.rs | 52 +++++++++++++++++++++++++++------- src/structure/tfc/decimal.rs | 2 +- 3 files changed, 49 insertions(+), 20 deletions(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 4d1a6137..3935a86c 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -1,11 +1,12 @@ use super::{ + datetime::{datetime_to_storage, storage_to_datetime}, decimal::{decimal_to_storage, storage_to_decimal}, integer::{bigint_to_storage, storage_to_bigint}, TypedDictEntry, }; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; -use chrono::{DateTime, Utc}; +use chrono::NaiveDateTime; use num_derive::FromPrimitive; use rug::Integer; @@ -440,22 +441,20 @@ biginty_type!(NegativeInteger); biginty_type!(NonPositiveInteger); */ -/* -impl TdbDataType for DateTime { +impl TdbDataType for NaiveDateTime { fn datatype() -> Datatype { Datatype::DateTime } } -impl ToLexical> for DateTime { +impl ToLexical for NaiveDateTime { fn to_lexical(&self) -> Bytes { - Bytes::from(datetime_to_storage(&self)) + Bytes::from(datetime_to_storage(self)) } } -impl FromLexical> for DateTime { +impl FromLexical for 
NaiveDateTime { fn from_lexical(mut b: B) -> Self { - todo!() + storage_to_datetime(&mut b) } } -*/ diff --git a/src/structure/tfc/datetime.rs b/src/structure/tfc/datetime.rs index 1ca6459c..99dd1463 100644 --- a/src/structure/tfc/datetime.rs +++ b/src/structure/tfc/datetime.rs @@ -1,9 +1,13 @@ -use chrono::{DateTime, Utc}; +use bytes::Buf; +use chrono::NaiveDateTime; use rug::Integer; -use super::decimal::integer_and_fraction_to_storage; +use super::{ + decimal::{decode_fraction, integer_and_fraction_to_storage}, + integer::storage_to_bigint_and_sign, +}; -fn datetime_to_parts(datetime: &DateTime) -> (bool, Integer, u32) { +pub fn datetime_to_parts(datetime: &NaiveDateTime) -> (bool, Integer, u32) { let mut seconds = Integer::from(datetime.timestamp()); let is_neg = seconds < 0; let mut nanos = datetime.timestamp_subsec_nanos(); @@ -16,7 +20,7 @@ fn datetime_to_parts(datetime: &DateTime) -> (bool, Integer, u32) { (is_neg, seconds, nanos) } -fn datetime_to_storage(datetime: &DateTime) -> Vec { +pub fn datetime_to_storage(datetime: &NaiveDateTime) -> Vec { let (is_neg, seconds, nanos) = datetime_to_parts(datetime); let fraction = if nanos == 0 { None @@ -25,18 +29,44 @@ fn datetime_to_storage(datetime: &DateTime) -> Vec { } else if nanos % 1_000 == 0 { Some(format!("{:05}", nanos / 1_000)) } else { - Some(format!("{:08}", nanos)) + Some(format!("{nanos:08}")) }; integer_and_fraction_to_storage(is_neg, seconds, fraction.as_ref().map(|b| b.as_ref())) } -/* -fn storage_to_datetime(bytes: &mut B) -> DateTime { +fn count_leading_zeros(string: &str) -> usize { + string + .chars() + .take_while(|ch| *ch == '0') + .map(|ch| ch.len_utf8()) + .sum() +} + +pub fn storage_to_datetime(bytes: &mut B) -> NaiveDateTime { let (int, is_pos) = storage_to_bigint_and_sign(bytes); - let fraction = u32::parse(decode_fraction(bytes, is_pos)); - Utc.timestamp(int) + .opt_with_nanoseconds(fraction) + let fraction = decode_fraction(bytes, is_pos); + let seconds = int + .to_i64() + .expect("This is a surprisingly large number of seconds!"); + if fraction.is_empty() { + if is_pos { + NaiveDateTime::from_timestamp_opt(seconds, 0).unwrap() + } else { + NaiveDateTime::from_timestamp_opt(-seconds, 0).unwrap() + } + } else { + let leading_zeros = count_leading_zeros(&fraction); + let nanos = fraction + .parse::() + .expect("Nano seconds should actually fit in u32") + * u32::pow(10, leading_zeros as u32); + if is_pos { + NaiveDateTime::from_timestamp_opt(seconds, nanos).unwrap() + } else { + NaiveDateTime::from_timestamp_opt(seconds - 1, 1_000_000_000 - nanos).unwrap() + } + } } -*/ #[cfg(test)] mod tests { @@ -46,7 +76,7 @@ mod tests { #[test] fn a_few_nanos_before_epoch() { - let dt = Utc.timestamp_opt(-1, 234).unwrap(); + let dt = NaiveDateTime::from_timestamp_opt(-1, 234).unwrap(); let result = dbg!(datetime_to_parts(&dt)); assert_eq!((true, Integer::from(0), 999999766_u32), result) } diff --git a/src/structure/tfc/decimal.rs b/src/structure/tfc/decimal.rs index a973577b..9b26074e 100644 --- a/src/structure/tfc/decimal.rs +++ b/src/structure/tfc/decimal.rs @@ -54,7 +54,7 @@ fn centary_decimal_decode(i: u8) -> String { } } -fn decode_fraction(fraction_buf: &mut B, is_pos: bool) -> String { +pub fn decode_fraction(fraction_buf: &mut B, is_pos: bool) -> String { let mut first_byte = fraction_buf.chunk()[0]; if !is_pos { first_byte = !first_byte; From 56d33101987267b8afa19b3972c49d6ebfccc9b9 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Fri, 9 Dec 2022 10:21:31 +0100 Subject: [PATCH 84/99] Remove warning --- 
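A note on the date-time layout these datetime patches settle on: datetime_to_parts normalizes instants before the epoch by carrying one second into the integer part and complementing the nanoseconds, which is why a_few_nanos_before_epoch expects (true, Integer::from(0), 999999766) for one second before the epoch plus 234ns. datetime_to_storage then reuses the sign-aware bigint plus decimal-fraction machinery from decimal.rs, so encoded instants should compare bytewise in chronological order. A rough sketch of the intended roundtrip, offered as an illustration rather than a test from this series:

    use chrono::NaiveDateTime;

    let dt = NaiveDateTime::from_timestamp_opt(-1, 234).unwrap();
    let stored = datetime_to_storage(&dt);
    let mut buf: &[u8] = &stored;
    assert_eq!(dt, storage_to_datetime(&mut buf));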
src/structure/tfc/datetime.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/structure/tfc/datetime.rs b/src/structure/tfc/datetime.rs index 99dd1463..c6374d99 100644 --- a/src/structure/tfc/datetime.rs +++ b/src/structure/tfc/datetime.rs @@ -70,8 +70,6 @@ pub fn storage_to_datetime(bytes: &mut B) -> NaiveDateTime { #[cfg(test)] mod tests { - use chrono::TimeZone; - use super::*; #[test] From 5b7ebed18a40268f4b424284147780e9b9ffd4df Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Fri, 9 Dec 2022 15:01:07 +0100 Subject: [PATCH 85/99] More types --- src/structure/tfc/datatypes.rs | 291 ++++++++++++++++++++++++++++----- 1 file changed, 249 insertions(+), 42 deletions(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 3935a86c..cacf65d2 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -6,7 +6,7 @@ use super::{ }; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; -use chrono::NaiveDateTime; +use chrono::{NaiveDateTime, NaiveTime}; use num_derive::FromPrimitive; use rug::Integer; @@ -23,8 +23,6 @@ pub enum Datatype { BigInt, Boolean, LangString, - DateTime, - Date, AnyURI, Language, NormalizedString, @@ -32,6 +30,33 @@ pub enum Datatype { NMToken, Name, NCName, + Notation, + QName, + ID, + IDRef, + Entity, + PositiveInteger, + NonNegativeInteger, + NonPositiveInteger, + NegativeInteger, + Date, + DateTime, + DateTimeStamp, + Time, + GYear, + GMonth, + GDay, + GYearMonth, + GMonthDay, + Duration, + YearMonthDuration, + DayTimeDuration, + UInt8, + Int8, + UInt16, + Int16, + Base64Binary, + HexBinary, } impl Datatype { @@ -101,6 +126,48 @@ impl TdbDataType for String { } } +impl TdbDataType for u8 { + fn datatype() -> Datatype { + Datatype::UInt32 + } +} + +impl FromLexical for u8 { + fn from_lexical(b: B) -> Self { + b.reader().read_u8().unwrap() + } +} + +impl ToLexical for u8 { + fn to_lexical(&self) -> Bytes { + let mut buf = BytesMut::new().writer(); + buf.write_u8(*self).unwrap(); + + buf.into_inner().freeze() + } +} + +impl TdbDataType for u16 { + fn datatype() -> Datatype { + Datatype::UInt16 + } +} + +impl FromLexical for u16 { + fn from_lexical(b: B) -> Self { + b.reader().read_u16::().unwrap() + } +} + +impl ToLexical for u16 { + fn to_lexical(&self) -> Bytes { + let mut buf = BytesMut::new().writer(); + buf.write_u16::(*self).unwrap(); + + buf.into_inner().freeze() + } +} + impl TdbDataType for u32 { fn datatype() -> Datatype { Datatype::UInt32 @@ -122,6 +189,52 @@ impl ToLexical for u32 { } } +const I8_BYTE_MASK: u8 = 0b1000_0000; +impl TdbDataType for i8 { + fn datatype() -> Datatype { + Datatype::Int8 + } +} + +impl FromLexical for i8 { + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u8().unwrap(); + (I8_BYTE_MASK ^ i) as i8 + } +} + +impl ToLexical for i8 { + fn to_lexical(&self) -> Bytes { + let sign_flip = I8_BYTE_MASK ^ (*self as u8); + let mut buf = BytesMut::new().writer(); + buf.write_u8(sign_flip).unwrap(); + buf.into_inner().freeze() + } +} + +const I16_BYTE_MASK: u16 = 0b1000_0000 << 8; +impl TdbDataType for i16 { + fn datatype() -> Datatype { + Datatype::Int16 + } +} + +impl FromLexical for i16 { + fn from_lexical(b: B) -> Self { + let i = b.reader().read_u16::().unwrap(); + (I16_BYTE_MASK ^ i) as i16 + } +} + +impl ToLexical for i16 { + fn to_lexical(&self) -> Bytes { + let sign_flip = I16_BYTE_MASK ^ (*self as u16); + let mut buf = BytesMut::new().writer(); + buf.write_u16::(sign_flip).unwrap(); + buf.into_inner().freeze() 
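+        // I16_BYTE_MASK flips the sign bit, mapping i16's two's-complement
+        // range onto u16 so the big-endian bytes of the encoding sort
+        // negatives below positives, preserving numeric order.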
+ } +} + const I32_BYTE_MASK: u32 = 0b1000_0000 << (3 * 8); impl TdbDataType for i32 { fn datatype() -> Datatype { @@ -302,16 +415,6 @@ impl FromLexical for String { } } -/* -impl FromLexical for f64 { - fn from_lexical(b: B) -> Self { - let s = Decimal::from_lexical(b).0; - s.parse::() - .expect("Too much precision for cast from decimal to f64") - } -} -*/ - impl ToLexical for Decimal { fn to_lexical(&self) -> Bytes { Bytes::from(decimal_to_storage(&self.0)) @@ -327,11 +430,7 @@ impl TdbDataType for bool { impl FromLexical for bool { fn from_lexical(mut b: B) -> Self { let num = b.get_u8(); - if num == 0 { - false - } else { - true - } + num != 0 } } @@ -345,6 +444,127 @@ impl ToLexical for bool { } } +impl TdbDataType for NaiveDateTime { + fn datatype() -> Datatype { + Datatype::DateTime + } +} + +impl ToLexical for NaiveDateTime { + fn to_lexical(&self) -> Bytes { + Bytes::from(datetime_to_storage(self)) + } +} + +impl FromLexical for NaiveDateTime { + fn from_lexical(mut b: B) -> Self { + storage_to_datetime(&mut b) + } +} + +pub struct DateTimeStamp(NaiveDateTime); + +impl TdbDataType for DateTimeStamp { + fn datatype() -> Datatype { + Datatype::DateTimeStamp + } +} + +impl ToLexical for DateTimeStamp { + fn to_lexical(&self) -> Bytes { + Bytes::from(datetime_to_storage(&self.0)) + } +} + +impl FromLexical for DateTimeStamp { + fn from_lexical(mut b: B) -> Self { + DateTimeStamp(storage_to_datetime(&mut b)) + } +} + +impl TdbDataType for NaiveTime { + fn datatype() -> Datatype { + Datatype::Time + } +} + +impl ToLexical for NaiveTime { + fn to_lexical(&self) -> Bytes { + self.to_string().into() + } +} + +impl FromLexical for NaiveTime { + fn from_lexical(mut b: B) -> Self { + let mut vec = vec![0; b.remaining()]; + b.copy_to_slice(&mut vec); + String::from_utf8(vec) + .unwrap() + .parse::() + .unwrap() + } +} + +struct GYear(i64); + +impl TdbDataType for GYear { + fn datatype() -> Datatype { + Datatype::GYear + } +} + +impl ToLexical for GYear { + fn to_lexical(&self) -> Bytes { + self.0.to_lexical() + } +} + +impl FromLexical for GYear { + fn from_lexical(b: B) -> Self { + GYear(i64::from_lexical(b)) + } +} + +struct GMonth(u8); + +impl TdbDataType for GMonth { + fn datatype() -> Datatype { + Datatype::GMonth + } +} + +impl ToLexical for GMonth { + fn to_lexical(&self) -> Bytes { + self.0.to_lexical() + } +} + +impl FromLexical for GMonth { + fn from_lexical(b: B) -> Self { + GMonth(u8::from_lexical(b)) + } +} + +struct GDay(u8); + +impl TdbDataType for GDay { + fn datatype() -> Datatype { + Datatype::GDay + } +} + +impl ToLexical for GDay { + fn to_lexical(&self) -> Bytes { + self.0.to_lexical() + } +} + +impl FromLexical for GDay { + fn from_lexical(b: B) -> Self { + GDay(u8::from_lexical(b)) + } +} + macro_rules! stringy_type { ($ty:ident) => { stringy_type!($ty, $ty); @@ -389,7 +609,6 @@ macro_rules! stringy_type { }; } -/* macro_rules! biginty_type { ($ty:ident) => { biginty_type!($ty, $ty); @@ -406,13 +625,13 @@ macro_rules! biginty_type { impl FromLexical<$ty> for $ty { fn from_lexical(mut b: B) -> Self { - $ty(storage_to_bigint(&mut b).to_string()) + $ty(storage_to_bigint(&mut b)) } } impl FromLexical<$ty> for String { fn from_lexical(mut b: B) -> Self { - $ty(storage_to_bigint(&mut b).to_string()) + storage_to_bigint(&mut b).to_string() } } @@ -423,7 +642,6 @@ macro_rules! 
biginty_type { } }; } -*/ stringy_type!(LangString); stringy_type!(NCName); @@ -433,28 +651,17 @@ stringy_type!(NMToken); stringy_type!(NormalizedString); stringy_type!(Language); stringy_type!(AnyURI); +stringy_type!(Notation); +stringy_type!(QName); +stringy_type!(ID); +stringy_type!(IDRef); +stringy_type!(Entity); + +stringy_type!(Duration); +stringy_type!(YearMonthDuration); +stringy_type!(DayTimeDuration); -/* biginty_type!(PositiveInteger); biginty_type!(NonNegativeInteger); biginty_type!(NegativeInteger); biginty_type!(NonPositiveInteger); -*/ - -impl TdbDataType for NaiveDateTime { - fn datatype() -> Datatype { - Datatype::DateTime - } -} - -impl ToLexical for NaiveDateTime { - fn to_lexical(&self) -> Bytes { - Bytes::from(datetime_to_storage(self)) - } -} - -impl FromLexical for NaiveDateTime { - fn from_lexical(mut b: B) -> Self { - storage_to_datetime(&mut b) - } -} From e3a1424ee51b6e27fcb70c574671852d6206f8a9 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Fri, 9 Dec 2022 15:55:04 +0100 Subject: [PATCH 86/99] Adding more types to store --- src/structure/tfc/datatypes.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index cacf65d2..0ccef4d6 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -70,7 +70,7 @@ impl Datatype { pub fn record_size(&self) -> Option { match self { - Datatype::Boolean => Some(4), // this is huge + Datatype::Boolean => None, Datatype::String => None, Datatype::UInt32 => Some(4), Datatype::Int32 => Some(4), @@ -505,7 +505,7 @@ impl FromLexical for NaiveTime { } } -struct GYear(i64); +pub struct GYear(pub i64); impl TdbDataType for GYear { fn datatype() -> Datatype { @@ -525,7 +525,7 @@ impl FromLexical for GYear { } } -struct GMonth(u8); +pub struct GMonth(pub u8); impl TdbDataType for GMonth { fn datatype() -> Datatype { @@ -615,7 +615,7 @@ macro_rules! biginty_type { }; ($ty:ident, $datatype:ident) => { #[derive(PartialEq, Debug)] - pub struct $ty(Integer); + pub struct $ty(pub Integer); impl TdbDataType for $ty { fn datatype() -> Datatype { From 59763072be3401c55e6fae51431dc1161500ad3b Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Fri, 9 Dec 2022 19:51:04 +0100 Subject: [PATCH 87/99] Adding gyear, days, etc. 
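The Gregorian fragment types in this patch each carry their optional timezone offset as signed minutes, appended after the main field with to_lexical so byte order still tracks the natural order. One caveat on the offset_string helper introduced below: for a negative offset, both hours and minutes come out of / and % negative, so format!("-{hours}:{minutes}") renders an offset of -330 as "--5:-30" rather than "-05:30". A corrected sketch, where the names mirror the diff but the zero padding is an assumption rather than something this patch specifies:

    fn offset_string(offset: i16) -> String {
        if offset == 0 {
            return String::new();
        }
        let sign = if offset < 0 { '-' } else { '+' };
        // unsigned_abs sidesteps the doubled minus sign on hours and minutes
        let magnitude = offset.unsigned_abs();
        format!("{sign}{:02}:{:02}", magnitude / 60, magnitude % 60)
    }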
--- src/structure/tfc/datatypes.rs | 164 +++++++++++++++++++++++++++++++-- 1 file changed, 155 insertions(+), 9 deletions(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 0ccef4d6..3d2a7ef4 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -505,7 +505,10 @@ impl FromLexical for NaiveTime { } } -pub struct GYear(pub i64); +pub struct GYear { + pub year: i64, + pub offset: i16, +} impl TdbDataType for GYear { fn datatype() -> Datatype { @@ -515,17 +518,47 @@ impl TdbDataType for GYear { impl ToLexical for GYear { fn to_lexical(&self) -> Bytes { - self.0.to_lexical() + let year = self.year.to_lexical(); + let offset = self.offset.to_lexical(); + [year, offset].concat().into() } } impl FromLexical for GYear { + fn from_lexical(mut b: B) -> Self { + let year = i64::from_lexical(&mut b); + let offset = i16::from_lexical(b); + GYear { year, offset } + } +} + +fn offset_string(offset: i16) -> String { + if offset == 0 { + "".to_string() + } else { + let hours = offset / 60; + let minutes = offset % 60; + if hours < 0 { + format!("-{hours}:{minutes}") + } else { + format!("+{hours}:{minutes}") + } + } +} + +impl FromLexical for String { fn from_lexical(b: B) -> Self { - GYear(i64::from_lexical(b)) + let gyear = GYear::from_lexical(b); + let year = gyear.year; + let offset = offset_string(gyear.offset); + format!("{year:04}{offset:}") } } -pub struct GMonth(pub u8); +pub struct GMonth { + month: u8, + offset: i16, +} impl TdbDataType for GMonth { fn datatype() -> Datatype { @@ -535,17 +568,33 @@ impl TdbDataType for GMonth { impl ToLexical for GMonth { fn to_lexical(&self) -> Bytes { - self.0.to_lexical() + let month = self.month.to_lexical(); + let offset = self.offset.to_lexical(); + [month, offset].concat().into() } } impl FromLexical for GMonth { + fn from_lexical(mut b: B) -> Self { + let month = u8::from_lexical(&mut b); + let offset = i16::from_lexical(b); + GMonth { month, offset } + } +} + +impl FromLexical for String { fn from_lexical(b: B) -> Self { - GMonth(u8::from_lexical(b)) + let gmonth = GMonth::from_lexical(b); + let month = gmonth.month; + let offset = offset_string(gmonth.offset); + format!("-{month:02}{offset:}") } } -struct GDay(u8); +struct GDay { + day: u8, + offset: i16, +} impl TdbDataType for GDay { fn datatype() -> Datatype { @@ -555,13 +604,110 @@ impl TdbDataType for GDay { impl ToLexical for GDay { fn to_lexical(&self) -> Bytes { - self.0.to_lexical() + let day = self.day.to_lexical(); + let offset = self.offset.to_lexical(); + [day, offset].concat().into() } } impl FromLexical for GDay { + fn from_lexical(mut b: B) -> Self { + let day = u8::from_lexical(&mut b); + let offset = i16::from_lexical(b); + GDay { day, offset } + } +} + +impl FromLexical for String { + fn from_lexical(b: B) -> Self { + let gday = GDay::from_lexical(b); + let day = gday.day; + let offset = offset_string(gday.offset); + format!("--{day:02}{offset:}") + } +} + +struct GYearMonth { + year: i64, + month: u8, + offset: i16, +} + +impl TdbDataType for GYearMonth { + fn datatype() -> Datatype { + Datatype::GYearMonth + } +} + +impl ToLexical for GYearMonth { + fn to_lexical(&self) -> Bytes { + let year = self.year.to_lexical(); + let month = self.month.to_lexical(); + let offset = self.offset.to_lexical(); + [year, month, offset].concat().into() + } +} + +impl FromLexical for GYearMonth { + fn from_lexical(mut b: B) -> Self { + let year = i64::from_lexical(&mut b); + let month = u8::from_lexical(&mut b); + let offset = 
i16::from_lexical(b); + GYearMonth { + year, + month, + offset, + } + } +} + +impl FromLexical for String { + fn from_lexical(b: B) -> Self { + let gyearmonth = GYearMonth::from_lexical(b); + let year = gyearmonth.year; + let month = gyearmonth.month; + let offset = offset_string(gyearmonth.offset); + format!("{year:04}-{month:02}{offset:}") + } +} + +struct GMonthDay { + month: u8, + day: u8, + offset: i16, +} + +impl TdbDataType for GMonthDay { + fn datatype() -> Datatype { + Datatype::GMonthDay + } +} + +impl ToLexical for GMonthDay { + fn to_lexical(&self) -> Bytes { + let month = self.month.to_lexical(); + let day = self.day.to_lexical(); + let offset = self.offset.to_lexical(); + [month, day, offset].concat().into() + } +} + +impl FromLexical for GMonthDay { + fn from_lexical(mut b: B) -> Self { + let month = u8::from_lexical(&mut b); + let day = u8::from_lexical(&mut b); + let offset = i16::from_lexical(b); + GMonthDay { month, day, offset } + } +} + +impl FromLexical for String { fn from_lexical(b: B) -> Self { - GDay(u8::from_lexical(b)) + let gmonthday = GMonthDay::from_lexical(b); + let month = gmonthday.month; + let day = gmonthday.day; + let offset = offset_string(gmonthday.offset); + format!("-{month:02}-{day:02}{offset:}") } } From 5d2b3566216db76861d3e41efb961f485aff1c51 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Fri, 9 Dec 2022 20:05:52 +0100 Subject: [PATCH 88/99] Make acccessors public --- src/structure/tfc/datatypes.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 3d2a7ef4..e51c604b 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -556,8 +556,8 @@ impl FromLexical for String { } pub struct GMonth { - month: u8, - offset: i16, + pub month: u8, + pub offset: i16, } impl TdbDataType for GMonth { @@ -591,9 +591,9 @@ impl FromLexical for String { } } -struct GDay { - day: u8, - offset: i16, +pub struct GDay { + pub day: u8, + pub offset: i16, } impl TdbDataType for GDay { @@ -627,10 +627,10 @@ impl FromLexical for String { } } -struct GYearMonth { - year: i64, - month: u8, - offset: i16, +pub struct GYearMonth { + pub year: i64, + pub month: u8, + pub offset: i16, } impl TdbDataType for GYearMonth { @@ -671,10 +671,10 @@ impl FromLexical for String { } } -struct GMonthDay { - month: u8, - day: u8, - offset: i16, +pub struct GMonthDay { + pub month: u8, + pub day: u8, + pub offset: i16, } impl TdbDataType for GMonthDay { From 9dd5f8e69df6eda244624d36c5874176bfee229b Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Sat, 10 Dec 2022 01:57:30 +0100 Subject: [PATCH 89/99] Add some more datatypes --- Cargo.toml | 2 + src/structure/tfc/datatypes.rs | 125 ++++++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 86f79137..b77f75be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,8 @@ rug = {version="1.16", default-features=false, features=["integer","rational"]} num-derive = "0.3" num-traits = "0.2" chrono = "0.4" +base64 = "0.13" +hex = "0.4" [dev-dependencies] tempfile = "3.1" diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index e51c604b..d84ddf8f 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -4,6 +4,7 @@ use super::{ integer::{bigint_to_storage, storage_to_bigint}, TypedDictEntry, }; +use base64::display::Base64Display; use byteorder::{BigEndian, ReadBytesExt, 
WriteBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use chrono::{NaiveDateTime, NaiveTime}; @@ -462,7 +463,7 @@ impl FromLexical for NaiveDateTime { } } -pub struct DateTimeStamp(NaiveDateTime); +pub struct DateTimeStamp(pub NaiveDateTime); impl TdbDataType for DateTimeStamp { fn datatype() -> Datatype { @@ -482,6 +483,12 @@ impl FromLexical for DateTimeStamp { } } +impl FromLexical for NaiveDateTime { + fn from_lexical(mut b: B) -> Self { + storage_to_datetime(&mut b) + } +} + impl TdbDataType for NaiveTime { fn datatype() -> Datatype { Datatype::Time @@ -505,6 +512,55 @@ impl FromLexical for NaiveTime { } } +pub struct Date { + pub year: i64, + pub month: u8, + pub day: u8, + pub offset: i16, +} + +impl TdbDataType for Date { + fn datatype() -> Datatype { + Datatype::Date + } +} + +impl ToLexical for Date { + fn to_lexical(&self) -> Bytes { + let year = self.year.to_lexical(); + let month = self.month.to_lexical(); + let day = self.month.to_lexical(); + let offset = self.offset.to_lexical(); + [year, month, day, offset].concat().into() + } +} + +impl FromLexical for Date { + fn from_lexical(mut b: B) -> Self { + let year = i64::from_lexical(&mut b); + let month = u8::from_lexical(&mut b); + let day = u8::from_lexical(&mut b); + let offset = i16::from_lexical(b); + Date { + year, + month, + day, + offset, + } + } +} + +impl FromLexical for String { + fn from_lexical(b: B) -> Self { + let date = Date::from_lexical(b); + let year = date.year; + let month = date.month; + let day = date.day; + let offset = offset_string(date.offset); + format!("{year:04}-{month:02}-{day:02}{offset:}") + } +} + pub struct GYear { pub year: i64, pub offset: i16, @@ -711,6 +767,67 @@ impl FromLexical for String { } } +pub struct Base64Binary(pub Vec); + +impl ToLexical for Base64Binary { + fn to_lexical(&self) -> Bytes { + Bytes::copy_from_slice(&self.0[..]) + } +} + +impl FromLexical for Base64Binary { + fn from_lexical(mut b: B) -> Self { + let mut vec = vec![0; b.remaining()]; + b.copy_to_slice(&mut vec); + Base64Binary(vec) + } +} + +impl FromLexical for String { + fn from_lexical(mut b: B) -> Self { + let mut vec = vec![0; b.remaining()]; + b.copy_to_slice(&mut vec); + let wrapper = Base64Display::with_config(&vec, base64::STANDARD); + format!("{wrapper}") + } +} + +impl TdbDataType for Base64Binary { + fn datatype() -> Datatype { + Datatype::Base64Binary + } +} + +pub struct HexBinary(pub Vec); + +impl ToLexical for HexBinary { + fn to_lexical(&self) -> Bytes { + Bytes::copy_from_slice(&self.0[..]) + } +} + +impl FromLexical for HexBinary { + fn from_lexical(mut b: B) -> Self { + let mut vec = vec![0; b.remaining()]; + b.copy_to_slice(&mut vec); + HexBinary(vec) + } +} + +impl FromLexical for String { + fn from_lexical(mut b: B) -> Self { + let mut vec = vec![0; b.remaining()]; + b.copy_to_slice(&mut vec); + hex::encode(vec) + } +} + +impl TdbDataType for HexBinary { + fn datatype() -> Datatype { + Datatype::HexBinary + } +} + macro_rules! stringy_type { ($ty:ident) => { stringy_type!($ty, $ty); @@ -781,6 +898,12 @@ macro_rules! 
biginty_type { } } + impl FromLexical<$ty> for Integer { + fn from_lexical(mut b: B) -> Self { + storage_to_bigint(&mut b) + } + } + impl ToLexical<$ty> for $ty { fn to_lexical(&self) -> Bytes { Bytes::from(bigint_to_storage(self.0.clone())) From 9ceb73e899a17ed3e4a23f72a34724ea2e1ee655 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Sat, 10 Dec 2022 10:24:12 +0100 Subject: [PATCH 90/99] Typo bug --- src/structure/tfc/datatypes.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index d84ddf8f..469ac7cb 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -529,7 +529,7 @@ impl ToLexical for Date { fn to_lexical(&self) -> Bytes { let year = self.year.to_lexical(); let month = self.month.to_lexical(); - let day = self.month.to_lexical(); + let day = self.day.to_lexical(); let offset = self.offset.to_lexical(); [year, month, day, offset].concat().into() } From dc6cec6ad9697fd7561f2969cd180e2ee5a39b6e Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Sat, 10 Dec 2022 18:51:54 +0100 Subject: [PATCH 91/99] Typo! --- src/structure/tfc/datatypes.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index 469ac7cb..dc53fc6b 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -129,7 +129,7 @@ impl TdbDataType for String { impl TdbDataType for u8 { fn datatype() -> Datatype { - Datatype::UInt32 + Datatype::UInt8 } } From f1557d85c187b2aee4adc788a1c8787dbc9359d8 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Sun, 11 Dec 2022 00:01:15 +0100 Subject: [PATCH 92/99] Add any simple type --- src/structure/tfc/datatypes.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs index dc53fc6b..78c0b6a0 100644 --- a/src/structure/tfc/datatypes.rs +++ b/src/structure/tfc/datatypes.rs @@ -58,6 +58,7 @@ pub enum Datatype { Int16, Base64Binary, HexBinary, + AnySimpleType, } impl Datatype { @@ -930,7 +931,10 @@ stringy_type!(Duration); stringy_type!(YearMonthDuration); stringy_type!(DayTimeDuration); +stringy_type!(AnySimpleType); + biginty_type!(PositiveInteger); biginty_type!(NonNegativeInteger); biginty_type!(NegativeInteger); biginty_type!(NonPositiveInteger); + From ce4d31e1b4fade4623b954418012f0a8894d0ee0 Mon Sep 17 00:00:00 2001 From: Gavin Mendel-Gleason Date: Sun, 11 Dec 2022 00:05:28 +0100 Subject: [PATCH 93/99] Fix tests --- src/structure/tfc/typed.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs index 5a5a27fd..6bea16ac 100644 --- a/src/structure/tfc/typed.rs +++ b/src/structure/tfc/typed.rs @@ -799,7 +799,7 @@ mod tests { Decimal::make_entry(&Decimal("2".to_string())), Decimal::make_entry(&Decimal("0".to_string())), f32::make_entry(&4.389832_f32), - f32::make_entry(&23434.389832_f32), + f32::make_entry(&23434.389_f32), Integer::make_entry(&int("239487329872343987")), ]; vec.sort(); @@ -995,7 +995,7 @@ mod tests { Decimal::make_entry(&Decimal("2".to_string())), Decimal::make_entry(&Decimal("0".to_string())), f32::make_entry(&4.389832_f32), - f32::make_entry(&23434.389832_f32), + f32::make_entry(&23434.389_f32), Integer::make_entry(&int("239487329872343987")), ]; vec.sort(); @@ -1040,7 +1040,7 @@ mod tests { u32::make_entry(&20_u32), i64::make_entry(&-3_i64), 
From ce4d31e1b4fade4623b954418012f0a8894d0ee0 Mon Sep 17 00:00:00 2001
From: Gavin Mendel-Gleason
Date: Sun, 11 Dec 2022 00:05:28 +0100
Subject: [PATCH 93/99] Fix tests

---
 src/structure/tfc/typed.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs
index 5a5a27fd..6bea16ac 100644
--- a/src/structure/tfc/typed.rs
+++ b/src/structure/tfc/typed.rs
@@ -799,7 +799,7 @@ mod tests {
         Decimal::make_entry(&Decimal("2".to_string())),
         Decimal::make_entry(&Decimal("0".to_string())),
         f32::make_entry(&4.389832_f32),
-        f32::make_entry(&23434.389832_f32),
+        f32::make_entry(&23434.389_f32),
         Integer::make_entry(&int("239487329872343987")),
     ];
     vec.sort();
@@ -995,7 +995,7 @@ mod tests {
         Decimal::make_entry(&Decimal("2".to_string())),
         Decimal::make_entry(&Decimal("0".to_string())),
         f32::make_entry(&4.389832_f32),
-        f32::make_entry(&23434.389832_f32),
+        f32::make_entry(&23434.389_f32),
         Integer::make_entry(&int("239487329872343987")),
     ];
     vec.sort();
@@ -1040,7 +1040,7 @@ mod tests {
         u32::make_entry(&20_u32),
         i64::make_entry(&-3_i64),
         Decimal::make_entry(&Decimal("-12342343.2348973".to_string())),
-        f32::make_entry(&23434.389832_f32),
+        f32::make_entry(&23434.389_f32),
         Integer::make_entry(&int("239487329872343987")),
     ];
     vec.sort();
@@ -1187,8 +1187,8 @@ mod tests {
             data.freeze(),
         );
 
-        for i in 0..vec.len() {
-            assert_eq!(vec[i], dict.entry(i + 1).unwrap())
+        for (i, e) in vec.into_iter().enumerate() {
+            assert_eq!(e, dict.entry(i + 1).unwrap())
         }
     }
 }

From 1e582626895856db4e7d9bf3d8e71846465f1abc Mon Sep 17 00:00:00 2001
From: Gavin Mendel-Gleason
Date: Sun, 11 Dec 2022 11:35:29 +0100
Subject: [PATCH 94/99] Create fake f32

---
 src/structure/tfc/datatypes.rs | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs
index 78c0b6a0..c43522ee 100644
--- a/src/structure/tfc/datatypes.rs
+++ b/src/structure/tfc/datatypes.rs
@@ -370,6 +370,35 @@ impl ToLexical for f64 {
     }
 }
 
+// Fake f32s to avoid rounding errors
+#[derive(PartialEq, Debug)]
+pub struct Float32(pub f64);
+
+impl TdbDataType for Float32 {
+    fn datatype() -> Datatype {
+        Datatype::Float32
+    }
+}
+
+impl FromLexical<Float32> for Float32 {
+    fn from_lexical<B: Buf>(b: B) -> Self {
+        Float32(FromLexical::<f64>::from_lexical(b))
+    }
+}
+
+impl FromLexical<Float32> for f64 {
+    fn from_lexical<B: Buf>(b: B) -> Self {
+        // TODO make this better
+        Float32::from_lexical(b).0
+    }
+}
+
+impl ToLexical<Float32> for Float32 {
+    fn to_lexical(&self) -> Bytes {
+        ToLexical::<f64>::to_lexical(&self.0)
+    }
+}
+
 impl TdbDataType for Integer {
     fn datatype() -> Datatype {
         Datatype::BigInt

From f6a107ee7cd41e88701671530d1f1781d501cbfc Mon Sep 17 00:00:00 2001
From: Gavin Mendel-Gleason
Date: Mon, 12 Dec 2022 11:39:51 +0100
Subject: [PATCH 95/99] Add back f32s with a cast to f64

---
 src/structure/tfc/datatypes.rs | 35 ++++++-----------------------------
 1 file changed, 6 insertions(+), 29 deletions(-)

diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs
index c43522ee..37568c54 100644
--- a/src/structure/tfc/datatypes.rs
+++ b/src/structure/tfc/datatypes.rs
@@ -323,6 +323,12 @@ impl FromLexical for f32 {
     }
 }
 
+impl FromLexical<f32> for f64 {
+    fn from_lexical<B: Buf>(b: B) -> Self {
+        f32::from_lexical(b) as f64
+    }
+}
+
 impl ToLexical<f32> for f32 {
     fn to_lexical(&self) -> Bytes {
         let f = *self;
@@ -370,35 +376,6 @@ impl ToLexical for f64 {
     }
 }
 
-// Fake f32s to avoid rounding errors
-#[derive(PartialEq, Debug)]
-pub struct Float32(pub f64);
-
-impl TdbDataType for Float32 {
-    fn datatype() -> Datatype {
-        Datatype::Float32
-    }
-}
-
-impl FromLexical<Float32> for Float32 {
-    fn from_lexical<B: Buf>(b: B) -> Self {
-        Float32(FromLexical::<f64>::from_lexical(b))
-    }
-}
-
-impl FromLexical<Float32> for f64 {
-    fn from_lexical<B: Buf>(b: B) -> Self {
-        // TODO make this better
-        Float32::from_lexical(b).0
-    }
-}
-
-impl ToLexical<Float32> for Float32 {
-    fn to_lexical(&self) -> Bytes {
-        ToLexical::<f64>::to_lexical(&self.0)
-    }
-}
-
 impl TdbDataType for Integer {
     fn datatype() -> Datatype {
         Datatype::BigInt
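[Annotation, not part of the patch series: the detour through a `Float32(f64)` wrapper in PATCH 94 and back out in PATCH 95 hinges on a numeric fact worth stating. Widening an f32 to f64 with `as` is exact, since every f32 value is representable as an f64, so `f32::from_lexical(b) as f64` loses nothing; only the reverse narrowing rounds. A standalone check, my illustration rather than project code:

    fn main() {
        for &f in &[0.1_f32, 23434.389_f32, f32::MIN_POSITIVE, f32::MAX] {
            let widened = f as f64;        // exact: no rounding occurs
            assert_eq!(widened as f32, f); // round-trips to the same f32
        }
        // Narrowing is the lossy direction the wrapper tried to dodge:
        // a typical f64 does not survive a trip through f32.
        assert_ne!((0.1_f64 as f32) as f64, 0.1_f64);
        println!("f32 -> f64 widening is lossless");
    }
]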
From caedc17e81f101905b17d961ceae2b7d59a8d4c9 Mon Sep 17 00:00:00 2001
From: Gavin Mendel-Gleason
Date: Mon, 12 Dec 2022 12:57:51 +0100
Subject: [PATCH 96/99] Adding durations

---
 src/structure/tfc/datatypes.rs | 155 ++++++++++++++++++++++++++++++++-
 1 file changed, 151 insertions(+), 4 deletions(-)

diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs
index 37568c54..54e36259 100644
--- a/src/structure/tfc/datatypes.rs
+++ b/src/structure/tfc/datatypes.rs
@@ -774,6 +774,157 @@ impl FromLexical for String {
     }
 }
 
+pub struct Duration {
+    pub sign: i8,
+    pub year: i64,
+    pub month: u8,
+    pub day: u8,
+    pub hour: u8,
+    pub minute: u8,
+    pub second: u8,
+}
+
+impl TdbDataType for Duration {
+    fn datatype() -> Datatype {
+        Datatype::Duration
+    }
+}
+
+impl ToLexical<Duration> for Duration {
+    fn to_lexical(&self) -> Bytes {
+        let sign = self.sign.to_lexical();
+        let year = self.year.to_lexical();
+        let month = self.month.to_lexical();
+        let day = self.day.to_lexical();
+        let hour = self.hour.to_lexical();
+        let minute = self.minute.to_lexical();
+        let second = self.second.to_lexical();
+        [sign, year, month, day, hour, minute, second]
+            .concat()
+            .into()
+    }
+}
+
+impl FromLexical<Duration> for Duration {
+    fn from_lexical<B: Buf>(mut b: B) -> Self {
+        let sign = i8::from_lexical(&mut b);
+        let year = i64::from_lexical(&mut b);
+        let month = u8::from_lexical(&mut b);
+        let day = u8::from_lexical(&mut b);
+        let hour = u8::from_lexical(&mut b);
+        let minute = u8::from_lexical(&mut b);
+        let second = u8::from_lexical(b);
+        Duration {
+            sign,
+            year,
+            month,
+            day,
+            hour,
+            minute,
+            second,
+        }
+    }
+}
+
+fn duration_string(duration: &Duration) -> String {
+    let year = if duration.year == 0 {
+        format!("{:04}Y", duration.year)
+    } else {
+        "".to_string()
+    };
+    let month = if duration.month == 0 {
+        format!("{:02}M", duration.month)
+    } else {
+        "".to_string()
+    };
+    let day = if duration.day == 0 {
+        format!("{:04}D", duration.year)
+    } else {
+        "".to_string()
+    };
+    if duration.hour == 0 && duration.minute == 0 && duration.second == 0 {
+        format!("P{year}{month}{day}")
+    } else {
+        let hour = if duration.hour == 0 {
+            format!("{:02}H", duration.hour)
+        } else {
+            "".to_string()
+        };
+        let minute = if duration.minute == 0 {
+            format!("{:02}M", duration.minute)
+        } else {
+            "".to_string()
+        };
+        let second = if duration.second == 0 {
+            format!("{:02}S", duration.second)
+        } else {
+            "".to_string()
+        };
+        format!("{year}{month}{day}T{hour}{minute}{second}")
+    }
+}
+
+impl FromLexical<Duration> for String {
+    fn from_lexical<B: Buf>(b: B) -> Self {
+        let duration = Duration::from_lexical(b);
+        duration_string(&duration)
+    }
+}
+
+pub struct YearMonthDuration(pub Duration);
+
+impl TdbDataType for YearMonthDuration {
+    fn datatype() -> Datatype {
+        Datatype::YearMonthDuration
+    }
+}
+
+impl ToLexical<YearMonthDuration> for YearMonthDuration {
+    fn to_lexical(&self) -> Bytes {
+        Duration::to_lexical(&self.0)
+    }
+}
+
+impl FromLexical<YearMonthDuration> for YearMonthDuration {
+    fn from_lexical<B: Buf>(b: B) -> Self {
+        YearMonthDuration(Duration::from_lexical(b))
+    }
+}
+
+impl FromLexical<YearMonthDuration> for String {
+    fn from_lexical<B: Buf>(b: B) -> Self {
+        let duration = Duration::from_lexical(b);
+        duration_string(&duration)
+    }
+}
+
+pub struct DayTimeDuration(pub Duration);
+
+impl TdbDataType for DayTimeDuration {
+    fn datatype() -> Datatype {
+        Datatype::DayTimeDuration
+    }
+}
+
+impl ToLexical<DayTimeDuration> for DayTimeDuration {
+    fn to_lexical(&self) -> Bytes {
+        Duration::to_lexical(&self.0)
+    }
+}
+
+impl FromLexical<DayTimeDuration> for DayTimeDuration {
+    fn from_lexical<B: Buf>(b: B) -> Self {
+        DayTimeDuration(Duration::from_lexical(b))
+    }
+}
+
+impl FromLexical<DayTimeDuration> for String {
+    fn from_lexical<B: Buf>(b: B) -> Self {
+        let duration = Duration::from_lexical(b);
+        duration_string(&duration)
+    }
+}
+
 pub struct Base64Binary(pub Vec<u8>);
 
 impl ToLexical<Base64Binary> for Base64Binary {
     fn to_lexical(&self) -> Bytes {
         Bytes::copy_from_slice(&self.0[..])
@@ -933,10 +1084,6 @@ stringy_type!(ID);
 stringy_type!(IDRef);
 stringy_type!(Entity);
 
-stringy_type!(Duration);
-stringy_type!(YearMonthDuration);
-stringy_type!(DayTimeDuration);
-
 stringy_type!(AnySimpleType);
 
 biginty_type!(PositiveInteger);
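[Annotation, not part of the patch series: PATCH 96 serializes a duration as sign, then year, month, day, hour, minute, second, each through its field type's `to_lexical`. Assuming those field encodings are the order-preserving fixed-width ones used elsewhere in the series, putting the sign byte first would make all negative durations sort before all positive ones, with larger units dominating after that; this is my reading of the field order, illustrated by a standalone miniature over just (sign, year), with an `encode` helper of my own naming:

    fn encode(sign: i8, year: i64) -> Vec<u8> {
        let mut out = Vec::new();
        out.push((sign as u8) ^ 0x80); // sign-flipped i8 comes first
        out.extend_from_slice(&((year as u64) ^ (1u64 << 63)).to_be_bytes());
        out
    }

    fn main() {
        let a = encode(-1, 5); // roughly "-P5Y"
        let b = encode(1, 2);  // roughly "P2Y"
        let c = encode(1, 10); // roughly "P10Y"
        assert!(a < b && b < c); // byte order tracks duration order
        println!("sign-first layout keeps negatives before positives");
    }
]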
From 5aa62c83bae40d1ce389b67ff0a74dee0e2ba3e2 Mon Sep 17 00:00:00 2001
From: Gavin Mendel-Gleason
Date: Tue, 13 Dec 2022 00:35:04 +0100
Subject: [PATCH 97/99] Adding string casts

---
 src/structure/tfc/datatypes.rs | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs
index 54e36259..7a4cc9ff 100644
--- a/src/structure/tfc/datatypes.rs
+++ b/src/structure/tfc/datatypes.rs
@@ -470,6 +470,13 @@ impl FromLexical for NaiveDateTime {
     }
 }
 
+impl FromLexical<NaiveDateTime> for String {
+    fn from_lexical<B: Buf>(mut b: B) -> Self {
+        let ndt = storage_to_datetime(&mut b);
+        ndt.format("%Y-%m-%dT%H:%M:%S%.fZ").to_string()
+    }
+}
+
 pub struct DateTimeStamp(pub NaiveDateTime);
 
 impl TdbDataType for DateTimeStamp {
     fn datatype() -> Datatype {
@@ -496,6 +503,13 @@ impl FromLexical for DateTimeStamp {
     }
 }
 
+impl FromLexical<DateTimeStamp> for String {
+    fn from_lexical<B: Buf>(mut b: B) -> Self {
+        let ndt = storage_to_datetime(&mut b);
+        ndt.format("%Y-%m-%dT%H:%M:%S%.fZ").to_string()
+    }
+}
+
 impl TdbDataType for NaiveTime {
     fn datatype() -> Datatype {
         Datatype::Time
@@ -519,6 +533,13 @@ impl FromLexical for NaiveTime {
     }
 }
 
+impl FromLexical<NaiveTime> for String {
+    fn from_lexical<B: Buf>(mut b: B) -> Self {
+        let ndt = NaiveTime::from_lexical(&mut b);
+        ndt.format("%H:%M:%S%.fZ").to_string()
+    }
+}
+
 pub struct Date {
     pub year: i64,
     pub month: u8,

From 07d4f1292b59a9753e9d961689a624c009a7736a Mon Sep 17 00:00:00 2001
From: Matthijs van Otterdijk
Date: Tue, 13 Dec 2022 11:01:20 +0100
Subject: [PATCH 98/99] fix test

---
 src/structure/tfc/typed.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/structure/tfc/typed.rs b/src/structure/tfc/typed.rs
index 6bea16ac..651b9a2e 100644
--- a/src/structure/tfc/typed.rs
+++ b/src/structure/tfc/typed.rs
@@ -681,7 +681,7 @@ mod tests {
         cycle(f64::NEG_INFINITY);
         cycle(f64::INFINITY);
 
-        let j = f64::from_lexical(f64::NAN.to_lexical());
+        let j = <f64 as FromLexical<f64>>::from_lexical(f64::NAN.to_lexical());
         assert!(j.is_nan())
     }
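[Annotation, not part of the patch series: the one-line fix in PATCH 98 is a consequence of PATCH 95. Once f64 has two impls, `FromLexical<f64>` and `FromLexical<f32>`, a bare `f64::from_lexical(...)` matches both and no longer compiles, so the test has to name the impl with fully qualified syntax. A standalone reproduction with a simplified trait; dropping `Buf` for plain byte slices is my simplification:

    trait FromLexical<T> {
        fn from_lexical(bytes: &[u8]) -> Self;
    }

    impl FromLexical<f64> for f64 {
        fn from_lexical(bytes: &[u8]) -> Self {
            f64::from_be_bytes(bytes[..8].try_into().unwrap())
        }
    }

    impl FromLexical<f32> for f64 {
        fn from_lexical(bytes: &[u8]) -> Self {
            f32::from_be_bytes(bytes[..4].try_into().unwrap()) as f64
        }
    }

    fn main() {
        let bytes = 1.5_f64.to_be_bytes();
        // `f64::from_lexical(&bytes)` errors here: multiple applicable items.
        let x = <f64 as FromLexical<f64>>::from_lexical(&bytes);
        assert_eq!(x, 1.5);
        println!("disambiguated with fully qualified syntax");
    }
]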
From eb31c89c1089261836e17004e0e3473877e14954 Mon Sep 17 00:00:00 2001
From: Gavin Mendel-Gleason
Date: Tue, 13 Dec 2022 11:13:34 +0100
Subject: [PATCH 99/99] Fix date types

---
 src/structure/tfc/datatypes.rs | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/src/structure/tfc/datatypes.rs b/src/structure/tfc/datatypes.rs
index 7a4cc9ff..da9d2c81 100644
--- a/src/structure/tfc/datatypes.rs
+++ b/src/structure/tfc/datatypes.rs
@@ -497,12 +497,6 @@ impl FromLexical for DateTimeStamp {
     }
 }
 
-impl FromLexical<DateTimeStamp> for NaiveDateTime {
-    fn from_lexical<B: Buf>(mut b: B) -> Self {
-        storage_to_datetime(&mut b)
-    }
-}
-
 impl FromLexical<DateTimeStamp> for String {
     fn from_lexical<B: Buf>(mut b: B) -> Self {
         let ndt = storage_to_datetime(&mut b);
@@ -671,7 +665,7 @@ impl FromLexical for String {
         let gmonth = GMonth::from_lexical(b);
         let month = gmonth.month;
         let offset = offset_string(gmonth.offset);
-        format!("-{month:02}{offset:}")
+        format!("--{month:02}{offset:}")
     }
 }
 
@@ -707,7 +701,7 @@ impl FromLexical for String {
         let gday = GDay::from_lexical(b);
         let day = gday.day;
         let offset = offset_string(gday.offset);
-        format!("--{day:02}{offset:}")
+        format!("---{day:02}{offset:}")
     }
 }
 
@@ -848,40 +842,40 @@ impl FromLexical for Duration {
     }
 }
 
 fn duration_string(duration: &Duration) -> String {
-    let year = if duration.year == 0 {
-        format!("{:04}Y", duration.year)
+    let year = if duration.year != 0 {
+        format!("{}Y", duration.year)
     } else {
         "".to_string()
     };
-    let month = if duration.month == 0 {
-        format!("{:02}M", duration.month)
+    let month = if duration.month != 0 {
+        format!("{}M", duration.month)
     } else {
         "".to_string()
     };
-    let day = if duration.day == 0 {
-        format!("{:04}D", duration.year)
+    let day = if duration.day != 0 {
+        format!("{}D", duration.day)
     } else {
         "".to_string()
     };
     if duration.hour == 0 && duration.minute == 0 && duration.second == 0 {
         format!("P{year}{month}{day}")
     } else {
-        let hour = if duration.hour == 0 {
-            format!("{:02}H", duration.hour)
+        let hour = if duration.hour != 0 {
+            format!("{}H", duration.hour)
         } else {
             "".to_string()
         };
-        let minute = if duration.minute == 0 {
-            format!("{:02}M", duration.minute)
+        let minute = if duration.minute != 0 {
+            format!("{}M", duration.minute)
         } else {
             "".to_string()
         };
-        let second = if duration.second == 0 {
-            format!("{:02}S", duration.second)
+        let second = if duration.second != 0 {
+            format!("{}S", duration.second)
        } else {
             "".to_string()
         };
-        format!("{year}{month}{day}T{hour}{minute}{second}")
+        format!("P{year}{month}{day}T{hour}{minute}{second}")
     }
 }
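[Annotation, not part of the patch series: PATCH 99 inverts the `== 0` guards (which emitted a component exactly when it was zero and dropped it otherwise), removes the zero-padding, fixes the `day`-formatted-from-`year` slip, and restores the leading "P" on the time-bearing form, bringing the output in line with xsd:duration shapes like "P1Y2MT30M". A standalone usage sketch of the corrected logic, condensed by me; `sign` is not rendered by the patch, so it is omitted here as well:

    struct Duration { year: i64, month: u8, day: u8, hour: u8, minute: u8, second: u8 }

    fn duration_string(d: &Duration) -> String {
        // Emit "<n><unit>" only for nonzero components, as the fixed code does.
        let part = |n: i64, unit: char| if n != 0 { format!("{n}{unit}") } else { String::new() };
        let date = format!("{}{}{}", part(d.year, 'Y'), part(d.month as i64, 'M'), part(d.day as i64, 'D'));
        if d.hour == 0 && d.minute == 0 && d.second == 0 {
            format!("P{date}")
        } else {
            let time = format!("{}{}{}", part(d.hour as i64, 'H'), part(d.minute as i64, 'M'), part(d.second as i64, 'S'));
            format!("P{date}T{time}")
        }
    }

    fn main() {
        let d = Duration { year: 1, month: 2, day: 0, hour: 0, minute: 30, second: 0 };
        assert_eq!(duration_string(&d), "P1Y2MT30M");
        println!("ok");
    }
]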