Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove the epochs vector from HistoryTreeNode #113

Merged
merged 4 commits into from
Dec 14, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 64 additions & 30 deletions akd/src/history_tree_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,10 @@ pub struct HistoryTreeNode {
pub label: NodeLabel,
/// The location of this node in the storage
pub location: u64,
/// The epochs this node was updated
pub epochs: Vec<u64>,
/// The last epoch this node was updated in
pub last_epoch: u64,
/// The epoch that this node was birthed in
pub birth_epoch: u64,
/// The location of this node's parent
pub parent: u64, // The root node is marked its own parent.
/// The type of node: leaf root or interior.
Expand Down Expand Up @@ -106,19 +108,21 @@ impl Clone for HistoryTreeNode {
Self {
label: self.label,
location: self.location,
epochs: self.epochs.clone(),
last_epoch: self.last_epoch,
birth_epoch: self.birth_epoch,
parent: self.parent,
node_type: self.node_type,
}
}
}

impl HistoryTreeNode {
fn new(label: NodeLabel, location: u64, parent: u64, node_type: NodeType) -> Self {
fn new(label: NodeLabel, location: u64, parent: u64, node_type: NodeType, birth_epoch: u64) -> Self {
HistoryTreeNode {
label,
location,
epochs: vec![],
birth_epoch,
last_epoch: birth_epoch,
parent, // Root node is its own parent
node_type,
}
Expand Down Expand Up @@ -248,8 +252,8 @@ impl HistoryTreeNode {
new_node_location,
parent.location,
NodeType::Interior,
epoch,
);
new_node.epochs.push(epoch);
new_node.write_to_storage(storage).await?;
set_state_map(
storage,
Expand Down Expand Up @@ -470,11 +474,11 @@ impl HistoryTreeNode {
match self.get_latest_epoch() {
Ok(latest) => {
if latest != epoch {
self.epochs.push(epoch);
self.last_epoch = epoch;
}
}
Err(_) => {
self.epochs.push(epoch);
self.last_epoch = epoch;
}
}
self.write_to_storage(storage).await?;
Expand Down Expand Up @@ -578,7 +582,7 @@ impl HistoryTreeNode {
}

pub(crate) fn get_birth_epoch(&self) -> u64 {
self.epochs[0]
self.birth_epoch
}

// gets the direction of node, i.e. if it's a left
Expand Down Expand Up @@ -623,14 +627,31 @@ impl HistoryTreeNode {
if self.get_birth_epoch() > epoch {
Err(HistoryTreeNodeError::NoChildInTreeAtEpoch(epoch, dir))
} else {
let mut chosen_ep = self.get_birth_epoch();
for existing_ep in &self.epochs {
if *existing_ep <= epoch && *existing_ep > chosen_ep {
chosen_ep = *existing_ep;
let chosen_ep = {
if self.last_epoch <= epoch {
// the "last" updated epoch is <= epoch, so it is
// the last valid state at this epoch
Some(self.last_epoch)
} else if self.birth_epoch == epoch {
// we're looking at the state at the birth epoch
Some(self.birth_epoch)
} else {
// Indeterminate, we are somewhere above the
// birth epoch but we're less than the "last" epoch.
// db query is necessary
None
}
};

if let Some(ep) = chosen_ep {
self.get_child_at_existing_epoch::<_, H>(storage, ep, direction)
.await
} else {
let target_ep = storage.get_epoch_lte_epoch(self.label, epoch).await?;
// DB query for the state <= this epoch value
self.get_child_at_existing_epoch::<_, H>(storage, target_ep, direction)
.await
}
self.get_child_at_existing_epoch::<_, H>(storage, chosen_ep, direction)
.await
}
}
}
Expand Down Expand Up @@ -658,13 +679,28 @@ impl HistoryTreeNode {
if self.get_birth_epoch() > epoch {
Err(HistoryTreeNodeError::NodeDidNotExistAtEp(self.label, epoch))
} else {
let mut chosen_ep = self.get_birth_epoch();
for existing_ep in &self.epochs {
if *existing_ep <= epoch {
chosen_ep = *existing_ep;
let chosen_ep = {
if self.last_epoch <= epoch {
// the "last" updated epoch is <= epoch, so it is
// the last valid state at this epoch
Some(self.last_epoch)
} else if self.birth_epoch == epoch {
// we're looking at the state at the birth epoch
Some(self.birth_epoch)
} else {
// Indeterminate, we are somewhere above the
// birth epoch but we're less than the "last" epoch.
// db query is necessary
None
}
};
if let Some(ep) = chosen_ep {
self.get_state_at_existing_epoch(storage, ep).await
} else {
let target_ep = storage.get_epoch_lte_epoch(self.label, epoch).await?;
// DB query for the state <= this epoch value
self.get_state_at_existing_epoch(storage, target_ep).await
}
self.get_state_at_existing_epoch(storage, chosen_ep).await
}
}

Expand All @@ -681,12 +717,7 @@ impl HistoryTreeNode {
/* Functions for compression-related operations */

pub(crate) fn get_latest_epoch(&self) -> Result<u64, HistoryTreeNodeError> {
match self.epochs.len() {
0 => Err(HistoryTreeNodeError::NodeCreatedWithoutEpochs(
self.label.get_val(),
)),
n => Ok(self.epochs[n - 1]),
}
Ok(self.last_epoch)
}

/////// Helpers /////////
Expand Down Expand Up @@ -747,9 +778,10 @@ pub(crate) async fn get_empty_root<H: Hasher, S: Storage + Send + Sync>(
storage: &S,
ep: Option<u64>,
) -> Result<HistoryTreeNode, HistoryTreeNodeError> {
let mut node = HistoryTreeNode::new(NodeLabel::new(0u64, 0u32), 0, 0, NodeType::Root);
let mut node = HistoryTreeNode::new(NodeLabel::new(0u64, 0u32), 0, 0, NodeType::Root, 0u64);
if let Some(epoch) = ep {
node.epochs.push(epoch);
node.birth_epoch = epoch;
node.last_epoch = epoch;
let new_state: HistoryNodeState =
HistoryNodeState::new::<H>(NodeStateKey(node.label, epoch));
set_state_map(storage, new_state).await?;
Expand All @@ -769,7 +801,8 @@ pub(crate) async fn get_leaf_node<H: Hasher, S: Storage + Sync + Send>(
let node = HistoryTreeNode {
label,
location,
epochs: vec![birth_epoch],
birth_epoch,
last_epoch: birth_epoch,
parent,
node_type: NodeType::Leaf,
};
Expand All @@ -794,7 +827,8 @@ pub(crate) async fn get_leaf_node_without_hashing<H: Hasher, S: Storage + Sync +
let node = HistoryTreeNode {
label,
location,
epochs: vec![birth_epoch],
birth_epoch,
last_epoch: birth_epoch,
parent,
node_type: NodeType::Leaf,
};
Expand Down
74 changes: 74 additions & 0 deletions akd/src/storage/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,43 @@ impl Storage for AsyncInMemoryDatabase {
}
Ok(map)
}

async fn get_epoch_lte_epoch(
&self,
node_label: crate::node_state::NodeLabel,
epoch_in_question: u64,
) -> Result<u64, StorageError> {
Comment on lines +270 to +274
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I guess the cool thing about doing this "linear search" for the correct epoch is that it can be done efficiently with a MySQL query, but may be less efficient if we are just relying on a simple set/get interface. Would it be better to find a middle-ground here with some pointer-based solution? Or perhaps we are not too worried about the performance impact of this function because it is not going to be called in lookup or publish?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, so the key history and audit proof queries do make this call. We could probably figure out a nifty way to cache it, but I think this is a good starting point at least.

I agree that it's not perfect but it completely mitigates the unbounded storage problems.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, good point. Let's merge this for now then, and if we run into issues with performance on key history / audit, we can always revisit.

Thanks!!

Copy link
Member

@afterdusk afterdusk Dec 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe most key-value storage systems provide some sort of key scan or match API that would make regex matching or prefix searching possible.

It would then be up to the storage layer implementer to use an encoding scheme that would allow for the most efficient searching. Perhaps still less efficient than an RDBMS like MySQL but might not be terrible if optimized to the specific system :)

let ids = (0..=epoch_in_question)
.map(|epoch| crate::node_state::NodeStateKey(node_label, epoch))
.collect::<Vec<_>>();
let data = self
.batch_get::<crate::node_state::HistoryNodeState>(ids)
.await?;
let mut epochs = data
.into_iter()
.map(|item| {
if let DbRecord::HistoryNodeState(state) = item {
state.key.1
} else {
0u64
}
})
.collect::<Vec<u64>>();
// reverse sort
epochs.sort_unstable_by(|a, b| b.cmp(a));

// move through the epochs from largest to smallest, first one that's <= ```epoch_in_question```
// and Bob's your uncle
for item in epochs {
if item <= epoch_in_question {
return Ok(item);
}
}
Err(StorageError::GetError(format!(
"Node (val: {}, len: {}) did not exist <= epoch {}",
node_label.val, node_label.len, epoch_in_question
)))
}
}

// ===== In-Memory database w/caching (for benchmarking) ==== //
Expand Down Expand Up @@ -606,4 +643,41 @@ impl Storage for AsyncInMemoryDbWithCache {
}
Ok(map)
}

/// Retrieve the largest epoch <= `epoch_in_question` for which a
/// `HistoryNodeState` keyed by `node_label` is returned from `batch_get`.
///
/// # Errors
///
/// Propagates any error from `batch_get`, and returns
/// `StorageError::GetError` when no state for `node_label` is found at or
/// before `epoch_in_question`.
async fn get_epoch_lte_epoch(
    &self,
    node_label: crate::node_state::NodeLabel,
    epoch_in_question: u64,
) -> Result<u64, StorageError> {
    // The range must be inclusive: a state written exactly at
    // `epoch_in_question` is a valid answer for a "<=" query.
    let ids = (0..=epoch_in_question)
        .map(|epoch| crate::node_state::NodeStateKey(node_label, epoch))
        .collect::<Vec<_>>();
    let data = self
        .batch_get::<crate::node_state::HistoryNodeState>(ids)
        .await?;

    data.into_iter()
        // Skip any record that is not a node state instead of mapping it to a
        // sentinel epoch of 0, which could be mistaken for a real epoch.
        .filter_map(|item| {
            if let DbRecord::HistoryNodeState(state) = item {
                Some(state.key.1)
            } else {
                None
            }
        })
        .filter(|found| *found <= epoch_in_question)
        // The largest qualifying epoch is the answer; no sort is needed.
        .max()
        .ok_or_else(|| {
            StorageError::GetError(format!(
                "Node (val: {}, len: {}) did not exist <= epoch {}",
                node_label.val, node_label.len, epoch_in_question
            ))
        })
}
}
14 changes: 12 additions & 2 deletions akd/src/storage/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ pub trait Storage: Clone {
/// Retrieve a stored record from the data layer
async fn get<St: Storable>(&self, id: St::Key) -> Result<DbRecord, StorageError>;

/// Retrieve the last epoch <= ```epoch_in_question``` where the node with label
/// ```node_label``` was edited.
///
/// Returns the matching epoch on success, or a ```StorageError``` on failure.
/// NOTE(review): the in-memory implementations return ```StorageError::GetError```
/// when no such epoch exists; presumably other backends should do the same — confirm.
async fn get_epoch_lte_epoch(
&self,
node_label: crate::node_state::NodeLabel,
epoch_in_question: u64,
) -> Result<u64, StorageError>;

/// Retrieve a batch of records by id
async fn batch_get<St: Storable>(
&self,
Expand Down Expand Up @@ -138,7 +146,8 @@ pub trait Storage: Clone {
label_val: u64,
label_len: u32,
location: u64,
epochs: Vec<u64>,
birth_epoch: u64,
last_epoch: u64,
parent: u64,
node_type: u8,
) -> crate::history_tree_node::HistoryTreeNode {
Expand All @@ -148,7 +157,8 @@ pub trait Storage: Clone {
len: label_len,
},
location,
epochs,
birth_epoch,
last_epoch,
parent,
node_type: crate::history_tree_node::NodeType::from_u8(node_type),
}
Expand Down
6 changes: 4 additions & 2 deletions akd/src/storage/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ async fn test_get_and_set_item<Ns: Storage>(storage: &Ns) {
let node = HistoryTreeNode {
label: NodeLabel { val: 13, len: 1 },
location: 234,
epochs: vec![123u64, 234u64, 345u64],
birth_epoch: 123,
last_epoch: 234,
parent: 1,
node_type: NodeType::Leaf,
};
Expand All @@ -100,7 +101,8 @@ async fn test_get_and_set_item<Ns: Storage>(storage: &Ns) {
assert_eq!(got_node.location, node.location);
assert_eq!(got_node.parent, node.parent);
assert_eq!(got_node.node_type, node.node_type);
assert_eq!(got_node.epochs, node.epochs);
assert_eq!(got_node.birth_epoch, node.birth_epoch);
assert_eq!(got_node.last_epoch, node.last_epoch);
} else {
panic!("Failed to retrieve History Tree Node");
}
Expand Down
25 changes: 20 additions & 5 deletions akd/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ async fn test_set_child_without_hash_at_root() -> Result<(), HistoryTreeNodeErro
root.get_latest_epoch().unwrap_or(0) == 1,
"Latest epochs don't match!"
);
assert!(root.epochs.len() == 1, "Ask yourself some pressing questions, such as: Why are there random extra epochs in the root?");
assert!(
root.birth_epoch == root.last_epoch,
"How would the last epoch be different from the birth epoch without an update?"
);

Ok(())
}
Expand Down Expand Up @@ -103,7 +106,10 @@ async fn test_set_children_without_hash_at_root() -> Result<(), HistoryTreeNodeE
}
let latest_ep = root.get_latest_epoch();
assert!(latest_ep.unwrap_or(0) == 1, "Latest epochs don't match!");
assert!(root.epochs.len() == 1, "Ask yourself some pressing questions, such as: Why are there random extra epochs in the root?");
assert!(
root.birth_epoch == root.last_epoch,
"How would the last epoch be different from the birth epoch without an update?"
);

Ok(())
}
Expand Down Expand Up @@ -173,7 +179,10 @@ async fn test_set_children_without_hash_multiple_at_root() -> Result<(), History
}
let latest_ep = root.get_latest_epoch();
assert!(latest_ep.unwrap_or(0) == 2, "Latest epochs don't match!");
assert!(root.epochs.len() == 2, "Ask yourself some pressing questions, such as: Why are there random extra epochs in the root?");
assert!(
root.birth_epoch < root.last_epoch,
"How is the last epoch not higher than the birth epoch after an udpate?"
);

Ok(())
}
Expand Down Expand Up @@ -242,7 +251,10 @@ async fn test_get_child_at_existing_epoch_multiple_at_root() -> Result<(), Histo
}
let latest_ep = root.get_latest_epoch();
assert!(latest_ep.unwrap_or(0) == 2, "Latest epochs don't match!");
assert!(root.epochs.len() == 2, "Ask yourself some pressing questions, such as: Why are there random extra epochs in the root?");
assert!(
root.birth_epoch < root.last_epoch,
"How is the last epoch not higher than the birth epoch after an udpate?"
);

Ok(())
}
Expand Down Expand Up @@ -316,7 +328,10 @@ pub async fn test_get_child_at_epoch_at_root() -> Result<(), HistoryTreeNodeErro
}
let latest_ep = root.get_latest_epoch();
assert!(latest_ep.unwrap_or(0) == 4, "Latest epochs don't match!");
assert!(root.epochs.len() == 3, "Ask yourself some pressing questions, such as: Why are there random extra epochs in the root?");
assert!(
root.birth_epoch < root.last_epoch,
"How is the last epoch not higher than the birth epoch after an udpate?"
);

Ok(())
}
Expand Down
Loading