Skip to content

Commit

Permalink
local_backend: use ContentHash rather than hashing protos
Browse files Browse the repository at this point in the history
Insulates identifiers from the unstable serialized form.
  • Loading branch information
Ralith committed Nov 13, 2022
1 parent f192c1a commit e8f2db3
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 59 deletions.
130 changes: 90 additions & 40 deletions lib/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use std::vec::Vec;

use thiserror::Error;

use crate::content_hash::ContentHash;
use crate::repo_path::{RepoPath, RepoPathComponent};

content_hash! {
Expand Down Expand Up @@ -59,8 +60,10 @@ impl CommitId {
}
}

#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct ChangeId(Vec<u8>);
// Identifier newtype around a raw byte vector.
// NOTE(review): declared inside `content_hash!`, presumably so the macro can
// generate a `ContentHash` impl for it — confirm against the macro definition.
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct ChangeId(Vec<u8>);
}

impl Debug for ChangeId {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
Expand Down Expand Up @@ -94,8 +97,10 @@ impl ChangeId {
}
}

#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct TreeId(Vec<u8>);
// Identifier newtype around a raw byte vector; used as the payload of
// `TreeValue::Tree` and as `Commit::root_tree`.
// NOTE(review): declared inside `content_hash!`, presumably so the macro can
// generate a `ContentHash` impl for it — confirm against the macro definition.
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct TreeId(Vec<u8>);
}

impl Debug for TreeId {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
Expand Down Expand Up @@ -129,8 +134,10 @@ impl TreeId {
}
}

#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct FileId(Vec<u8>);
// Identifier newtype around a raw byte vector.
// NOTE(review): declared inside `content_hash!`, presumably so the macro can
// generate a `ContentHash` impl for it — confirm against the macro definition.
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct FileId(Vec<u8>);
}

impl Debug for FileId {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
Expand Down Expand Up @@ -160,8 +167,10 @@ impl FileId {
}
}

#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct SymlinkId(Vec<u8>);
// Identifier newtype around a raw byte vector.
// NOTE(review): declared inside `content_hash!`, presumably so the macro can
// generate a `ContentHash` impl for it — confirm against the macro definition.
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct SymlinkId(Vec<u8>);
}

impl Debug for SymlinkId {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
Expand Down Expand Up @@ -191,8 +200,10 @@ impl SymlinkId {
}
}

#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct ConflictId(Vec<u8>);
// Identifier newtype around a raw byte vector; used as the payload of
// `TreeValue::Conflict`.
// NOTE(review): declared inside `content_hash!`, presumably so the macro can
// generate a `ContentHash` impl for it — confirm against the macro definition.
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct ConflictId(Vec<u8>);
}

impl Debug for ConflictId {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
Expand Down Expand Up @@ -256,39 +267,47 @@ impl Timestamp {
}
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Signature {
pub name: String,
pub email: String,
pub timestamp: Timestamp,
// Name, email and timestamp triple; `Commit` stores one of these for its
// `author` and one for its `committer`.
// NOTE(review): declared inside `content_hash!`, presumably so the macro can
// generate a `ContentHash` impl covering all three fields — confirm.
content_hash! {
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Signature {
pub name: String,
pub email: String,
pub timestamp: Timestamp,
}
}

#[derive(Debug, Clone)]
pub struct Commit {
pub parents: Vec<CommitId>,
pub predecessors: Vec<CommitId>,
pub root_tree: TreeId,
pub change_id: ChangeId,
pub description: String,
pub author: Signature,
pub committer: Signature,
// Backend-level commit record: parent and predecessor commit ids, the id of
// the root tree, the change id, the description text, and the author and
// committer signatures.
// NOTE(review): declared inside `content_hash!`; presumably field declaration
// order feeds the generated hash, so reordering fields would change ids —
// confirm against the macro definition before rearranging.
content_hash! {
#[derive(Debug, Clone)]
pub struct Commit {
pub parents: Vec<CommitId>,
pub predecessors: Vec<CommitId>,
pub root_tree: TreeId,
pub change_id: ChangeId,
pub description: String,
pub author: Signature,
pub committer: Signature,
}
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ConflictPart {
// TODO: Store e.g. CommitId here too? Labels (theirs/ours/base)? Would those still be
// useful e.g. after rebasing this conflict?
pub value: TreeValue,
// One term of a `Conflict` (an element of its `removes` or `adds` list);
// currently carries only the `TreeValue` for that term.
content_hash! {
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ConflictPart {
// TODO: Store e.g. CommitId here too? Labels (theirs/ours/base)? Would those still be
// useful e.g. after rebasing this conflict?
pub value: TreeValue,
}
}

#[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct Conflict {
// A conflict is represented by a list of positive and negative states that need to be applied.
// In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C],
// remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the
// same as non-conflict A.
pub removes: Vec<ConflictPart>,
pub adds: Vec<ConflictPart>,
// Conflict record made of two lists of `ConflictPart`s: the states to remove
// and the states to add. `Default` yields a conflict with both lists empty.
content_hash! {
#[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct Conflict {
// A conflict is represented by a list of positive and negative states that need to be applied.
// In a simple 3-way merge of B and C with merge base A, the conflict will be { add: [B, C],
// remove: [A] }. Also note that a conflict of the form { add: [A], remove: [] } is the
// same as non-conflict A.
pub removes: Vec<ConflictPart>,
pub adds: Vec<ConflictPart>,
}
}

#[derive(Debug, Error, PartialEq, Eq)]
Expand All @@ -310,6 +329,35 @@ pub enum TreeValue {
Conflict(ConflictId),
}

impl ContentHash for TreeValue {
fn hash(&self, state: &mut impl digest::Update) {
use TreeValue::*;
match *self {
Normal { ref id, executable } => {
state.update(&0u32.to_le_bytes());
id.hash(state);
executable.hash(state);
}
Symlink(ref id) => {
state.update(&1u32.to_le_bytes());
id.hash(state);
}
Tree(ref id) => {
state.update(&2u32.to_le_bytes());
id.hash(state);
}
GitSubmodule(ref id) => {
state.update(&3u32.to_le_bytes());
id.hash(state);
}
Conflict(ref id) => {
state.update(&4u32.to_le_bytes());
id.hash(state);
}
}
}
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct TreeEntry<'a> {
name: &'a RepoPathComponent,
Expand Down Expand Up @@ -344,9 +392,11 @@ impl<'a> Iterator for TreeEntriesNonRecursiveIterator<'a> {
}
}

#[derive(Default, Debug, Clone)]
pub struct Tree {
entries: BTreeMap<RepoPathComponent, TreeValue>,
// Tree object: a map from path component to the `TreeValue` stored under it.
// `BTreeMap` keeps entries ordered by key, so iteration order is deterministic.
// NOTE(review): declared inside `content_hash!`; presumably the generated hash
// walks `entries` in that key order — confirm against the macro and the
// `ContentHash` impl for `BTreeMap`.
content_hash! {
#[derive(Default, Debug, Clone)]
pub struct Tree {
entries: BTreeMap<RepoPathComponent, TreeValue>,
}
}

impl Tree {
Expand Down
6 changes: 6 additions & 0 deletions lib/src/content_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ impl ContentHash for () {
fn hash(&self, _: &mut impl digest::Update) {}
}

impl ContentHash for bool {
    /// Hashes the boolean as a single byte (`false` is 0, `true` is 1) by
    /// converting it to `u8` and delegating to the `u8` implementation.
    fn hash(&self, state: &mut impl digest::Update) {
        let as_byte: u8 = (*self).into();
        as_byte.hash(state);
    }
}

impl ContentHash for u8 {
fn hash(&self, state: &mut impl digest::Update) {
state.update(&[*self]);
Expand Down
30 changes: 14 additions & 16 deletions lib/src/local_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use crate::backend::{
ConflictId, ConflictPart, FileId, MillisSinceEpoch, Signature, SymlinkId, Timestamp, Tree,
TreeId, TreeValue,
};
use crate::content_hash::ContentHash;
use crate::file_util::persist_content_addressed_temp_file;
use crate::repo_path::{RepoPath, RepoPathComponent};

Expand Down Expand Up @@ -72,7 +73,7 @@ impl LocalBackend {

pub fn load(store_path: &Path) -> Self {
let root_commit_id = CommitId::from_bytes(&[0; 64]);
let empty_tree_id = TreeId::from_hex("786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce");
let empty_tree_id = TreeId::from_hex("482ae5a29fbe856c7272f2071b8b0f0359ee2d89ff392b8a900643fbd0836eccd067b8bf41909e206c90d45d6e7d8b6686b93ecaee5fe1a9060d87b672101310");
LocalBackend {
path: store_path.to_path_buf(),
root_commit_id,
Expand Down Expand Up @@ -192,12 +193,9 @@ impl Backend for LocalBackend {
let temp_file = NamedTempFile::new_in(&self.path)?;

let proto = tree_to_proto(tree);
let mut proto_bytes: Vec<u8> = Vec::new();
proto.write_to_writer(&mut proto_bytes)?;
proto.write_to_writer(&mut temp_file.as_file())?;

temp_file.as_file().write_all(&proto_bytes)?;

let id = TreeId::new(Blake2b512::digest(&proto_bytes).to_vec());
let id = TreeId::new(hash(tree).to_vec());

persist_content_addressed_temp_file(temp_file, self.tree_path(&id))?;
Ok(id)
Expand All @@ -215,12 +213,9 @@ impl Backend for LocalBackend {
let temp_file = NamedTempFile::new_in(&self.path)?;

let proto = conflict_to_proto(conflict);
let mut proto_bytes: Vec<u8> = Vec::new();
proto.write_to_writer(&mut proto_bytes)?;

temp_file.as_file().write_all(&proto_bytes)?;
proto.write_to_writer(&mut temp_file.as_file())?;

let id = ConflictId::new(Blake2b512::digest(&proto_bytes).to_vec());
let id = ConflictId::new(hash(conflict).to_vec());

persist_content_addressed_temp_file(temp_file, self.conflict_path(&id))?;
Ok(id)
Expand All @@ -242,12 +237,9 @@ impl Backend for LocalBackend {
let temp_file = NamedTempFile::new_in(&self.path)?;

let proto = commit_to_proto(commit);
let mut proto_bytes: Vec<u8> = Vec::new();
proto.write_to_writer(&mut proto_bytes)?;

temp_file.as_file().write_all(&proto_bytes)?;
proto.write_to_writer(&mut temp_file.as_file())?;

let id = CommitId::new(Blake2b512::digest(&proto_bytes).to_vec());
let id = CommitId::new(hash(commit).to_vec());

persist_content_addressed_temp_file(temp_file, self.commit_path(&id))?;
Ok(id)
Expand Down Expand Up @@ -412,3 +404,9 @@ fn conflict_part_to_proto(part: &ConflictPart) -> crate::protos::store::conflict
proto.content = MessageField::some(tree_value_to_proto(&part.value));
proto
}

fn hash(x: &impl ContentHash) -> digest::Output<Blake2b512> {
let mut hasher = Blake2b512::default();
x.hash(&mut hasher);
hasher.finalize()
}
8 changes: 5 additions & 3 deletions lib/src/repo_path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ use thiserror::Error;

use crate::file_util;

#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub struct RepoPathComponent {
value: String,
// Wrapper around a single `String` value; used as the key type of
// `Tree::entries` in backend.rs.
// NOTE(review): declared inside `content_hash!`, presumably so the macro can
// generate a `ContentHash` impl for it — confirm against the macro definition.
content_hash! {
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub struct RepoPathComponent {
value: String,
}
}

impl RepoPathComponent {
Expand Down

0 comments on commit e8f2db3

Please sign in to comment.