diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md new file mode 100644 index 0000000..8dc653f --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md @@ -0,0 +1,65 @@ + + +## Description + + +reviewpad:summary + +## Issue + +Fixes # + +## Type of change + +Please mark the relevant option(s): + +- [ ] New feature, functionality or library +- [ ] Bug fix +- [ ] Code health or cleanup +- [ ] Major breaking change +- [ ] Documentation +- [ ] Other + +## List of changes + + + +- Change #1 +- Change #2 +- ... + +## Testing + +- [ ] **Task specific tests or benchmarks**: `go test ...` +- [ ] **New tests or benchmarks**: `go test ...` +- [ ] **All tests**: `go test -v` + + + +## Required Checklist + +- [ ] I have performed a self-review of my own code +- [ ] I have commented my code, particularly in hard-to-understand areas +- [ ] I have added, or updated, [`godoc` format comments](https://go.dev/blog/godoc) on touched members (see: [tip.golang.org/doc/comment](https://tip.golang.org/doc/comment)) +- [ ] I have tested my changes using the available tooling +- [ ] I have updated the corresponding CHANGELOG + +### If Applicable Checklist + +- [ ] Update any relevant README(s) +- [ ] Add or update any relevant or supporting [mermaid](https://mermaid-js.github.io/mermaid/) diagrams +- [ ] I have added tests that prove my fix is effective or that my feature works diff --git a/.github/workflows/fuzz_build.yml b/.github/workflows/fuzz_build.yml deleted file mode 100644 index 32a074f..0000000 --- a/.github/workflows/fuzz_build.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Build Fuzz - -on: - pull_request: - push: - branches: - - master - -jobs: - build_fuzz: - runs-on: ubuntu-latest - steps: - - uses: actions/setup-go@v2 - with: - go-version: "1.15" - - uses: actions/checkout@v2 - - uses: technote-space/get-diff-action@v4 - with: - PATTERNS: | - **/**.go - go.mod - go.sum - - name: install deps - run: go get -u github.com/dvyukov/go-fuzz/go-fuzz github.com/dvyukov/go-fuzz/go-fuzz-build - - name: build fuzz - run: go-fuzz-build - working-directory: fuzz diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 96f0822..b295dac 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,62 +1,102 @@ -name: Tests +name: Test & Build on: pull_request: push: branches: - - master + - main - release/** +env: + # Even though we can test against multiple versions, this one is considered a target version. + TARGET_GOLANG_VERSION: "1.18" + jobs: - build: + tests: runs-on: ubuntu-latest strategy: fail-fast: false matrix: - goarch: ["arm64", "amd64"] - timeout-minutes: 5 + go: ["1.18"] + name: Go ${{ matrix.go }} test steps: - - uses: actions/setup-go@v2 + - uses: actions/checkout@v3 + - name: Setup go + uses: actions/setup-go@v3 with: - go-version: "1.15" - - uses: actions/checkout@v2 - - uses: technote-space/get-diff-action@v4 + go-version: ${{ matrix.go }} + - name: Setup Golang caches + uses: actions/cache@v3 with: - PATTERNS: | - **/**.go - go.mod - go.sum - - name: install - run: GOOS=linux GOARCH=${{ matrix.goarch }} go build - if: "env.GIT_DIFF != ''" + path: | + ~/.cache/go-build + ~/go/pkg/mod + key: ${{ runner.os }}-golang-${{ matrix.go }}-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-golang-${{ matrix.go }}- + - name: Create coverage report and run tests + run: | + set -euo pipefail + GODEBUG=netdns=cgo go test -p 1 -json ./... 
-mod=readonly -timeout 8m -race -coverprofile=coverage.txt -covermode=atomic 2>&1 | tee test_results.json + - name: Sanitize test results + # We're utilizing `tee` above which can capture non-json stdout output so we need to remove non-json lines before additional parsing and submitting it to the external github action. + if: ${{ always() && env.TARGET_GOLANG_VERSION == matrix.go }} + run: cat test_results.json | jq -c -R 'fromjson? | select(type == "object")' > tmp.json && mv tmp.json test_results.json + - name: Output test failures + # Makes it easier to find failed tests so no need to scroll through the whole log. + if: ${{ failure() && env.TARGET_GOLANG_VERSION == matrix.go }} + run: | + jq --argjson fail_tests "$(jq -c -r 'select(.Action == "fail") | select(.Test) | .Test' test_results.json | jq -R -s -c -r 'split("\n") | map(select(length>0))')" 'select(.Test as $t | ($fail_tests | arrays)[] | select($t == .)) | select(.Output) | .Output' test_results.json | jq -r | sed ':a;N;$!ba;s/\n\n/\n/g' > test_failures.json + cat test_failures.json + exit 1 + - name: Upload test results + if: ${{ always() && env.TARGET_GOLANG_VERSION == matrix.go }} + uses: actions/upload-artifact@v3 + with: + name: test-results + path: | + test_*.json + - name: Annotate tests on GitHub + # Only annotate if the test failed on target version to avoid duplicated annotations on GitHub. + if: ${{ always() && env.TARGET_GOLANG_VERSION == matrix.go }} + uses: guyarb/golang-test-annotations@v0.5.1 + with: + test-results: test_results.json + - name: Upload coverage to Codecov + if: ${{ always() && env.TARGET_GOLANG_VERSION == matrix.go }} + uses: codecov/codecov-action@v3 + with: + files: ./coverage.txt + - name: golangci-lint + if: ${{ always() && env.TARGET_GOLANG_VERSION == matrix.go }} + uses: golangci/golangci-lint-action@v3 + with: + version: latest + args: --timeout=10m + skip-cache: true + only-new-issues: true - tests: + build: runs-on: ubuntu-latest - needs: build + needs: tests strategy: fail-fast: false matrix: - goarch: ["amd64"] + goarch: ["arm64", "amd64"] + go: ["1.18"] + timeout-minutes: 5 + name: Build for ${{ matrix.goarch }} steps: - - uses: actions/setup-go@v2 + - uses: actions/setup-go@v3 with: - go-version: "1.15" - - uses: actions/checkout@v2 + go-version: ${{ matrix.go }} + - uses: actions/checkout@v3 - uses: technote-space/get-diff-action@v4 with: PATTERNS: | **/**.go go.mod go.sum - - name: Set up Go - uses: actions/setup-go@v2 - with: - go-version: 1.15 - - name: test & coverage report creation - run: | - GOARCH=${{ matrix.goarch }} go test -mod=readonly -timeout 8m -race -coverprofile=coverage.txt -covermode=atomic - if: env.GIT_DIFF - - uses: codecov/codecov-action@v1.0.15 - with: - file: ./coverage.txt + - name: Go build + run: GOOS=linux GOARCH=${{ matrix.goarch }} go build if: env.GIT_DIFF diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..24d76ea --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +testdata/ diff --git a/README.md b/README.md index 776059e..e7f2fd8 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,304 @@ -# smt +# smt -A Go library that implements a Sparse Merkle tree for a key-value map. The tree implements the same optimisations specified in the [Libra whitepaper][libra whitepaper], to reduce the number of hash operations required per tree operation to O(k) where k is the number of non-empty elements in the tree. 
- -[![Tests](https://github.com/pokt-network/smt/actions/workflows/test.yml/badge.svg)](https://github.com/pokt-network/smt/actions/workflows/test.yml) -[![codecov](https://codecov.io/gh/pokt-network/smt/branch/master/graph/badge.svg?token=U3GGEDSA94)](https://codecov.io/gh/pokt-network/smt) +[![Tag](https://img.shields.io/github/v/tag/pokt-network/smt.svg?sort=semver)](https://img.shields.io/github/v/tag/pokt-network/smt.svg?sort=semver) [![GoDoc](https://godoc.org/github.com/pokt-network/smt?status.svg)](https://godoc.org/github.com/pokt-network/smt) +[![Go Report Card](https://goreportcard.com/badge/github.com/pokt-network/smt)](https://goreportcard.com/report/github.com/pokt-network/smt) +[![Tests](https://github.com/pokt-network/smt/actions/workflows/test.yml/badge.svg)](https://github.com/pokt-network/smt/actions/workflows/test.yml) +[![codecov](https://codecov.io/gh/pokt-network/smt/branch/main/graph/badge.svg)](https://codecov.io/gh/pokt-network/smt) + +Note: **Requires Go 1.18+** + +- [Overview](#overview) +- [Implementation](#implementation) + - [Inner Nodes](#inner-nodes) + - [Extension Nodes](#extension-nodes) + - [Leaf Nodes](#leaf-nodes) + - [Lazy Nodes](#lazy-nodes) + - [Lazy Loading](#lazy-loading) + - [Visualisations](#visualisations) + - [General Tree Structure](#general-tree-structure) + - [Lazy Nodes](#lazy-nodes-1) +- [Paths](#paths) + - [Visualisation](#visualisation) +- [Values](#values) +- [Hashers \& Digests](#hashers--digests) +- [Proofs](#proofs) + - [Verification](#verification) +- [Database](#database) + - [Data Loss](#data-loss) +- [Example](#example) + +## Overview + +This is a Go library that implements a Sparse Merkle tree for a key-value map. The tree implements the same optimisations specified in the [Libra whitepaper][libra whitepaper], to reduce the number of hash operations required per tree operation to O(k) where k is the number of non-empty elements in the tree. + +Sparse Merkle Trees (SMTs) are efficient and secure data structures for storing key-value pairs. They use a hash-based tree structure to represent the data sparsely, saving memory. Cryptographic hash functions ensure data integrity and authenticity. SMTs enable users to prove the existence or non-existence of specific key-value pairs by constructing cryptographic proofs. These properties make SMTs valuable in applications like blockchains, decentralized databases, and authenticated data structures, providing optimized and trustworthy data storage and verification. + +## Implementation + +The SMT has 4 node types that are used to construct the tree: + +- Inner Nodes + - Prefixed `[]byte{1}` + - `digest = hash([]byte{1} + leftChild.digest + rightChild.digest)` +- Extension Nodes + - Prefixed `[]byte{2}` + - `digest = hash([]byte{2} + pathBounds + path + child.digest)` +- Leaf Nodes + - Prefixed `[]byte{0}` + - `digest = hash([]byte{0} + path + value)` +- Lazy Nodes + - Prefix of the actual node type is stored in the digest + - `digest = persistedDigest` + +### Inner Nodes + +Inner nodes represent a branch in the tree with two **non-nil** child nodes. The inner node has an internal `digest` which represents the hash of the child nodes concatenated hashes. + +### Extension Nodes + +Extension nodes represent a singly linked chain of inner nodes, with a single child. They are used to represent a common path in the tree and as such contain the path and bounds of the path they represent. 
The `digest` of an extension node is the hash of its path bounds, the path itself and the child node's digest concatenated. + +### Leaf Nodes + +Leaf nodes store the full path which they represent and also the hash of the value they store. The `digest` of a leaf node is the hash of the leaf node's path and value concatenated. + +The SMT stores only the hashes of the values in the tree, not the raw values themselves. In order to store the raw values in the underlying database, the option `WithValueHasher(nil)` must be passed into the `NewSparseMerkleTree` constructor. + +### Lazy Nodes + +Lazy nodes represent uncached, persisted nodes, and as such only store the `digest` of the node. When a lazy node is accessed, its node type will be determined and the full node will be populated with any relevant fields such as its children and path. + +### Lazy Loading + +This library uses a cached, lazy-loaded tree structure to optimise performance: it avoids reading from or writing to the underlying database on each operation, deferring any underlying changes until the `Commit()` function is called. + +All nodes have a `persisted` field which signals whether they have been persisted to the underlying database or not. In practice this gives a large performance optimisation by working on cached data rather than reading from/writing to the database on each operation. If a node is deleted from the tree it is marked as `orphaned` and will be deleted from the database when the `Commit()` function is called. + +Once the `Commit()` function is called the tree will delete any orphaned nodes from the database and write the key-value pairs of all the unpersisted leaf nodes' hashes and their values to the database. + +### Visualisations + +The following diagrams are representations of how the tree and its components can be visualised. + +#### General Tree Structure + +The different node types described above give the tree a structure similar to the following: + +```mermaid +graph TD + subgraph Root + A["Hash(Hash(Path+Hash1)+Hash(Hash2+(Hash(Hash3+Hash4))))"] + end + subgraph BI[Inner Node] + B1["Hash(Hash2+(Hash(Hash3+Hash4)))"] + end + subgraph BE[Extension Node] + B2["Hash(Path+Hash1)"] + end + subgraph CI[Inner Node] + C1["Hash(Hash3+Hash4)"] + end + subgraph CL[Leaf Node] + C2[Hash2] + end + subgraph DL1[Leaf Node] + D1[Hash3] + end + subgraph DL2[Leaf Node] + D2[Hash4] + end + subgraph EL[Leaf Node] + E1[Hash1] + end + Root-->|0| BE + Root-->|1| BI + BI-->|0| CL + BI-->|1| CI + CI-->|0| DL1 + CI-->|1| DL2 + BE-->EL +``` + +#### Lazy Nodes + +When importing a tree via `ImportSparseMerkleTree` the tree will be lazily loaded from the root hash provided. As such the initial tree structure would contain just a single lazy node, whose digest is the root hash of the tree, until the tree is used and nodes have to be resolved from the database. + +```mermaid +graph TD + subgraph L[Lazy Node] + A[rootHash] + end + subgraph T[Tree] + L + end +``` + +If we were to resolve just this root node, we could have the following tree structure: + +```mermaid +graph TD + subgraph I[Inner Node] + A["Hash(Hash1 + Hash2)"] + end + subgraph L1[Lazy Node] + B["Hash1"] + end + subgraph L2[Lazy Node] + C["Hash2"] + end + subgraph T[Tree] + I --> L1 + I --> L2 + end +``` + +Where `Hash(Hash1 + Hash2)` is the same root hash as the previous example. + +## Paths + +Paths are **only** stored in two types of nodes: Leaf nodes and Extension nodes.
+ +- Extension nodes contain not only the path they represent but also the path bounds (i.e. the start and end of the path they cover). +- Leaf nodes contain the full path which they represent, as well as the value stored at that path. + +Inner nodes do **not** contain a path, as they represent a branch in the tree and not a path. As such their children, _if they are extension nodes or leaf nodes_, will hold a path value. + +### Visualisation + +The following diagram shows how paths are stored in the different nodes of the tree. In the actual SMT, paths are not 8-bit binary strings but are instead the returned values of the `PathHasher` (discussed below). These are then used to calculate the path bit (`0` or `1`) at any index of the path byte slice. + +```mermaid +graph LR + subgraph RI[Inner Node] + A[Root Hash] + end + subgraph I1[Inner Node] + B[Hash] + end + subgraph I2[Inner Node] + C[Hash] + end + subgraph L1[Leaf Node] + D[Path: 0b0010000] + E[Value: 0x01] + end + subgraph L3[Leaf Node] + F[Path: 0b1010000] + G[Value: 0x03] + end + subgraph L4[Leaf Node] + H[Path: 0b1100000] + I[Value: 0x04] + end + subgraph E1[Extension Node] + J[Path: 0b01100101] + K["Path Bounds: [2, 6)"] + end + subgraph L2[Leaf Node] + L[Path: 0b01100101] + M[Value: 0x02] + end + RI -->|0| I1 + RI -->|1| I2 + I1 -->|0| L1 + I1 -->|1| E1 + E1 --> L2 + I2 -->|0| L3 + I2 -->|1| L4 +``` + +## Values + +By default the SMT will use the `hasher` passed into `NewSparseMerkleTree` to hash both the keys into their paths in the tree, as well as the values. This means the data stored in a leaf node will be the hash of the value, not the value itself. + +However, if this is not desired, the two option functions `WithPathHasher` and `WithValueHasher` can be used to change the hashing function used for the keys and values respectively. + +If `nil` is passed to `WithValueHasher`, it will act as an identity hasher and store the values unaltered in the tree. + +## Hashers & Digests + +When creating a new SMT, or importing one, a `hasher` is provided; typically this would be `sha256.New()`, but it could be any hasher implementing Go's `hash.Hash` interface. By default this hasher, referred to as the `TreeHasher`, will be used on both keys (to create paths) and values (to store), but separate hashers can be passed in via the option functions mentioned above. + +Whenever we do an operation on the tree, the `PathHasher` is used to hash the key and return its digest, the path. When we store a value in a leaf node we hash it using the `ValueHasher`. These digests are calculated by writing to the hasher and then calculating the checksum by calling `Sum(nil)`. + +The digests of all nodes, regardless of the `PathHasher` and `ValueHasher`s being used, will be the result of writing to the `TreeHasher` and calculating the `Sum`. The exact data hashed depends on the type of node; this is described in the [implementation](#implementation) section. + +The following diagram represents the creation of a leaf node in an abstracted and simplified manner. + +_Note_: This diagram is not entirely accurate regarding the process of creating a leaf node, but is a good representation of the process.
```mermaid +graph TD + subgraph L[Leaf Node] + A["Path"] + B["ValueHash"] + D["Digest"] + end + subgraph PH[Path Hasher] + E["Write(key)"] + F["Sum(nil)"] + E-->F + end + subgraph VH[Value Hasher] + G["Write(value)"] + H["Sum(nil)"] + G-->H + end + subgraph TH[Tree Hasher] + I["Write([]byte{0}+Path+ValueHash)"] + J["Sum(nil)"] + I-->J + end + subgraph KV[KV Pair] + end + KV --Key-->PH + KV --Value-->VH + PH --Path-->TH + VH --ValueHash-->TH + TH --Digest-->L + PH --Path-->L + VH --ValueHash-->L +``` + +## Proofs + +The `SparseMerkleProof` type contains the information required for both membership and non-membership proofs. Depending on the key provided to the tree method `Prove(key []byte)`, either a membership or non-membership proof will be generated. + +The proof contains the relevant information required to rebuild the root hash of the tree from the given key. This information is: + +- Any side nodes +- Data of the sibling node +- Data for the unrelated leaf at the path + - This is `nil` for membership proofs, and only used for non-membership proofs + +`SparseMerkleProof`s can be compressed into `SparseCompactMerkleProof` objects, which are smaller and more efficient to store. These can be created by calling `CompactProof()` with a `SparseMerkleProof`. + +### Verification + +In order to verify a `SparseMerkleProof`, the `VerifyProof` method is called with the proof, tree spec and root hash, as well as the key and value (for a membership proof) that the proof is for. + +The verification step simply uses the proof data to recompute the root hash from the data provided and the digests stored in the proof. If the recomputed root hash matches the one provided then the proof is valid; otherwise it is invalid. + +## Database + +This library defines the `MapStore` interface in [mapstore.go](./mapstore.go): + +```go +type MapStore interface { + Get(key []byte) ([]byte, error) + Set(key []byte, value []byte) error + Delete(key []byte) error +} +``` + +This interface abstracts the `SimpleMap` key-value store and can be used by the SMT to store the nodes of the tree. Any key-value store that implements the `MapStore` interface can be used with this library. + +When changes are committed to the underlying database using `Commit()`, the digests of the leaf nodes are stored at their respective paths. If retrieved manually from the database, the returned value will be the digest of the leaf node, **not** the leaf node's value, even when `WithValueHasher(nil)` is used. The node value can be parsed from this value, as the tree `Get` function does, by removing the prefix and path bytes from the returned value. + +### Data Loss + +In the event of a system crash or unexpected failure of the program utilising the SMT, if the `Commit()` function has not been called, any changes to the tree will be lost. This is due to the underlying database not being changed **until** the `Commit()` function is called and changes are persisted.
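+
+To make the data-loss semantics concrete, the sketch below is a minimal, illustrative example (using only the `NewSimpleMap`, `NewSparseMerkleTree`, `Update`, `Commit` and `Root` calls shown elsewhere in this document) of the point at which changes become durable:
+
+```go
+package main
+
+import (
+	"crypto/sha256"
+	"fmt"
+
+	"github.com/pokt-network/smt"
+)
+
+func main() {
+	// The MapStore that will hold the tree's nodes once committed.
+	nodeStore := smt.NewSimpleMap()
+	tree := smt.NewSparseMerkleTree(nodeStore, sha256.New())
+
+	// Updates only mutate the cached, in-memory tree at this point.
+	if err := tree.Update([]byte("foo"), []byte("bar")); err != nil {
+		panic(err)
+	}
+
+	// If the process crashed here, the update above would be lost,
+	// because nothing has been written to nodeStore yet.
+
+	// Commit persists all unpersisted nodes (and removes orphans) in nodeStore.
+	if err := tree.Commit(); err != nil {
+		panic(err)
+	}
+
+	fmt.Printf("committed root: %x\n", tree.Root())
+}
+```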
## Example @@ -19,20 +313,21 @@ import ( ) func main() { - // Initialise two new key-value store to store the nodes of the tree + // Initialise a new key-value store to store the nodes of the tree // (Note: the tree only stores hashed values, not raw value data) nodeStore := smt.NewSimpleMap() + // Initialise the tree - tree := smt.NewSMT(nodeStore, sha256.New()) + tree := smt.NewSparseMerkleTree(nodeStore, sha256.New()) // Update the key "foo" with the value "bar" _ = tree.Update([]byte("foo"), []byte("bar")) - // Generate a Merkle proof for foo=bar + // Generate a Merkle proof for "foo" proof, _ := tree.Prove([]byte("foo")) root := tree.Root() // We also need the current tree root for the proof - // Verify the Merkle proof for foo=bar + // Verify the Merkle proof for "foo"="bar" if smt.VerifyProof(proof, root, []byte("foo"), []byte("bar"), tree.Spec()) { fmt.Println("Proof verification succeeded.") } else { diff --git a/fuzz/delete/fuzz.go b/fuzz/delete/fuzz.go deleted file mode 100644 index e0fa4ee..0000000 --- a/fuzz/delete/fuzz.go +++ /dev/null @@ -1,37 +0,0 @@ -package delete - -import ( - "bytes" - "crypto/sha256" - - "github.com/pokt-network/smt" -) - -func Fuzz(data []byte) int { - if len(data) == 0 { - return -1 - } - - splits := bytes.Split(data, []byte("*")) - if len(splits) < 3 { - return -1 - } - - smn := smt.NewSimpleMap() - tree := smt.NewSparseMerkleTree(smn, sha256.New()) - for i := 0; i < len(splits)-1; i += 2 { - key, value := splits[i], splits[i+1] - tree.Update(key, value) - } - - deleteKey := splits[len(splits)-1] - err := tree.Delete(deleteKey) - newRoot := tree.Root() - if err != nil { - return 0 - } - if len(newRoot) == 0 { - panic("newRoot is nil yet err==nil") - } - return 1 -} diff --git a/fuzz/fuzz.go b/fuzz/fuzz.go deleted file mode 100644 index a9061e7..0000000 --- a/fuzz/fuzz.go +++ /dev/null @@ -1,70 +0,0 @@ -package fuzz - -import ( - "bytes" - "crypto/sha256" - "encoding/binary" - "math" - - "github.com/pokt-network/smt" -) - -func Fuzz(input []byte) int { - if len(input) < 100 { - return 0 - } - smn := smt.NewSimpleMap() - tree := smt.NewSparseMerkleTree(smn, sha256.New()) - r := bytes.NewReader(input) - var keys [][]byte - key := func() []byte { - if readByte(r) < math.MaxUint8/2 { - k := make([]byte, readByte(r)/2) - r.Read(k) - keys = append(keys, k) - return k - } - if len(keys) == 0 { - return nil - } - return keys[int(readByte(r))%len(keys)] - } - for i := 0; r.Len() != 0; i++ { - b, err := r.ReadByte() - if err != nil { - continue - } - op := op(int(b) % int(Noop)) - switch op { - case Get: - tree.Get(key()) - case Update: - value := make([]byte, 32) - binary.BigEndian.PutUint64(value, uint64(i)) - tree.Update(key(), value) - case Delete: - tree.Delete(key()) - case Prove: - tree.Prove(key()) - } - } - return 1 -} - -type op int - -const ( - Get op = iota - Update - Delete - Prove - Noop -) - -func readByte(r *bytes.Reader) byte { - b, err := r.ReadByte() - if err != nil { - return 0 - } - return b -} diff --git a/fuzz_test.go b/fuzz_test.go new file mode 100644 index 0000000..a734d56 --- /dev/null +++ b/fuzz_test.go @@ -0,0 +1,120 @@ +package smt + +import ( + "bytes" + "crypto/sha256" + "encoding/binary" + "math" + "testing" + + "github.com/stretchr/testify/require" +) + +// FuzzSMT uses fuzzing to attempt to break the SMT implementation +// in its current state. 
This fuzzing test does not confirm the SMT // functions correctly, it only tries to detect when it fails unexpectedly +func FuzzSMT_DetectUnexpectedFailures(f *testing.F) { + seeds := [][]byte{ + []byte(""), + []byte("foo"), + {1, 2, 3, 4}, + []byte("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"), + nil, + } + for _, s := range seeds { + f.Add(s) + } + f.Fuzz(func(t *testing.T, input []byte) { + smn := NewSimpleMap() + tree := NewSparseMerkleTree(smn, sha256.New()) + + r := bytes.NewReader(input) + var keys [][]byte + + // key returns a random key, either generating a new + // one or using a previously generated one with a 50/50 chance of either + key := func() []byte { + b := readByte(r) + if b < math.MaxUint8/2 { + k := make([]byte, b/2) + if _, err := r.Read(k); err != nil { + return nil + } + keys = append(keys, k) + return k + } + + if len(keys) == 0 { + return nil + } + + return keys[int(b)%len(keys)] + } + + // `i` is the loop counter but is also used as the input value to `Update` operations + for i := 0; r.Len() != 0; i++ { + originalRoot := tree.Root() + b, err := r.ReadByte() + if err != nil { + continue + } + + // Randomly select an operation to perform + op := op(int(b) % int(NumOps)) + switch op { + case Get: + _, err := tree.Get(key()) + if err != nil { + require.ErrorIsf(t, err, ErrKeyNotPresent, "unknown error occurred while getting") + } + newRoot := tree.Root() + require.Equal(t, originalRoot, newRoot, "root changed while getting") + case Update: + value := make([]byte, 32) + binary.BigEndian.PutUint64(value, uint64(i)) + err := tree.Update(key(), value) + require.NoErrorf(t, err, "unknown error occurred while updating") + newRoot := tree.Root() + require.NotEqual(t, originalRoot, newRoot, "root unchanged while updating") + case Delete: + err := tree.Delete(key()) + if err != nil { + require.ErrorIsf(t, err, ErrKeyNotPresent, "unknown error occurred while deleting") + continue + } + // If the key was present, check the root has changed + newRoot := tree.Root() + require.NotEqual(t, originalRoot, newRoot, "root unchanged while deleting") + case Prove: + _, err := tree.Prove(key()) + if err != nil { + require.ErrorIsf(t, err, ErrKeyNotPresent, "unknown error occurred while proving") + } + newRoot := tree.Root() + require.Equal(t, originalRoot, newRoot, "root changed while proving") + } + + newRoot := tree.Root() + require.Greater(t, len(newRoot), 0, "new root is empty while err is nil") + } + }) +} + +// Fuzzing helpers +type op int + +const ( + Get op = iota + Update + Delete + Prove + NumOps +) + +func readByte(r *bytes.Reader) byte { + b, err := r.ReadByte() + if err != nil { + return 0 + } + return b +} diff --git a/hasher.go b/hasher.go index ad4edd4..5fe2ae6 100644 --- a/hasher.go +++ b/hasher.go @@ -12,8 +12,8 @@ var ( ) var ( - _ PathHasher = &pathHasher{} - _ ValueHasher = &valueHasher{} + _ PathHasher = (*pathHasher)(nil) + _ ValueHasher = (*valueHasher)(nil) ) // PathHasher defines how key inputs are hashed to produce tree paths.
@@ -47,10 +47,13 @@ func newTreeHasher(hasher hash.Hash) *treeHasher { return &th } +// Path returns the digest of a key produced by the path hasher func (ph *pathHasher) Path(key []byte) []byte { return ph.digest(key)[:ph.PathSize()] } +// PathSize returns the length (in bytes) of digests produced by the path hasher +// which is the length of any path in the tree func (ph *pathHasher) PathSize() int { return ph.hasher.Size() } diff --git a/mapstore.go b/mapstore.go index 5fe381b..0182bca 100644 --- a/mapstore.go +++ b/mapstore.go @@ -6,9 +6,9 @@ import ( // MapStore is a key-value store. type MapStore interface { - Get(key []byte) ([]byte, error) // Get gets the value for a key. - Set(key []byte, value []byte) error // Set updates the value for a key. - Delete(key []byte) error // Delete deletes a key. + Get(key []byte) ([]byte, error) // Get gets the value for a key. + Set(key, value []byte) error // Set updates the value for a key. + Delete(key []byte) error // Delete deletes a key. } // InvalidKeyError is thrown when a key that does not exist is being accessed. diff --git a/options.go b/options.go index 364d4bc..4a2a4f1 100644 --- a/options.go +++ b/options.go @@ -3,10 +3,12 @@ package smt // Option is a function that configures SparseMerkleTree. type Option func(*SMT) +// WithPathHasher returns an Option that sets the PathHasher to the one provided func WithPathHasher(ph PathHasher) Option { return func(smt *SMT) { smt.ph = ph } } +// WithValueHasher returns an Option that sets the ValueHasher to the one provided func WithValueHasher(vh ValueHasher) Option { return func(smt *SMT) { smt.vh = vh } } diff --git a/oss-fuzz-build.sh b/oss-fuzz-build.sh deleted file mode 100755 index 5b3d663..0000000 --- a/oss-fuzz-build.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -eu - -export FUZZ_ROOT="github.com/pokt-network/smt" - -compile_go_fuzzer "$FUZZ_ROOT"/fuzz Fuzz fuzz_basic_op fuzz -compile_go_fuzzer "$FUZZ_ROOT"/fuzz/delete Fuzz fuzz_delete fuzz diff --git a/reviewpad.yml b/reviewpad.yml index f5a701c..953406b 100644 --- a/reviewpad.yml +++ b/reviewpad.yml @@ -20,60 +20,6 @@ labels: # A workflow is a list of actions that will be executed based on the defined rules. # For more details see https://docs.reviewpad.com/guides/syntax#workflow. workflows: - # This workflow calls Reviewpad AI agent to summarize the pull request. - - name: summarize - description: Summarize the pull request - run: - # Summarize the pull request on pull request synchronization. - - if: ($eventType() == "synchronize" || $eventType() == "opened") && $state() == "open" - then: $summarize() - - # This workflow assigns the most relevant reviewer to pull requests. - # This helps guarantee that most pull requests are reviewed by at least one person. - - name: reviewer-assignment - description: Assign the most relevant reviewer to pull requests - run: - # Automatically assign reviewer when the pull request is ready for review; - - if: $isDraft() == false - then: $assignCodeAuthorReviewers() - - # This workflow praises contributors on their pull request contributions. - # This helps contributors feel appreciated. - - name: praise-contributors-on-milestones - description: Praise contributors based on their contributions - run: - # Praise contributors on their first pull request. - - if: $pullRequestCountBy($author()) == 1 - then: $commentOnce($sprintf("Thank you @%s for this first contribution!", [$author()])) - - # This workflow validates that pull requests follow the conventional commits specification. 
- # This helps developers automatically generate changelogs. - # For more details, see https://www.conventionalcommits.org/en/v1.0.0/. - - name: check-conventional-commits - description: Validate that pull requests follow the conventional commits - run: - - if: $isDraft() == false - then: - # Check commits messages against the conventional commits specification - - $commitLint() - # Check pull request title against the conventional commits specification. - - $titleLint() - - # This workflow validates best practices for pull request management. - # This helps developers follow best practices. - - name: best-practices - description: Validate best practices for pull request management - run: - # Warn pull requests that do not have an associated GitHub issue. - - if: $hasLinkedIssues() == false - then: $warn("Please link an issue to the pull request") - # Warn pull requests if their description is empty. - - if: $description() == "" - then: $warn("Please provide a description for the pull request") - # Warn pull request do not have a clean linear history. - - if: $hasLinearHistory() == false - then: $warn("Please rebase your pull request on the latest changes") - # This workflow labels pull requests based on the total number of lines changed. # This helps pick pull requests based on their size and to incentivize small pull requests. - name: size-labeling diff --git a/smt.go b/smt.go index da9d40d..06c4bd4 100644 --- a/smt.go +++ b/smt.go @@ -6,8 +6,8 @@ import ( ) var ( - _ treeNode = &innerNode{} - _ treeNode = &leafNode{} + _ treeNode = (*innerNode)(nil) + _ treeNode = (*leafNode)(nil) ) type treeNode interface { @@ -62,6 +62,7 @@ type SMT struct { // Hashes of persisted nodes deleted from tree type orphanNodes = [][]byte +// NewSparseMerkleTree returns a new pointer to an SMT struct, and applys any options provided func NewSparseMerkleTree(nodes MapStore, hasher hash.Hash, options ...Option) *SMT { smt := SMT{ TreeSpec: newTreeSpec(hasher), @@ -73,6 +74,7 @@ func NewSparseMerkleTree(nodes MapStore, hasher hash.Hash, options ...Option) *S return &smt } +// ImportSparseMerkleTree returns a pointer to an SMT struct with the provided root hash func ImportSparseMerkleTree(nodes MapStore, hasher hash.Hash, root []byte, options ...Option) *SMT { smt := NewSparseMerkleTree(nodes, hasher, options...) 
smt.tree = &lazyNode{root} @@ -80,6 +82,7 @@ func ImportSparseMerkleTree(nodes MapStore, hasher hash.Hash, root []byte, optio return smt } +// Get returns the digest of the value stored at the given key func (smt *SMT) Get(key []byte) ([]byte, error) { path := smt.ph.Path(key) var leaf *leafNode @@ -122,6 +125,7 @@ func (smt *SMT) Get(key []byte) ([]byte, error) { return leaf.valueHash, nil } +// Update sets the value for the given key, to the digest of the provided value func (smt *SMT) Update(key []byte, value []byte) error { path := smt.ph.Path(key) valueHash := smt.digestValue(value) @@ -203,6 +207,7 @@ func (smt *SMT) update( return node, nil } +// Delete removes the node at the path corresponding to the given key func (smt *SMT) Delete(key []byte) error { path := smt.ph.Path(key) var orphans orphanNodes @@ -294,6 +299,7 @@ func (smt *SMT) delete(node treeNode, depth int, path []byte, orphans *orphanNod return node, nil } +// Prove generates a SparseMerkleProof for the given key func (smt *SMT) Prove(key []byte) (proof SparseMerkleProof, err error) { path := smt.ph.Path(key) var siblings []treeNode @@ -370,6 +376,7 @@ func (smt *SMT) Prove(key []byte) (proof SparseMerkleProof, err error) { return } +//nolint:unused func (smt *SMT) recursiveLoad(hash []byte) (treeNode, error) { return smt.resolve(hash, smt.recursiveLoad) } @@ -428,6 +435,8 @@ func (smt *SMT) resolve(hash []byte, resolver func([]byte) (treeNode, error), return &inner, nil } +// Commit persists all dirty nodes in the tree, deletes all orphaned +// nodes from the database and then computes and saves the root hash func (smt *SMT) Commit() (err error) { // All orphans are persisted and have cached digests, so we don't need to check for null for _, orphans := range smt.orphans { @@ -517,6 +526,7 @@ func (ext *extensionNode) match(path []byte, depth int) (int, bool) { return ext.length(), true } +//nolint:unused func (ext *extensionNode) commonPrefix(path []byte) int { count := 0 for i := ext.pathStart(); i < ext.pathEnd(); i++ { diff --git a/smt_proofs_test.go b/smt_proofs_test.go index 59f4b2b..c378dfb 100644 --- a/smt_proofs_test.go +++ b/smt_proofs_test.go @@ -92,10 +92,14 @@ func TestProofsSanityCheck(t *testing.T) { smt := NewSMTWithStorage(smn, smv, sha256.New()) base := smt.Spec() - smt.Update([]byte("testKey1"), []byte("testValue1")) - smt.Update([]byte("testKey2"), []byte("testValue2")) - smt.Update([]byte("testKey3"), []byte("testValue3")) - smt.Update([]byte("testKey4"), []byte("testValue4")) + err := smt.Update([]byte("testKey1"), []byte("testValue1")) + require.NoError(t, err) + err = smt.Update([]byte("testKey2"), []byte("testValue2")) + require.NoError(t, err) + err = smt.Update([]byte("testKey3"), []byte("testValue3")) + require.NoError(t, err) + err = smt.Update([]byte("testKey4"), []byte("testValue4")) + require.NoError(t, err) root := smt.Root() // Case: invalid number of sidenodes. @@ -108,7 +112,7 @@ func TestProofsSanityCheck(t *testing.T) { require.False(t, proof.sanityCheck(base)) result := VerifyProof(proof, root, []byte("testKey1"), []byte("testValue1"), base) require.False(t, result) - _, err := CompactProof(proof, base) + _, err = CompactProof(proof, base) require.Error(t, err) // Case: incorrect size for NonMembershipLeafData. 
diff --git a/smt_test.go b/smt_test.go index 30d2c51..e683c4b 100644 --- a/smt_test.go +++ b/smt_test.go @@ -146,7 +146,7 @@ func TestTreeDeleteBasic(t *testing.T) { err = smt.Update([]byte("foo"), []byte("testValue")) require.NoError(t, err) - value, err = smt.GetValue([]byte("foo")) + _, err = smt.GetValue([]byte("foo")) require.NoError(t, err) err = smt.Delete([]byte("foo")) @@ -201,7 +201,7 @@ func TestTreeKnownPath(t *testing.T) { baseKey := make([]byte, ph.PathSize()) keys := make([][]byte, 7) - for i, _ := range keys { + for i := range keys { keys[i] = make([]byte, ph.PathSize()) copy(keys[i], baseKey) } diff --git a/smt_testutil.go b/smt_testutil.go index 4cb6943..8a5b165 100644 --- a/smt_testutil.go +++ b/smt_testutil.go @@ -35,7 +35,7 @@ func (smt *SMTWithStorage) Delete(key []byte) error { // Get gets the value of a key from the tree. func (smt *SMTWithStorage) GetValue(key []byte) ([]byte, error) { - valueHash, err := smt.SMT.Get(key) + valueHash, err := smt.Get(key) if err != nil { return nil, err } diff --git a/types.go b/types.go index 9283226..a027320 100644 --- a/types.go +++ b/types.go @@ -12,6 +12,7 @@ const ( var ( defaultValue []byte = nil + // ErrKeyNotPresent is returned when a key is not present in the tree. ErrKeyNotPresent = errors.New("key already empty") )