diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1de0967c7..82740a846 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,7 +23,7 @@ jobs:
       - uses: actions/checkout@v3
       - name: test & coverage report creation
         run: |
-          CGO_ENABLED=1 go test ./... -mod=readonly -timeout 8m -race -coverprofile=coverage.txt -covermode=atomic -tags=memdb,goleveldb,cleveldb,boltdb,rocksdb,badgerdb -v
+          CGO_ENABLED=1 go test ./... -mod=readonly -timeout 8m -race -coverprofile=coverage.txt -covermode=atomic -tags=memdb,goleveldb,cleveldb,boltdb,rocksdb,badgerdb,mdbx -v
       - uses: codecov/codecov-action@v3
         with:
           file: ./coverage.txt
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9654735be..372484f19 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## Unreleased
 
 - remove mutex from prefixdb
+- Add mdbx backend.
 
 ## 0.6.7
 
diff --git a/db.go b/db.go
index ef573f17e..0a10b6d4c 100644
--- a/db.go
+++ b/db.go
@@ -35,6 +35,8 @@ const (
 	RocksDBBackend BackendType = "rocksdb"
 
 	BadgerDBBackend BackendType = "badgerdb"
+
+	MDBXBackend BackendType = "mdbx"
 )
 
 type dbCreator func(name string, dir string) (DB, error)
diff --git a/go.mod b/go.mod
index 1395b1d16..fbe599cdb 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/tendermint/tm-db
 go 1.17
 
 require (
+	github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b
 	github.com/cosmos/gorocksdb v1.2.0
 	github.com/dgraph-io/badger/v3 v3.2103.2
 	github.com/gogo/protobuf v1.3.2
@@ -10,6 +11,7 @@ require (
 	github.com/jmhodges/levigo v1.0.0
 	github.com/stretchr/testify v1.8.0
 	github.com/syndtr/goleveldb v1.0.1-0.20200815110645-5c35d600f0ca
+	github.com/torquem-ch/mdbx-go v0.26.0
 	go.etcd.io/bbolt v1.3.6
 	google.golang.org/grpc v1.48.0
 )
diff --git a/go.sum b/go.sum
index 0d1afaab2..e8c7c630b 100644
--- a/go.sum
+++ b/go.sum
@@ -5,6 +5,8 @@ github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE
 github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
 github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
 github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
+github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b h1:6+ZFm0flnudZzdSE0JxlhR2hKnGPcNB35BjQf4RYQDY=
+github.com/c2h5oh/datasize v0.0.0-20220606134207-859f65c6625b/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M=
 github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
 github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
 github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
@@ -144,6 +146,8 @@ github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PK
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/syndtr/goleveldb v1.0.1-0.20200815110645-5c35d600f0ca h1:Ld/zXl5t4+D69SiV4JoN7kkfvJdOWlPpfxrzxpLMoUk=
 github.com/syndtr/goleveldb v1.0.1-0.20200815110645-5c35d600f0ca/go.mod h1:u2MKkTVTVJWe5D1rCvame8WqhBd88EuIwODJZ1VHCPM=
+github.com/torquem-ch/mdbx-go v0.26.0 h1:d8ph2MsVZoBZr0eFWHRiSYjoCXggED6XzcspUX/HsZM=
+github.com/torquem-ch/mdbx-go v0.26.0/go.mod h1:T2fsoJDVppxfAPTLd1svUgH1kpPmeXdPESmroSHcL1E=
 github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
 github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
diff --git a/mdbx.go b/mdbx.go
new file mode 100644
index 000000000..2aff8d3bb
--- /dev/null
+++ b/mdbx.go
@@ -0,0 +1,245 @@
+//go:build mdbx
+// +build mdbx
+
+package db
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+
+	"github.com/c2h5oh/datasize"
+	"github.com/torquem-ch/mdbx-go/mdbx"
+)
+
+func init() {
+	registerDBCreator(MDBXBackend, NewMDBX, false)
+}
+
+// MDBX is a DB implementation that keeps all keys in the root table of a
+// single MDBX environment.
+type MDBX struct {
+	Env *mdbx.Env
+	DBI mdbx.DBI
+}
+
+var _ DB = (*MDBX)(nil)
+
+// NewMDBX opens the MDBX environment at <dir>/<name>.db and returns a DB backed
+// by its root table. If any setup step fails, the environment handle is closed
+// before the error is returned so that it does not leak.
+func NewMDBX(name string, dir string) (DB, error) {
+	path := filepath.Join(dir, name) + ".db"
+	env, err := mdbx.NewEnv()
+	if err != nil {
+		return nil, err
+	}
+	// 3TB upper size bound, 2GB growth step; -1 leaves a setting to the library.
+	if err := env.SetGeometry(-1, -1, int(3*datasize.TB), int(2*datasize.GB), -1, int(DefaultPageSize())); err != nil {
+		env.Close()
+		return nil, err
+	}
+	if err := env.Open(path, 0, 0644); err != nil {
+		env.Close()
+		return nil, err
+	}
+	var dbi mdbx.DBI
+	if err := env.View(func(txn *mdbx.Txn) error {
+		var err error
+		dbi, err = txn.OpenRoot(0)
+		return err
+	}); err != nil {
+		env.Close()
+		return nil, err
+	}
+	return &MDBX{Env: env, DBI: dbi}, nil
+}
+
+// Get implements DB. It returns a nil value when the key does not exist.
+func (db *MDBX) Get(key []byte) ([]byte, error) {
+	if len(key) == 0 {
+		return nil, errKeyEmpty
+	}
+
+	var value []byte
+	if err := db.Env.View(func(txn *mdbx.Txn) error {
+		v, err := txn.Get(db.DBI, key)
+		if err != nil {
+			if mdbx.IsNotFound(err) {
+				return nil
+			}
+			return err
+		}
+		value = v
+		return nil
+	}); err != nil {
+		return nil, err
+	}
+	return value, nil
+}
+
+// Has implements DB.
+func (db *MDBX) Has(key []byte) (bool, error) {
+	if len(key) == 0 {
+		return false, errKeyEmpty
+	}
+
+	result := false
+	if err := db.Env.View(func(txn *mdbx.Txn) error {
+		// zero-copy: only the lookup result matters, the data is discarded
+		txn.RawRead = true
+
+		itr, err := txn.OpenCursor(db.DBI)
+		if err != nil {
+			return err
+		}
+		defer itr.Close()
+
+		_, _, err = itr.Get(key, nil, mdbx.Set)
+		if err != nil {
+			if mdbx.IsNotFound(err) {
+				return nil
+			}
+			return err
+		}
+		result = true
+		return nil
+	}); err != nil {
+		return false, err
+	}
+	return result, nil
+}
+
+// Set implements DB.
+func (db *MDBX) Set(key []byte, value []byte) error {
+	if len(key) == 0 {
+		return errKeyEmpty
+	}
+	if value == nil {
+		return errValueNil
+	}
+
+	return db.Env.Update(func(txn *mdbx.Txn) error {
+		return txn.Put(db.DBI, key, value, 0)
+	})
+}
+
+// SetSync implements DB. It performs Set and then forces an environment sync.
+func (db *MDBX) SetSync(key []byte, value []byte) error {
+	if err := db.Set(key, value); err != nil {
+		return err
+	}
+	return db.Env.Sync(true, false)
+}
+
+// Delete implements DB. Deleting a key that does not exist is not an error.
+func (db *MDBX) Delete(key []byte) error {
+	if len(key) == 0 {
+		return errKeyEmpty
+	}
+
+	err := db.Env.Update(func(txn *mdbx.Txn) error {
+		return txn.Del(db.DBI, key, nil)
+	})
+	if err != nil && mdbx.IsNotFound(err) {
+		return nil
+	}
+	return err
+}
+
+// DeleteSync implements DB. It performs Delete and then forces an environment sync.
+func (db *MDBX) DeleteSync(key []byte) error {
+	if err := db.Delete(key); err != nil {
+		return err
+	}
+	return db.Env.Sync(true, false)
+}
+
+// Close implements DB. It closes the table handle and then the environment.
+func (db *MDBX) Close() error {
+	db.Env.CloseDBI(db.DBI)
+	db.Env.Close()
+	return nil
+}
+
+// Print implements DB. It writes every key/value pair to stdout in hex.
+func (db *MDBX) Print() error {
+	itr, err := db.Iterator(nil, nil)
+	if err != nil {
+		return err
+	}
+	defer itr.Close()
+	for ; itr.Valid(); itr.Next() {
+		key := itr.Key()
+		value := itr.Value()
+		fmt.Printf("[%X]:\t[%X]\n", key, value)
+	}
+	return nil
+}
+
+// Stats implements DB. It returns nil if the environment statistics cannot be read.
+func (db *MDBX) Stats() map[string]string {
+	stat, err := db.Env.Stat()
+	if err != nil {
+		return nil
+	}
+	return map[string]string{
+		"mdbx.psize":          strconv.FormatUint(uint64(stat.PSize), 10),
+		"mdbx.depth":          strconv.FormatUint(uint64(stat.Depth), 10),
+		"mdbx.branch_pages":   strconv.FormatUint(stat.BranchPages, 10),
+		"mdbx.leaf_pages":     strconv.FormatUint(stat.LeafPages, 10),
+		"mdbx.overflow_pages": strconv.FormatUint(stat.OverflowPages, 10),
+		"mdbx.entries":        strconv.FormatUint(stat.Entries, 10),
+		"mdbx.last_tx_id":     strconv.FormatUint(stat.LastTxId, 10),
+	}
+}
+
+// NewBatch implements DB.
+func (db *MDBX) NewBatch() Batch {
+	return newMDBXBatch(db)
+}
+
+// Iterator implements DB.
+func (db *MDBX) Iterator(start, end []byte) (Iterator, error) {
+	return db.newIterator(start, end, false)
+}
+
+// ReverseIterator implements DB.
+func (db *MDBX) ReverseIterator(start, end []byte) (Iterator, error) {
+	return db.newIterator(start, end, true)
+}
+
+// newIterator validates the bounds, begins a read-only transaction and wraps it
+// in an iterator. The iterator owns the transaction once it has been created;
+// if creation fails the transaction is aborted here so that it does not leak.
+func (db *MDBX) newIterator(start, end []byte, isReverse bool) (Iterator, error) {
+	if (start != nil && len(start) == 0) || (end != nil && len(end) == 0) {
+		return nil, errKeyEmpty
+	}
+	txn, err := db.Env.BeginTxn(nil, mdbx.Readonly)
+	if err != nil {
+		return nil, err
+	}
+	itr, err := newMDBXIterator(db.DBI, txn, start, end, isReverse)
+	if err != nil {
+		txn.Abort()
+		// Return an untyped nil so callers never see a non-nil Iterator
+		// wrapping a nil *mdbxIterator.
+		return nil, err
+	}
+	return itr, nil
+}
+
+// DefaultPageSize returns the OS page size clamped to [4096, mdbx.MaxPageSize]
+// and rounded down to a multiple of 4096.
+func DefaultPageSize() uint64 {
+	osPageSize := os.Getpagesize()
+	if osPageSize < 4096 { // reduce further may lead to errors (because some data is just big)
+		osPageSize = 4096
+	} else if osPageSize > mdbx.MaxPageSize {
+		osPageSize = mdbx.MaxPageSize
+	}
+	osPageSize = osPageSize / 4096 * 4096 // ensure it's rounded
+	return uint64(osPageSize)
+}
diff --git a/mdbx_batch.go b/mdbx_batch.go
new file mode 100644
index 000000000..0f610090c
--- /dev/null
+++ b/mdbx_batch.go
@@ -0,0 +1,88 @@
+//go:build mdbx
+// +build mdbx
+
+package db
+
+import "github.com/torquem-ch/mdbx-go/mdbx"
+
+// MDBXBatch stores operations internally and dumps them to MDBX on Write().
+type MDBXBatch struct {
+	db  *MDBX
+	ops []operation // nil once the batch has been closed
+}
+
+var _ Batch = (*MDBXBatch)(nil)
+
+// newMDBXBatch returns an empty, open batch bound to db.
+func newMDBXBatch(db *MDBX) *MDBXBatch {
+	return &MDBXBatch{
+		db:  db,
+		ops: []operation{}, // must be non-nil: a nil slice marks a closed batch
+	}
+}
+
+// Set implements Batch. The write is only queued in memory until Write is called.
+func (b *MDBXBatch) Set(key, value []byte) error {
+	if len(key) == 0 {
+		return errKeyEmpty
+	}
+	if value == nil {
+		return errValueNil
+	}
+	if b.ops == nil {
+		return errBatchClosed
+	}
+	b.ops = append(b.ops, operation{opTypeSet, key, value})
+	return nil
+}
+
+// Delete implements Batch. The delete is only queued in memory until Write is called.
+func (b *MDBXBatch) Delete(key []byte) error {
+	if len(key) == 0 {
+		return errKeyEmpty
+	}
+	if b.ops == nil {
+		return errBatchClosed
+	}
+	b.ops = append(b.ops, operation{opTypeDelete, key, nil})
+	return nil
+}
+
+// Write implements Batch. It applies the queued operations inside a single
+// Env.Update call and then closes the batch.
+func (b *MDBXBatch) Write() error {
+	if b.ops == nil {
+		return errBatchClosed
+	}
+	if err := b.db.Env.Update(func(txn *mdbx.Txn) error {
+		for _, op := range b.ops {
+			switch op.opType {
+			case opTypeSet:
+				if err := txn.Put(b.db.DBI, op.key, op.value, 0); err != nil {
+					return err
+				}
+			case opTypeDelete:
+				// Deleting a missing key is a no-op, as in MDBX.Delete.
+				if err := txn.Del(b.db.DBI, op.key, nil); err != nil && !mdbx.IsNotFound(err) {
+					return err
+				}
+			}
+		}
+		return nil
+	}); err != nil {
+		return err
+	}
+	// Make sure batch cannot be used afterwards. Callers should still call Close(), for errors.
+	return b.Close()
+}
+
+// WriteSync implements Batch. It currently just delegates to Write.
+func (b *MDBXBatch) WriteSync() error {
+	return b.Write()
+}
+
+// Close implements Batch. It drops the queued operations and marks the batch closed.
+func (b *MDBXBatch) Close() error {
+	b.ops = nil
+	return nil
+}
diff --git a/mdbx_iterator.go b/mdbx_iterator.go
new file mode 100644
index 000000000..c2120d979
--- /dev/null
+++ b/mdbx_iterator.go
@@ -0,0 +1,173 @@
+//go:build mdbx
+// +build mdbx
+
+package db
+
+import (
+	"bytes"
+
+	"github.com/torquem-ch/mdbx-go/mdbx"
+)
+
+// mdbxIterator allows you to iterate on range of keys/values given some
+// start / end keys (nil & nil will result in doing full scan).
+type mdbxIterator struct {
+	tx *mdbx.Txn // transaction the cursor runs in; aborted by Close
+
+	itr   *mdbx.Cursor
+	start []byte
+	end   []byte
+
+	currentKey   []byte // nil once the cursor has run off either end
+	currentValue []byte
+
+	isInvalid bool // latched by Valid the first time it returns false
+	isReverse bool
+
+	err error // cursor error other than "not found"; reported by Error
+}
+
+var _ Iterator = (*mdbxIterator)(nil)
+
+// newMDBXIterator opens a cursor on dbi inside tx and positions it on the first
+// entry to visit: the smallest key >= start when iterating forward, or the
+// largest key < end when iterating in reverse (nil bounds mean "unbounded").
+// Positioning errors other than "not found" are reported via Error, not here.
+func newMDBXIterator(dbi mdbx.DBI, tx *mdbx.Txn, start, end []byte, isReverse bool) (*mdbxIterator, error) {
+	itr, err := tx.OpenCursor(dbi)
+	if err != nil {
+		return nil, err
+	}
+
+	var ck, cv []byte
+	if isReverse {
+		switch {
+		case end == nil:
+			ck, cv, err = itr.Get(nil, nil, mdbx.Last)
+		default:
+			// end is exclusive: seek to the first key >= end, then step back.
+			_, _, err = itr.Get(end, nil, mdbx.SetRange)
+			if err == nil {
+				ck, cv, err = itr.Get(nil, nil, mdbx.Prev)
+			} else if mdbx.IsNotFound(err) {
+				// Every key is below end, so start from the last one.
+				ck, cv, err = itr.Get(nil, nil, mdbx.Last)
+			}
+		}
+	} else {
+		switch {
+		case start == nil:
+			ck, cv, err = itr.Get(nil, nil, mdbx.First)
+		default:
+			ck, cv, err = itr.Get(start, nil, mdbx.SetRange)
+		}
+	}
+	// An empty range is not an error; it simply yields an invalid iterator.
+	if mdbx.IsNotFound(err) {
+		err = nil
+	}
+
+	return &mdbxIterator{
+		tx:           tx,
+		itr:          itr,
+		start:        start,
+		end:          end,
+		currentKey:   ck,
+		currentValue: cv,
+		isReverse:    isReverse,
+		isInvalid:    false,
+		err:          err,
+	}, nil
+}
+
+// Domain implements Iterator.
+func (itr *mdbxIterator) Domain() ([]byte, []byte) {
+	return itr.start, itr.end
+}
+
+// Valid implements Iterator. Once it has returned false it keeps returning
+// false for the rest of the iterator's life.
+func (itr *mdbxIterator) Valid() bool {
+	if itr.isInvalid {
+		return false
+	}
+
+	if itr.Error() != nil {
+		itr.isInvalid = true
+		return false
+	}
+
+	// iterated to the end of the cursor
+	if itr.currentKey == nil {
+		itr.isInvalid = true
+		return false
+	}
+
+	if itr.isReverse {
+		if itr.start != nil && bytes.Compare(itr.currentKey, itr.start) < 0 {
+			itr.isInvalid = true
+			return false
+		}
+	} else {
+		if itr.end != nil && bytes.Compare(itr.end, itr.currentKey) <= 0 {
+			itr.isInvalid = true
+			return false
+		}
+	}
+
+	// Valid
+	return true
+}
+
+// Next implements Iterator. It panics if the iterator is not valid.
+func (itr *mdbxIterator) Next() {
+	var err error
+	itr.assertIsValid()
+	if itr.isReverse {
+		itr.currentKey, itr.currentValue, err = itr.itr.Get(nil, nil, mdbx.Prev)
+	} else {
+		itr.currentKey, itr.currentValue, err = itr.itr.Get(nil, nil, mdbx.Next)
+	}
+	// Running off the end is not an error; Valid detects it via currentKey.
+	if !mdbx.IsNotFound(err) {
+		itr.err = err
+	}
+}
+
+// Key implements Iterator. It returns a copy of the current key and panics if
+// the iterator is not valid.
+func (itr *mdbxIterator) Key() []byte {
+	itr.assertIsValid()
+	return append([]byte{}, itr.currentKey...)
+}
+
+// Value implements Iterator. It returns a copy of the current value (nil if
+// the stored value is nil) and panics if the iterator is not valid.
+func (itr *mdbxIterator) Value() []byte {
+	itr.assertIsValid()
+	var value []byte
+	if itr.currentValue != nil {
+		value = append([]byte{}, itr.currentValue...)
+	}
+	return value
+}
+
+// Error implements Iterator.
+func (itr *mdbxIterator) Error() error {
+	return itr.err
+}
+
+// Close implements Iterator. It closes the cursor and then aborts the
+// transaction the iterator was created with, releasing both.
+func (itr *mdbxIterator) Close() error {
+	itr.itr.Close()
+	itr.tx.Abort()
+	return nil
+}
+
+// assertIsValid panics if the iterator is no longer valid.
+func (itr *mdbxIterator) assertIsValid() {
+	if !itr.Valid() {
+		panic("iterator is invalid")
+	}
+}
diff --git a/mdbx_test.go b/mdbx_test.go
new file mode 100644
index 000000000..d69652df1
--- /dev/null
+++ b/mdbx_test.go
@@ -0,0 +1,50 @@
+//go:build mdbx
+// +build mdbx
+
+package db
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// TestMDBXNewMDBX checks that a database can be created and closed without error.
+func TestMDBXNewMDBX(t *testing.T) {
+	name := fmt.Sprintf("test_%x", randStr(12))
+	dir := os.TempDir()
+	defer cleanupDBDir(dir, name)
+
+	db, err := NewMDBX(name, dir)
+	require.NoError(t, err)
+	require.NoError(t, db.Close())
+}
+
+// TestWithMDBX passes an MDBX database created under t.TempDir() to Run.
+func TestWithMDBX(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "mdbx")
+
+	db, err := NewMDBX(path, "")
+	require.NoError(t, err)
+
+	t.Run("MDBX", func(t *testing.T) { Run(t, db) })
+}
+
+// BenchmarkMDBXRandomReadsWrites passes an MDBX database to benchmarkRandomReadsWrites.
+func BenchmarkMDBXRandomReadsWrites(b *testing.B) {
+	name := fmt.Sprintf("test_%x", randStr(12))
+	db, err := NewMDBX(name, "")
+	if err != nil {
+		b.Fatal(err)
+	}
+	defer func() {
+		db.Close()
+		cleanupDBDir("", name)
+	}()
+
+	benchmarkRandomReadsWrites(b, db)
+}