From e38ce1c6e879529565f0949d7a50ca850cffd21c Mon Sep 17 00:00:00 2001
From: Dmitry Kolesnikov
Date: Sun, 3 Mar 2024 22:34:20 +0200
Subject: [PATCH] support permanent storage

---
 README.md        |  4 ++-
 examples/main.go |  2 +-
 hashmap.go       | 62 ++++++++++++++++++++++++++++++++++++++++-----
 symbol_test.go   | 66 ++++++++++++++++++++++++++++++++++++++++++++----
 types.go         |  6 +++++
 5 files changed, 127 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 1078358..9cf2462 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ The latest version of the library is available at `main` branch of this reposito
 import "github.com/kshard/atom"
 
 // Create new atoms table
-atoms := atom.New(atom.NewMemMap())
+atoms := atom.New(atom.NewEphemeralMap())
 
 // Convert string to atom
 code := atoms.Atom("String interning")
@@ -39,6 +39,8 @@ code := atoms.Atom("String interning")
 atoms.String(code)
 ```
 
+Use `atom.NewPermanentMap(store)` to back atoms with local permanent or external storage; `store` implements `atom.Store`.
+
 ## How To Contribute
 
 The library is [MIT](LICENSE) licensed and accepts contributions via GitHub pull requests:
diff --git a/examples/main.go b/examples/main.go
index ed7bc22..25a9186 100644
--- a/examples/main.go
+++ b/examples/main.go
@@ -20,7 +20,7 @@ const n = 1000000
 func main() {
 	var err error
 
-	atoms := atom.New(atom.NewMemMap())
+	atoms := atom.New(atom.NewEphemeralMap())
 	ids := make([]atom.Atom, n)
 
 	for i := 0; i < n; i++ {
diff --git a/hashmap.go b/hashmap.go
index 0ab6d62..c1e9934 100644
--- a/hashmap.go
+++ b/hashmap.go
@@ -8,20 +8,26 @@
 
 package atom
 
-import "sync"
+import (
+	"encoding/binary"
+	"sync"
+	"unsafe"
+)
 
-type hashmap struct {
+//------------------------------------------------------------------------------
+
+type ephemeral struct {
 	sync.RWMutex
 	kv map[Atom]string
 }
 
-func NewMemMap() HashMap {
-	return &hashmap{
+func NewEphemeralMap() HashMap {
+	return &ephemeral{
 		kv: make(map[Atom]string),
 	}
 }
 
-func (m *hashmap) Get(key Atom) (string, error) {
+func (m *ephemeral) Get(key Atom) (string, error) {
 	m.RLock()
 	val, has := m.kv[key]
 	m.RUnlock()
@@ -32,10 +38,54 @@ func (m *hashmap) Get(key Atom) (string, error) {
 	return val, nil
 }
 
-func (m *hashmap) Put(key Atom, val string) error {
+func (m *ephemeral) Put(key Atom, val string) error {
 	m.Lock()
 	m.kv[key] = val
 	m.Unlock()
 
 	return nil
 }
+
+//------------------------------------------------------------------------------
+
+type permanent struct {
+	store Store
+}
+
+func NewPermanentMap(store Store) HashMap {
+	return &permanent{store: store}
+}
+
+func (m *permanent) Get(key Atom) (string, error) {
+	var bkey [5]byte
+	bkey[0] = ':'
+	binary.LittleEndian.PutUint32(bkey[1:], key)
+
+	val, err := m.store.Get(bkey[:])
+	if err != nil {
+		return "", err
+	}
+
+	// Zero-copy []byte -> string: the string header is a prefix of the
+	// slice header. NOTE: str aliases val; the store must not mutate it.
+	str := *(*string)(unsafe.Pointer(&val))
+
+	return str, nil
+}
+
+func (m *permanent) Put(key Atom, val string) error {
+	var bkey [5]byte
+	bkey[0] = ':'
+	binary.LittleEndian.PutUint32(bkey[1:], key)
+
+	// Copy into a fresh slice. Casting *string to *[]byte would read a
+	// capacity word that the two-word string header does not contain.
+	bval := []byte(val)
+
+	err := m.store.Put(bkey[:], bval)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
diff --git a/symbol_test.go b/symbol_test.go
index 91602fb..0dc8447 100644
--- a/symbol_test.go
+++ b/symbol_test.go
@@ -1,6 +1,7 @@
 package atom_test
 
 import (
+	"bytes"
 	"strconv"
 	"testing"
 	"time"
@@ -14,8 +15,29 @@ const (
 	sc = "interning"
 )
 
-func TestPut(t *testing.T) {
-	s := atom.New(atom.NewMemMap())
+func TestEphemeralPut(t *testing.T) {
+	s := atom.New(atom.NewEphemeralMap())
+
+	for val, expected := range map[string]uint32{
+		sa: 1247594388,
+		sb: 3572195896,
+		sc: 1304336027,
+	} {
+		sym, err := s.Atom(val)
+		if err != nil {
+			t.Errorf("failed to assign symbol: %s", err)
+		}
+		if sym != expected {
+			t.Errorf("failed to assign symbol: %d, expected %d", sym, expected)
+		}
+		if val != s.String(sym) {
+			t.Errorf("failed to lookup string")
+		}
+	}
+}
+
+func TestPermanentPut(t *testing.T) {
+	s := atom.New(atom.NewPermanentMap(&none{}))
 
 	for val, expected := range map[string]uint32{
 		sa: 1247594388,
@@ -38,8 +60,21 @@ func TestPut(t *testing.T) {
 // ---------------------------------------------------------------
 // go test -run=^$ -bench=. -cpu=1 -benchtime=10s -cpuprofile profile.out
 
-func BenchmarkPut(b *testing.B) {
-	s := atom.New(atom.NewMemMap())
+func BenchmarkEphemeralPut(b *testing.B) {
+	s := atom.New(atom.NewEphemeralMap())
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	t := time.Now().Nanosecond()
+
+	for n := 0; n < b.N; n++ {
+		s.Atom("https://pkg.go.dev/hash/fnv@go1.20." + strconv.Itoa(t+n))
+	}
+}
+
+func BenchmarkPermanentPut(b *testing.B) {
+	s := atom.New(atom.NewPermanentMap(&none{}))
 
 	b.ReportAllocs()
 	b.ResetTimer()
@@ -55,7 +90,7 @@ func BenchmarkPut(b *testing.B) {
 // go test -fuzz=FuzzSymbolOf
 
 func FuzzSymbolOf(f *testing.F) {
-	s := atom.New(atom.NewMemMap())
+	s := atom.New(atom.NewEphemeralMap())
 
 	f.Add("abc")
 
@@ -66,3 +101,24 @@ func FuzzSymbolOf(f *testing.F) {
 		}
 	})
 }
+
+// ---------------------------------------------------------------
+
+type none struct {
+	key []byte
+	val []byte
+}
+
+func (n *none) Get(key []byte) ([]byte, error) {
+	if bytes.Equal(n.key, key) {
+		return n.val, nil
+	}
+
+	return nil, nil
+}
+
+func (n *none) Put(key []byte, val []byte) error {
+	n.key = key
+	n.val = val
+	return nil
+}
diff --git a/types.go b/types.go
index bed9f29..839a07a 100644
--- a/types.go
+++ b/types.go
@@ -21,3 +21,9 @@ type HashMap interface {
 	Getter
 	Putter
 }
+
+// Store is the abstraction of permanent storage used by NewPermanentMap.
+type Store interface {
+	Get([]byte) ([]byte, error)
+	Put([]byte, []byte) error
+}