-
Notifications
You must be signed in to change notification settings - Fork 220
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Replace badgerhold with directly using BadgerDB v4 #376
Conversation
Pull Request Test Coverage Report for Build 8466241881Details
💛 - Coveralls |
@mochi-co Not ready to merge yet. |
I believe there are no changes needed on my end. Please review and merge. I came across some information about Badger GC here: dgraph-io/badger#767. Perhaps there's no issue with the GC. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I haven't used Badger before, so I can't provide much feedback. I did look into the compaction/GC, and that part looks good.
One piece of feedback: the current Marshal/Unmarshal uses JSON; should we consider changing to gob?
@thedevop If considering serialization, I don't think gob would be the best choice. Please refer to: https://github.com/alecthomas/go_serialization_benchmarks. Personally, I believe sticking to the standard library would suffice. When it comes to choosing a serialization library, different users may have different preferences, much like when we chose a logging library before. |
Understood. I was just mentioning it because we have binary data, and JSON is not efficient for that, as it uses base64 encoding. I normally would not recommend gob (as it is Go-only) for any network-based service, but the use case here may be appropriate, as it is just to store/load what was in memory for the app. BTW, gob is part of the standard Go library. |
I've noticed that badgerhold is using gob. If we decide to use gob, then simply updating all UnmarshalBinary and MarshalBinary under the storage package to use gob would suffice. @thedevop I think it might be better for you to submit another PR for this change. |
I can do that, let's hear from @mochi-co first. |
@thedevop @mochi-co I've written a benchmark test case. You can refer to it and check if there's something wrong.
package serialization
import (
"bytes"
"encoding/gob"
"encoding/json"
"fmt"
"math"
"math/rand"
"testing"
)
// UserProperty is an arbitrary key-value pair for a packet user properties array.
// Both fields are strings and are serialized to JSON under the single-letter
// names "k" and "v".
type UserProperty struct { // [MQTT-1.5.7-1]
// Key is the property name (JSON field "k").
Key string `json:"k"`
// Val is the property value (JSON field "v").
Val string `json:"v"`
}
// Client is a storable representation of an MQTT client.
// It is the record type that the benchmarks in this file marshal and
// unmarshal. Every field carries a json tag; ID additionally carries a
// storm:"id" tag.
type Client struct {
Will ClientWill `json:"will"` // will topic and payload data if applicable
Properties ClientProperties `json:"properties"` // the connect properties for the client
Username []byte `json:"username"` // the username of the client
ID string `json:"id" storm:"id"` // the client id / storage key
T string `json:"t"` // the data type (client)
Remote string `json:"remote"` // the remote address of the client
Listener string `json:"listener"` // the listener the client connected on
ProtocolVersion byte `json:"protocolVersion"` // mqtt protocol version of the client
Clean bool `json:"clean"` // if the client requested a clean start/session
}
// ClientProperties contains a limited set of the mqtt v5 properties specific to a client connection.
// Each field is serialized to JSON under the camelCase form of its Go name.
// NOTE(review): the *Flag booleans presumably record whether the matching
// value was explicitly present in the packet — confirm against the MQTT
// packet decoder.
type ClientProperties struct {
// AuthenticationData is raw binary data; encoding/json emits []byte values
// as base64 strings.
AuthenticationData []byte `json:"authenticationData"`
// User holds the user-defined key-value properties.
User []UserProperty `json:"user"`
AuthenticationMethod string `json:"authenticationMethod"`
SessionExpiryInterval uint32 `json:"sessionExpiryInterval"`
MaximumPacketSize uint32 `json:"maximumPacketSize"`
ReceiveMaximum uint16 `json:"receiveMaximum"`
TopicAliasMaximum uint16 `json:"topicAliasMaximum"`
SessionExpiryIntervalFlag bool `json:"sessionExpiryIntervalFlag"`
RequestProblemInfo byte `json:"requestProblemInfo"`
RequestProblemInfoFlag bool `json:"requestProblemInfoFlag"`
RequestResponseInfo byte `json:"requestResponseInfo"`
}
// ClientWill contains a will message for a client, and limited mqtt v5 properties.
// Each field is serialized to JSON under the camelCase form of its Go name.
type ClientWill struct {
// Payload is the raw will message body; encoding/json emits []byte values
// as base64 strings.
Payload []byte `json:"payload"`
// User holds the user-defined key-value properties attached to the will.
User []UserProperty `json:"user"`
// TopicName is the topic the will message refers to.
TopicName string `json:"topicName"`
Flag uint32 `json:"flag"`
WillDelayInterval uint32 `json:"willDelayInterval"`
Qos byte `json:"qos"`
Retain bool `json:"retain"`
}
// Serializer is the encoding abstraction the benchmark helpers in this file
// are written against, so the same benchmark body can be run with different
// encodings.
type Serializer interface {
// Marshal encodes o and returns the encoded bytes, or an error.
Marshal(o interface{}) ([]byte, error)
// Unmarshal decodes d into o; o is expected to be a pointer to the
// destination value.
Unmarshal(d []byte, o interface{}) error
}
// randString returns a string of exactly l lowercase hexadecimal characters
// built from bytes drawn from math/rand's global source.
//
// Every random byte formats to two hex digits, so only (l+1)/2 bytes are
// generated and formatted; for odd l the surplus trailing digit is sliced off.
// l must be non-negative: a negative l panics when the result is sliced.
func randString(l int) string {
	// Allocate just the bytes that contribute to the result, not l bytes of
	// which half would be formatted as zeros and then thrown away.
	buf := make([]byte, (l+1)/2)
	for i := range buf {
		buf[i] = byte(rand.Intn(256))
	}
	return fmt.Sprintf("%x", buf)[:l]
}
// generate builds the benchmark fixture: 1000 clients, each with a random
// 16-character ID, MaximumPacketSize set to math.MaxUint32 and a random
// 256-byte will payload. All other fields are left at their zero values.
func generate() []*Client {
	const count = 1000

	clients := make([]*Client, count)
	for idx := range clients {
		c := new(Client)
		// The ID is drawn before the payload so the sequence of random
		// values consumed per client stays the same.
		c.ID = randString(16)
		c.Properties.MaximumPacketSize = math.MaxUint32
		c.Will.Payload = []byte(randString(256))
		clients[idx] = c
	}
	return clients
}
// benchMarshal is the shared body of the marshal benchmarks. It generates the
// fixture clients, resets the timer, then calls s.Marshal b.N times on a
// randomly selected client. Besides the standard timing and allocation
// figures it reports the mean encoded size per operation as "B/serial".
//
// Any marshal error aborts the benchmark via b.Fatalf.
func benchMarshal(b *testing.B, s Serializer) {
	b.Helper()
	data := generate()
	b.ReportAllocs()
	b.ResetTimer()

	var serialSize int
	for i := 0; i < b.N; i++ {
		o := data[rand.Intn(len(data))]
		// Named "encoded" rather than "bytes" so the local does not shadow
		// the standard library bytes package imported by this file.
		encoded, err := s.Marshal(o)
		if err != nil {
			b.Fatalf("marshal error %s for %#v", err, o)
		}
		serialSize += len(encoded)
	}
	b.ReportMetric(float64(serialSize)/float64(b.N), "B/serial")
}
// benchUnmarshal is the shared body of the unmarshal benchmarks. With the
// timer stopped it generates the fixture clients and pre-encodes each one
// with s.Marshal, reporting the mean encoded size as "B/serial". It then
// starts the timer and decodes a randomly chosen payload b.N times into a
// fresh Client.
//
// Each decoded client is checked against its source by ID only; a decode
// error or an ID mismatch aborts the benchmark via b.Fatalf. Note that the
// ID check runs inside the timed loop.
func benchUnmarshal(b *testing.B, s Serializer) {
	b.Helper()
	b.StopTimer()

	data := generate()
	ser := make([][]byte, len(data))
	var serialSize int
	for i, d := range data {
		encoded, err := s.Marshal(d)
		if err != nil {
			b.Fatal(err)
		}
		// Store a private copy of the encoded bytes — presumably so that a
		// serializer which reuses its output buffer cannot corrupt earlier
		// entries.
		t := make([]byte, len(encoded))
		serialSize += copy(t, encoded)
		ser[i] = t
	}
	b.ReportMetric(float64(serialSize)/float64(len(data)), "B/serial")
	b.ReportAllocs()
	b.StartTimer()

	for i := 0; i < b.N; i++ {
		n := rand.Intn(len(ser))
		got := &Client{}
		if err := s.Unmarshal(ser[n], got); err != nil {
			b.Fatalf("unmarshal error %s for %#x / %q", err, ser[n], ser[n])
		}
		// Validate unmarshalled data. The expected value is named "want" so
		// it does not shadow the loop counter i.
		want := data[n]
		if got.ID != want.ID {
			b.Fatalf("unmarshaled object differed:\n%v\n%v", want, got)
		}
	}
}
// GobSerializer implements Marshal/Unmarshal using encoding/gob. It holds no
// state: every call creates a fresh encoder or decoder, so each encoded
// payload is self-contained.
type GobSerializer struct{}

// Marshal gob-encodes o into a new buffer and returns the encoded bytes.
// On failure it returns a nil slice together with the encoder's error, so a
// partially written buffer is never handed to the caller.
func (g *GobSerializer) Marshal(o interface{}) ([]byte, error) {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(o); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}

// Unmarshal gob-decodes d into o, which must be a pointer to the destination
// value. It returns the decoder's error, if any.
func (g *GobSerializer) Unmarshal(d []byte, o interface{}) error {
	return gob.NewDecoder(bytes.NewReader(d)).Decode(o)
}
// NewGobSerializer registers the Client type with encoding/gob and returns a
// new GobSerializer. The registration is repeated on every call.
//
// NOTE(review): gob.Register is documented as needed only for concrete types
// carried inside interface-typed values, so it may not be required for the
// way this file uses the serializer — confirm before relying on it.
func NewGobSerializer() *GobSerializer {
	gob.Register(Client{})
	return new(GobSerializer)
}
func Benchmark_Gob_Marshal(b *testing.B) {
s := NewGobSerializer()
benchMarshal(b, s)
}
func Benchmark_Gob_Unmarshal(b *testing.B) {
s := NewGobSerializer()
benchUnmarshal(b, s)
}
// JsonSerializer implements Marshal/Unmarshal by delegating directly to
// encoding/json. It is stateless and usable as a zero value.
type JsonSerializer struct{}

// Marshal returns the JSON encoding of o, or the error from json.Marshal.
func (JsonSerializer) Marshal(o interface{}) ([]byte, error) {
	encoded, err := json.Marshal(o)
	return encoded, err
}

// Unmarshal parses the JSON document d into o, which must be a pointer to
// the destination value. It returns the error from json.Unmarshal, if any.
func (JsonSerializer) Unmarshal(d []byte, o interface{}) error {
	err := json.Unmarshal(d, o)
	return err
}
func Benchmark_Json_Marshal(b *testing.B) {
benchMarshal(b, JsonSerializer{})
}
func Benchmark_Json_Unmarshal(b *testing.B) {
benchUnmarshal(b, JsonSerializer{})
} |
Refer to #375 .