Skip to content

Commit

Permalink
Better performance for Bulk API
Browse files Browse the repository at this point in the history
This commit has a few performance-related changes. It comes in two
flavours.

First, the way that JSON got serialized was changed from using a simple
`map[string]interface{}` to a specific Go type. That had the following impact:

```
benchmark                                    old ns/op     new ns/op     delta
BenchmarkBulkIndexRequestSerialization-8     3348          2285          -31.75%

benchmark                                    old allocs     new allocs     delta
BenchmarkBulkIndexRequestSerialization-8     34             18             -47.06%

benchmark                                    old bytes     new bytes     delta
BenchmarkBulkIndexRequestSerialization-8     2464          1744          -29.22%

benchmark                                     old ns/op     new ns/op     delta
BenchmarkBulkUpdateRequestSerialization-8     3131          2091          -33.22%

benchmark                                     old allocs     new allocs     delta
BenchmarkBulkUpdateRequestSerialization-8     39             19             -51.28%

benchmark                                     old bytes     new bytes     delta
BenchmarkBulkUpdateRequestSerialization-8     2624          2624          +0.00%

benchmark                                     old ns/op     new ns/op     delta
BenchmarkBulkDeleteRequestSerialization-8     2233          1158          -48.14%

benchmark                                     old allocs     new allocs     delta
BenchmarkBulkDeleteRequestSerialization-8     28             11             -60.71%

benchmark                                     old bytes     new bytes     delta
BenchmarkBulkDeleteRequestSerialization-8     1744          1664          -4.59%
```

Next, we added an opt-in setting that makes the Bulk API use
`github.com/mailru/easyjson` instead of `encoding/json`. This yields
the following results (compared to the baseline):

```
benchmark                                    old ns/op     new ns/op     delta
BenchmarkBulkIndexRequestSerialization-8     3348          1692          -49.46%

benchmark                                    old allocs     new allocs     delta
BenchmarkBulkIndexRequestSerialization-8     34             12             -64.71%

benchmark                                    old bytes     new bytes     delta
BenchmarkBulkIndexRequestSerialization-8     2464          1328          -46.10%

benchmark                                     old ns/op     new ns/op     delta
BenchmarkBulkUpdateRequestSerialization-8     3131          1072          -65.76%

benchmark                                     old allocs     new allocs     delta
BenchmarkBulkUpdateRequestSerialization-8     39             10             -74.36%

benchmark                                     old bytes     new bytes     delta
BenchmarkBulkUpdateRequestSerialization-8     2624          1952          -25.61%

benchmark                                     old ns/op     new ns/op     delta
BenchmarkBulkDeleteRequestSerialization-8     2233          593           -73.44%

benchmark                                     old allocs     new allocs     delta
BenchmarkBulkDeleteRequestSerialization-8     28             5              -82.14%

benchmark                                     old bytes     new bytes     delta
BenchmarkBulkDeleteRequestSerialization-8     1744          1280          -26.61%
```

You can enable easyjson serialization with `UseEasyJSON(true)` on the individual
`BulkIndexRequest`, `BulkUpdateRequest`, and `BulkDeleteRequest`. Using
easyjson is **experimental** for now. As easyjson relies on code
generation, you need to run `go generate` when structs change, in order
to regenerate the underlying code that easyjson makes use of.

See #667 for discussion.
  • Loading branch information
olivere committed Jan 12, 2018
1 parent d2219c2 commit e771061
Show file tree
Hide file tree
Showing 11 changed files with 1,194 additions and 152 deletions.
75 changes: 48 additions & 27 deletions bulk_delete_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

package elastic

//go:generate easyjson bulk_delete_request.go

import (
"encoding/json"
"fmt"
Expand All @@ -27,13 +29,39 @@ type BulkDeleteRequest struct {
versionType string // default is "internal"

source []string

useEasyJSON bool
}

// bulkDeleteRequestCommand is the top-level JSON object of a bulk delete
// line. It maps the action name (Source uses the key "delete") to the
// metadata of that action.
//
//easyjson:json
type bulkDeleteRequestCommand map[string]bulkDeleteRequestCommandOp

// bulkDeleteRequestCommandOp holds the metadata fields of a single delete
// action. Every field is tagged omitempty, so zero values (empty strings,
// a Version of 0) are left out of the serialized JSON.
//
//easyjson:json
type bulkDeleteRequestCommandOp struct {
Index string `json:"_index,omitempty"`
Type string `json:"_type,omitempty"`
Id string `json:"_id,omitempty"`
Parent string `json:"_parent,omitempty"`
Routing string `json:"_routing,omitempty"`
Version int64 `json:"_version,omitempty"`
VersionType string `json:"_version_type,omitempty"`
}

// NewBulkDeleteRequest returns a new BulkDeleteRequest with all fields
// at their zero values.
func NewBulkDeleteRequest() *BulkDeleteRequest {
return &BulkDeleteRequest{}
}

// UseEasyJSON is an experimental setting that enables serialization
// with github.com/mailru/easyjson, which should result in faster
// serialization and fewer allocations, at the cost of removed compatibility
// with encoding/json, usage of unsafe etc.
// See https://github.com/mailru/easyjson#issues-notes-and-limitations
// for details. This setting is disabled by default.
//
// It returns the receiver so that calls can be chained.
func (r *BulkDeleteRequest) UseEasyJSON(enable bool) *BulkDeleteRequest {
r.useEasyJSON = enable
return r
}

// Index specifies the Elasticsearch index to use for this delete request.
// If unspecified, the index set on the BulkService will be used.
func (r *BulkDeleteRequest) Index(index string) *BulkDeleteRequest {
Expand Down Expand Up @@ -106,39 +134,32 @@ func (r *BulkDeleteRequest) Source() ([]string, error) {
if r.source != nil {
return r.source, nil
}
lines := make([]string, 1)

source := make(map[string]interface{})
deleteCommand := make(map[string]interface{})
if r.index != "" {
deleteCommand["_index"] = r.index
command := bulkDeleteRequestCommand{
"delete": bulkDeleteRequestCommandOp{
Index: r.index,
Type: r.typ,
Id: r.id,
Routing: r.routing,
Parent: r.parent,
Version: r.version,
VersionType: r.versionType,
},
}
if r.typ != "" {
deleteCommand["_type"] = r.typ
}
if r.id != "" {
deleteCommand["_id"] = r.id
}
if r.parent != "" {
deleteCommand["_parent"] = r.parent
}
if r.routing != "" {
deleteCommand["_routing"] = r.routing
}
if r.version > 0 {
deleteCommand["_version"] = r.version
}
if r.versionType != "" {
deleteCommand["_version_type"] = r.versionType
}
source["delete"] = deleteCommand

body, err := json.Marshal(source)
var err error
var body []byte
if r.useEasyJSON {
// easyjson
body, err = command.MarshalJSON()
} else {
// encoding/json
body, err = json.Marshal(command)
}
if err != nil {
return nil, err
}

lines[0] = string(body)
lines := []string{string(body)}
r.source = lines

return lines, nil
Expand Down
230 changes: 230 additions & 0 deletions bulk_delete_request_easyjson.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 15 additions & 4 deletions bulk_delete_request_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,21 @@ func TestBulkDeleteRequestSerialization(t *testing.T) {
{
Request: NewBulkDeleteRequest().Index("index1").Type("doc").Id("1"),
Expected: []string{
`{"delete":{"_id":"1","_index":"index1","_type":"doc"}}`,
`{"delete":{"_index":"index1","_type":"doc","_id":"1"}}`,
},
},
// #1
{
Request: NewBulkDeleteRequest().Index("index1").Type("doc").Id("1").Parent("2"),
Expected: []string{
`{"delete":{"_id":"1","_index":"index1","_parent":"2","_type":"doc"}}`,
`{"delete":{"_index":"index1","_type":"doc","_id":"1","_parent":"2"}}`,
},
},
// #2
{
Request: NewBulkDeleteRequest().Index("index1").Type("doc").Id("1").Routing("3"),
Expected: []string{
`{"delete":{"_id":"1","_index":"index1","_routing":"3","_type":"doc"}}`,
`{"delete":{"_index":"index1","_type":"doc","_id":"1","_routing":"3"}}`,
},
},
}
Expand All @@ -58,11 +58,22 @@ func TestBulkDeleteRequestSerialization(t *testing.T) {
var bulkDeleteRequestSerializationResult string

// BenchmarkBulkDeleteRequestSerialization measures serialization of a
// BulkDeleteRequest in two sub-benchmarks: "stdlib" (UseEasyJSON(false))
// and "easyjson" (UseEasyJSON(true)). Each sub-benchmark builds its own
// request so the two runs do not share state.
func BenchmarkBulkDeleteRequestSerialization(b *testing.B) {
r := NewBulkDeleteRequest().Index(testIndexName).Type("doc").Id("1")
b.Run("stdlib", func(b *testing.B) {
r := NewBulkDeleteRequest().Index(testIndexName).Type("doc").Id("1")
benchmarkBulkDeleteRequestSerialization(b, r.UseEasyJSON(false))
})
b.Run("easyjson", func(b *testing.B) {
r := NewBulkDeleteRequest().Index(testIndexName).Type("doc").Id("1")
benchmarkBulkDeleteRequestSerialization(b, r.UseEasyJSON(true))
})
}

// benchmarkBulkDeleteRequestSerialization is the shared benchmark body.
// It calls r.String() b.N times, clearing r.source after every call, and
// stores the last result in a package-level variable. Allocation
// reporting is enabled for the benchmark.
func benchmarkBulkDeleteRequestSerialization(b *testing.B, r *BulkDeleteRequest) {
var s string
for n := 0; n < b.N; n++ {
s = r.String()
r.source = nil // Don't let caching spoil the benchmark
}
bulkDeleteRequestSerializationResult = s // ensure the compiler doesn't optimize
b.ReportAllocs()
}

0 comments on commit e771061

Please sign in to comment.