-
Notifications
You must be signed in to change notification settings - Fork 50
/
producer_daemon_aggregator.go
254 lines (211 loc) · 6.74 KB
/
producer_daemon_aggregator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
package stream
import (
"bytes"
"compress/gzip"
"context"
"fmt"
"io"
"strconv"
"github.com/justtrackio/gosoline/pkg/encoding/base64"
"github.com/justtrackio/gosoline/pkg/encoding/json"
)
const (
	// gzipMaxExpectedBuffer is the number of bytes we assume may still be
	// sitting in the gzip writer's internal buffer (i.e., written by us but
	// not yet flushed into the output buffer). The size limit is reduced by
	// this amount for gzip so a pending internal buffer cannot push the
	// finished aggregate over the configured maximum.
	gzipMaxExpectedBuffer = 8192
)
// Tokens used to frame the aggregated messages as a JSON array:
// '[' msg (',' msg)* ']'
var (
	jsonArrayStart = []byte("[")
	jsonArraySep = []byte(",")
	jsonArrayEnd = []byte("]")
)
// producerDaemonAggregator collects messages into a JSON array (optionally
// gzip compressed and base64 encoded) and flushes whenever either the
// message-count limit or the (estimated) byte-size limit is reached.
type producerDaemonAggregator struct {
	maxMessages int // flush once this many messages are in the aggregate
	maxBytes    int // flush once the estimated output size reaches this; 0 disables the size check
	compression CompressionType
	attributes  map[string]string // attached to every flushed aggregate
	encodeBase64 bool             // base64 encode the body (set for gzip output)
	buffer *bytes.Buffer  // accumulates the (possibly compressed) aggregate bytes
	writer io.WriteCloser // writes into buffer; a gzip writer or a nop-closing passthrough
	messageCount      int // messages in the current aggregate
	uncompressedBytes int // bytes handed to writer for the current aggregate, before compression
	// ratio of compressed output to uncompressed input observed at the last
	// flush; used to predict how much a new message will add to the aggregate
	expectedCompressionRatio float32
}
// AggregateFlush is one finished aggregate, ready to be published: the
// encoded body, the attributes to attach to it, and how many messages it
// contains.
type AggregateFlush struct {
	// Attributes holds the attributes for the aggregate (encoding,
	// compression, aggregate count, plus any caller-supplied sets).
	Attributes map[string]string
	// Body is the JSON array of messages, possibly gzipped and base64 encoded.
	Body string
	// MessageCount is the number of messages contained in Body.
	MessageCount int
}
//go:generate mockery --name ProducerDaemonAggregator

// ProducerDaemonAggregator combines individual messages into aggregates.
type ProducerDaemonAggregator interface {
	// Write adds a message to the current aggregate and returns any
	// aggregates that became complete as a result (zero, one, or - if the
	// new message forced an early flush first - two).
	Write(ctx context.Context, msg *Message) ([]AggregateFlush, error)
	// Flush terminates and returns the currently pending aggregate
	// regardless of how full it is, resetting the aggregator afterwards.
	Flush() ([]AggregateFlush, error)
}
// NewProducerDaemonAggregator creates an aggregator which combines messages
// into JSON array aggregates bounded by the configured message count and byte
// size. Any attribute sets passed in are merged (in order, later sets winning)
// into the attributes attached to every flushed aggregate.
func NewProducerDaemonAggregator(settings ProducerDaemonSettings, compression CompressionType, attributeSets ...map[string]string) (ProducerDaemonAggregator, error) {
	aggregator := &producerDaemonAggregator{
		maxMessages: settings.AggregationSize,
		maxBytes:    settings.AggregationMaxSize,
		compression: compression,
		attributes: map[string]string{
			AttributeEncoding: EncodingJson.String(),
		},
		// initially assume we don't perform any compression, first message might not be packed as tightly as the rest,
		// but if your app runs for a little longer you will already have a proper ratio here for the second message
		expectedCompressionRatio: 1,
	}

	for _, set := range attributeSets {
		for key, value := range set {
			aggregator.attributes[key] = value
		}
	}

	switch compression {
	case CompressionNone:
		aggregator.encodeBase64 = false
	case CompressionGZip:
		aggregator.encodeBase64 = true
		// For gzip there might be some bytes in the buffer which are not yet written to our writer.
		// We expect that not more than this many bytes are still pending to be written when we have
		// written a record to the stream. It is most likely not much higher than 500 bytes, so we
		// have quite some headroom here.
		// We could also be calling Flush after each element, but that would break runs between messages
		// and also add additional bytes to encode the flush.
		if aggregator.maxBytes > gzipMaxExpectedBuffer {
			aggregator.maxBytes -= gzipMaxExpectedBuffer
		}
		aggregator.attributes[AttributeCompression] = CompressionGZip.String()
	default:
		return nil, fmt.Errorf("unhandled compression type: %s", aggregator.compression)
	}

	if err := aggregator.reset(); err != nil {
		return nil, fmt.Errorf("failed to reset aggregate to initial state: %w", err)
	}

	return aggregator, nil
}
// Write encodes the given message and appends it to the current aggregate,
// returning any aggregates which became complete along the way: the pending
// aggregate is flushed first if the new message is predicted to exceed the
// size limit, and again afterwards if the aggregate is now full by count or
// size.
func (a *producerDaemonAggregator) Write(_ context.Context, msg *Message) ([]AggregateFlush, error) {
	encoded, err := json.Marshal(msg)
	if err != nil {
		return nil, fmt.Errorf("failed to encode message for aggregate: %w", err)
	}

	// predict the on-aggregate size of the message from the compression ratio observed so far
	predictedSize := int(float32(len(encoded)) * a.expectedCompressionRatio)

	var result []AggregateFlush

	// flush the pending aggregate up-front if appending this message would blow the size limit
	if a.maxBytes != 0 && a.messageCount > 0 && a.getCurrentSize(predictedSize) >= a.maxBytes {
		if result, err = a.Flush(); err != nil {
			return nil, err
		}
	}

	if err = a.write(encoded); err != nil {
		return nil, err
	}

	// flush again if the aggregate just became full, by message count or by (estimated) size
	sizeLimitHit := a.maxBytes != 0 && a.getCurrentSize(0) >= a.maxBytes
	if a.messageCount >= a.maxMessages || sizeLimitHit {
		flushed, flushErr := a.Flush()
		if flushErr != nil {
			return nil, flushErr
		}
		result = append(result, flushed...)
	}

	return result, nil
}
// getCurrentSize estimates the final size of the aggregate after appending a
// message of the given (possibly predicted) size, accounting for the closing
// ']' and for the 4/3 expansion of a base64-encoded body.
func (a *producerDaemonAggregator) getCurrentSize(newMessageSize int) int {
	// estimate current size - we need to write at least the terminating ']' character
	size := 1 + newMessageSize + a.buffer.Len()

	if !a.encodeBase64 {
		return size
	}

	// base64 inflates the payload by a factor of 4/3
	return size * 4 / 3
}
// write appends an already JSON-encoded message to the aggregate, prefixing
// it with a ',' separator when it is not the first element, and updates the
// message and uncompressed-byte counters.
func (a *producerDaemonAggregator) write(encodedMessage []byte) error {
	if a.messageCount != 0 {
		if _, err := a.writer.Write(jsonArraySep); err != nil {
			return fmt.Errorf("failed to write separator to buffer: %w", err)
		}

		a.uncompressedBytes++
	}

	if _, err := a.writer.Write(encodedMessage); err != nil {
		return fmt.Errorf("failed to write message to buffer: %w", err)
	}

	a.uncompressedBytes += len(encodedMessage)
	a.messageCount++

	return nil
}
// reset returns the aggregator to an empty state: counters are zeroed, the
// buffer is allocated (first call) or rewound (subsequent calls), the
// compressing writer is re-pointed at the buffer, and the opening '[' of the
// JSON array is written so the next message can be appended directly.
//
// Fixes over the previous version: the redundant `break` in the switch is
// removed (Go switch cases do not fall through; staticcheck S1023), and the
// error from writing the array start is now wrapped for context and no longer
// counted into uncompressedBytes when the write failed.
func (a *producerDaemonAggregator) reset() error {
	a.messageCount = 0
	a.uncompressedBytes = 0

	if a.buffer == nil {
		// allocate a nice, large buffer at the start (128 kb should be enough to fit most messages as we normally limit
		// to 64 kb, so there is some headroom in the end)
		a.buffer = bytes.NewBuffer(make([]byte, 0, 128*1024))

		switch a.compression {
		case CompressionGZip:
			a.writer = gzip.NewWriter(a.buffer)
		case CompressionNone:
			a.writer = newWriterNopCloser(a.buffer)
		default:
			return fmt.Errorf("unhandled compression type: %s", a.compression)
		}
	} else {
		// re-use the buffer, we take care that we read its contents and convert it to a string (thereby copying it)
		// before we reset the aggregator, otherwise we will in the next step start to overwrite the data we already wrote
		a.buffer.Reset()

		switch a.compression {
		case CompressionGZip:
			// a closed gzip.Writer can be reused after Reset
			a.writer.(*gzip.Writer).Reset(a.buffer)
		case CompressionNone:
			// nothing to do as we already reset the buffer
		default:
			return fmt.Errorf("unhandled compression type: %s", a.compression)
		}
	}

	if _, err := a.writer.Write(jsonArrayStart); err != nil {
		return fmt.Errorf("failed to write json array start: %w", err)
	}

	a.uncompressedBytes += 1

	return nil
}
type writerNopCloser struct {
io.Writer
}
func (w writerNopCloser) Close() error {
return nil
}
func newWriterNopCloser(writer io.Writer) io.WriteCloser {
return writerNopCloser{
Writer: writer,
}
}
// Flush terminates the current aggregate: it writes the closing ']', closes
// the writer (emitting the gzip footer when compressing), extracts the body,
// updates the observed compression ratio, resets the aggregator for the next
// batch, and returns the finished aggregate with its attributes.
func (a *producerDaemonAggregator) Flush() ([]AggregateFlush, error) {
	if _, err := a.writer.Write(jsonArrayEnd); err != nil {
		return nil, err
	}
	a.uncompressedBytes += 1
	// for gzip compression, close the writer to write the footer, without compression this is a no-op
	if err := a.writer.Close(); err != nil {
		return nil, fmt.Errorf("failed to close writer during flush: %w", err)
	}
	messageCount := a.messageCount
	var body string
	// copy the buffer contents into a string here, BEFORE reset() below starts
	// reusing the buffer - otherwise the next aggregate would overwrite this one
	if a.encodeBase64 {
		body = base64.EncodeToString(a.buffer.Bytes())
	} else {
		body = a.buffer.String()
	}
	// only update the expectation if we have some user data, if there are no messages in the aggregate, the ticker triggered,
	// and we would otherwise expect a compression ratio of much > 1.
	if messageCount > 0 {
		a.expectedCompressionRatio = float32(len(body)) / float32(a.uncompressedBytes)
	}
	if err := a.reset(); err != nil {
		return nil, err
	}
	attributes := map[string]string{
		AttributeAggregateCount: strconv.Itoa(messageCount),
	}
	// NOTE(review): a.attributes is merged AFTER the count was set, so a
	// caller-supplied AttributeAggregateCount would override the real count -
	// confirm this precedence is intended
	for k, v := range a.attributes {
		attributes[k] = v
	}
	return []AggregateFlush{
		{
			Attributes: attributes,
			MessageCount: messageCount,
			Body: body,
		},
	}, nil
}