Skip to content

Commit eb4da17

Browse files
authored
perf: optimize ReadFrame (#50)
Some small — micro? — optimizations suggested by an LLM that hopefully do not harm the readability/clarity of the code. The individual optimizations are summarized here, with references to the original commits on GitHub:

1. Use fixed-size arrays where possible to minimize heap allocations (c13460f)
   - ~1.5% improvement over baseline in `sec/op` and `B/op`
2. Convert bytes to lengths directly, skipping `binary.Read()` (98daa47)
   - Why? `binary.Read()` uses reflection on the destination, copies bytes instead of using them in place, and takes an `io.Reader` interface, which requires method dispatch
   - Additional ~0.25% improvement over (1) in `sec/op` and `B/op`
3. Manually optimize payload masking (a5569e2)
   - Additional ~14% improvement over (2) 🔥
4. Replace modulo division with a bitwise AND (70fcb03)
   - Why? TIL that `i % N == i & (N - 1)` when `N` is a power of 2 (and `i` is non-negative), and bitwise AND is usually a single CPU cycle vs multiple cycles for modulo division
   - Another ~1.5% improvement over (3)

With all of these combined, it appears that we've improved ReadFrame's throughput by ~20%!

Note: All intermediate results are combined into the edit history of this comment[1], which shows only the latest results.

[1]: #50 (comment)
1 parent 9efd887 commit eb4da17

File tree

1 file changed

+40
-15
lines changed

1 file changed

+40
-15
lines changed

proto.go

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -210,14 +210,20 @@ var formatPayload = func(p []byte) string {
210210

211211
// ReadFrame reads a [Frame] from the wire.
212212
func ReadFrame(buf io.Reader, mode Mode, maxPayloadLen int) (*Frame, error) {
213-
header := make([]byte, 2)
214-
if _, err := io.ReadFull(buf, header); err != nil {
213+
// Frame header is 2 bytes:
214+
// https://datatracker.ietf.org/doc/html/rfc6455#section-5.2
215+
//
216+
// First byte contains FIN, RSV1-3, OPCODE bits, but we store that byte
217+
// as-is and only extract the bits when needed.
218+
//
219+
// Second byte contains MASK bit and payload length, which must be
220+
// extracted here to read the rest of the payload.
221+
var header [2]byte
222+
if _, err := io.ReadFull(buf, header[:]); err != nil {
215223
return nil, newError(StatusAbnormalClose, "error reading frame header: %w", err)
216224
}
217-
218-
// figure out how to parse payload
219225
var (
220-
masked = header[1]&maskedMask != 0
226+
masked = (header[1] & maskedMask) != 0
221227
payloadLen = uint64(header[1] & payloadLenMask)
222228
)
223229

@@ -233,28 +239,29 @@ func ReadFrame(buf io.Reader, mode Mode, maxPayloadLen int) (*Frame, error) {
233239
case 126:
234240
// Payload lengths 126 to 65535 are represented in the next 2 bytes
235241
// (16-bit unsigned integer)
236-
var l uint16
237-
if err := binary.Read(buf, binary.BigEndian, &l); err != nil {
242+
var extendedLenBuf [2]byte
243+
if _, err := io.ReadFull(buf, extendedLenBuf[:]); err != nil {
238244
return nil, newError(StatusAbnormalClose, "error reading 2-byte extended payload length: %w", err)
239245
}
240-
payloadLen = uint64(l)
246+
payloadLen = uint64(binary.BigEndian.Uint16(extendedLenBuf[:]))
241247
case 127:
242248
// Payload lengths >= 65536 are represented in the next 8 bytes
243249
// (64-bit unsigned integer)
244-
if err := binary.Read(buf, binary.BigEndian, &payloadLen); err != nil {
250+
var extendedLenBuf [8]byte
251+
if _, err := io.ReadFull(buf, extendedLenBuf[:]); err != nil {
245252
return nil, newError(StatusAbnormalClose, "error reading 8-byte extended payload length: %w", err)
246253
}
254+
payloadLen = binary.BigEndian.Uint64(extendedLenBuf[:])
247255
}
248256

249257
if payloadLen > uint64(maxPayloadLen) {
250258
return nil, ErrFrameTooLarge
251259
}
252260

253261
// read mask key (if present)
254-
var mask []byte
262+
var mask MaskingKey
255263
if masked {
256-
mask = make([]byte, 4)
257-
if _, err := io.ReadFull(buf, mask); err != nil {
264+
if _, err := io.ReadFull(buf, mask[:]); err != nil {
258265
return nil, newError(StatusAbnormalClose, "error reading mask key: %w", err)
259266
}
260267
}
@@ -265,9 +272,7 @@ func ReadFrame(buf io.Reader, mode Mode, maxPayloadLen int) (*Frame, error) {
265272
return nil, newError(StatusAbnormalClose, "error reading %d byte payload: %w", payloadLen, err)
266273
}
267274
if masked {
268-
for i, b := range payload {
269-
payload[i] = b ^ mask[i%4]
270-
}
275+
applyMask(payload, mask)
271276
}
272277
return &Frame{
273278
header: header[0],
@@ -478,3 +483,23 @@ func NewMaskingKey() MaskingKey {
478483
}
479484
return key
480485
}
486+
487+
// applyMask optimizes payload masking by working 8 bytes at a time.
488+
func applyMask(payload []byte, mask MaskingKey) {
489+
n := len(payload)
490+
chunks := n / 8
491+
for i := 0; i < chunks; i++ {
492+
pos := i * 8
493+
payload[pos+0] ^= mask[0]
494+
payload[pos+1] ^= mask[1]
495+
payload[pos+2] ^= mask[2]
496+
payload[pos+3] ^= mask[3]
497+
payload[pos+4] ^= mask[0]
498+
payload[pos+5] ^= mask[1]
499+
payload[pos+6] ^= mask[2]
500+
payload[pos+7] ^= mask[3]
501+
}
502+
for i := chunks * 8; i < n; i++ {
503+
payload[i] ^= mask[i&3] // apparently i&3 faster than i%4
504+
}
505+
}

0 commit comments

Comments
 (0)