Skip to content

Commit

Permalink
Merge pull request #1574 from ifedorenko/1567_optimize-pack-readHeader
Browse files Browse the repository at this point in the history
Optimize pack readHeader() implementation
  • Loading branch information
fd0 committed Jan 24, 2018
2 parents f86c141 + 953f3d5 commit fc6c341
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 27 deletions.
7 changes: 7 additions & 0 deletions changelog/0.8.2/pull-1574
@@ -0,0 +1,7 @@
Enhancement: Reduce number of remote requests reading pack header

This change eliminates extra remote repository calls for most pack
files and improves repository reindex and purge time.

https://github.com/restic/restic/issues/1567
https://github.com/restic/restic/pull/1574
63 changes: 36 additions & 27 deletions internal/pack/pack.go
Expand Up @@ -170,29 +170,14 @@ func (p *Packer) String() string {
return fmt.Sprintf("<Packer %d blobs, %d bytes>", len(p.blobs), p.bytes)
}

// readHeaderLength reads the trailing uint32 of the data accessible through
// rd and decodes it as little endian. size is the total number of bytes
// accessible in rd. An error is returned when the read fails or yields fewer
// bytes than the length field occupies.
func readHeaderLength(rd io.ReaderAt, size int64) (uint32, error) {
	lengthField := make([]byte, binary.Size(uint32(0)))

	// the length field occupies the very last bytes of the file
	read, err := rd.ReadAt(lengthField, size-int64(len(lengthField)))
	switch {
	case err != nil:
		return 0, errors.Wrap(err, "ReadAt")
	case read != len(lengthField):
		return 0, errors.New("not enough bytes read")
	}

	return binary.LittleEndian.Uint32(lengthField), nil
}

// maxHeaderSize is 16 MiB.
// NOTE(review): presumably the upper bound readHeader enforces on the header
// length; the check itself is not visible in this excerpt -- confirm.
const maxHeaderSize = 16 * 1024 * 1024

// we require at least one entry in the header, and one blob for a pack file
var minFileSize = entrySize + crypto.Extension

// eagerEntries is the number of header entries to download as part of the
// header-length request, so that headers with at most this many entries do
// not need a second read.
var eagerEntries = uint(15)

// readHeader reads the header at the end of rd. size is the length of the
// whole data accessible in rd.
func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
Expand All @@ -207,11 +192,25 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
return nil, errors.Wrap(err, "readHeader")
}

hl, err := readHeaderLength(rd, size)
// assuming extra request is significantly slower than extra bytes download,
// eagerly download eagerEntries header entries as part of header-length request.
// only make second request if actual number of entries is greater than eagerEntries

eagerHl := uint32((eagerEntries * entrySize) + crypto.Extension)
if int64(eagerHl) > size {
eagerHl = uint32(size) - uint32(binary.Size(uint32(0)))
}
eagerBuf := make([]byte, eagerHl+uint32(binary.Size(uint32(0))))

n, err := rd.ReadAt(eagerBuf, size-int64(len(eagerBuf)))
if err != nil {
return nil, err
}
if n != len(eagerBuf) {
return nil, errors.New("not enough bytes read")
}

hl := binary.LittleEndian.Uint32(eagerBuf[eagerHl:])
debug.Log("header length: %v", size)

if hl == 0 {
Expand Down Expand Up @@ -239,14 +238,24 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
return nil, errors.Wrap(err, "readHeader")
}

buf := make([]byte, int(hl))
n, err := rd.ReadAt(buf, size-int64(hl)-int64(binary.Size(hl)))
if err != nil {
return nil, errors.Wrap(err, "ReadAt")
}

if n != len(buf) {
return nil, errors.New("not enough bytes read")
eagerBuf = eagerBuf[:eagerHl]

var buf []byte
if hl <= eagerHl {
// already have all header bytes. yay.
buf = eagerBuf[eagerHl-hl:]
} else {
// need more header bytes
buf = make([]byte, hl)
missingHl := hl - eagerHl
n, err := rd.ReadAt(buf[:missingHl], size-int64(hl)-int64(binary.Size(hl)))
if err != nil {
return nil, errors.Wrap(err, "ReadAt")
}
if uint32(n) != missingHl {
return nil, errors.New("not enough bytes read")
}
copy(buf[hl-eagerHl:], eagerBuf)
}

return buf, nil
Expand Down
46 changes: 46 additions & 0 deletions internal/pack/pack_internal_test.go
@@ -0,0 +1,46 @@
package pack

import (
"bytes"
"encoding/binary"
"io"
"testing"

"github.com/restic/restic/internal/crypto"
rtest "github.com/restic/restic/internal/test"
)

// countingReaderAt wraps an io.ReaderAt and records how many times ReadAt has
// been called on it.
type countingReaderAt struct {
	delegate        io.ReaderAt // reader that actually serves the data
	invocationCount int         // number of ReadAt calls seen so far
}

// ReadAt increments the call counter and then forwards the read to the
// delegate, returning the delegate's result unchanged.
func (rd *countingReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
	rd.invocationCount++
	n, err = rd.delegate.ReadAt(p, off)
	return n, err
}

func TestReadHeaderEagerLoad(t *testing.T) {

testReadHeader := func(entryCount uint, expectedReadInvocationCount int) {
expectedHeader := rtest.Random(0, int(entryCount*entrySize)+crypto.Extension)

buf := &bytes.Buffer{}
buf.Write(rtest.Random(0, 100)) // pack blobs data
buf.Write(expectedHeader) // pack header
binary.Write(buf, binary.LittleEndian, uint32(len(expectedHeader))) // pack header length

rd := &countingReaderAt{delegate: bytes.NewReader(buf.Bytes())}

header, err := readHeader(rd, int64(buf.Len()))
rtest.OK(t, err)

rtest.Equals(t, expectedHeader, header)
rtest.Equals(t, expectedReadInvocationCount, rd.invocationCount)
}

testReadHeader(1, 1)
testReadHeader(eagerEntries-1, 1)
testReadHeader(eagerEntries, 1)
testReadHeader(eagerEntries+1, 2)
}

0 comments on commit fc6c341

Please sign in to comment.