This repository has been archived by the owner on Sep 11, 2020. It is now read-only.

storage: reuse deltas from packfiles #515

Merged · 3 commits · Jul 27, 2017

145 changes: 135 additions & 10 deletions plumbing/format/packfile/delta_selector.go
@@ -47,17 +47,123 @@ func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) {
var objectsToPack []*ObjectToPack
for _, h := range hashes {
o, err := dw.storer.EncodedObject(plumbing.AnyObject, h)
o, err := dw.encodedDeltaObject(h)
if err != nil {
return nil, err
}

objectsToPack = append(objectsToPack, newObjectToPack(o))
otp := newObjectToPack(o)
if _, ok := o.(plumbing.DeltaObject); ok {
otp.Original = nil
}

objectsToPack = append(objectsToPack, otp)
}

if err := dw.fixAndBreakChains(objectsToPack); err != nil {
return nil, err
}

return objectsToPack, nil
}

func (dw *deltaSelector) encodedDeltaObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
edos, ok := dw.storer.(storer.DeltaObjectStorer)
if !ok {
return dw.encodedObject(h)
}

return edos.DeltaObject(plumbing.AnyObject, h)
}

func (dw *deltaSelector) encodedObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
return dw.storer.EncodedObject(plumbing.AnyObject, h)
}

func (dw *deltaSelector) fixAndBreakChains(objectsToPack []*ObjectToPack) error {
m := make(map[plumbing.Hash]*ObjectToPack, len(objectsToPack))
for _, otp := range objectsToPack {
m[otp.Hash()] = otp
}

for _, otp := range objectsToPack {
if err := dw.fixAndBreakChainsOne(m, otp); err != nil {
return err
}
}

return nil
}

func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*ObjectToPack, otp *ObjectToPack) error {
Contributor:

Why "One"? What does it mean?

Collaborator Author:

It's one because it processes just one ObjectToPack.

if !otp.Object.Type().IsDelta() {
return nil
}

// Initial ObjectToPack instances might have a delta assigned to Object
// but no actual base initially. Once Base is assigned to a delta, it means
// we already fixed it.
if otp.Base != nil {
return nil
}

do, ok := otp.Object.(plumbing.DeltaObject)
if !ok {
// if this is not a DeltaObject, then we cannot retrieve its base,
// so we have to break the delta chain here.
return dw.undeltify(otp)
}

base, ok := objectsToPack[do.BaseHash()]
if !ok {
// The base of the delta is not in our list of objects to pack, so
// we break the chain.
return dw.undeltify(otp)
}

if base.Size() <= otp.Size() {
// Bases should be bigger
return dw.undeltify(otp)
}

if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil {
return err
}

otp.SetDelta(base, otp.Object)
return nil
}

func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error {
if otp.Original != nil {
return nil
}

if !otp.Object.Type().IsDelta() {
return nil
}

obj, err := dw.encodedObject(otp.Hash())
if err != nil {
return err
}

otp.Original = obj
return nil
}

// undeltify undeltifies an *ObjectToPack by retrieving the original object from
// the storer and resetting it.
func (dw *deltaSelector) undeltify(otp *ObjectToPack) error {
if err := dw.restoreOriginal(otp); err != nil {
return err
}

otp.Object = otp.Original
otp.Depth = 0
return nil
}

func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
sort.Sort(byTypeAndSize(objectsToPack))
}
@@ -66,15 +172,24 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
for i := 0; i < len(objectsToPack); i++ {
target := objectsToPack[i]

// We only want to create deltas from specific types
if !applyDelta[target.Original.Type()] {
// If we already have a delta, we don't try to find a new one for this
// object. This happens when a delta is set to be reused from an existing
// packfile.
if target.IsDelta() {
continue
}

// We only want to create deltas from specific types.
if !applyDelta[target.Type()] {
continue
}

for j := i - 1; j >= 0; j-- {
base := objectsToPack[j]
// Objects must use only the same type as their delta base.
if base.Original.Type() != target.Original.Type() {
// Since objectsToPack is sorted by type and size, once we find
// a different type, we know we won't find more of them.
if base.Type() != target.Type() {
break
}

@@ -89,7 +204,7 @@ func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {

func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
// If the sizes are radically different, this is a bad pairing.
if target.Original.Size() < base.Original.Size()>>4 {
if target.Size() < base.Size()>>4 {
return nil
}

@@ -106,10 +221,20 @@ func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
}

// If we have to insert a lot to make this work, find another.
if base.Original.Size()-target.Object.Size() > msz {
if base.Size()-target.Size() > msz {
return nil
}

// Original object might not be present if we're reusing a delta, so we
// ensure it is restored.
if err := dw.restoreOriginal(target); err != nil {
return err
}

if err := dw.restoreOriginal(base); err != nil {
return err
}

// Now we can generate the delta using originals
delta, err := GetDelta(base.Original, target.Original)
if err != nil {
@@ -162,13 +287,13 @@ func (a byTypeAndSize) Len() int { return len(a) }
func (a byTypeAndSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

func (a byTypeAndSize) Less(i, j int) bool {
if a[i].Object.Type() < a[j].Object.Type() {
if a[i].Type() < a[j].Type() {
return false
}

if a[i].Object.Type() > a[j].Object.Type() {
if a[i].Type() > a[j].Type() {
return true
}

return a[i].Object.Size() > a[j].Object.Size()
return a[i].Size() > a[j].Size()
}
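
A note on the lookup above: the selector now asks the storer for a delta representation first and only resolves the full object when the storer cannot provide one; when a raw delta comes back, Original is dropped and only restored later if a new delta has to be computed. Below is a minimal standalone sketch of that lookup, assuming only the storer.DeltaObjectStorer and plumbing.DeltaObject interfaces shown in this diff; the package name and the fetchPossiblyDelta helper are hypothetical and not part of the PR.

package deltasketch

import (
	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/storer"
)

// fetchPossiblyDelta mirrors encodedDeltaObject above: prefer the raw delta an
// existing packfile already contains, and only resolve the full object when the
// storer cannot serve deltas.
func fetchPossiblyDelta(s storer.EncodedObjectStorer, h plumbing.Hash) (plumbing.EncodedObject, error) {
	dos, ok := s.(storer.DeltaObjectStorer)
	if !ok {
		// This storer cannot hand out raw deltas; fall back to the resolved object.
		return s.EncodedObject(plumbing.AnyObject, h)
	}

	obj, err := dos.DeltaObject(plumbing.AnyObject, h)
	if err != nil {
		return nil, err
	}

	if d, isDelta := obj.(plumbing.DeltaObject); isDelta {
		// A reused delta carries the hash of its base; fixAndBreakChains uses it
		// to check that the base is also part of the pack being written.
		_ = d.BaseHash()
	}

	return obj, nil
}

Note that DeltaObject may still return a fully-resolved object when the hash is not stored deltified, which is why the type assertion against plumbing.DeltaObject at the call site decides whether Original can be dropped.
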
29 changes: 18 additions & 11 deletions plumbing/format/packfile/encoder.go
@@ -18,6 +18,9 @@ type Encoder struct {
w *offsetWriter
zw *zlib.Writer
hasher plumbing.Hasher
// offsets is a map of object hashes to their corresponding offsets in the packfile.
// It is used to determine the offset of the base of a delta when an OFS_DELTA is
// used.
offsets map[plumbing.Hash]int64
useRefDeltas bool
}
@@ -78,25 +81,24 @@ func (e *Encoder) head(numEntries int) error {

func (e *Encoder) entry(o *ObjectToPack) error {
offset := e.w.Offset()
e.offsets[o.Hash()] = offset

if o.IsDelta() {
if err := e.writeDeltaHeader(o, offset); err != nil {
return err
}
} else {
if err := e.entryHead(o.Object.Type(), o.Object.Size()); err != nil {
if err := e.entryHead(o.Type(), o.Size()); err != nil {
return err
}
}

// Save the position using the original hash, maybe a delta will need it
e.offsets[o.Original.Hash()] = offset

e.zw.Reset(e.w)
or, err := o.Object.Reader()
if err != nil {
return err
}

_, err = io.Copy(e.zw, or)
if err != nil {
return err
@@ -117,9 +119,9 @@ func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error {
}

if e.useRefDeltas {
return e.writeRefDeltaHeader(o.Base.Original.Hash())
return e.writeRefDeltaHeader(o.Base.Hash())
} else {
return e.writeOfsDeltaHeader(offset, o.Base.Original.Hash())
return e.writeOfsDeltaHeader(offset, o.Base.Hash())
}
}

@@ -128,14 +130,19 @@ func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error {
}

func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) error {
// because it is an offset delta, we need the base
// object position
offset, ok := e.offsets[base]
baseOffset, ok := e.offsets[base]
if !ok {
return fmt.Errorf("delta base not found. Hash: %v", base)
return fmt.Errorf("base for delta not found, base hash: %v", base)
}

// for OFS_DELTA, offset of the base is interpreted as negative offset
// relative to the type-byte of the header of the ofs-delta entry.
relativeOffset := deltaOffset-baseOffset
if relativeOffset <= 0 {
return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
}

return binary.WriteVariableWidthInt(e.w, deltaOffset-offset)
return binary.WriteVariableWidthInt(e.w, relativeOffset)
}

func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
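
For OFS_DELTA entries the encoder writes a distance rather than an absolute position: the value after the header is the positive number of bytes from the delta entry's own header back to the header of its base, which is why the base must already have an entry in e.offsets. A toy illustration of the arithmetic in writeOfsDeltaHeader, using made-up offsets rather than values from any real packfile:

package main

import "fmt"

func main() {
	// Hypothetical byte positions of entry headers within a packfile being written.
	offsets := map[string]int64{
		"base": 12, // the base object was written near the start of the pack
	}

	deltaOffset := int64(400) // position where the OFS_DELTA entry's header starts

	relativeOffset := deltaOffset - offsets["base"] // 388
	if relativeOffset <= 0 {
		// The base must already have been written, i.e. it must precede the delta;
		// this is the sanity check added to writeOfsDeltaHeader in this PR.
		panic("bad offset for OFS_DELTA entry")
	}

	// 388 is the value the encoder passes to binary.WriteVariableWidthInt; the
	// decoder later recovers the base at deltaOffset - relativeOffset.
	fmt.Println(relativeOffset)
}
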
91 changes: 91 additions & 0 deletions plumbing/format/packfile/encoder_advanced_test.go
@@ -0,0 +1,91 @@
package packfile_test

import (
"bytes"
"math/rand"

"gopkg.in/src-d/go-git.v4/plumbing"
. "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
"gopkg.in/src-d/go-git.v4/storage/filesystem"
"gopkg.in/src-d/go-git.v4/storage/memory"

"github.com/src-d/go-git-fixtures"
. "gopkg.in/check.v1"
)

type EncoderAdvancedSuite struct {
fixtures.Suite
}

var _ = Suite(&EncoderAdvancedSuite{})

func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) {
fixs := fixtures.Basic().ByTag("packfile").ByTag(".git")
fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git").
ByTag("packfile").ByTag(".git").One())
fixs.Test(c, func(f *fixtures.Fixture) {
storage, err := filesystem.NewStorage(f.DotGit())
c.Assert(err, IsNil)
s.testEncodeDecode(c, storage)
})

}

func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) {

objIter, err := storage.IterEncodedObjects(plumbing.AnyObject)
c.Assert(err, IsNil)

expectedObjects := map[plumbing.Hash]bool{}
var hashes []plumbing.Hash
err = objIter.ForEach(func(o plumbing.EncodedObject) error {
expectedObjects[o.Hash()] = true
hashes = append(hashes, o.Hash())
return err

})
c.Assert(err, IsNil)

// Shuffle hashes to avoid delta selector getting order right just because
// the initial order is correct.
auxHashes := make([]plumbing.Hash, len(hashes))
for i, j := range rand.Perm(len(hashes)) {
auxHashes[j] = hashes[i]
}
hashes = auxHashes

buf := bytes.NewBuffer(nil)
enc := NewEncoder(buf, storage, false)
_, err = enc.Encode(hashes)
c.Assert(err, IsNil)

scanner := NewScanner(buf)
storage = memory.NewStorage()
d, err := NewDecoder(scanner, storage)
c.Assert(err, IsNil)
_, err = d.Decode()
c.Assert(err, IsNil)

objIter, err = storage.IterEncodedObjects(plumbing.AnyObject)
c.Assert(err, IsNil)
obtainedObjects := map[plumbing.Hash]bool{}
err = objIter.ForEach(func(o plumbing.EncodedObject) error {
obtainedObjects[o.Hash()] = true
return nil
})
c.Assert(err, IsNil)
c.Assert(obtainedObjects, DeepEquals, expectedObjects)

for h := range obtainedObjects {
if !expectedObjects[h] {
c.Errorf("obtained unexpected object: %s", h)
}
}

for h := range expectedObjects {
if !obtainedObjects[h] {
c.Errorf("missing object: %s", h)
}
}
}
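
The same round trip can be driven outside the test harness. Below is a minimal sketch built only from the calls exercised in the test above; the package name, the roundTrip helper, and the assumption that src is already populated (for example by a previous fetch) are illustrative, not part of this PR.

package packsketch

import (
	"bytes"
	"fmt"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
	"gopkg.in/src-d/go-git.v4/plumbing/storer"
	"gopkg.in/src-d/go-git.v4/storage/memory"
)

// roundTrip packs every object in src into an in-memory buffer and then decodes
// the resulting packfile into a fresh in-memory storage.
func roundTrip(src storer.Storer) error {
	iter, err := src.IterEncodedObjects(plumbing.AnyObject)
	if err != nil {
		return err
	}

	var hashes []plumbing.Hash
	if err := iter.ForEach(func(o plumbing.EncodedObject) error {
		hashes = append(hashes, o.Hash())
		return nil
	}); err != nil {
		return err
	}

	var buf bytes.Buffer
	enc := packfile.NewEncoder(&buf, src, false) // false: emit OFS_DELTA rather than REF_DELTA
	if _, err := enc.Encode(hashes); err != nil {
		return err
	}

	dst := memory.NewStorage()
	dec, err := packfile.NewDecoder(packfile.NewScanner(&buf), dst)
	if err != nil {
		return err
	}
	if _, err := dec.Decode(); err != nil {
		return err
	}

	fmt.Printf("packed and decoded %d objects\n", len(hashes))
	return nil
}
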