Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read from branch with compaction data #7701

Merged
merged 29 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
deee833
Read from branch with compaction data
idanovo Apr 25, 2024
036a2b4
Remove unneeded test
idanovo Apr 25, 2024
82694ff
Fix comments
idanovo Apr 28, 2024
90059b0
Merge branch 'master' of https://github.com/treeverse/lakeFS into rea…
idanovo May 1, 2024
c554d87
WIP
idanovo May 2, 2024
0170d65
WIP
idanovo May 2, 2024
5f44d2b
WIP
idanovo May 2, 2024
644a04d
WIP
idanovo May 3, 2024
18742fa
WIP
idanovo May 3, 2024
6cf1337
PR reviews
idanovo May 6, 2024
11a448f
WIP
idanovo May 6, 2024
dfb54e6
Fix review
idanovo May 6, 2024
f609ec2
Added tests
idanovo May 6, 2024
eb9c4e4
Added GC tests
idanovo May 7, 2024
d51ead2
PR review
idanovo May 7, 2024
3adc263
Fix
idanovo May 7, 2024
70c49d0
Fix tests
idanovo May 7, 2024
ca9a9cb
Merge branch 'master' of https://github.com/treeverse/lakeFS into rea…
idanovo May 8, 2024
29e5085
WIP
idanovo May 8, 2024
c2cef8b
Added compaction to gc test
idanovo May 8, 2024
005f718
Rename function name
idanovo May 8, 2024
2717510
Merge branch 'master' of https://github.com/treeverse/lakeFS into rea…
idanovo May 9, 2024
0400e6c
Review comments
idanovo May 9, 2024
23221ae
Merge branch 'master' of https://github.com/treeverse/lakeFS into rea…
idanovo May 15, 2024
1c10a6e
Fix PR review
idanovo May 18, 2024
19d797c
Merge branch 'master' of https://github.com/treeverse/lakeFS into rea…
idanovo May 18, 2024
2a0f495
Lint
idanovo May 19, 2024
4c46b26
Merge branch 'master' of https://github.com/treeverse/lakeFS into rea…
idanovo Jun 10, 2024
3426e0b
PR review
idanovo Jun 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 50 additions & 40 deletions pkg/graveler/joined_diff_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
iterAHasMore bool
iterB DiffIterator
iterBHasMore bool
p DiffIterator
started bool
// the current iterator that has the value to return (iterA or iterB)
currenIter DiffIterator
started bool
}

func NewJoinedDiffIterator(iterA DiffIterator, iterB DiffIterator) *JoinedDiffIterator {
Expand All @@ -20,13 +21,28 @@
iterAHasMore: true,
iterB: iterB,
iterBHasMore: true,
p: nil,
currenIter: nil,
}
}

// progressIterByKey advances the iterators to the next key when both iterators still has more keys
func (c *JoinedDiffIterator) progressIterByKey(keyA Key, keyB Key) {
compareResult := bytes.Compare(keyA, keyB)
switch {
case compareResult == 0:
// key exists in both iterators
c.iterAHasMore = c.iterA.Next()
c.iterBHasMore = c.iterB.Next()
case compareResult < 0:
// value of iterA > value of iterB
c.iterAHasMore = c.iterA.Next()
default:
// value of iterA < value of iterB
c.iterBHasMore = c.iterB.Next()
}
}

func (c *JoinedDiffIterator) Next() bool {
valA := c.iterA.Value()
valB := c.iterB.Value()
switch {
case !c.started:
// first
Expand All @@ -38,68 +54,62 @@
return false
case !c.iterAHasMore:
// iterA is done
c.p = c.iterB
c.currenIter = c.iterB
c.iterBHasMore = c.iterB.Next()
case !c.iterBHasMore:
// iterB is done
c.p = c.iterA
c.iterAHasMore = c.iterA.Next()
case bytes.Equal(valA.Key, valB.Key):
c.iterAHasMore = c.iterA.Next()
c.iterBHasMore = c.iterB.Next()
case bytes.Compare(valA.Key, valB.Key) < 0:
c.currenIter = c.iterA
c.iterAHasMore = c.iterA.Next()
c.iterBHasMore = true
default:
// value of iterA < value of iterB
c.iterBHasMore = c.iterB.Next()
c.iterAHasMore = true
// both iterators has more keys- progress by key
c.progressIterByKey(c.iterA.Value().Key, c.iterB.Value().Key)
}
if c.iterA.Err() != nil {
c.p = c.iterA
c.currenIter = c.iterA
c.iterAHasMore = false
c.iterBHasMore = false
return false
}
if c.iterB.Err() != nil {
c.p = c.iterB
c.currenIter = c.iterB
c.iterAHasMore = false
c.iterBHasMore = false
return false
}
if !c.iterAHasMore && !c.iterBHasMore {
return false

// set c.currenIter to be the next (smaller) value

// if one of the iterators is done, set the other one as the current iterator and return
if !c.iterAHasMore {
c.currenIter = c.iterB
if !c.iterBHasMore {

Check failure on line 85 in pkg/graveler/joined_diff_iterator.go

View workflow job for this annotation

GitHub Actions / Run Linters and Checkers

S1008: should use 'return c.iterBHasMore' instead of 'if !c.iterBHasMore { return false }; return true' (gosimple)
return false
}
return true
} else if !c.iterBHasMore {
c.currenIter = c.iterA
return true
}

// set c.p to be the next (smaller) value
valA = c.iterA.Value()
valB = c.iterB.Value()
switch {
case !c.iterAHasMore && !c.iterBHasMore:
c.p = c.iterA
return false
case !c.iterAHasMore:
c.p = c.iterB
case !c.iterBHasMore:
c.p = c.iterA
case bytes.Compare(valA.Key, valB.Key) <= 0:
c.p = c.iterA
default:
c.p = c.iterB
// if both iterators has more keys, set the iterator with the smaller key as the current iterator
if bytes.Compare(c.iterA.Value().Key, c.iterB.Value().Key) <= 0 {
c.currenIter = c.iterA
} else {
c.currenIter = c.iterB
}
return true
}

func (c *JoinedDiffIterator) Value() *Diff {
if c.p == nil {
if c.currenIter == nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please note that calling Value() here is an error. Even though you return successfully, the caller will probably panic shortly after. Your call, but personally I would at least log an "internal error".

return nil
}
return c.p.Value()
return c.currenIter.Value()
}

func (c *JoinedDiffIterator) Err() error {
if c.p != nil && c.p.Err() != nil {
return c.p.Err()
if c.currenIter != nil && c.currenIter.Err() != nil {
return c.currenIter.Err()
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems wrong: it says that if one iterator failed but the other still works, then everything is OK. But it's not! Next() returned false, and I need to fail.

Suggested change
if c.currenIter != nil && c.currenIter.Err() != nil {
return c.currenIter.Err()
}
if c.iterA.Err() != nil { // TODO(idan): Return a multierror with _both_ errors if both failed!
return c.iterA.Err()
}
if c.iterB.Err() != nil {
return c.iterB.Err()
}
return nil

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need a unit test for this, too, unfortunately.

return nil
}
Expand All @@ -110,7 +120,7 @@
}

func (c *JoinedDiffIterator) SeekGE(id Key) {
c.p = nil
c.currenIter = nil
c.iterA.SeekGE(id)
c.iterB.SeekGE(id)
}
Loading
Loading