Skip to content

Commit

Permalink
Implement cataloger current diff using scanners (#790)
Browse files Browse the repository at this point in the history
  • Loading branch information
nopcoder committed Oct 19, 2020
1 parent baadd89 commit 0f923de
Show file tree
Hide file tree
Showing 16 changed files with 1,101 additions and 704 deletions.
443 changes: 360 additions & 83 deletions catalog/cataloger_diff.go

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion catalog/cataloger_merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import (
"github.com/treeverse/lakefs/logging"
)

// Merge perform diff between two branches (left and right), apply changes on right branch and commit
// It uses the cataloger diff internal API to produce a temporary table that we delete at the end of a successful merge
// the table holds entry ctid to reference entries in case of changed/added and source branch in case of delete.
// That information is used to address cases where we need to create new entry or tombstone as part of the merge
func (c *cataloger) Merge(ctx context.Context, repository, leftBranch, rightBranch, committer, message string, metadata Metadata) (*MergeResult, error) {
if err := Validate(ValidateFields{
{Name: "repository", IsValid: ValidateRepositoryName(repository)},
Expand Down Expand Up @@ -39,7 +43,7 @@ func (c *cataloger) Merge(ctx context.Context, repository, leftBranch, rightBran
return nil, fmt.Errorf("branch relation: %w", err)
}

err = c.doDiffByRelation(ctx, tx, relation, leftID, rightID)
err = c.doDiffByRelation(ctx, tx, relation, leftID, rightID, -1, "")
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -188,6 +192,7 @@ func (c *cataloger) mergeFromChild(ctx context.Context, tx db.Tx, previousMaxCom
if err != nil {
return err
}
// delete(mark max-commit) entries in parent that are either deleted or changed from child
_, err = tx.Exec(`UPDATE catalog_entries SET max_commit = $2
WHERE branch_id = $1 AND max_commit = $5
AND path in (SELECT path FROM `+diffResultsTableName+` WHERE diff_type IN ($3,$4))`,
Expand Down
121 changes: 88 additions & 33 deletions catalog/cataloger_merge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -700,11 +700,16 @@ func TestCataloger_Merge_FromChildNewEntrySameEntry(t *testing.T) {
if len(commitLog.Parents) != 2 {
t.Fatal("merge commit log should have two parents")
}
if diff := deep.Equal(res.Summary, map[DifferenceType]int{}); diff != nil {

if diff := deep.Equal(res.Summary, map[DifferenceType]int{
DifferenceTypeChanged: 1,
}); diff != nil {
t.Fatal("Merge Summary", diff)
}
// TODO(barak): enable test after diff between commits is supported
//expectedDifferences = Differences{}
//expectedDifferences := Differences{
// Difference{Type: DifferenceTypeModified, Path: "/file0"},
//}
//differences, _, err := c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "")
//testutil.MustDo(t, "diff merge changes", err)
//if !differences.Equal(expectedDifferences) {
Expand Down Expand Up @@ -1019,8 +1024,24 @@ func TestCataloger_Merge_FromParentThreeBranchesExtended1(t *testing.T) {
_, err = c.Merge(ctx, repository, "branch2", "branch1", "tester", "pushing /file111 down", nil)
testutil.MustDo(t, "merge branch2 to branch1", err)

_, err = c.Merge(ctx, repository, "branch1", "master", "tester", "pushing /file111 down", nil)
testutil.MustDo(t, "merge branch1 to master", err)
res, err = c.Merge(ctx, repository, "branch1", "master", "tester", "pushing /file111 down", nil)
if !errors.Is(err, ErrConflictFound) {
t.Fatalf("Merge err=%s, expected conflict", err)
}
if res == nil {
t.Fatal("Expected merge result, got none")
} else if res.Reference != "" {
t.Fatalf("Expected empty reference, got %s", res.Reference)
}
if diff := deep.Equal(res.Summary, map[DifferenceType]int{
DifferenceTypeConflict: 1,
}); diff != nil {
t.Fatal("Merge Summary", diff)
}

// delete the file to resolve conflict
testutil.MustDo(t, "delete the conflict file on master",
c.DeleteEntry(ctx, repository, "master", "/file111"))

// push file111 delete
_, err = c.Merge(ctx, repository, "branch1", "branch2", "tester", "delete /file111 up", nil)
Expand Down Expand Up @@ -1055,30 +1076,6 @@ func TestCataloger_Merge_FromParentThreeBranchesExtended1(t *testing.T) {
//if !differences.Equal(expectedDifferences) {
// t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences))
//}

res, err = c.Merge(ctx, repository, "branch1", "master", "tester", "try delete /file111 . get conflict", nil)
if !errors.Is(err, ErrConflictFound) {
t.Fatalf("Expected to get conflict error, got err=%+v", err)
}
if res == nil {
t.Fatal("Expected merge result, got none")
} else if res.Reference != "" {
t.Fatalf("Expected empty reference, got %s", res.Reference)
}
if diff := deep.Equal(res.Summary, map[DifferenceType]int{
DifferenceTypeConflict: 1,
}); diff != nil {
t.Fatal("Merge Summary", diff)
}
// TODO(barak): enable test after diff between commits is supported
//expectedDifferences = Differences{
// Difference{Type: DifferenceTypeConflict, Path: "/file111"},
//}
//differences, _, err = c.Diff(ctx, repository, commitLog.Parents[0], commitLog.Parents[1], -1, "")
//testutil.MustDo(t, "diff merge changes", err)
//if !differences.Equal(expectedDifferences) {
// t.Errorf("Merge differences = %s, expected %s", spew.Sdump(differences), spew.Sdump(expectedDifferences))
//}
}

func TestCataloger_MergeOverDeletedEntries(t *testing.T) {
Expand Down Expand Up @@ -1117,7 +1114,7 @@ func TestCataloger_MergeOverDeletedEntries(t *testing.T) {
}
}

func TestCataloger_MergeWitoutDiff(t *testing.T) {
func TestCataloger_MergeWithoutDiff(t *testing.T) {
ctx := context.Background()
c := testCataloger(t)
// setup a report with 'master' with a single file, and branch 'b1' that started after the file was committed
Expand All @@ -1128,8 +1125,8 @@ func TestCataloger_MergeWitoutDiff(t *testing.T) {
_, err = c.CreateBranch(ctx, repository, "b1", "master")
testutil.MustDo(t, "create branch b1", err)
_, err = c.Merge(ctx, repository, "master", "b1", "tester", "merge nothing from master to b1", nil)
if err.Error() != "no difference was found" {
t.Fatal("did not get 'nothing to commit' error")
if !errors.Is(err, ErrNoDifferenceWasFound) {
t.Fatalf("Merge expected err=%s, expected=%s", err, ErrNoDifferenceWasFound)
}
testCatalogerCreateEntry(t, ctx, c, repository, "master", "file_dummy", nil, "master1")
_, err = c.Commit(ctx, repository, "master", "file_dummy", "tester", nil)
Expand All @@ -1143,7 +1140,65 @@ func TestCataloger_MergeWitoutDiff(t *testing.T) {
t.Fatalf("error on merge with no changes:%+v", err)
}
_, err = c.Merge(ctx, repository, "master", "b1", "tester", "merge nothing from master to b1", nil)
if err.Error() != "no difference was found" {
t.Fatal("did not get 'nothing to commit' error")
if !errors.Is(err, ErrNoDifferenceWasFound) {
t.Fatalf("Merge expected err=%s, expected=%s", err, ErrNoDifferenceWasFound)
}
}

func TestCataloger_MergeFromChildAfterMergeFromParent(t *testing.T) {
t.Skip("Should we trigger conflict when we apply changes on changes we merged from parent")
ctx := context.Background()
c := testCataloger(t)
// setup a report with 'master' with a single file, and branch 'b1' that started after the file was committed
repository := testCatalogerRepo(t, ctx, c, "repository", "master")
// first entry creation
testCatalogerCreateEntry(t, ctx, c, repository, "master", "fileX", nil, "master")
_, err := c.Commit(ctx, repository, "master", "fileX", "tester", nil)
testutil.MustDo(t, "commit file first time on master", err)
_, err = c.CreateBranch(ctx, repository, "b1", "master")
testutil.MustDo(t, "create branch b1", err)
_, err = c.Merge(ctx, repository, "master", "b1", "tester", "merge nothing from master to b1", nil)
if !errors.Is(err, ErrNoDifferenceWasFound) {
t.Fatalf("merge err=%s, expected ErrNoDifferenceWasFound", err)
}

// PART I
// two entries on master
testCatalogerCreateEntry(t, ctx, c, repository, "master", "fileY", nil, "master1")
testCatalogerCreateEntry(t, ctx, c, repository, "master", "fileZ", nil, "master1")
_, err = c.Commit(ctx, repository, "master", "fileY and fileZ", "tester", nil)
testutil.MustDo(t, "commit fileY master", err)
// merge them into child
_, err = c.Merge(ctx, repository, "master", "b1", "tester", "merge fileY from master to b1", nil)
testutil.MustDo(t, "merge into branch b1", err)
// delete one of those files in b1
err = c.DeleteEntry(ctx, repository, "b1", "fileY")
testutil.MustDo(t, "delete fileY on b1", err)
testCatalogerCreateEntry(t, ctx, c, repository, "b1", "fileZ", nil, "master1")
_, err = c.Commit(ctx, repository, "b1", "fileY and fileZ", "tester", nil)
testutil.MustDo(t, "commit fileY b1", err)
_, err = c.Merge(ctx, repository, "b1", "master", "tester", "merge nothing from master to b1", nil)
if err != nil {
t.Fatalf("Merge err=%s, expected none", err)
}

// PART II
// two entries on master
testCatalogerCreateEntry(t, ctx, c, repository, "master", "fileYY", nil, "master1")
testCatalogerCreateEntry(t, ctx, c, repository, "master", "fileZZ", nil, "master1")
_, err = c.Commit(ctx, repository, "master", "fileYY and fileZZ", "tester", nil)
testutil.MustDo(t, "commit fileYY master", err)
// merge them into child
_, err = c.Merge(ctx, repository, "master", "b1", "tester", "merge fileYY from master to b1", nil)
testutil.MustDo(t, "merge into branch b1", err)
// delete one of those files in b1
err = c.DeleteEntry(ctx, repository, "b1", "fileYY")
testutil.MustDo(t, "delete fileYY on b1", err)
testCatalogerCreateEntry(t, ctx, c, repository, "b1", "fileZZ", nil, "master1")
_, err = c.Commit(ctx, repository, "b1", "fileYY and fileZZ", "tester", nil)
testutil.MustDo(t, "commit fileYY b1", err)
_, err = c.Merge(ctx, repository, "b1", "master", "tester", "merge nothing from master to b1", nil)
if err != nil {
t.Fatalf("Merge err=%s, expected none", err)
}
}
1 change: 1 addition & 0 deletions catalog/cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ func testCreateEntryCalcChecksum(key string, seed string) string {
}

func testVerifyEntries(t testing.TB, ctx context.Context, c Cataloger, repository string, reference string, entries []testEntryInfo) {
t.Helper()
for _, entry := range entries {
ent, err := c.GetEntry(ctx, repository, reference, entry.Path, GetEntryParams{})
if entry.Deleted {
Expand Down
80 changes: 0 additions & 80 deletions catalog/db_branch_reader.go

This file was deleted.

62 changes: 0 additions & 62 deletions catalog/db_branch_reader_test.go

This file was deleted.

0 comments on commit 0f923de

Please sign in to comment.