From 1f87a89bb695143162726a4de531c00ff6b9727c Mon Sep 17 00:00:00 2001 From: Priti Desai Date: Tue, 17 Nov 2020 23:11:34 -0800 Subject: [PATCH] fix cycle detection logic visited map holds name of the nodes been visited as keys and "true" as value. This map is being updated with currentName.HashKey on every iteration which results in keys such as a.d, b.d where a, b, and d are nodes of a dag. But such keys are not utilized anywhere in the visit function. Visit function checks existence of the node just by the name without any string concatenation. This extra addition in the map is causing severe delay for a graph with more than >60 nodes. --- pkg/reconciler/pipeline/dag/dag.go | 14 +--- pkg/reconciler/pipeline/dag/dag_test.go | 75 +++++++++++++++++++ .../pipeline/dag/dagv1beta1_test.go | 75 +++++++++++++++++++ 3 files changed, 154 insertions(+), 10 deletions(-) diff --git a/pkg/reconciler/pipeline/dag/dag.go b/pkg/reconciler/pipeline/dag/dag.go index 16f39fc4fce..52f607c8d51 100644 --- a/pkg/reconciler/pipeline/dag/dag.go +++ b/pkg/reconciler/pipeline/dag/dag.go @@ -126,9 +126,8 @@ func linkPipelineTasks(prev *Node, next *Node) error { return fmt.Errorf("cycle detected; task %q depends on itself", next.Task.HashKey()) } // Check if we are adding cycles. - visited := map[string]bool{prev.Task.HashKey(): true, next.Task.HashKey(): true} path := []string{next.Task.HashKey(), prev.Task.HashKey()} - if err := visit(next.Task.HashKey(), prev.Prev, path, visited); err != nil { + if err := lookForNode(prev.Prev, path, next.Task.HashKey()); err != nil { return fmt.Errorf("cycle detected: %w", err) } next.Prev = append(next.Prev, prev) @@ -136,18 +135,13 @@ func linkPipelineTasks(prev *Node, next *Node) error { return nil } -func visit(currentName string, nodes []*Node, path []string, visited map[string]bool) error { - var sb strings.Builder +func lookForNode(nodes []*Node, path []string, next string) error { for _, n := range nodes { path = append(path, n.Task.HashKey()) - if _, ok := visited[n.Task.HashKey()]; ok { + if n.Task.HashKey() == next { return errors.New(getVisitedPath(path)) } - sb.WriteString(currentName) - sb.WriteByte('.') - sb.WriteString(n.Task.HashKey()) - visited[sb.String()] = true - if err := visit(n.Task.HashKey(), n.Prev, path, visited); err != nil { + if err := lookForNode(n.Prev, path, next); err != nil { return err } } diff --git a/pkg/reconciler/pipeline/dag/dag_test.go b/pkg/reconciler/pipeline/dag/dag_test.go index 0b55df9f623..ee395201c1c 100644 --- a/pkg/reconciler/pipeline/dag/dag_test.go +++ b/pkg/reconciler/pipeline/dag/dag_test.go @@ -17,6 +17,7 @@ limitations under the License. package dag_test import ( + "strings" "testing" "github.com/google/go-cmp/cmp" @@ -435,18 +436,39 @@ func TestBuild_Invalid(t *testing.T) { name string spec v1beta1.PipelineSpec }{{ + // a + // | + // a ("a" depends on resource from "a") name: "self-link-from", spec: v1beta1.PipelineSpec{Tasks: []v1beta1.PipelineTask{selfLinkFrom}}, }, { + // a + // | + // a ("a" runAfter "a") name: "self-link-after", spec: v1beta1.PipelineSpec{Tasks: []v1beta1.PipelineTask{selfLinkAfter}}, }, { + // a (also "a" depends on resource from "z") + // | + // x ("x" depends on resource from "a") + // | + // z ("z" depends on resource from "x") name: "cycle-from", spec: v1beta1.PipelineSpec{Tasks: []v1beta1.PipelineTask{xDependsOnA, zDependsOnX, aDependsOnZ}}, }, { + // a (also "a" runAfter "z") + // | + // x ("x" runAfter "a") + // | + // z ("z" runAfter "x") name: "cycle-runAfter", spec: v1beta1.PipelineSpec{Tasks: []v1beta1.PipelineTask{xAfterA, zAfterX, aAfterZ}}, }, { + // a (also "a" depends on resource from "z") + // | + // x ("x" depends on resource from "a") + // | + // z ("z" runAfter "x") name: "cycle-both", spec: v1beta1.PipelineSpec{Tasks: []v1beta1.PipelineTask{xDependsOnA, zAfterX, aDependsOnZ}}, }, { @@ -669,3 +691,56 @@ func TestBuild_ConditionsParamsFromTaskResults(t *testing.T) { } assertSameDAG(t, expectedDAG, g) } + +// This test make sure we detect cycle when it exists. +// This means we "visit" "a" twice, from two different paths. +// And one of the paths is creating a cycle with "e -> a". +// a +// / \ +// b c +// \ / +// d +// / \ +// e f +// | +// g +func TestBuild_InvalidDAG_OneCyclicBranch(t *testing.T) { + a := v1beta1.PipelineTask{Name: "a", RunAfter: []string{"e"}} + bDependsOnA := v1beta1.PipelineTask{ + Name: "b", + Resources: &v1beta1.PipelineTaskResources{ + Inputs: []v1beta1.PipelineTaskInputResource{{From: []string{"a"}}}, + }, + } + cRunsAfterA := v1beta1.PipelineTask{ + Name: "c", + RunAfter: []string{"a"}, + } + dDependsOnBAndC := v1beta1.PipelineTask{ + Name: "d", + Resources: &v1beta1.PipelineTaskResources{ + Inputs: []v1beta1.PipelineTaskInputResource{{From: []string{"b", "c"}}}, + }, + } + eRunsAfterD := v1beta1.PipelineTask{ + Name: "e", + RunAfter: []string{"d"}, + } + fRunsAfterD := v1beta1.PipelineTask{ + Name: "f", + RunAfter: []string{"d"}, + } + gDependsOnF := v1beta1.PipelineTask{ + Name: "g", + Resources: &v1beta1.PipelineTaskResources{ + Inputs: []v1beta1.PipelineTaskInputResource{{From: []string{"f"}}}, + }, + } + + tasks := v1beta1.PipelineTaskList{a, bDependsOnA, cRunsAfterA, dDependsOnBAndC, eRunsAfterD, fRunsAfterD, gDependsOnF} + + _, err := dag.Build(tasks) + if err == nil || !strings.Contains(err.Error(), "cycle detected") { + t.Errorf("expected cycle error but got %v", err) + } +} diff --git a/pkg/reconciler/pipeline/dag/dagv1beta1_test.go b/pkg/reconciler/pipeline/dag/dagv1beta1_test.go index de2509934cd..afaed8067af 100644 --- a/pkg/reconciler/pipeline/dag/dagv1beta1_test.go +++ b/pkg/reconciler/pipeline/dag/dagv1beta1_test.go @@ -22,6 +22,7 @@ package dag_test // dealing with v1beta1 in our code and we won't need 2 sets of tests. import ( + "strings" "testing" "github.com/google/go-cmp/cmp" @@ -391,18 +392,39 @@ func TestBuild_Invalid_v1beta1(t *testing.T) { name string spec v1beta1.PipelineSpec }{{ + // a + // | + // a ("a" depends on resource from "a") name: "self-link-from", spec: v1beta1.PipelineSpec{Tasks: []v1beta1.PipelineTask{selfLinkFrom}}, }, { + // a + // | + // a ("a" runAfter "a") name: "self-link-after", spec: v1beta1.PipelineSpec{Tasks: []v1beta1.PipelineTask{selfLinkAfter}}, }, { + // a (also "a" depends on resource from "z") + // | + // x ("x" depends on resource from "a") + // | + // z ("z" depends on resource from "x") name: "cycle-from", spec: v1beta1.PipelineSpec{Tasks: []v1beta1.PipelineTask{xDependsOnA, zDependsOnX, aDependsOnZ}}, }, { + // a (also "a" runAfter "z") + // | + // x ("x" runAfter "a") + // | + // z ("z" runAfter "x") name: "cycle-runAfter", spec: v1beta1.PipelineSpec{Tasks: []v1beta1.PipelineTask{xAfterA, zAfterX, aAfterZ}}, }, { + // a (also "a" depends on resource from "z") + // | + // x ("x" depends on resource from "a") + // | + // z ("z" runAfter "x") name: "cycle-both", spec: v1beta1.PipelineSpec{Tasks: []v1beta1.PipelineTask{xDependsOnA, zAfterX, aDependsOnZ}}, }, { @@ -625,3 +647,56 @@ func TestBuild_ConditionsParamsFromTaskResults_v1beta1(t *testing.T) { } assertSameDAG(t, expectedDAG, g) } + +// This test make sure we detect cycle when it exists. +// This means we "visit" "a" twice, from two different paths. +// And one of the paths is creating a cycle with "e -> a". +// a +// / \ +// b c +// \ / +// d +// / \ +// e f +// | +// g +func TestBuild_InvalidDAG_OneCyclicBranch_v1beta1(t *testing.T) { + a := v1beta1.PipelineTask{Name: "a", RunAfter: []string{"e"}} + bDependsOnA := v1beta1.PipelineTask{ + Name: "b", + Resources: &v1beta1.PipelineTaskResources{ + Inputs: []v1beta1.PipelineTaskInputResource{{From: []string{"a"}}}, + }, + } + cRunsAfterA := v1beta1.PipelineTask{ + Name: "c", + RunAfter: []string{"a"}, + } + dDependsOnBAndC := v1beta1.PipelineTask{ + Name: "d", + Resources: &v1beta1.PipelineTaskResources{ + Inputs: []v1beta1.PipelineTaskInputResource{{From: []string{"b", "c"}}}, + }, + } + eRunsAfterD := v1beta1.PipelineTask{ + Name: "e", + RunAfter: []string{"d"}, + } + fRunsAfterD := v1beta1.PipelineTask{ + Name: "f", + RunAfter: []string{"d"}, + } + gDependsOnF := v1beta1.PipelineTask{ + Name: "g", + Resources: &v1beta1.PipelineTaskResources{ + Inputs: []v1beta1.PipelineTaskInputResource{{From: []string{"f"}}}, + }, + } + + tasks := v1beta1.PipelineTaskList{a, bDependsOnA, cRunsAfterA, dDependsOnBAndC, eRunsAfterD, fRunsAfterD, gDependsOnF} + + _, err := dag.Build(tasks) + if err == nil || !strings.Contains(err.Error(), "cycle detected") { + t.Errorf("expected cycle error but got %v", err) + } +}