Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 1815816: more timing windows on build/pod owner ref correlation (allow GC to c… #96

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
33 changes: 23 additions & 10 deletions pkg/build/controller/build/build_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,11 @@ func (bc *BuildController) handleNewBuild(build *buildv1.Build, pod *corev1.Pod)
// creation is done in the transition X->Pending.
if strategy.HasOwnerReference(pod, build) {
return bc.handleActiveBuild(build, pod)
} else {
err := retryOnOwnerRef(build, pod)
if err != nil {
return nil, err
}
}
// If a pod was not created by the current build, move the build to
// error.
Expand Down Expand Up @@ -1087,6 +1092,21 @@ func (bc *BuildController) resolveImageReferences(build *buildv1.Build, update *
return nil
}

// retryOnOwnerRef reports whether the caller should keep waiting for an existing pod to be
// garbage collected before giving up on the build.
//
// It returns a non-nil "waiting since ..." error (which callers return to requeue the build)
// only when the pod has at least one owner reference AND the build's creation timestamp is
// not yet more than two minutes in the past. It returns nil when the pod has no owner
// references, or when the two minute window has elapsed.
//
// This function does not itself compare the pod's owner references against the build; the
// visible callers only invoke it after strategy.HasOwnerReference(pod, build) returned false.
func retryOnOwnerRef(build *buildv1.Build, pod *corev1.Pod) error {
if len(pod.OwnerReferences) > 0 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A general rule is to return early, the code will read better if this looks like:

if len(pod.OwnerReferences) == 0 {
    return nil
}
// ... rest of the code

// check to see if we have retried long enough for the old pod to get GC'ed
twoMinutesAgo := metav1.Now().Add(-2 * time.Minute)
if !build.CreationTimestamp.Time.Before(twoMinutesAgo) {
// requeue for retry via returning an error
return fmt.Errorf("waiting since %s to see if pod %s/%s with incorrect uid %s is "+
"gc'ed before commencing build %s/%s",
build.CreationTimestamp.String(), build.Namespace, pod.Name, pod.UID,
build.Namespace, build.Name)
}
}
return nil
}

// createBuildPod creates a new pod to run a build
func (bc *BuildController) createBuildPod(build *buildv1.Build) (*buildUpdate, error) {
update := &buildUpdate{}
Expand Down Expand Up @@ -1241,16 +1261,9 @@ func (bc *BuildController) createBuildPod(build *buildv1.Build) (*buildUpdate, e
// having a different ref compared to our build here; that said, if the pod has no owner refs,
// there is no chance GC will clean it up, so bypass our retry here
if !strategy.HasOwnerReference(existingPod, build) {
if len(existingPod.OwnerReferences) > 0 {
// check to see if we have retried long enough for the old pod to get GC'ed
twoMinutesAgo := metav1.Now().Add(-2 * time.Minute)
if !build.CreationTimestamp.Time.Before(twoMinutesAgo) {
// requeue for retry via returning an error
return nil, fmt.Errorf("waiting since %s to see if pod %s/%s with incorrect uid %s is "+
"gc'ed before commencing build %s/%s",
build.CreationTimestamp.String(), build.Namespace, buildPod.Name, existingPod.UID,
build.Namespace, build.Name)
}
err := retryOnOwnerRef(build, existingPod)
if err != nil {
return nil, err
}

// reach this point, we give up
Expand Down
76 changes: 76 additions & 0 deletions pkg/build/controller/build/build_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,82 @@ func TestCreateBuildPodWithExistingRelatedPodBadCA(t *testing.T) {
validateUpdate(t, "create build pod with existing related pod and bad CA configMap error", expected, update)
}

// TestHandleNewBuildWithExistingUnrelatedPod exercises handleNewBuild when a pod with the
// build pod's name already exists but does not carry an owner reference to the build.
//
// Cases covered:
//   - the build was created less than two minutes ago and the pod has an owner reference:
//     an error prefixed with "waiting since" is expected so the build is requeued;
//   - the build was created more than two minutes ago and the pod has an owner reference:
//     the build is expected to move to the Error phase with reason BuildPodExists;
//   - the pod has no owner references at all: same Error-phase outcome, with no retry.
//
// Each case runs as its own subtest so that the fake build controller created for it is
// stopped when that case finishes rather than when the whole test function returns.
func TestHandleNewBuildWithExistingUnrelatedPod(t *testing.T) {
	ownerRef := &metav1.OwnerReference{
		APIVersion: "v1",
		Kind:       "Build",
		Name:       "foo",
		UID:        "bar",
	}
	tests := []struct {
		name              string
		createTime        time.Time
		expectErr         bool
		expectedErrPrefix string
		ownerRef          *metav1.OwnerReference
	}{
		{
			name:              "create-within-two-minutes",
			createTime:        metav1.Now().Add(-1 * time.Minute),
			expectErr:         true,
			expectedErrPrefix: "waiting since",
			ownerRef:          ownerRef,
		},
		{
			name:       "create-after-two-minutes",
			createTime: metav1.Now().Add(-3 * time.Minute),
			ownerRef:   ownerRef,
		},
		{
			name: "create-existing-pod-no-owner-refs",
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			build := dockerStrategy(mockBuild(buildv1.BuildPhaseNew, buildv1.BuildOutput{}))
			build.CreationTimestamp.Time = test.createTime

			existingPod := &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      buildutil.GetBuildPodName(build),
					Namespace: build.Namespace,
				},
			}
			if test.ownerRef != nil {
				existingPod.OwnerReferences = []metav1.OwnerReference{
					*test.ownerRef,
				}
			}

			kubeClient := fakeKubeExternalClientSet(existingPod, registryCAConfigMap)
			// Any attempt to create a pod reports that it already exists.
			errorReaction := func(action clientgotesting.Action) (bool, runtime.Object, error) {
				return true, nil, errors.NewAlreadyExists(schema.GroupResource{Group: "", Resource: "pods"}, existingPod.Name)
			}
			kubeClient.(*fake.Clientset).PrependReactor("create", "pods", errorReaction)
			bc := newFakeBuildController(nil, nil, kubeClient, nil, nil)
			// Runs at the end of this subtest, not at the end of the whole test.
			defer bc.stop()

			update, err := bc.handleNewBuild(build, existingPod)

			if test.expectErr {
				if err == nil {
					t.Fatalf("expected an error for %s", test.name)
				}
				if !strings.HasPrefix(err.Error(), test.expectedErrPrefix) {
					t.Errorf("unexpected error: %s", err.Error())
				}
				return
			}
			if err != nil {
				t.Fatalf("unexpected error for %s: %v", test.name, err)
			}

			expected := &buildUpdate{}
			expected.setPhase(buildv1.BuildPhaseError)
			expected.setReason(buildv1.StatusReasonBuildPodExists)
			expected.setMessage("The pod for this build already exists and is older than the build.")
			validateUpdate(t, "create build pod with pod with older existing pod", expected, update)
		})
	}
}

func TestCreatedBuildPodWithExistingUnrelatedPod(t *testing.T) {
or := &metav1.OwnerReference{
APIVersion: "v1",
Expand Down