Skip to content

Commit

Permalink
session: ensure 'tilt ci' exits properly when reattaching to jobs (#6276
Browse files Browse the repository at this point in the history
)

* Handle jobs with deleted pods

Signed-off-by: Daniel Nephin <dnephin@gmail.com>

* session: ensure 'tilt ci' exits properly when reattaching to jobs

fixes #6272

History of this change:
- we used to have a very naive definition of "Ready"
- callers of the runtime status API had hacks to look at Job pods to determine completeness (#4367)
- later, we changed it to have a more sophisticated definition, so that
  Jobs only count as 'ready' when they're complete. (#5013)
- we forgot to remove the old caller hacks :grimacing:

In any case, I added a bunch of tests, since we need more Job integration tests anyway.

Signed-off-by: Nick Santos <nick.santos@docker.com>

---------

Signed-off-by: Daniel Nephin <dnephin@gmail.com>
Signed-off-by: Nick Santos <nick.santos@docker.com>
Co-authored-by: Daniel Nephin <dnephin@gmail.com>
  • Loading branch information
nicks and dnephin committed Dec 8, 2023
1 parent 3b66e88 commit e850ee8
Show file tree
Hide file tree
Showing 18 changed files with 243 additions and 1 deletion.
13 changes: 13 additions & 0 deletions integration/job_fail/Tiltfile
@@ -0,0 +1,13 @@
# -*- mode: Python -*-
# Tiltfile for the job_fail integration test: a three-resource chain
# (db Deployment -> db-init Job -> app Deployment) where the Job fails.

include('../Tiltfile')
# Long-running "db" Deployment; the init Job below depends on it.
docker_build('db', '.', dockerfile='db.dockerfile')
k8s_yaml('db.yaml')

# "db-init" is a Job whose container exits non-zero (see db-init.dockerfile),
# so 'tilt ci' on this project is expected to fail.
docker_build('db-init', '.', dockerfile='db-init.dockerfile')
k8s_yaml('db-init.yaml')
k8s_resource('job-fail-db-init', resource_deps=['job-fail-db'])

# The app depends on the failing Job, so it should never be deployed.
docker_build('app', '.', dockerfile='app.dockerfile')
k8s_yaml('app.yaml')
k8s_resource('job-fail-app', resource_deps=['job-fail-db-init'])
2 changes: 2 additions & 0 deletions integration/job_fail/app.dockerfile
@@ -0,0 +1,2 @@
# Minimal "app" image for the job_fail integration test.
FROM busybox
# Foreground HTTP server on port 8000 (shell-form ENTRYPOINT).
ENTRYPOINT busybox httpd -f -p 8000
19 changes: 19 additions & 0 deletions integration/job_fail/app.yaml
@@ -0,0 +1,19 @@
# Deployment for the "app" resource in the job_fail integration test.
# NOTE(review): indentation was lost in transit; reconstructed as canonical
# apps/v1 Deployment nesting from the flattened keys.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: job-fail-app
  namespace: tilt-integration
  labels:
    app: job-fail-app
spec:
  selector:
    matchLabels:
      app: job-fail-app
  template:
    metadata:
      labels:
        app: job-fail-app
    spec:
      containers:
      - name: app
        image: app
3 changes: 3 additions & 0 deletions integration/job_fail/db-init.dockerfile
@@ -0,0 +1,3 @@
# Image for the job_fail test's init Job: always fails.
FROM busybox
# Sleep briefly, print the marker string the Go test asserts on
# ("db-init job failed"), then exit 1 so the Job's pod ends up Failed.
ENTRYPOINT ["sh", "-c", "sleep 1 && echo 'db-init job failed' && exit 1"]

17 changes: 17 additions & 0 deletions integration/job_fail/db-init.yaml
@@ -0,0 +1,17 @@
# Job for the job_fail integration test; its container always exits non-zero.
# NOTE(review): indentation was lost in transit; reconstructed as canonical
# batch/v1 Job nesting from the flattened keys.
apiVersion: batch/v1
kind: Job
metadata:
  name: job-fail-db-init
  namespace: tilt-integration
  labels:
    app: job-fail-db-init
spec:
  template:
    metadata:
      labels:
        app: job-fail-db-init
    spec:
      # Never restart: the test asserts on the failed pod it leaves behind.
      restartPolicy: Never
      containers:
      - name: db-init
        image: db-init
3 changes: 3 additions & 0 deletions integration/job_fail/db.dockerfile
@@ -0,0 +1,3 @@
# Minimal "db" image for the job_fail integration test.
FROM busybox
# Foreground HTTP server on port 8000 (shell-form ENTRYPOINT).
ENTRYPOINT busybox httpd -f -p 8000

19 changes: 19 additions & 0 deletions integration/job_fail/db.yaml
@@ -0,0 +1,19 @@
# Deployment for the "db" resource in the job_fail integration test.
# NOTE(review): indentation was lost in transit; reconstructed as canonical
# apps/v1 Deployment nesting from the flattened keys.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: job-fail-db
  namespace: tilt-integration
  labels:
    app: job-fail-db
spec:
  selector:
    matchLabels:
      app: job-fail-db
  template:
    metadata:
      labels:
        app: job-fail-db
    spec:
      containers:
      - name: db
        image: db
25 changes: 25 additions & 0 deletions integration/job_fail_test.go
@@ -0,0 +1,25 @@
//go:build integration
// +build integration

package integration

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
v1 "k8s.io/api/core/v1"
)

// TestJobFail verifies that 'tilt ci' exits with an error when a Job's
// container fails, and that the Job's failed pod is left behind.
func TestJobFail(t *testing.T) {
	fixture := newK8sFixture(t, "job_fail")
	fixture.SetRestrictedCredentials()

	// The db-init Job exits non-zero, so CI must report an error and the
	// Job's failure message must show up in the captured logs.
	ciErr := fixture.tilt.CI(fixture.ctx, fixture.LogWriter())
	require.Error(t, ciErr)
	assert.Contains(t, fixture.logs.String(), "db-init job failed")

	// Exactly one pod of the Job should be in the Failed phase.
	_, _, failedPods := fixture.AllPodsInPhase(fixture.ctx, "app=job-fail-db-init", v1.PodFailed)
	require.Equal(t, 1, len(failedPods))
}
13 changes: 13 additions & 0 deletions integration/job_reattach/Tiltfile
@@ -0,0 +1,13 @@
# -*- mode: Python -*-
# Tiltfile for the job_reattach integration test: a three-resource chain
# (db Deployment -> db-init Job -> app Deployment) where the Job succeeds.

include('../Tiltfile')
# Long-running "db" Deployment; the init Job below depends on it.
docker_build('db', '.', dockerfile='db.dockerfile')
k8s_yaml('db.yaml')

# "db-init" is a Job that completes successfully (see db-init.dockerfile);
# the test later deletes its pod and re-runs 'tilt ci' to exercise reattach.
docker_build('db-init', '.', dockerfile='db-init.dockerfile')
k8s_yaml('db-init.yaml')
k8s_resource('job-reattach-db-init', resource_deps=['job-reattach-db'])

# The app deploys only after the init Job completes.
docker_build('app', '.', dockerfile='app.dockerfile')
k8s_yaml('app.yaml')
k8s_resource('job-reattach-app', resource_deps=['job-reattach-db-init'])
2 changes: 2 additions & 0 deletions integration/job_reattach/app.dockerfile
@@ -0,0 +1,2 @@
# Minimal "app" image for the job_reattach integration test.
FROM busybox
# Foreground HTTP server on port 8000 (shell-form ENTRYPOINT).
ENTRYPOINT busybox httpd -f -p 8000
19 changes: 19 additions & 0 deletions integration/job_reattach/app.yaml
@@ -0,0 +1,19 @@
# Deployment for the "app" resource in the job_reattach integration test.
# NOTE(review): indentation was lost in transit; reconstructed as canonical
# apps/v1 Deployment nesting from the flattened keys.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: job-reattach-app
  namespace: tilt-integration
  labels:
    app: job-reattach-app
spec:
  selector:
    matchLabels:
      app: job-reattach-app
  template:
    metadata:
      labels:
        app: job-reattach-app
    spec:
      containers:
      - name: app
        image: app
3 changes: 3 additions & 0 deletions integration/job_reattach/db-init.dockerfile
@@ -0,0 +1,3 @@
# Image for the job_reattach test's init Job: prints INIT and exits 0,
# so the Job completes successfully.
FROM busybox
ENTRYPOINT echo INIT

17 changes: 17 additions & 0 deletions integration/job_reattach/db-init.yaml
@@ -0,0 +1,17 @@
# Job for the job_reattach integration test; its container exits 0.
# NOTE(review): indentation was lost in transit; reconstructed as canonical
# batch/v1 Job nesting from the flattened keys.
apiVersion: batch/v1
kind: Job
metadata:
  name: job-reattach-db-init
  namespace: tilt-integration
  labels:
    app: job-reattach-db-init
spec:
  template:
    metadata:
      labels:
        app: job-reattach-db-init
    spec:
      # Never restart: the test deletes the pod and expects no replacement.
      restartPolicy: Never
      containers:
      - name: db-init
        image: db-init
3 changes: 3 additions & 0 deletions integration/job_reattach/db.dockerfile
@@ -0,0 +1,3 @@
# Minimal "db" image for the job_reattach integration test.
FROM busybox
# Foreground HTTP server on port 8000 (shell-form ENTRYPOINT).
ENTRYPOINT busybox httpd -f -p 8000

19 changes: 19 additions & 0 deletions integration/job_reattach/db.yaml
@@ -0,0 +1,19 @@
# Deployment for the "db" resource in the job_reattach integration test.
# NOTE(review): indentation was lost in transit; reconstructed as canonical
# apps/v1 Deployment nesting from the flattened keys.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: job-reattach-db
  namespace: tilt-integration
  labels:
    app: job-reattach-db
spec:
  selector:
    matchLabels:
      app: job-reattach-db
  template:
    metadata:
      labels:
        app: job-reattach-db
    spec:
      containers:
      - name: db
        image: db
29 changes: 29 additions & 0 deletions integration/job_reattach_test.go
@@ -0,0 +1,29 @@
//go:build integration
// +build integration

package integration

import (
"testing"

"github.com/stretchr/testify/require"
v1 "k8s.io/api/core/v1"
)

// TestJobReattach verifies the #6272 scenario: after a Job has completed and
// its pod has been deleted out from under Tilt, a second 'tilt ci' must still
// exit successfully without re-running the Job (so no new Succeeded pod).
func TestJobReattach(t *testing.T) {
	fixture := newK8sFixture(t, "job_reattach")
	fixture.SetRestrictedCredentials()

	// First run: the init Job completes and leaves one Succeeded pod.
	fixture.TiltCI()

	_, _, succeededPods := fixture.AllPodsInPhase(fixture.ctx, "app=job-reattach-db-init", v1.PodSucceeded)
	require.Equal(t, 1, len(succeededPods))

	// Simulate the pod being garbage-collected behind Tilt's back.
	fixture.runCommandSilently("kubectl", "delete", "-n", "tilt-integration", "pod", succeededPods[0])

	// Make sure 'ci' still succeeds, but we don't restart the Job pod.
	fixture.TiltCI()

	_, _, succeededPods = fixture.AllPodsInPhase(fixture.ctx, "app=job-reattach-db-init", v1.PodSucceeded)
	require.Equal(t, 0, len(succeededPods))
}
36 changes: 36 additions & 0 deletions internal/controllers/core/session/reconciler_test.go
Expand Up @@ -411,6 +411,42 @@ func TestExitControlCI_JobSuccess(t *testing.T) {
f.requireDoneWithNoError()
}

// TestExitControlCI_JobSuccessWithNoPods covers the 'tilt ci' reattach case:
// a Job resource with no pods, but whose runtime state carries a
// JobComplete=True condition, must count as done so the session can exit
// with no error.
func TestExitControlCI_JobSuccessWithNoPods(t *testing.T) {
f := newFixture(t, store.EngineModeCI)

// A Job manifest that is only "ready" once the Job has succeeded.
m := manifestbuilder.New(f, "fe").
WithK8sYAML(testyaml.JobYAML).
WithK8sPodReadiness(model.PodReadinessSucceeded).
Build()
f.upsertManifest(m)
f.Store.WithState(func(state *store.EngineState) {
// Record a finished build so the session isn't waiting on the build.
state.ManifestTargets["fe"].State.AddCompletedBuild(model.BuildRecord{
StartTime: time.Now(),
FinishTime: time.Now(),
})
})

// Built but no runtime signal yet: session must not be done.
f.MustReconcile(sessionKey)
f.requireNotDone()

f.Store.WithState(func(state *store.EngineState) {
mt := state.ManifestTargets["fe"]
krs := store.NewK8sRuntimeState(mt.Manifest)
krs.HasEverDeployedSuccessfully = true
// There are no pods but the job completed successfully.
krs.Conditions = []metav1.Condition{
{
Type: v1alpha1.ApplyConditionJobComplete,
Status: metav1.ConditionTrue,
},
}
mt.State.RuntimeState = krs
})

// With JobComplete set, the session should finish cleanly.
f.MustReconcile(sessionKey)
f.requireDoneWithNoError()
}

func TestExitControlCI_TriggerMode_Local(t *testing.T) {
type tc struct {
triggerMode model.TriggerMode
Expand Down
2 changes: 1 addition & 1 deletion internal/controllers/core/session/status.go
Expand Up @@ -98,7 +98,7 @@ func (r *Reconciler) processExitCondition(spec v1alpha1.SessionSpec, state *stor
}
if res.State.Waiting != nil {
waiting = append(waiting, fmt.Sprintf("%v %v", res.Name, res.State.Waiting.WaitReason))
} else if res.State.Active != nil && (!res.State.Active.Ready || res.Type == v1alpha1.TargetTypeJob) {
} else if res.State.Active != nil && !res.State.Active.Ready {
// jobs must run to completion
notReady = append(notReady, res.Name)
}
Expand Down

0 comments on commit e850ee8

Please sign in to comment.