Skip to content

Commit

Permalink
filewatch: avoid accepting stale/duplicate events (#4397)
Browse files Browse the repository at this point in the history
This fixes the somewhat legitimate flakiness experienced by one of
the tests (`TestCrashRebuildTwoContainersTwoImages`).

Previously, we added logic to avoid missed live updates, but the
safer logic means that it's possible to get a duplicate change:
in the case of the test that was failing intermittently, it had two
image targets that watched overlapping sets of files. If `foo` is
changed, once `img1` target sees the event, it adds a record to its
pending file set for `time.Now()`, and a build is triggered.
Then, `img2` target sees the *same* event, and adds a record to its
pending file set for `time.Now()`, which will _intentionally_ not
be cleared after the build finishes because it appears to have been
a change that came from _after_ the build was started. As a result,
it'll end up triggering another build.

The revised logic in this commit tries to handle cases safely but
also avoid redundant triggered builds **when there's a current build
in progress**:
      * If it was started _after_ the file was seen AND already knows
        about the file, just ignore it. (This handles the case of the
        overlapping target watch sets as in the flaky test.)
      * Otherwise, use `time.Now()` so that it won't be erroneously
        cleared at the finish of the build. (This is the missed live
        update case.)

It's still possible to get a redundant build in some cases, but that
is impossible to avoid reliably given the current logic. Since we do
make an effort to batch file events (within a target), the most likely
cause would be two image targets with non-overlapping changed files,
where the later-to-be-processed file change happened before the build
was started, but we use `time.Now()` since it isn't in the list of
affected files for the build. The likelihood of multiple almost
instantaneous changes to files across targets is low enough that this
is still an acceptable trade-off vs the alternative of missing updates.

If extra builds become a common complaint, we could add a slight
waiting period of ~100ms or so after the latest file change before we
would start a build to increase the likelihood that changes across
targets have successfully propagated.
  • Loading branch information
milas committed Apr 2, 2021
1 parent 612eb7f commit 3ed62b5
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 61 deletions.
24 changes: 2 additions & 22 deletions internal/engine/fswatch/action.go
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"fmt"
"strings"
"time"

"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -123,27 +122,9 @@ func processFileWatchStatus(ctx context.Context, state *store.EngineState, fw *f
return
}

// NOTE(nick): BuildController uses these timestamps to determine which files
// to clear after a build. In particular, it:
//
// 1) Grabs the pending files
// 2) Runs a live update
// 3) Clears the pending files with timestamps before the live update started.
//
// Here's the race condition: suppose a file changes, but it doesn't get into
// the EngineState until after step (2). That means step (3) will clear the file
// even though it wasn't live-updated properly. Because as far as we can tell,
// the file must have been in the EngineState before the build started.
//
// Ideally, BuildController should be do more bookkeeping to keep track of
// which files it consumed from which FileWatches. But we're changing
// this architecture anyway. For now, we record the time it got into
// the EngineState, rather than the time it was originally changed.
now := time.Now()

if targetID.Type == model.TargetTypeConfigs {
for _, f := range latestEvent.SeenFiles {
state.PendingConfigFileChanges[f] = now
state.PendingConfigFileChanges[f] = latestEvent.Time.Time
}
return
}
Expand All @@ -155,9 +136,8 @@ func processFileWatchStatus(ctx context.Context, state *store.EngineState, fw *f
return
}

status := ms.MutableBuildStatus(targetID)
for _, f := range latestEvent.SeenFiles {
status.PendingFileChanges[f] = now
ms.AddPendingFileChange(targetID, f, latestEvent.Time.Time)
}
}
}
Expand Down
45 changes: 6 additions & 39 deletions internal/engine/upper_test.go
Expand Up @@ -21,8 +21,6 @@ import (

"github.com/tilt-dev/tilt/internal/controllers/core/filewatch/fsevent"

"github.com/tilt-dev/tilt/pkg/apis"

"github.com/docker/distribution/reference"
dockertypes "github.com/docker/docker/api/types"
"github.com/google/uuid"
Expand Down Expand Up @@ -82,7 +80,6 @@ import (
"github.com/tilt-dev/tilt/internal/user"
"github.com/tilt-dev/tilt/internal/watch"
"github.com/tilt-dev/tilt/pkg/apis/core/v1alpha1"
filewatches "github.com/tilt-dev/tilt/pkg/apis/core/v1alpha1"
"github.com/tilt-dev/tilt/pkg/assets"
"github.com/tilt-dev/tilt/pkg/logger"
"github.com/tilt-dev/tilt/pkg/model"
Expand Down Expand Up @@ -1565,7 +1562,7 @@ func TestPodUnexpectedContainerStartsImageBuild(t *testing.T) {
f.registerDeployedPodTemplateSpecHashToManifest(name, ptsh)

// Start and end a fake build to set manifestState.ExpectedContainerId
f.triggerFileChange(manifest.ImageTargetAt(0).ID(), "/go/a")
f.fsWatcher.Events <- watch.NewFileEvent(f.JoinPath("go/a"))

f.WaitUntil("builds ready & changed file recorded", func(st store.EngineState) bool {
ms, _ := st.ManifestState(manifest.Name)
Expand Down Expand Up @@ -1616,7 +1613,7 @@ func TestPodUnexpectedContainerStartsImageBuildOutOfOrderEvents(t *testing.T) {
f.registerDeployedPodTemplateSpecHashToManifest(name, ptsh)

// Start a fake build
f.triggerFileChange(manifest.ImageTargetAt(0).ID(), "/go/a")
f.fsWatcher.Events <- watch.NewFileEvent(f.JoinPath("go/a"))
f.WaitUntil("builds ready & changed file recorded", func(st store.EngineState) bool {
ms, _ := st.ManifestState(manifest.Name)
return buildcontrol.NextManifestNameToBuild(st) == manifest.Name && ms.HasPendingFileChanges()
Expand Down Expand Up @@ -1657,7 +1654,7 @@ func TestPodUnexpectedContainerAfterSuccessfulUpdate(t *testing.T) {
f.Start([]model.Manifest{manifest})

// Start and end a normal build
f.triggerFileChange(manifest.ImageTargetAt(0).ID(), "/go/a")
f.fsWatcher.Events <- watch.NewFileEvent(f.JoinPath("go/a"))
f.WaitUntil("builds ready & changed file recorded", func(st store.EngineState) bool {
ms, _ := st.ManifestState(manifest.Name)
return buildcontrol.NextManifestNameToBuild(st) == manifest.Name && ms.HasPendingFileChanges()
Expand Down Expand Up @@ -1687,7 +1684,7 @@ func TestPodUnexpectedContainerAfterSuccessfulUpdate(t *testing.T) {
})

// Start another fake build
f.triggerFileChange(manifest.ImageTargetAt(0).ID(), "/go/a")
f.fsWatcher.Events <- watch.NewFileEvent(f.JoinPath("go/a"))
f.WaitUntil("waiting for builds to be ready", func(st store.EngineState) bool {
return buildcontrol.NextManifestNameToBuild(st) == manifest.Name
})
Expand Down Expand Up @@ -2096,7 +2093,7 @@ func TestUpper_ShowErrorPodLog(t *testing.T) {
f.startPod(pod, name)
f.podLog(pod, name, "first string")

f.triggerFileChange(manifest.ImageTargetAt(0).ID(), "/go/a.go")
f.fsWatcher.Events <- watch.NewFileEvent(f.JoinPath("go/a"))

f.waitForCompletedBuildCount(2)
f.podLog(pod, name, "second string")
Expand Down Expand Up @@ -4323,28 +4320,19 @@ func (f *testFixture) loadAndStart(initOptions ...initOption) {
}

func (f *testFixture) WriteConfigFiles(args ...string) {
f.t.Helper()
if (len(args) % 2) != 0 {
f.T().Fatalf("WriteConfigFiles needs an even number of arguments; got %d", len(args))
}

filenames := []string{}
for i := 0; i < len(args); i += 2 {
filename := f.JoinPath(args[i])
contents := args[i+1]
f.WriteFile(filename, contents)
filenames = append(filenames, filename)

// Fire an FS event thru the normal pipeline, so that manifests get marked dirty.
f.fsWatcher.Events <- watch.NewFileEvent(filename)
}

// The test harness was written for a time when most tests didn't
// have a Tiltfile. So
// 1) Tiltfile execution doesn't happen at test startup
// 2) Because the Tiltfile isn't executed, ConfigFiles isn't populated
// 3) Because ConfigFiles isn't populated, ConfigsTargetID watches aren't set up properly
// So just fire a change action manually.
f.triggerFileChange(fswatch.ConfigsTargetID, filenames...)
}

func (f *testFixture) setupDCFixture() (redis, server model.Manifest) {
Expand Down Expand Up @@ -4427,27 +4415,6 @@ func (f *testFixture) completeBuildForManifest(m model.Manifest) {
f.b.completeBuild(targetIDStringForManifest(m))
}

func (f *testFixture) triggerFileChange(targetID model.TargetID, paths ...string) {
now := metav1.NowMicro()
key := types.NamespacedName{Name: apis.SanitizeName(targetID.String())}

st := f.store.RLockState()
fw := st.FileWatches[key]
if fw == nil {
f.t.Fatalf("Cannot trigger file change for %q - does not exist in store", targetID.String())
}
fw = fw.DeepCopy()
f.store.RUnlockState()

fw.Status.LastEventTime = now
fw.Status.FileEvents = append(fw.Status.FileEvents, filewatches.FileEvent{
Time: now,
SeenFiles: append([]string{}, paths...),
})

f.store.Dispatch(fswatch.NewFileWatchUpdateStatusAction(fw))
}

func podTemplateSpecHashesForTarg(t *testing.T, targ model.K8sTarget) []k8s.PodTemplateSpecHash {
entities, err := k8s.ParseYAMLFromString(targ.YAML)
require.NoError(t, err)
Expand Down
34 changes: 34 additions & 0 deletions internal/store/engine_state.go
Expand Up @@ -595,6 +595,40 @@ func (ms *ManifestState) PodWithID(pid k8s.PodID) (*Pod, bool) {
return nil, false
}

func (ms *ManifestState) AddPendingFileChange(targetID model.TargetID, file string, timestamp time.Time) {
if !ms.CurrentBuild.Empty() {
if timestamp.Before(ms.CurrentBuild.StartTime) {
// this file change occurred before the build started, but if the current build already knows
// about it (from another target or rapid successive changes that weren't de-duped), it can be ignored
for _, edit := range ms.CurrentBuild.Edits {
if edit == file {
return
}
}
}
// NOTE(nick): BuildController uses these timestamps to determine which files
// to clear after a build. In particular, it:
//
// 1) Grabs the pending files
// 2) Runs a live update
// 3) Clears the pending files with timestamps before the live update started.
//
// Here's the race condition: suppose a file changes, but it doesn't get into
// the EngineState until after step (2). That means step (3) will clear the file
// even though it wasn't live-updated properly. Because as far as we can tell,
// the file must have been in the EngineState before the build started.
//
// Ideally, BuildController should be do more bookkeeping to keep track of
// which files it consumed from which FileWatches. But we're changing
// this architecture anyway. For now, we record the time it got into
// the EngineState, rather than the time it was originally changed.
timestamp = time.Now()
}

bs := ms.MutableBuildStatus(targetID)
bs.PendingFileChanges[file] = timestamp
}

func (ms *ManifestState) HasPendingFileChanges() bool {
for _, status := range ms.BuildStatuses {
if len(status.PendingFileChanges) > 0 {
Expand Down

0 comments on commit 3ed62b5

Please sign in to comment.