Merge #10687 #10729 #10740

10687: add envvar to omit snapshot writes on each state mutation r=dixler a=dixler  # Description  Fixes #10668. Provides an environment variable that overrides the internal behavior of the SnapshotManager to speed up large deployments: only the final state of the snapshot is written, instead of the current behavior, which saves the snapshot to the state backend on every mutation. ## Checklist  - [x] I have added tests that prove my fix is effective or that my feature works  - [x] I have updated the [CHANGELOG-PENDING](https://github.com/pulumi/pulumi/blob/master/CHANGELOG_PENDING.md) file with my change  - [ ] Yes, there are changes in this PR that warrant bumping the Pulumi Service API version  10729: ci: Fix package parallelism assignment r=AaronFriel a=AaronFriel 10740: Add missing `ProgramTestOptions` overrides in `With` r=justinvp a=justinvp These options were previously added without also adding the override handling in `With`. Co-authored-by: Kyle Dixler <kyle@pulumi.com> Co-authored-by: Aaron Friel <mayreply@aaronfriel.com> Co-authored-by: Justin Van Patten <jvp@justinvp.com>
pulumi · Sep 15, 2022 · 0f3e536 · 0f3e536
4 parents bc704af + 9065d7c + 22f2989 + 9f5ec4a
commit 0f3e536
Show file tree

Hide file tree

Showing 9 changed files with 258 additions and 31 deletions.
diff --git a/pkg/backend/snapshot.go b/pkg/backend/snapshot.go
@@ -17,6 +17,7 @@ package backend
 import (
 	"errors"
 	"fmt"
+	"os"
 	"reflect"
 	"sort"
 	"time"
@@ -638,6 +639,56 @@ func (sm *SnapshotManager) saveSnapshot() error {
 	return nil
 }
 
+// defaultServiceLoop services mutation requests until sm.cancel fires, saving a Snapshot when a mutator returns true.
+func (sm *SnapshotManager) defaultServiceLoop(mutationRequests chan mutationRequest, done chan error) {
+	// True if a mutator returned false (its write was elided) since the last actual write.
+	hasElidedWrites := false
+
+	// Service each mutation request in turn, replying on request.result with any save error.
+serviceLoop:
+	for {
+		select {
+		case request := <-mutationRequests:
+			var err error
+			if request.mutator() {
+				err = sm.saveSnapshot()
+				hasElidedWrites = false
+			} else {
+				hasElidedWrites = true
+			}
+			request.result <- err
+		case <-sm.cancel:
+			break serviceLoop
+		}
+	}
+
+	// If we still have elided writes once sm.cancel has fired, flush the snapshot; the result is sent on done.
+	var err error
+	if hasElidedWrites {
+		logging.V(9).Infof("SnapshotManager: flushing elided writes...")
+		err = sm.saveSnapshot()
+	}
+	done <- err
+}
+
+// unsafeServiceLoop applies each mutation without saving a Snapshot (the mutator's result is ignored) and
+// saves a single Snapshot once sm.cancel fires, which is expected when SnapshotManager.Close() is invoked.
+// It trades reliability for speed, as mutations are not individually serialized to the user's state backend.
+func (sm *SnapshotManager) unsafeServiceLoop(mutationRequests chan mutationRequest, done chan error) {
+	for {
+		select {
+		case request := <-mutationRequests:
+			request.mutator()
+			request.result <- nil
+		case <-sm.cancel:
+			done <- sm.saveSnapshot()
+			return
+		}
+	}
+}
+
+const experimentalSnapshotManagerFlag = "PULUMI_EXPERIMENTAL_SNAPSHOT_MANAGER" // non-empty selects unsafeServiceLoop
+
 // NewSnapshotManager creates a new SnapshotManager for the given stack name, using the given persister
 // and base snapshot.
 //
@@ -658,36 +709,12 @@ func NewSnapshotManager(persister SnapshotPersister, baseSnap *deploy.Snapshot)
 		done:             done,
 	}
 
-	go func() {
-		// True if we have elided writes since the last actual write.
-		hasElidedWrites := false
-
-		// Service each mutation request in turn.
-	serviceLoop:
-		for {
-			select {
-			case request := <-mutationRequests:
-				var err error
-				if request.mutator() {
-					err = manager.saveSnapshot()
-					hasElidedWrites = false
-				} else {
-					hasElidedWrites = true
-				}
-				request.result <- err
-			case <-cancel:
-				break serviceLoop
-			}
-		}
-
-		// If we still have elided writes once the channel has closed, flush the snapshot.
-		var err error
-		if hasElidedWrites {
-			logging.V(9).Infof("SnapshotManager: flushing elided writes...")
-			err = manager.saveSnapshot()
-		}
-		done <- err
-	}()
+	serviceLoop := manager.defaultServiceLoop
+	unsafeEnabled := os.Getenv(experimentalSnapshotManagerFlag) != ""
+	if unsafeEnabled {
+		serviceLoop = manager.unsafeServiceLoop
+	}
+	go serviceLoop(mutationRequests, done)
 
 	return manager
 }
diff --git a/pkg/backend/snapshot_test.go b/pkg/backend/snapshot_test.go
@@ -250,6 +250,67 @@ func TestSamesWithDependencyChanges(t *testing.T) {
 	assert.Equal(t, resourceB.URN, secondSnap.Resources[1].Dependencies[0])
 }
 
+// This test checks that we only write the Checkpoint once, whether or not there
+// are important changes, when the `PULUMI_EXPERIMENTAL_SNAPSHOT_MANAGER` envvar
+// is provided.
+//
+//nolint:paralleltest // mutates environment variables
+func TestWriteCheckpointOnceUnsafe(t *testing.T) {
+	t.Setenv(experimentalSnapshotManagerFlag, "1")
+
+	provider := NewResource("urn:pulumi:foo::bar::pulumi:providers:pkgUnsafe::provider")
+	provider.Custom, provider.Type, provider.ID = true, "pulumi:providers:pkgUnsafe", "id"
+
+	resourceP := NewResource("a-unique-urn-resource-p")
+	resourceA := NewResource("a-unique-urn-resource-a")
+
+	snap := NewSnapshot([]*resource.State{
+		provider,
+		resourceP,
+		resourceA,
+	})
+
+	manager, sp := MockSetup(t, snap)
+
+	// Generate a same step for the provider.
+	provUpdated := NewResource(string(provider.URN))
+	provUpdated.Custom, provUpdated.Type = true, provider.Type
+	provSame := deploy.NewSameStep(nil, nil, provider, provUpdated)
+	mutation, err := manager.BeginMutation(provSame)
+	assert.NoError(t, err)
+	_, _, err = provSame.Apply(false)
+	assert.NoError(t, err)
+	err = mutation.End(provSame, true)
+	assert.NoError(t, err)
+
+	// The engine generates a meaningful change; the DEFAULT behavior is that a snapshot is written here:
+	pUpdated := NewResource(string(resourceP.URN))
+	pUpdated.Protect = !resourceP.Protect
+	pSame := deploy.NewSameStep(nil, nil, resourceP, pUpdated)
+	mutation, err = manager.BeginMutation(pSame)
+	assert.NoError(t, err)
+	err = mutation.End(pSame, true)
+	assert.NoError(t, err)
+
+	// The engine generates a meaningful change; the DEFAULT behavior is that a snapshot is written here:
+	aUpdated := NewResource(string(resourceA.URN))
+	aUpdated.Protect = !resourceA.Protect
+	aSame := deploy.NewSameStep(nil, nil, resourceA, aUpdated)
+	mutation, err = manager.BeginMutation(aSame)
+	assert.NoError(t, err)
+	err = mutation.End(aSame, true)
+	assert.NoError(t, err)
+
+	// A `Close()` call is required to write back the snapshot.
+	// It is called in all of the references to SnapshotManager.
+	err = manager.Close()
+	assert.NoError(t, err)
+
+	// DEFAULT behavior would cause more than 1 snapshot to be written,
+	// but the provided flag should only create 1 Snapshot.
+	assert.Len(t, sp.SavedSnapshots, 1)
+}
+
 // This test exercises same steps with meaningful changes to properties _other_ than `Dependencies` in order to ensure
 // that the snapshot is written.
 func TestSamesWithOtherMeaningfulChanges(t *testing.T) {

diff --git a/pkg/testing/integration/program.go b/pkg/testing/integration/program.go
@@ -536,15 +536,33 @@ func (opts ProgramTestOptions) With(overrides ProgramTestOptions) ProgramTestOpt
 	if overrides.PipenvBin != "" {
 		opts.PipenvBin = overrides.PipenvBin
 	}
+	if overrides.DotNetBin != "" {
+		opts.DotNetBin = overrides.DotNetBin
+	}
 	if overrides.Env != nil {
 		opts.Env = append(opts.Env, overrides.Env...)
 	}
+	if overrides.UseAutomaticVirtualEnv {
+		opts.UseAutomaticVirtualEnv = overrides.UseAutomaticVirtualEnv
+	}
 	if overrides.UsePipenv {
 		opts.UsePipenv = overrides.UsePipenv
 	}
+	if overrides.PreviewCompletedHook != nil {
+		opts.PreviewCompletedHook = overrides.PreviewCompletedHook
+	}
+	if overrides.JSONOutput {
+		opts.JSONOutput = overrides.JSONOutput
+	}
+	if overrides.ExportStateValidator != nil {
+		opts.ExportStateValidator = overrides.ExportStateValidator
+	}
 	if overrides.PrepareProject != nil {
 		opts.PrepareProject = overrides.PrepareProject
 	}
+	if overrides.LocalDependencies != nil {
+		opts.LocalDependencies = append(opts.LocalDependencies, overrides.LocalDependencies...)
+	}
 	return opts
 }
 

diff --git a/scripts/retry b/scripts/retry
@@ -30,7 +30,7 @@ run_tests() {
         attempts=$((attempts + 1))
 
         export GO_TEST_PARALLELISM=$((GO_TEST_PARALLELISM <= 2 ? 1 : GO_TEST_PARALLELISM / 2))
-        export GO_TEST_PKG_PARALLELISM=$((GO_TEST_PARALLELISM <= 2 ? 1 : GO_TEST_PKG_PARALLELISM / 2))
+        export GO_TEST_PKG_PARALLELISM=$((GO_TEST_PKG_PARALLELISM <= 2 ? 1 : GO_TEST_PKG_PARALLELISM / 2))
         export GO_TEST_SHUFFLE="off"
     done
 

diff --git a/tests/integration/integration_nodejs_test.go b/tests/integration/integration_nodejs_test.go
@@ -1336,3 +1336,25 @@ func TestTSConfigOption(t *testing.T) {
 	e.RunCommand("pulumi", "stack", "select", "tsconfg", "--create")
 	e.RunCommand("pulumi", "preview")
 }
+
+// This tests that the snapshot is still written despite an exception.
+func TestUnsafeSnapshotManagerRetainsResourcesOnError(t *testing.T) {
+	integration.ProgramTest(t, &integration.ProgramTestOptions{
+		Dir:          filepath.Join("unsafe_snapshot_tests", "bad_resource"),
+		Dependencies: []string{"@pulumi/pulumi"},
+		Env:          []string{"PULUMI_EXPERIMENTAL_SNAPSHOT_MANAGER=1"},
+		Quick:        true,
+		// The program throws an exception and 1 resource fails to be created.
+		ExpectFailure: true,
+		ExtraRuntimeValidation: func(t *testing.T, stackInfo integration.RuntimeValidationStackInfo) {
+			// Ensure the checkpoint contains the 1003 other resources that were created:
+			// - stack
+			// - provider
+			// - `base` resource
+			// - 1000 resources (via a for loop)
+			// - NOT the `dependent` resource, whose creation fails on the `base` resource output
+			assert.NotNil(t, stackInfo.Deployment)
+			assert.Equal(t, 3+1000, len(stackInfo.Deployment.Resources))
+		},
+	})
+}
diff --git a/tests/integration/unsafe_snapshot_tests/bad_resource/Pulumi.yaml b/tests/integration/unsafe_snapshot_tests/bad_resource/Pulumi.yaml
@@ -0,0 +1,2 @@
+name: bad_resource
+runtime: nodejs
diff --git a/tests/integration/unsafe_snapshot_tests/bad_resource/index.ts b/tests/integration/unsafe_snapshot_tests/bad_resource/index.ts
@@ -0,0 +1,17 @@
+// Copyright 2016-2022, Pulumi Corporation.  All rights reserved.
+import * as process from "process";
+import { Resource } from "./resource";
+// Base depends on nothing.
+const a = new Resource("base", { uniqueKey: 1, state: 99 });
+
+for(let i = 0; i < 1000; i++) {
+    new Resource(`base-${i}`, { uniqueKey: 100+i, state: 99 });
+}
+
+// Dependent depends on Base's state; the apply callback throws unless PULUMI_NODEJS_DRY_RUN is "true".
+new Resource("dependent", { uniqueKey: a.state.apply(() => {
+    if (process.env["PULUMI_NODEJS_DRY_RUN"] != "true") {
+        throw Error("`base` should be created and `dependent` should not");
+    }
+    return 1;
+}), state: a.state });
diff --git a/tests/integration/unsafe_snapshot_tests/bad_resource/package.json b/tests/integration/unsafe_snapshot_tests/bad_resource/package.json
@@ -0,0 +1,13 @@
+{
+    "name": "stack_project_name",
+    "license": "Apache-2.0",
+    "devDependencies": {
+        "typescript": "^3.0.0"
+    },
+    "peerDependencies": {
+        "@pulumi/pulumi": "latest"
+    },
+    "dependencies": {
+        "@types/node": "^18.7.17"
+    }
+}
diff --git a/tests/integration/unsafe_snapshot_tests/bad_resource/resource.ts b/tests/integration/unsafe_snapshot_tests/bad_resource/resource.ts
@@ -0,0 +1,67 @@
+// Copyright 2016-2022, Pulumi Corporation.  All rights reserved.
+
+import * as pulumi from "@pulumi/pulumi";
+import * as dynamic from "@pulumi/pulumi/dynamic";
+
+export class Provider implements dynamic.ResourceProvider {
+    public static readonly instance = new Provider();
+
+    private id: number = 0;
+
+    public async check(olds: any, news: any): Promise<dynamic.CheckResult> {
+        // When the engine re-creates a resource after it was deleted, it should
+        // not pass the old (deleted) inputs to Check when re-creating.
+        //
+        // This Check implementation reports a failure on `state` if old state 99 is paired with new state 22.
+        if (olds.state === 99 && news.state === 22) {
+            return {
+                inputs: news,
+                failures: [
+                    {
+                        property: "state",
+                        reason: "engine did invalid comparison of old and new check inputs for recreated resource",
+                    },
+                ],
+            };
+        }
+
+        return {
+            inputs: news,
+        };
+    }
+
+    public async diff(id: pulumi.ID, olds: any, news: any): Promise<dynamic.DiffResult> {
+        if (olds.state !== news.state) {
+            return {
+                changes: true,
+                replaces: ["state"],
+                deleteBeforeReplace: true,
+            };
+        }
+
+        return {
+            changes: false,
+        };
+    }
+
+    public async create(inputs: any): Promise<dynamic.CreateResult> {
+        return {
+            id: (this.id++).toString(),
+            outs: inputs,
+        };
+    }
+}
+
+export class Resource extends pulumi.dynamic.Resource {
+    public uniqueKey?: pulumi.Output<number>;
+    public state: pulumi.Output<number>;
+
+    constructor(name: string, props: ResourceProps, opts?: pulumi.ResourceOptions) {
+        super(Provider.instance, name, props, opts); // every Resource shares the single Provider.instance
+    }
+}
+
+export interface ResourceProps {
+    readonly uniqueKey?: pulumi.Input<number>;
+    readonly state: pulumi.Input<number>; // Provider.diff reports a delete-before-replace when this changes
+}