README.md: 19 additions & 3 deletions
@@ -222,9 +222,7 @@ other source types use custom code in the `kubeapply` binary.
 This validates all of the expanded configs for the cluster using the
 [`kubeconform`](https://github.com/yannh/kubeconform) library. It also, optionally, supports
 validating configs using one or more [OPA](https://www.openpolicyagent.org/) policies in
-rego format. The latter allows checking that configs satisfy organization-specific standards,
-e.g. that resource labels are in the correct format, that images are only pulled from the
-expected registries, etc.
+rego format; see the "Experimental features" section below for more details.
 
 #### Diff
 
@@ -338,6 +336,24 @@ where the `url`s are in the same format as those for Helm chart locations,
 e.g. `file://path/to/my/file`. The outputs of each profile will be expanded into
 `[expanded dir]/[profile name]/...`.
 
+### OPA policy checks
+
+The `kubeapply validate` subcommand now supports checking expanded configs against policies in
+[Open Policy Agent (OPA)](https://www.openpolicyagent.org/) format. This can be helpful for
+enforcing organization-specific standards, e.g. that images may only be pulled from a particular
+private registry or that all labels follow a consistent format.
+
+To use this, write your policies as `.rego` files as described in the OPA documentation and run
+`kubeapply validate` with one or more `--policy=[path to policy]` arguments. By default, policies
+should be in the `com.segment.kubeapply` package. A policy reports denial reasons, if any, by
+adding strings to a `deny` set. If this set is empty, `kubeapply` assumes that the config has
+passed all checks in the policy file.
+
+If a denial reason begins with the string `warn:`, that denial is treated as a non-blocking
+warning rather than an error that causes validation to fail.
+
+See [this unit test](/pkg/validation/policy_test.go) for some examples.
+
 ## Testing
 
 ### Unit tests
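To make the new "OPA policy checks" section above concrete, here is a minimal sketch of what such a policy might look like. It assumes each expanded resource is passed to the policy as `input`; the registry hostname and the `team` label are hypothetical stand-ins for organization-specific rules, not anything built into `kubeapply`:

```rego
package com.segment.kubeapply

# Hypothetical rule: deny container images pulled from outside a private registry.
deny[reason] {
	input.kind == "Deployment"
	container := input.spec.template.spec.containers[_]
	not startswith(container.image, "registry.example.com/")
	reason := sprintf("image %s is not from registry.example.com", [container.image])
}

# Hypothetical rule: the "warn:" prefix downgrades this denial to a
# non-blocking warning instead of a validation failure.
deny[reason] {
	not input.metadata.labels.team
	reason := "warn: resource is missing a team label"
}
```

Saved as, e.g., `policies/standards.rego`, the file would be picked up by adding `--policy=policies/standards.rego` to the usual `kubeapply validate` arguments.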
cmd/kubeapply/subcmd/validate.go: 18 additions & 59 deletions
@@ -2,6 +2,7 @@ package subcmd

 import (
 	"context"
+	"errors"
 	"fmt"
 	"path/filepath"
 
@@ -144,69 +145,27 @@ func execValidation(ctx context.Context, clusterConfig *config.ClusterConfig) error {
 		return err
 	}
 
-	numInvalidResourceChecks := 0
-	numValidResourceChecks := 0
-	numSkippedResourceChecks := 0
-
-	for _, result := range results {
-		for _, checkResult := range result.CheckResults {
-			switch checkResult.Status {
-			case validation.StatusValid:
-				numValidResourceChecks++
-				log.Debugf(
-					"Resource %s in file %s OK according to check %s",
-					result.Resource.PrettyName(),
-					result.Resource.Path,
-					checkResult.CheckName,
-				)
-			case validation.StatusSkipped:
-				numSkippedResourceChecks++
-				log.Debugf(
-					"Resource %s in file %s was skipped by check %s",
-					result.Resource.PrettyName(),
-					result.Resource.Path,
-					checkResult.CheckName,
-				)
-			case validation.StatusError:
-				numInvalidResourceChecks++
-				log.Errorf(
-					"Resource %s in file %s could not be processed by check %s: %s",
-					result.Resource.PrettyName(),
-					result.Resource.Path,
-					checkResult.CheckName,
-					checkResult.Message,
-				)
-			case validation.StatusInvalid:
-				numInvalidResourceChecks++
-				log.Errorf(
-					"Resource %s in file %s is invalid according to check %s: %s",
-					result.Resource.PrettyName(),
-					result.Resource.Path,
-					checkResult.CheckName,
-					checkResult.Message,
-				)
-			case validation.StatusEmpty:
-			default:
-				log.Infof("Unrecognized result type: %+v", result)
-			}
+	counts := validation.CountsByStatus(results)
+	resultsWithIssues := validation.ResultsWithIssues(results)
+
+	if len(resultsWithIssues) > 0 {
+		log.Warnf("Found %d resources with potential issues", len(resultsWithIssues))
+		for _, result := range resultsWithIssues {
+			fmt.Println(
+				validation.ResultTable(
+					result,
+					clusterConfig.DescriptiveName(),
+					clusterConfig.ExpandedPath,
+					debug,
+				),
+			)
 		}
 	}
 
-	if numInvalidResourceChecks > 0 {
-		return fmt.Errorf(
-			"Validation failed for %d resources in cluster %s (%d checks valid, %d skipped)",
-			numInvalidResourceChecks,
-			clusterConfig.DescriptiveName(),
-			numValidResourceChecks,
-			numSkippedResourceChecks,
-		)
+	if counts[validation.StatusError]+counts[validation.StatusInvalid] > 0 {
+		return errors.New("Validation failed")
 	}
 
-	log.Infof(
-		"Validation of cluster %s passed (%d checks valid, %d skipped)",
-		clusterConfig.DescriptiveName(),
-		numValidResourceChecks,
-		numSkippedResourceChecks,
-	)
+	log.Infof("Validation passed")
 	return nil
 }
data/data.go: 8 additions & 8 deletions (generated file; diff not rendered)

pkg/store/leaderelection/leaderelection.go: 16 additions & 4 deletions
@@ -65,7 +65,8 @@ import (
 )
 
 const (
-	JitterFactor = 1.2
+	JitterFactor   = 1.2
+	releaseTimeout = 10 * time.Second
 )
 
 // NewLeaderElector creates a LeaderElector from a LeaderElectionConfig
@@ -240,7 +241,8 @@ func (le *LeaderElector) acquire(ctx context.Context) bool {
 	return succeeded
 }
 
-// renew loops calling tryAcquireOrRenew and returns immediately when tryAcquireOrRenew fails or ctx signals done.
+// renew loops calling tryAcquireOrRenew and returns immediately when tryAcquireOrRenew fails or
+// ctx signals done.
 func (le *LeaderElector) renew(ctx context.Context) {
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
@@ -264,7 +266,14 @@ func (le *LeaderElector) renew(ctx context.Context) {

 	// if we hold the lease, give it up
 	if le.config.ReleaseOnCancel {
-		le.release(ctx)
+		// Use the background context, not the one that was passed in originally. If
+		// the latter was cancelled, then we can't actually do the release.
+		releaseCtx, releaseCancel := context.WithTimeout(
+			context.Background(),
+			releaseTimeout,
+		)
+		defer releaseCancel()
+		le.release(releaseCtx)
 	}
 }
 
@@ -332,7 +341,10 @@ func (le *LeaderElector) tryAcquireOrRenew(ctx context.Context) bool {
 		le.observedTime.Add(le.config.LeaseDuration).After(now.Time) &&
 		oldLeaderElectionRecord.RenewTime.Time.After(thresholdTime) &&
 		!le.IsLeader() {
-		log.Infof("Lock is held by %v and has not yet expired", oldLeaderElectionRecord.HolderIdentity)
+		log.Infof(
+			"Lock is held by %v and has not yet expired",
+			oldLeaderElectionRecord.HolderIdentity,
+		)
 		return false
 	}
 