diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 69d80885d..c9266c42a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,6 +86,16 @@ jobs: HUB_SERVER_URL: 'https://172.19.0.2:6443' e2e-tests: + strategy: + fail-fast: false + matrix: + customized-settings: [default, custom] + include: + - customized-settings: default + # to shorten the test duration, set the resource snapshot creation interval to 0 + resource-snapshot-creation-interval: 0m + - customized-settings: custom + resource-snapshot-creation-interval: 1m runs-on: ubuntu-latest needs: [ detect-noop, @@ -119,7 +129,11 @@ jobs: - name: Run e2e tests run: | - make e2e-tests + if [ "${{ matrix.customized-settings }}" = "default" ]; then + make e2e-tests + else + make e2e-tests-custom + fi env: KUBECONFIG: '/home/runner/.kube/config' HUB_SERVER_URL: 'https://172.19.0.2:6443' @@ -129,4 +143,5 @@ jobs: # TO-DO (chenyu1): to ensure a vendor-neutral experience, switch to a dummy # property provider once the AKS one is split out. PROPERTY_PROVIDER: 'azure' + RESOURCE_SNAPSHOT_CREATION_INTERVAL: ${{ matrix.resource-snapshot-creation-interval }} diff --git a/Makefile b/Makefile index fa83cadf9..7428ab593 100644 --- a/Makefile +++ b/Makefile @@ -213,7 +213,10 @@ e2e-tests-v1alpha1: create-kind-cluster run-e2e-v1alpha1 .PHONY: e2e-tests e2e-tests: setup-clusters - cd ./test/e2e && ginkgo -v -p . + cd ./test/e2e && ginkgo --label-filter="!custom" -v -p . + +e2e-tests-custom: setup-clusters + cd ./test/e2e && ginkgo --label-filter="custom" -v -p . .PHONY: setup-clusters setup-clusters: diff --git a/charts/hub-agent/README.md b/charts/hub-agent/README.md index 54e91161f..47d2428a3 100644 --- a/charts/hub-agent/README.md +++ b/charts/hub-agent/README.md @@ -19,24 +19,25 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen ## Parameters -| Parameter | Description | Default | -|:------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------| -| replicaCount | The number of hub-agent replicas to deploy | `1` | -| image.repository | Image repository | `ghcr.io/azure/azure/fleet/hub-agent` | -| image.pullPolicy | Image pullPolicy | `Always` | -| image.tag | The image release tag to use | `v0.1.0` | -| namespace | Namespace that this Helm chart is installed on | `fleet-system` | -| serviceAccount.create | Whether to create service account | `true` | -| serviceAccount.name | Service account name | `hub-agent-sa` | -| resources | The resource request/limits for the container image | limits: 500m CPU, 1Gi, requests: 100m CPU, 128Mi | -| affinity | The node affinity to use for hubagent pod | `{}` | -| tolerations | The tolerations to use for hubagent pod | `[]` | -| logVerbosity | Log level. Uses V logs (klog) | `5` | -| enableV1Alpha1APIs | If set, the agents will watch for the v1alpha1 APIs. | `false` | -| enableV1Beta1APIs | If set, the agents will watch for the v1beta1 APIs. | `true` | -| hubAPIQPS | QPS to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags. | `250` | -| hubAPIBurst | Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags. 
| `1000` | -| MaxConcurrentClusterPlacement | The max number of clusterResourcePlacement to run concurrently this fleet supports. | `100` | -| ConcurrentResourceChangeSyncs | The number of resourceChange reconcilers that are allowed to run concurrently. | `20` | -| logFileMaxSize | Max size of log file before rotation | `1000000` | -| MaxFleetSizeSupported | The max number of member clusters this fleet supports. | `100` | \ No newline at end of file +| Parameter | Description | Default | +|:-----------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------| +| replicaCount | The number of hub-agent replicas to deploy | `1` | +| image.repository | Image repository | `ghcr.io/azure/azure/fleet/hub-agent` | +| image.pullPolicy | Image pullPolicy | `Always` | +| image.tag | The image release tag to use | `v0.1.0` | +| namespace | Namespace that this Helm chart is installed on | `fleet-system` | +| serviceAccount.create | Whether to create service account | `true` | +| serviceAccount.name | Service account name | `hub-agent-sa` | +| resources | The resource request/limits for the container image | limits: 500m CPU, 1Gi, requests: 100m CPU, 128Mi | +| affinity | The node affinity to use for hubagent pod | `{}` | +| tolerations | The tolerations to use for hubagent pod | `[]` | +| logVerbosity | Log level. Uses V logs (klog) | `5` | +| enableV1Alpha1APIs | If set, the agents will watch for the v1alpha1 APIs. | `false` | +| enableV1Beta1APIs | If set, the agents will watch for the v1beta1 APIs. | `true` | +| hubAPIQPS | QPS to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags. | `250` | +| hubAPIBurst | Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags. | `1000` | +| MaxConcurrentClusterPlacement | The max number of clusterResourcePlacement to run concurrently this fleet supports. | `100` | +| ConcurrentResourceChangeSyncs | The number of resourceChange reconcilers that are allowed to run concurrently. | `20` | +| logFileMaxSize | Max size of log file before rotation | `1000000` | +| MaxFleetSizeSupported | The max number of member clusters this fleet supports. | `100` | +| resourceSnapshotCreationInterval | The interval at which resource snapshots are created. 
| `1m` | \ No newline at end of file diff --git a/charts/hub-agent/templates/deployment.yaml b/charts/hub-agent/templates/deployment.yaml index 4d53f3d3c..b7e9bcc5e 100644 --- a/charts/hub-agent/templates/deployment.yaml +++ b/charts/hub-agent/templates/deployment.yaml @@ -43,6 +43,7 @@ spec: - --hub-api-burst={{ .Values.hubAPIBurst }} - --force-delete-wait-time={{ .Values.forceDeleteWaitTime }} - --cluster-unhealthy-threshold={{ .Values.clusterUnhealthyThreshold }} + - --resource-snapshot-creation-interval={{ .Values.resourceSnapshotCreationInterval }} ports: - name: metrics containerPort: 8080 diff --git a/charts/hub-agent/values.yaml b/charts/hub-agent/values.yaml index 8d3dfe291..300ed00b5 100644 --- a/charts/hub-agent/values.yaml +++ b/charts/hub-agent/values.yaml @@ -18,6 +18,8 @@ enableGuardRail: true webhookClientConnectionType: service forceDeleteWaitTime: 15m0s clusterUnhealthyThreshold: 3m0s +resourceSnapshotCreationInterval: 1m0s + namespace: fleet-system diff --git a/cmd/hubagent/options/options.go b/cmd/hubagent/options/options.go index 77d9f6282..e4e9f9c26 100644 --- a/cmd/hubagent/options/options.go +++ b/cmd/hubagent/options/options.go @@ -104,6 +104,8 @@ type Options struct { PprofPort int // DenyModifyMemberClusterLabels indicates if the member cluster labels cannot be modified by groups (excluding system:masters) DenyModifyMemberClusterLabels bool + // ResourceSnapshotCreationInterval is the interval at which resource snapshots are created. + ResourceSnapshotCreationInterval time.Duration } // NewOptions builds an empty options. @@ -115,14 +117,15 @@ func NewOptions() *Options { ResourceNamespace: utils.FleetSystemNamespace, ResourceName: "136224848560.hub.fleet.azure.com", }, - MaxConcurrentClusterPlacement: 10, - ConcurrentResourceChangeSyncs: 1, - MaxFleetSizeSupported: 100, - EnableV1Alpha1APIs: false, - EnableClusterInventoryAPIs: true, - EnableStagedUpdateRunAPIs: true, - EnablePprof: false, - PprofPort: 6065, + MaxConcurrentClusterPlacement: 10, + ConcurrentResourceChangeSyncs: 1, + MaxFleetSizeSupported: 100, + EnableV1Alpha1APIs: false, + EnableClusterInventoryAPIs: true, + EnableStagedUpdateRunAPIs: true, + EnablePprof: false, + PprofPort: 6065, + ResourceSnapshotCreationInterval: 1 * time.Minute, } } @@ -169,6 +172,7 @@ func (o *Options) AddFlags(flags *flag.FlagSet) { flags.BoolVar(&o.EnablePprof, "enable-pprof", false, "If set, the pprof profiling is enabled.") flags.IntVar(&o.PprofPort, "pprof-port", 6065, "The port for pprof profiling.") flags.BoolVar(&o.DenyModifyMemberClusterLabels, "deny-modify-member-cluster-labels", false, "If set, users not in the system:masters cannot modify member cluster labels.") + flags.DurationVar(&o.ResourceSnapshotCreationInterval, "resource-snapshot-creation-interval", 1*time.Minute, "The interval at which resource snapshots are created.") o.RateLimiterOpts.AddFlags(flags) } diff --git a/cmd/hubagent/workload/setup.go b/cmd/hubagent/workload/setup.go index b64cac9ba..9ae695035 100644 --- a/cmd/hubagent/workload/setup.go +++ b/cmd/hubagent/workload/setup.go @@ -153,14 +153,15 @@ func SetupControllers(ctx context.Context, wg *sync.WaitGroup, mgr ctrl.Manager, // Set up a custom controller to reconcile cluster resource placement crpc := &clusterresourceplacement.Reconciler{ - Client: mgr.GetClient(), - Recorder: mgr.GetEventRecorderFor(crpControllerName), - RestMapper: mgr.GetRESTMapper(), - InformerManager: dynamicInformerManager, - ResourceConfig: resourceConfig, - SkippedNamespaces: skippedNamespaces, - Scheme: 
mgr.GetScheme(), - UncachedReader: mgr.GetAPIReader(), + Client: mgr.GetClient(), + Recorder: mgr.GetEventRecorderFor(crpControllerName), + RestMapper: mgr.GetRESTMapper(), + InformerManager: dynamicInformerManager, + ResourceConfig: resourceConfig, + SkippedNamespaces: skippedNamespaces, + Scheme: mgr.GetScheme(), + UncachedReader: mgr.GetAPIReader(), + ResourceSnapshotCreationInterval: opts.ResourceSnapshotCreationInterval, } rateLimiter := options.DefaultControllerRateLimiter(opts.RateLimiterOpts) diff --git a/pkg/controllers/clusterinventory/clusterprofile/controller_test.go b/pkg/controllers/clusterinventory/clusterprofile/controller_test.go index 32894037e..0be621ac9 100644 --- a/pkg/controllers/clusterinventory/clusterprofile/controller_test.go +++ b/pkg/controllers/clusterinventory/clusterprofile/controller_test.go @@ -159,13 +159,13 @@ func TestSyncClusterProfileCondition(t *testing.T) { t.Run(tt.name, func(t *testing.T) { reconciler.syncClusterProfileCondition(tt.memberCluster, tt.clusterProfile) condition := meta.FindStatusCondition(tt.clusterProfile.Status.Conditions, clusterinventory.ClusterConditionControlPlaneHealthy) - if condition == nil { + if condition == nil { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference t.Fatalf("expected condition to be set, but it was not") } - if condition.Status != tt.expectedConditionStatus { + if condition.Status != tt.expectedConditionStatus { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference t.Errorf("test case `%s` failed, expected condition status %v, got %v", tt.name, tt.expectedConditionStatus, condition.Status) } - if condition.Reason != tt.expectedConditionReason { + if condition.Reason != tt.expectedConditionReason { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference t.Errorf("test case `%s` failed, expected condition reason %v, got %v", tt.name, tt.expectedConditionReason, condition.Reason) } }) diff --git a/pkg/controllers/clusterresourceplacement/controller.go b/pkg/controllers/clusterresourceplacement/controller.go index 241c6e0ea..b4c86aa8f 100644 --- a/pkg/controllers/clusterresourceplacement/controller.go +++ b/pkg/controllers/clusterresourceplacement/controller.go @@ -203,10 +203,10 @@ func (r *Reconciler) handleUpdate(ctx context.Context, crp *fleetv1beta1.Cluster return ctrl.Result{}, err } - latestResourceSnapshot, err := r.getOrCreateClusterResourceSnapshot(ctx, crp, envelopeObjCount, + res, latestResourceSnapshot, err := r.getOrCreateClusterResourceSnapshot(ctx, crp, envelopeObjCount, &fleetv1beta1.ResourceSnapshotSpec{SelectedResources: selectedResources}, int(revisionLimit)) - if err != nil { - return ctrl.Result{}, err + if err != nil || res.Requeue { + return res, err } // isClusterScheduled is to indicate whether we need to requeue the CRP request to track the rollout status. @@ -426,18 +426,21 @@ func (r *Reconciler) deleteRedundantResourceSnapshots(ctx context.Context, crp * return nil } -func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement, envelopeObjCount int, resourceSnapshotSpec *fleetv1beta1.ResourceSnapshotSpec, revisionHistoryLimit int) (*fleetv1beta1.ClusterResourceSnapshot, error) { +// getOrCreateClusterResourceSnapshot gets or creates a clusterResourceSnapshot for the given clusterResourcePlacement. +// It returns the latest clusterResourceSnapshot if it exists and is up to date, otherwise it creates a new one. 
+// It also returns the ctrl.Result to indicate whether the request should be requeued or not. +func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp *fleetv1beta1.ClusterResourcePlacement, envelopeObjCount int, resourceSnapshotSpec *fleetv1beta1.ResourceSnapshotSpec, revisionHistoryLimit int) (ctrl.Result, *fleetv1beta1.ClusterResourceSnapshot, error) { resourceHash, err := resource.HashOf(resourceSnapshotSpec) crpKObj := klog.KObj(crp) if err != nil { klog.ErrorS(err, "Failed to generate resource hash of crp", "clusterResourcePlacement", crpKObj) - return nil, controller.NewUnexpectedBehaviorError(err) + return ctrl.Result{}, nil, controller.NewUnexpectedBehaviorError(err) } // latestResourceSnapshotIndex should be -1 when there is no snapshot. latestResourceSnapshot, latestResourceSnapshotIndex, err := r.lookupLatestResourceSnapshot(ctx, crp) if err != nil { - return nil, err + return ctrl.Result{}, nil, err } latestResourceSnapshotHash := "" @@ -446,12 +449,12 @@ func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp latestResourceSnapshotHash, err = parseResourceGroupHashFromAnnotation(latestResourceSnapshot) if err != nil { klog.ErrorS(err, "Failed to get the ResourceGroupHashAnnotation", "clusterResourceSnapshot", klog.KObj(latestResourceSnapshot)) - return nil, controller.NewUnexpectedBehaviorError(err) + return ctrl.Result{}, nil, controller.NewUnexpectedBehaviorError(err) } numberOfSnapshots, err = annotations.ExtractNumberOfResourceSnapshotsFromResourceSnapshot(latestResourceSnapshot) if err != nil { klog.ErrorS(err, "Failed to get the NumberOfResourceSnapshotsAnnotation", "clusterResourceSnapshot", klog.KObj(latestResourceSnapshot)) - return nil, controller.NewUnexpectedBehaviorError(err) + return ctrl.Result{}, nil, controller.NewUnexpectedBehaviorError(err) } } @@ -463,7 +466,7 @@ func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp resourceSnapshotStartIndex := 0 if latestResourceSnapshot != nil && latestResourceSnapshotHash == resourceHash { if err := r.ensureLatestResourceSnapshot(ctx, latestResourceSnapshot); err != nil { - return nil, err + return ctrl.Result{}, nil, err } // check to see all that the master cluster resource snapshot and sub-indexed snapshots belonging to the same group index exists. latestGroupResourceLabelMatcher := client.MatchingLabels{ @@ -474,11 +477,11 @@ func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp if err := r.Client.List(ctx, resourceSnapshotList, latestGroupResourceLabelMatcher); err != nil { klog.ErrorS(err, "Failed to list the latest group clusterResourceSnapshots associated with the clusterResourcePlacement", "clusterResourcePlacement", crp.Name) - return nil, controller.NewAPIServerError(true, err) + return ctrl.Result{}, nil, controller.NewAPIServerError(true, err) } if len(resourceSnapshotList.Items) == numberOfSnapshots { klog.V(2).InfoS("ClusterResourceSnapshots have not changed", "clusterResourcePlacement", crpKObj, "clusterResourceSnapshot", klog.KObj(latestResourceSnapshot)) - return latestResourceSnapshot, nil + return ctrl.Result{}, latestResourceSnapshot, nil } // we should not create a new master cluster resource snapshot. 
shouldCreateNewMasterClusterSnapshot = false @@ -490,14 +493,22 @@ func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp // mark the last resource snapshot as inactive if it is different from what we have now or 3) when some // sub-indexed cluster resource snapshots belonging to the same group have not been created, the master // cluster resource snapshot should exist and be latest. - if latestResourceSnapshot != nil && - latestResourceSnapshotHash != resourceHash && - latestResourceSnapshot.Labels[fleetv1beta1.IsLatestSnapshotLabel] == strconv.FormatBool(true) { + if latestResourceSnapshot != nil && latestResourceSnapshotHash != resourceHash && latestResourceSnapshot.Labels[fleetv1beta1.IsLatestSnapshotLabel] == strconv.FormatBool(true) { + // If the latest resource snapshot does not carry the isLatest label, the previous reconcile failed to create the new + // resource snapshot after marking this one inactive, so there is no need to check the interval and delay the request. + if since := time.Since(latestResourceSnapshot.CreationTimestamp.Time); since < r.ResourceSnapshotCreationInterval { + // If the latest resource snapshot was created less than the configured resourceSnapshotCreationInterval ago, + // requeue the request to avoid overly frequent updates. + klog.V(2).InfoS("The latest resource snapshot was just created, skipping the update", "clusterResourcePlacement", crpKObj, + "clusterResourceSnapshot", klog.KObj(latestResourceSnapshot), "creationTime", latestResourceSnapshot.CreationTimestamp, "configuredResourceSnapshotCreationInterval", r.ResourceSnapshotCreationInterval, "afterDuration", r.ResourceSnapshotCreationInterval-since) + return ctrl.Result{Requeue: true, RequeueAfter: r.ResourceSnapshotCreationInterval - since}, nil, nil + } + // set the latest label to false first to make sure there is only one or none active resource snapshot latestResourceSnapshot.Labels[fleetv1beta1.IsLatestSnapshotLabel] = strconv.FormatBool(false) if err := r.Client.Update(ctx, latestResourceSnapshot); err != nil { klog.ErrorS(err, "Failed to set the isLatestSnapshot label to false", "clusterResourceSnapshot", klog.KObj(latestResourceSnapshot)) - return nil, controller.NewUpdateIgnoreConflictError(err) + return ctrl.Result{}, nil, controller.NewUpdateIgnoreConflictError(err) } klog.V(2).InfoS("Marked the existing clusterResourceSnapshot as inactive", "clusterResourcePlacement", crpKObj, "clusterResourceSnapshot", klog.KObj(latestResourceSnapshot)) } @@ -507,7 +518,7 @@ func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp // delete redundant snapshot revisions before creating a new master cluster resource snapshot to guarantee that the number of snapshots // won't exceed the limit. if err := r.deleteRedundantResourceSnapshots(ctx, crp, revisionHistoryLimit); err != nil { - return nil, err + return ctrl.Result{}, nil, err } latestResourceSnapshotIndex++ } @@ -522,7 +533,7 @@ func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp resourceSnapshot = buildSubIndexResourceSnapshot(latestResourceSnapshotIndex, i-1, crp.Name, selectedResourcesList[i]) } if err = r.createResourceSnapshot(ctx, crp, resourceSnapshot); err != nil { - return nil, err + return ctrl.Result{}, nil, err } } // shouldCreateNewMasterClusterSnapshot is used here to be defensive in case of the regression. 
@@ -530,10 +541,10 @@ func (r *Reconciler) getOrCreateClusterResourceSnapshot(ctx context.Context, crp resourceSnapshot = buildMasterClusterResourceSnapshot(latestResourceSnapshotIndex, 1, envelopeObjCount, crp.Name, resourceHash, []fleetv1beta1.ResourceContent{}) latestResourceSnapshot = resourceSnapshot if err = r.createResourceSnapshot(ctx, crp, resourceSnapshot); err != nil { - return nil, err + return ctrl.Result{}, nil, err } } - return latestResourceSnapshot, nil + return ctrl.Result{}, latestResourceSnapshot, nil } // buildMasterClusterResourceSnapshot builds and returns the master cluster resource snapshot for the latest resource snapshot index and selected resources. diff --git a/pkg/controllers/clusterresourceplacement/controller_test.go b/pkg/controllers/clusterresourceplacement/controller_test.go index 40a17e9af..3727624ef 100644 --- a/pkg/controllers/clusterresourceplacement/controller_test.go +++ b/pkg/controllers/clusterresourceplacement/controller_test.go @@ -23,6 +23,7 @@ import ( "fmt" "strconv" "testing" + "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" @@ -1075,7 +1076,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { t.Fatalf("failed to create the resourceSnapshotSpecWithMultipleResources hash: %v", err) } resourceSnapshotSpecWithMultipleResourcesHash := fmt.Sprintf("%x", sha256.Sum256(jsonBytes)) - + now := metav1.Now() tests := []struct { name string envelopeObjCount int @@ -1085,6 +1086,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { resourceSnapshots []fleetv1beta1.ClusterResourceSnapshot wantResourceSnapshots []fleetv1beta1.ClusterResourceSnapshot wantLatestSnapshotIndex int // index of the wantPolicySnapshots array + wantRequeue bool }{ { name: "new resourceSnapshot and no existing snapshots owned by my-crp", @@ -1303,6 +1305,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotSpecWithServiceResourceHash, fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", }, + CreationTimestamp: now, }, Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, }, @@ -1337,6 +1340,155 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, wantLatestSnapshotIndex: 0, }, + { + name: "resource has changed too fast and there is an active snapshot with multiple revisionLimit", + envelopeObjCount: 3, + resourceSnapshotSpec: &fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{}}, + revisionHistoryLimit: &multipleRevisionLimit, + resourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testCRPName, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testCRPName, + fleetv1beta1.IsLatestSnapshotLabel: "true", + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotSpecWithServiceResourceHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "3", + }, + CreationTimestamp: now, + }, + Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: 
fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testCRPName, 0, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testCRPName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "0", + }, + }, + Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{}}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testCRPName, 0, 1), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testCRPName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "1", + }, + }, + Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{}}, + }, + }, + wantResourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testCRPName, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testCRPName, + fleetv1beta1.IsLatestSnapshotLabel: "true", + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotSpecWithServiceResourceHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "3", + }, + CreationTimestamp: now, + }, + Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testCRPName, 0, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testCRPName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "0", + }, + }, + Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{}}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testCRPName, 0, 1), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testCRPName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "1", + }, + }, + Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{}}, + }, + }, + wantRequeue: true, + }, { name: "resource has changed 
and there is an active snapshot with multiple revisionLimit", envelopeObjCount: 3, @@ -1364,6 +1516,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotSpecWithServiceResourceHash, fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "3", }, + CreationTimestamp: metav1.NewTime(now.Time.Add(-1 * time.Hour)), }, Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, }, @@ -1535,6 +1688,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "2", fleetv1beta1.NumberOfEnvelopedObjectsAnnotation: "0", }, + CreationTimestamp: now, }, Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, }, @@ -1694,6 +1848,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "3", fleetv1beta1.NumberOfEnvelopedObjectsAnnotation: "0", }, + CreationTimestamp: now, }, Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, }, @@ -1799,6 +1954,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "3", fleetv1beta1.NumberOfEnvelopedObjectsAnnotation: "0", }, + CreationTimestamp: metav1.NewTime(now.Time.Add(-1 * time.Hour)), }, Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, }, @@ -1898,6 +2054,157 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { }, wantLatestSnapshotIndex: 0, }, + { + name: "resource has changed too fast, selected resources cross clusterResourceSnapshot limit, revision limit is 1", + selectedResourcesSizeLimit: 100, + resourceSnapshotSpec: &fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent, secretResourceContent}}, + revisionHistoryLimit: &singleRevisionLimit, + resourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testCRPName, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.IsLatestSnapshotLabel: "true", + fleetv1beta1.CRPTrackingLabel: testCRPName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotSpecWithMultipleResourcesHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "3", + fleetv1beta1.NumberOfEnvelopedObjectsAnnotation: "0", + }, + CreationTimestamp: now, + }, + Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testCRPName, 0, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testCRPName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "0", + }, + }, + 
Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{secretResourceContent}}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testCRPName, 0, 1), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testCRPName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "1", + }, + }, + Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{deploymentResourceContent}}, + }, + }, + wantResourceSnapshots: []fleetv1beta1.ClusterResourceSnapshot{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameFmt, testCRPName, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.IsLatestSnapshotLabel: "true", + fleetv1beta1.CRPTrackingLabel: testCRPName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.ResourceGroupHashAnnotation: resourceSnapshotSpecWithMultipleResourcesHash, + fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "3", + fleetv1beta1.NumberOfEnvelopedObjectsAnnotation: "0", + }, + CreationTimestamp: now, + }, + Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testCRPName, 0, 0), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testCRPName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "0", + }, + }, + Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{secretResourceContent}}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf(fleetv1beta1.ResourceSnapshotNameWithSubindexFmt, testCRPName, 0, 1), + Labels: map[string]string{ + fleetv1beta1.ResourceIndexLabel: "0", + fleetv1beta1.CRPTrackingLabel: testCRPName, + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: testCRPName, + BlockOwnerDeletion: ptr.To(true), + Controller: ptr.To(true), + APIVersion: fleetAPIVersion, + Kind: "ClusterResourcePlacement", + }, + }, + Annotations: map[string]string{ + fleetv1beta1.SubindexOfResourceSnapshotAnnotation: "1", + }, + }, + Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{deploymentResourceContent}}, + }, + }, + wantRequeue: true, + }, { name: "selected resources cross clusterResourceSnapshot limit, revision limit is 1, delete existing clusterResourceSnapshot with missing sub-indexed snapshots & create new clusterResourceSnapshots", selectedResourcesSizeLimit: 100, @@ -1926,6 +2233,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "3", 
fleetv1beta1.NumberOfEnvelopedObjectsAnnotation: "0", }, + CreationTimestamp: metav1.NewTime(now.Time.Add(-1 * time.Hour)), }, Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, }, @@ -2009,6 +2317,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", fleetv1beta1.NumberOfEnvelopedObjectsAnnotation: "0", }, + CreationTimestamp: metav1.NewTime(now.Time.Add(-1 * time.Hour)), }, Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, }, @@ -2117,6 +2426,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "2", fleetv1beta1.NumberOfEnvelopedObjectsAnnotation: "0", }, + CreationTimestamp: now, }, Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, }, @@ -2222,6 +2532,7 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { fleetv1beta1.NumberOfResourceSnapshotsAnnotation: "1", fleetv1beta1.NumberOfEnvelopedObjectsAnnotation: "0", }, + CreationTimestamp: now, }, Spec: fleetv1beta1.ResourceSnapshotSpec{SelectedResources: []fleetv1beta1.ResourceContent{serviceResourceContent}}, }, @@ -2369,27 +2680,38 @@ func TestGetOrCreateClusterResourceSnapshot(t *testing.T) { WithObjects(objects...). Build() r := Reconciler{ - Client: fakeClient, - Scheme: scheme, - Recorder: record.NewFakeRecorder(10), + Client: fakeClient, + Scheme: scheme, + Recorder: record.NewFakeRecorder(10), + ResourceSnapshotCreationInterval: 1 * time.Minute, } limit := int32(defaulter.DefaultRevisionHistoryLimitValue) if tc.revisionHistoryLimit != nil { limit = *tc.revisionHistoryLimit } resourceSnapshotResourceSizeLimit = tc.selectedResourcesSizeLimit - got, err := r.getOrCreateClusterResourceSnapshot(ctx, crp, tc.envelopeObjCount, tc.resourceSnapshotSpec, int(limit)) + res, got, err := r.getOrCreateClusterResourceSnapshot(ctx, crp, tc.envelopeObjCount, tc.resourceSnapshotSpec, int(limit)) if err != nil { t.Fatalf("failed to handle getOrCreateClusterResourceSnapshot: %v", err) } + if res.Requeue != tc.wantRequeue { + t.Fatalf("getOrCreateClusterResourceSnapshot() got Requeue %v, want %v", res.Requeue, tc.wantRequeue) + } + options := []cmp.Option{ - cmpopts.IgnoreFields(metav1.ObjectMeta{}, "ResourceVersion"), + cmpopts.IgnoreFields(metav1.ObjectMeta{}, "ResourceVersion", "CreationTimestamp"), // Fake API server will add a newline for the runtime.RawExtension type. 
// ignoring the resourceContent field for now cmpopts.IgnoreFields(runtime.RawExtension{}, "Raw"), } - if diff := cmp.Diff(tc.wantResourceSnapshots[tc.wantLatestSnapshotIndex], *got, options...); diff != "" { - t.Errorf("getOrCreateClusterResourceSnapshot() mismatch (-want, +got):\n%s", diff) + if tc.wantRequeue { + if res.RequeueAfter <= 0 { + t.Fatalf("getOrCreateClusterResourceSnapshot() got RequeueAfter %v, want greater than zero value", res.RequeueAfter) + } + } else { + if diff := cmp.Diff(tc.wantResourceSnapshots[tc.wantLatestSnapshotIndex], *got, options...); diff != "" { + t.Errorf("getOrCreateClusterResourceSnapshot() mismatch (-want, +got):\n%s", diff) + } } clusterResourceSnapshotList := &fleetv1beta1.ClusterResourceSnapshotList{} if err := fakeClient.List(ctx, clusterResourceSnapshotList); err != nil { @@ -2742,10 +3064,13 @@ func TestGetOrCreateClusterResourceSnapshot_failure(t *testing.T) { Client: fakeClient, Scheme: scheme, } - _, err := r.getOrCreateClusterResourceSnapshot(ctx, crp, 0, resourceSnapshotSpecA, 1) + res, _, err := r.getOrCreateClusterResourceSnapshot(ctx, crp, 0, resourceSnapshotSpecA, 1) if err == nil { // if error is nil t.Fatal("getOrCreateClusterResourceSnapshot() = nil, want err") } + if res.Requeue { + t.Fatal("getOrCreateClusterResourceSnapshot() requeue = true, want false") + } if !errors.Is(err, controller.ErrUnexpectedBehavior) { t.Errorf("getOrCreateClusterResourceSnapshot() got %v, want %v type", err, controller.ErrUnexpectedBehavior) } diff --git a/pkg/controllers/clusterresourceplacement/placement_controllerv1alpha1.go b/pkg/controllers/clusterresourceplacement/placement_controllerv1alpha1.go index cdea2ad53..e92223ae4 100644 --- a/pkg/controllers/clusterresourceplacement/placement_controllerv1alpha1.go +++ b/pkg/controllers/clusterresourceplacement/placement_controllerv1alpha1.go @@ -69,6 +69,10 @@ type Reconciler struct { Recorder record.EventRecorder Scheme *runtime.Scheme + + // ResourceSnapshotCreationInterval is the interval to create a new resourcesnapshot + // to avoid too frequent updates. + ResourceSnapshotCreationInterval time.Duration } // ReconcileV1Alpha1 reconciles v1aplha1 APIs. diff --git a/test/e2e/placement_with_custom_config_test.go b/test/e2e/placement_with_custom_config_test.go new file mode 100644 index 000000000..3e0c0dbb9 --- /dev/null +++ b/test/e2e/placement_with_custom_config_test.go @@ -0,0 +1,143 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package e2e + +import ( + "fmt" + "math" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + placementv1beta1 "github.com/kubefleet-dev/kubefleet/apis/placement/v1beta1" +) + +var _ = Describe("validating CRP when using customized resourceSnapshotCreationInterval", Label("custom"), Ordered, func() { + // skip entire suite if interval is zero + BeforeAll(func() { + if resourceSnapshotCreationInterval == 0 { + Skip("Skipping customized-config placement test when RESOURCE_SNAPSHOT_CREATION_INTERVAL=0m") + } + }) + + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + + BeforeAll(func() { + By("creating work resources") + createWorkResources() + + // Create the CRP. + crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + // Add a custom finalizer; this would allow us to better observe + // the behavior of the controllers. + Finalizers: []string{customDeletionBlockerFinalizer}, + }, + Spec: placementv1beta1.PlacementSpec{ + ResourceSelectors: []placementv1beta1.ClusterResourceSelector{ + { + Group: "", + Kind: "Namespace", + Version: "v1", + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + workNamespaceLabelName: fmt.Sprintf("test-%d", GinkgoParallelProcess()), + }, + }, + }, + }, + Strategy: placementv1beta1.RolloutStrategy{ + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(5), + }, + }, + }, + } + By(fmt.Sprintf("creating placement %s", crpName)) + Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP %s", crpName) + }) + + AfterAll(func() { + By(fmt.Sprintf("garbage all things related to placement %s", crpName)) + ensureCRPAndRelatedResourcesDeleted(crpName, allMemberClusters) + }) + + It("should update CRP status as expected", func() { + crpStatusUpdatedActual := crpStatusUpdatedActual([]placementv1beta1.ResourceIdentifier{}, allMemberClusterNames, nil, "0") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as expected", crpName) + }) + + It("should not place work resources on member clusters", checkIfRemovedWorkResourcesFromAllMemberClusters) + + It("updating the resources on the hub and the namespace becomes selected", func() { + workNamespaceName := fmt.Sprintf(workNamespaceNameTemplate, GinkgoParallelProcess()) + ns := &corev1.Namespace{} + Expect(hubClient.Get(ctx, types.NamespacedName{Name: workNamespaceName}, ns)).Should(Succeed(), "Failed to get the namespace %s", workNamespaceName) + ns.Labels = map[string]string{ + workNamespaceLabelName: fmt.Sprintf("test-%d", GinkgoParallelProcess()), + } + Expect(hubClient.Update(ctx, ns)).Should(Succeed(), "Failed to update namespace %s", workNamespaceName) + }) + + It("should not update CRP status immediately", func() { + crpStatusUpdatedActual := crpStatusUpdatedActual([]placementv1beta1.ResourceIdentifier{}, allMemberClusterNames, nil, "0") + Consistently(crpStatusUpdatedActual, resourceSnapshotCreationInterval-3*time.Second, consistentlyInterval).Should(Succeed(), "CRP %s status should be unchanged", crpName) + }) + + It("should update CRP status as expected", func() { + crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), allMemberClusterNames, nil, "1") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP %s status as 
expected", crpName) + }) + + It("should place the selected resources on member clusters", checkIfPlacedWorkResourcesOnAllMemberClusters) + + It("validating the clusterResourceSnapshots are created", func() { + var resourceSnapshotList placementv1beta1.ClusterResourceSnapshotList + masterResourceSnapshotLabels := client.MatchingLabels{ + placementv1beta1.CRPTrackingLabel: crpName, + } + Expect(hubClient.List(ctx, &resourceSnapshotList, masterResourceSnapshotLabels)).Should(Succeed(), "Failed to list ClusterResourceSnapshots for CRP %s", crpName) + Expect(len(resourceSnapshotList.Items)).Should(Equal(2), "Expected 2 ClusterResourceSnapshots for CRP %s, got %d", crpName, len(resourceSnapshotList.Items)) + // Use math.Abs to get the absolute value of the time difference in seconds. + snapshotDiffInSeconds := resourceSnapshotList.Items[0].CreationTimestamp.Time.Sub(resourceSnapshotList.Items[1].CreationTimestamp.Time).Seconds() + diff := math.Abs(snapshotDiffInSeconds) + Expect(time.Duration(diff)*time.Second >= resourceSnapshotCreationInterval).To(BeTrue(), "The time difference between ClusterResourceSnapshots should be more than resourceSnapshotCreationInterval") + }) + + It("can delete the CRP", func() { + // Delete the CRP. + crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + }, + } + Expect(hubClient.Delete(ctx, crp)).To(Succeed(), "Failed to delete CRP %s", crpName) + }) + + It("should remove placed resources from all member clusters", checkIfRemovedWorkResourcesFromAllMemberClusters) + + It("should remove controller finalizers from CRP", func() { + finalizerRemovedActual := allFinalizersExceptForCustomDeletionBlockerRemovedFromCRPActual(crpName) + Eventually(finalizerRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove controller finalizers from CRP %s", crpName) + }) +}) diff --git a/test/e2e/setup.sh b/test/e2e/setup.sh index 9c4f17b06..14e9643a9 100755 --- a/test/e2e/setup.sh +++ b/test/e2e/setup.sh @@ -26,6 +26,8 @@ export MEMBER_AGENT_IMAGE="${MEMBER_AGENT_IMAGE:-member-agent}" export REFRESH_TOKEN_IMAGE="${REFRESH_TOKEN_IMAGE:-refresh-token}" export PROPERTY_PROVIDER="${PROPERTY_PROVIDER:-azure}" export USE_PREDEFINED_REGIONS="${USE_PREDEFINED_REGIONS:-false}" +export RESOURCE_SNAPSHOT_CREATION_INTERVAL="${RESOURCE_SNAPSHOT_CREATION_INTERVAL:-0m}" + # The pre-defined regions; if the AKS property provider is used. 
# # Note that for a specific cluster, if a predefined region is not set, the node region must @@ -123,7 +125,8 @@ helm install hub-agent ../../charts/hub-agent/ \ --set webhookClientConnectionType=service \ --set forceDeleteWaitTime="1m0s" \ --set clusterUnhealthyThreshold="3m0s" \ - --set logFileMaxSize=1000000 + --set logFileMaxSize=1000000 \ + --set resourceSnapshotCreationInterval=$RESOURCE_SNAPSHOT_CREATION_INTERVAL # Download CRDs from Fleet networking repo export ENDPOINT_SLICE_EXPORT_CRD_URL=https://raw.githubusercontent.com/Azure/fleet-networking/v0.2.7/config/crd/bases/networking.fleet.azure.com_endpointsliceexports.yaml diff --git a/test/e2e/setup_test.go b/test/e2e/setup_test.go index 835fb6783..474f9bf92 100644 --- a/test/e2e/setup_test.go +++ b/test/e2e/setup_test.go @@ -112,6 +112,8 @@ var ( allMemberClusters []*framework.Cluster allMemberClusterNames = []string{} + + resourceSnapshotCreationInterval time.Duration ) var ( @@ -301,6 +303,16 @@ func beforeSuiteForAllProcesses() { // Check if the required environment variable, which specifies the path to kubeconfig file, has been set. Expect(os.Getenv(kubeConfigPathEnvVarName)).NotTo(BeEmpty(), "Required environment variable KUBECONFIG is not set") + resourceSnapshotCreationIntervalEnv := os.Getenv("RESOURCE_SNAPSHOT_CREATION_INTERVAL") + if resourceSnapshotCreationIntervalEnv == "" { + // If the environment variable is not set, default to 0, i.e. no extra delay between resource snapshot creations. + resourceSnapshotCreationInterval = 0 + } else { + var err error + resourceSnapshotCreationInterval, err = time.ParseDuration(resourceSnapshotCreationIntervalEnv) + Expect(err).Should(Succeed(), "failed to parse RESOURCE_SNAPSHOT_CREATION_INTERVAL") + } + // Initialize the cluster objects and their clients. hubCluster = framework.NewCluster(hubClusterName, "", scheme, nil) Expect(hubCluster).NotTo(BeNil(), "Failed to initialize cluster object")
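
Reviewer note: below is a minimal, standalone sketch (not part of the patch) of the throttling pattern the controller change introduces: when the latest resource snapshot is younger than the configured interval, the reconciler requeues the request for the remaining time instead of creating a new snapshot right away. It assumes controller-runtime's ctrl.Result semantics and a go.mod that already pulls in sigs.k8s.io/controller-runtime; the helper name shouldDelaySnapshotCreation and the main function are illustrative only.

package main

import (
	"fmt"
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
)

// shouldDelaySnapshotCreation reports whether creating a new resource snapshot
// should be postponed because the latest snapshot is younger than the configured
// interval; if so, it returns a result that requeues after the remaining wait.
func shouldDelaySnapshotCreation(latestCreation time.Time, interval time.Duration) (ctrl.Result, bool) {
	if since := time.Since(latestCreation); since < interval {
		return ctrl.Result{Requeue: true, RequeueAfter: interval - since}, true
	}
	return ctrl.Result{}, false
}

func main() {
	// With a 1m interval and a snapshot created 20s ago, the request is
	// requeued after roughly 40s instead of creating a new snapshot immediately.
	res, delayed := shouldDelaySnapshotCreation(time.Now().Add(-20*time.Second), time.Minute)
	fmt.Println(delayed, res.RequeueAfter.Round(time.Second))
}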