kubefleet-dev · zhiying-lin · Jun 25, 2025 · Jun 5, 2025 · Jun 6, 2025 · Jun 11, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -86,6 +86,16 @@ jobs:
           HUB_SERVER_URL: 'https://172.19.0.2:6443'
 
   e2e-tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        customized-settings: [default, custom]
+        include:
+          - customized-settings: default
+            # to shorten the test duration, set the resource snapshot creation interval to 0
+            resource-snapshot-creation-interval: 0m
+          - customized-settings: custom
+            resource-snapshot-creation-interval: 1m
     runs-on: ubuntu-latest
     needs: [
       detect-noop,
@@ -119,7 +129,11 @@ jobs:
 
       - name: Run e2e tests
         run: |
-          make e2e-tests
+          if [ "${{ matrix.customized-settings }}" = "default" ]; then
+            make e2e-tests
+          else
+            make e2e-tests-custom
+          fi
         env:
           KUBECONFIG: '/home/runner/.kube/config'
           HUB_SERVER_URL: 'https://172.19.0.2:6443'
@@ -129,4 +143,5 @@ jobs:
           # TO-DO (chenyu1): to ensure a vendor-neutral experience, switch to a dummy
           # property provider once the AKS one is split out.
           PROPERTY_PROVIDER: 'azure'
+          RESOURCE_SNAPSHOT_CREATION_INTERVAL: ${{ matrix.resource-snapshot-creation-interval }}
 
diff --git a/Makefile b/Makefile
@@ -213,7 +213,12 @@ e2e-tests-v1alpha1: create-kind-cluster run-e2e-v1alpha1
 
 .PHONY: e2e-tests
 e2e-tests: setup-clusters
-	cd ./test/e2e && ginkgo -v -p .
+	cd ./test/e2e && ginkgo --label-filter="!custom" -v -p .
+
+# e2e-tests-custom runs only the e2e specs labeled "custom"; e2e-tests above excludes them.
+.PHONY: e2e-tests-custom
+e2e-tests-custom: setup-clusters
+	cd ./test/e2e && ginkgo --label-filter="custom" -v -p .
 
 .PHONY: setup-clusters
 setup-clusters:

diff --git a/charts/hub-agent/README.md b/charts/hub-agent/README.md
@@ -19,24 +19,25 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen
 
 ## Parameters
 
-| Parameter                     | Description                                                                                                                                                  | Default                                          |
-|:------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------|
-| replicaCount                  | The number of hub-agent replicas to deploy                                                                                                                   | `1`                                              |
-| image.repository              | Image repository                                                                                                                                             | `ghcr.io/azure/azure/fleet/hub-agent`            |
-| image.pullPolicy              | Image pullPolicy                                                                                                                                             | `Always`                                         |
-| image.tag                     | The image release tag to use                                                                                                                                 | `v0.1.0`                                         |
-| namespace                     | Namespace that this Helm chart is installed on                                                                                                               | `fleet-system`                                   |
-| serviceAccount.create         | Whether to create service account                                                                                                                            | `true`                                           |
-| serviceAccount.name           | Service account name                                                                                                                                         | `hub-agent-sa`                                   |
-| resources                     | The resource request/limits for the container image                                                                                                          | limits: 500m CPU, 1Gi, requests: 100m CPU, 128Mi |
-| affinity                      | The node affinity to use for hubagent pod                                                                                                                    | `{}`                                             |
-| tolerations                   | The tolerations to use for hubagent pod                                                                                                                      | `[]`                                             |
-| logVerbosity                  | Log level. Uses V logs (klog)                                                                                                                                | `5`                                              |
-| enableV1Alpha1APIs            | If set, the agents will watch for the v1alpha1 APIs.                                                                                                         | `false`                                          |
-| enableV1Beta1APIs             | If set, the agents will watch for the v1beta1 APIs.                                                                                                          | `true`                                           |
-| hubAPIQPS                     | QPS to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.   | `250`                                            |
-| hubAPIBurst                   | Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags. | `1000`                                           |
-| MaxConcurrentClusterPlacement | The max number of clusterResourcePlacement to run concurrently this fleet supports.                                                                          | `100`                                            |
-| ConcurrentResourceChangeSyncs | The number of resourceChange reconcilers that are allowed to run concurrently.                                                                               | `20`                                             |
-| logFileMaxSize                | Max size of log file before rotation                                                                                                                         | `1000000`                                        |
-| MaxFleetSizeSupported         | The max number of member clusters this fleet supports.                                                                                                       | `100`                                            |
+| Parameter                         | Description                                                                                                                                                  | Default                                          |
+|:-----------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------|
+| replicaCount                      | The number of hub-agent replicas to deploy                                                                                                                   | `1`                                              |
+| image.repository                  | Image repository                                                                                                                                             | `ghcr.io/azure/azure/fleet/hub-agent`            |
+| image.pullPolicy                  | Image pullPolicy                                                                                                                                             | `Always`                                         |
+| image.tag                         | The image release tag to use                                                                                                                                 | `v0.1.0`                                         |
+| namespace                         | Namespace that this Helm chart is installed on                                                                                                               | `fleet-system`                                   |
+| serviceAccount.create             | Whether to create service account                                                                                                                            | `true`                                           |
+| serviceAccount.name               | Service account name                                                                                                                                         | `hub-agent-sa`                                   |
+| resources                         | The resource request/limits for the container image                                                                                                          | limits: 500m CPU, 1Gi, requests: 100m CPU, 128Mi |
+| affinity                          | The node affinity to use for hubagent pod                                                                                                                    | `{}`                                             |
+| tolerations                       | The tolerations to use for hubagent pod                                                                                                                      | `[]`                                             |
+| logVerbosity                      | Log level. Uses V logs (klog)                                                                                                                                | `5`                                              |
+| enableV1Alpha1APIs                | If set, the agents will watch for the v1alpha1 APIs.                                                                                                         | `false`                                          |
+| enableV1Beta1APIs                 | If set, the agents will watch for the v1beta1 APIs.                                                                                                          | `true`                                           |
+| hubAPIQPS                         | QPS to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.   | `250`                                            |
+| hubAPIBurst                       | Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags. | `1000`                                           |
+| MaxConcurrentClusterPlacement     | The max number of clusterResourcePlacement to run concurrently this fleet supports.                                                                          | `100`                                            |
+| ConcurrentResourceChangeSyncs     | The number of resourceChange reconcilers that are allowed to run concurrently.                                                                               | `20`                                             |
+| logFileMaxSize                    | Max size of log file before rotation                                                                                                                         | `1000000`                                        |
+| MaxFleetSizeSupported             | The max number of member clusters this fleet supports.                                                                                                       | `100`                                            |
+| resourceSnapshotCreationInterval  | The interval at which resource snapshots are created.                                                                                                        | `1m`                                             |
diff --git a/charts/hub-agent/templates/deployment.yaml b/charts/hub-agent/templates/deployment.yaml
@@ -43,6 +43,7 @@ spec:
             - --hub-api-burst={{ .Values.hubAPIBurst }}
             - --force-delete-wait-time={{ .Values.forceDeleteWaitTime }}
             - --cluster-unhealthy-threshold={{ .Values.clusterUnhealthyThreshold }}
+            - --resource-snapshot-creation-interval={{ .Values.resourceSnapshotCreationInterval }}
           ports:
             - name: metrics
               containerPort: 8080

diff --git a/charts/hub-agent/values.yaml b/charts/hub-agent/values.yaml
@@ -18,6 +18,8 @@ enableGuardRail: true
 webhookClientConnectionType: service
 forceDeleteWaitTime: 15m0s
 clusterUnhealthyThreshold: 3m0s
+resourceSnapshotCreationInterval: 1m0s
+
 namespace:
   fleet-system
 

diff --git a/cmd/hubagent/options/options.go b/cmd/hubagent/options/options.go
@@ -104,6 +104,8 @@ type Options struct {
 	PprofPort int
 	// DenyModifyMemberClusterLabels indicates if the member cluster labels cannot be modified by groups (excluding system:masters)
 	DenyModifyMemberClusterLabels bool
+	// ResourceSnapshotCreationInterval is the interval at which resource snapshots are created.
+	ResourceSnapshotCreationInterval time.Duration
 }
 
 // NewOptions builds an empty options.
@@ -115,14 +117,15 @@ func NewOptions() *Options {
 			ResourceNamespace: utils.FleetSystemNamespace,
 			ResourceName:      "136224848560.hub.fleet.azure.com",
 		},
-		MaxConcurrentClusterPlacement: 10,
-		ConcurrentResourceChangeSyncs: 1,
-		MaxFleetSizeSupported:         100,
-		EnableV1Alpha1APIs:            false,
-		EnableClusterInventoryAPIs:    true,
-		EnableStagedUpdateRunAPIs:     true,
-		EnablePprof:                   false,
-		PprofPort:                     6065,
+		MaxConcurrentClusterPlacement:    10,
+		ConcurrentResourceChangeSyncs:    1,
+		MaxFleetSizeSupported:            100,
+		EnableV1Alpha1APIs:               false,
+		EnableClusterInventoryAPIs:       true,
+		EnableStagedUpdateRunAPIs:        true,
+		EnablePprof:                      false,
+		PprofPort:                        6065,
+		ResourceSnapshotCreationInterval: 1 * time.Minute,
 	}
 }
 
@@ -169,6 +172,7 @@ func (o *Options) AddFlags(flags *flag.FlagSet) {
 	flags.BoolVar(&o.EnablePprof, "enable-pprof", false, "If set, the pprof profiling is enabled.")
 	flags.IntVar(&o.PprofPort, "pprof-port", 6065, "The port for pprof profiling.")
 	flags.BoolVar(&o.DenyModifyMemberClusterLabels, "deny-modify-member-cluster-labels", false, "If set, users not in the system:masters cannot modify member cluster labels.")
+	flags.DurationVar(&o.ResourceSnapshotCreationInterval, "resource-snapshot-creation-interval", 1*time.Minute, "The interval at which resource snapshots are created.")
 
 	o.RateLimiterOpts.AddFlags(flags)
 }
diff --git a/cmd/hubagent/workload/setup.go b/cmd/hubagent/workload/setup.go
@@ -153,14 +153,15 @@ func SetupControllers(ctx context.Context, wg *sync.WaitGroup, mgr ctrl.Manager,
 
 	// Set up  a custom controller to reconcile cluster resource placement
 	crpc := &clusterresourceplacement.Reconciler{
-		Client:            mgr.GetClient(),
-		Recorder:          mgr.GetEventRecorderFor(crpControllerName),
-		RestMapper:        mgr.GetRESTMapper(),
-		InformerManager:   dynamicInformerManager,
-		ResourceConfig:    resourceConfig,
-		SkippedNamespaces: skippedNamespaces,
-		Scheme:            mgr.GetScheme(),
-		UncachedReader:    mgr.GetAPIReader(),
+		Client:                           mgr.GetClient(),
+		Recorder:                         mgr.GetEventRecorderFor(crpControllerName),
+		RestMapper:                       mgr.GetRESTMapper(),
+		InformerManager:                  dynamicInformerManager,
+		ResourceConfig:                   resourceConfig,
+		SkippedNamespaces:                skippedNamespaces,
+		Scheme:                           mgr.GetScheme(),
+		UncachedReader:                   mgr.GetAPIReader(),
+		ResourceSnapshotCreationInterval: opts.ResourceSnapshotCreationInterval,
 	}
 
 	rateLimiter := options.DefaultControllerRateLimiter(opts.RateLimiterOpts)

diff --git a/pkg/controllers/clusterinventory/clusterprofile/controller_test.go b/pkg/controllers/clusterinventory/clusterprofile/controller_test.go
@@ -159,13 +159,13 @@ func TestSyncClusterProfileCondition(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			reconciler.syncClusterProfileCondition(tt.memberCluster, tt.clusterProfile)
 			condition := meta.FindStatusCondition(tt.clusterProfile.Status.Conditions, clusterinventory.ClusterConditionControlPlaneHealthy)
-			if condition == nil {
+			if condition == nil { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference
 				t.Fatalf("expected condition to be set, but it was not")
 			}
-			if condition.Status != tt.expectedConditionStatus {
+			if condition.Status != tt.expectedConditionStatus { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference
 				t.Errorf("test case `%s` failed, expected condition status %v, got %v", tt.name, tt.expectedConditionStatus, condition.Status)
 			}
-			if condition.Reason != tt.expectedConditionReason {
+			if condition.Reason != tt.expectedConditionReason { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference
 				t.Errorf("test case `%s` failed, expected condition reason %v, got %v", tt.name, tt.expectedConditionReason, condition.Reason)
 			}
 		})