Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ jobs:
HUB_SERVER_URL: 'https://172.19.0.2:6443'

e2e-tests:
strategy:
fail-fast: false
matrix:
customized-settings: [default, custom]
include:
- customized-settings: default
# to shorten the test duration, set the resource snapshot creation interval to 0
resource-snapshot-creation-interval: 0m
- customized-settings: custom
resource-snapshot-creation-interval: 1m
runs-on: ubuntu-latest
needs: [
detect-noop,
Expand Down Expand Up @@ -119,7 +129,11 @@ jobs:

- name: Run e2e tests
run: |
make e2e-tests
if [ "${{ matrix.customized-settings }}" = "default" ]; then
make e2e-tests
else
make e2e-tests-custom
fi
env:
KUBECONFIG: '/home/runner/.kube/config'
HUB_SERVER_URL: 'https://172.19.0.2:6443'
Expand All @@ -129,4 +143,5 @@ jobs:
# TO-DO (chenyu1): to ensure a vendor-neutral experience, switch to a dummy
# property provider once the AKS one is split out.
PROPERTY_PROVIDER: 'azure'
RESOURCE_SNAPSHOT_CREATION_INTERVAL: ${{ matrix.resource-snapshot-creation-interval }}

5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,10 @@ e2e-tests-v1alpha1: create-kind-cluster run-e2e-v1alpha1

.PHONY: e2e-tests
e2e-tests: setup-clusters
cd ./test/e2e && ginkgo -v -p .
cd ./test/e2e && ginkgo --label-filter="!custom" -v -p .

# e2e-tests-custom sets up the test clusters and then runs only the e2e specs
# selected by the ginkgo label filter "custom" (the complement of the
# "!custom" filter used by the e2e-tests target).
# Declared phony, like the neighboring targets, so that a file or directory
# named e2e-tests-custom can never cause make to treat the target as up to date.
.PHONY: e2e-tests-custom
e2e-tests-custom: setup-clusters
	cd ./test/e2e && ginkgo --label-filter="custom" -v -p .

.PHONY: setup-clusters
setup-clusters:
Expand Down
43 changes: 22 additions & 21 deletions charts/hub-agent/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,25 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen

## Parameters

| Parameter | Description | Default |
|:------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------|
| replicaCount | The number of hub-agent replicas to deploy | `1` |
| image.repository | Image repository | `ghcr.io/azure/azure/fleet/hub-agent` |
| image.pullPolicy | Image pullPolicy | `Always` |
| image.tag | The image release tag to use | `v0.1.0` |
| namespace | Namespace that this Helm chart is installed on | `fleet-system` |
| serviceAccount.create | Whether to create service account | `true` |
| serviceAccount.name | Service account name | `hub-agent-sa` |
| resources | The resource request/limits for the container image | limits: 500m CPU, 1Gi, requests: 100m CPU, 128Mi |
| affinity | The node affinity to use for hubagent pod | `{}` |
| tolerations | The tolerations to use for hubagent pod | `[]` |
| logVerbosity | Log level. Uses V logs (klog) | `5` |
| enableV1Alpha1APIs | If set, the agents will watch for the v1alpha1 APIs. | `false` |
| enableV1Beta1APIs | If set, the agents will watch for the v1beta1 APIs. | `true` |
| hubAPIQPS | QPS to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags. | `250` |
| hubAPIBurst | Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags. | `1000` |
| MaxConcurrentClusterPlacement | The max number of clusterResourcePlacement to run concurrently this fleet supports. | `100` |
| ConcurrentResourceChangeSyncs | The number of resourceChange reconcilers that are allowed to run concurrently. | `20` |
| logFileMaxSize | Max size of log file before rotation | `1000000` |
| MaxFleetSizeSupported | The max number of member clusters this fleet supports. | `100` |
| Parameter | Description | Default |
|:-----------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------|
| replicaCount | The number of hub-agent replicas to deploy | `1` |
| image.repository | Image repository | `ghcr.io/azure/azure/fleet/hub-agent` |
| image.pullPolicy | Image pullPolicy | `Always` |
| image.tag | The image release tag to use | `v0.1.0` |
| namespace | Namespace that this Helm chart is installed on | `fleet-system` |
| serviceAccount.create | Whether to create service account | `true` |
| serviceAccount.name | Service account name | `hub-agent-sa` |
| resources | The resource request/limits for the container image | limits: 500m CPU, 1Gi, requests: 100m CPU, 128Mi |
| affinity | The node affinity to use for hubagent pod | `{}` |
| tolerations | The tolerations to use for hubagent pod | `[]` |
| logVerbosity | Log level. Uses V logs (klog) | `5` |
| enableV1Alpha1APIs | If set, the agents will watch for the v1alpha1 APIs. | `false` |
| enableV1Beta1APIs | If set, the agents will watch for the v1beta1 APIs. | `true` |
| hubAPIQPS | QPS to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat APIs, whose rate limiting is controlled by a different set of flags. | `250` |
| hubAPIBurst | Burst to use while talking with fleet-apiserver. Doesn't cover events and node heartbeat APIs, whose rate limiting is controlled by a different set of flags. | `1000` |
| MaxConcurrentClusterPlacement | The max number of clusterResourcePlacements that this fleet can run concurrently. | `100` |
| ConcurrentResourceChangeSyncs | The number of resourceChange reconcilers that are allowed to run concurrently. | `20` |
| logFileMaxSize | Max size of log file before rotation | `1000000` |
| MaxFleetSizeSupported | The max number of member clusters this fleet supports. | `100` |
| resourceSnapshotCreationInterval | The interval at which resource snapshots are created. | `1m` |
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: the dividers don't seem to align

1 change: 1 addition & 0 deletions charts/hub-agent/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ spec:
- --hub-api-burst={{ .Values.hubAPIBurst }}
- --force-delete-wait-time={{ .Values.forceDeleteWaitTime }}
- --cluster-unhealthy-threshold={{ .Values.clusterUnhealthyThreshold }}
- --resource-snapshot-creation-interval={{ .Values.resourceSnapshotCreationInterval }}
ports:
- name: metrics
containerPort: 8080
Expand Down
2 changes: 2 additions & 0 deletions charts/hub-agent/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ enableGuardRail: true
webhookClientConnectionType: service
forceDeleteWaitTime: 15m0s
clusterUnhealthyThreshold: 3m0s
resourceSnapshotCreationInterval: 1m0s

namespace:
fleet-system

Expand Down
20 changes: 12 additions & 8 deletions cmd/hubagent/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ type Options struct {
PprofPort int
// DenyModifyMemberClusterLabels indicates if the member cluster labels cannot be modified by groups (excluding system:masters)
DenyModifyMemberClusterLabels bool
// ResourceSnapshotCreationInterval is the interval at which resource snapshots are created.
ResourceSnapshotCreationInterval time.Duration
}

// NewOptions builds an empty options.
Expand All @@ -115,14 +117,15 @@ func NewOptions() *Options {
ResourceNamespace: utils.FleetSystemNamespace,
ResourceName: "136224848560.hub.fleet.azure.com",
},
MaxConcurrentClusterPlacement: 10,
ConcurrentResourceChangeSyncs: 1,
MaxFleetSizeSupported: 100,
EnableV1Alpha1APIs: false,
EnableClusterInventoryAPIs: true,
EnableStagedUpdateRunAPIs: true,
EnablePprof: false,
PprofPort: 6065,
MaxConcurrentClusterPlacement: 10,
ConcurrentResourceChangeSyncs: 1,
MaxFleetSizeSupported: 100,
EnableV1Alpha1APIs: false,
EnableClusterInventoryAPIs: true,
EnableStagedUpdateRunAPIs: true,
EnablePprof: false,
PprofPort: 6065,
ResourceSnapshotCreationInterval: 1 * time.Minute,
}
}

Expand Down Expand Up @@ -169,6 +172,7 @@ func (o *Options) AddFlags(flags *flag.FlagSet) {
flags.BoolVar(&o.EnablePprof, "enable-pprof", false, "If set, the pprof profiling is enabled.")
flags.IntVar(&o.PprofPort, "pprof-port", 6065, "The port for pprof profiling.")
flags.BoolVar(&o.DenyModifyMemberClusterLabels, "deny-modify-member-cluster-labels", false, "If set, users not in the system:masters cannot modify member cluster labels.")
flags.DurationVar(&o.ResourceSnapshotCreationInterval, "resource-snapshot-creation-interval", 1*time.Minute, "The interval at which resource snapshots are created.")

o.RateLimiterOpts.AddFlags(flags)
}
17 changes: 9 additions & 8 deletions cmd/hubagent/workload/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,15 @@ func SetupControllers(ctx context.Context, wg *sync.WaitGroup, mgr ctrl.Manager,

// Set up a custom controller to reconcile cluster resource placement
crpc := &clusterresourceplacement.Reconciler{
Client: mgr.GetClient(),
Recorder: mgr.GetEventRecorderFor(crpControllerName),
RestMapper: mgr.GetRESTMapper(),
InformerManager: dynamicInformerManager,
ResourceConfig: resourceConfig,
SkippedNamespaces: skippedNamespaces,
Scheme: mgr.GetScheme(),
UncachedReader: mgr.GetAPIReader(),
Client: mgr.GetClient(),
Recorder: mgr.GetEventRecorderFor(crpControllerName),
RestMapper: mgr.GetRESTMapper(),
InformerManager: dynamicInformerManager,
ResourceConfig: resourceConfig,
SkippedNamespaces: skippedNamespaces,
Scheme: mgr.GetScheme(),
UncachedReader: mgr.GetAPIReader(),
ResourceSnapshotCreationInterval: opts.ResourceSnapshotCreationInterval,
}

rateLimiter := options.DefaultControllerRateLimiter(opts.RateLimiterOpts)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,13 @@ func TestSyncClusterProfileCondition(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
reconciler.syncClusterProfileCondition(tt.memberCluster, tt.clusterProfile)
condition := meta.FindStatusCondition(tt.clusterProfile.Status.Conditions, clusterinventory.ClusterConditionControlPlaneHealthy)
if condition == nil {
if condition == nil { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference
t.Fatalf("expected condition to be set, but it was not")
}
if condition.Status != tt.expectedConditionStatus {
if condition.Status != tt.expectedConditionStatus { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference
t.Errorf("test case `%s` failed, expected condition status %v, got %v", tt.name, tt.expectedConditionStatus, condition.Status)
}
if condition.Reason != tt.expectedConditionReason {
if condition.Reason != tt.expectedConditionReason { //nolint: staticcheck // false positive SA5011: possible nil pointer dereference
t.Errorf("test case `%s` failed, expected condition reason %v, got %v", tt.name, tt.expectedConditionReason, condition.Reason)
}
})
Expand Down
Loading
Loading