Skip to content

Commit 91a764b

Browse files
Wei Weng
authored and committed
enable HA hub agent
Signed-off-by: Wei Weng <Wei.Weng@microsoft.com>
1 parent 1b9dd16 commit 91a764b

File tree

10 files changed

+292
-15
lines changed

10 files changed

+292
-15
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ on:
1414

1515
env:
1616
GO_VERSION: '1.24.9'
17+
CERT_MANAGER_VERSION: 'v1.16.2'
1718

1819
jobs:
1920
detect-noop:
@@ -125,6 +126,7 @@ jobs:
125126
PROPERTY_PROVIDER: 'azure'
126127
RESOURCE_SNAPSHOT_CREATION_MINIMUM_INTERVAL: ${{ matrix.resource-snapshot-creation-minimum-interval }}
127128
RESOURCE_CHANGES_COLLECTION_DURATION: ${{ matrix.resource-changes-collection-duration }}
129+
CERT_MANAGER_VERSION: ${{ env.CERT_MANAGER_VERSION }}
128130

129131
- name: Collect logs
130132
if: always()

charts/hub-agent/README.md

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,33 @@
22

33
## Install Chart
44

5+
### Default Installation (Self-Signed Certificates)
6+
57
```console
68
# Helm install with fleet-system namespace already created
79
helm install hub-agent ./charts/hub-agent/
810
```
911

12+
### Installation with cert-manager
13+
14+
When using cert-manager for certificate management, install cert-manager as a prerequisite first:
15+
16+
```console
17+
# Install cert-manager
18+
helm repo add jetstack https://charts.jetstack.io
19+
helm repo update
20+
helm install cert-manager jetstack/cert-manager \
21+
--namespace cert-manager \
22+
--create-namespace \
23+
--version v1.16.2 \
24+
--set crds.enabled=true
25+
26+
# Then install hub-agent with cert-manager enabled
27+
helm install hub-agent ./charts/hub-agent --set useCertManager=true
28+
```
29+
30+
This configures cert-manager to manage webhook certificates.
31+
1032
## Upgrade Chart
1133

1234
```console
@@ -32,6 +54,11 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen
3254
| `affinity` | Node affinity for hub-agent pods | `{}` |
3355
| `tolerations` | Tolerations for hub-agent pods | `[]` |
3456
| `logVerbosity` | Log level (klog V logs) | `5` |
57+
| `enableWebhook` | Enable webhook server | `true` |
58+
| `webhookServiceName` | Webhook service name | `fleetwebhook` |
59+
| `enableGuardRail` | Enable guard rail webhook configurations | `true` |
60+
| `webhookClientConnectionType` | Connection type for webhook client (service or url) | `service` |
61+
| `useCertManager` | Use cert-manager for webhook certificate management | `false` |
3562
| `enableV1Beta1APIs` | Watch for v1beta1 APIs | `true` |
3663
| `hubAPIQPS` | QPS for fleet-apiserver (not including events/node heartbeat) | `250` |
3764
| `hubAPIBurst` | Burst for fleet-apiserver (not including events/node heartbeat) | `1000` |
@@ -41,4 +68,38 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen
4168
| `MaxFleetSizeSupported` | Max number of member clusters supported | `100` |
4269
| `resourceSnapshotCreationMinimumInterval` | The minimum interval at which resource snapshots could be created. | `30s` |
4370
| `resourceChangesCollectionDuration` | The duration for collecting resource changes into one snapshot. | `15s` |
44-
| `enableWorkload` | Enable kubernetes builtin workload to run in hub cluster. | `false` |
71+
| `enableWorkload` | Enable kubernetes builtin workload to run in hub cluster. | `false` |
72+
73+
## Certificate Management
74+
75+
The hub-agent supports two modes for webhook certificate management:
76+
77+
### Automatic Certificate Generation (Default)
78+
79+
By default, the hub-agent generates certificates automatically at startup. This mode:
80+
- Requires no external dependencies
81+
- Works out of the box
82+
- Certificates are valid for 10 years
83+
84+
### cert-manager (Optional)
85+
86+
When `useCertManager=true`, certificates are managed by cert-manager. This mode:
87+
- Requires cert-manager to be installed as a prerequisite
88+
- Handles certificate rotation automatically (90-day certificates)
89+
- Follows industry-standard certificate management practices
90+
- Suitable for production environments
91+
92+
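To install hub-agent in cert-manager mode (if a hub-agent release is already installed, use `helm upgrade` instead of `helm install` in the last step):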
93+
```console
94+
# Install cert-manager first
95+
helm repo add jetstack https://charts.jetstack.io
96+
helm repo update
97+
helm install cert-manager jetstack/cert-manager \
98+
--namespace cert-manager \
99+
--create-namespace \
100+
--version v1.16.2 \
101+
--set crds.enabled=true
102+
103+
# Then install hub-agent with cert-manager enabled
104+
helm install hub-agent ./charts/hub-agent --set useCertManager=true
105+
```
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
{{- if and .Values.enableWebhook .Values.useCertManager }}
2+
---
3+
apiVersion: cert-manager.io/v1
4+
kind: Certificate
5+
metadata:
6+
name: fleet-webhook-server-cert
7+
namespace: {{ .Values.namespace }}
8+
labels:
9+
{{- include "hub-agent.labels" . | nindent 4 }}
10+
spec:
11+
# Secret name where cert-manager will store the certificate
12+
secretName: fleet-webhook-server-cert
13+
14+
# Certificate duration (90 days is cert-manager's default and recommended)
15+
duration: 2160h # 90 days
16+
17+
# Renew certificate 30 days before expiry
18+
renewBefore: 720h # 30 days
19+
20+
# Subject configuration
21+
subject:
22+
organizations:
23+
- KubeFleet
24+
25+
# Common name - derived from the configured webhook service name so that it
# matches the service DNS names listed under dnsNames
26+
commonName: {{ .Values.webhookServiceName }}.{{ .Values.namespace }}.svc
27+
28+
# DNS names for the certificate
29+
dnsNames:
30+
- {{ .Values.webhookServiceName }}
31+
- {{ .Values.webhookServiceName }}.{{ .Values.namespace }}
32+
- {{ .Values.webhookServiceName }}.{{ .Values.namespace }}.svc
33+
- {{ .Values.webhookServiceName }}.{{ .Values.namespace }}.svc.cluster.local
34+
35+
# Issuer reference - using self-signed issuer
36+
issuerRef:
37+
name: fleet-selfsigned-issuer
38+
kind: Issuer
39+
group: cert-manager.io
40+
41+
# Private key configuration
42+
privateKey:
43+
algorithm: ECDSA
44+
size: 256
45+
46+
# Key usages
47+
usages:
48+
- digital signature
49+
- key encipherment
50+
- server auth
51+
---
52+
# Self-signed issuer for generating the certificate
53+
apiVersion: cert-manager.io/v1
54+
kind: Issuer
55+
metadata:
56+
name: fleet-selfsigned-issuer
57+
namespace: {{ .Values.namespace }}
58+
labels:
59+
{{- include "hub-agent.labels" . | nindent 4 }}
60+
spec:
61+
selfSigned: {}
62+
{{- end }}

charts/hub-agent/templates/deployment.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ metadata:
66
labels:
77
{{- include "hub-agent.labels" . | nindent 4 }}
88
spec:
9+
replicas: {{ .Values.replicaCount }}
910
selector:
1011
matchLabels:
1112
{{- include "hub-agent.selectorLabels" . | nindent 6 }}
@@ -25,6 +26,7 @@ spec:
2526
- --webhook-service-name={{ .Values.webhookServiceName }}
2627
- --enable-guard-rail={{ .Values.enableGuardRail }}
2728
- --enable-workload={{ .Values.enableWorkload }}
29+
- --use-cert-manager={{ .Values.useCertManager }}
2830
- --whitelisted-users=system:serviceaccount:fleet-system:hub-agent-sa
2931
- --webhook-client-connection-type={{.Values.webhookClientConnectionType}}
3032
- --v={{ .Values.logVerbosity }}
@@ -73,6 +75,19 @@ spec:
7375
fieldPath: metadata.namespace
7476
resources:
7577
{{- toYaml .Values.resources | nindent 12 }}
78+
{{- if .Values.useCertManager }}
79+
volumeMounts:
80+
- name: webhook-cert
81+
mountPath: /tmp/k8s-webhook-server/serving-certs
82+
readOnly: true
83+
{{- end }}
84+
{{- if .Values.useCertManager }}
85+
volumes:
86+
- name: webhook-cert
87+
secret:
88+
secretName: fleet-webhook-server-cert
89+
defaultMode: 0644
90+
{{- end }}
7691
{{- with .Values.affinity }}
7792
affinity:
7893
{{- toYaml . | nindent 8 }}

charts/hub-agent/values.yaml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,17 @@ webhookServiceName: fleetwebhook
1717
enableGuardRail: true
1818
webhookClientConnectionType: service
1919
enableWorkload: false
20+
# useCertManager enables cert-manager for webhook certificate management
21+
# When enabled, cert-manager must already be installed in the cluster as a prerequisite
22+
# and a Certificate resource will be created
23+
useCertManager: false
24+
2025
forceDeleteWaitTime: 15m0s
2126
clusterUnhealthyThreshold: 3m0s
2227
resourceSnapshotCreationMinimumInterval: 30s
2328
resourceChangesCollectionDuration: 15s
2429

25-
namespace:
26-
fleet-system
30+
namespace: fleet-system
2731

2832
resources:
2933
limits:

cmd/hubagent/main.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ func main() {
157157

158158
if opts.EnableWebhook {
159159
whiteListedUsers := strings.Split(opts.WhiteListedUsers, ",")
160-
if err := SetupWebhook(mgr, options.WebhookClientConnectionType(opts.WebhookClientConnectionType), opts.WebhookServiceName, whiteListedUsers, opts.EnableGuardRail, opts.EnableV1Beta1APIs, opts.DenyModifyMemberClusterLabels, opts.EnableWorkload); err != nil {
160+
if err := SetupWebhook(mgr, options.WebhookClientConnectionType(opts.WebhookClientConnectionType), opts.WebhookServiceName, whiteListedUsers, opts.EnableGuardRail, opts.EnableV1Beta1APIs, opts.DenyModifyMemberClusterLabels, opts.EnableWorkload, opts.UseCertManager); err != nil {
161161
klog.ErrorS(err, "unable to set up webhook")
162162
exitWithErrorFunc()
163163
}
@@ -198,9 +198,9 @@ func main() {
198198
}
199199

200200
// SetupWebhook prepares the webhook certificate material (self-signed by default, or the cert-manager CA when useCertManager is true) and then sets up the webhook configurator.
201-
func SetupWebhook(mgr manager.Manager, webhookClientConnectionType options.WebhookClientConnectionType, webhookServiceName string, whiteListedUsers []string, enableGuardRail, isFleetV1Beta1API bool, denyModifyMemberClusterLabels bool, enableWorkload bool) error {
201+
func SetupWebhook(mgr manager.Manager, webhookClientConnectionType options.WebhookClientConnectionType, webhookServiceName string, whiteListedUsers []string, enableGuardRail, isFleetV1Beta1API bool, denyModifyMemberClusterLabels bool, enableWorkload bool, useCertManager bool) error {
202202
// Generate self-signed key and crt files in FleetWebhookCertDir, or load the cert-manager CA from it when useCertManager is true, for the webhook server to start.
203-
w, err := webhook.NewWebhookConfig(mgr, webhookServiceName, FleetWebhookPort, &webhookClientConnectionType, FleetWebhookCertDir, enableGuardRail, denyModifyMemberClusterLabels, enableWorkload)
203+
w, err := webhook.NewWebhookConfig(mgr, webhookServiceName, FleetWebhookPort, &webhookClientConnectionType, FleetWebhookCertDir, enableGuardRail, denyModifyMemberClusterLabels, enableWorkload, useCertManager)
204204
if err != nil {
205205
klog.ErrorS(err, "fail to generate WebhookConfig")
206206
return err

cmd/hubagent/options/options.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ type Options struct {
110110
// EnableWorkload enables workload resources (pods and replicasets) to be created in the hub cluster.
111111
// When set to true, the pod and replicaset validating webhooks are disabled.
112112
EnableWorkload bool
113+
// UseCertManager indicates whether to use cert-manager for webhook certificate management.
114+
// When enabled, webhook certificates are managed by cert-manager instead of self-signed generation.
115+
UseCertManager bool
113116
// ResourceSnapshotCreationMinimumInterval is the minimum interval at which resource snapshots could be created.
114117
// Whether the resource snapshot is created or not depends on the both ResourceSnapshotCreationMinimumInterval and ResourceChangesCollectionDuration.
115118
ResourceSnapshotCreationMinimumInterval time.Duration
@@ -185,6 +188,7 @@ func (o *Options) AddFlags(flags *flag.FlagSet) {
185188
flags.IntVar(&o.PprofPort, "pprof-port", 6065, "The port for pprof profiling.")
186189
flags.BoolVar(&o.DenyModifyMemberClusterLabels, "deny-modify-member-cluster-labels", false, "If set, users not in the system:masters cannot modify member cluster labels.")
187190
flags.BoolVar(&o.EnableWorkload, "enable-workload", false, "If set, workloads (pods and replicasets) can be created in the hub cluster. This disables the pod and replicaset validating webhooks.")
191+
flags.BoolVar(&o.UseCertManager, "use-cert-manager", false, "If set, cert-manager will be used for webhook certificate management instead of self-signed certificates.")
188192
flags.DurationVar(&o.ResourceSnapshotCreationMinimumInterval, "resource-snapshot-creation-minimum-interval", 30*time.Second, "The minimum interval at which resource snapshots could be created.")
189193
flags.DurationVar(&o.ResourceChangesCollectionDuration, "resource-changes-collection-duration", 15*time.Second,
190194
"The duration for collecting resource changes into one snapshot. The default is 15 seconds, which means that the controller will collect resource changes for 15 seconds before creating a resource snapshot.")

pkg/webhook/webhook.go

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ import (
7070
const (
7171
fleetWebhookCertFileName = "tls.crt"
7272
fleetWebhookKeyFileName = "tls.key"
73+
fleetWebhookCertSecretName = "fleet-webhook-server-cert" //nolint:gosec // This is a Secret name, not a credential
7374
fleetValidatingWebhookCfgName = "fleet-validating-webhook-configuration"
7475
fleetGuardRailWebhookCfgName = "fleet-guard-rail-webhook-configuration"
7576
fleetMutatingWebhookCfgName = "fleet-mutating-webhook-configuration"
@@ -154,15 +155,20 @@ type Config struct {
154155
// caPEM is a PEM encoded CA bundle which will be used to validate the webhook's server certificate.
155156
caPEM []byte
156157

158+
// certDir is the directory where certificate files are written (self-signed mode) or read from (cert-manager mode)
159+
certDir string
160+
157161
clientConnectionType *options.WebhookClientConnectionType
158162

159163
enableGuardRail bool
160164

161165
denyModifyMemberClusterLabels bool
162166
enableWorkload bool
167+
// useCertManager indicates whether cert-manager is used for certificate management
168+
useCertManager bool
163169
}
164170

165-
func NewWebhookConfig(mgr manager.Manager, webhookServiceName string, port int32, clientConnectionType *options.WebhookClientConnectionType, certDir string, enableGuardRail bool, denyModifyMemberClusterLabels bool, enableWorkload bool) (*Config, error) {
171+
func NewWebhookConfig(mgr manager.Manager, webhookServiceName string, port int32, clientConnectionType *options.WebhookClientConnectionType, certDir string, enableGuardRail bool, denyModifyMemberClusterLabels bool, enableWorkload bool, useCertManager bool) (*Config, error) {
166172
// We assume the Pod namespace should be passed to env through downward API in the Pod spec.
167173
namespace := os.Getenv("POD_NAMESPACE")
168174
if namespace == "" {
@@ -174,17 +180,34 @@ func NewWebhookConfig(mgr manager.Manager, webhookServiceName string, port int32
174180
serviceNamespace: namespace,
175181
serviceName: webhookServiceName,
176182
serviceURL: fmt.Sprintf("https://%s.%s.svc.cluster.local:%d", webhookServiceName, namespace, port),
183+
certDir: certDir,
177184
clientConnectionType: clientConnectionType,
178185
enableGuardRail: enableGuardRail,
179186
denyModifyMemberClusterLabels: denyModifyMemberClusterLabels,
180187
enableWorkload: enableWorkload,
188+
useCertManager: useCertManager,
181189
}
182-
caPEM, err := w.genCertificate(certDir)
183-
if err != nil {
184-
return nil, err
190+
191+
var caPEM []byte
192+
var err error
193+
194+
if useCertManager {
195+
// When using cert-manager, certificates are mounted as files by Kubernetes
196+
// cert-manager creates tls.crt and tls.key, but we need ca.crt for the webhook config
197+
caPEM, err = w.loadCertManagerCA(certDir)
198+
if err != nil {
199+
return nil, fmt.Errorf("failed to load cert-manager CA certificate: %w", err)
200+
}
201+
} else {
202+
// Use self-signed certificate generation (original flow)
203+
caPEM, err = w.genCertificate(certDir)
204+
if err != nil {
205+
return nil, err
206+
}
185207
}
208+
186209
w.caPEM = caPEM
187-
return &w, err
210+
return &w, nil
188211
}
189212

190213
func (w *Config) Start(ctx context.Context) error {
@@ -214,12 +237,19 @@ func (w *Config) createFleetWebhookConfiguration(ctx context.Context) error {
214237

215238
// createMutatingWebhookConfiguration creates the MutatingWebhookConfiguration object for the webhook.
216239
func (w *Config) createMutatingWebhookConfiguration(ctx context.Context, webhooks []admv1.MutatingWebhook, configName string) error {
240+
annotations := map[string]string{}
241+
if w.useCertManager {
242+
// Tell cert-manager's CA injector to automatically inject the CA bundle
243+
annotations["cert-manager.io/inject-ca-from"] = fmt.Sprintf("%s/%s", w.serviceNamespace, fleetWebhookCertSecretName)
244+
}
245+
217246
mutatingWebhookConfig := admv1.MutatingWebhookConfiguration{
218247
ObjectMeta: metav1.ObjectMeta{
219248
Name: configName,
220249
Labels: map[string]string{
221250
"admissions.enforcer/disabled": "true",
222251
},
252+
Annotations: annotations,
223253
},
224254
Webhooks: webhooks,
225255
}
@@ -267,12 +297,19 @@ func (w *Config) buildFleetMutatingWebhooks() []admv1.MutatingWebhook {
267297
}
268298

269299
func (w *Config) createValidatingWebhookConfiguration(ctx context.Context, webhooks []admv1.ValidatingWebhook, configName string) error {
300+
annotations := map[string]string{}
301+
if w.useCertManager {
302+
// Tell cert-manager's CA injector to automatically inject the CA bundle
303+
annotations["cert-manager.io/inject-ca-from"] = fmt.Sprintf("%s/%s", w.serviceNamespace, fleetWebhookCertSecretName)
304+
}
305+
270306
validatingWebhookConfig := admv1.ValidatingWebhookConfiguration{
271307
ObjectMeta: metav1.ObjectMeta{
272308
Name: configName,
273309
Labels: map[string]string{
274310
"admissions.enforcer/disabled": "true",
275311
},
312+
Annotations: annotations,
276313
},
277314
Webhooks: webhooks,
278315
}
@@ -657,6 +694,26 @@ func (w *Config) genCertificate(certDir string) ([]byte, error) {
657694
return caPEM, nil
658695
}
659696

697+
// loadCertManagerCA loads the CA certificate from the mounted cert-manager Secret.
698+
// When using cert-manager, Kubernetes mounts the Secret as files in the certDir.
699+
// cert-manager creates: ca.crt, tls.crt, and tls.key
700+
// The tls.crt and tls.key are automatically used by the webhook server.
701+
// We only need to read ca.crt for the webhook configuration's CABundle.
//
// certDir is the directory that is expected to contain ca.crt. The function
// returns the raw contents of that file, or an error if the file cannot be
// read or is empty. The contents are not parsed or validated here.
702+
func (w *Config) loadCertManagerCA(certDir string) ([]byte, error) {
703+
caPath := filepath.Join(certDir, "ca.crt")
704+
caCert, err := os.ReadFile(caPath)
705+
if err != nil {
706+
return nil, fmt.Errorf("failed to read ca.crt from %s: %w", caPath, err)
707+
}
708+
709+
// Reject an empty file explicitly rather than returning an empty CA bundle.
if len(caCert) == 0 {
710+
return nil, fmt.Errorf("ca.crt is empty at %s", caPath)
711+
}
712+
713+
klog.V(2).InfoS("Successfully loaded CA certificate from cert-manager mounted Secret", "path", caPath)
714+
return caCert, nil
715+
}
716+
660717
// genSelfSignedCert generates the self signed Certificate/Key pair
661718
func (w *Config) genSelfSignedCert() (caPEMByte, certPEMByte, keyPEMByte []byte, err error) {
662719
// CA config

0 commit comments

Comments
 (0)