From f034ceb10758560f8dc7fba3b7b24a073ac8ca96 Mon Sep 17 00:00:00 2001 From: Saloni Choudhary <146118978+salonichf5@users.noreply.github.com> Date: Wed, 29 Oct 2025 10:03:56 -0600 Subject: [PATCH] Make endpoint picker connection flags configurable (#4105) Problem: User should be able to configure Endpoint picker connection flags Solution: Adds command line support for endpoint picker flags --- charts/nginx-gateway-fabric/README.md | 5 +- .../templates/deployment.yaml | 4 ++ .../nginx-gateway-fabric/values.schema.json | 22 ++++++++ charts/nginx-gateway-fabric/values.yaml | 13 +++++ cmd/gateway/commands.go | 42 ++++++++++++++-- cmd/gateway/commands_test.go | 50 +++++++++++++++++++ cmd/gateway/endpoint_picker.go | 9 ++-- deploy/inference-nginx-plus/deploy.yaml | 1 + deploy/inference/deploy.yaml | 1 + internal/controller/config/config.go | 4 ++ internal/controller/manager.go | 2 + internal/controller/provisioner/objects.go | 17 +++++-- .../controller/provisioner/objects_test.go | 15 +++++- .../controller/provisioner/provisioner.go | 2 + 14 files changed, 171 insertions(+), 16 deletions(-) diff --git a/charts/nginx-gateway-fabric/README.md b/charts/nginx-gateway-fabric/README.md index 66c130a624..aebd8ff6f2 100644 --- a/charts/nginx-gateway-fabric/README.md +++ b/charts/nginx-gateway-fabric/README.md @@ -245,7 +245,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri | `nginx.usage.resolver` | The nameserver used to resolve the NGINX Plus usage reporting endpoint. Used with NGINX Instance Manager. | string | `""` | | `nginx.usage.secretName` | The name of the Secret containing the JWT for NGINX Plus usage reporting. Must exist in the same namespace that the NGINX Gateway Fabric control plane is running in (default namespace: nginx-gateway). | string | `"nplus-license"` | | `nginx.usage.skipVerify` | Disable client verification of the NGINX Plus usage reporting server certificate. 
| bool | `false` | -| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"gwAPIInferenceExtension":{"enable":false},"image":{"pullPolicy":"IfNotPresent","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"2.2.0"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` | +| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. 
| object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"gwAPIInferenceExtension":{"enable":false,"endpointPicker":{"disableTLS":false,"skipVerify":true}},"image":{"pullPolicy":"IfNotPresent","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"2.2.0"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` | | `nginxGateway.affinity` | The affinity of the NGINX Gateway Fabric control plane pod. | object | `{}` | | `nginxGateway.autoscaling` | Autoscaling configuration for the NGINX Gateway Fabric control plane. | object | `{"enable":false}` | | `nginxGateway.autoscaling.enable` | Enable or disable Horizontal Pod Autoscaler for the control plane. | bool | `false` | @@ -258,6 +258,9 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri | `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | string | `"gateway.nginx.org/nginx-gateway-controller"` | | `nginxGateway.gwAPIExperimentalFeatures.enable` | Enable the experimental features of Gateway API which are supported by NGINX Gateway Fabric. 
Requires the Gateway APIs installed from the experimental channel. | bool | `false` | | `nginxGateway.gwAPIInferenceExtension.enable` | Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. | bool | `false` | +| `nginxGateway.gwAPIInferenceExtension.endpointPicker` | EndpointPicker TLS configuration. | object | `{"disableTLS":false,"skipVerify":true}` | +| `nginxGateway.gwAPIInferenceExtension.endpointPicker.disableTLS` | Disable TLS for EndpointPicker communication. By default, TLS is enabled. Set to true only for development/testing or when using a service mesh for encryption. | bool | `false` | +| `nginxGateway.gwAPIInferenceExtension.endpointPicker.skipVerify` | Disables TLS certificate verification when connecting to the EndpointPicker. By default, certificate verification is disabled. REQUIRED: Must be true until Gateway API Inference Extension EndpointPicker supports mounting certificates. See: https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1556 | bool | `true` | | `nginxGateway.image` | The image configuration for the NGINX Gateway Fabric control plane. | object | `{"pullPolicy":"IfNotPresent","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"2.2.0"}` | | `nginxGateway.image.repository` | The NGINX Gateway Fabric image to use | string | `"ghcr.io/nginx/nginx-gateway-fabric"` | | `nginxGateway.kind` | The kind of the NGINX Gateway Fabric installation - currently, only deployment is supported. 
| string | `"deployment"` | diff --git a/charts/nginx-gateway-fabric/templates/deployment.yaml b/charts/nginx-gateway-fabric/templates/deployment.yaml index 604acd768c..6c79253fe7 100644 --- a/charts/nginx-gateway-fabric/templates/deployment.yaml +++ b/charts/nginx-gateway-fabric/templates/deployment.yaml @@ -102,6 +102,10 @@ spec: {{- end }} {{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }} - --gateway-api-inference-extension + {{- if .Values.nginxGateway.gwAPIInferenceExtension.endpointPicker.disableTLS }} + - --endpoint-picker-disable-tls + {{- end }} + - --endpoint-picker-tls-skip-verify={{ .Values.nginxGateway.gwAPIInferenceExtension.endpointPicker.skipVerify }} {{- end }} {{- if .Values.nginxGateway.snippetsFilters.enable }} - --snippets-filters diff --git a/charts/nginx-gateway-fabric/values.schema.json b/charts/nginx-gateway-fabric/values.schema.json index 24f2675194..37879065d2 100644 --- a/charts/nginx-gateway-fabric/values.schema.json +++ b/charts/nginx-gateway-fabric/values.schema.json @@ -846,6 +846,28 @@ "required": [], "title": "enable", "type": "boolean" + }, + "endpointPicker": { + "description": "EndpointPicker TLS configuration.", + "properties": { + "disableTLS": { + "default": false, + "description": "Disable TLS for EndpointPicker communication. 
By default, TLS is enabled.\nSet to true only for development/testing or when using a service mesh for encryption.", + "required": [], + "title": "disableTLS", + "type": "boolean" + }, + "skipVerify": { + "default": true, + "description": "Disables TLS certificate verification when connecting to the EndpointPicker.\nBy default, certificate verification is disabled.\nREQUIRED: Must be true until Gateway API Inference Extension EndpointPicker supports mounting certificates.\nSee: https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1556", + "required": [], + "title": "skipVerify", + "type": "boolean" + } + }, + "required": [], + "title": "endpointPicker", + "type": "object" } }, "required": [], diff --git a/charts/nginx-gateway-fabric/values.yaml b/charts/nginx-gateway-fabric/values.yaml index c3a84b7747..1893a315da 100644 --- a/charts/nginx-gateway-fabric/values.yaml +++ b/charts/nginx-gateway-fabric/values.yaml @@ -214,6 +214,19 @@ nginxGateway: # -- Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. enable: false + # -- EndpointPicker TLS configuration. + endpointPicker: + # -- Disable TLS for EndpointPicker communication. By default, TLS is enabled. + # Set to true only for development/testing or when using a service mesh for encryption. + disableTLS: false + + # -- Disables TLS certificate verification when connecting to the EndpointPicker. + # By default, certificate verification is disabled. + # REQUIRED: Must be true until Gateway API Inference Extension EndpointPicker supports mounting certificates. + # See: https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1556 + skipVerify: true + + snippetsFilters: # -- Enable SnippetsFilters feature. SnippetsFilters allow inserting NGINX configuration into the generated NGINX # config for HTTPRoute and GRPCRoute resources. 
diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go index a152ea09dd..2236022df5 100644 --- a/cmd/gateway/commands.go +++ b/cmd/gateway/commands.go @@ -37,9 +37,11 @@ const ( `The controller name must be of the form: DOMAIN/PATH. The controller's domain is '%s'` plusFlag = "nginx-plus" - serverTLSSecret = "server-tls" - agentTLSSecret = "agent-tls" - nginxOneTelemetryEndpointHost = "agent.connect.nginx.com" + serverTLSSecret = "server-tls" + agentTLSSecret = "agent-tls" + nginxOneTelemetryEndpointHost = "agent.connect.nginx.com" + endpointPickerDisableTLSFlag = "endpoint-picker-disable-tls" + endpointPickerTLSSkipVerifyFlag = "endpoint-picker-tls-skip-verify" ) // usageReportParams holds the parameters for building the usage report configuration for PLUS. @@ -162,6 +164,9 @@ func createControllerCommand() *cobra.Command { nginxDockerSecrets = stringSliceValidatingValue{ validator: validateResourceName, } + + endpointPickerDisableTLS bool + endpointPickerTLSSkipVerify = true ) usageReportParams := usageReportParams{ @@ -288,6 +293,8 @@ func createControllerCommand() *cobra.Command { EndpointPort: nginxOneConsoleTelemetryEndpointPort.value, EndpointTLSSkipVerify: nginxOneConsoleTLSSkipVerify, }, + EndpointPickerDisableTLS: endpointPickerDisableTLS, + EndpointPickerTLSSkipVerify: endpointPickerTLSSkipVerify, } if err := controller.StartManager(conf); err != nil { @@ -441,6 +448,8 @@ func createControllerCommand() *cobra.Command { "traffic to AI workloads.", ) + addEPPConnectionFlags(cmd, &endpointPickerDisableTLS, &endpointPickerTLSSkipVerify) + cmd.Flags().Var( &nginxDockerSecrets, nginxDockerSecretFlag, @@ -758,19 +767,44 @@ func createSleepCommand() *cobra.Command { } func createEndpointPickerCommand() *cobra.Command { + var endpointPickerDisableTLS bool + endpointPickerTLSSkipVerify := true cmd := &cobra.Command{ Use: "endpoint-picker", Short: "Shim server for communication between NGINX and the Gateway API Inference Extension Endpoint Picker", RunE: 
func(_ *cobra.Command, _ []string) error { logger := ctlrZap.New().WithName("endpoint-picker-shim") - handler := createEndpointPickerHandler(realExtProcClientFactory(), logger) + handler := createEndpointPickerHandler( + realExtProcClientFactory(endpointPickerDisableTLS, endpointPickerTLSSkipVerify), + logger, + ) return endpointPickerServer(handler) }, } + addEPPConnectionFlags(cmd, &endpointPickerDisableTLS, &endpointPickerTLSSkipVerify) + return cmd } +func addEPPConnectionFlags(cmd *cobra.Command, disableTLS, tlsSkipVerify *bool) { + cmd.Flags().BoolVar( + disableTLS, + endpointPickerDisableTLSFlag, + false, + "Disables TLS when connecting to the EndpointPicker. "+ + "Set to true only for development/testing or when using a service mesh for encryption.", + ) + + cmd.Flags().BoolVar( + tlsSkipVerify, + endpointPickerTLSSkipVerifyFlag, + true, + "Disables server certificate verification when connecting to the EndpointPicker, if TLS is enabled. "+ + "REQUIRED: Must be true until Gateway API Inference Extension EndpointPicker supports mounting certificates.", + ) +} + func parseFlags(flags *pflag.FlagSet) ([]string, []string) { var flagKeys, flagValues []string diff --git a/cmd/gateway/commands_test.go b/cmd/gateway/commands_test.go index 4cb8f0d532..d31916a745 100644 --- a/cmd/gateway/commands_test.go +++ b/cmd/gateway/commands_test.go @@ -161,6 +161,8 @@ func TestControllerCmdFlagValidation(t *testing.T) { "--nginx-one-telemetry-endpoint-host=telemetry-endpoint-host", "--nginx-one-telemetry-endpoint-port=443", "--nginx-one-tls-skip-verify", + "--endpoint-picker-disable-tls", + "--endpoint-picker-tls-skip-verify", }, wantErr: false, }, @@ -924,3 +926,51 @@ func TestUsageReportConfig(t *testing.T) { }) } } + +func TestEndpointPickerFlags(t *testing.T) { + t.Parallel() + tests := []flagTestCase{ + { + name: "valid flags with default values", + args: []string{ + "--endpoint-picker-disable-tls=false", + "--endpoint-picker-tls-skip-verify=true", + }, + wantErr: false, 
+ }, + { + name: "valid flags with changed values", + args: []string{ + "--endpoint-picker-disable-tls=true", + "--endpoint-picker-tls-skip-verify=false", + }, + wantErr: false, + }, + { + name: "endpoint-picker-disable-tls is not a bool", + args: []string{ + "--endpoint-picker-disable-tls=not-a-bool", + }, + wantErr: true, + expectedErrPrefix: `invalid argument "not-a-bool" for "--endpoint-picker-disable-tls" flag:` + + ` strconv.ParseBool: parsing "not-a-bool": invalid syntax`, + }, + { + name: "endpoint-picker-tls-skip-verify is not a bool", + args: []string{ + "--endpoint-picker-tls-skip-verify=not-a-bool", + }, + wantErr: true, + expectedErrPrefix: `invalid argument "not-a-bool" for "--endpoint-picker-tls-skip-verify" flag:` + + ` strconv.ParseBool: parsing "not-a-bool": invalid syntax`, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + cmd := createEndpointPickerCommand() + testFlag(t, cmd, test) + }) + } +} diff --git a/cmd/gateway/endpoint_picker.go b/cmd/gateway/endpoint_picker.go index 0d814b91d0..7b8e85a708 100644 --- a/cmd/gateway/endpoint_picker.go +++ b/cmd/gateway/endpoint_picker.go @@ -35,20 +35,19 @@ func endpointPickerServer(handler http.Handler) error { } // realExtProcClientFactory returns a factory that creates a new gRPC connection and client per request. 
-func realExtProcClientFactory() extProcClientFactory { +func realExtProcClientFactory(disableTLS, tlsSkipVerify bool) extProcClientFactory { return func(target string) (extprocv3.ExternalProcessorClient, func() error, error) { var opts []grpc.DialOption - enableTLS := true - insecureSkipVerify := true - if !enableTLS { + if disableTLS { opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials())) } else { creds := credentials.NewTLS(&tls.Config{ - InsecureSkipVerify: insecureSkipVerify, //nolint:gosec + InsecureSkipVerify: tlsSkipVerify, //nolint:gosec }) opts = append(opts, grpc.WithTransportCredentials(creds)) } + conn, err := grpc.NewClient(target, opts...) if err != nil { return nil, nil, err diff --git a/deploy/inference-nginx-plus/deploy.yaml b/deploy/inference-nginx-plus/deploy.yaml index c33b2772eb..96a28bde2a 100644 --- a/deploy/inference-nginx-plus/deploy.yaml +++ b/deploy/inference-nginx-plus/deploy.yaml @@ -305,6 +305,7 @@ spec: - --health-port=8081 - --leader-election-lock-name=nginx-gateway-leader-election - --gateway-api-inference-extension + - --endpoint-picker-tls-skip-verify=true env: - name: POD_NAMESPACE valueFrom: diff --git a/deploy/inference/deploy.yaml b/deploy/inference/deploy.yaml index f5a36622f2..5d134c4c15 100644 --- a/deploy/inference/deploy.yaml +++ b/deploy/inference/deploy.yaml @@ -301,6 +301,7 @@ spec: - --health-port=8081 - --leader-election-lock-name=nginx-gateway-leader-election - --gateway-api-inference-extension + - --endpoint-picker-tls-skip-verify=true env: - name: POD_NAMESPACE valueFrom: diff --git a/internal/controller/config/config.go b/internal/controller/config/config.go index d1e77df07b..9394bc8c66 100644 --- a/internal/controller/config/config.go +++ b/internal/controller/config/config.go @@ -52,6 +52,10 @@ type Config struct { InferenceExtension bool // SnippetsFilters indicates if SnippetsFilters are enabled. 
SnippetsFilters bool + // EndpointPickerDisableTLS indicates if TLS is disabled for EndpointPicker communication. + EndpointPickerDisableTLS bool + // EndpointPickerTLSSkipVerify indicates if TLS server certificate verification is skipped for EndpointPicker communication. + EndpointPickerTLSSkipVerify bool } // GatewayPodConfig contains information about this Pod. diff --git a/internal/controller/manager.go b/internal/controller/manager.go index 264f51cf96..cb7c2bc2df 100644 --- a/internal/controller/manager.go +++ b/internal/controller/manager.go @@ -221,6 +221,8 @@ func StartManager(cfg config.Config) error { PlusUsageConfig: &cfg.UsageReportConfig, NginxOneConsoleTelemetryConfig: cfg.NginxOneConsoleTelemetryConfig, InferenceExtension: cfg.InferenceExtension, + EndpointPickerDisableTLS: cfg.EndpointPickerDisableTLS, + EndpointPickerTLSSkipVerify: cfg.EndpointPickerTLSSkipVerify, }, ) if err != nil { diff --git a/internal/controller/provisioner/objects.go b/internal/controller/provisioner/objects.go index 68cde04608..501f33c51b 100644 --- a/internal/controller/provisioner/objects.go +++ b/internal/controller/provisioner/objects.go @@ -1121,14 +1121,23 @@ func (p *NginxProvisioner) buildNginxPodTemplateSpec( } if p.cfg.InferenceExtension { + command := []string{ + "/usr/bin/gateway", + "endpoint-picker", + } + + if p.cfg.EndpointPickerDisableTLS { + command = append(command, "--endpoint-picker-disable-tls") + } + if p.cfg.EndpointPickerTLSSkipVerify { + command = append(command, "--endpoint-picker-tls-skip-verify") + } + spec.Spec.Containers = append(spec.Spec.Containers, corev1.Container{ Name: "endpoint-picker-shim", Image: p.cfg.GatewayPodConfig.Image, ImagePullPolicy: pullPolicy, - Command: []string{ - "/usr/bin/gateway", - "endpoint-picker", - }, + Command: command, SecurityContext: &corev1.SecurityContext{ AllowPrivilegeEscalation: helpers.GetPointer(false), Capabilities: &corev1.Capabilities{ diff --git a/internal/controller/provisioner/objects_test.go 
b/internal/controller/provisioner/objects_test.go index 30403f85a7..cb5f4f4fd7 100644 --- a/internal/controller/provisioner/objects_test.go +++ b/internal/controller/provisioner/objects_test.go @@ -1784,8 +1784,10 @@ func TestBuildNginxResourceObjects_InferenceExtension(t *testing.T) { GatewayPodConfig: &config.GatewayPodConfig{ Namespace: ngfNamespace, }, - AgentTLSSecretName: agentTLSTestSecretName, - InferenceExtension: true, + AgentTLSSecretName: agentTLSTestSecretName, + InferenceExtension: true, + EndpointPickerDisableTLS: true, + EndpointPickerTLSSkipVerify: true, }, k8sClient: fakeClient, baseLabelSelector: metav1.LabelSelector{ @@ -1814,8 +1816,17 @@ func TestBuildNginxResourceObjects_InferenceExtension(t *testing.T) { break } } + + expectedCommands := []string{ + "/usr/bin/gateway", + "endpoint-picker", + "--endpoint-picker-disable-tls", + "--endpoint-picker-tls-skip-verify", + } + g.Expect(deployment).ToNot(BeNil()) containers := deployment.Spec.Template.Spec.Containers g.Expect(containers).To(HaveLen(2)) g.Expect(containers[1].Name).To(Equal("endpoint-picker-shim")) + g.Expect(containers[1].Command).To(Equal(expectedCommands)) } diff --git a/internal/controller/provisioner/provisioner.go b/internal/controller/provisioner/provisioner.go index f8a2b06957..d2957404f4 100644 --- a/internal/controller/provisioner/provisioner.go +++ b/internal/controller/provisioner/provisioner.go @@ -59,6 +59,8 @@ type Config struct { NginxOneConsoleTelemetryConfig config.NginxOneConsoleTelemetryConfig Plus bool InferenceExtension bool + EndpointPickerDisableTLS bool + EndpointPickerTLSSkipVerify bool } // NginxProvisioner handles provisioning nginx kubernetes resources.