diff --git a/Makefile b/Makefile index a49147a7d8..4a14d2ae5b 100644 --- a/Makefile +++ b/Makefile @@ -136,6 +136,14 @@ install-gateway-crds: ## Install Gateway API CRDs uninstall-gateway-crds: ## Uninstall Gateway API CRDs kubectl kustomize $(SELF_DIR)config/crd/gateway-api/$(if $(filter true,$(ENABLE_EXPERIMENTAL)),experimental,standard) | kubectl delete -f - +.PHONY: install-inference-crds +install-inference-crds: ## Install Gateway API Inference Extension CRDs + kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl apply -f - + +.PHONY: uninstall-inference-crds +uninstall-inference-crds: ## Uninstall Gateway API Inference Extension CRDs + kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl delete -f - + .PHONY: generate-manifests generate-manifests: ## Generate manifests using Helm. ./scripts/generate-manifests.sh diff --git a/charts/nginx-gateway-fabric/README.md b/charts/nginx-gateway-fabric/README.md index 4f2541107c..0bae9e5584 100644 --- a/charts/nginx-gateway-fabric/README.md +++ b/charts/nginx-gateway-fabric/README.md @@ -244,7 +244,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri | `nginx.usage.resolver` | The nameserver used to resolve the NGINX Plus usage reporting endpoint. Used with NGINX Instance Manager. | string | `""` | | `nginx.usage.secretName` | The name of the Secret containing the JWT for NGINX Plus usage reporting. Must exist in the same namespace that the NGINX Gateway Fabric control plane is running in (default namespace: nginx-gateway). | string | `"nplus-license"` | | `nginx.usage.skipVerify` | Disable client verification of the NGINX Plus usage reporting server certificate. | bool | `false` | -| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. 
| object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` | +| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. 
| object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"gwAPIInferenceExtension":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` | | `nginxGateway.affinity` | The affinity of the NGINX Gateway Fabric control plane pod. | object | `{}` | | `nginxGateway.autoscaling` | Autoscaling configuration for the NGINX Gateway Fabric control plane. | object | `{"enable":false}` | | `nginxGateway.autoscaling.enable` | Enable or disable Horizontal Pod Autoscaler for the control plane. | bool | `false` | @@ -256,6 +256,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri | `nginxGateway.gatewayClassName` | The name of the GatewayClass that will be created as part of this release. Every NGINX Gateway Fabric must have a unique corresponding GatewayClass resource. NGINX Gateway Fabric only processes resources that belong to its class - i.e. have the "gatewayClassName" field resource equal to the class. | string | `"nginx"` | | `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. 
The controller's domain is gateway.nginx.org. | string | `"gateway.nginx.org/nginx-gateway-controller"` | | `nginxGateway.gwAPIExperimentalFeatures.enable` | Enable the experimental features of Gateway API which are supported by NGINX Gateway Fabric. Requires the Gateway APIs installed from the experimental channel. | bool | `false` | +| `nginxGateway.gwAPIInferenceExtension.enable` | Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. | bool | `false` | | `nginxGateway.image` | The image configuration for the NGINX Gateway Fabric control plane. | object | `{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"}` | | `nginxGateway.image.repository` | The NGINX Gateway Fabric image to use | string | `"ghcr.io/nginx/nginx-gateway-fabric"` | | `nginxGateway.kind` | The kind of the NGINX Gateway Fabric installation - currently, only deployment is supported. | string | `"deployment"` | diff --git a/charts/nginx-gateway-fabric/templates/clusterrole.yaml b/charts/nginx-gateway-fabric/templates/clusterrole.yaml index 8fc4da400e..9be339c04a 100644 --- a/charts/nginx-gateway-fabric/templates/clusterrole.yaml +++ b/charts/nginx-gateway-fabric/templates/clusterrole.yaml @@ -129,6 +129,22 @@ rules: {{- end }} verbs: - update +{{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }} +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools/status + verbs: + - update +{{- end }} {{- if .Values.nginxGateway.leaderElection.enable }} - apiGroups: - coordination.k8s.io diff --git a/charts/nginx-gateway-fabric/templates/deployment.yaml b/charts/nginx-gateway-fabric/templates/deployment.yaml index 9be1b13f16..b168051199 100644 --- a/charts/nginx-gateway-fabric/templates/deployment.yaml +++ b/charts/nginx-gateway-fabric/templates/deployment.yaml @@ 
-97,6 +97,9 @@ spec: {{- if .Values.nginxGateway.gwAPIExperimentalFeatures.enable }} - --gateway-api-experimental-features {{- end }} + {{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }} + - --gateway-api-inference-extension + {{- end }} {{- if .Values.nginxGateway.snippetsFilters.enable }} - --snippets-filters {{- end }} diff --git a/charts/nginx-gateway-fabric/values.schema.json b/charts/nginx-gateway-fabric/values.schema.json index ca5d339d44..d2e08e9d3d 100644 --- a/charts/nginx-gateway-fabric/values.schema.json +++ b/charts/nginx-gateway-fabric/values.schema.json @@ -831,6 +831,20 @@ "title": "gwAPIExperimentalFeatures", "type": "object" }, + "gwAPIInferenceExtension": { + "properties": { + "enable": { + "default": false, + "description": "Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads.", + "required": [], + "title": "enable", + "type": "boolean" + } + }, + "required": [], + "title": "gwAPIInferenceExtension", + "type": "object" + }, "image": { "description": "The image configuration for the NGINX Gateway Fabric control plane.", "properties": { diff --git a/charts/nginx-gateway-fabric/values.yaml b/charts/nginx-gateway-fabric/values.yaml index 044e0f2d37..665fada305 100644 --- a/charts/nginx-gateway-fabric/values.yaml +++ b/charts/nginx-gateway-fabric/values.yaml @@ -210,6 +210,10 @@ nginxGateway: # APIs installed from the experimental channel. enable: false + gwAPIInferenceExtension: + # -- Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. + enable: false + snippetsFilters: # -- Enable SnippetsFilters feature. SnippetsFilters allow inserting NGINX configuration into the generated NGINX # config for HTTPRoute and GRPCRoute resources. 
diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go index 9f83cbcfb3..ce2eff9df6 100644 --- a/cmd/gateway/commands.go +++ b/cmd/gateway/commands.go @@ -74,6 +74,7 @@ func createControllerCommand() *cobra.Command { leaderElectionLockNameFlag = "leader-election-lock-name" productTelemetryDisableFlag = "product-telemetry-disable" gwAPIExperimentalFlag = "gateway-api-experimental-features" + gwAPIInferenceExtensionFlag = "gateway-api-inference-extension" nginxDockerSecretFlag = "nginx-docker-secret" //nolint:gosec // not credentials usageReportSecretFlag = "usage-report-secret" usageReportEndpointFlag = "usage-report-endpoint" @@ -139,6 +140,7 @@ func createControllerCommand() *cobra.Command { } gwExperimentalFeatures bool + gwInferenceExtension bool disableProductTelemetry bool @@ -264,6 +266,7 @@ func createControllerCommand() *cobra.Command { }, Plus: plus, ExperimentalFeatures: gwExperimentalFeatures, + InferenceExtension: gwInferenceExtension, ImageSource: imageSource, Flags: config.Flags{ Names: flagKeys, @@ -424,6 +427,14 @@ func createControllerCommand() *cobra.Command { "Requires the Gateway APIs installed from the experimental channel.", ) + cmd.Flags().BoolVar( + &gwInferenceExtension, + gwAPIInferenceExtensionFlag, + false, + "Enable Gateway API Inference Extension support. 
Allows for configuring InferencePools to route "+ + "traffic to AI workloads.", + ) + cmd.Flags().Var( &nginxDockerSecrets, nginxDockerSecretFlag, diff --git a/config/crd/inference-extension/kustomization.yaml b/config/crd/inference-extension/kustomization.yaml new file mode 100644 index 0000000000..6b6e210cf5 --- /dev/null +++ b/config/crd/inference-extension/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd?timeout=120&ref=v1.0.0 diff --git a/deploy/inference-nginx-plus/deploy.yaml b/deploy/inference-nginx-plus/deploy.yaml new file mode 100644 index 0000000000..77ee4da544 --- /dev/null +++ b/deploy/inference-nginx-plus/deploy.yaml @@ -0,0 +1,441 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: nginx-gateway +--- +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +rules: +- apiGroups: + - "" + resources: + - secrets + verbs: + - create + - update + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +rules: +- apiGroups: + - "" + - 
apps + - autoscaling + resources: + - secrets + - configmaps + - serviceaccounts + - services + - deployments + - daemonsets + - horizontalpodautoscalers + verbs: + - create + - update + - delete + - list + - get + - watch +- apiGroups: + - "" + resources: + - namespaces + - pods + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list +- apiGroups: + - "" + resources: + - nodes + verbs: + - list +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - gateway.networking.k8s.io + resources: + - gatewayclasses + - gateways + - httproutes + - referencegrants + - grpcroutes + verbs: + - list + - watch +- apiGroups: + - gateway.networking.k8s.io + resources: + - httproutes/status + - gateways/status + - gatewayclasses/status + - grpcroutes/status + verbs: + - update +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways + verbs: + - get + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxproxies + - clientsettingspolicies + - observabilitypolicies + - upstreamsettingspolicies + verbs: + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways/status + - clientsettingspolicies/status + - observabilitypolicies/status + - upstreamsettingspolicies/status + verbs: + - update +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools/status + verbs: + - update +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - list + - watch +--- +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: nginx-gateway-cert-generator +subjects: +- kind: ServiceAccount + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: nginx-gateway +subjects: +- kind: ServiceAccount + name: nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +spec: + ports: + - name: agent-grpc + port: 443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + template: + metadata: + annotations: + prometheus.io/port: "9113" + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + spec: + automountServiceAccountToken: true + containers: + - args: + - controller + - --gateway-ctlr-name=gateway.nginx.org/nginx-gateway-controller + - 
--gatewayclass=nginx + - --config=nginx-gateway-config + - --service=nginx-gateway + - --agent-tls-secret=agent-tls + - --nginx-docker-secret=nginx-plus-registry-secret + - --nginx-plus + - --usage-report-secret=nplus-license + - --metrics-port=9113 + - --health-port=8081 + - --leader-election-lock-name=nginx-gateway-leader-election + - --gateway-api-inference-extension + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: INSTANCE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['app.kubernetes.io/instance'] + - name: IMAGE_NAME + value: ghcr.io/nginx/nginx-gateway-fabric:edge + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: nginx-gateway + ports: + - containerPort: 8443 + name: agent-grpc + - containerPort: 9113 + name: metrics + - containerPort: 8081 + name: health + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 3 + periodSeconds: 1 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /var/run/secrets/ngf + name: nginx-agent-tls + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway + terminationGracePeriodSeconds: 30 + volumes: + - name: nginx-agent-tls + secret: + secretName: server-tls +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +spec: + template: + metadata: + annotations: null + spec: + containers: + - args: + - generate-certs + - --service=nginx-gateway + - --cluster-domain=cluster.local + - 
--server-tls-secret=server-tls + - --agent-tls-secret=agent-tls + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: cert-generator + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + restartPolicy: Never + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway-cert-generator + ttlSecondsAfterFinished: 30 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx +spec: + controllerName: gateway.nginx.org/nginx-gateway-controller + parametersRef: + group: gateway.nginx.org + kind: NginxProxy + name: nginx-gateway-proxy-config + namespace: nginx-gateway +--- +apiVersion: gateway.nginx.org/v1alpha1 +kind: NginxGateway +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-config + namespace: nginx-gateway +spec: + logging: + level: info +--- +apiVersion: gateway.nginx.org/v1alpha2 +kind: NginxProxy +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-proxy-config + namespace: nginx-gateway +spec: + kubernetes: + deployment: + container: + image: + pullPolicy: Always + repository: private-registry.nginx.com/nginx-gateway-fabric/nginx-plus + tag: edge + replicas: 1 + service: + externalTrafficPolicy: Local + type: LoadBalancer diff --git a/deploy/inference/deploy.yaml b/deploy/inference/deploy.yaml new file mode 100644 index 0000000000..49a8f85053 --- /dev/null +++ b/deploy/inference/deploy.yaml @@ -0,0 +1,438 
@@ +apiVersion: v1 +kind: Namespace +metadata: + name: nginx-gateway +--- +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +rules: +- apiGroups: + - "" + resources: + - secrets + verbs: + - create + - update + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +rules: +- apiGroups: + - "" + - apps + - autoscaling + resources: + - secrets + - configmaps + - serviceaccounts + - services + - deployments + - daemonsets + - horizontalpodautoscalers + verbs: + - create + - update + - delete + - list + - get + - watch +- apiGroups: + - "" + resources: + - namespaces + - pods + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list +- apiGroups: + - "" + resources: + - nodes + verbs: + - list +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - gateway.networking.k8s.io + resources: + - gatewayclasses + - 
gateways + - httproutes + - referencegrants + - grpcroutes + verbs: + - list + - watch +- apiGroups: + - gateway.networking.k8s.io + resources: + - httproutes/status + - gateways/status + - gatewayclasses/status + - grpcroutes/status + verbs: + - update +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways + verbs: + - get + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxproxies + - clientsettingspolicies + - observabilitypolicies + - upstreamsettingspolicies + verbs: + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways/status + - clientsettingspolicies/status + - observabilitypolicies/status + - upstreamsettingspolicies/status + verbs: + - update +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools/status + verbs: + - update +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: nginx-gateway-cert-generator +subjects: +- kind: ServiceAccount + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: nginx-gateway +subjects: +- kind: ServiceAccount + name: 
nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +spec: + ports: + - name: agent-grpc + port: 443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + template: + metadata: + annotations: + prometheus.io/port: "9113" + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + spec: + automountServiceAccountToken: true + containers: + - args: + - controller + - --gateway-ctlr-name=gateway.nginx.org/nginx-gateway-controller + - --gatewayclass=nginx + - --config=nginx-gateway-config + - --service=nginx-gateway + - --agent-tls-secret=agent-tls + - --metrics-port=9113 + - --health-port=8081 + - --leader-election-lock-name=nginx-gateway-leader-election + - --gateway-api-inference-extension + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: INSTANCE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['app.kubernetes.io/instance'] + - name: IMAGE_NAME + value: ghcr.io/nginx/nginx-gateway-fabric:edge + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: nginx-gateway + ports: + - containerPort: 8443 + name: agent-grpc + - 
containerPort: 9113 + name: metrics + - containerPort: 8081 + name: health + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 3 + periodSeconds: 1 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /var/run/secrets/ngf + name: nginx-agent-tls + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway + terminationGracePeriodSeconds: 30 + volumes: + - name: nginx-agent-tls + secret: + secretName: server-tls +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +spec: + template: + metadata: + annotations: null + spec: + containers: + - args: + - generate-certs + - --service=nginx-gateway + - --cluster-domain=cluster.local + - --server-tls-secret=server-tls + - --agent-tls-secret=agent-tls + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: cert-generator + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + restartPolicy: Never + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway-cert-generator + ttlSecondsAfterFinished: 30 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx +spec: + controllerName: gateway.nginx.org/nginx-gateway-controller + parametersRef: + group: gateway.nginx.org + 
kind: NginxProxy + name: nginx-gateway-proxy-config + namespace: nginx-gateway +--- +apiVersion: gateway.nginx.org/v1alpha1 +kind: NginxGateway +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-config + namespace: nginx-gateway +spec: + logging: + level: info +--- +apiVersion: gateway.nginx.org/v1alpha2 +kind: NginxProxy +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-proxy-config + namespace: nginx-gateway +spec: + kubernetes: + deployment: + container: + image: + pullPolicy: Always + repository: ghcr.io/nginx/nginx-gateway-fabric/nginx + tag: edge + replicas: 1 + service: + externalTrafficPolicy: Local + type: LoadBalancer diff --git a/examples/helm/inference-nginx-plus/values.yaml b/examples/helm/inference-nginx-plus/values.yaml new file mode 100644 index 0000000000..1d89293db2 --- /dev/null +++ b/examples/helm/inference-nginx-plus/values.yaml @@ -0,0 +1,10 @@ +nginxGateway: + name: nginx-gateway + gwAPIInferenceExtension: + enable: true + +nginx: + plus: true + image: + repository: private-registry.nginx.com/nginx-gateway-fabric/nginx-plus + imagePullSecret: nginx-plus-registry-secret diff --git a/examples/helm/inference/values.yaml b/examples/helm/inference/values.yaml new file mode 100644 index 0000000000..0bb54b57e9 --- /dev/null +++ b/examples/helm/inference/values.yaml @@ -0,0 +1,4 @@ +nginxGateway: + name: nginx-gateway + gwAPIInferenceExtension: + enable: true diff --git a/go.mod b/go.mod index be2026a4ff..d4ec8da3ba 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( k8s.io/klog/v2 v2.130.1 sigs.k8s.io/controller-runtime v0.22.0 sigs.k8s.io/gateway-api v1.3.0 + sigs.k8s.io/gateway-api-inference-extension v1.0.0 ) require ( @@ -80,7 +81,7 @@ require ( golang.org/x/sync v0.16.0 // indirect golang.org/x/sys v0.35.0 // indirect 
golang.org/x/term v0.34.0 // indirect - golang.org/x/time v0.9.0 // indirect + golang.org/x/time v0.12.0 // indirect golang.org/x/tools v0.36.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect diff --git a/go.sum b/go.sum index e2b1136f46..5325827f3f 100644 --- a/go.sum +++ b/go.sum @@ -66,8 +66,8 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= -github.com/goccy/go-yaml v1.17.1 h1:LI34wktB2xEE3ONG/2Ar54+/HJVBriAGJ55PHls4YuY= -github.com/goccy/go-yaml v1.17.1/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= +github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= +github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= @@ -218,8 +218,8 @@ github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= 
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= @@ -283,8 +283,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= -golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= -golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= @@ -336,6 +336,8 @@ sigs.k8s.io/controller-runtime v0.22.0 h1:mTOfibb8Hxwpx3xEkR56i7xSjB+nH4hZG37Srl sigs.k8s.io/controller-runtime v0.22.0/go.mod h1:FwiwRjkRPbiN+zp2QRp7wlTCzbUXxZ/D4OzuQUDwBHY= sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M= sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk= +sigs.k8s.io/gateway-api-inference-extension v1.0.0 h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8= +sigs.k8s.io/gateway-api-inference-extension 
v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= diff --git a/internal/controller/config/config.go b/internal/controller/config/config.go index ff6744b101..0e526aa4ca 100644 --- a/internal/controller/config/config.go +++ b/internal/controller/config/config.go @@ -48,6 +48,8 @@ type Config struct { Plus bool // ExperimentalFeatures indicates if experimental features are enabled. ExperimentalFeatures bool + // InferenceExtension indicates if Gateway API Inference Extension support is enabled. + InferenceExtension bool // SnippetsFilters indicates if SnippetsFilters are enabled. SnippetsFilters bool } diff --git a/internal/controller/handler.go b/internal/controller/handler.go index 8613de046b..2a7fdef201 100644 --- a/internal/controller/handler.go +++ b/internal/controller/handler.go @@ -12,9 +12,11 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1" @@ -78,6 +80,8 @@ type eventHandlerConfig struct { controlConfigNSName types.NamespacedName // gatewayCtlrName is the name of the NGF controller. gatewayCtlrName string + // gatewayInstanceName is the name of the NGINX Gateway instance. + gatewayInstanceName string // gatewayClassName is the name of the GatewayClass. gatewayClassName string // plus is whether or not we are running NGINX Plus. 
@@ -115,8 +119,10 @@ type eventHandlerImpl struct { // objectFilters contains all created objectFilters, with the key being a filterKey objectFilters map[filterKey]objectFilter - cfg eventHandlerConfig - lock sync.Mutex + cfg eventHandlerConfig + lock sync.RWMutex + leaderLock sync.RWMutex + leader bool } // newEventHandlerImpl creates a new eventHandlerImpl. @@ -169,6 +175,10 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log // enable is called when the pod becomes leader to ensure the provisioner has // the latest configuration. func (h *eventHandlerImpl) enable(ctx context.Context) { + h.leaderLock.Lock() + h.leader = true + h.leaderLock.Unlock() + h.sendNginxConfig(ctx, h.cfg.logger, h.cfg.processor.GetLatestGraph()) } @@ -186,6 +196,9 @@ func (h *eventHandlerImpl) sendNginxConfig(ctx context.Context, logger logr.Logg return } + // ensure headless "shadow" Services are created for any referenced InferencePools + h.ensureInferencePoolServices(ctx, gr.ReferencedInferencePools) + for _, gw := range gr.Gateways { go func() { if err := h.cfg.nginxProvisioner.RegisterGateway(ctx, gw, gw.DeploymentName.Name); err != nil { @@ -539,8 +552,8 @@ func (h *eventHandlerImpl) getDeploymentContext(ctx context.Context) (dataplane. // GetLatestConfiguration gets the latest configuration. func (h *eventHandlerImpl) GetLatestConfiguration() []*dataplane.Configuration { - h.lock.Lock() - defer h.lock.Unlock() + h.lock.RLock() + defer h.lock.RUnlock() configs := make([]*dataplane.Configuration, 0, len(h.latestConfigurations)) for _, cfg := range h.latestConfigurations { @@ -566,6 +579,111 @@ func objectFilterKey(obj client.Object, nsName types.NamespacedName) filterKey { return filterKey(fmt.Sprintf("%T_%s_%s", obj, nsName.Namespace, nsName.Name)) } +// ensureInferencePoolServices ensures a headless Service exists and is up to date for each InferencePool. 
+func (h *eventHandlerImpl) ensureInferencePoolServices( + ctx context.Context, + pools map[types.NamespacedName]*graph.ReferencedInferencePool, +) { + if !h.isLeader() { + return + } + + for _, pool := range pools { + if pool.Source == nil { + continue + } + + selectors := make(map[string]string) + for k, v := range pool.Source.Spec.Selector.MatchLabels { + selectors[string(k)] = string(v) + } + + // v1 of InferencePool only supports a single port right now + ports := []v1.ServicePort{ + { + Port: int32(pool.Source.Spec.TargetPorts[0].Number), + TargetPort: intstr.FromInt32(int32(pool.Source.Spec.TargetPorts[0].Number)), + }, + } + + labels := map[string]string{ + controller.AppManagedByLabel: controller.CreateNginxResourceName( + h.cfg.gatewayInstanceName, + h.cfg.gatewayClassName, + ), + } + + svc := &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: controller.CreateInferencePoolServiceName(pool.Source.Name), + Namespace: pool.Source.Namespace, + Labels: labels, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: pool.Source.APIVersion, + Kind: pool.Source.Kind, + Name: pool.Source.Name, + UID: pool.Source.UID, + }, + }, + }, + Spec: v1.ServiceSpec{ + ClusterIP: v1.ClusterIPNone, // headless + Selector: selectors, + Ports: ports, + }, + } + + svcCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + res, err := controllerutil.CreateOrUpdate( + svcCtx, + h.cfg.k8sClient, + svc, + serviceSpecSetter(svc, svc.Spec, svc.ObjectMeta), + ) + if err != nil { + cancel() + msg := "Failed to upsert headless Service for InferencePool" + h.cfg.logger.Error(err, msg, "Service", svc.Name, "InferencePool", pool.Source.Name) + h.cfg.eventRecorder.Eventf( + svc, + v1.EventTypeWarning, + "ServiceCreateOrUpdateFailed", + "%s %q: %v", msg, pool.Source.Name, err, + ) + continue + } + cancel() + + if res == controllerutil.OperationResultCreated || res == controllerutil.OperationResultUpdated { + h.cfg.logger.Info( + fmt.Sprintf("Successfully %s headless Service for 
InferencePool", res), + "Service", svc.Name, "InferencePool", pool.Source.Name, + ) + } + } +} + +func serviceSpecSetter( + service *v1.Service, + spec v1.ServiceSpec, + objectMeta metav1.ObjectMeta, +) controllerutil.MutateFn { + return func() error { + service.Labels = objectMeta.Labels + service.Spec = spec + return nil + } +} + +// isLeader returns whether or not this handler is the leader. +func (h *eventHandlerImpl) isLeader() bool { + h.leaderLock.RLock() + defer h.leaderLock.RUnlock() + + return h.leader +} + /* Handler Callback functions diff --git a/internal/controller/handler_test.go b/internal/controller/handler_test.go index a9fa942c27..40d3aa71a2 100644 --- a/internal/controller/handler_test.go +++ b/internal/controller/handler_test.go @@ -12,11 +12,13 @@ import ( "go.uber.org/zap" v1 "k8s.io/api/core/v1" discoveryV1 "k8s.io/api/discovery/v1" + apiErrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1" @@ -149,6 +151,7 @@ var _ = Describe("eventHandler", func() { metricsCollector: collectors.NewControllerNoopCollector(), }) Expect(handler.cfg.graphBuiltHealthChecker.ready).To(BeFalse()) + handler.leader = true }) AfterEach(func() { @@ -518,6 +521,115 @@ var _ = Describe("eventHandler", func() { Expect(handler.cfg.graphBuiltHealthChecker.readyCheck(nil)).To(Succeed()) }) + It("should create a headless Service for each referenced InferencePool", func() { + namespace := "test-ns" + poolName1 := "pool1" + poolName2 := "pool2" + poolUID1 := types.UID("uid1") + poolUID2 := types.UID("uid2") + + pool1 := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName1, + Namespace: 
namespace, + UID: poolUID1, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8081}, + }, + }, + } + + g := &graph.Graph{ + Gateways: map[types.NamespacedName]*graph.Gateway{ + {}: { + Source: &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "gateway", + }, + }, + Valid: true, + }, + }, + ReferencedInferencePools: map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName1}: {Source: pool1}, + {Namespace: namespace, Name: poolName2}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName2, + Namespace: namespace, + UID: poolUID2, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "bar"}, + }, + TargetPorts: []inference.Port{ + {Number: 9090}, + }, + }, + }, + }, + }, + } + + fakeProcessor.ProcessReturns(g) + + e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} + batch := []any{e} + + handler.HandleEventBatch(context.Background(), logr.Discard(), batch) + + // Check Service for pool1 + svc1 := &v1.Service{} + svcName1 := controller.CreateInferencePoolServiceName(poolName1) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName1, Namespace: namespace}, svc1) + Expect(err).ToNot(HaveOccurred()) + Expect(svc1.Spec.ClusterIP).To(Equal(v1.ClusterIPNone)) + Expect(svc1.Spec.Selector).To(HaveKeyWithValue("app", "foo")) + Expect(svc1.Spec.Ports).To(HaveLen(1)) + Expect(svc1.Spec.Ports[0].Port).To(Equal(int32(8081))) + Expect(svc1.OwnerReferences).To(HaveLen(1)) + Expect(svc1.OwnerReferences[0].Name).To(Equal(poolName1)) + Expect(svc1.OwnerReferences[0].UID).To(Equal(poolUID1)) + + // Check Service for pool2 + svc2 := &v1.Service{} + svcName2 := 
controller.CreateInferencePoolServiceName(poolName2) + err = fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName2, Namespace: namespace}, svc2) + Expect(err).ToNot(HaveOccurred()) + Expect(svc2.Spec.ClusterIP).To(Equal(v1.ClusterIPNone)) + Expect(svc2.Spec.Selector).To(HaveKeyWithValue("app", "bar")) + Expect(svc2.Spec.Ports).To(HaveLen(1)) + Expect(svc2.Spec.Ports[0].Port).To(Equal(int32(9090))) + Expect(svc2.OwnerReferences).To(HaveLen(1)) + Expect(svc2.OwnerReferences[0].Name).To(Equal(poolName2)) + Expect(svc2.OwnerReferences[0].UID).To(Equal(poolUID2)) + + // Now update pool1's selector and ensure the Service selector is updated + updatedSelector := map[inference.LabelKey]inference.LabelValue{"app": "baz"} + pool1.Spec.Selector.MatchLabels = updatedSelector + + // Simulate the updated pool in the graph + g.ReferencedInferencePools[types.NamespacedName{Namespace: namespace, Name: poolName1}].Source = pool1 + fakeProcessor.ProcessReturns(g) + + e = &events.UpsertEvent{Resource: &inference.InferencePool{}} + batch = []any{e} + handler.HandleEventBatch(context.Background(), logr.Discard(), batch) + + // Check that the Service selector was updated + svc1 = &v1.Service{} + err = fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName1, Namespace: namespace}, svc1) + Expect(err).ToNot(HaveOccurred()) + Expect(svc1.Spec.Selector).To(HaveKeyWithValue("app", "baz")) + }) + It("should panic for an unknown event type", func() { e := &struct{}{} @@ -672,3 +784,156 @@ var _ = Describe("getDeploymentContext", func() { }) }) }) + +var _ = Describe("ensureInferencePoolServices", func() { + var ( + handler *eventHandlerImpl + fakeK8sClient client.Client + fakeEventRecorder *record.FakeRecorder + namespace = "test-ns" + poolName = "my-inference-pool" + poolUID = types.UID("pool-uid") + ) + + BeforeEach(func() { + fakeK8sClient = fake.NewFakeClient() + fakeEventRecorder = record.NewFakeRecorder(1) + handler = 
newEventHandlerImpl(eventHandlerConfig{ + ctx: context.Background(), + k8sClient: fakeK8sClient, + statusQueue: status.NewQueue(), + eventRecorder: fakeEventRecorder, + logger: logr.Discard(), + }) + // Set as leader so ensureInferencePoolServices will run + handler.leader = true + }) + + It("creates a headless Service for a referenced InferencePool", func() { + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName, + Namespace: namespace, + UID: poolUID, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8080}, + }, + }, + }, + }, + } + + handler.ensureInferencePoolServices(context.Background(), pools) + + // The Service should have been created + svc := &v1.Service{} + svcName := controller.CreateInferencePoolServiceName(poolName) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName, Namespace: namespace}, svc) + Expect(err).ToNot(HaveOccurred()) + Expect(svc.Spec.ClusterIP).To(Equal(v1.ClusterIPNone)) + Expect(svc.Spec.Selector).To(HaveKeyWithValue("app", "foo")) + Expect(svc.Spec.Ports).To(HaveLen(1)) + Expect(svc.Spec.Ports[0].Port).To(Equal(int32(8080))) + Expect(svc.OwnerReferences).To(HaveLen(1)) + Expect(svc.OwnerReferences[0].Name).To(Equal(poolName)) + Expect(svc.OwnerReferences[0].UID).To(Equal(poolUID)) + }) + + It("does nothing if not leader", func() { + handler.leader = false + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName, + Namespace: namespace, + UID: poolUID, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: 
map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8080}, + }, + }, + }, + }, + } + + handler.ensureInferencePoolServices(context.Background(), pools) + svc := &v1.Service{} + svcName := controller.CreateInferencePoolServiceName(poolName) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName, Namespace: namespace}, svc) + Expect(err).To(HaveOccurred()) + }) + + It("skips pools with nil Source", func() { + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: nil, + }, + } + handler.ensureInferencePoolServices(context.Background(), pools) + // Should not panic or create anything + svc := &v1.Service{} + svcName := controller.CreateInferencePoolServiceName(poolName) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName, Namespace: namespace}, svc) + Expect(err).To(HaveOccurred()) + }) + + It("emits an event if Service creation fails", func() { + // Use a client that will fail on CreateOrUpdate + handler.cfg.k8sClient = &badFakeClient{} + handler.leader = true + + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName, + Namespace: namespace, + UID: poolUID, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8080}, + }, + }, + }, + }, + } + + handler.ensureInferencePoolServices(context.Background(), pools) + Eventually(func() int { return len(fakeEventRecorder.Events) }).Should(BeNumerically(">=", 1)) + event := <-fakeEventRecorder.Events + Expect(event).To(ContainSubstring("ServiceCreateOrUpdateFailed")) + }) +}) + +// badFakeClient always returns an error on Create or Update. 
+type badFakeClient struct { + client.Client +} + +func (*badFakeClient) Get(context.Context, client.ObjectKey, client.Object, ...client.GetOption) error { + return apiErrors.NewNotFound(v1.Resource("service"), "not-found") +} + +func (*badFakeClient) Create(context.Context, client.Object, ...client.CreateOption) error { + return errors.New("create error") +} + +func (*badFakeClient) Update(context.Context, client.Object, ...client.UpdateOption) error { + return errors.New("update error") +} diff --git a/internal/controller/manager.go b/internal/controller/manager.go index a4e9fd9cf0..d02411571b 100644 --- a/internal/controller/manager.go +++ b/internal/controller/manager.go @@ -32,6 +32,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/metrics" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" k8spredicate "sigs.k8s.io/controller-runtime/pkg/predicate" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" gatewayv1alpha2 "sigs.k8s.io/gateway-api/apis/v1alpha2" gatewayv1alpha3 "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -95,6 +96,7 @@ func init() { utilruntime.Must(autoscalingv2.AddToScheme(scheme)) utilruntime.Must(authv1.AddToScheme(scheme)) utilruntime.Must(rbacv1.AddToScheme(scheme)) + utilruntime.Must(inference.Install(scheme)) } func StartManager(cfg config.Config) error { @@ -251,6 +253,7 @@ func StartManager(cfg config.Config) error { gatewayPodConfig: cfg.GatewayPodConfig, controlConfigNSName: controlConfigNSName, gatewayCtlrName: cfg.GatewayCtlrName, + gatewayInstanceName: cfg.GatewayPodConfig.InstanceName, gatewayClassName: cfg.GatewayClassName, plus: cfg.Plus, statusQueue: statusQueue, @@ -536,6 +539,18 @@ func registerControllers( controllerRegCfgs = append(controllerRegCfgs, gwExpFeatures...) 
} + if cfg.InferenceExtension { + inferenceExt := []ctlrCfg{ + { + objectType: &inference.InferencePool{}, + options: []controller.Option{ + controller.WithK8sPredicate(k8spredicate.GenerationChangedPredicate{}), + }, + }, + } + controllerRegCfgs = append(controllerRegCfgs, inferenceExt...) + } + if cfg.ConfigName != "" { controllerRegCfgs = append(controllerRegCfgs, ctlrCfg{ @@ -761,6 +776,10 @@ func prepareFirstEventBatchPreparerArgs(cfg config.Config) ([]client.Object, []c ) } + if cfg.InferenceExtension { + objectLists = append(objectLists, &inference.InferencePoolList{}) + } + if cfg.SnippetsFilters { objectLists = append( objectLists, diff --git a/internal/controller/manager_test.go b/internal/controller/manager_test.go index 60d7b0e5d5..76e613a1f6 100644 --- a/internal/controller/manager_test.go +++ b/internal/controller/manager_test.go @@ -14,6 +14,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" gatewayv1alpha2 "sigs.k8s.io/gateway-api/apis/v1alpha2" gatewayv1alpha3 "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -47,9 +48,7 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { { name: "base case", cfg: config.Config{ - GatewayClassName: gcName, - ExperimentalFeatures: false, - SnippetsFilters: false, + GatewayClassName: gcName, }, expectedObjects: []client.Object{ &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, @@ -75,7 +74,6 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { cfg: config.Config{ GatewayClassName: gcName, ExperimentalFeatures: true, - SnippetsFilters: false, }, expectedObjects: []client.Object{ &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, @@ -99,12 +97,37 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { 
&ngfAPIv1alpha1.UpstreamSettingsPolicyList{}, }, }, + { + name: "inference extension enabled", + cfg: config.Config{ + GatewayClassName: gcName, + InferenceExtension: true, + }, + expectedObjects: []client.Object{ + &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, + }, + expectedObjectLists: []client.ObjectList{ + &apiv1.ServiceList{}, + &apiv1.SecretList{}, + &apiv1.NamespaceList{}, + &discoveryV1.EndpointSliceList{}, + &gatewayv1.HTTPRouteList{}, + &gatewayv1.GatewayList{}, + &gatewayv1beta1.ReferenceGrantList{}, + &ngfAPIv1alpha2.NginxProxyList{}, + &gatewayv1.GRPCRouteList{}, + partialObjectMetadataList, + &inference.InferencePoolList{}, + &ngfAPIv1alpha1.ClientSettingsPolicyList{}, + &ngfAPIv1alpha2.ObservabilityPolicyList{}, + &ngfAPIv1alpha1.UpstreamSettingsPolicyList{}, + }, + }, { name: "snippets filters enabled", cfg: config.Config{ - GatewayClassName: gcName, - ExperimentalFeatures: false, - SnippetsFilters: true, + GatewayClassName: gcName, + SnippetsFilters: true, }, expectedObjects: []client.Object{ &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, @@ -127,10 +150,11 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { }, }, { - name: "experimental and snippets filters enabled", + name: "experimental, inference, and snippets filters enabled", cfg: config.Config{ GatewayClassName: gcName, ExperimentalFeatures: true, + InferenceExtension: true, SnippetsFilters: true, }, expectedObjects: []client.Object{ @@ -147,6 +171,7 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { &gatewayv1beta1.ReferenceGrantList{}, &ngfAPIv1alpha2.NginxProxyList{}, partialObjectMetadataList, + &inference.InferencePoolList{}, &gatewayv1alpha3.BackendTLSPolicyList{}, &gatewayv1alpha2.TLSRouteList{}, &gatewayv1.GRPCRouteList{}, diff --git a/internal/controller/nginx/modules/src/epp.js b/internal/controller/nginx/modules/src/epp.js index 8efcd70ece..d4beeb9e15 100644 --- a/internal/controller/nginx/modules/src/epp.js 
+++ b/internal/controller/nginx/modules/src/epp.js @@ -1,6 +1,6 @@ // This file contains the methods to get an AI workload endpoint from the EndpointPicker (EPP). -// TODO (sberman): this module will need to be enhanced to include the following: +// TODO(sberman): this module will need to be enhanced to include the following: // - function that sends the subrequest to the Go middleware application (to get the endpoint from EPP) // - if a user has specified an Exact matching condition for a model name, extract the model name from // the request body, and if it matches that condition, set the proper value in the X-Gateway-Model-Name header diff --git a/internal/controller/state/change_processor.go b/internal/controller/state/change_processor.go index f3184adde8..27a62bb0e5 100644 --- a/internal/controller/state/change_processor.go +++ b/internal/controller/state/change_processor.go @@ -11,6 +11,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -98,6 +99,7 @@ func NewChangeProcessorImpl(cfg ChangeProcessorConfig) *ChangeProcessorImpl { TLSRoutes: make(map[types.NamespacedName]*v1alpha2.TLSRoute), NGFPolicies: make(map[graph.PolicyKey]policies.Policy), SnippetsFilters: make(map[types.NamespacedName]*ngfAPIv1alpha1.SnippetsFilter), + InferencePools: make(map[types.NamespacedName]*inference.InferencePool), } processor := &ChangeProcessorImpl{ @@ -166,6 +168,11 @@ func NewChangeProcessorImpl(cfg ChangeProcessorConfig) *ChangeProcessorImpl { store: newObjectStoreMapAdapter(clusterStore.Services), predicate: funcPredicate{stateChanged: isReferenced}, }, + { + gvk: cfg.MustExtractGVK(&inference.InferencePool{}), + store: newObjectStoreMapAdapter(clusterStore.InferencePools), + predicate: funcPredicate{stateChanged: 
isReferenced}, + }, { gvk: cfg.MustExtractGVK(&discoveryV1.EndpointSlice{}), store: nil, diff --git a/internal/controller/state/change_processor_test.go b/internal/controller/state/change_processor_test.go index 2d17e6f6e9..44dbdb0613 100644 --- a/internal/controller/state/change_processor_test.go +++ b/internal/controller/state/change_processor_test.go @@ -14,6 +14,7 @@ import ( "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -317,6 +318,7 @@ func createScheme() *runtime.Scheme { utilruntime.Must(apiext.AddToScheme(scheme)) utilruntime.Must(ngfAPIv1alpha1.AddToScheme(scheme)) utilruntime.Must(ngfAPIv1alpha2.AddToScheme(scheme)) + utilruntime.Must(inference.Install(scheme)) return scheme } diff --git a/internal/controller/state/graph/backend_refs.go b/internal/controller/state/graph/backend_refs.go index 997df80c75..cd10236f92 100644 --- a/internal/controller/state/graph/backend_refs.go +++ b/internal/controller/state/graph/backend_refs.go @@ -15,7 +15,9 @@ import ( ngfAPIv1alpha2 "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha2" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/sort" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) const ( @@ -57,10 +59,11 @@ func addBackendRefsToRouteRules( routes map[RouteKey]*L7Route, refGrantResolver *referenceGrantResolver, services map[types.NamespacedName]*v1.Service, + referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool, backendTLSPolicies 
map[types.NamespacedName]*BackendTLSPolicy, ) { for _, r := range routes { - addBackendRefsToRules(r, refGrantResolver, services, backendTLSPolicies) + addBackendRefsToRules(r, refGrantResolver, services, referencedInferencePools, backendTLSPolicies) } } @@ -70,6 +73,7 @@ func addBackendRefsToRules( route *L7Route, refGrantResolver *referenceGrantResolver, services map[types.NamespacedName]*v1.Service, + referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool, backendTLSPolicies map[types.NamespacedName]*BackendTLSPolicy, ) { if !route.Valid { @@ -99,6 +103,24 @@ func addBackendRefsToRules( } routeNs := route.Source.GetNamespace() + // if we have an InferencePool backend disguised as a Service, set the port value + if ref.IsInferencePool { + namespace := routeNs + if ref.Namespace != nil { + namespace = string(*ref.Namespace) + } + + poolName := types.NamespacedName{ + Name: controller.GetInferencePoolName(string(ref.Name)), + Namespace: namespace, + } + + if pool, exists := referencedInferencePools[poolName]; exists { + port := gatewayv1.PortNumber(pool.Source.Spec.TargetPorts[0].Number) + ref.Port = helpers.GetPointer(port) + } + } + ref, conds := createBackendRef( ref, route, @@ -149,7 +171,14 @@ func createBackendRef( } } - valid, cond := validateRouteBackendRef(ref, route.Source.GetNamespace(), refGrantResolver, refPath) + valid, cond := validateRouteBackendRef( + route.RouteType, + ref, + route.Source.GetNamespace(), + refGrantResolver, + refPath, + ) + if !valid { backendRef := BackendRef{ Weight: weight, @@ -413,6 +442,7 @@ func checkExternalNameValidForGateways( } func validateRouteBackendRef( + routeType RouteType, ref RouteBackendRef, routeNs string, refGrantResolver func(resource toResource) bool, @@ -424,6 +454,10 @@ func validateRouteBackendRef( return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error()) } + if routeType == RouteTypeHTTP { + return validateBackendRefHTTPRoute(ref, routeNs, refGrantResolver, path) + } 
+ return validateBackendRef(ref.BackendRef, routeNs, refGrantResolver, path) } @@ -475,6 +509,120 @@ func validateBackendRef( return true, conditions.Condition{} } +func validateBackendRefHTTPRoute( + ref RouteBackendRef, + routeNs string, + refGrantResolver func(toResource toResource) bool, + path *field.Path, +) (valid bool, cond conditions.Condition) { + // Because all errors cause same condition but different reasons, we return as soon as we find an error + + if valid, cond := validateBackendRefHTTPRouteGroupKind(ref.BackendRef, path); !valid { + return false, cond + } + + // no need to validate ref.Name + + if ref.Namespace != nil && string(*ref.Namespace) != routeNs { + var inferencePool bool + var inferencePoolName types.NamespacedName + + switch { + case ref.Kind != nil && *ref.Kind == kinds.InferencePool: + inferencePool = true + inferencePoolName = types.NamespacedName{ + Namespace: string(*ref.Namespace), + Name: string(ref.Name), + } + case ref.IsInferencePool: + // Case where RouteBackendRef has been updated with headless Service backend for the InferencePool + inferencePool = true + inferencePoolName = types.NamespacedName{ + Namespace: string(*ref.Namespace), + Name: controller.GetInferencePoolName(string(ref.Name)), + } + default: + refNsName := types.NamespacedName{Namespace: string(*ref.Namespace), Name: string(ref.Name)} + + if !refGrantResolver(toService(refNsName)) { + msg := fmt.Sprintf("Backend ref to Service %s not permitted by any ReferenceGrant", refNsName) + valErr := field.Forbidden(path.Child("namespace"), msg) + + return false, conditions.NewRouteBackendRefRefNotPermitted(valErr.Error()) + } + } + + if inferencePool { + if !refGrantResolver(toInferencePool(inferencePoolName)) { + msg := fmt.Sprintf( + "Backend ref to InferencePool %s not permitted by any ReferenceGrant", + inferencePoolName, + ) + valErr := field.Forbidden(path.Child("namespace"), msg) + return false, conditions.NewRouteBackendRefRefNotPermitted(valErr.Error()) + } + } 
+ } + + if ref.Port == nil && (ref.Kind == nil || *ref.Kind == kinds.Service) { + valErr := field.Required(path.Child("port"), "port cannot be nil") + return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error()) + } + + // any value of port is OK + + if ref.Weight != nil { + if err := validateWeight(*ref.Weight); err != nil { + valErr := field.Invalid(path.Child("weight"), *ref.Weight, err.Error()) + return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error()) + } + } + + return true, conditions.Condition{} +} + +func validateBackendRefHTTPRouteGroupKind( + ref gatewayv1.BackendRef, + path *field.Path, +) (bool, conditions.Condition) { + if ref.Group != nil { + group := *ref.Group + if group != "core" && group != "" && group != inferenceAPIGroup { + valErr := field.NotSupported(path.Child("group"), group, []string{"core", "", inferenceAPIGroup}) + return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + if group == inferenceAPIGroup { + if ref.Kind == nil || *ref.Kind != kinds.InferencePool { + valErr := field.Invalid( + path.Child("kind"), + ref.Kind, + fmt.Sprintf("kind must be InferencePool when group is %s", inferenceAPIGroup), + ) + return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + } + } + + if ref.Kind != nil { + kind := *ref.Kind + if kind != kinds.Service && kind != kinds.InferencePool { + valErr := field.NotSupported(path.Child("kind"), kind, []string{kinds.Service, kinds.InferencePool}) + return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + if kind == kinds.InferencePool { + if ref.Group == nil || *ref.Group != inferenceAPIGroup { + valErr := field.Invalid( + path.Child("group"), + ref.Group, + fmt.Sprintf("group must be %s when kind is InferencePool", inferenceAPIGroup), + ) + return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + } + } + return true, conditions.Condition{} +} + // validateRouteBackendRefAppProtocol checks if a given 
RouteType supports sending traffic to a service AppProtocol. // Returns nil if true or AppProtocol is not a Kubernetes Standard Application Protocol. func validateRouteBackendRefAppProtocol( diff --git a/internal/controller/state/graph/backend_refs_test.go b/internal/controller/state/graph/backend_refs_test.go index 0d3e74a1dd..08b00723aa 100644 --- a/internal/controller/state/graph/backend_refs_test.go +++ b/internal/controller/state/graph/backend_refs_test.go @@ -11,13 +11,16 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" ngfAPIv1alpha2 "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha2" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) func getNormalRef() gatewayv1.BackendRef { @@ -36,16 +39,46 @@ func getModifiedRef(mod func(ref gatewayv1.BackendRef) gatewayv1.BackendRef) gat return mod(getNormalRef()) } +func getNormalRouteBackendRef() RouteBackendRef { + return RouteBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind]("Service"), + Name: "service1", + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + Port: helpers.GetPointer[gatewayv1.PortNumber](80), + }, + Weight: helpers.GetPointer[int32](5), + }, + } +} + +func getModifiedRouteBackendRef(mod func(ref RouteBackendRef) RouteBackendRef) RouteBackendRef { + return mod(getNormalRouteBackendRef()) +} + func TestValidateRouteBackendRef(t *testing.T) { t.Parallel() + tests := []struct 
{ + routeType RouteType expectedCondition conditions.Condition name string ref RouteBackendRef expectedValid bool }{ { - name: "normal case", + name: "normal case", + routeType: RouteTypeHTTP, + ref: RouteBackendRef{ + BackendRef: getNormalRef(), + Filters: nil, + }, + expectedValid: true, + }, + { + name: "normal case grpc", + routeType: RouteTypeGRPC, ref: RouteBackendRef{ BackendRef: getNormalRef(), Filters: nil, @@ -53,7 +86,35 @@ func TestValidateRouteBackendRef(t *testing.T) { expectedValid: true, }, { - name: "filters not supported", + name: "normal case; inferencepool backend", + routeType: RouteTypeHTTP, + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.BackendObjectReference = gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Name: "ipool", + } + return backend + }), + }, + expectedValid: true, + }, + { + name: "normal case; headless Service inferencepool backend", + routeType: RouteTypeHTTP, + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.Name = gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("ipool")) + return backend + }), + IsInferencePool: true, + }, + expectedValid: true, + }, + { + name: "filters not supported", + routeType: RouteTypeHTTP, ref: RouteBackendRef{ BackendRef: getNormalRef(), Filters: []any{ @@ -70,7 +131,8 @@ func TestValidateRouteBackendRef(t *testing.T) { ), }, { - name: "invalid base ref", + name: "invalid base ref", + routeType: RouteTypeHTTP, ref: RouteBackendRef{ BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { backend.Kind = helpers.GetPointer[gatewayv1.Kind]("NotService") @@ -79,7 +141,7 @@ func TestValidateRouteBackendRef(t *testing.T) { }, expectedValid: false, expectedCondition: 
conditions.NewRouteBackendRefInvalidKind( - `test.kind: Unsupported value: "NotService": supported values: "Service"`, + `test.kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`, ), }, } @@ -90,7 +152,13 @@ func TestValidateRouteBackendRef(t *testing.T) { g := NewWithT(t) alwaysTrueRefGrantResolver := func(_ toResource) bool { return true } - valid, cond := validateRouteBackendRef(test.ref, "test", alwaysTrueRefGrantResolver, field.NewPath("test")) + valid, cond := validateRouteBackendRef( + test.routeType, + test.ref, + "test", + alwaysTrueRefGrantResolver, + field.NewPath("test"), + ) g.Expect(valid).To(Equal(test.expectedValid)) g.Expect(cond).To(Equal(test.expectedCondition)) @@ -156,7 +224,7 @@ func TestValidateBackendRef(t *testing.T) { ), }, { - name: "not a service kind", + name: "invalid kind", ref: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { backend.Kind = helpers.GetPointer[gatewayv1.Kind]("NotService") return backend @@ -218,6 +286,209 @@ func TestValidateBackendRef(t *testing.T) { } } +func TestValidateBackendRefHTTPRoute(t *testing.T) { + t.Parallel() + + alwaysFalseRefGrantResolver := func(_ toResource) bool { return false } + alwaysTrueRefGrantResolver := func(_ toResource) bool { return true } + + tests := []struct { + refGrantResolver func(resource toResource) bool + expectedCondition conditions.Condition + name string + ref RouteBackendRef + expectedValid bool + }{ + { + name: "normal case", + ref: getNormalRouteBackendRef(), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "normal case with implicit namespace", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Namespace = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "normal case with implicit kind Service", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef 
{ + backend.Kind = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "normal case with InferencePool", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup) + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool) + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "group is inference group but kind is not InferencePool", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup) + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.Service) + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefInvalidKind( + `test.kind: Invalid value: "Service": kind must be InferencePool when group is inference.networking.k8s.io`, + ), + }, + { + name: "kind is InferencePool but group is not inference", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool) + backend.Group = helpers.GetPointer[gatewayv1.Group]("core") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefInvalidKind( + `test.group: Invalid value: "core": group must be inference.networking.k8s.io when kind is InferencePool`, + ), + }, + { + name: "normal case with backend ref allowed by reference grant", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("cross-ns") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "inferencepool backend ref not 
allowed by reference grant", + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.BackendObjectReference = gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Name: "ipool", + Namespace: helpers.GetPointer[gatewayv1.Namespace]("invalid"), + } + return backend + }), + }, + refGrantResolver: alwaysFalseRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefRefNotPermitted( + "test.namespace: Forbidden: Backend ref to InferencePool invalid/ipool not permitted by any ReferenceGrant", + ), + }, + { + name: "headless Service inferencepool backend ref not allowed by reference grant", + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.Name = gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("ipool")) + backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("invalid") + return backend + }), + IsInferencePool: true, + }, + refGrantResolver: alwaysFalseRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefRefNotPermitted( + "test.namespace: Forbidden: Backend ref to InferencePool invalid/ipool not permitted by any ReferenceGrant", + ), + }, + { + name: "invalid group", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Group = helpers.GetPointer[gatewayv1.Group]("invalid") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefInvalidKind( + `test.group: Unsupported value: "invalid": supported values: "core", "", "inference.networking.k8s.io"`, + ), + }, + { + name: "invalid kind", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Kind = 
helpers.GetPointer[gatewayv1.Kind]("NotService") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefInvalidKind( + `test.kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`, + ), + }, + { + name: "backend ref not allowed by reference grant", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("invalid") + return backend + }), + refGrantResolver: alwaysFalseRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefRefNotPermitted( + "test.namespace: Forbidden: Backend ref to Service invalid/service1 not permitted by any ReferenceGrant", + ), + }, + { + name: "invalid weight", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Weight = helpers.GetPointer[int32](-1) + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefUnsupportedValue( + "test.weight: Invalid value: -1: must be in the range [0, 1000000]", + ), + }, + { + name: "nil port", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Port = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefUnsupportedValue( + "test.port: Required value: port cannot be nil", + ), + }, + { + name: "nil port allowed for InferencePool kind", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool) + backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup) + backend.Port = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + } + + for _, test := range tests 
{ + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + valid, cond := validateBackendRefHTTPRoute(test.ref, "test", test.refGrantResolver, field.NewPath("test")) + + g.Expect(valid).To(Equal(test.expectedValid)) + g.Expect(cond).To(Equal(test.expectedCondition)) + }) + } +} + func TestValidateWeight(t *testing.T) { t.Parallel() validWeights := []int32{0, 1, 1000000} @@ -523,13 +794,21 @@ func TestAddBackendRefsToRules(t *testing.T) { Name: "svcGRPC", } + svcInferenceName := controller.CreateInferencePoolServiceName("ipool") + svcInference := getSvc(svcInferenceName) + svcInferenceNsName := types.NamespacedName{ + Namespace: "test", + Name: svcInferenceName, + } + services := map[types.NamespacedName]*v1.Service{ - {Namespace: "test", Name: "svc1"}: svc1, - {Namespace: "test", Name: "svc2"}: svc2, - {Namespace: "test", Name: "svcH2c"}: svcH2c, - {Namespace: "test", Name: "svcWS"}: svcWS, - {Namespace: "test", Name: "svcWSS"}: svcWSS, - {Namespace: "test", Name: "svcGRPC"}: svcGRPC, + svc1NsName: svc1, + svc2NsName: svc2, + svcH2cNsName: svcH2c, + svcWSNsName: svcWS, + svcWSSNsName: svcWSS, + svcGRPCNsName: svcGRPC, + svcInferenceNsName: svcInference, } emptyPolicies := map[types.NamespacedName]*BackendTLSPolicy{} @@ -892,7 +1171,7 @@ func TestAddBackendRefsToRules(t *testing.T) { }, expectedConditions: []conditions.Condition{ conditions.NewRouteBackendRefInvalidKind( - `spec.rules[0].backendRefs[0].kind: Unsupported value: "NotService": supported values: "Service"`, + `spec.rules[0].backendRefs[0].kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`, ), }, policies: emptyPolicies, @@ -938,6 +1217,29 @@ func TestAddBackendRefsToRules(t *testing.T) { expectedConditions: nil, name: "zero backendRefs", }, + { + route: func() *L7Route { + route := createRoute("hr-inference", RouteTypeHTTP, "Service", 1, svcInferenceName) + // Mark the backend ref as IsInferencePool and set the port to nil (simulate InferencePool 
logic) + route.Spec.Rules[0].RouteBackendRefs[0].IsInferencePool = true + route.Spec.Rules[0].RouteBackendRefs[0].Port = nil + return route + }(), + expectedBackendRefs: []BackendRef{ + { + SvcNsName: types.NamespacedName{Namespace: "test", Name: svcInferenceName}, + ServicePort: v1.ServicePort{ + Port: 80, + }, + Valid: true, + Weight: 1, + InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, + }, + }, + expectedConditions: nil, + policies: emptyPolicies, + name: "headless Service for InferencePool gets port set correctly", + }, } for _, test := range tests { @@ -946,7 +1248,22 @@ func TestAddBackendRefsToRules(t *testing.T) { g := NewWithT(t) resolver := newReferenceGrantResolver(nil) - addBackendRefsToRules(test.route, resolver, services, test.policies) + + referencedInferencePools := map[types.NamespacedName]*ReferencedInferencePool{ + {Namespace: "test", Name: "ipool"}: { + Source: &inference.InferencePool{ + Spec: inference.InferencePoolSpec{ + TargetPorts: []inference.Port{ + { + Number: 80, + }, + }, + }, + }, + }, + } + + addBackendRefsToRules(test.route, resolver, services, referencedInferencePools, test.policies) var actual []BackendRef if test.route.Spec.Rules != nil { @@ -1169,7 +1486,7 @@ func TestCreateBackend(t *testing.T) { expectedServicePortReference: "", expectedConditions: []conditions.Condition{ conditions.NewRouteBackendRefInvalidKind( - `test.kind: Unsupported value: "NotService": supported values: "Service"`, + `test.kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`, ), }, name: "invalid kind", @@ -1403,11 +1720,13 @@ func TestCreateBackend(t *testing.T) { g := NewWithT(t) rbr := RouteBackendRef{ - nil, - test.ref.BackendRef, - []any{}, + MirrorBackendIdx: nil, + IsInferencePool: false, + BackendRef: test.ref.BackendRef, + Filters: []any{}, } route := &L7Route{ + RouteType: RouteTypeHTTP, Source: &gatewayv1.HTTPRoute{ ObjectMeta: metav1.ObjectMeta{ Namespace: "test", @@ -1467,12 +1786,14 @@ 
func TestCreateBackend(t *testing.T) { // test mirror backend case g := NewWithT(t) ref := RouteBackendRef{ - helpers.GetPointer(0), // mirrorFilterIdx - getNormalRef(), - []any{}, + MirrorBackendIdx: helpers.GetPointer(0), + IsInferencePool: false, + BackendRef: getNormalRef(), + Filters: []any{}, } route := &L7Route{ + RouteType: RouteTypeHTTP, Source: &gatewayv1.HTTPRoute{ ObjectMeta: metav1.ObjectMeta{ Namespace: "test", diff --git a/internal/controller/state/graph/graph.go b/internal/controller/state/graph/graph.go index e556c798ba..b5e13991e9 100644 --- a/internal/controller/state/graph/graph.go +++ b/internal/controller/state/graph/graph.go @@ -9,6 +9,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -40,6 +41,7 @@ type ClusterState struct { GRPCRoutes map[types.NamespacedName]*gatewayv1.GRPCRoute NGFPolicies map[PolicyKey]policies.Policy SnippetsFilters map[types.NamespacedName]*ngfAPIv1alpha1.SnippetsFilter + InferencePools map[types.NamespacedName]*inference.InferencePool } // Graph is a Graph-like representation of Gateway API resources. @@ -65,6 +67,9 @@ type Graph struct { ReferencedNamespaces map[types.NamespacedName]*v1.Namespace // ReferencedServices includes the NamespacedNames of all the Services that are referenced by at least one Route. ReferencedServices map[types.NamespacedName]*ReferencedService + // ReferencedInferencePools includes the NamespacedNames of all the InferencePools + // that are referenced by at least one Route. + ReferencedInferencePools map[types.NamespacedName]*ReferencedInferencePool // ReferencedCaCertConfigMaps includes ConfigMaps that have been referenced by any BackendTLSPolicies. 
ReferencedCaCertConfigMaps map[types.NamespacedName]*CaCertConfigMap // ReferencedNginxProxies includes NginxProxies that have been referenced by a GatewayClass or a Gateway. @@ -115,11 +120,15 @@ func (g *Graph) IsReferenced(resourceType ngftypes.ObjectType, nsname types.Name _, existed := g.ReferencedNamespaces[nsname] exists := isNamespaceReferenced(obj, g.Gateways) return existed || exists - // Service reference exists if at least one HTTPRoute references it. + // Service reference exists if at least one Route references it. case *v1.Service: _, exists := g.ReferencedServices[nsname] return exists - // EndpointSlice reference exists if its Service owner is referenced by at least one HTTPRoute. + // InferencePool reference exists if at least one Route references it. + case *inference.InferencePool: + _, exists := g.ReferencedInferencePools[nsname] + return exists + // EndpointSlice reference exists if its Service owner is referenced by at least one Route. case *discoveryV1.EndpointSlice: svcName := index.GetServiceNameFromEndpointSlice(obj) @@ -249,7 +258,9 @@ func BuildGraph( state.GRPCRoutes, gws, processedSnippetsFilters, + state.InferencePools, ) + referencedInferencePools := buildReferencedInferencePools(routes, gws, state.InferencePools) l4routes := buildL4RoutesForGateways( state.TLSRoutes, @@ -262,6 +273,7 @@ func BuildGraph( routes, refGrantResolver, state.Services, + referencedInferencePools, processedBackendTLSPolicies, ) bindRoutesToListeners(routes, l4routes, gws, state.Namespaces) @@ -295,6 +307,7 @@ func BuildGraph( ReferencedSecrets: secretResolver.getResolvedSecrets(), ReferencedNamespaces: referencedNamespaces, ReferencedServices: referencedServices, + ReferencedInferencePools: referencedInferencePools, ReferencedCaCertConfigMaps: configMapResolver.getResolvedConfigMaps(), ReferencedNginxProxies: processedNginxProxies, BackendTLSPolicies: processedBackendTLSPolicies, diff --git a/internal/controller/state/graph/graph_test.go 
b/internal/controller/state/graph/graph_test.go index 46802f3391..30d9532c3c 100644 --- a/internal/controller/state/graph/graph_test.go +++ b/internal/controller/state/graph/graph_test.go @@ -13,6 +13,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -25,6 +26,7 @@ import ( "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation/validationfakes" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller/index" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" @@ -213,6 +215,44 @@ func TestBuildGraph(t *testing.T) { return rule } + createValidRuleWithInferencePoolBackendRef := func(matches []gatewayv1.HTTPRouteMatch) RouteRule { + refs := []BackendRef{ + { + SvcNsName: types.NamespacedName{ + Namespace: testNs, + Name: controller.CreateInferencePoolServiceName("ipool"), + }, + ServicePort: v1.ServicePort{Port: 80}, + Valid: true, + Weight: 1, + InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, + }, + } + rbrs := []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](""), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("ipool")), + Namespace: helpers.GetPointer(gatewayv1.Namespace(testNs)), + }, + }, + }, + } + 
return RouteRule{ + ValidMatches: true, + Filters: RouteRuleFilters{ + Filters: []Filter{}, + Valid: true, + }, + BackendRefs: refs, + Matches: matches, + RouteBackendRefs: rbrs, + } + } + routeMatches := []gatewayv1.HTTPRouteMatch{ { Path: &gatewayv1.HTTPPathMatch{ @@ -337,6 +377,32 @@ func TestBuildGraph(t *testing.T) { }, } + inferencePool := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNs, + Name: "ipool", + }, + Spec: inference.InferencePoolSpec{ + TargetPorts: []inference.Port{ + {Number: 80}, + }, + }, + } + + ir := createRoute("ir", "gateway-1", "listener-80-1") + ir.Spec.Hostnames = []gatewayv1.Hostname{"inference.example.com"} + // Update the backend ref to point to the InferencePool instead of a Service + ir.Spec.Rules[0].BackendRefs[0] = gatewayv1.HTTPBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Name: gatewayv1.ObjectName(inferencePool.Name), + Namespace: helpers.GetPointer(gatewayv1.Namespace(inferencePool.Namespace)), + }, + }, + } + secret := &v1.Secret{ TypeMeta: metav1.TypeMeta{ Kind: "Secret", @@ -488,7 +554,20 @@ func TestBuildGraph(t *testing.T) { svc1 := &v1.Service{ ObjectMeta: metav1.ObjectMeta{ - Namespace: "test", Name: "foo2", + Namespace: testNs, Name: "foo2", + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + { + Port: 80, + }, + }, + }, + } + + inferenceSvc := &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNs, Name: controller.CreateInferencePoolServiceName(inferencePool.Name), }, Spec: v1.ServiceSpec{ Ports: []v1.ServicePort{ @@ -690,6 +769,7 @@ func TestBuildGraph(t *testing.T) { client.ObjectKeyFromObject(hr1): hr1, client.ObjectKeyFromObject(hr2): hr2, client.ObjectKeyFromObject(hr3): hr3, + client.ObjectKeyFromObject(ir): ir, }, TLSRoutes: map[types.NamespacedName]*v1alpha2.TLSRoute{ 
client.ObjectKeyFromObject(tr): tr, @@ -699,8 +779,12 @@ func TestBuildGraph(t *testing.T) { client.ObjectKeyFromObject(gr): gr, }, Services: map[types.NamespacedName]*v1.Service{ - client.ObjectKeyFromObject(svc): svc, - client.ObjectKeyFromObject(svc1): svc1, + client.ObjectKeyFromObject(svc): svc, + client.ObjectKeyFromObject(svc1): svc1, + client.ObjectKeyFromObject(inferenceSvc): inferenceSvc, + }, + InferencePools: map[types.NamespacedName]*inference.InferencePool{ + client.ObjectKeyFromObject(inferencePool): inferencePool, }, Namespaces: map[types.NamespacedName]*v1.Namespace{ client.ObjectKeyFromObject(ns): ns, @@ -991,6 +1075,37 @@ func TestBuildGraph(t *testing.T) { }, } + inferenceRoute := &L7Route{ + RouteType: RouteTypeHTTP, + Valid: true, + Attachable: true, + Source: ir, + ParentRefs: []ParentRef{ + { + Idx: 0, + Gateway: &ParentRefGateway{ + NamespacedName: client.ObjectKeyFromObject(gw1.Source), + EffectiveNginxProxy: np1Effective, + }, + SectionName: ir.Spec.ParentRefs[0].SectionName, + Attachment: &ParentRefAttachmentStatus{ + Attached: true, + AcceptedHostnames: map[string][]string{ + CreateGatewayListenerKey( + client.ObjectKeyFromObject(gw1.Source), + "listener-80-1", + ): {"inference.example.com"}, + }, + ListenerPort: 80, + }, + }, + }, + Spec: L7RouteSpec{ + Hostnames: ir.Spec.Hostnames, + Rules: []RouteRule{createValidRuleWithInferencePoolBackendRef(routeMatches)}, + }, + } + supportedKindsForListeners := []gatewayv1.RouteGroupKind{ {Kind: gatewayv1.Kind(kinds.HTTPRoute), Group: helpers.GetPointer[gatewayv1.Group](gatewayv1.GroupName)}, {Kind: gatewayv1.Kind(kinds.GRPCRoute), Group: helpers.GetPointer[gatewayv1.Group](gatewayv1.GroupName)}, @@ -1020,6 +1135,7 @@ func TestBuildGraph(t *testing.T) { Routes: map[RouteKey]*L7Route{ CreateRouteKey(hr1): routeHR1, CreateRouteKey(gr): routeGR, + CreateRouteKey(ir): inferenceRoute, }, SupportedKinds: supportedKindsForListeners, L4Routes: map[L4RouteKey]*L4Route{}, @@ -1174,6 +1290,7 @@ func 
TestBuildGraph(t *testing.T) { CreateRouteKey(hr1): routeHR1, CreateRouteKey(hr3): routeHR3, CreateRouteKey(gr): routeGR, + CreateRouteKey(ir): inferenceRoute, }, L4Routes: map[L4RouteKey]*L4Route{ CreateRouteKeyL4(tr): routeTR, @@ -1198,6 +1315,14 @@ func TestBuildGraph(t *testing.T) { client.ObjectKeyFromObject(svc1): { GatewayNsNames: map[types.NamespacedName]struct{}{{Namespace: testNs, Name: "gateway-1"}: {}}, }, + client.ObjectKeyFromObject(inferenceSvc): { + GatewayNsNames: map[types.NamespacedName]struct{}{{Namespace: testNs, Name: "gateway-1"}: {}}, + }, + }, + ReferencedInferencePools: map[types.NamespacedName]*ReferencedInferencePool{ + client.ObjectKeyFromObject(inferencePool): { + Source: inferencePool, + }, }, ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{ client.ObjectKeyFromObject(cm): { @@ -1381,6 +1506,20 @@ func TestIsReferenced(t *testing.T) { } emptyService := &v1.Service{} + inferenceInGraph := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: "inferenceInGraph", + }, + } + inferenceNotInGraph := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: "inferenceNotInGraph", + }, + } + emptyInferencePool := &inference.InferencePool{} + createEndpointSlice := func(name string, svcName string) *discoveryV1.EndpointSlice { return &discoveryV1.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ @@ -1460,6 +1599,9 @@ func TestIsReferenced(t *testing.T) { ReferencedServices: map[types.NamespacedName]*ReferencedService{ client.ObjectKeyFromObject(serviceInGraph): {}, }, + ReferencedInferencePools: map[types.NamespacedName]*ReferencedInferencePool{ + client.ObjectKeyFromObject(inferenceInGraph): {}, + }, ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{ client.ObjectKeyFromObject(baseConfigMap): { Source: baseConfigMap, @@ -1561,6 +1703,26 @@ func TestIsReferenced(t *testing.T) { expected: false, }, + // InferencePool tests + { + name: 
"InferencePool is referenced", + resource: inferenceInGraph, + graph: graph, + expected: true, + }, + { + name: "InferencePool is not referenced", + resource: inferenceNotInGraph, + graph: graph, + expected: false, + }, + { + name: "Empty InferencePool", + resource: emptyInferencePool, + graph: graph, + expected: false, + }, + // EndpointSlice tests { name: "EndpointSlice with Service owner in graph's ReferencedServices is referenced", diff --git a/internal/controller/state/graph/grpcroute_test.go b/internal/controller/state/graph/grpcroute_test.go index 8579c54627..71f87d58c0 100644 --- a/internal/controller/state/graph/grpcroute_test.go +++ b/internal/controller/state/graph/grpcroute_test.go @@ -230,6 +230,7 @@ func TestBuildGRPCRoutes(t *testing.T) { grRoutes, test.gateways, snippetsFilters, + nil, ) g.Expect(helpers.Diff(test.expected, routes)).To(BeEmpty()) }) diff --git a/internal/controller/state/graph/httproute.go b/internal/controller/state/graph/httproute.go index 48415d0573..de7a85370d 100644 --- a/internal/controller/state/graph/httproute.go +++ b/internal/controller/state/graph/httproute.go @@ -7,13 +7,16 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/http" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/mirror" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) var ( @@ -27,6 +30,7 @@ func buildHTTPRoute( ghr *v1.HTTPRoute, gws 
map[types.NamespacedName]*Gateway, snippetsFilters map[types.NamespacedName]*SnippetsFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, ) *L7Route { r := &L7Route{ Source: ghr, @@ -59,9 +63,10 @@ func buildHTTPRoute( r.Attachable = true rules, valid, conds := processHTTPRouteRules( - ghr.Spec.Rules, + ghr, validator, getSnippetsFilterResolverForNamespace(snippetsFilters, r.Source.GetNamespace()), + inferencePools, ) r.Spec.Rules = rules @@ -113,6 +118,7 @@ func buildHTTPMirrorRoutes( tmpMirrorRoute, gateways, snippetsFilters, + nil, ) if mirrorRoute != nil { @@ -163,9 +169,11 @@ func removeHTTPMirrorFilters(filters []v1.HTTPRouteFilter) []v1.HTTPRouteFilter func processHTTPRouteRule( specRule v1.HTTPRouteRule, + routeNamespace string, rulePath *field.Path, validator validation.HTTPFieldsValidator, resolveExtRefFunc resolveExtRefFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, ) (RouteRule, routeRuleErrors) { var errors routeRuleErrors @@ -201,10 +209,32 @@ func processHTTPRouteRule( interfaceFilters = append(interfaceFilters, filter) } } - rbr := RouteBackendRef{ - BackendRef: b.BackendRef, - Filters: interfaceFilters, + + var rbr RouteBackendRef + // If route specifies an InferencePool backend, we need to convert it to its associated + // headless Service backend (that we created), so nginx config can be built properly. + // Only do this if the InferencePool actually exists. 
+ if inferencePoolBackend(b, routeNamespace, inferencePools) { + svcName := controller.CreateInferencePoolServiceName(string(b.Name)) + rbr = RouteBackendRef{ + IsInferencePool: true, + BackendRef: v1.BackendRef{ + BackendObjectReference: v1.BackendObjectReference{ + Group: helpers.GetPointer[v1.Group](""), + Kind: helpers.GetPointer[v1.Kind](kinds.Service), + Name: v1.ObjectName(svcName), + Namespace: b.Namespace, + }, + Weight: b.Weight, + }, + } + } else { + rbr = RouteBackendRef{ + BackendRef: b.BackendRef, + } } + + rbr.Filters = interfaceFilters backendRefs = append(backendRefs, rbr) } @@ -233,25 +263,28 @@ func processHTTPRouteRule( } func processHTTPRouteRules( - specRules []v1.HTTPRouteRule, + route *v1.HTTPRoute, validator validation.HTTPFieldsValidator, resolveExtRefFunc resolveExtRefFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, ) (rules []RouteRule, valid bool, conds []conditions.Condition) { - rules = make([]RouteRule, len(specRules)) + rules = make([]RouteRule, len(route.Spec.Rules)) var ( allRulesErrors routeRuleErrors atLeastOneValid bool ) - for i, rule := range specRules { + for i, rule := range route.Spec.Rules { rulePath := field.NewPath("spec").Child("rules").Index(i) rr, errors := processHTTPRouteRule( rule, + route.GetNamespace(), rulePath, validator, resolveExtRefFunc, + inferencePools, ) if rr.ValidMatches && rr.Filters.Valid { @@ -288,6 +321,32 @@ func processHTTPRouteRules( return rules, valid, conds } +// inferencePoolBackend returns if a Route references an InferencePool backend +// and that InferencePool exists. 
+func inferencePoolBackend( + backendRef v1.HTTPBackendRef, + routeNamespace string, + inferencePools map[types.NamespacedName]*inference.InferencePool, +) bool { + if backendRef.Group != nil && + *backendRef.Group == inferenceAPIGroup && + *backendRef.Kind == kinds.InferencePool { + namespace := routeNamespace + if backendRef.Namespace != nil { + namespace = string(*backendRef.Namespace) + } + key := types.NamespacedName{ + Name: string(backendRef.Name), + Namespace: namespace, + } + if _, exists := inferencePools[key]; exists { + return true + } + } + + return false +} + func validateMatch( validator validation.HTTPFieldsValidator, match v1.HTTPRouteMatch, diff --git a/internal/controller/state/graph/httproute_test.go b/internal/controller/state/graph/httproute_test.go index 3b90b0970f..0e06e5bf7e 100644 --- a/internal/controller/state/graph/httproute_test.go +++ b/internal/controller/state/graph/httproute_test.go @@ -9,6 +9,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1" @@ -117,6 +118,17 @@ var expRouteBackendRef = RouteBackendRef{ }, } +func createInferencePoolBackend(name, namespace string) gatewayv1.BackendRef { + return gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Name: gatewayv1.ObjectName(name), + Namespace: helpers.GetPointer(gatewayv1.Namespace(namespace)), + }, + } +} + func TestBuildHTTPRoutes(t *testing.T) { t.Parallel() @@ -263,6 +275,7 @@ func TestBuildHTTPRoutes(t *testing.T) { map[types.NamespacedName]*gatewayv1.GRPCRoute{}, test.gateways, snippetsFilters, + nil, ) g.Expect(helpers.Diff(test.expected, 
routes)).To(BeEmpty()) }) @@ -377,6 +390,21 @@ func TestBuildHTTPRoute(t *testing.T) { addFilterToPath(hrInvalidAndUnresolvableSnippetsFilter, "/filter", invalidSnippetsFilterExtRef) addFilterToPath(hrInvalidAndUnresolvableSnippetsFilter, "/filter", unresolvableSnippetsFilterExtRef) + // routes with an inference pool backend + hrInferencePool := createHTTPRoute("hr", gatewayNsName.Name, "example.com", "/") + hrInferencePool.Spec.Rules[0].BackendRefs = []gatewayv1.HTTPBackendRef{ + { + BackendRef: createInferencePoolBackend("ipool", gatewayNsName.Namespace), + }, + } + // route with an inference pool backend that does not exist + hrInferencePoolDoesNotExist := createHTTPRoute("hr", gatewayNsName.Name, "example.com", "/") + hrInferencePoolDoesNotExist.Spec.Rules[0].BackendRefs = []gatewayv1.HTTPBackendRef{ + { + BackendRef: createInferencePoolBackend("ipool-does-not-exist", gatewayNsName.Namespace), + }, + } + validatorInvalidFieldsInRule := &validationfakes.FakeHTTPFieldsValidator{ ValidatePathInMatchStub: func(path string) error { if path == invalidPath { @@ -943,6 +971,86 @@ func TestBuildHTTPRoute(t *testing.T) { }, name: "rule with one invalid and one unresolvable snippets filter extension ref filter", }, + { + validator: &validationfakes.FakeHTTPFieldsValidator{}, + hr: hrInferencePool, + expected: &L7Route{ + RouteType: RouteTypeHTTP, + Source: hrInferencePool, + ParentRefs: []ParentRef{ + { + Idx: 0, + Gateway: CreateParentRefGateway(gw), + SectionName: hrInferencePool.Spec.ParentRefs[0].SectionName, + }, + }, + Valid: true, + Attachable: true, + Spec: L7RouteSpec{ + Hostnames: hrInferencePool.Spec.Hostnames, + Rules: []RouteRule{ + { + ValidMatches: true, + Filters: RouteRuleFilters{ + Valid: true, + Filters: []Filter{}, + }, + Matches: hrInferencePool.Spec.Rules[0].Matches, + RouteBackendRefs: []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Group: 
helpers.GetPointer[gatewayv1.Group](""), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: "ipool-pool-svc", + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + }, + }, + }, + }, + }, + }, + }, + }, + name: "route with an inference pool backend gets converted to service", + }, + { + validator: &validationfakes.FakeHTTPFieldsValidator{}, + hr: hrInferencePoolDoesNotExist, + expected: &L7Route{ + RouteType: RouteTypeHTTP, + Source: hrInferencePoolDoesNotExist, + ParentRefs: []ParentRef{ + { + Idx: 0, + Gateway: CreateParentRefGateway(gw), + SectionName: hrInferencePoolDoesNotExist.Spec.ParentRefs[0].SectionName, + }, + }, + Valid: true, + Attachable: true, + Spec: L7RouteSpec{ + Hostnames: hrInferencePoolDoesNotExist.Spec.Hostnames, + Rules: []RouteRule{ + { + ValidMatches: true, + Filters: RouteRuleFilters{ + Valid: true, + Filters: []Filter{}, + }, + Matches: hrInferencePoolDoesNotExist.Spec.Rules[0].Matches, + RouteBackendRefs: []RouteBackendRef{ + { + BackendRef: createInferencePoolBackend("ipool-does-not-exist", gatewayNsName.Namespace), + }, + }, + }, + }, + }, + }, + name: "route with an inference pool backend that doesn't exist", + }, } gws := map[types.NamespacedName]*Gateway{ @@ -957,8 +1065,11 @@ func TestBuildHTTPRoute(t *testing.T) { snippetsFilters := map[types.NamespacedName]*SnippetsFilter{ {Namespace: "test", Name: "sf"}: {Valid: true}, } + inferencePools := map[types.NamespacedName]*inference.InferencePool{ + {Namespace: "test", Name: "ipool"}: {}, + } - route := buildHTTPRoute(test.validator, test.hr, gws, snippetsFilters) + route := buildHTTPRoute(test.validator, test.hr, gws, snippetsFilters, inferencePools) g.Expect(helpers.Diff(test.expected, route)).To(BeEmpty()) }) } @@ -1090,7 +1201,7 @@ func TestBuildHTTPRouteWithMirrorRoutes(t *testing.T) { g := NewWithT(t) routes := map[RouteKey]*L7Route{} - l7route := buildHTTPRoute(validator, hr, gateways, snippetsFilters) + l7route := buildHTTPRoute(validator, hr, gateways, 
snippetsFilters, nil) g.Expect(l7route).NotTo(BeNil()) buildHTTPMirrorRoutes(routes, l7route, hr, gateways, snippetsFilters) diff --git a/internal/controller/state/graph/inferencepools.go b/internal/controller/state/graph/inferencepools.go new file mode 100644 index 0000000000..ada688bcc5 --- /dev/null +++ b/internal/controller/state/graph/inferencepools.go @@ -0,0 +1,82 @@ +package graph + +import ( + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" + + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" +)
+
+// ReferencedInferencePool wraps an InferencePool that is referenced by a backendRef of a Route.
+type ReferencedInferencePool struct {
+	// Source is the InferencePool resource backing this entry. It stays nil when a Route
+	// references a pool that is not present in the set of known InferencePools.
+	Source *inference.InferencePool
+}
+
+// buildReferencedInferencePools collects, across all non-nil Gateways in gws, the InferencePools
+// referenced by the routes attached to each Gateway. It returns nil when no pool is referenced.
+func buildReferencedInferencePools(
+	routes map[RouteKey]*L7Route,
+	gws map[types.NamespacedName]*Gateway,
+	inferencePools map[types.NamespacedName]*inference.InferencePool,
+) map[types.NamespacedName]*ReferencedInferencePool {
+	referenced := make(map[types.NamespacedName]*ReferencedInferencePool)
+
+	for _, gateway := range gws {
+		if gateway != nil {
+			processInferencePoolsForGateway(routes, gateway, referenced, inferencePools)
+		}
+	}
+
+	if len(referenced) > 0 {
+		return referenced
+	}
+
+	// Nothing was referenced: hand back nil rather than an empty map.
+	return nil
+}
+
+// processInferencePoolsForGateway processes all InferencePools that belong to the given gateway.
+func processInferencePoolsForGateway(
+	routes map[RouteKey]*L7Route,
+	gw *Gateway,
+	referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool,
+	inferencePools map[types.NamespacedName]*inference.InferencePool,
+) {
+	// For every valid route attached to gw, record each InferencePool backend in
+	// referencedInferencePools. An entry is created even when the pool is missing from
+	// inferencePools; in that case its Source is left unset.
+	gatewayKey := client.ObjectKeyFromObject(gw.Source)
+
+	for _, route := range routes {
+		if !route.Valid || !routeBelongsToGateway(route.ParentRefs, gatewayKey) {
+			continue
+		}
+
+		for _, rule := range route.Spec.Rules {
+			for _, backend := range rule.RouteBackendRefs {
+				// A backend counts as a pool if it was flagged as one, or if its kind
+				// still says InferencePool.
+				isPool := backend.IsInferencePool ||
+					(backend.Kind != nil && *backend.Kind == kinds.InferencePool)
+				if !isPool {
+					continue
+				}
+
+				// Backends without an explicit namespace live in the route's namespace.
+				ns := route.Source.GetNamespace()
+				if backend.Namespace != nil {
+					ns = string(*backend.Namespace)
+				}
+
+				// The backend name may be the generated Service name; map it back to the pool name.
+				key := types.NamespacedName{
+					Name:      controller.GetInferencePoolName(string(backend.Name)),
+					Namespace: ns,
+				}
+
+				entry, seen := referencedInferencePools[key]
+				if !seen {
+					entry = &ReferencedInferencePool{}
+					referencedInferencePools[key] = entry
+				}
+
+				if pool, found := inferencePools[key]; found {
+					entry.Source = pool
+				}
+			}
+		}
+	}
+}
diff --git a/internal/controller/state/graph/inferencepools_test.go b/internal/controller/state/graph/inferencepools_test.go new file mode 100644 index 0000000000..d67331b5e7 --- /dev/null +++ b/internal/controller/state/graph/inferencepools_test.go @@ -0,0 +1,249 @@ +package graph + +import ( + "testing" + + .
"github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" + gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" + + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" +) + +func TestBuildReferencedInferencePools(t *testing.T) { + t.Parallel() + + gwNsName := types.NamespacedName{Namespace: "test", Name: "gwNsname"} + gws := map[types.NamespacedName]*Gateway{ + gwNsName: { + Source: &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: gwNsName.Namespace, + Name: gwNsName.Name, + }, + }, + }, + } + + getNormalRoute := func() *L7Route { + return &L7Route{ + Source: &gatewayv1.HTTPRoute{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "valid-route", + }, + }, + ParentRefs: []ParentRef{ + { + Gateway: &ParentRefGateway{NamespacedName: gwNsName}, + }, + }, + Valid: true, + Spec: L7RouteSpec{ + Rules: []RouteRule{ + { + RouteBackendRefs: []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + Name: "pool", + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + }, + }, + }, + }, + }, + }, + }, + } + } + + getModifiedRoute := func(mod func(route *L7Route) *L7Route) *L7Route { + return mod(getNormalRoute()) + } + + validRoute := getNormalRoute() + + invalidRoute := getModifiedRoute(func(route *L7Route) *L7Route { + route.Valid = false + return route + }) + + tests := []struct { + routes map[RouteKey]*L7Route + gws map[types.NamespacedName]*Gateway + inferencePools map[types.NamespacedName]*inference.InferencePool + expPools map[types.NamespacedName]*ReferencedInferencePool + name string + }{ + { + name: "no gateways", + 
gws: nil, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): validRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: nil, + }, + { + name: "invalid route", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): invalidRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: nil, + }, + { + name: "valid route with referenced inferencepool", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): validRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + }, + }, + { + name: "route with service backend", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules = []RouteRule{ + { + RouteBackendRefs: []RouteBackendRef{ + { + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + }, + }, + }, + }, + }, + } + return route + }), + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: nil, + }, + { + name: "route with both inferencepool and service backends", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): 
getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules[0].RouteBackendRefs = append(route.Spec.Rules[0].RouteBackendRefs, + RouteBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + }, + }, + }, + ) + return route + }), + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + }, + }, + { + name: "route with headless InferencePool Service backend", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules = []RouteRule{ + { + RouteBackendRefs: []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("pool")), + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + }, + }, + }, + }, + }, + } + return route + }), + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + }, + }, + { + name: "inferencepool backend with no namespace uses route namespace", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) 
*L7Route { + route.Spec.Rules[0].RouteBackendRefs[0].Namespace = nil + return route + }), + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + }, + }, + { + name: "referenced inferencepool does not exist", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): validRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{}, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: nil, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + pools := buildReferencedInferencePools(test.routes, test.gws, test.inferencePools) + g.Expect(pools).To(Equal(test.expPools)) + }) + } +} diff --git a/internal/controller/state/graph/reference_grant.go b/internal/controller/state/graph/reference_grant.go index b827d47024..3fa04ecc7a 100644 --- a/internal/controller/state/graph/reference_grant.go +++ b/internal/controller/state/graph/reference_grant.go @@ -51,7 +51,16 @@ func toSecret(nsname types.NamespacedName) toResource { func toService(nsname types.NamespacedName) toResource { return toResource{ - kind: "Service", + kind: kinds.Service, + name: nsname.Name, + namespace: nsname.Namespace, + } +} + +func toInferencePool(nsname types.NamespacedName) toResource { + return toResource{ + group: inferenceAPIGroup, + kind: kinds.InferencePool, name: nsname.Name, namespace: nsname.Namespace, } @@ -139,6 +148,7 @@ func (r *referenceGrantResolver) refAllowed(to toResource, from fromResource) bo // of the particular kind in the namespace allInNamespaceKey := allowedReference{ to: 
toResource{ + group: to.group, kind: to.kind, namespace: to.namespace, }, diff --git a/internal/controller/state/graph/reference_grant_test.go b/internal/controller/state/graph/reference_grant_test.go index 21fee614e1..bf97f22c25 100644 --- a/internal/controller/state/graph/reference_grant_test.go +++ b/internal/controller/state/graph/reference_grant_test.go @@ -189,7 +189,7 @@ func TestToService(t *testing.T) { ref := toService(types.NamespacedName{Namespace: "ns", Name: "service"}) exp := toResource{ - kind: "Service", + kind: kinds.Service, namespace: "ns", name: "service", } @@ -198,6 +198,21 @@ func TestToService(t *testing.T) { g.Expect(ref).To(Equal(exp)) } +func TestToInferencePool(t *testing.T) { + t.Parallel() + ref := toInferencePool(types.NamespacedName{Namespace: "ns", Name: "inference-pool"}) + + exp := toResource{ + group: inferenceAPIGroup, + kind: kinds.InferencePool, + namespace: "ns", + name: "inference-pool", + } + + g := NewWithT(t) + g.Expect(ref).To(Equal(exp)) +} + func TestFromGateway(t *testing.T) { t.Parallel() ref := fromGateway("ns") @@ -306,7 +321,24 @@ func TestRefAllowedFrom(t *testing.T) { }, To: []v1beta1.ReferenceGrantTo{ { - Kind: "Service", + Kind: kinds.Service, + }, + }, + }, + }, + {Namespace: allowedHTTPRouteNs, Name: "hr-2-ipool"}: { + Spec: v1beta1.ReferenceGrantSpec{ + From: []v1beta1.ReferenceGrantFrom{ + { + Group: v1beta1.GroupName, + Kind: kinds.HTTPRoute, + Namespace: v1beta1.Namespace(hrNs), + }, + }, + To: []v1beta1.ReferenceGrantTo{ + { + Group: inferenceAPIGroup, + Kind: kinds.InferencePool, }, }, }, @@ -322,7 +354,7 @@ func TestRefAllowedFrom(t *testing.T) { }, To: []v1beta1.ReferenceGrantTo{ { - Kind: "Service", + Kind: kinds.Service, }, }, }, @@ -338,7 +370,7 @@ func TestRefAllowedFrom(t *testing.T) { }, To: []v1beta1.ReferenceGrantTo{ { - Kind: "Service", + Kind: kinds.Service, }, }, }, @@ -375,6 +407,18 @@ func TestRefAllowedFrom(t *testing.T) { toResource: toService(notAllowedNsName), expAllowed: false, }, 
+ { + name: "ref allowed from httproute to inferencepool", + refAllowedFrom: fromHTTPRoute(hrNs), + toResource: toInferencePool(allowedHTTPRouteNsName), + expAllowed: true, + }, + { + name: "ref not allowed from httproute to inferencepool", + refAllowedFrom: fromHTTPRoute(hrNs), + toResource: toInferencePool(notAllowedNsName), + expAllowed: false, + }, { name: "ref allowed from grpcroute to service", refAllowedFrom: fromGRPCRoute(grNs), diff --git a/internal/controller/state/graph/route_common.go b/internal/controller/state/graph/route_common.go index c156ca738a..f3d3b04e4a 100644 --- a/internal/controller/state/graph/route_common.go +++ b/internal/controller/state/graph/route_common.go @@ -10,6 +10,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" v1alpha "sigs.k8s.io/gateway-api/apis/v1alpha2" @@ -19,7 +20,10 @@ import ( "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) -const wildcardHostname = "~^" +const ( + wildcardHostname = "~^" + inferenceAPIGroup = "inference.networking.k8s.io" +) // ParentRef describes a reference to a parent in a Route. type ParentRef struct { @@ -157,11 +161,15 @@ type RouteRule struct { // RouteBackendRef is a wrapper for v1.BackendRef and any BackendRef filters from the HTTPRoute or GRPCRoute. type RouteBackendRef struct { + v1.BackendRef + // If this backend is defined in a RequestMirror filter, this value will indicate the filter's index. MirrorBackendIdx *int - v1.BackendRef Filters []any + + // IsInferencePool indicates if this backend is an InferencePool disguised as a Service. + IsInferencePool bool } // CreateRouteKey takes a client.Object and creates a RouteKey. 
@@ -242,6 +250,7 @@ func buildRoutesForGateways( grpcRoutes map[types.NamespacedName]*v1.GRPCRoute, gateways map[types.NamespacedName]*Gateway, snippetsFilters map[types.NamespacedName]*SnippetsFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, ) map[RouteKey]*L7Route { if len(gateways) == 0 { return nil @@ -250,7 +259,7 @@ func buildRoutesForGateways( routes := make(map[RouteKey]*L7Route) for _, route := range httpRoutes { - r := buildHTTPRoute(validator, route, gateways, snippetsFilters) + r := buildHTTPRoute(validator, route, gateways, snippetsFilters, inferencePools) if r == nil { continue } diff --git a/internal/controller/state/graph/service.go b/internal/controller/state/graph/service.go index d43ecacfd8..3a702facc9 100644 --- a/internal/controller/state/graph/service.go +++ b/internal/controller/state/graph/service.go @@ -34,7 +34,6 @@ func buildReferencedServices( } processL7RoutesForGateway(l7routes, gw, gwNsName, referencedServices, services) - processL4RoutesForGateway(l4Routes, gw, gwNsName, referencedServices, services) } diff --git a/internal/framework/controller/resource.go b/internal/framework/controller/resource.go index a0d49e3789..d17662169e 100644 --- a/internal/framework/controller/resource.go +++ b/internal/framework/controller/resource.go @@ -2,10 +2,31 @@ package controller import ( "fmt" + "strings" ) +// inferencePoolServiceSuffix is the suffix of the headless Service name for an InferencePool. +const inferencePoolServiceSuffix = "-pool-svc" + // CreateNginxResourceName creates the base resource name for all nginx resources // created by the control plane. func CreateNginxResourceName(prefix, suffix string) string { return fmt.Sprintf("%s-%s", prefix, suffix) } + +// CreateInferencePoolServiceName creates the name for a headless Service that +// we create for an InferencePool. 
+func CreateInferencePoolServiceName(name string) string { + svcName := fmt.Sprintf("%s%s", name, inferencePoolServiceSuffix) + // if InferencePool name is already at or near max length, just use that name + if len(svcName) > 253 { + return name + } + + return svcName +} + +// GetInferencePoolName returns the name of the InferencePool for a given headless Service name. +func GetInferencePoolName(serviceName string) string { + return strings.TrimSuffix(serviceName, inferencePoolServiceSuffix) +} diff --git a/internal/framework/kinds/kinds.go b/internal/framework/kinds/kinds.go index 35ca8e2b00..b59b06df96 100644 --- a/internal/framework/kinds/kinds.go +++ b/internal/framework/kinds/kinds.go @@ -25,6 +25,12 @@ const ( BackendTLSPolicy = "BackendTLSPolicy" ) +// Gateway API Inference Extension kinds. +const ( + // InferencePool is the InferencePool kind. + InferencePool = "InferencePool" +) + // Core API Kinds. const ( // Service is the Service kind. diff --git a/tests/go.mod b/tests/go.mod index 1dacb8289d..49f434614d 100644 --- a/tests/go.mod +++ b/tests/go.mod @@ -68,7 +68,7 @@ require ( golang.org/x/sys v0.35.0 // indirect golang.org/x/term v0.34.0 // indirect golang.org/x/text v0.28.0 // indirect - golang.org/x/time v0.9.0 // indirect + golang.org/x/time v0.12.0 // indirect golang.org/x/tools v0.36.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect diff --git a/tests/go.sum b/tests/go.sum index cf26652a10..672cc6017c 100644 --- a/tests/go.sum +++ b/tests/go.sum @@ -191,8 +191,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= -golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= 
-golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=