From 308136be59772f2e642a93037d7df74bc1ff5b8c Mon Sep 17 00:00:00 2001 From: Saylor Berman Date: Tue, 9 Sep 2025 10:17:45 -0600 Subject: [PATCH 01/10] Basic NJS module to extract model name (#3877) Problem: To support the full Gateway API Inference Extension, we need to be able to extract the model name from the client request body in certain situations. Solution: Add a basic NJS module to extract the model name. This module will be enhanced (I've added notes) to be included in the full solution. On its own, it is not yet used. --- .nvmrc | 2 +- Makefile | 2 +- build/Dockerfile.nginx | 2 +- build/Dockerfile.nginxplus | 2 +- .../controller/nginx/conf/nginx-plus.conf | 3 +- internal/controller/nginx/conf/nginx.conf | 3 +- internal/controller/nginx/modules/README.md | 1 + internal/controller/nginx/modules/src/epp.js | 29 +++++++++++ .../controller/nginx/modules/test/epp.test.js | 52 +++++++++++++++++++ 9 files changed, 90 insertions(+), 6 deletions(-) create mode 100644 internal/controller/nginx/modules/src/epp.js create mode 100644 internal/controller/nginx/modules/test/epp.test.js diff --git a/.nvmrc b/.nvmrc index 2bd5a0a98a..a45fd52cc5 100644 --- a/.nvmrc +++ b/.nvmrc @@ -1 +1 @@ -22 +24 diff --git a/Makefile b/Makefile index 25fca06a44..e492d57bdb 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,7 @@ GEN_CRD_API_REFERENCE_DOCS_VERSION = v0.3.0 # renovate: datasource=go depName=sigs.k8s.io/controller-tools CONTROLLER_TOOLS_VERSION = v0.19.0 # renovate: datasource=docker depName=node -NODE_VERSION = 22 +NODE_VERSION = 24 # renovate: datasource=docker depName=quay.io/helmpack/chart-testing CHART_TESTING_VERSION = v3.14.0 # renovate: datasource=github-tags depName=dadav/helm-schema diff --git a/build/Dockerfile.nginx b/build/Dockerfile.nginx index e219eb9b79..322eb54337 100644 --- a/build/Dockerfile.nginx +++ b/build/Dockerfile.nginx @@ -23,7 +23,7 @@ RUN apk add --no-cache bash \ && ln -sf /dev/stderr /var/log/nginx/error.log COPY build/entrypoint.sh /agent/entrypoint.sh -COPY ${NJS_DIR}/httpmatches.js /usr/lib/nginx/modules/njs/httpmatches.js +COPY ${NJS_DIR}/ /usr/lib/nginx/modules/njs/ COPY ${NGINX_CONF_DIR}/nginx.conf /etc/nginx/nginx.conf COPY ${NGINX_CONF_DIR}/grpc-error-locations.conf /etc/nginx/grpc-error-locations.conf COPY ${NGINX_CONF_DIR}/grpc-error-pages.conf /etc/nginx/grpc-error-pages.conf diff --git a/build/Dockerfile.nginxplus b/build/Dockerfile.nginxplus index 65676a5040..ebe913153c 100644 --- a/build/Dockerfile.nginxplus +++ b/build/Dockerfile.nginxplus @@ -29,7 +29,7 @@ RUN apk add --no-cache bash \ && ln -sf /dev/stderr /var/log/nginx/error.log COPY build/entrypoint.sh /agent/entrypoint.sh -COPY ${NJS_DIR}/httpmatches.js /usr/lib/nginx/modules/njs/httpmatches.js +COPY ${NJS_DIR}/ /usr/lib/nginx/modules/njs/ COPY ${NGINX_CONF_DIR}/nginx-plus.conf /etc/nginx/nginx.conf COPY ${NGINX_CONF_DIR}/grpc-error-locations.conf /etc/nginx/grpc-error-locations.conf COPY ${NGINX_CONF_DIR}/grpc-error-pages.conf /etc/nginx/grpc-error-pages.conf diff --git a/internal/controller/nginx/conf/nginx-plus.conf b/internal/controller/nginx/conf/nginx-plus.conf index bcf0bfc613..56029281b7 100644 --- a/internal/controller/nginx/conf/nginx-plus.conf +++ b/internal/controller/nginx/conf/nginx-plus.conf @@ -12,7 +12,8 @@ events { http { include /etc/nginx/conf.d/*.conf; include /etc/nginx/mime.types; - js_import modules/njs/httpmatches.js; + js_import /usr/lib/nginx/modules/njs/httpmatches.js; + js_import /usr/lib/nginx/modules/njs/epp.js; default_type application/octet-stream; diff --git 
a/internal/controller/nginx/conf/nginx.conf b/internal/controller/nginx/conf/nginx.conf index 46179b930c..5b64fe4761 100644 --- a/internal/controller/nginx/conf/nginx.conf +++ b/internal/controller/nginx/conf/nginx.conf @@ -12,7 +12,8 @@ events { http { include /etc/nginx/conf.d/*.conf; include /etc/nginx/mime.types; - js_import modules/njs/httpmatches.js; + js_import /usr/lib/nginx/modules/njs/httpmatches.js; + js_import /usr/lib/nginx/modules/njs/epp.js; default_type application/octet-stream; diff --git a/internal/controller/nginx/modules/README.md b/internal/controller/nginx/modules/README.md index 9c7c805276..3313ea6604 100644 --- a/internal/controller/nginx/modules/README.md +++ b/internal/controller/nginx/modules/README.md @@ -22,6 +22,7 @@ dependencies. - [httpmatches](./src/httpmatches.js): a location handler for HTTP requests. It redirects requests to an internal location block based on the request's headers, arguments, and method. +- [epp](./src/epp.js): handles communication with the EndpointPicker (EPP) component. This is for acquiring a specific AI endpoint to route client traffic to when using the Gateway API Inference Extension. ### Helpful Resources for Module Development diff --git a/internal/controller/nginx/modules/src/epp.js b/internal/controller/nginx/modules/src/epp.js new file mode 100644 index 0000000000..8efcd70ece --- /dev/null +++ b/internal/controller/nginx/modules/src/epp.js @@ -0,0 +1,29 @@ +// This file contains the methods to get an AI workload endpoint from the EndpointPicker (EPP). + +// TODO (sberman): this module will need to be enhanced to include the following: +// - function that sends the subrequest to the Go middleware application (to get the endpoint from EPP) +// - if a user has specified an Exact matching condition for a model name, extract the model name from +// the request body, and if it matches that condition, set the proper value in the X-Gateway-Model-Name header +// (based on if we do a redirect or traffic split (see design doc)) in the subrequest. If the client request +// already has this header set, then I don't think we need to extract the model from the body, just pass +// through the existing header. +// I believe we have to use js_content to call the NJS functionality. Because this takes over +// the request, we will likely have to finish the NJS functionality with an internalRedirect to an internal +// location that proxy_passes to the chosen endpoint. + +// extractModel extracts the model name from the request body. 
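+//
+// Illustrative only (not part of this patch; names below are assumptions): once the
+// js_content handler described in the TODO above exists, it could call extractModel
+// and then hand the request off roughly like this:
+//
+//   function route(r) {
+//     r.variables.inference_model = extractModel(r); // assumes a js_var $inference_model declaration
+//     r.internalRedirect('@inference_backend');      // named location that proxy_passes to the picked endpoint
+//   }
+//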
+function extractModel(r) { + try { + var body = JSON.parse(r.requestText); + if (body && body.model !== undefined) { + return String(body.model); + } + } catch (e) { + r.error(`error parsing request body for model name: ${e.message}`); + return ''; + } + r.error('request body does not contain model parameter'); + return ''; +} + +export default { extractModel }; diff --git a/internal/controller/nginx/modules/test/epp.test.js b/internal/controller/nginx/modules/test/epp.test.js new file mode 100644 index 0000000000..6994423e7a --- /dev/null +++ b/internal/controller/nginx/modules/test/epp.test.js @@ -0,0 +1,52 @@ +import { default as epp } from '../src/epp.js'; +import { expect, describe, it } from 'vitest'; + +function makeRequest(body) { + let r = { + // Test mocks + error(msg) { + r.variables.error = msg; + }, + requestText: body, + variables: {}, + }; + + return r; +} + +describe('extractModel', () => { + const tests = [ + { + name: 'returns the model value', + body: '{"model":"gpt-4"}', + model: 'gpt-4', + error: undefined, + }, + { + name: 'returns empty string if model is missing', + body: '{"foo":1}', + model: '', + error: 'request body does not contain model parameter', + }, + { + name: 'returns empty string for invalid JSON', + body: 'not-json', + model: '', + error: `error parsing request body for model name: Unexpected token 'o', "not-json" is not valid JSON`, + }, + { + name: 'empty request body', + body: '', + model: '', + error: 'error parsing request body for model name: Unexpected end of JSON input', + }, + ]; + + tests.forEach((test) => { + it(test.name, () => { + let r = makeRequest(test.body); + expect(epp.extractModel(r)).to.equal(test.model); + expect(r.variables.error).to.equal(test.error); + }); + }); +}); From 032c0dc016c3ec666070e58d4002e72a3e06cfda Mon Sep 17 00:00:00 2001 From: Saylor Berman Date: Tue, 16 Sep 2025 12:07:30 -0600 Subject: [PATCH 02/10] Watch InferencePools and configure nginx (#3894) This commit adds support for the control plane to watch InferencePools. A feature flag has been added to enable/disable processing these resources. By default, it is disabled. When an HTTPRoute references an InferencePool, we will create a headless Service associated with that InferencePool, and reference it internally in the graph config for that Route. This allows us to use all of our existing logic to get the endpoints and build the proper nginx config for those endpoints. In a future commit, the nginx config will be updated to handle the proper load balancing for the AI workloads, but for now we just use our default methods by proxy_passing to the upstream. 
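For illustration only (not part of this patch; resource names are made up): the trigger for the headless Service described above is an HTTPRoute whose backendRef points at an InferencePool from the inference.networking.k8s.io API. A minimal sketch:

apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: inference-route            # illustrative name
  namespace: default
spec:
  parentRefs:
  - name: gateway                  # illustrative Gateway name
  rules:
  - backendRefs:
    - group: inference.networking.k8s.io
      kind: InferencePool
      name: vllm-llama-pool        # illustrative InferencePool name

With the --gateway-api-inference-extension flag enabled, processing a route like this produces a headless Service that selects the pool's pods, which the existing endpoint-resolution and upstream-generation logic then consumes.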
--- Makefile | 8 + charts/nginx-gateway-fabric/README.md | 3 +- .../templates/clusterrole.yaml | 16 + .../templates/deployment.yaml | 3 + .../nginx-gateway-fabric/values.schema.json | 14 + charts/nginx-gateway-fabric/values.yaml | 4 + cmd/gateway/commands.go | 11 + .../inference-extension/kustomization.yaml | 4 + deploy/inference-nginx-plus/deploy.yaml | 441 ++++++++++++++++++ deploy/inference/deploy.yaml | 438 +++++++++++++++++ .../helm/inference-nginx-plus/values.yaml | 10 + examples/helm/inference/values.yaml | 4 + go.mod | 3 +- go.sum | 10 +- internal/controller/config/config.go | 2 + internal/controller/handler.go | 126 ++++- internal/controller/handler_test.go | 265 +++++++++++ internal/controller/manager.go | 19 + internal/controller/manager_test.go | 41 +- internal/controller/nginx/modules/src/epp.js | 2 +- internal/controller/state/change_processor.go | 7 + .../controller/state/change_processor_test.go | 2 + .../controller/state/graph/backend_refs.go | 152 +++++- .../state/graph/backend_refs_test.go | 363 +++++++++++++- internal/controller/state/graph/graph.go | 17 +- internal/controller/state/graph/graph_test.go | 168 ++++++- .../controller/state/graph/grpcroute_test.go | 1 + internal/controller/state/graph/httproute.go | 67 ++- .../controller/state/graph/httproute_test.go | 118 ++++- .../controller/state/graph/inferencepools.go | 82 ++++ .../state/graph/inferencepools_test.go | 249 ++++++++++ .../controller/state/graph/reference_grant.go | 12 +- .../state/graph/reference_grant_test.go | 52 ++- .../controller/state/graph/route_common.go | 15 +- internal/controller/state/graph/service.go | 1 - internal/framework/controller/resource.go | 21 + internal/framework/kinds/kinds.go | 6 + tests/go.mod | 2 +- tests/go.sum | 4 +- 39 files changed, 2699 insertions(+), 64 deletions(-) create mode 100644 config/crd/inference-extension/kustomization.yaml create mode 100644 deploy/inference-nginx-plus/deploy.yaml create mode 100644 deploy/inference/deploy.yaml create mode 100644 examples/helm/inference-nginx-plus/values.yaml create mode 100644 examples/helm/inference/values.yaml create mode 100644 internal/controller/state/graph/inferencepools.go create mode 100644 internal/controller/state/graph/inferencepools_test.go diff --git a/Makefile b/Makefile index e492d57bdb..797216a42e 100644 --- a/Makefile +++ b/Makefile @@ -139,6 +139,14 @@ install-gateway-crds: ## Install Gateway API CRDs uninstall-gateway-crds: ## Uninstall Gateway API CRDs kubectl kustomize $(SELF_DIR)config/crd/gateway-api/$(if $(filter true,$(ENABLE_EXPERIMENTAL)),experimental,standard) | kubectl delete -f - +.PHONY: install-inference-crds +install-inference-crds: ## Install Gateway API Inference Extension CRDs + kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl apply -f - + +.PHONY: uninstall-inference-crds +uninstall-inference-crds: ## Uninstall Gateway API Inference Extension CRDs + kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl delete -f - + .PHONY: generate-manifests generate-manifests: ## Generate manifests using Helm. ./scripts/generate-manifests.sh diff --git a/charts/nginx-gateway-fabric/README.md b/charts/nginx-gateway-fabric/README.md index 7d0052bf8d..a77df4ebb8 100644 --- a/charts/nginx-gateway-fabric/README.md +++ b/charts/nginx-gateway-fabric/README.md @@ -245,7 +245,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri | `nginx.usage.resolver` | The nameserver used to resolve the NGINX Plus usage reporting endpoint. 
Used with NGINX Instance Manager. | string | `""` | | `nginx.usage.secretName` | The name of the Secret containing the JWT for NGINX Plus usage reporting. Must exist in the same namespace that the NGINX Gateway Fabric control plane is running in (default namespace: nginx-gateway). | string | `"nplus-license"` | | `nginx.usage.skipVerify` | Disable client verification of the NGINX Plus usage reporting server certificate. | bool | `false` | -| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` | +| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"gwAPIInferenceExtension":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` | | `nginxGateway.affinity` | The affinity of the NGINX Gateway Fabric control plane pod. | object | `{}` | | `nginxGateway.autoscaling` | Autoscaling configuration for the NGINX Gateway Fabric control plane. | object | `{"enable":false}` | | `nginxGateway.autoscaling.enable` | Enable or disable Horizontal Pod Autoscaler for the control plane. | bool | `false` | @@ -257,6 +257,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri | `nginxGateway.gatewayClassName` | The name of the GatewayClass that will be created as part of this release. Every NGINX Gateway Fabric must have a unique corresponding GatewayClass resource. NGINX Gateway Fabric only processes resources that belong to its class - i.e. have the "gatewayClassName" field resource equal to the class. 
| string | `"nginx"` | | `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | string | `"gateway.nginx.org/nginx-gateway-controller"` | | `nginxGateway.gwAPIExperimentalFeatures.enable` | Enable the experimental features of Gateway API which are supported by NGINX Gateway Fabric. Requires the Gateway APIs installed from the experimental channel. | bool | `false` | +| `nginxGateway.gwAPIInferenceExtension.enable` | Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. | bool | `false` | | `nginxGateway.image` | The image configuration for the NGINX Gateway Fabric control plane. | object | `{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"}` | | `nginxGateway.image.repository` | The NGINX Gateway Fabric image to use | string | `"ghcr.io/nginx/nginx-gateway-fabric"` | | `nginxGateway.kind` | The kind of the NGINX Gateway Fabric installation - currently, only deployment is supported. | string | `"deployment"` | diff --git a/charts/nginx-gateway-fabric/templates/clusterrole.yaml b/charts/nginx-gateway-fabric/templates/clusterrole.yaml index 57c92e4692..890585afc0 100644 --- a/charts/nginx-gateway-fabric/templates/clusterrole.yaml +++ b/charts/nginx-gateway-fabric/templates/clusterrole.yaml @@ -147,6 +147,22 @@ rules: {{- end }} verbs: - update +{{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }} +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools/status + verbs: + - update +{{- end }} {{- if .Values.nginxGateway.leaderElection.enable }} - apiGroups: - coordination.k8s.io diff --git a/charts/nginx-gateway-fabric/templates/deployment.yaml b/charts/nginx-gateway-fabric/templates/deployment.yaml index 5bc292bdb4..604acd768c 100644 --- a/charts/nginx-gateway-fabric/templates/deployment.yaml +++ b/charts/nginx-gateway-fabric/templates/deployment.yaml @@ -100,6 +100,9 @@ spec: {{- if .Values.nginxGateway.gwAPIExperimentalFeatures.enable }} - --gateway-api-experimental-features {{- end }} + {{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }} + - --gateway-api-inference-extension + {{- end }} {{- if .Values.nginxGateway.snippetsFilters.enable }} - --snippets-filters {{- end }} diff --git a/charts/nginx-gateway-fabric/values.schema.json b/charts/nginx-gateway-fabric/values.schema.json index 9f44991db3..c1456d2503 100644 --- a/charts/nginx-gateway-fabric/values.schema.json +++ b/charts/nginx-gateway-fabric/values.schema.json @@ -838,6 +838,20 @@ "title": "gwAPIExperimentalFeatures", "type": "object" }, + "gwAPIInferenceExtension": { + "properties": { + "enable": { + "default": false, + "description": "Enable Gateway API Inference Extension support. 
Allows for configuring InferencePools to route traffic to AI workloads.", + "required": [], + "title": "enable", + "type": "boolean" + } + }, + "required": [], + "title": "gwAPIInferenceExtension", + "type": "object" + }, "image": { "description": "The image configuration for the NGINX Gateway Fabric control plane.", "properties": { diff --git a/charts/nginx-gateway-fabric/values.yaml b/charts/nginx-gateway-fabric/values.yaml index 52f1e03e55..4e3747a9d1 100644 --- a/charts/nginx-gateway-fabric/values.yaml +++ b/charts/nginx-gateway-fabric/values.yaml @@ -210,6 +210,10 @@ nginxGateway: # APIs installed from the experimental channel. enable: false + gwAPIInferenceExtension: + # -- Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. + enable: false + snippetsFilters: # -- Enable SnippetsFilters feature. SnippetsFilters allow inserting NGINX configuration into the generated NGINX # config for HTTPRoute and GRPCRoute resources. diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go index 4cb67bf3ec..a38cef2dd6 100644 --- a/cmd/gateway/commands.go +++ b/cmd/gateway/commands.go @@ -85,6 +85,7 @@ func createControllerCommand() *cobra.Command { leaderElectionLockNameFlag = "leader-election-lock-name" productTelemetryDisableFlag = "product-telemetry-disable" gwAPIExperimentalFlag = "gateway-api-experimental-features" + gwAPIInferenceExtensionFlag = "gateway-api-inference-extension" nginxDockerSecretFlag = "nginx-docker-secret" //nolint:gosec // not credentials usageReportSecretFlag = "usage-report-secret" usageReportEndpointFlag = "usage-report-endpoint" @@ -151,6 +152,7 @@ func createControllerCommand() *cobra.Command { } gwExperimentalFeatures bool + gwInferenceExtension bool disableProductTelemetry bool @@ -270,6 +272,7 @@ func createControllerCommand() *cobra.Command { }, Plus: plus, ExperimentalFeatures: gwExperimentalFeatures, + InferenceExtension: gwInferenceExtension, ImageSource: imageSource, Flags: config.Flags{ Names: flagKeys, @@ -430,6 +433,14 @@ func createControllerCommand() *cobra.Command { "Requires the Gateway APIs installed from the experimental channel.", ) + cmd.Flags().BoolVar( + &gwInferenceExtension, + gwAPIInferenceExtensionFlag, + false, + "Enable Gateway API Inference Extension support. 
Allows for configuring InferencePools to route "+ + "traffic to AI workloads.", + ) + cmd.Flags().Var( &nginxDockerSecrets, nginxDockerSecretFlag, diff --git a/config/crd/inference-extension/kustomization.yaml b/config/crd/inference-extension/kustomization.yaml new file mode 100644 index 0000000000..6b6e210cf5 --- /dev/null +++ b/config/crd/inference-extension/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd?timeout=120&ref=v1.0.0 diff --git a/deploy/inference-nginx-plus/deploy.yaml b/deploy/inference-nginx-plus/deploy.yaml new file mode 100644 index 0000000000..77ee4da544 --- /dev/null +++ b/deploy/inference-nginx-plus/deploy.yaml @@ -0,0 +1,441 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: nginx-gateway +--- +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +rules: +- apiGroups: + - "" + resources: + - secrets + verbs: + - create + - update + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +rules: +- apiGroups: + - "" + - apps + - autoscaling + resources: + - secrets + - configmaps + - serviceaccounts + - services + - deployments + - daemonsets + - horizontalpodautoscalers + verbs: + - create + - update + - delete + - list + - get + - watch +- apiGroups: + - "" + resources: + - namespaces + - pods + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list +- apiGroups: + - "" + resources: + - nodes + verbs: + - list +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - gateway.networking.k8s.io + resources: + - gatewayclasses + - gateways + - httproutes + - referencegrants + - grpcroutes + verbs: + - list + - watch +- apiGroups: + - gateway.networking.k8s.io + resources: + - httproutes/status + - gateways/status + - gatewayclasses/status + - grpcroutes/status + verbs: + - update +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways + verbs: + - get + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxproxies + - clientsettingspolicies + - observabilitypolicies + - upstreamsettingspolicies + verbs: + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways/status + - clientsettingspolicies/status + - observabilitypolicies/status + - upstreamsettingspolicies/status + verbs: + - update +- apiGroups: + - 
inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools/status + verbs: + - update +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: nginx-gateway-cert-generator +subjects: +- kind: ServiceAccount + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: nginx-gateway +subjects: +- kind: ServiceAccount + name: nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +spec: + ports: + - name: agent-grpc + port: 443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + template: + metadata: + annotations: + prometheus.io/port: "9113" + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + spec: + automountServiceAccountToken: true + containers: + - args: + - controller + - --gateway-ctlr-name=gateway.nginx.org/nginx-gateway-controller + - --gatewayclass=nginx + - --config=nginx-gateway-config + - --service=nginx-gateway + - --agent-tls-secret=agent-tls + - --nginx-docker-secret=nginx-plus-registry-secret + - --nginx-plus + - --usage-report-secret=nplus-license + - --metrics-port=9113 + - --health-port=8081 + - --leader-election-lock-name=nginx-gateway-leader-election + - --gateway-api-inference-extension + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: INSTANCE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['app.kubernetes.io/instance'] + - name: IMAGE_NAME + value: ghcr.io/nginx/nginx-gateway-fabric:edge + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: nginx-gateway + ports: + - containerPort: 8443 + name: agent-grpc + - containerPort: 9113 + name: metrics + - containerPort: 8081 + name: health + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 3 + periodSeconds: 1 + securityContext: + 
allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /var/run/secrets/ngf + name: nginx-agent-tls + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway + terminationGracePeriodSeconds: 30 + volumes: + - name: nginx-agent-tls + secret: + secretName: server-tls +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +spec: + template: + metadata: + annotations: null + spec: + containers: + - args: + - generate-certs + - --service=nginx-gateway + - --cluster-domain=cluster.local + - --server-tls-secret=server-tls + - --agent-tls-secret=agent-tls + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: cert-generator + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + restartPolicy: Never + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway-cert-generator + ttlSecondsAfterFinished: 30 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx +spec: + controllerName: gateway.nginx.org/nginx-gateway-controller + parametersRef: + group: gateway.nginx.org + kind: NginxProxy + name: nginx-gateway-proxy-config + namespace: nginx-gateway +--- +apiVersion: gateway.nginx.org/v1alpha1 +kind: NginxGateway +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-config + namespace: nginx-gateway +spec: + logging: + level: info +--- +apiVersion: gateway.nginx.org/v1alpha2 +kind: NginxProxy +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-proxy-config + namespace: nginx-gateway +spec: + kubernetes: + deployment: + container: + image: + pullPolicy: Always + repository: private-registry.nginx.com/nginx-gateway-fabric/nginx-plus + tag: edge + replicas: 1 + service: + externalTrafficPolicy: Local + type: LoadBalancer diff --git a/deploy/inference/deploy.yaml b/deploy/inference/deploy.yaml new file mode 100644 index 0000000000..49a8f85053 --- /dev/null +++ b/deploy/inference/deploy.yaml @@ -0,0 +1,438 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: nginx-gateway +--- +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/instance: 
nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +rules: +- apiGroups: + - "" + resources: + - secrets + verbs: + - create + - update + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +rules: +- apiGroups: + - "" + - apps + - autoscaling + resources: + - secrets + - configmaps + - serviceaccounts + - services + - deployments + - daemonsets + - horizontalpodautoscalers + verbs: + - create + - update + - delete + - list + - get + - watch +- apiGroups: + - "" + resources: + - namespaces + - pods + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + verbs: + - get + - list +- apiGroups: + - "" + resources: + - nodes + verbs: + - list +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - gateway.networking.k8s.io + resources: + - gatewayclasses + - gateways + - httproutes + - referencegrants + - grpcroutes + verbs: + - list + - watch +- apiGroups: + - gateway.networking.k8s.io + resources: + - httproutes/status + - gateways/status + - gatewayclasses/status + - grpcroutes/status + verbs: + - update +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways + verbs: + - get + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxproxies + - clientsettingspolicies + - observabilitypolicies + - upstreamsettingspolicies + verbs: + - list + - watch +- apiGroups: + - gateway.nginx.org + resources: + - nginxgateways/status + - clientsettingspolicies/status + - observabilitypolicies/status + - upstreamsettingspolicies/status + verbs: + - update +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools/status + verbs: + - update +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: nginx-gateway-cert-generator +subjects: +- kind: ServiceAccount + name: nginx-gateway-cert-generator + namespace: nginx-gateway +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: nginx-gateway +subjects: +- kind: ServiceAccount + name: nginx-gateway + namespace: nginx-gateway +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: 
nginx-gateway +spec: + ports: + - name: agent-grpc + port: 443 + protocol: TCP + targetPort: 8443 + selector: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway + namespace: nginx-gateway +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + template: + metadata: + annotations: + prometheus.io/port: "9113" + prometheus.io/scrape: "true" + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + spec: + automountServiceAccountToken: true + containers: + - args: + - controller + - --gateway-ctlr-name=gateway.nginx.org/nginx-gateway-controller + - --gatewayclass=nginx + - --config=nginx-gateway-config + - --service=nginx-gateway + - --agent-tls-secret=agent-tls + - --metrics-port=9113 + - --health-port=8081 + - --leader-election-lock-name=nginx-gateway-leader-election + - --gateway-api-inference-extension + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: INSTANCE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['app.kubernetes.io/instance'] + - name: IMAGE_NAME + value: ghcr.io/nginx/nginx-gateway-fabric:edge + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: nginx-gateway + ports: + - containerPort: 8443 + name: agent-grpc + - containerPort: 9113 + name: metrics + - containerPort: 8081 + name: health + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 3 + periodSeconds: 1 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /var/run/secrets/ngf + name: nginx-agent-tls + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway + terminationGracePeriodSeconds: 30 + volumes: + - name: nginx-agent-tls + secret: + secretName: server-tls +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-cert-generator + namespace: nginx-gateway +spec: + template: + metadata: + annotations: null + spec: + containers: + - args: + - generate-certs + - --service=nginx-gateway + - --cluster-domain=cluster.local + - --server-tls-secret=server-tls + - --agent-tls-secret=agent-tls + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + image: ghcr.io/nginx/nginx-gateway-fabric:edge + imagePullPolicy: Always + name: cert-generator + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1001 + runAsUser: 101 + seccompProfile: + type: RuntimeDefault + restartPolicy: Never + securityContext: + fsGroup: 1001 + runAsNonRoot: true + serviceAccountName: nginx-gateway-cert-generator + ttlSecondsAfterFinished: 30 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + 
app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx +spec: + controllerName: gateway.nginx.org/nginx-gateway-controller + parametersRef: + group: gateway.nginx.org + kind: NginxProxy + name: nginx-gateway-proxy-config + namespace: nginx-gateway +--- +apiVersion: gateway.nginx.org/v1alpha1 +kind: NginxGateway +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-config + namespace: nginx-gateway +spec: + logging: + level: info +--- +apiVersion: gateway.nginx.org/v1alpha2 +kind: NginxProxy +metadata: + labels: + app.kubernetes.io/instance: nginx-gateway + app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/version: edge + name: nginx-gateway-proxy-config + namespace: nginx-gateway +spec: + kubernetes: + deployment: + container: + image: + pullPolicy: Always + repository: ghcr.io/nginx/nginx-gateway-fabric/nginx + tag: edge + replicas: 1 + service: + externalTrafficPolicy: Local + type: LoadBalancer diff --git a/examples/helm/inference-nginx-plus/values.yaml b/examples/helm/inference-nginx-plus/values.yaml new file mode 100644 index 0000000000..1d89293db2 --- /dev/null +++ b/examples/helm/inference-nginx-plus/values.yaml @@ -0,0 +1,10 @@ +nginxGateway: + name: nginx-gateway + gwAPIInferenceExtension: + enable: true + +nginx: + plus: true + image: + repository: private-registry.nginx.com/nginx-gateway-fabric/nginx-plus + imagePullSecret: nginx-plus-registry-secret diff --git a/examples/helm/inference/values.yaml b/examples/helm/inference/values.yaml new file mode 100644 index 0000000000..0bb54b57e9 --- /dev/null +++ b/examples/helm/inference/values.yaml @@ -0,0 +1,4 @@ +nginxGateway: + name: nginx-gateway + gwAPIInferenceExtension: + enable: true diff --git a/go.mod b/go.mod index 02af2757a0..4533824369 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( k8s.io/klog/v2 v2.130.1 sigs.k8s.io/controller-runtime v0.22.3 sigs.k8s.io/gateway-api v1.3.0 + sigs.k8s.io/gateway-api-inference-extension v1.0.0 ) require ( @@ -80,7 +81,7 @@ require ( golang.org/x/sync v0.17.0 // indirect golang.org/x/sys v0.36.0 // indirect golang.org/x/term v0.35.0 // indirect - golang.org/x/time v0.9.0 // indirect + golang.org/x/time v0.12.0 // indirect golang.org/x/tools v0.37.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect diff --git a/go.sum b/go.sum index 39a0169511..22c578fa59 100644 --- a/go.sum +++ b/go.sum @@ -238,8 +238,8 @@ github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= go.opentelemetry.io/otel 
v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= @@ -303,8 +303,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= -golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= -golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= @@ -356,6 +356,8 @@ sigs.k8s.io/controller-runtime v0.22.3 h1:I7mfqz/a/WdmDCEnXmSPm8/b/yRTy6JsKKENTi sigs.k8s.io/controller-runtime v0.22.3/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M= sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk= +sigs.k8s.io/gateway-api-inference-extension v1.0.0 h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8= +sigs.k8s.io/gateway-api-inference-extension v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= diff --git a/internal/controller/config/config.go b/internal/controller/config/config.go index e23f73ca59..d1e77df07b 100644 --- a/internal/controller/config/config.go +++ b/internal/controller/config/config.go @@ -48,6 +48,8 @@ type Config struct { Plus bool // ExperimentalFeatures indicates if experimental features are enabled. ExperimentalFeatures bool + // InferenceExtension indicates if Gateway API Inference Extension support is enabled. + InferenceExtension bool // SnippetsFilters indicates if SnippetsFilters are enabled. SnippetsFilters bool } diff --git a/internal/controller/handler.go b/internal/controller/handler.go index 39be0f158d..8a69a16ef1 100644 --- a/internal/controller/handler.go +++ b/internal/controller/handler.go @@ -13,9 +13,11 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1" @@ -79,6 +81,8 @@ type eventHandlerConfig struct { controlConfigNSName types.NamespacedName // gatewayCtlrName is the name of the NGF controller. gatewayCtlrName string + // gatewayInstanceName is the name of the NGINX Gateway instance. + gatewayInstanceName string // gatewayClassName is the name of the GatewayClass. 
gatewayClassName string // plus is whether or not we are running NGINX Plus. @@ -116,8 +120,10 @@ type eventHandlerImpl struct { // objectFilters contains all created objectFilters, with the key being a filterKey objectFilters map[filterKey]objectFilter - cfg eventHandlerConfig - lock sync.Mutex + cfg eventHandlerConfig + lock sync.RWMutex + leaderLock sync.RWMutex + leader bool } // newEventHandlerImpl creates a new eventHandlerImpl. @@ -170,6 +176,10 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log // enable is called when the pod becomes leader to ensure the provisioner has // the latest configuration. func (h *eventHandlerImpl) enable(ctx context.Context) { + h.leaderLock.Lock() + h.leader = true + h.leaderLock.Unlock() + h.sendNginxConfig(ctx, h.cfg.logger, h.cfg.processor.GetLatestGraph()) } @@ -187,6 +197,9 @@ func (h *eventHandlerImpl) sendNginxConfig(ctx context.Context, logger logr.Logg return } + // ensure headless "shadow" Services are created for any referenced InferencePools + h.ensureInferencePoolServices(ctx, gr.ReferencedInferencePools) + for _, gw := range gr.Gateways { go func() { if err := h.cfg.nginxProvisioner.RegisterGateway(ctx, gw, gw.DeploymentName.Name); err != nil { @@ -554,8 +567,8 @@ func (h *eventHandlerImpl) getDeploymentContext(ctx context.Context) (dataplane. // GetLatestConfiguration gets the latest configuration. func (h *eventHandlerImpl) GetLatestConfiguration() []*dataplane.Configuration { - h.lock.Lock() - defer h.lock.Unlock() + h.lock.RLock() + defer h.lock.RUnlock() configs := make([]*dataplane.Configuration, 0, len(h.latestConfigurations)) for _, cfg := range h.latestConfigurations { @@ -581,6 +594,111 @@ func objectFilterKey(obj client.Object, nsName types.NamespacedName) filterKey { return filterKey(fmt.Sprintf("%T_%s_%s", obj, nsName.Namespace, nsName.Name)) } +// ensureInferencePoolServices ensures a headless Service exists and is up to date for each InferencePool. 
+func (h *eventHandlerImpl) ensureInferencePoolServices( + ctx context.Context, + pools map[types.NamespacedName]*graph.ReferencedInferencePool, +) { + if !h.isLeader() { + return + } + + for _, pool := range pools { + if pool.Source == nil { + continue + } + + selectors := make(map[string]string) + for k, v := range pool.Source.Spec.Selector.MatchLabels { + selectors[string(k)] = string(v) + } + + // v1 of InferencePool only supports a single port right now + ports := []v1.ServicePort{ + { + Port: int32(pool.Source.Spec.TargetPorts[0].Number), + TargetPort: intstr.FromInt32(int32(pool.Source.Spec.TargetPorts[0].Number)), + }, + } + + labels := map[string]string{ + controller.AppManagedByLabel: controller.CreateNginxResourceName( + h.cfg.gatewayInstanceName, + h.cfg.gatewayClassName, + ), + } + + svc := &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: controller.CreateInferencePoolServiceName(pool.Source.Name), + Namespace: pool.Source.Namespace, + Labels: labels, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: pool.Source.APIVersion, + Kind: pool.Source.Kind, + Name: pool.Source.Name, + UID: pool.Source.UID, + }, + }, + }, + Spec: v1.ServiceSpec{ + ClusterIP: v1.ClusterIPNone, // headless + Selector: selectors, + Ports: ports, + }, + } + + svcCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + res, err := controllerutil.CreateOrUpdate( + svcCtx, + h.cfg.k8sClient, + svc, + serviceSpecSetter(svc, svc.Spec, svc.ObjectMeta), + ) + if err != nil { + cancel() + msg := "Failed to upsert headless Service for InferencePool" + h.cfg.logger.Error(err, msg, "Service", svc.Name, "InferencePool", pool.Source.Name) + h.cfg.eventRecorder.Eventf( + svc, + v1.EventTypeWarning, + "ServiceCreateOrUpdateFailed", + "%s %q: %v", msg, pool.Source.Name, err, + ) + continue + } + cancel() + + if res == controllerutil.OperationResultCreated || res == controllerutil.OperationResultUpdated { + h.cfg.logger.Info( + fmt.Sprintf("Successfully %s headless Service for InferencePool", res), + "Service", svc.Name, "InferencePool", pool.Source.Name, + ) + } + } +} + +func serviceSpecSetter( + service *v1.Service, + spec v1.ServiceSpec, + objectMeta metav1.ObjectMeta, +) controllerutil.MutateFn { + return func() error { + service.Labels = objectMeta.Labels + service.Spec = spec + return nil + } +} + +// isLeader returns whether or not this handler is the leader. 
+func (h *eventHandlerImpl) isLeader() bool { + h.leaderLock.RLock() + defer h.leaderLock.RUnlock() + + return h.leader +} + /* Handler Callback functions diff --git a/internal/controller/handler_test.go b/internal/controller/handler_test.go index ec9fe05848..4c23a12e55 100644 --- a/internal/controller/handler_test.go +++ b/internal/controller/handler_test.go @@ -12,11 +12,13 @@ import ( "go.uber.org/zap" v1 "k8s.io/api/core/v1" discoveryV1 "k8s.io/api/discovery/v1" + apiErrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1" @@ -149,6 +151,7 @@ var _ = Describe("eventHandler", func() { metricsCollector: collectors.NewControllerNoopCollector(), }) Expect(handler.cfg.graphBuiltHealthChecker.ready).To(BeFalse()) + handler.leader = true }) AfterEach(func() { @@ -518,6 +521,115 @@ var _ = Describe("eventHandler", func() { Expect(handler.cfg.graphBuiltHealthChecker.readyCheck(nil)).To(Succeed()) }) + It("should create a headless Service for each referenced InferencePool", func() { + namespace := "test-ns" + poolName1 := "pool1" + poolName2 := "pool2" + poolUID1 := types.UID("uid1") + poolUID2 := types.UID("uid2") + + pool1 := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName1, + Namespace: namespace, + UID: poolUID1, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8081}, + }, + }, + } + + g := &graph.Graph{ + Gateways: map[types.NamespacedName]*graph.Gateway{ + {}: { + Source: &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "gateway", + }, + }, + Valid: true, + }, + }, + ReferencedInferencePools: map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName1}: {Source: pool1}, + {Namespace: namespace, Name: poolName2}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName2, + Namespace: namespace, + UID: poolUID2, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "bar"}, + }, + TargetPorts: []inference.Port{ + {Number: 9090}, + }, + }, + }, + }, + }, + } + + fakeProcessor.ProcessReturns(g) + + e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} + batch := []any{e} + + handler.HandleEventBatch(context.Background(), logr.Discard(), batch) + + // Check Service for pool1 + svc1 := &v1.Service{} + svcName1 := controller.CreateInferencePoolServiceName(poolName1) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName1, Namespace: namespace}, svc1) + Expect(err).ToNot(HaveOccurred()) + Expect(svc1.Spec.ClusterIP).To(Equal(v1.ClusterIPNone)) + Expect(svc1.Spec.Selector).To(HaveKeyWithValue("app", "foo")) + Expect(svc1.Spec.Ports).To(HaveLen(1)) + Expect(svc1.Spec.Ports[0].Port).To(Equal(int32(8081))) + Expect(svc1.OwnerReferences).To(HaveLen(1)) + Expect(svc1.OwnerReferences[0].Name).To(Equal(poolName1)) + Expect(svc1.OwnerReferences[0].UID).To(Equal(poolUID1)) + + // Check Service for pool2 + svc2 := &v1.Service{} + svcName2 := 
controller.CreateInferencePoolServiceName(poolName2) + err = fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName2, Namespace: namespace}, svc2) + Expect(err).ToNot(HaveOccurred()) + Expect(svc2.Spec.ClusterIP).To(Equal(v1.ClusterIPNone)) + Expect(svc2.Spec.Selector).To(HaveKeyWithValue("app", "bar")) + Expect(svc2.Spec.Ports).To(HaveLen(1)) + Expect(svc2.Spec.Ports[0].Port).To(Equal(int32(9090))) + Expect(svc2.OwnerReferences).To(HaveLen(1)) + Expect(svc2.OwnerReferences[0].Name).To(Equal(poolName2)) + Expect(svc2.OwnerReferences[0].UID).To(Equal(poolUID2)) + + // Now update pool1's selector and ensure the Service selector is updated + updatedSelector := map[inference.LabelKey]inference.LabelValue{"app": "baz"} + pool1.Spec.Selector.MatchLabels = updatedSelector + + // Simulate the updated pool in the graph + g.ReferencedInferencePools[types.NamespacedName{Namespace: namespace, Name: poolName1}].Source = pool1 + fakeProcessor.ProcessReturns(g) + + e = &events.UpsertEvent{Resource: &inference.InferencePool{}} + batch = []any{e} + handler.HandleEventBatch(context.Background(), logr.Discard(), batch) + + // Check that the Service selector was updated + svc1 = &v1.Service{} + err = fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName1, Namespace: namespace}, svc1) + Expect(err).ToNot(HaveOccurred()) + Expect(svc1.Spec.Selector).To(HaveKeyWithValue("app", "baz")) + }) + It("should panic for an unknown event type", func() { e := &struct{}{} @@ -688,3 +800,156 @@ var _ = Describe("getDeploymentContext", func() { }) }) }) + +var _ = Describe("ensureInferencePoolServices", func() { + var ( + handler *eventHandlerImpl + fakeK8sClient client.Client + fakeEventRecorder *record.FakeRecorder + namespace = "test-ns" + poolName = "my-inference-pool" + poolUID = types.UID("pool-uid") + ) + + BeforeEach(func() { + fakeK8sClient = fake.NewFakeClient() + fakeEventRecorder = record.NewFakeRecorder(1) + handler = newEventHandlerImpl(eventHandlerConfig{ + ctx: context.Background(), + k8sClient: fakeK8sClient, + statusQueue: status.NewQueue(), + eventRecorder: fakeEventRecorder, + logger: logr.Discard(), + }) + // Set as leader so ensureInferencePoolServices will run + handler.leader = true + }) + + It("creates a headless Service for a referenced InferencePool", func() { + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName, + Namespace: namespace, + UID: poolUID, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8080}, + }, + }, + }, + }, + } + + handler.ensureInferencePoolServices(context.Background(), pools) + + // The Service should have been created + svc := &v1.Service{} + svcName := controller.CreateInferencePoolServiceName(poolName) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName, Namespace: namespace}, svc) + Expect(err).ToNot(HaveOccurred()) + Expect(svc.Spec.ClusterIP).To(Equal(v1.ClusterIPNone)) + Expect(svc.Spec.Selector).To(HaveKeyWithValue("app", "foo")) + Expect(svc.Spec.Ports).To(HaveLen(1)) + Expect(svc.Spec.Ports[0].Port).To(Equal(int32(8080))) + Expect(svc.OwnerReferences).To(HaveLen(1)) + Expect(svc.OwnerReferences[0].Name).To(Equal(poolName)) + Expect(svc.OwnerReferences[0].UID).To(Equal(poolUID)) + }) + + It("does 
nothing if not leader", func() { + handler.leader = false + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName, + Namespace: namespace, + UID: poolUID, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8080}, + }, + }, + }, + }, + } + + handler.ensureInferencePoolServices(context.Background(), pools) + svc := &v1.Service{} + svcName := controller.CreateInferencePoolServiceName(poolName) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName, Namespace: namespace}, svc) + Expect(err).To(HaveOccurred()) + }) + + It("skips pools with nil Source", func() { + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: nil, + }, + } + handler.ensureInferencePoolServices(context.Background(), pools) + // Should not panic or create anything + svc := &v1.Service{} + svcName := controller.CreateInferencePoolServiceName(poolName) + err := fakeK8sClient.Get(context.Background(), types.NamespacedName{Name: svcName, Namespace: namespace}, svc) + Expect(err).To(HaveOccurred()) + }) + + It("emits an event if Service creation fails", func() { + // Use a client that will fail on CreateOrUpdate + handler.cfg.k8sClient = &badFakeClient{} + handler.leader = true + + pools := map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: namespace, Name: poolName}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolName, + Namespace: namespace, + UID: poolUID, + }, + Spec: inference.InferencePoolSpec{ + Selector: inference.LabelSelector{ + MatchLabels: map[inference.LabelKey]inference.LabelValue{"app": "foo"}, + }, + TargetPorts: []inference.Port{ + {Number: 8080}, + }, + }, + }, + }, + } + + handler.ensureInferencePoolServices(context.Background(), pools) + Eventually(func() int { return len(fakeEventRecorder.Events) }).Should(BeNumerically(">=", 1)) + event := <-fakeEventRecorder.Events + Expect(event).To(ContainSubstring("ServiceCreateOrUpdateFailed")) + }) +}) + +// badFakeClient always returns an error on Create or Update. 
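+// Its Get stub reports NotFound while Create and Update both fail, so any attempt by the
+// handler to create or update the headless Service errors out, which is what drives the
+// ServiceCreateOrUpdateFailed event asserted in the test above.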
+type badFakeClient struct { + client.Client +} + +func (*badFakeClient) Get(context.Context, client.ObjectKey, client.Object, ...client.GetOption) error { + return apiErrors.NewNotFound(v1.Resource("service"), "not-found") +} + +func (*badFakeClient) Create(context.Context, client.Object, ...client.CreateOption) error { + return errors.New("create error") +} + +func (*badFakeClient) Update(context.Context, client.Object, ...client.UpdateOption) error { + return errors.New("update error") +} diff --git a/internal/controller/manager.go b/internal/controller/manager.go index a4e9fd9cf0..d02411571b 100644 --- a/internal/controller/manager.go +++ b/internal/controller/manager.go @@ -32,6 +32,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/metrics" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" k8spredicate "sigs.k8s.io/controller-runtime/pkg/predicate" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" gatewayv1alpha2 "sigs.k8s.io/gateway-api/apis/v1alpha2" gatewayv1alpha3 "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -95,6 +96,7 @@ func init() { utilruntime.Must(autoscalingv2.AddToScheme(scheme)) utilruntime.Must(authv1.AddToScheme(scheme)) utilruntime.Must(rbacv1.AddToScheme(scheme)) + utilruntime.Must(inference.Install(scheme)) } func StartManager(cfg config.Config) error { @@ -251,6 +253,7 @@ func StartManager(cfg config.Config) error { gatewayPodConfig: cfg.GatewayPodConfig, controlConfigNSName: controlConfigNSName, gatewayCtlrName: cfg.GatewayCtlrName, + gatewayInstanceName: cfg.GatewayPodConfig.InstanceName, gatewayClassName: cfg.GatewayClassName, plus: cfg.Plus, statusQueue: statusQueue, @@ -536,6 +539,18 @@ func registerControllers( controllerRegCfgs = append(controllerRegCfgs, gwExpFeatures...) } + if cfg.InferenceExtension { + inferenceExt := []ctlrCfg{ + { + objectType: &inference.InferencePool{}, + options: []controller.Option{ + controller.WithK8sPredicate(k8spredicate.GenerationChangedPredicate{}), + }, + }, + } + controllerRegCfgs = append(controllerRegCfgs, inferenceExt...) 
+ } + if cfg.ConfigName != "" { controllerRegCfgs = append(controllerRegCfgs, ctlrCfg{ @@ -761,6 +776,10 @@ func prepareFirstEventBatchPreparerArgs(cfg config.Config) ([]client.Object, []c ) } + if cfg.InferenceExtension { + objectLists = append(objectLists, &inference.InferencePoolList{}) + } + if cfg.SnippetsFilters { objectLists = append( objectLists, diff --git a/internal/controller/manager_test.go b/internal/controller/manager_test.go index 60d7b0e5d5..76e613a1f6 100644 --- a/internal/controller/manager_test.go +++ b/internal/controller/manager_test.go @@ -14,6 +14,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" gatewayv1alpha2 "sigs.k8s.io/gateway-api/apis/v1alpha2" gatewayv1alpha3 "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -47,9 +48,7 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { { name: "base case", cfg: config.Config{ - GatewayClassName: gcName, - ExperimentalFeatures: false, - SnippetsFilters: false, + GatewayClassName: gcName, }, expectedObjects: []client.Object{ &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, @@ -75,7 +74,6 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { cfg: config.Config{ GatewayClassName: gcName, ExperimentalFeatures: true, - SnippetsFilters: false, }, expectedObjects: []client.Object{ &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, @@ -99,12 +97,37 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { &ngfAPIv1alpha1.UpstreamSettingsPolicyList{}, }, }, + { + name: "inference extension enabled", + cfg: config.Config{ + GatewayClassName: gcName, + InferenceExtension: true, + }, + expectedObjects: []client.Object{ + &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, + }, + expectedObjectLists: []client.ObjectList{ + &apiv1.ServiceList{}, + &apiv1.SecretList{}, + &apiv1.NamespaceList{}, + &discoveryV1.EndpointSliceList{}, + &gatewayv1.HTTPRouteList{}, + &gatewayv1.GatewayList{}, + &gatewayv1beta1.ReferenceGrantList{}, + &ngfAPIv1alpha2.NginxProxyList{}, + &gatewayv1.GRPCRouteList{}, + partialObjectMetadataList, + &inference.InferencePoolList{}, + &ngfAPIv1alpha1.ClientSettingsPolicyList{}, + &ngfAPIv1alpha2.ObservabilityPolicyList{}, + &ngfAPIv1alpha1.UpstreamSettingsPolicyList{}, + }, + }, { name: "snippets filters enabled", cfg: config.Config{ - GatewayClassName: gcName, - ExperimentalFeatures: false, - SnippetsFilters: true, + GatewayClassName: gcName, + SnippetsFilters: true, }, expectedObjects: []client.Object{ &gatewayv1.GatewayClass{ObjectMeta: metav1.ObjectMeta{Name: "nginx"}}, @@ -127,10 +150,11 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { }, }, { - name: "experimental and snippets filters enabled", + name: "experimental, inference, and snippets filters enabled", cfg: config.Config{ GatewayClassName: gcName, ExperimentalFeatures: true, + InferenceExtension: true, SnippetsFilters: true, }, expectedObjects: []client.Object{ @@ -147,6 +171,7 @@ func TestPrepareFirstEventBatchPreparerArgs(t *testing.T) { &gatewayv1beta1.ReferenceGrantList{}, &ngfAPIv1alpha2.NginxProxyList{}, partialObjectMetadataList, + &inference.InferencePoolList{}, &gatewayv1alpha3.BackendTLSPolicyList{}, &gatewayv1alpha2.TLSRouteList{}, &gatewayv1.GRPCRouteList{}, diff --git a/internal/controller/nginx/modules/src/epp.js 
b/internal/controller/nginx/modules/src/epp.js index 8efcd70ece..d4beeb9e15 100644 --- a/internal/controller/nginx/modules/src/epp.js +++ b/internal/controller/nginx/modules/src/epp.js @@ -1,6 +1,6 @@ // This file contains the methods to get an AI workload endpoint from the EndpointPicker (EPP). -// TODO (sberman): this module will need to be enhanced to include the following: +// TODO(sberman): this module will need to be enhanced to include the following: // - function that sends the subrequest to the Go middleware application (to get the endpoint from EPP) // - if a user has specified an Exact matching condition for a model name, extract the model name from // the request body, and if it matches that condition, set the proper value in the X-Gateway-Model-Name header diff --git a/internal/controller/state/change_processor.go b/internal/controller/state/change_processor.go index f3184adde8..27a62bb0e5 100644 --- a/internal/controller/state/change_processor.go +++ b/internal/controller/state/change_processor.go @@ -11,6 +11,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -98,6 +99,7 @@ func NewChangeProcessorImpl(cfg ChangeProcessorConfig) *ChangeProcessorImpl { TLSRoutes: make(map[types.NamespacedName]*v1alpha2.TLSRoute), NGFPolicies: make(map[graph.PolicyKey]policies.Policy), SnippetsFilters: make(map[types.NamespacedName]*ngfAPIv1alpha1.SnippetsFilter), + InferencePools: make(map[types.NamespacedName]*inference.InferencePool), } processor := &ChangeProcessorImpl{ @@ -166,6 +168,11 @@ func NewChangeProcessorImpl(cfg ChangeProcessorConfig) *ChangeProcessorImpl { store: newObjectStoreMapAdapter(clusterStore.Services), predicate: funcPredicate{stateChanged: isReferenced}, }, + { + gvk: cfg.MustExtractGVK(&inference.InferencePool{}), + store: newObjectStoreMapAdapter(clusterStore.InferencePools), + predicate: funcPredicate{stateChanged: isReferenced}, + }, { gvk: cfg.MustExtractGVK(&discoveryV1.EndpointSlice{}), store: nil, diff --git a/internal/controller/state/change_processor_test.go b/internal/controller/state/change_processor_test.go index 2d17e6f6e9..44dbdb0613 100644 --- a/internal/controller/state/change_processor_test.go +++ b/internal/controller/state/change_processor_test.go @@ -14,6 +14,7 @@ import ( "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -317,6 +318,7 @@ func createScheme() *runtime.Scheme { utilruntime.Must(apiext.AddToScheme(scheme)) utilruntime.Must(ngfAPIv1alpha1.AddToScheme(scheme)) utilruntime.Must(ngfAPIv1alpha2.AddToScheme(scheme)) + utilruntime.Must(inference.Install(scheme)) return scheme } diff --git a/internal/controller/state/graph/backend_refs.go b/internal/controller/state/graph/backend_refs.go index d18a81cc43..e14d0fb0fa 100644 --- a/internal/controller/state/graph/backend_refs.go +++ b/internal/controller/state/graph/backend_refs.go @@ -15,7 +15,9 @@ import ( ngfAPIv1alpha2 "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha2" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/sort" 
"github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) const ( @@ -57,10 +59,11 @@ func addBackendRefsToRouteRules( routes map[RouteKey]*L7Route, refGrantResolver *referenceGrantResolver, services map[types.NamespacedName]*v1.Service, + referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool, backendTLSPolicies map[types.NamespacedName]*BackendTLSPolicy, ) { for _, r := range routes { - addBackendRefsToRules(r, refGrantResolver, services, backendTLSPolicies) + addBackendRefsToRules(r, refGrantResolver, services, referencedInferencePools, backendTLSPolicies) } } @@ -70,6 +73,7 @@ func addBackendRefsToRules( route *L7Route, refGrantResolver *referenceGrantResolver, services map[types.NamespacedName]*v1.Service, + referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool, backendTLSPolicies map[types.NamespacedName]*BackendTLSPolicy, ) { if !route.Valid { @@ -99,6 +103,24 @@ func addBackendRefsToRules( } routeNs := route.Source.GetNamespace() + // if we have an InferencePool backend disguised as a Service, set the port value + if ref.IsInferencePool { + namespace := routeNs + if ref.Namespace != nil { + namespace = string(*ref.Namespace) + } + + poolName := types.NamespacedName{ + Name: controller.GetInferencePoolName(string(ref.Name)), + Namespace: namespace, + } + + if pool, exists := referencedInferencePools[poolName]; exists { + port := gatewayv1.PortNumber(pool.Source.Spec.TargetPorts[0].Number) + ref.Port = helpers.GetPointer(port) + } + } + ref, conds := createBackendRef( ref, route, @@ -149,7 +171,14 @@ func createBackendRef( } } - valid, cond := validateRouteBackendRef(ref, route.Source.GetNamespace(), refGrantResolver, refPath) + valid, cond := validateRouteBackendRef( + route.RouteType, + ref, + route.Source.GetNamespace(), + refGrantResolver, + refPath, + ) + if !valid { backendRef := BackendRef{ Weight: weight, @@ -440,6 +469,7 @@ func checkExternalNameValidForGateways( } func validateRouteBackendRef( + routeType RouteType, ref RouteBackendRef, routeNs string, refGrantResolver func(resource toResource) bool, @@ -451,6 +481,10 @@ func validateRouteBackendRef( return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error()) } + if routeType == RouteTypeHTTP { + return validateBackendRefHTTPRoute(ref, routeNs, refGrantResolver, path) + } + return validateBackendRef(ref.BackendRef, routeNs, refGrantResolver, path) } @@ -502,6 +536,120 @@ func validateBackendRef( return true, conditions.Condition{} } +func validateBackendRefHTTPRoute( + ref RouteBackendRef, + routeNs string, + refGrantResolver func(toResource toResource) bool, + path *field.Path, +) (valid bool, cond conditions.Condition) { + // Because all errors cause same condition but different reasons, we return as soon as we find an error + + if valid, cond := validateBackendRefHTTPRouteGroupKind(ref.BackendRef, path); !valid { + return false, cond + } + + // no need to validate ref.Name + + if ref.Namespace != nil && string(*ref.Namespace) != routeNs { + var inferencePool bool + var inferencePoolName types.NamespacedName + + switch { + case ref.Kind != nil && *ref.Kind == kinds.InferencePool: + inferencePool = true + inferencePoolName = types.NamespacedName{ + Namespace: string(*ref.Namespace), + Name: string(ref.Name), + } + case 
ref.IsInferencePool: + // Case where RouteBackendRef has been updated with headless Service backend for the InferencePool + inferencePool = true + inferencePoolName = types.NamespacedName{ + Namespace: string(*ref.Namespace), + Name: controller.GetInferencePoolName(string(ref.Name)), + } + default: + refNsName := types.NamespacedName{Namespace: string(*ref.Namespace), Name: string(ref.Name)} + + if !refGrantResolver(toService(refNsName)) { + msg := fmt.Sprintf("Backend ref to Service %s not permitted by any ReferenceGrant", refNsName) + valErr := field.Forbidden(path.Child("namespace"), msg) + + return false, conditions.NewRouteBackendRefRefNotPermitted(valErr.Error()) + } + } + + if inferencePool { + if !refGrantResolver(toInferencePool(inferencePoolName)) { + msg := fmt.Sprintf( + "Backend ref to InferencePool %s not permitted by any ReferenceGrant", + inferencePoolName, + ) + valErr := field.Forbidden(path.Child("namespace"), msg) + return false, conditions.NewRouteBackendRefRefNotPermitted(valErr.Error()) + } + } + } + + if ref.Port == nil && (ref.Kind == nil || *ref.Kind == kinds.Service) { + valErr := field.Required(path.Child("port"), "port cannot be nil") + return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error()) + } + + // any value of port is OK + + if ref.Weight != nil { + if err := validateWeight(*ref.Weight); err != nil { + valErr := field.Invalid(path.Child("weight"), *ref.Weight, err.Error()) + return false, conditions.NewRouteBackendRefUnsupportedValue(valErr.Error()) + } + } + + return true, conditions.Condition{} +} + +func validateBackendRefHTTPRouteGroupKind( + ref gatewayv1.BackendRef, + path *field.Path, +) (bool, conditions.Condition) { + if ref.Group != nil { + group := *ref.Group + if group != "core" && group != "" && group != inferenceAPIGroup { + valErr := field.NotSupported(path.Child("group"), group, []string{"core", "", inferenceAPIGroup}) + return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + if group == inferenceAPIGroup { + if ref.Kind == nil || *ref.Kind != kinds.InferencePool { + valErr := field.Invalid( + path.Child("kind"), + ref.Kind, + fmt.Sprintf("kind must be InferencePool when group is %s", inferenceAPIGroup), + ) + return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + } + } + + if ref.Kind != nil { + kind := *ref.Kind + if kind != kinds.Service && kind != kinds.InferencePool { + valErr := field.NotSupported(path.Child("kind"), kind, []string{kinds.Service, kinds.InferencePool}) + return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + if kind == kinds.InferencePool { + if ref.Group == nil || *ref.Group != inferenceAPIGroup { + valErr := field.Invalid( + path.Child("group"), + ref.Group, + fmt.Sprintf("group must be %s when kind is InferencePool", inferenceAPIGroup), + ) + return false, conditions.NewRouteBackendRefInvalidKind(valErr.Error()) + } + } + } + return true, conditions.Condition{} +} + // validateRouteBackendRefAppProtocol checks if a given RouteType supports sending traffic to a service AppProtocol. // Returns nil if true or AppProtocol is not a Kubernetes Standard Application Protocol. 
func validateRouteBackendRefAppProtocol( diff --git a/internal/controller/state/graph/backend_refs_test.go b/internal/controller/state/graph/backend_refs_test.go index 6e07bad538..3f05f793a6 100644 --- a/internal/controller/state/graph/backend_refs_test.go +++ b/internal/controller/state/graph/backend_refs_test.go @@ -11,13 +11,16 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" ngfAPIv1alpha2 "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha2" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) func getNormalRef() gatewayv1.BackendRef { @@ -36,16 +39,46 @@ func getModifiedRef(mod func(ref gatewayv1.BackendRef) gatewayv1.BackendRef) gat return mod(getNormalRef()) } +func getNormalRouteBackendRef() RouteBackendRef { + return RouteBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind]("Service"), + Name: "service1", + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + Port: helpers.GetPointer[gatewayv1.PortNumber](80), + }, + Weight: helpers.GetPointer[int32](5), + }, + } +} + +func getModifiedRouteBackendRef(mod func(ref RouteBackendRef) RouteBackendRef) RouteBackendRef { + return mod(getNormalRouteBackendRef()) +} + func TestValidateRouteBackendRef(t *testing.T) { t.Parallel() + tests := []struct { + routeType RouteType expectedCondition conditions.Condition name string ref RouteBackendRef expectedValid bool }{ { - name: "normal case", + name: "normal case", + routeType: RouteTypeHTTP, + ref: RouteBackendRef{ + BackendRef: getNormalRef(), + Filters: nil, + }, + expectedValid: true, + }, + { + name: "normal case grpc", + routeType: RouteTypeGRPC, ref: RouteBackendRef{ BackendRef: getNormalRef(), Filters: nil, @@ -53,7 +86,35 @@ func TestValidateRouteBackendRef(t *testing.T) { expectedValid: true, }, { - name: "filters not supported", + name: "normal case; inferencepool backend", + routeType: RouteTypeHTTP, + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.BackendObjectReference = gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Name: "ipool", + } + return backend + }), + }, + expectedValid: true, + }, + { + name: "normal case; headless Service inferencepool backend", + routeType: RouteTypeHTTP, + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.Name = gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("ipool")) + return backend + }), + IsInferencePool: true, + }, + expectedValid: true, + }, + { + name: "filters not supported", + routeType: RouteTypeHTTP, ref: RouteBackendRef{ BackendRef: getNormalRef(), Filters: []any{ @@ -70,7 +131,8 @@ func TestValidateRouteBackendRef(t *testing.T) { ), }, { - name: "invalid base ref", + name: "invalid base ref", + routeType: RouteTypeHTTP, ref: RouteBackendRef{ 
BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { backend.Kind = helpers.GetPointer[gatewayv1.Kind]("NotService") @@ -79,7 +141,7 @@ func TestValidateRouteBackendRef(t *testing.T) { }, expectedValid: false, expectedCondition: conditions.NewRouteBackendRefInvalidKind( - `test.kind: Unsupported value: "NotService": supported values: "Service"`, + `test.kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`, ), }, } @@ -90,7 +152,13 @@ func TestValidateRouteBackendRef(t *testing.T) { g := NewWithT(t) alwaysTrueRefGrantResolver := func(_ toResource) bool { return true } - valid, cond := validateRouteBackendRef(test.ref, "test", alwaysTrueRefGrantResolver, field.NewPath("test")) + valid, cond := validateRouteBackendRef( + test.routeType, + test.ref, + "test", + alwaysTrueRefGrantResolver, + field.NewPath("test"), + ) g.Expect(valid).To(Equal(test.expectedValid)) g.Expect(cond).To(Equal(test.expectedCondition)) @@ -156,7 +224,7 @@ func TestValidateBackendRef(t *testing.T) { ), }, { - name: "not a service kind", + name: "invalid kind", ref: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { backend.Kind = helpers.GetPointer[gatewayv1.Kind]("NotService") return backend @@ -218,6 +286,209 @@ func TestValidateBackendRef(t *testing.T) { } } +func TestValidateBackendRefHTTPRoute(t *testing.T) { + t.Parallel() + + alwaysFalseRefGrantResolver := func(_ toResource) bool { return false } + alwaysTrueRefGrantResolver := func(_ toResource) bool { return true } + + tests := []struct { + refGrantResolver func(resource toResource) bool + expectedCondition conditions.Condition + name string + ref RouteBackendRef + expectedValid bool + }{ + { + name: "normal case", + ref: getNormalRouteBackendRef(), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "normal case with implicit namespace", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Namespace = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "normal case with implicit kind Service", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Kind = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "normal case with InferencePool", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup) + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool) + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "group is inference group but kind is not InferencePool", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup) + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.Service) + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefInvalidKind( + `test.kind: Invalid value: "Service": kind must be InferencePool when group is inference.networking.k8s.io`, + ), + }, + { + name: "kind is InferencePool but group is not inference", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool) + 
backend.Group = helpers.GetPointer[gatewayv1.Group]("core") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefInvalidKind( + `test.group: Invalid value: "core": group must be inference.networking.k8s.io when kind is InferencePool`, + ), + }, + { + name: "normal case with backend ref allowed by reference grant", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("cross-ns") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + { + name: "inferencepool backend ref not allowed by reference grant", + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.BackendObjectReference = gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Name: "ipool", + Namespace: helpers.GetPointer[gatewayv1.Namespace]("invalid"), + } + return backend + }), + }, + refGrantResolver: alwaysFalseRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefRefNotPermitted( + "test.namespace: Forbidden: Backend ref to InferencePool invalid/ipool not permitted by any ReferenceGrant", + ), + }, + { + name: "headless Service inferencepool backend ref not allowed by reference grant", + ref: RouteBackendRef{ + BackendRef: getModifiedRef(func(backend gatewayv1.BackendRef) gatewayv1.BackendRef { + backend.Name = gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("ipool")) + backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("invalid") + return backend + }), + IsInferencePool: true, + }, + refGrantResolver: alwaysFalseRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefRefNotPermitted( + "test.namespace: Forbidden: Backend ref to InferencePool invalid/ipool not permitted by any ReferenceGrant", + ), + }, + { + name: "invalid group", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Group = helpers.GetPointer[gatewayv1.Group]("invalid") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefInvalidKind( + `test.group: Unsupported value: "invalid": supported values: "core", "", "inference.networking.k8s.io"`, + ), + }, + { + name: "invalid kind", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Kind = helpers.GetPointer[gatewayv1.Kind]("NotService") + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefInvalidKind( + `test.kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`, + ), + }, + { + name: "backend ref not allowed by reference grant", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Namespace = helpers.GetPointer[gatewayv1.Namespace]("invalid") + return backend + }), + refGrantResolver: alwaysFalseRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefRefNotPermitted( + "test.namespace: Forbidden: Backend ref to Service invalid/service1 not permitted by any ReferenceGrant", + ), + }, + { + name: "invalid weight", + ref: 
getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Weight = helpers.GetPointer[int32](-1) + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefUnsupportedValue( + "test.weight: Invalid value: -1: must be in the range [0, 1000000]", + ), + }, + { + name: "nil port", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Port = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: false, + expectedCondition: conditions.NewRouteBackendRefUnsupportedValue( + "test.port: Required value: port cannot be nil", + ), + }, + { + name: "nil port allowed for InferencePool kind", + ref: getModifiedRouteBackendRef(func(backend RouteBackendRef) RouteBackendRef { + backend.Kind = helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool) + backend.Group = helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup) + backend.Port = nil + return backend + }), + refGrantResolver: alwaysTrueRefGrantResolver, + expectedValid: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + valid, cond := validateBackendRefHTTPRoute(test.ref, "test", test.refGrantResolver, field.NewPath("test")) + + g.Expect(valid).To(Equal(test.expectedValid)) + g.Expect(cond).To(Equal(test.expectedCondition)) + }) + } +} + func TestValidateWeight(t *testing.T) { t.Parallel() validWeights := []int32{0, 1, 1000000} @@ -523,13 +794,21 @@ func TestAddBackendRefsToRules(t *testing.T) { Name: "svcGRPC", } + svcInferenceName := controller.CreateInferencePoolServiceName("ipool") + svcInference := getSvc(svcInferenceName) + svcInferenceNsName := types.NamespacedName{ + Namespace: "test", + Name: svcInferenceName, + } + services := map[types.NamespacedName]*v1.Service{ - {Namespace: "test", Name: "svc1"}: svc1, - {Namespace: "test", Name: "svc2"}: svc2, - {Namespace: "test", Name: "svcH2c"}: svcH2c, - {Namespace: "test", Name: "svcWS"}: svcWS, - {Namespace: "test", Name: "svcWSS"}: svcWSS, - {Namespace: "test", Name: "svcGRPC"}: svcGRPC, + svc1NsName: svc1, + svc2NsName: svc2, + svcH2cNsName: svcH2c, + svcWSNsName: svcWS, + svcWSSNsName: svcWSS, + svcGRPCNsName: svcGRPC, + svcInferenceNsName: svcInference, } emptyPolicies := map[types.NamespacedName]*BackendTLSPolicy{} @@ -892,7 +1171,7 @@ func TestAddBackendRefsToRules(t *testing.T) { }, expectedConditions: []conditions.Condition{ conditions.NewRouteBackendRefInvalidKind( - `spec.rules[0].backendRefs[0].kind: Unsupported value: "NotService": supported values: "Service"`, + `spec.rules[0].backendRefs[0].kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`, ), }, policies: emptyPolicies, @@ -938,6 +1217,29 @@ func TestAddBackendRefsToRules(t *testing.T) { expectedConditions: nil, name: "zero backendRefs", }, + { + route: func() *L7Route { + route := createRoute("hr-inference", RouteTypeHTTP, "Service", 1, svcInferenceName) + // Mark the backend ref as IsInferencePool and set the port to nil (simulate InferencePool logic) + route.Spec.Rules[0].RouteBackendRefs[0].IsInferencePool = true + route.Spec.Rules[0].RouteBackendRefs[0].Port = nil + return route + }(), + expectedBackendRefs: []BackendRef{ + { + SvcNsName: types.NamespacedName{Namespace: "test", Name: svcInferenceName}, + ServicePort: v1.ServicePort{ + Port: 80, + }, + Valid: true, + Weight: 1, + InvalidForGateways: 
map[types.NamespacedName]conditions.Condition{}, + }, + }, + expectedConditions: nil, + policies: emptyPolicies, + name: "headless Service for InferencePool gets port set correctly", + }, } for _, test := range tests { @@ -946,7 +1248,22 @@ func TestAddBackendRefsToRules(t *testing.T) { g := NewWithT(t) resolver := newReferenceGrantResolver(nil) - addBackendRefsToRules(test.route, resolver, services, test.policies) + + referencedInferencePools := map[types.NamespacedName]*ReferencedInferencePool{ + {Namespace: "test", Name: "ipool"}: { + Source: &inference.InferencePool{ + Spec: inference.InferencePoolSpec{ + TargetPorts: []inference.Port{ + { + Number: 80, + }, + }, + }, + }, + }, + } + + addBackendRefsToRules(test.route, resolver, services, referencedInferencePools, test.policies) var actual []BackendRef if test.route.Spec.Rules != nil { @@ -1169,7 +1486,7 @@ func TestCreateBackend(t *testing.T) { expectedServicePortReference: "", expectedConditions: []conditions.Condition{ conditions.NewRouteBackendRefInvalidKind( - `test.kind: Unsupported value: "NotService": supported values: "Service"`, + `test.kind: Unsupported value: "NotService": supported values: "Service", "InferencePool"`, ), }, name: "invalid kind", @@ -1403,11 +1720,13 @@ func TestCreateBackend(t *testing.T) { g := NewWithT(t) rbr := RouteBackendRef{ - nil, - test.ref.BackendRef, - []any{}, + MirrorBackendIdx: nil, + IsInferencePool: false, + BackendRef: test.ref.BackendRef, + Filters: []any{}, } route := &L7Route{ + RouteType: RouteTypeHTTP, Source: &gatewayv1.HTTPRoute{ ObjectMeta: metav1.ObjectMeta{ Namespace: "test", @@ -1467,12 +1786,14 @@ func TestCreateBackend(t *testing.T) { // test mirror backend case g := NewWithT(t) ref := RouteBackendRef{ - helpers.GetPointer(0), // mirrorFilterIdx - getNormalRef(), - []any{}, + MirrorBackendIdx: helpers.GetPointer(0), + IsInferencePool: false, + BackendRef: getNormalRef(), + Filters: []any{}, } route := &L7Route{ + RouteType: RouteTypeHTTP, Source: &gatewayv1.HTTPRoute{ ObjectMeta: metav1.ObjectMeta{ Namespace: "test", diff --git a/internal/controller/state/graph/graph.go b/internal/controller/state/graph/graph.go index e556c798ba..b5e13991e9 100644 --- a/internal/controller/state/graph/graph.go +++ b/internal/controller/state/graph/graph.go @@ -9,6 +9,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -40,6 +41,7 @@ type ClusterState struct { GRPCRoutes map[types.NamespacedName]*gatewayv1.GRPCRoute NGFPolicies map[PolicyKey]policies.Policy SnippetsFilters map[types.NamespacedName]*ngfAPIv1alpha1.SnippetsFilter + InferencePools map[types.NamespacedName]*inference.InferencePool } // Graph is a Graph-like representation of Gateway API resources. @@ -65,6 +67,9 @@ type Graph struct { ReferencedNamespaces map[types.NamespacedName]*v1.Namespace // ReferencedServices includes the NamespacedNames of all the Services that are referenced by at least one Route. ReferencedServices map[types.NamespacedName]*ReferencedService + // ReferencedInferencePools includes the NamespacedNames of all the InferencePools + // that are referenced by at least one Route. 
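+	// The map is nil when no Route references an InferencePool.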
+ ReferencedInferencePools map[types.NamespacedName]*ReferencedInferencePool // ReferencedCaCertConfigMaps includes ConfigMaps that have been referenced by any BackendTLSPolicies. ReferencedCaCertConfigMaps map[types.NamespacedName]*CaCertConfigMap // ReferencedNginxProxies includes NginxProxies that have been referenced by a GatewayClass or a Gateway. @@ -115,11 +120,15 @@ func (g *Graph) IsReferenced(resourceType ngftypes.ObjectType, nsname types.Name _, existed := g.ReferencedNamespaces[nsname] exists := isNamespaceReferenced(obj, g.Gateways) return existed || exists - // Service reference exists if at least one HTTPRoute references it. + // Service reference exists if at least one Route references it. case *v1.Service: _, exists := g.ReferencedServices[nsname] return exists - // EndpointSlice reference exists if its Service owner is referenced by at least one HTTPRoute. + // InferencePool reference exists if at least one Route references it. + case *inference.InferencePool: + _, exists := g.ReferencedInferencePools[nsname] + return exists + // EndpointSlice reference exists if its Service owner is referenced by at least one Route. case *discoveryV1.EndpointSlice: svcName := index.GetServiceNameFromEndpointSlice(obj) @@ -249,7 +258,9 @@ func BuildGraph( state.GRPCRoutes, gws, processedSnippetsFilters, + state.InferencePools, ) + referencedInferencePools := buildReferencedInferencePools(routes, gws, state.InferencePools) l4routes := buildL4RoutesForGateways( state.TLSRoutes, @@ -262,6 +273,7 @@ func BuildGraph( routes, refGrantResolver, state.Services, + referencedInferencePools, processedBackendTLSPolicies, ) bindRoutesToListeners(routes, l4routes, gws, state.Namespaces) @@ -295,6 +307,7 @@ func BuildGraph( ReferencedSecrets: secretResolver.getResolvedSecrets(), ReferencedNamespaces: referencedNamespaces, ReferencedServices: referencedServices, + ReferencedInferencePools: referencedInferencePools, ReferencedCaCertConfigMaps: configMapResolver.getResolvedConfigMaps(), ReferencedNginxProxies: processedNginxProxies, BackendTLSPolicies: processedBackendTLSPolicies, diff --git a/internal/controller/state/graph/graph_test.go b/internal/controller/state/graph/graph_test.go index ac5cfff3a2..da0ca04d47 100644 --- a/internal/controller/state/graph/graph_test.go +++ b/internal/controller/state/graph/graph_test.go @@ -13,6 +13,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -25,6 +26,7 @@ import ( "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation/validationfakes" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller/index" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" @@ -214,6 +216,44 @@ func TestBuildGraph(t *testing.T) { return rule } + createValidRuleWithInferencePoolBackendRef := func(matches []gatewayv1.HTTPRouteMatch) RouteRule { + refs := []BackendRef{ + { + SvcNsName: types.NamespacedName{ + Namespace: testNs, + Name: 
controller.CreateInferencePoolServiceName("ipool"), + }, + ServicePort: v1.ServicePort{Port: 80}, + Valid: true, + Weight: 1, + InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, + }, + } + rbrs := []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](""), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("ipool")), + Namespace: helpers.GetPointer(gatewayv1.Namespace(testNs)), + }, + }, + }, + } + return RouteRule{ + ValidMatches: true, + Filters: RouteRuleFilters{ + Filters: []Filter{}, + Valid: true, + }, + BackendRefs: refs, + Matches: matches, + RouteBackendRefs: rbrs, + } + } + routeMatches := []gatewayv1.HTTPRouteMatch{ { Path: &gatewayv1.HTTPPathMatch{ @@ -338,6 +378,32 @@ func TestBuildGraph(t *testing.T) { }, } + inferencePool := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNs, + Name: "ipool", + }, + Spec: inference.InferencePoolSpec{ + TargetPorts: []inference.Port{ + {Number: 80}, + }, + }, + } + + ir := createRoute("ir", "gateway-1", "listener-80-1") + ir.Spec.Hostnames = []gatewayv1.Hostname{"inference.example.com"} + // Update the backend ref to point to the InferencePool instead of a Service + ir.Spec.Rules[0].BackendRefs[0] = gatewayv1.HTTPBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Name: gatewayv1.ObjectName(inferencePool.Name), + Namespace: helpers.GetPointer(gatewayv1.Namespace(inferencePool.Namespace)), + }, + }, + } + secret := &v1.Secret{ TypeMeta: metav1.TypeMeta{ Kind: "Secret", @@ -489,7 +555,20 @@ func TestBuildGraph(t *testing.T) { svc1 := &v1.Service{ ObjectMeta: metav1.ObjectMeta{ - Namespace: "test", Name: "foo2", + Namespace: testNs, Name: "foo2", + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + { + Port: 80, + }, + }, + }, + } + + inferenceSvc := &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNs, Name: controller.CreateInferencePoolServiceName(inferencePool.Name), }, Spec: v1.ServiceSpec{ Ports: []v1.ServicePort{ @@ -691,6 +770,7 @@ func TestBuildGraph(t *testing.T) { client.ObjectKeyFromObject(hr1): hr1, client.ObjectKeyFromObject(hr2): hr2, client.ObjectKeyFromObject(hr3): hr3, + client.ObjectKeyFromObject(ir): ir, }, TLSRoutes: map[types.NamespacedName]*v1alpha2.TLSRoute{ client.ObjectKeyFromObject(tr): tr, @@ -700,8 +780,12 @@ func TestBuildGraph(t *testing.T) { client.ObjectKeyFromObject(gr): gr, }, Services: map[types.NamespacedName]*v1.Service{ - client.ObjectKeyFromObject(svc): svc, - client.ObjectKeyFromObject(svc1): svc1, + client.ObjectKeyFromObject(svc): svc, + client.ObjectKeyFromObject(svc1): svc1, + client.ObjectKeyFromObject(inferenceSvc): inferenceSvc, + }, + InferencePools: map[types.NamespacedName]*inference.InferencePool{ + client.ObjectKeyFromObject(inferencePool): inferencePool, }, Namespaces: map[types.NamespacedName]*v1.Namespace{ client.ObjectKeyFromObject(ns): ns, @@ -992,6 +1076,37 @@ func TestBuildGraph(t *testing.T) { }, } + inferenceRoute := &L7Route{ + RouteType: RouteTypeHTTP, + Valid: true, + Attachable: true, + Source: ir, + ParentRefs: []ParentRef{ + { + Idx: 0, + Gateway: &ParentRefGateway{ + NamespacedName: 
client.ObjectKeyFromObject(gw1.Source), + EffectiveNginxProxy: np1Effective, + }, + SectionName: ir.Spec.ParentRefs[0].SectionName, + Attachment: &ParentRefAttachmentStatus{ + Attached: true, + AcceptedHostnames: map[string][]string{ + CreateGatewayListenerKey( + client.ObjectKeyFromObject(gw1.Source), + "listener-80-1", + ): {"inference.example.com"}, + }, + ListenerPort: 80, + }, + }, + }, + Spec: L7RouteSpec{ + Hostnames: ir.Spec.Hostnames, + Rules: []RouteRule{createValidRuleWithInferencePoolBackendRef(routeMatches)}, + }, + } + supportedKindsForListeners := []gatewayv1.RouteGroupKind{ {Kind: gatewayv1.Kind(kinds.HTTPRoute), Group: helpers.GetPointer[gatewayv1.Group](gatewayv1.GroupName)}, {Kind: gatewayv1.Kind(kinds.GRPCRoute), Group: helpers.GetPointer[gatewayv1.Group](gatewayv1.GroupName)}, @@ -1021,6 +1136,7 @@ func TestBuildGraph(t *testing.T) { Routes: map[RouteKey]*L7Route{ CreateRouteKey(hr1): routeHR1, CreateRouteKey(gr): routeGR, + CreateRouteKey(ir): inferenceRoute, }, SupportedKinds: supportedKindsForListeners, L4Routes: map[L4RouteKey]*L4Route{}, @@ -1175,6 +1291,7 @@ func TestBuildGraph(t *testing.T) { CreateRouteKey(hr1): routeHR1, CreateRouteKey(hr3): routeHR3, CreateRouteKey(gr): routeGR, + CreateRouteKey(ir): inferenceRoute, }, L4Routes: map[L4RouteKey]*L4Route{ CreateRouteKeyL4(tr): routeTR, @@ -1199,6 +1316,14 @@ func TestBuildGraph(t *testing.T) { client.ObjectKeyFromObject(svc1): { GatewayNsNames: map[types.NamespacedName]struct{}{{Namespace: testNs, Name: "gateway-1"}: {}}, }, + client.ObjectKeyFromObject(inferenceSvc): { + GatewayNsNames: map[types.NamespacedName]struct{}{{Namespace: testNs, Name: "gateway-1"}: {}}, + }, + }, + ReferencedInferencePools: map[types.NamespacedName]*ReferencedInferencePool{ + client.ObjectKeyFromObject(inferencePool): { + Source: inferencePool, + }, }, ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{ client.ObjectKeyFromObject(cm): { @@ -1382,6 +1507,20 @@ func TestIsReferenced(t *testing.T) { } emptyService := &v1.Service{} + inferenceInGraph := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: "inferenceInGraph", + }, + } + inferenceNotInGraph := &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: "inferenceNotInGraph", + }, + } + emptyInferencePool := &inference.InferencePool{} + createEndpointSlice := func(name string, svcName string) *discoveryV1.EndpointSlice { return &discoveryV1.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ @@ -1461,6 +1600,9 @@ func TestIsReferenced(t *testing.T) { ReferencedServices: map[types.NamespacedName]*ReferencedService{ client.ObjectKeyFromObject(serviceInGraph): {}, }, + ReferencedInferencePools: map[types.NamespacedName]*ReferencedInferencePool{ + client.ObjectKeyFromObject(inferenceInGraph): {}, + }, ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{ client.ObjectKeyFromObject(baseConfigMap): { Source: baseConfigMap, @@ -1562,6 +1704,26 @@ func TestIsReferenced(t *testing.T) { expected: false, }, + // InferencePool tests + { + name: "InferencePool is referenced", + resource: inferenceInGraph, + graph: graph, + expected: true, + }, + { + name: "InferencePool is not referenced", + resource: inferenceNotInGraph, + graph: graph, + expected: false, + }, + { + name: "Empty InferencePool", + resource: emptyInferencePool, + graph: graph, + expected: false, + }, + // EndpointSlice tests { name: "EndpointSlice with Service owner in graph's ReferencedServices is referenced", diff --git 
a/internal/controller/state/graph/grpcroute_test.go b/internal/controller/state/graph/grpcroute_test.go index e78f22fe53..032b6a891f 100644 --- a/internal/controller/state/graph/grpcroute_test.go +++ b/internal/controller/state/graph/grpcroute_test.go @@ -232,6 +232,7 @@ func TestBuildGRPCRoutes(t *testing.T) { grRoutes, test.gateways, snippetsFilters, + nil, ) g.Expect(helpers.Diff(test.expected, routes)).To(BeEmpty()) }) diff --git a/internal/controller/state/graph/httproute.go b/internal/controller/state/graph/httproute.go index 0fbf0cdb49..7ced07a3ff 100644 --- a/internal/controller/state/graph/httproute.go +++ b/internal/controller/state/graph/httproute.go @@ -7,13 +7,16 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/http" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/mirror" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/validation" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) var ( @@ -27,6 +30,7 @@ func buildHTTPRoute( ghr *v1.HTTPRoute, gws map[types.NamespacedName]*Gateway, snippetsFilters map[types.NamespacedName]*SnippetsFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, ) *L7Route { r := &L7Route{ Source: ghr, @@ -62,6 +66,8 @@ func buildHTTPRoute( ghr.Spec.Rules, validator, getSnippetsFilterResolverForNamespace(snippetsFilters, r.Source.GetNamespace()), + inferencePools, + r.Source.GetNamespace(), ) r.Spec.Rules = rules @@ -113,6 +119,7 @@ func buildHTTPMirrorRoutes( tmpMirrorRoute, gateways, snippetsFilters, + nil, ) if mirrorRoute != nil { @@ -166,6 +173,8 @@ func processHTTPRouteRule( rulePath *field.Path, validator validation.HTTPFieldsValidator, resolveExtRefFunc resolveExtRefFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, + routeNamespace string, ) (RouteRule, routeRuleErrors) { var errors routeRuleErrors @@ -206,10 +215,32 @@ func processHTTPRouteRule( interfaceFilters = append(interfaceFilters, filter) } } - rbr := RouteBackendRef{ - BackendRef: b.BackendRef, - Filters: interfaceFilters, + + var rbr RouteBackendRef + // If route specifies an InferencePool backend, we need to convert it to its associated + // headless Service backend (that we created), so nginx config can be built properly. + // Only do this if the InferencePool actually exists. 
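+		// For example, a backend that names the InferencePool "ipool" is rewritten to reference
+		// the headless Service "ipool-pool-svc" (via controller.CreateInferencePoolServiceName),
+		// preserving the original namespace and weight.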
+ if inferencePoolBackend(b, routeNamespace, inferencePools) { + svcName := controller.CreateInferencePoolServiceName(string(b.Name)) + rbr = RouteBackendRef{ + IsInferencePool: true, + BackendRef: v1.BackendRef{ + BackendObjectReference: v1.BackendObjectReference{ + Group: helpers.GetPointer[v1.Group](""), + Kind: helpers.GetPointer[v1.Kind](kinds.Service), + Name: v1.ObjectName(svcName), + Namespace: b.Namespace, + }, + Weight: b.Weight, + }, + } + } else { + rbr = RouteBackendRef{ + BackendRef: b.BackendRef, + } } + + rbr.Filters = interfaceFilters backendRefs = append(backendRefs, rbr) } @@ -241,6 +272,8 @@ func processHTTPRouteRules( specRules []v1.HTTPRouteRule, validator validation.HTTPFieldsValidator, resolveExtRefFunc resolveExtRefFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, + routeNamespace string, ) (rules []RouteRule, valid bool, conds []conditions.Condition) { rules = make([]RouteRule, len(specRules)) @@ -257,6 +290,8 @@ func processHTTPRouteRules( rulePath, validator, resolveExtRefFunc, + inferencePools, + routeNamespace, ) if rr.ValidMatches && rr.Filters.Valid { @@ -298,6 +333,32 @@ func processHTTPRouteRules( return rules, valid, conds } +// inferencePoolBackend returns if a Route references an InferencePool backend +// and that InferencePool exists. +func inferencePoolBackend( + backendRef v1.HTTPBackendRef, + routeNamespace string, + inferencePools map[types.NamespacedName]*inference.InferencePool, +) bool { + if backendRef.Group != nil && + *backendRef.Group == inferenceAPIGroup && + *backendRef.Kind == kinds.InferencePool { + namespace := routeNamespace + if backendRef.Namespace != nil { + namespace = string(*backendRef.Namespace) + } + key := types.NamespacedName{ + Name: string(backendRef.Name), + Namespace: namespace, + } + if _, exists := inferencePools[key]; exists { + return true + } + } + + return false +} + func validateMatch( validator validation.HTTPFieldsValidator, match v1.HTTPRouteMatch, diff --git a/internal/controller/state/graph/httproute_test.go b/internal/controller/state/graph/httproute_test.go index f765ec78d4..ce60506f57 100644 --- a/internal/controller/state/graph/httproute_test.go +++ b/internal/controller/state/graph/httproute_test.go @@ -9,6 +9,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" ngfAPI "github.com/nginx/nginx-gateway-fabric/v2/apis/v1alpha1" @@ -118,6 +119,17 @@ var expRouteBackendRef = RouteBackendRef{ }, } +func createInferencePoolBackend(name, namespace string) gatewayv1.BackendRef { + return gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Name: gatewayv1.ObjectName(name), + Namespace: helpers.GetPointer(gatewayv1.Namespace(namespace)), + }, + } +} + func TestBuildHTTPRoutes(t *testing.T) { t.Parallel() @@ -264,6 +276,7 @@ func TestBuildHTTPRoutes(t *testing.T) { map[types.NamespacedName]*gatewayv1.GRPCRoute{}, test.gateways, snippetsFilters, + nil, ) g.Expect(helpers.Diff(test.expected, routes)).To(BeEmpty()) }) @@ -382,6 +395,21 @@ func TestBuildHTTPRoute(t *testing.T) { addFilterToPath(hrInvalidAndUnresolvableSnippetsFilter, "/filter", invalidSnippetsFilterExtRef) addFilterToPath(hrInvalidAndUnresolvableSnippetsFilter, "/filter", 
unresolvableSnippetsFilterExtRef) + // routes with an inference pool backend + hrInferencePool := createHTTPRoute("hr", gatewayNsName.Name, "example.com", "/") + hrInferencePool.Spec.Rules[0].BackendRefs = []gatewayv1.HTTPBackendRef{ + { + BackendRef: createInferencePoolBackend("ipool", gatewayNsName.Namespace), + }, + } + // route with an inference pool backend that does not exist + hrInferencePoolDoesNotExist := createHTTPRoute("hr", gatewayNsName.Name, "example.com", "/") + hrInferencePoolDoesNotExist.Spec.Rules[0].BackendRefs = []gatewayv1.HTTPBackendRef{ + { + BackendRef: createInferencePoolBackend("ipool-does-not-exist", gatewayNsName.Namespace), + }, + } + validatorInvalidFieldsInRule := &validationfakes.FakeHTTPFieldsValidator{ ValidatePathInMatchStub: func(path string) error { if path == invalidPath { @@ -983,6 +1011,86 @@ func TestBuildHTTPRoute(t *testing.T) { }, name: "valid route with unsupported field", }, + { + validator: &validationfakes.FakeHTTPFieldsValidator{}, + hr: hrInferencePool, + expected: &L7Route{ + RouteType: RouteTypeHTTP, + Source: hrInferencePool, + ParentRefs: []ParentRef{ + { + Idx: 0, + Gateway: CreateParentRefGateway(gw), + SectionName: hrInferencePool.Spec.ParentRefs[0].SectionName, + }, + }, + Valid: true, + Attachable: true, + Spec: L7RouteSpec{ + Hostnames: hrInferencePool.Spec.Hostnames, + Rules: []RouteRule{ + { + ValidMatches: true, + Filters: RouteRuleFilters{ + Valid: true, + Filters: []Filter{}, + }, + Matches: hrInferencePool.Spec.Rules[0].Matches, + RouteBackendRefs: []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](""), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: "ipool-pool-svc", + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + }, + }, + }, + }, + }, + }, + }, + }, + name: "route with an inference pool backend gets converted to service", + }, + { + validator: &validationfakes.FakeHTTPFieldsValidator{}, + hr: hrInferencePoolDoesNotExist, + expected: &L7Route{ + RouteType: RouteTypeHTTP, + Source: hrInferencePoolDoesNotExist, + ParentRefs: []ParentRef{ + { + Idx: 0, + Gateway: CreateParentRefGateway(gw), + SectionName: hrInferencePoolDoesNotExist.Spec.ParentRefs[0].SectionName, + }, + }, + Valid: true, + Attachable: true, + Spec: L7RouteSpec{ + Hostnames: hrInferencePoolDoesNotExist.Spec.Hostnames, + Rules: []RouteRule{ + { + ValidMatches: true, + Filters: RouteRuleFilters{ + Valid: true, + Filters: []Filter{}, + }, + Matches: hrInferencePoolDoesNotExist.Spec.Rules[0].Matches, + RouteBackendRefs: []RouteBackendRef{ + { + BackendRef: createInferencePoolBackend("ipool-does-not-exist", gatewayNsName.Namespace), + }, + }, + }, + }, + }, + }, + name: "route with an inference pool backend that doesn't exist", + }, } gws := map[types.NamespacedName]*Gateway{ @@ -997,8 +1105,11 @@ func TestBuildHTTPRoute(t *testing.T) { snippetsFilters := map[types.NamespacedName]*SnippetsFilter{ {Namespace: "test", Name: "sf"}: {Valid: true}, } + inferencePools := map[types.NamespacedName]*inference.InferencePool{ + {Namespace: "test", Name: "ipool"}: {}, + } - route := buildHTTPRoute(test.validator, test.hr, gws, snippetsFilters) + route := buildHTTPRoute(test.validator, test.hr, gws, snippetsFilters, inferencePools) g.Expect(helpers.Diff(test.expected, route)).To(BeEmpty()) }) } @@ -1130,7 +1241,7 @@ func TestBuildHTTPRouteWithMirrorRoutes(t *testing.T) { g := NewWithT(t) routes := 
map[RouteKey]*L7Route{} - l7route := buildHTTPRoute(validator, hr, gateways, snippetsFilters) + l7route := buildHTTPRoute(validator, hr, gateways, snippetsFilters, nil) g.Expect(l7route).NotTo(BeNil()) buildHTTPMirrorRoutes(routes, l7route, hr, gateways, snippetsFilters) @@ -1772,6 +1883,7 @@ func TestUnsupportedFieldsErrors(t *testing.T) { func TestProcessHTTPRouteRules_UnsupportedFields(t *testing.T) { t.Parallel() + routeNamespace := "test" tests := []struct { name string @@ -1833,6 +1945,8 @@ func TestProcessHTTPRouteRules_UnsupportedFields(t *testing.T) { test.specRules, validation.SkipValidator{}, nil, + nil, + routeNamespace, ) g.Expect(valid).To(Equal(test.expectedValid)) diff --git a/internal/controller/state/graph/inferencepools.go b/internal/controller/state/graph/inferencepools.go new file mode 100644 index 0000000000..ada688bcc5 --- /dev/null +++ b/internal/controller/state/graph/inferencepools.go @@ -0,0 +1,82 @@ +package graph + +import ( + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" + + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" +) + +// A ReferencedInferencePool represents an InferencePool that is referenced by a Route and the +// Gateways it belongs to. +type ReferencedInferencePool struct { + // Source is the original InferencePool that this ReferencedInferencePool is based on. + Source *inference.InferencePool +} + +// buildReferencedInferencePools builds a map of InferencePools that are referenced by HTTPRoutes +// per Gateway that we process. +func buildReferencedInferencePools( + routes map[RouteKey]*L7Route, + gws map[types.NamespacedName]*Gateway, + inferencePools map[types.NamespacedName]*inference.InferencePool, +) map[types.NamespacedName]*ReferencedInferencePool { + referencedInferencePools := make(map[types.NamespacedName]*ReferencedInferencePool) + + for _, gw := range gws { + if gw == nil { + continue + } + + processInferencePoolsForGateway(routes, gw, referencedInferencePools, inferencePools) + } + + if len(referencedInferencePools) == 0 { + return nil + } + + return referencedInferencePools +} + +// processInferencePoolsForGateway processes all InferencePools that belong to the given gateway. 
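+// It walks every valid route attached to the Gateway and records each backend that either
+// references an InferencePool directly or has already been rewritten to the pool's headless
+// Service (IsInferencePool). An entry is recorded even when the InferencePool object is not
+// present in the cluster state; Source is only populated when it is.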
+func processInferencePoolsForGateway( + routes map[RouteKey]*L7Route, + gw *Gateway, + referencedInferencePools map[types.NamespacedName]*ReferencedInferencePool, + inferencePools map[types.NamespacedName]*inference.InferencePool, +) { + gwKey := client.ObjectKeyFromObject(gw.Source) + for _, route := range routes { + if !route.Valid || !routeBelongsToGateway(route.ParentRefs, gwKey) { + continue + } + + for _, rule := range route.Spec.Rules { + for _, ref := range rule.RouteBackendRefs { + if !ref.IsInferencePool && (ref.Kind == nil || *ref.Kind != kinds.InferencePool) { + continue + } + + namespace := route.Source.GetNamespace() + if ref.Namespace != nil { + namespace = string(*ref.Namespace) + } + + poolName := types.NamespacedName{ + Name: controller.GetInferencePoolName(string(ref.Name)), + Namespace: namespace, + } + + if _, referenced := referencedInferencePools[poolName]; !referenced { + referencedInferencePools[poolName] = &ReferencedInferencePool{} + } + + if pool, exists := inferencePools[poolName]; exists { + referencedInferencePools[poolName].Source = pool + } + } + } + } +} diff --git a/internal/controller/state/graph/inferencepools_test.go b/internal/controller/state/graph/inferencepools_test.go new file mode 100644 index 0000000000..d67331b5e7 --- /dev/null +++ b/internal/controller/state/graph/inferencepools_test.go @@ -0,0 +1,249 @@ +package graph + +import ( + "testing" + + . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" + gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" + + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" +) + +func TestBuildReferencedInferencePools(t *testing.T) { + t.Parallel() + + gwNsName := types.NamespacedName{Namespace: "test", Name: "gwNsname"} + gws := map[types.NamespacedName]*Gateway{ + gwNsName: { + Source: &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: gwNsName.Namespace, + Name: gwNsName.Name, + }, + }, + }, + } + + getNormalRoute := func() *L7Route { + return &L7Route{ + Source: &gatewayv1.HTTPRoute{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "valid-route", + }, + }, + ParentRefs: []ParentRef{ + { + Gateway: &ParentRefGateway{NamespacedName: gwNsName}, + }, + }, + Valid: true, + Spec: L7RouteSpec{ + Rules: []RouteRule{ + { + RouteBackendRefs: []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + Name: "pool", + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + }, + }, + }, + }, + }, + }, + }, + } + } + + getModifiedRoute := func(mod func(route *L7Route) *L7Route) *L7Route { + return mod(getNormalRoute()) + } + + validRoute := getNormalRoute() + + invalidRoute := getModifiedRoute(func(route *L7Route) *L7Route { + route.Valid = false + return route + }) + + tests := []struct { + routes map[RouteKey]*L7Route + gws map[types.NamespacedName]*Gateway + inferencePools map[types.NamespacedName]*inference.InferencePool + expPools map[types.NamespacedName]*ReferencedInferencePool + name string + }{ + { + name: "no gateways", + gws: nil, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): validRoute, + }, + inferencePools: 
map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: nil, + }, + { + name: "invalid route", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): invalidRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: nil, + }, + { + name: "valid route with referenced inferencepool", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): validRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + }, + }, + { + name: "route with service backend", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules = []RouteRule{ + { + RouteBackendRefs: []RouteBackendRef{ + { + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + }, + }, + }, + }, + }, + } + return route + }), + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: nil, + }, + { + name: "route with both inferencepool and service backends", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules[0].RouteBackendRefs = append(route.Spec.Rules[0].RouteBackendRefs, + RouteBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + }, + }, + }, + ) + return route + }), + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + }, + }, + { + name: "route with headless InferencePool Service backend", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules = []RouteRule{ + { + RouteBackendRefs: []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("pool")), + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + }, + }, + }, + }, + }, + } + return route + }), + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + 
Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + }, + }, + { + name: "inferencepool backend with no namespace uses route namespace", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules[0].RouteBackendRefs[0].Namespace = nil + return route + }), + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + }, + }, + }, + { + name: "referenced inferencepool does not exist", + gws: gws, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(validRoute.Source): validRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{}, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: nil, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + pools := buildReferencedInferencePools(test.routes, test.gws, test.inferencePools) + g.Expect(pools).To(Equal(test.expPools)) + }) + } +} diff --git a/internal/controller/state/graph/reference_grant.go b/internal/controller/state/graph/reference_grant.go index b827d47024..3fa04ecc7a 100644 --- a/internal/controller/state/graph/reference_grant.go +++ b/internal/controller/state/graph/reference_grant.go @@ -51,7 +51,16 @@ func toSecret(nsname types.NamespacedName) toResource { func toService(nsname types.NamespacedName) toResource { return toResource{ - kind: "Service", + kind: kinds.Service, + name: nsname.Name, + namespace: nsname.Namespace, + } +} + +func toInferencePool(nsname types.NamespacedName) toResource { + return toResource{ + group: inferenceAPIGroup, + kind: kinds.InferencePool, name: nsname.Name, namespace: nsname.Namespace, } @@ -139,6 +148,7 @@ func (r *referenceGrantResolver) refAllowed(to toResource, from fromResource) bo // of the particular kind in the namespace allInNamespaceKey := allowedReference{ to: toResource{ + group: to.group, kind: to.kind, namespace: to.namespace, }, diff --git a/internal/controller/state/graph/reference_grant_test.go b/internal/controller/state/graph/reference_grant_test.go index 21fee614e1..bf97f22c25 100644 --- a/internal/controller/state/graph/reference_grant_test.go +++ b/internal/controller/state/graph/reference_grant_test.go @@ -189,7 +189,7 @@ func TestToService(t *testing.T) { ref := toService(types.NamespacedName{Namespace: "ns", Name: "service"}) exp := toResource{ - kind: "Service", + kind: kinds.Service, namespace: "ns", name: "service", } @@ -198,6 +198,21 @@ func TestToService(t *testing.T) { g.Expect(ref).To(Equal(exp)) } +func TestToInferencePool(t *testing.T) { + t.Parallel() + ref := toInferencePool(types.NamespacedName{Namespace: "ns", Name: "inference-pool"}) + + exp := toResource{ + group: inferenceAPIGroup, + kind: kinds.InferencePool, + namespace: "ns", + name: "inference-pool", + } + + g := NewWithT(t) + g.Expect(ref).To(Equal(exp)) +} + func TestFromGateway(t *testing.T) { t.Parallel() ref := fromGateway("ns") @@ -306,7 +321,24 @@ func TestRefAllowedFrom(t *testing.T) { }, To: []v1beta1.ReferenceGrantTo{ { - Kind: "Service", + Kind: kinds.Service, + }, + }, + }, + }, + 
{Namespace: allowedHTTPRouteNs, Name: "hr-2-ipool"}: { + Spec: v1beta1.ReferenceGrantSpec{ + From: []v1beta1.ReferenceGrantFrom{ + { + Group: v1beta1.GroupName, + Kind: kinds.HTTPRoute, + Namespace: v1beta1.Namespace(hrNs), + }, + }, + To: []v1beta1.ReferenceGrantTo{ + { + Group: inferenceAPIGroup, + Kind: kinds.InferencePool, }, }, }, @@ -322,7 +354,7 @@ func TestRefAllowedFrom(t *testing.T) { }, To: []v1beta1.ReferenceGrantTo{ { - Kind: "Service", + Kind: kinds.Service, }, }, }, @@ -338,7 +370,7 @@ func TestRefAllowedFrom(t *testing.T) { }, To: []v1beta1.ReferenceGrantTo{ { - Kind: "Service", + Kind: kinds.Service, }, }, }, @@ -375,6 +407,18 @@ func TestRefAllowedFrom(t *testing.T) { toResource: toService(notAllowedNsName), expAllowed: false, }, + { + name: "ref allowed from httproute to inferencepool", + refAllowedFrom: fromHTTPRoute(hrNs), + toResource: toInferencePool(allowedHTTPRouteNsName), + expAllowed: true, + }, + { + name: "ref not allowed from httproute to inferencepool", + refAllowedFrom: fromHTTPRoute(hrNs), + toResource: toInferencePool(notAllowedNsName), + expAllowed: false, + }, { name: "ref allowed from grpcroute to service", refAllowedFrom: fromGRPCRoute(grNs), diff --git a/internal/controller/state/graph/route_common.go b/internal/controller/state/graph/route_common.go index ba7e117c46..f7429a594a 100644 --- a/internal/controller/state/graph/route_common.go +++ b/internal/controller/state/graph/route_common.go @@ -10,6 +10,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" v1alpha "sigs.k8s.io/gateway-api/apis/v1alpha2" @@ -19,7 +20,10 @@ import ( "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) -const wildcardHostname = "~^" +const ( + wildcardHostname = "~^" + inferenceAPIGroup = "inference.networking.k8s.io" +) // ParentRef describes a reference to a parent in a Route. type ParentRef struct { @@ -157,11 +161,15 @@ type RouteRule struct { // RouteBackendRef is a wrapper for v1.BackendRef and any BackendRef filters from the HTTPRoute or GRPCRoute. type RouteBackendRef struct { + v1.BackendRef + // If this backend is defined in a RequestMirror filter, this value will indicate the filter's index. MirrorBackendIdx *int - v1.BackendRef Filters []any + + // IsInferencePool indicates if this backend is an InferencePool disguised as a Service. + IsInferencePool bool } // CreateRouteKey takes a client.Object and creates a RouteKey. 
@@ -244,6 +252,7 @@ func buildRoutesForGateways( grpcRoutes map[types.NamespacedName]*v1.GRPCRoute, gateways map[types.NamespacedName]*Gateway, snippetsFilters map[types.NamespacedName]*SnippetsFilter, + inferencePools map[types.NamespacedName]*inference.InferencePool, ) map[RouteKey]*L7Route { if len(gateways) == 0 { return nil @@ -252,7 +261,7 @@ func buildRoutesForGateways( routes := make(map[RouteKey]*L7Route) for _, route := range httpRoutes { - r := buildHTTPRoute(validator, route, gateways, snippetsFilters) + r := buildHTTPRoute(validator, route, gateways, snippetsFilters, inferencePools) if r == nil { continue } diff --git a/internal/controller/state/graph/service.go b/internal/controller/state/graph/service.go index d43ecacfd8..3a702facc9 100644 --- a/internal/controller/state/graph/service.go +++ b/internal/controller/state/graph/service.go @@ -34,7 +34,6 @@ func buildReferencedServices( } processL7RoutesForGateway(l7routes, gw, gwNsName, referencedServices, services) - processL4RoutesForGateway(l4Routes, gw, gwNsName, referencedServices, services) } diff --git a/internal/framework/controller/resource.go b/internal/framework/controller/resource.go index a0d49e3789..d17662169e 100644 --- a/internal/framework/controller/resource.go +++ b/internal/framework/controller/resource.go @@ -2,10 +2,31 @@ package controller import ( "fmt" + "strings" ) +// inferencePoolServiceSuffix is the suffix of the headless Service name for an InferencePool. +const inferencePoolServiceSuffix = "-pool-svc" + // CreateNginxResourceName creates the base resource name for all nginx resources // created by the control plane. func CreateNginxResourceName(prefix, suffix string) string { return fmt.Sprintf("%s-%s", prefix, suffix) } + +// CreateInferencePoolServiceName creates the name for a headless Service that +// we create for an InferencePool. +func CreateInferencePoolServiceName(name string) string { + svcName := fmt.Sprintf("%s%s", name, inferencePoolServiceSuffix) + // if InferencePool name is already at or near max length, just use that name + if len(svcName) > 253 { + return name + } + + return svcName +} + +// GetInferencePoolName returns the name of the InferencePool for a given headless Service name. +func GetInferencePoolName(serviceName string) string { + return strings.TrimSuffix(serviceName, inferencePoolServiceSuffix) +} diff --git a/internal/framework/kinds/kinds.go b/internal/framework/kinds/kinds.go index 35ca8e2b00..b59b06df96 100644 --- a/internal/framework/kinds/kinds.go +++ b/internal/framework/kinds/kinds.go @@ -25,6 +25,12 @@ const ( BackendTLSPolicy = "BackendTLSPolicy" ) +// Gateway API Inference Extension kinds. +const ( + // InferencePool is the InferencePool kind. + InferencePool = "InferencePool" +) + // Core API Kinds. const ( // Service is the Service kind. 
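Illustrative sketch (not part of the diffs above or below): a hypothetical test in package controller showing how the two naming helpers round-trip, using a made-up pool name. A backend reference that pointed at the InferencePool "ipool" is rewritten to reference the headless Service "ipool-pool-svc", which is why the TestBuildHTTPRoute case above expects Kind: Service with Name: "ipool-pool-svc" and IsInferencePool: true.

    package controller

    import "testing"

    // TestInferencePoolServiceNameRoundTrip is a hypothetical example, not part of the patch.
    func TestInferencePoolServiceNameRoundTrip(t *testing.T) {
    	svcName := CreateInferencePoolServiceName("ipool") // "ipool-pool-svc"
    	if got := GetInferencePoolName(svcName); got != "ipool" {
    		t.Fatalf("round-trip returned %q, want %q", got, "ipool")
    	}
    }
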
diff --git a/tests/go.mod b/tests/go.mod index 892675abdb..297e8b95a6 100644 --- a/tests/go.mod +++ b/tests/go.mod @@ -68,7 +68,7 @@ require ( golang.org/x/sys v0.36.0 // indirect golang.org/x/term v0.35.0 // indirect golang.org/x/text v0.30.0 // indirect - golang.org/x/time v0.9.0 // indirect + golang.org/x/time v0.12.0 // indirect golang.org/x/tools v0.37.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect diff --git a/tests/go.sum b/tests/go.sum index be0966140a..b24e1ba391 100644 --- a/tests/go.sum +++ b/tests/go.sum @@ -213,8 +213,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= -golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= -golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= From e2b3252925d32560af0318ebdc0f1ddca079ddcb Mon Sep 17 00:00:00 2001 From: Saylor Berman Date: Thu, 18 Sep 2025 12:38:25 -0600 Subject: [PATCH 03/10] Add golang shim for comms with EPP (#3930) Problem: In order for NGINX to get the endpoint of the AI workload from the EndpointPicker, it needs to send a gRPC request using the proper protobuf protocol. Solution: A simple Go server is injected as an additional container when the inference extension feature is enabled, that will listen for a request from our (upcoming) NJS module, and forward to the configured EPP to get a response in a header. 
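To make the shim's request/response contract concrete, here is a minimal sketch of a caller, standing in for the NJS module, written as a plain Go HTTP client. It is illustrative only: the EPP host, port, and body are placeholder values, while the 127.0.0.1:54800 address, the X-EPP-Host/X-EPP-Port headers, and the destination-endpoint response header come from the new endpoint_picker.go below.

    package main

    import (
    	"fmt"
    	"net/http"
    	"strings"

    	eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
    )

    func main() {
    	// Placeholder body; in practice this is the client's original request body.
    	body := strings.NewReader(`{"model":"my-model","prompt":"hi"}`)

    	// The shim listens on localhost inside the nginx pod.
    	req, err := http.NewRequest(http.MethodPost, "http://127.0.0.1:54800/", body)
    	if err != nil {
    		panic(err)
    	}
    	// Tell the shim which EndpointPicker to dial over gRPC (example values).
    	req.Header.Set("X-EPP-Host", "epp.default.svc.cluster.local")
    	req.Header.Set("X-EPP-Port", "9002")

    	resp, err := http.DefaultClient.Do(req)
    	if err != nil {
    		panic(err)
    	}
    	defer resp.Body.Close()

    	// On success, the chosen inference endpoint is returned in a response header.
    	fmt.Println(resp.Header.Get(eppMetadata.DestinationEndpointKey))
    }
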
--- cmd/gateway/commands.go | 14 + cmd/gateway/endpoint_picker.go | 190 +++++++++++++ cmd/gateway/endpoint_picker_test.go | 261 ++++++++++++++++++ cmd/gateway/main.go | 1 + go.mod | 4 + go.sum | 8 + internal/controller/manager.go | 1 + internal/controller/provisioner/objects.go | 25 ++ .../controller/provisioner/objects_test.go | 54 ++++ .../controller/provisioner/provisioner.go | 1 + 10 files changed, 559 insertions(+) create mode 100644 cmd/gateway/endpoint_picker.go create mode 100644 cmd/gateway/endpoint_picker_test.go diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go index a38cef2dd6..a152ea09dd 100644 --- a/cmd/gateway/commands.go +++ b/cmd/gateway/commands.go @@ -757,6 +757,20 @@ func createSleepCommand() *cobra.Command { return cmd } +func createEndpointPickerCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "endpoint-picker", + Short: "Shim server for communication between NGINX and the Gateway API Inference Extension Endpoint Picker", + RunE: func(_ *cobra.Command, _ []string) error { + logger := ctlrZap.New().WithName("endpoint-picker-shim") + handler := createEndpointPickerHandler(realExtProcClientFactory(), logger) + return endpointPickerServer(handler) + }, + } + + return cmd +} + func parseFlags(flags *pflag.FlagSet) ([]string, []string) { var flagKeys, flagValues []string diff --git a/cmd/gateway/endpoint_picker.go b/cmd/gateway/endpoint_picker.go new file mode 100644 index 0000000000..7c67a83671 --- /dev/null +++ b/cmd/gateway/endpoint_picker.go @@ -0,0 +1,190 @@ +package main + +import ( + "errors" + "fmt" + "io" + "net" + "net/http" + "time" + + corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" + extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + "github.com/go-logr/logr" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata" +) + +const ( + // defaultPort is the default port for this server to listen on. If collisions become a problem, + // we can make this configurable via the NginxProxy resource. + defaultPort = 54800 // why 54800? Sum "nginx" in ASCII and multiply by 100. + // eppEndpointHostHeader is the HTTP header used to specify the EPP endpoint host, set by the NJS module caller. + eppEndpointHostHeader = "X-EPP-Host" + // eppEndpointPortHeader is the HTTP header used to specify the EPP endpoint port, set by the NJS module caller. + eppEndpointPortHeader = "X-EPP-Port" +) + +// extProcClientFactory creates a new ExternalProcessorClient and returns a close function. +type extProcClientFactory func(target string) (extprocv3.ExternalProcessorClient, func() error, error) + +// endpointPickerServer starts an HTTP server on the given port with the provided handler. +func endpointPickerServer(handler http.Handler) error { + server := &http.Server{ + Addr: fmt.Sprintf("127.0.0.1:%d", defaultPort), + Handler: handler, + ReadHeaderTimeout: 10 * time.Second, + } + return server.ListenAndServe() +} + +// realExtProcClientFactory returns a factory that creates a new gRPC connection and client per request. 
+func realExtProcClientFactory() extProcClientFactory { + return func(target string) (extprocv3.ExternalProcessorClient, func() error, error) { + conn, err := grpc.NewClient(target, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, nil, err + } + client := extprocv3.NewExternalProcessorClient(conn) + return client, conn.Close, nil + } +} + +// createEndpointPickerHandler returns an http.Handler that forwards requests to the EndpointPicker. +func createEndpointPickerHandler(factory extProcClientFactory, logger logr.Logger) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + host := r.Header.Get(eppEndpointHostHeader) + port := r.Header.Get(eppEndpointPortHeader) + if host == "" || port == "" { + msg := fmt.Sprintf( + "missing at least one of required headers: %s and %s", + eppEndpointHostHeader, + eppEndpointPortHeader, + ) + logger.Error(errors.New(msg), "error contacting EndpointPicker") + http.Error(w, msg, http.StatusBadRequest) + return + } + + target := net.JoinHostPort(host, port) + logger.Info("Getting inference workload endpoint from EndpointPicker", "endpointPicker", target) + + client, closeConn, err := factory(target) + if err != nil { + logger.Error(err, "error creating gRPC client") + http.Error(w, fmt.Sprintf("error creating gRPC client: %v", err), http.StatusInternalServerError) + return + } + defer func() { + if err := closeConn(); err != nil { + logger.Error(err, "error closing gRPC connection") + } + }() + + stream, err := client.Process(r.Context()) + if err != nil { + logger.Error(err, "error opening ext_proc stream") + http.Error(w, fmt.Sprintf("error opening ext_proc stream: %v", err), http.StatusBadGateway) + return + } + + if code, err := sendRequest(stream, r); err != nil { + logger.Error(err, "error sending request") + http.Error(w, err.Error(), code) + return + } + + // Receive response and extract header + for { + resp, err := stream.Recv() + if errors.Is(err, io.EOF) { + break // End of stream + } else if err != nil { + logger.Error(err, "error receiving from ext_proc") + http.Error(w, fmt.Sprintf("error receiving from ext_proc: %v", err), http.StatusBadGateway) + return + } + + if ir := resp.GetImmediateResponse(); ir != nil { + code := int(ir.GetStatus().GetCode()) + body := ir.GetBody() + logger.Error(fmt.Errorf("code: %d, body: %s", code, body), "received immediate response") + http.Error(w, string(body), code) + return + } + + headers := resp.GetRequestHeaders().GetResponse().GetHeaderMutation().GetSetHeaders() + for _, h := range headers { + if h.GetHeader().GetKey() == eppMetadata.DestinationEndpointKey { + endpoint := string(h.GetHeader().GetRawValue()) + w.Header().Set(h.GetHeader().GetKey(), endpoint) + logger.Info("Found endpoint", "endpoint", endpoint) + } + } + } + w.WriteHeader(http.StatusOK) + }) +} + +func sendRequest(stream extprocv3.ExternalProcessor_ProcessClient, r *http.Request) (int, error) { + if err := stream.Send(buildHeaderRequest(r)); err != nil { + return http.StatusBadGateway, fmt.Errorf("error sending headers: %w", err) + } + + bodyReq, err := buildBodyRequest(r) + if err != nil { + return http.StatusInternalServerError, fmt.Errorf("error building body request: %w", err) + } + + if err := stream.Send(bodyReq); err != nil { + return http.StatusBadGateway, fmt.Errorf("error sending body: %w", err) + } + + if err := stream.CloseSend(); err != nil { + return http.StatusInternalServerError, fmt.Errorf("error closing stream: %w", err) + } + + return 0, nil +} + 
+func buildHeaderRequest(r *http.Request) *extprocv3.ProcessingRequest { + headerList := make([]*corev3.HeaderValue, 0, len(r.Header)) + headerMap := &corev3.HeaderMap{ + Headers: headerList, + } + + for key, values := range r.Header { + for _, value := range values { + headerMap.Headers = append(headerMap.Headers, &corev3.HeaderValue{ + Key: key, + Value: value, + }) + } + } + + return &extprocv3.ProcessingRequest{ + Request: &extprocv3.ProcessingRequest_RequestHeaders{ + RequestHeaders: &extprocv3.HttpHeaders{ + Headers: headerMap, + EndOfStream: false, + }, + }, + } +} + +func buildBodyRequest(r *http.Request) (*extprocv3.ProcessingRequest, error) { + body, err := io.ReadAll(r.Body) + if err != nil { + return nil, fmt.Errorf("error reading request body: %w", err) + } + + return &extprocv3.ProcessingRequest{ + Request: &extprocv3.ProcessingRequest_RequestBody{ + RequestBody: &extprocv3.HttpBody{ + Body: body, + EndOfStream: true, + }, + }, + }, nil +} diff --git a/cmd/gateway/endpoint_picker_test.go b/cmd/gateway/endpoint_picker_test.go new file mode 100644 index 0000000000..99808348fc --- /dev/null +++ b/cmd/gateway/endpoint_picker_test.go @@ -0,0 +1,261 @@ +package main + +import ( + "context" + "errors" + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + + corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" + extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" + typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3" + "github.com/go-logr/logr" + . "github.com/onsi/gomega" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" + eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata" +) + +type mockExtProcClient struct { + ProcessFunc func( + context.Context, + ...grpc.CallOption, + ) (extprocv3.ExternalProcessor_ProcessClient, error) +} + +func (m *mockExtProcClient) Process( + ctx context.Context, + opts ...grpc.CallOption, +) (extprocv3.ExternalProcessor_ProcessClient, error) { + if m.ProcessFunc != nil { + return m.ProcessFunc(ctx, opts...) 
+ } + return nil, errors.New("not implemented") +} + +type mockProcessClient struct { + SendFunc func(*extprocv3.ProcessingRequest) error + RecvFunc func() (*extprocv3.ProcessingResponse, error) + CloseSendFunc func() error + Ctx context.Context +} + +func (m *mockProcessClient) Send(req *extprocv3.ProcessingRequest) error { + if m.SendFunc != nil { + return m.SendFunc(req) + } + return nil +} + +func (m *mockProcessClient) Recv() (*extprocv3.ProcessingResponse, error) { + if m.RecvFunc != nil { + return m.RecvFunc() + } + return nil, io.EOF +} + +func (*mockProcessClient) RecvMsg(any) error { return nil } +func (*mockProcessClient) SendMsg(any) error { return nil } + +func (m *mockProcessClient) CloseSend() error { + if m.CloseSendFunc != nil { + return m.CloseSendFunc() + } + return nil +} + +func (m *mockProcessClient) Context() context.Context { + if m.Ctx != nil { + return m.Ctx + } + return context.Background() +} + +func (*mockProcessClient) Header() (metadata.MD, error) { return nil, nil } //nolint:nilnil // interface satisfier +func (*mockProcessClient) Trailer() metadata.MD { return nil } + +func TestEndpointPickerHandler_Success(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + // Prepare mock client to simulate gRPC responses + callCount := 0 + client := &mockProcessClient{ + SendFunc: func(*extprocv3.ProcessingRequest) error { return nil }, + RecvFunc: func() (*extprocv3.ProcessingResponse, error) { + if callCount == 0 { + callCount++ + resp := &extprocv3.ProcessingResponse{ + Response: &extprocv3.ProcessingResponse_RequestHeaders{ + RequestHeaders: &extprocv3.HeadersResponse{ + Response: &extprocv3.CommonResponse{ + HeaderMutation: &extprocv3.HeaderMutation{ + SetHeaders: []*corev3.HeaderValueOption{{ + Header: &corev3.HeaderValue{ + Key: eppMetadata.DestinationEndpointKey, + RawValue: []byte("test-value"), + }, + }}, + }, + }, + }, + }, + } + return resp, nil + } + return nil, io.EOF + }, + } + + extProcClient := &mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return client, nil + }, + } + + factory := func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extProcClient, func() error { return nil }, nil + } + + h := createEndpointPickerHandler(factory, logr.Discard()) + req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body")) + req.Header.Set(eppEndpointHostHeader, "test-host") + req.Header.Set(eppEndpointPortHeader, "1234") + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + h.ServeHTTP(w, req) + + resp := w.Result() + g.Expect(resp.StatusCode).To(Equal(http.StatusOK)) + g.Expect(resp.Header.Get(eppMetadata.DestinationEndpointKey)).To(Equal("test-value")) +} + +func TestEndpointPickerHandler_ImmediateResponse(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + client := &mockProcessClient{ + SendFunc: func(*extprocv3.ProcessingRequest) error { return nil }, + RecvFunc: func() (*extprocv3.ProcessingResponse, error) { + resp := &extprocv3.ProcessingResponse{ + Response: &extprocv3.ProcessingResponse_ImmediateResponse{ + ImmediateResponse: &extprocv3.ImmediateResponse{ + Status: &typev3.HttpStatus{Code: http.StatusInternalServerError}, + Body: []byte("some error"), + }, + }, + } + return resp, nil + }, + } + + extClient := &mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return client, nil + }, + } + + factory := 
func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extClient, func() error { return nil }, nil + } + + h := createEndpointPickerHandler(factory, logr.Discard()) + req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body")) + req.Header.Set(eppEndpointHostHeader, "test-host") + req.Header.Set(eppEndpointPortHeader, "1234") + w := httptest.NewRecorder() + + h.ServeHTTP(w, req) + + resp := w.Result() + + g.Expect(resp.StatusCode).To(Equal(http.StatusInternalServerError)) + body, _ := io.ReadAll(resp.Body) + g.Expect(string(body)).To(ContainSubstring("some error")) +} + +func TestEndpointPickerHandler_Errors(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + runErrorTestCase := func(factory func(string) (extprocv3.ExternalProcessorClient, func() error, error), + setHeaders bool, + expectedStatus int, + expectedBodySubstring string, + ) { + h := createEndpointPickerHandler(factory, logr.Discard()) + req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body")) + if setHeaders { + req.Header.Set(eppEndpointHostHeader, "test-host") + req.Header.Set(eppEndpointPortHeader, "1234") + } + w := httptest.NewRecorder() + h.ServeHTTP(w, req) + resp := w.Result() + g.Expect(resp.StatusCode).To(Equal(expectedStatus)) + body, _ := io.ReadAll(resp.Body) + g.Expect(string(body)).To(ContainSubstring(expectedBodySubstring)) + } + + // 1. Error creating gRPC client + factoryErr := errors.New("factory error") + factory := func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return nil, nil, factoryErr + } + runErrorTestCase(factory, true, http.StatusInternalServerError, "error creating gRPC client") + + // 2. Error opening ext_proc stream + extProcClient := &mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return nil, errors.New("process error") + }, + } + factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extProcClient, func() error { return nil }, nil + } + runErrorTestCase(factory, true, http.StatusBadGateway, "error opening ext_proc stream") + + // 3. Error sending headers + client := &mockProcessClient{ + SendFunc: func(*extprocv3.ProcessingRequest) error { + return errors.New("send headers error") + }, + RecvFunc: func() (*extprocv3.ProcessingResponse, error) { return nil, io.EOF }, + } + extProcClient = &mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return client, nil + }, + } + factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extProcClient, func() error { return nil }, nil + } + runErrorTestCase(factory, true, http.StatusBadGateway, "error sending headers") + + // 4. Error sending body + client = &mockProcessClient{ + SendFunc: func(req *extprocv3.ProcessingRequest) error { + if req.GetRequestBody() != nil { + return errors.New("send body error") + } + return nil + }, + RecvFunc: func() (*extprocv3.ProcessingResponse, error) { return nil, io.EOF }, + } + extProcClient = &mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return client, nil + }, + } + factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extProcClient, func() error { return nil }, nil + } + runErrorTestCase(factory, true, http.StatusBadGateway, "error sending body") + + // 5. 
Error with empty headers + runErrorTestCase(factory, false, http.StatusBadRequest, "missing at least one of required headers") +} diff --git a/cmd/gateway/main.go b/cmd/gateway/main.go index 515fcc3f16..c932a4ee4c 100644 --- a/cmd/gateway/main.go +++ b/cmd/gateway/main.go @@ -25,6 +25,7 @@ func main() { createGenerateCertsCommand(), createInitializeCommand(), createSleepCommand(), + createEndpointPickerCommand(), ) if err := rootCmd.Execute(); err != nil { diff --git a/go.mod b/go.mod index 4533824369..14d29b3190 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/nginx/nginx-gateway-fabric/v2 go 1.24.2 require ( + github.com/envoyproxy/go-control-plane/envoy v1.32.4 github.com/fsnotify/fsnotify v1.9.0 github.com/go-logr/logr v1.4.3 github.com/google/go-cmp v0.7.0 @@ -37,8 +38,10 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.12.2 // indirect + github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -60,6 +63,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.66.1 // indirect diff --git a/go.sum b/go.sum index 22c578fa59..25a3ac2452 100644 --- a/go.sum +++ b/go.sum @@ -16,6 +16,8 @@ github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1x github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls= +github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A= @@ -39,6 +41,10 @@ github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0o github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A= +github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw= +github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= 
+github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= @@ -167,6 +173,8 @@ github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNH github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= diff --git a/internal/controller/manager.go b/internal/controller/manager.go index d02411571b..dc9c4835bd 100644 --- a/internal/controller/manager.go +++ b/internal/controller/manager.go @@ -220,6 +220,7 @@ func StartManager(cfg config.Config) error { NginxDockerSecretNames: cfg.NginxDockerSecretNames, PlusUsageConfig: &cfg.UsageReportConfig, NginxOneConsoleTelemetryConfig: cfg.NginxOneConsoleTelemetryConfig, + InferenceExtension: cfg.InferenceExtension, }, ) if err != nil { diff --git a/internal/controller/provisioner/objects.go b/internal/controller/provisioner/objects.go index c9fb112e36..68cde04608 100644 --- a/internal/controller/provisioner/objects.go +++ b/internal/controller/provisioner/objects.go @@ -899,6 +899,7 @@ func (p *NginxProvisioner) buildNginxPodTemplateSpec( {MountPath: "/etc/nginx/events-includes", Name: "nginx-events-includes"}, }, SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: helpers.GetPointer(false), Capabilities: &corev1.Capabilities{ Drop: []corev1.Capability{"ALL"}, }, @@ -1119,6 +1120,30 @@ func (p *NginxProvisioner) buildNginxPodTemplateSpec( spec.Spec.Containers[0].VolumeMounts = volumeMounts } + if p.cfg.InferenceExtension { + spec.Spec.Containers = append(spec.Spec.Containers, corev1.Container{ + Name: "endpoint-picker-shim", + Image: p.cfg.GatewayPodConfig.Image, + ImagePullPolicy: pullPolicy, + Command: []string{ + "/usr/bin/gateway", + "endpoint-picker", + }, + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: helpers.GetPointer(false), + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{"ALL"}, + }, + ReadOnlyRootFilesystem: helpers.GetPointer(true), + RunAsGroup: helpers.GetPointer[int64](1001), + RunAsUser: helpers.GetPointer[int64](101), + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + }) + } + return spec } diff --git a/internal/controller/provisioner/objects_test.go b/internal/controller/provisioner/objects_test.go index 2327db259d..30403f85a7 100644 --- a/internal/controller/provisioner/objects_test.go +++ b/internal/controller/provisioner/objects_test.go @@ -1765,3 +1765,57 @@ func TestBuildNginxResourceObjects_Patches(t *testing.T) { 
g.Expect(svc.Labels).To(HaveKeyWithValue("app", "nginx")) g.Expect(dep.Labels).To(HaveKeyWithValue("app", "nginx")) } + +func TestBuildNginxResourceObjects_InferenceExtension(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + agentTLSSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: agentTLSTestSecretName, + Namespace: ngfNamespace, + }, + Data: map[string][]byte{"tls.crt": []byte("tls")}, + } + fakeClient := fake.NewFakeClient(agentTLSSecret) + + provisioner := &NginxProvisioner{ + cfg: Config{ + GatewayPodConfig: &config.GatewayPodConfig{ + Namespace: ngfNamespace, + }, + AgentTLSSecretName: agentTLSTestSecretName, + InferenceExtension: true, + }, + k8sClient: fakeClient, + baseLabelSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "nginx"}, + }, + } + + gateway := &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gw", + Namespace: "default", + }, + Spec: gatewayv1.GatewaySpec{ + Listeners: []gatewayv1.Listener{{Port: 80}}, + }, + } + + objects, err := provisioner.buildNginxResourceObjects("gw-nginx", gateway, &graph.EffectiveNginxProxy{}) + g.Expect(err).ToNot(HaveOccurred()) + + // Find the deployment object + var deployment *appsv1.Deployment + for _, obj := range objects { + if d, ok := obj.(*appsv1.Deployment); ok { + deployment = d + break + } + } + g.Expect(deployment).ToNot(BeNil()) + containers := deployment.Spec.Template.Spec.Containers + g.Expect(containers).To(HaveLen(2)) + g.Expect(containers[1].Name).To(Equal("endpoint-picker-shim")) +} diff --git a/internal/controller/provisioner/provisioner.go b/internal/controller/provisioner/provisioner.go index b279d86c31..f8a2b06957 100644 --- a/internal/controller/provisioner/provisioner.go +++ b/internal/controller/provisioner/provisioner.go @@ -58,6 +58,7 @@ type Config struct { NginxDockerSecretNames []string NginxOneConsoleTelemetryConfig config.NginxOneConsoleTelemetryConfig Plus bool + InferenceExtension bool } // NginxProvisioner handles provisioning nginx kubernetes resources. From afc73813376a5d3d683997d55d881c2c35e8b68c Mon Sep 17 00:00:00 2001 From: Saylor Berman Date: Wed, 24 Sep 2025 08:07:41 -0600 Subject: [PATCH 04/10] Query EPP and proxy AI traffic (#3942) Problem: We need to connect NGINX to the Golang shim that talks to the EndpointPicker, and then pass client traffic to the proper inference workload. Solution: Write an NJS module that will query the local Go server to get the AI endpoint to route traffic to. Then redirect the original client request to an internal location that proxies the traffic to the chosen endpoint. The location building gets a bit complicated especially when using both HTTP matching conditions and inference workloads. It requires 2 layers of internal redirects. I added lots of comments to hopefully clear up how we build these locations to perform all the routing steps. 
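For orientation: the generated nginx config for this two-layer redirect is spelled out in a comment added to servers.go below. As an extra, purely illustrative sketch (not part of the patch), the pair of http.Location values produced for an inference backend without HTTP matching conditions would look roughly like the following. It assumes the internal nginx/config/http package from this patch; the paths, EPP host/port, and upstream name are invented.

    // sketchInferenceLocations is a hypothetical helper showing the shape of the two
    // locations; field values are illustrative only.
    func sketchInferenceLocations() []http.Location {
    	return []http.Location{
    		{
    			// External location: NJS asks the shim for the endpoint, then redirects internally.
    			Path:            "/coffee",
    			Type:            http.InferenceExternalLocationType,
    			EPPInternalPath: "/_ngf-internal-rule0-route0-inference",
    			EPPHost:         "epp.default.svc.cluster.local", // EndpointPicker host (example)
    			EPPPort:         9002,                            // EndpointPicker port (example)
    		},
    		{
    			// Internal location: proxies to the endpoint chosen via the inference map variable.
    			Path:      "/_ngf-internal-rule0-route0-inference",
    			Type:      http.InferenceInternalLocationType,
    			ProxyPass: "http://$inference_backend_upstream1", // map built by buildInferenceMaps
    		},
    	}
    }
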
--- cmd/gateway/endpoint_picker.go | 24 +- cmd/gateway/endpoint_picker_test.go | 42 ++- deploy/inference-nginx-plus/deploy.yaml | 1 + .../controller/nginx/config/http/config.go | 56 ++- internal/controller/nginx/config/maps.go | 44 +++ internal/controller/nginx/config/maps_test.go | 65 +++- internal/controller/nginx/config/servers.go | 336 +++++++++++++++--- .../nginx/config/servers_template.go | 12 +- .../controller/nginx/config/servers_test.go | 182 +++++++++- internal/controller/nginx/modules/src/epp.js | 74 ++-- .../controller/nginx/modules/test/epp.test.js | 144 +++++--- .../state/dataplane/configuration.go | 27 +- .../state/dataplane/configuration_test.go | 89 ++++- internal/controller/state/dataplane/types.go | 6 + .../controller/state/graph/backend_refs.go | 90 +++-- .../state/graph/backend_refs_test.go | 8 +- internal/controller/state/graph/graph_test.go | 10 +- internal/controller/state/graph/httproute.go | 21 +- .../controller/state/graph/httproute_test.go | 61 ++++ .../controller/state/graph/route_common.go | 3 + internal/framework/types/types.go | 11 + 21 files changed, 1079 insertions(+), 227 deletions(-) diff --git a/cmd/gateway/endpoint_picker.go b/cmd/gateway/endpoint_picker.go index 7c67a83671..acf9bdfbb6 100644 --- a/cmd/gateway/endpoint_picker.go +++ b/cmd/gateway/endpoint_picker.go @@ -14,16 +14,8 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata" -) -const ( - // defaultPort is the default port for this server to listen on. If collisions become a problem, - // we can make this configurable via the NginxProxy resource. - defaultPort = 54800 // why 54800? Sum "nginx" in ASCII and multiply by 100. - // eppEndpointHostHeader is the HTTP header used to specify the EPP endpoint host, set by the NJS module caller. - eppEndpointHostHeader = "X-EPP-Host" - // eppEndpointPortHeader is the HTTP header used to specify the EPP endpoint port, set by the NJS module caller. - eppEndpointPortHeader = "X-EPP-Port" + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/types" ) // extProcClientFactory creates a new ExternalProcessorClient and returns a close function. @@ -32,7 +24,7 @@ type extProcClientFactory func(target string) (extprocv3.ExternalProcessorClient // endpointPickerServer starts an HTTP server on the given port with the provided handler. func endpointPickerServer(handler http.Handler) error { server := &http.Server{ - Addr: fmt.Sprintf("127.0.0.1:%d", defaultPort), + Addr: fmt.Sprintf("127.0.0.1:%d", types.GoShimPort), Handler: handler, ReadHeaderTimeout: 10 * time.Second, } @@ -54,13 +46,13 @@ func realExtProcClientFactory() extProcClientFactory { // createEndpointPickerHandler returns an http.Handler that forwards requests to the EndpointPicker. 
func createEndpointPickerHandler(factory extProcClientFactory, logger logr.Logger) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - host := r.Header.Get(eppEndpointHostHeader) - port := r.Header.Get(eppEndpointPortHeader) + host := r.Header.Get(types.EPPEndpointHostHeader) + port := r.Header.Get(types.EPPEndpointPortHeader) if host == "" || port == "" { msg := fmt.Sprintf( "missing at least one of required headers: %s and %s", - eppEndpointHostHeader, - eppEndpointPortHeader, + types.EPPEndpointHostHeader, + types.EPPEndpointPortHeader, ) logger.Error(errors.New(msg), "error contacting EndpointPicker") http.Error(w, msg, http.StatusBadRequest) @@ -174,6 +166,10 @@ func buildHeaderRequest(r *http.Request) *extprocv3.ProcessingRequest { } func buildBodyRequest(r *http.Request) (*extprocv3.ProcessingRequest, error) { + if r.ContentLength == 0 { + return nil, errors.New("request body is empty") + } + body, err := io.ReadAll(r.Body) if err != nil { return nil, fmt.Errorf("error reading request body: %w", err) diff --git a/cmd/gateway/endpoint_picker_test.go b/cmd/gateway/endpoint_picker_test.go index 99808348fc..99fd95aa90 100644 --- a/cmd/gateway/endpoint_picker_test.go +++ b/cmd/gateway/endpoint_picker_test.go @@ -17,6 +17,8 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/metadata" eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata" + + "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/types" ) type mockExtProcClient struct { @@ -122,8 +124,8 @@ func TestEndpointPickerHandler_Success(t *testing.T) { h := createEndpointPickerHandler(factory, logr.Discard()) req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body")) - req.Header.Set(eppEndpointHostHeader, "test-host") - req.Header.Set(eppEndpointPortHeader, "1234") + req.Header.Set(types.EPPEndpointHostHeader, "test-host") + req.Header.Set(types.EPPEndpointPortHeader, "1234") req.Header.Set("Content-Type", "application/json") w := httptest.NewRecorder() @@ -165,8 +167,8 @@ func TestEndpointPickerHandler_ImmediateResponse(t *testing.T) { h := createEndpointPickerHandler(factory, logr.Discard()) req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body")) - req.Header.Set(eppEndpointHostHeader, "test-host") - req.Header.Set(eppEndpointPortHeader, "1234") + req.Header.Set(types.EPPEndpointHostHeader, "test-host") + req.Header.Set(types.EPPEndpointPortHeader, "1234") w := httptest.NewRecorder() h.ServeHTTP(w, req) @@ -190,8 +192,8 @@ func TestEndpointPickerHandler_Errors(t *testing.T) { h := createEndpointPickerHandler(factory, logr.Discard()) req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader("test body")) if setHeaders { - req.Header.Set(eppEndpointHostHeader, "test-host") - req.Header.Set(eppEndpointPortHeader, "1234") + req.Header.Set(types.EPPEndpointHostHeader, "test-host") + req.Header.Set(types.EPPEndpointPortHeader, "1234") } w := httptest.NewRecorder() h.ServeHTTP(w, req) @@ -236,7 +238,33 @@ func TestEndpointPickerHandler_Errors(t *testing.T) { } runErrorTestCase(factory, true, http.StatusBadGateway, "error sending headers") - // 4. Error sending body + // 4a. 
Error building body request (content length 0) + client = &mockProcessClient{ + SendFunc: func(*extprocv3.ProcessingRequest) error { + return nil + }, + RecvFunc: func() (*extprocv3.ProcessingResponse, error) { return nil, io.EOF }, + } + extProcClient = &mockExtProcClient{ + ProcessFunc: func(context.Context, ...grpc.CallOption) (extprocv3.ExternalProcessor_ProcessClient, error) { + return client, nil + }, + } + factory = func(string) (extprocv3.ExternalProcessorClient, func() error, error) { + return extProcClient, func() error { return nil }, nil + } + h := createEndpointPickerHandler(factory, logr.Discard()) + req := httptest.NewRequest(http.MethodPost, "/", nil) // nil body, ContentLength = 0 + req.Header.Set(types.EPPEndpointHostHeader, "test-host") + req.Header.Set(types.EPPEndpointPortHeader, "1234") + w := httptest.NewRecorder() + h.ServeHTTP(w, req) + resp := w.Result() + g.Expect(resp.StatusCode).To(Equal(http.StatusInternalServerError)) + body, _ := io.ReadAll(resp.Body) + g.Expect(string(body)).To(ContainSubstring("request body is empty")) + + // 4b. Error sending body client = &mockProcessClient{ SendFunc: func(req *extprocv3.ProcessingRequest) error { if req.GetRequestBody() != nil { diff --git a/deploy/inference-nginx-plus/deploy.yaml b/deploy/inference-nginx-plus/deploy.yaml index 77ee4da544..025cfeb410 100644 --- a/deploy/inference-nginx-plus/deploy.yaml +++ b/deploy/inference-nginx-plus/deploy.yaml @@ -281,6 +281,7 @@ spec: - --nginx-docker-secret=nginx-plus-registry-secret - --nginx-plus - --usage-report-secret=nplus-license + - --usage-report-enforce-initial-report=true - --metrics-port=9113 - --health-port=8081 - --leader-election-lock-name=nginx-gateway-leader-election diff --git a/internal/controller/nginx/config/http/config.go b/internal/controller/nginx/config/http/config.go index 3a76ab30b4..dedfd04349 100644 --- a/internal/controller/nginx/config/http/config.go +++ b/internal/controller/nginx/config/http/config.go @@ -26,26 +26,58 @@ type Server struct { type LocationType string const ( + // InternalLocationType defines an internal location that is only accessible within NGINX. InternalLocationType LocationType = "internal" + // ExternalLocationType defines a normal external location that is accessible by clients. ExternalLocationType LocationType = "external" + // RedirectLocationType defines an external location that redirects to an internal location + // based on HTTP matching conditions. RedirectLocationType LocationType = "redirect" + // InferenceExternalLocationType defines an external location that is used for calling NJS + // to get the inference workload endpoint and redirects to the internal location that will proxy_pass + // to that endpoint. + InferenceExternalLocationType LocationType = "inference-external" + // InferenceInternalLocationType defines an internal location that is used for calling NJS + // to get the inference workload endpoint and redirects to the internal location that will proxy_pass + // to that endpoint. This is used when an HTTP redirect location is also defined that redirects + // to this internal inference location. + InferenceInternalLocationType LocationType = "inference-internal" ) // Location holds all configuration for an HTTP location. type Location struct { - Path string - ProxyPass string - HTTPMatchKey string + // Return specifies a return directive (e.g., HTTP status or redirect) for this location block. + Return *Return + // ProxySSLVerify controls SSL verification for upstreams when proxying requests. 
+ ProxySSLVerify *ProxySSLVerify + // ProxyPass is the upstream backend (URL or name) to which requests are proxied. + ProxyPass string + // HTTPMatchKey is the key for associating HTTP match rules, used for routing and NJS module logic. + HTTPMatchKey string + // MirrorSplitClientsVariableName is the variable name for split_clients, used in traffic mirroring scenarios. MirrorSplitClientsVariableName string - Type LocationType - ProxySetHeaders []Header - ProxySSLVerify *ProxySSLVerify - Return *Return - ResponseHeaders ResponseHeaders - Rewrites []string - MirrorPaths []string - Includes []shared.Include - GRPC bool + // EPPInternalPath is the internal path for the inference NJS module to redirect to. + EPPInternalPath string + // EPPHost is the host for the EndpointPicker, used for inference routing. + EPPHost string + // Type indicates the type of location (external, internal, redirect, etc). + Type LocationType + // Path is the NGINX location path. + Path string + // ResponseHeaders are custom response headers to be sent. + ResponseHeaders ResponseHeaders + // ProxySetHeaders are headers to set when proxying requests upstream. + ProxySetHeaders []Header + // Rewrites are rewrite rules for modifying request paths. + Rewrites []string + // MirrorPaths are paths to which requests are mirrored. + MirrorPaths []string + // Includes are additional NGINX config snippets or policies to include in this location. + Includes []shared.Include + // EPPPort is the port for the EndpointPicker, used for inference routing. + EPPPort int + // GRPC indicates if this location proxies gRPC traffic. + GRPC bool } // Header defines an HTTP header to be passed to the proxied server. diff --git a/internal/controller/nginx/config/maps.go b/internal/controller/nginx/config/maps.go index 5a5e5ff189..e0f9ee98d5 100644 --- a/internal/controller/nginx/config/maps.go +++ b/internal/controller/nginx/config/maps.go @@ -1,9 +1,12 @@ package config import ( + "fmt" "strings" gotemplate "text/template" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" + "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/shared" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/dataplane" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" @@ -26,6 +29,8 @@ const ( func executeMaps(conf dataplane.Configuration) []executeResult { maps := buildAddHeaderMaps(append(conf.HTTPServers, conf.SSLServers...)) + maps = append(maps, buildInferenceMaps(conf.BackendGroups)...) + result := executeResult{ dest: httpConfigFile, data: helpers.MustExecuteTemplate(mapsTemplate, maps), @@ -177,3 +182,42 @@ func createAddHeadersMap(name string) shared.Map { Parameters: params, } } + +// buildInferenceMaps creates maps for InferencePool Backends. 
+func buildInferenceMaps(groups []dataplane.BackendGroup) []shared.Map { + inferenceMaps := make([]shared.Map, 0, len(groups)) + for _, group := range groups { + for _, backend := range group.Backends { + if backend.EndpointPickerConfig != nil { + var defaultResult string + switch backend.EndpointPickerConfig.FailureMode { + // in FailClose mode, if the EPP is unavailable or returns an error, + // we return an invalid backend to ensure the request fails + case inference.EndpointPickerFailClose: + defaultResult = invalidBackendRef + // in FailOpen mode, if the EPP is unavailable or returns an error, + // we fall back to the upstream + case inference.EndpointPickerFailOpen: + defaultResult = backend.UpstreamName + } + params := []shared.MapParameter{ + { + Value: "~.+", + Result: "$inference_workload_endpoint", + }, + { + Value: "default", + Result: defaultResult, + }, + } + backendVarName := strings.ReplaceAll(backend.UpstreamName, "-", "_") + inferenceMaps = append(inferenceMaps, shared.Map{ + Source: "$inference_workload_endpoint", + Variable: fmt.Sprintf("$inference_backend_%s", backendVarName), + Parameters: params, + }) + } + } + } + return inferenceMaps +} diff --git a/internal/controller/nginx/config/maps_test.go b/internal/controller/nginx/config/maps_test.go index d133882d7b..736d7808ec 100644 --- a/internal/controller/nginx/config/maps_test.go +++ b/internal/controller/nginx/config/maps_test.go @@ -5,6 +5,7 @@ import ( "testing" . "github.com/onsi/gomega" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/shared" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/dataplane" @@ -59,22 +60,24 @@ func TestExecuteMaps(t *testing.T) { conf := dataplane.Configuration{ HTTPServers: []dataplane.VirtualServer{ - { - PathRules: pathRules, - }, - { - PathRules: pathRules, - }, - { - IsDefault: true, - }, + {PathRules: pathRules}, + {PathRules: pathRules}, + {IsDefault: true}, }, SSLServers: []dataplane.VirtualServer{ + {PathRules: pathRules}, + {IsDefault: true}, + }, + BackendGroups: []dataplane.BackendGroup{ { - PathRules: pathRules, - }, - { - IsDefault: true, + Backends: []dataplane.Backend{ + { + UpstreamName: "upstream1", + EndpointPickerConfig: &inference.EndpointPickerRef{ + FailureMode: inference.EndpointPickerFailClose, + }, + }, + }, }, }, } @@ -86,6 +89,9 @@ func TestExecuteMaps(t *testing.T) { "map ${http_my_second_add_header} $my_second_add_header_header_var {": 1, "~.* ${http_my_second_add_header},;": 1, "map ${http_my_set_header} $my_set_header_header_var {": 0, + "$inference_workload_endpoint": 2, + "$inference_backend": 1, + "invalid-backend-ref": 1, } mapResult := executeMaps(conf) @@ -385,3 +391,36 @@ func TestCreateStreamMapsWithEmpty(t *testing.T) { g.Expect(maps).To(BeNil()) } + +func TestBuildInferenceMaps(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + group := dataplane.BackendGroup{ + Backends: []dataplane.Backend{ + { + UpstreamName: "upstream1", + EndpointPickerConfig: &inference.EndpointPickerRef{ + FailureMode: inference.EndpointPickerFailClose, + }, + }, + { + UpstreamName: "upstream2", + EndpointPickerConfig: &inference.EndpointPickerRef{ + FailureMode: inference.EndpointPickerFailOpen, + }, + }, + { + UpstreamName: "upstream3", + EndpointPickerConfig: nil, + }, + }, + } + + maps := buildInferenceMaps([]dataplane.BackendGroup{group}) + g.Expect(maps).To(HaveLen(2)) + g.Expect(maps[0].Source).To(Equal("$inference_workload_endpoint")) + 
g.Expect(maps[0].Variable).To(Equal("$inference_backend_upstream1")) + g.Expect(maps[0].Parameters[1].Result).To(Equal("invalid-backend-ref")) + g.Expect(maps[1].Parameters[1].Result).To(Equal("upstream2")) +} diff --git a/internal/controller/nginx/config/servers.go b/internal/controller/nginx/config/servers.go index b1bf3c3576..203b4dbecb 100644 --- a/internal/controller/nginx/config/servers.go +++ b/internal/controller/nginx/config/servers.go @@ -16,7 +16,13 @@ import ( "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" ) -var serversTemplate = gotemplate.Must(gotemplate.New("servers").Parse(serversTemplateText)) +var serversTemplate = gotemplate.Must( + gotemplate.New("servers").Funcs(gotemplate.FuncMap{ + "contains": func(str http.LocationType, substr string) bool { + return strings.Contains(string(str), substr) + }, + }).Parse(serversTemplateText), +) const ( // HeaderMatchSeparator is the separator for constructing header-based match for NJS. @@ -252,6 +258,78 @@ func extractMirrorTargetsWithPercentages(pathRules []dataplane.PathRule) map[str return mirrorTargets } +/* +There are several different flows of location blocks, depending on the user configuration. +The following describes them, with basic location examples. + +--------------- +Base case, no HTTP matching conditions or inference extension. + +External location proxies straight to backend. + +location /coffee { + proxy_pass http://backend; +} +--------------- +HTTP matching conditions. + +External location calls httpmatch NJS module. The module determines the HTTP request conditions that exist +and which backend to use, then redirects to the appropriate internal location. +The internal location proxies to the backend. + +location /coffee { + js_content httpmatches.match; // chooses backend1 or backend2, and redirects to appropriate internal location +} +location /_ngf-internal-rule0-route0 { + internal; + proxy_pass http://backend1; +} +location /_ngf-internal-rule1-route0 { + internal; + proxy_pass http://backend2; +} +--------------- +Inference extension, no HTTP matching conditions. + +External location calls inference NJS module. The module gets the AI endpoint to proxy to, +then redirects to the internal inference location that proxies to the backend. + +location /coffee { + set $epp_internal_path /_ngf-internal-rule0-route0-inference; + js_content epp.getEndpoint; // gets endpoint and redirects to /_ngf-internal-rule0-route0-inference +} +location /_ngf-internal-rule0-route0-inference { + internal; + proxy_pass http://$inference-backend; +} +--------------- +Inference extension with HTTP matching conditions. + +External location calls httpmatch NJS module. The module determines the HTTP request conditions that exist +and which backend to use, then redirects to the internal inference location. The internal inference +location calls the inference NJS module to get the AI endpoint to proxy to, then redirects to the +internal location that proxies to the backend. + +Note that the location path naming here is a little different than the previous example. +The final location that proxy_passes has the non-inference name to avoid too much refactoring +in the code, and the intermediate location has -inference in the name, whereas in the previous example +it was the final location that had -inference in the name. 
+ +location /coffee { + js_content httpmatches.match; // chooses backend and redirects to appropriate internal inference location +} +location /_ngf-internal-rule0-route0-inference { + internal; + + set $epp_internal_path /_ngf-internal-rule0-route0; + js_content epp.getEndpoint; // redirects to /_ngf-internal-rule0-route0 +} +location /_ngf-internal-rule0-route0 { + internal; + proxy_pass http://$inference-backend; +} +*/ + type httpMatchPairs map[string][]routeMatch func createLocations( @@ -270,8 +348,6 @@ func createLocations( mirrorPathToPercentage := extractMirrorTargetsWithPercentages(server.PathRules) for pathRuleIdx, rule := range server.PathRules { - matches := make([]routeMatch, 0, len(rule.MatchRules)) - if rule.Path == rootPath { rootPathExists = true } @@ -281,7 +357,6 @@ func createLocations( } mirrorPercentage := mirrorPathToPercentage[rule.Path] - extLocations := initializeExternalLocations(rule, pathsAndTypes) for i := range extLocations { extLocations[i].Includes = createIncludesFromPolicyGenerateResult( @@ -289,54 +364,45 @@ func createLocations( ) } - if !needsInternalLocations(rule) { - for _, r := range rule.MatchRules { - extLocations = updateLocations( - r, - rule, - extLocations, - server.Port, - keepAliveCheck, - mirrorPercentage, - ) - } - - locs = append(locs, extLocations...) - continue - } - - internalLocations := make([]http.Location, 0, len(rule.MatchRules)) - - for matchRuleIdx, r := range rule.MatchRules { - intLocation, match := initializeInternalLocation(pathRuleIdx, matchRuleIdx, r.Match, rule.GRPC) - intLocation.Includes = createIncludesFromPolicyGenerateResult( - generator.GenerateForInternalLocation(rule.Policies), + switch { + case !needsInternalLocationsForMatches(rule) && !rule.HasInferenceBackends: + locs = append(locs, updateExternalLocationsForRule( + rule, + extLocations, + server.Port, + keepAliveCheck, + mirrorPercentage)..., ) - - intLocation = updateLocation( - r, + case needsInternalLocationsForMatches(rule): + internalLocations, matches := createInternalLocationsForRule( + pathRuleIdx, rule, - intLocation, + generator, server.Port, keepAliveCheck, mirrorPercentage, ) - - internalLocations = append(internalLocations, intLocation) - matches = append(matches, match) - } - - httpMatchKey := serverID + "_" + strconv.Itoa(pathRuleIdx) - for i := range extLocations { - // FIXME(sberman): De-dupe matches and associated locations - // so we don't need nginx/njs to perform unnecessary matching. - // https://github.com/nginx/nginx-gateway-fabric/issues/662 - extLocations[i].HTTPMatchKey = httpMatchKey - matchPairs[extLocations[i].HTTPMatchKey] = matches + httpMatchKey := serverID + "_" + strconv.Itoa(pathRuleIdx) + for i := range extLocations { + // FIXME(sberman): De-dupe matches and associated locations + // so we don't need nginx/njs to perform unnecessary matching. + // https://github.com/nginx/nginx-gateway-fabric/issues/662 + extLocations[i].HTTPMatchKey = httpMatchKey + matchPairs[extLocations[i].HTTPMatchKey] = matches + } + locs = append(locs, extLocations...) + locs = append(locs, internalLocations...) + case rule.HasInferenceBackends: + locs = append(locs, createInferenceLocationsForRule( + pathRuleIdx, + rule, + extLocations, + generator, + server.Port, + keepAliveCheck, + mirrorPercentage)..., + ) } - - locs = append(locs, extLocations...) - locs = append(locs, internalLocations...) 
} if !rootPathExists { @@ -346,10 +412,124 @@ func createLocations( return locs, matchPairs, grpcServer } -func needsInternalLocations(rule dataplane.PathRule) bool { +func updateExternalLocationsForRule( + rule dataplane.PathRule, + extLocations []http.Location, + port int32, + keepAliveCheck keepAliveChecker, + mirrorPercentage *float64, +) []http.Location { + for _, r := range rule.MatchRules { + extLocations = updateLocations( + r, + rule, + extLocations, + port, + keepAliveCheck, + mirrorPercentage, + ) + } + + return extLocations +} + +func createInternalLocationsForRule( + pathRuleIdx int, + rule dataplane.PathRule, + generator policies.Generator, + port int32, + keepAliveCheck keepAliveChecker, + mirrorPercentage *float64, +) ([]http.Location, []routeMatch) { + internalLocations := make([]http.Location, 0, len(rule.MatchRules)) + matches := make([]routeMatch, 0, len(rule.MatchRules)) + for matchRuleIdx, r := range rule.MatchRules { + var intLocation http.Location + var match routeMatch + if !rule.HasInferenceBackends { + intLocation, match = initializeInternalMatchLocation(pathRuleIdx, matchRuleIdx, r.Match, rule.GRPC) + } else { + intLocation, match = initializeInternalMatchLocationWithInference(pathRuleIdx, matchRuleIdx, r.Match) + intInfLocation := initializeInternalInferenceRedirectLocation(pathRuleIdx, matchRuleIdx) + for _, b := range r.BackendGroup.Backends { + if b.EndpointPickerConfig != nil { + var portNum int + if b.EndpointPickerConfig.Port != nil { + portNum = int(b.EndpointPickerConfig.Port.Number) + } + intInfLocation.EPPInternalPath = intLocation.Path + intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name) + intInfLocation.EPPPort = portNum + } + } + internalLocations = append(internalLocations, intInfLocation) + } + intLocation.Includes = createIncludesFromPolicyGenerateResult( + generator.GenerateForInternalLocation(rule.Policies), + ) + intLocation = updateLocation( + r, + rule, + intLocation, + port, + keepAliveCheck, + mirrorPercentage, + ) + internalLocations = append(internalLocations, intLocation) + matches = append(matches, match) + } + + return internalLocations, matches +} + +func createInferenceLocationsForRule( + pathRuleIdx int, + rule dataplane.PathRule, + extLocations []http.Location, + generator policies.Generator, + port int32, + keepAliveCheck keepAliveChecker, + mirrorPercentage *float64, +) []http.Location { + locs := make([]http.Location, 0, len(rule.MatchRules)+len(extLocations)) + for matchRuleIdx, r := range rule.MatchRules { + intLocation := initializeInternalInferenceLocation(pathRuleIdx, matchRuleIdx) + intLocation.Includes = createIncludesFromPolicyGenerateResult( + generator.GenerateForInternalLocation(rule.Policies), + ) + intLocation = updateLocation( + r, + rule, + intLocation, + port, + keepAliveCheck, + mirrorPercentage, + ) + for _, b := range r.BackendGroup.Backends { + if b.EndpointPickerConfig != nil { + for i := range extLocations { + var portNum int + if b.EndpointPickerConfig.Port != nil { + portNum = int(b.EndpointPickerConfig.Port.Number) + } + extLocations[i].EPPInternalPath = intLocation.Path + extLocations[i].EPPHost = string(b.EndpointPickerConfig.Name) + extLocations[i].EPPPort = portNum + } + } + } + locs = append(locs, intLocation) + } + locs = append(locs, extLocations...) 
+ + return locs +} + +func needsInternalLocationsForMatches(rule dataplane.PathRule) bool { if len(rule.MatchRules) > 1 { return true } + return len(rule.MatchRules) == 1 && !isPathOnlyMatch(rule.MatchRules[0].Match) } @@ -362,12 +542,13 @@ type pathAndTypeMap map[string]map[dataplane.PathType]struct{} // 2. Each path rule may have an additional location if it contains non-path-only matches. // 3. Each prefix path rule may have an additional location if it doesn't contain trailing slash. // 4. There may be an additional location for the default root path. +// 5. There may be an additional location per parent location for the inference extension. // We also return a map of all paths and their types. func getMaxLocationCountAndPathMap(pathRules []dataplane.PathRule) (int, pathAndTypeMap) { maxLocs := 1 pathsAndTypes := make(pathAndTypeMap) for _, rule := range pathRules { - maxLocs += len(rule.MatchRules) + 2 + maxLocs += (len(rule.MatchRules) * 2) + 2 if pathsAndTypes[rule.Path] == nil { pathsAndTypes[rule.Path] = map[dataplane.PathType]struct{}{ rule.PathType: {}, @@ -431,14 +612,20 @@ func initializeExternalLocations( } func getLocationTypeForPathRule(rule dataplane.PathRule) http.LocationType { - if needsInternalLocations(rule) { + if needsInternalLocationsForMatches(rule) { return http.RedirectLocationType } + if rule.HasInferenceBackends { + return http.InferenceExternalLocationType + } + return http.ExternalLocationType } -func initializeInternalLocation( +// initializeInternalMatchLocation initializes the internal location that is redirected to by an +// external location HTTP matching decision. This location will proxy_pass to the backend. +func initializeInternalMatchLocation( pathruleIdx, matchRuleIdx int, match dataplane.Match, @@ -448,6 +635,45 @@ func initializeInternalLocation( return createMatchLocation(path, grpc), createRouteMatch(match, path) } +// initializeInternalInferenceRedirectLocation initializes the internal inference location that is redirected to by +// an external HTTP matching location. This location then redirects to the final proxy_pass location. +func initializeInternalInferenceRedirectLocation(pathruleIdx, matchRuleIdx int) http.Location { + return http.Location{ + Path: inferencePath(pathruleIdx, matchRuleIdx), + Type: http.InferenceInternalLocationType, + } +} + +// initializeInternalMatchLocationWithInference initializes the internal location that is redirected to by +// an internal inference location, which was redirected to by the external HTTP matching location. +// This location will proxy_pass to the backend. +// The routeMatch is created with the inference internal location path, so that the HTTP match in the external +// location can redirect to the proper inference location, which then redirects to this location. +func initializeInternalMatchLocationWithInference( + pathruleIdx, + matchRuleIdx int, + match dataplane.Match, +) (http.Location, routeMatch) { + path := fmt.Sprintf("%s-rule%d-route%d", http.InternalRoutePathPrefix, pathruleIdx, matchRuleIdx) + grpc := false + + return createMatchLocation(path, grpc), createRouteMatch(match, inferencePath(pathruleIdx, matchRuleIdx)) +} + +// initializeInternalInferenceLocation initializes the internal inference location that does the final +// proxy_pass to the inference backend. +// This is used when the external location redirects directly here, without any HTTP matching. 
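+// For example, with pathRuleIdx 0 and matchRuleIdx 0, the generated path is
+// /_ngf-internal-rule0-route0-inference (see inferencePath).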
+func initializeInternalInferenceLocation(pathruleIdx, matchRuleIdx int) http.Location { + return http.Location{ + Path: inferencePath(pathruleIdx, matchRuleIdx), + Type: http.InternalLocationType, + } +} + +func inferencePath(pathruleIdx int, matchRuleIdx int) string { + return fmt.Sprintf("%s-rule%d-route%d-inference", http.InternalRoutePathPrefix, pathruleIdx, matchRuleIdx) +} + // updateLocation updates a location with any relevant configurations, like proxy_pass, filters, tls settings, etc. func updateLocation( matchRule dataplane.MatchRule, @@ -459,6 +685,7 @@ func updateLocation( ) http.Location { filters := matchRule.Filters grpc := pathRule.GRPC + inferenceBackend := pathRule.HasInferenceBackends if filters.InvalidFilter != nil { location.Return = &http.Return{Code: http.StatusInternalServerError} @@ -474,7 +701,7 @@ func updateLocation( location = updateLocationRewriteFilter(location, filters.RequestURLRewrite, pathRule) location = updateLocationMirrorFilters(location, filters.RequestMirrors, pathRule.Path, mirrorPercentage) - location = updateLocationProxySettings(location, matchRule, grpc, keepAliveCheck) + location = updateLocationProxySettings(location, matchRule, grpc, inferenceBackend, keepAliveCheck) return location } @@ -554,6 +781,7 @@ func updateLocationProxySettings( location http.Location, matchRule dataplane.MatchRule, grpc bool, + inferenceBackend bool, keepAliveCheck keepAliveChecker, ) http.Location { extraHeaders := make([]http.Header, 0, 3) @@ -574,6 +802,7 @@ func updateLocationProxySettings( matchRule.Filters.RequestURLRewrite, generateProtocolString(location.ProxySSLVerify, grpc), grpc, + inferenceBackend, ) location.ResponseHeaders = responseHeaders @@ -872,6 +1101,7 @@ func createProxyPass( filter *dataplane.HTTPURLRewriteFilter, protocol string, grpc bool, + inferenceBackend bool, ) string { var requestURI string if !grpc { @@ -881,6 +1111,12 @@ func createProxyPass( } backendName := backendGroupName(backendGroup) + + if inferenceBackend { + backendVarName := strings.ReplaceAll(backendName, "-", "_") + return "http://$inference_backend_" + backendVarName + requestURI + } + if backendGroupNeedsSplit(backendGroup) { return protocol + "://$" + convertStringToSafeVariableName(backendName) + requestURI } diff --git a/internal/controller/nginx/config/servers_template.go b/internal/controller/nginx/config/servers_template.go index 224e189a6e..9575b77480 100644 --- a/internal/controller/nginx/config/servers_template.go +++ b/internal/controller/nginx/config/servers_template.go @@ -92,7 +92,7 @@ server { {{ range $l := $s.Locations }} location {{ $l.Path }} { - {{ if eq $l.Type "internal" -}} + {{ if contains $l.Type "internal" -}} internal; {{ end }} @@ -118,11 +118,19 @@ server { return {{ $l.Return.Code }} "{{ $l.Return.Body }}"; {{- end }} - {{- if eq $l.Type "redirect" }} + {{- if eq $l.Type "redirect" -}} set $match_key {{ $l.HTTPMatchKey }}; js_content httpmatches.redirect; {{- end }} + {{- if contains $l.Type "inference" -}} + js_var $inference_workload_endpoint; + set $epp_internal_path {{ $l.EPPInternalPath }}; + set $epp_host {{ $l.EPPHost }}; + set $epp_port {{ $l.EPPPort }}; + js_content epp.getEndpoint; + {{- end }} + {{ $proxyOrGRPC := "proxy" }}{{ if $l.GRPC }}{{ $proxyOrGRPC = "grpc" }}{{ end }} {{- if $l.GRPC }} diff --git a/internal/controller/nginx/config/servers_test.go b/internal/controller/nginx/config/servers_test.go index 05d9a53e56..25ef92896d 100644 --- a/internal/controller/nginx/config/servers_test.go +++ 
b/internal/controller/nginx/config/servers_test.go @@ -9,6 +9,7 @@ import ( . "github.com/onsi/gomega" "github.com/onsi/gomega/format" "k8s.io/apimachinery/pkg/types" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/http" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/policies" @@ -1239,7 +1240,7 @@ func TestCreateServers(t *testing.T) { Filters: dataplane.HTTPFilters{ RequestRedirect: &dataplane.HTTPRequestRedirectFilter{ Hostname: helpers.GetPointer("redirect.example.com"), - StatusCode: helpers.GetPointer[int](301), + StatusCode: helpers.GetPointer(301), Port: helpers.GetPointer[int32](8080), Path: &dataplane.HTTPPathModifier{ Type: dataplane.ReplaceFullPath, @@ -2443,6 +2444,154 @@ func TestCreateLocations_Includes(t *testing.T) { } } +func TestCreateLocations_InferenceBackends(t *testing.T) { + t.Parallel() + + hrNsName := types.NamespacedName{Namespace: "test", Name: "route1"} + + fooGroup := dataplane.BackendGroup{ + Source: hrNsName, + RuleIdx: 0, + Backends: []dataplane.Backend{ + { + UpstreamName: "test_foo_80", + Valid: true, + Weight: 1, + EndpointPickerConfig: &inference.EndpointPickerRef{ + Name: "test-epp", + Port: &inference.Port{ + Number: 80, + }, + }, + }, + }, + } + + pathRuleInferenceOnly := dataplane.PathRule{ + Path: "/inference", + PathType: dataplane.PathTypeExact, + HasInferenceBackends: true, + MatchRules: []dataplane.MatchRule{ + { + Match: dataplane.Match{}, + BackendGroup: fooGroup, + }, + }, + } + + pathRuleInferenceWithMatch := dataplane.PathRule{ + Path: "/inference-match", + PathType: dataplane.PathTypeExact, + HasInferenceBackends: true, + MatchRules: []dataplane.MatchRule{ + { + Match: dataplane.Match{ + Method: helpers.GetPointer("POST"), + }, + BackendGroup: fooGroup, + }, + }, + } + + tests := []struct { + expMatches httpMatchPairs + name string + pathRules []dataplane.PathRule + expLocs []http.Location + }{ + { + name: "inference only, no internal locations for matches", + pathRules: []dataplane.PathRule{pathRuleInferenceOnly}, + expLocs: []http.Location{ + { + Path: "/_ngf-internal-rule0-route0-inference", + Type: http.InternalLocationType, + ProxyPass: "http://$inference_backend_test_foo_80$request_uri", + ProxySetHeaders: []http.Header{ + {Name: "Host", Value: "$gw_api_compliant_host"}, + {Name: "X-Forwarded-For", Value: "$proxy_add_x_forwarded_for"}, + {Name: "X-Real-IP", Value: "$remote_addr"}, + {Name: "X-Forwarded-Proto", Value: "$scheme"}, + {Name: "X-Forwarded-Host", Value: "$host"}, + {Name: "X-Forwarded-Port", Value: "$server_port"}, + {Name: "Upgrade", Value: "$http_upgrade"}, + {Name: "Connection", Value: "$connection_upgrade"}, + }, + }, + { + Path: "= /inference", + Type: http.InferenceExternalLocationType, + EPPInternalPath: "/_ngf-internal-rule0-route0-inference", + EPPHost: "test-epp", + EPPPort: 80, + }, + createDefaultRootLocation(), + }, + expMatches: httpMatchPairs{}, + }, + { + name: "inference with match, needs internal locations for matches", + pathRules: []dataplane.PathRule{pathRuleInferenceWithMatch}, + expLocs: []http.Location{ + { + Path: "= /inference-match", + Type: http.RedirectLocationType, + HTTPMatchKey: "1_0", + }, + { + Path: "/_ngf-internal-rule0-route0-inference", + Type: http.InferenceInternalLocationType, + EPPInternalPath: "/_ngf-internal-rule0-route0", + EPPHost: "test-epp", + EPPPort: 80, + }, + { + Path: "/_ngf-internal-rule0-route0", + Type: http.InternalLocationType, + ProxyPass: 
"http://$inference_backend_test_foo_80$request_uri", + ProxySetHeaders: []http.Header{ + {Name: "Host", Value: "$gw_api_compliant_host"}, + {Name: "X-Forwarded-For", Value: "$proxy_add_x_forwarded_for"}, + {Name: "X-Real-IP", Value: "$remote_addr"}, + {Name: "X-Forwarded-Proto", Value: "$scheme"}, + {Name: "X-Forwarded-Host", Value: "$host"}, + {Name: "X-Forwarded-Port", Value: "$server_port"}, + {Name: "Upgrade", Value: "$http_upgrade"}, + {Name: "Connection", Value: "$connection_upgrade"}, + }, + }, + createDefaultRootLocation(), + }, + expMatches: httpMatchPairs{ + "1_0": { + {Method: "POST", RedirectPath: "/_ngf-internal-rule0-route0-inference"}, + }, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + locs, matches, _ := createLocations( + &dataplane.VirtualServer{ + Hostname: "example.com", + PathRules: tc.pathRules, + Port: 80, + }, + "1", + &policiesfakes.FakeGenerator{}, + alwaysFalseKeepAliveChecker, + ) + + g.Expect(helpers.Diff(tc.expLocs, locs)).To(BeEmpty()) + g.Expect(matches).To(Equal(tc.expMatches)) + }) + } +} + func TestCreateLocationsRootPath(t *testing.T) { t.Parallel() hrNsName := types.NamespacedName{Namespace: "test", Name: "route1"} @@ -3686,10 +3835,11 @@ func TestCreateProxyPass(t *testing.T) { t.Parallel() tests := []struct { - rewrite *dataplane.HTTPURLRewriteFilter - expected string - grp dataplane.BackendGroup - GRPC bool + rewrite *dataplane.HTTPURLRewriteFilter + expected string + grp dataplane.BackendGroup + GRPC bool + inferenceBackend bool }{ { expected: "http://10.0.0.1:80$request_uri", @@ -3703,6 +3853,20 @@ func TestCreateProxyPass(t *testing.T) { }, }, }, + // Inference case + { + expected: "http://$inference_backend_upstream_inference$request_uri", + grp: dataplane.BackendGroup{ + Backends: []dataplane.Backend{ + { + UpstreamName: "upstream-inference", + Valid: true, + Weight: 1, + }, + }, + }, + inferenceBackend: true, + }, { expected: "http://$group_ns1__bg_rule0$request_uri", grp: dataplane.BackendGroup{ @@ -3755,7 +3919,13 @@ func TestCreateProxyPass(t *testing.T) { t.Run(tc.expected, func(t *testing.T) { t.Parallel() g := NewWithT(t) - result := createProxyPass(tc.grp, tc.rewrite, generateProtocolString(nil, tc.GRPC), tc.GRPC) + result := createProxyPass( + tc.grp, + tc.rewrite, + generateProtocolString(nil, tc.GRPC), + tc.GRPC, + tc.inferenceBackend, + ) g.Expect(result).To(Equal(tc.expected)) }) } diff --git a/internal/controller/nginx/modules/src/epp.js b/internal/controller/nginx/modules/src/epp.js index d4beeb9e15..88de40062b 100644 --- a/internal/controller/nginx/modules/src/epp.js +++ b/internal/controller/nginx/modules/src/epp.js @@ -1,29 +1,59 @@ -// This file contains the methods to get an AI workload endpoint from the EndpointPicker (EPP). +import qs from 'querystring'; -// TODO(sberman): this module will need to be enhanced to include the following: -// - function that sends the subrequest to the Go middleware application (to get the endpoint from EPP) -// - if a user has specified an Exact matching condition for a model name, extract the model name from -// the request body, and if it matches that condition, set the proper value in the X-Gateway-Model-Name header -// (based on if we do a redirect or traffic split (see design doc)) in the subrequest. If the client request -// already has this header set, then I don't think we need to extract the model from the body, just pass -// through the existing header. 
-// I believe we have to use js_content to call the NJS functionality. Because this takes over -// the request, we will likely have to finish the NJS functionality with an internalRedirect to an internal -// location that proxy_passes to the chosen endpoint. +const EPP_HOST_HEADER_VAR = 'epp_host'; +const EPP_PORT_HEADER_VAR = 'epp_port'; +const EPP_HOST_HEADER = 'X-EPP-Host'; +const EPP_PORT_HEADER = 'X-EPP-Port'; +const ENDPOINT_HEADER = 'X-Gateway-Destination-Endpoint'; +const EPP_INTERNAL_PATH_VAR = 'epp_internal_path'; +const WORKLOAD_ENDPOINT_VAR = 'inference_workload_endpoint'; +const SHIM_URI = 'http://127.0.0.1:54800'; + +async function getEndpoint(r) { + if (!r.variables[EPP_HOST_HEADER_VAR] || !r.variables[EPP_PORT_HEADER_VAR]) { + throw Error( + `Missing required variables: ${EPP_HOST_HEADER_VAR} and/or ${EPP_PORT_HEADER_VAR}`, + ); + } + if (!r.variables[EPP_INTERNAL_PATH_VAR]) { + throw Error(`Missing required variable: ${EPP_INTERNAL_PATH_VAR}`); + } + + let headers = Object.assign({}, r.headersIn); + headers[EPP_HOST_HEADER] = r.variables[EPP_HOST_HEADER_VAR]; + headers[EPP_PORT_HEADER] = r.variables[EPP_PORT_HEADER_VAR]; -// extractModel extracts the model name from the request body. -function extractModel(r) { try { - var body = JSON.parse(r.requestText); - if (body && body.model !== undefined) { - return String(body.model); + const response = await ngx.fetch(SHIM_URI, { + method: r.method, + headers: headers, + body: r.requestText, + }); + const endpointHeader = response.headers.get(ENDPOINT_HEADER); + if (response.status === 200 && endpointHeader) { + r.variables[WORKLOAD_ENDPOINT_VAR] = endpointHeader; + r.log( + `found inference endpoint from EndpointPicker: ${r.variables[WORKLOAD_ENDPOINT_VAR]}`, + ); + } else { + const body = await response.text(); + r.error( + `could not get specific inference endpoint from EndpointPicker; ` + + `status: ${response.status}; body: ${body}`, + ); } - } catch (e) { - r.error(`error parsing request body for model name: ${e.message}`); - return ''; + } catch (err) { + r.error(`Error in ngx.fetch: ${err}`); } - r.error('request body does not contain model parameter'); - return ''; + + // If performing a rewrite, $request_uri won't be used, + // so we have to preserve args in the internal redirect. + let args = qs.stringify(r.args); + if (args) { + args = '?' 
+ args; + } + + r.internalRedirect(r.variables[EPP_INTERNAL_PATH_VAR] + args); } -export default { extractModel }; +export default { getEndpoint }; diff --git a/internal/controller/nginx/modules/test/epp.test.js b/internal/controller/nginx/modules/test/epp.test.js index 6994423e7a..c2a4528694 100644 --- a/internal/controller/nginx/modules/test/epp.test.js +++ b/internal/controller/nginx/modules/test/epp.test.js @@ -1,52 +1,106 @@ import { default as epp } from '../src/epp.js'; -import { expect, describe, it } from 'vitest'; - -function makeRequest(body) { - let r = { - // Test mocks - error(msg) { - r.variables.error = msg; - }, - requestText: body, - variables: {}, - }; +import { expect, describe, it, beforeEach, afterEach, vi } from 'vitest'; - return r; +function makeRequest({ + method = 'POST', + headersIn = {}, + args = {}, + requestText = '', + variables = {}, +} = {}) { + return { + method, + headersIn, + requestText, + variables, + args, + error: vi.fn(), + log: vi.fn(), + internalRedirect: vi.fn(), + }; } -describe('extractModel', () => { - const tests = [ - { - name: 'returns the model value', - body: '{"model":"gpt-4"}', - model: 'gpt-4', - error: undefined, - }, - { - name: 'returns empty string if model is missing', - body: '{"foo":1}', - model: '', - error: 'request body does not contain model parameter', - }, - { - name: 'returns empty string for invalid JSON', - body: 'not-json', - model: '', - error: `error parsing request body for model name: Unexpected token 'o', "not-json" is not valid JSON`, - }, - { - name: 'empty request body', - body: '', - model: '', - error: 'error parsing request body for model name: Unexpected end of JSON input', - }, - ]; - - tests.forEach((test) => { - it(test.name, () => { - let r = makeRequest(test.body); - expect(epp.extractModel(r)).to.equal(test.model); - expect(r.variables.error).to.equal(test.error); +describe('getEndpoint', () => { + let originalNgx; + beforeEach(() => { + originalNgx = globalThis.ngx; + }); + afterEach(() => { + globalThis.ngx = originalNgx; + }); + + it('throws if host or port is missing', async () => { + const r = makeRequest({ variables: { epp_internal_path: '/foo' } }); + await expect(epp.getEndpoint(r)).rejects.toThrow(/Missing required variables/); + }); + + it('throws if internal path is missing', async () => { + const r = makeRequest({ variables: { epp_host: 'host', epp_port: '1234' } }); + await expect(epp.getEndpoint(r)).rejects.toThrow(/Missing required variable/); + }); + + it('sets endpoint and logs on 200 with endpoint header', async () => { + const endpoint = 'http://endpoint'; + globalThis.ngx = { + fetch: vi.fn().mockResolvedValue({ + status: 200, + headers: { get: () => endpoint }, + text: vi.fn(), + }), + }; + const r = makeRequest({ + variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + }); + await epp.getEndpoint(r); + expect(r.variables.inference_workload_endpoint).toBe(endpoint); + expect(r.log).toHaveBeenCalledWith(expect.stringContaining(endpoint)); + expect(r.internalRedirect).toHaveBeenCalledWith('/foo'); + }); + + it('calls error if response is not 200 or endpoint header missing', async () => { + globalThis.ngx = { + fetch: vi.fn().mockResolvedValue({ + status: 404, + headers: { get: () => null }, + text: vi.fn().mockResolvedValue('fail'), + }), + }; + const r = makeRequest({ + variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + }); + await epp.getEndpoint(r); + expect(r.error).toHaveBeenCalledWith( + expect.stringContaining('could not 
get specific inference endpoint'), + ); + expect(r.internalRedirect).toHaveBeenCalledWith('/foo'); + }); + + it('calls error if fetch throws', async () => { + globalThis.ngx = { + fetch: vi.fn().mockRejectedValue(new Error('network fail')), + }; + const r = makeRequest({ + variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + }); + await epp.getEndpoint(r); + expect(r.error).toHaveBeenCalledWith(expect.stringContaining('Error in ngx.fetch')); + expect(r.internalRedirect).toHaveBeenCalledWith('/foo'); + }); + + it('preserves args in internal redirect when args are present', async () => { + const endpoint = 'http://endpoint'; + globalThis.ngx = { + fetch: vi.fn().mockResolvedValue({ + status: 200, + headers: { get: () => endpoint }, + text: vi.fn(), + }), + }; + const r = makeRequest({ + variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + args: { a: '1', b: '2' }, }); + await epp.getEndpoint(r); + expect(r.internalRedirect).toHaveBeenCalledWith('/foo?a=1&b=2'); }); }); diff --git a/internal/controller/state/dataplane/configuration.go b/internal/controller/state/dataplane/configuration.go index 52306f4e0b..59030c0ca7 100644 --- a/internal/controller/state/dataplane/configuration.go +++ b/internal/controller/state/dataplane/configuration.go @@ -374,12 +374,13 @@ func newBackendGroup( gatewayName types.NamespacedName, sourceNsName types.NamespacedName, ruleIdx int, -) BackendGroup { +) (BackendGroup, bool) { var backends []Backend if len(refs) > 0 { backends = make([]Backend, 0, len(refs)) } + var inferencePoolBackendExists bool for _, ref := range refs { if ref.IsMirrorBackend { @@ -391,11 +392,14 @@ func newBackendGroup( valid = false } + inferencePoolBackendExists = inferencePoolBackendExists || ref.IsInferencePool + backends = append(backends, Backend{ - UpstreamName: ref.ServicePortReference(), - Weight: ref.Weight, - Valid: valid, - VerifyTLS: convertBackendTLS(ref.BackendTLSPolicy, gatewayName), + UpstreamName: ref.ServicePortReference(), + Weight: ref.Weight, + Valid: valid, + VerifyTLS: convertBackendTLS(ref.BackendTLSPolicy, gatewayName), + EndpointPickerConfig: ref.EndpointPickerConfig, }) } @@ -403,7 +407,7 @@ func newBackendGroup( Backends: backends, Source: sourceNsName, RuleIdx: ruleIdx, - } + }, inferencePoolBackendExists } func convertBackendTLS(btp *graph.BackendTLSPolicy, gwNsName types.NamespacedName) *VerifyTLS { @@ -595,10 +599,19 @@ func (hpr *hostPathRules) upsertRoute( } hostRule.GRPC = GRPC + backendGroup, inferencePoolBackendExists := newBackendGroup( + rule.BackendRefs, + listener.GatewayName, + routeNsName, + idx, + ) + if inferencePoolBackendExists { + hostRule.HasInferenceBackends = true + } hostRule.MatchRules = append(hostRule.MatchRules, MatchRule{ Source: objectSrc, - BackendGroup: newBackendGroup(rule.BackendRefs, listener.GatewayName, routeNsName, idx), + BackendGroup: backendGroup, Filters: filters, Match: convertMatch(m), }) diff --git a/internal/controller/state/dataplane/configuration_test.go b/internal/controller/state/dataplane/configuration_test.go index b329b9d46a..3e1697590d 100644 --- a/internal/controller/state/dataplane/configuration_test.go +++ b/internal/controller/state/dataplane/configuration_test.go @@ -2777,6 +2777,93 @@ func TestBuildConfiguration_Plus(t *testing.T) { } } +func TestUpsertRoute_PathRuleHasInferenceBackend(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + // Setup minimal route with one BackendRef marked as IsInferencePool + backendRef := graph.BackendRef{ + 
SvcNsName: types.NamespacedName{Name: "svc", Namespace: "test"}, + ServicePort: apiv1.ServicePort{Port: 80}, + Valid: true, + IsInferencePool: true, + } + + listenerName := "listener-80" + gwName := types.NamespacedName{Namespace: "test", Name: "gw"} + + route := &graph.L7Route{ + RouteType: graph.RouteTypeHTTP, + Source: &v1.HTTPRoute{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hr", + Namespace: "test", + }, + }, + Spec: graph.L7RouteSpec{ + Rules: []graph.RouteRule{ + { + ValidMatches: true, + Filters: graph.RouteRuleFilters{Valid: true}, + BackendRefs: []graph.BackendRef{backendRef}, + Matches: []v1.HTTPRouteMatch{ + { + Path: &v1.HTTPPathMatch{ + Type: helpers.GetPointer(v1.PathMatchPathPrefix), + Value: helpers.GetPointer("/infer"), + }, + }, + }, + }, + }, + }, + ParentRefs: []graph.ParentRef{ + { + Attachment: &graph.ParentRefAttachmentStatus{ + AcceptedHostnames: map[string][]string{ + graph.CreateGatewayListenerKey(gwName, listenerName): {"*"}, + }, + }, + }, + }, + Valid: true, + } + + listener := &graph.Listener{ + Name: listenerName, + GatewayName: gwName, + Valid: true, + Routes: map[graph.RouteKey]*graph.L7Route{ + graph.CreateRouteKey(route.Source): route, + }, + } + + gateway := &graph.Gateway{ + Source: &v1.Gateway{ + ObjectMeta: metav1.ObjectMeta{ + Name: "gw", + Namespace: "test", + }, + }, + Listeners: []*graph.Listener{listener}, + } + + hpr := newHostPathRules() + hpr.upsertRoute(route, listener, gateway) + + // Find the PathRule for "/infer" + found := false + for _, rules := range hpr.rulesPerHost { + for _, pr := range rules { + if pr.Path == "/infer" { + found = true + g.Expect(pr.HasInferenceBackends).To(BeTrue()) + } + } + } + g.Expect(found).To(BeTrue(), "PathRule for '/infer' not found") +} + func TestNewBackendGroup_Mirror(t *testing.T) { t.Parallel() g := NewWithT(t) @@ -2788,7 +2875,7 @@ func TestNewBackendGroup_Mirror(t *testing.T) { IsMirrorBackend: true, } - group := newBackendGroup([]graph.BackendRef{backendRef}, types.NamespacedName{}, types.NamespacedName{}, 0) + group, _ := newBackendGroup([]graph.BackendRef{backendRef}, types.NamespacedName{}, types.NamespacedName{}, 0) g.Expect(group.Backends).To(BeEmpty()) } diff --git a/internal/controller/state/dataplane/types.go b/internal/controller/state/dataplane/types.go index e593dfd6e0..c6d7e8f93a 100644 --- a/internal/controller/state/dataplane/types.go +++ b/internal/controller/state/dataplane/types.go @@ -5,6 +5,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/nginx/config/policies" "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/graph" @@ -139,6 +140,8 @@ type PathRule struct { Policies []policies.Policy // GRPC indicates if this is a gRPC rule GRPC bool + // HasInferenceBackends indicates whether the PathRule contains a backend for an inference workload. + HasInferenceBackends bool } // InvalidHTTPFilter is a special filter for handling the case when configured filters are invalid. @@ -325,6 +328,9 @@ func (bg *BackendGroup) Name() string { type Backend struct { // VerifyTLS holds the backend TLS verification configuration. VerifyTLS *VerifyTLS + // EndpointPickerConfig holds the configuration for the EndpointPicker for this backend. + // This is set if this backend is for an inference workload. + EndpointPickerConfig *inference.EndpointPickerRef // UpstreamName is the name of the upstream for this backend. 
UpstreamName string // Weight is the weight of the BackendRef. diff --git a/internal/controller/state/graph/backend_refs.go b/internal/controller/state/graph/backend_refs.go index e14d0fb0fa..95ce6df0b9 100644 --- a/internal/controller/state/graph/backend_refs.go +++ b/internal/controller/state/graph/backend_refs.go @@ -9,6 +9,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -30,6 +31,8 @@ const ( type BackendRef struct { // BackendTLSPolicy is the BackendTLSPolicy of the Service which is referenced by the backendRef. BackendTLSPolicy *BackendTLSPolicy + // EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool. + EndpointPickerConfig *inference.EndpointPickerRef // InvalidForGateways is a map of Gateways for which this BackendRef is invalid for, with the corresponding // condition. Certain NginxProxy configurations may result in a backend not being valid for some Gateways, // but not others. @@ -45,6 +48,8 @@ type BackendRef struct { Valid bool // IsMirrorBackend indicates whether the BackendGroup is for a mirrored backend. IsMirrorBackend bool + // IsInferencePool indicates whether the BackendRef is for an InferencePool. + IsInferencePool bool } // ServicePortReference returns a string representation for the service and port that is referenced by the BackendRef. @@ -118,6 +123,7 @@ func addBackendRefsToRules( if pool, exists := referencedInferencePools[poolName]; exists { port := gatewayv1.PortNumber(pool.Source.Spec.TargetPorts[0].Number) ref.Port = helpers.GetPointer(port) + ref.EndpointPickerConfig = &pool.Source.Spec.EndpointPickerRef } } @@ -181,10 +187,12 @@ func createBackendRef( if !valid { backendRef := BackendRef{ - Weight: weight, - Valid: false, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - InvalidForGateways: make(map[types.NamespacedName]conditions.Condition), + Weight: weight, + Valid: false, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: make(map[types.NamespacedName]conditions.Condition), + EndpointPickerConfig: ref.EndpointPickerConfig, } return backendRef, []conditions.Condition{cond} @@ -198,12 +206,14 @@ func createBackendRef( svcIPFamily, svcPort, err := getIPFamilyAndPortFromRef(ref.BackendRef, svcNsName, services, refPath) if err != nil { backendRef := BackendRef{ - Weight: weight, - Valid: false, - SvcNsName: svcNsName, - ServicePort: v1.ServicePort{}, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - InvalidForGateways: make(map[types.NamespacedName]conditions.Condition), + Weight: weight, + Valid: false, + SvcNsName: svcNsName, + ServicePort: v1.ServicePort{}, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: make(map[types.NamespacedName]conditions.Condition), + EndpointPickerConfig: ref.EndpointPickerConfig, } return backendRef, []conditions.Condition{conditions.NewRouteBackendRefRefBackendNotFound(err.Error())} @@ -220,12 +230,14 @@ func createBackendRef( // Check if externalName field is empty or whitespace-only if strings.TrimSpace(svc.Spec.ExternalName) == "" { backendRef := BackendRef{ - SvcNsName: svcNsName, - ServicePort: svcPort, - Weight: weight, - Valid: false, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - InvalidForGateways: invalidForGateways, + 
SvcNsName: svcNsName, + ServicePort: svcPort, + Weight: weight, + Valid: false, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: invalidForGateways, + EndpointPickerConfig: ref.EndpointPickerConfig, } return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedValue( @@ -249,12 +261,14 @@ func createBackendRef( ) if err != nil { backendRef := BackendRef{ - SvcNsName: svcNsName, - ServicePort: svcPort, - Weight: weight, - Valid: false, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - InvalidForGateways: invalidForGateways, + SvcNsName: svcNsName, + ServicePort: svcPort, + Weight: weight, + Valid: false, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: invalidForGateways, + EndpointPickerConfig: ref.EndpointPickerConfig, } return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedValue(err.Error())) @@ -264,13 +278,15 @@ func createBackendRef( err = validateRouteBackendRefAppProtocol(route.RouteType, *svcPort.AppProtocol, backendTLSPolicy) if err != nil { backendRef := BackendRef{ - SvcNsName: svcNsName, - BackendTLSPolicy: backendTLSPolicy, - ServicePort: svcPort, - Weight: weight, - Valid: false, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - InvalidForGateways: invalidForGateways, + SvcNsName: svcNsName, + BackendTLSPolicy: backendTLSPolicy, + ServicePort: svcPort, + Weight: weight, + Valid: false, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: invalidForGateways, + EndpointPickerConfig: ref.EndpointPickerConfig, } return backendRef, append(conds, conditions.NewRouteBackendRefUnsupportedProtocol(err.Error())) @@ -278,13 +294,15 @@ func createBackendRef( } backendRef := BackendRef{ - SvcNsName: svcNsName, - BackendTLSPolicy: backendTLSPolicy, - ServicePort: svcPort, - Valid: true, - Weight: weight, - IsMirrorBackend: ref.MirrorBackendIdx != nil, - InvalidForGateways: invalidForGateways, + SvcNsName: svcNsName, + BackendTLSPolicy: backendTLSPolicy, + ServicePort: svcPort, + Valid: true, + Weight: weight, + IsMirrorBackend: ref.MirrorBackendIdx != nil, + IsInferencePool: ref.IsInferencePool, + InvalidForGateways: invalidForGateways, + EndpointPickerConfig: ref.EndpointPickerConfig, } return backendRef, conds diff --git a/internal/controller/state/graph/backend_refs_test.go b/internal/controller/state/graph/backend_refs_test.go index 3f05f793a6..b786daed9b 100644 --- a/internal/controller/state/graph/backend_refs_test.go +++ b/internal/controller/state/graph/backend_refs_test.go @@ -1231,9 +1231,11 @@ func TestAddBackendRefsToRules(t *testing.T) { ServicePort: v1.ServicePort{ Port: 80, }, - Valid: true, - Weight: 1, - InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, + Valid: true, + Weight: 1, + InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, + IsInferencePool: true, + EndpointPickerConfig: &inference.EndpointPickerRef{}, }, }, expectedConditions: nil, diff --git a/internal/controller/state/graph/graph_test.go b/internal/controller/state/graph/graph_test.go index da0ca04d47..1a367e5977 100644 --- a/internal/controller/state/graph/graph_test.go +++ b/internal/controller/state/graph/graph_test.go @@ -223,10 +223,12 @@ func TestBuildGraph(t *testing.T) { Namespace: testNs, Name: controller.CreateInferencePoolServiceName("ipool"), }, - ServicePort: v1.ServicePort{Port: 80}, - Valid: true, - Weight: 1, - InvalidForGateways: 
map[types.NamespacedName]conditions.Condition{}, + ServicePort: v1.ServicePort{Port: 80}, + Valid: true, + Weight: 1, + InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, + IsInferencePool: true, + EndpointPickerConfig: &inference.EndpointPickerRef{}, }, } rbrs := []RouteBackendRef{ diff --git a/internal/controller/state/graph/httproute.go b/internal/controller/state/graph/httproute.go index 7ced07a3ff..4c01f1ff60 100644 --- a/internal/controller/state/graph/httproute.go +++ b/internal/controller/state/graph/httproute.go @@ -216,11 +216,26 @@ func processHTTPRouteRule( } } - var rbr RouteBackendRef + rbr := RouteBackendRef{ + BackendRef: b.BackendRef, + } + // If route specifies an InferencePool backend, we need to convert it to its associated // headless Service backend (that we created), so nginx config can be built properly. // Only do this if the InferencePool actually exists. if inferencePoolBackend(b, routeNamespace, inferencePools) { + // We don't support traffic splitting at the Route level for + // InferencePool backends, so if there's more than one backendRef, and one of them + // is an InferencePool, we mark the rule as invalid. + if len(specRule.BackendRefs) > 1 { + err := field.Forbidden( + rulePath.Child("backendRefs"), + "cannot use InferencePool backend when multiple backendRefs are specified in a single rule", + ) + errors.invalid = append(errors.invalid, err) + break + } + svcName := controller.CreateInferencePoolServiceName(string(b.Name)) rbr = RouteBackendRef{ IsInferencePool: true, @@ -234,10 +249,6 @@ func processHTTPRouteRule( Weight: b.Weight, }, } - } else { - rbr = RouteBackendRef{ - BackendRef: b.BackendRef, - } } rbr.Filters = interfaceFilters diff --git a/internal/controller/state/graph/httproute_test.go b/internal/controller/state/graph/httproute_test.go index ce60506f57..aaef8dc519 100644 --- a/internal/controller/state/graph/httproute_test.go +++ b/internal/controller/state/graph/httproute_test.go @@ -1253,6 +1253,67 @@ func TestBuildHTTPRouteWithMirrorRoutes(t *testing.T) { g.Expect(helpers.Diff(expectedMirrorRoute, routes[mirrorRouteKey])).To(BeEmpty()) } +func TestProcessHTTPRouteRule_InferencePoolWithMultipleBackendRefs(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + validator := &validationfakes.FakeHTTPFieldsValidator{} + inferencePoolName := "ipool" + routeNamespace := "test" + inferencePools := map[types.NamespacedName]*inference.InferencePool{ + {Namespace: routeNamespace, Name: inferencePoolName}: {}, + } + + // BackendRef 1: InferencePool + backendRef1 := gatewayv1.HTTPBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Group: helpers.GetPointer[gatewayv1.Group](inferenceAPIGroup), + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.InferencePool), + Name: gatewayv1.ObjectName(inferencePoolName), + Namespace: helpers.GetPointer(gatewayv1.Namespace(routeNamespace)), + }, + }, + } + // BackendRef 2: Service + backendRef2 := gatewayv1.HTTPBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: "backend", + }, + }, + } + + specRule := gatewayv1.HTTPRouteRule{ + Matches: []gatewayv1.HTTPRouteMatch{ + { + Path: &gatewayv1.HTTPPathMatch{ + Type: helpers.GetPointer(gatewayv1.PathMatchPathPrefix), + Value: helpers.GetPointer("/"), + }, + }, + }, + BackendRefs: []gatewayv1.HTTPBackendRef{backendRef1, backendRef2}, + } + + rulePath := 
field.NewPath("spec").Child("rules").Index(0) + + routeRule, errs := processHTTPRouteRule( + specRule, + routeNamespace, + rulePath, + validator, + nil, + inferencePools, + ) + + g.Expect(routeRule.RouteBackendRefs).To(BeEmpty()) + g.Expect(errs.invalid).To(HaveLen(1)) + errMsg := "cannot use InferencePool backend when multiple backendRefs are specified in a single rule" + g.Expect(errs.invalid[0].Error()).To(ContainSubstring(errMsg)) +} + func TestValidateMatch(t *testing.T) { t.Parallel() createAllValidValidator := func() *validationfakes.FakeHTTPFieldsValidator { diff --git a/internal/controller/state/graph/route_common.go b/internal/controller/state/graph/route_common.go index f7429a594a..6421b35aca 100644 --- a/internal/controller/state/graph/route_common.go +++ b/internal/controller/state/graph/route_common.go @@ -166,6 +166,9 @@ type RouteBackendRef struct { // If this backend is defined in a RequestMirror filter, this value will indicate the filter's index. MirrorBackendIdx *int + // EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool. + EndpointPickerConfig *inference.EndpointPickerRef + Filters []any // IsInferencePool indicates if this backend is an InferencePool disguised as a Service. diff --git a/internal/framework/types/types.go b/internal/framework/types/types.go index bf61bd23d7..0aeccd008d 100644 --- a/internal/framework/types/types.go +++ b/internal/framework/types/types.go @@ -5,3 +5,14 @@ import "sigs.k8s.io/controller-runtime/pkg/client" // ObjectType is used when we only care about the type of client.Object. // The fields of the client.Object may be empty. type ObjectType client.Object + +// Fields used for communication with the EndpointPicker service when using the Inference Extension. +const ( + // EPPEndpointHostHeader is the HTTP header used to specify the EPP endpoint host. + EPPEndpointHostHeader = "X-EPP-Host" + // EPPEndpointPortHeader is the HTTP header used to specify the EPP endpoint port. + EPPEndpointPortHeader = "X-EPP-Port" + // GoShimPort is the default port for the Go EPP shim server to listen on. If collisions become a problem, + // we can make this configurable via the NginxProxy resource. + GoShimPort = 54800 // why 54800? Sum "nginx" in ASCII and multiply by 100. +) From 7fbfcf29ffe2859304e69524c744e58e3430c612 Mon Sep 17 00:00:00 2001 From: Saloni Choudhary <146118978+salonichf5@users.noreply.github.com> Date: Wed, 1 Oct 2025 21:21:34 +0530 Subject: [PATCH 05/10] Adds status information to describe the state of Inference Pools (#3970) Update the inference extension design doc to specify different status that needs to be set on Inference Pools to understand its state --- docs/proposals/gateway-inference-extension.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/proposals/gateway-inference-extension.md b/docs/proposals/gateway-inference-extension.md index bf17bf51d4..70258521dc 100644 --- a/docs/proposals/gateway-inference-extension.md +++ b/docs/proposals/gateway-inference-extension.md @@ -104,6 +104,14 @@ InferenceObjective represents the desired state of a specific model use case. As It is my impression that this API is purely for the EPP to handle, and does not need to be handled by NGINX Gateway Fabric. +### Inference Status + +Each InferencePool publishes two conditions that together describe its overall state. The first is the `Accepted` condition, which communicates whether the pool is referenced by an HTTPRoute that the Gateway has accepted. 
When the route is not accepted, this condition is explicitly set to `False` with the reason `InferencePoolReasonHTTPRouteNotAccepted`, making it clear that the Gateway rejected the route referencing the pool. + +The second is the `ResolvedRefs` condition, which reflects whether the `EndpointPickerRef` associated with the pool is valid. If it is misconfigured, such as being an unsupported kind, left undefined, or pointing to a non-existent Service, this condition is set to `False` with the reason `InferencePoolReasonInvalidExtensionRef`. + +The status of an InferencePool records the Gateway as its parent reference and associates it with the relevant conditions; when all conditions are `True`, the pool is valid and traffic can be directed to it. + ### Personas and Processes Two new personas are introduced, the `Inference Platform Owner/Admin` and `Inference Workload Owner`. From c8dbd309b39ef06c1e8b30a65cebc62a6afb280f Mon Sep 17 00:00:00 2001 From: bjee19 <139261241+bjee19@users.noreply.github.com> Date: Mon, 6 Oct 2025 08:52:14 -0700 Subject: [PATCH 06/10] Update gateway inference extension proposal security considerations (#4006) Update gateway inference extension proposal on the inability to provide a secure TLS connection to the EPP. --- docs/proposals/gateway-inference-extension.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/proposals/gateway-inference-extension.md b/docs/proposals/gateway-inference-extension.md index 70258521dc..f36540f036 100644 --- a/docs/proposals/gateway-inference-extension.md +++ b/docs/proposals/gateway-inference-extension.md @@ -132,7 +132,11 @@ For development purposes, the [Getting started guide](https://gateway-api-infere ## Security Considerations -If the Endpoint Picker (EPP) supports it, we should use a secure TLS connection. This ensures an encrypted and authenticated communication channel between the NGINX data plane and the EPP. For production environments, an integration with `cert-manager` is likely the best solution, as we recommend this for various other secure channels within the NGF ecosystem. Otherwise, our control plane may have to provision certificates in the default case (similar to NGF's startup `cert-generator` Job). +A secure TLS gRPC connection between the Endpoint Picker (EPP) and the Go Shim Server would be ideal. This would ensure an encrypted and authenticated communication channel between the NGINX data plane and the EPP. However, this is not possible with the current EPP implementation and is a [known issue](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/582), with a separate issue currently open to [provide further TLS support](https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1556). + +Since NGF does not provision the EPP, is not in charge of modifying it, and the current [EPP Helm template](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/config/charts/inferencepool/templates/epp-deployment.yaml) does not support providing extra volumes/volumeMounts, there is no way to mount a cert on the EPP. Even if specifying extra volumes/volumeMounts were allowed through a feature request, the implementation on our side would be hacky and unconventional. + +Given that the gateway inference project remains in `Alpha` and explicitly warns against production use, we will follow existing implementations and use an insecure gRPC connection to the EPP.
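+For illustration only, the following is a minimal sketch of what such a plaintext gRPC client connection from the Go shim to the EPP could look like using grpc-go. The package name, function name, and example address are assumptions for this sketch, not the actual implementation:
+
+```go
+// Package shim is a hypothetical home for the EPP client used by the Go shim server.
+package shim
+
+import (
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+)
+
+// dialEPP opens a plaintext (non-TLS) gRPC connection to the EPP at eppAddr,
+// e.g. "my-epp.default.svc:9002" (hypothetical address). This is acceptable while the
+// inference extension remains Alpha, per the reasoning above.
+func dialEPP(eppAddr string) (*grpc.ClientConn, error) {
+	return grpc.NewClient(eppAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
+}
+```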
For our goal of meeting the API’s core specifications with a basic solution, secure gRPC is not strictly required at this stage. At some point, there may be opportunities for attaching Policies (like a BackendTLSPolicy) to an InferenceModel to secure the NGINX -> AI workload connection, however that is not in scope for now. From 4441d35e54fd91424682053744aba2d3cd9079f0 Mon Sep 17 00:00:00 2001 From: Saloni Choudhary <146118978+salonichf5@users.noreply.github.com> Date: Mon, 6 Oct 2025 23:20:57 +0530 Subject: [PATCH 07/10] Add status to Inference Pools (#4005) Add status to Inference Pools Problem: Users want to see the current status of their Inference pools Solution: Add status for inference pools --- internal/controller/handler.go | 4 +- .../controller/state/conditions/conditions.go | 53 +++ internal/controller/state/graph/graph.go | 3 +- internal/controller/state/graph/graph_test.go | 26 +- .../controller/state/graph/inferencepools.go | 100 +++- .../state/graph/inferencepools_test.go | 439 +++++++++++++++--- .../controller/status/prepare_requests.go | 54 +++ .../status/prepare_requests_test.go | 238 ++++++++++ internal/controller/status/status_setters.go | 63 +++ .../controller/status/status_setters_test.go | 301 ++++++++++++ tests/go.mod | 1 + tests/go.sum | 6 +- 12 files changed, 1218 insertions(+), 70 deletions(-) diff --git a/internal/controller/handler.go b/internal/controller/handler.go index 8a69a16ef1..0112a80942 100644 --- a/internal/controller/handler.go +++ b/internal/controller/handler.go @@ -368,17 +368,19 @@ func (h *eventHandlerImpl) updateStatuses(ctx context.Context, gr *graph.Graph, transitionTime, h.cfg.gatewayCtlrName, ) + inferencePoolReqs := status.PrepareInferencePoolRequests(gr.ReferencedInferencePools, transitionTime) reqs := make( []status.UpdateRequest, 0, - len(gcReqs)+len(routeReqs)+len(polReqs)+len(ngfPolReqs)+len(snippetsFilterReqs), + len(gcReqs)+len(routeReqs)+len(polReqs)+len(ngfPolReqs)+len(snippetsFilterReqs)+len(inferencePoolReqs), ) reqs = append(reqs, gcReqs...) reqs = append(reqs, routeReqs...) reqs = append(reqs, polReqs...) reqs = append(reqs, ngfPolReqs...) reqs = append(reqs, snippetsFilterReqs...) + reqs = append(reqs, inferencePoolReqs...) h.cfg.statusUpdater.UpdateGroup(ctx, groupAllExceptGateways, reqs...) diff --git a/internal/controller/state/conditions/conditions.go b/internal/controller/state/conditions/conditions.go index d62b04ba1f..c351f5ba32 100644 --- a/internal/controller/state/conditions/conditions.go +++ b/internal/controller/state/conditions/conditions.go @@ -4,6 +4,7 @@ import ( "fmt" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" @@ -1138,3 +1139,55 @@ func NewBackendTLSPolicyNoValidCACertificate(message string) Condition { Message: message, } } + +// NewInferencePoolAccepted returns a Condition that indicates that the InferencePool is accepted by the Gateway. +func NewInferencePoolAccepted() Condition { + return Condition{ + Type: string(inference.InferencePoolConditionAccepted), + Status: metav1.ConditionTrue, + Reason: string(inference.InferencePoolConditionAccepted), + Message: "InferencePool is accepted by the Gateway.", + } +} + +// NewInferencePoolResolvedRefs returns a Condition that +// indicates that all references in the InferencePool are resolved. 
+func NewInferencePoolResolvedRefs() Condition { + return Condition{ + Type: string(inference.InferencePoolConditionResolvedRefs), + Status: metav1.ConditionTrue, + Reason: string(inference.InferencePoolConditionResolvedRefs), + Message: "Inference pool references a valid ExtensionRef.", + } +} + +// NewDefaultInferenceConditions returns the default Conditions +// that must be present in the status of an InferencePool. +func NewDefaultInferenceConditions() []Condition { + return []Condition{ + NewInferencePoolAccepted(), + NewInferencePoolResolvedRefs(), + } +} + +// NewInferencePoolInvalidHTTPRouteNotAccepted returns a Condition that indicates that the InferencePool is not +// accepted because the associated HTTPRoute is not accepted by the Gateway. +func NewInferencePoolInvalidHTTPRouteNotAccepted(msg string) Condition { + return Condition{ + Type: string(inference.InferencePoolConditionAccepted), + Status: metav1.ConditionFalse, + Reason: string(inference.InferencePoolReasonHTTPRouteNotAccepted), + Message: msg, + } +} + +// NewInferencePoolInvalidExtensionref returns a Condition that indicates that the InferencePool is not +// accepted because the ExtensionRef is invalid. +func NewInferencePoolInvalidExtensionref(msg string) Condition { + return Condition{ + Type: string(inference.InferencePoolConditionResolvedRefs), + Status: metav1.ConditionFalse, + Reason: string(inference.InferencePoolReasonInvalidExtensionRef), + Message: msg, + } +} diff --git a/internal/controller/state/graph/graph.go b/internal/controller/state/graph/graph.go index b5e13991e9..538a29a09d 100644 --- a/internal/controller/state/graph/graph.go +++ b/internal/controller/state/graph/graph.go @@ -260,7 +260,8 @@ func BuildGraph( processedSnippetsFilters, state.InferencePools, ) - referencedInferencePools := buildReferencedInferencePools(routes, gws, state.InferencePools) + + referencedInferencePools := buildReferencedInferencePools(routes, gws, state.InferencePools, state.Services) l4routes := buildL4RoutesForGateways( state.TLSRoutes, diff --git a/internal/controller/state/graph/graph_test.go b/internal/controller/state/graph/graph_test.go index 1a367e5977..a49202d96e 100644 --- a/internal/controller/state/graph/graph_test.go +++ b/internal/controller/state/graph/graph_test.go @@ -223,12 +223,15 @@ func TestBuildGraph(t *testing.T) { Namespace: testNs, Name: controller.CreateInferencePoolServiceName("ipool"), }, - ServicePort: v1.ServicePort{Port: 80}, - Valid: true, - Weight: 1, - InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, - IsInferencePool: true, - EndpointPickerConfig: &inference.EndpointPickerRef{}, + ServicePort: v1.ServicePort{Port: 80}, + Valid: true, + Weight: 1, + InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, + IsInferencePool: true, + EndpointPickerConfig: &inference.EndpointPickerRef{ + Kind: kinds.Service, + Name: inference.ObjectName(controller.CreateInferencePoolServiceName("ipool")), + }, }, } rbrs := []RouteBackendRef{ @@ -389,6 +392,10 @@ func TestBuildGraph(t *testing.T) { TargetPorts: []inference.Port{ {Number: 80}, }, + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: kinds.Service, + Name: inference.ObjectName(controller.CreateInferencePoolServiceName("ipool")), + }, }, } @@ -1325,6 +1332,13 @@ func TestBuildGraph(t *testing.T) { ReferencedInferencePools: map[types.NamespacedName]*ReferencedInferencePool{ client.ObjectKeyFromObject(inferencePool): { Source: inferencePool, + Gateways: []*gatewayv1.Gateway{ + gw1.Source, + }, + HTTPRoutes: 
[]*L7Route{ + inferenceRoute, + }, + Conditions: []conditions.Condition{}, }, }, ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{ diff --git a/internal/controller/state/graph/inferencepools.go b/internal/controller/state/graph/inferencepools.go index ada688bcc5..84e6d62df2 100644 --- a/internal/controller/state/graph/inferencepools.go +++ b/internal/controller/state/graph/inferencepools.go @@ -1,10 +1,15 @@ package graph import ( + "fmt" + + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" + apiv1 "sigs.k8s.io/gateway-api/apis/v1" + "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" ) @@ -14,6 +19,12 @@ import ( type ReferencedInferencePool struct { // Source is the original InferencePool that this ReferencedInferencePool is based on. Source *inference.InferencePool + // Gateways are the Gateways that this ReferencedInferencePool is attached to. + Gateways []*apiv1.Gateway + // HTTPRoutes are the HTTPRoutes that reference this InferencePool. + HTTPRoutes []*L7Route + // Conditions contains the conditions that should be applied to the InferencePool. + Conditions []conditions.Condition } // buildReferencedInferencePools builds a map of InferencePools that are referenced by HTTPRoutes @@ -22,8 +33,9 @@ func buildReferencedInferencePools( routes map[RouteKey]*L7Route, gws map[types.NamespacedName]*Gateway, inferencePools map[types.NamespacedName]*inference.InferencePool, + services map[types.NamespacedName]*v1.Service, ) map[types.NamespacedName]*ReferencedInferencePool { - referencedInferencePools := make(map[types.NamespacedName]*ReferencedInferencePool) + referencedInferencePools := make(map[types.NamespacedName]*ReferencedInferencePool, len(inferencePools)) for _, gw := range gws { if gw == nil { @@ -37,6 +49,17 @@ func buildReferencedInferencePools( return nil } + // validate each referenced InferencePool and add conditions. 
+ for _, refPool := range referencedInferencePools { + if routeCond := validateInferencePoolRoutesAcceptance(refPool.Source, refPool.HTTPRoutes); routeCond != nil { + refPool.Conditions = append(refPool.Conditions, *routeCond) + } + + if extensionRefCond := validateInferencePoolExtensionRef(refPool.Source, services); extensionRefCond != nil { + refPool.Conditions = append(refPool.Conditions, *extensionRefCond) + } + } + return referencedInferencePools } @@ -48,8 +71,9 @@ func processInferencePoolsForGateway( inferencePools map[types.NamespacedName]*inference.InferencePool, ) { gwKey := client.ObjectKeyFromObject(gw.Source) + for _, route := range routes { - if !route.Valid || !routeBelongsToGateway(route.ParentRefs, gwKey) { + if !routeBelongsToGateway(route.ParentRefs, gwKey) { continue } @@ -70,13 +94,83 @@ func processInferencePoolsForGateway( } if _, referenced := referencedInferencePools[poolName]; !referenced { - referencedInferencePools[poolName] = &ReferencedInferencePool{} + referencedInferencePools[poolName] = &ReferencedInferencePool{ + Conditions: make([]conditions.Condition, 0, 2), + Gateways: make([]*apiv1.Gateway, 0), + HTTPRoutes: make([]*L7Route, 0), + } } if pool, exists := inferencePools[poolName]; exists { referencedInferencePools[poolName].Source = pool + referencedInferencePools[poolName].Gateways = append( + referencedInferencePools[poolName].Gateways, + gw.Source, + ) + referencedInferencePools[poolName].HTTPRoutes = append( + referencedInferencePools[poolName].HTTPRoutes, + route, + ) } } } } } + +// validateInferencePoolExtensionRef validates the ExtensionRef of the InferencePool. +func validateInferencePoolExtensionRef( + ip *inference.InferencePool, + svc map[types.NamespacedName]*v1.Service, +) *conditions.Condition { + var failingCond conditions.Condition + if ip == nil { + return nil + } + + // if kind is empty, it defaults to Service + kind := string(ip.Spec.EndpointPickerRef.Kind) + if kind == "" { + kind = kinds.Service + } + + if kind != kinds.Service { + failingCond = conditions.NewInferencePoolInvalidExtensionref("Invalid ExtensionRef kind: " + kind) + return &failingCond + } + + eppNsName := types.NamespacedName{ + Name: string(ip.Spec.EndpointPickerRef.Name), + Namespace: ip.GetNamespace(), + } + + if _, ok := svc[eppNsName]; !ok { + failingCond = conditions.NewInferencePoolInvalidExtensionref("ExtensionRef Service not found: " + eppNsName.String()) + return &failingCond + } + + return nil +} + +// validateInferencePoolRoutesAcceptance checks if the routes that reference the InferencePool +// are accepted by the Gateway. +func validateInferencePoolRoutesAcceptance(ip *inference.InferencePool, routes []*L7Route) *conditions.Condition { + if ip == nil || len(routes) == 0 { + return nil + } + + // we do not need to validate that the route belongs to the gateway or not + // we only process routes that belong to the gateway in the first place + for _, route := range routes { + if !route.Valid { + cond := conditions.NewInferencePoolInvalidHTTPRouteNotAccepted( + fmt.Sprintf("Referenced HTTPRoute %s/%s is not accepted by the Gateway", + route.Source.GetNamespace(), + route.Source.GetName(), + ), + ) + return &cond + } + } + + return nil +} diff --git a/internal/controller/state/graph/inferencepools_test.go b/internal/controller/state/graph/inferencepools_test.go index d67331b5e7..f6ea66215a 100644 --- a/internal/controller/state/graph/inferencepools_test.go +++ b/internal/controller/state/graph/inferencepools_test.go @@ -4,11 +4,13 @@ import ( "testing" . 
"github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" + "github.com/nginx/nginx-gateway-fabric/v2/internal/controller/state/conditions" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/controller" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/helpers" "github.com/nginx/nginx-gateway-fabric/v2/internal/framework/kinds" @@ -70,6 +72,65 @@ func TestBuildReferencedInferencePools(t *testing.T) { validRoute := getNormalRoute() + endpointPickerConfig := inference.EndpointPickerRef{ + Kind: "Service", + Name: "valid-svc", + } + + validSvcMap := map[types.NamespacedName]*v1.Service{ + {Name: "valid-svc", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-svc", + Namespace: "test", + }, + }, + {Name: "regular-svc", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{ + Name: "regular-svc", + Namespace: "test", + }, + }, + } + + modifiedRouteWithServiceBackend := getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules[0].RouteBackendRefs = append(route.Spec.Rules[0].RouteBackendRefs, + RouteBackendRef{ + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: "regular-svc", + }, + }, + }, + ) + return route + }) + + routeWithInferencePoolHeadlessSvcBackend := getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules = []RouteRule{ + { + RouteBackendRefs: []RouteBackendRef{ + { + IsInferencePool: true, + BackendRef: gatewayv1.BackendRef{ + BackendObjectReference: gatewayv1.BackendObjectReference{ + Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), + Name: gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("pool")), + Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), + }, + }, + }, + }, + }, + } + return route + }) + + routeWithNoNamespaceBackend := getModifiedRoute(func(route *L7Route) *L7Route { + route.Spec.Rules[0].RouteBackendRefs[0].Namespace = nil + return route + }) + invalidRoute := getModifiedRoute(func(route *L7Route) *L7Route { route.Valid = false return route @@ -78,6 +139,7 @@ func TestBuildReferencedInferencePools(t *testing.T) { tests := []struct { routes map[RouteKey]*L7Route gws map[types.NamespacedName]*Gateway + services map[types.NamespacedName]*v1.Service inferencePools map[types.NamespacedName]*inference.InferencePool expPools map[types.NamespacedName]*ReferencedInferencePool name string @@ -93,17 +155,6 @@ func TestBuildReferencedInferencePools(t *testing.T) { }, expPools: nil, }, - { - name: "invalid route", - gws: gws, - routes: map[RouteKey]*L7Route{ - CreateRouteKey(validRoute.Source): invalidRoute, - }, - inferencePools: map[types.NamespacedName]*inference.InferencePool{ - {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, - }, - expPools: nil, - }, { name: "valid route with referenced inferencepool", gws: gws, @@ -111,11 +162,29 @@ func TestBuildReferencedInferencePools(t *testing.T) { CreateRouteKey(validRoute.Source): validRoute, }, inferencePools: map[types.NamespacedName]*inference.InferencePool{ - {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + {Name: "pool", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + 
EndpointPickerRef: endpointPickerConfig, + }, + }, }, + services: validSvcMap, expPools: map[types.NamespacedName]*ReferencedInferencePool{ {Name: "pool", Namespace: "test"}: { - Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, + Gateways: []*gatewayv1.Gateway{ + gws[gwNsName].Source, + }, + HTTPRoutes: []*L7Route{ + validRoute, + }, + Conditions: []conditions.Condition{}, }, }, }, @@ -149,25 +218,32 @@ func TestBuildReferencedInferencePools(t *testing.T) { name: "route with both inferencepool and service backends", gws: gws, routes: map[RouteKey]*L7Route{ - CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route { - route.Spec.Rules[0].RouteBackendRefs = append(route.Spec.Rules[0].RouteBackendRefs, - RouteBackendRef{ - BackendRef: gatewayv1.BackendRef{ - BackendObjectReference: gatewayv1.BackendObjectReference{ - Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), - }, - }, - }, - ) - return route - }), + CreateRouteKey(validRoute.Source): modifiedRouteWithServiceBackend, }, inferencePools: map[types.NamespacedName]*inference.InferencePool{ - {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + {Name: "pool", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, }, + services: validSvcMap, expPools: map[types.NamespacedName]*ReferencedInferencePool{ {Name: "pool", Namespace: "test"}: { - Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, + Gateways: []*gatewayv1.Gateway{ + gws[gwNsName].Source, + }, + HTTPRoutes: []*L7Route{ + modifiedRouteWithServiceBackend, + }, + Conditions: []conditions.Condition{}, }, }, }, @@ -175,32 +251,32 @@ func TestBuildReferencedInferencePools(t *testing.T) { name: "route with headless InferencePool Service backend", gws: gws, routes: map[RouteKey]*L7Route{ - CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route { - route.Spec.Rules = []RouteRule{ - { - RouteBackendRefs: []RouteBackendRef{ - { - IsInferencePool: true, - BackendRef: gatewayv1.BackendRef{ - BackendObjectReference: gatewayv1.BackendObjectReference{ - Kind: helpers.GetPointer[gatewayv1.Kind](kinds.Service), - Name: gatewayv1.ObjectName(controller.CreateInferencePoolServiceName("pool")), - Namespace: helpers.GetPointer[gatewayv1.Namespace]("test"), - }, - }, - }, - }, - }, - } - return route - }), + CreateRouteKey(validRoute.Source): routeWithInferencePoolHeadlessSvcBackend, }, inferencePools: map[types.NamespacedName]*inference.InferencePool{ - {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + {Name: "pool", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, }, + services: validSvcMap, expPools: map[types.NamespacedName]*ReferencedInferencePool{ {Name: "pool", Namespace: "test"}: { - Source: 
&inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, + Gateways: []*gatewayv1.Gateway{ + gws[gwNsName].Source, + }, + HTTPRoutes: []*L7Route{ + routeWithInferencePoolHeadlessSvcBackend, + }, + Conditions: []conditions.Condition{}, }, }, }, @@ -208,17 +284,32 @@ func TestBuildReferencedInferencePools(t *testing.T) { name: "inferencepool backend with no namespace uses route namespace", gws: gws, routes: map[RouteKey]*L7Route{ - CreateRouteKey(validRoute.Source): getModifiedRoute(func(route *L7Route) *L7Route { - route.Spec.Rules[0].RouteBackendRefs[0].Namespace = nil - return route - }), + CreateRouteKey(validRoute.Source): routeWithNoNamespaceBackend, }, inferencePools: map[types.NamespacedName]*inference.InferencePool{ - {Name: "pool", Namespace: "test"}: {ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + {Name: "pool", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, }, + services: validSvcMap, expPools: map[types.NamespacedName]*ReferencedInferencePool{ {Name: "pool", Namespace: "test"}: { - Source: &inference.InferencePool{ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}}, + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: endpointPickerConfig, + }, + }, + Gateways: []*gatewayv1.Gateway{ + gws[gwNsName].Source, + }, + HTTPRoutes: []*L7Route{ + routeWithNoNamespaceBackend, + }, + Conditions: []conditions.Condition{}, }, }, }, @@ -231,9 +322,243 @@ func TestBuildReferencedInferencePools(t *testing.T) { inferencePools: map[types.NamespacedName]*inference.InferencePool{}, expPools: map[types.NamespacedName]*ReferencedInferencePool{ {Name: "pool", Namespace: "test"}: { - Source: nil, + Source: nil, + Gateways: []*gatewayv1.Gateway{}, + HTTPRoutes: []*L7Route{}, + Conditions: []conditions.Condition{}, + }, + }, + }, + { + name: "inferencepool references invalid extensionRef and has invalid route", + gws: gws, + services: validSvcMap, + routes: map[RouteKey]*L7Route{ + CreateRouteKey(invalidRoute.Source): invalidRoute, + }, + inferencePools: map[types.NamespacedName]*inference.InferencePool{ + {Name: "pool", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: "Service", + Name: "invalid-extension-ref", + }, + }, + }, + }, + expPools: map[types.NamespacedName]*ReferencedInferencePool{ + {Name: "pool", Namespace: "test"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{Name: "pool", Namespace: "test"}, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: "Service", + Name: "invalid-extension-ref", + }, + }, + }, + Gateways: []*gatewayv1.Gateway{ + gws[gwNsName].Source, + }, + HTTPRoutes: []*L7Route{ + invalidRoute, + }, + Conditions: []conditions.Condition{ + conditions.NewInferencePoolInvalidHTTPRouteNotAccepted( + "Referenced HTTPRoute test/valid-route is not accepted by the Gateway", + ), + conditions.NewInferencePoolInvalidExtensionref( + "ExtensionRef Service not found: test/invalid-extension-ref", + ), + }, + 
}, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + pools := buildReferencedInferencePools(test.routes, test.gws, test.inferencePools, test.services) + + g.Expect(helpers.Diff(test.expPools, pools)).To(BeEmpty()) + }) + } +} + +func TestValidateInferencePoolExtensionRef(t *testing.T) { + t.Parallel() + + tests := []struct { + pool *inference.InferencePool + services map[types.NamespacedName]*v1.Service + expCond *conditions.Condition + name string + }{ + { + name: "inference pool has a valid extensionRef", + pool: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: "Service", + Name: "valid-svc", + }, + }, + }, + services: map[types.NamespacedName]*v1.Service{ + {Name: "valid-svc", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-svc", + Namespace: "test", + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + { + Port: 80, + }, + }, + }, + }, + }, + expCond: nil, + }, + { + name: "inference pool references a non-existent service", + pool: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: "Service", + Name: "does-not-exist", + }, + }, + }, + services: map[types.NamespacedName]*v1.Service{}, + expCond: helpers.GetPointer( + conditions.NewInferencePoolInvalidExtensionref("ExtensionRef Service not found: test/does-not-exist"), + ), + }, + { + name: "inference pool references an extensionRef that is not a service", + pool: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Kind: "Invalid-Kind", + Name: "svc", + }, + }, + }, + services: map[types.NamespacedName]*v1.Service{ + {Name: "svc", Namespace: "test"}: { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc", + Namespace: "test", + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + { + Port: 80, + }, + }, + }, + }, + }, + expCond: helpers.GetPointer( + conditions.NewInferencePoolInvalidExtensionref("Invalid ExtensionRef kind: Invalid-Kind"), + ), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + conds := validateInferencePoolExtensionRef(test.pool, test.services) + g.Expect(conds).To(Equal(test.expCond)) + }) + } +} + +func TestValidateInferencePoolRoutesAcceptance(t *testing.T) { + t.Parallel() + + tests := []struct { + pool *inference.InferencePool + expCond *conditions.Condition + name string + routes []*L7Route + }{ + { + name: "no routes referencing the pool", + pool: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + }, + routes: []*L7Route{}, + expCond: nil, + }, + { + name: "one valid route referencing the pool", + pool: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + }, + routes: []*L7Route{ + { + Valid: true, + Source: &gatewayv1.HTTPRoute{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "valid-route", + }, + }, + }, + }, + expCond: nil, + }, + { + name: "one invalid route referencing the pool", + pool: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "pool", + }, + }, + routes: 
[]*L7Route{ + { + Valid: false, + Source: &gatewayv1.HTTPRoute{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test", + Name: "invalid-route", + }, + }, }, }, + expCond: helpers.GetPointer( + conditions.NewInferencePoolInvalidHTTPRouteNotAccepted( + "Referenced HTTPRoute test/invalid-route is not accepted by the Gateway", + ), + ), }, } @@ -242,8 +567,8 @@ func TestBuildReferencedInferencePools(t *testing.T) { t.Parallel() g := NewWithT(t) - pools := buildReferencedInferencePools(test.routes, test.gws, test.inferencePools) - g.Expect(pools).To(Equal(test.expPools)) + conds := validateInferencePoolRoutesAcceptance(test.pool, test.routes) + g.Expect(conds).To(Equal(test.expCond)) }) } } diff --git a/internal/controller/status/prepare_requests.go b/internal/controller/status/prepare_requests.go index 87e3b441cc..e6f35822c4 100644 --- a/internal/controller/status/prepare_requests.go +++ b/internal/controller/status/prepare_requests.go @@ -8,6 +8,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -519,3 +520,56 @@ func PrepareNginxGatewayStatus( }), } } + +// PrepareInferencePoolRequests prepares status UpdateRequests for the given InferencePools. +func PrepareInferencePoolRequests( + inferencePools map[types.NamespacedName]*graph.ReferencedInferencePool, + transitionTime metav1.Time, +) []UpdateRequest { + reqs := make([]UpdateRequest, 0, len(inferencePools)) + + for nsname, pool := range inferencePools { + if pool.Source == nil { + continue + } + + defaultConds := conditions.NewDefaultInferenceConditions() + allConds := make([]conditions.Condition, 0, len(pool.Conditions)+2) + + allConds = append(allConds, defaultConds...) + + if len(pool.Conditions) != 0 { + allConds = append(allConds, pool.Conditions...) 
+ } + + conds := conditions.DeduplicateConditions(allConds) + apiConds := conditions.ConvertConditions(conds, pool.Source.GetGeneration(), transitionTime) + + parents := make([]inference.ParentStatus, 0, len(pool.Gateways)) + for _, ref := range pool.Gateways { + parents = append(parents, inference.ParentStatus{ + ParentRef: inference.ParentReference{ + Name: inference.ObjectName(ref.GetName()), + Namespace: inference.Namespace(ref.GetNamespace()), + Group: helpers.GetPointer(inference.Group(ref.GroupVersionKind().Group)), + Kind: kinds.Gateway, + }, + Conditions: apiConds, + }) + } + + status := inference.InferencePoolStatus{ + Parents: parents, + } + + req := UpdateRequest{ + NsName: nsname, + ResourceType: pool.Source, + Setter: newInferencePoolStatusSetter(status), + } + + reqs = append(reqs, req) + } + + return reqs +} diff --git a/internal/controller/status/prepare_requests_test.go b/internal/controller/status/prepare_requests_test.go index 3cb629d3c2..f863d5cf97 100644 --- a/internal/controller/status/prepare_requests_test.go +++ b/internal/controller/status/prepare_requests_test.go @@ -15,6 +15,7 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -35,6 +36,7 @@ func createK8sClientFor(resourceType ngftypes.ObjectType) client.Client { utilruntime.Must(v1alpha2.Install(scheme)) utilruntime.Must(v1alpha3.Install(scheme)) utilruntime.Must(ngfAPI.AddToScheme(scheme)) + utilruntime.Must(inference.Install(scheme)) k8sClient := fake.NewClientBuilder(). WithScheme(scheme). 
@@ -2157,3 +2159,239 @@ func TestBuildSnippetsFilterStatuses(t *testing.T) { }) } } + +func TestBuildInferencePoolStatuses(t *testing.T) { + t.Parallel() + transitionTime := helpers.PrepareTimeForFakeClient(metav1.Now()) + group := "" + + validAcceptedCondition := metav1.Condition{ + Type: string(inference.InferencePoolConditionAccepted), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + LastTransitionTime: transitionTime, + Reason: string(inference.InferencePoolReasonAccepted), + Message: "InferencePool is accepted by the Gateway.", + } + + validResolvedRefsCondition := metav1.Condition{ + Type: string(inference.InferencePoolConditionResolvedRefs), + Status: metav1.ConditionTrue, + ObservedGeneration: 1, + LastTransitionTime: transitionTime, + Reason: string(inference.InferencePoolConditionResolvedRefs), + Message: "Inference pool references a valid ExtensionRef.", + } + + tests := []struct { + inferencePool map[types.NamespacedName]*graph.ReferencedInferencePool + expectedPoolWithStatus map[types.NamespacedName]inference.InferencePoolStatus + name string + expectedReqs int + }{ + { + name: "no referenced inferencePools", + expectedReqs: 0, + }, + { + name: "an inference pool has valid status for multiple gateways", + inferencePool: map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-inference-pool", + Namespace: "test", + Generation: 1, + }, + }, + Gateways: []*v1.Gateway{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gateway-1", + Namespace: "test", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gateway-2", + Namespace: "test", + }, + }, + }, + }, + }, + expectedReqs: 1, + expectedPoolWithStatus: map[types.NamespacedName]inference.InferencePoolStatus{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{ + validAcceptedCondition, + validResolvedRefsCondition, + }, + ParentRef: inference.ParentReference{ + Namespace: inference.Namespace("test"), + Name: "gateway-1", + Kind: kinds.Gateway, + Group: helpers.GetPointer(inference.Group(group)), + }, + }, + { + Conditions: []metav1.Condition{ + validAcceptedCondition, + validResolvedRefsCondition, + }, + ParentRef: inference.ParentReference{ + Namespace: inference.Namespace("test"), + Name: "gateway-2", + Kind: kinds.Gateway, + Group: helpers.GetPointer(inference.Group(group)), + }, + }, + }, + }, + }, + }, + { + name: "an inference pool has accepted valid status and is referenced by invalid extension ref", + inferencePool: map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-inference-pool", + Namespace: "test", + Generation: 1, + }, + Spec: inference.InferencePoolSpec{ + EndpointPickerRef: inference.EndpointPickerRef{ + Name: inference.ObjectName("invalid-extension-ref"), + }, + }, + }, + Gateways: []*v1.Gateway{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gateway-1", + Namespace: "test", + }, + }, + }, + Conditions: []conditions.Condition{ + conditions.NewInferencePoolInvalidExtensionref("Invalid extension ref: test/invalid-extension-ref"), + }, + }, + }, + expectedReqs: 1, + expectedPoolWithStatus: map[types.NamespacedName]inference.InferencePoolStatus{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Parents: []inference.ParentStatus{ + { + Conditions: 
[]metav1.Condition{ + validAcceptedCondition, + { + Type: string(inference.InferencePoolConditionResolvedRefs), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + LastTransitionTime: transitionTime, + Reason: string(inference.InferencePoolReasonInvalidExtensionRef), + Message: "Invalid extension ref: test/invalid-extension-ref", + }, + }, + ParentRef: inference.ParentReference{ + Namespace: inference.Namespace("test"), + Name: "gateway-1", + Kind: kinds.Gateway, + Group: helpers.GetPointer(inference.Group(group)), + }, + }, + }, + }, + }, + }, + { + name: "an inference pool is referencing an invalid route and is referenced by invalid extension ref", + inferencePool: map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Source: &inference.InferencePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid-inference-pool", + Namespace: "test", + Generation: 1, + }, + }, + Gateways: []*v1.Gateway{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "gateway-1", + Namespace: "test", + }, + }, + }, + Conditions: []conditions.Condition{ + conditions.NewInferencePoolInvalidHTTPRouteNotAccepted("Invalid HTTPRoute: test/invalid-route not accepted"), + conditions.NewInferencePoolInvalidExtensionref("Invalid extension ref: test/invalid-extension-ref"), + }, + }, + }, + expectedReqs: 1, + expectedPoolWithStatus: map[types.NamespacedName]inference.InferencePoolStatus{ + {Namespace: "test", Name: "valid-inference-pool"}: { + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{ + { + Type: string(inference.InferencePoolConditionAccepted), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + LastTransitionTime: transitionTime, + Reason: string(inference.InferencePoolReasonHTTPRouteNotAccepted), + Message: "Invalid HTTPRoute: test/invalid-route not accepted", + }, + { + Type: string(inference.InferencePoolConditionResolvedRefs), + Status: metav1.ConditionFalse, + ObservedGeneration: 1, + LastTransitionTime: transitionTime, + Reason: string(inference.InferencePoolReasonInvalidExtensionRef), + Message: "Invalid extension ref: test/invalid-extension-ref", + }, + }, + ParentRef: inference.ParentReference{ + Namespace: inference.Namespace("test"), + Name: "gateway-1", + Kind: kinds.Gateway, + Group: helpers.GetPointer(inference.Group(group)), + }, + }, + }, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + k8sClient := createK8sClientFor(&inference.InferencePool{}) + for _, ip := range test.inferencePool { + err := k8sClient.Create(context.Background(), ip.Source) + g.Expect(err).ToNot(HaveOccurred()) + } + + updater := NewUpdater(k8sClient, logr.Discard()) + reqs := PrepareInferencePoolRequests(test.inferencePool, transitionTime) + g.Expect(reqs).To(HaveLen(test.expectedReqs)) + updater.Update(context.Background(), reqs...) 
+ + for nsname, expected := range test.expectedPoolWithStatus { + var inferencePool inference.InferencePool + + err := k8sClient.Get(context.Background(), nsname, &inferencePool) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(helpers.Diff(expected, inferencePool.Status)).To(BeEmpty()) + } + }) + } +} diff --git a/internal/controller/status/status_setters.go b/internal/controller/status/status_setters.go index c4fcc7c128..efb9f68413 100644 --- a/internal/controller/status/status_setters.go +++ b/internal/controller/status/status_setters.go @@ -4,6 +4,7 @@ import ( "slices" "sigs.k8s.io/controller-runtime/pkg/client" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -404,3 +405,65 @@ func snippetsStatusEqual(status1, status2 ngfAPI.ControllerStatus) bool { return ConditionsEqual(status1.Conditions, status2.Conditions) } + +func newInferencePoolStatusSetter(status inference.InferencePoolStatus) Setter { + return func(obj client.Object) (wasSet bool) { + ip := helpers.MustCastObject[*inference.InferencePool](obj) + + // we build all the parent statuses at once so we can directly + // compare the previous and current statuses + if inferencePoolStatusEqual(ip.Status.Parents, status.Parents) { + return false + } + + ip.Status = status + return true + } +} + +func inferencePoolStatusEqual(prevParents, curParents []inference.ParentStatus) bool { + // Compare the previous and current parent statuses, ignoring order + // Check if any previous parent status is missing in the current status + for _, prevParent := range prevParents { + exists := slices.ContainsFunc(curParents, func(curParent inference.ParentStatus) bool { + return parentStatusEqual(prevParent, curParent) + }) + + if !exists { + return false + } + } + + // Check if any current parent status is missing in the previous status + for _, curParent := range curParents { + exists := slices.ContainsFunc(prevParents, func(prevParent inference.ParentStatus) bool { + return parentStatusEqual(curParent, prevParent) + }) + + if !exists { + return false + } + } + + return true +} + +func parentStatusEqual(p1, p2 inference.ParentStatus) bool { + if p1.ParentRef.Name != p2.ParentRef.Name { + return false + } + + if !helpers.EqualPointers(&p1.ParentRef.Namespace, &p2.ParentRef.Namespace) { + return false + } + + if !helpers.EqualPointers(&p1.ParentRef.Kind, &p2.ParentRef.Kind) { + return false + } + + if !helpers.EqualPointers(&p1.ParentRef.Group, &p2.ParentRef.Group) { + return false + } + + return ConditionsEqual(p1.Conditions, p2.Conditions) +} diff --git a/internal/controller/status/status_setters_test.go b/internal/controller/status/status_setters_test.go index 61a34a4e9f..9f3c6f1521 100644 --- a/internal/controller/status/status_setters_test.go +++ b/internal/controller/status/status_setters_test.go @@ -5,6 +5,7 @@ import ( . 
"github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -1726,3 +1727,303 @@ func TestNewSnippetsFilterStatusSetter(t *testing.T) { }) } } + +func TestInferencePoolStatusSetter(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + status, newStatus, expStatus inference.InferencePoolStatus + expStatusSet bool + }{ + { + name: "InferencePool has no status", + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: true, + }, + { + name: "InferencePool updates condition of an existing parent status", + status: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "old condition"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: true, + }, + { + name: "InferencePool has new parent statuses along with existing ones", + status: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "gateway2 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway2", + Namespace: "test", + }, + }, + }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "gateway2 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway2", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: true, + }, + { + name: "InferencePool has parent statuses and one is removed", + status: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + 
}, + }, + { + Conditions: []metav1.Condition{{Message: "gateway2 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway2", + Namespace: "test", + }, + }, + }, + }, + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: true, + }, + { + name: "InferencePool has existing multiple parent statuses, one gets changed condition", + status: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "parent ref gateway1 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway2 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway2", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway3 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway3", + Namespace: "test", + }, + }, + }, + }, + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "parent ref gateway1 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway2 is invalid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway2", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway3 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway3", + Namespace: "test", + }, + }, + }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "parent ref gateway1 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway2 is invalid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway2", + Namespace: "test", + }, + }, + { + Conditions: []metav1.Condition{{Message: "parent ref gateway3 is valid"}}, + ParentRef: inference.ParentReference{ + Name: "gateway3", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: true, + }, + { + name: "InferencePool has same status", + status: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + newStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatus: inference.InferencePoolStatus{ + Parents: []inference.ParentStatus{ + { + Conditions: []metav1.Condition{{Message: "gateway1 is valid parent ref"}}, + ParentRef: inference.ParentReference{ + Name: "gateway1", + Namespace: "test", + }, + }, + }, + }, + expStatusSet: false, + }, + } + + 
for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + setter := newInferencePoolStatusSetter(test.newStatus) + obj := &inference.InferencePool{Status: test.status} + + statusSet := setter(obj) + + g.Expect(statusSet).To(Equal(test.expStatusSet)) + g.Expect(obj.Status).To(Equal(test.expStatus)) + }) + } +} diff --git a/tests/go.mod b/tests/go.mod index 297e8b95a6..ac6ff0fc6b 100644 --- a/tests/go.mod +++ b/tests/go.mod @@ -80,6 +80,7 @@ require ( k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect + sigs.k8s.io/gateway-api-inference-extension v1.0.0 // indirect sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect diff --git a/tests/go.sum b/tests/go.sum index b24e1ba391..bf4f331284 100644 --- a/tests/go.sum +++ b/tests/go.sum @@ -183,8 +183,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 h1:yqrTHse8TCMW1M1ZCP+VAR/l0kKxwaAIqN/il7x4voA= +golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= @@ -267,6 +267,8 @@ sigs.k8s.io/controller-runtime v0.22.3 h1:I7mfqz/a/WdmDCEnXmSPm8/b/yRTy6JsKKENTi sigs.k8s.io/controller-runtime v0.22.3/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= sigs.k8s.io/gateway-api v1.3.0 h1:q6okN+/UKDATola4JY7zXzx40WO4VISk7i9DIfOvr9M= sigs.k8s.io/gateway-api v1.3.0/go.mod h1:d8NV8nJbaRbEKem+5IuxkL8gJGOZ+FJ+NvOIltV8gDk= +sigs.k8s.io/gateway-api-inference-extension v1.0.0 h1:GsHvlu1Cn1t6+vrHoPdNNlpwKxf/y1HuQSlUjd58Ds8= +sigs.k8s.io/gateway-api-inference-extension v1.0.0/go.mod h1:qxSY10qt2+YnZJ43VfpMXa6wpiENPderI2BnNZ4Kxfc= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= From 94b249f689a335bdea3c4741675a7975eaa0b2ae Mon Sep 17 00:00:00 2001 From: bjee19 <139261241+bjee19@users.noreply.github.com> Date: Tue, 7 Oct 2025 09:05:35 -0700 Subject: [PATCH 08/10] Collect InferencePoolCount in telemetry (#4008) Proposed changes Problem: Want to collect number of referenced InferencePools in cluster. Solution: Collect the count of referenced InferencePools. Testing: Unit tests and manually verified collection via debug logs. 
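To make the new data point concrete, here is a small, runnable sketch of the counting logic; the `NamespacedName` type is a local stand-in for the Kubernetes one, and the pool names are hypothetical:

```go
package main

import "fmt"

// NamespacedName is a local stand-in for k8s.io/apimachinery/pkg/types.NamespacedName,
// used here only so the sketch compiles on its own.
type NamespacedName struct{ Namespace, Name string }

func main() {
	// Hypothetical graph data: InferencePools referenced by at least one Route.
	referencedInferencePools := map[NamespacedName]struct{}{
		{Namespace: "test", Name: "pool-a"}: {},
		{Namespace: "test", Name: "pool-b"}: {},
	}

	// The telemetry field is simply the number of referenced pools in the built graph.
	inferencePoolCount := int64(len(referencedInferencePools))
	fmt.Println(inferencePoolCount) // 2
}
```

The collector change below stores this value in the telemetry `Data` struct and exports it as the `InferencePoolCount` attribute.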
--- internal/controller/telemetry/collector.go | 5 +++++ internal/controller/telemetry/collector_test.go | 11 +++++++++++ internal/controller/telemetry/data.avdl | 3 +++ .../controller/telemetry/data_attributes_generated.go | 1 + internal/controller/telemetry/data_test.go | 3 +++ tests/suite/telemetry_test.go | 1 + 6 files changed, 24 insertions(+) diff --git a/internal/controller/telemetry/collector.go b/internal/controller/telemetry/collector.go index e06da3f0b8..43a8a2218b 100644 --- a/internal/controller/telemetry/collector.go +++ b/internal/controller/telemetry/collector.go @@ -66,6 +66,8 @@ type Data struct { ControlPlanePodCount int64 // NginxOneConnectionEnabled is a boolean that indicates whether the connection to the Nginx One Console is enabled. NginxOneConnectionEnabled bool + // InferencePoolCount is the number of InferencePools that are referenced by at least one Route. + InferencePoolCount int64 } // NGFResourceCounts stores the counts of all relevant resources that NGF processes and generates configuration from. @@ -174,6 +176,8 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { nginxPodCount := getNginxPodCount(g, clusterInfo.NodeCount) + inferencePoolCount := int64(len(g.ReferencedInferencePools)) + data := Data{ Data: tel.Data{ ProjectName: "NGF", @@ -194,6 +198,7 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { NginxPodCount: nginxPodCount, ControlPlanePodCount: int64(replicaCount), NginxOneConnectionEnabled: c.cfg.NginxOneConsoleConnection, + InferencePoolCount: inferencePoolCount, } return data, nil diff --git a/internal/controller/telemetry/collector_test.go b/internal/controller/telemetry/collector_test.go index 8c749fdfbe..3aa9896bae 100644 --- a/internal/controller/telemetry/collector_test.go +++ b/internal/controller/telemetry/collector_test.go @@ -417,6 +417,11 @@ var _ = Describe("Collector", Ordered, func() { }, }, }, + ReferencedInferencePools: map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: "test", Name: "inferencePool-1"}: {}, + {Namespace: "test", Name: "inferencePool-2"}: {}, + {Namespace: "test", Name: "inferencePool-3"}: {}, + }, } configs := []*dataplane.Configuration{ @@ -520,6 +525,8 @@ var _ = Describe("Collector", Ordered, func() { expData.ControlPlanePodCount = int64(2) expData.NginxOneConnectionEnabled = true + expData.InferencePoolCount = 3 + data, err := dataCollector.Collect(ctx) Expect(err).ToNot(HaveOccurred()) @@ -700,6 +707,9 @@ var _ = Describe("Collector", Ordered, func() { BackendTLSPolicies: map[types.NamespacedName]*graph.BackendTLSPolicy{ {Namespace: "test", Name: "BackendTLSPolicy-1"}: {}, }, + ReferencedInferencePools: map[types.NamespacedName]*graph.ReferencedInferencePool{ + {Namespace: "test", Name: "inferencePool-1"}: {}, + }, } config1 = []*dataplane.Configuration{ @@ -783,6 +793,7 @@ var _ = Describe("Collector", Ordered, func() { BackendTLSPolicyCount: 1, } expData.NginxPodCount = 1 + expData.InferencePoolCount = 1 data, err := dataCollector.Collect(ctx) diff --git a/internal/controller/telemetry/data.avdl b/internal/controller/telemetry/data.avdl index c19881315a..10034cfbfb 100644 --- a/internal/controller/telemetry/data.avdl +++ b/internal/controller/telemetry/data.avdl @@ -114,5 +114,8 @@ attached at the Gateway level. */ /** NginxOneConnectionEnabled is a boolean that indicates whether the connection to the Nginx One Console is enabled. */ boolean? 
NginxOneConnectionEnabled = null; + /** InferencePoolCount is the number of InferencePools that are referenced by at least one Route. */ + long? InferencePoolCount = null; + } } diff --git a/internal/controller/telemetry/data_attributes_generated.go b/internal/controller/telemetry/data_attributes_generated.go index 3b8b3dcf3f..37a5a61a03 100644 --- a/internal/controller/telemetry/data_attributes_generated.go +++ b/internal/controller/telemetry/data_attributes_generated.go @@ -23,6 +23,7 @@ func (d *Data) Attributes() []attribute.KeyValue { attrs = append(attrs, attribute.Int64("NginxPodCount", d.NginxPodCount)) attrs = append(attrs, attribute.Int64("ControlPlanePodCount", d.ControlPlanePodCount)) attrs = append(attrs, attribute.Bool("NginxOneConnectionEnabled", d.NginxOneConnectionEnabled)) + attrs = append(attrs, attribute.Int64("InferencePoolCount", d.InferencePoolCount)) return attrs } diff --git a/internal/controller/telemetry/data_test.go b/internal/controller/telemetry/data_test.go index 49c8e3543c..633af19766 100644 --- a/internal/controller/telemetry/data_test.go +++ b/internal/controller/telemetry/data_test.go @@ -47,6 +47,7 @@ func TestDataAttributes(t *testing.T) { NginxPodCount: 3, ControlPlanePodCount: 3, NginxOneConnectionEnabled: true, + InferencePoolCount: 16, } expected := []attribute.KeyValue{ @@ -86,6 +87,7 @@ func TestDataAttributes(t *testing.T) { attribute.Int64("NginxPodCount", 3), attribute.Int64("ControlPlanePodCount", 3), attribute.Bool("NginxOneConnectionEnabled", true), + attribute.Int64("InferencePoolCount", 16), } result := data.Attributes() @@ -132,6 +134,7 @@ func TestDataAttributesWithEmptyData(t *testing.T) { attribute.Int64("NginxPodCount", 0), attribute.Int64("ControlPlanePodCount", 0), attribute.Bool("NginxOneConnectionEnabled", false), + attribute.Int64("InferencePoolCount", 0), } result := data.Attributes() diff --git a/tests/suite/telemetry_test.go b/tests/suite/telemetry_test.go index 2ad0c0b3a0..7ddce2aa26 100644 --- a/tests/suite/telemetry_test.go +++ b/tests/suite/telemetry_test.go @@ -96,6 +96,7 @@ var _ = Describe("Telemetry test with OTel collector", Label("telemetry"), func( "NginxPodCount: Int(0)", "ControlPlanePodCount: Int(1)", "NginxOneConnectionEnabled: Bool(false)", + "InferencePoolCount: Int(0)", }, ) }) From 52fb31bf3077ccaa8d7ecc9fbcdfb79fea360c6b Mon Sep 17 00:00:00 2001 From: salonichf5 <146118978+salonichf5@users.noreply.github.com> Date: Wed, 15 Oct 2025 11:05:15 -0600 Subject: [PATCH 09/10] rebase inference extension branch with main --- internal/controller/state/graph/httproute_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/controller/state/graph/httproute_test.go b/internal/controller/state/graph/httproute_test.go index aaef8dc519..2cbdf3097e 100644 --- a/internal/controller/state/graph/httproute_test.go +++ b/internal/controller/state/graph/httproute_test.go @@ -1301,11 +1301,11 @@ func TestProcessHTTPRouteRule_InferencePoolWithMultipleBackendRefs(t *testing.T) routeRule, errs := processHTTPRouteRule( specRule, - routeNamespace, rulePath, validator, nil, inferencePools, + routeNamespace, ) g.Expect(routeRule.RouteBackendRefs).To(BeEmpty()) From 16cb93a78d19da08532926980e667b02ad0da989 Mon Sep 17 00:00:00 2001 From: salonichf5 <146118978+salonichf5@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:14:19 -0600 Subject: [PATCH 10/10] update inference extension conformance tests --- .github/workflows/conformance.yml | 27 +++++++ .github/workflows/lint.yml | 2 + Makefile | 9 ++- 
cmd/gateway/endpoint_picker.go | 26 ++++++- .../controller/nginx/conf/nginx-plus.conf | 4 +- internal/controller/nginx/conf/nginx.conf | 4 +- internal/controller/nginx/config/maps.go | 71 +++++++++++-------- internal/controller/nginx/config/maps_test.go | 41 ++++++++--- internal/controller/nginx/config/servers.go | 26 ++++--- .../nginx/config/servers_template.go | 4 +- .../controller/nginx/config/servers_test.go | 15 ++-- .../controller/nginx/modules/test/epp.test.js | 65 +++++++++++++++-- .../controller/state/conditions/conditions.go | 14 ++++ .../state/dataplane/configuration.go | 10 ++- .../state/dataplane/configuration_test.go | 4 ++ internal/controller/state/dataplane/types.go | 10 ++- .../controller/state/graph/backend_refs.go | 24 +++++-- .../state/graph/backend_refs_test.go | 14 ++-- internal/controller/state/graph/graph_test.go | 10 ++- .../controller/state/graph/inferencepools.go | 12 ++++ .../state/graph/inferencepools_test.go | 6 ++ .../controller/state/graph/route_common.go | 2 +- tests/Makefile | 29 +++++++- tests/README.md | 20 +++++- tests/conformance-profile-inference.yaml | 24 +++++++ tests/conformance/conformance-rbac.yaml | 40 +++++++++++ tests/conformance/conformance_test.go | 33 ++++++++- tests/go.mod | 3 +- tests/go.sum | 2 + 29 files changed, 463 insertions(+), 88 deletions(-) create mode 100644 tests/conformance-profile-inference.yaml diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml index f44af845ac..7e686df3d3 100644 --- a/.github/workflows/conformance.yml +++ b/.github/workflows/conformance.yml @@ -16,6 +16,9 @@ on: enable-experimental: required: true type: boolean + enable-inference-extension: + required: true + type: boolean production-release: required: false type: boolean @@ -32,6 +35,7 @@ defaults: env: PLUS_USAGE_ENDPOINT: ${{ secrets.JWT_PLUS_REPORTING_ENDPOINT }} ENABLE_EXPERIMENTAL: ${{ inputs.enable-experimental }} + ENABLE_INFERENCE_EXTENSION: ${{ inputs.enable-inference-extension }} permissions: contents: read @@ -194,3 +198,26 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: gh release upload ${{ github.ref_name }} conformance-profile.yaml --clobber working-directory: ./tests + + # The same setup works for inference conformance tests + # and inference extension flag is enabled + - name: Run inference conformance tests + run: | + make run-inference-conformance-tests CONFORMANCE_TAG=${{ github.sha }} NGF_VERSION=${{ github.ref_name }} CLUSTER_NAME=${{ github.run_id }} + core_result=$(cat conformance-inference-profile.yaml | yq '.profiles[0].core.result') + if [ "${core_result}" == "failure" ] ]; then echo "Inference Conformance test failed, see above for details." 
&& exit 2; fi + working-directory: ./tests + + - name: Upload profile to GitHub + if: ${{ inputs.enable-inference-extension }} + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: conformance-profile-inference-${{ inputs.image }}-${{ inputs.k8s-version }}-${{ steps.ngf-meta.outputs.version }} + path: ./tests/conformance-profile-inference.yaml + + - name: Upload profile to release + if: ${{ inputs.production-release && inputs.enable-inference-extension }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: gh release upload ${{ github.ref_name }} conformance-profile-inference.yaml --clobber + working-directory: ./tests diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 46fa74fccb..f39fb7f6cb 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -125,6 +125,8 @@ jobs: uses: helm/chart-testing-action@0d28d3144d3a25ea2cc349d6e59901c4ff469b3b # v2.7.0 with: version: 3.14.0 # renovate: datasource=github-tags depName=helm/chart-testing + # v6.0.0 resolved the compatibility issue with Python > 3.13. may be removed after the action itself is updated + yamale_version: "6.0.0" - name: Run chart-testing run: ct lint --print-config --config .ct.yaml diff --git a/Makefile b/Makefile index 797216a42e..d5e583b2d0 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,7 @@ TELEMETRY_ENDPOINT=# if empty, NGF will report telemetry in its logs at debug le TELEMETRY_ENDPOINT_INSECURE = false ENABLE_EXPERIMENTAL ?= false +ENABLE_INFERENCE_EXTENSION ?= false # go build flags - should not be overridden by the user GO_LINKER_FlAGS_VARS = -X main.version=${VERSION} -X main.telemetryReportPeriod=${TELEMETRY_REPORT_PERIOD} -X main.telemetryEndpoint=${TELEMETRY_ENDPOINT} -X main.telemetryEndpointInsecure=${TELEMETRY_ENDPOINT_INSECURE} @@ -237,10 +238,16 @@ install-ngf-local-build-with-plus: check-for-plus-usage-endpoint build-images-wi .PHONY: helm-install-local helm-install-local: install-gateway-crds ## Helm install NGF on configured kind cluster with local images. To build, load, and install with helm run make install-ngf-local-build. - helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PREFIX) --create-namespace --wait --set nginxGateway.image.pullPolicy=$(PULL_POLICY) --set nginx.service.type=$(NGINX_SERVICE_TYPE) --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=$(PULL_POLICY) --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway $(HELM_PARAMETERS) + @if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \ + $(MAKE) install-inference-crds; \ + fi + helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PREFIX) --create-namespace --wait --set nginxGateway.image.pullPolicy=Never --set nginx.service.type=NodePort --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=Never --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway $(HELM_PARAMETERS) .PHONY: helm-install-local-with-plus helm-install-local-with-plus: check-for-plus-usage-endpoint install-gateway-crds ## Helm install NGF with NGINX Plus on configured kind cluster with local images. To build, load, and install with helm run make install-ngf-local-build-with-plus. 
+ @if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \ + $(MAKE) install-inference-crds; \ + fi kubectl create namespace nginx-gateway || true kubectl -n nginx-gateway create secret generic nplus-license --from-file $(PLUS_LICENSE_FILE) || true helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PLUS_PREFIX) --wait --set nginxGateway.image.pullPolicy=$(PULL_POLICY) --set nginx.service.type=$(NGINX_SERVICE_TYPE) --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=$(PULL_POLICY) --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway --set nginx.plus=true --set nginx.usage.endpoint=$(PLUS_USAGE_ENDPOINT) $(HELM_PARAMETERS) diff --git a/cmd/gateway/endpoint_picker.go b/cmd/gateway/endpoint_picker.go index acf9bdfbb6..0d814b91d0 100644 --- a/cmd/gateway/endpoint_picker.go +++ b/cmd/gateway/endpoint_picker.go @@ -1,17 +1,20 @@ package main import ( + "crypto/tls" "errors" "fmt" "io" "net" "net/http" + "strings" "time" corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "github.com/go-logr/logr" "google.golang.org/grpc" + "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata" @@ -34,7 +37,19 @@ func endpointPickerServer(handler http.Handler) error { // realExtProcClientFactory returns a factory that creates a new gRPC connection and client per request. func realExtProcClientFactory() extProcClientFactory { return func(target string) (extprocv3.ExternalProcessorClient, func() error, error) { - conn, err := grpc.NewClient(target, grpc.WithTransportCredentials(insecure.NewCredentials())) + var opts []grpc.DialOption + enableTLS := true + insecureSkipVerify := true + + if !enableTLS { + opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials())) + } else { + creds := credentials.NewTLS(&tls.Config{ + InsecureSkipVerify: insecureSkipVerify, //nolint:gosec + }) + opts = append(opts, grpc.WithTransportCredentials(creds)) + } + conn, err := grpc.NewClient(target, opts...) if err != nil { return nil, nil, err } @@ -148,8 +163,15 @@ func buildHeaderRequest(r *http.Request) *extprocv3.ProcessingRequest { for key, values := range r.Header { for _, value := range values { + // Normalize header keys to lowercase for case-insensitive matching. + // This addresses the mismatch between Go's default HTTP header normalization (Title-Case) + // and EPP's expectation of lowercase header keys. 
Additionally, HTTP/2 — which gRPC uses — + // requires all header field names to be lowercase as specified in RFC 7540, Section 8.1.2: + // https://datatracker.ietf.org/doc/html/rfc7540#section-8.1.2 + normalizedKey := strings.ToLower(key) + headerMap.Headers = append(headerMap.Headers, &corev3.HeaderValue{ - Key: key, + Key: normalizedKey, Value: value, }) } diff --git a/internal/controller/nginx/conf/nginx-plus.conf b/internal/controller/nginx/conf/nginx-plus.conf index 56029281b7..0208b9593f 100644 --- a/internal/controller/nginx/conf/nginx-plus.conf +++ b/internal/controller/nginx/conf/nginx-plus.conf @@ -12,8 +12,8 @@ events { http { include /etc/nginx/conf.d/*.conf; include /etc/nginx/mime.types; - js_import /usr/lib/nginx/modules/njs/httpmatches.js; - js_import /usr/lib/nginx/modules/njs/epp.js; + js_import modules/njs/httpmatches.js; + js_import modules/njs/epp.js; default_type application/octet-stream; diff --git a/internal/controller/nginx/conf/nginx.conf b/internal/controller/nginx/conf/nginx.conf index 5b64fe4761..09ee1ba97e 100644 --- a/internal/controller/nginx/conf/nginx.conf +++ b/internal/controller/nginx/conf/nginx.conf @@ -12,8 +12,8 @@ events { http { include /etc/nginx/conf.d/*.conf; include /etc/nginx/mime.types; - js_import /usr/lib/nginx/modules/njs/httpmatches.js; - js_import /usr/lib/nginx/modules/njs/epp.js; + js_import modules/njs/httpmatches.js; + js_import modules/njs/epp.js; default_type application/octet-stream; diff --git a/internal/controller/nginx/config/maps.go b/internal/controller/nginx/config/maps.go index e0f9ee98d5..90670a4288 100644 --- a/internal/controller/nginx/config/maps.go +++ b/internal/controller/nginx/config/maps.go @@ -186,37 +186,52 @@ func createAddHeadersMap(name string) shared.Map { // buildInferenceMaps creates maps for InferencePool Backends. func buildInferenceMaps(groups []dataplane.BackendGroup) []shared.Map { inferenceMaps := make([]shared.Map, 0, len(groups)) + for _, group := range groups { for _, backend := range group.Backends { - if backend.EndpointPickerConfig != nil { - var defaultResult string - switch backend.EndpointPickerConfig.FailureMode { - // in FailClose mode, if the EPP is unavailable or returns an error, - // we return an invalid backend to ensure the request fails - case inference.EndpointPickerFailClose: - defaultResult = invalidBackendRef - // in FailOpen mode, if the EPP is unavailable or returns an error, - // we fall back to the upstream - case inference.EndpointPickerFailOpen: - defaultResult = backend.UpstreamName - } - params := []shared.MapParameter{ - { - Value: "~.+", - Result: "$inference_workload_endpoint", - }, - { - Value: "default", - Result: defaultResult, - }, - } - backendVarName := strings.ReplaceAll(backend.UpstreamName, "-", "_") - inferenceMaps = append(inferenceMaps, shared.Map{ - Source: "$inference_workload_endpoint", - Variable: fmt.Sprintf("$inference_backend_%s", backendVarName), - Parameters: params, - }) + if backend.EndpointPickerConfig == nil || backend.EndpointPickerConfig.EndpointPickerRef == nil { + continue + } + + // Decide what the map must return when the picker didn’t set a value. + var defaultResult string + switch backend.EndpointPickerConfig.EndpointPickerRef.FailureMode { + case inference.EndpointPickerFailClose: + defaultResult = invalidBackendRef + case inference.EndpointPickerFailOpen: + defaultResult = backend.UpstreamName } + + // Build the ordered parameter list. 
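// Illustrative sketch (editorial, not from the patch): for an upstream named
// "test_foo_80" with a FailClose EndpointPicker, the parameters assembled below
// are intended to render an NGINX map roughly like the following; the exact
// template output is an assumption since it is not shown in this hunk:
//
//	map $inference_workload_endpoint $inference_backend_test_foo_80 {
//	    ""      test_foo_80;
//	    ~.+     $inference_workload_endpoint;
//	    default invalid-backend-ref;
//	}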
+ params := make([]shared.MapParameter, 0, 3) + + // no endpoint picked by EPP go to inference pool directly + params = append(params, shared.MapParameter{ + Value: `""`, + Result: backend.UpstreamName, + }) + + // endpoint picked by the EPP is stored in $inference_workload_endpoint. + params = append(params, shared.MapParameter{ + Value: `~.+`, + Result: `$inference_workload_endpoint`, + }) + + // this is set based on EPP failure mode, + // if EPP is failOpen, we set the default to the inference pool upstream, + // if EPP is failClose, we set the default to invalidBackendRef. + params = append(params, shared.MapParameter{ + Value: "default", + Result: defaultResult, + }) + + backendVarName := strings.ReplaceAll(backend.UpstreamName, "-", "_") + + inferenceMaps = append(inferenceMaps, shared.Map{ + Source: `$inference_workload_endpoint`, + Variable: fmt.Sprintf("$inference_backend_%s", backendVarName), + Parameters: params, + }) } } return inferenceMaps diff --git a/internal/controller/nginx/config/maps_test.go b/internal/controller/nginx/config/maps_test.go index 736d7808ec..bac84b0067 100644 --- a/internal/controller/nginx/config/maps_test.go +++ b/internal/controller/nginx/config/maps_test.go @@ -73,8 +73,11 @@ func TestExecuteMaps(t *testing.T) { Backends: []dataplane.Backend{ { UpstreamName: "upstream1", - EndpointPickerConfig: &inference.EndpointPickerRef{ - FailureMode: inference.EndpointPickerFailClose, + EndpointPickerConfig: &dataplane.EndpointPickerConfig{ + NsName: "default", + EndpointPickerRef: &inference.EndpointPickerRef{ + FailureMode: inference.EndpointPickerFailClose, + }, }, }, }, @@ -400,14 +403,20 @@ func TestBuildInferenceMaps(t *testing.T) { Backends: []dataplane.Backend{ { UpstreamName: "upstream1", - EndpointPickerConfig: &inference.EndpointPickerRef{ - FailureMode: inference.EndpointPickerFailClose, + EndpointPickerConfig: &dataplane.EndpointPickerConfig{ + NsName: "default", + EndpointPickerRef: &inference.EndpointPickerRef{ + FailureMode: inference.EndpointPickerFailClose, + }, }, }, { UpstreamName: "upstream2", - EndpointPickerConfig: &inference.EndpointPickerRef{ - FailureMode: inference.EndpointPickerFailOpen, + EndpointPickerConfig: &dataplane.EndpointPickerConfig{ + NsName: "default", + EndpointPickerRef: &inference.EndpointPickerRef{ + FailureMode: inference.EndpointPickerFailOpen, + }, }, }, { @@ -421,6 +430,22 @@ func TestBuildInferenceMaps(t *testing.T) { g.Expect(maps).To(HaveLen(2)) g.Expect(maps[0].Source).To(Equal("$inference_workload_endpoint")) g.Expect(maps[0].Variable).To(Equal("$inference_backend_upstream1")) - g.Expect(maps[0].Parameters[1].Result).To(Equal("invalid-backend-ref")) - g.Expect(maps[1].Parameters[1].Result).To(Equal("upstream2")) + g.Expect(maps[0].Parameters).To(HaveLen(3)) + g.Expect(maps[0].Parameters[0].Value).To(Equal("\"\"")) + g.Expect(maps[0].Parameters[0].Result).To(Equal("upstream1")) + g.Expect(maps[0].Parameters[1].Value).To(Equal("~.+")) + g.Expect(maps[0].Parameters[1].Result).To(Equal("$inference_workload_endpoint")) + g.Expect(maps[0].Parameters[2].Value).To(Equal("default")) + g.Expect(maps[0].Parameters[2].Result).To(Equal("invalid-backend-ref")) + + // Check the second map + g.Expect(maps[1].Source).To(Equal("$inference_workload_endpoint")) + g.Expect(maps[1].Variable).To(Equal("$inference_backend_upstream2")) + g.Expect(maps[1].Parameters).To(HaveLen(3)) + g.Expect(maps[1].Parameters[0].Value).To(Equal("\"\"")) + g.Expect(maps[1].Parameters[0].Result).To(Equal("upstream2")) + 
g.Expect(maps[1].Parameters[1].Value).To(Equal("~.+")) + g.Expect(maps[1].Parameters[1].Result).To(Equal("$inference_workload_endpoint")) + g.Expect(maps[1].Parameters[2].Value).To(Equal("default")) + g.Expect(maps[1].Parameters[2].Result).To(Equal("upstream2")) } diff --git a/internal/controller/nginx/config/servers.go b/internal/controller/nginx/config/servers.go index 203b4dbecb..1a9ad995d9 100644 --- a/internal/controller/nginx/config/servers.go +++ b/internal/controller/nginx/config/servers.go @@ -452,13 +452,18 @@ func createInternalLocationsForRule( intLocation, match = initializeInternalMatchLocationWithInference(pathRuleIdx, matchRuleIdx, r.Match) intInfLocation := initializeInternalInferenceRedirectLocation(pathRuleIdx, matchRuleIdx) for _, b := range r.BackendGroup.Backends { - if b.EndpointPickerConfig != nil { + if b.EndpointPickerConfig != nil && b.EndpointPickerConfig.EndpointPickerRef != nil { + eppRef := b.EndpointPickerConfig.EndpointPickerRef var portNum int - if b.EndpointPickerConfig.Port != nil { - portNum = int(b.EndpointPickerConfig.Port.Number) + if eppRef.Port != nil { + portNum = int(eppRef.Port.Number) } intInfLocation.EPPInternalPath = intLocation.Path - intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name) + if b.EndpointPickerConfig.NsName != "" { + intInfLocation.EPPHost = string(eppRef.Name) + "." + b.EndpointPickerConfig.NsName + } else { + intInfLocation.EPPHost = string(eppRef.Name) + } intInfLocation.EPPPort = portNum } } @@ -506,14 +511,19 @@ func createInferenceLocationsForRule( mirrorPercentage, ) for _, b := range r.BackendGroup.Backends { - if b.EndpointPickerConfig != nil { + if b.EndpointPickerConfig != nil && b.EndpointPickerConfig.EndpointPickerRef != nil { for i := range extLocations { + eppRef := b.EndpointPickerConfig.EndpointPickerRef var portNum int - if b.EndpointPickerConfig.Port != nil { - portNum = int(b.EndpointPickerConfig.Port.Number) + if eppRef.Port != nil { + portNum = int(eppRef.Port.Number) } extLocations[i].EPPInternalPath = intLocation.Path - extLocations[i].EPPHost = string(b.EndpointPickerConfig.Name) + if b.EndpointPickerConfig.NsName != "" { + extLocations[i].EPPHost = string(eppRef.Name) + "." 
+ b.EndpointPickerConfig.NsName + } else { + extLocations[i].EPPHost = string(eppRef.Name) + } extLocations[i].EPPPort = portNum } } diff --git a/internal/controller/nginx/config/servers_template.go b/internal/controller/nginx/config/servers_template.go index 9575b77480..2bfee59aa8 100644 --- a/internal/controller/nginx/config/servers_template.go +++ b/internal/controller/nginx/config/servers_template.go @@ -126,8 +126,8 @@ server { {{- if contains $l.Type "inference" -}} js_var $inference_workload_endpoint; set $epp_internal_path {{ $l.EPPInternalPath }}; - set $epp_host {{ $l.EPPHost }}; - set $epp_port {{ $l.EPPPort }}; + set $epp_host {{ $l.EPPHost }}; + set $epp_port {{ $l.EPPPort }}; js_content epp.getEndpoint; {{- end }} diff --git a/internal/controller/nginx/config/servers_test.go b/internal/controller/nginx/config/servers_test.go index 25ef92896d..e5a8ee9132 100644 --- a/internal/controller/nginx/config/servers_test.go +++ b/internal/controller/nginx/config/servers_test.go @@ -2457,11 +2457,14 @@ func TestCreateLocations_InferenceBackends(t *testing.T) { UpstreamName: "test_foo_80", Valid: true, Weight: 1, - EndpointPickerConfig: &inference.EndpointPickerRef{ - Name: "test-epp", - Port: &inference.Port{ - Number: 80, + EndpointPickerConfig: &dataplane.EndpointPickerConfig{ + EndpointPickerRef: &inference.EndpointPickerRef{ + Name: "test-epp", + Port: &inference.Port{ + Number: 80, + }, }, + NsName: hrNsName.Namespace, }, }, }, @@ -2522,7 +2525,7 @@ func TestCreateLocations_InferenceBackends(t *testing.T) { Path: "= /inference", Type: http.InferenceExternalLocationType, EPPInternalPath: "/_ngf-internal-rule0-route0-inference", - EPPHost: "test-epp", + EPPHost: "test-epp.test", EPPPort: 80, }, createDefaultRootLocation(), @@ -2542,7 +2545,7 @@ func TestCreateLocations_InferenceBackends(t *testing.T) { Path: "/_ngf-internal-rule0-route0-inference", Type: http.InferenceInternalLocationType, EPPInternalPath: "/_ngf-internal-rule0-route0", - EPPHost: "test-epp", + EPPHost: "test-epp.test", EPPPort: 80, }, { diff --git a/internal/controller/nginx/modules/test/epp.test.js b/internal/controller/nginx/modules/test/epp.test.js index c2a4528694..cbe20850ce 100644 --- a/internal/controller/nginx/modules/test/epp.test.js +++ b/internal/controller/nginx/modules/test/epp.test.js @@ -40,7 +40,7 @@ describe('getEndpoint', () => { }); it('sets endpoint and logs on 200 with endpoint header', async () => { - const endpoint = 'http://endpoint'; + const endpoint = '10.0.0.1:8080'; globalThis.ngx = { fetch: vi.fn().mockResolvedValue({ status: 200, @@ -49,7 +49,11 @@ describe('getEndpoint', () => { }), }; const r = makeRequest({ - variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + variables: { + epp_host: 'host', + epp_port: '1234', + epp_internal_path: '/foo', + }, }); await epp.getEndpoint(r); expect(r.variables.inference_workload_endpoint).toBe(endpoint); @@ -66,7 +70,11 @@ describe('getEndpoint', () => { }), }; const r = makeRequest({ - variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + variables: { + epp_host: 'host', + epp_port: '1234', + epp_internal_path: '/foo', + }, }); await epp.getEndpoint(r); expect(r.error).toHaveBeenCalledWith( @@ -80,7 +88,11 @@ describe('getEndpoint', () => { fetch: vi.fn().mockRejectedValue(new Error('network fail')), }; const r = makeRequest({ - variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + variables: { + epp_host: 'host', + epp_port: '1234', + epp_internal_path: '/foo', + }, }); 
await epp.getEndpoint(r); expect(r.error).toHaveBeenCalledWith(expect.stringContaining('Error in ngx.fetch')); @@ -88,7 +100,7 @@ describe('getEndpoint', () => { }); it('preserves args in internal redirect when args are present', async () => { - const endpoint = 'http://endpoint'; + const endpoint = '10.0.0.1:8080'; globalThis.ngx = { fetch: vi.fn().mockResolvedValue({ status: 200, @@ -97,10 +109,51 @@ describe('getEndpoint', () => { }), }; const r = makeRequest({ - variables: { epp_host: 'host', epp_port: '1234', epp_internal_path: '/foo' }, + variables: { + epp_host: 'host', + epp_port: '1234', + epp_internal_path: '/foo', + }, args: { a: '1', b: '2' }, }); await epp.getEndpoint(r); expect(r.internalRedirect).toHaveBeenCalledWith('/foo?a=1&b=2'); }); + + it('forwards all headers including test headers to EPP', async () => { + const endpoint = '10.0.0.1:8080'; + const fetchMock = vi.fn().mockResolvedValue({ + status: 200, + headers: { get: () => endpoint }, + text: vi.fn(), + }); + globalThis.ngx = { + fetch: fetchMock, + }; + const r = makeRequest({ + variables: { + epp_host: 'host', + epp_port: '1234', + epp_internal_path: '/foo', + }, + headersIn: { + 'test-epp-endpoint-selection': '10.0.0.1:8080,10.0.0.2:8080', + 'content-type': 'application/json', + }, + }); + await epp.getEndpoint(r); + + // Verify that all headers (including test header) were forwarded to EPP + expect(fetchMock).toHaveBeenCalledWith( + 'http://127.0.0.1:54800', + expect.objectContaining({ + headers: expect.objectContaining({ + 'test-epp-endpoint-selection': '10.0.0.1:8080,10.0.0.2:8080', + 'content-type': 'application/json', + 'X-EPP-Host': 'host', + 'X-EPP-Port': '1234', + }), + }), + ); + }); }); diff --git a/internal/controller/state/conditions/conditions.go b/internal/controller/state/conditions/conditions.go index c351f5ba32..f3af2010f3 100644 --- a/internal/controller/state/conditions/conditions.go +++ b/internal/controller/state/conditions/conditions.go @@ -67,6 +67,9 @@ const ( // invalid. Used with ResolvedRefs (false). RouteReasonInvalidFilter v1.RouteConditionReason = "InvalidFilter" + // RouteReasonInvalidInferencePool is used when a InferencePool backendRef referenced by a Route is invalid. + RouteReasonInvalidInferencePool v1.RouteConditionReason = "InvalidInferencePool" + // GatewayReasonUnsupportedField is used with the "Accepted" condition when a Gateway contains fields // that are not yet supported. GatewayReasonUnsupportedField v1.GatewayConditionReason = "UnsupportedField" @@ -465,6 +468,17 @@ func NewRouteBackendRefUnsupportedValue(msg string) Condition { } } +// NewRouteBackendRefInvalidInferencePool returns a Condition that indicates that the Route has a InferencePool +// backendRef that is invalid. +func NewRouteBackendRefInvalidInferencePool(msg string) Condition { + return Condition{ + Type: string(v1.RouteConditionResolvedRefs), + Status: metav1.ConditionFalse, + Reason: string(RouteReasonInvalidInferencePool), + Message: msg, + } +} + // NewRouteBackendRefUnsupportedProtocol returns a Condition that indicates that the Route has a backendRef with // an unsupported protocol. 
func NewRouteBackendRefUnsupportedProtocol(msg string) Condition { diff --git a/internal/controller/state/dataplane/configuration.go b/internal/controller/state/dataplane/configuration.go index 59030c0ca7..92b119e052 100644 --- a/internal/controller/state/dataplane/configuration.go +++ b/internal/controller/state/dataplane/configuration.go @@ -394,12 +394,20 @@ func newBackendGroup( inferencePoolBackendExists = inferencePoolBackendExists || ref.IsInferencePool + var eppRef *EndpointPickerConfig + if ref.EndpointPickerConfig.EndpointPickerRef != nil { + eppRef = &EndpointPickerConfig{ + EndpointPickerRef: ref.EndpointPickerConfig.EndpointPickerRef, + NsName: ref.EndpointPickerConfig.NsName, + } + } + backends = append(backends, Backend{ UpstreamName: ref.ServicePortReference(), Weight: ref.Weight, Valid: valid, VerifyTLS: convertBackendTLS(ref.BackendTLSPolicy, gatewayName), - EndpointPickerConfig: ref.EndpointPickerConfig, + EndpointPickerConfig: eppRef, }) } diff --git a/internal/controller/state/dataplane/configuration_test.go b/internal/controller/state/dataplane/configuration_test.go index 3e1697590d..b029730419 100644 --- a/internal/controller/state/dataplane/configuration_test.go +++ b/internal/controller/state/dataplane/configuration_test.go @@ -219,6 +219,10 @@ func TestBuildConfiguration(t *testing.T) { UpstreamName: fooUpstreamName, Weight: 1, Valid: true, + EndpointPickerConfig: &EndpointPickerConfig{ + NsName: "", + EndpointPickerRef: nil, + }, } createBackendRefs := func(validRule bool) []graph.BackendRef { diff --git a/internal/controller/state/dataplane/types.go b/internal/controller/state/dataplane/types.go index c6d7e8f93a..76dd970205 100644 --- a/internal/controller/state/dataplane/types.go +++ b/internal/controller/state/dataplane/types.go @@ -330,7 +330,7 @@ type Backend struct { VerifyTLS *VerifyTLS // EndpointPickerConfig holds the configuration for the EndpointPicker for this backend. // This is set if this backend is for an inference workload. - EndpointPickerConfig *inference.EndpointPickerRef + EndpointPickerConfig *EndpointPickerConfig // UpstreamName is the name of the upstream for this backend. UpstreamName string // Weight is the weight of the BackendRef. @@ -341,6 +341,14 @@ type Backend struct { Valid bool } +// EndpointPickerConfig represents the configuration for the EndpointPicker extension. +type EndpointPickerConfig struct { + // EndpointPickerRef is the reference to the EndpointPicker. + EndpointPickerRef *inference.EndpointPickerRef + // NsName is the namespace of the EndpointPicker. + NsName string +} + // VerifyTLS holds the backend TLS verification configuration. type VerifyTLS struct { CertBundleID CertBundleID diff --git a/internal/controller/state/graph/backend_refs.go b/internal/controller/state/graph/backend_refs.go index 95ce6df0b9..a3e5f0b11e 100644 --- a/internal/controller/state/graph/backend_refs.go +++ b/internal/controller/state/graph/backend_refs.go @@ -9,7 +9,6 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation/field" - inference "sigs.k8s.io/gateway-api-inference-extension/api/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha3" @@ -31,8 +30,8 @@ const ( type BackendRef struct { // BackendTLSPolicy is the BackendTLSPolicy of the Service which is referenced by the backendRef. BackendTLSPolicy *BackendTLSPolicy - // EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool. 
- EndpointPickerConfig *inference.EndpointPickerRef + // EndpointPickerConfig holds the configuration for the EndpointPicker for this backend. + EndpointPickerConfig EndpointPickerConfig // InvalidForGateways is a map of Gateways for which this BackendRef is invalid for, with the corresponding // condition. Certain NginxProxy configurations may result in a backend not being valid for some Gateways, // but not others. @@ -74,6 +73,8 @@ func addBackendRefsToRouteRules( // addHTTPBackendRefsToRules iterates over the rules of a Route and adds a list of BackendRef to each rule. // If a reference in a rule is invalid, the function will add a condition to the rule. +// +//nolint:gocyclo func addBackendRefsToRules( route *L7Route, refGrantResolver *referenceGrantResolver, @@ -121,9 +122,20 @@ func addBackendRefsToRules( } if pool, exists := referencedInferencePools[poolName]; exists { + // If the InferencePool is invalid, add a condition to the route + if !pool.Valid { + route.Conditions = append(route.Conditions, conditions.NewRouteBackendRefInvalidInferencePool( + fmt.Sprintf("Referenced InferencePool %s/%s is invalid", + poolName.Namespace, + poolName.Name, + ), + )) + continue + } port := gatewayv1.PortNumber(pool.Source.Spec.TargetPorts[0].Number) ref.Port = helpers.GetPointer(port) - ref.EndpointPickerConfig = &pool.Source.Spec.EndpointPickerRef + ref.EndpointPickerConfig.EndpointPickerRef = &pool.Source.Spec.EndpointPickerRef + ref.EndpointPickerConfig.NsName = poolName.Namespace } } @@ -185,7 +197,9 @@ func createBackendRef( refPath, ) - if !valid { + validBackendRef := valid && len(route.Conditions) == 0 + + if !validBackendRef { backendRef := BackendRef{ Weight: weight, Valid: false, diff --git a/internal/controller/state/graph/backend_refs_test.go b/internal/controller/state/graph/backend_refs_test.go index b786daed9b..7ef1ae9fd3 100644 --- a/internal/controller/state/graph/backend_refs_test.go +++ b/internal/controller/state/graph/backend_refs_test.go @@ -1231,11 +1231,14 @@ func TestAddBackendRefsToRules(t *testing.T) { ServicePort: v1.ServicePort{ Port: 80, }, - Valid: true, - Weight: 1, - InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, - IsInferencePool: true, - EndpointPickerConfig: &inference.EndpointPickerRef{}, + Valid: true, + Weight: 1, + InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, + IsInferencePool: true, + EndpointPickerConfig: EndpointPickerConfig{ + NsName: svcInferenceNsName.Namespace, + EndpointPickerRef: &inference.EndpointPickerRef{}, + }, }, }, expectedConditions: nil, @@ -1262,6 +1265,7 @@ func TestAddBackendRefsToRules(t *testing.T) { }, }, }, + Valid: true, }, } diff --git a/internal/controller/state/graph/graph_test.go b/internal/controller/state/graph/graph_test.go index a49202d96e..96dbddd2dd 100644 --- a/internal/controller/state/graph/graph_test.go +++ b/internal/controller/state/graph/graph_test.go @@ -228,9 +228,12 @@ func TestBuildGraph(t *testing.T) { Weight: 1, InvalidForGateways: map[types.NamespacedName]conditions.Condition{}, IsInferencePool: true, - EndpointPickerConfig: &inference.EndpointPickerRef{ - Kind: kinds.Service, - Name: inference.ObjectName(controller.CreateInferencePoolServiceName("ipool")), + EndpointPickerConfig: EndpointPickerConfig{ + NsName: testNs, + EndpointPickerRef: &inference.EndpointPickerRef{ + Kind: kinds.Service, + Name: inference.ObjectName(controller.CreateInferencePoolServiceName("ipool")), + }, }, }, } @@ -1339,6 +1342,7 @@ func TestBuildGraph(t *testing.T) { inferenceRoute, 
}, Conditions: []conditions.Condition{}, + Valid: true, }, }, ReferencedCaCertConfigMaps: map[types.NamespacedName]*CaCertConfigMap{ diff --git a/internal/controller/state/graph/inferencepools.go b/internal/controller/state/graph/inferencepools.go index 84e6d62df2..8f708bd62f 100644 --- a/internal/controller/state/graph/inferencepools.go +++ b/internal/controller/state/graph/inferencepools.go @@ -25,6 +25,16 @@ type ReferencedInferencePool struct { HTTPRoutes []*L7Route // Conditions contains the conditions that should be applied to the InferencePool. Conditions []conditions.Condition + // Valid indicates whether the InferencePool is valid or not. + Valid bool +} + +// EndpointPickerConfig specifies the namespace and reference to the EndpointPicker extension. +type EndpointPickerConfig struct { + // EndpointPickerRef is the reference to the EndpointPicker. + EndpointPickerRef *inference.EndpointPickerRef + // NsName is the namespace of the EndpointPicker. + NsName string } // buildReferencedInferencePools builds a map of InferencePools that are referenced by HTTPRoutes @@ -58,6 +68,8 @@ func buildReferencedInferencePools( if extensionRefCond := validateInferencePoolExtensionRef(refPool.Source, services); extensionRefCond != nil { refPool.Conditions = append(refPool.Conditions, *extensionRefCond) } + + refPool.Valid = len(refPool.Conditions) == 0 } return referencedInferencePools diff --git a/internal/controller/state/graph/inferencepools_test.go b/internal/controller/state/graph/inferencepools_test.go index f6ea66215a..a3ef1b3ede 100644 --- a/internal/controller/state/graph/inferencepools_test.go +++ b/internal/controller/state/graph/inferencepools_test.go @@ -185,6 +185,7 @@ func TestBuildReferencedInferencePools(t *testing.T) { validRoute, }, Conditions: []conditions.Condition{}, + Valid: true, }, }, }, @@ -244,6 +245,7 @@ func TestBuildReferencedInferencePools(t *testing.T) { modifiedRouteWithServiceBackend, }, Conditions: []conditions.Condition{}, + Valid: true, }, }, }, @@ -277,6 +279,7 @@ func TestBuildReferencedInferencePools(t *testing.T) { routeWithInferencePoolHeadlessSvcBackend, }, Conditions: []conditions.Condition{}, + Valid: true, }, }, }, @@ -310,6 +313,7 @@ func TestBuildReferencedInferencePools(t *testing.T) { routeWithNoNamespaceBackend, }, Conditions: []conditions.Condition{}, + Valid: true, }, }, }, @@ -326,6 +330,8 @@ func TestBuildReferencedInferencePools(t *testing.T) { Gateways: []*gatewayv1.Gateway{}, HTTPRoutes: []*L7Route{}, Conditions: []conditions.Condition{}, + // validity of InferencePool depends on condition counts only + Valid: true, }, }, }, diff --git a/internal/controller/state/graph/route_common.go b/internal/controller/state/graph/route_common.go index 6421b35aca..c6bf225f43 100644 --- a/internal/controller/state/graph/route_common.go +++ b/internal/controller/state/graph/route_common.go @@ -167,7 +167,7 @@ type RouteBackendRef struct { MirrorBackendIdx *int // EndpointPickerConfig is the configuration for the EndpointPicker, if this backendRef is for an InferencePool. 
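// Illustrative sketch (editorial, not from the patch): with this change the ref
// carries the graph-level EndpointPickerConfig value, which pairs the picker
// reference with the namespace it was resolved in, roughly:
//
//	ref.EndpointPickerConfig = EndpointPickerConfig{
//	    EndpointPickerRef: &pool.Source.Spec.EndpointPickerRef,
//	    NsName:            poolName.Namespace,
//	}
//
// The namespace is what lets the servers package build an EPP host such as
// "test-epp.test" (name + "." + namespace) for the subrequest.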
- EndpointPickerConfig *inference.EndpointPickerRef + EndpointPickerConfig EndpointPickerConfig Filters []any diff --git a/tests/Makefile b/tests/Makefile index 79ed6e300b..b642fbca5f 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -20,6 +20,8 @@ CONFORMANCE_PROFILES = $(STANDARD_CONFORMANCE_PROFILES) # by default we use the SKIP_TESTS_OPENSHIFT = HTTPRouteServiceTypes # Doesn't work on OpenShift due to security restrictions SKIP_TESTS = CEL_TEST_TARGET = +INFERENCE_SUPPORTED_FEATURES = GatewayFollowingEPPRouting,EppUnAvailableFailOpen,HTTPRouteInvalidInferencePoolRef,InferencePoolAccepted,HTTPRouteMultipleGatewaysDifferentPools,HTTPRouteMultipleRulesDifferentPools,InferencePoolHTTPRoutePortValidation,InferencePoolInvalidEPPService +INFERENCE_SKIP_TESTS = InferencePoolResolvedRefsCondition # Check if ENABLE_EXPERIMENTAL is true ifeq ($(ENABLE_EXPERIMENTAL),true) @@ -90,6 +92,28 @@ run-conformance-tests-openshift: ## Run conformance tests on OpenShift (skips te exit 2; \ fi +.PHONY: run-inference-conformance-tests +run-inference-conformance-tests: ## Run inference conformance tests + kind load docker-image $(CONFORMANCE_PREFIX):$(CONFORMANCE_TAG) --name $(CLUSTER_NAME) + kubectl apply -f conformance/conformance-rbac.yaml + kubectl run -i conformance \ + --image=$(CONFORMANCE_PREFIX):$(CONFORMANCE_TAG) --image-pull-policy=Never \ + --overrides='{ "spec": { "serviceAccountName": "conformance" } }' \ + --restart=Never -- sh -c "go test -v . -tags conformance -args --gateway-class=$(GATEWAY_CLASS) \ + --version=$(NGF_VERSION) \ + --skip-tests=$(INFERENCE_SKIP_TESTS) \ + --supported-features=$(INFERENCE_SUPPORTED_FEATURES) \ + --report-output=output.txt; cat output.txt" | tee output.txt + ./scripts/check-pod-exit-code.sh + sed -e '1,/GatewayAPIInferenceExtensionVersion/d' output.txt > conformance-profile-inference.yaml + rm output.txt + core_result=`yq '.profiles[0].core.result' conformance-profile-inference.yaml`; \ + if [ "$$core_result" != "failure" ] ; then \ + exit 0; \ + else \ + exit 2; \ + fi + .PHONY: cleanup-conformance-tests cleanup-conformance-tests: ## Clean up conformance tests fixtures kubectl delete pod conformance @@ -197,7 +221,7 @@ add-local-ip-to-cluster: ## Add local IP to the GKE cluster master-authorized-ne update-firewall-with-local-ip: ## Update the firewall rule with local IP address ./scripts/update-firewall-with-local-ip.sh -HELM_PARAMETERS += --set nginxGateway.name=nginx-gateway --set nginx.service.type=ClusterIP --skip-schema-validation +HELM_PARAMETERS += --set nginxGateway.name=nginx-gateway --set nginx.service.type=ClusterIP --set nginxGateway.gwAPIInferenceExtension.enable=$(ENABLE_INFERENCE_EXTENSION) --set nginxGateway.config.logging.level=debug # this target is used to install the gateway-api CRDs from the main branch (only used in the nightly CI job) # it overrides the target in the main Makefile when the GW_API_VERSION is set to main @@ -218,6 +242,9 @@ uninstall-ngf: ## Uninstall NGF on configured kind cluster -make uninstall-gateway-crds -kubectl delete namespace nginx-gateway -kubectl kustomize ../config/crd | kubectl delete -f - + @if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \ + $(MAKE) uninstall-inference-crds; \ + fi # Run CEL validation integration tests against a real cluster .PHONY: test-cel-validation diff --git a/tests/README.md b/tests/README.md index a1d55b79f2..d58138dd1e 100644 --- a/tests/README.md +++ b/tests/README.md @@ -20,7 +20,9 @@ This directory contains the tests for NGINX Gateway Fabric. 
The tests are divide - [Option 1 - Build and install NGINX Gateway Fabric from local to configured kind cluster](#option-1---build-and-install-nginx-gateway-fabric-from-local-to-configured-kind-cluster) - [Option 2 - Install NGINX Gateway Fabric from local already built image to configured kind cluster](#option-2---install-nginx-gateway-fabric-from-local-already-built-image-to-configured-kind-cluster) - [Step 2 - Build conformance test runner image](#step-2---build-conformance-test-runner-image) - - [Step 3 - Run Gateway conformance tests](#step-3---run-gateway-conformance-tests) + - [Step 3 - Run Conformance tests](#step-3---run-conformance-tests) + - [To run Gateway conformance tests](#to-run-gateway-conformance-tests) + - [To run Inference conformance tests](#to-run-inference-conformance-tests) - [Step 4 - Cleanup the conformance test fixtures and uninstall NGINX Gateway Fabric](#step-4---cleanup-the-conformance-test-fixtures-and-uninstall-nginx-gateway-fabric) - [Step 5 - Revert changes to Go modules](#step-5---revert-changes-to-go-modules) - [Step 6 - Delete kind cluster](#step-6---delete-kind-cluster) @@ -138,6 +140,12 @@ TELEMETRY_ENDPOINT=otel-collector-opentelemetry-collector.collector.svc.cluster. export ENABLE_EXPERIMENTAL=true ``` +> If you want to run the Inference conformance tests, set the following environment variable before deploying NGF: + +```bash +export ENABLE_INFERENCE_EXTENSION=true +``` + #### Option 1 - Build and install NGINX Gateway Fabric from local to configured kind cluster ```makefile @@ -188,12 +196,20 @@ go mod tidy make build-test-runner-image ``` -### Step 3 - Run Gateway conformance tests +### Step 3 - Run Conformance tests + +#### To run Gateway conformance tests ```makefile make run-conformance-tests ``` +#### To run Inference conformance tests + +```makefile +make run-inference-conformance-tests +``` + ### Step 4 - Cleanup the conformance test fixtures and uninstall NGINX Gateway Fabric ```makefile diff --git a/tests/conformance-profile-inference.yaml b/tests/conformance-profile-inference.yaml new file mode 100644 index 0000000000..1fa89f1580 --- /dev/null +++ b/tests/conformance-profile-inference.yaml @@ -0,0 +1,24 @@ +apiVersion: gateway.networking.k8s.io/v1 +date: "2025-10-16T01:03:45Z" +gatewayAPIChannel: experimental +gatewayAPIVersion: v1.3.0 +implementation: + contact: + - https://github.com/nginx/nginx-gateway-fabric/discussions/new/choose + organization: nginx + project: nginx-gateway-fabric + url: https://github.com/nginx/nginx-gateway-fabric + version: edge +kind: ConformanceReport +mode: default +profiles: +- core: + result: partial + skippedTests: + - InferencePoolResolvedRefsCondition + statistics: + Failed: 0 + Passed: 8 + Skipped: 1 + name: Gateway + summary: Core tests partially succeeded with 1 test skips. 
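For reference, a minimal sketch of the report check that the Makefile target and workflow step express with yq (`.profiles[0].core.result`), written here in Go against the `gopkg.in/yaml.v2` dependency this change adds to `tests/go.mod`. The struct models only the fields shown in the report above; this is an illustration, not code from the patch.

```go
package main

import (
	"fmt"
	"os"

	yaml "gopkg.in/yaml.v2"
)

// conformanceReport models only the report fields that the CI check reads,
// mirroring the yq expression '.profiles[0].core.result'.
type conformanceReport struct {
	Profiles []struct {
		Name string `yaml:"name"`
		Core struct {
			Result string `yaml:"result"`
		} `yaml:"core"`
	} `yaml:"profiles"`
}

func main() {
	data, err := os.ReadFile("conformance-profile-inference.yaml")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	var report conformanceReport
	if err := yaml.Unmarshal(data, &report); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	if len(report.Profiles) == 0 {
		fmt.Fprintln(os.Stderr, "no profiles found in report")
		os.Exit(1)
	}

	// Anything other than "failure" is treated as a pass, matching the exit
	// logic of the run-inference-conformance-tests Makefile target.
	if report.Profiles[0].Core.Result == "failure" {
		fmt.Println("Inference Conformance test failed, see above for details.")
		os.Exit(2)
	}
	fmt.Printf("%s profile core result: %s\n", report.Profiles[0].Name, report.Profiles[0].Core.Result)
}
```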
diff --git a/tests/conformance/conformance-rbac.yaml b/tests/conformance/conformance-rbac.yaml index 6cdf2d0a86..f063191f98 100644 --- a/tests/conformance/conformance-rbac.yaml +++ b/tests/conformance/conformance-rbac.yaml @@ -24,6 +24,7 @@ rules: - get - list - update + - watch - apiGroups: - apps resources: @@ -33,6 +34,7 @@ rules: - delete - get - list + - update - apiGroups: - gateway.networking.k8s.io resources: @@ -48,6 +50,7 @@ rules: - get - list - patch + - update - apiGroups: - apiextensions.k8s.io resources: @@ -64,6 +67,43 @@ rules: - get - list - patch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools + verbs: + - get + - list + - watch + - create + - delete + - update +- apiGroups: + - inference.networking.x-k8s.io + resources: + - inferencepools + - inferenceobjectives + verbs: + - get + - list + - watch +- apiGroups: + - inference.networking.k8s.io + resources: + - inferencepools/status + verbs: + - update +- apiGroups: + - rbac.authorization.k8s.io + resources: + - roles + - rolebindings + verbs: + - create + - delete + - get + - list + - update --- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 diff --git a/tests/conformance/conformance_test.go b/tests/conformance/conformance_test.go index d792046e96..6c3daece11 100644 --- a/tests/conformance/conformance_test.go +++ b/tests/conformance/conformance_test.go @@ -22,6 +22,7 @@ import ( "testing" . "github.com/onsi/gomega" + inference_conformance "sigs.k8s.io/gateway-api-inference-extension/conformance" v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1beta1" "sigs.k8s.io/gateway-api/conformance" @@ -32,9 +33,11 @@ import ( "sigs.k8s.io/yaml" ) -// unusableGatewayIPAddress 198.51.100.0 is a publicly reserved IP address specifically for documentation. -// This is needed to give the conformance tests an example valid ip unusable address. -const unusableGatewayIPAddress = "198.51.100.0" +const ( + // unusableGatewayIPAddress 198.51.100.0 is a publicly reserved IP address specifically for documentation. + // This is needed to give the conformance tests an example valid ip unusable address. 
+ unusableGatewayIPAddress = "198.51.100.0" +) func TestConformance(t *testing.T) { g := NewWithT(t) @@ -86,3 +89,27 @@ func TestConformance(t *testing.T) { _, err = f.Write(yamlReport) g.Expect(err).ToNot(HaveOccurred()) } + +func TestInferenceExtensionConformance(t *testing.T) { + t.Logf(`Running inference conformance tests with %s GatewayClass\n cleanup: %t\n`+ + `debug: %t\n enable all features: %t \n supported extended features: [%v]\n exempt features: [%v]\n`+ + `skip tests: [%v]`, + *flags.GatewayClassName, *flags.CleanupBaseResources, *flags.ShowDebug, + *flags.EnableAllSupportedFeatures, *flags.SupportedFeatures, *flags.ExemptFeatures, *flags.SkipTests, + ) + + opts := inference_conformance.DefaultOptions(t) + + opts.Implementation = conf_v1.Implementation{ + Organization: "nginx", + Project: "nginx-gateway-fabric", + URL: "https://github.com/nginx/nginx-gateway-fabric", + Version: *flags.ImplementationVersion, + Contact: []string{ + "https://github.com/nginx/nginx-gateway-fabric/discussions/new/choose", + }, + } + + opts.ConformanceProfiles.Insert(inference_conformance.GatewayLayerProfileName) + inference_conformance.RunConformanceWithOptions(t, opts) +} diff --git a/tests/go.mod b/tests/go.mod index ac6ff0fc6b..364223745b 100644 --- a/tests/go.mod +++ b/tests/go.mod @@ -11,12 +11,14 @@ require ( github.com/prometheus/client_golang v1.23.2 github.com/prometheus/common v0.67.1 github.com/tsenart/vegeta/v12 v12.12.0 + gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.34.1 k8s.io/apiextensions-apiserver v0.34.1 k8s.io/apimachinery v0.34.1 k8s.io/client-go v0.34.1 sigs.k8s.io/controller-runtime v0.22.3 sigs.k8s.io/gateway-api v1.3.0 + sigs.k8s.io/gateway-api-inference-extension v1.0.0 sigs.k8s.io/yaml v1.6.0 ) @@ -80,7 +82,6 @@ require ( k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect - sigs.k8s.io/gateway-api-inference-extension v1.0.0 // indirect sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect diff --git a/tests/go.sum b/tests/go.sum index bf4f331284..efdce58f15 100644 --- a/tests/go.sum +++ b/tests/go.sum @@ -245,6 +245,8 @@ gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnf gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM=