From c7d49472031bd87fc69a0b40f9d2f0fde91e137c Mon Sep 17 00:00:00 2001 From: Todd Short Date: Thu, 6 Nov 2025 12:56:23 -0500 Subject: [PATCH] fix(grpc): Use passthrough resolver when proxy is detected (#3700) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix CatalogSource reporting TRANSIENT_FAILURE in Hypershift guest clusters by automatically using the "passthrough" resolver scheme when a proxy is detected. Root Cause: The migration from grpc.Dial() to grpc.NewClient() introduced a resolver scheme issue. Unlike grpc.Dial(), which defaults to the "passthrough" resolver, grpc.NewClient() defaults to the "dns" resolver, so even when WithContextDialer is supplied (for proxy support) gRPC first tries to resolve the target address client-side before handing it to the dialer. In Hypershift, the catalog operator runs in the management cluster and connects via SOCKS5 proxy to catalog pods in the guest cluster. Service addresses like "service.namespace.svc:50051" only exist in the guest cluster's DNS and cannot be resolved from the management cluster, causing connections to fail with TRANSIENT_FAILURE. Solution: Automatically detect when a proxy is being used (proxyURL != nil) and prepend "passthrough:///" to the target address. The passthrough resolver bypasses client-side DNS resolution and delegates it to the custom dialer (proxy), which resolves addresses in the guest cluster where they exist. 
This solution: - Requires no environment variables or configuration - Automatically activates only when proxy is used - Follows gRPC best practices per documentation - Simpler than alternative env var approaches (e.g., PR #3699) Fixes: OCPBUGS-64574 Related: OCPBUGS-64631, #3698, #3699 🤖 Generated with [Claude Code](https://claude.com/claude-code) via /jira:solve OCPBUGS-64574 Signed-off-by: Todd Short Co-authored-by: Claude Upstream-repository: operator-lifecycle-manager Upstream-commit: 8d9a698c9b60ca6e6a41a4633a5f803947a27ab0 --- .../pkg/controller/registry/grpc/source.go | 14 +++++++++++++- .../pkg/controller/registry/grpc/source.go | 14 +++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/staging/operator-lifecycle-manager/pkg/controller/registry/grpc/source.go b/staging/operator-lifecycle-manager/pkg/controller/registry/grpc/source.go index c7aea05f44..fe0ccac260 100644 --- a/staging/operator-lifecycle-manager/pkg/controller/registry/grpc/source.go +++ b/staging/operator-lifecycle-manager/pkg/controller/registry/grpc/source.go @@ -153,6 +153,7 @@ func grpcConnection(address string) (*grpc.ClientConn, error) { return nil, err } + target := address if proxyURL != nil { dialOptions = append(dialOptions, grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) { dialer, err := proxy.FromURL(proxyURL, &net.Dialer{}) @@ -161,9 +162,20 @@ func grpcConnection(address string) (*grpc.ClientConn, error) { } return dialer.Dial("tcp", addr) })) + + // When using a custom dialer (proxy), use the "passthrough" resolver scheme + // to bypass client-side name resolution and delegate it to the dialer. + // This is required for scenarios like Hypershift where the catalog operator + // runs in a management cluster and connects via proxy to catalog pods in a + // guest cluster. The service addresses (e.g., "service.namespace.svc:50051") + // only exist in the guest cluster and must be resolved by the proxy, not by + // the client. 
+ // + // See: https://github.com/grpc/grpc-go/blob/master/dialoptions.go#L469 + target = "passthrough:///" + address } - return grpc.NewClient(address, dialOptions...) + return grpc.NewClient(target, dialOptions...) } func (s *SourceStore) Add(key registry.CatalogKey, address string) (*SourceConn, error) { diff --git a/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/registry/grpc/source.go b/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/registry/grpc/source.go index c7aea05f44..fe0ccac260 100644 --- a/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/registry/grpc/source.go +++ b/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/registry/grpc/source.go @@ -153,6 +153,7 @@ func grpcConnection(address string) (*grpc.ClientConn, error) { return nil, err } + target := address if proxyURL != nil { dialOptions = append(dialOptions, grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) { dialer, err := proxy.FromURL(proxyURL, &net.Dialer{}) @@ -161,9 +162,20 @@ func grpcConnection(address string) (*grpc.ClientConn, error) { } return dialer.Dial("tcp", addr) })) + + // When using a custom dialer (proxy), use the "passthrough" resolver scheme + // to bypass client-side name resolution and delegate it to the dialer. + // This is required for scenarios like Hypershift where the catalog operator + // runs in a management cluster and connects via proxy to catalog pods in a + // guest cluster. The service addresses (e.g., "service.namespace.svc:50051") + // only exist in the guest cluster and must be resolved by the proxy, not by + // the client. + // + // See: https://github.com/grpc/grpc-go/blob/master/dialoptions.go#L469 + target = "passthrough:///" + address } - return grpc.NewClient(address, dialOptions...) + return grpc.NewClient(target, dialOptions...) 
} func (s *SourceStore) Add(key registry.CatalogKey, address string) (*SourceConn, error) {