diff --git a/pyproject.toml b/pyproject.toml index 0a9d3228..ef4103ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,10 @@ [project] name = "codeflare-sdk" -version = "0.32.0" +version = "0.32.2" [tool.poetry] name = "codeflare-sdk" -version = "0.32.0" +version = "0.32.2" description = "Python SDK for codeflare client" license = "Apache-2.0" diff --git a/src/codeflare_sdk/ray/cluster/cluster.py b/src/codeflare_sdk/ray/cluster/cluster.py index 8538dba3..7167ea1a 100644 --- a/src/codeflare_sdk/ray/cluster/cluster.py +++ b/src/codeflare_sdk/ray/cluster/cluster.py @@ -387,6 +387,8 @@ def is_dashboard_ready(self) -> bool: This method attempts to send a GET request to the cluster dashboard URI. If the request is successful (HTTP status code 200), it returns True. + For OAuth-protected dashboards, a 302 redirect to the OAuth login page + also indicates the dashboard is ready (the OAuth proxy is working). If an SSL error occurs, it returns False, indicating the dashboard is not ready. Returns: @@ -399,11 +401,14 @@ def is_dashboard_ready(self) -> bool: return False try: + # Don't follow redirects - we want to see the redirect response + # A 302 redirect from OAuth proxy indicates the dashboard is ready response = requests.get( dashboard_uri, headers=self._client_headers, timeout=5, verify=self._client_verify_tls, + allow_redirects=False, ) except requests.exceptions.SSLError: # pragma no cover # SSL exception occurs when oauth ingress has been created but cluster is not up @@ -412,7 +417,11 @@ def is_dashboard_ready(self) -> bool: # Any other exception (connection errors, timeouts, etc.) return False - if response.status_code == 200: + # Dashboard is ready if: + # - 200: Dashboard is accessible (no auth required or already authenticated) + # - 302: OAuth redirect - dashboard and OAuth proxy are ready, just needs authentication + # - 401/403: OAuth is working and blocking unauthenticated requests - dashboard is ready + if response.status_code in (200, 302, 401, 403): return True else: return False @@ -1153,36 +1162,68 @@ def _get_dashboard_url_from_httproute( cluster_name: str, namespace: str ) -> Optional[str]: """ - Attempts to get the Ray dashboard URL from an HTTPRoute resource. - This is used for RHOAI v3.0+ clusters that use Gateway API. - + Get the Ray dashboard URL from an HTTPRoute (RHOAI v3.0+ Gateway API). + Searches for HTTPRoute labeled with ray.io/cluster-name and ray.io/cluster-namespace. + Returns the dashboard URL if found, or None to allow fallback to Routes/Ingress. Args: - cluster_name: Name of the Ray cluster - namespace: Namespace of the Ray cluster - + cluster_name: Ray cluster name + namespace: Ray cluster namespace Returns: - Dashboard URL if HTTPRoute is found, None otherwise + Dashboard URL if found, else None """ try: config_check() api_instance = client.CustomObjectsApi(get_api_client()) - # Try to get HTTPRoute for this Ray cluster + label_selector = ( + f"ray.io/cluster-name={cluster_name},ray.io/cluster-namespace={namespace}" + ) + + # Try cluster-wide search first (if permissions allow) try: - httproute = api_instance.get_namespaced_custom_object( + httproutes = api_instance.list_cluster_custom_object( group="gateway.networking.k8s.io", version="v1", - namespace=namespace, plural="httproutes", - name=cluster_name, + label_selector=label_selector, ) - except client.exceptions.ApiException as e: - if e.status == 404: - # HTTPRoute not found - this is expected for SDK v0.31.1 and below or Kind clusters + items = httproutes.get("items", []) + if items: + httproute = items[0] + else: + # No HTTPRoute found + return None + except Exception: + # No cluster-wide permissions, try namespace-specific search + search_namespaces = [ + "redhat-ods-applications", + "opendatahub", + "default", + "ray-system", + ] + + httproute = None + for ns in search_namespaces: + try: + httproutes = api_instance.list_namespaced_custom_object( + group="gateway.networking.k8s.io", + version="v1", + namespace=ns, + plural="httproutes", + label_selector=label_selector, + ) + items = httproutes.get("items", []) + if items: + httproute = items[0] + break + except client.ApiException: + continue + + if not httproute: + # No HTTPRoute found return None - raise - # Get the Gateway reference from HTTPRoute + # Extract Gateway reference and construct dashboard URL parent_refs = httproute.get("spec", {}).get("parentRefs", []) if not parent_refs: return None @@ -1203,7 +1244,6 @@ def _get_dashboard_url_from_httproute( name=gateway_name, ) - # Extract hostname from Gateway listeners listeners = gateway.get("spec", {}).get("listeners", []) if not listeners: return None @@ -1212,14 +1252,9 @@ def _get_dashboard_url_from_httproute( if not hostname: return None - # Construct the dashboard URL using RHOAI v3.0+ Gateway API pattern - # The HTTPRoute existence confirms v3.0+, so we use the standard path pattern - # Format: https://{hostname}/ray/{namespace}/{cluster-name} - protocol = "https" # Gateway API uses HTTPS - dashboard_url = f"{protocol}://{hostname}/ray/{namespace}/{cluster_name}" - - return dashboard_url + # Construct dashboard URL: https://{hostname}/ray/{namespace}/{cluster-name} + return f"https://{hostname}/ray/{namespace}/{cluster_name}" - except Exception as e: # pragma: no cover - # If any error occurs, return None to fall back to OpenShift Route + except Exception: # pragma: no cover + # Any error means no HTTPRoute - fall back to Routes/Ingress return None diff --git a/src/codeflare_sdk/ray/cluster/test_cluster.py b/src/codeflare_sdk/ray/cluster/test_cluster.py index c8742a3e..ed07cbaf 100644 --- a/src/codeflare_sdk/ray/cluster/test_cluster.py +++ b/src/codeflare_sdk/ray/cluster/test_cluster.py @@ -991,14 +991,22 @@ def test_get_dashboard_url_from_httproute(mocker): }, } - # Mock the CustomObjectsApi to return HTTPRoute and Gateway - def mock_get_namespaced_custom_object(group, version, namespace, plural, name): + # Mock list_cluster_custom_object to return HTTPRoute (cluster-wide search) + def mock_list_cluster_custom_object(group, version, plural, label_selector): if plural == "httproutes": - return mock_httproute - elif plural == "gateways": + return {"items": [mock_httproute]} + raise Exception("Unexpected plural") + + # Mock get_namespaced_custom_object to return Gateway + def mock_get_namespaced_custom_object(group, version, namespace, plural, name): + if plural == "gateways": return mock_gateway raise Exception("Unexpected plural") + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", + side_effect=mock_list_cluster_custom_object, + ) mocker.patch( "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", side_effect=mock_get_namespaced_custom_object, @@ -1011,15 +1019,24 @@ def mock_get_namespaced_custom_object(group, version, namespace, plural, name): ) assert result == expected_url, f"Expected {expected_url}, got {result}" - # Test HTTPRoute not found (404) - should return None - def mock_404_error(group, version, namespace, plural, name): - error = client.exceptions.ApiException(status=404) - error.status = 404 - raise error + # Test HTTPRoute not found - should return None + def mock_list_cluster_empty(group, version, plural, label_selector): + if plural == "httproutes": + return {"items": []} + raise Exception("Unexpected plural") + + def mock_list_namespaced_empty(group, version, namespace, plural, label_selector): + if plural == "httproutes": + return {"items": []} + raise Exception("Unexpected plural") mocker.patch( - "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", - side_effect=mock_404_error, + "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", + side_effect=mock_list_cluster_empty, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=mock_list_namespaced_empty, ) result = _get_dashboard_url_from_httproute("nonexistent-cluster", "test-ns") @@ -1031,14 +1048,14 @@ def mock_404_error(group, version, namespace, plural, name): "spec": {"parentRefs": []}, # Empty parentRefs } - def mock_httproute_no_parents_fn(group, version, namespace, plural, name): + def mock_list_cluster_no_parents(group, version, plural, label_selector): if plural == "httproutes": - return mock_httproute_no_parents + return {"items": [mock_httproute_no_parents]} raise Exception("Unexpected plural") mocker.patch( - "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", - side_effect=mock_httproute_no_parents_fn, + "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", + side_effect=mock_list_cluster_no_parents, ) result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") @@ -1059,14 +1076,14 @@ def mock_httproute_no_parents_fn(group, version, namespace, plural, name): }, } - def mock_httproute_no_name_fn(group, version, namespace, plural, name): + def mock_list_cluster_no_name(group, version, plural, label_selector): if plural == "httproutes": - return mock_httproute_no_name + return {"items": [mock_httproute_no_name]} raise Exception("Unexpected plural") mocker.patch( - "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", - side_effect=mock_httproute_no_name_fn, + "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", + side_effect=mock_list_cluster_no_name, ) result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") @@ -1087,14 +1104,14 @@ def mock_httproute_no_name_fn(group, version, namespace, plural, name): }, } - def mock_httproute_no_namespace_fn(group, version, namespace, plural, name): + def mock_list_cluster_no_namespace(group, version, plural, label_selector): if plural == "httproutes": - return mock_httproute_no_namespace + return {"items": [mock_httproute_no_namespace]} raise Exception("Unexpected plural") mocker.patch( - "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", - side_effect=mock_httproute_no_namespace_fn, + "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", + side_effect=mock_list_cluster_no_namespace, ) result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") @@ -1183,6 +1200,75 @@ def mock_403_error(group, version, namespace, plural, name): result is None ), "Should return None when non-404 exception occurs (caught by outer handler)" + # Real-world scenario: Cluster-wide permissions denied, falls back to namespace search + # This simulates a regular data scientist without cluster-admin permissions + call_count = {"cluster_wide": 0, "namespaced": 0} + + def mock_list_cluster_permission_denied(group, version, plural, label_selector): + call_count["cluster_wide"] += 1 + # Simulate permission denied for cluster-wide search + error = client.exceptions.ApiException(status=403) + error.status = 403 + raise error + + def mock_list_namespaced_success(group, version, namespace, plural, label_selector): + call_count["namespaced"] += 1 + # First namespace fails, second succeeds (simulates opendatahub deployment) + if namespace == "redhat-ods-applications": + return {"items": []} + elif namespace == "opendatahub": + return {"items": [mock_httproute]} + return {"items": []} + + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", + side_effect=mock_list_cluster_permission_denied, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=mock_list_namespaced_success, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=mock_get_namespaced_custom_object, + ) + + result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") + expected_url = ( + "https://data-science-gateway.apps.example.com/ray/test-ns/test-cluster" + ) + assert result == expected_url, f"Expected {expected_url}, got {result}" + assert call_count["cluster_wide"] == 1, "Should try cluster-wide search first" + assert ( + call_count["namespaced"] >= 2 + ), "Should fall back to namespace search and try multiple namespaces" + + # Real-world scenario: Gateway not found (404) - should return None + # This can happen if Gateway was deleted but HTTPRoute still exists + def mock_list_cluster_with_httproute(group, version, plural, label_selector): + if plural == "httproutes": + return {"items": [mock_httproute]} + raise Exception("Unexpected plural") + + def mock_get_gateway_404(group, version, namespace, plural, name): + if plural == "gateways": + error = client.exceptions.ApiException(status=404) + error.status = 404 + raise error + raise Exception("Unexpected plural") + + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", + side_effect=mock_list_cluster_with_httproute, + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=mock_get_gateway_404, + ) + + result = _get_dashboard_url_from_httproute("test-cluster", "test-ns") + assert result is None, "Should return None when Gateway not found (404)" + def test_cluster_dashboard_uri_httproute_first(mocker): """