Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
[project]
name = "codeflare-sdk"
version = "0.32.0"
version = "0.32.2"

[tool.poetry]
name = "codeflare-sdk"
version = "0.32.0"
version = "0.32.2"
description = "Python SDK for codeflare client"

license = "Apache-2.0"
Expand Down
89 changes: 62 additions & 27 deletions src/codeflare_sdk/ray/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,8 @@ def is_dashboard_ready(self) -> bool:

This method attempts to send a GET request to the cluster dashboard URI.
If the request is successful (HTTP status code 200), it returns True.
For OAuth-protected dashboards, a 302 redirect to the OAuth login page
also indicates the dashboard is ready (the OAuth proxy is working).
If an SSL error occurs, it returns False, indicating the dashboard is not ready.

Returns:
Expand All @@ -399,11 +401,14 @@ def is_dashboard_ready(self) -> bool:
return False

try:
# Don't follow redirects - we want to see the redirect response
# A 302 redirect from OAuth proxy indicates the dashboard is ready
response = requests.get(
dashboard_uri,
headers=self._client_headers,
timeout=5,
verify=self._client_verify_tls,
allow_redirects=False,
)
except requests.exceptions.SSLError: # pragma no cover
# SSL exception occurs when oauth ingress has been created but cluster is not up
Expand All @@ -412,7 +417,11 @@ def is_dashboard_ready(self) -> bool:
# Any other exception (connection errors, timeouts, etc.)
return False

if response.status_code == 200:
# Dashboard is ready if:
# - 200: Dashboard is accessible (no auth required or already authenticated)
# - 302: OAuth redirect - dashboard and OAuth proxy are ready, just needs authentication
# - 401/403: OAuth is working and blocking unauthenticated requests - dashboard is ready
if response.status_code in (200, 302, 401, 403):
return True
else:
return False
Expand Down Expand Up @@ -1153,36 +1162,68 @@ def _get_dashboard_url_from_httproute(
cluster_name: str, namespace: str
) -> Optional[str]:
"""
Attempts to get the Ray dashboard URL from an HTTPRoute resource.
This is used for RHOAI v3.0+ clusters that use Gateway API.

Get the Ray dashboard URL from an HTTPRoute (RHOAI v3.0+ Gateway API).
Searches for HTTPRoute labeled with ray.io/cluster-name and ray.io/cluster-namespace.
Returns the dashboard URL if found, or None to allow fallback to Routes/Ingress.
Args:
cluster_name: Name of the Ray cluster
namespace: Namespace of the Ray cluster

cluster_name: Ray cluster name
namespace: Ray cluster namespace
Returns:
Dashboard URL if HTTPRoute is found, None otherwise
Dashboard URL if found, else None
"""
try:
config_check()
api_instance = client.CustomObjectsApi(get_api_client())

# Try to get HTTPRoute for this Ray cluster
label_selector = (
f"ray.io/cluster-name={cluster_name},ray.io/cluster-namespace={namespace}"
)

# Try cluster-wide search first (if permissions allow)
try:
httproute = api_instance.get_namespaced_custom_object(
httproutes = api_instance.list_cluster_custom_object(
group="gateway.networking.k8s.io",
version="v1",
namespace=namespace,
plural="httproutes",
name=cluster_name,
label_selector=label_selector,
)
except client.exceptions.ApiException as e:
if e.status == 404:
# HTTPRoute not found - this is expected for SDK v0.31.1 and below or Kind clusters
items = httproutes.get("items", [])
if items:
httproute = items[0]
else:
# No HTTPRoute found
return None
except Exception:
# No cluster-wide permissions, try namespace-specific search
search_namespaces = [
"redhat-ods-applications",
"opendatahub",
"default",
"ray-system",
]

httproute = None
for ns in search_namespaces:
try:
httproutes = api_instance.list_namespaced_custom_object(
group="gateway.networking.k8s.io",
version="v1",
namespace=ns,
plural="httproutes",
label_selector=label_selector,
)
items = httproutes.get("items", [])
if items:
httproute = items[0]
break
except client.ApiException:
continue

if not httproute:
# No HTTPRoute found
return None
raise

# Get the Gateway reference from HTTPRoute
# Extract Gateway reference and construct dashboard URL
parent_refs = httproute.get("spec", {}).get("parentRefs", [])
if not parent_refs:
return None
Expand All @@ -1203,7 +1244,6 @@ def _get_dashboard_url_from_httproute(
name=gateway_name,
)

# Extract hostname from Gateway listeners
listeners = gateway.get("spec", {}).get("listeners", [])
if not listeners:
return None
Expand All @@ -1212,14 +1252,9 @@ def _get_dashboard_url_from_httproute(
if not hostname:
return None

# Construct the dashboard URL using RHOAI v3.0+ Gateway API pattern
# The HTTPRoute existence confirms v3.0+, so we use the standard path pattern
# Format: https://{hostname}/ray/{namespace}/{cluster-name}
protocol = "https" # Gateway API uses HTTPS
dashboard_url = f"{protocol}://{hostname}/ray/{namespace}/{cluster_name}"

return dashboard_url
# Construct dashboard URL: https://{hostname}/ray/{namespace}/{cluster-name}
return f"https://{hostname}/ray/{namespace}/{cluster_name}"

except Exception as e: # pragma: no cover
# If any error occurs, return None to fall back to OpenShift Route
except Exception: # pragma: no cover
# Any error means no HTTPRoute - fall back to Routes/Ingress
return None
132 changes: 109 additions & 23 deletions src/codeflare_sdk/ray/cluster/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,14 +991,22 @@ def test_get_dashboard_url_from_httproute(mocker):
},
}

# Mock the CustomObjectsApi to return HTTPRoute and Gateway
def mock_get_namespaced_custom_object(group, version, namespace, plural, name):
# Mock list_cluster_custom_object to return HTTPRoute (cluster-wide search)
def mock_list_cluster_custom_object(group, version, plural, label_selector):
if plural == "httproutes":
return mock_httproute
elif plural == "gateways":
return {"items": [mock_httproute]}
raise Exception("Unexpected plural")

# Mock get_namespaced_custom_object to return Gateway
def mock_get_namespaced_custom_object(group, version, namespace, plural, name):
if plural == "gateways":
return mock_gateway
raise Exception("Unexpected plural")

mocker.patch(
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
side_effect=mock_list_cluster_custom_object,
)
mocker.patch(
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
side_effect=mock_get_namespaced_custom_object,
Expand All @@ -1011,15 +1019,24 @@ def mock_get_namespaced_custom_object(group, version, namespace, plural, name):
)
assert result == expected_url, f"Expected {expected_url}, got {result}"

# Test HTTPRoute not found (404) - should return None
def mock_404_error(group, version, namespace, plural, name):
error = client.exceptions.ApiException(status=404)
error.status = 404
raise error
# Test HTTPRoute not found - should return None
def mock_list_cluster_empty(group, version, plural, label_selector):
if plural == "httproutes":
return {"items": []}
raise Exception("Unexpected plural")

def mock_list_namespaced_empty(group, version, namespace, plural, label_selector):
if plural == "httproutes":
return {"items": []}
raise Exception("Unexpected plural")

mocker.patch(
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
side_effect=mock_404_error,
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
side_effect=mock_list_cluster_empty,
)
mocker.patch(
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
side_effect=mock_list_namespaced_empty,
)

result = _get_dashboard_url_from_httproute("nonexistent-cluster", "test-ns")
Expand All @@ -1031,14 +1048,14 @@ def mock_404_error(group, version, namespace, plural, name):
"spec": {"parentRefs": []}, # Empty parentRefs
}

def mock_httproute_no_parents_fn(group, version, namespace, plural, name):
def mock_list_cluster_no_parents(group, version, plural, label_selector):
if plural == "httproutes":
return mock_httproute_no_parents
return {"items": [mock_httproute_no_parents]}
raise Exception("Unexpected plural")

mocker.patch(
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
side_effect=mock_httproute_no_parents_fn,
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
side_effect=mock_list_cluster_no_parents,
)

result = _get_dashboard_url_from_httproute("test-cluster", "test-ns")
Expand All @@ -1059,14 +1076,14 @@ def mock_httproute_no_parents_fn(group, version, namespace, plural, name):
},
}

def mock_httproute_no_name_fn(group, version, namespace, plural, name):
def mock_list_cluster_no_name(group, version, plural, label_selector):
if plural == "httproutes":
return mock_httproute_no_name
return {"items": [mock_httproute_no_name]}
raise Exception("Unexpected plural")

mocker.patch(
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
side_effect=mock_httproute_no_name_fn,
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
side_effect=mock_list_cluster_no_name,
)

result = _get_dashboard_url_from_httproute("test-cluster", "test-ns")
Expand All @@ -1087,14 +1104,14 @@ def mock_httproute_no_name_fn(group, version, namespace, plural, name):
},
}

def mock_httproute_no_namespace_fn(group, version, namespace, plural, name):
def mock_list_cluster_no_namespace(group, version, plural, label_selector):
if plural == "httproutes":
return mock_httproute_no_namespace
return {"items": [mock_httproute_no_namespace]}
raise Exception("Unexpected plural")

mocker.patch(
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
side_effect=mock_httproute_no_namespace_fn,
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
side_effect=mock_list_cluster_no_namespace,
)

result = _get_dashboard_url_from_httproute("test-cluster", "test-ns")
Expand Down Expand Up @@ -1183,6 +1200,75 @@ def mock_403_error(group, version, namespace, plural, name):
result is None
), "Should return None when non-404 exception occurs (caught by outer handler)"

# Real-world scenario: Cluster-wide permissions denied, falls back to namespace search
# This simulates a regular data scientist without cluster-admin permissions
call_count = {"cluster_wide": 0, "namespaced": 0}

def mock_list_cluster_permission_denied(group, version, plural, label_selector):
call_count["cluster_wide"] += 1
# Simulate permission denied for cluster-wide search
error = client.exceptions.ApiException(status=403)
error.status = 403
raise error

def mock_list_namespaced_success(group, version, namespace, plural, label_selector):
call_count["namespaced"] += 1
# First namespace fails, second succeeds (simulates opendatahub deployment)
if namespace == "redhat-ods-applications":
return {"items": []}
elif namespace == "opendatahub":
return {"items": [mock_httproute]}
return {"items": []}

mocker.patch(
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
side_effect=mock_list_cluster_permission_denied,
)
mocker.patch(
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
side_effect=mock_list_namespaced_success,
)
mocker.patch(
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
side_effect=mock_get_namespaced_custom_object,
)

result = _get_dashboard_url_from_httproute("test-cluster", "test-ns")
expected_url = (
"https://data-science-gateway.apps.example.com/ray/test-ns/test-cluster"
)
assert result == expected_url, f"Expected {expected_url}, got {result}"
assert call_count["cluster_wide"] == 1, "Should try cluster-wide search first"
assert (
call_count["namespaced"] >= 2
), "Should fall back to namespace search and try multiple namespaces"

# Real-world scenario: Gateway not found (404) - should return None
# This can happen if Gateway was deleted but HTTPRoute still exists
def mock_list_cluster_with_httproute(group, version, plural, label_selector):
if plural == "httproutes":
return {"items": [mock_httproute]}
raise Exception("Unexpected plural")

def mock_get_gateway_404(group, version, namespace, plural, name):
if plural == "gateways":
error = client.exceptions.ApiException(status=404)
error.status = 404
raise error
raise Exception("Unexpected plural")

mocker.patch(
"kubernetes.client.CustomObjectsApi.list_cluster_custom_object",
side_effect=mock_list_cluster_with_httproute,
)
mocker.patch(
"kubernetes.client.CustomObjectsApi.get_namespaced_custom_object",
side_effect=mock_get_gateway_404,
)

result = _get_dashboard_url_from_httproute("test-cluster", "test-ns")
assert result is None, "Should return None when Gateway not found (404)"


def test_cluster_dashboard_uri_httproute_first(mocker):
"""
Expand Down
Loading