diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index 11cf5fdb..4a5f69fb 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -492,7 +492,7 @@ def torchx_config( to_return["requirements"] = requirements return to_return - def from_k8_cluster_object(rc, mcad=True): + def from_k8_cluster_object(rc, mcad=True, ingress_domain=None, ingress_options={}): machine_types = ( rc["metadata"]["labels"]["orderedinstance"].split("_") if "orderedinstance" in rc["metadata"]["labels"] @@ -502,6 +502,10 @@ def from_k8_cluster_object(rc, mcad=True): "volumeMounts" in rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0] ) + if local_interactive: + ingress_domain = get_ingress_domain_from_client( + rc["metadata"]["name"], rc["metadata"]["namespace"] + ) cluster_config = ClusterConfiguration( name=rc["metadata"]["name"], namespace=rc["metadata"]["namespace"], @@ -532,6 +536,8 @@ def from_k8_cluster_object(rc, mcad=True): ]["image"], local_interactive=local_interactive, mcad=mcad, + ingress_domain=ingress_domain, + ingress_options=ingress_options, ) return Cluster(cluster_config) @@ -685,7 +691,62 @@ def get_cluster(cluster_name: str, namespace: str = "default"): for rc in rcs["items"]: if rc["metadata"]["name"] == cluster_name: mcad = _check_aw_exists(cluster_name, namespace) - return Cluster.from_k8_cluster_object(rc, mcad=mcad) + + try: + config_check() + api_instance = client.NetworkingV1Api(api_config_handler()) + ingresses = api_instance.list_namespaced_ingress(namespace) + ingress_host = None + ingress_options = {} + for ingress in ingresses.items: + # Search for ingress with AppWrapper name as the owner + if ( + "ingress-owner" in ingress.metadata.labels + and ingress.metadata.labels["ingress-owner"] == cluster_name + ): + ingress_host = ingress.spec.rules[0].host + if ( + "ingress-options" in ingress.metadata.labels + and ingress.metadata.labels["ingress-options"] == "true" + ): + ingress_name = ingress.metadata.name + port = ( + ingress.spec.rules[0] + .http.paths[0] + .backend.service.port.number + ) + annotations = ingress.metadata.annotations + path = ingress.spec.rules[0].http.paths[0].path + ingress_class_name = ingress.spec.ingress_class_name + path_type = ingress.spec.rules[0].http.paths[0].path_type + + ingress_options = { + "ingresses": [ + { + "ingressName": ingress_name, + "port": port, + "annotations": annotations, + "ingressClassName": ingress_class_name, + "pathType": path_type, + "path": path, + "host": ingress_host, + } + ] + } + except Exception as e: + return _kube_api_error_handling(e) + # We gather the ingress domain from the host + if ingress_host is not None and ingress_options == {}: + ingress_domain = ingress_host.split(".", 1)[1] + else: + ingress_domain = None + + return Cluster.from_k8_cluster_object( + rc, + mcad=mcad, + ingress_domain=ingress_domain, + ingress_options=ingress_options, + ) raise FileNotFoundError( f"Cluster {cluster_name} is not found in {namespace} namespace" ) @@ -734,7 +795,10 @@ def _get_ingress_domain(self): # pragma: no cover return _kube_api_error_handling(e) for route in routes["items"]: - if route["spec"]["port"]["targetPort"] == "client": + if ( + route["spec"]["port"]["targetPort"] == "client" + or route["spec"]["port"]["targetPort"] == 10001 + ): domain = route["spec"]["host"] else: try: @@ -921,3 +985,30 @@ def _copy_to_ray(cluster: Cluster) -> RayCluster: if ray.status == CodeFlareClusterStatus.READY: ray.status = RayClusterStatus.READY return ray + + +def get_ingress_domain_from_client(cluster_name: str, namespace: str = "default"): + if is_openshift_cluster(): + try: + config_check() + api_instance = client.CustomObjectsApi(api_config_handler()) + route = api_instance.get_namespaced_custom_object( + group="route.openshift.io", + version="v1", + namespace=namespace, + plural="routes", + name=f"rayclient-{cluster_name}", + ) + return route["spec"]["host"].split(".", 1)[1] + except Exception as e: # pragma no cover + return _kube_api_error_handling(e) + else: + try: + config_check() + api_instance = client.NetworkingV1Api(api_config_handler()) + ingress = api_instance.read_namespaced_ingress( + f"rayclient-{cluster_name}", namespace + ) + return ingress.spec.rules[0].host.split(".", 1)[1] + except Exception as e: # pragma no cover + return _kube_api_error_handling(e) diff --git a/src/codeflare_sdk/templates/base-template.yaml b/src/codeflare_sdk/templates/base-template.yaml index 8e6fd0e9..1e99040c 100644 --- a/src/codeflare_sdk/templates/base-template.yaml +++ b/src/codeflare_sdk/templates/base-template.yaml @@ -293,6 +293,9 @@ spec: namespace: default annotations: annotations-example:annotations-example + labels: + ingress-options: "false" + ingress-owner: appwrapper-name spec: ingressClassName: nginx rules: diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index 95c17cc2..95c962d1 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -129,28 +129,45 @@ def update_dashboard_ingress( raise ValueError( f"Error: 'port' is not of type int for ingress item at index {index}" ) - if ingress_option["port"] == 8265: + if ingress_option is not None: metadata["name"] = ingress_option["ingressName"] metadata["namespace"] = namespace - if "annotations" not in ingress_option.keys(): + metadata["labels"]["ingress-owner"] = cluster_name + metadata["labels"]["ingress-options"] = "true" + if ( + "annotations" not in ingress_option.keys() + or ingress_option["annotations"] is None + ): del metadata["annotations"] else: metadata["annotations"] = ingress_option["annotations"] - if "path" not in ingress_option.keys(): + if ( + "path" not in ingress_option.keys() + or ingress_option["path"] is None + ): del spec["rules"][0]["http"]["paths"][0]["path"] else: spec["rules"][0]["http"]["paths"][0]["path"] = ingress_option[ "path" ] - if "pathType" not in ingress_option.keys(): + if ( + "pathType" not in ingress_option.keys() + or ingress_option["pathType"] is None + ): spec["rules"][0]["http"]["paths"][0][ "pathType" ] = "ImplementationSpecific" - if "host" not in ingress_option.keys(): + if ( + "host" not in ingress_option.keys() + or ingress_option["host"] is None + ): del spec["rules"][0]["host"] else: spec["rules"][0]["host"] = ingress_option["host"] - if "ingressClassName" not in ingress_option.keys(): + if ( + "ingressClassName" not in ingress_option.keys() + or ingress_option["ingressClassName"] is None + ): del spec["ingressClassName"] else: spec["ingressClassName"] = ingress_option["ingressClassName"] @@ -161,6 +178,7 @@ def update_dashboard_ingress( else: spec["ingressClassName"] = "nginx" metadata["name"] = gen_dashboard_ingress_name(cluster_name) + metadata["labels"]["ingress-owner"] = cluster_name metadata["namespace"] = namespace spec["rules"][0]["http"]["paths"][0]["backend"]["service"][ "name" diff --git a/tests/test-case-no-mcad.yamls b/tests/test-case-no-mcad.yamls index 484636bc..4be18dc6 100644 --- a/tests/test-case-no-mcad.yamls +++ b/tests/test-case-no-mcad.yamls @@ -142,6 +142,9 @@ spec: apiVersion: networking.k8s.io/v1 kind: Ingress metadata: + labels: + ingress-options: 'false' + ingress-owner: unit-test-cluster-ray name: ray-dashboard-unit-test-cluster-ray namespace: ns spec: diff --git a/tests/test-case-prio.yaml b/tests/test-case-prio.yaml index 70b68e97..72c73083 100644 --- a/tests/test-case-prio.yaml +++ b/tests/test-case-prio.yaml @@ -175,6 +175,9 @@ spec: apiVersion: networking.k8s.io/v1 kind: Ingress metadata: + labels: + ingress-options: 'false' + ingress-owner: prio-test-cluster name: ray-dashboard-prio-test-cluster namespace: ns spec: diff --git a/tests/test-case.yaml b/tests/test-case.yaml index 920459c4..8b0677cf 100644 --- a/tests/test-case.yaml +++ b/tests/test-case.yaml @@ -172,6 +172,9 @@ spec: apiVersion: networking.k8s.io/v1 kind: Ingress metadata: + labels: + ingress-options: 'false' + ingress-owner: unit-test-cluster name: ray-dashboard-unit-test-cluster namespace: ns spec: diff --git a/tests/unit_test.py b/tests/unit_test.py index 7ad0d08d..ab9e3dcd 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -39,6 +39,7 @@ _app_wrapper_status, _ray_cluster_status, _get_ingress_domain, + get_ingress_domain_from_client, ) from codeflare_sdk.cluster.auth import ( TokenAuthentication, @@ -397,7 +398,7 @@ def arg_check_apply_effect(group, version, namespace, plural, body, *args): with open(f"{aw_dir}unit-test-cluster-ray.yaml") as f: yamls = yaml.load_all(f, Loader=yaml.FullLoader) for resource in yamls: - if resource["kind"] == "Route": + if resource["kind"] == "Ingress": assert body == resource else: assert 1 == 0 @@ -414,8 +415,8 @@ def arg_check_del_effect(group, version, namespace, plural, name, *args): assert group == "ray.io" assert version == "v1alpha1" assert name == "unit-test-cluster-ray" - elif plural == "routes": - assert group == "route.openshift.io" + elif plural == "ingresses": + assert group == "networking.k8s.io" assert version == "v1" assert name == "ray-dashboard-unit-test-cluster-ray" @@ -616,19 +617,27 @@ def ray_addr(self, *args): return self._address -def ingress_retrieval(port, annotations=None): +def ingress_retrieval(port, annotations=None, cluster_name="unit-test-cluster"): + labels = {"ingress-owner": cluster_name, "ingress-options": "false"} if port == 10001: serviceName = "client" else: serviceName = "dashboard" mock_ingress = client.V1Ingress( metadata=client.V1ObjectMeta( - name=f"ray-{serviceName}-unit-test-cluster", annotations=annotations + name=f"ray-{serviceName}-{cluster_name}", + annotations=annotations, + labels=labels, + owner_references=[ + client.V1OwnerReference( + api_version="v1", kind="Ingress", name=cluster_name, uid="unique-id" + ) + ], ), spec=client.V1IngressSpec( rules=[ client.V1IngressRule( - host=f"ray-{serviceName}-unit-test-cluster-ns.apps.cluster.awsroute.org", + host=f"ray-{serviceName}-{cluster_name}-ns.apps.cluster.awsroute.org", http=client.V1HTTPIngressRuleValue( paths=[ client.V1HTTPIngressPath( @@ -1148,6 +1157,11 @@ def get_ray_obj(group, version, namespace, plural, cls=None): return api_obj +def get_named_aw(group, version, namespace, plural, name): + aws = get_aw_obj("workload.codeflare.dev", "v1beta1", "ns", "appwrappers") + return aws["items"][0] + + def get_aw_obj(group, version, namespace, plural): api_obj1 = { "items": [ @@ -1403,21 +1417,37 @@ def get_aw_obj(group, version, namespace, plural): { "allocated": 0, "generictemplate": { - "apiVersion": "route.openshift.io/v1", - "kind": "Route", + "apiVersion": "networking.k8s.io/v1", + "kind": "Ingress", "metadata": { "labels": { - "odh-ray-cluster-service": "quicktest-head-svc" + "ingress-owner": "appwrapper-name", + "ingress-options": "false", }, "name": "ray-dashboard-quicktest", "namespace": "default", }, "spec": { - "port": {"targetPort": "dashboard"}, - "to": { - "kind": "Service", - "name": "quicktest-head-svc", - }, + "ingressClassName": "nginx", + "rules": [ + { + "http": { + "paths": { + "backend": { + "service": { + "name": "quicktest-head-svc", + "port": { + "number": 8265 + }, + }, + }, + "pathType": "Prefix", + "path": "/", + }, + }, + "host": "quicktest.awsroute.com", + } + ], }, }, "metadata": {}, @@ -1788,10 +1818,14 @@ def test_get_cluster(mocker): side_effect=get_ray_obj, ) mocker.patch( - "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", - return_value=True, + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=get_named_aw, + ) + mocker.patch( + "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", + return_value=ingress_retrieval(port=8265, cluster_name="quicktest"), ) - cluster = get_cluster(cluster_name="quicktest") + cluster = get_cluster("quicktest") cluster_config = cluster.config assert cluster_config.name == "quicktest" and cluster_config.namespace == "ns" assert ( @@ -1809,6 +1843,48 @@ def test_get_cluster(mocker): assert cluster_config.num_workers == 1 +def test_get_ingress_domain_from_client(mocker): + mocker.patch("kubernetes.config.load_kube_config") + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch( + "kubernetes.client.NetworkingV1Api.read_namespaced_ingress", + return_value=ingress_retrieval( + port=8265, cluster_name="unit-test-cluster" + ).items[0], + ) + + ingress_domain = get_ingress_domain_from_client("unit-test-cluster", "ns") + assert ingress_domain == "apps.cluster.awsroute.org" + + mocker.patch( + "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", return_value=True + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=route_retrieval, + ) + ingress_domain = get_ingress_domain_from_client("unit-test-cluster", "ns") + assert ingress_domain == "apps.cluster.awsroute.org" + + +def route_retrieval(group, version, namespace, plural, name): + assert group == "route.openshift.io" + assert version == "v1" + assert namespace == "ns" + assert plural == "routes" + assert name == "ray-dashboard-unit-test-cluster" + return { + "items": [ + { + "metadata": {"name": "ray-dashboard-unit-test-cluster"}, + "spec": { + "host": "ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org" + }, + } + ] + } + + def test_list_clusters(mocker, capsys): mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") mocker.patch(