From fe0b063bde6c86ee270e8d12528a420f684ddf03 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Wed, 13 Dec 2023 10:18:07 +0000 Subject: [PATCH 1/3] Fixed get_cluster() method --- src/codeflare_sdk/cluster/cluster.py | 27 +++++++- .../templates/base-template.yaml | 2 + src/codeflare_sdk/utils/generate_yaml.py | 4 +- tests/test-case-no-mcad.yamls | 2 + tests/test-case-prio.yaml | 2 + tests/test-case.yaml | 2 + tests/unit_test.py | 62 ++++++++++++++----- 7 files changed, 81 insertions(+), 20 deletions(-) diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index 11cf5fdb..4f24663d 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -492,7 +492,7 @@ def torchx_config( to_return["requirements"] = requirements return to_return - def from_k8_cluster_object(rc, mcad=True): + def from_k8_cluster_object(rc, mcad=True, ingress_domain=None): machine_types = ( rc["metadata"]["labels"]["orderedinstance"].split("_") if "orderedinstance" in rc["metadata"]["labels"] @@ -532,6 +532,7 @@ def from_k8_cluster_object(rc, mcad=True): ]["image"], local_interactive=local_interactive, mcad=mcad, + ingress_domain=ingress_domain, ) return Cluster(cluster_config) @@ -685,7 +686,29 @@ def get_cluster(cluster_name: str, namespace: str = "default"): for rc in rcs["items"]: if rc["metadata"]["name"] == cluster_name: mcad = _check_aw_exists(cluster_name, namespace) - return Cluster.from_k8_cluster_object(rc, mcad=mcad) + + try: + config_check() + api_instance = client.NetworkingV1Api(api_config_handler()) + ingresses = api_instance.list_namespaced_ingress(namespace) + if mcad == True: + for ingress in ingresses.items: + # Search for ingress with AppWrapper name as the owner + if cluster_name == ingress.metadata.owner_references[0].name: + ingress_host = ingress.spec.rules[0].host + else: + for ingress in ingresses.items: + # Search for the ingress with the ingress-owner label + if ingress.metadata.labels["ingress-owner"] == cluster_name: + ingress_host = ingress.spec.rules[0].host + except Exception as e: + return _kube_api_error_handling(e) + + # We gather the ingress domain from the host + ingress_domain = ingress_host.split(".", 1)[1] + return Cluster.from_k8_cluster_object( + rc, mcad=mcad, ingress_domain=ingress_domain + ) raise FileNotFoundError( f"Cluster {cluster_name} is not found in {namespace} namespace" ) diff --git a/src/codeflare_sdk/templates/base-template.yaml b/src/codeflare_sdk/templates/base-template.yaml index 8e6fd0e9..0e6ef09c 100644 --- a/src/codeflare_sdk/templates/base-template.yaml +++ b/src/codeflare_sdk/templates/base-template.yaml @@ -293,6 +293,8 @@ spec: namespace: default annotations: annotations-example:annotations-example + labels: + ingress-owner: appwrapper-name spec: ingressClassName: nginx rules: diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index 95c17cc2..af1b9ece 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -129,9 +129,10 @@ def update_dashboard_ingress( raise ValueError( f"Error: 'port' is not of type int for ingress item at index {index}" ) - if ingress_option["port"] == 8265: + if ingress_option is not None: metadata["name"] = ingress_option["ingressName"] metadata["namespace"] = namespace + metadata["labels"]["ingress-owner"] = cluster_name if "annotations" not in ingress_option.keys(): del metadata["annotations"] else: @@ -161,6 +162,7 @@ def update_dashboard_ingress( else: spec["ingressClassName"] = "nginx" metadata["name"] = gen_dashboard_ingress_name(cluster_name) + metadata["labels"]["ingress-owner"] = cluster_name metadata["namespace"] = namespace spec["rules"][0]["http"]["paths"][0]["backend"]["service"][ "name" diff --git a/tests/test-case-no-mcad.yamls b/tests/test-case-no-mcad.yamls index 484636bc..77f90f89 100644 --- a/tests/test-case-no-mcad.yamls +++ b/tests/test-case-no-mcad.yamls @@ -142,6 +142,8 @@ spec: apiVersion: networking.k8s.io/v1 kind: Ingress metadata: + labels: + ingress-owner: unit-test-cluster-ray name: ray-dashboard-unit-test-cluster-ray namespace: ns spec: diff --git a/tests/test-case-prio.yaml b/tests/test-case-prio.yaml index 70b68e97..b6d820ae 100644 --- a/tests/test-case-prio.yaml +++ b/tests/test-case-prio.yaml @@ -175,6 +175,8 @@ spec: apiVersion: networking.k8s.io/v1 kind: Ingress metadata: + labels: + ingress-owner: prio-test-cluster name: ray-dashboard-prio-test-cluster namespace: ns spec: diff --git a/tests/test-case.yaml b/tests/test-case.yaml index 920459c4..e96fa89e 100644 --- a/tests/test-case.yaml +++ b/tests/test-case.yaml @@ -172,6 +172,8 @@ spec: apiVersion: networking.k8s.io/v1 kind: Ingress metadata: + labels: + ingress-owner: unit-test-cluster name: ray-dashboard-unit-test-cluster namespace: ns spec: diff --git a/tests/unit_test.py b/tests/unit_test.py index 7ad0d08d..b217b281 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -397,7 +397,7 @@ def arg_check_apply_effect(group, version, namespace, plural, body, *args): with open(f"{aw_dir}unit-test-cluster-ray.yaml") as f: yamls = yaml.load_all(f, Loader=yaml.FullLoader) for resource in yamls: - if resource["kind"] == "Route": + if resource["kind"] == "Ingress": assert body == resource else: assert 1 == 0 @@ -414,8 +414,8 @@ def arg_check_del_effect(group, version, namespace, plural, name, *args): assert group == "ray.io" assert version == "v1alpha1" assert name == "unit-test-cluster-ray" - elif plural == "routes": - assert group == "route.openshift.io" + elif plural == "ingresses": + assert group == "networking.k8s.io" assert version == "v1" assert name == "ray-dashboard-unit-test-cluster-ray" @@ -623,7 +623,13 @@ def ingress_retrieval(port, annotations=None): serviceName = "dashboard" mock_ingress = client.V1Ingress( metadata=client.V1ObjectMeta( - name=f"ray-{serviceName}-unit-test-cluster", annotations=annotations + name=f"ray-{serviceName}-unit-test-cluster", + annotations=annotations, + owner_references=[ + client.V1OwnerReference( + api_version="v1", kind="Ingress", name="quicktest", uid="unique-id" + ) + ], ), spec=client.V1IngressSpec( rules=[ @@ -1148,6 +1154,11 @@ def get_ray_obj(group, version, namespace, plural, cls=None): return api_obj +def get_named_aw(group, version, namespace, plural, name): + aws = get_aw_obj("workload.codeflare.dev", "v1beta1", "ns", "appwrappers") + return aws["items"][0] + + def get_aw_obj(group, version, namespace, plural): api_obj1 = { "items": [ @@ -1403,21 +1414,34 @@ def get_aw_obj(group, version, namespace, plural): { "allocated": 0, "generictemplate": { - "apiVersion": "route.openshift.io/v1", - "kind": "Route", + "apiVersion": "networking.k8s.io/v1", + "kind": "Ingress", "metadata": { - "labels": { - "odh-ray-cluster-service": "quicktest-head-svc" - }, + "labels": {"ingress-owner": "appwrapper-name"}, "name": "ray-dashboard-quicktest", "namespace": "default", }, "spec": { - "port": {"targetPort": "dashboard"}, - "to": { - "kind": "Service", - "name": "quicktest-head-svc", - }, + "ingressClassName": "nginx", + "rules": [ + { + "http": { + "paths": { + "backend": { + "service": { + "name": "quicktest-head-svc", + "port": { + "number": 8265 + }, + }, + }, + "pathType": "Prefix", + "path": "/", + }, + }, + "host": "quicktest.awsroute.com", + } + ], }, }, "metadata": {}, @@ -1788,10 +1812,14 @@ def test_get_cluster(mocker): side_effect=get_ray_obj, ) mocker.patch( - "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", - return_value=True, + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=get_named_aw, + ) + mocker.patch( + "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", + return_value=ingress_retrieval(port=8265), ) - cluster = get_cluster(cluster_name="quicktest") + cluster = get_cluster("quicktest") cluster_config = cluster.config assert cluster_config.name == "quicktest" and cluster_config.namespace == "ns" assert ( From 418aec59c9ff0825e58cee6a951b5d4935a88eeb Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Thu, 21 Dec 2023 11:04:06 +0000 Subject: [PATCH 2/3] Added logic for getting clusters with routes --- src/codeflare_sdk/cluster/cluster.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index 4f24663d..22c3b6a4 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -691,6 +691,7 @@ def get_cluster(cluster_name: str, namespace: str = "default"): config_check() api_instance = client.NetworkingV1Api(api_config_handler()) ingresses = api_instance.list_namespaced_ingress(namespace) + ingress_host = None if mcad == True: for ingress in ingresses.items: # Search for ingress with AppWrapper name as the owner @@ -705,7 +706,11 @@ def get_cluster(cluster_name: str, namespace: str = "default"): return _kube_api_error_handling(e) # We gather the ingress domain from the host - ingress_domain = ingress_host.split(".", 1)[1] + if ingress_host is not None: + ingress_domain = ingress_host.split(".", 1)[1] + else: + ingress_domain = None + return Cluster.from_k8_cluster_object( rc, mcad=mcad, ingress_domain=ingress_domain ) From 366aa88fd13815e9562438d54098637652668ece Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Mon, 8 Jan 2024 14:48:17 +0000 Subject: [PATCH 3/3] Fixed get_cluster for ingress_options --- src/codeflare_sdk/cluster/cluster.py | 93 ++++++++++++++++--- .../templates/base-template.yaml | 1 + src/codeflare_sdk/utils/generate_yaml.py | 26 +++++- tests/test-case-no-mcad.yamls | 1 + tests/test-case-prio.yaml | 1 + tests/test-case.yaml | 1 + tests/unit_test.py | 60 ++++++++++-- 7 files changed, 157 insertions(+), 26 deletions(-) diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index 22c3b6a4..4a5f69fb 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -492,7 +492,7 @@ def torchx_config( to_return["requirements"] = requirements return to_return - def from_k8_cluster_object(rc, mcad=True, ingress_domain=None): + def from_k8_cluster_object(rc, mcad=True, ingress_domain=None, ingress_options={}): machine_types = ( rc["metadata"]["labels"]["orderedinstance"].split("_") if "orderedinstance" in rc["metadata"]["labels"] @@ -502,6 +502,10 @@ def from_k8_cluster_object(rc, mcad=True, ingress_domain=None): "volumeMounts" in rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0] ) + if local_interactive: + ingress_domain = get_ingress_domain_from_client( + rc["metadata"]["name"], rc["metadata"]["namespace"] + ) cluster_config = ClusterConfiguration( name=rc["metadata"]["name"], namespace=rc["metadata"]["namespace"], @@ -533,6 +537,7 @@ def from_k8_cluster_object(rc, mcad=True, ingress_domain=None): local_interactive=local_interactive, mcad=mcad, ingress_domain=ingress_domain, + ingress_options=ingress_options, ) return Cluster(cluster_config) @@ -692,27 +697,55 @@ def get_cluster(cluster_name: str, namespace: str = "default"): api_instance = client.NetworkingV1Api(api_config_handler()) ingresses = api_instance.list_namespaced_ingress(namespace) ingress_host = None - if mcad == True: - for ingress in ingresses.items: - # Search for ingress with AppWrapper name as the owner - if cluster_name == ingress.metadata.owner_references[0].name: - ingress_host = ingress.spec.rules[0].host - else: - for ingress in ingresses.items: - # Search for the ingress with the ingress-owner label - if ingress.metadata.labels["ingress-owner"] == cluster_name: - ingress_host = ingress.spec.rules[0].host + ingress_options = {} + for ingress in ingresses.items: + # Search for ingress with AppWrapper name as the owner + if ( + "ingress-owner" in ingress.metadata.labels + and ingress.metadata.labels["ingress-owner"] == cluster_name + ): + ingress_host = ingress.spec.rules[0].host + if ( + "ingress-options" in ingress.metadata.labels + and ingress.metadata.labels["ingress-options"] == "true" + ): + ingress_name = ingress.metadata.name + port = ( + ingress.spec.rules[0] + .http.paths[0] + .backend.service.port.number + ) + annotations = ingress.metadata.annotations + path = ingress.spec.rules[0].http.paths[0].path + ingress_class_name = ingress.spec.ingress_class_name + path_type = ingress.spec.rules[0].http.paths[0].path_type + + ingress_options = { + "ingresses": [ + { + "ingressName": ingress_name, + "port": port, + "annotations": annotations, + "ingressClassName": ingress_class_name, + "pathType": path_type, + "path": path, + "host": ingress_host, + } + ] + } except Exception as e: return _kube_api_error_handling(e) - # We gather the ingress domain from the host - if ingress_host is not None: + if ingress_host is not None and ingress_options == {}: ingress_domain = ingress_host.split(".", 1)[1] else: ingress_domain = None return Cluster.from_k8_cluster_object( - rc, mcad=mcad, ingress_domain=ingress_domain + rc, + mcad=mcad, + ingress_domain=ingress_domain, + ingress_options=ingress_options, ) raise FileNotFoundError( f"Cluster {cluster_name} is not found in {namespace} namespace" @@ -762,7 +795,10 @@ def _get_ingress_domain(self): # pragma: no cover return _kube_api_error_handling(e) for route in routes["items"]: - if route["spec"]["port"]["targetPort"] == "client": + if ( + route["spec"]["port"]["targetPort"] == "client" + or route["spec"]["port"]["targetPort"] == 10001 + ): domain = route["spec"]["host"] else: try: @@ -949,3 +985,30 @@ def _copy_to_ray(cluster: Cluster) -> RayCluster: if ray.status == CodeFlareClusterStatus.READY: ray.status = RayClusterStatus.READY return ray + + +def get_ingress_domain_from_client(cluster_name: str, namespace: str = "default"): + if is_openshift_cluster(): + try: + config_check() + api_instance = client.CustomObjectsApi(api_config_handler()) + route = api_instance.get_namespaced_custom_object( + group="route.openshift.io", + version="v1", + namespace=namespace, + plural="routes", + name=f"rayclient-{cluster_name}", + ) + return route["spec"]["host"].split(".", 1)[1] + except Exception as e: # pragma no cover + return _kube_api_error_handling(e) + else: + try: + config_check() + api_instance = client.NetworkingV1Api(api_config_handler()) + ingress = api_instance.read_namespaced_ingress( + f"rayclient-{cluster_name}", namespace + ) + return ingress.spec.rules[0].host.split(".", 1)[1] + except Exception as e: # pragma no cover + return _kube_api_error_handling(e) diff --git a/src/codeflare_sdk/templates/base-template.yaml b/src/codeflare_sdk/templates/base-template.yaml index 0e6ef09c..1e99040c 100644 --- a/src/codeflare_sdk/templates/base-template.yaml +++ b/src/codeflare_sdk/templates/base-template.yaml @@ -294,6 +294,7 @@ spec: annotations: annotations-example:annotations-example labels: + ingress-options: "false" ingress-owner: appwrapper-name spec: ingressClassName: nginx diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index af1b9ece..95c962d1 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -133,25 +133,41 @@ def update_dashboard_ingress( metadata["name"] = ingress_option["ingressName"] metadata["namespace"] = namespace metadata["labels"]["ingress-owner"] = cluster_name - if "annotations" not in ingress_option.keys(): + metadata["labels"]["ingress-options"] = "true" + if ( + "annotations" not in ingress_option.keys() + or ingress_option["annotations"] is None + ): del metadata["annotations"] else: metadata["annotations"] = ingress_option["annotations"] - if "path" not in ingress_option.keys(): + if ( + "path" not in ingress_option.keys() + or ingress_option["path"] is None + ): del spec["rules"][0]["http"]["paths"][0]["path"] else: spec["rules"][0]["http"]["paths"][0]["path"] = ingress_option[ "path" ] - if "pathType" not in ingress_option.keys(): + if ( + "pathType" not in ingress_option.keys() + or ingress_option["pathType"] is None + ): spec["rules"][0]["http"]["paths"][0][ "pathType" ] = "ImplementationSpecific" - if "host" not in ingress_option.keys(): + if ( + "host" not in ingress_option.keys() + or ingress_option["host"] is None + ): del spec["rules"][0]["host"] else: spec["rules"][0]["host"] = ingress_option["host"] - if "ingressClassName" not in ingress_option.keys(): + if ( + "ingressClassName" not in ingress_option.keys() + or ingress_option["ingressClassName"] is None + ): del spec["ingressClassName"] else: spec["ingressClassName"] = ingress_option["ingressClassName"] diff --git a/tests/test-case-no-mcad.yamls b/tests/test-case-no-mcad.yamls index 77f90f89..4be18dc6 100644 --- a/tests/test-case-no-mcad.yamls +++ b/tests/test-case-no-mcad.yamls @@ -143,6 +143,7 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: labels: + ingress-options: 'false' ingress-owner: unit-test-cluster-ray name: ray-dashboard-unit-test-cluster-ray namespace: ns diff --git a/tests/test-case-prio.yaml b/tests/test-case-prio.yaml index b6d820ae..72c73083 100644 --- a/tests/test-case-prio.yaml +++ b/tests/test-case-prio.yaml @@ -176,6 +176,7 @@ spec: kind: Ingress metadata: labels: + ingress-options: 'false' ingress-owner: prio-test-cluster name: ray-dashboard-prio-test-cluster namespace: ns diff --git a/tests/test-case.yaml b/tests/test-case.yaml index e96fa89e..8b0677cf 100644 --- a/tests/test-case.yaml +++ b/tests/test-case.yaml @@ -173,6 +173,7 @@ spec: kind: Ingress metadata: labels: + ingress-options: 'false' ingress-owner: unit-test-cluster name: ray-dashboard-unit-test-cluster namespace: ns diff --git a/tests/unit_test.py b/tests/unit_test.py index b217b281..ab9e3dcd 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -39,6 +39,7 @@ _app_wrapper_status, _ray_cluster_status, _get_ingress_domain, + get_ingress_domain_from_client, ) from codeflare_sdk.cluster.auth import ( TokenAuthentication, @@ -616,25 +617,27 @@ def ray_addr(self, *args): return self._address -def ingress_retrieval(port, annotations=None): +def ingress_retrieval(port, annotations=None, cluster_name="unit-test-cluster"): + labels = {"ingress-owner": cluster_name, "ingress-options": "false"} if port == 10001: serviceName = "client" else: serviceName = "dashboard" mock_ingress = client.V1Ingress( metadata=client.V1ObjectMeta( - name=f"ray-{serviceName}-unit-test-cluster", + name=f"ray-{serviceName}-{cluster_name}", annotations=annotations, + labels=labels, owner_references=[ client.V1OwnerReference( - api_version="v1", kind="Ingress", name="quicktest", uid="unique-id" + api_version="v1", kind="Ingress", name=cluster_name, uid="unique-id" ) ], ), spec=client.V1IngressSpec( rules=[ client.V1IngressRule( - host=f"ray-{serviceName}-unit-test-cluster-ns.apps.cluster.awsroute.org", + host=f"ray-{serviceName}-{cluster_name}-ns.apps.cluster.awsroute.org", http=client.V1HTTPIngressRuleValue( paths=[ client.V1HTTPIngressPath( @@ -1417,7 +1420,10 @@ def get_aw_obj(group, version, namespace, plural): "apiVersion": "networking.k8s.io/v1", "kind": "Ingress", "metadata": { - "labels": {"ingress-owner": "appwrapper-name"}, + "labels": { + "ingress-owner": "appwrapper-name", + "ingress-options": "false", + }, "name": "ray-dashboard-quicktest", "namespace": "default", }, @@ -1817,7 +1823,7 @@ def test_get_cluster(mocker): ) mocker.patch( "kubernetes.client.NetworkingV1Api.list_namespaced_ingress", - return_value=ingress_retrieval(port=8265), + return_value=ingress_retrieval(port=8265, cluster_name="quicktest"), ) cluster = get_cluster("quicktest") cluster_config = cluster.config @@ -1837,6 +1843,48 @@ def test_get_cluster(mocker): assert cluster_config.num_workers == 1 +def test_get_ingress_domain_from_client(mocker): + mocker.patch("kubernetes.config.load_kube_config") + mocker.patch("kubernetes.client.ApisApi.get_api_versions") + mocker.patch( + "kubernetes.client.NetworkingV1Api.read_namespaced_ingress", + return_value=ingress_retrieval( + port=8265, cluster_name="unit-test-cluster" + ).items[0], + ) + + ingress_domain = get_ingress_domain_from_client("unit-test-cluster", "ns") + assert ingress_domain == "apps.cluster.awsroute.org" + + mocker.patch( + "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", return_value=True + ) + mocker.patch( + "kubernetes.client.CustomObjectsApi.get_namespaced_custom_object", + side_effect=route_retrieval, + ) + ingress_domain = get_ingress_domain_from_client("unit-test-cluster", "ns") + assert ingress_domain == "apps.cluster.awsroute.org" + + +def route_retrieval(group, version, namespace, plural, name): + assert group == "route.openshift.io" + assert version == "v1" + assert namespace == "ns" + assert plural == "routes" + assert name == "ray-dashboard-unit-test-cluster" + return { + "items": [ + { + "metadata": {"name": "ray-dashboard-unit-test-cluster"}, + "spec": { + "host": "ray-dashboard-unit-test-cluster-ns.apps.cluster.awsroute.org" + }, + } + ] + } + + def test_list_clusters(mocker, capsys): mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") mocker.patch(