From d009a49d837f70b34515a71bf663eafe876feb9e Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Thu, 16 Oct 2025 11:48:00 +0100 Subject: [PATCH] no-jira: Vendor kuberay python client --- .github/workflows/coverage-badge.yaml | 2 +- .github/workflows/unit-tests.yml | 2 +- poetry.lock | 22 +- pyproject.toml | 12 +- src/codeflare_sdk/ray/rayjobs/rayjob.py | 4 +- src/codeflare_sdk/vendored/.gitignore | 35 ++ src/codeflare_sdk/vendored/LICENSE | 272 +++++++++ src/codeflare_sdk/vendored/README.md | 158 +++++ src/codeflare_sdk/vendored/__init__.py | 14 + .../vendored/examples/complete-example.py | 144 +++++ .../vendored/examples/use-builder.py | 79 +++ .../vendored/examples/use-director.py | 98 +++ .../examples/use-raw-config_map_with-api.py | 213 +++++++ .../vendored/examples/use-raw-with-api.py | 195 ++++++ .../vendored/examples/use-utils.py | 117 ++++ src/codeflare_sdk/vendored/poetry.lock | 439 ++++++++++++++ src/codeflare_sdk/vendored/pyproject.toml | 26 + .../vendored/python_client/__init__.py | 1 + .../vendored/python_client/constants.py | 13 + .../python_client/kuberay_cluster_api.py | 311 ++++++++++ .../vendored/python_client/kuberay_job_api.py | 381 ++++++++++++ .../vendored/python_client/utils/__init__.py | 0 .../utils/kuberay_cluster_builder.py | 326 ++++++++++ .../utils/kuberay_cluster_utils.py | 473 +++++++++++++++ .../vendored/python_client_test/README.md | 29 + .../vendored/python_client_test/helpers.py | 135 +++++ .../python_client_test/test_cluster_api.py | 345 +++++++++++ .../python_client_test/test_director.py | 121 ++++ .../python_client_test/test_job_api.py | 567 ++++++++++++++++++ .../vendored/python_client_test/test_utils.py | 352 +++++++++++ .../rayjob/rayjob_existing_cluster_test.py | 2 +- .../rayjob/rayjob_lifecycled_cluster_test.py | 4 +- 32 files changed, 4862 insertions(+), 30 deletions(-) create mode 100644 src/codeflare_sdk/vendored/.gitignore create mode 100644 src/codeflare_sdk/vendored/LICENSE create mode 100644 
src/codeflare_sdk/vendored/README.md create mode 100644 src/codeflare_sdk/vendored/__init__.py create mode 100644 src/codeflare_sdk/vendored/examples/complete-example.py create mode 100644 src/codeflare_sdk/vendored/examples/use-builder.py create mode 100644 src/codeflare_sdk/vendored/examples/use-director.py create mode 100644 src/codeflare_sdk/vendored/examples/use-raw-config_map_with-api.py create mode 100644 src/codeflare_sdk/vendored/examples/use-raw-with-api.py create mode 100644 src/codeflare_sdk/vendored/examples/use-utils.py create mode 100644 src/codeflare_sdk/vendored/poetry.lock create mode 100755 src/codeflare_sdk/vendored/pyproject.toml create mode 100644 src/codeflare_sdk/vendored/python_client/__init__.py create mode 100644 src/codeflare_sdk/vendored/python_client/constants.py create mode 100644 src/codeflare_sdk/vendored/python_client/kuberay_cluster_api.py create mode 100644 src/codeflare_sdk/vendored/python_client/kuberay_job_api.py create mode 100644 src/codeflare_sdk/vendored/python_client/utils/__init__.py create mode 100644 src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_builder.py create mode 100644 src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_utils.py create mode 100644 src/codeflare_sdk/vendored/python_client_test/README.md create mode 100644 src/codeflare_sdk/vendored/python_client_test/helpers.py create mode 100644 src/codeflare_sdk/vendored/python_client_test/test_cluster_api.py create mode 100644 src/codeflare_sdk/vendored/python_client_test/test_director.py create mode 100644 src/codeflare_sdk/vendored/python_client_test/test_job_api.py create mode 100644 src/codeflare_sdk/vendored/python_client_test/test_utils.py diff --git a/.github/workflows/coverage-badge.yaml b/.github/workflows/coverage-badge.yaml index 2a92010b..d793a699 100644 --- a/.github/workflows/coverage-badge.yaml +++ b/.github/workflows/coverage-badge.yaml @@ -29,7 +29,7 @@ jobs: poetry install --with test - name: Generate coverage 
report run: | - coverage run --omit="src/**/test_*.py,src/codeflare_sdk/common/utils/unit_test_support.py" -m pytest + coverage run --omit="src/**/test_*.py,src/codeflare_sdk/common/utils/unit_test_support.py,src/codeflare_sdk/vendored/**" -m pytest - name: Coverage Badge uses: tj-actions/coverage-badge-py@v2 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index e38e6973..e276ee3e 100755 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -26,7 +26,7 @@ jobs: poetry install --with test - name: Test with pytest and check coverage run: | - coverage run --omit="src/**/test_*.py,src/codeflare_sdk/common/utils/unit_test_support.py" -m pytest + coverage run --omit="src/**/test_*.py,src/codeflare_sdk/common/utils/unit_test_support.py,src/codeflare_sdk/vendored/**" -m pytest coverage=$(coverage report -m | tail -1 | tail -c 4 | head -c 2) if (( $coverage < 90 )); then echo "Coverage failed at ${coverage}%"; exit 1; else echo "Coverage passed, ${coverage}%"; fi - name: Upload to Codecov diff --git a/poetry.lock b/poetry.lock index 88ceb3cb..0e2d4eac 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3416,26 +3416,6 @@ files = [ [package.dependencies] pytest = ">=7.0.0" -[[package]] -name = "python-client" -version = "0.0.0-dev" -description = "Python Client for Kuberay" -optional = false -python-versions = "^3.11" -groups = ["main"] -files = [] -develop = false - -[package.dependencies] -kubernetes = ">=25.0.0" - -[package.source] -type = "git" -url = "https://github.com/ray-project/kuberay.git" -reference = "b2fd91b58c2bbe22f9b4f730c5a8f3180c05e570" -resolved_reference = "b2fd91b58c2bbe22f9b4f730c5a8f3180c05e570" -subdirectory = "clients/python-client" - [[package]] name = "python-dateutil" version = "3.9.0" @@ -4823,4 +4803,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "9a052e3a816450844fa2cf3427e4660715977aca3b14561d1e9991899624b7c2" +content-hash = 
"1a3968dbde8f4356b4d93b17f5bcf75f2bc38587553273742de05d9f0f6ee87c" diff --git a/pyproject.toml b/pyproject.toml index cda1337f..0a9d3228 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,15 @@ description = "Python SDK for codeflare client" license = "Apache-2.0" +# Exclude vendored tests, examples, and build files from the package +exclude = [ + "src/codeflare_sdk/vendored/python_client_test", + "src/codeflare_sdk/vendored/examples", + "src/codeflare_sdk/vendored/pyproject.toml", + "src/codeflare_sdk/vendored/poetry.lock", + "src/codeflare_sdk/vendored/README.md" +] + authors = [ "Michael Clifford ", "Mustafa Eyceoz ", @@ -33,7 +42,6 @@ cryptography = "43.0.3" executing = "1.2.0" pydantic = ">= 2.10.6" ipywidgets = "8.1.2" -python-client = { git = "https://github.com/ray-project/kuberay.git", subdirectory = "clients/python-client", rev = "b2fd91b58c2bbe22f9b4f730c5a8f3180c05e570" } [[tool.poetry.source]] name = "pypi" @@ -73,7 +81,7 @@ markers = [ "openshift", "nvidia_gpu" ] -addopts = "--timeout=900" +addopts = "--timeout=900 --ignore=src/codeflare_sdk/vendored" testpaths = ["src/codeflare_sdk"] collect_ignore = ["src/codeflare_sdk/common/utils/unit_test_support.py"] diff --git a/src/codeflare_sdk/ray/rayjobs/rayjob.py b/src/codeflare_sdk/ray/rayjobs/rayjob.py index 65478087..c06c596e 100644 --- a/src/codeflare_sdk/ray/rayjobs/rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/rayjob.py @@ -27,8 +27,8 @@ from codeflare_sdk.common.utils.constants import MOUNT_PATH from codeflare_sdk.common.utils.utils import get_ray_image_for_python_version -from python_client.kuberay_job_api import RayjobApi -from python_client.kuberay_cluster_api import RayClusterApi +from codeflare_sdk.vendored.python_client.kuberay_job_api import RayjobApi +from codeflare_sdk.vendored.python_client.kuberay_cluster_api import RayClusterApi from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig from codeflare_sdk.ray.rayjobs.runtime_env import ( create_file_secret, diff --git 
a/src/codeflare_sdk/vendored/.gitignore b/src/codeflare_sdk/vendored/.gitignore new file mode 100644 index 00000000..d6d73f9c --- /dev/null +++ b/src/codeflare_sdk/vendored/.gitignore @@ -0,0 +1,35 @@ + + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + + +# Distribution / packaging +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +.tox/ +htmlcov +.coverage +.cache +nosetests.xml +coverage.xml diff --git a/src/codeflare_sdk/vendored/LICENSE b/src/codeflare_sdk/vendored/LICENSE new file mode 100644 index 00000000..1dcfa84a --- /dev/null +++ b/src/codeflare_sdk/vendored/LICENSE @@ -0,0 +1,272 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +Code in python/ray/rllib/{evolution_strategies, dqn} adapted from +https://github.com/openai (MIT License) + +Copyright (c) 2016 OpenAI (http://openai.com) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +-------------------------------------------------------------------------------- + +Code in python/ray/rllib/impala/vtrace.py from +https://github.com/deepmind/scalable_agent + +Copyright 2018 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- +Code in python/ray/rllib/ars is adapted from https://github.com/modestyachts/ARS + +Copyright (c) 2018, ARS contributors (Horia Mania, Aurelia Guy, Benjamin Recht) +All rights reserved. + +Redistribution and use of ARS in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/codeflare_sdk/vendored/README.md b/src/codeflare_sdk/vendored/README.md new file mode 100644 index 00000000..2189cb5b --- /dev/null +++ b/src/codeflare_sdk/vendored/README.md @@ -0,0 +1,158 @@ +# Overview + +This python client library provide APIs to handle `raycluster` and `rayjobs` from your python application. + +## Prerequisites + +It is assumed that your `k8s cluster in already setup`. Your kubectl configuration is expected to be +in `~/.kube/config` if you are running the code directly from you terminal. + +It is also expected that the `kuberay operator` is installed. +[Installation instructions are here][quick-start] + +## Usage + +There are multiple levels of using the API with increasing levels of complexity. + +### director + +This is the easiest form of using the API to create rayclusters with predefined cluster sizes + +```python +my_kuberay_api = kuberay_cluster_api.RayClusterApi() + +my_cluster_director = kuberay_cluster_builder.Director() + +cluster0 = my_cluster_director.build_small_cluster(name="new-cluster0") + +if cluster0: + my_kuberay_api.create_ray_cluster(body=cluster0) +``` + +the director create the cluster definition, and the `cluster_api` acts as the HTTP client sending +the create (post) request to the k8s api-server + +### cluster_builder + +The builder allows you to build the cluster piece by piece. You can customize the cluster more. 
+ +```python +cluster1 = ( + my_cluster_builder.build_meta(name="new-cluster1") + .build_head() + .build_worker(group_name="workers", replicas=3) + .get_cluster() + ) + +if not my_cluster_builder.succeeded: + return + +my_kuberay_api.create_ray_cluster(body=cluster1) +``` + +### cluster_utils + +`cluster_utils` gives you even more options to modify your cluster definition, add/remove worker +groups, change replicas in a worker group, duplicate a worker group, etc. + +```python +my_Cluster_utils = kuberay_cluster_utils.ClusterUtils() + +cluster_to_patch, succeeded = my_Cluster_utils.update_worker_group_replicas( + cluster2, group_name="workers", max_replicas=4, min_replicas=1, replicas=2 +) + +if succeeded: + my_kuberay_api.patch_ray_cluster( + name=cluster_to_patch["metadata"]["name"], ray_patch=cluster_to_patch + ) +``` + +### cluster_api + +The `cluster_api` is the one you always use to implement your cluster change in k8s. You can +use it with raw `JSON` if you wish. The `director/cluster_builder/cluster_utils` are just tools to +shield the user from using raw `JSON`. + +### job_api + +Finally, the `job_api` can be used to submit RayJobs to a pre-existing RayCluster. 
+ +#### Submitting to Existing Cluster + +```python +from codeflare_sdk.vendored.python_client import kuberay_job_api, kuberay_cluster_api, constants + +job_body = { + "apiVersion": "ray.io/v1", + "kind": "RayJob", + "metadata": {...}, + "spec": { + "clusterSelector": { + "ray.io/cluster": "ray-cluster-name", + }, + "entrypoint": 'python -c training_script.py', + "submissionMode": "K8sJobMode", + }, +} + +kuberay_job_api.submit_job( + job=job_body, + k8s_namespace=namespace, +) +``` + +## Code Organization + +```text +clients/ +└── python-client + ├── examples + │ ├── complete-example.py + │ ├── use-builder.py + │ ├── use-director.py + │ ├── use-raw-config_map_with-api.py + │ ├── use-raw-with-api.py + │ └── use-utils.py + ├── LICENSE + ├── poetry.lock + ├── pyproject.toml + ├── python_client + │ ├── __init__.py + │ ├── constants.py + │ ├── kuberay_cluster_api.py + │ ├── kuberay_job_api.py + │ └── utils + │ ├── __init__.py + │ ├── kuberay_cluster_builder.py + │ └── kuberay_cluster_utils.py + ├── python_client_test + │ ├── README.md + │ ├── test_cluster_api.py + │ ├── test_director.py + │ ├── test_job_api.py + │ └── test_utils.py + └── README.md +``` + +## For developers + +make sure you have installed setuptool + +`pip install -U pip setuptools` + +### run the pip command + +from the directory `path/to/kuberay/clients/python-client` + +`pip install -e .` + +### to uninstall the module run + +`pip uninstall python-client` + +### For testing run + + `python -m unittest discover 'path/to/kuberay/clients/python-client/python_client_test/'` + +[quick-start]: https://github.com/ray-project/kuberay#quick-start diff --git a/src/codeflare_sdk/vendored/__init__.py b/src/codeflare_sdk/vendored/__init__.py new file mode 100644 index 00000000..93f1b14f --- /dev/null +++ b/src/codeflare_sdk/vendored/__init__.py @@ -0,0 +1,14 @@ +""" +Vendored third-party dependencies. 
+ +This directory contains code from external projects that are bundled +with codeflare-sdk to avoid PyPI publishing restrictions. + +Contents: +- python_client: KubeRay Python client from ray-project/kuberay + Source: https://github.com/ray-project/kuberay @ b2fd91b58c2bbe22f9b4f730c5a8f3180c05e570 + License: Apache 2.0 (see LICENSE file) + + Vendored because the python-client is not published to PyPI and PyPI + does not allow direct git dependencies. +""" diff --git a/src/codeflare_sdk/vendored/examples/complete-example.py b/src/codeflare_sdk/vendored/examples/complete-example.py new file mode 100644 index 00000000..8cfdfdcc --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/complete-example.py @@ -0,0 +1,144 @@ +import sys +import os +from os import path + + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by uncommenting the following code. +""" + +""" +sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) + +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) +""" + +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + +from codeflare_sdk.vendored.python_client.utils import ( + kuberay_cluster_utils, + kuberay_cluster_builder, +) + + +def main(): + print("starting cluster handler...") + my_kuberay_api = kuberay_cluster_api.RayClusterApi() # this is the main api object + + my_cluster_director = ( + kuberay_cluster_builder.Director() + ) # this is the director object, to create a cluster with a single line of code + + my_cluster_builder = ( + kuberay_cluster_builder.ClusterBuilder() + ) # this is the builder object, to create a cluster with a more granular control + 
+ my_Cluster_utils = ( + kuberay_cluster_utils.ClusterUtils() + ) # this is the utils object, to perform operations on a cluster + + cluster0 = my_cluster_director.build_small_cluster( + name="new-cluster0", labels={"demo-cluster": "yes"} + ) # this is the cluster object, it is a dict + + if cluster0: + my_kuberay_api.create_ray_cluster( + body=cluster0 + ) # this is the api call to create the cluster0 in k8s + + cluster1 = ( + my_cluster_builder.build_meta( + name="new-cluster1", labels={"demo-cluster": "yes"} + ) + .build_head() + .build_worker(group_name="workers") + .get_cluster() + ) + + if not my_cluster_builder.succeeded: + print("error building the cluster, aborting...") + return + my_kuberay_api.create_ray_cluster( + body=cluster1 + ) # this is the api call to create the cluster1 in k8s + + cluster2 = ( + my_cluster_builder.build_meta( + name="new-cluster2", labels={"demo-cluster": "yes"} + ) + .build_head() + .build_worker(group_name="workers") + .get_cluster() + ) + + if not my_cluster_builder.succeeded: + print("error building the cluster, aborting...") + return + + my_kuberay_api.create_ray_cluster( + body=cluster2 + ) # this is the api call to create the cluster2 in k8s + + # modifying the number of replicas in the workergroup + cluster_to_patch, succeeded = my_Cluster_utils.update_worker_group_replicas( + cluster2, group_name="workers", max_replicas=4, min_replicas=1, replicas=2 + ) + + if succeeded: + print( + "trying to patch raycluster = {}".format( + cluster_to_patch["metadata"]["name"] + ) + ) + my_kuberay_api.patch_ray_cluster( + name=cluster_to_patch["metadata"]["name"], ray_patch=cluster_to_patch + ) # this is the api call to patch the cluster2 in k8s + + cluster_to_patch, succeeded = my_Cluster_utils.duplicate_worker_group( + cluster1, group_name="workers", new_group_name="new-workers" + ) # this is the call to duplicate the worker group in cluster1 + if succeeded: + print( + "trying to patch raycluster = {}".format( + 
cluster_to_patch["metadata"]["name"] + ) + ) + my_kuberay_api.patch_ray_cluster( + name=cluster_to_patch["metadata"]["name"], ray_patch=cluster_to_patch + ) # this is the api call to patch the cluster1 in k8s + + kube_ray_list = my_kuberay_api.list_ray_clusters( + k8s_namespace="default", label_selector="demo-cluster=yes" + ) # this is the api call to list all the clusters in k8s + if "items" in kube_ray_list: + line = "-" * 72 + print(line) + print("{:<63s}{:>2s}".format("Name", "Namespace")) + print(line) + for cluster in kube_ray_list["items"]: + print( + "{:<63s}{:>2s}".format( + cluster["metadata"]["name"], + cluster["metadata"]["namespace"], + ) + ) + print(line) + + if "items" in kube_ray_list: + for cluster in kube_ray_list["items"]: + print("deleting raycluster = {}".format(cluster["metadata"]["name"])) + my_kuberay_api.delete_ray_cluster( + name=cluster["metadata"]["name"], + k8s_namespace=cluster["metadata"]["namespace"], + ) # this is the api call to delete the cluster in k8s + + +if __name__ == "__main__": + main() diff --git a/src/codeflare_sdk/vendored/examples/use-builder.py b/src/codeflare_sdk/vendored/examples/use-builder.py new file mode 100644 index 00000000..5309fc00 --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/use-builder.py @@ -0,0 +1,79 @@ +import sys +import os +from os import path +import json + + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by uncommenting the following code. 
+""" + +""" +sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) + +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) +""" + +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + +from codeflare_sdk.vendored.python_client.utils import kuberay_cluster_builder + + +def main(): + print("starting cluster handler...") + my_kuberay_api = kuberay_cluster_api.RayClusterApi() + + my_cluster_builder = kuberay_cluster_builder.ClusterBuilder() + + cluster1 = ( + my_cluster_builder.build_meta( + name="new-cluster1", labels={"demo-cluster": "yes"} + ) + .build_head() + .build_worker(group_name="workers") + .get_cluster() + ) + + if not my_cluster_builder.succeeded: + print("error building the cluster, aborting...") + return + + print("creating raycluster = {}".format(cluster1["metadata"]["name"])) + my_kuberay_api.create_ray_cluster(body=cluster1) + + # the rest of the code is simply to list and cleanup the created cluster + kube_ray_list = my_kuberay_api.list_ray_clusters( + k8s_namespace="default", label_selector="demo-cluster=yes" + ) + if "items" in kube_ray_list: + line = "-" * 72 + print(line) + print("{:<63s}{:>2s}".format("Name", "Namespace")) + print(line) + for cluster in kube_ray_list["items"]: + print( + "{:<63s}{:>2s}".format( + cluster["metadata"]["name"], + cluster["metadata"]["namespace"], + ) + ) + print(line) + + if "items" in kube_ray_list: + for cluster in kube_ray_list["items"]: + print("deleting raycluster = {}".format(cluster["metadata"]["name"])) + my_kuberay_api.delete_ray_cluster( + name=cluster["metadata"]["name"], + k8s_namespace=cluster["metadata"]["namespace"], + ) + + +if __name__ == "__main__": + main() diff --git 
a/src/codeflare_sdk/vendored/examples/use-director.py b/src/codeflare_sdk/vendored/examples/use-director.py new file mode 100644 index 00000000..2608c154 --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/use-director.py @@ -0,0 +1,98 @@ +import sys +import os +from os import path +import json +import time + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by uncommenting the following code. +""" + +""" +sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) + +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) + +""" +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + +from codeflare_sdk.vendored.python_client.utils import kuberay_cluster_builder + + +def wait(duration: int = 5, step_name: str = "next"): + print("waiting for {} seconds before {} step".format(duration, step_name)) + for i in range(duration, 0, -1): + sys.stdout.write(str(i) + " ") + sys.stdout.flush() + time.sleep(1) + print() + + +def main(): + print("starting cluster handler...") + + my_kube_ray_api = kuberay_cluster_api.RayClusterApi() + + my_cluster_director = kuberay_cluster_builder.Director() + + # building the raycluster representation + cluster_body = my_cluster_director.build_small_cluster( + name="new-small-cluster", k8s_namespace="default" + ) + + # creating the raycluster in k8s + if cluster_body: + print("creating the cluster...") + my_kube_ray_api.create_ray_cluster(body=cluster_body) + + # now the cluster should be created. 
+ # the rest of the code is simply to fetch, print and cleanup the created cluster + + print("fetching the cluster...") + # fetching the raycluster from k8s api-server + kube_ray_cluster = my_kube_ray_api.get_ray_cluster( + name=cluster_body["metadata"]["name"], k8s_namespace="default" + ) + + if kube_ray_cluster: + print( + "try: kubectl -n {} get raycluster {} -o yaml".format( + kube_ray_cluster["metadata"]["namespace"], + kube_ray_cluster["metadata"]["name"], + ) + ) + wait(step_name="print created cluster in JSON") + print("printing the raycluster JSON representation...") + json_formatted_str = json.dumps(kube_ray_cluster, indent=2) + print(json_formatted_str) + + # waiting until the cluster is running, and has its status updated + is_running = my_kube_ray_api.wait_until_ray_cluster_running( + name=kube_ray_cluster["metadata"]["name"], + k8s_namespace=kube_ray_cluster["metadata"]["namespace"], + ) + + print( + "raycluster {} status is {}".format( + kube_ray_cluster["metadata"]["name"], "Running" if is_running else "unknown" + ) + ) + + wait(step_name="cleaning up") + print("deleting raycluster {}.".format(kube_ray_cluster["metadata"]["name"])) + + my_kube_ray_api.delete_ray_cluster( + name=kube_ray_cluster["metadata"]["name"], + k8s_namespace=kube_ray_cluster["metadata"]["namespace"], + ) + + +if __name__ == "__main__": + main() diff --git a/src/codeflare_sdk/vendored/examples/use-raw-config_map_with-api.py b/src/codeflare_sdk/vendored/examples/use-raw-config_map_with-api.py new file mode 100644 index 00000000..97ac6a57 --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/use-raw-config_map_with-api.py @@ -0,0 +1,213 @@ +import json +from os import path +import os +import sys +import time +from kubernetes.client.rest import ApiException +from kubernetes import client +from kubernetes.stream import stream + + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by 
uncommenting the following code. +""" + +""" +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) +""" +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + + +configmap_body: dict = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "ray-code-single"}, + "data": { + "sample_code.py": 'import ray\nfrom os import environ\nredis_pass = environ.get("REDIS_PASSWORD") \nprint("trying to connect to Ray!")\nray.init(address="auto", _redis_password=redis_pass)\nprint("now executing some code with Ray!")\nimport time\nstart = time.time()\n@ray.remote\ndef f():\n time.sleep(0.01)\n return ray._private.services.get_node_ip_address()\nvalues=set(ray.get([f.remote() for _ in range(1000)]))\nprint("Ray Nodes: ",str(values))\nfile = open("/tmp/ray_nodes.txt","a")\nfile.write("available nodes: %s\\n" % str(values))\nfile.close()\nend = time.time()\nprint("Execution time = ",end - start)\n' + }, +} + +cluster_body: dict = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": { + "labels": {"controller-tools.k8s.io": "1.0", "demo-cluster": "yes"}, + "name": "raycluster-getting-started", + }, + "spec": { + "rayVersion": "2.46.0", + "headGroupSpec": { + "rayStartParams": { + "dashboard-host": "0.0.0.0", + "num-cpus": "2", + }, + "template": { + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.46.0", + "volumeMounts": [{"mountPath": "/opt", "name": "config"}], + } + ], + "resources": { + "limits": {"cpu": "2", "memory": "3G"}, + "requests": {"cpu": "1500m", "memory": "3G"}, + }, + "volumes": [ + { + "name": "config", + "configMap": { + "name": configmap_body["metadata"]["name"], + "items": [ + {"key": "sample_code.py", "path": 
"sample_code.py"} + ], + }, + } + ], + } + }, + }, + }, +} + +""" +the following code is simply to create a configmap and a raycluster using the kuberay_cluster_api + +after the cluster is created, the code will execute a python command in the head pod of the cluster + +then it will print the logs of the head pod + +then it will list the clusters and delete the cluster and the configmap +""" + + +def main(): + print("starting cluster handler...") + + my_kube_ray_api = kuberay_cluster_api.RayClusterApi() # creating the api object + + try: + my_kube_ray_api.core_v1_api.create_namespaced_config_map( + "default", configmap_body + ) + + except ApiException as e: + if e.status == 409: + print( + "configmap {} already exists = {} moving on...".format( + configmap_body["metadata"]["name"], e + ) + ) + else: + print("error creating configmap: {}".format(e)) + + # waiting for the configmap tp be created + time.sleep(3) + + my_kube_ray_api.create_ray_cluster(body=cluster_body) # creating the cluster + + # the rest of the code is simply to fetch, print and cleanup the created cluster + kube_ray_cluster = my_kube_ray_api.get_ray_cluster( + name=cluster_body["metadata"]["name"], k8s_namespace="default" + ) + + if kube_ray_cluster: + print("printing the raycluster json representation...") + json_formatted_str = json.dumps(kube_ray_cluster, indent=2) + print(json_formatted_str) + else: + print("Unable to fetch cluster {}".format(cluster_body["metadata"]["name"])) + return + + print( + "try: kubectl -n default get raycluster {} -o yaml".format( + kube_ray_cluster["metadata"]["name"] + ) + ) + # the rest of the code is simply to list and cleanup the created cluster + + time.sleep(3) + try: + pod_list: client.V1PodList = my_kube_ray_api.core_v1_api.list_namespaced_pod( + namespace="default", + label_selector="ray.io/cluster={}".format(cluster_body["metadata"]["name"]), + ) # getting the pods of the cluster + if pod_list != None: + for pod in pod_list.items: + try: + # Calling exec 
and waiting for response + exec_command = ["python", "/opt/sample_code.py"] + + print( + "executing a Python command in the raycluster: {}".format( + exec_command + ) + ) + # executing a ray command in the head pod + resp = stream( + my_kube_ray_api.core_v1_api.connect_get_namespaced_pod_exec, + pod.metadata.name, + "default", + command=exec_command, + stderr=True, + stdin=False, + stdout=True, + tty=False, + ) + print("Response: " + resp) + + # getting the logs from the pod + time.sleep(3) + print( + "getting the logs from the raycluster pod: {}".format( + pod.metadata.name + ) + ) + api_response = my_kube_ray_api.core_v1_api.read_namespaced_pod_log( + name=pod.metadata.name, namespace="default" + ) + print(api_response) + + except ApiException as e: + print("An exception has ocurred in reading the logs {}".format(e)) + except ApiException as e: + print("An exception has ocurred in listing pods the logs".format(e)) + + kube_ray_list = my_kube_ray_api.list_ray_clusters( + k8s_namespace="default", label_selector="demo-cluster=yes" + ) + + if "items" in kube_ray_list: + for cluster in kube_ray_list["items"]: + print("deleting raycluster = {}".format(cluster["metadata"]["name"])) + my_kube_ray_api.delete_ray_cluster( + name=cluster["metadata"]["name"], + k8s_namespace=cluster["metadata"]["namespace"], + ) # deleting the cluster + + try: + my_kube_ray_api.core_v1_api.delete_namespaced_config_map( + configmap_body["metadata"]["name"], "default" + ) # deleting the configmap + print("deleting configmap: {}".format(configmap_body["metadata"]["name"])) + except ApiException as e: + if e.status == 404: + print( + "configmap = {}, does not exist moving on...".format( + configmap_body["metadata"]["name"], e + ) + ) + else: + print("error deleting configmap: {}".format(e)) + + +if __name__ == "__main__": + main() diff --git a/src/codeflare_sdk/vendored/examples/use-raw-with-api.py b/src/codeflare_sdk/vendored/examples/use-raw-with-api.py new file mode 100644 index 
00000000..5ab89586 --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/use-raw-with-api.py @@ -0,0 +1,195 @@ +import json +from os import path +import os +import sys + + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by uncommenting the following code. +""" + +""" +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) +""" +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + +cluster_body: dict = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": { + "labels": {"controller-tools.k8s.io": "1.0", "demo-cluster": "yes"}, + "name": "raycluster-mini-raw", + }, + "spec": { + "rayVersion": "2.46.0", + "headGroupSpec": { + "rayStartParams": { + "dashboard-host": "0.0.0.0", + "num-cpus": "1", + }, + "template": { + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.46.0", + "resources": { + "limits": {"cpu": 1, "memory": "2Gi"}, + "requests": {"cpu": "500m", "memory": "2Gi"}, + }, + "ports": [ + {"containerPort": 6379, "name": "gcs-server"}, + {"containerPort": 8265, "name": "dashboard"}, + {"containerPort": 10001, "name": "client"}, + ], + } + ] + } + }, + }, + }, +} + + +cluster_body2: dict = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": { + "labels": {"controller-tools.k8s.io": "1.0", "demo-cluster": "yes"}, + "name": "raycluster-complete-raw", + }, + "spec": { + "rayVersion": "2.46.0", + "headGroupSpec": { + "rayStartParams": {"dashboard-host": "0.0.0.0"}, + "template": { + "metadata": {"labels": {}}, + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.46.0", + "ports": [ + 
{"containerPort": 6379, "name": "gcs"}, + {"containerPort": 8265, "name": "dashboard"}, + {"containerPort": 10001, "name": "client"}, + ], + "lifecycle": { + "preStop": { + "exec": {"command": ["/bin/sh", "-c", "ray stop"]} + } + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + "resources": { + "limits": {"cpu": "1", "memory": "2G"}, + "requests": {"cpu": "500m", "memory": "2G"}, + }, + } + ], + "volumes": [{"name": "ray-logs", "emptyDir": {}}], + }, + }, + }, + "workerGroupSpecs": [ + { + "replicas": 1, + "minReplicas": 1, + "maxReplicas": 10, + "groupName": "small-group", + "rayStartParams": {}, + "template": { + "spec": { + "containers": [ + { + "name": "ray-worker", + "image": "rayproject/ray:2.46.0", + "lifecycle": { + "preStop": { + "exec": { + "command": ["/bin/sh", "-c", "ray stop"] + } + } + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + "resources": { + "limits": {"cpu": "2", "memory": "3G"}, + "requests": {"cpu": "1500m", "memory": "3G"}, + }, + } + ], + "volumes": [{"name": "ray-logs", "emptyDir": {}}], + } + }, + } + ], + }, +} + + +def main(): + print("starting cluster handler...") + + my_kube_ray_api = kuberay_cluster_api.RayClusterApi() + + my_kube_ray_api.create_ray_cluster(body=cluster_body) + + my_kube_ray_api.create_ray_cluster(body=cluster_body2) + + # the rest of the code is simply to fetch, print and cleanup the created cluster + kube_ray_cluster = my_kube_ray_api.get_ray_cluster( + name=cluster_body["metadata"]["name"], k8s_namespace="default" + ) + + if kube_ray_cluster: + print("printing the raycluster json representation...") + json_formatted_str = json.dumps(kube_ray_cluster, indent=2) + print(json_formatted_str) + else: + print("Unable to fetch cluster {}".format(cluster_body["metadata"]["name"])) + return + + print( + "try: kubectl -n default get raycluster {} -o yaml".format( + kube_ray_cluster["metadata"]["name"] + ) + ) + # the rest of the code is simply to list and cleanup 
the created cluster + kube_ray_list = my_kube_ray_api.list_ray_clusters( + k8s_namespace="default", label_selector="demo-cluster=yes" + ) + if "items" in kube_ray_list: + line = "-" * 72 + print(line) + print("{:<63s}{:>2s}".format("Name", "Namespace")) + print(line) + for cluster in kube_ray_list["items"]: + print( + "{:<63s}{:>2s}".format( + cluster["metadata"]["name"], + cluster["metadata"]["namespace"], + ) + ) + print(line) + + if "items" in kube_ray_list: + for cluster in kube_ray_list["items"]: + print("deleting raycluster = {}".format(cluster["metadata"]["name"])) + my_kube_ray_api.delete_ray_cluster( + name=cluster["metadata"]["name"], + k8s_namespace=cluster["metadata"]["namespace"], + ) + + +if __name__ == "__main__": + main() diff --git a/src/codeflare_sdk/vendored/examples/use-utils.py b/src/codeflare_sdk/vendored/examples/use-utils.py new file mode 100644 index 00000000..ab3e3736 --- /dev/null +++ b/src/codeflare_sdk/vendored/examples/use-utils.py @@ -0,0 +1,117 @@ +import sys +import os +from os import path +import json + + +""" +in case you are working directly with the source, and don't wish to +install the module with pip install, you can directly import the packages by uncommenting the following code. 
+""" + +""" +sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) + +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir)) +sibling_dirs = [ + d for d in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, d)) +] +for sibling_dir in sibling_dirs: + sys.path.append(os.path.join(parent_dir, sibling_dir)) +""" + +from codeflare_sdk.vendored.python_client import kuberay_cluster_api + +from codeflare_sdk.vendored.python_client.utils import ( + kuberay_cluster_utils, + kuberay_cluster_builder, +) + + +def main(): + print("starting cluster handler...") + my_kuberay_api = kuberay_cluster_api.RayClusterApi() # this is the main api object + + my_cluster_builder = ( + kuberay_cluster_builder.ClusterBuilder() + ) # this is the builder object, to create a cluster with a more granular control + + my_Cluster_utils = ( + kuberay_cluster_utils.ClusterUtils() + ) # this is the utils object, to perform operations on a cluster + + cluster1 = ( + my_cluster_builder.build_meta( + name="new-cluster1", labels={"demo-cluster": "yes"} + ) + .build_head() + .build_worker(group_name="workers") + .get_cluster() + ) # this is the cluster object, it is a dict + + if not my_cluster_builder.succeeded: + print("error building the cluster, aborting...") + return + + print("creating raycluster = {}".format(cluster1["metadata"]["name"])) + my_kuberay_api.create_ray_cluster( + body=cluster1 + ) # this is the api call to create the cluster1 in k8s + + cluster_to_patch, succeeded = my_Cluster_utils.update_worker_group_replicas( + cluster1, group_name="workers", max_replicas=4, min_replicas=1, replicas=2 + ) + + if succeeded: + print( + "trying to patch raycluster = {}".format( + cluster_to_patch["metadata"]["name"] + ) + ) + my_kuberay_api.patch_ray_cluster( + name=cluster_to_patch["metadata"]["name"], ray_patch=cluster_to_patch + ) # this is the api call to patch the cluster1 in k8s + + cluster_to_patch, 
succeeded = my_Cluster_utils.duplicate_worker_group( + cluster1, group_name="workers", new_group_name="duplicate-workers" + ) # this is the api call to duplicate the worker group in the cluster1 + if succeeded: + print( + "trying to patch raycluster = {}".format( + cluster_to_patch["metadata"]["name"] + ) + ) + my_kuberay_api.patch_ray_cluster( + name=cluster_to_patch["metadata"]["name"], ray_patch=cluster_to_patch + ) + + # the rest of the code is simply to list and cleanup the created cluster + kube_ray_list = my_kuberay_api.list_ray_clusters( + k8s_namespace="default", label_selector="demo-cluster=yes" + ) # this is the api call to list the clusters in k8s + if "items" in kube_ray_list: + line = "-" * 72 + print(line) + print("{:<63s}{:>2s}".format("Name", "Namespace")) + print(line) + for cluster in kube_ray_list["items"]: + print( + "{:<63s}{:>2s}".format( + cluster["metadata"]["name"], + cluster["metadata"]["namespace"], + ) + ) + print(line) + + if "items" in kube_ray_list: + for cluster in kube_ray_list["items"]: + print("deleting raycluster = {}".format(cluster["metadata"]["name"])) + my_kuberay_api.delete_ray_cluster( + name=cluster["metadata"]["name"], + k8s_namespace=cluster["metadata"]["namespace"], + ) # this is the api call to delete the cluster in k8s + + +if __name__ == "__main__": + main() diff --git a/src/codeflare_sdk/vendored/poetry.lock b/src/codeflare_sdk/vendored/poetry.lock new file mode 100644 index 00000000..b8d82ccc --- /dev/null +++ b/src/codeflare_sdk/vendored/poetry.lock @@ -0,0 +1,439 @@ +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. 
+ +[[package]] +name = "cachetools" +version = "5.5.2" +description = "Extensible memoizing collections and decorators" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, + {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, +] + +[[package]] +name = "certifi" +version = "2025.6.15" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057"}, + {file = "certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "charset_normalizer-3.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c48ed483eb946e6c04ccbe02c6b4d1d48e51944b6db70f697e089c193404941"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2d318c11350e10662026ad0eb71bb51c7812fc8590825304ae0bdd4ac283acd"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9cbfacf36cb0ec2897ce0ebc5d08ca44213af24265bd56eca54bee7923c48fd6"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18dd2e350387c87dabe711b86f83c9c78af772c748904d372ade190b5c7c9d4d"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8075c35cd58273fee266c58c0c9b670947c19df5fb98e7b66710e04ad4e9ff86"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5bf4545e3b962767e5c06fe1738f951f77d27967cb2caa64c28be7c4563e162c"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a6ab32f7210554a96cd9e33abe3ddd86732beeafc7a28e9955cdf22ffadbab0"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b33de11b92e9f75a2b545d6e9b6f37e398d86c3e9e9653c4864eb7e89c5773ef"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8755483f3c00d6c9a77f490c17e6ab0c8729e39e6390328e42521ef175380ae6"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:68a328e5f55ec37c57f19ebb1fdc56a248db2e3e9ad769919a58672958e8f366"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:21b2899062867b0e1fde9b724f8aecb1af14f2778d69aacd1a5a1853a597a5db"}, + {file = 
"charset_normalizer-3.4.2-cp310-cp310-win32.whl", hash = "sha256:e8082b26888e2f8b36a042a58307d5b917ef2b1cacab921ad3323ef91901c71a"}, + {file = "charset_normalizer-3.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:f69a27e45c43520f5487f27627059b64aaf160415589230992cec34c5e18a509"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = 
"sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-win32.whl", hash = "sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a"}, + {file = "charset_normalizer-3.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c"}, + {file = 
"charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c"}, + {file = "charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7"}, + {file = "charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cad5f45b3146325bb38d6855642f6fd609c3f7cad4dbaf75549bf3b904d3184"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2680962a4848b3c4f155dc2ee64505a9c57186d0d56b43123b17ca3de18f0fa"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:36b31da18b8890a76ec181c3cf44326bf2c48e36d393ca1b72b3f484113ea344"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f4074c5a429281bf056ddd4c5d3b740ebca4d43ffffe2ef4bf4d2d05114299da"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9e36a97bee9b86ef9a1cf7bb96747eb7a15c2f22bdb5b516434b00f2a599f02"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_aarch64.whl", 
hash = "sha256:1b1bde144d98e446b056ef98e59c256e9294f6b74d7af6846bf5ffdafd687a7d"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:915f3849a011c1f593ab99092f3cecfcb4d65d8feb4a64cf1bf2d22074dc0ec4"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:fb707f3e15060adf5b7ada797624a6c6e0138e2a26baa089df64c68ee98e040f"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:25a23ea5c7edc53e0f29bae2c44fcb5a1aa10591aae107f2a2b2583a9c5cbc64"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:770cab594ecf99ae64c236bc9ee3439c3f46be49796e265ce0cc8bc17b10294f"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-win32.whl", hash = "sha256:6a0289e4589e8bdfef02a80478f1dfcb14f0ab696b5a00e1f4b8a14a307a3c58"}, + {file = "charset_normalizer-3.4.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6fc1f5b51fa4cecaa18f2bd7a003f3dd039dd615cd69a2afd6d3b19aed6775f2"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76af085e67e56c8816c3ccf256ebd136def2ed9654525348cfa744b6802b69eb"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e45ba65510e2647721e35323d6ef54c7974959f6081b58d4ef5d87c60c84919a"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:046595208aae0120559a67693ecc65dd75d46f7bf687f159127046628178dc45"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75d10d37a47afee94919c4fab4c22b9bc2a8bf7d4f46f87363bcf0573f3ff4f5"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6333b3aa5a12c26b2a4d4e7335a28f1475e0e5e17d69d55141ee3cab736f66d1"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash 
= "sha256:e8323a9b031aa0393768b87f04b4164a40037fb2a3c11ac06a03ffecd3618027"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:24498ba8ed6c2e0b56d4acbf83f2d989720a93b41d712ebd4f4979660db4417b"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:844da2b5728b5ce0e32d863af26f32b5ce61bc4273a9c720a9f3aa9df73b1455"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:65c981bdbd3f57670af8b59777cbfae75364b483fa8a9f420f08094531d54a01"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:3c21d4fca343c805a52c0c78edc01e3477f6dd1ad7c47653241cf2a206d4fc58"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:dc7039885fa1baf9be153a0626e337aa7ec8bf96b0128605fb0d77788ddc1681"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-win32.whl", hash = "sha256:8272b73e1c5603666618805fe821edba66892e2870058c94c53147602eab29c7"}, + {file = "charset_normalizer-3.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:70f7172939fdf8790425ba31915bfbe8335030f05b9913d7ae00a87d4395620a"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:005fa3432484527f9732ebd315da8da8001593e2cf46a3d817669f062c3d9ed4"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e92fca20c46e9f5e1bb485887d074918b13543b1c2a1185e69bb8d17ab6236a7"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50bf98d5e563b83cc29471fa114366e6806bc06bc7a25fd59641e41445327836"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:721c76e84fe669be19c5791da68232ca2e05ba5185575086e384352e2c309597"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:82d8fd25b7f4675d0c47cf95b594d4e7b158aca33b76aa63d07186e13c0e0ab7"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3daeac64d5b371dea99714f08ffc2c208522ec6b06fbc7866a450dd446f5c0f"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dccab8d5fa1ef9bfba0590ecf4d46df048d18ffe3eec01eeb73a42e0d9e7a8ba"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:aaf27faa992bfee0264dc1f03f4c75e9fcdda66a519db6b957a3f826e285cf12"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:eb30abc20df9ab0814b5a2524f23d75dcf83cde762c161917a2b4b7b55b1e518"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c72fbbe68c6f32f251bdc08b8611c7b3060612236e960ef848e0a517ddbe76c5"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:982bb1e8b4ffda883b3d0a521e23abcd6fd17418f6d2c4118d257a10199c0ce3"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-win32.whl", hash = "sha256:43e0933a0eff183ee85833f341ec567c0980dae57c464d8a508e1b2ceb336471"}, + {file = "charset_normalizer-3.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:d11b54acf878eef558599658b0ffca78138c8c3655cf4f3a4a673c437e67732e"}, + {file = "charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0"}, + {file = "charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63"}, +] + +[[package]] +name = "durationpy" +version = "0.10" +description = "Module for converting between datetime.timedelta and Go's Duration strings." 
+optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286"}, + {file = "durationpy-0.10.tar.gz", hash = "sha256:1fa6893409a6e739c9c72334fc65cca1f355dbdd93405d30f726deb5bde42fba"}, +] + +[[package]] +name = "google-auth" +version = "2.40.3" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca"}, + {file = "google_auth-2.40.3.tar.gz", hash = "sha256:500c3a29adedeb36ea9cf24b8d10858e152f2412e3ca37829b3fa18e33d63b77"}, +] + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = ">=3.1.4,<5" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0)", "requests (>=2.20.0,<3.0.0)"] +enterprise-cert = ["cryptography", "pyopenssl"] +pyjwt = ["cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "pyjwt (>=2.0)"] +pyopenssl = ["cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0)"] +testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", "packaging", "pyjwt (>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"] +urllib3 = ["packaging", "urllib3"] + +[[package]] +name = "idna" +version = "3.10" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "idna-3.10-py3-none-any.whl", 
hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, +] + +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + +[[package]] +name = "kubernetes" +version = "33.1.0" +description = "Kubernetes python client" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "kubernetes-33.1.0-py2.py3-none-any.whl", hash = "sha256:544de42b24b64287f7e0aa9513c93cb503f7f40eea39b20f66810011a86eabc5"}, + {file = "kubernetes-33.1.0.tar.gz", hash = "sha256:f64d829843a54c251061a8e7a14523b521f2dc5c896cf6d65ccf348648a88993"}, +] + +[package.dependencies] +certifi = ">=14.05.14" +durationpy = ">=0.7" +google-auth = ">=1.0.1" +oauthlib = ">=3.2.2" +python-dateutil = ">=2.5.3" +pyyaml = ">=5.4.1" +requests = "*" +requests-oauthlib = "*" +six = ">=1.9.0" +urllib3 = ">=1.24.2" +websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0" + +[package.extras] +adal = ["adal (>=1.0.2)"] + +[[package]] +name = "oauthlib" +version = "3.3.1" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1"}, + {file = "oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9"}, +] + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + +[[package]] +name = "pyasn1" +version = "0.6.1" +description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pyasn1-0.6.1-py3-none-any.whl", 
hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, + {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +description = "A collection of ASN.1-based protocols modules" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"}, + {file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"}, +] + +[package.dependencies] +pyasn1 = ">=0.6.1,<0.7.0" + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pyyaml" +version = "6.0.2" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, + {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, + {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, + {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = 
"sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, + {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, + {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, + {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, + {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, + {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, + {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, + {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, + {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, + {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = 
"sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, + {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, + {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, + {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, +] + +[[package]] +name = "requests" +version = "2.32.4" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c"}, + {file = "requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset_normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +description = "OAuthlib authentication support for Requests." +optional = false +python-versions = ">=3.4" +groups = ["main"] +files = [ + {file = "requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9"}, + {file = "requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36"}, +] + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + +[[package]] +name = "rsa" +version = "4.9.1" +description = "Pure-Python RSA implementation" +optional = false +python-versions = "<4,>=3.6" +groups = ["main"] +files = [ + {file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"}, + {file = "rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"}, +] + +[package.dependencies] +pyasn1 = ">=0.1.3" + +[[package]] +name = "six" +version = "1.17.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = 
"sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"}, + {file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "websocket-client" +version = "1.8.0" +description = "WebSocket client for Python with low level API options" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, + {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"}, +] + +[package.extras] +docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"] +optional = ["python-socks", "wsaccel"] +test = ["websockets"] + +[metadata] +lock-version = "2.1" +python-versions = "^3.11" +content-hash = "b36691a561f80767983438fab17d96ee0064eac943f5055abc2dcfee84c07dd7" diff --git a/src/codeflare_sdk/vendored/pyproject.toml b/src/codeflare_sdk/vendored/pyproject.toml new file mode 100755 index 00000000..916829ba --- /dev/null +++ b/src/codeflare_sdk/vendored/pyproject.toml @@ -0,0 +1,26 @@ +[tool.poetry] +name = "python-client" +version = "0.0.0-dev" +description = "Python Client for Kuberay" +license = "Apache-2.0" + +readme = "README.md" +repository = 
"https://github.com/ray-project/kuberay" +homepage = "https://github.com/ray-project/kuberay" +keywords = ["kuberay", "python", "client"] +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent" +] +packages = [ + { include = "python_client" } +] + +[tool.poetry.dependencies] +python = "^3.11" +kubernetes = ">=25.0.0" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/src/codeflare_sdk/vendored/python_client/__init__.py b/src/codeflare_sdk/vendored/python_client/__init__.py new file mode 100644 index 00000000..6849410a --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client/__init__.py @@ -0,0 +1 @@ +__version__ = "1.1.0" diff --git a/src/codeflare_sdk/vendored/python_client/constants.py b/src/codeflare_sdk/vendored/python_client/constants.py new file mode 100644 index 00000000..d47e270d --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client/constants.py @@ -0,0 +1,13 @@ +# Declares the constants that are used by the client +import logging + +# Group, Version, Plural +GROUP = "ray.io" +CLUSTER_VERSION = "v1" +JOB_VERSION = "v1" +CLUSTER_PLURAL = "rayclusters" +JOB_PLURAL = "rayjobs" +CLUSTER_KIND = "RayCluster" +JOB_KIND = "RayJob" +# log level +LOGLEVEL = logging.INFO diff --git a/src/codeflare_sdk/vendored/python_client/kuberay_cluster_api.py b/src/codeflare_sdk/vendored/python_client/kuberay_cluster_api.py new file mode 100644 index 00000000..8307cdda --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client/kuberay_cluster_api.py @@ -0,0 +1,311 @@ +""" +Set of APIs to manage rayclusters. +""" + +__copyright__ = "Copyright 2021, Microsoft Corp." 
+ +import logging +import time +from kubernetes import client, config +from kubernetes.client.rest import ApiException +from typing import Any, Optional +from codeflare_sdk.vendored.python_client import constants + + +log = logging.getLogger(__name__) +if logging.getLevelName(log.level) == "NOTSET": + logging.basicConfig(format="%(asctime)s %(message)s", level=constants.LOGLEVEL) + + +class RayClusterApi: + """ + RayClusterApi provides APIs to list, get, create, build, update, delete rayclusters. + + Methods: + - list_ray_clusters(k8s_namespace: str = "default", async_req: bool = False) -> Any: + - get_ray_cluster(name: str, k8s_namespace: str = "default") -> Any: + - create_ray_cluster(body: Any, k8s_namespace: str = "default") -> Any: + - delete_ray_cluster(name: str, k8s_namespace: str = "default") -> bool: + - patch_ray_cluster(name: str, ray_patch: Any, k8s_namespace: str = "default") -> Any: + """ + + # initial config to setup the kube client + def __init__(self): + # loading the config + try: + self.kube_config: Optional[Any] = config.load_kube_config() + except config.ConfigException: + # No kubeconfig found, try in-cluster config + try: + self.kube_config: Optional[Any] = config.load_incluster_config() + except config.ConfigException: + log.error("Failed to load both kubeconfig and in-cluster config") + raise + + self.api = client.CustomObjectsApi() + self.core_v1_api = client.CoreV1Api() + + def __del__(self): + self.api = None + self.kube_config = None + + def list_ray_clusters( + self, + k8s_namespace: str = "default", + label_selector: str = "", + async_req: bool = False, + ) -> Any: + """List Ray clusters in a given namespace. + + Parameters: + - k8s_namespace (str, optional): The namespace in which to list the Ray clusters. Defaults to "default". + - async_req (bool, optional): Whether to make the request asynchronously. Defaults to False. + + Returns: + Any: The custom resource for Ray clusters in the specified namespace, or None if not found. 
+ + Raises: + ApiException: If there was an error fetching the custom resource. + """ + try: + resource: Any = self.api.list_namespaced_custom_object( + group=constants.GROUP, + version=constants.CLUSTER_VERSION, + plural=constants.CLUSTER_PLURAL, + namespace=k8s_namespace, + label_selector=label_selector, + async_req=async_req, + ) + if "items" in resource: + return resource + return None + except ApiException as e: + if e.status == 404: + log.error("raycluster resource is not found. error = {}".format(e)) + return None + else: + log.error("error fetching custom resource: {}".format(e)) + return None + + def get_ray_cluster(self, name: str, k8s_namespace: str = "default") -> Any: + """Get a specific Ray cluster in a given namespace. + + Parameters: + - name (str): The name of the Ray cluster custom resource. Defaults to "". + - k8s_namespace (str, optional): The namespace in which to retrieve the Ray cluster. Defaults to "default". + + Returns: + Any: The custom resource for the specified Ray cluster, or None if not found. + + Raises: + ApiException: If there was an error fetching the custom resource. + """ + try: + resource: Any = self.api.get_namespaced_custom_object( + group=constants.GROUP, + version=constants.CLUSTER_VERSION, + plural=constants.CLUSTER_PLURAL, + name=name, + namespace=k8s_namespace, + ) + return resource + except ApiException as e: + if e.status == 404: + log.error("raycluster resource is not found. error = {}".format(e)) + return None + else: + log.error("error fetching custom resource: {}".format(e)) + return None + + def get_ray_cluster_status( + self, + name: str, + k8s_namespace: str = "default", + timeout: int = 60, + delay_between_attempts: int = 5, + ) -> Any: + """Get a specific Ray cluster status in a given namespace. + + This method waits until the cluster has a status field populated by the operator. + + Parameters: + - name (str): The name of the Ray cluster custom resource. 
+ - k8s_namespace (str, optional): The namespace in which to retrieve the Ray cluster. Defaults to "default". + - timeout (int, optional): The duration in seconds after which we stop trying to get status. Defaults to 60 seconds. + - delay_between_attempts (int, optional): The duration in seconds to wait between attempts. Defaults to 5 seconds. + + Returns: + Any: The custom resource status for the specified Ray cluster, or None if not found or timeout. + """ + while timeout > 0: + try: + resource: Any = self.api.get_namespaced_custom_object_status( + group=constants.GROUP, + version=constants.CLUSTER_VERSION, + plural=constants.CLUSTER_PLURAL, + name=name, + namespace=k8s_namespace, + ) + except ApiException as e: + if e.status == 404: + log.error("raycluster resource is not found. error = {}".format(e)) + return None + else: + log.error("error fetching custom resource: {}".format(e)) + return None + + if resource and "status" in resource and resource["status"]: + return resource["status"] + else: + log.info("raycluster {} status not set yet, waiting...".format(name)) + time.sleep(delay_between_attempts) + timeout -= delay_between_attempts + + log.info("timed out waiting for raycluster {} status".format(name)) + return None + + def wait_until_ray_cluster_running( + self, + name: str, + k8s_namespace: str = "default", + timeout: int = 60, + delay_between_attempts: int = 5, + ) -> bool: + """Wait until a Ray cluster is in ready state. + + This method waits for the cluster to have a state field with value 'ready'. + + Parameters: + - name (str): The name of the Ray cluster custom resource. + - k8s_namespace (str, optional): The namespace in which to retrieve the Ray cluster. Defaults to "default". + - timeout (int, optional): The duration in seconds after which we stop trying. Defaults to 60 seconds. + - delay_between_attempts (int, optional): The duration in seconds to wait between attempts. Defaults to 5 seconds. 
+ + Returns: + bool: True if the raycluster status is 'ready', False otherwise. + """ + while timeout > 0: + status = self.get_ray_cluster_status( + name, k8s_namespace, timeout, delay_between_attempts + ) + + if status and "state" in status: + current_state = status["state"] + if current_state == "ready": + log.info( + "raycluster {} is ready with state: {}".format( + name, current_state + ) + ) + return True + else: + log.info( + "raycluster {} is in state: {} (waiting for ready)".format( + name, current_state + ) + ) + else: + log.info( + "raycluster {} state field not available yet, waiting...".format( + name + ) + ) + + time.sleep(delay_between_attempts) + timeout -= delay_between_attempts + + log.info("raycluster {} has not become ready before timeout".format(name)) + return False + + def create_ray_cluster(self, body: Any, k8s_namespace: str = "default") -> Any: + """Create a new Ray cluster custom resource. + + Parameters: + - body (Any): The data of the custom resource to create. + - k8s_namespace (str, optional): The namespace in which to create the custom resource. Defaults to "default". + + Returns: + Any: The created custom resource, or None if it already exists or there was an error. + """ + try: + resource: Any = self.api.create_namespaced_custom_object( + group=constants.GROUP, + version=constants.CLUSTER_VERSION, + plural=constants.CLUSTER_PLURAL, + body=body, + namespace=k8s_namespace, + ) + return resource + except ApiException as e: + if e.status == 409: + log.error( + "raycluster resource already exists. error = {}".format(e.reason) + ) + return None + else: + log.error("error creating custom resource: {}".format(e)) + return None + + def delete_ray_cluster(self, name: str, k8s_namespace: str = "default") -> bool: + """Delete a Ray cluster custom resource. + + Parameters: + - name (str): The name of the Ray cluster custom resource to delete. + - k8s_namespace (str, optional): The namespace in which the Ray cluster exists. Defaults to "default". 
+ + Returns: + Any: The deleted custom resource, or None if already deleted or there was an error. + """ + try: + resource: Any = self.api.delete_namespaced_custom_object( + group=constants.GROUP, + version=constants.CLUSTER_VERSION, + plural=constants.CLUSTER_PLURAL, + name=name, + namespace=k8s_namespace, + ) + return resource + except ApiException as e: + if e.status == 404: + log.error( + "raycluster custom resource already deleted. error = {}".format( + e.reason + ) + ) + return None + else: + log.error( + "error deleting the raycluster custom resource: {}".format(e.reason) + ) + return None + + def patch_ray_cluster( + self, name: str, ray_patch: Any, k8s_namespace: str = "default" + ) -> Any: + """Patch an existing Ray cluster custom resource. + + Parameters: + - name (str): The name of the Ray cluster custom resource to be patched. + - ray_patch (Any): The patch data for the Ray cluster. + - k8s_namespace (str, optional): The namespace in which the Ray cluster exists. Defaults to "default". + + Returns: + bool: True if the patch was successful, False otherwise. + """ + try: + # we patch the existing raycluster with the new config + self.api.patch_namespaced_custom_object( + group=constants.GROUP, + version=constants.CLUSTER_VERSION, + plural=constants.CLUSTER_PLURAL, + name=name, + body=ray_patch, + namespace=k8s_namespace, + ) + except ApiException as e: + log.error("raycluster `{}` failed to patch, with error: {}".format(name, e)) + return False + else: + log.info("raycluster `%s` is patched successfully", name) + + return True diff --git a/src/codeflare_sdk/vendored/python_client/kuberay_job_api.py b/src/codeflare_sdk/vendored/python_client/kuberay_job_api.py new file mode 100644 index 00000000..d2d1d7e0 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client/kuberay_job_api.py @@ -0,0 +1,381 @@ +""" +Set of APIs to manage rayjobs. 
+""" + +import logging +import time +from kubernetes import client, config +from kubernetes.client.rest import ApiException +from typing import Any, Optional +from codeflare_sdk.vendored.python_client import constants + + +log = logging.getLogger(__name__) +if logging.getLevelName(log.level) == "NOTSET": + logging.basicConfig(format="%(asctime)s %(message)s", level=constants.LOGLEVEL) + +TERMINAL_JOB_STATUSES = [ + "STOPPED", + "SUCCEEDED", + "FAILED", +] + + +class RayjobApi: + """ + RayjobApi provides APIs to list, get, create, build, update, delete rayjobs. + Methods: + - submit_job(k8s_namespace: str, job: Any) -> Any: Submit and execute a job asynchronously. + - suspend_job(name: str, k8s_namespace: str) -> bool: Stop a job by suspending it. + - resubmit_job(name: str, k8s_namespace: str) -> bool: Resubmit a job that has been suspended. + - get_job(name: str, k8s_namespace: str) -> Any: Get a job. + - list_jobs(k8s_namespace: str) -> Any: List all jobs. + - get_job_status(name: str, k8s_namespace: str, timeout: int, delay_between_attempts: int) -> Any: Get the most recent status of a job. + - wait_until_job_finished(name: str, k8s_namespace: str, timeout: int, delay_between_attempts: int) -> bool: Wait until a job is completed. + - wait_until_job_running(name: str, k8s_namespace: str, timeout: int, delay_between_attempts: int) -> bool: Wait until a job reaches running state. + - delete_job(name: str, k8s_namespace: str) -> bool: Delete a job and all of its associated data. 
+ """ + + # initial config to setup the kube client + def __init__(self): + # loading the config + try: + self.kube_config: Optional[Any] = config.load_kube_config() + except config.ConfigException: + # No kubeconfig found, try in-cluster config + try: + self.kube_config: Optional[Any] = config.load_incluster_config() + except config.ConfigException: + log.error("Failed to load both kubeconfig and in-cluster config") + raise + + self.api = client.CustomObjectsApi() + + def __del__(self): + self.api = None + self.kube_config = None + + def submit_job(self, k8s_namespace: str = "default", job: Any = None) -> Any: + """Submit a Ray job to a given namespace.""" + try: + rayjob = self.api.create_namespaced_custom_object( + group=constants.GROUP, + version=constants.JOB_VERSION, + plural=constants.JOB_PLURAL, + body=job, + namespace=k8s_namespace, + ) + return rayjob + except ApiException as e: + log.error("error submitting ray job: {}".format(e)) + return None + + def get_job_status( + self, + name: str, + k8s_namespace: str = "default", + timeout: int = 60, + delay_between_attempts: int = 5, + ) -> Any: + """Get a specific Ray job status in a given namespace. + + This method waits until the job has a status field populated by the operator. + + Parameters: + - name (str): The name of the Ray job custom resource. + - k8s_namespace (str, optional): The namespace in which to retrieve the Ray job. Defaults to "default". + - timeout (int, optional): The duration in seconds after which we stop trying to get status. Defaults to 60 seconds. + - delay_between_attempts (int, optional): The duration in seconds to wait between attempts. Defaults to 5 seconds. + + Returns: + Any: The custom resource status for the specified Ray job, or None if not found or timeout. 
+ """ + while timeout > 0: + try: + resource: Any = self.api.get_namespaced_custom_object_status( + group=constants.GROUP, + version=constants.JOB_VERSION, + plural=constants.JOB_PLURAL, + name=name, + namespace=k8s_namespace, + ) + except ApiException as e: + if e.status == 404: + log.error("rayjob resource is not found. error = {}".format(e)) + return None + else: + log.error("error fetching custom resource: {}".format(e)) + return None + + if resource and "status" in resource and resource["status"]: + return resource["status"] + else: + log.info("rayjob {} status not set yet, waiting...".format(name)) + time.sleep(delay_between_attempts) + timeout -= delay_between_attempts + + log.info("rayjob {} status not set yet, timing out...".format(name)) + return None + + def wait_until_job_finished( + self, + name: str, + k8s_namespace: str = "default", + timeout: int = 60, + delay_between_attempts: int = 5, + ) -> bool: + """Wait until a Ray job reaches a terminal status. + + This method waits for the job to reach a terminal state by checking both jobStatus + (STOPPED, SUCCEEDED, FAILED) and jobDeploymentStatus (Complete, Failed). + + Parameters: + - name (str): The name of the Ray job custom resource. + - k8s_namespace (str, optional): The namespace in which to retrieve the Ray job. Defaults to "default". + - timeout (int, optional): The duration in seconds after which we stop trying. Defaults to 60 seconds. + - delay_between_attempts (int, optional): The duration in seconds to wait between attempts. Defaults to 5 seconds. + + Returns: + bool: True if the rayjob reaches a terminal status, False otherwise. 
+ """ + while timeout > 0: + status = self.get_job_status( + name, k8s_namespace, timeout, delay_between_attempts + ) + + if status: + if "jobDeploymentStatus" in status: + deployment_status = status["jobDeploymentStatus"] + if deployment_status in ["Complete", "Failed"]: + log.info( + "rayjob {} has finished with deployment status: {}".format( + name, deployment_status + ) + ) + return True + elif deployment_status == "Suspended": + log.info("rayjob {} is suspended".format(name)) + # Suspended is not terminal, continue waiting + elif deployment_status in ["Initializing", "Running", "Suspending"]: + log.info( + "rayjob {} is {}".format(name, deployment_status.lower()) + ) + elif deployment_status: + log.info( + "rayjob {} deployment status: {}".format( + name, deployment_status + ) + ) + + if "jobStatus" in status: + current_status = status["jobStatus"] + if current_status in ["", "PENDING"]: + log.info("rayjob {} has not started yet".format(name)) + elif current_status == "RUNNING": + log.info("rayjob {} is running".format(name)) + elif current_status in TERMINAL_JOB_STATUSES: + log.info( + "rayjob {} has finished with status {}!".format( + name, current_status + ) + ) + return True + else: + log.info( + "rayjob {} has an unknown status: {}".format( + name, current_status + ) + ) + elif "jobDeploymentStatus" not in status: + log.info( + "rayjob {} status fields not available yet, waiting...".format( + name + ) + ) + + time.sleep(delay_between_attempts) + timeout -= delay_between_attempts + + log.info( + "rayjob {} has not reached terminal status before timeout".format(name) + ) + return False + + def wait_until_job_running( + self, + name: str, + k8s_namespace: str = "default", + timeout: int = 60, + delay_between_attempts: int = 5, + ) -> bool: + """Wait until a Ray job reaches Running state. + + This method waits for the job's jobDeploymentStatus to reach "Running". + Useful for confirming a job has started after submission or resubmission. 
+ + Parameters: + - name (str): The name of the Ray job custom resource. + - k8s_namespace (str, optional): The namespace in which to retrieve the Ray job. Defaults to "default". + - timeout (int, optional): The duration in seconds after which we stop trying. Defaults to 60 seconds. + - delay_between_attempts (int, optional): The duration in seconds to wait between attempts. Defaults to 5 seconds. + + Returns: + bool: True if the rayjob reaches Running status, False otherwise. + """ + while timeout > 0: + status = self.get_job_status( + name, k8s_namespace, timeout, delay_between_attempts + ) + + if status and "jobDeploymentStatus" in status: + deployment_status = status["jobDeploymentStatus"] + if deployment_status == "Running": + log.info("rayjob {} is running".format(name)) + return True + elif deployment_status in ["Complete", "Failed", "Suspended"]: + log.info( + "rayjob {} reached terminal/suspended status {} before running".format( + name, deployment_status + ) + ) + return False + elif deployment_status: + log.info("rayjob {} is {}".format(name, deployment_status.lower())) + else: + log.info("rayjob {} deployment status not set yet".format(name)) + else: + log.info("rayjob {} status not available yet, waiting...".format(name)) + + time.sleep(delay_between_attempts) + timeout -= delay_between_attempts + + log.info("rayjob {} has not reached running status before timeout".format(name)) + return False + + def suspend_job(self, name: str, k8s_namespace: str = "default") -> bool: + """Stop a Ray job by setting the suspend field to True. + + This will delete the associated RayCluster and transition the job to 'Suspended' status. + Only works on jobs in 'Running' or 'Initializing' status. + + Parameters: + - name (str): The name of the Ray job custom resource. + - k8s_namespace (str, optional): The namespace in which to stop the Ray job. Defaults to "default". + + Returns: + bool: True if the job was successfully suspended, False otherwise. 
+ """ + try: + patch_body = {"spec": {"suspend": True}} + self.api.patch_namespaced_custom_object( + group=constants.GROUP, + version=constants.JOB_VERSION, + plural=constants.JOB_PLURAL, + name=name, + namespace=k8s_namespace, + body=patch_body, + ) + log.info( + f"Successfully suspended rayjob {name} in namespace {k8s_namespace}" + ) + return True + except ApiException as e: + if e.status == 404: + log.error(f"rayjob {name} not found in namespace {k8s_namespace}") + else: + log.error(f"error stopping rayjob {name}: {e.reason}") + return False + + def resubmit_job(self, name: str, k8s_namespace: str = "default") -> bool: + """Resubmit a suspended Ray job by setting the suspend field to False. + + This will create a new RayCluster and resubmit the job. + Only works on jobs in 'Suspended' status. + + Parameters: + - name (str): The name of the Ray job custom resource. + - k8s_namespace (str, optional): The namespace in which to resubmit the Ray job. Defaults to "default". + + Returns: + bool: True if the job was successfully resubmitted, False otherwise. + """ + try: + # Patch the RayJob to set suspend=false + patch_body = {"spec": {"suspend": False}} + self.api.patch_namespaced_custom_object( + group=constants.GROUP, + version=constants.JOB_VERSION, + plural=constants.JOB_PLURAL, + name=name, + namespace=k8s_namespace, + body=patch_body, + ) + log.info( + f"Successfully resubmitted rayjob {name} in namespace {k8s_namespace}" + ) + return True + except ApiException as e: + if e.status == 404: + log.error(f"rayjob {name} not found in namespace {k8s_namespace}") + else: + log.error(f"error resubmitting rayjob {name}: {e.reason}") + return False + + def delete_job(self, name: str, k8s_namespace: str = "default") -> bool: + """Delete a Ray job and all of its associated data. + + Parameters: + - name (str): The name of the Ray job custom resource. + - k8s_namespace (str, optional): The namespace in which to delete the Ray job. Defaults to "default". 
+ + Returns: + bool: True if the job was successfully deleted, False otherwise. + """ + try: + self.api.delete_namespaced_custom_object( + group=constants.GROUP, + version=constants.JOB_VERSION, + plural=constants.JOB_PLURAL, + name=name, + namespace=k8s_namespace, + ) + log.info(f"Successfully deleted rayjob {name} in namespace {k8s_namespace}") + return True + except ApiException as e: + if e.status == 404: + log.error(f"rayjob custom resource already deleted. error = {e.reason}") + return False + else: + log.error(f"error deleting the rayjob custom resource: {e.reason}") + return False + + def get_job(self, name: str, k8s_namespace: str = "default") -> Any: + """Get a Ray job in a given namespace.""" + try: + return self.api.get_namespaced_custom_object( + group=constants.GROUP, + version=constants.JOB_VERSION, + plural=constants.JOB_PLURAL, + name=name, + namespace=k8s_namespace, + ) + except ApiException as e: + if e.status == 404: + log.error(f"rayjob {name} not found in namespace {k8s_namespace}") + return None + else: + log.error(f"error fetching rayjob {name}: {e.reason}") + return None + + def list_jobs(self, k8s_namespace: str = "default") -> Any: + """List all Ray jobs in a given namespace.""" + try: + return self.api.list_namespaced_custom_object( + group=constants.GROUP, + version=constants.JOB_VERSION, + plural=constants.JOB_PLURAL, + namespace=k8s_namespace, + ) + except ApiException as e: + log.error(f"error fetching rayjobs: {e.reason}") + return None diff --git a/src/codeflare_sdk/vendored/python_client/utils/__init__.py b/src/codeflare_sdk/vendored/python_client/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_builder.py b/src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_builder.py new file mode 100644 index 00000000..be0a66e5 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_builder.py @@ -0,0 +1,326 @@ +""" 
+Set of helper methods to manage rayclusters. Requires Python 3.9 and higher +""" + +import copy +import logging +import math +from typing import Any +from abc import ABCMeta, abstractmethod +from codeflare_sdk.vendored.python_client.utils import kuberay_cluster_utils +from codeflare_sdk.vendored.python_client import constants + + +log = logging.getLogger(__name__) +if logging.getLevelName(log.level) == "NOTSET": + logging.basicConfig(format="%(asctime)s %(message)s", level=constants.LOGLEVEL) + + +class IClusterBuilder(metaclass=ABCMeta): + """ + IClusterBuilder is an interface for building a cluster. + + The class defines abstract methods for building the metadata, head pod, worker groups, and retrieving the built cluster. + """ + + @staticmethod + @abstractmethod + def build_meta(): + "builds the cluster metadata" + + @staticmethod + @abstractmethod + def build_head(): + "builds the head pod" + + @staticmethod + @abstractmethod + def build_worker(): + "builds a worker group" + + @staticmethod + @abstractmethod + def get_cluster(): + "Returns the built cluster" + + +# Concrete implementation of the builder interface +class ClusterBuilder(IClusterBuilder): + """ + ClusterBuilder implements the abstract methods of IClusterBuilder to build a cluster. + """ + + def __init__(self): + self.cluster: dict[str, Any] = {} + self.succeeded: bool = False + self.cluster_utils = kuberay_cluster_utils.ClusterUtils() + + def build_meta( + self, + name: str, + k8s_namespace: str = "default", + labels: dict = None, + ray_version: str = "2.46.0", + ): + """Builds the metadata and ray version of the cluster. + + Parameters: + - name (str): The name of the cluster. + - k8s_namespace (str, optional): The namespace in which the Ray cluster exists. Defaults to "default". + - labels (dict, optional): A dictionary of key-value pairs to add as labels to the cluster. Defaults to None. + - ray_version (str, optional): The version of Ray to use for the cluster. Defaults to "2.46.0". 
+ """ + self.cluster = self.cluster_utils.populate_meta( + cluster=self.cluster, + name=name, + k8s_namespace=k8s_namespace, + labels=labels, + ray_version=ray_version, + ) + return self + + def build_head( + self, + ray_image: str = "rayproject/ray:2.46.0", + service_type: str = "ClusterIP", + cpu_requests: str = "2", + memory_requests: str = "3G", + cpu_limits: str = "2", + memory_limits: str = "3G", + ray_start_params: dict = { + "dashboard-host": "0.0.0.0", + }, + ): + """Build head node of the ray cluster. + + Parameters: + - ray_image (str): Docker image for the head node. Default value is "rayproject/ray:2.46.0". + - service_type (str): Service type of the head node. Default value is "ClusterIP", which creates a headless ClusterIP service. + - cpu_requests (str): CPU requests for the head node. Default value is "2". + - memory_requests (str): Memory requests for the head node. Default value is "3G". + - cpu_limits (str): CPU limits for the head node. Default value is "2". + - memory_limits (str): Memory limits for the head node. Default value is "3G". + - ray_start_params (dict): Dictionary of start parameters for the head node. + Default values is "dashboard-host": "0.0.0.0". + """ + self.cluster, self.succeeded = self.cluster_utils.populate_ray_head( + self.cluster, + ray_image=ray_image, + service_type=service_type, + cpu_requests=cpu_requests, + memory_requests=memory_requests, + cpu_limits=cpu_limits, + memory_limits=memory_limits, + ray_start_params=ray_start_params, + ) + return self + + def build_worker( + self, + group_name: str, + ray_image: str = "rayproject/ray:2.46.0", + ray_command: Any = ["/bin/bash", "-lc"], + init_image: str = "busybox:1.28", + cpu_requests: str = "1", + memory_requests: str = "1G", + cpu_limits: str = "2", + memory_limits: str = "2G", + replicas: int = 1, + min_replicas: int = -1, + max_replicas: int = -1, + ray_start_params: dict = {}, + ): + """Build worker specifications of the cluster. 
+ + This function sets the worker configuration of the cluster, including the Docker image, CPU and memory requirements, number of replicas, and other parameters. + + Parameters: + - group_name (str): name of the worker group. + - ray_image (str, optional): Docker image for the Ray process. Default is "rayproject/ray:2.46.0". + - ray_command (Any, optional): Command to run in the Docker container. Default is ["/bin/bash", "-lc"]. + - init_image (str, optional): Docker image for the init container. Default is "busybox:1.28". + - cpu_requests (str, optional): CPU requests for the worker pods. Default is "1". + - memory_requests (str, optional): Memory requests for the worker pods. Default is "1G". + - cpu_limits (str, optional): CPU limits for the worker pods. Default is "2". + - memory_limits (str, optional): Memory limits for the worker pods. Default is "2G". + - replicas (int, optional): Number of worker pods to run. Default is 1. + - min_replicas (int, optional): Minimum number of worker pods to run. Default is -1. + - max_replicas (int, optional): Maximum number of worker pods to run. Default is -1. + - ray_start_params (dict, optional): Additional parameters to pass to the ray start command. Default is {}. 
+ """ + if min_replicas < 0: + min_replicas = int(math.ceil(replicas / 2)) + if max_replicas < 0: + max_replicas = int(replicas * 3) + + if "spec" in self.cluster.keys(): + if "workerGroupSpecs" not in self.cluster.keys(): + log.info( + "setting the workerGroupSpecs for group_name {}".format(group_name) + ) + self.cluster["spec"]["workerGroupSpecs"] = [] + else: + log.error( + "error creating custom resource: {meta}, the spec section is missing, did you run build_head()?".format( + self.cluster["metadata"] + ) + ) + self.succeeded = False + return self + + worker_group, self.succeeded = self.cluster_utils.populate_worker_group( + group_name, + ray_image, + ray_command, + init_image, + cpu_requests, + memory_requests, + cpu_limits, + memory_limits, + replicas, + min_replicas, + max_replicas, + ray_start_params, + ) + + if self.succeeded: + self.cluster["spec"]["workerGroupSpecs"].append(worker_group) + return self + + def get_cluster(self): + cluster = copy.deepcopy(self.cluster) + return cluster + + +class Director: + def __init__(self): + self.cluster_builder = ClusterBuilder() + + def build_basic_cluster( + self, name: str, k8s_namespace: str = "default", labels: dict = None + ) -> dict: + """Builds a basic cluster with the given name and k8s_namespace parameters. + + Parameters: + - name (str): The name of the cluster. + - k8s_namespace (str, optional): The kubernetes namespace for the cluster, with a default value of "default". + + Returns: + dict: The basic cluster as a dictionary. 
+ """ + cluster: dict = ( + self.cluster_builder.build_meta( + name=name, k8s_namespace=k8s_namespace, labels=labels + ) + .build_head() + .get_cluster() + ) + + if self.cluster_builder.succeeded: + return cluster + return None + + def build_small_cluster( + self, name: str, k8s_namespace: str = "default", labels: dict = None + ) -> dict: + """Builds a small cluster with the given name and k8s_namespace parameters with 1 workergroup, + the workgroup has 1 replica with 2 cpu and 2G memory limits + + Parameters: + - name (str): The name of the cluster. + - k8s_namespace (str, optional): The kubernetes namespace for the cluster, with a default value of "default". + + Returns: + dict: The small cluster as a dictionary. + """ + cluster: dict = ( + self.cluster_builder.build_meta( + name=name, k8s_namespace=k8s_namespace, labels=labels + ) + .build_head() + .build_worker( + group_name="{}-workers".format(name), + replicas=1, + min_replicas=0, + max_replicas=2, + cpu_requests="1", + memory_requests="1G", + cpu_limits="2", + memory_limits="2G", + ) + .get_cluster() + ) + + if self.cluster_builder.succeeded: + return cluster + return None + + def build_medium_cluster( + self, name: str, k8s_namespace: str = "default", labels: str = None + ) -> dict: + """Builds a medium cluster with the given name and k8s_namespace parameters with 1 workergroup, + the workgroup has 3 replicas with 4 cpu and 4G memory limits + + Parameters: + - name (str): The name of the cluster. + - k8s_namespace (str, optional): The kubernetes namespace for the cluster, with a default value of "default". + + Returns: + dict: The small cluster as a dictionary. 
+ """ + cluster: dict = ( + self.cluster_builder.build_meta( + name=name, k8s_namespace=k8s_namespace, labels=labels + ) + .build_head() + .build_worker( + group_name="{}-workers".format(name), + replicas=3, + min_replicas=0, + max_replicas=6, + cpu_requests="2", + memory_requests="2G", + cpu_limits="4", + memory_limits="4G", + ) + .get_cluster() + ) + + if self.cluster_builder.succeeded: + return cluster + return None + + def build_large_cluster( + self, name: str, k8s_namespace: str = "default", labels: dict = None + ) -> dict: + """Builds a medium cluster with the given name and k8s_namespace parameters. with 1 workergroup, + the workgroup has 6 replicas with 6 cpu and 6G memory limits + + Parameters: + - name (str): The name of the cluster. + - k8s_namespace (str, optional): The kubernetes namespace for the cluster, with a default value of "default". + + Returns: + dict: The small cluster as a dictionary. + """ + cluster: dict = ( + self.cluster_builder.build_meta( + name=name, k8s_namespace=k8s_namespace, labels=labels + ) + .build_head() + .build_worker( + group_name="{}-workers".format(name), + replicas=6, + min_replicas=0, + max_replicas=12, + cpu_requests="3", + memory_requests="4G", + cpu_limits="6", + memory_limits="8G", + ) + .get_cluster() + ) + + if self.cluster_builder.succeeded: + return cluster + return None diff --git a/src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_utils.py b/src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_utils.py new file mode 100644 index 00000000..ac36fa93 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client/utils/kuberay_cluster_utils.py @@ -0,0 +1,473 @@ +""" +Set of helper methods to manage rayclusters. 
Requires Python 3.6 and higher +""" + +import logging +import copy +import re +from typing import Any, Tuple +from codeflare_sdk.vendored.python_client import constants + + +log = logging.getLogger(__name__) +if logging.getLevelName(log.level) == "NOTSET": + logging.basicConfig(format="%(asctime)s %(message)s", level=constants.LOGLEVEL) + +""" +ClusterUtils contains methods to facilitate modifying/populating the config of a raycluster +""" + + +class ClusterUtils: + """ + ClusterUtils - Utility class for populating cluster information + + Methods: + - populate_meta(cluster: dict, name: str, k8s_namespace: str, labels: dict, ray_version: str) -> dict: + - populate_ray_head(cluster: dict, ray_image: str,service_type: str, cpu_requests: str, memory_requests: str, cpu_limits: str, memory_limits: str, ray_start_params: dict) -> Tuple[dict, bool]: + - populate_worker_group(cluster: dict, group_name: str, ray_image: str, ray_command: Any, init_image: str, cpu_requests: str, memory_requests: str, cpu_limits: str, memory_limits: str, replicas: int, min_replicas: int, max_replicas: int, ray_start_params: dict) -> Tuple[dict, bool]: + - update_worker_group_replicas(cluster: dict, group_name: str, max_replicas: int, min_replicas: int, replicas: int) -> Tuple[dict, bool]: + """ + + def populate_meta( + self, + cluster: dict, + name: str, + k8s_namespace: str, + labels: dict, + ray_version: str, + ) -> dict: + """Populate the metadata and ray version of the cluster. + + Parameters: + - cluster (dict): A dictionary representing a cluster. + - name (str): The name of the cluster. + - k8s_namespace (str): The namespace of the cluster. + - labels (dict): A dictionary of labels to be applied to the cluster. + - ray_version (str): The version of Ray to use in the cluster. + + Returns: + dict: The updated cluster dictionary with metadata and ray version populated. 
+ """ + + assert self.is_valid_name(name) + + cluster["apiVersion"] = "{group}/{version}".format( + group=constants.GROUP, version=constants.CLUSTER_VERSION + ) + cluster["kind"] = constants.CLUSTER_KIND + cluster["metadata"] = { + "name": name, + "namespace": k8s_namespace, + "labels": labels, + } + cluster["spec"] = {"rayVersion": ray_version} + return cluster + + def populate_ray_head( + self, + cluster: dict, + ray_image: str, + service_type: str, + cpu_requests: str, + memory_requests: str, + cpu_limits: str, + memory_limits: str, + ray_start_params: dict, + ) -> Tuple[dict, bool]: + """Populate the ray head specs of the cluster + Parameters: + - cluster (dict): The dictionary representation of the cluster. + - ray_image (str): The name of the ray image to use for the head node. + - service_type (str): The type of service to run for the head node. + - cpu_requests (str): The CPU resource requests for the head node. + - memory_requests (str): The memory resource requests for the head node. + - cpu_limits (str): The CPU resource limits for the head node. + - memory_limits (str): The memory resource limits for the head node. + - ray_start_params (dict): The parameters for starting the Ray cluster. + + Returns: + - Tuple (dict, bool): The updated cluster, and a boolean indicating whether the update was successful. + """ + # validate arguments + try: + arguments = locals() + for k, v in arguments.items(): + assert v + except AssertionError as e: + log.error( + "error creating ray head, the parameters are not fully defined. 
{} = {}".format( + k, v + ) + ) + return cluster, False + + # make sure metadata exists + if "spec" in cluster.keys(): + if "headGroupSpec" not in cluster.keys(): + log.info( + "setting the headGroupSpec for cluster {}".format( + cluster["metadata"]["name"] + ) + ) + cluster["spec"]["headGroupSpec"] = [] + else: + log.error("error creating ray head, the spec and/or metadata is not define") + return cluster, False + + # populate headGroupSpec + cluster["spec"]["headGroupSpec"] = { + "serviceType": service_type, + "rayStartParams": ray_start_params, + "template": { + "spec": { + "containers": [ + { + "image": ray_image, + "name": "ray-head", + "ports": [ + { + "containerPort": 6379, + "name": "gcs-server", + "protocol": "TCP", + }, + { + "containerPort": 8265, + "name": "dashboard", + "protocol": "TCP", + }, + { + "containerPort": 10001, + "name": "client", + "protocol": "TCP", + }, + ], + "resources": { + "requests": { + "cpu": cpu_requests, + "memory": memory_requests, + }, + "limits": {"cpu": cpu_limits, "memory": memory_limits}, + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + } + ], + "volumes": [{"emptyDir": {}, "name": "ray-logs"}], + } + }, + } + + return cluster, True + + def populate_worker_group( + self, + group_name: str, + ray_image: str, + ray_command: Any, + init_image: str, + cpu_requests: str, + memory_requests: str, + cpu_limits: str, + memory_limits: str, + replicas: int, + min_replicas: int, + max_replicas: int, + ray_start_params: dict, + ) -> Tuple[dict, bool]: + """Populate the worker group specification in the cluster dictionary. + + Parameters: + - cluster (dict): Dictionary representing the cluster spec. + - group_name (str): The name of the worker group. + - ray_image (str): The image to use for the Ray worker containers. + - ray_command (Any): The command to run in the Ray worker containers. + - init_image (str): The init container image to use. 
+ - cpu_requests (str): The requested CPU resources for the worker containers. + - memory_requests (str): The requested memory resources for the worker containers. + - cpu_limits (str): The limit on CPU resources for the worker containers. + - memory_limits (str): The limit on memory resources for the worker containers. + - replicas (int): The desired number of replicas for the worker group. + - min_replicas (int): The minimum number of replicas for the worker group. + - max_replicas (int): The maximum number of replicas for the worker group. + - ray_start_params (dict): The parameters to pass to the Ray worker start command. + + Returns: + - Tuple[dict, bool]: A tuple of the cluster specification and a boolean indicating + whether the worker group was successfully populated. + """ + # validate arguments + try: + arguments = locals() + for k, v in arguments.items(): + if k != "min_replicas" and k != "ray_start_params": + assert v + except AssertionError as e: + log.error( + "error populating worker group, the parameters are not fully defined. 
{} = {}".format( + k, v + ) + ) + return None, False + + assert self.is_valid_name(group_name) + assert max_replicas >= min_replicas + + worker_group: dict[str, Any] = { + "groupName": group_name, + "maxReplicas": max_replicas, + "minReplicas": min_replicas, + "rayStartParams": ray_start_params, + "replicas": replicas, + "template": { + "spec": { + "containers": [ + { + "image": ray_image, + "command": ray_command, + "lifecycle": { + "preStop": { + "exec": {"command": ["/bin/sh", "-c", "ray stop"]} + } + }, + "name": "ray-worker", + "resources": { + "requests": { + "cpu": cpu_requests, + "memory": memory_requests, + }, + "limits": { + "cpu": cpu_limits, + "memory": memory_limits, + }, + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + } + ], + "volumes": [{"emptyDir": {}, "name": "ray-logs"}], + } + }, + } + + return worker_group, True + + def update_worker_group_replicas( + self, + cluster: dict, + group_name: str, + max_replicas: int, + min_replicas: int, + replicas: int, + ) -> Tuple[dict, bool]: + """Update the number of replicas for a worker group in the cluster. + + Parameters: + - cluster (dict): The cluster to update. + - group_name (str): The name of the worker group to update. + - max_replicas (int): The maximum number of replicas for the worker group. + - min_replicas (int): The minimum number of replicas for the worker group. + - replicas (int): The desired number of replicas for the worker group. + + Returns: + Tuple[dict, bool]: A tuple containing the updated cluster and a flag indicating whether the update was successful. + """ + try: + arguments = locals() + for k, v in arguments.items(): + if k != "min_replicas": + assert v + except AssertionError as e: + log.error( + "error updating worker group, the parameters are not fully defined. 
{} = {}".format( + k, v + ) + ) + return cluster, False + + assert cluster["spec"]["workerGroupSpecs"] + assert max_replicas >= min_replicas + + for i in range(len(cluster["spec"]["workerGroupSpecs"])): + if cluster["spec"]["workerGroupSpecs"][i]["groupName"] == group_name: + cluster["spec"]["workerGroupSpecs"][i]["maxReplicas"] = max_replicas + cluster["spec"]["workerGroupSpecs"][i]["minReplicas"] = min_replicas + cluster["spec"]["workerGroupSpecs"][i]["replicas"] = replicas + return cluster, True + + return cluster, False + + def update_worker_group_resources( + self, + cluster: dict, + group_name: str, + cpu_requests: str, + memory_requests: str, + cpu_limits: str, + memory_limits: str, + container_name="unspecified", + ) -> Tuple[dict, bool]: + """Update the resources for a worker group pods in the cluster. + + Parameters: + - cluster (dict): The cluster to update. + - group_name (str): The name of the worker group to update. + - cpu_requests (str): CPU requests for the worker pods. + - memory_requests (str): Memory requests for the worker pods. + - cpu_limits (str): CPU limits for the worker pods. + - memory_limits (str): Memory limits for the worker pods. + + Returns: + Tuple[dict, bool]: A tuple containing the updated cluster and a flag indicating whether the update was successful. + """ + try: + arguments = locals() + for k, v in arguments.items(): + if k != "min_replicas": + assert v + except AssertionError as e: + log.error( + "error updating worker group, the parameters are not fully defined. 
{} = {}".format( + k, v + ) + ) + return cluster, False + + assert cluster["spec"]["workerGroupSpecs"] + + worker_groups = cluster["spec"]["workerGroupSpecs"] + + def add_values(group_index: int, container_index: int): + worker_groups[group_index]["template"]["spec"]["containers"][ + container_index + ]["resources"]["requests"]["cpu"] = cpu_requests + worker_groups[group_index]["template"]["spec"]["containers"][ + container_index + ]["resources"]["requests"]["memory"] = memory_requests + worker_groups[group_index]["template"]["spec"]["containers"][ + container_index + ]["resources"]["limits"]["cpu"] = cpu_limits + worker_groups[group_index]["template"]["spec"]["containers"][ + container_index + ]["resources"]["limits"]["memory"] = memory_limits + + for group_index, worker_group in enumerate(worker_groups): + if worker_group["groupName"] != group_name: + continue + + containers = worker_group["template"]["spec"]["containers"] + container_names = [container["name"] for container in containers] + + if len(containers) == 0: + log.error( + f"error updating container resources, the worker group {group_name} has no containers" + ) + return cluster, False + + if container_name == "unspecified": + add_values(group_index, 0) + return cluster, True + elif container_name == "all_containers": + for container_index in range(len(containers)): + add_values(group_index, container_index) + return cluster, True + elif container_name in container_names: + container_index = container_names.index(container_name) + add_values(group_index, container_index) + return cluster, True + + return cluster, False + + def duplicate_worker_group( + self, + cluster: dict, + group_name: str, + new_group_name: str, + ) -> Tuple[dict, bool]: + """Duplicate a worker group in the cluster. + + Parameters: + - cluster (dict): The cluster definition. + - group_name (str): The name of the worker group to be duplicated. + - new_group_name (str): The name for the duplicated worker group. 
+ + Returns: + Tuple[dict, bool]: A tuple containing the updated cluster definition and a boolean indicating the success of the operation. + """ + try: + arguments = locals() + for k, v in arguments.items(): + assert v + except AssertionError as e: + log.error( + f"error duplicating worker group, the parameters are not fully defined. {k} = {v}" + ) + return cluster, False + assert self.is_valid_name(new_group_name) + assert cluster["spec"]["workerGroupSpecs"] + + worker_groups = cluster["spec"]["workerGroupSpecs"] + for _, worker_group in enumerate(worker_groups): + if worker_group["groupName"] == group_name: + duplicate_group = copy.deepcopy(worker_group) + duplicate_group["groupName"] = new_group_name + worker_groups.append(duplicate_group) + return cluster, True + + log.error( + f"error duplicating worker group, no match was found for {group_name}" + ) + return cluster, False + + def delete_worker_group( + self, + cluster: dict, + group_name: str, + ) -> Tuple[dict, bool]: + """Deletes a worker group in the cluster. + + Parameters: + - cluster (dict): The cluster definition. + - group_name (str): The name of the worker group to be duplicated. + + Returns: + Tuple[dict, bool]: A tuple containing the updated cluster definition and a boolean indicating the success of the operation. + """ + try: + arguments = locals() + for k, v in arguments.items(): + assert v + except AssertionError as e: + log.error( + f"error creating ray head, the parameters are not fully defined. 
{k} = {v}" + ) + return cluster, False + + assert cluster["spec"]["workerGroupSpecs"] + + worker_groups = cluster["spec"]["workerGroupSpecs"] + first_or_none = next( + (x for x in worker_groups if x["groupName"] == group_name), None + ) + if first_or_none: + worker_groups.remove(first_or_none) + return cluster, True + + log.error(f"error removing worker group, no match was found for {group_name}") + return cluster, False + + def is_valid_name(self, name: str) -> bool: + msg = "The name must be 63 characters or less, begin and end with an alphanumeric character, and contain only dashes, dots, and alphanumerics." + if len(name) > 63 or not bool(re.match("^[a-z0-9]([-.]*[a-z0-9])+$", name)): + log.error(msg) + return False + return True + + def is_valid_label(self, name: str) -> bool: + msg = "The label name must be 63 characters or less, begin and end with an alphanumeric character, and contain only dashes, underscores, dots, and alphanumerics." + if len(name) > 63 or not bool(re.match("^[a-z0-9]([-._]*[a-z0-9])+$", name)): + log.error(msg) + return False + return True diff --git a/src/codeflare_sdk/vendored/python_client_test/README.md b/src/codeflare_sdk/vendored/python_client_test/README.md new file mode 100644 index 00000000..6c32e260 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/README.md @@ -0,0 +1,29 @@ +# Overview + +## For developers + +1. `pip install -U pip setuptools` +1. `cd clients/python-client && pip install -e .` + +Uninstall with `pip uninstall python-client`. 
+ +## For testing run + +`python -m unittest discover 'clients/python-client/python_client_test/'` + +### Coverage report + +#### Pre-requisites + +* `sudo apt install libsqlite3-dev` +* `pyenv install 3.6.5` # or your Python version +* `pip install db-sqlite3 coverage` + +__To gather data__ +`python -m coverage run -m unittest` + +__to generate a coverage report__ +`python -m coverage report` + +__to generate the test coverage report in HTML format__ +`python -m coverage html` diff --git a/src/codeflare_sdk/vendored/python_client_test/helpers.py b/src/codeflare_sdk/vendored/python_client_test/helpers.py new file mode 100644 index 00000000..1bcfdbc2 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/helpers.py @@ -0,0 +1,135 @@ +import time +from codeflare_sdk.vendored.python_client import constants + + +def create_job_with_cluster_selector( + job_name, + namespace, + cluster_name, + entrypoint="python -c \"import ray; ray.init(); @ray.remote\ndef hello(): return 'Hello from Ray!'; print(ray.get(hello.remote()))\"", + labels=None, +): + job_body = { + "apiVersion": constants.GROUP + "/" + constants.JOB_VERSION, + "kind": constants.JOB_KIND, + "metadata": { + "name": job_name, + "namespace": namespace, + "labels": { + "app.kubernetes.io/name": job_name, + "app.kubernetes.io/managed-by": "kuberay", + }, + }, + "spec": { + "clusterSelector": { + "ray.io/cluster": cluster_name, + }, + "entrypoint": entrypoint, + "submissionMode": "K8sJobMode", + }, + } + + # Add any additional labels if provided + if labels: + job_body["metadata"]["labels"].update(labels) + + return job_body + + +def create_job_with_ray_cluster_spec( + job_name, + namespace, + entrypoint="python -c \"import ray; ray.init(); @ray.remote\ndef hello(): return 'Hello from Ray!'; print(ray.get(hello.remote()))\"", + labels=None, +): + job_body = { + "apiVersion": constants.GROUP + "/" + constants.JOB_VERSION, + "kind": constants.JOB_KIND, + "metadata": { + "name": job_name, + "namespace": 
namespace, + "labels": { + "app.kubernetes.io/name": job_name, + "app.kubernetes.io/managed-by": "kuberay", + }, + }, + "spec": { + "rayClusterSpec": { + "headGroupSpec": { + "serviceType": "ClusterIP", + "replicas": 1, + "rayStartParams": { + "dashboard-host": "0.0.0.0", + }, + "template": { + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.48.0", + "ports": [ + {"containerPort": 6379, "name": "gcs"}, + { + "containerPort": 8265, + "name": "dashboard", + }, + { + "containerPort": 10001, + "name": "client", + }, + ], + "resources": { + "limits": { + "cpu": "1", + "memory": "2Gi", + }, + "requests": { + "cpu": "500m", + "memory": "1Gi", + }, + }, + } + ] + } + }, + }, + "workerGroupSpecs": [ + { + "groupName": "small-worker", + "replicas": 1, + "rayStartParams": { + "num-cpus": "1", + }, + "template": { + "spec": { + "containers": [ + { + "name": "ray-worker", + "image": "rayproject/ray:2.48.0", + "resources": { + "limits": { + "cpu": "1", + "memory": "1Gi", + }, + "requests": { + "cpu": "500m", + "memory": "512Mi", + }, + }, + } + ] + } + }, + } + ], + }, + "entrypoint": entrypoint, + "submissionMode": "K8sJobMode", + "shutdownAfterJobFinishes": True, + }, + } + + if labels: + job_body["metadata"]["labels"].update(labels) + + return job_body diff --git a/src/codeflare_sdk/vendored/python_client_test/test_cluster_api.py b/src/codeflare_sdk/vendored/python_client_test/test_cluster_api.py new file mode 100644 index 00000000..3fdb18e7 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/test_cluster_api.py @@ -0,0 +1,345 @@ +import unittest +from codeflare_sdk.vendored.python_client import kuberay_cluster_api, constants +from codeflare_sdk.vendored.python_client.utils import kuberay_cluster_builder + + +# Keep the original test cluster body for reference if needed +test_cluster_body: dict = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": { + "labels": {"controller-tools.k8s.io": "1.0"}, + "name": 
"raycluster-complete-raw", + }, + "spec": { + "rayVersion": "2.46.0", + "headGroupSpec": { + "rayStartParams": {"dashboard-host": "0.0.0.0"}, + "template": { + "metadata": {"labels": {}}, + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.46.0", + "ports": [ + {"containerPort": 6379, "name": "gcs"}, + {"containerPort": 8265, "name": "dashboard"}, + {"containerPort": 10001, "name": "client"}, + ], + "lifecycle": { + "preStop": { + "exec": {"command": ["/bin/sh", "-c", "ray stop"]} + } + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + "resources": { + "limits": {"cpu": "1", "memory": "2G"}, + "requests": {"cpu": "500m", "memory": "2G"}, + }, + } + ], + "volumes": [{"name": "ray-logs", "emptyDir": {}}], + }, + }, + }, + "workerGroupSpecs": [ + { + "replicas": 1, + "minReplicas": 1, + "maxReplicas": 10, + "groupName": "small-group", + "rayStartParams": {}, + "template": { + "spec": { + "containers": [ + { + "name": "ray-worker", + "image": "rayproject/ray:2.46.0", + "lifecycle": { + "preStop": { + "exec": { + "command": ["/bin/sh", "-c", "ray stop"] + } + } + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + "resources": { + "limits": {"cpu": "1", "memory": "1G"}, + "requests": {"cpu": "500m", "memory": "1G"}, + }, + }, + { + "name": "side-car", + "image": "rayproject/ray:2.46.0", + "resources": { + "limits": {"cpu": "1", "memory": "1G"}, + "requests": {"cpu": "500m", "memory": "1G"}, + }, + }, + ], + "volumes": [{"name": "ray-logs", "emptyDir": {}}], + } + }, + } + ], + }, + "status": { + "state": "ready", + "availableWorkerReplicas": 2, + "desiredWorkerReplicas": 1, + "endpoints": {"client": "10001", "dashboard": "8265", "gcs-server": "6379"}, + "head": {"serviceIP": "10.152.183.194"}, + "lastUpdateTime": "2023-02-16T05:15:17Z", + "maxWorkerReplicas": 2, + }, +} + + +class TestClusterApi(unittest.TestCase): + """Comprehensive test suite for RayClusterApi functionality.""" + + 
def __init__(self, methodName: str = ...) -> None: + super().__init__(methodName) + self.api = kuberay_cluster_api.RayClusterApi() + self.director = kuberay_cluster_builder.Director() + + def test_create_and_get_ray_cluster(self): + """Test creating a cluster and retrieving it.""" + cluster_name = "test-create-cluster" + namespace = "default" + + # Build a small cluster using the director + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"test": "create-cluster"}, + ) + + # Ensure cluster was built successfully + self.assertIsNotNone(cluster_body, "Cluster should be built successfully") + self.assertEqual(cluster_body["metadata"]["name"], cluster_name) + + try: + # Create the cluster + created_cluster = self.api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + self.assertIsNotNone( + created_cluster, "Cluster should be created successfully" + ) + self.assertEqual(created_cluster["metadata"]["name"], cluster_name) + + # Get the cluster and verify it exists + retrieved_cluster = self.api.get_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + self.assertIsNotNone( + retrieved_cluster, "Cluster should be retrieved successfully" + ) + self.assertEqual(retrieved_cluster["metadata"]["name"], cluster_name) + self.assertEqual( + retrieved_cluster["spec"]["rayVersion"], + cluster_body["spec"]["rayVersion"], + ) + + finally: + # Clean up + self.api.delete_ray_cluster(name=cluster_name, k8s_namespace=namespace) + + def test_list_ray_clusters(self): + """Test listing Ray clusters in a namespace.""" + cluster_name_1 = "test-list-cluster-1" + cluster_name_2 = "test-list-cluster-2" + namespace = "default" + test_label = "test-list-clusters" + + # Build two small clusters + cluster_body_1 = self.director.build_small_cluster( + name=cluster_name_1, + k8s_namespace=namespace, + labels={"test": test_label}, + ) + cluster_body_2 = self.director.build_small_cluster( + name=cluster_name_2, + 
k8s_namespace=namespace, + labels={"test": test_label}, + ) + + try: + # Create both clusters + created_cluster_1 = self.api.create_ray_cluster( + body=cluster_body_1, k8s_namespace=namespace + ) + created_cluster_2 = self.api.create_ray_cluster( + body=cluster_body_2, k8s_namespace=namespace + ) + + self.assertIsNotNone(created_cluster_1, "First cluster should be created") + self.assertIsNotNone(created_cluster_2, "Second cluster should be created") + + # List all clusters + clusters_list = self.api.list_ray_clusters(k8s_namespace=namespace) + self.assertIsNotNone(clusters_list, "Should be able to list clusters") + self.assertIn("items", clusters_list, "Response should contain items") + + # Verify our test clusters are in the list + cluster_names = [ + item["metadata"]["name"] for item in clusters_list["items"] + ] + self.assertIn( + cluster_name_1, + cluster_names, + "First test cluster should be in the list", + ) + self.assertIn( + cluster_name_2, + cluster_names, + "Second test cluster should be in the list", + ) + + # Test listing with label selector + labeled_clusters = self.api.list_ray_clusters( + k8s_namespace=namespace, label_selector=f"test={test_label}" + ) + self.assertIsNotNone( + labeled_clusters, "Should be able to list clusters with label selector" + ) + labeled_cluster_names = [ + item["metadata"]["name"] for item in labeled_clusters["items"] + ] + self.assertIn( + cluster_name_1, + labeled_cluster_names, + "First test cluster should match label", + ) + self.assertIn( + cluster_name_2, + labeled_cluster_names, + "Second test cluster should match label", + ) + + finally: + # Clean up both clusters + self.api.delete_ray_cluster(name=cluster_name_1, k8s_namespace=namespace) + self.api.delete_ray_cluster(name=cluster_name_2, k8s_namespace=namespace) + + def test_cluster_status_and_wait_until_running(self): + """Test getting cluster status and waiting for cluster to be ready.""" + cluster_name = "test-status-cluster" + namespace = "default" + + # Build 
a small cluster + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"test": "status-cluster"}, + ) + + try: + # Create the cluster + created_cluster = self.api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + self.assertIsNotNone( + created_cluster, "Cluster should be created successfully" + ) + + # Test getting cluster status (may take some time to populate) + status = self.api.get_ray_cluster_status( + name=cluster_name, + k8s_namespace=namespace, + timeout=120, + delay_between_attempts=5, + ) + self.assertIsNotNone(status, "Cluster status should be retrieved") + + # Test waiting for cluster to be running + is_running = self.api.wait_until_ray_cluster_running( + name=cluster_name, + k8s_namespace=namespace, + timeout=180, + delay_between_attempts=10, + ) + self.assertTrue(is_running, "Cluster should become ready within timeout") + + # Verify final status after cluster is ready + final_status = self.api.get_ray_cluster_status( + name=cluster_name, + k8s_namespace=namespace, + timeout=10, + delay_between_attempts=2, + ) + self.assertIsNotNone(final_status, "Final status should be available") + self.assertIn("state", final_status, "Status should contain state field") + self.assertEqual( + final_status["state"], "ready", "Cluster should be in ready state" + ) + + finally: + # Clean up + self.api.delete_ray_cluster(name=cluster_name, k8s_namespace=namespace) + + def test_patch_ray_cluster(self): + """Test patching an existing Ray cluster.""" + cluster_name = "test-patch-cluster" + namespace = "default" + + # Build a small cluster + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"test": "patch-cluster"}, + ) + + try: + # Create the cluster + created_cluster = self.api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + self.assertIsNotNone( + created_cluster, "Cluster should be created successfully" + ) + + # 
Wait for cluster to be ready before patching + self.api.wait_until_ray_cluster_running( + name=cluster_name, + k8s_namespace=namespace, + timeout=180, + delay_between_attempts=10, + ) + + # Create a patch to update the cluster (e.g., add a label) + patch_data = { + "metadata": {"labels": {"test": "patch-cluster", "patched": "true"}} + } + + # Apply the patch + patch_result = self.api.patch_ray_cluster( + name=cluster_name, ray_patch=patch_data, k8s_namespace=namespace + ) + self.assertTrue(patch_result, "Patch operation should succeed") + + # Verify the patch was applied + updated_cluster = self.api.get_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + self.assertIsNotNone(updated_cluster, "Updated cluster should be retrieved") + self.assertIn( + "patched", + updated_cluster["metadata"]["labels"], + "Patched label should be present", + ) + self.assertEqual( + updated_cluster["metadata"]["labels"]["patched"], + "true", + "Patched label should have correct value", + ) + + finally: + # Clean up + self.api.delete_ray_cluster(name=cluster_name, k8s_namespace=namespace) diff --git a/src/codeflare_sdk/vendored/python_client_test/test_director.py b/src/codeflare_sdk/vendored/python_client_test/test_director.py new file mode 100644 index 00000000..07536971 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/test_director.py @@ -0,0 +1,121 @@ +import unittest +from codeflare_sdk.vendored.python_client.utils import kuberay_cluster_builder + + +class TestDirector(unittest.TestCase): + def __init__(self, methodName: str = ...) 
-> None: + super().__init__(methodName) + self.director = kuberay_cluster_builder.Director() + + def test_build_basic_cluster(self): + cluster = self.director.build_basic_cluster(name="basic-cluster") + # testing meta + actual = cluster["metadata"]["name"] + expected = "basic-cluster" + self.assertEqual(actual, expected) + + actual = cluster["metadata"]["namespace"] + expected = "default" + self.assertEqual(actual, expected) + + # testing the head pod + actual = cluster["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["cpu"] + expected = "2" + self.assertEqual(actual, expected) + + def test_build_small_cluster(self): + cluster = self.director.build_small_cluster(name="small-cluster") + # testing meta + actual = cluster["metadata"]["name"] + expected = "small-cluster" + self.assertEqual(actual, expected) + + actual = cluster["metadata"]["namespace"] + expected = "default" + self.assertEqual(actual, expected) + + # testing the head pod + actual = cluster["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["cpu"] + expected = "2" + self.assertEqual(actual, expected) + + # testing the workergroup + actual = cluster["spec"]["workerGroupSpecs"][0]["replicas"] + expected = 1 + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["requests"]["cpu"] + expected = "1" + self.assertEqual(actual, expected) + + def test_build_medium_cluster(self): + cluster = self.director.build_medium_cluster(name="medium-cluster") + # testing meta + actual = cluster["metadata"]["name"] + expected = "medium-cluster" + self.assertEqual(actual, expected) + + actual = cluster["metadata"]["namespace"] + expected = "default" + self.assertEqual(actual, expected) + + # testing the head pod + actual = cluster["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["cpu"] + expected = "2" + 
self.assertEqual(actual, expected) + + # testing the workergroup + actual = cluster["spec"]["workerGroupSpecs"][0]["replicas"] + expected = 3 + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["groupName"] + expected = "medium-cluster-workers" + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["requests"]["cpu"] + expected = "2" + self.assertEqual(actual, expected) + + def test_build_large_cluster(self): + cluster = self.director.build_large_cluster(name="large-cluster") + # testing meta + actual = cluster["metadata"]["name"] + expected = "large-cluster" + self.assertEqual(actual, expected) + + actual = cluster["metadata"]["namespace"] + expected = "default" + self.assertEqual(actual, expected) + + # testing the head pod + actual = cluster["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["cpu"] + expected = "2" + self.assertEqual(actual, expected) + + # testing the workergroup + actual = cluster["spec"]["workerGroupSpecs"][0]["replicas"] + expected = 6 + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["groupName"] + expected = "large-cluster-workers" + self.assertEqual(actual, expected) + + actual = cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"][ + "containers" + ][0]["resources"]["requests"]["cpu"] + expected = "3" + self.assertEqual(actual, expected) diff --git a/src/codeflare_sdk/vendored/python_client_test/test_job_api.py b/src/codeflare_sdk/vendored/python_client_test/test_job_api.py new file mode 100644 index 00000000..bad75edc --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/test_job_api.py @@ -0,0 +1,567 @@ +import time +import unittest +from codeflare_sdk.vendored.python_client import kuberay_job_api, kuberay_cluster_api +from codeflare_sdk.vendored.python_client.utils import kuberay_cluster_builder +from helpers 
import create_job_with_cluster_selector, create_job_with_ray_cluster_spec + +namespace = "default" + + +class TestJobApi(unittest.TestCase): + def __init__(self, methodName: str = ...) -> None: + super().__init__(methodName) + + self.api = kuberay_job_api.RayjobApi() + self.cluster_api = kuberay_cluster_api.RayClusterApi() + self.director = kuberay_cluster_builder.Director() + + def test_submit_ray_job_to_existing_cluster(self): + """Test submitting a job to an existing cluster using clusterSelector.""" + cluster_name = "premade" + + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"ray.io/cluster": cluster_name}, + ) + + self.assertIsNotNone(cluster_body, "Cluster should be built successfully") + self.assertEqual(cluster_body["metadata"]["name"], cluster_name) + + created_cluster = self.cluster_api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + + self.assertIsNotNone(created_cluster, "Cluster should be created successfully") + + self.cluster_api.wait_until_ray_cluster_running(cluster_name, namespace, 60, 10) + job_name = "premade-cluster-job" + try: + job_body = create_job_with_cluster_selector( + job_name, + namespace, + cluster_name, + ) + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + self.assertEqual(submitted_job["metadata"]["name"], job_name) + self.assertEqual( + submitted_job["spec"]["clusterSelector"]["ray.io/cluster"], cluster_name + ) + + self.api.wait_until_job_finished(job_name, namespace, 120, 10) + finally: + self.cluster_api.delete_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + + self.api.delete_job(job_name, namespace) + + def test_get_job_status(self): + """Test getting job status for a running job.""" + cluster_name = "status-test-cluster" + + cluster_body = self.director.build_small_cluster( + name=cluster_name, + 
k8s_namespace=namespace, + labels={"ray.io/cluster": cluster_name}, + ) + + created_cluster = self.cluster_api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + self.assertIsNotNone(created_cluster, "Cluster should be created successfully") + + self.cluster_api.wait_until_ray_cluster_running(cluster_name, namespace, 60, 10) + + job_name = "status-test-job" + try: + job_body = create_job_with_cluster_selector( + job_name, + namespace, + cluster_name, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + status = self.api.get_job_status( + job_name, namespace, timeout=30, delay_between_attempts=2 + ) + self.assertIsNotNone(status, "Job status should be retrieved") + + # Verify expected status fields + self.assertIn( + "jobDeploymentStatus", + status, + "Status should contain jobDeploymentStatus field", + ) + self.assertIn("jobId", status, "Status should contain jobId field") + self.assertIn( + "rayClusterName", status, "Status should contain rayClusterName field" + ) + + self.api.wait_until_job_finished(job_name, namespace, 60, 5) + + final_status = self.api.get_job_status( + job_name, namespace, timeout=10, delay_between_attempts=1 + ) + self.assertIsNotNone(final_status, "Final job status should be retrieved") + + self.assertIn( + "jobDeploymentStatus", + final_status, + "Final status should contain jobDeploymentStatus field", + ) + + finally: + self.cluster_api.delete_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + self.api.delete_job(job_name, namespace) + + def test_wait_until_job_finished(self): + """Test waiting for job completion.""" + cluster_name = "wait-test-cluster" + + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"ray.io/cluster": cluster_name}, + ) + + created_cluster = self.cluster_api.create_ray_cluster( + body=cluster_body, 
k8s_namespace=namespace + ) + self.assertIsNotNone(created_cluster, "Cluster should be created successfully") + + self.cluster_api.wait_until_ray_cluster_running( + cluster_name, namespace, 180, 10 + ) + + job_name = "wait-test-job" + try: + job_body = create_job_with_cluster_selector( + job_name, + namespace, + cluster_name, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + result = self.api.wait_until_job_finished( + job_name, namespace, timeout=180, delay_between_attempts=2 + ) + self.assertTrue(result, "Job should complete successfully within timeout") + + finally: + self.cluster_api.delete_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + self.api.delete_job(job_name, namespace) + + def test_delete_job(self): + """Test deleting a job.""" + cluster_name = "delete-test-cluster" + + cluster_body = self.director.build_small_cluster( + name=cluster_name, + k8s_namespace=namespace, + labels={"ray.io/cluster": cluster_name}, + ) + + created_cluster = self.cluster_api.create_ray_cluster( + body=cluster_body, k8s_namespace=namespace + ) + self.assertIsNotNone(created_cluster, "Cluster should be created successfully") + + self.cluster_api.wait_until_ray_cluster_running(cluster_name, namespace, 60, 10) + + job_name = "delete-test-job" + try: + job_body = create_job_with_cluster_selector( + job_name, + namespace, + cluster_name, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + self.api.wait_until_job_finished(job_name, namespace, 60, 5) + + delete_result = self.api.delete_job(job_name, namespace) + self.assertTrue(delete_result, "Job should be deleted successfully") + + finally: + self.cluster_api.delete_ray_cluster( + name=cluster_name, k8s_namespace=namespace + ) + + def 
test_get_job_status_nonexistent_job(self): + """Test getting status for a non-existent job.""" + status = self.api.get_job_status( + "nonexistent-job", namespace, timeout=2, delay_between_attempts=2 + ) + self.assertIsNone(status, "Status should be None for non-existent job") + + def test_wait_until_job_finished_nonexistent_job(self): + """Test waiting for completion of a non-existent job.""" + result = self.api.wait_until_job_finished( + "nonexistent-job", namespace, timeout=2, delay_between_attempts=2 + ) + self.assertFalse(result, "Should return False for non-existent job") + + def test_delete_job_nonexistent_job(self): + """Test deleting a non-existent job.""" + result = self.api.delete_job("nonexistent-job", namespace) + self.assertFalse(result, "Should return False for non-existent job") + + def test_submit_job_invalid_spec(self): + """Test submitting a job with invalid specification.""" + invalid_job = { + "apiVersion": "invalid/version", + "kind": "InvalidKind", + "metadata": { + "name": "invalid-job", + "namespace": namespace, + }, + "spec": { + "invalidField": "invalidValue", + }, + } + + result = self.api.submit_job(job=invalid_job, k8s_namespace=namespace) + self.assertIsNone(result, "Should return None for invalid job specification") + + def test_submit_job_with_ray_cluster_spec(self): + """Test submitting a job with rayClusterSpec - KubeRay will create and manage the cluster lifecycle.""" + job_name = "cluster-spec-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + self.assertEqual(submitted_job["metadata"]["name"], job_name) + + # Verify rayClusterSpec structure + self.assertIn( + "rayClusterSpec", + submitted_job["spec"], + "Job should have rayClusterSpec", + ) + self.assertIn( + "headGroupSpec", + 
submitted_job["spec"]["rayClusterSpec"], + "rayClusterSpec should have headGroupSpec", + ) + self.assertIn( + "workerGroupSpecs", + submitted_job["spec"]["rayClusterSpec"], + "rayClusterSpec should have workerGroupSpecs", + ) + + result = self.api.wait_until_job_finished(job_name, namespace, 300, 10) + self.assertTrue(result, "Job should complete successfully within timeout") + + final_status = self.api.get_job_status( + job_name, namespace, timeout=10, delay_between_attempts=1 + ) + self.assertIsNotNone(final_status, "Final job status should be retrieved") + self.assertIn( + "jobDeploymentStatus", + final_status, + "Final status should contain jobDeploymentStatus field", + ) + + finally: + self.api.delete_job(job_name, namespace) + + def test_suspend_job(self): + """Test stopping a running job.""" + job_name = "stop-test-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + result = self.api.wait_until_job_running( + job_name, namespace, timeout=120, delay_between_attempts=5 + ) + self.assertTrue(result, "Job should reach running state before suspension") + + stop_result = self.api.suspend_job(job_name, namespace) + self.assertTrue(stop_result, "Job should be suspended successfully") + + suspended = self.wait_for_job_status( + job_name, namespace, "Suspended", timeout=30 + ) + self.assertTrue(suspended, "Job deployment status should be Suspended") + + finally: + self.api.delete_job(job_name, namespace) + + def test_resubmit_job(self): + """Test resubmitting a suspended job.""" + job_name = "resubmit-test-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job 
should be submitted successfully") + + result = self.api.wait_until_job_running( + job_name, namespace, timeout=120, delay_between_attempts=5 + ) + self.assertTrue(result, "Job should reach running state before suspension") + + stop_result = self.api.suspend_job(job_name, namespace) + self.assertTrue(stop_result, "Job should be suspended successfully") + + suspended = self.wait_for_job_status( + job_name, namespace, "Suspended", timeout=30 + ) + self.assertTrue( + suspended, "Job should be in Suspended status before resubmission" + ) + + resubmit_result = self.api.resubmit_job(job_name, namespace) + self.assertTrue(resubmit_result, "Job should be resubmitted successfully") + + result = self.api.wait_until_job_finished( + job_name, namespace, timeout=120, delay_between_attempts=5 + ) + self.assertTrue(result, "Resubmitted job should complete successfully") + + finally: + self.api.delete_job(job_name, namespace) + + def test_stop_and_resubmit_job(self): + """Test the full stop and resubmit cycle.""" + job_name = "stop-resubmit-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + result = self.api.wait_until_job_running( + job_name, namespace, timeout=120, delay_between_attempts=5 + ) + self.assertTrue( + result, + "Job should reach running state before suspension, completion, or failure", + ) + + stop_result = self.api.suspend_job(job_name, namespace) + self.assertTrue(stop_result, "Job should be suspended successfully") + + suspended = self.wait_for_job_status( + job_name, namespace, "Suspended", timeout=30 + ) + self.assertTrue( + suspended, "Job should reach Suspended status within 30 seconds" + ) + + resubmit_result = self.api.resubmit_job(job_name, namespace) + self.assertTrue(resubmit_result, "Job should be resubmitted successfully") + + 
result = self.api.wait_until_job_finished( + job_name, namespace, timeout=120, delay_between_attempts=5 + ) + self.assertTrue(result, "Resubmitted job should complete successfully") + + finally: + self.api.delete_job(job_name, namespace) + + def test_suspend_job_nonexistent(self): + """Test stopping a non-existent job.""" + result = self.api.suspend_job("nonexistent-job", namespace) + self.assertFalse(result, "Should return False for non-existent job") + + def test_resubmit_job_nonexistent(self): + """Test resubmitting a non-existent job.""" + result = self.api.resubmit_job("nonexistent-job", namespace) + self.assertFalse(result, "Should return False for non-existent job") + + def test_wait_until_job_running(self): + """Test waiting for a job to reach running state.""" + job_name = "wait-running-test-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + result = self.api.wait_until_job_running( + job_name, namespace, timeout=60, delay_between_attempts=3 + ) + self.assertTrue(result, "Job should reach running state") + + self.api.wait_until_job_finished(job_name, namespace, 60, 5) + + finally: + self.api.delete_job(job_name, namespace) + + def test_get_job(self): + """Test getting a job.""" + job_name = "get-test-job" + + try: + job_body = create_job_with_ray_cluster_spec( + job_name=job_name, + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone(submitted_job, "Job should be submitted successfully") + + status = self.api.get_job_status( + job_name, namespace, timeout=30, delay_between_attempts=2 + ) + self.assertIsNotNone(status, "Job status should be available") + + job = self.api.get_job(job_name, namespace) + self.assertIsNotNone(job, "Job should be retrieved 
successfully") + self.assertEqual(job["metadata"]["name"], job_name) + finally: + self.api.delete_job(job_name, namespace) + + def test_list_jobs(self): + """Test listing all jobs in a namespace.""" + created_jobs = [] + + try: + initial_result = self.api.list_jobs(k8s_namespace=namespace) + self.assertIsNotNone(initial_result, "List jobs should return a result") + self.assertIn( + "items", initial_result, "Result should contain 'items' field" + ) + initial_count = len(initial_result.get("items", [])) + + test_jobs = [ + {"name": "list-test-job-1", "type": "cluster_spec"}, + {"name": "list-test-job-2", "type": "cluster_spec"}, + {"name": "list-test-job-3", "type": "cluster_spec"}, + ] + + for job_info in test_jobs: + job_body = create_job_with_ray_cluster_spec( + job_name=job_info["name"], + namespace=namespace, + ) + + submitted_job = self.api.submit_job( + job=job_body, + k8s_namespace=namespace, + ) + self.assertIsNotNone( + submitted_job, + f"Job {job_info['name']} should be submitted successfully", + ) + created_jobs.append(job_info["name"]) + + status = self.api.get_job_status( + job_info["name"], namespace, timeout=10, delay_between_attempts=1 + ) + self.assertIsNotNone( + status, f"Job {job_info['name']} status should be available" + ) + + result = self.api.list_jobs(k8s_namespace=namespace) + self.assertIsNotNone(result, "List jobs should return a result") + self.assertIn("items", result, "Result should contain 'items' field") + + items = result.get("items", []) + current_count = len(items) + + self.assertGreaterEqual( + current_count, + initial_count + len(test_jobs), + f"Should have at least {len(test_jobs)} more jobs than initially", + ) + + job_names_in_list = [item.get("metadata", {}).get("name") for item in items] + for job_name in created_jobs: + self.assertIn( + job_name, job_names_in_list, f"Job {job_name} should be in the list" + ) + + finally: + for job_name in created_jobs: + try: + self.api.delete_job(job_name, namespace) + except Exception as 
e: + print(f"Failed to delete job {job_name}: {e}") + + def wait_for_job_status( + self, job_name, namespace, expected_status, timeout=60, check_interval=3 + ): + """Wait for a job to reach a specific status with polling.""" + start_time = time.time() + while time.time() - start_time < timeout: + status = self.api.get_job_status( + job_name, namespace, timeout=5, delay_between_attempts=1 + ) + current_status = status.get("jobDeploymentStatus") if status else None + + if current_status == expected_status: + return True + + time.sleep(check_interval) + + return False diff --git a/src/codeflare_sdk/vendored/python_client_test/test_utils.py b/src/codeflare_sdk/vendored/python_client_test/test_utils.py new file mode 100644 index 00000000..93d79db9 --- /dev/null +++ b/src/codeflare_sdk/vendored/python_client_test/test_utils.py @@ -0,0 +1,352 @@ +import unittest +import copy +from codeflare_sdk.vendored.python_client.utils import ( + kuberay_cluster_utils, + kuberay_cluster_builder, +) + + +test_cluster_body: dict = { + "apiVersion": "ray.io/v1", + "kind": "RayCluster", + "metadata": { + "labels": {"controller-tools.k8s.io": "1.0"}, + "name": "raycluster-complete-raw", + }, + "spec": { + "rayVersion": "2.46.0", + "headGroupSpec": { + "rayStartParams": {"dashboard-host": "0.0.0.0"}, + "template": { + "metadata": {"labels": {}}, + "spec": { + "containers": [ + { + "name": "ray-head", + "image": "rayproject/ray:2.46.0", + "ports": [ + {"containerPort": 6379, "name": "gcs"}, + {"containerPort": 8265, "name": "dashboard"}, + {"containerPort": 10001, "name": "client"}, + ], + "lifecycle": { + "preStop": { + "exec": {"command": ["/bin/sh", "-c", "ray stop"]} + } + }, + "volumeMounts": [ + {"mountPath": "/tmp/ray", "name": "ray-logs"} + ], + "resources": { + "limits": {"cpu": "1", "memory": "2G"}, + "requests": {"cpu": "500m", "memory": "2G"}, + }, + } + ], + "volumes": [{"name": "ray-logs", "emptyDir": {}}], + }, + }, + }, + "workerGroupSpecs": [ + { + "replicas": 1, + 
import unittest
import copy
from codeflare_sdk.vendored.python_client.utils import (
    kuberay_cluster_utils,
    kuberay_cluster_builder,
)


# A fully-populated RayCluster custom resource used as a fixture for the
# worker-group mutation tests below. The second ("side-car") container lets
# the tests distinguish per-container from all-container resource updates.
test_cluster_body: dict = {
    "apiVersion": "ray.io/v1",
    "kind": "RayCluster",
    "metadata": {
        "labels": {"controller-tools.k8s.io": "1.0"},
        "name": "raycluster-complete-raw",
    },
    "spec": {
        "rayVersion": "2.46.0",
        "headGroupSpec": {
            "rayStartParams": {"dashboard-host": "0.0.0.0"},
            "template": {
                "metadata": {"labels": {}},
                "spec": {
                    "containers": [
                        {
                            "name": "ray-head",
                            "image": "rayproject/ray:2.46.0",
                            "ports": [
                                {"containerPort": 6379, "name": "gcs"},
                                {"containerPort": 8265, "name": "dashboard"},
                                {"containerPort": 10001, "name": "client"},
                            ],
                            "lifecycle": {
                                "preStop": {
                                    "exec": {"command": ["/bin/sh", "-c", "ray stop"]}
                                }
                            },
                            "volumeMounts": [
                                {"mountPath": "/tmp/ray", "name": "ray-logs"}
                            ],
                            "resources": {
                                "limits": {"cpu": "1", "memory": "2G"},
                                "requests": {"cpu": "500m", "memory": "2G"},
                            },
                        }
                    ],
                    "volumes": [{"name": "ray-logs", "emptyDir": {}}],
                },
            },
        },
        "workerGroupSpecs": [
            {
                "replicas": 1,
                "minReplicas": 1,
                "maxReplicas": 10,
                "groupName": "small-group",
                "rayStartParams": {},
                "template": {
                    "spec": {
                        "containers": [
                            {
                                "name": "ray-worker",
                                "image": "rayproject/ray:2.46.0",
                                "lifecycle": {
                                    "preStop": {
                                        "exec": {
                                            "command": ["/bin/sh", "-c", "ray stop"]
                                        }
                                    }
                                },
                                "volumeMounts": [
                                    {"mountPath": "/tmp/ray", "name": "ray-logs"}
                                ],
                                "resources": {
                                    "limits": {"cpu": "1", "memory": "1G"},
                                    "requests": {"cpu": "500m", "memory": "1G"},
                                },
                            },
                            {
                                "name": "side-car",
                                "image": "rayproject/ray:2.46.0",
                                "resources": {
                                    "limits": {"cpu": "1", "memory": "1G"},
                                    "requests": {"cpu": "500m", "memory": "1G"},
                                },
                            },
                        ],
                        "volumes": [{"name": "ray-logs", "emptyDir": {}}],
                    }
                },
            }
        ],
    },
    "status": {
        "availableWorkerReplicas": 2,
        "desiredWorkerReplicas": 1,
        "endpoints": {"client": "10001", "dashboard": "8265", "gcs-server": "6379"},
        "head": {"serviceIP": "10.152.183.194"},
        "lastUpdateTime": "2023-02-16T05:15:17Z",
        "maxWorkerReplicas": 2,
    },
}


class TestUtils(unittest.TestCase):
    """Unit tests for ClusterUtils worker-group helpers and name/label validation."""

    # NOTE: the default was previously the Ellipsis literal (`= ...`), a
    # typeshed-stub idiom; constructing TestUtils() without arguments would
    # have passed `...` to TestCase.__init__ as the method name. unittest's
    # documented default is "runTest".
    def __init__(self, methodName: str = "runTest") -> None:
        super().__init__(methodName)
        self.director = kuberay_cluster_builder.Director()
        self.utils = kuberay_cluster_utils.ClusterUtils()

    def test_populate_worker_group(self):
        """populate_worker_group builds a complete worker group spec."""
        worker_group, succeeded = self.utils.populate_worker_group(
            group_name="small-group",
            ray_image="rayproject/ray:2.46.0",
            ray_command=["/bin/bash", "-lc"],
            init_image="busybox:1.28",
            cpu_requests="3",
            memory_requests="1G",
            cpu_limits="5",
            memory_limits="10G",
            replicas=1,
            min_replicas=1,
            max_replicas=3,
            ray_start_params={"block": "True"},
        )
        self.assertIsNotNone(worker_group)
        self.assertEqual(succeeded, True)

        self.assertEqual(worker_group["groupName"], "small-group")
        self.assertEqual(worker_group["maxReplicas"], 3)
        self.assertEqual(worker_group["minReplicas"], 1)
        self.assertEqual(worker_group["rayStartParams"], {"block": "True"})
        self.assertEqual(worker_group["replicas"], 1)

        container = worker_group["template"]["spec"]["containers"][0]
        self.assertEqual(container["image"], "rayproject/ray:2.46.0")
        self.assertEqual(container["command"], ["/bin/bash", "-lc"])

        resources = container["resources"]
        self.assertEqual(resources["requests"]["cpu"], "3")
        self.assertEqual(resources["requests"]["memory"], "1G")
        self.assertEqual(resources["limits"]["cpu"], "5")
        self.assertEqual(resources["limits"]["memory"], "10G")

        # min_replicas can be 0 and ray_start_params can be an empty dict.
        worker_group, succeeded = self.utils.populate_worker_group(
            group_name="small-group",
            ray_image="rayproject/ray:2.46.0",
            ray_command=["/bin/bash", "-lc"],
            init_image="busybox:1.28",
            cpu_requests="3",
            memory_requests="1G",
            cpu_limits="5",
            memory_limits="10G",
            replicas=1,
            min_replicas=0,
            max_replicas=3,
            ray_start_params={},
        )
        self.assertIsNotNone(worker_group)
        self.assertEqual(succeeded, True)
        self.assertEqual(worker_group["rayStartParams"], {})
        self.assertEqual(worker_group["minReplicas"], 0)

    def test_update_worker_group_replicas(self):
        """update_worker_group_replicas rewrites the replica bounds in place."""
        cluster = self.director.build_small_cluster(name="small-cluster")

        actual = cluster["metadata"]["name"]
        expected = "small-cluster"
        self.assertEqual(actual, expected)

        cluster, succeeded = self.utils.update_worker_group_replicas(
            cluster,
            group_name="small-cluster-workers",
            max_replicas=10,
            min_replicas=1,
            replicas=5,
        )

        self.assertEqual(succeeded, True)

        # testing the workergroup
        actual = cluster["spec"]["workerGroupSpecs"][0]["replicas"]
        expected = 5
        self.assertEqual(actual, expected)

        actual = cluster["spec"]["workerGroupSpecs"][0]["maxReplicas"]
        expected = 10
        self.assertEqual(actual, expected)

        actual = cluster["spec"]["workerGroupSpecs"][0]["minReplicas"]
        expected = 1
        self.assertEqual(actual, expected)

    def test_update_worker_group_resources(self):
        """update_worker_group_resources targets one, all, or no containers by name."""
        cluster: dict = copy.deepcopy(test_cluster_body)
        actual = cluster["metadata"]["name"]
        expected = "raycluster-complete-raw"
        self.assertEqual(actual, expected)

        # "unspecified" updates only the first container; the side-car keeps
        # its original requests.
        cluster, succeeded = self.utils.update_worker_group_resources(
            cluster,
            group_name="small-group",
            cpu_requests="3",
            memory_requests="5G",
            cpu_limits="5",
            memory_limits="10G",
            container_name="unspecified",
        )
        self.assertEqual(succeeded, True)
        self.assertEqual(
            cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
                "resources"
            ]["requests"]["cpu"],
            "3",
        )
        self.assertEqual(
            cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][1][
                "resources"
            ]["requests"]["cpu"],
            "500m",
        )

        # Updating a named container touches only that container.
        cluster, succeeded = self.utils.update_worker_group_resources(
            cluster,
            group_name="small-group",
            cpu_requests="4",
            memory_requests="5G",
            cpu_limits="5",
            memory_limits="10G",
            container_name="side-car",
        )
        self.assertEqual(succeeded, True)
        self.assertEqual(
            cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][1][
                "resources"
            ]["requests"]["cpu"],
            "4",
        )

        # "all_containers" updates every container in the group.
        cluster, succeeded = self.utils.update_worker_group_resources(
            cluster,
            group_name="small-group",
            cpu_requests="4",
            memory_requests="15G",
            cpu_limits="5",
            memory_limits="25G",
            container_name="all_containers",
        )
        self.assertEqual(succeeded, True)
        self.assertEqual(
            cluster["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][1][
                "resources"
            ]["requests"]["memory"],
            "15G",
        )

        # A container name that matches nothing reports failure.
        cluster, succeeded = self.utils.update_worker_group_resources(
            cluster,
            group_name="small-group",
            cpu_requests="4",
            memory_requests="15G",
            cpu_limits="5",
            memory_limits="25G",
            container_name="wrong_name",
        )
        self.assertEqual(succeeded, False)

        # missing parameter test
        with self.assertRaises(TypeError):
            cluster, succeeded = self.utils.update_worker_group_resources(
                cluster,
                group_name="small-group",
                cpu_requests="4",
            )

    def test_duplicate_worker_group(self):
        """duplicate_worker_group clones a group under a new name."""
        cluster = self.director.build_small_cluster(name="small-cluster")
        actual = cluster["metadata"]["name"]
        expected = "small-cluster"
        self.assertEqual(actual, expected)

        cluster, succeeded = self.utils.duplicate_worker_group(
            cluster,
            group_name="small-cluster-workers",
            new_group_name="new-small-group-workers",
        )
        self.assertEqual(succeeded, True)
        self.assertEqual(
            cluster["spec"]["workerGroupSpecs"][1]["groupName"],
            "new-small-group-workers",
        )
        self.assertEqual(
            cluster["spec"]["workerGroupSpecs"][1]["template"]["spec"]["containers"][0][
                "resources"
            ]["requests"]["cpu"],
            "1",
        )

        # missing parameter test
        with self.assertRaises(TypeError):
            cluster, succeeded = self.utils.duplicate_worker_group(
                cluster,
                group_name="small-cluster-workers",
            )

    def test_delete_worker_group(self):
        """
        Test delete_worker_group
        """
        cluster = self.director.build_small_cluster(name="small-cluster")
        actual = cluster["metadata"]["name"]
        expected = "small-cluster"
        self.assertEqual(actual, expected)

        cluster, succeeded = self.utils.delete_worker_group(
            cluster,
            group_name="small-cluster-workers",
        )
        self.assertEqual(succeeded, True)
        self.assertEqual(len(cluster["spec"]["workerGroupSpecs"]), 0)

        # deleting the same worker group again should fail
        with self.assertRaises(AssertionError):
            cluster, succeeded = self.utils.delete_worker_group(
                cluster,
                group_name="small-cluster-workers",
            )

    def test_name(self):
        """is_valid_name rejects bad edges, underscores, and over-long names."""
        self.assertEqual(self.utils.is_valid_name("name"), True)
        self.assertEqual(self.utils.is_valid_name("name-"), False)
        self.assertEqual(self.utils.is_valid_name(".name"), False)
        self.assertEqual(self.utils.is_valid_name("name_something"), False)
        self.assertEqual(
            self.utils.is_valid_name(
                "toooooooooooooooooooooooooooooooooooooooooo-loooooooooooooooooooong"
            ),
            False,
        )

    def test_label(self):
        """is_valid_label allows underscores and dots but still bounds length."""
        self.assertEqual(self.utils.is_valid_label("name"), True)
        self.assertEqual(self.utils.is_valid_label("name-"), False)
        self.assertEqual(self.utils.is_valid_label(".name"), False)
        self.assertEqual(self.utils.is_valid_label("name_something"), True)
        self.assertEqual(self.utils.is_valid_label("good.name"), True)
        self.assertEqual(
            self.utils.is_valid_label(
                "toooooooooooooooooooooooooooooooooooooooooo-loooooooooooooooooooong"
            ),
            False,
        )
82858d28..8f6f0c3b 100644 --- a/tests/e2e/rayjob/rayjob_existing_cluster_test.py +++ b/tests/e2e/rayjob/rayjob_existing_cluster_test.py @@ -12,7 +12,7 @@ ) from codeflare_sdk import RayJob, TokenAuthentication from codeflare_sdk.ray.rayjobs.status import CodeflareRayJobStatus -from python_client.kuberay_job_api import RayjobApi +from codeflare_sdk.vendored.python_client.kuberay_job_api import RayjobApi class TestRayJobExistingCluster: diff --git a/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py index 10390011..2256f06f 100644 --- a/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py +++ b/tests/e2e/rayjob/rayjob_lifecycled_cluster_test.py @@ -10,8 +10,8 @@ from codeflare_sdk import RayJob, ManagedClusterConfig from kubernetes import client -from python_client.kuberay_job_api import RayjobApi -from python_client.kuberay_cluster_api import RayClusterApi +from codeflare_sdk.vendored.python_client.kuberay_job_api import RayjobApi +from codeflare_sdk.vendored.python_client.kuberay_cluster_api import RayClusterApi class TestRayJobLifecycledCluster: