In [1]:
# Import pieces from codeflare-sdk
from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication

In [None]:
# Authenticate against the target cluster. Replace the "XXXX" placeholders with
# your own token and server values; this step is only needed if you are not
# already authenticated.
auth = TokenAuthentication(token="XXXX", server="XXXX", skip_tls=False)
auth.login()


NOTE: 'quay.io/rhoai/ray:2.23.0-py39-cu121' is the default community image used by the CodeFlare SDK for creating a RayCluster resource.
If you have your own Ray image that suits your purposes, specify it in the `image` field to override the default image.

In [None]:
# Describe the Ray cluster, then wrap the configuration in a Cluster object.
# Unless `local_queue` is set below, the SDK will try to find your default local
# queue based on the annotation "kueue.x-k8s.io/default-queue": "true".
cluster_name = "hfgputest-1"

cluster_config = ClusterConfiguration(
    name=cluster_name,
    # For GPU-enabled workloads, set head_gpus and num_gpus.
    head_gpus=0,
    num_gpus=0,
    num_workers=1,
    min_cpus=1,
    max_cpus=1,
    min_memory=4,
    max_memory=4,
    # image="",  # Optional: overrides the default Ray image
    # When enabled, Ray Cluster yaml files are written to /HOME/.codeflare/resources.
    write_to_file=False,
    # local_queue="local-queue-name",  # Optional: specify the local queue manually
)
cluster = Cluster(cluster_config)

In [3]:
# Bring up the cluster described by the configuration above.
cluster.up()

In [4]:
# Wait until the requested cluster resources are set up and running.
cluster.wait_ready()

Waiting for requested resources to be set up...
Requested cluster up and running!


### Connect via the rayclient route

In [None]:
from codeflare_sdk import generate_cert

# Both helpers take the cluster name and its namespace, so resolve the namespace once.
# NOTE(review): going by their names, they create the TLS certificate used for the
# Ray client connection and export the matching environment settings — confirm
# against the codeflare_sdk documentation.
cluster_namespace = cluster.config.namespace
generate_cert.generate_tls_cert(cluster_name, cluster_namespace)
generate_cert.export_env(cluster_name, cluster_namespace)

In [6]:
import ray

# Drop any existing Ray connection first (presumably so this cell can be re-run),
# then connect to the cluster through its client URL with DEBUG-level logging.
ray.shutdown()
ray_client_url = cluster.local_client_url()
ray.init(address=ray_client_url, logging_level="DEBUG")

2023-06-27 19:14:16,088	INFO client_builder.py:251 -- Passing the following kwargs to ray.init() on the server: logging_level
2023-06-27 19:14:16,100	DEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.IDLE
2023-06-27 19:14:16,308	DEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.CONNECTING
2023-06-27 19:14:16,434	DEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.READY
2023-06-27 19:14:16,436	DEBUG worker.py:807 -- Pinging server.
2023-06-27 19:14:18,634	DEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000001000000
2023-06-27 19:14:18,635	DEBUG worker.py:564 -- Scheduling task get_dashboard_url 0 b'\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x00\x00\x00\x01\x00\x00\x00'
2023-06-27 19:14:18,645	DEBUG worker.py:640 -- Retaining c8ef45ccd0112571ffffffffffffffffffffffff0100000001000000
2023-06-27 19:14:19,454	DEBUG worker.py:636 -- Releasing c8

0,1
Python version:,3.8.13
Ray version:,2.1.0
Dashboard:,http://10.254.20.41:8265


In [7]:
import math
import ray

@ray.remote
def heavy_calculation_part(num_iterations):
    """Ray task: accumulate sin(i) * cos(j) * tan(k) over a cubic index grid.

    Each of the three indices runs over ``range(num_iterations)``, so the
    innermost statement executes ``num_iterations ** 3`` times.

    Args:
        num_iterations: Exclusive upper bound for each of the three indices.

    Returns:
        The accumulated sum as a float (0.0 when ``num_iterations`` <= 0).
    """
    steps = range(num_iterations)
    total = 0.0
    for a in steps:
        for b in steps:
            for c in steps:
                total += math.sin(a) * math.cos(b) * math.tan(c)
    return total
@ray.remote
def heavy_calculation(num_iterations, num_parts=30):
    """Ray task: fan the computation out into sub-tasks and sum their results.

    Launches ``num_parts`` ``heavy_calculation_part`` tasks, each given
    ``num_iterations // num_parts`` iterations, waits for all of them, and
    returns the sum of the partial results.

    Args:
        num_iterations: Iteration count to split across the sub-tasks. The
            split uses integer division, so any remainder is dropped.
        num_parts: Number of sub-tasks to launch; must be non-zero.
            Defaults to 30.

    Returns:
        The sum of the values returned by the sub-tasks.
    """
    iterations_per_part = num_iterations // num_parts
    part_refs = [
        heavy_calculation_part.remote(iterations_per_part)
        for _ in range(num_parts)
    ]
    # ray.get resolves the whole list of references into their values.
    return sum(ray.get(part_refs))


In [8]:
# Schedule the computation on the cluster. .remote() hands back an object
# reference rather than the value itself; the value is fetched with ray.get.
ref = heavy_calculation.remote(3000)

2023-06-27 19:14:28,222	DEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000002000000
2023-06-27 19:14:28,222	DEBUG worker.py:564 -- Scheduling task heavy_calculation 0 b'\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x00\x00\x00\x02\x00\x00\x00'


In [9]:
# Fetch the task's result through its reference; as the last expression in the
# cell, the value is displayed as the cell output.
ray.get(ref)

2023-06-27 19:14:29,202	DEBUG worker.py:640 -- Retaining 16310a0f0a45af5cffffffffffffffffffffffff0100000001000000
2023-06-27 19:14:31,224	DEBUG worker.py:439 -- Internal retry for get [ClientObjectRef(16310a0f0a45af5cffffffffffffffffffffffff0100000001000000)]


1789.4644387076714

In [10]:
# Clean up: cancel the task behind `ref`, then disconnect from Ray.
# NOTE(review): if the ray.get call above already returned, the task has
# finished and the cancel is presumably just a safety net — confirm.
ray.cancel(ref)
ray.shutdown()

2023-06-27 19:14:33,161	DEBUG dataclient.py:287 -- Got unawaited response connection_cleanup {
}

2023-06-27 19:14:34,460	DEBUG dataclient.py:278 -- Shutting down data channel.


In [11]:
# Tear down the cluster now that the work is finished.
cluster.down()