In [None]:
# Copyright  2024 Forusone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Ray core operation examples

### Configurations

In [1]:
! pip install --user -q "google-cloud-aiplatform[ray]>=1.56.0" \
                        "ray[data,train,tune,serve]>=2.9.3" \
                        "xgboost_ray"

In [2]:
# @title Define constants
PROJECT_NBR = "721521243942"
PROJECT_ID = "ai-hangsik"
REGION="us-central1"
RAY_CLUSTER_NM = "ray293-cluster-20250217-075541"

In [3]:
from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=REGION)

In [4]:
from typing import Tuple

from ray.runtime_env import RuntimeEnv
from ray.air.config import RunConfig
from ray.air import CheckpointConfig, ScalingConfig
from ray.util.joblib import register_ray


import ray
from ray.data import Dataset, Preprocessor
from ray.data.preprocessors import StandardScaler
from ray.train.xgboost import XGBoostTrainer
from ray.train import Result, ScalingConfig
# import xgboost


In [5]:
ray.__version__

'2.9.3'

### Connect to Ray on Vertex AI

In [6]:

ray.shutdown()

In [8]:
RAY_ADDRESS=f"vertex_ray://projects/{PROJECT_NBR}/locations/{REGION}/persistentResources/{RAY_CLUSTER_NM}"
print(f"RAY_ADDRESS:{RAY_ADDRESS}")

RUNTIME_ENV = {
  "pip": [
        "google-cloud-aiplatform[ray]>=1.56.0",
        "ray[data]==2.9.3",
        "ray[train]==2.9.3",
        "ray[tune]==2.9.3",
        "torch==2.1.2",
        "torchvision==0.16.2",
        "torchmetrics==1.2.1",
        "setuptools==69.5.1",
        "ipython",
        "xgboost_ray",
        
  ],
}

ray.init(address=RAY_ADDRESS,runtime_env=RUNTIME_ENV)

RAY_ADDRESS:vertex_ray://projects/721521243942/locations/us-central1/persistentResources/ray293-cluster-20250217-075541
[Ray on Vertex AI]: Cluster State = State.RUNNING


ConnectionError: ray client connection timeout

### Training

In [8]:
from typing import Tuple

import ray
from ray.data import Dataset, Preprocessor
from ray.data.preprocessors import StandardScaler
from ray.train.xgboost import XGBoostTrainer
from ray.train import Result, ScalingConfig
# import xgboost

ModuleNotFoundError: No module named 'xgboost_ray'

In [9]:
@ray.remote
def train_xgboost(num_workers: int, use_gpu: bool = False) -> Result:
    
    dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")
    train_dataset, valid_dataset = dataset.train_test_split(test_size=0.3)
    test_dataset = valid_dataset.drop_columns(["target"])    
    
    # train_dataset, valid_dataset, _ = prepare_data()

    # Scale some random columns
    columns_to_scale = ["mean radius", "mean texture"]
    preprocessor = StandardScaler(columns=columns_to_scale)
    train_dataset = preprocessor.fit_transform(train_dataset)
    valid_dataset = preprocessor.transform(valid_dataset)

    # XGBoost specific params
    params = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
    }

    trainer = XGBoostTrainer(
        scaling_config=ScalingConfig(num_workers=num_workers, use_gpu=use_gpu),
        label_column="target",
        params=params,
        datasets={"train": train_dataset, "valid": valid_dataset},
        num_boost_round=100,
        metadata = {"preprocessor_pkl": preprocessor.serialize()}
    )
    result = trainer.fit()
    print(result.metrics)

    return result

In [10]:
train_xgboost.remote(num_workers=2, use_gpu=False)


# result = train_xgboost(num_workers=2, use_gpu=False)

ClientObjectRef(912e96ffd8909c19ffffffffffffffffffffffff0c00000001000000)

In [13]:
import pandas as pd
from ray.train import Checkpoint

# @ray.remote
class Predict:

    def __init__(self, checkpoint: Checkpoint):
        self.model = XGBoostTrainer.get_model(checkpoint)
        self.preprocessor = Preprocessor.deserialize(checkpoint.get_metadata()["preprocessor_pkl"])

    def __call__(self, batch: pd.DataFrame) -> pd.DataFrame:
        preprocessed_batch = self.preprocessor.transform_batch(batch)
        dmatrix = xgboost.DMatrix(preprocessed_batch)
        return {"predictions": self.model.predict(dmatrix)}

# @ray.remote
def predict_xgboost(result: Result):
    # _, _, test_dataset = prepare_data()

    dataset = ray.data.read_csv("s3://anonymous@air-example-data/breast_cancer.csv")
    train_dataset, valid_dataset = dataset.train_test_split(test_size=0.3)
    test_dataset = valid_dataset.drop_columns(["target"])        
    
    scores = test_dataset.map_batches(
        Predict, 
        fn_constructor_args=[result.checkpoint], 
        concurrency=1, 
        batch_format="pandas"
    )
    
    predicted_labels = scores.map_batches(lambda df: (df > 0.5).astype(int), batch_format="pandas")
    print(f"PREDICTED LABELS")
    predicted_labels.show()

TypeError: Remote functions cannot be called directly. Instead of running '__main__.train_xgboost()', try '__main__.train_xgboost.remote()'.

### Ray Basic concept

#### Basic Operation

In [9]:
import time

# Define the square task.
@ray.remote
def square(x):
    time.sleep(3)
    return x * x

# Launch four parallel square tasks.
futures = [square.remote(i) for i in range(3)]

# Retrieve results.
print(ray.get(futures))
# -> [0, 1, 4,]

[0, 1, 4]


#### Define actor and operate

In [10]:
# Define the Counter actor.
@ray.remote
class Counter:
    def __init__(self):
        self.i = 0

    def get(self):
        return self.i

    def increase(self, value):
        self.i += value

# Create a Counter actor.
c = Counter.remote()

# Submit calls to the actor. These calls run asynchronously but in
# submission order on the remote actor process.
for _ in range(10):
    c.increase.remote(1)

# Retrieve final actor state.
print(ray.get(c.get.remote()))
# -> 10

10


#### Passing an Object

In [13]:
import numpy as np

# Define a task that sums the values in a matrix.
@ray.remote
def sum_matrix(matrix):
    return np.sum(matrix)

# Call the task with a literal argument value.
print(ray.get(sum_matrix.remote(np.ones((100, 100)))))
# -> 10000.0

# Put a large array into the object store.
matrix_ref = ray.put(np.ones((1000, 1000)))

# Call the task with the object reference as an argument.
print(ray.get(sum_matrix.remote(matrix_ref)))
# -> 1000000.0

10000.0
1000000.0


### Tasks
* https://docs.ray.io/en/latest/ray-core/tasks.html

In [11]:
import ray
import time


# A regular Python function.
def normal_function():
    return 1


# By adding the `@ray.remote` decorator, a regular Python function
# becomes a Ray remote function.
@ray.remote
def my_function():
    return 1


# To invoke this remote function, use the `remote` method.
# This will immediately return an object ref (a future) and then create
# a task that will be executed on a worker process.
obj_ref = my_function.remote()

# The result can be retrieved with ``ray.get``.
assert ray.get(obj_ref) == 1

In [20]:
@ray.remote
def slow_function():
    time.sleep(10)
    return 1


# Ray tasks are executed in parallel.
# All computation is performed in the background, driven by Ray's internal event loop.
for _ in range(4):
    # This doesn't block.
    slow_function.remote()

#### Specifying required resources

In [12]:
# Specify required resources.
@ray.remote(num_cpus=4, num_gpus=2)
def my_function():
    return 1

# Override the default resource requirements.
my_function.options(num_cpus=3).remote()

ClientObjectRef(1e360ffa862f8fe3ffffffffffffffffffffffff0100000001000000)

#### Passing object refs to Ray taks

* myfunction --> refs --> function with an argument --> refs.

In [13]:
@ray.remote
def my_function():
    return 1

@ray.remote
def function_with_an_argument(value):
    return value + 1

obj_ref1 = my_function.remote()
result1 = ray.get(obj_ref1)
print(result1)

# You can pass an object ref as an argument to another Ray task.
obj_ref2 = function_with_an_argument.remote(obj_ref1)
result2 = ray.get(obj_ref2)
print(result2)

1
2


#### wait for the slow function.

In [15]:
@ray.remote
def slow_function():
    time.sleep(10)
    return 1


object_refs = [slow_function.remote() for _ in range(2)]
# Return as soon as one of the tasks finished execution.
ready_refs, remaining_refs = ray.wait(object_refs, num_returns=1, timeout=None)

print(f"ready_refs:{ready_refs}")
print(f"remaining_refs:{remaining_refs}")


ready_refs:[ClientObjectRef(c76a79b2875a7251ffffffffffffffffffffffff0100000001000000)]
remaining_refs:[ClientObjectRef(465c0fb8d6cb3cdcffffffffffffffffffffffff0100000001000000)]


#### Multiple resturns

In [16]:
# By default, a Ray task only returns a single Object Ref.
@ray.remote
def return_single():
    return 0, 1, 2  # tuple return.


object_ref = return_single.remote()
assert ray.get(object_ref) == (0, 1, 2)


# However, you can configure Ray tasks to return multiple Object Refs.
@ray.remote(num_returns=3)
def return_multiple():
    return 0, 1, 2


object_ref0, object_ref1, object_ref2 = return_multiple.remote()

ray.get(object_ref0), ray.get(object_ref1), ray.get(object_ref2)

(0, 1, 2)

### Actors

#### Actors basic

In [48]:
import ray

#@ray.remote(num_cpus=1, num_gpus=1) # specify the resources
@ray.remote # specify the resources
class Counter:
    def __init__(self):
        self.value = 20

    def increment(self, num):
        self.value = self.value + num
        return self.value

    def get_counter(self):
        return self.value

# Create an actor from this class.
counter = Counter.remote()

# Call the actor.
obj_ref = counter.increment.remote(10)
print(ray.get(obj_ref))



30


#### Multiple operations

In [52]:
# Create ten Counter actors.
counters = [Counter.remote() for _ in range(10)]

# Increment each Counter once and get the results. These tasks all happen in parallel.
results = ray.get([c.increment.remote(10) for c in counters])
print(results)


[30, 30, 30, 30, 30, 30, 30, 30, 30, 30]


In [53]:
# Increment the first Counter five times. These tasks are executed serially and share state.
results = ray.get([counters[0].increment.remote(20) for _ in range(10)])
print(results)

[50, 70, 90, 110, 130, 150, 170, 190, 210, 230]
