In [1]:
%load_ext autoreload
%autoreload 2
from tqdm import tqdm_notebook
from ipywidgets import FloatProgress
from IPython.display import display

In [2]:
import ray
ray.init()

2023-10-05 18:11:29,161	INFO worker.py:1529 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.9.16
Ray version:,2.2.0
Dashboard:,http://127.0.0.1:8265


### Ray Datasets

In [3]:
items = [{"name": str(i), "data": i} for i in range(10000)]

In [4]:
ds = ray.data.from_items(items)

In [5]:
squares = ds.map(lambda x: x["data"] ** 2)

Map: 100%|████████████████████████████████████| 200/200 [00:02<00:00, 81.04it/s]


In [6]:
squares

VBox(children=(HTML(value='<h2>Dataset</h2>'), Tab(children=(HTML(value='<div class="scrollableTable jp-Render…

In [7]:
evens = squares.filter(lambda x: x % 2 == 0)

Filter: 100%|███████████████████████████████| 200/200 [00:00<00:00, 1291.87it/s]


In [8]:
evens

VBox(children=(HTML(value='<h2>Dataset</h2>'), Tab(children=(HTML(value='<div class="scrollableTable jp-Render…

In [9]:
cubes = evens.flat_map(lambda x: [x, x**3])

Flat_Map: 100%|█████████████████████████████| 200/200 [00:00<00:00, 1068.58it/s]


In [10]:
cubes

VBox(children=(HTML(value='<h2>Dataset</h2>'), Tab(children=(HTML(value='<div class="scrollableTable jp-Render…

In [11]:
sample = cubes.take(10)

In [12]:
print(sample)

[0, 0, 4, 64, 16, 4096, 36, 46656, 64, 262144]


### Ray DatasetPipeline

In [13]:
pipe = ds.window()

2023-10-05 18:12:16,240	INFO dataset.py:3693 -- Created DatasetPipeline with 20 windows: 7390b min, 8000b max, 7944b mean
2023-10-05 18:12:16,241	INFO dataset.py:3703 -- Blocks per window: 10 min, 10 max, 10 mean
2023-10-05 18:12:16,243	INFO dataset.py:3742 -- ✔️  This pipeline's windows likely fit in object store memory without spilling.


In [14]:
result = pipe\
            .map(lambda x: x["data"] ** 2)\
            .filter(lambda x: x % 2 == 0)\
            .flat_map(lambda x: [x, x ** 3])
result.show(10)

Stage 0:   0%|                                           | 0/20 [00:00<?, ?it/s]
  0%|                                                    | 0/20 [00:00<?, ?it/s][A
Stage 1:   5%|█▊                                 | 1/20 [00:00<00:02,  8.94it/s][A
Stage 0:  10%|███▌                               | 2/20 [00:00<00:01, 17.40it/s]

0
0
4
64
16
4096
36
46656
64
262144





In [15]:
import gym

In [16]:
!rllib example get cartpole-ppo

/bin/bash: /home/cdsw/.local/bin/rllib: /opt/conda/bin/python3.10: bad interpreter: No such file or directory


In [17]:
from ray import tune
import math
import time

  VALID_NP_HPARAMS = (np.bool8, np.float32, np.float64, np.int32, np.int64)


In [18]:
def training_function(config):
    x, y = config["x"], config["y"]
    time.sleep(10)
    score = objective(x,y)
    tune.report(score=score)

def objective(x,y):
    return math.sqrt((x**2 + y**2)/2)

In [19]:
result = tune.run(
    training_function,
    config={
        "x": tune.grid_search([-1, -.5, 0, .5, 1]),
        "y": tune.grid_search([-1, -.5, 0, .5, 1])
    })
print(result)

0,1
Current time:,2023-10-05 18:13:13
Running for:,00:00:31.62
Memory:,7.3/62.0 GiB

Trial name,status,loc,x,y,iter,total time (s),score
training_function_c5fac_00000,TERMINATED,100.100.137.165:1982,-1.0,-1.0,1,10.0486,1.0
training_function_c5fac_00001,TERMINATED,100.100.137.165:2067,-0.5,-1.0,1,10.0488,0.790569
training_function_c5fac_00002,TERMINATED,100.100.137.165:2069,0.0,-1.0,1,10.0463,0.707107
training_function_c5fac_00003,TERMINATED,100.100.137.165:2071,0.5,-1.0,1,10.0502,0.790569
training_function_c5fac_00004,TERMINATED,100.100.137.165:2073,1.0,-1.0,1,10.0496,1.0
training_function_c5fac_00005,TERMINATED,100.100.137.165:2090,-1.0,-0.5,1,10.0495,0.790569
training_function_c5fac_00006,TERMINATED,100.100.137.165:2123,-0.5,-0.5,1,10.0466,0.5
training_function_c5fac_00007,TERMINATED,100.100.137.165:2124,0.0,-0.5,1,10.0475,0.353553
training_function_c5fac_00008,TERMINATED,100.100.137.165:2126,0.5,-0.5,1,10.0483,0.5
training_function_c5fac_00009,TERMINATED,100.100.137.165:2143,1.0,-0.5,1,10.047,0.790569


Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,node_ip,pid,score,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
training_function_c5fac_00000,2023-10-05_18-12-55,True,,30665c1445ab4b8a91cd627afb4d253e,"0_x=-1,y=-1",0gnd39tczbn8ubas,1,100.100.137.165,1982,1.0,10.0486,10.0486,10.0486,1696529575,0,,1,c5fac_00000,0.00277567
training_function_c5fac_00001,2023-10-05_18-13-02,True,,3742531de8124074ad47f1367cafb933,"1_x=-0.5000,y=-1",0gnd39tczbn8ubas,1,100.100.137.165,2067,0.790569,10.0488,10.0488,10.0488,1696529582,0,,1,c5fac_00001,0.00358033
training_function_c5fac_00002,2023-10-05_18-13-02,True,,71f51b170ac54fd49b74043bdfc7fb59,"2_x=0,y=-1",0gnd39tczbn8ubas,1,100.100.137.165,2069,0.707107,10.0463,10.0463,10.0463,1696529582,0,,1,c5fac_00002,0.00319099
training_function_c5fac_00003,2023-10-05_18-13-02,True,,f8dbecb27abd47e2b8c2ae320811765e,"3_x=0.5000,y=-1",0gnd39tczbn8ubas,1,100.100.137.165,2071,0.790569,10.0502,10.0502,10.0502,1696529582,0,,1,c5fac_00003,0.00374627
training_function_c5fac_00004,2023-10-05_18-13-02,True,,2c6acc6efe1741cfbb320dc02ebcb19b,"4_x=1,y=-1",0gnd39tczbn8ubas,1,100.100.137.165,2073,1.0,10.0496,10.0496,10.0496,1696529582,0,,1,c5fac_00004,0.00364256
training_function_c5fac_00005,2023-10-05_18-13-02,True,,f00225a91c6c437590f005bebf803203,"5_x=-1,y=-0.5000",0gnd39tczbn8ubas,1,100.100.137.165,2090,0.790569,10.0495,10.0495,10.0495,1696529582,0,,1,c5fac_00005,0.00295949
training_function_c5fac_00006,2023-10-05_18-13-02,True,,b05a2c4bc0d34012a02e84885fc51dd6,"6_x=-0.5000,y=-0.5000",0gnd39tczbn8ubas,1,100.100.137.165,2123,0.5,10.0466,10.0466,10.0466,1696529582,0,,1,c5fac_00006,0.00326896
training_function_c5fac_00007,2023-10-05_18-13-02,True,,ae77028135544ca9ab1475127df5b195,"7_x=0,y=-0.5000",0gnd39tczbn8ubas,1,100.100.137.165,2124,0.353553,10.0475,10.0475,10.0475,1696529582,0,,1,c5fac_00007,0.00376749
training_function_c5fac_00008,2023-10-05_18-13-02,True,,2c2d94c158f84a63b12b6f0ae01c28df,"8_x=0.5000,y=-0.5000",0gnd39tczbn8ubas,1,100.100.137.165,2126,0.5,10.0483,10.0483,10.0483,1696529582,0,,1,c5fac_00008,0.00299907
training_function_c5fac_00009,2023-10-05_18-13-02,True,,9d2b90767c77477ca2651e164139846c,"9_x=1,y=-0.5000",0gnd39tczbn8ubas,1,100.100.137.165,2143,0.790569,10.047,10.047,10.047,1696529582,0,,1,c5fac_00009,0.0036757


2023-10-05 18:13:13,386	INFO tune.py:762 -- Total run time: 32.17 seconds (31.55 seconds for the tuning loop).


<ray.tune.analysis.experiment_analysis.ExperimentAnalysis object at 0x7fb444af1910>


In [20]:
print(result.get_best_config(metric="score", mode="min"))

{'x': 0, 'y': 0}


In [21]:
from ray import serve
from transformers import pipeline 
import requests

2023-10-05 18:13:23.848512: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-05 18:13:23.848554: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-05 18:13:23.848588: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-10-05 18:13:23.938488: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from tensorflow.tsl.python.lib.core impo

In [22]:
import os
os.environ["CDSW_IP_ADDRESS"]

'100.100.137.165'

In [23]:
os.environ["CDSW_READONLY_PORT"]

'8100'

In [24]:
serve.start(http_options={'port':os.environ["CDSW_READONLY_PORT"], 'location':'EveryNode'})

[2m[36m(ServeController pid=3481)[0m INFO 2023-10-05 18:15:06,528 controller 3481 http_state.py:129 - Starting HTTP proxy with name 'SERVE_CONTROLLER_ACTOR:pyVaeW:SERVE_PROXY_ACTOR-64d8e25a09abd344b5eee6d73d8b53036cac5592163a0cef5f2c1398' on node '64d8e25a09abd344b5eee6d73d8b53036cac5592163a0cef5f2c1398' listening on '127.0.0.1:8100'


<ray.serve._private.client.ServeControllerClient at 0x7fb3e34a0940>

[2m[36m(HTTPProxyActor pid=3542)[0m INFO:     Started server process [3542]


In [25]:
@serve.deployment
def model(request):
    language_model = pipeline("text-generation", model="gpt2")
    query = request.query_params["query"]
    return language_model(quert, max_length=100)

In [26]:
model.deploy()

[2m[36m(ServeController pid=3481)[0m INFO 2023-10-05 18:15:18,042 controller 3481 deployment_state.py:1310 - Adding 1 replica to deployment 'model'.
[2m[36m(ServeReplica:model pid=3612)[0m 2023-10-05 18:15:20.625668: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[2m[36m(ServeReplica:model pid=3612)[0m 2023-10-05 18:15:20.625701: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[2m[36m(ServeReplica:model pid=3612)[0m 2023-10-05 18:15:20.625731: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[2m[36m(ServeReplica:model pid=3612)[0m 2023-10-05 18:15:20.634025: I tensorfl

In [27]:
query = "what is the meaning of life?"
response = requests.get(f"http://localhost:8100/model?query={query}")
print(response.text)

[2m[36m(ServeReplica:model pid=3612)[0m All PyTorch model weights were used when initializing TFGPT2LMHeadModel.
[2m[36m(ServeReplica:model pid=3612)[0m 
[2m[36m(ServeReplica:model pid=3612)[0m All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
[2m[36m(ServeReplica:model pid=3612)[0m If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


Task Error. Traceback: [36mray::ServeReplica:model()[39m (pid=3612, ip=100.100.137.165)
  File "/home/cdsw/.local/lib/python3.9/site-packages/ray/serve/_private/utils.py", line 238, in wrap_to_ray_error
    raise exception
  File "/home/cdsw/.local/lib/python3.9/site-packages/ray/serve/_private/replica.py", line 443, in invoke_single
    result = await method_to_call(*args, **kwargs)
  File "/tmp/ipykernel_239/3971627431.py", line 5, in model
NameError: name 'quert' is not defined.


[2m[36m(HTTPProxyActor pid=3542)[0m INFO 2023-10-05 18:15:39,176 http_proxy 100.100.137.165 http_proxy.py:361 - GET /model 500 6337.3ms
[2m[36m(HTTPProxyActor pid=3542)[0m Task exception was never retrieved
[2m[36m(HTTPProxyActor pid=3542)[0m future: <Task finished name='Task-35' coro=<_wrap_awaitable() done, defined at /usr/local/lib/python3.9/asyncio/tasks.py:681> exception=RayTaskError(NameError)(NameError("name 'quert' is not defined"))>
[2m[36m(HTTPProxyActor pid=3542)[0m Traceback (most recent call last):
[2m[36m(HTTPProxyActor pid=3542)[0m   File "/usr/local/lib/python3.9/asyncio/tasks.py", line 688, in _wrap_awaitable
[2m[36m(HTTPProxyActor pid=3542)[0m     return (yield from awaitable.__await__())
[2m[36m(HTTPProxyActor pid=3542)[0m ray.exceptions.RayTaskError(NameError): [36mray::ServeReplica:model()[39m (pid=3612, ip=100.100.137.165)
[2m[36m(HTTPProxyActor pid=3542)[0m   File "/home/cdsw/.local/lib/python3.9/site-packages/ray/serve/_private/utils.py

In [31]:
ray.shutdown()