In [1]:
#!pip3 install ipywidgets

In [1]:
import ray
import os
import time
import numpy as np
import socket

In [2]:
# Attach this notebook to the cluster through its Ray Client endpoint
# (the ray:// scheme); the connection info is displayed as the cell output.
RAY_CLIENT_ADDRESS = "ray://172.31.83.227:10001"
ray.init(address=RAY_CLIENT_ADDRESS)

0,1
Python version:,3.10.12
Ray version:,2.9.3
Dashboard:,http://127.0.0.1:8265


# Ray tasks

In [3]:
# Toy "database": a small list of strings that the retrieve helpers index into.
database = [
    "learning",
    "Ray",
    "for",
    "ditributed",
    "data ",
    "processing",
]

In [4]:
def retrieve(idx):
    """Simulate a slow lookup of one item in the module-level ``database``.

    Sleeps for ``idx / 10`` seconds so that later items take longer, then
    returns the pair ``(idx, database[idx])``.
    """
    delay_seconds = idx / 10.
    time.sleep(delay_seconds)
    item = database[idx]
    return idx, item

In [5]:
# Baseline: run the six lookups one after another in the notebook process
# and report the wall-clock time.
t0 = time.time()
results = list(map(retrieve, range(6)))
t1 = time.time()
print(results)
print(t1-t0, "seconds")

[(0, 'learning'), (1, 'Ray'), (2, 'for'), (3, 'ditributed'), (4, 'data '), (5, 'processing')]
1.5050773620605469 seconds


In [6]:
@ray.remote
def retrieve_task(idx):
    """Ray task wrapper: run ``retrieve(idx)`` remotely and return its result."""
    item = retrieve(idx)
    return item

In [7]:
# Submit one Ray task per item, then wait for all of them and time the round trip.
t0 = time.time()
res_refs = [retrieve_task.remote(item_idx) for item_idx in range(6)]
results = ray.get(res_refs)  # blocks until every task has finished (barrier)
t1 = time.time()

print(results)
print(t1-t0, "seconds")

[(0, 'learning'), (1, 'Ray'), (2, 'for'), (3, 'ditributed'), (4, 'data '), (5, 'processing')]
2.595423698425293 seconds


In [8]:
# Put the list into Ray's object store once; db_ref is the reference that is
# handed to tasks instead of the list itself.
db_ref = ray.put(database)

In [9]:
# Display the object reference returned by ray.put (a handle, not the list contents).
db_ref

ClientObjectRef(00ffffffffffffffffffffffffffffffffffffff0100000002e1f505)

In [10]:
@ray.remote
def retrieve_task_by_ref(idx, db_ref):
    """Ray task: look up item ``idx`` in a database shared via the object store.

    The caller passes the reference produced by ``ray.put``; Ray resolves a
    reference passed as a top-level argument before the body runs, so
    ``db_ref`` is indexed here like the underlying list.

    Sleeps ``idx / 10`` seconds, then returns
    ``(idx, db_ref[idx], ip)`` where ``ip`` is the address that the executing
    machine's hostname resolves to.
    """
    time.sleep(idx / 10.)
    item = db_ref[idx]
    worker_ip = socket.gethostbyname(socket.gethostname())
    return idx, item, worker_ip

In [11]:
# Same fan-out as before, but every task receives the shared object reference
# instead of relying on the module-level list.
t0 = time.time()
res_refs = [retrieve_task_by_ref.remote(item_idx, db_ref) for item_idx in range(6)]
results = ray.get(res_refs)  # blocks until every task has finished (barrier)
t1 = time.time()

print(results)
print(t1-t0, "seconds")

[(0, 'learning', '172.31.83.227'), (1, 'Ray', '172.31.83.227'), (2, 'for', '172.31.86.252'), (3, 'ditributed', '172.31.86.252'), (4, 'data ', '172.31.86.157'), (5, 'processing', '172.31.86.157')]
0.7968604564666748 seconds


In [12]:
# 10,000 random integers drawn uniformly from 0..9 (upper bound exclusive).
data = np.random.randint(low=0, high=10, size=10_000)

In [13]:
num_partitions = 5
# Nominal bucket size (exact when len(data) is a multiple of num_partitions).
partition_sz = len(data) // num_partitions
# np.array_split keeps every element even when len(data) is not a multiple of
# num_partitions; slicing by partition_sz alone silently dropped the trailing
# remainder. For evenly divisible input the buckets are unchanged.
input_buckets = np.array_split(data, num_partitions)

In [14]:
@ray.remote
def upstream_task(input):
    """Ray task: pass ``input`` through, tagged with where the task ran.

    Returns ``(input, ip)`` where ``ip`` is the address that the executing
    machine's hostname resolves to.
    """
    worker_ip = socket.gethostbyname(socket.gethostname())
    return input, worker_ip

@ray.remote
def downstream_task(input):
    """Ray task: reduce the output of ``upstream_task`` to a sum.

    ``input`` is the ``(values, host)`` pair produced upstream; the result is
    ``(np.sum(values), host)`` with the host tag forwarded unchanged.
    """
    values, upstream_host = input
    total = np.sum(values)
    return total, upstream_host

In [15]:
# Stage 1: one upstream task per bucket.
obj_refs = [upstream_task.remote(bucket) for bucket in input_buckets]
# Stage 2: chain a downstream task onto each upstream result by passing its
# reference directly; no ray.get is needed in between.
final_refs = [downstream_task.remote(upstream_ref) for upstream_ref in obj_refs]

print(ray.get(final_refs))

[(9055, '172.31.83.227'), (8983, '172.31.86.252'), (8931, '172.31.86.157'), (9152, '172.31.83.227'), (9042, '172.31.86.252')]


# Ray actors

In [16]:
@ray.remote # indicates this is Ray actor class
class Actor:
    """Stateful counter actor.

    The running total is stored in ``_counts`` rather than ``counts`` so the
    instance attribute does not shadow the ``counts`` method: with both
    sharing one name, ``instance.counts`` resolved to the integer and the
    method was unreachable through normal attribute lookup.
    """

    def __init__(self):
        # Number of increment() calls handled so far.
        self._counts = 0

    def increment(self):
        """Add one to the counter."""
        self._counts += 1

    def counts(self):
        """Return the current counter value."""
        return self._counts

In [17]:
@ray.remote
def downstream_task_actor(input, actor):
    """Ray task: sum an upstream result and record the call on a counter actor.

    Args:
        input: ``(values, host)`` pair as produced by ``upstream_task``.
        actor: handle to an actor exposing an ``increment`` method.

    Returns:
        ``(np.sum(values), host)``.
    """
    intermediate_result, hostname = input
    # Wait for the increment to be applied before this task completes.
    # A fire-and-forget call could still be in flight when the driver, after
    # collecting the task results, asks the actor for its count.
    ray.get(actor.increment.remote())
    return np.sum(intermediate_result), hostname

In [18]:
# Create a single actor instance; the returned handle is what gets passed to tasks.
actor = Actor.remote() # initialize an actor

In [19]:
%%timeit

t0 = time.time()
upstream_task_result_refs = [upstream_task.remote(input) for input in input_buckets]
downstream_task_result_refs = [
        downstream_task_actor.remote(upstream_task_result_ref, actor) for upstream_task_result_ref in upstream_task_result_refs
]

final_results = ray.get(downstream_task_result_refs)
t1 = time.time()

t2 = time.time()
print(ray.get(actor.counts.remote()))
t3 = time.time()

print(upstream_task_result_refs)
print(downstream_task_result_refs)
print(final_results)
print(t1-t0, "seconds")
print(t3-t2, "seconds")

5
[ClientObjectRef(465c0fb8d6cb3cdcffffffffffffffffffffffff0100000001000000), ClientObjectRef(3d3e27c54ed1f5cfffffffffffffffffffffffff0100000001000000), ClientObjectRef(cae5e964086715a4ffffffffffffffffffffffff0100000001000000), ClientObjectRef(bcb4fef46b376cafffffffffffffffffffffffff0100000001000000), ClientObjectRef(88543757a8df6d2fffffffffffffffffffffffff0100000001000000)]
[ClientObjectRef(347cc60e0bb3da74ffffffffffffffffffffffff0100000001000000), ClientObjectRef(a02c24b8b7fc0a31ffffffffffffffffffffffff0100000001000000), ClientObjectRef(a631fe8d231813bfffffffffffffffffffffffff0100000001000000), ClientObjectRef(79cc316456d39201ffffffffffffffffffffffff0100000001000000), ClientObjectRef(c1464dc5b2308f10ffffffffffffffffffffffff0100000001000000)]
[(9055, '172.31.83.227'), (8983, '172.31.86.252'), (8931, '172.31.83.227'), (9152, '172.31.86.157'), (9042, '172.31.86.252')]
0.13525032997131348 seconds
0.3364560604095459 seconds
10
[ClientObjectRef(91581beb08e6c9deffffffffffffffffffffffff01000