# Getting Started with Ray Core

For a book on distributed Python, it's not without a certain irony that Python on its own is
largely ineffective for distributed computing.
Its interpreter is effectively single-threaded, which makes it difficult to, for example, leverage multiple CPUs on
the same machine, let alone a whole cluster of machines, using plain Python.
That means you need extra tooling, and luckily the Python ecosystem has some options for you.
For instance, libraries like `multiprocessing` can help you distribute work on a single machine, but not beyond.

In [1]:
import ray
# Attach to an already-running Ray cluster ('auto' discovers its address)
# instead of starting a fresh local instance.
ray.init(address='auto')


RayContext(dashboard_url='127.0.0.1:8265', python_version='3.8.10', ray_version='1.12.0', ray_commit='f18fc31c7562990955556899090f8e8656b48d2d', address_info={'node_ip_address': '192.168.2.133', 'raylet_ip_address': '192.168.2.133', 'redis_address': None, 'object_store_address': '/tmp/ray/session_2022-04-30_11-37-33_867373_167976/sockets/plasma_store', 'raylet_socket_name': '/tmp/ray/session_2022-04-30_11-37-33_867373_167976/sockets/raylet', 'webui_url': '127.0.0.1:8265', 'session_dir': '/tmp/ray/session_2022-04-30_11-37-33_867373_167976', 'metrics_export_port': 53876, 'gcs_address': '192.168.2.133:6379', 'address': '192.168.2.133:6379', 'node_id': 'e53ae7700806b00ed47124527a9c0ab13885234bca9c736059196e66'})

In [2]:
# tag::retrieve[]
import time

# Toy stand-in for a database: a list of words addressed by position.
database = [  # <1>
    "Learning",
    "Ray",
    "Flexible",
    "Distributed",
    "Python",
    "for",
    "Data",
    "Science",
]


def retrieve(item):
    """Return ``(item, database[item])`` after a simulated lookup delay.

    The delay is ``item / 10`` seconds, so entries with a larger index take
    longer to fetch.
    """
    simulated_latency = item / 10.
    time.sleep(simulated_latency)  # <2>
    entry = database[item]
    return item, entry
# end::retrieve[]

In [3]:
# tag::duration[]
def print_runtime(input_data, start_time, decimals=1):
    """Print the seconds elapsed since ``start_time``, then each data item.

    Args:
        input_data: Iterable of items; each is printed on its own line.
        start_time: Reference timestamp as returned by ``time.time()``.
        decimals: Number of decimal places shown for the elapsed time.
    """
    elapsed = time.time() - start_time
    header = f'Runtime: {elapsed:.{decimals}f} seconds, data:'
    print(header)
    # One item per line; an empty input still emits a single blank line.
    body = "\n".join(str(entry) for entry in input_data)
    print(body)


# Baseline: fetch the eight items one after another and time the whole run.
start = time.time()
data = list(map(retrieve, range(8)))  # <1>
print_runtime(data, start)  # <2>
# end::duration[]

Runtime: 2.8 seconds, data:
(0, 'Learning')
(1, 'Ray')
(2, 'Flexible')
(3, 'Distributed')
(4, 'Python')
(5, 'for')
(6, 'Data')
(7, 'Science')


In [4]:
# tag::remote[]
@ray.remote  # <1>
def retrieve_task(item):
    """Ray task wrapper around ``retrieve``; returns whatever ``retrieve(item)`` returns."""
    return retrieve(item)  # <2>
# end::remote[]

In [5]:
# tag::duration_remote[]
# Submit all eight lookups as Ray tasks, then block until every result is in.
start = time.time()
data_references = list(map(retrieve_task.remote, range(8)))  # <1>
data = ray.get(data_references)  # <2>
print_runtime(data, start, 2)
# end::duration_remote[]

Runtime: 4.40 seconds, data:
(0, 'Learning')
(1, 'Ray')
(2, 'Flexible')
(3, 'Distributed')
(4, 'Python')
(5, 'for')
(6, 'Data')
(7, 'Science')


In [6]:
# tag::object_store[]
# Put `database` into Ray's object store once; tasks read it back through
# this reference.
database_object_ref = ray.put(database)  # <1>


@ray.remote
def retrieve_task(item):
    """Ray task: look up entry ``item`` in the database stored in the object store.

    Fetches the database via ``database_object_ref``, sleeps ``item / 10``
    seconds to simulate lookup cost, and returns ``(item, value)``.
    """
    shared_database = ray.get(database_object_ref)  # <2>
    simulated_latency = item / 10.
    time.sleep(simulated_latency)
    return item, shared_database[item]
# end::object_store[]

In [7]:
# tag::duration_object_store[]
# Launch all lookups up front, then consume the results in batches as they
# finish instead of blocking until every task is done.
start = time.time()
data_references = [retrieve_task.remote(item) for item in range(8)]
all_data = []

while data_references:  # <1>
    # ray.wait raises ValueError when num_returns exceeds the number of
    # references passed in, so cap the batch size at what is still pending.
    batch_size = min(2, len(data_references))
    finished, data_references = ray.wait(  # <2>
        data_references, num_returns=batch_size, timeout=7.0
    )
    # `finished` may hold fewer than `batch_size` refs if the timeout expires.
    data = ray.get(finished)
    print_runtime(data, start, 3)  # <3>
    all_data.extend(data)  # <4>

# end::duration_object_store[]
print_runtime(all_data, start)

Runtime: 0.127 seconds, data:
(0, 'Learning')
(1, 'Ray')
Runtime: 0.641 seconds, data:
(2, 'Flexible')
(3, 'Distributed')
Runtime: 1.554 seconds, data:
(4, 'Python')
(5, 'for')
Runtime: 2.871 seconds, data:
(6, 'Data')
(7, 'Science')
Runtime: 2.9 seconds, data:
(0, 'Learning')
(1, 'Ray')
(2, 'Flexible')
(3, 'Distributed')
(4, 'Python')
(5, 'for')
(6, 'Data')
(7, 'Science')


In [8]:
# tag::task_dependency[]
@ray.remote
def follow_up_task(retrieve_result):  # <1>
    """Ray task: given an ``(item, value)`` pair, also retrieve the next item.

    Returns a tuple of the original pair and the pair for ``item + 1``.
    """
    item_index, _value = retrieve_result
    next_entry = retrieve(item_index + 1)  # <2>
    return retrieve_result, next_entry  # <3>


# Chain tasks by passing the object references of the first stage straight
# into the second stage, without fetching them on the driver in between.
retrieve_refs = [retrieve_task.remote(item) for item in [0, 2, 4, 6]]
follow_up_refs = [follow_up_task.remote(ref) for ref in retrieve_refs]  # <4>

# Print one result per line. A plain print call replaces the former list
# comprehension, which only built a throwaway list of None values.
print(*ray.get(follow_up_refs), sep="\n")
# end::task_dependency[]

((0, 'Learning'), (1, 'Ray'))
((2, 'Flexible'), (3, 'Distributed'))
((4, 'Python'), (5, 'for'))
((6, 'Data'), (7, 'Science'))


In [9]:
# tag::actors[]
@ray.remote  # <1>
class DataTracker:
    """Ray actor holding a counter that is bumped via ``increment``."""

    def __init__(self):
        # The counter starts at zero.
        self._counts = 0

    def increment(self):
        """Increase the counter by one."""
        self._counts = self._counts + 1

    def counts(self):
        """Return the current counter value."""
        return self._counts
# end::actors[]

In [None]:
# tag::actors_remote[]
@ray.remote
def retrieve_tracker_task(item, tracker):  # <1>
    """Ray task: retrieve entry ``item`` and record the access on ``tracker``.

    Args:
        item: Index into the database held in the object store.
        tracker: Handle of an actor exposing an ``increment`` method.

    Returns:
        The ``(item, value)`` pair for the requested entry.
    """
    obj_store_data = ray.get(database_object_ref)
    time.sleep(item / 10.)
    # Wait for the actor to apply the increment before this task completes.
    # Ray only orders actor calls coming from the same submitter, so a
    # fire-and-forget call here could still be pending when the driver reads
    # the counter after collecting the task results.
    ray.get(tracker.increment.remote())  # <2>
    return item, obj_store_data[item]


# Create one DataTracker actor; its handle is passed to every task below.
tracker = DataTracker.remote()  # <3>

# Launch eight tasks that all receive the same actor handle, then wait for them.
data_references = [retrieve_tracker_task.remote(item, tracker) for item in range(8)]  # <4>
data = ray.get(data_references)
# Ask the actor for its counter value and print it.
print(ray.get(tracker.counts.remote()))  # <5>
# end::actors_remote[]

In [None]:
# tag::ownership[]
@ray.remote
def task_owned():
    """Do nothing and return None; exists only to be launched from inside ``task``."""
    return


@ray.remote
def task(dependency):
    """Launch ``task_owned`` from within this task and return None.

    ``dependency`` is accepted but never read in the body.
    """
    # The reference is deliberately left unused. Presumably it illustrates that
    # the worker running `task` owns it; confirm against the accompanying text.
    res_owned = task_owned.remote()
    return


# Both references below are created by the driver script itself: one from
# ray.put, one from submitting `task` with that value as its argument.
val = ray.put("value")
res = task.remote(dependency=val)
# end::ownership[]

# ![Task dependency](https://raw.githubusercontent.com/maxpumperla/learning_ray/main/notebooks/images/chapter_02/task_dependency.png)

# ![Worker Node](https://raw.githubusercontent.com/maxpumperla/learning_ray/main/notebooks/images/chapter_02/worker_node.png)

# ![Ray architecture](https://raw.githubusercontent.com/maxpumperla/learning_ray/main/notebooks/images/chapter_02/architecture.png)