In [1]:
## Ray Shared Outlinks



In [2]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

# Small random directed graph: 5 nodes, edge probability 0.8.
# seed=None means the graph (and so every recorded output that depends on it)
# differs from run to run.
G = nx.erdos_renyi_graph(5,0.8, seed=None, directed=True)

In [3]:
# Dense adjacency matrix of G; the bare name on the last line displays it.
# NOTE(review): to_numpy_matrix returns the legacy np.matrix type and was
# dropped in NetworkX 3.0 in favour of to_numpy_array — confirm against the
# installed NetworkX version before re-running this cell.
gmat = nx.to_numpy_matrix(G)
gmat

matrix([[0., 1., 0., 0., 0.],
        [1., 0., 0., 1., 1.],
        [1., 1., 0., 1., 0.],
        [1., 1., 1., 0., 1.],
        [1., 1., 1., 1., 0.]])

The original implementation on a small graph.

In [4]:
# Serial baseline: for every pair i < j, count the targets that both node i
# and node j link to (dot product of their adjacency rows). Only the strict
# upper triangle of outmat is filled; everything else stays 0.
#
# to_numpy_array returns a plain 2-D ndarray (to_numpy_matrix was removed in
# NetworkX 3.0), so each row slice is already 1-D and needs no reshaping.
gmat = nx.to_numpy_array(G)

outmat = np.zeros(gmat.shape)

for i in range(gmat.shape[0]):
    for j in range(i + 1, gmat.shape[1]):
        outmat[i, j] = np.dot(gmat[i], gmat[j])
outmat

array([[0., 0., 1., 1., 1.],
       [0., 0., 2., 2., 2.],
       [0., 0., 0., 2., 3.],
       [0., 0., 0., 0., 3.],
       [0., 0., 0., 0., 0.]])

Let's do it with ray remote functions and verify the output.

In [5]:
import ray
# Start a local Ray runtime limited to 8 CPUs. ignore_reinit_error=True lets
# this cell be re-executed without raising when Ray is already initialised.
ray.init(num_cpus=8, ignore_reinit_error=True)

2021-06-02 19:22:12,918	INFO services.py:1267 -- View the Ray dashboard at http://127.0.0.1:8270


{'node_ip_address': '172.23.52.116',
 'raylet_ip_address': '172.23.52.116',
 'redis_address': '172.23.52.116:34583',
 'object_store_address': '/tmp/ray/session_2021-06-02_19-22-11_919854_4158/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2021-06-02_19-22-11_919854_4158/sockets/raylet',
 'webui_url': '127.0.0.1:8270',
 'session_dir': '/tmp/ray/session_2021-06-02_19-22-11_919854_4158',
 'metrics_export_port': 63662,
 'node_id': 'd23a56fbe14ebbe969c11cbc827d6328853759475bec9071d9e97d47'}

In [6]:
@ray.remote
def inner_loop(i):
    """Compute row ``i`` of the shared-outlink matrix as a Ray task.

    Reads the notebook-level ``gmat``. Entry ``j`` of the returned 1-D float
    array is the dot product of rows ``i`` and ``j`` of ``gmat`` for every
    ``j > i``; entries with ``j <= i`` are left at 0.
    """
    n_cols = gmat.shape[1]
    row_counts = np.zeros(n_cols)
    for other in range(i + 1, n_cols):
        # Flatten each row slice to a 1-D array before taking the dot product.
        this_row = np.asarray(gmat[i, :]).reshape(-1)
        other_row = np.asarray(gmat[other, :]).reshape(-1)
        row_counts[other] = np.dot(this_row, other_row)
    return row_counts

# Fan out one remote task per row, then block until every partial row is back.
ilids = list(map(inner_loop.remote, range(gmat.shape[0])))
results = ray.get(ilids)

# Reassemble the per-row vectors into a matrix with the same shape as gmat.
outmat = np.asarray(results).reshape(gmat.shape)

outmat

array([[0., 0., 1., 1., 1.],
       [0., 0., 2., 2., 2.],
       [0., 0., 0., 2., 3.],
       [0., 0., 0., 0., 3.],
       [0., 0., 0., 0., 0.]])

OK, let's do it on a big graph.

In [7]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

# Larger random directed graph: 1000 nodes, edge probability 0.01.
G = nx.erdos_renyi_graph(1000, 0.01, seed=None, directed=True)
# Dense adjacency as a plain ndarray; to_numpy_matrix was removed in
# NetworkX 3.0 and to_numpy_array is its replacement.
gmat = nx.to_numpy_array(G)

In [9]:
@ray.remote
def ray_inner_loop(i):
    """Ray task: shared-outlink counts between row ``i`` and every later row.

    Reads the notebook-level ``gmat`` and returns a 1-D float array whose
    entry ``j`` (``j > i``) is the dot product of rows ``i`` and ``j``; all
    other entries are 0.

    NOTE(review): ``gmat`` is a global, not an argument. Ray ships a pickled
    copy of the function to its workers, so rebinding ``gmat`` later in the
    notebook is presumably not seen by this task unless it is re-defined —
    verify before reusing it on a different matrix.
    """
    width = gmat.shape[1]
    partial_row = np.zeros(width)
    col = i + 1
    while col < width:
        lhs = np.asarray(gmat[i, :]).reshape(-1)
        rhs = np.asarray(gmat[col, :]).reshape(-1)
        partial_row[col] = np.dot(lhs, rhs)
        col += 1
    return partial_row

# Submit one task per row and wait for all of the partial rows.
ilids = list(map(ray_inner_loop.remote, range(gmat.shape[0])))
results = ray.get(ilids)
# Rebuild the full matrix and report how many pairs share at least one out-link.
outmat = np.asarray(results).reshape(gmat.shape)
np.count_nonzero(outmat)

47633

And figure out the timing.

In [10]:
%%timeit

# Timed region: task submission plus ray.get only; the reshape into outmat
# is deliberately not part of the measurement.
ilids = [ray_inner_loop.remote(i) for i in range(gmat.shape[0])]
results = ray.get(ilids)

1.57 s ± 58 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Now compare to the joblib version; for that, the inner loop has to be a plain function rather than a remote function.

In [11]:
%%timeit

def jl_inner_loop(i):
    """Plain-function version of the inner loop, for use with joblib.

    Reads the notebook-level ``gmat`` and returns a 1-D float array whose
    entry ``j`` (``j > i``) is the dot product of rows ``i`` and ``j``; all
    other entries are 0.
    """
    n_cols = gmat.shape[1]
    shared = np.zeros(n_cols)
    for other in range(i + 1, n_cols):
        row_i = np.asarray(gmat[i, :]).reshape(-1)
        row_other = np.asarray(gmat[other, :]).reshape(-1)
        shared[other] = np.dot(row_i, row_other)
    return shared

from joblib import Parallel, delayed
# One delayed call per row, spread over 4 joblib workers.
# NOTE(review): n_jobs=4 here, whereas ray.init earlier in the notebook was
# given num_cpus=8, so the joblib and ray timings do not use the same number
# of workers.
partials = Parallel(n_jobs=4)(delayed(jl_inner_loop)(i) for i in range(gmat.shape[0]))

3.03 s ± 115 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


OK, ray is better, but nowhere near compiled and multicore. (The comparison is not quite like-for-like: ray was started with `num_cpus=8`, while joblib ran with `n_jobs=4`.) The problem lies in the poor use of multiple cores and shared memory, in addition to the inefficiency of Python code. Both joblib and ray have to package up the array and send it to each process. Let's see about shared memory in ray: https://docs.ray.io/en/master/memory-management.html

In [12]:
# Variant of the inner loop that reads no notebook globals: the matrix comes
# in as an argument. Callers pass an object reference and the task implicitly
# receives the object itself.
@ray.remote
def shm_inner_loop(i, local_gmat):
    """Ray task: shared-outlink counts between row ``i`` and every later row.

    Args:
        i: index of the row to compare against all rows ``j > i``.
        local_gmat: the adjacency matrix to read rows from.

    Returns:
        A 1-D float array whose entry ``j`` (``j > i``) is the dot product of
        rows ``i`` and ``j`` of ``local_gmat``; all other entries are 0.
    """
    n_cols = local_gmat.shape[1]
    row_counts = np.zeros(n_cols)
    for other in range(i + 1, n_cols):
        this_row = np.asarray(local_gmat[i, :]).reshape(-1)
        other_row = np.asarray(local_gmat[other, :]).reshape(-1)
        row_counts[other] = np.dot(this_row, other_row)
    return row_counts

# Store the matrix in Ray's object store once; every task gets the same reference.
shoid = ray.put(gmat)

ilids = [shm_inner_loop.remote(row, shoid) for row in range(gmat.shape[0])]
results = ray.get(ilids)
# Rebuild the full matrix and count the pairs that share at least one out-link.
outmat = np.asarray(results).reshape(gmat.shape)
np.count_nonzero(outmat)

47633

In [13]:
%%timeit

# ray.put is inside the timed region, so the cost of placing gmat in the
# object store is included in every measurement.
shoid = ray.put(gmat)
ilids = [shm_inner_loop.remote(i, shoid) for i in range(gmat.shape[0])]
results = ray.get(ilids)

2.3 s ± 127 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


OK, not actually faster. Let's work on a bigger matrix: ten times as many nodes, so one hundred times as many matrix entries.

In [18]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

# Much larger random directed graph: 10000 nodes, edge probability 0.005.
G = nx.erdos_renyi_graph(10000, 0.005, seed=None, directed=True)
# Dense adjacency as a plain ndarray; to_numpy_matrix was removed in
# NetworkX 3.0 and to_numpy_array is its replacement. At 10000 x 10000 this
# dense array is roughly 800 MB if stored as float64.
gmat = nx.to_numpy_array(G)

In [20]:
%%timeit -n 1
# NOTE(review): ray_inner_loop reads the global gmat rather than taking it as
# an argument. Ray ships a pickled copy of the function (globals included) to
# its workers, so the tasks here presumably still see the 1000-node matrix
# captured when the function was first exported, not the 10000-node gmat
# built just above — verify before trusting this timing. Re-define the remote
# function after rebuilding gmat, or use shm_inner_loop with ray.put instead.
ilids = [ray_inner_loop.remote(i) for i in range(gmat.shape[0])]
results = ray.get(ilids)

3.63 s ± 648 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%%timeit -n 1
# Shared-memory variant on the current gmat: the matrix is passed explicitly
# through the object store, and the ray.put cost is part of the timed region.
shoid = ray.put(gmat)
ilids = [shm_inner_loop.remote(i, shoid) for i in range(gmat.shape[0])]
results = ray.get(ilids)