# Profiling ray.put()

In [9]:
import ray
import cProfile
import pstats
import StringIO
import time
import sys
import string
import random
import numpy as np

In [2]:
ray.init()

Waiting for redis server at 127.0.0.1:43702 to respond...
Waiting for redis server at 127.0.0.1:15344 to respond...
Starting local scheduler with the following resources: {'GPU': 0, 'CPU': 4}.

View the web UI at http://localhost:8906/notebooks/ray_ui20850.ipynb?token=e4326d43fd9942fe5b4677420fe4849a17ec2c8c1eed9596



{'local_scheduler_socket_names': ['/tmp/scheduler1391573'],
 'node_ip_address': '127.0.0.1',
 'object_store_addresses': [ObjectStoreAddress(name='/tmp/plasma_store96370547', manager_name='/tmp/plasma_manager23532085', manager_port=29197)],
 'redis_address': '127.0.0.1:43702',
 'webui_url': 'http://localhost:8906/notebooks/ray_ui20850.ipynb?token=e4326d43fd9942fe5b4677420fe4849a17ec2c8c1eed9596'}

## Functions

In [3]:
def get_profile_data(arr):
    pr = cProfile.Profile()
    pr.enable()
    ray.put(arr)
    pr.disable()
    return pr

In [41]:
def stats_printer(profile_object):
    s = StringIO.StringIO()
    sortby = 'module'
    ps = pstats.Stats(profile_object, stream=s).sort_stats(sortby)
    ps.strip_dirs()
    ps.print_stats('put|plasma|store_and_register')
    print s.getvalue()

In [51]:
def print_profile_data(arr):
    pr = get_profile_data(arr)
    stats_printer(pr)
    s = StringIO.StringIO()
    ps = pstats.Stats(pr, stream=s)
    ps.strip_dirs()
    ps.print_stats('plasma')
    print s.getvalue()

## Test Inputs

In [62]:
NUM_STRINGS = 2*10**3
STRING_LENGTH = 20

short_numpy_array = np.array([1,2,3,4,5])
large_numpy_array = np.array([i for i in range(NUM_STRINGS)])
very_large_numpy_array = np.array([i for i in range(NUM_STRINGS * 10)])
large_string_array = [''.join(random.choice(string.ascii_letters) for _ in range(STRING_LENGTH)) for _ in range(NUM_STRINGS)]
array_of_large_integers = [np.random.randint(0, sys.maxsize) for _ in range(NUM_STRINGS * 10)]
large_string_array_asints = np.array([[ord(c) for c in s] for s in large_string_array], dtype=np.uint8)

In [63]:
print("Size of short numpy array: " + str(sys.getsizeof(short_numpy_array)/1024/1024) + " MB")
print("Size of large numpy array: " + str(sys.getsizeof(large_numpy_array)/1024/1024) + " MB")
print("Size of very large numpy array: " + str(sys.getsizeof(very_large_numpy_array)/1024/1024) + " MB")
print("Size of large string string: " + str(sys.getsizeof(large_string_array)/1024/1024) + " MB")
print("Size of large integer array: " + str(sys.getsizeof(array_of_large_integers)/1024/1024) + " MB")
print("Size of string array as ints: " + str(sys.getsizeof(large_string_array_asints)/1024/1024) + " MB")

Size of short numpy array: 0 MB
Size of large numpy array: 0 MB
Size of very large numpy array: 0 MB
Size of large string string: 0 MB
Size of large integer array: 0 MB
Size of string array as ints: 0 MB


## Profiling Results

In [67]:
print_profile_data(short_numpy_array)

         27 function calls in 0.001 seconds

   Random listing order was used
   List reduced from 21 to 5 due to restriction <'put|plasma|store_and_register'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.001    0.001 worker.py:257(store_and_register)
        1    0.000    0.000    0.000    0.000 {method 'compute_put_id' of 'local_scheduler.LocalSchedulerClient' objects}
        1    0.001    0.001    0.001    0.001 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}
        1    0.000    0.000    0.001    0.001 worker.py:2253(put)
        1    0.000    0.000    0.001    0.001 worker.py:322(put_object)



         27 function calls in 0.001 seconds

   Random listing order was used
   List reduced from 21 to 1 due to restriction <'plasma'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.001    0.001    0.001    0.001 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}





In [68]:
print_profile_data(large_numpy_array)

         27 function calls in 0.001 seconds

   Random listing order was used
   List reduced from 21 to 5 due to restriction <'put|plasma|store_and_register'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.001    0.001 worker.py:257(store_and_register)
        1    0.000    0.000    0.000    0.000 {method 'compute_put_id' of 'local_scheduler.LocalSchedulerClient' objects}
        1    0.001    0.001    0.001    0.001 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}
        1    0.000    0.000    0.001    0.001 worker.py:2253(put)
        1    0.000    0.000    0.001    0.001 worker.py:322(put_object)



         27 function calls in 0.001 seconds

   Random listing order was used
   List reduced from 21 to 1 due to restriction <'plasma'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.001    0.001    0.001    0.001 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}





In [69]:
print_profile_data(very_large_numpy_array)

         27 function calls in 0.011 seconds

   Random listing order was used
   List reduced from 21 to 5 due to restriction <'put|plasma|store_and_register'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.011    0.011 worker.py:257(store_and_register)
        1    0.000    0.000    0.000    0.000 {method 'compute_put_id' of 'local_scheduler.LocalSchedulerClient' objects}
        1    0.011    0.011    0.011    0.011 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}
        1    0.000    0.000    0.011    0.011 worker.py:2253(put)
        1    0.000    0.000    0.011    0.011 worker.py:322(put_object)



         27 function calls in 0.011 seconds

   Random listing order was used
   List reduced from 21 to 1 due to restriction <'plasma'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.011    0.011    0.011    0.011 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}





In [70]:
print_profile_data(large_string_array)

         27 function calls in 0.004 seconds

   Random listing order was used
   List reduced from 21 to 5 due to restriction <'put|plasma|store_and_register'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.004    0.004 worker.py:257(store_and_register)
        1    0.000    0.000    0.000    0.000 {method 'compute_put_id' of 'local_scheduler.LocalSchedulerClient' objects}
        1    0.004    0.004    0.004    0.004 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}
        1    0.000    0.000    0.004    0.004 worker.py:2253(put)
        1    0.000    0.000    0.004    0.004 worker.py:322(put_object)



         27 function calls in 0.004 seconds

   Random listing order was used
   List reduced from 21 to 1 due to restriction <'plasma'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.004    0.004    0.004    0.004 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}





In [71]:
print_profile_data(array_of_large_integers)

         27 function calls in 0.008 seconds

   Random listing order was used
   List reduced from 21 to 5 due to restriction <'put|plasma|store_and_register'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.008    0.008 worker.py:257(store_and_register)
        1    0.000    0.000    0.000    0.000 {method 'compute_put_id' of 'local_scheduler.LocalSchedulerClient' objects}
        1    0.008    0.008    0.008    0.008 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}
        1    0.000    0.000    0.008    0.008 worker.py:2253(put)
        1    0.000    0.000    0.008    0.008 worker.py:322(put_object)



         27 function calls in 0.008 seconds

   Random listing order was used
   List reduced from 21 to 1 due to restriction <'plasma'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.008    0.008    0.008    0.008 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}





In [72]:
print_profile_data(large_string_array_asints)

         27 function calls in 0.001 seconds

   Random listing order was used
   List reduced from 21 to 5 due to restriction <'put|plasma|store_and_register'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.001    0.001 worker.py:257(store_and_register)
        1    0.000    0.000    0.000    0.000 {method 'compute_put_id' of 'local_scheduler.LocalSchedulerClient' objects}
        1    0.001    0.001    0.001    0.001 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}
        1    0.000    0.000    0.001    0.001 worker.py:2253(put)
        1    0.000    0.000    0.001    0.001 worker.py:322(put_object)



         27 function calls in 0.001 seconds

   Random listing order was used
   List reduced from 21 to 1 due to restriction <'plasma'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.001    0.001    0.001    0.001 {method 'put' of 'pyarrow.plasma.PlasmaClient' objects}





## Plots

In [61]:
pr = cProfile.Profile()
pr.enable()
ray.put(array_of_large_integers)
pr.disable()
s = StringIO.StringIO()
sortby = 'module'
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.strip_dirs()
ps.print_stats('plasma')
ps
#print s.getvalue()

AttributeError: Stats instance has no attribute 'get'