# 1. Initialization

### Setup

In [2]:
import ray
import pandas as pd
import time 
import bokeh 
import numpy as np
import binascii
import redis
import pprint
import json
import qgrid
import matplotlib.pyplot as plt
pp = pprint.PrettyPrinter() 

### Function definitions

In [3]:
@ray.remote
def example(x):
    return "ok" 

@ray.remote
def example2(x): 
    return "hi"

@ray.remote
class TestCls():
    def __init__(self):
        self.g = 1
        
    def to_go(self, x):
        return x

    
@ray.remote
class Outer():
    def __init__(self):
        self.f = 1
        self.test = TestCls.remote()
    
    def to_go2(self, x):
        return x * 2
    
    def error(self):
        return 1/0

### Generate data in Redis

In [4]:
# Here, we generate data in redis for remote tasks
print("here")
results = ray.get([example.remote(x) for x in range(4000)])
print("here2")
results2 = ray.get([example2.remote(x) for x in range(2000)])
print("here3")
# Generating data for Actor tasks
actor = TestCls.remote()
actor_results = ray.get([actor.to_go.remote(1)])

err_actor = Outer.remote()
err_actor.error.remote()
print("done")

here


RayConnectionError: This command cannot be called before Ray has been started. You can start Ray with 'ray.init()'.

### Connect to Redis

In [5]:
addr, port = ray.worker.global_worker.redis_address.split(":")
rc = redis.StrictRedis(host=addr, port=port, decode_responses=True, encoding='latin-1', encoding_errors='replace')

# 2. Jobs Data


### Remote Functions Information

In [6]:
fn_table = ray.global_state.function_table()
fn_list = []
for fn_id in fn_table:
    val = fn_table[fn_id]
    val["function_id"] = fn_id
    fn_list.append(val)
qgrid.nbinstall(overwrite = True)
qgrid.show_grid(pd.DataFrame(fn_list))

### Task Information

In [7]:
from pandas.io.json import json_normalize

tt = ray.global_state.task_table()
tt_list = list(tt.values())
tt_list

for d in tt_list:
    d['TaskSpec']['ReturnObjectIDs'] = [oid.hex() for oid in d['TaskSpec']['ReturnObjectIDs']]

task_df = json_normalize(tt_list)
qgrid.show_grid(task_df)

### Actor Information

In [None]:
actor_info = dict()
actors = rc.keys("Actor*") 
for actor in actors:
    actor_key_str = actor[len('Actor:'):]
    actor_key_bytes = actor_key_str.encode('latin-1')
    actor_info['Actor:{}'.format(hex_identifier(actor_key_bytes))] = rc.hgetall(actor)
    x = actor_info['Actor:{}'.format(hex_identifier(actor_key_bytes))]
    if 'class_id' in x: 
        class_key_bytes = x['class_id'].encode('latin-1')
        x['class_id'] = format(hex_identifier(class_key_bytes))
    if 'driver_id' in x: 
        driver_bytes = x['driver_id'].encode('latin-1')
        x['driver_id'] = format(hex_identifier(driver_bytes))

actor_df = pd.DataFrame.from_dict(actor_info)
qgrid.show_grid(actor_df.T)

### Task - Worker Placement Information

In [None]:
event_names = rc.keys("event_log*")
results = dict()
for i in range(len(event_names)):
    event_list = rc.lrange(event_names[i], 0, -1)
    for event in event_list:
        event_dict = json.loads(event)
        task_id = ""
        worker_id = ""
        function_name = ""
    for element in event_dict:
        if "task_id" in element[3] and "worker_id" in element[3]:
            task_id = element[3]["task_id"]
            worker_id = element[3]["worker_id"]
            function_name = element[3]["function_name"]
        if task_id != "" and worker_id != "" and function_name != "":
            results[worker_id] = {}
            results[worker_id]["task_id"] = task_id
            results[worker_id]["function_name"] = function_name
results_table = pd.DataFrame.from_dict(results)
qgrid.show_grid(results_table.T)

### Task Profiles

In [None]:
task_profiles, events = ray.global_state.task_profiles()
profiles_dict = dict()
for task_id, profiles in task_profiles.items(): 
    for profile in profiles:
        start_exec = -1
        end_exec = -1 
        start_store = -1
        end_store = -1
        start_lock = -1
        end_lock = -1
        overall_start = profile[0][0]
        overall_end = profile[len(profile)-1][0]
        overall_dur = overall_end - overall_start
        for log in profile: 
            if log[1] == "ray:task:execute" and log[2] == 1: 
                start_exec = log[0]
            if log[1] == "ray:task:execute" and log[2] == 2: 
                end_exec = log[0]
            if log[1] == "ray:task:store_outputs" and log[2] == 1: 
                start_store = log[0]
            if log[1] == "ray:task:store_outputs" and log[2] == 2: 
                end_store = log[0]
            if log[1] == "ray:acquire_lock" and log[2] == 1: 
                start_lock = log[0]
            if log[1] == "ray:acquire_lock" and log[2] == 2: 
                end_lock = log[0]
        if start_exec != -1 and end_exec != -1 and start_store != -1 and end_store != -1 and start_lock != -1 and end_lock != -1:
            profiles_dict[task_id] = dict()
            exec_dur = end_exec - start_exec
            store_dur = end_store - start_store
            lock_dur = end_lock - start_lock
            overall_dur = overall_end - overall_start 
            profiles_dict[task_id]["execute"] = exec_dur
            profiles_dict[task_id]["store"] = store_dur
            profiles_dict[task_id]["acquire_lock"] = lock_dur
            profiles_dict[task_id]["total"] = overall_dur
            profiles_dict[task_id]["other"] = overall_dur - exec_dur - store_dur - lock_dur
results_table = pd.DataFrame.from_dict(profiles_dict)
qgrid.show_grid(results_table.T)



In [None]:
total_acq = 0
total_exec = 0
total_store = 0
total_other = 0
total = 0
for value in profiles_dict.values(): 
    total_exec += value["execute"]
    total_acq += value["acquire_lock"]
    total_store += value["store"]
    total_other += value["other"]
    total += value["total"]

labels = 'Acquire Lock', 'Execute', 'Store', 'Other'
sizes = [total_acq/total, total_exec/total, total_store/total, total_other/total]
explode = (0, 0.1, 0, 0)
plt.pie(sizes, explode=explode, labels=labels, shadow=True, startangle=140)
plt.axis('equal')
print("Overall Task Breakdowns:")
print("Acquire Lock: " + str(total_acq/total * 100) + "%")
print("Execute: " + str(total_exec/total * 100) + "%")
print("Store outputs: " + str(total_store/total * 100) + "%")
print("Other: " + str(total_other/total * 100) + "%")
plt.show()


### Event Profiles 

In [None]:
event_list = []

# Get and decode all task timing/event logs
for key in rc.keys("event_log*"):
    content = rc.lrange(key, 0, -1)
    event_list.append(json.loads(content[0])) 
    
from collections import defaultdict

# event_dict is used to store timing info
event_dict = defaultdict(lambda: np.full(len(event_list), np.nan))

# info_dict is used to store meta data - such as function names and task id
info_dict = defaultdict(lambda: [None] * len(event_list))

for i, task_event in enumerate(event_list):
    for event in (task_event):
        time, label, startstop, info = event
        event_dict[(label, startstop)][i] = time
        if info:
            for k in info:
                info_dict[k][i] = info[k]

edf = pd.DataFrame(dict(event_dict))
edf.rename(columns={1: 'start', 2:'end'}, inplace=True)
edf

### Stragglers

In [None]:
event_names = rc.keys("event_log*")
x = 10
stragglers = dict()

for i in range(len(event_names)):
    event_list = rc.lrange(event_names[i], 0, -1)
    for event in event_list:
        event_dict = json.loads(event)
        task_id = ""
        overall_start = event_dict[0][0]
        overall_end = event_dict[len(event_dict)-1][0]
        overall_dur = overall_end - overall_start
        exec_start = -1
        exec_end = -1
        exec_dur = -1
        for element in event_dict:
            if element[1] == "ray:task:execute" and element[2] == 1:
                exec_start = element[0]
            if element[1] == "ray:task:execute" and element[2] == 2:
                exec_end = element[0]
            if "task_id" in element[3]:
                task_id = element[3]["task_id"]
        if exec_start != -1 and exec_end != -1 and task_id != "":
            exec_dur = exec_end - exec_start
            if len(stragglers.keys()) < x:
                stragglers[task_id] = exec_dur
            if len(stragglers.keys()) == x:
                shortest_time = min(stragglers.values()) 
                for tid, time in stragglers.items(): 
                    if time == shortest_time: 
                        del[tid] 
                        stragglers[task_id] = exec_dur 
                        break
results_table = pd.DataFrame(stragglers, index = [0])
qgrid.show_grid(results_table.T)


### Reconstructed Task Information

In [None]:
event_names = rc.keys("event_log*")
attempted = dict()
reconstructed = dict()
for i in range(len(event_names)):
    event_list = rc.lrange(event_names[i], 0, -1)
    for event in event_list:
        event_dict = json.loads(event)
        task_id = ""
        for element in event_dict:
            if "task_id" in element[3]:
                task_id = element[3]["task_id"]
        if task_id != "":
            if task_id in attempted:
                if task_id not in reconstructed:
                    reconstructed[task_id] = 0
                    reconstructed[task_id] += 1
                else:
                    attempted[task_id] = True
results_table = pd.DataFrame(reconstructed)
qgrid.show_grid(results_table)
# include objects

# 3. System State


### Node Information

In [None]:
# Using the global state API, we can populate a DataFrame with a list of Redis Clients currently connected
ctable = ray.global_state.client_table()

client_list = []
for node_ip in ctable:
    for client in ctable[node_ip]:
        client["node_ip_address"] = node_ip
        client_list.append(client)

client_df = pd.DataFrame(client_list)
qgrid.show_grid(client_df)

### Object Store

In [None]:
# We can populate a DataFrame with a list of objects in the object store
object_dict = {oid.hex(): v for oid, v in ray.global_state.object_table().items()}
object_df = pd.DataFrame(object_dict).transpose()
qgrid.show_grid(object_df)

### Object - Worker Placement Information 

In [None]:
# Objects associated with each worker_id 
object_table = ray.global_state.object_table()
location_to_objects = dict()

for object_id, object_descriptor in object_table.items():
    if object_descriptor["ManagerIDs"] != None: 
        for location in object_descriptor["ManagerIDs"]:
            if location not in location_to_objects:
                location_to_objects[location] = []
            object_id = str(object_id)
            obj_comp = object_id.split("(")
            obj_comps = obj_comp[1].split(")") 
            object_id = obj_comps[0]
            location_to_objects[location].append(object_id)
table = pd.DataFrame.from_dict(location_to_objects)
qgrid.show_grid(table)
# object id -> worker id 
# skew in how objects are distributed 
# physical nodes -> total amt data on node, num tasks 

### Worker Information

In [None]:
workers = rc.keys("Worker*") 
worker_info = dict()
for worker in workers:
    worker_key_str = worker[len('Workers:'):]
    worker_key_bytes = worker_key_str.encode('latin-1')
    worker_info['Workers:{}'.format(hex_identifier(worker_key_bytes))] = rc.hgetall(worker)
table = pd.DataFrame.from_dict(worker_info)
qgrid.show_grid(table.T)
# resource info for each physical node 
# double check the IP 

### Object Transfer Information 

In [None]:
log_files = ray.global_state.log_files()
transferred = dict()
for addr, inner_dict in log_files.items(): 
    for filename, contents in inner_dict.items(): 
        if "plasma_manager" in filename and ".out" in filename:
            cont = str(contents).split("ObjectID: ") 
            cont2 = cont[1].split("\\n")
            if cont2[0] not in transferred:
                transferred[cont2[0]] = 0 
            transferred[cont2[0]] += 1 
table = pd.DataFrame(transferred, index = [0]) 
qgrid.show_grid(table.T)


# 3. Error Information

### Error Profiles


In [None]:
event_names = rc.keys("event_log*")
error_profiles = dict()
for i in range(len(event_names)):
    event_list = rc.lrange(event_names[i], 0, -1)
    for event in event_list:
        event_dict = json.loads(event)
        task_id = ""
        traceback = ""
        worker_id = ""
        start_time = -1
    for element in event_dict:
        if element[1] == "ray:task:execute" and element[2] == 1:
            start_time = element[0]
        if "task_id" in element[3] and "worker_id" in element[3]:
            task_id = element[3]["task_id"]
            worker_id = element[3]["worker_id"]
        if "traceback" in element[3]:
            traceback = element[3]["traceback"]
        if task_id != "" and worker_id != "" and traceback != "":
            if start_time != -1:
                error_profiles[task_id] = dict()
                error_profiles[task_id]["worker_id"] = worker_id
                error_profiles[task_id]["traceback"] = traceback
                error_profiles[task_id]["start_time"] = start_time
table = pd.DataFrame.from_dict(error_profiles) 
qgrid.show_grid(table.T)

### Parallelization Score

In [21]:
event_names = rc.keys("event_log*")
total_exec = 0
earliest_start = float("inf")
latest_end = -1
for i in range(len(event_names)):
    event_list = rc.lrange(event_names[i], 0, -1)
    for event in event_list:
        event_dict = json.loads(event)
        start_point = 00125
        
        end_point = 0
        for element in event_dict:
            if element[1] == "ray:task:execute" and element[2] == 1:
                start_point = element[0]
            if start_point < earliest_start:
                earliest_start = start_point
            if element[1] == "ray:task:execute" and element[2] == 2:
                end_point = element[0]
            if end_point > latest_end:
                latest_end = end_point
        total_exec += (end_point - start_point)
job_dur = latest_end - earliest_start
table = ray.global_state.client_table()
total_cpus = 0
for key, value in table.items():
    for element in range(len(value)):
        if "NumCPUs" in value[element]:
            total_cpus += table[key][element]["NumCPUs"]
if total_exec != None and job_dur != None: 
    print("Parallelization Score: ")
    print ((total_exec) / (total_cpus * job_dur))

Parallelization Score: 
1.2988097921150623e-12


# Task Interactive Queries 

In [15]:
import pandas.io.sql as psql
from bokeh.plotting import figure
from bokeh.layouts import layout, widgetbox
from bokeh.models import ColumnDataSource, HoverTool, Div
from bokeh.models.widgets import Slider, Select, TextInput
from bokeh.io import curdoc

prof1 = dict() 
tid = 2
prof1[tid] = dict()
prof1[tid]["worker_id"] = 1
prof1[tid]["execute"] = 10
prof1[tid]["store"] = 5 
prof1[tid]["errored"] = False 

prof2 = dict() 
tid = 1
prof1[tid] = dict()
prof1[tid]["worker_id"] = 3
prof1[tid]["execute"] = 5
prof1[tid]["store"] = 15 
prof1[tid]["errored"] = False

profiles = [prof1, prof2]
axis_map = {
    "Time": "Time",
    "Workers": "Worker",
    "Get Task": "Reviews",
    "Box Office (dollars)": "BoxOffice",
    "Length (minutes)": "Runtime",
    "Year": "Year",
}

hover = HoverTool(tooltips=[
    ("Title", "@title"),
    ("Year", "@year"),
    ("$", "@revenue")
])
exec_time = Slider(title="Time to execute:", value=50, start=0, end=1000000, step=10)
store_time = Slider(title="Time to store outputs:", value=50, start=0, end=1000000, step=10)
x_axis = Select(title="X Axis", options=sorted(axis_map.keys()), value="Time")
y_axis = Select(title="Y Axis", options=sorted(axis_map.keys()), value="Workers")

source = ColumnDataSource(data=dict(x=[], y=[], color=[], title=[], year=[], revenue=[], alpha=[]))
p = figure(plot_height=600, plot_width=700, title="", toolbar_location=None, tools=[hover])
p.circle(x="x", y="y", source=source, size=7, color="color", line_color=None, fill_alpha="alpha")

def select():
    return profiles

sizing_mode = 'fixed'  # 'scale_width' also looks nice with this example
controls = [x_axis, y_axis]
inputs = widgetbox(*controls, sizing_mode=sizing_mode)
l = layout([
    [inputs, p],
], sizing_mode=sizing_mode)
curdoc().add_root(l)
curdoc().title = "Tasks"


def update():
    df = select()
    x_name = axis_map[x_axis.value]
    y_name = axis_map[y_axis.value]

    p.xaxis.axis_label = x_axis.value
    p.yaxis.axis_label = y_axis.value
    #p.title.text = "%d movies selected" % len(df)
    source.data = dict(
        x=df[x_name],
        y=df[y_name],
        color=df["color"],
        title=df["Title"],
        year=df["Year"],
        revenue=df["revenue"],
        alpha=df["alpha"],
    )

update()

# movies["color"] = np.where(movies["Oscars"] > 0, "orange", "grey")
# movies["alpha"] = np.where(movies["Oscars"] > 0, 0.9, 0.25)
# movies.fillna(0, inplace=True)  # just replace missing values with zero
# movies["revenue"] = movies.BoxOffice.apply(lambda x: '{:,d}'.format(int(x)))

# with open(join(dirname(__file__), "razzies-clean.csv")) as f:
#     razzies = f.read().splitlines()
# movies.loc[movies.imdbID.isin(razzies), "color"] = "purple"
# movies.loc[movies.imdbID.isin(razzies), "alpha"] = 0.9

# axis_map = {
#     "Time": "Time",
#     "Workers": "Worker",
#     "Get Task": "Reviews",
#     "Box Office (dollars)": "BoxOffice",
#     "Length (minutes)": "Runtime",
#     "Year": "Year",
# }

# desc = Div(text=open(join(dirname(__file__), "description.html")).read(), width=800)

# # Create Input controls
# reviews = Slider(title="Minimum number of reviews", value=80, start=10, end=300, step=10)
# min_year = Slider(title="Year released", start=1940, end=2014, value=1970, step=1)
# max_year = Slider(title="End Year released", start=1940, end=2014, value=2014, step=1)
# oscars = Slider(title="Minimum number of Oscar wins", start=0, end=4, value=0, step=1)
# boxoffice = Slider(title="Dollars at Box Office (millions)", start=0, end=800, value=0, step=1)
# genre = Select(title="Genre", value="All",
#                options=open(join(dirname(__file__), 'genres.txt')).read().split())
# director = TextInput(title="Director name contains")
# cast = TextInput(title="Cast names contains")
# x_axis = Select(title="X Axis", options=sorted(axis_map.keys()), value="Time")
# y_axis = Select(title="Y Axis", options=sorted(axis_map.keys()), value="Workers")

# # Create Column Data Source that will be used by the plot
# source = ColumnDataSource(data=dict(x=[], y=[], color=[], title=[], year=[], revenue=[], alpha=[]))

# hover = HoverTool(tooltips=[
#     ("Title", "@title"),
#     ("Year", "@year"),
#     ("$", "@revenue")
# ])

# p = figure(plot_height=600, plot_width=700, title="", toolbar_location=None, tools=[hover])
# p.circle(x="x", y="y", source=source, size=7, color="color", line_color=None, fill_alpha="alpha")


# def select_movies():
#     genre_val = genre.value
#     director_val = director.value.strip()
#     cast_val = cast.value.strip()
#     selected = movies[
#         (movies.Reviews >= reviews.value) &
#         (movies.BoxOffice >= (boxoffice.value * 1e6)) &
#         (movies.Year >= min_year.value) &
#         (movies.Year <= max_year.value) &
#         (movies.Oscars >= oscars.value)
#     ]
#     if (genre_val != "All"):
#         selected = selected[selected.Genre.str.contains(genre_val)==True]
#     if (director_val != ""):
#         selected = selected[selected.Director.str.contains(director_val)==True]
#     if (cast_val != ""):
#         selected = selected[selected.Cast.str.contains(cast_val)==True]
#     return selected


# def update():
#     df = select_movies()
#     x_name = axis_map[x_axis.value]
#     y_name = axis_map[y_axis.value]

#     p.xaxis.axis_label = x_axis.value
#     p.yaxis.axis_label = y_axis.value
#     p.title.text = "%d movies selected" % len(df)
#     source.data = dict(
#         x=df[x_name],
#         y=df[y_name],
#         color=df["color"],
#         title=df["Title"],
#         year=df["Year"],
#         revenue=df["revenue"],
#         alpha=df["alpha"],
#     )

# controls = [reviews, boxoffice, genre, min_year, max_year, oscars, director, cast, x_axis, y_axis]
# for control in controls:
#     control.on_change('value', lambda attr, old, new: update())

# sizing_mode = 'fixed'  # 'scale_width' also looks nice with this example

# inputs = widgetbox(*controls, sizing_mode=sizing_mode)
# l = layout([
#     [desc],
#     [inputs, p],
# ], sizing_mode=sizing_mode)

# update()  # initial load of the data

# curdoc().add_root(l)
# curdoc().title = "Tasks"

TypeError: list indices must be integers or slices, not str