# Evaluate the cells below to initialize the web UI.

In [None]:
import binascii
import os
import pandas as pd
import qgrid 
import ray
import redis
import sys

In [None]:
# Initialize Ray using the Redis address taken from the REDIS_ADDRESS
# environment variable. The lookup raises KeyError when the variable is unset.
redis_address = os.environ["REDIS_ADDRESS"]
ray.init(redis_address=redis_address)

In [None]:
# Open a direct Redis client on the address held by Ray's global worker.
# The unpacking assumes the address contains exactly one ":" (host:port).
addr, port = ray.worker.global_worker.redis_address.split(":")
# decode_responses=True makes the client return str instead of bytes. latin-1
# maps every byte to one code point, so binary IDs embedded in keys and values
# can be recovered later with .encode('latin-1'), as the actor and worker
# cells do.
rc = redis.StrictRedis(host=addr, port=port, decode_responses=True, encoding='latin-1', encoding_errors='replace')

# Task and Actor Information 

### Remote Function Information 


In [None]:
# Show every entry of Ray's function table as one grid row.
fn_table = ray.global_state.function_table()

# Flatten the {function_id: info} mapping into a list of records, copying the
# ID into each record so it becomes a regular column. The info dicts are
# updated in place.
fn_list = []
for fn_id, fn_info in fn_table.items():
    fn_info["function_id"] = fn_id
    fn_list.append(fn_info)

qgrid.nbinstall(overwrite=True)

frame = pd.DataFrame(fn_list)
# Columns are relabelled by position, so this list has to match the number
# and order of columns pandas produced above.
frame.columns = [
    "DriverID",
    "Module",
    "Function",
    "FunctionID",
]
qgrid.show_grid(frame)

### Task Information 

In [None]:
from pandas.io.json import json_normalize

# Show every entry of Ray's task table as one grid row.
tt = ray.global_state.task_table()
tt_list = list(tt.values())

# Object IDs are replaced by their hex strings (in place) so the nested
# records can be flattened and displayed; other arguments are kept as they are.
for task in tt_list:
    spec = task['TaskSpec']
    spec['ReturnObjectIDs'] = [oid.hex() for oid in spec['ReturnObjectIDs']]
    printable_args = []
    for arg in spec['Args']:
        if isinstance(arg, ray.local_scheduler.ObjectID):
            printable_args.append(arg.hex())
        else:
            printable_args.append(arg)
    spec['Args'] = printable_args

# json_normalize flattens the nested 'TaskSpec' dict into top-level columns.
task_df = json_normalize(tt_list)
# Columns are relabelled by position, so this list has to match the number
# and order of the flattened columns.
task_df.columns = [
    "Local Scheduler ID",
    "State",
    "Actor Counter",
    "ActorID",
    "Arguments",
    "DriverID",
    "FunctionID",
    "Parent Counter",
    "Parent Task ID",
    "Required CPUs",
    "Required GPUs",
    "Return Object IDs",
    "TaskID",
]
qgrid.show_grid(task_df)

### Actor Information 


In [None]:
def hex_identifier(identifier):
    """Return the lowercase hexadecimal text form of a binary identifier.

    Args:
        identifier: A bytes-like object holding the raw identifier.

    Returns:
        A str with two hex digits per input byte.
    """
    hex_bytes = binascii.hexlify(identifier)
    return hex_bytes.decode()

# Show the Redis hashes stored under keys matching "Actor*", one row per key.
# NOTE(review): the glob "Actor*" is broader than the 'Actor:' prefix that is
# stripped below; any other key starting with "Actor" is picked up as well.
actor_info = dict()
actors = rc.keys("Actor*")
for actor in actors:
    # The part of the key after the prefix is a binary ID that the client
    # decoded as latin-1; encode it back to bytes before hex-formatting it.
    raw_id = actor[len('Actor:'):].encode('latin-1')
    fields = rc.hgetall(actor)
    actor_info['Actor:{}'.format(hex_identifier(raw_id))] = fields
    # Binary ID fields, when present, are replaced by their hex form.
    for id_field in ('class_id', 'driver_id'):
        if id_field in fields:
            fields[id_field] = hex_identifier(fields[id_field].encode('latin-1'))

# from_dict puts one key per column; transpose so that each key is a row.
actor_df = pd.DataFrame.from_dict(actor_info)
df = actor_df.transpose()
df.index.name = "ActorID"
# Columns are relabelled by position, so this list has to match the number
# and order of the hash fields pandas collected.
df.columns = [
    "Method",
    "Class",
    "Class ID",
    "Class Name",
    "DriverID",
    "Module",
    "Num GPUs",
]
qgrid.show_grid(df)

# System State Information 


### Node Information

In [None]:
# Show every client registered in Ray's client table as one grid row.
ctable = ray.global_state.client_table()

# The table maps a node IP address to the clients on that node; flatten it
# and copy the IP into each client record (updated in place).
client_list = []
for node_ip, node_clients in ctable.items():
    for client in node_clients:
        client["node_ip_address"] = node_ip
        client_list.append(client)

client_df = pd.DataFrame(client_list)
# Columns are relabelled by position, so this list has to match the number
# and order of columns pandas produced above.
client_df.columns = [
    "Aux Address",
    "Client Type",
    "DB Client ID",
    "Deleted",
    "Local Scheduler Socket",
    "Num CPUs",
    "NumGPUs",
    "Node IP Address",
]
qgrid.show_grid(client_df)

### Worker Information 

In [None]:
# Show the Redis hashes stored under keys matching "Worker*", one row per key.
workers = rc.keys("Worker*")
worker_info = dict()
for worker in workers:
    # The part of the key after 'Workers:' is a binary ID that the client
    # decoded as latin-1; encode it back to bytes before hex-formatting it.
    raw_id = worker[len('Workers:'):].encode('latin-1')
    fields = rc.hgetall(worker)
    worker_info['Workers:{}'.format(hex_identifier(raw_id))] = fields

# from_dict puts one worker per column; transpose so each worker is a row.
table = pd.DataFrame.from_dict(worker_info)
table_t = table.transpose()
table_t.index.name = "WorkerID"
# Columns are relabelled by position, so this list has to match the number
# and order of the hash fields pandas collected.
table_t.columns = [
    "Local Scheduler Socket",
    "Node IP Address",
    "Plasma Manager Socket",
    "Plasma Store Socket",
    "Stderr File",
    "Stdout File",
]
qgrid.show_grid(table_t)

### Object Store Information


In [None]:
# Show every entry of Ray's object table as one grid row, keyed by the hex
# form of its object ID.
object_dict = {}
for oid, info in ray.global_state.object_table().items():
    object_dict[oid.hex()] = info

# The DataFrame constructor puts one object per column; flip it so that each
# object becomes a row.
object_df = pd.DataFrame(object_dict).T
object_df.index.name = "ObjectID"
# Columns are relabelled by position, so this list has to match the number
# and order of the fields pandas collected.
object_df.columns = [
    "Data Size",
    "Hash",
    "IsPut",
    "ManagerIDs",
    "TaskID",
]
qgrid.show_grid(object_df)

# Error Information


In [None]:
# The import cell at the top of the notebook does not bring in json, so
# import it here; json.loads below would otherwise raise NameError.
import json

# Collect one error profile per failed task from the "event_log*" lists in
# Redis and show them as a table with one row per task ID.
event_names = rc.keys("event_log*")
error_profiles = dict()
for event_name in event_names:
    event_list = rc.lrange(event_name, 0, -1)
    for event in event_list:
        event_dict = json.loads(event)
        # Start every event from a clean slate so that values found in one
        # event are never combined with values from another.
        task_id = ""
        traceback = ""
        worker_id = ""
        start_time = -1
        # Scan the elements of *this* event. The scan has to be nested in the
        # per-event loop: run after it, only the last event of each list is
        # inspected, and an empty list reuses a stale (or undefined)
        # event_dict.
        #
        # Each element is read as: [0] the value stored as the start time,
        # [1] the event kind, [2] a flag compared against 1 (presumably the
        # marker for the start of the span; confirm against the event
        # writer), [3] a mapping of extra fields.
        for element in event_dict:
            if element[1] == "ray:task:execute" and element[2] == 1:
                start_time = element[0]
            if "task_id" in element[3] and "worker_id" in element[3]:
                task_id = element[3]["task_id"]
                worker_id = element[3]["worker_id"]
            if "traceback" in element[3]:
                traceback = element[3]["traceback"]
            # A profile is recorded only once the task ID, worker ID,
            # traceback and start time have all been seen for this event.
            if (task_id != "" and worker_id != "" and traceback != ""
                    and start_time != -1):
                error_profiles[task_id] = {
                    "worker_id": worker_id,
                    "traceback": traceback,
                    "start_time": start_time,
                }

# from_dict puts one task per column; transpose so that each task is a row.
table = pd.DataFrame.from_dict(error_profiles)
qgrid.show_grid(table.T)