## How to Use

1. Run the job with `-L <rate>`.
2. Create `rays.csv` and `bags.csv` by running the following commands in the log directory:
```
r2t2/scripts/parse_worker_info.py -i . -t RAG -o rays.csv
r2t2/scripts/parse_worker_info.py -i . -t BAG -o bags.csv
```

**Make sure** you have `numpy`, `matplotlib` and `pandas` installed.

In [34]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from IPython.display import display

# Notebook-wide defaults: 12x10 figures, 16pt font, and at most 20 rows
# when pandas renders a table.
plt.rcParams.update({'figure.figsize': (12, 10), 'font.size': 16})
pd.set_option('display.max_rows', 20)

def show_more(d, lines):
    """Display `d` with pandas' `display.max_rows` temporarily set to `lines`.

    Args:
        d: Object handed to IPython's `display`.
        lines: Value used for `display.max_rows` while `d` is rendered.

    The previous option value is restored when rendering finishes, including
    when `display` raises, so a failed render cannot leave the notebook with
    a changed row limit.
    """
    # option_context restores the old value on exit, on both success and error.
    with pd.option_context('display.max_rows', lines):
        display(d)
    
def show_all(d):
    """Display `d` with the row limit raised to `len(d)`."""
    total_rows = len(d)
    return show_more(d, total_rows)

def read_data(rays, bags):
    """Load the ray and bag logs and put both on one relative time axis.

    Args:
        rays: Path or file-like object for the rays CSV. It must contain a
            `timestamp` column; its `bag` column is read with dtype `object`.
        bags: Path or file-like object for the bags CSV. It must contain a
            `timestamp` column.

    Returns:
        A `(ray_data, bag_data)` tuple of DataFrames in which the `timestamp`
        column has been replaced by `timestamp_ms`: the offset from the
        smallest timestamp found in either file. Rows whose timestamp is
        missing are ignored when picking that origin.
    """
    ray_data = pd.read_csv(rays, dtype={'bag': object})
    bag_data = pd.read_csv(bags)

    # Series.min() is vectorized and skips missing values. The builtin min()
    # walks the column in Python and returns NaN if the first value is NaN.
    starts = [ray_data.timestamp.min(), bag_data.timestamp.min()]
    valid_starts = [start for start in starts if not pd.isna(start)]
    # If neither file has a usable timestamp there is nothing to offset by;
    # 0 keeps the subtraction below a no-op.
    min_timestamp = min(valid_starts, default=0)

    for frame in (ray_data, bag_data):
        # pop() removes `timestamp`; the new column is appended at the end.
        frame['timestamp_ms'] = frame.pop('timestamp') - min_timestamp

    return ray_data, bag_data

def show_path(data, path_id):
    """Display and return a styled table of one path's events.

    Keeps the rows of `data` whose `pathId` equals `path_id` and whose
    `shadowRay` is 0, reduced to the hop, worker, treelet, timestamp, action
    and bag columns and ordered by hop, then by timestamp. Rows with an even
    `hop` get a faint background. The styled table is displayed without a
    row limit and also returned.
    """
    columns = ['hop', 'workerId', 'treeletId', 'timestamp_ms', 'action', 'bag']
    selected = (data.pathId == path_id) & (data.shadowRay == 0)

    events = data[selected].loc[:, columns]
    events = events.sort_values(by=['hop', 'timestamp_ms'])
    events = events.reset_index(drop=True)

    def highlighter(row):
        # One CSS string per cell; the whole row shares the same style.
        if row.hop % 2 == 0:
            css = 'background-color: rgba(255, 255, 255, 0.1)'
        else:
            css = ''
        return [css] * len(row)

    styled = events.style.apply(highlighter, axis=1)
    show_more(styled, len(events))
    return styled

def show_longest_paths(data):
    """Return the 20 paths with the largest `timestamp_ms`.

    Only rows whose `action` is 'Generated' or 'Finished' are considered.
    The result is indexed by `pathId`, holds each path's maximum
    `timestamp_ms` and maximum `hop`, and is sorted by `timestamp_ms` in
    descending order.
    """
    endpoint_rows = data[data.action.isin(['Generated', 'Finished'])]
    per_path = endpoint_rows.groupby('pathId').agg(
        {'timestamp_ms': 'max', 'hop': 'max'})
    ranked = per_path.sort_values('timestamp_ms', ascending=False)
    return ranked.head(20)

In [19]:
# Load the ray and bag logs of the run under analysis. Both frames come back
# with a `timestamp_ms` column in place of `timestamp`.
rays, bags = read_data("/data/cloudrt/logs/2019-12-30/killeroo-simple/rays.csv",
                       "/data/cloudrt/logs/2019-12-30/killeroo-simple/bags.csv")

Unnamed: 0,pathId,hop,shadowRay,workerId,treeletId,action,bag,timestamp_ms
0,0,0,0,1,0,Generated,,42
1,1,0,0,1,0,Generated,,42
2,2,0,0,1,0,Generated,,42
3,3,0,0,1,0,Generated,,42
4,4,0,0,1,0,Generated,,42
5,5,0,0,1,0,Generated,,42
6,6,0,0,1,0,Generated,,42
7,7,0,0,1,0,Generated,,42
8,8,0,0,1,0,Generated,,42
9,9,0,0,1,0,Generated,,42


Unnamed: 0,bag,workerId,count,size,action,timestamp_ms
0,T0/W1/B0,1,10609,1884617,Enqueued,265
1,T3/W52/B0,1,812,186711,Dequeued,617
2,T3/W16/B0,1,3602,992659,Dequeued,658
3,T3/W20/B1,1,296,69913,Dequeued,768
4,T0/W1/B1,1,812,182973,Enqueued,780
5,T3/W7/B1,1,2301,556354,Dequeued,950
6,T0/W1/B2,1,3898,1042856,Enqueued,990
7,samples/W1/B0,1,27,891,Enqueued,1049
8,T0/W1/B3,1,2274,543362,Enqueued,1206
9,T3/W9/B2,1,3755,1061187,Dequeued,1257


In [35]:
# List the 20 paths whose Generated/Finished events have the largest timestamp_ms.
show_longest_paths(rays)

Unnamed: 0_level_0,timestamp_ms,hop
pathId,Unnamed: 1_level_1,Unnamed: 2_level_1
600867,23826,77
608196,23509,76
566190,23469,78
584697,23469,78
584560,23469,75
607116,23415,77
613207,23415,77
610153,23415,77
598012,23342,76
579505,23342,76
