In [None]:
import os
import sys
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pprint import pprint
from IPython.display import display

from jupyterthemes import jtplot

# Notebook-wide plot theme from jupyterthemes.
# NOTE(review): the font-size override below is kept after this call on the
# assumption that jtplot.style() resets matplotlib's rcParams — confirm.
jtplot.style(
    context='talk',
    figsize=(8.5, 6),
    fscale=1.75,
    ticks=True,
    grid=False,
)

# Base font size for every figure, and the pandas row-display limit.
plt.rcParams['font.size'] = 18
pd.options.display.max_rows = 20

def histogram_sum(hists):
    """Zero-pad a collection of histograms to a common shape and sum them.

    Every array in ``hists`` is resized **in place** to the largest shape
    found in the collection (``ndarray.resize`` fills the new entries with
    zeros), so the caller's arrays are modified. Existing callers rely on
    that side effect and ignore the return value.

    Parameters
    ----------
    hists : list of numpy.ndarray
        Histograms to align. Shapes are compared as tuples, which picks the
        longest array when all inputs are 1-D.

    Returns
    -------
    numpy.ndarray or float
        Element-wise sum of the padded histograms, i.e.
        ``np.sum(hists, axis=0)``; this is ``0.0`` for an empty collection.
    """
    # (0,) is the floor, so an empty collection still yields a valid shape.
    max_shape = max([(0,)] + [h.shape for h in hists])

    for h in hists:
        # refcheck=False skips numpy's reference-count check, so arrays that
        # are still referenced by the caller's list can be resized.
        h.resize(max_shape, refcheck=False)

    return np.sum(hists, axis=0)
        
def read_logs(logs_root):
    """Aggregate the hop-count histograms logged by one run.

    Every file directly inside ``logs_root`` whose name ends in ``.INFO`` is
    scanned line by line. Lines starting with ``[RAYHOPS]``, ``[SHADOWHOPS]``
    or ``[PATHHOPS]`` carry a JSON object after the tag; its ``bins`` list is
    collected as one histogram of the corresponding kind.

    Parameters
    ----------
    logs_root : str
        Directory containing the ``*.INFO`` log files.

    Returns
    -------
    tuple
        ``(rays_hist, shadows_hist, paths_hist)``: for each kind, the
        element-wise sum of all collected histograms after zero-padding them
        to a common length with ``histogram_sum``. A kind with no records
        yields ``np.sum([], axis=0)``, i.e. the scalar ``0.0``.

    Raises
    ------
    IndexError
        If a tagged line has nothing after the tag.
    json.JSONDecodeError
        If the text after the tag does not start with valid JSON.
    KeyError
        If the decoded object has no ``bins`` entry.
    """
    tags = ("[RAYHOPS]", "[SHADOWHOPS]", "[PATHHOPS]")
    collected = {tag: [] for tag in tags}
    decoder = json.JSONDecoder()

    for name in os.listdir(logs_root):
        if not name.endswith(".INFO"):
            continue

        with open(os.path.join(logs_root, name)) as fin:
            for line in fin:
                for tag in tags:
                    if not line.startswith(tag):
                        continue
                    # Decode the rest of the line rather than only the next
                    # whitespace-delimited token, so payloads containing
                    # spaces (e.g. '{"bins": [1, 2]}') are not truncated.
                    payload = line.split(maxsplit=1)[1]
                    record, _ = decoder.raw_decode(payload)
                    collected[tag].append(np.array(record['bins']))
                    break

    totals = []
    for tag in tags:
        hists = collected[tag]
        histogram_sum(hists)  # pads the arrays in place to a common length
        totals.append(np.sum(hists, axis=0))

    return tuple(totals)

def read_timelogs(logs_root):
    """Collect the ``[TIMING]`` records logged by one run.

    Every file directly inside ``logs_root`` whose name ends in ``.INFO`` is
    scanned line by line; for each line starting with ``[TIMING]`` the JSON
    value that follows the tag is decoded and appended to the result.

    Parameters
    ----------
    logs_root : str
        Directory containing the ``*.INFO`` log files.

    Returns
    -------
    list
        The decoded JSON values, ordered by file name and then by position
        within each file.

    Raises
    ------
    IndexError
        If a ``[TIMING]`` line has nothing after the tag.
    json.JSONDecodeError
        If the text after the tag does not start with valid JSON.
    """
    decoder = json.JSONDecoder()
    timelogs = []

    # os.listdir returns names in arbitrary order; sort so the result is
    # reproducible from run to run.
    for name in sorted(os.listdir(logs_root)):
        if not name.endswith(".INFO"):
            continue

        with open(os.path.join(logs_root, name)) as fin:
            for line in fin:
                if not line.startswith("[TIMING]"):
                    continue
                # Decode the rest of the line rather than only the next
                # whitespace-delimited token, so payloads containing spaces
                # are not truncated.
                payload = line.split(maxsplit=1)[1]
                record, _ = decoder.raw_decode(payload)
                timelogs.append(record)

    return timelogs

def plot_histogram(hist, width, start):
    """Draw the cumulative distribution of a histogram as a bar chart.

    The histogram is normalised by its total and accumulated, then drawn with
    ``plt.bar`` on the current pyplot axes.

    Parameters
    ----------
    hist : array-like
        Bin counts.
    width : number
        Width of each bar; bin ``i`` has its left edge at
        ``start + width * i``.
    start : number
        Left edge of the first bar.
    """
    normalized = hist / np.sum(hist)
    cdf = np.cumsum(normalized)

    left_edges = []
    for index in range(len(normalized)):
        left_edges.append(start + width * index)

    # align='edge': the x positions are the bars' left edges, not centres.
    plt.bar(left_edges, cdf, width=width, align='edge')

In [None]:
def plot_completion_marks(data):
    """Mark the times at which fixed fractions of all paths had finished.

    The cumulative sum of ``data.pathsFinished`` is normalised by its maximum
    to give a completion fraction per row. For each target fraction (25 %,
    50 %, 75 %, 90 %, 95 %, 99 %, 100 %) the matching time is linearly
    interpolated between the two neighbouring rows, and a dotted vertical
    line with a rotated percentage label is drawn on the current pyplot axes.

    Parameters
    ----------
    data : pandas.DataFrame
        Must expose ``timestamp`` and ``pathsFinished`` columns, with rows in
        increasing time order.

    Notes
    -----
    Nothing is drawn when ``data`` has no rows or no path ever finished.
    """
    targets = [0.25, 0.50, 0.75, 0.9, 0.95, 0.99, 1.0]

    timestamps = data.timestamp.to_numpy()
    finished = np.cumsum(data.pathsFinished.to_numpy())

    if len(finished) == 0:
        return

    total = np.max(finished)
    # `not total > 0` also catches NaN; there is no completion curve to mark.
    if not total > 0:
        return

    # Out-of-place division on purpose: the cumulative counts are typically
    # integers, and an in-place `/=` cannot store floats in an integer array.
    fractions = finished / total

    marks = []
    i = 0

    for target in targets:
        # Targets are increasing, so the scan resumes where the last stopped.
        while fractions[i] < target:
            i += 1

        if i == 0:
            # The very first sample already meets the target: there is no
            # earlier row to interpolate from (i - 1 would wrap to the end).
            marks.append(timestamps[0])
            continue

        f0, t0 = fractions[i - 1], timestamps[i - 1]
        f1, t1 = fractions[i], timestamps[i]

        # Linear interpolation between (f0, t0) and (f1, t1) at `target`.
        marks.append(t0 + ((t0 - t1) / (f0 - f1)) * (target - f0))

    for target, t in zip(targets, marks):
        plt.axvline(t, linestyle=':', linewidth=0.5, color='#999999')
        # x is in data coordinates, y is 98 % of the axes height; the label
        # is shifted 10 px to the right of the line.
        plt.annotate("{}%".format(int(target * 100)), (t, 0.98),
                     xycoords=("data", "axes fraction"), textcoords="offset pixels",
                     xytext=(10, 0),
                     fontsize='small', color='#999999', ha='left', va='center_baseline',
                     rotation='vertical')

In [None]:
# CDF of ray hop counts: read_logs(...)[0] is the summed [RAYHOPS] histogram,
# drawn with bars of width 5 starting at 0.
# NOTE(review): this cell reads the "killeroo-2000-256s-250ms" run, whereas the
# shadow-ray and path cells read "killeroo-2000" — confirm that is intentional.
plot_histogram(read_logs("/data/cloudrt/logs/2020-01-05/killeroo-2000-256s-250ms")[0], 5, 0)
plt.xlabel('Hops')
plt.ylabel('Rays (CDF)')

In [None]:
# CDF of shadow-ray hop counts: read_logs(...)[1] is the summed [SHADOWHOPS]
# histogram, drawn with bars of width 5 starting at 0.
plot_histogram(read_logs("/data/cloudrt/logs/2020-01-05/killeroo-2000")[1], 5, 0)
plt.xlabel('Hops')
plt.ylabel('Shadow rays (CDF)')

In [None]:
# CDF of path hop counts: read_logs(...)[2] is the summed [PATHHOPS]
# histogram, drawn with bars of width 5 starting at 0.
plot_histogram(read_logs("/data/cloudrt/logs/2020-01-05/killeroo-2000")[2], 5, 0)
plt.xlabel('Hops')
plt.ylabel('Paths (CDF)')

In [None]:
# Per-timestamp aggregate of the worker statistics for the "killeroo-2000" run.
data = pd.read_csv("/data/cloudrt/logs/2020-01-05/killeroo-2000/workers.csv")
# Sums are requested by name: passing NumPy callables such as np.sum to agg()
# is deprecated in recent pandas, and the string alias gives the same result.
# 'timestamp' is aggregated as well so it stays available as a column;
# bytesDequeued is only needed by the optional x-limit line below.
data = data.groupby(['timestamp']).agg({'timestamp': 'max',
                                        'numSamples': 'sum',
                                        'bagsEnqueued': 'sum',
                                        'bytesEnqueued': 'sum',
                                        'bytesDequeued': np.average,
                                        'pathsFinished': 'sum'})

# Left axis: samples over time.
fig, ax1 = plt.subplots()
ax1.plot(data.timestamp, data.numSamples)
#ax1.set_xlim(left=np.min(data[data.bytesDequeued > 0].timestamp))

# Right axis: bytes per enqueued bag at each timestamp.
ax2 = ax1.twinx()
ax2.plot(data.timestamp, data.bytesEnqueued / data.bagsEnqueued, 'g*-', label='average bag size')

# Vertical markers at the 25 % … 100 % path-completion times.
plot_completion_marks(data)

In [None]:
# Per-timestamp aggregate of the worker statistics for the
# "killeroo-2000-500ms" run.
data = pd.read_csv("/data/cloudrt/logs/2020-01-05/killeroo-2000-500ms/workers.csv")
# bagsEnqueued / bytesEnqueued are averaged per timestamp here; only their
# ratio is plotted below, and a ratio of per-group averages equals the ratio
# of per-group sums as long as no values are missing.
data = data.groupby(['timestamp']).agg({'timestamp': 'max',
                                        'numSamples': 'sum',
                                        'bagsEnqueued': np.average,
                                        'bytesEnqueued': np.average,
                                        'pathsFinished': 'sum'})


# Left axis: samples over time.
fig, ax1 = plt.subplots()
ax1.plot(data.timestamp, data.numSamples)

# Right axis: bytes per enqueued bag at each timestamp.
ax2 = ax1.twinx()
ax2.plot(data.timestamp, data.bytesEnqueued / data.bagsEnqueued, 'g-', label='average bag size')

# Vertical markers at the 25 % … 100 % path-completion times.
plot_completion_marks(data)

In [None]:
def plot_timelogs(timelogs):
    """Draw a stacked bar chart of each worker's time breakdown.

    Each entry of ``timelogs`` is one worker's record: a mapping whose
    ``'categories'`` list holds ``{'title': ..., 'percentage': ...}`` items.
    Workers are ordered by their ``TraceQueue`` percentage (ascending), and
    the ``TraceQueue``, ``OutQueue``, ``SealedBags`` and ``ReceiveQueue``
    percentages are stacked, one bar per worker, on the current pyplot axes.
    A category missing from a worker's record counts as 0 for that worker, so
    the bars always stay aligned with the worker they belong to.

    Parameters
    ----------
    timelogs : list of dict
        Records as returned by ``read_timelogs``.

    Notes
    -----
    Calls ``jtplot.style`` with a 10.5 x 8 figure size before plotting.
    """
    cats = ["TraceQueue", "OutQueue", "SealedBags", "ReceiveQueue"]
    labels = ["Trace", "Serialization", "Compression", "Decompression+Deserialization"]

    # One {title: percentage} lookup per worker, built once and reused both
    # for the sort key and for the bar heights.
    per_worker = [{cat['title']: cat['percentage'] for cat in worker['categories']}
                  for worker in timelogs]
    per_worker.sort(key=lambda shares: shares.get('TraceQueue', 0))

    x = list(range(len(per_worker)))
    bottoms = [0] * len(per_worker)

    jtplot.style(context='talk', figsize=(10.5, 8), fscale=1.75, ticks=True, grid=False)

    for cat in cats:
        values = [shares.get(cat, 0) for shares in per_worker]

        plt.bar(x, values, width=1, bottom=bottoms)

        # Next category is stacked on top of everything drawn so far.
        bottoms = [base + value for base, value in zip(bottoms, values)]

    # Legend entries follow the order in which the bar groups were drawn.
    plt.legend(labels)
    plt.xlabel('Worker #')
    plt.ylabel('% of Time')
        
# Load the timing records in this cell so it runs on a fresh kernel; without
# the assignment `timelogs` is undefined and the call below raises NameError.
# NOTE(review): the path is the one from the previously commented-out line —
# confirm it is still the run that should be plotted.
timelogs = read_timelogs("/data/r2t2/logs/2020-03-02/moana-full-400w-64spp")
plot_timelogs(timelogs)