# Workload distributions

In [None]:
from pathlib import Path
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
# Global plot styling: seaborn "paper" context on a white grid, then
# matplotlib overrides for fonts, line/marker appearance and export quality.
sns.set("paper")
sns.set_style("whitegrid")
mpl.rcParams.update({
    'text.usetex': False,
    'font.family': 'serif',
    # The preamble must be a single string: matplotlib deprecated list values
    # for this rcParam in 3.3 and newer releases reject them. One package per
    # line (no commas) also keeps older releases, which split string values
    # on commas, from mangling it.
    'text.latex.preamble': (
        r'\usepackage{amsmath}' '\n'
        r'\usepackage{amssymb}'
    ),

    'lines.linewidth': 1.5,
    'lines.markeredgewidth': 0,

    'scatter.marker': '.',
    'scatter.edgecolors': 'none',

    'savefig.dpi': 300,
})

In [None]:
# Directory containing the workload distribution files, and the three
# files plotted below as W1, W2 and W3.
basedir = Path('distributions')
w1, w2, w3 = (
    basedir / "Facebook_WebServerDist_IntraCluster.txt",
    basedir / "DCTCP_MsgSizeDist.txt",
    basedir / "Facebook_HadoopDist_All.txt",
)


In [None]:
def load_dist(filename):
    """Read a two-column distribution file into parallel lists.

    The first line of the file (the average size) is skipped. Every
    following non-blank line must contain exactly two whitespace-separated
    numbers; the notebook plots the first column as message size and the
    second as cumulative probability.

    Args:
        filename: Path of the distribution file to read.

    Returns:
        A tuple ``(x, y)`` of two equally long lists of floats holding the
        first and second column respectively.

    Raises:
        ValueError: If a non-blank data line does not hold exactly two
            fields, or a field is not a valid float.
    """
    x, y = [], []
    with open(filename) as file:
        # First line is average size, skip. The default keeps an empty
        # file from raising StopIteration.
        next(file, None)
        for line in file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines, e.g. a trailing newline at EOF.
                continue
            _x, _y = fields
            x.append(float(_x))
            y.append(float(_y))
    return x, y
        

In [None]:
# Plot the three workload distributions on a shared log-scaled x axis and
# export the figure as a PDF.
fig, ax = plt.subplots(figsize=(3.5, 2))

for label, path in (("W1", w1), ("W2", w2), ("W3", w3)):
    ax.plot(*load_dist(path), label=label)

ax.set(
    xlim=(1e0, 1e7),
    ylim=(0, 1),
    xscale='log',
    xlabel="Message Size [Bytes]",
    ylabel="Cumulative Probability"
)
ax.legend()

# Finalize the layout and write the file *before* showing: with a blocking
# GUI backend, plt.show() returns only after the window is closed, so a
# later savefig can produce an empty or un-laid-out figure.
fig.tight_layout(pad=0)
fig.savefig("workload_dists.pdf", dpi=600)
plt.show()