# Plasma Communication Overhead Investigation

In this notebook we investigate the overhead of writing tensors to plasma and reading them back, compared with sending the image directly over zmq.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

In [None]:
import glob

In [None]:
# Collect the result files, skipping any whose path mentions 'hacky'
# (the hacky plasma approach). glob returns matches in arbitrary order, so the
# list is sorted to keep downstream legend order and colours stable across runs.
files = sorted(
    p for p in glob.glob("../results/*.txt")
    if 'hacky' not in p
)

In [None]:
def _series_name(path):
    """Return the file name of ``path`` up to its first dot, without directories.

    The last path component is used rather than a fixed position: for a path
    such as '../results/zmq.txt', component [1] is the directory 'results',
    which would map every file onto the same dictionary key.
    """
    file_name = path.replace('\\', '/').split('/')[-1]
    return file_name.split('.')[0]


# Load every result file into an array keyed by its file name stem.
arrs = {_series_name(path): np.loadtxt(path) for path in files}

# Show a compact summary (name -> shape) instead of dumping the full arrays.
{name: arr.shape for name, arr in arrs.items()}

In [None]:
# Scatter every loaded series against its sample index so that trends and
# outliers over the course of a run are visible.
plt.figure(figsize=(12,8))
for name, arr in arrs.items():
    # To hide a series, filter here, e.g. `if 'noop' in name: continue`.
    # The x positions are derived from the data rather than assuming exactly
    # 2000 samples, so a series of any length plots without a shape mismatch.
    plt.scatter(np.arange(arr.size), arr, label=name, marker='.')
plt.xlabel("Sample index")
plt.ylabel("Duration (us)")
plt.legend()
# plt.ylim(0, 20000)  # uncomment to clip extreme outliers

In [None]:
# Look up the series stored under the 'zmq' key of `arrs`.
# NOTE(review): `arr` is not referenced again in the cells shown here.
arr = arrs['zmq']

In [None]:
# Overlay one count histogram per series on a shared set of bins covering
# 0-5000, so the bulk of each distribution can be compared directly.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; it is
# kept here so the cell still runs on the seaborn version this notebook was
# written against — consider sns.histplot when upgrading.
plt.figure(figsize=(16,8))
duration_bins = np.linspace(0, 5000, num=150)  # computed once, shared by all series
for name, values in arrs.items():
    sns.distplot(values, bins=duration_bins, label=name, kde=False)
plt.xlim(0, 5000)
plt.legend()
plt.ylabel("Occurrence Count")
plt.xlabel("Duration (us)")
plt.title("Distribution of durations (< 5000 us)")

In [None]:
# Evaluates to 150528. Presumably the element count of a 3 x 224 x 224 image
# tensor — TODO confirm what this figure is meant to be compared against.
3*224*224

## Fine-grained Plasma Performance Investigation

In [None]:
# Read the header-less CSV into a DataFrame and drop the column labelled 7.
# NOTE(review): presumably column 7 is an empty trailing field — confirm.
# NOTE(review): this path is relative to `results/`, whereas the .txt results
# earlier in the notebook are globbed from `../results/`; verify which
# directory is intended.
ts = pd.read_csv('results/hash_turned_off.csv', header=None).drop(7, axis=1)

In [None]:
# Prepend an all-zero integer 'start' column so that a column-wise diff turns
# the first recorded column into a difference from zero.
# NOTE(review): assumes every column holds a timestamp measured from 0 — confirm.
# The column is built on `ts.index` instead of a hard-coded 2000 rows, so no
# NaN-padded rows appear when the CSV has a different length, and the guard
# keeps the cell safe to re-run without stacking duplicate 'start' columns.
if 'start' not in ts.columns:
    ts = pd.concat([pd.Series(0, index=ts.index, name='start'), ts], axis=1)

In [None]:
# Subtract from each column the column to its left, then discard the 'start'
# column from the result.
ts_durations = (
    ts
    .diff(axis=1)
    .drop(columns='start')
)

In [None]:
# Display names for the duration columns, in the order in which they are
# zipped with the columns of the durations table when plotting.
timestamp = [
    'create_tensor',
    'get_metadata',
    'create_plasma_obj',
    'write_tensor',
    'seal_plasma_obj',
    'send_obj',
    'recv_ack',
]

In [None]:
# Overlay the distribution of every duration column, labelled with the
# matching name from `timestamp`, on a shared set of bins covering 0-2000.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; kept so
# the cell runs on the seaborn version this notebook was written against.
plt.figure(figsize=(16,8))

stage_bins = np.linspace(0, 2000, num=150)  # computed once, shared by all columns
for name, col in zip(timestamp, ts_durations.columns):
    sns.distplot(ts_durations[col], bins=stage_bins, label=name)
plt.xlim(0, 2000)
# Explicit labels and title so the figure can be read on its own.
# NOTE(review): units are not stated for this CSV — add them once confirmed.
plt.xlabel("Duration")
plt.ylabel("Density")
plt.title("Distribution of per-step durations (< 2000)")
plt.legend()

From Phillip:
- Hashing is there to detect non-determinism if a task fails and is re-run, but you probably don't need it.
- If you don't need that feature, we can put in a flag to deactivate it