In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from shutil import copyfile
import pickle

In [None]:
# Load the benchmark results that the plotting cells below read from `data`.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only run this against a data-workers.pickle produced by a trusted benchmark run.
with open('./data-workers.pickle', 'rb') as f:
    data = pickle.load(f)

# The timestamp stored in the results names the per-run output directory
print(data['timestamp'])
out_dir = './plots/workers/' + data['timestamp']

# Create the output directory (and any missing parents). exist_ok=True keeps the
# cell safe to re-run and avoids the check-then-create race of calling
# os.path.exists() before os.makedirs().
os.makedirs(out_dir, exist_ok=True)

# Keep a copy of the raw measurements next to the plots generated from them
copyfile('./data-workers.pickle', out_dir + '/data-workers.pickle')

In [None]:
# Dump the loaded benchmark results for a quick visual sanity check
print(data)

# Categorical x-axis labels, doubling at every step:
# input sizes run 256k..4096k, batch sizes run 64k..8192k
x_vals_in = [str(256 * 2 ** step) + 'k' for step in range(5)]
x_vals_batch = [str(64 * 2 ** step) + 'k' for step in range(8)]

# Per-second rates used by the cost plots: the "m" rate is applied to the
# vanilla worker runtimes and the "f" rate to the tidre worker runtimes.
# NOTE(review): 0.4 and 1.65 look like hourly prices in $ (hence / 60 / 60) — confirm the source.
m_cost_per_second = 0.4 / 60 / 60
f_cost_per_second = 1.65 / 60 / 60

In [None]:
# RUNTIME IN SIZE
# Line chart of the total query runtime per input size, one line per worker type.

fig = plt.figure(figsize=(9, 7))
fig.patch.set_facecolor('white')
axes = plt.gca()

# Runtimes are taken in the order the dict stores them.
# NOTE(review): assumes that order lines up with x_vals_in — confirm against the benchmark script.
vanilla_runtimes = list(data['0']['in_size'].values())
tidre_runtimes = list(data['1']['in_size'].values())

axes.plot(x_vals_in, vanilla_runtimes, color='r', marker='x', label='vanilla worker', zorder=3)
axes.plot(x_vals_in, tidre_runtimes, color='g', marker='o', label='tidre worker', zorder=3)
# Disabled series for data keys '2' and '3':
# axes.plot(x_vals_in, list(data['2']['in_size'].values()), color='b', marker='>', label='2 accelerated workers', zorder=3)
# axes.plot(x_vals_in, list(data['3']['in_size'].values()), color='orange', marker='^', label='3 accelerated workers', zorder=3)

axes.set_xlabel('Number of records')
axes.set_ylabel('Total query runtime (seconds)')
axes.set_title('Runtime in sizes benchmark - batch size 1M')

# One tick per input size, with the tick labels rotated by -90 degrees
plt.xticks(x_vals_in, rotation=-90)

# Dashed horizontal grid lines
axes.grid(which='both', axis='y', linestyle='--')

# Uncomment for a logarithmic y-axis
# plt.yscale('log')

axes.legend()

# Write the figure into the run's output directory as a PNG, then display it
plt.savefig(out_dir + '/cluster_in_sizes.png')
plt.show()

In [None]:
# THROUGHPUT IN SIZE
# Line chart of bytes processed per second for every input size, one line per worker type.

fig = plt.figure(figsize=(9, 7))
fig.patch.set_facecolor('white')
axes = plt.gca()

# Record counts of the benchmarked inputs and the matching byte counts.
# NOTE(review): the factors 100 and 1 presumably encode the bytes per record — confirm.
in_sizes = [256e3, 512e3, 1024e3, 2048e3, 4096e3]
in_bytes = np.array(in_sizes) * 100 * 1

# Throughput = input bytes / measured runtime, keyed like `data`.
# Extend the key tuple with '2' and '3' to bring back the disabled series.
throughput = {
    key: np.divide(in_bytes, list(data[key]['in_size'].values()))
    for key in ('0', '1')
}

axes.plot(x_vals_in, throughput['0'], color='r', marker='x', label='vanilla worker', zorder=3)
axes.plot(x_vals_in, throughput['1'], color='g', marker='o', label='tidre worker', zorder=3)
# Disabled series for data keys '2' and '3':
# axes.plot(x_vals_in, throughput['2'], color='b', marker='>', label='2 accelerated workers', zorder=3)
# axes.plot(x_vals_in, throughput['3'], color='orange', marker='^', label='3 accelerated workers', zorder=3)

axes.set_xlabel('Number of records')
axes.set_ylabel('Total query throughput (bytes/s)')
axes.set_title('Throughput in sizes benchmark - batch size 1M')

# One tick per input size, with the tick labels rotated by -90 degrees
plt.xticks(x_vals_in, rotation=-90)

# Dashed horizontal grid lines
axes.grid(which='both', axis='y', linestyle='--')

# Uncomment for a logarithmic y-axis
# plt.yscale('log')

axes.legend()

# Write the figure into the run's output directory as a PNG, then display it
plt.savefig(out_dir + '/cluster_in_sizes_throughput.png')
plt.show()

In [None]:
# SPEEDUP IN SIZE
# Line chart of the speedup over the vanilla worker for every input size.

fig = plt.figure(figsize=(9, 7))
fig.patch.set_facecolor('white')
axes = plt.gca()

# Speedup = vanilla worker runtime / runtime of the series, so the vanilla
# line is the baseline divided by itself.
# Extend the key tuple with '2' and '3' to bring back the disabled series.
baseline_runtimes = list(data['0']['in_size'].values())
speedup = {
    key: np.divide(baseline_runtimes, list(data[key]['in_size'].values()))
    for key in ('0', '1')
}

axes.plot(x_vals_in, speedup['0'], color='r', marker='x', label='vanilla worker', zorder=3)
axes.plot(x_vals_in, speedup['1'], color='g', marker='o', label='tidre worker', zorder=3)
# Disabled series for data keys '2' and '3':
# axes.plot(x_vals_in, speedup['2'], color='b', marker='>', label='2 accelerated workers', zorder=3)
# axes.plot(x_vals_in, speedup['3'], color='orange', marker='^', label='3 accelerated workers', zorder=3)

axes.set_xlabel('Number of records')
axes.set_ylabel('Speedup')
axes.set_title('Speedup in sizes benchmark - batch size 1M')

# One tick per input size, with the tick labels rotated by -90 degrees
plt.xticks(x_vals_in, rotation=-90)

# Dashed horizontal grid lines
axes.grid(which='both', axis='y', linestyle='--')

# Uncomment for a logarithmic y-axis
# plt.yscale('log')

axes.legend()

# Write the figure into the run's output directory as a PNG, then display it
plt.savefig(out_dir + '/cluster_in_sizes_speedup.png')
plt.show()

In [None]:
# COST IN SIZE
# Line chart of the cost of one query for every input size, one line per worker type.

fig = plt.figure(figsize=(9, 7))
fig.patch.set_facecolor('white')
axes = plt.gca()

# Per-second rate charged to each series; cost = measured runtime * rate.
rate_per_second = {
    '0': m_cost_per_second,
    '1': f_cost_per_second,
    # Disabled series:
    # '2': m_cost_per_second + 2*f_cost_per_second,
    # '3': 3*f_cost_per_second,
}
cost = {
    key: np.multiply(list(data[key]['in_size'].values()), rate)
    for key, rate in rate_per_second.items()
}

axes.plot(x_vals_in, cost['0'], color='r', marker='x', label='vanilla worker', zorder=3)
axes.plot(x_vals_in, cost['1'], color='g', marker='o', label='tidre worker', zorder=3)
# Disabled series for data keys '2' and '3':
# axes.plot(x_vals_in, cost['2'], color='b', marker='>', label='2 accelerated workers', zorder=3)
# axes.plot(x_vals_in, cost['3'], color='orange', marker='^', label='3 accelerated workers', zorder=3)

axes.set_xlabel('Number of records')
axes.set_ylabel('Cost per query ($)')
axes.set_title('Cost in sizes benchmark - batch size 1M')

# One tick per input size, with the tick labels rotated by -90 degrees
plt.xticks(x_vals_in, rotation=-90)

# Dashed horizontal grid lines
axes.grid(which='both', axis='y', linestyle='--')

# Uncomment for a logarithmic y-axis
# plt.yscale('log')

axes.legend()

# Write the figure into the run's output directory as a PNG, then display it
plt.savefig(out_dir + '/cluster_in_sizes_cost.png')
plt.show()

In [None]:
# RUNTIME BATCH SIZE
# Line chart of the total query runtime per batch size, one line per worker type.

fig = plt.figure(figsize=(9, 7))
fig.patch.set_facecolor('white')
axes = plt.gca()

# Runtimes are taken in the order the dict stores them.
# NOTE(review): assumes that order lines up with x_vals_batch — confirm against the benchmark script.
vanilla_runtimes = list(data['0']['batch_size'].values())
tidre_runtimes = list(data['1']['batch_size'].values())

axes.plot(x_vals_batch, vanilla_runtimes, color='r', marker='x', label='vanilla worker', zorder=3)
axes.plot(x_vals_batch, tidre_runtimes, color='g', marker='o', label='tidre worker', zorder=3)
# Disabled series for data keys '2' and '3':
# axes.plot(x_vals_batch, list(data['2']['batch_size'].values()), color='b', marker='>', label='2 accelerated workers', zorder=3)
# axes.plot(x_vals_batch, list(data['3']['batch_size'].values()), color='orange', marker='^', label='3 accelerated workers', zorder=3)

axes.set_xlabel('Batch size')
axes.set_ylabel('Total query runtime (seconds)')
axes.set_title('Runtime batch sizes benchmark - in size 4M')

# One tick per batch size, with the tick labels rotated by -90 degrees
plt.xticks(x_vals_batch, rotation=-90)

# Dashed horizontal grid lines
axes.grid(which='both', axis='y', linestyle='--')

# Uncomment for a logarithmic y-axis
# plt.yscale('log')

axes.legend()

# Write the figure into the run's output directory as a PNG, then display it
plt.savefig(out_dir + '/cluster_batch_sizes.png')
plt.show()

In [None]:
# THROUGHPUT BATCH SIZE
# Line chart of bytes processed per second for every batch size, one line per worker type.

fig = plt.figure(figsize=(9, 7))
fig.patch.set_facecolor('white')
axes = plt.gca()

# Every batch-size run uses the same input, so the byte count is one scalar.
# NOTE(review): 4096e3 presumably is the record count of the "4M" input and
# 100 * 1 the bytes per record — confirm.
in_bytes = 100 * 1 * 4096e3

# Throughput = input bytes / measured runtime, keyed like `data`.
# Extend the key tuple with '2' and '3' to bring back the disabled series.
throughput = {
    key: np.divide(in_bytes, list(data[key]['batch_size'].values()))
    for key in ('0', '1')
}

axes.plot(x_vals_batch, throughput['0'], color='r', marker='x', label='vanilla worker', zorder=3)
axes.plot(x_vals_batch, throughput['1'], color='g', marker='o', label='tidre worker', zorder=3)
# Disabled series for data keys '2' and '3':
# axes.plot(x_vals_batch, throughput['2'], color='b', marker='>', label='2 accelerated workers', zorder=3)
# axes.plot(x_vals_batch, throughput['3'], color='orange', marker='^', label='3 accelerated workers', zorder=3)

axes.set_xlabel('Batch size')
axes.set_ylabel('Total query throughput (bytes/s)')
axes.set_title('Throughput batch sizes benchmark - in size 4M')

# One tick per batch size, with the tick labels rotated by -90 degrees
plt.xticks(x_vals_batch, rotation=-90)

# Dashed horizontal grid lines
axes.grid(which='both', axis='y', linestyle='--')

# Uncomment for a logarithmic y-axis
# plt.yscale('log')

axes.legend()

# Write the figure into the run's output directory as a PNG, then display it
plt.savefig(out_dir + '/cluster_batch_sizes_throughput.png')
plt.show()

In [None]:
# SPEEDUP BATCH SIZE
# Line chart of the speedup over the vanilla worker for every batch size.

fig = plt.figure(figsize=(9, 7))
fig.patch.set_facecolor('white')
axes = plt.gca()

# Speedup = vanilla worker runtime / runtime of the series, so the vanilla
# line is the baseline divided by itself.
# Extend the key tuple with '2' and '3' to bring back the disabled series.
baseline_runtimes = list(data['0']['batch_size'].values())
speedup = {
    key: np.divide(baseline_runtimes, list(data[key]['batch_size'].values()))
    for key in ('0', '1')
}

axes.plot(x_vals_batch, speedup['0'], color='r', marker='x', label='vanilla worker', zorder=3)
axes.plot(x_vals_batch, speedup['1'], color='g', marker='o', label='tidre worker', zorder=3)
# Disabled series for data keys '2' and '3':
# axes.plot(x_vals_batch, speedup['2'], color='b', marker='>', label='2 accelerated workers', zorder=3)
# axes.plot(x_vals_batch, speedup['3'], color='orange', marker='^', label='3 accelerated workers', zorder=3)

axes.set_xlabel('Batch size')
axes.set_ylabel('Speedup')
axes.set_title('Speedup batch sizes benchmark - in size 4M')

# One tick per batch size, with the tick labels rotated by -90 degrees
plt.xticks(x_vals_batch, rotation=-90)

# Dashed horizontal grid lines
axes.grid(which='both', axis='y', linestyle='--')

# Uncomment for a logarithmic y-axis
# plt.yscale('log')

axes.legend()

# Write the figure into the run's output directory as a PNG, then display it
plt.savefig(out_dir + '/cluster_batch_sizes_speedup.png')
plt.show()

In [None]:
# COST BATCH SIZE
# Line chart of the cost of one query for every batch size, one line per worker type.

fig = plt.figure(figsize=(9, 7))
fig.patch.set_facecolor('white')
axes = plt.gca()

# Per-second rate charged to each series; cost = measured runtime * rate.
rate_per_second = {
    '0': m_cost_per_second,
    '1': f_cost_per_second,
    # Disabled series:
    # '2': m_cost_per_second + 2*f_cost_per_second,
    # '3': 3*f_cost_per_second,
}
cost = {
    key: np.multiply(list(data[key]['batch_size'].values()), rate)
    for key, rate in rate_per_second.items()
}

axes.plot(x_vals_batch, cost['0'], color='r', marker='x', label='vanilla worker', zorder=3)
axes.plot(x_vals_batch, cost['1'], color='g', marker='o', label='tidre worker', zorder=3)
# Disabled series for data keys '2' and '3':
# axes.plot(x_vals_batch, cost['2'], color='b', marker='>', label='2 accelerated workers', zorder=3)
# axes.plot(x_vals_batch, cost['3'], color='orange', marker='^', label='3 accelerated workers', zorder=3)

axes.set_xlabel('Batch size')
axes.set_ylabel('Cost per query ($)')
axes.set_title('Cost batch sizes benchmark - in size 4M')

# One tick per batch size, with the tick labels rotated by -90 degrees
plt.xticks(x_vals_batch, rotation=-90)

# Dashed horizontal grid lines
axes.grid(which='both', axis='y', linestyle='--')

# Uncomment for a logarithmic y-axis
# plt.yscale('log')

axes.legend()

# Write the figure into the run's output directory as a PNG, then display it
plt.savefig(out_dir + '/cluster_batch_sizes_cost.png')
plt.show()