In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [ ]:
# Load the raw benchmark measurements; both CSV files are ';'-separated.
df_file = pd.read_csv('data/bench_file.csv', sep=';')
df_tcpstream = pd.read_csv('data/bench_tcpstream.csv', sep=';')

In [ ]:
# Reduce each benchmark DataFrame to one Series holding the mean of every
# column (indexed by column name); later cells add derived entries to these.
tcpstream_means, file_means = (
    frame.mean() for frame in (df_tcpstream, df_file)
)


In [ ]:
# Derived entry 'io_time' = mean read I/O time + mean write I/O time,
# stored back into each dataset's means Series.
for means in (tcpstream_means, file_means):
    means['io_time'] = means['read_io_time'] + means['write_io_time']


In [ ]:
# Derived entry 'p2m_overhead': the sum of the twelve mean request (*_req_time)
# and acknowledgement (*_ack_time) columns for the enroll, io_request and
# io_report steps, on both the read and the write side.
p2m_columns = (
    'read_enroll_req_time',
    'write_enroll_req_time',
    'read_enroll_ack_time',
    'write_enroll_ack_time',
    'read_io_request_req_time',
    'write_io_request_req_time',
    'read_io_request_ack_time',
    'write_io_request_ack_time',
    'read_io_report_req_time',
    'write_io_report_req_time',
    'read_io_report_ack_time',
    'write_io_report_ack_time',
)

for means in (tcpstream_means, file_means):
    # Accumulate strictly left to right, in the listed column order.
    overhead = means[p2m_columns[0]]
    for column in p2m_columns[1:]:
        overhead = overhead + means[column]
    means['p2m_overhead'] = overhead

In [ ]:
# Derived entry 'middleware_overhead': the sum of the six mean
# *_processing_time columns (enroll, io_request, io_report; read and write).
middleware_columns = (
    'read_enroll_processing_time',
    'write_enroll_processing_time',
    'read_io_request_processing_time',
    'write_io_request_processing_time',
    'read_io_report_processing_time',
    'write_io_report_processing_time',
)

for means in (tcpstream_means, file_means):
    # Accumulate strictly left to right, in the listed column order.
    overhead = means[middleware_columns[0]]
    for column in middleware_columns[1:]:
        overhead = overhead + means[column]
    means['middleware_overhead'] = overhead

In [ ]:
# Derived entry 'm2m_overhead': the sum of the mean reserve and sync_prov
# request/acknowledgement times on the write side. It is computed for the
# TcpStream dataset only.
# NOTE(review): the plotting code visible in this notebook never reads
# 'm2m_overhead' — confirm whether it should be part of the stacked bar.
m2m_columns = (
    'write_reserve_req_time',
    'write_reserve_ack_time',
    'write_sync_prov_req_time',
    'write_sync_prov_ack_time',
)

m2m_total = tcpstream_means[m2m_columns[0]]
for column in m2m_columns[1:]:
    m2m_total = m2m_total + tcpstream_means[column]
tcpstream_means['m2m_overhead'] = m2m_total

In [ ]:
# Grouped bar chart comparing the File and TcpStream benchmarks.
# For each method two bars are drawn side by side:
#   * left  - the mean I/O time alone;
#   * right - the same I/O time with the middleware overhead and the P2M
#             overhead stacked on top of it.

# Create labels and positions for the bars
labels = ['File', 'TcpStream']
x = np.arange(len(labels))
width = 0.35  # width of the bars

# Fall back to 0 when 'middleware_overhead' has not been computed. The check
# is done per dataset so that a value present for one dataset is never
# overwritten merely because the other dataset lacks it.
for means in (file_means, tcpstream_means):
    if 'middleware_overhead' not in means:
        means['middleware_overhead'] = 0

# Create figure and axis
fig, ax = plt.subplots(figsize=(10, 6))

# Bar heights, ordered like `labels` (File first, TcpStream second).
io_time_bars = [file_means['io_time'], tcpstream_means['io_time']]
middleware_bars = [file_means['middleware_overhead'],
                   tcpstream_means['middleware_overhead']]
p2m_bars = [file_means['p2m_overhead'], tcpstream_means['p2m_overhead']]

# Single bars showing io_time only (left bar of each pair)
ax.bar(x - width/2, io_time_bars, width, label='IO Time Only', color='skyblue')

# Stacked bars (right bar of each pair)
# First layer: io_time
stacked_io_time = ax.bar(x + width/2,
                         io_time_bars,
                         width,
                         label='IO Time',
                         color='royalblue')

# Second layer: middleware_overhead, starting where the io_time layer ends
stacked_middleware = ax.bar(x + width/2,
                            middleware_bars,
                            width,
                            bottom=io_time_bars,
                            label='Middleware Overhead',
                            color='lightgreen')

# Third layer: p2m_overhead, starting on top of io_time + middleware_overhead
stacked_p2m = ax.bar(x + width/2,
                     p2m_bars,
                     width,
                     bottom=[file_means['io_time'] + file_means['middleware_overhead'],
                             tcpstream_means['io_time'] + tcpstream_means['middleware_overhead']],
                     label='P2M Overhead',
                     color='salmon')

# Add labels, title and legend
# NOTE(review): the axis label assumes the CSV timings are in seconds — confirm.
ax.set_xlabel('I/O Method')
ax.set_ylabel('Time (seconds)')
ax.set_title('Comparison of I/O Methods: File vs TcpStream')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

# Add values on top of the io_time-only bars
for i, v in enumerate(io_time_bars):
    ax.text(i - width/2, v + 0.01, f'{v:.3f}', ha='center', va='bottom')

# Add a total time label on top of each stacked bar
total_time_file = file_means['io_time'] + file_means['middleware_overhead'] + file_means['p2m_overhead']
total_time_tcp = tcpstream_means['io_time'] + tcpstream_means['middleware_overhead'] + tcpstream_means['p2m_overhead']
ax.text(0 + width/2, total_time_file + 0.01, f'{total_time_file:.3f}', ha='center', va='bottom')
ax.text(1 + width/2, total_time_tcp + 0.01, f'{total_time_tcp:.3f}', ha='center', va='bottom')

# Add a grid to make it easier to read values
ax.grid(axis='y', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()