# EXP_2: Concurrent Applications performance

EXP_2 benchmarks are executed against the [HyperWatchdog](https://gitlab.com/ecs-lab/hyper-watchdog) smart contract, with:

- the maximum possible number of transactions requested per second (obtained by leveraging the fixed-load mode of Caliper, which keeps sending transaction requests so that a backlog queue of pending transactions is always maintained)
- variable network conditions: ideal and real-world network emulation

The aim is to evaluate the maximum performance of the HyperWatchdog smart contract in both ideal and real-world network conditions. The results are then used to estimate the maximum number of concurrent applications that a single smart contract deployment can manage under the emulated conditions, as the Chunk Size parameter of the HyperWatchdog smart contract varies.
Note: the endorsement policy is set to majority, therefore in this experiment the number of endorsers is 1.

In [None]:
# Reminder: restart kernel after installing packages!
%pip install pandas
%pip install plotly
%pip install nbformat

import os
import pandas as pd

pd.options.mode.chained_assignment = None

# Settings
NOTEBOOK_NAME = "exp_2"
TARGET_CALIPER_REPORTS = "vm_16"

## Parsing

The following code block parses the data from the html reports generated by Caliper and stores data into a .csv file.

In [None]:
# Directory holding the Caliper HTML reports, kept as a list of path components
# (its length is used later to locate the filename inside a "/"-split path).
target_caliper_reports_dir_path = ["..", "launcher", "results", "exp_2_concurrent_apps"]
target_caliper_reports_dir = os.path.join(*target_caliper_reports_dir_path)
# NOTE(review): despite its name, this value is matched below as a substring of
# the whole report path, not as a prefix of the filename.
target_reports_filename_begin_with = TARGET_CALIPER_REPORTS
export_filename = NOTEBOOK_NAME + "_results.csv"

# Header of the exported CSV; the first column stores the path of the report
columns = [
    "Bechmark",
    "Name",
    "Succ",
    "Fail",
    "Send Rate (TPS)",
    "Max Latency (s)",
    "Min Latency (s)",
    "Avg Latency (s)",
    "Throughput (TPS)",
]
rows = []

for filename in os.listdir(target_caliper_reports_dir):
    filepath = os.path.join(target_caliper_reports_dir, filename)

    # Keep only regular files whose path contains the requested report tag
    if target_reports_filename_begin_with not in filepath or os.path.isdir(filepath):
        continue

    with open(filepath, "r") as fp:
        # True while scanning the table that follows a "Performance metrics" marker
        inside_metrics_table = False
        for raw_line in fp:
            line = raw_line.strip()
            if not inside_metrics_table and "Performance metrics" in line:
                inside_metrics_table = True
            if inside_metrics_table and "</table>" in line:
                inside_metrics_table = False
            if inside_metrics_table and "<td>" in line:
                # One table row per line: collect the text of every <td> cell
                cells = [cell.split("</td>")[0] for cell in line.split("<td>")[1:]]
                rows.append([filepath] + cells)

# Export header and rows as a semicolon-separated file
with open(export_filename, "w") as fp:
    for record in [columns] + rows:
        print(";".join(record), file=fp)

## Import and preprocess

The following code block imports the .csv file generated by the parsing code block.
Data is stored in a pandas dataframe.

Also, new columns are added to the dataframe to store the data in a more convenient way.

In [None]:
df = pd.read_csv(export_filename, sep=";")


def _first_number_in_name(name):
    """Strip alphabetic characters from *name* and return its first integer token."""
    without_letters = "".join(ch for ch in name if not ch.isalpha())
    numbers = [int(token) for token in without_letters.strip().split()]
    return numbers[0]


def _benchmark_id(report_path):
    """Return the second "_"-separated token of the report filename, without extension."""
    report_filename = report_path.split("/")[len(target_caliper_reports_dir_path)]
    return report_filename.split("_")[1].split(".")[0]


# "Chunk Length" is the first number found in the round name ("Name" column).
# The analogous "Requested TPS" column (second number in the name) is not created here.
df["Chunk Length"] = df["Name"].apply(_first_number_in_name)

# "Benchmark ID" is derived from the report path stored in the "Bechmark" column
df["Benchmark ID"] = df["Bechmark"].apply(_benchmark_id)
print("Benchmark IDs detected:", df["Benchmark ID"].unique())

# "Success Rate" is the share of successful transactions over all transactions
total_transactions = df["Succ"] + df["Fail"]
df["Success Rate"] = df["Succ"] / total_transactions

# "Transaction Size (Byte)" is the chunk length multiplied by 580 bytes,
# rounded to an integer
transaction_size = df["Chunk Length"] * 580
df["Transaction Size (Byte)"] = transaction_size
df["Transaction Size (Byte)"] = df["Transaction Size (Byte)"].apply(lambda size: round(size))

# Function to extract new features from the filename
def extract_feature_from_filename(inputFilename, featureTag: str, targetType = None):
    """Return the value that follows ``featureTag`` in an underscore-separated filename.

    The last "/"-separated component of ``inputFilename`` is taken, everything
    from its first "." onwards is discarded, and the remainder is split on "_".
    The token right after the first occurrence of ``featureTag`` is the value.

    Args:
        inputFilename: Path or filename to inspect.
        featureTag: Token that names the feature (e.g. "pl").
        targetType: "int" or "float" to cast the value; any other setting
            (including None and "str") returns the raw string token.

    Returns:
        The (optionally cast) value, or the string "ideal" when ``featureTag``
        is not among the tokens.
    """
    tokens = inputFilename.split("/")[-1].split(".")[0].split("_")

    try:
        position = tokens.index(featureTag)
    except ValueError:
        # If the feature is not found, return "ideal"
        return "ideal"

    raw_value = tokens[position + 1]
    if targetType == "int":
        return int(raw_value)
    if targetType == "float":
        return float(raw_value)
    return raw_value

# Create new columns from the filename
# Add one column per network parameter encoded in the report filename.
# Each entry is (tag in the filename, dataframe column, type the value is cast to).
for feature_tag, column_name, column_type in (
    ("pl", "Network Packet Loss (%)", "int"),
    ("thr", "Network Max Throughput (Mbps)", "int"),
    ("del", "Network Avg Latency (ms)", "int"),
    ("jit", "Network Jitter (ms)", "int"),
):
    def _feature_of(report_path, tag=feature_tag, cast=column_type):
        # Tag and cast are bound as defaults so each iteration uses its own values
        return extract_feature_from_filename(report_path, tag, cast)

    df[column_name] = df["Bechmark"].apply(_feature_of)

# Evaluate data consistency
def eval_consistency(df: pd.DataFrame):
    """Flag rows whose parsed values disagree with the parameters in the filename.

    A boolean "Consistency" column is added to ``df`` in place (True by
    default). For every row, the filename taken from the "Bechmark" column is
    split on "_" and each ``tps`` / ``chunklen`` token is compared, through the
    integer that follows it, with the "Requested TPS" / "Chunk Length" column.
    A mismatch sets "Consistency" to False for that row.

    A check is skipped when its column does not exist in ``df`` (previously a
    filename carrying a ``tps`` token raised KeyError when "Requested TPS" had
    not been created), and a tag that is the last token of the filename is
    ignored because it has no value to compare.

    Args:
        df: Dataframe with a "Bechmark" column holding the report paths.

    Returns:
        None. ``df`` is modified in place.
    """
    # Filename tag -> dataframe column the tagged value must match
    checked_columns = {"tps": "Requested TPS", "chunklen": "Chunk Length"}

    df["Consistency"] = True

    for index, record in df.iterrows():
        filename = record["Bechmark"].split("/")[-1]
        params = filename.split(".")[0].split("_")

        # Walk over (tag, following value) pairs
        for tag, value in zip(params, params[1:]):
            column = checked_columns.get(tag)
            if column is None or column not in df.columns:
                continue
            if int(value) != record[column]:
                df.loc[index, "Consistency"] = False

# Run the consistency check, then list the reports that failed it (if any)
eval_consistency(df)

inconsistent_benchmarks = df.loc[df["Consistency"].eq(False), "Bechmark"]
if len(inconsistent_benchmarks) > 0:
    print("Inconsistency detected!")

    print("Inconsistent rows:")
    print(inconsistent_benchmarks.values)

## Data analysis

### Average throughput versus Chunk Size at maximum load

In [None]:
import plotly.express as px

def group_network_conditions(s):
    """Build a readable label summarising the network settings of one row.

    Args:
        s: Mapping-like row (e.g. a dataframe row) holding the four
            "Network ..." columns.

    Returns:
        "Ideal" when every network column equals "ideal"; otherwise the
        non-ideal settings formatted as "<name> <value><unit>" and joined
        with ",<br>".
    """
    parts = []

    for column_name in (
        "Network Packet Loss (%)",
        "Network Max Throughput (Mbps)",
        "Network Avg Latency (ms)",
        "Network Jitter (ms)",
    ):
        value = s[column_name]
        if value != "ideal":
            # "Network <name> (<unit>)" -> "<name>" and "<unit>"
            label, _, unit_part = column_name.partition("(")
            name = label[len("Network"):].strip()
            unit = unit_part.strip(")").strip()
            parts.append(f"{name} {value}{unit}")

    return ",<br>".join(parts) if parts else "Ideal"

# Create a new column "Network Conditions" grouping the network conditions of each row
df["Network Conditions"] = df.apply(group_network_conditions, axis=1)

cds = df["Network Conditions"].unique()  # All the different network conditions
for i, condition in enumerate(cds):
    print(f"Network condition {i}: {condition}")

# Sort dataframe
df_sorted = df.sort_values(by=["Network Conditions", "Transaction Size (Byte)"])
# Filter by value ranges
df_sorted = df_sorted[df_sorted["Transaction Size (Byte)"] >= 99000]
df_sorted = df_sorted[df_sorted["Transaction Size (Byte)"] <= 1000000]
# Plot: one line per network condition
fig = px.line(
    df_sorted,
    x="Transaction Size (Byte)",
    y="Throughput (TPS)",
    color="Network Conditions",
    markers=True,
    template="simple_white",
)

# Set log scale on the x-axis
fig.update_layout(xaxis_type="log")

# Set x-axis range (expressed as log10 of the values, since the axis is logarithmic)
fig.update_xaxes(range=[4.96, 6.04])

# Add title
fig.update_layout(
    title="Throughput vs Transaction Size for different network conditions",
    title_x=0.5,  # Center title
)

# Add grid
fig.update_layout(
    xaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor="LightGrey",
    ),
    yaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor="LightGrey",
    ),
)

# Set border pane lines
fig.update_xaxes(showline=True, linewidth=1, linecolor="black", mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor="black", mirror=True)

# Set the legend title: traces are coloured by the "Network Conditions" column,
# so the legend is titled accordingly (it was previously labelled "Packet Loss (%)")
fig.update_legends(title_text="Network Conditions")

# Place the legend to the right of the plot, vertically centered
fig.update_layout(legend=dict(
    yanchor="middle",
    y=0.5,
    xanchor="left",
    x=1.02,
))

# Save EPS figure
# fig.write_image(NOTEBOOK_NAME + "throughput_vs_tx_size.eps", width=800, height=450, scale=1)

fig.show()

### Average latency versus Chunk Size at maximum load

In [None]:
import plotly.express as px

def group_network_conditions(s):
    """Build a readable label summarising the network settings of one row.

    Args:
        s: Mapping-like row (e.g. a dataframe row) holding the four
            "Network ..." columns.

    Returns:
        "Ideal" when every network column equals "ideal"; otherwise the
        non-ideal settings formatted as "<name> <value><unit>" and joined
        with ",<br>".
    """
    parts = []

    for column_name in (
        "Network Packet Loss (%)",
        "Network Max Throughput (Mbps)",
        "Network Avg Latency (ms)",
        "Network Jitter (ms)",
    ):
        value = s[column_name]
        if value != "ideal":
            # "Network <name> (<unit>)" -> "<name>" and "<unit>"
            label, _, unit_part = column_name.partition("(")
            name = label[len("Network"):].strip()
            unit = unit_part.strip(")").strip()
            parts.append(f"{name} {value}{unit}")

    return ",<br>".join(parts) if parts else "Ideal"

# Create a new column "Network Conditions" grouping the network conditions of each row
df["Network Conditions"] = df.apply(group_network_conditions, axis=1)

cds = df["Network Conditions"].unique()  # All the different network conditions
for i, condition in enumerate(cds):
    print(f"Network condition {i}: {condition}")

# Sort dataframe
df_sorted = df.sort_values(by=["Network Conditions", "Transaction Size (Byte)"])
# Filter by value ranges
df_sorted = df_sorted[df_sorted["Transaction Size (Byte)"] >= 99000]
df_sorted = df_sorted[df_sorted["Transaction Size (Byte)"] <= 1001000]
# Plot: one line per network condition
fig2 = px.line(
    df_sorted,
    x="Transaction Size (Byte)",
    y="Avg Latency (s)",
    color="Network Conditions",
    markers=True,
    template="simple_white",
)

# Set log scale on the x-axis
fig2.update_layout(xaxis_type="log")

# Set x-axis range (expressed as log10 of the values, since the axis is logarithmic)
fig2.update_xaxes(range=[4.96, 6.04])

# Add title
fig2.update_layout(
    title="Avg Latency vs Transaction Size for different network conditions",
    title_x=0.5,  # Center title
)

# Add grid
fig2.update_layout(
    xaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor="LightGrey",
    ),
    yaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor="LightGrey",
    ),
)

# Set border pane lines
fig2.update_xaxes(showline=True, linewidth=1, linecolor="black", mirror=True)
fig2.update_yaxes(showline=True, linewidth=1, linecolor="black", mirror=True)

# Set the legend title: traces are coloured by the "Network Conditions" column,
# so the legend is titled accordingly (it was previously labelled "Packet Loss (%)")
fig2.update_legends(title_text="Network Conditions")

# Place the legend to the right of the plot, vertically centered
fig2.update_layout(legend=dict(
    yanchor="middle",
    y=0.5,
    xanchor="left",
    x=1.02,
))

# TODO: set the hover balloon to show the Chunk Length from the dataframe

# Save EPS figure (the commented call previously targeted `fig` and the throughput filename)
# fig2.write_image(NOTEBOOK_NAME + "latency_vs_tx_size.eps", width=800, height=450, scale=1)

fig2.show()

## Concurrent applications: evaluation

In [None]:
# From the last plotted dataframe (df_sorted), keep the rows measured under the
# realistic network condition only. The label must match, character by
# character, the one built by group_network_conditions.
realistic_conditions = "Packet Loss 3%,<br>Max Throughput 2920Mbps,<br>Avg Latency 74ms,<br>Jitter 39ms"
# Work on an explicit copy: the columns added below must not be written
# through a filtered slice of df_sorted.
df_real = df_sorted[df_sorted["Network Conditions"] == realistic_conditions].copy()

# Sensors' frequency
sensors_frequency = 833 # Hz

# Size of a data chunk of 1s (approx equal to tx size).
# Not used in the computations of this cell.
chunk_size = 580 # Byte/s

# A batch of 1s will have a length of sensors_frequency.
# Data is processed in chunks of "Chunk Length" items, thus the number of
# transactions per second requested by one application is:
# requested_tps = sensors_frequency / chunk_length
df_real["Sensors Frequency"] = sensors_frequency
df_real["Requested TPS"] = df_real["Chunk Length"].apply(lambda length: sensors_frequency / length)

# Using the maximum throughput, we can compute the maximum number of applications that can be satisfied as follows:
# max_satisfiable_applications = throughput // requested_tps
df_real["Max Satisfiable Applications"] = (
    df_real["Throughput (TPS)"] // df_real["Requested TPS"]
).astype(int)

# Create a readable column for x-axis
df_real["Transaction Size (KiB)"] = (df_real["Transaction Size (Byte)"] // 1024).astype(int)
df_real["Transaction Size (KiB) | Chunk Length"] = (
    df_real["Transaction Size (KiB)"].astype(str) + " | " + df_real["Chunk Length"].astype(str)
)

# One marker per chunk length, labelled with the number of satisfiable
# applications and coloured by the average latency
fig = px.scatter(
    df_real,
    x="Transaction Size (KiB) | Chunk Length",
    y="Throughput (TPS)",
    template="simple_white",
    text="Max Satisfiable Applications",
    color="Avg Latency (s)",
)
fig.update_traces(
    marker_size=20,
    textposition="top center",
    textfont=dict(
        family="Courier New",
        size=16,
        color="darkgreen",
    ),
)

# Add title
fig.update_layout(
    title="Maximum number of satisfiable applications vs Transaction Size",
    title_x=0.5,  # Center title
)

# Add grid
fig.update_layout(
    xaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor="LightGrey",
    ),
    yaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor="LightGrey",
    ),
)

# Outline an example
selected_chunk_length = 864
row = df_real[df_real["Chunk Length"] == selected_chunk_length]
if row.empty:
    # Without a matching measurement there is nothing to point the arrow at
    print(f"No data for chunk length {selected_chunk_length}: example annotation skipped")
else:
    x = row["Transaction Size (KiB) | Chunk Length"].values[0]
    y = row["Throughput (TPS)"].values[0]
    fig.add_annotation(
        x=x,
        y=y,
        text=f"Example: in this case, the maximum<br> number of satisfiable applications is {row['Max Satisfiable Applications'].values[0]}",

        # Arrow
        showarrow=True,
        arrowhead=3,
        arrowsize=1,
        arrowwidth=2,
        ax=-25,
        ay=-120,
        yshift=40,
        xshift=-8,

        # Text
        align="center",

        # Box
        bordercolor="#000000",
        borderwidth=1,
        borderpad=5,
        bgcolor="#ffffff",
    )

# Show
fig.show()


In [None]:
# Combined figure: satisfiable applications (bars) and throughput on the
# secondary y-axis, average latency on the primary y-axis, all against the
# categorical "Transaction Size (KiB) | Chunk Length" column of df_real.
from plotly.subplots import make_subplots
complex_fig = make_subplots(specs=[[{"secondary_y": True}]])

# First trace: Max Satisfiable Applications
complex_fig.add_trace(
    px.histogram(
        df_real,
        x='Transaction Size (KiB) | Chunk Length',
        y="Max Satisfiable Applications",
        text_auto=True,
        template="simple_white",
        ).update_traces(
            name="Max Satisfiable Applications", # Name of the trace, to be used in the legend
        ).data[0],
    secondary_y=True, # Plotted against the secondary axis (yaxis2, placed on the right below)
)

# Second trace: Average Latency
complex_fig.add_trace(
    px.scatter(df_real, x='Transaction Size (KiB) | Chunk Length', y="Avg Latency (s)", template="simple_white").data[0].update(
        name="Average Latency", # Name of the trace, to be used in the legend
    ),
    secondary_y=False, # Plotted against the primary axis (yaxis, placed on the left below)
)

# Third trace: Throughput
complex_fig.add_trace(
    px.scatter(df_real, x='Transaction Size (KiB) | Chunk Length', y="Throughput (TPS)", template="simple_white").data[0].update(
        name="Throughput", # Name of the trace, to be used in the legend
    ),
    secondary_y=True, # Plotted against the secondary axis (yaxis2, placed on the right below)
)

# Set x-axis and y-axes: latency on the left, applications/throughput on the right
complex_fig.update_layout(
    legend=dict(orientation="h"),
    yaxis=dict(
        title=dict(text="Avg Latency (s)"),
        side="left",
        range=[0, 2.5],
    ),
    yaxis2=dict(
        title=dict(text="Number of applications (count)<br>Avg Throughput (TPS)"),
        side="right",
        range=[0, 50],
        overlaying="y",
        tickmode="sync",
    ),
    xaxis=dict(
        title=dict(text="Transaction Size (KiB) | Chunk Length"),
    ),
)

# Select template
complex_fig.update_layout(template="simple_white")

# Change the color of the first trace
complex_fig.update_traces(
    selector=dict(type='histogram'), # Select the histogram trace
    marker_color='rgb(158,202,225)',
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.6,
)

# Change the color of the second trace
complex_fig.update_traces(
    selector=dict(type='scatter', name="Average Latency"), # Select the scatter trace
    marker_color='rgba(0, 152, 0, .9)',
    marker_line_color='rgb(60,30,0)',
    marker_line_width=1.5,
    opacity=0.6,
    mode='lines+markers', # Add line
    line=dict(
        # dash='no',
        width=2,
        color='rgb(60,30,0)',
    ),
)

# Change the color of the third trace
complex_fig.update_traces(
    selector=dict(type='scatter', name="Throughput"), # Select the scatter trace
    marker_color='rgba(120, 0, 0, .9)',
    marker_line_color='rgb(0,0,0)',
    marker_line_width=1.5,
    opacity=0.6,
    mode='lines+markers', # Add line
    line=dict(
        dash='dash',
        width=2,
        color='rgb(0,0,0)',
    ),
)

# Enable the legend for each trace
complex_fig.update_traces(
    showlegend=True,
)

# Place the horizontal legend above the plot area, horizontally centered
complex_fig.update_layout(
    legend=dict(
        yanchor="middle",
        y=1.05,
        xanchor="center",
        x=0.5,
        # title="Legend",
        orientation='h', # 'v'
    )
)

# Add grid
complex_fig.update_layout(
    xaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='LightGrey',
        # range=[-1, 10],
        # Create a log scale using an array of powers of 2
        # tickmode="array",
        # tickvals=[10**i for i in range(10)],
        # ticktext=[2**i for i in range(10)],
    ),
    yaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='LightGrey',
        # Logarithmic scale
    ),
)

# Add title
complex_fig.update_layout(
    title="Maximum number of satisfiable applications",
    title_x=0.5, # Center title
    # xaxis_title="Chunk Length",
    # yaxis_title="Average Throughput (TPS)",
)

# Resize before showing
complex_fig.update_layout(
    width=800,
    height=500,
    # Resize text
    font=dict(
        size=16,
    )
)

# Save svg
# complex_fig.write_image("exp_2_satisfiable_applications_2_histogram_alt.svg", engine="kaleido")

complex_fig.show()


In [None]:
# Combined figure on a logarithmic x-axis: average latency on the primary
# y-axis and throughput on the secondary y-axis, both against the numeric
# "Transaction Size (KiB)" column of df_real.
from plotly.subplots import make_subplots
complex_fig = make_subplots(specs=[[{"secondary_y": True}]])

# First trace of this figure: Average Latency
complex_fig.add_trace(
    px.scatter(df_real, x='Transaction Size (KiB)', y="Avg Latency (s)", template="simple_white").data[0].update(
        name="Average Latency", # Name of the trace, to be used in the legend
    ),
    secondary_y=False, # Plotted against the primary axis (yaxis, placed on the left below)
)

# Second trace of this figure: Throughput
complex_fig.add_trace(
    px.scatter(df_real, x='Transaction Size (KiB)', y="Throughput (TPS)", template="simple_white").data[0].update(
        name="Throughput", # Name of the trace, to be used in the legend
    ),
    secondary_y=True, # Plotted against the secondary axis (yaxis2, placed on the right below)
)

# Set x-axis and y-axes: latency on the left, throughput on the right
# NOTE(review): the yaxis2 title mentions a number of applications and the
# x-axis title mentions the chunk length, but this cell plots neither an
# applications trace nor the combined label column - confirm the titles.
complex_fig.update_layout(
    legend=dict(orientation="h"),
    yaxis=dict(
        title=dict(text="Avg Latency (s)"),
        side="left",
        range=[0, 2.5],
    ),
    yaxis2=dict(
        title=dict(text="Number of applications (count)<br>Avg Throughput (TPS)"),
        side="right",
        range=[0, 50],
        overlaying="y",
        tickmode="sync",
    ),
    xaxis=dict(
        title=dict(text="Transaction Size (KiB) | Chunk Length"),
        # Log axis
        type="log",
    ),
)

# Select template
complex_fig.update_layout(template="simple_white")

# Style histogram traces
# NOTE(review): no histogram trace is added in this cell, so this selector
# presumably matches nothing - confirm whether the call can be dropped.
complex_fig.update_traces(
    selector=dict(type='histogram'), # Select the histogram trace
    marker_color='rgb(158,202,225)',
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.6,
)

# Change the color of the Average Latency trace
complex_fig.update_traces(
    selector=dict(type='scatter', name="Average Latency"), # Select the scatter trace
    marker_color='rgba(0, 152, 0, .9)',
    marker_line_color='rgb(60,30,0)',
    marker_line_width=1.5,
    opacity=0.6,
    mode='lines+markers', # Add line
    line=dict(
        # dash='no',
        width=2,
        color='rgb(60,30,0)',
    ),
)

# Change the color of the Throughput trace
complex_fig.update_traces(
    selector=dict(type='scatter', name="Throughput"), # Select the scatter trace
    marker_color='rgba(120, 0, 0, .9)',
    marker_line_color='rgb(0,0,0)',
    marker_line_width=1.5,
    opacity=0.6,
    mode='lines+markers', # Add line
    line=dict(
        dash='dash',
        width=2,
        color='rgb(0,0,0)',
    ),
)

# Enable the legend for each trace
complex_fig.update_traces(
    showlegend=True,
)

# Place the horizontal legend below the plot area, horizontally centered
complex_fig.update_layout(
    legend=dict(
        yanchor="middle",
        y=-0.10,
        xanchor="center",
        x=0.5,
        # title="Legend",
        orientation='h', # 'v'
    )
)

# Add grid
complex_fig.update_layout(
    xaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='LightGrey',
        # range=[-1, 10],
        # Create a log scale using an array of powers of 2
        # tickmode="log",
        # Explicit tick positions: 100, 200, ..., 1000
        tickvals=[i for i in range(100, 1100, 100)],
        # ticktext=[2**i for i in range(10)],
        # Move to the top
        side="top",
    ),
    yaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='LightGrey',
        # Logarithmic scale
    ),
)

# Add title
# complex_fig.update_layout(
    # title="Maximum number of satisfiable applications",
    # title_x=0.5, # Center title
    # xaxis_title="Chunk Length",
    # yaxis_title="Average Throughput (TPS)",
# )

# Resize before showing
complex_fig.update_layout(
    width=800,
    height=500,
    # Resize text
    font=dict(
        size=16,
    )
)

# Save svg
# complex_fig.write_image("exp_2_satisfiable_applications_2_log_scale_alt.svg", engine="kaleido")

complex_fig.show()