# EXP_1: Nominal Performances

EXP_1 benchmarks are executed against the [HyperWatchdog](https://gitlab.com/ecs-lab/hyper-watchdog) smart contract, with:

- the use-case-specific working load
- variable network conditions

The aim is to evaluate the performance of the HyperWatchdog smart contract when subject to a realistic working load and variable network conditions, including a real-world network emulation.
Note: the endorsement policy is set to majority, therefore in this experiment the number of endorsers is 1.

In [None]:
# Reminder: restart kernel after installing packages!
%pip install pandas
%pip install plotly
%pip install nbformat

import os
import pandas as pd

# Settings
# Prefix used to build the name of the exported .csv file.
NOTEBOOK_NAME = "exp_1"
# Optional string used to select which Caliper report files are parsed.
CALIPER_REPORTS_FILENAMES_START_WITH = None # Set None to disable filtering

## Parsing

The following code block parses the HTML reports generated by Caliper and stores the extracted data in a .csv file.

In [None]:
# Directories containing the Caliper reports, each one given as a list of path components
target_caliper_reports_dir_path = [
    ["..", "launcher", "results", "exp_1_nominal_performances"],
]
target_caliper_reports_dir = [
    os.path.join(*path_components)
    for path_components in target_caliper_reports_dir_path
]

# Filename filter (None disables it)
target_reports_filename_begin_with = CALIPER_REPORTS_FILENAMES_START_WITH

# Name of the .csv file the parsed rows are exported to
export_filename = f"{NOTEBOOK_NAME}_results.csv"

# Header of the exported .csv file: the report path followed by the cells of a metrics row
columns = [
    "Bechmark", "Name", "Succ", "Fail",
    "Send Rate (TPS)",
    "Max Latency (s)", "Min Latency (s)", "Avg Latency (s)",
    "Throughput (TPS)",
]

# Parsed rows, filled in place by the parser
rows = []

def parse_reports(reports_dir, output_list):
    """Collect the performance-metrics rows of the Caliper HTML reports in a directory.

    Every regular file directly inside ``reports_dir`` is scanned line by line.
    Starting from the first line containing "Performance metrics" and until a
    line containing "</table>" is met, each line holding "<td>" cells is turned
    into one row: the report path followed by the text of every cell.

    When the module-level ``target_reports_filename_begin_with`` is not None,
    only the files whose name starts with that string are parsed.

    Args:
        reports_dir: Path of the directory containing the report files.
        output_list: List extended in place with one list of strings per row.
    """
    filename_prefix = target_reports_filename_begin_with

    for filename in os.listdir(reports_dir):
        # Filter on the file name itself: matching the substring against the
        # whole path would also accept files because of their directory name.
        if filename_prefix is not None and not filename.startswith(filename_prefix):
            continue

        filepath = os.path.join(reports_dir, filename)

        # Skip directories
        if os.path.isdir(filepath):
            continue

        with open(filepath, "r") as fp:
            in_metrics_table = False
            for line in fp:
                line = line.strip()
                # The three checks are evaluated in this order on every line,
                # so a line can both open and close the metrics section.
                if not in_metrics_table and "Performance metrics" in line:
                    in_metrics_table = True
                if in_metrics_table and "</table>" in line:
                    in_metrics_table = False
                if in_metrics_table and "<td>" in line:
                    cells = [cell.split("</td>")[0] for cell in line.split("<td>")[1:]]
                    output_list.append([filepath] + cells)

# Parse every configured reports directory, accumulating the rows in `rows`
for reports_dir in target_caliper_reports_dir:
    parse_reports(reports_dir, rows)

# Export the header and the parsed rows as a semicolon-separated file
with open(export_filename, "w") as fp:
    fp.write(";".join(columns) + "\n")
    fp.writelines(";".join(row) + "\n" for row in rows)

## Import and preprocess

The following code block imports the .csv file generated by the parsing code block.
Data is stored in a pandas dataframe.

Also, new columns are added to the dataframe to store the data in a more convenient way.

In [None]:
# Load the exported semicolon-separated results into a dataframe
source_df = pd.read_csv(export_filename, sep=";")


def _first_integer_in_name(name):
    """Return the first integer left in `name` once its alphabetic characters are removed."""
    without_letters = "".join(ch for ch in name if not ch.isalpha())
    numbers = [int(token) for token in without_letters.strip().split()]
    return numbers[0]


def _benchmark_id_from_path(report_path):
    """Return the second underscore-separated field of the report filename, extension dropped."""
    report_filename = report_path.split("/")[-1]
    return report_filename.split("_")[1].split(".")[0]


# "Chunk Length": first integer contained in the round name
# (the second integer would be the "Requested TPS"; that column is currently not created)
source_df["Chunk Length"] = source_df["Name"].apply(_first_integer_in_name)

# "Benchmark ID": identifier taken from the report filename
source_df["Benchmark ID"] = source_df["Bechmark"].apply(_benchmark_id_from_path)
print("Benchmark IDs detected:", source_df["Benchmark ID"].unique())

# "Success Rate": successful transactions over all the submitted ones
source_df["Success Rate"] = source_df["Succ"].div(source_df["Succ"].add(source_df["Fail"]))

# "Transaction Size (Byte)": chunk length times 580 bytes per chunk element, rounded to an integer
source_df["Transaction Size (Byte)"] = (source_df["Chunk Length"] * 580).apply(lambda size: round(size))

# Function to extract new features from the filename
def extract_feature_from_filename(inputFilename, featureTag: str, targetType = None):
    """Read the value that follows a tag in an underscore-separated report filename.

    Only the last "/"-separated component of ``inputFilename`` is considered,
    truncated at its first "."; the remaining text is split on "_".

    Args:
        inputFilename: Path (or name) of the report file.
        featureTag: Token to look for; the token right after it is the value.
        targetType: "int" or "float" to cast the value; any other value
            (including None and "str") returns the token as a string.

    Returns:
        The (possibly cast) value, or the string "ideal" when the tag is absent.

    Raises:
        IndexError: If the tag is the last token of the filename.
        ValueError: If the value cannot be cast to the requested type.
    """
    stem = inputFilename.split("/")[-1].split(".")[0]
    tokens = stem.split("_")

    # A missing tag means that the feature was not constrained
    if featureTag not in tokens:
        return "ideal"

    raw_value = tokens[tokens.index(featureTag) + 1]

    if targetType == "int":
        return int(raw_value)
    if targetType == "float":
        return float(raw_value)
    return raw_value

# Add one column per network emulation parameter encoded in the report filename.
# Each entry is (filename tag, dataframe column, cast applied to the value).
for feature_tag, column_name, column_type in (
    ("pl", "Network Packet Loss (%)", "int"),
    ("thr", "Network Max Throughput (Mbps)", "int"),
    ("del", "Network Avg Latency (ms)", "int"),
    ("jit", "Network Jitter (ms)", "int"),
):
    source_df[column_name] = source_df["Bechmark"].map(
        lambda report_path: extract_feature_from_filename(report_path, feature_tag, column_type)
    )

# Create a new column "Network Handicaps" from all the network features, containing a string with all the features
def parse_row(row_value, column_name):
    """Return the one-letter code of a network handicap, or "" when it is not active.

    Args:
        row_value: Value of the network feature; "ideal" means not constrained.
        column_name: Name of the network feature column.

    Returns:
        "" when ``row_value`` is "ideal"; otherwise "l", "t", "d" or "j"
        depending on the column, or None for an unknown column name.
    """
    if row_value == "ideal":
        return ""

    handicap_codes = {
        "Network Packet Loss (%)": "l",
        "Network Max Throughput (Mbps)": "t",
        "Network Avg Latency (ms)": "d",
        "Network Jitter (ms)": "j",
    }
    return handicap_codes.get(column_name)
    
# "Network Handicaps": the handicap codes of the four network features joined by
# spaces (an inactive feature contributes an empty string)
source_df["Network Handicaps"] = source_df.apply(
    lambda record: " ".join(
        [
            parse_row(record[feature_column], feature_column)
            for feature_column in (
                "Network Packet Loss (%)",
                "Network Max Throughput (Mbps)",
                "Network Avg Latency (ms)",
                "Network Jitter (ms)",
            )
        ]
    ),
    axis=1,
)

# Evaluate data consistency
def eval_consistency(df: pd.DataFrame):
    """Flag the rows whose parsed values disagree with their report filename.

    The filename stored in the "Bechmark" column (last "/"-separated component,
    truncated at the first ".") is split on "_". The token following "tps" is
    compared with the "Requested TPS" column and the token following
    "chunklen" with the "Chunk Length" column.

    A check is skipped when its column is not present in ``df`` (it cannot be
    verified) and when the tag is the last token of the filename (there is no
    value to compare).

    Args:
        df: Dataframe to check. A boolean "Consistency" column is added in
            place: True when no mismatch was found, False otherwise.

    Raises:
        ValueError: If the token following a checked tag is not an integer.
    """
    # Filename tag -> dataframe column holding the corresponding parsed value
    tag_to_column = {"tps": "Requested TPS", "chunklen": "Chunk Length"}
    # Only the columns that actually exist can be checked
    checked = {tag: column for tag, column in tag_to_column.items() if column in df.columns}

    df["Consistency"] = True

    for index, row in df.iterrows():
        filename = row["Bechmark"].split("/")[-1]
        params = filename.split(".")[0].split("_")

        # Pair every token with the one that follows it
        for tag, value in zip(params, params[1:]):
            column = checked.get(tag)
            if column is not None and int(value) != row[column]:
                df.loc[index, "Consistency"] = False
                break

eval_consistency(source_df)

# Report the benchmarks flagged as inconsistent, if any
if not source_df["Consistency"].all():
    print("Inconsistency detected!")

    print("Inconsistent rows:")
    print(source_df.loc[~source_df["Consistency"], "Bechmark"].values)

## Data analysis

### Throughput at different network conditions

In [None]:
import plotly.express as px

# Keep only the rows whose "Benchmark ID" is "11". An explicit copy is taken
# because a column is added to `df` afterwards: assigning to a filtered slice
# of `source_df` would trigger pandas' SettingWithCopyWarning.
df = source_df[source_df["Benchmark ID"] == "11"].copy()

def group_network_conditions(s):
    """Describe the non-ideal network conditions of a row as a legend label.

    Args:
        s: Mapping (e.g. a dataframe row) providing the four "Network ..."
            feature columns; the value "ideal" marks an unconstrained feature.

    Returns:
        "Ideal" when every feature is "ideal"; otherwise the constrained
        features, each formatted as "<name> <value><unit>" and joined by
        ",<br>" (e.g. "Packet Loss 5%,<br>Jitter 3ms").
    """
    prefix = "Network"
    conditions = []

    for column_name in (
        "Network Packet Loss (%)",
        "Network Max Throughput (Mbps)",
        "Network Avg Latency (ms)",
        "Network Jitter (ms)",
    ):
        value = s[column_name]
        if value != "ideal":
            # "<name> (<unit>)": the name is before the first "(", the unit inside the parentheses
            name, _, unit = column_name.partition("(")
            name = name.strip()
            # Drop the leading "Network" as a whole prefix: str.lstrip("Network")
            # would strip any leading run of those characters instead.
            if name.startswith(prefix):
                name = name[len(prefix):].strip()
            unit = unit.strip(")").strip()
            conditions.append(f"{name} {value}{unit}")

    if not conditions:
        return "Ideal"
    return ",<br>".join(conditions)

# Add the "Network Conditions" column: one label per row describing its network conditions
df["Network Conditions"] = df.apply(group_network_conditions, axis=1)

# List all the distinct network conditions
cds = df["Network Conditions"].unique()
for i, condition in enumerate(cds):
    print(f"Network condition {i}: {condition}")

# One line per network condition, with points ordered by transaction size
df_sorted = df.sort_values(by=["Network Conditions", "Transaction Size (Byte)"])
fig = px.line(
    df_sorted,
    x="Transaction Size (Byte)",
    y="Throughput (TPS)",
    color="Network Conditions",
    markers=True,
    template="simple_white",
)

# Logarithmic x axis (the y axis stays linear)
fig.update_layout(xaxis_type="log")

# Optional title, currently disabled:
# fig.update_layout(
#     title="Throughput vs Transaction Size for different network conditions",
#     title_x=0.5, # Center title
#     xaxis_title="Chunk Length",
#     yaxis_title="Average Throughput (TPS)",
# )

# Light grey grid on both axes
grid_style = dict(showgrid=True, gridwidth=1, gridcolor="LightGrey")
fig.update_layout(xaxis=dict(grid_style), yaxis=dict(grid_style))

# Black border around the plotting area
frame_style = dict(showline=True, linewidth=1, linecolor="black", mirror=True)
fig.update_xaxes(**frame_style)
fig.update_yaxes(**frame_style)

# Legend: title, and position vertically centered on the right of the plot
fig.update_legends(title_text="Network Conditions")
fig.update_layout(legend=dict(yanchor="middle", y=0.5, xanchor="left", x=1.02))

# Resize the figure and its text before showing
fig.update_layout(width=1000, height=400, font=dict(size=18))

# Requested TPS markers; x values are chunk lengths scaled by `multiplier` (580)
multiplier = 580
x = [chunk_length * multiplier for chunk_length in [208, 417, 833, 1666, 4165, 8330, 12495, 16660]]
y = [4.0, 2.0, 1.0, 0.5, 0.2, 0.1, 0.1, 0.1]
requested_tps_trace = px.scatter(x=x, y=y).data[0]
requested_tps_trace.update(
    name="Requested TPS",
    mode="markers",
    marker=dict(size=8, symbol="line-ew", line=dict(width=2, color="Teal")),
    showlegend=True,
)
fig.add_trace(requested_tps_trace)

# Save EPS figure
# fig.write_image("exp_1_graph_1_log_size_throughput_vm.eps", engine="kaleido")

fig.show()

### Avg latency at different network conditions

In [None]:
import plotly.express as px

# Keep only the rows whose "Benchmark ID" is one of the listed values. An
# explicit copy is taken because a column is added to `df` afterwards:
# assigning to a filtered slice of `source_df` would trigger pandas'
# SettingWithCopyWarning.
df = source_df[source_df["Benchmark ID"].isin(["11", "09"])].copy()

def group_network_conditions(s):
    """Describe the non-ideal network conditions of a row as a legend label.

    Args:
        s: Mapping (e.g. a dataframe row) providing the four "Network ..."
            feature columns; the value "ideal" marks an unconstrained feature.

    Returns:
        "Ideal" when every feature is "ideal"; otherwise the constrained
        features, each formatted as "<name> <value><unit>" and joined by
        ",<br>" (e.g. "Packet Loss 5%,<br>Jitter 3ms").
    """
    prefix = "Network"
    conditions = []

    for column_name in (
        "Network Packet Loss (%)",
        "Network Max Throughput (Mbps)",
        "Network Avg Latency (ms)",
        "Network Jitter (ms)",
    ):
        value = s[column_name]
        if value != "ideal":
            # "<name> (<unit>)": the name is before the first "(", the unit inside the parentheses
            name, _, unit = column_name.partition("(")
            name = name.strip()
            # Drop the leading "Network" as a whole prefix: str.lstrip("Network")
            # would strip any leading run of those characters instead.
            if name.startswith(prefix):
                name = name[len(prefix):].strip()
            unit = unit.strip(")").strip()
            conditions.append(f"{name} {value}{unit}")

    if not conditions:
        return "Ideal"
    return ",<br>".join(conditions)

# Add the "Network Conditions" column: one label per row describing its network conditions
df["Network Conditions"] = df.apply(group_network_conditions, axis=1)

# List all the distinct network conditions
cds = df["Network Conditions"].unique()
for i, condition in enumerate(cds):
    print(f"Network condition {i}: {condition}")

# Plot only benchmark "11": one line per network condition, points ordered by transaction size
df_filtered = df[df["Benchmark ID"].isin(["11"])]
df_sorted = df_filtered.sort_values(by=["Network Conditions", "Transaction Size (Byte)"])
fig2 = px.line(
    df_sorted,
    x="Transaction Size (Byte)",
    y="Avg Latency (s)",
    color="Network Conditions",
    markers=True,
    template="simple_white",
)

# Logarithmic x axis (the y axis stays linear)
fig2.update_layout(xaxis_type="log")

# Optional title, currently disabled:
# fig2.update_layout(
#     title="Avg Latency vs Transaction Size for different network conditions",
#     title_x=0.5, # Center title
#     xaxis_title="Chunk Length",
#     yaxis_title="Average Throughput (TPS)",
# )

# Light grey grid on both axes
grid_style = dict(showgrid=True, gridwidth=1, gridcolor="LightGrey")
fig2.update_layout(xaxis=dict(grid_style), yaxis=dict(grid_style))

# Black border around the plotting area
frame_style = dict(showline=True, linewidth=1, linecolor="black", mirror=True)
fig2.update_xaxes(**frame_style)
fig2.update_yaxes(**frame_style)

# Legend: title, and position vertically centered on the right of the plot
fig2.update_legends(title_text="Network Conditions")
fig2.update_layout(legend=dict(yanchor="middle", y=0.5, xanchor="left", x=1.02))

# Resize the figure and its text before showing
fig2.update_layout(width=1000, height=400, font=dict(size=18))

# Save EPS figure
# fig2.write_image("exp_1_graph_2_log_size_latency_vm.eps", engine="kaleido")

fig2.show()

### Avg latency at different network conditions for different block configurations

In [None]:
# Keep only the results of benchmarks "11" and "09" obtained with realistic
# network conditions, i.e. with all four network handicaps active. An explicit
# copy is taken because columns are added to `df` afterwards: assigning to a
# filtered slice of `source_df` would trigger pandas' SettingWithCopyWarning.
df = source_df[source_df["Benchmark ID"].isin(["11", "09"])]
df = df[df["Network Handicaps"] == "l t d j"].copy()

def group_network_conditions(s):
    """Describe the non-ideal network conditions of a row as a legend label.

    Args:
        s: Mapping (e.g. a dataframe row) providing the four "Network ..."
            feature columns; the value "ideal" marks an unconstrained feature.

    Returns:
        "Ideal" when every feature is "ideal"; otherwise the constrained
        features, each formatted as "<name> <value><unit>" and joined by
        ",<br>" (e.g. "Packet Loss 5%,<br>Jitter 3ms").
    """
    prefix = "Network"
    conditions = []

    for column_name in (
        "Network Packet Loss (%)",
        "Network Max Throughput (Mbps)",
        "Network Avg Latency (ms)",
        "Network Jitter (ms)",
    ):
        value = s[column_name]
        if value != "ideal":
            # "<name> (<unit>)": the name is before the first "(", the unit inside the parentheses
            name, _, unit = column_name.partition("(")
            name = name.strip()
            # Drop the leading "Network" as a whole prefix: str.lstrip("Network")
            # would strip any leading run of those characters instead.
            if name.startswith(prefix):
                name = name[len(prefix):].strip()
            unit = unit.strip(")").strip()
            conditions.append(f"{name} {value}{unit}")

    if not conditions:
        return "Ideal"
    return ",<br>".join(conditions)

# Add the "Network Conditions" column: one label per row describing its network conditions
df["Network Conditions"] = df.apply(group_network_conditions, axis=1)

# List all the distinct network conditions (line breaks replaced by spaces for printing)
cds = df["Network Conditions"].unique()
for i, condition in enumerate(cds):
    print(f"Network condition {i}: {condition.replace('<br>', ' ')}")

# Legend label of each benchmark: "11" is the standard configuration, any other ID the alternative one
df["Benchmark Name"] = df["Benchmark ID"].apply(
    lambda benchmark_id: "Standard<br>configuration"
    if benchmark_id == "11"
    else "Alternative<br>configuration"
)

# One line per benchmark, rows ordered by descending benchmark ID and transaction size
df_sorted = df.sort_values(by=["Benchmark ID", "Transaction Size (Byte)"], ascending=False)
fig = px.line(
    df_sorted,
    x="Transaction Size (Byte)",
    y="Avg Latency (s)",
    color="Benchmark Name",
    markers=True,
    template="simple_white",
)

# Logarithmic x axis (the y axis stays linear)
fig.update_layout(xaxis_type="log")

# Centered title (axis titles are left to their defaults)
fig.update_layout(
    title="Average transaction latency for different block production configurations",
    title_x=0.5,
)

# Light grey grid on both axes
grid_style = dict(showgrid=True, gridwidth=1, gridcolor="LightGrey")
fig.update_layout(xaxis=dict(grid_style), yaxis=dict(grid_style))

# Black border around the plotting area
frame_style = dict(showline=True, linewidth=1, linecolor="black", mirror=True)
fig.update_xaxes(**frame_style)
fig.update_yaxes(**frame_style)

# Legend: title, and position vertically centered on the right of the plot
fig.update_legends(title_text="Block Production")
fig.update_layout(legend=dict(yanchor="middle", y=0.5, xanchor="left", x=1.02))

# Resize the figure and its text before showing
fig.update_layout(width=800, height=300, font=dict(size=12))

# Save EPS figure
# fig.write_image("exp_1_comparison_latency_with_diff_block_configs.eps", scale=1, engine="kaleido")
fig.show()