# EXP_3: Performances varying the number of organizations

EXP_3 benchmarks are executed against the [HyperWatchdog](https://gitlab.com/ecs-lab/hyper-watchdog) smart contract, with:

- the maximum possible number of transactions requested per second (obtained by leveraging the fixed-load mode of Caliper, which keeps sending transaction requests so that a backlog queue of pending transactions is always maintained)
- real-world network conditions
- a varying number of organizations

The aim is to evaluate the maximum performance that can be achieved by the system, and how it scales with the number of organizations.
Note that in this experiment we are using an endorsement policy that requires all the organizations to endorse the transaction, so the number of organizations is also the number of required endorsers.

In [None]:
# Reminder: restart kernel after installing packages!
%pip install pandas
%pip install plotly
%pip install nbformat

import os
import pandas as pd

# Disable pandas' chained-assignment warning for the whole notebook
pd.options.mode.chained_assignment = None

# Settings
# NOTEBOOK_NAME is the prefix of the exported .csv file name;
# TARGET_CALIPER_REPORTS is the filter applied to the Caliper report files.
NOTEBOOK_NAME = "exp_3"
TARGET_CALIPER_REPORTS = "exp_3"

## Parsing

The following code block parses the HTML reports generated by Caliper and stores the extracted data in a .csv file.

In [None]:
# Directory holding the Caliper HTML reports, expressed as path components
target_caliper_reports_dir_path = ["..", "launcher", "results", "exp_3_multiple_orgs"] # Reports dir
target_caliper_reports_dir = os.path.join(*target_caliper_reports_dir_path)
target_reports_filename_begin_with = TARGET_CALIPER_REPORTS # Filename filter
export_filename = NOTEBOOK_NAME + "_results.csv"

# Header of the exported CSV: the report path, followed by the cells found in
# the <td> rows between "Performance metrics" and the next "</table>".
columns = [
    "Bechmark",
    "Name",
    "Succ",
    "Fail",
    "Send Rate (TPS)",
    "Max Latency (s)",
    "Min Latency (s)",
    "Avg Latency (s)",
    "Throughput (TPS)",
]
rows = []

# Sorted so that the exported CSV does not depend on the directory listing order
for filename in sorted(os.listdir(target_caliper_reports_dir)):
    # Filter on the file name itself: the reports directory name also contains
    # the prefix, so matching against the full path would accept every file.
    if not filename.startswith(target_reports_filename_begin_with):
        continue

    filepath = os.path.join(target_caliper_reports_dir, filename)

    # Skip directories
    if os.path.isdir(filepath):
        continue

    with open(filepath, "r", encoding="utf-8") as fp:
        # True while scanning the lines of the "Performance metrics" table
        in_metrics_table = False
        for line in fp:
            line = line.strip()
            if not in_metrics_table and "Performance metrics" in line:
                in_metrics_table = True
            # Checked before the <td> test so a closing line is never parsed
            if in_metrics_table and "</table>" in line:
                in_metrics_table = False
            if in_metrics_table and "<td>" in line:
                # One table row per line: keep the text of every <td> cell
                cells = [cell.split("</td>")[0] for cell in line.split("<td>")[1:]]
                rows.append([filepath] + cells)

with open(export_filename, "w", encoding="utf-8") as fp:
    print(";".join(columns), file=fp)
    for row in rows:
        print(";".join(row), file=fp)

## Import and preprocess

The following code block imports the .csv file generated by the parsing code block.
Data is stored in a pandas dataframe.

Also, new columns are added to the dataframe to store the data in a more convenient way.

In [None]:
df = pd.read_csv(export_filename, sep=";")


def _numbers_in_name(name):
    """Return the integers left in *name* once alphabetic characters are removed."""
    without_letters = "".join(ch for ch in name if not ch.isalpha())
    return [int(token) for token in without_letters.split()]


# Create the new column "Chunk Length" from the first number in the column "Name"
df["Chunk Length"] = df["Name"].apply(lambda name: _numbers_in_name(name)[0])
# df["Requested TPS"] = df["Name"].apply(lambda name: _numbers_in_name(name)[1])

# Create a new column "Benchmark ID" from the column "Bechmark":
# the second "_"-separated field of the report file name.
# os.path.basename is used instead of a fixed "/"-split index so that the
# lookup does not depend on the depth of the reports directory.
df["Benchmark ID"] = df["Bechmark"].apply(lambda path: os.path.basename(path).split("_")[1].split(".")[0])
print("Benchmark IDs detected:", df["Benchmark ID"].unique())

# Create a new column "Success Rate" from the column "Succ" and "Fail"
df["Success Rate"] = df["Succ"] / (df["Succ"] + df["Fail"])

# Create a new column "Transaction Size (Byte)" from the column "Chunk Length"
# considering that each chunk element is 580 bytes. Both factors are integers,
# so the result needs no rounding.
df["Transaction Size (Byte)"] = df["Chunk Length"] * 580

# Function to extract new features from the filename
def extract_feature_from_filename(inputFilename, featureTag: str, targetType = None):
    """Read the value that follows *featureTag* in an "_"-separated file name.

    The file name (last path component, without its extension) is split on
    "_" and the field right after the first occurrence of *featureTag* is
    returned.

    Args:
        inputFilename: Path or name of the file, e.g. "dir/exp_3_orgs_4.html".
        featureTag: Tag to look for, e.g. "orgs".
        targetType: "int" or "float" to cast the value; any other value
            (including None and "str") returns the raw string.

    Returns:
        The (optionally cast) value, or the string "ideal" when the tag is
        not present in the file name.

    Raises:
        IndexError: If the tag is the last field, i.e. it has no value.
        ValueError: If the value cannot be cast to the requested type.
    """
    filename = inputFilename.replace("\\", "/").split("/")[-1]

    # Drop only the trailing extension: cutting at the first "." would
    # truncate decimal values such as "del_0.5" and lose every later field.
    stem, dot, extension = filename.rpartition(".")
    if not (dot and extension.isalpha()):
        stem = filename
    params = stem.split("_")

    if featureTag not in params:
        # If the feature is not found, return "ideal"
        return "ideal"

    value_position = params.index(featureTag) + 1
    if value_position >= len(params):
        raise IndexError(
            f"Tag '{featureTag}' has no value in file name '{filename}'"
        )
    raw_value = params[value_position]

    # Cast to the target type; unknown types fall back to the raw string
    casts = {"int": int, "float": float}
    cast = casts.get(targetType)
    return cast(raw_value) if cast is not None else raw_value

# Derive one dataframe column per parameter encoded in the report file name.
# Each entry is (tag in the file name, name of the new column, type to cast to).
filename_features = [
    ("pl", "Network Packet Loss (%)", "int"),
    ("thr", "Network Max Throughput (Mbps)", "int"),
    ("del", "Network Avg Latency (ms)", "int"),
    ("jit", "Network Jitter (ms)", "int"),
    ("orgs", "Number of Organizations", "int"),
]

for feature_tag, column_name, column_type in filename_features:
    def read_feature(path, tag=feature_tag, cast=column_type):
        # Tag and type are bound as defaults so each column gets its own pair
        return extract_feature_from_filename(
            inputFilename=path,
            featureTag=tag,
            targetType=cast,
        )

    df[column_name] = df["Bechmark"].apply(read_feature)

# Evaluate data consistency
def eval_consistency(df: pd.DataFrame):
    """Flag rows whose parsed values disagree with their report file name.

    Adds a boolean column "Consistency" to *df* (in place), initially True.
    For every row, the file name in column "Bechmark" is split on "_" and
    the value following the tag "tps" is compared with column
    "Requested TPS", and the value following "chunklen" with column
    "Chunk Length". A mismatch sets "Consistency" to False for that row.

    A check is skipped when its dataframe column does not exist, since there
    is nothing to compare against; a tag that is the last field of the file
    name (no value after it) is ignored.

    Args:
        df: Dataframe with a "Bechmark" column holding the report paths.

    Raises:
        ValueError: If the field following a checked tag is not an integer.
    """
    # Create new column "Consistency" of type boolean and set it to True
    df["Consistency"] = True

    # File-name tag -> dataframe column, restricted to the columns available
    tag_to_column = {"tps": "Requested TPS", "chunklen": "Chunk Length"}
    active_checks = {
        tag: column for tag, column in tag_to_column.items() if column in df.columns
    }

    for index, record in df.iterrows():
        filename = record["Bechmark"].split("/")[-1]
        params = filename.split(".")[0].split("_")

        # Walk the (tag, value) pairs of consecutive fields
        for tag, value in zip(params, params[1:]):
            column = active_checks.get(tag)
            if column is not None and int(value) != record[column]:
                df.loc[index, "Consistency"] = False

# Fill the "Consistency" column of the dataframe
eval_consistency(df)

# Report the benchmarks of the rows flagged as inconsistent, if any
inconsistent_rows = df["Consistency"] == False
if inconsistent_rows.any():
    print("Inconsistency detected!")

    print("Inconsistent rows:")
    print(df[inconsistent_rows]["Bechmark"].values)

## Data analysis

### Average throughput versus number of organizations

In [None]:
import plotly.express as px

def group_network_conditions(s):
    """Summarise the network settings of a row as a single label.

    For each network column whose value is not "ideal", a fragment
    "<name> <value><unit>" is built, where the name is the column name
    without its "Network" prefix and the unit is the text in parentheses.

    Args:
        s: Row (mapping from column name to value) holding the four
            "Network ..." columns.

    Returns:
        The fragments joined by ",<br>", or "Ideal" when every column is
        "ideal".
    """
    prefix = "Network"
    fragments = []

    for column_name in ["Network Packet Loss (%)", "Network Max Throughput (Mbps)", "Network Avg Latency (ms)", "Network Jitter (ms)"]:
        value = s[column_name]
        if value == "ideal":
            continue

        # Text before the first "(" is the label, the rest holds the unit
        label, _, remainder = column_name.partition("(")
        # Remove the prefix explicitly: str.lstrip would strip a set of
        # characters, not the word, and could eat into the label itself.
        if label.startswith(prefix):
            label = label[len(prefix):]
        name = label.strip()
        unit = remainder.strip(")").strip()
        fragments.append(f"{name} {value}{unit}")

    if not fragments:
        return "Ideal"
    return ",<br>".join(fragments)

# Add the column "Network Conditions" summarising the network settings of each row
df["Network Conditions"] = df.apply(group_network_conditions, axis=1)

# Print every distinct network condition found in the data
cds = df["Network Conditions"].unique()
for i, condition in enumerate(cds):
    print(f"Network condition {i}: {condition}")

# Sort dataframe by the colour grouping first, then by the x axis
df_sorted = df.sort_values(by=["Number of Organizations", "Transaction Size (Byte)"])

# Plot: one line per number of organizations
fig = px.line(
    df_sorted,
    x="Transaction Size (Byte)",
    y="Throughput (TPS)",
    color="Number of Organizations",
    markers=True,
    template="simple_white",
)

# Set log scale
# fig.update_layout(
#     xaxis_type="log",
#     # yaxis_type="log",
# )
# fig.update_xaxes(range=[4.96, 6.04]) # Logarithmic range

# Title (centered), grid on both axes and legend centered at the right of the plot
grid_style = dict(showgrid=True, gridwidth=1, gridcolor="LightGrey")
fig.update_layout(
    title="Throughput vs Transaction Size for different Number of Organizations",
    title_x=0.5,
    xaxis=dict(**grid_style),
    yaxis=dict(**grid_style),
    legend=dict(yanchor="middle", y=0.5, xanchor="left", x=1.02),
)

# Set border pane lines
for update_axes in (fig.update_xaxes, fig.update_yaxes):
    update_axes(showline=True, linewidth=1, linecolor="black", mirror=True)

# Set categories title
fig.update_legends(title_text="Number of Organizations")

# Save EPS figure
# fig.write_image(NOTEBOOK_NAME + "throughput_vs_nodes.eps", width=800, height=450, scale=1)

fig.show()

### Average latency versus number of organizations

In [None]:
import plotly.express as px

def group_network_conditions(s):
    """Summarise the network settings of a row as a single label.

    For each network column whose value is not "ideal", a fragment
    "<name> <value><unit>" is built, where the name is the column name
    without its "Network" prefix and the unit is the text in parentheses.

    Args:
        s: Row (mapping from column name to value) holding the four
            "Network ..." columns.

    Returns:
        The fragments joined by ",<br>", or "Ideal" when every column is
        "ideal".
    """
    prefix = "Network"
    fragments = []

    for column_name in ["Network Packet Loss (%)", "Network Max Throughput (Mbps)", "Network Avg Latency (ms)", "Network Jitter (ms)"]:
        value = s[column_name]
        if value == "ideal":
            continue

        # Text before the first "(" is the label, the rest holds the unit
        label, _, remainder = column_name.partition("(")
        # Remove the prefix explicitly: str.lstrip would strip a set of
        # characters, not the word, and could eat into the label itself.
        if label.startswith(prefix):
            label = label[len(prefix):]
        name = label.strip()
        unit = remainder.strip(")").strip()
        fragments.append(f"{name} {value}{unit}")

    if not fragments:
        return "Ideal"
    return ",<br>".join(fragments)

# Add the column "Network Conditions" summarising the network settings of each row
df["Network Conditions"] = df.apply(group_network_conditions, axis=1)

# Print every distinct network condition found in the data
cds = df["Network Conditions"].unique()
for i, condition in enumerate(cds):
    print(f"Network condition {i}: {condition}")

# Sort dataframe by the colour grouping first, then by the x axis
df_sorted = df.sort_values(by=["Number of Organizations", "Transaction Size (Byte)"])

# Plot: one line per number of organizations
fig = px.line(
    df_sorted,
    x="Transaction Size (Byte)",
    y="Avg Latency (s)",
    color="Number of Organizations",
    markers=True,
    template="simple_white",
)

# Set log scale
# fig.update_layout(
#     xaxis_type="log",
#     # yaxis_type="log",
# )
# fig.update_xaxes(range=[4.96, 6.04]) # Logarithmic range

# Title (centered), grid on both axes and legend centered at the right of the plot
grid_style = dict(showgrid=True, gridwidth=1, gridcolor="LightGrey")
fig.update_layout(
    title="Avg Latency vs Transaction Size for different Number of Organizations",
    title_x=0.5,
    xaxis=dict(**grid_style),
    yaxis=dict(**grid_style),
    legend=dict(yanchor="middle", y=0.5, xanchor="left", x=1.02),
)

# Set border pane lines
for update_axes in (fig.update_xaxes, fig.update_yaxes):
    update_axes(showline=True, linewidth=1, linecolor="black", mirror=True)

# Set categories title
fig.update_legends(title_text="Number of Organizations")

# Save EPS figure
# NOTE: the file name below still refers to throughput; pick a latency-specific
# name before enabling this line.
# fig.write_image(NOTEBOOK_NAME + "throughput_vs_nodes.eps", width=800, height=450, scale=1)

fig.show()

### Success rate versus number of organizations

In [None]:
import plotly.express as px

def group_network_conditions(s):
    """Summarise the network settings of a row as a single label.

    For each network column whose value is not "ideal", a fragment
    "<name> <value><unit>" is built, where the name is the column name
    without its "Network" prefix and the unit is the text in parentheses.

    Args:
        s: Row (mapping from column name to value) holding the four
            "Network ..." columns.

    Returns:
        The fragments joined by ",<br>", or "Ideal" when every column is
        "ideal".
    """
    prefix = "Network"
    fragments = []

    for column_name in ["Network Packet Loss (%)", "Network Max Throughput (Mbps)", "Network Avg Latency (ms)", "Network Jitter (ms)"]:
        value = s[column_name]
        if value == "ideal":
            continue

        # Text before the first "(" is the label, the rest holds the unit
        label, _, remainder = column_name.partition("(")
        # Remove the prefix explicitly: str.lstrip would strip a set of
        # characters, not the word, and could eat into the label itself.
        if label.startswith(prefix):
            label = label[len(prefix):]
        name = label.strip()
        unit = remainder.strip(")").strip()
        fragments.append(f"{name} {value}{unit}")

    if not fragments:
        return "Ideal"
    return ",<br>".join(fragments)

# Add the column "Network Conditions" summarising the network settings of each row
df["Network Conditions"] = df.apply(group_network_conditions, axis=1)

# Print every distinct network condition found in the data
cds = df["Network Conditions"].unique()
for i, condition in enumerate(cds):
    print(f"Network condition {i}: {condition}")

# Sort dataframe by the colour grouping first, then by the x axis
df_sorted = df.sort_values(by=["Number of Organizations", "Transaction Size (Byte)"])

# Plot: one line per number of organizations
fig = px.line(
    df_sorted,
    x="Transaction Size (Byte)",
    y="Success Rate",
    color="Number of Organizations",
    markers=True,
    template="simple_white",
)

# Set log scale
# fig.update_layout(
#     xaxis_type="log",
#     # yaxis_type="log",
# )
# fig.update_xaxes(range=[4.96, 6.04]) # Logarithmic range

# Title (centered), grid on both axes and legend centered at the right of the plot
grid_style = dict(showgrid=True, gridwidth=1, gridcolor="LightGrey")
fig.update_layout(
    title="Success Rate vs Transaction Size for different Number of Organizations",
    title_x=0.5,
    xaxis=dict(**grid_style),
    yaxis=dict(**grid_style),
    legend=dict(yanchor="middle", y=0.5, xanchor="left", x=1.02),
)

# Set border pane lines
for update_axes in (fig.update_xaxes, fig.update_yaxes):
    update_axes(showline=True, linewidth=1, linecolor="black", mirror=True)

# Set categories title
fig.update_legends(title_text="Number of Organizations")

# Save EPS figure
# NOTE: the file name below still refers to throughput; pick a success-rate
# specific name before enabling this line.
# fig.write_image(NOTEBOOK_NAME + "throughput_vs_nodes.eps", width=800, height=450, scale=1)

fig.show()