In [66]:
import os
import re

# Distinct N / D / K values seen in the result file names. They are collected
# as strings and turned into numerically sorted lists at the end of the cell.
N_values = set()
D_values = set()
K_values = set()

# Parsed metrics, keyed by the file-name stem (e.g. 'N100000_D3_K4').
latency_values = {}
tps_values = {}

# Directory entries that were ignored: sub-directories, or names whose first
# three '_'-separated fields are not "<one character><integer>".
skipped_files = []


def get_key(N, D, K):
    """Return the lookup key for latency_values / tps_values, e.g. 'N100_D3_K4'."""
    return f"N{N}_D{D}_K{K}"


def _parse_dimensions(key):
    """Split a file-name stem into its (N, D, K) strings.

    The first character of each of the first three '_'-separated fields is
    dropped (the 'N' / 'D' / 'K' prefix). Returns None when the stem has fewer
    than three fields or when any of the three remainders is not an integer.
    """
    fields = key.split('_')
    if len(fields) < 3:
        return None
    dimensions = tuple(field[1:] for field in fields[:3])
    try:
        for value in dimensions:
            int(value)  # same conversion the final sort relies on
    except ValueError:
        return None
    return dimensions


# NOTE(review): `dir` shadows the builtin dir(); the name is kept unchanged
# because other cells may read it.
dir = 'outputs'
file_names = os.listdir(dir)

for file_name in file_names:
    key = file_name.split('.')[0]
    path = os.path.join(dir, file_name)

    dimensions = _parse_dimensions(key)
    if dimensions is None or not os.path.isfile(path):
        # Stray entries (e.g. '.DS_Store', checkpoint folders) must not abort
        # the whole cell; remember them so they can be inspected afterwards.
        skipped_files.append(file_name)
        continue

    n_text, d_text, k_text = dimensions
    N_values.add(n_text)
    D_values.add(d_text)
    K_values.add(k_text)

    with open(path, 'r') as file:
        file_content = file.read()

    # First "latency average = <float> ms" occurrence in the file, if any.
    latency_average_match = re.search(r'latency average = (\d+\.\d+) ms', file_content)
    if latency_average_match:
        latency_values[key] = float(latency_average_match.group(1))

    # First "tps = <float>" occurrence (transactions per second), if any.
    tps_match = re.search(r'tps = (\d+\.\d+)', file_content)
    if tps_match:
        tps_values[key] = float(tps_match.group(1))

# Sort numerically rather than lexicographically ('1000' must come after '200').
N_values = sorted(N_values, key=int)
D_values = sorted(D_values, key=int)
K_values = sorted(K_values, key=int)

In [69]:
import plotly.graph_objects as go

def generate_plot(title, xaxis_title, latency_dict, yaxis_title='Latency (ms)'):
    """Show a line chart with one trace per entry of ``latency_dict``.

    Args:
        title: Figure title.
        xaxis_title: Label for the x-axis.
        latency_dict: Mapping of trace name -> ``(x_values, y_values)``. Each
            pair is drawn as a single 'lines+markers' series.
        yaxis_title: Label for the y-axis. Defaults to 'Latency (ms)' so
            existing three-argument calls are unchanged; pass another label
            when plotting a different metric.

    Returns:
        None. The figure is displayed with ``fig.show()`` and deliberately not
        returned, so a call placed on the last line of a cell does not also
        emit the figure as the cell's output value.
    """
    fig = go.Figure()

    for trace_name, (x_values, y_values) in latency_dict.items():
        fig.add_trace(go.Scatter(
            x=x_values,
            y=y_values,
            mode='lines+markers',
            name=trace_name
        ))

    fig.update_layout(
        title=title,
        xaxis_title=xaxis_title,
        yaxis_title=yaxis_title
    )

    fig.show()

In [70]:
# Latency (y) against the number of rows N (x).
# One trace per dimension D, with K held at 4.

K = 4
latency_dict = {}

for D in D_values:
    # Only the N values that have a recorded latency for this (D, K) pair.
    measured_n = [N for N in N_values if get_key(N, D, K) in latency_values]
    if measured_n:
        latency_dict[D] = (
            measured_n,
            [latency_values[get_key(N, D, K)] for N in measured_n],
        )

generate_plot('Latency vs. N per dimension', 'Number of rows (N)', latency_dict)

In [71]:
# x-axis: K
# y-axis: latency
# per: D
# fixed: N = 100000

latency_dict = {}

N = 100000
for D in D_values:
    # The K values are iterated inside comprehensions on purpose: their loop
    # variable stays local, so this cell no longer overwrites the notebook-level
    # `K` that other cells use as the fixed K.
    measured_k = [
        k_value for k_value in K_values
        if get_key(N, D, k_value) in latency_values
    ]
    if measured_k:
        latency_dict[D] = (
            measured_k,
            [latency_values[get_key(N, D, k_value)] for k_value in measured_k],
        )

generate_plot('Latency vs. K per dimension', 'Number of similar vectors (K)', latency_dict)

In [74]:
# x-axis: D
# y-axis: latency
# per: N
# fixed: K = 4

latency_dict = {}

# Must be uppercase `K`: get_key(N, D, K) below reads `K`. Assigning to a
# lowercase `k` left the lookup using whatever `K` an earlier cell last set.
K = 4
for N in N_values:
    x_values = []
    y_values = []
    for D in D_values:
        key = get_key(N, D, K)
        if key in latency_values:
            x_values.append(D)
            y_values.append(latency_values[key])
    if len(x_values) > 0:
        latency_dict[N] = (x_values, y_values)

generate_plot('Latency vs. D per N', 'Dimension of vector (D)', latency_dict)