In [115]:
import os
import re

# Distinct N, D and K values collected from the result file names.
N_values, D_values, K_values = set(), set(), set()

# Parsed metrics for each result file, keyed by the file name's stem.
latency_values, tps_values = {}, {}

def get_key(indexed, N, D, K):
    """Build the lookup key for one benchmark configuration.

    Args:
        indexed: Truthy for the "true" variant, falsy for the "false" variant.
        N: Value placed after the ``N`` prefix.
        D: Value placed after the ``D`` prefix.
        K: Value placed after the ``K`` prefix.

    Returns:
        A string of the form ``"<true|false>_N<N>_D<D>_K<K>"``. The values are
        interpolated with ``str()``, so ints and numeric strings give the
        same key.
    """
    return f"{'true' if indexed else 'false'}_N{N}_D{D}_K{K}"

# NOTE(review): `dir` shadows the built-in dir(). The name is kept so that any
# other cell referring to it keeps working; consider renaming it notebook-wide.
dir = 'outputs'
file_names = os.listdir(dir)

for file_name in file_names:
    # The stem (text before the first '.') doubles as the metrics key and is
    # expected to hold four '_'-separated fields: <indexed>_N<n>_D<d>_K<k>.
    key = file_name.split('.')[0]
    parts = key.split('_')
    if len(parts) != 4:
        continue

    # Drop the one-character prefix from each of the three parameter fields.
    n_value, d_value, k_value = (part[1:] for part in parts[1:])

    # The collected values are sorted with key=int below, so a stray file whose
    # fields are not plain integers is skipped instead of crashing the cell.
    if not (n_value.isdecimal() and d_value.isdecimal() and k_value.isdecimal()):
        continue

    # os.listdir() also returns sub-directories, which open() cannot read.
    file_path = os.path.join(dir, file_name)
    if not os.path.isfile(file_path):
        continue

    N_values.add(n_value)
    D_values.add(d_value)
    K_values.add(k_value)

    # Read the file and extract the latency average value
    with open(file_path, 'r') as file:
        file_content = file.read()

    # Extract latency average using regular expression
    latency_average_match = re.search(r'latency average = (\d+\.\d+) ms', file_content)
    if latency_average_match:
        latency_values[key] = float(latency_average_match.group(1))

    # Extract TPS (Transactions Per Second) using regular expression; only the
    # first "tps = ..." occurrence in the file is used.
    tps_match = re.search(r'tps = (\d+\.\d+)', file_content)
    if tps_match:
        tps_values[key] = float(tps_match.group(1))

# The values are numeric strings: order them numerically, not lexically.
# This rebinds each name from a set to a sorted list of strings.
N_values = sorted(N_values, key=int)
D_values = sorted(D_values, key=int)
K_values = sorted(K_values, key=int)

In [116]:
# Human-readable axis title for each single-letter benchmark parameter.
full_strings = dict(
    N='Number of rows (N)',
    D='Dimensions of vectors (D)',
    K='Number of similar vectors (K)',
)

# Values available for each parameter, looked up by its single-letter name.
param_values = dict(D=D_values, N=N_values, K=K_values)

def generate_plot(x, per, fixed, fixed_value):
    """Show a latency chart over parameter `x`, with one line per `per` value.

    For both the indexed and the unindexed variant, and for every value of
    `per` in `param_values`, the latencies recorded in `latency_values` are
    gathered across all values of `x` while `fixed` is held at `fixed_value`.
    Combinations with no recorded latency are left out, and a line with no
    points at all is not drawn.

    Args:
        x: Parameter name ('N', 'D' or 'K') plotted on the x axis.
        per: Parameter name that gets one line per value.
        fixed: Parameter name held constant.
        fixed_value: Value used for the `fixed` parameter.

    Returns:
        None. The figure is displayed with `fig.show()`.

    Raises:
        KeyError: If `x`, `per` and `fixed` together do not cover 'N', 'D'
            and 'K', or if `x` / `per` are not keys of the lookup tables.

    NOTE(review): relies on `go` (presumably plotly.graph_objects), which is
    not imported in the cells visible here - confirm it is imported earlier.
    """
    # Gather one (xs, ys) series per (per-value, variant) pair.
    series = {}
    for indexed, variant in ((True, 'indexed'), (False, 'unindexed')):
        for per_value in param_values[per]:
            xs, ys = [], []
            for x_value in param_values[x]:
                combo = {x: x_value, per: per_value, fixed: fixed_value}
                lookup = get_key(indexed, combo['N'], combo['D'], combo['K'])
                if lookup not in latency_values:
                    continue
                xs.append(x_value)
                ys.append(latency_values[lookup])
            if xs:
                series[f"{per_value} ({variant})"] = (xs, ys)

    # Draw every series as its own trace.
    figure = go.Figure()
    for label, (xs, ys) in series.items():
        trace = go.Scatter(x=xs, y=ys, mode='lines+markers', name=label)
        figure.add_trace(trace)

    figure.update_layout(
        title=f"Latency vs. {x} per {per}",
        xaxis_title=full_strings[x],
        yaxis_title='Latency (ms)',
    )
    figure.show()


# One chart per row: (x-axis parameter, per-line parameter, fixed parameter, fixed value).
for x_param, per_param, fixed_param, held_at in (
    ('N', 'D', 'K', 4),
    ('K', 'D', 'N', 100000),
    ('D', 'N', 'K', 4),
):
    generate_plot(x=x_param, per=per_param, fixed=fixed_param, fixed_value=held_at)