In [2]:
import os
import shutil
import re
from pathlib import Path
from collections import defaultdict
import matplotlib.pyplot as plt

In [3]:
# Where the raw logs are read from and where the cleaned copies are written.
directory = "cleaned"
output_dir = Path(directory)
input_dir = Path("output")

# Always start from an empty output directory: anything left there by an
# earlier run is removed before the directory is (re)created.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
output_dir.mkdir(exist_ok=True)

# Pattern to detect the beginning of a chunk, e.g., (0), (1), etc.
chunk_start_pattern = re.compile(r"^\((\d+)\)")

def clean_file(file_path):
    """Return the lines of ``file_path`` keeping only the last chunk per rank.

    A chunk starts at a line matching ``chunk_start_pattern`` (a parenthesised
    integer at the very start of the line, e.g. ``(0)``) and extends up to the
    next such line or the end of the file. The captured digits are the rank.

    When a rank appears more than once, only its last chunk in the file is
    kept. Lines located before the first chunk header are dropped. The kept
    chunks stay in the order in which they appear in the file.

    Args:
        file_path: Path of the text file to read.

    Returns:
        list[str]: The kept lines, with their original line endings.
    """
    with open(file_path, 'r') as f:
        lines = f.readlines()

    seen_ranks = set()
    kept_chunks = []  # kept chunks, collected last-in-file first
    pending = []      # body lines seen since the previous header, in reverse order

    # Walking backwards means the first time a rank is met is its last
    # occurrence in the file. Lines are appended and reversed once per chunk
    # instead of being inserted at index 0, which keeps the pass linear.
    for line in reversed(lines):
        header = chunk_start_pattern.match(line)
        if header is None:
            pending.append(line)
            continue

        rank = header.group(1)
        if rank not in seen_ranks:
            seen_ranks.add(rank)
            pending.append(line)  # the header goes first once reversed
            pending.reverse()
            kept_chunks.append(pending)
        # Whether kept or discarded, the lines gathered so far belonged to
        # this header; start collecting afresh for the one above it.
        pending = []

    # Restore file order and flatten the chunks into a single list of lines.
    kept_chunks.reverse()
    return [line for chunk in kept_chunks for line in chunk]

# Write a cleaned copy of every "*.out" file found in input_dir into
# output_dir, keeping the original file name.
for source_path in input_dir.glob("*.out"):
    kept_lines = clean_file(source_path)
    with (output_dir / source_path.name).open('w') as sink:
        sink.writelines(kept_lines)

In [4]:
# Nested mapping: algorithm -> cluster size -> message size -> file contents.
data = defaultdict(lambda: defaultdict(dict))

# Accepted file names look like "<algorithm>-<cluster size>-<message size>.out",
# where the algorithm is made of letters, digits and underscores and both
# sizes are plain integers.
pattern = re.compile(r'^([a-zA-Z0-9_]+)-(\d+)-(\d+)\.out$')

if not os.path.isdir(directory):
    raise FileNotFoundError(f"Directory '{directory}' does not exist.")

for filename in os.listdir(directory):
    parsed = pattern.match(filename)
    if parsed is None:
        # Anything that does not follow the naming scheme is reported and ignored.
        print(f"Skipping invalid filename: {filename}")
        continue

    algorithm = parsed.group(1)
    cluster_size = int(parsed.group(2))
    msg_size = int(parsed.group(3))

    # The whole file is stored as one string; later cells parse it.
    with open(os.path.join(directory, filename), 'r') as handle:
        data[algorithm][cluster_size][msg_size] = handle.read()

In [5]:
# Matches "(<digits>)", whitespace, a run of non-"=" characters, "=", optional
# whitespace, then a number-like token followed by ":". Only that token is
# captured.
ar_pattern = re.compile(r'\(\d+\)\s+[^=]+=\s*([-\d.eE]+):')

# Same nesting as `data`, but each leaf is the largest captured value found in
# the file, or None when the file contains no match at all.
max_ar_values = defaultdict(lambda: defaultdict(dict))

for algorithm, per_cluster in data.items():
    for cluster, per_message in per_cluster.items():
        for message, text in per_message.items():
            captured = ar_pattern.findall(text)
            max_ar_values[algorithm][cluster][message] = (
                max(float(token) for token in captured) if captured else None
            )

In [6]:
# Graphs from earlier runs are left in place; the directory is only created
# when it is missing.
os.makedirs('graphs', exist_ok=True)

# The "ring" results serve as the reference series: a cluster size without
# them is not plotted, and neither is a message size the reference lacks.
baseline = dict(max_ar_values.get("ring", {}))

# Every cluster size that occurs for at least one algorithm.
clusters = {size for per_cluster in max_ar_values.values() for size in per_cluster}

for cluster in sorted(clusters):
    fig, ax = plt.subplots(figsize=(10, 6))

    for algorithm in sorted(max_ar_values):
        # Nothing to draw if the algorithm was not run on this cluster size or
        # if there is no "ring" reference for it.
        if cluster not in max_ar_values[algorithm] or cluster not in baseline:
            continue

        message_ar = max_ar_values[algorithm][cluster]
        baseline_ar = baseline[cluster]

        # Message sizes in increasing order, restricted to those the reference has.
        x = [msg for msg in sorted(message_ar) if msg in baseline_ar]
        # Division by the reference is currently switched off, so the raw value
        # is plotted. A missing value, or a missing/zero reference, becomes NaN
        # so that the point is left out of the curve.
        y = [
            message_ar[msg]
            if baseline_ar[msg] and message_ar[msg] is not None
            else float('nan')
            for msg in x
        ]

        ax.plot(x, y, label=algorithm, marker='o')

    ax.set_title(f"Latency vs Message Size (Cluster Size = {cluster})")
    ax.set_xlabel("Message Size")
    ax.set_ylabel("Latency")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.grid(True)
    ax.legend()
    fig.tight_layout()

    # One image per cluster size.
    fig.savefig(f"graphs/perf-{cluster}.png")

    # Release the figure so that figures do not pile up across iterations.
    plt.close(fig)


In [8]:
import matplotlib.ticker as mticker

os.makedirs('graphs', exist_ok=True)

# Chunk header: "(<rank>) <anything on the same line> = <number-like token>:".
chunk_header_re = re.compile(r"\((\d+)\)\s+[^\n]*=\s+([0-9eE\.\-]+):")
# Value line: "<word>: <number-like token> / <number-like token>"; only the
# first token is captured.
value_line_re = re.compile(r"^\s*\w+:\s*([-\d\.eE]+)\s*/\s*[-\d\.eE]+")
# Splits a file's text right before every line that starts with "(<digits>)".
# Compiled once here instead of on every loop iteration.
chunk_split_re = re.compile(r"\n(?=\(\d+\))")


def _chunks_by_rank(text_by_msg_size, cluster_size):
    """Return ``{rank: {msg_size: chunk text}}`` for ranks ``0..cluster_size-1``.

    Chunks whose first line does not match ``chunk_header_re`` and chunks
    whose rank is not below ``cluster_size`` are ignored.
    """
    rank_data = {i: {} for i in range(cluster_size)}
    for msg_size, raw in text_by_msg_size.items():
        for chunk in chunk_split_re.split(raw.strip()):
            header = chunk_header_re.match(chunk)
            if header is None:
                continue
            rank = int(header.group(1))
            if rank < cluster_size:
                rank_data[rank][msg_size] = chunk
    return rank_data


def _running_totals(chunk):
    """Return the running sum of the captured value of each value line in ``chunk``."""
    totals = []
    total = 0.0
    for line in chunk.strip().splitlines():
        found = value_line_re.match(line)
        if found is None:
            continue
        try:
            val = float(found.group(1))
        except ValueError:
            # The character class also accepts tokens that are not numbers
            # (e.g. "-" or "1e"); such lines are skipped.
            continue
        total += val
        totals.append(total)
    return totals


for algorithm in data:
    for cluster_size in data[algorithm]:
        if cluster_size < 1:
            # There is no rank to draw and plt.subplots rejects a zero-row grid.
            continue

        rank_data = _chunks_by_rank(data[algorithm][cluster_size], cluster_size)

        # One subplot per rank, stacked vertically. squeeze=False always
        # returns a 2-D array of axes, so a single rank needs no special case.
        fig, axs = plt.subplots(cluster_size, 1, figsize=(10, 3 * cluster_size),
                                sharex=False, squeeze=False)

        fig.suptitle(f"Algorithm: {algorithm}, Cluster Size: {cluster_size}")

        for rank in range(cluster_size):
            ax = axs[rank, 0]
            ax.set_title(f"Proc. {rank}")
            has_series = False

            for msg_size in sorted(rank_data[rank]):
                cumulative = _running_totals(rank_data[rank][msg_size])

                # Normalize the cumulative list so that it ends at 1.0. Chunks
                # without any value line, or whose values sum to 0, are skipped.
                if cumulative and cumulative[-1] != 0:
                    normalized = [v / cumulative[-1] for v in cumulative]
                    ax.plot(range(1, len(normalized) + 1), normalized,
                            label=f"msg={msg_size}", marker='o')
                    has_series = True

            # Steps are counted from 1, so only integer ticks make sense.
            ax.xaxis.set_major_locator(mticker.MaxNLocator(integer=True))
            ax.set_xlabel("Step")
            ax.set_ylabel("Progress")
            ax.grid(True)
            # A legend on an axis with no curve only triggers a matplotlib warning.
            if has_series:
                ax.legend()

        # Leave room at the top of the figure for the suptitle.
        fig.tight_layout(rect=[0, 0, 1, 0.95])

        filename = f"graphs/{algorithm}_{cluster_size}.png"  # customize the filename
        fig.savefig(filename)

        # Release the figure so that figures do not pile up across iterations.
        plt.close(fig)