In [1]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# --- Configuration ---------------------------------------------------------
# Paths are relative to the notebook's working directory (recorded in ROOT).
ROOT = os.getcwd()
CSV_PATH = 'results_3.2.csv'
PLOTS_DIR = 'plots'
os.makedirs(PLOTS_DIR, exist_ok=True)

# --- Load and normalise the results table ----------------------------------
res = pd.read_csv(CSV_PATH)

# Result files without a 'user' column are grouped under one placeholder label.
if 'user' not in res.columns:
    res['user'] = 'unknown'

# These columns are used as grouping keys and in file names; force plain ints.
for int_col in ('n', 'sparsity', 'reps'):
    res[int_col] = res[int_col].astype(int)

# procs may contain a non-numeric marker (e.g. 'sequential') for the baseline
# run. Coerce anything that is not a number to NaN and treat it as 1 process.
# Doing this unconditionally gives the same result as trying astype(int)
# first, without a catch-all `except Exception` that could hide other errors.
res['procs'] = pd.to_numeric(res['procs'], errors='coerce').fillna(1).astype(int)

import numpy as np

def sanitize_user(u):
    """Return a version of ``u`` that is safe to embed in a file name.

    The value is converted to ``str`` and stripped of surrounding whitespace;
    every character other than ASCII letters, digits, ``.``, ``_`` and ``-``
    is then replaced by an underscore.
    """
    trimmed = str(u).strip()
    safe_name = re.sub(r"[^0-9A-Za-z._-]", '_', trimmed)
    return safe_name

def _plot_timing(data, n, s, uname):
    """Save a grouped bar chart of mean phase times per process count.

    ``data`` holds the rows for one (user, n, sparsity) combination. Rows are
    averaged per ``procs`` value and one bar per timing column is drawn for
    each process count. The figure is written to PLOTS_DIR.
    """
    summary = (
        data.groupby('procs')
        .mean(numeric_only=True)
        .reset_index()
        .sort_values('procs')
    )
    procs = summary['procs'].values
    x = np.arange(len(procs))
    width = 0.2
    # (column, legend label, colour) in left-to-right bar order.
    series = [
        ('time_csr_construct', 'CSR construct', '#4C72B0'),
        ('time_send', 'CSR send', '#DD8452'),
        ('time_spmv', 'CSR SpMV', '#55A868'),
        ('time_dense_total', 'Dense total', '#C44E52'),
    ]
    fig, ax = plt.subplots(figsize=(6, 3.5))
    try:
        for i, (column, label, color) in enumerate(series):
            # Centre the group of bars on each tick: offsets -1.5w, -0.5w, 0.5w, 1.5w.
            offset = (i - (len(series) - 1) / 2) * width
            ax.bar(x + offset, summary[column].values, width, label=label, color=color)
        ax.set_xticks(x)
        ax.set_xticklabels([str(int(p)) for p in procs])
        ax.set_xlabel('MPI processes')
        ax.set_ylabel('Time (s)')
        ax.set_title(f'n={n} s={s}%')
        ax.grid(axis='y', alpha=0.25)
        ax.legend(fontsize=8)
        fig.tight_layout()
        fig.savefig(os.path.join(PLOTS_DIR, f'timing_n{n}_s{s}_user_{uname}.png'), dpi=200)
    finally:
        # Always release the figure, even if a column is missing or saving fails.
        plt.close(fig)


def _plot_speedup(n_data, n, user, uname):
    """Save a CSR SpMV speedup plot with one line per sparsity level.

    ``n_data`` holds the rows for one (user, n) combination. Speedup is the
    mean ``time_spmv`` at procs == 1 divided by the mean ``time_spmv`` at each
    process count. Sparsity levels without a usable procs == 1 baseline
    (missing, NaN or zero) are left out; if none remain, no file is written.
    """
    grouped = (
        n_data.groupby(['sparsity', 'procs'])
        .mean(numeric_only=True)
        .reset_index()
    )
    if grouped.empty:
        return
    fig, ax = plt.subplots(figsize=(6, 3.5))
    try:
        lines_drawn = 0
        for s, sdata in grouped.groupby('sparsity'):
            sdata = sdata.sort_values('procs')
            base = sdata.loc[sdata['procs'] == 1, 'time_spmv'].mean()
            if np.isnan(base) or base == 0:
                continue
            ax.plot(sdata['procs'], base / sdata['time_spmv'].values, marker='o', label=f's={s}%')
            lines_drawn += 1
        if lines_drawn == 0:
            # Nothing to show: avoid an empty figure and the legend warning.
            print(f'  Skipping speedup plot for n={n} (user={user}): no usable procs=1 baseline')
            return
        ax.set_xticks(sorted(grouped['procs'].unique()))
        ax.set_xlabel('MPI processes')
        ax.set_ylabel('Speedup (x)')
        ax.set_title(f'Speedup (CSR SpMV) n={n} (user={user})')
        ax.grid(True, alpha=0.25)
        ax.legend(fontsize=8)
        fig.tight_layout()
        fig.savefig(os.path.join(PLOTS_DIR, f'speedup_n{n}_user_{uname}.png'), dpi=200)
    finally:
        plt.close(fig)


for user in sorted(res['user'].unique()):
    user_data = res[res['user'] == user]
    uname = sanitize_user(user)
    print('Generating plots for user:', user)

    # Timing plots: one per (n, sparsity); groupby iterates keys in sorted order.
    for (n, s), data in user_data.groupby(['n', 'sparsity']):
        _plot_timing(data, n, s, uname)

    # Speedup plots: one per n.
    for n, n_data in user_data.groupby('n'):
        _plot_speedup(n_data, n, user, uname)

print('All plots generated to', PLOTS_DIR)

Generating plots for user: marr
All plots generated to plots
