In [9]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Both the input CSV and the output plot directory are resolved against the
# directory the notebook is executed from.
CSV_PATH = os.path.join(os.getcwd(), 'results_2.2.csv')
PLOTS_DIR = os.path.join(os.getcwd(), 'plots')
os.makedirs(PLOTS_DIR, exist_ok=True)

res = pd.read_csv(CSV_PATH)

# Result files without a 'user' column get a single placeholder user so the
# per-user plotting code still has something to filter on.
if 'user' not in res.columns:
    res['user'] = 'unknown'

# Cast the integer-valued configuration columns in one step.
res = res.astype({'n': int, 'sparsity': int, 'reps': int})

# Keep the textual thread label; the plotting code compares it against
# 'sequential' to find the baseline rows.
res['threads_label'] = res['threads'].astype(str)

def threads_to_int(x):
    """Convert a thread-count label into an integer.

    Args:
        x: Raw label, e.g. ``'sequential'``, ``'4'``, ``4`` or ``'4.0'``.

    Returns:
        ``-1`` when the label is ``'sequential'`` (case-insensitive),
        otherwise the thread count as an ``int``. Labels that cannot be read
        as a whole number fall back to ``1``.
    """
    if str(x).lower() == 'sequential':
        return -1
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        pass
    # A column parsed as float is stringified as '4.0', which int() rejects.
    # Accept such labels when they denote a whole number.
    try:
        as_float = float(x)
    except (TypeError, ValueError):
        return 1
    if as_float.is_integer():
        return int(as_float)
    return 1

# Numeric thread count per row; threads_to_int maps 'sequential' to -1.
res['threads'] = res['threads_label'].map(threads_to_int)

# Timing columns shown in the stacked breakdown, mapped to their legend text.
# Iteration order of this dict is the stacking order of the bars.
component_labels = {
    'time_init': 'Init',
    'time_csr_construct': 'CSR Construct',
    'time_spmv_total': 'CSR SpMV',
    'time_dense_total': 'Dense',
}

In [10]:
# Timing breakdown multiplot (similar to plotter_2_1)
def plot_timing_breakdown_multiplots(df, user):
    """Save a 2x2 figure of stacked timing bars for one user.

    Rows of ``df`` whose ``user`` column equals ``user`` are split by
    ``(n, sparsity)``. The first four combinations (ordered by ``n``, then
    ``sparsity``) each get one subplot with a stacked bar per thread count.
    Bar segments are the per-thread means of every ``component_labels``
    column present in the data; error bars are the per-thread standard
    deviations, capped at the segment height. The figure is written to
    ``timing_breakdown_all_configs_<user>.png`` inside ``PLOTS_DIR``.

    Args:
        df: Results table with ``user``, ``n``, ``sparsity`` and ``threads``
            columns plus the timing columns.
        user: Value of the ``user`` column to plot.

    Returns:
        None. Nothing is drawn or saved when ``user`` has no rows.
    """
    rows = df[df['user'] == user].copy()
    if rows.empty:
        return

    # Distinct (n, sparsity) pairs in ascending order.
    combos = (
        rows.groupby(['n', 'sparsity']).size().reset_index()[['n', 'sparsity']]
        .sort_values(['n', 'sparsity'])
        .reset_index(drop=True)
    )
    if len(combos) == 0:
        return

    # The grid is fixed at 2x2, so at most four combinations are shown.
    shown = min(len(combos), 4)
    fig, grid = plt.subplots(2, 2, figsize=(12, 10))
    panels = grid.flatten()

    for panel_no in range(shown):
        combo = combos.iloc[panel_no]
        n, sparsity = combo['n'], combo['sparsity']
        subset = rows[(rows['n'] == n) & (rows['sparsity'] == sparsity)].copy()

        # Mean and spread over repeated runs for each thread count, with the
        # spread table aligned row-for-row to the sorted means.
        per_thread = subset.groupby('threads')
        means = per_thread.mean(numeric_only=True).reset_index()
        spreads = per_thread.std(numeric_only=True).reset_index().fillna(0)
        means = means.sort_values('threads').reset_index(drop=True)
        spreads = spreads.set_index('threads').reindex(means['threads']).reset_index(drop=True)

        ax = panels[panel_no]
        slots = np.arange(len(means))
        stack_base = np.zeros(len(means))

        for column in component_labels:
            if column not in means.columns:
                continue
            heights = means[column].values
            if column in spreads.columns:
                spread = spreads[column].values
            else:
                spread = np.zeros_like(heights)
            # Cap each error bar at its segment height so it never extends
            # below the bottom of the segment.
            spread = np.minimum(spread, heights)
            # Components that are effectively zero stay out of the legend.
            legend_text = component_labels[column] if np.max(heights) > 1e-6 else None
            ax.bar(slots, heights, bottom=stack_base, yerr=spread, capsize=3,
                   label=legend_text, alpha=0.85, width=0.6)
            stack_base += heights

        ax.set_xlabel('Number of Threads')
        ax.set_ylabel('Time (seconds)')
        ax.set_title(f'n={int(n)}, sparsity={int(sparsity)}%')
        ax.set_xticks(slots)
        ax.set_xticklabels(
            ['sequential' if int(t) == -1 else str(int(t)) for t in means['threads']]
        )
        if panel_no == 0:
            ax.legend(fontsize=8)
        ax.grid(True, alpha=0.3, axis='y')

    # Blank out grid cells that received no configuration.
    for spare in panels[shown:]:
        spare.axis('off')

    fig.suptitle(f'Execution Time Breakdown (User: {user})', fontsize=16)
    plt.tight_layout()

    fname = os.path.join(PLOTS_DIR, f'timing_breakdown_all_configs_{user}.png')
    fig.savefig(fname, dpi=300)
    plt.close(fig)


# Compact timing plots - now combined into subplots
def plot_compact_timing_subplots(df, user):
    """Save a 2x2 figure of side-by-side component timings for one user.

    For each of the first four ``(n, sparsity)`` combinations of ``user``
    (ordered by ``n``, then ``sparsity``) a subplot shows three grouped bars
    per thread count: the mean of ``time_csr_construct``,
    ``time_spmv_total`` and ``time_dense_total``. Error bars are the
    per-thread standard deviations, capped at the bar height. The figure is
    written to ``compact_timing_all_<user>.png`` inside ``PLOTS_DIR``.

    Args:
        df: Results table with ``user``, ``n``, ``sparsity``, ``threads`` and
            the three timing columns named above.
        user: Value of the ``user`` column to plot.

    Returns:
        None. Nothing is drawn or saved when ``user`` has no rows.
    """
    selected = df[df['user'] == user].copy()
    if selected.empty:
        return

    # Distinct (n, sparsity) pairs in ascending order.
    combos = selected.groupby(['n', 'sparsity']).size().reset_index()[['n', 'sparsity']]
    combos = combos.sort_values(['n', 'sparsity']).reset_index(drop=True)
    if len(combos) == 0:
        return

    # The grid is fixed at 2x2, so at most four combinations are shown.
    shown = min(len(combos), 4)
    fig, grid = plt.subplots(2, 2, figsize=(12, 10))
    panels = grid.flatten()

    bar_width = 0.25
    # (timing column, horizontal shift, legend text, bar colour), in draw order.
    series_specs = (
        ('time_csr_construct', -bar_width, 'CSR construct', '#4C72B0'),
        ('time_spmv_total', 0, 'CSR SpMV', '#55A868'),
        ('time_dense_total', bar_width, 'Dense', '#C44E52'),
    )

    for panel_no in range(shown):
        combo = combos.iloc[panel_no]
        n, sparsity = combo['n'], combo['sparsity']
        subset = selected[(selected['n'] == n) & (selected['sparsity'] == sparsity)].copy()
        if subset.empty:
            continue

        # Mean and spread over repeated runs for each thread count, with the
        # spread table aligned row-for-row to the sorted means.
        per_thread = subset.groupby('threads')
        means = per_thread.mean(numeric_only=True).reset_index().sort_values('threads')
        spreads = per_thread.std(numeric_only=True).reset_index().fillna(0)
        spreads = spreads.set_index('threads').reindex(means['threads']).reset_index(drop=True)

        thread_counts = means['threads'].values
        slots = np.arange(len(thread_counts))
        ax = panels[panel_no]

        # Cap every error bar at its bar height so it cannot dip below zero.
        capped = {
            column: np.minimum(spreads[column].values, means[column].values)
            for column, _shift, _text, _colour in series_specs
        }
        for column, shift, legend_text, colour in series_specs:
            ax.bar(slots + shift, means[column].values, bar_width,
                   yerr=capped[column], capsize=3,
                   label=legend_text, color=colour)

        ax.set_xticks(slots)
        ax.set_xticklabels(['seq' if t == -1 else str(int(t)) for t in thread_counts])
        ax.set_xlabel('Threads')
        ax.set_ylabel('Time (s)')
        ax.set_title(f'n={n} s={sparsity}%')
        ax.grid(axis='y', alpha=0.25)
        if panel_no == 0:
            ax.legend(fontsize=8)

    # Blank out grid cells that received no configuration.
    for spare in panels[shown:]:
        spare.axis('off')

    fig.suptitle(f'Component-wise Time Comparison (User: {user})', fontsize=14)
    plt.tight_layout()

    fname = os.path.join(PLOTS_DIR, f'compact_timing_all_{user}.png')
    fig.savefig(fname, dpi=300)
    plt.close(fig)


In [11]:
# CSR parallel speedup (baseline = sequential CSR) with same formatting as previous speedup plots
def plot_csr_parallel_speedup_subplots(df, user):
    """Save a 2x2 figure of CSR SpMV speedup versus thread count for one user.

    One subplot is drawn for each of the first four ``n`` values of ``user``
    (ascending). Within a subplot every sparsity level gets one line:
    ``speedup = baseline / mean(time_spmv_total)`` per thread count, where the
    baseline is the mean ``time_spmv_total`` of the rows labelled
    ``'sequential'`` for that sparsity, or of the 1-thread rows when no
    sequential rows exist. Error bars propagate the run-to-run standard
    deviation of both the baseline and the parallel timings. The figure is
    written to ``csr_parallel_speedup_all_<user>.png`` inside ``PLOTS_DIR``.

    Args:
        df: Results table with ``user``, ``n``, ``sparsity``, ``threads``,
            ``threads_label`` and ``time_spmv_total`` columns.
        user: Value of the ``user`` column to plot.

    Returns:
        None. Nothing is drawn or saved when ``user`` has no rows.
    """
    user_data = df[df['user'] == user].copy()
    if user_data.empty:
        return

    n_values = sorted(user_data['n'].unique())
    if len(n_values) == 0:
        return

    # The grid is fixed at 2x2, so at most four n values are shown.
    nplots = min(len(n_values), 4)
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    axes = axes.flatten()

    for idx, n in enumerate(n_values[:nplots]):
        data = user_data[user_data['n'] == n]
        if data.empty:
            continue
        ax = axes[idx]

        sparsities = sorted(data['sparsity'].unique())
        # Small horizontal shift per sparsity so error bars do not overlap.
        offset_step = 0.15
        half_span = offset_step * (len(sparsities) - 1) / 2
        offsets = np.linspace(-half_span, half_span, len(sparsities))

        # Relative spread of all sequential runs for this n, used for the
        # shaded band around 1.0.
        # NOTE(review): this pools sequential runs across sparsity levels, so
        # the band also reflects differences between sparsities — confirm
        # this is intended.
        seq_times = data.loc[data['threads_label'].str.lower() == 'sequential',
                             'time_spmv_total']
        baseline_uncertainty = 0
        if not seq_times.empty:
            baseline_mean = seq_times.mean()
            baseline_std = seq_times.std()
            if baseline_mean > 0 and not np.isnan(baseline_std):
                baseline_uncertainty = baseline_std / baseline_mean

        for sparsity_idx, s in enumerate(sparsities):
            raw_s = data[data['sparsity'] == s]

            # Baseline: sequential CSR runs, falling back to 1-thread runs.
            baseline_times = raw_s.loc[
                raw_s['threads_label'].str.lower() == 'sequential', 'time_spmv_total']
            if baseline_times.empty:
                baseline_times = raw_s.loc[raw_s['threads'] == 1, 'time_spmv_total']
            if baseline_times.empty:
                continue
            b = baseline_times.mean()
            if not b > 0:
                # A zero or NaN baseline gives no meaningful speedup.
                continue
            sb = baseline_times.std()
            if np.isnan(sb):
                # A single baseline run has no sample standard deviation.
                sb = 0.0

            # Per-thread mean and std come from the raw runs. An
            # already-averaged table has one row per thread count, whose
            # sample std is NaN and would always yield zero-width error bars.
            parallel = raw_s[raw_s['threads'] != -1]
            stats = (parallel.groupby('threads')['time_spmv_total']
                     .agg(['mean', 'std'])
                     .sort_index())
            # Drop thread counts whose mean time is zero/NaN to avoid
            # dividing by zero below.
            stats = stats[stats['mean'] > 0]
            if stats.empty:
                continue

            m = stats['mean'].values
            sm = stats['std'].fillna(0).values
            speedup_mean = b / m
            # First-order error propagation for b / m:
            # var = (sb^2)/(m^2) + (b^2)*(sm^2)/(m^4)
            speedup_std = np.sqrt((sb**2) / (m**2) + (b**2) * (sm**2) / (m**4))

            x_pos = stats.index.values + offsets[sparsity_idx]
            ax.errorbar(x_pos, speedup_mean, yerr=speedup_std,
                        marker='o', label=f's={s}%', capsize=4)

        # Reference line at 1.0 (no speedup over the baseline).
        ax.axhline(y=1.0, color='r', linestyle='--', alpha=0.5, label='No speedup (1.0x)')

        if baseline_uncertainty > 0:
            ax.axhspan(1.0 - baseline_uncertainty, 1.0 + baseline_uncertainty,
                       color='red', alpha=0.1, label='Sequential Uncertainty')

        # Tick at every parallel thread count (sequential rows are excluded).
        all_threads = sorted(t for t in data['threads'].unique() if t != -1)
        ax.set_xticks(all_threads)
        ax.set_xticklabels([str(int(t)) for t in all_threads])
        ax.set_xlabel('Threads')
        ax.set_ylabel('Speedup (x)')
        ax.set_title(f'n={n}')
        ax.grid(True, alpha=0.25)
        ax.legend(fontsize=8)

    # Blank out grid cells that received no n value.
    for idx in range(nplots, 4):
        axes[idx].axis('off')

    fig.suptitle(f'CSR SpMV Parallel Speedup vs Thread Count (User: {user})', fontsize=14)
    plt.tight_layout()

    fname = os.path.join(PLOTS_DIR, f'csr_parallel_speedup_all_{user}.png')
    fig.savefig(fname, dpi=300)
    plt.close(fig)


# Dense vs CSR speedup (dense / csr) with same formatting
def plot_dense_vs_csr_subplots(df, user):
    """Save a 2x2 figure of the dense-to-CSR time ratio for one user.

    One subplot is drawn for each of the first four ``n`` values of ``user``
    (ascending). Within a subplot every sparsity level gets one line:
    ``ratio = mean(time_dense_total) / baseline`` per thread count. The
    baseline is the mean ``time_spmv_total`` of the rows labelled
    ``'sequential'`` for that sparsity, or of the 1-thread rows when no
    sequential rows exist — so the dense time at each thread count is
    compared against a single CSR baseline, not against CSR at the same
    thread count. Error bars propagate the run-to-run standard deviation of
    both quantities. The figure is written to ``dense_vs_csr_all_<user>.png``
    inside ``PLOTS_DIR``.

    Args:
        df: Results table with ``user``, ``n``, ``sparsity``, ``threads``,
            ``threads_label``, ``time_spmv_total`` and ``time_dense_total``
            columns.
        user: Value of the ``user`` column to plot.

    Returns:
        None. Nothing is drawn or saved when ``user`` has no rows.
    """
    user_data = df[df['user'] == user].copy()
    if user_data.empty:
        return

    n_values = sorted(user_data['n'].unique())
    if len(n_values) == 0:
        return

    # The grid is fixed at 2x2, so at most four n values are shown.
    nplots = min(len(n_values), 4)
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    axes = axes.flatten()

    for idx, n in enumerate(n_values[:nplots]):
        data = user_data[user_data['n'] == n]
        if data.empty:
            continue
        ax = axes[idx]

        sparsities = sorted(data['sparsity'].unique())
        # Small horizontal shift per sparsity so error bars do not overlap.
        offset_step = 0.15
        half_span = offset_step * (len(sparsities) - 1) / 2
        offsets = np.linspace(-half_span, half_span, len(sparsities))

        # Relative spread of all sequential CSR runs for this n, used for the
        # shaded band around 1.0.
        # NOTE(review): this pools sequential runs across sparsity levels, so
        # the band also reflects differences between sparsities — confirm
        # this is intended.
        seq_times = data.loc[data['threads_label'].str.lower() == 'sequential',
                             'time_spmv_total']
        baseline_uncertainty = 0
        if not seq_times.empty:
            baseline_mean = seq_times.mean()
            baseline_std = seq_times.std()
            if baseline_mean > 0 and not np.isnan(baseline_std):
                baseline_uncertainty = baseline_std / baseline_mean

        for sparsity_idx, s in enumerate(sparsities):
            raw_s = data[data['sparsity'] == s]

            # CSR baseline: sequential runs, falling back to 1-thread runs.
            baseline_times = raw_s.loc[
                raw_s['threads_label'].str.lower() == 'sequential', 'time_spmv_total']
            if baseline_times.empty:
                baseline_times = raw_s.loc[raw_s['threads'] == 1, 'time_spmv_total']
            if baseline_times.empty:
                continue
            b = baseline_times.mean()
            if not b > 0:
                # The baseline is the divisor; zero or NaN gives no ratio.
                continue
            sb = baseline_times.std()
            if np.isnan(sb):
                # A single baseline run has no sample standard deviation.
                sb = 0.0

            # Per-thread mean and std come from the raw runs. An
            # already-averaged table has one row per thread count, whose
            # sample std is NaN and would always yield zero-width error bars.
            parallel = raw_s[raw_s['threads'] != -1]
            stats = (parallel.groupby('threads')['time_dense_total']
                     .agg(['mean', 'std'])
                     .sort_index())
            if stats.empty:
                continue

            m = stats['mean'].values
            sm = stats['std'].fillna(0).values
            speedup_mean = m / b
            # First-order error propagation for m / b:
            # var = (sm^2)/(b^2) + (m^2)*(sb^2)/(b^4)
            speedup_std = np.sqrt((sm**2) / (b**2) + (m**2) * (sb**2) / (b**4))

            x_pos = stats.index.values + offsets[sparsity_idx]
            ax.errorbar(x_pos, speedup_mean, yerr=speedup_std,
                        marker='o', label=f's={s}%', capsize=4)

        # Reference line at 1.0 (dense as fast as the CSR baseline).
        ax.axhline(y=1.0, color='r', linestyle='--', alpha=0.5, label='Equal performance (1.0x)')

        if baseline_uncertainty > 0:
            ax.axhspan(1.0 - baseline_uncertainty, 1.0 + baseline_uncertainty,
                       color='red', alpha=0.1, label='Sequential Uncertainty')

        # Tick at every parallel thread count (sequential rows are excluded).
        all_threads = sorted(t for t in data['threads'].unique() if t != -1)
        ax.set_xticks(all_threads)
        ax.set_xticklabels([str(int(t)) for t in all_threads])
        ax.set_xlabel('Threads')
        ax.set_ylabel('Speedup (Dense / CSR)')
        ax.set_title(f'n={n}')
        ax.grid(True, alpha=0.25)
        ax.legend(fontsize=8)

    # Blank out grid cells that received no n value.
    for idx in range(nplots, 4):
        axes[idx].axis('off')

    fig.suptitle(f'Dense vs CSR Performance Ratio (User: {user})', fontsize=14)
    plt.tight_layout()

    fname = os.path.join(PLOTS_DIR, f'dense_vs_csr_all_{user}.png')
    fig.savefig(fname, dpi=300)
    plt.close(fig)


In [12]:
# Generate plots per user
for user in sorted(res['user'].unique()):
    print(f'Generating plots for user: {user}')

    # Each plotting function filters the table down to `user` itself, so the
    # full results table is passed and no per-user slice is built here.
    plot_timing_breakdown_multiplots(res, user)
    plot_compact_timing_subplots(res, user)
    plot_csr_parallel_speedup_subplots(res, user)
    plot_dense_vs_csr_subplots(res, user)

print('All plots generated and saved to', PLOTS_DIR)

Generating plots for user: ea24205
Generating plots for user: marr
Generating plots for user: phoebus
All plots generated and saved to /home/marr/threads/Thread-Experiments/2_2_sparse_array_vector_multiplication/plots
