In [13]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Input CSV and output directory are both resolved against the current working directory.
CSV_PATH = os.path.join(os.getcwd(), 'results_2.3.csv')
PLOTS_DIR = os.path.join(os.getcwd(), 'plots')
os.makedirs(PLOTS_DIR, exist_ok=True)

res = pd.read_csv(CSV_PATH)
if 'user' not in res.columns:
    # Results without a user column are attributed to a single placeholder user.
    res['user'] = 'unknown'
else:
    # Blank cells are parsed as NaN. NaN never equals itself, so such rows would
    # match no `df['user'] == user` filter, and NaN mixed with string names makes
    # sorted(res['user'].unique()) raise TypeError. Use the same placeholder instead.
    res['user'] = res['user'].fillna('unknown')

res['n'] = res['n'].astype(int)
# Keep the raw thread label as text; 'threads' itself is converted to integers below.
res['threads_label'] = res['threads'].astype(str)

def threads_to_int(x):
    """Convert a thread-count label into an integer.

    Args:
        x: Label to convert; normally a string such as 'sequential', '8' or '4.0'.

    Returns:
        -1 for 'sequential' (case-insensitive, surrounding whitespace ignored),
        the integer value for integer-like input (including float-formatted
        text such as '4.0'), and 1 for anything that cannot be interpreted.
    """
    text = str(x).strip()
    if text.lower() == 'sequential':
        return -1
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        pass
    # A numeric column containing missing values is parsed as float, so its
    # labels read '4.0' rather than '4'; int('4.0') fails, so go through float.
    try:
        as_float = float(text)
    except (TypeError, ValueError):
        return 1
    # nan/inf and fractional values are not valid thread counts -> fallback.
    return int(as_float) if as_float.is_integer() else 1

# Replace the raw 'threads' column with its integer form (-1 marks the sequential runs).
res['threads'] = res['threads_label'].map(threads_to_int)

def sanitize_user(u):
    """Return ``u`` as text that is safe to embed in a file name.

    Surrounding whitespace is trimmed first; every remaining character other
    than ASCII letters, digits, '.', '_' and '-' is then replaced by '_'.
    """
    trimmed = str(u).strip()
    return re.sub(r"[^0-9A-Za-z._-]", '_', trimmed)

# Timing column -> legend label. Insertion order matters: the breakdown plot
# iterates this mapping in order when stacking the bars.
component_labels = dict(
    time_alloc='Alloc',
    time_init='Init',
    time_thread_create='Thread Create',
    time_compute='Compute',
    time_join='Join',
    time_reduce='Reduce',
    time_verify='Verify',
    time_cleanup='Cleanup',
)

In [14]:
def _summarize_by_threads(config_data):
    """Return (mean, std) of the numeric columns per 'threads' value, row-aligned and sorted by 'threads'."""
    grouped = config_data.groupby('threads')
    mean_data = grouped.mean(numeric_only=True).reset_index()
    mean_data = mean_data.sort_values('threads').reset_index(drop=True)
    # std is NaN for a group holding a single run; treat that as "no spread".
    std_data = grouped.std(numeric_only=True).fillna(0)
    std_data = std_data.reindex(mean_data['threads']).reset_index(drop=True)
    return mean_data, std_data


def plot_timing_breakdown_multiplots(df, user):
    """Save a grid of stacked-bar timing breakdowns for one user, one panel per array size.

    Each panel shows, per thread count, the mean of every timing component
    listed in ``component_labels`` stacked on top of each other, with the
    per-component standard deviation as error bars. The figure is written to
    ``PLOTS_DIR`` as ``timing_breakdown_all_configs_<user>.png``.

    Args:
        df: Results frame with at least 'user', 'n' and 'threads' columns plus
            the timing component columns.
        user: Value of the 'user' column to plot. Nothing is drawn or saved
            when no row matches.

    Returns:
        None.
    """
    user_data = df[df['user'] == user]
    if user_data.empty:
        return

    n_values = sorted(user_data['n'].unique())

    # Aggregate every panel up front so the legend decision can see all of them.
    panels = [(n,) + _summarize_by_threads(user_data[user_data['n'] == n]) for n in n_values]

    # The legend is only drawn on the first panel, so a component must be
    # labelled there whenever it is non-negligible in ANY panel; otherwise a
    # component that only matters for larger n would be missing from the legend.
    labelled = {
        component
        for _, mean_data, _ in panels
        for component in component_labels
        if component in mean_data.columns and np.max(mean_data[component].values) > 1e-6
    }

    # Two columns, as many rows as needed (never fewer than the 2x2 grid used
    # for up to four sizes), so no array size is silently dropped.
    ncols = 2
    nrows = max(2, -(-len(panels) // ncols))
    fig, axes = plt.subplots(nrows, ncols, figsize=(12, 5 * nrows))
    axes = axes.flatten()

    try:
        for idx, (n, mean_data, std_data) in enumerate(panels):
            ax = axes[idx]
            x_positions = np.arange(len(mean_data))
            bottom = np.zeros(len(mean_data))

            for component in component_labels:
                if component not in mean_data.columns:
                    continue
                values = mean_data[component].values
                errs = std_data[component].values if component in std_data.columns else np.zeros_like(values)
                # Clamp so the lower whisker never extends below the segment's own base.
                errs = np.minimum(errs, values)
                label = component_labels[component] if component in labelled else None
                ax.bar(x_positions, values, bottom=bottom, yerr=errs, capsize=3,
                       label=label, alpha=0.85, width=0.6)
                bottom += values

            ax.set_xlabel('Number of Threads')
            ax.set_ylabel('Time (seconds)')
            ax.set_title(f'n={int(n)}')
            ax.set_xticks(x_positions)
            # -1 is the marker used for the sequential (non-threaded) runs.
            tick_labels = mean_data['threads'].apply(lambda x: 'sequential' if int(x) == -1 else str(int(x)))
            ax.set_xticklabels(tick_labels)
            if idx == 0 and labelled:
                ax.legend(fontsize=8)
            ax.grid(True, alpha=0.3, axis='y')

        # Hide grid cells that have no array size assigned to them.
        for unused_ax in axes[len(panels):]:
            unused_ax.axis('off')

        fig.suptitle(f'Timing Breakdown per Array Size (User = {user})', fontsize=16)
        fig.tight_layout()

        uname = sanitize_user(user)
        fname = os.path.join(PLOTS_DIR, f'timing_breakdown_all_configs_{uname}.png')
        fig.savefig(fname, dpi=300)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

def plot_speedup_single(df, user):
    """Save a speedup-vs-threads plot for one user, one line per array size.

    The speedup of every parallel run is the mean sequential ``time_total``
    for the same ``n`` divided by that run's ``time_total``; the plot shows the
    mean and standard deviation of those speedups per (n, threads). The figure
    is written to ``PLOTS_DIR`` as ``speedup_all_<user>.png``.

    Args:
        df: Results frame with at least 'user', 'n', 'threads' and
            'time_total' columns; sequential runs are the rows with
            ``threads == -1``.
        user: Value of the 'user' column to plot. Nothing is drawn or saved
            when no row matches or when no parallel run has a sequential
            baseline for its ``n``.

    Returns:
        None.
    """
    user_data = df[df['user'] == user]
    if user_data.empty:
        return

    # Split on a single criterion so every row is either baseline or parallel,
    # never both and never neither.
    is_sequential = user_data['threads'] == -1
    sequential_data = user_data[is_sequential]
    parallel_data = user_data[~is_sequential]

    # Sequential baseline (mean and spread of the total time) per array size.
    baselines = sequential_data.groupby('n')['time_total'].agg(['mean', 'std']).reset_index()
    baselines.columns = ['n', 'baseline_mean', 'baseline_std']

    # Inner merge: parallel runs whose n has no sequential baseline are dropped.
    speedup_data = parallel_data.merge(baselines, on='n')
    speedup_data['speedup'] = speedup_data['baseline_mean'] / speedup_data['time_total']

    speedup_stats = speedup_data.groupby(['n', 'threads'])['speedup'].agg(['mean', 'std']).reset_index()
    # std is NaN when a configuration has a single run; draw no error bar then.
    speedup_stats['std'] = speedup_stats['std'].fillna(0)

    n_values = sorted(speedup_stats['n'].unique())
    if len(n_values) == 0:
        return

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        # Shift each series slightly along x so overlapping error bars stay readable.
        offset_step = 0.1
        half_span = offset_step * (len(n_values) - 1) / 2
        offsets = np.linspace(-half_span, half_span, len(n_values))

        for offset, n in zip(offsets, n_values):
            n_stats = speedup_stats[speedup_stats['n'] == n]
            ax.errorbar(n_stats['threads'].values + offset, n_stats['mean'].values,
                        yerr=n_stats['std'].values, marker='o', capsize=4, label=f'n={n}')

        ax.axhline(y=1.0, color='r', linestyle='--', alpha=0.5, label='No speedup (1.0x)')

        # Relative spread of the sequential baseline, shown as a band around 1.0x.
        # It is NaN when every size has only one sequential run (no std available),
        # and non-finite for a zero mean; skip the band rather than pass NaN bounds.
        baseline_uncertainty = baselines['baseline_std'].mean() / baselines['baseline_mean'].mean()
        if np.isfinite(baseline_uncertainty):
            ax.axhspan(1.0 - baseline_uncertainty, 1.0 + baseline_uncertainty,
                       color='red', alpha=0.1, label='Sequential Uncertainty')

        all_threads = sorted(parallel_data['threads'].unique())
        ax.set_xticks(all_threads)
        ax.set_xticklabels([str(int(t)) for t in all_threads])
        ax.set_xlabel('Threads')
        ax.set_ylabel('Speedup (x)')
        ax.set_title(f'Speedup per Array Size (User = {user})')
        ax.legend()
        ax.grid(True, alpha=0.25)
        fig.tight_layout()

        uname = sanitize_user(user)
        fname = os.path.join(PLOTS_DIR, f'speedup_all_{uname}.png')
        fig.savefig(fname, dpi=300)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

# Render both figures for every user found in the results, in sorted order.
plotters = (plot_timing_breakdown_multiplots, plot_speedup_single)
for user in sorted(res['user'].unique()):
    print(f'Generating plots for user: {user}')
    for plotter in plotters:
        plotter(res, user)

print('All plots generated and saved to', PLOTS_DIR)
    
    

Generating plots for user: marr
Generating plots for user: phoebus
All plots generated and saved to /home/marr/threads/Thread-Experiments/2_3_mergesort/plots
