In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import numpy as np
from collections import defaultdict

# Root directory scanned by load_all_test_data: one sub-directory per allocator,
# each holding that allocator's .csv test logs.
BASE_TEST_DIR = 'results/reports'
# Directory handed to the plotting functions as the destination for generated PDFs.
OUTPUT_DIR = 'results/graphs'

In [None]:
def parse_log_file(filepath):
    """
    Parse a single comma-separated log file into one DataFrame per record type.

    The first field of each line selects how the line is interpreted:

    - ``META,tick_hz,<int>``: sets the tick frequency used for unit conversion
      (defaults to 1 when no such line is present).
    - ``TIME,...``: one timed operation; stored under ``'time'`` with a derived
      ``duration_ticks`` (``t_out - t_in``) column.
    - ``SNAP,...``: a heap snapshot; stored under ``'snap'`` and stamped with the
      ``t_out`` of the most recent TIME record (0 if none has been seen yet).
    - ``FAULT,...``: stored under ``'fault'`` (tick and error code).
    - ``LEAK,...`` / ``NOLEAK,...``: stored under ``'leak'``.

    Lines with any other keyword are ignored. Lines that are too short or hold
    non-integer numeric fields are reported on stdout and skipped.

    Args:
        filepath (str): Path of the log file to read.

    Returns:
        dict | None: ``None`` if the file does not exist. Otherwise a dict that
        maps each record type that actually occurred (``'time'``, ``'snap'``,
        ``'fault'``, ``'leak'``) to a DataFrame, plus ``'meta'`` ->
        ``{'tick_hz': <int>}``. Frames with ``duration_ticks`` gain
        ``duration_us``; frames with ``timestamp`` gain ``time_s``.
    """
    if not os.path.exists(filepath):
        print(f"File not found: {filepath}")
        return None

    tick_hz = 1
    data = defaultdict(list)

    with open(filepath, 'r') as f:
        for line in f:
            parts = line.strip().split(',')
            if not parts or not parts[0]:
                continue
            keyword = parts[0]

            try:
                if keyword == "META" and parts[1] == "tick_hz":
                    tick_hz = int(parts[2])
                elif keyword == "TIME":
                    record = {
                        'phase': parts[1], 'operation': parts[2], 'size': int(parts[3]),
                        't_in': int(parts[4]), 't_out': int(parts[5]),
                        'duration_ticks': int(parts[5]) - int(parts[4]),
                        'result': parts[6], 'alloc_cnt': int(parts[7]), 'free_cnt': int(parts[8]),
                    }
                    data['time'].append(record)
                elif keyword == "SNAP":
                    # Use .get() rather than data['time']: indexing a defaultdict
                    # would insert an empty 'time' list, which later becomes an
                    # empty, column-less DataFrame that breaks consumers
                    # filtering on the 'phase' column.
                    time_records = data.get('time')
                    record = {
                        'phase': parts[1], 'free_bytes': int(parts[2]),
                        'allocated_bytes': int(parts[3]), 'max_allocated_bytes': int(parts[4]),
                        'timestamp': time_records[-1]['t_out'] if time_records else 0
                    }
                    data['snap'].append(record)
                elif keyword == "FAULT":
                    data['fault'].append({'tick': int(parts[1]), 'error_code': parts[3]})
                elif keyword in ["LEAK", "NOLEAK"]:
                    data['leak'].append({'result': keyword, 'address': parts[1]})
            except (IndexError, ValueError) as e:
                print(f"Skipping malformed line in {filepath}: {line.strip()} -> Error: {e}")

    dfs = {}
    for key, records in data.items():
        df = pd.DataFrame(records)
        # A tick_hz of 0 would divide by zero; fall back to 0 for derived columns.
        if 'duration_ticks' in df.columns:
            df['duration_us'] = (df['duration_ticks'] * 1000000.0 / tick_hz) if tick_hz else 0
        if 'timestamp' in df.columns:
            df['time_s'] = df['timestamp'] / tick_hz if tick_hz else 0
        dfs[key] = df
    dfs['meta'] = {'tick_hz': tick_hz}
    return dfs

def load_all_test_data(base_dir):
    """
    Collect parsed logs for every allocator directory found under `base_dir`.

    Each sub-directory of `base_dir` is treated as one allocator; every file in
    it whose name ends with ".csv" is parsed with `parse_log_file` and stored
    under the file name without its extension.

    Returns:
        dict: ``{allocator_name: {test_name: parsed_data}}``. Allocators with no
        successfully parsed logs are omitted. An empty dict is returned when
        `base_dir` is not a directory.
    """
    if not os.path.isdir(base_dir):
        print(f"Error: Base directory '{base_dir}' not found.")
        return {}

    print(f"Searching for allocators in '{base_dir}'...")
    collected = {}
    for entry in sorted(os.listdir(base_dir)):
        entry_dir = os.path.join(base_dir, entry)
        if not os.path.isdir(entry_dir):
            continue
        for log_name in sorted(os.listdir(entry_dir)):
            if not log_name.endswith(".csv"):
                continue
            parsed = parse_log_file(os.path.join(entry_dir, log_name))
            if parsed:
                # Only create the allocator entry once there is something to store.
                collected.setdefault(entry, {})[os.path.splitext(log_name)[0]] = parsed
    return collected

In [None]:

# Parse every allocator's logs under BASE_TEST_DIR, then summarise what was loaded.
all_allocator_data = load_all_test_data(BASE_TEST_DIR)

print("\n--- Data Loading Complete ---")
if not all_allocator_data:
    print("No data was loaded. Please check your directory structure and .csv files.")
else:
    print("The following data has been loaded and is ready for analysis:")
    for allocator, tests in all_allocator_data.items():
        print(f"  - {allocator}: {list(tests)}")


In [None]:
def bucket_time_data(df, bucket_size, tick_hz):
    """
    Group consecutive TIME rows into fixed-size buckets and time each bucket.

    Rows are taken in their existing order, `bucket_size` at a time; trailing
    rows that do not fill a whole bucket are dropped. A bucket's duration is the
    span from its first row's `t_in` to its last row's `t_out`.

    Returns:
        pandas.DataFrame: indexed by bucket number, with columns `t_in_first`,
        `t_out_last`, `total_duration_ticks`, `avg_duration_ticks` and
        `avg_duration_us`. Empty when `df` is empty, `tick_hz` is 0, or there
        are fewer than `bucket_size` rows.
    """
    if df.empty or tick_hz == 0:
        return pd.DataFrame()

    full_buckets = len(df) // bucket_size
    if not full_buckets:
        return pd.DataFrame()

    usable_rows = full_buckets * bucket_size
    # Row k belongs to bucket k // bucket_size.
    labelled = df.iloc[:usable_rows].assign(bucket=np.arange(usable_rows) // bucket_size)

    bucketed = labelled.groupby('bucket').agg(
        t_in_first=('t_in', 'first'),
        t_out_last=('t_out', 'last'),
    )

    span_ticks = bucketed['t_out_last'] - bucketed['t_in_first']
    per_op_ticks = span_ticks / bucket_size
    bucketed['total_duration_ticks'] = span_ticks
    bucketed['avg_duration_ticks'] = per_op_ticks
    bucketed['avg_duration_us'] = (per_op_ticks * 1000000.0) / tick_hz

    return bucketed

In [None]:
def create_and_save_grid(plot_data_list, figure_title, filename, output_dir, bucket_size, plot_unit='ticks', outlier_k=3.0):
    """
    Draw one bucketed-latency line plot per entry on a 4x3 grid and save it as a PDF.

    For allocators whose name starts with 'freertos', points are filtered before
    plotting in two passes: first everything above an absolute threshold of
    30000 µs (converted to ticks when `plot_unit` is 'ticks') is dropped, then
    everything above ``Q3 + outlier_k * IQR`` of the remaining points is dropped
    (the second pass is skipped when the IQR is not positive). Other allocators
    are plotted unfiltered.

    Args:
        plot_data_list (list[dict]): One dict per panel with keys 'data'
            (bucketed DataFrame), 'allocator' (panel title) and 'tick_hz'
            (needed for 'freertos*' entries when plotting in ticks). Entries
            beyond the 12 available panels are not drawn.
        figure_title (str): Title placed above the whole grid.
        filename (str): Name of the PDF file to write.
        output_dir (str): Directory for the PDF; created if it does not exist.
        bucket_size (int): Bucket size, shown in the x-axis label.
        plot_unit (str): 'us' plots `avg_duration_us`; anything else plots
            `avg_duration_ticks`.
        outlier_k (float): IQR multiplier for the statistical outlier pass.

    Returns:
        None. Prints a message and returns early when `plot_data_list` is empty.
    """
    if not plot_data_list:
        print(f"No data to plot for '{figure_title}'.")
        return

    ABSOLUTE_THRESHOLD_US = 30000.0

    if plot_unit == 'us':
        y_column, y_label = 'avg_duration_us', 'Avg Latency per Op (µs)'
    else:
        y_column, y_label = 'avg_duration_ticks', 'Avg Latency per Op (Ticks)'

    nrows, ncols = 4, 3
    fig, axes = plt.subplots(nrows, ncols, figsize=(12, 8), constrained_layout=True)
    axes = axes.flatten()

    # zip stops at the shorter sequence, so entries past the last panel are skipped.
    for ax, plot_info in zip(axes, plot_data_list):
        df = plot_info['data']
        allocator = plot_info['allocator']

        plot_df = df

        if allocator.startswith('freertos'):
            if plot_unit == 'us':
                absolute_threshold = ABSOLUTE_THRESHOLD_US
            else:  # unit is 'ticks'
                absolute_threshold = (ABSOLUTE_THRESHOLD_US * plot_info['tick_hz']) / 1000000.0

            # Pass 1: drop points above the absolute threshold.
            sane_df = df[df[y_column] <= absolute_threshold]

            # Pass 2: drop statistical outliers among the remaining points.
            q1 = sane_df[y_column].quantile(0.25)
            q3 = sane_df[y_column].quantile(0.75)
            iqr = q3 - q1

            # A NaN IQR (no remaining points) compares False and skips the filter.
            if iqr > 0:
                statistical_threshold = q3 + (iqr * outlier_k)
                plot_df = sane_df[sane_df[y_column] <= statistical_threshold]
            else:
                plot_df = sane_df

        ax.plot(plot_df.index, plot_df[y_column], marker='.', linestyle='-', markersize=4)
        ax.set_title(allocator, fontsize=14, fontweight='bold')
        ax.set_xlabel(f'Bucket Index (Size = {bucket_size})', fontsize=10)
        ax.set_ylabel(y_label, fontsize=10)
        ax.grid(True, which="both", ls="--", linewidth=0.5)

    # Hide the panels that received no data.
    for unused_ax in axes[len(plot_data_list):]:
        unused_ax.axis('off')

    fig.suptitle(figure_title, fontsize=22, fontweight='bold')

    # Saving fails if the target directory is missing, so create it first.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, filename)
    fig.savefig(output_path, format='pdf', bbox_inches='tight')
    print(f"\n  - Saved grid plot to {output_path}")
    plt.show()
    plt.close(fig)

In [None]:
# Default panel order for the latency grids.
_DEFAULT_ALLOCATOR_ORDER = (
    'contiki-heapmem',
    'freertosv1',
    'newlib',
    'contiki-memb',
    'freertosv2',
    'newlib-nano',
    'riot-mema',
    'freertosv4',
    'zephyr',
    'riot-tlsf',
)


def _collect_bucketed_plots(all_data, allocators, test_name, phase, bucket_size):
    """Bucket the `phase` TIME rows of `test_name` for each allocator; return plot descriptors."""
    plots = []
    for allocator in allocators:
        test = all_data.get(allocator, {}).get(test_name)
        if not test or 'time' not in test:
            continue
        time_df = test['time']
        # A TIME frame without a 'phase' column holds no usable rows; skip it
        # instead of failing with a KeyError on the phase filter.
        if 'phase' not in time_df.columns:
            continue
        tick_hz = test['meta'].get('tick_hz', 1)
        bucketed = bucket_time_data(time_df[time_df['phase'] == phase], bucket_size, tick_hz)
        if not bucketed.empty:
            plots.append({
                'allocator': allocator,
                'test_name': test_name,
                'data': bucketed,
                'tick_hz': tick_hz,
            })
    return plots


def plot_bucketed_latency(all_data, output_dir, bucket_size=2, unit='ticks', allocators=None):
    """
    Bucket and plot latency data for the MixedLifetime and LeakExhaust tests.

    For each allocator, the 'burst' rows of MixedLifetime and the 'leakloop'
    rows of LeakExhaust are bucketed with `bucket_time_data`; each test type is
    then drawn as its own grid by `create_and_save_grid`.

    Args:
        all_data (dict): The fully loaded dictionary of allocator data
            (``{allocator: {test_name: parsed_data}}``).
        output_dir (str): The directory to save plots into.
        bucket_size (int): The number of operations to group into a bucket.
        unit (str): The unit for the y-axis. Can be 'ticks' or 'us'.
        allocators (iterable[str] | None): Allocators to plot, in panel order.
            Defaults to the built-in order. Names missing from `all_data` are
            skipped; allocators in `all_data` that are not listed are not plotted.

    Returns:
        None. Prints a message and returns early for an invalid `unit`.
    """
    # Outlier sensitivity passed to the plotting helper.
    OUTLIER_K = 3.0

    if unit not in ['ticks', 'us']:
        print(f"Invalid unit '{unit}'. Please choose 'ticks' or 'us'.")
        return

    if allocators is None:
        allocators = _DEFAULT_ALLOCATOR_ORDER
    else:
        # Materialise once so a one-shot iterable can be walked for both test types.
        allocators = list(allocators)

    unit_suffix = unit.capitalize()

    # (test name, phase to keep, figure title, output file name)
    grid_specs = [
        ('MixedLifetime', 'burst',
         f'Avg Latency in Buckets of {bucket_size}: Mixed Lifetime (Burst)',
         f"Bucketed_Latency_MixedLifetime_B{bucket_size}_{unit_suffix}_Filtered.pdf"),
        ('LeakExhaust', 'leakloop',
         f'Avg Malloc Latency in Buckets of {bucket_size}: Leak & Exhaust',
         f"Bucketed_Latency_LeakExhaust_B{bucket_size}_{unit_suffix}_Filtered.pdf"),
    ]

    for test_name, phase, title, filename in grid_specs:
        plots = _collect_bucketed_plots(all_data, allocators, test_name, phase, bucket_size)
        create_and_save_grid(plots,
                             title,
                             filename,
                             output_dir,
                             bucket_size=bucket_size,
                             plot_unit=unit,
                             outlier_k=OUTLIER_K)

In [None]:
# Latency grids for both test types: buckets of 2 operations, y-axis in microseconds.
plot_bucketed_latency(all_allocator_data, OUTPUT_DIR, bucket_size=2, unit="us")

In [None]:
def plot_volumetric_efficiency(all_data, output_dir, theoretical_max_bytes=65536):
    """
    Plot, per allocator, the `max_allocated_bytes` recorded by the LeakExhaust test.

    The value is read from the first snapshot whose phase is
    'after_leakloop_exhaustion'; if no such snapshot exists, the last available
    snapshot is used and a note is printed. The result is drawn as a horizontal
    bar chart sorted from largest to smallest, with a vertical reference line at
    `theoretical_max_bytes`, and saved as a PDF.

    Args:
        all_data (dict): ``{allocator: {test_name: parsed_data}}``; only
            allocators that have a 'snap' frame for 'LeakExhaust' are included.
        output_dir (str): Directory for the PDF; created if it does not exist.
        theoretical_max_bytes (int): Position of the reference line, also shown
            in the legend label.

    Returns:
        None. Prints a message and returns early when no allocator has usable
        snapshot data.
    """
    print("\n--- Generating Modified Volumetric Efficiency Plot (Metric 4) ---")

    efficiency_data = []
    for allocator, tests in all_data.items():
        if 'LeakExhaust' in tests and 'snap' in tests['LeakExhaust']:
            snap_df = tests['LeakExhaust']['snap']
            if snap_df.empty:
                print(f"  - Warning: No snapshot data found for {allocator}. Skipping.")
                continue

            final_snap = snap_df[snap_df['phase'] == 'after_leakloop_exhaustion']

            # If the specific final snapshot isn't found, use the last available one.
            if final_snap.empty:
                final_snap = snap_df.tail(1)
                print(f"  - Note: Using last available snapshot for {allocator} (phase: '{final_snap['phase'].iloc[0]}').")

            efficiency_data.append({
                'allocator': allocator,
                'max_allocated_bytes': final_snap['max_allocated_bytes'].iloc[0]
            })

    if not efficiency_data:
        print("No LeakExhaust snapshot data found to generate efficiency plot.")
        return

    efficiency_df = pd.DataFrame(efficiency_data).sort_values('max_allocated_bytes', ascending=False)

    fig, ax = plt.subplots(figsize=(10, 7))
    bar_plot = sns.barplot(
        x='max_allocated_bytes',
        y='allocator',
        data=efficiency_df,
        palette='magma',
        hue='allocator',
        legend=False,
        orient='h',
        ax=ax
    )

    ax.set_title('Volumetric Efficiency: Max Bytes Allocated Before Exhaustion', fontsize=20, fontweight='bold')
    ax.set_xlabel('Max Allocated Bytes (More is Better)', fontsize=12)
    ax.set_ylabel('Allocator', fontsize=12)
    ax.grid(axis='x', linestyle='--', linewidth=0.6)

    ax.axvline(x=theoretical_max_bytes, color='r', linestyle='--', linewidth=2,
               label=f'Theoretical Max Heap ({theoretical_max_bytes} bytes)')
    ax.legend()

    # Annotate each bar with its value.
    for container in bar_plot.containers:
        bar_plot.bar_label(container, fmt='{:,.0f}', padding=5)

    fig.tight_layout()

    # Saving fails if the target directory is missing, so create it first.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    filename = "Volumetric_Efficiency_Comparison_Modified.pdf"
    output_path = os.path.join(output_dir, filename)
    fig.savefig(output_path, format='pdf')
    print(f"  - Saved modified volumetric efficiency plot to {output_path}")

    plt.show()
    plt.close(fig)


In [None]:
# Bar chart of the max bytes each allocator served in the LeakExhaust test.
plot_volumetric_efficiency(all_data=all_allocator_data, output_dir=OUTPUT_DIR)