## CIFAR

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import uuid
import numpy as np

def extract_quality(filepath, quality_metric='f1'):
    """
    Extract quality metrics from an Excel file.
    Returns init and step metrics for the specified quality metric.
    """
    xls = pd.ExcelFile(filepath)
    quality = pd.read_excel(xls, 'quality_comparison')
    quality.columns = quality.iloc[0]
    quality = quality.drop(quality.index[0])
    init_val = quality[quality['mode'] == quality_metric]['original'].values[0]
    step_val = quality[quality['mode'] == quality_metric]['fedcore'].values[0]
    return float(init_val), float(step_val)

def extract_computational(filepath):
    """
    Extract computational metrics from an Excel file.
    Returns latency, size, and throughput for init and step.
    """
    xls = pd.ExcelFile(filepath)
    computational = pd.read_excel(xls, 'computational_comparison')
    latency_init, size_init, thr_init = computational.iloc[2, 1], computational.iloc[3, 1], computational.iloc[4, 1]
    latency_step, size_step, thr_step = computational.iloc[2, 2], computational.iloc[3, 2], computational.iloc[4, 2]
    return (float(latency_init), float(size_init), float(thr_init)), (float(latency_step), float(size_step), float(thr_step))

def analyze_results(gpu_path, cpu_path, tab_name, quality_metric='f1'):
    """
    Analyze PEFT pipeline results and generate plots and tables for top 5 pipelines.
    """
    # Create output directories
    os.makedirs('imgs', exist_ok=True)
    os.makedirs('tables', exist_ok=True)

    # Initialize results storage
    results = []

    # Process both GPU and CPU folders
    for device_path, device in [(gpu_path, 'GPU'), (cpu_path, 'CPU')]:
        for pipeline in os.listdir(device_path):
            # Check if pipeline has exactly 4 steps
            if pipeline == '.DS_Store':
                continue
            try:
                xlsx_files = [f for f in os.listdir(os.path.join(device_path, pipeline)) if f.endswith('.xlsx')]
            except:
                _ = 1
            count = len(xlsx_files)
            if count < 3:
                continue

            pipeline_path = os.path.join(device_path, pipeline)
            if not os.path.isdir(pipeline_path):
                continue

            # Extract metrics for each step
            quality_metrics = []
            comp_metrics = []
            for step_file in sorted(os.listdir(pipeline_path)):
                if not step_file.endswith('.xlsx'):
                    continue
                step_path = os.path.join(pipeline_path, step_file)
                quality_metrics.append(extract_quality(step_path, quality_metric))
                comp_metrics.append(extract_computational(step_path))

            # Use final step metrics for comparison
            # try:
            init_quality, final_quality = quality_metrics[0][0], quality_metrics[-1][1]
            (init_latency, init_size, init_thr), (final_latency, final_size, final_thr) = comp_metrics[0][0], comp_metrics[-1][1]
            # except:
            #     _ = 1

            # Calculate percentage changes
            quality_change = ((final_quality - init_quality) / init_quality * 100) if init_quality != 0 else 0
            latency_change = ((final_latency - init_latency) / init_latency * 100) if init_latency != 0 else 0
            size_change = ((final_size - init_size) / init_size * 100) if init_size != 0 else 0
            thr_change = ((final_thr - init_thr) / init_thr * 100) if init_thr != 0 else 0

            results.append({
                'pipeline': pipeline,
                'device': device,
                'quality_init': init_quality,
                'quality_final': final_quality,
                'quality_change': quality_change,
                'latency': final_latency,
                'latency_change': latency_change,
                'throughput': final_thr,
                'throughput_change': thr_change,
                'size': final_size,
                'size_change': size_change
            })

    # Create DataFrame and sort by quality metric
    df = pd.DataFrame(results)
    if quality_metric.lower() == 'rmse':
        top_5_cpu = df[df['device'] == 'CPU'].sort_values('quality_final').head(5)
        top_5_gpu = df[df['device'] == 'GPU'].sort_values('quality_final').head(5)
    else:
        # top_5 = df.sort_values('quality_final', ascending=False).head(5)
        top_5_cpu = df[df['device'] == 'CPU'].sort_values('quality_final', ascending=False).head(5)
        top_5_gpu = df[df['device'] == 'GPU'].sort_values('quality_final', ascending=False).head(5)

    top_5 = pd.concat([top_5_gpu, top_5_cpu], axis=0).reset_index(drop=True)
    if quality_metric.lower() == 'rmse':
        best_top = top_5.sort_values('quality_final').head(5)
    else:
        best_top = top_5.sort_values('quality_final', ascending=False).head(5)
    # Print top 5 pipelines
    print("Top 5 Pipelines:")
    for _, row in best_top.iterrows():
        print(f"{row['device']} - {row['pipeline']}: {quality_metric} = {row['quality_final']:.3f} ({row['quality_change']:+.1f}%)")

    # Plot quality metrics before and after
    plt.figure(figsize=(12, 6))
    bar_width = 0.35
    index = np.arange(len(top_5))
    plt.bar(index, top_5['quality_init'], bar_width, label='Before')
    plt.bar(index + bar_width, top_5['quality_final'], bar_width, label='After')
    plt.xlabel('Pipeline')
    plt.ylabel(quality_metric)
    plt.title(f'{quality_metric} Before and After Pipeline')
    plt.xticks(index + bar_width/2, [f"{row['device']}: {row['pipeline']}" for _, row in top_5.iterrows()], rotation=45, ha='right')
    plt.legend()
    plt.tight_layout()
    plt.savefig('imgs/quality_comparison.png')
    plt.close()

    # Plot percentage changes
    metrics = ['quality_change', 'latency_change', 'throughput_change', 'size_change']
    metric_labels = [quality_metric, 'Latency', 'Throughput', 'Model Size']
    plt.figure(figsize=(12, 6))
    for i, (metric, label) in enumerate(zip(metrics, metric_labels)):
        plt.bar(index + i*bar_width, top_5[metric], bar_width, label=label)
    plt.xlabel('Pipeline')
    plt.ylabel('Percentage Change (%)')
    plt.title('Percentage Change in Metrics by Pipeline')
    plt.xticks(index + 1.5*bar_width, [f"{row['device']}: {row['pipeline']}" for _, row in top_5.iterrows()], rotation=45, ha='right')
    plt.legend()
    plt.tight_layout()
    plt.savefig('imgs/percent_change.png')
    plt.close()

    #  table data
    table_data = []
    # original row
    original_row = {
        'Pipeline': 'Original',
        quality_metric: f"{top_5['quality_init'].iloc[0]:.3f}",
        'CPU Latency (ms)': f"{df[df['device'] == 'CPU']['latency'].iloc[0]:.3f}",
        'GPU Latency (ms)': f"{df[df['device'] == 'GPU']['latency'].iloc[0]:.3f}",
        'CPU Throughput (IPS)': f"{df[df['device'] == 'CPU']['throughput'].iloc[0]:.0f}",
        'GPU Throughput (IPS)': f"{df[df['device'] == 'GPU']['throughput'].iloc[0]:.0f}",
        'Model Size (MB)': f"{top_5['size'].iloc[0]:.3f}"
    }
    # table_data.append(original_row)


    for _, row in top_5.iterrows():
        table_row = {
            # 'Pipeline': f"{row['device']}: {row['pipeline']}",
            'Pipeline': f"{row['pipeline']}",
            'Device': row['device'],
            quality_metric: f"{row['quality_final']:.3f} / {row['quality_change']:+.1f}%",
            'CPU Latency (ms)': f"{row['latency']:.3f} / {row['latency_change']:+.1f}%" if row['device'] == 'CPU' else '∞',
            'GPU Latency (ms)': f"{row['latency']:.3f} / {row['latency_change']:+.1f}%" if row['device'] == 'GPU' else '∞',
            'CPU Throughput (IPS)': f"{row['throughput']:.0f} / {row['throughput_change']:+.1f}%" if row['device'] == 'CPU' else '∞',
            'GPU Throughput (IPS)': f"{row['throughput']:.0f} / {row['throughput_change']:+.1f}%" if row['device'] == 'GPU' else '∞',
            'Model Size (MB)': f"{row['size']:.3f} / {row['size_change']:+.1f}%"
        }
        table_data.append(table_row)

    table_df = pd.DataFrame(table_data)
    table_df = table_df.groupby('Pipeline')[[
        quality_metric, 'CPU Latency (ms)', 'GPU Latency (ms)',
        'CPU Throughput (IPS)', 'GPU Throughput (IPS)', 'Model Size (MB)'
        ]].min().reset_index(drop=False)
    
    original_df = pd.DataFrame(original_row, index=[0])
    table_df = pd.concat([original_df, table_df], axis=0).reset_index(drop=True)
    table_df.to_csv(f'tables/{tab_name}.csv', index=True)

    # latex table
    latex_content = """
\\begin{table*}[h]
\\centering
\\caption{Top 5 PEFT Pipelines}
\\label{tab:peft_pipelines}
\\resizebox{\\textwidth}{!}{%
\\begin{tabular}{lcccccc}
\\toprule
\\textbf{Pipeline} & \\textbf{""" + quality_metric + """} & \\textbf{CPU Latency (ms)} & \\textbf{GPU Latency (ms)} & \\textbf{CPU Throughput (IPS)} & \\textbf{GPU Throughput (IPS)} & \\textbf{Model Size (MB)} \\
\\midrule
"""
    for i, row in enumerate(table_data):
        escaped_row = {k: str(v).replace('%', '\%').replace('∞', '$\infty$') for k, v in row.items()}
        # latex_content += " & ".join(str(val) for val in row.values()) + " \\\\\n"
        latex_content += " & ".join(escaped_row.values()) + " \\\\\n"

        if i == 0:
            latex_content += "\\cmidrule{1-7}\n"

    latex_content += """\\bottomrule
\\end{tabular}
}
\\footnotesize
\\vspace{0.2cm}
\\emph{Note}: Values show final metric / percentage change from original. Best values are bold for non-quantized models and underlined for quantized models. Abbreviations: LR - Low-Rank Decomposition, Pr - Pruning, QAT - Quant-Aware Training, PDQ - Post-training Dynamic Quantization.
\\end{table*}
"""

    # Save LaTeX table
    with open(f'tables/{tab_name}_latex.txt', 'w') as f:
        f.write(latex_content)

    return table_df

gpu_path = "../results/ResNet18/CIFAR10"
cpu_path = "../results_cpu/ResNet18/CIFAR10"

tab_name = 'cifar'
tab = analyze_results(gpu_path, cpu_path, tab_name, quality_metric='f1')

Top 5 Pipelines:
CPU - low-rank_quant-dynamic_pruning_quant-qat: f1 = 0.758 (-0.1%)
CPU - low-rank_quant-qat_pruning_quant-static: f1 = 0.747 (-1.6%)
CPU - low-rank_quant-static_pruning_quant-qat: f1 = 0.746 (-1.7%)
CPU - pruning_quant-dynamic_low-rank_quant-qat: f1 = 0.736 (-3.0%)
CPU - quant-dynamic_low-rank_quant-static_pruning: f1 = 0.732 (-3.6%)


In [2]:
tab

Unnamed: 0,Pipeline,f1,CPU Latency (ms),GPU Latency (ms),CPU Throughput (IPS),GPU Throughput (IPS),Model Size (MB)
0,Original,0.759,0.004,1.901,1890,17,43.349
1,low-rank_quant-dynamic_pruning_quant-qat,0.702 / -7.5%,0.005 / +9.6%,2.271 / +79.9%,1436 / -22.8%,15 / -42.0%,43.349 / +1.6%
2,low-rank_quant-qat_pruning_quant-static,0.747 / -1.6%,0.005 / +9.8%,∞,1792 / -9.4%,∞,44.137 / +3.5%
3,low-rank_quant-static_pruning_quant-dynamic,0.656 / -13.6%,∞,1.873 / +34.9%,∞,17 / -25.0%,10.861 / -74.5%
4,low-rank_quant-static_pruning_quant-qat,0.660 / -13.0%,0.004 / -4.9%,1.901 / +34.5%,1890 / -6.7%,17 / -24.4%,10.854 / -74.6%
5,pruning_quant-dynamic_low-rank_quant-qat,0.642 / -15.5%,0.005 / +2.4%,1.983 / +54.0%,1912 / -3.2%,15 / -43.6%,10.607 / -75.1%
6,pruning_quant-qat_low-rank_quant-dynamic,0.639 / -15.9%,∞,2.080 / +83.5%,∞,24 / -18.2%,10.607 / -75.1%
7,quant-dynamic_low-rank_quant-static_pruning,0.732 / -3.6%,0.004 / -8.0%,∞,2139 / +16.1%,∞,43.466 / +1.9%


## imagenette

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import uuid
import numpy as np

def extract_quality(filepath, quality_metric='f1'):
    """
    Extract quality metrics from an Excel file.
    Returns init and step metrics for the specified quality metric.
    """
    xls = pd.ExcelFile(filepath)
    quality = pd.read_excel(xls, 'quality_comparison')
    quality.columns = quality.iloc[0]
    quality = quality.drop(quality.index[0])
    init_val = quality[quality['mode'] == quality_metric]['original'].values[0]
    step_val = quality[quality['mode'] == quality_metric]['fedcore'].values[0]
    return float(init_val), float(step_val)

def extract_computational(filepath):
    """
    Extract computational metrics from an Excel file.
    Returns latency, size, and throughput for init and step.
    """
    xls = pd.ExcelFile(filepath)
    computational = pd.read_excel(xls, 'computational_comparison')
    latency_init, size_init, thr_init = computational.iloc[2, 1], computational.iloc[3, 1], computational.iloc[4, 1]
    latency_step, size_step, thr_step = computational.iloc[2, 2], computational.iloc[3, 2], computational.iloc[4, 2]
    return (float(latency_init), float(size_init), float(thr_init)), (float(latency_step), float(size_step), float(thr_step))

def analyze_results(gpu_path, cpu_path, tab_name, quality_metric='f1'):
    """
    Analyze PEFT pipeline results and generate plots and tables for top 5 pipelines.
    """
    # Create output directories
    os.makedirs('imgs', exist_ok=True)
    os.makedirs('tables', exist_ok=True)

    # Initialize results storage
    results = []

    # Process both GPU and CPU folders
    for device_path, device in [(gpu_path, 'GPU'), (cpu_path, 'CPU')]:
        if device_path is None:
            continue
        for pipeline in os.listdir(device_path):
            # Check if pipeline has exactly 4 steps
            if pipeline == '.DS_Store':
                continue
            try:
                xlsx_files = [f for f in os.listdir(os.path.join(device_path, pipeline)) if f.endswith('.xlsx')]
            except:
                _ = 1
            count = len(xlsx_files)
            if count < 3:
                continue

            pipeline_path = os.path.join(device_path, pipeline)
            if not os.path.isdir(pipeline_path):
                continue

            # Extract metrics for each step
            quality_metrics = []
            comp_metrics = []
            for step_file in sorted(os.listdir(pipeline_path)):
                if not step_file.endswith('.xlsx'):
                    continue
                step_path = os.path.join(pipeline_path, step_file)
                quality_metrics.append(extract_quality(step_path, quality_metric))
                comp_metrics.append(extract_computational(step_path))

            # Use final step metrics for comparison
            # try:
            init_quality, final_quality = quality_metrics[0][0], quality_metrics[-1][1]
            (init_latency, init_size, init_thr), (final_latency, final_size, final_thr) = comp_metrics[0][0], comp_metrics[-1][1]
            # except:
            #     _ = 1

            # Calculate percentage changes
            quality_change = ((final_quality - init_quality) / init_quality * 100) if init_quality != 0 else 0
            latency_change = ((final_latency - init_latency) / init_latency * 100) if init_latency != 0 else 0
            size_change = ((final_size - init_size) / init_size * 100) if init_size != 0 else 0
            thr_change = ((final_thr - init_thr) / init_thr * 100) if init_thr != 0 else 0

            results.append({
                'pipeline': pipeline,
                'device': device,
                'quality_init': init_quality,
                'quality_final': final_quality,
                'quality_change': quality_change,
                'latency': final_latency,
                'latency_change': latency_change,
                'throughput': final_thr,
                'throughput_change': thr_change,
                'size': final_size,
                'size_change': size_change
            })

    # Create DataFrame and sort by quality metric
    df = pd.DataFrame(results)
    if quality_metric.lower() == 'rmse':
        top_5_cpu = df[df['device'] == 'CPU'].sort_values('quality_final').head(5)
        top_5_gpu = df[df['device'] == 'GPU'].sort_values('quality_final').head(5)
    else:
        # top_5 = df.sort_values('quality_final', ascending=False).head(5)
        top_5_cpu = df[df['device'] == 'CPU'].sort_values('quality_final', ascending=False).head(5)
        top_5_gpu = df[df['device'] == 'GPU'].sort_values('quality_final', ascending=False).head(5)

    top_5 = pd.concat([top_5_gpu, top_5_cpu], axis=0).reset_index(drop=True)
    if quality_metric.lower() == 'rmse':
        best_top = top_5.sort_values('quality_final').head(5)
    else:
        best_top = top_5.sort_values('quality_final', ascending=False).head(5)
    # Print top 5 pipelines
    print("Top 5 Pipelines:")
    for _, row in best_top.iterrows():
        print(f"{row['device']} - {row['pipeline']}: {quality_metric} = {row['quality_final']:.3f} ({row['quality_change']:+.1f}%)")

    # Plot quality metrics before and after
    plt.figure(figsize=(12, 6))
    bar_width = 0.35
    index = np.arange(len(top_5))
    plt.bar(index, top_5['quality_init'], bar_width, label='Before')
    plt.bar(index + bar_width, top_5['quality_final'], bar_width, label='After')
    plt.xlabel('Pipeline')
    plt.ylabel(quality_metric)
    plt.title(f'{quality_metric} Before and After Pipeline')
    plt.xticks(index + bar_width/2, [f"{row['device']}: {row['pipeline']}" for _, row in top_5.iterrows()], rotation=45, ha='right')
    plt.legend()
    plt.tight_layout()
    # plt.savefig('imgs/quality_comparison.png')
    plt.close()

    # Plot percentage changes
    metrics = ['quality_change', 'latency_change', 'throughput_change', 'size_change']
    metric_labels = [quality_metric, 'Latency', 'Throughput', 'Model Size']
    plt.figure(figsize=(12, 6))
    for i, (metric, label) in enumerate(zip(metrics, metric_labels)):
        plt.bar(index + i*bar_width, top_5[metric], bar_width, label=label)
    plt.xlabel('Pipeline')
    plt.ylabel('Percentage Change (%)')
    plt.title('Percentage Change in Metrics by Pipeline')
    plt.xticks(index + 1.5*bar_width, [f"{row['device']}: {row['pipeline']}" for _, row in top_5.iterrows()], rotation=45, ha='right')
    plt.legend()
    plt.tight_layout()
    # plt.savefig('imgs/percent_change.png')
    plt.close()

    #  table data
    table_data = []
    # original row
    original_row = {
        'Pipeline': 'Original',
        quality_metric: f"{top_5['quality_init'].iloc[0]:.3f}",
        'CPU Latency (ms)': f"{df[df['device'] == 'CPU']['latency'].iloc[0]:.3f}" if len(df[df['device'] == 'CPU']['latency']) > 0 else np.NaN,
        'GPU Latency (ms)': f"{df[df['device'] == 'GPU']['latency'].iloc[0]:.3f}" if len(df[df['device'] == 'GPU']['latency']) > 0 else np.NaN,
        'CPU Throughput (IPS)': f"{df[df['device'] == 'CPU']['throughput'].iloc[0]:.0f}" if len(df[df['device'] == 'CPU']['throughput']) > 0 else np.NaN,
        'GPU Throughput (IPS)': f"{df[df['device'] == 'GPU']['throughput'].iloc[0]:.0f}"if len(df[df['device'] == 'GPU']['throughput']) > 0 else np.NaN,
        'Model Size (MB)': f"{top_5['size'].iloc[0]:.3f}"
    }
    # table_data.append(original_row)


    for _, row in top_5.iterrows():
        table_row = {
            # 'Pipeline': f"{row['device']}: {row['pipeline']}",
            'Pipeline': f"{row['pipeline']}",
            'Device': row['device'],
            quality_metric: f"{row['quality_final']:.3f} / {row['quality_change']:+.1f}%",
            'CPU Latency (ms)': f"{row['latency']:.3f} / {row['latency_change']:+.1f}%" if row['device'] == 'CPU' else '∞',
            'GPU Latency (ms)': f"{row['latency']:.3f} / {row['latency_change']:+.1f}%" if row['device'] == 'GPU' else '∞',
            'CPU Throughput (IPS)': f"{row['throughput']:.0f} / {row['throughput_change']:+.1f}%" if row['device'] == 'CPU' else '∞',
            'GPU Throughput (IPS)': f"{row['throughput']:.0f} / {row['throughput_change']:+.1f}%" if row['device'] == 'GPU' else '∞',
            'Model Size (MB)': f"{row['size']:.3f} / {row['size_change']:+.1f}%"
        }
        table_data.append(table_row)

    table_df = pd.DataFrame(table_data)
    table_df = table_df.groupby('Pipeline')[[
        quality_metric, 'CPU Latency (ms)', 'GPU Latency (ms)',
        'CPU Throughput (IPS)', 'GPU Throughput (IPS)', 'Model Size (MB)'
        ]].min().reset_index(drop=False)
    
    original_df = pd.DataFrame(original_row, index=[0])
    table_df = pd.concat([original_df, table_df], axis=0).reset_index(drop=True)
    table_df.to_csv(f'tables/{tab_name}.csv', index=True)

    # latex table
    latex_content = """
\\begin{table*}[h]
\\centering
\\caption{Top 5 PEFT Pipelines}
\\label{tab:peft_pipelines}
\\resizebox{\\textwidth}{!}{%
\\begin{tabular}{lcccccc}
\\toprule
\\textbf{Pipeline} & \\textbf{""" + quality_metric + """} & \\textbf{CPU Latency (ms)} & \\textbf{GPU Latency (ms)} & \\textbf{CPU Throughput (IPS)} & \\textbf{GPU Throughput (IPS)} & \\textbf{Model Size (MB)} \\
\\midrule
"""
    for i, row in enumerate(table_data):
        escaped_row = {k: str(v).replace('%', '\%').replace('∞', '$\infty$') for k, v in row.items()}
        # latex_content += " & ".join(str(val) for val in row.values()) + " \\\\\n"
        latex_content += " & ".join(escaped_row.values()) + " \\\\\n"

        if i == 0:
            latex_content += "\\cmidrule{1-7}\n"

    latex_content += """\\bottomrule
\\end{tabular}
}
\\footnotesize
\\vspace{0.2cm}
\\emph{Note}: Values show final metric / percentage change from original. Best values are bold for non-quantized models and underlined for quantized models. Abbreviations: LR - Low-Rank Decomposition, Pr - Pruning, QAT - Quant-Aware Training, PDQ - Post-training Dynamic Quantization.
\\end{table*}
"""

    # Save LaTeX table
    with open(f'tables/{tab_name}_latex.txt', 'w') as f:
        f.write(latex_content)

    return table_df

gpu_path = "../results/ResNet18/Imagenette"
# cpu_path = "../results_cpu/ResNet18/Imagenette"

# tab_name = 'imagenette'
tab = analyze_results(gpu_path=gpu_path, cpu_path=None, tab_name='imagenette', quality_metric='f1')

Top 5 Pipelines:
GPU - pruning_quant-static_low-rank_quant-qat: f1 = 0.477 (+2068.2%)
GPU - pruning_quant-dynamic_low-rank_quant-qat: f1 = 0.472 (+2045.5%)
GPU - pruning_quant-static_low-rank_quant-dynamic: f1 = 0.467 (+2022.7%)
GPU - pruning_quant-dynamic_low-rank_quant-static: f1 = 0.462 (+2000.0%)
GPU - pruning_quant-qat_low-rank_quant-dynamic: f1 = 0.461 (+1995.5%)


In [4]:
tab

Unnamed: 0,Pipeline,f1,CPU Latency (ms),GPU Latency (ms),CPU Throughput (IPS),GPU Throughput (IPS),Model Size (MB)
0,Original,0.022,,1.796,,20,10.453
1,pruning_quant-dynamic_low-rank_quant-qat,0.472 / +2045.5%,∞,1.991 / +11.5%,∞,18 / +93.9%,10.453 / -75.5%
2,pruning_quant-dynamic_low-rank_quant-static,0.462 / +2000.0%,∞,2.016 / +39.0%,∞,17 / +68.2%,10.453 / -75.5%
3,pruning_quant-qat_low-rank_quant-dynamic,0.461 / +1995.5%,∞,2.007 / +43.9%,∞,17 / +60.6%,10.453 / -75.5%
4,pruning_quant-static_low-rank_quant-dynamic,0.467 / +2022.7%,∞,2.004 / +38.3%,∞,17 / +57.6%,10.453 / -75.5%
5,pruning_quant-static_low-rank_quant-qat,0.477 / +2068.2%,∞,2.057 / +49.9%,∞,17 / +68.3%,10.453 / -75.5%


## ApplienceEnergy

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import uuid
import numpy as np

def extract_quality(filepath, quality_metric='f1'):
    """
    Extract quality metrics from an Excel file.
    Returns init and step metrics for the specified quality metric.
    """
    xls = pd.ExcelFile(filepath)
    quality = pd.read_excel(xls, 'quality_comparison')
    quality.columns = quality.iloc[0]
    quality = quality.drop(quality.index[0])
    init_val = quality[quality['mode'] == quality_metric]['original'].values[0]
    step_val = quality[quality['mode'] == quality_metric]['fedcore'].values[0]
    return float(init_val), float(step_val)

def extract_computational(filepath):
    """
    Extract computational metrics from an Excel file.
    Returns latency, size, and throughput for init and step.
    """
    xls = pd.ExcelFile(filepath)
    computational = pd.read_excel(xls, 'computational_comparison')
    latency_init, size_init, thr_init = computational.iloc[2, 1], computational.iloc[3, 1], computational.iloc[4, 1]
    latency_step, size_step, thr_step = computational.iloc[2, 2], computational.iloc[3, 2], computational.iloc[4, 2]
    return (float(latency_init), float(size_init), float(thr_init)), (float(latency_step), float(size_step), float(thr_step))

def analyze_results(gpu_path, cpu_path, tab_name, quality_metric='f1'):
    """
    Analyze PEFT pipeline results and generate plots and tables for top 5 pipelines.
    """
    # Create output directories
    os.makedirs('imgs', exist_ok=True)
    os.makedirs('tables', exist_ok=True)

    # Initialize results storage
    results = []

    # Process both GPU and CPU folders
    for device_path, device in [(gpu_path, 'GPU'), (cpu_path, 'CPU')]:
        if device_path is None:
            continue
        for pipeline in os.listdir(device_path):
            # Check if pipeline has exactly 4 steps
            if pipeline == '.DS_Store':
                continue

            xlsx_files = [f for f in os.listdir(os.path.join(device_path, pipeline)) if f.endswith('.xlsx')]

            count = len(xlsx_files)
            if count < 2:
                continue

            pipeline_path = os.path.join(device_path, pipeline)
            if not os.path.isdir(pipeline_path):
                continue

            # Extract metrics for each step
            quality_metrics = []
            comp_metrics = []
            for step_file in sorted(os.listdir(pipeline_path)):
                if not step_file.endswith('.xlsx'):
                    continue
                step_path = os.path.join(pipeline_path, step_file)
                quality_metrics.append(extract_quality(step_path, quality_metric))
                comp_metrics.append(extract_computational(step_path))

            # Use final step metrics for comparison
            # try:
            init_quality, final_quality = quality_metrics[0][0], quality_metrics[-1][1]
            (init_latency, init_size, init_thr), (final_latency, final_size, final_thr) = comp_metrics[0][0], comp_metrics[-1][1]
            # except:
            #     _ = 1

            # Calculate percentage changes
            quality_change = ((final_quality - init_quality) / init_quality * 100) if init_quality != 0 else 0
            latency_change = ((final_latency - init_latency) / init_latency * 100) if init_latency != 0 else 0
            size_change = ((final_size - init_size) / init_size * 100) if init_size != 0 else 0
            thr_change = ((final_thr - init_thr) / init_thr * 100) if init_thr != 0 else 0

            results.append({
                'pipeline': pipeline,
                'device': device,
                'quality_init': init_quality,
                'quality_final': final_quality,
                'quality_change': quality_change,
                'latency': final_latency,
                'latency_change': latency_change,
                'throughput': final_thr,
                'throughput_change': thr_change,
                'size': final_size,
                'size_change': size_change
            })

    # Create DataFrame and sort by quality metric
    df = pd.DataFrame(results)
    if quality_metric.lower() == 'rmse':
        top_5_cpu = df[df['device'] == 'CPU'].sort_values('quality_final').head(5)
        top_5_gpu = df[df['device'] == 'GPU'].sort_values('quality_final').head(5)
    else:
        # top_5 = df.sort_values('quality_final', ascending=False).head(5)
        top_5_cpu = df[df['device'] == 'CPU'].sort_values('quality_final', ascending=False).head(5)
        top_5_gpu = df[df['device'] == 'GPU'].sort_values('quality_final', ascending=False).head(5)

    top_5 = pd.concat([top_5_gpu, top_5_cpu], axis=0).reset_index(drop=True)
    if quality_metric.lower() == 'rmse':
        best_top = top_5.sort_values('quality_final').head(5)
    else:
        best_top = top_5.sort_values('quality_final', ascending=False).head(5)
    # Print top 5 pipelines
    print("Top 5 Pipelines:")
    for _, row in best_top.iterrows():
        print(f"{row['device']} - {row['pipeline']}: {quality_metric} = {row['quality_final']:.3f} ({row['quality_change']:+.1f}%)")

    # Plot quality metrics before and after
    plt.figure(figsize=(12, 6))
    bar_width = 0.35
    index = np.arange(len(top_5))
    plt.bar(index, top_5['quality_init'], bar_width, label='Before')
    plt.bar(index + bar_width, top_5['quality_final'], bar_width, label='After')
    plt.xlabel('Pipeline')
    plt.ylabel(quality_metric)
    plt.title(f'{quality_metric} Before and After Pipeline')
    plt.xticks(index + bar_width/2, [f"{row['device']}: {row['pipeline']}" for _, row in top_5.iterrows()], rotation=45, ha='right')
    plt.legend()
    plt.tight_layout()
    # plt.savefig('imgs/quality_comparison.png')
    plt.close()

    # Plot percentage changes
    metrics = ['quality_change', 'latency_change', 'throughput_change', 'size_change']
    metric_labels = [quality_metric, 'Latency', 'Throughput', 'Model Size']
    plt.figure(figsize=(12, 6))
    for i, (metric, label) in enumerate(zip(metrics, metric_labels)):
        plt.bar(index + i*bar_width, top_5[metric], bar_width, label=label)
    plt.xlabel('Pipeline')
    plt.ylabel('Percentage Change (%)')
    plt.title('Percentage Change in Metrics by Pipeline')
    plt.xticks(index + 1.5*bar_width, [f"{row['device']}: {row['pipeline']}" for _, row in top_5.iterrows()], rotation=45, ha='right')
    plt.legend()
    plt.tight_layout()
    # plt.savefig('imgs/percent_change.png')
    plt.close()

    #  table data
    table_data = []
    # original row
    original_row = {
        'Pipeline': 'Original',
        quality_metric: f"{top_5['quality_init'].iloc[0]:.3f}",
        'CPU Latency (ms)': f"{df[df['device'] == 'CPU']['latency'].iloc[0]:.3f}" if len(df[df['device'] == 'CPU']['latency']) > 0 else np.NaN,
        'GPU Latency (ms)': f"{df[df['device'] == 'GPU']['latency'].iloc[0]:.3f}" if len(df[df['device'] == 'GPU']['latency']) > 0 else np.NaN,
        'CPU Throughput (IPS)': f"{df[df['device'] == 'CPU']['throughput'].iloc[0]:.0f}" if len(df[df['device'] == 'CPU']['throughput']) > 0 else np.NaN,
        'GPU Throughput (IPS)': f"{df[df['device'] == 'GPU']['throughput'].iloc[0]:.0f}"if len(df[df['device'] == 'GPU']['throughput']) > 0 else np.NaN,
        'Model Size (MB)': f"{top_5['size'].iloc[0]:.3f}"
    }
    # table_data.append(original_row)


    for _, row in top_5.iterrows():
        table_row = {
            # 'Pipeline': f"{row['device']}: {row['pipeline']}",
            'Pipeline': f"{row['pipeline']}",
            'Device': row['device'],
            quality_metric: f"{row['quality_final']:.3f} / {row['quality_change']:+.1f}%",
            'CPU Latency (ms)': f"{row['latency']:.3f} / {row['latency_change']:+.1f}%" if row['device'] == 'CPU' else '∞',
            'GPU Latency (ms)': f"{row['latency']:.3f} / {row['latency_change']:+.1f}%" if row['device'] == 'GPU' else '∞',
            'CPU Throughput (IPS)': f"{row['throughput']:.0f} / {row['throughput_change']:+.1f}%" if row['device'] == 'CPU' else '∞',
            'GPU Throughput (IPS)': f"{row['throughput']:.0f} / {row['throughput_change']:+.1f}%" if row['device'] == 'GPU' else '∞',
            'Model Size (MB)': f"{row['size']:.3f} / {row['size_change']:+.1f}%"
        }
        table_data.append(table_row)

    table_df = pd.DataFrame(table_data)
    table_df = table_df.groupby('Pipeline')[[
        quality_metric, 'CPU Latency (ms)', 'GPU Latency (ms)',
        'CPU Throughput (IPS)', 'GPU Throughput (IPS)', 'Model Size (MB)'
        ]].min().reset_index(drop=False)
    
    original_df = pd.DataFrame(original_row, index=[0])
    table_df = pd.concat([original_df, table_df], axis=0).reset_index(drop=True)
    table_df.replace('∞', '$\infty$')
    table_df.to_csv(f'tables/{tab_name}.csv', index=True)

    # latex table
    latex_content = """
\\begin{table*}[h]
\\centering
\\caption{Top 5 PEFT Pipelines}
\\label{tab:peft_pipelines}
\\resizebox{\\textwidth}{!}{%
\\begin{tabular}{lcccccc}
\\toprule
\\textbf{Pipeline} & \\textbf{""" + quality_metric + """} & \\textbf{CPU Latency (ms)} & \\textbf{GPU Latency (ms)} & \\textbf{CPU Throughput (IPS)} & \\textbf{GPU Throughput (IPS)} & \\textbf{Model Size (MB)} \\
\\midrule
"""
    for i, row in enumerate(table_data):
        escaped_row = {k: str(v).replace('%', '\%').replace('∞', '$\infty$') for k, v in row.items()}
        # latex_content += " & ".join(str(val) for val in row.values()) + " \\\\\n"
        latex_content += " & ".join(escaped_row.values()) + " \\\\\n"

        if i == 0:
            latex_content += "\\cmidrule{1-7}\n"

    latex_content += """\\bottomrule
\\end{tabular}
}
\\footnotesize
\\vspace{0.2cm}
\\emph{Note}: Values show final metric / percentage change from original. Best values are bold for non-quantized models and underlined for quantized models. Abbreviations: LR - Low-Rank Decomposition, Pr - Pruning, QAT - Quant-Aware Training, PDQ - Post-training Dynamic Quantization.
\\end{table*}
"""

    # Save LaTeX table
    with open(f'tables/{tab_name}_latex.txt', 'w') as f:
        f.write(latex_content)

    return table_df

gpu_path = "../results/InceptionNet/AppliancesEnergy"
# cpu_path = "../results_cpu/InceptionNet/AppliancesEnergy"

# tab_name = 'imagenette'
tab = analyze_results(gpu_path=gpu_path, cpu_path=None, tab_name='timseseries', quality_metric='rmse')

Top 5 Pipelines:
GPU - pruning_quant-static_low-rank_quant-dynamic: rmse = 3.811 (+10.8%)
GPU - pruning_quant-static_low-rank_quant-qat: rmse = 3.817 (+10.9%)
GPU - pruning_quant-qat_low-rank_quant-dynamic: rmse = 3.841 (+11.6%)
GPU - pruning_quant-qat_low-rank_quant-static: rmse = 3.845 (+11.7%)
GPU - low-rank_quant-qat_pruning_quant-dynamic: rmse = 4.577 (+33.0%)


In [6]:
tab

Unnamed: 0,Pipeline,rmse,CPU Latency (ms),GPU Latency (ms),CPU Throughput (IPS),GPU Throughput (IPS),Model Size (MB)
0,Original,3.441,,4.345,,2,0.923
1,low-rank_quant-qat_pruning_quant-dynamic,4.577 / +33.0%,∞,4.427 / +44.2%,∞,2 / -32.1%,0.591 / -80.0%
2,pruning_quant-qat_low-rank_quant-dynamic,3.841 / +11.6%,∞,3.664 / +22.8%,∞,3 / -16.1%,0.923 / -68.8%
3,pruning_quant-qat_low-rank_quant-static,3.845 / +11.7%,∞,3.409 / +3.0%,∞,3 / -11.2%,0.923 / -68.8%
4,pruning_quant-static_low-rank_quant-dynamic,3.811 / +10.8%,∞,3.852 / +30.4%,∞,3 / -11.3%,0.923 / -68.8%
5,pruning_quant-static_low-rank_quant-qat,3.817 / +10.9%,∞,3.538 / +15.8%,∞,3 / -16.1%,0.923 / -68.8%
