In [None]:
import pandas as pd

# Load the benchmark results
csv_path = "/tmp/audio_bench_5fz296a1/benchmark_results.csv"
df = pd.read_csv(csv_path)

print("Data shape:", df.shape)
print("\nColumns:", df.columns.tolist())
print("\nFirst few rows:")
df.head()

Data shape: (128, 9)

Columns: ['operation', 'library', 'duration', 'channels', 'file_size_mb', 'mean_time', 'std_time', 'throughput_mb_s', 'mean_memory_mb']

First few rows:


Unnamed: 0,operation,library,duration,channels,file_size_mb,mean_time,std_time,throughput_mb_s,mean_memory_mb
0,read,audio_samples,0.1,1,0.016865,6.1e-05,1.1e-05,276.702616,0.000504
1,read,soundfile,0.1,1,0.016865,0.000241,2.1e-05,69.898849,0.018729
2,read,scipy,0.1,1,0.016865,0.000146,9e-06,115.905184,0.022395
3,read,torchaudio,0.1,1,0.016865,0.234751,1.639119,0.071841,1.612279
4,read,audio_samples,0.1,2,0.033688,6.7e-05,6e-06,499.127739,0.000479


In [2]:
# Analyze the data structure
print("Unique operations:", df['operation'].unique())
print("Unique libraries:", df['library'].unique())
print("Duration range:", df['duration'].unique())
print("Channel range:", df['channels'].unique())

# Show sample data for understanding
print("\nSample data:")
print(df.groupby(['operation', 'library']).size())

Unique operations: ['read' 'write']
Unique libraries: ['audio_samples' 'soundfile' 'scipy' 'torchaudio']
Duration range: [ 0.1  0.5  1.   2.   5.  10.  30.  60. ]
Channel range: [1 2]

Sample data:
operation  library      
read       audio_samples    16
           scipy            16
           soundfile        16
           torchaudio       16
write      audio_samples    16
           scipy            16
           soundfile        16
           torchaudio       16
dtype: int64


In [10]:
def create_latex_table_for_operation(df, operation):
    """Create a LaTeX table for a specific operation comparing libraries across conditions."""
    op_data = df[df['operation'] == operation].copy()
    
    # Create a pivot table with mean_time as the main metric (focusing on timings as requested)
    pivot = op_data.pivot_table(
        index='library', 
        columns=['duration', 'channels'], 
        values='mean_time',
        aggfunc='mean'
    )
    
    # Start building the LaTeX table
    latex_lines = []
    latex_lines.append(f"\\begin{{table}}[h!]")
    latex_lines.append(f"\\centering")
    latex_lines.append(f"\\caption{{Performance comparison for {operation} operation (mean time in seconds)}}")
    latex_lines.append(f"\\label{{tab:{operation}_performance}}")
    
    # Create column specification
    n_conditions = len(pivot.columns)
    col_spec = "l" + "r" * n_conditions
    latex_lines.append(f"\\begin{{tabular}}{{{col_spec}}}")
    latex_lines.append("\\hline")
    
    # Create header
    header = "Library"
    for (duration, channels) in pivot.columns:
        header += f" & {duration}s, {channels}ch"
    header += " \\\\"
    latex_lines.append(header)
    latex_lines.append("\\hline")
    
    # Add data rows
    for library in pivot.index:
        row = library
        for col in pivot.columns:
            value = pivot.loc[library, col]
            if pd.isna(value):
                row += " & N/A"
            else:
                row += f" & {value:.2e}"
        row += " \\\\"
        latex_lines.append(row)
    
    latex_lines.append("\\hline")
    latex_lines.append("\\end{tabular}")
    latex_lines.append("\\end{table}")
    
    return "\n".join(latex_lines)

# Generate LaTeX tables for each operation
operations = df['operation'].unique()
print(f"Generating LaTeX tables for operations: {operations}")

latex_tables = {}
for operation in operations:
    latex_tables[operation] = create_latex_table_for_operation(df, operation)
    print(f"\nLaTeX table for {operation} operation:")
    print(latex_tables[operation])

Generating LaTeX tables for operations: ['read' 'write']

LaTeX table for read operation:
\begin{table}[h!]
\centering
\caption{Performance comparison for read operation (mean time in seconds)}
\label{tab:read_performance}
\begin{tabular}{lrrrrrrrrrrrrrrrr}
\hline
Library & 0.1s, 1ch & 0.1s, 2ch & 0.5s, 1ch & 0.5s, 2ch & 1.0s, 1ch & 1.0s, 2ch & 2.0s, 1ch & 2.0s, 2ch & 5.0s, 1ch & 5.0s, 2ch & 10.0s, 1ch & 10.0s, 2ch & 30.0s, 1ch & 30.0s, 2ch & 60.0s, 1ch & 60.0s, 2ch \\
\hline
audio_samples & 6.09e-05 & 6.75e-05 & 7.79e-05 & 8.73e-05 & 8.66e-05 & 1.13e-04 & 1.08e-04 & 1.61e-04 & 1.86e-04 & 3.24e-04 & 3.30e-04 & 6.05e-04 & 8.27e-04 & 1.62e-03 & 1.54e-03 & 2.19e-03 \\
scipy & 1.46e-04 & 1.62e-04 & 1.65e-04 & 1.77e-04 & 1.73e-04 & 1.99e-04 & 1.90e-04 & 2.50e-04 & 2.70e-04 & 3.97e-04 & 3.97e-04 & 6.32e-04 & 8.25e-04 & 1.48e-03 & 1.44e-03 & 2.77e-03 \\
soundfile & 2.41e-04 & 2.55e-04 & 2.66e-04 & 2.84e-04 & 2.79e-04 & 3.01e-04 & 3.04e-04 & 3.54e-04 & 3.75e-04 & 5.06e-04 & 5.06e-04 & 7.38e-04

In [11]:
# Save all LaTeX tables to a text file for easy copying
with open('benchmark_latex_tables.tex', 'w') as f:
    f.write("% LaTeX tables for audio library benchmark results\n")
    f.write("% Generated automatically from CSV data\n\n")
    
    for operation in operations:
        f.write(f"% Table for {operation} operation\n")
        f.write(latex_tables[operation])
        f.write("\n\n")

print("LaTeX tables saved to 'benchmark_latex_tables.tex'")

# Also display a summary of the timing differences
print("\nTiming Summary (mean_time in seconds):")
for operation in operations:
    print(f"\n{operation.upper()} OPERATION:")
    op_data = df[df['operation'] == operation]
    summary = op_data.groupby('library')['mean_time'].agg(['mean', 'min', 'max']).round(6)
    print(summary)

LaTeX tables saved to 'benchmark_latex_tables.tex'

Timing Summary (mean_time in seconds):

READ OPERATION:
                   mean       min       max
library                                    
audio_samples  0.000525  0.000061  0.002191
scipy          0.000605  0.000146  0.002770
soundfile      0.000713  0.000241  0.002890
torchaudio     0.018761  0.000572  0.234751

WRITE OPERATION:
                   mean       min       max
library                                    
audio_samples  0.001780  0.000062  0.016265
scipy          0.001874  0.000223  0.012368
soundfile      0.002628  0.000271  0.015932
torchaudio     0.002901  0.000267  0.011589
