In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as sps
import scikit_posthocs as sp

## Reusable functions

In [None]:
def create_boxplot(df, col_value, col_category, title, ax=None, ylabel='Value'):
    """
    Draw one box per category for the values of a single column.

    Categories are taken from ``df[col_category].unique()`` (order of first
    appearance); missing values of ``col_value`` are dropped per category.
    Outliers are not drawn (``showfliers=False``).

    Args:
        df: DataFrame with the data
        col_value: Name of the numeric column to plot
        col_category: Name of the column that defines the groups
        title: Title of the plot
        ax: Optional matplotlib Axes to draw on; a new figure is created when omitted
        ylabel: Label of the y axis

    Returns:
        Tuple ``(fig, ax)`` so the caller can save and close the figure that was drawn
    """
    groups = list(df[col_category].unique())
    plot_data = [
        df.loc[df[col_category] == group, col_value].dropna()
        for group in groups
    ]

    if ax is None:
        fig, ax = plt.subplots()
    else:
        fig = ax.figure

    bp = ax.boxplot(plot_data, patch_artist=True, showfliers=False)

    # One viridis colour per box, spread evenly over the colormap
    colors = plt.cm.viridis(np.linspace(0, 1, len(plot_data)))
    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)

    ax.set_title(title)
    ax.set_xticklabels(groups, rotation=45, ha='right')
    ax.set_ylabel(ylabel)
    ax.grid(True, linestyle='--', alpha=0.7)

    return fig, ax


In [None]:
def create_heatmap(df, col_value, col_category, title, ax=None, alpha=0.05):
    """
    Run Dunn's post-hoc test between categories and draw the p-value matrix.

    P-values are Bonferroni-adjusted. Cells with a p-value below ``alpha`` are
    outlined in black.

    Args:
        df: DataFrame with the data
        col_value: Name of the numeric column to compare
        col_category: Name of the column that defines the groups
        title: Title of the plot
        ax: Optional matplotlib Axes to draw on; a new figure is created when omitted
        alpha: Significance threshold used for the colour scale and the outlines

    Returns:
        Tuple ``(fig, ax)`` so the caller can save and close the figure that was drawn
    """
    groups = []
    plot_data = []

    for group in df[col_category].unique():
        values = df.loc[df[col_category] == group, col_value].dropna()
        # A group without observations cannot take part in a rank test, and
        # keeping its label would put the tick labels out of step with the
        # groups that were actually tested.
        if len(values) == 0:
            continue
        groups.append(group)
        plot_data.append(values)

    # Perform Dunn test
    posthoc_results = sp.posthoc_dunn(plot_data, p_adjust='bonferroni')

    if ax is None:
        fig, ax = plt.subplots()
    else:
        fig = ax.figure

    sns.heatmap(
        posthoc_results,
        annot=True,
        cmap='coolwarm_r',
        vmin=0,
        vmax=alpha,
        ax=ax,
        xticklabels=groups,
        yticklabels=groups,
        cbar=False,
        fmt='.2f',
        annot_kws={"fontsize": 7}
    )

    # Outline significant cells; heatmap cell (row i, column j) has its corner at (j, i)
    for i, j in np.argwhere(posthoc_results.to_numpy() < alpha):
        ax.add_patch(plt.Rectangle((j, i), 1, 1, fill=False, edgecolor='black', lw=1.5))

    ax.set_title(title)

    return fig, ax

In [None]:
def _format_mean_std_cell(values):
    """Format a numeric Series as "mean ± std" with two significant digits in std ("N/A" if empty)."""
    if len(values) == 0:
        return "N/A"

    mean_val = values.mean()
    std_val = values.std()

    # std is NaN for a single observation and 0 for constant data; log10 is
    # unusable in both cases, so fall back to a fixed two-decimal layout.
    if pd.isna(std_val) or std_val == 0:
        return f"{mean_val:.2f} ± 0.00"

    # Decimal places that give std two significant digits; mean uses the same precision
    magnitude = int(np.floor(np.log10(abs(std_val))))
    places = max(0, 2 - magnitude - 1)

    return f"{round(mean_val, places):.{places}f} ± {round(std_val, places):.{places}f}"


def create_mean_std_table(df, param_name, category_col, decimal_places=3, pool_orders=None):
    """
    Creates a table with mean ± std values for a parameter across different pool orders.

    Values are read from the columns ``f"{param_name}_{order}"``. Rows with a
    missing category are ignored.

    Args:
        df: DataFrame with the data
        param_name: Base name of the parameter (e.g., 'gamma', 'delta', 'beta')
        category_col: Column name for categories (e.g., 'language', 'source')
        decimal_places: Ignored; kept for backward compatibility
        pool_orders: Pool orders to tabulate; defaults to [1, 3, 9, 27]

    Returns:
        DataFrame indexed by sorted category with one column per pool order,
        holding "mean ± std" strings ("N/A" where a category has no data)

    Raises:
        KeyError: If a ``f"{param_name}_{order}"`` column is missing from ``df``
    """
    pool_orders = [1, 3, 9, 27] if pool_orders is None else list(pool_orders)

    # dropna(): a missing category cannot be sorted together with string labels
    categories = sorted(df[category_col].dropna().unique())
    result_df = pd.DataFrame(index=categories, columns=pool_orders)

    for category in categories:
        # Select the category's rows once and reuse them for every pool order
        category_rows = df[df[category_col] == category]
        for order in pool_orders:
            values = category_rows[f"{param_name}_{order}"].dropna()
            result_df.loc[category, order] = _format_mean_std_cell(values)

    return result_df

def _escape_latex_text(text):
    """Backslash-escape the characters & % # _ unless they are already escaped."""
    import re  # local import: the notebook's import cell does not provide `re`

    return re.sub(r"(?<!\\)([&%#_])", r"\\\1", text)


def export_table_to_latex(df, caption, label, escape=True):
    """
    Exports a DataFrame to a booktabs-style LaTeX table.

    The index is written as the first column (its name, if any, becomes the
    header of that column); every column header is centred with
    ``\\multicolumn``. ``caption`` and ``label`` are inserted verbatim.

    Args:
        df: DataFrame to export
        caption: Caption for the table (may contain LaTeX markup)
        label: Label for the table
        escape: Escape ``&``, ``%``, ``#`` and ``_`` in headers, index and cells.
            Pass False when the cells already contain LaTeX markup (e.g. math
            with subscripts).

    Returns:
        LaTeX code as a string
    """
    def as_text(value):
        text = str(value)
        return _escape_latex_text(text) if escape else text

    columns = df.columns
    tabular_spec = "l" + "r" * len(columns)

    # Header row: index name (if set) followed by the centred column names
    header_cells = " & ".join(
        f"\\multicolumn{{1}}{{c}}{{{as_text(col)}}}" for col in columns
    )
    if df.index.name is not None:
        header_row = f"  {as_text(df.index.name)} & {header_cells} \\\\"
    else:
        header_row = f"  & {header_cells} \\\\"

    lines = [
        "\\begin{table}[p]",
        "\\centering",
        f"\\begin{{tabular}}{{{tabular_spec}}}",
        "  \\toprule",
        header_row,
        "  \\midrule",
    ]

    # Data rows: index value first, then the cells
    for idx, row in df.iterrows():
        cells = [as_text(idx)] + [as_text(val) for val in row.values]
        lines.append("  " + " & ".join(cells) + " \\\\")

    lines += [
        "  \\bottomrule",
        "\\end{tabular}",
        f"\\caption{{{caption}}}",
        f"\\label{{{label}}}",
        "\\end{table}",
    ]

    return "\n".join(lines)

def collect_tables_to_file(tables, filename, title):
    """
    Collects multiple LaTeX tables and saves them to a single standalone document.

    The file is written as UTF-8 (the tables contain the "±" character), and
    the parent directory of ``filename`` is created when it does not exist.

    Args:
        tables: List of LaTeX table code strings
        filename: Output filename to save the combined tables
        title: Title for the collection of tables
    """
    from pathlib import Path  # local import: not provided by the notebook's import cell

    # Preamble, then every table followed by a blank line, then the closing tag
    latex_content = (
        "\\documentclass{article}\n\\usepackage{booktabs}\n\\usepackage{amsmath}\n"
        + "\\title{" + title + "}\n\\begin{document}\n\\maketitle\n\n"
        + "".join(table + "\n\n" for table in tables)
        + "\\end{document}"
    )

    output_path = Path(filename)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit encoding: the platform default is not UTF-8 everywhere
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(latex_content)

    print(f"All tables saved to {filename}")

## Standardized Project Gutenberg Corpus

### Prepare the data

In [None]:
# Load the results for further analysis
spgc_metadata_sampled = pd.read_csv("results/spgc_metadata_sampled_after.csv")

# Reverse sign of gamma (the order-1 fit is stored with suffix _0 in the CSV)
for gamma_col in ["gamma_0", "gamma_3", "gamma_9", "gamma_27"]:
    spgc_metadata_sampled[gamma_col] = -spgc_metadata_sampled[gamma_col]

# Rename columns _0 to _1
# NOTE(review): this replaces every "_0" occurrence in a column name, not only
# a trailing suffix — confirm no other column contains "_0".
spgc_metadata_sampled = spgc_metadata_sampled.rename(columns=lambda x: x.replace("_0", "_1"))

# Skip Chinese language, because embeddings from NLTK are not well-suited for SPGC corpus.
# .copy() makes the filtered frame independent, so the column assignment below
# does not write into a slice of the original frame.
spgc_metadata_sampled = spgc_metadata_sampled[spgc_metadata_sampled["language"] != "['zh']"].copy()

# Strip brackets and quotes from language codes, e.g. "['en']" -> "en"
spgc_metadata_sampled["language"] = spgc_metadata_sampled["language"].str.replace(r"[\[\]']", "", regex=True)

# Explore missing values in the fitted parameters
selected_columns = spgc_metadata_sampled.filter(regex="^(gamma|delta|beta)").columns
missing_values = spgc_metadata_sampled[selected_columns].isnull().sum()
missing_percentage = (missing_values / len(spgc_metadata_sampled)) * 100

missing_data_summary = pd.DataFrame({
    "Parameter Name": missing_values.index,
    "Missing Count": missing_values.values,
    "Missing Percentage": missing_percentage.values
})

missing_data_summary

### Failure ratio and error metrics

In [None]:
pool_orders = [1, 3, 9, 27]

# (error column prefix, tag used in the file name, model name used in the title)
error_models = [
    ("error_pl", "pl", "power law"),
    ("error_se", "se", "stretched exponential"),
]

for order in pool_orders:
    for error_prefix, file_tag, model_label in error_models:
        create_boxplot(
            spgc_metadata_sampled,
            col_value=f"{error_prefix}_{order}",
            col_category="language",
            title=f"SSLR for {model_label} (order {order})"
        )
        # The plot lives on the figure that is current after the call; save and
        # close that one so figures do not pile up across iterations. Closed
        # figures are not rendered inline — the PDF is the output of this cell.
        fig = plt.gcf()
        pdf_filename = f"figures/spgc_arsr_{file_tag}_{order}.pdf"
        fig.savefig(pdf_filename)
        plt.close(fig)

### Distribution of fitted parameters

In [None]:
# Base names of the fitted parameters, in the order the tables are emitted
param_bases = ["gamma", "alpha_pl", "delta", "beta", "alpha_se"]

# parameter base name -> (symbol shown in the caption, name used in the label)
param_naming = {
    "gamma": ("\\gamma", "gamma"),
    "delta": ("\\delta", "delta"),
    "beta": ("\\beta", "beta"),
    "alpha_pl": ("c", "c"),
    "alpha_se": ("b", "b"),
}

# LaTeX code of every table, collected for the combined output file
spgc_tables = []

for param_base in param_bases:
    # Mean ± std per language and pool order
    table_df = create_mean_std_table(
        spgc_metadata_sampled,
        param_base,
        "language",
        decimal_places=2
    )

    param_display, param_name = param_naming[param_base]
    caption = f"Mean and standard deviation of ${param_display}$ parameter for different languages"
    label = f"tab:spgc_{param_name}_mean_std"

    # Render the table as LaTeX and keep it for the combined file
    latex_code = export_table_to_latex(table_df, caption, label)
    spgc_tables.append(latex_code)

    # Show the table in the cell output
    print(f"Table for {param_base}:")
    print(table_df)
    print("\n")

# Write every table into one LaTeX document
collect_tables_to_file(spgc_tables, "figures/spgc_all_tables.tex", "SPGC Parameter Analysis")

In [None]:
all_params = [
    "gamma_1", "gamma_3", "gamma_9", "gamma_27",
    "alpha_pl_1", "alpha_pl_3", "alpha_pl_9", "alpha_pl_27",
    "delta_1", "delta_3", "delta_9", "delta_27",
    "beta_1", "beta_3", "beta_9", "beta_27",
    "alpha_se_1", "alpha_se_3", "alpha_se_9", "alpha_se_27",
]

# parameter base name -> symbol used in the plot title
title_symbols = {
    "gamma": r"\gamma",
    "delta": r"\delta",
    "beta": r"\beta",
    "alpha_pl": "c",
    "alpha_se": "b",
}
# parameter base name -> prefix that replaces it in the output file name
file_prefixes = {"alpha_pl": "c", "alpha_se": "b"}

for param in all_params:
    # "alpha_pl_27" -> ("alpha_pl", "27")
    param_base, order = param.rsplit("_", 1)

    if param_base in title_symbols:
        title = f"Fitted ${title_symbols[param_base]}$ (order {order}) for all languages"
    else:
        title = f"Fitted {param} for all languages"

    create_boxplot(
        df=spgc_metadata_sampled,
        col_value=param,
        col_category="language",
        title=title
    )

    if param_base in file_prefixes:
        param_name = file_prefixes[param_base] + order
    else:
        param_name = param

    # The plot lives on the figure that is current after the call; save and
    # close that one so the 20 figures do not stay open. Closed figures are not
    # rendered inline — the PDF is the output of this cell.
    fig = plt.gcf()
    pdf_filename = f"figures/spgc_{param_name}.pdf"
    fig.savefig(pdf_filename)
    plt.close(fig)

### Analysis of variance

In [None]:
# Create a dataframe to store the Kruskal-Wallis test results
# (rows: parameter, columns: pool order as a string)
kw_results = pd.DataFrame(
    index=['gamma', 'delta', 'beta'],
    columns=['1', '3', '9', '27']
)

# Define parameter groups
param_groups = {
    'gamma': ['gamma_1', 'gamma_3', 'gamma_9', 'gamma_27'],
    'delta': ['delta_1', 'delta_3', 'delta_9', 'delta_27'],
    'beta': ['beta_1', 'beta_3', 'beta_9', 'beta_27']
}

# Perform Kruskal-Wallis test for each parameter
for group_name, group_params in param_groups.items():
    for param in group_params:
        pool_order = param.split('_')[1]  # Extract pool order (1, 3, 9, 27)

        # One sample per language; languages without any fitted value are left out
        samples = []
        for lang in spgc_metadata_sampled['language'].unique():
            data = spgc_metadata_sampled.loc[spgc_metadata_sampled['language'] == lang, param].dropna()
            if len(data) > 0:
                samples.append(data)

        # Need at least 2 groups for the test
        if len(samples) > 1:
            _, p_value = sps.kruskal(*samples)
            kw_results.loc[group_name, pool_order] = p_value
        else:
            kw_results.loc[group_name, pool_order] = float('nan')

# Format p-values with scientific notation for small values.
# Column-wise Series.map is used instead of the deprecated DataFrame.applymap.
kw_results_formatted = kw_results.apply(
    lambda column: column.map(lambda x: f"{x:.2e}" if pd.notnull(x) else "NaN")
)

# Display the results
kw_results_formatted

In [None]:
# parameter base name -> symbol used in the heatmap title
dunn_title_symbols = {"gamma": r"\gamma", "delta": r"\delta", "beta": r"\beta"}

for i, param in enumerate(all_params, 1):
    # "gamma_27" -> ("gamma", "27")
    param_base, order = param.rsplit("_", 1)

    # Titles with greek letters; other parameters fall back to the column name
    if param_base in dunn_title_symbols:
        title = f"p-value of Dunn test for ${dunn_title_symbols[param_base]}$ (order {order})"
    else:
        title = f"p-value of Dunn test for {param}"

    create_heatmap(
        df=spgc_metadata_sampled,
        col_value=param,
        col_category="language",
        title=title
    )

    # The heatmap lives on the figure that is current after the call; save and
    # close that one so figures do not pile up across iterations. Closed
    # figures are not rendered inline — the PDF is the output of this cell.
    fig = plt.gcf()
    pdf_filename = f"figures/spgc_heatmap-{i}.pdf"
    fig.savefig(pdf_filename)
    plt.close(fig)

In [None]:
# Helper function to calculate mean and std with consistent formatting
def format_mean_std(data, is_percentage=False):
    """
    Format mean ± std with 2 significant digits for std, skipping infinity values.

    Args:
        data: Numeric pandas Series (NaN values are expected to be dropped by the caller)
        is_percentage: Multiply mean and std by 100 before formatting

    Returns:
        "mean ± std" string, or "N/A" when no finite values are available
    """
    # Filter out infinity values
    filtered_data = data[~np.isinf(data)]

    # Check if we have any data left after filtering
    if len(filtered_data) == 0:
        return "N/A"  # No finite values available

    mean_val = filtered_data.mean()
    std_val = filtered_data.std()

    # Scale first, so the precision below is derived from the numbers that are
    # actually printed and the zero-std branch is scaled as well
    if is_percentage:
        mean_val *= 100
        std_val *= 100

    # std is NaN for a single observation and 0 for constant data
    if pd.isna(std_val) or std_val == 0:
        return f"{mean_val:.2f} ± 0.00"

    # Decimal places needed for 2 significant digits in std
    magnitude = int(np.floor(np.log10(abs(std_val))))
    significant_places = max(0, 2 - magnitude - 1)

    # Format with proper precision
    format_str = f"{{:.{significant_places}f}} ± {{:.{significant_places}f}}"
    return format_str.format(round(mean_val, significant_places), round(std_val, significant_places))

# Build the failure-rate table row by row: a fit counts as failed when its
# leading parameter (gamma for the power law, delta for the stretched
# exponential) is missing.
failure_rows = []

for order in [1, 3, 9, 27]:
    # mean of a boolean series = proportion of True values
    pl_null_percentage = 100 * spgc_metadata_sampled[f"gamma_{order}"].isnull().mean()
    se_null_percentage = 100 * spgc_metadata_sampled[f"delta_{order}"].isnull().mean()

    pl_error_data = spgc_metadata_sampled[f"error_pl_{order}"].dropna()
    se_error_data = spgc_metadata_sampled[f"error_se_{order}"].dropna()

    failure_rows.append({
        'Pooling order': str(order),
        # "\%" rather than "%": a bare percent sign starts a LaTeX comment and
        # would swallow the rest of the table row
        'PL failure rate': f"{pl_null_percentage:.2f}\\%",
        'PL avg. SSLR': format_mean_std(pl_error_data),
        'SE failure rate': f"{se_null_percentage:.2f}\\%",
        'SE avg. SSLR': format_mean_std(se_error_data),
    })

# Create the table; the pooling order is the index so that the export writes it
# as the first column instead of a meaningless 0..3 row counter
failure_rates_df = pd.DataFrame(failure_rows).set_index('Pooling order')

# Export to LaTeX with consistent formatting
failure_rates_latex = export_table_to_latex(
    failure_rates_df,
    caption="Failure rates and average error metrics for SPGC corpus",
    label="tab:spgc_failure_rates"
)

print(failure_rates_latex)

## Human vs LLM Corpus

### Prepare the data

In [None]:
# Read the fitted results of the Human vs LLM corpus
df_human_vs_llm_sampled = pd.read_csv("results/human_vs_llm_sampled_after.csv")

# Flip the sign of gamma for every pool order
gamma_columns = [f"gamma_{pool_order}" for pool_order in (1, 3, 9, 27)]
df_human_vs_llm_sampled[gamma_columns] = -df_human_vs_llm_sampled[gamma_columns]

# Count missing values per fitted-parameter column (names starting with gamma, delta or beta)
selected_columns = df_human_vs_llm_sampled.filter(regex="^(gamma|delta|beta)").columns
missing_values = df_human_vs_llm_sampled[selected_columns].isnull().sum()
total_rows = len(df_human_vs_llm_sampled)
missing_percentage = (missing_values / total_rows) * 100

missing_data_summary = pd.DataFrame({
    "Parameter Name": missing_values.index,
    "Missing Count": missing_values.values,
    "Missing Percentage": missing_percentage.values
})

missing_data_summary

### Failure ratio and error metrics

In [None]:
pool_orders = [1, 3, 9, 27]

# (error column prefix, tag used in the file name, model name used in the title)
error_models = [
    ("error_pl", "pl", "power law"),
    ("error_se", "se", "stretched exponential"),
]

for order in pool_orders:
    for error_prefix, file_tag, model_label in error_models:
        create_boxplot(
            df_human_vs_llm_sampled,
            col_value=f"{error_prefix}_{order}",
            col_category="source",
            title=f"SSLR for {model_label} (order {order})"
        )
        # The plot lives on the figure that is current after the call; save and
        # close that one so figures do not pile up across iterations. Closed
        # figures are not rendered inline — the PDF is the output of this cell.
        fig = plt.gcf()
        pdf_filename = f"figures/hllm_arsr_{file_tag}_{order}.pdf"
        fig.savefig(pdf_filename)
        plt.close(fig)

### Distribution of fitted parameters

In [None]:
# Base names of the fitted parameters, in the order the tables are emitted
param_bases = ["gamma", "alpha_pl", "delta", "beta", "alpha_se"]

# parameter base name -> (symbol shown in the caption, name used in the label)
param_naming = {
    "gamma": ("\\gamma", "gamma"),
    "delta": ("\\delta", "delta"),
    "beta": ("\\beta", "beta"),
    "alpha_pl": ("c", "c"),
    "alpha_se": ("b", "b"),
}

# LaTeX code of every table, collected for the combined output file
hllm_tables = []

for param_base in param_bases:
    # Mean ± std per source and pool order
    table_df = create_mean_std_table(
        df_human_vs_llm_sampled,
        param_base,
        "source",
        decimal_places=4
    )

    param_display, param_name = param_naming[param_base]
    caption = f"Mean and standard deviation of ${param_display}$ parameter for different sources"
    label = f"tab:hllm_{param_name}_mean_std"

    # Render the table as LaTeX and keep it for the combined file
    latex_code = export_table_to_latex(table_df, caption, label)
    hllm_tables.append(latex_code)

    # Show the table in the cell output
    print(f"Table for {param_base}:")
    print(table_df)
    print("\n")

# Write every table into one LaTeX document
collect_tables_to_file(hllm_tables, "figures/hllm_all_tables.tex", "Human vs LLM Parameter Analysis")

In [None]:
# parameter base name -> symbol used in the plot title
title_symbols = {
    "gamma": r"\gamma",
    "delta": r"\delta",
    "beta": r"\beta",
    "alpha_pl": "c",
    "alpha_se": "b",
}
# parameter base name -> prefix that replaces it in the output file name
file_prefixes = {"alpha_pl": "c", "alpha_se": "b"}

for param in all_params:
    # "alpha_pl_27" -> ("alpha_pl", "27")
    param_base, order = param.rsplit("_", 1)

    # This corpus is grouped by source, so every title says "sources"
    # (the c and b titles previously said "languages")
    if param_base in title_symbols:
        title = f"Fitted ${title_symbols[param_base]}$ (order {order}) for all sources"
    else:
        title = f"Fitted {param} for all sources"

    create_boxplot(
        df=df_human_vs_llm_sampled,
        col_value=param,
        col_category="source",
        title=title
    )

    if param_base in file_prefixes:
        param_name = file_prefixes[param_base] + order
    else:
        param_name = param

    # The plot lives on the figure that is current after the call; save and
    # close that one so the figures do not stay open. Closed figures are not
    # rendered inline — the PDF is the output of this cell.
    fig = plt.gcf()
    pdf_filename = f"figures/hllm_{param_name}.pdf"
    fig.savefig(pdf_filename)
    plt.close(fig)

### Analysis of variance

In [None]:
# Create a dataframe to store the Kruskal-Wallis test results
# (rows: parameter, columns: pool order as a string)
kw_results = pd.DataFrame(
    index=['gamma', 'delta', 'beta'],
    columns=['1', '3', '9', '27']
)

# Define parameter groups
param_groups = {
    'gamma': ['gamma_1', 'gamma_3', 'gamma_9', 'gamma_27'],
    'delta': ['delta_1', 'delta_3', 'delta_9', 'delta_27'],
    'beta': ['beta_1', 'beta_3', 'beta_9', 'beta_27']
}

# Perform Kruskal-Wallis test for each parameter
for group_name, group_params in param_groups.items():
    for param in group_params:
        pool_order = param.split('_')[1]  # Extract pool order (1, 3, 9, 27)

        # One sample per source; sources without any fitted value are left out
        samples = []
        for source in df_human_vs_llm_sampled['source'].unique():
            data = df_human_vs_llm_sampled.loc[df_human_vs_llm_sampled['source'] == source, param].dropna()
            if len(data) > 0:
                samples.append(data)

        # Need at least 2 groups for the test
        if len(samples) > 1:
            _, p_value = sps.kruskal(*samples)
            kw_results.loc[group_name, pool_order] = p_value
        else:
            kw_results.loc[group_name, pool_order] = float('nan')

# Format p-values with scientific notation for small values.
# Column-wise Series.map is used instead of the deprecated DataFrame.applymap.
kw_results_formatted = kw_results.apply(
    lambda column: column.map(lambda x: f"{x:.2e}" if pd.notnull(x) else "NaN")
)

# Display the results
kw_results_formatted

In [None]:
# parameter base name -> symbol used in the heatmap title
dunn_title_symbols = {"gamma": r"\gamma", "delta": r"\delta", "beta": r"\beta"}

for i, param in enumerate(all_params, 1):
    # "gamma_27" -> ("gamma", "27")
    param_base, order = param.rsplit("_", 1)

    # Titles with greek letters; other parameters fall back to the column name
    if param_base in dunn_title_symbols:
        title = f"p-value of Dunn test for ${dunn_title_symbols[param_base]}$ (order {order})"
    else:
        title = f"p-value of Dunn test for {param}"

    create_heatmap(
        df=df_human_vs_llm_sampled,
        col_value=param,
        col_category="source",
        title=title
    )

    # The heatmap lives on the figure that is current after the call; save and
    # close that one so figures do not pile up across iterations. Closed
    # figures are not rendered inline — the PDF is the output of this cell.
    fig = plt.gcf()
    pdf_filename = f"figures/hllm_heatmap-{i}.pdf"
    fig.savefig(pdf_filename)
    plt.close(fig)

In [None]:
# Build the failure-rate table for human vs LLM row by row: a fit counts as
# failed when its leading parameter (gamma for the power law, delta for the
# stretched exponential) is missing.
failure_rows = []

for order in [1, 3, 9, 27]:
    # mean of a boolean series = proportion of True values
    pl_null_percentage = 100 * df_human_vs_llm_sampled[f"gamma_{order}"].isnull().mean()
    se_null_percentage = 100 * df_human_vs_llm_sampled[f"delta_{order}"].isnull().mean()

    pl_error_data = df_human_vs_llm_sampled[f"error_pl_{order}"].dropna()
    se_error_data = df_human_vs_llm_sampled[f"error_se_{order}"].dropna()

    failure_rows.append({
        'Pooling order': str(order),
        # "\%" rather than "%": a bare percent sign starts a LaTeX comment and
        # would swallow the rest of the table row
        'PL failure rate': f"{pl_null_percentage:.2f}\\%",
        'PL avg. SSLR': format_mean_std(pl_error_data),
        'SE failure rate': f"{se_null_percentage:.2f}\\%",
        'SE avg. SSLR': format_mean_std(se_error_data),
    })

# Create the table; the pooling order is the index so that the export writes it
# as the first column instead of a meaningless 0..3 row counter
failure_rates_df = pd.DataFrame(failure_rows).set_index('Pooling order')

# Export to LaTeX with consistent formatting
failure_rates_latex = export_table_to_latex(
    failure_rates_df,
    caption="Failure rates and average error metrics for Human vs LLM corpus",
    label="tab:hllm_failure_rates"
)

print(failure_rates_latex)