# Init
This cell imports all required libraries, sets up output directories, and loads the data. It also performs initial preprocessing such as normalization and feature engineering.

In [None]:
# Import required libraries
import os
import json
import pandas as pd
from scipy.stats import ttest_ind, f_oneway, chi2_contingency
import statsmodels.api as sm
from statsmodels.formula.api import ols
from itertools import combinations, permutations
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scipy import stats
from statsmodels.sandbox.stats.multicomp import multipletests
from const import Const

# --- Setup output directory ---
output_dir = "../results/latex_tables/"
os.makedirs(output_dir, exist_ok=True)

# Start from a clean slate: delete files left over from a previous run.
# Only regular files are removed -- os.remove() raises on a directory, so any
# sub-directory inside the output folder is deliberately left untouched.
for file in os.listdir(output_dir):
    file_path = os.path.join(output_dir, file)
    if os.path.isfile(file_path):
        os.remove(file_path)

json_dir = ''  # Path to the JSON data file

# The path above is a placeholder that must be filled in before running.
if json_dir == '':
    raise ValueError("please set json_dir")

# --- Load data ---
with open(json_dir, 'r') as f:
    data = json.load(f)
# The per-face records are read from data['num_faces']['combined'].
df = pd.DataFrame(data['num_faces']['combined'])

# --- Normalize mean depth (min-max scaling of the Const.mean_depth column) ---
scaler = MinMaxScaler()
df[Const.mean_depth] = scaler.fit_transform(df[[Const.mean_depth]])

# Zone classification (3x3 grid): each face centre coordinate is split into
# three equal-width bins, and the two labels are combined as "<y>-<x>".
df['x_zone'] = pd.cut(df['face_center_x'], bins=3, labels=['left','center','right'])
df['y_zone'] = pd.cut(df['face_center_y'], bins=3, labels=['top','middle','bottom'])
df['grid_zone'] = df['y_zone'].astype(str) + '-' + df['x_zone'].astype(str)

# Define categorical and continuous features used by every analysis cell below
categorical_features = [Const.gender, Const.race, Const.age_range, Const.emotion]
continuous_features = [Const.centrality, Const.mean_depth, Const.face_center_y]

In [None]:
# Bookkeeping shared by every save_latex_table() call:
# {latex_img_filename: {img_label: number of subfigures written so far}}.
num_img_file = {}


def _format_table_cell(value) -> str:
    """Render one cell: two decimals for int/float, underscores -> spaces otherwise."""
    if isinstance(value, (int, float)):
        return f"{value:.2f}"
    return str(value).replace('_', ' ')


def save_latex_table(
    df: pd.DataFrame,
    caption: str,
    label: str,
    col_format: str = None,
    bold_header: bool = True,
    filename: str = "table.tex",
    mode: str = 'w',
    all: bool = False,
    img_label: str = '',
    latex_img_filename: str = '',
    figure_caption: str = '',
    num_figs_row: int = 3,
    num_figs_total: int = 9,
    include_row_labels: bool = True # New parameter to control row labels
) -> None:
    """
    Save a DataFrame as a LaTeX table in tabularx format and generate an image of the table.

    Three artefacts are produced inside the module-level ``output_dir``:
    the LaTeX table (written to ``filename``), an SVG rendering of the table
    (named after ``label``), and a LaTeX ``subfigure`` snippet that includes
    the SVG (written to ``latex_img_filename``). The module-level
    ``num_img_file`` counter is updated so that consecutive calls with the same
    ``latex_img_filename``/``img_label`` are grouped into ``figure``
    environments of ``num_figs_total`` subfigures each. The caller is
    responsible for writing the final ``\\end{figure}``.

    Args:
        df (pd.DataFrame): The DataFrame to save.
        caption (str): The caption for the LaTeX table and figure (underscores become spaces).
        label (str): The label for the LaTeX table and image (underscores are removed).
        col_format (str, optional): LaTeX column format. Defaults to None, which uses
            one ``l`` column for the row label and one ``X`` column per data column.
        bold_header (bool): If True, column headers and row names are prefixed with ``\\bf``.
        filename (str): Output .tex filename.
        mode (str): File write mode ('w' or 'a'), used for both .tex outputs.
        all (bool): Unused; kept only so existing callers keep working.
        img_label (str): Label for the image figure.
        latex_img_filename (str): Output filename for LaTeX image code. When empty,
            the image snippet is not written (there is no file to write it to).
        figure_caption (str): Caption for the subfigure.
        num_figs_row (int): Number of figures per row in the LaTeX document.
        num_figs_total (int): Total number of figures per figure environment.
        include_row_labels (bool): Whether to include row labels in the table image.

    Raises:
        ValueError: If ``num_figs_total`` is not divisible by ``num_figs_row``.
    """
    if num_figs_total % num_figs_row != 0:
        raise ValueError("num_figs_total must be divisible by num_figs_row")

    # Track number of images per label/filename
    label_counts = num_img_file.setdefault(latex_img_filename, {})
    label_counts.setdefault(img_label, 0)

    # Set column format if not provided
    num_data_cols = len(df.columns)
    if col_format is None:
        col_format = "| l | " + "X" * num_data_cols + ' |'
    # Strip underscores from caption and label (they are special characters in LaTeX text)
    caption = caption.replace('_', ' ')
    label = label.replace('_', '')

    # ---- LaTeX table -------------------------------------------------------
    latex_str = f"\\begin{{table}}[caption={{{caption}}}, label={label}]\n"
    latex_str += "\t\\centering\n"
    latex_str += f"\t\\begin{{tabularx}}{{\\textwidth}}{{{col_format}}}\n"
    latex_str += "\t\t \\hline\n"

    # Column headers
    header_prefix = "\\bf " if bold_header else ""
    header_row = [header_prefix + str(c) for c in df.columns]
    latex_str += "\t\t   & " + " & ".join(header_row) + " \\\\ \n"
    latex_str += "         \\hline\n"

    # Table rows. iterrows() always yields exactly one row per iteration, even
    # when index labels repeat (df.loc[label] would return a DataFrame then).
    for idx, row in df.iterrows():
        row_name = f"\\bf {str(idx)}" if bold_header else str(idx)
        values = " & ".join(_format_table_cell(val) for val in row)
        latex_str += f"         {row_name} & {values}\\\\\n"

    latex_str += "         \\hline\n"
    latex_str += "\t\\end{tabularx}\n"
    latex_str += "\\end{table}\n"
    # Write LaTeX table to file (any remaining underscore is dropped)
    with open(os.path.join(output_dir, filename), mode) as f:
        f.write(latex_str.replace('_', ''))
        if mode == 'a':
            f.write("\n\n") # Add newlines between tables when appending

    # ---- Table rendered as an image (SVG) ----------------------------------
    image_filename = os.path.join(output_dir, f"{label.replace(':', '_')}.svg")
    rows, cols = df.shape
    row_height = 0.2  # Height per row (inches)
    col_width = 1.2   # Width per column (inches)
    fig_height = max(2, rows * row_height)
    fig_width = max(5, cols * col_width)

    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    # Fully transparent background for both axes and figure
    ax.set_facecolor('#00000000')
    fig.patch.set_facecolor('#00000000')
    ax.axis('off')

    # Format cell text
    cell_text = [[_format_table_cell(x) for x in row] for row in df.values]

    # Include row labels if requested
    if include_row_labels:
        table = ax.table(cellText=cell_text, colLabels=df.columns, rowLabels=df.index, loc='center')
        # Adjust column width to include row labels (column -1 holds the row labels)
        table.auto_set_column_width(list(range(-1, len(df.columns))))
    else:
        table = ax.table(cellText=cell_text, colLabels=df.columns, loc='center')
        # Adjust column width without row labels
        table.auto_set_column_width(list(range(len(df.columns))))

    table.auto_set_font_size(False)
    table.set_fontsize(10)

    # Make the table prettier
    for (i, j), cell in table.get_celld().items():
        cell.set_text_props(ha='center', va='center') # Center align text
        cell.set_edgecolor('black') # Add cell borders
        cell.set_linewidth(0.5) # Set border thickness
        cell.pad = 0.1 # Add padding to cells

        if i == 0: # Header row
            cell.set_text_props(weight='bold') # Bold header text
            cell.set_facecolor('#bdbdbd') # Darker gray background for header
        elif include_row_labels and i > 0 and j == -1: # First column (row labels)
            cell.set_text_props(weight='bold', ha='right') # Bold and right-align row labels
            cell.set_facecolor('#e0e0e0') # Light gray background for row labels
        elif i > 0 and i % 2 != 0: # Odd rows (excluding header and row labels if present)
            cell.set_facecolor('#cccccc') # Darker light gray background for alternating rows

    # Operate on the figure created above explicitly instead of pyplot's
    # implicit "current figure", and close exactly that figure afterwards.
    fig.tight_layout()
    fig.savefig(image_filename, bbox_inches='tight')
    plt.close(fig)

    # ---- LaTeX code for including the image --------------------------------
    count = label_counts[img_label]  # subfigures already written for this label
    img_latex_str = ''
    # A new label starts in a file that already holds another label's figure:
    # close the previous figure environment first.
    if len(label_counts) > 1 and count == 0:
        img_latex_str += "\\end{figure}\n\n\n"

    if count == 0:
        img_latex_str += f"\\begin{{figure}}[label={{{img_label}}}, caption={{{caption}}}]\n\t\\centering\n"

    # Horizontal filler between subfigures that share a row
    if count % num_figs_row != 0:
        img_latex_str += '\t\\hfill\n'

    # Current figure is full: close it and open a numbered continuation figure
    if count % num_figs_total == 0 and count > 0:
        page = count // num_figs_total
        img_latex_str += "\\end{figure}\n\n\n"
        img_latex_str += f"\\begin{{figure}}[label={{{img_label}:{page}}}, caption={{{caption} - {page}}}]\n\t\\centering\n"

    subfig_width = (1 / num_figs_row) - 0.02
    img_latex_str += f"\t\\begin{{subfigure}}{{{subfig_width:.2f}\\textwidth}}\n"

    # Path of the image as referenced from the LaTeX document
    latex_image_path = os.path.join('figures/results/', image_filename.replace(output_dir, ''))
    if '.svg' in image_filename:
        img_latex_str += f"\t\t\\includesvg[width=\\textwidth]{{{latex_image_path}}}\n"
    else:
        img_latex_str += f"\t\t\\includegraphics[width=\\textwidth]{{{latex_image_path}}}\n"
    img_latex_str += f"\t\t\\caption{{{figure_caption}}}\n"
    img_latex_str += f"\t\t\\label{{fig:{label}:{count}}}\n"
    img_latex_str += "\t\\end{subfigure}\n"

    # Write LaTeX image code to file. With an empty filename the join below
    # would point at the output directory itself and open() would fail, so the
    # snippet is skipped in that case.
    if latex_img_filename:
        with open(os.path.join(output_dir, latex_img_filename), mode) as f:
            f.write(img_latex_str)

    label_counts[img_label] += 1

In [None]:
# ============== Two way =======================
# For every ordered pair of categorical features, build a row-normalised
# cross-tabulation (percentages) and export it as a LaTeX table plus an image.

# Define filenames for all and relevant results and figures
all_results_filename = 'twoway_cor_all.tex'
relevant_results_filename = 'twoway_cor_relevant.tex'
all_results_img_filename = 'Twoway_figures_all.tex'
relevant_results_img_filename = 'Twoway_figures_sig.tex'

# --- Two-Way Correlations ---
# Every output file starts (mode 'w' truncates it) with the subsection title
subsection_latex = "\\subsection{Two-Way Correlations}\n\n"
for header_target in (
    all_results_filename,
    relevant_results_filename,
    all_results_img_filename,
    relevant_results_img_filename,
):
    with open(os.path.join(output_dir, header_target), 'w') as f:
        f.write(subsection_latex)

# Fixed display order for the emotion/race tables, keyed by feature name.
# NOTE(review): the values are assumed to match the labels present in the
# data; reindex() yields NaN rows/columns for labels that do not -- confirm.
preferred_order = {
    "emotion": ["Angry", "Disgust", "Fear", "Happy", "Neutral", "Sad", "Surprise"],
    "race": ["Asian", "Black", "Indian", "Latino Hispanic", "Middle Eastern", "White"],
}

# Iterate through all permutations of two categorical features. Both orders
# are needed because the table is normalised by row (the first feature).
for f1, f2 in permutations(categorical_features, 2):
    # Create a crosstabulation of the two features and normalize by index (row)
    crosstab_percentage = pd.crosstab(df[f1], df[f2], normalize='index') * 100
    # Reindex the emotion/race tables. The order is looked up per axis: f1
    # labels the rows and f2 the columns, so (race, emotion) needs the race
    # list on the index and the emotion list on the columns.
    if {f1, f2} == {"emotion", "race"}:
        crosstab_percentage = crosstab_percentage.reindex(
            index=preferred_order[f1],
            columns=preferred_order[f2],
        )

    # Define column format for the LaTeX table
    col_format = "| l | " + "X" * len(crosstab_percentage.columns) + ' |'

    # Label-safe names (no separator) and display names (space separated)
    f1_wout = f1.title().replace('_', '')
    f2_wout = f2.title().replace('_', '')

    f1 = f1.title().replace('_', ' ')
    f2 = f2.title().replace('_', ' ')

    # Define LaTeX subsubsection title
    subsection_title = f"{f1} and {f2}"
    subsection_latex = f"\\subsubsection{{{subsection_title}}}\n\n"

    rounded_crosstab = crosstab_percentage.round(2)

    # Write the subsubsection title to the all results file
    with open(os.path.join(output_dir, all_results_filename), 'a') as f:
        f.write(subsection_latex)
    # Save the full crosstabulation table to the all results LaTeX file and generate an image
    save_latex_table(
        rounded_crosstab,
        caption="All Correlated Results",
        label=f"tab:corstat:{f1_wout}{f2_wout}",
        filename=all_results_filename,
        mode='a',
        col_format=col_format,
        img_label="fig:corstat",
        latex_img_filename=all_results_img_filename,
        figure_caption=f"All Correlated Results - {f1} and {f2}",
        all=True,
        num_figs_row=3,
        num_figs_total=9,
    )

    # For relevant results, include all correlation tables as they are descriptive
    # Write the subsubsection title to the relevant results file
    with open(os.path.join(output_dir, relevant_results_filename), 'a') as f:
        f.write(subsection_latex)
    # Save the full crosstabulation table to the relevant results LaTeX file and generate an image
    save_latex_table(
        rounded_crosstab,
        caption="Significant Correlated Results",
        label=f"tab:corstatsig:{f1_wout}{f2_wout}",
        filename=relevant_results_filename,
        mode='a',
        col_format=col_format,
        img_label="fig:corstatsig",
        latex_img_filename=relevant_results_img_filename,
        figure_caption=f"Significant Correlated Results - {f1} and {f2}",
        num_figs_row=2,
        num_figs_total=6,
    )

# End the figure environments in the image LaTeX files
with open(os.path.join(output_dir, all_results_img_filename), 'a') as f:
    f.write('\\end{figure}\n\n')
with open(os.path.join(output_dir, relevant_results_img_filename), 'a') as f:
    f.write('\\end{figure}\n\n')

In [None]:
# ============== Three way =======================
# For every ordered triple of categorical features: distribution (in percent)
# of the third feature within each (first, second) group, exported as a LaTeX
# table plus an image -- once to the "all" files and once to the "relevant" files.

# Output files for the three-way correlations
all_results_filename = 'threeway_cor_all.tex'
relevant_results_filename = 'threeway_cor_relevant.tex'
all_results_img_filename = 'threeway_fig_all.tex'
relevant_results_img_filename = 'threeway_fig_relevant.tex'

# Each output file is (re)created with the subsection heading as first content
subsection_latex = "\\subsection{Three-Way Correlations}\n\n"
for header_target in (
    all_results_filename,
    relevant_results_filename,
    all_results_img_filename,
    relevant_results_img_filename,
):
    with open(os.path.join(output_dir, header_target), 'w') as f:
        f.write(subsection_latex)

for i, combo in enumerate(permutations(categorical_features, 3)):
    f1, f2, f3 = combo
    # Share of each f3 value inside every (f1, f2) group; combinations that
    # never occur become 0 after unstacking.
    grouped_percentage = (
        df.groupby([f1, f2])[f3]
        .value_counts(normalize=True)
        .unstack()
        .fillna(0)
        * 100
    )
    rounded_percentage = grouped_percentage.round(2)

    # One "X" column per f3 value, preceded by the row-label column
    col_format = "| l | " + "X" * len(grouped_percentage.columns) + ' |'

    # Names without separator (used in labels) and with spaces (used in text)
    f1_wout, f2_wout, f3_wout = (name.title().replace('_', '') for name in combo)
    f1, f2, f3 = (name.title().replace('_', ' ') for name in combo)

    # Heading shared by both result files for this triple
    subsection_title = f"{f1}, {f2}, {f3}"
    subsection_latex = f"\\subsubsection{{{subsection_title}}}\n\n"

    # --- "all" results: heading, then table + image snippet ---
    with open(os.path.join(output_dir, all_results_filename), 'a') as f:
        f.write(subsection_latex)
    save_latex_table(
        rounded_percentage,
        caption=f"All Three-way Correlation: {f1}, {f2}, {f3}",
        label=f"tab:corstat3:{f1_wout}{f2_wout}{f3_wout}",
        filename=all_results_filename,
        mode='a',  # the file already holds the heading written above
        col_format=col_format,
        img_label="fig:corstat3",
        latex_img_filename=all_results_img_filename,
        figure_caption=f"All Three-way Correlation: {f1}, {f2}, {f3}",
        all=True,
        num_figs_row=3,
        num_figs_total=9,
    )

    # --- "relevant" results: the tables are descriptive, so every one is kept ---
    with open(os.path.join(output_dir, relevant_results_filename), 'a') as f:
        f.write(subsection_latex)
    save_latex_table(
        rounded_percentage,
        caption=f"Significant Three-way Correlation: {f1}, {f2}, {f3}",
        label=f"tab:corstat3sig:{f1_wout}{f2_wout}{f3_wout}",
        filename=relevant_results_filename,
        mode='a',  # the file already holds the heading written above
        col_format=col_format,
        img_label="fig:corstat3sig",
        latex_img_filename=relevant_results_img_filename,
        figure_caption=f"Significant All Three-way Correlation: {f1}, {f2}, {f3}",
        num_figs_row=2,
        num_figs_total=6,
    )

# Close the figure environment left open by the last save_latex_table() call
for figure_target in (all_results_img_filename, relevant_results_img_filename):
    with open(os.path.join(output_dir, figure_target), 'a') as f:
        f.write('\\end{figure}\n\n')

In [None]:
# --- T-tests ---
# Pairwise independent t-tests: for every categorical feature, every pair of
# its values is compared on every continuous feature.
# NOTE: the p-values are compared against 0.05 as-is; no multiple-comparison
# correction is applied in this cell.

# Result stores, keyed first by continuous feature, then by categorical feature.
# Keys are derived from continuous_features so the stores always cover every
# measure the loops below iterate over.
t_results = {feature: {} for feature in continuous_features}
significant_t_results = {feature: {} for feature in continuous_features}

# Column names shared by the "all" and "significant" result tables
t_columns = ["Category", "Group1", "Group2", "Measure", "Mean1", "Mean2", "t-stat", "p-value"]

# Define filenames for all and relevant t-test results and figures
all_results_filename = 'ttest_all.tex'
relevant_results_filename = 'ttest_relevant.tex'
all_results_img_filename = 'ttest_figure_all.tex'
relevant_results_img_filename = 'ttest_figure_relevant.tex'

# Every output file (tables and image snippets) starts with the section title
section_latex = "\\section{T-tests}\n\n"
for header_target in (
    all_results_filename,
    relevant_results_filename,
    all_results_img_filename,
    relevant_results_img_filename,
):
    with open(os.path.join(output_dir, header_target), 'w') as f:
        f.write(section_latex)


# Iterate through each categorical feature
for cat_feature in categorical_features:
    # Unique, non-missing values of the categorical feature
    unique_values = df[cat_feature].dropna().unique()
    # Iterate through all combinations of two unique values
    for val1, val2 in combinations(unique_values, 2):
        # Row selection does not depend on the measure, so do it once per pair
        group1_rows = df[df[cat_feature] == val1]
        group2_rows = df[df[cat_feature] == val2]
        # Iterate through each continuous feature
        for cont_feature in continuous_features:
            g1 = group1_rows[cont_feature].dropna()
            g2 = group2_rows[cont_feature].dropna()
            # The test needs more than one sample in both groups
            if len(g1) <= 1 or len(g2) <= 1:
                continue
            ttest_result = ttest_ind(g1, g2)
            # Store the results (category, groups, continuous feature, means, t-stat, p-value)
            result_row = [cat_feature, val1, val2, cont_feature, g1.mean(), g2.mean(), ttest_result.statistic, ttest_result.pvalue]
            t_results[cont_feature].setdefault(cat_feature, []).append(result_row)
            # Significance level of 0.05
            if ttest_result.pvalue < 0.05:
                significant_t_results[cont_feature].setdefault(cat_feature, []).append(result_row)

# Whether save_latex_table() opened a figure environment in each image file;
# a closing \end{figure} is only valid if it did.
all_figure_opened = False
relevant_figure_opened = False

# Iterate through the continuous features (keys in t_results)
for key in t_results:
    # Define LaTeX subsection title for the continuous feature
    subsection_latex = f"\\subsection{{{key.replace('_', ' ').title()}}}\n\n"

    # Write the subsection title to the all and relevant results files
    with open(os.path.join(output_dir, all_results_filename), 'a') as f:
        f.write(subsection_latex)

    with open(os.path.join(output_dir, relevant_results_filename), 'a') as f:
        f.write(subsection_latex)

    # Iterate through the categorical features (keys in the nested dictionary)
    for cat in categorical_features:
        # No pair of groups had enough samples for this combination: there is
        # nothing to tabulate (and no entry in the result store).
        if cat not in t_results[key]:
            continue

        # Create a DataFrame from the all t-test results for the current features
        t_df = pd.DataFrame(t_results[key][cat], columns=t_columns)

        # Label-safe names (no separator) and display names (space separated)
        key_wout = key.replace('_', '').title()
        cat_wout = cat.replace('_', '').title()

        key_w = key.replace('_', ' ').title()
        cat_w = cat.replace('_', ' ').title()

        # Subsubsection title: use the display name, since a raw feature name
        # may contain underscores, which are special characters in LaTeX text.
        subsubsection_latex = f"\\subsubsection{{{cat_w}}}\n\n"

        # Write the subsubsection title to the all results file
        with open(os.path.join(output_dir, all_results_filename), 'a') as f:
            f.write(subsubsection_latex)
        # Save the all t-test results DataFrame to a LaTeX table and generate an image
        save_latex_table(
            t_df,
            caption="Full t-test Results",
            label=f"tab:ttests:{key_wout}:{cat_wout}",
            bold_header=True,
            filename=all_results_filename,
            mode='a',
            img_label="fig:ttests",
            latex_img_filename=all_results_img_filename,
            figure_caption=f"Full t-test Results - {key_w} for {cat_w}",
            all=True,
            num_figs_row=3,
            num_figs_total=9,
            include_row_labels=False,
        )
        all_figure_opened = True

        # Check if there are significant results for the current features
        if cat in significant_t_results[key]:

            # Write the subsubsection title to the relevant results file
            with open(os.path.join(output_dir, relevant_results_filename), 'a') as f:
                f.write(subsubsection_latex)
            # Create a DataFrame from the significant t-test results
            significant_t_df = pd.DataFrame(significant_t_results[key][cat], columns=t_columns)
            # Save the significant t-test results DataFrame to a LaTeX table and generate an image
            save_latex_table(
                significant_t_df,
                caption="Significant T-test Results",
                label=f"tab:ttestssig:{key_wout}:{cat_wout}",
                bold_header=True,
                filename=relevant_results_filename,
                mode='a',
                img_label="fig:ttestssig",
                latex_img_filename=relevant_results_img_filename,
                figure_caption=f"Significant t-test Results - {key_w} for {cat_w}",
                num_figs_row=2,
                num_figs_total=6,
                include_row_labels=False,
            )
            relevant_figure_opened = True

# End the figure environments in the image LaTeX files (only where one was opened)
if all_figure_opened:
    with open(os.path.join(output_dir, all_results_img_filename), 'a') as f:
        f.write('\\end{figure}\n\n')
if relevant_figure_opened:
    with open(os.path.join(output_dir, relevant_results_img_filename), 'a') as f:
        f.write('\\end{figure}\n\n')

In [None]:
# --- One-Way ANOVA ---
# One test per (categorical feature, continuous feature) pair: does the mean of
# the continuous feature differ between the categories?

# Initialize lists to store ANOVA results (all and significant)
anova_results = []
significant_anova_results = []

# Column names shared by both result tables
anova_columns = ["Category", "Measure", "F-stat", "p-value"]

# Define filenames for all and relevant ANOVA results and figures
all_results_filename = 'anova_one_all.tex'
relevant_results_filename = 'anova_one_relevant.tex'

all_results_img_filename = 'anova_one_figure_all.tex'
relevant_results_img_filename = 'anova_one_figure_relevant.tex'

# Every output file (tables and image snippets) starts with the subsection title
subsection_latex = "\\subsection{One-Way ANOVA}\n\n"
for header_target in (
    all_results_filename,
    relevant_results_filename,
    all_results_img_filename,
    relevant_results_img_filename,
):
    with open(os.path.join(output_dir, header_target), 'w') as f:
        f.write(subsection_latex)

# Iterate through each categorical feature
for cat_feature in categorical_features:
    # Missing categories are dropped first: a NaN "category" never matches any
    # row, so it would create an empty group and the size check below would
    # then skip the test for this feature entirely.
    category_values = df[cat_feature].dropna().unique()
    # Iterate through each continuous feature
    for cont_feature in continuous_features:
        # One sample per category, with missing measurements removed
        groups = [
            df.loc[df[cat_feature] == val, cont_feature].dropna()
            for val in category_values
        ]
        # Perform one-way ANOVA if there is more than one group and each group has more than one sample
        if len(groups) > 1 and all(len(g) > 1 for g in groups):
            f_statistic, p_value = f_oneway(*groups)
            # Store the results (category, continuous feature, F-stat, p-value)
            result_row = [cat_feature, cont_feature, f_statistic, p_value]
            anova_results.append(result_row)
            # Significance level of 0.05
            if p_value < 0.05:
                significant_anova_results.append(result_row)

# Tables and figures are only written when there is at least one result row;
# the closing \end{figure} belongs to the figure that save_latex_table() opens,
# so it is emitted under the same condition.
if anova_results:
    anova_df = pd.DataFrame(anova_results, columns=anova_columns)
    # Save the all ANOVA results DataFrame to a LaTeX table and generate an image
    save_latex_table(
        anova_df,
        caption="Full One-Way ANOVA Results",
        label="tab:anova1",
        filename=all_results_filename,
        mode='a',
        img_label="fig:anova1",
        latex_img_filename=all_results_img_filename,
        figure_caption="Full One-Way ANOVA Results",
        all=True,
        num_figs_row=3,
        num_figs_total=9,
        include_row_labels=False,
    )
    with open(os.path.join(output_dir, all_results_img_filename), 'a') as f:
        f.write('\\end{figure}\n\n')

# Check if there are significant ANOVA results
if significant_anova_results:
    significant_anova_df = pd.DataFrame(significant_anova_results, columns=anova_columns)
    # Save the significant ANOVA results DataFrame to a LaTeX table and generate an image
    save_latex_table(
        significant_anova_df,
        caption="Significant One-Way ANOVA Results",
        label="tab:anova1_sig",
        filename=relevant_results_filename,
        mode='a',
        img_label="fig:anova1_sig",
        latex_img_filename=relevant_results_img_filename,
        figure_caption="Significant One-Way ANOVA Results",
        num_figs_row=2,
        num_figs_total=6,
        include_row_labels=False,
    )
    with open(os.path.join(output_dir, relevant_results_img_filename), 'a') as f:
        f.write('\\end{figure}\n\n')

In [None]:
# --- Two-Way ANOVA ---
# For every unordered pair of categorical features and every continuous
# feature, fit an OLS model with both main effects and their interaction and
# export its type-2 ANOVA table. Tables with a significant interaction term
# (p < 0.05) are additionally written to the "relevant" files.

# Define filenames for all and relevant two-way ANOVA results and figures
all_results_filename = 'anova_two_all.tex'
relevant_results_filename = 'anova_two_relevant.tex'

all_results_img_filename = 'anova_two_figure_all.tex'
relevant_results_img_filename = 'anova_two_figure_relevant.tex'

# Every output file (tables and image snippets) starts with the subsection title
subsection_latex = "\\subsection{Two-Way ANOVA}\n\n"
for header_target in (
    all_results_filename,
    relevant_results_filename,
    all_results_img_filename,
    relevant_results_img_filename,
):
    with open(os.path.join(output_dir, header_target), 'w') as f:
        f.write(subsection_latex)

# Whether save_latex_table() opened a figure environment in each image file;
# a closing \end{figure} is only valid if it did.
all_figure_opened = False
relevant_figure_opened = False

# Iterate through all combinations of two categorical features
for f1, f2 in combinations(categorical_features, 2):
    # Ensure there are enough samples in each group combination for ANOVA.
    # The check does not depend on the measure, so it is done once per pair.
    # (Written as "not (... > 1)" so a NaN minimum also skips the pair.)
    if not (df.groupby([f1, f2]).size().min() > 1):
        continue

    # Label-safe names (no separator) and display names (space separated)
    f1_wout = f1.replace('_', '').title()
    f2_wout = f2.replace('_', '').title()

    f1_w = f1.replace('_', ' ').title()
    f2_w = f2.replace('_', ' ').title()

    # Iterate through each continuous feature
    for cont_feature in continuous_features:
        cont_feature_wout = cont_feature.replace('_', '').title()
        cont_feature_w = cont_feature.replace('_', ' ').title()

        # OLS formula: both main effects plus their interaction
        formula = f"{cont_feature} ~ C({f1}) * C({f2})"
        # Fit the OLS model and perform ANOVA (Type 2)
        model = ols(formula, data=df).fit()
        anova_table = sm.stats.anova_lm(model, typ=2)
        # Reset index and rename the 'index' column to 'Source'
        anova_table = anova_table.reset_index().rename(columns={"index": "Source"})
        rounded_table = anova_table.round(4)

        # Define LaTeX subsubsection title
        subsubsection_latex = f"\\subsubsection{{{f1_w} and {f2_w} on {cont_feature_w}}}\n\n"
        # Write the subsubsection title to the all results file
        with open(os.path.join(output_dir, all_results_filename), 'a') as f:
            f.write(subsubsection_latex)
        # Save the full ANOVA table to the all results LaTeX file and generate an image
        save_latex_table(
            rounded_table,
            caption="Full two-Way ANOVA",
            label=f"tab:anova2:{f1_wout}{f2_wout}{cont_feature_wout}",
            filename=all_results_filename,
            mode='a',
            img_label="fig:anova2",
            latex_img_filename=all_results_img_filename,
            figure_caption=f"Full two-Way ANOVA: {f1_w} and {f2_w} on {cont_feature_w}",
            all=True,
            num_figs_row=3,
            num_figs_total=9,
            include_row_labels=False,
        )
        all_figure_opened = True

        # Check for significant interaction term (p-value < 0.05)
        interaction_rows = anova_table['Source'] == f'C({f1}):C({f2})'
        interaction_p_value = anova_table.loc[interaction_rows, 'PR(>F)'].iloc[0]
        if interaction_p_value < 0.05:
            # If interaction is significant, write the subsubsection title to the relevant results file
            with open(os.path.join(output_dir, relevant_results_filename), 'a') as f:
                f.write(subsubsection_latex)
            # Save the full ANOVA table to the relevant results LaTeX file and generate an image
            save_latex_table(
                rounded_table,  # Save the full table if interaction is significant
                caption="Two-Way ANOVA (Significant Interaction)",
                label=f"tab:anova2sig:int{f1_wout}{f2_wout}{cont_feature_wout}",
                filename=relevant_results_filename,
                mode='a',
                img_label="fig:anova2sig",
                latex_img_filename=relevant_results_img_filename,
                figure_caption=f"Two-Way ANOVA: {f1_w} and {f2_w} on {cont_feature_w} (Significant Interaction)",
                num_figs_row=2,
                num_figs_total=6,
                include_row_labels=False,
            )
            relevant_figure_opened = True

# End the figure environments in the image LaTeX files (only where one was opened)
if all_figure_opened:
    with open(os.path.join(output_dir, all_results_img_filename), 'a') as f:
        f.write('\\end{figure}\n\n')
if relevant_figure_opened:
    with open(os.path.join(output_dir, relevant_results_img_filename), 'a') as f:
        f.write('\\end{figure}\n\n')

In [None]:
# --- 1. Spatial bias ---
# For each continuous feature, test whether its values differ between the
# groups of every demographic (one-way ANOVA for more than two groups,
# independent t-test for exactly two) and export the results to LaTeX.

# Define filenames for all and relevant spatial bias results and figures
all_results_filename = 'spatial_all.tex'
relevant_results_filename = 'spatial_relevant.tex'

all_results_img_filename = 'spatial_figure_all.tex'
relevant_results_img_filename = 'spatial_figure_relevant.tex'

# The result lists are re-created for every continuous feature below; this
# initial assignment only guarantees the names exist if the loop never runs.
centrality_results_all = []
centrality_results_relevant = []

# Define LaTeX subsection title for Centrality Bias
subsection_latex = "\\subsection*{Centrality Bias by Demographic}\n\n"

# Start all four output files from scratch with the subsection title.
# Mode 'w' (rather than append) matches the other analysis cells and keeps a
# re-run of this cell from duplicating the header, tables and LaTeX labels.
for out_name in (
    all_results_filename,
    relevant_results_filename,
    all_results_img_filename,
    relevant_results_img_filename,
):
    with open(os.path.join(output_dir, out_name), 'w') as f:
        f.write(subsection_latex)

result_columns = ["Demographic", "Test", "Statistic", "p-value"]

# Iterate through each continuous feature
for cont in continuous_features:
    # Results are collected per continuous feature, one table each
    centrality_results_all = []
    centrality_results_relevant = []

    # Iterate through each categorical feature (demographic)
    for col in categorical_features:
        # One array of `cont` values per group of the demographic.
        # NOTE(review): missing values are passed to the tests unchanged —
        # confirm `cont` has no NaNs, otherwise the statistic may be NaN.
        groups = [g[cont].values for _, g in df.groupby(col)]

        if len(groups) > 2:
            test_name = 'ANOVA'
            statistic, p_val = f_oneway(*groups)
        elif len(groups) == 2:
            test_name = 't-test'
            statistic, p_val = ttest_ind(groups[0], groups[1])
        else:
            # Fewer than two groups: nothing to compare
            continue

        result_row = [col, test_name, statistic, p_val]
        centrality_results_all.append(result_row)
        # Results below the 0.05 level are additionally reported as relevant
        if p_val < 0.05:
            centrality_results_relevant.append(result_row)

    # Label-safe (no separators) and human-readable forms of the feature name
    cont_wout = cont.replace('_', '').title()
    cont_w = cont.replace('_', ' ').title()

    # Save spatial bias results to LaTeX tables and generate images
    if centrality_results_all:
        centrality_df_all = pd.DataFrame(centrality_results_all, columns=result_columns)
        save_latex_table(
            centrality_df_all.round(4),
            caption="Bias Statistical Tests (All)",
            label=f"tab:biasall:{cont_wout}all",
            filename=all_results_filename,
            mode='a',
            img_label="fig:biasall",
            latex_img_filename=all_results_img_filename,
            figure_caption=f"{cont_w} Bias Statistical Tests (All)",
            all=True,
            num_figs_row=3,
            num_figs_total=9,
            include_row_labels=False)

    if centrality_results_relevant:
        centrality_df_relevant = pd.DataFrame(centrality_results_relevant, columns=result_columns)
        save_latex_table(
            centrality_df_relevant.round(4),
            caption="Bias Statistical Tests (Relevant)",
            label=f"tab:biassig:{cont_wout}relevant",
            filename=relevant_results_filename,
            mode='a',
            img_label="fig:biassig",
            latex_img_filename=relevant_results_img_filename,
            figure_caption=f"{cont_w} Bias Statistical Tests (Relevant)",
            num_figs_row=2,
            num_figs_total=6,
            include_row_labels=False)

# End the figure environments in the image LaTeX files
for out_name in (all_results_img_filename, relevant_results_img_filename):
    with open(os.path.join(output_dir, out_name), 'a') as f:
        f.write('\\end{figure}\n\n')

In [None]:
# --- 2. Zone bias ---
# Chi-square test of independence between each demographic and the grid zone
# a face falls into; results are exported as LaTeX tables.

# Output files: full results, significant-only results, and their figure files
all_results_filename = 'chi_all.tex'
relevant_results_filename = 'chi_relevant.tex'

all_results_img_filename = 'chi_figure_all.tex'
relevant_results_img_filename = 'chi_figure_relevant.tex'

# Every output file starts from scratch with the same subsection title
subsection_latex = "\\subsection{Zone Bias (Chi-square)}\n\n"
for tex_name in (
    all_results_filename,
    relevant_results_filename,
    all_results_img_filename,
    relevant_results_img_filename,
):
    with open(os.path.join(output_dir, tex_name), 'w') as f:
        f.write(subsection_latex)

zone_columns = ["Demographic", "Chi2-stat", "p-value", "DOF"]

# One row per demographic: [name, chi-square statistic, p-value, degrees of freedom]
zone_results_all = []
for col in categorical_features:
    # Counts of each (demographic group, grid zone) combination
    zone_counts = pd.crosstab(df[col], df['grid_zone'])
    chi2_stat, p_value, dof, _expected = chi2_contingency(zone_counts)
    zone_results_all.append([col, chi2_stat, p_value, dof])

# Rows whose p-value is below 0.05 are additionally reported as relevant
zone_results_relevant = [row for row in zone_results_all if row[2] < 0.05]

# Export the full table
if zone_results_all:
    zone_df_all = pd.DataFrame(zone_results_all, columns=zone_columns)
    save_latex_table(
        zone_df_all.round(4),
        caption="Zone Bias Chi-square Tests (All)",
        label="tab:zonebiasall",
        filename=all_results_filename,
        mode='a',
        img_label="fig:zonebiasall",
        latex_img_filename=all_results_img_filename,
        figure_caption="Zone Bias Chi-square Tests (All)",
        all=True,
        num_figs_row=3,
        num_figs_total=9,
        include_row_labels=False)

# Export the significant-only table
if zone_results_relevant:
    zone_df_relevant = pd.DataFrame(zone_results_relevant, columns=zone_columns)
    save_latex_table(
        zone_df_relevant.round(4),
        caption="Zone Bias Chi-square Tests (Relevant)",
        label="tab:zonebiasrelevant",
        filename=relevant_results_filename,
        mode='a',
        img_label="fig:zonebiasrelevant",
        latex_img_filename=relevant_results_img_filename,
        figure_caption="Zone Bias Chi-square Tests (Relevant)",
        num_figs_row=2,
        num_figs_total=6,
        include_row_labels=False)

# Append the closing \end{figure} to both image LaTeX files
for tex_name in (all_results_img_filename, relevant_results_img_filename):
    with open(os.path.join(output_dir, tex_name), 'a') as f:
        f.write('\\end{figure}\n\n')

In [None]:
# --- 3. Horizontal and vertical bias ---
# Tests, separately for the x and y face-center coordinate, whether the
# position differs between the groups of each demographic.

# Output files: the "all" and "relevant" results each get their own table
# file and their own figure file.
all_results_filename = 'horizontal_all.tex'
relevant_results_filename = 'horizontal_relevant.tex'

all_results_img_filename = 'horizontal_figure_all.tex'
relevant_results_img_filename = 'horizontal_figure_relevant.tex'

# Every output file starts from scratch with the same subsection title
subsection_latex = "\\subsection{Horizontal / Vertical Position Bias}\n\n"
for tex_name in (
    all_results_filename,
    relevant_results_filename,
    all_results_img_filename,
    relevant_results_img_filename,
):
    with open(os.path.join(output_dir, tex_name), 'w') as f:
        f.write(subsection_latex)

pos_columns = ["Axis", "Demographic", "Test", "Statistic", "p-value"]

# Rows of [axis name, demographic, test name, statistic, p-value]
pos_results_all = []
pos_results_relevant = []

axes_to_test = (('face_center_x', 'horizontal'), ('face_center_y', 'vertical'))
for axis, axis_name in axes_to_test:
    for col in ['gender','race','emotion','age_range']:
        # One array of coordinates per group of the demographic
        groups = [g[axis].values for _, g in df.groupby(col)]
        n_groups = len(groups)
        if n_groups < 2:
            # Nothing to compare
            continue

        if n_groups == 2:
            # Exactly two groups: independent t-test
            test_name = 't-test'
            statistic, p_val = ttest_ind(groups[0], groups[1])
        else:
            # More than two groups: one-way ANOVA
            test_name = 'ANOVA'
            statistic, p_val = f_oneway(*groups)

        result_row = [axis_name, col, test_name, statistic, p_val]
        pos_results_all.append(result_row)
        # Results below the 0.05 level are additionally reported as relevant
        if p_val < 0.05:
            pos_results_relevant.append(result_row)

# Export the full table
if pos_results_all:
    pos_df_all = pd.DataFrame(pos_results_all, columns=pos_columns)
    save_latex_table(
        pos_df_all.round(4),
        caption="Position Bias Statistical Tests (All)",
        label="tab:posbiasall",
        filename=all_results_filename,
        mode='a',
        img_label="fig:posbiasall",
        latex_img_filename=all_results_img_filename,
        figure_caption="Position Bias Statistical Tests (All)",
        all=True,
        num_figs_row=3,
        num_figs_total=9,
        include_row_labels=False)

# Export the significant-only table
# NOTE(review): the other cells use num_figs_row=2 / num_figs_total=6 for
# their "relevant" tables; confirm the 3 / 9 layout here is intentional.
if pos_results_relevant:
    pos_df_relevant = pd.DataFrame(pos_results_relevant, columns=pos_columns)
    save_latex_table(
        pos_df_relevant.round(4),
        caption="Position Bias Statistical Tests (Relevant)",
        label="tab:posbiasrelevant",
        filename=relevant_results_filename,
        mode='a',
        img_label="fig:posbiasrelevant",
        latex_img_filename=relevant_results_img_filename,
        figure_caption="Position Bias Statistical Tests (Relevant)",
        num_figs_row=3,
        num_figs_total=9,
        include_row_labels=False)

# Append the closing \end{figure} to both image LaTeX files
for tex_name in (all_results_img_filename, relevant_results_img_filename):
    with open(os.path.join(output_dir, tex_name), 'a') as f:
        f.write('\\end{figure}\n\n')

In [None]:
# --- 4. Heatmaps for each demographic ---

def plot_heatmap(subset, title, bins=20):
    """
    Generates and saves a heatmap of face center positions for a subset of data.

    The 2D histogram is binned over the same [0, max] range that is used as
    the image extent (the maxima are taken from the module-level ``df``), so
    every drawn cell sits at the coordinates it actually covers and heatmaps
    of different subsets share one frame. Previously the bins spanned only the
    subset's own min..max and were then stretched onto the [0, max] extent.

    The image is written to ``output_dir`` as ``heatmap_<title>.png`` with
    spaces and '=' in the title replaced by underscores.

    Args:
        subset (pd.DataFrame): Rows to plot; must provide the columns
            'face_center_x' and 'face_center_y'.
        title (str): The title for the heatmap and the basis for the filename.
        bins (int): Number of histogram bins per axis. Defaults to 20.
    """
    # Shared frame for all subsets.
    # NOTE(review): assumes coordinates are non-negative; values outside
    # [0, max] are not counted by the histogram — confirm against the data.
    x_max = df['face_center_x'].max()
    y_max = df['face_center_y'].max()

    plt.figure(figsize=(8, 6))
    try:
        # Generate a 2D histogram of face center x and y coordinates
        heatmap_data, _, _ = np.histogram2d(
            subset['face_center_x'],
            subset['face_center_y'],
            bins=bins,
            range=[[0, x_max], [0, y_max]],
        )
        # histogram2d returns counts indexed [x, y]; imshow expects [row=y, col=x]
        plt.imshow(heatmap_data.T, origin='lower', cmap='hot',
                   extent=[0, x_max, 0, y_max])
        plt.title(title)
        plt.xlabel('X position')
        plt.ylabel('Y position')
        plt.colorbar(label='Count')  # Add a colorbar to show counts
        # Save heatmap as an image file (PNG format)
        plt.savefig(os.path.join(output_dir, f"heatmap_{title.replace(' ', '_').replace('=', '_')}.png"))
    finally:
        # Close the figure even if plotting or saving fails, to free memory
        plt.close()

# Plot heatmaps for each unique value within each categorical feature
for col in categorical_features:
    # Get all unique values for plotting all heatmaps
    unique_vals = df[col].unique()
    for val in unique_vals:
        # Filter the DataFrame for the current demographic group and drop rows with missing face center data
        subset = df[df[col] == val].dropna(subset=['face_center_x', 'face_center_y'])
        # Plot the heatmap if the subset is not empty
        if not subset.empty:
            plot_heatmap(subset, f"Heatmap for {col}={val}")

In [None]:
# --- Post hoc ---
# For each continuous feature, find the pairs of demographics whose two-way
# ANOVA interaction term is significant, then run Tukey's HSD on the combined
# (f1, f2) groups and export the pairwise comparisons to LaTeX.

# Define filenames for all and relevant post-hoc test results and figures
all_results_filename = 'post_all.tex'
relevant_results_filename = 'post_relevant.tex'

all_results_img_filename = 'post_figure_all.tex'
relevant_results_img_filename = 'post_figure_relevant.tex'

# Define LaTeX section titles for post-hoc test results
section_latex_all = "\\section{Post-hoc Test Results (All)}\n\n"
section_latex_relevant = "\\section{Post-hoc Test Results (Relevant)}\n\n"

# Start the table and image files from scratch with their section title
for out_name, section_latex in (
    (all_results_filename, section_latex_all),
    (relevant_results_filename, section_latex_relevant),
    (all_results_img_filename, section_latex_all),
    (relevant_results_img_filename, section_latex_relevant),
):
    with open(os.path.join(output_dir, out_name), 'w') as f:
        f.write(section_latex)

# Minimum absolute mean difference a significant pair must reach to be
# reported as "relevant", per measure; measures not listed use 0.
mean_diff_thresholds = {
    Const.centrality: 20,
    Const.mean_depth: 0.06,
    Const.face_center_y: 80,
}

# Iterate through each continuous feature
for cont_feature in continuous_features:
    significant_interactions = []

    # Step 1: two-way ANOVA for every pair of categorical features
    for f1, f2 in combinations(categorical_features, 2):
        # Require more than one sample in every observed (f1, f2) group
        if not (df.groupby([f1, f2]).size().min() > 1):
            continue

        # Define the OLS formula for the two-way ANOVA model
        formula = f"{cont_feature} ~ C({f1}) * C({f2})"
        try:
            # Fit the OLS model and perform ANOVA (Type 2)
            model = ols(formula, data=df).fit()
            anova_table = sm.stats.anova_lm(model, typ=2)
        except Exception as e:
            # Best effort: a model that cannot be fitted is reported and skipped
            print(f"Two-way ANOVA failed for '{formula}': {e}")
            continue

        # Keep the pair only if its interaction term is significant
        interaction_term = f'C({f1}):C({f2})'
        if interaction_term in anova_table.index:
            p_value = anova_table.loc[interaction_term, 'PR(>F)']
            if p_value < 0.05:
                significant_interactions.append((f1, f2, cont_feature, p_value))

    # Step 2: Tukey's HSD for every significant interaction
    for f1, f2, measure, p_val in significant_interactions:
        # Combine both features into a single grouping variable; plain
        # assignment overwrites a column left over from an earlier pass
        combined_col_name = f'{f1}_{f2}'
        df[combined_col_name] = df[f1].astype(str) + '_' + df[f2].astype(str)

        # Pairwise comparison needs at least two distinct groups
        if df[combined_col_name].nunique() <= 1:
            print(f"Skipping Tukey's HSD for {f1} and {f2} on {measure}: fewer than two combined groups")
            continue

        # Only rows with a value for the measure take part in the test
        measure_values = df[measure].dropna()
        try:
            tukey_results = pairwise_tukeyhsd(
                endog=measure_values,
                groups=df.loc[measure_values.index, combined_col_name],
                alpha=0.05,
            )
        except ValueError as e:
            # Only failures of the test itself are tolerated here; errors
            # while writing the tables below are allowed to surface
            print(f"Tukey's HSD failed for {f1} and {f2} on {measure}: {e}")
            continue

        # Convert Tukey results to DataFrame for LaTeX output; summary() is
        # the public accessor for the results table (first row = header)
        tukey_rows = tukey_results.summary().data
        tukey_df_all = pd.DataFrame(data=tukey_rows[1:], columns=tukey_rows[0])
        # Rename columns for clarity in the LaTeX table
        tukey_df_all = tukey_df_all.rename(columns={
            'meandiff': 'Mean Diff',
            'p-adj': 'p-adj (Tukey)',
            'lower': 'Lower CI',
            'upper': 'Upper CI',
        })

        # Label-safe (no separators) and human-readable forms of the names
        f1_wout = f1.replace('_', '').title()
        f2_wout = f2.replace('_', '').title()
        measure_wout = measure.replace('_', '').title()

        f1_w = f1.replace('_', ' ').title()
        f2_w = f2.replace('_', ' ').title()
        measure_w = measure.replace('_', ' ').title()

        # Save all Tukey results to the all results LaTeX file and generate an image
        subsection_latex = f"\\subsection{{Tukey's HSD for {f1_w} and {f2_w} on {measure_w} (All)}}\n\n"
        with open(os.path.join(output_dir, all_results_filename), 'a') as f:
            f.write(subsection_latex)
        save_latex_table(
            tukey_df_all.round(4),
            caption="Tukey's HSD Results (All)",
            label=f"tab:tukey_all:{f1_wout}{f2_wout}{measure_wout}",
            filename=all_results_filename,
            mode='a',
            img_label="fig:tukey_all",
            latex_img_filename=all_results_img_filename,
            figure_caption=f"Tukey's HSD Results (All): {f1_w} and {f2_w} on {measure_w}",
            all=True,
            num_figs_row=3,
            num_figs_total=9,
            include_row_labels=False
        )

        # Relevant pairs: statistically significant (reject) AND a mean
        # difference at least as large as the measure's threshold
        mean_diff_threshold = mean_diff_thresholds.get(measure, 0)
        is_relevant = tukey_df_all['reject'] & (tukey_df_all['Mean Diff'].abs() >= mean_diff_threshold)
        tukey_df_relevant = tukey_df_all[is_relevant].copy()

        # If there are relevant significant differences, save them to LaTeX and generate an image
        if not tukey_df_relevant.empty:
            subsection_latex = f"\\subsection{{Tukey's HSD for {f1_w} and {f2_w} on {measure_w} (Relevant)}}\n\n"
            with open(os.path.join(output_dir, relevant_results_filename), 'a') as f:
                f.write(subsection_latex)
            save_latex_table(
                tukey_df_relevant.round(4),
                caption="Tukey's HSD Results (Relevant)",
                label=f"tab:tukey_relevant:{f1_wout}{f2_wout}{measure_wout}",
                filename=relevant_results_filename,
                mode='a',
                img_label="fig:tukey_relevant",
                latex_img_filename=relevant_results_img_filename,
                figure_caption=f"Tukey's HSD Results (Relevant): {f1_w} and {f2_w} on {measure_w}",
                num_figs_row=2,
                num_figs_total=6,
                include_row_labels=False
            )

# End the figure environments in the image LaTeX files
for out_name in (all_results_img_filename, relevant_results_img_filename):
    with open(os.path.join(output_dir, out_name), 'a') as f:
        f.write('\\end{figure}\n\n')