# Violin Plot of Two Metrics across Configurations

In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.collections import PolyCollection
import numpy as np
import pandas as pd
from typing import Union

def plot_metric_comparison(
    results_df: Union[pd.DataFrame, str],
    configurations: dict,
    metric_pair: tuple[str, str],
    sample_size: int,
    output_path: str,
    y_limit: tuple[float, float] = (0.85, 1.0),
    max_iterations: int = 100,
    figure_height: int = 6,
    figure_aspect: int = 2
) -> None:
    """
    Creates a split violin plot comparing two metrics across configurations,
    overlaid with one scatter point per individual result.

    The left half of each violin (and the 'o' markers) shows the first metric,
    the right half (and the '^' markers) shows the second one. Scatter points
    are coloured by the "site_left_out" column. The figure is written to
    ``output_path`` as SVG and then displayed.

    Parameters:
    -----------
    results_df : Union[pd.DataFrame, str]
        Either a DataFrame containing the results or a path to a CSV file.
        Must provide the columns "configuration", "site_left_out",
        "sample_size", "iteration" and both metrics of ``metric_pair``.
    configurations : dict
        Dictionary of model configurations. Only the keys are used here; their
        order defines the left-to-right order of the violins on the x-axis.
    metric_pair : tuple[str, str]
        Pair of metrics to compare (e.g., ("OA", "OA_tuned") or ("F1", "balanced_F1"))
    sample_size : int
        Sample size to filter results for
    output_path : str
        Path where to save the figure
    y_limit : tuple[float, float], optional
        Y-axis limits (min, max), defaults to (0.85, 1.0)
    max_iterations : int, optional
        Maximum number of iterations to include, defaults to 100
    figure_height : int, optional
        Height of the figure in inches, defaults to 6
    figure_aspect : int, optional
        Aspect ratio of the figure, defaults to 2

    Raises:
    -------
    ValueError
        If ``metric_pair`` does not contain exactly two metric names
    KeyError
        If a required column is missing from the results
    """
    import os  # local import: only needed to recognise path-like inputs

    if len(metric_pair) != 2:
        raise ValueError(
            f"metric_pair must contain exactly two metric names, got {len(metric_pair)}"
        )
    first_metric, second_metric = metric_pair

    # Handle input as either DataFrame or file path
    if isinstance(results_df, (str, os.PathLike)):
        results_df = pd.read_csv(results_df)

    # Fail early with a readable message instead of a bare pandas KeyError
    required_columns = [
        "configuration", "site_left_out", "sample_size", "iteration",
        first_metric, second_metric,
    ]
    missing_columns = [col for col in required_columns if col not in results_df.columns]
    if missing_columns:
        raise KeyError(f"results_df is missing required column(s): {missing_columns}")

    # Filter results
    filtered_results = results_df[
        (results_df["sample_size"] == sample_size)
        & (results_df["iteration"] <= max_iterations)
    ]

    # Melt the DataFrame for Seaborn compatibility
    melted_df = filtered_results.melt(
        id_vars=["configuration", "site_left_out"],
        value_vars=[first_metric, second_metric],
        var_name="Metric",
        value_name="Value"
    )

    # Set Seaborn style
    sns.set_theme(style="whitegrid", font="Arial", font_scale=1.2)

    # The scatter overlay below places points at the index of each key in
    # `configurations`, so the violins must be drawn in that same order.
    # An empty dict falls back to seaborn's own ordering.
    config_order = list(configurations)

    # Create the Seaborn Catplot
    catplot = sns.catplot(
        data=melted_df,
        x="configuration",
        y="Value",
        hue="Metric",
        kind="violin",
        split=True,
        order=config_order or None,
        hue_order=[first_metric, second_metric],
        legend=False,
        inner=None,
        palette=["lightgray", "darkgray"],
        height=figure_height,
        aspect=figure_aspect
    )

    # Reduce alpha of violin plots so the scatter points stay readable
    for ax in catplot.axes.flat:
        for collection in ax.collections:
            if isinstance(collection, PolyCollection):
                collection.set_alpha(0.6)

    # Define a color palette for AOIs (one colour per left-out site)
    sites = filtered_results["site_left_out"].unique()
    aoi_palette = sns.color_palette("colorblind", len(sites))
    site_colors = dict(zip(sites, aoi_palette))

    # Overlay scatter points: first metric shifted left, second shifted right,
    # matching the two halves of the split violins.
    jitter_factor = 0.1
    marker_specs = [(first_metric, -0.1, 'o'), (second_metric, 0.1, '^')]

    for ax in catplot.axes.flat:
        for x_center, config in enumerate(config_order):
            config_results = filtered_results[filtered_results["configuration"] == config]
            if config_results.empty:
                continue

            point_colors = [site_colors[site] for site in config_results["site_left_out"]]

            # One vectorised scatter call per metric instead of one per row.
            # No per-point legend labels: the legend below is built from
            # explicit handles, so such labels would never be displayed.
            for metric, offset, marker in marker_specs:
                jitter = np.random.uniform(
                    -jitter_factor, jitter_factor, size=len(config_results)
                )
                ax.scatter(
                    x_center + offset + jitter,
                    config_results[metric].to_numpy(),
                    color=point_colors,
                    alpha=0.8,
                    marker=marker,
                    linewidths=2,
                    s=50,
                    zorder=3
                )

    # Adjust y-axis limits using the new parameter
    catplot.set(ylim=y_limit)

    # Customize the plot (the y-axis label is derived from the first metric)
    metric_name = first_metric.replace("_", " ").title()
    catplot.set_axis_labels("Configuration", f"{metric_name}")
    catplot.set_xticklabels(rotation=45, horizontalalignment="right")

    # Add legend for metrics (marker shape identifies the metric)
    legend_elements = [
        plt.Line2D([0], [0], marker='o', color='w', markerfacecolor='gray',
                   label=first_metric, markersize=10),
        plt.Line2D([0], [0], marker='^', color='w', markerfacecolor='gray',
                   label=second_metric, markersize=10)
    ]
    catplot.ax.legend(handles=legend_elements, title="Metrics",
                      loc='upper right', bbox_to_anchor=(1.15, 1))

    # Save through the grid so the catplot's own figure is written,
    # independent of which figure pyplot currently considers active.
    catplot.savefig(output_path, format="svg", bbox_inches="tight", dpi=300)
    plt.show()

# Boxplot of Single Metric across Sample Sizes and Configurations

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from typing import Optional

def single_metric_accross_sample_sizes_boxplot(
    results_df: pd.DataFrame,
    metric: str,
    output_path: str,
    y_limit: tuple[float, float] = (0.85, 1.0),
    figure_height: int = 6,
    figure_aspect: int = 3,
    show_sample_sizes: Optional[list[int]] = None
) -> None:
    """
    Creates a boxplot comparing a specific metric across configurations and sample sizes.

    Boxes are grouped by configuration on the x-axis and split by sample size
    via hue. The figure is written to ``output_path`` as SVG and then displayed.

    Parameters:
    -----------
    results_df : pd.DataFrame
        DataFrame containing the results. Must provide the columns
        "configuration", "sample_size" and the column named by ``metric``.
    metric : str
        Metric to plot (e.g., "F1", "OA", "OA_tuned", "balanced_F1")
    output_path : str
        Path where to save the figure
    y_limit : tuple[float, float], optional
        Y-axis limits (min, max), defaults to (0.85, 1.0)
    figure_height : int, optional
        Height of the figure in inches, defaults to 6
    figure_aspect : int, optional
        Aspect ratio of the figure, defaults to 3
    show_sample_sizes : list[int], optional
        List of sample sizes to show. If None, shows all sample sizes.
        Example: [1000] will only show sample size 1000 and make others transparent

    Raises:
    -------
    KeyError
        If a required column is missing from ``results_df``
    """
    # Fail early with a readable message instead of a bare pandas KeyError
    required_columns = ["configuration", "sample_size", metric]
    missing_columns = [col for col in required_columns if col not in results_df.columns]
    if missing_columns:
        raise KeyError(f"results_df is missing required column(s): {missing_columns}")

    # Set Seaborn style
    sns.set_theme(style="whitegrid", font="Arial", font_scale=1)
    sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 2.5})

    # Filter to include only the specified metric
    filtered_results = results_df[required_columns]

    # NOTE: no plt.figure() call here. catplot is a figure-level function that
    # creates its own figure, so a manually opened one would stay empty and
    # appear as a blank extra output on plt.show().

    # Create color palette based on show_sample_sizes parameter
    highlight_only = show_sample_sizes is not None
    if highlight_only:
        # Grey fill for the requested sizes, fully transparent fill for the rest,
        # so every box keeps its slot and positions stay comparable across plots.
        unique_sizes = sorted(filtered_results["sample_size"].unique())
        custom_palette = {
            size: "#9f9f9f" if size in show_sample_sizes else "#FFFFFF00"
            for size in unique_sizes
        }
    else:
        custom_palette = sns.color_palette(
            "colorblind",
            n_colors=filtered_results["sample_size"].nunique(dropna=False)
        )

    # Create the Seaborn Catplot
    catplot = sns.catplot(
        data=filtered_results,
        x="configuration",
        y=metric,
        hue="sample_size",
        kind="box",
        palette=custom_palette,
        height=figure_height,
        aspect=figure_aspect,
        # A legend of grey/transparent swatches carries no information
        legend=not highlight_only,
        linewidth=1.5
    )

    # Adjust y-axis limits
    catplot.set(ylim=y_limit)

    # Customize the plot
    metric_label = metric.replace("_", " ").title()
    catplot.set_axis_labels("", f"{metric_label}")
    catplot.set_xticklabels(rotation=45, horizontalalignment="right")

    # Save through the grid so the catplot's own figure is the one written
    catplot.savefig(output_path, format="svg", bbox_inches="tight")
    print(f"Plot saved to {output_path}")

    plt.show()

# Example usage (kept inside a bare string literal so nothing is executed when
# this cell/module runs; copy the calls out of the string to run them):
"""
# Plot F1 scores showing only sample size 1000
single_metric_accross_sample_sizes_boxplot(
    results_df=pd.read_csv("results/data/evaluation_results.csv"),
    metric="F1",
    output_path="results/figures/configurations/f1_boxplot.svg",
    show_sample_sizes=[1000]
)

# Plot OA scores showing all sample sizes
single_metric_accross_sample_sizes_boxplot(
    results_df=pd.read_csv("results/data/evaluation_results.csv"),
    metric="OA",
    output_path="results/figures/configurations/oa_boxplot.svg",
    y_limit=(0.7, 1.0)
)

# Plot tuned OA scores
single_metric_accross_sample_sizes_boxplot(
    results_df=pd.read_csv("results/data/evaluation_results.csv"),
    metric="OA_tuned",
    output_path="results/figures/configurations/oa_tuned_boxplot.svg",
    show_sample_sizes=[1000]
)
"""

# Boxplot of Multiple Metrics for one Sample Size across Configurations

In [None]:
def plot_metrics_comparison_boxplot(
    results_df: pd.DataFrame,
    metrics: list[str],
    sample_size: int,
    output_path: str,
    y_limit: tuple[float, float] = (0.85, 1.0),
    figure_height: int = 6,
    figure_aspect: int = 3
) -> None:
    """
    Creates a boxplot comparing multiple metrics across configurations for a specific sample size.

    Boxes are grouped by configuration on the x-axis and split by metric via
    hue. The figure is written to ``output_path`` as SVG and then displayed.

    Parameters:
    -----------
    results_df : pd.DataFrame
        DataFrame containing the results. Must provide the columns
        "configuration", "sample_size" and every column named in ``metrics``.
    metrics : list[str]
        List of metrics to plot (e.g., ["F1", "OA", "OA_tuned", "balanced_F1"]).
        Any sequence of names is accepted; a single string is treated as one metric.
    sample_size : int
        Sample size to filter results for
    output_path : str
        Path where to save the figure
    y_limit : tuple[float, float], optional
        Y-axis limits (min, max), defaults to (0.85, 1.0)
    figure_height : int, optional
        Height of the figure in inches, defaults to 6
    figure_aspect : int, optional
        Aspect ratio of the figure, defaults to 3

    Raises:
    -------
    ValueError
        If ``metrics`` is empty
    KeyError
        If a required column is missing from ``results_df``
    """
    # Normalise to a list so tuples (and a lone metric name) work with the
    # column selection below; list("F1") would otherwise split into characters.
    metric_names = [metrics] if isinstance(metrics, str) else list(metrics)
    if not metric_names:
        raise ValueError("metrics must contain at least one metric name")

    # Fail early with a readable message instead of a bare pandas KeyError
    required_columns = ["configuration", "sample_size", *metric_names]
    missing_columns = [col for col in required_columns if col not in results_df.columns]
    if missing_columns:
        raise KeyError(f"results_df is missing required column(s): {missing_columns}")

    # Set Seaborn style
    sns.set_theme(style="whitegrid", font="Arial", font_scale=1)
    sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 2.5})

    # Filter for the specified sample size and metrics
    size_mask = results_df["sample_size"] == sample_size
    filtered_results = results_df.loc[size_mask, ["configuration", *metric_names]]

    # Melt the DataFrame to get it into the right format for seaborn
    melted_results = filtered_results.melt(
        id_vars=["configuration"],
        value_vars=metric_names,
        var_name="Metric",
        value_name="Score"
    )

    # Create the Seaborn Catplot
    catplot = sns.catplot(
        data=melted_results,
        x="configuration",
        y="Score",
        hue="Metric",
        kind="box",
        palette="colorblind",
        height=figure_height,
        aspect=figure_aspect,
        legend_out=True,
        linewidth=1.5
    )

    # Adjust y-axis limits
    catplot.set(ylim=y_limit)

    # Customize the plot
    catplot.set_axis_labels("", "Score")
    catplot.set_xticklabels(rotation=45, horizontalalignment="right")

    # Customize legend: catplot has already placed a figure-level legend outside
    # the axes (legend_out=True). Retitle that one instead of calling
    # plt.legend(), which would add a second, axes-level legend next to it.
    if catplot.legend is not None:
        catplot.legend.set_title("Metrics")

    # Add title showing sample size
    catplot.figure.suptitle(f"Metric Comparison (Sample Size: {sample_size})", y=1.02)

    # Save through the grid so the catplot's own figure is the one written
    catplot.savefig(output_path, format="svg", bbox_inches="tight")
    print(f"Plot saved to {output_path}")

    plt.show()

# Example usage (kept inside a bare string literal so nothing is executed when
# this cell/module runs; copy the calls out of the string to run them):
"""
# Compare original and tuned metrics for sample size 1000
plot_metrics_comparison_boxplot(
    results_df=pd.read_csv("results/data/evaluation_results.csv"),
    metrics=["OA", "OA_tuned", "F1", "F1_tuned"],
    sample_size=1000,
    output_path="results/figures/configurations/metrics_comparison_1000.svg",
    y_limit=(0.7, 1.0)
)

# Compare balanced metrics for sample size 100
plot_metrics_comparison_boxplot(
    results_df=pd.read_csv("results/data/evaluation_results.csv"),
    metrics=["balanced_OA", "balanced_F1"],
    sample_size=100,
    output_path="results/figures/configurations/balanced_metrics_comparison_100.svg"
)
"""