In [None]:
from pathlib import Path

from prepare_data import get_dataframe

# Locations of the two per-application metric exports, relative to this notebook.
crud_csv = Path("../../Static Analysis/impl-crud/crud_martin.csv")
cqrs_csv = Path("../../Static Analysis/impl-es-cqrs/es-cqrs-martin.csv")

# Load each export through the project helper, passing the application name along.
# NOTE(review): presumably get_dataframe uses `application` to fill the
# 'Application' column that the later cells group by -- confirm in prepare_data.
crud = get_dataframe(crud_csv, application="CRUD")
cqrs = get_dataframe(cqrs_csv, application="ES-CQRS")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

from colors import APP_COLORS

# Stack the two per-application frames into one table; ignore_index=True discards
# the original row labels and renumbers the rows from 0.
frames = [crud, cqrs]
dataset = pd.concat(frames, ignore_index=True)

# Metric column name -> human-readable text used as axis label and table caption.
metrics = dict([
    ("Ca", "$C_a$ (Afferent Couplings) per package"),
    ("Ce", "$C_e$ (Efferent Couplings) per package"),
    ("I", "Instability per package"),
    ("D", "Distance from the main sequence"),
    ("A", "Abstractness"),
])


In [None]:
sns.set_theme(style="whitegrid")

# Draw one box plot per metric, comparing that column's distribution between applications.
for name, title in metrics.items():
    fig, ax = plt.subplots(figsize=(5, 7))

    # Application on the x axis, the current metric column on the y axis.
    sns.boxplot(data=dataset, x='Application', y=name, hue="Application", palette=APP_COLORS, ax=ax)

    # Typeset two-letter metric names with a subscript ("Ca" -> C_a) so the title
    # uses the same notation as the axis labels; a bare "Ca" inside $...$ would be
    # rendered as two adjacent italic letters. Single-letter names are unchanged.
    symbol = f"{name[0]}_{{{name[1:]}}}" if len(name) > 1 else name

    ax.set_title(f'Comparison of ${symbol}$ by Application', fontsize=14)
    ax.set_xlabel('Application Name', fontsize=12)
    ax.set_ylabel(title, fontsize=12)

    plt.show()

In [None]:
# Normalize the coupling counts by the 'C' column (labelled "Class Count" below).
# A zero count is masked to NaN first, so the ratio becomes NaN instead of inf;
# an infinite value cannot be placed on a finite plot axis.
class_count = dataset['C'].where(dataset['C'] != 0)
dataset['Ca_norm'] = dataset['Ca'] / class_count
dataset['Ce_norm'] = dataset['Ce'] / class_count

# Normalized column name -> y-axis label. Only the symbol is kept in math mode:
# inside $...$ the words "Class Count" would be italicised and lose their space.
metrics_norm = {
    "Ca_norm": "Normalized Afferent Coupling ($C_a$ / Class Count)",
    "Ce_norm": "Normalized Efferent Coupling ($C_e$ / Class Count)",
}

# Normalized column name -> math symbol shown in the plot title.
symbols = {"Ca_norm": "$C_a$", "Ce_norm": "$C_e$"}

sns.set_theme(style="whitegrid")

# One box plot per normalized metric, comparing the two applications.
for name, title in metrics_norm.items():
    fig, ax = plt.subplots(figsize=(6, 7))

    sns.boxplot(
        data=dataset,
        x='Application',
        y=name,
        hue="Application",
        palette=APP_COLORS,
        ax=ax,
    )

    metric_name = symbols[name]

    ax.set_title(f'Comparison of normalized {metric_name} by Application', fontsize=14)
    ax.set_xlabel('Application Name', fontsize=12)
    ax.set_ylabel(title, fontsize=12)

    plt.show()

In [None]:
# Tabulate the same metrics that were plotted above. This binds a second name to the
# `metrics` dict (no copy is made): keys are column names, values are caption texts.
table_metrics = metrics


def count_outliers(series, whis=1.5):
    """Count the values of ``series`` that fall outside the IQR-based fences.

    A value is counted when it is strictly below ``Q1 - whis * IQR`` or strictly
    above ``Q3 + whis * IQR``, where ``Q1``/``Q3`` are the 25th/75th percentiles
    of ``series`` and ``IQR = Q3 - Q1``.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to inspect.
    whis : float, default 1.5
        Multiplier applied to the IQR to place the fences. The default keeps the
        behaviour of the previous hard-coded 1.5.

    Returns
    -------
    integer
        Number of values outside the fences, as returned by ``Series.sum()`` on
        the boolean mask. An empty series gives 0, because comparisons against
        the NaN quantiles are all False.
    """
    q1, q3 = series.quantile(0.25), series.quantile(0.75)
    reach = whis * (q3 - q1)

    below = series < (q1 - reach)
    above = series > (q3 + reach)
    return (below | above).sum()


# Columns kept from describe(), in display order, and the header names that
# replace them (plus the leading group column and the trailing outlier count).
summary_columns = ['min', '25%', '50%', '75%', 'max']
header_names = ['Application', 'Min', 'P25', 'Median', 'P75', 'Max', 'Outliers']

# Emit one LaTeX table per metric: five-number summary plus outlier count per application.
for metric, caption_text in table_metrics.items():
    per_app = dataset.groupby('Application')[metric]

    # Five-number summary per application, then the outlier count as an extra column.
    stats = per_app.describe()[summary_columns]
    stats['Outliers'] = per_app.apply(count_outliers)

    # Turn the group key back into a regular column so it appears in the table body.
    stats = stats.reset_index()

    # Bold headers; escape=False below keeps the LaTeX markup intact.
    stats.columns = [f"\\textbf{{{c}}}" for c in header_names]

    print(f"\n% --- LaTeX Table for {metric} ---")
    latex_table = stats.to_latex(
        index=False,  # Hide the 0, 1, 2... index numbers
        caption=f"Descriptive Statistics for {caption_text}",
        label=f"tab:{metric.replace('*', 'star').lower()}",
        escape=False,
        column_format='lrrrrrr',
        float_format="%.1f"
    )
    print(latex_table)