#### CLAHE WITH AND WITHOUT

In [None]:
import pandas as pd
import re
from scipy.stats import shapiro, ttest_rel, wilcoxon

# Read the per-image scores: CLAHE run first, baseline (inverted only) second
df_clahe, df_no_clahe = map(pd.read_csv, ("results_with_clahe.csv", "results_invert.csv"))

# Function to extract metadata from image names
def extract_color_channel(image_name):
    """Parse the color space and channel tokens out of an image file name.

    Searches for ``img<digits>_<space>_<channel>_`` anywhere in the name and
    returns the two captured tokens as a tuple, or ``(None, None)`` when the
    pattern is absent. Both tokens are greedy word-character runs, and word
    characters include underscores.
    """
    found = re.search(r'img\d+_(\w+)_(\w+)_', image_name)
    if found is None:
        return (None, None)
    return found.groups()

def extract_algorithm(image_name):
    """Return the thresholding algorithm named by the file-name suffix.

    The name must end in ``_<Algorithm>.png`` with one of Default, Otsu,
    IsoData, Moments or Yen; anything else yields ``None``.
    """
    found = re.search(r'_(Default|Otsu|IsoData|Moments|Yen)\.png$', image_name)
    if found is None:
        return None
    return found.group(1)

# Add metadata columns parsed from the image file names
for df in [df_clahe, df_no_clahe]:
    df[['ColorSpace', 'Channel']] = df['Image'].apply(lambda x: pd.Series(extract_color_channel(x)))
    df['Algorithm'] = df['Image'].apply(extract_algorithm)

# Pair the two conditions per image (default inner merge: only images present in both files remain)
df_merged = pd.merge(df_clahe, df_no_clahe, on="Image", suffixes=('_clahe', '_no_clahe'))

# Allowed color space/channel pairs
allowed_pairs = [
    ('Gray', 'Gray'),
    ('YUV', 'Y'),
    ('YCrCb', 'Y'),
    ('RGB', 'G'),
    ('CIELab', 'L')
]

# Filter only valid pairs
df_filtered = df_merged[
    df_merged.apply(
        lambda row: (row['ColorSpace_clahe'], row['Channel_clahe']) in allowed_pairs,
        axis=1
    )
].copy()

# Run one paired test per (algorithm, color space/channel) group
results = []

for algorithm in df_filtered['Algorithm_clahe'].dropna().unique():
    subset = df_filtered[df_filtered['Algorithm_clahe'] == algorithm]

    for color_space, channel in allowed_pairs:
        group = subset[
            (subset['ColorSpace_clahe'] == color_space) &
            (subset['Channel_clahe'] == channel)
        ]

        # Groups with fewer than 4 image pairs are skipped
        if len(group) < 4:
            continue

        # Use Dice instead of IoU
        clahe_dice = group['Dice_clahe']
        no_clahe_dice = group['Dice_no_clahe']
        dice_diff = clahe_dice - no_clahe_dice

        # Per-condition normality is reported for reference only
        p_clahe = shapiro(clahe_dice).pvalue
        p_no_clahe = shapiro(no_clahe_dice).pvalue

        if (dice_diff == 0).all():
            # Every pair is identical: Wilcoxon has no non-zero difference to rank and the
            # paired t statistic is 0/0, so the group is recorded as untested.
            p_diff = stat = p_val = float('nan')
            test_type = "not tested (identical scores)"
        else:
            # The paired t-test assumes normally distributed pairwise differences,
            # so the test choice is driven by the differences, not the marginals.
            p_diff = shapiro(dice_diff).pvalue
            if p_diff > 0.05:
                stat, p_val = ttest_rel(clahe_dice, no_clahe_dice)
                test_type = "paired t-test"
            else:
                stat, p_val = wilcoxon(clahe_dice, no_clahe_dice)
                test_type = "Wilcoxon signed-rank"

        results.append({
            "Algorithm": algorithm,
            "ColorSpace": color_space,
            "Channel": channel,
            "Test": test_type,
            "Statistic": stat,
            "p-value": p_val,
            "Shapiro_p_CLAHE": p_clahe,
            "Shapiro_p_No_CLAHE": p_no_clahe,
            "Shapiro_p_diff": p_diff
        })

# Display results
print("Comparison of Dice: CLAHE vs No CLAHE")
for res in results:
    print(f"{res['Algorithm']} - {res['ColorSpace']}-{res['Channel']}: {res['Test']}, "
          f"stat={res['Statistic']:.3f}, p={res['p-value']:.4f}, "
          f"Shapiro CLAHE p={res['Shapiro_p_CLAHE']:.4f}, No CLAHE p={res['Shapiro_p_No_CLAHE']:.4f}, "
          f"diff p={res['Shapiro_p_diff']:.4f}")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Color space/channel combinations to plot, one figure each
allowed_pairs = list(zip(
    ('Gray', 'YUV', 'YCrCb', 'RGB', 'CIELab'),
    ('Gray', 'Y', 'Y', 'G', 'L'),
))

for color_space, channel in allowed_pairs:
    # Row masks selecting the current pair in each condition
    clahe_mask = df_clahe['ColorSpace'].eq(color_space) & df_clahe['Channel'].eq(channel)
    no_clahe_mask = df_no_clahe['ColorSpace'].eq(color_space) & df_no_clahe['Channel'].eq(channel)

    # assign() returns a labelled copy, leaving the source frames untouched
    clahe_subset = df_clahe.loc[clahe_mask].assign(Condition='CLAHE')
    no_clahe_subset = df_no_clahe.loc[no_clahe_mask].assign(Condition='No CLAHE')

    # Stack both conditions so seaborn can split the boxes by hue
    combined = pd.concat([clahe_subset, no_clahe_subset])

    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Algorithm', y='Dice', hue='Condition', data=combined)
    plt.title(f'Dice Scores by Algorithm ({color_space}-{channel}) - CLAHE vs No CLAHE')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


#### TOP-HAT WITH AND WITHOUT

In [None]:
import pandas as pd
import re
from scipy.stats import shapiro, ttest_rel, wilcoxon

# Read the per-image scores: top-hat run first, baseline (inverted only) second
df_th, df_no_th = map(pd.read_csv, ("results_th.csv", "results_invert.csv"))

# Function to extract metadata from image names
def extract_color_channel(image_name):
    """Return ``(color_space, channel)`` parsed from an image file name.

    The name is searched for ``img<digits>_<space>_<channel>_``; when nothing
    matches, ``(None, None)`` is returned instead.
    """
    pattern = re.compile(r'img\d+_(\w+)_(\w+)_')
    hit = pattern.search(image_name)
    return (None, None) if hit is None else hit.group(1, 2)

def extract_algorithm(image_name):
    """Return the algorithm token from a ``..._<Algorithm>.png`` file name.

    Only Default, Otsu, IsoData, Moments and Yen are recognised; any other
    name gives ``None``.
    """
    hit = re.compile(r'_(Default|Otsu|IsoData|Moments|Yen)\.png$').search(image_name)
    return None if hit is None else hit[1]

# Add metadata columns parsed from the image file names
for df in [df_th, df_no_th]:
    df[['ColorSpace', 'Channel']] = df['Image'].apply(lambda x: pd.Series(extract_color_channel(x)))
    df['Algorithm'] = df['Image'].apply(extract_algorithm)

# Pair the two conditions per image (default inner merge: only images present in both files remain)
df_merged = pd.merge(df_th, df_no_th, on="Image", suffixes=('_th', '_no_th'))

# Allowed color space/channel pairs
allowed_pairs = [
    ('Gray', 'Gray'),
    ('YUV', 'Y'),
    ('YCrCb', 'Y'),
    ('RGB', 'G'),
    ('CIELab', 'L')
]

# Filter only valid pairs
df_filtered = df_merged[
    df_merged.apply(
        lambda row: (row['ColorSpace_th'], row['Channel_th']) in allowed_pairs,
        axis=1
    )
].copy()

# Run one paired test per (algorithm, color space/channel) group
results = []

for algorithm in df_filtered['Algorithm_th'].dropna().unique():
    subset = df_filtered[df_filtered['Algorithm_th'] == algorithm]

    for color_space, channel in allowed_pairs:
        group = subset[
            (subset['ColorSpace_th'] == color_space) &
            (subset['Channel_th'] == channel)
        ]

        # Groups with fewer than 4 image pairs are skipped
        if len(group) < 4:
            continue

        # Use Dice instead of IoU
        th_dice = group['Dice_th']
        no_th_dice = group['Dice_no_th']
        dice_diff = th_dice - no_th_dice

        # Per-condition normality is reported for reference only
        p_th = shapiro(th_dice).pvalue
        p_no_th = shapiro(no_th_dice).pvalue

        if (dice_diff == 0).all():
            # Every pair is identical: Wilcoxon has no non-zero difference to rank and the
            # paired t statistic is 0/0, so the group is recorded as untested.
            p_diff = stat = p_val = float('nan')
            test_type = "not tested (identical scores)"
        else:
            # The paired t-test assumes normally distributed pairwise differences,
            # so the test choice is driven by the differences, not the marginals.
            p_diff = shapiro(dice_diff).pvalue
            if p_diff > 0.05:
                stat, p_val = ttest_rel(th_dice, no_th_dice)
                test_type = "paired t-test"
            else:
                stat, p_val = wilcoxon(th_dice, no_th_dice)
                test_type = "Wilcoxon signed-rank"

        results.append({
            "Algorithm": algorithm,
            "ColorSpace": color_space,
            "Channel": channel,
            "Test": test_type,
            "Statistic": stat,
            "p-value": p_val,
            "Shapiro_p_th": p_th,
            "Shapiro_p_No_th": p_no_th,
            "Shapiro_p_diff": p_diff
        })

# Display results
print("Comparison of Dice: th vs No th")
for res in results:
    print(f"{res['Algorithm']} - {res['ColorSpace']}-{res['Channel']}: {res['Test']}, "
          f"stat={res['Statistic']:.3f}, p={res['p-value']:.4f}, "
          f"Shapiro th p={res['Shapiro_p_th']:.4f}, No th p={res['Shapiro_p_No_th']:.4f}, "
          f"diff p={res['Shapiro_p_diff']:.4f}")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Color space/channel combinations to plot, one figure each
allowed_pairs = list(zip(
    ('Gray', 'YUV', 'YCrCb', 'RGB', 'CIELab'),
    ('Gray', 'Y', 'Y', 'G', 'L'),
))

for color_space, channel in allowed_pairs:
    # Row masks selecting the current pair in each condition
    th_mask = df_th['ColorSpace'].eq(color_space) & df_th['Channel'].eq(channel)
    no_th_mask = df_no_th['ColorSpace'].eq(color_space) & df_no_th['Channel'].eq(channel)

    # assign() returns a labelled copy, leaving the source frames untouched
    th_subset = df_th.loc[th_mask].assign(Condition='th')
    no_th_subset = df_no_th.loc[no_th_mask].assign(Condition='No th')

    # Stack both conditions so seaborn can split the boxes by hue
    combined = pd.concat([th_subset, no_th_subset])

    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Algorithm', y='Dice', hue='Condition', data=combined)
    plt.title(f'Dice Scores by Algorithm ({color_space}-{channel}) - th vs No th')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


#### ALL (CLAHE + TOP-HAT) WITH AND WITHOUT

In [None]:
import pandas as pd
import re
from scipy.stats import shapiro, ttest_rel, wilcoxon

# Read the per-image scores: full pipeline (results.csv) first, baseline (inverted only) second
df_all, df_no_all = map(pd.read_csv, ("results.csv", "results_invert.csv"))

# Function to extract metadata from image names
def extract_color_channel(image_name):
    """Split the color space and channel out of an image file name.

    Matches ``img<digits>_<space>_<channel>_`` anywhere in the name. Returns
    the two tokens as a tuple, or ``(None, None)`` if there is no match.
    """
    parsed = re.search(r'img\d+_(\w+)_(\w+)_', image_name)
    if parsed:
        space, channel = parsed.groups()
        return (space, channel)
    return (None, None)

def extract_algoriallm(image_name):
    """Return the thresholding algorithm named by the file-name suffix.

    The name must end in ``_<Algorithm>.png`` with one of Default, Otsu,
    IsoData, Moments or Yen; anything else yields ``None``. The function
    name is misspelled, but callers in this cell use it as-is.
    """
    parsed = re.search(r'_(Default|Otsu|IsoData|Moments|Yen)\.png$', image_name)
    if parsed:
        return parsed.group(1)
    return None

# Add metadata columns parsed from the image file names.
# The algorithm column keeps its misspelled name 'Algoriallm' because the plotting cell reads it.
for df in [df_all, df_no_all]:
    df[['ColorSpace', 'Channel']] = df['Image'].apply(lambda x: pd.Series(extract_color_channel(x)))
    df['Algoriallm'] = df['Image'].apply(extract_algoriallm)

# Pair the two conditions per image (default inner merge: only images present in both files remain)
df_merged = pd.merge(df_all, df_no_all, on="Image", suffixes=('_all', '_no_all'))

# Allowed color space/channel pairs
allowed_pairs = [
    ('Gray', 'Gray'),
    ('YUV', 'Y'),
    ('YCrCb', 'Y'),
    ('RGB', 'G'),
    ('CIELab', 'L')
]

# Filter only valid pairs
df_filtered = df_merged[
    df_merged.apply(
        lambda row: (row['ColorSpace_all'], row['Channel_all']) in allowed_pairs,
        axis=1
    )
].copy()

# Run one paired test per (algorithm, color space/channel) group
results = []

for algoriallm in df_filtered['Algoriallm_all'].dropna().unique():
    subset = df_filtered[df_filtered['Algoriallm_all'] == algoriallm]

    for color_space, channel in allowed_pairs:
        group = subset[
            (subset['ColorSpace_all'] == color_space) &
            (subset['Channel_all'] == channel)
        ]

        # Groups with fewer than 4 image pairs are skipped
        if len(group) < 4:
            continue

        # Use Dice instead of IoU
        all_dice = group['Dice_all']
        no_all_dice = group['Dice_no_all']
        dice_diff = all_dice - no_all_dice

        # Per-condition normality is reported for reference only
        p_all = shapiro(all_dice).pvalue
        p_no_all = shapiro(no_all_dice).pvalue

        if (dice_diff == 0).all():
            # Every pair is identical: Wilcoxon has no non-zero difference to rank and the
            # paired t statistic is 0/0, so the group is recorded as untested.
            p_diff = stat = p_val = float('nan')
            test_type = "not tested (identical scores)"
        else:
            # The paired t-test assumes normally distributed pairwise differences,
            # so the test choice is driven by the differences, not the marginals.
            p_diff = shapiro(dice_diff).pvalue
            if p_diff > 0.05:
                stat, p_val = ttest_rel(all_dice, no_all_dice)
                test_type = "paired t-test"
            else:
                stat, p_val = wilcoxon(all_dice, no_all_dice)
                test_type = "Wilcoxon signed-rank"

        results.append({
            "Algorithm": algoriallm,
            "ColorSpace": color_space,
            "Channel": channel,
            "Test": test_type,
            "Statistic": stat,
            "p-value": p_val,
            "Shapiro_p_all": p_all,
            "Shapiro_p_No_all": p_no_all,
            "Shapiro_p_diff": p_diff
        })

# Display results
print("Comparison of Dice: all vs No all")
for res in results:
    print(f"{res['Algorithm']} - {res['ColorSpace']}-{res['Channel']}: {res['Test']}, "
          f"stat={res['Statistic']:.3f}, p={res['p-value']:.4f}, "
          f"Shapiro all p={res['Shapiro_p_all']:.4f}, No all p={res['Shapiro_p_No_all']:.4f}, "
          f"diff p={res['Shapiro_p_diff']:.4f}")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Color space/channel combinations to plot, one figure each
allowed_pairs = [
    ('Gray', 'Gray'),
    ('YUV', 'Y'),
    ('YCrCb', 'Y'),
    ('RGB', 'G'),
    ('CIELab', 'L')
]

for color_space, channel in allowed_pairs:
    # Filter both all and No all for current pair
    all_subset = df_all[
        (df_all['ColorSpace'] == color_space) &
        (df_all['Channel'] == channel)
    ].copy()
    no_all_subset = df_no_all[
        (df_no_all['ColorSpace'] == color_space) &
        (df_no_all['Channel'] == channel)
    ].copy()

    # Assign condition labels
    all_subset['Condition'] = 'all'
    no_all_subset['Condition'] = 'No all'

    # Combine data
    combined = pd.concat([all_subset, no_all_subset])

    # The metadata cell stores the algorithm under the misspelled column 'Algoriallm';
    # expose it as 'Algorithm' so the axis label is spelled correctly.
    if 'Algoriallm' in combined.columns:
        combined['Algorithm'] = combined['Algoriallm']

    # Plot boxplot
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=combined, x='Algorithm', y='Dice', hue='Condition')
    plt.title(f'Dice Scores by Algorithm ({color_space}-{channel}) - all vs No all')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


#### CLAHE vs ALL

In [None]:
import pandas as pd
import re
from scipy.stats import shapiro, ttest_rel, wilcoxon

# Read the per-image scores: results.csv into df_all, the CLAHE-only file into df_no_all
df_all, df_no_all = map(pd.read_csv, ("results.csv", "results_with_clahe.csv"))

# Function to extract metadata from image names
def extract_color_channel(image_name):
    """Parse the color space and channel tokens out of an image file name.

    Searches for ``img<digits>_<space>_<channel>_`` anywhere in the name and
    returns the two captured tokens as a tuple, or ``(None, None)`` when the
    pattern is absent.
    """
    found = re.search(r'img\d+_(\w+)_(\w+)_', image_name)
    if found is None:
        return (None, None)
    return found.groups()

def extract_algoriallm(image_name):
    """Return the algorithm token from a ``..._<Algorithm>.png`` file name.

    Only Default, Otsu, IsoData, Moments and Yen are recognised; any other
    name gives ``None``. The function name is misspelled, but callers in
    this cell use it as-is.
    """
    found = re.search(r'_(Default|Otsu|IsoData|Moments|Yen)\.png$', image_name)
    if found is None:
        return None
    return found.group(1)

# Add metadata columns parsed from the image file names.
# The algorithm column keeps its misspelled name 'Algoriallm' because the plotting cell reads it.
for df in [df_all, df_no_all]:
    df[['ColorSpace', 'Channel']] = df['Image'].apply(lambda x: pd.Series(extract_color_channel(x)))
    df['Algoriallm'] = df['Image'].apply(extract_algoriallm)

# Pair the two conditions per image (default inner merge: only images present in both files remain).
# In this section df_all holds results.csv and df_no_all holds the CLAHE-only results.
df_merged = pd.merge(df_all, df_no_all, on="Image", suffixes=('_all', '_no_all'))

# Allowed color space/channel pairs
allowed_pairs = [
    ('Gray', 'Gray'),
    ('YUV', 'Y'),
    ('YCrCb', 'Y'),
    ('RGB', 'G'),
    ('CIELab', 'L')
]

# Filter only valid pairs
df_filtered = df_merged[
    df_merged.apply(
        lambda row: (row['ColorSpace_all'], row['Channel_all']) in allowed_pairs,
        axis=1
    )
].copy()

# Run one paired test per (algorithm, color space/channel) group
results = []

for algoriallm in df_filtered['Algoriallm_all'].dropna().unique():
    subset = df_filtered[df_filtered['Algoriallm_all'] == algoriallm]

    for color_space, channel in allowed_pairs:
        group = subset[
            (subset['ColorSpace_all'] == color_space) &
            (subset['Channel_all'] == channel)
        ]

        # Groups with fewer than 4 image pairs are skipped
        if len(group) < 4:
            continue

        # Use Dice instead of IoU
        all_dice = group['Dice_all']
        no_all_dice = group['Dice_no_all']
        dice_diff = all_dice - no_all_dice

        # Per-condition normality is reported for reference only
        p_all = shapiro(all_dice).pvalue
        p_no_all = shapiro(no_all_dice).pvalue

        if (dice_diff == 0).all():
            # Every pair is identical: Wilcoxon has no non-zero difference to rank and the
            # paired t statistic is 0/0, so the group is recorded as untested.
            p_diff = stat = p_val = float('nan')
            test_type = "not tested (identical scores)"
        else:
            # The paired t-test assumes normally distributed pairwise differences,
            # so the test choice is driven by the differences, not the marginals.
            p_diff = shapiro(dice_diff).pvalue
            if p_diff > 0.05:
                stat, p_val = ttest_rel(all_dice, no_all_dice)
                test_type = "paired t-test"
            else:
                stat, p_val = wilcoxon(all_dice, no_all_dice)
                test_type = "Wilcoxon signed-rank"

        results.append({
            "Algorithm": algoriallm,
            "ColorSpace": color_space,
            "Channel": channel,
            "Test": test_type,
            "Statistic": stat,
            "p-value": p_val,
            "Shapiro_p_all": p_all,
            "Shapiro_p_No_all": p_no_all,
            "Shapiro_p_diff": p_diff
        })

# Display results (the second condition here is CLAHE only, not "no preprocessing")
print("Comparison of Dice: CLAHE+TopHat vs CLAHE")
for res in results:
    print(f"{res['Algorithm']} - {res['ColorSpace']}-{res['Channel']}: {res['Test']}, "
          f"stat={res['Statistic']:.3f}, p={res['p-value']:.4f}, "
          f"Shapiro CLAHE+TopHat p={res['Shapiro_p_all']:.4f}, CLAHE p={res['Shapiro_p_No_all']:.4f}, "
          f"diff p={res['Shapiro_p_diff']:.4f}")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Color space/channel combinations to plot, one figure each
allowed_pairs = [
    ('Gray', 'Gray'),
    ('YUV', 'Y'),
    ('YCrCb', 'Y'),
    ('RGB', 'G'),
    ('CIELab', 'L')
]

for color_space, channel in allowed_pairs:
    # Filter both conditions for the current pair. In this section df_all was read from
    # results.csv (CLAHE+TopHat) and df_no_all from results_with_clahe.csv (CLAHE only).
    all_subset = df_all[
        (df_all['ColorSpace'] == color_space) &
        (df_all['Channel'] == channel)
    ].copy()
    no_all_subset = df_no_all[
        (df_no_all['ColorSpace'] == color_space) &
        (df_no_all['Channel'] == channel)
    ].copy()

    # Label each condition by what its file actually contains
    all_subset['Condition'] = 'CLAHE+TopHat'
    no_all_subset['Condition'] = 'CLAHE'

    # Combine data
    combined = pd.concat([all_subset, no_all_subset])

    # The metadata cell stores the algorithm under the misspelled column 'Algoriallm';
    # expose it as 'Algorithm' so the axis label is spelled correctly.
    if 'Algoriallm' in combined.columns:
        combined['Algorithm'] = combined['Algoriallm']

    # Plot boxplot
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=combined, x='Algorithm', y='Dice', hue='Condition')
    plt.title(f'Dice Scores by Algorithm ({color_space}-{channel}) - CLAHE+TopHat vs CLAHE')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


#### TOTAL: ALL METHODS VS INVERT BASELINE

In [None]:
import pandas as pd
import numpy as np
import re
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import shapiro, ttest_rel, wilcoxon

# Result CSV for each preprocessing method, keyed by display name (insertion order matters:
# it fixes the order in which methods are compared)
method_files = dict([
    ("Invert", "results_invert.csv"),       # Method 1
    ("CLAHE", "results_with_clahe.csv"),    # Method 2
    ("TopHat", "results_th.csv"),           # Method 3
    ("CLAHE+TopHat", "results.csv"),        # Method 4
])

# (color space, channel) combinations included in the analysis
allowed_pairs = list(zip(
    ('Gray', 'YUV', 'YCrCb', 'RGB', 'CIELab'),
    ('Gray', 'Y', 'Y', 'G', 'L'),
))

# Helpers
def extract_color_channel(image_name):
    """Return ``(color_space, channel)`` parsed from an image file name.

    The name is searched for ``img<digits>_<space>_<channel>_``; when nothing
    matches, ``(None, None)`` is returned instead.
    """
    pattern = re.compile(r'img\d+_(\w+)_(\w+)_')
    hit = pattern.search(image_name)
    return (None, None) if hit is None else hit.group(1, 2)

def extract_algorithm(image_name):
    """Return the thresholding algorithm named by the file-name suffix.

    The name must end in ``_<Algorithm>.png`` with one of Default, Otsu,
    IsoData, Moments or Yen; anything else yields ``None``.
    """
    parsed = re.search(r'_(Default|Otsu|IsoData|Moments|Yen)\.png$', image_name)
    if parsed:
        return parsed.group(1)
    return None

# Load all method data and attach the metadata parsed from each image name
method_dfs = {}
for name, path in method_files.items():
    df = pd.read_csv(path)
    df[['ColorSpace', 'Channel']] = df['Image'].apply(lambda x: pd.Series(extract_color_channel(x)))
    df['Algorithm'] = df['Image'].apply(extract_algorithm)
    method_dfs[name] = df

# Compare base (Method 1) to all others
base_method = "Invert"
comparison_methods = [m for m in method_files if m != base_method]

for comp_method in comparison_methods:
    df_base = method_dfs[base_method]
    df_comp = method_dfs[comp_method]

    # Pair the two methods per image (default inner merge)
    df_merged = pd.merge(df_base, df_comp, on="Image", suffixes=('_base', '_comp'))

    # Filter allowed color/channel
    df_filtered = df_merged[
        df_merged.apply(
            lambda row: (row['ColorSpace_base'], row['Channel_base']) in allowed_pairs,
            axis=1
        )
    ].copy()

    results = []

    for algorithm in df_filtered['Algorithm_base'].dropna().unique():
        subset = df_filtered[df_filtered['Algorithm_base'] == algorithm]

        for color_space, channel in allowed_pairs:
            group = subset[
                (subset['ColorSpace_base'] == color_space) &
                (subset['Channel_base'] == channel)
            ]

            # Groups with fewer than 4 image pairs are skipped
            if len(group) < 4:
                continue

            base_dice = group['Dice_base']
            comp_dice = group['Dice_comp']
            dice_diff = comp_dice - base_dice

            if (dice_diff == 0).all():
                # Identical scores in every pair: neither paired test is defined, so the
                # cell is shown without a significance mark.
                p_val = float('nan')
            elif shapiro(dice_diff).pvalue > 0.05:
                # The paired t-test assumes normally distributed pairwise differences
                _, p_val = ttest_rel(comp_dice, base_dice)
            else:
                _, p_val = wilcoxon(comp_dice, base_dice)

            delta = comp_dice.mean() - base_dice.mean()
            significance = '*' if p_val < 0.05 else ''

            results.append({
                "Algorithm": algorithm,
                "ColorSpace": color_space,
                "ΔDice": delta,
                "p-value": p_val,
                "Significance": significance,
                "Label": f"{delta:.3f}{significance}"
            })

    # An empty result list yields a frame without the columns pivot() needs
    if not results:
        print(f"{comp_method} vs {base_method}: no group with at least 4 paired images, heatmap skipped")
        continue

    # Convert to DataFrame; color spaces are unique within allowed_pairs, so the pivot has no duplicates
    df_res = pd.DataFrame(results)
    heatmap_data = df_res.pivot(index='Algorithm', columns='ColorSpace', values='ΔDice')
    annot_data = df_res.pivot(index='Algorithm', columns='ColorSpace', values='Label')

    # Plot
    plt.figure(figsize=(10, 6))
    sns.heatmap(
        heatmap_data,
        annot=annot_data,
        fmt='',
        cmap='coolwarm',
        center=0,
        linewidths=0.5,
        cbar_kws={'label': f'ΔDice ({comp_method} - {base_method})'}
    )
    plt.title(f"ΔDice Heatmap: {comp_method} vs {base_method}\n(* = p < 0.05)")
    plt.xlabel("Color Space")
    plt.ylabel("Thresholding Algorithm")
    plt.tight_layout()
    plt.show()


In [None]:
import pandas as pd
import numpy as np
import re
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import shapiro, ttest_rel, wilcoxon

# Result CSV for each preprocessing method, keyed by display name (insertion order matters:
# it fixes the order in which methods are compared and concatenated)
method_files = dict([
    ("Invert", "results_invert.csv"),       # Method 1
    ("CLAHE", "results_with_clahe.csv"),    # Method 2
    ("TopHat", "results_th.csv"),           # Method 3
    ("CLAHE+TopHat", "results.csv"),        # Method 4
])

# (color space, channel) combinations included in the analysis
allowed_pairs = list(zip(
    ('Gray', 'YUV', 'YCrCb', 'RGB', 'CIELab'),
    ('Gray', 'Y', 'Y', 'G', 'L'),
))

# --- Helper functions ---
def extract_color_channel(image_name):
    """Split the color space and channel out of an image file name.

    Matches ``img<digits>_<space>_<channel>_`` anywhere in the name. Returns
    the two tokens as a tuple, or ``(None, None)`` if there is no match.
    """
    parsed = re.search(r'img\d+_(\w+)_(\w+)_', image_name)
    if parsed:
        space, channel = parsed.groups()
        return (space, channel)
    return (None, None)

def extract_algorithm(image_name):
    """Return the algorithm token from a ``..._<Algorithm>.png`` file name.

    Only Default, Otsu, IsoData, Moments and Yen are recognised; any other
    name gives ``None``.
    """
    hit = re.compile(r'_(Default|Otsu|IsoData|Moments|Yen)\.png$').search(image_name)
    return None if hit is None else hit[1]

# --- Load and process method data ---
method_dfs = {}
for name, path in method_files.items():
    df = pd.read_csv(path)
    df[['ColorSpace', 'Channel']] = df['Image'].apply(lambda x: pd.Series(extract_color_channel(x)))
    df['Algorithm'] = df['Image'].apply(extract_algorithm)
    df['Method'] = name  # tag rows so the combined boxplots can split by method
    method_dfs[name] = df

# --- HEATMAPS: ΔDice vs Invert ---
base_method = "Invert"
comparison_methods = [m for m in method_files if m != base_method]

for comp_method in comparison_methods:
    df_base = method_dfs[base_method]
    df_comp = method_dfs[comp_method]

    # Pair the two methods per image (default inner merge)
    df_merged = pd.merge(df_base, df_comp, on="Image", suffixes=('_base', '_comp'))

    df_filtered = df_merged[
        df_merged.apply(
            lambda row: (row['ColorSpace_base'], row['Channel_base']) in allowed_pairs,
            axis=1
        )
    ]

    results = []

    for algorithm in df_filtered['Algorithm_base'].dropna().unique():
        subset = df_filtered[df_filtered['Algorithm_base'] == algorithm]

        for color_space, channel in allowed_pairs:
            group = subset[
                (subset['ColorSpace_base'] == color_space) &
                (subset['Channel_base'] == channel)
            ]

            # Groups with fewer than 4 image pairs are skipped
            if len(group) < 4:
                continue

            base_dice = group['Dice_base']
            comp_dice = group['Dice_comp']
            dice_diff = comp_dice - base_dice

            if (dice_diff == 0).all():
                # Identical scores in every pair: neither paired test is defined, so the
                # cell is shown without a significance mark.
                p_val = float('nan')
            elif shapiro(dice_diff).pvalue > 0.05:
                # The paired t-test assumes normally distributed pairwise differences
                _, p_val = ttest_rel(comp_dice, base_dice)
            else:
                _, p_val = wilcoxon(comp_dice, base_dice)

            delta = comp_dice.mean() - base_dice.mean()
            sig = '*' if p_val < 0.05 else ''

            results.append({
                "Algorithm": algorithm,
                "ColorSpace": color_space,
                "ΔDice": delta,
                "p-value": p_val,
                "Significance": sig,
                "Label": f"{delta:.3f}{sig}"
            })

    # An empty result list yields a frame without the columns pivot() needs
    if not results:
        print(f"{comp_method} vs {base_method}: no group with at least 4 paired images, heatmap skipped")
        continue

    # Color spaces are unique within allowed_pairs, so the pivot has no duplicate cells
    df_res = pd.DataFrame(results)
    heatmap_data = df_res.pivot(index='Algorithm', columns='ColorSpace', values='ΔDice')
    annot_data = df_res.pivot(index='Algorithm', columns='ColorSpace', values='Label')

    # Plot heatmap
    plt.figure(figsize=(10, 6))
    sns.heatmap(
        heatmap_data,
        annot=annot_data,
        fmt='',
        cmap='cubehelix',
        center=0,
        linewidths=0.5,
        cbar_kws={'label': f'ΔDice ({comp_method} - {base_method})'}
    )
    plt.title(f"ΔDice Heatmap: {comp_method} vs {base_method}\n(* = p < 0.05)")
    plt.xlabel("Color Space")
    plt.ylabel("Thresholding Algorithm")
    plt.tight_layout()
    plt.show()

# --- BOXPLOTS: All Methods ---
# Combine all methods into one DataFrame
df_all_methods = pd.concat(method_dfs.values(), ignore_index=True)

# Plot boxplots for each (ColorSpace, Channel)
for color_space, channel in allowed_pairs:
    subset = df_all_methods[
        (df_all_methods['ColorSpace'] == color_space) &
        (df_all_methods['Channel'] == channel)
    ]

    # Nothing to draw for a pair that no file contains
    if subset.empty:
        continue

    plt.figure(figsize=(12, 6))
    sns.boxplot(
        data=subset,
        x='Algorithm',
        y='Dice',
        hue='Method',
        palette='Set2'
    )
    plt.title(f'Dice Scores by Algorithm ({color_space}-{channel}) - All Methods')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


In [None]:
import pandas as pd
import numpy as np
import re
from scipy.stats import shapiro, ttest_rel, wilcoxon

# Read Method 1 (results_invert.csv) into df1 and Method 4 (results.csv) into df4
df1, df4 = map(pd.read_csv, ("results_invert.csv", "results.csv"))

# Helper functions
def extract_color_channel(image_name):
    """Parse the color space and channel tokens out of an image file name.

    Searches for ``img<digits>_<space>_<channel>_`` anywhere in the name and
    returns the two captured tokens as a tuple, or ``(None, None)`` when the
    pattern is absent.
    """
    found = re.search(r'img\d+_(\w+)_(\w+)_', image_name)
    if found is None:
        return (None, None)
    return found.groups()

def extract_algorithm(image_name):
    """Return the thresholding algorithm named by the file-name suffix.

    The name must end in ``_<Algorithm>.png`` with one of Default, Otsu,
    IsoData, Moments or Yen; anything else yields ``None``.
    """
    found = re.search(r'_(Default|Otsu|IsoData|Moments|Yen)\.png$', image_name)
    if found is None:
        return None
    return found.group(1)

# Enrich data with the metadata parsed from each image name
for df in [df1, df4]:
    df[['ColorSpace', 'Channel']] = df['Image'].apply(lambda x: pd.Series(extract_color_channel(x)))
    df['Algorithm'] = df['Image'].apply(extract_algorithm)

# Merge on image name (default inner merge: only images present in both files remain)
df_merged = pd.merge(df1, df4, on="Image", suffixes=('_m1', '_m4'))

# Allowed color space/channel combinations
allowed_pairs = [
    ('Gray', 'Gray'),
    ('YUV', 'Y'),
    ('YCrCb', 'Y'),
    ('RGB', 'G'),
    ('CIELab', 'L')
]

# Filter valid channel pairs
df_filtered = df_merged[
    df_merged.apply(lambda row: (row['ColorSpace_m1'], row['Channel_m1']) in allowed_pairs, axis=1)
]

# Statistical test results
results = []

for algo in df_filtered['Algorithm_m1'].dropna().unique():
    subset = df_filtered[df_filtered['Algorithm_m1'] == algo]

    for cs, ch in allowed_pairs:
        group = subset[
            (subset['ColorSpace_m1'] == cs) & (subset['Channel_m1'] == ch)
        ]

        # Groups with fewer than 4 image pairs are skipped
        if len(group) < 4:
            continue

        vals1 = group['Dice_m1']
        vals4 = group['Dice_m4']
        diff = vals4 - vals1

        # Per-method normality, reported for reference only
        p1 = shapiro(vals1).pvalue
        p4 = shapiro(vals4).pvalue
        norm1 = p1 > 0.05
        norm4 = p4 > 0.05

        if (diff == 0).all():
            # Identical scores in every pair: neither paired test is defined
            stat = p_val = float('nan')
            test = "Not tested (identical)"
            sig = "n/a"
            norm_diff = "n/a"
        else:
            # The paired t-test assumes normally distributed pairwise differences,
            # so the test choice is driven by the differences, not the marginals.
            diff_is_normal = shapiro(diff).pvalue > 0.05
            if diff_is_normal:
                stat, p_val = ttest_rel(vals4, vals1)
                test = "Paired t-test"
            else:
                stat, p_val = wilcoxon(vals4, vals1)
                test = "Wilcoxon"
            sig = "✓" if p_val < 0.05 else "✗"
            norm_diff = "✓" if diff_is_normal else "✗"

        results.append({
            "Algorithm": algo,
            "ColorSpace-Channel": f"{cs}-{ch}",
            "Test Type": test,
            "Statistic": round(stat, 3),
            "p-value": round(p_val, 4),
            "Sig.": sig,
            # Order is Method 1 / Method 4
            "Normal (M1/M4)": f"{'✓' if norm1 else '✗'} / {'✓' if norm4 else '✗'}",
            "Normal (diff)": norm_diff
        })

# Convert to DataFrame
df_results = pd.DataFrame(results)

if df_results.empty:
    # Without rows the frame has no columns to sort by
    print("No (algorithm, channel) group had at least 4 paired images; nothing to report.")
else:
    # Sort if desired
    df_results = df_results.sort_values(by=["Algorithm", "ColorSpace-Channel"])

    # Display as table
    print(df_results.to_string(index=False))


In [None]:
import pandas as pd
import numpy as np
import re
from scipy.stats import shapiro, ttest_ind, mannwhitneyu
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import combinations

# Read the per-image scores of the top-hat run
df = pd.read_csv(filepath_or_buffer="results_th.csv")

# Extract color space and channel from image name
def extract_color_channel(image_name):
    """Return ``(color_space, channel)`` parsed from an image file name.

    The name is searched for ``img<digits>_<space>_<channel>_``; when nothing
    matches, ``(None, None)`` is returned instead.
    """
    pattern = re.compile(r'img\d+_(\w+)_(\w+)_')
    hit = pattern.search(image_name)
    return (None, None) if hit is None else hit.group(1, 2)

# Extract algorithm name
def extract_algorithm(image_name):
    """Return the algorithm token from a ``..._<Algorithm>.png`` file name.

    Only Default, Otsu, IsoData, Moments and Yen are recognised; any other
    name gives ``None``.
    """
    parsed = re.search(r'_(Default|Otsu|IsoData|Moments|Yen)\.png$', image_name)
    if parsed:
        return parsed.group(1)
    return None

# Parse color space, channel and thresholding algorithm from each image name
df[['ColorSpace', 'Channel']] = df['Image'].apply(lambda x: pd.Series(extract_color_channel(x)))
df['Algorithm'] = df['Image'].apply(extract_algorithm)

allowed_pairs = [
    ('Gray', 'Gray'),
    ('YUV', 'Y'),
    ('YCrCb', 'Y'),
    ('RGB', 'G'),
    ('CIELab', 'L')
]

# Filter df to only allowed pairs
df_filtered = df[df.apply(lambda row: (row['ColorSpace'], row['Channel']) in allowed_pairs, axis=1)].copy()

# Resolve the score column once so the tests, the labels and the final boxplot all agree
score_column = 'Dice' if 'Dice' in df.columns else 'IoU'
delta_label = f'Δ{score_column}'

# Prepare for pairwise channel comparisons
results = []

# Compare scores between every pair of allowed color/channel combos within each algorithm.
# NOTE(review): the groups are compared with independent-sample tests; if every channel
# variant comes from the same source images, the samples are paired - confirm the intent.
for algorithm in df_filtered['Algorithm'].dropna().unique():
    df_algo = df_filtered[df_filtered['Algorithm'] == algorithm]

    # All pairs of allowed color-channel combos for comparison
    for (cs1, ch1), (cs2, ch2) in combinations(allowed_pairs, 2):
        group1 = df_algo[(df_algo['ColorSpace'] == cs1) & (df_algo['Channel'] == ch1)]
        group2 = df_algo[(df_algo['ColorSpace'] == cs2) & (df_algo['Channel'] == ch2)]

        # Skip if not enough data
        if len(group1) < 4 or len(group2) < 4:
            print(f"{algorithm}: comparing {cs1}-{ch1} vs {cs2}-{ch2} -> not enough data")
            continue

        scores1 = group1[score_column]
        scores2 = group2[score_column]

        # Normality tests
        p1 = shapiro(scores1).pvalue
        p2 = shapiro(scores2).pvalue
        normal = (p1 > 0.05) and (p2 > 0.05)

        if normal:
            # Welch's t-test (unequal variances)
            stat, p_val = ttest_ind(scores1, scores2, equal_var=False)
            test_type = "t-test"
        else:
            stat, p_val = mannwhitneyu(scores1, scores2, alternative='two-sided')
            test_type = "Mann-Whitney U"

        delta = scores1.mean() - scores2.mean()
        sig = '*' if p_val < 0.05 else ''

        results.append({
            'Algorithm': algorithm,
            'Comparison': f'{cs1}-{ch1} vs {cs2}-{ch2}',
            'ΔDice': delta,  # key kept for compatibility; holds the delta of score_column
            'p-value': p_val,
            'Significance': sig,
            'Test': test_type
        })

# Convert to DataFrame
res_df = pd.DataFrame(results)

# A frame built from an empty list has no 'Algorithm' column to iterate over
tested_algorithms = res_df['Algorithm'].unique() if not res_df.empty else []
if len(tested_algorithms) == 0:
    print("No channel pair had enough data to compare; heatmaps skipped.")

# Create heatmaps per algorithm
for algorithm in tested_algorithms:
    df_algo = res_df[res_df['Algorithm'] == algorithm]

    # Square matrix with the channel combos on both axes; the diagonal stays NaN
    channels = [f"{cs}-{ch}" for cs, ch in allowed_pairs]
    heatmap_data = pd.DataFrame(np.nan, index=channels, columns=channels)
    annot_data = pd.DataFrame('', index=channels, columns=channels, dtype=str)

    for _, row in df_algo.iterrows():
        left, right = row['Comparison'].split(' vs ')
        delta = row['ΔDice']
        # Subtracting from 0.0 keeps an exact zero from being formatted as "-0.000"
        mirrored = 0.0 - delta

        heatmap_data.loc[left, right] = delta
        annot_data.loc[left, right] = f"{delta:.3f}{row['Significance']}"

        # Also fill the symmetric cell with negative delta
        heatmap_data.loc[right, left] = mirrored
        annot_data.loc[right, left] = f"{mirrored:.3f}{row['Significance']}"

    plt.figure(figsize=(10,8))
    sns.heatmap(heatmap_data, annot=annot_data, fmt='', cmap='coolwarm', center=0, linewidths=0.5,
                cbar_kws={'label': f'{delta_label} Between Channels ({algorithm})'})
    plt.title(f'{delta_label} Heatmap Between Channels\nAlgorithm: {algorithm}')
    plt.xticks(rotation=45)
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.show()

# Boxplots showing the score distribution per algorithm, per channel
plt.figure(figsize=(14, 8))
sns.boxplot(data=df_filtered, x='Algorithm', y=score_column, hue='ColorSpace')
plt.title(f'{score_column} Coefficients by Algorithm and Color Space')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import numpy as np
import re
from scipy.stats import shapiro, ttest_ind, mannwhitneyu
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import combinations

# Read the per-image scores of the baseline (inverted only) run
df = pd.read_csv(filepath_or_buffer="results_invert.csv")

# Extract ColorSpace and Channel from image name
def extract_color_channel(image_name):
    """Split the color space and channel out of an image file name.

    Matches ``img<digits>_<space>_<channel>_`` anywhere in the name. Returns
    the two tokens as a tuple, or ``(None, None)`` if there is no match.
    """
    parsed = re.search(r'img\d+_(\w+)_(\w+)_', image_name)
    if parsed:
        space, channel = parsed.groups()
        return (space, channel)
    return (None, None)

# Extract algorithm name
def extract_algorithm(image_name):
    """Return the thresholding algorithm named by the file-name suffix.

    The name must end in ``_<Algorithm>.png`` with one of Default, Otsu,
    IsoData, Moments or Yen; anything else yields ``None``.
    """
    hit = re.compile(r'_(Default|Otsu|IsoData|Moments|Yen)\.png$').search(image_name)
    return None if hit is None else hit[1]

# Apply parsing
df[['ColorSpace', 'Channel']] = df['Image'].apply(lambda x: pd.Series(extract_color_channel(x)))
df['Algorithm'] = df['Image'].apply(extract_algorithm)

# Exclude Yen. Rows whose algorithm could not be parsed (None) also pass this filter.
df = df[df['Algorithm'] != 'Yen']

# Define allowed (ColorSpace, Channel) pairs
allowed_pairs = [
    ('Gray', 'Gray'),
    ('YUV', 'Y'),
    ('YCrCb', 'Y'),
    ('RGB', 'G'),
    ('CIELab', 'L')
]

# Filter to allowed pairs
df_filtered = df[df.apply(lambda row: (row['ColorSpace'], row['Channel']) in allowed_pairs, axis=1)].copy()

# Use Dice or IoU depending on what exists; every label below follows this choice
score_column = 'Dice' if 'Dice' in df.columns else 'IoU'
delta_label = f'Δ{score_column}'

# Statistical comparisons across all (ColorSpace, Channel) groups, pooled over all algorithms except Yen.
# NOTE(review): the groups are compared with independent-sample tests; if every channel
# variant comes from the same source images, the samples are paired - confirm the intent.
results = []

for (cs1, ch1), (cs2, ch2) in combinations(allowed_pairs, 2):
    group1 = df_filtered[(df_filtered['ColorSpace'] == cs1) & (df_filtered['Channel'] == ch1)]
    group2 = df_filtered[(df_filtered['ColorSpace'] == cs2) & (df_filtered['Channel'] == ch2)]

    if len(group1) < 4 or len(group2) < 4:
        print(f"Skipping {cs1}-{ch1} vs {cs2}-{ch2}: not enough data.")
        continue

    scores1 = group1[score_column]
    scores2 = group2[score_column]

    # Test for normality
    p1 = shapiro(scores1).pvalue
    p2 = shapiro(scores2).pvalue
    normal = (p1 > 0.05) and (p2 > 0.05)

    if normal:
        # Welch's t-test (unequal variances)
        stat, p_val = ttest_ind(scores1, scores2, equal_var=False)
        test_type = "t-test"
    else:
        stat, p_val = mannwhitneyu(scores1, scores2, alternative='two-sided')
        test_type = "Mann-Whitney U"

    delta = scores1.mean() - scores2.mean()
    sig = '*' if p_val < 0.05 else ''

    results.append({
        'Comparison': f'{cs1}-{ch1} vs {cs2}-{ch2}',
        'ΔDice': delta,  # key kept for compatibility; holds the delta of score_column
        'p-value': p_val,
        'Significance': sig,
        'Test': test_type
    })

# Format results into DataFrame
res_df = pd.DataFrame(results)

# Prepare heatmap: square matrix with the channel combos on both axes, diagonal left as NaN
channels = [f"{cs}-{ch}" for cs, ch in allowed_pairs]
heatmap_data = pd.DataFrame(np.nan, index=channels, columns=channels)
annot_data = pd.DataFrame('', index=channels, columns=channels, dtype=str)

for _, row in res_df.iterrows():
    left, right = row['Comparison'].split(' vs ')
    delta = row['ΔDice']
    sig = row['Significance']
    # Subtracting from 0.0 keeps an exact zero from being formatted as "-0.000"
    mirrored = 0.0 - delta

    heatmap_data.loc[left, right] = delta
    heatmap_data.loc[right, left] = mirrored

    annot_data.loc[left, right] = f"{delta:.3f}{sig}"
    annot_data.loc[right, left] = f"{mirrored:.3f}{sig}"

# Plot heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(heatmap_data, annot=annot_data, fmt='', cmap='coolwarm', center=0, linewidths=0.5,
            cbar_kws={'label': f'{delta_label} Between Channels (All Except Yen)'})
plt.title(f'{delta_label} Heatmap Between Channels\n(All Algorithms Except Yen)')
plt.xticks(rotation=45)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

# Optional: Boxplot by channel group (across all algorithms except Yen)
df_filtered['ChannelGroup'] = df_filtered['ColorSpace'] + '-' + df_filtered['Channel']
plt.figure(figsize=(12, 8))
sns.boxplot(data=df_filtered, x='ChannelGroup', y=score_column)
plt.title(f'{score_column} Distribution per Channel (All Except Yen)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
