In [None]:
#Summary stats of features
import pandas as pd
from scipy.stats import wilcoxon
from statsmodels.stats.multitest import multipletests

# Workbook holding one row per feature and one column per CCC type.
# NOTE(review): the path below looks like a placeholder — point it at the real file.
file_path = r"\PATH.xlsx"
df = pd.read_excel(file_path)

# First entry is the feature-name column; later cells iterate over
# columns_of_interest[1:] to visit the CCC columns only.
columns_of_interest = [
    "Feature",
    "PFPE intrasession CCC",
    "PFPE intersession CCC",
    "PFCE intrasession CCC",
    "PFCE intersession CCC", 
    "In vivo intrasession CCC"
]
df_filtered = df[columns_of_interest]

# Substrings (matched case-insensitively against the feature name) that
# identify the shape and intensity feature classes.
shape_keywords = ['shape']
intensity_keywords = ['firstorder']

# Names of the features that match neither keyword list. The previous
# comprehension tested each full feature name for exact membership in the
# keyword list, which never excluded anything and therefore returned every
# feature. Use the same case-insensitive substring match as the texture
# subset so both agree.
texture_keywords = df_filtered.loc[
    ~df_filtered["Feature"].str.contains(
        '|'.join(shape_keywords + intensity_keywords), case=False
    ),
    "Feature",
].tolist()

def calculate_median_iqr(df_column):
    """Summarise a numeric pandas Series by its median and quartiles.

    Parameters
    ----------
    df_column : pandas.Series
        Values to summarise.

    Returns
    -------
    tuple
        ``(median, 25th percentile, 75th percentile)`` in that order.
    """
    lower_quartile, upper_quartile = (df_column.quantile(q) for q in (0.25, 0.75))
    return df_column.median(), lower_quartile, upper_quartile

# Regex alternations built from the keyword lists; matching is done
# case-insensitively against the feature name.
shape_pattern = '|'.join(shape_keywords)
intensity_pattern = '|'.join(intensity_keywords)
non_texture_pattern = '|'.join(shape_keywords + intensity_keywords)

# Split the table into feature classes; texture is everything that matches
# neither the shape nor the intensity keywords.
feature_names = df_filtered['Feature']
shape_features = df_filtered[feature_names.str.contains(shape_pattern, case=False)]
intensity_features = df_filtered[feature_names.str.contains(intensity_pattern, case=False)]
texture_features = df_filtered[~feature_names.str.contains(non_texture_pattern, case=False)]

# Output column label -> frame whose rows feed that column.
labelled_frames = (
    ("Overall CCC Median (IQR)", df_filtered),
    ("Shape CCC Median (IQR)", shape_features),
    ("Intensity CCC Median (IQR)", intensity_features),
    ("Texture CCC Median (IQR)", texture_features),
)

# One entry per CCC column, each holding a "median (q25 - q75)" string
# for the overall table and for every feature class.
summary_stats = {}
for col in columns_of_interest[1:]:
    formatted = {}
    for label, frame in labelled_frames:
        med, lower, upper = calculate_median_iqr(frame[col])
        formatted[label] = f"{med:.3f} ({lower:.3f} - {upper:.3f})"
    summary_stats[col] = formatted

# Transpose so that rows are CCC columns and columns are the group labels.
summary_stats_df = pd.DataFrame(summary_stats).T
summary_stats_df#.to_excel('general_ccc_stats.xlsx')

In [None]:
#Statistical testing of the distribution of CCCs across features
from scipy.stats import kruskal

# For every CCC column, compare the pooled ("overall") values against each
# feature class with a Kruskal-Wallis test.
# NOTE(review): each feature class is a subset of the overall sample, so the
# two samples passed to kruskal are not independent — confirm this is intended.
test_results = []
for col in columns_of_interest[1:]:
    overall = df_filtered[col].dropna()
    for group_name, group_frame in [
        ('Shape', shape_features),
        ('Intensity', intensity_features),
        ('Texture', texture_features),
    ]:
        group_values = group_frame[col].dropna()

        # Kruskal-Wallis; skipped when either sample has no observations
        if len(overall) > 0 and len(group_values) > 0:
            _, p_value = kruskal(overall, group_values)
            test_results.append({'Comparison': f'{col}: Overall vs {group_name}', 'P-value': p_value})

# Correct for multiple testing 
p_values = [result['P-value'] for result in test_results]
# Every comparison above may have been skipped; only run the FDR correction
# when there is at least one p-value, since an empty list is not a valid input.
if p_values:
    corrections = multipletests(p_values, method='fdr_bh')
    # corrections[1] holds the adjusted p-values, in the same order as p_values
    for result, adjusted in zip(test_results, corrections[1]):
        result['Adjusted P-value'] = adjusted

# Fixing the columns keeps the table layout stable even with zero rows.
test_results_df = pd.DataFrame(test_results, columns=['Comparison', 'P-value', 'Adjusted P-value'])

test_results_df#.to_excel('comparison_overall_stratified.xlsx')

In [None]:
# Testing for normality of CCC values
from scipy.stats import shapiro

# One Shapiro-Wilk test per CCC column and per group (all features plus each
# feature class). Each result row records the W statistic and its p-value.
normality_results = []

for col in columns_of_interest[1:]:
    for group_name, group_frame in [
        ('All', df_filtered),
        ('Shape', shape_features),
        ('Intensity', intensity_features),
        ('Texture', texture_features),
    ]:
        values = group_frame[col].dropna()

        # Shapiro-Wilk needs at least 3 observations; for smaller groups
        # record NaN so the table keeps one row per (column, group) pair
        # instead of the whole cell failing.
        if len(values) >= 3:
            stat, p_value = shapiro(values)
        else:
            stat, p_value = float('nan'), float('nan')

        normality_results.append({
            'CCC Type': col,
            'Group': group_name,
            'Shapiro-W': stat,
            'P-value': p_value
        })

normality_df = pd.DataFrame(normality_results)

# non_normal_distributions = normality_df[normality_df['P-value'] < 0.05]

normality_df#.to_excel('normality_testing.xlsx')

In [None]:
# Compare intrasession vs intersession CCC values for PFPE and for PFCE,
# first over all features ("Overall") and then within each feature class.
# NOTE(review): both columns describe the same features, so the samples look
# paired, while kruskal treats them as independent groups — confirm the test
# choice (wilcoxon is imported at the top of the notebook but unused).
comparison_results = []
for prefix, intra_col, inter_col in [
    ("PFPE", "PFPE intrasession CCC", "PFPE intersession CCC"),
    ("PFCE", "PFCE intrasession CCC", "PFCE intersession CCC")
]:
    # "Overall" comes first, followed by the stratified comparisons
    for category, features in [
        ("Overall", df_filtered),
        ("Shape", shape_features),
        ("Intensity", intensity_features),
        ("Texture", texture_features),
    ]:
        intra_values = features[intra_col].dropna()
        inter_values = features[inter_col].dropna()
        # Skip the comparison when either sample has no observations
        if len(intra_values) > 0 and len(inter_values) > 0:
            _, p_value = kruskal(intra_values, inter_values)
            comparison_results.append({
                "Comparison": f"{prefix} {category} Intrasession vs Intersession",
                "P-value": p_value
            })

# Correct for multiple testing
p_values = [result['P-value'] for result in comparison_results]
# Every comparison above may have been skipped; only run the FDR correction
# when there is at least one p-value, since an empty list is not a valid input.
if p_values:
    corrections = multipletests(p_values, method='fdr_bh')
    # corrections[1] holds the adjusted p-values, in the same order as p_values
    for result, adjusted in zip(comparison_results, corrections[1]):
        result['Adjusted P-value'] = adjusted

# Fixing the columns keeps the table layout stable even with zero rows.
comparison_results_df = pd.DataFrame(
    comparison_results, columns=['Comparison', 'P-value', 'Adjusted P-value']
)

comparison_results_df#.to_excel('comparison_intra_vs_interssion.xlsx')

In [None]:
# Perform comparisons of PFPE vs PFCE for intrasession and intersession CCC,
# first over all features ("Overall") and then within each feature class.
# NOTE(review): both columns describe the same features, so the samples look
# paired, while kruskal treats them as independent groups — confirm the test
# choice (wilcoxon is imported at the top of the notebook but unused).
pfpe_vs_pfce_results = []
for session_type, pfpe_col, pfce_col in [
    ("Intrasession", "PFPE intrasession CCC", "PFCE intrasession CCC"),
    ("Intersession", "PFPE intersession CCC", "PFCE intersession CCC"),
]:
    # "Overall" comes first, followed by the stratified comparisons
    for category, features in [
        ("Overall", df_filtered),
        ("Shape", shape_features),
        ("Intensity", intensity_features),
        ("Texture", texture_features),
    ]:
        pfpe_values = features[pfpe_col].dropna()
        pfce_values = features[pfce_col].dropna()
        # Skip the comparison when either sample has no observations
        if len(pfpe_values) > 0 and len(pfce_values) > 0:
            _, p_value = kruskal(pfpe_values, pfce_values)
            pfpe_vs_pfce_results.append({
                "Comparison": f"{category} {session_type}: PFPE vs PFCE",
                "P-value": p_value
            })

# Correct for multiple testing
p_values = [result['P-value'] for result in pfpe_vs_pfce_results]
# Every comparison above may have been skipped; only run the FDR correction
# when there is at least one p-value, since an empty list is not a valid input.
if p_values:
    corrections = multipletests(p_values, method='fdr_bh')
    # corrections[1] holds the adjusted p-values, in the same order as p_values
    for result, adjusted in zip(pfpe_vs_pfce_results, corrections[1]):
        result['Adjusted P-value'] = adjusted

# Fixing the columns keeps the table layout stable even with zero rows.
pfpe_vs_pfce_results_df = pd.DataFrame(
    pfpe_vs_pfce_results, columns=['Comparison', 'P-value', 'Adjusted P-value']
)

pfpe_vs_pfce_results_df#.to_excel('comparison_pfpe_vs_pfce.xlsx')