# In [None]:
#This is the code for the statistical analysis of different machine learning approaches in the diagnosis of different I-level and II-level diseases.
#Friedman test + Nemenyi post-hoc test are used here.

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scikit_posthocs import posthoc_nemenyi_friedman
from scipy.stats import friedmanchisquare

# Load the data from the CSV provided in the attachments.
# NOTE(review): the attachment has been referred to as "shuzhibiaoxian3.csv",
# but the path below names "shuzhibiaoxian.csv" -- confirm which is correct.
filepath = "C:\\Users\\PS\\Desktop\\Open code\\shuzhibiaoxian.csv"  # Set the user's own path.
df = pd.read_csv(filepath, encoding="gbk", header=0)

# Show the first rows of the original (wide) table.
print("Original DataFrame (wide format):", df.head(), sep="\n")

# Reshape wide -> long: 'Diseases' and 'Algorithms' are kept as identifier
# columns, every remaining column becomes one (Metric, Value) row.
df_long = pd.melt(
    df,
    id_vars=['Diseases', 'Algorithms'],
    var_name='Metric',
    value_name='Value',
)

# Show the first rows of the reshaped (long) table.
print("\nTransformed DataFrame (long format):", df_long.head(), sep="\n")

# 1. Friedman test
# For each metric, build a Diseases x Algorithms table and hand one sample per
# algorithm (one column of that table) to the Friedman test.
friedman_results = {}
for metric in df_long['Metric'].unique():
    metric_rows = df_long.loc[df_long['Metric'] == metric]
    metric_data = metric_rows.pivot(index='Diseases', columns='Algorithms', values='Value')
    # Transposing the value matrix yields one row per algorithm, which is
    # unpacked into the separate sample arguments the test expects.
    per_algorithm_samples = metric_data.to_numpy().T
    stat, p = friedmanchisquare(*per_algorithm_samples)
    friedman_results[metric] = {'Statistic': stat, 'p-value': p}

# Report the test statistic and p-value of every metric.
print("\nFriedman Test Results:")
for metric, result in friedman_results.items():
    statistic = result['Statistic']
    p_value = result['p-value']
    print(f"{metric}: Statistic={statistic:.4f}, p-value={p_value:.4f}")

# 2. Conduct the Nemenyi post-hoc test.
# For each metric, the Diseases x Algorithms table is passed to
# posthoc_nemenyi_friedman and the returned table of pairwise p-values
# (indexed by algorithm on both axes) is stored.
nemenyi_results = {}
for metric in df_long['Metric'].unique():
    metric_data = df_long[df_long['Metric'] == metric].pivot(index='Diseases', columns='Algorithms', values='Value')
    nemenyi_result = posthoc_nemenyi_friedman(metric_data)
    nemenyi_results[metric] = nemenyi_result

# Significance level.
alpha = 0.05
# Print the results of the Nemenyi post-hoc test.
print("\nNemenyi Post-hoc Test Results:")
for metric, result in nemenyi_results.items():
    print(f"\nNemenyi Post-hoc Test for {metric}:")
    print(result)  # Output detailed pairwise comparison results.
    # The header is tied to `alpha` so it cannot drift from the threshold
    # actually used in the comparison below.
    print(f"\nSignificant differences (p-value < {alpha}):")
    # The p-value table is symmetric, so each unordered pair of algorithms is
    # reported once (upper triangle only) instead of as both "A vs B" and
    # "B vs A".
    algorithms = list(result.columns)
    for position, i in enumerate(algorithms):
        for j in algorithms[position + 1:]:
            p_value = result.loc[i, j]
            verdict = "Significant" if p_value < alpha else "Not Significant"
            print(f"{i} vs {j}: p-value = {p_value:.4f} ({verdict})")

# 3. Comprehensive Comparison
# Mean and variance of 'Value' over all rows sharing an (algorithm, metric)
# pair; after unstack() rows are algorithms and columns are metrics.
mean_performance = df_long.groupby(['Algorithms', 'Metric'])['Value'].mean().unstack()
# The variance table is computed here but is not used by the plot below.
variance_performance = df_long.groupby(['Algorithms', 'Metric'])['Value'].var().unstack()

# Plot a grouped bar chart: one group per algorithm, one bar per metric.
plt.rcParams['font.family'] = 'Times New Roman'
fig, ax = plt.subplots(figsize=(18, 10))
bar_width = 0.8 # Bar width.
bars = mean_performance.plot(kind='bar', width=bar_width, ax=ax, fontsize=12)

# Set the chart title and labels.
ax.set_title('Average performance of different machine learning strategies in disease pre-diagnosis', fontsize=24)
ax.set_ylabel('Macro-average value', fontsize=22)
ax.set_xlabel('Algorithm', fontsize=22)

# Set the legend. Its upper-right corner is anchored at axes coordinates
# (1.17, 1), i.e. past the right edge of the plot area, so it does not cover
# the bars.
legend = ax.legend(title='Metric', fontsize=20, loc='upper right', title_fontsize=20, bbox_to_anchor=(1.17, 1))
# Set the x-axis labels to horizontal.
ax.set_xticklabels(ax.get_xticklabels(), fontsize=18, rotation=0, ha='center')
ax.tick_params(axis='y', labelsize=14)  # Modify the font size of the y-axis tick labels.

# Annotate every bar with its value, formatted to three decimals.
for container in ax.containers:
    ax.bar_label(container, fmt='%.3f', fontsize = 11)

# Fit the axes, labels and legend into the figure area.
plt.tight_layout()
# Save the chart in TIFF format next to the input CSV, so that adapting
# `filepath` is enough to run the script on another machine (the output no
# longer depends on a second hard-coded, machine-specific directory).
output_path = os.path.join(os.path.dirname(filepath), 'algorithm_performance.tiff')
plt.savefig(output_path, format='tiff', dpi=600)

# Display the chart.
plt.show()