In [4]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_rel

def load_metrics(file_path):
    """Parse accuracy and F1 values from a plain-text metrics file.

    The file is scanned line by line. A line containing ``"Accuracies:"``
    switches to the accuracy section and a line containing ``"F1 Scores:"``
    switches to the F1 section; the header line itself is not parsed. Every
    following line is converted with ``float`` and appended to the active
    section. Lines that appear before the first header are ignored.

    Blank lines are skipped silently (they are separators, not bad data).
    Any other line inside a section that is not a valid float is reported on
    stdout and skipped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A tuple ``(accuracies, f1_scores)`` of NumPy arrays holding the values
        in file order. Either array is empty if its section is missing.
    """
    accuracies = []
    f1_scores = []

    # The list that numeric lines are currently appended to; None until the
    # first section header has been seen.
    active_section = None

    with open(file_path, 'r') as f:
        for line in f:
            # Section headers only switch the target list.
            if "Accuracies:" in line:
                active_section = accuracies
                continue
            if "F1 Scores:" in line:
                active_section = f1_scores
                continue

            text = line.strip()
            # Nothing to collect outside a section or on an empty line.
            if active_section is None or not text:
                continue

            # Keep the try body limited to the conversion that can fail.
            try:
                value = float(text)
            except ValueError:
                print(f"Skipping malformed line in {file_path}: {text}")
            else:
                active_section.append(value)

    return np.array(accuracies), np.array(f1_scores)

# Load metrics for each model from its plain-text results file.
# load_metrics returns a pair of NumPy arrays: (accuracies, f1_scores).
minirocket_acc, minirocket_f1 = load_metrics('minirocket_result/metrics.txt')
convtran_acc, convtran_f1 = load_metrics('convtran_result/metrics_best.txt')
s4d_acc, s4d_f1 = load_metrics('s4d_result/metrics.txt')

def load_csv_metrics(file_path):
    """Read a results CSV and return its accuracy and F1 columns.

    Args:
        file_path: Path of a CSV file that has ``Accuracy`` and ``F1_Score``
            columns.

    Returns:
        A tuple ``(accuracies, f1_scores)`` with the underlying values of the
        two columns, in row order.

    Raises:
        KeyError: If either expected column is absent from the file.
    """
    frame = pd.read_csv(file_path)
    accuracy_column = frame['Accuracy']
    f1_column = frame['F1_Score']
    return accuracy_column.values, f1_column.values

# Load metrics for KNN and XGBoost from wavelet features.
# load_csv_metrics reads the 'Accuracy' and 'F1_Score' columns of each CSV
# and returns them as (accuracies, f1_scores).
knn_acc, knn_f1 = load_csv_metrics('wavelet_result/knn_result.csv')
xgboost_acc, xgboost_f1 = load_csv_metrics('wavelet_result/xgboost_result.csv')

# Run each paired t-test only when both accuracy arrays contain data and
# have matching lengths. Only the accuracy arrays are checked here; the F1
# arrays are handed to ttest_rel without a separate size check.
minirocket_count = minirocket_acc.size

can_compare_convtran = (
    minirocket_count
    and convtran_acc.size
    and minirocket_count == convtran_acc.size
)
if can_compare_convtran:
    # ttest_rel results unpack into (statistic, p-value).
    t_stat_acc, p_value_acc = ttest_rel(minirocket_acc, convtran_acc)
    t_stat_f1, p_value_f1 = ttest_rel(minirocket_f1, convtran_f1)

    print("MiniRocket vs ConvTran:")
    print(f"T-statistic (Accuracy): {t_stat_acc}, P-value: {p_value_acc}")
    print(f"T-statistic (F1 Score): {t_stat_f1}, P-value: {p_value_f1}")

can_compare_s4d = (
    minirocket_count
    and s4d_acc.size
    and minirocket_count == s4d_acc.size
)
if can_compare_s4d:
    t_stat_acc, p_value_acc = ttest_rel(minirocket_acc, s4d_acc)
    t_stat_f1, p_value_f1 = ttest_rel(minirocket_f1, s4d_f1)

    print("\nMiniRocket vs S4D:")
    print(f"T-statistic (Accuracy): {t_stat_acc}, P-value: {p_value_acc}")
    print(f"T-statistic (F1 Score): {t_stat_f1}, P-value: {p_value_f1}")

alpha = 0.05  # Significance level compared against each p-value


def report_t_test(model1, model2, t_stat_acc, p_value_acc, t_stat_f1, p_value_f1):
    """Print paired t-test statistics and a significance verdict per metric.

    A p-value strictly below the module-level ``alpha`` is reported as a
    significant difference; anything else is reported as not significant.

    Args:
        model1: Display name of the first model.
        model2: Display name of the second model.
        t_stat_acc: T-statistic of the accuracy comparison.
        p_value_acc: P-value of the accuracy comparison.
        t_stat_f1: T-statistic of the F1 comparison.
        p_value_f1: P-value of the F1 comparison.

    Returns:
        None. All output goes to stdout.
    """
    # Raw statistics first, then one verdict line per metric.
    summary_lines = [
        f"\n{model1} vs {model2}:",
        f"T-statistic (Accuracy): {t_stat_acc}, P-value: {p_value_acc}",
        f"T-statistic (F1 Score): {t_stat_f1}, P-value: {p_value_f1}",
    ]
    for summary_line in summary_lines:
        print(summary_line)

    accuracy_is_significant = p_value_acc < alpha
    print(
        f"Significant difference in Accuracy between {model1} and {model2}."
        if accuracy_is_significant
        else f"No significant difference in Accuracy between {model1} and {model2}."
    )

    f1_is_significant = p_value_f1 < alpha
    print(
        f"Significant difference in F1 Score between {model1} and {model2}."
        if f1_is_significant
        else f"No significant difference in F1 Score between {model1} and {model2}."
    )

# Each ttest_rel result unpacks into (statistic, p-value), so the two starred
# calls supply the four numeric arguments of report_t_test in order:
# accuracy statistic, accuracy p-value, F1 statistic, F1 p-value.

# Report t-test results for MiniRocket vs ConvTran
report_t_test(
    "MiniRocket", "ConvTran",
    *ttest_rel(minirocket_acc, convtran_acc),
    *ttest_rel(minirocket_f1, convtran_f1),
)

# Report t-test results for MiniRocket vs S4D
report_t_test(
    "MiniRocket", "S4D",
    *ttest_rel(minirocket_acc, s4d_acc),
    *ttest_rel(minirocket_f1, s4d_f1),
)

# Report t-test results for MiniRocket vs KNN.
# The F1 comparison uses KNN's own F1 scores; comparing against s4d_f1 here
# would print S4D's F1 statistics under the KNN heading.
report_t_test(
    "MiniRocket", "KNN",
    *ttest_rel(minirocket_acc, knn_acc),
    *ttest_rel(minirocket_f1, knn_f1),
)

# Report t-test results for MiniRocket vs XGBoost
report_t_test(
    "MiniRocket", "XGBoost",
    *ttest_rel(minirocket_acc, xgboost_acc),
    *ttest_rel(minirocket_f1, xgboost_f1),
)

Skipping malformed line in minirocket_result/metrics.txt: 
Skipping malformed line in minirocket_result/metrics.txt: Average LOPOCV Accuracy: 46.29%
Skipping malformed line in minirocket_result/metrics.txt: Average LOPOCV F1 Score: 43.45%
Skipping malformed line in convtran_result/metrics_best.txt: 
Skipping malformed line in convtran_result/metrics_best.txt: Average LOPOCV Accuracy: 46.98%
Skipping malformed line in convtran_result/metrics_best.txt: Average LOPOCV F1 Score: 41.98%
Skipping malformed line in s4d_result/metrics.txt: 
Skipping malformed line in s4d_result/metrics.txt: Average LOPOCV Accuracy: 46.78%
Skipping malformed line in s4d_result/metrics.txt: Average LOPOCV F1 Score: 39.74%
MiniRocket vs ConvTran:
T-statistic (Accuracy): -0.23363657784130473, P-value: 0.8186488694654932
T-statistic (F1 Score): 0.5810582554094299, P-value: 0.570437924541577

MiniRocket vs S4D:
T-statistic (Accuracy): -0.17906430770447998, P-value: 0.8604528015482578
T-statistic (F1 Score): 1.375682