In [38]:
# -*- coding: utf-8 -*-
"""Friedman Test Example"""

import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
from scipy.stats import friedmanchisquare

##############################################################################
# 1) LOAD AND CLEAN THE DATA
##############################################################################

# Relative path: resolved against the notebook's current working directory.
file_path = "FF_Modelmetrics.csv"
# Raw load with pandas' default type inference; columns are cleaned afterwards.
df_raw = pd.read_csv(file_path)

# Sanity check of what was read: (rows, columns) and the first five rows.
print("Initial DataFrame shape:", df_raw.shape)
print(df_raw.head())

def parse_metric(value):
    """
    Convert a metric cell to a float.

    Strings are stripped of surrounding whitespace; if the result ends with
    '%', every '%' character is removed before conversion. The number is NOT
    rescaled, so "71.1%" becomes 71.1 (not 0.711) while "0.711" stays 0.711.

    Parameters
    ----------
    value : any
        A raw cell value. Non-string inputs (floats, ints, NaN, None, ...)
        are returned unchanged.

    Returns
    -------
    float or the original object
        The parsed float for strings, np.nan for strings that cannot be
        parsed as a number, or `value` itself when it is not a string.
    """
    if not isinstance(value, str):
        return value

    text = value.strip()
    if text.endswith('%'):
        text = text.replace('%', '')

    try:
        return float(text)
    except ValueError:
        # Only "not a number" is treated as missing; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
        return np.nan

# Columns run through parse_metric so they hold numeric values.
# NOTE(review): the printed head of the CSV also shows a 'PPV' column that is
# not listed here -- confirm whether leaving it unconverted is intentional.
metric_cols = ['Accuracy','TPR','TNR','F1 Score']
for col in metric_cols:
    # Element-wise conversion; the column in df_raw is overwritten in place.
    df_raw[col] = df_raw[col].apply(parse_metric)


def parse_noise(noise_str):
    """
    Convert strings like "0%", "5%" to floats 0.0, 5.0, etc.

    Surrounding whitespace is ignored and a trailing '%' is dropped; the
    number is not rescaled ("5%" -> 5.0). Plain numeric strings such as
    "10" are converted directly.

    Parameters
    ----------
    noise_str : any
        A raw noise cell. If it's already numeric (or any other non-string
        object), it is returned unchanged.

    Returns
    -------
    float or the original object
        The parsed float for strings, np.nan for strings that cannot be
        parsed as a number, or `noise_str` itself when it is not a string.
    """
    if not isinstance(noise_str, str):
        return noise_str

    text = noise_str.strip()
    if text.endswith('%'):
        text = text.replace('%', '')

    try:
        return float(text)
    except ValueError:
        # Unparseable text becomes missing; unrelated exceptions propagate
        # rather than being hidden by a bare `except`.
        return np.nan

# Convert the Noise column ("0%", "5%", ...) to plain floats (0.0, 5.0, ...).
df_raw['Noise'] = df_raw['Noise'].apply(parse_noise)

# Create a clean working copy
df = df_raw.copy()

print("\nData after cleaning/conversion:")
print(df.head(10))
# DataFrame.info() writes its report to stdout and returns None, so it must
# not be wrapped in print() (that would append a stray "None" line).
df.info()

# Sorted lists of the distinct models and (non-missing) noise levels.
model_list = sorted(df['Model'].unique())
noise_list = sorted(df['Noise'].dropna().unique())
print("\nUnique Models:", model_list)
print("Unique Noise Levels:", noise_list)

# Friedman test per model: each dataset is a block (row) and each noise level
# is a repeated-measures treatment (column) of the Accuracy metric.
# `friedmanchisquare` is already imported at the top of this cell.

# scipy.stats.friedmanchisquare raises ValueError with fewer than 3 samples,
# so at least 3 noise-level columns are required; 2 blocks is kept as the
# minimum number of datasets.
MIN_FRIEDMAN_TREATMENTS = 3
MIN_FRIEDMAN_BLOCKS = 2

friedman_results = []
for model in model_list:
    sub = df[df['Model'] == model]

    # Rows = datasets, columns = noise levels. pivot_table averages
    # duplicates of the same (Dataset, Noise) pair (default aggfunc='mean').
    pivoted = sub.pivot_table(index='Dataset',
                              columns='Noise',
                              values='Accuracy')

    # The test needs complete blocks: drop datasets missing any noise level.
    pivoted = pivoted.dropna(axis=0, how='any')

    n_blocks, n_treatments = pivoted.shape
    if n_blocks < MIN_FRIEDMAN_BLOCKS or n_treatments < MIN_FRIEDMAN_TREATMENTS:
        # Report the skip instead of silently dropping the model.
        print(f"Skipping {model}: {n_blocks} complete dataset(s) x "
              f"{n_treatments} noise level(s) is not enough for a Friedman test.")
        continue

    # Pass one sample per noise level, in ascending noise order.
    pivoted = pivoted[sorted(pivoted.columns)]
    data_for_test = [pivoted[col].values for col in pivoted.columns]

    stat, pval = friedmanchisquare(*data_for_test)
    friedman_results.append((model, stat, pval))

friedman_df = pd.DataFrame(friedman_results,
                           columns=['Model','Friedman_Stat','p_value'])

print("\nFriedman Test (by model across noise levels, aggregated by dataset blocks):")
print(friedman_df)



Initial DataFrame shape: (270, 12)
           Dataset                     Model Noise      TN      FP     FN  \
0  sepsis balanced  Logistic Regression (LR)    0%  293999  121541  15138   
1  sepsis balanced  Logistic Regression (LR)    5%  276228  139312  12661   
2  sepsis balanced  Logistic Regression (LR)   10%  264774  150766  11382   
3  sepsis balanced  Logistic Regression (LR)   15%  255915  159625  10692   
4  sepsis balanced  Logistic Regression (LR)   20%  250847  164693  10376   

      TP Accuracy    TPR    TNR    PPV  F1 Score  
0  41612    0.711  0.733  0.708  0.255     0.378  
1  44089    0.678  0.777  0.665  0.240     0.367  
2  45368    0.657  0.799  0.637  0.231     0.359  
3  46058    0.639  0.812  0.616  0.224     0.351  
4  46374    0.629  0.817  0.604  0.220     0.346  

Data after cleaning/conversion:
           Dataset                     Model  Noise      TN      FP     FN  \
0  sepsis balanced  Logistic Regression (LR)    0.0  293999  121541  15138   
1  seps

In [40]:
##############################################################################
# 3) CORRELATION CHECK: ACCURACY vs. NOISE
#    For each Model, we see how strongly performance declines with noise
##############################################################################

from scipy.stats import pearsonr

print("\n=== 3) CORRELATION CHECK: Accuracy vs. Noise (averaged across datasets) ===")
model_list = sorted(df['Model'].unique())
for model in model_list:
    # Mean Accuracy at each noise level for this model, pooled over datasets;
    # noise levels whose mean is missing are dropped.
    per_noise_mean = (
        df.loc[df['Model'] == model]
        .groupby('Noise')['Accuracy']
        .mean()
        .dropna()
        .reset_index()
    )

    # A correlation needs at least two (noise, accuracy) points.
    if len(per_noise_mean) >= 2:
        r, pval = pearsonr(per_noise_mean['Noise'], per_noise_mean['Accuracy'])
        print(f"Model={model:<40s} Pearson r={r:.3f}, p={pval:.3e}")

##############################################################################
# 4) IDENTIFY COMMON FALL-OFF POINTS:
#    Largest single-step drop from one noise level to the next
##############################################################################

print("\n=== 4) COMMON FALL-OFF POINTS (largest single-step drop) ===")

falloff_records = []
group_cols = ['Dataset','Model']

for (dataset_name, model_name), sub_df in df.groupby(group_cols):
    # Order by noise and switch to a positional 0..n-1 index so that
    # "previous noise level" is simply position - 1 (this also works when the
    # source frame has duplicate index labels).
    sub_df = sub_df.sort_values(by='Noise').reset_index(drop=True)

    # Step change in Accuracy from each noise level to the next; the first
    # entry is always NaN because it has no predecessor.
    acc_diff = sub_df['Accuracy'].diff()
    if acc_diff.isnull().all():
        # Fewer than two usable consecutive points: nothing to compare.
        continue

    # Position of the most negative step. It is >= 1 because position 0 is
    # NaN and idxmin skips NaN. Note the value can be >= 0 when accuracy
    # never decreases for this group.
    pos = acc_diff.idxmin()

    falloff_records.append({
        'Dataset': dataset_name,
        'Model': model_name,
        'From_Noise': sub_df.loc[pos - 1, 'Noise'],
        'To_Noise': sub_df.loc[pos, 'Noise'],
        'Drop_Value': acc_diff.loc[pos]
    })

# Explicit columns keep sort_values from raising KeyError when no group
# produced a record.
falloff_df = pd.DataFrame(
    falloff_records,
    columns=['Dataset', 'Model', 'From_Noise', 'To_Noise', 'Drop_Value']
).sort_values('Drop_Value')
print(falloff_df.head(20))

##############################################################################
# 5) CROSSOVER ANALYSIS:
#    Rank models at each noise level, see if rank order changes
#    (We do this per dataset so that 'rank' is consistent within the dataset.)
##############################################################################

print("\n=== 5) CROSSOVER ANALYSIS (ranking models) ===")

rank_list = []
# One group per (dataset, noise level), visited in ascending dataset then
# ascending noise order; rows with a missing key are left out by groupby.
for (dset, noise_val), cell_df in df.groupby(['Dataset', 'Noise']):
    # Rank 1 = highest Accuracy within this dataset/noise cell; ties share
    # the average of the ranks they span.
    cell_ranks = cell_df['Accuracy'].rank(method='average', ascending=False)
    rank_list.extend(
        {
            'Dataset': dset,
            'Noise': noise_val,
            'Model': model_name,
            'Accuracy': accuracy,
            'Rank': rank_value
        }
        for model_name, accuracy, rank_value in zip(cell_df['Model'],
                                                    cell_df['Accuracy'],
                                                    cell_ranks)
    )

rank_df = pd.DataFrame(rank_list).sort_values(['Dataset','Noise','Rank'])
print("Top 20 ranked results (lowest rank = best) by dataset & noise:\n")
print(rank_df.head(20))


##############################################################################
# 6) THRESHOLD DETECTION:
#    Flag places where the slope is steeply negative or performance stabilizes
##############################################################################

print("\n=== 6) THRESHOLD DETECTION (sharp decline vs. stabilization) ===")

threshold_records = []
big_drop_cutoff = -0.05   # step change at or below this is a "Significant Drop"
stabilize_cutoff = 0.01   # |step change| at or below this is "Stabilization"
# Accuracy differences carry binary floating-point error (e.g. 0.65 - 0.70
# evaluates to slightly above -0.05), so cutoff comparisons get a tiny
# tolerance to make "exactly at the cutoff" behave consistently.
cutoff_tol = 1e-9

for (dataset_name, model_name), sub_df in df.groupby(['Dataset','Model']):
    sub_df = sub_df.sort_values(by='Noise')
    # Change in Accuracy from each noise level to the NEXT one; the last
    # noise level has no successor and yields NaN.
    delta = sub_df['Accuracy'].shift(-1) - sub_df['Accuracy']

    for noise_val, delta_val in zip(sub_df['Noise'], delta):
        if pd.isnull(delta_val):
            continue  # no next row (or a missing accuracy on either side)

        if delta_val <= big_drop_cutoff + cutoff_tol:
            event = 'Significant Drop'
        elif abs(delta_val) <= stabilize_cutoff + cutoff_tol:
            event = 'Stabilization'
        else:
            continue

        # 'Noise' is the level the step starts FROM.
        threshold_records.append({
            'Dataset': dataset_name,
            'Model': model_name,
            'Noise': noise_val,
            'Event': event,
            'Delta': delta_val
        })

# Explicit columns keep sort_values from raising KeyError when no event
# was detected at all.
threshold_df = pd.DataFrame(
    threshold_records,
    columns=['Dataset', 'Model', 'Noise', 'Event', 'Delta']
).sort_values(['Dataset','Model','Noise'])
print(threshold_df.head(30))

print("\n=== Done with correlation, fall-off, crossover, and threshold analyses. ===")


=== 3) CORRELATION CHECK: Accuracy vs. Noise (averaged across datasets) ===
Model=Decision Tree (DT)                       Pearson r=-0.997, p=3.639e-09
Model=K-Nearest Neighbors (KNN)                Pearson r=-0.935, p=2.132e-04
Model=Logistic Regression (LR)                 Pearson r=-0.997, p=7.589e-09
Model=Naive Bayes (NB)                         Pearson r=0.248, p=5.204e-01
Model=Random Forest (RF)                       Pearson r=-0.968, p=1.963e-05
Model=Support Vector Machine (SVM)             Pearson r=-0.989, p=5.085e-07

=== 4) COMMON FALL-OFF POINTS (largest single-step drop) ===
                  Dataset                         Model  From_Noise  To_Noise  \
25                 stroke     K-Nearest Neighbors (KNN)         0.0       5.0   
27                 stroke              Naive Bayes (NB)        20.0      25.0   
26                 stroke      Logistic Regression (LR)         0.0       5.0   
28                 stroke            Random Forest (RF)        35.0      40.

In [43]:
##############################################################################
# 4) IDENTIFY COMMON FALL-OFF POINTS (up to 25% noise ONLY)
##############################################################################

# Upper noise bound (inclusive) for this restricted fall-off analysis. The
# section banner, the printed title and the original inline comment all say
# 25, while the filter used 30; a single constant keeps label and filter
# in agreement.
FALLOFF_MAX_NOISE = 25

print(f"\n=== 4) COMMON FALL-OFF POINTS (largest single-step drop, Noise ≤ {FALLOFF_MAX_NOISE}) ===")

falloff_records = []
group_cols = ['Dataset','Model']

for (dataset_name, model_name), sub_df in df.groupby(group_cols):
    # Restrict to the low-noise range, order by noise, and switch to a
    # positional 0..n-1 index so "previous noise level" is position - 1.
    sub_df = (
        sub_df[sub_df['Noise'] <= FALLOFF_MAX_NOISE]
        .sort_values(by='Noise')
        .reset_index(drop=True)
    )

    # Consecutive Accuracy differences; the first entry is always NaN, so an
    # all-NaN result also covers groups with fewer than two rows.
    acc_diff = sub_df['Accuracy'].diff()
    if acc_diff.isnull().all():
        continue

    # Position of the most negative step (>= 1, since position 0 is NaN and
    # idxmin skips NaN). The value can be >= 0 if accuracy never decreases.
    pos = acc_diff.idxmin()

    falloff_records.append({
        'Dataset': dataset_name,
        'Model': model_name,
        'From_Noise': sub_df.loc[pos - 1, 'Noise'],
        'To_Noise': sub_df.loc[pos, 'Noise'],
        'Drop_Value': acc_diff.loc[pos]
    })

# Explicit columns keep sort_values from raising KeyError when no group
# produced a record.
falloff_df = pd.DataFrame(
    falloff_records,
    columns=['Dataset', 'Model', 'From_Noise', 'To_Noise', 'Drop_Value']
).sort_values('Drop_Value')
print(falloff_df.head(50))


##############################################################################
# 6) THRESHOLD DETECTION (DROP-OFFS ONLY)
##############################################################################

print("\n=== 6) THRESHOLD DETECTION (Significant Drop only) ===")

threshold_records = []
big_drop_cutoff = -0.05   # step change at or below this is a "Significant Drop"
# Accuracy differences carry binary floating-point error (e.g. 0.65 - 0.70
# evaluates to slightly above -0.05), so the cutoff comparison gets a tiny
# tolerance to make "exactly at the cutoff" behave consistently.
cutoff_tol = 1e-9

for (dataset_name, model_name), sub_df in df.groupby(['Dataset','Model']):
    sub_df = sub_df.sort_values(by='Noise')
    # Change in Accuracy from each noise level to the NEXT one; the last
    # noise level has no successor and yields NaN.
    delta = sub_df['Accuracy'].shift(-1) - sub_df['Accuracy']

    for noise_val, delta_val in zip(sub_df['Noise'], delta):
        if pd.isnull(delta_val):
            continue  # no next row (or a missing accuracy on either side)

        if delta_val <= big_drop_cutoff + cutoff_tol:
            # 'Noise' is the level the drop starts FROM.
            threshold_records.append({
                'Dataset': dataset_name,
                'Model': model_name,
                'Noise': noise_val,
                'Event': 'Significant Drop',
                'Delta': delta_val
            })

# Explicit columns keep sort_values from raising KeyError when no drop
# reached the cutoff.
threshold_df = pd.DataFrame(
    threshold_records,
    columns=['Dataset', 'Model', 'Noise', 'Event', 'Delta']
).sort_values(['Dataset','Model','Noise'])
print(threshold_df.head(30))



=== 4) COMMON FALL-OFF POINTS (largest single-step drop, Noise ≤ 25) ===
                  Dataset                         Model  From_Noise  To_Noise  \
25                 stroke     K-Nearest Neighbors (KNN)         0.0       5.0   
27                 stroke              Naive Bayes (NB)        20.0      25.0   
26                 stroke      Logistic Regression (LR)         0.0       5.0   
29                 stroke  Support Vector Machine (SVM)         0.0       5.0   
24                 stroke            Decision Tree (DT)        15.0      20.0   
28                 stroke            Random Forest (RF)        20.0      25.0   
17      genomics balanced  Support Vector Machine (SVM)        20.0      25.0   
12      genomics balanced            Decision Tree (DT)        15.0      20.0   
13      genomics balanced     K-Nearest Neighbors (KNN)        20.0      25.0   
15      genomics balanced              Naive Bayes (NB)        20.0      25.0   
16      genomics balanced          