In [1]:
import os
import pandas as pd
import similarity_index as similarity_index

In [2]:
# Root folder holding one sub-directory of rank_df{i}.csv files per feature-selection method.
OUT_DIR = "outputs"
# Radiomics feature table (CSV) that is loaded into `feats_df`.
XL_PATH = "inputs/radiomicsFeatures.csv"

In [3]:
# Feature table read from XL_PATH; it is forwarded to every top-k similarity function as `feats_df`.
feats_df = pd.read_csv(filepath_or_buffer=XL_PATH)

# Number of repeated runs per method; repeats are indexed 0..num_repeats-1 (rank_df{i}.csv).
num_repeats = 100

In [45]:
# Pairwise stability of each feature-selection method across its repeated runs.
#
# For every method and every unordered pair of repeats (i < j) this cell records:
#   * one estimate per (similarity measure, k) combination, and
#   * one `global_spearman` estimate, stored with top_k == "NA".
# Results are accumulated as parallel column lists in `stability_df`.
stability_df = {"fs_method":[], "similarity_measure":[], "top_k":[], "estimate":[]}

fs_methods = ["random", "oneDSAE", "bayesianDSAE", "ensembleDSAE", "backwardSFS/LogisticRegression", "backwardSFS/SVC", "backwardSFS/RandomForestClassifier", "backwardSFS/MLPClassifier"]
similarity_methods = {"jaccard":similarity_index.jaccard, "dice":similarity_index.dice, "kuncheva":similarity_index.kuncheva, "mwm":similarity_index.mwm}
top_ks = [5, 10, 15, 20, 25]


for fs_method in fs_methods:

    # Read each repeat's ranking exactly once per method. Reading inside the
    # pair loop costs 2 * C(num_repeats, 2) CSV parses per method (9,900 for
    # 100 repeats) even though only num_repeats distinct files exist.
    rank_dfs = [
        pd.read_csv(os.path.join(OUT_DIR, fs_method, f"rank_df{repeat}.csv"))
        for repeat in range(num_repeats)
    ]

    for i in range(num_repeats):

        for j in range(i+1, num_repeats):

            # Hand out fresh copies per pair so that, should a similarity
            # function modify its inputs, later pairs still see pristine
            # rankings (same isolation as re-reading the files).
            df1 = rank_dfs[i].copy()
            df2 = rank_dfs[j].copy()

            for similarity_measure, similarity_fn in similarity_methods.items():

                for k in top_ks:

                    estimate = similarity_fn(df1=df1, df2=df2, k=k, feats_df = feats_df)

                    stability_df["fs_method"].append(fs_method)
                    stability_df["similarity_measure"].append(similarity_measure)
                    stability_df["top_k"].append(k)
                    stability_df["estimate"].append(estimate)

            # Whole-ranking comparison: computed once per pair, independent of k.
            estimate = similarity_index.global_spearman(df1, df2)

            stability_df["fs_method"].append(fs_method)
            stability_df["similarity_measure"].append("global_spearman")
            stability_df["top_k"].append("NA")
            stability_df["estimate"].append(estimate)

In [46]:
# Turn the accumulated column lists into a long-format table (one row per estimate).
stability_df = pd.DataFrame(data=stability_df)

In [47]:
# Average the estimates over all repeat pairs for each
# (fs_method, similarity_measure, top_k) combination.
mean_stability_df = (
    stability_df
    .groupby(by=["fs_method", "similarity_measure", "top_k"])
    .agg("mean")
)

In [48]:
# Mean estimates for the 'random' method, indexed by (similarity_measure, top_k).
mean_stability_df.xs('random', level='fs_method')

Unnamed: 0_level_0,Unnamed: 1_level_0,estimate
similarity_measure,top_k,Unnamed: 2_level_1
dice,5.0,0.056525
dice,10.0,0.112162
dice,15.0,0.167084
dice,20.0,0.222717
dice,25.0,0.279451
global_spearman,,0.000558
jaccard,5.0,0.032088
jaccard,10.0,0.062205
jaccard,15.0,0.093689
jaccard,20.0,0.127768


In [49]:
# Mean estimates for the 'bayesianDSAE' method, indexed by (similarity_measure, top_k).
mean_stability_df.xs('bayesianDSAE', level='fs_method')

Unnamed: 0_level_0,Unnamed: 1_level_0,estimate
similarity_measure,top_k,Unnamed: 2_level_1
dice,5.0,0.384808
dice,10.0,0.531495
dice,15.0,0.595461
dice,20.0,0.622313
dice,25.0,0.625648
global_spearman,,0.539272
jaccard,5.0,0.259473
jaccard,10.0,0.377176
jaccard,15.0,0.436638
jaccard,20.0,0.461065


In [50]:
# Mean estimates for the 'ensembleDSAE' method, indexed by (similarity_measure, top_k).
mean_stability_df.xs('ensembleDSAE', level='fs_method')

Unnamed: 0_level_0,Unnamed: 1_level_0,estimate
similarity_measure,top_k,Unnamed: 2_level_1
dice,5.0,0.437212
dice,10.0,0.575576
dice,15.0,0.658007
dice,20.0,0.683596
dice,25.0,0.698861
global_spearman,,0.728306
jaccard,5.0,0.301112
jaccard,10.0,0.419038
jaccard,15.0,0.502142
jaccard,20.0,0.52858


In [54]:
# Mean estimates for the 'backwardSFS/MLPClassifier' method, indexed by (similarity_measure, top_k).
mean_stability_df.xs('backwardSFS/MLPClassifier', level='fs_method')

Unnamed: 0_level_0,Unnamed: 1_level_0,estimate
similarity_measure,top_k,Unnamed: 2_level_1
dice,5.0,0.126343
dice,10.0,0.198182
dice,15.0,0.250397
dice,20.0,0.294626
dice,25.0,0.340671
global_spearman,,0.089736
jaccard,5.0,0.074962
jaccard,10.0,0.115183
jaccard,15.0,0.147282
jaccard,20.0,0.175994


In [33]:
# Mean estimates for the 'bayesianDSAE' method, indexed by (similarity_measure, top_k).
# NOTE(review): same lookup as an earlier cell, with an older execution count — candidate for removal.
mean_stability_df.xs('bayesianDSAE', level='fs_method')

Unnamed: 0_level_0,Unnamed: 1_level_0,estimate
similarity_measure,top_k,Unnamed: 2_level_1
dice,5.0,0.384808
dice,10.0,0.531495
dice,15.0,0.595461
dice,20.0,0.622313
dice,25.0,0.625648
global_spearman,,0.539272
jaccard,5.0,0.259473
jaccard,10.0,0.377176
jaccard,15.0,0.436638
jaccard,20.0,0.461065


In [34]:
# Mean estimates for the 'ensembleDSAE' method, indexed by (similarity_measure, top_k).
# NOTE(review): same lookup as an earlier cell, with an older execution count — candidate for removal.
mean_stability_df.xs('ensembleDSAE', level='fs_method')

Unnamed: 0_level_0,Unnamed: 1_level_0,estimate
similarity_measure,top_k,Unnamed: 2_level_1
dice,5.0,0.437212
dice,10.0,0.575576
dice,15.0,0.658007
dice,20.0,0.683596
dice,25.0,0.698861
global_spearman,,0.728306
jaccard,5.0,0.301112
jaccard,10.0,0.419038
jaccard,15.0,0.502142
jaccard,20.0,0.52858


In [7]:
# Mean estimates for the 'random' method, indexed by (similarity_measure, top_k).
# NOTE(review): same lookup as an earlier cell, with an older execution count — candidate for removal.
mean_stability_df.xs('random', level='fs_method')

Unnamed: 0_level_0,Unnamed: 1_level_0,estimate
similarity_measure,top_k,Unnamed: 2_level_1
dice,5.0,0.056525
dice,10.0,0.112162
dice,15.0,0.167084
dice,20.0,0.222717
dice,25.0,0.279451
global_spearman,,0.000558
jaccard,5.0,0.032088
jaccard,10.0,0.062205
jaccard,15.0,0.093689
jaccard,20.0,0.127768


In [10]:
# Shell escape: print the current GPU status reported by the nvidia-smi command-line tool.
!nvidia-smi

Tue Apr 30 11:29:26 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.27.04    Driver Version: 460.27.04    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   77C    P0    35W /  70W |   3152MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla T4            Off  | 00000000:00:05.0 Off |                    0 |
| N/A   74C    P0    32W /  70W |   1709MiB / 15109MiB |     20%      Default |
|       

In [None]:
# Persist the individual estimates (not the grouped means) under OUT_DIR.
# Note: with pandas.read_csv defaults, the "NA" placeholder stored in top_k
# is parsed as a missing value when this file is loaded again.
stability_df.to_csv(
    path_or_buf=os.path.join(OUT_DIR, "stability_df.csv"),
    index=False,
)

In [None]:
# Mean estimates for the 'oneDSAE' method, indexed by (similarity_measure, top_k).
mean_stability_df.xs('oneDSAE', level='fs_method')

In [None]:
# Mean estimates for the 'oneDSAE' method, indexed by (similarity_measure, top_k).
# The previous key 'oneDSAE1' is not one of the labels in `fs_methods`, so the
# lookup raised KeyError; 'oneDSAE' is the closest label that is actually computed.
# NOTE(review): confirm this is the intended method.
mean_stability_df.loc['oneDSAE']

In [None]:
# Mean estimates for the 'random' method, indexed by (similarity_measure, top_k).
mean_stability_df.xs('random', level='fs_method')

### Statistical Analysis

##### 1. Random vs. FS Methods – Wilcoxon Signed-Rank Test
- global, top_k = [5, 10, 15, 20, 25]

### Statistical Analysis (U test between the 4 stability estimates from conventional methods and the 3 stability estimates from autoencoder variants)