In [1]:
import os
from os import listdir
from os.path import isfile, join, dirname, basename
import pandas as pd

In [2]:
%run analysis_utils.py


In [3]:
def collect_auprc_scores( clf='avg', base_dir='./outcomes'):
    all_results = []

    for subdir in os.listdir(base_dir):
        exp_path = os.path.join(base_dir, subdir)
        file_path = os.path.join(exp_path, f'{clf}.csv')
        if os.path.isfile(file_path):
                df = pd.read_csv(file_path, index_col=0)
                cross_table = df.pivot_table(
                    index="experiment",
                    columns="Metrics",
                    values="model",
                    aggfunc="mean"
                )
                cross_table['strategy'] = clf
                cross_table['exp'] = subdir
                # print(clf)
                all_results.append(cross_table)
    all_results_df = pd.concat(all_results)
    all_results_df['strategy'] =clf
    return all_results_df

In [4]:
df = collect_auprc_scores( clf='avg', base_dir='./metrics')
df

Metrics,AUC,AUPRC,Accuracy,F1,Precision,Recall,strategy,exp
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Geneformer-V2-104M,0.955556,0.962188,0.903846,0.901658,0.927381,0.907143,avg,brca_full_pre_post
Geneformer-V2-104M_CLcancer,0.978571,0.983333,0.903846,0.903313,0.917857,0.907143,avg,brca_full_pre_post
Geneformer-V2-104M_finetune,0.485714,0.500427,0.484615,0.326316,0.242308,0.500000,avg,brca_full_pre_post
Geneformer-V2-316M,0.988889,0.991667,0.902564,0.901195,0.921429,0.904762,avg,brca_full_pre_post
cellplm,0.857540,0.893817,0.805128,0.803104,0.820119,0.804762,avg,brca_full_pre_post
...,...,...,...,...,...,...,...,...
scgpt,0.812500,0.708333,0.800000,0.444444,0.400000,0.500000,avg,luad1
scgpt_cancer,0.500000,0.487500,0.800000,0.444444,0.400000,0.500000,avg,luad1
scimilarity,0.468750,0.362500,0.750000,0.427083,0.393750,0.468750,avg,luad1
scvi,0.562500,0.500000,0.800000,0.444444,0.400000,0.500000,avg,luad1


In [29]:
strategy = ['avg', 'vote', 'mil']
all_results= []
for s in strategy:
    df = collect_auprc_scores( clf=s, base_dir='./metrics')
    all_results.append(df)

all_df =pd.concat(all_results)
    

In [30]:
all_df = all_df[all_df.exp !='luad1']
all_df = all_df[~all_df.index.str.contains('finetune')]

In [31]:
all_df

Metrics,AUC,AUPRC,Accuracy,F1,Precision,Recall,strategy,exp
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Geneformer-V2-104M,0.955556,0.962188,0.903846,0.901658,0.927381,0.907143,avg,brca_full_pre_post
Geneformer-V2-104M_CLcancer,0.978571,0.983333,0.903846,0.903313,0.917857,0.907143,avg,brca_full_pre_post
Geneformer-V2-316M,0.988889,0.991667,0.902564,0.901195,0.921429,0.904762,avg,brca_full_pre_post
cellplm,0.857540,0.893817,0.805128,0.803104,0.820119,0.804762,avg,brca_full_pre_post
gf-6L-30M-i2048,0.934127,0.940952,0.835897,0.834246,0.849286,0.838095,avg,brca_full_pre_post
...,...,...,...,...,...,...,...,...
scfoundation,0.825000,0.833333,0.653333,0.644286,0.723333,0.725000,mil,brca_full_outcome
scgpt,0.725000,0.723333,0.693333,0.408889,0.346667,0.500000,mil,brca_full_outcome
scgpt_cancer,0.700000,0.640000,0.693333,0.408889,0.346667,0.500000,mil,brca_full_outcome
scimilarity,0.775000,0.790000,0.686667,0.663810,0.683333,0.700000,mil,brca_full_outcome


In [32]:
all_df = all_df.reset_index(names= 'model')

In [33]:
all_df.model = all_df.model.map(model_name_map)

In [34]:
all_df

Metrics,model,AUC,AUPRC,Accuracy,F1,Precision,Recall,strategy,exp
0,GF-V2,0.955556,0.962188,0.903846,0.901658,0.927381,0.907143,avg,brca_full_pre_post
1,GF-V2 [cancer],0.978571,0.983333,0.903846,0.903313,0.917857,0.907143,avg,brca_full_pre_post
2,GF-V2-Deep,0.988889,0.991667,0.902564,0.901195,0.921429,0.904762,avg,brca_full_pre_post
3,CellPLM,0.857540,0.893817,0.805128,0.803104,0.820119,0.804762,avg,brca_full_pre_post
4,GF-V1,0.934127,0.940952,0.835897,0.834246,0.849286,0.838095,avg,brca_full_pre_post
...,...,...,...,...,...,...,...,...,...
175,scFoundation,0.825000,0.833333,0.653333,0.644286,0.723333,0.725000,mil,brca_full_outcome
176,scGPT,0.725000,0.723333,0.693333,0.408889,0.346667,0.500000,mil,brca_full_outcome
177,scGPT [cancer],0.700000,0.640000,0.693333,0.408889,0.346667,0.500000,mil,brca_full_outcome
178,SCimilarity,0.775000,0.790000,0.686667,0.663810,0.683333,0.700000,mil,brca_full_outcome


In [35]:
all_df['exeriment'] = all_df.exp.map(experiment_name_map)

In [36]:
all_df= all_df.round(3)

In [37]:
all_df

Metrics,model,AUC,AUPRC,Accuracy,F1,Precision,Recall,strategy,exp,exeriment
0,GF-V2,0.956,0.962,0.904,0.902,0.927,0.907,avg,brca_full_pre_post,Treatment Naive vs Anti PD1
1,GF-V2 [cancer],0.979,0.983,0.904,0.903,0.918,0.907,avg,brca_full_pre_post,Treatment Naive vs Anti PD1
2,GF-V2-Deep,0.989,0.992,0.903,0.901,0.921,0.905,avg,brca_full_pre_post,Treatment Naive vs Anti PD1
3,CellPLM,0.858,0.894,0.805,0.803,0.820,0.805,avg,brca_full_pre_post,Treatment Naive vs Anti PD1
4,GF-V1,0.934,0.941,0.836,0.834,0.849,0.838,avg,brca_full_pre_post,Treatment Naive vs Anti PD1
...,...,...,...,...,...,...,...,...,...,...
175,scFoundation,0.825,0.833,0.653,0.644,0.723,0.725,mil,brca_full_outcome,T-cell exhaustion
176,scGPT,0.725,0.723,0.693,0.409,0.347,0.500,mil,brca_full_outcome,T-cell exhaustion
177,scGPT [cancer],0.700,0.640,0.693,0.409,0.347,0.500,mil,brca_full_outcome,T-cell exhaustion
178,SCimilarity,0.775,0.790,0.687,0.664,0.683,0.700,mil,brca_full_outcome,T-cell exhaustion


In [38]:
all_df.to_csv('./tables/Table8_classification_metrics.csv', index=False)

In [39]:
all_df.strategy.value_counts()

strategy
avg     60
vote    60
mil     60
Name: count, dtype: int64