In [2]:
import pandas as pd

## Breast Cancer

In [3]:
def _read_drivers(csv_path):
    """Read one drivers CSV and discard its "Unnamed: 0" column."""
    return pd.read_csv(csv_path).drop(columns="Unnamed: 0")


# One drivers table per cancer type.
# NOTE(review): `lympth_df` and `post_df` (the Prost-AdenoCA file) look like
# typos, but the names are kept because the scoring cell refers to them.
bcancer_df = _read_drivers('../data/explainable_ai/Breast-AdenoCa_drivers.csv')
astro_df = _read_drivers('../data/explainable_ai/CNS-PiloAstro_drivers.csv')
eso_df = _read_drivers('../data/explainable_ai/Eso-AdenoCa_drivers.csv')
kid_df = _read_drivers('../data/explainable_ai/Kidney-RCC_drivers.csv')
liv_df = _read_drivers('../data/explainable_ai/Liver-HCC_drivers.csv')
lympth_df = _read_drivers('../data/explainable_ai/Lymph-CLL_drivers.csv')
panc_df = _read_drivers('../data/explainable_ai/Panc-Endocrine_drivers.csv')
post_df = _read_drivers('../data/explainable_ai/Prost-AdenoCA_drivers.csv')

# Hand-entered flag vectors (values 0, 0.5 or 1), ten entries each. They are
# multiplied position-by-position with the `abs_weight` column of the matching
# table in compute_weighted_score.
# NOTE(review): presumably 1 = supported, 0.5 = partially supported,
# 0 = unsupported driver -- TODO confirm and record the annotation source.
breast_vec      = [1, 1,   1, 1,   1,   1,   0.5, 1,   1,   0.5]
astrocytima_vec = [1, 1,   1, 1,   0,   1,   0.5, 1,   1,   0]
eso_vec         = [0, 1,   0, 0.5, 1,   1,   1,   0.5, 0.5, 1]
kid_vec         = [1, 1,   1, 1,   0,   1,   1,   1,   1,   1]
liv_vec         = [1, 1,   1, 1,   0.5, 1,   1,   1,   1,   1]
lymph_vec       = [1, 1,   1, 1,   1,   1,   0.5, 1,   0.5, 1]
panc_vec        = [1, 1,   1, 1,   0.5, 1,   0,   1,   0.5, 0]
post_adeno      = [1, 0.5, 1, 1,   1,   0.5, 1,   1,   0.5, 1]


In [4]:
def compute_weighted_score(df, flag_vec):
    """Weight a per-row flag vector by the ``abs_weight`` column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a numeric ``abs_weight`` column.
    flag_vec : sequence of numbers
        One value per row of ``df``, in row order.

    Returns
    -------
    tuple
        ``(weighted_score, normalized_score)``. ``weighted_score`` is
        ``sum(abs_weight * flag)``; ``normalized_score`` is that sum divided
        by ``sum(abs_weight)``, or NaN when the weights sum to zero.

    Raises
    ------
    ValueError
        If ``flag_vec`` does not have exactly one entry per row of ``df``.
    """
    weights = df['abs_weight'].to_numpy(dtype=float)
    flags = pd.Series(flag_vec, dtype=float).to_numpy()

    # Without this check NumPy would broadcast a length-1 operand against the
    # other one and silently return a wrong score.
    if flags.shape != weights.shape:
        raise ValueError(
            f"flag_vec has {flags.shape[0]} entries but df has "
            f"{weights.shape[0]} rows; they must match one-to-one"
        )

    weighted_score = (weights * flags).sum()
    total_weight = weights.sum()

    # A zero total makes the ratio undefined; return NaN explicitly instead of
    # relying on a 0/0 division that only emits a RuntimeWarning.
    if total_weight == 0:
        return weighted_score, float("nan")

    normalized_score = weighted_score / total_weight
    return weighted_score, normalized_score

# Each row pairs a report label with its drivers table and flag vector.
cohort_inputs = [
    ("Breast AdenoCa", bcancer_df, breast_vec),
    ("CNS PiloAstro", astro_df, astrocytima_vec),
    ("Eso AdenoCa", eso_df, eso_vec),
    ("Kidney RCC", kid_df, kid_vec),
    ("Liver HCC", liv_df, liv_vec),
    ("Lymph CLL", lympth_df, lymph_vec),
    ("Pancreas Endocrine", panc_df, panc_vec),
    ("Prostate AdenoCa", post_df, post_adeno),
]

# label -> (raw weighted score, normalized score), in the order listed above.
scores = {
    label: compute_weighted_score(frame, flags)
    for label, frame, flags in cohort_inputs
}

# One report line per cancer type.
for cancer, (raw_score, norm_score) in scores.items():
    print(f"{cancer:20s} Raw Score is {raw_score:.4f}, Normalized is {norm_score:.4f}")

# The figure printed as "global average accuracy" is the unweighted mean of
# the normalized scores.
total_acc = sum(normalized for _, normalized in scores.values())

print(f"Global average accuracy is : {total_acc/len(scores)}")


Breast AdenoCa       Raw Score is 2.5658, Normalized is 0.9084
CNS PiloAstro        Raw Score is 4.3143, Normalized is 0.7672
Eso AdenoCa          Raw Score is 0.8553, Normalized is 0.6296
Kidney RCC           Raw Score is 3.3761, Normalized is 0.9190
Liver HCC            Raw Score is 2.3756, Normalized is 0.9533
Lymph CLL            Raw Score is 4.8217, Normalized is 0.9242
Pancreas Endocrine   Raw Score is 3.9368, Normalized is 0.7663
Prostate AdenoCa     Raw Score is 4.5946, Normalized is 0.8523
Global average accuracy is : 0.8400199900131268
