
Part I: compute CTRP validation p-values with ANOVA
Part II: count the number of drugs/modules confirmed



In [None]:
import sys
import post_analysis as postproc
import os
import pandas as pd
from importlib import reload  # Python 3.4+ only.

# Base directory for every data/results path below: the working directory
# with a trailing slash, so relative paths can simply be appended to it.
netphix_dir = f"{os.getcwd()}/"

In [20]:

# Drug table: the "drug", "id" and "CTRP" columns are used below.
drug_id_df = pd.read_csv(
    netphix_dir + "data/drug_target_id.txt",
    sep="\t",
)
# Lookup from drug name to drug id.
drug_id_dic = dict(zip(drug_id_df["drug"], drug_id_df["id"]))


# CTRP AUC table; its first column becomes the row index.
ctrp_auc_df = pd.read_csv(
    netphix_dir + "data/ctrp_auc_processed.txt",
    index_col=0,
    sep="\t",
)
# Names of the drugs flagged "YES" in the CTRP column.
ctrp_drugs = drug_id_df.loc[drug_id_df["CTRP"] == "YES", "drug"].values

def ctrp_common_samples(alt_df, auc_df):
    """Restrict an alteration table and an AUC table to their shared columns.

    Each column label of ``alt_df`` is first shortened to the text before its
    first underscore; the shortened labels are then matched against the
    column labels of ``auc_df``.

    Args:
        alt_df: DataFrame whose column labels are strings (presumably sample
            or cell-line names with a suffix after "_" -- confirm against
            the alteration files).
        auc_df: DataFrame whose column labels are compared with the
            shortened labels of ``alt_df``.

    Returns:
        A tuple ``(alt_subset, auc_subset)``: the renamed ``alt_df`` and
        ``auc_df``, both restricted to the shared column labels, in the same
        sorted order.
    """
    short_names = {col: col.split("_")[0] for col in alt_df.columns}
    alt_df = alt_df.rename(columns=short_names)

    # Index with a sorted list rather than a set: pandas >= 2.0 rejects set
    # indexers with a TypeError, and set iteration order would make the
    # column order differ from run to run.
    common_cells = sorted(set(auc_df.columns).intersection(alt_df.columns))
    return alt_df[common_cells], auc_df[common_cells]

# NETPHIX alteration table, restricted to the columns shared with the CTRP
# AUC table.
netphix_alt_file = netphix_dir + "data/AlterationsV2_final.txt.gz"
netphix_alt_df = pd.read_csv(
    netphix_alt_file,
    index_col=0,
    sep="\t",
)
(netphix_ctrp_alt_df, netphix_ctrp_auc_df) = ctrp_common_samples(
    netphix_alt_df,
    ctrp_auc_df,
)

# UNCOVER alteration table, restricted the same way.
uncover_alt_file = netphix_dir + "data/AlterationsV2_uncover.txt"
uncover_alt_df = pd.read_csv(
    uncover_alt_file,
    index_col=0,
    sep="\t",
)
(uncover_ctrp_alt_df, uncover_ctrp_auc_df) = ctrp_common_samples(
    uncover_alt_df,
    ctrp_auc_df,
)


In [None]:
# ANOVA with NETPHIX modules: run postproc.cross_val on the CTRP-restricted
# alteration/AUC tables for the CTRP drugs, then index the result by drug.
all_netphix_modules = pd.read_csv(
    netphix_dir + "results/max_sig_combined_modules_0.05.tsv",
    sep="\t",
)
netphix_ctrp_results_df = postproc.cross_val(
    all_netphix_modules,
    netphix_ctrp_alt_df,
    netphix_ctrp_auc_df,
    "mut",
    ctrp_drugs,
).set_index("drug")

# Save the drug-indexed validation results as a tab-separated file.
netphix_ctrp_results_df.to_csv(
    netphix_dir + "results/max_sig_combined_modules_ctrp_cv_0.05.tsv",
    sep="\t",
)


In [28]:
# ANOVA with UNCOVER modules: same cross_val call as for NETPHIX, using the
# UNCOVER module list and the UNCOVER-restricted alteration/AUC tables.
new_uncover_df = pd.read_csv(
    netphix_dir + "data/merged_uncover_modules_0.05.tsv",
    sep="\t",
)
uncover_ctrp_results_df = postproc.cross_val(
    new_uncover_df,
    uncover_ctrp_alt_df,
    uncover_ctrp_auc_df,
    "mut",
    ctrp_drugs,
)


In [None]:
# COUNT
def sig_df(df, label, th=0.05):
    """Return the rows of ``df`` whose ``label`` column is strictly below ``th``.

    Args:
        df: DataFrame to filter.
        label: Name of the column holding the values to compare.
        th: Exclusive upper bound; defaults to 0.05.

    Returns:
        The subset of ``df`` whose value in ``label`` is less than ``th``.
    """
    below_threshold = df[label] < th
    return df.loc[below_threshold]

# Per-module tallies: number of rows tested, then number with cv_mut_pv
# below 0.05.
print("NETPHIX (test, validated)")
print(
    len(netphix_ctrp_results_df),
    len(sig_df(netphix_ctrp_results_df, "cv_mut_pv", 0.05)),
)
print("UNCOVER (test, validated)")
print(
    len(uncover_ctrp_results_df),
    len(sig_df(uncover_ctrp_results_df, "cv_mut_pv", 0.05)),
)

# Per-drug tallies: keep the smallest cv_mut_pv among each drug's modules.
best_netphix_ctrp_results_df = (
    netphix_ctrp_results_df.groupby("drug")["cv_mut_pv"].min().to_frame()
)
print("NETPHIX drugs (test, validated)")

print(
    len(best_netphix_ctrp_results_df),
    len(sig_df(best_netphix_ctrp_results_df, "cv_mut_pv", 0.05)),
)
