In [1]:
import pandas as pd 
import torch 
import os 
from sklearn.metrics import roc_auc_score
import numpy as np

In [2]:
data = torch.load(f'../output/exp1/proc/Data.pt')

cellspace = list(data.cellspace)
drugspace = [x.split('__')[1] for x in data.node_names if 'DRUG__' in x]

cellinfo = pd.read_csv('../../data/cellinfo_beta.txt', sep='\t')[lambda x: x.cell_iname.isin(cellspace)]

druginfo = pd.read_csv('../../data/compoundinfo_beta.txt', sep='\t')
drugindi = pd.read_csv('../../data/repurposing_drugs_20180907.txt', sep='\t', encoding = "ISO-8859-1", skiprows=9)
druginfo = druginfo.merge(drugindi, left_on='cmap_name', right_on='pert_iname', how='left')[lambda x: x.pert_id.isin(drugspace)]
druginfo.head()

Unnamed: 0,pert_id,cmap_name,target_x,moa_x,canonical_smiles,inchi_key,compound_aliases,pert_iname,clinical_phase,moa_y,target_y,disease_area,indication
410,BRD-K42805893,AZD-9291,,,COc1cc(N(C)CCN(C)C)c(NC(=O)C=C)cc1Nc1nccc(n1)-...,DUYJMQONPNNFPI-UHFFFAOYSA-N,osimertinib,,,,,,
508,BRD-K62391742,GDC-0199,,,CC1(C)CCC(CN2CCN(CC2)c2ccc(C(=O)NS(=O)(=O)c3cc...,LQBVNQSMGBZMKD-UHFFFAOYSA-N,venetoclax,,,,,,
605,BRD-K43002773,GDC-0068,AKT3,Akt inhibitor,C[C@@H]1C[C@H](C2=C1C(=NC=N2)N3CCN(CC3)C(=O)[C...,GRZXWCHAXNAUHY-NSISKUIASA-N,ipatasertib,GDC-0068,Phase 2,AKT inhibitor,AKT1|AKT2|AKT3|PRKG1,,
606,BRD-K43002773,GDC-0068,AKT1,Akt inhibitor,C[C@@H]1C[C@H](C2=C1C(=NC=N2)N3CCN(CC3)C(=O)[C...,GRZXWCHAXNAUHY-NSISKUIASA-N,ipatasertib,GDC-0068,Phase 2,AKT inhibitor,AKT1|AKT2|AKT3|PRKG1,,
607,BRD-K43002773,GDC-0068,AKT2,Akt inhibitor,C[C@@H]1C[C@H](C2=C1C(=NC=N2)N3CCN(CC3)C(=O)[C...,GRZXWCHAXNAUHY-NSISKUIASA-N,ipatasertib,GDC-0068,Phase 2,AKT inhibitor,AKT1|AKT2|AKT3|PRKG1,,


In [3]:
cell_diseases = cellinfo.groupby(['primary_disease', 'subtype']).count()[['cell_iname']].sort_values('cell_iname', ascending=False).reset_index()
cell_diseases.head(10)

Unnamed: 0,primary_disease,subtype,cell_iname
0,lung cancer,non small cell lung carcinoma,8
1,skin cancer,melanoma,7
2,endometrial cancer,adenocarcinoma,6
3,breast cancer,carcinoma,5
4,breast cancer,adenocarcinoma,4
5,colon cancer,carcinoma,4
6,ovarian cancer,carcinoma,3
7,brain cancer,astrocytoma,3
8,lymphoma,b-cell lymphoma,3
9,bladder cancer,transitional cell carcinoma,3


In [4]:
drug_indications = druginfo[['pert_id', 'indication']].drop_duplicates().groupby('indication').count()[['pert_id']].sort_values('pert_id', ascending=False)
drug_indications.head(10)

Unnamed: 0_level_0,pert_id
indication,Unnamed: 1_level_1
breast cancer,12
contraceptive,7
non-small cell lung cancer (NSCLC),6
melanoma,4
organ rejection|lymphangioleiomyomatosis,4
skin ulcer,4
menopause|vaginal atrophy|urinary tract infections,4
depression,3
renal cell carcinoma (RCC),3
endometriosis|angioedema,3


# First, Generate Disease Specific Prioritization Goals

This will be used to evaluate the rationality of our priortization results using the CLUE repurposing hub disease indications. 

In [5]:
goal_metadata = pd.DataFrame({'goal_name':                  ['melanoma-NSCLC',                      'melanoma-breast',      'NSCLC-AML',                             'breast-AML',                          'breast-NSCLC',                          'breast-prostate',         'NSCLC-prostate',                       'breast-kidney',                    'NSCLC-kidney',                             'melanoma-kidney'],
                              'target_primary_disease':     ['skin cancer',                         'skin cancer',          'lung cancer',                           'breast cancer',                       'breast cancer',                         'breast cancer',           'lung cancer',                          'breast cancer',                    'lung cancer',                              'skin cancer'], 
                              'target_subtype':             ['melanoma',                            'melanoma',             'non small cell lung carcinoma',          None,                                  None,                                    None,                     'non small cell lung carcinoma',         None,                              'non small cell lung carcinoma',            'melanoma'],                   
                              'background_primary_disease': ['lung cancer',                         'breast cancer',        'leukemia',                              'leukemia',                            'lung cancer',                           'prostate cancer',         'prostate cancer',                      'kidney cancer',                    'kidney cancer',                            'kidney cancer'], 
                              'background_subtype':         ['non small cell lung carcinoma',        None,                  'acute myelogenous leukemia (aml)',      'acute myelogenous leukemia (aml)',    'non small cell lung carcinoma',          None,                      None,                                   None,                               None,                                       None],
                              'target_indication':          ['melanoma',                            'melanoma',             'non-small cell lung cancer (NSCLC)',    'breast cancer',                       'breast cancer',                         'breast cancer',           'non-small cell lung cancer (NSCLC)',   'breast cancer',                    'non-small cell lung cancer (NSCLC)',       'melanoma'],
                              'background_indication':      ['non-small cell lung cancer (NSCLC)',  'breast cancer',        'acute myeloid leukemia (AML)',          'acute myeloid leukemia (AML)',        'non-small cell lung cancer (NSCLC)',    'prostate cancer',         'prostate cancer',                      'renal cell carcinoma (RCC)',       'renal cell carcinoma (RCC)',               'renal cell carcinoma (RCC)']})

goal_metadata

Unnamed: 0,goal_name,target_primary_disease,target_subtype,background_primary_disease,background_subtype,target_indication,background_indication
0,melanoma-NSCLC,skin cancer,melanoma,lung cancer,non small cell lung carcinoma,melanoma,non-small cell lung cancer (NSCLC)
1,melanoma-breast,skin cancer,melanoma,breast cancer,,melanoma,breast cancer
2,NSCLC-AML,lung cancer,non small cell lung carcinoma,leukemia,acute myelogenous leukemia (aml),non-small cell lung cancer (NSCLC),acute myeloid leukemia (AML)
3,breast-AML,breast cancer,,leukemia,acute myelogenous leukemia (aml),breast cancer,acute myeloid leukemia (AML)
4,breast-NSCLC,breast cancer,,lung cancer,non small cell lung carcinoma,breast cancer,non-small cell lung cancer (NSCLC)
5,breast-prostate,breast cancer,,prostate cancer,,breast cancer,prostate cancer
6,NSCLC-prostate,lung cancer,non small cell lung carcinoma,prostate cancer,,non-small cell lung cancer (NSCLC),prostate cancer
7,breast-kidney,breast cancer,,kidney cancer,,breast cancer,renal cell carcinoma (RCC)
8,NSCLC-kidney,lung cancer,non small cell lung carcinoma,kidney cancer,,non-small cell lung cancer (NSCLC),renal cell carcinoma (RCC)
9,melanoma-kidney,skin cancer,melanoma,kidney cancer,,melanoma,renal cell carcinoma (RCC)


In [6]:
goal_df = pd.DataFrame({'cell_iname':cellspace})

for i,row in goal_metadata.iterrows(): 
    
    if row.target_primary_disease is None: 
        target_lines = cellinfo[lambda x: (x.subtype == row.target_subtype)].cell_iname.unique()
    elif row.target_subtype is None: 
        target_lines = cellinfo[lambda x: (x.primary_disease == row.target_primary_disease)].cell_iname.unique()
    else: 
        target_lines = cellinfo[lambda x: (x.primary_disease == row.target_primary_disease) & (x.subtype == row.target_subtype)].cell_iname.unique()

    if row.background_primary_disease is None: 
        background_lines = cellinfo[lambda x: (x.subtype == row.background_subtype)].cell_iname.unique()
    elif row.background_subtype is None: 
        background_lines = cellinfo[lambda x: (x.primary_disease == row.background_primary_disease)].cell_iname.unique()
    else: 
        background_lines = cellinfo[lambda x: (x.primary_disease == row.background_primary_disease) & (x.subtype == row.background_subtype)].cell_iname.unique()

    tmp = {'cell_iname':[], row.goal_name:[]}
    for cell in cellspace: 
        label = 'none'
        tmp['cell_iname'].append(cell)
        if cell in target_lines: 
            label = 'target'
        if cell in background_lines: 
            label = 'background'
        if (cell in target_lines) & (cell in background_lines): 
            raise ValueError(f'cell: {cell} is in both the target and background sets.')
        tmp[row.goal_name].append(label)
    tmp = pd.DataFrame(tmp)

    goal_df = goal_df.merge(tmp, on='cell_iname', validate='1:1')

    print('Prioritization Goal Name:', row.goal_name)
    print('\t# of target lines:', len(target_lines))
    print('\t# of background lines:', len(background_lines))

goal_df.to_csv('../output/disease_prioritization_goals.csv', sep=',', index=False)
goal_df.head()


Prioritization Goal Name: melanoma-NSCLC
	# of target lines: 7
	# of background lines: 8
Prioritization Goal Name: melanoma-breast
	# of target lines: 7
	# of background lines: 9
Prioritization Goal Name: NSCLC-AML
	# of target lines: 8
	# of background lines: 2
Prioritization Goal Name: breast-AML
	# of target lines: 9
	# of background lines: 2
Prioritization Goal Name: breast-NSCLC
	# of target lines: 9
	# of background lines: 8
Prioritization Goal Name: breast-prostate
	# of target lines: 9
	# of background lines: 3
Prioritization Goal Name: NSCLC-prostate
	# of target lines: 8
	# of background lines: 3
Prioritization Goal Name: breast-kidney
	# of target lines: 9
	# of background lines: 2
Prioritization Goal Name: NSCLC-kidney
	# of target lines: 8
	# of background lines: 2
Prioritization Goal Name: melanoma-kidney
	# of target lines: 7
	# of background lines: 2


Unnamed: 0,cell_iname,melanoma-NSCLC,melanoma-breast,NSCLC-AML,breast-AML,breast-NSCLC,breast-prostate,NSCLC-prostate,breast-kidney,NSCLC-kidney,melanoma-kidney
0,22RV1,none,none,none,none,none,background,background,none,none,none
1,5637,none,none,none,none,none,none,none,none,none,none
2,A204,none,none,none,none,none,none,none,none,none,none
3,A375,target,target,none,none,none,none,none,none,none,target
4,A549,none,none,none,none,none,none,none,none,none,none


# Triple Negative Breast Cancer (TNBC) priortization

In [7]:
TNBC_lines = ['BT20', 'HS578T', 'MDAMB231', 'MDAMB468']
breast_lines = cellinfo[lambda x: x.primary_disease == 'breast cancer'].cell_iname.unique().tolist()

print('# of TNBC lines:', len(TNBC_lines))
print('# of brest lines:', len(breast_lines))

breast_subtype_goals = pd.DataFrame({'cell_iname':data.cellspace})

_tnbc_vs_nonbreast = []
for cell in data.cellspace: 
    if cell in TNBC_lines: 
        _tnbc_vs_nonbreast.append('target')
    elif cell not in breast_lines: 
        _tnbc_vs_nonbreast.append('background')
    else: 
        _tnbc_vs_nonbreast.append('none')

_breast_vs_nonbreast = []
for cell in data.cellspace: 
    if cell in breast_lines: 
        _breast_vs_nonbreast.append('target')
    else: 
        _breast_vs_nonbreast.append('background')

_tnbc_vs_nontnbc = []
for cell in data.cellspace: 
    if cell in TNBC_lines: 
        _tnbc_vs_nontnbc.append('target')
    elif cell in breast_lines: 
        _tnbc_vs_nontnbc.append('background')
    else: 
        _tnbc_vs_nontnbc.append('none')

breast_subtype_goals = breast_subtype_goals.assign(TNBC_vs_nonbreast=_tnbc_vs_nonbreast)
breast_subtype_goals = breast_subtype_goals.assign(TNBC_vs_nonTNBC=_tnbc_vs_nontnbc)
breast_subtype_goals = breast_subtype_goals.assign(breast_vs_nonbreast=_breast_vs_nonbreast)

breast_subtype_goals.to_csv('../output/breast_subtype_goals.csv', sep=',', index=False)
breast_subtype_goals.head()

# of TNBC lines: 4
# of brest lines: 9


Unnamed: 0,cell_iname,TNBC_vs_nonbreast,TNBC_vs_nonTNBC,breast_vs_nonbreast
0,22RV1,background,none,background
1,5637,background,none,background
2,A204,background,none,background
3,A375,background,none,background
4,A549,background,none,background


# Evaluate Priotization results with Disease Indication 

In [7]:
def eval_prioritization(res, N=2500): 
    auroc_results = {'name':[], 'auroc':[], 'random_auroc_q025':[], 'random_auroc_q975':[], 'random_auroc_mean':[], 'P(random_auroc>=auroc)':[], 'num_target_indications':[], 'num_background_indications':[]}

    for key in res: 

        target_indication = goal_metadata[lambda x: x.goal_name == key].target_indication.item()
        background_indication = goal_metadata[lambda x: x.goal_name == key].background_indication.item()
        indications = [target_indication, background_indication]

        dis_res = res[key].merge(druginfo[['pert_id', 'cmap_name', 'clinical_phase', 'indication', 'disease_area']].drop_duplicates(), on='pert_id', how='left')[lambda x: x.indication.isin(indications)][['cmap_name', 'indication']]
        dis_res = dis_res.drop_duplicates() # non-unique mapping between pert_id -> cmap ... e.g., some cmap_names appear multiple times 

        indication_label = [(x in target_indication)*1. for x in dis_res.indication]   # indications with target_indication will have label 1, background indications will have label 0
        ranked_score = torch.arange(len(indication_label), 0, -1)

        auroc = roc_auc_score(indication_label, ranked_score)
        
        rand_aurocs = np.array([roc_auc_score(np.random.permutation(indication_label), ranked_score) for i in range(N)])

        auroc_results['name'].append(key)
        auroc_results['auroc'].append(auroc)
        auroc_results['random_auroc_q025'].append(np.quantile(rand_aurocs, q=0.025))
        auroc_results['random_auroc_q975'].append(np.quantile(rand_aurocs, q=0.975))
        auroc_results['random_auroc_mean'].append(np.mean(rand_aurocs))
        auroc_results['P(random_auroc>=auroc)'].append(np.mean(rand_aurocs >= auroc))
        auroc_results['num_target_indications'].append((dis_res.indication.values == target_indication).astype(int).sum())
        auroc_results['num_background_indications'].append((dis_res.indication.values == background_indication).astype(int).sum())

    auroc_results = pd.DataFrame(auroc_results)
    return auroc_results

In [8]:
_ROOT_GSNN_ = '../output/exp1/FOLD-1/GSNN//8c62d604-d99c-48d5-9bf0-9a8e9d30c644/'
_ROOT_NN_ = '../output/exp1/FOLD-1/NN//872dcbb7-e641-47af-ba26-8ea820c583d6/'

res_gsnn = {name[:-4]:pd.read_csv(f'{_ROOT_GSNN_}/prioritizations/disease_prioritization_goals/{name}') for name in os.listdir(_ROOT_GSNN_ + '/prioritizations/disease_prioritization_goals/')}
eval_res_gsnn = eval_prioritization(res_gsnn)

res_nn = {name[:-4]:pd.read_csv(f'{_ROOT_NN_}/prioritizations/disease_prioritization_goals/{name}') for name in os.listdir(_ROOT_NN_ + '/prioritizations/disease_prioritization_goals/')}
eval_res_nn = eval_prioritization(res_nn)

In [9]:
print('\\begin{tabular}{|l|l|l|l|l|l|}')
print('\\hline')
print('Target Dis. (\# lines) & Background Dis. (\# lines) & GSNN AUROC (FDR) & NN AUROC (FDR) & \# target indications & \# background indications \\\\ \\hline')

for i,row in eval_res_nn[['name', 'auroc', 'P(random_auroc>=auroc)']].merge(eval_res_gsnn, on='name').iterrows(): 

    disA,disB = row["name"].split('-')
    linesA = (goal_df[row["name"]] == 'target').sum()
    linesB = (goal_df[row["name"]] == 'background').sum()
    print(f'{disA} ({linesA}) & {disB} ({linesB}) & {row.auroc_y:.2f} ({row["P(random_auroc>=auroc)_y"]:.2f}) & {row.auroc_x:.2f} ({row["P(random_auroc>=auroc)_x"]:.2f}) & {row.num_target_indications} & {row.num_background_indications} \\\\ \\hline')

print('\\end{tabular}')

\begin{tabular}{|l|l|l|l|l|l|}
\hline
Target Dis. (\# lines) & Background Dis. (\# lines) & GSNN AUROC (FDR) & NN AUROC (FDR) & \# target indications & \# background indications \\ \hline
NSCLC (8) & AML (2) & 1.00 (0.17) & 0.60 (0.49) & 5 & 1 \\ \hline
breast (9) & prostate (3) & 1.00 (0.21) & 1.00 (0.20) & 4 & 1 \\ \hline
breast (9) & AML (2) & 1.00 (0.20) & 1.00 (0.19) & 4 & 1 \\ \hline
NSCLC (8) & prostate (3) & 1.00 (0.16) & 0.60 (0.51) & 5 & 1 \\ \hline
breast (9) & NSCLC (8) & 0.80 (0.09) & 0.75 (0.13) & 4 & 5 \\ \hline
melanoma (7) & breast (9) & 1.00 (0.02) & 1.00 (0.02) & 4 & 4 \\ \hline
breast (9) & kidney (2) & 1.00 (0.07) & 1.00 (0.07) & 4 & 2 \\ \hline
melanoma (7) & NSCLC (8) & 1.00 (0.01) & 1.00 (0.01) & 4 & 5 \\ \hline
melanoma (7) & kidney (2) & 1.00 (0.07) & 1.00 (0.07) & 4 & 2 \\ \hline
NSCLC (8) & kidney (2) & 0.70 (0.29) & 0.90 (0.09) & 5 & 2 \\ \hline
\end{tabular}


In [10]:
eval_res_gsnn.head()

Unnamed: 0,name,auroc,random_auroc_q025,random_auroc_q975,random_auroc_mean,P(random_auroc>=auroc),num_target_indications,num_background_indications
0,NSCLC-AML,1.0,0.0,1.0,0.5,0.1664,5,1
1,breast-prostate,1.0,0.0,1.0,0.5075,0.2092,4,1
2,breast-AML,1.0,0.0,1.0,0.5043,0.2028,4,1
3,NSCLC-prostate,1.0,0.0,1.0,0.49648,0.158,5,1
4,breast-NSCLC,0.8,0.1,0.9,0.49726,0.0872,4,5


In [14]:
target_indication = goal_metadata[lambda x: x.goal_name == 'breast-NSCLC'].target_indication.item()
background_indication = goal_metadata[lambda x: x.goal_name == 'breast-NSCLC'].background_indication.item()
indications = [target_indication, background_indication]

res_gsnn['breast-NSCLC'].merge(druginfo[['pert_id', 'cmap_name', 'clinical_phase', 'indication', 'disease_area']].drop_duplicates(), on='pert_id', how='left')[lambda x: x.indication.isin(indications)][['cmap_name', 'indication']].drop_duplicates()


Unnamed: 0,cmap_name,indication
19,fulvestrant,breast cancer
43,lapatinib,breast cancer
70,afatinib,non-small cell lung cancer (NSCLC)
91,gefitinib,non-small cell lung cancer (NSCLC)
113,toremifene,breast cancer
123,tamoxifen,breast cancer
144,alectinib,non-small cell lung cancer (NSCLC)
454,crizotinib,non-small cell lung cancer (NSCLC)
495,ceritinib,non-small cell lung cancer (NSCLC)


In [15]:
target_indication = goal_metadata[lambda x: x.goal_name == 'NSCLC-kidney'].target_indication.item()
background_indication = goal_metadata[lambda x: x.goal_name == 'NSCLC-kidney'].background_indication.item()
indications = [target_indication, background_indication]

res_gsnn['NSCLC-kidney'].merge(druginfo[['pert_id', 'cmap_name', 'clinical_phase', 'indication', 'disease_area']].drop_duplicates(), on='pert_id', how='left')[lambda x: x.indication.isin(indications)][['cmap_name', 'indication']].drop_duplicates()


Unnamed: 0,cmap_name,indication
2,afatinib,non-small cell lung cancer (NSCLC)
292,crizotinib,non-small cell lung cancer (NSCLC)
328,axitinib,renal cell carcinoma (RCC)
359,alectinib,non-small cell lung cancer (NSCLC)
408,ceritinib,non-small cell lung cancer (NSCLC)
477,gefitinib,non-small cell lung cancer (NSCLC)
481,temsirolimus,renal cell carcinoma (RCC)


# TNBC prioritization results

```bash
(gsnn) $ python prioritize.py --proc ../output/exp1/proc/ --uid_dir ../output/exp1/FOLD-1/GSNN/8c62d604-d99c-48d5-9bf0-9a8e9d30c644/ --goals_path ../output/breast_subtype_goals.csv --model model-100.pt --doses 0.01 0.1 --verbose
```


In [107]:
tnbc_vs_nonbreast_res = pd.read_csv('../output/exp1/FOLD-1/GSNN/8c62d604-d99c-48d5-9bf0-9a8e9d30c644/prioritizations/breast_subtype_goals/TNBC_vs_nonbreast.csv')
tnbc_vs_nonbreast_res = tnbc_vs_nonbreast_res.merge(druginfo[['pert_id', 'cmap_name', 'clinical_phase', 'indication', 'disease_area']].drop_duplicates(), left_on='pert_id_1', right_on='pert_id', how='left')
tnbc_vs_nonbreast_res = tnbc_vs_nonbreast_res.sort_values('p_sens', ascending=False)
tnbc_vs_nonbreast_res[['cmap_name', 'dose_um_1', 'diff_mean', 'p_sens', 'indication', 'clinical_phase', 'disease_area']].head(15)

Unnamed: 0,cmap_name,dose_um_1,diff_mean,p_sens,indication,clinical_phase,disease_area
0,BMS-265246,0.1,-0.233018,0.995,,Preclinical,
1,BMS-265246,0.01,-0.200589,0.994,,Preclinical,
2,BMS-387032,0.1,-0.249839,0.981,,Phase 1,
3,NVP-AUY922,0.01,-0.293022,0.974,,Phase 2,
4,7-hydroxystaurosporine,0.1,-0.183509,0.967,,Phase 2,
5,NVP-AUY922,0.1,-0.214219,0.957,,Phase 2,
6,alvocidib,0.1,-0.146721,0.952,,Phase 2,
7,CGP-60474,0.01,-0.16806,0.944,,Preclinical,
8,bortezomib,0.1,-0.135117,0.926,multiple myeloma|mantle cell lymphoma (MCL),Launched,hematologic malignancy
9,NVP-AUY922,0.01,-0.238416,0.926,,Phase 2,


In [108]:
tnbc_vs_nontnbc_res = pd.read_csv('../output/exp1/FOLD-1/GSNN/8c62d604-d99c-48d5-9bf0-9a8e9d30c644/prioritizations/breast_subtype_goals/TNBC_vs_nonTNBC.csv')
tnbc_vs_nontnbc_res = tnbc_vs_nontnbc_res.merge(druginfo[['pert_id', 'cmap_name', 'clinical_phase', 'indication', 'disease_area']].drop_duplicates(), left_on='pert_id_1', right_on='pert_id', how='left')
tnbc_vs_nontnbc_res = tnbc_vs_nontnbc_res.sort_values('p_sens', ascending=False)
tnbc_vs_nontnbc_res[['cmap_name', 'dose_um_1', 'diff_mean', 'p_sens', 'indication', 'clinical_phase', 'disease_area']].head(15)

Unnamed: 0,cmap_name,dose_um_1,diff_mean,p_sens,indication,clinical_phase,disease_area
0,bortezomib,0.1,-0.429956,0.999,multiple myeloma|mantle cell lymphoma (MCL),Launched,hematologic malignancy
1,BMS-265246,0.01,-0.230298,0.995,,Preclinical,
2,NVP-AUY922,0.01,-0.436489,0.99,,Phase 2,
3,BMS-265246,0.1,-0.27756,0.99,,Preclinical,
4,alvocidib,0.1,-0.260261,0.987,,Phase 2,
5,MG-132,0.1,-0.363956,0.986,,Preclinical,
6,7-hydroxystaurosporine,0.1,-0.266795,0.981,,Phase 2,
7,NVP-AUY922,0.1,-0.382645,0.979,,Phase 2,
8,dasatinib,0.1,-0.205544,0.97,chronic myeloid leukemia (CML)|acute lymphobla...,Launched,hematologic malignancy
9,dinaciclib,0.1,-0.157316,0.933,,Phase 3,


In [109]:
breast_vs_nonbreast_res = pd.read_csv('../output/exp1/FOLD-1/GSNN/8c62d604-d99c-48d5-9bf0-9a8e9d30c644/prioritizations/breast_subtype_goals/breast_vs_nonbreast.csv')
breast_vs_nonbreast_res = breast_vs_nonbreast_res.merge(druginfo[['pert_id', 'cmap_name', 'clinical_phase', 'indication', 'disease_area']].drop_duplicates(), left_on='pert_id_1', right_on='pert_id', how='left')
breast_vs_nonbreast_res = breast_vs_nonbreast_res.sort_values('p_sens', ascending=False)
breast_vs_nonbreast_res[['cmap_name', 'dose_um_1', 'diff_mean', 'p_sens', 'indication', 'clinical_phase', 'disease_area']].head(15)

Unnamed: 0,cmap_name,dose_um_1,diff_mean,p_sens,indication,clinical_phase,disease_area
0,BMS-265246,0.01,-0.076046,0.953,,Preclinical,
1,BMS-387032,0.1,-0.128524,0.937,,Phase 1,
2,afatinib,0.1,-0.085574,0.934,non-small cell lung cancer (NSCLC),Launched,oncology
3,PF-05212384,0.1,-0.092369,0.9,,Phase 2,
4,BMS-265246,0.1,-0.078265,0.899,,Preclinical,
5,neratinib,0.1,-0.055397,0.88,,Phase 3,
6,PKI-179,0.1,-0.095141,0.874,,Phase 1,
7,none,0.1,-0.090101,0.856,,,
8,fulvestrant,0.1,-0.090101,0.856,breast cancer,Launched,oncology
9,ICI-182780,0.01,-0.080769,0.851,,,


In [108]:
set(tnbc_vs_nonbreast_res.head(10).cmap_name.unique().tolist()).intersection(set(tnbc_vs_nontnbc_res.head(10).cmap_name.unique().tolist())).intersection(set(breast_vs_nonbreast_res.head(10).cmap_name.unique().tolist()))

{'BMS-265246', 'BMS-387032', 'bortezomib'}

In [109]:
set(tnbc_vs_nonbreast_res.head(10).cmap_name.unique().tolist()).intersection(set(breast_vs_nonbreast_res.head(10).cmap_name.unique().tolist()))

{'BMS-265246', 'BMS-387032', 'bortezomib'}

# TNBC combination agent priortization 

1. Select the top N drugs from single agents to use for drug comb. screening 
2. save to disk 
3. run combo priortization 

```bash
(gsnn) $ python prioritize.py --proc ../output/exp1/proc/ --uid_dir ../output/exp1/FOLD-1/GSNN/8c62d604-d99c-48d5-9bf0-9a8e9d30c644/ --goals_path ../output/breast_subtype_goals.csv --model model-100.pt --drugs_path ../output/TNBC_combo_drugs_to_test.txt --doses 0.01 0.1 --verbose --combo
```

In [68]:
# Identify the top single agents and save to disk

K = 18

tnbc_vs_nonbreast_topK_drugs = tnbc_vs_nonbreast_res.pert_id.values[:K].tolist()
tnbc_vs_nontnbc_topK_drugs = tnbc_vs_nontnbc_res.pert_id.values[:K].tolist()
breast_vs_nonbreast_topK_drugs = breast_vs_nonbreast_res.pert_id.values[:K].tolist()

tnbc_drugs = np.unique(tnbc_vs_nonbreast_topK_drugs + tnbc_vs_nontnbc_topK_drugs + breast_vs_nonbreast_topK_drugs).tolist()
print('# breast/tnbc drugs:', len(tnbc_drugs))

with open('../output/TNBC_combo_drugs_to_test.txt', 'w') as f: 
    f.write(','.join(tnbc_drugs))

# breast/tnbc drugs: 25


In [100]:
def compute_synergy(res, target='p_sens', higher_is_better=True): 

    drugs = res.pert_id_1.unique() 
    doses = res.dose_um_1.unique() 

    out = {'pert_id_1':[], 'pert_id_2':[], 'dose_um_1':[], 'dose_um_2':[], 'CR':[]}

    for drug1 in drugs: 
        for drug2 in drugs: 
            for dose1 in doses: 
                for dose2 in doses: 
                    
                    try: 
                        drug1_res = res[lambda x: (x.pert_id_1 == drug1) & (x.dose_um_1 == dose1) & (x.pert_id_2 == 'none')]
                        drug2_res = res[lambda x: (x.pert_id_1 == drug2) & (x.dose_um_1 == dose2) & (x.pert_id_2 == 'none')]
                        
                        drug12_res = res[lambda x: (x.pert_id_1 == drug1) & (x.dose_um_1 == dose1) & (x.pert_id_2 == drug2) & (x.dose_um_2 == dose2)]
                        if drug12_res.shape[0] == 0: drug12_res = res[lambda x: (x.pert_id_1 == drug2) & (x.dose_um_1 == dose2) & (x.pert_id_2 == drug1) & (x.dose_um_2 == dose1)]

                        if higher_is_better: 
                            CR = drug12_res[target].item() / max(drug1_res[target].item(), drug2_res[target].item())
                        else: 
                            CR = drug12_res[target].item() / min(drug1_res[target].item(), drug2_res[target].item())

                        out['pert_id_1'].append(drug1)
                        out['pert_id_2'].append(drug2)
                        out['dose_um_1'].append(dose1)
                        out['dose_um_2'].append(dose2)
                        out['CR'].append(CR)
                    except: 
                        pass 

    return pd.DataFrame(out)

In [103]:
# load combo results 
tnbc_vs_nonbreast_combo_res = pd.read_csv('../output/exp1/FOLD-1/GSNN/8c62d604-d99c-48d5-9bf0-9a8e9d30c644/prioritizations/breast_subtype_goals_combo/TNBC_vs_nonbreast.csv')
tnbc_vs_nonbreast_combo_res = tnbc_vs_nonbreast_combo_res.merge(druginfo[['pert_id', 'cmap_name', 'indication', 'clinical_phase', 'disease_area']].drop_duplicates(), left_on='pert_id_1', right_on='pert_id', how='left')
tnbc_vs_nonbreast_combo_res = tnbc_vs_nonbreast_combo_res.merge(druginfo[['pert_id', 'cmap_name', 'indication', 'clinical_phase', 'disease_area']].drop_duplicates(), left_on='pert_id_2', right_on='pert_id', how='left')
tnbc_vs_nonbreast_combo_res = tnbc_vs_nonbreast_combo_res.sort_values('p_sens', ascending=False)

# get combination ratio
cr_res = compute_synergy(tnbc_vs_nonbreast_combo_res, target='diff_mean', higher_is_better=False)
tnbc_vs_nonbreast_combo_res = tnbc_vs_nonbreast_combo_res.merge(cr_res, on=['pert_id_1', 'pert_id_2', 'dose_um_1', 'dose_um_2'], how='left')

tnbc_vs_nonbreast_combo_res[['cmap_name_x', 'cmap_name_y', 'dose_um_1', 'dose_um_2', 'diff_mean', 'p_sens', 'CR']].head(15)

Unnamed: 0,cmap_name_x,cmap_name_y,dose_um_1,dose_um_2,diff_mean,p_sens,CR
0,MG-132,BMS-265246,0.1,0.1,-0.303308,1.0,1.33431
1,BMS-265246,ixazomib,0.1,0.01,-0.255759,0.999,1.12513
2,BMS-265246,ixazomib,0.1,0.1,-0.295841,0.998,1.301462
3,MG-132,BMS-265246,0.01,0.1,-0.24996,0.997,1.099622
4,BMS-265246,,0.1,0.0,-0.227315,0.997,
5,BMS-265246,ixazomib,0.01,0.01,-0.209264,0.997,1.04299
6,BMS-265246,BMS-265246,0.1,0.1,-0.228043,0.997,1.003203
7,MG-132,BMS-265246,0.1,0.01,-0.284748,0.996,1.419208
8,BMS-265246,,0.01,0.0,-0.200638,0.996,
9,BMS-265246,BMS-265246,0.01,0.1,-0.23237,0.996,1.022239


In [None]:
# BMS-265246 (CDK 1/2)  & ixazomib (Proteasome)
#                       & MG-132 (Proteasome)

In [102]:
# load combo results 
tnbc_vs_nontnbc_combo_res = pd.read_csv('../output/exp1/FOLD-1/GSNN/8c62d604-d99c-48d5-9bf0-9a8e9d30c644/prioritizations/breast_subtype_goals_combo/TNBC_vs_nonTNBC.csv')
tnbc_vs_nontnbc_combo_res = tnbc_vs_nontnbc_combo_res.merge(druginfo[['pert_id', 'cmap_name', 'indication', 'clinical_phase', 'disease_area']].drop_duplicates(), left_on='pert_id_1', right_on='pert_id', how='left')
tnbc_vs_nontnbc_combo_res = tnbc_vs_nontnbc_combo_res.merge(druginfo[['pert_id', 'cmap_name', 'indication', 'clinical_phase', 'disease_area']].drop_duplicates(), left_on='pert_id_2', right_on='pert_id', how='left')
tnbc_vs_nontnbc_combo_res = tnbc_vs_nontnbc_combo_res.sort_values('p_sens', ascending=False)

# get combination ratio
cr_res = compute_synergy(tnbc_vs_nontnbc_combo_res, target='diff_mean', higher_is_better=False)
tnbc_vs_nontnbc_combo_res = tnbc_vs_nontnbc_combo_res.merge(cr_res, on=['pert_id_1', 'pert_id_2', 'dose_um_1', 'dose_um_2'], how='left')

tnbc_vs_nontnbc_combo_res[['cmap_name_x', 'cmap_name_y', 'dose_um_1', 'dose_um_2', 'diff_mean', 'p_sens', 'CR']].head(15)

Unnamed: 0,cmap_name_x,cmap_name_y,dose_um_1,dose_um_2,diff_mean,p_sens,CR
0,bortezomib,bortezomib,0.1,0.1,-0.434062,0.999,1.002595
1,bortezomib,bortezomib,0.01,0.1,-0.424589,0.998,0.980714
2,bortezomib,,0.1,0.0,-0.432939,0.998,
3,BMS-265246,ixazomib,0.1,0.1,-0.369737,0.998,1.364159
4,MG-132,alvocidib,0.1,0.1,-0.378732,0.997,1.075813
5,MG-132,BMS-265246,0.01,0.1,-0.305381,0.997,1.126713
6,MG-132,BMS-265246,0.1,0.1,-0.404854,0.997,1.150013
7,MG-132,BMS-265246,0.1,0.01,-0.344413,0.996,0.978325
8,ixazomib,alvocidib,0.1,0.1,-0.340174,0.995,1.294838
9,BMS-265246,ixazomib,0.1,0.01,-0.313636,0.994,1.15717


In [None]:
# 	BMS-265246 (CDK 1/2) & ixazomib (Proteasome)
#                        & MG-132 (Proteasome)
#  alvocidib (CDKs )     & ixazomib (Proteasome)
#  alvocidib (CDKs )     & MG-132 (Proteasome)

In [101]:
# load combo results 
breast_vs_nonbreast_combo_res = pd.read_csv('../output/exp1/FOLD-1/GSNN/8c62d604-d99c-48d5-9bf0-9a8e9d30c644/prioritizations/breast_subtype_goals_combo/breast_vs_nonbreast.csv')
breast_vs_nonbreast_combo_res = breast_vs_nonbreast_combo_res.merge(druginfo[['pert_id', 'cmap_name', 'indication', 'clinical_phase', 'disease_area']].drop_duplicates(), left_on='pert_id_1', right_on='pert_id', how='left')
breast_vs_nonbreast_combo_res = breast_vs_nonbreast_combo_res.merge(druginfo[['pert_id', 'cmap_name', 'indication', 'clinical_phase', 'disease_area']].drop_duplicates(), left_on='pert_id_2', right_on='pert_id', how='left')
breast_vs_nonbreast_combo_res = breast_vs_nonbreast_combo_res.sort_values('p_sens', ascending=False)

# get combination ratio
cr_res = compute_synergy(breast_vs_nonbreast_combo_res, target='diff_mean', higher_is_better=False)
breast_vs_nonbreast_combo_res = breast_vs_nonbreast_combo_res.merge(cr_res, on=['pert_id_1', 'pert_id_2', 'dose_um_1', 'dose_um_2'], how='left')

breast_vs_nonbreast_combo_res[['cmap_name_x', 'cmap_name_y', 'dose_um_1', 'dose_um_2', 'diff_mean', 'p_sens', 'CR']].head(15)

Unnamed: 0,cmap_name_x,cmap_name_y,dose_um_1,dose_um_2,diff_mean,p_sens,CR
0,alvespimycin,torin-2,0.01,0.1,-0.269892,1.0,3.669042
1,PF-05212384,dasatinib,0.1,0.1,-0.213951,1.0,2.30333
2,PKI-179,dasatinib,0.1,0.01,-0.188708,0.999,2.000846
3,geldanamycin,torin-2,0.01,0.1,-0.264545,0.999,3.596349
4,PKI-179,dasatinib,0.1,0.1,-0.168145,0.998,1.782819
5,BMS-265246,bazedoxifene,0.1,0.01,-0.194231,0.998,2.435519
6,PF-05212384,dasatinib,0.1,0.01,-0.210556,0.998,2.26678
7,ICI-182780,BMS-265246,0.1,0.1,-0.192866,0.997,
8,fulvestrant,BMS-265246,0.1,0.1,-0.192866,0.997,
9,ICI-182780,BMS-265246,0.01,0.1,-0.194041,0.997,


In [None]:
# lit. search - breast vs non-breast 

# HSP90 (heat shock) &   mTOR         --  Inhibition of Hsp90 Suppresses PI3K/AKT/mTOR Signaling and Has Antitumor Activity in Burkitt Lymphoma -- (https://aacrjournals.org/mct/article/16/9/1779/148605/Inhibition-of-Hsp90-Suppresses-PI3K-AKT-mTOR)
#                                     --  The Heat Shock Protein Story—From Taking mTORC1,2 and Heat Shock Protein Inhibitors as Therapeutic Measures for Treating Cancers to Development of Cancer Vaccines -- (https://www.scirp.org/journal/paperinformation.aspx?paperid=80657)
#   alvespimycin        torin-2
#   geldanamycin 
#   NVP-AUY922


# Dasatinib (SRC family -lots of targets)   &    PF-05212384 (mTOR/Pi3K)
#                                           &    PKI-179 (mTOR/pi3k)
#                                           &    torin-2  (mTOR)



#                                                                           CDK4/6 inhibitor resistance in estrogen receptor positive breast cancer, a 2023 perspective ( https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10073728/#:~:text=The%20CDK4%2F6%20inhibitors%20palbociclib,et%20al.%2C%202022))
# BMS-265246 (CDK 1/2)                  & bazedoxifene  (ESR modulator) 
#                                       & ICI-182780    (ESR antagonist)
#                                       & fulvestrant	(ESR inhbitor)