# Import

In [1]:
import os, joblib
import numpy as np
import pandas as pd

from itertools import product

import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'Helvetica'

import seaborn as sns

from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Input, Concatenate

from scipy.stats import ttest_ind as ttest

In [2]:
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this may also hide deprecation / SettingWithCopy warnings raised
# by later pandas and Keras calls; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Define funcs

In [3]:
def loadEncoder(path, which='rna'):
    """Load a saved siamese network and return its inner encoder sub-model.

    Parameters
    ----------
    path : str
        Location of the saved Keras model, passed to ``load_model``.
    which : str, default 'rna'
        Prefix for the encoder's new name; the layer is renamed to
        ``f'{which}Encoder'``.

    Returns
    -------
    The layer named ``'model'`` of the loaded network, renamed, or ``None``
    when an ``AttributeError`` is raised while loading or renaming it.
    """
    try:
        encoder = load_model(path).get_layer('model')
        # Rename through the private attribute, as the public `name` is read-only.
        encoder._name = f'{which}Encoder'
    except AttributeError:
        return None
    return encoder
    
def getModel(rnaPath=None, drugPath=None, rnaDim=463, drugDim=256):
    """Build a model that maps [drug features, RNA features] to a pair embedding.

    Each input is optionally passed through a pre-trained encoder and the two
    resulting tensors are concatenated (drug first, RNA second).

    Parameters
    ----------
    rnaPath : str or None
        Saved siamese model holding the cell-line encoder. ``None`` means the
        raw RNA input is concatenated as is.
    drugPath : str or None
        Saved siamese model holding the drug encoder. ``None`` means the raw
        drug input is concatenated as is.
    rnaDim : int, default 463
        Width of the RNA input vector.
    drugDim : int, default 256
        Width of the drug input vector.

    Returns
    -------
    tensorflow.keras.Model
        Model with inputs ``[drugInput, rnaInput]`` and the concatenated
        embedding as output.

    Raises
    ------
    ValueError
        If an encoder path is given but ``loadEncoder`` returns ``None``.
    """
    def _embed(tensor, path, which):
        # Pass `tensor` through the encoder stored at `path`, or leave it raw.
        if path is None:
            return tensor
        encoder = loadEncoder(path, which=which)
        if encoder is None:
            # Fail with a clear message instead of "'NoneType' is not callable".
            raise ValueError(f'Could not load the {which} encoder from {path!r}')
        return encoder(tensor)

    # Explicit shape tuples: a bare int is not accepted by every Keras version.
    drugInput = Input(shape=(drugDim,))
    rnaInput = Input(shape=(rnaDim,))

    # Drug encoder is loaded before the RNA encoder, as in the original branches.
    drugEmbed = _embed(drugInput, drugPath, 'drug')
    rnaEmbed = _embed(rnaInput, rnaPath, 'rna')
    pairEmbed = Concatenate()([drugEmbed, rnaEmbed])

    return Model(inputs=[drugInput, rnaInput], outputs=pairEmbed)

## Define paths

## Cell line encoder

In [4]:
# Directory holding the saved cell-line siamese models.
# NOTE(review): absolute cluster path; the notebook only runs where it exists.
rnaPath = '/fs/scratch/PCON0041/PatrickLawrence/cancer-drug-response/fewShot/cellLines/siameseV1/models/'

# Checkpoint names: the two differ only in the activation tag (relu vs sigmoid).
rnaRFModel = 'CellLineFewShot_Layers2_Hidden64_DO0-1_AFrelu_LR0-001_DR0-99_DS1000'
rnaLMModel = 'CellLineFewShot_Layers2_Hidden64_DO0-1_AFsigmoid_LR0-001_DR0-99_DS1000'

# Full checkpoint paths.
rnaRFPath, rnaLMPath = (os.path.join(rnaPath, modelName)
                        for modelName in (rnaRFModel, rnaLMModel))

## Drug encoder

In [5]:
# Alternative (repo-relative) location, currently unused: '../../models/drugEncoders/'
# NOTE(review): absolute cluster path; the notebook only runs where it exists.
drugModel = 'DrugFewShot_Layers1_Hidden64_DO0-1_AFrelu_LR0-001_DR0-99_DS1000'
drugPath = os.path.join(
    '/fs/scratch/PCON0041/PatrickLawrence/cancer-drug-response/fewShot/drugs/siameseV1/models/',
    drugModel,
)

# Load Data

## RNA

In [6]:
# Read the three expression tables (first CSV column becomes the index) and stack
# them in the order test, pre-training, new-cancer.
rnaFiles = ('../../data/processed/RNA_test_cancergenes.csv',
            '../../data/processed/RNA_newcancer_cancergenes.csv',
            '../../data/processed/cellLineEncoderPretrainData.csv')
testRNA, newRNA, preRNA = (pd.read_csv(csvPath, index_col=0) for csvPath in rnaFiles)
RNA = pd.concat([testRNA, preRNA, newRNA])
RNA.head()

Unnamed: 0,XIAP,CDKN2B,ITGB1,TRAF1,GSTM4,LRP5,IL3,HEYL,CXCL12,FN1,...,CALM1,MGST3,ARAF,NFKB1,ZBTB17,FGF23,FZD5,RAC3,GSTA1,LAMB2
ACH-000961,13.154465,8.8491,15.549766,6.920184,12.0727,14.011049,0.130963,10.485365,7.216564,15.074025,...,14.184124,11.098191,11.818052,11.481821,10.725326,0.569748,12.588184,10.000158,6.114922,15.500269
ACH-000984,12.658436,11.099468,16.368337,10.291742,16.10027,13.418132,0.130963,1.764265,6.714064,13.98851,...,12.864645,12.050875,11.611258,12.007825,10.958988,1.569748,9.581694,6.629056,4.52996,13.717964
ACH-000978,12.391499,9.313873,14.526513,8.183218,10.895111,12.847516,0.130963,11.515809,10.011866,12.451502,...,13.081577,12.571796,10.471045,10.505967,10.594366,3.377103,10.755789,8.836714,16.416312,15.371843
ACH-000222,14.216933,13.075022,16.941942,9.317074,9.706432,14.477196,0.130963,1.764265,4.714064,10.887437,...,12.979026,12.599238,10.817039,11.842455,8.791468,1.569748,11.702942,6.244975,2.944997,14.53038
ACH-000164,13.991268,2.579163,17.134937,9.55439,11.698739,14.880114,0.130963,8.914012,6.588533,11.203131,...,12.769463,11.896701,12.273183,12.263763,10.776197,0.569748,9.966419,9.899531,2.944997,13.806081


In [7]:
# Dimensions (rows, columns) of the combined expression table.
RNA.shape

(983, 463)

## Drugs

In [8]:
# Drug -> indication lookup: keep the distinct (name, indication) pairs, drop
# drugs without an indication, and index the table by drug name.
drugInfo = (
    pd.read_csv('../../data/processed/drugCellLinePairsData.csv')
      .loc[:, ['name', 'indication']]
      .drop_duplicates(keep='first')
      .dropna(subset=['indication'])
      .set_index('name')
)
print(drugInfo.shape)

(369, 1)


In [9]:
# Display the drug -> indication lookup table.
drugInfo

Unnamed: 0_level_0,indication
name,Unnamed: 1_level_1
floxuridine,colorectal cancer
valrubicin,bladder cancer
belinostat,peripheral T-cell lymphoma (PTCL)
romidepsin,cutaneous T-cell lymphoma (CTCL)
dihydroartemisinin,malaria
...,...
tofogliflozin,"diabetes mellitus, diabetes mellitus"
vindesine,"breast cancer, non-small cell lung cancer (NSC..."
vinblastine,"Hodgkin's lymphoma, true histiocytic lymphoma ..."
vinflunine,bladder cancer


In [10]:
# Fingerprint table restricted to drugs that have an entry in drugInfo.
fingerprints = pd.read_csv('../../data/processed/drug_fingerprints.csv', index_col=0)
drugs = fingerprints.loc[fingerprints.index.isin(drugInfo.index)]
print(drugs.shape)

(369, 256)


## Pairs

In [11]:
# Every (cell line, drug) combination, cell line varying slowest.
pairs = list(product(RNA.index.values, drugs.index.values))

In [12]:
# One row per (cell line, drug) pair, ordered by cell line and then drug.
pairDF = (
    pd.DataFrame(pairs, columns=['cellLine', 'drug'])
      .sort_values(by=['cellLine', 'drug'])
)
print(pairDF.shape)
pairDF.head()

(362727, 2)


Unnamed: 0,cellLine,drug
183943,ACH-000001,5-fluorouracil
183990,ACH-000001,SN-38
184034,ACH-000001,abemaciclib
183828,ACH-000001,acetophenazine
184106,ACH-000001,acipimox


## Add cancer type

In [13]:
# Cancer type of every cell line that appears in pairDF, indexed by cell line.
cellInfo = (
    pd.read_csv('../../data/processed/cellLineInfo.csv', index_col=0)
      .loc[pairDF.cellLine.unique().tolist(), ['primary_disease']]
      .rename(columns={'primary_disease': 'cancerType'})
)
cellInfo

Unnamed: 0_level_0,cancerType
DepMap_ID,Unnamed: 1_level_1
ACH-000001,Ovarian Cancer
ACH-000002,Leukemia
ACH-000003,Colon/Colorectal Cancer
ACH-000004,Leukemia
ACH-000005,Leukemia
...,...
ACH-002680,Brain Cancer
ACH-002687,Eye Cancer
ACH-002693,Sarcoma
ACH-002710,Sarcoma


In [14]:
# Single-cell-line lookup helper, kept for interactive use.
getCT = lambda x: cellInfo.loc[x, 'cancerType']

# Attach the cancer type with one vectorised lookup instead of a per-row .loc call.
# Guarded so that re-running the cell does not fail because the column already exists.
if 'cancerType' not in pairDF.columns:
    pairDF.insert(1, 'cancerType', pairDF.cellLine.map(cellInfo['cancerType']))

## Add drug indication

In [15]:
def getIndication(x):
    """Return the lower-cased indication string stored in drugInfo for drug `x`."""
    return drugInfo.loc[x, 'indication'].lower()

pairDF['drugIndication'] = pairDF.drug.apply(getIndication)

## Data

In [16]:
# Feature matrices row-aligned with pairDF: data[0] holds drug fingerprints,
# data[1] holds RNA expression.
drugFeatures = drugs.loc[pairDF.drug.tolist(), :].to_numpy()
rnaFeatures = RNA.loc[pairDF.cellLine.tolist(), :].to_numpy()
data = [drugFeatures, rnaFeatures]

# Process data

In [17]:
# Registry of loaded predictors, keyed by a short model name.
models = {}

## RF

In [18]:
# NOTE(review): joblib.load unpickles the file; only load checkpoints from a trusted source.
rfCheckpoint = '../../models/fsCDR/RF/EmbedDrug-EmbedCell-Concat-RF-CVfold1.joblib'
models['rf'] = joblib.load(rfCheckpoint)

## DeepDSC

In [19]:
# Saved DeepDSC baseline (fold 2).
dscCheckpoint = '../../models/baselines/DeepDSC/DeepDSC_fold2'
models['dsc'] = load_model(dscCheckpoint)



# Get test preds

`pairDF` must already be sorted by cell line and then by drug before predictions are made, so that the per-cell-line ranks computed below can be written back to the matching rows.

In [20]:
predDFs = {}

# Encoder checkpoints (cell-line encoder, drug encoder) for each model that is fed
# concatenated embeddings rather than raw features.
encoderPaths = {'rf': (rnaRFPath, drugPath)}

for name, model in models.items():
    if name in encoderPaths:
        cp, dp = encoderPaths[name]
        encoder = getModel(cp, dp)
        # Score = column 1 of predict_proba (presumably the "effective" class;
        # confirm against model.classes_).
        preds = model.predict_proba(encoder(data))[:, 1]
    elif name == 'dsc':
        # DeepDSC takes [RNA, drug], i.e. `data` in reverse order.
        td = [data[1], data[0]]
        preds = model.predict(td).reshape(-1)
    else:
        # Fail loudly instead of silently reusing inputs left over from another model.
        raise ValueError(f'No prediction recipe defined for model {name!r}')

    pairDF[f'{name}Pred'] = preds

    # Rank drugs within each cell line, 1 = highest prediction. The result is
    # index-aligned, so it does not depend on the row order of pairDF. Ties are
    # broken by row order (method='first'), which keeps ranks unique and repeatable.
    pairDF[f'{name}CellRank'] = (
        pairDF.groupby('cellLine')[f'{name}Pred']
              .rank(method='first', ascending=False, na_option='bottom')
              .astype(int)
    )

In [21]:
# Preview pairDF with the prediction and per-cell-line rank columns of each model.
pairDF.head()

Unnamed: 0,cellLine,cancerType,drug,drugIndication,rfPred,rfCellRank,dscPred,dscCellRank
183943,ACH-000001,Ovarian Cancer,5-fluorouracil,"colorectal cancer, breast cancer, pancreatic c...",0.001863,305,0.106888,252
183990,ACH-000001,Ovarian Cancer,SN-38,colorectal cancer,0.395247,8,0.151473,18
184034,ACH-000001,Ovarian Cancer,abemaciclib,breast cancer,0.003214,288,0.117888,140
183828,ACH-000001,Ovarian Cancer,acetophenazine,psychosis,0.041572,63,0.118217,135
184106,ACH-000001,Ovarian Cancer,acipimox,hyperlipidemia,0.016433,148,0.093828,365


# Define valid indications for cancer types

In [16]:
# Distinct indication terms over all drugs; multi-indication strings are split on ", ".
indications = {
    term
    for d in drugInfo.index
    for term in getIndication(d).lower().split(", ")
}
print(len(indications))

295


In [17]:
# Display the set of distinct indication terms.
indications

{'abdominal pain',
 'abstinence from alcohol',
 'acne vulgaris (av)',
 'acquired immunodeficiency syndrome (aids)',
 'actinic keratosis (ak)',
 'acute abdominal visceral spasm',
 'acute lymphoblastic leukemia (all)',
 'acute myeloid leukemia (aml)',
 'acute promyelocytic leukemia (apl)',
 'aggressive systemic mastocytosis (asm)',
 'allergic rhinitis',
 'amyotrophic lateral sclerosis (als)',
 'anacidity diagnostic',
 'anaphylactic shock',
 'anemia',
 'anesthetic',
 'angina pectoris',
 'angioedema',
 'ankylosing spondylitis',
 'anxiety',
 'arteriosclerosis',
 'ascariasis',
 'asthma',
 'atrial fibrillation (af)',
 'backache',
 'bacterial septicemia',
 'bacterial vaginosis',
 'basal cell carcinoma (bcc)',
 'bile stimulation',
 'bipolar disorder',
 'blackhead disease',
 'bladder cancer',
 'bovine respiratory disease (brd)',
 'breast cancer',
 'bronchitis',
 'bronchogenic carcinoma',
 'bronchospasm',
 'brucellosis',
 "buerger's disease",
 "burkitt's lymphoma",
 'bursitis',
 'cardiac arrest',

In [18]:
# Cancer type (as spelled in pairDF.cancerType) -> lower-cased indication terms that
# count as "approved for this cancer type". Terms are compared by exact match against
# the comma-separated terms of a drug's indication string.
# NOTE(review): short entries such as 'endometrial', 'uterine', 'uterus', 'leukemia'
# and 'myeloma' only match if an indication term is exactly that word.
cTypeInd = {}
cTypeInd['Ovarian Cancer'] = ['ovarian cancer']

cTypeInd['Leukemia'] = ['leukemia', 'acute myeloid leukemia (aml)',
                        'chronic eosinophilic leukemia (cel)',
                        'acute lymphoblastic leukemia (all)',
                        'chronic myelomonocytic leukemia (cmmol)',
                        'chronic lymphocytic leukemia (cll)',
                        'chronic myeloid leukemia (cml)',
                        'hairy cell leukemia',
                        'acute promyelocytic leukemia (apl)']

cTypeInd['Colon/Colorectal Cancer'] = ['colorectal cancer']

# Mantle cell lymphoma is a lymphoma, not a lung cancer; it is listed under
# 'Lymphoma' below instead of here.
cTypeInd['Lung Cancer'] = ['small cell lung cancer',
                           'non-small cell lung cancer (nsclc)']

cTypeInd['Kidney Cancer'] = ["wilm's tumor",
                             'renal cell carcinoma (rcc)']

cTypeInd['Breast Cancer'] = ['breast cancer']

cTypeInd['Myeloma'] = ['myeloma', 'multiple myeloma']

cTypeInd['Pancreatic Cancer'] = ['neuroendocrine tumors of pancreatic origin (pnet)',
                                 'pancreatic cancer']

cTypeInd['Lymphoma'] = ["burkitt's lymphoma",
                        'small lymphocytic lymphoma (sll)',
                        "hodgkin's lymphoma",
                        'true histiocytic lymphoma (thl)',
                        'cutaneous t-cell lymphoma (ctcl)',
                        'non-hodgkin lymphoma (nhl)',
                        'peripheral t-cell lymphoma (ptcl)',
                        'mantle cell lymphoma (mcl)']

cTypeInd['Sarcoma'] = ['soft tissue sarcoma (sts)', 'kaposi sarcoma']

cTypeInd['Thyroid Cancer'] = ['thyroid cancer',
                              'parathyroid carcinoma',
                              'medullary thyroid cancer (mtc)']

cTypeInd['Neuroblastoma'] = ['neuroblastoma']

cTypeInd['Gastric Cancer'] = ['gastric adenocarcinoma']

cTypeInd['Prostate Cancer'] = ['prostate cancer']

cTypeInd['Bladder Cancer'] = ['bladder cancer']

cTypeInd['Endometrial/Uterine Cancer'] = ['gestational choriocarcinoma',
                                          'endometrial', 'uterine', 'uterus']

cTypeInd['Head and Neck Cancer'] = ['head and neck squamous cell carcinoma (hnscc)']

cTypeInd['Skin Cancer'] = ['melanoma']

cTypeInd['Liver Cancer'] = ['hepatocellular carcinoma (hcc)']

cTypeInd['Cervical Cancer'] = ['cervical cancer']

In [19]:
def checkIndication(x, y):
    """Tell whether a drug's indication string names any of the given indications.

    Parameters
    ----------
    x : str
        Comma-separated indication string of one drug.
    y : iterable of str
        Indication terms to look for.

    Returns
    -------
    bool
        True when at least one comma-separated term of `x` equals one of the
        terms in `y`. The comparison is exact per term, ignoring case and
        surrounding whitespace; substrings do not match.
    """
    # Split on the bare comma and strip, so 'a,b' and 'a,  b' behave like 'a, b'.
    drugTerms = {term.strip() for term in x.lower().split(',')}
    wanted = {term.strip().lower() for term in y}
    return not drugTerms.isdisjoint(wanted)

# List the approved drugs for each cancer type

In [23]:
# For every cancer type that has an indication list, print the drugs whose
# indication string matches that list.
for ct, subdf in pairDF.groupby(by='cancerType'):
    if ct in cTypeInd:
        print("\n" + ct + ":")
        check = lambda x: checkIndication(x, cTypeInd[ct])
        approvedMask = subdf.drugIndication.apply(check)
        for drugName in subdf.loc[approvedMask, 'drug'].unique():
            print(drugName)


Bladder Cancer:
doxorubicin
valrubicin
vinflunine

Breast Cancer:
5-fluorouracil
abemaciclib
carmofur
cyclophosphamide
docetaxel
doxorubicin
epirubicin
everolimus
formestane
gemcitabine
ixabepilone
lapatinib
paclitaxel
palbociclib
ribociclib
toremifene
vindesine

Cervical Cancer:
topotecan

Colon/Colorectal Cancer:
5-fluorouracil
SN-38
carmofur
doxifluridine
floxuridine
irinotecan
oxaliplatin
regorafenib
tipiracil

Endometrial/Uterine Cancer:
methotrexate

Gastric Cancer:
5-fluorouracil
docetaxel
mitomycin-c

Head and Neck Cancer:
docetaxel
hydroxyurea

Kidney Cancer:
axitinib
doxorubicin
everolimus
pazopanib
sorafenib
sunitinib
temsirolimus

Leukemia:
amsacrine
azacitidine
bendamustine
bosutinib
busulfan
chlorambucil
cladribine
clofarabine
cyclophosphamide
dasatinib
daunorubicin
decitabine
doxorubicin
fludarabine
fludarabine-phosphate
homoharringtonine
hydroxyurea
ibrutinib
idarubicin
imatinib
mechlorethamine
mercaptopurine
methotrexate
midostaurin
mitoxantrone
nilotinib
ponatinib
ta

## Look at gastric cancer for FS-CDR (RF) for repurposing 

In [66]:
# All (cell line, drug) rows belonging to gastric-cancer cell lines.
isGastric = pairDF.cancerType == 'Gastric Cancer'
gastric = pairDF.loc[isGastric]
gastric.shape

(14391, 8)

In [67]:
# Preview the gastric-cancer subset.
gastric.head()

Unnamed: 0,cellLine,cancerType,drug,drugIndication,rfPred,rfCellRank,dscPred,dscCellRank
229330,ACH-000110,Gastric Cancer,5-fluorouracil,"colorectal cancer, breast cancer, pancreatic c...",0.005435,284,0.082454,251
229377,ACH-000110,Gastric Cancer,SN-38,colorectal cancer,0.348161,12,0.117813,18
229421,ACH-000110,Gastric Cancer,abemaciclib,breast cancer,0.007412,252,0.091294,135
229215,ACH-000110,Gastric Cancer,acetophenazine,psychosis,0.041489,79,0.091557,131
229493,ACH-000110,Gastric Cancer,acipimox,hyperlipidemia,0.017761,166,0.071672,367


In [68]:
# Mean RF rank, across gastric cell lines, of each drug whose indication matches
# the gastric-cancer list.
check = lambda x: checkIndication(x, cTypeInd['Gastric Cancer'])
gastricApproved = gastric[gastric.drugIndication.apply(check)]
gastricApproved.groupby('drug').rfCellRank.mean()

drug
5-fluorouracil    278.410256
docetaxel          25.538462
mitomycin-c       173.358974
Name: rfCellRank, dtype: float64

In [69]:
# Same summary for the DeepDSC ranks; relies on `check` defined in the previous cell.
gastric[gastric.drugIndication.apply(check)].groupby('drug').dscCellRank.mean()

drug
5-fluorouracil    251.794872
docetaxel           2.128205
mitomycin-c       180.512821
Name: dscCellRank, dtype: float64

In [58]:
# Measured drug/cell-line response rows, limited to cell lines present in RNA.
cdr = (
    pd.read_csv('../../data/processed/drugCellLinePairsData.csv')
      .loc[lambda pairsData: pairsData.DepMap_ID.isin(RNA.index)]
)
cdr.head()

Unnamed: 0,DepMap_ID,cancer_type,name,moa,target,indication,phase,r2,ic50,auc,lower_limit,effectiveCont,effective
0,ACH-000320,Pancreatic Cancer,floxuridine,DNA synthesis inhibitor,TYMS,colorectal cancer,Launched,0.972544,0.007879,0.336463,0.10514,6.692422,0
9,ACH-000846,Head and Neck Cancer,floxuridine,DNA synthesis inhibitor,TYMS,colorectal cancer,Launched,0.883318,0.052711,0.492464,0.101684,4.808385,0
10,ACH-000804,Neuroblastoma,floxuridine,DNA synthesis inhibitor,TYMS,colorectal cancer,Launched,0.972829,0.025269,0.421177,0.073301,5.808496,0
16,ACH-000305,Esophageal Cancer,floxuridine,DNA synthesis inhibitor,TYMS,colorectal cancer,Launched,0.786261,0.190516,0.693634,0.33356,2.625589,0
18,ACH-000046,Kidney Cancer,floxuridine,DNA synthesis inhibitor,TYMS,colorectal cancer,Launched,0.797691,0.037893,0.441029,0.032024,6.168264,0


In [64]:
# Measured response rows for 5-fluorouracil.
cdr[cdr.name == '5-fluorouracil']

Unnamed: 0,DepMap_ID,cancer_type,name,moa,target,indication,phase,r2,ic50,auc,lower_limit,effectiveCont,effective
26816,ACH-000174,Thyroid Cancer,5-fluorouracil,thymidylate synthase inhibitor,"DPYD, TYMS","colorectal cancer, breast cancer, pancreatic c...",Launched,0.762251,1.703437,0.871191,0.352683,1.028369,0
26817,ACH-000836,Head and Neck Cancer,5-fluorouracil,thymidylate synthase inhibitor,"DPYD, TYMS","colorectal cancer, breast cancer, pancreatic c...",Launched,0.803679,1.385561,0.861655,0.355879,1.119514,0
26820,ACH-000510,Lung Cancer,5-fluorouracil,thymidylate synthase inhibitor,"DPYD, TYMS","colorectal cancer, breast cancer, pancreatic c...",Launched,0.779528,2.316117,0.885603,0.35169,0.901359,0


In [59]:
# Measured response rows for gastric-cancer cell lines and the three drugs listed below.
cdr[(cdr.cancer_type == 'Gastric Cancer') & (cdr.name.isin(['5-fluorouracil', 'docetaxel', 'mitomycin-c']))]

Unnamed: 0,DepMap_ID,cancer_type,name,moa,target,indication,phase,r2,ic50,auc,lower_limit,effectiveCont,effective
4783,ACH-000736,Gastric Cancer,docetaxel,tubulin polymerization inhibitor,"BCL2, MAP2, MAP4, MAPT, NR1I2, TUBB, TUBB1","breast cancer, non-small cell lung cancer (NSC...",Launched,0.977671,0.038857,0.439954,0.02463,6.393319,0
4802,ACH-000948,Gastric Cancer,docetaxel,tubulin polymerization inhibitor,"BCL2, MAP2, MAP4, MAPT, NR1I2, TUBB, TUBB1","breast cancer, non-small cell lung cancer (NSC...",Launched,0.817384,0.029509,0.412153,0.024279,6.670703,0
4819,ACH-000919,Gastric Cancer,docetaxel,tubulin polymerization inhibitor,"BCL2, MAP2, MAP4, MAPT, NR1I2, TUBB, TUBB1","breast cancer, non-small cell lung cancer (NSC...",Launched,0.748284,0.037117,0.451819,0.055834,5.672975,0
4838,ACH-000880,Gastric Cancer,docetaxel,tubulin polymerization inhibitor,"BCL2, MAP2, MAP4, MAPT, NR1I2, TUBB, TUBB1","breast cancer, non-small cell lung cancer (NSC...",Launched,0.842545,0.026288,0.450473,0.116318,5.371945,0
4839,ACH-000847,Gastric Cancer,docetaxel,tubulin polymerization inhibitor,"BCL2, MAP2, MAP4, MAPT, NR1I2, TUBB, TUBB1","breast cancer, non-small cell lung cancer (NSC...",Launched,0.867757,0.008449,0.29708,0.039226,7.467776,1
4872,ACH-000351,Gastric Cancer,docetaxel,tubulin polymerization inhibitor,"BCL2, MAP2, MAP4, MAPT, NR1I2, TUBB, TUBB1","breast cancer, non-small cell lung cancer (NSC...",Launched,0.846306,0.034948,0.441346,0.042524,5.980146,0
4885,ACH-000255,Gastric Cancer,docetaxel,tubulin polymerization inhibitor,"BCL2, MAP2, MAP4, MAPT, NR1I2, TUBB, TUBB1","breast cancer, non-small cell lung cancer (NSC...",Launched,0.922466,0.01616,0.348987,0.018364,7.523763,1
4897,ACH-000678,Gastric Cancer,docetaxel,tubulin polymerization inhibitor,"BCL2, MAP2, MAP4, MAPT, NR1I2, TUBB, TUBB1","breast cancer, non-small cell lung cancer (NSC...",Launched,0.777161,0.032964,0.486224,0.154576,4.912457,0
4934,ACH-000344,Gastric Cancer,docetaxel,tubulin polymerization inhibitor,"BCL2, MAP2, MAP4, MAPT, NR1I2, TUBB, TUBB1","breast cancer, non-small cell lung cancer (NSC...",Launched,0.810193,0.008861,0.394221,0.175086,6.15835,0
27805,ACH-000948,Gastric Cancer,mitomycin-c,"DNA alkylating agent, DNA synthesis inhibitor",,"pancreatic cancer, gastric adenocarcinoma",Launched,0.73492,0.094456,0.521706,0.006483,6.881884,0


# Create output table

In [29]:
# Column layout of the per-cell-line table. The last six names repeat because the
# row-building loop appends one (mean rank, min rank, top drug) triple per model,
# in the order 'dsc' then 'rf'.
# NOTE(review): 'Max rank' is filled with the numerically smallest rank (.min()),
# and the duplicate names force positional (.iloc) access downstream.
cols=['Cancer type', 'Cell line', 'Drug count', 'Mean rank', 'Max rank', 'Top drug', 'Mean rank', 'Max rank', 'Top drug']
results = pd.DataFrame(columns=cols)


In [30]:
# Build one row per cell line: how the drugs approved for its cancer type are
# ranked by each model. Rows are collected in a list and turned into a frame once,
# which avoids row-by-row growth and keeps numeric dtypes.
resultRows = []
for ct, validIndications in cTypeInd.items():
    # Filter first; there is no need to copy the full pair table per cancer type.
    ctPairs = pairDF[pairDF.cancerType == ct]
    for cellLine, subdf in ctPairs.groupby(by='cellLine'):
        isApproved = subdf.drugIndication.apply(
            lambda ind, valid=validIndications: checkIndication(ind, valid))
        approved = subdf[isApproved]
        if approved.empty:
            # No approved drug matched: there is nothing to rank for this cell line.
            continue

        row = [ct, cellLine, len(approved)]
        for m in ['dsc', 'rf']:
            ranks = approved[f'{m}CellRank']
            row.append(round(ranks.mean(), 2))
            # Best rank = numerically smallest; stored under the 'Max rank' column.
            row.append(int(ranks.min()))
            row.append(approved.sort_values(by=f'{m}CellRank').drug.values[0])

        resultRows.append(row)

results = pd.DataFrame(resultRows, columns=results.columns)
    

In [31]:
# Per-cell-line table ordered by cancer type and then cell line.
r2 = results.sort_values(by=['Cancer type', 'Cell line'])
r2.head()

Unnamed: 0,Cancer type,Cell line,Drug count,Mean rank,Max rank,Top drug,Mean rank.1,Max rank.1,Top drug.1
651,Bladder Cancer,ACH-000127,3,22.33,6,doxorubicin,89.33,5,valrubicin
652,Bladder Cancer,ACH-000486,3,22.33,6,doxorubicin,70.33,7,valrubicin
653,Bladder Cancer,ACH-000522,3,22.33,6,doxorubicin,75.0,13,valrubicin
654,Bladder Cancer,ACH-000547,3,22.33,6,doxorubicin,49.33,21,valrubicin
655,Bladder Cancer,ACH-000834,3,22.0,5,doxorubicin,59.0,15,valrubicin


In [32]:
# Number of distinct cell lines in the table.
r2['Cell line'].nunique()

805

In [33]:
# Write the table to the current working directory, without the row index.
r2.to_csv('SupplementalTable9.csv', index=False)

# Get average variance in ranking

In [34]:
# Distinct top-ranked drugs per model. Columns are addressed by position because
# 'Top drug' appears twice: -4 is the first occurrence (filled for 'dsc'),
# -1 is the second (filled for 'rf').
dscTopDrugs = r2.iloc[:, -4].unique()
cdrTopDrugs = r2.iloc[:, -1].unique()

In [35]:
# Number of distinct drugs that DeepDSC ranks first for at least one cell line.
len(dscTopDrugs)

13

In [36]:
# Variance of each top DeepDSC drug's rank across all cell lines, then the mean
# of those variances.
dscRankVars = [pairDF.loc[pairDF.drug == d, 'dscCellRank'].var() for d in dscTopDrugs]

print(round(np.mean(dscRankVars), 4))

2.9479


In [37]:
# Variance of each top RF drug's rank across all cell lines, then the mean of
# those variances.
cdrRankVars = [pairDF.loc[pairDF.drug == d, 'rfCellRank'].var() for d in cdrTopDrugs]

print(round(np.mean(cdrRankVars), 4))

601.5828


# Get cancer type average & pvals

In [38]:
# Column layout of the per-cancer-type summary: three groups in the order
# mean rank, best rank, most frequent top drug. Each group has a DeepDSC and an
# FS-CDR column; the first two groups also carry a p-value.
# The second FS-CDR label was misspelled 'FS-CSR'.
cols2=['Cancer type', 'Cell line count', 'Drug count',
       'DeepDSC', 'FS-CDR', 'p-value',
       'DeepDSC', 'FS-CDR', 'p-value',
       'DeepDSC', 'FS-CDR']
results2 = pd.DataFrame(columns=cols2)

In [39]:
# Summarise the per-cell-line table by cancer type. Columns of `results` are read
# by position because their names repeat: 3/4/5 hold the DeepDSC mean rank, best
# rank and top drug, 6/7/8 the same for the RF model.
summaryRows = []
pcntsDSC = []
pcntsCDR = []
for ct, subdf in results.groupby(by='Cancer type'):
    nCellLines = len(subdf)
    row = [ct, nCellLines, subdf['Drug count'].values[0]]

    # (DeepDSC column, RF column) for mean rank, then for best rank.
    # NOTE(review): ttest is the independent-samples test although both samples come
    # from the same cell lines; a paired test may be more appropriate.
    for dscPos, fsPos in ((3, 6), (4, 7)):
        dscVals = subdf.iloc[:, dscPos].to_numpy(dtype=float)
        fsVals = subdf.iloc[:, fsPos].to_numpy(dtype=float)
        pval = ttest(dscVals, fsVals).pvalue
        row.extend([round(dscVals.mean(), 2), round(fsVals.mean(), 2), round(pval, 4)])

    # Most frequent top drug per model and the share (%) of cell lines it tops.
    topDSC = subdf.iloc[:, -4].value_counts()
    topCDR = subdf.iloc[:, -1].value_counts()
    # .iloc[0]: the counts are indexed by drug name, so a plain [0] would be a
    # deprecated positional lookup on a label index.
    pcntDSC = round(100*(topDSC.iloc[0]/nCellLines), 2)
    pcntCDR = round(100*(topCDR.iloc[0]/nCellLines), 2)
    row.extend([f'{topDSC.index[0]} ({pcntDSC})',
                f'{topCDR.index[0]} ({pcntCDR})'])

    # With one approved drug the share is trivially 100, so those cancer types
    # are left out of the averages.
    if row[2] > 1:
        pcntsDSC.append(pcntDSC)
        pcntsCDR.append(pcntCDR)

    summaryRows.append(row)

results2 = pd.DataFrame(summaryRows, columns=results2.columns)

In [40]:
# Average share (%) of cell lines topped by DeepDSC's most frequent top drug,
# over cancer types with more than one approved drug.
np.mean(pcntsDSC)

94.96125

In [41]:
# Same average for the RF model's most frequent top drug.
np.mean(pcntsCDR)

92.16375000000001

In [42]:
# (number of cancer types, number of summary columns).
results2.shape

(20, 11)

In [43]:
# Display the per-cancer-type summary table.
results2

Unnamed: 0,Cancer type,Cell line count,Drug count,DeepDSC,FS-CDR,p-value,DeepDSC.1,FS-CSR,p-value.1,DeepDSC.2,FS-CDR.1
0,Bladder Cancer,17,3,22.19,98.92,0.0,5.65,20.71,0.0,doxorubicin (100.0),valrubicin (64.71)
1,Breast Cancer,43,17,133.14,163.68,0.0,2.0,2.72,0.0,docetaxel (62.79),gemcitabine (100.0)
2,Cervical Cancer,18,1,10.0,104.94,0.0,10.0,104.94,0.0,topotecan (100.0),topotecan (100.0)
3,Colon/Colorectal Cancer,50,9,183.5,238.18,0.0,10.98,16.1,0.0,irinotecan (100.0),SN-38 (100.0)
4,Endometrial/Uterine Cancer,22,1,222.27,332.27,0.0,222.27,332.27,0.0,methotrexate (100.0),methotrexate (100.0)
5,Gastric Cancer,39,3,144.81,159.1,0.0444,2.13,25.54,0.0,docetaxel (100.0),docetaxel (100.0)
6,Head and Neck Cancer,37,2,168.86,33.27,0.0,2.11,25.86,0.0,docetaxel (100.0),docetaxel (86.49)
7,Kidney Cancer,33,7,84.71,204.76,0.0,5.64,8.7,0.0,doxorubicin (100.0),temsirolimus (84.85)
8,Leukemia,104,33,190.98,169.65,0.0,5.62,4.47,0.0,doxorubicin (100.0),vincristine (89.42)
9,Liver Cancer,9,1,118.33,290.11,0.0,118.33,290.11,0.0,sorafenib (100.0),sorafenib (100.0)


In [44]:
# Summary rows of four selected cancer types, ordered by the number of approved drugs.
results2[results2['Cancer type'].isin(['Bladder Cancer',
                                       'Head and Neck Cancer',
                                       'Gastric Cancer',
                                       'Prostate Cancer'])].sort_values(by='Drug count')

Unnamed: 0,Cancer type,Cell line count,Drug count,DeepDSC,FS-CDR,p-value,DeepDSC.1,FS-CSR,p-value.1,DeepDSC.2,FS-CDR.1
6,Head and Neck Cancer,37,2,168.86,33.27,0.0,2.11,25.86,0.0,docetaxel (100.0),docetaxel (86.49)
0,Bladder Cancer,17,3,22.19,98.92,0.0,5.65,20.71,0.0,doxorubicin (100.0),valrubicin (64.71)
5,Gastric Cancer,39,3,144.81,159.1,0.0444,2.13,25.54,0.0,docetaxel (100.0),docetaxel (100.0)
16,Prostate Cancer,10,5,77.78,35.66,0.0,2.4,2.8,0.2457,docetaxel (100.0),cabazitaxel (100.0)


In [1]:
# Total number of cell lines across the four selected cancer types, read from the
# summary table instead of re-typing the counts by hand.
caseStudyTypes = ['Bladder Cancer', 'Head and Neck Cancer',
                  'Gastric Cancer', 'Prostate Cancer']
results2.loc[results2['Cancer type'].isin(caseStudyTypes), 'Cell line count'].sum()

103

In [45]:
# Number of distinct most-frequent top drugs in the last column (RF), divided by
# the number of cancer types. The drug name is the text before the first space of
# 'drug (pct)'. The denominator is taken from the table rather than hard-coded.
len(set(i.split(" ")[0] for i in results2.iloc[:, -1])) / len(results2)

0.8

In [46]:
# Distinct most-frequent top drugs in the second-to-last column (DeepDSC); the drug
# name is the text before the first space of 'drug (pct)'.
set(i.split(" ")[0] for i in results2.iloc[:, -2])

{'cyclophosphamide',
 'docetaxel',
 'doxorubicin',
 'everolimus',
 'irinotecan',
 'lenvatinib',
 'methotrexate',
 'paclitaxel',
 'sorafenib',
 'topotecan',
 'vinblastine',
 'vindesine'}

In [47]:
# Number of cancer types whose most frequent DeepDSC top drug is docetaxel or doxorubicin.
np.sum([i.split(" ")[0] in ['docetaxel', 'doxorubicin'] for i in results2.iloc[:, -2]])

10

# Save predDF for repurposing case study

In [73]:
# RF predictions and ranks for the repurposing case study. Columns are selected by
# name, so the result does not depend on the order in which model columns were
# added to pairDF.
repCols = ['cellLine', 'cancerType', 'drug', 'drugIndication', 'rfPred', 'rfCellRank']
repDF = pairDF.loc[:, repCols].rename(columns={'rfPred': 'pred', 'rfCellRank': 'cellRank'})
# NOTE(review): the filtered frame below is only displayed; it is neither assigned
# nor written to disk, although the section heading says "Save".
repDF[repDF.cancerType.isin(['Bladder Cancer',
                             'Head and Neck Cancer',
                             'Gastric Cancer',
                             'Prostate Cancer'])]

Unnamed: 0,cellLine,cancerType,drug,drugIndication,pred,cellRank
183943,ACH-000001,Ovarian Cancer,5-fluorouracil,"colorectal cancer, breast cancer, pancreatic c...",0.001863,305
183990,ACH-000001,Ovarian Cancer,SN-38,colorectal cancer,0.395247,8
184034,ACH-000001,Ovarian Cancer,abemaciclib,breast cancer,0.003214,288
183828,ACH-000001,Ovarian Cancer,acetophenazine,psychosis,0.041572,63
184106,ACH-000001,Ovarian Cancer,acipimox,hyperlipidemia,0.016433,148
...,...,...,...,...,...,...
98736,ACH-002834,Bone Cancer,vismodegib,basal cell carcinoma (bcc),0.025566,124
98877,ACH-002834,Bone Cancer,vorinostat,cutaneous t-cell lymphoma (ctcl),0.019711,156
98769,ACH-002834,Bone Cancer,zaleplon,insomnia,0.024570,128
98834,ACH-002834,Bone Cancer,ziprasidone,"schizophrenia, bipolar disorder",0.013722,187
