# Import Packages

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from itertools import product
from sklearn.metrics.pairwise import cosine_similarity

# Define functions

In [2]:
def sim_btwn_grps(rep, groupA, groupB):
    sim = cosine_similarity(rep.iloc[:, 1:])
    sim = pd.DataFrame(sim, index=rep.index.values, columns=rep.index.values)
    
    cancers = set(rep.cancerType.unique().tolist())
    groupC = cancers - set(groupA)
    groupC = list(groupC - set(groupB))
    
    A = rep[(rep.cancerType.isin(groupA))].index.tolist()
    B = rep[(rep.cancerType.isin(groupB))].index.tolist()
    C = rep[(rep.cancerType.isin(groupC))].index.tolist()

    A2B = []
    for a, b in product(A, B):
        A2B.append(1 - sim.loc[a,b])
        
    A2C = []
    for a, c in product(A, C):
        A2C.append(1 - sim.loc[a, c])
    
    return A2B, A2C

# Load + process data 

## RNA

In [3]:
newCancerCellLines = pd.read_csv('../data/cell_lines/RNA_newcancer_allgenes.csv', index_col=0).index.tolist()
cancerGenes = pd.read_csv('../data/cell_lines/RNA_newcancer_cancergenes.csv', index_col=0).columns.tolist()

allRNA = pd.read_csv('../data/cell_lines/RNA_autoencoder_pretrain.csv', index_col=0)
allRNA = pd.concat([allRNA, pd.read_csv('../data/cell_lines/RNA_autoencoder_validation.csv', index_col=0)])
allRNA = allRNA[allRNA.index.str.startswith('ACH')]

## Cell line info 

In [4]:
cellLineInfo = pd.read_csv('../data/cell_lines/CCLE_INFO_22Q2.csv', index_col=0)
cellLineCancerType = cellLineInfo.loc[:, 'primary_disease']
cellLineCancerType.head()

DepMap_ID
ACH-000016    Kidney Cancer
ACH-000032         Leukemia
ACH-000033      Lung Cancer
ACH-000043    Non-Cancerous
ACH-000049    Non-Cancerous
Name: primary_disease, dtype: object

## New cancer type cell lines 

In [5]:
cellLines = pd.read_csv('../data/cell_lines/RNA_newcancer_cancergenes.csv', index_col=0).index.tolist()

cellLines = pd.DataFrame({'cellLine':cellLines})
cellLines['cancerType'] = cellLines.cellLine.apply(cellLineCancerType.get)
cellLines.set_index('cellLine', inplace=True)

# exclude Unknown b/c cancer type non known and Gallbladder b/c only 1 sample
cellLines = cellLines[~cellLines.cancerType.isin(['Unknown', 'Gallbladder Cancer'])]
cellLines.cancerType.value_counts()

Bone Cancer         12
Thyroid Cancer      10
Sarcoma              8
Bile Duct Cancer     6
Neuroblastoma        5
Rhabdoid             4
Prostate Cancer      3
Name: cancerType, dtype: int64

## Drug combinations

In [6]:
cdrRaw = pd.read_csv('../data/raw/CCLE_PRISM_IC50.csv', index_col='name')
cdrRaw.head()

  cdrRaw = pd.read_csv('../data/raw/CCLE_PRISM_IC50.csv', index_col='name')


Unnamed: 0_level_0,broad_id,depmap_id,ccle_name,screen_id,upper_limit,lower_limit,slope,r2,auc,ec50,ic50,moa,target,disease.area,indication,smiles,phase,passed_str_profiling,row_name
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
cytarabine,BRD-K71847383-001-12-5,ACH-000879,MFE296_ENDOMETRIUM,HTS002,1,2.122352,-0.022826,-0.026964,1.677789,8415093.0,,ribonucleotide reductase inhibitor,"POLA1, POLB, POLD1, POLE",hematologic malignancy,"acute lymphoblastic leukemia (ALL), chronic ly...",Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)...,Launched,True,ACH-000879
cytarabine,BRD-K71847383-001-12-5,ACH-000320,PSN1_PANCREAS,HTS002,1,1.325174,-0.237504,-0.147274,1.2403,9.643742,,ribonucleotide reductase inhibitor,"POLA1, POLB, POLD1, POLE",hematologic malignancy,"acute lymphoblastic leukemia (ALL), chronic ly...",Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)...,Launched,True,ACH-000320
cytarabine,BRD-K71847383-001-12-5,ACH-001145,OC316_OVARY,HTS002,1,2.08935,-0.302937,0.193893,1.472333,0.02776687,,ribonucleotide reductase inhibitor,"POLA1, POLB, POLD1, POLE",hematologic malignancy,"acute lymphoblastic leukemia (ALL), chronic ly...",Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)...,Launched,True,ACH-001145
cytarabine,BRD-K71847383-001-12-5,ACH-000873,KYSE270_OESOPHAGUS,HTS002,1,1.31182,-0.209393,-0.00546,1.20716,2.654701,,ribonucleotide reductase inhibitor,"POLA1, POLB, POLD1, POLE",hematologic malignancy,"acute lymphoblastic leukemia (ALL), chronic ly...",Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)...,Launched,True,ACH-000873
cytarabine,BRD-K71847383-001-12-5,ACH-000855,KYSE150_OESOPHAGUS,HTS002,1,1.369799,-0.27753,0.132818,1.229332,0.5889041,,ribonucleotide reductase inhibitor,"POLA1, POLB, POLD1, POLE",hematologic malignancy,"acute lymphoblastic leukemia (ALL), chronic ly...",Nc1ccn([C@@H]2O[C@H](CO)[C@@H](O)[C@H]2O)c(=O)...,Launched,True,ACH-000855


## Convert drug combinations to drug info 

**Index: Drug name**

**Columns: MOA, (gene) target, disease area, indication, (latest clinical) phase**

In [7]:
columns = ['moa', 'target', 'disease.area', 'indication', 'phase']
drugInfo = cdrRaw.loc[:, columns]
drugInfo.drop_duplicates(inplace=True)
drugInfo.head()

Unnamed: 0_level_0,moa,target,disease.area,indication,phase
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cytarabine,ribonucleotide reductase inhibitor,"POLA1, POLB, POLD1, POLE",hematologic malignancy,"acute lymphoblastic leukemia (ALL), chronic ly...",Launched
epinastine,histamine receptor antagonist,"ADRA1A, ADRA2A, HRH1, HRH2, HTR2A, HTR7",ophthalmology,conjunctivitis,Launched
floxuridine,DNA synthesis inhibitor,TYMS,oncology,colorectal cancer,Launched
valrubicin,"DNA inhibitor, topoisomerase inhibitor",TOP2A,oncology,bladder cancer,Launched
adapalene,retinoid receptor agonist,"RARA, RARB, RARG, RXRA, RXRB, RXRG",dermatology,acne vulgaris (AV),Launched


## Split drug info into cancer and non-cancer drugs

### Cancer drugs

In [8]:
cancerDrugs = drugInfo[drugInfo['disease.area'].str.contains('malignancy') | drugInfo['disease.area'].str.contains('oncology')]
cancerDrugs.head()

Unnamed: 0_level_0,moa,target,disease.area,indication,phase
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cytarabine,ribonucleotide reductase inhibitor,"POLA1, POLB, POLD1, POLE",hematologic malignancy,"acute lymphoblastic leukemia (ALL), chronic ly...",Launched
floxuridine,DNA synthesis inhibitor,TYMS,oncology,colorectal cancer,Launched
valrubicin,"DNA inhibitor, topoisomerase inhibitor",TOP2A,oncology,bladder cancer,Launched
fulvestrant,estrogen receptor antagonist,"ESR1, ESR2, GPER1",oncology,breast cancer,Launched
estramustine-phosphate,"DNA synthesis inhibitor, microtubule inhibitor","ESR1, ESR2, MAP1A, MAP2",oncology,prostate cancer,Launched


In [9]:
cancerDrugs.phase.value_counts()

Launched    130
Name: phase, dtype: int64

Launched means that the drug has been FDA approved

### Non-cancer drugs 

In [10]:
nonCancerDrugs = [('oncology' not in str(x)) & ('malignancy' not in str(x)) for x in drugInfo['disease.area']]
nonCancerDrugs = drugInfo[nonCancerDrugs]
nonCancerDrugs.head()

Unnamed: 0_level_0,moa,target,disease.area,indication,phase
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
epinastine,histamine receptor antagonist,"ADRA1A, ADRA2A, HRH1, HRH2, HTR2A, HTR7",ophthalmology,conjunctivitis,Launched
adapalene,retinoid receptor agonist,"RARA, RARB, RARG, RXRA, RXRB, RXRG",dermatology,acne vulgaris (AV),Launched
colforsin-daproate,adenylyl cyclase activator,,,,Launched
sulfamethazine,PABA antagonist,,gastroenterology,"enteritis, enteritis",Launched
niridazole,phosphofructokinase inhibitor,,infectious disease,schistosomiasis,Launched


In [11]:
nonCancerDrugs.phase.value_counts()

Launched           397
Preclinical        346
Phase 2            177
Phase 1            110
Phase 3            100
Phase 1/Phase 2     23
Withdrawn           17
Phase 2/Phase 3     10
Name: phase, dtype: int64

In [12]:
cancerDrugsList = cancerDrugs.index.tolist()
nonCancerDrugsList = nonCancerDrugs.index.tolist()

In [13]:
print(f"Number of cancer drugs: {len(cancerDrugsList)}")
print(f"Number of non-cancer drugs: {len(nonCancerDrugsList)}")

Number of cancer drugs: 130
Number of non-cancer drugs: 1180


## Pred dataframe 

In [14]:
base_path = '/fs/scratch/PCON0041/PatrickLawrence/cancer-drug-response/mcRBM/full_models/newcancer_preds/'
fname = 'CDR-smcRBM_preRBM2_Layers1_Hidden8_DO3_AFrelu_LR001_DR96_DS5_L1001_preds.csv'
full_path = os.path.join(base_path, fname)

preds = pd.read_csv(full_path, index_col=0)
preds.head()

Unnamed: 0,cell_line,cancer_type,drug,true,pred
0,ACH-000359,Bone Cancer,cytarabine,0,0.489242
1,ACH-000804,Neuroblastoma,cytarabine,0,0.494004
2,ACH-000391,Bone Cancer,cytarabine,0,0.48301
3,ACH-000364,Bone Cancer,cytarabine,0,0.483253
4,ACH-001321,Thyroid Cancer,cytarabine,0,0.495825


### Bianarize preds using threshold

In [15]:
predThreshold = 0.4806
def binarize(n, threshold):
    if n < threshold:
        return 0
    else:
        return 1
    
toBinary = lambda x: binarize(x, predThreshold)
preds['predBool'] = preds.pred.apply(toBinary)
preds

Unnamed: 0,cell_line,cancer_type,drug,true,pred,predBool
0,ACH-000359,Bone Cancer,cytarabine,0,0.489242,1
1,ACH-000804,Neuroblastoma,cytarabine,0,0.494004,1
2,ACH-000391,Bone Cancer,cytarabine,0,0.483010,1
3,ACH-000364,Bone Cancer,cytarabine,0,0.483253,1
4,ACH-001321,Thyroid Cancer,cytarabine,0,0.495825,1
...,...,...,...,...,...,...
63030,ACH-001321,Thyroid Cancer,vandetanib,0,0.564938,1
63031,ACH-001321,Thyroid Cancer,vemurafenib,0,0.569870,1
63032,ACH-001321,Thyroid Cancer,venetoclax,0,0.609534,1
63033,ACH-001321,Thyroid Cancer,volasertib,1,0.630792,1


# Case Study 

## Procedure:

    1) Iterate through each cancer type not seen during training
        a) Get top 20 drugs (by pred float) for each cell line that are: 
            i) predicted true (predBool == 1)
            ii) effective combinations by drug screen (true == 1)
            iii) not indicated to treat the current cancer type
        b) Check if any of the drugs are recommended by majority of cell lines of current cancer type
        b) Check if any of the commonly recommended drugs are indicated to treat other cancers
            i) evaluate distance between cell lines of the current cancer and indicated cancer. 
               Plausible candidate if the distance between these groups are significantly closer than others
        c) Check if any of commonly recommended drugs are not indicated for cancer. 
            i) Plausible candidate if moa or gene targets shared between disease

## Get commonly recommened drugs

In [68]:
commonRecs = {}
for cancerType, subdf in preds.groupby(by='cancer_type'):
    if cancerType in ['Unknown', "Gallbladder Cancer"]:
        continue
    commonRecs[cancerType] = {}
    commonRecs[cancerType]['nCellLines'] = len(subdf.cell_line.unique())
    subdf = subdf[(subdf.predBool == 1) & (subdf.true == 1)]
    subdf.sort_values(by='pred', inplace=True)
    for cellLine, subsubdf in subdf.groupby(by='cell_line'):
        subsubdf.sort_values(by='pred')
        topk = 0
        for i in range(len(subsubdf)):
            drugName = subsubdf.iloc[i, :].drug
            if drugName not in drugInfo.index.tolist():
                continue
            cancer = cancerType.split(' ')[0].lower()
            if cancer not in str(drugInfo.loc[drugName, 'indication']):
                if drugName in list(commonRecs[cancerType].keys()):
                    commonRecs[cancerType][drugName]["count"] += 1
                    commonRecs[cancerType][drugName]["preds"].append(subsubdf.iloc[i, :].pred)
                    commonRecs[cancerType][drugName]["cellLines"].append(cellLine)
                else:
                    commonRecs[cancerType][drugName] = {}
                    commonRecs[cancerType][drugName]["count"] = 1
                    commonRecs[cancerType][drugName]["preds"] = [subsubdf.iloc[i, :].pred]
                    commonRecs[cancerType][drugName]["cellLines"] = [cellLine]
            topk += 1
            if topk >= 20:
                break

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subdf.sort_values(by='pred', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subdf.sort_values(by='pred', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subdf.sort_values(by='pred', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subdf.sort_values(by='pred', inplace=Tr

In [69]:
for cancer in commonRecs.keys():
    nCL = commonRecs[cancer]['nCellLines']
    print(f"Cancer: {cancer}; nCellLines: {nCL}")

Cancer: Bile Duct Cancer; nCellLines: 6
Cancer: Bone Cancer; nCellLines: 12
Cancer: Neuroblastoma; nCellLines: 5
Cancer: Prostate Cancer; nCellLines: 3
Cancer: Rhabdoid; nCellLines: 4
Cancer: Sarcoma; nCellLines: 8
Cancer: Thyroid Cancer; nCellLines: 10


In [70]:
results = pd.DataFrame(columns=['Cancer Type', 'Drug Candidate', 'PredScore', '%Rec', 'CLs',
                                'MOA', 'Target(s)', 'Phase', 'FDA indication'])
i = 0
for cancer in commonRecs.keys():
    nCL = commonRecs[cancer]['nCellLines']
    for drug in list(commonRecs[cancer].keys())[1:]:
        prctRec = 100 * (commonRecs[cancer][drug]['count'] / nCL)
        CLs = commonRecs[cancer][drug]['cellLines']
        avg_pred = np.mean(commonRecs[cancer][drug]['preds'])
        moa = drugInfo.loc[drug, 'moa']
        target = drugInfo.loc[drug, 'target']
        phase = drugInfo.loc[drug, 'phase']
        if phase == 'Launched':
            phase = 'Approved'
        indication = drugInfo.loc[drug, 'indication']
        
        results.loc[i, :] = [cancer, drug, avg_pred, prctRec, CLs, moa, target, phase, indication]
        i += 1

In [71]:
common = results[results['%Rec'] >= 50.]
commonSorted = common.sort_values(by=['Cancer Type', '%Rec', 'PredScore'], ascending=[True, False, False])

In [41]:
commonSorted['Cancer Type'].unique()

array(['Bile Duct Cancer', 'Bone Cancer', 'Neuroblastoma',
       'Prostate Cancer', 'Rhabdoid', 'Sarcoma', 'Thyroid Cancer'],
      dtype=object)

In [72]:
commonSorted[commonSorted['Cancer Type'] == 'Bile Duct Cancer']

Unnamed: 0,Cancer Type,Drug Candidate,PredScore,%Rec,CLs,MOA,Target(s),Phase,FDA indication
33,Bile Duct Cancer,Ro-106-9920,0.485374,66.666667,"[ACH-000209, ACH-000268, ACH-000461, ACH-000808]",NFkB pathway inhibitor,,Preclinical,
31,Bile Duct Cancer,tyrphostin-AG-99,0.484903,66.666667,"[ACH-000209, ACH-000268, ACH-000461, ACH-000808]",tyrosine kinase inhibitor,EGFR,Preclinical,
42,Bile Duct Cancer,NSC-632839,0.483116,66.666667,"[ACH-000268, ACH-000461, ACH-000808, ACH-000976]",ubiquitin specific protease inhibitor,"SENP2, USP1, USP2, USP7",Preclinical,
25,Bile Duct Cancer,alexidine,0.482564,66.666667,"[ACH-000209, ACH-000268, ACH-000461, ACH-000808]",phosphatidylglycerophosphatase inhibitor,PTPMT1,Preclinical,
13,Bile Duct Cancer,ryuvidine,0.487055,50.0,"[ACH-000182, ACH-000268, ACH-000976]",histone lysine methyltransferase inhibitor,"CDK2, CDK4",Preclinical,
35,Bile Duct Cancer,cetylpyridinium,0.48701,50.0,"[ACH-000209, ACH-000268, ACH-000976]",,,Approved,"gingivitis, mouth inflammation"
38,Bile Duct Cancer,zardaverine,0.486857,50.0,"[ACH-000209, ACH-000268, ACH-000461]",phosphodiesterase inhibitor,PDE4D,Phase 2,
10,Bile Duct Cancer,metaraminol,0.486803,50.0,"[ACH-000182, ACH-000268, ACH-000976]",adrenergic receptor agonist,ADRA1A,Approved,hypotension
37,Bile Duct Cancer,ciclopirox,0.486758,50.0,"[ACH-000209, ACH-000268, ACH-000808]",membrane integrity inhibitor,ATP1A1,Approved,onychomycosis
36,Bile Duct Cancer,mexiletine,0.486692,50.0,"[ACH-000209, ACH-000268, ACH-000808]",sodium channel blocker,"AHR, SCN4A, SCN5A",Approved,"ventricular arrhythmias, ventricular tachycard..."


In [74]:
with pd.option_context('display.max_colwidth', None):
  display(commonSorted[commonSorted['Cancer Type'] == 'Bone Cancer'])

Unnamed: 0,Cancer Type,Drug Candidate,PredScore,%Rec,CLs,MOA,Target(s),Phase,FDA indication
89,Bone Cancer,mexiletine,0.483848,50.0,"[ACH-000052, ACH-000082, ACH-000210, ACH-000279, ACH-000359, ACH-000613]",sodium channel blocker,"AHR, SCN4A, SCN5A",Approved,"ventricular arrhythmias, ventricular tachycardia (VT)"
80,Bone Cancer,melphalan,0.483103,50.0,"[ACH-000052, ACH-000082, ACH-000087, ACH-000210, ACH-000359, ACH-000748]","DNA alkylating agent, DNA inhibitor",,Approved,multiple myeloma
78,Bone Cancer,gemcitabine,0.483099,50.0,"[ACH-000052, ACH-000082, ACH-000087, ACH-000210, ACH-000359, ACH-000748]",ribonucleotide reductase inhibitor,"CMPK1, RRM1, RRM2, TYMS",Approved,"ovarian cancer, breast cancer, non-small cell lung cancer (NSCLC), pancreatic cancer"
82,Bone Cancer,Ro-106-9920,0.482824,50.0,"[ACH-000052, ACH-000087, ACH-000210, ACH-000279, ACH-000359, ACH-000748]",NFkB pathway inhibitor,,Preclinical,


In [75]:
commonSorted[commonSorted['Cancer Type'] == 'Neuroblastoma']

Unnamed: 0,Cancer Type,Drug Candidate,PredScore,%Rec,CLs,MOA,Target(s),Phase,FDA indication
200,Neuroblastoma,NSC-319726,0.484434,60.0,"[ACH-000099, ACH-000259, ACH-000260]",p53 activator,,Preclinical,
186,Neuroblastoma,indisulam,0.482896,60.0,"[ACH-000099, ACH-000259, ACH-000260]",CDK inhibitor,"CA1, CA12, CA14, CA2, CA6, CA7, CA9",Phase 2,
181,Neuroblastoma,BAY-11-7085,0.482349,60.0,"[ACH-000099, ACH-000259, ACH-000260]",NFkB pathway inhibitor,NFKBIA,Preclinical,


In [76]:
commonSorted[commonSorted['Cancer Type'] == 'Prostate Cancer']

Unnamed: 0,Cancer Type,Drug Candidate,PredScore,%Rec,CLs,MOA,Target(s),Phase,FDA indication
281,Prostate Cancer,trovafloxacin,0.483216,66.666667,"[ACH-000956, ACH-000977]",bacterial DNA gyrase inhibitor,TOP2A,Withdrawn,
271,Prostate Cancer,fenoprofen,0.482891,66.666667,"[ACH-000090, ACH-000977]",prostaglandin inhibitor,"PTGS1, PTGS2, SLC5A8",Approved,"rheumatoid arthritis, osteoarthritis"
272,Prostate Cancer,triapine,0.482854,66.666667,"[ACH-000090, ACH-000977]",ribonucleotide reductase inhibitor,"RRM1, RRM2",Phase 2,
279,Prostate Cancer,NH125,0.482454,66.666667,"[ACH-000956, ACH-000977]",eukaryotic translation elongation factor 2 inh...,,Preclinical,
277,Prostate Cancer,SB-200646,0.482255,66.666667,"[ACH-000956, ACH-000977]",serotonin receptor antagonist,"HTR2B, HTR2C",Preclinical,
268,Prostate Cancer,dexrazoxane,0.482127,66.666667,"[ACH-000090, ACH-000977]","chelating agent, topoisomerase inhibitor","TOP2A, TOP2B",Approved,cardiomyopathy
278,Prostate Cancer,iodipamide,0.482014,66.666667,"[ACH-000956, ACH-000977]",radiopaque medium,ALB,Approved,contrast agent
275,Prostate Cancer,IKK-2-inhibitor-V,0.481941,66.666667,"[ACH-000956, ACH-000977]","IKK inhibitor, NFkB pathway inhibitor",IKBKB,Phase 1,
274,Prostate Cancer,RN-1734,0.481769,66.666667,"[ACH-000956, ACH-000977]",TRPV antagonist,TRPV4,Preclinical,
260,Prostate Cancer,indisulam,0.481398,66.666667,"[ACH-000090, ACH-000956]",CDK inhibitor,"CA1, CA12, CA14, CA2, CA6, CA7, CA9",Phase 2,


In [77]:
commonSorted[commonSorted['Cancer Type'] == 'Rhabdoid']

Unnamed: 0,Cancer Type,Drug Candidate,PredScore,%Rec,CLs,MOA,Target(s),Phase,FDA indication
321,Rhabdoid,BAY-11-7085,0.484254,75.0,"[ACH-000096, ACH-000172, ACH-001128]",NFkB pathway inhibitor,NFKBIA,Preclinical,
317,Rhabdoid,cyclocytidine,0.483499,75.0,"[ACH-000096, ACH-000172, ACH-001128]","DNA synthesis inhibitor, RNA synthesis inhibitor",,Preclinical,
314,Rhabdoid,SU3327,0.483097,75.0,"[ACH-000096, ACH-000172, ACH-001128]",JNK inhibitor,MAPK8,Preclinical,
310,Rhabdoid,thioguanine,0.482259,75.0,"[ACH-000096, ACH-000172, ACH-001128]",purine antagonist,"IMPDH1, IMPDH2",Approved,acute myeloid leukemia (AML)
313,Rhabdoid,zardaverine,0.482191,75.0,"[ACH-000096, ACH-000172, ACH-001128]",phosphodiesterase inhibitor,PDE4D,Phase 2,
322,Rhabdoid,ketoprofen,0.484647,50.0,"[ACH-000096, ACH-001128]",cyclooxygenase inhibitor,"CXCR1, PTGS1, PTGS2, SLC5A8",Approved,"rheumatoid arthritis, osteoarthritis"
331,Rhabdoid,triapine,0.484176,50.0,"[ACH-000172, ACH-000201]",ribonucleotide reductase inhibitor,"RRM1, RRM2",Phase 2,
343,Rhabdoid,atipamezole,0.484144,50.0,"[ACH-000201, ACH-001128]",adrenergic receptor antagonist,,Approved,reverse sedative
309,Rhabdoid,thiomersal,0.483595,50.0,"[ACH-000096, ACH-001128]",other antibiotic,OXCT1,Approved,
311,Rhabdoid,florfenicol,0.48329,50.0,"[ACH-000096, ACH-000172]",protein synthesis inhibitor,,Approved,bovine respiratory disease (BRD)


In [79]:
with pd.option_context('display.max_colwidth', None):
  display(commonSorted[commonSorted['Cancer Type'] == 'Sarcoma'])

Unnamed: 0,Cancer Type,Drug Candidate,PredScore,%Rec,CLs,MOA,Target(s),Phase,FDA indication
371,Sarcoma,Ro-106-9920,0.484812,87.5,"[ACH-000037, ACH-000054, ACH-000169, ACH-000449, ACH-000505, ACH-000833, ACH-000939]",NFkB pathway inhibitor,,Preclinical,
370,Sarcoma,tyrphostin-AG-99,0.484424,87.5,"[ACH-000037, ACH-000054, ACH-000169, ACH-000449, ACH-000505, ACH-000833, ACH-000939]",tyrosine kinase inhibitor,EGFR,Preclinical,
383,Sarcoma,NSC-3852,0.48326,75.0,"[ACH-000054, ACH-000169, ACH-000449, ACH-000505, ACH-000833, ACH-000939]",HDAC inhibitor,HDAC1,Preclinical,
382,Sarcoma,BAY-11-7082,0.482987,75.0,"[ACH-000054, ACH-000169, ACH-000449, ACH-000505, ACH-000833, ACH-000939]",NFkB pathway inhibitor,RELA,Preclinical,
377,Sarcoma,zardaverine,0.486891,62.5,"[ACH-000037, ACH-000054, ACH-000169, ACH-000505, ACH-000835]",phosphodiesterase inhibitor,PDE4D,Phase 2,
394,Sarcoma,melphalan,0.483632,62.5,"[ACH-000169, ACH-000449, ACH-000505, ACH-000833, ACH-000939]","DNA alkylating agent, DNA inhibitor",,Approved,multiple myeloma
388,Sarcoma,cetylpyridinium,0.483243,62.5,"[ACH-000054, ACH-000169, ACH-000833, ACH-000835, ACH-000939]",,,Approved,"gingivitis, mouth inflammation"
384,Sarcoma,combretastatin-A-4,0.481865,62.5,"[ACH-000054, ACH-000169, ACH-000833, ACH-000835, ACH-000939]",tubulin polymerization inhibitor,,Phase 2,
367,Sarcoma,broxyquinoline,0.484086,50.0,"[ACH-000037, ACH-000449, ACH-000505, ACH-000833]",antiprotozoal agent,,Phase 1,
393,Sarcoma,DCEBIO,0.484046,50.0,"[ACH-000054, ACH-000833, ACH-000835, ACH-000939]",potassium channel activator,"KCNN2, KCNN3, KCNN4",Preclinical,


In [81]:
with pd.option_context('display.max_colwidth', None):
  display(commonSorted[commonSorted['Cancer Type'] == 'Thyroid Cancer'])

Unnamed: 0,Cancer Type,Drug Candidate,PredScore,%Rec,CLs,MOA,Target(s),Phase,FDA indication
436,Thyroid Cancer,alexidine,0.482831,60.0,"[ACH-000163, ACH-000716, ACH-000897, ACH-001306, ACH-001307, ACH-001321]",phosphatidylglycerophosphatase inhibitor,PTPMT1,Preclinical,
442,Thyroid Cancer,gemcitabine,0.486544,50.0,"[ACH-000163, ACH-000716, ACH-000897, ACH-001307, ACH-001321]",ribonucleotide reductase inhibitor,"CMPK1, RRM1, RRM2, TYMS",Approved,"ovarian cancer, breast cancer, non-small cell lung cancer (NSCLC), pancreatic cancer"
441,Thyroid Cancer,NSC-3852,0.4855,50.0,"[ACH-000163, ACH-000716, ACH-000897, ACH-001307, ACH-001321]",HDAC inhibitor,HDAC1,Preclinical,
437,Thyroid Cancer,teriflunomide,0.485018,50.0,"[ACH-000163, ACH-000716, ACH-000897, ACH-001306, ACH-001321]",dihydroorotate dehydrogenase inhibitor,DHODH,Approved,multiple sclerosis
439,Thyroid Cancer,BAY-11-7082,0.484927,50.0,"[ACH-000163, ACH-000716, ACH-000897, ACH-001307, ACH-001321]",NFkB pathway inhibitor,RELA,Preclinical,
492,Thyroid Cancer,chloroxine,0.483954,50.0,"[ACH-000716, ACH-000897, ACH-001306, ACH-001307, ACH-001321]",opioid receptor antagonist,OPRK1,Approved,"diarrhea, inflammatory bowel disease, giardiasis, dandruff"


In [82]:
commonSorted.MOA.value_counts()

NFkB pathway inhibitor                                  7
ribonucleotide reductase inhibitor                      5
membrane integrity inhibitor                            4
sodium channel blocker                                  4
phosphodiesterase inhibitor                             3
HDAC inhibitor                                          3
tubulin polymerization inhibitor                        2
tyrosine kinase inhibitor                               2
CDK inhibitor                                           2
p53 activator                                           2
DNA alkylating agent, DNA inhibitor                     2
phosphatidylglycerophosphatase inhibitor                2
potassium channel activator                             1
other antifungal                                        1
cyclooxygenase inhibitor                                1
tyrosine phosphatase inhibitor                          1
antiprotozoal agent                                     1
src inhibitor,

# Explore Results

## Neuroblastoma -- also need evidence of crossing BBB

### BAY-11-7082

In [None]:
drugInfo.loc['BAY-11-7082',:]

 - Evidence of crossing BBB: https://www-ncbi-nlm-nih-gov.proxy.lib.ohio-state.edu/pmc/articles/PMC5845744/ (used to kill brain eating ameoba)


 - Evidence of NFkB pathway involvement in cancer:
     - https://www-ncbi-nlm-nih-gov.proxy.lib.ohio-state.edu/pmc/articles/PMC2869521/
     - https://www-nature-com.proxy.lib.ohio-state.edu/articles/sigtrans201723
     - https://www-nature-com.proxy.lib.ohio-state.edu/articles/nri.2017.142
     - https://molecular-cancer.biomedcentral.com/articles/10.1186/1476-4598-12-86
     
     
 - Evidence of NFkB driven Neuroblastoma:
     - https://www-ncbi-nlm-nih-gov.proxy.lib.ohio-state.edu/pmc/articles/PMC4280060/
     
     
 - Evidence of RELA's (drug's target) involvment in cancer development
     - https://pubmed-ncbi-nlm-nih-gov.proxy.lib.ohio-state.edu/31382678/
     
 - Evidence of drug + disease pathway intersection increasing survival
     - https://www.jbc.org/article/S0021-9258(19)49039-X/fulltext

#### Look at RELA expression in Neuroblastoma vs rest of cancer samples

In [None]:
NB_cellLines = preds[preds.cancer_type == 'Neuroblastoma'].cell_line.unique().tolist()

In [None]:
NB_RELA_mean = newCancerRNA.loc[NB_cellLines, 'RELA'].mean()
NB_RELA_std = newCancerRNA.loc[NB_cellLines, 'RELA'].std()

print(f"Mean differential RELA expression (STD) in Neuroblastoma cell lines: {round(NB_RELA_mean, 4)} ({round(NB_RELA_std, 4)})")


In [None]:
nonNB_cellLines = preds[preds.cancer_type != 'Neuroblastoma'].cell_line.unique().tolist()

nonNBExpRELA = newCancerRNA.loc[nonNB_cellLines, 'RELA'].tolist()
nonNBExpRELA.extend(trainRNA.loc[:, 'RELA'].tolist())

In [None]:
nonNB_RELA_mean = np.mean(nonNBExpRELA)
nonNB_RELA_std = np.std(nonNBExpRELA)

print(f"Mean differential RELA expression (STD) in non-Neuroblastoma cell lines: {round(nonNB_RELA_mean, 4)} ({round(nonNB_RELA_std, 4)})")

### Indisulam

In [None]:
drugInfo.loc['indisulam',:]

 - Evidence of drugs utility for treating high risk neuroblastoma: https://www-nature-com.proxy.lib.ohio-state.edu/articles/s41467-022-28907-3

### NSC-319726

In [None]:
drugInfo.loc['NSC-319726',:]

 - Evidence of utility for glioblastoma: https://pubmed-ncbi-nlm-nih-gov.proxy.lib.ohio-state.edu/29576531/
 - Evidence of targeting p53 mutations (typically associated with mechanism of resistance to therapy): https://www-sciencedirect-com.proxy.lib.ohio-state.edu/science/article/pii/S153561082030605X?via%3Dihub

## Thyroid Cancer

### Alexidine

In [None]:
drugInfo.loc['alexidine',:]

MOA plays role in phosphoinositide 3-kinase pathway, which mediates actions of horomones, GFs, etc

 - Evidence of PI3K pathway role in thyroid cancer development: https://www-sciencedirect-com.proxy.lib.ohio-state.edu/science/article/pii/S0022227520354559
 
 - Evidence of cancer death promotion by inhibiting gene: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0053803
 
 - Genetic alterations in PI3K pathway induces especially aggressive thyroid cancers: https://www-ncbi-nlm-nih-gov.proxy.lib.ohio-state.edu/pmc/articles/PMC2935335/

### Teriflunomide

In [None]:
drugInfo.loc['teriflunomide',:]

- Recent evidence that targeting DHODH has potential to be effective therapeutic (especially in combination with others): 
    - https://cancerandmetabolism.biomedcentral.com/articles/10.1186/s40170-021-00250-z
    - https://www-sciencedirect-com.proxy.lib.ohio-state.edu/science/article/pii/S0163725818301967
    
    
- Evidence of similar drug suppressing growth of thyroid cancers
    - https://www-ncbi-nlm-nih-gov.proxy.lib.ohio-state.edu/pmc/articles/PMC3805832/
    

### Chloroxine

In [None]:
drugInfo.loc['chloroxine',:]

Throid stimulating horomone (TSH) binds to the thyroid stimulating horomone receptor (TSHR), which is a G-protein coupled receptor. Evidence that downregulation of GPCRs leads to thyroid cancer pathogenesis: https://pubmed-ncbi-nlm-nih-gov.proxy.lib.ohio-state.edu/34000025/. Additionally, GPCRs mediate PI3K/ATK signalling (https://www-ncbi-nlm-nih-gov.proxy.lib.ohio-state.edu/pmc/articles/PMC5207145/), and alterations to this pathway leads to thyroid cancer pathogenesis (https://www.frontiersin.org/articles/10.3389/fendo.2015.00188/full)

In [None]:
NB_cellLines = preds[preds.cancer_type == 'Thyroid Cancer'].cell_line.unique().tolist()

NB_RELA_mean = newCancerRNA.loc[NB_cellLines, 'OPRK1'].mean()
NB_RELA_std = newCancerRNA.loc[NB_cellLines, 'OPRK1'].std()

print(f"Mean differential OPRK1 expression (STD) in Thyroid cancer cell lines: {round(NB_RELA_mean, 4)} ({round(NB_RELA_std, 4)})")


nonNB_cellLines = preds[preds.cancer_type != 'Thyroid Cancer'].cell_line.unique().tolist()

nonNBExpRELA = newCancerRNA.loc[nonNB_cellLines, 'OPRK1'].tolist()
nonNBExpRELA.extend(trainRNA.loc[:, 'OPRK1'].tolist())

nonNB_RELA_mean = np.mean(nonNBExpRELA)
nonNB_RELA_std = np.std(nonNBExpRELA)

print(f"Mean differential OPRK1 expression (STD) in non-Thyroid cancer cell lines: {round(nonNB_RELA_mean, 4)} ({round(nonNB_RELA_std, 4)})")

### Gemcitabine

In [10]:
drugInfo.loc['gemcitabine',:]

moa                            ribonucleotide reductase inhibitor
target                                    CMPK1, RRM1, RRM2, TYMS
disease.area                                             oncology
indication      ovarian cancer, breast cancer, non-small cell ...
phase                                                    Launched
Name: gemcitabine, dtype: object

In [11]:
drugInfo.loc['gemcitabine',:].indication

'ovarian cancer, breast cancer, non-small cell lung cancer (NSCLC), pancreatic cancer'

In [12]:
[g in cancerGenes for g in ['CMPK1', 'RRM1', 'RRM2', 'TYMS']]

[False, False, False, False]

In [13]:
[g in allRNA.columns.tolist() for g in ['CMPK1', 'RRM1', 'RRM2', 'TYMS']]

[False, False, False, False]

In [16]:
rep = allRNA.copy()
rep.insert(0, 'cancerType', [cellLineCancerType.get(x) for x in allRNA.index.tolist()])

A2B, A2C = sim_btwn_grps(rep, groupA=['Thyroid Cancer'], groupB=['Ovarian Cancer', 'Breast Cancer', 
                                                                 'Pancreatic Cancer', 'Lung Cancer'])

print(f"Avg similarity between indicated cancer types and Thyroid cancer: {round(np.mean(A2B), 4)} ({round(np.std(A2B), 4)})")

print(f"Avg similarity between Thyroid cancer and non-indicated cancer types: {round(np.mean(A2C), 4)} ({round(np.std(A2C), 4)})")


Avg similarity between indicated cancer types and Thyroid cancer: 0.0462 (0.0141)
Avg similarity between Thyroid cancer and non-indicated cancer types: 0.0545 (0.0213)


Because the expression is similar, it is likely that these cancer types have similar mechanisms of pathogenesis and progression. As such, it is possible that Gemcitabine could be reporposed to treat Thyroid cancer.

 - Evidence that Gemcitabine enhances the effects of radiation in patients with medullary thyroid cancer: https://pubmed-ncbi-nlm-nih-gov.proxy.lib.ohio-state.edu/31725814/

## Sarcoma

### tyrphostin-AG-99

In [293]:
drugInfo.loc['tyrphostin-AG-99',:]

moa             tyrosine kinase inhibitor
target                               EGFR
disease.area                          NaN
indication                            NaN
phase                         Preclinical
Name: tyrphostin-AG-99, dtype: object

- Evidence of tyrosine kinase's activation in cancer:
    - https://molecular-cancer.biomedcentral.com/articles/10.1186/s12943-018-0782-4
- Evidence suggesting that TKIs are effective for reducing cancer growth, however often are difficult to approve due to off-target effects: https://www-nature-com.proxy.lib.ohio-state.edu/articles/s41388-021-01841-2

### Ro-106-9920

In [294]:
drugInfo.loc['Ro-106-9920',:]

moa             NFkB pathway inhibitor
target                             NaN
disease.area                       NaN
indication                         NaN
phase                      Preclinical
Name: Ro-106-9920, dtype: object

Can use same evidence for NFkB pathway targeting that was used for NB + BAY-11-7082

### BAY-11-7082

## Bone and Brain cancers

Like Sarcoma and Neuroblastoma, NFkB inhibitors are highly recommened, qualitatively, this is not surprising when looking at the cancer clustering. Bone, brain, NB, and sarcoma are all relatively closely grouped, which indicates that these cancers may have similar mechanisms driving pathogenesis 