In [1]:
import os, sys
import numpy as np
import pandas as pd

# 

# Define Cancer Type Specific Genes

In [18]:
# Gene symbols treated as specific to the bladder cancer type.
bladder_ids = [
    'FGFR3',
    'STAG2',
]

In [20]:
# Gene symbols treated as specific to the colon cancer type (alphabetical).
colon_ids = [
    'APC', 'ARHGAP5', 'ARHGEF10', 'AXIN1',
    'BAX', 'BCL9L',
    'CTNNB1',
    'EIF3E', 'EP300', 'ERBB3',
    'FBXW7',
    'MLH1', 'MSH2', 'MSH6',
    'NTRK2',
    'PIK3CA', 'PTPRK',
    'RSPO2', 'RSPO3',
    'SALL4', 'SFRP4', 'SMAD4', 'SRC', 'STAG1',
    'TCF7L2',
    'USP9X',
    'VTI1A',
    'WDCP',
    'ZNRF3',
]

In [22]:
# Gene symbols treated as specific to the NSCLC cancer type (alphabetical).
nsclc_ids = [
    'CD74',
    'EML4', 'EZR',
    'KDR', 'KEAP1', 'KIF5B',
    'LRIG3',
    'MAP2K1', 'MAP2K2',
    'NFE2L2', 'NKX2-1', 'NRG1',
    'SDC4', 'SLC34A2', 'SMARCA4', 'SOX2', 'STK11',
]

In [24]:
# Gene symbols treated as specific to the ovarian cancer type.
ovarian_ids = [
    'AKT2',
    'ARID1A',
    'BRCA1',
    'CASP3',
    'CSMD3',
    'PPP2R1A',
]

# 

# Read Drug-Gene-Cancer Type Interactions

In [9]:
# Load the tab-separated drug / target / cancer-type interaction table.
df = pd.read_csv(
    '../data/drug.target.interaction.fda.cosmic.cancer.type.tsv',
    sep='\t',
)

In [11]:
# Preview the first five rows of the interaction table.
df.head(n=5)

Unnamed: 0,DRUG_NAME,STRUCT_ID,TARGET_NAME,TARGET_CLASS,ACCESSION,GENE,SWISSPROT,ACT_VALUE,ACT_UNIT,ACT_TYPE,...,TDL,ORGANISM,Driver_Gene,FDA_Approved,EMA_Approved,PMDA_Approved,Bladder,Colon,NSCLC,Ovarian
0,levobupivacaine,4,Potassium voltage-gated channel subfamily H me...,Ion channel,Q12809,KCNH2,KCNH2_HUMAN,4.89,,IC50,...,Tclin,Homo sapiens,No,Yes,No,No,No,No,No,No
1,levobupivacaine,4,Sodium channel protein type 1 subunit alpha,Ion channel,P35498,SCN1A,SCN1A_HUMAN,5.79,,IC50,...,Tclin,Homo sapiens,No,Yes,No,No,No,No,No,No
2,levobupivacaine,4,Sodium channel protein type 4 subunit alpha,Ion channel,P35499,SCN4A,SCN4A_HUMAN,,,,...,Tclin,Homo sapiens,No,Yes,No,No,No,No,No,No
3,levobupivacaine,4,Prostaglandin E2 receptor EP1 subtype,GPCR,P34995,PTGER1,PE2R1_HUMAN,,,,...,Tclin,Homo sapiens,No,Yes,No,No,No,No,No,No
4,levobupivacaine,4,Cytochrome P450 2D6,Enzyme,P10635,CYP2D6,CP2D6_HUMAN,6.707,,IC50,...,Tclin,Homo sapiens,No,Yes,No,No,No,No,No,No


# 

# Calculate Gene Precision Score

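## For Each Drug

- `<TYPE>_CPS` = (number of the drug's target genes that are in the cancer-type gene list) / (number of genes in the cancer-type gene list) × 100
- `<TYPE>_CGPS` = (number of the drug's target genes that are in the cancer-type gene list) / (total number of the drug's target genes) × 100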

In [50]:
# Split pipe-delimited GENE entries and emit one row per gene.
# The split is applied to a copy (via assign) rather than written back into
# `df`, so this cell can be re-run safely: overwriting df['GENE'] with lists
# would make a second `.str.split` return NaN for every row.
df_exploded = (
    df.assign(GENE=df['GENE'].str.split('|'))
      .explode('GENE', ignore_index=True)
)

In [116]:
# Group the one-gene-per-row table by drug so each group holds one drug's rows.
drug_grouped_df = df_exploded.groupby(by='DRUG_NAME')

In [271]:
# One row per drug; score columns are added later by calculate_gsp.
drug_names = list(drug_grouped_df.groups)
drug_dps_df = pd.DataFrame({'DRUG_NAME': drug_names})

In [273]:
def calculate_gsp(drug_grouped_df, drug_dps_df, cancer_type, cancer_type_ids):
    """Add per-drug gene-overlap scores for one cancer type to ``drug_dps_df``.

    For every drug group, the overlap between the drug's unique ``GENE``
    values and ``cancer_type_ids`` is turned into two percentages:

    * ``<cancer_type>_CPS``  = overlap / number of cancer-type genes * 100
    * ``<cancer_type>_CGPS`` = overlap / number of the drug's unique genes * 100

    The drug's unique gene count is also written to the ``nGENE`` column.

    Parameters
    ----------
    drug_grouped_df : iterable of (name, group) pairs
        Typically ``DataFrame.groupby('DRUG_NAME')``; each group must expose
        a ``GENE`` column.
    drug_dps_df : pandas.DataFrame
        Frame with a ``DRUG_NAME`` column. It is modified in place.
    cancer_type : str
        Prefix used for the two score column names.
    cancer_type_ids : list of str
        Gene symbols for the cancer type. Duplicates are counted once.

    Returns
    -------
    pandas.DataFrame
        The same ``drug_dps_df`` object, with the score columns filled in.

    Raises
    ------
    ValueError
        If ``cancer_type_ids`` is empty (the CPS denominator would be zero).
    """
    # Build the cancer-type gene set once instead of once per drug.
    cancer_gene_set = set(cancer_type_ids)
    if not cancer_gene_set:
        raise ValueError(
            "cancer_type_ids must contain at least one gene for " + str(cancer_type)
        )

    for name, group in drug_grouped_df:

        # NOTE(review): unique() keeps a missing GENE value (NaN) as one entry,
        # so it is counted in nGENE -- confirm whether that is intended.
        drug_gene_ids = group['GENE'].unique().tolist()
        n_shared = len(cancer_gene_set.intersection(drug_gene_ids))
        drug_cps_score = (n_shared / len(cancer_gene_set)) * 100
        drug_gps_score = (n_shared / len(drug_gene_ids)) * 100

        # Fixed: the original referenced the undefined name `drug_dsp_df` here.
        condition = drug_dps_df['DRUG_NAME'] == name
        drug_dps_df.loc[condition, 'nGENE'] = len(drug_gene_ids)
        drug_dps_df.loc[condition, cancer_type + '_CPS'] = drug_cps_score
        drug_dps_df.loc[condition, cancer_type + '_CGPS'] = drug_gps_score

    return drug_dps_df

In [275]:
# Add the BLADDER_CPS / BLADDER_CGPS columns.
drug_dps_df = calculate_gsp(
    drug_grouped_df=drug_grouped_df,
    drug_dps_df=drug_dps_df,
    cancer_type='BLADDER',
    cancer_type_ids=bladder_ids,
)

In [276]:
# Add the COLON_CPS / COLON_CGPS columns.
drug_dps_df = calculate_gsp(
    drug_grouped_df=drug_grouped_df,
    drug_dps_df=drug_dps_df,
    cancer_type='COLON',
    cancer_type_ids=colon_ids,
)

In [277]:
# Add the NSCLC_CPS / NSCLC_CGPS columns.
drug_dps_df = calculate_gsp(
    drug_grouped_df=drug_grouped_df,
    drug_dps_df=drug_dps_df,
    cancer_type='NSCLC',
    cancer_type_ids=nsclc_ids,
)

In [279]:
# Add the OVARIAN_CPS / OVARIAN_CGPS columns.
drug_dps_df = calculate_gsp(
    drug_grouped_df=drug_grouped_df,
    drug_dps_df=drug_dps_df,
    cancer_type='OVARIAN',
    cancer_type_ids=ovarian_ids,
)

In [280]:
# Display the per-drug score table (one row per drug, two score columns per cancer type).
drug_dps_df

Unnamed: 0,DRUG_NAME,nGENE,BLADDER_CPS,BLADDER_CGPS,COLON_CPS,COLON_CGPS,NSCLC_CPS,NSCLC_CGPS,OVARIAN_CPS,OVARIAN_CGPS
0,(S)-nicardipine,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,(S)-nitrendipine,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Choline C-11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Florbetapir F-18,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Flutemetamol (18F),1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
2582,zopiclone,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2583,zotarolimus,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2584,zotepine,35.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2585,zucapsaicin,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# 

# Write Drug-Gene-Cancer Type Score Outfile

In [286]:
# Persist the per-drug score table as a tab-separated file without the index.
drug_dps_df.to_csv(
    path_or_buf='../data/drug.target.interaction.scores.tsv',
    sep='\t',
    index=False,
)