Find which proteins are annotated as ChEMBL targets, DrugBank targets or FDA drug targets, using CysDB proteins, the UniProt ChEMBL and DrugBank annotations, and the Human Protein Atlas annotations

In [None]:
import os, sys
import pandas as pd
import csv
import numpy as np

In [None]:
# Record the current working directory; the bare `cd` expression on the last
# line makes the notebook display it as the cell output.
cd = os.getcwd()
cd

In [None]:
# Date-like string tag; it is not referenced by any of the cells shown here.
# NOTE(review): presumably YYMMDD (2022-09-19) — confirm where it is used.
date = '220919'

# Read CysDB Proteins

In [None]:
# Load the 'Fig1C' sheet of Table_S1.xlsx, which holds the CysDB proteins.
cysdb_df = pd.read_excel(
    'Table_S1.xlsx',
    sheet_name='Fig1C',
)

In [None]:
# Keep only the CysDB rows whose 'ligandable' column is 'yes', then collect
# the distinct protein identifiers of those rows.
lig_df = cysdb_df.loc[cysdb_df['ligandable'].eq('yes')]
lig_ids = lig_df['proteinid'].unique()

# Read UniProt with ChEMBL and DrugBank Annotations

In [None]:
# Load the UniProt/Swiss-Prot human proteome table that carries the ChEMBL and
# DrugBank annotation columns. The file must be downloaded beforehand and
# saved as '2209_uniprot.xlsx'.
u_df = pd.read_excel('2209_uniprot.xlsx')

In [None]:
# Distinct 'Entry' values of the UniProt table, in order of first appearance.
u_ids = pd.unique(u_df['Entry'])

In [None]:
# UniProt rows that have a non-missing value in the 'DrugBank' column.
drugbank_df = u_df.loc[u_df['DrugBank'].notna()]

In [None]:
# UniProt rows that have a non-missing value in the 'ChEMBL' column.
chembl_df = u_df.loc[u_df['ChEMBL'].notna()]

In [None]:
# Distinct 'Entry' values among the rows with a DrugBank annotation.
drugbank_ids = pd.unique(drugbank_df['Entry'])

In [None]:
# Distinct 'Entry' values among the rows with a ChEMBL annotation.
chembl_ids = pd.unique(chembl_df['Entry'])

# Read Human Protein Atlas FDA Target Annotations

In [None]:
# Load the Human Protein Atlas annotation table (version 21.1), saved locally
# as '220906_hpa_dataset.csv'.
# Download page: https://www.proteinatlas.org/about/download
hpa_df = pd.read_csv('220906_hpa_dataset.csv')

In [None]:
# Select the Human Protein Atlas rows whose 'FDA' column is 'yes' and list
# the distinct 'Entry' values of those rows.
fda_df = hpa_df.loc[hpa_df['FDA'].eq('yes')]
fda_tot_ids = pd.unique(fda_df['Entry'])

In [None]:
# Build the final FDA target list: every distinct entry of fda_tot_ids except
# 'Q6ZRZ4', kept in first-seen order. dict.fromkeys removes duplicates while
# preserving insertion order, so no quadratic `not in list` check is needed.
# NOTE(review): why 'Q6ZRZ4' is excluded is not visible here — confirm.
fda_ids = list(dict.fromkeys(
    entry for entry in fda_tot_ids if entry != 'Q6ZRZ4'
))

# Create Final Dataframe

In [None]:
# Start the output table with one row per distinct UniProt entry, stored in
# the 'proteinid' column.
drug_df = pd.DataFrame({'proteinid': list(u_ids)})

In [None]:
# 'yes' where the protein is among the ChEMBL-annotated entries, else None.
drug_df['ChEMBL'] = np.where(
    drug_df['proteinid'].isin(chembl_ids),
    'yes',
    None,
)

In [None]:
# 'yes' where the protein is among the DrugBank-annotated entries, else None.
drug_df['DrugBank'] = np.where(
    drug_df['proteinid'].isin(drugbank_ids),
    'yes',
    None,
)

In [None]:
# 'yes' where the protein is among the retained FDA target entries, else None.
drug_df['FDA'] = np.where(
    drug_df['proteinid'].isin(fda_ids),
    'yes',
    None,
)

In [None]:
# 'yes' where the protein is among the ligandable CysDB proteins, else None.
drug_df['LIG'] = np.where(
    drug_df['proteinid'].isin(lig_ids),
    'yes',
    None,
)

In [None]:
# Move the working directory up one level, so relative paths used afterwards
# (such as the CSV export) resolve against the parent directory.
# NOTE: this changes process-wide state; re-running this cell moves up again.
os.chdir('../')

In [None]:
# Write the annotation table to 'druggable_ids.csv' without the row index.
drug_df.to_csv(
    'druggable_ids.csv',
    index=False,
)