In [1]:
import os

import numpy as np
import pandas as pd
from chembl_webresource_client.new_client import new_client
from tqdm.auto import tqdm


In [2]:
# Handles to the three ChEMBL web-service resources used in this notebook:
# targets, molecules and bioactivity records.
targets_api, compounds_api, bioactivities_api = (
    new_client.target,
    new_client.molecule,
    new_client.activity,
)

In [4]:
# Path to the C0006142 gene-disease summary spreadsheet. The original
# machine-specific location remains the default; set the BRC_GDA_SUMMARY_XLSX
# environment variable to run the notebook elsewhere without editing this cell.
GDA_SUMMARY_XLSX = os.environ.get(
    'BRC_GDA_SUMMARY_XLSX',
    '/home/sumit/Downloads/C0006142_disease_gda_summary.xlsx',
)
brc_report = pd.read_excel(GDA_SUMMARY_XLSX)

In [5]:
# Preview the first five rows of the loaded report.
brc_report.head(n=5)

Unnamed: 0,Disease,Disease_id,Gene,Gene_id,UniProt,Gene_Full_Name,Protein_Class,N_diseases_g,DSI_g,DPI_g,pLI,Score_gda,EL_gda,EI_gda,N_PMIDs,N_SNPs_gda,First_Ref,Last_Ref
0,Malignant neoplasm of breast,C0006142,ESR1,2099,P03372,estrogen receptor 1,Nuclear receptor,1101,0.324,0.962,0.9992,1.0,,0.967,3371,41,1983,2020
1,Malignant neoplasm of breast,C0006142,PIK3CA,5290,P42336,"phosphatidylinositol-4,5-bisphosphate 3-kinase...",Kinase,1511,0.292,0.923,1.0,1.0,,0.985,542,13,2001,2020
2,Malignant neoplasm of breast,C0006142,BARD1,580,Q99728,BRCA1 associated RING domain 1,,75,0.597,0.538,1.3777e-24,1.0,limited,0.896,67,74,1996,2020
3,Malignant neoplasm of breast,C0006142,BRCA1,672,P38398,BRCA1 DNA repair associated,Enzyme,747,0.367,0.923,9.2157e-29,1.0,strong,0.956,2827,251,1992,2020
4,Malignant neoplasm of breast,C0006142,TP53,7157,P04637,tumor protein p53,Transcription factor,2494,0.236,0.962,0.53235,1.0,,0.973,1169,49,1982,2020


In [10]:
# UniProt accessions in report row order; one ChEMBL lookup is made per entry.
uniprot_ids = brc_report['UniProt'].tolist()

In [11]:
# Per-accession results, appended by the loop over `uniprot_ids` so that
# position i here corresponds to uniprot_ids[i].
inhibitors = list()

In [12]:
# Count, for every UniProt accession, the human IC50 (relation "=", assay
# type "B") activity records ChEMBL holds for its first matching target.
# Each accession contributes exactly one entry: the record count, or the
# string 'N/A' when there is nothing to report.
#
# Start from an empty list every time this cell runs so that a re-run (or a
# run resumed after an interruption) cannot leave stale entries behind and
# shift the positional pairing with `uniprot_ids`.
inhibitors = []

for uniprot in tqdm(uniprot_ids):
    # A missing accession (NaN/None in the spreadsheet) cannot be looked up.
    if pd.isna(uniprot):
        inhibitors.append('N/A')
        continue

    targets = targets_api.get(target_components__accession=uniprot)
    targets = pd.DataFrame.from_records(targets)

    # No ChEMBL target was returned for this accession.
    if targets.empty:
        inhibitors.append('N/A')
        continue

    # Only the first target returned for the accession is considered.
    target = targets.iloc[0]
    chembl_id = target.target_chembl_id
    bioactivities = bioactivities_api.filter(
        target_chembl_id=chembl_id, type="IC50", relation="=", assay_type="B", target_organism="Homo sapiens"
    ).only("activity_id", "assay_chembl_id", "assay_description", "assay_type", "molecule_chembl_id", "type", "standard_units", 
           "relation", "standard_value", "target_chembl_id", "target_organism",)

    # Despite the name, this is the number of matching activity records,
    # not the number of distinct molecules.
    molecules = len(bioactivities)
    inhibitors.append(molecules if molecules else 'N/A')

  0%|          | 0/49 [00:00<?, ?it/s]

In [13]:
# Pair each accession with its result by position (accession -> count or 'N/A').
brc_dict = {accession: count for accession, count in zip(uniprot_ids, inhibitors)}

In [14]:
# Display the accession -> activity-record-count mapping ('N/A' where none was found).
brc_dict

{'P03372': 3090,
 'P42336': 5422,
 'Q99728': 'N/A',
 'P38398': 15,
 'P04637': 27,
 'P51587': 'N/A',
 'Q03135': 'N/A',
 'P12830': 'N/A',
 'P04626': 2153,
 'Q86YC2': 'N/A',
 'Q9BX63': 'N/A',
 'O96017': 649,
 'P60484': 3,
 'P31749': 3619,
 'Q13315': 433,
 'P01116': 634,
 'P21802': 610,
 'Q09472': 387,
 'Q92698': 'N/A',
 'P55317': 'N/A',
 'P06400': 23,
 'Q92731': 2058,
 'P31751': 1033,
 'P42224': 25,
 'O15119': 'N/A',
 'P15559': 208,
 'Q9Y6Q9': 'N/A',
 'P35228': 856,
 'P09874': 2767,
 'Q14790': 261,
 'O15297': 8,
 'Q06609': 58,
 'P11487': 'N/A',
 'P12272': 'N/A',
 'P01112': 40,
 'P23771': 'N/A',
 'P10275': 1832,
 'P02751': 'N/A',
 'Q00987': 2559,
 'P03956': 2141,
 'P17948': 1014,
 'P46527': 'N/A',
 'P46531': 116,
 'Q01973': 'N/A',
 'P35232': 'N/A',
 'P11362': 2081,
 'Q969H0': 'N/A',
 'P05019': 'N/A',
 'Q92560': 'N/A'}

In [15]:
# Two-column table built from the mapping: one row per accession, in dict order.
brc_df = pd.DataFrame(
    {
        "Uniprot": list(brc_dict.keys()),
        "inhibitors": list(brc_dict.values()),
    }
)

In [16]:
# Gene symbols from the report, indexed like `brc_report`.
gene = brc_report.loc[:, 'Gene']

In [17]:
# Attach gene symbols by matching on the UniProt accession rather than by row
# position. `brc_df` is built from a dict keyed on accession, so it has fewer
# rows than `brc_report` whenever accessions repeat or are missing, and a
# positional assignment would then pair genes with the wrong accession.
# When every accession is unique the result is the same as before.
gene_by_uniprot = (
    brc_report
    .drop_duplicates(subset='UniProt')  # keep the first gene listed per accession
    .set_index('UniProt')['Gene']
)
brc_df['Gene'] = brc_df['Uniprot'].map(gene_by_uniprot)

In [21]:
# Put the columns in presentation order: accession, gene symbol, count.
column_order = ['Uniprot', 'Gene', 'inhibitors']
brc_df = brc_df.loc[:, column_order]

In [22]:
# Display the final table (accession, gene symbol, count).
brc_df

Unnamed: 0,Uniprot,Gene,inhibitors
0,P03372,ESR1,3090.0
1,P42336,PIK3CA,5422.0
2,Q99728,BARD1,
3,P38398,BRCA1,15.0
4,P04637,TP53,27.0
5,P51587,BRCA2,
6,Q03135,CAV1,
7,P12830,CDH1,
8,P04626,ERBB2,2153.0
9,Q86YC2,PALB2,


In [23]:
# Save the table to the working directory; the row index is written as the
# first (unnamed) column.
brc_df.to_csv(path_or_buf='brc_inhibitors.csv')