#### Import libraries

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from chembl_webresource_client.new_client import new_client
from tqdm.auto import tqdm

#### Connect to the ChEMBL API

In [2]:
# Handles for the two ChEMBL web-service endpoints used in this notebook:
# `target` (protein/target lookup) and `activity` (bioactivity records).
targets_api, bioactivities_api = new_client.target, new_client.activity

#### Import the proteins/targets of interest 

###### In this example I'm using the top targets of Breast Carcinoma (DisGeNET: C0006142, listed there as "Malignant neoplasm of breast") to check how many inhibitors with experimentally measured IC50 values are associated with each target. You can build your own list of targets based on your study.

In [3]:
# Gene-disease association summary for C0006142, expected in the current
# user's Downloads folder. Resolving the folder from the home directory
# (instead of a hard-coded /home/<user> prefix) lets the notebook run on other
# machines; change BRC_REPORT_PATH if the spreadsheet lives somewhere else.
BRC_REPORT_PATH = Path.home() / "Downloads" / "C0006142_disease_gda_summary.xlsx"
brc_report = pd.read_excel(BRC_REPORT_PATH)

In [4]:
# Preview the first five rows to check that the report loaded as expected.
brc_report.head(n=5)

Unnamed: 0,Disease,Disease_id,Gene,Gene_id,UniProt,Gene_Full_Name,Protein_Class,N_diseases_g,DSI_g,DPI_g,pLI,Score_gda,EL_gda,EI_gda,N_PMIDs,N_SNPs_gda,First_Ref,Last_Ref
0,Malignant neoplasm of breast,C0006142,ESR1,2099,P03372,estrogen receptor 1,Nuclear receptor,1101,0.324,0.962,0.9992,1.0,,0.967,3371,41,1983,2020
1,Malignant neoplasm of breast,C0006142,PIK3CA,5290,P42336,"phosphatidylinositol-4,5-bisphosphate 3-kinase...",Kinase,1511,0.292,0.923,1.0,1.0,,0.985,542,13,2001,2020
2,Malignant neoplasm of breast,C0006142,BARD1,580,Q99728,BRCA1 associated RING domain 1,,75,0.597,0.538,1.3777e-24,1.0,limited,0.896,67,74,1996,2020
3,Malignant neoplasm of breast,C0006142,BRCA1,672,P38398,BRCA1 DNA repair associated,Enzyme,747,0.367,0.923,9.2157e-29,1.0,strong,0.956,2827,251,1992,2020
4,Malignant neoplasm of breast,C0006142,TP53,7157,P04637,tumor protein p53,Transcription factor,2494,0.236,0.962,0.53235,1.0,,0.973,1169,49,1982,2020


#### We will use the UniProt IDs of the targets to obtain their respective ChEMBL IDs.

In [5]:
# UniProt accessions of all targets in the report, in row order.
uniprot_ids = brc_report['UniProt'].tolist()

In [6]:
# Per-target result list filled by the fetch loop: a record count, or 'N/A'.
inhibitors = list()

In [7]:
# Map every UniProt accession to None for now; the fetch loop replaces the
# placeholder with a bioactivity DataFrame when records are found.
uniprot_ids_dict = dict.fromkeys(uniprot_ids)

In [8]:
def fetch_ic50_bioactivities(uniprot):
    """Fetch the IC50 bioactivity records ChEMBL holds for one UniProt accession.

    The target endpoint is queried with ``target_components__accession=uniprot``
    and the first returned target is used. Its bioactivities are then filtered
    to ``type="IC50"``, ``relation="="``, ``assay_type="B"`` and
    ``target_organism="Homo sapiens"``.

    Parameters
    ----------
    uniprot : str
        UniProt accession of the protein, e.g. ``"P03372"``.

    Returns
    -------
    pandas.DataFrame or None
        One row per bioactivity record, or ``None`` when ChEMBL returns no
        target for the accession or no bioactivity matches the filters.
    """
    targets = pd.DataFrame.from_records(
        targets_api.get(target_components__accession=uniprot)
    )
    if targets.empty:
        return None

    # NOTE(review): only the first target returned for the accession is used;
    # confirm that this is the intended entry when ChEMBL returns several.
    chembl_id = targets.iloc[0].target_chembl_id

    bioactivities = bioactivities_api.filter(
        target_chembl_id=chembl_id, type="IC50", relation="=",
        assay_type="B", target_organism="Homo sapiens"
    ).only("activity_id", "assay_chembl_id", "assay_description",
           "assay_type", "molecule_chembl_id", "type", "standard_units",
           "relation", "standard_value", "target_chembl_id", "target_organism",)

    if len(bioactivities) == 0:
        return None
    return pd.DataFrame.from_records(bioactivities)


# Rebuild both accumulators here so that re-running this cell starts from a
# clean slate instead of appending a second set of counts to `inhibitors` or
# keeping stale DataFrames from an earlier run in `uniprot_ids_dict`.
inhibitors = []
uniprot_ids_dict = dict.fromkeys(uniprot_ids)

for uniprot in tqdm(uniprot_ids):
    bioactivities_df = fetch_ic50_bioactivities(uniprot)
    uniprot_ids_dict[uniprot] = bioactivities_df
    # `inhibitors` stays aligned with `uniprot_ids`: a record count per target,
    # or the string 'N/A' when nothing was found.
    inhibitors.append('N/A' if bioactivities_df is None else len(bioactivities_df))

  0%|          | 0/49 [00:00<?, ?it/s]

In [14]:
# Report, per UniProt accession, how many bioactivity rows were retrieved.
# The figure is the row count of the stored DataFrame, so a compound measured
# in several assays is counted once per record. Targets without data show NaN.
for accession, frame in uniprot_ids_dict.items():
    count_label = "NaN" if frame is None else len(frame)
    print(f"UNIPROT: {accession}, No. of Molecules: {count_label}")

UNIPROT: P03372, No. of Molecules: 3091
UNIPROT: P42336, No. of Molecules: 5423
UNIPROT: Q99728, No. of Molecules: NaN
UNIPROT: P38398, No. of Molecules: 16
UNIPROT: P04637, No. of Molecules: 28
UNIPROT: P51587, No. of Molecules: NaN
UNIPROT: Q03135, No. of Molecules: NaN
UNIPROT: P12830, No. of Molecules: NaN
UNIPROT: P04626, No. of Molecules: 2154
UNIPROT: Q86YC2, No. of Molecules: NaN
UNIPROT: Q9BX63, No. of Molecules: NaN
UNIPROT: O96017, No. of Molecules: 650
UNIPROT: P60484, No. of Molecules: 4
UNIPROT: P31749, No. of Molecules: 3620
UNIPROT: Q13315, No. of Molecules: 434
UNIPROT: P01116, No. of Molecules: 635
UNIPROT: P21802, No. of Molecules: 611
UNIPROT: Q09472, No. of Molecules: 388
UNIPROT: Q92698, No. of Molecules: NaN
UNIPROT: P55317, No. of Molecules: NaN
UNIPROT: P06400, No. of Molecules: 24
UNIPROT: Q92731, No. of Molecules: 2059
UNIPROT: P31751, No. of Molecules: 1034
UNIPROT: P42224, No. of Molecules: 26
UNIPROT: O15119, No. of Molecules: NaN
UNIPROT: P15559, No. of M

In [15]:
# Output workbook (relative to the working directory): one sheet per target.
excel_file_path = "brc_targets_inhibitors.xlsx"

In [15]:
# Keep only the targets that actually have a bioactivity table; entries that
# are still None (no target / no IC50 records) get no sheet.
sheets = {
    sheet_name: df
    for sheet_name, df in uniprot_ids_dict.items()
    if isinstance(df, pd.DataFrame)
}

if sheets:
    # One sheet per UniProt accession, named after the accession.
    with pd.ExcelWriter(excel_file_path) as writer:
        for sheet_name, df in sheets.items():
            df.to_excel(writer, sheet_name=sheet_name, index=False)
else:
    # A workbook with no sheets cannot be saved by some Excel engines
    # (e.g. openpyxl), so skip the export rather than fail on close.
    print(f"No bioactivity tables to export; {excel_file_path} was not written.")