#### Import libraries

In [17]:
import numpy as np
import pandas as pd
from chembl_webresource_client.new_client import new_client
from tqdm.auto import tqdm

#### Connect with Chembl api

In [18]:
targets_api = new_client.target
compounds_api = new_client.molecule
bioactivities_api = new_client.activity

#### Import the proteins/targets of interest 

###### In this example I'm using the top targets of Breast Carcinoma (Disgenet: C0006142) to check the number of inhibitors associated with each via experimental validation IC50. You can make your list of targets based on your study.

In [4]:
brc_report = pd.read_excel('/home/sumit/Downloads/C0006142_disease_gda_summary.xlsx')

In [5]:
brc_report.head()

Unnamed: 0,Disease,Disease_id,Gene,Gene_id,UniProt,Gene_Full_Name,Protein_Class,N_diseases_g,DSI_g,DPI_g,pLI,Score_gda,EL_gda,EI_gda,N_PMIDs,N_SNPs_gda,First_Ref,Last_Ref
0,Malignant neoplasm of breast,C0006142,ESR1,2099,P03372,estrogen receptor 1,Nuclear receptor,1101,0.324,0.962,0.9992,1.0,,0.967,3371,41,1983,2020
1,Malignant neoplasm of breast,C0006142,PIK3CA,5290,P42336,"phosphatidylinositol-4,5-bisphosphate 3-kinase...",Kinase,1511,0.292,0.923,1.0,1.0,,0.985,542,13,2001,2020
2,Malignant neoplasm of breast,C0006142,BARD1,580,Q99728,BRCA1 associated RING domain 1,,75,0.597,0.538,1.3777e-24,1.0,limited,0.896,67,74,1996,2020
3,Malignant neoplasm of breast,C0006142,BRCA1,672,P38398,BRCA1 DNA repair associated,Enzyme,747,0.367,0.923,9.2157e-29,1.0,strong,0.956,2827,251,1992,2020
4,Malignant neoplasm of breast,C0006142,TP53,7157,P04637,tumor protein p53,Transcription factor,2494,0.236,0.962,0.53235,1.0,,0.973,1169,49,1982,2020


#### We will use UniProt IDs of the targets to obtain their respective CHEMBL IDs.  

In [6]:
uniprot_ids = list(brc_report['UniProt'])

In [7]:
inhibitors = []

In [8]:
uniprot_ids_dict = {uniprot_id: None for uniprot_id in uniprot_ids}

In [9]:
uniprot_ids_dict

{'P03372': None,
 'P42336': None,
 'Q99728': None,
 'P38398': None,
 'P04637': None,
 'P51587': None,
 'Q03135': None,
 'P12830': None,
 'P04626': None,
 'Q86YC2': None,
 'Q9BX63': None,
 'O96017': None,
 'P60484': None,
 'P31749': None,
 'Q13315': None,
 'P01116': None,
 'P21802': None,
 'Q09472': None,
 'Q92698': None,
 'P55317': None,
 'P06400': None,
 'Q92731': None,
 'P31751': None,
 'P42224': None,
 'O15119': None,
 'P15559': None,
 'Q9Y6Q9': None,
 'P35228': None,
 'P09874': None,
 'Q14790': None,
 'O15297': None,
 'Q06609': None,
 'P11487': None,
 'P12272': None,
 'P01112': None,
 'P23771': None,
 'P10275': None,
 'P02751': None,
 'Q00987': None,
 'P03956': None,
 'P17948': None,
 'P46527': None,
 'P46531': None,
 'Q01973': None,
 'P35232': None,
 'P11362': None,
 'Q969H0': None,
 'P05019': None,
 'Q92560': None}

In [10]:
for uniprot in tqdm(uniprot_ids):
    targets = targets_api.get(target_components__accession=uniprot)
    targets = pd.DataFrame.from_records(targets)
    
    # Check if the 'targets' DataFrame is empty or if the number of molecules is 0
    if targets.empty:
        inhibitors.append('N/A')
    else:
        target = targets.iloc[0]
        chembl_id = target.target_chembl_id
        bioactivities = bioactivities_api.filter(
            target_chembl_id=chembl_id, type="IC50", relation="=", assay_type="B", target_organism="Homo sapiens"
        ).only("activity_id", "assay_chembl_id", "assay_description", "assay_type", "molecule_chembl_id", "type", "standard_units", 
               "relation", "standard_value", "target_chembl_id", "target_organism",)
        molecules = len(bioactivities)
        if molecules == 0:
            inhibitors.append('N/A')
        else:
            inhibitors.append(molecules)
            if uniprot in uniprot_ids_dict:
                uniprot_ids_dict[uniprot] = pd.DataFrame.from_records(bioactivities)

  0%|          | 0/49 [00:00<?, ?it/s]

In [11]:
uniprot_ids_dict

{'P03372':       activity_id assay_chembl_id  \
 0           72003    CHEMBL679321   
 1           72003    CHEMBL679321   
 2           74062    CHEMBL679321   
 3           76289    CHEMBL679321   
 4           77402    CHEMBL679321   
 ...           ...             ...   
 3086     24911036   CHEMBL5163453   
 3087     24911037   CHEMBL5163453   
 3088     24911038   CHEMBL5163453   
 3089     24926126   CHEMBL5167822   
 3090     24963750   CHEMBL5216562   
 
                                       assay_description assay_type  \
 0     Binding affinity towards human estrogen recept...          B   
 1     Binding affinity towards human estrogen recept...          B   
 2     Binding affinity towards human estrogen recept...          B   
 3     Binding affinity towards human estrogen recept...          B   
 4     Binding affinity towards human estrogen recept...          B   
 ...                                                 ...        ...   
 3086  Antagonist activity at ERalp

In [14]:
# Specify the Excel file path
excel_file_path = 'brc_targets_inhibitors.xlsx'

In [15]:
with pd.ExcelWriter(excel_file_path) as writer:
    # Iterate through the dictionary and write each DataFrame on a sheet
    for sheet_name, df in uniprot_ids_dict.items():
        # Check if df is a DataFrame
        if isinstance(df, pd.DataFrame):
            df.to_excel(writer, sheet_name=sheet_name, index=False)