In [1]:
# Download DHODH binding data from ChEMBL using the official python client from https://github.com/chembl/chembl_webresource_client
import itertools, os, os.path
import numpy as np, pandas as pd
import chembl_webresource_client.new_client

In [2]:
from chembl_webresource_client.new_client import new_client

available_resources = [resource for resource in dir(new_client) if not resource.startswith('_')]
print(available_resources)



In [3]:
DHODH_chembl_id = 'CHEMBL1966' # (now solved - ) https://github.com/chembl/GLaDOS/issues/1319 

In [4]:
df_activity = pd.DataFrame.from_records(chembl_webresource_client.new_client.new_client.activity.filter(target_chembl_id=DHODH_chembl_id, assay_type='B'))
print(len(df_activity))
df_activity.head(3).transpose()

2511


Unnamed: 0,0,1,2
action_type,,,
activity_comment,,,
activity_id,105032,105032,105033
activity_properties,[],[],[]
assay_chembl_id,CHEMBL665412,CHEMBL665412,CHEMBL665412
assay_description,In vitro inhibitory activity against human dih...,In vitro inhibitory activity against human dih...,In vitro inhibitory activity against human dih...
assay_type,B,B,B
assay_variant_accession,,,
assay_variant_mutation,,,
bao_endpoint,BAO_0000190,BAO_0000190,BAO_0000190


In [5]:
df_assay = pd.DataFrame.from_records(chembl_webresource_client.new_client.new_client.assay.filter(target_chembl_id=DHODH_chembl_id, assay_type='B'))
print(len(df_assay))
df_assay.head(3).transpose()

244


Unnamed: 0,0,1,2
aidx,CLD0,CLD0,CLD0
assay_category,,,
assay_cell_type,,,
assay_chembl_id,CHEMBL665409,CHEMBL665409,CHEMBL665410
assay_classifications,[],[],[]
assay_organism,Helicobacter pylori,Helicobacter pylori,
assay_parameters,[],[],[]
assay_strain,,,
assay_subcellular_fraction,,,
assay_tax_id,210.0,210.0,


In [6]:
df_actives = df_activity.dropna(subset=['pchembl_value']).copy()
df_actives['pchembl_value'] = pd.to_numeric(df_actives['pchembl_value'])
df_actives = df_actives.merge(df_assay[['assay_chembl_id', 'confidence_score']], on='assay_chembl_id').query('(confidence_score == 9)')
df_actives

Unnamed: 0,action_type,activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,assay_variant_accession,assay_variant_mutation,bao_endpoint,...,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value,confidence_score
116,,,449364,[],CHEMBL872485,Inhibition of human dihydroorotate dehydrogena...,B,,,BAO_0000192,...,Dihydroorotate dehydrogenase,9606,,,Ki,nM,UO_0000065,,50000.0,9
175,,,1602450,[],CHEMBL879486,Inhibition of N-terminally truncated recombina...,B,,,BAO_0000190,...,Dihydroorotate dehydrogenase,9606,,,IC50,uM,UO_0000065,,0.41,9
176,,,1602453,[],CHEMBL879486,Inhibition of N-terminally truncated recombina...,B,,,BAO_0000190,...,Dihydroorotate dehydrogenase,9606,,,IC50,uM,UO_0000065,,0.667,9
177,,,1602454,[],CHEMBL879486,Inhibition of N-terminally truncated recombina...,B,,,BAO_0000190,...,Dihydroorotate dehydrogenase,9606,,,IC50,uM,UO_0000065,,3.8,9
178,,,1602455,[],CHEMBL879486,Inhibition of N-terminally truncated recombina...,B,,,BAO_0000190,...,Dihydroorotate dehydrogenase,9606,,,IC50,uM,UO_0000065,,0.134,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1266,"{'action_type': 'INHIBITOR', 'description': 'N...",,24827301,[],CHEMBL5141994,Inhibition of His fused human DHODH expressed ...,B,,,BAO_0000190,...,Dihydroorotate dehydrogenase,9606,,,IC50,nM,UO_0000065,,11.0,9
1267,"{'action_type': 'INHIBITOR', 'description': 'N...",,24827302,[],CHEMBL5141994,Inhibition of His fused human DHODH expressed ...,B,,,BAO_0000190,...,Dihydroorotate dehydrogenase,9606,,,IC50,nM,UO_0000065,,0.8,9
1268,"{'action_type': 'INHIBITOR', 'description': 'N...",,24827378,[],CHEMBL5142000,Binding affinity to His fused human DHODH expr...,B,,,BAO_0000034,...,Dihydroorotate dehydrogenase,9606,,,Kd,nM,UO_0000065,,3.3,9
1269,"{'action_type': 'INHIBITOR', 'description': 'N...",,24827379,[],CHEMBL5142000,Binding affinity to His fused human DHODH expr...,B,,,BAO_0000034,...,Dihydroorotate dehydrogenase,9606,,,Kd,nM,UO_0000065,,1.1,9


In [7]:
df_agg = df_actives.groupby(['canonical_smiles', 'molecule_chembl_id']).agg({
    'pchembl_value': np.mean,
    'molecule_pref_name': lambda x: next(iter(x)),
}).reset_index()
df_agg

Unnamed: 0,canonical_smiles,molecule_chembl_id,pchembl_value,molecule_pref_name
0,Brc1ccc(-c2csc(N/N=C/c3ccccc3)n2)cc1,CHEMBL1076869,5.48,
1,C/C(=C\C[C@@]1(C)[C@H](C)CCC(=O)[C@@H]1C)[C@@H...,CHEMBL4854654,5.33,
2,C/C(=C\Cc1c(O)c(Cl)c(C)c(C(=O)O)c1O)CC/C=C(\C)...,CHEMBL4859267,4.70,
3,C/C(=C\Cc1c(O)c(Cl)c(C)c(C=O)c1O)CC/C(C)=C(\C)...,CHEMBL4847651,4.50,
4,C/C(=C\Cc1c(O)c(Cl)c(C)c(C=O)c1O)CC/C=C(\C)CCC...,CHEMBL4874137,5.03,
...,...,...,...,...
931,OC1c2ccccc2C(O)c2c1nnn2-c1cc(F)c(-c2ccccc2F)c(...,CHEMBL4639751,8.29,
932,Oc1ccc(/C=N/c2cccc3ccccc23)c(O)c1,CHEMBL2178107,6.11,
933,Oc1ccc(/N=C/c2ccc3ccccc3c2)cc1,CHEMBL2178111,5.40,
934,[2H]C([2H])([2H])c1ccccc1NC(=O)c1cc(F)c(-c2cn(...,CHEMBL5073905,9.65,


In [8]:
fp_ = os.path.join(os.path.abspath(""), 'DHODH_ChEMBL.tsv') #https://stackoverflow.com/questions/52119454/how-to-obtain-jupyter-notebooks-path
df_agg.to_csv(fp_, sep='\t', index=False, header=True)
!wc -l {fp_}

     937 /Users/jjaenes/23.03_DHODH/resources/DHODH_ChEMBL/DHODH_ChEMBL.tsv


In [9]:
fp_ = os.path.join(os.path.abspath(""), 'DHODH_ChEMBL.smi')
df_agg[['canonical_smiles', 'molecule_chembl_id']].to_csv(fp_, sep='\t', index=False, header=False)
!wc -l {fp_}

     936 /Users/jjaenes/23.03_DHODH/resources/DHODH_ChEMBL/DHODH_ChEMBL.smi
