In [1]:
import pandas as pd
from chembl_webresource_client.new_client import new_client

from tqdm import tqdm

import time

In [2]:
molecule = new_client.molecule

safe_drugs = molecule.filter(organism="Homo sapiens", assay_type="B", pchembl_value__gte=6, pchembl_value__lte=9, max_phase=4, known_drug=True, oral=True).only(["molecule_chembl_id", "pref_name", "molecule_structures"])
safe_drugs_df = pd.DataFrame(safe_drugs)
safe_drugs_df['canonical_smiles'] = safe_drugs_df.loc[ safe_drugs_df['molecule_structures'].notnull(),'molecule_structures'].apply(lambda x: x['canonical_smiles'])

safe_drugs_list = safe_drugs_df["molecule_chembl_id"].tolist()
len(safe_drugs_list)

1923

In [3]:
safe_drugs_df

Unnamed: 0,molecule_chembl_id,molecule_structures,pref_name,canonical_smiles
0,CHEMBL2,{'canonical_smiles': 'COc1cc2nc(N3CCN(C(=O)c4c...,PRAZOSIN,COc1cc2nc(N3CCN(C(=O)c4ccco4)CC3)nc(N)c2cc1OC
1,CHEMBL3,"{'canonical_smiles': 'CN1CCC[C@H]1c1cccnc1', '...",NICOTINE,CN1CCC[C@H]1c1cccnc1
2,CHEMBL4,{'canonical_smiles': 'CC1COc2c(N3CCN(C)CC3)c(F...,OFLOXACIN,CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23
3,CHEMBL5,{'canonical_smiles': 'CCn1cc(C(=O)O)c(=O)c2ccc...,NALIDIXIC ACID,CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21
4,CHEMBL6,{'canonical_smiles': 'COc1ccc2c(c1)c(CC(=O)O)c...,INDOMETHACIN,COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c1ccc(Cl)cc1
...,...,...,...,...
1918,CHEMBL5315120,{'canonical_smiles': 'CCCCC1(CCCC)CN(c2ccccc2)...,ODEVIXIBAT SESQUIHYDRATE,CCCCC1(CCCC)CN(c2ccccc2)c2cc(SC)c(OCC(=O)N[C@@...
1919,CHEMBL5315121,{'canonical_smiles': 'O.O=C(NN1C(=O)[C@@H]2[C@...,TECOVIRIMAT MONOHYDRATE,O.O=C(NN1C(=O)[C@@H]2[C@@H]3C=C[C@@H]([C@H]4C[...
1920,CHEMBL5315124,{'canonical_smiles': 'C[C@H]1COc2c(N3CCN(C)CC3...,LEVOFLOXACIN,C[C@H]1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)...
1921,CHEMBL5315125,{'canonical_smiles': 'C[C@H](Nc1ncnc2nc[nH]c12...,DUVELISIB MONOHYDRATE,C[C@H](Nc1ncnc2nc[nH]c12)c1cc2cccc(Cl)c2c(=O)n...


In [4]:
def ChemblMOA(chembl_id):
    max_retries = 5
    retries = 0
    while retries < max_retries:
        try:
            activity = new_client.mechanism.filter(molecule_chembl_id=chembl_id)[0]
            activity_df = pd.DataFrame(activity)
            drop_col = ['binding_site_comment', 'mec_id', 'mechanism_comment', 'mechanism_refs', 'molecular_mechanism', 'record_id', 'selectivity_comment', 'site_id', 'variant_sequence']
            cols_to_drop = list(set(drop_col).intersection(activity_df.columns))
            activity_clean_df = activity_df.drop(cols_to_drop, axis=1)
            activity_final_df = activity_clean_df.drop_duplicates(subset="molecule_chembl_id", keep="first")
            return(activity_final_df)
        except:
            retries += 1
            time.sleep(1)
            return None

In [5]:
ChemblMOA("CHEMBL112")

Unnamed: 0,action_type,direct_interaction,disease_efficacy,max_phase,mechanism_of_action,molecule_chembl_id,parent_molecule_chembl_id,target_chembl_id
0,INHIBITOR,1,1,4,Cyclooxygenase inhibitor,CHEMBL112,CHEMBL112,CHEMBL2094253


In [6]:
chembl_moa_list = []

for id in tqdm(safe_drugs_list, total = len(safe_drugs_list)):
    res = ChemblMOA(id)
    res2 = pd.DataFrame(res)
    chembl_moa_list.append(res2)
                
chembl_moa_df = pd.concat(chembl_moa_list, ignore_index=True)

100%|███████████████████████████████████████| 1923/1923 [05:19<00:00,  6.02it/s]


In [7]:
len(chembl_moa_df)

1425

In [8]:
chembl_moa_df = chembl_moa_df[chembl_moa_df["mechanism_of_action"].notna()]
len(chembl_moa_df)

1425

In [9]:
chembl_moa_names_df = chembl_moa_df.merge(safe_drugs_df, on="molecule_chembl_id", how="left")
chembl_moa_names_df

Unnamed: 0,action_type,direct_interaction,disease_efficacy,max_phase,mechanism_of_action,molecule_chembl_id,parent_molecule_chembl_id,target_chembl_id,molecule_structures,pref_name,canonical_smiles
0,AGONIST,1,1,4,Neuronal acetylcholine receptor; alpha4/beta2 ...,CHEMBL3,CHEMBL3,CHEMBL1907589,"{'canonical_smiles': 'CN1CCC[C@H]1c1cccnc1', '...",NICOTINE,CN1CCC[C@H]1c1cccnc1
1,INHIBITOR,1,1,4,Bacterial DNA gyrase inhibitor,CHEMBL4,CHEMBL4,CHEMBL2311224,{'canonical_smiles': 'CC1COc2c(N3CCN(C)CC3)c(F...,OFLOXACIN,CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23
2,INHIBITOR,1,1,4,Bacterial DNA gyrase inhibitor,CHEMBL5,CHEMBL5,CHEMBL2311224,{'canonical_smiles': 'CCn1cc(C(=O)O)c(=O)c2ccc...,NALIDIXIC ACID,CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21
3,INHIBITOR,1,1,4,Cyclooxygenase inhibitor,CHEMBL6,CHEMBL6,CHEMBL2094253,{'canonical_smiles': 'COc1ccc2c(c1)c(CC(=O)O)c...,INDOMETHACIN,COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c1ccc(Cl)cc1
4,INHIBITOR,1,1,4,Bacterial DNA gyrase inhibitor,CHEMBL8,CHEMBL8,CHEMBL2311224,{'canonical_smiles': 'O=C(O)c1cn(C2CC2)c2cc(N3...,CIPROFLOXACIN,O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc2c1=O
...,...,...,...,...,...,...,...,...,...,...,...
1420,INHIBITOR,1,1,4,Ileal bile acid transporter inhibitor,CHEMBL5315120,CHEMBL4297588,CHEMBL2778,{'canonical_smiles': 'CCCCC1(CCCC)CN(c2ccccc2)...,ODEVIXIBAT SESQUIHYDRATE,CCCCC1(CCCC)CN(c2ccccc2)c2cc(SC)c(OCC(=O)N[C@@...
1421,INHIBITOR,1,1,4,Envelope phospholipase OPG057 inhibitor,CHEMBL5315121,CHEMBL1257073,CHEMBL5308522,{'canonical_smiles': 'O.O=C(NN1C(=O)[C@@H]2[C@...,TECOVIRIMAT MONOHYDRATE,O.O=C(NN1C(=O)[C@@H]2[C@@H]3C=C[C@@H]([C@H]4C[...
1422,INHIBITOR,1,1,4,DNA gyrase inhibitor,CHEMBL5315124,CHEMBL33,CHEMBL2311225,{'canonical_smiles': 'C[C@H]1COc2c(N3CCN(C)CC3...,LEVOFLOXACIN,C[C@H]1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)...
1423,INHIBITOR,1,1,4,PI3-kinase p110-delta subunit inhibitor,CHEMBL5315125,CHEMBL3039502,CHEMBL3130,{'canonical_smiles': 'C[C@H](Nc1ncnc2nc[nH]c12...,DUVELISIB MONOHYDRATE,C[C@H](Nc1ncnc2nc[nH]c12)c1cc2cccc(Cl)c2c(=O)n...


In [10]:
mito_str_list = ["Mitochondria", "mitochondria"]

mito_moa = chembl_moa_names_df[chembl_moa_names_df["mechanism_of_action"].str.contains('|'.join(mito_str_list))]
mito_moa

Unnamed: 0,action_type,direct_interaction,disease_efficacy,max_phase,mechanism_of_action,molecule_chembl_id,parent_molecule_chembl_id,target_chembl_id,molecule_structures,pref_name,canonical_smiles
542,INHIBITOR,1,1,4,"DNA topoisomerase I, mitochondrial inhibitor",CHEMBL1607,CHEMBL84,CHEMBL2362989,{'canonical_smiles': 'CC[C@@]1(O)C(=O)OCc2c1cc...,TOPOTECAN HYDROCHLORIDE,CC[C@@]1(O)C(=O)OCc2c1cc1n(c2=O)Cc2cc3c(CN(C)C...
591,INHIBITOR,1,1,4,Mitochondrial complex I (NADH dehydrogenase) i...,CHEMBL1703,CHEMBL1431,CHEMBL2363065,"{'canonical_smiles': 'CN(C)C(=N)NC(=N)N.Cl', '...",METFORMIN HYDROCHLORIDE,CN(C)C(=N)NC(=N)N.Cl
966,POSITIVE ALLOSTERIC MODULATOR,1,1,4,"Carbamoyl-phosphate synthase [ammonia], mitoch...",CHEMBL1201780,CHEMBL1201780,CHEMBL2362990,{'canonical_smiles': 'NC(=O)N[C@@H](CCC(=O)O)C...,CARGLUMIC ACID,NC(=O)N[C@@H](CCC(=O)O)C(=O)O


In [11]:
mito_safe_drugs = chembl_moa_names_df[~chembl_moa_names_df["mechanism_of_action"].str.contains('|'.join(mito_str_list))]
mito_safe_drugs

Unnamed: 0,action_type,direct_interaction,disease_efficacy,max_phase,mechanism_of_action,molecule_chembl_id,parent_molecule_chembl_id,target_chembl_id,molecule_structures,pref_name,canonical_smiles
0,AGONIST,1,1,4,Neuronal acetylcholine receptor; alpha4/beta2 ...,CHEMBL3,CHEMBL3,CHEMBL1907589,"{'canonical_smiles': 'CN1CCC[C@H]1c1cccnc1', '...",NICOTINE,CN1CCC[C@H]1c1cccnc1
1,INHIBITOR,1,1,4,Bacterial DNA gyrase inhibitor,CHEMBL4,CHEMBL4,CHEMBL2311224,{'canonical_smiles': 'CC1COc2c(N3CCN(C)CC3)c(F...,OFLOXACIN,CC1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)cn1c23
2,INHIBITOR,1,1,4,Bacterial DNA gyrase inhibitor,CHEMBL5,CHEMBL5,CHEMBL2311224,{'canonical_smiles': 'CCn1cc(C(=O)O)c(=O)c2ccc...,NALIDIXIC ACID,CCn1cc(C(=O)O)c(=O)c2ccc(C)nc21
3,INHIBITOR,1,1,4,Cyclooxygenase inhibitor,CHEMBL6,CHEMBL6,CHEMBL2094253,{'canonical_smiles': 'COc1ccc2c(c1)c(CC(=O)O)c...,INDOMETHACIN,COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c1ccc(Cl)cc1
4,INHIBITOR,1,1,4,Bacterial DNA gyrase inhibitor,CHEMBL8,CHEMBL8,CHEMBL2311224,{'canonical_smiles': 'O=C(O)c1cn(C2CC2)c2cc(N3...,CIPROFLOXACIN,O=C(O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc2c1=O
...,...,...,...,...,...,...,...,...,...,...,...
1420,INHIBITOR,1,1,4,Ileal bile acid transporter inhibitor,CHEMBL5315120,CHEMBL4297588,CHEMBL2778,{'canonical_smiles': 'CCCCC1(CCCC)CN(c2ccccc2)...,ODEVIXIBAT SESQUIHYDRATE,CCCCC1(CCCC)CN(c2ccccc2)c2cc(SC)c(OCC(=O)N[C@@...
1421,INHIBITOR,1,1,4,Envelope phospholipase OPG057 inhibitor,CHEMBL5315121,CHEMBL1257073,CHEMBL5308522,{'canonical_smiles': 'O.O=C(NN1C(=O)[C@@H]2[C@...,TECOVIRIMAT MONOHYDRATE,O.O=C(NN1C(=O)[C@@H]2[C@@H]3C=C[C@@H]([C@H]4C[...
1422,INHIBITOR,1,1,4,DNA gyrase inhibitor,CHEMBL5315124,CHEMBL33,CHEMBL2311225,{'canonical_smiles': 'C[C@H]1COc2c(N3CCN(C)CC3...,LEVOFLOXACIN,C[C@H]1COc2c(N3CCN(C)CC3)c(F)cc3c(=O)c(C(=O)O)...
1423,INHIBITOR,1,1,4,PI3-kinase p110-delta subunit inhibitor,CHEMBL5315125,CHEMBL3039502,CHEMBL3130,{'canonical_smiles': 'C[C@H](Nc1ncnc2nc[nH]c12...,DUVELISIB MONOHYDRATE,C[C@H](Nc1ncnc2nc[nH]c12)c1cc2cccc(Cl)c2c(=O)n...


# Identify whether there are any hepatotox or cardiotox alerts in the 'mito safe drugs' dataset

In [12]:
drug_warning = new_client.drug_warning
hepatotox_mol = drug_warning.filter(warning_class__icontains='hepato')
hepatotox_df = pd.DataFrame(hepatotox_mol)
hepatotox_df

Unnamed: 0,efo_id,efo_id_for_warning_class,efo_term,molecule_chembl_id,parent_molecule_chembl_id,warning_class,warning_country,warning_description,warning_id,warning_refs,warning_type,warning_year
0,,EFO:0011052,,CHEMBL4303288,CHEMBL1380,hepatotoxicity,United States,,1,[{'ref_id': 'de109a2b-e36c-40d0-85fc-a67a9e7f1...,Black Box Warning,
1,,EFO:0011052,,CHEMBL112,CHEMBL112,hepatotoxicity,United States,,17,[{'ref_id': 'c5177abd-9465-40d8-861d-3904496d8...,Black Box Warning,
2,,EFO:0011052,,CHEMBL1131,CHEMBL1131,hepatotoxicity,United States,,27,[{'ref_id': '6af396c2-af3e-436d-ba9a-583637495...,Black Box Warning,
3,,EFO:0011052,,CHEMBL922,CHEMBL922,hepatotoxicity,United States,,36,[{'ref_id': 'e047f3b2-feae-4c5e-9d07-1fefb4c0e...,Black Box Warning,
4,,EFO:0011052,,CHEMBL957,CHEMBL957,hepatotoxicity,United States,,225,[{'ref_id': 'aceac005-5c16-41bf-9a5f-35ed4b2c1...,Black Box Warning,
...,...,...,...,...,...,...,...,...,...,...,...,...
191,EFO:0004228,EFO:0011052,drug-induced liver injury,CHEMBL1909288,CHEMBL1909288,hepatotoxicity,Australia,Hepatotoxicity,3708,"[{'ref_id': '10.1177/009286150103500134', 'ref...",Withdrawn,1971.0
192,,EFO:0011052,,CHEMBL20,CHEMBL20,hepatotoxicity,United States,,3724,[{'ref_id': '91dce113-7e16-4194-837e-d45719f8e...,Black Box Warning,
193,,EFO:0011052,,CHEMBL1201825,CHEMBL1201825,hepatotoxicity,United States,,3758,[{'ref_id': '9c6d9e2a-9a77-4d11-b692-de87cfde3...,Black Box Warning,
194,,EFO:0011052,,CHEMBL295698,CHEMBL295698,hepatotoxicity,United States,,3807,[],Black Box Warning,


In [13]:
drug_warning = new_client.drug_warning
cardiotox_mol = drug_warning.filter(warning_class__icontains='cardio')
cardiotox_df = pd.DataFrame(cardiotox_mol)
cardiotox_df

Unnamed: 0,efo_id,efo_id_for_warning_class,efo_term,molecule_chembl_id,parent_molecule_chembl_id,warning_class,warning_country,warning_description,warning_id,warning_refs,warning_type,warning_year
0,,EFO:1001482,,CHEMBL270190,CHEMBL270190,cardiotoxicity,United States,,81,[{'ref_id': '77a67dc6-35d3-48ff-9d18-292d4d442...,Black Box Warning,
1,,EFO:1001482,,CHEMBL1083993,CHEMBL633,cardiotoxicity,United States,,98,[{'ref_id': '730039c2-0a32-4775-855d-98b2207e9...,Black Box Warning,
2,,EFO:1001482,,CHEMBL501,CHEMBL405,cardiotoxicity,United States,,109,[{'ref_id': 'f469fb38-0380-4621-9db3-a4f429126...,Black Box Warning,
3,,EFO:1001482,,,,cardiotoxicity,United States,,135,[{'ref_id': 'a482eccd-8837-47ea-904d-2f2c294d1...,Black Box Warning,
4,,EFO:1001482,,CHEMBL24,CHEMBL24,cardiotoxicity,United States,,145,[{'ref_id': '06c0a04f-a77e-4871-9bb4-13abe2cbb...,Black Box Warning,
...,...,...,...,...,...,...,...,...,...,...,...,...
169,,EFO:1001482,,CHEMBL2364649,CHEMBL2364649,cardiotoxicity,United States,,3746,[{'ref_id': 'e97a5872-eabf-463b-8f4c-5b5aed9c7...,Black Box Warning,
170,,EFO:1001482,,CHEMBL4802239,CHEMBL4650319,cardiotoxicity,United States,,3784,[{'ref_id': 'f1a91500-a944-4cb8-b4a8-ae278bcf7...,Black Box Warning,
171,,EFO:1001482,,CHEMBL295698,CHEMBL295698,cardiotoxicity,United States,,3806,[],Black Box Warning,
172,,EFO:1001482,,CHEMBL4297517,CHEMBL4297517,cardiotoxicity,United States,,3813,[],Black Box Warning,


In [14]:
mito_safe_drugs_id = mito_safe_drugs["molecule_chembl_id"].tolist()
print(len(mito_safe_drugs_id))

hepatotox_id = hepatotox_df["molecule_chembl_id"].tolist()
print(len(hepatotox_id))

cardiotox_id = cardiotox_df["molecule_chembl_id"].tolist()
print(len(cardiotox_id))

1422
196
174


In [15]:
mito_safe_drugs_cross_hepatotox = list(set(mito_safe_drugs_id) & set(hepatotox_id))
print(len(mito_safe_drugs_cross_hepatotox))

mito_safe_drugs_cross_cardiotox = list(set(mito_safe_drugs_id) & set(cardiotox_id))
print(len(mito_safe_drugs_cross_cardiotox))

hepatotox_cross_cardiotox = list(set(cardiotox_id) & set(hepatotox_id))
print(len(hepatotox_cross_cardiotox))

56
84
6


In [16]:
mito_safe_drugs_hepatotox_filt = mito_safe_drugs[~mito_safe_drugs['molecule_chembl_id'].isin(hepatotox_id)]
len(mito_safe_drugs_hepatotox_filt)

1366

In [17]:
mito_safe_drugs_hepato_cardio_tox_filt = mito_safe_drugs_hepatotox_filt[~mito_safe_drugs_hepatotox_filt['molecule_chembl_id'].isin(cardiotox_id)]
len(mito_safe_drugs_hepato_cardio_tox_filt)

1287

In [18]:
#mito_safe_drugs_hepato_cardio_tox_filt.to_csv("../AL00_datasets/chembl_mito_safe_drugs_cardio_hepato_alerts_removed.csv", index=False)